├── pyproject.toml
├── requirements.txt
├── mmdet_configs
    ├── fcos
    │   ├── fcos_center_r50_caffe_fpn_gn-head_1x_coco.py
    │   ├── fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py
    │   └── fcos_r50_caffe_fpn_gn-head_1x_coco.py
    ├── _base_
    │   ├── schedules
    │   │   ├── schedule_1x.py
    │   │   ├── schedule_20e.py
    │   │   └── schedule_2x.py
    │   ├── default_runtime.py
    │   ├── datasets
    │   │   ├── lvis_v1_instance.py
    │   │   ├── lvis_v0.5_instance.py
    │   │   ├── coco_detection.py
    │   │   ├── coco_instance.py
    │   │   ├── deepfashion.py
    │   │   ├── voc0712.py
    │   │   ├── coco_instance_semantic.py
    │   │   ├── cityscapes_detection.py
    │   │   ├── cityscapes_instance.py
    │   │   ├── wider_face.py
    │   │   └── coco_panoptic.py
    │   └── models
    │   │   ├── ssd300.py
    │   │   ├── retinanet_r50_fpn.py
    │   │   ├── rpn_r50_caffe_c4.py
    │   │   ├── rpn_r50_fpn.py
    │   │   ├── fast_rcnn_r50_fpn.py
    │   │   ├── faster_rcnn_r50_caffe_dc5.py
    │   │   ├── faster_rcnn_r50_fpn.py
    │   │   ├── faster_rcnn_r50_caffe_c4.py
    │   │   ├── mask_rcnn_r50_fpn.py
    │   │   ├── mask_rcnn_r50_caffe_c4.py
    │   │   ├── cascade_rcnn_r50_fpn.py
    │   │   └── cascade_mask_rcnn_r50_fpn.py
    ├── tood
    │   └── tood_r50_fpn_1x_coco.py
    ├── visdrone_tood
    │   ├── tood_full_cls_60.py
    │   └── tood_crop_480_960_cls_60.py
    ├── visdrone_vfnet
    │   ├── vfnet_full_cls_60.py
    │   └── vfnet_crop_480_960_cls_60.py
    ├── visdrone_fcos
    │   ├── fcos_full_cls_60.py
    │   └── fcos_crop_480_960_cls_60.py
    ├── vfnet
    │   └── vfnet_r50_fpn_1x_coco.py
    ├── xview_tood
    │   ├── tood_full_cls_60.py
    │   └── tood_crop_300_500_cls_60.py
    ├── xview_vfnet
    │   ├── vfnet_full_cls_60.py
    │   └── vfnet_crop_300_500_cls_60.py
    └── xview_fcos
    │   ├── fcos_full_cls_60.py
    │   └── fcos_crop_300_500_cls_60.py
├── CITATION.cff
├── visdrone
    ├── slice_visdrone.py
    └── visdrone_to_coco.py
├── xview
    ├── slice_xview.py
    ├── xview_class_labels.txt
    ├── category_id_mapping.json
    └── xview_to_coco.py
├── LICENSE
├── .gitignore
├── eval_tools
    └── predict_evaluate_analyse.py
├── mmdet_tools
    └── train.py
└── README.md


/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 120
3 | 
4 | [tool.isort]
5 | line_length = 120
6 | profile = "black"


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | sahi==0.9.3
2 | pillow
3 | mmdet==2.21.0
4 | mmcv-full==1.4.3
5 | -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html
6 | tensorboard>=2.7.0
7 | scipy
8 | 


--------------------------------------------------------------------------------
/mmdet_configs/fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py'
2 | model = dict(bbox_head=dict(center_sampling=True, center_sample_radius=1.5))
3 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/schedules/schedule_1x.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 3 | optimizer_config = dict(grad_clip=None)
 4 | # learning policy
 5 | lr_config = dict(
 6 |     policy='step',
 7 |     warmup='linear',
 8 |     warmup_iters=500,
 9 |     warmup_ratio=0.001,
10 |     step=[8, 11])
11 | runner = dict(type='EpochBasedRunner', max_epochs=12)
12 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/schedules/schedule_20e.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 3 | optimizer_config = dict(grad_clip=None)
 4 | # learning policy
 5 | lr_config = dict(
 6 |     policy='step',
 7 |     warmup='linear',
 8 |     warmup_iters=500,
 9 |     warmup_ratio=0.001,
10 |     step=[16, 19])
11 | runner = dict(type='EpochBasedRunner', max_epochs=20)
12 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/schedules/schedule_2x.py:
--------------------------------------------------------------------------------
 1 | # optimizer
 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 3 | optimizer_config = dict(grad_clip=None)
 4 | # learning policy
 5 | lr_config = dict(
 6 |     policy='step',
 7 |     warmup='linear',
 8 |     warmup_iters=500,
 9 |     warmup_ratio=0.001,
10 |     step=[16, 22])
11 | runner = dict(type='EpochBasedRunner', max_epochs=24)
12 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
 1 | checkpoint_config = dict(interval=1)
 2 | # yapf:disable
 3 | log_config = dict(
 4 |     interval=50,
 5 |     hooks=[
 6 |         dict(type='TextLoggerHook'),
 7 |         # dict(type='TensorboardLoggerHook')
 8 |     ])
 9 | # yapf:enable
10 | custom_hooks = [dict(type='NumClassCheckHook')]
11 | 
12 | dist_params = dict(backend='nccl')
13 | log_level = 'INFO'
14 | load_from = None
15 | resume_from = None
16 | workflow = [('train', 1)]
17 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | preferred-citation:
 3 |   type: article
 4 |   title: "Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection"
 5 |   doi: 10.1109/ICIP46576.2022.9897990
 6 |   url: https://ieeexplore.ieee.org/document/9897990
 7 |   journal: 2022 IEEE International Conference on Image Processing (ICIP)
 8 |   message: "If you use these results in your work, please cite this article."
 9 |   authors:
10 |   - family-names: "Akyon"
11 |     given-names: "Fatih Cagatay"
12 |   - family-names: "Altinuc"
13 |     given-names: "Sinan Onur"
14 |   - family-names: "Temizel"
15 |     given-names: "Alptekin"
16 |   year: 2022
17 |   start: 966
18 |   end: 970
19 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/lvis_v1_instance.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | _base_ = 'coco_instance.py'
 3 | dataset_type = 'LVISV1Dataset'
 4 | data_root = 'data/lvis_v1/'
 5 | data = dict(
 6 |     samples_per_gpu=2,
 7 |     workers_per_gpu=2,
 8 |     train=dict(
 9 |         _delete_=True,
10 |         type='ClassBalancedDataset',
11 |         oversample_thr=1e-3,
12 |         dataset=dict(
13 |             type=dataset_type,
14 |             ann_file=data_root + 'annotations/lvis_v1_train.json',
15 |             img_prefix=data_root)),
16 |     val=dict(
17 |         type=dataset_type,
18 |         ann_file=data_root + 'annotations/lvis_v1_val.json',
19 |         img_prefix=data_root),
20 |     test=dict(
21 |         type=dataset_type,
22 |         ann_file=data_root + 'annotations/lvis_v1_val.json',
23 |         img_prefix=data_root))
24 | evaluation = dict(metric=['bbox', 'segm'])
25 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/lvis_v0.5_instance.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | _base_ = 'coco_instance.py'
 3 | dataset_type = 'LVISV05Dataset'
 4 | data_root = 'data/lvis_v0.5/'
 5 | data = dict(
 6 |     samples_per_gpu=2,
 7 |     workers_per_gpu=2,
 8 |     train=dict(
 9 |         _delete_=True,
10 |         type='ClassBalancedDataset',
11 |         oversample_thr=1e-3,
12 |         dataset=dict(
13 |             type=dataset_type,
14 |             ann_file=data_root + 'annotations/lvis_v0.5_train.json',
15 |             img_prefix=data_root + 'train2017/')),
16 |     val=dict(
17 |         type=dataset_type,
18 |         ann_file=data_root + 'annotations/lvis_v0.5_val.json',
19 |         img_prefix=data_root + 'val2017/'),
20 |     test=dict(
21 |         type=dataset_type,
22 |         ann_file=data_root + 'annotations/lvis_v0.5_val.json',
23 |         img_prefix=data_root + 'val2017/'))
24 | evaluation = dict(metric=['bbox', 'segm'])
25 | 


--------------------------------------------------------------------------------
/visdrone/slice_visdrone.py:
--------------------------------------------------------------------------------
 1 | import fire
 2 | from sahi.scripts.slice_coco import slice
 3 | from tqdm import tqdm
 4 | 
 5 | SLICE_SIZE_LIST = [480, 640]
 6 | OVERLAP_RATIO_LIST = [0, 0.25]
 7 | IGNORE_NEGATIVE_SAMPLES = False
 8 | 
 9 | 
10 | def slice_visdrone(image_dir: str, dataset_json_path: str, output_dir: str):
11 |     total_run = len(SLICE_SIZE_LIST) * len(OVERLAP_RATIO_LIST)
12 |     current_run = 1
13 |     for slice_size in SLICE_SIZE_LIST:
14 |         for overlap_ratio in OVERLAP_RATIO_LIST:
15 |             tqdm.write(
16 |                 f"{current_run} of {total_run}: slicing for slice_size={slice_size}, overlap_ratio={overlap_ratio}"
17 |             )
18 |             slice(
19 |                 image_dir=image_dir,
20 |                 dataset_json_path=dataset_json_path,
21 |                 output_dir=output_dir,
22 |                 slice_size=slice_size,
23 |                 overlap_ratio=overlap_ratio,
24 |             )
25 |             current_run += 1
26 | 
27 | 
28 | if __name__ == "__main__":
29 |     fire.Fire(slice_visdrone)
30 | 


--------------------------------------------------------------------------------
/xview/slice_xview.py:
--------------------------------------------------------------------------------
 1 | import fire
 2 | from sahi.scripts.slice_coco import slice
 3 | from tqdm import tqdm
 4 | 
 5 | MAX_WORKERS = 20
 6 | SLICE_SIZE_LIST = [300, 400, 500]
 7 | OVERLAP_RATIO_LIST = [0, 0.25]
 8 | IGNORE_NEGATIVE_SAMPLES = True
 9 | 
10 | 
11 | def slice_xview(image_dir: str, dataset_json_path: str, output_dir: str):
12 |     total_run = len(SLICE_SIZE_LIST) * len(OVERLAP_RATIO_LIST)
13 |     current_run = 1
14 |     for slice_size in SLICE_SIZE_LIST:
15 |         for overlap_ratio in OVERLAP_RATIO_LIST:
16 |             tqdm.write(
17 |                 f"{current_run} of {total_run}: slicing for slice_size={slice_size}, overlap_ratio={overlap_ratio}"
18 |             )
19 |             slice(
20 |                 image_dir=image_dir,
21 |                 dataset_json_path=dataset_json_path,
22 |                 output_dir=output_dir,
23 |                 slice_size=slice_size,
24 |                 overlap_ratio=overlap_ratio,
25 |             )
26 |             current_run += 1
27 | 
28 | 
29 | if __name__ == "__main__":
30 |     fire.Fire(slice_xview)
31 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 fatih
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/xview/xview_class_labels.txt:
--------------------------------------------------------------------------------
 1 | 11:Fixed-wing Aircraft
 2 | 12:Small Aircraft
 3 | 13:Cargo Plane
 4 | 15:Helicopter
 5 | 17:Passenger Vehicle
 6 | 18:Small Car
 7 | 19:Bus
 8 | 20:Pickup Truck
 9 | 21:Utility Truck
10 | 23:Truck
11 | 24:Cargo Truck
12 | 25:Truck w/Box
13 | 26:Truck Tractor
14 | 27:Trailer
15 | 28:Truck w/Flatbed
16 | 29:Truck w/Liquid
17 | 32:Crane Truck
18 | 33:Railway Vehicle
19 | 34:Passenger Car
20 | 35:Cargo Car
21 | 36:Flat Car
22 | 37:Tank car
23 | 38:Locomotive
24 | 40:Maritime Vessel
25 | 41:Motorboat
26 | 42:Sailboat
27 | 44:Tugboat
28 | 45:Barge
29 | 47:Fishing Vessel
30 | 49:Ferry
31 | 50:Yacht
32 | 51:Container Ship
33 | 52:Oil Tanker
34 | 53:Engineering Vehicle
35 | 54:Tower crane
36 | 55:Container Crane
37 | 56:Reach Stacker
38 | 57:Straddle Carrier
39 | 59:Mobile Crane
40 | 60:Dump Truck
41 | 61:Haul Truck
42 | 62:Scraper/Tractor
43 | 63:Front loader/Bulldozer
44 | 64:Excavator
45 | 65:Cement Mixer
46 | 66:Ground Grader
47 | 71:Hut/Tent
48 | 72:Shed
49 | 73:Building
50 | 74:Aircraft Hangar
51 | 76:Damaged Building
52 | 77:Facility
53 | 79:Construction Site
54 | 83:Vehicle Lot
55 | 84:Helipad
56 | 86:Storage Tank
57 | 89:Shipping container lot
58 | 91:Shipping Container
59 | 93:Pylon
60 | 94:Tower
61 | 


--------------------------------------------------------------------------------
/xview/category_id_mapping.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "11": "0",
 3 |     "12": "1",
 4 |     "13": "2",
 5 |     "15": "3",
 6 |     "17": "4",
 7 |     "18": "5",
 8 |     "19": "6",
 9 |     "20": "7",
10 |     "21": "8",
11 |     "23": "9",
12 |     "24": "10",
13 |     "25": "11",
14 |     "26": "12",
15 |     "27": "13",
16 |     "28": "14",
17 |     "29": "15",
18 |     "32": "16",
19 |     "33": "17",
20 |     "34": "18",
21 |     "35": "19",
22 |     "36": "20",
23 |     "37": "21",
24 |     "38": "22",
25 |     "40": "23",
26 |     "41": "24",
27 |     "42": "25",
28 |     "44": "26",
29 |     "45": "27",
30 |     "47": "28",
31 |     "49": "29",
32 |     "50": "30",
33 |     "51": "31",
34 |     "52": "32",
35 |     "53": "33",
36 |     "54": "34",
37 |     "55": "35",
38 |     "56": "36",
39 |     "57": "37",
40 |     "59": "38",
41 |     "60": "39",
42 |     "61": "40",
43 |     "62": "41",
44 |     "63": "42",
45 |     "64": "43",
46 |     "65": "44",
47 |     "66": "45",
48 |     "71": "46",
49 |     "72": "47",
50 |     "73": "48",
51 |     "74": "49",
52 |     "76": "50",
53 |     "77": "51",
54 |     "79": "52",
55 |     "83": "53",
56 |     "84": "54",
57 |     "86": "55",
58 |     "89": "56",
59 |     "91": "57",
60 |     "93": "58",
61 |     "94": "59"
62 | }


--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/coco_detection.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | dataset_type = 'CocoDataset'
 3 | data_root = 'data/coco/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | train_pipeline = [
 7 |     dict(type='LoadImageFromFile'),
 8 |     dict(type='LoadAnnotations', with_bbox=True),
 9 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
10 |     dict(type='RandomFlip', flip_ratio=0.5),
11 |     dict(type='Normalize', **img_norm_cfg),
12 |     dict(type='Pad', size_divisor=32),
13 |     dict(type='DefaultFormatBundle'),
14 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
15 | ]
16 | test_pipeline = [
17 |     dict(type='LoadImageFromFile'),
18 |     dict(
19 |         type='MultiScaleFlipAug',
20 |         img_scale=(1333, 800),
21 |         flip=False,
22 |         transforms=[
23 |             dict(type='Resize', keep_ratio=True),
24 |             dict(type='RandomFlip'),
25 |             dict(type='Normalize', **img_norm_cfg),
26 |             dict(type='Pad', size_divisor=32),
27 |             dict(type='ImageToTensor', keys=['img']),
28 |             dict(type='Collect', keys=['img']),
29 |         ])
30 | ]
31 | data = dict(
32 |     samples_per_gpu=2,
33 |     workers_per_gpu=2,
34 |     train=dict(
35 |         type=dataset_type,
36 |         ann_file=data_root + 'annotations/instances_train2017.json',
37 |         img_prefix=data_root + 'train2017/',
38 |         pipeline=train_pipeline),
39 |     val=dict(
40 |         type=dataset_type,
41 |         ann_file=data_root + 'annotations/instances_val2017.json',
42 |         img_prefix=data_root + 'val2017/',
43 |         pipeline=test_pipeline),
44 |     test=dict(
45 |         type=dataset_type,
46 |         ann_file=data_root + 'annotations/instances_val2017.json',
47 |         img_prefix=data_root + 'val2017/',
48 |         pipeline=test_pipeline))
49 | evaluation = dict(interval=1, metric='bbox')
50 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/coco_instance.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | dataset_type = 'CocoDataset'
 3 | data_root = 'data/coco/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | train_pipeline = [
 7 |     dict(type='LoadImageFromFile'),
 8 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
 9 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
10 |     dict(type='RandomFlip', flip_ratio=0.5),
11 |     dict(type='Normalize', **img_norm_cfg),
12 |     dict(type='Pad', size_divisor=32),
13 |     dict(type='DefaultFormatBundle'),
14 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
15 | ]
16 | test_pipeline = [
17 |     dict(type='LoadImageFromFile'),
18 |     dict(
19 |         type='MultiScaleFlipAug',
20 |         img_scale=(1333, 800),
21 |         flip=False,
22 |         transforms=[
23 |             dict(type='Resize', keep_ratio=True),
24 |             dict(type='RandomFlip'),
25 |             dict(type='Normalize', **img_norm_cfg),
26 |             dict(type='Pad', size_divisor=32),
27 |             dict(type='ImageToTensor', keys=['img']),
28 |             dict(type='Collect', keys=['img']),
29 |         ])
30 | ]
31 | data = dict(
32 |     samples_per_gpu=2,
33 |     workers_per_gpu=2,
34 |     train=dict(
35 |         type=dataset_type,
36 |         ann_file=data_root + 'annotations/instances_train2017.json',
37 |         img_prefix=data_root + 'train2017/',
38 |         pipeline=train_pipeline),
39 |     val=dict(
40 |         type=dataset_type,
41 |         ann_file=data_root + 'annotations/instances_val2017.json',
42 |         img_prefix=data_root + 'val2017/',
43 |         pipeline=test_pipeline),
44 |     test=dict(
45 |         type=dataset_type,
46 |         ann_file=data_root + 'annotations/instances_val2017.json',
47 |         img_prefix=data_root + 'val2017/',
48 |         pipeline=test_pipeline))
49 | evaluation = dict(metric=['bbox', 'segm'])
50 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/ssd300.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | input_size = 300
 3 | model = dict(
 4 |     type='SingleStageDetector',
 5 |     backbone=dict(
 6 |         type='SSDVGG',
 7 |         depth=16,
 8 |         with_last_pool=False,
 9 |         ceil_mode=True,
10 |         out_indices=(3, 4),
11 |         out_feature_indices=(22, 34),
12 |         init_cfg=dict(
13 |             type='Pretrained', checkpoint='open-mmlab://vgg16_caffe')),
14 |     neck=dict(
15 |         type='SSDNeck',
16 |         in_channels=(512, 1024),
17 |         out_channels=(512, 1024, 512, 256, 256, 256),
18 |         level_strides=(2, 2, 1, 1),
19 |         level_paddings=(1, 1, 0, 0),
20 |         l2_norm_scale=20),
21 |     bbox_head=dict(
22 |         type='SSDHead',
23 |         in_channels=(512, 1024, 512, 256, 256, 256),
24 |         num_classes=80,
25 |         anchor_generator=dict(
26 |             type='SSDAnchorGenerator',
27 |             scale_major=False,
28 |             input_size=input_size,
29 |             basesize_ratio_range=(0.15, 0.9),
30 |             strides=[8, 16, 32, 64, 100, 300],
31 |             ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),
32 |         bbox_coder=dict(
33 |             type='DeltaXYWHBBoxCoder',
34 |             target_means=[.0, .0, .0, .0],
35 |             target_stds=[0.1, 0.1, 0.2, 0.2])),
36 |     # model training and testing settings
37 |     train_cfg=dict(
38 |         assigner=dict(
39 |             type='MaxIoUAssigner',
40 |             pos_iou_thr=0.5,
41 |             neg_iou_thr=0.5,
42 |             min_pos_iou=0.,
43 |             ignore_iof_thr=-1,
44 |             gt_max_assign_all=False),
45 |         smoothl1_beta=1.,
46 |         allowed_border=-1,
47 |         pos_weight=-1,
48 |         neg_pos_ratio=3,
49 |         debug=False),
50 |     test_cfg=dict(
51 |         nms_pre=1000,
52 |         nms=dict(type='nms', iou_threshold=0.45),
53 |         min_bbox_size=0,
54 |         score_thr=0.02,
55 |         max_per_img=200))
56 | cudnn_benchmark = True
57 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/retinanet_r50_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | model = dict(
 3 |     type='RetinaNet',
 4 |     backbone=dict(
 5 |         type='ResNet',
 6 |         depth=50,
 7 |         num_stages=4,
 8 |         out_indices=(0, 1, 2, 3),
 9 |         frozen_stages=1,
10 |         norm_cfg=dict(type='BN', requires_grad=True),
11 |         norm_eval=True,
12 |         style='pytorch',
13 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 |     neck=dict(
15 |         type='FPN',
16 |         in_channels=[256, 512, 1024, 2048],
17 |         out_channels=256,
18 |         start_level=1,
19 |         add_extra_convs='on_input',
20 |         num_outs=5),
21 |     bbox_head=dict(
22 |         type='RetinaHead',
23 |         num_classes=80,
24 |         in_channels=256,
25 |         stacked_convs=4,
26 |         feat_channels=256,
27 |         anchor_generator=dict(
28 |             type='AnchorGenerator',
29 |             octave_base_scale=4,
30 |             scales_per_octave=3,
31 |             ratios=[0.5, 1.0, 2.0],
32 |             strides=[8, 16, 32, 64, 128]),
33 |         bbox_coder=dict(
34 |             type='DeltaXYWHBBoxCoder',
35 |             target_means=[.0, .0, .0, .0],
36 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
37 |         loss_cls=dict(
38 |             type='FocalLoss',
39 |             use_sigmoid=True,
40 |             gamma=2.0,
41 |             alpha=0.25,
42 |             loss_weight=1.0),
43 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
44 |     # model training and testing settings
45 |     train_cfg=dict(
46 |         assigner=dict(
47 |             type='MaxIoUAssigner',
48 |             pos_iou_thr=0.5,
49 |             neg_iou_thr=0.4,
50 |             min_pos_iou=0,
51 |             ignore_iof_thr=-1),
52 |         allowed_border=-1,
53 |         pos_weight=-1,
54 |         debug=False),
55 |     test_cfg=dict(
56 |         nms_pre=1000,
57 |         min_bbox_size=0,
58 |         score_thr=0.05,
59 |         nms=dict(type='nms', iou_threshold=0.5),
60 |         max_per_img=100))
61 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/rpn_r50_caffe_c4.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | model = dict(
 3 |     type='RPN',
 4 |     backbone=dict(
 5 |         type='ResNet',
 6 |         depth=50,
 7 |         num_stages=3,
 8 |         strides=(1, 2, 2),
 9 |         dilations=(1, 1, 1),
10 |         out_indices=(2, ),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=False),
13 |         norm_eval=True,
14 |         style='caffe',
15 |         init_cfg=dict(
16 |             type='Pretrained',
17 |             checkpoint='open-mmlab://detectron2/resnet50_caffe')),
18 |     neck=None,
19 |     rpn_head=dict(
20 |         type='RPNHead',
21 |         in_channels=1024,
22 |         feat_channels=1024,
23 |         anchor_generator=dict(
24 |             type='AnchorGenerator',
25 |             scales=[2, 4, 8, 16, 32],
26 |             ratios=[0.5, 1.0, 2.0],
27 |             strides=[16]),
28 |         bbox_coder=dict(
29 |             type='DeltaXYWHBBoxCoder',
30 |             target_means=[.0, .0, .0, .0],
31 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
32 |         loss_cls=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 |     # model training and testing settings
36 |     train_cfg=dict(
37 |         rpn=dict(
38 |             assigner=dict(
39 |                 type='MaxIoUAssigner',
40 |                 pos_iou_thr=0.7,
41 |                 neg_iou_thr=0.3,
42 |                 min_pos_iou=0.3,
43 |                 ignore_iof_thr=-1),
44 |             sampler=dict(
45 |                 type='RandomSampler',
46 |                 num=256,
47 |                 pos_fraction=0.5,
48 |                 neg_pos_ub=-1,
49 |                 add_gt_as_proposals=False),
50 |             allowed_border=0,
51 |             pos_weight=-1,
52 |             debug=False)),
53 |     test_cfg=dict(
54 |         rpn=dict(
55 |             nms_pre=12000,
56 |             max_per_img=2000,
57 |             nms=dict(type='nms', iou_threshold=0.7),
58 |             min_bbox_size=0)))
59 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/rpn_r50_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | model = dict(
 3 |     type='RPN',
 4 |     backbone=dict(
 5 |         type='ResNet',
 6 |         depth=50,
 7 |         num_stages=4,
 8 |         out_indices=(0, 1, 2, 3),
 9 |         frozen_stages=1,
10 |         norm_cfg=dict(type='BN', requires_grad=True),
11 |         norm_eval=True,
12 |         style='pytorch',
13 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 |     neck=dict(
15 |         type='FPN',
16 |         in_channels=[256, 512, 1024, 2048],
17 |         out_channels=256,
18 |         num_outs=5),
19 |     rpn_head=dict(
20 |         type='RPNHead',
21 |         in_channels=256,
22 |         feat_channels=256,
23 |         anchor_generator=dict(
24 |             type='AnchorGenerator',
25 |             scales=[8],
26 |             ratios=[0.5, 1.0, 2.0],
27 |             strides=[4, 8, 16, 32, 64]),
28 |         bbox_coder=dict(
29 |             type='DeltaXYWHBBoxCoder',
30 |             target_means=[.0, .0, .0, .0],
31 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
32 |         loss_cls=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 |     # model training and testing settings
36 |     train_cfg=dict(
37 |         rpn=dict(
38 |             assigner=dict(
39 |                 type='MaxIoUAssigner',
40 |                 pos_iou_thr=0.7,
41 |                 neg_iou_thr=0.3,
42 |                 min_pos_iou=0.3,
43 |                 ignore_iof_thr=-1),
44 |             sampler=dict(
45 |                 type='RandomSampler',
46 |                 num=256,
47 |                 pos_fraction=0.5,
48 |                 neg_pos_ub=-1,
49 |                 add_gt_as_proposals=False),
50 |             allowed_border=0,
51 |             pos_weight=-1,
52 |             debug=False)),
53 |     test_cfg=dict(
54 |         rpn=dict(
55 |             nms_pre=2000,
56 |             max_per_img=1000,
57 |             nms=dict(type='nms', iou_threshold=0.7),
58 |             min_bbox_size=0)))
59 | 


--------------------------------------------------------------------------------
/mmdet_configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py:
--------------------------------------------------------------------------------
 1 | _base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py'
 2 | 
 3 | model = dict(
 4 |     backbone=dict(
 5 |         init_cfg=dict(
 6 |             type='Pretrained',
 7 |             checkpoint='open-mmlab://detectron2/resnet50_caffe')),
 8 |     bbox_head=dict(
 9 |         norm_on_bbox=True,
10 |         centerness_on_reg=True,
11 |         dcn_on_last_conv=False,
12 |         center_sampling=True,
13 |         conv_bias=True,
14 |         loss_bbox=dict(type='GIoULoss', loss_weight=1.0)),
15 |     # training and testing settings
16 |     test_cfg=dict(nms=dict(type='nms', iou_threshold=0.6)))
17 | 
18 | # dataset settings
19 | img_norm_cfg = dict(
20 |     mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
21 | train_pipeline = [
22 |     dict(type='LoadImageFromFile'),
23 |     dict(type='LoadAnnotations', with_bbox=True),
24 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
25 |     dict(type='RandomFlip', flip_ratio=0.5),
26 |     dict(type='Normalize', **img_norm_cfg),
27 |     dict(type='Pad', size_divisor=32),
28 |     dict(type='DefaultFormatBundle'),
29 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
30 | ]
31 | test_pipeline = [
32 |     dict(type='LoadImageFromFile'),
33 |     dict(
34 |         type='MultiScaleFlipAug',
35 |         img_scale=(1333, 800),
36 |         flip=False,
37 |         transforms=[
38 |             dict(type='Resize', keep_ratio=True),
39 |             dict(type='RandomFlip'),
40 |             dict(type='Normalize', **img_norm_cfg),
41 |             dict(type='Pad', size_divisor=32),
42 |             dict(type='ImageToTensor', keys=['img']),
43 |             dict(type='Collect', keys=['img']),
44 |         ])
45 | ]
46 | data = dict(
47 |     samples_per_gpu=2,
48 |     workers_per_gpu=2,
49 |     train=dict(pipeline=train_pipeline),
50 |     val=dict(pipeline=test_pipeline),
51 |     test=dict(pipeline=test_pipeline))
52 | optimizer_config = dict(_delete_=True, grad_clip=None)
53 | 
54 | lr_config = dict(warmup='linear')
55 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/deepfashion.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: DeepFashion In-shop with box and instance-mask annotations
 2 | dataset_type = 'DeepFashionDataset'
 3 | data_root = 'data/DeepFashion/In-shop/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | train_pipeline = [
 7 |     dict(type='LoadImageFromFile'),
 8 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
 9 |     dict(type='Resize', img_scale=(750, 1101), keep_ratio=True),
10 |     dict(type='RandomFlip', flip_ratio=0.5),
11 |     dict(type='Normalize', **img_norm_cfg),
12 |     dict(type='Pad', size_divisor=32),
13 |     dict(type='DefaultFormatBundle'),
14 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),  # masks are collected with boxes and labels
15 | ]
16 | test_pipeline = [
17 |     dict(type='LoadImageFromFile'),
18 |     dict(
19 |         type='MultiScaleFlipAug',
20 |         img_scale=(750, 1101),  # same scale as the training Resize
21 |         flip=False,
22 |         transforms=[
23 |             dict(type='Resize', keep_ratio=True),
24 |             dict(type='RandomFlip'),
25 |             dict(type='Normalize', **img_norm_cfg),
26 |             dict(type='Pad', size_divisor=32),
27 |             dict(type='ImageToTensor', keys=['img']),
28 |             dict(type='Collect', keys=['img']),  # only the image is collected at test time
29 |         ])
30 | ]
31 | data = dict(
32 |     imgs_per_gpu=2,
33 |     workers_per_gpu=1,
34 |     train=dict(
35 |         type=dataset_type,
36 |         ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',
37 |         img_prefix=data_root + 'Img/',
38 |         pipeline=train_pipeline,
39 |         data_root=data_root),
40 |     val=dict(
41 |         type=dataset_type,
42 |         ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',
43 |         img_prefix=data_root + 'Img/',
44 |         pipeline=test_pipeline,
45 |         data_root=data_root),
46 |     test=dict(
47 |         type=dataset_type,
48 |         ann_file=data_root +
49 |         'annotations/DeepFashion_segmentation_gallery.json',
50 |         img_prefix=data_root + 'Img/',
51 |         pipeline=test_pipeline,
52 |         data_root=data_root))
53 | evaluation = dict(interval=5, metric=['bbox', 'segm'])
54 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/voc0712.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: VOC2007 + VOC2012 trainval for training, VOC2007 test for val/test
 2 | dataset_type = 'VOCDataset'
 3 | data_root = 'data/VOCdevkit/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | train_pipeline = [
 7 |     dict(type='LoadImageFromFile'),
 8 |     dict(type='LoadAnnotations', with_bbox=True),
 9 |     dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
10 |     dict(type='RandomFlip', flip_ratio=0.5),
11 |     dict(type='Normalize', **img_norm_cfg),
12 |     dict(type='Pad', size_divisor=32),
13 |     dict(type='DefaultFormatBundle'),
14 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
15 | ]
16 | test_pipeline = [
17 |     dict(type='LoadImageFromFile'),
18 |     dict(
19 |         type='MultiScaleFlipAug',
20 |         img_scale=(1000, 600),  # same scale as the training Resize
21 |         flip=False,
22 |         transforms=[
23 |             dict(type='Resize', keep_ratio=True),
24 |             dict(type='RandomFlip'),
25 |             dict(type='Normalize', **img_norm_cfg),
26 |             dict(type='Pad', size_divisor=32),
27 |             dict(type='ImageToTensor', keys=['img']),
28 |             dict(type='Collect', keys=['img']),
29 |         ])
30 | ]
31 | data = dict(
32 |     samples_per_gpu=2,
33 |     workers_per_gpu=2,
34 |     train=dict(
35 |         type='RepeatDataset',  # wraps the VOC dataset defined under `dataset`
36 |         times=3,
37 |         dataset=dict(
38 |             type=dataset_type,
39 |             ann_file=[  # two split files, paired by position with the two img_prefix entries
40 |                 data_root + 'VOC2007/ImageSets/Main/trainval.txt',
41 |                 data_root + 'VOC2012/ImageSets/Main/trainval.txt'
42 |             ],
43 |             img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
44 |             pipeline=train_pipeline)),
45 |     val=dict(
46 |         type=dataset_type,
47 |         ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
48 |         img_prefix=data_root + 'VOC2007/',
49 |         pipeline=test_pipeline),
50 |     test=dict(  # same split file and image prefix as val
51 |         type=dataset_type,
52 |         ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
53 |         img_prefix=data_root + 'VOC2007/',
54 |         pipeline=test_pipeline))
55 | evaluation = dict(interval=1, metric='mAP')
56 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/coco_instance_semantic.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: COCO instances (boxes + masks) plus semantic segmentation maps
 2 | dataset_type = 'CocoDataset'
 3 | data_root = 'data/coco/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | train_pipeline = [
 7 |     dict(type='LoadImageFromFile'),
 8 |     dict(
 9 |         type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),
10 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
11 |     dict(type='RandomFlip', flip_ratio=0.5),
12 |     dict(type='Normalize', **img_norm_cfg),
13 |     dict(type='Pad', size_divisor=32),
14 |     dict(type='SegRescale', scale_factor=1 / 8),  # semantic map is rescaled by a factor of 1/8
15 |     dict(type='DefaultFormatBundle'),
16 |     dict(
17 |         type='Collect',
18 |         keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
19 | ]
20 | test_pipeline = [
21 |     dict(type='LoadImageFromFile'),
22 |     dict(
23 |         type='MultiScaleFlipAug',
24 |         img_scale=(1333, 800),
25 |         flip=False,
26 |         transforms=[
27 |             dict(type='Resize', keep_ratio=True),
28 |             dict(type='RandomFlip', flip_ratio=0.5),
29 |             dict(type='Normalize', **img_norm_cfg),
30 |             dict(type='Pad', size_divisor=32),
31 |             dict(type='ImageToTensor', keys=['img']),
32 |             dict(type='Collect', keys=['img']),
33 |         ])
34 | ]
35 | data = dict(
36 |     samples_per_gpu=2,
37 |     workers_per_gpu=2,
38 |     train=dict(
39 |         type=dataset_type,
40 |         ann_file=data_root + 'annotations/instances_train2017.json',
41 |         img_prefix=data_root + 'train2017/',
42 |         seg_prefix=data_root + 'stuffthingmaps/train2017/',  # seg_prefix is set for the train split only
43 |         pipeline=train_pipeline),
44 |     val=dict(
45 |         type=dataset_type,
46 |         ann_file=data_root + 'annotations/instances_val2017.json',
47 |         img_prefix=data_root + 'val2017/',
48 |         pipeline=test_pipeline),
49 |     test=dict(  # same annotation file and image prefix as val
50 |         type=dataset_type,
51 |         ann_file=data_root + 'annotations/instances_val2017.json',
52 |         img_prefix=data_root + 'val2017/',
53 |         pipeline=test_pipeline))
54 | evaluation = dict(metric=['bbox', 'segm'])  # box and mask metrics; no interval is set here
55 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/cityscapes_detection.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: Cityscapes with bounding-box annotations only
 2 | dataset_type = 'CityscapesDataset'
 3 | data_root = 'data/cityscapes/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | train_pipeline = [
 7 |     dict(type='LoadImageFromFile'),
 8 |     dict(type='LoadAnnotations', with_bbox=True),
 9 |     dict(
10 |         type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),  # two scales listed; no multiscale mode is set here
11 |     dict(type='RandomFlip', flip_ratio=0.5),
12 |     dict(type='Normalize', **img_norm_cfg),
13 |     dict(type='Pad', size_divisor=32),
14 |     dict(type='DefaultFormatBundle'),
15 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
16 | ]
17 | test_pipeline = [
18 |     dict(type='LoadImageFromFile'),
19 |     dict(
20 |         type='MultiScaleFlipAug',
21 |         img_scale=(2048, 1024),  # the larger of the two training scales
22 |         flip=False,
23 |         transforms=[
24 |             dict(type='Resize', keep_ratio=True),
25 |             dict(type='RandomFlip'),
26 |             dict(type='Normalize', **img_norm_cfg),
27 |             dict(type='Pad', size_divisor=32),
28 |             dict(type='ImageToTensor', keys=['img']),
29 |             dict(type='Collect', keys=['img']),
30 |         ])
31 | ]
32 | data = dict(
33 |     samples_per_gpu=1,
34 |     workers_per_gpu=2,
35 |     train=dict(
36 |         type='RepeatDataset',  # wraps the Cityscapes train set defined under `dataset`
37 |         times=8,
38 |         dataset=dict(
39 |             type=dataset_type,
40 |             ann_file=data_root +
41 |             'annotations/instancesonly_filtered_gtFine_train.json',
42 |             img_prefix=data_root + 'leftImg8bit/train/',
43 |             pipeline=train_pipeline)),
44 |     val=dict(
45 |         type=dataset_type,
46 |         ann_file=data_root +
47 |         'annotations/instancesonly_filtered_gtFine_val.json',
48 |         img_prefix=data_root + 'leftImg8bit/val/',
49 |         pipeline=test_pipeline),
50 |     test=dict(  # unlike val, this points at the test annotation file and image folder
51 |         type=dataset_type,
52 |         ann_file=data_root +
53 |         'annotations/instancesonly_filtered_gtFine_test.json',
54 |         img_prefix=data_root + 'leftImg8bit/test/',
55 |         pipeline=test_pipeline))
56 | evaluation = dict(interval=1, metric='bbox')
57 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/cityscapes_instance.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: Cityscapes with bounding-box and instance-mask annotations
 2 | dataset_type = 'CityscapesDataset'
 3 | data_root = 'data/cityscapes/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | train_pipeline = [
 7 |     dict(type='LoadImageFromFile'),
 8 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
 9 |     dict(
10 |         type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),  # two scales listed; no multiscale mode is set here
11 |     dict(type='RandomFlip', flip_ratio=0.5),
12 |     dict(type='Normalize', **img_norm_cfg),
13 |     dict(type='Pad', size_divisor=32),
14 |     dict(type='DefaultFormatBundle'),
15 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),  # masks are collected with boxes and labels
16 | ]
17 | test_pipeline = [
18 |     dict(type='LoadImageFromFile'),
19 |     dict(
20 |         type='MultiScaleFlipAug',
21 |         img_scale=(2048, 1024),  # the larger of the two training scales
22 |         flip=False,
23 |         transforms=[
24 |             dict(type='Resize', keep_ratio=True),
25 |             dict(type='RandomFlip'),
26 |             dict(type='Normalize', **img_norm_cfg),
27 |             dict(type='Pad', size_divisor=32),
28 |             dict(type='ImageToTensor', keys=['img']),
29 |             dict(type='Collect', keys=['img']),
30 |         ])
31 | ]
32 | data = dict(
33 |     samples_per_gpu=1,
34 |     workers_per_gpu=2,
35 |     train=dict(
36 |         type='RepeatDataset',  # wraps the Cityscapes train set defined under `dataset`
37 |         times=8,
38 |         dataset=dict(
39 |             type=dataset_type,
40 |             ann_file=data_root +
41 |             'annotations/instancesonly_filtered_gtFine_train.json',
42 |             img_prefix=data_root + 'leftImg8bit/train/',
43 |             pipeline=train_pipeline)),
44 |     val=dict(
45 |         type=dataset_type,
46 |         ann_file=data_root +
47 |         'annotations/instancesonly_filtered_gtFine_val.json',
48 |         img_prefix=data_root + 'leftImg8bit/val/',
49 |         pipeline=test_pipeline),
50 |     test=dict(  # unlike val, this points at the test annotation file and image folder
51 |         type=dataset_type,
52 |         ann_file=data_root +
53 |         'annotations/instancesonly_filtered_gtFine_test.json',
54 |         img_prefix=data_root + 'leftImg8bit/test/',
55 |         pipeline=test_pipeline))
56 | evaluation = dict(metric=['bbox', 'segm'])  # box and mask metrics; no interval is set here
57 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/wider_face.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: WIDER FACE at a fixed 300x300 input with heavy train-time augmentation
 2 | dataset_type = 'WIDERFaceDataset'
 3 | data_root = 'data/WIDERFace/'
 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)  # unit std, unlike the other dataset configs
 5 | train_pipeline = [
 6 |     dict(type='LoadImageFromFile', to_float32=True),  # to_float32 is set for training only
 7 |     dict(type='LoadAnnotations', with_bbox=True),
 8 |     dict(
 9 |         type='PhotoMetricDistortion',
10 |         brightness_delta=32,
11 |         contrast_range=(0.5, 1.5),
12 |         saturation_range=(0.5, 1.5),
13 |         hue_delta=18),
14 |     dict(
15 |         type='Expand',
16 |         mean=img_norm_cfg['mean'],  # reuses the normalization mean and channel-order flag
17 |         to_rgb=img_norm_cfg['to_rgb'],
18 |         ratio_range=(1, 4)),
19 |     dict(
20 |         type='MinIoURandomCrop',
21 |         min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
22 |         min_crop_size=0.3),
23 |     dict(type='Resize', img_scale=(300, 300), keep_ratio=False),  # keep_ratio=False with a square target scale
24 |     dict(type='Normalize', **img_norm_cfg),
25 |     dict(type='RandomFlip', flip_ratio=0.5),  # flip comes after Normalize in this pipeline
26 |     dict(type='DefaultFormatBundle'),
27 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
28 | ]
29 | test_pipeline = [
30 |     dict(type='LoadImageFromFile'),
31 |     dict(
32 |         type='MultiScaleFlipAug',
33 |         img_scale=(300, 300),  # same scale as the training Resize
34 |         flip=False,
35 |         transforms=[
36 |             dict(type='Resize', keep_ratio=False),
37 |             dict(type='Normalize', **img_norm_cfg),  # no RandomFlip or Pad step in this test pipeline
38 |             dict(type='ImageToTensor', keys=['img']),
39 |             dict(type='Collect', keys=['img']),
40 |         ])
41 | ]
42 | data = dict(
43 |     samples_per_gpu=60,
44 |     workers_per_gpu=2,
45 |     train=dict(
46 |         type='RepeatDataset',  # wraps the WIDER FACE train set defined under `dataset`
47 |         times=2,
48 |         dataset=dict(
49 |             type=dataset_type,
50 |             ann_file=data_root + 'train.txt',
51 |             img_prefix=data_root + 'WIDER_train/',
52 |             min_size=17,  # set for the train split only
53 |             pipeline=train_pipeline)),
54 |     val=dict(
55 |         type=dataset_type,
56 |         ann_file=data_root + 'val.txt',
57 |         img_prefix=data_root + 'WIDER_val/',
58 |         pipeline=test_pipeline),
59 |     test=dict(  # same list file and image prefix as val
60 |         type=dataset_type,
61 |         ann_file=data_root + 'val.txt',
62 |         img_prefix=data_root + 'WIDER_val/',
63 |         pipeline=test_pipeline))  # note: this file defines no `evaluation` dict
64 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/coco_panoptic.py:
--------------------------------------------------------------------------------
 1 | # dataset settings: COCO panoptic (boxes, masks and semantic maps), evaluated with PQ
 2 | dataset_type = 'CocoPanopticDataset'
 3 | data_root = 'data/coco/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | train_pipeline = [
 7 |     dict(type='LoadImageFromFile'),
 8 |     dict(
 9 |         type='LoadPanopticAnnotations',  # panoptic-specific loader instead of LoadAnnotations
10 |         with_bbox=True,
11 |         with_mask=True,
12 |         with_seg=True),
13 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
14 |     dict(type='RandomFlip', flip_ratio=0.5),
15 |     dict(type='Normalize', **img_norm_cfg),
16 |     dict(type='Pad', size_divisor=32),
17 |     dict(type='SegRescale', scale_factor=1 / 4),  # semantic map is rescaled by a factor of 1/4
18 |     dict(type='DefaultFormatBundle'),
19 |     dict(
20 |         type='Collect',
21 |         keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
22 | ]
23 | test_pipeline = [
24 |     dict(type='LoadImageFromFile'),
25 |     dict(
26 |         type='MultiScaleFlipAug',
27 |         img_scale=(1333, 800),  # same scale as the training Resize
28 |         flip=False,
29 |         transforms=[
30 |             dict(type='Resize', keep_ratio=True),
31 |             dict(type='RandomFlip'),
32 |             dict(type='Normalize', **img_norm_cfg),
33 |             dict(type='Pad', size_divisor=32),
34 |             dict(type='ImageToTensor', keys=['img']),
35 |             dict(type='Collect', keys=['img']),
36 |         ])
37 | ]
38 | data = dict(
39 |     samples_per_gpu=2,
40 |     workers_per_gpu=2,
41 |     train=dict(
42 |         type=dataset_type,
43 |         ann_file=data_root + 'annotations/panoptic_train2017.json',
44 |         img_prefix=data_root + 'train2017/',
45 |         seg_prefix=data_root + 'annotations/panoptic_train2017/',  # seg_prefix is set for every split in this file
46 |         pipeline=train_pipeline),
47 |     val=dict(
48 |         type=dataset_type,
49 |         ann_file=data_root + 'annotations/panoptic_val2017.json',
50 |         img_prefix=data_root + 'val2017/',
51 |         seg_prefix=data_root + 'annotations/panoptic_val2017/',
52 |         pipeline=test_pipeline),
53 |     test=dict(  # same annotation file and prefixes as val
54 |         type=dataset_type,
55 |         ann_file=data_root + 'annotations/panoptic_val2017.json',
56 |         img_prefix=data_root + 'val2017/',
57 |         seg_prefix=data_root + 'annotations/panoptic_val2017/',
58 |         pipeline=test_pipeline))
59 | evaluation = dict(interval=1, metric=['PQ'])
60 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/fast_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings: Fast R-CNN with a ResNet-50 backbone and FPN neck (no rpn_head is configured here)
 2 | model = dict(
 3 |     type='FastRCNN',
 4 |     backbone=dict(
 5 |         type='ResNet',
 6 |         depth=50,
 7 |         num_stages=4,
 8 |         out_indices=(0, 1, 2, 3),  # four backbone outputs, one per in_channels entry of the neck
 9 |         frozen_stages=1,
10 |         norm_cfg=dict(type='BN', requires_grad=True),
11 |         norm_eval=True,
12 |         style='pytorch',
13 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 |     neck=dict(
15 |         type='FPN',
16 |         in_channels=[256, 512, 1024, 2048],
17 |         out_channels=256,
18 |         num_outs=5),
19 |     roi_head=dict(
20 |         type='StandardRoIHead',
21 |         bbox_roi_extractor=dict(
22 |             type='SingleRoIExtractor',
23 |             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
24 |             out_channels=256,
25 |             featmap_strides=[4, 8, 16, 32]),  # four strides listed, while the neck above sets num_outs=5
26 |         bbox_head=dict(
27 |             type='Shared2FCBBoxHead',
28 |             in_channels=256,
29 |             fc_out_channels=1024,
30 |             roi_feat_size=7,  # equals the RoIAlign output_size above
31 |             num_classes=80,
32 |             bbox_coder=dict(
33 |                 type='DeltaXYWHBBoxCoder',
34 |                 target_means=[0., 0., 0., 0.],
35 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),
36 |             reg_class_agnostic=False,
37 |             loss_cls=dict(
38 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
39 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
40 |     # model training and testing settings (only an `rcnn` section is defined for each)
41 |     train_cfg=dict(
42 |         rcnn=dict(
43 |             assigner=dict(
44 |                 type='MaxIoUAssigner',
45 |                 pos_iou_thr=0.5,  # positive, negative and minimum-positive thresholds are all 0.5
46 |                 neg_iou_thr=0.5,
47 |                 min_pos_iou=0.5,
48 |                 match_low_quality=False,
49 |                 ignore_iof_thr=-1),
50 |             sampler=dict(
51 |                 type='RandomSampler',
52 |                 num=512,
53 |                 pos_fraction=0.25,
54 |                 neg_pos_ub=-1,
55 |                 add_gt_as_proposals=True),
56 |             pos_weight=-1,
57 |             debug=False)),
58 |     test_cfg=dict(
59 |         rcnn=dict(
60 |             score_thr=0.05,
61 |             nms=dict(type='nms', iou_threshold=0.5),
62 |             max_per_img=100)))
63 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # other
132 | *.jpg
133 | .vscode


--------------------------------------------------------------------------------
/mmdet_configs/tood/tood_r50_fpn_1x_coco.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherits the COCO detection dataset, the 1x schedule and the default runtime
 2 |     '../_base_/datasets/coco_detection.py',
 3 |     '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
 4 | ]
 5 | model = dict(  # TOOD detector: ResNet-50 backbone, FPN neck, TOODHead
 6 |     type='TOOD',
 7 |     backbone=dict(
 8 |         type='ResNet',
 9 |         depth=50,
10 |         num_stages=4,
11 |         out_indices=(0, 1, 2, 3),
12 |         frozen_stages=1,
13 |         norm_cfg=dict(type='BN', requires_grad=True),
14 |         norm_eval=True,
15 |         style='pytorch',
16 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
17 |     neck=dict(
18 |         type='FPN',
19 |         in_channels=[256, 512, 1024, 2048],
20 |         out_channels=256,
21 |         start_level=1,
22 |         add_extra_convs='on_output',
23 |         num_outs=5),
24 |     bbox_head=dict(
25 |         type='TOODHead',
26 |         num_classes=80,
27 |         in_channels=256,  # equals the neck's out_channels
28 |         stacked_convs=6,
29 |         feat_channels=256,
30 |         anchor_type='anchor_free',
31 |         anchor_generator=dict(
32 |             type='AnchorGenerator',
33 |             ratios=[1.0],
34 |             octave_base_scale=8,
35 |             scales_per_octave=1,
36 |             strides=[8, 16, 32, 64, 128]),  # five strides, matching the neck's num_outs=5
37 |         bbox_coder=dict(
38 |             type='DeltaXYWHBBoxCoder',
39 |             target_means=[.0, .0, .0, .0],
40 |             target_stds=[0.1, 0.1, 0.2, 0.2]),
41 |         initial_loss_cls=dict(
42 |             type='FocalLoss',
43 |             use_sigmoid=True,
44 |             activated=True,  # use probability instead of logit as input
45 |             gamma=2.0,
46 |             alpha=0.25,
47 |             loss_weight=1.0),
48 |         loss_cls=dict(
49 |             type='QualityFocalLoss',
50 |             use_sigmoid=True,
51 |             activated=True,  # use probability instead of logit as input
52 |             beta=2.0,
53 |             loss_weight=1.0),
54 |         loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),  # box loss is weighted 2x relative to the classification losses
55 |     train_cfg=dict(
56 |         initial_epoch=4,  # presumably how long initial_assigner/initial_loss_cls are used before switching - TODO confirm
57 |         initial_assigner=dict(type='ATSSAssigner', topk=9),
58 |         assigner=dict(type='TaskAlignedAssigner', topk=13),
59 |         alpha=1,
60 |         beta=6,
61 |         allowed_border=-1,
62 |         pos_weight=-1,
63 |         debug=False),
64 |     test_cfg=dict(
65 |         nms_pre=1000,
66 |         min_bbox_size=0,
67 |         score_thr=0.05,
68 |         nms=dict(type='nms', iou_threshold=0.6),
69 |         max_per_img=100))
70 | # optimizer: SGD with lr 0.01, momentum 0.9 and weight decay 1e-4
71 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
72 | 
73 | # custom hooks: registers SetEpochInfoHook (presumably feeds the current epoch to the head - TODO confirm)
74 | custom_hooks = [dict(type='SetEpochInfoHook')]
75 | 


--------------------------------------------------------------------------------
/mmdet_configs/visdrone_tood/tood_full_cls_60.py:
--------------------------------------------------------------------------------
 1 | _base_ = ["../tood/tood_r50_fpn_1x_coco.py"]
 2 | 
 3 | TAGS = ["tood", "crop=False", "24epochs", "num_cls=60", "repeat=5"]
 4 | EXP_NAME = "tood_full_cls_60"
 5 | DATA_ROOT = "data/visdrone2019/"
 6 | BATCH_MULTIPLIER = 8
 7 | LR_MULTIPLIER = 1
 8 | EVAL_INTERVAL = 3
 9 | NUM_CLASSES = 10
10 | CLASSES = ("pedestrian", "people", "bicycle", "car", "van", "truck", "tricycle", "awning-tricycle", "bus", "motor")
11 | 
12 | # model settings: only the head's class count is overridden here
13 | model = dict(
14 |     bbox_head=dict(
15 |         num_classes=NUM_CLASSES,
16 |     ),
17 | )
18 | 
19 | # dataset settings: full (uncropped) training images, sliced validation images
20 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
21 | train_pipeline = [
22 |     dict(type="LoadImageFromFile"),
23 |     dict(type="LoadAnnotations", with_bbox=True),
24 |     dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
25 |     dict(type="RandomFlip", flip_ratio=0.5),
26 |     dict(type="Normalize", **img_norm_cfg),
27 |     dict(type="Pad", size_divisor=32),
28 |     dict(type="DefaultFormatBundle"),
29 |     dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
30 | ]
31 | test_pipeline = [
32 |     dict(type="LoadImageFromFile"),
33 |     dict(
34 |         type="MultiScaleFlipAug",
35 |         img_scale=(1333, 800),  # same scale as the training Resize
36 |         flip=False,
37 |         transforms=[
38 |             dict(type="Resize", keep_ratio=True),
39 |             dict(type="RandomFlip"),
40 |             dict(type="Normalize", **img_norm_cfg),
41 |             dict(type="Pad", size_divisor=32),
42 |             dict(type="ImageToTensor", keys=["img"]),
43 |             dict(type="Collect", keys=["img"]),
44 |         ],
45 |     ),
46 | ]
47 | 
48 | data = dict(
49 |     samples_per_gpu=2 * BATCH_MULTIPLIER,
50 |     workers_per_gpu=4,
51 |     train=dict(
52 |         type="RepeatDataset",
53 |         times=5,  # corresponds to the "repeat=5" entry in TAGS
54 |         dataset=dict(
55 |             type="CocoDataset",
56 |             classes=CLASSES,
57 |             ann_file=DATA_ROOT + "coco/train.json",
58 |             img_prefix=DATA_ROOT + "VisDrone2019-DET-train/",
59 |             pipeline=train_pipeline,
60 |         ),
61 |     ),
62 |     val=dict(  # no `type` given: presumably inherited from the base config - TODO confirm
63 |         classes=CLASSES,
64 |         ann_file=DATA_ROOT + "sliced/val_640_0.json",  # NOTE(review): sliced val set although training uses full images - confirm intended
65 |         img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
66 |         pipeline=test_pipeline,
67 |     ),
68 |     test=dict(  # same annotation file and image prefix as val
69 |         classes=CLASSES,
70 |         ann_file=DATA_ROOT + "sliced/val_640_0.json",
71 |         img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
72 |         pipeline=test_pipeline,
73 |     ),
74 | )
75 | 
76 | # optimizer: only lr, momentum and weight_decay are set here
77 | # the base lr of 0.01 is for the default 8-GPU setup
78 | # it is divided by 8 for 1 GPU, then scaled by BATCH_MULTIPLIER and LR_MULTIPLIER
79 | optimizer = dict(lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, momentum=0.9, weight_decay=0.0001)
80 | 
81 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
82 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
83 | 
84 | # learning policy: step schedule with linear warmup; steps at 16 and 22 of 24 epochs
85 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[16, 22])
86 | runner = dict(type="EpochBasedRunner", max_epochs=24)
87 | 
88 | # logger settings: text and TensorBoard hooks, with a log interval of 50
89 | log_config = dict(
90 |     interval=50,
91 |     hooks=[
92 |         dict(type="TextLoggerHook"),
93 |         dict(type="TensorboardLoggerHook", reset_flag=False),
94 |     ],
95 | )
96 | 
97 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_1x_coco/tood_r50_fpn_1x_coco_20211210_103425-20e20746.pth"
98 | work_dir = f"runs/visdrone/{EXP_NAME}/"  # output directory is derived from EXP_NAME
99 | 


--------------------------------------------------------------------------------
/mmdet_configs/visdrone_vfnet/vfnet_full_cls_60.py:
--------------------------------------------------------------------------------
  1 | _base_ = ["../vfnet/vfnet_r50_fpn_1x_coco.py"]
  2 | 
  3 | TAGS = ["vfnet", "crop=False", "24epochs", "num_cls=60", "repeat=5"]
  4 | EXP_NAME = "vfnet_full_cls_60"
  5 | DATA_ROOT = "data/visdrone2019/"
  6 | BATCH_MULTIPLIER = 8
  7 | LR_MULTIPLIER = 1
  8 | EVAL_INTERVAL = 3
  9 | NUM_CLASSES = 10
 10 | CLASSES = ("pedestrian", "people", "bicycle", "car", "van", "truck", "tricycle", "awning-tricycle", "bus", "motor")
 11 | 
 12 | # model settings: only the head's class count is overridden here
 13 | model = dict(
 14 |     bbox_head=dict(
 15 |         num_classes=NUM_CLASSES,
 16 |     ),
 17 | )
 18 | 
 19 | # dataset settings: full (uncropped) training images, sliced validation images
 20 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 21 | train_pipeline = [
 22 |     dict(type="LoadImageFromFile"),
 23 |     dict(type="LoadAnnotations", with_bbox=True),
 24 |     dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 25 |     dict(type="RandomFlip", flip_ratio=0.5),
 26 |     dict(type="Normalize", **img_norm_cfg),
 27 |     dict(type="Pad", size_divisor=32),
 28 |     dict(type="DefaultFormatBundle"),
 29 |     dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
 30 | ]
 31 | test_pipeline = [
 32 |     dict(type="LoadImageFromFile"),
 33 |     dict(
 34 |         type="MultiScaleFlipAug",
 35 |         img_scale=(1333, 800),  # same scale as the training Resize
 36 |         flip=False,
 37 |         transforms=[
 38 |             dict(type="Resize", keep_ratio=True),
 39 |             dict(type="RandomFlip"),
 40 |             dict(type="Normalize", **img_norm_cfg),
 41 |             dict(type="Pad", size_divisor=32),
 42 |             dict(type="DefaultFormatBundle"),  # this config uses DefaultFormatBundle as the test-time formatting step
 43 |             dict(type="Collect", keys=["img"]),
 44 |         ],
 45 |     ),
 46 | ]
 47 | 
 48 | data = dict(
 49 |     samples_per_gpu=2 * BATCH_MULTIPLIER,
 50 |     workers_per_gpu=4,
 51 |     train=dict(
 52 |         type="RepeatDataset",
 53 |         times=5,  # corresponds to the "repeat=5" entry in TAGS
 54 |         dataset=dict(
 55 |             type="CocoDataset",
 56 |             classes=CLASSES,
 57 |             ann_file=DATA_ROOT + "coco/train.json",
 58 |             img_prefix=DATA_ROOT + "VisDrone2019-DET-train/",
 59 |             pipeline=train_pipeline,
 60 |         ),
 61 |     ),
 62 |     val=dict(  # no `type` given: presumably inherited from the base config - TODO confirm
 63 |         classes=CLASSES,
 64 |         ann_file=DATA_ROOT + "sliced/val_640_0.json",  # NOTE(review): sliced val set although training uses full images - confirm intended
 65 |         img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
 66 |         pipeline=test_pipeline,
 67 |     ),
 68 |     test=dict(  # same annotation file and image prefix as val
 69 |         classes=CLASSES,
 70 |         ann_file=DATA_ROOT + "sliced/val_640_0.json",
 71 |         img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
 72 |         pipeline=test_pipeline,
 73 |     ),
 74 | )
 75 | 
 76 | # optimizer: only lr and paramwise_cfg are set here
 77 | # the base lr of 0.01 is for the default 8-GPU setup
 78 | # it is divided by 8 for 1 GPU, then scaled by BATCH_MULTIPLIER and LR_MULTIPLIER
 79 | optimizer = dict(
 80 |     lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
 81 | )
 82 | 
 83 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
 84 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
 85 | 
 86 | # learning policy: step schedule with linear warmup; steps at 16 and 22 of 24 epochs
 87 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.1, step=[16, 22])
 88 | runner = dict(type="EpochBasedRunner", max_epochs=24)
 89 | 
 90 | # logger settings: text and TensorBoard hooks, with a log interval of 50
 91 | log_config = dict(
 92 |     interval=50,
 93 |     hooks=[
 94 |         dict(type="TextLoggerHook"),
 95 |         dict(type="TensorboardLoggerHook", reset_flag=False),
 96 |     ],
 97 | )
 98 | 
 99 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth"
100 | work_dir = f"runs/visdrone/{EXP_NAME}/"  # output directory is derived from EXP_NAME
101 | 


--------------------------------------------------------------------------------
/mmdet_configs/visdrone_fcos/fcos_full_cls_60.py:
--------------------------------------------------------------------------------
  1 | _base_ = ["../fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py"]
  2 | 
  3 | TAGS = ["fcos", "crop=False", "24epochs", "num_cls=60", "repeat=5"]  # NOTE(review): tag says num_cls=60 but NUM_CLASSES below is 10 - confirm
  4 | EXP_NAME = "fcos_full_cls_60"  # interpolated into work_dir at the end of this file
  5 | DATA_ROOT = "data/visdrone2019/"
  6 | BATCH_MULTIPLIER = 16  # scales both samples_per_gpu and the learning rate
  7 | LR_MULTIPLIER = 1  # extra factor applied to the learning rate only
  8 | EVAL_INTERVAL = 3  # passed to evaluation.interval
  9 | NUM_CLASSES = 10  # equals len(CLASSES)
 10 | CLASSES = ("pedestrian", "people", "bicycle", "car", "van", "truck", "tricycle", "awning-tricycle", "bus", "motor")
 11 | 
 12 | # model settings: only the head's class count is set in this file
 13 | model = dict(
 14 |     bbox_head=dict(
 15 |         num_classes=NUM_CLASSES,
 16 |     ),
 17 | )
 18 | 
 19 | # dataset settings: normalization values plus the train and test pipelines
 20 | img_norm_cfg = dict(mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)  # mean subtraction only (std is 1.0), no RGB conversion
 21 | train_pipeline = [
 22 |     dict(type="LoadImageFromFile"),
 23 |     dict(type="LoadAnnotations", with_bbox=True),
 24 |     dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),  # full image, no cropping (see the "crop=False" tag)
 25 |     dict(type="RandomFlip", flip_ratio=0.5),
 26 |     dict(type="Normalize", **img_norm_cfg),
 27 |     dict(type="Pad", size_divisor=32),
 28 |     dict(type="DefaultFormatBundle"),
 29 |     dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
 30 | ]
 31 | test_pipeline = [
 32 |     dict(type="LoadImageFromFile"),
 33 |     dict(
 34 |         type="MultiScaleFlipAug",
 35 |         img_scale=(1333, 800),  # a single scale is listed
 36 |         flip=False,
 37 |         transforms=[
 38 |             dict(type="Resize", keep_ratio=True),
 39 |             dict(type="RandomFlip"),
 40 |             dict(type="Normalize", **img_norm_cfg),  # same normalization as training
 41 |             dict(type="Pad", size_divisor=32),
 42 |             dict(type="ImageToTensor", keys=["img"]),
 43 |             dict(type="Collect", keys=["img"]),
 44 |         ],
 45 |     ),
 46 | ]
 47 | 
 48 | data = dict(
 49 |     samples_per_gpu=2 * BATCH_MULTIPLIER,  # per-GPU batch size scales with BATCH_MULTIPLIER
 50 |     workers_per_gpu=4,
 51 |     train=dict(
 52 |         type="RepeatDataset",  # wraps the CocoDataset given in `dataset`
 53 |         times=5,  # matches the "repeat=5" tag
 54 |         dataset=dict(
 55 |             type="CocoDataset",
 56 |             classes=CLASSES,
 57 |             ann_file=DATA_ROOT + "coco/train.json",
 58 |             img_prefix=DATA_ROOT + "VisDrone2019-DET-train/",
 59 |             pipeline=train_pipeline,
 60 |         ),
 61 |     ),
 62 |     val=dict(  # validation uses the sliced val_640_0 annotations and images
 63 |         classes=CLASSES,
 64 |         ann_file=DATA_ROOT + "sliced/val_640_0.json",
 65 |         img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
 66 |         pipeline=test_pipeline,
 67 |     ),
 68 |     test=dict(  # test points at the same files and pipeline as val
 69 |         classes=CLASSES,
 70 |         ann_file=DATA_ROOT + "sliced/val_640_0.json",
 71 |         img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
 72 |         pipeline=test_pipeline,
 73 |     ),
 74 | )
 75 | 
 76 | # optimizer
 77 | # 0.01 is the default learning rate for 8 GPUs;
 78 | # divide by 8 for 1 GPU, then scale by the batch and LR multipliers
 79 | optimizer = dict(
 80 |     lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
 81 | )
 82 | 
 83 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
 84 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
 85 | 
 86 | # learning policy: step schedule with constant warmup
 87 | lr_config = dict(policy="step", warmup="constant", warmup_iters=500, warmup_ratio=1.0 / 3, step=[16, 22])
 88 | runner = dict(type="EpochBasedRunner", max_epochs=24)  # matches the "24epochs" tag
 89 | 
 90 | # logger settings: text and TensorBoard hooks, interval=50
 91 | log_config = dict(
 92 |     interval=50,
 93 |     hooks=[
 94 |         dict(type="TextLoggerHook"),
 95 |         dict(type="TensorboardLoggerHook", reset_flag=False),
 96 |     ],
 97 | )
 98 | 
 99 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth"  # checkpoint named after the _base_ config
100 | work_dir = f"runs/visdrone/{EXP_NAME}/"  # output folder is derived from EXP_NAME
101 | 


--------------------------------------------------------------------------------
/mmdet_configs/vfnet/vfnet_r50_fpn_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [
  2 |     '../_base_/datasets/coco_detection.py',
  3 |     '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
  4 | ]
  5 | # model settings: VFNet with a ResNet-50 backbone, FPN neck and VFNetHead
  6 | model = dict(
  7 |     type='VFNet',
  8 |     backbone=dict(
  9 |         type='ResNet',
 10 |         depth=50,
 11 |         num_stages=4,
 12 |         out_indices=(0, 1, 2, 3),
 13 |         frozen_stages=1,
 14 |         norm_cfg=dict(type='BN', requires_grad=True),
 15 |         norm_eval=True,
 16 |         style='pytorch',
 17 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
 18 |     neck=dict(
 19 |         type='FPN',
 20 |         in_channels=[256, 512, 1024, 2048],
 21 |         out_channels=256,
 22 |         start_level=1,
 23 |         add_extra_convs='on_output',  # use P5
 24 |         num_outs=5,  # five outputs, one per entry in bbox_head.strides
 25 |         relu_before_extra_convs=True),
 26 |     bbox_head=dict(
 27 |         type='VFNetHead',
 28 |         num_classes=80,  # the visdrone_vfnet configs override this value
 29 |         in_channels=256,
 30 |         stacked_convs=3,
 31 |         feat_channels=256,
 32 |         strides=[8, 16, 32, 64, 128],
 33 |         center_sampling=False,
 34 |         dcn_on_last_conv=False,
 35 |         use_atss=True,
 36 |         use_vfl=True,
 37 |         loss_cls=dict(
 38 |             type='VarifocalLoss',
 39 |             use_sigmoid=True,
 40 |             alpha=0.75,
 41 |             gamma=2.0,
 42 |             iou_weighted=True,
 43 |             loss_weight=1.0),
 44 |         loss_bbox=dict(type='GIoULoss', loss_weight=1.5),
 45 |         loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0)),  # refine loss is weighted higher than loss_bbox
 46 |     # training and testing settings
 47 |     train_cfg=dict(
 48 |         assigner=dict(type='ATSSAssigner', topk=9),
 49 |         allowed_border=-1,
 50 |         pos_weight=-1,
 51 |         debug=False),
 52 |     test_cfg=dict(
 53 |         nms_pre=1000,
 54 |         min_bbox_size=0,
 55 |         score_thr=0.05,
 56 |         nms=dict(type='nms', iou_threshold=0.6),
 57 |         max_per_img=100))
 58 | 
 59 | # data settings: dataset type/root, normalization and the train/test pipelines
 60 | dataset_type = 'CocoDataset'
 61 | data_root = 'data/coco/'
 62 | img_norm_cfg = dict(
 63 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 64 | train_pipeline = [
 65 |     dict(type='LoadImageFromFile'),
 66 |     dict(type='LoadAnnotations', with_bbox=True),
 67 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
 68 |     dict(type='RandomFlip', flip_ratio=0.5),
 69 |     dict(type='Normalize', **img_norm_cfg),
 70 |     dict(type='Pad', size_divisor=32),
 71 |     dict(type='DefaultFormatBundle'),
 72 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
 73 | ]
 74 | test_pipeline = [
 75 |     dict(type='LoadImageFromFile'),
 76 |     dict(
 77 |         type='MultiScaleFlipAug',
 78 |         img_scale=(1333, 800),  # a single scale is listed
 79 |         flip=False,
 80 |         transforms=[
 81 |             dict(type='Resize', keep_ratio=True),
 82 |             dict(type='RandomFlip'),
 83 |             dict(type='Normalize', **img_norm_cfg),  # same normalization as training
 84 |             dict(type='Pad', size_divisor=32),
 85 |             dict(type='DefaultFormatBundle'),
 86 |             dict(type='Collect', keys=['img']),
 87 |         ])
 88 | ]
 89 | data = dict(
 90 |     samples_per_gpu=2,
 91 |     workers_per_gpu=2,
 92 |     train=dict(pipeline=train_pipeline),  # only the pipeline is set for each split here
 93 |     val=dict(pipeline=test_pipeline),
 94 |     test=dict(pipeline=test_pipeline))
 95 | 
 96 | # optimizer: sets lr and per-parameter bias multipliers
 97 | optimizer = dict(
 98 |     lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))
 99 | optimizer_config = dict(grad_clip=None)
100 | # learning policy: step schedule with linear warmup
101 | lr_config = dict(
102 |     policy='step',
103 |     warmup='linear',
104 |     warmup_iters=500,
105 |     warmup_ratio=0.1,
106 |     step=[8, 11])
107 | runner = dict(type='EpochBasedRunner', max_epochs=12)
108 | 


--------------------------------------------------------------------------------
/mmdet_configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [
  2 |     '../_base_/datasets/coco_detection.py',
  3 |     '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
  4 | ]
  5 | # model settings: FCOS with a caffe-style ResNet-50 backbone, FPN neck and FCOSHead
  6 | model = dict(
  7 |     type='FCOS',
  8 |     backbone=dict(
  9 |         type='ResNet',
 10 |         depth=50,
 11 |         num_stages=4,
 12 |         out_indices=(0, 1, 2, 3),
 13 |         frozen_stages=1,
 14 |         norm_cfg=dict(type='BN', requires_grad=False),
 15 |         norm_eval=True,
 16 |         style='caffe',
 17 |         init_cfg=dict(
 18 |             type='Pretrained',
 19 |             checkpoint='open-mmlab://detectron/resnet50_caffe')),
 20 |     neck=dict(
 21 |         type='FPN',
 22 |         in_channels=[256, 512, 1024, 2048],
 23 |         out_channels=256,
 24 |         start_level=1,
 25 |         add_extra_convs='on_output',  # use P5
 26 |         num_outs=5,  # five outputs, one per entry in bbox_head.strides
 27 |         relu_before_extra_convs=True),
 28 |     bbox_head=dict(
 29 |         type='FCOSHead',
 30 |         num_classes=80,
 31 |         in_channels=256,
 32 |         stacked_convs=4,
 33 |         feat_channels=256,
 34 |         strides=[8, 16, 32, 64, 128],
 35 |         loss_cls=dict(
 36 |             type='FocalLoss',
 37 |             use_sigmoid=True,
 38 |             gamma=2.0,
 39 |             alpha=0.25,
 40 |             loss_weight=1.0),
 41 |         loss_bbox=dict(type='IoULoss', loss_weight=1.0),
 42 |         loss_centerness=dict(
 43 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
 44 |     # training and testing settings
 45 |     train_cfg=dict(
 46 |         assigner=dict(
 47 |             type='MaxIoUAssigner',
 48 |             pos_iou_thr=0.5,
 49 |             neg_iou_thr=0.4,
 50 |             min_pos_iou=0,
 51 |             ignore_iof_thr=-1),
 52 |         allowed_border=-1,
 53 |         pos_weight=-1,
 54 |         debug=False),
 55 |     test_cfg=dict(
 56 |         nms_pre=1000,
 57 |         min_bbox_size=0,
 58 |         score_thr=0.05,
 59 |         nms=dict(type='nms', iou_threshold=0.5),
 60 |         max_per_img=100))
 61 | img_norm_cfg = dict(  # mean subtraction only (std is 1.0), no RGB conversion
 62 |     mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
 63 | train_pipeline = [
 64 |     dict(type='LoadImageFromFile'),
 65 |     dict(type='LoadAnnotations', with_bbox=True),
 66 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
 67 |     dict(type='RandomFlip', flip_ratio=0.5),
 68 |     dict(type='Normalize', **img_norm_cfg),
 69 |     dict(type='Pad', size_divisor=32),
 70 |     dict(type='DefaultFormatBundle'),
 71 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
 72 | ]
 73 | test_pipeline = [
 74 |     dict(type='LoadImageFromFile'),
 75 |     dict(
 76 |         type='MultiScaleFlipAug',
 77 |         img_scale=(1333, 800),  # a single scale is listed
 78 |         flip=False,
 79 |         transforms=[
 80 |             dict(type='Resize', keep_ratio=True),
 81 |             dict(type='RandomFlip'),
 82 |             dict(type='Normalize', **img_norm_cfg),  # same normalization as training
 83 |             dict(type='Pad', size_divisor=32),
 84 |             dict(type='ImageToTensor', keys=['img']),
 85 |             dict(type='Collect', keys=['img']),
 86 |         ])
 87 | ]
 88 | data = dict(
 89 |     samples_per_gpu=2,
 90 |     workers_per_gpu=2,
 91 |     train=dict(pipeline=train_pipeline),  # only the pipeline is set for each split here
 92 |     val=dict(pipeline=test_pipeline),
 93 |     test=dict(pipeline=test_pipeline))
 94 | # optimizer: sets lr and per-parameter bias multipliers
 95 | optimizer = dict(
 96 |     lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))
 97 | optimizer_config = dict(  # NOTE(review): _delete_ presumably discards the inherited optimizer_config instead of merging - confirm
 98 |     _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
 99 | # learning policy: step schedule with constant warmup
100 | lr_config = dict(
101 |     policy='step',
102 |     warmup='constant',
103 |     warmup_iters=500,
104 |     warmup_ratio=1.0 / 3,
105 |     step=[8, 11])
106 | runner = dict(type='EpochBasedRunner', max_epochs=12)
107 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/faster_rcnn_r50_caffe_dc5.py:
--------------------------------------------------------------------------------
  1 | # model settings: Faster R-CNN on a caffe-style ResNet-50 with a dilated last stage
  2 | norm_cfg = dict(type='BN', requires_grad=False)  # passed to the backbone below
  3 | model = dict(
  4 |     type='FasterRCNN',
  5 |     backbone=dict(
  6 |         type='ResNet',
  7 |         depth=50,
  8 |         num_stages=4,
  9 |         strides=(1, 2, 2, 1),  # last stage keeps stride 1 ...
 10 |         dilations=(1, 1, 1, 2),  # ... and uses dilation 2 instead
 11 |         out_indices=(3, ),  # only the last stage is output
 12 |         frozen_stages=1,
 13 |         norm_cfg=norm_cfg,
 14 |         norm_eval=True,
 15 |         style='caffe',
 16 |         init_cfg=dict(
 17 |             type='Pretrained',
 18 |             checkpoint='open-mmlab://detectron2/resnet50_caffe')),
 19 |     rpn_head=dict(
 20 |         type='RPNHead',
 21 |         in_channels=2048,
 22 |         feat_channels=2048,
 23 |         anchor_generator=dict(
 24 |             type='AnchorGenerator',
 25 |             scales=[2, 4, 8, 16, 32],
 26 |             ratios=[0.5, 1.0, 2.0],
 27 |             strides=[16]),  # single stride, same value as featmap_strides in roi_head
 28 |         bbox_coder=dict(
 29 |             type='DeltaXYWHBBoxCoder',
 30 |             target_means=[.0, .0, .0, .0],
 31 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 32 |         loss_cls=dict(
 33 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 34 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 35 |     roi_head=dict(
 36 |         type='StandardRoIHead',
 37 |         bbox_roi_extractor=dict(
 38 |             type='SingleRoIExtractor',
 39 |             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
 40 |             out_channels=2048,
 41 |             featmap_strides=[16]),
 42 |         bbox_head=dict(
 43 |             type='Shared2FCBBoxHead',
 44 |             in_channels=2048,  # equals bbox_roi_extractor.out_channels
 45 |             fc_out_channels=1024,
 46 |             roi_feat_size=7,  # equals roi_layer.output_size
 47 |             num_classes=80,
 48 |             bbox_coder=dict(
 49 |                 type='DeltaXYWHBBoxCoder',
 50 |                 target_means=[0., 0., 0., 0.],
 51 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),
 52 |             reg_class_agnostic=False,
 53 |             loss_cls=dict(
 54 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 55 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
 56 |     # model training and testing settings
 57 |     train_cfg=dict(
 58 |         rpn=dict(
 59 |             assigner=dict(
 60 |                 type='MaxIoUAssigner',
 61 |                 pos_iou_thr=0.7,
 62 |                 neg_iou_thr=0.3,
 63 |                 min_pos_iou=0.3,
 64 |                 match_low_quality=True,
 65 |                 ignore_iof_thr=-1),
 66 |             sampler=dict(
 67 |                 type='RandomSampler',
 68 |                 num=256,
 69 |                 pos_fraction=0.5,
 70 |                 neg_pos_ub=-1,
 71 |                 add_gt_as_proposals=False),
 72 |             allowed_border=0,
 73 |             pos_weight=-1,
 74 |             debug=False),
 75 |         rpn_proposal=dict(
 76 |             nms_pre=12000,
 77 |             max_per_img=2000,
 78 |             nms=dict(type='nms', iou_threshold=0.7),
 79 |             min_bbox_size=0),
 80 |         rcnn=dict(
 81 |             assigner=dict(
 82 |                 type='MaxIoUAssigner',
 83 |                 pos_iou_thr=0.5,
 84 |                 neg_iou_thr=0.5,
 85 |                 min_pos_iou=0.5,
 86 |                 match_low_quality=False,
 87 |                 ignore_iof_thr=-1),
 88 |             sampler=dict(
 89 |                 type='RandomSampler',
 90 |                 num=512,
 91 |                 pos_fraction=0.25,
 92 |                 neg_pos_ub=-1,
 93 |                 add_gt_as_proposals=True),
 94 |             pos_weight=-1,
 95 |             debug=False)),
 96 |     test_cfg=dict(
 97 |         rpn=dict(  # test-time proposal limits are lower than train_cfg.rpn_proposal
 98 |             nms=dict(type='nms', iou_threshold=0.7),
 99 |             nms_pre=6000,
100 |             max_per_img=1000,
101 |             min_bbox_size=0),
102 |         rcnn=dict(
103 |             score_thr=0.05,
104 |             nms=dict(type='nms', iou_threshold=0.5),
105 |             max_per_img=100)))
106 | 


--------------------------------------------------------------------------------
/eval_tools/predict_evaluate_analyse.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | 
  3 | from sahi.predict import predict
  4 | from sahi.scripts.coco_error_analysis import analyse
  5 | from sahi.scripts.coco_evaluation import evaluate
  6 | 
  7 | MODEL_PATH = ""  # passed to predict() as model_path
  8 | MODEL_CONFIG_PATH = ""  # passed to predict() as model_config_path
  9 | EVAL_IMAGES_FOLDER_DIR = ""  # passed to predict() as source
 10 | EVAL_DATASET_JSON_PATH = ""  # dataset json used by predict(), evaluate() and analyse()
 11 | INFERENCE_SETTING = "XVIEW_SAHI_FI_PO"  # must be a key of INFERENCE_SETTING_TO_PARAMS
 12 | EXPORT_VISUAL = False  # negated and passed to predict() as novisual
 13 | 
 14 | ############ don't change below #############
 15 | 
 16 | INFERENCE_SETTING_TO_PARAMS = {  # XVIEW_* entries slice at 400, VISDRONE_* at 640; *_PO entries use overlap_ratio 0.25
 17 |     "XVIEW_SAHI": {
 18 |         "no_standard_prediction": True,
 19 |         "no_sliced_prediction": False,
 20 |         "slice_size": 400,
 21 |         "overlap_ratio": 0,
 22 |     },
 23 |     "XVIEW_SAHI_PO": {
 24 |         "no_standard_prediction": True,
 25 |         "no_sliced_prediction": False,
 26 |         "slice_size": 400,
 27 |         "overlap_ratio": 0.25,
 28 |     },
 29 |     "XVIEW_SAHI_FI": {
 30 |         "no_standard_prediction": False,  # entries with FI in the name keep the standard prediction enabled
 31 |         "no_sliced_prediction": False,
 32 |         "slice_size": 400,
 33 |         "overlap_ratio": 0,
 34 |     },
 35 |     "XVIEW_SAHI_FI_PO": {
 36 |         "no_standard_prediction": False,
 37 |         "no_sliced_prediction": False,
 38 |         "slice_size": 400,
 39 |         "overlap_ratio": 0.25,
 40 |     },
 41 |     "VISDRONE_FI": {
 42 |         "no_standard_prediction": False,
 43 |         "no_sliced_prediction": True,  # the only entry with sliced prediction disabled
 44 |         "slice_size": 640,
 45 |         "overlap_ratio": 0,
 46 |     },
 47 |     "VISDRONE_SAHI": {
 48 |         "no_standard_prediction": True,
 49 |         "no_sliced_prediction": False,
 50 |         "slice_size": 640,
 51 |         "overlap_ratio": 0,
 52 |     },
 53 |     "VISDRONE_SAHI_PO": {
 54 |         "no_standard_prediction": True,
 55 |         "no_sliced_prediction": False,
 56 |         "slice_size": 640,
 57 |         "overlap_ratio": 0.25,
 58 |     },
 59 |     "VISDRONE_SAHI_FI": {
 60 |         "no_standard_prediction": False,
 61 |         "no_sliced_prediction": False,
 62 |         "slice_size": 640,
 63 |         "overlap_ratio": 0,
 64 |     },
 65 |     "VISDRONE_SAHI_FI_PO": {
 66 |         "no_standard_prediction": False,
 67 |         "no_sliced_prediction": False,
 68 |         "slice_size": 640,
 69 |         "overlap_ratio": 0.25,
 70 |     },
 71 | }
 72 | 
 73 | setting_params = INFERENCE_SETTING_TO_PARAMS[INFERENCE_SETTING]  # raises KeyError for an unknown setting name
 74 | 
 75 | result = predict(  # run SAHI prediction with the parameters selected by INFERENCE_SETTING
 76 |     model_type="mmdet",
 77 |     model_path=MODEL_PATH,
 78 |     model_config_path=MODEL_CONFIG_PATH,
 79 |     model_confidence_threshold=0.01,
 80 |     model_device="cuda:0",
 81 |     model_category_mapping=None,
 82 |     model_category_remapping=None,
 83 |     source=EVAL_IMAGES_FOLDER_DIR,
 84 |     no_standard_prediction=setting_params["no_standard_prediction"],
 85 |     no_sliced_prediction=setting_params["no_sliced_prediction"],
 86 |     image_size=None,
 87 |     slice_height=setting_params["slice_size"],  # square slices: height and width share slice_size
 88 |     slice_width=setting_params["slice_size"],
 89 |     overlap_height_ratio=setting_params["overlap_ratio"],  # same overlap ratio on both axes
 90 |     overlap_width_ratio=setting_params["overlap_ratio"],
 91 |     postprocess_type="NMS",
 92 |     postprocess_match_metric="IOU",
 93 |     postprocess_match_threshold=0.5,
 94 |     postprocess_class_agnostic=False,
 95 |     novisual=not EXPORT_VISUAL,
 96 |     dataset_json_path=EVAL_DATASET_JSON_PATH,
 97 |     project="runs/predict_eval_analyse",
 98 |     name=INFERENCE_SETTING,
 99 |     visual_bbox_thickness=None,
100 |     visual_text_size=None,
101 |     visual_text_thickness=None,
102 |     visual_export_format="png",
103 |     verbose=1,
104 |     return_dict=True,  # the returned dict's "export_dir" entry is read right after this call
105 |     force_postprocess_type=True,
106 | )
107 | 
108 | result_json_path = str(Path(result["export_dir"]) / "result.json")  # result.json inside predict()'s export dir
109 | 
110 | evaluate(  # COCO evaluation of result.json against the dataset json
111 |     dataset_json_path=EVAL_DATASET_JSON_PATH,
112 |     result_json_path=result_json_path,
113 |     classwise=True,
114 |     max_detections=500,
115 |     return_dict=False,
116 | )
117 | 
118 | analyse(  # COCO error analysis on the same two json files
119 |     dataset_json_path=EVAL_DATASET_JSON_PATH,
120 |     result_json_path=result_json_path,
121 |     max_detections=500,
122 |     return_dict=False,
123 | )
124 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/faster_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
  1 | # model settings: Faster R-CNN with a ResNet-50 backbone and FPN neck
  2 | model = dict(
  3 |     type='FasterRCNN',
  4 |     backbone=dict(
  5 |         type='ResNet',
  6 |         depth=50,
  7 |         num_stages=4,
  8 |         out_indices=(0, 1, 2, 3),
  9 |         frozen_stages=1,
 10 |         norm_cfg=dict(type='BN', requires_grad=True),
 11 |         norm_eval=True,
 12 |         style='pytorch',
 13 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
 14 |     neck=dict(
 15 |         type='FPN',
 16 |         in_channels=[256, 512, 1024, 2048],
 17 |         out_channels=256,
 18 |         num_outs=5),  # five outputs, one per anchor stride in rpn_head
 19 |     rpn_head=dict(
 20 |         type='RPNHead',
 21 |         in_channels=256,
 22 |         feat_channels=256,
 23 |         anchor_generator=dict(
 24 |             type='AnchorGenerator',
 25 |             scales=[8],
 26 |             ratios=[0.5, 1.0, 2.0],
 27 |             strides=[4, 8, 16, 32, 64]),
 28 |         bbox_coder=dict(
 29 |             type='DeltaXYWHBBoxCoder',
 30 |             target_means=[.0, .0, .0, .0],
 31 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 32 |         loss_cls=dict(
 33 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 34 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 35 |     roi_head=dict(
 36 |         type='StandardRoIHead',
 37 |         bbox_roi_extractor=dict(
 38 |             type='SingleRoIExtractor',
 39 |             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
 40 |             out_channels=256,
 41 |             featmap_strides=[4, 8, 16, 32]),  # four strides here; the RPN anchor generator lists five
 42 |         bbox_head=dict(
 43 |             type='Shared2FCBBoxHead',
 44 |             in_channels=256,  # equals bbox_roi_extractor.out_channels
 45 |             fc_out_channels=1024,
 46 |             roi_feat_size=7,  # equals roi_layer.output_size
 47 |             num_classes=80,
 48 |             bbox_coder=dict(
 49 |                 type='DeltaXYWHBBoxCoder',
 50 |                 target_means=[0., 0., 0., 0.],
 51 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),
 52 |             reg_class_agnostic=False,
 53 |             loss_cls=dict(
 54 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 55 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
 56 |     # model training and testing settings
 57 |     train_cfg=dict(
 58 |         rpn=dict(
 59 |             assigner=dict(
 60 |                 type='MaxIoUAssigner',
 61 |                 pos_iou_thr=0.7,
 62 |                 neg_iou_thr=0.3,
 63 |                 min_pos_iou=0.3,
 64 |                 match_low_quality=True,
 65 |                 ignore_iof_thr=-1),
 66 |             sampler=dict(
 67 |                 type='RandomSampler',
 68 |                 num=256,
 69 |                 pos_fraction=0.5,
 70 |                 neg_pos_ub=-1,
 71 |                 add_gt_as_proposals=False),
 72 |             allowed_border=-1,
 73 |             pos_weight=-1,
 74 |             debug=False),
 75 |         rpn_proposal=dict(
 76 |             nms_pre=2000,
 77 |             max_per_img=1000,
 78 |             nms=dict(type='nms', iou_threshold=0.7),
 79 |             min_bbox_size=0),
 80 |         rcnn=dict(
 81 |             assigner=dict(
 82 |                 type='MaxIoUAssigner',
 83 |                 pos_iou_thr=0.5,
 84 |                 neg_iou_thr=0.5,
 85 |                 min_pos_iou=0.5,
 86 |                 match_low_quality=False,
 87 |                 ignore_iof_thr=-1),
 88 |             sampler=dict(
 89 |                 type='RandomSampler',
 90 |                 num=512,
 91 |                 pos_fraction=0.25,
 92 |                 neg_pos_ub=-1,
 93 |                 add_gt_as_proposals=True),
 94 |             pos_weight=-1,
 95 |             debug=False)),
 96 |     test_cfg=dict(
 97 |         rpn=dict(
 98 |             nms_pre=1000,  # lower than the 2000 used in train_cfg.rpn_proposal
 99 |             max_per_img=1000,
100 |             nms=dict(type='nms', iou_threshold=0.7),
101 |             min_bbox_size=0),
102 |         rcnn=dict(
103 |             score_thr=0.05,
104 |             nms=dict(type='nms', iou_threshold=0.5),
105 |             max_per_img=100)
106 |         # soft-nms is also supported for rcnn testing
107 |         # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
108 |     ))
109 | 


--------------------------------------------------------------------------------
/mmdet_configs/visdrone_tood/tood_crop_480_960_cls_60.py:
--------------------------------------------------------------------------------
  1 | _base_ = ["../tood/tood_r50_fpn_1x_coco.py"]
  2 | 
  3 | TAGS = ["tood", "crop=480_960", "24epochs", "num_cls=60", "repeat=5"]  # NOTE(review): tag says num_cls=60 but NUM_CLASSES below is 10 - confirm
  4 | EXP_NAME = "tood_crop_480_960_cls_60"  # interpolated into work_dir at the end of this file
  5 | DATA_ROOT = "data/visdrone2019/"
  6 | BATCH_MULTIPLIER = 8  # scales both samples_per_gpu and the learning rate
  7 | LR_MULTIPLIER = 1  # extra factor applied to the learning rate only
  8 | EVAL_INTERVAL = 3  # passed to evaluation.interval
  9 | NUM_CLASSES = 10  # equals len(CLASSES)
 10 | CLASSES = ("pedestrian", "people", "bicycle", "car", "van", "truck", "tricycle", "awning-tricycle", "bus", "motor")
 11 | 
 12 | # model settings: only the head's class count is set in this file
 13 | model = dict(
 14 |     bbox_head=dict(
 15 |         num_classes=NUM_CLASSES,
 16 |     ),
 17 | )
 18 | 
 19 | # dataset settings: normalization values plus the train and test pipelines
 20 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 21 | train_pipeline = [
 22 |     dict(type="LoadImageFromFile"),
 23 |     dict(type="LoadAnnotations", with_bbox=True),
 24 |     dict(
 25 |         type="AutoAugment",  # NOTE(review): presumably one policy is picked at random per sample - confirm
 26 |         policies=[  # two identical crop+resize policies and one resize-only policy
 27 |             [
 28 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(480, 960), allow_negative_crop=True),
 29 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 30 |             ],
 31 |             [
 32 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(480, 960), allow_negative_crop=True),
 33 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 34 |             ],
 35 |             [
 36 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 37 |             ],
 38 |         ],
 39 |     ),
 40 |     dict(type="RandomFlip", flip_ratio=0.5),
 41 |     dict(type="Normalize", **img_norm_cfg),
 42 |     dict(type="Pad", size_divisor=32),
 43 |     dict(type="DefaultFormatBundle"),
 44 |     dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
 45 | ]
 46 | test_pipeline = [
 47 |     dict(type="LoadImageFromFile"),
 48 |     dict(
 49 |         type="MultiScaleFlipAug",
 50 |         img_scale=(1333, 800),  # a single scale is listed
 51 |         flip=False,
 52 |         transforms=[
 53 |             dict(type="Resize", keep_ratio=True),
 54 |             dict(type="RandomFlip"),
 55 |             dict(type="Normalize", **img_norm_cfg),  # same normalization as training
 56 |             dict(type="Pad", size_divisor=32),
 57 |             dict(type="ImageToTensor", keys=["img"]),
 58 |             dict(type="Collect", keys=["img"]),
 59 |         ],
 60 |     ),
 61 | ]
 62 | 
 63 | data = dict(
 64 |     samples_per_gpu=2 * BATCH_MULTIPLIER,  # per-GPU batch size scales with BATCH_MULTIPLIER
 65 |     workers_per_gpu=4,
 66 |     train=dict(
 67 |         type="RepeatDataset",  # wraps the CocoDataset given in `dataset`
 68 |         times=5,  # matches the "repeat=5" tag
 69 |         dataset=dict(
 70 |             type="CocoDataset",
 71 |             classes=CLASSES,
 72 |             ann_file=DATA_ROOT + "coco/train.json",
 73 |             img_prefix=DATA_ROOT + "VisDrone2019-DET-train/",
 74 |             pipeline=train_pipeline,
 75 |         ),
 76 |     ),
 77 |     val=dict(  # validation uses the sliced val_640_0 annotations and images
 78 |         classes=CLASSES,
 79 |         ann_file=DATA_ROOT + "sliced/val_640_0.json",
 80 |         img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
 81 |         pipeline=test_pipeline,
 82 |     ),
 83 |     test=dict(  # test points at the same files and pipeline as val
 84 |         classes=CLASSES,
 85 |         ann_file=DATA_ROOT + "sliced/val_640_0.json",
 86 |         img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
 87 |         pipeline=test_pipeline,
 88 |     ),
 89 | )
 90 | 
 91 | # optimizer
 92 | # 0.01 is the default learning rate for 8 GPUs;
 93 | # divide by 8 for 1 GPU, then scale by the batch and LR multipliers
 94 | optimizer = dict(lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, momentum=0.9, weight_decay=0.0001)
 95 | 
 96 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
 97 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
 98 | 
 99 | # learning policy: step schedule with linear warmup
100 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[16, 22])
101 | runner = dict(type="EpochBasedRunner", max_epochs=24)  # matches the "24epochs" tag
102 | 
103 | # logger settings: text and TensorBoard hooks, interval=50
104 | log_config = dict(
105 |     interval=50,
106 |     hooks=[
107 |         dict(type="TextLoggerHook"),
108 |         dict(type="TensorboardLoggerHook", reset_flag=False),
109 |     ],
110 | )
111 | 
112 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_1x_coco/tood_r50_fpn_1x_coco_20211210_103425-20e20746.pth"  # checkpoint named after the _base_ config
113 | work_dir = f"runs/visdrone/{EXP_NAME}/"  # output folder is derived from EXP_NAME
114 | 


--------------------------------------------------------------------------------
/mmdet_configs/visdrone_vfnet/vfnet_crop_480_960_cls_60.py:
--------------------------------------------------------------------------------
  1 | _base_ = ["../vfnet/vfnet_r50_fpn_1x_coco.py"]
  2 | 
  3 | TAGS = ["vfnet", "crop=480_960", "24epochs", "num_cls=60", "repeat=5"]  # NOTE(review): tag says num_cls=60 but NUM_CLASSES below is 10 - confirm
  4 | EXP_NAME = "vfnet_crop_480_960_cls_60"
  5 | DATA_ROOT = "data/visdrone2019/"
  6 | BATCH_MULTIPLIER = 8  # multiplies samples_per_gpu in the data settings
  7 | LR_MULTIPLIER = 1
  8 | EVAL_INTERVAL = 3
  9 | NUM_CLASSES = 10  # equals len(CLASSES)
 10 | CLASSES = ("pedestrian", "people", "bicycle", "car", "van", "truck", "tricycle", "awning-tricycle", "bus", "motor")
 11 | 
 12 | # model settings: only the head's class count is set in this file
 13 | model = dict(
 14 |     bbox_head=dict(
 15 |         num_classes=NUM_CLASSES,
 16 |     ),
 17 | )
 18 | 
 19 | # dataset settings: normalization values plus the train and test pipelines
 20 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 21 | train_pipeline = [
 22 |     dict(type="LoadImageFromFile"),
 23 |     dict(type="LoadAnnotations", with_bbox=True),
 24 |     dict(
 25 |         type="AutoAugment",  # NOTE(review): presumably one policy is picked at random per sample - confirm
 26 |         policies=[  # two identical crop+resize policies and one resize-only policy
 27 |             [
 28 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(480, 960), allow_negative_crop=True),
 29 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 30 |             ],
 31 |             [
 32 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(480, 960), allow_negative_crop=True),
 33 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 34 |             ],
 35 |             [
 36 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 37 |             ],
 38 |         ],
 39 |     ),
 40 |     dict(type="RandomFlip", flip_ratio=0.5),
 41 |     dict(type="Normalize", **img_norm_cfg),
 42 |     dict(type="Pad", size_divisor=32),
 43 |     dict(type="DefaultFormatBundle"),
 44 |     dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
 45 | ]
 46 | test_pipeline = [
 47 |     dict(type="LoadImageFromFile"),
 48 |     dict(
 49 |         type="MultiScaleFlipAug",
 50 |         img_scale=(1333, 800),  # a single scale is listed
 51 |         flip=False,
 52 |         transforms=[
 53 |             dict(type="Resize", keep_ratio=True),
 54 |             dict(type="RandomFlip"),
 55 |             dict(type="Normalize", **img_norm_cfg),  # same normalization as training
 56 |             dict(type="Pad", size_divisor=32),
 57 |             dict(type="DefaultFormatBundle"),
 58 |             dict(type="Collect", keys=["img"]),
 59 |         ],
 60 |     ),
 61 | ]
 62 | 
 63 | data = dict(
 64 |     samples_per_gpu=2 * BATCH_MULTIPLIER,
 65 |     workers_per_gpu=4,
 66 |     train=dict(
 67 |         type="RepeatDataset",
 68 |         times=5,
 69 |         dataset=dict(
 70 |             type="CocoDataset",
 71 |             classes=CLASSES,
 72 |             ann_file=DATA_ROOT + "coco/train.json",
 73 |             img_prefix=DATA_ROOT + "VisDrone2019-DET-train/",
 74 |             pipeline=train_pipeline,
 75 |         ),
 76 |     ),
 77 |     val=dict(
 78 |         classes=CLASSES,
 79 |         ann_file=DATA_ROOT + "sliced/val_640_0.json",
 80 |         img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
 81 |         pipeline=test_pipeline,
 82 |     ),
 83 |     test=dict(
 84 |         classes=CLASSES,
 85 |         ann_file=DATA_ROOT + "sliced/val_640_0.json",
 86 |         img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
 87 |         pipeline=test_pipeline,
 88 |     ),
 89 | )
 90 | 
 91 | # optimizer
 92 | # default 8 gpu
 93 | # /8 for 1 gpu
 94 | optimizer = dict(
 95 |     lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
 96 | )
 97 | 
 98 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
 99 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
100 | 
101 | # learning policy
102 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.1, step=[16, 22])
103 | runner = dict(type="EpochBasedRunner", max_epochs=24)
104 | 
105 | # logger settings
106 | log_config = dict(
107 |     interval=50,
108 |     hooks=[
109 |         dict(type="TextLoggerHook"),
110 |         dict(type="TensorboardLoggerHook", reset_flag=False),
111 |     ],
112 | )
113 | 
114 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth"
115 | work_dir = f"runs/visdrone/{EXP_NAME}/"
116 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/faster_rcnn_r50_caffe_c4.py:
--------------------------------------------------------------------------------
  1 | # model settings: Faster R-CNN on a caffe-style ResNet-50 with a single stride-16 feature level (C4)
  2 | norm_cfg = dict(type='BN', requires_grad=False)  # shared by the backbone and the RoI shared_head
  3 | model = dict(
  4 |     type='FasterRCNN',
  5 |     backbone=dict(
  6 |         type='ResNet',
  7 |         depth=50,
  8 |         num_stages=3,  # the backbone keeps three stages; stage index 3 is configured in roi_head.shared_head
  9 |         strides=(1, 2, 2),
 10 |         dilations=(1, 1, 1),
 11 |         out_indices=(2, ),  # only the last configured stage is output
 12 |         frozen_stages=1,
 13 |         norm_cfg=norm_cfg,
 14 |         norm_eval=True,
 15 |         style='caffe',
 16 |         init_cfg=dict(
 17 |             type='Pretrained',
 18 |             checkpoint='open-mmlab://detectron2/resnet50_caffe')),
 19 |     rpn_head=dict(
 20 |         type='RPNHead',
 21 |         in_channels=1024,
 22 |         feat_channels=1024,
 23 |         anchor_generator=dict(
 24 |             type='AnchorGenerator',
 25 |             scales=[2, 4, 8, 16, 32],  # 5 scales x 3 ratios on the one stride-16 level
 26 |             ratios=[0.5, 1.0, 2.0],
 27 |             strides=[16]),
 28 |         bbox_coder=dict(
 29 |             type='DeltaXYWHBBoxCoder',
 30 |             target_means=[.0, .0, .0, .0],
 31 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 32 |         loss_cls=dict(
 33 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 34 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 35 |     roi_head=dict(
 36 |         type='StandardRoIHead',
 37 |         shared_head=dict(
 38 |             type='ResLayer',
 39 |             depth=50,
 40 |             stage=3,
 41 |             stride=2,  # presumably halves the 14x14 RoI features to the 7x7 expected by bbox_head - confirm
 42 |             dilation=1,
 43 |             style='caffe',
 44 |             norm_cfg=norm_cfg,
 45 |             norm_eval=True),
 46 |         bbox_roi_extractor=dict(
 47 |             type='SingleRoIExtractor',
 48 |             roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
 49 |             out_channels=1024,
 50 |             featmap_strides=[16]),
 51 |         bbox_head=dict(
 52 |             type='BBoxHead',
 53 |             with_avg_pool=True,
 54 |             roi_feat_size=7,
 55 |             in_channels=2048,
 56 |             num_classes=80,  # default head size; dataset-specific configs are expected to override it
 57 |             bbox_coder=dict(
 58 |                 type='DeltaXYWHBBoxCoder',
 59 |                 target_means=[0., 0., 0., 0.],
 60 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),
 61 |             reg_class_agnostic=False,
 62 |             loss_cls=dict(
 63 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 64 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
 65 |     # model training and testing settings
 66 |     train_cfg=dict(
 67 |         rpn=dict(
 68 |             assigner=dict(
 69 |                 type='MaxIoUAssigner',
 70 |                 pos_iou_thr=0.7,
 71 |                 neg_iou_thr=0.3,
 72 |                 min_pos_iou=0.3,
 73 |                 match_low_quality=True,
 74 |                 ignore_iof_thr=-1),
 75 |             sampler=dict(
 76 |                 type='RandomSampler',
 77 |                 num=256,  # 256 sampled anchors with a 0.5 positive fraction
 78 |                 pos_fraction=0.5,
 79 |                 neg_pos_ub=-1,
 80 |                 add_gt_as_proposals=False),
 81 |             allowed_border=0,
 82 |             pos_weight=-1,
 83 |             debug=False),
 84 |         rpn_proposal=dict(
 85 |             nms_pre=12000,
 86 |             max_per_img=2000,
 87 |             nms=dict(type='nms', iou_threshold=0.7),
 88 |             min_bbox_size=0),
 89 |         rcnn=dict(
 90 |             assigner=dict(
 91 |                 type='MaxIoUAssigner',
 92 |                 pos_iou_thr=0.5,
 93 |                 neg_iou_thr=0.5,
 94 |                 min_pos_iou=0.5,
 95 |                 match_low_quality=False,
 96 |                 ignore_iof_thr=-1),
 97 |             sampler=dict(
 98 |                 type='RandomSampler',
 99 |                 num=512,  # 512 sampled RoIs with a 0.25 positive fraction
100 |                 pos_fraction=0.25,
101 |                 neg_pos_ub=-1,
102 |                 add_gt_as_proposals=True),
103 |             pos_weight=-1,
104 |             debug=False)),
105 |     test_cfg=dict(
106 |         rpn=dict(
107 |             nms_pre=6000,  # test-time proposal budgets are half of the training ones (12000 / 2000)
108 |             max_per_img=1000,
109 |             nms=dict(type='nms', iou_threshold=0.7),
110 |             min_bbox_size=0),
111 |         rcnn=dict(
112 |             score_thr=0.05,
113 |             nms=dict(type='nms', iou_threshold=0.5),
114 |             max_per_img=100)))
115 | 


--------------------------------------------------------------------------------
/mmdet_configs/visdrone_fcos/fcos_crop_480_960_cls_60.py:
--------------------------------------------------------------------------------
  1 | _base_ = ["../fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py"]
  2 | 
  3 | TAGS = ["fcos", "crop=480_960", "24epochs", "num_cls=60", "repeat=5"]
  4 | EXP_NAME = "fcos_crop_480_960_cls_60"
  5 | DATA_ROOT = "data/visdrone2019/"
  6 | BATCH_MULTIPLIER = 16
  7 | LR_MULTIPLIER = 1
  8 | EVAL_INTERVAL = 3
  9 | NUM_CLASSES = 10
 10 | CLASSES = ("pedestrian", "people", "bicycle", "car", "van", "truck", "tricycle", "awning-tricycle", "bus", "motor")
 11 | 
 12 | # model settings: only the head's class count is overridden here; the rest comes from _base_
 13 | model = dict(
 14 |     bbox_head=dict(
 15 |         num_classes=NUM_CLASSES,
 16 |     ),
 17 | )
 18 | 
 19 | # dataset settings
 20 | img_norm_cfg = dict(mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)  # mean subtraction only (std is 1.0), RGB conversion disabled
 21 | train_pipeline = [  # transforms run in list order
 22 |     dict(type="LoadImageFromFile"),
 23 |     dict(type="LoadAnnotations", with_bbox=True),  # boxes only, no masks requested
 24 |     dict(
 25 |         type="AutoAugment",  # presumably one policy below is sampled per image - confirm against mmdet AutoAugment
 26 |         policies=[
 27 |             [  # policy 1: random crop, then resize
 28 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(480, 960), allow_negative_crop=True),
 29 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 30 |             ],
 31 |             [  # policy 2: identical to policy 1, so 2 of the 3 listed policies crop
 32 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(480, 960), allow_negative_crop=True),
 33 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 34 |             ],
 35 |             [  # policy 3: resize only, no crop
 36 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 37 |             ],
 38 |         ],
 39 |     ),
 40 |     dict(type="RandomFlip", flip_ratio=0.5),
 41 |     dict(type="Normalize", **img_norm_cfg),
 42 |     dict(type="Pad", size_divisor=32),
 43 |     dict(type="DefaultFormatBundle"),
 44 |     dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
 45 | ]
 46 | test_pipeline = [  # single scale (1333, 800) with flipping disabled
 47 |     dict(type="LoadImageFromFile"),
 48 |     dict(
 49 |         type="MultiScaleFlipAug",
 50 |         img_scale=(1333, 800),
 51 |         flip=False,
 52 |         transforms=[
 53 |             dict(type="Resize", keep_ratio=True),
 54 |             dict(type="RandomFlip"),
 55 |             dict(type="Normalize", **img_norm_cfg),
 56 |             dict(type="Pad", size_divisor=32),
 57 |             dict(type="ImageToTensor", keys=["img"]),
 58 |             dict(type="Collect", keys=["img"]),
 59 |         ],
 60 |     ),
 61 | ]
 62 | 
 63 | data = dict(
 64 |     samples_per_gpu=2 * BATCH_MULTIPLIER,
 65 |     workers_per_gpu=4,
 66 |     train=dict(
 67 |         type="RepeatDataset",
 68 |         times=5,
 69 |         dataset=dict(
 70 |             type="CocoDataset",
 71 |             classes=CLASSES,
 72 |             ann_file=DATA_ROOT + "coco/train.json",
 73 |             img_prefix=DATA_ROOT + "VisDrone2019-DET-train/",
 74 |             pipeline=train_pipeline,
 75 |         ),
 76 |     ),
 77 |     val=dict(
 78 |         classes=CLASSES,
 79 |         ann_file=DATA_ROOT + "sliced/val_640_0.json",
 80 |         img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
 81 |         pipeline=test_pipeline,
 82 |     ),
 83 |     test=dict(
 84 |         classes=CLASSES,
 85 |         ann_file=DATA_ROOT + "sliced/val_640_0.json",
 86 |         img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
 87 |         pipeline=test_pipeline,
 88 |     ),
 89 | )
 90 | 
 91 | # optimizer
 92 | # default 8 gpu
 93 | # /8 for 1 gpu
 94 | optimizer = dict(
 95 |     lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
 96 | )
 97 | 
 98 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
 99 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
100 | 
101 | # learning policy
102 | lr_config = dict(policy="step", warmup="constant", warmup_iters=500, warmup_ratio=1.0 / 3, step=[16, 22])
103 | runner = dict(type="EpochBasedRunner", max_epochs=24)
104 | 
105 | # logger settings
106 | log_config = dict(
107 |     interval=50,
108 |     hooks=[
109 |         dict(type="TextLoggerHook"),
110 |         dict(type="TensorboardLoggerHook", reset_flag=False),
111 |     ],
112 | )
113 | 
114 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth"
115 | work_dir = f"runs/visdrone/{EXP_NAME}/"
116 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/mask_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
  1 | # model settings: Mask R-CNN with a ResNet-50 backbone and a 5-output FPN neck
  2 | model = dict(
  3 |     type='MaskRCNN',
  4 |     backbone=dict(
  5 |         type='ResNet',
  6 |         depth=50,
  7 |         num_stages=4,
  8 |         out_indices=(0, 1, 2, 3),  # all four stage outputs are exposed (the neck lists four in_channels)
  9 |         frozen_stages=1,
 10 |         norm_cfg=dict(type='BN', requires_grad=True),
 11 |         norm_eval=True,
 12 |         style='pytorch',
 13 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
 14 |     neck=dict(
 15 |         type='FPN',
 16 |         in_channels=[256, 512, 1024, 2048],
 17 |         out_channels=256,
 18 |         num_outs=5),  # five outputs, matching the five anchor strides in rpn_head
 19 |     rpn_head=dict(
 20 |         type='RPNHead',
 21 |         in_channels=256,
 22 |         feat_channels=256,
 23 |         anchor_generator=dict(
 24 |             type='AnchorGenerator',
 25 |             scales=[8],  # one scale x 3 ratios per level
 26 |             ratios=[0.5, 1.0, 2.0],
 27 |             strides=[4, 8, 16, 32, 64]),
 28 |         bbox_coder=dict(
 29 |             type='DeltaXYWHBBoxCoder',
 30 |             target_means=[.0, .0, .0, .0],
 31 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 32 |         loss_cls=dict(
 33 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 34 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 35 |     roi_head=dict(
 36 |         type='StandardRoIHead',
 37 |         bbox_roi_extractor=dict(
 38 |             type='SingleRoIExtractor',
 39 |             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
 40 |             out_channels=256,
 41 |             featmap_strides=[4, 8, 16, 32]),  # four strides listed; the stride-64 level is not included
 42 |         bbox_head=dict(
 43 |             type='Shared2FCBBoxHead',
 44 |             in_channels=256,
 45 |             fc_out_channels=1024,
 46 |             roi_feat_size=7,  # equals the bbox RoIAlign output_size
 47 |             num_classes=80,  # default head size; dataset-specific configs are expected to override it
 48 |             bbox_coder=dict(
 49 |                 type='DeltaXYWHBBoxCoder',
 50 |                 target_means=[0., 0., 0., 0.],
 51 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),
 52 |             reg_class_agnostic=False,
 53 |             loss_cls=dict(
 54 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 55 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 56 |         mask_roi_extractor=dict(
 57 |             type='SingleRoIExtractor',
 58 |             roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),  # mask branch pools at 14 instead of 7
 59 |             out_channels=256,
 60 |             featmap_strides=[4, 8, 16, 32]),
 61 |         mask_head=dict(
 62 |             type='FCNMaskHead',
 63 |             num_convs=4,
 64 |             in_channels=256,
 65 |             conv_out_channels=256,
 66 |             num_classes=80,  # keep in sync with bbox_head.num_classes
 67 |             loss_mask=dict(
 68 |                 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
 69 |     # model training and testing settings
 70 |     train_cfg=dict(
 71 |         rpn=dict(
 72 |             assigner=dict(
 73 |                 type='MaxIoUAssigner',
 74 |                 pos_iou_thr=0.7,
 75 |                 neg_iou_thr=0.3,
 76 |                 min_pos_iou=0.3,
 77 |                 match_low_quality=True,
 78 |                 ignore_iof_thr=-1),
 79 |             sampler=dict(
 80 |                 type='RandomSampler',
 81 |                 num=256,  # 256 sampled anchors with a 0.5 positive fraction
 82 |                 pos_fraction=0.5,
 83 |                 neg_pos_ub=-1,
 84 |                 add_gt_as_proposals=False),
 85 |             allowed_border=-1,
 86 |             pos_weight=-1,
 87 |             debug=False),
 88 |         rpn_proposal=dict(
 89 |             nms_pre=2000,
 90 |             max_per_img=1000,
 91 |             nms=dict(type='nms', iou_threshold=0.7),
 92 |             min_bbox_size=0),
 93 |         rcnn=dict(
 94 |             assigner=dict(
 95 |                 type='MaxIoUAssigner',
 96 |                 pos_iou_thr=0.5,
 97 |                 neg_iou_thr=0.5,
 98 |                 min_pos_iou=0.5,
 99 |                 match_low_quality=True,  # NOTE(review): the C4 model configs set this to False for rcnn - confirm this is intended
100 |                 ignore_iof_thr=-1),
101 |             sampler=dict(
102 |                 type='RandomSampler',
103 |                 num=512,  # 512 sampled RoIs with a 0.25 positive fraction
104 |                 pos_fraction=0.25,
105 |                 neg_pos_ub=-1,
106 |                 add_gt_as_proposals=True),
107 |             mask_size=28,  # twice the mask RoIAlign output_size of 14
108 |             pos_weight=-1,
109 |             debug=False)),
110 |     test_cfg=dict(
111 |         rpn=dict(
112 |             nms_pre=1000,
113 |             max_per_img=1000,
114 |             nms=dict(type='nms', iou_threshold=0.7),
115 |             min_bbox_size=0),
116 |         rcnn=dict(
117 |             score_thr=0.05,
118 |             nms=dict(type='nms', iou_threshold=0.5),
119 |             max_per_img=100,
120 |             mask_thr_binary=0.5)))
121 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/mask_rcnn_r50_caffe_c4.py:
--------------------------------------------------------------------------------
  1 | # model settings: Mask R-CNN on a caffe-style ResNet-50 with a single stride-16 feature level (C4)
  2 | norm_cfg = dict(type='BN', requires_grad=False)  # shared by the backbone and the RoI shared_head
  3 | model = dict(
  4 |     type='MaskRCNN',
  5 |     backbone=dict(
  6 |         type='ResNet',
  7 |         depth=50,
  8 |         num_stages=3,  # the backbone keeps three stages; stage index 3 is configured in roi_head.shared_head
  9 |         strides=(1, 2, 2),
 10 |         dilations=(1, 1, 1),
 11 |         out_indices=(2, ),  # only the last configured stage is output
 12 |         frozen_stages=1,
 13 |         norm_cfg=norm_cfg,
 14 |         norm_eval=True,
 15 |         style='caffe',
 16 |         init_cfg=dict(
 17 |             type='Pretrained',
 18 |             checkpoint='open-mmlab://detectron2/resnet50_caffe')),
 19 |     rpn_head=dict(
 20 |         type='RPNHead',
 21 |         in_channels=1024,
 22 |         feat_channels=1024,
 23 |         anchor_generator=dict(
 24 |             type='AnchorGenerator',
 25 |             scales=[2, 4, 8, 16, 32],  # 5 scales x 3 ratios on the one stride-16 level
 26 |             ratios=[0.5, 1.0, 2.0],
 27 |             strides=[16]),
 28 |         bbox_coder=dict(
 29 |             type='DeltaXYWHBBoxCoder',
 30 |             target_means=[.0, .0, .0, .0],
 31 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 32 |         loss_cls=dict(
 33 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 34 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 35 |     roi_head=dict(
 36 |         type='StandardRoIHead',
 37 |         shared_head=dict(
 38 |             type='ResLayer',
 39 |             depth=50,
 40 |             stage=3,
 41 |             stride=2,  # presumably halves the 14x14 RoI features to the 7x7 expected by bbox_head - confirm
 42 |             dilation=1,
 43 |             style='caffe',
 44 |             norm_cfg=norm_cfg,
 45 |             norm_eval=True),
 46 |         bbox_roi_extractor=dict(
 47 |             type='SingleRoIExtractor',
 48 |             roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
 49 |             out_channels=1024,
 50 |             featmap_strides=[16]),
 51 |         bbox_head=dict(
 52 |             type='BBoxHead',
 53 |             with_avg_pool=True,
 54 |             roi_feat_size=7,
 55 |             in_channels=2048,
 56 |             num_classes=80,  # default head size; dataset-specific configs are expected to override it
 57 |             bbox_coder=dict(
 58 |                 type='DeltaXYWHBBoxCoder',
 59 |                 target_means=[0., 0., 0., 0.],
 60 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),
 61 |             reg_class_agnostic=False,
 62 |             loss_cls=dict(
 63 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 64 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 65 |         mask_roi_extractor=None,  # no dedicated mask extractor is configured for this variant
 66 |         mask_head=dict(
 67 |             type='FCNMaskHead',
 68 |             num_convs=0,
 69 |             in_channels=2048,  # same channel count as bbox_head.in_channels
 70 |             conv_out_channels=256,
 71 |             num_classes=80,  # keep in sync with bbox_head.num_classes
 72 |             loss_mask=dict(
 73 |                 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
 74 |     # model training and testing settings
 75 |     train_cfg=dict(
 76 |         rpn=dict(
 77 |             assigner=dict(
 78 |                 type='MaxIoUAssigner',
 79 |                 pos_iou_thr=0.7,
 80 |                 neg_iou_thr=0.3,
 81 |                 min_pos_iou=0.3,
 82 |                 match_low_quality=True,
 83 |                 ignore_iof_thr=-1),
 84 |             sampler=dict(
 85 |                 type='RandomSampler',
 86 |                 num=256,  # 256 sampled anchors with a 0.5 positive fraction
 87 |                 pos_fraction=0.5,
 88 |                 neg_pos_ub=-1,
 89 |                 add_gt_as_proposals=False),
 90 |             allowed_border=0,
 91 |             pos_weight=-1,
 92 |             debug=False),
 93 |         rpn_proposal=dict(
 94 |             nms_pre=12000,
 95 |             max_per_img=2000,
 96 |             nms=dict(type='nms', iou_threshold=0.7),
 97 |             min_bbox_size=0),
 98 |         rcnn=dict(
 99 |             assigner=dict(
100 |                 type='MaxIoUAssigner',
101 |                 pos_iou_thr=0.5,
102 |                 neg_iou_thr=0.5,
103 |                 min_pos_iou=0.5,
104 |                 match_low_quality=False,
105 |                 ignore_iof_thr=-1),
106 |             sampler=dict(
107 |                 type='RandomSampler',
108 |                 num=512,  # 512 sampled RoIs with a 0.25 positive fraction
109 |                 pos_fraction=0.25,
110 |                 neg_pos_ub=-1,
111 |                 add_gt_as_proposals=True),
112 |             mask_size=14,
113 |             pos_weight=-1,
114 |             debug=False)),
115 |     test_cfg=dict(
116 |         rpn=dict(
117 |             nms_pre=6000,  # test-time proposal budgets are half of the training ones (12000 / 2000)
118 |             nms=dict(type='nms', iou_threshold=0.7),
119 |             max_per_img=1000,
120 |             min_bbox_size=0),
121 |         rcnn=dict(
122 |             score_thr=0.05,
123 |             nms=dict(type='nms', iou_threshold=0.5),
124 |             max_per_img=100,
125 |             mask_thr_binary=0.5)))
126 | 


--------------------------------------------------------------------------------
/mmdet_configs/xview_tood/tood_full_cls_60.py:
--------------------------------------------------------------------------------
  1 | _base_ = ["../tood/tood_r50_fpn_1x_coco.py"]  # inherit the COCO TOOD R50-FPN 1x config
  2 | 
  3 | 
  4 | EXP_NAME = "tood_full_cls_60"  # used to build work_dir
  5 | DATA_ROOT = "data/xview/"
  6 | BATCH_MULTIPLIER = 8  # multiplies both samples_per_gpu and the learning rate
  7 | LR_MULTIPLIER = 1  # additional learning-rate factor
  8 | EVAL_INTERVAL = 3  # forwarded to evaluation.interval
  9 | NUM_CLASSES = 60  # forwarded to model.bbox_head.num_classes
 10 | DATASET_REPEAT = 30  # forwarded to the RepeatDataset `times` argument
 11 | TAGS = ["tood", "crop=False", "24epochs", f"num_cls={NUM_CLASSES}", f"repeat={DATASET_REPEAT}"]  # run labels; class count and repeat are derived from the constants above
 12 | CLASSES = (  # 60 category names, matching NUM_CLASSES; presumably must match the annotation file's category names - verify
 13 |     "Fixed-wing Aircraft",
 14 |     "Small Aircraft",
 15 |     "Cargo Plane",
 16 |     "Helicopter",
 17 |     "Passenger Vehicle",
 18 |     "Small Car",
 19 |     "Bus",
 20 |     "Pickup Truck",
 21 |     "Utility Truck",
 22 |     "Truck",
 23 |     "Cargo Truck",
 24 |     "Truck w/Box",
 25 |     "Truck Tractor",
 26 |     "Trailer",
 27 |     "Truck w/Flatbed",
 28 |     "Truck w/Liquid",
 29 |     "Crane Truck",
 30 |     "Railway Vehicle",
 31 |     "Passenger Car",
 32 |     "Cargo Car",
 33 |     "Flat Car",
 34 |     "Tank car",
 35 |     "Locomotive",
 36 |     "Maritime Vessel",
 37 |     "Motorboat",
 38 |     "Sailboat",
 39 |     "Tugboat",
 40 |     "Barge",
 41 |     "Fishing Vessel",
 42 |     "Ferry",
 43 |     "Yacht",
 44 |     "Container Ship",
 45 |     "Oil Tanker",
 46 |     "Engineering Vehicle",
 47 |     "Tower crane",
 48 |     "Container Crane",
 49 |     "Reach Stacker",
 50 |     "Straddle Carrier",
 51 |     "Mobile Crane",
 52 |     "Dump Truck",
 53 |     "Haul Truck",
 54 |     "Scraper/Tractor",
 55 |     "Front loader/Bulldozer",
 56 |     "Excavator",
 57 |     "Cement Mixer",
 58 |     "Ground Grader",
 59 |     "Hut/Tent",
 60 |     "Shed",
 61 |     "Building",
 62 |     "Aircraft Hangar",
 63 |     "Damaged Building",
 64 |     "Facility",
 65 |     "Construction Site",
 66 |     "Vehicle Lot",
 67 |     "Helipad",
 68 |     "Storage Tank",
 69 |     "Shipping container lot",
 70 |     "Shipping Container",
 71 |     "Pylon",
 72 |     "Tower",
 73 | )
 74 | 
 75 | # model settings: only the head's class count is overridden here; the rest comes from _base_
 76 | model = dict(
 77 |     bbox_head=dict(
 78 |         num_classes=NUM_CLASSES,
 79 |     ),
 80 | )
 81 | 
 82 | # dataset settings
 83 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)  # per-channel mean/std, RGB conversion enabled
 84 | train_pipeline = [  # no cropping in this "full" variant: resize, flip, normalize, pad
 85 |     dict(type="LoadImageFromFile"),
 86 |     dict(type="LoadAnnotations", with_bbox=True),  # boxes only, no masks requested
 87 |     dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 88 |     dict(type="RandomFlip", flip_ratio=0.5),
 89 |     dict(type="Normalize", **img_norm_cfg),
 90 |     dict(type="Pad", size_divisor=32),
 91 |     dict(type="DefaultFormatBundle"),
 92 |     dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
 93 | ]
 94 | test_pipeline = [  # single scale (1333, 800) with flipping disabled
 95 |     dict(type="LoadImageFromFile"),
 96 |     dict(
 97 |         type="MultiScaleFlipAug",
 98 |         img_scale=(1333, 800),
 99 |         flip=False,
100 |         transforms=[
101 |             dict(type="Resize", keep_ratio=True),
102 |             dict(type="RandomFlip"),
103 |             dict(type="Normalize", **img_norm_cfg),
104 |             dict(type="Pad", size_divisor=32),
105 |             dict(type="ImageToTensor", keys=["img"]),
106 |             dict(type="Collect", keys=["img"]),
107 |         ],
108 |     ),
109 | ]
110 | 
111 | data = dict(
112 |     samples_per_gpu=2 * BATCH_MULTIPLIER,
113 |     workers_per_gpu=4,
114 |     train=dict(
115 |         type="RepeatDataset",
116 |         times=DATASET_REPEAT,
117 |         dataset=dict(
118 |             type="CocoDataset",
119 |             classes=CLASSES,
120 |             ann_file=DATA_ROOT + "coco/train.json",
121 |             img_prefix=DATA_ROOT + "train_images/",
122 |             pipeline=train_pipeline,
123 |         ),
124 |     ),
125 |     val=dict(
126 |         classes=CLASSES,
127 |         ann_file=DATA_ROOT + "sliced/val_400_0.json",
128 |         img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
129 |         pipeline=test_pipeline,
130 |     ),
131 |     test=dict(
132 |         classes=CLASSES,
133 |         ann_file=DATA_ROOT + "sliced/val_400_0.json",
134 |         img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
135 |         pipeline=test_pipeline,
136 |     ),
137 | )
138 | 
139 | # optimizer
140 | # default 8 gpu
141 | # /8 for 1 gpu
142 | optimizer = dict(lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, momentum=0.9, weight_decay=0.0001)
143 | 
144 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
145 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
146 | 
147 | # learning policy
148 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[16, 22])
149 | runner = dict(type="EpochBasedRunner", max_epochs=24)
150 | 
151 | # logger settings
152 | log_config = dict(
153 |     interval=50,
154 |     hooks=[
155 |         dict(type="TextLoggerHook"),
156 |         dict(type="TensorboardLoggerHook", reset_flag=False),
157 |     ],
158 | )
159 | 
160 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_1x_coco/tood_r50_fpn_1x_coco_20211210_103425-20e20746.pth"
161 | work_dir = f"runs/xview/{EXP_NAME}/"
162 | 


--------------------------------------------------------------------------------
/mmdet_configs/xview_vfnet/vfnet_full_cls_60.py:
--------------------------------------------------------------------------------
  1 | _base_ = ["../vfnet/vfnet_r50_fpn_1x_coco.py"]  # inherit the COCO VFNet R50-FPN 1x config
  2 | 
  3 | 
  4 | EXP_NAME = "vfnet_full_cls_60"  # used to build work_dir
  5 | DATA_ROOT = "data/xview/"
  6 | BATCH_MULTIPLIER = 8  # multiplies both samples_per_gpu and the learning rate
  7 | LR_MULTIPLIER = 1  # additional learning-rate factor
  8 | EVAL_INTERVAL = 3  # forwarded to evaluation.interval
  9 | NUM_CLASSES = 60  # forwarded to model.bbox_head.num_classes
 10 | DATASET_REPEAT = 30  # forwarded to the RepeatDataset `times` argument
 11 | TAGS = ["vfnet", "crop=False", "24epochs", f"num_cls={NUM_CLASSES}", f"repeat={DATASET_REPEAT}"]  # run labels; class count and repeat are derived from the constants above
 12 | CLASSES = (  # 60 category names, matching NUM_CLASSES; presumably must match the annotation file's category names - verify
 13 |     "Fixed-wing Aircraft",
 14 |     "Small Aircraft",
 15 |     "Cargo Plane",
 16 |     "Helicopter",
 17 |     "Passenger Vehicle",
 18 |     "Small Car",
 19 |     "Bus",
 20 |     "Pickup Truck",
 21 |     "Utility Truck",
 22 |     "Truck",
 23 |     "Cargo Truck",
 24 |     "Truck w/Box",
 25 |     "Truck Tractor",
 26 |     "Trailer",
 27 |     "Truck w/Flatbed",
 28 |     "Truck w/Liquid",
 29 |     "Crane Truck",
 30 |     "Railway Vehicle",
 31 |     "Passenger Car",
 32 |     "Cargo Car",
 33 |     "Flat Car",
 34 |     "Tank car",
 35 |     "Locomotive",
 36 |     "Maritime Vessel",
 37 |     "Motorboat",
 38 |     "Sailboat",
 39 |     "Tugboat",
 40 |     "Barge",
 41 |     "Fishing Vessel",
 42 |     "Ferry",
 43 |     "Yacht",
 44 |     "Container Ship",
 45 |     "Oil Tanker",
 46 |     "Engineering Vehicle",
 47 |     "Tower crane",
 48 |     "Container Crane",
 49 |     "Reach Stacker",
 50 |     "Straddle Carrier",
 51 |     "Mobile Crane",
 52 |     "Dump Truck",
 53 |     "Haul Truck",
 54 |     "Scraper/Tractor",
 55 |     "Front loader/Bulldozer",
 56 |     "Excavator",
 57 |     "Cement Mixer",
 58 |     "Ground Grader",
 59 |     "Hut/Tent",
 60 |     "Shed",
 61 |     "Building",
 62 |     "Aircraft Hangar",
 63 |     "Damaged Building",
 64 |     "Facility",
 65 |     "Construction Site",
 66 |     "Vehicle Lot",
 67 |     "Helipad",
 68 |     "Storage Tank",
 69 |     "Shipping container lot",
 70 |     "Shipping Container",
 71 |     "Pylon",
 72 |     "Tower",
 73 | )
 74 | 
 75 | # model settings: only the head's class count is overridden here; the rest comes from _base_
 76 | model = dict(
 77 |     bbox_head=dict(
 78 |         num_classes=NUM_CLASSES,
 79 |     ),
 80 | )
 81 | 
 82 | # dataset settings
 83 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)  # per-channel mean/std, RGB conversion enabled
 84 | train_pipeline = [  # no cropping in this "full" variant: resize, flip, normalize, pad
 85 |     dict(type="LoadImageFromFile"),
 86 |     dict(type="LoadAnnotations", with_bbox=True),  # boxes only, no masks requested
 87 |     dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 88 |     dict(type="RandomFlip", flip_ratio=0.5),
 89 |     dict(type="Normalize", **img_norm_cfg),
 90 |     dict(type="Pad", size_divisor=32),
 91 |     dict(type="DefaultFormatBundle"),
 92 |     dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
 93 | ]
 94 | test_pipeline = [  # single scale (1333, 800) with flipping disabled
 95 |     dict(type="LoadImageFromFile"),
 96 |     dict(
 97 |         type="MultiScaleFlipAug",
 98 |         img_scale=(1333, 800),
 99 |         flip=False,
100 |         transforms=[
101 |             dict(type="Resize", keep_ratio=True),
102 |             dict(type="RandomFlip"),
103 |             dict(type="Normalize", **img_norm_cfg),
104 |             dict(type="Pad", size_divisor=32),
105 |             dict(type="DefaultFormatBundle"),  # NOTE(review): the FCOS/TOOD configs use ImageToTensor at this step
106 |             dict(type="Collect", keys=["img"]),
107 |         ],
108 |     ),
109 | ]
110 | 
# Dataloader / dataset definitions. Training wraps the COCO-format dataset in
# a "RepeatDataset"; val and test both point at the same sliced validation set.
data = {
    "samples_per_gpu": 2 * BATCH_MULTIPLIER,
    "workers_per_gpu": 4,
    "train": {
        "type": "RepeatDataset",
        "times": DATASET_REPEAT,
        "dataset": {
            "type": "CocoDataset",
            "classes": CLASSES,
            "ann_file": DATA_ROOT + "coco/train.json",
            "img_prefix": DATA_ROOT + "train_images/",
            "pipeline": train_pipeline,
        },
    },
    "val": {
        "classes": CLASSES,
        "ann_file": DATA_ROOT + "sliced/val_400_0.json",
        "img_prefix": DATA_ROOT + "sliced/val_images_400_0/",
        "pipeline": test_pipeline,
    },
    "test": {
        "classes": CLASSES,
        "ann_file": DATA_ROOT + "sliced/val_400_0.json",
        "img_prefix": DATA_ROOT + "sliced/val_images_400_0/",
        "pipeline": test_pipeline,
    },
}

# optimizer
# 0.01 is the learning rate of the default 8-GPU setup; it is divided by 8
# for a single GPU and then scaled by the batch and LR multipliers.
optimizer = {
    "lr": 0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER,
    "paramwise_cfg": {"bias_lr_mult": 2.0, "bias_decay_mult": 0.0},
}

# Keep only the most recent checkpoint and leave the optimizer state out of it.
checkpoint_config = {"interval": 1, "max_keep_ckpts": 1, "save_optimizer": False}
# bbox evaluation every EVAL_INTERVAL epochs, tracking the best checkpoint.
evaluation = {"interval": EVAL_INTERVAL, "metric": "bbox", "save_best": "auto"}

# learning policy: step schedule with linear warmup over the first 500 iterations
lr_config = {
    "policy": "step",
    "warmup": "linear",
    "warmup_iters": 500,
    "warmup_ratio": 0.1,
    "step": [16, 22],
}
runner = {"type": "EpochBasedRunner", "max_epochs": 24}

# logger settings: text and TensorBoard hooks, logging every 50 iterations
log_config = {
    "interval": 50,
    "hooks": [
        {"type": "TextLoggerHook"},
        {"type": "TensorboardLoggerHook", "reset_flag": False},
    ],
}

# COCO-pretrained VFNet weights used as the starting point.
load_from = "https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth"
work_dir = f"runs/xview/{EXP_NAME}/"
164 | 


--------------------------------------------------------------------------------
/mmdet_configs/xview_fcos/fcos_full_cls_60.py:
--------------------------------------------------------------------------------
  1 | _base_ = ["../fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py"]
  2 | 
  3 | 
  4 | EXP_NAME = "fcos_full_cls_60"
  5 | DATA_ROOT = "data/xview/"
  6 | BATCH_MULTIPLIER = 16
  7 | LR_MULTIPLIER = 1
  8 | EVAL_INTERVAL = 3
  9 | NUM_CLASSES = 60
 10 | DATASET_REPEAT = 30
 11 | TAGS = ["fcos", "crop=False", "24epochs", f"num_cls={NUM_CLASSES}", f"repeat={DATASET_REPEAT}"]
 12 | CLASSES = (
 13 |     "Fixed-wing Aircraft",
 14 |     "Small Aircraft",
 15 |     "Cargo Plane",
 16 |     "Helicopter",
 17 |     "Passenger Vehicle",
 18 |     "Small Car",
 19 |     "Bus",
 20 |     "Pickup Truck",
 21 |     "Utility Truck",
 22 |     "Truck",
 23 |     "Cargo Truck",
 24 |     "Truck w/Box",
 25 |     "Truck Tractor",
 26 |     "Trailer",
 27 |     "Truck w/Flatbed",
 28 |     "Truck w/Liquid",
 29 |     "Crane Truck",
 30 |     "Railway Vehicle",
 31 |     "Passenger Car",
 32 |     "Cargo Car",
 33 |     "Flat Car",
 34 |     "Tank car",
 35 |     "Locomotive",
 36 |     "Maritime Vessel",
 37 |     "Motorboat",
 38 |     "Sailboat",
 39 |     "Tugboat",
 40 |     "Barge",
 41 |     "Fishing Vessel",
 42 |     "Ferry",
 43 |     "Yacht",
 44 |     "Container Ship",
 45 |     "Oil Tanker",
 46 |     "Engineering Vehicle",
 47 |     "Tower crane",
 48 |     "Container Crane",
 49 |     "Reach Stacker",
 50 |     "Straddle Carrier",
 51 |     "Mobile Crane",
 52 |     "Dump Truck",
 53 |     "Haul Truck",
 54 |     "Scraper/Tractor",
 55 |     "Front loader/Bulldozer",
 56 |     "Excavator",
 57 |     "Cement Mixer",
 58 |     "Ground Grader",
 59 |     "Hut/Tent",
 60 |     "Shed",
 61 |     "Building",
 62 |     "Aircraft Hangar",
 63 |     "Damaged Building",
 64 |     "Facility",
 65 |     "Construction Site",
 66 |     "Vehicle Lot",
 67 |     "Helipad",
 68 |     "Storage Tank",
 69 |     "Shipping container lot",
 70 |     "Shipping Container",
 71 |     "Pylon",
 72 |     "Tower",
 73 | )
 74 | 
 75 | # model settings
 76 | model = dict(
 77 |     bbox_head=dict(
 78 |         num_classes=NUM_CLASSES,
 79 |     ),
 80 | )
 81 | 
 82 | # dataset settings
 83 | img_norm_cfg = dict(mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
 84 | train_pipeline = [
 85 |     dict(type="LoadImageFromFile"),
 86 |     dict(type="LoadAnnotations", with_bbox=True),
 87 |     dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 88 |     dict(type="RandomFlip", flip_ratio=0.5),
 89 |     dict(type="Normalize", **img_norm_cfg),
 90 |     dict(type="Pad", size_divisor=32),
 91 |     dict(type="DefaultFormatBundle"),
 92 |     dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
 93 | ]
 94 | test_pipeline = [
 95 |     dict(type="LoadImageFromFile"),
 96 |     dict(
 97 |         type="MultiScaleFlipAug",
 98 |         img_scale=(1333, 800),
 99 |         flip=False,
100 |         transforms=[
101 |             dict(type="Resize", keep_ratio=True),
102 |             dict(type="RandomFlip"),
103 |             dict(type="Normalize", **img_norm_cfg),
104 |             dict(type="Pad", size_divisor=32),
105 |             dict(type="ImageToTensor", keys=["img"]),
106 |             dict(type="Collect", keys=["img"]),
107 |         ],
108 |     ),
109 | ]
110 | 
111 | data = dict(
112 |     samples_per_gpu=2 * BATCH_MULTIPLIER,
113 |     workers_per_gpu=4,
114 |     train=dict(
115 |         type="RepeatDataset",
116 |         times=DATASET_REPEAT,
117 |         dataset=dict(
118 |             type="CocoDataset",
119 |             classes=CLASSES,
120 |             ann_file=DATA_ROOT + "coco/train.json",
121 |             img_prefix=DATA_ROOT + "train_images/",
122 |             pipeline=train_pipeline,
123 |         ),
124 |     ),
125 |     val=dict(
126 |         classes=CLASSES,
127 |         ann_file=DATA_ROOT + "sliced/val_400_0.json",
128 |         img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
129 |         pipeline=test_pipeline,
130 |     ),
131 |     test=dict(
132 |         classes=CLASSES,
133 |         ann_file=DATA_ROOT + "sliced/val_400_0.json",
134 |         img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
135 |         pipeline=test_pipeline,
136 |     ),
137 | )
138 | 
139 | # optimizer
140 | # default 8 gpu
141 | # /8 for 1 gpu
142 | optimizer = dict(
143 |     lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
144 | )
145 | 
146 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
147 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
148 | 
149 | # learning policy
150 | lr_config = dict(policy="step", warmup="constant", warmup_iters=500, warmup_ratio=1.0 / 3, step=[16, 22])
151 | runner = dict(type="EpochBasedRunner", max_epochs=24)
152 | 
153 | # logger settings
154 | log_config = dict(
155 |     interval=50,
156 |     hooks=[
157 |         dict(type="TextLoggerHook"),
158 |         dict(type="TensorboardLoggerHook", reset_flag=False),
159 |     ],
160 | )
161 | 
162 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth"
163 | work_dir = f"runs/xview/{EXP_NAME}/"
164 | 


--------------------------------------------------------------------------------
/visdrone/visdrone_to_coco.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from pathlib import Path
  3 | 
  4 | import fire
  5 | from PIL import Image
  6 | from sahi.utils.coco import Coco, CocoAnnotation, CocoCategory, CocoImage
  7 | from sahi.utils.file import save_json
  8 | from tqdm import tqdm
  9 | 
 10 | CATEGORY_ID_TO_NAME = {
 11 |     "0": "ignore",
 12 |     "1": "pedestrian",
 13 |     "2": "people",
 14 |     "3": "bicycle",
 15 |     "4": "car",
 16 |     "5": "van",
 17 |     "6": "truck",
 18 |     "7": "tricycle",
 19 |     "8": "awning-tricycle",
 20 |     "9": "bus",
 21 |     "10": "motor",
 22 |     "11": "others",
 23 | }
 24 | 
 25 | CATEGORY_ID_REMAPPING = {
 26 |     "1": "0",
 27 |     "2": "1",
 28 |     "3": "2",
 29 |     "4": "3",
 30 |     "5": "4",
 31 |     "6": "5",
 32 |     "7": "6",
 33 |     "8": "7",
 34 |     "9": "8",
 35 |     "10": "9",
 36 | }
 37 | 
 38 | NAME_TO_COCO_CATEGORY = {
 39 |     "pedestrian": {"name": "pedestrian", "supercategory": "person"},
 40 |     "people": {"name": "people", "supercategory": "person"},
 41 |     "bicycle": {"name": "bicycle", "supercategory": "bicycle"},
 42 |     "car": {"name": "car", "supercategory": "car"},
 43 |     "van": {"name": "van", "supercategory": "truck"},
 44 |     "truck": {"name": "truck", "supercategory": "truck"},
 45 |     "tricycle": {"name": "tricycle", "supercategory": "motor"},
 46 |     "awning-tricycle": {"name": "awning-tricycle", "supercategory": "motor"},
 47 |     "bus": {"name": "bus", "supercategory": "bus"},
 48 |     "motor": {"name": "motor", "supercategory": "motor"},
 49 | }
 50 | 
 51 | 
 52 | def visdrone_to_coco(
 53 |     data_folder_dir,
 54 |     output_file_path,
 55 |     category_id_remapping=None,
 56 | ):
 57 |     """
 58 |     Converts visdrone-det annotations into coco annotation.
 59 | 
 60 |     Args:
 61 |         data_folder_dir: str
 62 |             'VisDrone2019-DET-train' folder directory
 63 |         output_file_path: str
 64 |             Output file path
 65 |         category_id_remapping: dict
 66 |             Used for selecting desired category ids and mapping them.
 67 |             If not provided, VisDrone2019-DET mapping will be used.
 68 |             format: str(id) to str(id)
 69 |     """
 70 | 
 71 |     # init paths/folders
 72 |     input_image_folder = str(Path(data_folder_dir) / "images")
 73 |     input_ann_folder = str(Path(data_folder_dir) / "annotations")
 74 | 
 75 |     image_filepath_list = os.listdir(input_image_folder)
 76 | 
 77 |     Path(output_file_path).parents[0].mkdir(parents=True, exist_ok=True)
 78 | 
 79 |     if category_id_remapping is None:
 80 |         category_id_remapping = CATEGORY_ID_REMAPPING
 81 | 
 82 |     # init coco object
 83 |     coco = Coco()
 84 |     # append categories
 85 |     for category_id, category_name in CATEGORY_ID_TO_NAME.items():
 86 |         if category_id in category_id_remapping.keys():
 87 |             remapped_category_id = category_id_remapping[category_id]
 88 |             coco_category = NAME_TO_COCO_CATEGORY[category_name]
 89 |             coco.add_category(
 90 |                 CocoCategory(
 91 |                     id=int(remapped_category_id),
 92 |                     name=coco_category["name"],
 93 |                     supercategory=coco_category["supercategory"],
 94 |                 )
 95 |             )
 96 | 
 97 |     # convert visdrone annotations to coco
 98 |     for image_filename in tqdm(image_filepath_list):
 99 |         # get image properties
100 |         image_filepath = str(Path(input_image_folder) / image_filename)
101 |         annotation_filename = image_filename.split(".jpg")[0] + ".txt"
102 |         annotation_filepath = str(Path(input_ann_folder) / annotation_filename)
103 |         image = Image.open(image_filepath)
104 |         cocoimage_filename = str(Path(image_filepath)).split(str(Path(data_folder_dir)))[1]
105 |         if cocoimage_filename[0] == os.sep:
106 |             cocoimage_filename = cocoimage_filename[1:]
107 |         # create coco image object
108 |         coco_image = CocoImage(file_name=cocoimage_filename, height=image.size[1], width=image.size[0])
109 |         # parse annotation file
110 |         file = open(annotation_filepath, "r")
111 |         lines = file.readlines()
112 |         for line in lines:
113 |             # parse annotation bboxes
114 |             new_line = line.strip("\n").split(",")
115 |             bbox = [
116 |                 int(new_line[0]),
117 |                 int(new_line[1]),
118 |                 int(new_line[2]),
119 |                 int(new_line[3]),
120 |             ]
121 |             # parse category id and name
122 |             category_id = new_line[5]
123 |             if category_id in category_id_remapping.keys():
124 |                 category_name = CATEGORY_ID_TO_NAME[category_id]
125 |                 remapped_category_id = category_id_remapping[category_id]
126 |             else:
127 |                 continue
128 |             # create coco annotation and append it to coco image
129 |             coco_annotation = CocoAnnotation.from_coco_bbox(
130 |                 bbox=bbox,
131 |                 category_id=int(remapped_category_id),
132 |                 category_name=category_name,
133 |             )
134 |             if coco_annotation.area > 0:
135 |                 coco_image.add_annotation(coco_annotation)
136 |         coco.add_image(coco_image)
137 | 
138 |     save_path = output_file_path
139 |     save_json(data=coco.json, save_path=save_path)
140 | 
141 | 
# When run as a script, hand visdrone_to_coco to python-fire.
if __name__ == "__main__":
    fire.Fire(visdrone_to_coco)
144 | 


--------------------------------------------------------------------------------
/mmdet_configs/xview_tood/tood_crop_300_500_cls_60.py:
--------------------------------------------------------------------------------
  1 | _base_ = ["../tood/tood_r50_fpn_1x_coco.py"]
  2 | 
  3 | 
  4 | EXP_NAME = "tood_crop_300_500_cls_60"
  5 | DATA_ROOT = "data/xview/"
  6 | BATCH_MULTIPLIER = 8
  7 | LR_MULTIPLIER = 1
  8 | EVAL_INTERVAL = 3
  9 | NUM_CLASSES = 60
 10 | DATASET_REPEAT = 50
 11 | TAGS = ["tood", "crop=300_500", "24epochs", f"num_cls={NUM_CLASSES}", f"repeat={DATASET_REPEAT}"]
 12 | CLASSES = (
 13 |     "Fixed-wing Aircraft",
 14 |     "Small Aircraft",
 15 |     "Cargo Plane",
 16 |     "Helicopter",
 17 |     "Passenger Vehicle",
 18 |     "Small Car",
 19 |     "Bus",
 20 |     "Pickup Truck",
 21 |     "Utility Truck",
 22 |     "Truck",
 23 |     "Cargo Truck",
 24 |     "Truck w/Box",
 25 |     "Truck Tractor",
 26 |     "Trailer",
 27 |     "Truck w/Flatbed",
 28 |     "Truck w/Liquid",
 29 |     "Crane Truck",
 30 |     "Railway Vehicle",
 31 |     "Passenger Car",
 32 |     "Cargo Car",
 33 |     "Flat Car",
 34 |     "Tank car",
 35 |     "Locomotive",
 36 |     "Maritime Vessel",
 37 |     "Motorboat",
 38 |     "Sailboat",
 39 |     "Tugboat",
 40 |     "Barge",
 41 |     "Fishing Vessel",
 42 |     "Ferry",
 43 |     "Yacht",
 44 |     "Container Ship",
 45 |     "Oil Tanker",
 46 |     "Engineering Vehicle",
 47 |     "Tower crane",
 48 |     "Container Crane",
 49 |     "Reach Stacker",
 50 |     "Straddle Carrier",
 51 |     "Mobile Crane",
 52 |     "Dump Truck",
 53 |     "Haul Truck",
 54 |     "Scraper/Tractor",
 55 |     "Front loader/Bulldozer",
 56 |     "Excavator",
 57 |     "Cement Mixer",
 58 |     "Ground Grader",
 59 |     "Hut/Tent",
 60 |     "Shed",
 61 |     "Building",
 62 |     "Aircraft Hangar",
 63 |     "Damaged Building",
 64 |     "Facility",
 65 |     "Construction Site",
 66 |     "Vehicle Lot",
 67 |     "Helipad",
 68 |     "Storage Tank",
 69 |     "Shipping container lot",
 70 |     "Shipping Container",
 71 |     "Pylon",
 72 |     "Tower",
 73 | )
 74 | 
 75 | # model settings
 76 | model = dict(
 77 |     bbox_head=dict(
 78 |         num_classes=NUM_CLASSES,
 79 |     ),
 80 | )
 81 | 
 82 | # dataset settings
 83 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 84 | train_pipeline = [
 85 |     dict(type="LoadImageFromFile"),
 86 |     dict(type="LoadAnnotations", with_bbox=True),
 87 |     dict(
 88 |         type="AutoAugment",
 89 |         policies=[
 90 |             [
 91 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
 92 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 93 |             ],
 94 |             [
 95 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
 96 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 97 |             ],
 98 |             [
 99 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
100 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
101 |             ],
102 |             [
103 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
104 |             ],
105 |         ],
106 |     ),
107 |     dict(type="RandomFlip", flip_ratio=0.5),
108 |     dict(type="Normalize", **img_norm_cfg),
109 |     dict(type="Pad", size_divisor=32),
110 |     dict(type="DefaultFormatBundle"),
111 |     dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
112 | ]
113 | test_pipeline = [
114 |     dict(type="LoadImageFromFile"),
115 |     dict(
116 |         type="MultiScaleFlipAug",
117 |         img_scale=(1333, 800),
118 |         flip=False,
119 |         transforms=[
120 |             dict(type="Resize", keep_ratio=True),
121 |             dict(type="RandomFlip"),
122 |             dict(type="Normalize", **img_norm_cfg),
123 |             dict(type="Pad", size_divisor=32),
124 |             dict(type="ImageToTensor", keys=["img"]),
125 |             dict(type="Collect", keys=["img"]),
126 |         ],
127 |     ),
128 | ]
129 | 
130 | data = dict(
131 |     samples_per_gpu=2 * BATCH_MULTIPLIER,
132 |     workers_per_gpu=4,
133 |     train=dict(
134 |         type="RepeatDataset",
135 |         times=DATASET_REPEAT,
136 |         dataset=dict(
137 |             type="CocoDataset",
138 |             classes=CLASSES,
139 |             ann_file=DATA_ROOT + "coco/train.json",
140 |             img_prefix=DATA_ROOT + "train_images/",
141 |             pipeline=train_pipeline,
142 |         ),
143 |     ),
144 |     val=dict(
145 |         classes=CLASSES,
146 |         ann_file=DATA_ROOT + "sliced/val_400_0.json",
147 |         img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
148 |         pipeline=test_pipeline,
149 |     ),
150 |     test=dict(
151 |         classes=CLASSES,
152 |         ann_file=DATA_ROOT + "sliced/val_400_0.json",
153 |         img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
154 |         pipeline=test_pipeline,
155 |     ),
156 | )
157 | 
158 | # optimizer
159 | # default 8 gpu
160 | # /8 for 1 gpu
161 | optimizer = dict(lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, momentum=0.9, weight_decay=0.0001)
162 | 
163 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
164 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
165 | 
166 | # learning policy
167 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[16, 22])
168 | runner = dict(type="EpochBasedRunner", max_epochs=24)
169 | 
170 | # logger settings
171 | log_config = dict(
172 |     interval=50,
173 |     hooks=[
174 |         dict(type="TextLoggerHook"),
175 |         dict(type="TensorboardLoggerHook", reset_flag=False),
176 |     ],
177 | )
178 | 
179 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_1x_coco/tood_r50_fpn_1x_coco_20211210_103425-20e20746.pth"
180 | work_dir = f"runs/xview/{EXP_NAME}/"
181 | 


--------------------------------------------------------------------------------
/mmdet_configs/xview_vfnet/vfnet_crop_300_500_cls_60.py:
--------------------------------------------------------------------------------
  1 | _base_ = ["../vfnet/vfnet_r50_fpn_1x_coco.py"]
  2 | 
  3 | 
  4 | EXP_NAME = "vfnet_crop_300_500_cls_60"
  5 | DATA_ROOT = "data/xview/"
  6 | BATCH_MULTIPLIER = 8
  7 | LR_MULTIPLIER = 1
  8 | EVAL_INTERVAL = 3
  9 | NUM_CLASSES = 60
 10 | DATASET_REPEAT = 50
 11 | TAGS = ["vfnet", "crop=300_500", "24epochs", f"num_cls={NUM_CLASSES}", f"repeat={DATASET_REPEAT}"]
 12 | CLASSES = (
 13 |     "Fixed-wing Aircraft",
 14 |     "Small Aircraft",
 15 |     "Cargo Plane",
 16 |     "Helicopter",
 17 |     "Passenger Vehicle",
 18 |     "Small Car",
 19 |     "Bus",
 20 |     "Pickup Truck",
 21 |     "Utility Truck",
 22 |     "Truck",
 23 |     "Cargo Truck",
 24 |     "Truck w/Box",
 25 |     "Truck Tractor",
 26 |     "Trailer",
 27 |     "Truck w/Flatbed",
 28 |     "Truck w/Liquid",
 29 |     "Crane Truck",
 30 |     "Railway Vehicle",
 31 |     "Passenger Car",
 32 |     "Cargo Car",
 33 |     "Flat Car",
 34 |     "Tank car",
 35 |     "Locomotive",
 36 |     "Maritime Vessel",
 37 |     "Motorboat",
 38 |     "Sailboat",
 39 |     "Tugboat",
 40 |     "Barge",
 41 |     "Fishing Vessel",
 42 |     "Ferry",
 43 |     "Yacht",
 44 |     "Container Ship",
 45 |     "Oil Tanker",
 46 |     "Engineering Vehicle",
 47 |     "Tower crane",
 48 |     "Container Crane",
 49 |     "Reach Stacker",
 50 |     "Straddle Carrier",
 51 |     "Mobile Crane",
 52 |     "Dump Truck",
 53 |     "Haul Truck",
 54 |     "Scraper/Tractor",
 55 |     "Front loader/Bulldozer",
 56 |     "Excavator",
 57 |     "Cement Mixer",
 58 |     "Ground Grader",
 59 |     "Hut/Tent",
 60 |     "Shed",
 61 |     "Building",
 62 |     "Aircraft Hangar",
 63 |     "Damaged Building",
 64 |     "Facility",
 65 |     "Construction Site",
 66 |     "Vehicle Lot",
 67 |     "Helipad",
 68 |     "Storage Tank",
 69 |     "Shipping container lot",
 70 |     "Shipping Container",
 71 |     "Pylon",
 72 |     "Tower",
 73 | )
 74 | 
 75 | # model settings
 76 | model = dict(
 77 |     bbox_head=dict(
 78 |         num_classes=NUM_CLASSES,
 79 |     ),
 80 | )
 81 | 
 82 | # dataset settings
 83 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 84 | train_pipeline = [
 85 |     dict(type="LoadImageFromFile"),
 86 |     dict(type="LoadAnnotations", with_bbox=True),
 87 |     dict(
 88 |         type="AutoAugment",
 89 |         policies=[
 90 |             [
 91 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
 92 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 93 |             ],
 94 |             [
 95 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
 96 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 97 |             ],
 98 |             [
 99 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
100 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
101 |             ],
102 |             [
103 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
104 |             ],
105 |         ],
106 |     ),
107 |     dict(type="RandomFlip", flip_ratio=0.5),
108 |     dict(type="Normalize", **img_norm_cfg),
109 |     dict(type="Pad", size_divisor=32),
110 |     dict(type="DefaultFormatBundle"),
111 |     dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
112 | ]
113 | test_pipeline = [
114 |     dict(type="LoadImageFromFile"),
115 |     dict(
116 |         type="MultiScaleFlipAug",
117 |         img_scale=(1333, 800),
118 |         flip=False,
119 |         transforms=[
120 |             dict(type="Resize", keep_ratio=True),
121 |             dict(type="RandomFlip"),
122 |             dict(type="Normalize", **img_norm_cfg),
123 |             dict(type="Pad", size_divisor=32),
124 |             dict(type="DefaultFormatBundle"),
125 |             dict(type="Collect", keys=["img"]),
126 |         ],
127 |     ),
128 | ]
129 | 
130 | data = dict(
131 |     samples_per_gpu=2 * BATCH_MULTIPLIER,
132 |     workers_per_gpu=4,
133 |     train=dict(
134 |         type="RepeatDataset",
135 |         times=DATASET_REPEAT,
136 |         dataset=dict(
137 |             type="CocoDataset",
138 |             classes=CLASSES,
139 |             ann_file=DATA_ROOT + "coco/train.json",
140 |             img_prefix=DATA_ROOT + "train_images/",
141 |             pipeline=train_pipeline,
142 |         ),
143 |     ),
144 |     val=dict(
145 |         classes=CLASSES,
146 |         ann_file=DATA_ROOT + "sliced/val_400_0.json",
147 |         img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
148 |         pipeline=test_pipeline,
149 |     ),
150 |     test=dict(
151 |         classes=CLASSES,
152 |         ann_file=DATA_ROOT + "sliced/val_400_0.json",
153 |         img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
154 |         pipeline=test_pipeline,
155 |     ),
156 | )
157 | 
158 | # optimizer
159 | # default 8 gpu
160 | # /8 for 1 gpu
161 | optimizer = dict(
162 |     lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
163 | )
164 | 
165 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
166 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
167 | 
168 | # learning policy
169 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.1, step=[16, 22])
170 | runner = dict(type="EpochBasedRunner", max_epochs=24)
171 | 
172 | # logger settings
173 | log_config = dict(
174 |     interval=50,
175 |     hooks=[
176 |         dict(type="TextLoggerHook"),
177 |         dict(type="TensorboardLoggerHook", reset_flag=False),
178 |     ],
179 | )
180 | 
181 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth"
182 | work_dir = f"runs/xview/{EXP_NAME}/"
183 | 


--------------------------------------------------------------------------------
/mmdet_configs/xview_fcos/fcos_crop_300_500_cls_60.py:
--------------------------------------------------------------------------------
  1 | _base_ = ["../fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py"]
  2 | 
  3 | 
  4 | EXP_NAME = "fcos_crop_300_500_cls_60"
  5 | DATA_ROOT = "data/xview/"
  6 | BATCH_MULTIPLIER = 8
  7 | LR_MULTIPLIER = 1
  8 | EVAL_INTERVAL = 3
  9 | NUM_CLASSES = 60
 10 | DATASET_REPEAT = 50
 11 | TAGS = ["fcos", "crop=300_500", "24epochs", f"num_cls={NUM_CLASSES}", f"repeat={DATASET_REPEAT}"]
 12 | CLASSES = (
 13 |     "Fixed-wing Aircraft",
 14 |     "Small Aircraft",
 15 |     "Cargo Plane",
 16 |     "Helicopter",
 17 |     "Passenger Vehicle",
 18 |     "Small Car",
 19 |     "Bus",
 20 |     "Pickup Truck",
 21 |     "Utility Truck",
 22 |     "Truck",
 23 |     "Cargo Truck",
 24 |     "Truck w/Box",
 25 |     "Truck Tractor",
 26 |     "Trailer",
 27 |     "Truck w/Flatbed",
 28 |     "Truck w/Liquid",
 29 |     "Crane Truck",
 30 |     "Railway Vehicle",
 31 |     "Passenger Car",
 32 |     "Cargo Car",
 33 |     "Flat Car",
 34 |     "Tank car",
 35 |     "Locomotive",
 36 |     "Maritime Vessel",
 37 |     "Motorboat",
 38 |     "Sailboat",
 39 |     "Tugboat",
 40 |     "Barge",
 41 |     "Fishing Vessel",
 42 |     "Ferry",
 43 |     "Yacht",
 44 |     "Container Ship",
 45 |     "Oil Tanker",
 46 |     "Engineering Vehicle",
 47 |     "Tower crane",
 48 |     "Container Crane",
 49 |     "Reach Stacker",
 50 |     "Straddle Carrier",
 51 |     "Mobile Crane",
 52 |     "Dump Truck",
 53 |     "Haul Truck",
 54 |     "Scraper/Tractor",
 55 |     "Front loader/Bulldozer",
 56 |     "Excavator",
 57 |     "Cement Mixer",
 58 |     "Ground Grader",
 59 |     "Hut/Tent",
 60 |     "Shed",
 61 |     "Building",
 62 |     "Aircraft Hangar",
 63 |     "Damaged Building",
 64 |     "Facility",
 65 |     "Construction Site",
 66 |     "Vehicle Lot",
 67 |     "Helipad",
 68 |     "Storage Tank",
 69 |     "Shipping container lot",
 70 |     "Shipping Container",
 71 |     "Pylon",
 72 |     "Tower",
 73 | )
 74 | 
 75 | # model settings
 76 | model = dict(
 77 |     bbox_head=dict(
 78 |         num_classes=NUM_CLASSES,
 79 |     ),
 80 | )
 81 | 
 82 | # dataset settings
 83 | img_norm_cfg = dict(mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
 84 | train_pipeline = [
 85 |     dict(type="LoadImageFromFile"),
 86 |     dict(type="LoadAnnotations", with_bbox=True),
 87 |     dict(
 88 |         type="AutoAugment",
 89 |         policies=[
 90 |             [
 91 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
 92 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 93 |             ],
 94 |             [
 95 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
 96 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
 97 |             ],
 98 |             [
 99 |                 dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
100 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
101 |             ],
102 |             [
103 |                 dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
104 |             ],
105 |         ],
106 |     ),
107 |     dict(type="RandomFlip", flip_ratio=0.5),
108 |     dict(type="Normalize", **img_norm_cfg),
109 |     dict(type="Pad", size_divisor=32),
110 |     dict(type="DefaultFormatBundle"),
111 |     dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
112 | ]
113 | test_pipeline = [
114 |     dict(type="LoadImageFromFile"),
115 |     dict(
116 |         type="MultiScaleFlipAug",
117 |         img_scale=(1333, 800),
118 |         flip=False,
119 |         transforms=[
120 |             dict(type="Resize", keep_ratio=True),
121 |             dict(type="RandomFlip"),
122 |             dict(type="Normalize", **img_norm_cfg),
123 |             dict(type="Pad", size_divisor=32),
124 |             dict(type="ImageToTensor", keys=["img"]),
125 |             dict(type="Collect", keys=["img"]),
126 |         ],
127 |     ),
128 | ]
129 | 
130 | data = dict(
131 |     samples_per_gpu=2 * BATCH_MULTIPLIER,
132 |     workers_per_gpu=4,
133 |     train=dict(
134 |         type="RepeatDataset",
135 |         times=DATASET_REPEAT,
136 |         dataset=dict(
137 |             type="CocoDataset",
138 |             classes=CLASSES,
139 |             ann_file=DATA_ROOT + "coco/train.json",
140 |             img_prefix=DATA_ROOT + "train_images/",
141 |             pipeline=train_pipeline,
142 |         ),
143 |     ),
144 |     val=dict(
145 |         classes=CLASSES,
146 |         ann_file=DATA_ROOT + "sliced/val_400_0.json",
147 |         img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
148 |         pipeline=test_pipeline,
149 |     ),
150 |     test=dict(
151 |         classes=CLASSES,
152 |         ann_file=DATA_ROOT + "sliced/val_400_0.json",
153 |         img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
154 |         pipeline=test_pipeline,
155 |     ),
156 | )
157 | 
158 | # optimizer
159 | # default 8 gpu
160 | # /8 for 1 gpu
161 | optimizer = dict(
162 |     lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
163 | )
164 | 
165 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
166 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
167 | 
168 | # learning policy
169 | lr_config = dict(policy="step", warmup="constant", warmup_iters=500, warmup_ratio=1.0 / 3, step=[16, 22])
170 | runner = dict(type="EpochBasedRunner", max_epochs=24)
171 | 
172 | # logger settings
173 | log_config = dict(
174 |     interval=50,
175 |     hooks=[
176 |         dict(type="TextLoggerHook"),
177 |         dict(type="TensorboardLoggerHook", reset_flag=False),
178 |     ],
179 | )
180 | 
181 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth"
182 | work_dir = f"runs/xview/{EXP_NAME}/"
183 | 


--------------------------------------------------------------------------------
/xview/xview_to_coco.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | from collections import defaultdict
  3 | from pathlib import Path
  4 | from typing import Dict, List
  5 | 
  6 | import fire
  7 | import numpy as np
  8 | from PIL import Image
  9 | from sahi.utils.coco import Coco, CocoAnnotation, CocoCategory, CocoImage
 10 | from sahi.utils.file import load_json, save_json
 11 | from tqdm import tqdm
 12 | 
# fix the seed of Python's `random` module so repeated runs are reproducible
# NOTE(review): nothing in this file calls `random` directly; presumably this is
# meant for randomness inside the sahi train/val split — confirm that split
# actually draws from the `random` module.
random.seed(13)
 15 | 
 16 | 
 17 | def xview_to_coco(
 18 |     train_images_dir,
 19 |     train_geojson_path,
 20 |     output_dir,
 21 |     train_split_rate=0.75,
 22 |     category_id_remapping=None,
 23 | ):
 24 |     """
 25 |     Converts visdrone-det annotations into coco annotation.
 26 | 
 27 |     Args:
 28 |         train_images_dir: str
 29 |             'train_images' folder directory
 30 |         train_geojson_path: str
 31 |             'xView_train.geojson' file path
 32 |         output_dir: str
 33 |             Output folder directory
 34 |         train_split_rate: bool
 35 |             Train split ratio
 36 |         category_id_remapping: dict
 37 |             Used for selecting desired category ids and mapping them.
 38 |             If not provided, xView mapping will be used.
 39 |             format: str(id) to str(id)
 40 |     """
 41 | 
 42 |     # init vars
 43 |     category_id_to_name = {}
 44 |     with open("xview/xview_class_labels.txt", encoding="utf8") as f:
 45 |         lines = f.readlines()
 46 |     for line in lines:
 47 |         category_id = line.split(":")[0]
 48 |         category_name = line.split(":")[1].replace("\n", "")
 49 |         category_id_to_name[category_id] = category_name
 50 | 
 51 |     if category_id_remapping is None:
 52 |         category_id_remapping = load_json("xview/category_id_mapping.json")
 53 |     category_id_remapping
 54 | 
 55 |     # init coco object
 56 |     coco = Coco()
 57 |     # append categories
 58 |     for category_id, category_name in category_id_to_name.items():
 59 |         if category_id in category_id_remapping.keys():
 60 |             remapped_category_id = category_id_remapping[category_id]
 61 |             coco.add_category(
 62 |                 CocoCategory(id=int(remapped_category_id), name=category_name)
 63 |             )
 64 | 
 65 |     # parse xview data
 66 |     coords, chips, classes, image_name_to_annotation_ind = get_labels(
 67 |         train_geojson_path
 68 |     )
 69 |     image_name_list = get_ordered_image_name_list(image_name_to_annotation_ind)
 70 | 
 71 |     # convert xView data to COCO format
 72 |     for image_name in tqdm(image_name_list, "Converting xView data into COCO format"):
 73 |         # create coco image object
 74 |         width, height = Image.open(Path(train_images_dir) / image_name).size
 75 |         coco_image = CocoImage(file_name=image_name, height=height, width=width)
 76 | 
 77 |         annotation_ind_list = image_name_to_annotation_ind[image_name]
 78 | 
 79 |         # iterate over image annotations
 80 |         for annotation_ind in annotation_ind_list:
 81 |             bbox = coords[annotation_ind].tolist()
 82 |             category_id = str(int(classes[annotation_ind].item()))
 83 |             coco_bbox = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
 84 |             if category_id in category_id_remapping.keys():
 85 |                 category_name = category_id_to_name[category_id]
 86 |                 remapped_category_id = category_id_remapping[category_id]
 87 |             else:
 88 |                 continue
 89 |             # create coco annotation and append it to coco image
 90 |             coco_annotation = CocoAnnotation(
 91 |                 bbox=coco_bbox,
 92 |                 category_id=int(remapped_category_id),
 93 |                 category_name=category_name,
 94 |             )
 95 |             if coco_annotation.area > 0:
 96 |                 coco_image.add_annotation(coco_annotation)
 97 |         coco.add_image(coco_image)
 98 | 
 99 |     result = coco.split_coco_as_train_val(train_split_rate=train_split_rate)
100 | 
101 |     train_json_path = Path(output_dir) / "train.json"
102 |     val_json_path = Path(output_dir) / "val.json"
103 |     save_json(data=result["train_coco"].json, save_path=train_json_path)
104 |     save_json(data=result["val_coco"].json, save_path=val_json_path)
105 | 
106 | 
def get_ordered_image_name_list(image_name_to_annotation_ind: Dict):
    """
    Returns the image names (keys of the given mapping) as a list sorted by
    the integer that precedes the first "." in each name, e.g. "2.tif" comes
    before "10.tif".
    """
    return sorted(
        image_name_to_annotation_ind.keys(),
        key=lambda image_name: int(image_name.split(".")[0]),
    )
116 | 
117 | 
def get_labels(fname):
    """
    Parses an xView geojson label file into per-annotation arrays.
    Args:
        fname: file path to an xView geojson label file
    Output:
        Returns four objects: coords, chips and classes (one row/entry per
            geojson feature, holding the box coordinates, image file-name and
            class id of each ground truth) and image_name_to_annotation_ind,
            a dict mapping each image file-name to the list of feature
            indices that belong to it.
    Modified from https://github.com/DIUx-xView.
    """
    features = load_json(fname)["features"]
    num_features = len(features)

    coords = np.zeros((num_features, 4))
    chips = np.zeros(num_features, dtype="object")
    classes = np.zeros(num_features)
    image_name_to_annotation_ind = defaultdict(list)

    for idx in tqdm(range(num_features), "Parsing xView data"):
        properties = features[idx]["properties"]
        raw_bounds = properties["bounds_imcoords"]

        # features without coordinates only get a placeholder image name
        if raw_bounds == []:
            chips[idx] = "None"
            continue

        image_id = properties["image_id"]
        # https://github.com/DIUx-xView/xView1_baseline/issues/3
        if image_id == "1395.tif":
            continue

        box = np.array([int(token) for token in raw_bounds.split(",")])
        chips[idx] = image_id
        classes[idx] = properties["type_id"]
        image_name_to_annotation_ind[image_id].append(idx)

        # a box without exactly four values is reported and its coords row stays zero
        if box.shape[0] == 4:
            coords[idx] = box
        else:
            print("Issues at %d!" % idx)

    return coords, chips, classes, image_name_to_annotation_ind
162 | 
163 | 
# Script entry point: python-fire exposes xview_to_coco as a command-line
# interface, so its parameters are supplied as CLI arguments.
if __name__ == "__main__":
    fire.Fire(xview_to_coco)
166 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/cascade_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
# model settings
# Base Cascade R-CNN detector: ResNet-50 backbone, FPN neck, RPN head and a
# three-stage cascade RoI head (bounding boxes only).
model = dict(
    type='CascadeRCNN',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        # all four stage outputs are used; they match the four FPN in_channels below
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        # five output levels, one per RPN anchor stride below
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
    roi_head=dict(
        type='CascadeRoIHead',
        num_stages=3,
        # per-stage loss weights, one per cascade stage
        stage_loss_weights=[1, 0.5, 0.25],
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        # one bbox head per cascade stage; the heads are identical except for
        # target_stds, which shrink from stage to stage
        # num_classes=80 is presumably the COCO default — dataset-specific
        # configs are expected to override it (TODO confirm)
        bbox_head=[
            # stage 1
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # stage 2
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # stage 3
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        # one entry per cascade stage; the entries differ only in the assigner
        # IoU thresholds, which rise 0.5 -> 0.6 -> 0.7
        rcnn=[
            # stage 1 (IoU 0.5)
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.5,
                    min_pos_iou=0.5,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                pos_weight=-1,
                debug=False),
            # stage 2 (IoU 0.6)
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.6,
                    min_pos_iou=0.6,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                pos_weight=-1,
                debug=False),
            # stage 3 (IoU 0.7)
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.7,
                    neg_iou_thr=0.7,
                    min_pos_iou=0.7,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                pos_weight=-1,
                debug=False)
        ]),
    # test time uses fewer RPN proposals (1000) than training (2000) and a
    # single rcnn post-processing config
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)))
180 | 


--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/cascade_mask_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
# model settings
# Base Cascade Mask R-CNN detector: ResNet-50 backbone, FPN neck, RPN head and
# a three-stage cascade RoI head with an additional mask branch.
model = dict(
    type='CascadeRCNN',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        # all four stage outputs are used; they match the four FPN in_channels below
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        # five output levels, one per RPN anchor stride below
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
    roi_head=dict(
        type='CascadeRoIHead',
        num_stages=3,
        # per-stage loss weights, one per cascade stage
        stage_loss_weights=[1, 0.5, 0.25],
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        # one bbox head per cascade stage; the heads are identical except for
        # target_stds, which shrink from stage to stage
        # num_classes=80 is presumably the COCO default — dataset-specific
        # configs are expected to override it (TODO confirm)
        bbox_head=[
            # stage 1
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # stage 2
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # stage 3
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ],
        # mask branch: a single extractor/head config (not a per-stage list);
        # RoI features are extracted at 14x14 instead of the 7x7 used for boxes
        mask_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=80,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        # one entry per cascade stage; the entries differ only in the assigner
        # IoU thresholds, which rise 0.5 -> 0.6 -> 0.7. mask_size=28 is the same
        # in every stage (presumably the mask target resolution — TODO confirm)
        rcnn=[
            # stage 1 (IoU 0.5)
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.5,
                    min_pos_iou=0.5,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                mask_size=28,
                pos_weight=-1,
                debug=False),
            # stage 2 (IoU 0.6)
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.6,
                    min_pos_iou=0.6,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                mask_size=28,
                pos_weight=-1,
                debug=False),
            # stage 3 (IoU 0.7)
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.7,
                    neg_iou_thr=0.7,
                    min_pos_iou=0.7,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                mask_size=28,
                pos_weight=-1,
                debug=False)
        ]),
    # test time uses fewer RPN proposals (1000) than training (2000) and a
    # single rcnn post-processing config, including the mask binarization threshold
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5)))
197 | 


--------------------------------------------------------------------------------
/mmdet_tools/train.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import argparse
  3 | import copy
  4 | import os
  5 | import os.path as osp
  6 | import time
  7 | import warnings
  8 | 
  9 | import mmcv
 10 | import torch
 11 | from mmcv import Config, DictAction
 12 | from mmcv.runner import get_dist_info, init_dist
 13 | from mmcv.utils import get_git_hash
 14 | 
 15 | from mmdet import __version__
 16 | from mmdet.apis import init_random_seed, set_random_seed, train_detector
 17 | from mmdet.datasets import build_dataset
 18 | from mmdet.models import build_detector
 19 | from mmdet.utils import collect_env, get_root_logger
 20 | 
 21 | 
 22 | def parse_args():
 23 |     parser = argparse.ArgumentParser(description='Train a detector')
 24 |     parser.add_argument('config', help='train config file path')
 25 |     parser.add_argument('--work-dir', help='the dir to save logs and models')
 26 |     parser.add_argument(
 27 |         '--resume-from', help='the checkpoint file to resume from')
 28 |     parser.add_argument(
 29 |         '--auto-resume',
 30 |         action='store_true',
 31 |         help='resume from the latest checkpoint automatically')
 32 |     parser.add_argument(
 33 |         '--no-validate',
 34 |         action='store_true',
 35 |         help='whether not to evaluate the checkpoint during training')
 36 |     group_gpus = parser.add_mutually_exclusive_group()
 37 |     group_gpus.add_argument(
 38 |         '--gpus',
 39 |         type=int,
 40 |         help='number of gpus to use '
 41 |         '(only applicable to non-distributed training)')
 42 |     group_gpus.add_argument(
 43 |         '--gpu-ids',
 44 |         type=int,
 45 |         nargs='+',
 46 |         help='ids of gpus to use '
 47 |         '(only applicable to non-distributed training)')
 48 |     parser.add_argument('--seed', type=int, default=None, help='random seed')
 49 |     parser.add_argument(
 50 |         '--deterministic',
 51 |         action='store_true',
 52 |         help='whether to set deterministic options for CUDNN backend.')
 53 |     parser.add_argument(
 54 |         '--options',
 55 |         nargs='+',
 56 |         action=DictAction,
 57 |         help='override some settings in the used config, the key-value pair '
 58 |         'in xxx=yyy format will be merged into config file (deprecate), '
 59 |         'change to --cfg-options instead.')
 60 |     parser.add_argument(
 61 |         '--cfg-options',
 62 |         nargs='+',
 63 |         action=DictAction,
 64 |         help='override some settings in the used config, the key-value pair '
 65 |         'in xxx=yyy format will be merged into config file. If the value to '
 66 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
 67 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
 68 |         'Note that the quotation marks are necessary and that no white space '
 69 |         'is allowed.')
 70 |     parser.add_argument(
 71 |         '--launcher',
 72 |         choices=['none', 'pytorch', 'slurm', 'mpi'],
 73 |         default='none',
 74 |         help='job launcher')
 75 |     parser.add_argument('--local_rank', type=int, default=0)
 76 |     args = parser.parse_args()
 77 |     if 'LOCAL_RANK' not in os.environ:
 78 |         os.environ['LOCAL_RANK'] = str(args.local_rank)
 79 | 
 80 |     if args.options and args.cfg_options:
 81 |         raise ValueError(
 82 |             '--options and --cfg-options cannot be both '
 83 |             'specified, --options is deprecated in favor of --cfg-options')
 84 |     if args.options:
 85 |         warnings.warn('--options is deprecated in favor of --cfg-options')
 86 |         args.cfg_options = args.options
 87 | 
 88 |     return args
 89 | 
 90 | 
def main():
    """Train a detector from a config file and command-line overrides.

    Steps, in order: load and patch the config, resolve the work directory and
    GPU ids, initialise the distributed environment (if a launcher is given),
    set up logging and random seeds, build the model and dataset(s), then hand
    everything to ``train_detector``.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # command-line overrides are merged on top of the file contents
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.auto_resume = args.auto_resume
    # explicit --gpu-ids wins; otherwise use --gpus many ids, defaulting to one
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
        # non-distributed runs are restricted to the first requested GPU id
        if len(cfg.gpu_ids) > 1:
            warnings.warn(
                f'We treat {cfg.gpu_ids} as gpu-ids, and reset to '
                f'{cfg.gpu_ids[0:1]} as gpu-ids to avoid potential error in '
                'non-distribute training time.')
            cfg.gpu_ids = cfg.gpu_ids[0:1]
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        # re-set gpu_ids with distributed training mode
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    # the same timestamp names the log file and is passed on to train_detector
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # set random seeds
    # init_random_seed receives the CLI seed (None when --seed is not given);
    # the value it returns is what gets logged, applied and stored
    seed = init_random_seed(args.seed)
    logger.info(f'Set random seed to {seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)
    cfg.seed = seed
    meta['seed'] = seed
    meta['exp_name'] = osp.basename(args.config)

    # train_cfg / test_cfg are looked up at the top level of the config and
    # are None when absent
    model = build_detector(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))
    model.init_weights()

    datasets = [build_dataset(cfg.data.train)]
    # a two-entry workflow also needs the validation set, built here with the
    # training pipeline
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__ + get_git_hash()[:7],
            CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        timestamp=timestamp,
        meta=meta)
193 | 
194 | 
# Script entry point: run training only when executed directly, not on import.
if __name__ == '__main__':
    main()
197 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # small-object-detection-benchmark
  2 | 
<a href="https://ieeexplore.ieee.org/document/9897990"><img src="https://img.shields.io/badge/DOI-10.1109%2FICIP46576.2022.9897990-orange.svg" alt="DOI"></a>
  4 | <a href="https://twitter.com/fcakyon"><img src="https://img.shields.io/badge/twitter-fcakyon_-blue?logo=twitter&style=flat" alt="fcakyon twitter"></a>
  5 | 
🔥 our paper was presented at ICIP 2022 in Bordeaux, France (16-19 October 2022)
  7 | 
  8 | [📜 List of publications that cite this work (currently 300+)](https://scholar.google.com/scholar?hl=en&as_sdt=2005&sciodt=0,5&cites=14065474760484865747&scipsc=&q=&scisbd=1)
  9 | 
 10 | ## summary
 11 | 
 12 | small-object-detection benchmark on visdrone and xview datasets using [fcos](https://arxiv.org/abs/1904.01355), [vfnet](https://arxiv.org/abs/2008.13367) and [tood](https://arxiv.org/abs/2108.07755) detectors
 13 | 
 14 | refer to [Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection](https://ieeexplore.ieee.org/document/9897990) for full technical analysis
 15 | 
 16 | ## citation
 17 | 
 18 | If you use any file/result from this repo in your work, please cite it as:
 19 | 
 20 | ```
 21 | @article{akyon2022sahi,
 22 |   title={Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection},
 23 |   author={Akyon, Fatih Cagatay and Altinuc, Sinan Onur and Temizel, Alptekin},
 24 |   journal={2022 IEEE International Conference on Image Processing (ICIP)},
 25 |   doi={10.1109/ICIP46576.2022.9897990},
 26 |   pages={966-970},
 27 |   year={2022}
 28 | }
 29 | ```
 30 | 
 31 | ## visdrone results
 32 | 
 33 | refer to table 1 in [Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection](https://ieeexplore.ieee.org/document/9897990) for more detail on visdrone results
 34 | 
 35 | [fcos_fi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_fi_visdrone_results.zip
 36 | [fcos_sahi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sahi_po_visdrone_results.zip
 37 | [fcos_sahi_fi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sahi_fi_po_visdrone_results.zip
 38 | [fcos_sf_sahi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sf_sahi_po_visdrone_results.zip
 39 | [fcos_sf_sahi_fi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sf_sahi_fi_po_visdrone_results.zip
 40 | 
 41 | [vfnet_fi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_fi_visdrone_results.zip
 42 | [vfnet_sahi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sahi_po_visdrone_results.zip
 43 | [vfnet_sahi_fi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sahi_fi_po_visdrone_results.zip
 44 | [vfnet_sf_sahi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sf_sahi_po_visdrone_results.zip
 45 | [vfnet_sf_sahi_fi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sf_sahi_fi_po_visdrone_results.zip
 46 | 
 47 | [tood_fi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_fi_visdrone_results.zip
 48 | [tood_sahi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sahi_visdrone_results.zip
 49 | [tood_sahi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sahi_po_visdrone_results.zip
 50 | [tood_sahi_fi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sahi_fi_visdrone_results.zip
 51 | [tood_sahi_fi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sahi_fi_po_visdrone_results.zip
 52 | 
 53 | [tood_sf_fi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_fi_visdrone_results.zip
 54 | [tood_sf_sahi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_visdrone_results.zip
 55 | [tood_sf_sahi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_po_visdrone_results.zip
 56 | [tood_sf_sahi_fi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_fi_visdrone_results.zip
 57 | [tood_sf_sahi_fi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_fi_po_visdrone_results.zip
 58 | 
 59 | [tood_sf_visdrone_checkpoint_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.2/tood_sf_visdrone.pth
 60 | [fcos_sf_visdrone_checkpoint_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.2/fcos_sf_visdrone.pth
 61 | 
 62 | [my_twitter_url]: https://twitter.com/fcakyon
 63 | 
 64 | |setup |AP<sub>50</sub> |AP<sub>50</sub>s |AP<sub>50</sub>m |AP<sub>50</sub>l | results | checkpoints |
 65 | |--- |--- |--- |--- |--- |--- |--- |
 66 | |FCOS+FI |25.8 |14.2 |39.6 |45.1 | [download][fcos_fi_visdrone_results_url] | [request][my_twitter_url] |
 67 | |FCOS+SAHI+PO |29.0 |18.9 |41.5 |46.4 | [download][fcos_sahi_po_visdrone_results_url] | [request][my_twitter_url] |
 68 | |FCOS+SAHI+FI+PO |31.0 |19.8 |44.6 |49.0 | [download][fcos_sahi_fi_po_visdrone_results_url] | [request][my_twitter_url] |
 69 | |FCOS+SF+SAHI+PO |38.1 |25.7 |54.8 |56.9 | [download][fcos_sf_sahi_po_visdrone_results_url] | [download][fcos_sf_visdrone_checkpoint_url] |
 70 | |FCOS+SF+SAHI+FI+PO |38.5 |25.9 |55.4 |59.8 | [download][fcos_sf_sahi_fi_po_visdrone_results_url] | [download][fcos_sf_visdrone_checkpoint_url] |
 71 | |--- |--- |--- |--- |--- |--- |--- |
 72 | |VFNet+FI |28.8 |16.8 |44.0 |47.5 | [download][vfnet_fi_visdrone_results_url] | [request][my_twitter_url] |
 73 | |VFNet+SAHI+PO |32.0 |21.4 |45.8 |45.5 | [download][vfnet_sahi_po_visdrone_results_url] | [request][my_twitter_url] |
 74 | |VFNet+SAHI+FI+PO |33.9 |22.4 |49.1 |49.4 | [download][vfnet_sahi_fi_po_visdrone_results_url] | [request][my_twitter_url] |
 75 | |VFNet+SF+SAHI+PO |41.9 |29.7 |58.8 |60.6 | [download][vfnet_sf_sahi_po_visdrone_results_url] | [request][my_twitter_url] |
 76 | |VFNet+SF+SAHI+FI+PO |42.2 |29.6 |59.2 |63.3 | [download][vfnet_sf_sahi_fi_po_visdrone_results_url] | [request][my_twitter_url] |
 77 | |--- |--- |--- |--- |--- |--- |--- |
 78 | |TOOD+FI |29.4 |18.1 |44.1 |50.0 | [download][tood_fi_visdrone_results_url] | [request][my_twitter_url] |
 79 | |TOOD+SAHI |31.9 |22.6 |44.0 |45.2 | [download][tood_sahi_visdrone_results_url] | [request][my_twitter_url] |
 80 | |TOOD+SAHI+PO |32.5 |22.8 |45.2 |43.6 | [download][tood_sahi_po_visdrone_results_url] | [request][my_twitter_url] |
 81 | |TOOD+SAHI+FI |34.6 |23.8 |48.5 |53.1 | [download][tood_sahi_fi_visdrone_results_url] | [request][my_twitter_url] |
 82 | |TOOD+SAHI+FI+PO |34.7 |23.8 |48.9 |50.3 | [download][tood_sahi_fi_po_visdrone_results_url] | [request][my_twitter_url] |
 83 | |TOOD+SF+FI |36.8 |24.4 |53.8 |66.4 | [download][tood_sf_fi_visdrone_results_url] | [download][tood_sf_visdrone_checkpoint_url] |
 84 | |TOOD+SF+SAHI |42.5 |31.6 |58.0 |61.1 | [download][tood_sf_sahi_visdrone_results_url] | [download][tood_sf_visdrone_checkpoint_url] |
 85 | |TOOD+SF+SAHI+PO |43.1 |31.7 |59.0 |60.2 | [download][tood_sf_sahi_po_visdrone_results_url] | [download][tood_sf_visdrone_checkpoint_url] |
 86 | |TOOD+SF+SAHI+FI |43.4 |31.7 |59.6 |65.6 | [download][tood_sf_sahi_fi_visdrone_results_url] | [download][tood_sf_visdrone_checkpoint_url] |
 87 | |TOOD+SF+SAHI+FI+PO |43.5 |31.7 |59.8 |65.4 | [download][tood_sf_sahi_fi_po_visdrone_results_url] | [download][tood_sf_visdrone_checkpoint_url] |
 88 | 
 89 | ## xview results
 90 | 
 91 | refer to table 2 in [Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection](https://ieeexplore.ieee.org/document/9897990) for more detail on xview results
 92 | 
 93 | [fcos_fi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_fi_xview_results.zip
 94 | [fcos_sf_sahi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sf_sahi_xview_results.zip
 95 | [fcos_sf_sahi_fi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sf_sahi_fi_xview_results.zip
 96 | [fcos_sf_sahi_fi_po_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sf_sahi_fi_op_xview_results.zip
 97 | [fcos_sf_sahi_po_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sf_sahi_op_xview_results.zip
 98 | 
 99 | [vfnet_fi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_fi_xview_results.zip
100 | [vfnet_sf_sahi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sf_sahi_xview_results.zip
101 | [vfnet_sf_sahi_fi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sf_sahi_fi_xview_results.zip
102 | [vfnet_sf_sahi_fi_po_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sf_sahi_fi_op_xview_results.zip
103 | [vfnet_sf_sahi_po_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sf_sahi_op_xview_results.zip
104 | 
105 | [tood_fi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_fi_xview_results.zip
106 | [tood_sf_sahi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_xview_results.zip
107 | [tood_sf_sahi_fi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_fi_xview_results.zip
108 | [tood_sf_sahi_fi_po_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_fi_op_xview_results.zip
109 | [tood_sf_sahi_po_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_op_xview_results.zip
110 | 
111 | [fcos_sf_xview_checkpoint_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.2/fcos_sf_xview.pth
112 | [vfnet_sf_xview_checkpoint_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.2/vfnet_sf_xview.pth
113 | [tood_sf_xview_checkpoint_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.2/tood_sf_xview.pth
114 | 
115 | |setup |AP<sub>50</sub> |AP<sub>50</sub>s |AP<sub>50</sub>m |AP<sub>50</sub>l | results | checkpoints |
116 | |--- |--- |--- |--- |--- |--- |--- |
117 | |FCOS+FI |2.20 |0.10 |1.80 |7.30 | [download][fcos_fi_xview_results_url] | [request][my_twitter_url] |
118 | |FCOS+SF+SAHI |15.8 |11.9 |18.4 |11.0 | [download][fcos_sf_sahi_xview_results_url] | [download][fcos_sf_xview_checkpoint_url] |
119 | |FCOS+SF+SAHI+PO |17.1 |12.2 |20.2 |12.8 | [download][fcos_sf_sahi_po_xview_results_url] | [download][fcos_sf_xview_checkpoint_url] |
120 | |FCOS+SF+SAHI+FI |15.7 |11.9 |18.4 |14.3 | [download][fcos_sf_sahi_fi_xview_results_url] | [download][fcos_sf_xview_checkpoint_url] |
121 | |FCOS+SF+SAHI+FI+PO |17.0 |12.2 |20.2 |15.8 | [download][fcos_sf_sahi_fi_po_xview_results_url] | [download][fcos_sf_xview_checkpoint_url] |
122 | |--- |--- |--- |--- |--- |--- |--- |
123 | |VFNet+FI |2.10 |0.50 |1.80 |6.80 | [download][vfnet_fi_xview_results_url] | [request][my_twitter_url] |
124 | |VFNet+SF+SAHI |16.0 |11.9 |17.6 |13.1 | [download][vfnet_sf_sahi_xview_results_url] | [download][vfnet_sf_xview_checkpoint_url] |
125 | |VFNet+SF+SAHI+PO |17.7 |13.7 |19.7 |15.4 | [download][vfnet_sf_sahi_po_xview_results_url] | [download][vfnet_sf_xview_checkpoint_url] |
126 | |VFNet+SF+SAHI+FI |15.8 |11.9 |17.5 |15.2 | [download][vfnet_sf_sahi_fi_xview_results_url] | [download][vfnet_sf_xview_checkpoint_url] |
127 | |VFNet+SF+SAHI+FI+PO |17.5 |13.7 |19.6 |17.6 | [download][vfnet_sf_sahi_fi_po_xview_results_url] | [download][vfnet_sf_xview_checkpoint_url] |
128 | |--- |--- |--- |--- |--- |--- |--- |
129 | |TOOD+FI |2.10 |0.10 |2.00 |5.20 | [download][tood_fi_xview_results_url] | [request][my_twitter_url] |
130 | |TOOD+SF+SAHI |19.4 |14.6 |22.5 |14.2 | [download][tood_sf_sahi_xview_results_url] | [download][tood_sf_xview_checkpoint_url] |
131 | |TOOD+SF+SAHI+PO |20.6 |14.9 |23.6 |17.0 | [download][tood_sf_sahi_po_xview_results_url] | [download][tood_sf_xview_checkpoint_url] |
132 | |TOOD+SF+SAHI+FI |19.2 |14.6 |22.3 |14.7 | [download][tood_sf_sahi_fi_xview_results_url] | [download][tood_sf_xview_checkpoint_url] |
133 | |TOOD+SF+SAHI+FI+PO |20.4 |14.9 |23.5 |17.6 | [download][tood_sf_sahi_fi_po_xview_results_url] | [download][tood_sf_xview_checkpoint_url] |
134 | 
135 | ## env setup
136 | 
137 | install pytorch:
138 | 
139 | ```bash
140 | conda install pytorch=1.10.0 torchvision=0.11.1 cudatoolkit=11.3 -c pytorch
141 | ```
142 | 
143 | install other requirements:
144 | 
145 | ```bash
146 | pip install -r requirements.txt
147 | ```
148 | 
149 | ## evaluation
150 | 
151 | - download the desired checkpoint from the urls in the results tables above.
152 | 
153 | - download the xview or visdrone dataset and convert it to COCO format.
154 | 
155 | - set `MODEL_PATH`, `MODEL_CONFIG_PATH`, `EVAL_IMAGES_FOLDER_DIR`, `EVAL_DATASET_JSON_PATH`, `INFERENCE_SETTING` in the [predict_evaluate_analyse script](eval_tools/predict_evaluate_analyse.py), then run the script.
156 | 
157 | ## roadmap
158 | 
159 | - [x] add train test split support for xview to coco converter
160 | - [x] add mmdet config files (fcos, vfnet and tood) for xview training (9 train experiments)
161 | - [x] add mmdet config files (fcos, vfnet and tood) for visdrone training (9 train experiments)
162 | - [x] add coco result.json files, classwise coco eval results and error analysis plots for all xview experiments
163 | - [x] add coco result.json files, classwise coco eval results and error analysis plots for all visdrone experiments
164 | - [x] add .py scripts for inference + evaluation + error analysis using `sahi`
165 | 


--------------------------------------------------------------------------------