├── pyproject.toml
├── requirements.txt
├── mmdet_configs
│   ├── fcos
│   │   ├── fcos_center_r50_caffe_fpn_gn-head_1x_coco.py
│   │   ├── fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py
│   │   └── fcos_r50_caffe_fpn_gn-head_1x_coco.py
│   ├── _base_
│   │   ├── schedules
│   │   │   ├── schedule_1x.py
│   │   │   ├── schedule_20e.py
│   │   │   └── schedule_2x.py
│   │   ├── default_runtime.py
│   │   ├── datasets
│   │   │   ├── lvis_v1_instance.py
│   │   │   ├── lvis_v0.5_instance.py
│   │   │   ├── coco_detection.py
│   │   │   ├── coco_instance.py
│   │   │   ├── deepfashion.py
│   │   │   ├── voc0712.py
│   │   │   ├── coco_instance_semantic.py
│   │   │   ├── cityscapes_detection.py
│   │   │   ├── cityscapes_instance.py
│   │   │   ├── wider_face.py
│   │   │   └── coco_panoptic.py
│   │   └── models
│   │       ├── ssd300.py
│   │       ├── retinanet_r50_fpn.py
│   │       ├── rpn_r50_caffe_c4.py
│   │       ├── rpn_r50_fpn.py
│   │       ├── fast_rcnn_r50_fpn.py
│   │       ├── faster_rcnn_r50_caffe_dc5.py
│   │       ├── faster_rcnn_r50_fpn.py
│   │       ├── faster_rcnn_r50_caffe_c4.py
│   │       ├── mask_rcnn_r50_fpn.py
│   │       ├── mask_rcnn_r50_caffe_c4.py
│   │       ├── cascade_rcnn_r50_fpn.py
│   │       └── cascade_mask_rcnn_r50_fpn.py
│   ├── tood
│   │   └── tood_r50_fpn_1x_coco.py
│   ├── visdrone_tood
│   │   ├── tood_full_cls_60.py
│   │   └── tood_crop_480_960_cls_60.py
│   ├── visdrone_vfnet
│   │   ├── vfnet_full_cls_60.py
│   │   └── vfnet_crop_480_960_cls_60.py
│   ├── visdrone_fcos
│   │   ├── fcos_full_cls_60.py
│   │   └── fcos_crop_480_960_cls_60.py
│   ├── vfnet
│   │   └── vfnet_r50_fpn_1x_coco.py
│   ├── xview_tood
│   │   ├── tood_full_cls_60.py
│   │   └── tood_crop_300_500_cls_60.py
│   ├── xview_vfnet
│   │   ├── vfnet_full_cls_60.py
│   │   └── vfnet_crop_300_500_cls_60.py
│   └── xview_fcos
│       ├── fcos_full_cls_60.py
│       └── fcos_crop_300_500_cls_60.py
├── CITATION.cff
├── visdrone
│   ├── slice_visdrone.py
│   └── visdrone_to_coco.py
├── xview
│   ├── slice_xview.py
│   ├── xview_class_labels.txt
│   ├── category_id_mapping.json
│   └── xview_to_coco.py
├── LICENSE
├── .gitignore
├── eval_tools
│   └── predict_evaluate_analyse.py
├── mmdet_tools
│   └── train.py
└── README.md
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 120
3 |
4 | [tool.isort]
5 | line_length = 120
6 | profile = "black"
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | sahi==0.9.3
2 | pillow
3 | mmdet==2.21.0
4 | mmcv-full==1.4.3
5 | -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html
6 | tensorboard>=2.7.0
7 | scipy
8 |
--------------------------------------------------------------------------------
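
Note: the "-f" line points pip at prebuilt mmcv-full wheels for CUDA 11.3 / PyTorch 1.10, matching the mmcv-full==1.4.3 pin. A quick post-install sanity check (a sketch, not part of the repo) can confirm the pins resolved as expected:

    import mmcv
    import mmdet
    import sahi

    # verify the environment matches requirements.txt before training
    assert sahi.__version__ == "0.9.3"
    assert mmdet.__version__ == "2.21.0"
    assert mmcv.__version__ == "1.4.3"
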
/mmdet_configs/fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py'
2 | model = dict(bbox_head=dict(center_sampling=True, center_sample_radius=1.5))
3 |
--------------------------------------------------------------------------------
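
This file relies on mmcv's `_base_` inheritance: only the two center-sampling fields are overridden, and everything else is merged in from fcos_r50_caffe_fpn_gn-head_1x_coco.py. A minimal sketch of inspecting the merged result with the mmcv 1.x Config API:

    from mmcv import Config

    cfg = Config.fromfile("mmdet_configs/fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py")
    print(cfg.model.bbox_head.center_sampling)       # True, set in this file
    print(cfg.model.bbox_head.center_sample_radius)  # 1.5, set in this file
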
/mmdet_configs/_base_/schedules/schedule_1x.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3 | optimizer_config = dict(grad_clip=None)
4 | # learning policy
5 | lr_config = dict(
6 | policy='step',
7 | warmup='linear',
8 | warmup_iters=500,
9 | warmup_ratio=0.001,
10 | step=[8, 11])
11 | runner = dict(type='EpochBasedRunner', max_epochs=12)
12 |
--------------------------------------------------------------------------------
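
The step policy divides the learning rate by 10 (mmcv's default gamma) after epochs 8 and 11 of a 12-epoch run, after a linear warmup over the first 500 iterations. A worked sketch of the post-warmup schedule:

    def lr_at_epoch(epoch, base_lr=0.02, steps=(8, 11), gamma=0.1):
        # epoch is 0-indexed, as in mmcv's StepLrUpdaterHook progress counter
        return base_lr * gamma ** sum(epoch >= s for s in steps)

    assert lr_at_epoch(0) == 0.02              # epochs 0-7
    assert abs(lr_at_epoch(8) - 2e-3) < 1e-9   # after the first step
    assert abs(lr_at_epoch(11) - 2e-4) < 1e-9  # after the second step
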
/mmdet_configs/_base_/schedules/schedule_20e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3 | optimizer_config = dict(grad_clip=None)
4 | # learning policy
5 | lr_config = dict(
6 | policy='step',
7 | warmup='linear',
8 | warmup_iters=500,
9 | warmup_ratio=0.001,
10 | step=[16, 19])
11 | runner = dict(type='EpochBasedRunner', max_epochs=20)
12 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/schedules/schedule_2x.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3 | optimizer_config = dict(grad_clip=None)
4 | # learning policy
5 | lr_config = dict(
6 | policy='step',
7 | warmup='linear',
8 | warmup_iters=500,
9 | warmup_ratio=0.001,
10 | step=[16, 22])
11 | runner = dict(type='EpochBasedRunner', max_epochs=24)
12 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
1 | checkpoint_config = dict(interval=1)
2 | # yapf:disable
3 | log_config = dict(
4 | interval=50,
5 | hooks=[
6 | dict(type='TextLoggerHook'),
7 | # dict(type='TensorboardLoggerHook')
8 | ])
9 | # yapf:enable
10 | custom_hooks = [dict(type='NumClassCheckHook')]
11 |
12 | dist_params = dict(backend='nccl')
13 | log_level = 'INFO'
14 | load_from = None
15 | resume_from = None
16 | workflow = [('train', 1)]
17 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | preferred-citation:
3 | type: article
4 | title: "Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection"
5 | doi: 10.1109/ICIP46576.2022.9897990
6 | url: https://ieeexplore.ieee.org/document/9897990
7 | journal: 2022 IEEE International Conference on Image Processing (ICIP)
8 | message: "If you use these results in your work, please cite them."
9 | authors:
10 | - family-names: "Akyon"
11 | given-names: "Fatih Cagatay"
12 | - family-names: "Altinuc"
13 | given-names: "Sinan Onur"
14 | - family-names: "Temizel"
15 | given-names: "Alptekin"
16 | year: 2022
17 | start: 966
18 | end: 970
19 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/lvis_v1_instance.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | _base_ = 'coco_instance.py'
3 | dataset_type = 'LVISV1Dataset'
4 | data_root = 'data/lvis_v1/'
5 | data = dict(
6 | samples_per_gpu=2,
7 | workers_per_gpu=2,
8 | train=dict(
9 | _delete_=True,
10 | type='ClassBalancedDataset',
11 | oversample_thr=1e-3,
12 | dataset=dict(
13 | type=dataset_type,
14 | ann_file=data_root + 'annotations/lvis_v1_train.json',
15 | img_prefix=data_root)),
16 | val=dict(
17 | type=dataset_type,
18 | ann_file=data_root + 'annotations/lvis_v1_val.json',
19 | img_prefix=data_root),
20 | test=dict(
21 | type=dataset_type,
22 | ann_file=data_root + 'annotations/lvis_v1_val.json',
23 | img_prefix=data_root))
24 | evaluation = dict(metric=['bbox', 'segm'])
25 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/lvis_v0.5_instance.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | _base_ = 'coco_instance.py'
3 | dataset_type = 'LVISV05Dataset'
4 | data_root = 'data/lvis_v0.5/'
5 | data = dict(
6 | samples_per_gpu=2,
7 | workers_per_gpu=2,
8 | train=dict(
9 | _delete_=True,
10 | type='ClassBalancedDataset',
11 | oversample_thr=1e-3,
12 | dataset=dict(
13 | type=dataset_type,
14 | ann_file=data_root + 'annotations/lvis_v0.5_train.json',
15 | img_prefix=data_root + 'train2017/')),
16 | val=dict(
17 | type=dataset_type,
18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json',
19 | img_prefix=data_root + 'val2017/'),
20 | test=dict(
21 | type=dataset_type,
22 | ann_file=data_root + 'annotations/lvis_v0.5_val.json',
23 | img_prefix=data_root + 'val2017/'))
24 | evaluation = dict(metric=['bbox', 'segm'])
25 |
--------------------------------------------------------------------------------
/visdrone/slice_visdrone.py:
--------------------------------------------------------------------------------
1 | import fire
2 | from sahi.scripts.slice_coco import slice
3 | from tqdm import tqdm
4 |
5 | SLICE_SIZE_LIST = [480, 640]
6 | OVERLAP_RATIO_LIST = [0, 0.25]
7 | IGNORE_NEGATIVE_SAMPLES = False
8 |
9 |
10 | def slice_visdrone(image_dir: str, dataset_json_path: str, output_dir: str):
11 | total_run = len(SLICE_SIZE_LIST) * len(OVERLAP_RATIO_LIST)
12 | current_run = 1
13 | for slice_size in SLICE_SIZE_LIST:
14 | for overlap_ratio in OVERLAP_RATIO_LIST:
15 | tqdm.write(
16 | f"{current_run} of {total_run}: slicing for slice_size={slice_size}, overlap_ratio={overlap_ratio}"
17 | )
18 | slice(
19 | image_dir=image_dir,
20 | dataset_json_path=dataset_json_path,
21 | output_dir=output_dir,
22 | slice_size=slice_size,
23 | overlap_ratio=overlap_ratio,
24 | )
25 | current_run += 1
26 |
27 |
28 | if __name__ == "__main__":
29 | fire.Fire(slice_visdrone)
30 |
--------------------------------------------------------------------------------
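
fire.Fire exposes slice_visdrone as a CLI whose flags mirror the function arguments. An equivalent direct call, assuming the repo root is on PYTHONPATH (the paths below are hypothetical placeholders, not paths shipped with the repo):

    from visdrone.slice_visdrone import slice_visdrone

    slice_visdrone(
        image_dir="data/visdrone2019/VisDrone2019-DET-val/",  # hypothetical path
        dataset_json_path="data/visdrone2019/coco/val.json",  # hypothetical path
        output_dir="data/visdrone2019/sliced/",               # hypothetical path
    )
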
/xview/slice_xview.py:
--------------------------------------------------------------------------------
1 | import fire
2 | from sahi.scripts.slice_coco import slice
3 | from tqdm import tqdm
4 |
5 | MAX_WORKERS = 20
6 | SLICE_SIZE_LIST = [300, 400, 500]
7 | OVERLAP_RATIO_LIST = [0, 0.25]
8 | IGNORE_NEGATIVE_SAMPLES = True
9 |
10 |
11 | def slice_xview(image_dir: str, dataset_json_path: str, output_dir: str):
12 | total_run = len(SLICE_SIZE_LIST) * len(OVERLAP_RATIO_LIST)
13 | current_run = 1
14 | for slice_size in SLICE_SIZE_LIST:
15 | for overlap_ratio in OVERLAP_RATIO_LIST:
16 | tqdm.write(
17 | f"{current_run} of {total_run}: slicing for slice_size={slice_size}, overlap_ratio={overlap_ratio}"
18 | )
19 | slice(
20 | image_dir=image_dir,
21 | dataset_json_path=dataset_json_path,
22 | output_dir=output_dir,
23 | slice_size=slice_size,
24 | overlap_ratio=overlap_ratio,
25 | )
26 | current_run += 1
27 |
28 |
29 | if __name__ == "__main__":
30 | fire.Fire(slice_xview)
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 fatih
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/xview/xview_class_labels.txt:
--------------------------------------------------------------------------------
1 | 11:Fixed-wing Aircraft
2 | 12:Small Aircraft
3 | 13:Cargo Plane
4 | 15:Helicopter
5 | 17:Passenger Vehicle
6 | 18:Small Car
7 | 19:Bus
8 | 20:Pickup Truck
9 | 21:Utility Truck
10 | 23:Truck
11 | 24:Cargo Truck
12 | 25:Truck w/Box
13 | 26:Truck Tractor
14 | 27:Trailer
15 | 28:Truck w/Flatbed
16 | 29:Truck w/Liquid
17 | 32:Crane Truck
18 | 33:Railway Vehicle
19 | 34:Passenger Car
20 | 35:Cargo Car
21 | 36:Flat Car
22 | 37:Tank car
23 | 38:Locomotive
24 | 40:Maritime Vessel
25 | 41:Motorboat
26 | 42:Sailboat
27 | 44:Tugboat
28 | 45:Barge
29 | 47:Fishing Vessel
30 | 49:Ferry
31 | 50:Yacht
32 | 51:Container Ship
33 | 52:Oil Tanker
34 | 53:Engineering Vehicle
35 | 54:Tower crane
36 | 55:Container Crane
37 | 56:Reach Stacker
38 | 57:Straddle Carrier
39 | 59:Mobile Crane
40 | 60:Dump Truck
41 | 61:Haul Truck
42 | 62:Scraper/Tractor
43 | 63:Front loader/Bulldozer
44 | 64:Excavator
45 | 65:Cement Mixer
46 | 66:Ground Grader
47 | 71:Hut/Tent
48 | 72:Shed
49 | 73:Building
50 | 74:Aircraft Hangar
51 | 76:Damaged Building
52 | 77:Facility
53 | 79:Construction Site
54 | 83:Vehicle Lot
55 | 84:Helipad
56 | 86:Storage Tank
57 | 89:Shipping container lot
58 | 91:Shipping Container
59 | 93:Pylon
60 | 94:Tower
61 |
--------------------------------------------------------------------------------
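
Each line above maps a raw xView class id to its label, separated by a colon. A small parsing sketch (not part of the repo):

    def load_class_labels(path="xview/xview_class_labels.txt"):
        labels = {}
        with open(path) as f:
            for line in f:
                line = line.strip()
                if line:
                    class_id, name = line.split(":", 1)
                    labels[int(class_id)] = name
        return labels
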
/xview/category_id_mapping.json:
--------------------------------------------------------------------------------
1 | {
2 | "11": "0",
3 | "12": "1",
4 | "13": "2",
5 | "15": "3",
6 | "17": "4",
7 | "18": "5",
8 | "19": "6",
9 | "20": "7",
10 | "21": "8",
11 | "23": "9",
12 | "24": "10",
13 | "25": "11",
14 | "26": "12",
15 | "27": "13",
16 | "28": "14",
17 | "29": "15",
18 | "32": "16",
19 | "33": "17",
20 | "34": "18",
21 | "35": "19",
22 | "36": "20",
23 | "37": "21",
24 | "38": "22",
25 | "40": "23",
26 | "41": "24",
27 | "42": "25",
28 | "44": "26",
29 | "45": "27",
30 | "47": "28",
31 | "49": "29",
32 | "50": "30",
33 | "51": "31",
34 | "52": "32",
35 | "53": "33",
36 | "54": "34",
37 | "55": "35",
38 | "56": "36",
39 | "57": "37",
40 | "59": "38",
41 | "60": "39",
42 | "61": "40",
43 | "62": "41",
44 | "63": "42",
45 | "64": "43",
46 | "65": "44",
47 | "66": "45",
48 | "71": "46",
49 | "72": "47",
50 | "73": "48",
51 | "74": "49",
52 | "76": "50",
53 | "77": "51",
54 | "79": "52",
55 | "83": "53",
56 | "84": "54",
57 | "86": "55",
58 | "89": "56",
59 | "91": "57",
60 | "93": "58",
61 | "94": "59"
62 | }
--------------------------------------------------------------------------------
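
This mapping converts xView's sparse raw category ids (11 through 94) into the contiguous 0-59 ids that the 60-class COCO-style configs expect. A sketch of how such a mapping is applied to COCO annotations:

    import json

    with open("xview/category_id_mapping.json") as f:
        id_mapping = {int(k): int(v) for k, v in json.load(f).items()}

    def remap_category(annotation):
        # e.g. raw id 11 (Fixed-wing Aircraft) becomes contiguous id 0
        annotation["category_id"] = id_mapping[annotation["category_id"]]
        return annotation
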
/mmdet_configs/_base_/datasets/coco_detection.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'CocoDataset'
3 | data_root = 'data/coco/'
4 | img_norm_cfg = dict(
5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 | train_pipeline = [
7 | dict(type='LoadImageFromFile'),
8 | dict(type='LoadAnnotations', with_bbox=True),
9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
10 | dict(type='RandomFlip', flip_ratio=0.5),
11 | dict(type='Normalize', **img_norm_cfg),
12 | dict(type='Pad', size_divisor=32),
13 | dict(type='DefaultFormatBundle'),
14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
15 | ]
16 | test_pipeline = [
17 | dict(type='LoadImageFromFile'),
18 | dict(
19 | type='MultiScaleFlipAug',
20 | img_scale=(1333, 800),
21 | flip=False,
22 | transforms=[
23 | dict(type='Resize', keep_ratio=True),
24 | dict(type='RandomFlip'),
25 | dict(type='Normalize', **img_norm_cfg),
26 | dict(type='Pad', size_divisor=32),
27 | dict(type='ImageToTensor', keys=['img']),
28 | dict(type='Collect', keys=['img']),
29 | ])
30 | ]
31 | data = dict(
32 | samples_per_gpu=2,
33 | workers_per_gpu=2,
34 | train=dict(
35 | type=dataset_type,
36 | ann_file=data_root + 'annotations/instances_train2017.json',
37 | img_prefix=data_root + 'train2017/',
38 | pipeline=train_pipeline),
39 | val=dict(
40 | type=dataset_type,
41 | ann_file=data_root + 'annotations/instances_val2017.json',
42 | img_prefix=data_root + 'val2017/',
43 | pipeline=test_pipeline),
44 | test=dict(
45 | type=dataset_type,
46 | ann_file=data_root + 'annotations/instances_val2017.json',
47 | img_prefix=data_root + 'val2017/',
48 | pipeline=test_pipeline))
49 | evaluation = dict(interval=1, metric='bbox')
50 |
--------------------------------------------------------------------------------
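
With keep_ratio=True, Resize treats img_scale=(1333, 800) as caps on the long and short edges rather than a fixed output shape. A sketch of the scale-factor rule mmdet applies:

    def rescale_factor(w, h, max_long=1333, max_short=800):
        # scale so the long edge <= 1333 and the short edge <= 800
        return min(max_long / max(w, h), max_short / min(w, h))

    # a 1920x1080 image is bound by its long edge: it becomes 1333x750
    assert round(rescale_factor(1920, 1080) * 1920) == 1333
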
/mmdet_configs/_base_/datasets/coco_instance.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'CocoDataset'
3 | data_root = 'data/coco/'
4 | img_norm_cfg = dict(
5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 | train_pipeline = [
7 | dict(type='LoadImageFromFile'),
8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
10 | dict(type='RandomFlip', flip_ratio=0.5),
11 | dict(type='Normalize', **img_norm_cfg),
12 | dict(type='Pad', size_divisor=32),
13 | dict(type='DefaultFormatBundle'),
14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
15 | ]
16 | test_pipeline = [
17 | dict(type='LoadImageFromFile'),
18 | dict(
19 | type='MultiScaleFlipAug',
20 | img_scale=(1333, 800),
21 | flip=False,
22 | transforms=[
23 | dict(type='Resize', keep_ratio=True),
24 | dict(type='RandomFlip'),
25 | dict(type='Normalize', **img_norm_cfg),
26 | dict(type='Pad', size_divisor=32),
27 | dict(type='ImageToTensor', keys=['img']),
28 | dict(type='Collect', keys=['img']),
29 | ])
30 | ]
31 | data = dict(
32 | samples_per_gpu=2,
33 | workers_per_gpu=2,
34 | train=dict(
35 | type=dataset_type,
36 | ann_file=data_root + 'annotations/instances_train2017.json',
37 | img_prefix=data_root + 'train2017/',
38 | pipeline=train_pipeline),
39 | val=dict(
40 | type=dataset_type,
41 | ann_file=data_root + 'annotations/instances_val2017.json',
42 | img_prefix=data_root + 'val2017/',
43 | pipeline=test_pipeline),
44 | test=dict(
45 | type=dataset_type,
46 | ann_file=data_root + 'annotations/instances_val2017.json',
47 | img_prefix=data_root + 'val2017/',
48 | pipeline=test_pipeline))
49 | evaluation = dict(metric=['bbox', 'segm'])
50 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/ssd300.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | input_size = 300
3 | model = dict(
4 | type='SingleStageDetector',
5 | backbone=dict(
6 | type='SSDVGG',
7 | depth=16,
8 | with_last_pool=False,
9 | ceil_mode=True,
10 | out_indices=(3, 4),
11 | out_feature_indices=(22, 34),
12 | init_cfg=dict(
13 | type='Pretrained', checkpoint='open-mmlab://vgg16_caffe')),
14 | neck=dict(
15 | type='SSDNeck',
16 | in_channels=(512, 1024),
17 | out_channels=(512, 1024, 512, 256, 256, 256),
18 | level_strides=(2, 2, 1, 1),
19 | level_paddings=(1, 1, 0, 0),
20 | l2_norm_scale=20),
21 | bbox_head=dict(
22 | type='SSDHead',
23 | in_channels=(512, 1024, 512, 256, 256, 256),
24 | num_classes=80,
25 | anchor_generator=dict(
26 | type='SSDAnchorGenerator',
27 | scale_major=False,
28 | input_size=input_size,
29 | basesize_ratio_range=(0.15, 0.9),
30 | strides=[8, 16, 32, 64, 100, 300],
31 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),
32 | bbox_coder=dict(
33 | type='DeltaXYWHBBoxCoder',
34 | target_means=[.0, .0, .0, .0],
35 | target_stds=[0.1, 0.1, 0.2, 0.2])),
36 | # model training and testing settings
37 | train_cfg=dict(
38 | assigner=dict(
39 | type='MaxIoUAssigner',
40 | pos_iou_thr=0.5,
41 | neg_iou_thr=0.5,
42 | min_pos_iou=0.,
43 | ignore_iof_thr=-1,
44 | gt_max_assign_all=False),
45 | smoothl1_beta=1.,
46 | allowed_border=-1,
47 | pos_weight=-1,
48 | neg_pos_ratio=3,
49 | debug=False),
50 | test_cfg=dict(
51 | nms_pre=1000,
52 | nms=dict(type='nms', iou_threshold=0.45),
53 | min_bbox_size=0,
54 | score_thr=0.02,
55 | max_per_img=200))
56 | cudnn_benchmark = True
57 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/retinanet_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='RetinaNet',
4 | backbone=dict(
5 | type='ResNet',
6 | depth=50,
7 | num_stages=4,
8 | out_indices=(0, 1, 2, 3),
9 | frozen_stages=1,
10 | norm_cfg=dict(type='BN', requires_grad=True),
11 | norm_eval=True,
12 | style='pytorch',
13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | start_level=1,
19 | add_extra_convs='on_input',
20 | num_outs=5),
21 | bbox_head=dict(
22 | type='RetinaHead',
23 | num_classes=80,
24 | in_channels=256,
25 | stacked_convs=4,
26 | feat_channels=256,
27 | anchor_generator=dict(
28 | type='AnchorGenerator',
29 | octave_base_scale=4,
30 | scales_per_octave=3,
31 | ratios=[0.5, 1.0, 2.0],
32 | strides=[8, 16, 32, 64, 128]),
33 | bbox_coder=dict(
34 | type='DeltaXYWHBBoxCoder',
35 | target_means=[.0, .0, .0, .0],
36 | target_stds=[1.0, 1.0, 1.0, 1.0]),
37 | loss_cls=dict(
38 | type='FocalLoss',
39 | use_sigmoid=True,
40 | gamma=2.0,
41 | alpha=0.25,
42 | loss_weight=1.0),
43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
44 | # model training and testing settings
45 | train_cfg=dict(
46 | assigner=dict(
47 | type='MaxIoUAssigner',
48 | pos_iou_thr=0.5,
49 | neg_iou_thr=0.4,
50 | min_pos_iou=0,
51 | ignore_iof_thr=-1),
52 | allowed_border=-1,
53 | pos_weight=-1,
54 | debug=False),
55 | test_cfg=dict(
56 | nms_pre=1000,
57 | min_bbox_size=0,
58 | score_thr=0.05,
59 | nms=dict(type='nms', iou_threshold=0.5),
60 | max_per_img=100))
61 |
--------------------------------------------------------------------------------
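
octave_base_scale=4 with scales_per_octave=3 places anchors at sizes 4 * 2^(i/3) times each FPN level's stride, repeated across the three aspect ratios. A worked sketch of the resulting anchor sizes:

    octave_base_scale, scales_per_octave = 4, 3
    scales = [octave_base_scale * 2 ** (i / scales_per_octave) for i in range(scales_per_octave)]
    # stride 8 -> anchor sizes of roughly 32, 40.3 and 50.8 pixels
    print([round(s * 8, 1) for s in scales])
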
/mmdet_configs/_base_/models/rpn_r50_caffe_c4.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='RPN',
4 | backbone=dict(
5 | type='ResNet',
6 | depth=50,
7 | num_stages=3,
8 | strides=(1, 2, 2),
9 | dilations=(1, 1, 1),
10 | out_indices=(2, ),
11 | frozen_stages=1,
12 | norm_cfg=dict(type='BN', requires_grad=False),
13 | norm_eval=True,
14 | style='caffe',
15 | init_cfg=dict(
16 | type='Pretrained',
17 | checkpoint='open-mmlab://detectron2/resnet50_caffe')),
18 | neck=None,
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=1024,
22 | feat_channels=1024,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[2, 4, 8, 16, 32],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[16]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | # model training and testing settings
36 | train_cfg=dict(
37 | rpn=dict(
38 | assigner=dict(
39 | type='MaxIoUAssigner',
40 | pos_iou_thr=0.7,
41 | neg_iou_thr=0.3,
42 | min_pos_iou=0.3,
43 | ignore_iof_thr=-1),
44 | sampler=dict(
45 | type='RandomSampler',
46 | num=256,
47 | pos_fraction=0.5,
48 | neg_pos_ub=-1,
49 | add_gt_as_proposals=False),
50 | allowed_border=0,
51 | pos_weight=-1,
52 | debug=False)),
53 | test_cfg=dict(
54 | rpn=dict(
55 | nms_pre=12000,
56 | max_per_img=2000,
57 | nms=dict(type='nms', iou_threshold=0.7),
58 | min_bbox_size=0)))
59 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/rpn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='RPN',
4 | backbone=dict(
5 | type='ResNet',
6 | depth=50,
7 | num_stages=4,
8 | out_indices=(0, 1, 2, 3),
9 | frozen_stages=1,
10 | norm_cfg=dict(type='BN', requires_grad=True),
11 | norm_eval=True,
12 | style='pytorch',
13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | # model training and testing settings
36 | train_cfg=dict(
37 | rpn=dict(
38 | assigner=dict(
39 | type='MaxIoUAssigner',
40 | pos_iou_thr=0.7,
41 | neg_iou_thr=0.3,
42 | min_pos_iou=0.3,
43 | ignore_iof_thr=-1),
44 | sampler=dict(
45 | type='RandomSampler',
46 | num=256,
47 | pos_fraction=0.5,
48 | neg_pos_ub=-1,
49 | add_gt_as_proposals=False),
50 | allowed_border=0,
51 | pos_weight=-1,
52 | debug=False)),
53 | test_cfg=dict(
54 | rpn=dict(
55 | nms_pre=2000,
56 | max_per_img=1000,
57 | nms=dict(type='nms', iou_threshold=0.7),
58 | min_bbox_size=0)))
59 |
--------------------------------------------------------------------------------
/mmdet_configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py'
2 |
3 | model = dict(
4 | backbone=dict(
5 | init_cfg=dict(
6 | type='Pretrained',
7 | checkpoint='open-mmlab://detectron2/resnet50_caffe')),
8 | bbox_head=dict(
9 | norm_on_bbox=True,
10 | centerness_on_reg=True,
11 | dcn_on_last_conv=False,
12 | center_sampling=True,
13 | conv_bias=True,
14 | loss_bbox=dict(type='GIoULoss', loss_weight=1.0)),
15 | # training and testing settings
16 | test_cfg=dict(nms=dict(type='nms', iou_threshold=0.6)))
17 |
18 | # dataset settings
19 | img_norm_cfg = dict(
20 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
21 | train_pipeline = [
22 | dict(type='LoadImageFromFile'),
23 | dict(type='LoadAnnotations', with_bbox=True),
24 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
25 | dict(type='RandomFlip', flip_ratio=0.5),
26 | dict(type='Normalize', **img_norm_cfg),
27 | dict(type='Pad', size_divisor=32),
28 | dict(type='DefaultFormatBundle'),
29 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
30 | ]
31 | test_pipeline = [
32 | dict(type='LoadImageFromFile'),
33 | dict(
34 | type='MultiScaleFlipAug',
35 | img_scale=(1333, 800),
36 | flip=False,
37 | transforms=[
38 | dict(type='Resize', keep_ratio=True),
39 | dict(type='RandomFlip'),
40 | dict(type='Normalize', **img_norm_cfg),
41 | dict(type='Pad', size_divisor=32),
42 | dict(type='ImageToTensor', keys=['img']),
43 | dict(type='Collect', keys=['img']),
44 | ])
45 | ]
46 | data = dict(
47 | samples_per_gpu=2,
48 | workers_per_gpu=2,
49 | train=dict(pipeline=train_pipeline),
50 | val=dict(pipeline=test_pipeline),
51 | test=dict(pipeline=test_pipeline))
52 | optimizer_config = dict(_delete_=True, grad_clip=None)
53 |
54 | lr_config = dict(warmup='linear')
55 |
--------------------------------------------------------------------------------
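
Note the caffe-style normalization here: the detectron2 ResNet-50 checkpoint expects BGR input with mean subtraction only (std of 1.0, to_rgb=False), unlike the torchvision-style configs, which convert to RGB and also divide by the ImageNet std. A purely illustrative comparison:

    import numpy as np

    def caffe_norm(bgr):  # BGR order, mean subtraction only
        return (np.asarray(bgr, np.float32) - [103.530, 116.280, 123.675]) / [1.0, 1.0, 1.0]

    def torchvision_norm(rgb):  # RGB order, ImageNet mean and std
        return (np.asarray(rgb, np.float32) - [123.675, 116.28, 103.53]) / [58.395, 57.12, 57.375]
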
/mmdet_configs/_base_/datasets/deepfashion.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'DeepFashionDataset'
3 | data_root = 'data/DeepFashion/In-shop/'
4 | img_norm_cfg = dict(
5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 | train_pipeline = [
7 | dict(type='LoadImageFromFile'),
8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
9 | dict(type='Resize', img_scale=(750, 1101), keep_ratio=True),
10 | dict(type='RandomFlip', flip_ratio=0.5),
11 | dict(type='Normalize', **img_norm_cfg),
12 | dict(type='Pad', size_divisor=32),
13 | dict(type='DefaultFormatBundle'),
14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
15 | ]
16 | test_pipeline = [
17 | dict(type='LoadImageFromFile'),
18 | dict(
19 | type='MultiScaleFlipAug',
20 | img_scale=(750, 1101),
21 | flip=False,
22 | transforms=[
23 | dict(type='Resize', keep_ratio=True),
24 | dict(type='RandomFlip'),
25 | dict(type='Normalize', **img_norm_cfg),
26 | dict(type='Pad', size_divisor=32),
27 | dict(type='ImageToTensor', keys=['img']),
28 | dict(type='Collect', keys=['img']),
29 | ])
30 | ]
31 | data = dict(
32 | imgs_per_gpu=2,
33 | workers_per_gpu=1,
34 | train=dict(
35 | type=dataset_type,
36 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',
37 | img_prefix=data_root + 'Img/',
38 | pipeline=train_pipeline,
39 | data_root=data_root),
40 | val=dict(
41 | type=dataset_type,
42 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',
43 | img_prefix=data_root + 'Img/',
44 | pipeline=test_pipeline,
45 | data_root=data_root),
46 | test=dict(
47 | type=dataset_type,
48 | ann_file=data_root +
49 | 'annotations/DeepFashion_segmentation_gallery.json',
50 | img_prefix=data_root + 'Img/',
51 | pipeline=test_pipeline,
52 | data_root=data_root))
53 | evaluation = dict(interval=5, metric=['bbox', 'segm'])
54 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/voc0712.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'VOCDataset'
3 | data_root = 'data/VOCdevkit/'
4 | img_norm_cfg = dict(
5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 | train_pipeline = [
7 | dict(type='LoadImageFromFile'),
8 | dict(type='LoadAnnotations', with_bbox=True),
9 | dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
10 | dict(type='RandomFlip', flip_ratio=0.5),
11 | dict(type='Normalize', **img_norm_cfg),
12 | dict(type='Pad', size_divisor=32),
13 | dict(type='DefaultFormatBundle'),
14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
15 | ]
16 | test_pipeline = [
17 | dict(type='LoadImageFromFile'),
18 | dict(
19 | type='MultiScaleFlipAug',
20 | img_scale=(1000, 600),
21 | flip=False,
22 | transforms=[
23 | dict(type='Resize', keep_ratio=True),
24 | dict(type='RandomFlip'),
25 | dict(type='Normalize', **img_norm_cfg),
26 | dict(type='Pad', size_divisor=32),
27 | dict(type='ImageToTensor', keys=['img']),
28 | dict(type='Collect', keys=['img']),
29 | ])
30 | ]
31 | data = dict(
32 | samples_per_gpu=2,
33 | workers_per_gpu=2,
34 | train=dict(
35 | type='RepeatDataset',
36 | times=3,
37 | dataset=dict(
38 | type=dataset_type,
39 | ann_file=[
40 | data_root + 'VOC2007/ImageSets/Main/trainval.txt',
41 | data_root + 'VOC2012/ImageSets/Main/trainval.txt'
42 | ],
43 | img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
44 | pipeline=train_pipeline)),
45 | val=dict(
46 | type=dataset_type,
47 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
48 | img_prefix=data_root + 'VOC2007/',
49 | pipeline=test_pipeline),
50 | test=dict(
51 | type=dataset_type,
52 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
53 | img_prefix=data_root + 'VOC2007/',
54 | pipeline=test_pipeline))
55 | evaluation = dict(interval=1, metric='mAP')
56 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/coco_instance_semantic.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'CocoDataset'
3 | data_root = 'data/coco/'
4 | img_norm_cfg = dict(
5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 | train_pipeline = [
7 | dict(type='LoadImageFromFile'),
8 | dict(
9 | type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),
10 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
11 | dict(type='RandomFlip', flip_ratio=0.5),
12 | dict(type='Normalize', **img_norm_cfg),
13 | dict(type='Pad', size_divisor=32),
14 | dict(type='SegRescale', scale_factor=1 / 8),
15 | dict(type='DefaultFormatBundle'),
16 | dict(
17 | type='Collect',
18 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
19 | ]
20 | test_pipeline = [
21 | dict(type='LoadImageFromFile'),
22 | dict(
23 | type='MultiScaleFlipAug',
24 | img_scale=(1333, 800),
25 | flip=False,
26 | transforms=[
27 | dict(type='Resize', keep_ratio=True),
28 | dict(type='RandomFlip', flip_ratio=0.5),
29 | dict(type='Normalize', **img_norm_cfg),
30 | dict(type='Pad', size_divisor=32),
31 | dict(type='ImageToTensor', keys=['img']),
32 | dict(type='Collect', keys=['img']),
33 | ])
34 | ]
35 | data = dict(
36 | samples_per_gpu=2,
37 | workers_per_gpu=2,
38 | train=dict(
39 | type=dataset_type,
40 | ann_file=data_root + 'annotations/instances_train2017.json',
41 | img_prefix=data_root + 'train2017/',
42 | seg_prefix=data_root + 'stuffthingmaps/train2017/',
43 | pipeline=train_pipeline),
44 | val=dict(
45 | type=dataset_type,
46 | ann_file=data_root + 'annotations/instances_val2017.json',
47 | img_prefix=data_root + 'val2017/',
48 | pipeline=test_pipeline),
49 | test=dict(
50 | type=dataset_type,
51 | ann_file=data_root + 'annotations/instances_val2017.json',
52 | img_prefix=data_root + 'val2017/',
53 | pipeline=test_pipeline))
54 | evaluation = dict(metric=['bbox', 'segm'])
55 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/cityscapes_detection.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'CityscapesDataset'
3 | data_root = 'data/cityscapes/'
4 | img_norm_cfg = dict(
5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 | train_pipeline = [
7 | dict(type='LoadImageFromFile'),
8 | dict(type='LoadAnnotations', with_bbox=True),
9 | dict(
10 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),
11 | dict(type='RandomFlip', flip_ratio=0.5),
12 | dict(type='Normalize', **img_norm_cfg),
13 | dict(type='Pad', size_divisor=32),
14 | dict(type='DefaultFormatBundle'),
15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
16 | ]
17 | test_pipeline = [
18 | dict(type='LoadImageFromFile'),
19 | dict(
20 | type='MultiScaleFlipAug',
21 | img_scale=(2048, 1024),
22 | flip=False,
23 | transforms=[
24 | dict(type='Resize', keep_ratio=True),
25 | dict(type='RandomFlip'),
26 | dict(type='Normalize', **img_norm_cfg),
27 | dict(type='Pad', size_divisor=32),
28 | dict(type='ImageToTensor', keys=['img']),
29 | dict(type='Collect', keys=['img']),
30 | ])
31 | ]
32 | data = dict(
33 | samples_per_gpu=1,
34 | workers_per_gpu=2,
35 | train=dict(
36 | type='RepeatDataset',
37 | times=8,
38 | dataset=dict(
39 | type=dataset_type,
40 | ann_file=data_root +
41 | 'annotations/instancesonly_filtered_gtFine_train.json',
42 | img_prefix=data_root + 'leftImg8bit/train/',
43 | pipeline=train_pipeline)),
44 | val=dict(
45 | type=dataset_type,
46 | ann_file=data_root +
47 | 'annotations/instancesonly_filtered_gtFine_val.json',
48 | img_prefix=data_root + 'leftImg8bit/val/',
49 | pipeline=test_pipeline),
50 | test=dict(
51 | type=dataset_type,
52 | ann_file=data_root +
53 | 'annotations/instancesonly_filtered_gtFine_test.json',
54 | img_prefix=data_root + 'leftImg8bit/test/',
55 | pipeline=test_pipeline))
56 | evaluation = dict(interval=1, metric='bbox')
57 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/cityscapes_instance.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'CityscapesDataset'
3 | data_root = 'data/cityscapes/'
4 | img_norm_cfg = dict(
5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 | train_pipeline = [
7 | dict(type='LoadImageFromFile'),
8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
9 | dict(
10 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),
11 | dict(type='RandomFlip', flip_ratio=0.5),
12 | dict(type='Normalize', **img_norm_cfg),
13 | dict(type='Pad', size_divisor=32),
14 | dict(type='DefaultFormatBundle'),
15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
16 | ]
17 | test_pipeline = [
18 | dict(type='LoadImageFromFile'),
19 | dict(
20 | type='MultiScaleFlipAug',
21 | img_scale=(2048, 1024),
22 | flip=False,
23 | transforms=[
24 | dict(type='Resize', keep_ratio=True),
25 | dict(type='RandomFlip'),
26 | dict(type='Normalize', **img_norm_cfg),
27 | dict(type='Pad', size_divisor=32),
28 | dict(type='ImageToTensor', keys=['img']),
29 | dict(type='Collect', keys=['img']),
30 | ])
31 | ]
32 | data = dict(
33 | samples_per_gpu=1,
34 | workers_per_gpu=2,
35 | train=dict(
36 | type='RepeatDataset',
37 | times=8,
38 | dataset=dict(
39 | type=dataset_type,
40 | ann_file=data_root +
41 | 'annotations/instancesonly_filtered_gtFine_train.json',
42 | img_prefix=data_root + 'leftImg8bit/train/',
43 | pipeline=train_pipeline)),
44 | val=dict(
45 | type=dataset_type,
46 | ann_file=data_root +
47 | 'annotations/instancesonly_filtered_gtFine_val.json',
48 | img_prefix=data_root + 'leftImg8bit/val/',
49 | pipeline=test_pipeline),
50 | test=dict(
51 | type=dataset_type,
52 | ann_file=data_root +
53 | 'annotations/instancesonly_filtered_gtFine_test.json',
54 | img_prefix=data_root + 'leftImg8bit/test/',
55 | pipeline=test_pipeline))
56 | evaluation = dict(metric=['bbox', 'segm'])
57 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/wider_face.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'WIDERFaceDataset'
3 | data_root = 'data/WIDERFace/'
4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
5 | train_pipeline = [
6 | dict(type='LoadImageFromFile', to_float32=True),
7 | dict(type='LoadAnnotations', with_bbox=True),
8 | dict(
9 | type='PhotoMetricDistortion',
10 | brightness_delta=32,
11 | contrast_range=(0.5, 1.5),
12 | saturation_range=(0.5, 1.5),
13 | hue_delta=18),
14 | dict(
15 | type='Expand',
16 | mean=img_norm_cfg['mean'],
17 | to_rgb=img_norm_cfg['to_rgb'],
18 | ratio_range=(1, 4)),
19 | dict(
20 | type='MinIoURandomCrop',
21 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
22 | min_crop_size=0.3),
23 | dict(type='Resize', img_scale=(300, 300), keep_ratio=False),
24 | dict(type='Normalize', **img_norm_cfg),
25 | dict(type='RandomFlip', flip_ratio=0.5),
26 | dict(type='DefaultFormatBundle'),
27 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
28 | ]
29 | test_pipeline = [
30 | dict(type='LoadImageFromFile'),
31 | dict(
32 | type='MultiScaleFlipAug',
33 | img_scale=(300, 300),
34 | flip=False,
35 | transforms=[
36 | dict(type='Resize', keep_ratio=False),
37 | dict(type='Normalize', **img_norm_cfg),
38 | dict(type='ImageToTensor', keys=['img']),
39 | dict(type='Collect', keys=['img']),
40 | ])
41 | ]
42 | data = dict(
43 | samples_per_gpu=60,
44 | workers_per_gpu=2,
45 | train=dict(
46 | type='RepeatDataset',
47 | times=2,
48 | dataset=dict(
49 | type=dataset_type,
50 | ann_file=data_root + 'train.txt',
51 | img_prefix=data_root + 'WIDER_train/',
52 | min_size=17,
53 | pipeline=train_pipeline)),
54 | val=dict(
55 | type=dataset_type,
56 | ann_file=data_root + 'val.txt',
57 | img_prefix=data_root + 'WIDER_val/',
58 | pipeline=test_pipeline),
59 | test=dict(
60 | type=dataset_type,
61 | ann_file=data_root + 'val.txt',
62 | img_prefix=data_root + 'WIDER_val/',
63 | pipeline=test_pipeline))
64 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/datasets/coco_panoptic.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'CocoPanopticDataset'
3 | data_root = 'data/coco/'
4 | img_norm_cfg = dict(
5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 | train_pipeline = [
7 | dict(type='LoadImageFromFile'),
8 | dict(
9 | type='LoadPanopticAnnotations',
10 | with_bbox=True,
11 | with_mask=True,
12 | with_seg=True),
13 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
14 | dict(type='RandomFlip', flip_ratio=0.5),
15 | dict(type='Normalize', **img_norm_cfg),
16 | dict(type='Pad', size_divisor=32),
17 | dict(type='SegRescale', scale_factor=1 / 4),
18 | dict(type='DefaultFormatBundle'),
19 | dict(
20 | type='Collect',
21 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
22 | ]
23 | test_pipeline = [
24 | dict(type='LoadImageFromFile'),
25 | dict(
26 | type='MultiScaleFlipAug',
27 | img_scale=(1333, 800),
28 | flip=False,
29 | transforms=[
30 | dict(type='Resize', keep_ratio=True),
31 | dict(type='RandomFlip'),
32 | dict(type='Normalize', **img_norm_cfg),
33 | dict(type='Pad', size_divisor=32),
34 | dict(type='ImageToTensor', keys=['img']),
35 | dict(type='Collect', keys=['img']),
36 | ])
37 | ]
38 | data = dict(
39 | samples_per_gpu=2,
40 | workers_per_gpu=2,
41 | train=dict(
42 | type=dataset_type,
43 | ann_file=data_root + 'annotations/panoptic_train2017.json',
44 | img_prefix=data_root + 'train2017/',
45 | seg_prefix=data_root + 'annotations/panoptic_train2017/',
46 | pipeline=train_pipeline),
47 | val=dict(
48 | type=dataset_type,
49 | ann_file=data_root + 'annotations/panoptic_val2017.json',
50 | img_prefix=data_root + 'val2017/',
51 | seg_prefix=data_root + 'annotations/panoptic_val2017/',
52 | pipeline=test_pipeline),
53 | test=dict(
54 | type=dataset_type,
55 | ann_file=data_root + 'annotations/panoptic_val2017.json',
56 | img_prefix=data_root + 'val2017/',
57 | seg_prefix=data_root + 'annotations/panoptic_val2017/',
58 | pipeline=test_pipeline))
59 | evaluation = dict(interval=1, metric=['PQ'])
60 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/fast_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='FastRCNN',
4 | backbone=dict(
5 | type='ResNet',
6 | depth=50,
7 | num_stages=4,
8 | out_indices=(0, 1, 2, 3),
9 | frozen_stages=1,
10 | norm_cfg=dict(type='BN', requires_grad=True),
11 | norm_eval=True,
12 | style='pytorch',
13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | roi_head=dict(
20 | type='StandardRoIHead',
21 | bbox_roi_extractor=dict(
22 | type='SingleRoIExtractor',
23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
24 | out_channels=256,
25 | featmap_strides=[4, 8, 16, 32]),
26 | bbox_head=dict(
27 | type='Shared2FCBBoxHead',
28 | in_channels=256,
29 | fc_out_channels=1024,
30 | roi_feat_size=7,
31 | num_classes=80,
32 | bbox_coder=dict(
33 | type='DeltaXYWHBBoxCoder',
34 | target_means=[0., 0., 0., 0.],
35 | target_stds=[0.1, 0.1, 0.2, 0.2]),
36 | reg_class_agnostic=False,
37 | loss_cls=dict(
38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
40 | # model training and testing settings
41 | train_cfg=dict(
42 | rcnn=dict(
43 | assigner=dict(
44 | type='MaxIoUAssigner',
45 | pos_iou_thr=0.5,
46 | neg_iou_thr=0.5,
47 | min_pos_iou=0.5,
48 | match_low_quality=False,
49 | ignore_iof_thr=-1),
50 | sampler=dict(
51 | type='RandomSampler',
52 | num=512,
53 | pos_fraction=0.25,
54 | neg_pos_ub=-1,
55 | add_gt_as_proposals=True),
56 | pos_weight=-1,
57 | debug=False)),
58 | test_cfg=dict(
59 | rcnn=dict(
60 | score_thr=0.05,
61 | nms=dict(type='nms', iou_threshold=0.5),
62 | max_per_img=100)))
63 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # other
132 | *.jpg
133 | .vscode
--------------------------------------------------------------------------------
/mmdet_configs/tood/tood_r50_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/datasets/coco_detection.py',
3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
4 | ]
5 | model = dict(
6 | type='TOOD',
7 | backbone=dict(
8 | type='ResNet',
9 | depth=50,
10 | num_stages=4,
11 | out_indices=(0, 1, 2, 3),
12 | frozen_stages=1,
13 | norm_cfg=dict(type='BN', requires_grad=True),
14 | norm_eval=True,
15 | style='pytorch',
16 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
17 | neck=dict(
18 | type='FPN',
19 | in_channels=[256, 512, 1024, 2048],
20 | out_channels=256,
21 | start_level=1,
22 | add_extra_convs='on_output',
23 | num_outs=5),
24 | bbox_head=dict(
25 | type='TOODHead',
26 | num_classes=80,
27 | in_channels=256,
28 | stacked_convs=6,
29 | feat_channels=256,
30 | anchor_type='anchor_free',
31 | anchor_generator=dict(
32 | type='AnchorGenerator',
33 | ratios=[1.0],
34 | octave_base_scale=8,
35 | scales_per_octave=1,
36 | strides=[8, 16, 32, 64, 128]),
37 | bbox_coder=dict(
38 | type='DeltaXYWHBBoxCoder',
39 | target_means=[.0, .0, .0, .0],
40 | target_stds=[0.1, 0.1, 0.2, 0.2]),
41 | initial_loss_cls=dict(
42 | type='FocalLoss',
43 | use_sigmoid=True,
44 | activated=True, # use probability instead of logit as input
45 | gamma=2.0,
46 | alpha=0.25,
47 | loss_weight=1.0),
48 | loss_cls=dict(
49 | type='QualityFocalLoss',
50 | use_sigmoid=True,
51 | activated=True, # use probability instead of logit as input
52 | beta=2.0,
53 | loss_weight=1.0),
54 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
55 | train_cfg=dict(
56 | initial_epoch=4,
57 | initial_assigner=dict(type='ATSSAssigner', topk=9),
58 | assigner=dict(type='TaskAlignedAssigner', topk=13),
59 | alpha=1,
60 | beta=6,
61 | allowed_border=-1,
62 | pos_weight=-1,
63 | debug=False),
64 | test_cfg=dict(
65 | nms_pre=1000,
66 | min_bbox_size=0,
67 | score_thr=0.05,
68 | nms=dict(type='nms', iou_threshold=0.6),
69 | max_per_img=100))
70 | # optimizer
71 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
72 |
73 | # custom hooks
74 | custom_hooks = [dict(type='SetEpochInfoHook')]
75 |
--------------------------------------------------------------------------------
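
TOOD trains with two assignment phases: ATSS for the first initial_epoch=4 epochs, then the task-aligned assigner, with SetEpochInfoHook feeding the current epoch to the head so it can switch. A sketch of the switch implied by train_cfg above:

    def pick_assigner(epoch, initial_epoch=4):
        # mirrors train_cfg: initial_assigner until initial_epoch, then assigner
        return "ATSSAssigner" if epoch < initial_epoch else "TaskAlignedAssigner"

    assert pick_assigner(0) == "ATSSAssigner"
    assert pick_assigner(4) == "TaskAlignedAssigner"
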
/mmdet_configs/visdrone_tood/tood_full_cls_60.py:
--------------------------------------------------------------------------------
1 | _base_ = ["../tood/tood_r50_fpn_1x_coco.py"]
2 |
3 | TAGS = ["tood", "crop=False", "24epochs", "num_cls=60", "repeat=5"]
4 | EXP_NAME = "tood_full_cls_60"
5 | DATA_ROOT = "data/visdrone2019/"
6 | BATCH_MULTIPLIER = 8
7 | LR_MULTIPLIER = 1
8 | EVAL_INTERVAL = 3
9 | NUM_CLASSES = 10
10 | CLASSES = ("pedestrian", "people", "bicycle", "car", "van", "truck", "tricycle", "awning-tricycle", "bus", "motor")
11 |
12 | # model settings
13 | model = dict(
14 | bbox_head=dict(
15 | num_classes=NUM_CLASSES,
16 | ),
17 | )
18 |
19 | # dataset settings
20 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
21 | train_pipeline = [
22 | dict(type="LoadImageFromFile"),
23 | dict(type="LoadAnnotations", with_bbox=True),
24 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
25 | dict(type="RandomFlip", flip_ratio=0.5),
26 | dict(type="Normalize", **img_norm_cfg),
27 | dict(type="Pad", size_divisor=32),
28 | dict(type="DefaultFormatBundle"),
29 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
30 | ]
31 | test_pipeline = [
32 | dict(type="LoadImageFromFile"),
33 | dict(
34 | type="MultiScaleFlipAug",
35 | img_scale=(1333, 800),
36 | flip=False,
37 | transforms=[
38 | dict(type="Resize", keep_ratio=True),
39 | dict(type="RandomFlip"),
40 | dict(type="Normalize", **img_norm_cfg),
41 | dict(type="Pad", size_divisor=32),
42 | dict(type="ImageToTensor", keys=["img"]),
43 | dict(type="Collect", keys=["img"]),
44 | ],
45 | ),
46 | ]
47 |
48 | data = dict(
49 | samples_per_gpu=2 * BATCH_MULTIPLIER,
50 | workers_per_gpu=4,
51 | train=dict(
52 | type="RepeatDataset",
53 | times=5,
54 | dataset=dict(
55 | type="CocoDataset",
56 | classes=CLASSES,
57 | ann_file=DATA_ROOT + "coco/train.json",
58 | img_prefix=DATA_ROOT + "VisDrone2019-DET-train/",
59 | pipeline=train_pipeline,
60 | ),
61 | ),
62 | val=dict(
63 | classes=CLASSES,
64 | ann_file=DATA_ROOT + "sliced/val_640_0.json",
65 | img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
66 | pipeline=test_pipeline,
67 | ),
68 | test=dict(
69 | classes=CLASSES,
70 | ann_file=DATA_ROOT + "sliced/val_640_0.json",
71 | img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
72 | pipeline=test_pipeline,
73 | ),
74 | )
75 |
76 | # optimizer
77 | # the reference recipe assumes 8 GPUs x 2 imgs/gpu (total batch 16);
78 | # lr is divided by 8 for a single GPU, then scaled with the batch multiplier
79 | optimizer = dict(lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, momentum=0.9, weight_decay=0.0001)
80 |
81 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
82 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
83 |
84 | # learning policy
85 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[16, 22])
86 | runner = dict(type="EpochBasedRunner", max_epochs=24)
87 |
88 | # logger settings
89 | log_config = dict(
90 | interval=50,
91 | hooks=[
92 | dict(type="TextLoggerHook"),
93 | dict(type="TensorboardLoggerHook", reset_flag=False),
94 | ],
95 | )
96 |
97 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_1x_coco/tood_r50_fpn_1x_coco_20211210_103425-20e20746.pth"
98 | work_dir = f"runs/visdrone/{EXP_NAME}/"
99 |
--------------------------------------------------------------------------------
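
The optimizer line applies the linear scaling rule: the reference TOOD recipe uses lr=0.01 for a total batch of 16 (8 GPUs x 2 images), so the rate is rescaled in proportion to the actual total batch. Worked out for this config, assuming a single GPU:

    BATCH_MULTIPLIER, LR_MULTIPLIER = 8, 1
    base_lr, base_total_batch = 0.01, 16
    total_batch = 2 * BATCH_MULTIPLIER * 1         # samples_per_gpu x 1 GPU = 16
    lr = base_lr * total_batch / base_total_batch  # 0.01: same effective step size
    assert lr == 0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER
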
/mmdet_configs/visdrone_vfnet/vfnet_full_cls_60.py:
--------------------------------------------------------------------------------
1 | _base_ = ["../vfnet/vfnet_r50_fpn_1x_coco.py"]
2 |
3 | TAGS = ["vfnet", "crop=False", "24epochs", "num_cls=60", "repeat=5"]
4 | EXP_NAME = "vfnet_full_cls_60"
5 | DATA_ROOT = "data/visdrone2019/"
6 | BATCH_MULTIPLIER = 8
7 | LR_MULTIPLIER = 1
8 | EVAL_INTERVAL = 3
9 | NUM_CLASSES = 10
10 | CLASSES = ("pedestrian", "people", "bicycle", "car", "van", "truck", "tricycle", "awning-tricycle", "bus", "motor")
11 |
12 | # model settings
13 | model = dict(
14 | bbox_head=dict(
15 | num_classes=NUM_CLASSES,
16 | ),
17 | )
18 |
19 | # dataset settings
20 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
21 | train_pipeline = [
22 | dict(type="LoadImageFromFile"),
23 | dict(type="LoadAnnotations", with_bbox=True),
24 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
25 | dict(type="RandomFlip", flip_ratio=0.5),
26 | dict(type="Normalize", **img_norm_cfg),
27 | dict(type="Pad", size_divisor=32),
28 | dict(type="DefaultFormatBundle"),
29 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
30 | ]
31 | test_pipeline = [
32 | dict(type="LoadImageFromFile"),
33 | dict(
34 | type="MultiScaleFlipAug",
35 | img_scale=(1333, 800),
36 | flip=False,
37 | transforms=[
38 | dict(type="Resize", keep_ratio=True),
39 | dict(type="RandomFlip"),
40 | dict(type="Normalize", **img_norm_cfg),
41 | dict(type="Pad", size_divisor=32),
42 | dict(type="DefaultFormatBundle"),
43 | dict(type="Collect", keys=["img"]),
44 | ],
45 | ),
46 | ]
47 |
48 | data = dict(
49 | samples_per_gpu=2 * BATCH_MULTIPLIER,
50 | workers_per_gpu=4,
51 | train=dict(
52 | type="RepeatDataset",
53 | times=5,
54 | dataset=dict(
55 | type="CocoDataset",
56 | classes=CLASSES,
57 | ann_file=DATA_ROOT + "coco/train.json",
58 | img_prefix=DATA_ROOT + "VisDrone2019-DET-train/",
59 | pipeline=train_pipeline,
60 | ),
61 | ),
62 | val=dict(
63 | classes=CLASSES,
64 | ann_file=DATA_ROOT + "sliced/val_640_0.json",
65 | img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
66 | pipeline=test_pipeline,
67 | ),
68 | test=dict(
69 | classes=CLASSES,
70 | ann_file=DATA_ROOT + "sliced/val_640_0.json",
71 | img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
72 | pipeline=test_pipeline,
73 | ),
74 | )
75 |
76 | # optimizer
77 | # the reference recipe assumes 8 GPUs x 2 imgs/gpu (total batch 16);
78 | # lr is divided by 8 for a single GPU, then scaled with the batch multiplier
79 | optimizer = dict(
80 | lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
81 | )
82 |
83 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
84 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
85 |
86 | # learning policy
87 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.1, step=[16, 22])
88 | runner = dict(type="EpochBasedRunner", max_epochs=24)
89 |
90 | # logger settings
91 | log_config = dict(
92 | interval=50,
93 | hooks=[
94 | dict(type="TextLoggerHook"),
95 | dict(type="TensorboardLoggerHook", reset_flag=False),
96 | ],
97 | )
98 |
99 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth"
100 | work_dir = f"runs/visdrone/{EXP_NAME}/"
101 |
--------------------------------------------------------------------------------
/mmdet_configs/visdrone_fcos/fcos_full_cls_60.py:
--------------------------------------------------------------------------------
1 | _base_ = ["../fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py"]
2 |
3 | TAGS = ["fcos", "crop=False", "24epochs", "num_cls=60", "repeat=5"]
4 | EXP_NAME = "fcos_full_cls_60"
5 | DATA_ROOT = "data/visdrone2019/"
6 | BATCH_MULTIPLIER = 16
7 | LR_MULTIPLIER = 1
8 | EVAL_INTERVAL = 3
9 | NUM_CLASSES = 10
10 | CLASSES = ("pedestrian", "people", "bicycle", "car", "van", "truck", "tricycle", "awning-tricycle", "bus", "motor")
11 |
12 | # model settings
13 | model = dict(
14 | bbox_head=dict(
15 | num_classes=NUM_CLASSES,
16 | ),
17 | )
18 |
19 | # dataset settings
20 | img_norm_cfg = dict(mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
21 | train_pipeline = [
22 | dict(type="LoadImageFromFile"),
23 | dict(type="LoadAnnotations", with_bbox=True),
24 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
25 | dict(type="RandomFlip", flip_ratio=0.5),
26 | dict(type="Normalize", **img_norm_cfg),
27 | dict(type="Pad", size_divisor=32),
28 | dict(type="DefaultFormatBundle"),
29 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
30 | ]
31 | test_pipeline = [
32 | dict(type="LoadImageFromFile"),
33 | dict(
34 | type="MultiScaleFlipAug",
35 | img_scale=(1333, 800),
36 | flip=False,
37 | transforms=[
38 | dict(type="Resize", keep_ratio=True),
39 | dict(type="RandomFlip"),
40 | dict(type="Normalize", **img_norm_cfg),
41 | dict(type="Pad", size_divisor=32),
42 | dict(type="ImageToTensor", keys=["img"]),
43 | dict(type="Collect", keys=["img"]),
44 | ],
45 | ),
46 | ]
47 |
48 | data = dict(
49 | samples_per_gpu=2 * BATCH_MULTIPLIER,
50 | workers_per_gpu=4,
51 | train=dict(
52 | type="RepeatDataset",
53 | times=5,
54 | dataset=dict(
55 | type="CocoDataset",
56 | classes=CLASSES,
57 | ann_file=DATA_ROOT + "coco/train.json",
58 | img_prefix=DATA_ROOT + "VisDrone2019-DET-train/",
59 | pipeline=train_pipeline,
60 | ),
61 | ),
62 | val=dict(
63 | classes=CLASSES,
64 | ann_file=DATA_ROOT + "sliced/val_640_0.json",
65 | img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
66 | pipeline=test_pipeline,
67 | ),
68 | test=dict(
69 | classes=CLASSES,
70 | ann_file=DATA_ROOT + "sliced/val_640_0.json",
71 | img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
72 | pipeline=test_pipeline,
73 | ),
74 | )
75 |
76 | # optimizer
77 | # the base lr of 0.01 assumes 8 GPUs; dividing by 8 gives the single-GPU rate,
78 | # which is then rescaled by BATCH_MULTIPLIER and LR_MULTIPLIER
79 | optimizer = dict(
80 | lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
81 | )
82 |
83 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
84 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
85 |
86 | # learning policy
87 | lr_config = dict(policy="step", warmup="constant", warmup_iters=500, warmup_ratio=1.0 / 3, step=[16, 22])
88 | runner = dict(type="EpochBasedRunner", max_epochs=24)
89 |
90 | # logger settings
91 | log_config = dict(
92 | interval=50,
93 | hooks=[
94 | dict(type="TextLoggerHook"),
95 | dict(type="TensorboardLoggerHook", reset_flag=False),
96 | ],
97 | )
98 |
99 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth"
100 | work_dir = f"runs/visdrone/{EXP_NAME}/"
101 |
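102 | # Worked example of the lr scaling above, using the values set in this file:
103 | # with BATCH_MULTIPLIER = 16 and LR_MULTIPLIER = 1, samples_per_gpu = 2 * 16 = 32
104 | # and lr = 0.01 / 8 * 16 * 1 = 0.02, i.e. the linear scaling rule applied to the
105 | # 8-GPU default of 0.01.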
--------------------------------------------------------------------------------
/mmdet_configs/vfnet/vfnet_r50_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/datasets/coco_detection.py',
3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
4 | ]
5 | # model settings
6 | model = dict(
7 | type='VFNet',
8 | backbone=dict(
9 | type='ResNet',
10 | depth=50,
11 | num_stages=4,
12 | out_indices=(0, 1, 2, 3),
13 | frozen_stages=1,
14 | norm_cfg=dict(type='BN', requires_grad=True),
15 | norm_eval=True,
16 | style='pytorch',
17 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
18 | neck=dict(
19 | type='FPN',
20 | in_channels=[256, 512, 1024, 2048],
21 | out_channels=256,
22 | start_level=1,
23 | add_extra_convs='on_output', # use P5
24 | num_outs=5,
25 | relu_before_extra_convs=True),
26 | bbox_head=dict(
27 | type='VFNetHead',
28 | num_classes=80,
29 | in_channels=256,
30 | stacked_convs=3,
31 | feat_channels=256,
32 | strides=[8, 16, 32, 64, 128],
33 | center_sampling=False,
34 | dcn_on_last_conv=False,
35 | use_atss=True,
36 | use_vfl=True,
37 | loss_cls=dict(
38 | type='VarifocalLoss',
39 | use_sigmoid=True,
40 | alpha=0.75,
41 | gamma=2.0,
42 | iou_weighted=True,
43 | loss_weight=1.0),
44 | loss_bbox=dict(type='GIoULoss', loss_weight=1.5),
45 | loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0)),
46 | # training and testing settings
47 | train_cfg=dict(
48 | assigner=dict(type='ATSSAssigner', topk=9),
49 | allowed_border=-1,
50 | pos_weight=-1,
51 | debug=False),
52 | test_cfg=dict(
53 | nms_pre=1000,
54 | min_bbox_size=0,
55 | score_thr=0.05,
56 | nms=dict(type='nms', iou_threshold=0.6),
57 | max_per_img=100))
58 |
59 | # data setting
60 | dataset_type = 'CocoDataset'
61 | data_root = 'data/coco/'
62 | img_norm_cfg = dict(
63 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
64 | train_pipeline = [
65 | dict(type='LoadImageFromFile'),
66 | dict(type='LoadAnnotations', with_bbox=True),
67 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
68 | dict(type='RandomFlip', flip_ratio=0.5),
69 | dict(type='Normalize', **img_norm_cfg),
70 | dict(type='Pad', size_divisor=32),
71 | dict(type='DefaultFormatBundle'),
72 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
73 | ]
74 | test_pipeline = [
75 | dict(type='LoadImageFromFile'),
76 | dict(
77 | type='MultiScaleFlipAug',
78 | img_scale=(1333, 800),
79 | flip=False,
80 | transforms=[
81 | dict(type='Resize', keep_ratio=True),
82 | dict(type='RandomFlip'),
83 | dict(type='Normalize', **img_norm_cfg),
84 | dict(type='Pad', size_divisor=32),
85 | dict(type='DefaultFormatBundle'),
86 | dict(type='Collect', keys=['img']),
87 | ])
88 | ]
89 | data = dict(
90 | samples_per_gpu=2,
91 | workers_per_gpu=2,
92 | train=dict(pipeline=train_pipeline),
93 | val=dict(pipeline=test_pipeline),
94 | test=dict(pipeline=test_pipeline))
95 |
96 | # optimizer
97 | optimizer = dict(
98 | lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))
99 | optimizer_config = dict(grad_clip=None)
100 | # learning policy
101 | lr_config = dict(
102 | policy='step',
103 | warmup='linear',
104 | warmup_iters=500,
105 | warmup_ratio=0.1,
106 | step=[8, 11])
107 | runner = dict(type='EpochBasedRunner', max_epochs=12)
108 |
--------------------------------------------------------------------------------
/mmdet_configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/datasets/coco_detection.py',
3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
4 | ]
5 | # model settings
6 | model = dict(
7 | type='FCOS',
8 | backbone=dict(
9 | type='ResNet',
10 | depth=50,
11 | num_stages=4,
12 | out_indices=(0, 1, 2, 3),
13 | frozen_stages=1,
14 | norm_cfg=dict(type='BN', requires_grad=False),
15 | norm_eval=True,
16 | style='caffe',
17 | init_cfg=dict(
18 | type='Pretrained',
19 | checkpoint='open-mmlab://detectron/resnet50_caffe')),
20 | neck=dict(
21 | type='FPN',
22 | in_channels=[256, 512, 1024, 2048],
23 | out_channels=256,
24 | start_level=1,
25 | add_extra_convs='on_output', # use P5
26 | num_outs=5,
27 | relu_before_extra_convs=True),
28 | bbox_head=dict(
29 | type='FCOSHead',
30 | num_classes=80,
31 | in_channels=256,
32 | stacked_convs=4,
33 | feat_channels=256,
34 | strides=[8, 16, 32, 64, 128],
35 | loss_cls=dict(
36 | type='FocalLoss',
37 | use_sigmoid=True,
38 | gamma=2.0,
39 | alpha=0.25,
40 | loss_weight=1.0),
41 | loss_bbox=dict(type='IoULoss', loss_weight=1.0),
42 | loss_centerness=dict(
43 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
44 | # training and testing settings
45 | train_cfg=dict(
46 | assigner=dict(
47 | type='MaxIoUAssigner',
48 | pos_iou_thr=0.5,
49 | neg_iou_thr=0.4,
50 | min_pos_iou=0,
51 | ignore_iof_thr=-1),
52 | allowed_border=-1,
53 | pos_weight=-1,
54 | debug=False),
55 | test_cfg=dict(
56 | nms_pre=1000,
57 | min_bbox_size=0,
58 | score_thr=0.05,
59 | nms=dict(type='nms', iou_threshold=0.5),
60 | max_per_img=100))
61 | img_norm_cfg = dict(
62 | mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
63 | train_pipeline = [
64 | dict(type='LoadImageFromFile'),
65 | dict(type='LoadAnnotations', with_bbox=True),
66 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
67 | dict(type='RandomFlip', flip_ratio=0.5),
68 | dict(type='Normalize', **img_norm_cfg),
69 | dict(type='Pad', size_divisor=32),
70 | dict(type='DefaultFormatBundle'),
71 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
72 | ]
73 | test_pipeline = [
74 | dict(type='LoadImageFromFile'),
75 | dict(
76 | type='MultiScaleFlipAug',
77 | img_scale=(1333, 800),
78 | flip=False,
79 | transforms=[
80 | dict(type='Resize', keep_ratio=True),
81 | dict(type='RandomFlip'),
82 | dict(type='Normalize', **img_norm_cfg),
83 | dict(type='Pad', size_divisor=32),
84 | dict(type='ImageToTensor', keys=['img']),
85 | dict(type='Collect', keys=['img']),
86 | ])
87 | ]
88 | data = dict(
89 | samples_per_gpu=2,
90 | workers_per_gpu=2,
91 | train=dict(pipeline=train_pipeline),
92 | val=dict(pipeline=test_pipeline),
93 | test=dict(pipeline=test_pipeline))
94 | # optimizer
95 | optimizer = dict(
96 | lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))
97 | optimizer_config = dict(
98 | _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
99 | # learning policy
100 | lr_config = dict(
101 | policy='step',
102 | warmup='constant',
103 | warmup_iters=500,
104 | warmup_ratio=1.0 / 3,
105 | step=[8, 11])
106 | runner = dict(type='EpochBasedRunner', max_epochs=12)
107 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/faster_rcnn_r50_caffe_dc5.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | norm_cfg = dict(type='BN', requires_grad=False)
3 | model = dict(
4 | type='FasterRCNN',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=4,
9 | strides=(1, 2, 2, 1),
10 | dilations=(1, 1, 1, 2),
11 | out_indices=(3, ),
12 | frozen_stages=1,
13 | norm_cfg=norm_cfg,
14 | norm_eval=True,
15 | style='caffe',
16 | init_cfg=dict(
17 | type='Pretrained',
18 | checkpoint='open-mmlab://detectron2/resnet50_caffe')),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=2048,
22 | feat_channels=2048,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[2, 4, 8, 16, 32],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[16]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | roi_head=dict(
36 | type='StandardRoIHead',
37 | bbox_roi_extractor=dict(
38 | type='SingleRoIExtractor',
39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 | out_channels=2048,
41 | featmap_strides=[16]),
42 | bbox_head=dict(
43 | type='Shared2FCBBoxHead',
44 | in_channels=2048,
45 | fc_out_channels=1024,
46 | roi_feat_size=7,
47 | num_classes=80,
48 | bbox_coder=dict(
49 | type='DeltaXYWHBBoxCoder',
50 | target_means=[0., 0., 0., 0.],
51 | target_stds=[0.1, 0.1, 0.2, 0.2]),
52 | reg_class_agnostic=False,
53 | loss_cls=dict(
54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
56 | # model training and testing settings
57 | train_cfg=dict(
58 | rpn=dict(
59 | assigner=dict(
60 | type='MaxIoUAssigner',
61 | pos_iou_thr=0.7,
62 | neg_iou_thr=0.3,
63 | min_pos_iou=0.3,
64 | match_low_quality=True,
65 | ignore_iof_thr=-1),
66 | sampler=dict(
67 | type='RandomSampler',
68 | num=256,
69 | pos_fraction=0.5,
70 | neg_pos_ub=-1,
71 | add_gt_as_proposals=False),
72 | allowed_border=0,
73 | pos_weight=-1,
74 | debug=False),
75 | rpn_proposal=dict(
76 | nms_pre=12000,
77 | max_per_img=2000,
78 | nms=dict(type='nms', iou_threshold=0.7),
79 | min_bbox_size=0),
80 | rcnn=dict(
81 | assigner=dict(
82 | type='MaxIoUAssigner',
83 | pos_iou_thr=0.5,
84 | neg_iou_thr=0.5,
85 | min_pos_iou=0.5,
86 | match_low_quality=False,
87 | ignore_iof_thr=-1),
88 | sampler=dict(
89 | type='RandomSampler',
90 | num=512,
91 | pos_fraction=0.25,
92 | neg_pos_ub=-1,
93 | add_gt_as_proposals=True),
94 | pos_weight=-1,
95 | debug=False)),
96 | test_cfg=dict(
97 | rpn=dict(
98 | nms=dict(type='nms', iou_threshold=0.7),
99 | nms_pre=6000,
100 | max_per_img=1000,
101 | min_bbox_size=0),
102 | rcnn=dict(
103 | score_thr=0.05,
104 | nms=dict(type='nms', iou_threshold=0.5),
105 | max_per_img=100)))
106 |
--------------------------------------------------------------------------------
/eval_tools/predict_evaluate_analyse.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from sahi.predict import predict
4 | from sahi.scripts.coco_error_analysis import analyse
5 | from sahi.scripts.coco_evaluation import evaluate
6 |
7 | MODEL_PATH = ""
8 | MODEL_CONFIG_PATH = ""
9 | EVAL_IMAGES_FOLDER_DIR = ""
10 | EVAL_DATASET_JSON_PATH = ""
11 | INFERENCE_SETTING = "XVIEW_SAHI_FI_PO"
12 | EXPORT_VISUAL = False
13 |
14 | ############ don't change below #############
15 |
16 | INFERENCE_SETTING_TO_PARAMS = {
17 | "XVIEW_SAHI": {
18 | "no_standard_prediction": True,
19 | "no_sliced_prediction": False,
20 | "slice_size": 400,
21 | "overlap_ratio": 0,
22 | },
23 | "XVIEW_SAHI_PO": {
24 | "no_standard_prediction": True,
25 | "no_sliced_prediction": False,
26 | "slice_size": 400,
27 | "overlap_ratio": 0.25,
28 | },
29 | "XVIEW_SAHI_FI": {
30 | "no_standard_prediction": False,
31 | "no_sliced_prediction": False,
32 | "slice_size": 400,
33 | "overlap_ratio": 0,
34 | },
35 | "XVIEW_SAHI_FI_PO": {
36 | "no_standard_prediction": False,
37 | "no_sliced_prediction": False,
38 | "slice_size": 400,
39 | "overlap_ratio": 0.25,
40 | },
41 | "VISDRONE_FI": {
42 | "no_standard_prediction": False,
43 | "no_sliced_prediction": True,
44 | "slice_size": 640,
45 | "overlap_ratio": 0,
46 | },
47 | "VISDRONE_SAHI": {
48 | "no_standard_prediction": True,
49 | "no_sliced_prediction": False,
50 | "slice_size": 640,
51 | "overlap_ratio": 0,
52 | },
53 | "VISDRONE_SAHI_PO": {
54 | "no_standard_prediction": True,
55 | "no_sliced_prediction": False,
56 | "slice_size": 640,
57 | "overlap_ratio": 0.25,
58 | },
59 | "VISDRONE_SAHI_FI": {
60 | "no_standard_prediction": False,
61 | "no_sliced_prediction": False,
62 | "slice_size": 640,
63 | "overlap_ratio": 0,
64 | },
65 | "VISDRONE_SAHI_FI_PO": {
66 | "no_standard_prediction": False,
67 | "no_sliced_prediction": False,
68 | "slice_size": 640,
69 | "overlap_ratio": 0.25,
70 | },
71 | }
72 |
73 | setting_params = INFERENCE_SETTING_TO_PARAMS[INFERENCE_SETTING]
74 |
75 | result = predict(
76 | model_type="mmdet",
77 | model_path=MODEL_PATH,
78 | model_config_path=MODEL_CONFIG_PATH,
79 | model_confidence_threshold=0.01,
80 | model_device="cuda:0",
81 | model_category_mapping=None,
82 | model_category_remapping=None,
83 | source=EVAL_IMAGES_FOLDER_DIR,
84 | no_standard_prediction=setting_params["no_standard_prediction"],
85 | no_sliced_prediction=setting_params["no_sliced_prediction"],
86 | image_size=None,
87 | slice_height=setting_params["slice_size"],
88 | slice_width=setting_params["slice_size"],
89 | overlap_height_ratio=setting_params["overlap_ratio"],
90 | overlap_width_ratio=setting_params["overlap_ratio"],
91 | postprocess_type="NMS",
92 | postprocess_match_metric="IOU",
93 | postprocess_match_threshold=0.5,
94 | postprocess_class_agnostic=False,
95 | novisual=not EXPORT_VISUAL,
96 | dataset_json_path=EVAL_DATASET_JSON_PATH,
97 | project="runs/predict_eval_analyse",
98 | name=INFERENCE_SETTING,
99 | visual_bbox_thickness=None,
100 | visual_text_size=None,
101 | visual_text_thickness=None,
102 | visual_export_format="png",
103 | verbose=1,
104 | return_dict=True,
105 | force_postprocess_type=True,
106 | )
107 |
108 | result_json_path = str(Path(result["export_dir"]) / "result.json")
109 |
110 | evaluate(
111 | dataset_json_path=EVAL_DATASET_JSON_PATH,
112 | result_json_path=result_json_path,
113 | classwise=True,
114 | max_detections=500,
115 | return_dict=False,
116 | )
117 |
118 | analyse(
119 | dataset_json_path=EVAL_DATASET_JSON_PATH,
120 | result_json_path=result_json_path,
121 | max_detections=500,
122 | return_dict=False,
123 | )
124 |
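125 | # Illustrative settings (hypothetical paths, shown only as a sketch of how the
126 | # constants at the top of this file might be filled in for a VisDrone run; the
127 | # checkpoint name in particular depends on what the training run produced):
128 | # MODEL_PATH = "runs/visdrone/fcos_full_cls_60/best_bbox_mAP.pth"
129 | # MODEL_CONFIG_PATH = "mmdet_configs/visdrone_fcos/fcos_full_cls_60.py"
130 | # EVAL_IMAGES_FOLDER_DIR = "data/visdrone2019/sliced/val_images_640_0/"
131 | # EVAL_DATASET_JSON_PATH = "data/visdrone2019/sliced/val_640_0.json"
132 | # INFERENCE_SETTING = "VISDRONE_SAHI_FI_PO"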
--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/faster_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='FasterRCNN',
4 | backbone=dict(
5 | type='ResNet',
6 | depth=50,
7 | num_stages=4,
8 | out_indices=(0, 1, 2, 3),
9 | frozen_stages=1,
10 | norm_cfg=dict(type='BN', requires_grad=True),
11 | norm_eval=True,
12 | style='pytorch',
13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | roi_head=dict(
36 | type='StandardRoIHead',
37 | bbox_roi_extractor=dict(
38 | type='SingleRoIExtractor',
39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 | out_channels=256,
41 | featmap_strides=[4, 8, 16, 32]),
42 | bbox_head=dict(
43 | type='Shared2FCBBoxHead',
44 | in_channels=256,
45 | fc_out_channels=1024,
46 | roi_feat_size=7,
47 | num_classes=80,
48 | bbox_coder=dict(
49 | type='DeltaXYWHBBoxCoder',
50 | target_means=[0., 0., 0., 0.],
51 | target_stds=[0.1, 0.1, 0.2, 0.2]),
52 | reg_class_agnostic=False,
53 | loss_cls=dict(
54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
56 | # model training and testing settings
57 | train_cfg=dict(
58 | rpn=dict(
59 | assigner=dict(
60 | type='MaxIoUAssigner',
61 | pos_iou_thr=0.7,
62 | neg_iou_thr=0.3,
63 | min_pos_iou=0.3,
64 | match_low_quality=True,
65 | ignore_iof_thr=-1),
66 | sampler=dict(
67 | type='RandomSampler',
68 | num=256,
69 | pos_fraction=0.5,
70 | neg_pos_ub=-1,
71 | add_gt_as_proposals=False),
72 | allowed_border=-1,
73 | pos_weight=-1,
74 | debug=False),
75 | rpn_proposal=dict(
76 | nms_pre=2000,
77 | max_per_img=1000,
78 | nms=dict(type='nms', iou_threshold=0.7),
79 | min_bbox_size=0),
80 | rcnn=dict(
81 | assigner=dict(
82 | type='MaxIoUAssigner',
83 | pos_iou_thr=0.5,
84 | neg_iou_thr=0.5,
85 | min_pos_iou=0.5,
86 | match_low_quality=False,
87 | ignore_iof_thr=-1),
88 | sampler=dict(
89 | type='RandomSampler',
90 | num=512,
91 | pos_fraction=0.25,
92 | neg_pos_ub=-1,
93 | add_gt_as_proposals=True),
94 | pos_weight=-1,
95 | debug=False)),
96 | test_cfg=dict(
97 | rpn=dict(
98 | nms_pre=1000,
99 | max_per_img=1000,
100 | nms=dict(type='nms', iou_threshold=0.7),
101 | min_bbox_size=0),
102 | rcnn=dict(
103 | score_thr=0.05,
104 | nms=dict(type='nms', iou_threshold=0.5),
105 | max_per_img=100)
106 | # soft-nms is also supported for rcnn testing
107 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
108 | ))
109 |
--------------------------------------------------------------------------------
/mmdet_configs/visdrone_tood/tood_crop_480_960_cls_60.py:
--------------------------------------------------------------------------------
1 | _base_ = ["../tood/tood_r50_fpn_1x_coco.py"]
2 |
3 | TAGS = ["tood", "crop=480_960", "24epochs", "num_cls=60", "repeat=5"]
4 | EXP_NAME = "tood_crop_480_960_cls_60"
5 | DATA_ROOT = "data/visdrone2019/"
6 | BATCH_MULTIPLIER = 8
7 | LR_MULTIPLIER = 1
8 | EVAL_INTERVAL = 3
9 | NUM_CLASSES = 10
10 | CLASSES = ("pedestrian", "people", "bicycle", "car", "van", "truck", "tricycle", "awning-tricycle", "bus", "motor")
11 |
12 | # model settings
13 | model = dict(
14 | bbox_head=dict(
15 | num_classes=NUM_CLASSES,
16 | ),
17 | )
18 |
19 | # dataset settings
20 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
21 | train_pipeline = [
22 | dict(type="LoadImageFromFile"),
23 | dict(type="LoadAnnotations", with_bbox=True),
24 | dict(
25 | type="AutoAugment",
26 | policies=[
27 | [
28 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(480, 960), allow_negative_crop=True),
29 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
30 | ],
31 | [
32 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(480, 960), allow_negative_crop=True),
33 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
34 | ],
35 | [
36 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
37 | ],
38 | ],
39 | ),
40 | dict(type="RandomFlip", flip_ratio=0.5),
41 | dict(type="Normalize", **img_norm_cfg),
42 | dict(type="Pad", size_divisor=32),
43 | dict(type="DefaultFormatBundle"),
44 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
45 | ]
46 | test_pipeline = [
47 | dict(type="LoadImageFromFile"),
48 | dict(
49 | type="MultiScaleFlipAug",
50 | img_scale=(1333, 800),
51 | flip=False,
52 | transforms=[
53 | dict(type="Resize", keep_ratio=True),
54 | dict(type="RandomFlip"),
55 | dict(type="Normalize", **img_norm_cfg),
56 | dict(type="Pad", size_divisor=32),
57 | dict(type="ImageToTensor", keys=["img"]),
58 | dict(type="Collect", keys=["img"]),
59 | ],
60 | ),
61 | ]
62 |
63 | data = dict(
64 | samples_per_gpu=2 * BATCH_MULTIPLIER,
65 | workers_per_gpu=4,
66 | train=dict(
67 | type="RepeatDataset",
68 | times=5,
69 | dataset=dict(
70 | type="CocoDataset",
71 | classes=CLASSES,
72 | ann_file=DATA_ROOT + "coco/train.json",
73 | img_prefix=DATA_ROOT + "VisDrone2019-DET-train/",
74 | pipeline=train_pipeline,
75 | ),
76 | ),
77 | val=dict(
78 | classes=CLASSES,
79 | ann_file=DATA_ROOT + "sliced/val_640_0.json",
80 | img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
81 | pipeline=test_pipeline,
82 | ),
83 | test=dict(
84 | classes=CLASSES,
85 | ann_file=DATA_ROOT + "sliced/val_640_0.json",
86 | img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
87 | pipeline=test_pipeline,
88 | ),
89 | )
90 |
91 | # optimizer
92 | # the base lr of 0.01 assumes 8 GPUs; dividing by 8 gives the single-GPU rate,
93 | # which is then rescaled by BATCH_MULTIPLIER and LR_MULTIPLIER
94 | optimizer = dict(lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, momentum=0.9, weight_decay=0.0001)
95 |
96 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
97 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
98 |
99 | # learning policy
100 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[16, 22])
101 | runner = dict(type="EpochBasedRunner", max_epochs=24)
102 |
103 | # logger settings
104 | log_config = dict(
105 | interval=50,
106 | hooks=[
107 | dict(type="TextLoggerHook"),
108 | dict(type="TensorboardLoggerHook", reset_flag=False),
109 | ],
110 | )
111 |
112 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_1x_coco/tood_r50_fpn_1x_coco_20211210_103425-20e20746.pth"
113 | work_dir = f"runs/visdrone/{EXP_NAME}/"
114 |
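115 | # Note on the AutoAugment block above: mmdet selects one of the listed
116 | # sub-policies at random (uniformly) per image, so the random 480-960 crop is
117 | # applied with probability 2/3 and the plain resize with probability 1/3.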
--------------------------------------------------------------------------------
/mmdet_configs/visdrone_vfnet/vfnet_crop_480_960_cls_60.py:
--------------------------------------------------------------------------------
1 | _base_ = ["../vfnet/vfnet_r50_fpn_1x_coco.py"]
2 |
3 | TAGS = ["vfnet", "crop=480_960", "24epochs", "num_cls=60", "repeat=5"]
4 | EXP_NAME = "vfnet_crop_480_960_cls_60"
5 | DATA_ROOT = "data/visdrone2019/"
6 | BATCH_MULTIPLIER = 8
7 | LR_MULTIPLIER = 1
8 | EVAL_INTERVAL = 3
9 | NUM_CLASSES = 10
10 | CLASSES = ("pedestrian", "people", "bicycle", "car", "van", "truck", "tricycle", "awning-tricycle", "bus", "motor")
11 |
12 | # model settings
13 | model = dict(
14 | bbox_head=dict(
15 | num_classes=NUM_CLASSES,
16 | ),
17 | )
18 |
19 | # dataset settings
20 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
21 | train_pipeline = [
22 | dict(type="LoadImageFromFile"),
23 | dict(type="LoadAnnotations", with_bbox=True),
24 | dict(
25 | type="AutoAugment",
26 | policies=[
27 | [
28 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(480, 960), allow_negative_crop=True),
29 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
30 | ],
31 | [
32 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(480, 960), allow_negative_crop=True),
33 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
34 | ],
35 | [
36 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
37 | ],
38 | ],
39 | ),
40 | dict(type="RandomFlip", flip_ratio=0.5),
41 | dict(type="Normalize", **img_norm_cfg),
42 | dict(type="Pad", size_divisor=32),
43 | dict(type="DefaultFormatBundle"),
44 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
45 | ]
46 | test_pipeline = [
47 | dict(type="LoadImageFromFile"),
48 | dict(
49 | type="MultiScaleFlipAug",
50 | img_scale=(1333, 800),
51 | flip=False,
52 | transforms=[
53 | dict(type="Resize", keep_ratio=True),
54 | dict(type="RandomFlip"),
55 | dict(type="Normalize", **img_norm_cfg),
56 | dict(type="Pad", size_divisor=32),
57 | dict(type="DefaultFormatBundle"),
58 | dict(type="Collect", keys=["img"]),
59 | ],
60 | ),
61 | ]
62 |
63 | data = dict(
64 | samples_per_gpu=2 * BATCH_MULTIPLIER,
65 | workers_per_gpu=4,
66 | train=dict(
67 | type="RepeatDataset",
68 | times=5,
69 | dataset=dict(
70 | type="CocoDataset",
71 | classes=CLASSES,
72 | ann_file=DATA_ROOT + "coco/train.json",
73 | img_prefix=DATA_ROOT + "VisDrone2019-DET-train/",
74 | pipeline=train_pipeline,
75 | ),
76 | ),
77 | val=dict(
78 | classes=CLASSES,
79 | ann_file=DATA_ROOT + "sliced/val_640_0.json",
80 | img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
81 | pipeline=test_pipeline,
82 | ),
83 | test=dict(
84 | classes=CLASSES,
85 | ann_file=DATA_ROOT + "sliced/val_640_0.json",
86 | img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
87 | pipeline=test_pipeline,
88 | ),
89 | )
90 |
91 | # optimizer
92 | # the base lr of 0.01 assumes 8 GPUs; dividing by 8 gives the single-GPU rate,
93 | # which is then rescaled by BATCH_MULTIPLIER and LR_MULTIPLIER
94 | optimizer = dict(
95 | lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
96 | )
97 |
98 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
99 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
100 |
101 | # learning policy
102 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.1, step=[16, 22])
103 | runner = dict(type="EpochBasedRunner", max_epochs=24)
104 |
105 | # logger settings
106 | log_config = dict(
107 | interval=50,
108 | hooks=[
109 | dict(type="TextLoggerHook"),
110 | dict(type="TensorboardLoggerHook", reset_flag=False),
111 | ],
112 | )
113 |
114 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth"
115 | work_dir = f"runs/visdrone/{EXP_NAME}/"
116 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/faster_rcnn_r50_caffe_c4.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | norm_cfg = dict(type='BN', requires_grad=False)
3 | model = dict(
4 | type='FasterRCNN',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=3,
9 | strides=(1, 2, 2),
10 | dilations=(1, 1, 1),
11 | out_indices=(2, ),
12 | frozen_stages=1,
13 | norm_cfg=norm_cfg,
14 | norm_eval=True,
15 | style='caffe',
16 | init_cfg=dict(
17 | type='Pretrained',
18 | checkpoint='open-mmlab://detectron2/resnet50_caffe')),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=1024,
22 | feat_channels=1024,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[2, 4, 8, 16, 32],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[16]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | roi_head=dict(
36 | type='StandardRoIHead',
37 | shared_head=dict(
38 | type='ResLayer',
39 | depth=50,
40 | stage=3,
41 | stride=2,
42 | dilation=1,
43 | style='caffe',
44 | norm_cfg=norm_cfg,
45 | norm_eval=True),
46 | bbox_roi_extractor=dict(
47 | type='SingleRoIExtractor',
48 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
49 | out_channels=1024,
50 | featmap_strides=[16]),
51 | bbox_head=dict(
52 | type='BBoxHead',
53 | with_avg_pool=True,
54 | roi_feat_size=7,
55 | in_channels=2048,
56 | num_classes=80,
57 | bbox_coder=dict(
58 | type='DeltaXYWHBBoxCoder',
59 | target_means=[0., 0., 0., 0.],
60 | target_stds=[0.1, 0.1, 0.2, 0.2]),
61 | reg_class_agnostic=False,
62 | loss_cls=dict(
63 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
64 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
65 | # model training and testing settings
66 | train_cfg=dict(
67 | rpn=dict(
68 | assigner=dict(
69 | type='MaxIoUAssigner',
70 | pos_iou_thr=0.7,
71 | neg_iou_thr=0.3,
72 | min_pos_iou=0.3,
73 | match_low_quality=True,
74 | ignore_iof_thr=-1),
75 | sampler=dict(
76 | type='RandomSampler',
77 | num=256,
78 | pos_fraction=0.5,
79 | neg_pos_ub=-1,
80 | add_gt_as_proposals=False),
81 | allowed_border=0,
82 | pos_weight=-1,
83 | debug=False),
84 | rpn_proposal=dict(
85 | nms_pre=12000,
86 | max_per_img=2000,
87 | nms=dict(type='nms', iou_threshold=0.7),
88 | min_bbox_size=0),
89 | rcnn=dict(
90 | assigner=dict(
91 | type='MaxIoUAssigner',
92 | pos_iou_thr=0.5,
93 | neg_iou_thr=0.5,
94 | min_pos_iou=0.5,
95 | match_low_quality=False,
96 | ignore_iof_thr=-1),
97 | sampler=dict(
98 | type='RandomSampler',
99 | num=512,
100 | pos_fraction=0.25,
101 | neg_pos_ub=-1,
102 | add_gt_as_proposals=True),
103 | pos_weight=-1,
104 | debug=False)),
105 | test_cfg=dict(
106 | rpn=dict(
107 | nms_pre=6000,
108 | max_per_img=1000,
109 | nms=dict(type='nms', iou_threshold=0.7),
110 | min_bbox_size=0),
111 | rcnn=dict(
112 | score_thr=0.05,
113 | nms=dict(type='nms', iou_threshold=0.5),
114 | max_per_img=100)))
115 |
--------------------------------------------------------------------------------
/mmdet_configs/visdrone_fcos/fcos_crop_480_960_cls_60.py:
--------------------------------------------------------------------------------
1 | _base_ = ["../fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py"]
2 |
3 | TAGS = ["fcos", "crop=480_960", "24epochs", "num_cls=60", "repeat=5"]
4 | EXP_NAME = "fcos_crop_480_960_cls_60"
5 | DATA_ROOT = "data/visdrone2019/"
6 | BATCH_MULTIPLIER = 16
7 | LR_MULTIPLIER = 1
8 | EVAL_INTERVAL = 3
9 | NUM_CLASSES = 10
10 | CLASSES = ("pedestrian", "people", "bicycle", "car", "van", "truck", "tricycle", "awning-tricycle", "bus", "motor")
11 |
12 | # model settings
13 | model = dict(
14 | bbox_head=dict(
15 | num_classes=NUM_CLASSES,
16 | ),
17 | )
18 |
19 | # dataset settings
20 | img_norm_cfg = dict(mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
21 | train_pipeline = [
22 | dict(type="LoadImageFromFile"),
23 | dict(type="LoadAnnotations", with_bbox=True),
24 | dict(
25 | type="AutoAugment",
26 | policies=[
27 | [
28 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(480, 960), allow_negative_crop=True),
29 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
30 | ],
31 | [
32 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(480, 960), allow_negative_crop=True),
33 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
34 | ],
35 | [
36 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
37 | ],
38 | ],
39 | ),
40 | dict(type="RandomFlip", flip_ratio=0.5),
41 | dict(type="Normalize", **img_norm_cfg),
42 | dict(type="Pad", size_divisor=32),
43 | dict(type="DefaultFormatBundle"),
44 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
45 | ]
46 | test_pipeline = [
47 | dict(type="LoadImageFromFile"),
48 | dict(
49 | type="MultiScaleFlipAug",
50 | img_scale=(1333, 800),
51 | flip=False,
52 | transforms=[
53 | dict(type="Resize", keep_ratio=True),
54 | dict(type="RandomFlip"),
55 | dict(type="Normalize", **img_norm_cfg),
56 | dict(type="Pad", size_divisor=32),
57 | dict(type="ImageToTensor", keys=["img"]),
58 | dict(type="Collect", keys=["img"]),
59 | ],
60 | ),
61 | ]
62 |
63 | data = dict(
64 | samples_per_gpu=2 * BATCH_MULTIPLIER,
65 | workers_per_gpu=4,
66 | train=dict(
67 | type="RepeatDataset",
68 | times=5,
69 | dataset=dict(
70 | type="CocoDataset",
71 | classes=CLASSES,
72 | ann_file=DATA_ROOT + "coco/train.json",
73 | img_prefix=DATA_ROOT + "VisDrone2019-DET-train/",
74 | pipeline=train_pipeline,
75 | ),
76 | ),
77 | val=dict(
78 | classes=CLASSES,
79 | ann_file=DATA_ROOT + "sliced/val_640_0.json",
80 | img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
81 | pipeline=test_pipeline,
82 | ),
83 | test=dict(
84 | classes=CLASSES,
85 | ann_file=DATA_ROOT + "sliced/val_640_0.json",
86 | img_prefix=DATA_ROOT + "sliced/val_images_640_0/",
87 | pipeline=test_pipeline,
88 | ),
89 | )
90 |
91 | # optimizer
92 | # the base lr of 0.01 assumes 8 GPUs; dividing by 8 gives the single-GPU rate,
93 | # which is then rescaled by BATCH_MULTIPLIER and LR_MULTIPLIER
94 | optimizer = dict(
95 | lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
96 | )
97 |
98 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
99 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
100 |
101 | # learning policy
102 | lr_config = dict(policy="step", warmup="constant", warmup_iters=500, warmup_ratio=1.0 / 3, step=[16, 22])
103 | runner = dict(type="EpochBasedRunner", max_epochs=24)
104 |
105 | # logger settings
106 | log_config = dict(
107 | interval=50,
108 | hooks=[
109 | dict(type="TextLoggerHook"),
110 | dict(type="TensorboardLoggerHook", reset_flag=False),
111 | ],
112 | )
113 |
114 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth"
115 | work_dir = f"runs/visdrone/{EXP_NAME}/"
116 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/mask_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='MaskRCNN',
4 | backbone=dict(
5 | type='ResNet',
6 | depth=50,
7 | num_stages=4,
8 | out_indices=(0, 1, 2, 3),
9 | frozen_stages=1,
10 | norm_cfg=dict(type='BN', requires_grad=True),
11 | norm_eval=True,
12 | style='pytorch',
13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | roi_head=dict(
36 | type='StandardRoIHead',
37 | bbox_roi_extractor=dict(
38 | type='SingleRoIExtractor',
39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 | out_channels=256,
41 | featmap_strides=[4, 8, 16, 32]),
42 | bbox_head=dict(
43 | type='Shared2FCBBoxHead',
44 | in_channels=256,
45 | fc_out_channels=1024,
46 | roi_feat_size=7,
47 | num_classes=80,
48 | bbox_coder=dict(
49 | type='DeltaXYWHBBoxCoder',
50 | target_means=[0., 0., 0., 0.],
51 | target_stds=[0.1, 0.1, 0.2, 0.2]),
52 | reg_class_agnostic=False,
53 | loss_cls=dict(
54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
56 | mask_roi_extractor=dict(
57 | type='SingleRoIExtractor',
58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
59 | out_channels=256,
60 | featmap_strides=[4, 8, 16, 32]),
61 | mask_head=dict(
62 | type='FCNMaskHead',
63 | num_convs=4,
64 | in_channels=256,
65 | conv_out_channels=256,
66 | num_classes=80,
67 | loss_mask=dict(
68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
69 | # model training and testing settings
70 | train_cfg=dict(
71 | rpn=dict(
72 | assigner=dict(
73 | type='MaxIoUAssigner',
74 | pos_iou_thr=0.7,
75 | neg_iou_thr=0.3,
76 | min_pos_iou=0.3,
77 | match_low_quality=True,
78 | ignore_iof_thr=-1),
79 | sampler=dict(
80 | type='RandomSampler',
81 | num=256,
82 | pos_fraction=0.5,
83 | neg_pos_ub=-1,
84 | add_gt_as_proposals=False),
85 | allowed_border=-1,
86 | pos_weight=-1,
87 | debug=False),
88 | rpn_proposal=dict(
89 | nms_pre=2000,
90 | max_per_img=1000,
91 | nms=dict(type='nms', iou_threshold=0.7),
92 | min_bbox_size=0),
93 | rcnn=dict(
94 | assigner=dict(
95 | type='MaxIoUAssigner',
96 | pos_iou_thr=0.5,
97 | neg_iou_thr=0.5,
98 | min_pos_iou=0.5,
99 | match_low_quality=True,
100 | ignore_iof_thr=-1),
101 | sampler=dict(
102 | type='RandomSampler',
103 | num=512,
104 | pos_fraction=0.25,
105 | neg_pos_ub=-1,
106 | add_gt_as_proposals=True),
107 | mask_size=28,
108 | pos_weight=-1,
109 | debug=False)),
110 | test_cfg=dict(
111 | rpn=dict(
112 | nms_pre=1000,
113 | max_per_img=1000,
114 | nms=dict(type='nms', iou_threshold=0.7),
115 | min_bbox_size=0),
116 | rcnn=dict(
117 | score_thr=0.05,
118 | nms=dict(type='nms', iou_threshold=0.5),
119 | max_per_img=100,
120 | mask_thr_binary=0.5)))
121 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/mask_rcnn_r50_caffe_c4.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | norm_cfg = dict(type='BN', requires_grad=False)
3 | model = dict(
4 | type='MaskRCNN',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=3,
9 | strides=(1, 2, 2),
10 | dilations=(1, 1, 1),
11 | out_indices=(2, ),
12 | frozen_stages=1,
13 | norm_cfg=norm_cfg,
14 | norm_eval=True,
15 | style='caffe',
16 | init_cfg=dict(
17 | type='Pretrained',
18 | checkpoint='open-mmlab://detectron2/resnet50_caffe')),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=1024,
22 | feat_channels=1024,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[2, 4, 8, 16, 32],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[16]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | roi_head=dict(
36 | type='StandardRoIHead',
37 | shared_head=dict(
38 | type='ResLayer',
39 | depth=50,
40 | stage=3,
41 | stride=2,
42 | dilation=1,
43 | style='caffe',
44 | norm_cfg=norm_cfg,
45 | norm_eval=True),
46 | bbox_roi_extractor=dict(
47 | type='SingleRoIExtractor',
48 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
49 | out_channels=1024,
50 | featmap_strides=[16]),
51 | bbox_head=dict(
52 | type='BBoxHead',
53 | with_avg_pool=True,
54 | roi_feat_size=7,
55 | in_channels=2048,
56 | num_classes=80,
57 | bbox_coder=dict(
58 | type='DeltaXYWHBBoxCoder',
59 | target_means=[0., 0., 0., 0.],
60 | target_stds=[0.1, 0.1, 0.2, 0.2]),
61 | reg_class_agnostic=False,
62 | loss_cls=dict(
63 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
64 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
65 | mask_roi_extractor=None,
66 | mask_head=dict(
67 | type='FCNMaskHead',
68 | num_convs=0,
69 | in_channels=2048,
70 | conv_out_channels=256,
71 | num_classes=80,
72 | loss_mask=dict(
73 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
74 | # model training and testing settings
75 | train_cfg=dict(
76 | rpn=dict(
77 | assigner=dict(
78 | type='MaxIoUAssigner',
79 | pos_iou_thr=0.7,
80 | neg_iou_thr=0.3,
81 | min_pos_iou=0.3,
82 | match_low_quality=True,
83 | ignore_iof_thr=-1),
84 | sampler=dict(
85 | type='RandomSampler',
86 | num=256,
87 | pos_fraction=0.5,
88 | neg_pos_ub=-1,
89 | add_gt_as_proposals=False),
90 | allowed_border=0,
91 | pos_weight=-1,
92 | debug=False),
93 | rpn_proposal=dict(
94 | nms_pre=12000,
95 | max_per_img=2000,
96 | nms=dict(type='nms', iou_threshold=0.7),
97 | min_bbox_size=0),
98 | rcnn=dict(
99 | assigner=dict(
100 | type='MaxIoUAssigner',
101 | pos_iou_thr=0.5,
102 | neg_iou_thr=0.5,
103 | min_pos_iou=0.5,
104 | match_low_quality=False,
105 | ignore_iof_thr=-1),
106 | sampler=dict(
107 | type='RandomSampler',
108 | num=512,
109 | pos_fraction=0.25,
110 | neg_pos_ub=-1,
111 | add_gt_as_proposals=True),
112 | mask_size=14,
113 | pos_weight=-1,
114 | debug=False)),
115 | test_cfg=dict(
116 | rpn=dict(
117 | nms_pre=6000,
118 | nms=dict(type='nms', iou_threshold=0.7),
119 | max_per_img=1000,
120 | min_bbox_size=0),
121 | rcnn=dict(
122 | score_thr=0.05,
123 | nms=dict(type='nms', iou_threshold=0.5),
124 | max_per_img=100,
125 | mask_thr_binary=0.5)))
126 |
--------------------------------------------------------------------------------
/mmdet_configs/xview_tood/tood_full_cls_60.py:
--------------------------------------------------------------------------------
1 | _base_ = ["../tood/tood_r50_fpn_1x_coco.py"]
2 |
3 |
4 | EXP_NAME = "tood_full_cls_60"
5 | DATA_ROOT = "data/xview/"
6 | BATCH_MULTIPLIER = 8
7 | LR_MULTIPLIER = 1
8 | EVAL_INTERVAL = 3
9 | NUM_CLASSES = 60
10 | DATASET_REPEAT = 30
11 | TAGS = ["tood", "crop=False", "24epochs", f"num_cls={NUM_CLASSES}", f"repeat={DATASET_REPEAT}"]
12 | CLASSES = (
13 | "Fixed-wing Aircraft",
14 | "Small Aircraft",
15 | "Cargo Plane",
16 | "Helicopter",
17 | "Passenger Vehicle",
18 | "Small Car",
19 | "Bus",
20 | "Pickup Truck",
21 | "Utility Truck",
22 | "Truck",
23 | "Cargo Truck",
24 | "Truck w/Box",
25 | "Truck Tractor",
26 | "Trailer",
27 | "Truck w/Flatbed",
28 | "Truck w/Liquid",
29 | "Crane Truck",
30 | "Railway Vehicle",
31 | "Passenger Car",
32 | "Cargo Car",
33 | "Flat Car",
34 | "Tank car",
35 | "Locomotive",
36 | "Maritime Vessel",
37 | "Motorboat",
38 | "Sailboat",
39 | "Tugboat",
40 | "Barge",
41 | "Fishing Vessel",
42 | "Ferry",
43 | "Yacht",
44 | "Container Ship",
45 | "Oil Tanker",
46 | "Engineering Vehicle",
47 | "Tower crane",
48 | "Container Crane",
49 | "Reach Stacker",
50 | "Straddle Carrier",
51 | "Mobile Crane",
52 | "Dump Truck",
53 | "Haul Truck",
54 | "Scraper/Tractor",
55 | "Front loader/Bulldozer",
56 | "Excavator",
57 | "Cement Mixer",
58 | "Ground Grader",
59 | "Hut/Tent",
60 | "Shed",
61 | "Building",
62 | "Aircraft Hangar",
63 | "Damaged Building",
64 | "Facility",
65 | "Construction Site",
66 | "Vehicle Lot",
67 | "Helipad",
68 | "Storage Tank",
69 | "Shipping container lot",
70 | "Shipping Container",
71 | "Pylon",
72 | "Tower",
73 | )
74 |
75 | # model settings
76 | model = dict(
77 | bbox_head=dict(
78 | num_classes=NUM_CLASSES,
79 | ),
80 | )
81 |
82 | # dataset settings
83 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
84 | train_pipeline = [
85 | dict(type="LoadImageFromFile"),
86 | dict(type="LoadAnnotations", with_bbox=True),
87 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
88 | dict(type="RandomFlip", flip_ratio=0.5),
89 | dict(type="Normalize", **img_norm_cfg),
90 | dict(type="Pad", size_divisor=32),
91 | dict(type="DefaultFormatBundle"),
92 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
93 | ]
94 | test_pipeline = [
95 | dict(type="LoadImageFromFile"),
96 | dict(
97 | type="MultiScaleFlipAug",
98 | img_scale=(1333, 800),
99 | flip=False,
100 | transforms=[
101 | dict(type="Resize", keep_ratio=True),
102 | dict(type="RandomFlip"),
103 | dict(type="Normalize", **img_norm_cfg),
104 | dict(type="Pad", size_divisor=32),
105 | dict(type="ImageToTensor", keys=["img"]),
106 | dict(type="Collect", keys=["img"]),
107 | ],
108 | ),
109 | ]
110 |
111 | data = dict(
112 | samples_per_gpu=2 * BATCH_MULTIPLIER,
113 | workers_per_gpu=4,
114 | train=dict(
115 | type="RepeatDataset",
116 | times=DATASET_REPEAT,
117 | dataset=dict(
118 | type="CocoDataset",
119 | classes=CLASSES,
120 | ann_file=DATA_ROOT + "coco/train.json",
121 | img_prefix=DATA_ROOT + "train_images/",
122 | pipeline=train_pipeline,
123 | ),
124 | ),
125 | val=dict(
126 | classes=CLASSES,
127 | ann_file=DATA_ROOT + "sliced/val_400_0.json",
128 | img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
129 | pipeline=test_pipeline,
130 | ),
131 | test=dict(
132 | classes=CLASSES,
133 | ann_file=DATA_ROOT + "sliced/val_400_0.json",
134 | img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
135 | pipeline=test_pipeline,
136 | ),
137 | )
138 |
139 | # optimizer
140 | # the base lr of 0.01 assumes 8 GPUs; dividing by 8 gives the single-GPU rate,
141 | # which is then rescaled by BATCH_MULTIPLIER and LR_MULTIPLIER
142 | optimizer = dict(lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, momentum=0.9, weight_decay=0.0001)
143 |
144 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
145 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
146 |
147 | # learning policy
148 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[16, 22])
149 | runner = dict(type="EpochBasedRunner", max_epochs=24)
150 |
151 | # logger settings
152 | log_config = dict(
153 | interval=50,
154 | hooks=[
155 | dict(type="TextLoggerHook"),
156 | dict(type="TensorboardLoggerHook", reset_flag=False),
157 | ],
158 | )
159 |
160 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_1x_coco/tood_r50_fpn_1x_coco_20211210_103425-20e20746.pth"
161 | work_dir = f"runs/xview/{EXP_NAME}/"
162 |
--------------------------------------------------------------------------------
/mmdet_configs/xview_vfnet/vfnet_full_cls_60.py:
--------------------------------------------------------------------------------
1 | _base_ = ["../vfnet/vfnet_r50_fpn_1x_coco.py"]
2 |
3 |
4 | EXP_NAME = "vfnet_full_cls_60"
5 | DATA_ROOT = "data/xview/"
6 | BATCH_MULTIPLIER = 8
7 | LR_MULTIPLIER = 1
8 | EVAL_INTERVAL = 3
9 | NUM_CLASSES = 60
10 | DATASET_REPEAT = 30
11 | TAGS = ["vfnet", "crop=False", "24epochs", f"num_cls={NUM_CLASSES}", f"repeat={DATASET_REPEAT}"]
12 | CLASSES = (
13 | "Fixed-wing Aircraft",
14 | "Small Aircraft",
15 | "Cargo Plane",
16 | "Helicopter",
17 | "Passenger Vehicle",
18 | "Small Car",
19 | "Bus",
20 | "Pickup Truck",
21 | "Utility Truck",
22 | "Truck",
23 | "Cargo Truck",
24 | "Truck w/Box",
25 | "Truck Tractor",
26 | "Trailer",
27 | "Truck w/Flatbed",
28 | "Truck w/Liquid",
29 | "Crane Truck",
30 | "Railway Vehicle",
31 | "Passenger Car",
32 | "Cargo Car",
33 | "Flat Car",
34 | "Tank car",
35 | "Locomotive",
36 | "Maritime Vessel",
37 | "Motorboat",
38 | "Sailboat",
39 | "Tugboat",
40 | "Barge",
41 | "Fishing Vessel",
42 | "Ferry",
43 | "Yacht",
44 | "Container Ship",
45 | "Oil Tanker",
46 | "Engineering Vehicle",
47 | "Tower crane",
48 | "Container Crane",
49 | "Reach Stacker",
50 | "Straddle Carrier",
51 | "Mobile Crane",
52 | "Dump Truck",
53 | "Haul Truck",
54 | "Scraper/Tractor",
55 | "Front loader/Bulldozer",
56 | "Excavator",
57 | "Cement Mixer",
58 | "Ground Grader",
59 | "Hut/Tent",
60 | "Shed",
61 | "Building",
62 | "Aircraft Hangar",
63 | "Damaged Building",
64 | "Facility",
65 | "Construction Site",
66 | "Vehicle Lot",
67 | "Helipad",
68 | "Storage Tank",
69 | "Shipping container lot",
70 | "Shipping Container",
71 | "Pylon",
72 | "Tower",
73 | )
74 |
75 | # model settings
76 | model = dict(
77 | bbox_head=dict(
78 | num_classes=NUM_CLASSES,
79 | ),
80 | )
81 |
82 | # dataset settings
83 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
84 | train_pipeline = [
85 | dict(type="LoadImageFromFile"),
86 | dict(type="LoadAnnotations", with_bbox=True),
87 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
88 | dict(type="RandomFlip", flip_ratio=0.5),
89 | dict(type="Normalize", **img_norm_cfg),
90 | dict(type="Pad", size_divisor=32),
91 | dict(type="DefaultFormatBundle"),
92 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
93 | ]
94 | test_pipeline = [
95 | dict(type="LoadImageFromFile"),
96 | dict(
97 | type="MultiScaleFlipAug",
98 | img_scale=(1333, 800),
99 | flip=False,
100 | transforms=[
101 | dict(type="Resize", keep_ratio=True),
102 | dict(type="RandomFlip"),
103 | dict(type="Normalize", **img_norm_cfg),
104 | dict(type="Pad", size_divisor=32),
105 | dict(type="DefaultFormatBundle"),
106 | dict(type="Collect", keys=["img"]),
107 | ],
108 | ),
109 | ]
110 |
111 | data = dict(
112 | samples_per_gpu=2 * BATCH_MULTIPLIER,
113 | workers_per_gpu=4,
114 | train=dict(
115 | type="RepeatDataset",
116 | times=DATASET_REPEAT,
117 | dataset=dict(
118 | type="CocoDataset",
119 | classes=CLASSES,
120 | ann_file=DATA_ROOT + "coco/train.json",
121 | img_prefix=DATA_ROOT + "train_images/",
122 | pipeline=train_pipeline,
123 | ),
124 | ),
125 | val=dict(
126 | classes=CLASSES,
127 | ann_file=DATA_ROOT + "sliced/val_400_0.json",
128 | img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
129 | pipeline=test_pipeline,
130 | ),
131 | test=dict(
132 | classes=CLASSES,
133 | ann_file=DATA_ROOT + "sliced/val_400_0.json",
134 | img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
135 | pipeline=test_pipeline,
136 | ),
137 | )
138 |
139 | # optimizer
140 | # the base lr of 0.01 assumes 8 GPUs; dividing by 8 gives the single-GPU rate,
141 | # which is then rescaled by BATCH_MULTIPLIER and LR_MULTIPLIER
142 | optimizer = dict(
143 | lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
144 | )
145 |
146 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
147 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
148 |
149 | # learning policy
150 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.1, step=[16, 22])
151 | runner = dict(type="EpochBasedRunner", max_epochs=24)
152 |
153 | # logger settings
154 | log_config = dict(
155 | interval=50,
156 | hooks=[
157 | dict(type="TextLoggerHook"),
158 | dict(type="TensorboardLoggerHook", reset_flag=False),
159 | ],
160 | )
161 |
162 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth"
163 | work_dir = f"runs/xview/{EXP_NAME}/"
164 |
--------------------------------------------------------------------------------
/mmdet_configs/xview_fcos/fcos_full_cls_60.py:
--------------------------------------------------------------------------------
1 | _base_ = ["../fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py"]
2 |
3 |
4 | EXP_NAME = "fcos_full_cls_60"
5 | DATA_ROOT = "data/xview/"
6 | BATCH_MULTIPLIER = 16
7 | LR_MULTIPLIER = 1
8 | EVAL_INTERVAL = 3
9 | NUM_CLASSES = 60
10 | DATASET_REPEAT = 30
11 | TAGS = ["fcos", "crop=False", "24epochs", f"num_cls={NUM_CLASSES}", f"repeat={DATASET_REPEAT}"]
12 | CLASSES = (
13 | "Fixed-wing Aircraft",
14 | "Small Aircraft",
15 | "Cargo Plane",
16 | "Helicopter",
17 | "Passenger Vehicle",
18 | "Small Car",
19 | "Bus",
20 | "Pickup Truck",
21 | "Utility Truck",
22 | "Truck",
23 | "Cargo Truck",
24 | "Truck w/Box",
25 | "Truck Tractor",
26 | "Trailer",
27 | "Truck w/Flatbed",
28 | "Truck w/Liquid",
29 | "Crane Truck",
30 | "Railway Vehicle",
31 | "Passenger Car",
32 | "Cargo Car",
33 | "Flat Car",
34 | "Tank car",
35 | "Locomotive",
36 | "Maritime Vessel",
37 | "Motorboat",
38 | "Sailboat",
39 | "Tugboat",
40 | "Barge",
41 | "Fishing Vessel",
42 | "Ferry",
43 | "Yacht",
44 | "Container Ship",
45 | "Oil Tanker",
46 | "Engineering Vehicle",
47 | "Tower crane",
48 | "Container Crane",
49 | "Reach Stacker",
50 | "Straddle Carrier",
51 | "Mobile Crane",
52 | "Dump Truck",
53 | "Haul Truck",
54 | "Scraper/Tractor",
55 | "Front loader/Bulldozer",
56 | "Excavator",
57 | "Cement Mixer",
58 | "Ground Grader",
59 | "Hut/Tent",
60 | "Shed",
61 | "Building",
62 | "Aircraft Hangar",
63 | "Damaged Building",
64 | "Facility",
65 | "Construction Site",
66 | "Vehicle Lot",
67 | "Helipad",
68 | "Storage Tank",
69 | "Shipping container lot",
70 | "Shipping Container",
71 | "Pylon",
72 | "Tower",
73 | )
74 |
75 | # model settings
76 | model = dict(
77 | bbox_head=dict(
78 | num_classes=NUM_CLASSES,
79 | ),
80 | )
81 |
82 | # dataset settings
83 | img_norm_cfg = dict(mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
84 | train_pipeline = [
85 | dict(type="LoadImageFromFile"),
86 | dict(type="LoadAnnotations", with_bbox=True),
87 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
88 | dict(type="RandomFlip", flip_ratio=0.5),
89 | dict(type="Normalize", **img_norm_cfg),
90 | dict(type="Pad", size_divisor=32),
91 | dict(type="DefaultFormatBundle"),
92 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
93 | ]
94 | test_pipeline = [
95 | dict(type="LoadImageFromFile"),
96 | dict(
97 | type="MultiScaleFlipAug",
98 | img_scale=(1333, 800),
99 | flip=False,
100 | transforms=[
101 | dict(type="Resize", keep_ratio=True),
102 | dict(type="RandomFlip"),
103 | dict(type="Normalize", **img_norm_cfg),
104 | dict(type="Pad", size_divisor=32),
105 | dict(type="ImageToTensor", keys=["img"]),
106 | dict(type="Collect", keys=["img"]),
107 | ],
108 | ),
109 | ]
110 |
111 | data = dict(
112 | samples_per_gpu=2 * BATCH_MULTIPLIER,
113 | workers_per_gpu=4,
114 | train=dict(
115 | type="RepeatDataset",
116 | times=DATASET_REPEAT,
117 | dataset=dict(
118 | type="CocoDataset",
119 | classes=CLASSES,
120 | ann_file=DATA_ROOT + "coco/train.json",
121 | img_prefix=DATA_ROOT + "train_images/",
122 | pipeline=train_pipeline,
123 | ),
124 | ),
125 | val=dict(
126 | classes=CLASSES,
127 | ann_file=DATA_ROOT + "sliced/val_400_0.json",
128 | img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
129 | pipeline=test_pipeline,
130 | ),
131 | test=dict(
132 | classes=CLASSES,
133 | ann_file=DATA_ROOT + "sliced/val_400_0.json",
134 | img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
135 | pipeline=test_pipeline,
136 | ),
137 | )
138 |
139 | # optimizer
140 | # the base lr of 0.01 assumes 8 GPUs; dividing by 8 gives the single-GPU rate,
141 | # which is then rescaled by BATCH_MULTIPLIER and LR_MULTIPLIER
142 | optimizer = dict(
143 | lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
144 | )
145 |
146 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
147 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
148 |
149 | # learning policy
150 | lr_config = dict(policy="step", warmup="constant", warmup_iters=500, warmup_ratio=1.0 / 3, step=[16, 22])
151 | runner = dict(type="EpochBasedRunner", max_epochs=24)
152 |
153 | # logger settings
154 | log_config = dict(
155 | interval=50,
156 | hooks=[
157 | dict(type="TextLoggerHook"),
158 | dict(type="TensorboardLoggerHook", reset_flag=False),
159 | ],
160 | )
161 |
162 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth"
163 | work_dir = f"runs/xview/{EXP_NAME}/"
164 |
--------------------------------------------------------------------------------
/visdrone/visdrone_to_coco.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 |
4 | import fire
5 | from PIL import Image
6 | from sahi.utils.coco import Coco, CocoAnnotation, CocoCategory, CocoImage
7 | from sahi.utils.file import save_json
8 | from tqdm import tqdm
9 |
10 | CATEGORY_ID_TO_NAME = {
11 | "0": "ignore",
12 | "1": "pedestrian",
13 | "2": "people",
14 | "3": "bicycle",
15 | "4": "car",
16 | "5": "van",
17 | "6": "truck",
18 | "7": "tricycle",
19 | "8": "awning-tricycle",
20 | "9": "bus",
21 | "10": "motor",
22 | "11": "others",
23 | }
24 |
25 | CATEGORY_ID_REMAPPING = {
26 | "1": "0",
27 | "2": "1",
28 | "3": "2",
29 | "4": "3",
30 | "5": "4",
31 | "6": "5",
32 | "7": "6",
33 | "8": "7",
34 | "9": "8",
35 | "10": "9",
36 | }
37 |
38 | NAME_TO_COCO_CATEGORY = {
39 | "pedestrian": {"name": "pedestrian", "supercategory": "person"},
40 | "people": {"name": "people", "supercategory": "person"},
41 | "bicycle": {"name": "bicycle", "supercategory": "bicycle"},
42 | "car": {"name": "car", "supercategory": "car"},
43 | "van": {"name": "van", "supercategory": "truck"},
44 | "truck": {"name": "truck", "supercategory": "truck"},
45 | "tricycle": {"name": "tricycle", "supercategory": "motor"},
46 | "awning-tricycle": {"name": "awning-tricycle", "supercategory": "motor"},
47 | "bus": {"name": "bus", "supercategory": "bus"},
48 | "motor": {"name": "motor", "supercategory": "motor"},
49 | }
50 |
51 |
52 | def visdrone_to_coco(
53 | data_folder_dir,
54 | output_file_path,
55 | category_id_remapping=None,
56 | ):
57 | """
58 | Converts VisDrone-DET annotations into COCO format.
59 |
60 | Args:
61 | data_folder_dir: str
62 | 'VisDrone2019-DET-train' folder directory
63 | output_file_path: str
64 | Output file path
65 | category_id_remapping: dict
66 | Used for selecting desired category ids and mapping them.
67 | If not provided, VisDrone2019-DET mapping will be used.
68 | format: str(id) to str(id)
69 | """
70 |
71 | # init paths/folders
72 | input_image_folder = str(Path(data_folder_dir) / "images")
73 | input_ann_folder = str(Path(data_folder_dir) / "annotations")
74 |
75 | image_filename_list = os.listdir(input_image_folder)
76 |
77 | Path(output_file_path).parents[0].mkdir(parents=True, exist_ok=True)
78 |
79 | if category_id_remapping is None:
80 | category_id_remapping = CATEGORY_ID_REMAPPING
81 |
82 | # init coco object
83 | coco = Coco()
84 | # append categories
85 | for category_id, category_name in CATEGORY_ID_TO_NAME.items():
86 | if category_id in category_id_remapping.keys():
87 | remapped_category_id = category_id_remapping[category_id]
88 | coco_category = NAME_TO_COCO_CATEGORY[category_name]
89 | coco.add_category(
90 | CocoCategory(
91 | id=int(remapped_category_id),
92 | name=coco_category["name"],
93 | supercategory=coco_category["supercategory"],
94 | )
95 | )
96 |
97 | # convert visdrone annotations to coco
98 | for image_filename in tqdm(image_filepath_list):
99 | # get image properties
100 | image_filepath = str(Path(input_image_folder) / image_filename)
101 |         annotation_filename = Path(image_filename).stem + ".txt"
102 | annotation_filepath = str(Path(input_ann_folder) / annotation_filename)
103 | image = Image.open(image_filepath)
104 | cocoimage_filename = str(Path(image_filepath)).split(str(Path(data_folder_dir)))[1]
105 | if cocoimage_filename[0] == os.sep:
106 | cocoimage_filename = cocoimage_filename[1:]
107 | # create coco image object
108 | coco_image = CocoImage(file_name=cocoimage_filename, height=image.size[1], width=image.size[0])
109 | # parse annotation file
110 |         with open(annotation_filepath, "r") as file:
111 |             lines = file.readlines()
112 | for line in lines:
113 | # parse annotation bboxes
114 | new_line = line.strip("\n").split(",")
115 | bbox = [
116 | int(new_line[0]),
117 | int(new_line[1]),
118 | int(new_line[2]),
119 | int(new_line[3]),
120 | ]
121 | # parse category id and name
122 | category_id = new_line[5]
123 | if category_id in category_id_remapping.keys():
124 | category_name = CATEGORY_ID_TO_NAME[category_id]
125 | remapped_category_id = category_id_remapping[category_id]
126 | else:
127 | continue
128 | # create coco annotation and append it to coco image
129 | coco_annotation = CocoAnnotation.from_coco_bbox(
130 | bbox=bbox,
131 | category_id=int(remapped_category_id),
132 | category_name=category_name,
133 | )
134 | if coco_annotation.area > 0:
135 | coco_image.add_annotation(coco_annotation)
136 | coco.add_image(coco_image)
137 |
138 | save_path = output_file_path
139 | save_json(data=coco.json, save_path=save_path)
140 |
141 |
142 | if __name__ == "__main__":
143 | fire.Fire(visdrone_to_coco)
144 |
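145 | # example CLI usage via python-fire (the paths below are illustrative, not shipped defaults):
146 | #   python visdrone/visdrone_to_coco.py \
147 | #       --data_folder_dir data/visdrone/VisDrone2019-DET-train \
148 | #       --output_file_path data/visdrone/coco/train.json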
--------------------------------------------------------------------------------
/mmdet_configs/xview_tood/tood_crop_300_500_cls_60.py:
--------------------------------------------------------------------------------
1 | _base_ = ["../tood/tood_r50_fpn_1x_coco.py"]
2 |
3 |
4 | EXP_NAME = "tood_crop_300_500_cls_60"
5 | DATA_ROOT = "data/xview/"
6 | BATCH_MULTIPLIER = 8
7 | LR_MULTIPLIER = 1
8 | EVAL_INTERVAL = 3
9 | NUM_CLASSES = 60
10 | DATASET_REPEAT = 50
11 | TAGS = ["tood", "crop=300_500", "24epochs", f"num_cls={NUM_CLASSES}", f"repeat={DATASET_REPEAT}"]
12 | CLASSES = (
13 | "Fixed-wing Aircraft",
14 | "Small Aircraft",
15 | "Cargo Plane",
16 | "Helicopter",
17 | "Passenger Vehicle",
18 | "Small Car",
19 | "Bus",
20 | "Pickup Truck",
21 | "Utility Truck",
22 | "Truck",
23 | "Cargo Truck",
24 | "Truck w/Box",
25 | "Truck Tractor",
26 | "Trailer",
27 | "Truck w/Flatbed",
28 | "Truck w/Liquid",
29 | "Crane Truck",
30 | "Railway Vehicle",
31 | "Passenger Car",
32 | "Cargo Car",
33 | "Flat Car",
34 | "Tank car",
35 | "Locomotive",
36 | "Maritime Vessel",
37 | "Motorboat",
38 | "Sailboat",
39 | "Tugboat",
40 | "Barge",
41 | "Fishing Vessel",
42 | "Ferry",
43 | "Yacht",
44 | "Container Ship",
45 | "Oil Tanker",
46 | "Engineering Vehicle",
47 | "Tower crane",
48 | "Container Crane",
49 | "Reach Stacker",
50 | "Straddle Carrier",
51 | "Mobile Crane",
52 | "Dump Truck",
53 | "Haul Truck",
54 | "Scraper/Tractor",
55 | "Front loader/Bulldozer",
56 | "Excavator",
57 | "Cement Mixer",
58 | "Ground Grader",
59 | "Hut/Tent",
60 | "Shed",
61 | "Building",
62 | "Aircraft Hangar",
63 | "Damaged Building",
64 | "Facility",
65 | "Construction Site",
66 | "Vehicle Lot",
67 | "Helipad",
68 | "Storage Tank",
69 | "Shipping container lot",
70 | "Shipping Container",
71 | "Pylon",
72 | "Tower",
73 | )
74 |
75 | # model settings
76 | model = dict(
77 | bbox_head=dict(
78 | num_classes=NUM_CLASSES,
79 | ),
80 | )
81 |
82 | # dataset settings
83 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
84 | train_pipeline = [
85 | dict(type="LoadImageFromFile"),
86 | dict(type="LoadAnnotations", with_bbox=True),
87 | dict(
88 | type="AutoAugment",
89 | policies=[
90 | [
91 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
92 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
93 | ],
94 | [
95 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
96 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
97 | ],
98 | [
99 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
100 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
101 | ],
102 | [
103 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
104 | ],
105 | ],
106 | ),
107 | dict(type="RandomFlip", flip_ratio=0.5),
108 | dict(type="Normalize", **img_norm_cfg),
109 | dict(type="Pad", size_divisor=32),
110 | dict(type="DefaultFormatBundle"),
111 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
112 | ]
113 | test_pipeline = [
114 | dict(type="LoadImageFromFile"),
115 | dict(
116 | type="MultiScaleFlipAug",
117 | img_scale=(1333, 800),
118 | flip=False,
119 | transforms=[
120 | dict(type="Resize", keep_ratio=True),
121 | dict(type="RandomFlip"),
122 | dict(type="Normalize", **img_norm_cfg),
123 | dict(type="Pad", size_divisor=32),
124 | dict(type="ImageToTensor", keys=["img"]),
125 | dict(type="Collect", keys=["img"]),
126 | ],
127 | ),
128 | ]
129 |
130 | data = dict(
131 | samples_per_gpu=2 * BATCH_MULTIPLIER,
132 | workers_per_gpu=4,
133 | train=dict(
134 | type="RepeatDataset",
135 | times=DATASET_REPEAT,
136 | dataset=dict(
137 | type="CocoDataset",
138 | classes=CLASSES,
139 | ann_file=DATA_ROOT + "coco/train.json",
140 | img_prefix=DATA_ROOT + "train_images/",
141 | pipeline=train_pipeline,
142 | ),
143 | ),
144 | val=dict(
145 | classes=CLASSES,
146 | ann_file=DATA_ROOT + "sliced/val_400_0.json",
147 | img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
148 | pipeline=test_pipeline,
149 | ),
150 | test=dict(
151 | classes=CLASSES,
152 | ann_file=DATA_ROOT + "sliced/val_400_0.json",
153 | img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
154 | pipeline=test_pipeline,
155 | ),
156 | )
157 |
158 | # optimizer
159 | # the default base lr (0.01) assumes 8 gpus;
160 | # divide it by 8 for a single gpu, then scale by the multipliers below
161 | optimizer = dict(lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, momentum=0.9, weight_decay=0.0001)
162 |
163 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
164 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
165 |
166 | # learning policy
167 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[16, 22])
168 | runner = dict(type="EpochBasedRunner", max_epochs=24)
169 |
170 | # logger settings
171 | log_config = dict(
172 | interval=50,
173 | hooks=[
174 | dict(type="TextLoggerHook"),
175 | dict(type="TensorboardLoggerHook", reset_flag=False),
176 | ],
177 | )
178 |
179 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_1x_coco/tood_r50_fpn_1x_coco_20211210_103425-20e20746.pth"
180 | work_dir = f"runs/xview/{EXP_NAME}/"
181 |
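182 | # optional sanity check: the class tuple must stay in sync with bbox_head.num_classes
183 | assert len(CLASSES) == NUM_CLASSES, "CLASSES length must equal NUM_CLASSES"
184 |
185 | # a typical single-gpu launch for this config (the sibling xview_vfnet and
186 | # xview_fcos crop configs are launched the same way):
187 | #   python mmdet_tools/train.py mmdet_configs/xview_tood/tood_crop_300_500_cls_60.py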
--------------------------------------------------------------------------------
/mmdet_configs/xview_vfnet/vfnet_crop_300_500_cls_60.py:
--------------------------------------------------------------------------------
1 | _base_ = ["../vfnet/vfnet_r50_fpn_1x_coco.py"]
2 |
3 |
4 | EXP_NAME = "vfnet_crop_300_500_cls_60"
5 | DATA_ROOT = "data/xview/"
6 | BATCH_MULTIPLIER = 8
7 | LR_MULTIPLIER = 1
8 | EVAL_INTERVAL = 3
9 | NUM_CLASSES = 60
10 | DATASET_REPEAT = 50
11 | TAGS = ["vfnet", "crop=300_500", "24epochs", f"num_cls={NUM_CLASSES}", f"repeat={DATASET_REPEAT}"]
12 | CLASSES = (
13 | "Fixed-wing Aircraft",
14 | "Small Aircraft",
15 | "Cargo Plane",
16 | "Helicopter",
17 | "Passenger Vehicle",
18 | "Small Car",
19 | "Bus",
20 | "Pickup Truck",
21 | "Utility Truck",
22 | "Truck",
23 | "Cargo Truck",
24 | "Truck w/Box",
25 | "Truck Tractor",
26 | "Trailer",
27 | "Truck w/Flatbed",
28 | "Truck w/Liquid",
29 | "Crane Truck",
30 | "Railway Vehicle",
31 | "Passenger Car",
32 | "Cargo Car",
33 | "Flat Car",
34 | "Tank car",
35 | "Locomotive",
36 | "Maritime Vessel",
37 | "Motorboat",
38 | "Sailboat",
39 | "Tugboat",
40 | "Barge",
41 | "Fishing Vessel",
42 | "Ferry",
43 | "Yacht",
44 | "Container Ship",
45 | "Oil Tanker",
46 | "Engineering Vehicle",
47 | "Tower crane",
48 | "Container Crane",
49 | "Reach Stacker",
50 | "Straddle Carrier",
51 | "Mobile Crane",
52 | "Dump Truck",
53 | "Haul Truck",
54 | "Scraper/Tractor",
55 | "Front loader/Bulldozer",
56 | "Excavator",
57 | "Cement Mixer",
58 | "Ground Grader",
59 | "Hut/Tent",
60 | "Shed",
61 | "Building",
62 | "Aircraft Hangar",
63 | "Damaged Building",
64 | "Facility",
65 | "Construction Site",
66 | "Vehicle Lot",
67 | "Helipad",
68 | "Storage Tank",
69 | "Shipping container lot",
70 | "Shipping Container",
71 | "Pylon",
72 | "Tower",
73 | )
74 |
75 | # model settings
76 | model = dict(
77 | bbox_head=dict(
78 | num_classes=NUM_CLASSES,
79 | ),
80 | )
81 |
82 | # dataset settings
83 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
84 | train_pipeline = [
85 | dict(type="LoadImageFromFile"),
86 | dict(type="LoadAnnotations", with_bbox=True),
87 | dict(
88 | type="AutoAugment",
89 | policies=[
90 | [
91 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
92 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
93 | ],
94 | [
95 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
96 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
97 | ],
98 | [
99 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
100 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
101 | ],
102 | [
103 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
104 | ],
105 | ],
106 | ),
107 | dict(type="RandomFlip", flip_ratio=0.5),
108 | dict(type="Normalize", **img_norm_cfg),
109 | dict(type="Pad", size_divisor=32),
110 | dict(type="DefaultFormatBundle"),
111 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
112 | ]
113 | test_pipeline = [
114 | dict(type="LoadImageFromFile"),
115 | dict(
116 | type="MultiScaleFlipAug",
117 | img_scale=(1333, 800),
118 | flip=False,
119 | transforms=[
120 | dict(type="Resize", keep_ratio=True),
121 | dict(type="RandomFlip"),
122 | dict(type="Normalize", **img_norm_cfg),
123 | dict(type="Pad", size_divisor=32),
124 | dict(type="DefaultFormatBundle"),
125 | dict(type="Collect", keys=["img"]),
126 | ],
127 | ),
128 | ]
129 |
130 | data = dict(
131 | samples_per_gpu=2 * BATCH_MULTIPLIER,
132 | workers_per_gpu=4,
133 | train=dict(
134 | type="RepeatDataset",
135 | times=DATASET_REPEAT,
136 | dataset=dict(
137 | type="CocoDataset",
138 | classes=CLASSES,
139 | ann_file=DATA_ROOT + "coco/train.json",
140 | img_prefix=DATA_ROOT + "train_images/",
141 | pipeline=train_pipeline,
142 | ),
143 | ),
144 | val=dict(
145 | classes=CLASSES,
146 | ann_file=DATA_ROOT + "sliced/val_400_0.json",
147 | img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
148 | pipeline=test_pipeline,
149 | ),
150 | test=dict(
151 | classes=CLASSES,
152 | ann_file=DATA_ROOT + "sliced/val_400_0.json",
153 | img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
154 | pipeline=test_pipeline,
155 | ),
156 | )
157 |
158 | # optimizer
159 | # the default base lr (0.01) assumes 8 gpus;
160 | # divide it by 8 for a single gpu, then scale by the multipliers below
161 | optimizer = dict(
162 | lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
163 | )
164 |
165 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
166 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
167 |
168 | # learning policy
169 | lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.1, step=[16, 22])
170 | runner = dict(type="EpochBasedRunner", max_epochs=24)
171 |
172 | # logger settings
173 | log_config = dict(
174 | interval=50,
175 | hooks=[
176 | dict(type="TextLoggerHook"),
177 | dict(type="TensorboardLoggerHook", reset_flag=False),
178 | ],
179 | )
180 |
181 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth"
182 | work_dir = f"runs/xview/{EXP_NAME}/"
183 |
--------------------------------------------------------------------------------
/mmdet_configs/xview_fcos/fcos_crop_300_500_cls_60.py:
--------------------------------------------------------------------------------
1 | _base_ = ["../fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py"]
2 |
3 |
4 | EXP_NAME = "fcos_crop_300_500_cls_60"
5 | DATA_ROOT = "data/xview/"
6 | BATCH_MULTIPLIER = 8
7 | LR_MULTIPLIER = 1
8 | EVAL_INTERVAL = 3
9 | NUM_CLASSES = 60
10 | DATASET_REPEAT = 50
11 | TAGS = ["fcos", "crop=300_500", "24epochs", f"num_cls={NUM_CLASSES}", f"repeat={DATASET_REPEAT}"]
12 | CLASSES = (
13 | "Fixed-wing Aircraft",
14 | "Small Aircraft",
15 | "Cargo Plane",
16 | "Helicopter",
17 | "Passenger Vehicle",
18 | "Small Car",
19 | "Bus",
20 | "Pickup Truck",
21 | "Utility Truck",
22 | "Truck",
23 | "Cargo Truck",
24 | "Truck w/Box",
25 | "Truck Tractor",
26 | "Trailer",
27 | "Truck w/Flatbed",
28 | "Truck w/Liquid",
29 | "Crane Truck",
30 | "Railway Vehicle",
31 | "Passenger Car",
32 | "Cargo Car",
33 | "Flat Car",
34 | "Tank car",
35 | "Locomotive",
36 | "Maritime Vessel",
37 | "Motorboat",
38 | "Sailboat",
39 | "Tugboat",
40 | "Barge",
41 | "Fishing Vessel",
42 | "Ferry",
43 | "Yacht",
44 | "Container Ship",
45 | "Oil Tanker",
46 | "Engineering Vehicle",
47 | "Tower crane",
48 | "Container Crane",
49 | "Reach Stacker",
50 | "Straddle Carrier",
51 | "Mobile Crane",
52 | "Dump Truck",
53 | "Haul Truck",
54 | "Scraper/Tractor",
55 | "Front loader/Bulldozer",
56 | "Excavator",
57 | "Cement Mixer",
58 | "Ground Grader",
59 | "Hut/Tent",
60 | "Shed",
61 | "Building",
62 | "Aircraft Hangar",
63 | "Damaged Building",
64 | "Facility",
65 | "Construction Site",
66 | "Vehicle Lot",
67 | "Helipad",
68 | "Storage Tank",
69 | "Shipping container lot",
70 | "Shipping Container",
71 | "Pylon",
72 | "Tower",
73 | )
74 |
75 | # model settings
76 | model = dict(
77 | bbox_head=dict(
78 | num_classes=NUM_CLASSES,
79 | ),
80 | )
81 |
82 | # dataset settings
83 | img_norm_cfg = dict(mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
84 | train_pipeline = [
85 | dict(type="LoadImageFromFile"),
86 | dict(type="LoadAnnotations", with_bbox=True),
87 | dict(
88 | type="AutoAugment",
89 | policies=[
90 | [
91 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
92 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
93 | ],
94 | [
95 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
96 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
97 | ],
98 | [
99 | dict(type="RandomCrop", crop_type="absolute_range", crop_size=(300, 500), allow_negative_crop=True),
100 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
101 | ],
102 | [
103 | dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
104 | ],
105 | ],
106 | ),
107 | dict(type="RandomFlip", flip_ratio=0.5),
108 | dict(type="Normalize", **img_norm_cfg),
109 | dict(type="Pad", size_divisor=32),
110 | dict(type="DefaultFormatBundle"),
111 | dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
112 | ]
113 | test_pipeline = [
114 | dict(type="LoadImageFromFile"),
115 | dict(
116 | type="MultiScaleFlipAug",
117 | img_scale=(1333, 800),
118 | flip=False,
119 | transforms=[
120 | dict(type="Resize", keep_ratio=True),
121 | dict(type="RandomFlip"),
122 | dict(type="Normalize", **img_norm_cfg),
123 | dict(type="Pad", size_divisor=32),
124 | dict(type="ImageToTensor", keys=["img"]),
125 | dict(type="Collect", keys=["img"]),
126 | ],
127 | ),
128 | ]
129 |
130 | data = dict(
131 | samples_per_gpu=2 * BATCH_MULTIPLIER,
132 | workers_per_gpu=4,
133 | train=dict(
134 | type="RepeatDataset",
135 | times=DATASET_REPEAT,
136 | dataset=dict(
137 | type="CocoDataset",
138 | classes=CLASSES,
139 | ann_file=DATA_ROOT + "coco/train.json",
140 | img_prefix=DATA_ROOT + "train_images/",
141 | pipeline=train_pipeline,
142 | ),
143 | ),
144 | val=dict(
145 | classes=CLASSES,
146 | ann_file=DATA_ROOT + "sliced/val_400_0.json",
147 | img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
148 | pipeline=test_pipeline,
149 | ),
150 | test=dict(
151 | classes=CLASSES,
152 | ann_file=DATA_ROOT + "sliced/val_400_0.json",
153 | img_prefix=DATA_ROOT + "sliced/val_images_400_0/",
154 | pipeline=test_pipeline,
155 | ),
156 | )
157 |
158 | # optimizer
159 | # the default base lr (0.01) assumes 8 gpus;
160 | # divide it by 8 for a single gpu, then scale by the multipliers below
161 | optimizer = dict(
162 | lr=0.01 / 8 * BATCH_MULTIPLIER * LR_MULTIPLIER, paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0)
163 | )
164 |
165 | checkpoint_config = dict(interval=1, max_keep_ckpts=1, save_optimizer=False)
166 | evaluation = dict(interval=EVAL_INTERVAL, metric="bbox", save_best="auto")
167 |
168 | # learning policy
169 | lr_config = dict(policy="step", warmup="constant", warmup_iters=500, warmup_ratio=1.0 / 3, step=[16, 22])
170 | runner = dict(type="EpochBasedRunner", max_epochs=24)
171 |
172 | # logger settings
173 | log_config = dict(
174 | interval=50,
175 | hooks=[
176 | dict(type="TextLoggerHook"),
177 | dict(type="TensorboardLoggerHook", reset_flag=False),
178 | ],
179 | )
180 |
181 | load_from = "https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth"
182 | work_dir = f"runs/xview/{EXP_NAME}/"
183 |
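184 | # note: img_norm_cfg above differs from the tood/vfnet configs: it uses
185 | # caffe-style normalization (BGR channel means, unit std, to_rgb=False) to
186 | # match the caffe-pretrained ResNet backbone of the fcos base config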
--------------------------------------------------------------------------------
/xview/xview_to_coco.py:
--------------------------------------------------------------------------------
1 | import random
2 | from collections import defaultdict
3 | from pathlib import Path
4 | from typing import Dict, List
5 |
6 | import fire
7 | import numpy as np
8 | from PIL import Image
9 | from sahi.utils.coco import Coco, CocoAnnotation, CocoCategory, CocoImage
10 | from sahi.utils.file import load_json, save_json
11 | from tqdm import tqdm
12 |
13 | # fix the seed
14 | random.seed(13)
15 |
16 |
17 | def xview_to_coco(
18 | train_images_dir,
19 | train_geojson_path,
20 | output_dir,
21 | train_split_rate=0.75,
22 | category_id_remapping=None,
23 | ):
24 | """
25 |     Converts xView annotations into COCO format and splits them into train/val sets.
26 |
27 | Args:
28 | train_images_dir: str
29 | 'train_images' folder directory
30 | train_geojson_path: str
31 | 'xView_train.geojson' file path
32 | output_dir: str
33 | Output folder directory
34 |         train_split_rate: float
35 | Train split ratio
36 | category_id_remapping: dict
37 | Used for selecting desired category ids and mapping them.
38 | If not provided, xView mapping will be used.
39 | format: str(id) to str(id)
40 | """
41 |
42 | # init vars
43 | category_id_to_name = {}
44 | with open("xview/xview_class_labels.txt", encoding="utf8") as f:
45 | lines = f.readlines()
46 | for line in lines:
47 | category_id = line.split(":")[0]
48 | category_name = line.split(":")[1].replace("\n", "")
49 | category_id_to_name[category_id] = category_name
50 |
51 | if category_id_remapping is None:
52 | category_id_remapping = load_json("xview/category_id_mapping.json")
53 |
54 |
55 | # init coco object
56 | coco = Coco()
57 | # append categories
58 | for category_id, category_name in category_id_to_name.items():
59 | if category_id in category_id_remapping.keys():
60 | remapped_category_id = category_id_remapping[category_id]
61 | coco.add_category(
62 | CocoCategory(id=int(remapped_category_id), name=category_name)
63 | )
64 |
65 | # parse xview data
66 | coords, chips, classes, image_name_to_annotation_ind = get_labels(
67 | train_geojson_path
68 | )
69 | image_name_list = get_ordered_image_name_list(image_name_to_annotation_ind)
70 |
71 | # convert xView data to COCO format
72 | for image_name in tqdm(image_name_list, "Converting xView data into COCO format"):
73 | # create coco image object
74 | width, height = Image.open(Path(train_images_dir) / image_name).size
75 | coco_image = CocoImage(file_name=image_name, height=height, width=width)
76 |
77 | annotation_ind_list = image_name_to_annotation_ind[image_name]
78 |
79 | # iterate over image annotations
80 | for annotation_ind in annotation_ind_list:
81 | bbox = coords[annotation_ind].tolist()
82 | category_id = str(int(classes[annotation_ind].item()))
83 | coco_bbox = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
84 | if category_id in category_id_remapping.keys():
85 | category_name = category_id_to_name[category_id]
86 | remapped_category_id = category_id_remapping[category_id]
87 | else:
88 | continue
89 | # create coco annotation and append it to coco image
90 | coco_annotation = CocoAnnotation(
91 | bbox=coco_bbox,
92 | category_id=int(remapped_category_id),
93 | category_name=category_name,
94 | )
95 | if coco_annotation.area > 0:
96 | coco_image.add_annotation(coco_annotation)
97 | coco.add_image(coco_image)
98 |
99 | result = coco.split_coco_as_train_val(train_split_rate=train_split_rate)
100 |
101 | train_json_path = Path(output_dir) / "train.json"
102 | val_json_path = Path(output_dir) / "val.json"
103 | save_json(data=result["train_coco"].json, save_path=train_json_path)
104 | save_json(data=result["val_coco"].json, save_path=val_json_path)
105 |
106 |
107 | def get_ordered_image_name_list(image_name_to_annotation_ind: Dict):
108 | image_name_list: List[str] = list(image_name_to_annotation_ind.keys())
109 |
110 | def get_image_ind(image_name: str):
111 | return int(image_name.split(".")[0])
112 |
113 | image_name_list.sort(key=get_image_ind)
114 |
115 | return image_name_list
116 |
117 |
118 | def get_labels(fname):
119 | """
120 | Gets label data from a geojson label file
121 | Args:
122 | fname: file path to an xView geojson label file
123 | Output:
124 | Returns three arrays: coords, chips, and classes corresponding to the
125 | coordinates, file-names, and classes for each ground truth.
126 | Modified from https://github.com/DIUx-xView.
127 | """
128 | data = load_json(fname)
129 |
130 | coords = np.zeros((len(data["features"]), 4))
131 | chips = np.zeros((len(data["features"])), dtype="object")
132 | classes = np.zeros((len(data["features"])))
133 | image_name_to_annotation_ind = defaultdict(list)
134 |
135 | for i in tqdm(range(len(data["features"])), "Parsing xView data"):
136 | if data["features"][i]["properties"]["bounds_imcoords"] != []:
137 | b_id = data["features"][i]["properties"]["image_id"]
138 | # https://github.com/DIUx-xView/xView1_baseline/issues/3
139 | if b_id == "1395.tif":
140 | continue
141 | val = np.array(
142 | [
143 | int(num)
144 | for num in data["features"][i]["properties"][
145 | "bounds_imcoords"
146 | ].split(",")
147 | ]
148 | )
149 | chips[i] = b_id
150 | classes[i] = data["features"][i]["properties"]["type_id"]
151 |
152 | image_name_to_annotation_ind[b_id].append(i)
153 |
154 | if val.shape[0] != 4:
155 | print("Issues at %d!" % i)
156 | else:
157 | coords[i] = val
158 | else:
159 | chips[i] = "None"
160 |
161 | return coords, chips, classes, image_name_to_annotation_ind
162 |
163 |
164 | if __name__ == "__main__":
165 | fire.Fire(xview_to_coco)
166 |
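167 | # example CLI usage via python-fire (paths are illustrative; run from the repo
168 | # root so that "xview/xview_class_labels.txt" above resolves):
169 | #   python xview/xview_to_coco.py \
170 | #       --train_images_dir data/xview/train_images \
171 | #       --train_geojson_path data/xview/xView_train.geojson \
172 | #       --output_dir data/xview/coco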
--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/cascade_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='CascadeRCNN',
4 | backbone=dict(
5 | type='ResNet',
6 | depth=50,
7 | num_stages=4,
8 | out_indices=(0, 1, 2, 3),
9 | frozen_stages=1,
10 | norm_cfg=dict(type='BN', requires_grad=True),
11 | norm_eval=True,
12 | style='pytorch',
13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
35 | roi_head=dict(
36 | type='CascadeRoIHead',
37 | num_stages=3,
38 | stage_loss_weights=[1, 0.5, 0.25],
39 | bbox_roi_extractor=dict(
40 | type='SingleRoIExtractor',
41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
42 | out_channels=256,
43 | featmap_strides=[4, 8, 16, 32]),
44 | bbox_head=[
45 | dict(
46 | type='Shared2FCBBoxHead',
47 | in_channels=256,
48 | fc_out_channels=1024,
49 | roi_feat_size=7,
50 | num_classes=80,
51 | bbox_coder=dict(
52 | type='DeltaXYWHBBoxCoder',
53 | target_means=[0., 0., 0., 0.],
54 | target_stds=[0.1, 0.1, 0.2, 0.2]),
55 | reg_class_agnostic=True,
56 | loss_cls=dict(
57 | type='CrossEntropyLoss',
58 | use_sigmoid=False,
59 | loss_weight=1.0),
60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
61 | loss_weight=1.0)),
62 | dict(
63 | type='Shared2FCBBoxHead',
64 | in_channels=256,
65 | fc_out_channels=1024,
66 | roi_feat_size=7,
67 | num_classes=80,
68 | bbox_coder=dict(
69 | type='DeltaXYWHBBoxCoder',
70 | target_means=[0., 0., 0., 0.],
71 | target_stds=[0.05, 0.05, 0.1, 0.1]),
72 | reg_class_agnostic=True,
73 | loss_cls=dict(
74 | type='CrossEntropyLoss',
75 | use_sigmoid=False,
76 | loss_weight=1.0),
77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
78 | loss_weight=1.0)),
79 | dict(
80 | type='Shared2FCBBoxHead',
81 | in_channels=256,
82 | fc_out_channels=1024,
83 | roi_feat_size=7,
84 | num_classes=80,
85 | bbox_coder=dict(
86 | type='DeltaXYWHBBoxCoder',
87 | target_means=[0., 0., 0., 0.],
88 | target_stds=[0.033, 0.033, 0.067, 0.067]),
89 | reg_class_agnostic=True,
90 | loss_cls=dict(
91 | type='CrossEntropyLoss',
92 | use_sigmoid=False,
93 | loss_weight=1.0),
94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
95 | ]),
96 | # model training and testing settings
97 | train_cfg=dict(
98 | rpn=dict(
99 | assigner=dict(
100 | type='MaxIoUAssigner',
101 | pos_iou_thr=0.7,
102 | neg_iou_thr=0.3,
103 | min_pos_iou=0.3,
104 | match_low_quality=True,
105 | ignore_iof_thr=-1),
106 | sampler=dict(
107 | type='RandomSampler',
108 | num=256,
109 | pos_fraction=0.5,
110 | neg_pos_ub=-1,
111 | add_gt_as_proposals=False),
112 | allowed_border=0,
113 | pos_weight=-1,
114 | debug=False),
115 | rpn_proposal=dict(
116 | nms_pre=2000,
117 | max_per_img=2000,
118 | nms=dict(type='nms', iou_threshold=0.7),
119 | min_bbox_size=0),
120 | rcnn=[
121 | dict(
122 | assigner=dict(
123 | type='MaxIoUAssigner',
124 | pos_iou_thr=0.5,
125 | neg_iou_thr=0.5,
126 | min_pos_iou=0.5,
127 | match_low_quality=False,
128 | ignore_iof_thr=-1),
129 | sampler=dict(
130 | type='RandomSampler',
131 | num=512,
132 | pos_fraction=0.25,
133 | neg_pos_ub=-1,
134 | add_gt_as_proposals=True),
135 | pos_weight=-1,
136 | debug=False),
137 | dict(
138 | assigner=dict(
139 | type='MaxIoUAssigner',
140 | pos_iou_thr=0.6,
141 | neg_iou_thr=0.6,
142 | min_pos_iou=0.6,
143 | match_low_quality=False,
144 | ignore_iof_thr=-1),
145 | sampler=dict(
146 | type='RandomSampler',
147 | num=512,
148 | pos_fraction=0.25,
149 | neg_pos_ub=-1,
150 | add_gt_as_proposals=True),
151 | pos_weight=-1,
152 | debug=False),
153 | dict(
154 | assigner=dict(
155 | type='MaxIoUAssigner',
156 | pos_iou_thr=0.7,
157 | neg_iou_thr=0.7,
158 | min_pos_iou=0.7,
159 | match_low_quality=False,
160 | ignore_iof_thr=-1),
161 | sampler=dict(
162 | type='RandomSampler',
163 | num=512,
164 | pos_fraction=0.25,
165 | neg_pos_ub=-1,
166 | add_gt_as_proposals=True),
167 | pos_weight=-1,
168 | debug=False)
169 | ]),
170 | test_cfg=dict(
171 | rpn=dict(
172 | nms_pre=1000,
173 | max_per_img=1000,
174 | nms=dict(type='nms', iou_threshold=0.7),
175 | min_bbox_size=0),
176 | rcnn=dict(
177 | score_thr=0.05,
178 | nms=dict(type='nms', iou_threshold=0.5),
179 | max_per_img=100)))
180 |
--------------------------------------------------------------------------------
/mmdet_configs/_base_/models/cascade_mask_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='CascadeRCNN',
4 | backbone=dict(
5 | type='ResNet',
6 | depth=50,
7 | num_stages=4,
8 | out_indices=(0, 1, 2, 3),
9 | frozen_stages=1,
10 | norm_cfg=dict(type='BN', requires_grad=True),
11 | norm_eval=True,
12 | style='pytorch',
13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
35 | roi_head=dict(
36 | type='CascadeRoIHead',
37 | num_stages=3,
38 | stage_loss_weights=[1, 0.5, 0.25],
39 | bbox_roi_extractor=dict(
40 | type='SingleRoIExtractor',
41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
42 | out_channels=256,
43 | featmap_strides=[4, 8, 16, 32]),
44 | bbox_head=[
45 | dict(
46 | type='Shared2FCBBoxHead',
47 | in_channels=256,
48 | fc_out_channels=1024,
49 | roi_feat_size=7,
50 | num_classes=80,
51 | bbox_coder=dict(
52 | type='DeltaXYWHBBoxCoder',
53 | target_means=[0., 0., 0., 0.],
54 | target_stds=[0.1, 0.1, 0.2, 0.2]),
55 | reg_class_agnostic=True,
56 | loss_cls=dict(
57 | type='CrossEntropyLoss',
58 | use_sigmoid=False,
59 | loss_weight=1.0),
60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
61 | loss_weight=1.0)),
62 | dict(
63 | type='Shared2FCBBoxHead',
64 | in_channels=256,
65 | fc_out_channels=1024,
66 | roi_feat_size=7,
67 | num_classes=80,
68 | bbox_coder=dict(
69 | type='DeltaXYWHBBoxCoder',
70 | target_means=[0., 0., 0., 0.],
71 | target_stds=[0.05, 0.05, 0.1, 0.1]),
72 | reg_class_agnostic=True,
73 | loss_cls=dict(
74 | type='CrossEntropyLoss',
75 | use_sigmoid=False,
76 | loss_weight=1.0),
77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
78 | loss_weight=1.0)),
79 | dict(
80 | type='Shared2FCBBoxHead',
81 | in_channels=256,
82 | fc_out_channels=1024,
83 | roi_feat_size=7,
84 | num_classes=80,
85 | bbox_coder=dict(
86 | type='DeltaXYWHBBoxCoder',
87 | target_means=[0., 0., 0., 0.],
88 | target_stds=[0.033, 0.033, 0.067, 0.067]),
89 | reg_class_agnostic=True,
90 | loss_cls=dict(
91 | type='CrossEntropyLoss',
92 | use_sigmoid=False,
93 | loss_weight=1.0),
94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
95 | ],
96 | mask_roi_extractor=dict(
97 | type='SingleRoIExtractor',
98 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
99 | out_channels=256,
100 | featmap_strides=[4, 8, 16, 32]),
101 | mask_head=dict(
102 | type='FCNMaskHead',
103 | num_convs=4,
104 | in_channels=256,
105 | conv_out_channels=256,
106 | num_classes=80,
107 | loss_mask=dict(
108 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
109 | # model training and testing settings
110 | train_cfg=dict(
111 | rpn=dict(
112 | assigner=dict(
113 | type='MaxIoUAssigner',
114 | pos_iou_thr=0.7,
115 | neg_iou_thr=0.3,
116 | min_pos_iou=0.3,
117 | match_low_quality=True,
118 | ignore_iof_thr=-1),
119 | sampler=dict(
120 | type='RandomSampler',
121 | num=256,
122 | pos_fraction=0.5,
123 | neg_pos_ub=-1,
124 | add_gt_as_proposals=False),
125 | allowed_border=0,
126 | pos_weight=-1,
127 | debug=False),
128 | rpn_proposal=dict(
129 | nms_pre=2000,
130 | max_per_img=2000,
131 | nms=dict(type='nms', iou_threshold=0.7),
132 | min_bbox_size=0),
133 | rcnn=[
134 | dict(
135 | assigner=dict(
136 | type='MaxIoUAssigner',
137 | pos_iou_thr=0.5,
138 | neg_iou_thr=0.5,
139 | min_pos_iou=0.5,
140 | match_low_quality=False,
141 | ignore_iof_thr=-1),
142 | sampler=dict(
143 | type='RandomSampler',
144 | num=512,
145 | pos_fraction=0.25,
146 | neg_pos_ub=-1,
147 | add_gt_as_proposals=True),
148 | mask_size=28,
149 | pos_weight=-1,
150 | debug=False),
151 | dict(
152 | assigner=dict(
153 | type='MaxIoUAssigner',
154 | pos_iou_thr=0.6,
155 | neg_iou_thr=0.6,
156 | min_pos_iou=0.6,
157 | match_low_quality=False,
158 | ignore_iof_thr=-1),
159 | sampler=dict(
160 | type='RandomSampler',
161 | num=512,
162 | pos_fraction=0.25,
163 | neg_pos_ub=-1,
164 | add_gt_as_proposals=True),
165 | mask_size=28,
166 | pos_weight=-1,
167 | debug=False),
168 | dict(
169 | assigner=dict(
170 | type='MaxIoUAssigner',
171 | pos_iou_thr=0.7,
172 | neg_iou_thr=0.7,
173 | min_pos_iou=0.7,
174 | match_low_quality=False,
175 | ignore_iof_thr=-1),
176 | sampler=dict(
177 | type='RandomSampler',
178 | num=512,
179 | pos_fraction=0.25,
180 | neg_pos_ub=-1,
181 | add_gt_as_proposals=True),
182 | mask_size=28,
183 | pos_weight=-1,
184 | debug=False)
185 | ]),
186 | test_cfg=dict(
187 | rpn=dict(
188 | nms_pre=1000,
189 | max_per_img=1000,
190 | nms=dict(type='nms', iou_threshold=0.7),
191 | min_bbox_size=0),
192 | rcnn=dict(
193 | score_thr=0.05,
194 | nms=dict(type='nms', iou_threshold=0.5),
195 | max_per_img=100,
196 | mask_thr_binary=0.5)))
197 |
--------------------------------------------------------------------------------
/mmdet_tools/train.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import copy
4 | import os
5 | import os.path as osp
6 | import time
7 | import warnings
8 |
9 | import mmcv
10 | import torch
11 | from mmcv import Config, DictAction
12 | from mmcv.runner import get_dist_info, init_dist
13 | from mmcv.utils import get_git_hash
14 |
15 | from mmdet import __version__
16 | from mmdet.apis import init_random_seed, set_random_seed, train_detector
17 | from mmdet.datasets import build_dataset
18 | from mmdet.models import build_detector
19 | from mmdet.utils import collect_env, get_root_logger
20 |
21 |
22 | def parse_args():
23 | parser = argparse.ArgumentParser(description='Train a detector')
24 | parser.add_argument('config', help='train config file path')
25 | parser.add_argument('--work-dir', help='the dir to save logs and models')
26 | parser.add_argument(
27 | '--resume-from', help='the checkpoint file to resume from')
28 | parser.add_argument(
29 | '--auto-resume',
30 | action='store_true',
31 | help='resume from the latest checkpoint automatically')
32 | parser.add_argument(
33 | '--no-validate',
34 | action='store_true',
35 | help='whether not to evaluate the checkpoint during training')
36 | group_gpus = parser.add_mutually_exclusive_group()
37 | group_gpus.add_argument(
38 | '--gpus',
39 | type=int,
40 | help='number of gpus to use '
41 | '(only applicable to non-distributed training)')
42 | group_gpus.add_argument(
43 | '--gpu-ids',
44 | type=int,
45 | nargs='+',
46 | help='ids of gpus to use '
47 | '(only applicable to non-distributed training)')
48 | parser.add_argument('--seed', type=int, default=None, help='random seed')
49 | parser.add_argument(
50 | '--deterministic',
51 | action='store_true',
52 | help='whether to set deterministic options for CUDNN backend.')
53 | parser.add_argument(
54 | '--options',
55 | nargs='+',
56 | action=DictAction,
57 | help='override some settings in the used config, the key-value pair '
58 | 'in xxx=yyy format will be merged into config file (deprecate), '
59 | 'change to --cfg-options instead.')
60 | parser.add_argument(
61 | '--cfg-options',
62 | nargs='+',
63 | action=DictAction,
64 | help='override some settings in the used config, the key-value pair '
65 | 'in xxx=yyy format will be merged into config file. If the value to '
66 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
67 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
68 | 'Note that the quotation marks are necessary and that no white space '
69 | 'is allowed.')
70 | parser.add_argument(
71 | '--launcher',
72 | choices=['none', 'pytorch', 'slurm', 'mpi'],
73 | default='none',
74 | help='job launcher')
75 | parser.add_argument('--local_rank', type=int, default=0)
76 | args = parser.parse_args()
77 | if 'LOCAL_RANK' not in os.environ:
78 | os.environ['LOCAL_RANK'] = str(args.local_rank)
79 |
80 | if args.options and args.cfg_options:
81 | raise ValueError(
82 | '--options and --cfg-options cannot be both '
83 | 'specified, --options is deprecated in favor of --cfg-options')
84 | if args.options:
85 | warnings.warn('--options is deprecated in favor of --cfg-options')
86 | args.cfg_options = args.options
87 |
88 | return args
89 |
90 |
91 | def main():
92 | args = parse_args()
93 |
94 | cfg = Config.fromfile(args.config)
95 | if args.cfg_options is not None:
96 | cfg.merge_from_dict(args.cfg_options)
97 | # set cudnn_benchmark
98 | if cfg.get('cudnn_benchmark', False):
99 | torch.backends.cudnn.benchmark = True
100 |
101 | # work_dir is determined in this priority: CLI > segment in file > filename
102 | if args.work_dir is not None:
103 | # update configs according to CLI args if args.work_dir is not None
104 | cfg.work_dir = args.work_dir
105 | elif cfg.get('work_dir', None) is None:
106 | # use config filename as default work_dir if cfg.work_dir is None
107 | cfg.work_dir = osp.join('./work_dirs',
108 | osp.splitext(osp.basename(args.config))[0])
109 | if args.resume_from is not None:
110 | cfg.resume_from = args.resume_from
111 | cfg.auto_resume = args.auto_resume
112 | if args.gpu_ids is not None:
113 | cfg.gpu_ids = args.gpu_ids
114 | else:
115 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
116 |
117 | # init distributed env first, since logger depends on the dist info.
118 | if args.launcher == 'none':
119 | distributed = False
120 | if len(cfg.gpu_ids) > 1:
121 | warnings.warn(
122 | f'We treat {cfg.gpu_ids} as gpu-ids, and reset to '
123 | f'{cfg.gpu_ids[0:1]} as gpu-ids to avoid potential error in '
124 |                 'non-distributed training.')
125 | cfg.gpu_ids = cfg.gpu_ids[0:1]
126 | else:
127 | distributed = True
128 | init_dist(args.launcher, **cfg.dist_params)
129 | # re-set gpu_ids with distributed training mode
130 | _, world_size = get_dist_info()
131 | cfg.gpu_ids = range(world_size)
132 |
133 | # create work_dir
134 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
135 | # dump config
136 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
137 | # init the logger before other steps
138 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
139 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
140 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
141 |
142 | # init the meta dict to record some important information such as
143 | # environment info and seed, which will be logged
144 | meta = dict()
145 | # log env info
146 | env_info_dict = collect_env()
147 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
148 | dash_line = '-' * 60 + '\n'
149 | logger.info('Environment info:\n' + dash_line + env_info + '\n' +
150 | dash_line)
151 | meta['env_info'] = env_info
152 | meta['config'] = cfg.pretty_text
153 | # log some basic info
154 | logger.info(f'Distributed training: {distributed}')
155 | logger.info(f'Config:\n{cfg.pretty_text}')
156 |
157 | # set random seeds
158 | seed = init_random_seed(args.seed)
159 | logger.info(f'Set random seed to {seed}, '
160 | f'deterministic: {args.deterministic}')
161 | set_random_seed(seed, deterministic=args.deterministic)
162 | cfg.seed = seed
163 | meta['seed'] = seed
164 | meta['exp_name'] = osp.basename(args.config)
165 |
166 | model = build_detector(
167 | cfg.model,
168 | train_cfg=cfg.get('train_cfg'),
169 | test_cfg=cfg.get('test_cfg'))
170 | model.init_weights()
171 |
172 | datasets = [build_dataset(cfg.data.train)]
173 | if len(cfg.workflow) == 2:
174 | val_dataset = copy.deepcopy(cfg.data.val)
175 | val_dataset.pipeline = cfg.data.train.pipeline
176 | datasets.append(build_dataset(val_dataset))
177 | if cfg.checkpoint_config is not None:
178 | # save mmdet version, config file content and class names in
179 | # checkpoints as meta data
180 | cfg.checkpoint_config.meta = dict(
181 | mmdet_version=__version__ + get_git_hash()[:7],
182 | CLASSES=datasets[0].CLASSES)
183 | # add an attribute for visualization convenience
184 | model.CLASSES = datasets[0].CLASSES
185 | train_detector(
186 | model,
187 | datasets,
188 | cfg,
189 | distributed=distributed,
190 | validate=(not args.no_validate),
191 | timestamp=timestamp,
192 | meta=meta)
193 |
194 |
195 | if __name__ == '__main__':
196 | main()
197 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # small-object-detection-benchmark
2 |
3 |
4 |
5 |
6 | 🔥 our paper was presented at ICIP 2022 in Bordeaux, France (16-19 October 2022)
7 |
8 | [📜 List of publications that cite this work (currently 300+)](https://scholar.google.com/scholar?hl=en&as_sdt=2005&sciodt=0,5&cites=14065474760484865747&scipsc=&q=&scisbd=1)
9 |
10 | ## summary
11 |
12 | small-object-detection benchmark on visdrone and xview datasets using [fcos](https://arxiv.org/abs/1904.01355), [vfnet](https://arxiv.org/abs/2008.13367) and [tood](https://arxiv.org/abs/2108.07755) detectors
13 |
14 | refer to [Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection](https://ieeexplore.ieee.org/document/9897990) for the full technical analysis
15 |
16 | ## citation
17 |
18 | If you use any file/result from this repo in your work, please cite it as:
19 |
20 | ```
21 | @article{akyon2022sahi,
22 | title={Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection},
23 | author={Akyon, Fatih Cagatay and Altinuc, Sinan Onur and Temizel, Alptekin},
24 | journal={2022 IEEE International Conference on Image Processing (ICIP)},
25 | doi={10.1109/ICIP46576.2022.9897990},
26 | pages={966-970},
27 | year={2022}
28 | }
29 | ```
30 |
31 | ## visdrone results
32 |
33 | refer to table 1 in [Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection](https://ieeexplore.ieee.org/document/9897990) for more detail on visdrone results
34 |
35 | [fcos_fi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_fi_visdrone_results.zip
36 | [fcos_sahi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sahi_po_visdrone_results.zip
37 | [fcos_sahi_fi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sahi_fi_po_visdrone_results.zip
38 | [fcos_sf_sahi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sf_sahi_po_visdrone_results.zip
39 | [fcos_sf_sahi_fi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sf_sahi_fi_po_visdrone_results.zip
40 |
41 | [vfnet_fi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_fi_visdrone_results.zip
42 | [vfnet_sahi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sahi_po_visdrone_results.zip
43 | [vfnet_sahi_fi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sahi_fi_po_visdrone_results.zip
44 | [vfnet_sf_sahi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sf_sahi_po_visdrone_results.zip
45 | [vfnet_sf_sahi_fi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sf_sahi_fi_po_visdrone_results.zip
46 |
47 | [tood_fi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_fi_visdrone_results.zip
48 | [tood_sahi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sahi_visdrone_results.zip
49 | [tood_sahi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sahi_po_visdrone_results.zip
50 | [tood_sahi_fi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sahi_fi_visdrone_results.zip
51 | [tood_sahi_fi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sahi_fi_po_visdrone_results.zip
52 |
53 | [tood_sf_fi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_fi_visdrone_results.zip
54 | [tood_sf_sahi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_visdrone_results.zip
55 | [tood_sf_sahi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_po_visdrone_results.zip
56 | [tood_sf_sahi_fi_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_fi_visdrone_results.zip
57 | [tood_sf_sahi_fi_po_visdrone_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_fi_po_visdrone_results.zip
58 |
59 | [tood_sf_visdrone_checkpoint_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.2/tood_sf_visdrone.pth
60 | [fcos_sf_visdrone_checkpoint_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.2/fcos_sf_visdrone.pth
61 |
62 | [my_twitter_url]: https://twitter.com/fcakyon
63 |
64 | |setup |AP50 |AP50s |AP50m |AP50l | results | checkpoints |
65 | |--- |--- |--- |--- |--- |--- |--- |
66 | |FCOS+FI |25.8 |14.2 |39.6 |45.1 | [download][fcos_fi_visdrone_results_url] | [request][my_twitter_url] |
67 | |FCOS+SAHI+PO |29.0 |18.9 |41.5 |46.4 | [download][fcos_sahi_po_visdrone_results_url] | [request][my_twitter_url] |
68 | |FCOS+SAHI+FI+PO |31.0 |19.8 |44.6 |49.0 | [download][fcos_sahi_fi_po_visdrone_results_url] | [request][my_twitter_url] |
69 | |FCOS+SF+SAHI+PO |38.1 |25.7 |54.8 |56.9 | [download][fcos_sf_sahi_po_visdrone_results_url] | [download][fcos_sf_visdrone_checkpoint_url] |
70 | |FCOS+SF+SAHI+FI+PO |38.5 |25.9 |55.4 |59.8 | [download][fcos_sf_sahi_fi_po_visdrone_results_url] | [download][fcos_sf_visdrone_checkpoint_url] |
71 | |--- |--- |--- |--- |--- |--- |--- |
72 | |VFNet+FI |28.8 |16.8 |44.0 |47.5 | [download][vfnet_fi_visdrone_results_url] | [request][my_twitter_url] |
73 | |VFNet+SAHI+PO |32.0 |21.4 |45.8 |45.5 | [download][vfnet_sahi_po_visdrone_results_url] | [request][my_twitter_url] |
74 | |VFNet+SAHI+FI+PO |33.9 |22.4 |49.1 |49.4 | [download][vfnet_sahi_fi_po_visdrone_results_url] | [request][my_twitter_url] |
75 | |VFNet+SF+SAHI+PO |41.9 |29.7 |58.8 |60.6 | [download][vfnet_sf_sahi_po_visdrone_results_url] | [request][my_twitter_url] |
76 | |VFNet+SF+SAHI+FI+PO |42.2 |29.6 |59.2 |63.3 | [download][vfnet_sf_sahi_fi_po_visdrone_results_url] | [request][my_twitter_url] |
77 | |--- |--- |--- |--- |--- |--- |--- |
78 | |TOOD+FI |29.4 |18.1 |44.1 |50.0 | [download][tood_fi_visdrone_results_url] | [request][my_twitter_url] |
79 | |TOOD+SAHI |31.9 |22.6 |44.0 |45.2 | [download][tood_sahi_visdrone_results_url] | [request][my_twitter_url] |
80 | |TOOD+SAHI+PO |32.5 |22.8 |45.2 |43.6 | [download][tood_sahi_po_visdrone_results_url] | [request][my_twitter_url] |
81 | |TOOD+SAHI+FI |34.6 |23.8 |48.5 |53.1 | [download][tood_sahi_fi_visdrone_results_url] | [request][my_twitter_url] |
82 | |TOOD+SAHI+FI+PO |34.7 |23.8 |48.9 |50.3| [download][tood_sahi_fi_po_visdrone_results_url] | [request][my_twitter_url] |
83 | |TOOD+SF+FI |36.8 |24.4 |53.8 |66.4 | [download][tood_sf_fi_visdrone_results_url] | [download][tood_sf_visdrone_checkpoint_url] |
84 | |TOOD+SF+SAHI |42.5 |31.6 |58.0 |61.1 | [download][tood_sf_sahi_visdrone_results_url] | [download][tood_sf_visdrone_checkpoint_url] |
85 | |TOOD+SF+SAHI+PO |43.1 |31.7 |59.0 |60.2 | [download][tood_sf_sahi_po_visdrone_results_url] | [download][tood_sf_visdrone_checkpoint_url] |
86 | |TOOD+SF+SAHI+FI |43.4 |31.7 |59.6 |65.6 | [download][tood_sf_sahi_fi_visdrone_results_url] | [download][tood_sf_visdrone_checkpoint_url] |
87 | |TOOD+SF+SAHI+FI+PO |43.5 |31.7 |59.8 |65.4 | [download][tood_sf_sahi_fi_po_visdrone_results_url] | [download][tood_sf_visdrone_checkpoint_url] |
88 |
89 | ## xview results
90 |
91 | refer to table 2 in [Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection](https://ieeexplore.ieee.org/document/9897990) for more detail on xview results
92 |
93 | [fcos_fi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_fi_xview_results.zip
94 | [fcos_sf_sahi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sf_sahi_xview_results.zip
95 | [fcos_sf_sahi_fi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sf_sahi_fi_xview_results.zip
96 | [fcos_sf_sahi_fi_po_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sf_sahi_fi_op_xview_results.zip
97 | [fcos_sf_sahi_po_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/fcos_sf_sahi_op_xview_results.zip
98 |
99 | [vfnet_fi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_fi_xview_results.zip
100 | [vfnet_sf_sahi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sf_sahi_xview_results.zip
101 | [vfnet_sf_sahi_fi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sf_sahi_fi_xview_results.zip
102 | [vfnet_sf_sahi_fi_po_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sf_sahi_fi_op_xview_results.zip
103 | [vfnet_sf_sahi_po_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/vfnet_sf_sahi_op_xview_results.zip
104 |
105 | [tood_fi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_fi_xview_results.zip
106 | [tood_sf_sahi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_xview_results.zip
107 | [tood_sf_sahi_fi_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_fi_xview_results.zip
108 | [tood_sf_sahi_fi_po_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_fi_op_xview_results.zip
109 | [tood_sf_sahi_po_xview_results_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.1/tood_sf_sahi_op_xview_results.zip
110 |
111 | [fcos_sf_xview_checkpoint_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.2/fcos_sf_xview.pth
112 | [vfnet_sf_xview_checkpoint_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.2/vfnet_sf_xview.pth
113 | [tood_sf_xview_checkpoint_url]: https://github.com/fcakyon/sahi-benchmark/releases/download/v0.0.2/tood_sf_xview.pth
114 |
115 | |setup |AP50 |AP50s |AP50m |AP50l | results | checkpoints |
116 | |--- |--- |--- |--- |--- |--- |--- |
117 | |FCOS+FI |2.20 |0.10 |1.80 |7.30 | [download][fcos_fi_xview_results_url] | [request][my_twitter_url] |
118 | |FCOS+SF+SAHI |15.8 |11.9 |18.4 |11.0 | [download][fcos_sf_sahi_xview_results_url] | [download][fcos_sf_xview_checkpoint_url] |
119 | |FCOS+SF+SAHI+PO |17.1 |12.2 |20.2 |12.8 | [download][fcos_sf_sahi_po_xview_results_url] | [download][fcos_sf_xview_checkpoint_url] |
120 | |FCOS+SF+SAHI+FI |15.7 |11.9 |18.4 |14.3 | [download][fcos_sf_sahi_fi_xview_results_url] | [download][fcos_sf_xview_checkpoint_url] |
121 | |FCOS+SF+SAHI+FI+PO |17.0 |12.2 |20.2 |15.8 | [download][fcos_sf_sahi_fi_po_xview_results_url] | [download][fcos_sf_xview_checkpoint_url] |
122 | |--- |--- |--- |--- |--- |--- |--- |
123 | |VFNet+FI |2.10 |0.50 |1.80 |6.80 | [download][vfnet_fi_xview_results_url] | [request][my_twitter_url] |
124 | |VFNet+SF+SAHI | 16.0 |11.9 |17.6 |13.1 | [download][vfnet_sf_sahi_xview_results_url] | [download][vfnet_sf_xview_checkpoint_url] |
125 | |VFNet+SF+SAHI+PO |17.7| 13.7 |19.7 |15.4 | [download][vfnet_sf_sahi_po_xview_results_url] | [download][vfnet_sf_xview_checkpoint_url] |
126 | |VFNet+SF+SAHI+FI |15.8 |11.9 |17.5 |15.2 | [download][vfnet_sf_sahi_fi_xview_results_url] | [download][vfnet_sf_xview_checkpoint_url] |
127 | |VFNet+SF+SAHI+FI+PO |17.5 |13.7 |19.6 |17.6 | [download][vfnet_sf_sahi_fi_po_xview_results_url] | [download][vfnet_sf_xview_checkpoint_url] |
128 | |--- |--- |--- |--- |--- |--- |--- |
129 | |TOOD+FI |2.10 |0.10 |2.00 |5.20 | [download][tood_fi_xview_results_url] | [request][my_twitter_url] |
130 | |TOOD+SF+SAHI |19.4 |14.6 |22.5 |14.2 | [download][tood_sf_sahi_xview_results_url] | [download][tood_sf_xview_checkpoint_url] |
131 | |TOOD+SF+SAHI+PO |20.6 |14.9 |23.6 |17.0 | [download][tood_sf_sahi_po_xview_results_url] | [download][tood_sf_xview_checkpoint_url] |
132 | |TOOD+SF+SAHI+FI |19.2 |14.6 |22.3 |14.7 | [download][tood_sf_sahi_fi_xview_results_url] | [download][tood_sf_xview_checkpoint_url] |
133 | |TOOD+SF+SAHI+FI+PO |20.4 |14.9 |23.5 |17.6 | [download][tood_sf_sahi_fi_po_xview_results_url] | [download][tood_sf_xview_checkpoint_url] |
134 |
135 | ## env setup
136 |
137 | install pytorch:
138 |
139 | ```bash
140 | conda install pytorch=1.10.0 torchvision=0.11.1 cudatoolkit=11.3 -c pytorch
141 | ```
142 |
143 | install other requirements:
144 |
145 | ```bash
146 | pip install -r requirements.txt
147 | ```
148 |
149 | ## evaluation
150 |
151 | - download the desired checkpoint from the urls in this readme.
152 |
153 | - download the xview or visdrone dataset and convert it to COCO format using the scripts under `xview/` and `visdrone/`.
154 |
155 | - set `MODEL_PATH`, `MODEL_CONFIG_PATH`, `EVAL_IMAGES_FOLDER_DIR`, `EVAL_DATASET_JSON_PATH`, `INFERENCE_SETTING` in the [predict_evaluate_analyse script](eval_tools/predict_evaluate_analyse.py), then run the script as shown below.
156 |
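157 | a minimal run, assuming the script is launched from the repo root and takes no further CLI arguments:
158 |
159 | ```bash
160 | python eval_tools/predict_evaluate_analyse.py
161 | ```
162 |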
157 | ## roadmap
158 |
159 | - [x] add train test split support for xview to coco converter
160 | - [x] add mmdet config files (fcos, vfnet and tood) for xview training (9 train experiments)
161 | - [x] add mmdet config files (fcos, vfnet and tood) for visdrone training (9 train experiments)
162 | - [x] add coco result.json files, classwise coco eval results error analysis plots for all xview experiments
163 | - [x] add coco result.json files, classwise coco eval results error analysis plots for all visdrone experiments
164 | - [x] add .py scripts for inference + evaluation + error analysis using `sahi`
165 |
--------------------------------------------------------------------------------