├── img ├── HR.jpg ├── Angle.jpg ├── Class.jpg ├── Height.jpg ├── Sample.jpg ├── differ.jpg ├── domain.jpg └── first.jpg ├── split_configs ├── ss_test.json ├── ss_val.json └── ss_train.json ├── Vis_CODrone_GT.py ├── README.md └── CODrone_Split.py /img/HR.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/HR.jpg -------------------------------------------------------------------------------- /img/Angle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/Angle.jpg -------------------------------------------------------------------------------- /img/Class.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/Class.jpg -------------------------------------------------------------------------------- /img/Height.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/Height.jpg -------------------------------------------------------------------------------- /img/Sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/Sample.jpg -------------------------------------------------------------------------------- /img/differ.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/differ.jpg -------------------------------------------------------------------------------- /img/domain.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/domain.jpg -------------------------------------------------------------------------------- /img/first.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/first.jpg -------------------------------------------------------------------------------- /split_configs/ss_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "nproc": 10, 3 | "img_dirs": [ 4 | "your image directory" 5 | ], 6 | "ann_dirs": ["your annotation directory"], 7 | "sizes": [ 8 | 1180 9 | ], 10 | "gaps": [ 11 | 200 12 | ], 13 | "rates": [ 14 | 1.0 15 | ], 16 | "img_rate_thr": 0.6, 17 | "iof_thr": 0.7, 18 | "no_padding": false, 19 | "padding_value": [ 20 | 104, 21 | 116, 22 | 124 23 | ], 24 | "save_dir": "your output directory", 25 | "save_ext": ".png" 26 | } 27 | -------------------------------------------------------------------------------- /split_configs/ss_val.json: -------------------------------------------------------------------------------- 1 | { 2 | "nproc": 10, 3 | "img_dirs": [ 4 | "your image directory" 5 | ], 6 | "ann_dirs": [ 7 | "your annotation directory" 8 | ], 9 | "sizes": [ 10 | 1180 11 | ], 12 | "gaps": [ 13 | 200 14 | ], 15 | "rates": [ 16 | 1.0 17 | ], 18 | "img_rate_thr": 0.6, 19 | "iof_thr": 0.7, 20 | "no_padding": false, 21 | "padding_value": [ 22 | 104, 23 | 116, 24 | 124 25 | ], 26 | "save_dir": "your save directory", 27 | "save_ext": ".png" 28 
import os

import cv2
import numpy as np

# BGR color used when a class has no entry in ``class_colors``.
_DEFAULT_COLOR = (0, 255, 0)
# File suffixes (lower-case) that ``main`` treats as images.
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.tif')


def _parse_dota_line(line):
    """Parse one DOTA annotation line into ``(coords, class_name)``.

    Returns ``None`` for lines that are not object annotations (blank lines,
    ``imagesource:``/``gsd:`` headers, or lines with fewer than nine fields).
    """
    parts = line.split()
    if len(parts) < 9:
        return None
    try:
        values = [float(v) for v in parts[:8]]
    except ValueError:
        # The first eight fields are not all numeric: not an object line.
        return None
    coords = np.array(values).reshape(4, 2).astype(np.int32)
    return coords, parts[8]


def draw_boxes(image_path, label_path, output_path=None, class_colors=None, thickness=2):
    """
    Draw oriented bounding boxes (OBB) from DOTA-format annotations.

    Args:
        image_path (str): Path to the input image.
        label_path (str): Path to the annotation (.txt) file.
        output_path (str, optional): Path to save the output image. If None,
            nothing is written and the annotated image is only returned.
        class_colors (dict[str, tuple[int, int, int]], optional):
            Mapping of class name to BGR color, e.g. {"plane": (0,255,0)}.
            Defaults to green if not provided.
        thickness (int, optional): Line thickness of the bounding box. Defaults to 2.

    Returns:
        np.ndarray: The image with boxes and class names drawn on it.

    Raises:
        FileNotFoundError: If the image cannot be read.
        OSError: If the label file cannot be opened or the result cannot be
            written to ``output_path``.
    """
    # cv2.imread does not raise on failure, it returns None.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Cannot read image: {image_path}")

    colors = class_colors or {}

    with open(label_path, 'r') as f:
        for line in f:
            parsed = _parse_dota_line(line)
            if parsed is None:
                continue
            coords, cls_name = parsed
            color = colors.get(cls_name, _DEFAULT_COLOR)

            # Draw oriented bounding box
            cv2.polylines(image, [coords], isClosed=True, color=color, thickness=thickness)

            # Draw class label text at box center; OpenCV expects plain
            # Python ints for the text origin, not numpy scalars.
            cx, cy = (int(v) for v in coords.mean(axis=0))
            cv2.putText(
                image, cls_name, (cx, cy),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA
            )

    if output_path is not None:
        out_dir = os.path.dirname(output_path)
        # dirname is '' for a bare filename, and os.makedirs('') raises.
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        if not cv2.imwrite(output_path, image):
            raise OSError(f"Failed to write visualization: {output_path}")

    return image


def main():
    """Batch visualization of DOTA-format ground truth annotations."""
    img_dir = "your image directory"
    label_dir = "your label directory"
    output_dir = "your output directory"

    os.makedirs(output_dir, exist_ok=True)

    for img_name in sorted(os.listdir(img_dir)):
        if not img_name.lower().endswith(_IMAGE_EXTENSIONS):
            continue

        img_path = os.path.join(img_dir, img_name)
        label_path = os.path.join(label_dir, os.path.splitext(img_name)[0] + ".txt")
        out_path = os.path.join(output_dir, img_name)

        # An image without an annotation file should not abort the batch.
        if not os.path.isfile(label_path):
            print(f"[WARN] No annotation for {img_name}, skipped")
            continue

        draw_boxes(img_path, label_path, out_path)


if __name__ == "__main__":
    main()

2 | 3 |

CODrone: A Comprehensive Oriented Object Detection benchmark for UAV

4 |

5 | Paper PDF 6 | CODrone 7 |

8 | 9 | 10 | 11 | **😊 TL;DR** 12 | 13 | CODrone is a comprehensive oriented object detection dataset for UAVs that accurately reflects real-world conditions. 14 | 15 |
16 | 17 | 18 | 19 | 22 | 25 | 26 |
20 | 21 | 23 | 24 |
27 | 28 |
29 | 30 | **⭐ Key contributions of CODrone**: 31 | 32 | - We proposed a **large-scale**, **high-resolution** **UAV**-**oriented object detection** dataset, CODrone, which consists of over ten thousand UAV-captured images with precise **oriented bounding box** annotations and **diverse object categories**. 33 | - The proposed CODrone dataset considers multiple influential factors, including **image acquisition altitude**, **camera perspective**, **lighting conditions**, and **geographic location**. 34 | - Based on the proposed dataset, we establish a UAV-oriented object detection benchmark and conduct training and evaluation using a range of representative state-of-the-art methods. 35 | 36 | **🚀 Try it out!!!**: 37 | 38 | - 📥 [Download from Google Drive](https://drive.google.com/file/d/1FQ6mUaOr_kATDaH7N2bObD5SRRkV7qJy/view?usp=sharing) 39 | 40 | - 📥 [Download from Quark Drive](https://pan.quark.cn/s/6028b38fe7c8) 41 | 42 | ```shell 43 | CODrone/ 44 | ├── test/ 45 | │ ├── annfile/ # DOTA format annotation 46 | │ ├── images/ 47 | │ └── labels/ # VOC format annotation 48 | ├── train/ 49 | │ ├── annfile/ 50 | │ ├── images/ 51 | │ └── labels/ 52 | └── val/ 53 | ├── annfile/ 54 | ├── images/ 55 | └── labels/ 56 | ``` 57 | 58 | ## Characteristics 59 |
60 | 61 | | Dataset | Resolution | Categories | Altitude Gap | Camera Angles | Images | Objects | OBB | 62 | |:--------------:|:----------:|:----------:|:------------:|:-------------:|:------:|:-------:|:---:| 63 | | VisDrone2019 | 2000×1500 | 10 | * | * | 10.2k | 54.2k | | 64 | | UAVDT | 1080×540 | 3 | 60m | * | 80.0k | 841.5k | | 65 | | AU-AIR | 1920×1080 | 8 | 25m | 45 | 3.2k | 132.0k | | 66 | | CARPK | 1280×720 | 1 | * | * | 1.4k | 89.7k | | 67 | | HazyDet | 1333×800 | 3 | * | * | 11.6k | 383.0k |   | 68 | | DroneVehicle | 840×712 | 5 | 40m | 30 | 56.8k | 953.0k | ✅ | 69 | | UAV-ROD | 1920×1080 | 1 | 50m | * | 1.5k | 30.0k | ✅ | 70 | | **CODrone (ours)** | **3840×2160** | **12** | **70m** | **60** | 10.0k | 596.7k | ✅ | 71 | 72 |
73 | We present a comparison between CODrone and other commonly used UAV-based object detection datasets. 74 | CODrone significantly expands several key dimensions, including image resolution, object category diversity, and variation in flight altitude and camera angle. 75 | For resolution, CODrone employs a 3840 × 2160 high-resolution onboard camera, aligning with the capabilities of modern UAV hardware. 76 | In terms of object classes, unlike most existing UAV oriented object detection (OOD) datasets that focus primarily on vehicles, CODrone includes a more diverse range of categories, thereby increasing the difficulty and realism of the detection task. 77 | Furthermore, we explicitly annotate both altitude and camera angle for each image, enabling research into UAV pose-aware perception and related tasks. 78 | 79 | 80 | ### High resolution brings more high-quality information 81 |
82 | 83 | 84 | 85 |
86 | 87 | CODrone employs a **3840 × 2160** high-resolution onboard camera, aligning with the capabilities of modern UAV hardware. 88 | 89 | ### Multi-altitude and multi-angle captures for broad flight scenario adaptation 90 | 91 |
92 | 93 | 94 | 95 | 98 | 101 | 102 |
96 | 97 | 99 | 100 |
103 | 104 |
105 | 106 | The UAV was configured to capture imagery from two camera angles (**30°** and **90°**) and at three flight altitudes (**30 m**, **60 m**, and **100 m**), resulting in a total of **6** unique viewpoint combinations. 107 | 108 | 109 | ### More diverse scenes, broader application potential 110 |
111 | 112 | 113 | 114 |
115 | 116 | CODrone covers a wide range of environments, from urban areas and rural towns to ports and industrial zones, encompassing most scene types encountered in real-world UAV-based urban applications. 117 | 118 | ## How to visualize CODrone annotations 119 | 120 | 1) Install 121 | ```bash 122 | pip install opencv-python numpy 123 | ``` 124 | 125 | 2) Edit paths in `Vis_CODrone_GT.py` (inside `main()`): 126 | ```python 127 | img_dir = "CODrone/train/images" 128 | label_dir = "CODrone/train/annfile" 129 | output_dir = "CODrone/train/viz" 130 | ``` 131 | 132 | 3) Run 133 | ```bash 134 | python Vis_CODrone_GT.py 135 | ``` 136 | 137 | ## How to split CODrone images 138 | 139 | 1) Install 140 | ```bash 141 | pip install opencv-python numpy pillow shapely 142 | ``` 143 | 144 | 2) Run (DOTA-style sliding window & IOF assignment) — **direct args** 145 | ```bash 146 | python CODrone_Split.py --img-dirs CODrone/train/images --ann-dirs CODrone/train/annfile --sizes 1024 --gaps 512 --rates 1.0 --img-rate-thr 0.6 --iof-thr 0.7 --save-dir CODrone/train/splits --save-ext .png --nproc 8 147 | ``` 148 | 149 | or **via JSON config** (recommended; CLI flags override JSON): 150 | ```bash 151 | # examples (pick one) 152 | python CODrone_Split.py --base-json split_configs/ss_train.json 153 | python CODrone_Split.py --base-json split_configs/ss_val.json 154 | python CODrone_Split.py --base-json split_configs/ss_test.json 155 | ``` 156 | 157 | 3) Output 158 | - Patches: `CODrone/train/splits/images/` 159 | - DOTA style txts for patches: `CODrone/train/splits/annfiles/` 160 | 161 | **Note:** Tiling (sizes/gaps/rates), IOF-based object assignment, truncation handling, and label writing strictly follow **DOTA devkit** conventions. 
import argparse
import codecs
import datetime
import itertools
import json
import logging
import os
import os.path as osp
import time
from functools import partial, reduce
from math import ceil
from multiprocessing import Manager, Pool

import numpy as np

# Heavy third-party packages are imported defensively (same pattern the file
# already used for shapely) so that the pure-numpy geometry helpers stay
# importable; ``main`` still fails fast when a required package is missing.
try:
    import cv2
except ImportError:
    cv2 = None

try:
    from PIL import Image
except ImportError:
    Image = None
else:
    # Disable Pillow's decompression-bomb guard: UAV / aerial images are huge.
    Image.MAX_IMAGE_PIXELS = None

try:
    import shapely.geometry as shgeo
except ImportError:
    shgeo = None

# Image suffixes (compared lower-case) that are picked up from ``img_dirs``.
_IMG_EXTENSIONS = frozenset(['.jpg', '.jpeg', '.png', '.tif', '.bmp'])
# Extensions accepted for ``--save-ext``.
_SAVE_EXTENSIONS = ('.png', '.jpg', '.bmp', '.tif')


def _require(module, name):
    """Raise an ImportError with install instructions if ``module`` is None."""
    if module is None:
        raise ImportError(f'Please run "pip install {name}" '
                          f'to install {name} first.')


def add_parser(parser):
    """Add the splitter's command-line arguments to ``parser``."""
    parser.add_argument(
        '--base-json',
        type=str,
        default=None,
        help='json config file for split images')
    parser.add_argument(
        '--nproc', type=int, default=10, help='the number of processes')

    # argument for loading data
    parser.add_argument(
        '--img-dirs',
        nargs='+',
        type=str,
        default=None,
        help='images dirs, must give a value')
    parser.add_argument(
        '--ann-dirs',
        nargs='+',
        type=str,
        default=None,
        help='annotations dirs, optional')

    # argument for splitting image
    parser.add_argument(
        '--sizes',
        nargs='+',
        type=int,
        default=[1024],
        help='the sizes of sliding windows')
    parser.add_argument(
        '--gaps',
        nargs='+',
        type=int,
        default=[512],
        help='the overlaps between adjacent sliding windows')
    parser.add_argument(
        '--rates',
        nargs='+',
        type=float,
        default=[1.],
        help='same as DOTA devkit rate, but only change windows size')
    parser.add_argument(
        '--img-rate-thr',
        type=float,
        default=0.6,
        help='the minimal rate of image in window and window')
    parser.add_argument(
        '--iof-thr',
        type=float,
        default=0.7,
        help='the minimal iof between a object and a window')
    parser.add_argument(
        '--no-padding',
        action='store_true',
        help='not padding patches in regular size')
    parser.add_argument(
        '--padding-value',
        nargs='+',
        type=int,
        default=[0],
        help='padding value, 1 or channel number')

    # argument for saving
    parser.add_argument(
        '--save-dir',
        type=str,
        default='.',
        help='to save pkl and split images')
    parser.add_argument(
        '--save-ext',
        type=str,
        default='.png',
        help='the extension of saving images')


def parse_args(argv=None):
    """Parse and validate arguments.

    Values found in ``--base-json`` replace the built-in defaults; flags given
    explicitly on the command line still take precedence over the JSON file.

    Args:
        argv (list[str], optional): Argument list to parse. Defaults to
            ``sys.argv[1:]`` (argparse behaviour) when None.

    Returns:
        argparse.Namespace: The validated arguments.

    Raises:
        AssertionError: If an argument combination is invalid.
    """
    parser = argparse.ArgumentParser(description='Splitting images')
    add_parser(parser)
    args = parser.parse_args(argv)

    if args.base_json is not None:
        with open(args.base_json, 'r') as f:
            prior_config = json.load(f)

        # Only keys that match a known option are used; unknown keys in the
        # JSON file are ignored. Re-parsing lets CLI flags win over the JSON.
        parser.set_defaults(**{
            key: value
            for key, value in prior_config.items() if hasattr(args, key)
        })
        args = parser.parse_args(argv)

    # assert arguments
    assert args.img_dirs is not None, "argument img_dirs can't be None"
    assert args.ann_dirs is None or \
        len(args.ann_dirs) == len(args.img_dirs), \
        'ann_dirs and img_dirs must have the same length'
    assert len(args.sizes) == len(args.gaps), \
        'sizes and gaps must have the same length'
    assert len(args.sizes) == 1 or len(args.rates) == 1, \
        'either sizes or rates must contain exactly one value'
    assert args.save_ext in _SAVE_EXTENSIONS, \
        f'save_ext must be one of {_SAVE_EXTENSIONS}'
    # A window is kept only if its rate is strictly greater than the
    # threshold, so 1 would discard every window.
    assert 0 <= args.img_rate_thr < 1, 'img_rate_thr must be in [0, 1)'
    assert 0 <= args.iof_thr <= 1, 'iof_thr must be in [0, 1]'
    assert not osp.exists(args.save_dir), \
        f'{osp.join(args.save_dir)} already exists'
    return args


def get_sliding_window(info, sizes, gaps, img_rate_thr):
    """Get sliding windows.

    Args:
        info (dict): Dict of image's width and height.
        sizes (list): List of window's sizes.
        gaps (list): List of window's gaps (overlap between neighbours).
        img_rate_thr (float): Minimal fraction of a window's area that must
            lie inside the image for the window to be kept.

    Returns:
        np.array: Valid windows with shape (N, 4) as
            (x_start, y_start, x_stop, y_stop).
    """
    eps = 0.01
    windows = []
    width, height = info['width'], info['height']
    for size, gap in zip(sizes, gaps):
        assert size > gap, f'invalid size gap pair [{size} {gap}]'
        step = size - gap

        x_num = 1 if width <= size else ceil((width - size) / step + 1)
        x_start = [step * i for i in range(x_num)]
        # Pull the last window back so it ends exactly at the image border.
        if len(x_start) > 1 and x_start[-1] + size > width:
            x_start[-1] = width - size

        y_num = 1 if height <= size else ceil((height - size) / step + 1)
        y_start = [step * i for i in range(y_num)]
        if len(y_start) > 1 and y_start[-1] + size > height:
            y_start[-1] = height - size

        start = np.array(
            list(itertools.product(x_start, y_start)), dtype=np.int64)
        stop = start + size
        windows.append(np.concatenate([start, stop], axis=1))
    windows = np.concatenate(windows, axis=0)

    # Part of every window that actually overlaps the image.
    img_in_wins = windows.copy()
    img_in_wins[:, 0::2] = np.clip(img_in_wins[:, 0::2], 0, width)
    img_in_wins[:, 1::2] = np.clip(img_in_wins[:, 1::2], 0, height)
    img_areas = (img_in_wins[:, 2] - img_in_wins[:, 0]) * \
        (img_in_wins[:, 3] - img_in_wins[:, 1])
    win_areas = (windows[:, 2] - windows[:, 0]) * \
        (windows[:, 3] - windows[:, 1])
    img_rates = img_areas / win_areas
    if not (img_rates > img_rate_thr).any():
        # No window passes: keep the best-covered one(s) instead of nothing.
        max_rate = img_rates.max()
        img_rates[abs(img_rates - max_rate) < eps] = 1
    return windows[img_rates > img_rate_thr]


def poly2hbb(polys):
    """Convert polygons to horizontal bboxes.

    Args:
        polys (np.array): Polygons with shape (N, 8)

    Returns:
        np.array: Horizontal bboxes as (x_min, y_min, x_max, y_max).
    """
    shape = polys.shape
    points = polys.reshape(*shape[:-1], shape[-1] // 2, 2)
    lt_point = np.min(points, axis=-2)
    rb_point = np.max(points, axis=-2)
    return np.concatenate([lt_point, rb_point], axis=-1)


def bbox_overlaps_iof(bboxes1, bboxes2, eps=1e-6):
    """Compute bbox overlaps (iof).

    The result is intersection area divided by the area of the polygon in
    ``bboxes1`` (intersection over foreground).

    Args:
        bboxes1 (np.array): Polygons with shape (N, 8).
        bboxes2 (np.array): Horizontal bboxes with shape (M, 4).
        eps (float, optional): Lower bound for polygon areas, avoids division
            by zero. Defaults to 1e-6.

    Returns:
        np.array: Overlaps with shape (N, M).

    Raises:
        ImportError: If shapely is needed but not installed.
    """
    rows = bboxes1.shape[0]
    cols = bboxes2.shape[0]

    if rows * cols == 0:
        return np.zeros((rows, cols), dtype=np.float32)

    # Cheap horizontal-box test first; exact polygon intersection is only
    # computed for pairs whose horizontal boxes overlap.
    hbboxes1 = poly2hbb(bboxes1)[:, None, :]
    hbboxes2 = bboxes2
    lt = np.maximum(hbboxes1[..., :2], hbboxes2[..., :2])
    rb = np.minimum(hbboxes1[..., 2:], hbboxes2[..., 2:])
    wh = np.clip(rb - lt, 0, np.inf)
    h_overlaps = wh[..., 0] * wh[..., 1]

    left, top, right, bottom = [bboxes2[..., i] for i in range(4)]
    polys2 = np.stack(
        [left, top, right, top, right, bottom, left, bottom], axis=-1)
    _require(shgeo, 'shapely')
    sg_polys1 = [shgeo.Polygon(p) for p in bboxes1.reshape(rows, -1, 2)]
    sg_polys2 = [shgeo.Polygon(p) for p in polys2.reshape(cols, -1, 2)]
    overlaps = np.zeros(h_overlaps.shape)
    for p in zip(*np.nonzero(h_overlaps)):
        overlaps[p] = sg_polys1[p[0]].intersection(sg_polys2[p[-1]]).area
    areas = np.array([p.area for p in sg_polys1], dtype=np.float32)
    areas = np.clip(areas[..., None], eps, np.inf)

    outputs = overlaps / areas
    if outputs.ndim == 1:
        outputs = outputs[..., None]
    return outputs


def get_window_obj(info, windows, iof_thr):
    """Assign the annotations of an image to each sliding window.

    Args:
        info (dict): Dict of bbox annotations.
        windows (np.array): information of sliding windows.
        iof_thr (float): Threshold of overlaps between bbox and window.

    Returns:
        list[dict]: List of bbox annotations of every window. Each dict also
            gets a boolean ``trunc`` array marking objects that are only
            partly inside the window (iof < 1).
    """
    bboxes = info['ann']['bboxes']
    iofs = bbox_overlaps_iof(bboxes, windows)

    window_anns = []
    for i in range(windows.shape[0]):
        win_iofs = iofs[:, i]
        pos_inds = np.nonzero(win_iofs >= iof_thr)[0].tolist()

        win_ann = dict()
        for k, v in info['ann'].items():
            try:
                win_ann[k] = v[pos_inds]
            except TypeError:
                # Plain lists (e.g. labels) do not support index lists.
                win_ann[k] = [v[j] for j in pos_inds]
        win_ann['trunc'] = win_iofs[pos_inds] < 1
        window_anns.append(win_ann)
    return window_anns


def crop_and_save_img(info, windows, window_anns, img_dir, no_padding,
                      padding_value, save_dir, anno_dir, img_ext):
    """Crop every window out of an image and save patch and annotation.

    Args:
        info (dict): Image's information.
        windows (np.array): information of sliding windows.
        window_anns (list[dict]): List of bbox annotations of every window.
        img_dir (str): Path of images.
        no_padding (bool): If True, no padding.
        padding_value (tuple[int|float]): Padding value.
        save_dir (str): Directory the patches are written to.
        anno_dir (str): Directory the patch annotations are written to.
        img_ext (str): Picture suffix.

    Returns:
        list[dict]: Information of patches.

    Raises:
        ImportError: If OpenCV is not installed.
        FileNotFoundError: If the source image cannot be read.
        OSError: If a patch cannot be written.
    """
    _require(cv2, 'opencv-python')
    img_path = osp.join(img_dir, info['filename'])
    # cv2.imread returns None instead of raising when reading fails.
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f'Cannot read image {img_path}')

    # Keys that are recomputed for every patch and must not be inherited.
    per_patch_keys = ('id', 'filename', 'width', 'height', 'ann')
    patch_infos = []
    for i in range(windows.shape[0]):
        patch_info = {
            k: v
            for k, v in info.items() if k not in per_patch_keys
        }

        x_start, y_start, x_stop, y_stop = windows[i].tolist()
        patch_info['x_start'] = x_start
        patch_info['y_start'] = y_start
        patch_info['id'] = \
            info['id'] + '__' + str(x_stop - x_start) + \
            '__' + str(x_start) + '___' + str(y_start)
        patch_info['ori_id'] = info['id']

        ann = window_anns[i]
        # Shift polygons from image coordinates into patch coordinates.
        ann['bboxes'] = translate(ann['bboxes'], -x_start, -y_start)
        patch_info['ann'] = ann

        patch = img[y_start:y_stop, x_start:x_stop]
        if not no_padding:
            height = y_stop - y_start
            width = x_stop - x_start
            if height > patch.shape[0] or width > patch.shape[1]:
                padding_patch = np.empty((height, width, patch.shape[-1]),
                                         dtype=np.uint8)
                if not isinstance(padding_value, (int, float)):
                    assert len(padding_value) == patch.shape[-1]
                padding_patch[...] = padding_value
                padding_patch[:patch.shape[0], :patch.shape[1], ...] = patch
                patch = padding_patch
        patch_info['height'] = patch.shape[0]
        patch_info['width'] = patch.shape[1]

        patch_info['filename'] = patch_info['id'] + img_ext
        patch_path = osp.join(save_dir, patch_info['filename'])
        # cv2.imwrite signals failure through its return value only.
        if not cv2.imwrite(patch_path, patch):
            raise OSError(f'Failed to write patch {patch_path}')
        patch_infos.append(patch_info)

        ann_path = osp.join(anno_dir, patch_info['id'] + '.txt')
        with codecs.open(ann_path, 'w', 'utf-8') as f_out:
            for idx in range(ann['bboxes'].shape[0]):
                coords = ' '.join(map(str, ann['bboxes'][idx]))
                # Truncated objects are written with difficulty 2.
                diff = '2' if ann['trunc'][idx] else str(ann['diffs'][idx])
                f_out.write(f"{coords} {ann['labels'][idx]} {diff}\n")

    return patch_infos


def single_split(arguments, sizes, gaps, img_rate_thr, iof_thr, no_padding,
                 padding_value, save_dir, anno_dir, img_ext, lock, prog, total,
                 logger):
    """Split a single image into patches and log the progress.

    Args:
        arguments (tuple): ``(info, img_dir)`` of the image to split.
        sizes (list): List of window's sizes.
        gaps (list): List of window's gaps.
        img_rate_thr (float): Minimal fraction of a window inside the image.
        iof_thr (float): Threshold of overlaps between bbox and window.
        no_padding (bool): If True, no padding.
        padding_value (tuple[int|float]): Padding value.
        save_dir (str): Directory the patches are written to.
        anno_dir (str): Directory the patch annotations are written to.
        img_ext (str): Picture suffix.
        lock (object): Lock of Manager.
        prog (object): Progress of Manager.
        total (object): Length of infos.
        logger (object): Logger.

    Returns:
        list[dict]: Information of patches.
    """
    info, img_dir = arguments
    windows = get_sliding_window(info, sizes, gaps, img_rate_thr)
    window_anns = get_window_obj(info, windows, iof_thr)
    patch_infos = crop_and_save_img(info, windows, window_anns, img_dir,
                                    no_padding, padding_value, save_dir,
                                    anno_dir, img_ext)
    assert patch_infos

    # The context manager releases the lock even if logging raises.
    with lock:
        prog.value += 1
        msg = f'({prog.value / total:3.1%} {prog.value}:{total})'
        msg += ' - ' + f"Filename: {info['filename']}"
        msg += ' - ' + f"width: {info['width']:<5d}"
        msg += ' - ' + f"height: {info['height']:<5d}"
        msg += ' - ' + f"Objects: {len(info['ann']['bboxes']):<5d}"
        msg += ' - ' + f'Patches: {len(patch_infos)}'
        logger.info(msg)

    return patch_infos


def setup_logger(log_path):
    """Setup logger.

    Args:
        log_path (str): Directory in which the timestamped log is created.

    Returns:
        object: Logger.
    """
    logger = logging.getLogger('img split')
    formatter = logging.Formatter('%(asctime)s - %(message)s')
    now = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    log_path = osp.join(log_path, now + '.log')
    handlers = [logging.StreamHandler(), logging.FileHandler(log_path, 'w')]

    for handler in handlers:
        handler.setFormatter(formatter)
        handler.setLevel(logging.INFO)
        logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    return logger


def translate(bboxes, x, y):
    """Shift every (x, y) vertex of the bboxes by the given offsets.

    Called with the negated window origin, this maps image coordinates to
    window (patch) coordinates.

    Args:
        bboxes (np.array): bboxes with an even number of values per row,
            laid out as x, y, x, y, ...
        x (float): Deviation value of x-axis.
        y (float): Deviation value of y-axis

    Returns:
        np.array: The shifted bboxes.
    """
    dim = bboxes.shape[-1]
    translated = bboxes + np.array([x, y] * (dim // 2), dtype=np.float32)
    return translated


def load_dota(img_dir, ann_dir=None, nproc=10):
    """Load DOTA dataset.

    Args:
        img_dir (str): Path of images.
        ann_dir (str): Path of annotations.
        nproc (int): number of processes.

    Returns:
        list: Dataset's contents.
    """
    assert osp.isdir(img_dir), f'The {img_dir} is not an existing dir!'
    assert ann_dir is None or osp.isdir(
        ann_dir), f'The {ann_dir} is not an existing dir!'

    print('Starting loading DOTA dataset information.')
    start_time = time.time()
    _load_func = partial(_load_dota_single, img_dir=img_dir, ann_dir=ann_dir)
    if nproc > 1:
        # The context manager shuts the workers down once map() has returned.
        with Pool(nproc) as pool:
            contents = pool.map(_load_func, os.listdir(img_dir))
    else:
        contents = list(map(_load_func, os.listdir(img_dir)))
    contents = [c for c in contents if c is not None]
    end_time = time.time()
    print(f'Finishing loading DOTA, get {len(contents)} images,',
          f'using {end_time - start_time:.3f}s.')

    return contents


def _load_dota_single(imgfile, img_dir, ann_dir):
    """Load DOTA's single image.

    Args:
        imgfile (str): Filename of single image.
        img_dir (str): Path of images.
        ann_dir (str): Path of annotations.

    Returns:
        dict: Content of single image, or None if ``imgfile`` does not have
            a supported image extension.
    """
    img_id, ext = osp.splitext(imgfile)
    if ext.lower() not in _IMG_EXTENSIONS:
        return None

    _require(Image, 'pillow')
    imgpath = osp.join(img_dir, imgfile)
    # Only the size is needed; close the file handle straight away.
    with Image.open(imgpath) as img:
        size = img.size
    txtfile = None if ann_dir is None else osp.join(ann_dir, img_id + '.txt')
    content = _load_dota_txt(txtfile)

    content.update(
        dict(width=size[0], height=size[1], filename=imgfile, id=img_id))
    return content


def _load_dota_txt(txtfile):
    """Load DOTA's txt annotation.

    Args:
        txtfile (str): Filename of single txt annotation. None or a missing
            file yields an empty annotation.

    Returns:
        dict: Annotation of single image with keys ``gsd`` and ``ann``
            (``bboxes`` (N, 8) float32, ``labels`` list[str], ``diffs`` (N,)
            int64).
    """
    gsd, bboxes, labels, diffs = None, [], [], []
    if txtfile is None:
        pass
    elif not osp.isfile(txtfile):
        print(f"Can't find {txtfile}, treated as empty txtfile")
    else:
        with open(txtfile, 'r') as f:
            for line in f:
                if line.startswith('gsd'):
                    num = line.split(':')[-1]
                    try:
                        gsd = float(num)
                    except ValueError:
                        gsd = None
                    continue

                # split() without argument drops the trailing newline and
                # tolerates repeated blanks, so a 9-field line does not end
                # up with '\n' glued to its label.
                items = line.split()
                if len(items) >= 9:
                    bboxes.append([float(i) for i in items[:8]])
                    labels.append(items[8])
                    diffs.append(int(items[9]) if len(items) >= 10 else 0)

    bboxes = np.array(bboxes, dtype=np.float32) if bboxes else \
        np.zeros((0, 8), dtype=np.float32)
    diffs = np.array(diffs, dtype=np.int64) if diffs else \
        np.zeros((0,), dtype=np.int64)
    ann = dict(bboxes=bboxes, labels=labels, diffs=diffs)
    return dict(gsd=gsd, ann=ann)


def main():
    """Main function of image split."""
    args = parse_args()

    # Fail before any directory is created if a hard dependency is missing.
    _require(cv2, 'opencv-python')
    _require(Image, 'pillow')

    if args.ann_dirs is None:
        args.ann_dirs = [None for _ in range(len(args.img_dirs))]
    padding_value = args.padding_value[0] \
        if len(args.padding_value) == 1 else args.padding_value
    sizes, gaps = [], []
    for rate in args.rates:
        sizes += [int(size / rate) for size in args.sizes]
        gaps += [int(gap / rate) for gap in args.gaps]
    save_imgs = osp.join(args.save_dir, 'images')
    save_files = osp.join(args.save_dir, 'annfiles')
    os.makedirs(save_imgs)
    os.makedirs(save_files)
    logger = setup_logger(args.save_dir)

    print('Loading original data!!!')
    infos, img_dirs = [], []
    for img_dir, ann_dir in zip(args.img_dirs, args.ann_dirs):
        _infos = load_dota(img_dir=img_dir, ann_dir=ann_dir, nproc=args.nproc)
        _img_dirs = [img_dir for _ in range(len(_infos))]
        infos.extend(_infos)
        img_dirs.extend(_img_dirs)

    print('Start splitting images!!!')
    start = time.time()
    manager = Manager()
    worker = partial(
        single_split,
        sizes=sizes,
        gaps=gaps,
        img_rate_thr=args.img_rate_thr,
        iof_thr=args.iof_thr,
        no_padding=args.no_padding,
        padding_value=padding_value,
        save_dir=save_imgs,
        anno_dir=save_files,
        img_ext=args.save_ext,
        lock=manager.Lock(),
        prog=manager.Value('i', 0),
        total=len(infos),
        logger=logger)

    if args.nproc > 1:
        with Pool(args.nproc) as pool:
            patch_infos = pool.map(worker, zip(infos, img_dirs))
    else:
        patch_infos = list(map(worker, zip(infos, img_dirs)))

    # Flatten in linear time; also works when no image was found.
    patch_infos = list(itertools.chain.from_iterable(patch_infos))
    stop = time.time()
    print(f'Finish splitting images in {int(stop - start)} second!!!')
    print(f'Total images number: {len(patch_infos)}')


if __name__ == '__main__':
    main()