├── img
│   ├── HR.jpg
│   ├── Angle.jpg
│   ├── Class.jpg
│   ├── Height.jpg
│   ├── Sample.jpg
│   ├── differ.jpg
│   ├── domain.jpg
│   └── first.jpg
├── split_configs
│   ├── ss_test.json
│   ├── ss_val.json
│   └── ss_train.json
├── Vis_CODrone_GT.py
├── README.md
└── CODrone_Split.py
/img/HR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/HR.jpg
--------------------------------------------------------------------------------
/img/Angle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/Angle.jpg
--------------------------------------------------------------------------------
/img/Class.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/Class.jpg
--------------------------------------------------------------------------------
/img/Height.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/Height.jpg
--------------------------------------------------------------------------------
/img/Sample.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/Sample.jpg
--------------------------------------------------------------------------------
/img/differ.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/differ.jpg
--------------------------------------------------------------------------------
/img/domain.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/domain.jpg
--------------------------------------------------------------------------------
/img/first.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/first.jpg
--------------------------------------------------------------------------------
/split_configs/ss_test.json:
--------------------------------------------------------------------------------
1 | {
2 | "nproc": 10,
3 | "img_dirs": [
4 | "your image directory"
5 | ],
6 | "ann_dirs": ["your annotation directory"],
7 | "sizes": [
8 | 1180
9 | ],
10 | "gaps": [
11 | 200
12 | ],
13 | "rates": [
14 | 1.0
15 | ],
16 | "img_rate_thr": 0.6,
17 | "iof_thr": 0.7,
18 | "no_padding": false,
19 | "padding_value": [
20 | 104,
21 | 116,
22 | 124
23 | ],
24 | "save_dir": "your output directory",
25 | "save_ext": ".png"
26 | }
27 |
--------------------------------------------------------------------------------
/split_configs/ss_val.json:
--------------------------------------------------------------------------------
1 | {
2 | "nproc": 10,
3 | "img_dirs": [
4 | "your image directory"
5 | ],
6 | "ann_dirs": [
7 | "your annotation directory"
8 | ],
9 | "sizes": [
10 | 1180
11 | ],
12 | "gaps": [
13 | 200
14 | ],
15 | "rates": [
16 | 1.0
17 | ],
18 | "img_rate_thr": 0.6,
19 | "iof_thr": 0.7,
20 | "no_padding": false,
21 | "padding_value": [
22 | 104,
23 | 116,
24 | 124
25 | ],
26 | "save_dir": "your save directory",
27 | "save_ext": ".png"
28 | }
29 |
--------------------------------------------------------------------------------
/split_configs/ss_train.json:
--------------------------------------------------------------------------------
1 | {
2 | "nproc": 10,
3 | "img_dirs": [
4 | "your image directory"
5 | ],
6 | "ann_dirs": [
7 | "your annotation directory"
8 | ],
9 | "sizes": [
10 | 1180
11 | ],
12 | "gaps": [
13 | 200
14 | ],
15 | "rates": [
16 | 1.0
17 | ],
18 | "img_rate_thr": 0.6,
19 | "iof_thr": 0.7,
20 | "no_padding": false,
21 | "padding_value": [
22 | 104,
23 | 116,
24 | 124
25 | ],
26 | "save_dir": "your output directory",
27 | "save_ext": ".png"
28 | }
29 |
--------------------------------------------------------------------------------
/Vis_CODrone_GT.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import numpy as np
4 |
5 |
6 | def draw_boxes(image_path, label_path, output_path=None, class_colors=None, thickness=2):
7 | """
8 | Draw oriented bounding boxes (OBB) from DOTA-format annotations.
9 |
10 | Args:
11 | image_path (str): Path to the input image.
12 | label_path (str): Path to the annotation (.txt) file.
13 |         output_path (str, optional): Path to save the output image. If None, nothing is written.
14 | class_colors (dict[str, tuple[int, int, int]], optional):
15 | Mapping of class name to BGR color, e.g. {"plane": (0,255,0)}.
16 | Defaults to green if not provided.
17 | thickness (int, optional): Line thickness of the bounding box. Defaults to 2.
18 | """
19 | # Load the image
20 | image = cv2.imread(image_path)
21 |
22 | # Read labels
23 | with open(label_path, 'r') as f:
24 | lines = f.readlines()
25 |
26 |     for line in lines:
27 |         parts = line.strip().split()
28 |         if len(parts) < 9:  # skip blank lines and metadata headers (e.g. 'gsd:...')
29 |             continue
30 |         coords = np.array(list(map(float, parts[:8]))).reshape(4, 2).astype(np.int32)
31 | cls_name = parts[8]
32 | color = class_colors.get(cls_name, (0, 255, 0)) if class_colors else (0, 255, 0)
33 |
34 | # Draw oriented bounding box
35 | cv2.polylines(image, [coords], isClosed=True, color=color, thickness=thickness)
36 |
37 | # Draw class label text at box center
38 | cx, cy = np.mean(coords, axis=0).astype(int)
39 | cv2.putText(
40 | image, cls_name, (cx, cy),
41 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA
42 | )
43 |
44 |     if output_path:
45 |         os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)  # dirname may be ''
46 |         cv2.imwrite(output_path, image)
47 |
48 |
49 | def main():
50 | """Batch visualization of DOTA-format ground truth annotations."""
51 | img_dir = "your image directory"
52 | label_dir = "your label directory"
53 | output_dir = "your output directory"
54 |
55 | os.makedirs(output_dir, exist_ok=True)
56 |
57 | for img_name in os.listdir(img_dir):
58 | if not img_name.lower().endswith(('.png', '.jpg', '.jpeg', '.tif')):
59 | continue
60 |
61 | img_path = os.path.join(img_dir, img_name)
62 | label_path = os.path.join(label_dir, os.path.splitext(img_name)[0] + ".txt")
63 | out_path = os.path.join(output_dir, img_name)
64 |
65 | draw_boxes(img_path, label_path, out_path)
66 |
67 |
68 | if __name__ == "__main__":
69 | main()
70 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CODrone: A Comprehensive Oriented Object Detection benchmark for UAV
2 |
11 | **😊 TL;DR**
12 |
13 | CODrone is a comprehensive oriented object detection dataset for UAVs that accurately reflects real-world conditions.
14 |
30 | **⭐ Key contributions of CODrone**:
31 |
32 | - We propose a **large-scale**, **high-resolution** **UAV**-**oriented object detection** dataset, CODrone, which consists of over ten thousand UAV-captured images with precise **oriented bounding box** annotations and **diverse object categories**.
33 | - The proposed CODrone dataset considers multiple influential factors, including **image acquisition altitude**, **camera perspective**, **lighting conditions**, and **geographic location**.
34 | - Based on the proposed dataset, we establish a UAV-oriented object detection benchmark and conduct training and evaluation with a set of representative state-of-the-art methods.
35 |
36 | **🚀 Try it out!!!**:
37 |
38 | - 📥 [Download from Google Drive](https://drive.google.com/file/d/1FQ6mUaOr_kATDaH7N2bObD5SRRkV7qJy/view?usp=sharing)
39 |
40 | - 📥 [Download from Quark Drive](https://pan.quark.cn/s/6028b38fe7c8)
41 |
42 | ```shell
43 | CODrone/
44 | ├── test/
45 | │ ├── annfile/ # DOTA format annotation
46 | │ ├── images/
47 | │ └── labels/ # VOC format annotation
48 | ├── train/
49 | │ ├── annfile/
50 | │ ├── images/
51 | │ └── labels/
52 | └── val/
53 | ├── annfile/
54 | ├── images/
55 | └── labels/
56 | ```
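
Each `.txt` under `annfile/` is a DOTA-format annotation: one object per line, eight polygon corner coordinates followed by the class name and an optional difficulty flag. This is the format that `Vis_CODrone_GT.py` and `CODrone_Split.py` parse. A minimal reading sketch (the file path is illustrative):

```python
def load_annfile(path):
    """Parse a DOTA-format txt: x1 y1 x2 y2 x3 y3 x4 y4 class_name [difficulty]."""
    objects = []
    with open(path, "r") as f:
        for line in f:
            items = line.strip().split()
            if len(items) < 9:  # skip blank lines and 'gsd:'-style header lines
                continue
            objects.append({
                "poly": [float(v) for v in items[:8]],                 # 4 corner points
                "label": items[8],
                "difficulty": int(items[9]) if len(items) > 9 else 0,  # optional
            })
    return objects

# objs = load_annfile("CODrone/train/annfile/0001.txt")  # illustrative file name
```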
57 |
58 | ## Characteristics
59 |
60 |
61 | | Dataset | Resolution | Categories | Altitude Gap | Camera Angles | Images | Objects | OBB |
62 | |:--------------:|:----------:|:----------:|:------------:|:-------------:|:------:|:-------:|:---:|
63 | | VisDrone2019 | 2000×1500 | 10 | * | * | 10.2k | 54.2k | |
64 | | UAVDT | 1080×540 | 3 | 60m | * | 80.0k | 841.5k | |
65 | | AU-AIR | 1920×1080 | 8 | 25m | 45° | 3.2k | 132.0k | |
66 | | CARPK | 1280×720 | 1 | * | * | 1.4k | 89.7k | |
67 | | HazyDet | 1333×800 | 3 | * | * | 11.6k | 383.0k | |
68 | | DroneVehicle | 840×712 | 5 | 40m | 30° | 56.8k | 953.0k | ✅ |
69 | | UAV-ROD | 1920×1080 | 1 | 50m | * | 1.5k | 30.0k | ✅ |
70 | | **CODrone (ours)** | **3840×2160** | **12** | **70m** | **60°** | 10.0k | 596.7k | ✅ |
71 |
72 |
73 | We present a comparison between CODrone and other commonly used UAV-based object detection datasets.
74 | CODrone significantly expands several key dimensions, including image resolution, object category diversity, and variation in flight altitude and camera angle.
75 | For resolution, CODrone employs a 3840 × 2160 high-resolution onboard camera, aligning with the capabilities of modern UAV hardware.
76 | In terms of object classes, unlike most existing UAV oriented object detection datasets that focus primarily on vehicles, CODrone includes a more diverse range of categories, thereby increasing the difficulty and realism of the detection task.
77 | Furthermore, we explicitly annotate both altitude and camera angle for each image, enabling research into UAV pose-aware perception and related tasks.
78 |
79 |
80 | ### High resolution brings more high-quality information
81 |
87 | CODrone employs a **3840 × 2160** high-resolution onboard camera, aligning with the capabilities of modern UAV hardware.
88 |
89 | ### Multi-altitude and multi-angle captures for broad flight scenario adaptation
90 |
106 | The UAV was configured to capture imagery from two camera angles (**30°** and **90°**) and at three flight altitudes (**30 m**, **60 m**, and **100 m**), resulting in a total of **6** unique viewpoint combinations.
107 |
108 |
109 | ### More diverse scenes, broader application potential
110 |
116 | CODrone covers a wide range of environments, from urban areas and rural towns to ports and industrial zones, encompassing most scene types encountered in real-world UAV-based urban applications.
117 |
118 | ## How to visualize CODrone annotations
119 |
120 | 1) Install
121 | ```bash
122 | pip install opencv-python numpy
123 | ```
124 |
125 | 2) Edit paths in `Vis_CODrone_GT.py` (inside `main()`):
126 | ```python
127 | img_dir = "CODrone/train/images"
128 | label_dir = "CODrone/train/annfile"
129 | output_dir = "CODrone/train/viz"
130 | ```
131 |
132 | 3) Run
133 | ```bash
134 | python Vis_CODrone_GT.py
135 | ```
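
`draw_boxes()` also accepts an optional `class_colors` mapping if you want per-class colors instead of the default green. A small sketch (paths and class names below are illustrative, not the full CODrone label set):

```python
from Vis_CODrone_GT import draw_boxes

# Per-class BGR colors; any class not listed falls back to green.
colors = {"car": (0, 255, 255), "bus": (255, 0, 0), "pedestrian": (0, 0, 255)}

draw_boxes(
    image_path="CODrone/train/images/0001.png",
    label_path="CODrone/train/annfile/0001.txt",
    output_path="CODrone/train/viz/0001.png",
    class_colors=colors,
    thickness=2,
)
```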
136 |
137 | ## How to split CODrone images
138 |
139 | 1) Install
140 | ```bash
141 | pip install opencv-python numpy pillow shapely
142 | ```
143 |
144 | 2) Run the DOTA-style sliding-window split (with IOF-based object assignment), either with **direct args**:
145 | ```bash
146 | python CODrone_Split.py --img-dirs CODrone/train/images --ann-dirs CODrone/train/annfile --sizes 1024 --gaps 512 --rates 1.0 --img-rate-thr 0.6 --iof-thr 0.7 --save-dir CODrone/train/splits --save-ext .png --nproc 8
147 | ```
148 |
149 | or **via JSON config** (recommended; CLI flags override JSON):
150 | ```bash
151 | # examples (pick one)
152 | python CODrone_Split.py --base-json split_configs/ss_train.json
153 | python CODrone_Split.py --base-json split_configs/ss_val.json
154 | python CODrone_Split.py --base-json split_configs/ss_test.json
155 | ```
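
For reference, `split_configs/ss_train.json` with its placeholder paths filled in for the directory layout shown above might look like this (the paths are illustrative):

```json
{
  "nproc": 10,
  "img_dirs": ["CODrone/train/images"],
  "ann_dirs": ["CODrone/train/annfile"],
  "sizes": [1180],
  "gaps": [200],
  "rates": [1.0],
  "img_rate_thr": 0.6,
  "iof_thr": 0.7,
  "no_padding": false,
  "padding_value": [104, 116, 124],
  "save_dir": "CODrone/train/splits",
  "save_ext": ".png"
}
```

Note that `save_dir` must not exist yet: the script refuses to overwrite an existing directory and creates `images/` and `annfiles/` inside it.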
156 |
157 | 3) Output
158 | - Image patches: `CODrone/train/splits/images/`
159 | - DOTA-style annotation txts for the patches: `CODrone/train/splits/annfiles/`
160 |
161 | **Note:** Tiling (`sizes`/`gaps`/`rates`, where `gaps` is the overlap between adjacent windows, so the stride is `size - gap`), IOF-based object assignment, truncation handling, and label writing strictly follow **DOTA devkit** conventions.
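
Patch filenames encode their window offset as `<ori_id>__<win_size>__<x_start>___<y_start>` (see `crop_and_save_img()` in `CODrone_Split.py`), so results obtained on a patch can be shifted back into original-image coordinates. A minimal sketch (the patch name and polygon below are illustrative):

```python
import numpy as np

def patch_to_original(patch_stem, poly):
    """Shift an 8-value polygon from patch coordinates back to the original image."""
    left, y_start = patch_stem.rsplit("___", 1)
    ori_id, _win_size, x_start = left.rsplit("__", 2)
    offset = np.array([int(x_start), int(y_start)] * 4, dtype=np.float32)
    return ori_id, np.asarray(poly, dtype=np.float32) + offset

ori_id, poly = patch_to_original(
    "P0001__1024__512___1024", [10, 20, 60, 20, 60, 50, 10, 50])
print(ori_id, poly)  # 'P0001', polygon shifted by (+512, +1024)
```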
162 |
163 | ## Citation
164 |
165 | ```
166 | @misc{ye2025clearflexibleprecisecomprehensive,
167 | title={More Clear, More Flexible, More Precise: A Comprehensive Oriented Object Detection benchmark for UAV},
168 | author={Kai Ye and Haidi Tang and Bowen Liu and Pingyang Dai and Liujuan Cao and Rongrong Ji},
169 | year={2025},
170 | eprint={2504.20032},
171 | archivePrefix={arXiv},
172 | primaryClass={cs.CV},
173 | url={https://arxiv.org/abs/2504.20032},
174 | }
175 | ```
176 |
177 |
178 | ## License
179 |
180 | Licensed under the CC BY-NC-SA 4.0 license (Attribution-NonCommercial-ShareAlike 4.0 International).
181 |
182 |
183 | The code is released for academic research use only.
184 |
185 | If you have any questions, please contact me at [yekai@stu.xmu.edu.cn](mailto:yekai@stu.xmu.edu.cn).
186 |
--------------------------------------------------------------------------------
/CODrone_Split.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import codecs
3 | import datetime
4 | import itertools
5 | import json
6 | import logging
7 | import os
8 | import os.path as osp
9 | import time
10 | from functools import partial, reduce
11 | from math import ceil
12 | from multiprocessing import Manager, Pool
13 |
14 | import cv2
15 | import numpy as np
16 | from PIL import Image
17 |
18 | Image.MAX_IMAGE_PIXELS = None
19 |
20 | try:
21 | import shapely.geometry as shgeo
22 | except ImportError:
23 | shgeo = None
24 |
25 |
26 | def add_parser(parser):
27 | """Add arguments."""
28 | parser.add_argument(
29 | '--base-json',
30 | type=str,
31 | default=None,
32 | help='json config file for split images')
33 | parser.add_argument(
34 | '--nproc', type=int, default=10, help='the procession number')
35 |
36 | # argument for loading data
37 | parser.add_argument(
38 | '--img-dirs',
39 | nargs='+',
40 | type=str,
41 | default=None,
42 |         help='image directories (required)')
43 | parser.add_argument(
44 | '--ann-dirs',
45 | nargs='+',
46 | type=str,
47 | default=None,
48 |         help='annotation directories (optional)')
49 |
50 | # argument for splitting image
51 | parser.add_argument(
52 | '--sizes',
53 | nargs='+',
54 | type=int,
55 | default=[1024],
56 | help='the sizes of sliding windows')
57 | parser.add_argument(
58 | '--gaps',
59 | nargs='+',
60 | type=int,
61 | default=[512],
62 |         help='the overlaps of sliding windows (stride = size - gap)')
63 | parser.add_argument(
64 | '--rates',
65 | nargs='+',
66 | type=float,
67 | default=[1.],
68 |         help='same as the DOTA devkit rate, but it only changes the window size')
69 | parser.add_argument(
70 | '--img-rate-thr',
71 | type=float,
72 | default=0.6,
73 |         help='the minimal ratio of image area inside a window to the window area')
74 | parser.add_argument(
75 | '--iof-thr',
76 | type=float,
77 | default=0.7,
78 |         help='the minimal IoF between an object and a window')
79 | parser.add_argument(
80 | '--no-padding',
81 | action='store_true',
82 |         help='do not pad patches to the regular window size')
83 | parser.add_argument(
84 | '--padding-value',
85 | nargs='+',
86 | type=int,
87 | default=[0],
88 |         help='padding value: a single number or one value per channel')
89 |
90 | # argument for saving
91 | parser.add_argument(
92 | '--save-dir',
93 | type=str,
94 | default='.',
95 |         help='directory to save split images and annotation files')
96 | parser.add_argument(
97 | '--save-ext',
98 | type=str,
99 | default='.png',
100 |         help='the file extension of saved images')
101 |
102 |
103 | def parse_args():
104 | """Parse arguments."""
105 | parser = argparse.ArgumentParser(description='Splitting images')
106 | add_parser(parser)
107 | args = parser.parse_args()
108 |
109 | if args.base_json is not None:
110 | with open(args.base_json, 'r') as f:
111 | prior_config = json.load(f)
112 |
113 | for action in parser._actions:
114 | if action.dest not in prior_config or \
115 | not hasattr(action, 'default'):
116 | continue
117 | action.default = prior_config[action.dest]
118 | args = parser.parse_args()
119 |
120 | # assert arguments
121 | assert args.img_dirs is not None, "argument img_dirs can't be None"
122 | assert args.ann_dirs is None or len(args.ann_dirs) == len(args.img_dirs)
123 | assert len(args.sizes) == len(args.gaps)
124 | assert len(args.sizes) == 1 or len(args.rates) == 1
125 |     assert args.save_ext in ['.png', '.jpg', '.bmp', '.tif']
126 |     assert 0 <= args.iof_thr <= 1
128 |     assert not osp.exists(args.save_dir), \
129 |         f'{args.save_dir} already exists'
130 | return args
131 |
132 |
133 | def get_sliding_window(info, sizes, gaps, img_rate_thr):
134 | """Get sliding windows.
135 |
136 | Args:
137 | info (dict): Dict of image's width and height.
138 | sizes (list): List of window's sizes.
139 | gaps (list): List of window's gaps.
140 |         img_rate_thr (float): Threshold on the ratio of image area inside a window to the window area.
141 |
142 | Returns:
143 | list[np.array]: Information of valid windows.
144 | """
145 | eps = 0.01
146 | windows = []
147 | width, height = info['width'], info['height']
148 | for size, gap in zip(sizes, gaps):
149 |             assert size > gap, f'invalid size gap pair [{size} {gap}]'
150 | step = size - gap
151 |
152 | x_num = 1 if width <= size else ceil((width - size) / step + 1)
153 | x_start = [step * i for i in range(x_num)]
154 | if len(x_start) > 1 and x_start[-1] + size > width:
155 | x_start[-1] = width - size
156 |
157 | y_num = 1 if height <= size else ceil((height - size) / step + 1)
158 | y_start = [step * i for i in range(y_num)]
159 | if len(y_start) > 1 and y_start[-1] + size > height:
160 | y_start[-1] = height - size
161 |
162 | start = np.array(
163 | list(itertools.product(x_start, y_start)), dtype=np.int64)
164 | stop = start + size
165 | windows.append(np.concatenate([start, stop], axis=1))
166 | windows = np.concatenate(windows, axis=0)
167 |
168 | img_in_wins = windows.copy()
169 | img_in_wins[:, 0::2] = np.clip(img_in_wins[:, 0::2], 0, width)
170 | img_in_wins[:, 1::2] = np.clip(img_in_wins[:, 1::2], 0, height)
171 | img_areas = (img_in_wins[:, 2] - img_in_wins[:, 0]) * \
172 | (img_in_wins[:, 3] - img_in_wins[:, 1])
173 | win_areas = (windows[:, 2] - windows[:, 0]) * \
174 | (windows[:, 3] - windows[:, 1])
175 | img_rates = img_areas / win_areas
176 | if not (img_rates > img_rate_thr).any():
177 | max_rate = img_rates.max()
178 | img_rates[abs(img_rates - max_rate) < eps] = 1
179 | return windows[img_rates > img_rate_thr]
180 |
181 |
182 | def poly2hbb(polys):
183 | """Convert polygons to horizontal bboxes.
184 |
185 | Args:
186 | polys (np.array): Polygons with shape (N, 8)
187 |
188 | Returns:
189 | np.array: Horizontal bboxes.
190 | """
191 | shape = polys.shape
192 | polys = polys.reshape(*shape[:-1], shape[-1] // 2, 2)
193 | lt_point = np.min(polys, axis=-2)
194 | rb_point = np.max(polys, axis=-2)
195 | return np.concatenate([lt_point, rb_point], axis=-1)
196 |
197 |
198 | def bbox_overlaps_iof(bboxes1, bboxes2, eps=1e-6):
199 | """Compute bbox overlaps (iof).
200 |
201 | Args:
202 |         bboxes1 (np.array): Polygons with shape (N, 8).
203 |         bboxes2 (np.array): Horizontal bboxes with shape (K, 4).
204 | eps (float, optional): Defaults to 1e-6.
205 |
206 | Returns:
207 | np.array: Overlaps.
208 | """
209 | rows = bboxes1.shape[0]
210 | cols = bboxes2.shape[0]
211 |
212 | if rows * cols == 0:
213 | return np.zeros((rows, cols), dtype=np.float32)
214 |
215 | hbboxes1 = poly2hbb(bboxes1)
216 | hbboxes2 = bboxes2
217 | hbboxes1 = hbboxes1[:, None, :]
218 | lt = np.maximum(hbboxes1[..., :2], hbboxes2[..., :2])
219 | rb = np.minimum(hbboxes1[..., 2:], hbboxes2[..., 2:])
220 | wh = np.clip(rb - lt, 0, np.inf)
221 | h_overlaps = wh[..., 0] * wh[..., 1]
222 |
223 | l, t, r, b = [bboxes2[..., i] for i in range(4)]
224 | polys2 = np.stack([l, t, r, t, r, b, l, b], axis=-1)
225 | if shgeo is None:
226 | raise ImportError('Please run "pip install shapely" '
227 | 'to install shapely first.')
228 | sg_polys1 = [shgeo.Polygon(p) for p in bboxes1.reshape(rows, -1, 2)]
229 | sg_polys2 = [shgeo.Polygon(p) for p in polys2.reshape(cols, -1, 2)]
230 | overlaps = np.zeros(h_overlaps.shape)
231 | for p in zip(*np.nonzero(h_overlaps)):
232 | overlaps[p] = sg_polys1[p[0]].intersection(sg_polys2[p[-1]]).area
233 | unions = np.array([p.area for p in sg_polys1], dtype=np.float32)
234 | unions = unions[..., None]
235 |
236 | unions = np.clip(unions, eps, np.inf)
237 | outputs = overlaps / unions
238 | if outputs.ndim == 1:
239 | outputs = outputs[..., None]
240 | return outputs
241 |
242 |
243 | def get_window_obj(info, windows, iof_thr):
244 |     """Collect the annotations that fall inside each window.
245 |
246 | Args:
247 | info (dict): Dict of bbox annotations.
248 | windows (np.array): information of sliding windows.
249 | iof_thr (float): Threshold of overlaps between bbox and window.
250 |
251 | Returns:
252 | list[dict]: List of bbox annotations of every window.
253 | """
254 | bboxes = info['ann']['bboxes']
255 | iofs = bbox_overlaps_iof(bboxes, windows)
256 |
257 | window_anns = []
258 | for i in range(windows.shape[0]):
259 | win_iofs = iofs[:, i]
260 | pos_inds = np.nonzero(win_iofs >= iof_thr)[0].tolist()
261 |
262 | win_ann = dict()
263 | for k, v in info['ann'].items():
264 | try:
265 | win_ann[k] = v[pos_inds]
266 | except TypeError:
267 | win_ann[k] = [v[i] for i in pos_inds]
268 | win_ann['trunc'] = win_iofs[pos_inds] < 1
269 | window_anns.append(win_ann)
270 | return window_anns
271 |
272 |
273 | def crop_and_save_img(info, windows, window_anns, img_dir, no_padding,
274 | padding_value, save_dir, anno_dir, img_ext):
275 |     """Crop patches from the image and save them with their annotations.
276 |
277 | Args:
278 | info (dict): Image's information.
279 | windows (np.array): information of sliding windows.
280 | window_anns (list[dict]): List of bbox annotations of every window.
281 | img_dir (str): Path of images.
282 | no_padding (bool): If True, no padding.
283 | padding_value (tuple[int|float]): Padding value.
284 |         save_dir (str): Directory for saving patch images.
285 |         anno_dir (str): Directory for saving patch annotation files.
286 |         img_ext (str): Image file extension.
287 |
288 | Returns:
289 | list[dict]: Information of paths.
290 | """
291 | img = cv2.imread(osp.join(img_dir, info['filename']))
292 | patch_infos = []
293 | for i in range(windows.shape[0]):
294 | patch_info = dict()
295 | for k, v in info.items():
296 |             if k not in ['id', 'filename', 'width', 'height', 'ann']:
297 | patch_info[k] = v
298 |
299 | window = windows[i]
300 | x_start, y_start, x_stop, y_stop = window.tolist()
301 | patch_info['x_start'] = x_start
302 | patch_info['y_start'] = y_start
303 | patch_info['id'] = \
304 | info['id'] + '__' + str(x_stop - x_start) + \
305 | '__' + str(x_start) + '___' + str(y_start)
306 | patch_info['ori_id'] = info['id']
307 |
308 | ann = window_anns[i]
309 | ann['bboxes'] = translate(ann['bboxes'], -x_start, -y_start)
310 | patch_info['ann'] = ann
311 |
312 | patch = img[y_start:y_stop, x_start:x_stop]
313 | if not no_padding:
314 | height = y_stop - y_start
315 | width = x_stop - x_start
316 | if height > patch.shape[0] or width > patch.shape[1]:
317 | padding_patch = np.empty((height, width, patch.shape[-1]),
318 | dtype=np.uint8)
319 | if not isinstance(padding_value, (int, float)):
320 | assert len(padding_value) == patch.shape[-1]
321 | padding_patch[...] = padding_value
322 | padding_patch[:patch.shape[0], :patch.shape[1], ...] = patch
323 | patch = padding_patch
324 | patch_info['height'] = patch.shape[0]
325 | patch_info['width'] = patch.shape[1]
326 |
327 | cv2.imwrite(osp.join(save_dir, patch_info['id'] + img_ext), patch)
328 | patch_info['filename'] = patch_info['id'] + img_ext
329 | patch_infos.append(patch_info)
330 |
331 | bboxes_num = patch_info['ann']['bboxes'].shape[0]
332 | outdir = os.path.join(anno_dir, patch_info['id'] + '.txt')
333 |
334 | with codecs.open(outdir, 'w', 'utf-8') as f_out:
335 | if bboxes_num == 0:
336 | pass
337 | else:
338 | for idx in range(bboxes_num):
339 | obj = patch_info['ann']
340 | outline = ' '.join(list(map(str, obj['bboxes'][idx])))
341 | diffs = str(
342 | obj['diffs'][idx]) if not obj['trunc'][idx] else '2'
343 | outline = outline + ' ' + obj['labels'][idx] + ' ' + diffs
344 | f_out.write(outline + '\n')
345 |
346 | return patch_infos
347 |
348 |
349 | def single_split(arguments, sizes, gaps, img_rate_thr, iof_thr, no_padding,
350 | padding_value, save_dir, anno_dir, img_ext, lock, prog, total,
351 | logger):
352 |     """Split a single image into patches and save the results.
353 |
354 | Args:
355 | arguments (object): Parameters.
356 | sizes (list): List of window's sizes.
357 | gaps (list): List of window's gaps.
358 | img_rate_thr (float): Threshold of window area divided by image area.
359 | iof_thr (float): Threshold of overlaps between bbox and window.
360 | no_padding (bool): If True, no padding.
361 | padding_value (tuple[int|float]): Padding value.
362 |         save_dir (str): Directory for saving patch images.
363 |         anno_dir (str): Directory for saving patch annotation files.
364 |         img_ext (str): Image file extension.
365 | lock (object): Lock of Manager.
366 | prog (object): Progress of Manager.
367 | total (object): Length of infos.
368 | logger (object): Logger.
369 |
370 | Returns:
371 | list[dict]: Information of paths.
372 | """
373 | info, img_dir = arguments
374 | windows = get_sliding_window(info, sizes, gaps, img_rate_thr)
375 | window_anns = get_window_obj(info, windows, iof_thr)
376 | patch_infos = crop_and_save_img(info, windows, window_anns, img_dir,
377 | no_padding, padding_value, save_dir,
378 | anno_dir, img_ext)
379 | assert patch_infos
380 |
381 | lock.acquire()
382 | prog.value += 1
383 | msg = f'({prog.value / total:3.1%} {prog.value}:{total})'
384 | msg += ' - ' + f"Filename: {info['filename']}"
385 | msg += ' - ' + f"width: {info['width']:<5d}"
386 | msg += ' - ' + f"height: {info['height']:<5d}"
387 | msg += ' - ' + f"Objects: {len(info['ann']['bboxes']):<5d}"
388 | msg += ' - ' + f'Patches: {len(patch_infos)}'
389 | logger.info(msg)
390 | lock.release()
391 |
392 | return patch_infos
393 |
394 |
395 | def setup_logger(log_path):
396 | """Setup logger.
397 |
398 | Args:
399 | log_path (str): Path of log.
400 |
401 | Returns:
402 | object: Logger.
403 | """
404 | logger = logging.getLogger('img split')
405 | formatter = logging.Formatter('%(asctime)s - %(message)s')
406 | now = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
407 | log_path = osp.join(log_path, now + '.log')
408 | handlers = [logging.StreamHandler(), logging.FileHandler(log_path, 'w')]
409 |
410 | for handler in handlers:
411 | handler.setFormatter(formatter)
412 | handler.setLevel(logging.INFO)
413 | logger.addHandler(handler)
414 | logger.setLevel(logging.INFO)
415 | return logger
416 |
417 |
418 | def translate(bboxes, x, y):
419 |     """Shift bboxes by the given offsets (e.g. from image to patch coordinates).
420 |
421 |     Args:
422 |         bboxes (np.array): Bboxes to shift.
423 |         x (float): Offset along the x-axis.
424 |         y (float): Offset along the y-axis.
425 |
426 |     Returns:
427 |         np.array: Shifted bboxes.
428 |     """
429 | dim = bboxes.shape[-1]
430 | translated = bboxes + np.array([x, y] * int(dim / 2), dtype=np.float32)
431 | return translated
432 |
433 |
434 | def load_dota(img_dir, ann_dir=None, nproc=10):
435 | """Load DOTA dataset.
436 |
437 | Args:
438 | img_dir (str): Path of images.
439 | ann_dir (str): Path of annotations.
440 | nproc (int): number of processes.
441 |
442 | Returns:
443 | list: Dataset's contents.
444 | """
445 | assert osp.isdir(img_dir), f'The {img_dir} is not an existing dir!'
446 | assert ann_dir is None or osp.isdir(
447 | ann_dir), f'The {ann_dir} is not an existing dir!'
448 |
449 |     print('Loading DOTA dataset information.')
450 | start_time = time.time()
451 | _load_func = partial(_load_dota_single, img_dir=img_dir, ann_dir=ann_dir)
452 | if nproc > 1:
453 | pool = Pool(nproc)
454 | contents = pool.map(_load_func, os.listdir(img_dir))
455 | pool.close()
456 | else:
457 | contents = list(map(_load_func, os.listdir(img_dir)))
458 | contents = [c for c in contents if c is not None]
459 | end_time = time.time()
460 |     print(f'Finished loading DOTA: {len(contents)} images,',
461 | f'using {end_time - start_time:.3f}s.')
462 |
463 | return contents
464 |
465 |
466 | def _load_dota_single(imgfile, img_dir, ann_dir):
467 | """Load DOTA's single image.
468 |
469 | Args:
470 | imgfile (str): Filename of single image.
471 | img_dir (str): Path of images.
472 | ann_dir (str): Path of annotations.
473 |
474 | Returns:
475 | dict: Content of single image.
476 | """
477 | img_id, ext = osp.splitext(imgfile)
478 | if ext not in ['.jpg', '.JPG', '.png', '.tif', '.bmp']:
479 | return None
480 |
481 | imgpath = osp.join(img_dir, imgfile)
482 | size = Image.open(imgpath).size
483 | txtfile = None if ann_dir is None else osp.join(ann_dir, img_id + '.txt')
484 | content = _load_dota_txt(txtfile)
485 |
486 | content.update(
487 | dict(width=size[0], height=size[1], filename=imgfile, id=img_id))
488 | return content
489 |
490 |
491 | def _load_dota_txt(txtfile):
492 | """Load DOTA's txt annotation.
493 |
494 | Args:
495 | txtfile (str): Filename of single txt annotation.
496 |
497 | Returns:
498 | dict: Annotation of single image.
499 | """
500 | gsd, bboxes, labels, diffs = None, [], [], []
501 | if txtfile is None:
502 | pass
503 | elif not osp.isfile(txtfile):
504 | print(f"Can't find {txtfile}, treated as empty txtfile")
505 | else:
506 | with open(txtfile, 'r') as f:
507 | for line in f:
508 | if line.startswith('gsd'):
509 | num = line.split(':')[-1]
510 | try:
511 | gsd = float(num)
512 | except ValueError:
513 | gsd = None
514 | continue
515 |
516 |                 items = line.strip().split()  # tolerate trailing newlines / extra spaces
517 | if len(items) >= 9:
518 | bboxes.append([float(i) for i in items[:8]])
519 | labels.append(items[8])
520 | diffs.append(int(items[9]) if len(items) == 10 else 0)
521 |
522 | bboxes = np.array(bboxes, dtype=np.float32) if bboxes else \
523 | np.zeros((0, 8), dtype=np.float32)
524 | diffs = np.array(diffs, dtype=np.int64) if diffs else \
525 | np.zeros((0,), dtype=np.int64)
526 | ann = dict(bboxes=bboxes, labels=labels, diffs=diffs)
527 | return dict(gsd=gsd, ann=ann)
528 |
529 |
530 | def main():
531 | """Main function of image split."""
532 | args = parse_args()
533 |
534 | if args.ann_dirs is None:
535 | args.ann_dirs = [None for _ in range(len(args.img_dirs))]
536 | padding_value = args.padding_value[0] \
537 | if len(args.padding_value) == 1 else args.padding_value
538 | sizes, gaps = [], []
539 | for rate in args.rates:
540 | sizes += [int(size / rate) for size in args.sizes]
541 | gaps += [int(gap / rate) for gap in args.gaps]
542 | save_imgs = osp.join(args.save_dir, 'images')
543 | save_files = osp.join(args.save_dir, 'annfiles')
544 | os.makedirs(save_imgs)
545 | os.makedirs(save_files)
546 | logger = setup_logger(args.save_dir)
547 |
548 | print('Loading original data!!!')
549 | infos, img_dirs = [], []
550 | for img_dir, ann_dir in zip(args.img_dirs, args.ann_dirs):
551 | _infos = load_dota(img_dir=img_dir, ann_dir=ann_dir, nproc=args.nproc)
552 | _img_dirs = [img_dir for _ in range(len(_infos))]
553 | infos.extend(_infos)
554 | img_dirs.extend(_img_dirs)
555 |
556 | print('Start splitting images!!!')
557 | start = time.time()
558 | manager = Manager()
559 | worker = partial(
560 | single_split,
561 | sizes=sizes,
562 | gaps=gaps,
563 | img_rate_thr=args.img_rate_thr,
564 | iof_thr=args.iof_thr,
565 | no_padding=args.no_padding,
566 | padding_value=padding_value,
567 | save_dir=save_imgs,
568 | anno_dir=save_files,
569 | img_ext=args.save_ext,
570 | lock=manager.Lock(),
571 | prog=manager.Value('i', 0),
572 | total=len(infos),
573 | logger=logger)
574 |
575 | if args.nproc > 1:
576 | pool = Pool(args.nproc)
577 | patch_infos = pool.map(worker, zip(infos, img_dirs))
578 | pool.close()
579 | else:
580 | patch_infos = list(map(worker, zip(infos, img_dirs)))
581 |
582 | patch_infos = reduce(lambda x, y: x + y, patch_infos)
583 | stop = time.time()
584 |     print(f'Finished splitting images in {int(stop - start)} seconds!!!')
585 |     print(f'Total number of patches: {len(patch_infos)}')
586 |
587 |
588 | if __name__ == '__main__':
589 | main()
590 |
--------------------------------------------------------------------------------