├── img
│   ├── HR.jpg
│   ├── Angle.jpg
│   ├── Class.jpg
│   ├── Height.jpg
│   ├── Sample.jpg
│   ├── differ.jpg
│   ├── domain.jpg
│   └── first.jpg
├── split_configs
│   ├── ss_test.json
│   ├── ss_val.json
│   └── ss_train.json
├── Vis_CODrone_GT.py
├── README.md
└── CODrone_Split.py
/img/HR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/HR.jpg
--------------------------------------------------------------------------------
/img/Angle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/Angle.jpg
--------------------------------------------------------------------------------
/img/Class.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/Class.jpg
--------------------------------------------------------------------------------
/img/Height.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/Height.jpg
--------------------------------------------------------------------------------
/img/Sample.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/Sample.jpg
--------------------------------------------------------------------------------
/img/differ.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/differ.jpg
--------------------------------------------------------------------------------
/img/domain.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/domain.jpg
--------------------------------------------------------------------------------
/img/first.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AHideoKuzeA/CODrone-A-Comprehensive-Oriented-Object-Detection-benchmark-for-UAV/HEAD/img/first.jpg
--------------------------------------------------------------------------------
/split_configs/ss_test.json:
--------------------------------------------------------------------------------
1 | {
2 | "nproc": 10,
3 | "img_dirs": [
4 | "your image directory"
5 | ],
6 | "ann_dirs": ["your annotation directory"],
7 | "sizes": [
8 | 1180
9 | ],
10 | "gaps": [
11 | 200
12 | ],
13 | "rates": [
14 | 1.0
15 | ],
16 | "img_rate_thr": 0.6,
17 | "iof_thr": 0.7,
18 | "no_padding": false,
19 | "padding_value": [
20 | 104,
21 | 116,
22 | 124
23 | ],
24 | "save_dir": "your output directory",
25 | "save_ext": ".png"
26 | }
27 |
--------------------------------------------------------------------------------
/split_configs/ss_val.json:
--------------------------------------------------------------------------------
1 | {
2 | "nproc": 10,
3 | "img_dirs": [
4 | "your image directory"
5 | ],
6 | "ann_dirs": [
7 | "your annotation directory"
8 | ],
9 | "sizes": [
10 | 1180
11 | ],
12 | "gaps": [
13 | 200
14 | ],
15 | "rates": [
16 | 1.0
17 | ],
18 | "img_rate_thr": 0.6,
19 | "iof_thr": 0.7,
20 | "no_padding": false,
21 | "padding_value": [
22 | 104,
23 | 116,
24 | 124
25 | ],
26 | "save_dir": "your save directory",
27 | "save_ext": ".png"
28 | }
29 |
--------------------------------------------------------------------------------
/split_configs/ss_train.json:
--------------------------------------------------------------------------------
1 | {
2 | "nproc": 10,
3 | "img_dirs": [
4 | "your image directory"
5 | ],
6 | "ann_dirs": [
7 | "your annotation directory"
8 | ],
9 | "sizes": [
10 | 1180
11 | ],
12 | "gaps": [
13 | 200
14 | ],
15 | "rates": [
16 | 1.0
17 | ],
18 | "img_rate_thr": 0.6,
19 | "iof_thr": 0.7,
20 | "no_padding": false,
21 | "padding_value": [
22 | 104,
23 | 116,
24 | 124
25 | ],
26 | "save_dir": "your output directory",
27 | "save_ext": ".png"
28 | }
29 |
--------------------------------------------------------------------------------
/Vis_CODrone_GT.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import numpy as np
4 |
5 |
6 | def draw_boxes(image_path, label_path, output_path=None, class_colors=None, thickness=2):
7 | """
8 | Draw oriented bounding boxes (OBB) from DOTA-format annotations.
9 |
10 | Args:
11 | image_path (str): Path to the input image.
12 | label_path (str): Path to the annotation (.txt) file.
13 |         output_path (str, optional): Path to save the output image. If None, nothing is written.
14 | class_colors (dict[str, tuple[int, int, int]], optional):
15 | Mapping of class name to BGR color, e.g. {"plane": (0,255,0)}.
16 | Defaults to green if not provided.
17 | thickness (int, optional): Line thickness of the bounding box. Defaults to 2.
18 | """
19 | # Load the image
20 | image = cv2.imread(image_path)
21 |
22 | # Read labels
23 | with open(label_path, 'r') as f:
24 | lines = f.readlines()
25 |
26 |     for line in lines:
27 |         parts = line.strip().split()
28 |         if len(parts) < 9:  # skip blank lines and metadata headers (e.g. 'gsd:...')
29 |             continue
30 |         coords = np.array(list(map(float, parts[:8]))).reshape(4, 2).astype(np.int32)
31 | cls_name = parts[8]
32 | color = class_colors.get(cls_name, (0, 255, 0)) if class_colors else (0, 255, 0)
33 |
34 | # Draw oriented bounding box
35 | cv2.polylines(image, [coords], isClosed=True, color=color, thickness=thickness)
36 |
37 | # Draw class label text at box center
38 | cx, cy = np.mean(coords, axis=0).astype(int)
39 | cv2.putText(
40 | image, cls_name, (cx, cy),
41 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA
42 | )
43 |
44 |     if output_path:
45 |         os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)  # dirname may be ''
46 |         cv2.imwrite(output_path, image)
47 |
48 |
49 | def main():
50 | """Batch visualization of DOTA-format ground truth annotations."""
51 | img_dir = "your image directory"
52 | label_dir = "your label directory"
53 | output_dir = "your output directory"
54 |
55 | os.makedirs(output_dir, exist_ok=True)
56 |
57 | for img_name in os.listdir(img_dir):
58 | if not img_name.lower().endswith(('.png', '.jpg', '.jpeg', '.tif')):
59 | continue
60 |
61 | img_path = os.path.join(img_dir, img_name)
62 | label_path = os.path.join(label_dir, os.path.splitext(img_name)[0] + ".txt")
63 | out_path = os.path.join(output_dir, img_name)
64 |
65 | draw_boxes(img_path, label_path, out_path)
66 |
67 |
68 | if __name__ == "__main__":
69 | main()
70 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CODrone: A Comprehensive Oriented Object Detection benchmark for UAV
2 |
11 | **😊 TL;DR**
12 |
13 | CODrone is a comprehensive oriented object detection dataset for UAVs that accurately reflects real-world conditions.
14 |
30 | **⭐ Key contributions of CODrone**:
31 |
32 | - We propose a **large-scale**, **high-resolution** **UAV**-**oriented object detection** dataset, CODrone, which consists of over ten thousand UAV-captured images with precise **oriented bounding box** annotations and **diverse object categories**.
33 | - The proposed CODrone dataset considers multiple influential factors, including **image acquisition altitude**, **camera perspective**, **lighting conditions**, and **geographic location**.
34 | - Based on the proposed dataset, we establish a UAV-oriented object detection benchmark and conduct training and evaluation with a set of representative state-of-the-art methods.
35 |
36 | **🚀 Try it out!!!**:
37 |
38 | - 📥 [Download from Google Drive](https://drive.google.com/file/d/1FQ6mUaOr_kATDaH7N2bObD5SRRkV7qJy/view?usp=sharing)
39 |
40 | - 📥 [Download from Quark Drive](https://pan.quark.cn/s/6028b38fe7c8)
41 |
42 | ```shell
43 | CODrone/
44 | ├── test/
45 | │ ├── annfile/ # DOTA format annotation
46 | │ ├── images/
47 | │ └── labels/ # VOC format annotation
48 | ├── train/
49 | │ ├── annfile/
50 | │ ├── images/
51 | │ └── labels/
52 | └── val/
53 | ├── annfile/
54 | ├── images/
55 | └── labels/
56 | ```
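
Each `.txt` under `annfile/` is a DOTA-format annotation: one object per line, eight polygon corner coordinates followed by the class name and an optional difficulty flag. This is the format that `Vis_CODrone_GT.py` and `CODrone_Split.py` parse. A minimal reading sketch (the file path is illustrative):

```python
def load_annfile(path):
    """Parse a DOTA-format txt: x1 y1 x2 y2 x3 y3 x4 y4 class_name [difficulty]."""
    objects = []
    with open(path, "r") as f:
        for line in f:
            items = line.strip().split()
            if len(items) < 9:  # skip blank lines and 'gsd:'-style header lines
                continue
            objects.append({
                "poly": [float(v) for v in items[:8]],                 # 4 corner points
                "label": items[8],
                "difficulty": int(items[9]) if len(items) > 9 else 0,  # optional
            })
    return objects

# objs = load_annfile("CODrone/train/annfile/0001.txt")  # illustrative file name
```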
57 |
58 | ## Characteristics
59 |
60 |
61 | | Dataset | Resolution | Categories | Altitude Gap | Camera Angles | Images | Objects | OBB |
62 | |:--------------:|:----------:|:----------:|:------------:|:-------------:|:------:|:-------:|:---:|
63 | | VisDrone2019 | 2000×1500 | 10 | * | * | 10.2k | 54.2k | |
64 | | UAVDT | 1080×540 | 3 | 60m | * | 80.0k | 841.5k | |
65 | | AU-AIR | 1920×1080 | 8 | 25m | 45° | 3.2k | 132.0k | |
66 | | CARPK | 1280×720 | 1 | * | * | 1.4k | 89.7k | |
67 | | HazyDet | 1333×800 | 3 | * | * | 11.6k | 383.0k | |
68 | | DroneVehicle | 840×712 | 5 | 40m | 30° | 56.8k | 953.0k | ✅ |
69 | | UAV-ROD | 1920×1080 | 1 | 50m | * | 1.5k | 30.0k | ✅ |
70 | | **CODrone (ours)** | **3840×2160** | **12** | **70m** | **60°** | 10.0k | 596.7k | ✅ |
71 |
72 |
73 | We present a comparison between CODrone and other commonly used UAV-based object detection datasets.
74 | CODrone significantly expands several key dimensions, including image resolution, object category diversity, and variation in flight altitude and camera angle.
75 | For resolution, CODrone employs a 3840 × 2160 high-resolution onboard camera, aligning with the capabilities of modern UAV hardware.
76 | In terms of object classes, unlike most existing UAV oriented object detection datasets that focus primarily on vehicles, CODrone includes a more diverse range of categories, thereby increasing the difficulty and realism of the detection task.
77 | Furthermore, we explicitly annotate both altitude and camera angle for each image, enabling research into UAV pose-aware perception and related tasks.
78 |
79 |
80 | ### High resolution brings more high-quality information
81 |
87 | CODrone employs a **3840 × 2160** high-resolution onboard camera, aligning with the capabilities of modern UAV hardware.
88 |
89 | ### Multi-altitude and multi-angle captures for broad flight scenario adaptation
90 |
106 | The UAV was configured to capture imagery from two camera angles (**30°** and **90°**) and at three flight altitudes (**30 m**, **60 m**, and **100 m**), resulting in a total of **6** unique viewpoint combinations.
107 |
108 |
109 | ### More diverse scenes, broader application potential
110 |
116 | CODrone covers a wide range of environments, from urban areas and rural towns to ports and industrial zones, encompassing most scene types encountered in real-world UAV-based urban applications.
117 |
118 | ## How to visualize CODrone annotations
119 |
120 | 1) Install
121 | ```bash
122 | pip install opencv-python numpy
123 | ```
124 |
125 | 2) Edit paths in `Vis_CODrone_GT.py` (inside `main()`):
126 | ```python
127 | img_dir = "CODrone/train/images"
128 | label_dir = "CODrone/train/annfile"
129 | output_dir = "CODrone/train/viz"
130 | ```
131 |
132 | 3) Run
133 | ```bash
134 | python Vis_CODrone_GT.py
135 | ```
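
`draw_boxes()` also accepts an optional `class_colors` mapping if you want per-class colors instead of the default green. A small sketch (paths and class names below are illustrative, not the full CODrone label set):

```python
from Vis_CODrone_GT import draw_boxes

# Per-class BGR colors; any class not listed falls back to green.
colors = {"car": (0, 255, 255), "bus": (255, 0, 0), "pedestrian": (0, 0, 255)}

draw_boxes(
    image_path="CODrone/train/images/0001.png",
    label_path="CODrone/train/annfile/0001.txt",
    output_path="CODrone/train/viz/0001.png",
    class_colors=colors,
    thickness=2,
)
```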
136 |
137 | ## How to split CODrone images
138 |
139 | 1) Install
140 | ```bash
141 | pip install opencv-python numpy pillow shapely
142 | ```
143 |
144 | 2) Run the DOTA-style sliding-window split (with IOF-based object assignment), either with **direct args**:
145 | ```bash
146 | python CODrone_Split.py --img-dirs CODrone/train/images --ann-dirs CODrone/train/annfile --sizes 1024 --gaps 512 --rates 1.0 --img-rate-thr 0.6 --iof-thr 0.7 --save-dir CODrone/train/splits --save-ext .png --nproc 8
147 | ```
148 |
149 | or **via JSON config** (recommended; CLI flags override JSON):
150 | ```bash
151 | # examples (pick one)
152 | python CODrone_Split.py --base-json split_configs/ss_train.json
153 | python CODrone_Split.py --base-json split_configs/ss_val.json
154 | python CODrone_Split.py --base-json split_configs/ss_test.json
155 | ```
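
For reference, `split_configs/ss_train.json` with its placeholder paths filled in for the directory layout shown above might look like this (the paths are illustrative):

```json
{
  "nproc": 10,
  "img_dirs": ["CODrone/train/images"],
  "ann_dirs": ["CODrone/train/annfile"],
  "sizes": [1180],
  "gaps": [200],
  "rates": [1.0],
  "img_rate_thr": 0.6,
  "iof_thr": 0.7,
  "no_padding": false,
  "padding_value": [104, 116, 124],
  "save_dir": "CODrone/train/splits",
  "save_ext": ".png"
}
```

Note that `save_dir` must not exist yet: the script refuses to overwrite an existing directory and creates `images/` and `annfiles/` inside it.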
156 |
157 | 3) Output
158 | - Image patches: `CODrone/train/splits/images/`
159 | - DOTA-style annotation txts for the patches: `CODrone/train/splits/annfiles/`
160 |
161 | **Note:** Tiling (`sizes`/`gaps`/`rates`, where `gaps` is the overlap between adjacent windows, so the stride is `size - gap`), IOF-based object assignment, truncation handling, and label writing strictly follow **DOTA devkit** conventions.
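
Patch filenames encode their window offset as `<ori_id>__<win_size>__<x_start>___<y_start>` (see `crop_and_save_img()` in `CODrone_Split.py`), so results obtained on a patch can be shifted back into original-image coordinates. A minimal sketch (the patch name and polygon below are illustrative):

```python
import numpy as np

def patch_to_original(patch_stem, poly):
    """Shift an 8-value polygon from patch coordinates back to the original image."""
    left, y_start = patch_stem.rsplit("___", 1)
    ori_id, _win_size, x_start = left.rsplit("__", 2)
    offset = np.array([int(x_start), int(y_start)] * 4, dtype=np.float32)
    return ori_id, np.asarray(poly, dtype=np.float32) + offset

ori_id, poly = patch_to_original(
    "P0001__1024__512___1024", [10, 20, 60, 20, 60, 50, 10, 50])
print(ori_id, poly)  # 'P0001', polygon shifted by (+512, +1024)
```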
162 |
163 | ## Citation
164 |
165 | ```
166 | @misc{ye2025clearflexibleprecisecomprehensive,
167 | title={More Clear, More Flexible, More Precise: A Comprehensive Oriented Object Detection benchmark for UAV},
168 | author={Kai Ye and Haidi Tang and Bowen Liu and Pingyang Dai and Liujuan Cao and Rongrong Ji},
169 | year={2025},
170 | eprint={2504.20032},
171 | archivePrefix={arXiv},
172 | primaryClass={cs.CV},
173 | url={https://arxiv.org/abs/2504.20032},
174 | }
175 | ```
176 |
177 |
178 | ## License
179 |
180 | Licensed under the CC BY-NC-SA 4.0 license (Attribution-NonCommercial-ShareAlike 4.0 International).
181 |
182 |
183 | The code is released for academic research use only.
184 |
185 | If you have any questions, please contact me at [yekai@stu.xmu.edu.cn](mailto:yekai@stu.xmu.edu.cn).
186 |
--------------------------------------------------------------------------------
/CODrone_Split.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import codecs
3 | import datetime
4 | import itertools
5 | import json
6 | import logging
7 | import os
8 | import os.path as osp
9 | import time
10 | from functools import partial, reduce
11 | from math import ceil
12 | from multiprocessing import Manager, Pool
13 |
14 | import cv2
15 | import numpy as np
16 | from PIL import Image
17 |
18 | Image.MAX_IMAGE_PIXELS = None
19 |
20 | try:
21 | import shapely.geometry as shgeo
22 | except ImportError:
23 | shgeo = None
24 |
25 |
26 | def add_parser(parser):
27 | """Add arguments."""
28 | parser.add_argument(
29 | '--base-json',
30 | type=str,
31 | default=None,
32 | help='json config file for split images')
33 | parser.add_argument(
34 | '--nproc', type=int, default=10, help='the procession number')
35 |
36 | # argument for loading data
37 | parser.add_argument(
38 | '--img-dirs',
39 | nargs='+',
40 | type=str,
41 | default=None,
42 |         help='image directories (required)')
43 | parser.add_argument(
44 | '--ann-dirs',
45 | nargs='+',
46 | type=str,
47 | default=None,
48 |         help='annotation directories (optional)')
49 |
50 | # argument for splitting image
51 | parser.add_argument(
52 | '--sizes',
53 | nargs='+',
54 | type=int,
55 | default=[1024],
56 | help='the sizes of sliding windows')
57 | parser.add_argument(
58 | '--gaps',
59 | nargs='+',
60 | type=int,
61 | default=[512],
62 |         help='the overlaps of sliding windows (stride = size - gap)')
63 | parser.add_argument(
64 | '--rates',
65 | nargs='+',
66 | type=float,
67 | default=[1.],
68 |         help='same as the DOTA devkit rate, but it only changes the window size')
69 | parser.add_argument(
70 | '--img-rate-thr',
71 | type=float,
72 | default=0.6,
73 |         help='the minimal ratio of image area inside a window to the window area')
74 | parser.add_argument(
75 | '--iof-thr',
76 | type=float,
77 | default=0.7,
78 |         help='the minimal IoF between an object and a window')
79 | parser.add_argument(
80 | '--no-padding',
81 | action='store_true',
82 |         help='do not pad patches to the regular window size')
83 | parser.add_argument(
84 | '--padding-value',
85 | nargs='+',
86 | type=int,
87 | default=[0],
88 |         help='padding value: a single number or one value per channel')
89 |
90 | # argument for saving
91 | parser.add_argument(
92 | '--save-dir',
93 | type=str,
94 | default='.',
95 |         help='directory to save split images and annotation files')
96 | parser.add_argument(
97 | '--save-ext',
98 | type=str,
99 | default='.png',
100 |         help='the file extension of saved images')
101 |
102 |
103 | def parse_args():
104 | """Parse arguments."""
105 | parser = argparse.ArgumentParser(description='Splitting images')
106 | add_parser(parser)
107 | args = parser.parse_args()
108 |
109 | if args.base_json is not None:
110 | with open(args.base_json, 'r') as f:
111 | prior_config = json.load(f)
112 |
113 | for action in parser._actions:
114 | if action.dest not in prior_config or \
115 | not hasattr(action, 'default'):
116 | continue
117 | action.default = prior_config[action.dest]
118 | args = parser.parse_args()
119 |
120 | # assert arguments
121 | assert args.img_dirs is not None, "argument img_dirs can't be None"
122 | assert args.ann_dirs is None or len(args.ann_dirs) == len(args.img_dirs)
123 | assert len(args.sizes) == len(args.gaps)
124 | assert len(args.sizes) == 1 or len(args.rates) == 1
125 |     assert args.save_ext in ['.png', '.jpg', '.bmp', '.tif']
126 |     assert 0 <= args.iof_thr <= 1
128 |     assert not osp.exists(args.save_dir), \
129 |         f'{args.save_dir} already exists'
130 | return args
131 |
132 |
133 | def get_sliding_window(info, sizes, gaps, img_rate_thr):
134 | """Get sliding windows.
135 |
136 | Args:
137 | info (dict): Dict of image's width and height.
138 | sizes (list): List of window's sizes.
139 | gaps (list): List of window's gaps.
140 |         img_rate_thr (float): Threshold on the ratio of image area inside a window to the window area.
141 |
142 | Returns:
143 | list[np.array]: Information of valid windows.
144 | """
145 | eps = 0.01
146 | windows = []
147 | width, height = info['width'], info['height']
148 | for size, gap in zip(sizes, gaps):
149 |             assert size > gap, f'invalid size gap pair [{size} {gap}]'
150 | step = size - gap
151 |
152 | x_num = 1 if width <= size else ceil((width - size) / step + 1)
153 | x_start = [step * i for i in range(x_num)]
154 | if len(x_start) > 1 and x_start[-1] + size > width:
155 | x_start[-1] = width - size
156 |
157 | y_num = 1 if height <= size else ceil((height - size) / step + 1)
158 | y_start = [step * i for i in range(y_num)]
159 | if len(y_start) > 1 and y_start[-1] + size > height:
160 | y_start[-1] = height - size
161 |
162 | start = np.array(
163 | list(itertools.product(x_start, y_start)), dtype=np.int64)
164 | stop = start + size
165 | windows.append(np.concatenate([start, stop], axis=1))
166 | windows = np.concatenate(windows, axis=0)
167 |
168 | img_in_wins = windows.copy()
169 | img_in_wins[:, 0::2] = np.clip(img_in_wins[:, 0::2], 0, width)
170 | img_in_wins[:, 1::2] = np.clip(img_in_wins[:, 1::2], 0, height)
171 | img_areas = (img_in_wins[:, 2] - img_in_wins[:, 0]) * \
172 | (img_in_wins[:, 3] - img_in_wins[:, 1])
173 | win_areas = (windows[:, 2] - windows[:, 0]) * \
174 | (windows[:, 3] - windows[:, 1])
175 | img_rates = img_areas / win_areas
176 | if not (img_rates > img_rate_thr).any():
177 | max_rate = img_rates.max()
178 | img_rates[abs(img_rates - max_rate) < eps] = 1
179 | return windows[img_rates > img_rate_thr]
180 |
181 |
182 | def poly2hbb(polys):
183 | """Convert polygons to horizontal bboxes.
184 |
185 | Args:
186 | polys (np.array): Polygons with shape (N, 8)
187 |
188 | Returns:
189 | np.array: Horizontal bboxes.
190 | """
191 | shape = polys.shape
192 | polys = polys.reshape(*shape[:-1], shape[-1] // 2, 2)
193 | lt_point = np.min(polys, axis=-2)
194 | rb_point = np.max(polys, axis=-2)
195 | return np.concatenate([lt_point, rb_point], axis=-1)
196 |
197 |
198 | def bbox_overlaps_iof(bboxes1, bboxes2, eps=1e-6):
199 | """Compute bbox overlaps (iof).
200 |
201 | Args:
202 |         bboxes1 (np.array): Polygons with shape (N, 8).
203 |         bboxes2 (np.array): Horizontal bboxes with shape (K, 4).
204 | eps (float, optional): Defaults to 1e-6.
205 |
206 | Returns:
207 | np.array: Overlaps.
208 | """
209 | rows = bboxes1.shape[0]
210 | cols = bboxes2.shape[0]
211 |
212 | if rows * cols == 0:
213 | return np.zeros((rows, cols), dtype=np.float32)
214 |
215 | hbboxes1 = poly2hbb(bboxes1)
216 | hbboxes2 = bboxes2
217 | hbboxes1 = hbboxes1[:, None, :]
218 | lt = np.maximum(hbboxes1[..., :2], hbboxes2[..., :2])
219 | rb = np.minimum(hbboxes1[..., 2:], hbboxes2[..., 2:])
220 | wh = np.clip(rb - lt, 0, np.inf)
221 | h_overlaps = wh[..., 0] * wh[..., 1]
222 |
223 | l, t, r, b = [bboxes2[..., i] for i in range(4)]
224 | polys2 = np.stack([l, t, r, t, r, b, l, b], axis=-1)
225 | if shgeo is None:
226 | raise ImportError('Please run "pip install shapely" '
227 | 'to install shapely first.')
228 | sg_polys1 = [shgeo.Polygon(p) for p in bboxes1.reshape(rows, -1, 2)]
229 | sg_polys2 = [shgeo.Polygon(p) for p in polys2.reshape(cols, -1, 2)]
230 | overlaps = np.zeros(h_overlaps.shape)
231 | for p in zip(*np.nonzero(h_overlaps)):
232 | overlaps[p] = sg_polys1[p[0]].intersection(sg_polys2[p[-1]]).area
233 | unions = np.array([p.area for p in sg_polys1], dtype=np.float32)
234 | unions = unions[..., None]
235 |
236 | unions = np.clip(unions, eps, np.inf)
237 | outputs = overlaps / unions
238 | if outputs.ndim == 1:
239 | outputs = outputs[..., None]
240 | return outputs
241 |
242 |
243 | def get_window_obj(info, windows, iof_thr):
244 |     """Collect the annotations that fall inside each window.
245 |
246 | Args:
247 | info (dict): Dict of bbox annotations.
248 | windows (np.array): information of sliding windows.
249 | iof_thr (float): Threshold of overlaps between bbox and window.
250 |
251 | Returns:
252 | list[dict]: List of bbox annotations of every window.
253 | """
254 | bboxes = info['ann']['bboxes']
255 | iofs = bbox_overlaps_iof(bboxes, windows)
256 |
257 | window_anns = []
258 | for i in range(windows.shape[0]):
259 | win_iofs = iofs[:, i]
260 | pos_inds = np.nonzero(win_iofs >= iof_thr)[0].tolist()
261 |
262 | win_ann = dict()
263 | for k, v in info['ann'].items():
264 | try:
265 | win_ann[k] = v[pos_inds]
266 | except TypeError:
267 | win_ann[k] = [v[i] for i in pos_inds]
268 | win_ann['trunc'] = win_iofs[pos_inds] < 1
269 | window_anns.append(win_ann)
270 | return window_anns
271 |
272 |
273 | def crop_and_save_img(info, windows, window_anns, img_dir, no_padding,
274 | padding_value, save_dir, anno_dir, img_ext):
275 |     """Crop patches from the image and save them with their annotations.
276 |
277 | Args:
278 | info (dict): Image's information.
279 | windows (np.array): information of sliding windows.
280 | window_anns (list[dict]): List of bbox annotations of every window.
281 | img_dir (str): Path of images.
282 | no_padding (bool): If True, no padding.
283 | padding_value (tuple[int|float]): Padding value.
284 |         save_dir (str): Directory for saving patch images.
285 |         anno_dir (str): Directory for saving patch annotation files.
286 |         img_ext (str): Image file extension.
287 |
288 | Returns:
289 | list[dict]: Information of paths.
290 | """
291 | img = cv2.imread(osp.join(img_dir, info['filename']))
292 | patch_infos = []
293 | for i in range(windows.shape[0]):
294 | patch_info = dict()
295 | for k, v in info.items():
296 |             if k not in ['id', 'filename', 'width', 'height', 'ann']:
297 | patch_info[k] = v
298 |
299 | window = windows[i]
300 | x_start, y_start, x_stop, y_stop = window.tolist()
301 | patch_info['x_start'] = x_start
302 | patch_info['y_start'] = y_start
303 | patch_info['id'] = \
304 | info['id'] + '__' + str(x_stop - x_start) + \
305 | '__' + str(x_start) + '___' + str(y_start)
306 | patch_info['ori_id'] = info['id']
307 |
308 | ann = window_anns[i]
309 | ann['bboxes'] = translate(ann['bboxes'], -x_start, -y_start)
310 | patch_info['ann'] = ann
311 |
312 | patch = img[y_start:y_stop, x_start:x_stop]
313 | if not no_padding:
314 | height = y_stop - y_start
315 | width = x_stop - x_start
316 | if height > patch.shape[0] or width > patch.shape[1]:
317 | padding_patch = np.empty((height, width, patch.shape[-1]),
318 | dtype=np.uint8)
319 | if not isinstance(padding_value, (int, float)):
320 | assert len(padding_value) == patch.shape[-1]
321 | padding_patch[...] = padding_value
322 | padding_patch[:patch.shape[0], :patch.shape[1], ...] = patch
323 | patch = padding_patch
324 | patch_info['height'] = patch.shape[0]
325 | patch_info['width'] = patch.shape[1]
326 |
327 | cv2.imwrite(osp.join(save_dir, patch_info['id'] + img_ext), patch)
328 | patch_info['filename'] = patch_info['id'] + img_ext
329 | patch_infos.append(patch_info)
330 |
331 | bboxes_num = patch_info['ann']['bboxes'].shape[0]
332 | outdir = os.path.join(anno_dir, patch_info['id'] + '.txt')
333 |
334 | with codecs.open(outdir, 'w', 'utf-8') as f_out:
335 | if bboxes_num == 0:
336 | pass
337 | else:
338 | for idx in range(bboxes_num):
339 | obj = patch_info['ann']
340 | outline = ' '.join(list(map(str, obj['bboxes'][idx])))
341 | diffs = str(
342 | obj['diffs'][idx]) if not obj['trunc'][idx] else '2'
343 | outline = outline + ' ' + obj['labels'][idx] + ' ' + diffs
344 | f_out.write(outline + '\n')
345 |
346 | return patch_infos
347 |
348 |
349 | def single_split(arguments, sizes, gaps, img_rate_thr, iof_thr, no_padding,
350 | padding_value, save_dir, anno_dir, img_ext, lock, prog, total,
351 | logger):
352 |     """Split a single image into patches and save the results.
353 |
354 | Args:
355 | arguments (object): Parameters.
356 | sizes (list): List of window's sizes.
357 | gaps (list): List of window's gaps.
358 | img_rate_thr (float): Threshold of window area divided by image area.
359 | iof_thr (float): Threshold of overlaps between bbox and window.
360 | no_padding (bool): If True, no padding.
361 | padding_value (tuple[int|float]): Padding value.
362 |         save_dir (str): Directory for saving patch images.
363 |         anno_dir (str): Directory for saving patch annotation files.
364 |         img_ext (str): Image file extension.
365 | lock (object): Lock of Manager.
366 | prog (object): Progress of Manager.
367 | total (object): Length of infos.
368 | logger (object): Logger.
369 |
370 | Returns:
371 | list[dict]: Information of paths.
372 | """
373 | info, img_dir = arguments
374 | windows = get_sliding_window(info, sizes, gaps, img_rate_thr)
375 | window_anns = get_window_obj(info, windows, iof_thr)
376 | patch_infos = crop_and_save_img(info, windows, window_anns, img_dir,
377 | no_padding, padding_value, save_dir,
378 | anno_dir, img_ext)
379 | assert patch_infos
380 |
381 | lock.acquire()
382 | prog.value += 1
383 | msg = f'({prog.value / total:3.1%} {prog.value}:{total})'
384 | msg += ' - ' + f"Filename: {info['filename']}"
385 | msg += ' - ' + f"width: {info['width']:<5d}"
386 | msg += ' - ' + f"height: {info['height']:<5d}"
387 | msg += ' - ' + f"Objects: {len(info['ann']['bboxes']):<5d}"
388 | msg += ' - ' + f'Patches: {len(patch_infos)}'
389 | logger.info(msg)
390 | lock.release()
391 |
392 | return patch_infos
393 |
394 |
395 | def setup_logger(log_path):
396 | """Setup logger.
397 |
398 | Args:
399 | log_path (str): Path of log.
400 |
401 | Returns:
402 | object: Logger.
403 | """
404 | logger = logging.getLogger('img split')
405 | formatter = logging.Formatter('%(asctime)s - %(message)s')
406 | now = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
407 | log_path = osp.join(log_path, now + '.log')
408 | handlers = [logging.StreamHandler(), logging.FileHandler(log_path, 'w')]
409 |
410 | for handler in handlers:
411 | handler.setFormatter(formatter)
412 | handler.setLevel(logging.INFO)
413 | logger.addHandler(handler)
414 | logger.setLevel(logging.INFO)
415 | return logger
416 |
417 |
418 | def translate(bboxes, x, y):
419 |     """Shift bboxes by the given offsets (e.g. from image to patch coordinates).
420 |
421 |     Args:
422 |         bboxes (np.array): Bboxes to shift.
423 |         x (float): Offset along the x-axis.
424 |         y (float): Offset along the y-axis.
425 |
426 |     Returns:
427 |         np.array: Shifted bboxes.
428 |     """
429 | dim = bboxes.shape[-1]
430 | translated = bboxes + np.array([x, y] * int(dim / 2), dtype=np.float32)
431 | return translated
432 |
433 |
434 | def load_dota(img_dir, ann_dir=None, nproc=10):
435 | """Load DOTA dataset.
436 |
437 | Args:
438 | img_dir (str): Path of images.
439 | ann_dir (str): Path of annotations.
440 | nproc (int): number of processes.
441 |
442 | Returns:
443 | list: Dataset's contents.
444 | """
445 | assert osp.isdir(img_dir), f'The {img_dir} is not an existing dir!'
446 | assert ann_dir is None or osp.isdir(
447 | ann_dir), f'The {ann_dir} is not an existing dir!'
448 |
449 |     print('Loading DOTA dataset information.')
450 | start_time = time.time()
451 | _load_func = partial(_load_dota_single, img_dir=img_dir, ann_dir=ann_dir)
452 | if nproc > 1:
453 | pool = Pool(nproc)
454 | contents = pool.map(_load_func, os.listdir(img_dir))
455 | pool.close()
456 | else:
457 | contents = list(map(_load_func, os.listdir(img_dir)))
458 | contents = [c for c in contents if c is not None]
459 | end_time = time.time()
460 |     print(f'Finished loading DOTA: {len(contents)} images,',
461 | f'using {end_time - start_time:.3f}s.')
462 |
463 | return contents
464 |
465 |
466 | def _load_dota_single(imgfile, img_dir, ann_dir):
467 | """Load DOTA's single image.
468 |
469 | Args:
470 | imgfile (str): Filename of single image.
471 | img_dir (str): Path of images.
472 | ann_dir (str): Path of annotations.
473 |
474 | Returns:
475 | dict: Content of single image.
476 | """
477 | img_id, ext = osp.splitext(imgfile)
478 | if ext not in ['.jpg', '.JPG', '.png', '.tif', '.bmp']:
479 | return None
480 |
481 | imgpath = osp.join(img_dir, imgfile)
482 | size = Image.open(imgpath).size
483 | txtfile = None if ann_dir is None else osp.join(ann_dir, img_id + '.txt')
484 | content = _load_dota_txt(txtfile)
485 |
486 | content.update(
487 | dict(width=size[0], height=size[1], filename=imgfile, id=img_id))
488 | return content
489 |
490 |
491 | def _load_dota_txt(txtfile):
492 | """Load DOTA's txt annotation.
493 |
494 | Args:
495 | txtfile (str): Filename of single txt annotation.
496 |
497 | Returns:
498 | dict: Annotation of single image.
499 | """
500 | gsd, bboxes, labels, diffs = None, [], [], []
501 | if txtfile is None:
502 | pass
503 | elif not osp.isfile(txtfile):
504 | print(f"Can't find {txtfile}, treated as empty txtfile")
505 | else:
506 | with open(txtfile, 'r') as f:
507 | for line in f:
508 | if line.startswith('gsd'):
509 | num = line.split(':')[-1]
510 | try:
511 | gsd = float(num)
512 | except ValueError:
513 | gsd = None
514 | continue
515 |
516 |                 items = line.strip().split()  # tolerate trailing newlines / extra spaces
517 | if len(items) >= 9:
518 | bboxes.append([float(i) for i in items[:8]])
519 | labels.append(items[8])
520 | diffs.append(int(items[9]) if len(items) == 10 else 0)
521 |
522 | bboxes = np.array(bboxes, dtype=np.float32) if bboxes else \
523 | np.zeros((0, 8), dtype=np.float32)
524 | diffs = np.array(diffs, dtype=np.int64) if diffs else \
525 | np.zeros((0,), dtype=np.int64)
526 | ann = dict(bboxes=bboxes, labels=labels, diffs=diffs)
527 | return dict(gsd=gsd, ann=ann)
528 |
529 |
530 | def main():
531 | """Main function of image split."""
532 | args = parse_args()
533 |
534 | if args.ann_dirs is None:
535 | args.ann_dirs = [None for _ in range(len(args.img_dirs))]
536 | padding_value = args.padding_value[0] \
537 | if len(args.padding_value) == 1 else args.padding_value
538 | sizes, gaps = [], []
539 | for rate in args.rates:
540 | sizes += [int(size / rate) for size in args.sizes]
541 | gaps += [int(gap / rate) for gap in args.gaps]
542 | save_imgs = osp.join(args.save_dir, 'images')
543 | save_files = osp.join(args.save_dir, 'annfiles')
544 | os.makedirs(save_imgs)
545 | os.makedirs(save_files)
546 | logger = setup_logger(args.save_dir)
547 |
548 | print('Loading original data!!!')
549 | infos, img_dirs = [], []
550 | for img_dir, ann_dir in zip(args.img_dirs, args.ann_dirs):
551 | _infos = load_dota(img_dir=img_dir, ann_dir=ann_dir, nproc=args.nproc)
552 | _img_dirs = [img_dir for _ in range(len(_infos))]
553 | infos.extend(_infos)
554 | img_dirs.extend(_img_dirs)
555 |
556 | print('Start splitting images!!!')
557 | start = time.time()
558 | manager = Manager()
559 | worker = partial(
560 | single_split,
561 | sizes=sizes,
562 | gaps=gaps,
563 | img_rate_thr=args.img_rate_thr,
564 | iof_thr=args.iof_thr,
565 | no_padding=args.no_padding,
566 | padding_value=padding_value,
567 | save_dir=save_imgs,
568 | anno_dir=save_files,
569 | img_ext=args.save_ext,
570 | lock=manager.Lock(),
571 | prog=manager.Value('i', 0),
572 | total=len(infos),
573 | logger=logger)
574 |
575 | if args.nproc > 1:
576 | pool = Pool(args.nproc)
577 | patch_infos = pool.map(worker, zip(infos, img_dirs))
578 | pool.close()
579 | else:
580 | patch_infos = list(map(worker, zip(infos, img_dirs)))
581 |
582 | patch_infos = reduce(lambda x, y: x + y, patch_infos)
583 | stop = time.time()
584 |     print(f'Finished splitting images in {int(stop - start)} seconds!!!')
585 |     print(f'Total number of patches: {len(patch_infos)}')
586 |
587 |
588 | if __name__ == '__main__':
589 | main()
590 |
--------------------------------------------------------------------------------