├── VOCdevkit └── VOC2007 │ ├── JPEGImages │ ├── 0.jpg │ ├── 1.jpg │ ├── 2.jpg │ ├── 3.jpg │ └── 4.jpg │ └── Annotations │ ├── 0.xml │ ├── 2.xml │ ├── 3.xml │ ├── 1.xml │ └── 4.xml ├── VOCdevkit_Origin └── VOC2007 │ ├── JPEGImages │ ├── 000001.jpg │ ├── 000002.jpg │ ├── 000003.jpg │ ├── 000004.jpg │ └── 000005.jpg │ └── Annotations │ ├── 000002.xml │ ├── 000003.xml │ ├── 000001.xml │ ├── 000005.xml │ └── 000004.xml ├── README.md ├── LICENSE ├── utils ├── utils.py └── random_data.py ├── test_get_random_data.py ├── test_mosaic.py ├── .gitignore ├── test_mixup.py ├── generate_get_random_data.py ├── generate_mosaic.py └── generate_mixup.py /VOCdevkit/VOC2007/JPEGImages/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bubbliiiing/object-detection-augmentation/HEAD/VOCdevkit/VOC2007/JPEGImages/0.jpg -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/JPEGImages/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bubbliiiing/object-detection-augmentation/HEAD/VOCdevkit/VOC2007/JPEGImages/1.jpg -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/JPEGImages/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bubbliiiing/object-detection-augmentation/HEAD/VOCdevkit/VOC2007/JPEGImages/2.jpg -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/JPEGImages/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bubbliiiing/object-detection-augmentation/HEAD/VOCdevkit/VOC2007/JPEGImages/3.jpg -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/JPEGImages/4.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bubbliiiing/object-detection-augmentation/HEAD/VOCdevkit/VOC2007/JPEGImages/4.jpg -------------------------------------------------------------------------------- /VOCdevkit_Origin/VOC2007/JPEGImages/000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bubbliiiing/object-detection-augmentation/HEAD/VOCdevkit_Origin/VOC2007/JPEGImages/000001.jpg -------------------------------------------------------------------------------- /VOCdevkit_Origin/VOC2007/JPEGImages/000002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bubbliiiing/object-detection-augmentation/HEAD/VOCdevkit_Origin/VOC2007/JPEGImages/000002.jpg -------------------------------------------------------------------------------- /VOCdevkit_Origin/VOC2007/JPEGImages/000003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bubbliiiing/object-detection-augmentation/HEAD/VOCdevkit_Origin/VOC2007/JPEGImages/000003.jpg -------------------------------------------------------------------------------- /VOCdevkit_Origin/VOC2007/JPEGImages/000004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bubbliiiing/object-detection-augmentation/HEAD/VOCdevkit_Origin/VOC2007/JPEGImages/000004.jpg -------------------------------------------------------------------------------- /VOCdevkit_Origin/VOC2007/JPEGImages/000005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bubbliiiing/object-detection-augmentation/HEAD/VOCdevkit_Origin/VOC2007/JPEGImages/000005.jpg -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | ## object-detection-augmentation-这里面存放了一些目标检测算法的数据增强方法,如mosaic、mixup。 2 | --- 3 | 4 | ## 目录 5 | 1. [数据增强测试](#数据增强测试) 6 | 2. [生成图片与标签](#生成图片与标签) 7 | 8 | ## 数据增强测试 9 | 以test开头的几个py文件用于测试不同的数据增强方法。 10 | ### 测试步骤 11 | 1、Origin_VOCdevkit_path用于指定VOC数据集所在的文件夹; 12 | 2、input_shape代表数据增强后的图片的大小; 13 | 3、运行test_*.py即可查看对应的数据增强效果。 14 | 15 | ## 生成图片与标签 16 | 以generate开头的几个py文件用于生成并保存数据增强后的标签与图片。 17 | ### 生成步骤 18 | 1、Origin_VOCdevkit_path用于指定需要增强的数据集路径; 19 | 2、Out_VOCdevkit_path用于指定输出的数据集路径; 20 | 3、Out_Num用于指定生成多少张增强后的图片; 21 | 4、input_shape代表数据增强后的图片的大小; 22 | 5、运行generate_*.py即可生成并保存数据增强后的标签与图片。 -------------------------------------------------------------------------------- /VOCdevkit_Origin/VOC2007/Annotations/000002.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000002.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 329145082 9 | 10 | 11 | hiromori2 12 | Hiroyuki Mori 13 | 14 | 15 | 335 16 | 500 17 | 3 18 | 19 | 0 20 | 21 | train 22 | Unspecified 23 | 0 24 | 0 25 | 26 | 139 27 | 200 28 | 207 29 | 301 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/Annotations/0.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC 3 | VOCdevkit\VOC2007/JPEGImages\0.jpg 4 | 5 | My Database 6 | COCO 7 | flickr 8 | NULL 9 | 10 | 11 | NULL 12 | company 13 | 14 | 15 | 640 16 | 640 17 | 3 18 | 19 | 0 20 | 21 | train 22 | Unspecified 23 | 0 24 | 0 25 | 26 | 233 27 | 194 28 | 334 29 | 372 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /VOCdevkit_Origin/VOC2007/Annotations/000003.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000003.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 138563409 9 | 10 | 11 |
RandomEvent101 12 | ? 13 | 14 | 15 | 500 16 | 375 17 | 3 18 | 19 | 0 20 | 21 | sofa 22 | Unspecified 23 | 0 24 | 0 25 | 26 | 123 27 | 155 28 | 215 29 | 195 30 | 31 | 32 | 33 | chair 34 | Left 35 | 0 36 | 0 37 | 38 | 239 39 | 156 40 | 307 41 | 205 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /VOCdevkit_Origin/VOC2007/Annotations/000001.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 000001.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 341012865 9 | 10 | 11 | Fried Camels 12 | Jinky the Fruit Bat 13 | 14 | 15 | 353 16 | 500 17 | 3 18 | 19 | 0 20 | 21 | dog 22 | Left 23 | 1 24 | 0 25 | 26 | 48 27 | 240 28 | 195 29 | 371 30 | 31 | 32 | 33 | person 34 | Left 35 | 1 36 | 0 37 | 38 | 8 39 | 12 40 | 352 41 | 498 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Bubbliiiing 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
import copy
import os
import xml.etree.ElementTree as ET


def get_classes(sample_xmls, Origin_Annotations_path):
    """Collect the distinct object class names found in a set of annotation files.

    Args:
        sample_xmls: Iterable of annotation file names (relative to
            ``Origin_Annotations_path``).
        Origin_Annotations_path: Directory that contains the annotation files.

    Returns:
        A list with the text of every ``<object>/<name>`` element, each value
        appearing once, in first-seen order.
    """
    # A dict keeps insertion order and gives O(1) membership tests, so the
    # result is identical to the "append if not already in list" approach.
    unique_labels = {}
    for xml in sample_xmls:
        # Close the file as soon as it is parsed instead of leaking the handle.
        with open(os.path.join(Origin_Annotations_path, xml), encoding='utf-8') as in_file:
            root = ET.parse(in_file).getroot()

        for obj in root.iter('object'):
            unique_labels.setdefault(obj.find('name').text, None)
    return list(unique_labels)


def convert_annotation(jpg_path, xml_path, classes):
    """Turn one annotation file into a single text line.

    The line has the form ``"<jpg_path> x1,y1,x2,y2,cls_id x1,y1,x2,y2,cls_id ..."``
    with one group per ``<object>`` element. Objects whose ``<difficult>``
    value is ``1`` are left out; a missing or empty ``<difficult>`` element is
    treated as ``0``.

    Args:
        jpg_path: Path of the image; written verbatim as the first field.
        xml_path: Path of the annotation file to read.
        classes: Sequence of class names; the position of an object's name in
            this sequence becomes its ``cls_id``.

    Returns:
        The annotation line as a string (just ``jpg_path`` when the file has
        no usable objects).

    Raises:
        ValueError: If an object's name is not in ``classes`` or a
            ``<difficult>`` / coordinate value is not numeric.
    """
    with open(xml_path, encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    fields = [str(jpg_path)]
    for obj in root.iter('object'):
        # Read the flag from the <difficult> child itself; the element may be
        # absent or empty, in which case the object is kept.
        difficult_node = obj.find('difficult')
        difficult_text = (difficult_node.text or '').strip() if difficult_node is not None else ''
        if difficult_text and int(difficult_text) == 1:
            continue

        cls_id = classes.index(obj.find('name').text)

        xmlbox = obj.find('bndbox')
        # Coordinates may be stored as floats ("48.0"); truncate to int.
        coords = [int(float(xmlbox.find(tag).text)) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]

        fields.append(",".join(str(value) for value in coords + [cls_id]))
    return " ".join(fields)
import os
from random import sample

import numpy as np
from PIL import Image, ImageDraw

from utils.random_data import get_random_data, get_random_data_with_MixUp
from utils.utils import convert_annotation, get_classes

#-----------------------------------------------------------------------------------#
#   Origin_VOCdevkit_path   Path of the original dataset.
#-----------------------------------------------------------------------------------#
Origin_VOCdevkit_path   = "VOCdevkit_Origin"
#-----------------------------------------------------------------------------------#
#   input_shape             Size of the generated image.
#-----------------------------------------------------------------------------------#
input_shape             = [640, 640]

if __name__ == "__main__":
    # Demo: augment one randomly chosen sample and display it with its boxes.
    Origin_JPEGImages_path  = os.path.join(Origin_VOCdevkit_path, "VOC2007/JPEGImages")
    Origin_Annotations_path = os.path.join(Origin_VOCdevkit_path, "VOC2007/Annotations")

    #---------------------------#
    #   List the annotation files. Only *.xml entries are kept so that a
    #   stray file in the folder can never be sampled and fed to the parser.
    #---------------------------#
    xml_names = [name for name in os.listdir(Origin_Annotations_path) if name.lower().endswith('.xml')]

    #------------------------------#
    #   Pick one image / annotation pair.
    #------------------------------#
    sample_xmls     = sample(xml_names, 1)
    unique_labels   = get_classes(sample_xmls, Origin_Annotations_path)

    stem        = os.path.splitext(sample_xmls[0])[0]
    jpg_name    = os.path.join(Origin_JPEGImages_path, stem + '.jpg')
    xml_name    = os.path.join(Origin_Annotations_path, sample_xmls[0])

    line = convert_annotation(jpg_name, xml_name, unique_labels)

    #------------------------------#
    #   Apply the random augmentation.
    #------------------------------#
    image_data, box_data = get_random_data(line, input_shape)

    #------------------------------#
    #   Draw every returned box and show the result.
    #------------------------------#
    img         = Image.fromarray(image_data.astype(np.uint8))
    draw        = ImageDraw.Draw(img)   # one drawing context is enough for all boxes
    thickness   = 3
    for box in box_data:
        left, top, right, bottom = box[0:4]
        # Nested 1-px rectangles give an outline `thickness` pixels wide.
        for i in range(thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i], outline=(255, 255, 255))
    img.show()
import os
from random import sample

import numpy as np
from PIL import Image, ImageDraw

from utils.random_data import get_random_data, get_random_data_with_Mosaic
from utils.utils import convert_annotation, get_classes

#-----------------------------------------------------------------------------------#
#   Origin_VOCdevkit_path   Path of the original dataset.
#-----------------------------------------------------------------------------------#
Origin_VOCdevkit_path   = "VOCdevkit_Origin"
#-----------------------------------------------------------------------------------#
#   input_shape             Size of the generated image.
#-----------------------------------------------------------------------------------#
input_shape             = [640, 640]

if __name__ == "__main__":
    # Demo: build one mosaic from four random samples and display it with its boxes.
    Origin_JPEGImages_path  = os.path.join(Origin_VOCdevkit_path, "VOC2007/JPEGImages")
    Origin_Annotations_path = os.path.join(Origin_VOCdevkit_path, "VOC2007/Annotations")

    #---------------------------#
    #   List the annotation files. Only *.xml entries are kept so that a
    #   stray file in the folder can never be sampled and fed to the parser.
    #---------------------------#
    xml_names = [name for name in os.listdir(Origin_Annotations_path) if name.lower().endswith('.xml')]

    #------------------------------#
    #   Pick four image / annotation pairs.
    #   random.sample raises ValueError if fewer than four are available.
    #------------------------------#
    sample_xmls     = sample(xml_names, 4)
    unique_labels   = get_classes(sample_xmls, Origin_Annotations_path)

    annotation_line = [
        convert_annotation(
            os.path.join(Origin_JPEGImages_path, os.path.splitext(xml)[0] + '.jpg'),
            os.path.join(Origin_Annotations_path, xml),
            unique_labels,
        )
        for xml in sample_xmls
    ]

    #------------------------------#
    #   Merge the four samples into one mosaic.
    #------------------------------#
    image_data, box_data = get_random_data_with_Mosaic(annotation_line, input_shape)

    #------------------------------#
    #   Draw every returned box and show the result.
    #------------------------------#
    img         = Image.fromarray(image_data.astype(np.uint8))
    draw        = ImageDraw.Draw(img)   # one drawing context is enough for all boxes
    thickness   = 3
    for box in box_data:
        left, top, right, bottom = box[0:4]
        # Nested 1-px rectangles give an outline `thickness` pixels wide.
        for i in range(thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i], outline=(255, 255, 255))
    img.show()
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
import os
from random import sample

import numpy as np
from PIL import Image, ImageDraw

from utils.random_data import get_random_data, get_random_data_with_MixUp
from utils.utils import convert_annotation, get_classes

#-----------------------------------------------------------------------------------#
#   Origin_VOCdevkit_path   Path of the original dataset.
#-----------------------------------------------------------------------------------#
Origin_VOCdevkit_path   = "VOCdevkit_Origin"
#-----------------------------------------------------------------------------------#
#   input_shape             Size of the generated image.
#-----------------------------------------------------------------------------------#
input_shape             = [640, 640]

if __name__ == "__main__":
    # Demo: mix two randomly chosen, individually augmented samples and display
    # the result with its boxes.
    Origin_JPEGImages_path  = os.path.join(Origin_VOCdevkit_path, "VOC2007/JPEGImages")
    Origin_Annotations_path = os.path.join(Origin_VOCdevkit_path, "VOC2007/Annotations")

    #---------------------------#
    #   List the annotation files. Only *.xml entries are kept so that a
    #   stray file in the folder can never be sampled and fed to the parser.
    #---------------------------#
    xml_names = [name for name in os.listdir(Origin_Annotations_path) if name.lower().endswith('.xml')]

    #------------------------------#
    #   Pick two image / annotation pairs.
    #   random.sample raises ValueError if fewer than two are available.
    #------------------------------#
    sample_xmls     = sample(xml_names, 2)
    unique_labels   = get_classes(sample_xmls, Origin_Annotations_path)

    lines = []
    for xml in sample_xmls:
        jpg_name = os.path.join(Origin_JPEGImages_path, os.path.splitext(xml)[0] + '.jpg')
        xml_name = os.path.join(Origin_Annotations_path, xml)
        lines.append(convert_annotation(jpg_name, xml_name, unique_labels))
    line_1, line_2 = lines

    #------------------------------#
    #   Augment each sample on its own.
    #------------------------------#
    image_1, box_1  = get_random_data(line_1, input_shape)
    image_2, box_2  = get_random_data(line_2, input_shape)

    #------------------------------#
    #   Combine the two samples with mixup.
    #------------------------------#
    image_data, box_data = get_random_data_with_MixUp(image_1, box_1, image_2, box_2)

    #------------------------------#
    #   Draw every returned box and show the result.
    #------------------------------#
    img         = Image.fromarray(image_data.astype(np.uint8))
    draw        = ImageDraw.Draw(img)   # one drawing context is enough for all boxes
    thickness   = 3
    for box in box_data:
        left, top, right, bottom = box[0:4]
        # Nested 1-px rectangles give an outline `thickness` pixels wide.
        for i in range(thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i], outline=(255, 255, 255))
    img.show()
0 36 | 0 37 | 38 | 141 39 | 284 40 | 158 41 | 301 42 | 43 | 44 | 45 | car 46 | Unspecified 47 | 0 48 | 0 49 | 50 | 131 51 | 286 52 | 145 53 | 298 54 | 55 | 56 | 57 | car 58 | Unspecified 59 | 0 60 | 0 61 | 62 | 173 63 | 287 64 | 206 65 | 307 66 | 67 | 68 | 69 | car 70 | Unspecified 71 | 0 72 | 0 73 | 74 | 215 75 | 288 76 | 262 77 | 313 78 | 79 | 80 | 81 | car 82 | Unspecified 83 | 0 84 | 0 85 | 86 | 99 87 | 280 88 | 123 89 | 302 90 | 91 | 92 | 93 | car 94 | Unspecified 95 | 0 96 | 0 97 | 98 | 153 99 | 287 100 | 179 101 | 303 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /generate_get_random_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | from random import sample 3 | 4 | import numpy as np 5 | from PIL import Image, ImageDraw 6 | 7 | from utils.random_data import get_random_data, get_random_data_with_MixUp 8 | from utils.utils import convert_annotation, get_classes 9 | 10 | #-----------------------------------------------------------------------------------# 11 | # Origin_VOCdevkit_path 原始数据集所在的路径 12 | # Out_VOCdevkit_path 输出数据集所在的路径 13 | #-----------------------------------------------------------------------------------# 14 | Origin_VOCdevkit_path = "VOCdevkit_Origin" 15 | Out_VOCdevkit_path = "VOCdevkit" 16 | #-----------------------------------------------------------------------------------# 17 | # Out_Num 生成多少组图片 18 | # input_shape 生成的图片大小 19 | #-----------------------------------------------------------------------------------# 20 | Out_Num = 5 21 | input_shape = [640, 640] 22 | 23 | #-----------------------------------------------------------------------------------# 24 | # 下面定义了xml里面的组成模块,无需改动。 25 | #-----------------------------------------------------------------------------------# 26 | headstr = """\ 27 | 28 | VOC 29 | %s 30 | 31 | My Database 32 | COCO 33 | flickr 34 | NULL 35 | 36 | 37 | NULL 38 | company 39 | 40 | 41 | %d 42 | %d 43 
| %d 44 | 45 | 0 46 | """ 47 | 48 | objstr = """\ 49 | 50 | %s 51 | Unspecified 52 | 0 53 | 0 54 | 55 | %d 56 | %d 57 | %d 58 | %d 59 | 60 | 61 | """ 62 | 63 | tailstr = '''\ 64 | 65 | ''' 66 | if __name__ == "__main__": 67 | Origin_JPEGImages_path = os.path.join(Origin_VOCdevkit_path, "VOC2007/JPEGImages") 68 | Origin_Annotations_path = os.path.join(Origin_VOCdevkit_path, "VOC2007/Annotations") 69 | 70 | Out_JPEGImages_path = os.path.join(Out_VOCdevkit_path, "VOC2007/JPEGImages") 71 | Out_Annotations_path = os.path.join(Out_VOCdevkit_path, "VOC2007/Annotations") 72 | 73 | if not os.path.exists(Out_JPEGImages_path): 74 | os.makedirs(Out_JPEGImages_path) 75 | if not os.path.exists(Out_Annotations_path): 76 | os.makedirs(Out_Annotations_path) 77 | #---------------------------# 78 | # 遍历标签并赋值 79 | #---------------------------# 80 | xml_names = os.listdir(Origin_Annotations_path) 81 | 82 | def write_xml(anno_path, jpg_pth, head, input_shape, boxes, unique_labels, tail): 83 | f = open(anno_path, "w") 84 | f.write(head%(jpg_pth, input_shape[0], input_shape[1], 3)) 85 | for i, box in enumerate(boxes): 86 | f.write(objstr%(str(unique_labels[int(box[4])]), box[0], box[1], box[2], box[3])) 87 | f.write(tail) 88 | 89 | #------------------------------# 90 | # 循环生成xml和jpg 91 | #------------------------------# 92 | for index in range(Out_Num): 93 | #------------------------------# 94 | # 获取一个图像与标签 95 | #------------------------------# 96 | sample_xmls = sample(xml_names, 1) 97 | unique_labels = get_classes(sample_xmls, Origin_Annotations_path) 98 | 99 | jpg_name = os.path.join(Origin_JPEGImages_path, os.path.splitext(sample_xmls[0])[0] + '.jpg') 100 | xml_name = os.path.join(Origin_Annotations_path, sample_xmls[0]) 101 | 102 | line = convert_annotation(jpg_name, xml_name, unique_labels) 103 | 104 | #------------------------------# 105 | # 各自数据增强 106 | #------------------------------# 107 | image_data, box_data = get_random_data(line, input_shape) 108 | 109 | img = 
Image.fromarray(image_data.astype(np.uint8)) 110 | img.save(os.path.join(Out_JPEGImages_path, str(index) + '.jpg')) 111 | write_xml(os.path.join(Out_Annotations_path, str(index) + '.xml'), os.path.join(Out_JPEGImages_path, str(index) + '.jpg'), \ 112 | headstr, input_shape, box_data, unique_labels, tailstr) 113 | -------------------------------------------------------------------------------- /generate_mosaic.py: -------------------------------------------------------------------------------- 1 | import os 2 | from random import sample 3 | 4 | import numpy as np 5 | from PIL import Image, ImageDraw 6 | 7 | from utils.random_data import get_random_data, get_random_data_with_Mosaic 8 | from utils.utils import convert_annotation, get_classes 9 | 10 | #-----------------------------------------------------------------------------------# 11 | # Origin_VOCdevkit_path 原始数据集所在的路径 12 | # Out_VOCdevkit_path 输出数据集所在的路径 13 | #-----------------------------------------------------------------------------------# 14 | Origin_VOCdevkit_path = "VOCdevkit_Origin" 15 | Out_VOCdevkit_path = "VOCdevkit" 16 | #-----------------------------------------------------------------------------------# 17 | # Out_Num 利用mosaic生成多少组图片 18 | # input_shape 生成的图片大小 19 | #-----------------------------------------------------------------------------------# 20 | Out_Num = 5 21 | input_shape = [640, 640] 22 | 23 | #-----------------------------------------------------------------------------------# 24 | # 下面定义了xml里面的组成模块,无需改动。 25 | #-----------------------------------------------------------------------------------# 26 | headstr = """\ 27 | 28 | VOC 29 | %s 30 | 31 | My Database 32 | COCO 33 | flickr 34 | NULL 35 | 36 | 37 | NULL 38 | company 39 | 40 | 41 | %d 42 | %d 43 | %d 44 | 45 | 0 46 | """ 47 | 48 | objstr = """\ 49 | 50 | %s 51 | Unspecified 52 | 0 53 | 0 54 | 55 | %d 56 | %d 57 | %d 58 | %d 59 | 60 | 61 | """ 62 | 63 | tailstr = '''\ 64 | 65 | ''' 66 | if __name__ == "__main__": 67 | 
Origin_JPEGImages_path = os.path.join(Origin_VOCdevkit_path, "VOC2007/JPEGImages") 68 | Origin_Annotations_path = os.path.join(Origin_VOCdevkit_path, "VOC2007/Annotations") 69 | 70 | Out_JPEGImages_path = os.path.join(Out_VOCdevkit_path, "VOC2007/JPEGImages") 71 | Out_Annotations_path = os.path.join(Out_VOCdevkit_path, "VOC2007/Annotations") 72 | 73 | if not os.path.exists(Out_JPEGImages_path): 74 | os.makedirs(Out_JPEGImages_path) 75 | if not os.path.exists(Out_Annotations_path): 76 | os.makedirs(Out_Annotations_path) 77 | #---------------------------# 78 | # 遍历标签并赋值 79 | #---------------------------# 80 | xml_names = os.listdir(Origin_Annotations_path) 81 | 82 | def write_xml(anno_path, jpg_pth, head, input_shape, boxes, unique_labels, tail): 83 | f = open(anno_path, "w") 84 | f.write(head%(jpg_pth, input_shape[0], input_shape[1], 3)) 85 | for i, box in enumerate(boxes): 86 | f.write(objstr%(str(unique_labels[int(box[4])]), box[0], box[1], box[2], box[3])) 87 | f.write(tail) 88 | 89 | #------------------------------# 90 | # 循环生成xml和jpg 91 | #------------------------------# 92 | for index in range(Out_Num): 93 | #------------------------------# 94 | # 获取4个图像与标签 95 | #------------------------------# 96 | sample_xmls = sample(xml_names, 4) 97 | unique_labels = get_classes(sample_xmls, Origin_Annotations_path) 98 | 99 | annotation_line = [] 100 | for xml in sample_xmls: 101 | line = convert_annotation(os.path.join(Origin_JPEGImages_path, os.path.splitext(xml)[0] + '.jpg'), os.path.join(Origin_Annotations_path, xml), unique_labels) 102 | annotation_line.append(line) 103 | 104 | #------------------------------# 105 | # 合并mosaic 106 | #------------------------------# 107 | image_data, box_data = get_random_data_with_Mosaic(annotation_line, input_shape) 108 | 109 | img = Image.fromarray(image_data.astype(np.uint8)) 110 | img.save(os.path.join(Out_JPEGImages_path, str(index) + '.jpg')) 111 | write_xml(os.path.join(Out_Annotations_path, str(index) + '.xml'), 
"""Generate MixUp-augmented images plus Pascal-VOC style XML annotations.

Reads annotated images from ``Origin_VOCdevkit_path`` and writes ``Out_Num``
blended images (two source images each) to ``Out_VOCdevkit_path``.
"""
import os
from random import sample

import numpy as np
from PIL import Image, ImageDraw

from utils.random_data import get_random_data, get_random_data_with_MixUp
from utils.utils import convert_annotation, get_classes

#-----------------------------------------------------------------------------------#
#   Origin_VOCdevkit_path   path of the original dataset
#   Out_VOCdevkit_path      path the generated dataset is written to
#-----------------------------------------------------------------------------------#
Origin_VOCdevkit_path   = "VOCdevkit_Origin"
Out_VOCdevkit_path      = "VOCdevkit"
#-----------------------------------------------------------------------------------#
#   Out_Num                 how many mixup images to generate
#   input_shape             size of the generated images, as [height, width]
#-----------------------------------------------------------------------------------#
Out_Num                 = 100
input_shape             = [640, 640]

#-----------------------------------------------------------------------------------#
#   Building blocks of the output xml; normally there is no need to change them.
#   NOTE(review): the element tags below were restored following the Pascal VOC
#   annotation layout (one element per original template line) - confirm against
#   the upstream file.
#-----------------------------------------------------------------------------------#
headstr = """\
<annotation>
    <folder>VOC</folder>
    <filename>%s</filename>
    <source>
        <database>My Database</database>
        <annotation>COCO</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <owner>
        <flickrid>NULL</flickrid>
        <name>company</name>
    </owner>
    <size>
        <width>%d</width>
        <height>%d</height>
        <depth>%d</depth>
    </size>
    <segmented>0</segmented>
"""

objstr = """\
    <object>
        <name>%s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%d</xmin>
            <ymin>%d</ymin>
            <xmax>%d</xmax>
            <ymax>%d</ymax>
        </bndbox>
    </object>
"""

tailstr = '''\
</annotation>
'''


def write_xml(anno_path, jpg_pth, head, input_shape, boxes, unique_labels, tail):
    """Write one annotation file describing ``boxes`` for the image ``jpg_pth``.

    Args:
        anno_path: destination path of the xml file.
        jpg_pth: image path stored in the filename slot of ``head``.
        head: header template taking (filename, width, height, depth).
        input_shape: image size as ``[height, width]``.
        boxes: iterable of ``[x1, y1, x2, y2, class_index]`` rows.
        unique_labels: sequence of label names indexed by ``class_index``.
        tail: closing text appended after all objects.
    """
    # input_shape is (height, width) - get_random_data unpacks it as ``h, w`` -
    # while the template expects width first.
    height, width = input_shape
    # The context manager guarantees the file is flushed and closed; utf-8 is
    # the default encoding an xml parser assumes when no declaration is present.
    with open(anno_path, "w", encoding="utf-8") as f:
        f.write(head % (jpg_pth, width, height, 3))
        for box in boxes:
            f.write(objstr % (str(unique_labels[int(box[4])]), box[0], box[1], box[2], box[3]))
        f.write(tail)


if __name__ == "__main__":
    Origin_JPEGImages_path  = os.path.join(Origin_VOCdevkit_path, "VOC2007/JPEGImages")
    Origin_Annotations_path = os.path.join(Origin_VOCdevkit_path, "VOC2007/Annotations")

    Out_JPEGImages_path     = os.path.join(Out_VOCdevkit_path, "VOC2007/JPEGImages")
    Out_Annotations_path    = os.path.join(Out_VOCdevkit_path, "VOC2007/Annotations")

    os.makedirs(Out_JPEGImages_path, exist_ok=True)
    os.makedirs(Out_Annotations_path, exist_ok=True)

    #---------------------------#
    #   Collect the annotation files (ignore anything that is not an xml,
    #   e.g. hidden files created by the operating system).
    #---------------------------#
    xml_names = [name for name in os.listdir(Origin_Annotations_path) if name.lower().endswith(".xml")]

    #------------------------------#
    #   Generate the xml / jpg pairs
    #------------------------------#
    for index in range(Out_Num):
        #------------------------------#
        #   Pick two images with their labels
        #------------------------------#
        sample_xmls     = sample(xml_names, 2)
        unique_labels   = get_classes(sample_xmls, Origin_Annotations_path)

        jpg_name_1  = os.path.join(Origin_JPEGImages_path, os.path.splitext(sample_xmls[0])[0] + '.jpg')
        jpg_name_2  = os.path.join(Origin_JPEGImages_path, os.path.splitext(sample_xmls[1])[0] + '.jpg')
        xml_name_1  = os.path.join(Origin_Annotations_path, sample_xmls[0])
        xml_name_2  = os.path.join(Origin_Annotations_path, sample_xmls[1])

        line_1 = convert_annotation(jpg_name_1, xml_name_1, unique_labels)
        line_2 = convert_annotation(jpg_name_2, xml_name_2, unique_labels)

        #------------------------------#
        #   Augment each image on its own
        #------------------------------#
        image_1, box_1 = get_random_data(line_1, input_shape)
        image_2, box_2 = get_random_data(line_2, input_shape)

        #------------------------------#
        #   Blend them with mixup
        #------------------------------#
        image_data, box_data = get_random_data_with_MixUp(image_1, box_1, image_2, box_2)

        out_jpg_path = os.path.join(Out_JPEGImages_path, str(index) + '.jpg')
        img = Image.fromarray(image_data.astype(np.uint8))
        img.save(out_jpg_path)
        write_xml(os.path.join(Out_Annotations_path, str(index) + '.xml'), out_jpg_path,
                  headstr, input_shape, box_data, unique_labels, tailstr)
def rand(a=0, b=1):
    """Return a uniform random float in ``[a, b)`` (for ``a < b``) from NumPy's global RNG."""
    return np.random.rand()*(b-a) + a


def _fit_boxes_to_canvas(box, nw, nh, iw, ih, dx, dy, w, h, flip=False):
    """Map boxes from source-image coordinates onto the ``w`` x ``h`` canvas.

    Shuffles the rows, rescales from (iw, ih) to (nw, nh), shifts by (dx, dy),
    optionally mirrors horizontally, clips to the canvas and drops every box
    whose clipped width or height is not larger than one pixel.
    """
    if len(box) == 0:
        return box
    np.random.shuffle(box)
    box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
    box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
    if flip:
        # mirroring swaps the roles of the left and right edges
        box[:, [0,2]] = w - box[:, [2,0]]
    box[:, 0:2][box[:, 0:2]<0] = 0
    box[:, 2][box[:, 2]>w] = w
    box[:, 3][box[:, 3]>h] = h
    box_w = box[:, 2] - box[:, 0]
    box_h = box[:, 3] - box[:, 1]
    return box[np.logical_and(box_w>1, box_h>1)]  # discard invalid boxes


def get_random_data(annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
    """Load one annotated image and fit it, with its boxes, to ``input_shape``.

    Args:
        annotation_line: whitespace separated string; the first token is the
            image path, every further token is ``x1,y1,x2,y2,class`` (integers).
        input_shape: target size as ``(height, width)``.
        jitter: strength of the random aspect-ratio distortion.
        hue, sat, val: strength of the random HSV gain for each channel.
        random: when False the image is only letterboxed (centred, aspect
            ratio kept, grey padding) and no augmentation is applied.

    Returns:
        ``(image_data, box)``. ``image_data`` is an RGB array of the target
        size (float32 when ``random`` is False, uint8 otherwise); ``box`` holds
        the adjusted boxes that survived clipping.
    """
    line = annotation_line.split()
    #------------------------------#
    #   Read the image and convert it to RGB. The context manager closes the
    #   underlying file; convert() returns an independent, fully loaded copy.
    #------------------------------#
    with Image.open(line[0]) as source:
        image = source.convert('RGB')

    #------------------------------#
    #   Source size and target size
    #------------------------------#
    iw, ih  = image.size
    h, w    = input_shape
    #------------------------------#
    #   Ground-truth boxes
    #------------------------------#
    box     = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])

    if not random:
        scale = min(w/iw, h/ih)
        nw = int(iw*scale)
        nh = int(ih*scale)
        dx = (w-nw)//2
        dy = (h-nh)//2

        #---------------------------------#
        #   Pad the unused area with grey
        #---------------------------------#
        image       = image.resize((nw,nh), Image.BICUBIC)
        new_image   = Image.new('RGB', (w,h), (128,128,128))
        new_image.paste(image, (dx, dy))
        image_data  = np.array(new_image, np.float32)

        #---------------------------------#
        #   Adjust the ground-truth boxes
        #---------------------------------#
        box = _fit_boxes_to_canvas(box, nw, nh, iw, ih, dx, dy, w, h)

        return image_data, box

    #------------------------------------------#
    #   Rescale the image and distort its aspect ratio
    #------------------------------------------#
    new_ar = iw/ih * rand(1-jitter,1+jitter) / rand(1-jitter,1+jitter)
    scale = rand(.25, 2)
    if new_ar < 1:
        nh = int(scale*h)
        nw = int(nh*new_ar)
    else:
        nw = int(scale*w)
        nh = int(nw/new_ar)
    image = image.resize((nw,nh), Image.BICUBIC)

    #------------------------------------------#
    #   Paste at a random offset on a grey canvas
    #------------------------------------------#
    dx = int(rand(0, w-nw))
    dy = int(rand(0, h-nh))
    new_image = Image.new('RGB', (w,h), (128,128,128))
    new_image.paste(image, (dx, dy))
    image = new_image

    #------------------------------------------#
    #   Random horizontal flip
    #------------------------------------------#
    flip = rand()<.5
    if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)

    image_data      = np.array(image, np.uint8)
    #---------------------------------#
    #   Colour jitter: one random gain per HSV channel
    #---------------------------------#
    r               = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
    #---------------------------------#
    #   Convert to HSV (distinct names so the gain parameters are not shadowed)
    #---------------------------------#
    hue_ch, sat_ch, val_ch = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
    dtype           = image_data.dtype
    #---------------------------------#
    #   Apply the gains through lookup tables
    #---------------------------------#
    x       = np.arange(0, 256, dtype=r.dtype)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    image_data = cv2.merge((cv2.LUT(hue_ch, lut_hue), cv2.LUT(sat_ch, lut_sat), cv2.LUT(val_ch, lut_val)))
    image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)

    #---------------------------------#
    #   Adjust the ground-truth boxes
    #---------------------------------#
    box = _fit_boxes_to_canvas(box, nw, nh, iw, ih, dx, dy, w, h, flip=flip)

    return image_data, box
def merge_bboxes(bboxes, cutx, cuty):
    """Clip the boxes of the four mosaic tiles to the quadrant each tile fills.

    Args:
        bboxes: one box collection per tile, ordered top-left, bottom-left,
            bottom-right, top-right; each box is ``[x1, y1, x2, y2, ..., label]``.
        cutx: x coordinate of the vertical seam between the tiles.
        cuty: y coordinate of the horizontal seam between the tiles.

    Returns:
        A list of ``[x1, y1, x2, y2, label]`` lists. Boxes lying entirely
        outside their quadrant are dropped, boxes crossing a seam are clipped
        to it, and boxes left without a positive width and height are dropped.
    """
    merge_bbox = []
    for i, tile_boxes in enumerate(bboxes):
        for box in tile_boxes:
            x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
            crosses_y = y2 >= cuty and y1 <= cuty
            crosses_x = x2 >= cutx and x1 <= cutx

            if i == 0:
                # top-left tile: keep what lies above and left of the seams
                if y1 > cuty or x1 > cutx:
                    continue
                if crosses_y:
                    y2 = cuty
                if crosses_x:
                    x2 = cutx
            elif i == 1:
                # bottom-left tile
                if y2 < cuty or x1 > cutx:
                    continue
                if crosses_y:
                    y1 = cuty
                if crosses_x:
                    x2 = cutx
            elif i == 2:
                # bottom-right tile
                if y2 < cuty or x2 < cutx:
                    continue
                if crosses_y:
                    y1 = cuty
                if crosses_x:
                    x1 = cutx
            elif i == 3:
                # top-right tile
                if y1 > cuty or x2 < cutx:
                    continue
                if crosses_y:
                    y2 = cuty
                if crosses_x:
                    x1 = cutx

            # A box that merely touches a seam is clipped down to a line;
            # such an empty box is not a usable annotation, so skip it.
            if x2 <= x1 or y2 <= y1:
                continue

            merge_bbox.append([x1, y1, x2, y2, box[-1]])
    return merge_bbox
def get_random_data_with_Mosaic(annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
    """Combine four annotated images into one mosaic image.

    The canvas is split at a random point into four quadrants, filled in the
    order top-left, bottom-left, bottom-right, top-right. Each source image is
    randomly flipped, rescaled and distorted, then pasted so that one of its
    corners sits on the split point. A random HSV gain is applied to the
    finished composite.

    Args:
        annotation_line: four strings; in each, the first whitespace separated
            token is the image path and every further token is
            ``x1,y1,x2,y2,class`` (integers).
        input_shape: output size as ``(height, width)``.
        jitter: strength of the random aspect-ratio distortion.
        hue, sat, val: strength of the random HSV gain for each channel.

    Returns:
        ``(new_image, new_boxes)``: the uint8 RGB mosaic and the box list
        produced by ``merge_bboxes``.

    Raises:
        ValueError: if ``annotation_line`` does not hold exactly four entries.
    """
    lines = list(annotation_line)
    if len(lines) != 4:
        # Fewer entries cannot fill the canvas and extra ones would leak
        # unclipped boxes into the result, so fail early and clearly.
        raise ValueError("Mosaic needs exactly 4 annotation lines, got %d" % len(lines))

    h, w = input_shape
    min_offset_x = rand(0.3, 0.7)
    min_offset_y = rand(0.3, 0.7)
    #---------------------------------#
    #   Split point shared by the four tiles
    #---------------------------------#
    cutx = int(w * min_offset_x)
    cuty = int(h * min_offset_y)

    image_datas = []
    box_datas   = []
    for index, line in enumerate(lines):
        #---------------------------------#
        #   Split the line into path and boxes
        #---------------------------------#
        line_content = line.split()
        #---------------------------------#
        #   Open the image; convert() returns a loaded copy, so the file can
        #   be closed right away.
        #---------------------------------#
        with Image.open(line_content[0]) as source:
            image = source.convert('RGB')

        #---------------------------------#
        #   Source image size
        #---------------------------------#
        iw, ih = image.size
        #---------------------------------#
        #   Box coordinates
        #---------------------------------#
        box = np.array([np.array(list(map(int,box.split(',')))) for box in line_content[1:]])

        #---------------------------------#
        #   Random horizontal flip; images without boxes are flipped as well
        #---------------------------------#
        flip = rand()<.5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
            if len(box)>0:
                box[:, [0,2]] = iw - box[:, [2,0]]

        #------------------------------------------#
        #   Rescale the image and distort its aspect ratio
        #------------------------------------------#
        new_ar = iw/ih * rand(1-jitter,1+jitter) / rand(1-jitter,1+jitter)
        scale = rand(.4, 1)
        if new_ar < 1:
            nh = int(scale*h)
            nw = int(nh*new_ar)
        else:
            nw = int(scale*w)
            nh = int(nw/new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        #-----------------------------------------------#
        #   Place the image so that it touches the split point:
        #   tiles 0 and 1 end at cutx, tiles 0 and 3 end at cuty.
        #-----------------------------------------------#
        dx = cutx - nw if index in (0, 1) else cutx
        dy = cuty - nh if index in (0, 3) else cuty

        new_image = Image.new('RGB', (w,h), (128,128,128))
        new_image.paste(image, (dx, dy))
        image_data = np.array(new_image)

        box_data = []
        #---------------------------------#
        #   Move the boxes along with the image
        #---------------------------------#
        if len(box)>0:
            np.random.shuffle(box)
            box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
            box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
            box[:, 0:2][box[:, 0:2]<0] = 0
            box[:, 2][box[:, 2]>w] = w
            box[:, 3][box[:, 3]>h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w>1, box_h>1)]
            box_data = np.zeros((len(box),5))
            box_data[:len(box)] = box

        image_datas.append(image_data)
        box_datas.append(box_data)

    #---------------------------------#
    #   Cut each canvas to its quadrant and stitch them together
    #---------------------------------#
    new_image = np.zeros([h, w, 3], np.uint8)
    new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
    new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
    new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
    new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

    #---------------------------------#
    #   Colour jitter: one random gain per HSV channel
    #---------------------------------#
    r               = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
    #---------------------------------#
    #   Convert to HSV (distinct names so the gain parameters are not shadowed)
    #---------------------------------#
    hue_ch, sat_ch, val_ch = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
    dtype           = new_image.dtype
    #---------------------------------#
    #   Apply the gains through lookup tables
    #---------------------------------#
    x       = np.arange(0, 256, dtype=r.dtype)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    new_image = cv2.merge((cv2.LUT(hue_ch, lut_hue), cv2.LUT(sat_ch, lut_sat), cv2.LUT(val_ch, lut_val)))
    new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)

    #---------------------------------#
    #   Clip the boxes to their quadrants
    #---------------------------------#
    new_boxes = merge_bboxes(box_datas, cutx, cuty)

    return new_image, new_boxes
def get_random_data_with_MixUp(image_1, box_1, image_2, box_2, mix_ratio=0.5):
    """Blend two equally sized images and pool their boxes (MixUp).

    Args:
        image_1: first image, anything ``np.array`` can convert.
        box_1: boxes belonging to ``image_1``; may be empty.
        image_2: second image, same shape as ``image_1``.
        box_2: boxes belonging to ``image_2``; may be empty.
        mix_ratio: weight of ``image_1`` in the blend, within ``[0, 1]``;
            ``image_2`` receives ``1 - mix_ratio``. The default of 0.5 gives
            the plain average of both images.

    Returns:
        ``(new_image, new_boxes)``: the float32 blend and the boxes of both
        inputs stacked along axis 0. When one side has no boxes the other
        side's boxes are returned as they are.

    Raises:
        ValueError: if ``mix_ratio`` lies outside ``[0, 1]``.
    """
    if not 0 <= mix_ratio <= 1:
        raise ValueError("mix_ratio must be within [0, 1], got %r" % (mix_ratio,))

    new_image = np.array(image_1, np.float32) * mix_ratio + np.array(image_2, np.float32) * (1 - mix_ratio)
    # np.concatenate cannot join an empty 1-D array with an (n, 5) array,
    # so the empty cases are handled separately.
    if len(box_1) == 0:
        new_boxes = box_2
    elif len(box_2) == 0:
        new_boxes = box_1
    else:
        new_boxes = np.concatenate([box_1, box_2], axis=0)
    return new_image, new_boxes