# Repository layout
# ├── CreateTxt.py
# ├── README.md
# ├── check_annotations.py
# ├── getClasses.py
# ├── rename.py
# ├── replace_xml_label.py
# ├── video_to_picture.py
# └── voc_annotation.py
#
# ---------------------------------------------------------------------------
# /CreateTxt.py
# ---------------------------------------------------------------------------
#!F:\PyCharm-projects
# coding : utf-8
# author : 葛壮壮
"""Randomly split the annotation files into trainval / train / val / test lists.

The script lists the XML files in ``xmlfilepath``, draws a random
``trainval`` subset, draws a random ``train`` subset out of that, and writes
the file stems (name without extension), one per line, to ``trainval.txt``,
``train.txt``, ``val.txt`` and ``test.txt`` inside ``txtsavepath``.
"""

import os
import random

trainval_percent = 0.9  # fraction of all samples that goes into trainval
train_percent = 0.9  # fraction of the trainval samples that goes into train
xmlfilepath = 'Annotations'
txtsavepath = 'ImageSets/Main'


def split_indices(num, trainval_percent, train_percent, rng=random):
    """Pick which of ``num`` sample indices belong to trainval and to train.

    Args:
        num: total number of samples; indices are ``0 .. num - 1``.
        trainval_percent: fraction of all samples placed in trainval.
        train_percent: fraction of the trainval samples placed in train.
        rng: object with a ``sample(population, k)`` method; the ``random``
            module by default, or a seeded ``random.Random`` for a
            reproducible split.

    Returns:
        ``(trainval, train)`` as two sets of indices, ``train`` being a
        subset of ``trainval``.
    """
    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)
    trainval = rng.sample(range(num), tv)
    train = rng.sample(trainval, tr)
    # Sets make the per-sample membership tests O(1) when the lists are written.
    return set(trainval), set(train)


def write_splits(names, trainval, train, out_dir=txtsavepath):
    """Write the four split list files into ``out_dir``.

    Args:
        names: sample names; position ``i`` corresponds to index ``i``.
        trainval: indices belonging to trainval (the rest go to test).
        train: indices of trainval belonging to train (the rest go to val).
        out_dir: destination directory, created if it does not exist.
    """
    os.makedirs(out_dir, exist_ok=True)
    # One ``with`` for all four handles so every file is closed even when a
    # write fails part-way through.
    with open(os.path.join(out_dir, 'trainval.txt'), 'w') as ftrainval, \
            open(os.path.join(out_dir, 'test.txt'), 'w') as ftest, \
            open(os.path.join(out_dir, 'train.txt'), 'w') as ftrain, \
            open(os.path.join(out_dir, 'val.txt'), 'w') as fval:
        for i, stem in enumerate(names):
            line = stem + '\n'
            if i in trainval:
                ftrainval.write(line)
                if i in train:
                    ftrain.write(line)
                else:
                    fval.write(line)
            else:
                ftest.write(line)


def main():
    """Split the XML files found in ``xmlfilepath`` and write the lists."""
    # Keep only ``.xml`` entries so stray files do not end up in the lists,
    # and sort them so index -> file name is stable between runs.
    total_xml = sorted(
        entry for entry in os.listdir(xmlfilepath)
        if entry.lower().endswith('.xml')
    )
    num = len(total_xml)
    trainval, train = split_indices(num, trainval_percent, train_percent)

    print('total number is ', num)
    print('trainVal number is ', len(trainval))
    print('train number is ', len(train))
    print('test number is ', num - len(trainval))

    names = [os.path.splitext(entry)[0] for entry in total_xml]
    write_splits(names, trainval, train, txtsavepath)


if __name__ == '__main__':
    main()

# ---------------------------------------------------------------------------
# /README.md
# ---------------------------------------------------------------------------
# # Python-scripts-used-to-make-datasets
# This is my blog address, the blog describes in detail how to use each script.
# (README.md, continued)
# https://www.cnblogs.com/gezhuangzhuang/p/10902794.html

# ---------------------------------------------------------------------------
# /check_annotations.py
# ---------------------------------------------------------------------------
# Checks that every .jpg in a directory has a matching .xml annotation and
# that every annotated object uses one of the expected class names.

import os
import xml.etree.ElementTree as ET


def getFilePathList(dirPath, partOfFileName=''):
    """Return the paths of files directly inside ``dirPath``.

    Args:
        dirPath: directory to list (sub-directories are not descended into).
        partOfFileName: only files whose name contains this substring are
            returned; the empty default matches every file.

    Returns:
        A list of ``os.path.join(dirPath, name)`` paths; empty when the
        directory does not exist.
    """
    # Take only the first item of the walk: that is the top level, and the
    # default covers a missing directory instead of raising IndexError.
    _, _, allFileName_list = next(os.walk(dirPath), (dirPath, [], []))
    return [
        os.path.join(dirPath, k)
        for k in allFileName_list
        if partOfFileName in k
    ]


def check_1(dirPath):
    """Report every ``.jpg`` in ``dirPath`` that has no sibling ``.xml`` file.

    Returns:
        ``True`` when every picture has an annotation file, else ``False``.
    """
    allFileMarked = True
    for jpgFilePath in getFilePathList(dirPath, '.jpg'):
        xmlFilePath = os.path.splitext(jpgFilePath)[0] + '.xml'
        if not os.path.exists(xmlFilePath):
            print('%s this picture is not marked.' %jpgFilePath)
            allFileMarked = False
    if allFileMarked:
        print('congratulation! it is been verified that all jpg file are marked.')
    return allFileMarked


def check_2(dirPath, className_list):
    """Report every object whose class name is not in ``className_list``.

    Args:
        dirPath: directory holding the ``.xml`` annotation files.
        className_list: iterable of accepted class names.

    Returns:
        ``True`` when every file parsed and every object had an accepted
        name, else ``False``.
    """
    className_set = set(className_list)
    allFileCorrect = True
    for xmlFilePath in getFilePathList(dirPath, '.xml'):
        with open(xmlFilePath, 'rb') as file:
            fileContent = file.read()
        try:
            root = ET.XML(fileContent)
        except ET.ParseError as err:
            # One broken file must not hide the problems in the others.
            print('%s this xml file cannot be parsed: %s' % (xmlFilePath, err))
            allFileCorrect = False
            continue
        for object_item in root.findall('object'):
            name = object_item.find('name')
            # An <object> without <name> is reported as class name None
            # rather than crashing on ``None.text``.
            className = name.text if name is not None else None
            if className not in className_set:
                print('%s this xml file has wrong class name "%s" ' %(xmlFilePath, className))
                allFileCorrect = False
    if allFileCorrect:
        print('congratulation! it is been verified that all xml file are correct.')
    return allFileCorrect


if __name__ == '__main__':
    dirPath = 'Picture/'
    className_list = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
    check_1(dirPath)
    check_2(dirPath, className_list)

# ---------------------------------------------------------------------------
# /getClasses.py
# ---------------------------------------------------------------------------
#!F:\PyCharm-projects
# coding : utf-8
# author : 葛壮壮
# Counts how many annotated objects of each class the XML files contain.

import os
import sys
import xml.etree.ElementTree as ET
from collections import Counter


def parse_obj(xml_path, filename):
    """Return one ``{'name': <class name>}`` dict per object in an XML file.

    Args:
        xml_path: directory containing the file (trailing slash optional).
        filename: name of the XML file inside ``xml_path``.
    """
    tree = ET.parse(os.path.join(xml_path, filename))
    objects = []
    for obj in tree.findall('object'):
        name = obj.find('name')
        if name is None:
            continue  # malformed <object> without a <name>: nothing to record
        objects.append({'name': name.text})
    return objects


def read_image(image_path, filename):
    """Return ``[width, height, width * height]`` for an image file."""
    # Pillow is third-party; importing it here keeps the rest of the module
    # usable when it is not installed.
    from PIL import Image

    with Image.open(os.path.join(image_path, filename)) as im:
        W, H = im.size
    return [W, H, W * H]


def count_classes(xml_path):
    """Count the objects of each class over every ``.xml`` file in a directory.

    Returns:
        A ``collections.Counter`` mapping class name to number of objects,
        in order of first appearance over the sorted file names.
    """
    num_objs = Counter()
    for filename in sorted(os.listdir(xml_path)):
        if not filename.endswith('.xml'):
            continue
        num_objs.update(obj['name'] for obj in parse_obj(xml_path, filename))
    return num_objs


if __name__ == '__main__':
    import numpy as np
    import matplotlib  # noqa: F401  (imported by the original script, unused here)

    # ``threshold`` must be a number; NaN is rejected by NumPy, and
    # ``sys.maxsize`` is the documented way to disable summarisation.
    np.set_printoptions(suppress=True, threshold=sys.maxsize)

    for name, count in count_classes('Annotations/').items():
        print('{}:{}个'.format(name, count))
    print('信息统计算完毕。')

# ---------------------------------------------------------------------------
# /rename.py
# ---------------------------------------------------------------------------
# https://raw.githubusercontent.com/gzz1529657064/Python-scripts-used-to-make-datasets/0ea7d05739b0a7bbf3fe0d789d60585d54200f3d/rename.py

# ---------------------------------------------------------------------------
# /replace_xml_label.py
# ---------------------------------------------------------------------------
# coding=utf-8
# Rewrites one label into another in every annotation file of a directory.

import os
import os.path
import xml.dom.minidom


def replace_label(path='Annotations', old_label='pedestrain', new_label='red pedestrian'):
    """Replace ``old_label`` with ``new_label`` in every ``<name>`` element.

    Args:
        path: directory containing the XML annotation files.
        old_label: exact text of the ``<name>`` elements to rewrite.
        new_label: replacement text.

    Returns:
        The number of files that were modified. Files without a matching
        ``<name>`` are left untouched on disk.
    """
    changed_files = 0
    for xmlFile in os.listdir(path):
        xmlFilePath = os.path.join(path, xmlFile)
        # The entry must be tested relative to ``path``; testing the bare
        # name would look in the current working directory instead.
        if os.path.isdir(xmlFilePath):
            continue

        dom = xml.dom.minidom.parse(xmlFilePath)
        changed = False
        for node in dom.documentElement.getElementsByTagName('name'):
            text = node.firstChild
            # An empty <name/> has no child node to compare or rewrite.
            if text is None or text.nodeType != text.TEXT_NODE:
                continue
            if text.data == old_label:
                text.data = new_label
                changed = True

        if changed:
            with open(xmlFilePath, 'w', encoding='UTF-8') as fh:
                dom.writexml(fh)
            changed_files += 1
    return changed_files


if __name__ == '__main__':
    replace_label()
    print('replace filename OK!')

# ---------------------------------------------------------------------------
# /video_to_picture.py
# ---------------------------------------------------------------------------
# Saves every ``timeF``-th frame of a video as a JPEG.


def extract_frames(video_path='E:/HDV-2019-5-8/Movie/20190508_0095.MP4',
                   out_dir='E:/HDV-2019-5-8/digital_light/95/',
                   timeF=30):
    """Write every ``timeF``-th frame of ``video_path`` into ``out_dir``.

    Frames are counted from 1 and saved as ``<count zero-padded to 8>.jpg``.

    Returns:
        The number of ``imwrite`` calls made.
    """
    import cv2  # third-party (OpenCV); only needed when frames are extracted

    vc = cv2.VideoCapture(video_path)
    c = 0
    saved = 0
    try:
        rval = vc.isOpened()
        while rval:
            rval, frame = vc.read()
            if not rval:
                # The read after the last frame fails and yields no image;
                # stop before that empty frame can reach imwrite.
                break
            c = c + 1
            if c % timeF == 0:
                cv2.imwrite(os.path.join(out_dir, str(c).zfill(8) + '.jpg'), frame)
                saved += 1
    finally:
        vc.release()
    return saved


if __name__ == '__main__':
    extract_frames()

# ---------------------------------------------------------------------------
# /voc_annotation.py
# ---------------------------------------------------------------------------
# Converts VOC-style XML annotations into one text line per image:
#   <image path> <class id> <xmin> <ymin> <xmax> <ymax> [<class id> ...]
# Paths are built with os.path.join: the original backslash-separated string
# literals only worked on Windows and contained invalid escape sequences.

import os
import xml.etree.ElementTree as ET
from os import getcwd

sets = [('2018', 'train'), ('2018', 'val'), ('2018', 'test'), ('2018', 'trainval')]

classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]


def convert_annotation(year, image_id, list_file, devkit_dir='VOCdevkit'):
    """Append the boxes of one image to ``list_file``.

    For every object whose class is in ``classes`` and which is not marked
    difficult, writes ``" <class id> <xmin> <ymin> <xmax> <ymax>"``.

    Args:
        year: dataset year, used in the ``VOC<year>`` directory name.
        image_id: annotation file stem (file name without ``.xml``).
        list_file: writable text file object receiving the output.
        devkit_dir: root directory that contains ``VOC<year>``.
    """
    xml_path = os.path.join(devkit_dir, 'VOC%s' % year, 'Annotations', '%s.xml' % image_id)
    with open(xml_path, encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    for obj in root.iter('object'):
        # A missing or empty <difficult> is treated as "not difficult".
        difficult = obj.findtext('difficult') or 0
        cls = obj.findtext('name')
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        # int(float(...)) accepts coordinates stored as "12.0" as well as
        # "12"; any fractional part is truncated.
        b = tuple(
            int(float(xmlbox.findtext(tag)))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        list_file.write(" " + str(cls_id) + ' ' + " ".join(str(a) for a in b))


if __name__ == '__main__':
    wd = getcwd()

    for year, image_set in sets:
        ids_path = os.path.join('VOCdevkit', 'VOC%s' % year, 'ImageSets', 'Main', '%s.txt' % image_set)
        with open(ids_path) as ids_file:
            image_ids = ids_file.read().strip().split()
        with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
            for image_id in image_ids:
                list_file.write(os.path.join(wd, 'VOCdevkit', 'VOC%s' % year, 'JPEGImages', '%s.jpg' % image_id))
                convert_annotation(year, image_id, list_file)
                list_file.write('\n')

# ---------------------------------------------------------------------------