├── CreateTxt.py
├── README.md
├── check_annotations.py
├── getClasses.py
├── rename.py
├── replace_xml_label.py
├── video_to_picture.py
└── voc_annotation.py


/CreateTxt.py:
--------------------------------------------------------------------------------
 1 | #!F:\PyCharm-projects
 2 | # coding : utf-8
 3 | # author : 葛壮壮
 4 | 
 5 | import os
 6 | import random
 7 | 
 8 | trainval_percent = 0.9  # trainval数据集占所有数据的比例
 9 | train_percent = 0.9  # train数据集占trainval数据的比例
10 | xmlfilepath = 'Annotations'
11 | txtsavepath = 'ImageSets/Main'
12 | total_xml = os.listdir(xmlfilepath)
13 | 
14 | num = len(total_xml)
15 | print('total number is ', num)
16 | list = range(num)
17 | tv = int(num * trainval_percent)
18 | print('trainVal number is ', tv)
19 | tr = int(tv * train_percent)
20 | print('train number is ', tr)
21 | print('test number is ', num - tv)
22 | trainval = random.sample(list, tv)
23 | train = random.sample(trainval, tr)
24 | 
25 | ftrainval = open('ImageSets/Main/trainval.txt', 'w')
26 | ftest = open('ImageSets/Main/test.txt', 'w')
27 | ftrain = open('ImageSets/Main/train.txt', 'w')
28 | fval = open('ImageSets/Main/val.txt', 'w')
29 | 
30 | for i in list:
31 |     name = total_xml[i][:-4] + '\n'
32 |     if i in trainval:
33 |         ftrainval.write(name)
34 |         if i in train:
35 |             ftrain.write(name)
36 |         else:
37 |             fval.write(name)
38 |     else:
39 |         ftest.write(name)
40 | 
41 | ftrainval.close()
42 | ftrain.close()
43 | fval.close()
44 | ftest.close()


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python-scripts-used-to-make-datasets
2 | My blog post explains in detail how to use each script:
3 | https://www.cnblogs.com/gezhuangzhuang/p/10902794.html
4 |  
5 | 


--------------------------------------------------------------------------------
/check_annotations.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import os
 3 | def getFilePathList(dirPath, partOfFileName=''):
 4 |     allFileName_list = list(os.walk(dirPath))[0][2]
 5 |     fileName_list = [k for k in allFileName_list if partOfFileName in k]
 6 |     filePath_list = [os.path.join(dirPath, k) for k in fileName_list]
 7 |     return filePath_list
 8 | 
 9 | 
def check_1(dirPath):
    """Print every .jpg file in dirPath that has no .xml file of the same base name beside it.

    When no such file is found, a single confirmation message is printed instead.
    """
    unmarked = [
        jpgFilePath
        for jpgFilePath in getFilePathList(dirPath, '.jpg')
        # The annotation is expected at the same path with '.xml' in place of the last four characters.
        if not os.path.exists(jpgFilePath[:-4] + '.xml')
    ]
    for jpgFilePath in unmarked:
        print('%s this picture is not marked.' %jpgFilePath)
    if not unmarked:
        print('congratulation! it is been verified that all jpg file are marked.')
20 | 
21 |        
22 | import xml.etree.ElementTree as ET
def check_2(dirPath, className_list):
    """Print every class name used in dirPath's .xml files that is not in className_list.

    Each .xml file directly inside dirPath is parsed and the <name> of every
    top-level <object> element is checked. An <object> without a <name>
    element is reported with class name "None" rather than aborting the
    check. When nothing is reported, a single confirmation message is printed.
    """
    className_set = set(className_list)
    allFileCorrect = True
    for xmlFilePath in getFilePathList(dirPath, '.xml'):
        root = ET.parse(xmlFilePath).getroot()
        for object_item in root.findall('object'):
            # findtext returns None when <name> is missing, which is never a
            # valid class and is therefore reported below.
            className = object_item.findtext('name')
            if className not in className_set:
                print('%s this xml file has wrong class name "%s" ' %(xmlFilePath, className))
                allFileCorrect = False
    if allFileCorrect:
        print('congratulation! it is been verified that all xml file are correct.')
40 | 
if __name__ == '__main__':
    # Directory that is scanned for .jpg images and their .xml annotations.
    dirPath = 'Picture/'
    # Class names that check_2 accepts inside <name> tags.
    className_list = [
        "aeroplane", "bicycle", "bird", "boat", "bottle",
        "bus", "car", "cat", "chair", "cow",
        "diningtable", "dog", "horse", "motorbike", "person",
        "pottedplant", "sheep", "sofa", "train", "tvmonitor",
    ]
    check_1(dirPath)
    check_2(dirPath, className_list)
46 | 


--------------------------------------------------------------------------------
/getClasses.py:
--------------------------------------------------------------------------------
 1 | #!F:\PyCharm-projects
 2 | # coding : utf-8
 3 | # author : 葛壮壮
 4 | 
 5 | import os
 6 | import xml.etree.ElementTree as ET
 7 | import numpy as np
 8 | 
 9 | np.set_printoptions(suppress=True, threshold=np.nan)
10 | import matplotlib
11 | from PIL import Image
12 | 
13 | 
def parse_obj(xml_path, filename):
    """Parse the XML file at xml_path + filename and list its objects.

    Returns one dict per <object> child of the root, each of the form
    {'name': <text of the object's <name> element>}, in document order.
    """
    annotation = ET.parse(xml_path + filename)
    return [{'name': node.find('name').text} for node in annotation.findall('object')]
22 | 
23 | 
def read_image(image_path, filename):
    """Return [width, height, width * height] for the image at image_path + filename."""
    # The context manager closes the underlying file once the size has been
    # read, so the handle is not leaked.
    with Image.open(image_path + filename) as im:
        W, H = im.size
    return [W, H, W * H]
31 | 
32 | 
if __name__ == '__main__':
    from collections import Counter

    # Count how many objects of each class appear across all annotation files.
    xml_path = 'Annotations/'
    num_objs = Counter()
    for filename in os.listdir(xml_path):
        # Only annotation files are parsed; anything else in the directory is skipped.
        if not filename.endswith('.xml'):
            continue
        num_objs.update(obj['name'] for obj in parse_obj(xml_path, filename))

    # Counter keeps insertion order, so classes are listed in the order they
    # were first encountered.
    for name, count in num_objs.items():
        print('{}:{}个'.format(name, count))
    print('信息统计算完毕。')
58 | 


--------------------------------------------------------------------------------
/rename.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gzz1529657064/Python-scripts-used-to-make-datasets/0ea7d05739b0a7bbf3fe0d789d60585d54200f3d/rename.py


--------------------------------------------------------------------------------
/replace_xml_label.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # coding=utf-8
 3 | import os
 4 | import os.path
 5 | import xml.dom.minidom
 6 |  
 7 | path = 'Annotations'
 8 | files = os.listdir(path)
 9 | s = []
10 | for xmlFile in files:
11 | 	portion = os.path.splitext(xmlFile)
12 | 	if not os.path.isdir(xmlFile):
13 |  
14 | 		dom = xml.dom.minidom.parse(os.path.join(path, xmlFile))
15 | 
16 | 		root = dom.documentElement
17 | 		name = root.getElementsByTagName('name')
18 | 
19 | 		for i in range(len(name)):
20 | 			if name[i].firstChild.data == 'pedestrain':
21 | 				name[i].firstChild.data = 'red pedestrian'
22 | 	with open(os.path.join(path, xmlFile), 'w', encoding='UTF-8') as fh:
23 | 		dom.writexml(fh)
24 | 		print('replace filename OK!')
25 | 


--------------------------------------------------------------------------------
/video_to_picture.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | vc = cv2.VideoCapture('E:/HDV-2019-5-8/Movie/20190508_0095.MP4') 
 3 | c=0
 4 | rval=vc.isOpened()
 5 | timeF = 30
 6 | while rval:   
 7 |     c = c + 1
 8 |     rval, frame = vc.read()
 9 |     if (c % timeF == 0):
10 |         cv2.imwrite('E:/HDV-2019-5-8/digital_light/95/'+str(c).zfill(8) + '.jpg', frame)	
11 |     cv2.waitKey(1)
12 | 
13 | vc.release()


--------------------------------------------------------------------------------
/voc_annotation.py:
--------------------------------------------------------------------------------
 1 | import xml.etree.ElementTree as ET
 2 | from os import getcwd
 3 | 
 4 | sets=[('2018', 'train'), ('2018', 'val'), ('2018', 'test'), ('2018', 'trainval')]
 5 | 
 6 | classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
 7 | 
 8 | 
 9 | def convert_annotation(year, image_id, list_file):
10 |     in_file = open('VOCdevkit\VOC%s\Annotations\%s.xml'%(year, image_id), encoding = 'utf-8')
11 |     tree=ET.parse(in_file)
12 |     root = tree.getroot()
13 | 
14 |     for obj in root.iter('object'):
15 |         difficult = obj.find('difficult').text
16 |         cls = obj.find('name').text
17 |         if cls not in classes or int(difficult)==1:
18 |             continue
19 |         cls_id = classes.index(cls)
20 |         xmlbox = obj.find('bndbox')
21 |         b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text), int(xmlbox.find('ymax').text))
22 |         #list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))
23 |         list_file.write(" " + str(cls_id) + ' ' + " ".join([str(a) for a in b]))
24 | 
import os.path  # the module-level import only brings in getcwd

wd = getcwd()

# For every (year, image_set) pair write '<year>_<image_set>.txt' containing
# one line per image: the image path followed by its converted annotations.
for year, image_set in sets:
    ids_path = os.path.join('VOCdevkit', 'VOC%s' % year, 'ImageSets', 'Main', '%s.txt' % image_set)
    with open(ids_path) as ids_file:
        image_ids = ids_file.read().split()

    with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
        for image_id in image_ids:
            # Joining components keeps the written image path valid on any platform.
            list_file.write(os.path.join(wd, 'VOCdevkit', 'VOC%s' % year, 'JPEGImages', '%s.jpg' % image_id))
            convert_annotation(year, image_id, list_file)
            list_file.write('\n')
36 | 
37 | 


--------------------------------------------------------------------------------