├── .gitattributes
├── README.md
├── lower.py
├── divide_dataset.py
├── DelNoneXml.py
├── rename.py
├── JPG与XML匹配.py
├── rename1.py
├── CreateXML.py
├── crop_detect.py
├── CountObject.py
├── ParseXML.py
└── DelAnntation.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # VOCxml_operator
 2 | Utilities for operating on the XML annotation files of a VOC-format dataset.
 3 | CountObject.py:
 4 | #统计数据集中各类别个数
 5 | CreateXML.py:
 6 | #创建一个简单的xml文件
 7 | crop_detect.py:
 8 | #将数据集中的标注框从图片中剪裁出来
 9 | DelAnntation.py:
10 | #删除或者修改数据集某一类的xml文件
11 | DelNoneXml.py:
12 | #删除数据集中空的xml文件
13 | divide_dataset.py:
14 | #按照VOC数据集Main文件夹下的txt文件将测试集图片分离出来
15 | JPG与XML匹配.py:
16 | #整理数据集，删除没有对应xml文件的图片
17 | lower.py:
18 | #将图片jpg文件后缀小写或者大写
19 | rename1.py:
20 | #将图片文件的名字重新编号
21 | ParseXML.py:
22 | #解析数据集中全部xml文件
23 | 


--------------------------------------------------------------------------------
/lower.py:
--------------------------------------------------------------------------------
 1 | import os.path
 2 | import glob
 3 | 
 4 | #将图片jpg文件后缀小写或者大写
 5 | 
 6 | if __name__ == "__main__":
 7 |     realpath = os.path.realpath(__file__)
 8 |     dirname = os.path.dirname(realpath)
 9 |     extension = 'JPG'
10 |     file_list = glob.glob('*.'+extension)
11 |     for filename in file_list:
12 |         #如果想要全部大写，改成lowerfilename = filename.upper()
13 |         lowerfilename = filename.lower()
14 |         filepath = os.path.join(dirname, filename)
15 |         os.rename(filepath, lowerfilename)
16 |         print(lowerfilename)


--------------------------------------------------------------------------------
/divide_dataset.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | 
# Split the test-set images out of the dataset according to the txt file in the VOC Main folder.
# test_path: text file read line by line by _main(); each line names one image (without extension).
test_path = '/home/omnisky/project/tf-faster-rcnn/data/VOCdevkit2007/VOC2007/ImageSets/Main/test.txt'
# anno_path: directory the .jpg files are copied from (joined by string concatenation, so keep the trailing '/').
anno_path = '/home/omnisky/project/tf-faster-rcnn/data/VOCdevkit2007/VOC2007/JPEGImages/'
# new_path: directory the .jpg files are copied to (keep the trailing '/').
new_path = '/home/omnisky/project/tf-faster-rcnn/data/VOCdevkit2007/VOC2007/testimages/'
 8 | 
 9 | def _main():
10 |     fp = open(test_path, 'r')
11 |     xml_list = fp.readlines()
12 |     fp.close()
13 |     i = 0
14 |     for file in xml_list:
15 |         xml_file = file.replace('\n', '')
16 |         shutil.copyfile(anno_path + xml_file+'.jpg', new_path + xml_file+'.jpg')
17 |         i =+ 1
18 |         print xml_file+'.jpg'
19 |     print i
20 | 
21 | if __name__ == '__main__':
22 |     _main()
23 | 


--------------------------------------------------------------------------------
/DelNoneXml.py:
--------------------------------------------------------------------------------
 1 | from PIL import Image
 2 | from pylab import *
 3 | import os
 4 | import xml.etree.ElementTree as ET
 5 | 
# Delete the empty XML files (annotations without any object) from the dataset.
# anno_path is joined to file names by string concatenation, so keep the trailing '/'.
anno_path = 'F:/数据集/20190301输电线路主要缺陷负样本扩建数据集/Annotations/'
# Running count of deleted files; incremented by _DelNoneAnnotation and printed by _main.
DELNUM = 0
 9 | 
def _main():
    """Check every entry of ``anno_path`` and print how many files were deleted."""
    for file in os.listdir(anno_path):
        _DelNoneAnnotation(file)
    print(DELNUM)
17 | 
def _DelNoneAnnotation(filepath):
    """Delete one annotation file if it contains no object element.

    ``filepath`` is a file name inside ``anno_path``. The file is parsed as
    XML; when no element has a tag containing ``'object'`` the file is
    removed, its name is printed and the global ``DELNUM`` is incremented.
    A missing file is reported and skipped.
    """
    global DELNUM
    full_path = anno_path + filepath
    if not os.path.exists(full_path):
        print(filepath+' :not found')
        # Nothing to parse; ET.parse would raise on a missing file.
        return
    tree = ET.parse(full_path)
    # Same matching rule as before: any element whose tag contains 'object'.
    has_object = any('object' in element.tag for element in tree.iter())
    if not has_object:
        os.remove(full_path)
        print(filepath)
        DELNUM += 1


if __name__ == '__main__':
    _main()
34 | 


--------------------------------------------------------------------------------
/rename.py:
--------------------------------------------------------------------------------
 1 | import os.path
 2 | import glob
 3 | import re
 4 | 
 5 | 
 6 | if __name__ == "__main__":
 7 |     realpath = os.path.realpath(__file__)
 8 |     dirname = os.path.dirname(realpath)
 9 |     extension = 'jpg'
10 |     file_list = glob.glob('*.'+extension)
11 |     #在文件夹下面建立defectname.txt文档记录原始图像名称信息
12 |     filetxt = open(os.path.join(dirname, 'defectname.txt'), 'w', encoding='utf-8')
13 |     for index, filename in enumerate(file_list):
14 |         #修改数字为起始标号
15 |         index = index + 4643
16 |         str_index = str(index)
17 |         length = len(str_index)
18 |         for i in range(6-length):
19 |             str_index = '0' + str_index
20 |         filepath = os.path.join(dirname, filename)
21 |         newfilename = os.path.join(str_index, filename)
22 |         print("%s\n" % (newfilename), file=filetxt)
23 |         print(str_index + '.jpg')
24 |         os.rename(filepath, str_index + '.jpg')
25 |     filetxt.close()


--------------------------------------------------------------------------------
/JPG与XML匹配.py:
--------------------------------------------------------------------------------
 1 | import os
# SourceDir is the directory scanned for .xml files.
SourceDir = 'F:/张珂老师巡检图像/new gradingring/xml/'
# DestDir is the directory scanned for .jpg files.
DestDir = 'F:/张珂老师巡检图像/new gradingring/images/'
 4 | 
 5 | if __name__ == '__main__':
 6 |     all_fileName = os.listdir(SourceDir)
 7 |     k=0
 8 |     for fileName in all_fileName:
 9 |         suffix = fileName[-4:].lower()
10 |         if suffix == ".xml":
11 |             if not os.path.exists(os.path.join(DestDir, fileName[0:-4]+".jpg")):
12 |                 print(fileName)
13 |                 os.remove(os.path.join(SourceDir, fileName))  # 删除文件
14 |             else:k=k+1
15 |     print(str(k))
16 | 
17 |     all_fileName = os.listdir(DestDir)
18 |     k=0
19 |     for fileName in all_fileName:
20 |         suffix = fileName[-4:].lower()
21 |         if suffix == ".jpg":
22 |             if not os.path.exists(os.path.join(SourceDir, fileName[0:-4]+".xml")):
23 |                 print(fileName)
24 |                 os.remove(os.path.join(DestDir, fileName))  # 删除文件
25 |             else:k=k+1
26 |     print(str(k))
27 | 


--------------------------------------------------------------------------------
/rename1.py:
--------------------------------------------------------------------------------
 1 | import os.path
 2 | import glob
 3 | import re
# Renumber the names of the image files.
# Path of the image folder (joined by string concatenation, so keep the trailing '/').
path = 'F:/张珂老师巡检图像/第二次缺陷图像 - 副本/'
# Directory in which defectname.txt (the old-name record) is written.
text_path = 'f:/'
 8 | if __name__ == "__main__":
 9 |     '''realpath = os.path.realpath(__file__)
10 |     dirname = os.path.dirname(realpath)
11 |     extension = 'jpg'
12 |     file_list = glob.glob('*.'+extension)'''
13 |     file_list = os.listdir(path)
14 |     #在文件夹下面建立defectname.txt文档记录原始图像名称信息
15 |     filetxt = open(os.path.join(text_path, 'defectname.txt'), 'w', encoding='utf-8')
16 |     for index, filename in enumerate(file_list):
17 |         #修改数字为起始标号
18 |         index = index + 7507
19 |         str_index = str(index)
20 |        # length = len(str_index)
21 |        # for i in range(6-length):
22 |        #     str_index = '7507' + str_index
23 |         filepath = os.path.join(path, filename)
24 |         newfilename = os.path.join(str_index, filename)
25 |         print("%s\n" % (newfilename), file=filetxt)
26 |         print(str_index + '.jpg')
27 |         os.rename(filepath, path + str_index + '.jpg')
28 |     filetxt.close()


--------------------------------------------------------------------------------
/CreateXML.py:
--------------------------------------------------------------------------------
 1 | from xml.etree import ElementTree as ET
 2 | 
def main():
    """Entry point: append one more person record to the sample XML via _appendXML()."""
    _appendXML()
 5 | 
 6 | def _createXML():
 7 |     root = ET.Element('lab')
 8 |     person1 = ET.SubElement(root, 'person', {'name':'Brown'})
 9 |     age1 = ET.SubElement(person1, 'age')
10 |     age1.text = '21'
11 |     gender1 = ET.SubElement(person1, 'gender')
12 |     gender1.text = 'male'
13 |     person2 = ET.SubElement(root, 'person', {'name':'Red'})
14 |     age2 = ET.SubElement(person2, 'age')
15 |     age2.text = '23'
16 |     gender2 = ET.SubElement(person2, 'gender')
17 |     gender2.text = 'female'
18 |     tree = ET.ElementTree(root)
19 |     tree.write('G:/pythonStudy/xml/sample.xml', encoding="utf-8", xml_declaration=True)
20 | 
def _appendXML():
    """Append one more <person> entry to the sample XML file and save it in place."""
    sample_path = 'G:/pythonStudy/xml/sample.xml'
    document = ET.parse(sample_path)
    # SubElement attaches the new person as the last child of the root.
    newcomer = ET.SubElement(document.getroot(), 'person', {'name': 'Brown'})
    for tag, value in (('age', '20'), ('gender', 'male')):
        ET.SubElement(newcomer, tag).text = value
    document.write(sample_path, encoding="utf-8", xml_declaration=True)


if __name__ == "__main__":
    main()
35 | 


--------------------------------------------------------------------------------
/crop_detect.py:
--------------------------------------------------------------------------------
 1 | from PIL import Image
 2 | from pylab import *
 3 | import os
 4 | import xml.etree.ElementTree as ET
 5 | 
# Crop the annotated bounding boxes out of the dataset images.
# anno_path holds the XML files, image_path the uncropped images, crop_path the cropped outputs.
# All three are joined to file names by string concatenation, so keep the trailing '/'.
anno_path = 'C:/Users/91279/Desktop/何颖宣 19.2.20/金具/xml/'
image_path = 'C:/Users/91279/Desktop/何颖宣 19.2.20/金具/images/'
crop_path = 'C:/Users/91279/Desktop/何颖宣 19.2.20/金具/crop/'
11 | 
12 | 
def _main():
    """Crop every annotated box of every file in ``anno_path``.

    For each annotation file the parsed boxes are cropped one by one (numbered
    from 1) and the annotation file name is printed once per box. The number
    of processed annotation files is printed at the end.
    """
    processed = 0
    for file in os.listdir(anno_path):
        num, annos = _ParseAnnotation(file)
        for position, annotation in enumerate(annos[:num], start=1):
            _crop(position, annotation, file)
            print(file)
        processed += 1
    print(processed)
29 | 
def _ParseAnnotation(filepath):
    """Read the labelled boxes of one annotation file.

    ``filepath`` is a file name inside ``anno_path``. Every element whose tag
    contains ``'object'`` contributes one dict with the keys ``name``,
    ``xmin``, ``ymin``, ``xmax`` and ``ymax`` (coordinates converted to int),
    provided all four coordinates are present in its bndbox.

    Returns ``(num, annos)``: ``annos[:num]`` are the dicts, and the list is
    padded with ``None`` up to 30 entries as before. Files with more than 30
    objects are now returned in full instead of raising IndexError. A missing
    file is reported and yields ``num == 0``.
    """
    annos = []
    full_path = anno_path + filepath
    if not os.path.exists(full_path):
        print(filepath+' :not found')
    else:
        for annoobject in ET.parse(full_path).iter():
            if 'object' not in annoobject.tag:
                continue
            # Collected per object so values never leak from a previous one.
            name = None
            box = {}
            for element in list(annoobject):
                if 'name' in element.tag:
                    name = element.text
                if 'bndbox' in element.tag:
                    for size in list(element):
                        for key in ('xmin', 'ymin', 'xmax', 'ymax'):
                            if key in size.tag:
                                box[key] = int(size.text)
            if len(box) == 4:
                annos.append({'name': name, 'xmin': box['xmin'], 'ymin': box['ymin'],
                              'xmax': box['xmax'], 'ymax': box['ymax']})
    num = len(annos)
    # A negative multiplier yields an empty list, so long results stay untouched.
    annos.extend([None] * (30 - num))
    return num, annos
56 | 
def _crop(num, annotation, file):
    """Cut one annotated box out of its image and save it as a JPEG.

    ``num`` is the running number of the box within the annotation file,
    ``annotation`` a dict with ``name``, ``xmin``, ``ymin``, ``xmax`` and
    ``ymax``, and ``file`` the annotation file name. The image is looked up in
    ``image_path`` under the same stem with a ``.jpg`` extension; a missing
    image is reported and skipped. The crop is written to ``crop_path`` as
    ``<name><stem>_<num>.jpg``.
    """
    filenum = os.path.splitext(file)
    filename = filenum[0] + '.jpg'
    if not os.path.exists(image_path + filename):
        print(filename + 'not found')
        return
    box = (annotation['xmin'], annotation['ymin'], annotation['xmax'], annotation['ymax'])
    # The with-block closes the source image; the original never closed it.
    # The pixel data is copied into pil_region before the block ends.
    with Image.open(image_path + filename) as pil_im:
        region = pil_im.crop(box)
        pil_region = Image.fromarray(uint8(region))
    pil_region.save(crop_path + annotation['name']+filenum[0]+'_'+str(num)+'.jpg')


if __name__ == '__main__':
    _main()
71 | 


--------------------------------------------------------------------------------
/CountObject.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | import sys
 3 | import os
 4 | import xml.etree.ElementTree as ET
 5 | #sys.stdout=io.TextIOWrapper(sys.stdout.buffer,encoding='utf8')
# Change the path below to where your XML files are stored; use '/' as the separator and end with '/'.
anno_path = 'F:/张珂老师巡检图像/new gradingring/xml/'
#anno_num = [0 for i in range(18)]
# Replace the tuple below with the classes of your own VOC dataset.
# _Count compares each annotation name against these strings for exact equality.
class_name = ('insulator bunch-drop', 'insulator damage', 'insulator pollution', 
                  'bjsb strands','shockproof hammer deformation', 'shockproof hammer intersection', 
                  'cover slab losing', 'illegal construction', 'illegal constructing', 
                  'grading ring damage', 'birdnest', 'foreign body','shielded ring corrosion',
                  'normal single insulator', 'normal grading ring', 'normal shockproof hammer', 
                  'normal shielded ring', 'normal pre-twisted suspension clamp', 'normal single insulator2', 
                  'normal grading ring2')
def _main():
    """Count the objects of every class over all files in ``anno_path`` and print the totals.

    One line ``<class>: <count>`` is printed per entry of ``class_name``.
    """
    # One counter per class; the original fixed size of 30 raised IndexError
    # as soon as class_name held more than 30 entries.
    anno_num = [0] * len(class_name)
    for file in os.listdir(anno_path):
        num, annos = _ParseAnnotation(file)
        anno_num = _Count(num, annos, anno_num)
    for label, count in zip(class_name, anno_num):
        print(label + ': ' + str(count))
25 | 
def _ParseAnnotation(filepath):
    """Read the labelled boxes of one annotation file.

    ``filepath`` is a file name inside ``anno_path``. Every element whose tag
    contains ``'object'`` contributes one dict with the keys ``name``,
    ``xmin``, ``ymin``, ``xmax`` and ``ymax`` (coordinates converted to int),
    provided all four coordinates are present in its bndbox.

    Returns ``(num, annos)``: ``annos[:num]`` are the dicts, and the list is
    padded with ``None`` up to 30 entries as before. Files with more than 30
    objects are now returned in full instead of raising IndexError. A missing
    file is reported and yields ``num == 0``.
    """
    annos = []
    full_path = anno_path + filepath
    if not os.path.exists(full_path):
        print(filepath+' :not found')
    else:
        for annoobject in ET.parse(full_path).iter():
            if 'object' not in annoobject.tag:
                continue
            # Collected per object so values never leak from a previous one.
            name = None
            box = {}
            for element in list(annoobject):
                if 'name' in element.tag:
                    name = element.text
                if 'bndbox' in element.tag:
                    for size in list(element):
                        for key in ('xmin', 'ymin', 'xmax', 'ymax'):
                            if key in size.tag:
                                box[key] = int(size.text)
            if len(box) == 4:
                annos.append({'name': name, 'xmin': box['xmin'], 'ymin': box['ymin'],
                              'xmax': box['xmax'], 'ymax': box['ymax']})
    num = len(annos)
    # A negative multiplier yields an empty list, so long results stay untouched.
    annos.extend([None] * (30 - num))
    return num, annos
52 | 
def _Count(num, annos, anno_num):
    """Add the first ``num`` annotations of ``annos`` to the per-class counters.

    ``anno_num[k]`` is incremented for every position ``k`` of ``class_name``
    whose label equals the annotation's ``'name'``. The list is updated in
    place and also returned.
    """
    for position in range(num):
        label = annos[position]['name']
        for slot, known in enumerate(class_name):
            if label == known:
                anno_num[slot] += 1
    return anno_num


if __name__ == '__main__':
    _main()
62 | 


--------------------------------------------------------------------------------
/ParseXML.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | #coding=utf-8
 4 | #def filematch(file):
 5 | import xml.etree.ElementTree as ET
 6 | 
 7 | 
#anno_path = 'F:/数据集/luoshuan/Annos（包含正常螺栓和正常碗头）/'
# Directory holding the annotation files; joined to file names by string
# concatenation, so keep the trailing '/'.
anno_path = 'G:/pythonStudy/xml/'
def main():
    """Copy the objects of the second listed file in ``anno_path`` into the first.

    The entries are taken in the order ``os.listdir`` returns them. When the
    directory holds fewer than two entries a message is printed and nothing
    is changed.
    """
    filelist = os.listdir(anno_path)
    if len(filelist) < 2:
        # filelist[1] would raise IndexError here.
        print('need at least two files in ' + anno_path)
        return
    num, annos = _ParseAnnotation(filelist[1])
    _CreateObjectAnnotation(filelist[0], annos)
18 | 
def _ParseAnnotation(filepath):
    """Read and print the labelled boxes of one annotation file.

    ``filepath`` is a file name inside ``anno_path``. Every element whose tag
    contains ``'object'`` contributes one dict with the keys ``name``,
    ``xmin``, ``ymin``, ``xmax`` and ``ymax`` (coordinates kept as the text
    found in the file), provided all four coordinates are present in its
    bndbox. Each object name is printed when it is read, and every collected
    dict is printed once parsing is done.

    Returns ``(num, annos)``: ``annos[:num]`` are the dicts, and the list is
    padded with ``None`` up to 10 entries as before. Files with more than 10
    objects are now returned in full instead of raising IndexError. A missing
    file is reported and yields ``num == 0``.
    """
    annos = []
    full_path = anno_path + filepath
    if not os.path.exists(full_path):
        print(filepath+' :not found')
    else:
        for annoobject in ET.parse(full_path).iter():
            if 'object' not in annoobject.tag:
                continue
            # Collected per object so values never leak from a previous one.
            name = None
            box = {}
            for element in list(annoobject):
                if 'name' in element.tag:
                    name = element.text
                    print(name)
                if 'bndbox' in element.tag:
                    for size in list(element):
                        for key in ('xmin', 'ymin', 'xmax', 'ymax'):
                            if key in size.tag:
                                box[key] = size.text
            if len(box) == 4:
                annos.append({'name': name, 'xmin': box['xmin'], 'ymin': box['ymin'],
                              'xmax': box['xmax'], 'ymax': box['ymax']})
    num = len(annos)
    for annotation in annos:
        print(annotation)
    # A negative multiplier yields an empty list, so long results stay untouched.
    annos.extend([None] * (10 - num))
    return num, annos
48 | 
def _CreateObjectAnnotation(filepath, annos):
    """Append one object element per annotation to an existing annotation file.

    ``filepath`` is a file name inside ``anno_path``; ``annos`` is a list of
    annotation dicts in which ``None`` entries are skipped. The file is
    rewritten in place as UTF-8 with an XML declaration. A missing file is
    reported and left alone.
    """
    full_path = anno_path + filepath
    if not os.path.exists(full_path):
        print('not found' + anno_path + filepath)
        # Nothing to extend; ET.parse would raise on a missing file.
        return
    tree = ET.parse(full_path)
    root = tree.getroot()
    for annotation in annos:
        if annotation is not None:
            _CreateElement(root, annotation)
    tree.write(full_path, encoding='utf-8', xml_declaration=True)
58 | 
def _CreateElement(root, annotation):
    """Build an <object> element from ``annotation`` and append it to ``root``.

    The element gets the children name, pose ('Unspecified'), truncated ('0'),
    difficult ('0') and a bndbox holding xmin, ymin, xmax and ymax, each taken
    from the matching key of ``annotation``.
    """
    entry = ET.Element('object')
    header = (
        ('name', annotation['name']),
        ('pose', 'Unspecified'),
        ('truncated', '0'),
        ('difficult', '0'),
    )
    for tag, text in header:
        ET.SubElement(entry, tag).text = text
    box = ET.SubElement(entry, 'bndbox')
    for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
        ET.SubElement(box, corner).text = annotation[corner]
    root.append(entry)


if __name__ == "__main__":
    main()
85 | 
86 | 


--------------------------------------------------------------------------------
/DelAnntation.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | import sys
 3 | import os
 4 | import xml.etree.ElementTree as ET
 5 | #sys.stdout=io.TextIOWrapper(sys.stdout.buffer,encoding='utf8')
# Delete or relabel the annotations of one class across the dataset's XML files.
# Change the path below to where your XML files are stored; use '/' as the separator and end with '/'.
# old_annotation is the label to be changed; new_annotation is the label it is changed to.
anno_path = 'F:/数据集/20190301输电线路主要缺陷负样本扩建数据集/Annotations/'
old_annotation = 'normal insulator'
new_annotation = 'normal single insulator'
# Labels whose objects are removed when the delete mode is used.
del_annotations = ['strands defect']
# REPLACE = True selects the replace mode.
# REPLACE = False selects the delete mode.
REPLACE = True
16 | 
def _main():
    """Run the selected mode over every file in ``anno_path`` and print how many files changed.

    With ``REPLACE`` equal to True each file is passed to _Replace_Annotation,
    otherwise to _Del_Annotation; a file counts when the handler returns a
    value greater than zero.
    """
    handler = _Replace_Annotation if REPLACE == True else _Del_Annotation
    touched = 0
    for file in os.listdir(anno_path):
        if handler(file) > 0:
            touched += 1
    print('the number of xmlfile is :' + str(touched))
31 | 
32 | 
def _Replace_Annotation(filepath):
    """Relabel every ``old_annotation`` object in one file and return how many were changed.

    ``filepath`` is a file name inside ``anno_path``. Replace_ changes one
    label per call, so it is called until it reports that nothing is left.
    A missing file is reported and counted as zero changes.
    """
    if not os.path.exists(anno_path + filepath):
        print(filepath+' :not found')
        return 0
    if old_annotation == new_annotation:
        # A replaced label would match again on the next pass, so the loop
        # below would never end.
        return 0
    i = 0
    while Replace_(filepath) == False:
        i += 1
    return i
42 | 
def Replace_(filepath):
    """Relabel the first ``old_annotation`` object found in one file.

    ``filepath`` is a file name inside ``anno_path``. On the first object
    whose name equals ``old_annotation`` the name is set to
    ``new_annotation``, the file name is printed and the file is rewritten.

    Returns False when a label was replaced and True when there was nothing
    to replace (no matching label, identical old/new labels, or a missing
    file, which is also reported).
    """
    full_path = anno_path + filepath
    if not os.path.exists(full_path):
        print(filepath+' :not found')
        return True
    if old_annotation == new_annotation:
        # Rewriting a label with itself would be reported as a change forever.
        return True
    # Build the XML tree and look for the name of every object.
    tree = ET.parse(full_path)
    for annoobject in tree.iter():
        if 'object' not in annoobject.tag:
            continue
        for element in list(annoobject):
            if 'name' in element.tag and element.text == old_annotation:
                element.text = new_annotation
                print(filepath)
                # Write the file back so the change takes effect.
                tree.write(full_path, encoding="utf-8", xml_declaration=True)
                return False
    return True
61 | 
def _Del_Annotation(filepath):
    """Remove every object listed in ``del_annotations`` from one file and return how many were removed.

    ``filepath`` is a file name inside ``anno_path``. Delete_ removes one
    object per call, so it is called until it reports that nothing is left.
    A missing file is reported and counted as zero removals.
    """
    if not os.path.exists(anno_path + filepath):
        print(filepath+' :not found')
        return 0
    i = 0
    while Delete_(filepath) == False:
        i += 1
    return i
70 |     
def Delete_(filepath):
    """Remove the first object whose name is listed in ``del_annotations``.

    ``filepath`` is a file name inside ``anno_path``. The matching object is
    removed from its parent element, the file name is printed and the file is
    rewritten.

    Returns False when an object was removed and True when there was nothing
    to remove (no matching object, or a missing file, which is also reported).
    """
    full_path = anno_path + filepath
    if not os.path.exists(full_path):
        print(filepath+' :not found')
        return True
    # Build the XML tree and look for the name of every object.
    tree = ET.parse(full_path)
    root = tree.getroot()
    # ElementTree keeps no parent links; root.remove() only works for direct
    # children and raised ValueError for objects nested deeper.
    parent_of = {child: parent for parent in root.iter() for child in parent}
    for annoobject in root.iter():
        if 'object' not in annoobject.tag:
            continue
        parent = parent_of.get(annoobject)
        if parent is None:
            # The root element itself cannot be removed.
            continue
        for element in list(annoobject):
            if 'name' in element.tag and element.text in del_annotations:
                parent.remove(annoobject)
                print(filepath)
                # Write the file back so the change takes effect.
                tree.write(full_path, encoding="utf-8", xml_declaration=True)
                return False
    return True


if __name__ == '__main__':
    _main()
96 | 


--------------------------------------------------------------------------------