├── .gitattributes ├── README.md ├── lower.py ├── divide_dataset.py ├── DelNoneXml.py ├── rename.py ├── JPG与XML匹配.py ├── rename1.py ├── CreateXML.py ├── crop_detect.py ├── CountObject.py ├── ParseXML.py └── DelAnntation.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VOCxml_operator 2 | operate the xml files in the VOC dataset 3 | CountObject.py: 4 | #统计数据集中各类别个数 5 | CreateXML.py: 6 | #创建一个简单的xml文件 7 | crop_detect.py: 8 | #将数据集中的标注框从图片中剪裁出来 9 | DelAnntation.py: 10 | #删除或者修改数据集某一类的xml文件 11 | DelNoneXml.py: 12 | #删除数据集中空的xml文件 13 | divide_dataset.py: 14 | #按照VOC数据集Main文件夹下的txt文件将测试集图片分离出来 15 | JPG与XML匹配.py: 16 | #整理数据集,删除没有对应xml文件的图片 17 | lower.py: 18 | #将图片jpg文件后缀小写或者大写 19 | rename1.py: 20 | #将图片文件的名字重新编号 21 | ParaseXML.py: 22 | #解析数据集中全部xml文件 23 | -------------------------------------------------------------------------------- /lower.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import glob 3 | 4 | #将图片jpg文件后缀小写或者大写 5 | 6 | if __name__ == "__main__": 7 | realpath = os.path.realpath(__file__) 8 | dirname = os.path.dirname(realpath) 9 | extension = 'JPG' 10 | file_list = glob.glob('*.'+extension) 11 | for filename in file_list: 12 | #如果想要全部大写,改成lowerfilename = filename.upper() 13 | lowerfilename = filename.lower() 14 | filepath = os.path.join(dirname, filename) 15 | os.rename(filepath, lowerfilename) 16 | print(lowerfilename) -------------------------------------------------------------------------------- /divide_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | #按照VOC数据集Main文件夹下的txt文件将测试集图片分离出来 5 | test_path = '/home/omnisky/project/tf-faster-rcnn/data/VOCdevkit2007/VOC2007/ImageSets/Main/test.txt' 6 | anno_path = '/home/omnisky/project/tf-faster-rcnn/data/VOCdevkit2007/VOC2007/JPEGImages/' 7 | new_path = '/home/omnisky/project/tf-faster-rcnn/data/VOCdevkit2007/VOC2007/testimages/' 8 | 9 | def _main(): 10 | fp = open(test_path, 'r') 11 | xml_list = fp.readlines() 12 | fp.close() 13 | i = 0 14 | for file in xml_list: 15 | xml_file = file.replace('\n', '') 16 | shutil.copyfile(anno_path + xml_file+'.jpg', new_path + xml_file+'.jpg') 17 | i =+ 1 18 | print xml_file+'.jpg' 19 | print i 20 | 21 | if __name__ == '__main__': 22 | _main() 23 | -------------------------------------------------------------------------------- /DelNoneXml.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | from pylab import * 3 | import os 4 | import xml.etree.ElementTree as ET 5 | 6 | #删除数据集中空的xml文件 7 | anno_path = 'F:/数据集/20190301输电线路主要缺陷负样本扩建数据集/Annotations/' 8 | DELNUM = 0 9 | 10 | def _main(): 11 | filelist = os.listdir(anno_path) 12 | all_annotation = 0 13 | all_image = 0 14 | for file in filelist: 15 | _DelNoneAnnotation(file) 16 | print(DELNUM) 17 | 18 | def _DelNoneAnnotation(filepath): 19 | if os.path.exists(anno_path + filepath) == False: 20 | print(filepath+' :not found') 21 | tree = ET.parse(anno_path + filepath) 22 | num = 0 23 | for annoobject in tree.iter(): 24 | if 'object' in annoobject.tag: 25 | num += 1 26 | if num==0: 27 | os.remove(anno_path + filepath) 28 | print(filepath) 29 | global DELNUM 30 | DELNUM += 1 31 | 32 | if __name__ == '__main__': 33 | _main() 34 | -------------------------------------------------------------------------------- /rename.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import glob 3 | import re 4 | 5 | 6 | if __name__ == "__main__": 7 | realpath = os.path.realpath(__file__) 8 | dirname = os.path.dirname(realpath) 9 | extension = 'jpg' 10 | file_list = glob.glob('*.'+extension) 11 | #在文件夹下面建立defectname.txt文档记录原始图像名称信息 12 | filetxt = open(os.path.join(dirname, 'defectname.txt'), 'w', encoding='utf-8') 13 | for index, filename in enumerate(file_list): 14 | #修改数字为起始标号 15 | index = index + 4643 16 | str_index = str(index) 17 | length = len(str_index) 18 | for i in range(6-length): 19 | str_index = '0' + str_index 20 | filepath = os.path.join(dirname, filename) 21 | newfilename = os.path.join(str_index, filename) 22 | print("%s\n" % (newfilename), file=filetxt) 23 | print(str_index + '.jpg') 24 | os.rename(filepath, str_index + '.jpg') 25 | filetxt.close() -------------------------------------------------------------------------------- /JPG与XML匹配.py: -------------------------------------------------------------------------------- 1 | import os 2 | SourceDir = 'F:/张珂老师巡检图像/new gradingring/xml/' 3 | DestDir = 'F:/张珂老师巡检图像/new gradingring/images/' 4 | 5 | if __name__ == '__main__': 6 | all_fileName = os.listdir(SourceDir) 7 | k=0 8 | for fileName in all_fileName: 9 | suffix = fileName[-4:].lower() 10 | if suffix == ".xml": 11 | if not os.path.exists(os.path.join(DestDir, fileName[0:-4]+".jpg")): 12 | print(fileName) 13 | os.remove(os.path.join(SourceDir, fileName)) # 删除文件 14 | else:k=k+1 15 | print(str(k)) 16 | 17 | all_fileName = os.listdir(DestDir) 18 | k=0 19 | for fileName in all_fileName: 20 | suffix = fileName[-4:].lower() 21 | if suffix == ".jpg": 22 | if not os.path.exists(os.path.join(SourceDir, fileName[0:-4]+".xml")): 23 | print(fileName) 24 | os.remove(os.path.join(DestDir, fileName)) # 删除文件 25 | else:k=k+1 26 | print(str(k)) 27 | -------------------------------------------------------------------------------- /rename1.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import glob 3 | import re 4 | #将图片文件的名字重新编号 5 | #图像文件夹地址 6 | path = 'F:/张珂老师巡检图像/第二次缺陷图像 - 副本/' 7 | text_path = 'f:/' 8 | if __name__ == "__main__": 9 | '''realpath = os.path.realpath(__file__) 10 | dirname = os.path.dirname(realpath) 11 | extension = 'jpg' 12 | file_list = glob.glob('*.'+extension)''' 13 | file_list = os.listdir(path) 14 | #在文件夹下面建立defectname.txt文档记录原始图像名称信息 15 | filetxt = open(os.path.join(text_path, 'defectname.txt'), 'w', encoding='utf-8') 16 | for index, filename in enumerate(file_list): 17 | #修改数字为起始标号 18 | index = index + 7507 19 | str_index = str(index) 20 | # length = len(str_index) 21 | # for i in range(6-length): 22 | # str_index = '7507' + str_index 23 | filepath = os.path.join(path, filename) 24 | newfilename = os.path.join(str_index, filename) 25 | print("%s\n" % (newfilename), file=filetxt) 26 | print(str_index + '.jpg') 27 | os.rename(filepath, path + str_index + '.jpg') 28 | filetxt.close() -------------------------------------------------------------------------------- /CreateXML.py: -------------------------------------------------------------------------------- 1 | from xml.etree import ElementTree as ET 2 | 3 | def main(): 4 | _appendXML() 5 | 6 | def _createXML(): 7 | root = ET.Element('lab') 8 | person1 = ET.SubElement(root, 'person', {'name':'Brown'}) 9 | age1 = ET.SubElement(person1, 'age') 10 | age1.text = '21' 11 | gender1 = ET.SubElement(person1, 'gender') 12 | gender1.text = 'male' 13 | person2 = ET.SubElement(root, 'person', {'name':'Red'}) 14 | age2 = ET.SubElement(person2, 'age') 15 | age2.text = '23' 16 | gender2 = ET.SubElement(person2, 'gender') 17 | gender2.text = 'female' 18 | tree = ET.ElementTree(root) 19 | tree.write('G:/pythonStudy/xml/sample.xml', encoding="utf-8", xml_declaration=True) 20 | 21 | def _appendXML(): 22 | tree = ET.parse('G:/pythonStudy/xml/sample.xml') 23 | root = tree.getroot() 24 | person3 = ET.Element('person', {'name':'Brown'}) 25 | age3 = ET.SubElement(person3, 'age') 26 | age3.text = '20' 27 | gender3 = ET.SubElement(person3, 'gender') 28 | gender3.text = 'male' 29 | root.append(person3) 30 | tree.write('G:/pythonStudy/xml/sample.xml', encoding="utf-8", xml_declaration=True) 31 | 32 | 33 | if __name__ == "__main__": 34 | main() 35 | -------------------------------------------------------------------------------- /crop_detect.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | from pylab import * 3 | import os 4 | import xml.etree.ElementTree as ET 5 | 6 | #将数据集中的标注框从图片中剪裁出来 7 | ####anno_path存放xml文件,image_path存放未剪裁图片,crop_path存放剪裁后图片 8 | anno_path = 'C:/Users/91279/Desktop/何颖宣 19.2.20/金具/xml/' 9 | image_path = 'C:/Users/91279/Desktop/何颖宣 19.2.20/金具/images/' 10 | crop_path = 'C:/Users/91279/Desktop/何颖宣 19.2.20/金具/crop/' 11 | 12 | 13 | def _main(): 14 | filelist = os.listdir(anno_path) 15 | all_annotation = 0 16 | all_image = 0 17 | for file in filelist: 18 | num, annos = _ParseAnnotation(file) 19 | #annotation_num = _crop(num, annos, file) 20 | #all_annotation = all_annotation + annotation_num 21 | #all_image+=1 22 | i = 0 23 | for j in range(num): 24 | i += 1 25 | _crop(i, annos[j], file) 26 | print(file) 27 | all_image+=1 28 | print(all_image) 29 | 30 | def _ParseAnnotation(filepath): 31 | if os.path.exists(anno_path + filepath) == False: 32 | print(filepath+' :not found') 33 | tree = ET.parse(anno_path + filepath) 34 | annos = [None]*30 35 | num = 0 36 | for annoobject in tree.iter(): 37 | if 'object' in annoobject.tag: 38 | for element in list(annoobject): 39 | if 'name' in element.tag: 40 | name = element.text 41 | if 'bndbox' in element.tag: 42 | for size in list(element): 43 | if 'xmin' in size.tag: 44 | xmin = size.text 45 | if 'ymin' in size.tag: 46 | ymin = size.text 47 | if 'xmax' in size.tag: 48 | xmax = size.text 49 | if 'ymax' in size.tag: 50 | ymax = size.text 51 | annos[num] = {'name':name, 'xmin':int(xmin), 'ymin':int(ymin), 'xmax':int(xmax), 'ymax':int(ymax)} 52 | #annos[num] = {'name':name, 'xmin':xmin, 'ymin':ymin, \ 53 | # 'xmax':xmax, 'ymax':ymax} 54 | num += 1 55 | return num, annos 56 | 57 | def _crop(num, annotation, file): 58 | filenum = os.path.splitext(file) 59 | filename = filenum[0] + '.jpg' 60 | if os.path.exists(image_path + filename) != True: 61 | print(filename + 'not found') 62 | return 63 | box = (annotation['xmin'], annotation['ymin'], annotation['xmax'], annotation['ymax']) 64 | pil_im = Image.open(image_path + filename) 65 | region = pil_im.crop(box) 66 | pil_region = Image.fromarray(uint8(region)) 67 | pil_region.save(crop_path + annotation['name']+filenum[0]+'_'+str(num)+'.jpg') 68 | 69 | if __name__ == '__main__': 70 | _main() 71 | -------------------------------------------------------------------------------- /CountObject.py: -------------------------------------------------------------------------------- 1 | import io 2 | import sys 3 | import os 4 | import xml.etree.ElementTree as ET 5 | #sys.stdout=io.TextIOWrapper(sys.stdout.buffer,encoding='utf8') 6 | ##修改下面的地址为你存放xml文件的位置,注意斜杠使用/,最后末尾需要加上/ 7 | anno_path = 'F:/张珂老师巡检图像/new gradingring/xml/' 8 | #anno_num = [0 for i in range(18)] 9 | #将下面改成你自己的VOC数据集类别 10 | class_name = ('insulator bunch-drop', 'insulator damage', 'insulator pollution', 11 | 'bjsb strands','shockproof hammer deformation', 'shockproof hammer intersection', 12 | 'cover slab losing', 'illegal construction', 'illegal constructing', 13 | 'grading ring damage', 'birdnest', 'foreign body','shielded ring corrosion', 14 | 'normal single insulator', 'normal grading ring', 'normal shockproof hammer', 15 | 'normal shielded ring', 'normal pre-twisted suspension clamp', 'normal single insulator2', 16 | 'normal grading ring2') 17 | def _main(): 18 | anno_num = [0]*30 19 | filelist = os.listdir(anno_path) 20 | for file in filelist: 21 | num, annos = _ParseAnnotation(file) 22 | anno_num = _Count(num, annos, anno_num) 23 | for j in range(len(class_name)): 24 | print(class_name[j]+ ': ' + str(anno_num[j])) 25 | 26 | def _ParseAnnotation(filepath): 27 | if os.path.exists(anno_path + filepath) == False: 28 | print(filepath+' :not found') 29 | tree = ET.parse(anno_path + filepath) 30 | annos = [None]*30 31 | num = 0 32 | for annoobject in tree.iter(): 33 | if 'object' in annoobject.tag: 34 | for element in list(annoobject): 35 | if 'name' in element.tag: 36 | name = element.text 37 | if 'bndbox' in element.tag: 38 | for size in list(element): 39 | if 'xmin' in size.tag: 40 | xmin = size.text 41 | if 'ymin' in size.tag: 42 | ymin = size.text 43 | if 'xmax' in size.tag: 44 | xmax = size.text 45 | if 'ymax' in size.tag: 46 | ymax = size.text 47 | annos[num] = {'name':name, 'xmin':int(xmin), 'ymin':int(ymin), 'xmax':int(xmax), 'ymax':int(ymax)} 48 | #annos[num] = {'name':name, 'xmin':xmin, 'ymin':ymin, \ 49 | # 'xmax':xmax, 'ymax':ymax} 50 | num += 1 51 | return num, annos 52 | 53 | def _Count(num, annos, anno_num): 54 | for i in range(num): 55 | for j in range(len(class_name)): 56 | if annos[i]['name'] == class_name[j]: 57 | anno_num[j] += 1 58 | return anno_num 59 | 60 | if __name__ == '__main__': 61 | _main() 62 | -------------------------------------------------------------------------------- /ParseXML.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | #coding=utf-8 4 | #def filematch(file): 5 | import xml.etree.ElementTree as ET 6 | 7 | 8 | #anno_path = 'F:/数据集/luoshuan/Annos(包含正常螺栓和正常碗头)/' 9 | anno_path = 'G:/pythonStudy/xml/' 10 | def main(): 11 | filelist = os.listdir(anno_path) 12 | # for file in filelist: 13 | # a=re.findall(r'[0-9]{2}_[0-9]{6}', file) 14 | # if a !=None: 15 | # print(a) 16 | num, annos = _ParseAnnotation(filelist[1]) 17 | _CreateObjectAnnotation(filelist[0], annos) 18 | 19 | def _ParseAnnotation(filepath): 20 | if os.path.exists(anno_path + filepath) == False: 21 | print(filepath+' :not found') 22 | tree = ET.parse(anno_path + filepath) 23 | annos = [None]*10 24 | num = 0 25 | for annoobject in tree.iter(): 26 | if 'object' in annoobject.tag: 27 | for element in list(annoobject): 28 | if 'name' in element.tag: 29 | name = element.text 30 | print(name) 31 | if 'bndbox' in element.tag: 32 | for size in list(element): 33 | if 'xmin' in size.tag: 34 | xmin = size.text 35 | if 'ymin' in size.tag: 36 | ymin = size.text 37 | if 'xmax' in size.tag: 38 | xmax = size.text 39 | if 'ymax' in size.tag: 40 | ymax = size.text 41 | # annos[num] = {'name':name, 'xmin':int(xmin), 'ymin':int(ymin), 'xmax':int(xmax), 'ymax':int(ymax)} 42 | annos[num] = {'name':name, 'xmin':xmin, 'ymin':ymin, \ 43 | 'xmax':xmax, 'ymax':ymax} 44 | num += 1 45 | for i in range(num): 46 | print(annos[i]) 47 | return num, annos 48 | 49 | def _CreateObjectAnnotation(filepath, annos): 50 | if os.path.exists(anno_path + filepath) == False: 51 | print('not found' + anno_path + filepath) 52 | tree = ET.parse(anno_path + filepath) 53 | root = tree.getroot() 54 | for annotation in annos: 55 | if annotation != None: 56 | _CreateElement(root, annotation) 57 | tree.write(anno_path + filepath, encoding='utf-8', xml_declaration=True) 58 | 59 | def _CreateElement(root, annotation): 60 | object1 = ET.Element('object') 61 | name = ET.SubElement(object1, 'name') 62 | name.text = annotation['name'] 63 | pose = ET.SubElement(object1, 'pose') 64 | pose.text = 'Unspecified' 65 | truncated = ET.SubElement(object1, 'truncated') 66 | truncated.text = '0' 67 | difficult = ET.SubElement(object1, 'difficult') 68 | difficult.text = '0' 69 | bndbox = ET.SubElement(object1, 'bndbox') 70 | xmin = ET.SubElement(bndbox, 'xmin') 71 | xmin.text = annotation['xmin'] 72 | ymin = ET.SubElement(bndbox, 'ymin') 73 | ymin.text = annotation['ymin'] 74 | xmax = ET.SubElement(bndbox, 'xmax') 75 | xmax.text = annotation['xmax'] 76 | ymax = ET.SubElement(bndbox, 'ymax') 77 | ymax.text = annotation['ymax'] 78 | root.append(object1) 79 | 80 | #def FileMatch: 81 | 82 | 83 | if __name__ == "__main__": 84 | main() 85 | 86 | -------------------------------------------------------------------------------- /DelAnntation.py: -------------------------------------------------------------------------------- 1 | import io 2 | import sys 3 | import os 4 | import xml.etree.ElementTree as ET 5 | #sys.stdout=io.TextIOWrapper(sys.stdout.buffer,encoding='utf8') 6 | #删除或者修改数据集某一类的xml文件 7 | ##修改下面的地址为你存放xml文件的位置,注意斜杠使用/,最后末尾需要加上/ 8 | #old_annotation是修要修改的标签名,new_annotation是修改后的标签名字 9 | anno_path = 'F:/数据集/20190301输电线路主要缺陷负样本扩建数据集/Annotations/' 10 | old_annotation = 'normal insulator' 11 | new_annotation = 'normal single insulator' 12 | del_annotations = ['strands defect'] 13 | #replace = True使用替换功能 14 | #replace = False使用删除功能 15 | REPLACE = True 16 | 17 | def _main(): 18 | filelist = os.listdir(anno_path) 19 | i = 0 20 | if REPLACE == True: 21 | for file in filelist: 22 | n_ = _Replace_Annotation(file) 23 | if n_ > 0: 24 | i += 1 25 | else: 26 | for file in filelist: 27 | n_ = _Del_Annotation(file) 28 | if n_ >0: 29 | i += 1 30 | print('the number of xmlfile is :' + str(i)) 31 | 32 | 33 | def _Replace_Annotation(filepath): 34 | if os.path.exists(anno_path + filepath) == False: 35 | print(filepath+' :not found') 36 | #建立xml树状结构 37 | i = 0 38 | while Replace_(filepath) == False: 39 | i += 1 40 | 41 | return i 42 | 43 | def Replace_(filepath): 44 | if os.path.exists(anno_path + filepath) == False: 45 | print(filepath+' :not found') 46 | #建立xml树状结构 47 | tree = ET.parse(anno_path + filepath) 48 | #遍历xml文件 查找'name' 49 | for annoobject in tree.iter(): 50 | if 'object' in annoobject.tag: 51 | for element in list(annoobject): 52 | if 'name' in element.tag: 53 | #替换标签 54 | if element.text == old_annotation: 55 | element.text = new_annotation 56 | print(filepath) 57 | #重新写入xml,使修改生效 58 | tree.write(anno_path+filepath, encoding="utf-8", xml_declaration=True) 59 | return False 60 | return True 61 | 62 | def _Del_Annotation(filepath): 63 | if os.path.exists(anno_path + filepath) == False: 64 | print(filepath+' :not found') 65 | #建立xml树状结构 66 | i = 0 67 | while Delete_(filepath) == False: 68 | i += 1 69 | return i 70 | 71 | def Delete_(filepath): 72 | if os.path.exists(anno_path + filepath) == False: 73 | print(filepath+' :not found') 74 | #建立xml树状结构 75 | tree = ET.parse(anno_path + filepath) 76 | #遍历xml文件 查找'name' 77 | root = tree.getroot() 78 | for annoobject in root.iter(): 79 | if 'object' in annoobject.tag: 80 | for element in list(annoobject): 81 | if 'name' in element.tag: 82 | #删除标签 83 | for anno in del_annotations: 84 | if element.text == anno: 85 | #从根节点下删除第一个子节点 86 | root.remove(annoobject) 87 | print(filepath) 88 | #重新写入xml,使修改生效 89 | tree = ET.ElementTree(root) 90 | tree.write(anno_path+filepath, encoding="utf-8", xml_declaration=True) 91 | return False 92 | return True 93 | 94 | if __name__ == '__main__': 95 | _main() 96 | --------------------------------------------------------------------------------