├── convert_YOLO ├── sample │ ├── yolo │ │ ├── AMZ01866__41hHQC--VqL_yolo.txt │ │ └── AMZ00003__51_2BvBYyPLtL_yolo.txt │ └── Annotations │ │ ├── AMZ01866__41hHQC--VqL.xml │ │ └── AMZ00003__51_2BvBYyPLtL.xml ├── README.md └── convert_YOLO.py ├── convert_VOC ├── voc2txt.py └── README.md ├── convert_VOC2COCO ├── README.md └── convert_VOC2COCO.py └── README.md /convert_YOLO/sample/yolo/AMZ01866__41hHQC--VqL_yolo.txt: -------------------------------------------------------------------------------- 1 | 4 0.47750000000000004 0.462 0.675 0.654 2 | -------------------------------------------------------------------------------- /convert_YOLO/sample/yolo/AMZ00003__51_2BvBYyPLtL_yolo.txt: -------------------------------------------------------------------------------- 1 | 2 0.4254166666666667 0.6704166666666667 0.6325000000000001 0.4658333333333334 2 | 2 0.5504166666666667 0.4475 0.3125 0.745 3 | -------------------------------------------------------------------------------- /convert_YOLO/sample/Annotations/AMZ01866__41hHQC--VqL.xml: -------------------------------------------------------------------------------- 1 | 2 | Nehal_AmazonSet2_2611 3 | AMZ01866__41hHQC--VqL.jpg 4 | C:\Users\Nehal.Gupta\Desktop\Nehal_AmazonSet2_2611\AMZ01866__41hHQC--VqL.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1000 10 | 1000 11 | 3 12 | 13 | 0 14 | 15 | necklace 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 141 21 | 136 22 | 816 23 | 790 24 | 25 | 26 | -------------------------------------------------------------------------------- /convert_YOLO/sample/Annotations/AMZ00003__51_2BvBYyPLtL.xml: -------------------------------------------------------------------------------- 1 | 2 | captured 3 | AMZ00003__51_2BvBYyPLtL.jpg 4 | C:\Users\TAM\Desktop\blackstraw\Amazon\captured\AMZ00003__51_2BvBYyPLtL.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1200 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | earring 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 132 21 | 526 22 | 891 23 | 1085 24 | 25 | 26 | 27 | earring 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 474 33 | 91 34 | 849 35 | 985 36 | 37 | 38 | -------------------------------------------------------------------------------- /convert_VOC/voc2txt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xml.etree.ElementTree as ET 3 | from pathlib import Path 4 | import json 5 | from tqdm import tqdm 6 | import xmltodict 7 | 8 | 9 | # File paths of xml location, txt save location 10 | 11 | xml = Path(r"D:\praser\ann") 12 | xmls = os.listdir(xml) 13 | xmls = [f for f in xmls if f.split(".")[-1] == "xml"] 14 | 15 | txt_path = Path(r"D:\praser\txt") 16 | 17 | 18 | # code to convert xml to txt (for GroundTruth files only, add confidence score for detections) 19 | 20 | for xl in xmls: 21 | # xl = "abc.xml" 22 | file_id = xl 23 | txt_data = [] 24 | with open(os.path.join(xml, file_id), "rb") as f: 25 | xml_data = xmltodict.parse(f, xml_attribs=False) 26 | 27 | try: 28 | if isinstance(xml_data["annotation"]["object"], list): 29 | for box in xml_data["annotation"]["object"]: 30 | cls_name = box['name'] 31 | coord = box["bndbox"] 32 | txt_data.append(cls_name + " " + coord["xmin"] + " " + coord["ymin"] + " " + coord["xmax"] + " " + coord["ymax"]) 33 | else: 34 | box = xml_data["annotation"]["object"] 35 | cls_name = box['name'] 36 | coord = box["bndbox"] 37 | txt_data.append(cls_name + " " + coord["xmin"] + " " + coord["ymin"] + " " + coord["xmax"] + " " + coord["ymax"]) 38 | 39 | with open(os.path.join(txt_path, file_id.rstrip('.xml') + '.txt'), "w") as file: 40 | file.write("\n".join(txt_data)) 41 | 42 | except KeyError: 43 | print('Object details missing in file: %s' % file_id) 44 | -------------------------------------------------------------------------------- /convert_VOC/README.md: -------------------------------------------------------------------------------- 1 | # VOC_to_TXT 2 | 3 | Python script to convert PASCAL VOC annotation xml's to .txt files 4 | 5 | ### Eg: 6 | 7 | #### VOC format 8 | 9 | ``` 10 | 11 | images 12 | maksssksksss0.png 13 | 14 | 512 15 | 366 16 | 3 17 | 18 | 0 19 | 20 | without_mask 21 | Unspecified 22 | 0 23 | 0 24 | 0 25 | 26 | 79 27 | 105 28 | 109 29 | 142 30 | 31 | 32 | 33 | with_mask 34 | Unspecified 35 | 0 36 | 0 37 | 0 38 | 39 | 185 40 | 100 41 | 226 42 | 144 43 | 44 | 45 | 46 | ``` 47 | 48 | Above is the annotation file which is `.xml` (This is known as Pascal VOC format) 49 | 50 | #### .txt file 51 | 52 | ``` 53 | without_mask 79 105 109 142 54 | with_mask 185 100 226 144 55 | ``` 56 | 57 | ## To use this script 58 | 59 | - Change the Annotation file path 60 | - Specify the path to save the txt file 61 | - Run the below command 62 | 63 | ``` 64 | python voc2txt.py 65 | ``` 66 | 67 | Note: This script is for `Groundtruth` files only. For `Detection` add the confidence score in the script. 68 | -------------------------------------------------------------------------------- /convert_YOLO/README.md: -------------------------------------------------------------------------------- 1 | # Convert VOC to YOLO 2 | 3 | In YOLO labeling format, a . txt file with the same name is created for each image file in the same directory. Each . txt file contains the annotations for the corresponding image file, that is object class, object coordinates, height and width. 4 | 5 | ### VOC 6 | 7 | The annotation format originally created for the Visual Object Challenge (VOC) has become a common interchange format for object detection labels. 8 | It's well-specified and can be exported from many labeling tools including CVAT, VoTT, and RectLabel. Pascal VOC format is normally a .xml file. VOC format is as below. 9 | 10 | ``` 11 | 12 | Annotations 13 | AMZ01866__41hHQC--VqL.jpg 14 | C:\Users\Nehal.Gupta\Desktop\Nehal_AmazonSet2_2611\AMZ01866__41hHQC--VqL.jpg 15 | 16 | Unknown 17 | 18 | 19 | 1000 20 | 1000 21 | 3 22 | 23 | 0 24 | 25 | necklace 26 | Unspecified 27 | 0 28 | 0 29 | 30 | 141 31 | 136 32 | 816 33 | 790 34 | 35 | 36 | 37 | ``` 38 | 39 | ### YOLO 40 | 41 | Convert the above shown VOC format to YOLO. 42 | 43 | ``` 44 | 45 | ``` 46 | `.txt` YOLO file for above mentioned VOC. 47 | 48 | ``` 49 | 4 0.47750000000000004 0.462 0.675 0.654 50 | ``` 51 | 52 | ## How to use? 53 | 54 | - Add the file paths of Annootation xml files and output files in the script 55 | - Run the below command 56 | 57 | ``` 58 | python convert_YOLO.py 59 | ``` 60 | 61 | - Output `.txt` file will be present in the specified output folder 62 | -------------------------------------------------------------------------------- /convert_YOLO/convert_YOLO.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xml.etree.ElementTree as ET 3 | from pathlib import Path 4 | import xmltodict 5 | 6 | # Specify classes present in the xmls 7 | ## Note: If you want to directly get classes without specifiying them, 8 | ## Comment the below line and directly give the class name in txt file 9 | classes = ['braclet', 'ring', 'earring', 'watch', 'necklace'] 10 | 11 | 12 | def convert(size, box): 13 | dw = 1./(size[0]) 14 | dh = 1./(size[1]) 15 | x = (box[0] + box[1])/2.0 - 1 16 | y = (box[2] + box[3])/2.0 - 1 17 | w = box[1] - box[0] 18 | h = box[3] - box[2] 19 | x = x*dw 20 | w = w*dw 21 | y = y*dh 22 | h = h*dh 23 | return (x, y, w, h) 24 | 25 | 26 | def convert_annotation(file, output_path): 27 | 28 | filename = file.split('\\')[-1] 29 | txt_data = [] 30 | with open(file, 'rb') as in_file: 31 | xml_data = xmltodict.parse(in_file, xml_attribs=False) 32 | 33 | size = xml_data['annotation']['size'] 34 | w = int(size['width']) 35 | h = int(size['height']) 36 | 37 | if isinstance(xml_data["annotation"]["object"], list): 38 | for obj in xml_data["annotation"]["object"]: 39 | difficult = obj['difficult'] 40 | cls = obj['name'] 41 | if cls not in classes or int(difficult) == 1: 42 | continue 43 | 44 | ## If you want the class name in txt file change the below statement 45 | cls_id = classes.index(cls) # cls_id = cls 46 | xmlbox = obj['bndbox'] 47 | b = (float(xmlbox['xmin']), float(xmlbox['xmax']), float(xmlbox['ymin']), 48 | float(xmlbox['ymax'])) 49 | bb = convert((w, h), b) 50 | txt_data.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 51 | else: 52 | obj = xml_data["annotation"]["object"] 53 | difficult = obj['difficult'] 54 | cls = obj['name'] 55 | if cls not in classes or int(difficult) == 1: 56 | print('Class not found %s' % cls) 57 | 58 | ## If you want the class name in txt file change the below statement 59 | cls_id = classes.index(cls) # cls_id = cls 60 | xmlbox = obj['bndbox'] 61 | b = (float(xmlbox['xmin']), float(xmlbox['xmax']), float(xmlbox['ymin']), 62 | float(xmlbox['ymax'])) 63 | bb = convert((w, h), b) 64 | txt_data.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 65 | 66 | with open(os.path.join(output_path, filename.rstrip('.xml') + '_yolo.txt'), 'w') as out_file: 67 | out_file.write("".join(txt_data)) 68 | 69 | 70 | # File location of xmls and output files 71 | full_dir_path = Path(r'D:\Vf\convert_YOLO\sample\Annotations') 72 | output_path = Path(r'D:\Vf\convert_YOLO\sample\yolo') 73 | 74 | # List the files in the directory 75 | list_dir = os.listdir(full_dir_path) 76 | 77 | if not os.path.exists(output_path): 78 | os.makedirs(output_path) 79 | 80 | for file in list_dir: 81 | convert_annotation(os.path.join(full_dir_path, file), output_path) 82 | 83 | print("Finished processing") 84 | -------------------------------------------------------------------------------- /convert_VOC2COCO/README.md: -------------------------------------------------------------------------------- 1 | # Convert VOC to COCO 2 | 3 | ### Pascal VOC 4 | 5 | The annotation format originally created for the Visual Object Challenge (VOC) has become a common interchange format for object detection labels. It's well-specified and can be exported from many labeling tools including CVAT, VoTT, and RectLabel. 6 | Pascal VOC format is normally a `.xml` file. VOC format is as below. 7 | 8 | ``` 9 | 10 | images 11 | maksssksksss0.png 12 | 13 | 512 14 | 366 15 | 3 16 | 17 | 0 18 | 19 | without_mask 20 | Unspecified 21 | 0 22 | 0 23 | 0 24 | 25 | 79 26 | 105 27 | 109 28 | 142 29 | 30 | 31 | 32 | with_mask 33 | Unspecified 34 | 0 35 | 0 36 | 0 37 | 38 | 185 39 | 100 40 | 226 41 | 144 42 | 43 | 44 | 45 | ``` 46 | 47 | ### COCO 48 | 49 | The COCO bounding box format is [top left x position, top left y position, width, height]. The category id corresponds to a single category specified in the categories section. Each annotation also has an id (unique to all other annotations in the dataset). 50 | COCO format is usually a `.json` file. 51 | 52 | ``` 53 | { 54 | "images": [ 55 | { 56 | "file_name": "maksssksksss0.png", 57 | "height": 512, 58 | "width": 366, 59 | "id": "maksssksksss0" 60 | } 61 | ], 62 | "type": "instances", 63 | "annotations": [ 64 | { 65 | "area": 154192, 66 | "iscrowd": 0, 67 | "image_id": "maksssksksss0", 68 | "bbox": [ 69 | 79, 70 | 105, 71 | 109, 72 | 142 73 | ], 74 | "category_id": 0, 75 | "id": 1, 76 | "ignore": 0, 77 | "segmentation": [] 78 | }, 79 | { 80 | "area": 79530, 81 | "iscrowd": 0, 82 | "image_id": "maksssksksss0", 83 | "bbox": [ 84 | 185, 85 | 100, 86 | 226, 87 | 144 88 | ], 89 | "category_id": 1, 90 | "id": 2, 91 | "ignore": 0, 92 | "segmentation": [] 93 | } 94 | ], 95 | "categories": [ 96 | { 97 | "supercategory": "none", 98 | "id": 0, 99 | "name": "without_mask" 100 | }, 101 | { 102 | "supercategory": "none", 103 | "id": 1, 104 | "name": "with_mask" 105 | } 106 | ] 107 | } 108 | ``` 109 | 110 | ## How to run? 111 | 112 | - pip install lxml 113 | - Run the following command 114 | 115 | `python convert_VOC2COCO.py xml_list.txt ../Annotations ../output.json` 116 | 117 | - **xml_list.txt** is the `.txt` file which contains all the `.xml` file names in it. 118 | 119 | ``` 120 | maksssksksss0.xml 121 | maksssksksss1.xml 122 | maksssksksss2.xml 123 | ``` 124 | 125 | - **../Annotations** is the path where the `.xml` files are present. 126 | - **../output.json** is the name of the output `.json` file in `COCO` format with complete path. 127 | 128 | -------------------------------------------------------------------------------- /convert_VOC2COCO/convert_VOC2COCO.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import json 4 | import xml.etree.ElementTree as ET 5 | 6 | 7 | # If necessary, pre-define category and its id 8 | START_BOUNDING_BOX_ID = 1 9 | PRE_DEFINE_CATEGORIES = {'cat': 0, 'dog': 1, 'rabbit': 2, 'tiger': 3, 'lion': 4} 10 | 11 | 12 | def get(root, name): 13 | vars = root.findall(name) 14 | return vars 15 | 16 | 17 | def get_and_check(root, name, length): 18 | vars = root.findall(name) 19 | if len(vars) == 0: 20 | raise NotImplementedError('Can not find %s in %s.'%(name, root.tag)) 21 | if length > 0 and len(vars) != length: 22 | raise NotImplementedError('The size of %s is supposed to be %d, but is %d.'%(name, length, len(vars))) 23 | if length == 1: 24 | vars = vars[0] 25 | return vars 26 | 27 | 28 | def get_filename_as_int(filename): 29 | try: 30 | filename = os.path.splitext(filename)[0] 31 | return filename 32 | except: 33 | raise NotImplementedError('Filename %s is supposed to be an integer.'%(filename)) 34 | 35 | 36 | def convert(xml_list, xml_dir, json_file): 37 | list_fp = open(xml_list, 'r') 38 | json_dict = {"images":[], "type": "instances", "annotations": [], 39 | "categories": []} 40 | categories = PRE_DEFINE_CATEGORIES 41 | bnd_id = START_BOUNDING_BOX_ID 42 | for line in list_fp: 43 | line = line.strip() 44 | print("Processing %s"%(line)) 45 | xml_f = os.path.join(xml_dir, line) 46 | tree = ET.parse(xml_f) 47 | root = tree.getroot() 48 | path = get(root, 'path') 49 | if len(path) == 1: 50 | filename = os.path.basename(path[0].text) 51 | elif len(path) == 0: 52 | filename = get_and_check(root, 'filename', 1).text 53 | else: 54 | raise NotImplementedError('%d paths found in %s'%(len(path), line)) 55 | ## The filename must be a number 56 | image_id = get_filename_as_int(filename) 57 | size = get_and_check(root, 'size', 1) 58 | width = int(get_and_check(size, 'width', 1).text) 59 | height = int(get_and_check(size, 'height', 1).text) 60 | image = {'file_name': filename, 'height': height, 'width': width, 61 | 'id':image_id} 62 | json_dict['images'].append(image) 63 | ## Cruuently we do not support segmentation 64 | # segmented = get_and_check(root, 'segmented', 1).text 65 | # assert segmented == '0' 66 | for obj in get(root, 'object'): 67 | category = get_and_check(obj, 'name', 1).text 68 | if category not in categories: 69 | new_id = len(categories) 70 | categories[category] = new_id 71 | category_id = categories[category] 72 | bndbox = get_and_check(obj, 'bndbox', 1) 73 | xmin = int(get_and_check(bndbox, 'xmin', 1).text) - 1 74 | ymin = int(get_and_check(bndbox, 'ymin', 1).text) - 1 75 | xmax = int(get_and_check(bndbox, 'xmax', 1).text) 76 | ymax = int(get_and_check(bndbox, 'ymax', 1).text) 77 | assert(xmax > xmin) 78 | assert(ymax > ymin) 79 | o_width = abs(xmax - xmin) 80 | o_height = abs(ymax - ymin) 81 | ann = {'area': o_width*o_height, 'iscrowd': 0, 'image_id': 82 | image_id, 'bbox':[xmin, ymin, o_width, o_height], 83 | 'category_id': category_id, 'id': bnd_id, 'ignore': 0, 84 | 'segmentation': []} 85 | json_dict['annotations'].append(ann) 86 | bnd_id = bnd_id + 1 87 | 88 | for cate, cid in categories.items(): 89 | cat = {'supercategory': 'none', 'id': cid, 'name': cate} 90 | json_dict['categories'].append(cat) 91 | json_fp = open(json_file, 'w') 92 | json_str = json.dumps(json_dict) 93 | json_fp.write(json_str) 94 | json_fp.close() 95 | list_fp.close() 96 | 97 | 98 | if __name__ == '__main__': 99 | if len(sys.argv) <= 1: 100 | print('3 auguments are need.') 101 | print('Usage: %s XML_LIST.txt XML_DIR OUTPU_JSON.json'%(sys.argv[0])) 102 | exit(1) 103 | 104 | convert(sys.argv[1], sys.argv[2], sys.argv[3]) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Convert_VOC_COCO_YOLO 2 | 3 | One of the important step in Object detection is to convert the Annotation file to corresponding format based on the Object detection model used. 4 | 5 | ### Different types of Annotation formats 6 | 7 | - Pascal VOC 8 | - COCO 9 | - YOLO 10 | 11 | ### Pascal VOC 12 | 13 | The annotation format originally created for the Visual Object Challenge (VOC) has become a common interchange format for object detection labels. It's well-specified and can be exported from many labeling tools including CVAT, VoTT, and RectLabel. 14 | Pascal VOC format is normally a `.xml` file. VOC format is as below. 15 | 16 | ``` 17 | 18 | images 19 | maksssksksss0.png 20 | 21 | 512 22 | 366 23 | 3 24 | 25 | 0 26 | 27 | without_mask 28 | Unspecified 29 | 0 30 | 0 31 | 0 32 | 33 | 79 34 | 105 35 | 109 36 | 142 37 | 38 | 39 | 40 | with_mask 41 | Unspecified 42 | 0 43 | 0 44 | 0 45 | 46 | 185 47 | 100 48 | 226 49 | 144 50 | 51 | 52 | 53 | ``` 54 | 55 | ### COCO 56 | 57 | The COCO bounding box format is [top left x position, top left y position, width, height]. The category id corresponds to a single category specified in the categories section. Each annotation also has an id (unique to all other annotations in the dataset). 58 | COCO format is usually a `.json` file. 59 | 60 | ``` 61 | { 62 | "images": [ 63 | { 64 | "file_name": "918121_1557903451205255_561212074_n.jpg", 65 | "height": 640, 66 | "width": 640, 67 | "id": "918121_1557903451205255_561212074_n" 68 | } 69 | ], 70 | "type": "instances", 71 | "annotations": [ 72 | { 73 | "area": 154192, 74 | "iscrowd": 0, 75 | "image_id": "918121_1557903451205255_561212074_n", 76 | "bbox": [ 77 | 66, 78 | 202, 79 | 419, 80 | 368 81 | ], 82 | "category_id": 4, 83 | "id": 1, 84 | "ignore": 0, 85 | "segmentation": [] 86 | }, 87 | { 88 | "area": 79530, 89 | "iscrowd": 0, 90 | "image_id": "918121_1557903451205255_561212074_n", 91 | "bbox": [ 92 | 273, 93 | 47, 94 | 330, 95 | 241 96 | ], 97 | "category_id": 4, 98 | "id": 2, 99 | "ignore": 0, 100 | "segmentation": [] 101 | }, 102 | { 103 | "area": 78430, 104 | "iscrowd": 0, 105 | "image_id": "918121_1557903451205255_561212074_n", 106 | "bbox": [ 107 | 0, 108 | 92, 109 | 310, 110 | 253 111 | ], 112 | "category_id": 4, 113 | "id": 3, 114 | "ignore": 0, 115 | "segmentation": [] 116 | } 117 | ], 118 | "categories": [ 119 | { 120 | "supercategory": "none", 121 | "id": 0, 122 | "name": "bracelet" 123 | }, 124 | { 125 | "supercategory": "none", 126 | "id": 1, 127 | "name": "earring" 128 | }, 129 | { 130 | "supercategory": "none", 131 | "id": 2, 132 | "name": "necklace" 133 | }, 134 | { 135 | "supercategory": "none", 136 | "id": 3, 137 | "name": "ring" 138 | }, 139 | { 140 | "supercategory": "none", 141 | "id": 4, 142 | "name": "watch" 143 | } 144 | ] 145 | } 146 | ``` 147 | 148 | ### YOLO 149 | 150 | In YOLO labeling format, a . txt file with the same name is created for each image file in the same directory. Each . txt file contains the annotations for the corresponding image file, that is object class, object coordinates, height and width. 151 | 152 | ``` 153 | 154 | ``` 155 | 156 | Example: 157 | 158 | ``` 159 | car 45 55 29 67 160 | bus 99 83 28 44 161 | ``` 162 | 163 | ## How to use? 164 | 165 | - If you want to convert **Pascal VOC** format to **.txt** then check the `README.md` file in [convert_VOC](convert_VOC) 166 | - If you want to convert **Pascal VOC** format to **YOLO** then check the `README.md` file in [convert_YOLO](convert_YOLO) 167 | - If you want to convert **Pascal VOC** format to **COCO** then check the `README.md` file in [convert_VOC2COCO](convert_VOC2COCO) 168 | - If you want to convert **COCO** format to **.txt** then check the `README.md` file in [convert_COCO](convert_COCO) 169 | --------------------------------------------------------------------------------