├── JSON_to_txt.py
├── README.md
├── XML_to_JSON.py
├── cocoGT_to_Yolo.py
├── gt_yolo2json.py
├── pred_yolo2json.py
├── voc2coco.py
└── yolo_to_voc.py

/JSON_to_txt.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun May 31 00:44:00 2020

@author: nikhi
"""


import os
import json

classes = ["person","bicycle","car","motorcycle","airplane","bus","train",
           "truck","boat","traffic light","fire hydrant","stop sign","parking meter",
           "bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra",
           "giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis",
           "snowboard","sports ball","kite","baseball bat","baseball glove","skateboard",
           "surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon",
           "bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza",
           "donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv",
           "laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink",
           "refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"]

# COCO boxes are already stored as [x-top left, y-top left, width, height],
# so this is a pass-through; uncomment the normalization below if you need
# YOLO-style relative coordinates instead.
def convert(size, box):
    #dw = 1./size[0]
    #dh = 1./size[1]
    x = box[0]
    y = box[1]
    w = box[2]
    h = box[3]
    return (x, y, w, h)

def convert_annotation():
    # Raw strings so the Windows backslashes are not treated as escapes.
    json_path = r'G:\My Drive\ML_DL_Stuff\Object Detection\MSCOCO_data\instances_val2017.json'
    out_dir = r'G:\My Drive\ML_DL_Stuff\Object Detection\MSCOCO_data\val2017_yolo_GT_text_files'
    with open(json_path, 'r') as f:
        data = json.load(f)
    os.makedirs(out_dir, exist_ok=True)
    # Map category id -> name once, instead of filtering the category list
    # for every single annotation.
    cat_id_to_name = {cat['id']: cat['name'] for cat in data['categories']}
    for item in data['images']:
        image_id = item['id']
        file_name = item['file_name']
        width = item['width']
        height = item['height']
        anns = filter(lambda ann: ann['image_id'] == image_id, data['annotations'])
        # 'w' (not 'a+') so that re-running the script does not duplicate lines.
        with open(os.path.join(out_dir, '%s.txt' % file_name[:-4]), 'w') as outfile:
            for ann in anns:
                name = cat_id_to_name[ann['category_id']]
                class_id = classes.index(name)
                bb = convert((width, height), ann['bbox'])
                outfile.write(str(class_id) + " " + " ".join([str(a) for a in bb]) + '\n')

if __name__ == '__main__':
    convert_annotation()
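
# The script writes one text file per image, one line per object.
# Example line (values are illustrative, not from this repo):
# 0 473.07 395.93 38.65 28.67   --> class_id x-top left y-top left width height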
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Annotation-converters
This repo covers the common annotation formats for object detection and lets you easily convert from one format to another using the attached scripts.

All computer vision problems require annotated datasets, and for training deep neural networks the data needs to be annotated in a well-defined format. For object detection there are many available formats for preparing and annotating your dataset, but the most popular and widely used formats are Pascal VOC and Microsoft COCO.

### MS COCO ###
Common Objects in Context (COCO) is a large-scale dataset for object detection, segmentation, and captioning. It contains 1.5 million object instances across 80 object categories. COCO stores annotations in a JSON file.\
COCO bounding box: _(x-top left, y-top left, width, height)_

### Pascal VOC ###
Pascal Visual Object Classes (VOC) provides standardized image datasets for object detection. Pascal VOC uses XML files, unlike COCO which uses a single JSON file.\
Pascal VOC bounding box: _(xmin-top left, ymin-top left, xmax-bottom right, ymax-bottom right)_

### Darknet YOLO ###
YOLO reads and predicts bounding boxes in a different format than VOC or COCO.\
YOLO bounding box: _(x_center, y_center, width, height)_ --> all these coordinates are normalized with respect to the image width & height.

In Pascal VOC and YOLO we create one annotation file per image in the dataset. In COCO we have a single file for the entire dataset, one each for training, validation, and testing.

Usually, when working on custom datasets, we end up wasting a lot of time converting annotations from one format to another to suit a particular object detection model or framework. This is really frustrating, so I compiled a few annotation converter scripts that cover most of the cases and save you time! You can now focus on more productive tasks such as improving model performance or training more efficiently.
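
To make the three conventions concrete, here is a minimal sketch of how one COCO box maps to the other two formats (the function names are illustrative, not part of this repo):

```python
def coco_to_voc(x, y, w, h):
    # (x-top left, y-top left, width, height) -> (xmin, ymin, xmax, ymax)
    return x, y, x + w, y + h

def coco_to_yolo(x, y, w, h, img_w, img_h):
    # (x-top left, y-top left, width, height) -> normalized (x_center, y_center, width, height)
    return (x + w / 2) / img_w, (y + h / 2) / img_h, w / img_w, h / img_h

# A 100x200 box at (50, 40) in a 640x480 image:
# coco_to_voc(50, 40, 100, 200)            --> (50, 40, 150, 240)
# coco_to_yolo(50, 40, 100, 200, 640, 480) --> (0.15625, 0.2917, 0.15625, 0.4167)  (rounded)
```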

## Scripts ##

* **JSON_to_txt.py**
  * It converts an MS COCO JSON file into one text file per image, where each line has the format _{class_id, x_min, y_min, width, height}_


* **yolo_to_voc.py**
  * It converts YOLO text files to Pascal VOC format XML files
  * _(x_c_n, y_c_n, width_n, height_n) --> (x_min, y_min, x_max, y_max)_

* **XML_to_JSON.py** && **voc2coco.py**
  * These Python scripts convert all XML files of a dataset into a single MS COCO readable JSON file.\
`python XML_to_JSON.py ./annotations_dir/ ./json_dest_dir/coco_output.json`

* **gt_yolo2json.py** && **pred_yolo2json.py**
  * They convert YOLO text files into MS COCO readable JSON files (see the usage sketch below this list).
  * For ground-truth YOLO text files --> gt_yolo2json.py
  * For YOLO predicted text files --> pred_yolo2json.py

* **cocoGT_to_Yolo.py**
  * It converts MS COCO ground-truth text files to YOLO format. It also has a function to convert YOLO text files to VOC format. Feel free to change the code and switch between the functions.

* **JSON --> VOC XML files**
  * Json2PascalVoc is a Python library for converting some special JSON strings to Pascal VOC format XML files.\
`pip install Json2PascalVoc`
```
from Json2PascalVoc.Converter import Converter

myConverter = Converter()
# returns a Converter object
myConverter.convertJsonToPascal("data.json")
# converts JSON to Pascal VOC XML and saves the XML file to the related file path
```
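
Since **gt_yolo2json.py** exposes functions rather than a CLI, here is a minimal usage sketch (file names are hypothetical; the script locates label files by replacing `images` with `labels` and `jpg` with `txt` in each image path):

```python
from gt_yolo2json import generate_annotations_file, COCO_IDS

# val.txt lists one image path per line
class_names = ["person", "bicycle", "car"]  # your classes, in YOLO id order
generate_annotations_file("val.txt", class_names, "val_gt_coco.json")

# For the 80 COCO classes, also pass the official category ids:
# generate_annotations_file("val.txt", coco_class_names, "val_gt_coco.json", class_ids=COCO_IDS)
```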
--------------------------------------------------------------------------------
/XML_to_JSON.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 28 00:44:14 2020

@author: nikhi
"""

#!/usr/bin/python

import sys
import os
import json
import xml.etree.ElementTree as ET
import glob

START_BOUNDING_BOX_ID = 1
PRE_DEFINE_CATEGORIES = None
# If necessary, pre-define each category and its id
# PRE_DEFINE_CATEGORIES = {"aeroplane": 1, "bicycle": 2, "bird": 3, "boat": 4,
#                          "bottle": 5, "bus": 6, "car": 7, "cat": 8, "chair": 9,
#                          "cow": 10, "diningtable": 11, "dog": 12, "horse": 13,
#                          "motorbike": 14, "person": 15, "pottedplant": 16,
#                          "sheep": 17, "sofa": 18, "train": 19, "tvmonitor": 20}


def get(root, name):
    vars = root.findall(name)
    return vars


def get_and_check(root, name, length):
    vars = root.findall(name)
    if len(vars) == 0:
        raise ValueError("Can not find %s in %s." % (name, root.tag))
    if length > 0 and len(vars) != length:
        raise ValueError(
            "The size of %s is supposed to be %d, but is %d."
            % (name, length, len(vars))
        )
    if length == 1:
        vars = vars[0]
    return vars


def get_filename(filename):
    # Return the file name without directories or extension.
    filename = filename.replace("\\", "/")
    filename = os.path.splitext(os.path.basename(filename))[0]
    return str(filename)


def get_categories(xml_files):
    """Generate category name to id mapping from a list of xml files.

    Arguments:
        xml_files {list} -- A list of xml file paths.

    Returns:
        dict -- category name to id mapping.
    """
    classes_names = []
    for xml_file in xml_files:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall("object"):
            classes_names.append(member[0].text)
    classes_names = list(set(classes_names))
    classes_names.sort()
    return {name: i for i, name in enumerate(classes_names)}


def convert(xml_files, json_file):
    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    if PRE_DEFINE_CATEGORIES is not None:
        categories = PRE_DEFINE_CATEGORIES
    else:
        categories = get_categories(xml_files)
    bnd_id = START_BOUNDING_BOX_ID
    for xml_file in xml_files:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        path = get(root, "path")
        if len(path) == 1:
            filename = os.path.basename(path[0].text)
        elif len(path) == 0:
            filename = get_and_check(root, "filename", 1).text
        else:
            raise ValueError("%d paths found in %s" % (len(path), xml_file))
        ## The filename stem (without extension) is used as the image id.
        image_id = get_filename(filename)
        size = get_and_check(root, "size", 1)
        width = int(get_and_check(size, "width", 1).text)
        height = int(get_and_check(size, "height", 1).text)
        image = {
            "file_name": filename,
            "height": height,
            "width": width,
            "id": image_id,
        }
        json_dict["images"].append(image)
        ## Currently we do not support segmentation.
        # segmented = get_and_check(root, 'segmented', 1).text
        # assert segmented == '0'
        for obj in get(root, "object"):
            category = get_and_check(obj, "name", 1).text
            if category not in categories:
                new_id = len(categories)
                categories[category] = new_id
            category_id = categories[category]
            bndbox = get_and_check(obj, "bndbox", 1)
            # VOC pixel coordinates are 1-based; COCO expects 0-based.
            xmin = int(get_and_check(bndbox, "xmin", 1).text) - 1
            ymin = int(get_and_check(bndbox, "ymin", 1).text) - 1
            xmax = int(get_and_check(bndbox, "xmax", 1).text)
            ymax = int(get_and_check(bndbox, "ymax", 1).text)
            assert xmax > xmin
            assert ymax > ymin
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            ann = {
                "area": o_width * o_height,
                "iscrowd": 0,
                "image_id": image_id,
                "bbox": [xmin, ymin, o_width, o_height],
                "category_id": category_id,
                "id": bnd_id,
                "ignore": 0,
                "segmentation": [],
            }
            json_dict["annotations"].append(ann)
            bnd_id = bnd_id + 1

    for cate, cid in categories.items():
        cat = {"supercategory": "none", "id": cid, "name": cate}
        json_dict["categories"].append(cat)

    # Guard against an empty dirname when the output file is in the cwd.
    out_dir = os.path.dirname(json_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(json_file, "w") as json_fp:
        json_fp.write(json.dumps(json_dict))


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Convert Pascal VOC annotation to COCO format."
    )
    parser.add_argument("xml_dir", help="Directory path to xml files.", type=str)
    parser.add_argument("json_file", help="Output COCO format json file.", type=str)
    args = parser.parse_args()
    xml_files = glob.glob(os.path.join(args.xml_dir, "*.xml"))

    # If you want to do a train/test split, you can pass a subset of the xml files to convert().
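    # For example, a hypothetical 80/20 split (sketch, kept commented out):
    # import random
    # random.shuffle(xml_files)
    # n_train = int(0.8 * len(xml_files))
    # convert(xml_files[:n_train], "train.json")
    # convert(xml_files[n_train:], "test.json")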
    print("Number of xml files: {}".format(len(xml_files)))
    convert(xml_files, args.json_file)
    print("Success: {}".format(args.json_file))
--------------------------------------------------------------------------------
/cocoGT_to_Yolo.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 23 20:38:44 2020

@author: nikhi
"""

import sys
import os
import glob
import cv2

def convert_coco_to_yolo(left, top, width, height, img_width, img_height):
    ## "c" stands for center and "n" stands for normalized
    width = float(width)
    height = float(height)
    half_width = width / 2
    half_height = height / 2
    ## compute the box center, then normalize everything by the image size
    x_c = float(left) + half_width
    y_c = float(top) + half_height
    width_n = width / img_width
    height_n = height / img_height
    x_c_n = x_c / img_width
    y_c_n = y_c / img_height
    return x_c_n, y_c_n, width_n, height_n

def convert_yolo_coordinates_to_voc(x_c_n, y_c_n, width_n, height_n, img_width, img_height):
    ## remove the normalization given the size of the image
    x_c = float(x_c_n) * img_width
    y_c = float(y_c_n) * img_height
    width = float(width_n) * img_width
    height = float(height_n) * img_height
    ## compute half width and half height
    half_width = width / 2
    half_height = height / 2
    ## compute left, top, right, bottom
    ## in the official VOC challenge the top-left pixel in the image has coordinates (1;1)
    left = int(x_c - half_width) + 1
    top = int(y_c - half_height) + 1
    right = int(x_c + half_width) + 1
    bottom = int(y_c + half_height) + 1
    return left, top, right, bottom
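
## Worked example (values are illustrative): a COCO box with
## left=50, top=40, width=100, height=200 in a 640x480 image gives
## convert_coco_to_yolo(50, 40, 100, 200, 640, 480)
##   --> (0.15625, 0.2917, 0.15625, 0.4167)  (rounded)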

# make sure that the cwd in the beginning is the location of the python script (so that every path makes sense)
os.chdir(os.path.dirname(os.path.abspath(__file__)))


# change directory to the one with the files to be converted

GT_PATH = '/home/ngunti/coco/labels/val2017/'
# Assumption: the images live next to the labels, with 'labels' replaced by
# 'images' and '.txt' by '.jpg'; adjust IMG_PATH if your layout differs.
IMG_PATH = GT_PATH.replace('labels', 'images')
os.chdir(GT_PATH)


# convert the COCO ground-truth text files to YOLO format, in place
txt_list = glob.glob('*.txt')
if len(txt_list) == 0:
    print("Error: no .txt files found in ground-truth")
    sys.exit()
for tmp_file in txt_list:
    # the image is only needed for its dimensions
    img = cv2.imread(os.path.join(IMG_PATH, tmp_file.replace('.txt', '.jpg')))
    if img is None:
        print("Warning: image for %s not found, skipping" % tmp_file)
        continue
    img_height, img_width = img.shape[:2]

    # read the txt file lines into a list
    with open(tmp_file) as f:
        content = f.readlines()
    ## remove whitespace characters like `\n` at the end of each line
    content = [x.strip() for x in content]
    # rewrite the file in YOLO format
    with open(tmp_file, "w") as new_f:
        for line in content:
            ## split the line by spaces; COCO ground truth stores
            ## absolute (left, top, width, height)
            obj_id, left, top, width, height = line.split()
            x_c_n, y_c_n, width_n, height_n = convert_coco_to_yolo(left, top, width, height, img_width, img_height)
            ## write the converted line
            new_f.write(obj_id + " " + str(x_c_n) + " " + str(y_c_n) + " " + str(width_n) + " " + str(height_n) + '\n')
print("Conversion completed!")
--------------------------------------------------------------------------------
/gt_yolo2json.py:
--------------------------------------------------------------------------------
import os
import sys
import re
import os.path as osp
import json
import cv2
import numpy as np
from tqdm import tqdm

from collections import OrderedDict

# The 80 official COCO category ids (pass as class_ids when the labels use the COCO classes).
COCO_IDS = [1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90]


def create_annotations(cat_list, img_list, ann_list):
    return OrderedDict({'categories': cat_list,
                        'images': img_list,
                        'annotations': ann_list})

def create_images_entry(image_id, width=None, height=None):
    if width is None or height is None:
        return OrderedDict({'id': image_id})
    else:
        return OrderedDict({'id': image_id, 'width': width, 'height': height})

def create_categories(class_names, class_ids):
    return [{'id': class_ids[i], 'name': cls} for i, cls in enumerate(class_names)]

def create_annotations_entry(image_id, bbox, category_id, ann_id, iscrowd=0, area=None, segmentation=None):
    if area is None:
        if segmentation is None:
            # Calculate the area from the bbox
            area = bbox[2] * bbox[3]
        else:
            raise NotImplementedError()

    return OrderedDict({
        "id": ann_id,
        "image_id": image_id,
        "category_id": category_id,
        "iscrowd": iscrowd,
        "area": area,
        "bbox": bbox
    })


def get_image_id_from_path(image_path):
    # Use the trailing digits of the file name as the image id.
    image_path = osp.splitext(image_path)[0]
    m = re.search(r'\d+$', image_path)
    return int(m.group())

def bbox_cxcywh_to_xywh(box):
    x, y = box[..., 0] - box[..., 2] / 2, box[..., 1] - box[..., 3] / 2
    box[..., 0], box[..., 1] = x, y
    return box

def bbox_relative_to_absolute(box, img_dim, x_idx=[0, 2], y_idx=[1, 3]):
    box[..., x_idx] *= img_dim[0]
    box[..., y_idx] *= img_dim[1]
    return box
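
# Quick sanity check for the two bbox helpers (illustrative):
# >>> box = np.array([[0.5, 0.5, 0.25, 0.5]])          # normalized cxcywh
# >>> bbox_relative_to_absolute(bbox_cxcywh_to_xywh(box), (640, 480))
# array([[240., 120., 160., 240.]])                    # absolute xywh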

def get_img_ann_list(img_path_list, label_path_list, class_ids):
    img_list, ann_list = [], []
    for img_path, label_path in tqdm(zip(img_path_list, label_path_list), file=sys.stdout, leave=True, total=len(img_path_list)):
        image_id = get_image_id_from_path(img_path)
        # Read the image; without it we cannot recover absolute box coordinates.
        if not osp.exists(img_path):
            continue
        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        img_list.append(create_images_entry(image_id, width, height))
        # Read the labels
        if osp.exists(label_path):
            labels = np.loadtxt(label_path).reshape(-1, 5)
            labels[..., 1:5] = bbox_relative_to_absolute(bbox_cxcywh_to_xywh(labels[..., 1:5]), (width, height))

            for label in labels:
                category_id = class_ids[int(label[0])]
                # Cast to plain floats so the entry is JSON serializable.
                bbox = [float(v) for v in label[1:5]]
                ann_id = len(ann_list)
                ann_list.append(create_annotations_entry(image_id, bbox,
                                                         category_id, ann_id))

    return img_list, ann_list

def create_annotations_dict(target_txt, class_names, class_ids=None):
    if class_ids is None:
        class_ids = [i for i in range(len(class_names))]

    with open(target_txt, 'r') as f:
        img_path_list = [line.strip() for line in f.readlines()]
    label_path_list = [img_path.replace('jpg', 'txt').replace('images', 'labels') for img_path in img_path_list]

    img_list, ann_list = get_img_ann_list(img_path_list, label_path_list, class_ids)
    cat_list = create_categories(class_names, class_ids)

    ann_dict = create_annotations(cat_list, img_list, ann_list)

    return ann_dict

def generate_annotations_file(target_txt, class_names, out, class_ids=None):
    ann_dict = create_annotations_dict(target_txt, class_names, class_ids=class_ids)
    with open(out, 'w') as f:
        json.dump(ann_dict, f, indent=4, separators=(',', ':'))
--------------------------------------------------------------------------------
/pred_yolo2json.py:
--------------------------------------------------------------------------------
import os
import sys
import re
import os.path as osp
import json

from collections import OrderedDict

def get_image_id_from_path(image_path):
    # Use the trailing digits of the file name as the image id.
    image_path = osp.splitext(image_path)[0]
    m = re.search(r'\d+$', image_path)
    return int(m.group())


def create_results_entry(image_id, category_id, bbox, score):
    return OrderedDict({"image_id": image_id,
                        "category_id": category_id,
                        "bbox": bbox,
                        "score": score})

def load_class_names(path):
    with open(path) as f:
        return [line.rstrip("\n") for line in f.readlines()]

def write_json(input_txt, output_json, class_file, separator_key='Start processing', img_format='.jpg'):
    class_names = load_class_names(class_file)
    cls2id = {cls: id for id, cls in enumerate(class_names)}

    with open(output_json, 'w') as outfile:
        outfile.write('[')
        isReading = False

        with open(input_txt) as infile:
            for line in infile:
                if separator_key in line:
                    if img_format not in line:
                        break

                    # get the text between the two substrings (separator_key and img_format)
                    image_path = re.search(separator_key + '(.*)' + img_format, line)
                    image_id = get_image_id_from_path(image_path.group(1))

                    isReading = True
                elif isReading and ":" in line:
                    # split the line on the first occurrence of ':' and '%'
                    class_name, info = line.split(':', 1)
                    #class_name = class_name.replace(' ', '_')
                    confidence, bbox_info = info.split('%', 1)

                    # Lines without coordinates only restate a class score for the
                    # same box (not the best class); skip them.
                    if len(bbox_info) > 1:
                        # get all the coordinates of the bounding box
                        bbox_info = bbox_info.replace(')', '')  # remove the character ')'
                        bbox_info = bbox_info.split()

                        # fields look like: (left_x: L top_y: T width: W height: H)
                        left, top, width, height = bbox_info[1], bbox_info[3], bbox_info[5], bbox_info[7]

                        category_id = cls2id[class_name]
                        # COCO results expect [x, y, width, height]
                        bbox = [float(left), float(top), float(width), float(height)]
                        score = float(confidence) / 100

                        res = create_results_entry(image_id, category_id, bbox, score)
                        json.dump(res, outfile, indent=4, separators=(',', ':'))
                        outfile.write(',')

        # Remove the trailing ',' (if any entry was written) and close the bracket
        if outfile.tell() > 1:
            outfile.seek(outfile.tell() - 1, os.SEEK_SET)
            outfile.truncate()
        outfile.write(']')




# Legacy variant (kept for reference): writes one plain-text file per image
# instead of a single JSON results file.
# outfile = None
# with open(IN_FILE) as infile:
#     for line in infile:
#         if SEPARATOR_KEY in line:
#             if IMG_FORMAT not in line:
#                 break
#             # get text between two substrings (SEPARATOR_KEY and IMG_FORMAT)
#             image_path = re.search(SEPARATOR_KEY + '(.*)' + IMG_FORMAT, line)
#             # get the image name (the final component of a image_path)
#             # e.g., from 'data/horses_1' to 'horses_1'
#             image_name = os.path.basename(image_path.group(1))
#             # close the previous file
#             if outfile is not None:
#                 outfile.close()
#             # open a new file
#             outfile = open(os.path.join(OUTPUT_DIR, image_name + '.txt'), 'w')
#         elif outfile is not None:
#             # split line on first occurrence of the character ':' and '%'
#             class_name, info = line.split(':', 1)
#             class_name = class_name.replace(' ', '_')
#             confidence, bbox = info.split('%', 1)
#             if len(bbox) != 1:
#                 # get all the coordinates of the bounding box
#                 bbox = bbox.replace(')','')  # remove the character ')'
#                 # go through each of the parts of the string and check if it is a digit
#                 left, top, width, height = [int(s) for s in bbox.split() if s.lstrip('-').isdigit()]
#                 right = left + width
#                 bottom = top + height
#                 outfile.write("{} {} {} {} {} {}\n".format(class_name, float(confidence)/100, left, top, width, height))
#                 #outfile.write("{} {} {} {} {} {}\n".format(class_name, float(confidence)/100, left, top, right, bottom))
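
# Minimal usage sketch (file names are hypothetical). The input is expected to
# be a darknet detection log in which each image block starts with a line
# containing separator_key and the image path, followed by lines like
# "person: 87%   (left_x:  100  top_y:  200  width:  50  height:  80)".
#
# write_json('darknet_log.txt', 'predictions.json', 'coco.names')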
--------------------------------------------------------------------------------
/voc2coco.py:
--------------------------------------------------------------------------------
import os
import argparse
import json
import xml.etree.ElementTree as ET
from typing import Dict, List
from tqdm import tqdm
import re


def get_label2id(labels_path: str) -> Dict[str, int]:
    """ids start at 1"""
    with open(labels_path, 'r') as f:
        labels_str = f.read().split()
    labels_ids = list(range(1, len(labels_str) + 1))
    return dict(zip(labels_str, labels_ids))


def get_annpaths(ann_dir_path: str = None,
                 ann_ids_path: str = None,
                 ext: str = '',
                 annpaths_list_path: str = None) -> List[str]:
    # If an annotation paths list is given, use it directly
    if annpaths_list_path is not None:
        with open(annpaths_list_path, 'r') as f:
            ann_paths = f.read().split()
        return ann_paths

    # Otherwise build the paths from the annotation ids list
    ext_with_dot = '.' + ext if ext != '' else ''
    with open(ann_ids_path, 'r') as f:
        ann_ids = f.read().split()
    ann_paths = [os.path.join(ann_dir_path, aid + ext_with_dot) for aid in ann_ids]
    return ann_paths


def get_image_info(annotation_root, extract_num_from_imgid=True):
    path = annotation_root.findtext('path')
    if path is None:
        filename = annotation_root.findtext('filename')
    else:
        filename = os.path.basename(path)
    img_name = os.path.basename(filename)
    img_id = os.path.splitext(img_name)[0]
    if extract_num_from_imgid and isinstance(img_id, str):
        img_id = int(re.findall(r'\d+', img_id)[0])

    size = annotation_root.find('size')
    width = int(size.findtext('width'))
    height = int(size.findtext('height'))

    image_info = {
        'file_name': filename,
        'height': height,
        'width': width,
        'id': img_id
    }
    return image_info


def get_coco_annotation_from_obj(obj, label2id):
    label = obj.findtext('name')
    assert label in label2id, f"Error: {label} is not in label2id!"
    category_id = label2id[label]
    bndbox = obj.find('bndbox')
    # VOC pixel coordinates are 1-based; COCO expects 0-based.
    xmin = int(bndbox.findtext('xmin')) - 1
    ymin = int(bndbox.findtext('ymin')) - 1
    xmax = int(bndbox.findtext('xmax'))
    ymax = int(bndbox.findtext('ymax'))
    assert xmax > xmin and ymax > ymin, f"Box size error!: (xmin, ymin, xmax, ymax): {xmin, ymin, xmax, ymax}"
    o_width = xmax - xmin
    o_height = ymax - ymin
    ann = {
        'area': o_width * o_height,
        'iscrowd': 0,
        'bbox': [xmin, ymin, o_width, o_height],
        'category_id': category_id,
        'ignore': 0,
        'segmentation': []  # This script is not for segmentation
    }
    return ann


def convert_xmls_to_cocojson(annotation_paths: List[str],
                             label2id: Dict[str, int],
                             output_jsonpath: str,
                             extract_num_from_imgid: bool = True):
    output_json_dict = {
        "images": [],
        "type": "instances",
        "annotations": [],
        "categories": []
    }
    bnd_id = 1  # START_BOUNDING_BOX_ID, TODO: take as an argument?
    print('Start converting!')
    for a_path in tqdm(annotation_paths):
        # Read the annotation xml
        ann_tree = ET.parse(a_path)
        ann_root = ann_tree.getroot()

        img_info = get_image_info(annotation_root=ann_root,
                                  extract_num_from_imgid=extract_num_from_imgid)
        img_id = img_info['id']
        output_json_dict['images'].append(img_info)

        for obj in ann_root.findall('object'):
            ann = get_coco_annotation_from_obj(obj=obj, label2id=label2id)
            ann.update({'image_id': img_id, 'id': bnd_id})
            output_json_dict['annotations'].append(ann)
            bnd_id = bnd_id + 1

    for label, label_id in label2id.items():
        category_info = {'supercategory': 'none', 'id': label_id, 'name': label}
        output_json_dict['categories'].append(category_info)

    with open(output_jsonpath, 'w') as f:
        output_json = json.dumps(output_json_dict)
        f.write(output_json)


def main():
    parser = argparse.ArgumentParser(
        description='This script supports converting VOC format xmls to COCO format json')
    parser.add_argument('--ann_dir', type=str, default=None,
                        help='path to the annotation files directory. Not needed when --ann_paths_list is used')
    parser.add_argument('--ann_ids', type=str, default=None,
                        help='path to the annotation file ids list. Not needed when --ann_paths_list is used')
    parser.add_argument('--ann_paths_list', type=str, default=None,
                        help='path to the annotation paths list. Not needed when --ann_dir and --ann_ids are used')
    parser.add_argument('--labels', type=str, default=None,
                        help='path to the label list.')
    parser.add_argument('--output', type=str, default='output.json', help='path to the output json file')
    parser.add_argument('--ext', type=str, default='', help='additional extension of the annotation files')
    args = parser.parse_args()
    label2id = get_label2id(labels_path=args.labels)
    ann_paths = get_annpaths(
        ann_dir_path=args.ann_dir,
        ann_ids_path=args.ann_ids,
        ext=args.ext,
        annpaths_list_path=args.ann_paths_list
    )
    convert_xmls_to_cocojson(
        annotation_paths=ann_paths,
        label2id=label2id,
        output_jsonpath=args.output,
        extract_num_from_imgid=True
    )


if __name__ == '__main__':
    main()
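
# Example invocations (paths are hypothetical):
#   python voc2coco.py --ann_dir ./Annotations --ann_ids ./train_ids.txt \
#       --labels ./labels.txt --ext xml --output train_coco.json
#   python voc2coco.py --ann_paths_list ./ann_paths.txt \
#       --labels ./labels.txt --output coco.json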
--------------------------------------------------------------------------------
/yolo_to_voc.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 28 00:15:22 2020

@author: nikhi
"""

# Script to convert yolo annotations to voc format

# Sample output format (Pascal VOC XML):
#
# <annotation>
#     <folder>_image_fashion</folder>
#     <filename>brooke-cagle-39574.jpg</filename>
#     <size>
#         <width>1200</width>
#         <height>800</height>
#         <depth>3</depth>
#     </size>
#     <segmented>0</segmented>
#     <object>
#         <name>head</name>
#         <pose>Unspecified</pose>
#         <truncated>0</truncated>
#         <difficult>0</difficult>
#         <bndbox>
#             <xmin>549</xmin>
#             <ymin>251</ymin>
#             <xmax>625</xmax>
#             <ymax>335</ymax>
#         </bndbox>
#     </object>
# </annotation>
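#
# For the sample above (a 1200x800 image), the corresponding YOLO input line,
# assuming class id 0 maps to "head", is approximately:
# 0 0.489167 0.366250 0.063333 0.105000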

import os
import xml.etree.cElementTree as ET
from PIL import Image
from math import floor

ANNOTATIONS_DIR_PREFIX = "G:/My Drive/ML_DL_Stuff/Object Detection/Datasets/OID_text/"
IMAGE_DIR_PREFIX = "G:/My Drive/ML_DL_Stuff/Object Detection/Datasets/OID_images/"
imgExt = "jpg"
imgChnls = 3  # RGB: 3; grayscale: 1
DESTINATION_DIR = "G:/My Drive/ML_DL_Stuff/Object Detection/Datasets/converted_labels"

CLASS_MAPPING = {
    '0': 'Tomato',
    '1': 'Bread',
    '2': 'Milk',
    '3': 'Knife',
    '4': 'Broccoli',
    '5': 'Cheese',
    '6': 'Fork',
    '7': 'Plate',
    '8': 'Table',
    '9': 'Mixing_bowl',
    '10': 'Carrot',
    '11': 'Turkey',
    '12': 'Cookie',
    '13': 'Coffee_cup',
    '14': 'Platter'
    # Add your remaining classes here.
}


def create_root(file_prefix, width, height):
    # Pascal VOC uses a single <annotation> root element.
    root = ET.Element("annotation")
    ET.SubElement(root, "filename").text = "{}.{}".format(file_prefix, imgExt)
    ET.SubElement(root, "folder").text = "images"
    size = ET.SubElement(root, "size")
    ET.SubElement(size, "width").text = str(width)
    ET.SubElement(size, "height").text = str(height)
    ET.SubElement(size, "depth").text = str(imgChnls)
    return root


def create_object_annotation(root, voc_labels):
    for voc_label in voc_labels:
        obj = ET.SubElement(root, "object")
        ET.SubElement(obj, "name").text = voc_label[0]
        ET.SubElement(obj, "pose").text = "Unspecified"
        ET.SubElement(obj, "truncated").text = str(0)
        ET.SubElement(obj, "difficult").text = str(0)
        bbox = ET.SubElement(obj, "bndbox")
        ET.SubElement(bbox, "xmin").text = str(voc_label[1])
        ET.SubElement(bbox, "ymin").text = str(voc_label[2])
        ET.SubElement(bbox, "xmax").text = str(voc_label[3])
        ET.SubElement(bbox, "ymax").text = str(voc_label[4])
    return root


def create_file(file_prefix, width, height, voc_labels):
    root = create_root(file_prefix, width, height)
    root = create_object_annotation(root, voc_labels)
    tree = ET.ElementTree(root)
    tree.write("{}/{}.xml".format(DESTINATION_DIR, file_prefix))


def read_file(file_path):
    file_prefix = file_path.split(".txt")[0]
    image_file_name = "{}.{}".format(file_prefix, imgExt)
    img = Image.open("{}/{}".format(IMAGE_DIR_PREFIX, image_file_name))

    w, h = img.size
    label_path = "{}/{}".format(ANNOTATIONS_DIR_PREFIX, file_path)
    with open(label_path) as file:
        lines = file.readlines()
        voc_labels = []
        for line in lines:
            voc = []
            line = line.strip()
            data = line.split()
            voc.append(CLASS_MAPPING.get(data[0]))
            # Denormalize, then convert the center/size box to corner coordinates.
            bbox_width = float(data[3]) * w
            bbox_height = float(data[4]) * h
            center_x = float(data[1]) * w
            center_y = float(data[2]) * h
            voc.append(floor(center_x - (bbox_width / 2)))
            voc.append(floor(center_y - (bbox_height / 2)))
            voc.append(floor(center_x + (bbox_width / 2)))
            voc.append(floor(center_y + (bbox_height / 2)))
            voc_labels.append(voc)
        create_file(file_prefix, w, h, voc_labels)
    print("Processing complete for file: {}".format(file_path))


def start():
    if not os.path.exists(DESTINATION_DIR):
        os.makedirs(DESTINATION_DIR)
    for filename in os.listdir(ANNOTATIONS_DIR_PREFIX):
        if filename.endswith('txt'):
            try:
                path_file_name = "{}/{}".format(ANNOTATIONS_DIR_PREFIX, filename)
                # skip empty label files
                if os.stat(path_file_name).st_size > 0:
                    read_file(filename)
            except Exception as e:
                print("Failed to convert {}: {}".format(filename, e))
        else:
            print("Skipping file: {}".format(filename))


if __name__ == "__main__":
    start()
--------------------------------------------------------------------------------