├── .gitignore ├── annotated_images ├── cats_dogs.record ├── cd00.png ├── cd00.xml ├── cd01.png ├── cd01.xml ├── cd02.png ├── cd02.xml ├── cd03.png ├── cd03.xml ├── cd04.png ├── cd04.xml ├── cd05.png ├── cd05.xml ├── cd06.png └── cd06.xml ├── readme.md ├── view_records.py ├── view_records_tf2.py └── voc_to_tfr.py /.gitignore: -------------------------------------------------------------------------------- 1 | *~* 2 | /working 3 | *todo* 4 | *cache* 5 | -------------------------------------------------------------------------------- /annotated_images/cats_dogs.record: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cats_dogs.record -------------------------------------------------------------------------------- /annotated_images/cd00.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd00.png -------------------------------------------------------------------------------- /annotated_images/cd00.xml: -------------------------------------------------------------------------------- 1 | 2 | cats_dogs 3 | cd00.png 4 | /home/eric/Pictures/cats_dogs/cd00.png 5 | 6 | Unknown 7 | 8 | 9 | 384 10 | 270 11 | 3 12 | 13 | 0 14 | 15 | cat 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 127 21 | 111 22 | 236 23 | 194 24 | 25 | 26 | 27 | dog 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 195 33 | 26 34 | 347 35 | 163 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /annotated_images/cd01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd01.png -------------------------------------------------------------------------------- /annotated_images/cd01.xml: -------------------------------------------------------------------------------- 1 | 2 | cats_dogs 3 | cd01.png 4 | /home/eric/Pictures/cats_dogs/cd01.png 5 | 6 | Unknown 7 | 8 | 9 | 525 10 | 350 11 | 3 12 | 13 | 0 14 | 15 | dog 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 95 21 | 67 22 | 229 23 | 228 24 | 25 | 26 | 27 | cat 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 258 33 | 160 34 | 335 35 | 245 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /annotated_images/cd02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd02.png -------------------------------------------------------------------------------- /annotated_images/cd02.xml: -------------------------------------------------------------------------------- 1 | 2 | cats_dogs 3 | cd02.png 4 | /home/eric/Pictures/cats_dogs/cd02.png 5 | 6 | Unknown 7 | 8 | 9 | 300 10 | 169 11 | 3 12 | 13 | 0 14 | 15 | dog 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 77 21 | 29 22 | 155 23 | 88 24 | 25 | 26 | 27 | cat 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 159 33 | 81 34 | 203 35 | 124 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /annotated_images/cd03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd03.png -------------------------------------------------------------------------------- /annotated_images/cd03.xml: -------------------------------------------------------------------------------- 1 | 2 | cats_dogs 3 | cd03.png 4 | /home/eric/Pictures/cats_dogs/cd03.png 5 | 6 | Unknown 7 | 8 | 9 | 840 10 | 553 11 | 3 12 | 13 | 0 14 | 15 | cat 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 405 21 | 48 22 | 731 23 | 324 24 | 25 | 26 | 27 | dog 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 99 33 | 39 34 | 559 35 | 513 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /annotated_images/cd04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd04.png -------------------------------------------------------------------------------- /annotated_images/cd04.xml: -------------------------------------------------------------------------------- 1 | 2 | cats_dogs 3 | cd04.png 4 | /home/eric/Pictures/cats_dogs/cd04.png 5 | 6 | Unknown 7 | 8 | 9 | 850 10 | 477 11 | 3 12 | 13 | 0 14 | 15 | cat 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 392 21 | 7 22 | 642 23 | 239 24 | 25 | 26 | 27 | dog 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 161 33 | 50 34 | 657 35 | 464 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /annotated_images/cd05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd05.png -------------------------------------------------------------------------------- /annotated_images/cd05.xml: -------------------------------------------------------------------------------- 1 | 2 | cats_dogs 3 | cd05.png 4 | /home/eric/Pictures/cats_dogs/cd05.png 5 | 6 | Unknown 7 | 8 | 9 | 600 10 | 426 11 | 3 12 | 13 | 0 14 | 15 | dog 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 82 21 | 159 22 | 272 23 | 332 24 | 25 | 26 | 27 | dog 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 195 33 | 188 34 | 402 35 | 346 36 | 37 | 38 | 39 | cat 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 309 45 | 230 46 | 420 47 | 319 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /annotated_images/cd06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd06.png -------------------------------------------------------------------------------- /annotated_images/cd06.xml: -------------------------------------------------------------------------------- 1 | 2 | cats_dogs 3 | cd06.png 4 | /home/eric/Pictures/cats_dogs/cd06.png 5 | 6 | Unknown 7 | 8 | 9 | 636 10 | 522 11 | 3 12 | 13 | 0 14 | 15 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # tfrecord-view 2 | How to consume data from TFRecord files, which are used in the Tensorflow [object detection api](https://github.com/tensorflow/models/tree/master/research/object_detection). I use it to double ensure that my augmentation pipeline and TFRecord encoding worked properly. 3 | 4 | Currently tested in Linux. Not sure about behavior in Windows. 5 | 6 | ## Usage 7 | ### Creating a TFRecord file 8 | If you need to create a TFRecord file, see `voc_to_tfr.py`. The images and annotation files are in `annotated_images/`. 9 | 10 | ### Consuming a TFRecord file 11 | If you already have TFRecord file data, then use `view_records.py` or `view_records_tf2.py` to see how to consume it and show data. The function takes in the path to the TFRecord file, the dictionary of class labels, and a couple of optional keyword arguments like stride. It will then show the images with bounding boxes and labels for each object, if applicable. 12 | 13 | ### Installation 14 | Prereqs: tensorflow, opencv, and numpy. 15 | 16 | git clone XXX 17 | conda create --name tfrecord-view 18 | conda activate --name tfrecord-view 19 | conda install python=3 opencv numpy 20 | conda install -c anaconda tensorflow-gpu>=1.12 21 | 22 | This also assumes you have installed the object detection api (https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md), as we will use some utilities that come with it (in particular `utils.dataset_util`). 23 | 24 | ## For more info 25 | Construction of TFRecord files: 26 | - https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md 27 | - https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_pascal_tf_record.py 28 | 29 | On consuming TFRrecord files, there aren't a lot of great resources out there. I used this (and probably 20 other sites I can't even remember: this is one aspect of the api that keeps changing, and will surely change again once it is ported to Tensorflow 2): 30 | - https://stackoverflow.com/a/56932321/1886357 31 | 32 | I recently found this repo which is similar to this one, and has some nice ideas: 33 | - https://github.com/yinguobing/tfrecord_utility 34 | 35 | ## To do 36 | - Functionalize encoder in voc_to_tfr.py. 37 | - Look over tfrecord_utility repo maybe he found a way to simplify reading data? 38 | 39 | #### Sources for images 40 | The images are of cats and dogs, with one that has no label. The images were scraped from: 41 | - https://huggablemuggs.com/8-tricks-to-help-your-cat-and-dog-to-get-along/ 42 | - https://2catsandablog.wordpress.com/2018/08/14/do-cats-and-dogs-really-fight-like-cats-and-dogs/ 43 | - http://www.waycooldogs.com/feeding-cats-dogs-together/ 44 | - https://phz8.petinsurance.com/ownership-adoption/pet-ownership/pet-behavior/7-tips-on-combining-multi-pet-household 45 | - https://www.mercurynews.com/2019/04/15/whos-going-to-tell-mom-shes-feeding-her-dogs-the-wrong-food/ 46 | - https://www.meowingtons.com/blogs/lolcats/snuggly-cat-and-dog-best-friends-to-cheer-you-up 47 | - https://www.thesprucepets.com/cute-aquarium-sea-critters-4146506 48 | -------------------------------------------------------------------------------- /view_records.py: -------------------------------------------------------------------------------- 1 | """ 2 | view_records.py: 3 | Consume and display data from a tfrecord file: pulls image and bounding boxes for display 4 | so you can make sure things look reasonabloe, e.g., after augmentation. 5 | 6 | Hit 'n' for 'next' image, or 'esc' to quit. 7 | 8 | Part of tensorflow-view repo: https://github.com/EricThomson/tfrecord-view 9 | 10 | """ 11 | 12 | import cv2 13 | import numpy as np 14 | import tensorflow as tf 15 | tf.enable_eager_execution() 16 | import warnings 17 | warnings.filterwarnings('ignore', category = FutureWarning) #tf 1.14 and np 1.17 are clashing: temporary solution 18 | 19 | def cv_bbox(image, bbox, color = (255, 255, 255), line_width = 2): 20 | """ 21 | use opencv to add bbox to an image 22 | assumes bbox is in standard form x1 y1 x2 y2 23 | """ 24 | 25 | cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, line_width) 26 | return 27 | 28 | 29 | def parse_record(data_record): 30 | """ 31 | parse the data record from a tfrecord file, typically pulled from an iterator, 32 | in this case a one_shot_iterator created from the dataset. 33 | """ 34 | feature = {'image/encoded': tf.FixedLenFeature([], tf.string), 35 | 'image/object/class/label': tf.VarLenFeature(tf.int64), 36 | 'image/object/bbox/xmin': tf.VarLenFeature(tf.float32), 37 | 'image/object/bbox/ymin': tf.VarLenFeature(tf.float32), 38 | 'image/object/bbox/xmax': tf.VarLenFeature(tf.float32), 39 | 'image/object/bbox/ymax': tf.VarLenFeature(tf.float32), 40 | 'image/filename': tf.FixedLenFeature([], tf.string) 41 | } 42 | return tf.parse_single_example(data_record, feature) 43 | 44 | 45 | def view_records(file_path, class_labels, stride = 1, verbose = 1): 46 | """ 47 | peek at the data using opencv and tensorflow tools. 48 | Inputs: 49 | file_path: path to tfrecord file (usually has 'record' extension) 50 | class_labels: dictionary of labels with name:number pairs (start with 1) 51 | stride (default 1): how many records to jump (you might have thousands so skip a few) 52 | verbose (default 1): display text output if 1, display nothing except images otherwise. 53 | 54 | Usage: 55 | Within the image window, enter 'n' for next image, 'esc' to stop seeing images. 56 | """ 57 | dataset = tf.data.TFRecordDataset([file_path]) 58 | record_iterator = dataset.make_one_shot_iterator() 59 | num_records = dataset.reduce(np.int64(0), lambda x, _: x + 1).numpy() 60 | 61 | if verbose: 62 | print(f"\nGoing through {num_records} records with a stride of {stride}.") 63 | print("Enter 'n' to bring up next image in record.\n") 64 | for im_ind in range(num_records): 65 | 66 | #Parse and process example 67 | 68 | parsed_example = parse_record(record_iterator.get_next()) 69 | if im_ind % stride != 0: 70 | continue 71 | 72 | fname = parsed_example['image/filename'].numpy() 73 | encoded_image = parsed_example['image/encoded'] 74 | image_np = tf.image.decode_image(encoded_image, channels=3).numpy() 75 | 76 | labels = tf.sparse_tensor_to_dense(parsed_example['image/object/class/label'], default_value=0).numpy() 77 | x1norm = tf.sparse_tensor_to_dense( parsed_example['image/object/bbox/xmin'], default_value=0).numpy() 78 | x2norm = tf.sparse_tensor_to_dense( parsed_example['image/object/bbox/xmax'], default_value=0).numpy() 79 | y1norm = tf.sparse_tensor_to_dense( parsed_example['image/object/bbox/ymin'], default_value=0).numpy() 80 | y2norm = tf.sparse_tensor_to_dense( parsed_example['image/object/bbox/ymax'], default_value=0).numpy() 81 | 82 | num_bboxes = len(labels) 83 | 84 | #% Process and display image 85 | height, width = image_np[:, :, 1].shape 86 | image_copy = image_np.copy() 87 | image_rgb = cv2.cvtColor(image_copy, cv2.COLOR_BGR2RGB) 88 | 89 | if num_bboxes > 0: 90 | x1 = np.int64(x1norm*width) 91 | x2 = np.int64(x2norm*width) 92 | y1 = np.int64(y1norm*height) 93 | y2 = np.int64(y2norm*height) 94 | for bbox_ind in range(num_bboxes): 95 | bbox = (x1[bbox_ind], y1[bbox_ind], x2[bbox_ind], y2[bbox_ind]) 96 | label_name = list(class_labels.keys())[list(class_labels.values()).index(labels[bbox_ind])] 97 | label_position = (bbox[0] + 5, bbox[1] + 20) 98 | cv_bbox(image_rgb, bbox, color = (250, 250, 150), line_width = 2) 99 | cv2.putText(image_rgb, 100 | label_name, 101 | label_position, 102 | cv2.FONT_HERSHEY_SIMPLEX, 103 | 1, (10, 10, 255), 2); #scale, color, thickness 104 | 105 | if verbose: 106 | print(f"\nImage {im_ind}") 107 | print(f" {fname}") 108 | print(f" Height/width: {height, width}") 109 | print(f" Num bboxes: {num_bboxes}") 110 | cv2.imshow("bb data", image_rgb) 111 | k = cv2.waitKey() 112 | if k == 27: 113 | break 114 | elif k == ord('n'): 115 | continue 116 | cv2.destroyAllWindows() 117 | if verbose: 118 | print("\n\ntfrecord-view: done going throug the data.") 119 | 120 | 121 | #%% 122 | if __name__ == '__main__': 123 | class_labels = {"dog" : 1, "cat": 2 } 124 | #Make the following using voc_to_tfr.py 125 | data_path = r"annotated_images/cats_dogs.record" 126 | 127 | verbose = 1 128 | stride = 1 129 | view_records(data_path, class_labels, stride = stride, verbose = verbose) 130 | -------------------------------------------------------------------------------- /view_records_tf2.py: -------------------------------------------------------------------------------- 1 | """ 2 | The tf2 version is based in https://github.com/jschw/tfrecord-view/blob/master/tfrecord_view_gui.py 3 | 4 | view_records.py: 5 | Consume and display data from a tfrecord file: pulls image and bounding boxes for display 6 | so you can make sure things look reasonabloe, e.g., after augmentation. 7 | 8 | Hit 'n' for 'next' image, or 'esc' to quit. 9 | 10 | Part of tensorflow-view repo: https://github.com/EricThomson/tfrecord-view 11 | 12 | """ 13 | 14 | import cv2 15 | import numpy as np 16 | import tensorflow as tf 17 | import warnings 18 | warnings.filterwarnings('ignore', category = FutureWarning) #tf 1.14 and np 1.17 are clashing: temporary solution 19 | 20 | def cv_bbox(image, bbox, color = (255, 255, 255), line_width = 2): 21 | """ 22 | use opencv to add bbox to an image 23 | assumes bbox is in standard form x1 y1 x2 y2 24 | """ 25 | 26 | cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, line_width) 27 | return 28 | 29 | 30 | def parse_record(data_record): 31 | """ 32 | parse the data record from a tfrecord file, typically pulled from an iterator, 33 | in this case a one_shot_iterator created from the dataset. 34 | """ 35 | feature = {'image/encoded': tf.io.FixedLenFeature([], tf.string), 36 | 'image/object/class/label': tf.io.VarLenFeature(tf.int64), 37 | 'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32), 38 | 'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32), 39 | 'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32), 40 | 'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32), 41 | 'image/filename': tf.io.FixedLenFeature([], tf.string) 42 | } 43 | return tf.io.parse_single_example(data_record, feature) 44 | 45 | 46 | def view_records(file_path, class_labels, stride = 1, verbose = 1): 47 | """ 48 | peek at the data using opencv and tensorflow tools. 49 | Inputs: 50 | file_path: path to tfrecord file (usually has 'record' extension) 51 | class_labels: dictionary of labels with name:number pairs (start with 1) 52 | stride (default 1): how many records to jump (you might have thousands so skip a few) 53 | verbose (default 1): display text output if 1, display nothing except images otherwise. 54 | 55 | Usage: 56 | Within the image window, enter 'n' for next image, 'esc' to stop seeing images. 57 | """ 58 | dataset = tf.data.TFRecordDataset([file_path]) 59 | record_iterator = iter(dataset) 60 | num_records = dataset.reduce(np.int64(0), lambda x, _: x + 1).numpy() 61 | 62 | if verbose: 63 | print(f"\nGoing through {num_records} records with a stride of {stride}.") 64 | print("Enter 'n' to bring up next image in record.\n") 65 | for im_ind in range(num_records): 66 | 67 | #Parse and process example 68 | 69 | parsed_example = parse_record(record_iterator.get_next()) 70 | if im_ind % stride != 0: 71 | continue 72 | 73 | fname = parsed_example['image/filename'].numpy() 74 | encoded_image = parsed_example['image/encoded'] 75 | image_np = tf.image.decode_image(encoded_image, channels=3).numpy() 76 | 77 | labels = tf.sparse.to_dense(parsed_example['image/object/class/label'], default_value=0).numpy() 78 | x1norm = tf.sparse.to_dense( parsed_example['image/object/bbox/xmin'], default_value=0).numpy() 79 | x2norm = tf.sparse.to_dense( parsed_example['image/object/bbox/xmax'], default_value=0).numpy() 80 | y1norm = tf.sparse.to_dense( parsed_example['image/object/bbox/ymin'], default_value=0).numpy() 81 | y2norm = tf.sparse.to_dense( parsed_example['image/object/bbox/ymax'], default_value=0).numpy() 82 | 83 | num_bboxes = len(labels) 84 | 85 | #% Process and display image 86 | height, width = image_np[:, :, 1].shape 87 | image_copy = image_np.copy() 88 | image_rgb = cv2.cvtColor(image_copy, cv2.COLOR_BGR2RGB) 89 | 90 | if num_bboxes > 0: 91 | x1 = np.int64(x1norm*width) 92 | x2 = np.int64(x2norm*width) 93 | y1 = np.int64(y1norm*height) 94 | y2 = np.int64(y2norm*height) 95 | for bbox_ind in range(num_bboxes): 96 | bbox = (x1[bbox_ind], y1[bbox_ind], x2[bbox_ind], y2[bbox_ind]) 97 | label_name = list(class_labels.keys())[list(class_labels.values()).index(labels[bbox_ind])] 98 | label_position = (bbox[0] + 5, bbox[1] + 20) 99 | cv_bbox(image_rgb, bbox, color = (250, 250, 150), line_width = 2) 100 | cv2.putText(image_rgb, 101 | label_name, 102 | label_position, 103 | cv2.FONT_HERSHEY_SIMPLEX, 104 | 1, (10, 10, 255), 2); #scale, color, thickness 105 | 106 | if verbose: 107 | print(f"\nImage {im_ind}") 108 | print(f" {fname}") 109 | print(f" Height/width: {height, width}") 110 | print(f" Num bboxes: {num_bboxes}") 111 | cv2.imshow("bb data", image_rgb) 112 | k = cv2.waitKey() 113 | if k == 27: 114 | break 115 | elif k == ord('n'): 116 | continue 117 | cv2.destroyAllWindows() 118 | if verbose: 119 | print("\n\ntfrecord-view: done going throug the data.") 120 | 121 | 122 | #%% 123 | if __name__ == '__main__': 124 | class_labels = {"dog" : 1, "cat": 2 } 125 | #Make the following using voc_to_tfr.py 126 | data_path = r"annotated_images/cats_dogs.record" 127 | 128 | verbose = 1 129 | stride = 1 130 | view_records(data_path, class_labels, stride = stride, verbose = verbose) 131 | -------------------------------------------------------------------------------- /voc_to_tfr.py: -------------------------------------------------------------------------------- 1 | """ 2 | voc_to_tfr.py 3 | Create a TFRecord file from images and Pascal VOC encoded annotation xmls. 4 | 5 | Part of tfrecord-view repo: https://github.com/EricThomson/tfrecord-view 6 | """ 7 | import numpy as np 8 | import tensorflow as tf 9 | import os 10 | import glob 11 | from lxml import etree 12 | 13 | 14 | def create_tf_example(data, 15 | image_path, 16 | label_map_dict, 17 | ignore_difficult_instances=False, 18 | verbose = 1): 19 | """ 20 | Convert image/xml-derived annotation dict to tensorflow example file to be 21 | incorporated into a TFRecord. Adapted from: 22 | https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_pascal_tf_record.py 23 | 24 | Notice that this function normalizes the bounding box coordinates provided 25 | by the raw data, so they are between [0, 1]. 26 | 27 | Inputs: 28 | data: dict holding PASCAL XML fields for a single image (obtained by 29 | running recursive_parse_xml_to_dict) 30 | image_path: Path to image 31 | label_map_dict: A map from string label names to integers ids. 32 | ignore_difficult_instances: Whether to skip difficult instances in the 33 | dataset (default: False). 34 | verbose (default 1): 1 to show image info during encoding, 0 otherwise 35 | 36 | Returns: 37 | example: The converted tf.Example. 38 | 39 | """ 40 | with tf.gfile.GFile(image_path, 'rb') as fid: 41 | encoded_image = fid.read() 42 | 43 | if verbose: print(f"Encoding {image_path}") 44 | # For some reason after processing xml, it frequently returns width/height switched! 45 | width = int(data['size']['width']) 46 | height = int(data['size']['height']) 47 | 48 | #If no data['object'] there are no bounding boxes 49 | if 'object' in data: 50 | annotation_list = data['object'] 51 | xmin = [] 52 | ymin = [] 53 | xmax = [] 54 | ymax = [] 55 | classes = [] 56 | classes_text = [] 57 | difficult_obj = [] 58 | 59 | 60 | for annotation in annotation_list: 61 | difficult = bool(int(annotation['difficult'])) 62 | if ignore_difficult_instances and difficult: 63 | continue 64 | 65 | difficult_obj.append(int(difficult)) 66 | 67 | x1 = annotation['bndbox']['xmin'] 68 | y1 = annotation['bndbox']['ymin'] 69 | x2 = annotation['bndbox']['xmax'] 70 | y2 = annotation['bndbox']['ymax'] 71 | xmin.append(float(x1) / width) 72 | xmax.append(float(x2) / width) 73 | ymin.append(float(y1) / height) 74 | ymax.append(float(y2) / height) 75 | classes_text.append(annotation['name'].encode('utf8')) 76 | classes.append(label_map_dict[annotation['name']]) 77 | 78 | obj_features = { 79 | 'image/height': int64_feature(height), 80 | 'image/width': int64_feature(width), 81 | 'image/filename': bytes_feature(data['filename'].encode('utf8')), 82 | 'image/encoded': bytes_feature(encoded_image), 83 | 'image/object/bbox/xmin': float_list_feature(xmin), 84 | 'image/object/bbox/xmax': float_list_feature(xmax), 85 | 'image/object/bbox/ymin': float_list_feature(ymin), 86 | 'image/object/bbox/ymax': float_list_feature(ymax), 87 | 'image/object/class/text': bytes_list_feature(classes_text), 88 | 'image/object/class/label': int64_list_feature(classes), 89 | 'image/annotated': int64_feature(0) 90 | } 91 | 92 | tf_features = tf.train.Features(feature = obj_features) 93 | tf_example = tf.train.Example(features = tf_features) 94 | 95 | 96 | else: 97 | if verbose: print("No annotations for this one") 98 | obj_features = { 99 | 'image/height': int64_feature(height), 100 | 'image/width': int64_feature(width), 101 | 'image/filename': bytes_feature(data['filename'].encode('utf8')), 102 | 'image/encoded': bytes_feature(encoded_image), 103 | 'image/annotated': int64_feature(0) 104 | } 105 | 106 | tf_features = tf.train.Features(feature = obj_features) 107 | tf_example = tf.train.Example(features = tf_features) 108 | return tf_example 109 | 110 | 111 | #Following feature encoders are from models/research/object_detection/dataset_util.py 112 | def int64_feature(value): 113 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 114 | 115 | 116 | def int64_list_feature(value): 117 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 118 | 119 | 120 | def bytes_feature(value): 121 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 122 | 123 | 124 | def bytes_list_feature(value): 125 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 126 | 127 | 128 | def float_list_feature(value): 129 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 130 | 131 | #Following is from models/research/object_detection/dataset_util.py 132 | def recursive_parse_xml_to_dict(xml): 133 | """Recursively parses XML contents to python dict. 134 | 135 | We assume that `object` tags are the only ones that can appear 136 | multiple times at the same level of a tree. 137 | 138 | Args: 139 | xml: xml tree obtained by parsing XML file contents using lxml.etree 140 | 141 | Returns: 142 | Python dictionary holding XML contents. 143 | """ 144 | if not xml: 145 | return {xml.tag: xml.text} 146 | result = {} 147 | for child in xml: 148 | child_result = recursive_parse_xml_to_dict(child) 149 | if child.tag != 'object': 150 | result[child.tag] = child_result[child.tag] 151 | else: 152 | if child.tag not in result: 153 | result[child.tag] = [] 154 | result[child.tag].append(child_result[child.tag]) 155 | return {xml.tag: result} 156 | 157 | #%% 158 | if __name__ == '__main__': 159 | # Repo 160 | class_labels = {"dog" : 1, "cat": 2 } 161 | data_path = r"annotated_images/" 162 | output_path = data_path + r'cats_dogs.record' 163 | 164 | 165 | verbose = 1 166 | filename_query = os.path.join(data_path, '*.png') #can change to any format (bmp, png etc) 167 | image_paths = np.sort(glob.glob(filename_query)) 168 | 169 | writer = tf.python_io.TFRecordWriter(output_path) 170 | for idx, image_path in enumerate(image_paths): 171 | xml_path = os.path.splitext(image_path)[0] + '.xml' 172 | 173 | with tf.gfile.GFile(xml_path, 'rb') as fid: 174 | xml_str = fid.read() 175 | 176 | xml = etree.fromstring(xml_str) 177 | xml_data = recursive_parse_xml_to_dict(xml)['annotation'] 178 | tf_example = create_tf_example(xml_data, image_path, class_labels, verbose = verbose) 179 | writer.write(tf_example.SerializeToString()) 180 | 181 | writer.close() 182 | print("Done encoding data TFRecord file") 183 | --------------------------------------------------------------------------------