├── .gitignore
├── annotated_images
├── cats_dogs.record
├── cd00.png
├── cd00.xml
├── cd01.png
├── cd01.xml
├── cd02.png
├── cd02.xml
├── cd03.png
├── cd03.xml
├── cd04.png
├── cd04.xml
├── cd05.png
├── cd05.xml
├── cd06.png
└── cd06.xml
├── readme.md
├── view_records.py
├── view_records_tf2.py
└── voc_to_tfr.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *~*
2 | /working
3 | *todo*
4 | *cache*
5 |
--------------------------------------------------------------------------------
/annotated_images/cats_dogs.record:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cats_dogs.record
--------------------------------------------------------------------------------
/annotated_images/cd00.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd00.png
--------------------------------------------------------------------------------
/annotated_images/cd00.xml:
--------------------------------------------------------------------------------
1 |
2 | cats_dogs
3 | cd00.png
4 | /home/eric/Pictures/cats_dogs/cd00.png
5 |
6 | Unknown
7 |
8 |
9 | 384
10 | 270
11 | 3
12 |
13 | 0
14 |
26 |
38 |
39 |
--------------------------------------------------------------------------------
/annotated_images/cd01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd01.png
--------------------------------------------------------------------------------
/annotated_images/cd01.xml:
--------------------------------------------------------------------------------
1 |
2 | cats_dogs
3 | cd01.png
4 | /home/eric/Pictures/cats_dogs/cd01.png
5 |
6 | Unknown
7 |
8 |
9 | 525
10 | 350
11 | 3
12 |
13 | 0
14 |
26 |
38 |
39 |
--------------------------------------------------------------------------------
/annotated_images/cd02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd02.png
--------------------------------------------------------------------------------
/annotated_images/cd02.xml:
--------------------------------------------------------------------------------
1 |
2 | cats_dogs
3 | cd02.png
4 | /home/eric/Pictures/cats_dogs/cd02.png
5 |
6 | Unknown
7 |
8 |
9 | 300
10 | 169
11 | 3
12 |
13 | 0
14 |
26 |
38 |
39 |
--------------------------------------------------------------------------------
/annotated_images/cd03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd03.png
--------------------------------------------------------------------------------
/annotated_images/cd03.xml:
--------------------------------------------------------------------------------
1 |
2 | cats_dogs
3 | cd03.png
4 | /home/eric/Pictures/cats_dogs/cd03.png
5 |
6 | Unknown
7 |
8 |
9 | 840
10 | 553
11 | 3
12 |
13 | 0
14 |
26 |
38 |
39 |
--------------------------------------------------------------------------------
/annotated_images/cd04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd04.png
--------------------------------------------------------------------------------
/annotated_images/cd04.xml:
--------------------------------------------------------------------------------
1 |
2 | cats_dogs
3 | cd04.png
4 | /home/eric/Pictures/cats_dogs/cd04.png
5 |
6 | Unknown
7 |
8 |
9 | 850
10 | 477
11 | 3
12 |
13 | 0
14 |
26 |
38 |
39 |
--------------------------------------------------------------------------------
/annotated_images/cd05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd05.png
--------------------------------------------------------------------------------
/annotated_images/cd05.xml:
--------------------------------------------------------------------------------
1 |
2 | cats_dogs
3 | cd05.png
4 | /home/eric/Pictures/cats_dogs/cd05.png
5 |
6 | Unknown
7 |
8 |
9 | 600
10 | 426
11 | 3
12 |
13 | 0
14 |
26 |
38 |
50 |
51 |
--------------------------------------------------------------------------------
/annotated_images/cd06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd06.png
--------------------------------------------------------------------------------
/annotated_images/cd06.xml:
--------------------------------------------------------------------------------
1 |
2 | cats_dogs
3 | cd06.png
4 | /home/eric/Pictures/cats_dogs/cd06.png
5 |
6 | Unknown
7 |
8 |
9 | 636
10 | 522
11 | 3
12 |
13 | 0
14 |
15 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # tfrecord-view
2 | How to consume data from TFRecord files, which are used in the Tensorflow [object detection api](https://github.com/tensorflow/models/tree/master/research/object_detection). I use it to double-check that my augmentation pipeline and TFRecord encoding worked properly.
3 |
4 | Currently tested in Linux. Not sure about behavior in Windows.
5 |
6 | ## Usage
7 | ### Creating a TFRecord file
8 | If you need to create a TFRecord file, see `voc_to_tfr.py`. The images and annotation files are in `annotated_images/`.
9 |
10 | ### Consuming a TFRecord file
11 | If you already have TFRecord file data, then use `view_records.py` or `view_records_tf2.py` to see how to consume it and show data. The function takes in the path to the TFRecord file, the dictionary of class labels, and a couple of optional keyword arguments like stride. It will then show the images with bounding boxes and labels for each object, if applicable.
12 |
13 | ### Installation
14 | Prereqs: tensorflow, opencv, and numpy.
15 |
16 | git clone XXX
17 | conda create --name tfrecord-view
18 | conda activate tfrecord-view
19 | conda install python=3 opencv numpy
20 | conda install -c anaconda "tensorflow-gpu>=1.12"
21 |
22 | This also assumes you have installed the object detection api (https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md), as we will use some utilities that come with it (in particular `utils.dataset_util`).
23 |
24 | ## For more info
25 | Construction of TFRecord files:
26 | - https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md
27 | - https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_pascal_tf_record.py
28 |
29 | On consuming TFRecord files, there aren't a lot of great resources out there. I used this (and probably 20 other sites I can't even remember: this is one aspect of the api that keeps changing, and will surely change again once it is ported to Tensorflow 2):
30 | - https://stackoverflow.com/a/56932321/1886357
31 |
32 | I recently found this repo which is similar to this one, and has some nice ideas:
33 | - https://github.com/yinguobing/tfrecord_utility
34 |
35 | ## To do
36 | - Functionalize encoder in voc_to_tfr.py.
37 | - Look over the tfrecord_utility repo: maybe the author found a way to simplify reading data?
38 |
39 | #### Sources for images
40 | The images are of cats and dogs, with one that has no label. The images were scraped from:
41 | - https://huggablemuggs.com/8-tricks-to-help-your-cat-and-dog-to-get-along/
42 | - https://2catsandablog.wordpress.com/2018/08/14/do-cats-and-dogs-really-fight-like-cats-and-dogs/
43 | - http://www.waycooldogs.com/feeding-cats-dogs-together/
44 | - https://phz8.petinsurance.com/ownership-adoption/pet-ownership/pet-behavior/7-tips-on-combining-multi-pet-household
45 | - https://www.mercurynews.com/2019/04/15/whos-going-to-tell-mom-shes-feeding-her-dogs-the-wrong-food/
46 | - https://www.meowingtons.com/blogs/lolcats/snuggly-cat-and-dog-best-friends-to-cheer-you-up
47 | - https://www.thesprucepets.com/cute-aquarium-sea-critters-4146506
48 |
--------------------------------------------------------------------------------
/view_records.py:
--------------------------------------------------------------------------------
1 | """
2 | view_records.py:
3 | Consume and display data from a tfrecord file: pulls image and bounding boxes for display
4 | so you can make sure things look reasonable, e.g., after augmentation.
5 |
6 | Hit 'n' for 'next' image, or 'esc' to quit.
7 |
8 | Part of tfrecord-view repo: https://github.com/EricThomson/tfrecord-view
9 |
10 | """
11 |
12 | import cv2
13 | import numpy as np
14 | import tensorflow as tf
15 | tf.enable_eager_execution()
16 | import warnings
17 | warnings.filterwarnings('ignore', category = FutureWarning) #tf 1.14 and np 1.17 are clashing: temporary solution
18 |
def cv_bbox(image, bbox, color = (255, 255, 255), line_width = 2):
    """
    use opencv to draw a bbox on an image (the image is modified in place)

    Inputs:
        image: image array handed directly to cv2.rectangle
        bbox: sequence in standard form x1 y1 x2 y2
        color (default (255, 255, 255)): color passed through to cv2.rectangle
        line_width (default 2): line thickness passed through to cv2.rectangle

    Returns:
        None
    """
    # Callers in this file pass numpy integer scalars; coerce to plain Python
    # ints so the corner points do not depend on how a given OpenCV build
    # parses numpy scalar types.
    x1, y1, x2, y2 = (int(coord) for coord in bbox[:4])
    cv2.rectangle(image, (x1, y1), (x2, y2), color, line_width)
27 |
28 |
def parse_record(data_record):
    """
    Parse one serialized record read from a tfrecord file.

    Inputs:
        data_record: a single serialized example, as handed out by the
            iterator created from the TFRecordDataset.

    Returns:
        The result of tf.parse_single_example for the schema below: encoded
        image and filename as fixed-length strings, class labels and the four
        bbox coordinate lists as variable-length features.
    """
    schema = {
        'image/encoded': tf.FixedLenFeature([], tf.string),
        'image/filename': tf.FixedLenFeature([], tf.string),
        'image/object/class/label': tf.VarLenFeature(tf.int64),
    }
    # The four bbox coordinate lists share the same declaration.
    for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
        schema[f'image/object/bbox/{corner}'] = tf.VarLenFeature(tf.float32)
    return tf.parse_single_example(data_record, schema)
43 |
44 |
def view_records(file_path, class_labels, stride = 1, verbose = 1):
    """
    peek at the data using opencv and tensorflow tools.

    Every stride-th record is decoded, its normalized bounding boxes are scaled
    to pixel coordinates, and the boxes and class names are drawn on the image
    before it is shown in an OpenCV window.

    Inputs:
        file_path: path to tfrecord file (usually has 'record' extension)
        class_labels: dictionary of labels with name:number pairs (start with 1).
            A label number that is in the record but not in this dictionary is
            drawn as the number itself.
        stride (default 1): how many records to jump (you might have thousands so skip a few)
        verbose (default 1): display text output if 1, display nothing except images otherwise.

    Usage:
        Within the image window, enter 'n' (any key other than 'esc' behaves
        the same) for next image, 'esc' to stop seeing images.
    """
    dataset = tf.data.TFRecordDataset([file_path])
    record_iterator = dataset.make_one_shot_iterator()
    num_records = dataset.reduce(np.int64(0), lambda x, _: x + 1).numpy()

    # Invert name:number into number:name once instead of searching the
    # dictionary for every box. setdefault keeps the first name when two names
    # share a number, which is what a first-match search would return.
    label_names = {}
    for name, number in class_labels.items():
        label_names.setdefault(number, name)

    if verbose:
        print(f"\nGoing through {num_records} records with a stride of {stride}.")
        print("Enter 'n' to bring up next image in record.\n")

    try:
        for im_ind in range(num_records):
            # Always advance the iterator, but only parse records that are shown.
            data_record = record_iterator.get_next()
            if im_ind % stride != 0:
                continue
            parsed_example = parse_record(data_record)

            fname = parsed_example['image/filename'].numpy()
            encoded_image = parsed_example['image/encoded']
            image_np = tf.image.decode_image(encoded_image, channels=3).numpy()

            labels = tf.sparse_tensor_to_dense(parsed_example['image/object/class/label'], default_value=0).numpy()
            x1norm = tf.sparse_tensor_to_dense(parsed_example['image/object/bbox/xmin'], default_value=0).numpy()
            x2norm = tf.sparse_tensor_to_dense(parsed_example['image/object/bbox/xmax'], default_value=0).numpy()
            y1norm = tf.sparse_tensor_to_dense(parsed_example['image/object/bbox/ymin'], default_value=0).numpy()
            y2norm = tf.sparse_tensor_to_dense(parsed_example['image/object/bbox/ymax'], default_value=0).numpy()

            num_bboxes = len(labels)

            height, width = image_np.shape[:2]
            # Swap the channel order for display: cv2.imshow expects BGR.
            # NOTE(review): assumes decode_image returned RGB - confirm.
            # cvtColor returns a new array, so image_np itself is untouched.
            image_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

            # Scale normalized coordinates to pixels (truncating toward zero).
            x1 = np.int64(x1norm*width)
            x2 = np.int64(x2norm*width)
            y1 = np.int64(y1norm*height)
            y2 = np.int64(y2norm*height)
            for label, left, top, right, bottom in zip(labels, x1, y1, x2, y2):
                # Plain ints keep the OpenCV calls independent of numpy scalar types.
                bbox = (int(left), int(top), int(right), int(bottom))
                label_name = label_names.get(int(label), str(label))
                label_position = (bbox[0] + 5, bbox[1] + 20)
                cv_bbox(image_bgr, bbox, color = (250, 250, 150), line_width = 2)
                cv2.putText(image_bgr,
                            label_name,
                            label_position,
                            cv2.FONT_HERSHEY_SIMPLEX,
                            1, (10, 10, 255), 2)  #scale, color, thickness

            if verbose:
                print(f"\nImage {im_ind}")
                print(f" {fname}")
                print(f" Height/width: {height, width}")
                print(f" Num bboxes: {num_bboxes}")
            cv2.imshow("bb data", image_bgr)
            # 27 is the key code for 'esc'; any other key moves to the next record.
            if cv2.waitKey() == 27:
                break
    finally:
        # Close the display window even if a record fails to decode.
        cv2.destroyAllWindows()
    if verbose:
        print("\n\ntfrecord-view: done going throug the data.")
119 |
120 |
121 | #%%
if __name__ == '__main__':
    # Display settings
    stride = 1
    verbose = 1

    # Record file to look at (make it using voc_to_tfr.py) and its name:number labels
    data_path = r"annotated_images/cats_dogs.record"
    class_labels = dict(dog=1, cat=2)

    view_records(data_path, class_labels, stride = stride, verbose = verbose)
130 |
--------------------------------------------------------------------------------
/view_records_tf2.py:
--------------------------------------------------------------------------------
1 | """
2 | The tf2 version is based on https://github.com/jschw/tfrecord-view/blob/master/tfrecord_view_gui.py
3 |
4 | view_records_tf2.py:
5 | Consume and display data from a tfrecord file: pulls image and bounding boxes for display
6 | so you can make sure things look reasonable, e.g., after augmentation.
7 |
8 | Hit 'n' for 'next' image, or 'esc' to quit.
9 |
10 | Part of tfrecord-view repo: https://github.com/EricThomson/tfrecord-view
11 |
12 | """
13 |
14 | import cv2
15 | import numpy as np
16 | import tensorflow as tf
17 | import warnings
18 | warnings.filterwarnings('ignore', category = FutureWarning) #tf 1.14 and np 1.17 are clashing: temporary solution
19 |
def cv_bbox(image, bbox, color = (255, 255, 255), line_width = 2):
    """
    use opencv to draw a bbox on an image (the image is modified in place)

    Inputs:
        image: image array handed directly to cv2.rectangle
        bbox: sequence in standard form x1 y1 x2 y2
        color (default (255, 255, 255)): color passed through to cv2.rectangle
        line_width (default 2): line thickness passed through to cv2.rectangle

    Returns:
        None
    """
    # Callers in this file pass numpy integer scalars; coerce to plain Python
    # ints so the corner points do not depend on how a given OpenCV build
    # parses numpy scalar types.
    x1, y1, x2, y2 = (int(coord) for coord in bbox[:4])
    cv2.rectangle(image, (x1, y1), (x2, y2), color, line_width)
28 |
29 |
def parse_record(data_record):
    """
    Parse one serialized record read from a tfrecord file.

    Inputs:
        data_record: a single serialized example, as handed out by the
            iterator created from the TFRecordDataset.

    Returns:
        The result of tf.io.parse_single_example for the schema below: encoded
        image and filename as fixed-length strings, class labels and the four
        bbox coordinate lists as variable-length features.
    """
    schema = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/filename': tf.io.FixedLenFeature([], tf.string),
        'image/object/class/label': tf.io.VarLenFeature(tf.int64),
    }
    # The four bbox coordinate lists share the same declaration.
    for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
        schema[f'image/object/bbox/{corner}'] = tf.io.VarLenFeature(tf.float32)
    return tf.io.parse_single_example(data_record, schema)
44 |
45 |
def view_records(file_path, class_labels, stride = 1, verbose = 1):
    """
    peek at the data using opencv and tensorflow tools.

    Every stride-th record is decoded, its normalized bounding boxes are scaled
    to pixel coordinates, and the boxes and class names are drawn on the image
    before it is shown in an OpenCV window.

    Inputs:
        file_path: path to tfrecord file (usually has 'record' extension)
        class_labels: dictionary of labels with name:number pairs (start with 1).
            A label number that is in the record but not in this dictionary is
            drawn as the number itself.
        stride (default 1): how many records to jump (you might have thousands so skip a few)
        verbose (default 1): display text output if 1, display nothing except images otherwise.

    Usage:
        Within the image window, enter 'n' (any key other than 'esc' behaves
        the same) for next image, 'esc' to stop seeing images.
    """
    dataset = tf.data.TFRecordDataset([file_path])
    num_records = dataset.reduce(np.int64(0), lambda x, _: x + 1).numpy()

    # Invert name:number into number:name once instead of searching the
    # dictionary for every box. setdefault keeps the first name when two names
    # share a number, which is what a first-match search would return.
    label_names = {}
    for name, number in class_labels.items():
        label_names.setdefault(number, name)

    if verbose:
        print(f"\nGoing through {num_records} records with a stride of {stride}.")
        print("Enter 'n' to bring up next image in record.\n")

    try:
        # Iterating the dataset directly yields one serialized record per step.
        for im_ind, data_record in enumerate(dataset):
            # Skipped records are consumed but never parsed.
            if im_ind % stride != 0:
                continue
            parsed_example = parse_record(data_record)

            fname = parsed_example['image/filename'].numpy()
            encoded_image = parsed_example['image/encoded']
            image_np = tf.image.decode_image(encoded_image, channels=3).numpy()

            labels = tf.sparse.to_dense(parsed_example['image/object/class/label'], default_value=0).numpy()
            x1norm = tf.sparse.to_dense(parsed_example['image/object/bbox/xmin'], default_value=0).numpy()
            x2norm = tf.sparse.to_dense(parsed_example['image/object/bbox/xmax'], default_value=0).numpy()
            y1norm = tf.sparse.to_dense(parsed_example['image/object/bbox/ymin'], default_value=0).numpy()
            y2norm = tf.sparse.to_dense(parsed_example['image/object/bbox/ymax'], default_value=0).numpy()

            num_bboxes = len(labels)

            height, width = image_np.shape[:2]
            # Swap the channel order for display: cv2.imshow expects BGR.
            # NOTE(review): assumes decode_image returned RGB - confirm.
            # cvtColor returns a new array, so image_np itself is untouched.
            image_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

            # Scale normalized coordinates to pixels (truncating toward zero).
            x1 = np.int64(x1norm*width)
            x2 = np.int64(x2norm*width)
            y1 = np.int64(y1norm*height)
            y2 = np.int64(y2norm*height)
            for label, left, top, right, bottom in zip(labels, x1, y1, x2, y2):
                # Plain ints keep the OpenCV calls independent of numpy scalar types.
                bbox = (int(left), int(top), int(right), int(bottom))
                label_name = label_names.get(int(label), str(label))
                label_position = (bbox[0] + 5, bbox[1] + 20)
                cv_bbox(image_bgr, bbox, color = (250, 250, 150), line_width = 2)
                cv2.putText(image_bgr,
                            label_name,
                            label_position,
                            cv2.FONT_HERSHEY_SIMPLEX,
                            1, (10, 10, 255), 2)  #scale, color, thickness

            if verbose:
                print(f"\nImage {im_ind}")
                print(f" {fname}")
                print(f" Height/width: {height, width}")
                print(f" Num bboxes: {num_bboxes}")
            cv2.imshow("bb data", image_bgr)
            # 27 is the key code for 'esc'; any other key moves to the next record.
            if cv2.waitKey() == 27:
                break
    finally:
        # Close the display window even if a record fails to decode.
        cv2.destroyAllWindows()
    if verbose:
        print("\n\ntfrecord-view: done going throug the data.")
120 |
121 |
122 | #%%
if __name__ == '__main__':
    # Display settings
    stride = 1
    verbose = 1

    # Record file to look at (make it using voc_to_tfr.py) and its name:number labels
    data_path = r"annotated_images/cats_dogs.record"
    class_labels = dict(dog=1, cat=2)

    view_records(data_path, class_labels, stride = stride, verbose = verbose)
131 |
--------------------------------------------------------------------------------
/voc_to_tfr.py:
--------------------------------------------------------------------------------
1 | """
2 | voc_to_tfr.py
3 | Create a TFRecord file from images and Pascal VOC encoded annotation xmls.
4 |
5 | Part of tfrecord-view repo: https://github.com/EricThomson/tfrecord-view
6 | """
7 | import numpy as np
8 | import tensorflow as tf
9 | import os
10 | import glob
11 | from lxml import etree
12 |
13 |
def create_tf_example(data,
                      image_path,
                      label_map_dict,
                      ignore_difficult_instances=False,
                      verbose = 1):
    """
    Convert image/xml-derived annotation dict to tensorflow example file to be
    incorporated into a TFRecord. Adapted from:
    https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_pascal_tf_record.py

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data (divides them by the image width/height from the xml), so
    they are between [0, 1].

    Inputs:
        data: dict holding PASCAL XML fields for a single image (obtained by
            running recursive_parse_xml_to_dict)
        image_path: Path to image
        label_map_dict: A map from string label names to integers ids.
        ignore_difficult_instances: Whether to skip difficult instances in the
            dataset (default: False).
        verbose (default 1): 1 to show image info during encoding, 0 otherwise

    Returns:
        example: The converted tf.Example. The bbox and class features are only
            present when the xml has at least one 'object' entry.

    Raises:
        KeyError: if an object's name is not in label_map_dict.
    """
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image = fid.read()

    if verbose: print(f"Encoding {image_path}")
    # Note from the original author: after processing the xml, width/height
    # have frequently been seen switched. The values are used as given.
    width = int(data['size']['width'])
    height = int(data['size']['height'])

    # Features written for every image, annotated or not.
    # NOTE(review): 'image/annotated' is 0 even when boxes are present - confirm
    # this is intended.
    obj_features = {
        'image/height': int64_feature(height),
        'image/width': int64_feature(width),
        'image/filename': bytes_feature(data['filename'].encode('utf8')),
        'image/encoded': bytes_feature(encoded_image),
        'image/annotated': int64_feature(0)
    }

    #If no data['object'] there are no bounding boxes
    if 'object' in data:
        xmin = []
        ymin = []
        xmax = []
        ymax = []
        classes = []
        classes_text = []

        for annotation in data['object']:
            # A missing or empty 'difficult' tag is treated as not difficult.
            difficult = bool(int(annotation.get('difficult') or 0))
            if ignore_difficult_instances and difficult:
                continue

            bndbox = annotation['bndbox']
            xmin.append(float(bndbox['xmin']) / width)
            xmax.append(float(bndbox['xmax']) / width)
            ymin.append(float(bndbox['ymin']) / height)
            ymax.append(float(bndbox['ymax']) / height)
            classes_text.append(annotation['name'].encode('utf8'))
            classes.append(label_map_dict[annotation['name']])

        obj_features.update({
            'image/object/bbox/xmin': float_list_feature(xmin),
            'image/object/bbox/xmax': float_list_feature(xmax),
            'image/object/bbox/ymin': float_list_feature(ymin),
            'image/object/bbox/ymax': float_list_feature(ymax),
            'image/object/class/text': bytes_list_feature(classes_text),
            'image/object/class/label': int64_list_feature(classes),
        })
    else:
        if verbose: print("No annotations for this one")

    tf_features = tf.train.Features(feature = obj_features)
    tf_example = tf.train.Example(features = tf_features)
    return tf_example
109 |
110 |
111 | #Following feature encoders are from models/research/object_detection/dataset_util.py
def int64_feature(value):
    """Wrap a single value in a tf.train.Feature holding a one-element Int64List."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)
114 |
115 |
def int64_list_feature(value):
    """Wrap a sequence of values in a tf.train.Feature holding an Int64List."""
    int_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int_list)
118 |
119 |
def bytes_feature(value):
    """Wrap a single value in a tf.train.Feature holding a one-element BytesList."""
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)
122 |
123 |
def bytes_list_feature(value):
    """Wrap a sequence of values in a tf.train.Feature holding a BytesList."""
    byte_list = tf.train.BytesList(value=value)
    return tf.train.Feature(bytes_list=byte_list)
126 |
127 |
def float_list_feature(value):
    """Wrap a sequence of values in a tf.train.Feature holding a FloatList."""
    float_list = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=float_list)
130 |
131 | #Following is from models/research/object_detection/dataset_util.py
def recursive_parse_xml_to_dict(xml):
    """Recursively parses XML contents to python dict.

    We assume that `object` tags are the only ones that can appear
    multiple times at the same level of a tree. For any other tag that is
    repeated, the last occurrence overwrites the earlier ones.

    Args:
        xml: xml tree obtained by parsing XML file contents using lxml.etree

    Returns:
        Python dictionary holding XML contents: {tag: text} for an element
        without children, otherwise {tag: {child_tag: child_contents, ...}},
        where the 'object' entry is always a list (one item per `object` child).
    """
    # Test the number of children explicitly: truth-testing an element is
    # deprecated and its meaning is announced to change.
    if len(xml) == 0:
        return {xml.tag: xml.text}
    result = {}
    for child in xml:
        child_result = recursive_parse_xml_to_dict(child)
        if child.tag != 'object':
            result[child.tag] = child_result[child.tag]
        else:
            # Collect repeated `object` children into a list.
            result.setdefault(child.tag, []).append(child_result[child.tag])
    return {xml.tag: result}
156 |
157 | #%%
if __name__ == '__main__':
    # Repo
    class_labels = {"dog" : 1, "cat": 2 }
    data_path = r"annotated_images/"
    output_path = os.path.join(data_path, 'cats_dogs.record')

    verbose = 1
    filename_query = os.path.join(data_path, '*.png')  #can change to any format (bmp, png etc)
    # Sort so records are written in a stable order: glob gives no ordering guarantee.
    image_paths = sorted(glob.glob(filename_query))

    # The context manager closes the record file even if encoding an image fails.
    with tf.python_io.TFRecordWriter(output_path) as writer:
        for image_path in image_paths:
            # Each image is expected to have an annotation xml with the same base name.
            xml_path = os.path.splitext(image_path)[0] + '.xml'

            with tf.gfile.GFile(xml_path, 'rb') as fid:
                xml_str = fid.read()

            xml = etree.fromstring(xml_str)
            xml_data = recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = create_tf_example(xml_data, image_path, class_labels, verbose = verbose)
            writer.write(tf_example.SerializeToString())

    print("Done encoding data TFRecord file")
183 |
--------------------------------------------------------------------------------