├── .gitignore
├── annotated_images
    ├── cats_dogs.record
    ├── cd00.png
    ├── cd00.xml
    ├── cd01.png
    ├── cd01.xml
    ├── cd02.png
    ├── cd02.xml
    ├── cd03.png
    ├── cd03.xml
    ├── cd04.png
    ├── cd04.xml
    ├── cd05.png
    ├── cd05.xml
    ├── cd06.png
    └── cd06.xml
├── readme.md
├── view_records.py
├── view_records_tf2.py
└── voc_to_tfr.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *~*
2 | /working
3 | *todo*
4 | *cache*
5 | 


--------------------------------------------------------------------------------
/annotated_images/cats_dogs.record:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cats_dogs.record


--------------------------------------------------------------------------------
/annotated_images/cd00.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd00.png


--------------------------------------------------------------------------------
/annotated_images/cd00.xml:
--------------------------------------------------------------------------------
 1 | <annotation>
 2 | 	<folder>cats_dogs</folder>
 3 | 	<filename>cd00.png</filename>
 4 | 	<path>/home/eric/Pictures/cats_dogs/cd00.png</path>
 5 | 	<source>
 6 | 		<database>Unknown</database>
 7 | 	</source>
 8 | 	<size>
 9 | 		<width>384</width>
10 | 		<height>270</height>
11 | 		<depth>3</depth>
12 | 	</size>
13 | 	<segmented>0</segmented>
14 | 	<object>
15 | 		<name>cat</name>
16 | 		<pose>Unspecified</pose>
17 | 		<truncated>0</truncated>
18 | 		<difficult>0</difficult>
19 | 		<bndbox>
20 | 			<xmin>127</xmin>
21 | 			<ymin>111</ymin>
22 | 			<xmax>236</xmax>
23 | 			<ymax>194</ymax>
24 | 		</bndbox>
25 | 	</object>
26 | 	<object>
27 | 		<name>dog</name>
28 | 		<pose>Unspecified</pose>
29 | 		<truncated>0</truncated>
30 | 		<difficult>0</difficult>
31 | 		<bndbox>
32 | 			<xmin>195</xmin>
33 | 			<ymin>26</ymin>
34 | 			<xmax>347</xmax>
35 | 			<ymax>163</ymax>
36 | 		</bndbox>
37 | 	</object>
38 | </annotation>
39 | 


--------------------------------------------------------------------------------
/annotated_images/cd01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd01.png


--------------------------------------------------------------------------------
/annotated_images/cd01.xml:
--------------------------------------------------------------------------------
 1 | <annotation>
 2 | 	<folder>cats_dogs</folder>
 3 | 	<filename>cd01.png</filename>
 4 | 	<path>/home/eric/Pictures/cats_dogs/cd01.png</path>
 5 | 	<source>
 6 | 		<database>Unknown</database>
 7 | 	</source>
 8 | 	<size>
 9 | 		<width>525</width>
10 | 		<height>350</height>
11 | 		<depth>3</depth>
12 | 	</size>
13 | 	<segmented>0</segmented>
14 | 	<object>
15 | 		<name>dog</name>
16 | 		<pose>Unspecified</pose>
17 | 		<truncated>0</truncated>
18 | 		<difficult>0</difficult>
19 | 		<bndbox>
20 | 			<xmin>95</xmin>
21 | 			<ymin>67</ymin>
22 | 			<xmax>229</xmax>
23 | 			<ymax>228</ymax>
24 | 		</bndbox>
25 | 	</object>
26 | 	<object>
27 | 		<name>cat</name>
28 | 		<pose>Unspecified</pose>
29 | 		<truncated>0</truncated>
30 | 		<difficult>0</difficult>
31 | 		<bndbox>
32 | 			<xmin>258</xmin>
33 | 			<ymin>160</ymin>
34 | 			<xmax>335</xmax>
35 | 			<ymax>245</ymax>
36 | 		</bndbox>
37 | 	</object>
38 | </annotation>
39 | 


--------------------------------------------------------------------------------
/annotated_images/cd02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd02.png


--------------------------------------------------------------------------------
/annotated_images/cd02.xml:
--------------------------------------------------------------------------------
 1 | <annotation>
 2 | 	<folder>cats_dogs</folder>
 3 | 	<filename>cd02.png</filename>
 4 | 	<path>/home/eric/Pictures/cats_dogs/cd02.png</path>
 5 | 	<source>
 6 | 		<database>Unknown</database>
 7 | 	</source>
 8 | 	<size>
 9 | 		<width>300</width>
10 | 		<height>169</height>
11 | 		<depth>3</depth>
12 | 	</size>
13 | 	<segmented>0</segmented>
14 | 	<object>
15 | 		<name>dog</name>
16 | 		<pose>Unspecified</pose>
17 | 		<truncated>0</truncated>
18 | 		<difficult>0</difficult>
19 | 		<bndbox>
20 | 			<xmin>77</xmin>
21 | 			<ymin>29</ymin>
22 | 			<xmax>155</xmax>
23 | 			<ymax>88</ymax>
24 | 		</bndbox>
25 | 	</object>
26 | 	<object>
27 | 		<name>cat</name>
28 | 		<pose>Unspecified</pose>
29 | 		<truncated>0</truncated>
30 | 		<difficult>0</difficult>
31 | 		<bndbox>
32 | 			<xmin>159</xmin>
33 | 			<ymin>81</ymin>
34 | 			<xmax>203</xmax>
35 | 			<ymax>124</ymax>
36 | 		</bndbox>
37 | 	</object>
38 | </annotation>
39 | 


--------------------------------------------------------------------------------
/annotated_images/cd03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd03.png


--------------------------------------------------------------------------------
/annotated_images/cd03.xml:
--------------------------------------------------------------------------------
 1 | <annotation>
 2 | 	<folder>cats_dogs</folder>
 3 | 	<filename>cd03.png</filename>
 4 | 	<path>/home/eric/Pictures/cats_dogs/cd03.png</path>
 5 | 	<source>
 6 | 		<database>Unknown</database>
 7 | 	</source>
 8 | 	<size>
 9 | 		<width>840</width>
10 | 		<height>553</height>
11 | 		<depth>3</depth>
12 | 	</size>
13 | 	<segmented>0</segmented>
14 | 	<object>
15 | 		<name>cat</name>
16 | 		<pose>Unspecified</pose>
17 | 		<truncated>0</truncated>
18 | 		<difficult>0</difficult>
19 | 		<bndbox>
20 | 			<xmin>405</xmin>
21 | 			<ymin>48</ymin>
22 | 			<xmax>731</xmax>
23 | 			<ymax>324</ymax>
24 | 		</bndbox>
25 | 	</object>
26 | 	<object>
27 | 		<name>dog</name>
28 | 		<pose>Unspecified</pose>
29 | 		<truncated>0</truncated>
30 | 		<difficult>0</difficult>
31 | 		<bndbox>
32 | 			<xmin>99</xmin>
33 | 			<ymin>39</ymin>
34 | 			<xmax>559</xmax>
35 | 			<ymax>513</ymax>
36 | 		</bndbox>
37 | 	</object>
38 | </annotation>
39 | 


--------------------------------------------------------------------------------
/annotated_images/cd04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd04.png


--------------------------------------------------------------------------------
/annotated_images/cd04.xml:
--------------------------------------------------------------------------------
 1 | <annotation>
 2 | 	<folder>cats_dogs</folder>
 3 | 	<filename>cd04.png</filename>
 4 | 	<path>/home/eric/Pictures/cats_dogs/cd04.png</path>
 5 | 	<source>
 6 | 		<database>Unknown</database>
 7 | 	</source>
 8 | 	<size>
 9 | 		<width>850</width>
10 | 		<height>477</height>
11 | 		<depth>3</depth>
12 | 	</size>
13 | 	<segmented>0</segmented>
14 | 	<object>
15 | 		<name>cat</name>
16 | 		<pose>Unspecified</pose>
17 | 		<truncated>0</truncated>
18 | 		<difficult>0</difficult>
19 | 		<bndbox>
20 | 			<xmin>392</xmin>
21 | 			<ymin>7</ymin>
22 | 			<xmax>642</xmax>
23 | 			<ymax>239</ymax>
24 | 		</bndbox>
25 | 	</object>
26 | 	<object>
27 | 		<name>dog</name>
28 | 		<pose>Unspecified</pose>
29 | 		<truncated>0</truncated>
30 | 		<difficult>0</difficult>
31 | 		<bndbox>
32 | 			<xmin>161</xmin>
33 | 			<ymin>50</ymin>
34 | 			<xmax>657</xmax>
35 | 			<ymax>464</ymax>
36 | 		</bndbox>
37 | 	</object>
38 | </annotation>
39 | 


--------------------------------------------------------------------------------
/annotated_images/cd05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd05.png


--------------------------------------------------------------------------------
/annotated_images/cd05.xml:
--------------------------------------------------------------------------------
 1 | <annotation>
 2 | 	<folder>cats_dogs</folder>
 3 | 	<filename>cd05.png</filename>
 4 | 	<path>/home/eric/Pictures/cats_dogs/cd05.png</path>
 5 | 	<source>
 6 | 		<database>Unknown</database>
 7 | 	</source>
 8 | 	<size>
 9 | 		<width>600</width>
10 | 		<height>426</height>
11 | 		<depth>3</depth>
12 | 	</size>
13 | 	<segmented>0</segmented>
14 | 	<object>
15 | 		<name>dog</name>
16 | 		<pose>Unspecified</pose>
17 | 		<truncated>0</truncated>
18 | 		<difficult>0</difficult>
19 | 		<bndbox>
20 | 			<xmin>82</xmin>
21 | 			<ymin>159</ymin>
22 | 			<xmax>272</xmax>
23 | 			<ymax>332</ymax>
24 | 		</bndbox>
25 | 	</object>
26 | 	<object>
27 | 		<name>dog</name>
28 | 		<pose>Unspecified</pose>
29 | 		<truncated>0</truncated>
30 | 		<difficult>0</difficult>
31 | 		<bndbox>
32 | 			<xmin>195</xmin>
33 | 			<ymin>188</ymin>
34 | 			<xmax>402</xmax>
35 | 			<ymax>346</ymax>
36 | 		</bndbox>
37 | 	</object>
38 | 	<object>
39 | 		<name>cat</name>
40 | 		<pose>Unspecified</pose>
41 | 		<truncated>0</truncated>
42 | 		<difficult>0</difficult>
43 | 		<bndbox>
44 | 			<xmin>309</xmin>
45 | 			<ymin>230</ymin>
46 | 			<xmax>420</xmax>
47 | 			<ymax>319</ymax>
48 | 		</bndbox>
49 | 	</object>
50 | </annotation>
51 | 


--------------------------------------------------------------------------------
/annotated_images/cd06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricThomson/tfrecord-view/7dc77b9424cb98806beacdd767ebffe54182f424/annotated_images/cd06.png


--------------------------------------------------------------------------------
/annotated_images/cd06.xml:
--------------------------------------------------------------------------------
 1 | <annotation verified="yes">
 2 | 	<folder>cats_dogs</folder>
 3 | 	<filename>cd06.png</filename>
 4 | 	<path>/home/eric/Pictures/cats_dogs/cd06.png</path>
 5 | 	<source>
 6 | 		<database>Unknown</database>
 7 | 	</source>
 8 | 	<size>
 9 | 		<width>636</width>
10 | 		<height>522</height>
11 | 		<depth>3</depth>
12 | 	</size>
13 | 	<segmented>0</segmented>
14 | </annotation>
15 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | # tfrecord-view
 2 | How to consume data from TFRecord files, which are used in the Tensorflow [object detection api](https://github.com/tensorflow/models/tree/master/research/object_detection). I use it to double-check that my augmentation pipeline and TFRecord encoding worked properly.
 3 | 
 4 | Currently tested in Linux. Not sure about behavior in Windows.
 5 | 
 6 | ## Usage
 7 | ### Creating a TFRecord file
 8 | If you need to create a TFRecord file, see `voc_to_tfr.py`. The images and annotation files are in `annotated_images/`.
 9 | 
10 | ### Consuming a TFRecord file
11 | If you already have TFRecord file data, then use `view_records.py` or `view_records_tf2.py` to see how to consume it and show data. The function takes in the path to the TFRecord file, the dictionary of class labels, and a couple of optional keyword arguments like stride. It will then show the images with bounding boxes and labels for each object, if applicable.
12 | 
13 | ### Installation
14 | Prereqs: tensorflow, opencv, and numpy.
15 | 
16 |     git clone XXX
17 |     conda create --name tfrecord-view
18 |     conda activate tfrecord-view
19 |     conda install python=3 opencv numpy
20 |     conda install -c anaconda "tensorflow-gpu>=1.12"
21 | 
22 |  This also assumes you have installed the object detection api (https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md), as we will use some utilities that come with it (in particular `utils.dataset_util`).
23 | 
24 | ## For more info
25 | Construction of TFRecord files:
26 | - https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md
27 | - https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_pascal_tf_record.py
28 | 
29 | On consuming TFRecord files, there aren't a lot of great resources out there. I used this (and probably 20 other sites I can't even remember; this is one aspect of the api that keeps changing, and will surely change again once it is ported to Tensorflow 2):
30 | - https://stackoverflow.com/a/56932321/1886357
31 | 
32 | I recently found this repo which is similar to this one, and has some nice ideas:
33 | - https://github.com/yinguobing/tfrecord_utility
34 | 
35 | ## To do
36 | - Functionalize encoder in voc_to_tfr.py.
37 | - Look over the tfrecord_utility repo: maybe the author found a way to simplify reading data?
38 | 
39 | #### Sources for images
40 | The images are of cats and dogs, with one that has no label. The images were scraped from:
41 | - https://huggablemuggs.com/8-tricks-to-help-your-cat-and-dog-to-get-along/
42 | - https://2catsandablog.wordpress.com/2018/08/14/do-cats-and-dogs-really-fight-like-cats-and-dogs/
43 | - http://www.waycooldogs.com/feeding-cats-dogs-together/
44 | - https://phz8.petinsurance.com/ownership-adoption/pet-ownership/pet-behavior/7-tips-on-combining-multi-pet-household
45 | - https://www.mercurynews.com/2019/04/15/whos-going-to-tell-mom-shes-feeding-her-dogs-the-wrong-food/
46 | - https://www.meowingtons.com/blogs/lolcats/snuggly-cat-and-dog-best-friends-to-cheer-you-up
47 | - https://www.thesprucepets.com/cute-aquarium-sea-critters-4146506
48 | 


--------------------------------------------------------------------------------
/view_records.py:
--------------------------------------------------------------------------------
  1 | """
  2 | view_records.py:
  3 | Consume and display data from a tfrecord file: pulls image and bounding boxes for display
  4 | so you can make sure things look reasonable, e.g., after augmentation.
  5 | 
  6 | Hit 'n' for 'next' image, or 'esc' to quit.
  7 | 
  8 | Part of tfrecord-view repo: https://github.com/EricThomson/tfrecord-view
  9 | 
 10 | """
 11 | 
 12 | import cv2
 13 | import numpy as np
 14 | import tensorflow as tf
 15 | tf.enable_eager_execution()
 16 | import warnings
 17 | warnings.filterwarnings('ignore', category = FutureWarning)  #tf 1.14 and np 1.17 are clashing: temporary solution
 18 | 
 19 | def cv_bbox(image, bbox, color = (255, 255, 255), line_width = 2):
 20 |     """
 21 |     use opencv to add bbox to an image
 22 |     assumes bbox is in standard form x1 y1 x2 y2
 23 |     """
 24 | 
 25 |     cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, line_width)
 26 |     return
 27 | 
 28 | 
 29 | def parse_record(data_record):
 30 |     """
 31 |     parse the data record from a tfrecord file, typically pulled from an iterator,
 32 |     in this case a one_shot_iterator created from the dataset.
 33 |     """
 34 |     feature = {'image/encoded': tf.FixedLenFeature([], tf.string),
 35 |                'image/object/class/label': tf.VarLenFeature(tf.int64),
 36 |                'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
 37 |                'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
 38 |                'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
 39 |                'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
 40 |                'image/filename': tf.FixedLenFeature([], tf.string)
 41 |                }
 42 |     return tf.parse_single_example(data_record, feature)
 43 | 
 44 | 
 45 | def view_records(file_path, class_labels, stride = 1, verbose = 1):
 46 |     """
 47 |     peek at the data using opencv and tensorflow tools.
 48 |     Inputs:
 49 |         file_path: path to tfrecord file (usually has 'record' extension)
 50 |         class_labels: dictionary of labels with name:number pairs (start with 1)
 51 |         stride (default 1): how many records to jump (you might have thousands so skip a few)
 52 |         verbose (default 1): display text output if 1, display nothing except images otherwise.
 53 | 
 54 |     Usage:
 55 |     Within the image window, enter 'n' for next image, 'esc' to stop seeing images.
 56 |     """
 57 |     dataset = tf.data.TFRecordDataset([file_path])
 58 |     record_iterator = dataset.make_one_shot_iterator()
 59 |     num_records = dataset.reduce(np.int64(0), lambda x, _: x + 1).numpy()
 60 | 
 61 |     if verbose:
 62 |         print(f"\nGoing through {num_records} records with a stride of {stride}.")
 63 |         print("Enter 'n' to bring up next image in record.\n")
 64 |     for im_ind in range(num_records):
 65 | 
 66 |         #Parse and process example
 67 | 
 68 |         parsed_example = parse_record(record_iterator.get_next())
 69 |         if im_ind % stride != 0:
 70 |             continue
 71 | 
 72 |         fname = parsed_example['image/filename'].numpy()
 73 |         encoded_image = parsed_example['image/encoded']
 74 |         image_np = tf.image.decode_image(encoded_image, channels=3).numpy()
 75 | 
 76 |         labels =  tf.sparse_tensor_to_dense(parsed_example['image/object/class/label'], default_value=0).numpy()
 77 |         x1norm =  tf.sparse_tensor_to_dense( parsed_example['image/object/bbox/xmin'], default_value=0).numpy()
 78 |         x2norm =  tf.sparse_tensor_to_dense( parsed_example['image/object/bbox/xmax'], default_value=0).numpy()
 79 |         y1norm =  tf.sparse_tensor_to_dense( parsed_example['image/object/bbox/ymin'], default_value=0).numpy()
 80 |         y2norm =  tf.sparse_tensor_to_dense( parsed_example['image/object/bbox/ymax'], default_value=0).numpy()
 81 | 
 82 |         num_bboxes = len(labels)
 83 | 
 84 |         #% Process and display image
 85 |         height, width = image_np[:, :, 1].shape
 86 |         image_copy = image_np.copy()
 87 |         image_rgb = cv2.cvtColor(image_copy, cv2.COLOR_BGR2RGB)
 88 | 
 89 |         if num_bboxes > 0:
 90 |             x1 = np.int64(x1norm*width)
 91 |             x2 = np.int64(x2norm*width)
 92 |             y1 = np.int64(y1norm*height)
 93 |             y2 = np.int64(y2norm*height)
 94 |             for bbox_ind in range(num_bboxes):
 95 |                     bbox = (x1[bbox_ind], y1[bbox_ind], x2[bbox_ind], y2[bbox_ind])
 96 |                     label_name = list(class_labels.keys())[list(class_labels.values()).index(labels[bbox_ind])]
 97 |                     label_position = (bbox[0] + 5, bbox[1] + 20)
 98 |                     cv_bbox(image_rgb, bbox, color = (250, 250, 150), line_width = 2)
 99 |                     cv2.putText(image_rgb,
100 |                                 label_name,
101 |                                 label_position,
102 |                                 cv2.FONT_HERSHEY_SIMPLEX,
103 |                                 1, (10, 10, 255), 2); #scale, color, thickness
104 | 
105 |         if verbose:
106 |             print(f"\nImage {im_ind}")
107 |             print(f"    {fname}")
108 |             print(f"    Height/width: {height, width}")
109 |             print(f"    Num bboxes: {num_bboxes}")
110 |         cv2.imshow("bb data", image_rgb)
111 |         k = cv2.waitKey()
112 |         if k == 27:
113 |             break
114 |         elif k == ord('n'):
115 |             continue
116 |     cv2.destroyAllWindows()
117 |     if verbose:
118 |         print("\n\ntfrecord-view: done going throug the data.")
119 | 
120 | 
121 | #%%
if __name__ == '__main__':
    #The record file below is produced by voc_to_tfr.py
    data_path = r"annotated_images/cats_dogs.record"
    class_labels =  {"dog" : 1, "cat": 2 }

    #Show every record and print info about each one
    stride = 1
    verbose = 1
    view_records(data_path, class_labels, stride = stride, verbose = verbose)
130 | 


--------------------------------------------------------------------------------
/view_records_tf2.py:
--------------------------------------------------------------------------------
  1 | """
  2 | The tf2 version is based on https://github.com/jschw/tfrecord-view/blob/master/tfrecord_view_gui.py
  3 | 
  4 | view_records_tf2.py:
  5 | Consume and display data from a tfrecord file: pulls image and bounding boxes for display
  6 | so you can make sure things look reasonable, e.g., after augmentation.
  7 | 
  8 | Hit 'n' for 'next' image, or 'esc' to quit.
  9 | 
 10 | Part of tfrecord-view repo: https://github.com/EricThomson/tfrecord-view
 11 | 
 12 | """
 13 | 
 14 | import cv2
 15 | import numpy as np
 16 | import tensorflow as tf
 17 | import warnings
 18 | warnings.filterwarnings('ignore', category = FutureWarning)  #tf 1.14 and np 1.17 are clashing: temporary solution
 19 | 
 20 | def cv_bbox(image, bbox, color = (255, 255, 255), line_width = 2):
 21 |     """
 22 |     use opencv to add bbox to an image
 23 |     assumes bbox is in standard form x1 y1 x2 y2
 24 |     """
 25 | 
 26 |     cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, line_width)
 27 |     return
 28 | 
 29 | 
 30 | def parse_record(data_record):
 31 |     """
 32 |     parse the data record from a tfrecord file, typically pulled from an iterator,
 33 |     in this case a one_shot_iterator created from the dataset.
 34 |     """
 35 |     feature = {'image/encoded': tf.io.FixedLenFeature([], tf.string),
 36 |                 'image/object/class/label': tf.io.VarLenFeature(tf.int64),
 37 |                 'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
 38 |                 'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
 39 |                 'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
 40 |                 'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
 41 |                 'image/filename': tf.io.FixedLenFeature([], tf.string)
 42 |                 }
 43 |     return tf.io.parse_single_example(data_record, feature)
 44 | 
 45 | 
 46 | def view_records(file_path, class_labels, stride = 1, verbose = 1):
 47 |     """
 48 |     peek at the data using opencv and tensorflow tools.
 49 |     Inputs:
 50 |         file_path: path to tfrecord file (usually has 'record' extension)
 51 |         class_labels: dictionary of labels with name:number pairs (start with 1)
 52 |         stride (default 1): how many records to jump (you might have thousands so skip a few)
 53 |         verbose (default 1): display text output if 1, display nothing except images otherwise.
 54 | 
 55 |     Usage:
 56 |     Within the image window, enter 'n' for next image, 'esc' to stop seeing images.
 57 |     """
 58 |     dataset = tf.data.TFRecordDataset([file_path])
 59 |     record_iterator = iter(dataset)
 60 |     num_records = dataset.reduce(np.int64(0), lambda x, _: x + 1).numpy()
 61 | 
 62 |     if verbose:
 63 |         print(f"\nGoing through {num_records} records with a stride of {stride}.")
 64 |         print("Enter 'n' to bring up next image in record.\n")
 65 |     for im_ind in range(num_records):
 66 | 
 67 |         #Parse and process example
 68 | 
 69 |         parsed_example = parse_record(record_iterator.get_next())
 70 |         if im_ind % stride != 0:
 71 |             continue
 72 | 
 73 |         fname = parsed_example['image/filename'].numpy()
 74 |         encoded_image = parsed_example['image/encoded']
 75 |         image_np = tf.image.decode_image(encoded_image, channels=3).numpy()
 76 | 
 77 |         labels =  tf.sparse.to_dense(parsed_example['image/object/class/label'], default_value=0).numpy()
 78 |         x1norm =  tf.sparse.to_dense( parsed_example['image/object/bbox/xmin'], default_value=0).numpy()
 79 |         x2norm =  tf.sparse.to_dense( parsed_example['image/object/bbox/xmax'], default_value=0).numpy()
 80 |         y1norm =  tf.sparse.to_dense( parsed_example['image/object/bbox/ymin'], default_value=0).numpy()
 81 |         y2norm =  tf.sparse.to_dense( parsed_example['image/object/bbox/ymax'], default_value=0).numpy()
 82 | 
 83 |         num_bboxes = len(labels)
 84 | 
 85 |         #% Process and display image
 86 |         height, width = image_np[:, :, 1].shape
 87 |         image_copy = image_np.copy()
 88 |         image_rgb = cv2.cvtColor(image_copy, cv2.COLOR_BGR2RGB)
 89 | 
 90 |         if num_bboxes > 0:
 91 |             x1 = np.int64(x1norm*width)
 92 |             x2 = np.int64(x2norm*width)
 93 |             y1 = np.int64(y1norm*height)
 94 |             y2 = np.int64(y2norm*height)
 95 |             for bbox_ind in range(num_bboxes):
 96 |                     bbox = (x1[bbox_ind], y1[bbox_ind], x2[bbox_ind], y2[bbox_ind])
 97 |                     label_name = list(class_labels.keys())[list(class_labels.values()).index(labels[bbox_ind])]
 98 |                     label_position = (bbox[0] + 5, bbox[1] + 20)
 99 |                     cv_bbox(image_rgb, bbox, color = (250, 250, 150), line_width = 2)
100 |                     cv2.putText(image_rgb,
101 |                                 label_name,
102 |                                 label_position,
103 |                                 cv2.FONT_HERSHEY_SIMPLEX,
104 |                                 1, (10, 10, 255), 2); #scale, color, thickness
105 | 
106 |         if verbose:
107 |             print(f"\nImage {im_ind}")
108 |             print(f"    {fname}")
109 |             print(f"    Height/width: {height, width}")
110 |             print(f"    Num bboxes: {num_bboxes}")
111 |         cv2.imshow("bb data", image_rgb)
112 |         k = cv2.waitKey()
113 |         if k == 27:
114 |             break
115 |         elif k == ord('n'):
116 |             continue
117 |     cv2.destroyAllWindows()
118 |     if verbose:
119 |         print("\n\ntfrecord-view: done going throug the data.")
120 | 
121 | 
122 | #%%
if __name__ == '__main__':
    #The record file below is produced by voc_to_tfr.py
    data_path = r"annotated_images/cats_dogs.record"
    class_labels =  {"dog" : 1, "cat": 2 }

    #Show every record and print info about each one
    stride = 1
    verbose = 1
    view_records(data_path, class_labels, stride = stride, verbose = verbose)
131 | 


--------------------------------------------------------------------------------
/voc_to_tfr.py:
--------------------------------------------------------------------------------
  1 | """
  2 | voc_to_tfr.py
  3 | Create a TFRecord file from images and Pascal VOC encoded annotation xmls.
  4 | 
  5 | Part of tfrecord-view repo: https://github.com/EricThomson/tfrecord-view
  6 | """
  7 | import numpy as np
  8 | import tensorflow as tf
  9 | import os
 10 | import glob
 11 | from lxml import etree
 12 | 
 13 | 
 14 | def create_tf_example(data,
 15 |                       image_path,
 16 |                       label_map_dict,
 17 |                       ignore_difficult_instances=False, 
 18 |                       verbose = 1):
 19 |     """
 20 |     Convert image/xml-derived annotation dict to tensorflow example file to be
 21 |     incorporated into a TFRecord. Adapted from:
 22 |             https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_pascal_tf_record.py
 23 | 
 24 |     Notice that this function normalizes the bounding box coordinates provided
 25 |     by the raw data, so they are between [0, 1].
 26 | 
 27 |     Inputs:
 28 |         data: dict holding PASCAL XML fields for a single image (obtained by
 29 |             running recursive_parse_xml_to_dict)
 30 |         image_path: Path to image
 31 |         label_map_dict: A map from string label names to integers ids.
 32 |         ignore_difficult_instances: Whether to skip difficult instances in the
 33 |             dataset    (default: False).
 34 |         verbose (default 1): 1 to show image info during encoding, 0 otherwise
 35 | 
 36 |     Returns:
 37 |         example: The converted tf.Example.
 38 | 
 39 |     """
 40 |     with tf.gfile.GFile(image_path, 'rb') as fid:
 41 |         encoded_image = fid.read()
 42 | 
 43 |     if verbose: print(f"Encoding {image_path}")
 44 |     # For some reason after processing xml, it frequently returns width/height switched!
 45 |     width = int(data['size']['width'])
 46 |     height = int(data['size']['height'])
 47 | 
 48 |     #If no data['object'] there are no bounding boxes
 49 |     if 'object' in data:
 50 |         annotation_list = data['object']
 51 |         xmin = []
 52 |         ymin = []
 53 |         xmax = []
 54 |         ymax = []
 55 |         classes = []
 56 |         classes_text = []
 57 |         difficult_obj = []
 58 |     
 59 | 
 60 |         for annotation in annotation_list:
 61 |             difficult = bool(int(annotation['difficult']))
 62 |             if ignore_difficult_instances and difficult:
 63 |                 continue
 64 | 
 65 |             difficult_obj.append(int(difficult))
 66 | 
 67 |             x1 = annotation['bndbox']['xmin']
 68 |             y1 = annotation['bndbox']['ymin']
 69 |             x2 = annotation['bndbox']['xmax']
 70 |             y2 = annotation['bndbox']['ymax']
 71 |             xmin.append(float(x1) / width)
 72 |             xmax.append(float(x2) / width)
 73 |             ymin.append(float(y1) / height)
 74 |             ymax.append(float(y2) / height)
 75 |             classes_text.append(annotation['name'].encode('utf8'))
 76 |             classes.append(label_map_dict[annotation['name']])
 77 | 
 78 |         obj_features = {
 79 |                 'image/height': int64_feature(height),
 80 |                 'image/width': int64_feature(width),
 81 |                 'image/filename': bytes_feature(data['filename'].encode('utf8')),
 82 |                 'image/encoded': bytes_feature(encoded_image),
 83 |                 'image/object/bbox/xmin': float_list_feature(xmin),
 84 |                 'image/object/bbox/xmax': float_list_feature(xmax),
 85 |                 'image/object/bbox/ymin': float_list_feature(ymin),
 86 |                 'image/object/bbox/ymax': float_list_feature(ymax),
 87 |                 'image/object/class/text': bytes_list_feature(classes_text),
 88 |                 'image/object/class/label': int64_list_feature(classes),
 89 |                 'image/annotated': int64_feature(0)
 90 |         }
 91 |     
 92 |         tf_features = tf.train.Features(feature = obj_features)
 93 |         tf_example = tf.train.Example(features = tf_features)
 94 |     
 95 |             
 96 |     else:
 97 |         if verbose: print("No annotations for this one")
 98 |         obj_features = {
 99 |                 'image/height': int64_feature(height),
100 |                 'image/width': int64_feature(width),
101 |                 'image/filename': bytes_feature(data['filename'].encode('utf8')),
102 |                 'image/encoded': bytes_feature(encoded_image),
103 |                 'image/annotated': int64_feature(0)
104 |         }
105 |     
106 |     tf_features = tf.train.Features(feature = obj_features)
107 |     tf_example = tf.train.Example(features = tf_features)
108 |     return tf_example
109 | 
110 | 
111 | #Following feature encoders are from models/research/object_detection/dataset_util.py
def int64_feature(value):
    """Wrap a single value in a one-element Int64List feature."""
    wrapped = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=wrapped)
114 | 
115 | 
def int64_list_feature(value):
    """Wrap a sequence of values in an Int64List feature."""
    wrapped = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=wrapped)
118 | 
119 | 
def bytes_feature(value):
    """Wrap a single value in a one-element BytesList feature."""
    wrapped = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=wrapped)
122 | 
123 | 
def bytes_list_feature(value):
    """Wrap a sequence of values in a BytesList feature."""
    wrapped = tf.train.BytesList(value=value)
    return tf.train.Feature(bytes_list=wrapped)
126 | 
127 | 
def float_list_feature(value):
    """Wrap a sequence of values in a FloatList feature."""
    wrapped = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=wrapped)
130 | 
131 | #Following is from models/research/object_detection/dataset_util.py
def recursive_parse_xml_to_dict(xml):
    """Recursively parses XML contents to python dict.

    We assume that `object` tags are the only ones that can appear
    multiple times at the same level of a tree. Any other tag that repeats
    keeps only its last occurrence.

    Args:
        xml: xml tree obtained by parsing XML file contents using lxml.etree

    Returns:
        Python dictionary holding XML contents: {tag: text} for an element
        without children, otherwise {tag: {child_tag: ...}} where the
        'object' entry (if any) is a list with one dict per object element.
    """
    # Check the child count explicitly: testing an element's truth value is
    # deprecated and triggers warnings in current xml libraries.
    if len(xml) == 0:
        return {xml.tag: xml.text}
    result = {}
    for child in xml:
        child_result = recursive_parse_xml_to_dict(child)
        if child.tag != 'object':
            result[child.tag] = child_result[child.tag]
        else:
            # Objects accumulate into a list, in document order
            result.setdefault(child.tag, []).append(child_result[child.tag])
    return {xml.tag: result}
156 | 
157 | #%%
if __name__ == '__main__':
    # Repo
    class_labels =    {"dog" : 1, "cat": 2 }
    data_path = r"annotated_images/"
    output_path = os.path.join(data_path, 'cats_dogs.record')

    verbose = 1
    filename_query = os.path.join(data_path, '*.png')    #can change to any format (bmp, png etc)
    #Sort so the records are written in a reproducible order
    image_paths = sorted(glob.glob(filename_query))

    #The with-block closes the writer even if encoding one image fails
    with tf.python_io.TFRecordWriter(output_path) as writer:
        for image_path in image_paths:
            #Each image is expected to have an xml with the same base name
            xml_path = os.path.splitext(image_path)[0] + '.xml'

            with tf.gfile.GFile(xml_path, 'rb') as fid:
                xml_str = fid.read()

            xml = etree.fromstring(xml_str)
            xml_data = recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = create_tf_example(xml_data, image_path, class_labels, verbose = verbose)
            writer.write(tf_example.SerializeToString())

    print("Done encoding data TFRecord file")
183 | 


--------------------------------------------------------------------------------