├── Dockerfile
├── LICENSE.txt
├── README.md
├── build.sh
├── run-bag2tf.sh
├── run-bagdump.sh
├── run.sh
└── script
    ├── bag2tf.py
    ├── bagdump.py
    ├── bagutils.py
    └── readtf.py

/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ros:melodic-perception
2 | 
3 | # CPU variant of Tensorflow
4 | ENV TENSORFLOW_VARIANT cpu/tensorflow-1.13.1-cp27-none
5 | 
6 | # The basics
7 | RUN apt-get update && apt-get install -q -y \
8 |     wget \
9 |     pkg-config \
10 |     git-core \
11 |     python-dev \
12 |     && apt-get clean && \
13 |     rm -rf /var/lib/apt/lists/*
14 | 
15 | # Install pip and Python modules
16 | RUN wget https://bootstrap.pypa.io/get-pip.py && \
17 |     python get-pip.py && \
18 |     rm get-pip.py \
19 |     && pip install numpy matplotlib ipykernel python-dateutil --upgrade \
20 |     && python -m ipykernel.kernelspec
21 | 
22 | RUN pip install scipy pandas jupyter
23 | 
24 | # Install TensorFlow
25 | RUN pip --no-cache-dir install \
26 |     http://storage.googleapis.com/tensorflow/linux/${TENSORFLOW_VARIANT}-linux_x86_64.whl
27 | 
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | 
2 |                                  Apache License
3 |                            Version 2.0, January 2004
4 |                         http://www.apache.org/licenses/
5 | 
6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 | 
8 |    1. Definitions.
9 | 
10 |       "License" shall mean the terms and conditions for use, reproduction,
11 |       and distribution as defined by Sections 1 through 9 of this document.
12 | 
13 |       "Licensor" shall mean the copyright owner or entity authorized by
14 |       the copyright owner that is granting the License.
15 | 
16 |       "Legal Entity" shall mean the union of the acting entity and all
17 |       other entities that control, are controlled by, or are under common
18 |       control with that entity. For the purposes of this definition,
19 |       "control" means (i) the power, direct or indirect, to cause the
20 |       direction or management of such entity, whether by contract or
21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 |       outstanding shares, or (iii) beneficial ownership of such entity.
23 | 
24 |       "You" (or "Your") shall mean an individual or Legal Entity
25 |       exercising permissions granted by this License.
26 | 
27 |       "Source" form shall mean the preferred form for making modifications,
28 |       including but not limited to software source code, documentation
29 |       source, and configuration files.
30 | 
31 |       "Object" form shall mean any form resulting from mechanical
32 |       transformation or translation of a Source form, including but
33 |       not limited to compiled object code, generated documentation,
34 |       and conversions to other media types.
35 | 
36 |       "Work" shall mean the work of authorship, whether in Source or
37 |       Object form, made available under the License, as indicated by a
38 |       copyright notice that is included in or attached to the work
39 |       (an example is provided in the Appendix below).
40 | 
41 |       "Derivative Works" shall mean any work, whether in Source or Object
42 |       form, that is based on (or derived from) the Work and for which the
43 |       editorial revisions, annotations, elaborations, or other modifications
44 |       represent, as a whole, an original work of authorship. For the purposes
45 |       of this License, Derivative Works shall not include works that remain
46 |       separable from, or merely link (or bind by name) to the interfaces of,
47 |       the Work and Derivative Works thereof.
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!) The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # udacity-driving-reader
2 | 
3 | Scripts to read and dump data from the rosbag format used in the udacity self-driving dataset(s).
4 | 
5 | The scripts were set up to run within a Docker container so that I can extract the data from the rosbag format without needing to install ROS on my system. The Docker container is built from the ROS melodic-perception image; Python modules and Tensorflow (1.13.1, CPU variant) are added on top of that.
6 | 
7 | I've run this code on Ubuntu 16.04 and 18.04 with Docker CE installed as per https://docs.docker.com/install/linux/docker-ce/ubuntu/. No other platform has been tried.
8 | 
9 | Since the original release of this script, it has been updated to support bag files with compressed images and bag files that have been split into multiple files by time or topics. As part of this, a reordering buffer was added to bag2tf.
10 | 
11 | The latest versions scan all bag files and extract their info in yaml format before doing a second pass to read the data. This adds some time, but it provides a mechanism for supporting the variety of bag formats and splits now being used in the datasets. The info yaml files are also dumped as part of the bagdump process.
12 | 
13 | ## Installation
14 | 
15 | Check out this code and run it in place. I have not pushed the Docker container to Docker Hub.
16 | 
17 | ## Usage
18 | 
19 | Build the Docker container manually or with ./build.sh before executing any of the run scripts.
20 | 
21 | Run one of the run scripts to dump the data to images + CSV or to Tensorflow sharded record files.
22 | 
23 | This and future versions of the scripts expect all datasets to exist in SEPARATE folders, with only bag files for the same dataset in each folder. The input folder should thus be a folder with one folder per dataset. The bagdump script will mirror those input folders in the output (each dataset folder gets left/center/right image folders plus camera.csv, steering.csv, gps.csv, the other sensor CSVs, and an interpolated.csv), while bag2tf will combine them all into one sharded stream.
24 | 
25 | The paths passed to the run scripts are used as Docker volume mappings. **These paths must be absolute paths on your local filesystem (i.e., written out from the root)**. Keep this in mind if you try to change the input/output args.
26 | 
27 | ### Dump to images + CSV
28 | 
29 |     ./run-bagdump.sh -i [absolute dir with folders containing bag files] -o [absolute output dir] -- [args to pass to python script]
30 | 
31 | For example, if your dataset bags are in /data/dataset2-1/dataset.bag, /data/udacity-datasetElCamino/*.bag, etc., and you'd like the output in /output:
32 | 
33 |     ./run-bagdump.sh -i /data -o /output
34 | 
35 | The same as above, but converting to png instead of jpg:
36 | 
37 |     ./run-bagdump.sh -i /data -o /output -- -f png
38 | 
39 | ### Dump to Tensorflow sharded files
40 | 
41 | Same basic arguments as for bagdump above, plus some additional arguments of note to pass to the python script.
42 | 
43 | The default arguments write all cameras into the same sharded stream along with the latest steering entry. To write images to three separate streams, one for each camera, add an -s (or --separate) argument. For example:
44 | 
45 |     ./run-bag2tf.sh -i /data -o /output -- --separate
46 | 
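### Reading the sharded files back

The repo also includes script/readtf.py for reading the records back (not shown here). As an illustrative sketch only, assuming TF 1.x APIs and the feature names/shapes that bag2tf.py writes (steering and gps lists are padded to 2 samples per image, and the default output paths are used), parsing looks roughly like this:

    import tensorflow as tf

    def parse_example(serialized):
        # Feature names/shapes follow the dicts assembled in bag2tf.py.
        features = {
            'image/encoded': tf.FixedLenFeature([], tf.string),
            'image/timestamp': tf.FixedLenFeature([], tf.int64),
            'steer/angle': tf.FixedLenFeature([2], tf.float32),
            'steer/speed': tf.FixedLenFeature([2], tf.float32),
            'gps/lat': tf.FixedLenFeature([2], tf.float32),
            'gps/long': tf.FixedLenFeature([2], tf.float32),
        }
        parsed = tf.parse_single_example(serialized, features)
        image = tf.image.decode_image(parsed['image/encoded'], channels=3)
        return image, parsed['steer/angle']

    # Shards land in <outdir>/records/ with names like train-00000-of-00099.
    dataset = tf.data.TFRecordDataset(tf.gfile.Glob('/output/records/train-*'))
    dataset = dataset.map(parse_example)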
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | IMAGE_TAG="udacity-reader"
3 | 
4 | while getopts ":t:" opt; do
5 |   case $opt in
6 |     t) IMAGE_TAG=$OPTARG ;;
7 |     \?)
8 |       echo "Invalid option: -$OPTARG" >&2
9 |       exit 1
10 |       ;;
11 |     :)
12 |       echo "Option -$OPTARG requires an argument." >&2
13 |       exit 1
14 |       ;;
15 |   esac
16 | done
17 | shift "$((OPTIND - 1))"
18 | 
19 | docker build -t "$IMAGE_TAG" .
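# NOTE: run.sh defaults to this same 'udacity-reader' tag; if you build with a
# custom -t tag here, pass the same tag to run.sh via its -t option.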
20 | -------------------------------------------------------------------------------- /run-bag2tf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./run.sh -r "python script/bag2tf.py" "$@" 3 | -------------------------------------------------------------------------------- /run-bagdump.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./run.sh -r "python script/bagdump.py" "$@" -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # defaults 4 | INPUT_DIR="/data/" 5 | OUTPUT_DIR="/data/output" 6 | IMAGE_TAG="udacity-reader" 7 | RUN_SCRIPT="/bin/bash" 8 | INTERACTIVE="-it" 9 | 10 | usage() { echo "Usage: $0 [-i input_dir] [-o output_dir] [-t image_tag]" 1>&2; exit 1; } 11 | while getopts ":i:o:t:r:h" opt; do 12 | case $opt in 13 | i) INPUT_DIR=$OPTARG ;; 14 | o) OUTPUT_DIR=$OPTARG ;; 15 | t) IMAGE_TAG=$OPTARG ;; 16 | r) RUN_SCRIPT=$OPTARG; INTERACTIVE='' ;; 17 | h) usage ;; 18 | \?) 19 | echo "Invalid option: -$OPTARG" >&2 20 | usage 21 | ;; 22 | :) 23 | echo "Option -$OPTARG requires an argument." >&2 24 | exit 1 25 | ;; 26 | esac 27 | done 28 | shift "$((OPTIND - 1))" 29 | 30 | echo "Running '$RUN_SCRIPT' with input dir '$INPUT_DIR', output dir '$OUTPUT_DIR', docker image '$IMAGE_TAG'..." 31 | 32 | docker run --rm $INTERACTIVE\ 33 | --volume="/$(pwd)/script:/script"\ 34 | --volume="$INPUT_DIR:/data"\ 35 | --volume="$OUTPUT_DIR:/output"\ 36 | $IMAGE_TAG $RUN_SCRIPT "$@" 37 | -------------------------------------------------------------------------------- /script/bag2tf.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | 10 | from __future__ import print_function 11 | from cv_bridge import CvBridge, CvBridgeError 12 | import os 13 | import sys 14 | import cv2 15 | import imghdr 16 | import heapq 17 | import argparse 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from bagutils import * 22 | 23 | 24 | def feature_int64(value_list): 25 | """Wrapper for inserting int64 features into Example proto.""" 26 | if not isinstance(value_list, list): 27 | value_list = [value_list] 28 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value_list)) 29 | 30 | 31 | def feature_float(value_list): 32 | """Wrapper for inserting float features into Example proto.""" 33 | if not isinstance(value_list, list): 34 | value_list = [value_list] 35 | return tf.train.Feature(float_list=tf.train.FloatList(value=value_list)) 36 | 37 | 38 | def feature_bytes(value): 39 | """Wrapper for inserting bytes features into Example proto.""" 40 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 41 | 42 | 43 | def feature_bytes_list(value_list, skip_convert=False): 44 | """Wrapper for inserting bytes features into Example proto.""" 45 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value_list)) 46 | 47 | 48 | def str2float(string): 49 | try: 50 | f = float(string) 51 | return f, True 52 | except ValueError: 53 | return 0.0, False 54 | 55 | 56 | def get_outdir(base_dir, name): 57 | outdir = os.path.join(base_dir, name) 58 | if not os.path.exists(outdir): 59 | os.makedirs(outdir) 60 | return outdir 61 | 62 | 63 | def check_image_format(data): 64 | img_fmt = imghdr.what(None, h=data) 65 | return 'jpg' if img_fmt == 'jpeg' else img_fmt 66 | 67 | 68 | def to_steering_dict(sample_list=[], target_sample_count=0): 69 | if not sample_list: 70 | count = 1 if not target_sample_count else target_sample_count 71 | return { 72 | 'steer/timestamp': feature_int64([0] * count), 73 | 'steer/angle': feature_float([0.0] * count), 74 | 'steer/torque': feature_float([0.0] * count), 75 | 'steer/speed': feature_float([0.0] * count), 76 | } 77 | # extend samples to target count if set, needed if fixed sized tensors expected on read 78 | if target_sample_count and len(sample_list) < target_sample_count: 79 | sample_list += [sample_list[-1]] * (target_sample_count - len(sample_list)) 80 | timestamps = [] 81 | angles = [] 82 | torques = [] 83 | speeds = [] 84 | for timestamp, msg in sample_list: 85 | timestamps += [timestamp] 86 | angles += [msg.steering_wheel_angle] 87 | torques += [msg.steering_wheel_torque] 88 | speeds += [msg.speed] 89 | steering_dict = { 90 | 'steer/timestamp': feature_int64(timestamps), 91 | 'steer/angle': feature_float(angles), 92 | 'steer/torque': feature_float(torques), 93 | 'steer/speed': feature_float(speeds), 94 | } 95 | return steering_dict 96 | 97 | 98 | def to_gps_dict(sample_list=[], target_sample_count=0): 99 | if not sample_list: 100 | count = 1 if not target_sample_count else target_sample_count 101 | return { 102 | 'gps/timestamp': feature_int64([0] * count), 103 | 'gps/lat': feature_float([0.0] * count), 104 | 'gps/long': feature_float([0.0] * count), 105 | 'gps/alt': feature_float([0.0] * count), 106 | } 107 | # extend samples to target count if set, needed if fixed sized tensors expected on read 108 | if target_sample_count and len(sample_list) < target_sample_count: 109 | sample_list += 
[sample_list[-1]] * (target_sample_count - len(sample_list))
110 |     timestamps = []
111 |     lats = []
112 |     longs = []
113 |     alts = []
114 |     for timestamp, msg in sample_list:
115 |         timestamps += [timestamp]
116 |         lats += [msg.latitude]
117 |         longs += [msg.longitude]
118 |         alts += [msg.altitude]
119 |     gps_dict = {
120 |         'gps/timestamp': feature_int64(timestamps),
121 |         'gps/lat': feature_float(lats),
122 |         'gps/long': feature_float(longs),
123 |         'gps/alt': feature_float(alts),
124 |     }
125 |     return gps_dict
126 | 
127 | 
128 | class ShardWriter():
129 |     def __init__(self, outdir, name, num_entries, max_num_shards=256):
130 |         self.num_entries = num_entries
131 |         self.outdir = outdir
132 |         self.name = name
133 |         self.max_num_shards = max_num_shards
134 |         self.num_entries_per_shard = num_entries // max_num_shards
135 |         self.write_counter = 0
136 |         self._shard_counter = 0
137 |         self._writer = None
138 | 
139 |     def _update_writer(self):
140 |         if not self._writer or self._shard_counter >= self.num_entries_per_shard:
141 |             shard = self.write_counter // self.num_entries_per_shard
142 |             assert(shard <= self.max_num_shards)
143 |             output_filename = '%s-%.5d-of-%.5d' % (self.name, shard, self.max_num_shards-1)
144 |             output_file = os.path.join(self.outdir, output_filename)
145 |             self._writer = tf.python_io.TFRecordWriter(output_file)
146 |             self._shard_counter = 0
147 | 
148 |     def write(self, example):
149 |         self._update_writer()
150 |         self._writer.write(example.SerializeToString())
151 |         self._shard_counter += 1
152 |         self.write_counter += 1
153 |         if not self.write_counter % 1000:
154 |             print('Written %d of %d images for %s' % (self.write_counter, self.num_entries, self.name))
155 |             sys.stdout.flush()
156 | 
157 | 
158 | # FIXME lame constants
159 | MIN_SPEED = 1.0  # 1 m/s ~ 3.6 km/h ~ 2.2 mph
160 | WRITE_ENABLE_SLOW_START = 10  # 10 steering samples above min speed before restart
161 | 
162 | 
163 | def dequeue_samples_until(queue, timestamp):
164 |     samples = []
165 |     while queue and queue[0][0] < timestamp:
166 |         samples.append(heapq.heappop(queue))
167 |     return samples
168 | 
169 | 
170 | class Processor(object):
171 | 
172 |     def __init__(self,
173 |                  save_dir,
174 |                  num_images,
175 |                  splits=(('train', 1.0),),
176 |                  name='records',
177 |                  image_fmt='jpg',
178 |                  center_only=False,
179 |                  debug_print=False):
180 | 
181 |         # config and helpers
182 |         self.debug_print = debug_print
183 |         self.min_buffer_ns = 240 * SEC_PER_NANOSEC  # keep 240 sec of sorting/sync buffer as per image timestamps
184 |         self.steering_offset_ns = 0  # shift steering timestamps by this much going into queue FIXME test
185 |         self.gps_offset_ns = 0  # shift gps timestamps by this much going into queue FIXME test
186 |         self.bridge = CvBridge()
187 | 
188 |         # example fixed write params
189 |         self.write_image_fmt = image_fmt
190 |         self.write_colorspace = b'RGB'
191 |         self.write_channels = 3
192 | 
193 |         # setup writer
194 |         # at approx 35-40KB per image, 6K per shard gives around 200MB per shard
195 |         # FIXME maybe support splitting data stream into train/validation from the same bags?
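        # Worked example for the sizing note above: 6000 images/shard at ~35KB
        # each is roughly 6000 * 35KB ~ 210MB per shard; e.g. 1.2M images
        # yields num_shards ~ 200.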
196 | num_shards = num_images // 6000 197 | self._outdir = get_outdir(save_dir, name) 198 | self._writers = {} 199 | for s in splits: 200 | scaled_images = num_images * s[1] 201 | scaled_shards = num_shards * s[1] 202 | if s[0] == 'validation' and not center_only: 203 | scaled_images //= 3 204 | scaled_shards //= 3 205 | writer = ShardWriter(self._outdir, s[0], scaled_images, max_num_shards=scaled_shards) 206 | self._writers[s[0]] = writer 207 | self._splits = splits 208 | 209 | # stats, counts, and queues 210 | self.written_image_count = 0 211 | self.discarded_image_count = 0 212 | self.collect_image_stats = False 213 | self.collect_io_stats = True 214 | self.image_means = [] 215 | self.image_variances = [] 216 | self.steering_vals = [] 217 | self.gps_vals = [] 218 | self.reset_queues() 219 | 220 | def _select_writer(self): 221 | r = np.random.random_sample() 222 | for s in self._splits: 223 | if r < s[1]: 224 | return self._writers[s[0]] 225 | r -= s[1] 226 | return None 227 | 228 | def reset_queues(self): 229 | self.latest_image_timestamp = None 230 | self._write_enable = False 231 | self._speed_above_min_count = 0 232 | self._steering_queue = [] # time sorted steering heap 233 | self._gear_queue = [] # time sorted gear heap 234 | self._gps_queue = [] # time sorted gps heap 235 | self._images_queue = [] # time sorted image heap 236 | self._head_gear_sample = None 237 | self._head_steering_sample = None # most recent steering timestamp/topic/msg sample pulled from queue 238 | self._head_gps_sample = None # most recent gps timestamp/topic/msg sample pulled from queue 239 | self._debug_gps_next = False 240 | 241 | def write_example(self, image_topic, image_msg, steering_list, gps_list, dataset_id=0): 242 | try: 243 | assert isinstance(steering_list, list) 244 | assert isinstance(gps_list, list) 245 | 246 | writer = self._select_writer() 247 | if writer is None: 248 | self.discarded_image_count += 1 249 | return 250 | elif writer.name == 'validation': 251 | if image_topic not in CENTER_CAMERA_TOPICS: 252 | self.discarded_image_count += 1 253 | return 254 | 255 | image_width = 0 256 | image_height = 0 257 | if hasattr(image_msg, 'format') and 'compressed' in image_msg.format: 258 | buf = np.ndarray(shape=(1, len(image_msg.data)), dtype=np.uint8, buffer=image_msg.data) 259 | cv_image = cv2.imdecode(buf, cv2.IMREAD_ANYCOLOR) 260 | if cv_image.shape[2] != 3: 261 | print("Invalid image") 262 | return 263 | image_height = cv_image.shape[0] 264 | image_width = cv_image.shape[1] 265 | # Avoid re-encoding if we don't have to 266 | if check_image_format(image_msg.data) == self.write_image_fmt: 267 | encoded = buf 268 | else: 269 | _, encoded = cv2.imencode('.' + self.write_image_fmt, cv_image) 270 | else: 271 | image_width = image_msg.width 272 | image_height = image_msg.height 273 | cv_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8") 274 | _, encoded = cv2.imencode('.' 
+ self.write_image_fmt, cv_image) 275 | 276 | if self.collect_image_stats: 277 | mean, std = cv2.meanStdDev(cv_image) 278 | self.image_means.append(np.squeeze(mean)) 279 | self.image_variances.append(np.squeeze(np.square(std))) 280 | 281 | if self.collect_io_stats: 282 | self.steering_vals.extend([x[1].steering_wheel_angle for x in steering_list]) 283 | self.gps_vals.extend([[x[1].latitude, x[1].longitude] for x in gps_list]) 284 | 285 | feature_dict = { 286 | 'image/timestamp': feature_int64(image_msg.header.stamp.to_nsec()), 287 | 'image/frame_id': feature_bytes(image_msg.header.frame_id), 288 | 'image/height': feature_int64(image_height), 289 | 'image/width': feature_int64(image_width), 290 | 'image/channels': feature_int64(self.write_channels), 291 | 'image/colorspace': feature_bytes(self.write_colorspace), 292 | 'image/format': feature_bytes(self.write_image_fmt), 293 | 'image/encoded': feature_bytes(encoded.tobytes()), 294 | 'image/dataset_id': feature_int64(dataset_id), 295 | } 296 | steering_dict = to_steering_dict(steering_list, target_sample_count=2) 297 | feature_dict.update(steering_dict) 298 | gps_dict = to_gps_dict(gps_list, target_sample_count=2) 299 | feature_dict.update(gps_dict) 300 | example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) 301 | writer.write(example) 302 | self.written_image_count += 1 303 | 304 | except CvBridgeError as e: 305 | print(e) 306 | 307 | def push_messages(self, messages): 308 | for timestamp, topic, msg in messages: 309 | if topic in CAMERA_TOPICS: 310 | heapq.heappush(self._images_queue, (timestamp, topic, msg)) 311 | if not self.latest_image_timestamp or timestamp > self.latest_image_timestamp: 312 | self.latest_image_timestamp = timestamp 313 | elif topic == STEERING_TOPIC: 314 | if self.debug_print: 315 | print("%s: steering, %f" % (ns_to_str(timestamp), msg.steering_wheel_angle)) 316 | timestamp += self.steering_offset_ns 317 | heapq.heappush(self._steering_queue, (timestamp, msg)) 318 | elif topic == GEAR_TOPIC: 319 | timestamp += self.steering_offset_ns # same offset as steering 320 | heapq.heappush(self._gear_queue, (timestamp, msg)) 321 | elif topic == GPS_FIX_TOPIC or topic == GPS_FIX_NEW_TOPIC: 322 | if self._debug_gps_next or self.debug_print: 323 | print("%s: gps , (%f, %f)" % (ns_to_str(timestamp), msg.latitude, msg.longitude)) 324 | self._debug_gps_next = False 325 | timestamp += self.gps_offset_ns 326 | heapq.heappush(self._gps_queue, (timestamp, msg)) 327 | 328 | def _update_write_enable(self, image_timestamp, steering_samples, latest_gear_sample): 329 | gear_forward = False if latest_gear_sample and latest_gear_sample[1].state.gear <= 2 else True 330 | gear = latest_gear_sample[1].state.gear if latest_gear_sample else 0 331 | for sample in steering_samples: 332 | sample_speed = sample[1].speed 333 | if self._write_enable: 334 | if sample_speed < MIN_SPEED or not gear_forward: 335 | # disable writing instantly on sample below minimum 336 | self._write_enable = False 337 | print('%s: Write disable. 
Speed: %s, gear: %d '
338 |                           % (ns_to_str(image_timestamp), sample_speed, gear))
339 |                     self._speed_above_min_count = 0
340 |                 else:
341 |                     self._speed_above_min_count += 1
342 |             else:  # not write enable
343 |                 # enable writing after threshold number samples above minimum seen
344 |                 if sample_speed < MIN_SPEED or not gear_forward:
345 |                     self._speed_above_min_count = 0
346 |                 else:
347 |                     self._speed_above_min_count += 1
348 |                     if self._speed_above_min_count > WRITE_ENABLE_SLOW_START:
349 |                         self._write_enable = True
350 |                         print('%s: Write enable. Speed: %s, gear: %d'
351 |                               % (ns_to_str(image_timestamp), sample_speed, gear))
352 | 
353 |     def pull_and_write(self, flush=False):
354 |         while self.pull_ready(flush):
355 |             assert self._images_queue
356 |             image_timestamp, image_topic, image_msg = heapq.heappop(self._images_queue)
357 |             if self.debug_print:
358 |                 print("Popped image: %d, %s" % (image_timestamp, image_topic))
359 | 
360 |             gear_samples = dequeue_samples_until(self._gear_queue, image_timestamp)
361 |             if gear_samples:
362 |                 self._head_gear_sample = gear_samples[-1]
363 | 
364 |             steering_samples = dequeue_samples_until(self._steering_queue, image_timestamp)
365 |             if steering_samples:
366 |                 self._head_steering_sample = steering_samples[-1]
367 |                 self._update_write_enable(image_timestamp, steering_samples, self._head_gear_sample)
368 | 
369 |             gps_samples = dequeue_samples_until(self._gps_queue, image_timestamp)
370 |             if gps_samples:
371 |                 self._head_gps_sample = gps_samples[-1]
372 | 
373 |             if self._write_enable:
374 |                 steering_list = []
375 |                 gps_list = []
376 |                 if self._head_steering_sample:
377 |                     steering_list.append(self._head_steering_sample)
378 |                 else:
379 |                     print('%s: Invalid head steering sample!' % ns_to_str(image_timestamp))
380 |                 if self._steering_queue:
381 |                     steering_list.append(self._steering_queue[0])
382 |                 else:
383 |                     print('%s: Empty steering queue!' % ns_to_str(image_timestamp))
384 |                 if self._head_gps_sample:
385 |                     gps_list.append(self._head_gps_sample)
386 |                 else:
387 |                     print('%s: Invalid head gps sample!' % ns_to_str(image_timestamp))
388 |                     self._debug_gps_next = True
389 |                 if self._gps_queue:
390 |                     gps_list.append(self._gps_queue[0])
391 |                 else:
392 |                     print('%s: Empty gps queue!' % ns_to_str(image_timestamp))
393 |                     self._debug_gps_next = True
394 | 
395 |                 self.write_example(image_topic, image_msg, steering_list, gps_list)
396 |             else:
397 |                 self.discarded_image_count += 1
398 | 
399 |     def _remaining_time(self):
400 |         if not self._images_queue:
401 |             return 0
402 |         return self.latest_image_timestamp - self._images_queue[0][0]
403 | 
404 |     def pull_ready(self, flush=False):
405 |         return self._images_queue and (flush or self._remaining_time() > self.min_buffer_ns)
406 | 
407 |     def get_writer_counts(self):
408 |         counts = []
409 |         for w in self._writers.values():
410 |             counts.append((w.name, w.write_counter))
411 |         return counts
412 | 
413 | 
414 | def main():
415 |     parser = argparse.ArgumentParser(description='Convert rosbag to tensorflow sharded records.')
416 |     parser.add_argument('-o', '--outdir', type=str, nargs='?', default='/output',
417 |                         help='Output folder')
418 |     parser.add_argument('-b', '--indir', type=str, nargs='?', default='/data/',
419 |                         help='Input folder where bag files are located')
420 |     parser.add_argument('-f', '--image_fmt', type=str, nargs='?', default='jpg',
421 |                         help='Image encode format, png or jpg')
422 |     parser.add_argument('-s', '--split', type=str, nargs='?', default='train',
423 |                         help="Data subset.
'train' or 'validation'") 424 | parser.add_argument('-k', '--keep', type=float, nargs='?', default=1.0, 425 | help="Keep specified percent of data. 0.0 or 1.0 is all") 426 | parser.add_argument('-c', '--center', action='store_true', 427 | help="Center camera only for all splits") 428 | parser.add_argument('-d', dest='debug', action='store_true', help='Debug print enable') 429 | parser.set_defaults(center=False) 430 | parser.set_defaults(debug=False) 431 | args = parser.parse_args() 432 | 433 | image_fmt = args.image_fmt 434 | save_dir = args.outdir 435 | input_dir = args.indir 436 | debug_print = args.debug 437 | center_only = args.center 438 | split = args.split 439 | keep = args.keep 440 | if keep == 0.0 or keep > 1.0: 441 | # 0 is invalid, change to keep all 442 | keep = 1.0 443 | 444 | filter_topics = [STEERING_TOPIC, GPS_FIX_TOPIC, GPS_FIX_NEW_TOPIC, GEAR_TOPIC] 445 | split_val, is_float = str2float(split) 446 | if is_float and 0.0 < split_val < 1.0: 447 | # split specified as float val indicating %validation data 448 | filter_camera_topics = CAMERA_TOPICS if not center_only else CENTER_CAMERA_TOPICS 449 | split_val *= keep 450 | split_list = [('train', keep - split_val), ('validation', split_val)] 451 | elif split == 'validation' or (is_float and split_val == 1.0): 452 | # split specified to be validation, set as 100% validation 453 | filter_camera_topics = CENTER_CAMERA_TOPICS 454 | split_list = [(split, keep)] 455 | else: 456 | # 100% train split 457 | assert split == 'train' 458 | filter_camera_topics = CAMERA_TOPICS if not center_only else CENTER_CAMERA_TOPICS 459 | split_list = [(split, keep)] 460 | filter_topics += filter_camera_topics 461 | 462 | num_images = 0 463 | num_messages = 0 464 | bagsets = find_bagsets(input_dir, filter_topics=filter_topics) 465 | for bs in bagsets: 466 | num_images += bs.get_message_count(filter_camera_topics) 467 | num_messages += bs.get_message_count(filter_topics) 468 | print("%d images, %d messages to import across %d bag sets..." 469 | % (num_images, num_messages, len(bagsets))) 470 | 471 | processor = Processor( 472 | save_dir=save_dir, num_images=num_images, image_fmt=image_fmt, 473 | splits=split_list, center_only=center_only, debug_print=debug_print) 474 | 475 | num_read_messages = 0 # number of messages read by cursors 476 | aborted = False 477 | try: 478 | for bs in bagsets: 479 | print("Processing set %s. 
%s to %s" % (bs.name, ns_to_str(bs.start_time), ns_to_str(bs.end_time))) 480 | sys.stdout.flush() 481 | 482 | cursor_group = CursorGroup(readers=bs.get_readers()) 483 | while cursor_group: 484 | msg_tuples = [] 485 | cursor_group.advance_by_until(360 * SEC_PER_NANOSEC) 486 | cursor_group.collect_vals(msg_tuples) 487 | num_read_messages += len(msg_tuples) 488 | processor.push_messages(msg_tuples) 489 | if processor.pull_ready(): 490 | processor.pull_and_write() 491 | 492 | processor.pull_and_write(flush=True) # flush remaining messages after read cursors are done 493 | processor.reset_queues() # ready for next bag set 494 | except KeyboardInterrupt: 495 | aborted = True 496 | 497 | if not aborted: 498 | if num_read_messages != num_messages: 499 | print("Number of read messages (%d) doesn't match expected count (%d)" % 500 | (num_read_messages, num_messages)) 501 | total_processed_images = processor.written_image_count + processor.discarded_image_count 502 | if total_processed_images != num_images: 503 | print("Number of processed images (%d) doesn't match expected count (%d)" % 504 | (total_processed_images, num_images)) 505 | 506 | print("Completed processing %d images to TF examples. %d images discarded" % 507 | (processor.written_image_count, processor.discarded_image_count)) 508 | 509 | print("Writer counts: ") 510 | [print("\t%s: %d" % (x[0], x[1])) for x in processor.get_writer_counts()] 511 | 512 | if processor.collect_image_stats: 513 | channel_mean = np.mean(processor.image_means, axis=0, dtype=np.float64)[::-1] 514 | channel_std = np.sqrt(np.mean(processor.image_variances, axis=0, dtype=np.float64))[::-1] 515 | print("Mean: ", channel_mean, ". Std deviation: ", channel_std) 516 | 517 | if processor.collect_io_stats: 518 | steering_mean = np.mean(processor.steering_vals, axis=0, dtype=np.float64) 519 | steering_std = np.std(processor.steering_vals, axis=0, dtype=np.float64) 520 | steering_min = np.min(processor.steering_vals, axis=0) 521 | steering_max = np.max(processor.steering_vals, axis=0) 522 | gps_mean = np.mean(processor.gps_vals, axis=0, dtype=np.float64) 523 | gps_std = np.std(processor.gps_vals, axis=0, dtype=np.float64) 524 | gps_min = np.min(processor.gps_vals, axis=0) 525 | gps_max = np.max(processor.gps_vals, axis=0) 526 | print("Steering: ") 527 | print("\tmean: ", steering_mean) 528 | print("\tstd: ", steering_std) 529 | print("\tmin: ", steering_min) 530 | print("\tmax: ", steering_max) 531 | print("Gps: ") 532 | print("\tmean: ", gps_mean) 533 | print("\tstd: ", gps_std) 534 | print("\tmin: ", gps_min) 535 | print("\tmax: ", gps_max) 536 | 537 | sys.stdout.flush() 538 | 539 | 540 | if __name__ == '__main__': 541 | main() -------------------------------------------------------------------------------- /script/bagdump.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | 10 | from __future__ import print_function 11 | from cv_bridge import CvBridge, CvBridgeError 12 | from collections import defaultdict 13 | import os 14 | import sys 15 | import cv2 16 | import imghdr 17 | import argparse 18 | import functools 19 | import numpy as np 20 | import pandas as pd 21 | 22 | from bagutils import * 23 | 24 | 25 | def get_outdir(base_dir, name): 26 | outdir = os.path.join(base_dir, name) 27 | if not os.path.exists(outdir): 28 | os.makedirs(outdir) 29 | return outdir 30 | 31 | 32 | def check_format(data): 33 | img_fmt = imghdr.what(None, h=data) 34 | return 'jpg' if img_fmt == 'jpeg' else img_fmt 35 | 36 | 37 | def write_image(bridge, outdir, msg, fmt='png'): 38 | results = {} 39 | image_filename = os.path.join(outdir, str(msg.header.stamp.to_nsec()) + '.' + fmt) 40 | try: 41 | if hasattr(msg, 'format') and 'compressed' in msg.format: 42 | buf = np.ndarray(shape=(1, len(msg.data)), dtype=np.uint8, buffer=msg.data) 43 | cv_image = cv2.imdecode(buf, cv2.IMREAD_ANYCOLOR) 44 | if cv_image.shape[2] != 3: 45 | print("Invalid image %s" % image_filename) 46 | return results 47 | results['height'] = cv_image.shape[0] 48 | results['width'] = cv_image.shape[1] 49 | # Avoid re-encoding if we don't have to 50 | if check_format(msg.data) == fmt: 51 | buf.tofile(image_filename) 52 | else: 53 | cv2.imwrite(image_filename, cv_image) 54 | else: 55 | cv_image = bridge.imgmsg_to_cv2(msg, "bgr8") 56 | cv2.imwrite(image_filename, cv_image) 57 | except CvBridgeError as e: 58 | print(e) 59 | results['filename'] = image_filename 60 | return results 61 | 62 | 63 | def camera2dict(msg, write_results, camera_dict): 64 | camera_dict["timestamp"].append(msg.header.stamp.to_nsec()) 65 | camera_dict["width"].append(write_results['width'] if 'width' in write_results else msg.width) 66 | camera_dict['height'].append(write_results['height'] if 'height' in write_results else msg.height) 67 | camera_dict["frame_id"].append(msg.header.frame_id) 68 | camera_dict["filename"].append(write_results['filename']) 69 | 70 | 71 | def steering2dict(msg, steering_dict): 72 | steering_dict["timestamp"].append(msg.header.stamp.to_nsec()) 73 | steering_dict["angle"].append(msg.steering_wheel_angle) 74 | steering_dict["torque"].append(msg.steering_wheel_torque) 75 | steering_dict["speed"].append(msg.speed) 76 | 77 | 78 | def gps2dict(msg, gps_dict): 79 | gps_dict["timestamp"].append(msg.header.stamp.to_nsec()) 80 | gps_dict["status"].append(msg.status.status) 81 | gps_dict["service"].append(msg.status.service) 82 | gps_dict["lat"].append(msg.latitude) 83 | gps_dict["long"].append(msg.longitude) 84 | gps_dict["alt"].append(msg.altitude) 85 | 86 | 87 | def imu2dict(msg, imu_dict): 88 | imu_dict["timestamp"].append(msg.header.stamp.to_nsec()) 89 | imu_dict["ax"].append(msg.linear_acceleration.x) 90 | imu_dict["ay"].append(msg.linear_acceleration.y) 91 | imu_dict["az"].append(msg.linear_acceleration.z) 92 | 93 | 94 | def gear2dict(msg, gear_dict): 95 | gear_dict["timestamp"].append(msg.header.stamp.to_nsec()) 96 | gear_dict["gear_state"].append(msg.state.gear) 97 | gear_dict["gear_cmd"].append(msg.cmd.gear) 98 | 99 | 100 | def throttle2dict(msg, throt_dict): 101 | throt_dict["timestamp"].append(msg.header.stamp.to_nsec()) 102 | throt_dict["throttle_input"].append(msg.pedal_input) 103 | 104 | 105 | def brake2dict(msg, brake_dict): 106 | 
brake_dict["timestamp"].append(msg.header.stamp.to_nsec()) 107 | brake_dict["brake_input"].append(msg.pedal_input) 108 | 109 | 110 | def camera_select(topic, select_from): 111 | if topic.startswith('/l'): 112 | return select_from[0] 113 | elif topic.startswith('/c'): 114 | return select_from[1] 115 | elif topic.startswith('/r'): 116 | return select_from[2] 117 | else: 118 | assert False, "Unexpected topic" 119 | 120 | 121 | def main(): 122 | parser = argparse.ArgumentParser(description='Convert rosbag to images and csv.') 123 | parser.add_argument('-o', '--outdir', type=str, nargs='?', default='/output', 124 | help='Output folder') 125 | parser.add_argument('-i', '--indir', type=str, nargs='?', default='/data', 126 | help='Input folder where bagfiles are located') 127 | parser.add_argument('-f', '--img_format', type=str, nargs='?', default='jpg', 128 | help='Image encode format, png or jpg') 129 | parser.add_argument('-m', dest='msg_only', action='store_true', help='Messages only, no immages') 130 | parser.add_argument('-d', dest='debug', action='store_true', help='Debug print enable') 131 | parser.set_defaults(msg_only=False) 132 | parser.set_defaults(debug=False) 133 | args = parser.parse_args() 134 | 135 | img_format = args.img_format 136 | base_outdir = args.outdir 137 | indir = args.indir 138 | msg_only = args.msg_only 139 | debug_print = args.debug 140 | 141 | bridge = CvBridge() 142 | 143 | include_images = False if msg_only else True 144 | include_others = True 145 | 146 | filter_topics = [STEERING_TOPIC, GPS_FIX_TOPIC, GPS_FIX_NEW_TOPIC] 147 | if include_images: 148 | filter_topics += CAMERA_TOPICS 149 | if include_others: 150 | filter_topics += OTHER_TOPICS 151 | 152 | bagsets = find_bagsets(indir, filter_topics=filter_topics) 153 | for bs in bagsets: 154 | print("Processing set %s" % bs.name) 155 | sys.stdout.flush() 156 | 157 | dataset_outdir = os.path.join(base_outdir, "%s" % bs.name) 158 | left_outdir = get_outdir(dataset_outdir, "left") 159 | center_outdir = get_outdir(dataset_outdir, "center") 160 | right_outdir = get_outdir(dataset_outdir, "right") 161 | 162 | camera_cols = ["timestamp", "width", "height", "frame_id", "filename"] 163 | camera_dict = defaultdict(list) 164 | 165 | steering_cols = ["timestamp", "angle", "torque", "speed"] 166 | steering_dict = defaultdict(list) 167 | 168 | gps_cols = ["timestamp", "status", "service", "lat", "long", "alt"] 169 | gps_dict = defaultdict(list) 170 | 171 | if include_others: 172 | imu_cols = ["timestamp", "ax", "ay", "az"] 173 | imu_dict = defaultdict(list) 174 | 175 | throttle_cols = ["timestamp", "throttle_input"] 176 | throttle_dict = defaultdict(list) 177 | 178 | brake_cols = ["timestamp", "brake_input"] 179 | brake_dict = defaultdict(list) 180 | 181 | gear_cols = ["timestamp", "gear_state", "gear_cmd"] 182 | gear_dict = defaultdict(list) 183 | 184 | bs.write_infos(dataset_outdir) 185 | readers = bs.get_readers() 186 | stats_acc = defaultdict(int) 187 | 188 | def _process_msg(topic, msg, stats): 189 | timestamp = msg.header.stamp.to_nsec() 190 | if topic in CAMERA_TOPICS: 191 | outdir = camera_select(topic, (left_outdir, center_outdir, right_outdir)) 192 | if debug_print: 193 | print("%s_camera %d" % (topic[1], timestamp)) 194 | 195 | results = write_image(bridge, outdir, msg, fmt=img_format) 196 | results['filename'] = os.path.relpath(results['filename'], dataset_outdir) 197 | camera2dict(msg, results, camera_dict) 198 | stats['img_count'] += 1 199 | stats['msg_count'] += 1 200 | 201 | elif topic == STEERING_TOPIC: 202 | if 
121 | def main():
122 |     parser = argparse.ArgumentParser(description='Convert rosbag to images and csv.')
123 |     parser.add_argument('-o', '--outdir', type=str, nargs='?', default='/output',
124 |                         help='Output folder')
125 |     parser.add_argument('-i', '--indir', type=str, nargs='?', default='/data',
126 |                         help='Input folder where bagfiles are located')
127 |     parser.add_argument('-f', '--img_format', type=str, nargs='?', default='jpg',
128 |                         help='Image encode format, png or jpg')
129 |     parser.add_argument('-m', dest='msg_only', action='store_true', help='Messages only, no images')
130 |     parser.add_argument('-d', dest='debug', action='store_true', help='Debug print enable')
131 |     parser.set_defaults(msg_only=False)
132 |     parser.set_defaults(debug=False)
133 |     args = parser.parse_args()
134 | 
135 |     img_format = args.img_format
136 |     base_outdir = args.outdir
137 |     indir = args.indir
138 |     msg_only = args.msg_only
139 |     debug_print = args.debug
140 | 
141 |     bridge = CvBridge()
142 | 
143 |     include_images = not msg_only
144 |     include_others = True
145 | 
146 |     filter_topics = [STEERING_TOPIC, GPS_FIX_TOPIC, GPS_FIX_NEW_TOPIC]
147 |     if include_images:
148 |         filter_topics += CAMERA_TOPICS
149 |     if include_others:
150 |         filter_topics += OTHER_TOPICS
151 | 
152 |     bagsets = find_bagsets(indir, filter_topics=filter_topics)
153 |     for bs in bagsets:
154 |         print("Processing set %s" % bs.name)
155 |         sys.stdout.flush()
156 | 
157 |         dataset_outdir = os.path.join(base_outdir, "%s" % bs.name)
158 |         left_outdir = get_outdir(dataset_outdir, "left")
159 |         center_outdir = get_outdir(dataset_outdir, "center")
160 |         right_outdir = get_outdir(dataset_outdir, "right")
161 | 
162 |         camera_cols = ["timestamp", "width", "height", "frame_id", "filename"]
163 |         camera_dict = defaultdict(list)
164 | 
165 |         steering_cols = ["timestamp", "angle", "torque", "speed"]
166 |         steering_dict = defaultdict(list)
167 | 
168 |         gps_cols = ["timestamp", "status", "service", "lat", "long", "alt"]
169 |         gps_dict = defaultdict(list)
170 | 
171 |         if include_others:
172 |             imu_cols = ["timestamp", "ax", "ay", "az"]
173 |             imu_dict = defaultdict(list)
174 | 
175 |             throttle_cols = ["timestamp", "throttle_input"]
176 |             throttle_dict = defaultdict(list)
177 | 
178 |             brake_cols = ["timestamp", "brake_input"]
179 |             brake_dict = defaultdict(list)
180 | 
181 |             gear_cols = ["timestamp", "gear_state", "gear_cmd"]
182 |             gear_dict = defaultdict(list)
183 | 
184 |         bs.write_infos(dataset_outdir)
185 |         readers = bs.get_readers()
186 |         stats_acc = defaultdict(int)
187 | 
188 |         def _process_msg(topic, msg, stats):
189 |             timestamp = msg.header.stamp.to_nsec()
190 |             if topic in CAMERA_TOPICS:
191 |                 outdir = camera_select(topic, (left_outdir, center_outdir, right_outdir))
192 |                 if debug_print:
193 |                     print("%s_camera %d" % (topic[1], timestamp))
194 | 
195 |                 results = write_image(bridge, outdir, msg, fmt=img_format)
196 |                 results['filename'] = os.path.relpath(results['filename'], dataset_outdir)
197 |                 camera2dict(msg, results, camera_dict)
198 |                 stats['img_count'] += 1
199 |                 stats['msg_count'] += 1
200 | 
201 |             elif topic == STEERING_TOPIC:
202 |                 if debug_print:
203 |                     print("steering %d %f" % (timestamp, msg.steering_wheel_angle))
204 | 
205 |                 steering2dict(msg, steering_dict)
206 |                 stats['msg_count'] += 1
207 | 
208 |             elif topic == GPS_FIX_TOPIC or topic == GPS_FIX_NEW_TOPIC:
209 |                 if debug_print:
210 |                     print("gps %d %f, %f" % (timestamp, msg.latitude, msg.longitude))
211 | 
212 |                 gps2dict(msg, gps_dict)
213 |                 stats['msg_count'] += 1
214 |             else:
215 |                 if include_others:
216 |                     if topic == GEAR_TOPIC:
217 |                         gear2dict(msg, gear_dict)
218 |                         stats['msg_count'] += 1
219 |                     elif topic == THROTTLE_TOPIC:
220 |                         throttle2dict(msg, throttle_dict)
221 |                         stats['msg_count'] += 1
222 |                     elif topic == BRAKE_TOPIC:
223 |                         brake2dict(msg, brake_dict)
224 |                         stats['msg_count'] += 1
225 |                     elif topic == IMU_TOPIC:
226 |                         imu2dict(msg, imu_dict)
227 |                         stats['msg_count'] += 1
228 | 
229 |         # no need to cycle through readers in any order for dumping, rip through each one in sequence
230 |         for reader in readers:
231 |             for result in reader.read_messages():
232 |                 _process_msg(*result, stats=stats_acc)
233 |                 if ((stats_acc['img_count'] and stats_acc['img_count'] % 1000 == 0) or
234 |                         (stats_acc['msg_count'] and stats_acc['msg_count'] % 10000 == 0)):
235 |                     print("%d images, %d messages processed..." %
236 |                           (stats_acc['img_count'], stats_acc['msg_count']))
237 |                     sys.stdout.flush()
238 | 
239 |         print("Writing done. %d images, %d messages processed." %
240 |               (stats_acc['img_count'], stats_acc['msg_count']))
241 |         sys.stdout.flush()
242 | 
243 |         if include_images:
244 |             camera_csv_path = os.path.join(dataset_outdir, 'camera.csv')
245 |             camera_df = pd.DataFrame(data=camera_dict, columns=camera_cols)
246 |             camera_df.to_csv(camera_csv_path, index=False)
247 | 
248 |         steering_csv_path = os.path.join(dataset_outdir, 'steering.csv')
249 |         steering_df = pd.DataFrame(data=steering_dict, columns=steering_cols)
250 |         steering_df.to_csv(steering_csv_path, index=False)
251 | 
252 |         gps_csv_path = os.path.join(dataset_outdir, 'gps.csv')
253 |         gps_df = pd.DataFrame(data=gps_dict, columns=gps_cols)
254 |         gps_df.to_csv(gps_csv_path, index=False)
255 | 
256 |         if include_others:
257 |             gear_csv_path = os.path.join(dataset_outdir, 'gear.csv')
258 |             gear_df = pd.DataFrame(data=gear_dict, columns=gear_cols)
259 |             gear_df.to_csv(gear_csv_path, index=False)
260 | 
261 |             throttle_csv_path = os.path.join(dataset_outdir, 'throttle.csv')
262 |             throttle_df = pd.DataFrame(data=throttle_dict, columns=throttle_cols)
263 |             throttle_df.to_csv(throttle_csv_path, index=False)
264 | 
265 |             brake_csv_path = os.path.join(dataset_outdir, 'brake.csv')
266 |             brake_df = pd.DataFrame(data=brake_dict, columns=brake_cols)
267 |             brake_df.to_csv(brake_csv_path, index=False)
268 | 
269 |             imu_csv_path = os.path.join(dataset_outdir, 'imu.csv')
270 |             imu_df = pd.DataFrame(data=imu_dict, columns=imu_cols)
271 |             imu_df.to_csv(imu_csv_path, index=False)
272 | 
273 |         gen_interpolated = True
274 |         if include_images and gen_interpolated:
275 |             # A little pandas magic to interpolate steering/gps samples to camera frames
276 |             camera_df['timestamp'] = pd.to_datetime(camera_df['timestamp'])
277 |             camera_df.set_index(['timestamp'], inplace=True)
278 |             camera_df.index.rename('index', inplace=True)
279 |             steering_df['timestamp'] = pd.to_datetime(steering_df['timestamp'])
280 |             steering_df.set_index(['timestamp'], inplace=True)
281 |             steering_df.index.rename('index', inplace=True)
282 |             gps_df['timestamp'] = pd.to_datetime(gps_df['timestamp'])
283 |             gps_df.set_index(['timestamp'], inplace=True)
284 |             gps_df.index.rename('index', inplace=True)
285 | 
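            # The reduce below outer-joins the camera/steering/gps frames on
            # their shared datetime index; interpolate(method='time') then
            # fills steering/gps values at each camera timestamp, and the
            # .loc[camera_df.index] further down keeps only the camera rows.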
286 | merged = functools.reduce(lambda left, right: pd.merge( 287 | left, right, how='outer', left_index=True, right_index=True), [camera_df, steering_df, gps_df]) 288 | merged.interpolate(method='time', inplace=True) 289 | 290 | filtered_cols = ['timestamp', 'width', 'height', 'frame_id', 'filename', 291 | 'angle', 'torque', 'speed', 292 | 'lat', 'long', 'alt'] 293 | filtered = merged.loc[camera_df.index] # back to only camera rows 294 | filtered.fillna(0.0, inplace=True) 295 | filtered['timestamp'] = filtered.index.astype('int') # add back original timestamp integer col 296 | filtered['width'] = filtered['width'].astype('int') # cast back to int 297 | filtered['height'] = filtered['height'].astype('int') # cast back to int 298 | filtered = filtered[filtered_cols] # filter and reorder columns for final output 299 | 300 | interpolated_csv_path = os.path.join(dataset_outdir, 'interpolated.csv') 301 | filtered.to_csv(interpolated_csv_path, header=True) 302 | 303 | if __name__ == '__main__': 304 | main() -------------------------------------------------------------------------------- /script/bagutils.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | 10 | from __future__ import print_function 11 | from six import iteritems 12 | from cv_bridge import CvBridge, CvBridgeError 13 | from collections import defaultdict 14 | import os 15 | import sys 16 | import fnmatch 17 | import subprocess 18 | import cv2 19 | import yaml 20 | import rosbag 21 | import datetime 22 | 23 | 24 | SEC_PER_NANOSEC = 1e9 25 | MIN_PER_NANOSEC = 6e10 26 | 27 | LEFT_CAMERA_TOPIC = "/left_camera/image_color" 28 | CENTER_CAMERA_TOPIC = "/center_camera/image_color" 29 | RIGHT_CAMERA_TOPIC = "/right_camera/image_color" 30 | LEFT_CAMERA_COMPRESSED_TOPIC = LEFT_CAMERA_TOPIC + "/compressed" 31 | CENTER_CAMERA_COMPRESSED_TOPIC = CENTER_CAMERA_TOPIC + "/compressed" 32 | RIGHT_CAMERA_COMPRESSED_TOPIC = RIGHT_CAMERA_TOPIC + "/compressed" 33 | CAMERA_TOPICS = [LEFT_CAMERA_TOPIC, CENTER_CAMERA_TOPIC, RIGHT_CAMERA_TOPIC, 34 | LEFT_CAMERA_COMPRESSED_TOPIC, CENTER_CAMERA_COMPRESSED_TOPIC, RIGHT_CAMERA_COMPRESSED_TOPIC] 35 | CENTER_CAMERA_TOPICS = [CENTER_CAMERA_TOPIC, CENTER_CAMERA_COMPRESSED_TOPIC] 36 | STEERING_TOPIC = "/vehicle/steering_report" 37 | GPS_FIX_TOPIC = "/vehicle/gps/fix" 38 | GPS_FIX_NEW_TOPIC = "/fix" 39 | 40 | WHEEL_SPEED_TOPIC = "/vehicle/wheel_speed_report" 41 | THROTTLE_TOPIC = "/vehicle/throttle_report" 42 | BRAKE_TOPIC = "/vehicle/brake_report" 43 | GEAR_TOPIC = "/vehicle/gear_report" 44 | IMU_TOPIC = "/vehicle/imu/data_raw" 45 | 46 | OTHER_TOPICS = [ 47 | WHEEL_SPEED_TOPIC, THROTTLE_TOPIC, BRAKE_TOPIC, GEAR_TOPIC, IMU_TOPIC] 48 | 49 | CAMERA_REMAP_LCCL = { 50 | LEFT_CAMERA_TOPIC: CENTER_CAMERA_TOPIC, 51 | LEFT_CAMERA_COMPRESSED_TOPIC: CENTER_CAMERA_COMPRESSED_TOPIC, 52 | CENTER_CAMERA_TOPIC: LEFT_CAMERA_TOPIC, 53 | CENTER_CAMERA_COMPRESSED_TOPIC: LEFT_CAMERA_COMPRESSED_TOPIC, 54 | 'left_camera': 'center_camera', 55 | 'center_camera': 'left_camera', 56 | } 57 | 58 | 59 | def check_remap_hack(filename): 60 | if fnmatch.fnmatch(filename, "2016-10-25*.bag"): 61 | print(filename, 'matches remap 
hack.')
62 |         return CAMERA_REMAP_LCCL
63 |     else:
64 |         return {}
65 | 
66 | 
67 | def get_bag_info(bag_file, nanosec=True):
68 |     info = yaml.safe_load(subprocess.Popen(
69 |         ['rosbag', 'info', '--yaml', bag_file],
70 |         stdout=subprocess.PIPE).communicate()[0])
71 |     if nanosec:
72 |         if 'start' in info:
73 |             info['start'] = int(info['start']*1e9)
74 |         if 'end' in info:
75 |             info['end'] = int(info['end']*1e9)
76 |         if 'duration' in info:
77 |             info['duration'] = int(info['duration']*1e9)
78 |     return info
79 | 
80 | 
81 | def get_topic_names(bag_info_yaml):
82 |     topic_names = []
83 |     topics = bag_info_yaml['topics']
84 |     for t in topics:
85 |         topic_names.append(t['topic'])
86 |     return topic_names
87 | 
88 | 
89 | def ns_to_str(timestamp_ns):
90 |     secs = timestamp_ns / 1e9
91 |     dt = datetime.datetime.fromtimestamp(secs)
92 |     return dt.strftime('%Y-%m-%dT%H:%M:%S.%f')
93 | 
94 | 
95 | class BagReader(object):
96 |     def __init__(self, bagfiles, topics, remap_camera={}):
97 |         self.bagfiles = bagfiles
98 |         self.topics = topics
99 |         self._remap_camera = remap_camera
100 | 
101 |     def read_messages(self):
102 |         for f in self.bagfiles:
103 |             with rosbag.Bag(f, "r") as bag:
104 |                 for topic, msg, _ in bag.read_messages(topics=self.topics):
105 |                     if self._remap_camera and topic in self._remap_camera:
106 |                         topic = self._remap_camera[topic]
107 |                         msg.header.frame_id = self._remap_camera[msg.header.frame_id]
108 |                     yield topic, msg
109 | 
110 | 
111 | JOIN_THRESH_NS = 10 * MIN_PER_NANOSEC
112 | 
113 | 
114 | class BagSet(object):
115 | 
116 |     def __init__(self, name, bagfiles, filter_topics, remap_camera={}):
117 |         self.name = name
118 |         self.files = sorted(bagfiles)
119 |         self.infos = []
120 |         self.topic_map = defaultdict(list)
121 |         self.start_time = None
122 |         self.end_time = None
123 |         self._remap_camera = remap_camera
124 |         self._process_infos(filter_topics)
125 | 
126 |     def _process_infos(self, filter_topics):
127 |         for f in self.files:
128 |             print("Extracting bag info %s" % f)
129 |             sys.stdout.flush()
130 |             info = get_bag_info(f)
131 |             if 'start' not in info or 'end' not in info:
132 |                 print('Ignoring info %s without start/end time' % info['path'])
133 |                 continue
134 |             if self._remap_camera and check_remap_hack(os.path.basename(f)):
135 |                 info['remap'] = self._remap_camera
136 |             info_start = info['start']
137 |             info_end = info['end']
138 |             if not self.start_time or not self.end_time:
139 |                 self._extend_range(info_start, info_end)
140 |             elif (info_start - JOIN_THRESH_NS) <= self.end_time and self.start_time <= (info_end + JOIN_THRESH_NS):
141 |                 self._extend_range(info_start, info_end)
142 |             else:
143 |                 print('Orphaned bag info time range, are there multiple datasets in same folder?')
144 |                 continue
145 |             self.infos.append(info)
146 |             if self._remap_camera:
147 |                 filter_topics = self._filter_topics_remap(filter_topics)
148 |             filtered = [x['topic'] for x in info['topics'] if not filter_topics or x['topic'] in filter_topics]
149 |             gps_fix_replace = False
150 |             if GPS_FIX_NEW_TOPIC in filtered and GPS_FIX_TOPIC in filtered:
151 |                 print("New GPS fix topic %s replacing old %s" % (GPS_FIX_NEW_TOPIC, GPS_FIX_TOPIC))
152 |                 gps_fix_replace = True
153 |             for x in filtered:
154 |                 if gps_fix_replace and x == GPS_FIX_TOPIC:
155 |                     # skip old gps topic
156 |                     continue
157 |                 self.topic_map[x].append((info['start'], info['path']))
158 |                 self.topic_map[x] = sorted(self.topic_map[x])
159 | 
160 |     def _extend_range(self, start_time, end_time):
161 |         if not self.start_time or start_time < self.start_time:
162 |             self.start_time = start_time
163 |         if
164 |             self.end_time = end_time
165 | 
166 |     def _filter_topics_remap(self, filters):
167 |         return [self._remap_camera[x] if x in self._remap_camera else x for x in filters]
168 | 
169 |     def write_infos(self, dest):
170 |         for info in self.infos:
171 |             info_path = os.path.splitext(os.path.basename(info['path']))[0]
172 |             write_file = os.path.join(dest, info_path + '.yaml')
173 |             with open(write_file, 'w') as f:
174 |                 yaml.dump(info, f)
175 | 
176 |     def get_message_count(self, topic_filter=[]):
177 |         count = 0
178 |         for info in self.infos:
179 |             if self._remap_camera:
180 |                 topic_filter = self._filter_topics_remap(topic_filter)
181 |             filtered = [x['topic'] for x in info['topics'] if not topic_filter or x['topic'] in topic_filter]
182 |             gps_fix_replace = False
183 |             if GPS_FIX_NEW_TOPIC in filtered and GPS_FIX_TOPIC in filtered:
184 |                 gps_fix_replace = True
185 |             for topic in info['topics']:
186 |                 if ((not topic_filter or topic['topic'] in topic_filter) and
187 |                         (not gps_fix_replace or topic['topic'] != GPS_FIX_TOPIC)):
188 |                     count += topic['messages']
189 |         return count
190 | 
191 |     def get_readers(self):
192 |         readers = []
193 |         for topic, timestamp_files in iteritems(self.topic_map):
194 |             starts, files = zip(*timestamp_files)
195 |             merged = False
196 |             for r in readers:
197 |                 if r.bagfiles == files:
198 |                     r.topics.append(topic)
199 |                     merged = True
200 |             if not merged:
201 |                 readers.append(BagReader(bagfiles=files, topics=[topic], remap_camera=self._remap_camera))
202 |         return readers
203 | 
204 |     def __repr__(self):
205 |         return "start: %s, end: %s, topic_map: %s" % (self.start_time, self.end_time, str(self.topic_map))
206 | 
207 | 
208 | def find_bagsets(directory, filter_topics=[], pattern="*.bag"):
209 |     sets = []
210 |     for root, dirs, files in os.walk(directory):
211 |         matched_files = []
212 |         remap_camera = {}
213 |         for basename in files:
214 |             if fnmatch.fnmatch(basename, pattern):
215 |                 if not remap_camera:
216 |                     remap_camera = check_remap_hack(basename)
217 |                 filename = os.path.join(root, basename)
218 |                 matched_files.append(filename)
219 |         if matched_files:
220 |             set_name = os.path.relpath(root, directory)
221 |             bag_set = BagSet(set_name, matched_files, filter_topics, remap_camera)
222 |             sets.append(bag_set)
223 |     return sets
224 | 
225 | 
226 | class BagCursor(object):
227 |     def __init__(self, reader):
228 |         self.latest_timestamp = None
229 |         self.read_count = 0
230 |         self.done = False
231 |         self.vals = []
232 |         self.reader = reader
233 |         self._iter = reader.read_messages()
234 | 
235 |     def __bool__(self):
236 |         return not self.done
237 | 
238 |     __nonzero__ = __bool__  # Python 2 compatibility
239 | 
240 |     # Advance cursor by n elements, appending each (timestamp, topic, msg) to the vals list
241 |     def advance(self, n=1):
242 |         if self.done:
243 |             return False
244 |         try:
245 |             while n > 0:
246 |                 topic, msg = next(self._iter)
247 |                 self.read_count += 1
248 |                 timestamp = msg.header.stamp.to_nsec()
249 |                 if not self.latest_timestamp or timestamp > self.latest_timestamp:
250 |                     self.latest_timestamp = timestamp
251 |                 self.vals.append((timestamp, topic, msg))
252 |                 n -= 1
253 |         except StopIteration:
254 |             self.done = True
255 |         return not self.done
256 | 
257 |     # Advance cursor by relative time duration in nanoseconds
258 |     def advance_by(self, duration_ns):
259 |         if not self.latest_timestamp and not self.advance():
260 |             return False
261 |         start_time_ns = self.latest_timestamp
262 |         while self.advance():
263 |             elapsed = self.latest_timestamp - start_time_ns
264 |             if elapsed >= duration_ns:
265 |                 break
266 |         return not self.done
267 | 
268 |     # Advance cursor until specified absolute time in nanoseconds
269 |     def advance_until(self, end_time_ns):
270 |         while self.advance():
271 |             if self.latest_timestamp >= end_time_ns:
272 |                 break
273 |         return not self.done
274 | 
275 |     def collect_vals(self, dest):
276 |         dest.extend(self.vals)
277 |         self.vals = []
278 | 
279 |     def clear_vals(self):
280 |         self.vals = []
281 | 
282 |     def __repr__(self):
283 |         return "Cursor for bags: %s, topics: %s" % (str(self.reader.bagfiles), str(self.reader.topics))
284 | 
285 | 
286 | class CursorGroup(object):
287 |     def __init__(self, readers=[], cursors=[]):
288 |         # a group can be created from readers or from existing cursors
289 |         if readers:
290 |             assert not cursors
291 |             self.cursors = [BagCursor(r) for r in readers]
292 |         elif cursors:
293 |             self.cursors = cursors
294 | 
295 |     def __bool__(self):
296 |         for c in self.cursors:
297 |             if c:
298 |                 return True
299 |         return False
300 | 
301 |     __nonzero__ = __bool__
302 | 
303 |     def advance(self, n=1):
304 |         all_done = True
305 |         for c in self.cursors:
306 |             if c and c.advance(n):
307 |                 all_done = False
308 |         return not all_done
309 | 
310 |     # Advance all cursors by specified duration
311 |     # Risk of cursors drifting over time from each other
312 |     def advance_by(self, duration_ns=1*SEC_PER_NANOSEC):
313 |         all_done = True
314 |         for c in self.cursors:
315 |             if c and c.advance_by(duration_ns):
316 |                 all_done = False
317 |         return not all_done
318 | 
319 |     # Advance all cursors up to same end time
320 |     def advance_until(self, end_time_ns):
321 |         all_done = True
322 |         for c in self.cursors:
323 |             if c and c.advance_until(end_time_ns):
324 |                 all_done = False
325 |         return not all_done
326 | 
327 |     # Advance the first ready cursor in the group by the specified amount and bring the rest
328 |     # up to the same resulting end time.
329 |     # Risk of pulling in large amounts of data if leading stream has a large gap.
330 |     def advance_by_until(self, duration_ns=1*SEC_PER_NANOSEC):
331 |         all_done = True
332 |         end_time_ns = None
333 |         for c in self.cursors:
334 |             ready = False
335 |             if c:
336 |                 if not end_time_ns:
337 |                     ready = c.advance_by(duration_ns)
338 |                     end_time_ns = c.latest_timestamp
339 |                 else:
340 |                     ready = c.advance_until(end_time_ns)
341 |             if ready:
342 |                 all_done = False
343 |         return not all_done
344 | 
345 |     def collect_vals(self, dest):
346 |         for c in self.cursors:
347 |             c.collect_vals(dest)
348 | 
--------------------------------------------------------------------------------
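BagSet, BagCursor, and CursorGroup together let a caller walk several topic streams in rough lockstep without loading whole bags into memory. A minimal usage sketch built only from the functions and constants above (the '/data/bags' path is hypothetical):

from bagutils import find_bagsets, CursorGroup, CENTER_CAMERA_TOPIC, STEERING_TOPIC, SEC_PER_NANOSEC

for bag_set in find_bagsets('/data/bags', filter_topics=[CENTER_CAMERA_TOPIC, STEERING_TOPIC]):
    group = CursorGroup(readers=bag_set.get_readers())
    vals = []
    while group:  # a group is falsy once every cursor is exhausted
        group.advance_by_until(1 * SEC_PER_NANOSEC)  # pull roughly one second from the leading stream
        group.collect_vals(vals)  # vals receives (timestamp_ns, topic, msg) tuples
    vals.sort(key=lambda v: v[0])  # messages arrive grouped per cursor, so order by timestamp before use

--------------------------------------------------------------------------------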
/script/readtf.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | # ==============================================================================
9 | 
10 | from __future__ import print_function
11 | import os
12 | import sys
13 | import tensorflow as tf
14 | 
15 | 
16 | def datafiles(search_dir, name):
17 |     tf_record_pattern = os.path.join(search_dir, '%s-*' % name)
18 |     data_files = tf.gfile.Glob(tf_record_pattern)
19 |     data_files = sorted(data_files)
20 |     if not data_files:
21 |         print('No files found for dataset %s at %s' % (name, search_dir))
22 |     return data_files
23 | 
24 | 
25 | def example_parser(example_serialized):
26 | 
27 |     feature_map = {
28 |         'image/encoded': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
29 |         'image/timestamp': tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
30 |         'steer/angle': tf.FixedLenFeature([2], dtype=tf.float32, default_value=[0.0, 0.0]),
31 |         'steer/timestamp': tf.FixedLenFeature([2], dtype=tf.int64, default_value=[-1, -1]),
32 |         #'gps/lat': tf.FixedLenFeature([2], dtype=tf.float32, default_value=[0.0, 0.0]),
33 |         #'gps/long': tf.FixedLenFeature([2], dtype=tf.float32, default_value=[0.0, 0.0]),
34 |         #'gps/timestamp': tf.VarLenFeature(tf.int64),
35 |     }
36 | 
37 |     features = tf.parse_single_example(example_serialized, feature_map)
38 | 
39 |     image_timestamp = tf.cast(features['image/timestamp'], dtype=tf.int64)
40 |     steering_angles = features['steer/angle']
41 |     steering_timestamps = features['steer/timestamp']
42 | 
43 |     return features['image/encoded'], image_timestamp, steering_angles, steering_timestamps
44 | 
45 | 
46 | def create_read_graph(data_dir, name, num_readers=4, estimated_examples_per_shard=64, coder=None):  # note: coder is currently unused
47 |     # Get sharded tf example files for the dataset
48 |     data_files = datafiles(data_dir, name)
49 | 
50 |     # Create queue for sharded tf example files
51 |     # FIXME the num_epochs argument seems to have no impact? Queue keeps looping forever if not stopped.
52 |     filename_queue = tf.train.string_input_producer(data_files, shuffle=False, capacity=1, num_epochs=1)
53 | 
54 |     # Create queue for examples
55 |     examples_queue = tf.FIFOQueue(capacity=estimated_examples_per_shard + 4, dtypes=[tf.string])
56 | 
57 |     enqueue_ops = []
58 |     processed = []
59 |     if num_readers > 1:
60 |         for _ in range(num_readers):
61 |             reader = tf.TFRecordReader()
62 |             _, example = reader.read(filename_queue)
63 |             enqueue_ops.append(examples_queue.enqueue([example]))
64 |         example_serialized = examples_queue.dequeue()
65 |         tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
66 |     else:
67 |         reader = tf.TFRecordReader()
68 |         _, example_serialized = reader.read(filename_queue)
69 | 
70 |     for _ in range(10):  # build ten parallel parse/decode pipelines for batch_join below
71 |         image_buffer, image_timestamp, steering_angles, steering_timestamps = example_parser(example_serialized)
72 |         decoded_image = tf.image.decode_jpeg(image_buffer)
73 |         print(decoded_image.get_shape(), image_timestamp.get_shape(), steering_angles.get_shape(), steering_timestamps.get_shape())
74 |         decoded_image = tf.reshape(decoded_image, shape=[480, 640, 3])
75 |         processed.append((decoded_image, image_timestamp, steering_angles, steering_timestamps))
76 | 
77 |     batch_size = 10
78 |     batch_queue_capacity = 2 * batch_size
79 |     batch_data = tf.train.batch_join(
80 |         processed,
81 |         batch_size=batch_size,
82 |         capacity=batch_queue_capacity)
83 | 
84 |     return batch_data
85 | 
86 | 
87 | def main():
88 |     data_dir = '/output/combined'
89 |     num_images = 1452601  # expected example count for the combined dataset
90 | 
91 |     # Build graph and initialize variables
92 |     read_op = create_read_graph(data_dir, 'combined')
93 |     init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())  # local init is required for num_epochs to take effect
94 |     sess = tf.Session()
95 |     sess.run(init_op)
96 | 
97 |     # Start input enqueue threads
98 |     coord = tf.train.Coordinator()
99 |     threads = tf.train.start_queue_runners(sess=sess, coord=coord)
100 |     read_count = 0
101 |     try:
102 |         while read_count < num_images and not coord.should_stop():
103 |             images, timestamps, angles, _ = sess.run(read_op)
104 |             for i in range(images.shape[0]):
105 |                 decoded_image = images[i]
106 |                 assert decoded_image.shape[2] == 3
107 |                 print(angles[i])
108 |                 read_count += 1
109 |                 if not read_count % 1000:
110 |                     print("Read %d examples" % read_count)
111 | 
112 |     except tf.errors.OutOfRangeError:
113 |         print("Reading stopped by queue (num_epochs reached)")
114 |     finally:
115 |         # Ask the threads to stop.
116 |         coord.request_stop()
117 | 
118 |     print("Done reading %d images" % read_count)
119 | 
120 |     # Wait for threads to finish.
121 |     coord.join(threads)
122 |     sess.close()
123 | 
124 | if __name__ == '__main__':
125 |     main()
--------------------------------------------------------------------------------
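The queue-runner pipeline in readtf.py is the classic TF 1.x input path, and the FIXME about num_epochs is a known pain point of that API. For comparison, the same sharded records can be read with the tf.data API that also ships in the pinned TensorFlow 1.13; a minimal sketch reusing datafiles and example_parser from readtf.py (the decode helper is added here for illustration, and the data directory is the same '/output/combined' used in main):

import tensorflow as tf
from readtf import datafiles, example_parser

def decode(encoded, ts, angles, steer_ts):
    # decode_jpeg yields an unknown static shape; reshape to the known camera size
    image = tf.reshape(tf.image.decode_jpeg(encoded), [480, 640, 3])
    return image, ts, angles, steer_ts

dataset = (tf.data.TFRecordDataset(datafiles('/output/combined', 'combined'))
           .map(example_parser)
           .map(decode)
           .batch(10))
batch = dataset.make_one_shot_iterator().get_next()

with tf.Session() as sess:
    try:
        while True:
            images, timestamps, angles, steer_ts = sess.run(batch)
    except tf.errors.OutOfRangeError:
        pass  # one clean pass over the data; no queue runners or coordinators needed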