' + ' '.join(['' + sentence + '' for sentence in sentences]) + '
'
87 | body = body.encode('utf8')
88 |
89 | tf_example = example_pb2.Example()
90 | tf_example.features.feature['article'].bytes_list.value.extend([body])
91 | tf_example.features.feature['abstract'].bytes_list.value.extend([title])
92 | tf_example_str = tf_example.SerializeToString()
93 | str_len = len(tf_example_str)
94 | writer.write(struct.pack('q', str_len))
95 | writer.write(struct.pack('%ds' % str_len, tf_example_str))
96 |
def main(unused_argv):
    """Run the conversion selected by ``FLAGS.command``.

    ``FLAGS.in_directories`` and ``FLAGS.out_files`` are comma-separated
    lists. Two commands are supported:

    * ``text_to_binary`` -- additionally requires ``FLAGS.split``, a
      comma-separated list of floats with exactly one entry per output
      file; the parsed values are forwarded to ``_text_to_binary``.
    * ``text_to_vocabulary`` -- requires exactly one output file, which is
      forwarded to ``_text_to_vocabulary``.

    Args:
        unused_argv: Positional arguments supplied by the launcher; ignored.

    Raises:
        ValueError: If a required flag is missing or empty, if a split
            entry is not a valid float, if the number of output files does
            not match what the command expects, or if the command is not
            recognized.
    """
    # Validate with explicit exceptions rather than `assert`: assertions are
    # stripped when Python runs with -O, which would let bad input through.
    missing = [
        name
        for name in ('command', 'in_directories', 'out_files')
        if not getattr(FLAGS, name)
    ]
    if missing:
        raise ValueError(
            'Missing required flag(s): %s' % ', '.join(missing))

    output_filenames = FLAGS.out_files.split(',')
    input_directories = FLAGS.in_directories.split(',')

    if FLAGS.command == 'text_to_binary':
        if not FLAGS.split:
            raise ValueError(
                "Flag 'split' is required for command 'text_to_binary'")

        # float() itself raises ValueError on a malformed entry.
        split_fractions = [float(s) for s in FLAGS.split.split(',')]

        if len(output_filenames) != len(split_fractions):
            raise ValueError(
                'Expected one split fraction per output file, got %d '
                'output file(s) and %d split fraction(s)'
                % (len(output_filenames), len(split_fractions)))

        _text_to_binary(input_directories, output_filenames, split_fractions)

    elif FLAGS.command == 'text_to_vocabulary':
        if len(output_filenames) != 1:
            raise ValueError(
                "Command 'text_to_vocabulary' takes exactly one output "
                'file, got %d' % len(output_filenames))

        _text_to_vocabulary(input_directories, output_filenames[0])

    else:
        # Previously an unknown command fell through and the script exited
        # successfully without producing any output.
        raise ValueError(
            "Unknown command %r; expected 'text_to_binary' or "
            "'text_to_vocabulary'" % FLAGS.command)
115 |
# Script entry point: only hand control to tf.app.run() when this file is
# executed directly, not when it is imported as a module.
# NOTE(review): tf.app.run() is expected to parse the command-line flags and
# then invoke main() defined above -- confirm against the TensorFlow version
# this script targets.
116 | if __name__ == '__main__':
117 | tf.app.run()
--------------------------------------------------------------------------------