├── .gitattributes
├── README.md
├── LICENSE
└── dataset_tutorial.ipynb

/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# TensorFlow Dataset Tutorial

This repository contains the notebook used in my Medium article:

https://medium.com/@FrancescoZ/how-to-use-dataset-in-tensorflow-c758ef9e4428
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 FrancescoSaverioZuppichini

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/dataset_tutorial.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  from ._conv import register_converters as _register_converters\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.50035296 0.92651365]\n"
     ]
    }
   ],
   "source": [
    "x = np.random.sample((100,2))\n",
    "# make a dataset from a numpy array\n",
    "dataset = tf.data.Dataset.from_tensor_slices(x)\n",
    "\n",
    "iter = dataset.make_one_shot_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    print(sess.run(el))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(array([0.33327842, 0.90874317]), array([0.02171065]))\n"
     ]
    }
   ],
   "source": [
    "# using two numpy arrays\n",
    "features, labels = (np.random.sample((100,2)), np.random.sample((100,1)))\n",
    "dataset = tf.data.Dataset.from_tensor_slices((features,labels))\n",
    "\n",
    "iter = dataset.make_one_shot_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    print(sess.run(el))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.00786543 0.26009214]\n"
     ]
    }
   ],
   "source": [
    "# using a tensor\n",
    "dataset = tf.data.Dataset.from_tensor_slices(tf.random_uniform([100, 2]))\n",
    "\n",
    "iter = dataset.make_initializable_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    sess.run(iter.initializer)\n",
    "    print(sess.run(el))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.03433903 0.7280311 ]\n"
     ]
    }
   ],
   "source": [
    "# using a placeholder\n",
    "x = tf.placeholder(tf.float32, shape=[None,2])\n",
    "dataset = tf.data.Dataset.from_tensor_slices(x)\n",
    "\n",
    "data = np.random.sample((100,2))\n",
    "\n",
    "iter = dataset.make_initializable_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    sess.run(iter.initializer, feed_dict={ x: data })\n",
    "    print(sess.run(el))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1]]\n",
      "[[2]\n",
      " [3]]\n",
      "[[3]\n",
      " [4]\n",
      " [5]]\n"
     ]
    }
   ],
   "source": [
    "# from generator\n",
    "sequence = np.array([[[1]],[[2],[3]],[[3],[4],[5]]])\n",
    "\n",
    "def generator():\n",
    "    for el in sequence:\n",
    "        yield el\n",
    "\n",
    "dataset = tf.data.Dataset.from_generator(generator,\n",
    "                                         output_types=tf.int64,\n",
    "                                         output_shapes=tf.TensorShape([None, 1]))\n",
    "\n",
    "iter = dataset.make_initializable_iterator()\n",
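    "# each sess.run(el) pulls one generated element; its length along the first axis can vary\n",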
"el = iter.get_next()\n", 157 | "\n", 158 | "with tf.Session() as sess:\n", 159 | " sess.run(iter.initializer)\n", 160 | " print(sess.run(el))\n", 161 | " print(sess.run(el))\n", 162 | " print(sess.run(el))\n" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 7, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "name": "stdout", 172 | "output_type": "stream", 173 | "text": [ 174 | "[array([1., 2.], dtype=float32), array([0.], dtype=float32)]\n" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "# initializable iterator to switch between data\n", 180 | "EPOCHS = 10\n", 181 | "\n", 182 | "x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])\n", 183 | "dataset = tf.data.Dataset.from_tensor_slices((x, y))\n", 184 | "\n", 185 | "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n", 186 | "test_data = (np.array([[1,2]]), np.array([[0]]))\n", 187 | "\n", 188 | "iter = dataset.make_initializable_iterator()\n", 189 | "features, labels = iter.get_next()\n", 190 | "\n", 191 | "with tf.Session() as sess:\n", 192 | "# initialise iterator with train data\n", 193 | " sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1]})\n", 194 | " for _ in range(EPOCHS):\n", 195 | " sess.run([features, labels])\n", 196 | "# switch to test data\n", 197 | " sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1]})\n", 198 | " print(sess.run([features, labels]))\n", 199 | "\n", 200 | " \n", 201 | " " 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 8, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "name": "stdout", 211 | "output_type": "stream", 212 | "text": [ 213 | "[array([0.94182994, 0.26802265]), array([0.81551463])]\n" 214 | ] 215 | } 216 | ], 217 | "source": [ 218 | "# Reinitializable iterator to switch between Datasets\n", 219 | "EPOCHS = 10\n", 220 | "# making fake data using numpy\n", 221 | "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n", 222 | "test_data = (np.random.sample((10,2)), np.random.sample((10,1)))\n", 223 | "# create two datasets, one for training and one for test\n", 224 | "train_dataset = tf.data.Dataset.from_tensor_slices(train_data)\n", 225 | "test_dataset = tf.data.Dataset.from_tensor_slices(test_data)\n", 226 | "# create a iterator of the correct shape and type\n", 227 | "iter = tf.data.Iterator.from_structure(train_dataset.output_types,\n", 228 | " train_dataset.output_shapes)\n", 229 | "features, labels = iter.get_next()\n", 230 | "# create the initialisation operations\n", 231 | "train_init_op = iter.make_initializer(train_dataset)\n", 232 | "test_init_op = iter.make_initializer(test_dataset)\n", 233 | "\n", 234 | "with tf.Session() as sess:\n", 235 | " sess.run(train_init_op) # switch to train dataset\n", 236 | " for _ in range(EPOCHS):\n", 237 | " sess.run([features, labels])\n", 238 | " sess.run(test_init_op) # switch to val dataset\n", 239 | " print(sess.run([features, labels]))\n", 240 | "\n", 241 | " \n", 242 | " " 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 9, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "name": "stdout", 252 | "output_type": "stream", 253 | "text": [ 254 | "[0.8552025 0.13344285] [0.24534453]\n", 255 | "[0.23880187 0.2294315 ] [0.77315474]\n", 256 | "[0.763904 0.439595] [0.42727667]\n", 257 | "[0.6563372 0.1366187] [0.02278621]\n", 258 | "[0.71135175 0.394754 ] [0.8552778]\n", 259 | "[0.7329701 0.42924434] [0.43608633]\n", 260 | "[0.8240853 
      "[0.65556693 0.67978406] [0.8228361]\n",
      "[0.02365288 0.18461536] [0.85140544]\n",
      "[0.48037764 0.7320316 ] [0.773141]\n",
      "[0.6671238 0.8491173] [0.45188755]\n"
     ]
    }
   ],
   "source": [
    "# feedable iterator to switch between iterators\n",
    "EPOCHS = 10\n",
    "# making fake data using numpy\n",
    "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n",
    "test_data = (np.random.sample((10,2)), np.random.sample((10,1)))\n",
    "# create placeholder\n",
    "x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])\n",
    "# create two datasets, one for training and one for test\n",
    "train_dataset = tf.data.Dataset.from_tensor_slices((x,y))\n",
    "test_dataset = tf.data.Dataset.from_tensor_slices((x,y))\n",
    "# create the iterators from the datasets\n",
    "train_iterator = train_dataset.make_initializable_iterator()\n",
    "test_iterator = test_dataset.make_initializable_iterator()\n",
    "# same as in the doc https://www.tensorflow.org/programmers_guide/datasets#creating_an_iterator\n",
    "handle = tf.placeholder(tf.string, shape=[])\n",
    "iter = tf.data.Iterator.from_string_handle(\n",
    "    handle, train_dataset.output_types, train_dataset.output_shapes)\n",
    "next_elements = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    train_handle = sess.run(train_iterator.string_handle())\n",
    "    test_handle = sess.run(test_iterator.string_handle())\n",
    "    \n",
    "    # initialise iterators. In our case we could have used one-shot iterators instead,\n",
    "    # feeding the data directly to Dataset.from_tensor_slices, but this\n",
    "    # approach is more general\n",
    "    sess.run(train_iterator.initializer, feed_dict={ x: train_data[0], y: train_data[1]})\n",
    "    sess.run(test_iterator.initializer, feed_dict={ x: test_data[0], y: test_data[1]})\n",
    "    \n",
    "    for _ in range(EPOCHS):\n",
    "        feats, labs = sess.run(next_elements, feed_dict={handle: train_handle}) # avoid overwriting the x, y placeholders\n",
    "        print(feats, labs)\n",
    "        \n",
    "    feats, labs = sess.run(next_elements, feed_dict={handle: test_handle})\n",
    "    print(feats, labs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.70861276 0.91522017]\n",
      " [0.993154 0.74425373]\n",
      " [0.42730845 0.03037355]\n",
      " [0.54031161 0.57429001]]\n"
     ]
    }
   ],
   "source": [
    "# BATCHING\n",
    "BATCH_SIZE = 4\n",
    "x = np.random.sample((100,2))\n",
    "# make a dataset from a numpy array\n",
    "dataset = tf.data.Dataset.from_tensor_slices(x).batch(BATCH_SIZE)\n",
    "\n",
    "iter = dataset.make_one_shot_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    print(sess.run(el))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1]\n",
      "[2]\n",
      "[3]\n",
      "[4]\n",
      "[1]\n",
      "[2]\n",
      "[3]\n",
      "[4]\n"
     ]
    }
   ],
   "source": [
    "# REPEAT\n",
    "BATCH_SIZE = 4\n",
    "x = np.array([[1],[2],[3],[4]])\n",
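    "# repeat() restarts the dataset when it is exhausted; with no count it loops indefinitely\n",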
| "# make a dataset from a numpy array\n", 363 | "dataset = tf.data.Dataset.from_tensor_slices(x)\n", 364 | "dataset = dataset.repeat()\n", 365 | "\n", 366 | "iter = dataset.make_one_shot_iterator()\n", 367 | "el = iter.get_next()\n", 368 | "\n", 369 | "with tf.Session() as sess:\n", 370 | " for _ in range(8):\n", 371 | " print(sess.run(el))" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "# MAP\n", 381 | "x = np.array([[1],[2],[3],[4]])\n", 382 | "# make a dataset from a numpy array\n", 383 | "dataset = tf.data.Dataset.from_tensor_slices(x)\n", 384 | "dataset = dataset.map(lambda x: x*2)\n", 385 | "\n", 386 | "iter = dataset.make_one_shot_iterator()\n", 387 | "el = iter.get_next()\n", 388 | "\n", 389 | "with tf.Session() as sess:\n", 390 | "# this will run forever\n", 391 | " for _ in range(len(x)):\n", 392 | " print(sess.run(el))" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 12, 398 | "metadata": {}, 399 | "outputs": [ 400 | { 401 | "name": "stdout", 402 | "output_type": "stream", 403 | "text": [ 404 | "[[3]\n", 405 | " [1]\n", 406 | " [2]\n", 407 | " [4]]\n" 408 | ] 409 | } 410 | ], 411 | "source": [ 412 | "# SHUFFLE\n", 413 | "BATCH_SIZE = 4\n", 414 | "x = np.array([[1],[2],[3],[4]])\n", 415 | "# make a dataset from a numpy array\n", 416 | "dataset = tf.data.Dataset.from_tensor_slices(x)\n", 417 | "dataset = dataset.shuffle(buffer_size=100)\n", 418 | "dataset = dataset.batch(BATCH_SIZE)\n", 419 | "\n", 420 | "iter = dataset.make_one_shot_iterator()\n", 421 | "el = iter.get_next()\n", 422 | "\n", 423 | "with tf.Session() as sess:\n", 424 | " print(sess.run(el))" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 13, 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | "name": "stdout", 434 | "output_type": "stream", 435 | "text": [ 436 | "Iter: 0, Loss: 0.1913\n", 437 | "Iter: 1, Loss: 0.1814\n", 438 | "Iter: 2, Loss: 0.1720\n", 439 | "Iter: 3, Loss: 0.1631\n", 440 | "Iter: 4, Loss: 0.1547\n", 441 | "Iter: 5, Loss: 0.1469\n", 442 | "Iter: 6, Loss: 0.1397\n", 443 | "Iter: 7, Loss: 0.1329\n", 444 | "Iter: 8, Loss: 0.1267\n", 445 | "Iter: 9, Loss: 0.1210\n" 446 | ] 447 | } 448 | ], 449 | "source": [ 450 | "# how to pass the value to a model\n", 451 | "EPOCHS = 10\n", 452 | "BATCH_SIZE = 16\n", 453 | "# using two numpy arrays\n", 454 | "features, labels = (np.array([np.random.sample((100,2))]), \n", 455 | " np.array([np.random.sample((100,1))]))\n", 456 | "\n", 457 | "dataset = tf.data.Dataset.from_tensor_slices((features,labels)).repeat().batch(BATCH_SIZE)\n", 458 | "\n", 459 | "iter = dataset.make_one_shot_iterator()\n", 460 | "x, y = iter.get_next()\n", 461 | "\n", 462 | "# make a simple model\n", 463 | "net = tf.layers.dense(x, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input\n", 464 | "net = tf.layers.dense(net, 8, activation=tf.tanh)\n", 465 | "prediction = tf.layers.dense(net, 1, activation=tf.tanh)\n", 466 | "\n", 467 | "loss = tf.losses.mean_squared_error(prediction, y) # pass the second value from iter.get_net() as label\n", 468 | "train_op = tf.train.AdamOptimizer().minimize(loss)\n", 469 | "\n", 470 | "with tf.Session() as sess:\n", 471 | " sess.run(tf.global_variables_initializer())\n", 472 | " for i in range(EPOCHS):\n", 473 | " _, loss_value = sess.run([train_op, loss])\n", 474 | " print(\"Iter: {}, Loss: {:.4f}\".format(i, loss_value))" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 
479 | "execution_count": 18, 480 | "metadata": {}, 481 | "outputs": [ 482 | { 483 | "name": "stdout", 484 | "output_type": "stream", 485 | "text": [ 486 | "3\n", 487 | "Training...\n", 488 | "Iter: 0, Loss: 1.4389\n", 489 | "Iter: 1, Loss: 1.4704\n", 490 | "Iter: 2, Loss: 1.4081\n", 491 | "Iter: 3, Loss: 1.2877\n", 492 | "Iter: 4, Loss: 1.1842\n", 493 | "Iter: 5, Loss: 1.1944\n", 494 | "Iter: 6, Loss: 1.1166\n", 495 | "Iter: 7, Loss: 0.9924\n", 496 | "Iter: 8, Loss: 0.8997\n", 497 | "Iter: 9, Loss: 0.8817\n", 498 | "Test Loss: 0.836423\n" 499 | ] 500 | } 501 | ], 502 | "source": [ 503 | "# Wrapping all together -> Switch between train and test set using Initializable iterator\n", 504 | "EPOCHS = 10\n", 505 | "# create a placeholder to dynamically switch between batch sizes\n", 506 | "batch_size = tf.placeholder(tf.int64)\n", 507 | "BATCH_SIZE = 32\n", 508 | "\n", 509 | "x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])\n", 510 | "dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size).repeat()\n", 511 | "\n", 512 | "# using two numpy arrays\n", 513 | "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n", 514 | "test_data = (np.random.sample((20,2)), np.random.sample((20,1)))\n", 515 | "\n", 516 | "iter = dataset.make_initializable_iterator()\n", 517 | "features, labels = iter.get_next()\n", 518 | "# make a simple model\n", 519 | "net = tf.layers.dense(features, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input\n", 520 | "net = tf.layers.dense(net, 8, activation=tf.tanh)\n", 521 | "prediction = tf.layers.dense(net, 1, activation=tf.tanh)\n", 522 | "\n", 523 | "loss = tf.losses.mean_squared_error(prediction, labels) # pass the second value from iter.get_net() as label\n", 524 | "train_op = tf.train.AdamOptimizer().minimize(loss)\n", 525 | "\n", 526 | "n_batches = train_data[0].shape[0] // BATCH_SIZE\n", 527 | "\n", 528 | "with tf.Session() as sess:\n", 529 | " sess.run(tf.global_variables_initializer())\n", 530 | " # initialise iterator with train data\n", 531 | " sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})\n", 532 | " print('Training...')\n", 533 | " for i in range(EPOCHS):\n", 534 | " tot_loss = 0\n", 535 | " for _ in range(n_batches):\n", 536 | " _, loss_value = sess.run([train_op, loss])\n", 537 | " tot_loss += loss_value\n", 538 | " print(\"Iter: {}, Loss: {:.4f}\".format(i, tot_loss / n_batches))\n", 539 | " # initialise iterator with test data\n", 540 | " sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1], batch_size: test_data[0].shape[0]})\n", 541 | " print('Test Loss: {:4f}'.format(sess.run(loss)))\n" 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 10, 547 | "metadata": {}, 548 | "outputs": [ 549 | { 550 | "name": "stdout", 551 | "output_type": "stream", 552 | "text": [ 553 | "Training...\n", 554 | "Iter: 0, Loss: 0.1602\n", 555 | "Iter: 1, Loss: 0.1191\n", 556 | "Iter: 2, Loss: 0.0964\n", 557 | "Iter: 3, Loss: 0.0907\n", 558 | "Iter: 4, Loss: 0.0738\n", 559 | "Iter: 5, Loss: 0.0819\n", 560 | "Iter: 6, Loss: 0.0728\n", 561 | "Iter: 7, Loss: 0.0881\n", 562 | "Iter: 8, Loss: 0.0765\n", 563 | "Iter: 9, Loss: 0.0729\n", 564 | "Test Loss: 0.091081\n" 565 | ] 566 | } 567 | ], 568 | "source": [ 569 | "# Wrapping all together -> Switch between train and test set using Reinitializable iterator\n", 570 | "EPOCHS = 10\n", 571 | "# create a placeholder to dynamically switch between batch 
sizes\n", 572 | "batch_size = tf.placeholder(tf.int64)\n", 573 | "\n", 574 | "x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])\n", 575 | "train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(batch_size).repeat()\n", 576 | "test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(batch_size) # always batch even if you want to one shot it\n", 577 | "# using two numpy arrays\n", 578 | "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n", 579 | "test_data = (np.random.sample((20,2)), np.random.sample((20,1)))\n", 580 | "\n", 581 | "# create a iterator of the correct shape and type\n", 582 | "iter = tf.data.Iterator.from_structure(train_dataset.output_types,\n", 583 | " train_dataset.output_shapes)\n", 584 | "features, labels = iter.get_next()\n", 585 | "# create the initialisation operations\n", 586 | "train_init_op = iter.make_initializer(train_dataset)\n", 587 | "test_init_op = iter.make_initializer(test_dataset)\n", 588 | "\n", 589 | "# make a simple model\n", 590 | "net = tf.layers.dense(features, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input\n", 591 | "net = tf.layers.dense(net, 8, activation=tf.tanh)\n", 592 | "prediction = tf.layers.dense(net, 1, activation=tf.tanh)\n", 593 | "\n", 594 | "loss = tf.losses.mean_squared_error(prediction, labels) # pass the second value from iter.get_net() as label\n", 595 | "train_op = tf.train.AdamOptimizer().minimize(loss)\n", 596 | "\n", 597 | "with tf.Session() as sess:\n", 598 | " sess.run(tf.global_variables_initializer())\n", 599 | " # initialise iterator with train data\n", 600 | " sess.run(train_init_op, feed_dict = {x : train_data[0], y: train_data[1], batch_size: 16})\n", 601 | " print('Training...')\n", 602 | " for i in range(EPOCHS):\n", 603 | " tot_loss = 0\n", 604 | " for _ in range(n_batches):\n", 605 | " _, loss_value = sess.run([train_op, loss])\n", 606 | " tot_loss += loss_value\n", 607 | " print(\"Iter: {}, Loss: {:.4f}\".format(i, tot_loss / n_batches))\n", 608 | " # initialise iterator with test data\n", 609 | " sess.run(test_init_op, feed_dict = {x : test_data[0], y: test_data[1], batch_size:len(test_data[0])})\n", 610 | " print('Test Loss: {:4f}'.format(sess.run(loss)))\n" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": 32, 616 | "metadata": {}, 617 | "outputs": [ 618 | { 619 | "name": "stdout", 620 | "output_type": "stream", 621 | "text": [ 622 | "{'sentiment': , 'text': }\n", 623 | "[array([b\"@MENTION, i agree! i'm trying to finish my thesis..and so far it's not going anywhere\",\n", 624 | " b'@MENTION erm, sacre coeur even... james -1',\n", 625 | " b\"@MENTION now am depressed and br'3ii is sleepin since i came home\",\n", 626 | " b'just finishing what turned out to be a nice day',\n", 627 | " b\"it's over. it was great. dollhouse\",\n", 628 | " b'just got bck from cross-country practice wooo 3 miles!',\n", 629 | " b'@MENTION you mean a man who cheats on his wife habitually and is a complete hypocrite... they have plenty of those already.',\n", 630 | " b'i feel bad for che ming wang though, he cant seem to get it together... and has an era of 30+',\n", 631 | " b'@MENTION @MENTION ok.. so i tweeted about the rains and it is not raining anymore. atleast i wont have to water the plants tomorrow.',\n", 632 | " b'is gutted about katie & peter love them!',\n", 633 | " b\"finally home... long day... had a great time with vet's from every era!\",\n", 634 | " b\"@MENTION yucky!!! 
      "       b'@MENTION ever been to the antiques roadshow? my favorite show',\n",
      "       b'lovin virginia! hopefully ill pick up an accent 4 a lil while',\n",
      "       b\"@MENTION yea me too i just drink a cup of tea, but don't worry i got some snacks with me\",\n",
      "       b'got what could quite possibly be the worst paper cut ever today. corners of file folders = paper daggers. ouchie.',\n",
      "       b'sneaking in some computer time. wish you all a good day!',\n",
      "       b\"@MENTION i'm pretty bored with it too\",\n",
      "       b'too early to call a landslide victory for m14? lebanonelections',\n",
      "       b\"school has ruined me so much that i don't even know how to sleep in anymore\",\n",
      "       b\"is at work! it's been a long weekend\",\n",
      "       b'feeling a bit sick also very bored!',\n",
      "       b'misses mr. hollinger. misses callin him hubby more than anything',\n",
      "       b'616 words i loooveeee jackson rathbone <\\xc3\\xb4\\xc3\\xb8\\xcf\\x89',\n",
      "       b'sucky day.. first i havta take the bus 2 work then it breaks down nd work suckd of course',\n",
      "       b'back from skaterhockey. crazy old bears 8 @MENTION reloaded 2. 6 goals against us in the last 20 mins',\n",
      "       b'this is sad! john and kate are officially filing for divorce...as bad as the times have been this still shocks me.',\n",
      "       b'@MENTION i wish i could',\n",
      "       b'@MENTION and what do u see!?!? *raises an eyebrow in amusement*',\n",
      "       b'@MENTION helloooo promm dress..ahh i need to get onee :| whens yours ? debenhams av a salee x',\n",
      "       b'@MENTION heyy! thx 4 making \"back around+lyrics\" gosshh! its totally amazing ! amazing work u got there!',\n",
      "       b'@MENTION aside from the use of \"sex\" as a term, i like that latest post'],\n",
      "       dtype=object), array([0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0,\n",
      "       0, 1, 0, 0, 0, 0, 1, 1, 1, 1], dtype=int32)]\n"
     ]
    }
   ],
   "source": [
    "# load a csv\n",
    "CSV_PATH = './tweets.csv'\n",
    "dataset = tf.contrib.data.make_csv_dataset(CSV_PATH, batch_size=32)\n",
    "iter = dataset.make_one_shot_iterator()\n",
    "next_element = iter.get_next() # renamed to avoid shadowing the built-in next\n",
    "print(next_element) # a dict whose keys are the column names and whose values are the column tensors\n",
    "inputs, labels = next_element['text'], next_element['sentiment']\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    print(sess.run([inputs,labels]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "log_time = {}\n",
    "# copied from https://medium.com/pythonhive/python-decorator-to-measure-the-execution-time-of-methods-fa04cb6bb36d\n",
    "def how_much(method):\n",
    "    def timed(*args, **kw):\n",
    "        ts = time.time()\n",
    "        result = method(*args, **kw)\n",
    "        te = time.time()\n",
    "        \n",
    "        if 'log_time' in kw:\n",
    "            name = kw.get('log_name', method.__name__)\n",
    "            kw['log_time'][name] = (te - ts)\n",
    "        \n",
    "        return result\n",
    "    return timed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "((5000, 32, 32), (5000, 20)) ((1000, 32, 32), (1000, 20))\n"
     ]
    }
   ],
   "source": [
    "# benchmark\n",
    "import time\n",
    "DATA_SIZE = 5000\n",
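    "# synthetic data: the four iterator styles defined below are timed on identical arrays\n",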
5000\n", 712 | "DATA_SHAPE = ((32,32),(20,))\n", 713 | "BATCH_SIZE = 64 \n", 714 | "N_BATCHES = DATA_SIZE // BATCH_SIZE\n", 715 | "EPOCHS = 10\n", 716 | "\n", 717 | "test_size = (DATA_SIZE//100)*20 \n", 718 | "\n", 719 | "train_shape = ((DATA_SIZE, *DATA_SHAPE[0]),(DATA_SIZE, *DATA_SHAPE[1]))\n", 720 | "test_shape = ((test_size, *DATA_SHAPE[0]),(test_size, *DATA_SHAPE[1]))\n", 721 | "print(train_shape, test_shape)\n", 722 | "train_data = (np.random.sample(train_shape[0]), np.random.sample(train_shape[1]))\n", 723 | "test_data = (np.random.sample(test_shape[0]), np.random.sample(test_shape[1])) " 724 | ] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": 4, 729 | "metadata": { 730 | "scrolled": false 731 | }, 732 | "outputs": [ 733 | { 734 | "name": "stdout", 735 | "output_type": "stream", 736 | "text": [ 737 | "[None, 32, 32] [None, 20]\n", 738 | "one_shot\n", 739 | "0\n", 740 | "1\n", 741 | "2\n", 742 | "3\n", 743 | "4\n", 744 | "5\n", 745 | "6\n", 746 | "7\n", 747 | "8\n", 748 | "9\n", 749 | "initialisable\n", 750 | "0\n", 751 | "1\n", 752 | "2\n", 753 | "3\n", 754 | "4\n", 755 | "5\n", 756 | "6\n", 757 | "7\n", 758 | "8\n", 759 | "9\n", 760 | "reinitializable\n", 761 | "0\n", 762 | "1\n", 763 | "2\n", 764 | "3\n", 765 | "4\n", 766 | "5\n", 767 | "6\n", 768 | "7\n", 769 | "8\n", 770 | "9\n", 771 | "feedable\n", 772 | "0\n", 773 | "1\n", 774 | "2\n", 775 | "3\n", 776 | "4\n", 777 | "5\n", 778 | "6\n", 779 | "7\n", 780 | "8\n", 781 | "9\n" 782 | ] 783 | }, 784 | { 785 | "data": { 786 | "text/plain": [ 787 | "[(1.5659220218658447, 'reinitializable'),\n", 788 | " (1.581655740737915, 'initialisable'),\n", 789 | " (1.7346899509429932, 'feedable'),\n", 790 | " (2.3557801246643066, 'one_shot')]" 791 | ] 792 | }, 793 | "execution_count": 4, 794 | "metadata": {}, 795 | "output_type": "execute_result" 796 | } 797 | ], 798 | "source": [ 799 | "# used to keep track of the methodds\n", 800 | "log_time = {}\n", 801 | "\n", 802 | "tf.reset_default_graph()\n", 803 | "sess = tf.InteractiveSession()\n", 804 | "\n", 805 | "input_shape = [None, *DATA_SHAPE[0]] # [None, 64, 64, 3]\n", 806 | "output_shape = [None,*DATA_SHAPE[1]] # [None, 20]\n", 807 | "print(input_shape, output_shape)\n", 808 | "\n", 809 | "x, y = tf.placeholder(tf.float32, shape=input_shape), tf.placeholder(tf.float32, shape=output_shape)\n", 810 | "\n", 811 | "@how_much\n", 812 | "def one_shot(**kwargs):\n", 813 | " print('one_shot')\n", 814 | " train_dataset = tf.data.Dataset.from_tensor_slices(train_data).batch(BATCH_SIZE).repeat()\n", 815 | " train_el = train_dataset.make_one_shot_iterator().get_next()\n", 816 | " \n", 817 | " test_dataset = tf.data.Dataset.from_tensor_slices(test_data).batch(BATCH_SIZE).repeat()\n", 818 | " test_el = test_dataset.make_one_shot_iterator().get_next()\n", 819 | " for i in range(EPOCHS):\n", 820 | " print(i)\n", 821 | " for _ in range(N_BATCHES):\n", 822 | " sess.run(train_el)\n", 823 | " for _ in range(N_BATCHES):\n", 824 | " sess.run(test_el)\n", 825 | " \n", 826 | "@how_much\n", 827 | "def initialisable(**kwargs):\n", 828 | " print('initialisable')\n", 829 | " dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(BATCH_SIZE).repeat()\n", 830 | "\n", 831 | " iter = dataset.make_initializable_iterator()\n", 832 | " elements = iter.get_next()\n", 833 | " \n", 834 | " for i in range(EPOCHS):\n", 835 | " print(i)\n", 836 | " sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1]})\n", 837 | " for _ in range(N_BATCHES):\n", 838 | " sess.run(elements)\n", 839 | " 
    "        for _ in range(N_BATCHES):\n",
    "            sess.run(elements)\n",
    "@how_much \n",
    "def reinitializable(**kwargs):\n",
    "    print('reinitializable')\n",
    "    # create two datasets, one for training and one for test\n",
    "    train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()\n",
    "    test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()\n",
    "    # create an iterator of the correct shape and type\n",
    "    iter = tf.data.Iterator.from_structure(train_dataset.output_types,\n",
    "                                           train_dataset.output_shapes)\n",
    "    elements = iter.get_next()\n",
    "    # create the initialisation operations\n",
    "    train_init_op = iter.make_initializer(train_dataset)\n",
    "    test_init_op = iter.make_initializer(test_dataset)\n",
    "    \n",
    "    for i in range(EPOCHS):\n",
    "        print(i)\n",
    "        sess.run(train_init_op, feed_dict={ x: train_data[0], y: train_data[1]})\n",
    "        for _ in range(N_BATCHES):\n",
    "            sess.run(elements)\n",
    "        sess.run(test_init_op, feed_dict={ x: test_data[0], y: test_data[1]})\n",
    "        for _ in range(N_BATCHES):\n",
    "            sess.run(elements)\n",
    "@how_much \n",
    "def feedable(**kwargs):\n",
    "    print('feedable')\n",
    "    # create two datasets, one for training and one for test\n",
    "    train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()\n",
    "    test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()\n",
    "    # create the iterators from the datasets\n",
    "    train_iterator = train_dataset.make_initializable_iterator()\n",
    "    test_iterator = test_dataset.make_initializable_iterator()\n",
    "\n",
    "    handle = tf.placeholder(tf.string, shape=[])\n",
    "    iter = tf.data.Iterator.from_string_handle(\n",
    "        handle, train_dataset.output_types, train_dataset.output_shapes)\n",
    "    elements = iter.get_next()\n",
    "\n",
    "    train_handle = sess.run(train_iterator.string_handle())\n",
    "    test_handle = sess.run(test_iterator.string_handle())\n",
    "\n",
    "    sess.run(train_iterator.initializer, feed_dict={ x: train_data[0], y: train_data[1]})\n",
    "    sess.run(test_iterator.initializer, feed_dict={ x: test_data[0], y: test_data[1]})\n",
    "\n",
    "    for i in range(EPOCHS):\n",
    "        print(i)\n",
    "        for _ in range(N_BATCHES):\n",
    "            sess.run(elements, feed_dict={handle: train_handle})\n",
    "        for _ in range(N_BATCHES):\n",
    "            sess.run(elements, feed_dict={handle: test_handle})\n",
    "    \n",
    "one_shot(log_time=log_time)\n",
    "initialisable(log_time=log_time)\n",
    "reinitializable(log_time=log_time)\n",
    "feedable(log_time=log_time)\n",
    "\n",
    "sorted((value,key) for (key,value) in log_time.items())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
925 | } 926 | }, 927 | "nbformat": 4, 928 | "nbformat_minor": 2 929 | } 930 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/dataset_tutorial-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "/usr/local/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 13 | " from ._conv import register_converters as _register_converters\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "import tensorflow as tf\n", 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "[0.50035296 0.92651365]\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "x = np.random.sample((100,2))\n", 37 | "# make a dataset from a numpy array\n", 38 | "dataset = tf.data.Dataset.from_tensor_slices(x)\n", 39 | "\n", 40 | "iter = dataset.make_one_shot_iterator()\n", 41 | "el = iter.get_next()\n", 42 | "\n", 43 | "with tf.Session() as sess:\n", 44 | " print(sess.run(el))" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "(array([0.33327842, 0.90874317]), array([0.02171065]))\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "# using two numpy arrays\n", 62 | "features, labels = (np.random.sample((100,2)), np.random.sample((100,1)))\n", 63 | "dataset = tf.data.Dataset.from_tensor_slices((features,labels))\n", 64 | "\n", 65 | "iter = dataset.make_one_shot_iterator()\n", 66 | "el = iter.get_next()\n", 67 | "\n", 68 | "with tf.Session() as sess:\n", 69 | " print(sess.run(el))" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "[0.00786543 0.26009214]\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "# using a tensor\n", 87 | "dataset = tf.data.Dataset.from_tensor_slices(tf.random_uniform([100, 2]))\n", 88 | "\n", 89 | "iter = dataset.make_initializable_iterator()\n", 90 | "el = iter.get_next()\n", 91 | "\n", 92 | "with tf.Session() as sess:\n", 93 | " sess.run(iter.initializer)\n", 94 | " print(sess.run(el))" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 5, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "[0.03433903 0.7280311 ]\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "# using a placeholder\n", 112 | "x = tf.placeholder(tf.float32, shape=[None,2])\n", 113 | "dataset = tf.data.Dataset.from_tensor_slices(x)\n", 114 | "\n", 115 | "data = np.random.sample((100,2))\n", 116 | "\n", 117 | "iter = dataset.make_initializable_iterator()\n", 118 | "el = iter.get_next()\n", 119 | "\n", 120 | "with tf.Session() as sess:\n", 121 | " sess.run(iter.initializer, feed_dict={ x: data })\n", 122 | " print(sess.run(el))" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 6, 128 | "metadata": {}, 129 | "outputs": [ 
130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "[[1]]\n", 135 | "[[2]\n", 136 | " [3]]\n", 137 | "[[3]\n", 138 | " [4]\n", 139 | " [5]]\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "# from generator\n", 145 | "sequence = np.array([[[1]],[[2],[3]],[[3],[4],[5]]])\n", 146 | "\n", 147 | "def generator():\n", 148 | " for el in sequence:\n", 149 | " yield el\n", 150 | "\n", 151 | "dataset = tf.data.Dataset().batch(1).from_generator(generator,\n", 152 | " output_types= tf.int64, \n", 153 | " output_shapes=(tf.TensorShape([None, 1])))\n", 154 | "\n", 155 | "iter = dataset.make_initializable_iterator()\n", 156 | "el = iter.get_next()\n", 157 | "\n", 158 | "with tf.Session() as sess:\n", 159 | " sess.run(iter.initializer)\n", 160 | " print(sess.run(el))\n", 161 | " print(sess.run(el))\n", 162 | " print(sess.run(el))\n" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 7, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "name": "stdout", 172 | "output_type": "stream", 173 | "text": [ 174 | "[array([1., 2.], dtype=float32), array([0.], dtype=float32)]\n" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "# initializable iterator to switch between data\n", 180 | "EPOCHS = 10\n", 181 | "\n", 182 | "x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])\n", 183 | "dataset = tf.data.Dataset.from_tensor_slices((x, y))\n", 184 | "\n", 185 | "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n", 186 | "test_data = (np.array([[1,2]]), np.array([[0]]))\n", 187 | "\n", 188 | "iter = dataset.make_initializable_iterator()\n", 189 | "features, labels = iter.get_next()\n", 190 | "\n", 191 | "with tf.Session() as sess:\n", 192 | "# initialise iterator with train data\n", 193 | " sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1]})\n", 194 | " for _ in range(EPOCHS):\n", 195 | " sess.run([features, labels])\n", 196 | "# switch to test data\n", 197 | " sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1]})\n", 198 | " print(sess.run([features, labels]))\n", 199 | "\n", 200 | " \n", 201 | " " 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 8, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "name": "stdout", 211 | "output_type": "stream", 212 | "text": [ 213 | "[array([0.94182994, 0.26802265]), array([0.81551463])]\n" 214 | ] 215 | } 216 | ], 217 | "source": [ 218 | "# Reinitializable iterator to switch between Datasets\n", 219 | "EPOCHS = 10\n", 220 | "# making fake data using numpy\n", 221 | "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n", 222 | "test_data = (np.random.sample((10,2)), np.random.sample((10,1)))\n", 223 | "# create two datasets, one for training and one for test\n", 224 | "train_dataset = tf.data.Dataset.from_tensor_slices(train_data)\n", 225 | "test_dataset = tf.data.Dataset.from_tensor_slices(test_data)\n", 226 | "# create a iterator of the correct shape and type\n", 227 | "iter = tf.data.Iterator.from_structure(train_dataset.output_types,\n", 228 | " train_dataset.output_shapes)\n", 229 | "features, labels = iter.get_next()\n", 230 | "# create the initialisation operations\n", 231 | "train_init_op = iter.make_initializer(train_dataset)\n", 232 | "test_init_op = iter.make_initializer(test_dataset)\n", 233 | "\n", 234 | "with tf.Session() as sess:\n", 235 | " sess.run(train_init_op) # switch to train dataset\n", 236 | " for _ in range(EPOCHS):\n", 237 | " 
sess.run([features, labels])\n", 238 | " sess.run(test_init_op) # switch to val dataset\n", 239 | " print(sess.run([features, labels]))\n", 240 | "\n", 241 | " \n", 242 | " " 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 9, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "name": "stdout", 252 | "output_type": "stream", 253 | "text": [ 254 | "[0.8552025 0.13344285] [0.24534453]\n", 255 | "[0.23880187 0.2294315 ] [0.77315474]\n", 256 | "[0.763904 0.439595] [0.42727667]\n", 257 | "[0.6563372 0.1366187] [0.02278621]\n", 258 | "[0.71135175 0.394754 ] [0.8552778]\n", 259 | "[0.7329701 0.42924434] [0.43608633]\n", 260 | "[0.8240853 0.7750715] [0.5140434]\n", 261 | "[0.65556693 0.67978406] [0.8228361]\n", 262 | "[0.02365288 0.18461536] [0.85140544]\n", 263 | "[0.48037764 0.7320316 ] [0.773141]\n", 264 | "[0.6671238 0.8491173] [0.45188755]\n" 265 | ] 266 | } 267 | ], 268 | "source": [ 269 | "# feedable iterator to switch between iterators\n", 270 | "EPOCHS = 10\n", 271 | "# making fake data using numpy\n", 272 | "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n", 273 | "test_data = (np.random.sample((10,2)), np.random.sample((10,1)))\n", 274 | "# create placeholder\n", 275 | "x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])\n", 276 | "# create two datasets, one for training and one for test\n", 277 | "train_dataset = tf.data.Dataset.from_tensor_slices((x,y))\n", 278 | "test_dataset = tf.data.Dataset.from_tensor_slices((x,y))\n", 279 | "# create the iterators from the dataset\n", 280 | "train_iterator = train_dataset.make_initializable_iterator()\n", 281 | "test_iterator = test_dataset.make_initializable_iterator()\n", 282 | "# same as in the doc https://www.tensorflow.org/programmers_guide/datasets#creating_an_iterator\n", 283 | "handle = tf.placeholder(tf.string, shape=[])\n", 284 | "iter = tf.data.Iterator.from_string_handle(\n", 285 | " handle, train_dataset.output_types, train_dataset.output_shapes)\n", 286 | "next_elements = iter.get_next()\n", 287 | "\n", 288 | "with tf.Session() as sess:\n", 289 | " train_handle = sess.run(train_iterator.string_handle())\n", 290 | " test_handle = sess.run(test_iterator.string_handle())\n", 291 | " \n", 292 | " # initialise iterators. 
In our case we could have used the 'one-shot' iterator instead,\n", 293 | " # and directly feed the data insted the Dataset.from_tensor_slices function, but this\n", 294 | " # approach is more general\n", 295 | " sess.run(train_iterator.initializer, feed_dict={ x: train_data[0], y: train_data[1]})\n", 296 | " sess.run(test_iterator.initializer, feed_dict={ x: test_data[0], y: test_data[1]})\n", 297 | " \n", 298 | " for _ in range(EPOCHS):\n", 299 | " x,y = sess.run(next_elements, feed_dict = {handle: train_handle})\n", 300 | " print(x, y)\n", 301 | " \n", 302 | " x,y = sess.run(next_elements, feed_dict = {handle: test_handle})\n", 303 | " print(x,y)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 10, 309 | "metadata": { 310 | "scrolled": true 311 | }, 312 | "outputs": [ 313 | { 314 | "name": "stdout", 315 | "output_type": "stream", 316 | "text": [ 317 | "[[0.70861276 0.91522017]\n", 318 | " [0.993154 0.74425373]\n", 319 | " [0.42730845 0.03037355]\n", 320 | " [0.54031161 0.57429001]]\n" 321 | ] 322 | } 323 | ], 324 | "source": [ 325 | "# BATCHING\n", 326 | "BATCH_SIZE = 4\n", 327 | "x = np.random.sample((100,2))\n", 328 | "# make a dataset from a numpy array\n", 329 | "dataset = tf.data.Dataset.from_tensor_slices(x).batch(BATCH_SIZE)\n", 330 | "\n", 331 | "iter = dataset.make_one_shot_iterator()\n", 332 | "el = iter.get_next()\n", 333 | "\n", 334 | "with tf.Session() as sess:\n", 335 | " print(sess.run(el))" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 4, 341 | "metadata": {}, 342 | "outputs": [ 343 | { 344 | "name": "stdout", 345 | "output_type": "stream", 346 | "text": [ 347 | "[1]\n", 348 | "[2]\n", 349 | "[3]\n", 350 | "[4]\n", 351 | "[1]\n", 352 | "[2]\n", 353 | "[3]\n", 354 | "[4]\n" 355 | ] 356 | } 357 | ], 358 | "source": [ 359 | "# REPEAT\n", 360 | "BATCH_SIZE = 4\n", 361 | "x = np.array([[1],[2],[3],[4]])\n", 362 | "# make a dataset from a numpy array\n", 363 | "dataset = tf.data.Dataset.from_tensor_slices(x)\n", 364 | "dataset = dataset.repeat()\n", 365 | "\n", 366 | "iter = dataset.make_one_shot_iterator()\n", 367 | "el = iter.get_next()\n", 368 | "\n", 369 | "with tf.Session() as sess:\n", 370 | " for _ in range(8):\n", 371 | " print(sess.run(el))" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "# MAP\n", 381 | "x = np.array([[1],[2],[3],[4]])\n", 382 | "# make a dataset from a numpy array\n", 383 | "dataset = tf.data.Dataset.from_tensor_slices(x)\n", 384 | "dataset = dataset.map(lambda x: x*2)\n", 385 | "\n", 386 | "iter = dataset.make_one_shot_iterator()\n", 387 | "el = iter.get_next()\n", 388 | "\n", 389 | "with tf.Session() as sess:\n", 390 | "# this will run forever\n", 391 | " for _ in range(len(x)):\n", 392 | " print(sess.run(el))" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 12, 398 | "metadata": {}, 399 | "outputs": [ 400 | { 401 | "name": "stdout", 402 | "output_type": "stream", 403 | "text": [ 404 | "[[3]\n", 405 | " [1]\n", 406 | " [2]\n", 407 | " [4]]\n" 408 | ] 409 | } 410 | ], 411 | "source": [ 412 | "# SHUFFLE\n", 413 | "BATCH_SIZE = 4\n", 414 | "x = np.array([[1],[2],[3],[4]])\n", 415 | "# make a dataset from a numpy array\n", 416 | "dataset = tf.data.Dataset.from_tensor_slices(x)\n", 417 | "dataset = dataset.shuffle(buffer_size=100)\n", 418 | "dataset = dataset.batch(BATCH_SIZE)\n", 419 | "\n", 420 | "iter = dataset.make_one_shot_iterator()\n", 421 | "el = 
iter.get_next()\n", 422 | "\n", 423 | "with tf.Session() as sess:\n", 424 | " print(sess.run(el))" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 13, 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | "name": "stdout", 434 | "output_type": "stream", 435 | "text": [ 436 | "Iter: 0, Loss: 0.1913\n", 437 | "Iter: 1, Loss: 0.1814\n", 438 | "Iter: 2, Loss: 0.1720\n", 439 | "Iter: 3, Loss: 0.1631\n", 440 | "Iter: 4, Loss: 0.1547\n", 441 | "Iter: 5, Loss: 0.1469\n", 442 | "Iter: 6, Loss: 0.1397\n", 443 | "Iter: 7, Loss: 0.1329\n", 444 | "Iter: 8, Loss: 0.1267\n", 445 | "Iter: 9, Loss: 0.1210\n" 446 | ] 447 | } 448 | ], 449 | "source": [ 450 | "# how to pass the value to a model\n", 451 | "EPOCHS = 10\n", 452 | "BATCH_SIZE = 16\n", 453 | "# using two numpy arrays\n", 454 | "features, labels = (np.array([np.random.sample((100,2))]), \n", 455 | " np.array([np.random.sample((100,1))]))\n", 456 | "\n", 457 | "dataset = tf.data.Dataset.from_tensor_slices((features,labels)).repeat().batch(BATCH_SIZE)\n", 458 | "\n", 459 | "iter = dataset.make_one_shot_iterator()\n", 460 | "x, y = iter.get_next()\n", 461 | "\n", 462 | "# make a simple model\n", 463 | "net = tf.layers.dense(x, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input\n", 464 | "net = tf.layers.dense(net, 8, activation=tf.tanh)\n", 465 | "prediction = tf.layers.dense(net, 1, activation=tf.tanh)\n", 466 | "\n", 467 | "loss = tf.losses.mean_squared_error(prediction, y) # pass the second value from iter.get_net() as label\n", 468 | "train_op = tf.train.AdamOptimizer().minimize(loss)\n", 469 | "\n", 470 | "with tf.Session() as sess:\n", 471 | " sess.run(tf.global_variables_initializer())\n", 472 | " for i in range(EPOCHS):\n", 473 | " _, loss_value = sess.run([train_op, loss])\n", 474 | " print(\"Iter: {}, Loss: {:.4f}\".format(i, loss_value))" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 18, 480 | "metadata": {}, 481 | "outputs": [ 482 | { 483 | "name": "stdout", 484 | "output_type": "stream", 485 | "text": [ 486 | "3\n", 487 | "Training...\n", 488 | "Iter: 0, Loss: 1.4389\n", 489 | "Iter: 1, Loss: 1.4704\n", 490 | "Iter: 2, Loss: 1.4081\n", 491 | "Iter: 3, Loss: 1.2877\n", 492 | "Iter: 4, Loss: 1.1842\n", 493 | "Iter: 5, Loss: 1.1944\n", 494 | "Iter: 6, Loss: 1.1166\n", 495 | "Iter: 7, Loss: 0.9924\n", 496 | "Iter: 8, Loss: 0.8997\n", 497 | "Iter: 9, Loss: 0.8817\n", 498 | "Test Loss: 0.836423\n" 499 | ] 500 | } 501 | ], 502 | "source": [ 503 | "# Wrapping all together -> Switch between train and test set using Initializable iterator\n", 504 | "EPOCHS = 10\n", 505 | "# create a placeholder to dynamically switch between batch sizes\n", 506 | "batch_size = tf.placeholder(tf.int64)\n", 507 | "BATCH_SIZE = 32\n", 508 | "\n", 509 | "x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])\n", 510 | "dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size).repeat()\n", 511 | "\n", 512 | "# using two numpy arrays\n", 513 | "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n", 514 | "test_data = (np.random.sample((20,2)), np.random.sample((20,1)))\n", 515 | "\n", 516 | "iter = dataset.make_initializable_iterator()\n", 517 | "features, labels = iter.get_next()\n", 518 | "# make a simple model\n", 519 | "net = tf.layers.dense(features, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input\n", 520 | "net = tf.layers.dense(net, 8, activation=tf.tanh)\n", 521 | "prediction = 
tf.layers.dense(net, 1, activation=tf.tanh)\n", 522 | "\n", 523 | "loss = tf.losses.mean_squared_error(prediction, labels) # pass the second value from iter.get_net() as label\n", 524 | "train_op = tf.train.AdamOptimizer().minimize(loss)\n", 525 | "\n", 526 | "n_batches = train_data[0].shape[0] // BATCH_SIZE\n", 527 | "\n", 528 | "with tf.Session() as sess:\n", 529 | " sess.run(tf.global_variables_initializer())\n", 530 | " # initialise iterator with train data\n", 531 | " sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})\n", 532 | " print('Training...')\n", 533 | " for i in range(EPOCHS):\n", 534 | " tot_loss = 0\n", 535 | " for _ in range(n_batches):\n", 536 | " _, loss_value = sess.run([train_op, loss])\n", 537 | " tot_loss += loss_value\n", 538 | " print(\"Iter: {}, Loss: {:.4f}\".format(i, tot_loss / n_batches))\n", 539 | " # initialise iterator with test data\n", 540 | " sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1], batch_size: test_data[0].shape[0]})\n", 541 | " print('Test Loss: {:4f}'.format(sess.run(loss)))\n" 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 10, 547 | "metadata": {}, 548 | "outputs": [ 549 | { 550 | "name": "stdout", 551 | "output_type": "stream", 552 | "text": [ 553 | "Training...\n", 554 | "Iter: 0, Loss: 0.1602\n", 555 | "Iter: 1, Loss: 0.1191\n", 556 | "Iter: 2, Loss: 0.0964\n", 557 | "Iter: 3, Loss: 0.0907\n", 558 | "Iter: 4, Loss: 0.0738\n", 559 | "Iter: 5, Loss: 0.0819\n", 560 | "Iter: 6, Loss: 0.0728\n", 561 | "Iter: 7, Loss: 0.0881\n", 562 | "Iter: 8, Loss: 0.0765\n", 563 | "Iter: 9, Loss: 0.0729\n", 564 | "Test Loss: 0.091081\n" 565 | ] 566 | } 567 | ], 568 | "source": [ 569 | "# Wrapping all together -> Switch between train and test set using Reinitializable iterator\n", 570 | "EPOCHS = 10\n", 571 | "# create a placeholder to dynamically switch between batch sizes\n", 572 | "batch_size = tf.placeholder(tf.int64)\n", 573 | "\n", 574 | "x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])\n", 575 | "train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(batch_size).repeat()\n", 576 | "test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(batch_size) # always batch even if you want to one shot it\n", 577 | "# using two numpy arrays\n", 578 | "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n", 579 | "test_data = (np.random.sample((20,2)), np.random.sample((20,1)))\n", 580 | "\n", 581 | "# create a iterator of the correct shape and type\n", 582 | "iter = tf.data.Iterator.from_structure(train_dataset.output_types,\n", 583 | " train_dataset.output_shapes)\n", 584 | "features, labels = iter.get_next()\n", 585 | "# create the initialisation operations\n", 586 | "train_init_op = iter.make_initializer(train_dataset)\n", 587 | "test_init_op = iter.make_initializer(test_dataset)\n", 588 | "\n", 589 | "# make a simple model\n", 590 | "net = tf.layers.dense(features, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input\n", 591 | "net = tf.layers.dense(net, 8, activation=tf.tanh)\n", 592 | "prediction = tf.layers.dense(net, 1, activation=tf.tanh)\n", 593 | "\n", 594 | "loss = tf.losses.mean_squared_error(prediction, labels) # pass the second value from iter.get_net() as label\n", 595 | "train_op = tf.train.AdamOptimizer().minimize(loss)\n", 596 | "\n", 597 | "with tf.Session() as sess:\n", 598 | " sess.run(tf.global_variables_initializer())\n", 599 | " # 
initialise iterator with train data\n", 600 | " sess.run(train_init_op, feed_dict = {x : train_data[0], y: train_data[1], batch_size: 16})\n", 601 | " print('Training...')\n", 602 | " for i in range(EPOCHS):\n", 603 | " tot_loss = 0\n", 604 | " for _ in range(n_batches):\n", 605 | " _, loss_value = sess.run([train_op, loss])\n", 606 | " tot_loss += loss_value\n", 607 | " print(\"Iter: {}, Loss: {:.4f}\".format(i, tot_loss / n_batches))\n", 608 | " # initialise iterator with test data\n", 609 | " sess.run(test_init_op, feed_dict = {x : test_data[0], y: test_data[1], batch_size:len(test_data[0])})\n", 610 | " print('Test Loss: {:4f}'.format(sess.run(loss)))\n" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": 32, 616 | "metadata": {}, 617 | "outputs": [ 618 | { 619 | "name": "stdout", 620 | "output_type": "stream", 621 | "text": [ 622 | "{'sentiment': , 'text': }\n", 623 | "[array([b\"@MENTION, i agree! i'm trying to finish my thesis..and so far it's not going anywhere\",\n", 624 | " b'@MENTION erm, sacre coeur even... james -1',\n", 625 | " b\"@MENTION now am depressed and br'3ii is sleepin since i came home\",\n", 626 | " b'just finishing what turned out to be a nice day',\n", 627 | " b\"it's over. it was great. dollhouse\",\n", 628 | " b'just got bck from cross-country practice wooo 3 miles!',\n", 629 | " b'@MENTION you mean a man who cheats on his wife habitually and is a complete hypocrite... they have plenty of those already.',\n", 630 | " b'i feel bad for che ming wang though, he cant seem to get it together... and has an era of 30+',\n", 631 | " b'@MENTION @MENTION ok.. so i tweeted about the rains and it is not raining anymore. atleast i wont have to water the plants tomorrow.',\n", 632 | " b'is gutted about katie & peter love them!',\n", 633 | " b\"finally home... long day... had a great time with vet's from every era!\",\n", 634 | " b\"@MENTION yucky!!! for whatevv reason i can't eat 5 guys anymore. just makes me gag!!!\",\n", 635 | " b'@MENTION ever been to the antiques roadshow? my favorite show',\n", 636 | " b'lovin virginia! hopefully ill pick up an accent 4 a lil while',\n", 637 | " b\"@MENTION yea me too i just drink a cup of tea, but don't worry i got some snacks with me\",\n", 638 | " b'got what could quite possibly be the worst paper cut ever today. corners of file folders = paper daggers. ouchie.',\n", 639 | " b'sneaking in some computer time. wish you all a good day!',\n", 640 | " b\"@MENTION i'm pretty bored with it too\",\n", 641 | " b'too early to call a landslide victory for m14? lebanonelections',\n", 642 | " b\"school has ruined me so much that i don't even know how to sleep in anymore\",\n", 643 | " b\"is at work! it's been a long weekend\",\n", 644 | " b'feeling a bit sick also very bored!',\n", 645 | " b'misses mr. hollinger. misses callin him hubby more than anything',\n", 646 | " b'616 words i loooveeee jackson rathbone <\\xc3\\xb4\\xc3\\xb8\\xcf\\x89',\n", 647 | " b'sucky day.. first i havta take the bus 2 work then it breaks down nd work suckd of course',\n", 648 | " b'back from skaterhockey. crazy old bears 8 @MENTION reloaded 2. 6 goals against us in the last 20 mins',\n", 649 | " b'this is sad! john and kate are officially filing for divorce...as bad as the times have been this still shocks me.',\n", 650 | " b'@MENTION i wish i could',\n", 651 | " b'@MENTION and what do u see!?!? *raises an eyebrow in amusement*',\n", 652 | " b'@MENTION helloooo promm dress..ahh i need to get onee :| whens yours ? 
673 | { 674 | "cell_type": "code", 675 | "execution_count": 2, 676 | "metadata": {}, 677 | "outputs": [], 678 | "source": [ 679 | "import time\n", "log_time = {}\n", 680 | "# copied from https://medium.com/pythonhive/python-decorator-to-measure-the-execution-time-of-methods-fa04cb6bb36d\n", 681 | "def how_much(method):\n", 682 | " def timed(*args, **kw):\n", 683 | " ts = time.time()\n", 684 | " result = method(*args, **kw)\n", 685 | " te = time.time()\n", 686 | " \n", 687 | " if 'log_time' in kw:\n", 688 | " name = kw.get('log_name', method.__name__)\n", 689 | " kw['log_time'][name] = (te - ts)\n", 690 | " \n", 691 | " return result\n", 692 | " return timed" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": 3, 698 | "metadata": {}, 699 | "outputs": [ 700 | { 701 | "name": "stdout", 702 | "output_type": "stream", 703 | "text": [ 704 | "((5000, 32, 32), (5000, 20)) ((1000, 32, 32), (1000, 20))\n" 705 | ] 706 | } 707 | ], 708 | "source": [ 709 | "# benchmark\n", 710 | "import time\n", 711 | "DATA_SIZE = 5000\n", 712 | "DATA_SHAPE = ((32,32),(20,))\n", 713 | "BATCH_SIZE = 64 \n", 714 | "N_BATCHES = DATA_SIZE // BATCH_SIZE\n", 715 | "EPOCHS = 10\n", 716 | "\n", 717 | "test_size = (DATA_SIZE//100)*20 # 20% of DATA_SIZE\n", 718 | "\n", 719 | "train_shape = ((DATA_SIZE, *DATA_SHAPE[0]),(DATA_SIZE, *DATA_SHAPE[1]))\n", 720 | "test_shape = ((test_size, *DATA_SHAPE[0]),(test_size, *DATA_SHAPE[1]))\n", 721 | "print(train_shape, test_shape)\n", 722 | "train_data = (np.random.sample(train_shape[0]), np.random.sample(train_shape[1]))\n", 723 | "test_data = (np.random.sample(test_shape[0]), np.random.sample(test_shape[1])) " 724 | ] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": 4, 729 | "metadata": { 730 | "scrolled": false 731 | }, 732 | "outputs": [ 733 | { 734 | "name": "stdout", 735 | "output_type": "stream", 736 | "text": [ 737 | "[None, 32, 32] [None, 20]\n", 738 | "one_shot\n", 739 | "0\n", 740 | "1\n", 741 | "2\n", 742 | "3\n", 743 | "4\n", 744 | "5\n", 745 | "6\n", 746 | "7\n", 747 | "8\n", 748 | "9\n", 749 | "initialisable\n", 750 | "0\n", 751 | "1\n", 752 | "2\n", 753 | "3\n", 754 | "4\n", 755 | "5\n", 756 | "6\n", 757 | "7\n", 758 | "8\n", 759 | "9\n", 760 | "reinitializable\n", 761 | "0\n", 762 | "1\n", 763 | "2\n", 764 | "3\n", 765 | "4\n", 766 | "5\n", 767 | "6\n", 768 | "7\n", 769 | "8\n", 770 | "9\n", 771 | "feedable\n", 772 | "0\n", 773 | "1\n", 774 | "2\n", 775 | "3\n", 776 | "4\n", 777 | "5\n", 778 | "6\n", 779 | "7\n", 780 | "8\n", 781 | "9\n" 782 | ] 783 | }, 784 | { 785 | "data": { 786 | "text/plain": [ 787 | "[(1.5659220218658447, 'reinitializable'),\n", 788 | "
(1.581655740737915, 'initialisable'),\n", 789 | " (1.7346899509429932, 'feedable'),\n", 790 | " (2.3557801246643066, 'one_shot')]" 791 | ] 792 | }, 793 | "execution_count": 4, 794 | "metadata": {}, 795 | "output_type": "execute_result" 796 | } 797 | ], 798 | "source": [ 799 | "# used to keep track of the methods' timings\n", 800 | "log_time = {}\n", 801 | "\n", 802 | "tf.reset_default_graph()\n", 803 | "sess = tf.InteractiveSession()\n", 804 | "\n", 805 | "input_shape = [None, *DATA_SHAPE[0]] # [None, 32, 32]\n", 806 | "output_shape = [None,*DATA_SHAPE[1]] # [None, 20]\n", 807 | "print(input_shape, output_shape)\n", 808 | "\n", 809 | "x, y = tf.placeholder(tf.float32, shape=input_shape), tf.placeholder(tf.float32, shape=output_shape)\n", 810 | "\n", 811 | "@how_much\n", 812 | "def one_shot(**kwargs):\n", 813 | " print('one_shot')\n", 814 | " train_dataset = tf.data.Dataset.from_tensor_slices(train_data).batch(BATCH_SIZE).repeat()\n", 815 | " train_el = train_dataset.make_one_shot_iterator().get_next()\n", 816 | " \n", 817 | " test_dataset = tf.data.Dataset.from_tensor_slices(test_data).batch(BATCH_SIZE).repeat()\n", 818 | " test_el = test_dataset.make_one_shot_iterator().get_next()\n", 819 | " for i in range(EPOCHS):\n", 820 | " print(i)\n", 821 | " for _ in range(N_BATCHES):\n", 822 | " sess.run(train_el)\n", 823 | " for _ in range(N_BATCHES):\n", 824 | " sess.run(test_el)\n", 825 | " \n", 826 | "@how_much\n", 827 | "def initialisable(**kwargs):\n", 828 | " print('initialisable')\n", 829 | " dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(BATCH_SIZE).repeat()\n", 830 | "\n", 831 | " iter = dataset.make_initializable_iterator()\n", 832 | " elements = iter.get_next()\n", 833 | " \n", 834 | " for i in range(EPOCHS):\n", 835 | " print(i)\n", 836 | " sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1]})\n", 837 | " for _ in range(N_BATCHES):\n", 838 | " sess.run(elements)\n", 839 | " sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1]})\n", 840 | " for _ in range(N_BATCHES):\n", 841 | " sess.run(elements)\n", 842 | "@how_much \n", 843 | "def reinitializable(**kwargs):\n", 844 | " print('reinitializable')\n", 845 | " # create two datasets, one for training and one for test\n", 846 | " train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()\n", 847 | " test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()\n", 848 | " # create an iterator of the correct shape and type\n", 849 | " iter = tf.data.Iterator.from_structure(train_dataset.output_types,\n", 850 | " train_dataset.output_shapes)\n", 851 | " elements = iter.get_next()\n", 852 | " # create the initialisation operations\n", 853 | " train_init_op = iter.make_initializer(train_dataset)\n", 854 | " test_init_op = iter.make_initializer(test_dataset)\n", 855 | " \n", 856 | " for i in range(EPOCHS):\n", 857 | " print(i)\n", 858 | " sess.run(train_init_op, feed_dict={ x: train_data[0], y: train_data[1]})\n", 859 | " for _ in range(N_BATCHES):\n", 860 | " sess.run(elements)\n", 861 | " sess.run(test_init_op, feed_dict={ x: test_data[0], y: test_data[1]})\n", 862 | " for _ in range(N_BATCHES):\n", 863 | " sess.run(elements)\n", 864 | "@how_much \n", 865 | "def feedable(**kwargs):\n", 866 | " print('feedable')\n", 867 | " # create two datasets, one for training and one for test\n", 868 | " train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()\n", 869 | " test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()\n",
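" # note: each dataset keeps its own iterator here; the string handle fed at run time\n", " # selects which iterator get_next() pulls from, so switching between train and test\n", " # does not reset either iterator's position\n",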
870 | " # create the iterators from the datasets\n", 871 | " train_iterator = train_dataset.make_initializable_iterator()\n", 872 | " test_iterator = test_dataset.make_initializable_iterator()\n", 873 | "\n", 874 | " handle = tf.placeholder(tf.string, shape=[])\n", 875 | " iter = tf.data.Iterator.from_string_handle(\n", 876 | " handle, train_dataset.output_types, train_dataset.output_shapes)\n", 877 | " elements = iter.get_next()\n", 878 | "\n", 879 | " train_handle = sess.run(train_iterator.string_handle())\n", 880 | " test_handle = sess.run(test_iterator.string_handle())\n", 881 | "\n", 882 | " sess.run(train_iterator.initializer, feed_dict={ x: train_data[0], y: train_data[1]})\n", 883 | " sess.run(test_iterator.initializer, feed_dict={ x: test_data[0], y: test_data[1]})\n", 884 | "\n", 885 | " for i in range(EPOCHS):\n", 886 | " print(i)\n", 887 | " for _ in range(N_BATCHES):\n", 888 | " sess.run(elements, feed_dict={handle: train_handle})\n", 889 | " for _ in range(N_BATCHES):\n", 890 | " sess.run(elements, feed_dict={handle: test_handle})\n", 891 | " \n", 892 | "one_shot(log_time=log_time)\n", 893 | "initialisable(log_time=log_time)\n", 894 | "reinitializable(log_time=log_time)\n", 895 | "feedable(log_time=log_time)\n", 896 | "\n", 897 | "sorted((value,key) for (key,value) in log_time.items())\n" 898 | ] 899 | }, 900 | { 901 | "cell_type": "code", 902 | "execution_count": null, 903 | "metadata": {}, 904 | "outputs": [], 905 | "source": [] 906 | } 907 | ], 908 | "metadata": { 909 | "kernelspec": { 910 | "display_name": "Python 3", 911 | "language": "python", 912 | "name": "python3" 913 | }, 914 | "language_info": { 915 | "codemirror_mode": { 916 | "name": "ipython", 917 | "version": 3 918 | }, 919 | "file_extension": ".py", 920 | "mimetype": "text/x-python", 921 | "name": "python", 922 | "nbconvert_exporter": "python", 923 | "pygments_lexer": "ipython3", 924 | "version": "3.6.5" 925 | } 926 | }, 927 | "nbformat": 4, 928 | "nbformat_minor": 2 929 | } 930 | --------------------------------------------------------------------------------