├── .gitignore ├── 01_simple_feedforward_neural_network.ipynb ├── 02_using_metrics_in_eager_mode.ipynb ├── 03_save_and_restore_model.ipynb ├── 04_text_data_to_tfrecords.ipynb ├── 05_images_to_tfrecords.ipynb ├── 06_read_data_in_batches_from_tfrecords.ipynb ├── 07_convolutional_neural_networks_for_emotion_recognition.ipynb ├── 08_dynamic_recurrent_neural_networks_for_sequence_classification.ipynb ├── 09_recurrent_neural_networks_for_time_series_regression.ipynb ├── README.md ├── data_utils.py ├── datasets ├── aclImdb │ ├── length_reviews.pkl │ ├── test.tfrecords │ ├── train.tfrecords │ └── word2idx.pkl ├── dummy_images │ ├── dummy.tfrecords │ ├── id0_0.jpeg │ ├── id1_0.jpeg │ ├── id2_2.jpeg │ ├── id3_4.jpeg │ ├── id4_6.jpeg │ ├── id5_2.jpeg │ ├── id6_4.jpeg │ ├── id7_3.jpeg │ ├── id8_3.jpeg │ └── id9_2.jpeg ├── dummy_text │ ├── dummy.tfrecords │ ├── neg │ │ ├── 0_3.txt │ │ ├── 1_1.txt │ │ └── 2_1.txt │ ├── pos │ │ ├── 0_9.txt │ │ ├── 1_7.txt │ │ └── 2_9.txt │ └── word2idx.pkl ├── get_imdb_dataset.sh └── load_forecasting │ └── spain_hourly_entsoe.csv ├── models_checkpoints ├── DemandRNN │ ├── -0.data-00000-of-00001 │ ├── -0.index │ └── checkpoint ├── EmotionCNN │ ├── -0.data-00000-of-00001 │ ├── -0.index │ └── checkpoint ├── ImdbRNN │ ├── -0.data-00000-of-00001 │ ├── -0.index │ └── checkpoint └── SimpleNN │ ├── -1.data-00000-of-00001 │ ├── -1.index │ └── checkpoint └── tutorials_graphics ├── 01_flowchart.png ├── 02_flowchart.png ├── 03_flowchart.png ├── 04_flowchart.png ├── 05_flowchart.png ├── images2tfrecords.png ├── moving_windows_samples.png ├── readbatches.png ├── rnn_imdb.png ├── rnn_regression.png ├── save_restore_model.png └── text2tfrecords.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | 
downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | .static_storage/ 57 | .media/ 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | # notebook checkpoints 108 | .ipynb_checkpoints 109 | 110 | # ignore data folder 111 | datasets/* 112 | !datasets/get_imdb_dataset.sh 113 | !datasets/load_forecasting 114 | !datasets/dummy_text 115 | !datasets/dummy_images 116 | !datasets/aclImdb 117 | datasets/aclImdb/* 118 | !datasets/aclImdb/length_reviews.pkl 119 | !datasets/aclImdb/word2idx.pkl 120 | !datasets/aclImdb/train.tfrecords 121 | !datasets/aclImdb/test.tfrecords 122 | 
-------------------------------------------------------------------------------- /01_simple_feedforward_neural_network.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to: Build a simple neural network with TensorFlow Eager\n", 8 | "\n", 9 | "Hello everyone! In this tutorial we are going to build a simple feedforward neural network\n", 10 | "using the imperative mode of TensorFlow. Hope you will find it useful! If you have any suggestions on how I can improve the code, please let me know. \n", 11 | "\n", 12 | "### Tutorial steps:\n", 13 | "----\n", 14 | "\n", 15 | "![img](tutorials_graphics/01_flowchart.png)\n", 16 | "\n", 17 | "**Version used: TensorFlow 1.7**" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Step 1: Import useful libraries and enable eager mode\n", 25 | "----" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "# Import TensorFlow and TensorFlow Eager\n", 35 | "import tensorflow as tf\n", 36 | "import tensorflow.contrib.eager as tfe\n", 37 | "\n", 38 | "# Import function to generate toy classification problem\n", 39 | "from sklearn.datasets import make_moons\n", 40 | "import numpy as np\n", 41 | "\n", 42 | "# Import library for plots\n", 43 | "import matplotlib.pyplot as plt\n", 44 | "%matplotlib inline" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# Enable eager mode. Once activated it cannot be reversed! 
Run just once.\n", 54 | "tfe.enable_eager_execution()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Step 2: Generate toy dataset for binary classification\n", 62 | "----\n", 63 | "We will generate a toy dataset, to train our network. I chose the make_moons function from sklearn. I believe it is perfect for our task, as the classes are not linearly separable so a neural network will be very useful." 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 4, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "# Generate toy dataset for classification\n", 73 | "# X is a matrix of n_samples x n_features and represents the input features\n", 74 | "# y is a vector with length n_samples and represents our targets\n", 75 | "X, y = make_moons(n_samples=100, noise=0.1, random_state=2018)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "## Step 3: Visualize generated dataset\n", 83 | "----" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 5, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "image/png": "\n", 94 | "text/plain": [ 95 | "" 96 | ] 97 | }, 98 | "metadata": {}, 99 | "output_type": "display_data" 100 | } 101 | ], 102 | "source": [ 103 | "plt.scatter(X[:,0], X[:,1], c=y, cmap=plt.cm.autumn)\n", 104 | "plt.xlabel('First feature')\n", 105 | "plt.ylabel('Second feature')\n", 106 | "plt.title('Toy classification problem')\n", 107 | "plt.show()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## Step 4: Build a single hidden layer neural network (Linear -> ReLU -> Linear output)\n", 115 | "----\n", 116 | "Our first trial is a simple neural network with a single hidden layer. The easiest way to build neural networks models with TensorFlow Eager is with classes. 
During initialization, you define the layers needed to run a forward-pass through the model.\n", 117 | "\n", 118 | "As this is a classification problem, we will be using the softmax cross-entropy loss. Normally, we would have to one-hot encode our targets. To avoid this, we will be using the sparse softmax loss, which takes as an input the raw targets. No need for further processing! HOORAY :).\n" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 15, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "class simple_nn(tf.keras.Model):\n", 128 | " def __init__(self):\n", 129 | " super(simple_nn, self).__init__()\n", 130 | " \"\"\" Define here the layers used during the forward-pass \n", 131 | " of the neural network.\n", 132 | " \"\"\" \n", 133 | " # Hidden layer.\n", 134 | " self.dense_layer = tf.layers.Dense(10, activation=tf.nn.relu)\n", 135 | " # Output layer. No activation.\n", 136 | " self.output_layer = tf.layers.Dense(2, activation=None)\n", 137 | " \n", 138 | " def predict(self, input_data):\n", 139 | " \"\"\" Runs a forward-pass through the network. \n", 140 | " Args:\n", 141 | " input_data: 2D tensor of shape (n_samples, n_features). \n", 142 | " Returns:\n", 143 | " logits: unnormalized predictions.\n", 144 | " \"\"\"\n", 145 | " hidden_activations = self.dense_layer(input_data)\n", 146 | " logits = self.output_layer(hidden_activations)\n", 147 | " return logits\n", 148 | " \n", 149 | " def loss_fn(self, input_data, target):\n", 150 | " \"\"\" Defines the loss function used during \n", 151 | " training. 
\n", 152 | " \"\"\"\n", 153 | " logits = self.predict(input_data)\n", 154 | " loss = tf.losses.sparse_softmax_cross_entropy(labels=target, logits=logits)\n", 155 | " return loss\n", 156 | " \n", 157 | " def grads_fn(self, input_data, target):\n", 158 | " \"\"\" Dynamically computes the gradients of the loss value\n", 159 | " with respect to the parameters of the model, in each\n", 160 | " forward pass.\n", 161 | " \"\"\"\n", 162 | " with tfe.GradientTape() as tape:\n", 163 | " loss = self.loss_fn(input_data, target)\n", 164 | " return tape.gradient(loss, self.variables)\n", 165 | " \n", 166 | " def fit(self, input_data, target, optimizer, num_epochs=500, verbose=50):\n", 167 | " \"\"\" Function to train the model, using the selected optimizer and\n", 168 | " for the desired number of epochs.\n", 169 | " \"\"\"\n", 170 | " for i in range(num_epochs):\n", 171 | " grads = self.grads_fn(input_data, target)\n", 172 | " optimizer.apply_gradients(zip(grads, self.variables))\n", 173 | " if (i==0) | ((i+1)%verbose==0):\n", 174 | " print('Loss at epoch %d: %f' %(i+1, self.loss_fn(input_data, target).numpy()))" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "## Step 5: Train the model with gradient descent\n", 182 | "----\n", 183 | "Use backpropagation to train the variables of our model. Feel free to play with the learning rate and the number of epochs." 
184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 16, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "X_tensor = tf.constant(X)\n", 193 | "y_tensor = tf.constant(y)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 17, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "name": "stdout", 203 | "output_type": "stream", 204 | "text": [ 205 | "Loss at epoch 1: 0.653288\n", 206 | "Loss at epoch 50: 0.283921\n", 207 | "Loss at epoch 100: 0.260529\n", 208 | "Loss at epoch 150: 0.244092\n", 209 | "Loss at epoch 200: 0.221653\n", 210 | "Loss at epoch 250: 0.186211\n", 211 | "Loss at epoch 300: 0.139418\n", 212 | "Loss at epoch 350: 0.103654\n", 213 | "Loss at epoch 400: 0.078874\n", 214 | "Loss at epoch 450: 0.062550\n", 215 | "Loss at epoch 500: 0.051096\n" 216 | ] 217 | } 218 | ], 219 | "source": [ 220 | "optimizer = tf.train.GradientDescentOptimizer(5e-1)\n", 221 | "model = simple_nn()\n", 222 | "model.fit(X_tensor, y_tensor, optimizer, num_epochs=500, verbose=50)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "## Step 6: Plot decision boundary\n", 230 | "----\n", 231 | "The code for visualizing the decision boundary of our model was inspired by [this tutorial](http://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py)." 
232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 11, 237 | "metadata": {}, 238 | "outputs": [ 239 | { 240 | "data": { 241 | "image/png": "\n", 242 | "text/plain": [ 243 | "" 244 | ] 245 | }, 246 | "metadata": {}, 247 | "output_type": "display_data" 248 | } 249 | ], 250 | "source": [ 251 | "# Create a mesh to plot in\n", 252 | "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", 253 | "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", 254 | "xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01),\n", 255 | " np.arange(y_min, y_max, 0.01))\n", 256 | "\n", 257 | "# Predict target for each sample xx, yy\n", 258 | "Z = np.argmax(model.predict(tf.constant(np.c_[xx.ravel(), yy.ravel()])).numpy(), axis=1)\n", 259 | "\n", 260 | "# Put the result into a color plot\n", 261 | "Z = Z.reshape(xx.shape)\n", 262 | "fig = plt.figure()\n", 263 | "plt.contourf(xx, yy, Z, cmap=plt.cm.autumn, alpha=0.8)\n", 264 | "\n", 265 | "# Plot our training points\n", 266 | "plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.autumn, edgecolors='k')\n", 267 | "plt.xlim(xx.min(), xx.max())\n", 268 | "plt.ylim(yy.min(), yy.max())\n", 269 | "plt.xlabel('First feature', fontsize=15)\n", 270 | "plt.ylabel('Second feature', fontsize=15)\n", 271 | "plt.title('Toy classification problem', fontsize=15)\n", 272 | "plt.show()" 273 | ] 274 | } 275 | ], 276 | "metadata": { 277 | "kernelspec": { 278 | "display_name": "Python 3", 279 | "language": "python", 280 | "name": "python3" 281 | }, 282 | "language_info": { 283 | "codemirror_mode": { 284 | "name": "ipython", 285 | "version": 3 286 | }, 287 | "file_extension": ".py", 288 | "mimetype": "text/x-python", 289 | "name": "python", 290 | "nbconvert_exporter": "python", 291 | "pygments_lexer": "ipython3", 292 | "version": "3.6.4" 293 | } 294 | }, 295 | "nbformat": 4, 296 | "nbformat_minor": 2 297 | } 298 | -------------------------------------------------------------------------------- /03_save_and_restore_model.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to save and restore a trained model\n", 8 | "\n", 9 | "After scrolling through the posts of [reddit.com/r/learnmachinelearning](https://www.reddit.com/r/learnmachinelearning/), I've realized that the major bottlenecks of a machine learning project occur in the data input pipeline and in the final stage of the model, where you have to save the model and make predictions on new data.\n", 10 | "So I thought that it would be useful to make a simple and straightforward tutorial to show you how you could save and restore a model that you have built with TensorFlow Eager.\n", 11 | "\n", 12 | "### Tutorial flowchart\n", 13 | "----\n", 14 | "\n", 15 | "![img](tutorials_graphics/save_restore_model.png)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Import here useful libraries" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# Import TensorFlow and TensorFlow Eager\n", 32 | "import tensorflow as tf\n", 33 | "import tensorflow.contrib.eager as tfe\n", 34 | "\n", 35 | "# Import function to generate toy classification problem\n", 36 | "from sklearn.datasets import make_moons" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "# Enable eager mode. Once activated it cannot be reversed! 
Run just once.\n", 46 | "tfe.enable_eager_execution()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## Part I: Build a simple neural network model for binary classification" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 4, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "class simple_nn(tf.keras.Model):\n", 63 | " def __init__(self):\n", 64 | " super(simple_nn, self).__init__()\n", 65 | " \"\"\" Define here the layers used during the forward-pass \n", 66 | " of the neural network.\n", 67 | " \"\"\" \n", 68 | " # Hidden layer.\n", 69 | " self.dense_layer = tf.layers.Dense(10, activation=tf.nn.relu)\n", 70 | " # Output layer. No activation.\n", 71 | " self.output_layer = tf.layers.Dense(2, activation=None)\n", 72 | " \n", 73 | " def predict(self, input_data):\n", 74 | " \"\"\" Runs a forward-pass through the network. \n", 75 | " Args:\n", 76 | " input_data: 2D tensor of shape (n_samples, n_features). \n", 77 | " Returns:\n", 78 | " logits: unnormalized predictions.\n", 79 | " \"\"\"\n", 80 | " hidden_activations = self.dense_layer(input_data)\n", 81 | " logits = self.output_layer(hidden_activations)\n", 82 | " return logits\n", 83 | " \n", 84 | " def loss_fn(self, input_data, target):\n", 85 | " \"\"\" Defines the loss function used during \n", 86 | " training. 
\n", 87 | " \"\"\"\n", 88 | " logits = self.predict(input_data)\n", 89 | " loss = tf.losses.sparse_softmax_cross_entropy(labels=target, logits=logits)\n", 90 | " return loss\n", 91 | " \n", 92 | " def grads_fn(self, input_data, target):\n", 93 | " \"\"\" Dynamically computes the gradients of the loss value\n", 94 | " with respect to the parameters of the model, in each\n", 95 | " forward pass.\n", 96 | " \"\"\"\n", 97 | " with tfe.GradientTape() as tape:\n", 98 | " loss = self.loss_fn(input_data, target)\n", 99 | " return tape.gradient(loss, self.variables)\n", 100 | " \n", 101 | " def fit(self, input_data, target, optimizer, num_epochs=500, verbose=50):\n", 102 | " \"\"\" Function to train the model, using the selected optimizer and\n", 103 | " for the desired number of epochs.\n", 104 | " \"\"\"\n", 105 | " for i in range(num_epochs):\n", 106 | " grads = self.grads_fn(input_data, target)\n", 107 | " optimizer.apply_gradients(zip(grads, self.variables))\n", 108 | " if (i==0) | ((i+1)%verbose==0):\n", 109 | " print('Loss at epoch %d: %f' %(i+1, self.loss_fn(input_data, target).numpy()))" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "## Part II: Train model " 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 9, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# Generate toy dataset for classification\n", 126 | "# X is a matrix of n_samples x n_features and represents the input features\n", 127 | "# y is a vector with length n_samples and represents our targets\n", 128 | "X, y = make_moons(n_samples=100, noise=0.1, random_state=2018)\n", 129 | "X_train, y_train = tf.constant(X[:80,:]), tf.constant(y[:80])\n", 130 | "X_test, y_test = tf.constant(X[80:,:]), tf.constant(y[80:])" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 10, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | 
"text": [ 142 | "Loss at epoch 1: 0.658276\n", 143 | "Loss at epoch 50: 0.302146\n", 144 | "Loss at epoch 100: 0.268594\n", 145 | "Loss at epoch 150: 0.247425\n", 146 | "Loss at epoch 200: 0.229143\n", 147 | "Loss at epoch 250: 0.197839\n", 148 | "Loss at epoch 300: 0.143365\n", 149 | "Loss at epoch 350: 0.098039\n", 150 | "Loss at epoch 400: 0.070781\n", 151 | "Loss at epoch 450: 0.053753\n", 152 | "Loss at epoch 500: 0.042401\n" 153 | ] 154 | } 155 | ], 156 | "source": [ 157 | "optimizer = tf.train.GradientDescentOptimizer(5e-1)\n", 158 | "model = simple_nn()\n", 159 | "model.fit(X_train, y_train, optimizer, num_epochs=500, verbose=50)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "## Part III: Save trained model" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 11, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "# Specify checkpoint directory\n", 176 | "checkpoint_directory = 'models_checkpoints/SimpleNN/'\n", 177 | "# Create model checkpoint\n", 178 | "checkpoint = tfe.Checkpoint(optimizer=optimizer,\n", 179 | " model=model,\n", 180 | " optimizer_step=tf.train.get_or_create_global_step())" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 12, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "text/plain": [ 191 | "'models_checkpoints/SimpleNN/-1'" 192 | ] 193 | }, 194 | "execution_count": 12, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | } 198 | ], 199 | "source": [ 200 | "# Save trained model\n", 201 | "checkpoint.save(file_prefix=checkpoint_directory)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "## Part IV: Restore trained model\n" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 13, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "# Reinitialize model instance\n", 218 | "model = 
simple_nn()\n", 219 | "optimizer = tf.train.GradientDescentOptimizer(5e-1)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 14, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "# Specify checkpoint directory\n", 229 | "checkpoint_directory = 'models_checkpoints/SimpleNN/'\n", 230 | "# Create model checkpoint\n", 231 | "checkpoint = tfe.Checkpoint(optimizer=optimizer,\n", 232 | " model=model,\n", 233 | " optimizer_step=tf.train.get_or_create_global_step())" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 15, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "data": { 243 | "text/plain": [ 244 | "" 245 | ] 246 | }, 247 | "execution_count": 15, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "# Restore model from latest checkpoint\n", 254 | "checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory))" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "## Part V: Check if the model was restored correctly" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 16, 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "name": "stdout", 271 | "output_type": "stream", 272 | "text": [ 273 | "Loss at epoch 1: 0.042220\n" 274 | ] 275 | } 276 | ], 277 | "source": [ 278 | "model.fit(X_train, y_train, optimizer, num_epochs=1)" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "The loss seems to be consistent with the loss we obtained in the last epoch of previous training :)!" 
286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "## Part VI: Make predictions on new data" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 17, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "logits_test = model.predict(X_test)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 18, 307 | "metadata": {}, 308 | "outputs": [ 309 | { 310 | "name": "stdout", 311 | "output_type": "stream", 312 | "text": [ 313 | "tf.Tensor(\n", 314 | "[[ 1.54352813 -0.83117302]\n", 315 | " [-1.60523365 2.82397487]\n", 316 | " [ 2.87589525 -1.36463485]\n", 317 | " [-1.39461001 2.62404279]\n", 318 | " [ 0.82305161 -0.55651397]\n", 319 | " [ 3.53674391 -2.55593046]\n", 320 | " [-2.97344627 3.46589599]\n", 321 | " [-1.69372442 2.95660466]\n", 322 | " [-1.43226137 2.65357974]\n", 323 | " [ 3.11479995 -1.31765645]\n", 324 | " [-0.65841567 1.60468631]\n", 325 | " [-2.27454367 3.60553595]\n", 326 | " [-1.50170912 2.74410115]\n", 327 | " [ 0.76261479 -0.44574208]\n", 328 | " [ 2.34516959 -1.6859307 ]\n", 329 | " [ 1.92181942 -1.63766352]\n", 330 | " [ 4.06047684 -3.03988941]\n", 331 | " [ 1.00252324 -0.78900484]\n", 332 | " [ 2.79802993 -2.2139734 ]\n", 333 | " [-1.43933035 2.68037059]], shape=(20, 2), dtype=float64)\n" 334 | ] 335 | } 336 | ], 337 | "source": [ 338 | "print(logits_test)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [] 347 | } 348 | ], 349 | "metadata": { 350 | "kernelspec": { 351 | "display_name": "Python 3", 352 | "language": "python", 353 | "name": "python3" 354 | }, 355 | "language_info": { 356 | "codemirror_mode": { 357 | "name": "ipython", 358 | "version": 3 359 | }, 360 | "file_extension": ".py", 361 | "mimetype": "text/x-python", 362 | "name": "python", 363 | "nbconvert_exporter": "python", 364 | "pygments_lexer": "ipython3", 365 | "version": "3.6.4" 
366 | } 367 | }, 368 | "nbformat": 4, 369 | "nbformat_minor": 2 370 | } 371 | -------------------------------------------------------------------------------- /04_text_data_to_tfrecords.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Text sequences to TFRecords\n", 8 | "----\n", 9 | "\n", 10 | "Hello everyone! In this tutorial, I am going to show you how you can parse your raw text data to TFRecords. I know that many people struggle with input processing pipelines, especially when you start working on your own personal project. So I really hope it is going to be useful for any of you :)!\n", 11 | "\n", 12 | "### Tutorial flowchart\n", 13 | "----\n", 14 | "![img](tutorials_graphics/text2tfrecords.png)\n", 15 | "\n", 16 | "\n", 17 | "### Dummy IMDB text data\n", 18 | "----\n", 19 | "For practice, I have chosen a few data samples from the Large Movie Review Dataset offered by Stanford." 
20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "### Import here useful libraries\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "from nltk.tokenize import word_tokenize\n", 36 | "import tensorflow as tf\n", 37 | "import pandas as pd\n", 38 | "import pickle\n", 39 | "import random\n", 40 | "import glob\n", 41 | "import nltk\n", 42 | "import re\n", 43 | "\n", 44 | "try:\n", 45 | " nltk.data.find('tokenizers/punkt')\n", 46 | "except LookupError:\n", 47 | " nltk.download('punkt')" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "### Parse data to TFRecords\n", 55 | "---" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "def imdb2tfrecords(path_data='datasets/dummy_text/', min_word_frequency=5,\n", 65 | " max_words_review=700):\n", 66 | " '''\n", 67 | " This script processes the data and saves it in the default TensorFlow \n", 68 | " file format: tfrecords.\n", 69 | " \n", 70 | " Args:\n", 71 | " path_data: the path where the imdb data is stored.\n", 72 | " min_word_frequency: the minimum frequency of a word, to keep it\n", 73 | " in the vocabulary.\n", 74 | " max_words_review: the maximum number of words allowed in a review.\n", 75 | " '''\n", 76 | " # Get the filenames of the positive/negative reviews \n", 77 | " pos_files = glob.glob(path_data + 'pos/*')\n", 78 | " neg_files = glob.glob(path_data + 'neg/*')\n", 79 | "\n", 80 | " # Concatenate both positive and negative reviews filenames\n", 81 | " filenames = pos_files + neg_files\n", 82 | " \n", 83 | " # List with all the reviews in the dataset\n", 84 | " reviews = [open(filenames[i],'r').read() for i in range(len(filenames))]\n", 85 | " \n", 86 | " # Remove HTML tags\n", 87 | " reviews = [re.sub(r'<[^>]+>', ' ', review) for review in 
reviews]\n", 88 | " \n", 89 | " # Tokenize each review in part\n", 90 | " reviews = [word_tokenize(review) for review in reviews]\n", 91 | " \n", 92 | " # Compute the length of each review\n", 93 | " len_reviews = [len(review) for review in reviews]\n", 94 | "\n", 95 | " # Flatten nested list\n", 96 | " reviews = [word for review in reviews for word in review]\n", 97 | " \n", 98 | " # Compute the frequency of each word\n", 99 | " word_frequency = pd.value_counts(reviews)\n", 100 | " \n", 101 | " # Keep only words whose frequency is at least the minimum\n", 102 | " vocabulary = word_frequency[word_frequency>=min_word_frequency].index.tolist()\n", 103 | " \n", 104 | " # Add Unknown and End tokens. \n", 105 | " extra_tokens = ['Unknown_token', 'End_token']\n", 106 | " vocabulary += extra_tokens\n", 107 | " \n", 108 | " # Create a word2idx dictionary\n", 109 | " word2idx = {vocabulary[i]: i for i in range(len(vocabulary))}\n", 110 | " \n", 111 | " # Write word vocabulary to disk\n", 112 | " pickle.dump(word2idx, open(path_data + 'word2idx.pkl', 'wb'))\n", 113 | " \n", 114 | " def text2tfrecords(filenames, writer, vocabulary, word2idx,\n", 115 | " max_words_review):\n", 116 | " '''\n", 117 | " Function to parse each review in part and write to disk\n", 118 | " as a tfrecord.\n", 119 | " \n", 120 | " Args:\n", 121 | " filenames: the paths of the review files.\n", 122 | " writer: the writer object for tfrecords.\n", 123 | " vocabulary: list with all the words included in the vocabulary.\n", 124 | " word2idx: dictionary of words and their corresponding indexes.\n", 125 | " '''\n", 126 | " # Shuffle filenames\n", 127 | " random.shuffle(filenames)\n", 128 | " for filename in filenames:\n", 129 | " review = open(filename, 'r').read()\n", 130 | " review = re.sub(r'<[^>]+>', ' ', review)\n", 131 | " review = word_tokenize(review)\n", 132 | " # Reduce review to max words\n", 133 | " review = review[-max_words_review:]\n", 134 | " # Replace words with their equivalent index from 
word2idx\n", 135 | " review = [word2idx[word] if word in vocabulary else \n", 136 | " word2idx['Unknown_token'] for word in review]\n", 137 | " indexed_review = review + [word2idx['End_token']]\n", 138 | " sequence_length = len(indexed_review)\n", 139 | " target = 1 if filename.split('/')[-2]=='pos' else 0\n", 140 | " # Create a Sequence Example to store our data in\n", 141 | " ex = tf.train.SequenceExample()\n", 142 | " # Add non-sequential features to our example\n", 143 | " ex.context.feature['sequence_length'].int64_list.value.append(sequence_length)\n", 144 | " ex.context.feature['target'].int64_list.value.append(target)\n", 145 | " # Add sequential feature\n", 146 | " token_indexes = ex.feature_lists.feature_list['token_indexes']\n", 147 | " for token_index in indexed_review:\n", 148 | " token_indexes.feature.add().int64_list.value.append(token_index)\n", 149 | " writer.write(ex.SerializeToString())\n", 150 | " \n", 151 | " ########################################################################## \n", 152 | " # Write data to tfrecords.This might take a while.\n", 153 | " ##########################################################################\n", 154 | " writer = tf.python_io.TFRecordWriter(path_data + 'dummy.tfrecords')\n", 155 | " text2tfrecords(filenames, writer, vocabulary, word2idx, \n", 156 | " max_words_review)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 5, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "imdb2tfrecords(path_data='datasets/dummy_text/')" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "### Parse TFRecords to TF tensors\n", 173 | "----" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 6, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "def parse_imdb_sequence(record):\n", 183 | " '''\n", 184 | " Script to parse imdb tfrecords.\n", 185 | " \n", 186 | " Returns:\n", 187 | " 
token_indexes: sequence of token indexes present in the review.\n", 188 | " target: the target of the movie review.\n", 189 | " sequence_length: the length of the sequence.\n", 190 | " '''\n", 191 | " context_features = {\n", 192 | " 'sequence_length': tf.FixedLenFeature([], dtype=tf.int64),\n", 193 | " 'target': tf.FixedLenFeature([], dtype=tf.int64),\n", 194 | " }\n", 195 | " sequence_features = {\n", 196 | " 'token_indexes': tf.FixedLenSequenceFeature([], dtype=tf.int64),\n", 197 | " }\n", 198 | " context_parsed, sequence_parsed = tf.parse_single_sequence_example(record, \n", 199 | " context_features=context_features, sequence_features=sequence_features)\n", 200 | " \n", 201 | " return (sequence_parsed['token_indexes'], context_parsed['target'],\n", 202 | " context_parsed['sequence_length'])" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "If you want me to add anything to this tutorial, please let me know and I will be happy to further enhance it :)." 210 | ] 211 | } 212 | ], 213 | "metadata": { 214 | "kernelspec": { 215 | "display_name": "Python 3", 216 | "language": "python", 217 | "name": "python3" 218 | }, 219 | "language_info": { 220 | "codemirror_mode": { 221 | "name": "ipython", 222 | "version": 3 223 | }, 224 | "file_extension": ".py", 225 | "mimetype": "text/x-python", 226 | "name": "python", 227 | "nbconvert_exporter": "python", 228 | "pygments_lexer": "ipython3", 229 | "version": "3.6.4" 230 | } 231 | }, 232 | "nbformat": 4, 233 | "nbformat_minor": 2 234 | } 235 | -------------------------------------------------------------------------------- /05_images_to_tfrecords.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to transfer raw image data to TFRecords\n", 8 | "----\n", 9 | "\n", 10 | "Hello everyone! 
This tutorial, like the previous one, is focused on automating the data input pipeline.\n", 11 | "\n", 12 | "Most of the time, our datasets are too big to read into memory so we have to prepare a pipeline for reading the data in batches from hard disk. I always process my raw data (text, images, tabular) to TFRecords as it makes my life so much easier hehe :).\n", 13 | "\n", 14 | "### Tutorial flowchart\n", 15 | "----\n", 16 | "![img](tutorials_graphics/images2tfrecords.png)\n", 17 | "\n", 18 | "This tutorial will cover the following parts:\n", 19 | "* *create a function that reads raw images and transfers them to TFRecords.*\n", 20 | "* *create a function that parses the TFRecords to TF tensors.*\n", 21 | "\n", 22 | "So without further ado, let's get started." 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "### Import here useful libraries\n", 30 | "----" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import tensorflow as tf\n", 40 | "import tensorflow.contrib.eager as tfe\n", 41 | "import glob" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# Enable eager mode. Once activated it cannot be reversed! Run just once.\n", 51 | "tfe.enable_eager_execution()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "### Transfer raw images to TFRecords\n", 59 | "----\n", 60 | "\n", 61 | "For this task, we will be using a few images from the FER2013 dataset, that you can find in the **datasets/dummy_images** folder. The emotion label can be found in the filename of the image.\n", 62 | "For example, picture **id7_3.jpeg** has the label emotion **3**, which corresponds to the state **'Happy'** as you can see in the dictionary below." 
def img2tfrecords(path_data='datasets/dummy_images/', image_format='jpeg'):
    ''' Function to transfer raw images, along with their
    target labels, to TFRecords.
    Original source code for helper functions: https://goo.gl/jEhp2B

    The records are written to `path_data + 'dummy.tfrecords'`. Every
    record holds the raw (still encoded) image bytes under the key
    'image' and the integer label under the key 'label'.

    Args:
        path_data: the location of the raw images. It is used as a plain
                   string prefix, so it must end with a path separator.
        image_format: the format of the raw images (e.g. 'png', 'jpeg')
    '''

    def _int64_feature(value):
        '''Wrap a single integer in a tf.train.Feature.'''
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    def _bytes_feature(value):
        '''Wrap a single bytes string in a tf.train.Feature.'''
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    # Get the filename of each image within the directory. glob returns
    # files in arbitrary order; sorting makes the output reproducible.
    filenames = sorted(glob.glob(path_data + '*' + image_format))

    # The `with` block closes the writer on exit, which guarantees that
    # every record is flushed to disk even if an image fails to convert.
    with tf.python_io.TFRecordWriter(path_data + 'dummy.tfrecords') as writer:
        # Iterate through each image and write it to the TFrecords file.
        for filename in filenames:
            # Read raw image
            img = tf.read_file(filename).numpy()
            # Parse its label from the filename: the label is the number
            # between the last underscore and the extension (id7_3.jpeg -> 3)
            label = int(filename.split('_')[-1].split('.')[0])
            # Create an example (image, label)
            example = tf.train.Example(features=tf.train.Features(feature={
                'label': _int64_feature(label),
                'image': _bytes_feature(img)}))
            # Write serialized example to TFRecords
            writer.write(example.SerializeToString())


def parser(record):
    '''
    Function to parse a TFRecords example.

    Args:
        record: one serialized example, as written by img2tfrecords.

    Returns:
        img: decoded image.
        label: the corresponding label of the image.
    '''

    # Define here the features you would like to parse
    features = {'image': tf.FixedLenFeature((), tf.string),
                'label': tf.FixedLenFeature((), tf.int64)}

    # Parse example
    parsed = tf.parse_single_example(record, features)

    # Decode image
    img = tf.image.decode_image(parsed['image'])

    return img, parsed['label']
164 | ] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "Python 3", 170 | "language": "python", 171 | "name": "python3" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.6.4" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 2 188 | } 189 | -------------------------------------------------------------------------------- /06_read_data_in_batches_from_tfrecords.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to read data in batches from TFRecords with TensorFlow Eager\n", 8 | "----\n", 9 | "\n", 10 | "Hello everyone, this tutorial is again focused on the input pipeline. It is quite simple, but I remember when I first started reading data in batches I got stuck in quite a few details so I thought that I might share my methods here. I really hope it will be useful for some of you.\n", 11 | "\n", 12 | "### Tutorials flowchart\n", 13 | "----\n", 14 | "![img](tutorials_graphics/readbatches.png)\n", 15 | "\n", 16 | "We are going to work on two cases:\n", 17 | "* **input data of variable sequence length** - in this case we will pad the batch on the fly to the biggest sequence length.\n", 18 | "* **image data**\n", 19 | "\n", 20 | "\n", 21 | "The data for both cases has been stored as TFRecords. 
You can have a look at the [**4th**](https://github.com/madalinabuzau/tensorflow-eager-tutorials/blob/master/04_text_data_to_tfrecords.ipynb) and [**5th**](https://github.com/madalinabuzau/tensorflow-eager-tutorials/blob/master/05_images_to_tfrecords.ipynb) tutorial to see how I transfer raw data to TFRecords.\n", 22 | "\n", 23 | "So, let's jump right into coding :)!" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Import here useful libraries\n", 31 | "----" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# Import library for data visualization\n", 41 | "import matplotlib.pyplot as plt\n", 42 | "\n", 43 | "# Make the plots appear inline in the notebook\n", 44 | "%matplotlib inline\n", 45 | "\n", 46 | "# Import TensorFlow and TensorFlow Eager\n", 47 | "import tensorflow as tf\n", 48 | "import tensorflow.contrib.eager as tfe" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "# Enable eager mode. Once activated it cannot be reversed! Run just once.\n", 58 | "tfe.enable_eager_execution()" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "### Part 1: Reading data of variable sequence length\n", 66 | "----\n", 67 | "The first part of this tutorial shows you how to read input data that comes in different lengths. In our case, we used dummy IMDB reviews from the Large Movie Database. As you can imagine, each review has a different number of words. 
def parse_imdb_sequence(record):
    '''
    Parse a single imdb tfrecord into its three tensors.

    Args:
        record: one serialized record, handed unchanged to
                tf.parse_single_sequence_example.

    Returns:
        token_indexes: sequence of token indexes present in the review.
        target: the target of the movie review.
        sequence_length: the length of the sequence.
    '''
    # Both context entries are scalar int64 values, so one spec serves both.
    scalar_int64 = tf.FixedLenFeature([], dtype=tf.int64)
    context_spec = {
        'sequence_length': scalar_int64,
        'target': scalar_int64,
    }
    # The review itself is a variable-length list of scalar int64 tokens.
    sequence_spec = {
        'token_indexes': tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }

    context, sequence = tf.parse_single_sequence_example(
        record,
        context_features=context_spec,
        sequence_features=sequence_spec)

    token_indexes = sequence['token_indexes']
    target = context['target']
    sequence_length = context['sequence_length']
    return (token_indexes, target, sequence_length)
In the method *padded_batch* we only pad the first element of the record: the sequence of word indexes. The target and sequence length do not need to be padded as they are just a single number, in each example. Thus, the padded_shapes will be:\n", 117 | "* [None] -> pad the sequences to the largest dimension, unknown yet, therefore None.\n", 118 | "* [ ] -> no padding for the target.\n", 119 | "* [ ] -> no padding for the sequence length.\n" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "# Select here the batch size\n", 129 | "batch_size = 2\n", 130 | "\n", 131 | "# Create dataset from TFRecords\n", 132 | "dataset = tf.data.TFRecordDataset('datasets/dummy_text/dummy.tfrecords')\n", 133 | "dataset = dataset.map(parse_imdb_sequence).shuffle(buffer_size=10000)\n", 134 | "dataset = dataset.padded_batch(batch_size, padded_shapes=([None],[],[]))" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "### 1.3. 
Iterate through data once \n", 142 | "----" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 6, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "name": "stdout", 152 | "output_type": "stream", 153 | "text": [ 154 | "tf.Tensor([0 1], shape=(2,), dtype=int64)\n", 155 | "tf.Tensor([1 0], shape=(2,), dtype=int64)\n", 156 | "tf.Tensor([0 1], shape=(2,), dtype=int64)\n" 157 | ] 158 | } 159 | ], 160 | "source": [ 161 | "for review, target, sequence_length in tfe.Iterator(dataset):\n", 162 | " print(target)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 7, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "name": "stdout", 172 | "output_type": "stream", 173 | "text": [ 174 | "(2, 145)\n", 175 | "(2, 139)\n", 176 | "(2, 171)\n" 177 | ] 178 | } 179 | ], 180 | "source": [ 181 | "for review, target, sequence_length in tfe.Iterator(dataset):\n", 182 | " print(review.shape)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 8, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "tf.Tensor([137 151], shape=(2,), dtype=int64)\n", 195 | "tf.Tensor([139 171], shape=(2,), dtype=int64)\n", 196 | "tf.Tensor([145 124], shape=(2,), dtype=int64)\n" 197 | ] 198 | } 199 | ], 200 | "source": [ 201 | "for review, target, sequence_length in tfe.Iterator(dataset):\n", 202 | " print(sequence_length)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "### Part 2: Read images (and their labels) in batches\n", 210 | "----\n", 211 | "\n", 212 | "In the second part of the tutorial, we are going to visualize the images stored as TFRecords, by reading them in batches. These images are a small subsample from the FER2013 dataset." 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "### 2.1. 
def parser(record):
    '''
    Turn one serialized TFRecords example into an (image, label) pair.

    Args:
        record: one serialized example, handed unchanged to
                tf.parse_single_example.

    Returns:
        img: decoded image.
        label: the corresponding label of the image.
    '''
    # Each example stores one encoded image string and one int64 label
    feature_spec = {
        'image': tf.FixedLenFeature((), tf.string),
        'label': tf.FixedLenFeature((), tf.int64),
    }
    example = tf.parse_single_example(record, feature_spec)

    # The image is stored encoded, so it has to be decoded before use
    decoded_image = tf.image.decode_image(example['image'])
    label = example['label']
    return decoded_image, label
# Dictionary that stores the correspondence between integer labels and the emotions
emotion_cat = {0:'Angry', 1:'Disgust', 2:'Fear', 3:'Happy', 4:'Sad', 5:'Surprise', 6:'Neutral'}

# Go through the dataset once
for image, label in tfe.Iterator(dataset):
    n_images = int(image.shape[0])
    # Create a subplot for each batch of images. squeeze=False keeps the
    # axes in a 2-D array even when the batch holds a single image (e.g. a
    # short final batch); without it a lone Axes object is returned and
    # indexing it fails.
    f, axarr = plt.subplots(1, n_images, figsize=(14, 6), squeeze=False)
    # Plot images
    for i in range(n_images):
        # Only the first channel is shown, rendered as grayscale
        axarr[0, i].imshow(image[i,:,:,0], cmap='gray')
        axarr[0, i].set_title('Emotion: %s' %emotion_cat[label[i].numpy()])
326 | ] 327 | } 328 | ], 329 | "metadata": { 330 | "kernelspec": { 331 | "display_name": "Python 3", 332 | "language": "python", 333 | "name": "python3" 334 | }, 335 | "language_info": { 336 | "codemirror_mode": { 337 | "name": "ipython", 338 | "version": 3 339 | }, 340 | "file_extension": ".py", 341 | "mimetype": "text/x-python", 342 | "name": "python", 343 | "nbconvert_exporter": "python", 344 | "pygments_lexer": "ipython3", 345 | "version": "3.6.4" 346 | } 347 | }, 348 | "nbformat": 4, 349 | "nbformat_minor": 2 350 | } 351 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Simple tutorials on deep learning using TensorFlow Eager 2 | 3 | This repo aims to help people who would like to start getting hands-on experience with deep learning using the TensorFlow Eager mode. TensorFlow Eager mode lets you build neural networks as easily as you would with NumPy, with the huge advantage that it provides automatic differentiation (no more handwritten backprop. YAAAY!). It can also run on GPUs, making neural network training significantly faster. 4 | 5 | I will try to make the tutorials accessible for everyone, thus I will try to work on problems that do not require a GPU to work on. 6 | 7 | **TensorFlow Version used in the tutorials - 1.7** 8 | 9 | ### List of tutorials available: 10 | #### Getting started 11 | --- 12 | * **01. Build a simple neural network** - This tutorial shows you how to build and train a one-hidden layer neural network using the Eager mode of TensorFlow, on a synthetically generated dataset. 13 | 14 | 15 | * **02. Using metrics in Eager mode** - This tutorial shows you how to use metrics 16 | that are compatible with Eager mode, for three types of machine learning problems (multi-classification, imbalanced dataset and regression). 17 | 18 | 19 | #### Simple but useful stuff 20 | --- 21 | * **03. 
Save and restore a trained model** - Simple tutorial on how you can save a trained model and restore it at a later time to make predictions on new data. 22 | 23 | 24 | * **04. Transfer text data to TFRecords** - This tutorial shows you how to store text data of variable sequence length to TFRecords. The data can be easily padded 25 | on the fly, within a batch, when reading the dataset with an iterator. 26 | 27 | 28 | * **05. Transfer image data to TFRecords** - Easy and simple tutorial on how to transfer image data and its metadata (e.g. target) to TFRecords. 29 | 30 | 31 | * **06. How to read TFRecords data in batches** - This tutorial shows you how to read either variable length sequence data or image data, in batches, from TFRecords. 32 | 33 | 34 | #### Convolutional neural networks 35 | ---- 36 | * **07. Build a CNN for emotion recognition** - This tutorial shows you how to build a CNN from scratch using the TensorFlow Eager API and the FER2013 dataset. At the end of the tutorial you will be able to test the network on yourself using a webcam. Very fun exercise! 37 | 38 | 39 | #### Recurrent neural networks 40 | ---- 41 | * **08. Build a dynamic RNN for sequence classification** - Learn how to work with variable sequence input data. This tutorial shows you how to build a dynamic RNN using the TensorFlow Eager API and the Stanford Large Movie Review Dataset. 42 | 43 | 44 | * **09. Build an RNN for time series regression** - Learn how to build an RNN for time series forecasting. 45 | 46 | 47 | 48 | Requests for tutorials: 49 | ---- 50 | * If you have any requests for a specific tutorial please let me know. 51 | 52 | Improvement advice: 53 | ---- 54 | * Please let me know if you have any suggestions to improve these tutorials. The aim is to help you get a good grasp of this framework but I am also looking to improve my programming skills so any feedback will be really appreciated :)! 
def imdb2tfrecords(path_data='datasets/aclImdb/', min_word_frequency=5,
                   max_words_review=700):
    '''
    This script processes the data and saves it in the default TensorFlow
    file format: tfrecords.

    Four files are written under `path_data`: `length_reviews.pkl` (number
    of tokens of every train review), `word2idx.pkl` (word -> index
    dictionary), `train.tfrecords` and `test.tfrecords`.

    Args:
        path_data: the path where the imdb data is stored. It is used as a
                   plain string prefix, so it must end with a path separator.
        min_word_frequency: the minimum frequency of a word, to keep it
                            in the vocabulary.
        max_words_review: the maximum number of words allowed in a review.
    '''
    # Local import: os is only needed here, to read the label out of the
    # name of the directory that holds each review.
    import os

    def read_tokens(filename):
        '''Read one review, strip its HTML tags and return its tokens.'''
        with open(filename, 'r') as review_file:
            text = review_file.read()
        # Remove HTML tags before tokenizing
        return word_tokenize(re.sub(r'<[^>]+>', ' ', text))

    # Get the filenames of the positive/negative reviews we will use
    # for training the RNN
    train_pos_files = glob.glob(path_data + 'train/pos/*')
    train_neg_files = glob.glob(path_data + 'train/neg/*')

    # Concatenate both positive and negative reviews filenames
    train_files = train_pos_files + train_neg_files

    # Tokenized version of every review in the train dataset
    reviews = [read_tokens(filename) for filename in train_files]

    # Compute the length of each review
    len_reviews = [len(review) for review in reviews]
    with open(path_data + 'length_reviews.pkl', 'wb') as lengths_file:
        pickle.dump(len_reviews, lengths_file)

    # Flatten nested list
    words = [word for review in reviews for word in review]

    # Compute the frequency of each word (sorted by decreasing frequency)
    word_frequency = pd.Series(words).value_counts()

    # Keep only words with frequency higher than minimum
    vocabulary = word_frequency[word_frequency>=min_word_frequency].index.tolist()

    # Add the Unknown and End tokens.
    extra_tokens = ['Unknown_token', 'End_token']
    vocabulary += extra_tokens

    # Create a word2idx dictionary
    word2idx = {word: index for index, word in enumerate(vocabulary)}

    # Write word vocabulary to disk
    with open(path_data + 'word2idx.pkl', 'wb') as vocabulary_file:
        pickle.dump(word2idx, vocabulary_file)

    def text2tfrecords(filenames, writer, vocabulary, word2idx,
                       max_words_review):
        '''
        Function to parse each review in part and write to disk
        as a tfrecord.

        Args:
            filenames: the paths of the review files.
            writer: the writer object for tfrecords.
            vocabulary: list with all the words included in the vocabulary.
                        Kept for interface compatibility; lookups go through
                        word2idx, which holds the same words as keys.
            word2idx: dictionary of words and their corresponding indexes.
            max_words_review: the maximum number of words kept per review.
        '''
        # Shuffle a copy of the filenames so the caller's list is untouched
        filenames = list(filenames)
        random.shuffle(filenames)
        unknown_index = word2idx['Unknown_token']
        end_index = word2idx['End_token']
        for filename in filenames:
            review = read_tokens(filename)
            # Reduce review to max words (the last ones are kept)
            review = review[-max_words_review:]
            # Replace words with their equivalent index from word2idx.
            # A dictionary lookup is O(1), while `word in vocabulary`
            # scans the whole list for every single token.
            review = [word2idx.get(word, unknown_index) for word in review]
            indexed_review = review + [end_index]
            sequence_length = len(indexed_review)
            # The label is the name of the directory holding the review.
            # os.path copes with the separators glob returns on any OS.
            label_dir = os.path.basename(os.path.dirname(filename))
            target = 1 if label_dir == 'pos' else 0
            # Create a Sequence Example to store our data in
            ex = tf.train.SequenceExample()
            # Add non-sequential features to our example
            ex.context.feature['sequence_length'].int64_list.value.append(sequence_length)
            ex.context.feature['target'].int64_list.value.append(target)
            # Add sequential feature
            token_indexes = ex.feature_lists.feature_list['token_indexes']
            for token_index in indexed_review:
                token_indexes.feature.add().int64_list.value.append(token_index)
            writer.write(ex.SerializeToString())

    ##########################################################################
    # Write train data to tfrecords.This might take a while (~10 minutes)
    ##########################################################################
    # The `with` block closes the writer, so all records reach the disk.
    with tf.python_io.TFRecordWriter(path_data + 'train.tfrecords') as train_writer:
        text2tfrecords(train_files, train_writer, vocabulary, word2idx,
                       max_words_review)

    ##########################################################################
    # Get the filenames of the reviews we will use for testing the RNN
    ##########################################################################
    test_pos_files = glob.glob(path_data + 'test/pos/*')
    test_neg_files = glob.glob(path_data + 'test/neg/*')
    test_files = test_pos_files + test_neg_files

    ##########################################################################
    # Write test data to tfrecords (~10 minutes)
    ##########################################################################
    # Written next to the train file; with the default path_data this is
    # still 'datasets/aclImdb/test.tfrecords'.
    with tf.python_io.TFRecordWriter(path_data + 'test.tfrecords') as test_writer:
        text2tfrecords(test_files, test_writer, vocabulary, word2idx,
                       max_words_review)


def parse_imdb_sequence(record):
    '''
    Script to parse imdb tfrecords.

    Args:
        record: one serialized record, handed unchanged to
                tf.parse_single_sequence_example.

    Returns:
        token_indexes: sequence of token indexes present in the review.
        target: the target of the movie review.
        sequence_length: the length of the sequence.
    '''
    # Scalar int64 values stored once per review
    context_features = {
        'sequence_length': tf.FixedLenFeature([], dtype=tf.int64),
        'target': tf.FixedLenFeature([], dtype=tf.int64),
    }
    # Variable-length list of scalar int64 token indexes
    sequence_features = {
        'token_indexes': tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(record,
        context_features=context_features, sequence_features=sequence_features)

    return (sequence_parsed['token_indexes'], context_parsed['target'],
            context_parsed['sequence_length'])
-------------------------------------------------------------------------------- /datasets/aclImdb/train.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/aclImdb/train.tfrecords -------------------------------------------------------------------------------- /datasets/aclImdb/word2idx.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/aclImdb/word2idx.pkl -------------------------------------------------------------------------------- /datasets/dummy_images/dummy.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/dummy_images/dummy.tfrecords -------------------------------------------------------------------------------- /datasets/dummy_images/id0_0.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/dummy_images/id0_0.jpeg -------------------------------------------------------------------------------- /datasets/dummy_images/id1_0.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/dummy_images/id1_0.jpeg -------------------------------------------------------------------------------- /datasets/dummy_images/id2_2.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/dummy_images/id2_2.jpeg -------------------------------------------------------------------------------- /datasets/dummy_images/id3_4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/dummy_images/id3_4.jpeg -------------------------------------------------------------------------------- /datasets/dummy_images/id4_6.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/dummy_images/id4_6.jpeg -------------------------------------------------------------------------------- /datasets/dummy_images/id5_2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/dummy_images/id5_2.jpeg -------------------------------------------------------------------------------- /datasets/dummy_images/id6_4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/dummy_images/id6_4.jpeg -------------------------------------------------------------------------------- /datasets/dummy_images/id7_3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/dummy_images/id7_3.jpeg -------------------------------------------------------------------------------- /datasets/dummy_images/id8_3.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/dummy_images/id8_3.jpeg -------------------------------------------------------------------------------- /datasets/dummy_images/id9_2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/dummy_images/id9_2.jpeg -------------------------------------------------------------------------------- /datasets/dummy_text/dummy.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/dummy_text/dummy.tfrecords -------------------------------------------------------------------------------- /datasets/dummy_text/neg/0_3.txt: -------------------------------------------------------------------------------- 1 | Story of a man who has unnatural feelings for a pig. Starts out with a opening scene that is a terrific example of absurd comedy. A formal orchestra audience is turned into an insane, violent mob by the crazy chantings of it's singers. Unfortunately it stays absurd the WHOLE time with no general narrative eventually making it just too off putting. Even those from the era should be turned off. The cryptic dialogue would make Shakespeare seem easy to a third grader. On a technical level it's better than you might think with some good cinematography by future great Vilmos Zsigmond. Future stars Sally Kirkland and Frederic Forrest can be seen briefly. 
-------------------------------------------------------------------------------- /datasets/dummy_text/neg/1_1.txt: -------------------------------------------------------------------------------- 1 | Robert DeNiro plays the most unbelievably intelligent illiterate of all time. This movie is so wasteful of talent, it is truly disgusting. The script is unbelievable. The dialog is unbelievable. Jane Fonda's character is a caricature of herself, and not a funny one. The movie moves at a snail's pace, is photographed in an ill-advised manner, and is insufferably preachy. It also plugs in every cliche in the book. Swoozie Kurtz is excellent in a supporting role, but so what?

Equally annoying is this new IMDB rule of requiring ten lines for every review. When a movie is this worthless, it doesn't require ten lines of text to let other readers know that it is a waste of time and tape. Avoid this movie. -------------------------------------------------------------------------------- /datasets/dummy_text/neg/2_1.txt: -------------------------------------------------------------------------------- 1 | I saw the capsule comment said "great acting." In my opinion, these are two great actors giving horrible performances, and with zero chemistry with one another, for a great director in his all-time worst effort. Robert De Niro has to be the most ingenious and insightful illiterate of all time. Jane Fonda's performance uncomfortably drifts all over the map as she clearly has no handle on this character, mostly because the character is so poorly written. Molasses-like would be too swift an adjective for this film's excruciating pacing. Although the film's intent is to be an uplifting story of curing illiteracy, watching it is a true "bummer." I give it 1 out of 10, truly one of the worst 20 movies for its budget level that I have ever seen. -------------------------------------------------------------------------------- /datasets/dummy_text/pos/0_9.txt: -------------------------------------------------------------------------------- 1 | Bromwell High is a cartoon comedy. It ran at the same time as some other programs about school life, such as "Teachers". My 35 years in the teaching profession lead me to believe that Bromwell High's satire is much closer to reality than is "Teachers". The scramble to survive financially, the insightful students who can see right through their pathetic teachers' pomp, the pettiness of the whole situation, all remind me of the schools I knew and their students. When I saw the episode in which a student repeatedly tried to burn down the school, I immediately recalled ......... at .......... High. 
A classic line: INSPECTOR: I'm here to sack one of your teachers. STUDENT: Welcome to Bromwell High. I expect that many adults of my age think that Bromwell High is far fetched. What a pity that it isn't! -------------------------------------------------------------------------------- /datasets/dummy_text/pos/1_7.txt: -------------------------------------------------------------------------------- 1 | If you like adult comedy cartoons, like South Park, then this is nearly a similar format about the small adventures of three teenage girls at Bromwell High. Keisha, Natella and Latrina have given exploding sweets and behaved like bitches, I think Keisha is a good leader. There are also small stories going on with the teachers of the school. There's the idiotic principal, Mr. Bip, the nervous Maths teacher and many others. The cast is also fantastic, Lenny Henry's Gina Yashere, EastEnders Chrissie Watts, Tracy-Ann Oberman, Smack The Pony's Doon Mackichan, Dead Ringers' Mark Perry and Blunder's Nina Conti. I didn't know this came from Canada, but it is very good. Very good! -------------------------------------------------------------------------------- /datasets/dummy_text/pos/2_9.txt: -------------------------------------------------------------------------------- 1 | Bromwell High is nothing short of brilliant. Expertly scripted and perfectly delivered, this searing parody of a students and teachers at a South London Public School leaves you literally rolling with laughter. It's vulgar, provocative, witty and sharp. The characters are a superbly caricatured cross section of British society (or to be more accurate, of any society). Following the escapades of Keisha, Latrina and Natella, our three "protagonists" for want of a better term, the show doesn't shy away from parodying every imaginable subject. Political correctness flies out the window in every episode. 
If you enjoy shows that aren't afraid to poke fun of every taboo subject imaginable, then Bromwell High will not disappoint! -------------------------------------------------------------------------------- /datasets/dummy_text/word2idx.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/datasets/dummy_text/word2idx.pkl -------------------------------------------------------------------------------- /datasets/get_imdb_dataset.sh: -------------------------------------------------------------------------------- 1 | # Get the IMDB sentiment analysis dataset 2 | wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz 3 | # Unzip the file 4 | tar -xzvf aclImdb_v1.tar.gz 5 | # Remove the archive 6 | rm aclImdb_v1.tar.gz 7 | -------------------------------------------------------------------------------- /models_checkpoints/DemandRNN/-0.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/models_checkpoints/DemandRNN/-0.data-00000-of-00001 -------------------------------------------------------------------------------- /models_checkpoints/DemandRNN/-0.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/models_checkpoints/DemandRNN/-0.index -------------------------------------------------------------------------------- /models_checkpoints/DemandRNN/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "-0" 2 | all_model_checkpoint_paths: "-0" 3 | -------------------------------------------------------------------------------- 
/models_checkpoints/EmotionCNN/-0.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/models_checkpoints/EmotionCNN/-0.data-00000-of-00001 -------------------------------------------------------------------------------- /models_checkpoints/EmotionCNN/-0.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/models_checkpoints/EmotionCNN/-0.index -------------------------------------------------------------------------------- /models_checkpoints/EmotionCNN/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "-0" 2 | all_model_checkpoint_paths: "-0" 3 | -------------------------------------------------------------------------------- /models_checkpoints/ImdbRNN/-0.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/models_checkpoints/ImdbRNN/-0.data-00000-of-00001 -------------------------------------------------------------------------------- /models_checkpoints/ImdbRNN/-0.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/models_checkpoints/ImdbRNN/-0.index -------------------------------------------------------------------------------- /models_checkpoints/ImdbRNN/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "-0" 2 | all_model_checkpoint_paths: "-0" 3 | 
-------------------------------------------------------------------------------- /models_checkpoints/SimpleNN/-1.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/models_checkpoints/SimpleNN/-1.data-00000-of-00001 -------------------------------------------------------------------------------- /models_checkpoints/SimpleNN/-1.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/models_checkpoints/SimpleNN/-1.index -------------------------------------------------------------------------------- /models_checkpoints/SimpleNN/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "-1" 2 | all_model_checkpoint_paths: "-1" 3 | -------------------------------------------------------------------------------- /tutorials_graphics/01_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/tutorials_graphics/01_flowchart.png -------------------------------------------------------------------------------- /tutorials_graphics/02_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/tutorials_graphics/02_flowchart.png -------------------------------------------------------------------------------- /tutorials_graphics/03_flowchart.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/tutorials_graphics/03_flowchart.png -------------------------------------------------------------------------------- /tutorials_graphics/04_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/tutorials_graphics/04_flowchart.png -------------------------------------------------------------------------------- /tutorials_graphics/05_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/tutorials_graphics/05_flowchart.png -------------------------------------------------------------------------------- /tutorials_graphics/images2tfrecords.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/tutorials_graphics/images2tfrecords.png -------------------------------------------------------------------------------- /tutorials_graphics/moving_windows_samples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/tutorials_graphics/moving_windows_samples.png -------------------------------------------------------------------------------- /tutorials_graphics/readbatches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/tutorials_graphics/readbatches.png 
-------------------------------------------------------------------------------- /tutorials_graphics/rnn_imdb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/tutorials_graphics/rnn_imdb.png -------------------------------------------------------------------------------- /tutorials_graphics/rnn_regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/tutorials_graphics/rnn_regression.png -------------------------------------------------------------------------------- /tutorials_graphics/save_restore_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/tutorials_graphics/save_restore_model.png -------------------------------------------------------------------------------- /tutorials_graphics/text2tfrecords.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/madalinabuzau/tensorflow-eager-tutorials/383950cc8b7c9b99217b5edf268e623fcec4932f/tutorials_graphics/text2tfrecords.png --------------------------------------------------------------------------------