├── .gitignore
├── .whitesource
├── LICENSE
├── README.md
├── check_gpu.ipynb
├── solutions
│   ├── tf_pet_solution.ipynb
│   └── tf_transformer_solution.ipynb
├── tf_dataset_demo.ipynb
├── tf_pet_base.ipynb
└── tf_transformer_base.ipynb
/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | -------------------------------------------------------------------------------- /.whitesource: -------------------------------------------------------------------------------- 1 | { 2 | "checkRunSettings": { 3 | "vulnerableCheckRunConclusionLevel": "failure" 4 | }, 5 | "issueSettings": { 6 | "minSeverityLevel": "LOW" 7 | } 8 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 NVIDIA AI Technology Center 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyCon SG 2019 Tutorial: Optimizing TensorFlow Performance 2 | 3 | ![GitHub last commit](https://img.shields.io/github/last-commit/NVAITC/pycon-sg19-tensorflow-tutorial.svg) ![GitHub](https://img.shields.io/github/license/NVAITC/pycon-sg19-tensorflow-tutorial.svg) ![](https://img.shields.io/github/repo-size/NVAITC/pycon-sg19-tensorflow-tutorial.svg) 4 | 5 | This workshop covers: 6 | 7 | * a brief introduction to deep learning and TensorFlow 2.0 8 | * using `tf.data` and TensorFlow Datasets 9 | * the XLA compiler and Automatic Mixed Precision (AMP) 10 | * speeding up a CNN (ResNet-50) with XLA and AMP 11 | * speeding up a Transformer (BERT) with XLA and AMP 12 | 13 | For a quick guide to using Automatic Mixed Precision, check out this [TLDR](https://drive.google.com/open?id=1Nz2438DBQS591kHha2ENL7VBhmBaXQ_loQVi3rywRVU). A condensed code reference for these patterns is at the end of this README. 14 | 15 | ## Content 16 | 17 | **Slides** are in this [Google Drive folder](https://drive.google.com/open?id=1RR0UhnvJ3PHL4sGRe2du4_w66Kg9KNVr). 18 | 19 | **Notebooks** 20 | 21 | | Notebook | Link | Solution | 22 | | ------------------------------ | ---- | -------- | 23 | | TensorFlow Datasets & tf.data | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NVAITC/pycon-sg19-tensorflow-tutorial/blob/master/tf_dataset_demo.ipynb) | | 24 | | Pet Classification with TF 2.0 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NVAITC/pycon-sg19-tensorflow-tutorial/blob/master/tf_pet_base.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NVAITC/pycon-sg19-tensorflow-tutorial/blob/master/solutions/tf_pet_solution.ipynb) | 25 | | Transformers with TF 2.0 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NVAITC/pycon-sg19-tensorflow-tutorial/blob/master/tf_transformer_base.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NVAITC/pycon-sg19-tensorflow-tutorial/blob/master/solutions/tf_transformer_solution.ipynb) | 26 | 27 | If you are running the notebooks on the workshop JupyterHub or on your own hardware, clone this repository: 28 | 29 | ```shell 30 | git clone https://github.com/NVAITC/pycon-sg19-tensorflow-tutorial 31 | ``` 32 | 33 | ## Workshop Information 34 | 35 | **In-person @ PyCon SG 2019** 36 | 37 | * Attend the workshop from 10am to 1pm on Saturday, October 12 at [Republic Polytechnic](https://pycon.sg/venue/). 38 | * Get your tickets [here](https://www.eventnook.com/event/pyconsingapore2019/). 
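## Quick Reference

The three sketches below condense the key patterns from the notebooks for readers skimming this README. They assume the same environment as the notebooks (TensorFlow 2.0 on a GPU, with `tensorflow_datasets` installed) and are summaries of the notebook code, not substitutes for working through it.

First, the `tf.data` input pipeline the pet notebooks build: load the dataset with TensorFlow Datasets, then shuffle, repeat, preprocess in parallel, batch, and prefetch so the GPU is never starved for input. The image size, batch size, and prefetch depth below are the values the notebooks use.

```python
import multiprocessing

import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds

IMG_SIZE = (224, 224)

# Load the Oxford-IIIT Pet dataset as (image, label) pairs.
(train_dataset, test_dataset), info = tfds.load(name="oxford_iiit_pet:3.*.*",
                                                split=["train", "test"],
                                                shuffle_files=True,
                                                as_supervised=True,
                                                with_info=True)
num_class = info.features["label"].num_classes

@tf.function
def format_train_example(image, label):
    image = tf.cast(image, tf.float32)
    image = (image / 127.5) - 1                     # scale pixels to [-1, 1]
    image = tf.image.resize(image, IMG_SIZE)
    image = tf.image.random_flip_left_right(image)  # light augmentation
    return image, tf.one_hot(label, num_class)

# Shuffle -> repeat -> parallel map -> batch -> prefetch.
train_dataset = (train_dataset.shuffle(1024)
                              .repeat(-1)
                              .map(format_train_example,
                                   num_parallel_calls=multiprocessing.cpu_count())
                              .batch(80)
                              .prefetch(8))
```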
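Second, the two switches the solution notebooks flip to speed up training. In the runs committed to this repository, on a single Tesla T4, these changes (together with prefetching and, for BERT, a slightly larger batch size) take the pet notebook from roughly 103 to 200 images/s and the BERT notebook from roughly 38 to 128 examples/s. This is a sketch for TensorFlow 2.0, where the Keras mixed-precision API is still `experimental`; later TensorFlow releases have moved these APIs.

```python
import tensorflow.compat.v2 as tf

# Enable XLA just-in-time compilation (both solution notebooks do this).
tf.config.optimizer.set_jit(True)

# Route A - enable AMP via the Keras policy (used by the pet solution):
tf.keras.mixed_precision.experimental.set_policy('mixed_float16')

# Route B - enable AMP via the graph rewrite (used by the transformer
# solution), paired with dynamic loss scaling on the optimizer so that
# small gradients do not underflow in float16:
#
#   tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
#   opt = tf.keras.optimizers.Adam(learning_rate=3e-5)
#   opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(opt, "dynamic")
```

With Route A, the pet solution also feeds the model `tf.float16` inputs and keeps the final softmax layer in `tf.float32` for numerical stability; see `solutions/tf_pet_solution.ipynb` for the details.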
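Third, the BERT fine-tuning flow, condensed from `solutions/tf_transformer_solution.ipynb`. It uses the HuggingFace Transformers library as of late 2019; the `glue_convert_examples_to_features` helper and the positional arguments shown here match that release and have changed in newer versions of `transformers`.

```python
import tensorflow.compat.v2 as tf
import tensorflow_datasets
from transformers import (BertTokenizer, TFBertForSequenceClassification,
                          glue_convert_examples_to_features)

BATCH_SIZE = 40

# Load the GLUE MRPC dataset and a pre-trained BERT checkpoint.
data, info = tensorflow_datasets.load("glue/mrpc", with_info=True)
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
model = TFBertForSequenceClassification.from_pretrained("bert-base-cased")

# Tokenize sentence pairs into fixed-length (128-token) features.
train_dataset = glue_convert_examples_to_features(data["train"], tokenizer, 128, "mrpc")
train_dataset = train_dataset.shuffle(512).batch(BATCH_SIZE).repeat(-1).prefetch(8)

# Fine-tune with a small learning rate, as in the notebook.
opt = tf.keras.optimizers.Adam(learning_rate=3e-5)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
acc = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
model.compile(optimizer=opt, loss=loss, metrics=[acc])

model.fit(train_dataset, epochs=4,
          steps_per_epoch=info.splits["train"].num_examples // BATCH_SIZE)
```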
39 | -------------------------------------------------------------------------------- /check_gpu.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Checking for a GPU\n", 8 | "\n", 9 | "To check for a GPU, run the `nvidia-smi` command in a Linux shell." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Fri Oct 11 16:48:22 2019 \n", 22 | "+-----------------------------------------------------------------------------+\n", 23 | "| NVIDIA-SMI 410.104 Driver Version: 410.104 CUDA Version: 10.0 |\n", 24 | "|-------------------------------+----------------------+----------------------+\n", 25 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", 26 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", 27 | "|===============================+======================+======================|\n", 28 | "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", 29 | "| N/A 44C P8 10W / 70W | 0MiB / 15079MiB | 0% Default |\n", 30 | "+-------------------------------+----------------------+----------------------+\n", 31 | " \n", 32 | "+-----------------------------------------------------------------------------+\n", 33 | "| Processes: GPU Memory |\n", 34 | "| GPU PID Type Process name Usage |\n", 35 | "|=============================================================================|\n", 36 | "| No running processes found |\n", 37 | "+-----------------------------------------------------------------------------+\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "!nvidia-smi" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [] 51 | } 52 | ], 53 | "metadata": { 54 | "kernelspec": { 55 | "display_name": "Python 3", 56 | "language": "python", 57 | "name": "python3" 58 | }, 59 | "language_info": { 60 | "codemirror_mode": { 61 | "name": "ipython", 62 | "version": 3 63 | }, 64 | "file_extension": ".py", 65 | "mimetype": "text/x-python", 66 | "name": "python", 67 | "nbconvert_exporter": "python", 68 | "pygments_lexer": "ipython3", 69 | "version": "3.6.7" 70 | } 71 | }, 72 | "nbformat": 4, 73 | "nbformat_minor": 4 74 | } 75 | -------------------------------------------------------------------------------- /solutions/tf_pet_solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "laUXS-24UvPM" 8 | }, 9 | "source": [ 10 | "## Setup\n", 11 | "\n", 12 | "This section contains supplementary information, functions, and installs required packages." 
13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "colab": {}, 20 | "colab_type": "code", 21 | "id": "1Nn_XUbFUVah" 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "!pip install tensorflow-gpu==2.0 tensorflow_datasets gpustat -Uq" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "colab_type": "text", 32 | "id": "mgLK3zv_Vsy8" 33 | }, 34 | "source": [ 35 | "**About**\n", 36 | "\n", 37 | "\n", 38 | "\n", 39 | "This notebook is put together by Timothy Liu (`timothyl@nvidia.com`) for the [**PyCon SG**](https://pycon.sg/) 2019 tutorial on [**Improving Deep Learning Performance in TensorFlow**](https://github.com/NVAITC/pycon-sg19-tensorflow-tutorial).\n", 40 | "\n", 41 | "**Acknowledgements**\n", 42 | "\n", 43 | "* This notebook uses some materials adapted from TensorFlow documentation.\n", 44 | "* This notebook uses the [Oxford-IIIT Pet Dataset](http://www.robots.ox.ac.uk/~vgg/data/pets/) ([TensorFlow Datasets page](https://www.tensorflow.org/datasets/catalog/oxford_iiit_pet)).\n", 45 | "\n", 46 | "**Dataset Citation**\n", 47 | "\n", 48 | "```\n", 49 | "@InProceedings{parkhi12a,\n", 50 | " author = \"Parkhi, O. M. and Vedaldi, A. and Zisserman, A. and Jawahar, C.~V.\",\n", 51 | " title = \"Cats and Dogs\",\n", 52 | " booktitle = \"IEEE Conference on Computer Vision and Pattern Recognition\",\n", 53 | " year = \"2012\",\n", 54 | "}\n", 55 | "```" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "colab": { 63 | "base_uri": "https://localhost:8080/" 64 | }, 65 | "colab_type": "code", 66 | "executionInfo": { 67 | "elapsed": 7175, 68 | "status": "ok", 69 | "timestamp": 1570473634312, 70 | "user": { 71 | "displayName": "Timothy Liu SG", 72 | "photoUrl": "", 73 | "userId": "04327513636844080478" 74 | }, 75 | "user_tz": -480 76 | }, 77 | "id": "zNbGLsDSUe3W", 78 | "outputId": "4e551e00-c5a8-4d41-f6c2-7269f7941a68" 79 | }, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "TensorFlow version: 2.0.0\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "import multiprocessing\n", 91 | "\n", 92 | "import tensorflow\n", 93 | "print(\"TensorFlow version:\", tensorflow.__version__)\n", 94 | "\n", 95 | "import tensorflow.compat.v2 as tf\n", 96 | "import tensorflow_datasets as tfds" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "colab": {}, 104 | "colab_type": "code", 105 | "id": "kakmnWusm51w" 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "import time\n", 110 | "\n", 111 | "class TimeHistory(tf.keras.callbacks.Callback):\n", 112 | " def on_train_begin(self, logs={}):\n", 113 | " self.times = []\n", 114 | " def on_epoch_begin(self, epoch, logs={}):\n", 115 | " self.epoch_time_start = time.time()\n", 116 | " def on_epoch_end(self, epoch, logs={}):\n", 117 | " self.times.append(time.time() - self.epoch_time_start)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": { 123 | "colab_type": "text", 124 | "id": "ObRNSvQMUxbI" 125 | }, 126 | "source": [ 127 | "# Pet Classification with TensorFlow" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "colab": { 135 | "base_uri": "https://localhost:8080/", 136 | "height": 51 137 | }, 138 | "colab_type": "code", 139 | "executionInfo": { 140 | "elapsed": 9897, 141 | "status": "ok", 142 | "timestamp": 1570473637050, 143 | "user": { 144 | "displayName": 
"Timothy Liu SG", 145 | "photoUrl": "", 146 | "userId": "04327513636844080478" 147 | }, 148 | "user_tz": -480 149 | }, 150 | "id": "KaI6MAuJPNDV", 151 | "outputId": "089e1e0d-0fef-4e15-c54d-62153d7f5f96" 152 | }, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "\u001b[1m\u001b[37mjupyter-admin \u001b[m Fri Oct 11 16:56:31 2019 \u001b[1m\u001b[30m410.104\u001b[m\n", 159 | "\u001b[36m[0]\u001b[m \u001b[34mTesla T4 \u001b[m |\u001b[1m\u001b[31m 66'C\u001b[m, \u001b[32m 0 %\u001b[m | \u001b[36m\u001b[1m\u001b[33m 0\u001b[m / \u001b[33m15079\u001b[m MB |\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "!gpustat" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": { 171 | "colab": {}, 172 | "colab_type": "code", 173 | "id": "-TiQp7Apm516" 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "# enable XLA\n", 178 | "tf.config.optimizer.set_jit(True)\n", 179 | "\n", 180 | "# enable AMP\n", 181 | "tf.keras.mixed_precision.experimental.set_policy('mixed_float16')" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": { 188 | "colab": {}, 189 | "colab_type": "code", 190 | "id": "BCK57jlvNpOO" 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "import tensorflow.keras.layers as layers\n", 195 | "from tensorflow.keras.applications.resnet50 import ResNet50\n", 196 | "\n", 197 | "def create_model(img_size=(224,224), num_class=2, train_base=True):\n", 198 | " # accept float16 image inputs\n", 199 | " input_layer = layers.Input(shape=(img_size[0],img_size[1],3), dtype=tf.float16)\n", 200 | " base = ResNet50(input_tensor=input_layer,\n", 201 | " include_top=False,\n", 202 | " weights=\"imagenet\")\n", 203 | " base.trainable = train_base\n", 204 | " x = base.output\n", 205 | " x = layers.GlobalAveragePooling2D()(x)\n", 206 | " # softmax only accepts float32 - need to manually cast (likely a bug)\n", 207 | " preds = layers.Dense(num_class, activation=\"softmax\", dtype=tf.float32)(x)\n", 208 | " return tf.keras.models.Model(inputs=input_layer, outputs=preds)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "colab": {}, 216 | "colab_type": "code", 217 | "id": "ICDyhaNCdNsm" 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "(train_dataset, test_dataset), info = tfds.load(name=\"oxford_iiit_pet:3.*.*\",\n", 222 | " split=[\"train\", \"test\"],\n", 223 | " shuffle_files=True,\n", 224 | " as_supervised=True,\n", 225 | " with_info=True)\n", 226 | "\n", 227 | "num_class = info.features[\"label\"].num_classes\n", 228 | "num_train = info.splits[\"train\"].num_examples\n", 229 | "num_test = info.splits[\"test\"].num_examples" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": { 236 | "colab": {}, 237 | "colab_type": "code", 238 | "id": "MdWvubj3g2aO" 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "IMG_SIZE = (224, 224)\n", 243 | "\n", 244 | "@tf.function\n", 245 | "def format_train_example(image, label):\n", 246 | " image = tf.cast(image, tf.float32)\n", 247 | " image = (image/127.5) - 1\n", 248 | " image = tf.image.resize(image, IMG_SIZE)\n", 249 | " # perform image augmentation with tf.image\n", 250 | " image = tf.image.random_flip_left_right(image)\n", 251 | " image = tf.image.random_brightness(image, 0.1)\n", 252 | " # return images as float16\n", 253 | " image = tf.cast(image, tf.float16)\n", 254 | " return image, tf.one_hot(label, 
num_class)\n", 255 | "\n", 256 | "@tf.function\n", 257 | "def format_eval_example(image, label):\n", 258 | " image = tf.cast(image, tf.float32)\n", 259 | " image = (image/127.5) - 1\n", 260 | " image = tf.image.resize(image, IMG_SIZE)\n", 261 | " # return images as float16\n", 262 | " image = tf.cast(image, tf.float16)\n", 263 | " return image, tf.one_hot(label, num_class)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": { 270 | "colab": {}, 271 | "colab_type": "code", 272 | "id": "GbsEAoP8XKib" 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "BATCH_SIZE = 80\n", 277 | "N_THREADS = multiprocessing.cpu_count()\n", 278 | "PREFETCH_COUNT = 8\n", 279 | "\n", 280 | "train_dataset = train_dataset.shuffle(1024)\n", 281 | "train_dataset = train_dataset.repeat(-1)\n", 282 | "train_dataset = train_dataset.map(format_train_example,\n", 283 | " num_parallel_calls=N_THREADS)\n", 284 | "train_dataset = train_dataset.batch(BATCH_SIZE)\n", 285 | "train_dataset = train_dataset.prefetch(PREFETCH_COUNT)" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": { 292 | "colab": {}, 293 | "colab_type": "code", 294 | "id": "7-OAQB-0jb-r" 295 | }, 296 | "outputs": [], 297 | "source": [ 298 | "test_dataset = test_dataset.map(format_eval_example,\n", 299 | " num_parallel_calls=N_THREADS)\n", 300 | "test_dataset = test_dataset.repeat(-1)\n", 301 | "test_dataset = test_dataset.batch(BATCH_SIZE)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": { 308 | "colab": {}, 309 | "colab_type": "code", 310 | "id": "YAwj90pGOIAy" 311 | }, 312 | "outputs": [], 313 | "source": [ 314 | "model = create_model(IMG_SIZE, num_class, train_base=True)\n", 315 | "opt = tf.keras.optimizers.Adam()\n", 316 | "\n", 317 | "model.compile(loss=\"categorical_crossentropy\",\n", 318 | " optimizer=opt,\n", 319 | " metrics=[\"acc\"])\n", 320 | "\n", 321 | "#model.summary()" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": { 328 | "colab": {}, 329 | "colab_type": "code", 330 | "id": "0vRtm2rORBHf" 331 | }, 332 | "outputs": [], 333 | "source": [ 334 | "steps_per_epoch = num_train//BATCH_SIZE\n", 335 | "steps_test = num_test//BATCH_SIZE\n", 336 | "\n", 337 | "time_callback = TimeHistory()" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": { 344 | "colab": { 345 | "base_uri": "https://localhost:8080/", 346 | "height": 153 347 | }, 348 | "colab_type": "code", 349 | "executionInfo": { 350 | "elapsed": 132772, 351 | "status": "ok", 352 | "timestamp": 1570473759978, 353 | "user": { 354 | "displayName": "Timothy Liu SG", 355 | "photoUrl": "", 356 | "userId": "04327513636844080478" 357 | }, 358 | "user_tz": -480 359 | }, 360 | "id": "7T8VVrn4Q12B", 361 | "outputId": "d5b7a7c7-0970-45ef-898e-283abfe787a6" 362 | }, 363 | "outputs": [ 364 | { 365 | "name": "stdout", 366 | "output_type": "stream", 367 | "text": [ 368 | "Train for 46 steps\n", 369 | "Epoch 1/5\n", 370 | "46/46 [==============================] - 68s 1s/step - loss: 1.7191 - acc: 0.5182\n", 371 | "Epoch 2/5\n", 372 | "46/46 [==============================] - 19s 418ms/step - loss: 0.6449 - acc: 0.7894\n", 373 | "Epoch 3/5\n", 374 | "46/46 [==============================] - 20s 429ms/step - loss: 0.3809 - acc: 0.8799\n", 375 | "Epoch 4/5\n", 376 | "46/46 [==============================] - 18s 400ms/step - loss: 0.2068 - acc: 0.9315\n", 377 | 
"Epoch 5/5\n", 378 | "46/46 [==============================] - 19s 405ms/step - loss: 0.1184 - acc: 0.9655\n" 379 | ] 380 | }, 381 | { 382 | "data": { 383 | "text/plain": [ 384 | "" 385 | ] 386 | }, 387 | "execution_count": 13, 388 | "metadata": { 389 | "tags": [] 390 | }, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "model.fit(train_dataset, steps_per_epoch=steps_per_epoch,\n", 396 | " epochs=5, callbacks=[time_callback], verbose=1)" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": { 403 | "colab": {}, 404 | "colab_type": "code", 405 | "id": "21CjKwKsjIiy", 406 | "outputId": "9f626740-351a-4ce2-d529-0a4c7308d92f" 407 | }, 408 | "outputs": [ 409 | { 410 | "name": "stdout", 411 | "output_type": "stream", 412 | "text": [ 413 | "Peak Img/s: 200.0\n" 414 | ] 415 | } 416 | ], 417 | "source": [ 418 | "epoch_time = min(time_callback.times)\n", 419 | "img_per_sec = num_train//epoch_time\n", 420 | "\n", 421 | "print(\"Peak Img/s:\", img_per_sec)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "metadata": { 428 | "colab": {}, 429 | "colab_type": "code", 430 | "id": "FeP0nP1Em52O" 431 | }, 432 | "outputs": [], 433 | "source": [] 434 | } 435 | ], 436 | "metadata": { 437 | "accelerator": "GPU", 438 | "colab": { 439 | "collapsed_sections": [ 440 | "laUXS-24UvPM" 441 | ], 442 | "name": "tf_pet_solution.ipynb", 443 | "provenance": [], 444 | "toc_visible": true 445 | }, 446 | "kernelspec": { 447 | "display_name": "Python 3", 448 | "language": "python", 449 | "name": "python3" 450 | }, 451 | "language_info": { 452 | "codemirror_mode": { 453 | "name": "ipython", 454 | "version": 3 455 | }, 456 | "file_extension": ".py", 457 | "mimetype": "text/x-python", 458 | "name": "python", 459 | "nbconvert_exporter": "python", 460 | "pygments_lexer": "ipython3", 461 | "version": "3.6.7" 462 | } 463 | }, 464 | "nbformat": 4, 465 | "nbformat_minor": 4 466 | } 467 | -------------------------------------------------------------------------------- /solutions/tf_transformer_solution.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.7"},"colab":{"name":"tf_transformer_solution.ipynb","provenance":[],"collapsed_sections":["NWGze6qCm6Pu"],"toc_visible":true},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","metadata":{"id":"NWGze6qCm6Pu","colab_type":"text"},"source":["## Setup\n","\n","This section contains supplementary information, functions, and installs required packages."]},{"cell_type":"code","metadata":{"id":"2Z_LEdIbm6Pv","colab_type":"code","colab":{}},"source":["!pip install tensorflow-gpu==2.0 tensorflow_datasets gpustat transformers -Uq"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"FWiixEDBm6Px","colab_type":"text"},"source":["**About**\n","\n","\n","\n","This notebook is put together by Timothy Liu (`timothyl@nvidia.com`) for the [**PyCon SG**](https://pycon.sg/) 2019 tutorial on [**Improving Deep Learning Performance in TensorFlow**](https://github.com/NVAITC/pycon-sg19-tensorflow-tutorial).\n","\n","**Acknowledgements**\n","\n","* This notebook uses some materials adapted from TensorFlow documentation.\n","* 
This notebook uses the [HuggingFace Transformers library](https://github.com/huggingface/transformers).\n","* This notebook uses the [GLUE (MRPC) Dataset](https://gluebenchmark.com/) ([TensorFlow Datasets page](https://www.tensorflow.org/datasets/catalog/glue)).\n","\n","**Dataset Citation**\n","\n","```\n","@inproceedings{wang2019glue,\n"," title={ {GLUE}: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding},\n"," author={Wang, Alex and Singh, Amanpreet and Michael, Julian and Hill, Felix and Levy, Omer and Bowman, Samuel R.},\n"," note={In the Proceedings of ICLR.},\n"," year={2019}\n","}\n","```"]},{"cell_type":"code","metadata":{"id":"got-iOfem6Py","colab_type":"code","colab":{}},"source":["import tensorflow.compat.v2 as tf\n","import tensorflow_datasets"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"LJMHzkWQm6P0","colab_type":"code","colab":{}},"source":["import time\n","\n","class TimeHistory(tf.keras.callbacks.Callback):\n"," def on_train_begin(self, logs={}):\n"," self.times = []\n"," def on_epoch_begin(self, epoch, logs={}):\n"," self.epoch_time_start = time.time()\n"," def on_epoch_end(self, epoch, logs={}):\n"," self.times.append(time.time() - self.epoch_time_start)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"bpOFAo4Km6P2","colab_type":"text"},"source":["# Sequence Classification with BERT in TF 2.0"]},{"cell_type":"code","metadata":{"id":"WufuRjQxp9h_","colab_type":"code","colab":{}},"source":["!gpustat"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"Sr25k7aNm6P2","colab_type":"code","colab":{}},"source":["# enable XLA\n","tf.config.optimizer.set_jit(True)\n","\n","# enable AMP via tf.config\n","tf.config.optimizer.set_experimental_options({\"auto_mixed_precision\": True})"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"V1Lz61HGm6P4","colab_type":"text"},"source":["## Load BERT Tokenizer"]},{"cell_type":"code","metadata":{"id":"tjMRDhSxm6P5","colab_type":"code","colab":{}},"source":["from transformers import BertTokenizer\n","\n","tokenizer = BertTokenizer.from_pretrained(\"bert-base-cased\")"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"saAtsXk-m6P7","colab_type":"text"},"source":["## Input Pipeline"]},{"cell_type":"markdown","metadata":{"id":"cvmttt3mm6P7","colab_type":"text"},"source":["### Load Dataset"]},{"cell_type":"code","metadata":{"id":"FyaLd5x7m6P8","colab_type":"code","outputId":"a349e397-a488-405c-d6bf-53562f87f377","colab":{}},"source":["data, info = tensorflow_datasets.load(\"glue/mrpc\", with_info=True)\n","\n","train_examples = info.splits[\"train\"].num_examples\n","valid_examples = info.splits[\"validation\"].num_examples"],"execution_count":0,"outputs":[{"output_type":"stream","text":["INFO:absl:Overwrite dataset info from restored data version.\n","INFO:absl:Reusing dataset glue (/home/jovyan/tensorflow_datasets/glue/mrpc/0.0.2)\n","INFO:absl:Constructing tf.data.Dataset for split None, from /home/jovyan/tensorflow_datasets/glue/mrpc/0.0.2\n","WARNING:absl:Warning: Setting shuffle_files=True because split=TRAIN and shuffle_files=None. 
This behavior will be deprecated on 2019-08-06, at which point shuffle_files=False will be the default for all splits.\n"],"name":"stderr"}]},{"cell_type":"markdown","metadata":{"id":"KCMkgYi5m6P-","colab_type":"text"},"source":["## Build Input Pipeline"]},{"cell_type":"code","metadata":{"id":"a-tl2kZEm6P-","colab_type":"code","colab":{}},"source":["from transformers import glue_convert_examples_to_features\n","\n","BATCH_SIZE = 40\n","\n","# Prepare dataset for GLUE as a tf.data.Dataset instance\n","train_dataset = glue_convert_examples_to_features(data[\"train\"], tokenizer, 128, \"mrpc\")\n","train_dataset = train_dataset.shuffle(512).batch(BATCH_SIZE).repeat(-1).prefetch(8)\n","\n","valid_dataset = glue_convert_examples_to_features(data[\"validation\"], tokenizer, 128, \"mrpc\")\n","valid_dataset = valid_dataset.batch(BATCH_SIZE)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"JqgFrbdbm6QA","colab_type":"text"},"source":["## Build BERT Model"]},{"cell_type":"markdown","metadata":{"id":"3yh2myknm6QB","colab_type":"text"},"source":["### Load Pre-trained BERT Model"]},{"cell_type":"code","metadata":{"id":"xpEvDnxum6QB","colab_type":"code","colab":{}},"source":["from transformers import TFBertForSequenceClassification\n","\n","model = TFBertForSequenceClassification.from_pretrained(\"bert-base-cased\")"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"ZvnCMjUzm6QD","colab_type":"code","colab":{}},"source":["opt = tf.keras.optimizers.Adam(learning_rate=3e-5)\n","# do loss scaling for optimizer\n","opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(opt, \"dynamic\")\n","\n","loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n","acc = tf.keras.metrics.SparseCategoricalAccuracy(\"accuracy\")\n","model.compile(optimizer=opt,\n"," loss=loss,\n"," metrics=[acc])"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"TU_GlzZom6QF","colab_type":"text"},"source":["## Train BERT Model"]},{"cell_type":"code","metadata":{"id":"0p2POI4um6QF","colab_type":"code","colab":{}},"source":["time_callback = TimeHistory()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"oXXUn0sem6QH","colab_type":"code","outputId":"08bcdf53-a763-4ec1-bb69-0646efc5414a","colab":{}},"source":["history = model.fit(train_dataset, epochs=4, steps_per_epoch=train_examples//BATCH_SIZE,\n"," validation_data=valid_dataset, validation_steps=valid_examples//BATCH_SIZE,\n"," validation_freq=3, callbacks=[time_callback])"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Train for 91 steps, validate for 10 steps\n","Epoch 1/4\n"],"name":"stdout"},{"output_type":"stream","text":["/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/indexed_slices.py:424: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n"," \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. \"\n","/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/indexed_slices.py:424: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n"," \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. 
\"\n"],"name":"stderr"},{"output_type":"stream","text":["91/91 [==============================] - 101s 1s/step - loss: 0.6166 - accuracy: 0.6676\n","Epoch 2/4\n","91/91 [==============================] - 32s 354ms/step - loss: 0.4064 - accuracy: 0.8162\n","Epoch 3/4\n","91/91 [==============================] - 72s 787ms/step - loss: 0.2176 - accuracy: 0.9154 - val_loss: 0.5116 - val_accuracy: 0.8600\n","Epoch 4/4\n","91/91 [==============================] - 29s 315ms/step - loss: 0.0952 - accuracy: 0.9666\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"Pzy-LYBNm6QJ","colab_type":"code","outputId":"97a91fe5-d23c-4131-dd4e-7483aa219e14","colab":{}},"source":["epoch_time = min(time_callback.times)\n","egs_per_sec = train_examples//epoch_time\n","\n","print(\"Peak Examples/s:\", egs_per_sec)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Peak Examples/s: 128.0\n"],"name":"stdout"}]}]} -------------------------------------------------------------------------------- /tf_pet_base.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "laUXS-24UvPM" 8 | }, 9 | "source": [ 10 | "## Setup\n", 11 | "\n", 12 | "This section contains supplementary information, functions, and installs required packages." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "colab": {}, 20 | "colab_type": "code", 21 | "id": "1Nn_XUbFUVah" 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "!pip install tensorflow-gpu==2.0 tensorflow_datasets gpustat -Uq" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "colab_type": "text", 32 | "id": "mgLK3zv_Vsy8" 33 | }, 34 | "source": [ 35 | "**About**\n", 36 | "\n", 37 | "\n", 38 | "\n", 39 | "This notebook is put together by Timothy Liu (`timothyl@nvidia.com`) for the [**PyCon SG**](https://pycon.sg/) 2019 tutorial on [**Improving Deep Learning Performance in TensorFlow**](https://github.com/NVAITC/pycon-sg19-tensorflow-tutorial).\n", 40 | "\n", 41 | "**Acknowledgements**\n", 42 | "\n", 43 | "* This notebook uses some materials adapted from TensorFlow documentation.\n", 44 | "* This notebook uses the [Oxford-IIIT Pet Dataset](http://www.robots.ox.ac.uk/~vgg/data/pets/) ([TensorFlow Datasets page](https://www.tensorflow.org/datasets/catalog/oxford_iiit_pet)).\n", 45 | "\n", 46 | "**Dataset Citation**\n", 47 | "\n", 48 | "```\n", 49 | "@InProceedings{parkhi12a,\n", 50 | " author = \"Parkhi, O. M. and Vedaldi, A. and Zisserman, A. 
and Jawahar, C.~V.\",\n", 51 | " title = \"Cats and Dogs\",\n", 52 | " booktitle = \"IEEE Conference on Computer Vision and Pattern Recognition\",\n", 53 | " year = \"2012\",\n", 54 | "}\n", 55 | "```" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "colab": { 63 | "base_uri": "https://localhost:8080/" 64 | }, 65 | "colab_type": "code", 66 | "executionInfo": { 67 | "elapsed": 7175, 68 | "status": "ok", 69 | "timestamp": 1570473634312, 70 | "user": { 71 | "displayName": "Timothy Liu SG", 72 | "photoUrl": "", 73 | "userId": "04327513636844080478" 74 | }, 75 | "user_tz": -480 76 | }, 77 | "id": "zNbGLsDSUe3W", 78 | "outputId": "4e551e00-c5a8-4d41-f6c2-7269f7941a68" 79 | }, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "TensorFlow version: 2.0.0\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "import multiprocessing\n", 91 | "\n", 92 | "import tensorflow\n", 93 | "print(\"TensorFlow version:\", tensorflow.__version__)\n", 94 | "\n", 95 | "import tensorflow.compat.v2 as tf\n", 96 | "import tensorflow_datasets as tfds" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "colab": {}, 104 | "colab_type": "code", 105 | "id": "VnKTCYVwmNCg" 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "import time\n", 110 | "\n", 111 | "class TimeHistory(tf.keras.callbacks.Callback):\n", 112 | " def on_train_begin(self, logs={}):\n", 113 | " self.times = []\n", 114 | " def on_epoch_begin(self, epoch, logs={}):\n", 115 | " self.epoch_time_start = time.time()\n", 116 | " def on_epoch_end(self, epoch, logs={}):\n", 117 | " self.times.append(time.time() - self.epoch_time_start)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": { 123 | "colab_type": "text", 124 | "id": "ObRNSvQMUxbI" 125 | }, 126 | "source": [ 127 | "# Pet Classification with TensorFlow" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "colab": { 135 | "base_uri": "https://localhost:8080/", 136 | "height": 51 137 | }, 138 | "colab_type": "code", 139 | "executionInfo": { 140 | "elapsed": 9897, 141 | "status": "ok", 142 | "timestamp": 1570473637050, 143 | "user": { 144 | "displayName": "Timothy Liu SG", 145 | "photoUrl": "", 146 | "userId": "04327513636844080478" 147 | }, 148 | "user_tz": -480 149 | }, 150 | "id": "KaI6MAuJPNDV", 151 | "outputId": "089e1e0d-0fef-4e15-c54d-62153d7f5f96" 152 | }, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "\u001b[1m\u001b[37mjupyter-admin \u001b[m Fri Oct 11 16:48:49 2019 \u001b[1m\u001b[30m410.104\u001b[m\n", 159 | "\u001b[36m[0]\u001b[m \u001b[34mTesla T4 \u001b[m |\u001b[31m 43'C\u001b[m, \u001b[32m 0 %\u001b[m | \u001b[36m\u001b[1m\u001b[33m 0\u001b[m / \u001b[33m15079\u001b[m MB |\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "!gpustat" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": { 171 | "colab": {}, 172 | "colab_type": "code", 173 | "id": "BCK57jlvNpOO" 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "import tensorflow.keras.layers as layers\n", 178 | "from tensorflow.keras.applications.resnet50 import ResNet50\n", 179 | "\n", 180 | "def create_model(img_size=(224,224), num_class=2, train_base=True):\n", 181 | " input_layer = layers.Input(shape=(img_size[0],img_size[1],3))\n", 182 | " base = ResNet50(input_tensor=input_layer,\n", 183 | "
include_top=False,\n", 184 | " weights=\"imagenet\")\n", 185 | " base.trainable = train_base\n", 186 | " x = base.output\n", 187 | " x = layers.GlobalAveragePooling2D()(x)\n", 188 | " preds = layers.Dense(num_class, activation=\"softmax\")(x)\n", 189 | " return tf.keras.models.Model(inputs=input_layer, outputs=preds)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "colab": {}, 197 | "colab_type": "code", 198 | "id": "ICDyhaNCdNsm", 199 | "outputId": "d509178b-0855-4a0e-9ae6-3ec0b4b328ae" 200 | }, 201 | "outputs": [ 202 | { 203 | "name": "stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | "\u001b[1mDownloading and preparing dataset oxford_iiit_pet (801.24 MiB) to /home/jovyan/tensorflow_datasets/oxford_iiit_pet/3.0.0...\u001b[0m\n" 207 | ] 208 | }, 209 | { 210 | "data": { 211 | "application/vnd.jupyter.widget-view+json": { 212 | "model_id": "c696f88ca38f485cb2986f920fff3cc1", 213 | "version_major": 2, 214 | "version_minor": 0 215 | }, 216 | "text/plain": [ 217 | "HBox(children=(IntProgress(value=1, bar_style='info', description='Dl Completed...', max=1, style=ProgressStyl…" 218 | ] 219 | }, 220 | "metadata": { 221 | "tags": [] 222 | }, 223 | "output_type": "display_data" 224 | }, 225 | { 226 | "data": { 227 | "application/vnd.jupyter.widget-view+json": { 228 | "model_id": "4733f929cecc455894a222e7f688483a", 229 | "version_major": 2, 230 | "version_minor": 0 231 | }, 232 | "text/plain": [ 233 | "HBox(children=(IntProgress(value=1, bar_style='info', description='Dl Size...', max=1, style=ProgressStyle(des…" 234 | ] 235 | }, 236 | "metadata": { 237 | "tags": [] 238 | }, 239 | "output_type": "display_data" 240 | }, 241 | { 242 | "data": { 243 | "application/vnd.jupyter.widget-view+json": { 244 | "model_id": "762fa84f21684f3698ea3c9fbafed06b", 245 | "version_major": 2, 246 | "version_minor": 0 247 | }, 248 | "text/plain": [ 249 | "HBox(children=(IntProgress(value=1, bar_style='info', description='Extraction completed...', max=1, style=Prog…" 250 | ] 251 | }, 252 | "metadata": { 253 | "tags": [] 254 | }, 255 | "output_type": "display_data" 256 | }, 257 | { 258 | "name": "stdout", 259 | "output_type": "stream", 260 | "text": [ 261 | "\n", 262 | "\n", 263 | "\n", 264 | "\n", 265 | "\n", 266 | "\n" 267 | ] 268 | }, 269 | { 270 | "data": { 271 | "application/vnd.jupyter.widget-view+json": { 272 | "model_id": "", 273 | "version_major": 2, 274 | "version_minor": 0 275 | }, 276 | "text/plain": [ 277 | "HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))" 278 | ] 279 | }, 280 | "metadata": { 281 | "tags": [] 282 | }, 283 | "output_type": "display_data" 284 | }, 285 | { 286 | "name": "stdout", 287 | "output_type": "stream", 288 | "text": [ 289 | "Shuffling and writing examples to /home/jovyan/tensorflow_datasets/oxford_iiit_pet/3.0.0.incompleteN3JX06/oxford_iiit_pet-train.tfrecord\n" 290 | ] 291 | }, 292 | { 293 | "data": { 294 | "application/vnd.jupyter.widget-view+json": { 295 | "model_id": "", 296 | "version_major": 2, 297 | "version_minor": 0 298 | }, 299 | "text/plain": [ 300 | "HBox(children=(IntProgress(value=0, max=3680), HTML(value='')))" 301 | ] 302 | }, 303 | "metadata": { 304 | "tags": [] 305 | }, 306 | "output_type": "display_data" 307 | }, 308 | { 309 | "name": "stdout", 310 | "output_type": "stream", 311 | "text": [ 312 | "\r" 313 | ] 314 | }, 315 | { 316 | "data": { 317 | "application/vnd.jupyter.widget-view+json": { 318 | "model_id": "", 319 | "version_major": 2, 320 | "version_minor": 0 
321 | }, 322 | "text/plain": [ 323 | "HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))" 324 | ] 325 | }, 326 | "metadata": { 327 | "tags": [] 328 | }, 329 | "output_type": "display_data" 330 | }, 331 | { 332 | "name": "stdout", 333 | "output_type": "stream", 334 | "text": [ 335 | "Shuffling and writing examples to /home/jovyan/tensorflow_datasets/oxford_iiit_pet/3.0.0.incompleteN3JX06/oxford_iiit_pet-test.tfrecord\n" 336 | ] 337 | }, 338 | { 339 | "data": { 340 | "application/vnd.jupyter.widget-view+json": { 341 | "model_id": "", 342 | "version_major": 2, 343 | "version_minor": 0 344 | }, 345 | "text/plain": [ 346 | "HBox(children=(IntProgress(value=0, max=3669), HTML(value='')))" 347 | ] 348 | }, 349 | "metadata": { 350 | "tags": [] 351 | }, 352 | "output_type": "display_data" 353 | }, 354 | { 355 | "name": "stdout", 356 | "output_type": "stream", 357 | "text": [ 358 | "\r" 359 | ] 360 | }, 361 | { 362 | "data": { 363 | "application/vnd.jupyter.widget-view+json": { 364 | "model_id": "7b21228390c643e8a1ba4e9e512627ff", 365 | "version_major": 2, 366 | "version_minor": 0 367 | }, 368 | "text/plain": [ 369 | "HBox(children=(IntProgress(value=0, description='Computing statistics...', max=2, style=ProgressStyle(descript…" 370 | ] 371 | }, 372 | "metadata": { 373 | "tags": [] 374 | }, 375 | "output_type": "display_data" 376 | }, 377 | { 378 | "data": { 379 | "application/vnd.jupyter.widget-view+json": { 380 | "model_id": "", 381 | "version_major": 2, 382 | "version_minor": 0 383 | }, 384 | "text/plain": [ 385 | "HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))" 386 | ] 387 | }, 388 | "metadata": { 389 | "tags": [] 390 | }, 391 | "output_type": "display_data" 392 | }, 393 | { 394 | "name": "stderr", 395 | "output_type": "stream", 396 | "text": [ 397 | "WARNING:absl:Warning: Setting shuffle_files=True because split=TRAIN and shuffle_files=None. This behavior will be deprecated on 2019-08-06, at which point shuffle_files=False will be the default for all splits.\n" 398 | ] 399 | }, 400 | { 401 | "data": { 402 | "application/vnd.jupyter.widget-view+json": { 403 | "model_id": "", 404 | "version_major": 2, 405 | "version_minor": 0 406 | }, 407 | "text/plain": [ 408 | "HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))" 409 | ] 410 | }, 411 | "metadata": { 412 | "tags": [] 413 | }, 414 | "output_type": "display_data" 415 | }, 416 | { 417 | "name": "stdout", 418 | "output_type": "stream", 419 | "text": [ 420 | "\n", 421 | "\u001b[1mDataset oxford_iiit_pet downloaded and prepared to /home/jovyan/tensorflow_datasets/oxford_iiit_pet/3.0.0. 
Subsequent calls will reuse this data.\u001b[0m\n" 422 | ] 423 | } 424 | ], 425 | "source": [ 426 | "(train_dataset, test_dataset), info = tfds.load(name=\"oxford_iiit_pet:3.*.*\",\n", 427 | " split=[\"train\", \"test\"],\n", 428 | " shuffle_files=True,\n", 429 | " as_supervised=True,\n", 430 | " with_info=True)\n", 431 | "\n", 432 | "num_class = info.features[\"label\"].num_classes\n", 433 | "num_train = info.splits[\"train\"].num_examples\n", 434 | "num_test = info.splits[\"test\"].num_examples" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": { 441 | "colab": {}, 442 | "colab_type": "code", 443 | "id": "MdWvubj3g2aO" 444 | }, 445 | "outputs": [], 446 | "source": [ 447 | "IMG_SIZE = (224, 224)\n", 448 | "\n", 449 | "@tf.function\n", 450 | "def format_train_example(image, label):\n", 451 | " image = tf.cast(image, tf.float32)\n", 452 | " image = (image/127.5) - 1\n", 453 | " image = tf.image.resize(image, IMG_SIZE)\n", 454 | " # perform image augmentation with tf.image\n", 455 | " image = tf.image.random_flip_left_right(image)\n", 456 | " image = tf.image.random_brightness(image, 0.1)\n", 457 | " return image, tf.one_hot(label, num_class)\n", 458 | "\n", 459 | "@tf.function\n", 460 | "def format_eval_example(image, label):\n", 461 | " image = tf.cast(image, tf.float32)\n", 462 | " image = (image/127.5) - 1\n", 463 | " image = tf.image.resize(image, IMG_SIZE)\n", 464 | " return image, tf.one_hot(label, num_class)" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": null, 470 | "metadata": { 471 | "colab": {}, 472 | "colab_type": "code", 473 | "id": "GbsEAoP8XKib" 474 | }, 475 | "outputs": [], 476 | "source": [ 477 | "BATCH_SIZE = 80\n", 478 | "N_THREADS = multiprocessing.cpu_count()\n", 479 | "PREFETCH_COUNT = 8\n", 480 | "\n", 481 | "train_dataset = train_dataset.shuffle(1024)\n", 482 | "train_dataset = train_dataset.repeat(-1)\n", 483 | "train_dataset = train_dataset.map(format_train_example,\n", 484 | " num_parallel_calls=N_THREADS)\n", 485 | "train_dataset = train_dataset.batch(BATCH_SIZE)\n", 486 | "train_dataset = train_dataset.prefetch(PREFETCH_COUNT)" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": { 493 | "colab": {}, 494 | "colab_type": "code", 495 | "id": "7-OAQB-0jb-r" 496 | }, 497 | "outputs": [], 498 | "source": [ 499 | "test_dataset = test_dataset.map(format_eval_example,\n", 500 | " num_parallel_calls=N_THREADS)\n", 501 | "test_dataset = test_dataset.repeat(-1)\n", 502 | "test_dataset = test_dataset.batch(BATCH_SIZE)" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": null, 508 | "metadata": { 509 | "colab": { 510 | "base_uri": "https://localhost:8080/", 511 | "height": 51 512 | }, 513 | "colab_type": "code", 514 | "executionInfo": { 515 | "elapsed": 24373, 516 | "status": "ok", 517 | "timestamp": 1570473651568, 518 | "user": { 519 | "displayName": "Timothy Liu SG", 520 | "photoUrl": "", 521 | "userId": "04327513636844080478" 522 | }, 523 | "user_tz": -480 524 | }, 525 | "id": "YAwj90pGOIAy", 526 | "outputId": "8dfaabf3-1eab-402b-9897-42a0a45235a8" 527 | }, 528 | "outputs": [ 529 | { 530 | "name": "stdout", 531 | "output_type": "stream", 532 | "text": [ 533 | "Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5\n", 534 | "94773248/94765736 [==============================] - 6s 0us/step\n" 535 | ] 536 | } 537 | ], 538 | 
"source": [ 539 | "model = create_model(IMG_SIZE, num_class, train_base=True)\n", 540 | "opt = tf.keras.optimizers.Adam()\n", 541 | "\n", 542 | "model.compile(loss=\"categorical_crossentropy\",\n", 543 | " optimizer=opt,\n", 544 | " metrics=[\"acc\"])\n", 545 | "\n", 546 | "#model.summary()" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": { 553 | "colab": {}, 554 | "colab_type": "code", 555 | "id": "0vRtm2rORBHf" 556 | }, 557 | "outputs": [], 558 | "source": [ 559 | "steps_per_epoch = num_train//BATCH_SIZE\n", 560 | "steps_test = num_test//BATCH_SIZE\n", 561 | "\n", 562 | "time_callback = TimeHistory()" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": null, 568 | "metadata": { 569 | "colab": { 570 | "base_uri": "https://localhost:8080/", 571 | "height": 153 572 | }, 573 | "colab_type": "code", 574 | "executionInfo": { 575 | "elapsed": 132772, 576 | "status": "ok", 577 | "timestamp": 1570473759978, 578 | "user": { 579 | "displayName": "Timothy Liu SG", 580 | "photoUrl": "", 581 | "userId": "04327513636844080478" 582 | }, 583 | "user_tz": -480 584 | }, 585 | "id": "7T8VVrn4Q12B", 586 | "outputId": "d5b7a7c7-0970-45ef-898e-283abfe787a6" 587 | }, 588 | "outputs": [ 589 | { 590 | "name": "stdout", 591 | "output_type": "stream", 592 | "text": [ 593 | "Train for 46 steps\n", 594 | "Epoch 1/5\n", 595 | "46/46 [==============================] - 63s 1s/step - loss: 1.8270 - acc: 0.4777\n", 596 | "Epoch 2/5\n", 597 | "46/46 [==============================] - 36s 773ms/step - loss: 0.6667 - acc: 0.7905\n", 598 | "Epoch 3/5\n", 599 | "46/46 [==============================] - 38s 832ms/step - loss: 0.3448 - acc: 0.8889\n", 600 | "Epoch 4/5\n", 601 | "46/46 [==============================] - 37s 807ms/step - loss: 0.2233 - acc: 0.9321\n", 602 | "Epoch 5/5\n", 603 | "46/46 [==============================] - 37s 812ms/step - loss: 0.1526 - acc: 0.9552\n" 604 | ] 605 | }, 606 | { 607 | "data": { 608 | "text/plain": [ 609 | "" 610 | ] 611 | }, 612 | "execution_count": 12, 613 | "metadata": { 614 | "tags": [] 615 | }, 616 | "output_type": "execute_result" 617 | } 618 | ], 619 | "source": [ 620 | "model.fit(train_dataset, steps_per_epoch=steps_per_epoch,\n", 621 | " epochs=5, callbacks=[time_callback], verbose=1)" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": null, 627 | "metadata": { 628 | "colab": {}, 629 | "colab_type": "code", 630 | "id": "21CjKwKsjIiy", 631 | "outputId": "f1b26bcc-8f6a-4ea2-b9ca-880d0a4f5901" 632 | }, 633 | "outputs": [ 634 | { 635 | "name": "stdout", 636 | "output_type": "stream", 637 | "text": [ 638 | "Peak Img/s: 103.0\n" 639 | ] 640 | } 641 | ], 642 | "source": [ 643 | "epoch_time = min(time_callback.times)\n", 644 | "img_per_sec = num_train//epoch_time\n", 645 | "\n", 646 | "print(\"Peak Img/s:\", img_per_sec)" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": null, 652 | "metadata": { 653 | "colab": {}, 654 | "colab_type": "code", 655 | "id": "kUJceK2mmNC5" 656 | }, 657 | "outputs": [], 658 | "source": [] 659 | } 660 | ], 661 | "metadata": { 662 | "accelerator": "GPU", 663 | "colab": { 664 | "collapsed_sections": [ 665 | "laUXS-24UvPM" 666 | ], 667 | "name": "tf_pet_base.ipynb", 668 | "provenance": [], 669 | "toc_visible": true 670 | }, 671 | "kernelspec": { 672 | "display_name": "Python 3", 673 | "language": "python", 674 | "name": "python3" 675 | }, 676 | "language_info": { 677 | "codemirror_mode": { 678 | "name": "ipython", 679 | "version": 3 
680 | }, 681 | "file_extension": ".py", 682 | "mimetype": "text/x-python", 683 | "name": "python", 684 | "nbconvert_exporter": "python", 685 | "pygments_lexer": "ipython3", 686 | "version": "3.6.7" 687 | } 688 | }, 689 | "nbformat": 4, 690 | "nbformat_minor": 4 691 | } 692 | -------------------------------------------------------------------------------- /tf_transformer_base.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.7"},"colab":{"name":"tf_transformer_base.ipynb","provenance":[],"collapsed_sections":["UqslZYH8mhNU"],"toc_visible":true},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","metadata":{"id":"UqslZYH8mhNU","colab_type":"text"},"source":["## Setup\n","\n","This section contains supplementary information, functions, and installs required packages."]},{"cell_type":"code","metadata":{"id":"gss2_uX1mhNV","colab_type":"code","colab":{}},"source":["!pip install tensorflow-gpu==2.0 tensorflow_datasets gpustat transformers -Uq"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"8hPa-QqumhNY","colab_type":"text"},"source":["**About**\n","\n","\n","\n","This notebook is put together by Timothy Liu (`timothyl@nvidia.com`) for the [**PyCon SG**](https://pycon.sg/) 2019 tutorial on [**Improving Deep Learning Performance in TensorFlow**](https://github.com/NVAITC/pycon-sg19-tensorflow-tutorial).\n","\n","**Acknowledgements**\n","\n","* This notebook uses some materials adapted from TensorFlow documentation.\n","* This notebook uses the [HuggingFace Transformers library](https://github.com/huggingface/transformers).\n","* This notebook uses the [GLUE (MRPC) Dataset](https://gluebenchmark.com/) ([TensorFlow Datasets page](https://www.tensorflow.org/datasets/catalog/glue)).\n","\n","**Dataset Citation**\n","\n","```\n","@inproceedings{wang2019glue,\n"," title={ {GLUE}: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding},\n"," author={Wang, Alex and Singh, Amanpreet and Michael, Julian and Hill, Felix and Levy, Omer and Bowman, Samuel R.},\n"," note={In the Proceedings of ICLR.},\n"," year={2019}\n","}\n","```"]},{"cell_type":"code","metadata":{"id":"5RTQHdpNmhNY","colab_type":"code","colab":{}},"source":["import tensorflow.compat.v2 as tf\n","import tensorflow_datasets"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"wm5qQqiLmhNa","colab_type":"code","colab":{}},"source":["import time\n","\n","class TimeHistory(tf.keras.callbacks.Callback):\n"," def on_train_begin(self, logs={}):\n"," self.times = []\n"," def on_epoch_begin(self, epoch, logs={}):\n"," self.epoch_time_start = time.time()\n"," def on_epoch_end(self, epoch, logs={}):\n"," self.times.append(time.time() - self.epoch_time_start)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"5DMm-1HmmhNc","colab_type":"text"},"source":["# Sequence Classification with BERT in TF 2.0"]},{"cell_type":"code","metadata":{"id":"ZM4spGMNp5wN","colab_type":"code","colab":{}},"source":["!gpustat"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"WkWHOum3mhNe","colab_type":"text"},"source":["## Load BERT 
Tokenizer"]},{"cell_type":"code","metadata":{"id":"qqd1X8dxmhNf","colab_type":"code","colab":{}},"source":["from transformers import BertTokenizer\n","\n","tokenizer = BertTokenizer.from_pretrained(\"bert-base-cased\")"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"imu1KLWvmhNh","colab_type":"text"},"source":["## Input Pipeline"]},{"cell_type":"markdown","metadata":{"id":"nFFtBf6amhNh","colab_type":"text"},"source":["### Load Dataset"]},{"cell_type":"code","metadata":{"id":"WUAUMzVOmhNi","colab_type":"code","outputId":"40288796-f303-4af9-e905-40f3349c6d94","colab":{}},"source":["data, info = tensorflow_datasets.load(\"glue/mrpc\", with_info=True)\n","\n","train_examples = info.splits[\"train\"].num_examples\n","valid_examples = info.splits[\"validation\"].num_examples"],"execution_count":0,"outputs":[{"output_type":"stream","text":["INFO:absl:Overwrite dataset info from restored data version.\n","INFO:absl:Reusing dataset glue (/home/jovyan/tensorflow_datasets/glue/mrpc/0.0.2)\n","INFO:absl:Constructing tf.data.Dataset for split None, from /home/jovyan/tensorflow_datasets/glue/mrpc/0.0.2\n","WARNING:absl:Warning: Setting shuffle_files=True because split=TRAIN and shuffle_files=None. This behavior will be deprecated on 2019-08-06, at which point shuffle_files=False will be the default for all splits.\n"],"name":"stderr"}]},{"cell_type":"markdown","metadata":{"id":"LTZECdgxmhNl","colab_type":"text"},"source":["## Build Input Pipeline"]},{"cell_type":"code","metadata":{"id":"eBkLtX9bmhNl","colab_type":"code","colab":{}},"source":["from transformers import glue_convert_examples_to_features\n","\n","BATCH_SIZE = 32\n","\n","# Prepare dataset for GLUE as a tf.data.Dataset instance\n","train_dataset = glue_convert_examples_to_features(data[\"train\"], tokenizer, 128, \"mrpc\")\n","train_dataset = train_dataset.shuffle(512).batch(BATCH_SIZE).repeat(-1)\n","\n","valid_dataset = glue_convert_examples_to_features(data[\"validation\"], tokenizer, 128, \"mrpc\")\n","valid_dataset = valid_dataset.batch(BATCH_SIZE)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"vC_qPDcjmhNn","colab_type":"text"},"source":["## Build BERT Model"]},{"cell_type":"markdown","metadata":{"id":"3sAKlIGPmhNo","colab_type":"text"},"source":["### Load Pre-trained BERT Model"]},{"cell_type":"code","metadata":{"id":"vW2d3lctmhNo","colab_type":"code","colab":{}},"source":["from transformers import TFBertForSequenceClassification\n","\n","model = TFBertForSequenceClassification.from_pretrained(\"bert-base-cased\")"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"k2qMgxExmhNq","colab_type":"code","colab":{}},"source":["opt = tf.keras.optimizers.Adam(learning_rate=3e-5)\n","\n","loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n","acc = tf.keras.metrics.SparseCategoricalAccuracy(\"accuracy\")\n","model.compile(optimizer=opt,\n"," loss=loss,\n"," metrics=[acc])"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"gNhNvzncmhNs","colab_type":"text"},"source":["## Train BERT Model"]},{"cell_type":"code","metadata":{"id":"it4pTXFAmhNt","colab_type":"code","colab":{}},"source":["time_callback = TimeHistory()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"lWyY0SJHmhNu","colab_type":"code","outputId":"3fe4f64c-8eff-463f-9367-877abf335699","colab":{}},"source":["history = model.fit(train_dataset, epochs=4, steps_per_epoch=train_examples//BATCH_SIZE,\n"," validation_data=valid_dataset, 
validation_steps=valid_examples//BATCH_SIZE,\n"," validation_freq=3, callbacks=[time_callback])"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Train for 114 steps, validate for 12 steps\n","Epoch 1/4\n","114/114 [==============================] - 116s 1s/step - loss: 0.5787 - accuracy: 0.7182\n","Epoch 2/4\n","114/114 [==============================] - 94s 828ms/step - loss: 0.3521 - accuracy: 0.8490\n","Epoch 3/4\n","114/114 [==============================] - 101s 886ms/step - loss: 0.1301 - accuracy: 0.9574 - val_loss: 0.4803 - val_accuracy: 0.8385\n","Epoch 4/4\n","114/114 [==============================] - 94s 828ms/step - loss: 0.0507 - accuracy: 0.9854\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IFXITNKamhNy","colab_type":"code","outputId":"ea9c70b0-adfe-4c34-cd01-69aca772ae8e","colab":{}},"source":["epoch_time = min(time_callback.times)\n","egs_per_sec = train_examples//epoch_time\n","\n","print(\"Peak Examples/s:\", egs_per_sec)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Peak Examples/s: 38.0\n"],"name":"stdout"}]}]} --------------------------------------------------------------------------------