├── .gitignore
├── .whitesource
├── LICENSE
├── README.md
├── check_gpu.ipynb
├── solutions
│   ├── tf_pet_solution.ipynb
│   └── tf_transformer_solution.ipynb
├── tf_dataset_demo.ipynb
├── tf_pet_base.ipynb
└── tf_transformer_base.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | .hypothesis/
50 | .pytest_cache/
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 | local_settings.py
59 | db.sqlite3
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # Jupyter Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # SageMath parsed files
84 | *.sage.py
85 |
86 | # Environments
87 | .env
88 | .venv
89 | env/
90 | venv/
91 | ENV/
92 | env.bak/
93 | venv.bak/
94 |
95 | # Spyder project settings
96 | .spyderproject
97 | .spyproject
98 |
99 | # Rope project settings
100 | .ropeproject
101 |
102 | # mkdocs documentation
103 | /site
104 |
105 | # mypy
106 | .mypy_cache/
107 |
--------------------------------------------------------------------------------
/.whitesource:
--------------------------------------------------------------------------------
1 | {
2 | "checkRunSettings": {
3 | "vulnerableCheckRunConclusionLevel": "failure"
4 | },
5 | "issueSettings": {
6 | "minSeverityLevel": "LOW"
7 | }
8 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 NVIDIA AI Technology Center
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyCon SG 2019 Tutorial: Optimizing TensorFlow Performance
2 |
3 |   
4 |
5 | This workshop content covers:
6 |
7 | * a brief introduction to deep learning and TensorFlow 2.0
8 | * using `tf.data` and TensorFlow Datasets
9 | * XLA compiler and Automatic Mixed Precision (AMP)
10 | * speeding up CNN (ResNet-50) with XLA and AMP
11 | * speeding up Transformer (BERT) with XLA and AMP
12 |
13 | For a quick guide to using Automatic Mixed Precision, check out this [TLDR](https://drive.google.com/open?id=1Nz2438DBQS591kHha2ENL7VBhmBaXQ_loQVi3rywRVU).
14 |
15 | ## Content
16 |
17 | **Slides** are in this [Google Drive folder](https://drive.google.com/open?id=1RR0UhnvJ3PHL4sGRe2du4_w66Kg9KNVr).
18 |
19 | **Notebooks**
20 |
21 | | Notebook | Link | Solution |
22 | | ------------------------------ | ---- | -------- |
23 | | TensorFlow Dataset & tf.data | [Open in Colab](https://colab.research.google.com/github/NVAITC/pycon-sg19-tensorflow-tutorial/blob/master/tf_dataset_demo.ipynb) | |
24 | | Pet Classification with TF 2.0 | [Open in Colab](https://colab.research.google.com/github/NVAITC/pycon-sg19-tensorflow-tutorial/blob/master/tf_pet_base.ipynb) | [Open in Colab](https://colab.research.google.com/github/NVAITC/pycon-sg19-tensorflow-tutorial/blob/master/solutions/tf_pet_solution.ipynb) |
25 | | Transformers with TF 2.0 | [Open in Colab](https://colab.research.google.com/github/NVAITC/pycon-sg19-tensorflow-tutorial/blob/master/tf_transformer_base.ipynb) | [Open in Colab](https://colab.research.google.com/github/NVAITC/pycon-sg19-tensorflow-tutorial/blob/master/solutions/tf_transformer_solution.ipynb) |
26 |
27 | If you are running the notebooks on the workshop JupyterHub or on your own hardware, you can clone this repository:
28 |
29 | ```shell
30 | git clone https://github.com/NVAITC/pycon-sg19-tensorflow-tutorial
31 | ```
32 |
33 | ## Workshop Information
34 |
35 | **In-person @ PyCon SG 2019**
36 |
37 | * Attend the workshop from 10am to 1pm on Saturday, October 12 at [Republic Polytechnic](https://pycon.sg/venue/).
38 | * Get your tickets [here](https://www.eventnook.com/event/pyconsingapore2019/).
39 |
--------------------------------------------------------------------------------
/check_gpu.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Checking for a GPU\n",
8 | "\n",
9 | "To check for a GPU, run the `nvidia-smi` command in a Linux shell."
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {},
16 | "outputs": [
17 | {
18 | "name": "stdout",
19 | "output_type": "stream",
20 | "text": [
21 | "Fri Oct 11 16:48:22 2019 \n",
22 | "+-----------------------------------------------------------------------------+\n",
23 | "| NVIDIA-SMI 410.104 Driver Version: 410.104 CUDA Version: 10.0 |\n",
24 | "|-------------------------------+----------------------+----------------------+\n",
25 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
26 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
27 | "|===============================+======================+======================|\n",
28 | "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
29 | "| N/A 44C P8 10W / 70W | 0MiB / 15079MiB | 0% Default |\n",
30 | "+-------------------------------+----------------------+----------------------+\n",
31 | " \n",
32 | "+-----------------------------------------------------------------------------+\n",
33 | "| Processes: GPU Memory |\n",
34 | "| GPU PID Type Process name Usage |\n",
35 | "|=============================================================================|\n",
36 | "| No running processes found |\n",
37 | "+-----------------------------------------------------------------------------+\n"
38 | ]
39 | }
40 | ],
41 | "source": [
42 | "!nvidia-smi"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": null,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": []
51 | }
52 | ],
53 | "metadata": {
54 | "kernelspec": {
55 | "display_name": "Python 3",
56 | "language": "python",
57 | "name": "python3"
58 | },
59 | "language_info": {
60 | "codemirror_mode": {
61 | "name": "ipython",
62 | "version": 3
63 | },
64 | "file_extension": ".py",
65 | "mimetype": "text/x-python",
66 | "name": "python",
67 | "nbconvert_exporter": "python",
68 | "pygments_lexer": "ipython3",
69 | "version": "3.6.7"
70 | }
71 | },
72 | "nbformat": 4,
73 | "nbformat_minor": 4
74 | }
75 |
--------------------------------------------------------------------------------
/solutions/tf_pet_solution.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "laUXS-24UvPM"
8 | },
9 | "source": [
10 | "## Setup\n",
11 | "\n",
12 | "This section contains supplementary information, functions, and installs required packages."
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": null,
18 | "metadata": {
19 | "colab": {},
20 | "colab_type": "code",
21 | "id": "1Nn_XUbFUVah"
22 | },
23 | "outputs": [],
24 | "source": [
25 | "!pip install tensorflow-gpu==2.0 tensorflow_datasets gpustat -Uq"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {
31 | "colab_type": "text",
32 | "id": "mgLK3zv_Vsy8"
33 | },
34 | "source": [
35 | "**About**\n",
36 | "\n",
37 | "
\n",
38 | "\n",
39 | "This notebook is put together by Timothy Liu (`timothyl@nvidia.com`) for the [**PyCon SG**](https://pycon.sg/) 2019 tutorial on [**Improving Deep Learning Performance in TensorFlow**](https://github.com/NVAITC/pycon-sg19-tensorflow-tutorial).\n",
40 | "\n",
41 | "**Acknowledgements**\n",
42 | "\n",
43 | "* This notebook uses some materials adapted from TensorFlow documentation.\n",
44 | "* This notebook uses the [Oxford-IIIT Pet Dataset](http://www.robots.ox.ac.uk/~vgg/data/pets/) ([TensorFlow Datasets page](https://www.tensorflow.org/datasets/catalog/oxford_iiit_pet)).\n",
45 | "\n",
46 | "**Dataset Citation**\n",
47 | "\n",
48 | "```\n",
49 | "@InProceedings{parkhi12a,\n",
50 | " author = \"Parkhi, O. M. and Vedaldi, A. and Zisserman, A. and Jawahar, C.~V.\",\n",
51 | " title = \"Cats and Dogs\",\n",
52 | " booktitle = \"IEEE Conference on Computer Vision and Pattern Recognition\",\n",
53 | " year = \"2012\",\n",
54 | "}\n",
55 | "```"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": null,
61 | "metadata": {
62 | "colab": {
63 | "base_uri": "https://localhost:8080/"
64 | },
65 | "colab_type": "code",
66 | "executionInfo": {
67 | "elapsed": 7175,
68 | "status": "ok",
69 | "timestamp": 1570473634312,
70 | "user": {
71 | "displayName": "Timothy Liu SG",
72 | "photoUrl": "",
73 | "userId": "04327513636844080478"
74 | },
75 | "user_tz": -480
76 | },
77 | "id": "zNbGLsDSUe3W",
78 | "outputId": "4e551e00-c5a8-4d41-f6c2-7269f7941a68"
79 | },
80 | "outputs": [
81 | {
82 | "name": "stdout",
83 | "output_type": "stream",
84 | "text": [
85 | "TensorFlow version: 2.0.0\n"
86 | ]
87 | }
88 | ],
89 | "source": [
90 | "import multiprocessing\n",
91 | "\n",
92 | "import tensorflow\n",
93 | "print(\"TensorFlow version:\", tensorflow.__version__)\n",
94 | "\n",
95 | "import tensorflow.compat.v2 as tf\n",
96 | "import tensorflow_datasets as tfds"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": null,
102 | "metadata": {
103 | "colab": {},
104 | "colab_type": "code",
105 | "id": "kakmnWusm51w"
106 | },
107 | "outputs": [],
108 | "source": [
109 | "import time\n",
110 | "\n",
111 | "class TimeHistory(tf.keras.callbacks.Callback):\n",
112 | " def on_train_begin(self, logs={}):\n",
113 | " self.times = []\n",
114 | " def on_epoch_begin(self, epoch, logs={}):\n",
115 | " self.epoch_time_start = time.time()\n",
116 | " def on_epoch_end(self, epoch, logs={}):\n",
117 | " self.times.append(time.time() - self.epoch_time_start)"
118 | ]
119 | },
120 | {
121 | "cell_type": "markdown",
122 | "metadata": {
123 | "colab_type": "text",
124 | "id": "ObRNSvQMUxbI"
125 | },
126 | "source": [
127 | "# Pets Classification with TensorFlow"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": null,
133 | "metadata": {
134 | "colab": {
135 | "base_uri": "https://localhost:8080/",
136 | "height": 51
137 | },
138 | "colab_type": "code",
139 | "executionInfo": {
140 | "elapsed": 9897,
141 | "status": "ok",
142 | "timestamp": 1570473637050,
143 | "user": {
144 | "displayName": "Timothy Liu SG",
145 | "photoUrl": "",
146 | "userId": "04327513636844080478"
147 | },
148 | "user_tz": -480
149 | },
150 | "id": "KaI6MAuJPNDV",
151 | "outputId": "089e1e0d-0fef-4e15-c54d-62153d7f5f96"
152 | },
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | "\u001b[1m\u001b[37mjupyter-admin \u001b[m Fri Oct 11 16:56:31 2019 \u001b[1m\u001b[30m410.104\u001b[m\n",
159 | "\u001b[36m[0]\u001b[m \u001b[34mTesla T4 \u001b[m |\u001b[1m\u001b[31m 66'C\u001b[m, \u001b[32m 0 %\u001b[m | \u001b[36m\u001b[1m\u001b[33m 0\u001b[m / \u001b[33m15079\u001b[m MB |\n"
160 | ]
161 | }
162 | ],
163 | "source": [
164 | "!gpustat"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": null,
170 | "metadata": {
171 | "colab": {},
172 | "colab_type": "code",
173 | "id": "-TiQp7Apm516"
174 | },
175 | "outputs": [],
176 | "source": [
177 | "# enable XLA\n",
178 | "tf.config.optimizer.set_jit(True)\n",
179 | "\n",
180 | "# enable AMP\n",
181 | "tf.keras.mixed_precision.experimental.set_policy('mixed_float16')"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": null,
187 | "metadata": {
188 | "colab": {},
189 | "colab_type": "code",
190 | "id": "BCK57jlvNpOO"
191 | },
192 | "outputs": [],
193 | "source": [
194 | "import tensorflow.keras.layers as layers\n",
195 | "from tensorflow.keras.applications.resnet50 import ResNet50\n",
196 | "\n",
197 | "def create_model(img_size=(224,224), num_class=2, train_base=True):\n",
198 | " # accept float16 image inputs\n",
199 | " input_layer = layers.Input(shape=(img_size[0],img_size[1],3), dtype=tf.float16)\n",
200 | " base = ResNet50(input_tensor=input_layer,\n",
201 | " include_top=False,\n",
202 | " weights=\"imagenet\")\n",
203 | " base.trainable = train_base\n",
204 | " x = base.output\n",
205 | " x = layers.GlobalAveragePooling2D()(x)\n",
206 | " # softmax only accepts float32 - need to manually cast (likely a bug)\n",
207 | " preds = layers.Dense(num_class, activation=\"softmax\", dtype=tf.float32)(x)\n",
208 | " return tf.keras.models.Model(inputs=input_layer, outputs=preds)"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {
215 | "colab": {},
216 | "colab_type": "code",
217 | "id": "ICDyhaNCdNsm"
218 | },
219 | "outputs": [],
220 | "source": [
221 | "(train_dataset, test_dataset), info = tfds.load(name=\"oxford_iiit_pet:3.*.*\",\n",
222 | " split=[\"train\", \"test\"],\n",
223 | " shuffle_files=True,\n",
224 | " as_supervised=True,\n",
225 | " with_info=True)\n",
226 | "\n",
227 | "num_class = info.features[\"label\"].num_classes\n",
228 | "num_train = info.splits[\"train\"].num_examples\n",
229 | "num_test = info.splits[\"test\"].num_examples"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": null,
235 | "metadata": {
236 | "colab": {},
237 | "colab_type": "code",
238 | "id": "MdWvubj3g2aO"
239 | },
240 | "outputs": [],
241 | "source": [
242 | "IMG_SIZE = (224, 224)\n",
243 | "\n",
244 | "@tf.function\n",
245 | "def format_train_example(image, label):\n",
246 | " image = tf.cast(image, tf.float32)\n",
247 | " image = (image/127.5) - 1\n",
248 | " image = tf.image.resize(image, IMG_SIZE)\n",
249 | " # perform image augmentation with tf.image\n",
250 | " image = tf.image.random_flip_left_right(image)\n",
251 | " image = tf.image.random_brightness(image, 0.1)\n",
252 | " # return images as float16\n",
253 | " image = tf.cast(image, tf.float16)\n",
254 | " return image, tf.one_hot(label, num_class)\n",
255 | "\n",
256 | "@tf.function\n",
257 | "def format_eval_example(image, label):\n",
258 | " image = tf.cast(image, tf.float32)\n",
259 | " image = (image/127.5) - 1\n",
260 | " image = tf.image.resize(image, IMG_SIZE)\n",
261 | " # return images as float16\n",
262 | " image = tf.cast(image, tf.float16)\n",
263 | " return image, tf.one_hot(label, num_class)"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": null,
269 | "metadata": {
270 | "colab": {},
271 | "colab_type": "code",
272 | "id": "GbsEAoP8XKib"
273 | },
274 | "outputs": [],
275 | "source": [
276 | "BATCH_SIZE = 80\n",
277 | "N_THREADS = multiprocessing.cpu_count()\n",
278 | "PREFETCH_COUNT = 8\n",
279 | "\n",
280 | "train_dataset = train_dataset.shuffle(1024)\n",
281 | "train_dataset = train_dataset.repeat(-1)\n",
282 | "train_dataset = train_dataset.map(format_train_example,\n",
283 | " num_parallel_calls=N_THREADS)\n",
284 | "train_dataset = train_dataset.batch(BATCH_SIZE)\n",
285 | "train_dataset = train_dataset.prefetch(PREFETCH_COUNT)"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": null,
291 | "metadata": {
292 | "colab": {},
293 | "colab_type": "code",
294 | "id": "7-OAQB-0jb-r"
295 | },
296 | "outputs": [],
297 | "source": [
298 | "test_dataset = test_dataset.map(format_eval_example,\n",
299 | " num_parallel_calls=N_THREADS)\n",
300 | "test_dataset = test_dataset.repeat(-1)\n",
301 | "test_dataset = test_dataset.batch(BATCH_SIZE)"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": null,
307 | "metadata": {
308 | "colab": {},
309 | "colab_type": "code",
310 | "id": "YAwj90pGOIAy"
311 | },
312 | "outputs": [],
313 | "source": [
314 | "model = create_model(IMG_SIZE, num_class, train_base=True)\n",
315 | "opt = tf.keras.optimizers.Adam()\n",
316 | "\n",
317 | "model.compile(loss=\"categorical_crossentropy\",\n",
318 | " optimizer=opt,\n",
319 | " metrics=[\"acc\"])\n",
320 | "\n",
321 | "#model.summary()"
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": null,
327 | "metadata": {
328 | "colab": {},
329 | "colab_type": "code",
330 | "id": "0vRtm2rORBHf"
331 | },
332 | "outputs": [],
333 | "source": [
334 | "steps_per_epoch = num_train//BATCH_SIZE\n",
335 | "steps_test = num_test//BATCH_SIZE\n",
336 | "\n",
337 | "time_callback = TimeHistory()"
338 | ]
339 | },
340 | {
341 | "cell_type": "code",
342 | "execution_count": null,
343 | "metadata": {
344 | "colab": {
345 | "base_uri": "https://localhost:8080/",
346 | "height": 153
347 | },
348 | "colab_type": "code",
349 | "executionInfo": {
350 | "elapsed": 132772,
351 | "status": "ok",
352 | "timestamp": 1570473759978,
353 | "user": {
354 | "displayName": "Timothy Liu SG",
355 | "photoUrl": "",
356 | "userId": "04327513636844080478"
357 | },
358 | "user_tz": -480
359 | },
360 | "id": "7T8VVrn4Q12B",
361 | "outputId": "d5b7a7c7-0970-45ef-898e-283abfe787a6"
362 | },
363 | "outputs": [
364 | {
365 | "name": "stdout",
366 | "output_type": "stream",
367 | "text": [
368 | "Train for 46 steps\n",
369 | "Epoch 1/5\n",
370 | "46/46 [==============================] - 68s 1s/step - loss: 1.7191 - acc: 0.5182\n",
371 | "Epoch 2/5\n",
372 | "46/46 [==============================] - 19s 418ms/step - loss: 0.6449 - acc: 0.7894\n",
373 | "Epoch 3/5\n",
374 | "46/46 [==============================] - 20s 429ms/step - loss: 0.3809 - acc: 0.8799\n",
375 | "Epoch 4/5\n",
376 | "46/46 [==============================] - 18s 400ms/step - loss: 0.2068 - acc: 0.9315\n",
377 | "Epoch 5/5\n",
378 | "46/46 [==============================] - 19s 405ms/step - loss: 0.1184 - acc: 0.9655\n"
379 | ]
380 | },
381 | {
382 | "data": {
383 | "text/plain": [
384 | ""
385 | ]
386 | },
387 | "execution_count": 13,
388 | "metadata": {
389 | "tags": []
390 | },
391 | "output_type": "execute_result"
392 | }
393 | ],
394 | "source": [
395 | "model.fit(train_dataset, steps_per_epoch=steps_per_epoch,\n",
396 | " epochs=5, callbacks=[time_callback], verbose=1)"
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": null,
402 | "metadata": {
403 | "colab": {},
404 | "colab_type": "code",
405 | "id": "21CjKwKsjIiy",
406 | "outputId": "9f626740-351a-4ce2-d529-0a4c7308d92f"
407 | },
408 | "outputs": [
409 | {
410 | "name": "stdout",
411 | "output_type": "stream",
412 | "text": [
413 | "Peak Img/s: 200.0\n"
414 | ]
415 | }
416 | ],
417 | "source": [
418 | "epoch_time = min(time_callback.times)\n",
419 | "img_per_sec = num_train//epoch_time\n",
420 | "\n",
421 | "print(\"Peak Img/s:\", img_per_sec)"
422 | ]
423 | },
424 | {
425 | "cell_type": "code",
426 | "execution_count": null,
427 | "metadata": {
428 | "colab": {},
429 | "colab_type": "code",
430 | "id": "FeP0nP1Em52O"
431 | },
432 | "outputs": [],
433 | "source": []
434 | }
435 | ],
436 | "metadata": {
437 | "accelerator": "GPU",
438 | "colab": {
439 | "collapsed_sections": [
440 | "laUXS-24UvPM"
441 | ],
442 | "name": "tf_pet_solution.ipynb",
443 | "provenance": [],
444 | "toc_visible": true
445 | },
446 | "kernelspec": {
447 | "display_name": "Python 3",
448 | "language": "python",
449 | "name": "python3"
450 | },
451 | "language_info": {
452 | "codemirror_mode": {
453 | "name": "ipython",
454 | "version": 3
455 | },
456 | "file_extension": ".py",
457 | "mimetype": "text/x-python",
458 | "name": "python",
459 | "nbconvert_exporter": "python",
460 | "pygments_lexer": "ipython3",
461 | "version": "3.6.7"
462 | }
463 | },
464 | "nbformat": 4,
465 | "nbformat_minor": 4
466 | }
467 |
--------------------------------------------------------------------------------
/solutions/tf_transformer_solution.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.7"},"colab":{"name":"tf_transformer_solution.ipynb","provenance":[],"collapsed_sections":["NWGze6qCm6Pu"],"toc_visible":true},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","metadata":{"id":"NWGze6qCm6Pu","colab_type":"text"},"source":["## Setup\n","\n","This section contains supplementary information, functions, and installs required packages."]},{"cell_type":"code","metadata":{"id":"2Z_LEdIbm6Pv","colab_type":"code","colab":{}},"source":["!pip install tensorflow-gpu==2.0 tensorflow_datasets gpustat transformers -Uq"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"FWiixEDBm6Px","colab_type":"text"},"source":["**About**\n","\n","
\n","\n","This notebook is put together by Timothy Liu (`timothyl@nvidia.com`) for the [**PyCon SG**](https://pycon.sg/) 2019 tutorial on [**Improving Deep Learning Performance in TensorFlow**](https://github.com/NVAITC/pycon-sg19-tensorflow-tutorial).\n","\n","**Acknowledgements**\n","\n","* This notebook uses some materials adapted from TensorFlow documentation.\n","* This notebook uses the [HuggingFace Transformers library](https://github.com/huggingface/transformers).\n","* This notebook uses the [GLUE (MRPC) Dataset](https://gluebenchmark.com/) ([TensorFlow Datasets page](https://www.tensorflow.org/datasets/catalog/glue)).\n","\n","**Dataset Citation**\n","\n","```\n","@inproceedings{wang2019glue,\n"," title={ {GLUE}: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding},\n"," author={Wang, Alex and Singh, Amanpreet and Michael, Julian and Hill, Felix and Levy, Omer and Bowman, Samuel R.},\n"," note={In the Proceedings of ICLR.},\n"," year={2019}\n","}\n","```"]},{"cell_type":"code","metadata":{"id":"got-iOfem6Py","colab_type":"code","colab":{}},"source":["import tensorflow.compat.v2 as tf\n","import tensorflow_datasets"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"LJMHzkWQm6P0","colab_type":"code","colab":{}},"source":["import time\n","\n","class TimeHistory(tf.keras.callbacks.Callback):\n"," def on_train_begin(self, logs={}):\n"," self.times = []\n"," def on_epoch_begin(self, epoch, logs={}):\n"," self.epoch_time_start = time.time()\n"," def on_epoch_end(self, epoch, logs={}):\n"," self.times.append(time.time() - self.epoch_time_start)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"bpOFAo4Km6P2","colab_type":"text"},"source":["# Sequence Classification with BERT in TF 
2.0"]},{"cell_type":"code","metadata":{"id":"WufuRjQxp9h_","colab_type":"code","colab":{}},"source":["!gpustat"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"Sr25k7aNm6P2","colab_type":"code","colab":{}},"source":["# enable XLA\n","tf.config.optimizer.set_jit(True)\n","\n","# enable AMP via tf.config\n","tf.config.optimizer.set_experimental_options({\"auto_mixed_precision\": True})"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"V1Lz61HGm6P4","colab_type":"text"},"source":["## Load BERT Tokenizer"]},{"cell_type":"code","metadata":{"id":"tjMRDhSxm6P5","colab_type":"code","colab":{}},"source":["from transformers import BertTokenizer\n","\n","tokenizer = BertTokenizer.from_pretrained(\"bert-base-cased\")"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"saAtsXk-m6P7","colab_type":"text"},"source":["## Input Pipeline"]},{"cell_type":"markdown","metadata":{"id":"cvmttt3mm6P7","colab_type":"text"},"source":["### Load Dataset"]},{"cell_type":"code","metadata":{"id":"FyaLd5x7m6P8","colab_type":"code","outputId":"a349e397-a488-405c-d6bf-53562f87f377","colab":{}},"source":["data, info = tensorflow_datasets.load(\"glue/mrpc\", with_info=True)\n","\n","train_examples = info.splits[\"train\"].num_examples\n","valid_examples = info.splits[\"validation\"].num_examples"],"execution_count":0,"outputs":[{"output_type":"stream","text":["INFO:absl:Overwrite dataset info from restored data version.\n","INFO:absl:Reusing dataset glue (/home/jovyan/tensorflow_datasets/glue/mrpc/0.0.2)\n","INFO:absl:Constructing tf.data.Dataset for split None, from /home/jovyan/tensorflow_datasets/glue/mrpc/0.0.2\n","WARNING:absl:Warning: Setting shuffle_files=True because split=TRAIN and shuffle_files=None. 
This behavior will be deprecated on 2019-08-06, at which point shuffle_files=False will be the default for all splits.\n"],"name":"stderr"}]},{"cell_type":"markdown","metadata":{"id":"KCMkgYi5m6P-","colab_type":"text"},"source":["## Build Input Pipeline"]},{"cell_type":"code","metadata":{"id":"a-tl2kZEm6P-","colab_type":"code","colab":{}},"source":["from transformers import glue_convert_examples_to_features\n","\n","BATCH_SIZE = 40\n","\n","# Prepare dataset for GLUE as a tf.data.Dataset instance\n","train_dataset = glue_convert_examples_to_features(data[\"train\"], tokenizer, 128, \"mrpc\")\n","train_dataset = train_dataset.shuffle(512).batch(BATCH_SIZE).repeat(-1).prefetch(8)\n","\n","valid_dataset = glue_convert_examples_to_features(data[\"validation\"], tokenizer, 128, \"mrpc\")\n","valid_dataset = valid_dataset.batch(BATCH_SIZE)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"JqgFrbdbm6QA","colab_type":"text"},"source":["## Build BERT Model"]},{"cell_type":"markdown","metadata":{"id":"3yh2myknm6QB","colab_type":"text"},"source":["### Load Pre-trained BERT Model"]},{"cell_type":"code","metadata":{"id":"xpEvDnxum6QB","colab_type":"code","colab":{}},"source":["from transformers import TFBertForSequenceClassification\n","\n","model = TFBertForSequenceClassification.from_pretrained(\"bert-base-cased\")"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"ZvnCMjUzm6QD","colab_type":"code","colab":{}},"source":["opt = tf.keras.optimizers.Adam(learning_rate=3e-5)\n","# do loss scaling for optimizer\n","opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(opt, \"dynamic\")\n","\n","loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n","acc = tf.keras.metrics.SparseCategoricalAccuracy(\"accuracy\")\n","model.compile(optimizer=opt,\n"," loss=loss,\n"," metrics=[acc])"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"TU_GlzZom6QF","colab_type":"text"},"source":["## 
Train BERT Model"]},{"cell_type":"code","metadata":{"id":"0p2POI4um6QF","colab_type":"code","colab":{}},"source":["time_callback = TimeHistory()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"oXXUn0sem6QH","colab_type":"code","outputId":"08bcdf53-a763-4ec1-bb69-0646efc5414a","colab":{}},"source":["history = model.fit(train_dataset, epochs=4, steps_per_epoch=train_examples//BATCH_SIZE,\n"," validation_data=valid_dataset, validation_steps=valid_examples//BATCH_SIZE,\n"," validation_freq=3, callbacks=[time_callback])"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Train for 91 steps, validate for 10 steps\n","Epoch 1/4\n"],"name":"stdout"},{"output_type":"stream","text":["/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/indexed_slices.py:424: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n"," \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. \"\n","/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/indexed_slices.py:424: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n"," \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. 
\"\n"],"name":"stderr"},{"output_type":"stream","text":["91/91 [==============================] - 101s 1s/step - loss: 0.6166 - accuracy: 0.6676\n","Epoch 2/4\n","91/91 [==============================] - 32s 354ms/step - loss: 0.4064 - accuracy: 0.8162\n","Epoch 3/4\n","91/91 [==============================] - 72s 787ms/step - loss: 0.2176 - accuracy: 0.9154 - val_loss: 0.5116 - val_accuracy: 0.8600\n","Epoch 4/4\n","91/91 [==============================] - 29s 315ms/step - loss: 0.0952 - accuracy: 0.9666\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"Pzy-LYBNm6QJ","colab_type":"code","outputId":"97a91fe5-d23c-4131-dd4e-7483aa219e14","colab":{}},"source":["epoch_time = min(time_callback.times)\n","egs_per_sec = train_examples//epoch_time\n","\n","print(\"Peak Examples/s:\", egs_per_sec)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Peak Examples/s: 128.0\n"],"name":"stdout"}]}]}
--------------------------------------------------------------------------------
/tf_pet_base.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "laUXS-24UvPM"
8 | },
9 | "source": [
10 | "## Setup\n",
11 | "\n",
12 | "This section contains supplementary information, functions, and installs required packages."
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": null,
18 | "metadata": {
19 | "colab": {},
20 | "colab_type": "code",
21 | "id": "1Nn_XUbFUVah"
22 | },
23 | "outputs": [],
24 | "source": [
25 | "!pip install tensorflow-gpu==2.0 tensorflow_datasets gpustat -Uq"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {
31 | "colab_type": "text",
32 | "id": "mgLK3zv_Vsy8"
33 | },
34 | "source": [
35 | "**About**\n",
36 | "\n",
37 | "
\n",
38 | "\n",
39 | "This notebook is put together by Timothy Liu (`timothyl@nvidia.com`) for the [**PyCon SG**](https://pycon.sg/) 2019 tutorial on [**Improving Deep Learning Performance in TensorFlow**](https://github.com/NVAITC/pycon-sg19-tensorflow-tutorial).\n",
40 | "\n",
41 | "**Acknowledgements**\n",
42 | "\n",
43 | "* This notebook uses some materials adapted from TensorFlow documentation.\n",
44 | "* This notebook uses the [Oxford-IIIT Pet Dataset](http://www.robots.ox.ac.uk/~vgg/data/pets/) ([TensorFlow Datasets page](https://www.tensorflow.org/datasets/catalog/oxford_iiit_pet)).\n",
45 | "\n",
46 | "**Dataset Citation**\n",
47 | "\n",
48 | "```\n",
49 | "@InProceedings{parkhi12a,\n",
50 | " author = \"Parkhi, O. M. and Vedaldi, A. and Zisserman, A. and Jawahar, C.~V.\",\n",
51 | " title = \"Cats and Dogs\",\n",
52 | " booktitle = \"IEEE Conference on Computer Vision and Pattern Recognition\",\n",
53 | " year = \"2012\",\n",
54 | "}\n",
55 | "```"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": null,
61 | "metadata": {
62 | "colab": {
63 | "base_uri": "https://localhost:8080/"
64 | },
65 | "colab_type": "code",
66 | "executionInfo": {
67 | "elapsed": 7175,
68 | "status": "ok",
69 | "timestamp": 1570473634312,
70 | "user": {
71 | "displayName": "Timothy Liu SG",
72 | "photoUrl": "",
73 | "userId": "04327513636844080478"
74 | },
75 | "user_tz": -480
76 | },
77 | "id": "zNbGLsDSUe3W",
78 | "outputId": "4e551e00-c5a8-4d41-f6c2-7269f7941a68"
79 | },
80 | "outputs": [
81 | {
82 | "name": "stdout",
83 | "output_type": "stream",
84 | "text": [
85 | "TensorFlow version: 2.0.0\n"
86 | ]
87 | }
88 | ],
89 | "source": [
90 | "import multiprocessing\n",
91 | "\n",
92 | "import tensorflow\n",
93 | "print(\"TensorFlow version:\", tensorflow.__version__)\n",
94 | "\n",
95 | "import tensorflow.compat.v2 as tf\n",
96 | "import tensorflow_datasets as tfds"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": null,
102 | "metadata": {
103 | "colab": {},
104 | "colab_type": "code",
105 | "id": "VnKTCYVwmNCg"
106 | },
107 | "outputs": [],
108 | "source": [
109 | "import time\n",
110 | "\n",
111 | "class TimeHistory(tf.keras.callbacks.Callback):\n",
112 | " def on_train_begin(self, logs={}):\n",
113 | " self.times = []\n",
114 | " def on_epoch_begin(self, epoch, logs={}):\n",
115 | " self.epoch_time_start = time.time()\n",
116 | " def on_epoch_end(self, epoch, logs={}):\n",
117 | " self.times.append(time.time() - self.epoch_time_start)"
118 | ]
119 | },
120 | {
121 | "cell_type": "markdown",
122 | "metadata": {
123 | "colab_type": "text",
124 | "id": "ObRNSvQMUxbI"
125 | },
126 | "source": [
127 | "# Pets Classification with TensorFlow"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": null,
133 | "metadata": {
134 | "colab": {
135 | "base_uri": "https://localhost:8080/",
136 | "height": 51
137 | },
138 | "colab_type": "code",
139 | "executionInfo": {
140 | "elapsed": 9897,
141 | "status": "ok",
142 | "timestamp": 1570473637050,
143 | "user": {
144 | "displayName": "Timothy Liu SG",
145 | "photoUrl": "",
146 | "userId": "04327513636844080478"
147 | },
148 | "user_tz": -480
149 | },
150 | "id": "KaI6MAuJPNDV",
151 | "outputId": "089e1e0d-0fef-4e15-c54d-62153d7f5f96"
152 | },
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | "\u001b[1m\u001b[37mjupyter-admin \u001b[m Fri Oct 11 16:48:49 2019 \u001b[1m\u001b[30m410.104\u001b[m\n",
159 | "\u001b[36m[0]\u001b[m \u001b[34mTesla T4 \u001b[m |\u001b[31m 43'C\u001b[m, \u001b[32m 0 %\u001b[m | \u001b[36m\u001b[1m\u001b[33m 0\u001b[m / \u001b[33m15079\u001b[m MB |\n"
160 | ]
161 | }
162 | ],
163 | "source": [
164 | "!gpustat"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": null,
170 | "metadata": {
171 | "colab": {},
172 | "colab_type": "code",
173 | "id": "BCK57jlvNpOO"
174 | },
175 | "outputs": [],
176 | "source": [
177 | "import tensorflow.keras.layers as layers\n",
178 | "from tensorflow.keras.applications.resnet50 import ResNet50\n",
179 | "\n",
180 | "def create_model(img_size=(224,224), num_class=2, train_base=True):\n",
181 | " input_layer = layers.Input(shape=(img_size[0],img_size[1],3))\n",
182 | " base = ResNet50(input_tensor=input_layer,\n",
183 | " include_top=False,\n",
184 | " weights=\"imagenet\")\n",
185 | " base.trainable = train_base\n",
186 | " x = base.output\n",
187 | " x = layers.GlobalAveragePooling2D()(x)\n",
188 | " preds = layers.Dense(num_class, activation=\"softmax\")(x)\n",
189 | " return tf.keras.models.Model(inputs=input_layer, outputs=preds)"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "metadata": {
196 | "colab": {},
197 | "colab_type": "code",
198 | "id": "ICDyhaNCdNsm",
199 | "outputId": "d509178b-0855-4a0e-9ae6-3ec0b4b328ae"
200 | },
201 | "outputs": [
202 | {
203 | "name": "stdout",
204 | "output_type": "stream",
205 | "text": [
206 | "\u001b[1mDownloading and preparing dataset oxford_iiit_pet (801.24 MiB) to /home/jovyan/tensorflow_datasets/oxford_iiit_pet/3.0.0...\u001b[0m\n"
207 | ]
208 | },
209 | {
210 | "data": {
211 | "application/vnd.jupyter.widget-view+json": {
212 | "model_id": "c696f88ca38f485cb2986f920fff3cc1",
213 | "version_major": 2,
214 | "version_minor": 0
215 | },
216 | "text/plain": [
217 | "HBox(children=(IntProgress(value=1, bar_style='info', description='Dl Completed...', max=1, style=ProgressStyl…"
218 | ]
219 | },
220 | "metadata": {
221 | "tags": []
222 | },
223 | "output_type": "display_data"
224 | },
225 | {
226 | "data": {
227 | "application/vnd.jupyter.widget-view+json": {
228 | "model_id": "4733f929cecc455894a222e7f688483a",
229 | "version_major": 2,
230 | "version_minor": 0
231 | },
232 | "text/plain": [
233 | "HBox(children=(IntProgress(value=1, bar_style='info', description='Dl Size...', max=1, style=ProgressStyle(des…"
234 | ]
235 | },
236 | "metadata": {
237 | "tags": []
238 | },
239 | "output_type": "display_data"
240 | },
241 | {
242 | "data": {
243 | "application/vnd.jupyter.widget-view+json": {
244 | "model_id": "762fa84f21684f3698ea3c9fbafed06b",
245 | "version_major": 2,
246 | "version_minor": 0
247 | },
248 | "text/plain": [
249 | "HBox(children=(IntProgress(value=1, bar_style='info', description='Extraction completed...', max=1, style=Prog…"
250 | ]
251 | },
252 | "metadata": {
253 | "tags": []
254 | },
255 | "output_type": "display_data"
256 | },
257 | {
258 | "name": "stdout",
259 | "output_type": "stream",
260 | "text": [
261 | "\n",
262 | "\n",
263 | "\n",
264 | "\n",
265 | "\n",
266 | "\n"
267 | ]
268 | },
269 | {
270 | "data": {
271 | "application/vnd.jupyter.widget-view+json": {
272 | "model_id": "",
273 | "version_major": 2,
274 | "version_minor": 0
275 | },
276 | "text/plain": [
277 | "HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))"
278 | ]
279 | },
280 | "metadata": {
281 | "tags": []
282 | },
283 | "output_type": "display_data"
284 | },
285 | {
286 | "name": "stdout",
287 | "output_type": "stream",
288 | "text": [
289 | "Shuffling and writing examples to /home/jovyan/tensorflow_datasets/oxford_iiit_pet/3.0.0.incompleteN3JX06/oxford_iiit_pet-train.tfrecord\n"
290 | ]
291 | },
292 | {
293 | "data": {
294 | "application/vnd.jupyter.widget-view+json": {
295 | "model_id": "",
296 | "version_major": 2,
297 | "version_minor": 0
298 | },
299 | "text/plain": [
300 | "HBox(children=(IntProgress(value=0, max=3680), HTML(value='')))"
301 | ]
302 | },
303 | "metadata": {
304 | "tags": []
305 | },
306 | "output_type": "display_data"
307 | },
308 | {
309 | "name": "stdout",
310 | "output_type": "stream",
311 | "text": [
312 | "\r"
313 | ]
314 | },
315 | {
316 | "data": {
317 | "application/vnd.jupyter.widget-view+json": {
318 | "model_id": "",
319 | "version_major": 2,
320 | "version_minor": 0
321 | },
322 | "text/plain": [
323 | "HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))"
324 | ]
325 | },
326 | "metadata": {
327 | "tags": []
328 | },
329 | "output_type": "display_data"
330 | },
331 | {
332 | "name": "stdout",
333 | "output_type": "stream",
334 | "text": [
335 | "Shuffling and writing examples to /home/jovyan/tensorflow_datasets/oxford_iiit_pet/3.0.0.incompleteN3JX06/oxford_iiit_pet-test.tfrecord\n"
336 | ]
337 | },
338 | {
339 | "data": {
340 | "application/vnd.jupyter.widget-view+json": {
341 | "model_id": "",
342 | "version_major": 2,
343 | "version_minor": 0
344 | },
345 | "text/plain": [
346 | "HBox(children=(IntProgress(value=0, max=3669), HTML(value='')))"
347 | ]
348 | },
349 | "metadata": {
350 | "tags": []
351 | },
352 | "output_type": "display_data"
353 | },
354 | {
355 | "name": "stdout",
356 | "output_type": "stream",
357 | "text": [
358 | "\r"
359 | ]
360 | },
361 | {
362 | "data": {
363 | "application/vnd.jupyter.widget-view+json": {
364 | "model_id": "7b21228390c643e8a1ba4e9e512627ff",
365 | "version_major": 2,
366 | "version_minor": 0
367 | },
368 | "text/plain": [
369 | "HBox(children=(IntProgress(value=0, description='Computing statistics...', max=2, style=ProgressStyle(descript…"
370 | ]
371 | },
372 | "metadata": {
373 | "tags": []
374 | },
375 | "output_type": "display_data"
376 | },
377 | {
378 | "data": {
379 | "application/vnd.jupyter.widget-view+json": {
380 | "model_id": "",
381 | "version_major": 2,
382 | "version_minor": 0
383 | },
384 | "text/plain": [
385 | "HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))"
386 | ]
387 | },
388 | "metadata": {
389 | "tags": []
390 | },
391 | "output_type": "display_data"
392 | },
393 | {
394 | "name": "stderr",
395 | "output_type": "stream",
396 | "text": [
397 | "WARNING:absl:Warning: Setting shuffle_files=True because split=TRAIN and shuffle_files=None. This behavior will be deprecated on 2019-08-06, at which point shuffle_files=False will be the default for all splits.\n"
398 | ]
399 | },
400 | {
401 | "data": {
402 | "application/vnd.jupyter.widget-view+json": {
403 | "model_id": "",
404 | "version_major": 2,
405 | "version_minor": 0
406 | },
407 | "text/plain": [
408 | "HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))"
409 | ]
410 | },
411 | "metadata": {
412 | "tags": []
413 | },
414 | "output_type": "display_data"
415 | },
416 | {
417 | "name": "stdout",
418 | "output_type": "stream",
419 | "text": [
420 | "\n",
421 | "\u001b[1mDataset oxford_iiit_pet downloaded and prepared to /home/jovyan/tensorflow_datasets/oxford_iiit_pet/3.0.0. Subsequent calls will reuse this data.\u001b[0m\n"
422 | ]
423 | }
424 | ],
425 | "source": [
426 | "(train_dataset, test_dataset), info = tfds.load(name=\"oxford_iiit_pet:3.*.*\",\n",
427 | " split=[\"train\", \"test\"],\n",
428 | " shuffle_files=True,\n",
429 | " as_supervised=True,\n",
430 | " with_info=True)\n",
431 | "\n",
432 | "num_class = info.features[\"label\"].num_classes\n",
433 | "num_train = info.splits[\"train\"].num_examples\n",
434 | "num_test = info.splits[\"test\"].num_examples"
435 | ]
436 | },
437 | {
438 | "cell_type": "code",
439 | "execution_count": null,
440 | "metadata": {
441 | "colab": {},
442 | "colab_type": "code",
443 | "id": "MdWvubj3g2aO"
444 | },
445 | "outputs": [],
446 | "source": [
447 | "IMG_SIZE = (224, 224)\n",
448 | "\n",
449 | "@tf.function\n",
450 | "def format_train_example(image, label):\n",
451 | " image = tf.cast(image, tf.float32)\n",
452 | " image = (image/127.5) - 1\n",
453 | " image = tf.image.resize(image, IMG_SIZE)\n",
454 | " # perform image augmentation with tf.image\n",
455 | " image = tf.image.random_flip_left_right(image)\n",
456 | " image = tf.image.random_brightness(image, 0.1)\n",
457 | " return image, tf.one_hot(label, num_class)\n",
458 | "\n",
459 | "@tf.function\n",
460 | "def format_eval_example(image, label):\n",
461 | " image = tf.cast(image, tf.float32)\n",
462 | " image = (image/127.5) - 1\n",
463 | " image = tf.image.resize(image, IMG_SIZE)\n",
464 | " return image, tf.one_hot(label, num_class)"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": null,
470 | "metadata": {
471 | "colab": {},
472 | "colab_type": "code",
473 | "id": "GbsEAoP8XKib"
474 | },
475 | "outputs": [],
476 | "source": [
477 | "BATCH_SIZE = 80\n",
478 | "N_THREADS = multiprocessing.cpu_count()\n",
479 | "PREFETCH_COUNT = 8\n",
480 | "\n",
481 | "train_dataset = train_dataset.shuffle(1024)\n",
482 | "train_dataset = train_dataset.repeat(-1)\n",
483 | "train_dataset = train_dataset.map(format_train_example,\n",
484 | " num_parallel_calls=N_THREADS)\n",
485 | "train_dataset = train_dataset.batch(BATCH_SIZE)\n",
486 | "train_dataset = train_dataset.prefetch(PREFETCH_COUNT)"
487 | ]
488 | },
489 | {
490 | "cell_type": "code",
491 | "execution_count": null,
492 | "metadata": {
493 | "colab": {},
494 | "colab_type": "code",
495 | "id": "7-OAQB-0jb-r"
496 | },
497 | "outputs": [],
498 | "source": [
499 | "test_dataset = test_dataset.map(format_eval_example,\n",
500 | " num_parallel_calls=N_THREADS)\n",
501 | "test_dataset = test_dataset.repeat(-1)\n",
502 | "test_dataset = test_dataset.batch(BATCH_SIZE)"
503 | ]
504 | },
505 | {
506 | "cell_type": "code",
507 | "execution_count": null,
508 | "metadata": {
509 | "colab": {
510 | "base_uri": "https://localhost:8080/",
511 | "height": 51
512 | },
513 | "colab_type": "code",
514 | "executionInfo": {
515 | "elapsed": 24373,
516 | "status": "ok",
517 | "timestamp": 1570473651568,
518 | "user": {
519 | "displayName": "Timothy Liu SG",
520 | "photoUrl": "",
521 | "userId": "04327513636844080478"
522 | },
523 | "user_tz": -480
524 | },
525 | "id": "YAwj90pGOIAy",
526 | "outputId": "8dfaabf3-1eab-402b-9897-42a0a45235a8"
527 | },
528 | "outputs": [
529 | {
530 | "name": "stdout",
531 | "output_type": "stream",
532 | "text": [
533 | "Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5\n",
534 | "94773248/94765736 [==============================] - 6s 0us/step\n"
535 | ]
536 | }
537 | ],
538 | "source": [
539 | "model = create_model(IMG_SIZE, num_class, train_base=True)\n",
540 | "opt = tf.keras.optimizers.Adam()\n",
541 | "\n",
542 | "model.compile(loss=\"categorical_crossentropy\",\n",
543 | " optimizer=opt,\n",
544 | " metrics=[\"acc\"])\n",
545 | "\n",
546 | "#model.summary()"
547 | ]
548 | },
549 | {
550 | "cell_type": "code",
551 | "execution_count": null,
552 | "metadata": {
553 | "colab": {},
554 | "colab_type": "code",
555 | "id": "0vRtm2rORBHf"
556 | },
557 | "outputs": [],
558 | "source": [
559 | "steps_per_epoch = num_train//BATCH_SIZE\n",
560 | "steps_test = num_test//BATCH_SIZE\n",
561 | "\n",
562 | "time_callback = TimeHistory()"
563 | ]
564 | },
565 | {
566 | "cell_type": "code",
567 | "execution_count": null,
568 | "metadata": {
569 | "colab": {
570 | "base_uri": "https://localhost:8080/",
571 | "height": 153
572 | },
573 | "colab_type": "code",
574 | "executionInfo": {
575 | "elapsed": 132772,
576 | "status": "ok",
577 | "timestamp": 1570473759978,
578 | "user": {
579 | "displayName": "Timothy Liu SG",
580 | "photoUrl": "",
581 | "userId": "04327513636844080478"
582 | },
583 | "user_tz": -480
584 | },
585 | "id": "7T8VVrn4Q12B",
586 | "outputId": "d5b7a7c7-0970-45ef-898e-283abfe787a6"
587 | },
588 | "outputs": [
589 | {
590 | "name": "stdout",
591 | "output_type": "stream",
592 | "text": [
593 | "Train for 46 steps\n",
594 | "Epoch 1/5\n",
595 | "46/46 [==============================] - 63s 1s/step - loss: 1.8270 - acc: 0.4777\n",
596 | "Epoch 2/5\n",
597 | "46/46 [==============================] - 36s 773ms/step - loss: 0.6667 - acc: 0.7905\n",
598 | "Epoch 3/5\n",
599 | "46/46 [==============================] - 38s 832ms/step - loss: 0.3448 - acc: 0.8889\n",
600 | "Epoch 4/5\n",
601 | "46/46 [==============================] - 37s 807ms/step - loss: 0.2233 - acc: 0.9321\n",
602 | "Epoch 5/5\n",
603 | "46/46 [==============================] - 37s 812ms/step - loss: 0.1526 - acc: 0.9552\n"
604 | ]
605 | },
606 | {
607 | "data": {
608 | "text/plain": [
609 | ""
610 | ]
611 | },
612 | "execution_count": 12,
613 | "metadata": {
614 | "tags": []
615 | },
616 | "output_type": "execute_result"
617 | }
618 | ],
619 | "source": [
620 | "model.fit(train_dataset, steps_per_epoch=steps_per_epoch,\n",
621 | " epochs=5, callbacks=[time_callback], verbose=1)"
622 | ]
623 | },
624 | {
625 | "cell_type": "code",
626 | "execution_count": null,
627 | "metadata": {
628 | "colab": {},
629 | "colab_type": "code",
630 | "id": "21CjKwKsjIiy",
631 | "outputId": "f1b26bcc-8f6a-4ea2-b9ca-880d0a4f5901"
632 | },
633 | "outputs": [
634 | {
635 | "name": "stdout",
636 | "output_type": "stream",
637 | "text": [
638 | "Peak Img/s: 103.0\n"
639 | ]
640 | }
641 | ],
642 | "source": [
643 | "epoch_time = min(time_callback.times)\n",
644 | "img_per_sec = num_train//epoch_time\n",
645 | "\n",
646 | "print(\"Peak Img/s:\", img_per_sec)"
647 | ]
648 | },
649 | {
650 | "cell_type": "code",
651 | "execution_count": null,
652 | "metadata": {
653 | "colab": {},
654 | "colab_type": "code",
655 | "id": "kUJceK2mmNC5"
656 | },
657 | "outputs": [],
658 | "source": []
659 | }
660 | ],
661 | "metadata": {
662 | "accelerator": "GPU",
663 | "colab": {
664 | "collapsed_sections": [
665 | "laUXS-24UvPM"
666 | ],
667 | "name": "tf_pet_base.ipynb",
668 | "provenance": [],
669 | "toc_visible": true
670 | },
671 | "kernelspec": {
672 | "display_name": "Python 3",
673 | "language": "python",
674 | "name": "python3"
675 | },
676 | "language_info": {
677 | "codemirror_mode": {
678 | "name": "ipython",
679 | "version": 3
680 | },
681 | "file_extension": ".py",
682 | "mimetype": "text/x-python",
683 | "name": "python",
684 | "nbconvert_exporter": "python",
685 | "pygments_lexer": "ipython3",
686 | "version": "3.6.7"
687 | }
688 | },
689 | "nbformat": 4,
690 | "nbformat_minor": 4
691 | }
692 |
--------------------------------------------------------------------------------
/tf_transformer_base.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.7"},"colab":{"name":"tf_transformer_base.ipynb","provenance":[],"collapsed_sections":["UqslZYH8mhNU"],"toc_visible":true},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","metadata":{"id":"UqslZYH8mhNU","colab_type":"text"},"source":["## Setup\n","\n","This section contains supplementary information, functions, and installs required packages."]},{"cell_type":"code","metadata":{"id":"gss2_uX1mhNV","colab_type":"code","colab":{}},"source":["!pip install tensorflow-gpu==2.0 tensorflow_datasets gpustat transformers -Uq"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"8hPa-QqumhNY","colab_type":"text"},"source":["**About**\n","\n","
\n","\n","This notebook is put together by Timothy Liu (`timothyl@nvidia.com`) for the [**PyCon SG**](https://pycon.sg/) 2019 tutorial on [**Improving Deep Learning Performance in TensorFlow**](https://github.com/NVAITC/pycon-sg19-tensorflow-tutorial).\n","\n","**Acknowledgements**\n","\n","* This notebook uses some materials adapted from TensorFlow documentation.\n","* This notebook uses the [HuggingFace Transformers library](https://github.com/huggingface/transformers).\n","* This notebook uses the [GLUE (MRPC) Dataset](https://gluebenchmark.com/) ([TensorFlow Datasets page](https://www.tensorflow.org/datasets/catalog/glue)).\n","\n","**Dataset Citation**\n","\n","```\n","@inproceedings{wang2019glue,\n"," title={ {GLUE}: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding},\n"," author={Wang, Alex and Singh, Amanpreet and Michael, Julian and Hill, Felix and Levy, Omer and Bowman, Samuel R.},\n"," note={In the Proceedings of ICLR.},\n"," year={2019}\n","}\n","```"]},{"cell_type":"code","metadata":{"id":"5RTQHdpNmhNY","colab_type":"code","colab":{}},"source":["import tensorflow.compat.v2 as tf\n","import tensorflow_datasets"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"wm5qQqiLmhNa","colab_type":"code","colab":{}},"source":["import time\n","\n","class TimeHistory(tf.keras.callbacks.Callback):\n"," def on_train_begin(self, logs={}):\n"," self.times = []\n"," def on_epoch_begin(self, epoch, logs={}):\n"," self.epoch_time_start = time.time()\n"," def on_epoch_end(self, epoch, logs={}):\n"," self.times.append(time.time() - self.epoch_time_start)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"5DMm-1HmmhNc","colab_type":"text"},"source":["# Sequence Classification with BERT in TF 
2.0"]},{"cell_type":"code","metadata":{"id":"ZM4spGMNp5wN","colab_type":"code","colab":{}},"source":["!gpustat"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"WkWHOum3mhNe","colab_type":"text"},"source":["## Load BERT Tokenizer"]},{"cell_type":"code","metadata":{"id":"qqd1X8dxmhNf","colab_type":"code","colab":{}},"source":["from transformers import BertTokenizer\n","\n","tokenizer = BertTokenizer.from_pretrained(\"bert-base-cased\")"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"imu1KLWvmhNh","colab_type":"text"},"source":["## Input Pipeline"]},{"cell_type":"markdown","metadata":{"id":"nFFtBf6amhNh","colab_type":"text"},"source":["### Load Dataset"]},{"cell_type":"code","metadata":{"id":"WUAUMzVOmhNi","colab_type":"code","outputId":"40288796-f303-4af9-e905-40f3349c6d94","colab":{}},"source":["data, info = tensorflow_datasets.load(\"glue/mrpc\", with_info=True)\n","\n","train_examples = info.splits[\"train\"].num_examples\n","valid_examples = info.splits[\"validation\"].num_examples"],"execution_count":0,"outputs":[{"output_type":"stream","text":["INFO:absl:Overwrite dataset info from restored data version.\n","INFO:absl:Reusing dataset glue (/home/jovyan/tensorflow_datasets/glue/mrpc/0.0.2)\n","INFO:absl:Constructing tf.data.Dataset for split None, from /home/jovyan/tensorflow_datasets/glue/mrpc/0.0.2\n","WARNING:absl:Warning: Setting shuffle_files=True because split=TRAIN and shuffle_files=None. 
This behavior will be deprecated on 2019-08-06, at which point shuffle_files=False will be the default for all splits.\n"],"name":"stderr"}]},{"cell_type":"markdown","metadata":{"id":"LTZECdgxmhNl","colab_type":"text"},"source":["## Build Input Pipeline"]},{"cell_type":"code","metadata":{"id":"eBkLtX9bmhNl","colab_type":"code","colab":{}},"source":["from transformers import glue_convert_examples_to_features\n","\n","BATCH_SIZE = 32\n","\n","# Prepare dataset for GLUE as a tf.data.Dataset instance\n","train_dataset = glue_convert_examples_to_features(data[\"train\"], tokenizer, 128, \"mrpc\")\n","train_dataset = train_dataset.shuffle(512).batch(BATCH_SIZE).repeat(-1)\n","\n","valid_dataset = glue_convert_examples_to_features(data[\"validation\"], tokenizer, 128, \"mrpc\")\n","valid_dataset = valid_dataset.batch(BATCH_SIZE)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"vC_qPDcjmhNn","colab_type":"text"},"source":["## Build BERT Model"]},{"cell_type":"markdown","metadata":{"id":"3sAKlIGPmhNo","colab_type":"text"},"source":["### Load Pre-trained BERT Model"]},{"cell_type":"code","metadata":{"id":"vW2d3lctmhNo","colab_type":"code","colab":{}},"source":["from transformers import TFBertForSequenceClassification\n","\n","model = TFBertForSequenceClassification.from_pretrained(\"bert-base-cased\")"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"k2qMgxExmhNq","colab_type":"code","colab":{}},"source":["opt = tf.keras.optimizers.Adam(learning_rate=3e-5)\n","\n","loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n","acc = tf.keras.metrics.SparseCategoricalAccuracy(\"accuracy\")\n","model.compile(optimizer=opt,\n"," loss=loss,\n"," metrics=[acc])"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"gNhNvzncmhNs","colab_type":"text"},"source":["## Train BERT Model"]},{"cell_type":"code","metadata":{"id":"it4pTXFAmhNt","colab_type":"code","colab":{}},"source":["time_callback = 
TimeHistory()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"lWyY0SJHmhNu","colab_type":"code","outputId":"3fe4f64c-8eff-463f-9367-877abf335699","colab":{}},"source":["history = model.fit(train_dataset, epochs=4, steps_per_epoch=train_examples//BATCH_SIZE,\n"," validation_data=valid_dataset, validation_steps=valid_examples//BATCH_SIZE,\n"," validation_freq=3, callbacks=[time_callback])"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Train for 114 steps, validate for 12 steps\n","Epoch 1/4\n","114/114 [==============================] - 116s 1s/step - loss: 0.5787 - accuracy: 0.7182\n","Epoch 2/4\n","114/114 [==============================] - 94s 828ms/step - loss: 0.3521 - accuracy: 0.8490\n","Epoch 3/4\n","114/114 [==============================] - 101s 886ms/step - loss: 0.1301 - accuracy: 0.9574 - val_loss: 0.4803 - val_accuracy: 0.8385\n","Epoch 4/4\n","114/114 [==============================] - 94s 828ms/step - loss: 0.0507 - accuracy: 0.9854\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IFXITNKamhNy","colab_type":"code","outputId":"ea9c70b0-adfe-4c34-cd01-69aca772ae8e","colab":{}},"source":["epoch_time = min(time_callback.times)\n","egs_per_sec = train_examples//epoch_time\n","\n","print(\"Peak Examples/s:\", egs_per_sec)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Peak Examples/s: 38.0\n"],"name":"stdout"}]}]}
--------------------------------------------------------------------------------