├── .gitignore ├── 01_basics_phases_ex1.ipynb ├── 02_basics_variables_ex2.ipynb ├── 03_basics_collections_ex3.ipynb ├── 04_linear_regression_ex4.ipynb ├── 05_autodiff_ex5.ipynb ├── 06_readers.ipynb ├── 07_tensorboard_ex6.ipynb ├── 08_artifical_neural_networks_ex7ex8.ipynb ├── 09_organizing_code.ipynb ├── 10_training_deep_nets_ex9.ipynb ├── 11_convolutional_neural_networks_ex10.ipynb ├── LICENSE ├── README.md ├── data └── life_satisfaction.csv ├── images ├── china.png └── intro_to_tf_course.png └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Specific to this project 2 | my_* 3 | checkpoint 4 | tf_logs/ 5 | tmp/ 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *,cover 52 | .hypothesis/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # IPython Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | -------------------------------------------------------------------------------- /01_basics_phases_ex1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![book](https://raw.githubusercontent.com/ageron/tensorflow-safari-course/master/images/intro_to_tf_course.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**Try not to peek at the solutions when you go through the exercises. 
;-)**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "First let's make sure this notebook works well in both Python 2 and Python 3:" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from __future__ import absolute_import, division, print_function, unicode_literals" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "# TensorFlow basics" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import tensorflow as tf\n", 47 | "tf.__version__" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "## Construction Phase" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | ">>> a = tf.constant(3)\n", 64 | ">>> b = tf.constant(5)\n", 65 | ">>> s = a + b" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "a" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "b" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "s" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "tf.get_default_graph()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | ">>> graph = tf.Graph()\n", 111 | ">>> with graph.as_default():\n", 112 | "... a = tf.constant(3)\n", 113 | "... b = tf.constant(5)\n", 114 | "... s = a + b\n", 115 | "..." 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "## Execution Phase" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | ">>> with tf.Session(graph=graph) as sess:\n", 132 | "... result = s.eval()\n", 133 | "...\n", 134 | ">>> result" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | ">>> with tf.Session(graph=graph) as sess:\n", 144 | "... result = sess.run(s)\n", 145 | "...\n", 146 | ">>> result" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | ">>> with tf.Session(graph=graph) as sess:\n", 156 | "... 
result = sess.run([a,b,s])\n", 157 | "...\n", 158 | ">>> result" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "## Exercise 1" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "![Exercise](https://c1.staticflickr.com/9/8101/8553474140_c50cf08708_b.jpg)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "1.1) Create a simple graph that calculates $ c = \\exp(\\sqrt 8 + 3) $.\n", 180 | "\n", 181 | "**Tip**: TensorFlow's API documentation is available at:\n", 182 | "https://www.tensorflow.org/versions/master/api_docs/python/" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "1.2) Now create a `Session()` and evaluate the operation that gives you the result of the equation above:" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "1.3) Create a graph that evaluates and prints both $ b = \\sqrt 8 $ and $ c = \\exp(\\sqrt 8 + 3) $. Try to implement this in a way that only evaluates $ \\sqrt 8 $ once." 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "1.4) The following code is needed to display TensorFlow graphs in Jupyter. 
Just run this cell then visualize your graph by calling `show_graph(`_your graph_`)`:"
 267 |    ]
 268 |   },
 269 |   {
 270 |    "cell_type": "code",
 271 |    "execution_count": null,
 272 |    "metadata": {},
 273 |    "outputs": [],
 274 |    "source": [
 275 |     "import numpy as np\n",
 276 |     "from IPython.display import display, HTML\n",
 277 |     "\n",
 278 |     "def strip_consts(graph_def, max_const_size=32):\n",
 279 |     "    \"\"\"Strip large constant values from graph_def.\"\"\"\n",
 280 |     "    strip_def = tf.GraphDef()\n",
 281 |     "    for n0 in graph_def.node:\n",
 282 |     "        n = strip_def.node.add() \n",
 283 |     "        n.MergeFrom(n0)\n",
 284 |     "        if n.op == 'Const':\n",
 285 |     "            tensor = n.attr['value'].tensor\n",
 286 |     "            size = len(tensor.tensor_content)\n",
 287 |     "            if size > max_const_size:\n",
 288 |     "                tensor.tensor_content = b\"<stripped %d bytes>\"%size\n",
 289 |     "    return strip_def\n",
 290 |     "\n",
 291 |     "def show_graph(graph_def=None, max_const_size=32):\n",
 292 |     "    \"\"\"Visualize TensorFlow graph.\"\"\"\n",
 293 |     "    graph_def = graph_def or tf.get_default_graph()\n",
 294 |     "    if hasattr(graph_def, 'as_graph_def'):\n",
 295 |     "        graph_def = graph_def.as_graph_def()\n",
 296 |     "    strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n",
 297 |     "    code = \"\"\"\n",
 298 |     "        <script>\n",
 299 |     "          function load() {{\n",
 300 |     "            document.getElementById(\"{id}\").pbtxt = {data};\n",
 301 |     "          }}\n",
 302 |     "        </script>\n",
 303 |     "        <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n",
 304 |     "        <div style=\"height:600px\">\n",
 305 |     "          <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n",
 306 |     "        </div>\n",
 307 |     "    \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n",
 308 |     "\n",
 309 |     "    iframe = \"\"\"\n",
 310 |     "        <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n",
 311 |     "    \"\"\".format(code.replace('\"', '&quot;'))\n",
 312 |     "    display(HTML(iframe))"
 313 |    ]
 314 |   },
 315 |   {
 316 |    "cell_type": "code",
 317 |    "execution_count": null,
 318 |    "metadata": {},
 319 |    "outputs": [],
 320 |    "source": []
 321 |   },
 322 |   {
 323 |    "cell_type": "code",
 324 |    "execution_count": null,
 325 |    "metadata": {},
 326 |    "outputs": [],
 327 |    "source": []
 328 |   },
 329 |   {
 330 |    "cell_type": "code",
 331 |    "execution_count": null,
 332 |    "metadata": {},
 333 |    "outputs": [],
 334 |    "source": []
 335 |   },
 336 |   {
 337 |    "cell_type": "markdown",
 338 |    "metadata": {},
 339 |    "source": [
 340 |     "Try not to peek at the solution below before you have done the exercise! :)"
 341 |    ]
 342 |   },
 343 |   {
 344 |    "cell_type": "markdown",
 345 |    "metadata": {},
 346 |    "source": [
 347 |     "![thinking](https://upload.wikimedia.org/wikipedia/commons/0/06/Filos_segundo_logo_%28flipped%29.jpg)"
 348 |    ]
 349 |   },
 350 |   {
 351 |    "cell_type": "markdown",
 352 |    "metadata": {},
 353 |    "source": [
 354 |     "## Exercise 1 - Solution"
 355 |    ]
 356 |   },
 357 |   {
 358 |    "cell_type": "markdown",
 359 |    "metadata": {},
 360 |    "source": [
 361 |     "1.1)"
 362 |    ]
 363 |   },
 364 |   {
 365 |    "cell_type": "code",
 366 |    "execution_count": null,
 367 |    "metadata": {},
 368 |    "outputs": [],
 369 |    "source": [
 370 |     "graph = tf.Graph()\n",
 371 |     "with graph.as_default():\n",
 372 |     "    c = tf.exp(tf.add(tf.sqrt(tf.constant(8.)), tf.constant(3.)))\n",
 373 |     "    # or simply...\n",
 374 |     "    c = tf.exp(tf.sqrt(8.) + 3.)"
 375 |    ]
 376 |   },
 377 |   {
 378 |    "cell_type": "markdown",
 379 |    "metadata": {},
 380 |    "source": [
 381 |     "1.2)"
 382 |    ]
 383 |   },
 384 |   {
 385 |    "cell_type": "code",
 386 |    "execution_count": null,
 387 |    "metadata": {},
 388 |    "outputs": [],
 389 |    "source": [
 390 |     "with tf.Session(graph=graph):\n",
 391 |     "    c_val = c.eval()"
 392 |    ]
 393 |   },
 394 |   {
 395 |    "cell_type": "code",
 396 |    "execution_count": null,
 397 |    "metadata": {},
 398 |    "outputs": [],
 399 |    "source": [
 400 |     "c_val"
 401 |    ]
 402 |   },
 403 |   {
 404 |    "cell_type": "markdown",
 405 |    "metadata": {},
 406 |    "source": [
 407 |     "1.3)"
 408 |    ]
 409 |   },
 410 |   {
 411 |    "cell_type": "code",
 412 |    "execution_count": null,
 413 |    "metadata": {},
 414 |    "outputs": [],
 415 |    "source": [
 416 |     "graph = tf.Graph()\n",
 417 |     "with graph.as_default():\n",
 418 |     "    b = tf.sqrt(8.)\n",
 419 |     "    c = tf.exp(b + 3)"
 420 |    ]
 421 |   },
 422 |   {
 423 |    "cell_type": "code",
 424 |    "execution_count": null,
 425 |    "metadata": {},
 426 |    "outputs": [],
 427 |    "source": [
 428 |     "with tf.Session(graph=graph) as sess:\n",
 429 |     "    b_val, c_val = sess.run([b, c])"
 430 |    ]
 431 |   },
 432 |   {
 433 |    "cell_type": "code",
 434 |    "execution_count": null,
 435 |    "metadata": {},
 436 |    "outputs": [],
 437 |    "source": [
 438 |     "b_val"
 439 |    ]
 440 |   },
 441 |   {
 442 |    "cell_type": "code",
 443 |    "execution_count": null,
 444 |    "metadata": {},
 445 |    "outputs": [],
 446 |    "source": [
 447 |     "c_val"
 448 |    ]
 449 |   },
 450 |   {
 451 |    "cell_type": "markdown",
 452 |    "metadata": {},
 453 |    "source": [
 454 |     "**Important**: the following implementation gives the right result, but it runs the graph twice, once to evaluate `b`, and once to evaluate `c`. Since `c` depends on `b`, it means that `b` will be evaluated twice. Not what we wanted."
455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "# WRONG!\n", 464 | "with tf.Session(graph=graph):\n", 465 | " b_val = b.eval() # evaluates b\n", 466 | " c_val = c.eval() # evaluates c, which means evaluating b again!" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "metadata": {}, 473 | "outputs": [], 474 | "source": [ 475 | "b_val" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": null, 481 | "metadata": { 482 | "scrolled": true 483 | }, 484 | "outputs": [], 485 | "source": [ 486 | "c_val" 487 | ] 488 | }, 489 | { 490 | "cell_type": "markdown", 491 | "metadata": {}, 492 | "source": [ 493 | "1.4)" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "metadata": {}, 500 | "outputs": [], 501 | "source": [ 502 | "show_graph(graph)" 503 | ] 504 | } 505 | ], 506 | "metadata": { 507 | "kernelspec": { 508 | "display_name": "Python 3", 509 | "language": "python", 510 | "name": "python3" 511 | }, 512 | "language_info": { 513 | "codemirror_mode": { 514 | "name": "ipython", 515 | "version": 3 516 | }, 517 | "file_extension": ".py", 518 | "mimetype": "text/x-python", 519 | "name": "python", 520 | "nbconvert_exporter": "python", 521 | "pygments_lexer": "ipython3", 522 | "version": "3.6.3" 523 | } 524 | }, 525 | "nbformat": 4, 526 | "nbformat_minor": 2 527 | } 528 | -------------------------------------------------------------------------------- /02_basics_variables_ex2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![book](https://raw.githubusercontent.com/ageron/tensorflow-safari-course/master/images/intro_to_tf_course.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**Try not to peek at the solutions when you go through the exercises. ;-)**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "First let's make sure this notebook works well in both Python 2 and Python 3:" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from __future__ import absolute_import, division, print_function, unicode_literals" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import tensorflow as tf\n", 40 | "tf.__version__" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Variables" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | ">>> graph = tf.Graph()\n", 57 | ">>> with graph.as_default():\n", 58 | "... x = tf.Variable(100)\n", 59 | "... c = tf.constant(5)\n", 60 | "... increment_op = tf.assign(x, x + c)\n", 61 | "..." 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | ">>> with tf.Session(graph=graph) as sess:\n", 71 | "... x.initializer.run()\n", 72 | "... print(x.eval()) # 100\n", 73 | "... for iteration in range(10):\n", 74 | "... increment_op.eval()\n", 75 | "... print(x.eval()) # 150\n", 76 | "..." 
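,
     "\n",
     "# Note: each increment_op.eval() call above runs the assignment, adding the\n",
     "# constant 5 to x and returning its new value, so the ten runs take x from 100 to 150."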
77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "## Variables Initializer" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | ">>> graph = tf.Graph()\n", 93 | ">>> with graph.as_default():\n", 94 | "... x = tf.Variable(100)\n", 95 | "... c = tf.constant(5)\n", 96 | "... increment_op = tf.assign(x, x + c)\n", 97 | "... init = tf.global_variables_initializer()\n", 98 | "..." 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | ">>> with tf.Session(graph=graph) as sess:\n", 108 | "... init.run()\n", 109 | "... print(x.eval()) # 100\n", 110 | "... for iteration in range(10):\n", 111 | "... increment_op.eval()\n", 112 | "... print(x.eval()) # 150\n", 113 | "..." 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Variable State" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | ">>> session1 = tf.Session(graph=graph)\n", 130 | ">>> session2 = tf.Session(graph=graph)\n", 131 | ">>> x.initializer.run(session=session1)\n", 132 | ">>> x.initializer.run(session=session2)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | ">>> increment_op.eval(session=session1)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | ">>> x.eval(session=session1)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | ">>> x.eval(session=session2)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | ">>> session1.close()\n", 169 | ">>> session2.close()" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## Exercise 2" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "![Exercise](https://c1.staticflickr.com/9/8101/8553474140_c50cf08708_b.jpg)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "In this exercise, we will use TensorFlow to compute $ 1 + \\frac{1}{2} + \\frac{1}{4} + \\frac{1}{8} + \\cdots $ by creating a simple graph then running it multiple times.\n", 191 | "\n", 192 | "Think about how you would solve this problem (and if you are feeling confident enough, go ahead and implement your ideas), then follow the instructions below." 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": { 219 | "collapsed": true 220 | }, 221 | "source": [ 222 | "2.1) Create a graph with two variables $ x $ and $ y $, initialized to 0.0 and 1.0 respectively. 
Create an operation that will perform the following assignment: $ x \\gets x + y $. Create a second operation that will perform the following assignment: $ y \\gets \\dfrac{y}{2} $."
 223 |    ]
 224 |   },
 225 |   {
 226 |    "cell_type": "code",
 227 |    "execution_count": null,
 228 |    "metadata": {},
 229 |    "outputs": [],
 230 |    "source": []
 231 |   },
 232 |   {
 233 |    "cell_type": "code",
 234 |    "execution_count": null,
 235 |    "metadata": {},
 236 |    "outputs": [],
 237 |    "source": []
 238 |   },
 239 |   {
 240 |    "cell_type": "code",
 241 |    "execution_count": null,
 242 |    "metadata": {},
 243 |    "outputs": [],
 244 |    "source": []
 245 |   },
 246 |   {
 247 |    "cell_type": "markdown",
 248 |    "metadata": {
 249 |     "collapsed": true
 250 |    },
 251 |    "source": [
 252 |     "2.2) Now create a `Session()`, initialize the variables, then create a loop that will run 50 times, and at each iteration will run the first assignment operation, then the second (separately). Finally, print out the value of $ x $. The result should be very close (or equal to) 2.0."
 253 |    ]
 254 |   },
 255 |   {
 256 |    "cell_type": "code",
 257 |    "execution_count": null,
 258 |    "metadata": {},
 259 |    "outputs": [],
 260 |    "source": []
 261 |   },
 262 |   {
 263 |    "cell_type": "code",
 264 |    "execution_count": null,
 265 |    "metadata": {},
 266 |    "outputs": [],
 267 |    "source": []
 268 |   },
 269 |   {
 270 |    "cell_type": "code",
 271 |    "execution_count": null,
 272 |    "metadata": {},
 273 |    "outputs": [],
 274 |    "source": []
 275 |   },
 276 |   {
 277 |    "cell_type": "markdown",
 278 |    "metadata": {},
 279 |    "source": [
 280 |     "2.3) Try to run the assignment operations simultaneously. What happens to the result? Run your code multiple times: do the results vary? Can you explain what is happening?"
 281 |    ]
 282 |   },
 283 |   {
 284 |    "cell_type": "code",
 285 |    "execution_count": null,
 286 |    "metadata": {},
 287 |    "outputs": [],
 288 |    "source": []
 289 |   },
 290 |   {
 291 |    "cell_type": "code",
 292 |    "execution_count": null,
 293 |    "metadata": {},
 294 |    "outputs": [],
 295 |    "source": []
 296 |   },
 297 |   {
 298 |    "cell_type": "code",
 299 |    "execution_count": null,
 300 |    "metadata": {},
 301 |    "outputs": [],
 302 |    "source": []
 303 |   },
 304 |   {
 305 |    "cell_type": "markdown",
 306 |    "metadata": {},
 307 |    "source": [
 308 |     "2.4) Bonus question (if you have time): update your graph to define the second assignment ($y \\gets \\frac{y}{2}$) inside a `tf.control_dependencies()` block, to guarantee that it runs after the first assignment ($ x \\gets x + y$). Does this finally solve the problem?"
 309 |    ]
 310 |   },
 311 |   {
 312 |    "cell_type": "code",
 313 |    "execution_count": null,
 314 |    "metadata": {},
 315 |    "outputs": [],
 316 |    "source": []
 317 |   },
 318 |   {
 319 |    "cell_type": "code",
 320 |    "execution_count": null,
 321 |    "metadata": {},
 322 |    "outputs": [],
 323 |    "source": []
 324 |   },
 325 |   {
 326 |    "cell_type": "code",
 327 |    "execution_count": null,
 328 |    "metadata": {},
 329 |    "outputs": [],
 330 |    "source": []
 331 |   },
 332 |   {
 333 |    "cell_type": "markdown",
 334 |    "metadata": {},
 335 |    "source": [
 336 |     "Try not to peek at the solution below before you have done the exercise! 
:)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "![thinking](https://upload.wikimedia.org/wikipedia/commons/0/06/Filos_segundo_logo_%28flipped%29.jpg)" 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "## Exercise 2 - Solution" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "2.1)" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "graph = tf.Graph()\n", 367 | "with graph.as_default():\n", 368 | " x = tf.Variable(0.0)\n", 369 | " y = tf.Variable(1.0)\n", 370 | " add = tf.assign(x, x + y)\n", 371 | " divide = tf.assign(y, y / 2)\n", 372 | " init = tf.global_variables_initializer()" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "2.2)" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "with tf.Session(graph=graph):\n", 389 | " init.run()\n", 390 | " for iteration in range(20):\n", 391 | " add.eval()\n", 392 | " divide.eval()\n", 393 | " result = x.eval()" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "print(result)" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "metadata": {}, 408 | "source": [ 409 | "2.3)" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [ 418 | "with tf.Session(graph=graph) as sess:\n", 419 | " init.run()\n", 420 | " for iteration in range(20):\n", 421 | " sess.run([add, divide])\n", 422 | " result = x.eval()" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "metadata": {}, 429 | "outputs": [], 430 | "source": [ 431 | "result" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "2.4)" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": {}, 445 | "outputs": [], 446 | "source": [ 447 | "graph = tf.Graph()\n", 448 | "with graph.as_default():\n", 449 | " x = tf.Variable(0.0)\n", 450 | " y = tf.Variable(1.0)\n", 451 | " add = tf.assign(x, x + y)\n", 452 | " with tf.control_dependencies([add]):\n", 453 | " divide = tf.assign(y, y / 2)\n", 454 | " init = tf.global_variables_initializer()" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "with tf.Session(graph=graph) as sess:\n", 464 | " init.run()\n", 465 | " for iteration in range(30):\n", 466 | " sess.run([add, divide])\n", 467 | " result = x.eval()" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [ 476 | "result" 477 | ] 478 | } 479 | ], 480 | "metadata": { 481 | "kernelspec": { 482 | "display_name": "Python 3", 483 | "language": "python", 484 | "name": "python3" 485 | }, 486 | "language_info": { 487 | "codemirror_mode": { 488 | "name": "ipython", 489 | "version": 3 490 | }, 491 | "file_extension": ".py", 492 | "mimetype": "text/x-python", 493 | "name": "python", 494 | "nbconvert_exporter": "python", 495 | "pygments_lexer": "ipython3", 496 | "version": "3.6.3" 497 | } 498 | }, 
499 | "nbformat": 4, 500 | "nbformat_minor": 2 501 | } 502 | -------------------------------------------------------------------------------- /03_basics_collections_ex3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![book](https://raw.githubusercontent.com/ageron/tensorflow-safari-course/master/images/intro_to_tf_course.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**Try not to peek at the solutions when you go through the exercises. ;-)**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "First let's make sure this notebook works well in both Python 2 and Python 3:" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from __future__ import absolute_import, division, print_function, unicode_literals" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import tensorflow as tf\n", 40 | "tf.__version__" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "*__From notebook 2 on variables:__*" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | ">>> graph = tf.Graph()\n", 57 | ">>> with graph.as_default():\n", 58 | "... x = tf.Variable(100)\n", 59 | "... c = tf.constant(5)\n", 60 | "... increment_op = tf.assign(x, x + c)\n", 61 | "..." 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Collections" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | ">>> graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "tf.GraphKeys.GLOBAL_VARIABLES" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | ">>> graph.add_to_collection(\"my_collection\", c)\n", 96 | ">>> graph.get_collection(\"my_collection\")" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "## Navigating the Graph" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | ">>> graph = tf.Graph()\n", 113 | ">>> with graph.as_default():\n", 114 | "... a = tf.constant(3)\n", 115 | "... b = tf.constant(5)\n", 116 | "... 
s = a + b\n", 117 | "...\n", 118 | ">>> graph.get_operations()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | ">>> graph.get_operation_by_name(\"add\") is s.op" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | ">>> graph.get_tensor_by_name(\"add:0\") is s" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | ">>> list(s.op.inputs)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | ">>> list(s.op.outputs)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "## Naming Operations" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | ">>> graph = tf.Graph()\n", 171 | ">>> with graph.as_default():\n", 172 | "... a = tf.constant(3, name='a')\n", 173 | "... b = tf.constant(5, name='b')\n", 174 | "... s = tf.add(a, b, name='s')\n", 175 | "...\n" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | ">>> graph.get_operations()" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "## Exercise 3" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "![Exercise](https://c1.staticflickr.com/9/8101/8553474140_c50cf08708_b.jpg)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": { 204 | "collapsed": true 205 | }, 206 | "source": [ 207 | "3.1) Create a graph with four variables named `\"x1\"`, `\"x2\"`, `\"x3\"` and `\"x4\"`, with initial values 1.0, 2.0, 3.0 and 4.0 respectively, then write some code that prints the name of every operation in the graph." 
208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": { 234 | "collapsed": true 235 | }, 236 | "source": [ 237 | "3.2) Notice that for each `Variable`, TensorFlow actually created 4 operations:\n", 238 | "* the variable itself,\n", 239 | "* its initial value,\n", 240 | "* an assignment operation to assign the initial value to the variable,\n", 241 | "* and a read operation that you can safely ignore for now (for details, check out mrry's great answer to [this question](http://stackoverflow.com/questions/42783909/internals-of-variable-in-tensorflow)).\n", 242 | "\n", 243 | "Get the collection of global variables in the graph, and for each one of them use `get_operation_by_name()` to find its corresponding `/Assign` operation (just append `\"/Assign\"` to the variable's name).\n", 244 | "\n", 245 | "Hint: each object in the collection of global variables is actually a `Tensor`, not an `Operation` (it represents the variable's output, i.e., its value), so its name ends with `\":0\"`. You can get the `Operation` through the `Tensor`'s `op` attribute: its name will not end with `\":0\"`" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "3.3) Add a `tf.group()` to your graph, containing all the assignment operations you got in question 3.2. Congratulations! You have just reimplemented `tf.global_variables_initializer()`.\n", 274 | "\n", 275 | "Start a `Session()`, run your group operation, then evaluate each variable and print out the result." 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "3.4) For each assignment operation you fetched earlier, get its second input and store it in a list. Next, start a session and evaluate that list (using `sess.run()`). Print out the result: you should see `[1.0, 2.0, 3.0, 4.0]`. Can you guess why?" 
304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "Try not to peek at the solution below before you have done the exercise! :)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "![thinking](https://upload.wikimedia.org/wikipedia/commons/0/06/Filos_segundo_logo_%28flipped%29.jpg)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "## Exercise 3 - Solution" 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "3.1)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "graph = tf.Graph()\n", 362 | "with graph.as_default():\n", 363 | " x1 = tf.Variable(1.0, name=\"x1\")\n", 364 | " x2 = tf.Variable(2.0, name=\"x2\")\n", 365 | " x3 = tf.Variable(3.0, name=\"x3\")\n", 366 | " x4 = tf.Variable(4.0, name=\"x4\")" 367 | ] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": {}, 372 | "source": [ 373 | "3.2)" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "gvars = graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)\n", 383 | "init_assign_ops = [graph.get_operation_by_name(gvar.op.name + \"/Assign\")\n", 384 | " for gvar in gvars]" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "init_assign_ops" 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "3.3)" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": {}, 407 | "outputs": [], 408 | "source": [ 409 | "with graph.as_default():\n", 410 | " init = tf.group(*init_assign_ops)" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "with tf.Session(graph=graph):\n", 420 | " init.run()\n", 421 | " print(x1.eval())\n", 422 | " print(x2.eval())\n", 423 | " print(x3.eval())\n", 424 | " print(x4.eval())" 425 | ] 426 | }, 427 | { 428 | "cell_type": "markdown", 429 | "metadata": {}, 430 | "source": [ 431 | "3.4)" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": null, 437 | "metadata": {}, 438 | "outputs": [], 439 | "source": [ 440 | "init_val_ops = [init_assign_op.inputs[1]\n", 441 | " for init_assign_op in init_assign_ops]" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": null, 447 | "metadata": {}, 448 | "outputs": [], 449 | "source": [ 450 | "with tf.Session(graph=graph) as sess:\n", 451 | " print(sess.run(init_val_ops))" 452 | ] 453 | }, 454 | { 455 | "cell_type": "markdown", 456 | "metadata": {}, 457 | "source": [ 458 | "Explanation: in the case of assignment operations, the first input is a reference to the variable, and the second is the assignment value. 
The assignment operations we have here are used to initialize the variables, so their assignment values correspond to the initial values: 1.0 for `x1`, 2.0 for `x2`, 3.0 for `x3` and 4.0 for `x4`." 459 | ] 460 | } 461 | ], 462 | "metadata": { 463 | "kernelspec": { 464 | "display_name": "Python 3", 465 | "language": "python", 466 | "name": "python3" 467 | }, 468 | "language_info": { 469 | "codemirror_mode": { 470 | "name": "ipython", 471 | "version": 3 472 | }, 473 | "file_extension": ".py", 474 | "mimetype": "text/x-python", 475 | "name": "python", 476 | "nbconvert_exporter": "python", 477 | "pygments_lexer": "ipython3", 478 | "version": "3.6.3" 479 | } 480 | }, 481 | "nbformat": 4, 482 | "nbformat_minor": 2 483 | } 484 | -------------------------------------------------------------------------------- /04_linear_regression_ex4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![book](https://raw.githubusercontent.com/ageron/tensorflow-safari-course/master/images/intro_to_tf_course.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**Try not to peek at the solutions when you go through the exercises. ;-)**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "First let's make sure this notebook works well in both Python 2 and Python 3:" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from __future__ import absolute_import, division, print_function, unicode_literals" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import tensorflow as tf\n", 40 | "tf.__version__" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "# Linear Regression with TensorFlow" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "## Loading the training data" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "import numpy as np\n", 64 | "\n", 65 | "data = np.loadtxt(\"data/life_satisfaction.csv\",\n", 66 | " dtype=np.float32,\n", 67 | " delimiter=\",\",\n", 68 | " skiprows=1,\n", 69 | " usecols=[1, 2])\n", 70 | "X_train = data[:, 0:1] / 10000 # feature scaling\n", 71 | "y_train = data[:, 1:2]" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "Each row in `X_train` represents a training instance, in this case a country. In this simple regression example, there is just one feature per instance (i.e., one column), in this case the country's GDP per capita (in tens of thousands of dollars)." 
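,
     "\n",
     "For example, a (hypothetical) country with a GDP per capita of 55,000 dollars would be represented by the single feature value $55000 / 10000 = 5.5$. Keeping the inputs in this small range (roughly 0 to 6) is what the `# feature scaling` comment above refers to; small, comparable feature scales help the gradient descent steps used later in this notebook behave well with a fixed learning rate."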
79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "X_train" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "y_train" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "## Plot the data" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "%matplotlib inline\n", 113 | "import matplotlib.pyplot as plt\n", 114 | "\n", 115 | "plt.rcParams['axes.labelsize'] = 14\n", 116 | "plt.rcParams['xtick.labelsize'] = 12\n", 117 | "plt.rcParams['ytick.labelsize'] = 12" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "def plot_life_satisfaction(X_train, y_train):\n", 127 | " plt.plot(X_train * 10000, y_train, \"bo\")\n", 128 | " plt.axis([0, 60000, 0, 10])\n", 129 | " plt.xlabel(\"GDP per capita ($)\")\n", 130 | " plt.ylabel(\"Life Satisfaction\")\n", 131 | " plt.grid()\n", 132 | "\n", 133 | "plt.figure(figsize=(10,5))\n", 134 | "plot_life_satisfaction(X_train, y_train)\n", 135 | "plt.show()" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "## Building the Linear Regression Model" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "## Exercise 4" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "![Exercise](https://c1.staticflickr.com/9/8101/8553474140_c50cf08708_b.jpg)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": { 162 | "collapsed": true 163 | }, 164 | "source": [ 165 | "In this exercise we will build a linear regression model using TensorFlow. If you are not familiar with the maths behind linear regression models, you can read the explanation below. If you already know this (or if you don't care much about the maths), you can just skip this explanation and simply follow the instructions given in questions 4.1 to 4.3 below.\n", 166 | "\n", 167 | "In a linear regression model, the predictions are a linear combination of the input features. 
In other words, the predicted value $\\hat{y}$ can be computed using the following equation:\n", 168 | "\n", 169 | "$\\hat{y} = w_1 x_1 + w_2 x_2 + \\dots + w_n x_n + b$\n", 170 | "\n", 171 | "where:\n", 172 | "* $x_1, x_2, \\dots, x_n $ are the input features,\n", 173 | "* $w_1, w_2, \\dots, w_n $, are their corresponding weights,\n", 174 | "* and $b$ is the bias term (also called the intercept term).\n", 175 | "\n", 176 | "This equation can be expressed in a more compact way using vectors:\n", 177 | "\n", 178 | "$\\hat{y} = \\langle \\mathbf{x}, \\mathbf{w} \\rangle + b$\n", 179 | "\n", 180 | "where:\n", 181 | "* $ \\mathbf{x} = \\begin{pmatrix}x_1 \\\\ x_2 \\\\ \\vdots \\\\ x_n \\end{pmatrix}$ is the input feature vector (by convention, vectors are written in bold font),\n", 182 | "* $ \\mathbf{w} = \\begin{pmatrix}w_1 \\\\ w_2 \\\\ \\vdots \\\\ w_n \\end{pmatrix}$ is the weight vector,\n", 183 | "* $\\langle \\mathbf{x}, \\mathbf{w} \\rangle$ is the inner product of vectors $\\mathbf{x}$ and $\\mathbf{w}$, equal to $w_1 x_1 + w_2 x_2 + \\dots + w_n x_n$.\n", 184 | "\n", 185 | "It is often more convenient to handle vectors as matrices with a single column (a \"column vector\"). The inner product $\\langle \\mathbf{x}, \\mathbf{w} \\rangle$ is then replaced with the matrix dot product: $\\mathbf{x}^T \\cdot \\mathbf{w}$, where $\\mathbf{x}^T$ is the transpose of the column vector $\\mathbf{x}$. Transposing a column vector gives you a \"row vector\" (i.e., a matrix with a single row): $\\mathbf{x}^T = \\begin{pmatrix} x_1 & x_2 & \\dots & x_n \\end{pmatrix}$. Once again $\\mathbf{x}^T \\cdot \\mathbf{w} = w_1 x_1 + w_2 x_2 + \\dots + w_n x_n$.\n", 186 | "\n", 187 | "Lastly, it is possible to compute predictions for many instances at a time by putting all their input features in a matrix $\\mathbf{X}$ (by convention, matrices are in capital letters with a bold font, except when they just represent column or row vectors). The vector containing the predictions for every instance can be computed using the following equation:\n", 188 | "\n", 189 | "$\\hat{\\mathbf{y}} = \\mathbf{X} \\cdot \\mathbf{w} + b$\n", 190 | "\n", 191 | "where:\n", 192 | "* $ \\hat{\\mathbf{y}} = \\begin{pmatrix}\\hat{y}^{(1)} \\\\ \\hat{y}^{(2)} \\\\ \\vdots \\\\ \\hat{y}^{(m)} \\end{pmatrix}$ is the prediction vector, containing the predictions for all $m$ instances.\n", 193 | "* $ \\mathbf{X} = \\begin{pmatrix}x_1^{(1)} & x_2^{(1)} & \\cdots & x_n^{(1)} \\\\\n", 194 | " x_1^{(2)} & x_2^{(2)} & \\cdots & x_n^{(2)} \\\\\n", 195 | " \\vdots & \\vdots & \\ddots & \\vdots \\\\\n", 196 | " x_1^{(m)} & x_2^{(m)} & \\cdots & x_n^{(m)}\\end{pmatrix} = \\begin{pmatrix}(\\mathbf{x}^{(1)})^T \\\\\n", 197 | " (\\mathbf{x}^{(2)})^T \\\\\n", 198 | " \\vdots \\\\\n", 199 | " (\\mathbf{x}^{(m)})^T\\end{pmatrix} $ is the input feature matrix. It contains the input features of all instances for which you want to make predictions. Each row represents an instance, each column represents a feature.\n", 200 | "* Note that the matrix dot product $\\mathbf{X} \\cdot \\mathbf{w}$ returns a column vector, so when we add the bias term $b$, we mean adding that value to each and every element in the column vector (this is called _broadcasting_)." 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": { 206 | "collapsed": true 207 | }, 208 | "source": [ 209 | "4.1) Create a graph containing:\n", 210 | "* a constant `X` initialized with `X_train`, which contains the input features of the training instances. 
In this particular example, there is just a single feature per instance (i.e., the GDP per capita).\n",
 211 |     "* a constant `y` initialized with `y_train`, which contains the labels of each instance (i.e., the life satisfaction).\n",
 212 |     "* a variable `b`, representing the bias term (initialized to 0.0).\n",
 213 |     "* a variable `w`, representing the weight vector (initialized to a column vector full of zeros, using `tf.zeros()`). Since there is just one input feature per instance in this example, this column vector contains a single row (it is a matrix with a single item).\n",
 214 |     "* an operation `y_pred` that computes the equation presented above: $\\hat{\\mathbf{y}} = \\mathbf{X} \\cdot \\mathbf{w} + b$. You will need to use `tf.matmul()`.\n",
 215 |     "* as always, don't forget to add an `init` operation, using `tf.global_variables_initializer()`."
 216 |    ]
 217 |   },
 218 |   {
 219 |    "cell_type": "code",
 220 |    "execution_count": null,
 221 |    "metadata": {},
 222 |    "outputs": [],
 223 |    "source": []
 224 |   },
 225 |   {
 226 |    "cell_type": "code",
 227 |    "execution_count": null,
 228 |    "metadata": {},
 229 |    "outputs": [],
 230 |    "source": []
 231 |   },
 232 |   {
 233 |    "cell_type": "code",
 234 |    "execution_count": null,
 235 |    "metadata": {},
 236 |    "outputs": [],
 237 |    "source": []
 238 |   },
 239 |   {
 240 |    "cell_type": "markdown",
 241 |    "metadata": {},
 242 |    "source": [
 243 |     "4.2) Start a session, run the `init` operation and evaluate the predictions `y_pred`. Since both variables `b` and `w` are initialized with zeros, you should get a vector full of zeros."
 244 |    ]
 245 |   },
 246 |   {
 247 |    "cell_type": "code",
 248 |    "execution_count": null,
 249 |    "metadata": {},
 250 |    "outputs": [],
 251 |    "source": []
 252 |   },
 253 |   {
 254 |    "cell_type": "code",
 255 |    "execution_count": null,
 256 |    "metadata": {},
 257 |    "outputs": [],
 258 |    "source": []
 259 |   },
 260 |   {
 261 |    "cell_type": "code",
 262 |    "execution_count": null,
 263 |    "metadata": {},
 264 |    "outputs": [],
 265 |    "source": []
 266 |   },
 267 |   {
 268 |    "cell_type": "markdown",
 269 |    "metadata": {},
 270 |    "source": [
 271 |     "4.3) Let's measure how bad the model is using a cost function (also called a loss function). In regression tasks, it is common to use the Mean Square Error (MSE) as the cost function. It is given by the following equation:\n",
 272 |     "\n",
 273 |     "$\\text{MSE}(\\mathbf{w}, b) = \\dfrac{1}{m} \\sum\\limits_{i=1}^{m}{(\\hat{y}^{(i)}-y^{(i)})^2}$.\n",
 274 |     "\n",
 275 |     "Add an `mse` operation to your graph, to compute the Mean Square Error. Hint: use `tf.reduce_mean()` and `tf.square()`."
 276 |    ]
 277 |   },
 278 |   {
 279 |    "cell_type": "code",
 280 |    "execution_count": null,
 281 |    "metadata": {},
 282 |    "outputs": [],
 283 |    "source": []
 284 |   },
 285 |   {
 286 |    "cell_type": "code",
 287 |    "execution_count": null,
 288 |    "metadata": {},
 289 |    "outputs": [],
 290 |    "source": []
 291 |   },
 292 |   {
 293 |    "cell_type": "code",
 294 |    "execution_count": null,
 295 |    "metadata": {},
 296 |    "outputs": [],
 297 |    "source": []
 298 |   },
 299 |   {
 300 |    "cell_type": "markdown",
 301 |    "metadata": {},
 302 |    "source": [
 303 |     "4.4) Now start a session, initialize the variables and evaluate the MSE. As you can see, the result is quite high: this makes sense since we have not trained the model yet.\n",
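     "\n",
     "Sanity check: since `w` and `b` are still zero at this point, every prediction is 0, so the MSE reduces to $\\dfrac{1}{m} \\sum\\limits_{i=1}^{m}{(y^{(i)})^2}$, the mean of the squared labels. With life-satisfaction scores mostly between roughly 4.5 and 8, you should expect a value of a few dozen."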
304 |    ]
 305 |   },
 306 |   {
 307 |    "cell_type": "code",
 308 |    "execution_count": null,
 309 |    "metadata": {},
 310 |    "outputs": [],
 311 |    "source": []
 312 |   },
 313 |   {
 314 |    "cell_type": "code",
 315 |    "execution_count": null,
 316 |    "metadata": {},
 317 |    "outputs": [],
 318 |    "source": []
 319 |   },
 320 |   {
 321 |    "cell_type": "code",
 322 |    "execution_count": null,
 323 |    "metadata": {},
 324 |    "outputs": [],
 325 |    "source": []
 326 |   },
 327 |   {
 328 |    "cell_type": "markdown",
 329 |    "metadata": {},
 330 |    "source": [
 331 |     "4.5) To find the optimal values for the model parameters (i.e., the variables `w` and `b`), we will use Gradient Descent. For this, we first need to compute the gradient of the cost function with regards to the model parameters.\n",
 332 |     "\n",
 333 |     "The gradient of the MSE with regards to the weight vector $\\mathbf{w}$ is:\n",
 334 |     "\n",
 335 |     "$\\nabla_{\\mathbf{w}}\\, \\text{MSE}(\\mathbf{w}, b) =\n",
 336 |     "\\begin{pmatrix}\n",
 337 |     " \\frac{\\partial}{\\partial w_0} \\text{MSE}(\\mathbf{w}, b) \\\\\n",
 338 |     " \\frac{\\partial}{\\partial w_1} \\text{MSE}(\\mathbf{w}, b) \\\\\n",
 339 |     " \\vdots \\\\\n",
 340 |     " \\frac{\\partial}{\\partial w_n} \\text{MSE}(\\mathbf{w}, b)\n",
 341 |     "\\end{pmatrix}\n",
 342 |     " = \\dfrac{2}{m} \\mathbf{X}^T \\cdot (\\hat{\\mathbf{y}} - \\mathbf{y})\n",
 343 |     "$\n",
 344 |     "\n",
 345 |     "And the partial derivative with regards to the bias $b$ is:\n",
 346 |     "\n",
 347 |     "$\n",
 348 |     "\\dfrac{\\partial}{\\partial b} \\text{MSE}(\\mathbf{w}, b) = \\dfrac{2}{m} \\sum\\limits_{i=1}^{m}(\\hat{y}^{(i)}-y^{(i)})\n",
 349 |     "$\n",
 350 |     "\n",
 351 |     "Add the operations `gradients_w` and `gradients_b` to your graph, using the equations above."
 352 |    ]
 353 |   },
 354 |   {
 355 |    "cell_type": "code",
 356 |    "execution_count": null,
 357 |    "metadata": {},
 358 |    "outputs": [],
 359 |    "source": []
 360 |   },
 361 |   {
 362 |    "cell_type": "code",
 363 |    "execution_count": null,
 364 |    "metadata": {},
 365 |    "outputs": [],
 366 |    "source": []
 367 |   },
 368 |   {
 369 |    "cell_type": "code",
 370 |    "execution_count": null,
 371 |    "metadata": {},
 372 |    "outputs": [],
 373 |    "source": []
 374 |   },
 375 |   {
 376 |    "cell_type": "markdown",
 377 |    "metadata": {},
 378 |    "source": [
 379 |     "4.6) To perform a Gradient Descent step, we need to subtract the gradients (multiplied by the learning rate $\\eta$) from the weight vector and the bias:\n",
 380 |     "\n",
 381 |     "$\n",
 382 |     "\\mathbf{w} \\gets \\mathbf{w} - \\eta \\nabla_{\\mathbf{w}}\\, \\text{MSE}(\\mathbf{w}, b)\n",
 383 |     "$\n",
 384 |     "\n",
 385 |     "$\n",
 386 |     "b \\gets b - \\eta \\dfrac{\\partial}{\\partial b} \\text{MSE}(\\mathbf{w}, b)\n",
 387 |     "$\n",
 388 |     "\n",
 389 |     "Add two assignment operations, `tweak_w_op` and `tweak_b_op`, that perform the assignments above, using a small learning rate $\\eta = 0.01$."
 390 |    ]
 391 |   },
 392 |   {
 393 |    "cell_type": "code",
 394 |    "execution_count": null,
 395 |    "metadata": {},
 396 |    "outputs": [],
 397 |    "source": []
 398 |   },
 399 |   {
 400 |    "cell_type": "code",
 401 |    "execution_count": null,
 402 |    "metadata": {},
 403 |    "outputs": [],
 404 |    "source": []
 405 |   },
 406 |   {
 407 |    "cell_type": "code",
 408 |    "execution_count": null,
 409 |    "metadata": {},
 410 |    "outputs": [],
 411 |    "source": []
 412 |   },
 413 |   {
 414 |    "cell_type": "markdown",
 415 |    "metadata": {},
 416 |    "source": [
 417 |     "4.7) That's it! We're ready to train the model. Start a session, initialize the variables, then write a loop that will repeatedly evaluate the assignment operations (e.g., 2000 times). 
Every 100 iterations, evaluate the MSE and print it out. Within a few hundred iterations the MSE should drop below 1.0, and eventually reach about 0.18. Congratulations! You built and trained your first Machine Learning model using TensorFlow!" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": {}, 424 | "outputs": [], 425 | "source": [] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": null, 437 | "metadata": {}, 438 | "outputs": [], 439 | "source": [] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "Try not to peek at the solution below before you have done the exercise! :)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "![thinking](https://upload.wikimedia.org/wikipedia/commons/0/06/Filos_segundo_logo_%28flipped%29.jpg)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": {}, 458 | "source": [ 459 | "## Exercise 4 - Solution" 460 | ] 461 | }, 462 | { 463 | "cell_type": "markdown", 464 | "metadata": {}, 465 | "source": [ 466 | "4.1)" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "metadata": {}, 473 | "outputs": [], 474 | "source": [ 475 | "graph = tf.Graph()\n", 476 | "with graph.as_default():\n", 477 | " X = tf.constant(X_train, name=\"X\")\n", 478 | " y = tf.constant(y_train, name=\"y\")\n", 479 | "\n", 480 | " b = tf.Variable(0.0, name=\"b\")\n", 481 | " w = tf.Variable(tf.zeros([1, 1]), name=\"w\")\n", 482 | " y_pred = tf.add(tf.matmul(X, w), b, name=\"y_pred\") # X @ w + b\n", 483 | " \n", 484 | " init = tf.global_variables_initializer()" 485 | ] 486 | }, 487 | { 488 | "cell_type": "markdown", 489 | "metadata": {}, 490 | "source": [ 491 | "4.2)" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": null, 497 | "metadata": {}, 498 | "outputs": [], 499 | "source": [ 500 | "with tf.Session(graph=graph) as sess:\n", 501 | " init.run()\n", 502 | " print(y_pred.eval())" 503 | ] 504 | }, 505 | { 506 | "cell_type": "markdown", 507 | "metadata": {}, 508 | "source": [ 509 | "4.3)" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "with graph.as_default():\n", 519 | " error = y_pred - y\n", 520 | " square_error = tf.square(error)\n", 521 | " mse = tf.reduce_mean(square_error, name=\"mse\")" 522 | ] 523 | }, 524 | { 525 | "cell_type": "markdown", 526 | "metadata": {}, 527 | "source": [ 528 | "4.4)" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": null, 534 | "metadata": {}, 535 | "outputs": [], 536 | "source": [ 537 | "with tf.Session(graph=graph) as sess:\n", 538 | " init.run()\n", 539 | " print(mse.eval())" 540 | ] 541 | }, 542 | { 543 | "cell_type": "markdown", 544 | "metadata": {}, 545 | "source": [ 546 | "4.5)" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "with graph.as_default():\n", 556 | " m = len(X_train)\n", 557 | " gradients_w = 2/m * tf.matmul(tf.transpose(X), error)\n", 558 | " gradients_b = 2 * tf.reduce_mean(error)" 559 | ] 560 | }, 561 | { 562 | "cell_type": "markdown", 563 | "metadata": {}, 564 | "source": [ 565 | "4.6)" 566 | ] 567 | }, 568 | { 569 | 
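   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: the solution below builds the update step out of explicit `tf.assign()` operations, grouped into a single `training_op` with `tf.group()` (the same pattern as in exercise 3.3), since that is what the exercise asks for. For reference, a minimal equivalent sketch using TensorFlow's built-in optimizer would be:\n",
    "\n",
    "```python\n",
    "optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)\n",
    "training_op = optimizer.minimize(mse)  # computes the gradients and applies both updates\n",
    "```"
   ]
  },
  {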
"cell_type": "code", 570 | "execution_count": null, 571 | "metadata": {}, 572 | "outputs": [], 573 | "source": [ 574 | "learning_rate = 0.01\n", 575 | "\n", 576 | "with graph.as_default():\n", 577 | " tweak_w_op = tf.assign(w, w - learning_rate * gradients_w)\n", 578 | " tweak_b_op = tf.assign(b, b - learning_rate * gradients_b)\n", 579 | " training_op = tf.group(tweak_w_op, tweak_b_op)" 580 | ] 581 | }, 582 | { 583 | "cell_type": "markdown", 584 | "metadata": {}, 585 | "source": [ 586 | "4.7)" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": null, 592 | "metadata": {}, 593 | "outputs": [], 594 | "source": [ 595 | "n_iterations = 2000\n", 596 | "\n", 597 | "with tf.Session(graph=graph) as sess:\n", 598 | " init.run()\n", 599 | " for iteration in range(n_iterations):\n", 600 | " if iteration % 100 == 0:\n", 601 | " print(\"Iteration {:5}, MSE: {:.4f}\".format(iteration, mse.eval()))\n", 602 | " training_op.run()\n", 603 | " w_val, b_val = sess.run([w, b])" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "metadata": {}, 610 | "outputs": [], 611 | "source": [ 612 | "def plot_life_satisfaction_with_linear_model(X_train, y_train, w, b):\n", 613 | " plot_life_satisfaction(X_train, y_train)\n", 614 | " plt.plot([0, 60000], [b, w[0][0] * (60000 / 10000) + b])\n", 615 | "\n", 616 | "plt.figure(figsize=(10, 5))\n", 617 | "plot_life_satisfaction_with_linear_model(X_train, y_train, w_val, b_val)\n", 618 | "plt.show()" 619 | ] 620 | } 621 | ], 622 | "metadata": { 623 | "kernelspec": { 624 | "display_name": "Python 3", 625 | "language": "python", 626 | "name": "python3" 627 | }, 628 | "language_info": { 629 | "codemirror_mode": { 630 | "name": "ipython", 631 | "version": 3 632 | }, 633 | "file_extension": ".py", 634 | "mimetype": "text/x-python", 635 | "name": "python", 636 | "nbconvert_exporter": "python", 637 | "pygments_lexer": "ipython3", 638 | "version": "3.6.3" 639 | } 640 | }, 641 | "nbformat": 4, 642 | "nbformat_minor": 2 643 | } 644 | -------------------------------------------------------------------------------- /05_autodiff_ex5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![book](https://raw.githubusercontent.com/ageron/tensorflow-safari-course/master/images/intro_to_tf_course.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**Try not to peek at the solutions when you go through the exercises. 
;-)**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "First let's make sure this notebook works well in both Python 2 and Python 3:" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from __future__ import absolute_import, division, print_function, unicode_literals" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import tensorflow as tf\n", 40 | "tf.__version__" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "*__From notebook 4 linear regression__*" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "import numpy as np\n", 57 | "\n", 58 | "data = np.loadtxt(\"data/life_satisfaction.csv\",\n", 59 | " dtype=np.float32,\n", 60 | " delimiter=\",\",\n", 61 | " skiprows=1,\n", 62 | " usecols=[1, 2])\n", 63 | "X_train = data[:, 0:1] / 10000 # feature scaling\n", 64 | "y_train = data[:, 1:2]\n", 65 | "\n", 66 | "learning_rate = 0.01" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "%matplotlib inline\n", 76 | "import matplotlib.pyplot as plt\n", 77 | "\n", 78 | "plt.rcParams['axes.labelsize'] = 14\n", 79 | "plt.rcParams['xtick.labelsize'] = 12\n", 80 | "plt.rcParams['ytick.labelsize'] = 12" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "def plot_life_satisfaction(X_train, y_train):\n", 90 | " plt.plot(X_train * 10000, y_train, \"bo\")\n", 91 | " plt.axis([0, 60000, 0, 10])\n", 92 | " plt.xlabel(\"GDP per capita ($)\")\n", 93 | " plt.ylabel(\"Life Satisfaction\")\n", 94 | " plt.grid()\n", 95 | "\n", 96 | "def plot_life_satisfaction_with_linear_model(X_train, y_train, w, b):\n", 97 | " plot_life_satisfaction(X_train, y_train)\n", 98 | " plt.plot([0, 60000], [b, w[0][0] * (60000 / 10000) + b])" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "# Using autodiff Instead" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "graph = tf.Graph()\n", 115 | "with graph.as_default():\n", 116 | " X = tf.constant(X_train, dtype=tf.float32, name=\"X\")\n", 117 | " y = tf.constant(y_train, dtype=tf.float32, name=\"y\")\n", 118 | "\n", 119 | " b = tf.Variable(0.0, name=\"b\")\n", 120 | " w = tf.Variable(tf.zeros([1, 1]), name=\"w\")\n", 121 | " y_pred = tf.add(tf.matmul(X, w), b, name=\"y_pred\") # X @ w + b\n", 122 | " \n", 123 | " mse = tf.reduce_mean(tf.square(y_pred - y), name=\"mse\")\n", 124 | "\n", 125 | " gradients_w, gradients_b = tf.gradients(mse, [w, b]) # <= IT'S AUTODIFF MAGIC!\n", 126 | "\n", 127 | " tweak_w_op = tf.assign(w, w - learning_rate * gradients_w)\n", 128 | " tweak_b_op = tf.assign(b, b - learning_rate * gradients_b)\n", 129 | " training_op = tf.group(tweak_w_op, tweak_b_op)\n", 130 | "\n", 131 | " init = tf.global_variables_initializer()" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "n_iterations = 2000\n", 141 | "\n", 142 | "with tf.Session(graph=graph) as sess:\n", 143 | " 
init.run()\n", 144 | " for iteration in range(n_iterations):\n", 145 | " if iteration % 100 == 0:\n", 146 | " print(\"Iteration {:5}, MSE: {:.4f}\".format(iteration, mse.eval()))\n", 147 | " training_op.run()\n", 148 | " w_val, b_val = sess.run([w, b])" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "plt.figure(figsize=(10, 5))\n", 158 | "plot_life_satisfaction_with_linear_model(X_train, y_train, w_val, b_val)\n", 159 | "plt.show()" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "## Using Optimizers " 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "graph = tf.Graph()\n", 176 | "with graph.as_default():\n", 177 | " X = tf.constant(X_train, dtype=tf.float32, name=\"X\")\n", 178 | " y = tf.constant(y_train, dtype=tf.float32, name=\"y\")\n", 179 | "\n", 180 | " b = tf.Variable(0.0, name=\"b\")\n", 181 | " w = tf.Variable(tf.zeros([1, 1]), name=\"w\")\n", 182 | " y_pred = tf.add(tf.matmul(X, w), b, name=\"y_pred\") # X @ w + b\n", 183 | " \n", 184 | " mse = tf.reduce_mean(tf.square(y_pred - y), name=\"mse\")\n", 185 | "\n", 186 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 187 | " training_op = optimizer.minimize(mse) # <= MOAR AUTODIFF MAGIC!\n", 188 | "\n", 189 | " init = tf.global_variables_initializer()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "n_iterations = 2000\n", 199 | "\n", 200 | "with tf.Session(graph=graph) as sess:\n", 201 | " init.run()\n", 202 | " for iteration in range(n_iterations):\n", 203 | " if iteration % 100 == 0:\n", 204 | " print(\"Iteration {:5}, MSE: {:.4f}\".format(iteration, mse.eval()))\n", 205 | " training_op.run()\n", 206 | " w_val, b_val = sess.run([w, b])" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "plt.figure(figsize=(10, 5))\n", 216 | "plot_life_satisfaction_with_linear_model(X_train, y_train, w_val, b_val)\n", 217 | "plt.show()" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "## Faster Optimizers" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "learning_rate = 0.01\n", 234 | "momentum = 0.8\n", 235 | "\n", 236 | "graph = tf.Graph()\n", 237 | "with graph.as_default():\n", 238 | " X = tf.constant(X_train, dtype=tf.float32, name=\"X\")\n", 239 | " y = tf.constant(y_train, dtype=tf.float32, name=\"y\")\n", 240 | "\n", 241 | " b = tf.Variable(0.0, name=\"b\")\n", 242 | " w = tf.Variable(tf.zeros([1, 1]), name=\"w\")\n", 243 | " y_pred = tf.add(tf.matmul(X, w), b, name=\"y_pred\") # X @ w + b\n", 244 | " \n", 245 | " mse = tf.reduce_mean(tf.square(y_pred - y), name=\"mse\")\n", 246 | "\n", 247 | " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", 248 | " training_op = optimizer.minimize(mse)\n", 249 | "\n", 250 | " init = tf.global_variables_initializer()" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "n_iterations = 500\n", 260 | "\n", 261 | "with tf.Session(graph=graph) as sess:\n", 
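"    # with momentum, 500 iterations are enough here, versus 2000 for the plain gradient descent loops above\n",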
262 | " init.run()\n", 263 | " for iteration in range(n_iterations):\n", 264 | " if iteration % 100 == 0:\n", 265 | " print(\"Iteration {:5}, MSE: {:.4f}\".format(iteration, mse.eval()))\n", 266 | " training_op.run()\n", 267 | " w_val, b_val = sess.run([w, b])" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "plt.figure(figsize=(10, 5))\n", 277 | "plot_life_satisfaction_with_linear_model(X_train, y_train, w_val, b_val)\n", 278 | "plt.show()" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "How does the optimizer know which variables to tweak? Answer: the `TRAINABLE_VARIABLES` collection." 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "coll = graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)\n", 295 | "[var.op.name for var in coll]" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "## Making Predictions Outside of TensorFlow" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "cyprus_gdp_per_capita = 22000\n", 312 | "cyprus_life_satisfaction = w_val[0][0] * cyprus_gdp_per_capita / 10000 + b_val\n", 313 | "cyprus_life_satisfaction" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "## Using placeholders " 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "graph = tf.Graph()\n", 330 | "with graph.as_default():\n", 331 | " X = tf.placeholder(tf.float32, shape=[None, 1], name=\"X\") # <= None allows for any\n", 332 | " y = tf.placeholder(tf.float32, shape=[None, 1], name=\"y\") # training batch size\n", 333 | "\n", 334 | " b = tf.Variable(0.0, name=\"b\")\n", 335 | " w = tf.Variable(tf.zeros([1, 1]), name=\"w\")\n", 336 | " y_pred = tf.add(tf.matmul(X, w), b, name=\"y_pred\") # X @ w + b\n", 337 | " \n", 338 | " mse = tf.reduce_mean(tf.square(y_pred - y), name=\"mse\")\n", 339 | "\n", 340 | " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", 341 | " training_op = optimizer.minimize(mse)\n", 342 | "\n", 343 | " init = tf.global_variables_initializer()" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "n_iterations = 500\n", 353 | "\n", 354 | "X_test = np.array([[22000]], dtype=np.float32) / 10000\n", 355 | "\n", 356 | "with tf.Session(graph=graph) as sess:\n", 357 | " init.run()\n", 358 | " for iteration in range(n_iterations):\n", 359 | " feed_dict = {X: X_train, y: y_train}\n", 360 | " if iteration % 100 == 0:\n", 361 | " print(\"Iteration {:5}, MSE: {:.4f}\".format(\n", 362 | " iteration, \n", 363 | " mse.eval(feed_dict))) # <= FEED TRAINING DATA\n", 364 | " training_op.run(feed_dict) # <= FEED TRAINING DATA\n", 365 | " # make the prediction:\n", 366 | " y_pred_val = y_pred.eval(feed_dict={X: X_test}) # <= FEED TEST DATA" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "y_pred_val" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "## Exercise 5" 383 | ] 384 | }, 385 
| { 386 | "cell_type": "markdown", 387 | "metadata": {}, 388 | "source": [ 389 | "![Exercise](https://c1.staticflickr.com/9/8101/8553474140_c50cf08708_b.jpg)" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": { 395 | "collapsed": true 396 | }, 397 | "source": [ 398 | "5.1) Create a simple graph that computes the function $f(x) = x^2 - 3x + 1$. Define $x$ as a placeholder for a simple scalar value of type float32 (i.e., `shape=[], dtype=tf.float32`). Create a session and evaluate $f(5)$. You should find 11.0." 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": null, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": null, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": {}, 425 | "source": [ 426 | "5.2) Add an operation that computes the derivative of $f(x)$ with regards to $x$, denoted $f'(x)$. Create a session and evaluate $f'(5)$. You should find 7.0.\n", 427 | "\n", 428 | "Hint: use `tf.gradients()`." 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "metadata": {}, 442 | "outputs": [], 443 | "source": [] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": null, 448 | "metadata": {}, 449 | "outputs": [], 450 | "source": [] 451 | }, 452 | { 453 | "cell_type": "markdown", 454 | "metadata": {}, 455 | "source": [ 456 | "5.3) Using a `MomentumOptimizer`, find the value of $x$ that minimizes $f(x)$. You should find $\\hat{x}=1.5$.\n", 457 | "\n", 458 | "Hint: you need to change `x` into a `Variable`. Moreover, the `MomentumOptimizer` has its own variables that need to be initialized, so don't forget to create an `init` operation using a `tf.global_variables_initializer()`, and call it at the start of the session." 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": null, 464 | "metadata": {}, 465 | "outputs": [], 466 | "source": [] 467 | }, 468 | { 469 | "cell_type": "code", 470 | "execution_count": null, 471 | "metadata": {}, 472 | "outputs": [], 473 | "source": [] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "metadata": {}, 485 | "source": [ 486 | "Try not to peek at the solution below before you have done the exercise! 
:)" 487 | ] 488 | }, 489 | { 490 | "cell_type": "markdown", 491 | "metadata": {}, 492 | "source": [ 493 | "![thinking](https://upload.wikimedia.org/wikipedia/commons/0/06/Filos_segundo_logo_%28flipped%29.jpg)" 494 | ] 495 | }, 496 | { 497 | "cell_type": "markdown", 498 | "metadata": {}, 499 | "source": [ 500 | "## Exercise 5 - Solution" 501 | ] 502 | }, 503 | { 504 | "cell_type": "markdown", 505 | "metadata": {}, 506 | "source": [ 507 | "5.1)" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": null, 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "graph = tf.Graph()\n", 517 | "with graph.as_default():\n", 518 | " x = tf.placeholder(tf.float32, shape=[], name=\"x\")\n", 519 | " f = tf.square(x) - 3 * x + 1" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": null, 525 | "metadata": {}, 526 | "outputs": [], 527 | "source": [ 528 | "with tf.Session(graph=graph):\n", 529 | " print(f.eval(feed_dict={x: 5.0}))" 530 | ] 531 | }, 532 | { 533 | "cell_type": "markdown", 534 | "metadata": {}, 535 | "source": [ 536 | "5.2)" 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": null, 542 | "metadata": {}, 543 | "outputs": [], 544 | "source": [ 545 | "with graph.as_default():\n", 546 | " [fp] = tf.gradients(f, [x])" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "with tf.Session(graph=graph):\n", 556 | " print(fp.eval(feed_dict={x: 5.0}))" 557 | ] 558 | }, 559 | { 560 | "cell_type": "markdown", 561 | "metadata": {}, 562 | "source": [ 563 | "5.3)" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": null, 569 | "metadata": {}, 570 | "outputs": [], 571 | "source": [ 572 | "learning_rate = 0.01\n", 573 | "momentum = 0.8\n", 574 | "\n", 575 | "graph = tf.Graph()\n", 576 | "with graph.as_default():\n", 577 | " x = tf.Variable(0.0, name=\"x\")\n", 578 | " f = tf.square(x) - 3 * x + 1 \n", 579 | " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", 580 | " training_op = optimizer.minimize(f)\n", 581 | " init = tf.global_variables_initializer()" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": null, 587 | "metadata": {}, 588 | "outputs": [], 589 | "source": [ 590 | "n_iterations = 70\n", 591 | "with tf.Session(graph=graph):\n", 592 | " init.run()\n", 593 | " for iteration in range(n_iterations):\n", 594 | " training_op.run()\n", 595 | " if iteration % 10 == 0:\n", 596 | " print(\"x={:.2f}, f(x)={:.2f}\".format(x.eval(), f.eval()))" 597 | ] 598 | }, 599 | { 600 | "cell_type": "markdown", 601 | "metadata": {}, 602 | "source": [ 603 | "Note that it's possible to replace the output value of any operation, not just placeholders. So, for example, even though `x` is now a `Variable`, you can use a `feed_dict` to use any value you want, for example to compute `f(5.0)`. **Important**: this does _not_ affect the variable!" 
604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "metadata": {}, 610 | "outputs": [], 611 | "source": [ 612 | "with tf.Session(graph=graph):\n", 613 | " init.run()\n", 614 | " print(x.eval()) # x == 0.0\n", 615 | " print(f.eval()) # f(0) == 1.0\n", 616 | " print(f.eval(feed_dict={x: 5.0})) # use 5.0 instead of the value of x, to compute f(5)\n", 617 | " print(x.eval()) # x is still 0.0\n", 618 | " print(f.eval()) # f(0) is still 1.0" 619 | ] 620 | }, 621 | { 622 | "cell_type": "markdown", 623 | "metadata": {}, 624 | "source": [ 625 | "## Saving and Restoring a Model" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": null, 631 | "metadata": {}, 632 | "outputs": [], 633 | "source": [ 634 | "graph = tf.Graph()\n", 635 | "with graph.as_default():\n", 636 | " X = tf.placeholder(tf.float32, shape=[None, 1], name=\"X\")\n", 637 | " y = tf.placeholder(tf.float32, shape=[None, 1], name=\"y\")\n", 638 | "\n", 639 | " b = tf.Variable(0.0, name=\"b\")\n", 640 | " w = tf.Variable(tf.zeros([1, 1]), name=\"w\")\n", 641 | " y_pred = tf.add(tf.matmul(X, w), b, name=\"y_pred\") # X @ w + b\n", 642 | " \n", 643 | " mse = tf.reduce_mean(tf.square(y_pred - y), name=\"mse\")\n", 644 | "\n", 645 | " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", 646 | " training_op = optimizer.minimize(mse)\n", 647 | "\n", 648 | " init = tf.global_variables_initializer()\n", 649 | " saver = tf.train.Saver() # <= At the very end of the construction phase" 650 | ] 651 | }, 652 | { 653 | "cell_type": "code", 654 | "execution_count": null, 655 | "metadata": {}, 656 | "outputs": [], 657 | "source": [ 658 | "n_iterations = 500\n", 659 | "\n", 660 | "with tf.Session(graph=graph) as sess:\n", 661 | " init.run()\n", 662 | " for iteration in range(n_iterations):\n", 663 | " if iteration % 100 == 0:\n", 664 | " print(\"Iteration {:5}, MSE: {:.4f}\".format(\n", 665 | " iteration, \n", 666 | " mse.eval(feed_dict={X: X_train, y: y_train})))\n", 667 | " training_op.run(feed_dict={X: X_train, y: y_train}) # <= FEED THE DICT\n", 668 | " saver.save(sess, \"./my_life_satisfaction_model\")" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": null, 674 | "metadata": {}, 675 | "outputs": [], 676 | "source": [ 677 | "with tf.Session(graph=graph) as sess:\n", 678 | " saver.restore(sess, \"./my_life_satisfaction_model\")\n", 679 | " # make the prediction:\n", 680 | " y_pred_val = y_pred.eval(feed_dict={X: X_test})" 681 | ] 682 | }, 683 | { 684 | "cell_type": "code", 685 | "execution_count": null, 686 | "metadata": {}, 687 | "outputs": [], 688 | "source": [ 689 | "y_pred_val" 690 | ] 691 | }, 692 | { 693 | "cell_type": "markdown", 694 | "metadata": {}, 695 | "source": [ 696 | "## Restoring a Graph" 697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": null, 702 | "metadata": {}, 703 | "outputs": [], 704 | "source": [ 705 | "model_path = \"./my_life_satisfaction_model\"\n", 706 | "graph = tf.Graph()\n", 707 | "with tf.Session(graph=graph) as sess:\n", 708 | " # restore the graph\n", 709 | " saver = tf.train.import_meta_graph(model_path + \".meta\")\n", 710 | " saver.restore(sess, model_path)\n", 711 | "\n", 712 | " # get references to the tensors we need\n", 713 | " X = graph.get_tensor_by_name(\"X:0\")\n", 714 | " y_pred = graph.get_tensor_by_name(\"y_pred:0\")\n", 715 | "\n", 716 | " # make the prediction:\n", 717 | " y_pred_val = y_pred.eval(feed_dict={X: X_test})" 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 
722 | "execution_count": null, 723 | "metadata": {}, 724 | "outputs": [], 725 | "source": [ 726 | "y_pred_val" 727 | ] 728 | }, 729 | { 730 | "cell_type": "code", 731 | "execution_count": null, 732 | "metadata": {}, 733 | "outputs": [], 734 | "source": [] 735 | } 736 | ], 737 | "metadata": { 738 | "kernelspec": { 739 | "display_name": "Python 3", 740 | "language": "python", 741 | "name": "python3" 742 | }, 743 | "language_info": { 744 | "codemirror_mode": { 745 | "name": "ipython", 746 | "version": 3 747 | }, 748 | "file_extension": ".py", 749 | "mimetype": "text/x-python", 750 | "name": "python", 751 | "nbconvert_exporter": "python", 752 | "pygments_lexer": "ipython3", 753 | "version": "3.6.3" 754 | } 755 | }, 756 | "nbformat": 4, 757 | "nbformat_minor": 2 758 | } 759 | -------------------------------------------------------------------------------- /06_readers.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![book](https://raw.githubusercontent.com/ageron/tensorflow-safari-course/master/images/intro_to_tf_course.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**Try not to peek at the solutions when you go through the exercises. ;-)**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "First let's make sure this notebook works well in both Python 2 and Python 3:" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from __future__ import absolute_import, division, print_function, unicode_literals" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import tensorflow as tf\n", 40 | "tf.__version__" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "*__From previous notebooks__*" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "learning_rate = 0.01\n", 57 | "momentum = 0.8" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "# Using Readers" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "filenames = [\"data/life_satisfaction.csv\"]\n", 74 | "n_epochs = 500\n", 75 | "\n", 76 | "graph = tf.Graph()\n", 77 | "with graph.as_default():\n", 78 | " reader = tf.TextLineReader(skip_header_lines=1)\n", 79 | "\n", 80 | " filename_queue = tf.train.string_input_producer(filenames, num_epochs=n_epochs)\n", 81 | " record_id, record = reader.read(filename_queue)\n", 82 | "\n", 83 | " record_defaults = [[''], [0.0], [0.0]]\n", 84 | " country, gdp_per_capita, life_satisfaction = tf.decode_csv(record, record_defaults=record_defaults)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "batch_size = 5\n", 94 | "with graph.as_default():\n", 95 | " X_batch, y_batch = tf.train.batch([gdp_per_capita, life_satisfaction], batch_size=batch_size)\n", 96 | " X_batch_reshaped = tf.reshape(X_batch, [-1, 1])\n", 97 | " y_batch_reshaped = tf.reshape(y_batch, [-1, 1])" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | 
"metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "with graph.as_default():\n", 107 | " X = tf.placeholder_with_default(X_batch_reshaped, shape=[None, 1], name=\"X\")\n", 108 | " y = tf.placeholder_with_default(y_batch_reshaped, shape=[None, 1], name=\"y\")\n", 109 | "\n", 110 | " b = tf.Variable(0.0, name=\"b\")\n", 111 | " w = tf.Variable(tf.zeros([1, 1]), name=\"w\")\n", 112 | " y_pred = tf.add(tf.matmul(X / 10000, w), b, name=\"y_pred\") # X @ w + b\n", 113 | " \n", 114 | " mse = tf.reduce_mean(tf.square(y_pred - y), name=\"mse\")\n", 115 | "\n", 116 | " global_step = tf.Variable(0, trainable=False, name='global_step')\n", 117 | " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", 118 | " training_op = optimizer.minimize(mse, global_step=global_step)\n", 119 | "\n", 120 | " init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())\n", 121 | " saver = tf.train.Saver()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "with tf.Session(graph=graph) as sess:\n", 131 | " init.run()\n", 132 | " coord = tf.train.Coordinator()\n", 133 | " threads = tf.train.start_queue_runners(coord=coord)\n", 134 | " try:\n", 135 | " while not coord.should_stop():\n", 136 | " _, mse_val, global_step_val = sess.run([training_op, mse, global_step])\n", 137 | " if global_step_val % 100 == 0:\n", 138 | " print(global_step_val, mse_val)\n", 139 | " except tf.errors.OutOfRangeError:\n", 140 | " print(\"End of training\")\n", 141 | " coord.request_stop()\n", 142 | " coord.join(threads)\n", 143 | " saver.save(sess, \"./my_life_satisfaction_model\")" 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 3", 150 | "language": "python", 151 | "name": "python3" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 3 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython3", 163 | "version": "3.6.3" 164 | } 165 | }, 166 | "nbformat": 4, 167 | "nbformat_minor": 2 168 | } 169 | -------------------------------------------------------------------------------- /07_tensorboard_ex6.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![book](https://raw.githubusercontent.com/ageron/tensorflow-safari-course/master/images/intro_to_tf_course.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**Try not to peek at the solutions when you go through the exercises. 
;-)**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "First let's make sure this notebook works well in both Python 2 and Python 3:" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from __future__ import absolute_import, division, print_function, unicode_literals" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import tensorflow as tf\n", 40 | "tf.__version__" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "*__From previous notebooks__*" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "learning_rate = 0.01\n", 57 | "momentum = 0.8" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "# TensorBoard" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "## Exercise 6" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "![Exercise](https://c1.staticflickr.com/9/8101/8553474140_c50cf08708_b.jpg)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "In this exercise, you will learn to use TensorBoard. It is a great visualization tool that comes with TensorFlow. It works by parsing special TensorFlow logs, called _summaries_, and displaying them nicely." 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "6.1) Starting the TensorBoard server. Open a Terminal and type the following commands." 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "Move to the `tensorflow-safari-course` directory:" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "`~$` **`cd tensorflow-safari-course`**" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "Create the `tf_logs` directory that will hold the TensorFlow data that we will want TensorBoard to display:" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "`~/tensorflow-safari-course$` **`mkdir tf_logs`**" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "Activate the virtual environment:" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "`~/tensorflow-safari-course$` **`source env/bin/activate`**" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "Start the TensorBoard server:" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "`(env) ~/tensorflow-safari-course$` **`tensorboard --logdir=tf_logs`**\n", 149 | "\n", 150 | "`Starting TensorBoard b'41' on port 6006\n", 151 | "(You can navigate to` http://127.0.1.1:6006 `)`" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "Now visit the URL given by TensorBoard. You should see the TensorBoard interface." 
159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "6.2) Now create a `tf.summary.FileWriter`, with the parameters: `logdir=\"tf_logs/run_number_1/\"` and `graph=graph` where `graph` is the one we built just before this exercise. This will automatically:\n", 187 | "* create the `run_number_1` directory inside the `tf_logs` directory,\n", 188 | "* create an `events.out.tfevents.*` file in that subdirectory that will contain the data that TensorBoard will display,\n", 189 | "* write the graph's definition to this file.\n", 190 | "\n", 191 | "Next, try refreshing the TensorBoard page in your browser (you may need to wait a couple minutes for it to detect the change, or else you can just restart the TensorBoard server). Visit the Graph tab: you should be able to visualize the graph." 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "6.3) As you can see, the graph looks really messy in TensorBoard. We need to organize it a bit. For this, name scopes come in handy. An operation can be placed inside a name scope in one of two ways:\n", 220 | "\n", 221 | "* Add the scope as a prefix to the operation's name, for example:\n", 222 | "\n", 223 | "```python\n", 224 | "a = tf.constant(0.0, name=\"my_name_scope/a\")\n", 225 | "```\n", 226 | "\n", 227 | "* Or (generally clearer) use a `tf.name_scope()` block, for example:\n", 228 | "\n", 229 | "```python\n", 230 | "with tf.name_scope(\"my_name_scope\"):\n", 231 | " a = tf.constant(0.0, name=\"a\")\n", 232 | "```\n", 233 | "\n", 234 | "Add name scopes to the following graph, then write it to TensorBoard (using a different run number for the log directory name) and see how much better it looks, and how much easier it is to explore." 
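, "\n", "\n", "Name scopes can also be nested, which helps for larger graphs. A tiny sketch (the scope and constant names here are just illustrative):\n", "\n", "```python\n", "with tf.name_scope(\"outer\"):\n", "    with tf.name_scope(\"inner\"):\n", "        a = tf.constant(0.0, name=\"a\")  # this op is named \"outer/inner/a\"\n", "```"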
235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "filenames = [\"data/life_satisfaction.csv\"]\n", 244 | "n_epochs = 500\n", 245 | "batch_size = 5\n", 246 | "\n", 247 | "graph = tf.Graph()\n", 248 | "with graph.as_default():\n", 249 | " reader = tf.TextLineReader(skip_header_lines=1)\n", 250 | "\n", 251 | " filename_queue = tf.train.string_input_producer(filenames, num_epochs=n_epochs)\n", 252 | " record_id, record = reader.read(filename_queue)\n", 253 | "\n", 254 | " record_defaults = [[''], [0.0], [0.0]]\n", 255 | " country, gdp_per_capita, life_satisfaction = tf.decode_csv(record, record_defaults=record_defaults)\n", 256 | "\n", 257 | " X_batch, y_batch = tf.train.batch([gdp_per_capita, life_satisfaction], batch_size=batch_size)\n", 258 | " X_batch_reshaped = tf.reshape(X_batch, [-1, 1])\n", 259 | " y_batch_reshaped = tf.reshape(y_batch, [-1, 1])\n", 260 | "\n", 261 | " X = tf.placeholder_with_default(X_batch_reshaped, shape=[None, 1], name=\"X\")\n", 262 | " y = tf.placeholder_with_default(y_batch_reshaped, shape=[None, 1], name=\"y\")\n", 263 | "\n", 264 | " b = tf.Variable(0.0, name=\"b\")\n", 265 | " w = tf.Variable(tf.zeros([1, 1]), name=\"w\")\n", 266 | " y_pred = tf.add(tf.matmul(X / 10000, w), b, name=\"y_pred\") # X @ w + b\n", 267 | " \n", 268 | " mse = tf.reduce_mean(tf.square(y_pred - y), name=\"mse\")\n", 269 | " global_step = tf.Variable(0, trainable=False, name='global_step')\n", 270 | " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", 271 | " training_op = optimizer.minimize(mse, global_step=global_step)\n", 272 | " \n", 273 | " init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())\n", 274 | " saver = tf.train.Saver()" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "6.4) Print out the name of a few operations. Notice how the names now have the scope as a prefix." 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "6.5) TensorBoard is capable of displaying data from multiple TensorFlow runs (for example multiple training sessions). For this, we need to place the data from each run in a different subdirectory of the `tf_logs` directory. We can name these subdirectories however we want, but a simple option is to name them using a timestamp. 
The following `logdir()` function returns the path of a subdirectory whose name is based on the current date and time:" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "from datetime import datetime\n", 333 | "\n", 334 | "def logdir():\n", 335 | " root_logdir = \"tf_logs\"\n", 336 | " now = datetime.utcnow().strftime(\"%Y%m%d%H%M%S\")\n", 337 | " return \"{}/run_{}/\".format(root_logdir, now)" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [ 346 | "logdir()" 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "Create a few different graphs and instantiate a different FileWriter for each one, using a different log directory every time (with the help of the `logdir()` function). Refresh TensorBoard and notice that you can browse any graph you want by selecting the appropriate run." 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": null, 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": {}, 380 | "source": [ 381 | "6.6) Now we will use TensorBoard to visualize the learning curve, that is the evolution of the cost function during training.\n", 382 | "\n", 383 | "* First add a scalar summary operation in the graph, using `tf.summary.scalar(\"MSE\", mse)`.\n", 384 | "* Next, update the training code to evaluate this scalar summary and write the result to the events file using the `FileWriter`'s `add_summary()` method (also specifying the training step). For performance reasons, you probably want to do this only every 10 training iterations or so.\n", 385 | "* Next, train the model.\n", 386 | "* Refresh TensorBoard, and visit the Scalars tab. Select the appropriate run and visualize the learning curve. Try zooming in and out, and play around with the options, in particular the smoothing option." 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": {}, 407 | "outputs": [], 408 | "source": [] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "Try not to peek at the solution below before you have done the exercise! 
:)" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "![thinking](https://upload.wikimedia.org/wikipedia/commons/0/06/Filos_segundo_logo_%28flipped%29.jpg)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "## Exercise 6 - Solution\n", 429 | "6.1)\n", 430 | "\n", 431 | "N/A" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "6.2)" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": {}, 445 | "outputs": [], 446 | "source": [ 447 | "summary_writer = tf.summary.FileWriter(\"tf_logs/run_number_1_solution/\", graph=graph)" 448 | ] 449 | }, 450 | { 451 | "cell_type": "markdown", 452 | "metadata": {}, 453 | "source": [ 454 | "6.3)" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "filenames = [\"data/life_satisfaction.csv\"]\n", 464 | "n_epochs = 500\n", 465 | "batch_size = 5\n", 466 | "\n", 467 | "graph = tf.Graph()\n", 468 | "with graph.as_default():\n", 469 | " with tf.name_scope(\"reader\"):\n", 470 | " reader = tf.TextLineReader(skip_header_lines=1)\n", 471 | "\n", 472 | " filename_queue = tf.train.string_input_producer(filenames, num_epochs=n_epochs)\n", 473 | " record_id, record = reader.read(filename_queue)\n", 474 | "\n", 475 | " record_defaults = [[''], [0.0], [0.0]]\n", 476 | " country, gdp_per_capita, life_satisfaction = tf.decode_csv(record, record_defaults=record_defaults)\n", 477 | "\n", 478 | " X_batch, y_batch = tf.train.batch([gdp_per_capita, life_satisfaction], batch_size=batch_size)\n", 479 | " X_batch_reshaped = tf.reshape(X_batch, [-1, 1])\n", 480 | " y_batch_reshaped = tf.reshape(y_batch, [-1, 1])\n", 481 | "\n", 482 | " with tf.name_scope(\"linear_model\"):\n", 483 | " X = tf.placeholder_with_default(X_batch_reshaped, shape=[None, 1], name=\"X\")\n", 484 | " y = tf.placeholder_with_default(y_batch_reshaped, shape=[None, 1], name=\"y\")\n", 485 | "\n", 486 | " b = tf.Variable(0.0, name=\"b\")\n", 487 | " w = tf.Variable(tf.zeros([1, 1]), name=\"w\")\n", 488 | " y_pred = tf.add(tf.matmul(X / 10000, w), b, name=\"y_pred\") # X @ w + b\n", 489 | " \n", 490 | " with tf.name_scope(\"train\"):\n", 491 | " mse = tf.reduce_mean(tf.square(y_pred - y), name=\"mse\")\n", 492 | " global_step = tf.Variable(0, trainable=False, name='global_step')\n", 493 | " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", 494 | " training_op = optimizer.minimize(mse, global_step=global_step)\n", 495 | " \n", 496 | " init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())\n", 497 | " saver = tf.train.Saver()" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "metadata": {}, 504 | "outputs": [], 505 | "source": [ 506 | "summary_writer = tf.summary.FileWriter(\"tf_logs/run_number_2_solution/\", graph=graph)" 507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "metadata": {}, 512 | "source": [ 513 | "6.4)" 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": null, 519 | "metadata": {}, 520 | "outputs": [], 521 | "source": [ 522 | "country.name, gdp_per_capita.name, X_batch.name, y_batch.name" 523 | ] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "execution_count": null, 528 | "metadata": {}, 529 | "outputs": [], 530 | "source": [ 531 | "X.name, y.name, b.name, w.name, y_pred.name" 
532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": null, 537 | "metadata": {}, 538 | "outputs": [], 539 | "source": [ 540 | "mse.name, global_step.name, training_op.name" 541 | ] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "metadata": {}, 546 | "source": [ 547 | "6.5)" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": null, 553 | "metadata": {}, 554 | "outputs": [], 555 | "source": [ 556 | "graph1 = tf.Graph()\n", 557 | "with graph1.as_default():\n", 558 | " a = tf.constant(1.0)" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": {}, 565 | "outputs": [], 566 | "source": [ 567 | "summary_writer = tf.summary.FileWriter(logdir(), graph=graph1)" 568 | ] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": null, 573 | "metadata": {}, 574 | "outputs": [], 575 | "source": [ 576 | "graph2 = tf.Graph()\n", 577 | "with graph2.as_default():\n", 578 | " a = tf.constant(1.0, name=\"a\")\n", 579 | " b = tf.Variable(2.0, name=\"b\")\n", 580 | " c = a * b" 581 | ] 582 | }, 583 | { 584 | "cell_type": "markdown", 585 | "metadata": {}, 586 | "source": [ 587 | "If we run `logdir()` twice within the same second, we will get the same directory name twice. To avoid this, let's wait a bit over 1 second here. In real life, this is quite unlikely to happen since training a model typically takes much longer than 1 second." 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": null, 593 | "metadata": {}, 594 | "outputs": [], 595 | "source": [ 596 | "import time\n", 597 | "time.sleep(1.1)" 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "execution_count": null, 603 | "metadata": {}, 604 | "outputs": [], 605 | "source": [ 606 | "summary_writer = tf.summary.FileWriter(logdir(), graph=graph2)" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": null, 612 | "metadata": {}, 613 | "outputs": [], 614 | "source": [ 615 | "time.sleep(1.1)" 616 | ] 617 | }, 618 | { 619 | "cell_type": "markdown", 620 | "metadata": {}, 621 | "source": [ 622 | "6.6)" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": {}, 629 | "outputs": [], 630 | "source": [ 631 | "filenames = [\"data/life_satisfaction.csv\"]\n", 632 | "n_epochs = 500\n", 633 | "batch_size = 5\n", 634 | "\n", 635 | "graph = tf.Graph()\n", 636 | "with graph.as_default():\n", 637 | " with tf.name_scope(\"reader\"):\n", 638 | " reader = tf.TextLineReader(skip_header_lines=1)\n", 639 | "\n", 640 | " filename_queue = tf.train.string_input_producer(filenames, num_epochs=n_epochs)\n", 641 | " record_id, record = reader.read(filename_queue)\n", 642 | "\n", 643 | " record_defaults = [[''], [0.0], [0.0]]\n", 644 | " country, gdp_per_capita, life_satisfaction = tf.decode_csv(record, record_defaults=record_defaults)\n", 645 | "\n", 646 | " X_batch, y_batch = tf.train.batch([gdp_per_capita, life_satisfaction], batch_size=batch_size)\n", 647 | " X_batch_reshaped = tf.reshape(X_batch, [-1, 1])\n", 648 | " y_batch_reshaped = tf.reshape(y_batch, [-1, 1])\n", 649 | "\n", 650 | " with tf.name_scope(\"linear_model\"):\n", 651 | " X = tf.placeholder_with_default(X_batch_reshaped, shape=[None, 1], name=\"X\")\n", 652 | " y = tf.placeholder_with_default(y_batch_reshaped, shape=[None, 1], name=\"y\")\n", 653 | "\n", 654 | " b = tf.Variable(0.0, name=\"b\")\n", 655 | " w = tf.Variable(tf.zeros([1, 1]), name=\"w\")\n", 656 | " " y_pred = tf.add(tf.matmul(X / 10000, w), b, 
name=\"y_pred\")\n", 657 | " \n", 658 | " with tf.name_scope(\"train\"):\n", 659 | " mse = tf.reduce_mean(tf.square(y_pred - y), name=\"mse\")\n", 660 | " mse_summary = tf.summary.scalar('MSE', mse) # <= ADDED\n", 661 | " global_step = tf.Variable(0, trainable=False, name='global_step')\n", 662 | " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", 663 | " training_op = optimizer.minimize(mse, global_step=global_step)\n", 664 | " \n", 665 | " init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())\n", 666 | " saver = tf.train.Saver()" 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": null, 672 | "metadata": {}, 673 | "outputs": [], 674 | "source": [ 675 | "summary_writer = tf.summary.FileWriter(logdir(), graph)" 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": null, 681 | "metadata": {}, 682 | "outputs": [], 683 | "source": [ 684 | "with tf.Session(graph=graph) as sess:\n", 685 | " init.run()\n", 686 | " coord = tf.train.Coordinator()\n", 687 | " threads = tf.train.start_queue_runners(coord=coord)\n", 688 | " try:\n", 689 | " while not coord.should_stop():\n", 690 | " _, mse_summary_val, global_step_val = sess.run([training_op, mse_summary, global_step])\n", 691 | " if global_step_val % 10 == 0:\n", 692 | " summary_writer.add_summary(mse_summary_val, global_step_val)\n", 693 | " except tf.errors.OutOfRangeError:\n", 694 | " print(\"End of training\")\n", 695 | " coord.request_stop()\n", 696 | " coord.join(threads)\n", 697 | " saver.save(sess, \"./my_life_satisfaction_model\")" 698 | ] 699 | } 700 | ], 701 | "metadata": { 702 | "kernelspec": { 703 | "display_name": "Python 3", 704 | "language": "python", 705 | "name": "python3" 706 | }, 707 | "language_info": { 708 | "codemirror_mode": { 709 | "name": "ipython", 710 | "version": 3 711 | }, 712 | "file_extension": ".py", 713 | "mimetype": "text/x-python", 714 | "name": "python", 715 | "nbconvert_exporter": "python", 716 | "pygments_lexer": "ipython3", 717 | "version": "3.6.3" 718 | } 719 | }, 720 | "nbformat": 4, 721 | "nbformat_minor": 2 722 | } 723 | -------------------------------------------------------------------------------- /08_artifical_neural_networks_ex7ex8.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![book](https://raw.githubusercontent.com/ageron/tensorflow-safari-course/master/images/intro_to_tf_course.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**Try not to peek at the solutions when you go through the exercises. 
;-)**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "First let's make sure this notebook works well in both Python 2 and Python 3:" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from __future__ import absolute_import, division, print_function, unicode_literals" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import tensorflow as tf\n", 40 | "tf.__version__" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "import numpy as np\n", 50 | "%matplotlib inline\n", 51 | "import matplotlib.pyplot as plt" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": { 57 | "collapsed": true 58 | }, 59 | "source": [ 60 | "# Artificial Neural Networks" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Exercise 7" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "![Exercise](https://c1.staticflickr.com/9/8101/8553474140_c50cf08708_b.jpg)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "Visit the [TensorFlow Playground](http://playground.tensorflow.org).\n", 82 | "* Try training the default neural network by clicking the \"Run\" button (top left). Notice how it quickly finds a good solution for the classification task. Notice that the neurons in the first hidden layer have learned simple patterns, while the neurons in the second hidden layer have learned to combine the simple patterns of the first hidden layer into more complex patterns). In general, the more layers, the more complex the patterns can be.\n", 83 | "* Try replacing the Tanh activation function with the ReLU activation function, and train the network again. Notice that it finds a solution even faster, but this time the boundaries are linear. This is due to the shape of the ReLU function.\n", 84 | "* Modify the network architecture to have just one hidden layer with three neurons. Train it multiple times (to reset the network weights, just add and remove a neuron). Notice that the training time varies a lot, and sometimes it even gets stuck in a local minimum.\n", 85 | "* Now remove one neuron to keep just 2. Notice that the neural network is now incapable of finding a good solution, even if you try multiple times. The model has too few parameters and it systematically underfits the training set.\n", 86 | "* Next, set the number of neurons to 8 and train the network several times. Notice that it is now consistently fast and never gets stuck. This highlights an important finding in neural network theory: large neural networks almost never get stuck in local minima, and even when they do these local optima are almost as good as the global optimum. However, they can still get stuck on long plateaus for a long time.\n", 87 | "* Now change the dataset to be the spiral (bottom right dataset under \"DATA\"). Change the network architecture to have 4 hidden layers with 8 neurons each. Notice that training takes much longer, and often gets stuck on plateaus for long periods of time. Also notice that the neurons in the highest layers (i.e. on the right) tend to evolve faster than the neurons in the lowest layers (i.e. on the left). 
This problem, called the \"vanishing gradients\" problem, can be alleviated using better weight initialization and other techniques, better optimizers (such as AdaGrad or Adam), or using Batch Normalization.\n", 88 | "* Go ahead and play with the other parameters to get a feel of what they do." 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "## Load the MNIST dataset" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "from tensorflow.examples.tutorials.mnist import input_data\n", 105 | "mnist = input_data.read_data_sets(\"tmp/data/\")" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "batch_size = 3\n", 115 | "X_batch, y_batch = mnist.train.next_batch(batch_size)\n", 116 | "X_batch.shape" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "for image_data in X_batch:\n", 126 | " plt.imshow(image_data.reshape([28, 28]), cmap=\"binary\", interpolation=\"nearest\")\n", 127 | " plt.show()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "y_batch" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "## Exercise 8" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "![Exercise](https://c1.staticflickr.com/9/8101/8553474140_c50cf08708_b.jpg)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "8.1) Take a close look at the following neural network model and make sure you understand every line. Next, add an extra hidden layer composed of 100 neurons." 
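, "\n", "\n", "Two lines deserve a closer look. `tf.nn.sparse_softmax_cross_entropy_with_logits()` expects integer class labels (not one-hot vectors) and applies the softmax itself, which is why it is fed the raw `logits`. And `tf.nn.in_top_k(logits, y, 1)` checks whether the top-scoring class matches the label; up to tie-breaking, it behaves like this sketch:\n", "\n", "```python\n", "correct = tf.equal(tf.argmax(logits, axis=1), tf.cast(y, tf.int64))\n", "```"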
158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "n_inputs = 28 * 28\n", 167 | "n_hidden1 = 100\n", 168 | "n_outputs = 10\n", 169 | "\n", 170 | "graph = tf.Graph()\n", 171 | "with graph.as_default():\n", 172 | " with tf.name_scope(\"inputs\"):\n", 173 | " X = tf.placeholder(tf.float32, shape=[None, n_inputs], name=\"X\")\n", 174 | " y = tf.placeholder(tf.int32, shape=[None], name=\"y\")\n", 175 | "\n", 176 | " with tf.name_scope(\"hidden1\"):\n", 177 | " b1 = tf.Variable(tf.zeros([n_hidden1]), name=\"b1\")\n", 178 | " W1 = tf.Variable(tf.random_uniform([n_inputs, n_hidden1], -1.0, 1.0), name=\"W1\")\n", 179 | " hidden1 = tf.nn.relu(tf.matmul(X, W1) + b1)\n", 180 | " \n", 181 | " with tf.name_scope(\"output\"):\n", 182 | " b2 = tf.Variable(tf.zeros([n_outputs]), name=\"b2\")\n", 183 | " W2 = tf.Variable(tf.random_uniform([n_hidden1, n_outputs], -1.0, 1.0), name=\"W2\")\n", 184 | " logits = tf.matmul(hidden1, W2) + b2\n", 185 | " Y_proba = tf.nn.softmax(logits, name=\"Y_proba\")\n", 186 | " \n", 187 | " with tf.name_scope(\"train\"):\n", 188 | " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)\n", 189 | " loss = tf.reduce_mean(xentropy)\n", 190 | " optimizer = tf.train.AdamOptimizer()\n", 191 | " training_op = optimizer.minimize(loss)\n", 192 | "\n", 193 | " with tf.name_scope(\"eval\"):\n", 194 | " correct = tf.nn.in_top_k(logits, y, 1)\n", 195 | " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", 196 | "\n", 197 | " with tf.name_scope(\"init_and_save\"):\n", 198 | " init = tf.global_variables_initializer()\n", 199 | " saver = tf.train.Saver()" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "8.2) Write the training code, and train the model for about 20 epochs (i.e. enough training iterations to go through the training set 20 times). Evaluate it on the test set: you should get over 95% accuracy.\n", 207 | "\n", 208 | "Hint: you should open a session, initialize the variables, then write the main training loop. Inside it you should use `mnist.train.next_batch(batch_size)` to get the next training batch (say with `batch_size=50`), then run the `training_op`, feeding it the training batch (don't forget to feed both `X` and `y`). Every few hundred iterations, evaluate the model's accuracy on the validation set (`mnist.validation.images` and `mnist.validation.labels`), and print the result. At the end of training, save the model." 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "8.3) Bonus question: load the model you just trained and saved, and use it to make predictions on the first 200 images of the test set (`mnist.test`). Display the images that the model got wrong, and show the class probabilities that it guessed. Notice that some of the images it gets wrong are pretty poorly written, but some are obvious to us humans. We will see that Convolutional Neural Networks can do a much better job and reach human performance." 
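, "\n", "\n", "Hint: once you have evaluated `Y_proba` on the test images (giving, say, an array `Y_proba_val`), `np.argmax()` turns the probabilities into class predictions:\n", "\n", "```python\n", "y_pred = np.argmax(Y_proba_val, axis=1)  # one predicted class per test image\n", "```"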
237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "Try not to peek at the solution below before you have done the exercise! :)" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "![thinking](https://upload.wikimedia.org/wikipedia/commons/0/06/Filos_segundo_logo_%28flipped%29.jpg)" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "## Exercise 8 - Solution" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "8.1)" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "n_inputs = 28 * 28\n", 295 | "n_hidden1 = 100\n", 296 | "n_hidden2 = 100\n", 297 | "n_outputs = 10\n", 298 | "\n", 299 | "graph = tf.Graph()\n", 300 | "with graph.as_default():\n", 301 | " with tf.name_scope(\"inputs\"):\n", 302 | " X = tf.placeholder(tf.float32, shape=[None, n_inputs], name=\"X\")\n", 303 | " y = tf.placeholder(tf.int32, shape=[None], name=\"y\")\n", 304 | "\n", 305 | " with tf.name_scope(\"hidden1\"):\n", 306 | " b1 = tf.Variable(tf.zeros([n_hidden1]), name=\"b1\")\n", 307 | " W1 = tf.Variable(tf.random_uniform([n_inputs, n_hidden1], -1.0, 1.0), name=\"W1\")\n", 308 | " hidden1 = tf.nn.relu(tf.matmul(X, W1) + b1)\n", 309 | "\n", 310 | " with tf.name_scope(\"hidden2\"):\n", 311 | " b2 = tf.Variable(tf.zeros([n_hidden2]), name=\"b2\")\n", 312 | " W2 = tf.Variable(tf.random_uniform([n_hidden1, n_hidden2], -1.0, 1.0), name=\"W2\")\n", 313 | " hidden2 = tf.nn.relu(tf.matmul(hidden1, W2) + b2)\n", 314 | "\n", 315 | " with tf.name_scope(\"output\"):\n", 316 | " b3 = tf.Variable(tf.zeros([n_outputs]), name=\"b3\")\n", 317 | " W3 = tf.Variable(tf.random_uniform([n_hidden2, n_outputs], -1.0, 1.0), name=\"W3\")\n", 318 | " logits = tf.matmul(hidden2, W3) + b3\n", 319 | " Y_proba = tf.nn.softmax(logits, name=\"Y_proba\")\n", 320 | " \n", 321 | " with tf.name_scope(\"train\"):\n", 322 | " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)\n", 323 | " loss = tf.reduce_mean(xentropy)\n", 324 | " optimizer = tf.train.AdamOptimizer()\n", 325 | " training_op = optimizer.minimize(loss)\n", 326 | "\n", 327 | " with tf.name_scope(\"eval\"):\n", 328 | " correct = tf.nn.in_top_k(logits, y, 1)\n", 329 | " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", 330 | "\n", 331 | " with tf.name_scope(\"init_and_save\"):\n", 332 | " init = tf.global_variables_initializer()\n", 333 | " saver = tf.train.Saver()" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "8.2)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "n_epochs = 20\n", 350 | "batch_size = 50\n", 351 | "\n", 352 | "with tf.Session(graph=graph) as sess:\n", 353 | " init.run()\n", 354 | " for epoch in range(n_epochs):\n", 355 | " for iteration in range(mnist.train.num_examples // 
batch_size):\n", 356 | " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", 357 | " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", 358 | " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", 359 | " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})\n", 360 | " print(epoch, \"Train accuracy:\", acc_train, \"Validation accuracy:\", acc_val)\n", 361 | "\n", 362 | " save_path = saver.save(sess, \"./my_mnist_model\")" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "8.3)" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "graph = tf.Graph()\n", 379 | "with tf.Session(graph=graph) as sess:\n", 380 | " saver = tf.train.import_meta_graph(\"./my_mnist_model.meta\")\n", 381 | " saver.restore(sess, \"./my_mnist_model\")\n", 382 | " X = graph.get_tensor_by_name(\"inputs/X:0\")\n", 383 | " Y_proba = graph.get_tensor_by_name(\"output/Y_proba:0\")\n", 384 | " Y_proba_val = Y_proba.eval(feed_dict={X: mnist.test.images})" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "for example_index in range(200):\n", 394 | " y_proba = Y_proba_val[example_index]\n", 395 | " y_pred = np.argmax(y_proba)\n", 396 | " y_label = mnist.test.labels[example_index]\n", 397 | " if y_pred != y_label:\n", 398 | " print(\"Actual class:{}, Predicted class: {}, Main probabilities: {}\".format(\n", 399 | " y_label,\n", 400 | " y_pred,\n", 401 | " \", \".join([\"{}:{:.1f}%\".format(n, 100*p)\n", 402 | " for n, p in enumerate(y_proba) if p > 0.01])))\n", 403 | " plt.imshow(mnist.test.images[example_index].reshape([28, 28]), cmap=\"binary\", interpolation=\"nearest\")\n", 404 | " plt.show()\n" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [] 413 | } 414 | ], 415 | "metadata": { 416 | "kernelspec": { 417 | "display_name": "Python 3", 418 | "language": "python", 419 | "name": "python3" 420 | }, 421 | "language_info": { 422 | "codemirror_mode": { 423 | "name": "ipython", 424 | "version": 3 425 | }, 426 | "file_extension": ".py", 427 | "mimetype": "text/x-python", 428 | "name": "python", 429 | "nbconvert_exporter": "python", 430 | "pygments_lexer": "ipython3", 431 | "version": "3.6.3" 432 | } 433 | }, 434 | "nbformat": 4, 435 | "nbformat_minor": 2 436 | } 437 | -------------------------------------------------------------------------------- /09_organizing_code.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![book](https://raw.githubusercontent.com/ageron/tensorflow-safari-course/master/images/intro_to_tf_course.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**Try not to peek at the solutions when you go through the exercises. 
;-)**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "First let's make sure this notebook works well in both Python 2 and Python 3:" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from __future__ import absolute_import, division, print_function, unicode_literals" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import tensorflow as tf\n", 40 | "tf.__version__" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "from tensorflow.examples.tutorials.mnist import input_data\n", 50 | "mnist = input_data.read_data_sets(\"tmp/data/\")" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": { 56 | "collapsed": true 57 | }, 58 | "source": [ 59 | "# Organizing Your Code" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "def neural_net_layer(inputs, n_neurons, activation=None, seed=None):\n", 69 | " n_inputs = int(inputs.get_shape()[1])\n", 70 | " b = tf.Variable(tf.zeros([n_neurons]), name=\"b\")\n", 71 | " W = tf.Variable(tf.random_uniform([n_inputs, n_neurons], -1.0, 1.0, seed=seed), name=\"W\")\n", 72 | " logits = tf.matmul(inputs, W) + b\n", 73 | " if activation:\n", 74 | " return activation(logits)\n", 75 | " else:\n", 76 | " return logits" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "Let's simplify our code by using `neural_net_layer()`:" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "n_inputs = 28 * 28\n", 93 | "n_hidden1 = 100\n", 94 | "n_outputs = 10\n", 95 | "\n", 96 | "graph = tf.Graph()\n", 97 | "with graph.as_default():\n", 98 | " with tf.name_scope(\"inputs\"):\n", 99 | " X = tf.placeholder(tf.float32, shape=[None, n_inputs], name=\"X\")\n", 100 | " y = tf.placeholder(tf.int32, shape=[None], name=\"y\")\n", 101 | "\n", 102 | "#########################################################################\n", 103 | "# This section is simplified (the rest is unchanged)\n", 104 | "#\n", 105 | " with tf.name_scope(\"hidden1\"):\n", 106 | " hidden1 = neural_net_layer(X, n_hidden1, activation=tf.nn.relu) # <= CHANGED\n", 107 | "\n", 108 | " with tf.name_scope(\"output\"):\n", 109 | " logits = neural_net_layer(hidden1, n_outputs) # <= CHANGED\n", 110 | " Y_proba = tf.nn.softmax(logits, name=\"Y_proba\")\n", 111 | "#\n", 112 | "#\n", 113 | "#########################################################################\n", 114 | " \n", 115 | " with tf.name_scope(\"train\"):\n", 116 | " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)\n", 117 | " loss = tf.reduce_mean(xentropy)\n", 118 | " optimizer = tf.train.AdamOptimizer()\n", 119 | " training_op = optimizer.minimize(loss)\n", 120 | "\n", 121 | " with tf.name_scope(\"eval\"):\n", 122 | " correct = tf.nn.in_top_k(logits, y, 1)\n", 123 | " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", 124 | "\n", 125 | " with tf.name_scope(\"init_and_save\"):\n", 126 | " init = tf.global_variables_initializer()\n", 127 | " saver = tf.train.Saver()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | 
"outputs": [], 135 | "source": [ 136 | "[var.op.name for var in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "Let's check that training still works:" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "n_epochs = 20\n", 153 | "batch_size = 50\n", 154 | "\n", 155 | "with tf.Session(graph=graph) as sess:\n", 156 | " init.run()\n", 157 | " for epoch in range(n_epochs):\n", 158 | " for iteration in range(mnist.train.num_examples // batch_size):\n", 159 | " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", 160 | " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", 161 | " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", 162 | " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})\n", 163 | " print(epoch, \"Train accuracy:\", acc_train, \"Validation accuracy:\", acc_val)\n", 164 | "\n", 165 | " save_path = saver.save(sess, \"./my_mnist_model\")" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "Now let's use `tf.layers.dense()` instead:" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "n_inputs = 28 * 28\n", 182 | "n_hidden1 = 100\n", 183 | "n_outputs = 10\n", 184 | "\n", 185 | "graph = tf.Graph()\n", 186 | "with graph.as_default():\n", 187 | " with tf.name_scope(\"inputs\"):\n", 188 | " X = tf.placeholder(tf.float32, shape=[None, n_inputs], name=\"X\")\n", 189 | " y = tf.placeholder(tf.int32, shape=[None], name=\"y\")\n", 190 | "\n", 191 | " with tf.name_scope(\"hidden1\"):\n", 192 | " hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name=\"hidden1\") # <= CHANGED\n", 193 | "\n", 194 | " with tf.name_scope(\"output\"):\n", 195 | " logits = tf.layers.dense(hidden1, n_outputs, name=\"output\") # <= CHANGED\n", 196 | " Y_proba = tf.nn.softmax(logits)\n", 197 | " \n", 198 | " with tf.name_scope(\"train\"):\n", 199 | " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)\n", 200 | " loss = tf.reduce_mean(xentropy)\n", 201 | " optimizer = tf.train.AdamOptimizer()\n", 202 | " training_op = optimizer.minimize(loss)\n", 203 | "\n", 204 | " with tf.name_scope(\"eval\"):\n", 205 | " correct = tf.nn.in_top_k(logits, y, 1)\n", 206 | " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", 207 | "\n", 208 | " with tf.name_scope(\"init_and_save\"):\n", 209 | " init = tf.global_variables_initializer()\n", 210 | " saver = tf.train.Saver()" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "[var.op.name for var in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "Let's check that training still works:" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "n_epochs = 20\n", 236 | "batch_size = 50\n", 237 | "\n", 238 | "with tf.Session(graph=graph) as sess:\n", 239 | " init.run()\n", 240 | " for epoch in range(n_epochs):\n", 241 | " for iteration in range(mnist.train.num_examples // batch_size):\n", 242 | " X_batch, y_batch = 
mnist.train.next_batch(batch_size)\n", 243 | " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", 244 | " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", 245 | " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})\n", 246 | " print(epoch, \"Train accuracy:\", acc_train, \"Validation accuracy:\", acc_val)\n", 247 | "\n", 248 | " save_path = saver.save(sess, \"./my_mnist_model\")" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "Now suppose you want two more hidden layers with shared weights & biases. Let's use variable scopes for this:" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "n_inputs = 28 * 28\n", 265 | "n_hidden = 100\n", 266 | "n_outputs = 10\n", 267 | "\n", 268 | "graph = tf.Graph()\n", 269 | "with graph.as_default():\n", 270 | " with tf.name_scope(\"inputs\"):\n", 271 | " X = tf.placeholder(tf.float32, shape=[None, n_inputs], name=\"X\")\n", 272 | " y = tf.placeholder(tf.int32, shape=[None], name=\"y\")\n", 273 | "\n", 274 | " hidden1 = tf.layers.dense(X, n_hidden, activation=tf.nn.relu, name=\"hidden1\") # <= CHANGED\n", 275 | " hidden2 = tf.layers.dense(hidden1, n_hidden, activation=tf.nn.relu, name=\"hidden23\") # <= CHANGED\n", 276 | " hidden3 = tf.layers.dense(hidden2, n_hidden, activation=tf.nn.relu, name=\"hidden23\", reuse=True) # <= CHANGED\n", 277 | "\n", 278 | " with tf.name_scope(\"output\"):\n", 279 | " logits = tf.layers.dense(hidden3, n_outputs, name=\"output\")\n", 280 | " Y_proba = tf.nn.softmax(logits, name=\"Y_proba\")\n", 281 | " \n", 282 | " with tf.name_scope(\"train\"):\n", 283 | " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)\n", 284 | " loss = tf.reduce_mean(xentropy)\n", 285 | " optimizer = tf.train.AdamOptimizer()\n", 286 | " training_op = optimizer.minimize(loss)\n", 287 | "\n", 288 | " with tf.name_scope(\"eval\"):\n", 289 | " correct = tf.nn.in_top_k(logits, y, 1)\n", 290 | " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", 291 | "\n", 292 | " with tf.name_scope(\"init_and_save\"):\n", 293 | " init = tf.global_variables_initializer()\n", 294 | " saver = tf.train.Saver()" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [ 303 | "[var.op.name for var in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "Check that training works well:" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "n_epochs = 20\n", 320 | "batch_size = 50\n", 321 | "\n", 322 | "with tf.Session(graph=graph) as sess:\n", 323 | " init.run()\n", 324 | " for epoch in range(n_epochs):\n", 325 | " for iteration in range(mnist.train.num_examples // batch_size):\n", 326 | " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", 327 | " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", 328 | " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", 329 | " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})\n", 330 | " print(epoch, \"Train accuracy:\", acc_train, \"Validation accuracy:\", acc_val)\n", 331 | "\n", 332 | " save_path = saver.save(sess, 
\"./my_mnist_model\")" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "How would we implement variable sharing in `neural_net_layer()`?" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "def neural_net_layer(inputs, n_neurons, activation=None, name=None, reuse=None, seed=None):\n", 349 | " with tf.variable_scope(name, default_name=\"layer\", reuse=reuse):\n", 350 | " n_inputs = int(inputs.get_shape()[1])\n", 351 | " rnd_init = lambda shape, dtype, partition_info: tf.random_uniform(shape, -1.0, 1.0, dtype=dtype, seed=seed)\n", 352 | " b = tf.get_variable(\"biases\", shape=[n_neurons], initializer=rnd_init)\n", 353 | " W = tf.get_variable(\"weights\", shape=[n_inputs, n_neurons], initializer=rnd_init)\n", 354 | " logits = tf.matmul(inputs, W) + b\n", 355 | " if activation:\n", 356 | " return activation(logits)\n", 357 | " else:\n", 358 | " return logits" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "graph = tf.Graph()\n", 368 | "with graph.as_default():\n", 369 | " with tf.variable_scope(\"foo\"): \n", 370 | " a = tf.constant(1., name=\"a\")\n", 371 | " with tf.name_scope(\"bar\"): \n", 372 | " b = tf.constant(2., name=\"b\")\n", 373 | " with tf.name_scope(\"baz\"):\n", 374 | " c = tf.get_variable(\"c\", shape=[], initializer=tf.constant_initializer(2))\n", 375 | " s = tf.add_n([a,b,c], name=\"s\")" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [ 384 | "a.name" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "b.name" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "c.name" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": null, 408 | "metadata": {}, 409 | "outputs": [], 410 | "source": [ 411 | "s.name" 412 | ] 413 | } 414 | ], 415 | "metadata": { 416 | "kernelspec": { 417 | "display_name": "Python 3", 418 | "language": "python", 419 | "name": "python3" 420 | }, 421 | "language_info": { 422 | "codemirror_mode": { 423 | "name": "ipython", 424 | "version": 3 425 | }, 426 | "file_extension": ".py", 427 | "mimetype": "text/x-python", 428 | "name": "python", 429 | "nbconvert_exporter": "python", 430 | "pygments_lexer": "ipython3", 431 | "version": "3.6.3" 432 | } 433 | }, 434 | "nbformat": 4, 435 | "nbformat_minor": 2 436 | } 437 | -------------------------------------------------------------------------------- /10_training_deep_nets_ex9.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![book](https://raw.githubusercontent.com/ageron/tensorflow-safari-course/master/images/intro_to_tf_course.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**Try not to peek at the solutions when you go through the exercises. 
;-)**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "First let's make sure this notebook works well in both Python 2 and Python 3:" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from __future__ import absolute_import, division, print_function, unicode_literals" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import tensorflow as tf\n", 40 | "tf.__version__" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "from tensorflow.examples.tutorials.mnist import input_data\n", 50 | "mnist = input_data.read_data_sets(\"tmp/data/\")" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "# Techniques for Training Deep Nets" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "Using He initialization and the ELU activation function (with the help of a `partial()`):" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "from functools import partial\n", 74 | "\n", 75 | "n_inputs = 28 * 28\n", 76 | "n_hidden1 = 100\n", 77 | "n_hidden2 = 100\n", 78 | "n_outputs = 10\n", 79 | "\n", 80 | "graph = tf.Graph()\n", 81 | "with graph.as_default():\n", 82 | " with tf.name_scope(\"inputs\"):\n", 83 | " X = tf.placeholder(tf.float32, shape=[None, n_inputs], name=\"X\")\n", 84 | " y = tf.placeholder(tf.int32, shape=[None], name=\"y\")\n", 85 | "\n", 86 | " he_init = tf.contrib.layers.variance_scaling_initializer()\n", 87 | " \n", 88 | " dense_layer = partial(tf.layers.dense,\n", 89 | " kernel_initializer=he_init,\n", 90 | " activation=tf.nn.elu)\n", 91 | " hidden1 = dense_layer(X, n_hidden1, name=\"hidden1\")\n", 92 | " hidden2 = dense_layer(hidden1, n_hidden2, name=\"hidden2\")\n", 93 | " logits = dense_layer(hidden2, n_outputs, activation=None, name=\"output\")\n", 94 | " Y_proba = tf.nn.softmax(logits)\n", 95 | " \n", 96 | " with tf.name_scope(\"train\"):\n", 97 | " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)\n", 98 | " loss = tf.reduce_mean(xentropy)\n", 99 | " optimizer = tf.train.AdamOptimizer()\n", 100 | " training_op = optimizer.minimize(loss)\n", 101 | "\n", 102 | " with tf.name_scope(\"eval\"):\n", 103 | " correct = tf.nn.in_top_k(logits, y, 1)\n", 104 | " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", 105 | "\n", 106 | " with tf.name_scope(\"init_and_save\"):\n", 107 | " init = tf.global_variables_initializer()\n", 108 | " saver = tf.train.Saver()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "n_epochs = 20\n", 118 | "batch_size = 50\n", 119 | "\n", 120 | "with tf.Session(graph=graph) as sess:\n", 121 | " init.run()\n", 122 | " for epoch in range(n_epochs):\n", 123 | " for iteration in range(mnist.train.num_examples // batch_size):\n", 124 | " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", 125 | " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", 126 | " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", 127 | " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})\n", 128 | " 
print(epoch, \"Train accuracy:\", acc_train, \"Validation accuracy:\", acc_val)\n", 129 | "\n", 130 | " save_path = saver.save(sess, \"./my_mnist_model\")" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "## Exercise 9" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "![Exercise](https://c1.staticflickr.com/9/8101/8553474140_c50cf08708_b.jpg)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "In this exercise, you will add a 50% dropout rate to the following neural network model below." 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "9.1) Add a `training` placeholder, of type `tf.bool`.\n", 159 | "\n", 160 | "Tip: you can use `tf.placeholder_with_default()` to make this `False` by default." 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "9.2) Add a dropout layer between the input layer and the first hidden layer, using `tf.layers.dropout()`." 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "n_inputs = 28 * 28\n", 177 | "n_hidden1 = 100\n", 178 | "n_hidden2 = 100\n", 179 | "n_outputs = 10\n", 180 | "\n", 181 | "graph = tf.Graph()\n", 182 | "with graph.as_default():\n", 183 | " with tf.name_scope(\"inputs\"):\n", 184 | " X = tf.placeholder(tf.float32, shape=[None, n_inputs], name=\"X\")\n", 185 | " y = tf.placeholder(tf.int32, shape=[None], name=\"y\")\n", 186 | "\n", 187 | " he_init = tf.contrib.layers.variance_scaling_initializer()\n", 188 | "\n", 189 | " dense_layer = partial(tf.layers.dense,\n", 190 | " kernel_initializer=he_init,\n", 191 | " activation=tf.nn.elu)\n", 192 | " hidden1 = dense_layer(X, n_hidden1, name=\"hidden1\")\n", 193 | " hidden2 = dense_layer(hidden1, n_hidden2, name=\"hidden2\")\n", 194 | " logits = dense_layer(hidden2, n_outputs, activation=None, name=\"output\")\n", 195 | " Y_proba = tf.nn.softmax(logits)\n", 196 | " \n", 197 | " with tf.name_scope(\"train\"):\n", 198 | " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)\n", 199 | " loss = tf.reduce_mean(xentropy)\n", 200 | " optimizer = tf.train.AdamOptimizer()\n", 201 | " training_op = optimizer.minimize(loss)\n", 202 | "\n", 203 | " with tf.name_scope(\"eval\"):\n", 204 | " correct = tf.nn.in_top_k(logits, y, 1)\n", 205 | " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", 206 | "\n", 207 | " with tf.name_scope(\"init_and_save\"):\n", 208 | " init = tf.global_variables_initializer()\n", 209 | " saver = tf.train.Saver()" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "9.3) Update the following training code to feed the value of the `training` placeholder, where appropriate, then run the code and see if the model performs better than without dropout." 
238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "n_epochs = 20\n", 247 | "batch_size = 50\n", 248 | "\n", 249 | "with tf.Session(graph=graph) as sess:\n", 250 | " init.run()\n", 251 | " for epoch in range(n_epochs):\n", 252 | " for iteration in range(mnist.train.num_examples // batch_size):\n", 253 | " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", 254 | " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", 255 | " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", 256 | " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})\n", 257 | " print(epoch, \"Train accuracy:\", acc_train, \"Validation accuracy:\", acc_val)\n", 258 | "\n", 259 | " save_path = saver.save(sess, \"./my_mnist_model\")" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "Try not to peek at the solution below before you have done the exercise! :)" 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": {}, 293 | "source": [ 294 | "![thinking](https://upload.wikimedia.org/wikipedia/commons/0/06/Filos_segundo_logo_%28flipped%29.jpg)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "## Exercise 9 - Solution" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "9.1-2)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "n_inputs = 28 * 28\n", 318 | "n_hidden1 = 100\n", 319 | "n_hidden2 = 100\n", 320 | "n_outputs = 10\n", 321 | "\n", 322 | "dropout_rate = 0.5 # <= CHANGED\n", 323 | "\n", 324 | "graph = tf.Graph()\n", 325 | "with graph.as_default():\n", 326 | " with tf.name_scope(\"inputs\"):\n", 327 | " X = tf.placeholder(tf.float32, shape=[None, n_inputs], name=\"X\")\n", 328 | " y = tf.placeholder(tf.int32, shape=[None], name=\"y\")\n", 329 | " training = tf.placeholder_with_default(False, shape=[], name='training') # <= CHANGED\n", 330 | " X_drop = tf.layers.dropout(X, dropout_rate, training=training) # <= CHANGED\n", 331 | "\n", 332 | " he_init = tf.contrib.layers.variance_scaling_initializer()\n", 333 | "\n", 334 | " dense_layer = partial(tf.layers.dense,\n", 335 | " kernel_initializer=he_init,\n", 336 | " activation=tf.nn.elu)\n", 337 | " hidden1 = dense_layer(X_drop, n_hidden1, name=\"hidden1\") # <= CHANGED\n", 338 | " hidden2 = dense_layer(hidden1, n_hidden2, name=\"hidden2\")\n", 339 | " logits = dense_layer(hidden2, n_outputs, activation=None, name=\"output\")\n", 340 | " Y_proba = tf.nn.softmax(logits)\n", 341 | " \n", 342 | " with tf.name_scope(\"train\"):\n", 343 | " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)\n", 344 | " loss = tf.reduce_mean(xentropy)\n", 345 | " optimizer = tf.train.AdamOptimizer()\n", 346 | " training_op = optimizer.minimize(loss)\n", 347 | "\n", 348 | " with tf.name_scope(\"eval\"):\n", 349 | " correct = 
tf.nn.in_top_k(logits, y, 1)\n", 350 | " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", 351 | "\n", 352 | " with tf.name_scope(\"init_and_save\"):\n", 353 | " init = tf.global_variables_initializer()\n", 354 | " saver = tf.train.Saver()" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "9.3)" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": {}, 368 | "outputs": [], 369 | "source": [ 370 | "n_epochs = 20\n", 371 | "batch_size = 50\n", 372 | "\n", 373 | "with tf.Session(graph=graph) as sess:\n", 374 | " init.run()\n", 375 | " for epoch in range(n_epochs):\n", 376 | " for iteration in range(mnist.train.num_examples // batch_size):\n", 377 | " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", 378 | " sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True}) # <= CHANGED\n", 379 | " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", 380 | " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})\n", 381 | " print(epoch, \"Train accuracy:\", acc_train, \"Validation accuracy:\", acc_val)\n", 382 | "\n", 383 | " save_path = saver.save(sess, \"./my_mnist_model\")" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "## Early Stopping" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": {}, 397 | "outputs": [], 398 | "source": [ 399 | "n_epochs = 1000\n", 400 | "batch_size = 50\n", 401 | "\n", 402 | "best_acc_val = 0\n", 403 | "check_interval = 100\n", 404 | "checks_since_last_progress = 0\n", 405 | "max_checks_without_progress = 100\n", 406 | "\n", 407 | "with tf.Session(graph=graph) as sess:\n", 408 | " init.run()\n", 409 | " for epoch in range(n_epochs):\n", 410 | " for iteration in range(mnist.train.num_examples // batch_size):\n", 411 | " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", 412 | " sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})\n", 413 | " if iteration % check_interval == 0:\n", 414 | " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[:2000], y: mnist.validation.labels[:2000]})\n", 415 | " if acc_val > best_acc_val:\n", 416 | " best_acc_val = acc_val\n", 417 | " checks_since_last_progress = 0\n", 418 | " saver.save(sess, \"./my_best_model_so_far\")\n", 419 | " else:\n", 420 | " checks_since_last_progress += 1 \n", 421 | " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", 422 | " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[2000:], y: mnist.validation.labels[2000:]})\n", 423 | " print(epoch, \"Train accuracy:\", acc_train, \"Validation accuracy:\", acc_val, \"Best validation accuracy:\", best_acc_val)\n", 424 | " if checks_since_last_progress > max_checks_without_progress:\n", 425 | " print(\"Early stopping!\")\n", 426 | " saver.restore(sess, \"./my_best_model_so_far\")\n", 427 | " break\n", 428 | "\n", 429 | " acc_test = accuracy.eval(feed_dict={X: mnist.test.images[2000:], y: mnist.test.labels[2000:]})\n", 430 | " print(\"Final accuracy on test set:\", acc_test)\n", 431 | " save_path = saver.save(sess, \"./my_mnist_model\")" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "Saving the model to disk so often slows down training. 
Let's save to RAM instead:" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": {}, 445 | "outputs": [], 446 | "source": [ 447 | "def get_model_params():\n", 448 | " gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)\n", 449 | " return {gvar.op.name: value for gvar, value in zip(gvars, tf.get_default_session().run(gvars))}\n", 450 | "\n", 451 | "def restore_model_params(model_params):\n", 452 | " gvar_names = list(model_params.keys())\n", 453 | " assign_ops = {gvar_name: tf.get_default_graph().get_operation_by_name(gvar_name + \"/Assign\")\n", 454 | " for gvar_name in gvar_names}\n", 455 | " init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}\n", 456 | " feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}\n", 457 | " tf.get_default_session().run(assign_ops, feed_dict=feed_dict)" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": null, 463 | "metadata": {}, 464 | "outputs": [], 465 | "source": [ 466 | "n_epochs = 1000\n", 467 | "batch_size = 50\n", 468 | "\n", 469 | "best_acc_val = 0\n", 470 | "check_interval = 100\n", 471 | "checks_since_last_progress = 0\n", 472 | "max_checks_without_progress = 100\n", 473 | "best_model_params = None\n", 474 | "\n", 475 | "with tf.Session(graph=graph) as sess:\n", 476 | " init.run()\n", 477 | " for epoch in range(n_epochs):\n", 478 | " for iteration in range(mnist.train.num_examples // batch_size):\n", 479 | " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", 480 | " sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})\n", 481 | " if iteration % check_interval == 0:\n", 482 | " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[:2000], y: mnist.validation.labels[:2000]})\n", 483 | " if acc_val > best_acc_val:\n", 484 | " best_acc_val = acc_val\n", 485 | " checks_since_last_progress = 0\n", 486 | " best_model_params = get_model_params()\n", 487 | " else:\n", 488 | " checks_since_last_progress += 1\n", 489 | " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", 490 | " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[2000:], y: mnist.validation.labels[2000:]})\n", 491 | " print(epoch, \"Train accuracy:\", acc_train, \"Validation accuracy:\", acc_val, \"Best validation accuracy:\", best_acc_val)\n", 492 | " if checks_since_last_progress > max_checks_without_progress:\n", 493 | " print(\"Early stopping!\")\n", 494 | " break\n", 495 | "\n", 496 | " if best_model_params:\n", 497 | " restore_model_params(best_model_params)\n", 498 | " acc_test = accuracy.eval(feed_dict={X: mnist.test.images[2000:], y: mnist.test.labels[2000:]})\n", 499 | " print(\"Final accuracy on test set:\", acc_test)\n", 500 | " save_path = saver.save(sess, \"./my_mnist_model\")" 501 | ] 502 | } 503 | ], 504 | "metadata": { 505 | "kernelspec": { 506 | "display_name": "Python 3", 507 | "language": "python", 508 | "name": "python3" 509 | }, 510 | "language_info": { 511 | "codemirror_mode": { 512 | "name": "ipython", 513 | "version": 3 514 | }, 515 | "file_extension": ".py", 516 | "mimetype": "text/x-python", 517 | "name": "python", 518 | "nbconvert_exporter": "python", 519 | "pygments_lexer": "ipython3", 520 | "version": "3.6.3" 521 | } 522 | }, 523 | "nbformat": 4, 524 | "nbformat_minor": 2 525 | } 526 | -------------------------------------------------------------------------------- /11_convolutional_neural_networks_ex10.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![book](https://raw.githubusercontent.com/ageron/tensorflow-safari-course/master/images/intro_to_tf_course.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "**Try not to peek at the solutions when you go through the exercises. ;-)**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "First let's make sure this notebook works well in both Python 2 and Python 3:" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from __future__ import absolute_import, division, print_function, unicode_literals" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import tensorflow as tf\n", 40 | "tf.__version__" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "import numpy as np\n", 50 | "%matplotlib inline\n", 51 | "import matplotlib.pyplot as plt\n", 52 | "from tensorflow.examples.tutorials.mnist import input_data\n", 53 | "mnist = input_data.read_data_sets(\"tmp/data/\")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "def get_model_params():\n", 63 | " gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)\n", 64 | " return {gvar.op.name: value for gvar, value in zip(gvars, tf.get_default_session().run(gvars))}" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "# Convolutional Neural Networks" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "Load demo image:" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "from scipy.misc import imread" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "china = imread(\"./images/china.png\")" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "china.shape" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "def plot_image(image):\n", 115 | " cmap = \"gray\" if len(image.shape) == 2 else None\n", 116 | " plt.imshow(image, cmap=cmap, interpolation=\"nearest\")\n", 117 | " plt.axis(\"off\")" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "plt.figure(figsize=(10,7))\n", 127 | "plot_image(china)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "Crop it and convert it to grayscale:" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "image = china[150:220, 130:250].mean(axis=2).astype(np.float32)\n", 144 | "image.shape" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | 
"outputs": [], 152 | "source": [ 153 | "height, width = image.shape\n", 154 | "channels = 1 # grayscale" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "plt.figure(figsize=(10,6))\n", 164 | "plot_image(image)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "basic_filters = np.zeros(shape=(7, 7, 1, 2), dtype=np.float32) # height, width, in channels, out channels\n", 174 | "basic_filters[:, 3, 0, 0] = 1\n", 175 | "basic_filters[3, :, 0, 1] = 1\n", 176 | "plot_image(basic_filters[:, :, 0, 0])\n", 177 | "plt.show()\n", 178 | "plot_image(basic_filters[:, :, 0, 1])\n", 179 | "plt.show()" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "graph = tf.Graph()\n", 189 | "with graph.as_default():\n", 190 | " X = tf.placeholder(tf.float32, shape=(None, height, width, channels))\n", 191 | " filters = tf.constant(basic_filters)\n", 192 | " convolution = tf.nn.conv2d(X, filters, strides=[1,1,1,1], padding=\"SAME\")" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "with tf.Session(graph=graph) as sess:\n", 202 | " X_batch = image.reshape(1, height, width, 1)\n", 203 | " output = convolution.eval(feed_dict={X: X_batch})" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "plt.figure(figsize=(10,6))\n", 213 | "plot_image(output[0, :, :, 0])" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "plt.figure(figsize=(10,6))\n", 223 | "plot_image(output[0, :, :, 1])" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "Now let's add a max pooling layer:" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "graph = tf.Graph()\n", 240 | "with graph.as_default():\n", 241 | " X = tf.placeholder(tf.float32, shape=(None, height, width, channels))\n", 242 | " filters = tf.constant(basic_filters)\n", 243 | " convolution = tf.nn.conv2d(X, filters, strides=[1,1,1,1], padding=\"SAME\")\n", 244 | " max_pool = tf.nn.max_pool(convolution, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding=\"VALID\")" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "with tf.Session(graph=graph) as sess:\n", 254 | " X_batch = image.reshape(1, height, width, 1)\n", 255 | " output = max_pool.eval(feed_dict={X: X_batch})" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "plt.figure(figsize=(5,3))\n", 265 | "plot_image(output[0, :, :, 0])" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "plt.figure(figsize=(5,3))\n", 275 | "plot_image(output[0, :, :, 1])" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "## Exercise 10" 283 | ] 284 | }, 285 | { 286 | 
"cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "![Exercise](https://c1.staticflickr.com/9/8101/8553474140_c50cf08708_b.jpg)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "In this final exercise, you will tackle MNIST and reach over 99% accuracy using most of what you learned in this course:\n", 297 | "\n", 298 | "* You model should be a Convolutional Neural Network composed of:\n", 299 | " * Two convolutional layers followed by a max pooling layer. The first convolutional layer should have 32 feature maps, and the second should have 64 feature maps. Both convolutional layers should use ReLU activation, 3x3 filters, SAME padding and stride 1.\n", 300 | " * One Fully Connected (FC) layer with 128 neurons, using ReLU activation.\n", 301 | " * A Fully Connected output layer with 10 outputs (to classify images in the 10 classes), using Softmax activation.\n", 302 | "* You should apply a 25% dropout rate on the outputs of the max pooling layer, and a 50% dropout rate on the outputs of the first FC layer.\n", 303 | "* As usual, you should minimize the cross-entropy, using an Adam optimizer.\n", 304 | "* Make sure to initialize all variables using He initialization.\n", 305 | "* Train the model using Early Stopping.\n", 306 | "* Use the model to predict the class of all the images in the MNIST test set. Display all the wrong predictions it makes on the first 400 images, along with the probabilities it assigned to each class." 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | "Try not to peek at the solution below before you have done the exercise! 
:)" 335 | ] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "![thinking](https://upload.wikimedia.org/wikipedia/commons/0/06/Filos_segundo_logo_%28flipped%29.jpg)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": {}, 347 | "source": [ 348 | "## Exercise 10 - Solution" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "height = 28\n", 358 | "width = 28\n", 359 | "channels = 1\n", 360 | "\n", 361 | "conv1_fmaps = 32\n", 362 | "conv1_ksize = 3\n", 363 | "conv1_stride = 1\n", 364 | "conv1_pad = \"SAME\"\n", 365 | "\n", 366 | "conv2_fmaps = 64\n", 367 | "conv2_ksize = 3\n", 368 | "conv2_stride = 1\n", 369 | "conv2_pad = \"SAME\"\n", 370 | "conv2_dropout_rate = 0.25\n", 371 | "\n", 372 | "pool3_fmaps = conv2_fmaps\n", 373 | "\n", 374 | "n_fc1 = 128\n", 375 | "fc1_dropout_rate = 0.5\n", 376 | "\n", 377 | "n_inputs = 28 * 28\n", 378 | "n_outputs = 10\n", 379 | "\n", 380 | "graph = tf.Graph()\n", 381 | "with graph.as_default():\n", 382 | " with tf.name_scope(\"inputs\"):\n", 383 | " X = tf.placeholder(tf.float32, shape=[None, n_inputs], name=\"X\")\n", 384 | " X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])\n", 385 | " y = tf.placeholder(tf.int32, shape=[None], name=\"y\")\n", 386 | " training = tf.placeholder_with_default(False, shape=[], name='training')\n", 387 | "\n", 388 | " conv1 = tf.layers.conv2d(X_reshaped, conv1_fmaps, kernel_size=conv1_ksize, strides=conv1_stride, padding=conv1_pad, activation=tf.nn.relu, name=\"conv1\")\n", 389 | " conv2 = tf.layers.conv2d(conv1, conv2_fmaps, kernel_size=conv2_ksize, strides=conv2_stride, padding=conv2_pad, activation=tf.nn.relu, name=\"conv2\")\n", 390 | "\n", 391 | " with tf.name_scope(\"pool3\"):\n", 392 | " pool3 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding=\"VALID\")\n", 393 | " pool3_flat = tf.reshape(pool3, shape=[-1, pool3_fmaps * 14 * 14])\n", 394 | " pool3_flat_drop = tf.layers.dropout(pool3_flat, conv2_dropout_rate, training=training)\n", 395 | "\n", 396 | " with tf.name_scope(\"fc1\"):\n", 397 | " fc1 = tf.layers.dense(pool3_flat_drop, n_fc1, activation=tf.nn.relu, name=\"fc1\")\n", 398 | " fc1_drop = tf.layers.dropout(fc1, fc1_dropout_rate, training=training)\n", 399 | "\n", 400 | " with tf.name_scope(\"output\"):\n", 401 | " logits = tf.layers.dense(fc1_drop, n_outputs, name=\"output\")\n", 402 | " Y_proba = tf.nn.softmax(logits, name=\"Y_proba\")\n", 403 | "\n", 404 | " with tf.name_scope(\"train\"):\n", 405 | " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)\n", 406 | " loss = tf.reduce_mean(xentropy)\n", 407 | " optimizer = tf.train.AdamOptimizer()\n", 408 | " training_op = optimizer.minimize(loss)\n", 409 | "\n", 410 | " with tf.name_scope(\"eval\"):\n", 411 | " correct = tf.nn.in_top_k(logits, y, 1)\n", 412 | " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", 413 | "\n", 414 | " with tf.name_scope(\"init_and_save\"):\n", 415 | " init = tf.global_variables_initializer()\n", 416 | " saver = tf.train.Saver()" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "Now let training begin, using early stopping. This is quite slow on a CPU, but much faster on a GPU. We achieve >99% accuracy on the test set." 
424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": {}, 430 | "outputs": [], 431 | "source": [ 432 | "def restore_model_params(model_params):\n", 433 | " gvar_names = list(model_params.keys())\n", 434 | " assign_ops = {gvar_name: tf.get_default_graph().get_operation_by_name(gvar_name + \"/Assign\")\n", 435 | " for gvar_name in gvar_names}\n", 436 | " init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}\n", 437 | " feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}\n", 438 | " tf.get_default_session().run(assign_ops, feed_dict=feed_dict)" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "### NB: The following cell will not run on the resources available for the course. If you are running locally, it is highly recommended to use a GPU." 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": {}, 452 | "outputs": [], 453 | "source": [ 454 | "n_epochs = 1000\n", 455 | "batch_size = 50\n", 456 | "\n", 457 | "best_acc_val = 0\n", 458 | "check_interval = 100\n", 459 | "checks_since_last_progress = 0\n", 460 | "max_checks_without_progress = 100\n", 461 | "best_model_params = None\n", 462 | "\n", 463 | "with tf.Session(graph=graph) as sess:\n", 464 | " init.run()\n", 465 | " for epoch in range(n_epochs):\n", 466 | " for iteration in range(mnist.train.num_examples // batch_size):\n", 467 | " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", 468 | " sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})\n", 469 | " if iteration % check_interval == 0:\n", 470 | " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[:2000], y: mnist.validation.labels[:2000]})\n", 471 | " if acc_val > best_acc_val:\n", 472 | " best_acc_val = acc_val\n", 473 | " checks_since_last_progress = 0\n", 474 | " best_model_params = get_model_params()\n", 475 | " else:\n", 476 | " checks_since_last_progress += 1\n", 477 | " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", 478 | " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[2000:], y: mnist.validation.labels[2000:]})\n", 479 | " print(epoch, \"Train accuracy:\", acc_train, \"Validation accuracy:\", acc_val, \"Best validation accuracy:\", best_acc_val)\n", 480 | " if checks_since_last_progress > max_checks_without_progress:\n", 481 | " print(\"Early stopping!\")\n", 482 | " break\n", 483 | "\n", 484 | " if best_model_params:\n", 485 | " restore_model_params(best_model_params)\n", 486 | " acc_test = accuracy.eval(feed_dict={X: mnist.test.images[2000:], y: mnist.test.labels[2000:]})\n", 487 | " print(\"Final accuracy on test set:\", acc_test)\n", 488 | " save_path = saver.save(sess, \"./my_mnist_model\")" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [ 497 | "with tf.Session(graph=graph) as sess:\n", 498 | " init.run()\n", 499 | " saver.restore(sess, \"./my_mnist_model\")\n", 500 | " Y_proba_val = Y_proba.eval(feed_dict={X: mnist.test.images[2000:2400]})" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": null, 506 | "metadata": {}, 507 | "outputs": [], 508 | "source": [ 509 | "for image, y_label, y_proba in zip(mnist.test.images[2000:2400], mnist.test.labels[2000:2400], Y_proba_val):\n", 510 | " y_pred = np.argmax(y_proba)\n", 511 | " if y_pred != y_label:\n", 512 | " 
print(\"Label: {}, Prediction: {}, Probabilities: {}\".format(\n", 513 | " y_label, y_pred,\n", 514 | " \" \".join([\"{}={:.1f}%\".format(n, 100*p)\n", 515 | " for n, p in enumerate(y_proba) if p > 0.01])))\n", 516 | " plt.imshow(image.reshape(28, 28), cmap=\"binary\")\n", 517 | " plt.axis(\"off\")\n", 518 | " plt.show() " 519 | ] 520 | }, 521 | { 522 | "cell_type": "markdown", 523 | "metadata": { 524 | "collapsed": true 525 | }, 526 | "source": [ 527 | "# What Next?" 528 | ] 529 | }, 530 | { 531 | "cell_type": "markdown", 532 | "metadata": {}, 533 | "source": [ 534 | "* Practice, practice and practice!\n", 535 | "* Go through the nice tutorials on tensorflow.org, in particular the transfer learning one.\n", 536 | "* Buy [my book](http://shop.oreilly.com/product/0636920052289.do)! :) There's a lot more material, including Recurrent Neural Networks, Deep Reinforcement Learning (including the amazing DeepMind stuff), Distributed TensorFlow, Autoencoders, and much more.\n", 537 | "* Go through the notebooks on my other Github project [github.com/ageron/handson-ml](https://github.com/ageron/handson-ml)." 538 | ] 539 | }, 540 | { 541 | "cell_type": "markdown", 542 | "metadata": {}, 543 | "source": [ 544 | "[![book](http://akamaicovers.oreilly.com/images/0636920052289/cat.gif)](http://shop.oreilly.com/product/0636920052289.do)" 545 | ] 546 | }, 547 | { 548 | "cell_type": "markdown", 549 | "metadata": {}, 550 | "source": [ 551 | "I hope you enjoyed this course!" 552 | ] 553 | } 554 | ], 555 | "metadata": { 556 | "kernelspec": { 557 | "display_name": "Python 3", 558 | "language": "python", 559 | "name": "python3" 560 | }, 561 | "language_info": { 562 | "codemirror_mode": { 563 | "name": "ipython", 564 | "version": 3 565 | }, 566 | "file_extension": ".py", 567 | "mimetype": "text/x-python", 568 | "name": "python", 569 | "nbconvert_exporter": "python", 570 | "pygments_lexer": "ipython3", 571 | "version": "3.6.3" 572 | } 573 | }, 574 | "nbformat": 4, 575 | "nbformat_minor": 2 576 | } 577 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ⛔️ DEPRECATED
 2 | 
 3 | **This project is no longer supported; please consider using https://github.com/ageron/tf2_course instead.**
 4 | 
 5 | Introduction to TensorFlow
 6 | ==========================
 7 | 
 8 | This project accompanies my **Introduction to TensorFlow** live online trainings ([April 27-28](https://www.safaribooksonline.com/live-training/courses/introduction-to-tensorflow/0636920079460/), [June 21-22](https://www.safaribooksonline.com/live-training/courses/introduction-to-tensorflow/0636920073918/), [September 13-14](https://www.safaribooksonline.com/live-training/courses/introduction-to-tensorflow/0636920079521), [December 13-14](https://www.safaribooksonline.com/live-training/courses/introduction-to-tensorflow/0636920079583/) 2017). It contains the exercises and their solutions, in the form of [Jupyter](http://jupyter.org/) notebooks.
 9 | 
10 | During the course itself, a URL will be provided for running the notebooks. You can participate in the course without installing anything locally. If you prefer to work on a local installation, please follow the installation instructions below.
11 | 
12 | [![book](https://raw.githubusercontent.com/ageron/tensorflow-safari-course/master/images/intro_to_tf_course.png)](https://www.safaribooksonline.com/live-training/courses/introduction-to-tensorflow/0636920079460/)
13 | 
14 | If you are looking for the code accompanying my O'Reilly book, [Hands-on Machine Learning with Scikit-Learn and TensorFlow](http://shop.oreilly.com/product/0636920052289.do), visit this GitHub project: [handson-ml](https://github.com/ageron/handson-ml).
15 | 
16 | # Installation
17 | 
18 | First, you will need to install [git](https://git-scm.com/) if you don't have it already.
19 | 
20 | Next, clone this repository by opening a terminal and typing the following commands:
21 | 
22 |     $ cd $HOME  # or any other development directory you prefer
23 |     $ git clone https://github.com/ageron/tensorflow-safari-course.git
24 |     $ cd tensorflow-safari-course
25 | 
26 | If you are familiar with Python and you know how to install Python libraries, go ahead and install NumPy, Matplotlib, Jupyter and TensorFlow (see `requirements.txt` for details), then jump to the [Starting Jupyter](#starting-jupyter) section. If you need detailed instructions, read on.
27 | 
28 | You obviously need Python. Python 2 is already preinstalled on most systems nowadays, and sometimes even Python 3. You can check which version(s) you have by typing the following commands:
29 | 
30 |     $ python --version   # for Python 2
31 |     $ python3 --version  # for Python 3
32 | 
33 | Any Python 3 version should be fine, as well as Python 2.6 or 2.7. However, if you don't have Python 3, I recommend installing it (Python 2 should work, but it is deprecated, so Python 3 is preferable). To do so, you have several options: on Windows or MacOSX, you can just download it from [python.org](https://www.python.org/downloads/). On MacOSX, you can alternatively use [MacPorts](https://www.macports.org/) or [Homebrew](https://brew.sh/). On Linux, unless you know what you are doing, you should use your system's packaging system. For example, on Debian or Ubuntu, type:
34 | 
35 |     $ sudo apt-get update
36 |     $ sudo apt-get install python3
37 | 
38 | Another option is to download and install [Anaconda](https://www.continuum.io/downloads). This is a package that includes both Python and many scientific libraries. You should prefer the Python 3 version.
39 | 
40 | ## Using Anaconda
41 | If you chose to install Anaconda, you can optionally create an isolated Python environment dedicated to this course. This is recommended as it makes it possible to have a different environment for each project (e.g. one for this course), with potentially different libraries and library versions:
42 | 
43 |     $ conda create -n tfintro python=3.5 anaconda
44 |     $ source activate tfintro
45 | 
46 | This creates a fresh Python 3.5 environment called `tfintro`, and it activates it. This environment contains all the scientific libraries that come with Anaconda. This includes all the libraries we will need (NumPy, Matplotlib and Jupyter), except for TensorFlow, so let's install it:
47 | 
48 |     $ conda install -n tfintro -c conda-forge tensorflow=1.4.0
49 | 
50 | This installs TensorFlow 1.4.0 in the `tfintro` environment (fetching it from the `conda-forge` repository). If you chose not to create a `tfintro` environment, then just remove the `-n tfintro` option.
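51 | 
52 | To check that the install worked, you can run a quick sanity check like the following (a minimal sketch; the version printed should match the one you just installed):
53 | 
54 |     $ python -c "import tensorflow as tf; print(tf.__version__)"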
55 | 
56 | You are all set! Next, jump to the [Starting Jupyter](#starting-jupyter) section.
57 | 
58 | ## Using pip
59 | If you are not using Anaconda, you need to install several scientific Python libraries that are necessary for this course: NumPy, Jupyter, Matplotlib and TensorFlow. For this, you can either use Python's integrated packaging system, pip, or you may prefer to use your system's own packaging system (if available, e.g. on Linux, or on MacOSX when using MacPorts or Homebrew). The advantage of using pip is that it is easy to create multiple isolated Python environments with different libraries and different library versions (e.g. one environment for each project). The advantage of using your system's packaging system is that there is less risk of conflicts between the Python library versions and your system's other packages. Since I have many projects with different library requirements, I prefer to use pip with isolated environments.
60 | 
61 | These are the commands you need to type in a terminal if you want to use pip to install the required libraries. Note: in all the following commands, if you chose to use Python 2 rather than Python 3, you must replace `pip3` with `pip`, and `python3` with `python`.
62 | 
63 | First, make sure you have the latest version of pip installed:
64 | 
65 |     $ pip3 install --user --upgrade pip
66 | 
67 | The `--user` option will install the latest version of pip only for the current user. If you prefer to install it system wide (i.e. for all users), you must have administrator rights (e.g. use `sudo pip3` instead of `pip3` on Linux), and you should remove the `--user` option. The same is true of the command below that uses the `--user` option.
68 | 
69 | Next, you can optionally create an isolated environment. As explained above, this is recommended as it makes it possible to have a different environment for each project (e.g. one for this course), with potentially very different libraries and library versions:
70 | 
71 |     $ pip3 install --user --upgrade virtualenv
72 |     $ virtualenv -p `which python3` env
73 | 
74 | This creates a new directory called `env` in the current directory, containing an isolated Python environment using Python 3. If you have multiple versions of Python 3 installed on your system, you can replace \``which python3`\` with the path to the Python executable you prefer to use, as shown below.
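75 | 
76 | For example, assuming the interpreter you want lives at `/usr/local/bin/python3.6` (an illustrative path; adjust it to wherever your preferred Python 3 is installed), you would type:
77 | 
78 |     $ virtualenv -p /usr/local/bin/python3.6 env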
79 | 
80 | Now you want to activate this environment. You will need to run this command every time you want to use it:
81 | 
82 |     $ source ./env/bin/activate
83 | 
84 | Next, use pip to install the required Python packages. If you are not using virtualenv, you should add the `--user` option (or else you will probably need administrator rights, e.g. using `sudo pip3` instead of `pip3` on Linux).
85 | 
86 |     $ pip3 install --upgrade -r requirements.txt
87 | 
88 | Great! You're all set; you just need to start Jupyter now.
89 | 
90 | ## Starting Jupyter
91 | To start Jupyter, simply type:
92 | 
93 |     $ jupyter notebook
94 | 
95 | This should open up your browser, and you should see Jupyter's tree view, with the contents of the current directory. If your browser does not open automatically, visit [localhost:8888](http://localhost:8888/tree).
96 | 
97 | Next, just click on any `*.ipynb` file to open a Jupyter notebook.
98 | 
99 | That's it! Now, have fun learning TensorFlow!
100 | 
101 | ## Using Docker
102 | 
103 | A Docker image is available for this course: esztiorm/0636920073925
104 | 
105 | If you wish to create your own image using the Dockerfile, please note that the path "tmp/data" should be changed to "/home/jovyan/tmp/data" in notebooks 8 through 11:
106 | 
107 | `mnist = input_data.read_data_sets("/home/jovyan/tmp/data/")`
--------------------------------------------------------------------------------
/data/life_satisfaction.csv:
--------------------------------------------------------------------------------
 1 | Country,GDP per capita,Life satisfaction
 2 | Russia,9054.914,6.0
 3 | Turkey,9437.372,5.6
 4 | Hungary,12239.893999999998,4.9
 5 | Poland,12495.333999999999,5.8
 6 | Slovak Republic,15991.736,6.1
 7 | Estonia,17288.083,5.6
 8 | Greece,18064.288,4.8
 9 | Portugal,19121.592,5.1
10 | Slovenia,20732.482,5.7
11 | Spain,25864.721,6.5
12 | Korea,27195.197,5.8
13 | Italy,29866.581000000002,6.0
14 | Japan,32485.545,5.9
15 | Israel,35343.336,7.4
16 | New Zealand,37044.891,7.3
17 | France,37675.006,6.5
18 | Belgium,40106.632000000005,6.9
19 | Germany,40996.511,7.0
20 | Finland,41973.988,7.4
21 | Canada,43331.960999999996,7.3
22 | Netherlands,43603.115,7.3
23 | Austria,43724.030999999995,6.9
24 | United Kingdom,43770.687999999995,6.8
25 | Sweden,49866.265999999996,7.2
26 | Iceland,50854.583,7.5
27 | Australia,50961.865,7.3
28 | Ireland,51350.744000000006,7.0
29 | Denmark,52114.165,7.5
30 | United States,55805.204000000005,7.2
--------------------------------------------------------------------------------
/images/china.png:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/ageron/tensorflow-safari-course/a9a4492363ff26b2721faab8af927f03186b620c/images/china.png
--------------------------------------------------------------------------------
/images/intro_to_tf_course.png:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/ageron/tensorflow-safari-course/a9a4492363ff26b2721faab8af927f03186b620c/images/intro_to_tf_course.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | jupyter==1.0.0
2 | matplotlib==3.4.0
3 | numpy==1.22.0
4 | scipy==1.6.2
5 | pillow==9.0.1
6 | 
7 | tensorflow==1.15.5
8 | 
9 | 
--------------------------------------------------------------------------------