├── .gitignore
├── 01-Tensorflow Basics.ipynb
├── 02-From tf to MNIST.ipynb
├── 03-TensorBoard.ipynb
├── 04-tf.data.ipynb
├── 05-tf-higher-level-api.ipynb
├── 06-keras-intro.ipynb
├── 07-lstm.ipynb
├── HW1-CIFAR10.ipynb
├── HW2-fashion mnist.ipynb
├── LICENSE
├── README.md
├── environment.yml
├── gan
│   ├── AutoEncoder.ipynb
│   ├── CycleGAN-keras.ipynb
│   ├── Simple_GAN.ipynb
│   ├── dragan-keras.ipynb
│   ├── fonts
│   │   ├── SourceHanSansTC-Regular.otf
│   │   └── TAKUMISFONT_LP.ttf
│   ├── pix2pix-keras-font.ipynb
│   ├── pix2pix-keras-geometry.ipynb
│   ├── pix2pix-keras.ipynb
│   ├── pix2pix-tf.ipynb
│   ├── pretrained_weights
│   │   └── font_TAKUMISFONT_LP_netG_weights.h5
│   └── wgan-keras.ipynb
├── mnist.pkl.xz
├── tfdot.py
└── transfered
    ├── 01-Keras-pretrained.ipynb
    ├── 02-Keras-pretrained-test_others.ipynb
    ├── 03-On Top MNIST.ipynb
    ├── 04-On Top CIFAR10.ipynb
    ├── 05-On Top Dog Cat.ipynb
    ├── 06-Art style transfer.ipynb
    ├── HW1-Neural Matching.ipynb
    └── img
        ├── result.png
        ├── starry_night.jpg
        └── tubingen.jpg

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

--------------------------------------------------------------------------------
/01-Tensorflow Basics.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# Windows-only hack for the graphviz path\n",
   "import os\n",
   "for path in os.environ['PATH'].split(os.pathsep):\n",
   "    if path.endswith(\"Library\\\\bin\"):\n",
   "        os.environ['PATH'] += os.pathsep + os.path.join(path, 'graphviz')"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "import tensorflow as tf\n",
   "import numpy as np\n",
| "# 下面兩個是用來輔助圖形化\n", 30 | "from IPython.display import display\n", 31 | "from tfdot import tfdot" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## 常數及節點" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "tf.constant(42)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "collapsed": true 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "tf.constant(42.)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "tf.constant([42])" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "matrix1 = tf.constant([[3., 3.]])\n", 83 | "\n", 84 | "matrix2 = tf.constant([[2.],[2.]])\n", 85 | "\n", 86 | "matrix1, matrix2" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "collapsed": true 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "product = tf.matmul(matrix1, matrix2)\n", 98 | "product" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "collapsed": true 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "tfdot()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "這些東西的單位叫做 graph" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "collapsed": true 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "graph = tf.get_default_graph()\n", 128 | "graph" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "product.graph" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": true 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "# 從 graph 得到 tensor\n", 151 | "graph.get_tensor_by_name('MatMul:0')" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "## Q:\n", 159 | "試試看其他名稱" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "### Operator" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "collapsed": true 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "graph.get_operations()" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "collapsed": true 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "product.op" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": { 195 | "collapsed": true 196 | }, 197 | "outputs": [], 198 | "source": [ 199 | "# 運算的輸出節點\n", 200 | "product.op.outputs" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": true 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "# 運算的輸入節點\n", 212 | "list(product.op.inputs)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "## Q\n", 220 | "* 試試看將 numpy ndarray 轉成 tf.constant\n", 221 | "* 
  {"cell_type": "markdown", "metadata": {}, "source": [
   "### Operator"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "graph.get_operations()"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "product.op"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# the operation's output nodes\n",
   "product.op.outputs"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# the operation's input nodes\n",
   "list(product.op.inputs)"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## Q\n",
   "* Try converting a numpy ndarray into a tf.constant\n",
   "* Create a node that multiplies matrix1 and matrix2 elementwise (guess which tf function does this)\n",
   "* Clear the default graph with `tf.reset_default_graph()` and see what happens\n",
   "* Then run tfdot again and take a look"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# reference solution for the elementwise multiplication\n",
   "# %load q_constant_mul.py"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## Session\n",
   "\n",
   "If the graph is the static description of the computation, then the Session is its dynamic state"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# create a Session\n",
   "sess = tf.Session()\n",
   "# run product in this Session and print the result\n",
   "print(sess.run(product))\n",
   "# close this Session\n",
   "sess.close()"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "### You can also write this with a context manager"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true, "scrolled": true}, "outputs": [], "source": [
   "with tf.Session() as sess:\n",
   "    print(sess.run(product))\n",
   "    # alternatively\n",
   "    print(product.eval())"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## Q\n",
   "* Check the result with numpy\n",
   "* Run every operation defined so far"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# compute all the results\n",
   "%run -i q_run_all_op.py"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "### Device context \n",
   "You can set a device context"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "with tf.Session() as sess:\n",
   "    with tf.device(\"/cpu:0\"):\n",
   "        print(sess.run(product))"
  ]},
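  {"cell_type": "markdown", "metadata": {}, "source": [
   "To see which devices are actually available, a small sketch (`device_lib` is a semi-internal module, but it works in TensorFlow 1.x):"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# list the local devices the session can place ops on\n",
   "from tensorflow.python.client import device_lib\n",
   "print([d.name for d in device_lib.list_local_devices()])"
  ]},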
"outputs": [], 390 | "source": [ 391 | "# place holder, 先佔位子\n", 392 | "a = tf.placeholder(tf.float32, name=\"this_is_a\")\n", 393 | "b = tf.placeholder(tf.float32, name=\"this_is_b\")\n", 394 | "s = tf.add(a, b)\n", 395 | "display(tfdot())\n", 396 | "s" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "直接執行\n", 404 | "```python\n", 405 | "s.eval()\n", 406 | "```\n", 407 | "會爆掉,因為佔了位子沒人來\n", 408 | "\n", 409 | "所以要放東西進去" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": { 416 | "collapsed": true 417 | }, 418 | "outputs": [], 419 | "source": [ 420 | "s.eval({a:2, b: 5})" 421 | ] 422 | }, 423 | { 424 | "cell_type": "markdown", 425 | "metadata": {}, 426 | "source": [ 427 | "或者" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": { 434 | "collapsed": true 435 | }, 436 | "outputs": [], 437 | "source": [ 438 | "sess.run(s, {a:[1,2], b:[3,4]})" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": { 445 | "collapsed": true 446 | }, 447 | "outputs": [], 448 | "source": [ 449 | "sess.close()" 450 | ] 451 | }, 452 | { 453 | "cell_type": "markdown", 454 | "metadata": {}, 455 | "source": [ 456 | "## Variable\n", 457 | "傳遞資訊不是免費的\n", 458 | "\n", 459 | "變數:存東西在 session 的空間\n" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": { 466 | "collapsed": true 467 | }, 468 | "outputs": [], 469 | "source": [ 470 | "# 重新設定 graph 環境和 default session\n", 471 | "tf.reset_default_graph()\n", 472 | "sess = tf.InteractiveSession()\n", 473 | "# 計數器\n", 474 | "state = tf.Variable(0, name=\"state\")\n", 475 | "\n", 476 | "# 新的節點 計數器+1\n", 477 | "new_value = tf.add(state, tf.constant(1, name='one'), name='new_value')\n", 478 | "# 更新 state\n", 479 | "update = tf.assign(state, new_value)\n", 480 | "# 變數初始化,這也是一個節點\n", 481 | "init_op = tf.global_variables_initializer()\n", 482 | "tfdot()" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "上面都是靜態的,下面才開始在 session 中執行" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": null, 495 | "metadata": { 496 | "collapsed": true 497 | }, 498 | "outputs": [], 499 | "source": [ 500 | "init_op.run()\n", 501 | "# or sess.run(init_op)\n", 502 | "print(state.eval())" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": null, 508 | "metadata": { 509 | "collapsed": true 510 | }, 511 | "outputs": [], 512 | "source": [ 513 | "for _ in range(300):\n", 514 | " #執行更新\n", 515 | " print(update.eval())" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": { 522 | "collapsed": true 523 | }, 524 | "outputs": [], 525 | "source": [ 526 | "state.eval()" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": null, 532 | "metadata": { 533 | "collapsed": true 534 | }, 535 | "outputs": [], 536 | "source": [ 537 | "sess.run([update]*10)" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": null, 543 | "metadata": { 544 | "collapsed": true 545 | }, 546 | "outputs": [], 547 | "source": [ 548 | "sess.close()" 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": {}, 554 | "source": [ 555 | "### Initialize from another variable" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": null, 561 | "metadata": { 562 | "collapsed": true 563 | }, 
564 | "outputs": [], 565 | "source": [ 566 | "# 重設環境\n", 567 | "tf.reset_default_graph()\n", 568 | "sess = tf.InteractiveSession()\n", 569 | "\n", 570 | "# 第一個變數 weights\n", 571 | "weights = tf.Variable(tf.random_normal((10,), stddev=0.35), name='weights')\n", 572 | "# 想要讓 w2 的初始值設定成和 weights 一樣 \n", 573 | "w1 = tf.Variable(weights.initialized_value(), name ='w1')\n", 574 | "# 想將 w_twice 設定為 weights 的兩倍\n", 575 | "w2 = tf.Variable(weights.initialized_value()*tf.constant(2., name='two'), name=\"w2\")\n", 576 | "tfdot()" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": null, 582 | "metadata": { 583 | "collapsed": true 584 | }, 585 | "outputs": [], 586 | "source": [ 587 | "init_op = tf.global_variables_initializer()\n", 588 | "init_op.run()\n", 589 | "\n", 590 | "for v in tf.global_variables():\n", 591 | " print(v.name, v)\n", 592 | " print(v.eval())\n", 593 | " " 594 | ] 595 | }, 596 | { 597 | "cell_type": "code", 598 | "execution_count": null, 599 | "metadata": { 600 | "collapsed": true 601 | }, 602 | "outputs": [], 603 | "source": [ 604 | "sess.close()" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "metadata": {}, 610 | "source": [ 611 | "### 流程控制\n", 612 | "https://www.tensorflow.org/api_guides/python/control_flow_ops\n", 613 | "\n", 614 | "## Q\n", 615 | "試著用 `tf.while_loop` 來計算 $1+2+\\cdots+10$\n", 616 | "(如何將結果放入一個 `tf.Variable` 中)" 617 | ] 618 | }, 619 | { 620 | "cell_type": "markdown", 621 | "metadata": {}, 622 | "source": [ 623 | "## Higher Order Function\n", 624 | "\n", 625 | "https://www.tensorflow.org/api_guides/python/functional_ops\n", 626 | "\n", 627 | "## Q\n", 628 | "計算 $1+2+\\cdots+10$" 629 | ] 630 | }, 631 | { 632 | "cell_type": "markdown", 633 | "metadata": {}, 634 | "source": [ 635 | "## Eager Executation\n", 636 | "\n", 637 | "https://www.tensorflow.org/programmers_guide/eager\n", 638 | "## Q\n", 639 | "\n", 640 | "* 計算 $1+2+\\cdots+10$\n", 641 | "* Project Euler 第一題 https://projecteuler.net/problem=1" 642 | ] 643 | } 644 | ], 645 | "metadata": { 646 | "kernelspec": { 647 | "display_name": "Python 3", 648 | "language": "python", 649 | "name": "python3" 650 | }, 651 | "language_info": { 652 | "codemirror_mode": { 653 | "name": "ipython", 654 | "version": 3 655 | }, 656 | "file_extension": ".py", 657 | "mimetype": "text/x-python", 658 | "name": "python", 659 | "nbconvert_exporter": "python", 660 | "pygments_lexer": "ipython3", 661 | "version": "3.6.2" 662 | } 663 | }, 664 | "nbformat": 4, 665 | "nbformat_minor": 1 666 | } 667 | -------------------------------------------------------------------------------- /02-From tf to MNIST.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# windows only hack for graphviz path \n", 12 | "import os\n", 13 | "for path in os.environ['PATH'].split(os.pathsep):\n", 14 | " if path.endswith(\"Library\\\\bin\"):\n", 15 | " os.environ['PATH']+=os.pathsep+os.path.join(path, 'graphviz')" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "from PIL import Image\n", 27 | "import numpy as np" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "import lzma\n", 39 | 
"import pickle\n", 40 | "with lzma.open(\"mnist.pkl.xz\", 'rb') as f:\n", 41 | " train_set, validation_set, test_set = pickle.load(f, encoding='latin1')" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": true 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "train_X, train_y = train_set\n", 53 | "validation_X, validation_y = validation_set\n", 54 | "test_X, test_y = test_set" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": true 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "from IPython.display import display\n", 66 | "def showX(X, rows=1):\n", 67 | " assert X.shape[0] % rows == 0\n", 68 | " int_X = (X*255).clip(0,255).astype('uint8')\n", 69 | " # N*784 -> N*28*28 -> 28*N*28 -> 28 * 28N\n", 70 | " int_X_reshape = int_X.reshape(rows, -1,28,28).swapaxes(1,2).reshape(28*rows,-1)\n", 71 | " display(Image.fromarray(int_X_reshape))\n", 72 | "# 訓練資料, X 的前 20 筆\n", 73 | "showX(train_X[:100],10)\n", 74 | "print(train_y)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Q\n", 82 | "看一下 mnist 資料" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "## 開始 Tensorflow" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": true 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "import tensorflow as tf\n", 101 | "from tfdot import tfdot" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "## Softmax regression\n", 109 | "基本上就是用\n", 110 | "$ e ^ {W x +b} $ 的比例來計算機率 \n", 111 | "\n", 112 | "其中 x 是長度 784 的向量(圖片), W 是 10x784矩陣,加上一個長度為 10 的向量。 算出來的十個數值,依照比例當成我們預估的機率。" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "collapsed": true 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "# 輸入的 placeholder\n", 124 | "X = tf.placeholder(tf.float32, shape=[None, 784], name=\"X\")\n", 125 | "# 權重參數,為了計算方便和一些慣例(行向量及列向量的差異),矩陣乘法的方向和上面解說相反\n", 126 | "W = tf.Variable(tf.zeros([784, 10]), name='W')\n", 127 | "b = tf.Variable(tf.zeros([10]), name='b') # 這裡可以看成是列向量\n", 128 | "\n", 129 | "tfdot()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": { 136 | "collapsed": true 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "# 計算出來的公式\n", 141 | "Y = tf.exp(tf.matmul(X, W) +b, name=\"Y\")\n", 142 | "Y_softmax = tf.nn.softmax(Y, name=\"Y_softmax\")\n", 143 | "# or \n", 144 | "#Y_softmax = tf.div(Y, tf.reduce_sum(Y, axis=1, keep_dims=True), name=\"Y_softmax\")\n", 145 | "tfdot()" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "Loss function 的計算是 cross_entorpy.\n", 153 | "\n", 154 | "基本上就是 $-log(\\Pr(Y_{true}))$" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "collapsed": true 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "# 真正的 Y\n", 166 | "Y_ = tf.placeholder(tf.float32, shape=[None, 10], name=\"Y_\")\n", 167 | "#和算出來的 Y 來做 cross entropy\n", 168 | "#cross_entropy = tf.reduce_mean(-tf.reduce_sum(Y_*tf.log(Y_softmax), axis=1))\n", 169 | "# or\n", 170 | "cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y_, logits=Y))\n", 171 | "tfdot()" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | 
"metadata": { 178 | "collapsed": true 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)\n", 183 | "\n", 184 | "tfdot(size=(15,30))" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": { 191 | "collapsed": true 192 | }, 193 | "outputs": [], 194 | "source": [ 195 | "train_Y = np.eye(10)[train_y]\n", 196 | "test_Y = np.eye(10)[test_y]\n", 197 | "validation_Y = np.eye(10)[validation_y]" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "collapsed": true 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "sess = tf.InteractiveSession()\n", 209 | "tf.global_variables_initializer().run()" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": { 216 | "collapsed": true 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "for i in range(1000):\n", 221 | " rnd_idx = np.random.choice(train_X.shape[0], 50, replace=False)\n", 222 | " train_step.run(feed_dict={X: train_X[rnd_idx], Y_:train_Y[rnd_idx]})" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": true 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "Y.eval(feed_dict={X: train_X[:10]})" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": true 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "prediction = tf.argmax(Y, axis=1)\n", 245 | "\n", 246 | "# print predictions\n", 247 | "prediction.eval(feed_dict={X: train_X[:10]})" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": { 254 | "collapsed": true 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "# print labels\n", 259 | "showX(train_X[:10])\n", 260 | "train_y[:10]" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": { 267 | "collapsed": true 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "correct_prediction = tf.equal(tf.argmax(Y,1), tf.argmax(Y_, 1))\n", 272 | "\n", 273 | "correct_prediction.eval({X: train_X[:10] , Y_: train_Y[:10]})" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": { 280 | "collapsed": true 281 | }, 282 | "outputs": [], 283 | "source": [ 284 | "accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n", 285 | "\n", 286 | "accuracy.eval(feed_dict={X: train_X[:10] , Y_: train_Y[:10]})" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": { 293 | "collapsed": true 294 | }, 295 | "outputs": [], 296 | "source": [ 297 | "accuracy.eval(feed_dict={X: train_X , Y_: train_Y})" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": { 304 | "collapsed": true 305 | }, 306 | "outputs": [], 307 | "source": [ 308 | "# 合在一起來看\n", 309 | "for t in range(10):\n", 310 | " for i in range(1000):\n", 311 | " rnd_idx = np.random.choice(train_X.shape[0], 200, replace=False)\n", 312 | " train_step.run(feed_dict={X: train_X[rnd_idx], Y_:train_Y[rnd_idx]})\n", 313 | " a = accuracy.eval({X: validation_X , Y_: validation_Y})\n", 314 | " print (t, a)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": { 321 | "collapsed": true 322 | }, 323 | "outputs": [], 324 | "source": [ 325 | "accuracy.eval({X: test_X , Y_: 
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "sess = tf.InteractiveSession()\n",
   "tf.global_variables_initializer().run()"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "for i in range(1000):\n",
   "    rnd_idx = np.random.choice(train_X.shape[0], 50, replace=False)\n",
   "    train_step.run(feed_dict={X: train_X[rnd_idx], Y_:train_Y[rnd_idx]})"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "Y.eval(feed_dict={X: train_X[:10]})"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "prediction = tf.argmax(Y, axis=1)\n",
   "\n",
   "# print predictions\n",
   "prediction.eval(feed_dict={X: train_X[:10]})"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# print labels\n",
   "showX(train_X[:10])\n",
   "train_y[:10]"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "correct_prediction = tf.equal(tf.argmax(Y,1), tf.argmax(Y_, 1))\n",
   "\n",
   "correct_prediction.eval({X: train_X[:10] , Y_: train_Y[:10]})"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n",
   "\n",
   "accuracy.eval(feed_dict={X: train_X[:10] , Y_: train_Y[:10]})"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "accuracy.eval(feed_dict={X: train_X , Y_: train_Y})"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# putting it all together\n",
   "for t in range(10):\n",
   "    for i in range(1000):\n",
   "        rnd_idx = np.random.choice(train_X.shape[0], 200, replace=False)\n",
   "        train_step.run(feed_dict={X: train_X[rnd_idx], Y_:train_Y[rnd_idx]})\n",
   "    a = accuracy.eval({X: validation_X , Y_: validation_Y})\n",
   "    print (t, a)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "accuracy.eval({X: test_X , Y_: test_Y})"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "sess.close()"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "# Multilayer Convolutional Network"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# reset the session and graph\n",
   "tf.reset_default_graph()\n",
   "# the inputs stay the same\n",
   "X = tf.placeholder(tf.float32, shape=[None, 784], name=\"X\")\n",
   "Y_ = tf.placeholder(tf.float32, shape=[None, 10], name=\"Y_\")"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# helpers for the weights and biases\n",
   "def weight_variable(shape):\n",
   "    initial = tf.truncated_normal(shape, stddev=0.1)\n",
   "    return tf.Variable(initial, name ='W')\n",
   "def bias_variable(shape):\n",
   "    initial = tf.constant(0.1, shape=shape)\n",
   "    return tf.Variable(initial, name = 'b')"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# helpers for the cnn layers\n",
   "def conv2d(X, W):\n",
   "    return tf.nn.conv2d(X, W, strides=[1,1,1,1], padding='SAME')\n",
   "def max_pool_2x2(X):\n",
   "    return tf.nn.max_pool(X, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# first layer\n",
   "with tf.name_scope('conv1'):\n",
   "    ## variables\n",
   "    W_conv1 = weight_variable([3,3,1,32])\n",
   "    b_conv1 = bias_variable([32])\n",
   "    ## build the layer\n",
   "    X_image = tf.reshape(X, [-1, 28, 28, 1])\n",
   "    h_conv1 = tf.nn.relu(conv2d(X_image, W_conv1) + b_conv1)\n",
   "    h_pool1 = max_pool_2x2(h_conv1)\n",
   "\n",
   "tfdot()"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# second layer\n",
   "with tf.name_scope('conv2'):\n",
   "    ## variables\n",
   "    W_conv2 = weight_variable([3,3,32,64])\n",
   "    b_conv2 = bias_variable([64])\n",
   "    ## build the layer\n",
   "    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)\n",
   "    h_pool2 = max_pool_2x2(h_conv2)"
  ]},
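  {"cell_type": "markdown", "metadata": {}, "source": [
   "A quick shape sanity check: each SAME convolution keeps the spatial size and each 2x2 max-pool halves it, so 28x28 -> 14x14 -> 7x7, which is where the `7*7*64` in the next cell comes from."
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "print(h_pool1.shape)  # (?, 14, 14, 32)\n",
   "print(h_pool2.shape)  # (?, 7, 7, 64)"
  ]},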
"with tf.name_scope('dropout'):\n", 459 | " keep_prob = tf.placeholder(\"float\", name=\"keep_prob\")\n", 460 | " h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)\n", 461 | "\n", 462 | "# Readout\n", 463 | "with tf.name_scope('readout'):\n", 464 | " W_fc2 = weight_variable([1024,10])\n", 465 | " b_fc2 = bias_variable([10])\n", 466 | " Y = tf.matmul(h_fc1_drop, W_fc2)+b_fc2" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "metadata": { 473 | "collapsed": true 474 | }, 475 | "outputs": [], 476 | "source": [ 477 | "cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y_, logits=Y))\n", 478 | "train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)\n", 479 | "prediction = tf.argmax(Y, 1, name=\"prediction\")\n", 480 | "correct_prediction = tf.equal(prediction, tf.argmax(Y_, 1), name=\"correction\")\n", 481 | "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name=\"accuracy\")" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 487 | "metadata": { 488 | "collapsed": true 489 | }, 490 | "outputs": [], 491 | "source": [ 492 | "sess = tf.InteractiveSession()\n", 493 | "tf.global_variables_initializer().run()" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "metadata": { 500 | "collapsed": true 501 | }, 502 | "outputs": [], 503 | "source": [ 504 | "%%timeit -r 1 -n 1\n", 505 | "for i in range(5000):\n", 506 | " rnd_idx = np.random.choice(train_X.shape[0], 50, replace=False)\n", 507 | " if i%250 == 0:\n", 508 | " validation_accuracy = accuracy.eval({\n", 509 | " X: validation_X[:200], Y_: validation_Y[:200], keep_prob: 1.0 })\n", 510 | " print(\"step %d, validation accuracy %g\"%(i, validation_accuracy))\n", 511 | " train_step.run({X: train_X[rnd_idx], Y_: train_Y[rnd_idx], keep_prob: 0.5 })" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": null, 517 | "metadata": { 518 | "collapsed": true 519 | }, 520 | "outputs": [], 521 | "source": [ 522 | "np.mean([accuracy.eval({X: test_X[i:i+1000], \n", 523 | " Y_: test_Y[i:i+1000],\n", 524 | " keep_prob: 1.0}) \n", 525 | " for i in range(0, test_X.shape[0], 1000)]\n", 526 | ")" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": null, 532 | "metadata": { 533 | "collapsed": true 534 | }, 535 | "outputs": [], 536 | "source": [ 537 | "tf.train.write_graph(sess.graph_def, \"./\", \"mnist_simple.pb\", as_text=False)\n" 538 | ] 539 | }, 540 | { 541 | "cell_type": "markdown", 542 | "metadata": {}, 543 | "source": [ 544 | "more about save load https://www.tensorflow.org/programmers_guide/saved_model" 545 | ] 546 | } 547 | ], 548 | "metadata": { 549 | "kernelspec": { 550 | "display_name": "Python 3", 551 | "language": "python", 552 | "name": "python3" 553 | }, 554 | "language_info": { 555 | "codemirror_mode": { 556 | "name": "ipython", 557 | "version": 3 558 | }, 559 | "file_extension": ".py", 560 | "mimetype": "text/x-python", 561 | "name": "python", 562 | "nbconvert_exporter": "python", 563 | "pygments_lexer": "ipython3", 564 | "version": "3.6.2" 565 | } 566 | }, 567 | "nbformat": 4, 568 | "nbformat_minor": 1 569 | } 570 | -------------------------------------------------------------------------------- /03-TensorBoard.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 
| "source": [ 11 | "from PIL import Image\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import lzma\n", 24 | "import pickle\n", 25 | "with lzma.open(\"mnist.pkl.xz\", 'rb') as f:\n", 26 | " train_set, validation_set, test_set = pickle.load(f, encoding='latin1')" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": true 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "train_X, train_y = train_set\n", 38 | "validation_X, validation_y = validation_set\n", 39 | "test_X, test_y = test_set\n", 40 | "train_Y = np.eye(10)[train_y]\n", 41 | "test_Y = np.eye(10)[test_y]\n", 42 | "validation_Y = np.eye(10)[validation_y]" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": true 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "from IPython.display import display\n", 54 | "def showX(X):\n", 55 | " int_X = (X*255).clip(0,255).astype('uint8')\n", 56 | " # N*784 -> N*28*28 -> 28*N*28 -> 28 * 28N\n", 57 | " int_X_reshape = int_X.reshape(-1,28,28).swapaxes(0,1).reshape(28,-1)\n", 58 | " display(Image.fromarray(int_X_reshape))\n", 59 | "# 訓練資料, X 的前 20 筆\n", 60 | "showX(train_X[:20])\n", 61 | "print(train_y)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## 開始 Tensorflow" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "import tensorflow as tf\n", 80 | "from tfdot import tfdot" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "# Multilayer Convolutional Network" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "# 使用 gfile 來讀檔\n", 99 | "from tensorflow.python.platform import gfile\n", 100 | "# 讀入 graph_def\n", 101 | "with gfile.FastGFile(\"mnist_simple.pb\",'rb') as f:\n", 102 | " graph_def = tf.GraphDef()\n", 103 | " x = f.read()\n", 104 | " #print(x)\n", 105 | " graph_def.ParseFromString(x)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": true 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "# 使用之前存下來的模型\n", 117 | "X, Y_, prediction, accuracy, train_step, keep_prob, init_op= tf.import_graph_def(graph_def, name=\"\", \n", 118 | " return_elements=[\"X:0\", \"Y_:0\", \"prediction:0\", \n", 119 | " \"accuracy:0\", \"Adam\", 'dropout/keep_prob:0', \"init\"])" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": true 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "sess = tf.InteractiveSession()\n", 131 | "init_op.run()\n", 132 | "tf.summary.scalar(accuracy.op.name, accuracy)\n", 133 | "summary_op = tf.summary.merge_all()\n", 134 | "summary_writer = tf.summary.FileWriter(\"log1\", graph=sess.graph)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": true 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "for i in range(5000):\n", 146 | " rnd_idx = np.random.choice(train_X.shape[0], 50, replace=False)\n", 147 | " if i%250 == 0: \n", 148 | " summary_str, validation_accuracy = 
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "sess = tf.InteractiveSession()\n",
   "init_op.run()\n",
   "tf.summary.scalar(accuracy.op.name, accuracy)\n",
   "summary_op = tf.summary.merge_all()\n",
   "summary_writer = tf.summary.FileWriter(\"log1\", graph=sess.graph)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "for i in range(5000):\n",
   "    rnd_idx = np.random.choice(train_X.shape[0], 50, replace=False)\n",
   "    if i%250 == 0: \n",
   "        summary_str, validation_accuracy = sess.run([summary_op, accuracy],\n",
   "                                                    {X: validation_X[:1000], \n",
   "                                                     Y_: validation_Y[:1000], \n",
   "                                                     keep_prob: 1.0 })\n",
   "        summary_writer.add_summary(summary_str, i)\n",
   "        print(\"step %d, validation accuracy: %g\"%(i, validation_accuracy))\n",
   "    train_step.run({X: train_X[rnd_idx], Y_: train_Y[rnd_idx], keep_prob: 0.5 })"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "summary_writer.close()\n",
   "sess.close()"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "run `tensorboard --logdir=log1` in a terminal and open http://localhost:6006\n",
   "\n"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "!tensorboard --logdir=log1"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "### Recording three kinds of accuracy at the same time"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "sess = tf.InteractiveSession()\n",
   "init_op.run()\n",
   "acc_summary = tf.summary.scalar(\"accuracy\", accuracy)\n",
   "training_summary_writer = tf.summary.FileWriter(\"log2/training\", graph=sess.graph)\n",
   "validation_summary_writer = tf.summary.FileWriter(\"log2/validation\", graph=sess.graph)\n",
   "testing_summary_writer = tf.summary.FileWriter(\"log2/testing\", graph=sess.graph)\n"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "for i in range(5000):\n",
   "    rnd_idx = np.random.choice(train_X.shape[0], 50, replace=False)\n",
   "    if i%50 == 0: \n",
   "        summary_str, training_acc = sess.run([acc_summary, accuracy],\n",
   "                                             {X: train_X[:1000], Y_: train_Y[:1000], keep_prob: 1.0 })\n",
   "        training_summary_writer.add_summary(summary_str, i)\n",
   "        summary_str, validation_acc = sess.run([acc_summary, accuracy],\n",
   "                                               {X: validation_X[:1000], Y_: validation_Y[:1000], keep_prob: 1.0 })\n",
   "        validation_summary_writer.add_summary(summary_str, i)\n",
   "        summary_str, testing_acc = sess.run([acc_summary, accuracy],\n",
   "                                            {X: test_X[:1000], Y_: test_Y[:1000], keep_prob: 1.0 })\n",
   "        testing_summary_writer.add_summary(summary_str, i)\n",
   "    if i%250==0:\n",
   "        print(\"step %d, train: %g, validation: %g, test: %g\"%(i, training_acc, \n",
   "                                                              validation_acc, testing_acc))\n",
   "    train_step.run({X: train_X[rnd_idx], Y_: train_Y[rnd_idx], keep_prob: 0.5 })"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "testing_summary_writer.close()\n",
   "validation_summary_writer.close()\n",
   "training_summary_writer.close()\n",
   "sess.close()"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "!tensorboard --logdir=log2"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## Image"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# accuracy\n",
   "accuracy = tf.Variable(0.2, name=\"xxx\")\n",
   "accuracy_ = tf.placeholder(\"float\")\n",
   "tf.summary.scalar(\"acc\", accuracy)\n",
   "update = tf.assign(accuracy, accuracy_)\n",
   "\n",
   "# image\n",
   "img_ = tf.placeholder(\"float\", shape=[None, 1080, 1920, 3])\n",
   "img = tf.Variable(tf.zeros([1, 1080,1920,3]))\n",
   "tf.summary.image(\"img\", img)\n",
   "update_img = tf.assign(img, img_)\n",
   "\n",
   "# merge all summary\n",
   "summary_op = tf.summary.merge_all()\n"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# rendering the image\n",
   "from math import pi, sin\n",
   "a = np.zeros((1080,1920,3))\n",
   "i_ = np.arange(1,64)\n",
   "c = np.random.uniform(size=(63,3))\n",
   "\n",
   "def color_arrows(t):\n",
   "    t_ = (i_*7.15+t)\n",
   "    x1 = (t_/3%1*1920).astype(np.int32)\n",
   "    y1 = (np.sin(t_)*500+500).astype(np.int32)\n",
   "    for i in range(63):\n",
   "        a[y1[i]:y1[i]+80, x1[i]:x1[i]+80] = c[i]\n",
   "    return a"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# tensorflow session\n",
   "with tf.Session() as sess:\n",
   "    with tf.summary.FileWriter(\"log3\", graph=sess.graph) as summary_writer:\n",
   "        sess.run(tf.global_variables_initializer())\n",
   "        for i in range(100):\n",
   "            sess.run(update, feed_dict={accuracy_: i/100.0})\n",
   "            color_arrows(i/100) \n",
   "            sess.run(update_img, feed_dict={img_: a[None, ...]})\n",
   "            summary_writer.add_summary(summary_op.eval(), i)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "!tensorboard --logdir=log3"
  ]}
 ],
 "metadata": {
  "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
  "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.2"}
 },
 "nbformat": 4,
 "nbformat_minor": 1
}

--------------------------------------------------------------------------------
/04-tf.data.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {"cell_type": "markdown", "metadata": {}, "source": [
   "# Using tf.data\n",
   "\n",
   "https://www.tensorflow.org/programmers_guide/datasets"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "from PIL import Image\n",
   "import numpy as np\n",
   "import tensorflow as tf"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "dataset = tf.data.Dataset.range(10)\n",
   "print(dataset.output_types)\n",
   "print(dataset.output_shapes)\n",
   "\n",
   "iterator = dataset.make_one_shot_iterator()\n",
   "next_element = iterator.get_next()\n",
   "with tf.Session() as sess:\n",
   "    for i in range(10):\n",
   "        print(sess.run(next_element))"
  ]},
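  {"cell_type": "markdown", "metadata": {}, "source": [
   "Dataset transformations chain; a tiny sketch (repeat the range twice and square each element):"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "ds = tf.data.Dataset.range(3).repeat(2).map(lambda x: x * x)\n",
   "nxt = ds.make_one_shot_iterator().get_next()\n",
   "with tf.Session() as sess:\n",
   "    for _ in range(6):\n",
   "        print(sess.run(nxt))  # 0 1 4 0 1 4"
  ]},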
  {"cell_type": "markdown", "metadata": {}, "source": [
   "### make_initializable_iterator"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "tf.reset_default_graph()\n",
   "min_value = tf.placeholder(tf.int64, shape=[])\n",
   "max_value = tf.placeholder(tf.int64, shape=[])\n",
   "dataset = tf.data.Dataset.range(min_value, max_value)\n",
   "iterator = dataset.make_initializable_iterator()\n",
   "next_element = iterator.get_next()\n",
   "\n",
   "with tf.Session() as sess:\n",
   "    # Initialize the iterator over a dataset with 5 elements (0 through 4).\n",
   "    sess.run(iterator.initializer, feed_dict={min_value: 0, max_value: 5})\n",
   "    for i in range(5):\n",
   "        print(sess.run(next_element))\n",
   "\n",
   "    # Re-initialize the same iterator over a different dataset (100 through 104).\n",
   "    sess.run(iterator.initializer, feed_dict={min_value: 100, max_value: 105})\n",
   "    for i in range(5):\n",
   "        value = sess.run(next_element)\n",
   "        print(value)"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## from_tensor_slices"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "dataset = tf.data.Dataset.from_tensor_slices(tf.random_uniform([10, 3]))\n",
   "\n",
   "print(dataset.output_types)\n",
   "print(dataset.output_shapes)\n",
   "\n",
   "iterator = dataset.make_initializable_iterator()\n",
   "\n",
   "next_element = iterator.get_next()\n",
   "with tf.Session() as sess:\n",
   "    sess.run(iterator.initializer)\n",
   "    for i in range(10):\n",
   "        print(sess.run(next_element))"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## Q\n",
   "Compute $1+2+...+10$"
  ]},
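  {"cell_type": "markdown", "metadata": {}, "source": [
   "One possible dataset-based version, as a sketch (the `OutOfRangeError` signals the end of the dataset):"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# 1+2+...+10 by draining a Dataset\n",
   "ds = tf.data.Dataset.range(1, 11)\n",
   "nxt = ds.make_one_shot_iterator().get_next()\n",
   "total = 0\n",
   "with tf.Session() as sess:\n",
   "    try:\n",
   "        while True:\n",
   "            total += sess.run(nxt)\n",
   "    except tf.errors.OutOfRangeError:\n",
   "        pass\n",
   "print(total)  # 55"
  ]},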
  {"cell_type": "markdown", "metadata": {}, "source": [
   "### Reinitializable (one iterator with different datasets)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "# Define training and validation datasets with the same structure.\n",
   "training_dataset = tf.data.Dataset.range(10).map(\n",
   "    lambda x: x + tf.random_uniform([], -10, 10, tf.int64))\n",
   "validation_dataset = tf.data.Dataset.range(5)\n",
   "\n",
   "# the two datasets are compatible\n",
   "assert training_dataset.output_types == validation_dataset.output_types\n",
   "assert training_dataset.output_shapes == validation_dataset.output_shapes\n",
   "\n",
   "iterator = tf.data.Iterator.from_structure(training_dataset.output_types,\n",
   "                                           training_dataset.output_shapes)\n",
   "next_element = iterator.get_next()\n",
   "\n",
   "training_init_op = iterator.make_initializer(training_dataset)\n",
   "validation_init_op = iterator.make_initializer(validation_dataset)\n",
   "\n",
   "def loop_through_dataset(ds_name, n):\n",
   "    for _ in range(n):\n",
   "        print(ds_name, _, sess.run(next_element))\n",
   "\n",
   "with tf.Session() as sess:\n",
   "    for epoch in range(3): \n",
   "        print(\"epoch\", epoch)\n",
   "        # training\n",
   "        sess.run(training_init_op)\n",
   "        loop_through_dataset(\"train\", 10)\n",
   "\n",
   "        # Validation\n",
   "        sess.run(validation_init_op)\n",
   "        loop_through_dataset(\"validation\", 5)\n"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## MNIST Dataset"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "import lzma\n",
   "import pickle\n",
   "with lzma.open(\"mnist.pkl.xz\", 'rb') as f:\n",
   "    train_set, validation_set, test_set = pickle.load(f, encoding='latin1')"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "train_X, train_y = train_set\n",
   "validation_X, validation_y = validation_set\n",
   "test_X, test_y = test_set\n",
   "train_Y = np.eye(10)[train_y]\n",
   "test_Y = np.eye(10)[test_y]\n",
   "validation_Y = np.eye(10)[validation_y]"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "from IPython.display import display\n",
   "def showX(X):\n",
   "    int_X = (X*255).clip(0,255).astype('uint8')\n",
   "    # N*784 -> N*28*28 -> 28*N*28 -> 28 * 28N\n",
   "    int_X_reshape = int_X.reshape(-1,28,28).swapaxes(0,1).reshape(28,-1)\n",
   "    display(Image.fromarray(int_X_reshape))\n",
   "# training data: the first 20 entries of X\n",
   "showX(train_X[:20])\n",
   "print(train_y)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "train_data = tf.data.Dataset.from_tensor_slices((train_X, train_Y))\n",
   "\n",
   "iterator = train_data.batch(4).make_initializable_iterator()\n",
   "\n",
   "next_minibatch = iterator.get_next()\n",
   "with tf.Session() as sess:\n",
   "    sess.run(iterator.initializer)\n",
   "    for i in range(3):\n",
   "        print(sess.run(next_minibatch)[1])"
  ]},
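  {"cell_type": "markdown", "metadata": {}, "source": [
   "Each element of `train_data` is an (image, one-hot label) pair; a quick look at the declared per-example shapes (sketch):"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": true}, "outputs": [], "source": [
   "print(train_data.output_shapes)           # ((784,), (10,)) per example\n",
   "print(train_data.batch(4).output_shapes)  # batching prepends a (possibly partial) batch dimension"
  ]},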
272 | "outputs": [], 273 | "source": [ 274 | "# 設定 weight 和 bais\n", 275 | "def weight_variable(shape):\n", 276 | " initial = tf.truncated_normal(shape, stddev=0.1)\n", 277 | " return tf.Variable(initial, name ='W')\n", 278 | "def bias_variable(shape):\n", 279 | " initial = tf.constant(0.1, shape=shape)\n", 280 | " return tf.Variable(initial, name = 'b')\n", 281 | "\n", 282 | "# 設定 cnn 的 layers\n", 283 | "def conv2d(X, W):\n", 284 | " return tf.nn.conv2d(X, W, strides=[1,1,1,1], padding='SAME')\n", 285 | "def max_pool_2x2(X):\n", 286 | " return tf.nn.max_pool(X, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')\n", 287 | "\n", 288 | "# fisrt layer\n", 289 | "with tf.name_scope('conv1'):\n", 290 | " ## variables\n", 291 | " W_conv1 = weight_variable([3,3,1,32])\n", 292 | " b_conv1 = bias_variable([32])\n", 293 | " ## build the layer\n", 294 | " X_image = tf.reshape(X, [-1, 28, 28, 1])\n", 295 | " h_conv1 = tf.nn.relu(conv2d(X_image, W_conv1) + b_conv1)\n", 296 | " h_pool1 = max_pool_2x2(h_conv1)\n", 297 | "\n", 298 | "# second layer\n", 299 | "with tf.name_scope('conv2'):\n", 300 | " ## variables\n", 301 | " W_conv2 = weight_variable([3,3,32,64])\n", 302 | " b_conv2 = bias_variable([64])\n", 303 | " ## build the layer\n", 304 | " h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)\n", 305 | " h_pool2 = max_pool_2x2(h_conv2)\n", 306 | " \n", 307 | "# fully-connected layer\n", 308 | "with tf.name_scope('full'):\n", 309 | " W_fc1 = weight_variable([7*7*64, 1024])\n", 310 | " b_fc1 = bias_variable([1024])\n", 311 | " h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])\n", 312 | " h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1)+b_fc1)\n", 313 | " \n", 314 | "# Dropout: A Simple Way to Prevent Neural Networks from Over fitting\n", 315 | "# https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf\n", 316 | "with tf.name_scope('dropout'):\n", 317 | " keep_prob = tf.placeholder(\"float\", name=\"keep_prob\")\n", 318 | " h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)\n", 319 | "\n", 320 | "# Readout\n", 321 | "with tf.name_scope('readout'):\n", 322 | " W_fc2 = weight_variable([1024,10])\n", 323 | " b_fc2 = bias_variable([10])\n", 324 | " Y = tf.matmul(h_fc1_drop, W_fc2)+b_fc2\n", 325 | " \n", 326 | "cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y_, logits=Y))\n", 327 | "train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)\n", 328 | "prediction = tf.argmax(Y, 1, name=\"prediction\")\n", 329 | "correct_prediction = tf.equal(prediction, tf.argmax(Y_, 1), name=\"correction\")\n", 330 | "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name=\"accuracy\")" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": { 337 | "collapsed": true 338 | }, 339 | "outputs": [], 340 | "source": [ 341 | "import time\n", 342 | "sess = tf.Session()\n", 343 | "sess.run(tf.global_variables_initializer())\n", 344 | "t0 = time.time()\n", 345 | "for epoch in range(10):\n", 346 | " sess.run(training_init_op)\n", 347 | " while True:\n", 348 | " try:\n", 349 | " sess.run(train_step, {keep_prob: 0.5 })\n", 350 | " except tf.errors.OutOfRangeError:\n", 351 | " print(\"End of epoch\", epoch, \"time:\", time.time()-t0)\n", 352 | " break\n", 353 | " sess.run(validation_init_op)\n", 354 | " validation_accuracy = np.mean([sess.run(accuracy,{keep_prob: 1.0 }) for i in range(10)])\n", 355 | " print(\"Epoch %d, validation accuracy %g\"%(epoch, validation_accuracy))\n", 356 | "sess.close()" 357 | ] 358 | }, 359 | { 360 | 
"cell_type": "code", 361 | "execution_count": null, 362 | "metadata": { 363 | "collapsed": true 364 | }, 365 | "outputs": [], 366 | "source": [] 367 | } 368 | ], 369 | "metadata": { 370 | "kernelspec": { 371 | "display_name": "Python 3", 372 | "language": "python", 373 | "name": "python3" 374 | }, 375 | "language_info": { 376 | "codemirror_mode": { 377 | "name": "ipython", 378 | "version": 3 379 | }, 380 | "file_extension": ".py", 381 | "mimetype": "text/x-python", 382 | "name": "python", 383 | "nbconvert_exporter": "python", 384 | "pygments_lexer": "ipython3", 385 | "version": "3.6.2" 386 | } 387 | }, 388 | "nbformat": 4, 389 | "nbformat_minor": 1 390 | } 391 | -------------------------------------------------------------------------------- /05-tf-higher-level-api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Using tf.layers" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from PIL import Image\n", 17 | "import numpy as np\n", 18 | "import tensorflow as tf" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## MNIST Dataset" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import lzma\n", 35 | "import pickle\n", 36 | "with lzma.open(\"mnist.pkl.xz\", 'rb') as f:\n", 37 | " train_set, validation_set, test_set = pickle.load(f, encoding='latin1')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "train_X, train_y = train_set\n", 47 | "validation_X, validation_y = validation_set\n", 48 | "test_X, test_y = test_set\n", 49 | "train_Y = np.eye(10)[train_y]\n", 50 | "test_Y = np.eye(10)[test_y]\n", 51 | "validation_Y = np.eye(10)[validation_y]" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "from IPython.display import display\n", 61 | "def showX(X):\n", 62 | " int_X = (X*255).clip(0,255).astype('uint8')\n", 63 | " # N*784 -> N*28*28 -> 28*N*28 -> 28 * 28N\n", 64 | " int_X_reshape = int_X.reshape(-1,28,28).swapaxes(0,1).reshape(28,-1)\n", 65 | " display(Image.fromarray(int_X_reshape))\n", 66 | "# 訓練資料, X 的前 20 筆\n", 67 | "showX(train_X[:20])\n", 68 | "print(train_y)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "### using dataset" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "training_data = tf.data.Dataset.from_tensor_slices((train_X, train_Y)).shuffle(buffer_size=10000).batch(40)\n", 85 | "validation_data = tf.data.Dataset.from_tensor_slices((validation_X, validation_Y)).batch(40)\n", 86 | "\n", 87 | "\n", 88 | "iterator = tf.data.Iterator.from_structure(training_data.output_types,\n", 89 | " training_data.output_shapes)\n", 90 | "\n", 91 | "training_init_op = iterator.make_initializer(training_data)\n", 92 | "validation_init_op = iterator.make_initializer(validation_data)\n", 93 | "\n", 94 | "X, Y_ = iterator.get_next()" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# 設定 cnn 的 layers\n", 104 | "\n", 105 | 
"X_image = tf.reshape(X, [-1, 28, 28, 1])\n", 106 | "\n", 107 | "# fisrt layer\n", 108 | "h_conv1 = tf.layers.conv2d(X_image, 32, 3, padding='same', activation=tf.nn.relu)\n", 109 | "h_pool1 = tf.layers.max_pooling2d(h_conv1, 2, 2)\n", 110 | "\n", 111 | "# second layer\n", 112 | "h_conv2 = tf.layers.conv2d(h_pool1, 64, 3, padding='same', activation=tf.nn.relu)\n", 113 | "h_pool2 = tf.layers.max_pooling2d(h_conv2, 2, 2)\n", 114 | "h_pool2_flat = tf.layers.flatten(h_pool2)\n", 115 | "\n", 116 | "# fully-connected layer\n", 117 | "h_fc = tf.layers.dense(h_pool2_flat, 1024, activation=tf.nn.relu, name='fc')\n", 118 | "\n", 119 | "# Dropout\n", 120 | "with tf.name_scope('dropout'):\n", 121 | " keep_prob = tf.placeholder(\"float\", name=\"keep_prob\")\n", 122 | " h_fc_drop = tf.nn.dropout(h_fc, keep_prob)\n", 123 | "\n", 124 | "# Readout\n", 125 | "Y = tf.layers.dense(h_fc_drop, 10, activation=tf.nn.relu, name='readout')\n", 126 | "\n", 127 | "cross_entropy = tf.losses.softmax_cross_entropy(onehot_labels=Y_, logits=Y)\n", 128 | "train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)\n", 129 | "prediction = tf.argmax(Y, 1, name=\"prediction\")\n", 130 | "correct_prediction = tf.equal(prediction, tf.argmax(Y_, 1), name=\"correction\")\n", 131 | "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name=\"accuracy\")" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "import time\n", 141 | "sess = tf.Session()\n", 142 | "sess.run(tf.global_variables_initializer())\n", 143 | "t0 = time.time()\n", 144 | "for epoch in range(3):\n", 145 | " sess.run(training_init_op)\n", 146 | " while True:\n", 147 | " try:\n", 148 | " sess.run(train_step, {keep_prob: 0.5 })\n", 149 | " except tf.errors.OutOfRangeError:\n", 150 | " print(\"End of epoch\", epoch, \"time:\", time.time()-t0)\n", 151 | " break\n", 152 | " sess.run(validation_init_op)\n", 153 | " validation_accuracy = np.mean([sess.run(accuracy,{keep_prob: 1.0 }) for i in range(10)])\n", 154 | " print(\"Epoch %d, validation accuracy %g\"%(epoch, validation_accuracy))\n", 155 | "sess.close()" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [] 164 | } 165 | ], 166 | "metadata": { 167 | "kernelspec": { 168 | "display_name": "Python 3", 169 | "language": "python", 170 | "name": "python3" 171 | }, 172 | "language_info": { 173 | "codemirror_mode": { 174 | "name": "ipython", 175 | "version": 3 176 | }, 177 | "file_extension": ".py", 178 | "mimetype": "text/x-python", 179 | "name": "python", 180 | "nbconvert_exporter": "python", 181 | "pygments_lexer": "ipython3", 182 | "version": "3.6.5" 183 | } 184 | }, 185 | "nbformat": 4, 186 | "nbformat_minor": 1 187 | } 188 | -------------------------------------------------------------------------------- /06-keras-intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# windows only hack for graphviz path \n", 12 | "import os\n", 13 | "for path in os.environ['PATH'].split(os.pathsep):\n", 14 | " if path.endswith(\"Library\\\\bin\"):\n", 15 | " os.environ['PATH']+=os.pathsep+os.path.join(path, 'graphviz')" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | 
"outputs": [ 23 | { 24 | "name": "stderr", 25 | "output_type": "stream", 26 | "text": [ 27 | "Using TensorFlow backend.\n" 28 | ] 29 | } 30 | ], 31 | "source": [ 32 | "import keras\n", 33 | "from keras.models import Sequential\n", 34 | "from PIL import Image\n", 35 | "import numpy as np" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "import lzma\n", 47 | "import pickle\n", 48 | "with lzma.open(\"mnist.pkl.xz\", 'rb') as f:\n", 49 | " train_set, validation_set, test_set = pickle.load(f, encoding='latin1')\n", 50 | "train_X, train_y = train_set\n", 51 | "validation_X, validation_y = validation_set\n", 52 | "test_X, test_y = test_set\n", 53 | "\n", 54 | "\n", 55 | "train_Y = np.eye(10)[train_y]\n", 56 | "test_Y = np.eye(10)[test_y]\n", 57 | "validation_Y = np.eye(10)[validation_y]\n", 58 | "\n", 59 | "# or\n", 60 | "# from keras.datasets import mnist\n", 61 | "# from keras.utils import np_utils\n", 62 | "# (train_X, train_y), (test_X, test_y) = mnist.load_data()\n", 63 | "# train_Y = np_utils.to_categorical(train_y, 10)\n", 64 | "# test_Y = np_utils.to_categorical(test_y, 10)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### logistic regression" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "from keras.layers import Dense, Activation\n", 83 | "model = Sequential()\n", 84 | "model.add(Dense(units=10, input_dim=784))\n", 85 | "model.add(Activation('softmax'))" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 5, 91 | "metadata": { 92 | "collapsed": true 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "model.compile(loss='categorical_crossentropy',\n", 97 | " optimizer='sgd',\n", 98 | " metrics=['accuracy'])" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 6, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "image/svg+xml": [ 109 | "\n", 110 | "\n", 111 | "G\n", 112 | "\n", 113 | "\n", 114 | "1902677000824\n", 115 | "\n", 116 | "dense_1_input: InputLayer\n", 117 | "\n", 118 | "input:\n", 119 | "\n", 120 | "output:\n", 121 | "\n", 122 | "(None, 784)\n", 123 | "\n", 124 | "(None, 784)\n", 125 | "\n", 126 | "\n", 127 | "1902676938424\n", 128 | "\n", 129 | "dense_1: Dense\n", 130 | "\n", 131 | "input:\n", 132 | "\n", 133 | "output:\n", 134 | "\n", 135 | "(None, 784)\n", 136 | "\n", 137 | "(None, 10)\n", 138 | "\n", 139 | "\n", 140 | "1902677000824->1902676938424\n", 141 | "\n", 142 | "\n", 143 | "\n", 144 | "\n", 145 | "1902676938704\n", 146 | "\n", 147 | "activation_1: Activation\n", 148 | "\n", 149 | "input:\n", 150 | "\n", 151 | "output:\n", 152 | "\n", 153 | "(None, 10)\n", 154 | "\n", 155 | "(None, 10)\n", 156 | "\n", 157 | "\n", 158 | "1902676938424->1902676938704\n", 159 | "\n", 160 | "\n", 161 | "\n", 162 | "\n", 163 | "" 164 | ], 165 | "text/plain": [ 166 | "" 167 | ] 168 | }, 169 | "execution_count": 6, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "from IPython.display import SVG, display\n", 176 | "from keras.utils.vis_utils import model_to_dot\n", 177 | "\n", 178 | "SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 7, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 
| "name": "stdout", 188 | "output_type": "stream", 189 | "text": [ 190 | "Train on 50000 samples, validate on 10000 samples\n", 191 | "Epoch 1/15\n", 192 | "50000/50000 [==============================] - 0s - loss: 1.3414 - acc: 0.6857 - val_loss: 0.8683 - val_acc: 0.8318\n", 193 | "Epoch 2/15\n", 194 | "50000/50000 [==============================] - 0s - loss: 0.7797 - acc: 0.8291 - val_loss: 0.6367 - val_acc: 0.8615\n", 195 | "Epoch 3/15\n", 196 | "50000/50000 [==============================] - 0s - loss: 0.6348 - acc: 0.8509 - val_loss: 0.5439 - val_acc: 0.8743\n", 197 | "Epoch 4/15\n", 198 | "50000/50000 [==============================] - 0s - loss: 0.5644 - acc: 0.8614 - val_loss: 0.4922 - val_acc: 0.8825\n", 199 | "Epoch 5/15\n", 200 | "50000/50000 [==============================] - 0s - loss: 0.5214 - acc: 0.8681 - val_loss: 0.4590 - val_acc: 0.8887\n", 201 | "Epoch 6/15\n", 202 | "50000/50000 [==============================] - 0s - loss: 0.4919 - acc: 0.8736 - val_loss: 0.4358 - val_acc: 0.8909\n", 203 | "Epoch 7/15\n", 204 | "50000/50000 [==============================] - 0s - loss: 0.4700 - acc: 0.8773 - val_loss: 0.4182 - val_acc: 0.8938\n", 205 | "Epoch 8/15\n", 206 | "50000/50000 [==============================] - 0s - loss: 0.4530 - acc: 0.8801 - val_loss: 0.4044 - val_acc: 0.8959\n", 207 | "Epoch 9/15\n", 208 | "50000/50000 [==============================] - 0s - loss: 0.4393 - acc: 0.8828 - val_loss: 0.3932 - val_acc: 0.8982\n", 209 | "Epoch 10/15\n", 210 | "50000/50000 [==============================] - 0s - loss: 0.4280 - acc: 0.8853 - val_loss: 0.3840 - val_acc: 0.8997\n", 211 | "Epoch 11/15\n", 212 | "50000/50000 [==============================] - 0s - loss: 0.4183 - acc: 0.8867 - val_loss: 0.3762 - val_acc: 0.9005\n", 213 | "Epoch 12/15\n", 214 | "50000/50000 [==============================] - 0s - loss: 0.4101 - acc: 0.8890 - val_loss: 0.3693 - val_acc: 0.9017\n", 215 | "Epoch 13/15\n", 216 | "50000/50000 [==============================] - 0s - loss: 0.4029 - acc: 0.8902 - val_loss: 0.3637 - val_acc: 0.9029\n", 217 | "Epoch 14/15\n", 218 | "50000/50000 [==============================] - 0s - loss: 0.3965 - acc: 0.8918 - val_loss: 0.3585 - val_acc: 0.9036\n", 219 | "Epoch 15/15\n", 220 | "50000/50000 [==============================] - 0s - loss: 0.3908 - acc: 0.8931 - val_loss: 0.3540 - val_acc: 0.9038\n" 221 | ] 222 | }, 223 | { 224 | "data": { 225 | "text/plain": [ 226 | "" 227 | ] 228 | }, 229 | "execution_count": 7, 230 | "metadata": {}, 231 | "output_type": "execute_result" 232 | } 233 | ], 234 | "source": [ 235 | "model.fit(train_X, train_Y, validation_data=(validation_X, validation_Y), batch_size=128, epochs=15)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 8, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "name": "stdout", 245 | "output_type": "stream", 246 | "text": [ 247 | "20/20 [==============================] - 0s\n" 248 | ] 249 | }, 250 | { 251 | "data": { 252 | "text/plain": [ 253 | "array([7, 2, 1, 0, 4, 1, 4, 9, 6, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4], dtype=int64)" 254 | ] 255 | }, 256 | "execution_count": 8, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "# 預測看看 test_X 前 20 筆\n", 263 | "model.predict_classes(test_X[:20])" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 9, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "data": { 273 | "text/plain": [ 274 | "array([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4], 
dtype=int64)" 275 | ] 276 | }, 277 | "execution_count": 9, 278 | "metadata": {}, 279 | "output_type": "execute_result" 280 | } 281 | ], 282 | "source": [ 283 | "# 對答案\n", 284 | "test_y[:20]" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 10, 290 | "metadata": {}, 291 | "outputs": [ 292 | { 293 | "name": "stdout", 294 | "output_type": "stream", 295 | "text": [ 296 | " 8032/10000 [=======================>......] - ETA: 0s" 297 | ] 298 | }, 299 | { 300 | "data": { 301 | "text/plain": [ 302 | "[0.36359533268213273, 0.90269999999999995]" 303 | ] 304 | }, 305 | "execution_count": 10, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "# 看看 test accuracy\n", 312 | "model.evaluate(test_X, test_Y)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": {}, 318 | "source": [ 319 | "## Q \n", 320 | "* 將 `optimizer` 換成 `\"adam\"`\n", 321 | "* 將 `optimizer` 換成 `keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)`" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "### 建立 convolutional model\n", 329 | "我們之前的網路架構\n", 330 | "* convolution 2d kernel=(3,3), filters=32\n", 331 | "* relu\n", 332 | "* max pool\n", 333 | "* convolution 2d kernel=(3,3), filters=64\n", 334 | "* relu\n", 335 | "* max pool\n", 336 | "* dense units=1024\n", 337 | "* relu\n", 338 | "* dropout (rate=0.8) # 先省略這一層\n", 339 | "* dense units = 10\n", 340 | "* softmax\n", 341 | "\n", 342 | "試著架出這樣的網路\n", 343 | "\n", 344 | "然後訓練看看\n", 345 | "\n", 346 | "開頭幾行可以這樣寫\n", 347 | "```python\n", 348 | "from keras.layers import Dense, Activation, Conv2D, MaxPool2D, Reshape\n", 349 | "model = Sequential()\n", 350 | "model.add(Reshape((28, 28, 1), input_shape=(784,) ))\n", 351 | "model.add(Conv2D(filters=32, kernel_size=(3,3), padding='same', activation=\"relu\"))\n", 352 | "```" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 11, 358 | "metadata": { 359 | "collapsed": true 360 | }, 361 | "outputs": [], 362 | "source": [ 363 | "# 參考答案\n", 364 | "#%load q_keras_cnn.py" 365 | ] 366 | } 367 | ], 368 | "metadata": { 369 | "kernelspec": { 370 | "display_name": "Python 3", 371 | "language": "python", 372 | "name": "python3" 373 | }, 374 | "language_info": { 375 | "codemirror_mode": { 376 | "name": "ipython", 377 | "version": 3 378 | }, 379 | "file_extension": ".py", 380 | "mimetype": "text/x-python", 381 | "name": "python", 382 | "nbconvert_exporter": "python", 383 | "pygments_lexer": "ipython3", 384 | "version": "3.6.5" 385 | } 386 | }, 387 | "nbformat": 4, 388 | "nbformat_minor": 1 389 | } 390 | -------------------------------------------------------------------------------- /07-lstm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# imdb dataset" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from keras.preprocessing import sequence\n", 18 | "from keras.models import Sequential\n", 19 | "from keras.layers import Dense, Embedding\n", 20 | "from keras.layers import LSTM\n", 21 | "from keras.datasets import imdb" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "vocabulary_size = 15000\n", 31 | "(x_train, y_train), (x_test, 
y_test) = imdb.load_data(num_words=vocabulary_size)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "print(x_train[0])" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "word2num = imdb.get_word_index()" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "num2word = {v:k for k,v in word2num.items()}" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "print(\" - \".join(num2word[x] for x in x_train[0]))" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "y_train[0]" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "x_train.shape, x_train.dtype" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "### padding the data" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "maxlen = 200\n", 102 | "x_train = sequence.pad_sequences(x_train, maxlen=maxlen)\n", 103 | "x_test = sequence.pad_sequences(x_test, maxlen=maxlen)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "x_train[2]" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "maxlen = 60\n", 122 | "x_train = sequence.pad_sequences(x_train, maxlen=maxlen)\n", 123 | "x_test = sequence.pad_sequences(x_test, maxlen=maxlen)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### LSTM using Keras" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "model = Sequential()\n", 140 | "model.add(Embedding(vocabulary_size, 128))\n", 141 | "model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))\n", 142 | "model.add(Dense(1, activation='sigmoid'))\n", 143 | "model.compile(loss='binary_crossentropy',\n", 144 | " optimizer='adam',\n", 145 | " metrics=['accuracy'])" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "from IPython.display import SVG, display\n", 155 | "from keras.utils.vis_utils import model_to_dot\n", 156 | "\n", 157 | "SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "model.fit(x_train, y_train,\n", 167 | " batch_size=32,\n", 168 | " epochs=2,\n", 169 | " validation_data=(x_test, y_test))" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "score, acc = model.evaluate(x_test, y_test, batch_size=32)\n", 179 | "print(score, acc)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "### Tensorflow 
implementation\n", 187 | "https://www.tensorflow.org/tutorials/recurrent" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "import numpy as np\n", 197 | "from keras.datasets import imdb\n", 198 | "from keras.preprocessing import sequence\n", 199 | "\n", 200 | "vocabulary_size = 15000\n", 201 | "maxlen = 60\n", 202 | "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocabulary_size)\n", 203 | "x_train = sequence.pad_sequences(x_train, maxlen=maxlen)\n", 204 | "x_test = sequence.pad_sequences(x_test, maxlen=maxlen)\n", 205 | "y_train = np.float32(y_train)[:, None]\n", 206 | "y_test = np.float32(y_test)[:, None]" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "# tf dataset\n", 216 | "import tensorflow as tf\n", 217 | "\n", 218 | "batch_size = 40\n", 219 | "lstm_size = 64\n", 220 | "\n", 221 | "train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(buffer_size=10000).batch(batch_size)\n", 222 | "test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)\n", 223 | "\n", 224 | "iterator = tf.data.Iterator.from_structure(train_data.output_types,\n", 225 | " train_data.output_shapes)\n", 226 | "train_data_init = iterator.make_initializer(train_data)\n", 227 | "test_data_init = iterator.make_initializer(test_data)\n", 228 | "X, Y_ = iterator.get_next()" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "# word embedding\n", 238 | "embedding_matrix = tf.Variable(tf.random_uniform([vocabulary_size, 128], -1.0, 1.0))\n", 239 | "word_embeddings = tf.nn.embedding_lookup(embedding_matrix, X)\n", 240 | "\n", 241 | "# LSTM cell\n", 242 | "lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)\n", 243 | "\n", 244 | "# dropout wrapper\n", 245 | "keep_prob = tf.placeholder(\"float\", name=\"keep_prob\")\n", 246 | "lstm = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob=keep_prob, state_keep_prob=keep_prob)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "# LSTM network\n", 256 | "lstm_input =[word_embeddings[:, i] for i in range(maxlen)]\n", 257 | "init_state = state = lstm.zero_state(batch_size, dtype=tf.float32)\n", 258 | "for x in lstm_input:\n", 259 | " lstm_output, state = lstm(x, state)\n", 260 | "# or use tf.split, tf.contrib.rnn.static_rnn" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "lstm_output" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "# readout\n", 279 | "Y = tf.layers.dense(lstm_output, 1, activation=tf.nn.sigmoid)\n", 280 | "\n", 281 | "# loss function and training\n", 282 | "loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y_, logits=Y))\n", 283 | "train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)\n", 284 | "prediction = tf.floor(Y+0.5)\n", 285 | "accuracy = tf.reduce_mean(tf.cast(tf.less(tf.abs(Y-Y_), 0.5), tf.float32))\n" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "import time\n", 295 | "sess = 
tf.Session()\n", 296 | "sess.run(tf.global_variables_initializer())\n", 297 | "t0 = time.time()\n", 298 | "for epoch in range(10):\n", 299 | " sess.run(train_data_init)\n", 300 | " while True:\n", 301 | " try:\n", 302 | " sess.run(train_step, {keep_prob: 0.8 })\n", 303 | " except tf.errors.OutOfRangeError:\n", 304 | " print(\"End of epoch\", epoch, \"time:\", time.time()-t0)\n", 305 | " break\n", 306 | " sess.run(test_data_init)\n", 307 | " test_accuracy = np.mean([sess.run(accuracy,{keep_prob: 1.0 }) for i in range(100)])\n", 308 | " print(\"Epoch %d, validation accuracy %g\"%(epoch, test_accuracy))\n", 309 | "sess.close()" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [] 318 | } 319 | ], 320 | "metadata": { 321 | "kernelspec": { 322 | "display_name": "Python 3", 323 | "language": "python", 324 | "name": "python3" 325 | }, 326 | "language_info": { 327 | "codemirror_mode": { 328 | "name": "ipython", 329 | "version": 3 330 | }, 331 | "file_extension": ".py", 332 | "mimetype": "text/x-python", 333 | "name": "python", 334 | "nbconvert_exporter": "python", 335 | "pygments_lexer": "ipython3", 336 | "version": "3.6.5" 337 | } 338 | }, 339 | "nbformat": 4, 340 | "nbformat_minor": 1 341 | } 342 | -------------------------------------------------------------------------------- /HW1-CIFAR10.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "CIFAR10 是另外一個 dataset, 和 mnist 一樣,有十種類別(飛機、汽車、鳥、貓、鹿、狗、青蛙、馬、船、卡車)\n", 8 | "\n", 9 | "https://www.cs.toronto.edu/~kriz/cifar.html\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import keras\n", 19 | "from PIL import Image\n", 20 | "import numpy as np" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "from keras.datasets import cifar10\n", 32 | "from keras.utils import np_utils\n", 33 | "(train_X, train_y), (test_X, test_y) = cifar10.load_data()\n", 34 | "train_X = (train_X-127.5)/256\n", 35 | "test_X = (test_X-127.5)/256\n", 36 | "train_Y = np_utils.to_categorical(train_y, 10)\n", 37 | "test_Y = np_utils.to_categorical(test_y, 10)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### 查看一下資料" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "train_X.shape" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "train_Y.shape" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# channels x 高 x 寬 (顏色)\n", 72 | "3*32*32" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "from IPython.display import display\n", 82 | "def showX(X):\n", 83 | " int_X = (X*255+128).clip(0,255).astype('uint8')\n", 84 | " int_X_reshape = int_X.swapaxes(0,1).reshape(32,-1, 3)\n", 85 | " display(Image.fromarray(int_X_reshape))\n", 86 | "# 訓練資料, X 的前 20 筆\n", 87 | "showX(train_X[:20])\n", 88 | "print(train_y[:20,0])\n", 89 | "name_array = 
np.array(\"飛機、汽車、鳥、貓、鹿、狗、青蛙、馬、船、卡車\".split('、'))\n", 90 | "print(name_array[train_y[:20, 0]])" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "## Q \n", 98 | "* 將之前的 logistic regression 套用過來看看\n", 99 | "* 將之前的 cnn model 套用過來看看 (注意資料格式, channel x H x W 還是 H x W x channel)\n", 100 | "* 試試看改善準確度\n", 101 | "* 增加 Dropout (https://keras.io/layers/core/#dropout)\n", 102 | "* 增加 BatchNormaliztion (https://keras.io/layers/normalization/)\n", 103 | "* activation 換成其他的?" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "test accuracy%\n", 111 | "\n", 112 | "$points = (accuracy*0.8-42 points)$\n", 113 | "\n", 114 | "* 90%: 30pts\n", 115 | "* 80%: 22pts\n", 116 | "* 70%: 14pts\n", 117 | "* 60%: 6pts " 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": true 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "# 如果是用 keras \n", 129 | "#rtn = model.evaluate(test_X, test_Y)\n", 130 | "#print(\"test accuracy=\", rtn[1])" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 3", 137 | "language": "python", 138 | "name": "python3" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 3 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython3", 150 | "version": "3.6.2" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 1 155 | } 156 | -------------------------------------------------------------------------------- /HW2-fashion mnist.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "/home/tjw/anaconda3/envs/keras/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. 
In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 13 | " from ._conv import register_converters as _register_converters\n", 14 | "Using TensorFlow backend.\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import keras\n", 20 | "from PIL import Image\n", 21 | "import numpy as np" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### load data" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz\n", 41 | "32768/29515 [=================================] - 1s 18us/step\n", 42 | "Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz\n", 43 | "26427392/26421880 [==============================] - 28s 1us/step\n", 44 | "Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz\n", 45 | "8192/5148 [===============================================] - 0s 0us/step\n", 46 | "Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz\n", 47 | "4423680/4422102 [==============================] - 12s 3us/step\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "from keras.datasets import fashion_mnist\n", 53 | "from keras.utils import np_utils\n", 54 | "(train_X, train_y), (test_X, test_y) = fashion_mnist.load_data()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 6, 60 | "metadata": { 61 | "collapsed": true 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "train_X = (train_X-127.5)/256\n", 66 | "test_X = (test_X-127.5)/256\n", 67 | "train_Y = np_utils.to_categorical(train_y, 10)\n", 68 | "test_Y = np_utils.to_categorical(test_y, 10)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "### 查看一下資料" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "collapsed": true 83 | }, 84 | "outputs": [], 85 | "source": [] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "訓練" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "分數 \n", 108 | " (test accuracy) - 70 points\n", 109 | "* 98 28pts\n", 110 | "* 95 25pts\n", 111 | "* 90 20pts\n", 112 | "* 85 15pts\n", 113 | "* 80 10pts\n", 114 | "* <=70 0pts" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "# 如果是用 keras \n", 126 | "#rtn = model.evaluate(test_X, test_Y)\n", 127 | "#print(\"test accuracy=\", rtn[1])" 128 | ] 129 | } 130 | ], 131 | "metadata": { 132 | "kernelspec": { 133 | "display_name": "Python 3", 134 | "language": "python", 135 | "name": "python3" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 3 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython3", 147 | "version": "3.6.2" 148 | } 149 | }, 150 | "nbformat": 4, 151 | "nbformat_minor": 1 
152 | } 153 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tensorflow-tutorial 2 | 3 | ## 安裝環境 4 | 5 | * 下載並且安裝 Anaconda python 3.6 版本, 請盡量安裝 64-bit 版本。 6 | * clone 或者下載並解開(右邊偏上綠色的 Clone or download, 選 Download zip)本 repo 7 | * 在 anaconda console 下, cd 到本專案目錄, 執行 conda env create -f environment.yml 8 | * 如果需要更新或者網路中斷,可以 conda env update -f environment.yml 9 | * 最後執行 `. activate keras` (windows `activate keras`) 10 | * 然後 jupyter notebook 打開 notebook 11 | 12 | ## 評量標準 13 | * 01-Tensorflow Basics.ipynb 的 7 個 Q (25%) 14 | * 03-TensorBoard.ipynb 的三個 log (10%) 15 | * 04-tf.data 的 1 個 Q (5%) 16 | * HW1 (30%) 17 | * HW2 (30%) 18 | 19 | Google 表單 https://goo.gl/forms/G6Z2blK1cnxIxSwi2 20 | 請繳交程式碼或/和執行圖片 21 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: keras 2 | dependencies: 3 | - graphviz 4 | - ipython 5 | - matplotlib 6 | - notebook 7 | - numpy 8 | - pillow 9 | - pip 10 | - scikit-learn 11 | - pip: 12 | - graphviz 13 | - tensorflow 14 | - keras 15 | - pydot 16 | -------------------------------------------------------------------------------- /gan/AutoEncoder.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "[View in Colaboratory](https://colab.research.google.com/github/tjwei/tensorflow-tutorial/blob/master/gan/AutoEncoder.ipynb)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 0, 16 | "metadata": { 17 | "colab": {}, 18 | "colab_type": "code", 19 | "id": "-dCjQjjtl7hT" 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import keras\n", 24 | "from PIL import Image\n", 25 | "import numpy as np" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 0, 31 | "metadata": { 32 | "colab": {}, 33 | "colab_type": "code", 34 | "id": "yd_vBlv2mSj4" 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "from keras.datasets import mnist\n", 39 | "(train_X, train_y), (test_X, test_y) = mnist.load_data()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | 
"execution_count": 0, 45 | "metadata": { 46 | "colab": {}, 47 | "colab_type": "code", 48 | "id": "jDKw_KXymiDZ" 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "train_X.shape" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 0, 58 | "metadata": { 59 | "colab": {}, 60 | "colab_type": "code", 61 | "id": "SrjSQnzxml8v" 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "train_X.dtype" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 0, 71 | "metadata": { 72 | "colab": {}, 73 | "colab_type": "code", 74 | "id": "jXHf8f5hmoUr" 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "Image.fromarray(train_X[1])" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 0, 84 | "metadata": { 85 | "colab": {}, 86 | "colab_type": "code", 87 | "id": "fk6tMBlJnbzt" 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "train_X = (train_X[..., None]-127.5)/128" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 0, 97 | "metadata": { 98 | "colab": {}, 99 | "colab_type": "code", 100 | "id": "_g1vGwZOpRXi" 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "train_X.shape" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 0, 110 | "metadata": { 111 | "colab": {}, 112 | "colab_type": "code", 113 | "id": "CTxiSDxvmz9M" 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "from IPython.display import display\n", 118 | "def showX(X, rows=1):\n", 119 | " assert X.shape[0] % rows == 0\n", 120 | " int_X = (X*128+128).clip(0,255).astype('uint8')\n", 121 | " # N*784 -> N*28*28 -> 28*N*28 -> 28 * 28N\n", 122 | " int_X_reshape = int_X.reshape(rows, -1,28,28).swapaxes(1,2).reshape(28*rows,-1)\n", 123 | " display(Image.fromarray(int_X_reshape))\n", 124 | "# 訓練資料, X 的前 20 筆\n", 125 | "showX(train_X[:10])\n", 126 | "print(train_y)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 0, 132 | "metadata": { 133 | "colab": {}, 134 | "colab_type": "code", 135 | "id": "fnciP38syalM" 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "NZ = 8" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 0, 145 | "metadata": { 146 | "colab": {}, 147 | "colab_type": "code", 148 | "id": "uX3Zqg36nWvt" 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "from keras.models import Sequential\n", 153 | "from keras.layers import Conv2D, Activation, GlobalAveragePooling2D\n", 154 | "\n", 155 | "netE = Sequential([\n", 156 | " Conv2D(filters=32, kernel_size=3, strides=2, padding='same', activation='relu', input_shape=(28,28,1)),\n", 157 | " Conv2D(filters=32, kernel_size=3, strides=2, padding='same', activation='relu'),\n", 158 | " Conv2D(filters=NZ, kernel_size=3, strides=2, padding='valid'),\n", 159 | " GlobalAveragePooling2D(),\n", 160 | " Activation('tanh')\n", 161 | " \n", 162 | "])" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 0, 168 | "metadata": { 169 | "colab": {}, 170 | "colab_type": "code", 171 | "id": "mdRxhU0wpj9q" 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "netE.summary()" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 0, 181 | "metadata": { 182 | "colab": {}, 183 | "colab_type": "code", 184 | "id": "xYoktxlbp_cj" 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "from keras.layers import Conv2DTranspose, Reshape\n", 189 | "netG = Sequential([\n", 190 | " Reshape( (1,1,NZ), input_shape=(NZ,)),\n", 191 | " Conv2DTranspose(filters=32, kernel_size=3, strides=2, padding='valid', activation='relu'),\n", 192 
| " Conv2DTranspose(filters=32, kernel_size=3, strides=2, padding='valid', activation='relu'),\n", 193 | " Conv2DTranspose(filters=32, kernel_size=3, strides=2, padding='same', activation='relu'),\n", 194 | " Conv2DTranspose(filters=1, kernel_size=3, strides=2, padding='same')\n", 195 | " \n", 196 | "])" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 0, 202 | "metadata": { 203 | "colab": {}, 204 | "colab_type": "code", 205 | "id": "76exHCVErjlK" 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "netG.summary()" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 0, 215 | "metadata": { 216 | "colab": {}, 217 | "colab_type": "code", 218 | "id": "IkctGI_YrsgL" 219 | }, 220 | "outputs": [], 221 | "source": [ 222 | "from keras.models import Model\n", 223 | "EG_output = netG(netE.outputs)\n", 224 | "netEG = Model(inputs=netE.inputs, outputs=[EG_output])" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 0, 230 | "metadata": { 231 | "colab": {}, 232 | "colab_type": "code", 233 | "id": "O94CnTYbr85u" 234 | }, 235 | "outputs": [], 236 | "source": [ 237 | "netEG.summary()" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 0, 243 | "metadata": { 244 | "colab": {}, 245 | "colab_type": "code", 246 | "id": "iEpRqBoCsbly" 247 | }, 248 | "outputs": [], 249 | "source": [ 250 | "import numpy as np\n", 251 | "showX(netG.predict(np.random.normal( size=(10, NZ))))" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 0, 257 | "metadata": { 258 | "colab": {}, 259 | "colab_type": "code", 260 | "id": "RjyTCa04tA72" 261 | }, 262 | "outputs": [], 263 | "source": [ 264 | "netEG.compile(loss='mse', optimizer='adam')" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 0, 270 | "metadata": { 271 | "colab": {}, 272 | "colab_type": "code", 273 | "id": "X2xixfbYtIye" 274 | }, 275 | "outputs": [], 276 | "source": [ 277 | "netEG.fit(train_X, train_X, epochs=1)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 0, 283 | "metadata": { 284 | "colab": {}, 285 | "colab_type": "code", 286 | "id": "V98xbwAota4F" 287 | }, 288 | "outputs": [], 289 | "source": [ 290 | "showX(netG.predict(np.random.normal(size=(100, NZ))), 10)" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 0, 296 | "metadata": { 297 | "colab": {}, 298 | "colab_type": "code", 299 | "id": "Nzhu7MnLtdHX" 300 | }, 301 | "outputs": [], 302 | "source": [ 303 | "showX(netEG.predict(train_X[:100]), 10)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 0, 309 | "metadata": { 310 | "colab": {}, 311 | "colab_type": "code", 312 | "id": "B56LXvZmxXuk" 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "codes = netE.predict(train_X[:1000])\n", 317 | "#codes = netE.predict(train_X[train_y==2])" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 0, 323 | "metadata": { 324 | "colab": {}, 325 | "colab_type": "code", 326 | "id": "bi2MdxYx0-Qe" 327 | }, 328 | "outputs": [], 329 | "source": [] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 0, 334 | "metadata": { 335 | "colab": {}, 336 | "colab_type": "code", 337 | "id": "nFZWb0N6xfOg" 338 | }, 339 | "outputs": [], 340 | "source": [ 341 | "codes.mean(axis=0)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 0, 347 | "metadata": { 348 | "colab": {}, 349 | "colab_type": "code", 350 | "id": "BgMaJPQ8xlIQ" 351 | }, 352 | 
"outputs": [], 353 | "source": [ 354 | "codes.std(axis=0)" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 0, 360 | "metadata": { 361 | "colab": {}, 362 | "colab_type": "code", 363 | "id": "biMCsL07xrGK" 364 | }, 365 | "outputs": [], 366 | "source": [ 367 | "z = np.random.normal(codes.mean(axis=0), scale=codes.std(axis=0), size=(100,NZ))\n", 368 | "showX(netG.predict(z), 10)\n" 369 | ] 370 | } 371 | ], 372 | "metadata": { 373 | "accelerator": "GPU", 374 | "colab": { 375 | "collapsed_sections": [], 376 | "include_colab_link": true, 377 | "name": "AutoEncoder.ipynb", 378 | "provenance": [], 379 | "version": "0.3.2" 380 | }, 381 | "kernelspec": { 382 | "display_name": "Python 3", 383 | "language": "python", 384 | "name": "python3" 385 | }, 386 | "language_info": { 387 | "codemirror_mode": { 388 | "name": "ipython", 389 | "version": 3 390 | }, 391 | "file_extension": ".py", 392 | "mimetype": "text/x-python", 393 | "name": "python", 394 | "nbconvert_exporter": "python", 395 | "pygments_lexer": "ipython3", 396 | "version": "3.6.5" 397 | } 398 | }, 399 | "nbformat": 4, 400 | "nbformat_minor": 1 401 | } 402 | -------------------------------------------------------------------------------- /gan/CycleGAN-keras.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import keras.backend as K\n", 10 | "K.set_image_data_format('channels_last')\n", 11 | "channel_axis=-1\n", 12 | "channel_first = False" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "from keras.models import Sequential, Model\n", 22 | "from keras.layers import Conv2D, ZeroPadding2D, BatchNormalization, Input, Dropout\n", 23 | "from keras.layers import Conv2DTranspose, Reshape, Activation, Cropping2D, Flatten\n", 24 | "from keras.layers import Concatenate\n", 25 | "from keras.layers.advanced_activations import LeakyReLU\n", 26 | "from keras.activations import relu\n", 27 | "from keras.initializers import RandomNormal" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# Weights initializations\n", 37 | "# bias are initailized as 0\n", 38 | "def __conv_init(a):\n", 39 | " print(\"conv_init\", a)\n", 40 | " k = RandomNormal(0, 0.02)(a) # for convolution kernel\n", 41 | " k.conv_weight = True \n", 42 | " return k\n", 43 | "conv_init = RandomNormal(0, 0.02)\n", 44 | "gamma_init = RandomNormal(1., 0.02) # for batch normalization\n" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# HACK speed up theano\n", 54 | "if K._BACKEND == 'theano':\n", 55 | " import keras.backend.theano_backend as theano_backend\n", 56 | " def _preprocess_conv2d_kernel(kernel, data_format):\n", 57 | " #return kernel\n", 58 | " if hasattr(kernel, \"original\"):\n", 59 | " print(\"use original\")\n", 60 | " return kernel.original\n", 61 | " elif hasattr(kernel, '_keras_shape'):\n", 62 | " s = kernel._keras_shape\n", 63 | " print(\"use reshape\",s)\n", 64 | " kernel = kernel.reshape((s[3], s[2],s[0], s[1]))\n", 65 | " else:\n", 66 | " kernel = kernel.dimshuffle((3, 2, 0, 1))\n", 67 | " return kernel\n", 68 | " theano_backend._preprocess_conv2d_kernel = _preprocess_conv2d_kernel" 69 | ] 70 | }, 71 | { 
72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "# Basic discriminator\n", 78 | "def conv2d(f, *a, **k):\n", 79 | " return Conv2D(f, kernel_initializer = conv_init, *a, **k)\n", 80 | "def batchnorm():\n", 81 | " return BatchNormalization(momentum=0.9, axis=channel_axis, epsilon=1.01e-5,\n", 82 | " gamma_initializer = gamma_init)\n", 83 | "def BASIC_D(nc_in, ndf, max_layers=3, use_sigmoid=True):\n", 84 | " \"\"\"DCGAN_D(nc, ndf, max_layers=3)\n", 85 | " nc: channels\n", 86 | " ndf: filters of the first layer\n", 87 | " max_layers: max hidden layers\n", 88 | " \"\"\" \n", 89 | " if channel_first:\n", 90 | " input_a = Input(shape=(nc_in, None, None))\n", 91 | " else:\n", 92 | " input_a = Input(shape=(None, None, nc_in))\n", 93 | " _ = input_a\n", 94 | " _ = conv2d(ndf, kernel_size=4, strides=2, padding=\"same\", name = 'First') (_)\n", 95 | " _ = LeakyReLU(alpha=0.2)(_)\n", 96 | " \n", 97 | " for layer in range(1, max_layers): \n", 98 | " out_feat = ndf * min(2**layer, 8)\n", 99 | " _ = conv2d(out_feat, kernel_size=4, strides=2, padding=\"same\", \n", 100 | " use_bias=False, name = 'pyramid.{0}'.format(layer) \n", 101 | " ) (_)\n", 102 | " _ = batchnorm()(_, training=1) \n", 103 | " _ = LeakyReLU(alpha=0.2)(_)\n", 104 | " \n", 105 | " out_feat = ndf*min(2**max_layers, 8)\n", 106 | " _ = ZeroPadding2D(1)(_)\n", 107 | " _ = conv2d(out_feat, kernel_size=4, use_bias=False, name = 'pyramid_last') (_)\n", 108 | " _ = batchnorm()(_, training=1)\n", 109 | " _ = LeakyReLU(alpha=0.2)(_)\n", 110 | " \n", 111 | " # final layer\n", 112 | " _ = ZeroPadding2D(1)(_)\n", 113 | " _ = conv2d(1, kernel_size=4, name = 'final'.format(out_feat, 1), \n", 114 | " activation = \"sigmoid\" if use_sigmoid else None) (_) \n", 115 | " return Model(inputs=[input_a], outputs=_)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "def UNET_G(isize, nc_in=3, nc_out=3, ngf=64, fixed_input_size=True): \n", 125 | " max_nf = 8*ngf \n", 126 | " def block(x, s, nf_in, use_batchnorm=True, nf_out=None, nf_next=None):\n", 127 | " # print(\"block\",x,s,nf_in, use_batchnorm, nf_out, nf_next)\n", 128 | " assert s>=2 and s%2==0\n", 129 | " if nf_next is None:\n", 130 | " nf_next = min(nf_in*2, max_nf)\n", 131 | " if nf_out is None:\n", 132 | " nf_out = nf_in\n", 133 | " x = conv2d(nf_next, kernel_size=4, strides=2, use_bias=(not (use_batchnorm and s>2)),\n", 134 | " padding=\"same\", name = 'conv_{0}'.format(s)) (x)\n", 135 | " if s>2:\n", 136 | " if use_batchnorm:\n", 137 | " x = batchnorm()(x, training=1)\n", 138 | " x2 = LeakyReLU(alpha=0.2)(x)\n", 139 | " x2 = block(x2, s//2, nf_next)\n", 140 | " x = Concatenate(axis=channel_axis)([x, x2]) \n", 141 | " x = Activation(\"relu\")(x)\n", 142 | " x = Conv2DTranspose(nf_out, kernel_size=4, strides=2, use_bias=not use_batchnorm,\n", 143 | " kernel_initializer = conv_init, \n", 144 | " name = 'convt.{0}'.format(s))(x) \n", 145 | " x = Cropping2D(1)(x)\n", 146 | " if use_batchnorm:\n", 147 | " x = batchnorm()(x, training=1)\n", 148 | " if s <=8:\n", 149 | " x = Dropout(0.5)(x, training=1)\n", 150 | " return x\n", 151 | " \n", 152 | " s = isize if fixed_input_size else None\n", 153 | " if channel_first:\n", 154 | " _ = inputs = Input(shape=(nc_in, s, s))\n", 155 | " else:\n", 156 | " _ = inputs = Input(shape=(s, s, nc_in)) \n", 157 | " _ = block(_, isize, nc_in, False, nf_out=nc_out, nf_next=ngf)\n", 158 | " _ = 
Activation('tanh')(_)\n", 159 | " return Model(inputs=inputs, outputs=[_])" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": { 166 | "collapsed": true 167 | }, 168 | "outputs": [], 169 | "source": [ 170 | "nc_in = 3\n", 171 | "nc_out = 3\n", 172 | "ngf = 64\n", 173 | "ndf = 64\n", 174 | "use_lsgan = True\n", 175 | "λ = 10 if use_lsgan else 100\n", 176 | "\n", 177 | "loadSize = 143\n", 178 | "imageSize = 128\n", 179 | "batchSize = 1\n", 180 | "lrD = 2e-4\n", 181 | "lrG = 2e-4" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "netDA = BASIC_D(nc_in, ndf, use_sigmoid = not use_lsgan)\n", 191 | "netDB = BASIC_D(nc_out, ndf, use_sigmoid = not use_lsgan)\n", 192 | "netDA.summary()\n" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": { 199 | "scrolled": true 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "from IPython.display import SVG\n", 204 | "from keras.utils.vis_utils import model_to_dot\n", 205 | "\n", 206 | "\n", 207 | "netGB = UNET_G(imageSize, nc_in, nc_out, ngf)\n", 208 | "netGA = UNET_G(imageSize, nc_out, nc_in, ngf)\n", 209 | "#SVG(model_to_dot(netG, show_shapes=True).create(prog='dot', format='svg'))\n", 210 | "netGA.summary()\n" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "collapsed": true 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "from keras.optimizers import RMSprop, SGD, Adam" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "if use_lsgan:\n", 231 | " loss_fn = lambda output, target : K.mean(K.abs(K.square(output-target)))\n", 232 | "else:\n", 233 | " loss_fn = lambda output, target : -K.mean(K.log(output+1e-12)*target+K.log(1-output+1e-12)*(1-target))\n", 234 | "\n", 235 | "def cycle_variables(netG1, netG2):\n", 236 | " real_input = netG1.inputs[0]\n", 237 | " fake_output = netG1.outputs[0]\n", 238 | " rec_input = netG2([fake_output])\n", 239 | " fn_generate = K.function([real_input], [fake_output, rec_input])\n", 240 | " return real_input, fake_output, rec_input, fn_generate\n", 241 | "\n", 242 | "real_A, fake_B, rec_A, cycleA_generate = cycle_variables(netGB, netGA)\n", 243 | "real_B, fake_A, rec_B, cycleB_generate = cycle_variables(netGA, netGB)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "def D_loss(netD, real, fake, rec):\n", 253 | " output_real = netD([real])\n", 254 | " output_fake = netD([fake])\n", 255 | " loss_D_real = loss_fn(output_real, K.ones_like(output_real))\n", 256 | " loss_D_fake = loss_fn(output_fake, K.zeros_like(output_fake))\n", 257 | " loss_G = loss_fn(output_fake, K.ones_like(output_fake))\n", 258 | " loss_D = loss_D_real+loss_D_fake\n", 259 | " loss_cyc = K.mean(K.abs(rec-real))\n", 260 | " return loss_D, loss_G, loss_cyc\n", 261 | "\n", 262 | "loss_DA, loss_GA, loss_cycA = D_loss(netDA, real_A, fake_A, rec_A)\n", 263 | "loss_DB, loss_GB, loss_cycB = D_loss(netDB, real_B, fake_B, rec_B)\n", 264 | "loss_cyc = loss_cycA+loss_cycB" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": { 271 | "collapsed": true 272 | }, 273 | "outputs": [], 274 | "source": [ 275 | "loss_G = loss_GA+loss_GB+λ*loss_cyc\n", 276 | "loss_D = 
loss_DA+loss_DB\n", 277 | "\n", 278 | "weightsD = netDA.trainable_weights + netDB.trainable_weights\n", 279 | "weightsG = netGA.trainable_weights + netGB.trainable_weights\n", 280 | "\n", 281 | "training_updates = Adam(lr=lrD, beta_1=0.5).get_updates(weightsD,[],loss_D)\n", 282 | "netD_train = K.function([real_A, real_B],[loss_DA/2, loss_DB/2], training_updates)\n", 283 | "training_updates = Adam(lr=lrG, beta_1=0.5).get_updates(weightsG,[], loss_G)\n", 284 | "netG_train = K.function([real_A, real_B], [loss_GA, loss_GB, loss_cyc], training_updates)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "from PIL import Image\n", 294 | "import numpy as np\n", 295 | "import glob\n", 296 | "from random import randint, shuffle\n", 297 | "\n", 298 | "def load_data(file_pattern):\n", 299 | " return glob.glob(file_pattern)\n", 300 | "\n", 301 | "def read_image(fn):\n", 302 | " im = Image.open(fn).convert('RGB')\n", 303 | " im = im.resize( (loadSize, loadSize), Image.BILINEAR )\n", 304 | " arr = np.array(im)/255*2-1\n", 305 | " w1,w2 = (loadSize-imageSize)//2,(loadSize+imageSize)//2\n", 306 | " h1,h2 = w1,w2\n", 307 | " img = arr[h1:h2, w1:w2, :]\n", 308 | " if randint(0,1):\n", 309 | " img=img[:,::-1]\n", 310 | " if channel_first: \n", 311 | " img = np.moveaxis(img, 2, 0)\n", 312 | " return img\n", 313 | "\n", 314 | "#data = \"edges2shoes\"\n", 315 | "data = \"horse2zebra\"\n", 316 | "train_A = load_data('CycleGAN/{}/trainA/*.jpg'.format(data))\n", 317 | "train_B = load_data('CycleGAN/{}/trainB/*.jpg'.format(data))\n", 318 | "\n", 319 | "assert len(train_A) and len(train_B)" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": { 326 | "collapsed": true 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "def minibatch(data, batchsize):\n", 331 | " length = len(data)\n", 332 | " epoch = i = 0\n", 333 | " tmpsize = None \n", 334 | " while True:\n", 335 | " size = tmpsize if tmpsize else batchsize\n", 336 | " if i+size > length:\n", 337 | " shuffle(data)\n", 338 | " i = 0\n", 339 | " epoch+=1 \n", 340 | " rtn = [read_image(data[j]) for j in range(i,i+size)]\n", 341 | " i+=size\n", 342 | " tmpsize = yield epoch, np.float32(rtn) \n", 343 | "\n", 344 | "def minibatchAB(dataA, dataB, batchsize):\n", 345 | " batchA=minibatch(dataA, batchsize)\n", 346 | " batchB=minibatch(dataB, batchsize)\n", 347 | " tmpsize = None \n", 348 | " while True: \n", 349 | " ep1, A = batchA.send(tmpsize)\n", 350 | " ep2, B = batchB.send(tmpsize)\n", 351 | " tmpsize = yield max(ep1, ep2), A, B" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": {}, 358 | "outputs": [], 359 | "source": [ 360 | "from IPython.display import display\n", 361 | "def showX(X, rows=1):\n", 362 | " assert X.shape[0]%rows == 0\n", 363 | " int_X = ( (X+1)/2*255).clip(0,255).astype('uint8')\n", 364 | " if channel_first:\n", 365 | " int_X = np.moveaxis(int_X.reshape(-1,3,imageSize,imageSize), 1, 3)\n", 366 | " else:\n", 367 | " int_X = int_X.reshape(-1,imageSize,imageSize, 3)\n", 368 | " int_X = int_X.reshape(rows, -1, imageSize, imageSize,3).swapaxes(1,2).reshape(rows*imageSize,-1, 3)\n", 369 | " display(Image.fromarray(int_X))" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "train_batch = minibatchAB(train_A, train_B, 6)\n", 379 | "\n", 380 | "_, A, B = 
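The `minibatch` generator above doubles as a data pipeline with an adjustable batch size: the training loop drives it with `next()` for regular batches and `send(n)` for a one-off batch of `n` display images. A minimal model of that protocol, using a plain list in place of the image files:

```python
def resizable_batches(data, batchsize):
    i, tmpsize = 0, None
    while True:
        size = tmpsize if tmpsize else batchsize
        batch = data[i:i + size]
        i += size
        tmpsize = yield batch   # a send(n) from the caller lands here

gen = resizable_batches(list(range(100)), 4)
print(next(gen))    # [0, 1, 2, 3]            regular batch
print(gen.send(6))  # [4, 5, 6, 7, 8, 9]      one-off batch of 6
print(next(gen))    # [10, 11, 12, 13]        back to batchsize
```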
next(train_batch)\n", 381 | "showX(A)\n", 382 | "showX(B)\n", 383 | "del train_batch, A, B" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": { 390 | "collapsed": true 391 | }, 392 | "outputs": [], 393 | "source": [ 394 | "def showG(A,B):\n", 395 | " assert A.shape==B.shape\n", 396 | " def G(fn_generate, X):\n", 397 | " r = np.array([fn_generate([X[i:i+1]]) for i in range(X.shape[0])])\n", 398 | " return r.swapaxes(0,1)[:,:,0] \n", 399 | " rA = G(cycleA_generate, A)\n", 400 | " rB = G(cycleB_generate, B)\n", 401 | " arr = np.concatenate([A,B,rA[0],rB[0],rA[1],rB[1]])\n", 402 | " showX(arr, 3)" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": null, 408 | "metadata": { 409 | "scrolled": true 410 | }, 411 | "outputs": [], 412 | "source": [ 413 | "import time\n", 414 | "from IPython.display import clear_output\n", 415 | "t0 = time.time()\n", 416 | "niter = 150\n", 417 | "gen_iterations = 0\n", 418 | "epoch = 0\n", 419 | "errCyc_sum = errGA_sum = errGB_sum = errDA_sum = errDB_sum = 0\n", 420 | "\n", 421 | "display_iters = 50\n", 422 | "#val_batch = minibatch(valAB, 6, direction)\n", 423 | "train_batch = minibatchAB(train_A, train_B, batchSize)\n", 424 | "\n", 425 | "while epoch < niter: \n", 426 | " epoch, A, B = next(train_batch) \n", 427 | " errDA, errDB = netD_train([A, B])\n", 428 | " errDA_sum +=errDA\n", 429 | " errDB_sum +=errDB\n", 430 | "\n", 431 | " # epoch, trainA, trainB = next(train_batch)\n", 432 | " errGA, errGB, errCyc = netG_train([A, B])\n", 433 | " errGA_sum += errGA\n", 434 | " errGB_sum += errGB\n", 435 | " errCyc_sum += errCyc\n", 436 | " gen_iterations+=1\n", 437 | " if gen_iterations%display_iters==0:\n", 438 | " #if gen_iterations%(5*display_iters)==0:\n", 439 | " clear_output()\n", 440 | " print('[%d/%d][%d] Loss_D: %f %f Loss_G: %f %f loss_cyc %f'\n", 441 | " % (epoch, niter, gen_iterations, errDA_sum/display_iters, errDB_sum/display_iters,\n", 442 | " errGA_sum/display_iters, errGB_sum/display_iters, \n", 443 | " errCyc_sum/display_iters), time.time()-t0)\n", 444 | " _, A, B = train_batch.send(4)\n", 445 | " showG(A,B) \n", 446 | " errCyc_sum = errGA_sum = errGB_sum = errDA_sum = errDB_sum = 0" 447 | ] 448 | }, 449 | { 450 | "cell_type": "markdown", 451 | "metadata": {}, 452 | "source": [ 453 | "\n", 454 | "\n", 455 | "\n", 456 | "\n", 457 | "\n", 458 | "\n", 459 | "\n" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": { 466 | "collapsed": true 467 | }, 468 | "outputs": [], 469 | "source": [] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": null, 474 | "metadata": { 475 | "collapsed": true 476 | }, 477 | "outputs": [], 478 | "source": [] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": { 484 | "collapsed": true 485 | }, 486 | "outputs": [], 487 | "source": [] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": { 493 | "collapsed": true 494 | }, 495 | "outputs": [], 496 | "source": [] 497 | } 498 | ], 499 | "metadata": { 500 | "kernelspec": { 501 | "display_name": "Python 3", 502 | "language": "python", 503 | "name": "python3" 504 | }, 505 | "language_info": { 506 | "codemirror_mode": { 507 | "name": "ipython", 508 | "version": 3 509 | }, 510 | "file_extension": ".py", 511 | "mimetype": "text/x-python", 512 | "name": "python", 513 | "nbconvert_exporter": "python", 514 | "pygments_lexer": "ipython3", 515 | "version": "3.6.2" 516 | } 517 | 
}, 518 | "nbformat": 4, 519 | "nbformat_minor": 1 520 | } 521 | -------------------------------------------------------------------------------- /gan/Simple_GAN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Simple GAN.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [], 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | }, 16 | "accelerator": "GPU" 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "[View in Colaboratory](https://colab.research.google.com/github/tjwei/tensorflow-tutorial/blob/master/gan/Simple_GAN.ipynb)" 27 | ] 28 | }, 29 | { 30 | "metadata": { 31 | "id": "-dCjQjjtl7hT", 32 | "colab_type": "code", 33 | "colab": {} 34 | }, 35 | "cell_type": "code", 36 | "source": [ 37 | "import keras\n", 38 | "from PIL import Image\n", 39 | "import numpy as np" 40 | ], 41 | "execution_count": 0, 42 | "outputs": [] 43 | }, 44 | { 45 | "metadata": { 46 | "id": "yd_vBlv2mSj4", 47 | "colab_type": "code", 48 | "colab": {} 49 | }, 50 | "cell_type": "code", 51 | "source": [ 52 | "from keras.datasets import mnist\n", 53 | "(train_X, train_y), (test_X, test_y) = mnist.load_data()" 54 | ], 55 | "execution_count": 0, 56 | "outputs": [] 57 | }, 58 | { 59 | "metadata": { 60 | "id": "fk6tMBlJnbzt", 61 | "colab_type": "code", 62 | "colab": {} 63 | }, 64 | "cell_type": "code", 65 | "source": [ 66 | "train_X = (train_X[..., None]-127.5)/128" 67 | ], 68 | "execution_count": 0, 69 | "outputs": [] 70 | }, 71 | { 72 | "metadata": { 73 | "id": "CTxiSDxvmz9M", 74 | "colab_type": "code", 75 | "colab": {} 76 | }, 77 | "cell_type": "code", 78 | "source": [ 79 | "from IPython.display import display\n", 80 | "def showX(X, rows=1):\n", 81 | " assert X.shape[0] % rows == 0\n", 82 | " int_X = (X*128+128).clip(0,255).astype('uint8')\n", 83 | " # N*784 -> N*28*28 -> 28*N*28 -> 28 * 28N\n", 84 | " int_X_reshape = int_X.reshape(rows, -1,28,28).swapaxes(1,2).reshape(28*rows,-1)\n", 85 | " display(Image.fromarray(int_X_reshape))\n", 86 | "# 訓練資料, X 的前 20 筆\n", 87 | "showX(train_X[:10])\n", 88 | "print(train_y)" 89 | ], 90 | "execution_count": 0, 91 | "outputs": [] 92 | }, 93 | { 94 | "metadata": { 95 | "id": "fnciP38syalM", 96 | "colab_type": "code", 97 | "colab": {} 98 | }, 99 | "cell_type": "code", 100 | "source": [ 101 | "NZ = 32" 102 | ], 103 | "execution_count": 0, 104 | "outputs": [] 105 | }, 106 | { 107 | "metadata": { 108 | "id": "uX3Zqg36nWvt", 109 | "colab_type": "code", 110 | "colab": {} 111 | }, 112 | "cell_type": "code", 113 | "source": [ 114 | "from keras.models import Sequential\n", 115 | "from keras.layers import Conv2D, Activation, GlobalAveragePooling2D, Reshape\n", 116 | "\n", 117 | "netD = Sequential([\n", 118 | " Conv2D(filters=32, kernel_size=3, strides=2, padding='same', activation='selu', input_shape=(28,28,1)),\n", 119 | " Conv2D(filters=32, kernel_size=3, strides=2, padding='same', activation='selu'),\n", 120 | " Conv2D(filters=32, kernel_size=3, strides=2, padding='valid', activation=\"selu\"),\n", 121 | " Conv2D(filters=1, kernel_size=3, strides=3, padding='valid', activation=\"sigmoid\"),\n", 122 | " Reshape((1,)),\n", 123 | "])" 124 | ], 125 | "execution_count": 0, 126 | "outputs": [] 127 | }, 128 | { 129 | "metadata": { 130 | "id": "mdRxhU0wpj9q", 
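The scaling `(train_X[..., None] - 127.5)/128` above maps 8-bit pixels into roughly [-1, 1] so they match the tanh range the generator will produce, and `showX` inverts it. A quick numpy check of the round trip:

```python
import numpy as np

x = np.array([0.0, 127.5, 255.0])     # raw pixel values
z = (x - 127.5) / 128                 # forward map applied to train_X
print(z)                              # [-0.99609375  0.  0.99609375]
print(z * 128 + 128)                  # [0.5 128. 255.5] == x + 0.5; clip()
                                      # plus uint8 in showX restores 0..255
```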
131 | "colab_type": "code", 132 | "colab": {} 133 | }, 134 | "cell_type": "code", 135 | "source": [ 136 | "netD.summary()" 137 | ], 138 | "execution_count": 0, 139 | "outputs": [] 140 | }, 141 | { 142 | "metadata": { 143 | "id": "xYoktxlbp_cj", 144 | "colab_type": "code", 145 | "colab": {} 146 | }, 147 | "cell_type": "code", 148 | "source": [ 149 | "from keras.layers import Conv2DTranspose, Reshape\n", 150 | "netG = Sequential([\n", 151 | " Reshape( (1,1,NZ), input_shape=(NZ,)),\n", 152 | " Conv2DTranspose(filters=32, kernel_size=3, strides=2, padding='valid', activation='selu'),\n", 153 | " Conv2DTranspose(filters=32, kernel_size=3, strides=2, padding='valid', activation='selu'),\n", 154 | " Conv2DTranspose(filters=32, kernel_size=3, strides=2, padding='same', activation='selu'),\n", 155 | " Conv2DTranspose(filters=1, kernel_size=3, strides=2, padding='same', activation='tanh')\n", 156 | "])" 157 | ], 158 | "execution_count": 0, 159 | "outputs": [] 160 | }, 161 | { 162 | "metadata": { 163 | "id": "76exHCVErjlK", 164 | "colab_type": "code", 165 | "colab": {} 166 | }, 167 | "cell_type": "code", 168 | "source": [ 169 | "netG.summary()" 170 | ], 171 | "execution_count": 0, 172 | "outputs": [] 173 | }, 174 | { 175 | "metadata": { 176 | "id": "IkctGI_YrsgL", 177 | "colab_type": "code", 178 | "colab": {} 179 | }, 180 | "cell_type": "code", 181 | "source": [ 182 | "from keras.models import Model\n", 183 | "GD_output = netD(netG.outputs)\n", 184 | "netGD = Model(inputs=netG.inputs, outputs=[GD_output])" 185 | ], 186 | "execution_count": 0, 187 | "outputs": [] 188 | }, 189 | { 190 | "metadata": { 191 | "id": "O94CnTYbr85u", 192 | "colab_type": "code", 193 | "colab": {} 194 | }, 195 | "cell_type": "code", 196 | "source": [ 197 | "netGD.summary()" 198 | ], 199 | "execution_count": 0, 200 | "outputs": [] 201 | }, 202 | { 203 | "metadata": { 204 | "id": "iEpRqBoCsbly", 205 | "colab_type": "code", 206 | "colab": {} 207 | }, 208 | "cell_type": "code", 209 | "source": [ 210 | "import numpy as np\n", 211 | "showX(netG.predict(np.random.normal( size=(10, NZ))))" 212 | ], 213 | "execution_count": 0, 214 | "outputs": [] 215 | }, 216 | { 217 | "metadata": { 218 | "id": "Cj7k9uNP6Wza", 219 | "colab_type": "code", 220 | "colab": {} 221 | }, 222 | "cell_type": "code", 223 | "source": [ 224 | "import keras.backend as K\n", 225 | "from keras.optimizers import RMSprop, SGD, Adam\n", 226 | "\n", 227 | "#loss_fn = lambda output, target : K.mean(K.binary_crossentropy(output, target))\n", 228 | "loss_fn = lambda output, target : -K.mean(K.log(output+1e-12)*target+K.log(1-output+1e-12)*(1-target))\n", 229 | "\n", 230 | "input_real = netD.inputs[0]\n", 231 | "input_z = netG.inputs[0]\n", 232 | "\n", 233 | "output_D_real = netD.outputs[0]\n", 234 | "output_D_fake = netGD.outputs[0]\n", 235 | "\n", 236 | "loss_D_real = loss_fn(output_D_real, K.ones_like(output_D_real))\n", 237 | "loss_D_fake = loss_fn(output_D_fake, K.zeros_like(output_D_fake))\n", 238 | "loss_D = (loss_D_real +loss_D_fake)/2\n", 239 | "\n", 240 | "training_updates = RMSprop(lr=1e-3).get_updates(netD.trainable_weights,[],loss_D)\n", 241 | "netD_train = K.function([input_z, input_real],[loss_D/2], training_updates)\n", 242 | "\n", 243 | "\n", 244 | "\n", 245 | "\n" 246 | ], 247 | "execution_count": 0, 248 | "outputs": [] 249 | }, 250 | { 251 | "metadata": { 252 | "id": "_TXY-fI1-Q3n", 253 | "colab_type": "code", 254 | "colab": {} 255 | }, 256 | "cell_type": "code", 257 | "source": [ 258 | "loss_G = loss_fn(output_D_fake, K.ones_like(output_D_fake))\n", 259 | 
"training_updates = RMSprop(lr=1e-3).get_updates(netG.trainable_weights,[], loss_G)\n", 260 | "netG_train = K.function([input_z], [loss_G], training_updates)" 261 | ], 262 | "execution_count": 0, 263 | "outputs": [] 264 | }, 265 | { 266 | "metadata": { 267 | "id": "UqRGXird-ld4", 268 | "colab_type": "code", 269 | "colab": {} 270 | }, 271 | "cell_type": "code", 272 | "source": [ 273 | "def minibatch(dataset, batchsize):\n", 274 | " while True:\n", 275 | " yield dataset[np.random.choice(dataset.shape[0], batchsize, replace=False)]\n" 276 | ], 277 | "execution_count": 0, 278 | "outputs": [] 279 | }, 280 | { 281 | "metadata": { 282 | "id": "ZoP2QSWwBaGK", 283 | "colab_type": "code", 284 | "colab": {} 285 | }, 286 | "cell_type": "code", 287 | "source": [ 288 | "train_batch = minibatch(train_X, 10)\n", 289 | "for i in range(5):\n", 290 | " X = next(train_batch)\n", 291 | " showX(X)" 292 | ], 293 | "execution_count": 0, 294 | "outputs": [] 295 | }, 296 | { 297 | "metadata": { 298 | "id": "84kwx8dJDaTE", 299 | "colab_type": "code", 300 | "colab": {} 301 | }, 302 | "cell_type": "code", 303 | "source": [ 304 | "import time\n", 305 | "from IPython.display import clear_output\n", 306 | "t0 = time.time()\n", 307 | "gen_iterations = 0\n", 308 | "errG = 0\n", 309 | "errG_sum = errD_sum = 0\n", 310 | "\n", 311 | "display_iters = 100\n", 312 | "batch_size = 32\n", 313 | "\n", 314 | "train_batch = minibatch(train_X, batch_size)\n", 315 | "fixed_Z = np.random.normal(scale=0.5, size=(100, NZ))\n", 316 | "while gen_iterations < 10000: \n", 317 | " X = next(train_batch)\n", 318 | " Z = np.random.normal(size=(batch_size, NZ))\n", 319 | " errD, = netD_train([Z, X])\n", 320 | " errD_sum +=errD\n", 321 | " \n", 322 | " #Z = np.random.normal(size=(batch_size, NZ))\n", 323 | " errG, = netG_train([Z])\n", 324 | " errG_sum += errG \n", 325 | " gen_iterations+=1\n", 326 | " \n", 327 | " if gen_iterations%display_iters==0:\n", 328 | " clear_output(True)\n", 329 | " print('[%d] Loss_D: %f Loss_G: %f'\n", 330 | " % (gen_iterations, errD_sum/display_iters, errG_sum/display_iters), time.time()-t0)\n", 331 | " fakeX = netG.predict(fixed_Z)\n", 332 | " showX(fakeX, 10)\n", 333 | " errG_sum = errD_sum = 0" 334 | ], 335 | "execution_count": 0, 336 | "outputs": [] 337 | } 338 | ] 339 | } -------------------------------------------------------------------------------- /gan/dragan-keras.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import os\n", 12 | "os.environ['KERAS_BACKEND']='tensorflow' # 也可以使用 tensorflow\n", 13 | "#os.environ['THEANO_FLAGS']='floatX=float32,device=cuda,exception_verbosity=high'\n", 14 | "os.environ['THEANO_FLAGS']='floatX=float32,device=cuda,optimizer=fast_compile'" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "modifed from https://github.com/martinarjovsky/WassersteinGAN " 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "import keras.backend as K\n", 33 | "K.set_image_data_format('channels_first')\n", 34 | "from keras.models import Sequential, Model\n", 35 | "from keras.layers import Conv2D, ZeroPadding2D, BatchNormalization, Input\n", 36 | "from keras.layers import Conv2DTranspose, Reshape, Activation, Cropping2D, Flatten\n", 37 
| "from keras.layers.advanced_activations import LeakyReLU\n", 38 | "from keras.activations import relu\n", 39 | "from keras.initializers import RandomNormal\n", 40 | "conv_init = RandomNormal(0, 0.02)\n", 41 | "gamma_init = RandomNormal(1., 0.02)\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "def DCGAN_D(isize, nz, nc, ndf, n_extra_layers=0):\n", 53 | " assert isize%2==0\n", 54 | " _ = inputs = Input(shape=(nc, isize, isize))\n", 55 | " _ = Conv2D(filters=ndf, kernel_size=4, strides=2, use_bias=False,\n", 56 | " padding = \"same\",\n", 57 | " kernel_initializer = conv_init, \n", 58 | " name = 'initial.conv.{0}-{1}'.format(nc, ndf) \n", 59 | " ) (_)\n", 60 | " _ = LeakyReLU(alpha=0.2, name = 'initial.relu.{0}'.format(ndf))(_)\n", 61 | " csize, cndf = isize// 2, ndf\n", 62 | " while csize > 5:\n", 63 | " assert csize%2==0\n", 64 | " in_feat = cndf\n", 65 | " out_feat = cndf*2\n", 66 | " _ = Conv2D(filters=out_feat, kernel_size=4, strides=2, use_bias=False,\n", 67 | " padding = \"same\",\n", 68 | " kernel_initializer = conv_init,\n", 69 | " name = 'pyramid.{0}-{1}.conv'.format(in_feat, out_feat) \n", 70 | " ) (_)\n", 71 | " if 0: # toggle batchnormalization\n", 72 | " _ = BatchNormalization(name = 'pyramid.{0}.batchnorm'.format(out_feat), \n", 73 | " momentum=0.9, axis=1, epsilon=1.01e-5,\n", 74 | " gamma_initializer = gamma_init, \n", 75 | " )(_, training=1) \n", 76 | " _ = LeakyReLU(alpha=0.2, name = 'pyramid.{0}.relu'.format(out_feat))(_)\n", 77 | " csize, cndf = (csize+1)//2, cndf*2\n", 78 | " _ = Conv2D(filters=1, kernel_size=csize, strides=1, use_bias=False,\n", 79 | " kernel_initializer = conv_init,\n", 80 | " name = 'final.{0}-{1}.conv'.format(cndf, 1) \n", 81 | " ) (_)\n", 82 | " outputs = Flatten()(_)\n", 83 | " return Model(inputs=inputs, outputs=outputs)\n" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "collapsed": false 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "def DCGAN_G(isize, nz, nc, ngf, n_extra_layers=0):\n", 95 | " cngf= ngf//2\n", 96 | " tisize = isize\n", 97 | " while tisize > 5:\n", 98 | " cngf = cngf * 2\n", 99 | " assert tisize%2==0\n", 100 | " tisize = tisize // 2\n", 101 | " _ = inputs = Input(shape=(nz,))\n", 102 | " _ = Reshape((nz, 1,1))(_)\n", 103 | " _ = Conv2DTranspose(filters=cngf, kernel_size=tisize, strides=1, use_bias=False,\n", 104 | " kernel_initializer = conv_init, \n", 105 | " name = 'initial.{0}-{1}.convt'.format(nz, cngf))(_)\n", 106 | " _ = BatchNormalization(gamma_initializer = gamma_init, momentum=0.9, axis=1, epsilon=1.01e-5,\n", 107 | " name = 'initial.{0}.batchnorm'.format(cngf))(_, training=1)\n", 108 | " _ = Activation(\"relu\", name = 'initial.{0}.relu'.format(cngf))(_)\n", 109 | " csize, cndf = tisize, cngf\n", 110 | " \n", 111 | "\n", 112 | " while csize < isize//2:\n", 113 | " in_feat = cngf\n", 114 | " out_feat = cngf//2\n", 115 | " _ = Conv2DTranspose(filters=out_feat, kernel_size=4, strides=2, use_bias=False,\n", 116 | " kernel_initializer = conv_init, padding=\"same\",\n", 117 | " name = 'pyramid.{0}-{1}.convt'.format(in_feat, out_feat) \n", 118 | " ) (_)\n", 119 | " _ = BatchNormalization(gamma_initializer = gamma_init, \n", 120 | " momentum=0.9, axis=1, epsilon=1.01e-5,\n", 121 | " name = 'pyramid.{0}.batchnorm'.format(out_feat))(_, training=1)\n", 122 | " \n", 123 | " _ = Activation(\"relu\", name = 
'pyramid.{0}.relu'.format(out_feat))(_)\n", 124 | " csize, cngf = csize*2, cngf//2\n", 125 | " _ = Conv2DTranspose(filters=nc, kernel_size=4, strides=2, use_bias=False,\n", 126 | " kernel_initializer = conv_init, padding=\"same\",\n", 127 | " name = 'final.{0}-{1}.convt'.format(cngf, nc)\n", 128 | " )(_)\n", 129 | " outputs = Activation(\"tanh\", name = 'final.{0}.tanh'.format(nc))(_)\n", 130 | " return Model(inputs=inputs, outputs=outputs)\n" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "Parameters" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "nc = 3\n", 149 | "nz = 100\n", 150 | "ngf = 64\n", 151 | "ndf = 64\n", 152 | "n_extra_layers = 0\n", 153 | "Diters = 5\n", 154 | "λ = 10\n", 155 | "\n", 156 | "imageSize = 32\n", 157 | "batchSize = 64\n", 158 | "lrD = 1e-4\n", 159 | "lrG = 1e-4\n" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "print models" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "collapsed": false 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "netD = DCGAN_D(imageSize, nz, nc, ndf, n_extra_layers)\n", 178 | "netD.summary()" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": { 185 | "collapsed": false 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "netG = DCGAN_G(imageSize, nz, nc, ngf, n_extra_layers)\n", 190 | "netG.summary()" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": { 197 | "collapsed": true 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "from keras.optimizers import RMSprop, SGD, Adam" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "compute Wasserstein loss and gradient penalty" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": true 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "netD_real_input = Input(shape=(nc, imageSize, imageSize))\n", 220 | "noisev = Input(shape=(nz,))\n", 221 | "netD_fake_input = netG(noisev)\n", 222 | "\n", 223 | "ϵ_input = K.placeholder(shape=(None, nc,imageSize,imageSize))\n", 224 | "netD_mixed_input = Input(shape=(nc, imageSize, imageSize), tensor=netD_real_input + ϵ_input)\n", 225 | "\n", 226 | "\n", 227 | "loss_real = K.mean(netD(netD_real_input))\n", 228 | "loss_fake = K.mean(netD(netD_fake_input))\n", 229 | "\n", 230 | "grad_mixed = K.gradients(netD(netD_mixed_input), [netD_mixed_input])[0]\n", 231 | "norm_grad_mixed = K.sqrt(K.sum(K.square(grad_mixed), axis=[1,2,3]))\n", 232 | "grad_penalty = K.mean(K.square(norm_grad_mixed -1))\n", 233 | "\n", 234 | "loss = loss_fake - loss_real + λ * grad_penalty\n", 235 | "\n", 236 | "\n", 237 | "training_updates = Adam(lr=lrD).get_updates(netD.trainable_weights,[],loss)\n", 238 | "netD_train = K.function([netD_real_input, noisev, ϵ_input],\n", 239 | " [loss_real, loss_fake], \n", 240 | " training_updates)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "loss for netG" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": { 254 | "collapsed": true 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "loss = -loss_fake \n", 259 | 
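The cell above implements the gradient penalty: it differentiates the critic at perturbed inputs and pulls the per-sample gradient norm toward 1, the Lipschitz target shared by WGAN-GP and DRAGAN. A numpy sketch of just the penalty arithmetic, with made-up gradients:

```python
import numpy as np

grads = np.random.normal(size=(64, 3, 32, 32))   # stand-in for dD/dx_mixed
norms = np.sqrt(np.sum(np.square(grads), axis=(1, 2, 3)))  # one norm/sample
grad_penalty = np.mean(np.square(norms - 1.0))   # push each norm toward 1
lam = 10                                         # λ as set above
print(lam * grad_penalty)  # term added to loss_fake - loss_real for netD
```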
"training_updates = Adam(lr=lrG).get_updates(netG.trainable_weights,[], loss)\n", 260 | "netG_train = K.function([noisev], [loss], training_updates)\n" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": { 266 | "collapsed": false 267 | }, 268 | "source": [ 269 | "Download CIFAR10 if needed" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": { 276 | "collapsed": false 277 | }, 278 | "outputs": [], 279 | "source": [ 280 | "from PIL import Image\n", 281 | "import numpy as np\n", 282 | "import tarfile\n", 283 | "\n", 284 | "# Download dataset\n", 285 | "url = \"https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz\"\n", 286 | "import os\n", 287 | "import urllib\n", 288 | "from urllib.request import urlretrieve\n", 289 | "def reporthook(a,b,c):\n", 290 | " print(\"\\rdownloading: %5.1f%%\"%(a*b*100.0/c), end=\"\")\n", 291 | "tar_gz = \"cifar-10-python.tar.gz\"\n", 292 | "if not os.path.isfile(tar_gz):\n", 293 | " print('Downloading data from %s' % url)\n", 294 | " urlretrieve(url, tar_gz, reporthook=reporthook)\n", 295 | "\n", 296 | "import pickle\n", 297 | "train_X=[]\n", 298 | "train_y=[]\n", 299 | "tar_gz = \"cifar-10-python.tar.gz\"\n", 300 | "with tarfile.open(tar_gz) as tarf:\n", 301 | " for i in range(1, 6):\n", 302 | " dataset = \"cifar-10-batches-py/data_batch_%d\"%i\n", 303 | " print(\"load\",dataset)\n", 304 | " with tarf.extractfile(dataset) as f:\n", 305 | " result = pickle.load(f, encoding='latin1')\n", 306 | " train_X.extend( result['data'].reshape(-1,3,32,32)/255*2-1)\n", 307 | " train_y.extend(result['labels'])\n", 308 | " train_X=np.float32(train_X)\n", 309 | " train_y=np.int32(train_y)\n", 310 | " dataset = \"cifar-10-batches-py/test_batch\"\n", 311 | " print(\"load\",dataset)\n", 312 | " with tarf.extractfile(dataset) as f:\n", 313 | " result = pickle.load(f, encoding='latin1')\n", 314 | " test_X=np.float32(result['data'].reshape(-1,3,32,32)/255*2-1)\n", 315 | " test_y=np.int32(result['labels'])\n", 316 | " " 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "also using test_X" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": { 330 | "collapsed": false 331 | }, 332 | "outputs": [], 333 | "source": [ 334 | "train_X = np.concatenate([train_X, test_X])\n", 335 | "train_X = np.concatenate([train_X[:,:,:,::-1], train_X])" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "utility to show images" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": { 349 | "collapsed": false 350 | }, 351 | "outputs": [], 352 | "source": [ 353 | "from IPython.display import display\n", 354 | "def showX(X, rows=1):\n", 355 | " assert X.shape[0]%rows == 0\n", 356 | " int_X = ( (X+1)/2*255).clip(0,255).astype('uint8')\n", 357 | " # N*3072 -> N*3*32*32 -> 32 * 32N * 3\n", 358 | " int_X = np.moveaxis(int_X.reshape(-1,3,32,32), 1, 3)\n", 359 | " int_X = int_X.reshape(rows, -1, 32, 32,3).swapaxes(1,2).reshape(rows*32,-1, 3)\n", 360 | " display(Image.fromarray(int_X))\n", 361 | "# 訓練資料, X 的前 20 筆\n", 362 | "showX(train_X[:20])\n", 363 | "print(train_y[:20])\n", 364 | "name_array = np.array(\"airplane car bird cat deer dog frog horse boat truck\".split(' '))\n", 365 | "print(name_array[train_y[:20]])" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": null, 371 | "metadata": { 372 | "collapsed": true 373 | 
}, 374 | "outputs": [], 375 | "source": [ 376 | "fixed_noise = np.random.normal(size=(batchSize, nz)).astype('float32')" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": { 383 | "collapsed": false, 384 | "scrolled": false 385 | }, 386 | "outputs": [], 387 | "source": [ 388 | "import time\n", 389 | "t0 = time.time()\n", 390 | "niter = 100\n", 391 | "gen_iterations = 0\n", 392 | "errG = 0\n", 393 | "targetD = np.float32([2]*batchSize+[-2]*batchSize)[:, None]\n", 394 | "targetG = np.ones(batchSize, dtype=np.float32)[:, None]\n", 395 | "for epoch in range(niter):\n", 396 | " i = 0\n", 397 | " # 每個 epoch 洗牌一下\n", 398 | " np.random.shuffle(train_X)\n", 399 | " batches = train_X.shape[0]//batchSize\n", 400 | " while i < batches:\n", 401 | " if gen_iterations < 25 or gen_iterations % 500 == 0:\n", 402 | " _Diters = 100\n", 403 | " else:\n", 404 | " _Diters = Diters\n", 405 | " j = 0\n", 406 | " while j < _Diters and i < batches:\n", 407 | " j+=1\n", 408 | " real_data = train_X[i*batchSize:(i+1)*batchSize]\n", 409 | " i+=1\n", 410 | " noise = np.random.normal(size=(batchSize, nz)) \n", 411 | " ϵ = real_data.std() * np.random.uniform(-0.5,0.5, size=real_data.shape) \n", 412 | " ϵ *= np.random.uniform(size=(batchSize, 1,1,1))\n", 413 | " errD_real, errD_fake = netD_train([real_data, noise, ϵ]) \n", 414 | " errD = errD_real - errD_fake\n", 415 | " \n", 416 | " if gen_iterations%500==0:\n", 417 | " print('[%d/%d][%d/%d][%d] Loss_D: %f Loss_G: %f Loss_D_real: %f Loss_D_fake %f'\n", 418 | " % (epoch, niter, i, batches, gen_iterations,errD, errG, errD_real, errD_fake), time.time()-t0)\n", 419 | " fake = netG.predict(fixed_noise)\n", 420 | " showX(fake, 4)\n", 421 | " \n", 422 | " noise = np.random.normal(size=(batchSize, nz)) \n", 423 | " errG, = netG_train([noise])\n", 424 | " gen_iterations+=1 \n", 425 | " " 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": null, 431 | "metadata": { 432 | "collapsed": true 433 | }, 434 | "outputs": [], 435 | "source": [] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": { 441 | "collapsed": true 442 | }, 443 | "outputs": [], 444 | "source": [] 445 | } 446 | ], 447 | "metadata": { 448 | "kernelspec": { 449 | "display_name": "Python 3", 450 | "language": "python", 451 | "name": "python3" 452 | }, 453 | "language_info": { 454 | "codemirror_mode": { 455 | "name": "ipython", 456 | "version": 3 457 | }, 458 | "file_extension": ".py", 459 | "mimetype": "text/x-python", 460 | "name": "python", 461 | "nbconvert_exporter": "python", 462 | "pygments_lexer": "ipython3", 463 | "version": "3.5.3" 464 | } 465 | }, 466 | "nbformat": 4, 467 | "nbformat_minor": 1 468 | } 469 | -------------------------------------------------------------------------------- /gan/fonts/SourceHanSansTC-Regular.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tjwei/tensorflow-tutorial/d3ba9f6adfac92abc0c3988e7f7c3be1c64280fe/gan/fonts/SourceHanSansTC-Regular.otf -------------------------------------------------------------------------------- /gan/fonts/TAKUMISFONT_LP.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tjwei/tensorflow-tutorial/d3ba9f6adfac92abc0c3988e7f7c3be1c64280fe/gan/fonts/TAKUMISFONT_LP.ttf -------------------------------------------------------------------------------- /gan/pix2pix-keras-geometry.ipynb: 
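The ϵ construction in the training loop above is what makes this DRAGAN rather than WGAN-GP: instead of interpolating between real and fake samples, the penalty points are real images jittered by noise scaled to the batch's standard deviation, with a random per-sample magnitude. Isolated as a numpy sketch:

```python
import numpy as np

real_data = np.random.normal(size=(64, 3, 32, 32)).astype('float32')
eps = real_data.std() * np.random.uniform(-0.5, 0.5, size=real_data.shape)
eps *= np.random.uniform(size=(64, 1, 1, 1))   # per-sample magnitude
x_perturbed = real_data + eps                  # where the penalty is applied
print(x_perturbed.shape)
```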
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Keras implementation of https://phillipi.github.io/pix2pix" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import os\n", 19 | "os.environ['KERAS_BACKEND']='tensorflow' # can choose theano, tensorflow, cntk\n", 20 | "os.environ['THEANO_FLAGS']='floatX=float32,device=cuda,optimizer=fast_run,dnn.library_path=/usr/lib'\n", 21 | "#os.environ['THEANO_FLAGS']='floatX=float32,device=cuda,optimizer=fast_compile,dnn.library_path=/usr/lib'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import keras.backend as K\n", 31 | "if os.environ['KERAS_BACKEND'] =='theano':\n", 32 | " channel_axis=1\n", 33 | " K.set_image_data_format('channels_first')\n", 34 | " channel_first = True\n", 35 | "else:\n", 36 | " K.set_image_data_format('channels_last')\n", 37 | " channel_axis=-1\n", 38 | " channel_first = False" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "from keras.models import Sequential, Model\n", 50 | "from keras.layers import Conv2D, ZeroPadding2D, BatchNormalization, Input, Dropout\n", 51 | "from keras.layers import Conv2DTranspose, Reshape, Activation, Cropping2D, Flatten\n", 52 | "from keras.layers import Concatenate\n", 53 | "from keras.layers.advanced_activations import LeakyReLU\n", 54 | "from keras.activations import relu\n", 55 | "from keras.initializers import RandomNormal" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "# Weights initializations\n", 67 | "# bias are initailized as 0\n", 68 | "def __conv_init(a):\n", 69 | " print(\"conv_init\", a)\n", 70 | " k = RandomNormal(0, 0.02)(a) # for convolution kernel\n", 71 | " k.conv_weight = True \n", 72 | " return k\n", 73 | "conv_init = RandomNormal(0, 0.02)\n", 74 | "gamma_init = RandomNormal(1., 0.02) # for batch normalization\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "# HACK speed up theano\n", 86 | "if K._BACKEND == 'theano':\n", 87 | " import keras.backend.theano_backend as theano_backend\n", 88 | " def _preprocess_conv2d_kernel(kernel, data_format):\n", 89 | " #return kernel\n", 90 | " if hasattr(kernel, \"original\"):\n", 91 | " print(\"use original\")\n", 92 | " return kernel.original\n", 93 | " elif hasattr(kernel, '_keras_shape'):\n", 94 | " s = kernel._keras_shape\n", 95 | " print(\"use reshape\",s)\n", 96 | " kernel = kernel.reshape((s[3], s[2],s[0], s[1]))\n", 97 | " else:\n", 98 | " kernel = kernel.dimshuffle((3, 2, 0, 1))\n", 99 | " return kernel\n", 100 | " theano_backend._preprocess_conv2d_kernel = _preprocess_conv2d_kernel" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": true 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "# Basic discriminator\n", 112 | "def conv2d(f, *a, **k):\n", 113 | " return Conv2D(f, kernel_initializer = conv_init, *a, **k)\n", 114 | "def batchnorm():\n", 115 | " 
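Everything below branches on `channel_first`, which is the only real difference between the Theano and TensorFlow code paths: the channel axis sits first or last, and `channel_axis` feeds `Concatenate`/`BatchNormalization` accordingly. Converting between the two layouts is a single `moveaxis`:

```python
import numpy as np

img_last = np.zeros((256, 256, 3))        # channels_last:  (H, W, C)
img_first = np.moveaxis(img_last, 2, 0)   # channels_first: (C, H, W)
print(img_first.shape)                    # (3, 256, 256)
```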
return BatchNormalization(momentum=0.9, axis=channel_axis, epsilon=1.01e-5,\n",
116 | " gamma_initializer = gamma_init)\n",
117 | "def BASIC_D(nc_in, nc_out, ndf, max_layers=3):\n",
118 | " \"\"\"BASIC_D(nc_in, nc_out, ndf, max_layers=3)\n",
119 | " nc_in, nc_out: input / output channels\n",
120 | " ndf: filters of the first layer\n",
121 | " max_layers: max hidden layers\n",
122 | " \"\"\" \n",
123 | " if channel_first:\n",
124 | " input_a, input_b = Input(shape=(nc_in, None, None)), Input(shape=(nc_out, None, None))\n",
125 | " else:\n",
126 | " input_a, input_b = Input(shape=(None, None, nc_in)), Input(shape=(None, None, nc_out))\n",
127 | " _ = Concatenate(axis=channel_axis)([input_a, input_b])\n",
128 | " _ = conv2d(ndf, kernel_size=4, strides=2, padding=\"same\", name = 'First') (_)\n",
129 | " _ = LeakyReLU(alpha=0.2)(_)\n",
130 | " \n",
131 | " for layer in range(1, max_layers): \n",
132 | " out_feat = ndf * min(2**layer, 8)\n",
133 | " _ = conv2d(out_feat, kernel_size=4, strides=2, padding=\"same\", \n",
134 | " use_bias=False, name = 'pyramid.{0}'.format(layer) \n",
135 | " ) (_)\n",
136 | " _ = batchnorm()(_, training=1) \n",
137 | " _ = LeakyReLU(alpha=0.2)(_)\n",
138 | " \n",
139 | " out_feat = ndf*min(2**max_layers, 8)\n",
140 | " _ = ZeroPadding2D(1)(_)\n",
141 | " _ = conv2d(out_feat, kernel_size=4, use_bias=False, name = 'pyramid_last') (_)\n",
142 | " _ = batchnorm()(_, training=1)\n",
143 | " _ = LeakyReLU(alpha=0.2)(_)\n",
144 | " \n",
145 | " # final layer\n",
146 | " _ = ZeroPadding2D(1)(_)\n",
147 | " _ = conv2d(1, kernel_size=4, name = 'final', \n",
148 | " activation = \"sigmoid\") (_) \n",
149 | " return Model(inputs=[input_a, input_b], outputs=_)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": true 157 | }, 158 | "outputs": [], 159 | "source": [
160 | "def UNET_G(isize, nc_in=3, nc_out=3, ngf=64, fixed_input_size=True): \n",
161 | " max_nf = 8*ngf \n",
162 | " def block(x, s, nf_in, use_batchnorm=True, nf_out=None, nf_next=None):\n",
163 | " # print(\"block\",x,s,nf_in, use_batchnorm, nf_out, nf_next)\n",
164 | " assert s>=2 and s%2==0\n",
165 | " if nf_next is None:\n",
166 | " nf_next = min(nf_in*2, max_nf)\n",
167 | " if nf_out is None:\n",
168 | " nf_out = nf_in\n",
169 | " x = conv2d(nf_next, kernel_size=4, strides=2, use_bias=(not (use_batchnorm and s>2)),\n",
170 | " padding=\"same\", name = 'conv_{0}'.format(s)) (x)\n",
171 | " if s>2:\n",
172 | " if use_batchnorm:\n",
173 | " x = batchnorm()(x, training=1)\n",
174 | " x2 = LeakyReLU(alpha=0.2)(x)\n",
175 | " x2 = block(x2, s//2, nf_next)\n",
176 | " x = Concatenate(axis=channel_axis)([x, x2]) \n",
177 | " x = Activation(\"relu\")(x)\n",
178 | " x = Conv2DTranspose(nf_out, kernel_size=4, strides=2, use_bias=not use_batchnorm,\n",
179 | " kernel_initializer = conv_init, \n",
180 | " name = 'convt.{0}'.format(s))(x) \n",
181 | " x = Cropping2D(1)(x)\n",
182 | " if use_batchnorm:\n",
183 | " x = batchnorm()(x, training=1)\n",
184 | " if s <=8:\n",
185 | " x = Dropout(0.5)(x, training=1)\n",
186 | " return x\n",
187 | " \n",
188 | " s = isize if fixed_input_size else None\n",
189 | " if channel_first:\n",
190 | " _ = inputs = Input(shape=(nc_in, s, s))\n",
191 | " else:\n",
192 | " _ = inputs = Input(shape=(s, s, nc_in)) \n",
193 | " _ = block(_, isize, nc_in, False, nf_out=nc_out, nf_next=ngf)\n",
194 | " _ = Activation('tanh')(_)\n",
195 | " return Model(inputs=inputs, outputs=[_])" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | 
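Unlike the unconditional CycleGAN discriminator, this `BASIC_D` takes the input drawing and the (real or generated) output together and scores the pair, which is what makes pix2pix a conditional GAN. The pairing is just a channel-wise stack:

```python
import numpy as np

a = np.zeros((1, 256, 256, 3))           # conditioning image (input_a)
b = np.zeros((1, 256, 256, 3))           # real or generated target (input_b)
pair = np.concatenate([a, b], axis=-1)   # channels_last concat, axis=-1
print(pair.shape)                        # (1, 256, 256, 6) into the pyramid
```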
"execution_count": null, 201 | "metadata": { 202 | "collapsed": true 203 | }, 204 | "outputs": [], 205 | "source": [ 206 | "nc_in = 3\n", 207 | "nc_out = 3\n", 208 | "ngf = 64\n", 209 | "ndf = 64\n", 210 | "λ = 10\n", 211 | "\n", 212 | "loadSize = 286\n", 213 | "imageSize = 256\n", 214 | "batchSize = 1\n", 215 | "lrD = 2e-4\n", 216 | "lrG = 2e-4" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "netD = BASIC_D(nc_in, nc_out, ndf)\n", 226 | "netD.summary()\n" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": { 233 | "scrolled": true 234 | }, 235 | "outputs": [], 236 | "source": [ 237 | "from IPython.display import SVG\n", 238 | "from keras.utils.vis_utils import model_to_dot\n", 239 | "\n", 240 | "\n", 241 | "netG = UNET_G(imageSize, nc_in, nc_out, ngf)\n", 242 | "#SVG(model_to_dot(netG, show_shapes=True).create(prog='dot', format='svg'))\n", 243 | "netG.summary()\n" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": { 250 | "collapsed": true 251 | }, 252 | "outputs": [], 253 | "source": [ 254 | "from keras.optimizers import RMSprop, SGD, Adam" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": { 261 | "collapsed": true 262 | }, 263 | "outputs": [], 264 | "source": [ 265 | "real_A = netG.input\n", 266 | "fake_B = netG.output\n", 267 | "netG_generate = K.function([real_A], [fake_B])\n", 268 | "real_B = netD.inputs[1]\n", 269 | "output_D_real = netD([real_A, real_B])\n", 270 | "output_D_fake = netD([real_A, fake_B])" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": { 277 | "collapsed": true 278 | }, 279 | "outputs": [], 280 | "source": [ 281 | "#loss_fn = lambda output, target : K.mean(K.binary_crossentropy(output, target))\n", 282 | "loss_fn = lambda output, target : -K.mean(K.log(output+1e-12)*target+K.log(1-output+1e-12)*(1-target))\n", 283 | "\n", 284 | "loss_D_real = loss_fn(output_D_real, K.ones_like(output_D_real))\n", 285 | "loss_D_fake = loss_fn(output_D_fake, K.zeros_like(output_D_fake))\n", 286 | "loss_G_fake = loss_fn(output_D_fake, K.ones_like(output_D_fake))\n", 287 | "\n", 288 | "\n", 289 | "loss_L1 = K.mean(K.abs(fake_B-real_B))" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": true 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "loss_D = loss_D_real +loss_D_fake\n", 301 | "training_updates = Adam(lr=lrD, beta_1=0.5).get_updates(netD.trainable_weights,[],loss_D)\n", 302 | "netD_train = K.function([real_A, real_B],[loss_D/2], training_updates)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": true 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "loss_G = loss_G_fake #+ 100 * loss_L1\n", 314 | "training_updates = Adam(lr=lrG, beta_1=0.5).get_updates(netG.trainable_weights,[], loss_G)\n", 315 | "netG_train = K.function([real_A, real_B], [loss_G_fake, loss_L1], training_updates)\n", 316 | "\n" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": { 323 | "collapsed": true 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "from PIL import Image, ImageDraw\n", 328 | "import numpy as np\n", 329 | "import glob\n", 330 | "from random import randint, shuffle\n", 331 | "from math 
import pi\n", 332 | "from random import randint\n", 333 | "\n", 334 | "def read_image():\n", 335 | " r = randint(imageSize//10,imageSize//5)\n", 336 | " x = randint(imageSize//4, imageSize*3//4)\n", 337 | " y = randint(imageSize//4, imageSize*3//4)\n", 338 | " color = (randint(128,255), randint(128,255), randint(128,255))\n", 339 | " im = Image.new('RGB', (imageSize, imageSize), (0,0,0))\n", 340 | " draw = ImageDraw.Draw(im)\n", 341 | " draw.ellipse([x-r, y-r, x+r, y+r], fill=color)\n", 342 | " imgA = np.array(im)/255*2-1\n", 343 | " \n", 344 | " im = Image.new('RGB', (imageSize, imageSize), (0,0,0)) \n", 345 | " draw = ImageDraw.Draw(im) \n", 346 | " l = pi**0.5 * r /2\n", 347 | " if randint(0,1):\n", 348 | " l*=2**0.5\n", 349 | " draw.polygon([x, y-l, x+l, y, x,y+l, x-l,y], fill=color)\n", 350 | " else:\n", 351 | " draw.rectangle([x-l, y-l, x+l, y+l], fill=color)\n", 352 | " imgB = np.array(im)/255*2-1\n", 353 | "\n", 354 | " if channel_first:\n", 355 | " imgA = np.moveaxis(imgA, 2, 0)\n", 356 | " imgB = np.moveaxis(imgB, 2, 0)\n", 357 | " return imgA, imgB\n" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "collapsed": true 365 | }, 366 | "outputs": [], 367 | "source": [ 368 | "def minibatch(batchsize):\n", 369 | " length = 10000\n", 370 | " epoch = i = 0\n", 371 | " tmpsize = None \n", 372 | " while True:\n", 373 | " size = tmpsize if tmpsize else batchsize\n", 374 | " if i+size > length: \n", 375 | " i = 0\n", 376 | " epoch+=1 \n", 377 | " dataA = []\n", 378 | " dataB = []\n", 379 | " for j in range(i,i+size):\n", 380 | " imgA,imgB = read_image()\n", 381 | " dataA.append(imgA)\n", 382 | " dataB.append(imgB)\n", 383 | " dataA = np.float32(dataA)\n", 384 | " dataB = np.float32(dataB)\n", 385 | " i+=size\n", 386 | " tmpsize = yield epoch, dataA, dataB \n", 387 | " " 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": null, 393 | "metadata": { 394 | "collapsed": true 395 | }, 396 | "outputs": [], 397 | "source": [ 398 | "from IPython.display import display\n", 399 | "def showX(X, rows=1):\n", 400 | " assert X.shape[0]%rows == 0\n", 401 | " int_X = ( (X+1)/2*255).clip(0,255).astype('uint8')\n", 402 | " if channel_first:\n", 403 | " int_X = np.moveaxis(int_X.reshape(-1,3,imageSize,imageSize), 1, 3)\n", 404 | " else:\n", 405 | " int_X = int_X.reshape(-1,imageSize,imageSize, 3)\n", 406 | " int_X = int_X.reshape(rows, -1, imageSize, imageSize,3).swapaxes(1,2).reshape(rows*imageSize,-1, 3)\n", 407 | " display(Image.fromarray(int_X))" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": null, 413 | "metadata": {}, 414 | "outputs": [], 415 | "source": [ 416 | "train_batch = minibatch(6)\n", 417 | "_, trainA, trainB = next(train_batch)\n", 418 | "showX(trainA)\n", 419 | "showX(trainB)\n", 420 | "del train_batch, trainA, trainB" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": null, 426 | "metadata": { 427 | "collapsed": true 428 | }, 429 | "outputs": [], 430 | "source": [ 431 | "def netG_gen(A):\n", 432 | " return np.concatenate([netG_generate([A[i:i+1]])[0] for i in range(A.shape[0])], axis=0)" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": { 439 | "scrolled": false 440 | }, 441 | "outputs": [], 442 | "source": [ 443 | "import time\n", 444 | "from IPython.display import clear_output\n", 445 | "t0 = time.time()\n", 446 | "niter = 50\n", 447 | "gen_iterations = 0\n", 448 | "errL1 = epoch = errG = 0\n", 449 | 
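The synthetic pairs above are built so the circle and its matching square (or diamond) cover the same area, which keeps the mapping learnable from shape alone: `l = sqrt(pi)*r/2` equates the areas, and the extra `sqrt(2)` factor does the same for the 45°-rotated square, whose area is `2*l**2`. Checking the arithmetic:

```python
from math import pi

r = 20
l = pi ** 0.5 * r / 2
print(pi * r * r)                # circle area   ~1256.64
print((2 * l) ** 2)              # square area   ~1256.64 (side 2*l)
print(2 * (l * 2 ** 0.5) ** 2)   # diamond area  ~1256.64 (after l *= sqrt(2))
```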
"errL1_sum = errG_sum = errD_sum = 0\n", 450 | "\n", 451 | "display_iters = 100\n", 452 | "train_batch = minibatch(batchSize)\n", 453 | "\n", 454 | "while epoch < niter: \n", 455 | " epoch, trainA, trainB = next(train_batch) \n", 456 | " errD, = netD_train([trainA, trainB])\n", 457 | " errD_sum +=errD\n", 458 | "\n", 459 | " errG, errL1 = netG_train([trainA, trainB])\n", 460 | " errG_sum += errG\n", 461 | " errL1_sum += errL1\n", 462 | " gen_iterations+=1\n", 463 | " if gen_iterations%display_iters==0:\n", 464 | " if gen_iterations%(5*display_iters)==0:\n", 465 | " clear_output()\n", 466 | " print('[%d/%d][%d] Loss_D: %f Loss_G: %f loss_L1: %f'\n", 467 | " % (epoch, niter, gen_iterations, errD_sum/display_iters, errG_sum/display_iters, errL1_sum/display_iters), time.time()-t0)\n", 468 | " _, valA, valB = train_batch.send(6) \n", 469 | " fakeB = netG_gen(valA)\n", 470 | " showX(np.concatenate([valA, valB, fakeB], axis=0), 3)\n", 471 | " errL1_sum = errG_sum = errD_sum = 0 \n", 472 | " " 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": { 479 | "collapsed": true 480 | }, 481 | "outputs": [], 482 | "source": [ 483 | "_, valA, valB = train_batch.send(6) \n", 484 | "fakeB = netG_gen(valA)\n", 485 | "showX(np.concatenate([valA, valB, fakeB], axis=0), 3)\n" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": null, 491 | "metadata": { 492 | "collapsed": true 493 | }, 494 | "outputs": [], 495 | "source": [] 496 | } 497 | ], 498 | "metadata": { 499 | "kernelspec": { 500 | "display_name": "Python 3", 501 | "language": "python", 502 | "name": "python3" 503 | }, 504 | "language_info": { 505 | "codemirror_mode": { 506 | "name": "ipython", 507 | "version": 3 508 | }, 509 | "file_extension": ".py", 510 | "mimetype": "text/x-python", 511 | "name": "python", 512 | "nbconvert_exporter": "python", 513 | "pygments_lexer": "ipython3", 514 | "version": "3.6.2" 515 | } 516 | }, 517 | "nbformat": 4, 518 | "nbformat_minor": 1 519 | } 520 | -------------------------------------------------------------------------------- /gan/pix2pix-tf.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Tensorflow implementation of https://phillipi.github.io/pix2pix\n", 8 | "support tensorflow 1.3+" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import numpy as np\n", 18 | "import tensorflow as tf\n", 19 | "#tf.logging.set_verbosity(tf.logging.INFO)\n" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# Weights initializations\n", 29 | "# bias are initailized as 0\n", 30 | "\n", 31 | "conv_init = tf.random_normal_initializer(stddev=0.02)\n", 32 | "gamma_init = tf.random_normal_initializer(stddev=0.02, mean=1)\n" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "def LeakyReLU(_):\n", 44 | " return tf.maximum(_*0.2, _)\n", 45 | "\n", 46 | "def __LeakyReLU(x, leak=0.2, name=\"lrelu\"):\n", 47 | " with tf.variable_scope(name):\n", 48 | " f1 = 0.5 * (1 + leak)\n", 49 | " f2 = 0.5 * (1 - leak)\n", 50 | " return f1 * x + f2 * abs(x)\n", 51 | " \n", 52 | "def ZeroPadding2D(_):\n", 53 | " return tf.pad(_, 
[[0,0],[1,1],[1,1],[0,0]])\n", 54 | "\n", 55 | "class Model:\n", 56 | " def __init__(self, BUILDER, inputs, outputs, scope_name=None, **kwargs):\n", 57 | " self.inputs = inputs\n", 58 | " self.outputs = outputs\n", 59 | " self.scope_name=scope_name\n", 60 | " self.kwargs =kwargs\n", 61 | " self.BUILDER=BUILDER\n", 62 | " self.trainable_weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.scope_name)\n", 63 | " def __call__(self, **kwargs):\n", 64 | " m = self.BUILDER(scope_name=self.scope_name, **self.kwargs, **kwargs)\n", 65 | " print(m.scope_name)\n", 66 | " return m.outputs\n", 67 | " " 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "# Basic discriminator\n", 77 | "def conv2d(_, f, *a, **k):\n", 78 | " return tf.layers.conv2d(_, f, *a, kernel_initializer = conv_init, **k)\n", 79 | "\n", 80 | "def batchnorm(_, name=None):\n", 81 | " return tf.layers.batch_normalization(_, epsilon=1e-5, training=True, fused=True,\n", 82 | " gamma_initializer = gamma_init, axis=3)\n", 83 | "\n", 84 | "def BASIC_D(nc_in, nc_out, ndf, max_layers=3, scope_name=None, input_a=None, input_b=None):\n", 85 | " reuse = None if scope_name is None else True\n", 86 | " with tf.variable_scope(scope_name, \"BASIC_D\", [nc_in, nc_out, ndf, max_layers], reuse=reuse) as scope:\n", 87 | " scope_name = scope.name\n", 88 | " if input_a is None:\n", 89 | " input_a = tf.placeholder(tf.float32,shape=(None, 256, 256, nc_in), name='input_a')\n", 90 | " if input_b is None:\n", 91 | " input_b = tf.placeholder(tf.float32, shape=(None, 256, 256, nc_out), name='input_b')\n", 92 | " _ = tf.concat([input_a, input_b], axis=-1)\n", 93 | " _ = conv2d(_, ndf, kernel_size=4, strides=2, padding=\"same\", \n", 94 | " name = 'First', activation=LeakyReLU)\n", 95 | " \n", 96 | " for layer in range(1, max_layers): \n", 97 | " out_feat = ndf * min(2**layer, 8)\n", 98 | " _ = conv2d(_, out_feat, kernel_size=4, strides=2, padding=\"same\", \n", 99 | " use_bias=False, name = 'pyramid.{0}'.format(layer))\n", 100 | " _ = batchnorm(_, name='batch_{}'.format(layer)) \n", 101 | " _ = LeakyReLU(_)\n", 102 | " \n", 103 | " out_feat = ndf*min(2**max_layers, 8)\n", 104 | " _ = ZeroPadding2D(_)\n", 105 | " _ = conv2d(_, out_feat, kernel_size=4, use_bias=False, name = 'pyramid_last') \n", 106 | " _ = batchnorm(_, name='batch_last')\n", 107 | " _ = LeakyReLU(_)\n", 108 | " \n", 109 | " # final layer\n", 110 | " _ = ZeroPadding2D(_)\n", 111 | " _ = conv2d(_, 1, kernel_size=4, name = 'final', activation = tf.nn.sigmoid) \n", 112 | " return Model(BASIC_D, inputs=[input_a, input_b], outputs=[_], scope_name=scope_name,\n", 113 | " nc_in=nc_in, nc_out=nc_out, ndf=ndf, max_layers=max_layers)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "def UNET_G(isize, nc_in=3, nc_out=3, ngf=64, fixed_input_size=True, input_a=None, scope_name=None): \n", 123 | " max_nf = 8*ngf \n", 124 | " def block(x, s, nf_in, use_batchnorm=True, nf_out=None, nf_next=None):\n", 125 | " # print(\"block\",x,s,nf_in, use_batchnorm, nf_out, nf_next)\n", 126 | " assert s>=2 and s%2==0\n", 127 | " if nf_next is None:\n", 128 | " nf_next = min(nf_in*2, max_nf)\n", 129 | " if nf_out is None:\n", 130 | " nf_out = nf_in\n", 131 | " x = conv2d(x, nf_next, kernel_size=4, strides=2, use_bias=(not (use_batchnorm and s>2)),\n", 132 | " padding=\"same\", name = 'conv_{0}'.format(s))\n", 133 | " if 
s>2:\n", 134 | " if use_batchnorm:\n", 135 | " x = batchnorm(x, name='batch_{}.1'.format(s))\n", 136 | " x2 = LeakyReLU(x)\n", 137 | " x2 = block(x2, s//2, nf_next)\n", 138 | " x = tf.concat([x, x2], axis=-1)\n", 139 | " x = tf.nn.relu(x)\n", 140 | " x = tf.layers.conv2d_transpose(x, nf_out, kernel_size=4, strides=2, \n", 141 | " use_bias=(not use_batchnorm), padding='same',\n", 142 | " kernel_initializer = conv_init, \n", 143 | " name = 'convt.{0}'.format(s))\n", 144 | " \n", 145 | " if use_batchnorm:\n", 146 | " x = batchnorm(x, name='batch_{}.2'.format(s))\n", 147 | " if s <=8:\n", 148 | " x = tf.layers.dropout(x, rate=0.5, training=True)\n", 149 | " return x\n", 150 | " \n", 151 | " s = isize if fixed_input_size else None\n", 152 | " reuse = None if scope_name is None else True\n", 153 | " with tf.variable_scope(None, \"UNET_G\", [isize, nc_in, nc_out, ngf, fixed_input_size], reuse=reuse) as scope:\n", 154 | " scope_name = scope.name\n", 155 | " if input_a is None:\n", 156 | " input_a = tf.placeholder(shape=(None, s, s, nc_in), dtype=tf.float32, name='input_a') \n", 157 | " _ = block(input_a, isize, nc_in, False, nf_out=nc_out, nf_next=ngf)\n", 158 | " _ = tf.nn.tanh(_)\n", 159 | " return Model(UNET_G, inputs=[input_a], outputs=[_], scope_name=scope_name, \n", 160 | " isize=isize, nc_in=nc_in, nc_out=nc_out, ngf=ngf, fixed_input_size=fixed_input_size)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "nc_in = 3\n", 172 | "nc_out = 3\n", 173 | "ngf = 64\n", 174 | "ndf = 64\n", 175 | "λ = 10\n", 176 | "\n", 177 | "loadSize = 286\n", 178 | "imageSize = 256\n", 179 | "batchSize = 1\n", 180 | "lrD = 2e-4\n", 181 | "lrG = 2e-4" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "netD = BASIC_D(nc_in, nc_out, ndf)\n" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": { 197 | "scrolled": false 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "netG = UNET_G(imageSize, nc_in, nc_out, ngf, input_a=netD.inputs[0])\n" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "def build_functions():\n", 211 | " assert netG.inputs[0] is netD.inputs[0]\n", 212 | " real_A = netG.inputs[0]\n", 213 | " fake_B = netG.outputs[0]\n", 214 | " def netG_generate(A, sess): \n", 215 | " return sess.run(netG.outputs[0],feed_dict={real_A:A})\n", 216 | " real_B = netD.inputs[1]\n", 217 | " output_D_real = netD.outputs[0] #(input_a=real_A, input_b=real_B)[0]\n", 218 | " output_D_fake = netD(input_a=real_A, input_b=fake_B)[0]\n", 219 | "\n", 220 | " loss_fn = lambda output, target : -tf.reduce_mean(tf.log(output+1e-12)*target+tf.log(1-output+1e-12)*(1-target))\n", 221 | "\n", 222 | " loss_D_real = loss_fn(output_D_real, tf.ones_like(output_D_real))\n", 223 | " loss_D_fake = loss_fn(output_D_fake, tf.zeros_like(output_D_fake))\n", 224 | " loss_G_fake = loss_fn(output_D_fake, tf.ones_like(output_D_fake))\n", 225 | "\n", 226 | "\n", 227 | " loss_L1 = tf.reduce_mean(tf.abs(fake_B-real_B))\n", 228 | "\n", 229 | " loss_D = loss_D_real +loss_D_fake\n", 230 | "\n", 231 | " optimizerD = tf.train.AdamOptimizer(lrD, beta1=0.5).minimize(loss_D, var_list=netD.trainable_weights)\n", 232 | "\n", 233 | " loss_G = loss_G_fake + 100 * loss_L1\n", 234 | "\n", 235 | 
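The `Model` wrapper above leans on TF 1.x variable scoping: calling the model a second time rebuilds the ops in the same scope with `reuse=True`, so `netD` applied to `(real_A, fake_B)` shares every weight with the original `(real_A, real_B)` graph. A self-contained sketch of that trick (hypothetical `dense_block` scope, TensorFlow 1.x API):

```python
import tensorflow as tf

def dense(x, scope_name=None):
    reuse = None if scope_name is None else True
    with tf.variable_scope(scope_name, "dense_block", [x], reuse=reuse) as sc:
        w = tf.get_variable("w", shape=(4, 1))
        return tf.matmul(x, w), sc.name

x1 = tf.placeholder(tf.float32, (None, 4))
x2 = tf.placeholder(tf.float32, (None, 4))
y1, name = dense(x1)       # first call creates w
y2, _ = dense(x2, name)    # second call reuses the same w
print(len(tf.trainable_variables()))   # 1 shared weight matrix
```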
" optimizerG = tf.train.AdamOptimizer(lrG, beta1=0.5).minimize(loss_G, var_list=netG.trainable_weights)\n", 236 | " def netD_train(A, B, sess):\n", 237 | " return sess.run(\n", 238 | " [optimizerD, loss_D/2],feed_dict={real_A:A, real_B:B})[1:]\n", 239 | " def netG_train(A, B, sess):\n", 240 | " return sess.run(\n", 241 | " [optimizerG, loss_G_fake, loss_L1],feed_dict={real_A:A, real_B:B})[1:]\n", 242 | " return netG_generate, netD_train, netG_train\n", 243 | "netG_generate, netD_train, netG_train = build_functions()" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "from PIL import Image\n", 253 | "import numpy as np\n", 254 | "import glob\n", 255 | "from random import randint, shuffle\n", 256 | "\n", 257 | "def load_data(file_pattern):\n", 258 | " return glob.glob(file_pattern)\n", 259 | "def read_image(fn, direction=0):\n", 260 | " im = Image.open(fn)\n", 261 | " im = im.resize( (loadSize*2, loadSize), Image.BILINEAR )\n", 262 | " arr = np.array(im)/255*2-1\n", 263 | " w1,w2 = (loadSize-imageSize)//2,(loadSize+imageSize)//2\n", 264 | " h1,h2 = w1,w2\n", 265 | " imgA = arr[h1:h2, loadSize+w1:loadSize+w2, :]\n", 266 | " imgB = arr[h1:h2, w1:w2, :]\n", 267 | " if randint(0,1):\n", 268 | " imgA=imgA[:,::-1]\n", 269 | " imgB=imgB[:,::-1]\n", 270 | " if direction==0:\n", 271 | " return imgA, imgB\n", 272 | " else:\n", 273 | " return imgB,imgA\n", 274 | "\n", 275 | "data = \"edges2shoes\"\n", 276 | "data = \"facades\"\n", 277 | "direction = 0\n", 278 | "trainAB = load_data('pix2pix/{}/train/*.jpg'.format(data))\n", 279 | "valAB = load_data('pix2pix/{}/val/*.jpg'.format(data))\n", 280 | "assert len(trainAB) and len(valAB)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": { 287 | "collapsed": true 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "def minibatch(dataAB, batchsize, direction=0):\n", 292 | " length = len(dataAB)\n", 293 | " epoch = i = 0\n", 294 | " tmpsize = None \n", 295 | " while True:\n", 296 | " size = tmpsize if tmpsize else batchsize\n", 297 | " if i+size > length:\n", 298 | " shuffle(dataAB)\n", 299 | " i = 0\n", 300 | " epoch+=1 \n", 301 | " dataA = []\n", 302 | " dataB = []\n", 303 | " for j in range(i,i+size):\n", 304 | " imgA,imgB = read_image(dataAB[j], direction)\n", 305 | " dataA.append(imgA)\n", 306 | " dataB.append(imgB)\n", 307 | " dataA = np.float32(dataA)\n", 308 | " dataB = np.float32(dataB)\n", 309 | " i+=size\n", 310 | " tmpsize = yield epoch, dataA, dataB \n", 311 | " " 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "from IPython.display import display\n", 321 | "def showX(X, rows=1):\n", 322 | " assert X.shape[0]%rows == 0\n", 323 | " int_X = ( (X+1)/2*255).clip(0,255).astype('uint8')\n", 324 | " int_X = int_X.reshape(-1,imageSize,imageSize, 3)\n", 325 | " int_X = int_X.reshape(rows, -1, imageSize, imageSize,3).swapaxes(1,2).reshape(rows*imageSize,-1, 3)\n", 326 | " display(Image.fromarray(int_X))" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | "train_batch = minibatch(trainAB, 6, direction=direction)\n", 336 | "_, trainA, trainB = next(train_batch)\n", 337 | "showX(trainA)\n", 338 | "showX(trainB)\n", 339 | "del train_batch, trainA, trainB" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | 
"execution_count": null, 345 | "metadata": { 346 | "collapsed": true 347 | }, 348 | "outputs": [], 349 | "source": [ 350 | "def netG_gen(A):\n", 351 | " return np.concatenate([netG_generate(A[i:i+1], sess) for i in range(A.shape[0])], axis=0)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": {}, 358 | "outputs": [], 359 | "source": [ 360 | "config = tf.ConfigProto(allow_soft_placement = True)" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": { 374 | "scrolled": false 375 | }, 376 | "outputs": [], 377 | "source": [ 378 | "import time\n", 379 | "from IPython.display import clear_output\n", 380 | "t0 = time.time()\n", 381 | "niter = 50\n", 382 | "gen_iterations = 0\n", 383 | "errL1 = epoch = errG = 0\n", 384 | "errL1_sum = errG_sum = errD_sum = 0\n", 385 | "\n", 386 | "display_iters = 500\n", 387 | "val_batch = minibatch(valAB, 6, direction)\n", 388 | "train_batch = minibatch(trainAB, batchSize, direction)\n", 389 | "\n", 390 | "with tf.Session(config=config) as sess:\n", 391 | " sess.run(tf.global_variables_initializer())\n", 392 | " while epoch < niter: \n", 393 | " epoch, trainA, trainB = next(train_batch) \n", 394 | " errD, = netD_train(trainA, trainB, sess)\n", 395 | " errD_sum +=errD\n", 396 | "\n", 397 | " errG, errL1 = netG_train(trainA, trainB, sess)\n", 398 | " errG_sum += errG\n", 399 | " errL1_sum += errL1\n", 400 | " gen_iterations+=1\n", 401 | " if gen_iterations%display_iters==0:\n", 402 | " if gen_iterations%(5*display_iters)==0:\n", 403 | " clear_output()\n", 404 | " print('[%d/%d][%d] Loss_D: %f Loss_G: %f loss_L1: %f'\n", 405 | " % (epoch, niter, gen_iterations, errD_sum/display_iters, errG_sum/display_iters, errL1_sum/display_iters), time.time()-t0)\n", 406 | " _, valA, valB = train_batch.send(6) \n", 407 | " fakeB = netG_gen(valA)\n", 408 | " showX(np.concatenate([valA, valB, fakeB], axis=0), 3)\n", 409 | " errL1_sum = errG_sum = errD_sum = 0\n", 410 | " _, valA, valB = next(val_batch)\n", 411 | " fakeB = netG_gen(valA)\n", 412 | " showX(np.concatenate([valA, valB, fakeB], axis=0), 3)\n", 413 | " " 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": null, 419 | "metadata": {}, 420 | "outputs": [], 421 | "source": [ 422 | "tf.global_variables()" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "metadata": { 429 | "collapsed": true 430 | }, 431 | "outputs": [], 432 | "source": [] 433 | } 434 | ], 435 | "metadata": { 436 | "kernelspec": { 437 | "display_name": "Python 3", 438 | "language": "python", 439 | "name": "python3" 440 | }, 441 | "language_info": { 442 | "codemirror_mode": { 443 | "name": "ipython", 444 | "version": 3 445 | }, 446 | "file_extension": ".py", 447 | "mimetype": "text/x-python", 448 | "name": "python", 449 | "nbconvert_exporter": "python", 450 | "pygments_lexer": "ipython3", 451 | "version": "3.6.2" 452 | } 453 | }, 454 | "nbformat": 4, 455 | "nbformat_minor": 1 456 | } 457 | -------------------------------------------------------------------------------- /gan/pretrained_weights/font_TAKUMISFONT_LP_netG_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tjwei/tensorflow-tutorial/d3ba9f6adfac92abc0c3988e7f7c3be1c64280fe/gan/pretrained_weights/font_TAKUMISFONT_LP_netG_weights.h5 
-------------------------------------------------------------------------------- /mnist.pkl.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tjwei/tensorflow-tutorial/d3ba9f6adfac92abc0c3988e7f7c3be1c64280fe/mnist.pkl.xz -------------------------------------------------------------------------------- /tfdot.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from graphviz import Digraph 3 | from random import randint 4 | from collections import defaultdict 5 | color_table = { 6 | "Const": "yellow", 7 | "MatMul": "#bbffbb", 8 | "Variable": "#ffbbbb", 9 | "Assign": "#bbbbff" 10 | } 11 | 12 | 13 | def split_name(n): 14 | ns = n.split('/') 15 | return "/".join(ns[:-1]), ns[-1] 16 | 17 | def common_name_space(n1, n2): 18 | ns1 = n1.split('/')[:-1] 19 | ns2 = n2.split('/')[:-1] 20 | l = min(len(ns1), len(ns2)) 21 | rtn = [] 22 | for i in range(l): 23 | if ns1[i] != ns2[i]: 24 | break 25 | rtn.append(ns1[i]) 26 | return "/".join(rtn) 27 | 28 | import html 29 | def tfdot(graph=None, size=(10,30)): 30 | def get_dot_data(name_space): 31 | if name_space !='': 32 | parent, _ = split_name(name_space) 33 | if name_space not in dot_data_dict[parent]['subgraphs']: 34 | get_dot_data(parent)['subgraphs'].add(name_space) 35 | return dot_data_dict[name_space] 36 | 37 | def update_dot(name_space=''): 38 | name = "cluster_"+name_space if name_space else 'root' 39 | dot = Digraph(comment="subgraph: "+name_space, name=name, 40 | graph_attr={"ratio":"compress", 41 | "size":"{},{}".format(*size)} 42 | ) 43 | dot.body.append('label="%s"'%name_space) 44 | dot_data = dot_data_dict[name_space] 45 | for s in dot_data['subgraphs']: 46 | #print(name_space, s) 47 | dot.subgraph(update_dot(s)) 48 | for node in dot_data['nodes']: 49 | #print(name_space, "node", node) 50 | dot.node(**node) 51 | for edge in dot_data['edges']: 52 | attr = extra_attr.get(edge, {}) 53 | dot.edge(*edge, **attr) 54 | return dot 55 | 56 | 57 | dot_data_dict = defaultdict(lambda :{"subgraphs":set(), "edges":set(), "nodes": []}) 58 | extra_attr = {} 59 | if graph is None: 60 | graph = tf.get_default_graph() 61 | for op in graph.get_operations(): 62 | if op.type not in color_table: 63 | new_color = "#%02x%02x%02x"%tuple(randint(0,100)+155 for i in range(3)) 64 | color_table[op.type] = new_color 65 | color = color_table.get(op.type, "white") 66 | name_space, name = split_name(op.name) 67 | outputs_label = "".join("<tr><td colspan='2'>output:{} {}</td></tr>".format( 68 | html.escape(str(o.shape)), html.escape(o.dtype.name)) for o in op.outputs) 69 | name_label = "<td>{}</td>".format(name) 70 | op_label = "<td>{}:</td>".format(op.node_def.op) 71 | label = '''<<table border="0"> 72 | <tr>{}{}</tr> 73 | {}</table>
>'''.format( op_label, name_label, outputs_label) 74 | dot_data = get_dot_data(name_space) 75 | dot_data['nodes'].append(dict(name=op.name, 76 | label=label, style="filled", fillcolor=color)) 77 | 78 | for op in graph.get_operations(): 79 | for i, ip in enumerate(op.inputs): 80 | name_space = common_name_space(ip.op.name, op.name) 81 | dot_data = get_dot_data(name_space) 82 | if op.type == 'Assign' and i ==0: 83 | dot_data['edges'].add((op.name, ip.op.name)) 84 | extra_attr[(op.name, ip.op.name)]={'color': 'red'} 85 | else: 86 | dot_data['edges'].add((ip.op.name, op.name)) 87 | return update_dot() -------------------------------------------------------------------------------- /transfered/01-Keras-pretrained.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# windows only hack for graphviz path \n", 12 | "import os\n", 13 | "for path in os.environ['PATH'].split(os.pathsep):\n", 14 | " if path.endswith(\"Library\\\\bin\"):\n", 15 | " os.environ['PATH']+=os.pathsep+os.path.join(path, 'graphviz')" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "import keras\n", 27 | "from keras.models import Sequential\n", 28 | "from PIL import Image\n", 29 | "import numpy as np" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "collapsed": true 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "import keras.backend as K\n", 41 | "# set the image data format to channels_last\n", 42 | "K.set_image_data_format('channels_last')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "# The first time this runs, the pretrained weights are downloaded, which takes a while\n", 54 | "pretrained = keras.applications.vgg16.VGG16()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "pretrained" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "# Take a look at the network's structure\n", 77 | "from IPython.display import SVG, display\n", 78 | "from keras.utils.vis_utils import model_to_dot\n", 79 | "\n", 80 | "SVG(model_to_dot(pretrained, show_shapes=True).create(prog='dot', format='svg'))" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### A look at the imagenet classes" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "from keras.applications import imagenet_utils" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "imagenet_utils.CLASS_INDEX_PATH" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": { 116 | "collapsed": false 117 | }, 118 | "outputs": [], 119 | "source": [ 120 | "from urllib.request import urlopen\n", 121 | "import json\n", 122 | "with urlopen(imagenet_utils.CLASS_INDEX_PATH) as jsonf:\n", 123 | " data = jsonf.read()" 124 | ] 125 | }, 126 | { 127 |
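Back to `tfdot.py` above for a moment: it walks `graph.get_operations()`, turns each `/`-separated name scope into a nested graphviz cluster, and draws `Assign` edges reversed and in red, so the assignment points from the op back to the variable it writes. A small usage sketch (the toy graph, scope, and output file names are arbitrary):

```python
import tensorflow as tf
from tfdot import tfdot

# Ops named inside a name scope become a nested cluster in the drawing.
g = tf.Graph()
with g.as_default(), tf.name_scope('layer1'):
    x = tf.placeholder(tf.float32, (None, 3), name='x')
    W = tf.Variable(tf.zeros((3, 2)), name='W')
    y = tf.matmul(x, W, name='y')

dot = tfdot(graph=g)        # a graphviz.Digraph; displays inline in Jupyter
dot.render('tiny_graph', format='svg')  # or write the drawing to disk
```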
"cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": false 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "class_dict = json.loads(data.decode())\n", 135 | "[class_dict[str(i)][1] for i in range(1000)]" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "Imagenet 2012 網頁\n", 143 | "\n", 144 | "http://image-net.org/challenges/LSVRC/2012/signup\n", 145 | "\n", 146 | "資料下載\n", 147 | "\n", 148 | "http://academictorrents.com/browse.php?search=imagenet\n", 149 | "\n", 150 | "一千張圖片\n", 151 | "\n", 152 | "https://www.dropbox.com/s/vippynksgd8c6qt/ILSVRC2012_val_1000.tar?dl=0" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "# 下載 圖片\n", 164 | "import os\n", 165 | "import urllib\n", 166 | "from urllib.request import urlretrieve\n", 167 | "dataset = 'ILSVRC2012_val_1000.tar'\n", 168 | "def reporthook(a,b,c):\n", 169 | " print(\"\\rdownloading: %5.1f%%\"%(a*b*100.0/c), end=\"\")\n", 170 | " \n", 171 | "if not os.path.isfile(dataset):\n", 172 | " origin = \"https://www.dropbox.com/s/vippynksgd8c6qt/ILSVRC2012_val_1000.tar?dl=1\"\n", 173 | " print('Downloading data from %s' % origin)\n", 174 | " urlretrieve(origin, dataset, reporthook=reporthook)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": true 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "# 解開圖片\n", 186 | "from tarfile import TarFile\n", 187 | "tar = TarFile(dataset)\n", 188 | "tar.extractall()" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": { 195 | "collapsed": false 196 | }, 197 | "outputs": [], 198 | "source": [ 199 | "# 讀取圖片\n", 200 | "from PIL import Image as pimage\n", 201 | "from glob import glob\n", 202 | "imgs = []\n", 203 | "files = list(glob('ILSVRC2012_img_val/ILSVRC2012_val_*.JPEG'))\n", 204 | "for fn in files:\n", 205 | " img = pimage.open(fn)\n", 206 | " if img.mode != 'RGB':\n", 207 | " img = img.convert('RGB')\n", 208 | " img = np.array(img.resize((224,224))) \n", 209 | " imgs.append(img)\n", 210 | "imgs = np.array(imgs)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "collapsed": false 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "# 準備資料,轉成通用的格式(扣掉顏色的中間值)\n", 222 | "p_imgs = imagenet_utils.preprocess_input(np.float32(imgs))\n", 223 | "del imgs" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": { 230 | "collapsed": false 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "# 實際\n", 235 | "predictions = pretrained.predict(p_imgs)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "collapsed": false 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "# 對應編碼\n", 247 | "results = imagenet_utils.decode_predictions(predictions)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": { 254 | "collapsed": false 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "from IPython.display import Image, HTML, display\n", 259 | "for fn, res in zip(files[:100], results[:100]):\n", 260 | " res_text = \"\".join(\"
  • {:05.2f}% : {}
  • \".format(x[2]*100, x[1]) for x in res)\n", 261 | " display(HTML(\"\"\"\n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | "
      {}
    \n", 267 | " \"\"\".format(fn, res_text))) " 268 | ] 269 | } 270 | ], 271 | "metadata": { 272 | "kernelspec": { 273 | "display_name": "Python 3", 274 | "language": "python", 275 | "name": "python3" 276 | }, 277 | "language_info": { 278 | "codemirror_mode": { 279 | "name": "ipython", 280 | "version": 3 281 | }, 282 | "file_extension": ".py", 283 | "mimetype": "text/x-python", 284 | "name": "python", 285 | "nbconvert_exporter": "python", 286 | "pygments_lexer": "ipython3", 287 | "version": "3.5.3" 288 | } 289 | }, 290 | "nbformat": 4, 291 | "nbformat_minor": 1 292 | } 293 | -------------------------------------------------------------------------------- /transfered/02-Keras-pretrained-test_others.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# windows only hack for graphviz path \n", 12 | "import os\n", 13 | "for path in os.environ['PATH'].split(os.pathsep):\n", 14 | " if path.endswith(\"Library\\\\bin\"):\n", 15 | " os.environ['PATH']+=os.pathsep+os.path.join(path, 'graphviz')" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "# 設定環境變數來控制 keras, theano\n", 27 | "os.environ['KERAS_BACKEND']=\"tensorflow\"\n", 28 | "#os.environ['THEANO_FLAGS']=\"floatX=float32, device=cuda\"" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "import keras\n", 40 | "from keras.models import Sequential\n", 41 | "from PIL import Image\n", 42 | "import numpy as np" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": true 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "import keras.backend as K\n", 54 | "# 設定 channels_first 或 channels_last\n", 55 | "K.set_image_data_format('channels_last')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "pretrained = keras.applications.xception.Xception()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "from IPython.display import SVG, display\n", 78 | "from keras.utils.vis_utils import model_to_dot\n", 79 | "\n", 80 | "SVG(model_to_dot(pretrained, show_shapes=True).create(prog='dot', format='svg'))" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### 看一下 imagenet 的分類" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "from keras.applications import imagenet_utils" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "# 讀取圖片,使用 image.load_img\n", 110 | "from keras.preprocessing import image\n", 111 | "from glob import glob\n", 112 | "imgs = []\n", 113 | "files = list(glob('ILSVRC2012_img_val/ILSVRC2012_val_*.JPEG'))\n", 114 | "for fn in files:\n", 115 | " img = image.load_img(fn, target_size=(299,299)) \n", 116 | " 
imgs.append(np.array(img))\n", 117 | "imgs = np.array(imgs)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "# Use xception's own preprocess\n", 129 | "from keras.applications.xception import preprocess_input\n", 130 | "p_imgs = preprocess_input(np.float32(imgs))\n", 131 | "del imgs" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": false 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "# If using channels_first, the axes can be rearranged\n", 143 | "#import numpy as np\n", 144 | "#p_imgs = np.moveaxis(p_imgs, 3, 1)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "# Run the prediction\n", 156 | "predictions = pretrained.predict(p_imgs)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": { 163 | "collapsed": false 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "# Decode the predictions into class labels\n", 168 | "results = imagenet_utils.decode_predictions(predictions)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": { 175 | "collapsed": false 176 | }, 177 | "outputs": [], 178 | "source": [ 179 | "from IPython.display import Image, HTML, display\n", 180 | "for fn, res in zip(files[:100], results[:100]):\n", 181 | " res_text = \"\".join(\"<li>{:05.2f}% : {}</li>\".format(x[2]*100, x[1]) for x in res)\n", 182 | " display(HTML(\"\"\"\n", 183 | " <table>\n", 184 | " <tr>\n", 185 | " <td><img src='{}'/></td>\n", 186 | " <td><ul>\n", 187 | " {}</ul></td></tr></table>\n",
    \n", 188 | " \"\"\".format(fn, res_text))) " 189 | ] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "Python 3", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.5.3" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 1 213 | } 214 | -------------------------------------------------------------------------------- /transfered/06-Art style transfer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import os\n", 12 | "# 設定環境變數來控制 keras, theano\n", 13 | "os.environ['KERAS_BACKEND']=\"tensorflow\"\n", 14 | "#os.environ['THEANO_FLAGS']=\"floatX=float32, device=cuda\"" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import keras\n", 26 | "from keras.models import Sequential\n", 27 | "from PIL import Image\n", 28 | "import numpy as np" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "import keras.backend as K\n", 40 | "# 設定 channels_first 或 channels_last\n", 41 | "K.set_image_data_format('channels_last')" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": true 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "from keras.preprocessing.image import load_img\n", 53 | "from IPython.display import display\n", 54 | "img_H, img_W = 360, 480\n", 55 | "def preprocess_image(filename):\n", 56 | " img = np.array(load_img(filename, target_size=(img_H, img_W))) \n", 57 | " img = img[None, ...].astype('float32')\n", 58 | " img = keras.applications.vgg16.preprocess_input(img)\n", 59 | " return img\n", 60 | "def show_image(arr):\n", 61 | " arr = arr.reshape(img_H, img_W,3)+[103.939, 116.779, 123.68]\n", 62 | " arr = arr.clip(0,255).astype('uint8')[:,:, ::-1]\n", 63 | " display(Image.fromarray(arr))" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "from keras import backend as K\n", 75 | "from keras.engine.topology import Layer\n", 76 | "import numpy as np\n", 77 | "\n", 78 | "class ImageLayer(Layer):\n", 79 | "\n", 80 | " def __init__(self, init_img=None, **kwargs):\n", 81 | " if init_img is None:\n", 82 | " self.init_img = np.random.uniform(-50,50,size=(1,img_H, img_W, 3)).astype('float32')\n", 83 | " else:\n", 84 | " self.init_img = init_img\n", 85 | " super().__init__(**kwargs)\n", 86 | " def initializer(self, size):\n", 87 | " return self.init_img\n", 88 | "\n", 89 | " def build(self, input_shape):\n", 90 | " # Create a trainable weight variable for this layer. 
\n", 91 | " self.img = self.add_weight(shape=(1, img_H, img_W, 3), \n", 92 | " initializer=self.initializer,\n", 93 | " trainable=True)\n", 94 | " super().build(input_shape) # Be sure to call this somewhere!\n", 95 | " \n", 96 | " def call(self, x):\n", 97 | " return self.img\n", 98 | "\n", 99 | " def compute_output_shape(self, input_shape):\n", 100 | " return (1, img_H, img_W, 3)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": false 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "# 結構的圖片\n", 112 | "#base_image = preprocess_image(\"img/tubingen.jpg\")\n", 113 | "base_image = preprocess_image(\"img/tubingen.jpg\")\n", 114 | "show_image(base_image)\n", 115 | "style_image = preprocess_image(\"img/starry_night.jpg\")\n", 116 | "show_image(style_image)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "image_layer = ImageLayer( init_img=.9*base_image +.1*style_image,\n", 128 | " name='image_layer')(keras.layers.Input(shape=(0,)))" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "# Hack\n", 140 | "_load_weights = keras.models.Model.load_weights\n", 141 | "def my_load_weights(self, fn):\n", 142 | " return _load_weights(self, fn, by_name=True)\n", 143 | "keras.models.Model.load_weights = my_load_weights\n", 144 | "\n", 145 | "# 將以上三個圖片送入 vgg16\n", 146 | "vgg16_model = keras.applications.vgg16.VGG16(weights='imagenet', input_tensor=image_layer,\n", 147 | " include_top=False, input_shape=(img_H, img_W,3))\n", 148 | "# unhack\n", 149 | "keras.models.Model.load_weights = _load_weights\n", 150 | "\n", 151 | "# 比較簡單的方式取得各層\n", 152 | "outputs_dict = {layer.name :layer.output for layer in vgg16_model.layers }\n", 153 | "outputs_dict" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "import tensorflow as tf\n", 165 | "w = vgg16_model.get_layer('image_layer').weights[0]\n", 166 | "style_feature_names = ['block1_conv1', 'block2_conv1',\n", 167 | " 'block3_conv1', 'block4_conv1',\n", 168 | " 'block5_conv1']\n", 169 | "style_features = [outputs_dict[x] for x in style_feature_names]\n", 170 | "content_feature = outputs_dict['block4_conv2']\n", 171 | "with tf.Session() as sess:\n", 172 | " sess.run(tf.global_variables_initializer()) \n", 173 | " target_content_feature = sess.run(content_feature, feed_dict={w: base_image}) \n", 174 | " target_style_features = sess.run(style_features, feed_dict={w: style_image}) " 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "# 各種 Norms 和 loss function\n", 186 | "# 取自 https://github.com/fchollet/keras/blob/master/examples/neural_style_transfer.py\n", 187 | "# compute the neural style loss\n", 188 | "# first we need to define 4 util functions\n", 189 | "\n", 190 | "# the gram matrix of an image tensor (feature-wise outer product)\n", 191 | "def gram_matrix(x):\n", 192 | " assert K.ndim(x) == 3\n", 193 | " features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))\n", 194 | " gram = K.dot(features, K.transpose(features))\n", 195 | " return gram\n", 196 | "\n", 197 | "# 
the \"style loss\" is designed to maintain\n", 198 | "# the style of the reference image in the generated image.\n", 199 | "# It is based on the gram matrices (which capture style) of\n", 200 | "# feature maps from the style reference image\n", 201 | "# and from the generated image\n", 202 | "\n", 203 | "def style_loss(combination, target):\n", 204 | " assert K.ndim(combination) == 3\n", 205 | " assert np.ndim(target) ==3\n", 206 | " S = gram_matrix(K.constant(target))\n", 207 | " C = gram_matrix(combination) \n", 208 | " size = target.size\n", 209 | " return K.sum(K.square(S - C)) / (4. * (size ** 2))\n", 210 | "\n", 211 | "# an auxiliary loss function\n", 212 | "# designed to maintain the \"content\" of the\n", 213 | "# base image in the generated image\n", 214 | "\n", 215 | "def content_loss(combination, target):\n", 216 | " assert np.ndim(target) ==3\n", 217 | " assert K.ndim(combination) == 3\n", 218 | " size = target.size\n", 219 | " return K.sum(K.square(combination - K.constant(target)))/size\n", 220 | "\n", 221 | "# the 3rd loss function, total variation loss,\n", 222 | "# designed to keep the generated image locally coherent\n", 223 | "\n", 224 | "def total_variation_loss(x):\n", 225 | " assert K.ndim(x) == 4\n", 226 | " a = K.square(x[:, :-1, :-1, :] - x[:, 1: , :-1, :])\n", 227 | " b = K.square(x[:, :-1, :-1, :] - x[:, :-1, 1: , :])\n", 228 | " size = img_H * img_W * 3\n", 229 | " return K.sum(K.pow(a + b, 1.25))/size" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": { 236 | "collapsed": false 237 | }, 238 | "outputs": [], 239 | "source": [ 240 | "content_weight = .5\n", 241 | "style_weight = 1.0\n", 242 | "total_variation_weight = 1e-6" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": { 249 | "collapsed": true 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "#content_weight = 20\n", 254 | "#style_weight = 1.0\n", 255 | "#total_variation_weight = 5e-4" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "loss_c = content_loss(content_feature[0], target_content_feature[0])\n", 267 | "loss_s = K.variable(0.)\n", 268 | "for layer, target_layer in zip(style_features, target_style_features):\n", 269 | " loss_s = 2*loss_s + style_loss(layer[0], target_layer[0])\n", 270 | "loss_s /= len(style_features)\n", 271 | "loss_t = total_variation_loss(outputs_dict['image_layer'])\n", 272 | "loss = content_weight * loss_c + style_weight*loss_s + total_variation_weight * loss_t" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": { 279 | "collapsed": false 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "#train_step = tf.train.AdamOptimizer(5e-2).minimize(loss, var_list=[w])\n", 284 | "train_step = tf.train.AdamOptimizer(0.1).minimize(loss, var_list=[w])" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": { 291 | "collapsed": false 292 | }, 293 | "outputs": [], 294 | "source": [ 295 | "with tf.Session() as sess:\n", 296 | " tf.global_variables_initializer().run()\n", 297 | " for i in range(50000):\n", 298 | " if i%100==0:\n", 299 | " if i%500==0:\n", 300 | " show_image(w.eval())\n", 301 | " print(i, sess.run([loss, loss_s, loss_c, loss_t]))\n", 302 | " train_step.run()" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": { 308 | 
"collapsed": true 309 | }, 310 | "source": [ 311 | "### 參考結果\n", 312 | "" 313 | ] 314 | } 315 | ], 316 | "metadata": { 317 | "kernelspec": { 318 | "display_name": "Python 3", 319 | "language": "python", 320 | "name": "python3" 321 | }, 322 | "language_info": { 323 | "codemirror_mode": { 324 | "name": "ipython", 325 | "version": 3 326 | }, 327 | "file_extension": ".py", 328 | "mimetype": "text/x-python", 329 | "name": "python", 330 | "nbconvert_exporter": "python", 331 | "pygments_lexer": "ipython3", 332 | "version": "3.5.3" 333 | } 334 | }, 335 | "nbformat": 4, 336 | "nbformat_minor": 1 337 | } 338 | -------------------------------------------------------------------------------- /transfered/HW1-Neural Matching.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "實作 https://github.com/tjwei/Neural-Matching/blob/master/matching-theano-VGG-one-patch.ipynb\n", 8 | "\n", 9 | "可參考: \n", 10 | "* https://arxiv.org/abs/1601.04589\n", 11 | "* https://github.com/awentzonline/image-analogies" 12 | ] 13 | } 14 | ], 15 | "metadata": { 16 | "kernelspec": { 17 | "display_name": "Python 3", 18 | "language": "python", 19 | "name": "python3" 20 | }, 21 | "language_info": { 22 | "codemirror_mode": { 23 | "name": "ipython", 24 | "version": 3 25 | }, 26 | "file_extension": ".py", 27 | "mimetype": "text/x-python", 28 | "name": "python", 29 | "nbconvert_exporter": "python", 30 | "pygments_lexer": "ipython3", 31 | "version": "3.5.3" 32 | } 33 | }, 34 | "nbformat": 4, 35 | "nbformat_minor": 1 36 | } 37 | -------------------------------------------------------------------------------- /transfered/img/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tjwei/tensorflow-tutorial/d3ba9f6adfac92abc0c3988e7f7c3be1c64280fe/transfered/img/result.png -------------------------------------------------------------------------------- /transfered/img/starry_night.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tjwei/tensorflow-tutorial/d3ba9f6adfac92abc0c3988e7f7c3be1c64280fe/transfered/img/starry_night.jpg -------------------------------------------------------------------------------- /transfered/img/tubingen.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tjwei/tensorflow-tutorial/d3ba9f6adfac92abc0c3988e7f7c3be1c64280fe/transfered/img/tubingen.jpg --------------------------------------------------------------------------------