├── .gitignore
├── README.md
├── convolution
├── ResNet.ipynb
├── convolution model.ipynb
└── images
│ ├── Convolution_schematic.gif
│ ├── PAD.png
│ ├── SIGNS.png
│ ├── a_pool.png
│ ├── ave-pool.png
│ ├── ave_pool1.png
│ ├── average_pool.png
│ ├── conv.png
│ ├── conv1.png
│ ├── conv_kiank.mp4
│ ├── conv_nn.png
│ ├── images
│ ├── convblock_kiank.png
│ ├── idblock2_kiank.png
│ ├── idblock3_kiank.png
│ ├── my_image.jpg
│ ├── resnet_kiank.png
│ ├── signs_data_kiank.png
│ ├── skip_connection_kiank.png
│ └── vanishing_grad_kiank.png
│ ├── max_pool.png
│ ├── max_pool1.png
│ ├── model.png
│ ├── thumbs_up.jpg
│ └── vert_horiz_kiank.png
├── deep-neural-network
├── Deep NN.ipynb
└── images
│ └── backprop_kiank.png
├── dropout
├── dropout.ipynb
├── images
│ └── dropout1_kiank.mp4
└── model.py
├── examples
└── dataloader.py
├── logistic-regression
└── logistic-regression.ipynb
├── optimization
└── optimization.ipynb
├── regularization
├── model.py
└── regularization.ipynb
├── shallow-neural-network
├── images
│ ├── 1-hidden-nn.png
│ └── multi-layer.png
└── one-hidden-layer-nn.ipynb
└── tensorflow
└── tf-hands-on.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Working files
2 | */.ipynb_checkpoints
3 | Makefile
4 | .idea
5 | start.sh
6 | test.py
7 |
8 | # Byte-compiled / optimized / DLL files
9 | __pycache__/
10 | *.py[cod]
11 | *$py.class
12 |
13 | # C extensions
14 | *.so
15 |
16 | # Distribution / packaging
17 | .Python
18 | build/
19 | develop-eggs/
20 | dist/
21 | downloads/
22 | eggs/
23 | .eggs/
24 | lib/
25 | lib64/
26 | parts/
27 | sdist/
28 | var/
29 | wheels/
30 | pip-wheel-metadata/
31 | share/python-wheels/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 | MANIFEST
36 |
37 | # PyInstaller
38 | # Usually these files are written by a python script from a template
39 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
40 | *.manifest
41 | *.spec
42 |
43 | # Installer logs
44 | pip-log.txt
45 | pip-delete-this-directory.txt
46 |
47 | # Unit test / coverage reports
48 | htmlcov/
49 | .tox/
50 | .nox/
51 | .coverage
52 | .coverage.*
53 | .cache
54 | nosetests.xml
55 | coverage.xml
56 | *.cover
57 | *.py,cover
58 | .hypothesis/
59 | .pytest_cache/
60 |
61 | # Translations
62 | *.mo
63 | *.pot
64 |
65 | # Django stuff:
66 | *.log
67 | local_settings.py
68 | db.sqlite3
69 | db.sqlite3-journal
70 |
71 | # Flask stuff:
72 | instance/
73 | .webassets-cache
74 |
75 | # Scrapy stuff:
76 | .scrapy
77 |
78 | # Sphinx documentation
79 | docs/_build/
80 |
81 | # PyBuilder
82 | target/
83 |
84 | # Jupyter Notebook
85 | .ipynb_checkpoints
86 |
87 | # IPython
88 | profile_default/
89 | ipython_config.py
90 |
91 | # pyenv
92 | .python-version
93 |
94 | # pipenv
95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 |
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 |
108 | # SageMath parsed files
109 | *.sage.py
110 |
111 | # Environments
112 | .env
113 | .venv
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 |
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 |
124 | # Rope project settings
125 | .ropeproject
126 |
127 | # mkdocs documentation
128 | /site
129 |
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 |
135 | # Pyre type checker
136 | .pyre/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # deep-learning
2 | ---
3 | ## Reading List
4 | ---
5 | 1. [Logistic Regression](https://towardsdatascience.com/logistic-regression-step-by-step-implementation-f032a89936ca)
6 | 2. [Shallow Neural Network](https://towardsdatascience.com/building-a-shallow-neural-network-a4e2728441e0)
7 | 3. [Deep Neural Network](https://towardsdatascience.com/code-a-deep-neural-network-a5fd26ec41c4)
8 | 4. [Regularization & Dropout](https://towardsdatascience.com/regularization-dropout-in-deep-learning-5198c2bf6107)
9 | 5. [Optimization Methods](https://towardsdatascience.com/optimization-methods-in-deep-learning-790629f184b1)
10 |
--------------------------------------------------------------------------------
/convolution/convolution model.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Convolutional Neural Network"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Forward\n",
15 | "---\n",
16 | "- Zero Paddings\n",
17 | "- Convolutional Layer\n",
18 | "- Pooling\n",
19 | "- Combination: Conv + Pool"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "Zero Padding pads 0s at the edge of an image, benefits include:\n",
27 | "- It allows you to use a CONV layer without necessarily shrinking the height and width of the volumes. This is important for building deeper networks, since otherwise the height/width would shrink as you go to deeper layers. An important special case is the \"same\" convolution, in which the height/width is exactly preserved after one layer. \n",
28 | "\n",
29 | "- It helps us keep more of the information at the border of an image. Without padding, very few values at the next layer would be affected by pixels at the edges of an image.\n",
30 | "\n",
31 | "
\n",
32 | "\n",
33 | "
from Deep Learning Specialization Course"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "Consider an input of batched images with shape:\n",
41 | "\n",
42 | "$$(m, n_W, n_H, n_C)$$\n",
43 | "\n",
44 | "Where $m$ is the batch size, $n_W$ is the width of the image, $n_H$ is the height and $n_C$ is number of channels -- RGB would have 3 channels.\n",
45 | "\n",
46 | "After padded with size $p$, the size would become\n",
47 | "\n",
48 | "$$(m, n_W + 2p, n_H + 2p, n_C)$$"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 1,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "%matplotlib inline\n",
58 | "\n",
59 | "import numpy as np\n",
60 | "import matplotlib.pyplot as plt"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 2,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "def zero_pads(X, pad):\n",
70 | "    \"\"\"\n",
71 | "    Zero-pad the width and height axes of X (m, n_W, n_H, n_C), returning shape (m, n_W + 2*pad, n_H + 2*pad, n_C).\n",
72 | "    \"\"\"\n",
73 | "    X_pad = np.pad(X, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant', constant_values=0)\n",
74 | "    return X_pad"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 3,
80 | "metadata": {},
81 | "outputs": [
82 | {
83 | "name": "stdout",
84 | "output_type": "stream",
85 | "text": [
86 | "X shape (3, 4, 4, 3)\n",
87 | "X_pad shape (3, 8, 8, 3)\n"
88 | ]
89 | },
90 | {
91 | "data": {
92 | "text/plain": [
93 | "Text(0.5, 1.0, 'paded')"
94 | ]
95 | },
96 | "execution_count": 3,
97 | "metadata": {},
98 | "output_type": "execute_result"
99 | },
100 | {
101 | "data": {
102 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAADHCAYAAAAwLRlnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPFElEQVR4nO3df6zddX3H8efLtijc1lJSlmELQqYjGv8Q06EGwxjOyFB0S7ZFnG4zGrZlEIxsqNvMLHNL949xiGMQfqgDZIaaoAxxXaxjDEELorEtLkAgrda0Cg3l6sDCe3+cc+Vwe9t7as+53085z0dyk/Pr+72vWz73xed+f6aqkCS16wVdB5AkHZhFLUmNs6glqXEWtSQ1zqKWpMZZ1JLUOIt6gSX5lyQfGfVnpRYl+WiS6xZ62eebxV0HmDRV9afj+Kyk5y9n1AsoyaKuM0g6/FjUI5DkFUm+lmR3ks1J3tZ//dNJLk9ya5Jp4Df6r31sYNmLk+xI8oMk70tSSV42sPzH+o/PSLI9yUVJdvaXeU8nP7Ce95I8nOTDSbYkeSzJtUlelGRFkluS7Oq/fkuS1QPLnZTkv5LsSbIBWDlrva9Lcmf/d+XbSc4YdtlJZlEfoiRLgC8B/wH8EnABcH2Sk/sfeSfw98Ay4I5Zy54FfAD4TeBlwBnzfLtfBpYDq4D3Ap9KsmIkP4i0rz8A3gz8CvCrwN/Q64xrgZcCJwA/BS4bWOYG4B56Jft3wB/NvJFkFfDvwMeAY4C/ANYnOXa+ZSedRX3oXgcsBdZV1VNV9VXgFuDc/vs3V9X/VNUzVfV/s5b9feDaqtpcVT8BPjrP9/oZcElV/ayqbgWeAE6eZxnpF3VZVW2rqkfpTTbOraofV9X6qvpJVe3pv/7rAElOAH4N+EhVPVlVt9ObxMx4F3BrVd3a/33YAGwCzh5i2YlmUR+6lwDbquqZgdceoTfrBdg237IDzw/0WYAfV9Xegec/ofc/CWkcBsfjI8BLkhyV5IokjyR5HLgdOLq//+UlwGNVNT1ruRkvBX6vv9ljd5LdwBuA44ZYdqJZ1IfuB8DxSQb/LU8Avt9/fKDLE+4AVg88P37E2aRDMTgeT6A31i+i91fca6vqxcDp/fdDbzyvSDI1a7kZ24B/raqjB76mqmrdEMtONIv60N1Nb2Z7cZIl/Z0j5wA3DrHs54H39HdGHgV4zLRa8udJVic5Bvhr4N/o7Wv5KbC7//rfzny4qh6htyljbZIjkryB3u/CjOuAc5K8Ocmi/s7JM5KsHmLZiWZRH6KqeoregPot4EfAPwN/WFX3D7Hsl4FLgY3AA8Bd/beeHE9a6aDcQG8n+UPAg/R2An4COJLeWL8LuG3WMu8EXgs8Sq/EPzvzRlVtA94O/BWwi94M+y95tof2u+ykizcOaEeSVwDfBV44a1u0tKCSPAy8r6r+s+ssckbduSS/k+SF/cPs/hH4kiUtaZBF3b0/AXbS+9PyaeDPuo0jqTVu+pCkxjmjlqTGWdSS1LixXOZ02bJltXJle9dTefLJNo9627FjR9cR9mvVqlXzf2iBPfbYY0xPT2ehv+/U1FStWOGlVTQeBxrXYynqlStXcskll4xj1YfkwQcf7DrCnNauXdt1hP264IILuo6wj09+8pOdfN8VK1Zw/vnnd/K99fx32WWX7fc9N31IUuMsaklqnEUtSY2zqCWpcRa1JlqSs5J8L8kDST7UdR5pLha1Jlb/Yvefonflw1cC5yZ5ZbeppH1Z1JpkpwIPVNVD/cvV3kjvMpxSUyxqTbJVPPd2U9t59hZqACQ5L8mmJJump6eRumBRSwdQVVdW1ZqqWjM1NTX/AtIYWNSaZN/nufcFXM2z97qUmmFRa5J9E3h5kpOSHAG8A/hix5mkfYzlWh/S4aCq9iY5H/gKsAi4pqo2dxxL2odFrYlWVbcCt3adQzqQoTZ9eFKAJHVn3qL2pABJ6tYwM2pPCpCkDg1T
1POeFCBJGp+RHZ43eAbXnj17RrVaSZp4wxT1UCcFDJ7BtWzZslHlk6SJN0xRe1KAJHVo3uOoPSlAkro11AkvnhQgSd3xWh+S1DiLWpIaZ1FLUuMsaklqnEUtSY2zqCWpcRa1JDXOopakxlnUktQ4i1qSGmdRS1LjLGpJapxFLUmNG+rqeQdr165dXHHFFeNY9SG5+eabu44wp/Xr13cdYb92797ddYR9PP30011HaMqqVaO7M96DDz44snWtXbt2ZOsCWLdu3cjWVVUjW9dCcEYtSY2zqCWpcRa1JDXOopakxlnUktQ4i1qSGmdRa2IlOT7JxiRbkmxOcmHXmaS5jOU4aukwsRe4qKruTbIMuCfJhqra0nUwaZAzak2sqtpRVff2H+8BtgKjO3tEGhGLWgKSnAicAtw96/XzkmxKsml6erqTbJJFrYmXZCmwHnh/VT0++F5VXVlVa6pqzdTUVDcBNfEsak20JEvolfT1VfWFrvNIc7GoNbGSBLga2FpVH+86j7Q/FrUm2WnAu4Ezk9zX/zq761DSbB6ep4lVVXcA6TqHNJ95Z9RJrkmyM8l3FyKQJOm5htn08WngrDHnkCTtx7xFXVW3A48uQBZJ0hzcRi0d5kZ527tR3q5u1LeYG+Vt4ZYvXz6ydS2EkR31MXgG1969e0e1WkmaeCMr6sEzuBYvdqIuSaPicdSS1LhhDs/7HPB14OQk25O8d/yxJEkz5t1GUVXnLkQQSdLc3PQhSY2zqCWpcRa1JDXOopakxlnUktQ4i1qSGmdRS1LjLGpJapxFLUmNs6glqXEWtSQ1zqKWpMZZ1JLUuLFc4X/x4sWsXLlyHKs+JEm6jjCnc845p+sI+/XBD36w6wj7uO2227qO0JRR/q6N8ndk1ON6lGPx8ssvH9m6FoIzaklqnEUtSY2zqCWpcRa1JDXOopakxlnUktQ4i1oTLcmiJN9KckvXWaT9sag16S4EtnYdQjoQi1oTK8lq4C3AVV1nkQ7EotYk+wRwMfDM/j6Q5Lwkm5Jsmp6eXrBg0iCLWhMpyVuBnVV1z4E+V1VXVtWaqlozNTW1QOmk57KoNalOA96W5GHgRuDMJNd1G0mam0WtiVRVH66q1VV1IvAO4KtV9a6OY0lzsqglqXFjucypdDipqq8BX+s4hrRf886okxyfZGOSLUk2J7lwIYJJknqGmVHvBS6qqnuTLAPuSbKhqraMOZskiSFm1FW1o6ru7T/eQ+8srlXjDiZJ6jmonYlJTgROAe4eSxpJ0j6G3pmYZCmwHnh/VT0+x/vnAecBHHnkkSMLKOnALr300pGt68477xzZupYuXTqydQEsX758pOs7nAw1o06yhF5JX19VX5jrM4NncB1xxBGjzChJE22Yoz4CXA1sraqPjz+SJGnQMDPq04B30zvF9r7+19ljziVJ6pt3G3VV3QFkAbJIkubgKeSS1DiLWpIaZ1FLUuMsaklqnEUtSY2zqCWpcRa1JDXOopakxlnUktQ4i1qSGmdRS1LjLGpJapxFLUmNs6glqXFD34rrYJxwwgkjvT3QqIzyNkOjNOpbFo1Si7c/WrRoUdcRmnLDDTd0HWFOL3jBaOeB69atG+n6DifOqCWpcRa1JDXOopakxlnUktQ4i1qSGmdRa6IlOTrJTUnuT7I1yeu7ziTNNpbD86TDyD8Bt1XV7yY5Ajiq60DSbBa1JlaS5cDpwB8DVNVTwFNdZpLm4qYPTbKTgF3AtUm+leSqJFODH0hyXpJNSTZNT093k1ITz6LWJFsMvAa4vKpOAaaBDw1+oKqurKo1VbVmampqrnVIY2dRa5JtB7ZX1d395zfRK26pKRa1JlZV/RDYluTk/ktvBLZ0GEmakzsTNekuAK7vH/HxEPCejvNI+7CoNdGq6j5gTdc5pAOZd9NHkhcl+UaSbyfZnGTtQgSTJPUMM6N+Ejizqp5IsgS4I8mXq+quMWeTJDFEUVdVAU/0ny7pf9U4Q0mSnjXUUR9JFiW5
D9gJbBg4nEmSNGZDFXVVPV1VrwZWA6cmedXszwyewfXoo4+OOKYkTa6DOo66qnYDG4Gz5njv52dwHXPMMSOKJ0ka5qiPY5Mc3X98JPAm4P4x55Ik9Q1z1MdxwGeSLKJX7J+vqlvGG0uSNGOYoz6+A5yyAFkkSXPwWh+S1DiLWpIaZ1FLUuMsaklqnEUtSY2zqCWpcRa1JDXOopakxlnUktQ4i1qSGmdRS1LjLGpJapxFLUmNs6glqXHp3bt2xCtNdgGPjGh1K4EfjWhdo2SugzPKXC+tqmNHtK6hHcS4noT/BqNkrp79juuxFPUoJdlUVWu6zjGbuQ5Oq7nGodWf1VwHp6VcbvqQpMZZ1JLUuMOhqK/sOsB+mOvgtJprHFr9Wc11cJrJ1fw2akmadIfDjFqSJlqzRZ3krCTfS/JAkg91nWdGkmuS7Ezy3a6zzEhyfJKNSbYk2Zzkwq4zASR5UZJvJPl2P9farjONU4tjttWxMSPJoiTfSnJL11lmJDk6yU1J7k+yNcnrO8/U4qaPJIuA/wXeBGwHvgmcW1VbOg0GJDkdeAL4bFW9qus8AEmOA46rqnuTLAPuAX6763+vJAGmquqJJEuAO4ALq+quLnONQ6tjttWxMSPJB4A1wIur6q1d5wFI8hngv6vqqiRHAEdV1e4uM7U6oz4VeKCqHqqqp4Abgbd3nAmAqrodeLTrHIOqakdV3dt/vAfYCqzqNhVUzxP9p0v6X+3NDEajyTHb6tgASLIaeAtwVddZZiRZDpwOXA1QVU91XdLQblGvArYNPN9OI4OrdUlOBE4B7u44CvDzP23vA3YCG6qqiVxj0PyYbW1sAJ8ALgae6TjHoJOAXcC1/U0yVyWZ6jpUq0WtX0CSpcB64P1V9XjXeQCq6umqejWwGjg1SRObiyZNa2MjyVuBnVV1T9dZZlkMvAa4vKpOAaaBzvc3tFrU3weOH3i+uv+a9qO/DXg9cH1VfaHrPLP1/3zcCJzVcZRxaXbMNjo2TgPeluRhepuJzkxyXbeRgN5fQtsH/vK7iV5xd6rVov4m8PIkJ/U35r8D+GLHmZrV32l3NbC1qj7edZ4ZSY5NcnT/8ZH0drTd32mo8WlyzLY6Nqrqw1W1uqpOpPdv9dWqelfHsaiqHwLbkpzcf+mNQOc7Xpss6qraC5wPfIXezo/PV9XmblP1JPkc8HXg5CTbk7y360z0Zifvpjcrua//dXbXoYDjgI1JvkOvyDZUVTOHYY1Sw2O21bHRsguA6/vj9tXAP3Qbp9HD8yRJz2pyRi1JepZFLUmNs6glqXEWtSQ1zqKWpMZZ1JLUOItakhpnUUtS4/4fNlqhXlEo2SUAAAAASUVORK5CYII=\n",
103 | "text/plain": [
104 | ""
105 | ]
106 | },
107 | "metadata": {
108 | "needs_background": "light"
109 | },
110 | "output_type": "display_data"
111 | }
112 | ],
113 | "source": [
114 | "X = np.random.randn(3, 4, 4, 3)\n",
115 | "X_pad = zero_pads(X, 2)\n",
116 | "\n",
117 | "print('X shape', X.shape)\n",
118 | "print('X_pad shape', X_pad.shape)\n",
119 | "\n",
120 | "plt.subplot(1, 2, 1)\n",
121 | "plt.imshow(X[0, :, :, 1], cmap='gray')\n",
122 | "plt.title('origin')\n",
123 | "\n",
124 | "plt.subplot(1, 2, 2)\n",
125 | "plt.imshow(X_pad[0, :, :, 1], cmap='gray')\n",
126 | "plt.title('paded')"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "## One Step of Convolutional Layer\n",
134 | "---\n",
135 | "
\n",
136 | "\n",
137 | "Consider a filter mapped to one piece of the image, with \n",
138 | "\n",
139 | "$$ \\text{filter size:} \\quad (f, f, n_C) $$\n",
140 | "$$ \\text{piece of image} \\quad (f, f, n_C) $$\n",
141 | "\n",
142 | "Where the filter has the depth of the piece of input image.\n",
143 | "\n",
144 | "Another way to look at this is you can think of the filter as the weights $W$, and for each piece of the image, it serves as an input $X$, so in the convolutional process, the formula equals:\n",
145 | "\n",
146 | "$$ Z = sum(W*X) + b $$\n",
147 | "$$ A = g(Z) $$\n",
148 | "\n",
149 | "Where $b$ is the bias and $g$ is the activation function. Doesn't it look very similar to the equations in the dense neural network?"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 4,
155 | "metadata": {},
156 | "outputs": [],
157 | "source": [
158 | "def sigmoid(x):\n",
159 | "    return 1/(1 + np.exp(-x))\n",
160 | "\n",
161 | "\n",
162 | "def one_step_conv(X, W, b):\n",
163 | "    \"\"\"\n",
164 | "    X is one slice of the input and W is the filter; both have shape (f, f, n_C).\n",
165 | "    b is the bias for this specific filter (each filter has its own bias; biases are not shared).\n",
166 | "    Sigmoid is assumed to be the activation function; returns the scalar activation A.\n",
167 | "    \"\"\"\n",
168 | "    assert X.shape == W.shape\n",
169 | "    Z = np.sum(np.multiply(W, X)) + b\n",
170 | "    A = sigmoid(Z)\n",
171 | "    return A"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": 5,
177 | "metadata": {},
178 | "outputs": [
179 | {
180 | "name": "stdout",
181 | "output_type": "stream",
182 | "text": [
183 | "1.7501778145874707e-06\n"
184 | ]
185 | }
186 | ],
187 | "source": [
188 | "X = np.random.randn(10, 10, 3)\n",
189 | "W = np.random.randn(10, 10, 3)\n",
190 | "b = 0\n",
191 | "\n",
192 | "A = one_step_conv(X, W, b)\n",
193 | "print(A)"
194 | ]
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {},
199 | "source": [
200 | "## Convolution\n",
201 | "---\n",
202 | "\n",
203 | "\n",
205 | "\n",
206 | "\n",
207 | "Now the input (here we use `A_prev` ) would be a batch of whole images with size \n",
208 | "\n",
209 | "$$ (m, n_{W_{prev}}, n_{H_{prev}}, n_{C_{prev}}) $$\n",
210 | "\n",
211 | "Filter with size\n",
212 | "\n",
213 | "$$ (n_{C}, f, f, n_{C_{prev}}) $$\n",
214 | "\n",
215 | "Where $n_{C}$ is the number of filters, which would become the depth of the output image.\n",
216 | "\n",
217 | "Bias with size\n",
218 | "\n",
219 | "$$ (n_{C}, 1) $$\n",
220 | "\n",
221 | "And parameters include:\n",
222 | "$$ \\text{padding of each image:} \\enspace pad $$\n",
223 | "\n",
224 | "$$ \\text{moving step:} \\enspace stride $$\n",
225 | "\n",
226 | "So the resulting output would have size:\n",
227 | "\n",
228 | "$$ (m, \\lfloor\\frac{n_{W_{prev}} + 2p - f}{stride}\\rfloor + 1, \\lfloor\\frac{n_{H_{prev}} + 2p - f}{stride}\\rfloor + 1, n_C)$$\n",
229 | "\n",
230 | "Now given an image from the input, we will need to slice it into pieces and multiply with the filter one by one. \n",
231 | "\n",
232 | "Consider a 2D image with size $(n_{W_{prev}}, n_{H_{prev}})$, and stride is $s$, filter size of $f$, then the top-left corner of the output image would have mapping:\n",
233 | "```python\n",
234 | "input[0:(0 + f), 0:(0 + f)] -> output[0, 0]\n",
235 | "```\n",
236 | "\n",
237 | "And\n",
238 | "```python\n",
239 | "input[s:(s + f), 0:(0 + f)] -> output[1, 0]\n",
240 | "```\n",
241 | "\n",
242 | "The pattern would be:\n",
243 | "```python\n",
244 | "input[i*s:(i*s + f), j*s:(j*s + f)] -> output[i, j]\n",
245 | "```\n",
246 | "\n",
247 | "We will make use of this pattern in our implementation to slice the original image and map it to the output."
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": 6,
253 | "metadata": {},
254 | "outputs": [],
255 | "source": [
256 | "def conv(A_prev, filters, bias, parameters):\n",
257 | "    \"\"\"\n",
258 | "    A_prev: batched input images with shape (m, n_W_prev, n_H_prev, n_C_prev); bias has one entry per filter.\n",
259 | "    filters has shape (n_C, f, f, n_C_prev); parameters is a dict with keys 'pad' and 'stride'.\n",
260 | "    \"\"\"\n",
261 | "    \n",
262 | "    m, n_W_prev, n_H_prev, n_C_prev = A_prev.shape\n",
263 | "    pad, stride = parameters['pad'], parameters['stride']\n",
264 | "    n_C, f, f, _ = filters.shape\n",
265 | "    \n",
266 | "    n_W = (n_W_prev + 2*pad - f) // stride + 1\n",
267 | "    n_H = (n_H_prev + 2*pad - f) // stride + 1\n",
268 | "    \n",
269 | "    output = np.zeros((m, n_W, n_H, n_C))\n",
270 | "    padded_A_prev = zero_pads(A_prev, pad)\n",
271 | "    \n",
272 | "    for i in range(m):\n",
273 | "        # take out the image\n",
274 | "        padded_img = padded_A_prev[i]\n",
275 | "        for c in range(n_C):\n",
276 | "            # take out filters and bias for the channel\n",
277 | "            fil = filters[c]\n",
278 | "            b = bias[c]\n",
279 | "            for w in range(n_W):\n",
280 | "                for h in range(n_H):\n",
281 | "                    w_range = (stride*w, stride*w + f)\n",
282 | "                    h_range = (stride*h, stride*h + f)\n",
283 | "                    img_slice = padded_img[w_range[0]:w_range[1], h_range[0]:h_range[1], :]\n",
284 | "                    output[i, w, h, c] = one_step_conv(img_slice, fil, b)\n",
285 | "    \n",
286 | "    return output"
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": 7,
292 | "metadata": {
293 | "scrolled": true
294 | },
295 | "outputs": [
296 | {
297 | "name": "stdout",
298 | "output_type": "stream",
299 | "text": [
300 | "(4, 15, 15, 10)\n"
301 | ]
302 | }
303 | ],
304 | "source": [
305 | "A_prev = np.random.randn(4, 28, 28, 3)\n",
306 | "filters = np.random.randn(10, 3, 3, 3) # filter size (3, 3, 3)\n",
307 | "bias = np.zeros(10)\n",
308 | "parameters = {'pad': 2, 'stride': 2}\n",
309 | "\n",
310 | "Z = conv(A_prev, filters, bias, parameters)\n",
311 | "print(Z.shape)"
312 | ]
313 | },
314 | {
315 | "cell_type": "markdown",
316 | "metadata": {},
317 | "source": [
318 | "# Pooling\n",
319 | "---\n",
320 | "After convolutional layer, it typically follows a pooling layer. The pooling (POOL) layer reduces the height and width of the input. It helps reduce computation, as well as helps make feature detectors more invariant to its position in the input. The two types of pooling layers are: \n",
321 | "\n",
322 | "- Max-pooling layer: slides an ($f, f$) window over the input and stores the max value of the window in the output.\n",
323 | "\n",
324 | "- Average-pooling layer: slides an ($f, f$) window over the input and stores the average value of the window in the output.\n",
325 | "\n",
326 | "\n",
327 | "\n",
328 | " \n",
329 | " | \n",
330 | "\n",
331 | " | \n",
332 | " \n",
333 | " | \n",
334 | " |
\n",
335 | "\n",
336 | "from Deep Learning Specialization Course\n",
337 | "\n",
338 | "The process is pretty much the same as the convolutional layer, with a filter and a stride, at each step, we will take a slice of the whole image and compute one value -- either max or average -- from it.\n",
339 | "\n",
340 | "Given filter size $f$, stride $s$ and input size:\n",
341 | "$$ (m, n_{W_{prev}}, n_{H_{prev}}, n_{C_{prev}}) $$\n",
342 | "\n",
343 | "Output would have size:\n",
344 | "\n",
345 | "$$ (m, \\lfloor\\frac{n_{W_{prev}} - f}{stride}\\rfloor + 1, \\lfloor\\frac{n_{H_{prev}} - f}{stride}\\rfloor + 1, n_C)$$\n",
346 | "$$ n_C = n_{C_{prev}}$$\n",
347 | "\n",
348 | "Note that pooling does not change the depth of an image."
349 | ]
350 | },
351 | {
352 | "cell_type": "code",
353 | "execution_count": 8,
354 | "metadata": {},
355 | "outputs": [],
356 | "source": [
357 | "def pooling(A_prev, parameters, mode='max'):\n",
358 | "    \"\"\"\n",
359 | "    A_prev: batched input images (m, n_W_prev, n_H_prev, n_C_prev); parameters holds 'f' and 'stride'; mode is 'max' or 'average'.\n",
360 | "    \"\"\"\n",
361 | "    m, n_W_prev, n_H_prev, n_C_prev = A_prev.shape\n",
362 | "    f, stride = parameters['f'], parameters['stride']\n",
363 | "    \n",
364 | "    n_W = (n_W_prev - f)//stride + 1\n",
365 | "    n_H = (n_H_prev - f)//stride + 1\n",
366 | "    n_C = n_C_prev\n",
367 | "    \n",
368 | "    output = np.zeros((m, n_W, n_H, n_C))\n",
369 | "    for i in range(m):\n",
370 | "        img = A_prev[i]\n",
371 | "        for w in range(n_W):\n",
372 | "            for h in range(n_H):\n",
373 | "                for c in range(n_C):\n",
374 | "                    w_range = (stride*w, stride*w + f)\n",
375 | "                    h_range = (stride*h, stride*h + f)\n",
376 | "                    img_slice = img[w_range[0]:w_range[1], h_range[0]:h_range[1], c]\n",
377 | "                    if mode == 'max':\n",
378 | "                        output[i, w, h, c] = np.max(img_slice)\n",
379 | "                    elif mode == 'average':\n",
380 | "                        output[i, w, h, c] = np.mean(img_slice)\n",
381 | "    return output"
382 | ]
383 | },
384 | {
385 | "cell_type": "code",
386 | "execution_count": 9,
387 | "metadata": {},
388 | "outputs": [
389 | {
390 | "name": "stdout",
391 | "output_type": "stream",
392 | "text": [
393 | "(4, 14, 14, 3)\n"
394 | ]
395 | }
396 | ],
397 | "source": [
398 | "A_prev = np.random.randn(4, 28, 28, 3)\n",
399 | "parameters = {'f': 2, 'stride': 2}\n",
400 | "\n",
401 | "A = pooling(A_prev, parameters, mode='max')\n",
402 | "print(A.shape)"
403 | ]
404 | },
405 | {
406 | "cell_type": "code",
407 | "execution_count": null,
408 | "metadata": {},
409 | "outputs": [],
410 | "source": []
411 | }
412 | ],
413 | "metadata": {
414 | "kernelspec": {
415 | "display_name": "Python 3",
416 | "language": "python",
417 | "name": "python3"
418 | },
419 | "language_info": {
420 | "codemirror_mode": {
421 | "name": "ipython",
422 | "version": 3
423 | },
424 | "file_extension": ".py",
425 | "mimetype": "text/x-python",
426 | "name": "python",
427 | "nbconvert_exporter": "python",
428 | "pygments_lexer": "ipython3",
429 | "version": "3.8.3"
430 | }
431 | },
432 | "nbformat": 4,
433 | "nbformat_minor": 4
434 | }
435 |
--------------------------------------------------------------------------------
/convolution/images/Convolution_schematic.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/Convolution_schematic.gif
--------------------------------------------------------------------------------
/convolution/images/PAD.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/PAD.png
--------------------------------------------------------------------------------
/convolution/images/SIGNS.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/SIGNS.png
--------------------------------------------------------------------------------
/convolution/images/a_pool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/a_pool.png
--------------------------------------------------------------------------------
/convolution/images/ave-pool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/ave-pool.png
--------------------------------------------------------------------------------
/convolution/images/ave_pool1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/ave_pool1.png
--------------------------------------------------------------------------------
/convolution/images/average_pool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/average_pool.png
--------------------------------------------------------------------------------
/convolution/images/conv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/conv.png
--------------------------------------------------------------------------------
/convolution/images/conv1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/conv1.png
--------------------------------------------------------------------------------
/convolution/images/conv_kiank.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/conv_kiank.mp4
--------------------------------------------------------------------------------
/convolution/images/conv_nn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/conv_nn.png
--------------------------------------------------------------------------------
/convolution/images/images/convblock_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/convblock_kiank.png
--------------------------------------------------------------------------------
/convolution/images/images/idblock2_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/idblock2_kiank.png
--------------------------------------------------------------------------------
/convolution/images/images/idblock3_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/idblock3_kiank.png
--------------------------------------------------------------------------------
/convolution/images/images/my_image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/my_image.jpg
--------------------------------------------------------------------------------
/convolution/images/images/resnet_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/resnet_kiank.png
--------------------------------------------------------------------------------
/convolution/images/images/signs_data_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/signs_data_kiank.png
--------------------------------------------------------------------------------
/convolution/images/images/skip_connection_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/skip_connection_kiank.png
--------------------------------------------------------------------------------
/convolution/images/images/vanishing_grad_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/vanishing_grad_kiank.png
--------------------------------------------------------------------------------
/convolution/images/max_pool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/max_pool.png
--------------------------------------------------------------------------------
/convolution/images/max_pool1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/max_pool1.png
--------------------------------------------------------------------------------
/convolution/images/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/model.png
--------------------------------------------------------------------------------
/convolution/images/thumbs_up.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/thumbs_up.jpg
--------------------------------------------------------------------------------
/convolution/images/vert_horiz_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/vert_horiz_kiank.png
--------------------------------------------------------------------------------
/deep-neural-network/Deep NN.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Build a Multi-Layer Neural Network\n"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Weights Initialization\n",
15 | "---\n",
16 | "Firstly, weights need to be initialized for different layers. Note that in general, the input is not considered as a layer, but output is.\n",
17 | "\n",
18 | "For `lth` layer, \n",
19 | "- weight $W^{[l]}$ has shape $(n^{[l]}, n^{[l-1]})$\n",
20 | "- bias $b^{[l]}$ has shape $(n^{[l]}, 1)$\n",
21 | "\n",
22 | "where $n^{[0]}$ equals the number of input features."
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 1,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "import numpy as np"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 2,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "def weights_init(layers_dim):\n",
41 | " params = {}\n",
42 | " \n",
43 | " n = len(layers_dim)\n",
44 | " for i in range(1, n):\n",
45 | " params['W' + str(i)] = np.random.randn(layers_dim[i], layers_dim[i-1])*0.01\n",
46 | " params['b' + str(i)] = np.zeros((layers_dim[i], 1))\n",
47 | " return params"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 3,
53 | "metadata": {},
54 | "outputs": [
55 | {
56 | "data": {
57 | "text/plain": [
58 | "{'W1': array([[-0.00197904, -0.01213369],\n",
59 | " [ 0.00689108, 0.00453008],\n",
60 | " [-0.00691454, 0.01541786],\n",
61 | " [-0.01061402, 0.00787606],\n",
62 | " [ 0.01147524, 0.00290551]]),\n",
63 | " 'b1': array([[0.],\n",
64 | " [0.],\n",
65 | " [0.],\n",
66 | " [0.],\n",
67 | " [0.]]),\n",
68 | " 'W2': array([[-0.00037371, -0.0026616 , 0.00046249, 0.00950304, 0.01676771]]),\n",
69 | " 'b2': array([[0.]])}"
70 | ]
71 | },
72 | "execution_count": 3,
73 | "metadata": {},
74 | "output_type": "execute_result"
75 | }
76 | ],
77 | "source": [
78 | "p = weights_init([2, 5, 1])\n",
79 | "p"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "# Forward\n",
87 | "---\n",
88 | "## Equations of Multi-layer\n",
89 | "---\n",
90 | "$$ Z^{[l]} = W^{[l]}A^{[l-1]} + b^{[l]} $$\n",
91 | "\n",
92 | "$$ A^{[l]} = g^{[l]}(Z^{[l]}) $$\n",
93 | "\n",
94 | "Where $l$ is the `lth` layer."
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 4,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "def sigmoid(x):\n",
104 | " return 1/(1 + np.exp(-x))\n",
105 | "\n",
106 | "\n",
107 | "def relu(x):\n",
108 | " return np.maximum(x, 0)"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 5,
114 | "metadata": {},
115 | "outputs": [
116 | {
117 | "name": "stdout",
118 | "output_type": "stream",
119 | "text": [
120 | "[0.23147522 0.11920292 0.78583498] [0. 0. 1.3]\n"
121 | ]
122 | }
123 | ],
124 | "source": [
125 | "x = np.array([-1.2, -2.0, 1.3])\n",
126 | "\n",
127 | "sx = sigmoid(x)\n",
128 | "rx = relu(x)\n",
129 | "\n",
130 | "print(sx, rx)"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": 6,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": [
139 | "def forward(X, params):\n",
140 | " # intermediate layer use relu as activation\n",
141 | " # last layer use sigmoid\n",
142 | " n_layers = int(len(params)/2)\n",
143 | " A = X\n",
144 | " cache = {}\n",
145 | " for i in range(1, n_layers):\n",
146 | " W, b = params['W'+str(i)], params['b'+str(i)]\n",
147 | " Z = np.dot(W, A) + b\n",
148 | " A = relu(Z)\n",
149 | " cache['Z'+str(i)] = Z\n",
150 | " cache['A'+str(i)] = A\n",
151 | " \n",
152 | " # last layer\n",
153 | " W, b = params['W'+str(i+1)], params['b'+str(i+1)]\n",
154 | " Z = np.dot(W, A) + b\n",
155 | " A = sigmoid(Z)\n",
156 | " cache['Z'+str(i+1)] = Z\n",
157 | " cache['A'+str(i+1)] = A\n",
158 | " \n",
159 | " return cache, A"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 7,
165 | "metadata": {},
166 | "outputs": [],
167 | "source": [
168 | "X = np.array([1., 1.]).reshape(2, 1)\n",
169 | "cache, A = forward(X, p)"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 8,
175 | "metadata": {},
176 | "outputs": [
177 | {
178 | "data": {
179 | "text/plain": [
180 | "{'Z1': array([[-0.01411272],\n",
181 | " [ 0.01142116],\n",
182 | " [ 0.00850332],\n",
183 | " [-0.00273796],\n",
184 | " [ 0.01438075]]),\n",
185 | " 'A1': array([[0. ],\n",
186 | " [0.01142116],\n",
187 | " [0.00850332],\n",
188 | " [0. ],\n",
189 | " [0.01438075]]),\n",
190 | " 'Z2': array([[0.00021467]]),\n",
191 | " 'A2': array([[0.50005367]])}"
192 | ]
193 | },
194 | "execution_count": 8,
195 | "metadata": {},
196 | "output_type": "execute_result"
197 | }
198 | ],
199 | "source": [
200 | "cache"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": 9,
206 | "metadata": {},
207 | "outputs": [
208 | {
209 | "data": {
210 | "text/plain": [
211 | "array([[0.50005367]])"
212 | ]
213 | },
214 | "execution_count": 9,
215 | "metadata": {},
216 | "output_type": "execute_result"
217 | }
218 | ],
219 | "source": [
220 | "A"
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {},
226 | "source": [
227 | "# Cost Function\n",
228 | "---\n",
229 | "Since we still treat this as a binary classification problem, the cost over a batch of $m$ examples is:\n",
230 | "\n",
231 | "$$-\\frac{1}{m} \\sum\\limits_{i = 1}^{m} (y^{(i)}\\log\\left(a^{[L] (i)}\\right) + (1-y^{(i)})\\log\\left(1- a^{[L](i)}\\right)) $$\n",
232 | "\n",
233 | "Where $a$ is the predicted value, and $y$ is the actual one."
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 10,
239 | "metadata": {},
240 | "outputs": [],
241 | "source": [
242 | "def compute_cost(A, Y):\n",
243 | " \"\"\"\n",
244 | " For binary classification, both A and Y would have shape (1, m), where m is the batch size\n",
245 | " \"\"\"\n",
246 | " assert A.shape == Y.shape\n",
247 | " m = A.shape[1]\n",
248 | " s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))\n",
249 | " loss = -s/m\n",
250 | " return np.squeeze(loss)"
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": 11,
256 | "metadata": {},
257 | "outputs": [
258 | {
259 | "name": "stdout",
260 | "output_type": "stream",
261 | "text": [
262 | "0.23101772979827936\n"
263 | ]
264 | }
265 | ],
266 | "source": [
267 | "A = np.array([[0.9, 0.3]])\n",
268 | "Y = np.array([[1, 0]])\n",
269 | "\n",
270 | "loss = compute_cost(A, Y)\n",
271 | "print(loss)"
272 | ]
273 | },
274 | {
275 | "cell_type": "markdown",
276 | "metadata": {},
277 | "source": [
278 | "# Backward Propagation\n",
279 | "---\n",
280 | "![backward propagation](images/backprop_kiank.png)\n",
281 | " **[source]**: https://github.com/enggen/Deep-Learning-Coursera \n",
282 | "\n",
283 | "The backward gradient can be calculated in recurrent fashion:\n",
284 | "\n",
285 | "$$ dZ^{[l]} = dA^{[l]} * g^{[l]'}(Z^{[l]}) $$\n",
286 | "\n",
287 | "$$ dW^{[l]} = \\frac{\\partial \\mathcal{L} }{\\partial W^{[l]}} = \\frac{1}{m} dZ^{[l]} A^{[l-1] T} $$\n",
288 | "$$ db^{[l]} = \\frac{\\partial \\mathcal{L} }{\\partial b^{[l]}} = \\frac{1}{m} \\sum_{i = 1}^{m} dZ^{[l](i)}$$\n",
289 | "$$ dA^{[l-1]} = \\frac{\\partial \\mathcal{L} }{\\partial A^{[l-1]}} = W^{[l] T} dZ^{[l]} $$\n"
290 | ]
291 | },
292 | {
293 | "cell_type": "markdown",
294 | "metadata": {},
295 | "source": [
296 | "First, implementation of derivative of `sigmoid` and `relu` is required."
297 | ]
298 | },
299 | {
300 | "cell_type": "code",
301 | "execution_count": 12,
302 | "metadata": {},
303 | "outputs": [],
304 | "source": [
305 | "def sigmoid_grad(A, Z):\n",
306 | " grad = np.multiply(A, 1-A)\n",
307 | " return grad\n",
308 | "\n",
309 | "\n",
310 | "def relu_grad(A, Z):\n",
311 | " grad = np.zeros(Z.shape)\n",
312 | " grad[Z>0] = 1\n",
313 | " return grad"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": 13,
319 | "metadata": {},
320 | "outputs": [
321 | {
322 | "name": "stdout",
323 | "output_type": "stream",
324 | "text": [
325 | "[[0.87901144 0.29881771]\n",
326 | " [1.68253349 2.73842285]\n",
327 | " [1.97877652 0.12396486]] \n",
328 | "\n",
329 | "[[0.70661733 0.57415347]\n",
330 | " [0.84323972 0.93925618]\n",
331 | " [0.87855068 0.53095159]] \n",
332 | "\n",
333 | "[[0.20730928 0.24450126]\n",
334 | " [0.1321865 0.05705401]\n",
335 | " [0.10669938 0.249042 ]]\n"
336 | ]
337 | }
338 | ],
339 | "source": [
340 | "z = np.random.randn(3, 2)\n",
341 | "a = sigmoid(z)\n",
342 | "g = sigmoid_grad(a, z)\n",
343 | "\n",
344 | "print(z, '\\n')\n",
345 | "print(a, '\\n')\n",
346 | "print(g)"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": 14,
352 | "metadata": {},
353 | "outputs": [
354 | {
355 | "name": "stdout",
356 | "output_type": "stream",
357 | "text": [
358 | "[[ 0.63378157 1.22440607]\n",
359 | " [-0.94572088 0.34021284]\n",
360 | " [-0.14649618 0.22595908]] \n",
361 | "\n",
362 | "[[0.63378157 1.22440607]\n",
363 | " [0. 0.34021284]\n",
364 | " [0. 0.22595908]] \n",
365 | "\n",
366 | "[[1. 1.]\n",
367 | " [0. 1.]\n",
368 | " [0. 1.]]\n"
369 | ]
370 | }
371 | ],
372 | "source": [
373 | "z = np.random.randn(3, 2)\n",
374 | "a = relu(z)\n",
375 | "g = relu_grad(a, z)\n",
376 | "\n",
377 | "print(z, '\\n')\n",
378 | "print(a, '\\n')\n",
379 | "print(g)"
380 | ]
381 | },
382 | {
383 | "cell_type": "markdown",
384 | "metadata": {},
385 | "source": [
386 | "Following the equations above, we have our implementation of backward propagation. Note that except the last layer where `sigmoid` function is used, the rest we all apply `relu` derivative to get the gradients."
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": 15,
392 | "metadata": {},
393 | "outputs": [],
394 | "source": [
395 | "def backward(params, cache, X, Y):\n",
396 | " \"\"\"\n",
397 | " params: weight [W, b]\n",
398 | " cache: result [A, Z]\n",
399 | " Y: shape (1, m)\n",
400 | " \"\"\"\n",
401 | " grad = {}\n",
402 | " n_layers = int(len(params)/2)\n",
403 | " m = Y.shape[1]\n",
404 | " cache['A0'] = X\n",
405 | " \n",
406 | " for l in range(n_layers, 0, -1):\n",
407 | " A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]\n",
408 | " W = params['W'+str(l)]\n",
409 | " if l == n_layers:\n",
410 | " dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)\n",
411 | " \n",
412 | " if l == n_layers:\n",
413 | " dZ = np.multiply(dA, sigmoid_grad(A, Z))\n",
414 | " else:\n",
415 | " dZ = np.multiply(dA, relu_grad(A, Z))\n",
416 | " dW = np.dot(dZ, A_prev.T)/m\n",
417 | " db = np.sum(dZ, axis=1, keepdims=True)/m\n",
418 | " dA = np.dot(W.T, dZ)\n",
419 | "\n",
420 | " grad['dW'+str(l)] = dW\n",
421 | " grad['db'+str(l)] = db\n",
422 | " \n",
423 | " return grad"
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": 16,
429 | "metadata": {},
430 | "outputs": [
431 | {
432 | "data": {
433 | "text/plain": [
434 | "{'dW2': array([[ 0. , -0.00570997, -0.0042512 , 0. , -0.00718961]]),\n",
435 | " 'db2': array([[-0.49994633]]),\n",
436 | " 'dW1': array([[ 0. , 0. ],\n",
437 | " [ 0.00133066, 0.00133066],\n",
438 | " [-0.00023122, -0.00023122],\n",
439 | " [ 0. , 0. ],\n",
440 | " [-0.00838296, -0.00838296]]),\n",
441 | " 'db1': array([[ 0. ],\n",
442 | " [ 0.00133066],\n",
443 | " [-0.00023122],\n",
444 | " [ 0. ],\n",
445 | " [-0.00838296]])}"
446 | ]
447 | },
448 | "execution_count": 16,
449 | "metadata": {},
450 | "output_type": "execute_result"
451 | }
452 | ],
453 | "source": [
454 | "g = backward(p, cache, np.array([[1], [1]]), np.array([[1]]))\n",
455 | "g"
456 | ]
457 | },
458 | {
459 | "cell_type": "markdown",
460 | "metadata": {},
461 | "source": [
462 | "Now, given the gradients and a learning rate $\\alpha$, the weights are updated as follows:\n",
463 | " \n",
464 | "$$ W^{[l]} := W^{[l]} - \\alpha \\, dW^{[l]} $$\n",
465 | "$$ b^{[l]} := b^{[l]} - \\alpha \\, db^{[l]} $$"
466 | ]
467 | },
468 | {
469 | "cell_type": "code",
470 | "execution_count": 17,
471 | "metadata": {},
472 | "outputs": [],
473 | "source": [
474 | "def optimize(params, grads, lr):\n",
475 | " n_layers = int(len(params)/2)\n",
476 | " for i in range(1, n_layers+1):\n",
477 | " dW, db = grads['dW'+str(i)], grads['db'+str(i)]\n",
478 | " params['W'+str(i)] -= lr*dW\n",
479 | " params['b'+str(i)] -= lr*db\n",
480 | " return params"
481 | ]
482 | },
483 | {
484 | "cell_type": "code",
485 | "execution_count": 18,
486 | "metadata": {},
487 | "outputs": [
488 | {
489 | "data": {
490 | "text/plain": [
491 | "{'W1': array([[-0.00197904, -0.01213369],\n",
492 | " [ 0.00556042, 0.00319942],\n",
493 | " [-0.00668332, 0.01564908],\n",
494 | " [-0.01061402, 0.00787606],\n",
495 | " [ 0.0198582 , 0.01128847]]),\n",
496 | " 'b1': array([[ 0. ],\n",
497 | " [-0.00133066],\n",
498 | " [ 0.00023122],\n",
499 | " [ 0. ],\n",
500 | " [ 0.00838296]]),\n",
501 | " 'W2': array([[-0.00037371, 0.00304836, 0.00471369, 0.00950304, 0.02395732]]),\n",
502 | " 'b2': array([[0.49994633]])}"
503 | ]
504 | },
505 | "execution_count": 18,
506 | "metadata": {},
507 | "output_type": "execute_result"
508 | }
509 | ],
510 | "source": [
511 | "s = optimize(p, g, 1)\n",
512 | "s"
513 | ]
514 | },
515 | {
516 | "cell_type": "markdown",
517 | "metadata": {},
518 | "source": [
519 | "# Apply on Dataset\n",
520 | "---\n",
521 | "Let's apply our model to a generated dataset with 200 features.\n",
522 | ]
523 | },
524 | {
525 | "cell_type": "code",
526 | "execution_count": 19,
527 | "metadata": {},
528 | "outputs": [
529 | {
530 | "name": "stdout",
531 | "output_type": "stream",
532 | "text": [
533 | "train shape (8000, 200)\n",
534 | "test shape (2000, 200)\n"
535 | ]
536 | }
537 | ],
538 | "source": [
539 | "from sklearn import datasets\n",
540 | "\n",
541 | "\n",
542 | "X, y = datasets.make_classification(n_samples=10000, n_features=200, random_state=123)\n",
543 | "\n",
544 | "X_train, X_test = X[:8000], X[8000:]\n",
545 | "y_train, y_test = y[:8000], y[8000:]\n",
546 | "\n",
547 | "print('train shape', X_train.shape)\n",
548 | "print('test shape', X_test.shape)"
549 | ]
550 | },
551 | {
552 | "cell_type": "code",
553 | "execution_count": 21,
554 | "metadata": {},
555 | "outputs": [],
556 | "source": [
557 | "def generate_batch(X, batch_size):\n",
558 | "    n = X.shape[0]\n",
559 | "    batches = [range(i, min(i + batch_size, n)) for i in range(0, n, batch_size)]  # cap final batch at n\n",
560 | "    return batches\n",
561 | "\n",
562 | "\n",
563 | "def accuracy(Y, Y_pred):\n",
564 | " \"\"\"\n",
565 | " Y: vector of true value\n",
566 | " Y_pred: vector of predicted value\n",
567 | " \"\"\"\n",
568 | " \n",
569 | " assert Y.shape[0] == 1\n",
570 | " assert Y.shape == Y_pred.shape\n",
571 | " Y_pred = np.round(Y_pred)\n",
572 | " acc = float(np.dot(Y, Y_pred.T) + np.dot(1 - Y, 1 - Y_pred.T))/Y.size\n",
573 | " return acc"
574 | ]
575 | },
576 | {
577 | "cell_type": "code",
578 | "execution_count": 21,
579 | "metadata": {},
580 | "outputs": [],
581 | "source": [
582 | "def train(X_train, y_train, layers: list, batch_size=200, n_iter=100, lr=0.1):\n",
583 | " # prepare batch training\n",
584 | " batches = generate_batch(X_train, batch_size)\n",
585 | " # init weights\n",
586 | " parameters = weights_init(layers)\n",
587 | " for i in range(n_iter):\n",
588 | " for batch in batches:\n",
589 | " X = X_train[batch, :].T\n",
590 | " Y = y_train[batch].reshape(1, -1)\n",
591 | " cache, A = forward(X, parameters)\n",
592 | " grads = backward(parameters, cache, X, Y)\n",
593 | " parameters = optimize(parameters, grads, lr)\n",
594 | " \n",
595 | " if i%10 == 0:\n",
596 | " loss = compute_cost(A, Y)\n",
597 | " print(f'iteration {i}: loss {loss}')\n",
598 | " return parameters"
599 | ]
600 | },
601 | {
602 | "cell_type": "markdown",
603 | "metadata": {},
604 | "source": [
605 | "Let's build a 3-layer neural network, with input of 200 features."
606 | ]
607 | },
608 | {
609 | "cell_type": "code",
610 | "execution_count": 22,
611 | "metadata": {},
612 | "outputs": [
613 | {
614 | "name": "stdout",
615 | "output_type": "stream",
616 | "text": [
617 | "iteration 0: loss 0.6930831830164655\n",
618 | "iteration 10: loss 0.6930082822907631\n",
619 | "iteration 20: loss 0.6929949487286129\n",
620 | "iteration 30: loss 0.6929543341306021\n",
621 | "iteration 40: loss 0.6927636568599188\n",
622 | "iteration 50: loss 0.690073352055835\n",
623 | "iteration 60: loss 0.2531001812337807\n",
624 | "iteration 70: loss 0.127696331048521\n",
625 | "iteration 80: loss 0.08193633942165292\n",
626 | "iteration 90: loss 0.05580582920505571\n"
627 | ]
628 | }
629 | ],
630 | "source": [
631 | "trained_param = train(X_train, y_train, layers=[200, 20, 10, 1], batch_size=200, n_iter=100, lr=0.05)"
632 | ]
633 | },
634 | {
635 | "cell_type": "code",
636 | "execution_count": 23,
637 | "metadata": {},
638 | "outputs": [],
639 | "source": [
640 | "cache, pred = forward(X_test.T, trained_param)"
641 | ]
642 | },
643 | {
644 | "cell_type": "code",
645 | "execution_count": 24,
646 | "metadata": {},
647 | "outputs": [
648 | {
649 | "name": "stdout",
650 | "output_type": "stream",
651 | "text": [
652 | "accuracy: 94.39999999999999%\n"
653 | ]
654 | }
655 | ],
656 | "source": [
657 | "acc = accuracy(y_test.reshape(1, -1), pred)\n",
658 | "\n",
659 | "print(f'accuracy: {acc*100}%')"
660 | ]
661 | },
662 | {
663 | "cell_type": "markdown",
664 | "metadata": {},
665 | "source": [
666 | "# In a Class"
667 | ]
668 | },
669 | {
670 | "cell_type": "code",
671 | "execution_count": 19,
672 | "metadata": {},
673 | "outputs": [],
674 | "source": [
675 | "class deepNN:\n",
676 | " def __init__(self, layers):\n",
677 | " self.layers = layers\n",
678 | " self.params = {}\n",
679 | " \n",
680 | " \n",
681 | " def weights_init(self):\n",
682 | " n = len(self.layers)\n",
683 | " for i in range(1, n):\n",
684 | " self.params['W' + str(i)] = np.random.randn(self.layers[i], self.layers[i-1])*0.01\n",
685 | " self.params['b' + str(i)] = np.zeros((self.layers[i], 1))\n",
686 | " \n",
687 | " @staticmethod\n",
688 | " def sigmoid(x):\n",
689 | " return 1/(1 + np.exp(-x))\n",
690 | "\n",
691 | " @staticmethod\n",
692 | " def relu(x):\n",
693 | " return np.maximum(x, 0)\n",
694 | " \n",
695 | " @staticmethod\n",
696 | " def compute_cost(A, Y):\n",
697 | " \"\"\"\n",
698 | " For binary classification, both A and Y would have shape (1, m), where m is the batch size\n",
699 | " \"\"\"\n",
700 | " assert A.shape == Y.shape\n",
701 | " m = A.shape[1]\n",
702 | " s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))\n",
703 | " loss = -s/m\n",
704 | " return np.squeeze(loss)\n",
705 | " \n",
706 | " @staticmethod\n",
707 | " def sigmoid_grad(A, Z):\n",
708 | " grad = np.multiply(A, 1-A)\n",
709 | " return grad\n",
710 | "\n",
711 | " @staticmethod\n",
712 | " def relu_grad(A, Z):\n",
713 | " grad = np.zeros(Z.shape)\n",
714 | " grad[Z>0] = 1\n",
715 | " return grad\n",
716 | " \n",
717 | " \n",
718 | " def forward(self, X):\n",
719 | " # intermediate layer use relu as activation\n",
720 | " # last layer use sigmoid\n",
721 | " n_layers = int(len(self.params)/2)\n",
722 | " A = X\n",
723 | " cache = {}\n",
724 | " for i in range(1, n_layers):\n",
725 | " W, b = self.params['W'+str(i)], self.params['b'+str(i)]\n",
726 | " Z = np.dot(W, A) + b\n",
727 | " A = self.relu(Z)\n",
728 | " cache['Z'+str(i)] = Z\n",
729 | " cache['A'+str(i)] = A\n",
730 | "\n",
731 | " # last layer\n",
732 | " W, b = self.params['W'+str(i+1)], self.params['b'+str(i+1)]\n",
733 | " Z = np.dot(W, A) + b\n",
734 | " A = self.sigmoid(Z)\n",
735 | " cache['Z'+str(i+1)] = Z\n",
736 | " cache['A'+str(i+1)] = A\n",
737 | "\n",
738 | " return cache, A\n",
739 | " \n",
740 | " def backward(self, cache, X, Y):\n",
741 | " \"\"\"\n",
742 | " cache: result [A, Z]\n",
743 | " Y: shape (1, m)\n",
744 | " \"\"\"\n",
745 | " grad = {}\n",
746 | " n_layers = int(len(self.params)/2)\n",
747 | " m = Y.shape[1]\n",
748 | " cache['A0'] = X\n",
749 | "\n",
750 | " for l in range(n_layers, 0, -1):\n",
751 | " A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]\n",
752 | " W = self.params['W'+str(l)]\n",
753 | " if l == n_layers:\n",
754 | " dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)\n",
755 | "\n",
756 | " if l == n_layers:\n",
757 | " dZ = np.multiply(dA, self.sigmoid_grad(A, Z))\n",
758 | " else:\n",
759 | " dZ = np.multiply(dA, self.relu_grad(A, Z))\n",
760 | " dW = np.dot(dZ, A_prev.T)/m\n",
761 | " db = np.sum(dZ, axis=1, keepdims=True)/m\n",
762 | " dA = np.dot(W.T, dZ)\n",
763 | "\n",
764 | " grad['dW'+str(l)] = dW\n",
765 | " grad['db'+str(l)] = db\n",
766 | "\n",
767 | " return grad\n",
768 | " \n",
769 | " def optimize(self, grads, lr):\n",
770 | " n_layers = int(len(self.params)/2)\n",
771 | " for i in range(1, n_layers+1):\n",
772 | " dW, db = grads['dW'+str(i)], grads['db'+str(i)]\n",
773 | " self.params['W'+str(i)] -= lr*dW\n",
774 | " self.params['b'+str(i)] -= lr*db\n",
775 | " \n",
776 | "    @staticmethod\n",
777 | "    def generate_batch(X, batch_size):\n",
778 | "        n = X.shape[0]\n",
779 | "        batches = [range(i, min(i + batch_size, n)) for i in range(0, n, batch_size)]  # cap final batch at n\n",
780 | "        return batches\n",
781 | " \n",
782 | " \n",
783 | " def train(self, X_train, y_train, batch_size=200, n_iter=100, lr=0.1):\n",
784 | " # prepare batch training\n",
785 | " batches = self.generate_batch(X_train, batch_size)\n",
786 | " # init weights\n",
787 | " self.weights_init()\n",
788 | " for i in range(n_iter):\n",
789 | " for batch in batches:\n",
790 | " X = X_train[batch, :].T\n",
791 | " Y = y_train[batch].reshape(1, -1)\n",
792 | " cache, A = self.forward(X)\n",
793 | " grads = self.backward(cache, X, Y)\n",
794 | " self.optimize(grads, lr)\n",
795 | "\n",
796 | " if i%10 == 0:\n",
797 | " loss = self.compute_cost(A, Y)\n",
798 | " print(f'iteration {i}: loss {loss}')"
799 | ]
800 | },
801 | {
802 | "cell_type": "code",
803 | "execution_count": 5,
804 | "metadata": {},
805 | "outputs": [
806 | {
807 | "name": "stdout",
808 | "output_type": "stream",
809 | "text": [
810 | "train shape (8000, 200)\n",
811 | "test shape (2000, 200)\n"
812 | ]
813 | }
814 | ],
815 | "source": [
816 | "from sklearn import datasets\n",
817 | "\n",
818 | "\n",
819 | "X, y = datasets.make_classification(n_samples=10000, n_features=200, random_state=123)\n",
820 | "\n",
821 | "X_train, X_test = X[:8000], X[8000:]\n",
822 | "y_train, y_test = y[:8000], y[8000:]\n",
823 | "\n",
824 | "print('train shape', X_train.shape)\n",
825 | "print('test shape', X_test.shape)"
826 | ]
827 | },
828 | {
829 | "cell_type": "code",
830 | "execution_count": 20,
831 | "metadata": {},
832 | "outputs": [
833 | {
834 | "name": "stdout",
835 | "output_type": "stream",
836 | "text": [
837 | "iteration 0: loss 0.6930968966284916\n",
838 | "iteration 10: loss 0.6930261983198653\n",
839 | "iteration 20: loss 0.6930234151665605\n",
840 | "iteration 30: loss 0.6930149122135475\n",
841 | "iteration 40: loss 0.6929815230264361\n",
842 | "iteration 50: loss 0.6927740045307099\n",
843 | "iteration 60: loss 0.6880564419952588\n",
844 | "iteration 70: loss 0.2200907541999881\n",
845 | "iteration 80: loss 0.11582658029026635\n",
846 | "iteration 90: loss 0.08402195069870581\n"
847 | ]
848 | }
849 | ],
850 | "source": [
851 | "layers = [200, 20, 10, 1]\n",
852 | "model = deepNN(layers)\n",
853 | "\n",
854 | "model.train(X_train, y_train, batch_size=200, n_iter=100, lr=0.05)"
855 | ]
856 | },
857 | {
858 | "cell_type": "code",
859 | "execution_count": 23,
860 | "metadata": {},
861 | "outputs": [
862 | {
863 | "name": "stdout",
864 | "output_type": "stream",
865 | "text": [
866 | "accuracy: 94.55%\n"
867 | ]
868 | }
869 | ],
870 | "source": [
871 | "_, pred = model.forward(X_test.T)\n",
872 | "acc = accuracy(y_test.reshape(1, -1), pred)\n",
873 | "\n",
874 | "print(f'accuracy: {acc*100}%')"
875 | ]
876 | },
877 | {
878 | "cell_type": "code",
879 | "execution_count": null,
880 | "metadata": {},
881 | "outputs": [],
882 | "source": []
883 | }
884 | ],
885 | "metadata": {
886 | "kernelspec": {
887 | "display_name": "Python 3",
888 | "language": "python",
889 | "name": "python3"
890 | },
891 | "language_info": {
892 | "codemirror_mode": {
893 | "name": "ipython",
894 | "version": 3
895 | },
896 | "file_extension": ".py",
897 | "mimetype": "text/x-python",
898 | "name": "python",
899 | "nbconvert_exporter": "python",
900 | "pygments_lexer": "ipython3",
901 | "version": "3.8.3"
902 | }
903 | },
904 | "nbformat": 4,
905 | "nbformat_minor": 4
906 | }
907 |
--------------------------------------------------------------------------------
/deep-neural-network/images/backprop_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/deep-neural-network/images/backprop_kiank.png
--------------------------------------------------------------------------------
/dropout/dropout.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Dropout\n",
8 | "---\n",
9 | "Dropout prevents overfitting by randomly shutting down some output units. The video from Coursera vividly illustrates the process.\n",
10 | "\n",
11 | "\n",
12 | "\n",
13 | "<video width=\"620\" controls src=\"images/dropout1_kiank.mp4\"></video>\n",
14 | "\n",
15 | "\n",
16 | "In the process above, some units in layer `[2]` are randomly muted in each iteration, so fewer neurons take part in the forward pass and the effective structure of the neural network is simplified. Meanwhile, the trained model becomes more robust: since any specific neuron could be muted during training, the model can no longer rely on particular neurons, and all neurons are forced to learn. "
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "# Forward\n",
24 | "---\n",
25 | "You can think of dropout as adding an extra layer to the forward process.\n",
26 | "\n",
27 | "In the previous sessions, we have the forward equations as following,\n",
28 | "\n",
29 | "__Without Dropout__\n",
30 | "\n",
31 | "$$ Z^{[l]} = W^{[l]}A^{[l-1]} + b^{[l]} $$\n",
32 | "\n",
33 | "$$ A^{[l]} = g^{[l]}(Z^{[l]})$$\n",
34 | "\n",
35 | "Where $g$ is the activation function. Now with dropout an extra layer is applied to $A^{[l]}$.\n",
36 | "\n",
37 | "__Dropout__\n",
38 | "\n",
39 | "$$ Z^{[l]} = W^{[l]}A^{[l-1]} + b^{[l]} $$\n",
40 | "\n",
41 | "$$ A^{[l]} = g^{[l]}(Z^{[l]})$$\n",
42 | "\n",
43 | "$$ A^{[l]} = D^{[l]}(A^{[l]})$$\n",
44 | "\n",
45 | "Where $D$ is the dropout layer. The key factor in the dropout layer is `keep_prob` parameter, which specifies the probability of keeping each unit. Say if `keep_prob = 0.8`, we would have 80% chance of keeping each output unit as it is, and 20% chance set them to 0.\n",
46 | "\n",
47 | "The implementation would be adding an extra mask to the result $A$. Assume we have an output $A^{[l]}$ with four elements as following,\n",
48 | "\n",
49 | "$$ \\begin{pmatrix}\n",
50 | "a_1^{[l]} \\\\\n",
51 | "a_2^{[l]} \\\\\n",
52 | "a_3^{[l]} \\\\\n",
53 | "a_4^{[l]}\n",
54 | "\\end{pmatrix}$$\n",
55 | "\n",
56 | "Suppose we want to mute the third unit while keeping the rest; what we need is a mask matrix of the same shape, combined with an element-wise multiplication, as follows:\n",
57 | "\n",
58 | "$$ \\begin{pmatrix}\n",
59 | "a_1^{[l]} \\\\\n",
60 | "a_2^{[l]} \\\\\n",
61 | "a_3^{[l]} \\\\\n",
62 | "a_4^{[l]}\n",
63 | "\\end{pmatrix} * \n",
64 | "\\begin{pmatrix}\n",
65 | "1 \\\\\n",
66 | "1 \\\\\n",
67 | "0 \\\\\n",
68 | "1\n",
69 | "\\end{pmatrix} = \n",
70 | "\\begin{pmatrix}\n",
71 | "a_1^{[l]} \\\\\n",
72 | "a_2^{[l]} \\\\\n",
73 | "0 \\\\\n",
74 | "a_4^{[l]}\n",
75 | "\\end{pmatrix}\n",
76 | "$$\n",
77 | "\n",
78 | "Let's first initialize some weight parameters."
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": 1,
84 | "metadata": {},
85 | "outputs": [],
86 | "source": [
87 | "import numpy as np\n",
88 | "\n",
89 | "layers = [3, 10, 1]\n",
90 | "def weights_init():\n",
91 | " params = {}\n",
92 | " n = len(layers)\n",
93 | " for i in range(1, n):\n",
94 | " params['W' + str(i)] = np.random.randn(layers[i], layers[i-1])*0.01\n",
95 | " params['b' + str(i)] = np.zeros((layers[i], 1))\n",
96 | " return params"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 2,
102 | "metadata": {},
103 | "outputs": [
104 | {
105 | "data": {
106 | "text/plain": [
107 | "{'W1': array([[ 0.01273373, -0.0029192 , -0.02510443],\n",
108 | " [-0.00323293, 0.01198572, -0.0127173 ],\n",
109 | " [ 0.00092662, 0.00766214, 0.02222858],\n",
110 | " [-0.01368964, -0.02118227, 0.01315665],\n",
111 | " [-0.02065367, 0.01095289, 0.00727299],\n",
112 | " [-0.00980028, -0.02437653, -0.0162406 ],\n",
113 | " [ 0.00637576, -0.02312436, -0.000291 ],\n",
114 | " [-0.0029315 , 0.01407064, 0.00237895],\n",
115 | " [-0.00581215, -0.00695063, 0.00948468],\n",
116 | " [-0.00774545, -0.008947 , 0.01390741]]),\n",
117 | " 'b1': array([[0.],\n",
118 | " [0.],\n",
119 | " [0.],\n",
120 | " [0.],\n",
121 | " [0.],\n",
122 | " [0.],\n",
123 | " [0.],\n",
124 | " [0.],\n",
125 | " [0.],\n",
126 | " [0.]]),\n",
127 | " 'W2': array([[ 0.00217584, 0.01116851, -0.01580682, 0.00626901, -0.0053493 ,\n",
128 | " 0.01537351, 0.00633889, 0.0061288 , 0.01380906, -0.00308319]]),\n",
129 | " 'b2': array([[0.]])}"
130 | ]
131 | },
132 | "execution_count": 2,
133 | "metadata": {},
134 | "output_type": "execute_result"
135 | }
136 | ],
137 | "source": [
138 | "params = weights_init()\n",
139 | "params"
140 | ]
141 | },
142 | {
143 | "cell_type": "markdown",
144 | "metadata": {},
145 | "source": [
146 | "# Forward"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 3,
152 | "metadata": {},
153 | "outputs": [],
154 | "source": [
155 | "keep_probs = [.5]\n",
156 | "\n",
157 | "def sigmoid(x):\n",
158 | " return 1/(1 + np.exp(-x))\n",
159 | "\n",
160 | "\n",
161 | "def relu(x):\n",
162 | " return np.maximum(x, 0)\n",
163 | "\n",
164 | "\n",
165 | "def forward(X):\n",
166 | " # intermediate layer use relu as activation\n",
167 | " # last layer use sigmoid\n",
168 | " n_layers = int(len(params)/2)\n",
169 | " A = X\n",
170 | " cache = {}\n",
171 | " for i in range(1, n_layers):\n",
172 | " W, b = params['W'+str(i)], params['b'+str(i)]\n",
173 | " Z = np.dot(W, A) + b\n",
174 | " A = relu(Z)\n",
175 | " # dropout\n",
176 | " keep_prob = keep_probs[i-1]\n",
177 | " D = np.random.rand(A.shape[0], A.shape[1])\n",
178 | " D = (D < keep_prob).astype(int)\n",
179 | " A = np.multiply(D, A)\n",
180 | " # rescale\n",
181 | " A = A/keep_prob\n",
182 | " \n",
183 | " cache['Z'+str(i)] = Z\n",
184 | " cache['A'+str(i)] = A\n",
185 | " cache['D'+str(i)] = D\n",
186 | "\n",
187 | " # last layer\n",
188 | " W, b = params['W'+str(i+1)], params['b'+str(i+1)]\n",
189 | " Z = np.dot(W, A) + b\n",
190 | " A = sigmoid(Z)\n",
191 | " \n",
192 | " cache['Z'+str(i+1)] = Z\n",
193 | " cache['A'+str(i+1)] = A\n",
194 | "\n",
195 | " return cache, A"
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": 4,
201 | "metadata": {},
202 | "outputs": [],
203 | "source": [
204 | "X = np.array([[1.2], [3], [-2]])\n",
205 | "cache, _ = forward(X)"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": 5,
211 | "metadata": {},
212 | "outputs": [
213 | {
214 | "data": {
215 | "text/plain": [
216 | "{'Z1': array([[ 0.05673173],\n",
217 | " [ 0.05751222],\n",
218 | " [-0.02035879],\n",
219 | " [-0.10628767],\n",
220 | " [-0.00647173],\n",
221 | " [-0.05240875],\n",
222 | " [-0.06114016],\n",
223 | " [ 0.03393622],\n",
224 | " [-0.04679583],\n",
225 | " [-0.06395034]]),\n",
226 | " 'A1': array([[0. ],\n",
227 | " [0.11502445],\n",
228 | " [0. ],\n",
229 | " [0. ],\n",
230 | " [0. ],\n",
231 | " [0. ],\n",
232 | " [0. ],\n",
233 | " [0. ],\n",
234 | " [0. ],\n",
235 | " [0. ]]),\n",
236 | " 'D1': array([[0],\n",
237 | " [1],\n",
238 | " [1],\n",
239 | " [1],\n",
240 | " [1],\n",
241 | " [0],\n",
242 | " [1],\n",
243 | " [0],\n",
244 | " [1],\n",
245 | " [0]]),\n",
246 | " 'Z2': array([[0.00128465]]),\n",
247 | " 'A2': array([[0.50032116]])}"
248 | ]
249 | },
250 | "execution_count": 5,
251 | "metadata": {},
252 | "output_type": "execute_result"
253 | }
254 | ],
255 | "source": [
256 | "cache"
257 | ]
258 | },
259 | {
260 | "cell_type": "markdown",
261 | "metadata": {},
262 | "source": [
263 | "Our layer sizes are set to [3, 10, 1], where 3 is the input layer and 1 is the output layer. In the example above we give the hidden layer a `keep_prob` of `0.5`, so roughly half of its units are muted.\n",
264 | "\n",
265 |     "(__Note__: After dropout, `A` needs to be rescaled as `A = A / keep_prob`; since some of the units are disabled, the remaining units need to be amplified in order to keep the expected value unchanged)"
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "metadata": {},
271 | "source": [
272 | "# Backward\n",
273 | "---\n",
274 |     "The backward process applies the same mask `D` to the corresponding `dA`."
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": 6,
280 | "metadata": {},
281 | "outputs": [],
282 | "source": [
283 | "# dummy code, full version needs to be inside a Class\n",
284 | "def backward(self, cache, X, Y, keep_probs):\n",
285 | " \"\"\"\n",
286 | " cache: result [A, Z]\n",
287 | " Y: shape (1, m)\n",
288 | " \"\"\"\n",
289 | " grad = {}\n",
290 | " n_layers = int(len(self.params)/2)\n",
291 | " m = Y.shape[1]\n",
292 | " cache['A0'] = X\n",
293 | "\n",
294 | " for l in range(n_layers, 0, -1):\n",
295 | " A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]\n",
296 | " W = self.params['W'+str(l)]\n",
297 | " if l == n_layers:\n",
298 | " dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)\n",
299 | "\n",
300 | " if l == n_layers:\n",
301 | " dZ = np.multiply(dA, self.sigmoid_grad(A, Z))\n",
302 | " else:\n",
303 | " # dropout version\n",
304 | " D = cache['D' + str(l)]\n",
305 | " dA = np.multiply(dA, D)\n",
306 | " # rescale\n",
307 | " dA = dA/keep_probs[l-1]\n",
308 | " \n",
309 | " dZ = np.multiply(dA, self.relu_grad(A, Z))\n",
310 | " dW = np.dot(dZ, A_prev.T)/m\n",
311 | " db = np.sum(dZ, axis=1, keepdims=True)/m\n",
312 | " dA = np.dot(W.T, dZ)\n",
313 | "\n",
314 | " grad['dW'+str(l)] = dW\n",
315 | " grad['db'+str(l)] = db\n",
316 | "\n",
317 | " return grad"
318 | ]
319 | },
320 | {
321 | "cell_type": "markdown",
322 | "metadata": {},
323 | "source": [
324 | "Note that in back propagation, $dA$ also needs to be rescaled"
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "metadata": {},
330 | "source": [
331 | "Now let's put everything in a class and apply it on a classification task."
332 | ]
333 | },
334 | {
335 | "cell_type": "code",
336 | "execution_count": 7,
337 | "metadata": {},
338 | "outputs": [],
339 | "source": [
340 | "import numpy as np\n",
341 | "\n",
342 | "\n",
343 | "class deepNN:\n",
344 | " def __init__(self, layers):\n",
345 | " self.layers = layers\n",
346 | " self.params = {}\n",
347 | " self.dropout = []\n",
348 | " self.A = 0\n",
349 | " self.Y = 0\n",
350 | " \n",
351 | " \n",
352 | " def weights_init(self):\n",
353 | " n = len(self.layers)\n",
354 | " for i in range(1, n):\n",
355 | " self.params['W' + str(i)] = np.random.randn(self.layers[i], self.layers[i-1])*0.01\n",
356 | " self.params['b' + str(i)] = np.zeros((self.layers[i], 1))\n",
357 | " \n",
358 | " @staticmethod\n",
359 | " def sigmoid(x):\n",
360 | " return 1/(1 + np.exp(-x))\n",
361 | "\n",
362 | " @staticmethod\n",
363 | " def relu(x):\n",
364 | " return np.maximum(x, 0)\n",
365 | " \n",
366 | " @staticmethod\n",
367 | " def compute_cost(A, Y):\n",
368 | " \"\"\"\n",
369 | " For binary classification, both A and Y would have shape (1, m), where m is the batch size\n",
370 | " \"\"\"\n",
371 | " assert A.shape == Y.shape\n",
372 | " m = A.shape[1]\n",
373 | " s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))\n",
374 | " loss = -s/m\n",
375 | " return np.squeeze(loss)\n",
376 | " \n",
377 | " @staticmethod\n",
378 | " def sigmoid_grad(A, Z):\n",
379 | " grad = np.multiply(A, 1-A)\n",
380 | " return grad\n",
381 | "\n",
382 | " @staticmethod\n",
383 | " def relu_grad(A, Z):\n",
384 | " grad = np.zeros(Z.shape)\n",
385 | " grad[Z>0] = 1\n",
386 | " return grad\n",
387 | " \n",
388 | " \n",
389 | " def forward(self, X):\n",
390 | " # intermediate layer use relu as activation\n",
391 | " # last layer use sigmoid\n",
392 | " n_layers = int(len(self.params)/2)\n",
393 | " A = X\n",
394 | " cache = {}\n",
395 | " for i in range(1, n_layers):\n",
396 | " W, b = self.params['W'+str(i)], self.params['b'+str(i)]\n",
397 | " Z = np.dot(W, A) + b\n",
398 | " A = self.relu(Z)\n",
399 | " \n",
400 | " keep_prob = self.dropout[i-1]\n",
401 | " D = np.random.rand(A.shape[0], A.shape[1])\n",
402 | " D = np.int64(D < keep_prob)\n",
403 | " A = np.multiply(A, D)\n",
404 | " A = A/keep_prob\n",
405 | " \n",
406 | " cache['Z'+str(i)] = Z\n",
407 | " cache['A'+str(i)] = A\n",
408 | " cache['D'+str(i)] = D\n",
409 | "\n",
410 | " # last layer\n",
411 | " W, b = self.params['W'+str(i+1)], self.params['b'+str(i+1)]\n",
412 | " Z = np.dot(W, A) + b\n",
413 | " A = self.sigmoid(Z)\n",
414 | " cache['Z'+str(i+1)] = Z\n",
415 | " cache['A'+str(i+1)] = A\n",
416 | "\n",
417 | " return cache, A\n",
418 | " \n",
419 | " def backward(self, cache, X, Y):\n",
420 | " \"\"\"\n",
421 | " cache: result [A, Z]\n",
422 | " Y: shape (1, m)\n",
423 | " \"\"\"\n",
424 | " grad = {}\n",
425 | " n_layers = int(len(self.params)/2)\n",
426 | " m = Y.shape[1]\n",
427 | " cache['A0'] = X\n",
428 | "\n",
429 | " for l in range(n_layers, 0, -1):\n",
430 | " A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]\n",
431 | " W = self.params['W'+str(l)]\n",
432 | " if l == n_layers:\n",
433 | " dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)\n",
434 | "\n",
435 | " if l == n_layers:\n",
436 | " dZ = np.multiply(dA, self.sigmoid_grad(A, Z))\n",
437 | " else:\n",
438 | " keep_prob = self.dropout[l-1]\n",
439 | " D = cache['D' + str(l)]\n",
440 | " dA = np.multiply(dA, D)\n",
441 | " dA = dA/keep_prob\n",
442 | " dZ = np.multiply(dA, self.relu_grad(A, Z))\n",
443 | " dW = np.dot(dZ, A_prev.T)/m\n",
444 | " db = np.sum(dZ, axis=1, keepdims=True)/m\n",
445 | " dA = np.dot(W.T, dZ)\n",
446 | "\n",
447 | " grad['dW'+str(l)] = dW\n",
448 | " grad['db'+str(l)] = db\n",
449 | "\n",
450 | " return grad\n",
451 | " \n",
452 | " def optimize(self, grads, lr):\n",
453 | " n_layers = int(len(self.params)/2)\n",
454 | " for i in range(1, n_layers+1):\n",
455 | " dW, db = grads['dW'+str(i)], grads['db'+str(i)]\n",
456 | " self.params['W'+str(i)] -= lr*dW\n",
457 | " self.params['b'+str(i)] -= lr*db\n",
458 | " \n",
459 | " @staticmethod\n",
460 | " def generate_batch(X, batch_size):\n",
461 | " n = X.shape[0]\n",
462 | " batches = [range(i, i+batch_size) for i in range(0, n, batch_size)]\n",
463 | " return batches\n",
464 | " \n",
465 | " def train(self, X_train, y_train, batch_size=200, n_iter=100, lr=0.1, dropout:list=[]):\n",
466 | " self.dropout = dropout\n",
467 | " # prepare batch training\n",
468 | " batches = self.generate_batch(X_train, batch_size)\n",
469 | " # init weights\n",
470 | " self.weights_init()\n",
471 | " for i in range(n_iter):\n",
472 | " for batch in batches:\n",
473 | " X = X_train[batch, :].T\n",
474 | " Y = y_train[batch].reshape(1, -1)\n",
475 | " cache, A = self.forward(X)\n",
476 | " grads = self.backward(cache, X, Y)\n",
477 | " self.optimize(grads, lr)\n",
478 | "\n",
479 | " if i%10 == 0:\n",
480 | " loss = self.compute_cost(A, Y)\n",
481 | " print(f'iteration {i}: loss {loss}')\n",
482 | "\n",
483 | "\n",
484 | "def accuracy(Y, Y_pred):\n",
485 | " \"\"\"\n",
486 | " Y: vector of true value\n",
487 | " Y_pred: vector of predicted value\n",
488 | " \"\"\"\n",
489 | " \n",
490 | " assert Y.shape[0] == 1\n",
491 | " assert Y.shape == Y_pred.shape\n",
492 | " Y_pred = np.round(Y_pred)\n",
493 | " acc = float(np.dot(Y, Y_pred.T) + np.dot(1 - Y, 1 - Y_pred.T))/Y.size\n",
494 | " return acc"
495 | ]
496 | },
497 | {
498 | "cell_type": "code",
499 | "execution_count": 8,
500 | "metadata": {},
501 | "outputs": [
502 | {
503 | "name": "stdout",
504 | "output_type": "stream",
505 | "text": [
506 | "train shape (8000, 200)\n",
507 | "test shape (2000, 200)\n"
508 | ]
509 | }
510 | ],
511 | "source": [
512 | "from sklearn import datasets\n",
513 | "\n",
514 | "\n",
515 | "X, y = datasets.make_classification(n_samples=10000, n_features=200, random_state=123)\n",
516 | "\n",
517 | "X_train, X_test = X[:8000], X[8000:]\n",
518 | "y_train, y_test = y[:8000], y[8000:]\n",
519 | "\n",
520 | "print('train shape', X_train.shape)\n",
521 | "print('test shape', X_test.shape)"
522 | ]
523 | },
524 | {
525 | "cell_type": "code",
526 | "execution_count": 9,
527 | "metadata": {},
528 | "outputs": [],
529 | "source": [
530 | "layers = [200, 100, 20, 1]\n",
531 | "dropout_ratio = [.8, .8]\n",
532 | "\n",
533 | "model = deepNN(layers)"
534 | ]
535 | },
536 | {
537 | "cell_type": "code",
538 | "execution_count": 10,
539 | "metadata": {
540 | "scrolled": false
541 | },
542 | "outputs": [
543 | {
544 | "name": "stdout",
545 | "output_type": "stream",
546 | "text": [
547 | "iteration 0: loss 0.6931117424217708\n",
548 | "iteration 10: loss 0.6929932803338776\n",
549 | "iteration 20: loss 0.6930042095652803\n",
550 | "iteration 30: loss 0.6929270740459051\n",
551 | "iteration 40: loss 0.6928159708817516\n",
552 | "iteration 50: loss 0.6925835619958787\n",
553 | "iteration 60: loss 0.6922472029699815\n",
554 | "iteration 70: loss 0.690539000831825\n",
555 | "iteration 80: loss 0.6813485269598472\n",
556 | "iteration 90: loss 0.5140425561651449\n",
557 | "iteration 100: loss 0.26596827522989325\n",
558 | "iteration 110: loss 0.21297544130219212\n",
559 | "iteration 120: loss 0.15886453417841173\n",
560 | "iteration 130: loss 0.14315587310443184\n",
561 | "iteration 140: loss 0.08922784883210816\n"
562 | ]
563 | }
564 | ],
565 | "source": [
566 | "model.train(X_train, y_train, batch_size=200, n_iter=150, lr=0.02, dropout=dropout_ratio)"
567 | ]
568 | },
569 | {
570 | "cell_type": "code",
571 | "execution_count": 11,
572 | "metadata": {},
573 | "outputs": [
574 | {
575 | "name": "stdout",
576 | "output_type": "stream",
577 | "text": [
578 | "accuracy 0.936\n"
579 | ]
580 | }
581 | ],
582 | "source": [
583 | "_, pred = model.forward(X_test.T)\n",
584 | "acc = accuracy(y_test.reshape(1, -1), pred)\n",
585 | "\n",
586 | "print(f'accuracy {acc}')"
587 | ]
588 | },
589 | {
590 | "cell_type": "code",
591 | "execution_count": null,
592 | "metadata": {},
593 | "outputs": [],
594 | "source": []
595 | }
596 | ],
597 | "metadata": {
598 | "kernelspec": {
599 | "display_name": "Python 3",
600 | "language": "python",
601 | "name": "python3"
602 | },
603 | "language_info": {
604 | "codemirror_mode": {
605 | "name": "ipython",
606 | "version": 3
607 | },
608 | "file_extension": ".py",
609 | "mimetype": "text/x-python",
610 | "name": "python",
611 | "nbconvert_exporter": "python",
612 | "pygments_lexer": "ipython3",
613 | "version": "3.8.3"
614 | }
615 | },
616 | "nbformat": 4,
617 | "nbformat_minor": 4
618 | }
619 |
--------------------------------------------------------------------------------
/dropout/images/dropout1_kiank.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/dropout/images/dropout1_kiank.mp4
--------------------------------------------------------------------------------
/dropout/model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
class deepNN:
    """Fully-connected feed-forward network for binary classification.

    Hidden layers use ReLU; the output layer uses a sigmoid. Training is
    plain mini-batch gradient descent on the binary cross-entropy loss.
    """

    def __init__(self, layers):
        # layers: sizes per layer, e.g. [n_features, n_hidden, ..., 1]
        self.layers = layers
        self.params = {}  # populated by weights_init() with 'W1', 'b1', 'W2', ...

    def weights_init(self):
        """Initialise weights with small Gaussian noise and biases with zeros."""
        n = len(self.layers)
        for i in range(1, n):
            self.params['W' + str(i)] = np.random.randn(self.layers[i], self.layers[i-1])*0.01
            self.params['b' + str(i)] = np.zeros((self.layers[i], 1))

    @staticmethod
    def sigmoid(x):
        return 1/(1 + np.exp(-x))

    @staticmethod
    def relu(x):
        return np.maximum(x, 0)

    @staticmethod
    def compute_cost(A, Y):
        """
        Mean binary cross-entropy.

        For binary classification, both A and Y would have shape (1, m),
        where m is the batch size.
        """
        assert A.shape == Y.shape
        m = A.shape[1]
        s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))
        loss = -s/m
        return np.squeeze(loss)

    @staticmethod
    def sigmoid_grad(A, Z):
        # sigmoid'(Z) expressed through the activation A = sigmoid(Z)
        grad = np.multiply(A, 1-A)
        return grad

    @staticmethod
    def relu_grad(A, Z):
        # ReLU'(Z): 1 where Z > 0, else 0
        grad = np.zeros(Z.shape)
        grad[Z>0] = 1
        return grad

    def forward(self, X):
        """Forward pass.

        X: input of shape (n_features, m).
        Returns (cache, A): cache maps 'Z<l>'/'A<l>' per layer, A is the
        final sigmoid activation of shape (1, m).
        """
        # intermediate layers use relu as activation, last layer uses sigmoid
        n_layers = int(len(self.params)/2)
        A = X
        cache = {}
        for i in range(1, n_layers):
            W, b = self.params['W'+str(i)], self.params['b'+str(i)]
            Z = np.dot(W, A) + b
            A = self.relu(Z)
            cache['Z'+str(i)] = Z
            cache['A'+str(i)] = A

        # last layer; indexing by n_layers (instead of the loop variable i+1,
        # which is undefined when there are no hidden layers) also supports
        # a single-layer network
        W, b = self.params['W'+str(n_layers)], self.params['b'+str(n_layers)]
        Z = np.dot(W, A) + b
        A = self.sigmoid(Z)
        cache['Z'+str(n_layers)] = Z
        cache['A'+str(n_layers)] = A

        return cache, A

    def backward(self, cache, X, Y):
        """
        cache: result [A, Z] from forward()
        Y: shape (1, m)
        Returns dict of gradients keyed 'dW<l>', 'db<l>'.
        """
        grad = {}
        n_layers = int(len(self.params)/2)
        m = Y.shape[1]
        cache['A0'] = X

        for l in range(n_layers, 0, -1):
            A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]
            W = self.params['W'+str(l)]
            if l == n_layers:
                # derivative of cross-entropy w.r.t. the final activation
                dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)

            if l == n_layers:
                dZ = np.multiply(dA, self.sigmoid_grad(A, Z))
            else:
                dZ = np.multiply(dA, self.relu_grad(A, Z))
            dW = np.dot(dZ, A_prev.T)/m
            db = np.sum(dZ, axis=1, keepdims=True)/m
            dA = np.dot(W.T, dZ)  # propagate to the previous layer

            grad['dW'+str(l)] = dW
            grad['db'+str(l)] = db

        return grad

    def optimize(self, grads, lr):
        """One vanilla gradient-descent step over all parameters."""
        n_layers = int(len(self.params)/2)
        for i in range(1, n_layers+1):
            dW, db = grads['dW'+str(i)], grads['db'+str(i)]
            self.params['W'+str(i)] -= lr*dW
            self.params['b'+str(i)] -= lr*db

    @staticmethod
    def generate_batch(X, batch_size):
        """Index ranges covering all rows of X; the last batch may be smaller.

        The upper bound is clamped to n: the previous version produced
        range(i, i + batch_size) even past the end of the data, which raised
        IndexError whenever n is not an exact multiple of batch_size.
        """
        n = X.shape[0]
        batches = [range(i, min(i + batch_size, n)) for i in range(0, n, batch_size)]
        return batches

    def train(self, X_train, y_train, batch_size=200, n_iter=100, lr=0.1):
        """Train with mini-batch gradient descent, logging loss every 10 iterations."""
        # prepare batch training
        batches = self.generate_batch(X_train, batch_size)
        # init weights
        self.weights_init()
        for i in range(n_iter):
            for batch in batches:
                X = X_train[batch, :].T
                Y = y_train[batch].reshape(1, -1)
                cache, A = self.forward(X)
                grads = self.backward(cache, X, Y)
                self.optimize(grads, lr)

            if i%10 == 0:
                loss = self.compute_cost(A, Y)
                print(f'iteration {i}: loss {loss}')
128 |
def accuracy(Y, Y_pred):
    """Fraction of correct binary predictions.

    Y: ground-truth row vector of shape (1, m) with entries in {0, 1}
    Y_pred: predicted probabilities, same shape as Y
    """
    assert Y.shape[0] == 1
    assert Y.shape == Y_pred.shape
    labels = np.round(Y_pred)
    correct_pos = np.dot(Y, labels.T)
    correct_neg = np.dot(1 - Y, 1 - labels.T)
    return float(correct_pos + correct_neg) / Y.size
--------------------------------------------------------------------------------
/examples/dataloader.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras.layers import Input, Add, Dense, Activation, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
3 | from tensorflow.keras import Model
4 | import os
5 | import pandas as pd
6 | import numpy as np
7 | import subprocess
8 | import gc
9 | import glob
10 | from tensorflow.keras.applications import ResNet50
11 |
12 |
# key parameter assignment
EPOCH = 20      # training epochs
BATCH_SIZE = 16  # NOTE(review): defined but the datasets below hard-code batch_size=32 — confirm which is intended
PATIENCE = 5    # early-stopping patience
DATA_PATH = '/kaggle/input/state-farm-distracted-driver-detection'


# the 10 distraction classes are named c0..c9 in this dataset
classes = [f'c{i}' for i in range(10)]
seed = 2020
validation_split = 0.2

# driver_imgs_list.csv maps each image to its driver ('subject') and class name
driver_list = pd.read_csv(f'{DATA_PATH}/driver_imgs_list.csv')
drivers = np.unique(driver_list['subject'].values)

# split by driver rather than by image, so the same person never appears in
# both the train and validation sets
split = int(np.floor(validation_split * len(drivers)))
np.random.seed(seed)  # NOTE(review): seed is set but the split below is a deterministic slice — no shuffle occurs
trn_idx, val_idx = drivers[split:], drivers[:split]
print(f'train idx {trn_idx} \n val idx {val_idx}')
31 |
32 |
# mkdirs
# Build the directory layout expected by image_dataset_from_directory:
#   driver_split/train/c0..c9, driver_split/valid/c0..c9, driver_split/test
split_dir = 'driver_split'
if not os.path.exists(split_dir):
    cmd = f'mkdir {split_dir}'
    subprocess.call(cmd, shell=True)
    for d in ['train', 'valid', 'test']:
        cmd = f'mkdir {split_dir}/{d}'
        subprocess.call(cmd, shell=True)
        if d == 'test':
            # the test set is unlabeled, so it gets no class subdirectories
            continue
        for cl in classes:
            cmd = f'mkdir {split_dir}/{d}/{cl}'
            subprocess.call(cmd, shell=True)

# ../driver_split/train/c0-c9
# ../driver_split/valid/c0-c9


# train and valid
# Symlink each image into train/ or valid/ according to its driver's split;
# links avoid copying the image files.
trn_cnt = 0
val_cnt = 0
for i, driver_info in driver_list.iterrows():
    driver = driver_info['subject']
    label = driver_info['classname']
    img_path = driver_info['img']

    if driver in trn_idx:
        if not os.path.exists(f'{split_dir}/train/{label}/{img_path}'):
            os.symlink(os.path.abspath(f'{DATA_PATH}/imgs/train/{label}/{img_path}'), f'{split_dir}/train/{label}/{img_path}')
        trn_cnt += 1
    else:
        if not os.path.exists(f'{split_dir}/valid/{label}/{img_path}'):
            os.symlink(os.path.abspath(f'{DATA_PATH}/imgs/train/{label}/{img_path}'), f'{split_dir}/valid/{label}/{img_path}')
        val_cnt += 1
67 |
68 |
69 |
70 |
# Link the unlabeled test images under test/data so that
# image_dataset_from_directory (with label_mode=None) can read them.
test_data_path = '/kaggle/working/driver_split/test/data'
if not os.path.exists(test_data_path):
    subprocess.call(f'mkdir {test_data_path}', shell=True)

cnt = 0

test_files = []
for file in glob.glob(f'{DATA_PATH}/imgs/test/*.jpg'):
    cnt += 1
    base_name = os.path.basename(file)
    if not os.path.exists(f'{test_data_path}/{base_name}'):
        os.symlink(file, f'{test_data_path}/{base_name}')
    test_files.append(base_name)  # keep file names for matching predictions later

print(f'total {cnt} files linked')


train_dir = f'{split_dir}/train/'
val_dir = f'{split_dir}/valid/'
test_dir = '/kaggle/working/driver_split/test'
91 |
92 |
# tf.data.Dataset object
# Labels are inferred from the c0..c9 subdirectory names and one-hot encoded
# (label_mode='categorical'); images are resized to 224x224 for ResNet50.

train_dataset = tf.keras.preprocessing.image_dataset_from_directory(train_dir,
                                                                    labels='inferred',
                                                                    label_mode='categorical',
                                                                    batch_size=32,
                                                                    image_size=(224, 224))


val_dataset = tf.keras.preprocessing.image_dataset_from_directory(val_dir,
                                                                  labels='inferred',
                                                                  label_mode='categorical',
                                                                  batch_size=32,
                                                                  image_size=(224, 224))


# label_mode=None: the test set is unlabeled, the dataset yields images only
test_dataset = tf.keras.preprocessing.image_dataset_from_directory(test_dir,
                                                                   label_mode=None,
                                                                   batch_size=32,
                                                                   image_size=(224, 224))


# rescale pixel values from [0, 255] to [0, 1]
norm_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1/255.)

norm_train_dataset = train_dataset.map(lambda x, y: (norm_layer(x), y))
norm_val_dataset = val_dataset.map(lambda x, y: (norm_layer(x), y))
norm_test_dataset = test_dataset.map(lambda x: norm_layer(x))

# sanity-check one batch: shapes and the rescaled value range
for b_X, b_y in norm_train_dataset:
    print('batch X shape', b_X.shape)
    print('batch y shape', b_y.shape)
    print(f'max {np.max(b_X[0])} min {np.min(b_X[0])}')
    break


# overlap input-pipeline work with training
norm_train_dataset = norm_train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
norm_val_dataset = norm_val_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
131 |
input_size = (224, 224, 3)

def get_model():
    """Build a 10-class classifier on top of an ImageNet-pretrained ResNet50.

    The ResNet50 classification head is replaced by two Dropout-regularized
    2048-unit dense layers and a 10-way softmax output.

    Returns:
        A tf.keras Model mapping (224, 224, 3) images to 10 class probabilities.
    """
    # Bug fix: Dropout was used below but never imported at the top of the
    # file, so calling this function raised NameError.
    from tensorflow.keras.layers import Dropout

    model_res = ResNet50(include_top=True, input_shape=input_size, weights='imagenet')
    # take the output of the last global average pooling layer, which has
    # fewer parameters than flattening the final conv feature map
    x = model_res.layers[-2].output

    x = Dense(2048)(x)
    x = Activation('relu')(x)
    x = Dropout(.5)(x)

    x = Dense(2048)(x)
    x = Activation('relu')(x)
    x = Dropout(.5)(x)

    x = Dense(10)(x)
    outputs = Activation('softmax')(x)

    model = Model(model_res.input, outputs)
    return model
152 |
153 |
model = get_model()

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


# Early-stop on training loss; keep the checkpoint with the best validation
# accuracy. Use the constants declared at the top of the file instead of
# repeating their values inline.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=PATIENCE)
checkpoint = tf.keras.callbacks.ModelCheckpoint('/tmp/checkpoint', monitor='val_accuracy', save_best_only=True)

model.fit(norm_train_dataset, validation_data=norm_val_dataset, epochs=EPOCH, callbacks=[callback, checkpoint])

# prediction
# Bug fix: the model is trained on images rescaled to [0, 1]
# (norm_train_dataset), but prediction previously ran on the raw
# test_dataset; norm_test_dataset was built above and never used.
# Predict on the normalized set so inputs match the training distribution.
test_pred = model.predict(norm_test_dataset)
--------------------------------------------------------------------------------
/regularization/model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
class deepNN:
    """Fully-connected feed-forward network for binary classification.

    Hidden layers use ReLU; the output layer uses a sigmoid. Training is
    plain mini-batch gradient descent on the binary cross-entropy loss.
    """

    def __init__(self, layers):
        # layers: sizes per layer, e.g. [n_features, n_hidden, ..., 1]
        self.layers = layers
        self.params = {}  # populated by weights_init() with 'W1', 'b1', 'W2', ...

    def weights_init(self):
        """Initialise weights with small Gaussian noise and biases with zeros."""
        n = len(self.layers)
        for i in range(1, n):
            self.params['W' + str(i)] = np.random.randn(self.layers[i], self.layers[i-1])*0.01
            self.params['b' + str(i)] = np.zeros((self.layers[i], 1))

    @staticmethod
    def sigmoid(x):
        return 1/(1 + np.exp(-x))

    @staticmethod
    def relu(x):
        return np.maximum(x, 0)

    @staticmethod
    def compute_cost(A, Y):
        """
        Mean binary cross-entropy.

        For binary classification, both A and Y would have shape (1, m),
        where m is the batch size.
        """
        assert A.shape == Y.shape
        m = A.shape[1]
        s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))
        loss = -s/m
        return np.squeeze(loss)

    @staticmethod
    def sigmoid_grad(A, Z):
        # sigmoid'(Z) expressed through the activation A = sigmoid(Z)
        grad = np.multiply(A, 1-A)
        return grad

    @staticmethod
    def relu_grad(A, Z):
        # ReLU'(Z): 1 where Z > 0, else 0
        grad = np.zeros(Z.shape)
        grad[Z>0] = 1
        return grad

    def forward(self, X):
        """Forward pass.

        X: input of shape (n_features, m).
        Returns (cache, A): cache maps 'Z<l>'/'A<l>' per layer, A is the
        final sigmoid activation of shape (1, m).
        """
        # intermediate layers use relu as activation, last layer uses sigmoid
        n_layers = int(len(self.params)/2)
        A = X
        cache = {}
        for i in range(1, n_layers):
            W, b = self.params['W'+str(i)], self.params['b'+str(i)]
            Z = np.dot(W, A) + b
            A = self.relu(Z)
            cache['Z'+str(i)] = Z
            cache['A'+str(i)] = A

        # last layer; indexing by n_layers (instead of the loop variable i+1,
        # which is undefined when there are no hidden layers) also supports
        # a single-layer network
        W, b = self.params['W'+str(n_layers)], self.params['b'+str(n_layers)]
        Z = np.dot(W, A) + b
        A = self.sigmoid(Z)
        cache['Z'+str(n_layers)] = Z
        cache['A'+str(n_layers)] = A

        return cache, A

    def backward(self, cache, X, Y):
        """
        cache: result [A, Z] from forward()
        Y: shape (1, m)
        Returns dict of gradients keyed 'dW<l>', 'db<l>'.
        """
        grad = {}
        n_layers = int(len(self.params)/2)
        m = Y.shape[1]
        cache['A0'] = X

        for l in range(n_layers, 0, -1):
            A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]
            W = self.params['W'+str(l)]
            if l == n_layers:
                # derivative of cross-entropy w.r.t. the final activation
                dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)

            if l == n_layers:
                dZ = np.multiply(dA, self.sigmoid_grad(A, Z))
            else:
                dZ = np.multiply(dA, self.relu_grad(A, Z))
            dW = np.dot(dZ, A_prev.T)/m
            db = np.sum(dZ, axis=1, keepdims=True)/m
            dA = np.dot(W.T, dZ)  # propagate to the previous layer

            grad['dW'+str(l)] = dW
            grad['db'+str(l)] = db

        return grad

    def optimize(self, grads, lr):
        """One vanilla gradient-descent step over all parameters."""
        n_layers = int(len(self.params)/2)
        for i in range(1, n_layers+1):
            dW, db = grads['dW'+str(i)], grads['db'+str(i)]
            self.params['W'+str(i)] -= lr*dW
            self.params['b'+str(i)] -= lr*db

    @staticmethod
    def generate_batch(X, batch_size):
        """Index ranges covering all rows of X; the last batch may be smaller.

        The upper bound is clamped to n: the previous version produced
        range(i, i + batch_size) even past the end of the data, which raised
        IndexError whenever n is not an exact multiple of batch_size.
        """
        n = X.shape[0]
        batches = [range(i, min(i + batch_size, n)) for i in range(0, n, batch_size)]
        return batches

    def train(self, X_train, y_train, batch_size=200, n_iter=100, lr=0.1):
        """Train with mini-batch gradient descent, logging loss every 10 iterations."""
        # prepare batch training
        batches = self.generate_batch(X_train, batch_size)
        # init weights
        self.weights_init()
        for i in range(n_iter):
            for batch in batches:
                X = X_train[batch, :].T
                Y = y_train[batch].reshape(1, -1)
                cache, A = self.forward(X)
                grads = self.backward(cache, X, Y)
                self.optimize(grads, lr)

            if i%10 == 0:
                loss = self.compute_cost(A, Y)
                print(f'iteration {i}: loss {loss}')
127 |
128 |
def accuracy(Y, Y_pred):
    """Fraction of correct binary predictions.

    Y: ground-truth row vector of shape (1, m) with entries in {0, 1}
    Y_pred: predicted probabilities, same shape as Y
    """
    assert Y.shape[0] == 1
    assert Y.shape == Y_pred.shape
    predicted = np.round(Y_pred)
    # count agreements on positives plus agreements on negatives
    matches = np.dot(Y, predicted.T) + np.dot(1 - Y, 1 - predicted.T)
    return float(matches) / Y.size
--------------------------------------------------------------------------------
/regularization/regularization.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Regularization\n",
8 | "---\n",
9 | "Regularization helps to prevent model from overfitting by adding an extra penalization term at the end of the loss function.\n",
10 | "\n",
11 | "$$J = -\\frac{1}{m} \\sum\\limits_{i = 1}^{m} \\large{(}\\small y^{(i)}\\log\\left(a^{[L](i)}\\right) + (1-y^{(i)})\\log\\left(1- a^{[L](i)}\\right) \\large{)} \\tag{1}$$\n",
12 | "To:\n",
13 | "$$J_{regularized} = \\small \\underbrace{-\\frac{1}{m} \\sum\\limits_{i = 1}^{m} \\large{(}\\small y^{(i)}\\log\\left(a^{[L](i)}\\right) + (1-y^{(i)})\\log\\left(1- a^{[L](i)}\\right) \\large{)} }_\\text{cross-entropy cost} + \\underbrace{\\frac{1}{m} \\frac{\\lambda}{2} \\sum\\limits_l\\sum\\limits_k\\sum\\limits_j W_{k,j}^{[l]2} }_\\text{L2 regularization cost} \\tag{2}$$\n",
14 | "\n",
15 | "Where $m$ is the batch size. The shown regularization is called `L2 regularization`, where `L2` applies square to weights, `L1 regularization` applies absolute value, which has the form of $|W|$.\n",
16 | "\n",
17 |     "The appended extra term would enlarge the loss when either there are too many weights or the weights become too large, and the adjustable factor $\\lambda$ controls how much we want to penalize the weights.\n",
18 | "\n",
19 | "_**1. Why penalizing weights would help to prevent overfitting?**_\n",
20 | "\n",
21 | "An intuitive understanding would be that in the process of minimizing the new loss function, some of the weights would decrease close to zero so that the corresponding neurons would have very small effect to our results, as if we are training on a smaller neural network with fewer neurons."
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "# Forward\n",
29 | "---\n",
30 | "In the forward process, we need only to change the loss function. let's review the cost function we've built in `deepNN`."
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 1,
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "import numpy as np\n",
40 | "from model import deepNN"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 2,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "model = deepNN([2, 4, 1])"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 3,
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "name": "stdout",
59 | "output_type": "stream",
60 | "text": [
61 | "loss: 0.7512649762748712\n"
62 | ]
63 | }
64 | ],
65 | "source": [
66 | "A = np.array([[.3, .5, .7]])\n",
67 | "Y = np.array([[1, 1, 1]])\n",
68 | "\n",
69 | "loss = model.compute_cost(A, Y)\n",
70 | "print(f'loss: {loss}')"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 13,
76 | "metadata": {},
77 | "outputs": [],
78 | "source": [
79 | "def compute_loss(A, Y, parameters, reg=True, lambd=.2):\n",
80 | " \"\"\"\n",
81 | " With L2 regularization\n",
82 | " parameters: dict with 'W1', 'b1', 'W2', ...\n",
83 | " \"\"\"\n",
84 | " assert A.shape == Y.shape\n",
85 | " n_layer = len(parameters)//2\n",
86 | " m = A.shape[1]\n",
87 | " s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))\n",
88 | " loss = -s/m\n",
89 | " if reg:\n",
90 | " p = 0\n",
91 | " for i in range(1, n_layer+1):\n",
92 | " p += np.sum(np.square(parameters['W'+str(i)]))\n",
93 | " loss += (1/m)*(lambd/2)*p\n",
94 | " return np.squeeze(loss)"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 6,
100 | "metadata": {},
101 | "outputs": [
102 | {
103 | "data": {
104 | "text/plain": [
105 | "{'W1': array([[ 0.00224882, -0.00683036],\n",
106 | " [-0.0155842 , 0.00439355],\n",
107 | " [ 0.0026745 , 0.00287223],\n",
108 | " [-0.00977243, 0.00515391]]),\n",
109 | " 'b1': array([[0.],\n",
110 | " [0.],\n",
111 | " [0.],\n",
112 | " [0.]]),\n",
113 | " 'W2': array([[-0.02002206, 0.00227708, 0.00470624, 0.00502016]]),\n",
114 | " 'b2': array([[0.]])}"
115 | ]
116 | },
117 | "execution_count": 6,
118 | "metadata": {},
119 | "output_type": "execute_result"
120 | }
121 | ],
122 | "source": [
123 | "model.weights_init()\n",
124 | "model.params"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 14,
130 | "metadata": {},
131 | "outputs": [
132 | {
133 | "name": "stdout",
134 | "output_type": "stream",
135 | "text": [
136 | "loss: 0.7512951351356093\n"
137 | ]
138 | }
139 | ],
140 | "source": [
141 | "loss = compute_loss(A, Y, model.params)\n",
142 | "print(f'loss: {loss}')"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "# Backward\n",
150 | "---\n",
151 |     "The backward propagation of `L2 regularization` is actually straightforward: we only need to add the gradient of the L2 term.\n",
152 |     "\n",
153 |     "$$ \\underbrace{\\frac{\\partial{J}^{\\text{L2 Reg}}}{\\partial{W}}}_{\\text{new gradient}} = \\underbrace{ \\frac{\\partial{J}^{\\text{old}}}{\\partial{W}} }_{\\text{old gradient}} + \\frac{\\lambda}{m}W$$"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 15,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "def backward(params, cache, X, Y, lambd=0.2):\n",
163 | " \"\"\"\n",
164 | " params: weight [W, b]\n",
165 | " cache: result [A, Z]\n",
166 | " Y: shape (1, m)\n",
167 | " \"\"\"\n",
168 | " grad = {}\n",
169 | " n_layers = int(len(params)/2)\n",
170 | " m = Y.shape[1]\n",
171 | " cache['A0'] = X\n",
172 | " \n",
173 | " for l in range(n_layers, 0, -1):\n",
174 | " A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]\n",
175 | " W = params['W'+str(l)]\n",
176 | " if l == n_layers:\n",
177 | " dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)\n",
178 | " \n",
179 | " if l == n_layers:\n",
180 | " dZ = np.multiply(dA, sigmoid_grad(A, Z))\n",
181 | " else:\n",
182 | " dZ = np.multiply(dA, relu_grad(A, Z))\n",
183 | " \n",
184 | " # with an extra gradient at the end, other terms would remain the same\n",
185 | " dW = np.dot(dZ, A_prev.T)/m + (lambd/m)*W\n",
186 | " \n",
187 | " db = np.sum(dZ, axis=1, keepdims=True)/m\n",
188 | " dA = np.dot(W.T, dZ)\n",
189 | "\n",
190 | " grad['dW'+str(l)] = dW\n",
191 | " grad['db'+str(l)] = db\n",
192 | " \n",
193 | " return grad"
194 | ]
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {},
199 | "source": [
200 | "# Ensemble"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": 36,
206 | "metadata": {},
207 | "outputs": [],
208 | "source": [
209 | "class deepNN:\n",
210 | " def __init__(self, layers):\n",
211 | " self.layers = layers\n",
212 | " self.params = {}\n",
213 | " self.reg = False\n",
214 | " self.lambd = .2\n",
215 | " \n",
216 | " \n",
217 | " def weights_init(self):\n",
218 | " n = len(self.layers)\n",
219 | " for i in range(1, n):\n",
220 | " self.params['W' + str(i)] = np.random.randn(self.layers[i], self.layers[i-1])*0.01\n",
221 | " self.params['b' + str(i)] = np.zeros((self.layers[i], 1))\n",
222 | " \n",
223 | " @staticmethod\n",
224 | " def sigmoid(x):\n",
225 | " return 1/(1 + np.exp(-x))\n",
226 | "\n",
227 | " @staticmethod\n",
228 | " def relu(x):\n",
229 | " return np.maximum(x, 0)\n",
230 | " \n",
231 | " def compute_loss(self, A, Y):\n",
232 | " \"\"\"\n",
233 | " With L2 regularization\n",
234 | " \"\"\"\n",
235 | " assert A.shape == Y.shape\n",
236 | " n_layer = len(self.params)//2\n",
237 | " m = A.shape[1]\n",
238 | " s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))\n",
239 | " loss = -s/m\n",
240 | " if self.reg:\n",
241 | " p = 0\n",
242 | " for i in range(1, n_layer+1):\n",
243 | " p += np.sum(np.square(self.params['W'+str(i)]))\n",
244 | " loss += (1/m)*(self.lambd/2)*p\n",
245 | " return np.squeeze(loss)\n",
246 | " \n",
247 | " @staticmethod\n",
248 | " def sigmoid_grad(A, Z):\n",
249 | " grad = np.multiply(A, 1-A)\n",
250 | " return grad\n",
251 | "\n",
252 | " @staticmethod\n",
253 | " def relu_grad(A, Z):\n",
254 | " grad = np.zeros(Z.shape)\n",
255 | " grad[Z>0] = 1\n",
256 | " return grad\n",
257 | " \n",
258 | " \n",
259 | " def forward(self, X):\n",
260 | " # intermediate layer use relu as activation\n",
261 | " # last layer use sigmoid\n",
262 | " n_layers = int(len(self.params)/2)\n",
263 | " A = X\n",
264 | " cache = {}\n",
265 | " for i in range(1, n_layers):\n",
266 | " W, b = self.params['W'+str(i)], self.params['b'+str(i)]\n",
267 | " Z = np.dot(W, A) + b\n",
268 | " A = self.relu(Z)\n",
269 | " cache['Z'+str(i)] = Z\n",
270 | " cache['A'+str(i)] = A\n",
271 | "\n",
272 | " # last layer\n",
273 | " W, b = self.params['W'+str(i+1)], self.params['b'+str(i+1)]\n",
274 | " Z = np.dot(W, A) + b\n",
275 | " A = self.sigmoid(Z)\n",
276 | " cache['Z'+str(i+1)] = Z\n",
277 | " cache['A'+str(i+1)] = A\n",
278 | "\n",
279 | " return cache, A\n",
280 | " \n",
281 | " def backward(self, cache, X, Y):\n",
282 | " \"\"\"\n",
283 | " cache: result [A, Z]\n",
284 | " Y: shape (1, m)\n",
285 | " \"\"\"\n",
286 | " grad = {}\n",
287 | " n_layers = int(len(self.params)/2)\n",
288 | " m = Y.shape[1]\n",
289 | " cache['A0'] = X\n",
290 | "\n",
291 | " for l in range(n_layers, 0, -1):\n",
292 | " A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]\n",
293 | " W = self.params['W'+str(l)]\n",
294 | " if l == n_layers:\n",
295 | " dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)\n",
296 | "\n",
297 | " if l == n_layers:\n",
298 | " dZ = np.multiply(dA, self.sigmoid_grad(A, Z))\n",
299 | " else:\n",
300 | " dZ = np.multiply(dA, self.relu_grad(A, Z))\n",
301 | " \n",
302 | " dW = np.dot(dZ, A_prev.T)/m + (self.lambd/m)*W\n",
303 | " db = np.sum(dZ, axis=1, keepdims=True)/m\n",
304 | " dA = np.dot(W.T, dZ)\n",
305 | "\n",
306 | " grad['dW'+str(l)] = dW\n",
307 | " grad['db'+str(l)] = db\n",
308 | "\n",
309 | " return grad\n",
310 | " \n",
311 | " def optimize(self, grads, lr):\n",
312 | " n_layers = int(len(self.params)/2)\n",
313 | " for i in range(1, n_layers+1):\n",
314 | " dW, db = grads['dW'+str(i)], grads['db'+str(i)]\n",
315 | " self.params['W'+str(i)] -= lr*dW\n",
316 | " self.params['b'+str(i)] -= lr*db\n",
317 | " \n",
318 | " @staticmethod\n",
319 | " def generate_batch(X, batch_size):\n",
320 | " n = X.shape[0]\n",
321 | " batches = [range(i, i+batch_size) for i in range(0, n, batch_size)]\n",
322 | " return batches\n",
323 | " \n",
324 | " \n",
325 | " def train(self, X_train, y_train, batch_size=200, n_iter=100, lr=0.1, reg=True, lambd=.7):\n",
326 | " self.lambd = lambd\n",
327 | " self.reg = reg\n",
328 | " # prepare batch training\n",
329 | " batches = self.generate_batch(X_train, batch_size)\n",
330 | " # init weights\n",
331 | " self.weights_init()\n",
332 | " for i in range(n_iter):\n",
333 | " for batch in batches:\n",
334 | " X = X_train[batch, :].T\n",
335 | " Y = y_train[batch].reshape(1, -1)\n",
336 | " cache, A = self.forward(X)\n",
337 | " grads = self.backward(cache, X, Y)\n",
338 | " self.optimize(grads, lr)\n",
339 | "\n",
340 | " if i%10 == 0:\n",
341 | " loss = self.compute_loss(A, Y)\n",
342 | " print(f'iteration {i}: loss {loss}')"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": 39,
348 | "metadata": {},
349 | "outputs": [],
350 | "source": [
351 | "def accuracy(Y, Y_pred):\n",
352 | " \"\"\"\n",
353 | " Y: vector of true value\n",
354 | " Y_pred: vector of predicted value\n",
355 | " \"\"\"\n",
356 | " \n",
357 | " assert Y.shape[0] == 1\n",
358 | " assert Y.shape == Y_pred.shape\n",
359 | " Y_pred = np.round(Y_pred)\n",
360 | " acc = float(np.dot(Y, Y_pred.T) + np.dot(1 - Y, 1 - Y_pred.T))/Y.size\n",
361 | " return acc"
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": 17,
367 | "metadata": {},
368 | "outputs": [
369 | {
370 | "name": "stdout",
371 | "output_type": "stream",
372 | "text": [
373 | "train shape (8000, 200)\n",
374 | "test shape (2000, 200)\n"
375 | ]
376 | }
377 | ],
378 | "source": [
379 | "from sklearn import datasets\n",
380 | "\n",
381 | "\n",
382 | "X, y = datasets.make_classification(n_samples=10000, n_features=200, random_state=123)\n",
383 | "\n",
384 | "X_train, X_test = X[:8000], X[8000:]\n",
385 | "y_train, y_test = y[:8000], y[8000:]\n",
386 | "\n",
387 | "print('train shape', X_train.shape)\n",
388 | "print('test shape', X_test.shape)"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": 37,
394 | "metadata": {},
395 | "outputs": [],
396 | "source": [
397 | "layers = [200, 100, 20, 1]\n",
398 | "model = deepNN(layers)"
399 | ]
400 | },
401 | {
402 | "cell_type": "code",
403 | "execution_count": 54,
404 | "metadata": {},
405 | "outputs": [
406 | {
407 | "name": "stdout",
408 | "output_type": "stream",
409 | "text": [
410 | "iteration 0: loss 0.6985036506635248\n",
411 | "iteration 10: loss 0.6974289293126693\n",
412 | "iteration 20: loss 0.696563499398262\n",
413 | "iteration 30: loss 0.6955727021117409\n",
414 | "iteration 40: loss 0.6845170595754049\n",
415 | "iteration 50: loss 0.23561800014771372\n",
416 | "iteration 60: loss 0.15567224031891935\n",
417 | "iteration 70: loss 0.12669228589646375\n",
418 | "iteration 80: loss 0.11069865608869393\n",
419 | "iteration 90: loss 0.1007637548980789\n",
420 | "iteration 100: loss 0.09435682482867866\n",
421 | "iteration 110: loss 0.09060941295356366\n",
422 | "iteration 120: loss 0.08884491050012915\n",
423 | "iteration 130: loss 0.08739359237666255\n",
424 | "iteration 140: loss 0.08695416115831198\n"
425 | ]
426 | }
427 | ],
428 | "source": [
429 | "model.train(X_train, y_train, batch_size=200, n_iter=150, lr=0.05, reg=True, lambd=1)"
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "execution_count": 55,
435 | "metadata": {},
436 | "outputs": [
437 | {
438 | "name": "stdout",
439 | "output_type": "stream",
440 | "text": [
441 | "accuracy 0.9425\n"
442 | ]
443 | }
444 | ],
445 | "source": [
446 | "_, pred = model.forward(X_test.T)\n",
447 | "acc = accuracy(y_test.reshape(1, -1), pred)\n",
448 | "\n",
449 | "print(f'accuracy {acc}')"
450 | ]
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": 46,
455 | "metadata": {},
456 | "outputs": [],
457 | "source": [
458 | "from model import deepNN as deepNNOld"
459 | ]
460 | },
461 | {
462 | "cell_type": "code",
463 | "execution_count": 51,
464 | "metadata": {},
465 | "outputs": [],
466 | "source": [
467 | "layers = [200, 100, 20, 1]\n",
468 | "model_unreg = deepNNOld(layers)"
469 | ]
470 | },
471 | {
472 | "cell_type": "markdown",
473 | "metadata": {},
474 | "source": [
475 | "Actually, as the number of iterations goes up, the model continues to overfit, which eventually causes an error in the divide operation; we suspect that in the forward process the result $A$ gets too close to 0.\n",
476 | "\n",
477 | "In contrast, the model above with regularization would not overfit."
478 | ]
479 | },
480 | {
481 | "cell_type": "code",
482 | "execution_count": 52,
483 | "metadata": {},
484 | "outputs": [
485 | {
486 | "name": "stdout",
487 | "output_type": "stream",
488 | "text": [
489 | "iteration 0: loss 0.6930918829042935\n",
490 | "iteration 10: loss 0.6930065395149767\n",
491 | "iteration 20: loss 0.6929575889989992\n",
492 | "iteration 30: loss 0.6926539088979596\n",
493 | "iteration 40: loss 0.6849650117201506\n",
494 | "iteration 50: loss 0.20267451014178056\n",
495 | "iteration 60: loss 0.09037465413243737\n",
496 | "iteration 70: loss 0.04981389115902148\n",
497 | "iteration 80: loss 0.02654689714177362\n",
498 | "iteration 90: loss 0.015971046473694038\n",
499 | "iteration 100: loss 0.010199685977249701\n",
500 | "iteration 110: loss 0.007221608028851772\n",
501 | "iteration 120: loss 0.004961759731198219\n",
502 | "iteration 130: loss 0.0034589244309720397\n",
503 | "iteration 140: loss 0.0025630729230403\n"
504 | ]
505 | }
506 | ],
507 | "source": [
508 | "model_unreg.train(X_train, y_train, batch_size=200, n_iter=150, lr=0.05)"
509 | ]
510 | },
511 | {
512 | "cell_type": "code",
513 | "execution_count": 53,
514 | "metadata": {},
515 | "outputs": [
516 | {
517 | "name": "stdout",
518 | "output_type": "stream",
519 | "text": [
520 | "accuracy 0.9335\n"
521 | ]
522 | }
523 | ],
524 | "source": [
525 | "_, pred = model_unreg.forward(X_test.T)\n",
526 | "acc = accuracy(y_test.reshape(1, -1), pred)\n",
527 | "\n",
528 | "print(f'accuracy {acc}')"
529 | ]
530 | },
531 | {
532 | "cell_type": "code",
533 | "execution_count": null,
534 | "metadata": {},
535 | "outputs": [],
536 | "source": []
537 | }
538 | ],
539 | "metadata": {
540 | "kernelspec": {
541 | "display_name": "Python 3",
542 | "language": "python",
543 | "name": "python3"
544 | },
545 | "language_info": {
546 | "codemirror_mode": {
547 | "name": "ipython",
548 | "version": 3
549 | },
550 | "file_extension": ".py",
551 | "mimetype": "text/x-python",
552 | "name": "python",
553 | "nbconvert_exporter": "python",
554 | "pygments_lexer": "ipython3",
555 | "version": "3.8.3"
556 | }
557 | },
558 | "nbformat": 4,
559 | "nbformat_minor": 4
560 | }
561 |
--------------------------------------------------------------------------------
/shallow-neural-network/images/1-hidden-nn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/shallow-neural-network/images/1-hidden-nn.png
--------------------------------------------------------------------------------
/shallow-neural-network/images/multi-layer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/shallow-neural-network/images/multi-layer.png
--------------------------------------------------------------------------------
/shallow-neural-network/one-hidden-layer-nn.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# One Hidden Layer NN\n",
8 | "---\n",
9 | "We will build a shallow dense neural network with one hidden layer, and the following structure is used for illustration purpose.\n",
10 | "\n",
11 | "
\n",
12 | "\n",
13 | "Where in the graph above, we have an input vector $x = (x_1, x_2)$ containing 2 features, 4 hidden units $a_1, a_2, a_3$ and $a_4$, and one output value $y_1 \in [0, 1]$ (consider this a binary classification task with a predicted probability)"
14 | ]
15 | },
16 | {
17 | "cell_type": "markdown",
18 | "metadata": {},
19 | "source": [
20 | "In each hidden unit, take $a_1$ as an example, a linear operation followed by an activation function is performed. So given input $x = (x_1, x_2)$, inside node $a_1$ we have:\n",
21 | "\n",
22 | "$$z_1 = w_{11}x_1 + w_{12}x_2$$\n",
23 | "$$a_1 = activation(z_1)$$\n",
24 | "\n",
25 | "Here $w_{11}$ denotes weight 1 of node 1, $w_{12}$ denotes weight 2 of node 1. Same for node $a_2$, it would have:\n",
26 | "\n",
27 | "$$z_2 = w_{21}x_1 + w_{22}x_2$$\n",
28 | "$$a_2 = activation(z_2)$$\n",
29 | "\n",
30 | "And same for $a_3$ and $a_4$ and so on ..."
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "# Vectorization of One Input\n",
38 | "---\n",
39 | "Now let's put the weights into matrix and input into a vector to simplify the expression.\n",
40 | "\n",
41 | "$$ z^{[1]} = W^{[1]}x + b^{[1]} \\tag1 $$\n",
42 | "\n",
43 | "$$ a^{[1]} = \tanh{(z^{[1]})} \tag2 $$\n",
44 | "\n",
45 | "$$ z^{[2]} = W^{[2]}a^{[1]} + b^{[2]} \\tag3 $$\n",
46 | "\n",
47 | "$$ \hat{y} = a^{[2]} = \sigma({z^{[2]}}) \tag4 $$\n",
48 | "\n",
49 | "$$ L(y, \\hat{y}) = -[y\\log{\\hat{y}} + (1 - y)\\log{(1 - \\hat{y})}] $$\n",
50 | "\n",
51 | "Here we've assumed the hidden-layer activation function to be $\tanh$ and the output activation function to be $sigmoid$ (note that superscript $[i]$ denotes the $i$th layer). \n",
52 | "\n",
53 | "For the dimension of each matrix, we have:\n",
54 | "\n",
55 | "- $ W^{[1]}$ in the case above would have dimension $4 \times 2$, where the $i$th row holds the weights of node $i$\n",
56 | "- $b^{[1]}$ has dimension $4 \\times 1$\n",
57 | "- $z^{[1]}$ and $a^{[1]}$ both have dimension $4 \times 1$\n",
58 | "- $W^{[2]}$ has dimension $1 \\times 4$\n",
59 | "- consequently, $z^{[2]}$ and $a^{[2]}$ would have dimension $1 \times 1$, which is a single value\n",
60 | "\n",
61 | "The loss function $L$ for a single value would be the same as logistic regression's.\n",
62 | "\n",
63 | "Function $\\tanh$ and $sigmoid$ looks as below."
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 1,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "%matplotlib inline\n",
73 | "\n",
74 | "import numpy as np\n",
75 | "import matplotlib.pyplot as plt"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 2,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "def tanh(x):\n",
85 | " return np.tanh(x)\n",
86 | "\n",
87 | "def sigmoid(x):\n",
88 | " return 1/(1 + np.exp(-x))"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 3,
94 | "metadata": {},
95 | "outputs": [
96 | {
97 | "data": {
98 | "text/plain": [
99 | "Text(0.5, 1.0, 'tanh')"
100 | ]
101 | },
102 | "execution_count": 3,
103 | "metadata": {},
104 | "output_type": "execute_result"
105 | },
106 | {
107 | "data": {
108 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAAEICAYAAAB74HFBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAA0YklEQVR4nO3deZzcVZ3v/9en9ySdtbPQ2RNIgGZJgCa4orKDStBBCW44g4OPeYjeUWdGGMflos5FZ+7ozO+iY1QGRCUgV65R4iAgmyIhDWQhCUmapNd0kk5vSXrvqs/vj/o2FJ3ekq6uby3v5+NR9Hc536pPUZ3TnzrnfM8xd0dERERETk5O2AGIiIiIpDMlUyIiIiJjoGRKREREZAyUTImIiIiMgZIpERERkTFQMiUiIiIyBkqm5ISY2T+a2Y9T7XXNrMrMLktmTCIi8czMzey0sOOQ5MsLOwBJL+7+z9n0uiKS2cysCviUuz8ediySvtQyJSIiIjIGSqZkSGb2JTOrN7OjZrbLzC41s6+b2c/iynzCzKrNrMnMvhLf3RaU/aWZ/Sx4jm1mttzMbjezQ2ZWa2ZXxD3XXDNbb2bNZlZpZn8dd27g63487nW/nKz/JyKSOczsPmAh8BszO2Zm/xDUWQfMrM3MnjGzs+LK32Nmd5nZI0GdttHMTh3wtJeZ2R4zaw3KWlLflIRCyZQMysxOB24FLnT3ycCVQNWAMmXA94GPAqXAVGDegKd6P3AfMB14GXiU2O/dPOAO4IdxZdcBdcBc4Hrgn83skkFiKwN+AHw8KFsCzD/pNysiWcndPw7UAO9392J3/w7wO2AZMBt4Cfj5gMvWAP+TWJ1WCXxrwPn3ARcC5wIfJlZ3SoZTMiVDiQCFQJmZ5bt7lbu/NqDM9cBv3P2P7t4DfBUYuNjjs+7+qLv3Ab8EZgF3unsvseRpsZlNM7MFwNuBL7l7l7tvBn4MfGKQ2K4Hfuvuz7h7N/AVIJqQdy0iWc3d73b3o0Hd8nVghZlNjSvysLu/ENRpPwdWDniKO9291d1rgCcHOS8ZSMmUDMrdK4G/JVaZHDKzdWY2d0CxuUBt3DUdQNOAMgfjtjuBw+4eidsHKA6eq9ndj8aVr+b4lq7BXrd9kNcVETkhZpZrZnea2WtmdoQ3WuNnxhU7ELfdQaz+4gTOSwZSMiVDcvdfuPs7gEXEWpy+PaBIA3Hda2Y2gViX28nYD8wws8lxxxYC9YOUbQAWxL3uxDG8rohkt/jW9I8Aq4HLiA1bWBwc17gnGZaSKRmUmZ1uZpeYWSHQRawVaWBX2kPA+83sbWZWQKwV66QqHXevBZ4D/peZFZnZucDNwM8GKf4Q8D4ze0fwuneg32UROTkHgaXB9mSgm1hL90RAU7LIqOgPkAylELgTOEys2Xo2cHt8AXffDnyW2NinBuAYcIhYZXQybiT2TXA/8DDwtcHmfgle9zPAL4LXbSE2cF1E5ET9L+CfzKwVmEFseEE9sAN4PsS4JI2Y+8DxwiInx8yKgVZgmbvvCzkcERGRpFDLlIyJmb3fzCaa2STgX4FtDJhCQUREJJMpmZKxWk2sW24/sblZ1riaO0VEJIuom09ERERkDNQyJSIiIjIGeWG98MyZM33x4sVhvbyIhODFF1887O6zwo5jrFR/iWSf4eqv0JKpxYsXU1FREdbLi0gIzKw67BgSQfWXSPYZrv5SN5+IiIjIGCiZEhERERkDJVMiIiIiY6BkSkRERGQMlEyJiIiIjMGIyZSZ3W1mh8zslSHOm5n9h5lVmtlWMzs/8WGKiAxtLPWUmd1kZnuCx03Ji1pEMsVoWqbuAa4a5vzVxJYRWQbcAvxg7GGJiJyQeziJesrMZgBfAy4CVgFfM7Pp4xqpiGScEeeZcvdnzGzxMEVWAz8N1mN73symmVmpuzckKkiRTBaNOt19UTp7I3T1RujsjdDTF409IlF6g599Eacv6kSiTl80th/12CMS
hYg77k406jgQdWL77rgTHIttQ+zc69sQt/3G8XhvOhQUKMzP5TPvOW18/secgJOtp4B3A4+5ezOAmT1GLCm7f5xDFhkTd+dIZx9Huno51t1He3cfx7r76OyJxOqNiNMXidIbib5eb7jH6onX64S4uiH204PnDl7j5INLxFtMitXnzePUWcVjfp5ETNo5D6iN268Ljh2XTJnZLcS+FbJw4cIEvLRI6jnW3UddSwd1zZ0cPtZNU3tP7OexHlo6ejjS1cexrl6OdsUqv46eSNghnxQzmFKUnxLJ1CgMVU8Ndfw4qr8kDH2RKK/sP8KfX2tib+MxGtq62N/Wyf7WTrp6o2GHNySzsCMYnRULpqVMMjVq7r4WWAtQXl6ePqmryCBa2nvYvv8IOxra2L7/CHsb26lr6aClo/e4ssWFecyYVMD0SQVMKcpj/rQJFBfmMbkoj4mFeUwsyKUoL4cJBbkU5edSmJdDQV4O+bk5FOTmkJebQ36ukZeTQ16ukZtj5FrsZ06wnZMDOWbBA8wMs9gxI/gZVHBmYFjws/+YxW3H9uUNqr8kWQ60dbFhWwPPvXaYjXubOdrdB8DsyYXMnTaBM06ZzHtOn03p1CKmTMinuDCPSYV5FAd1SX+9kZ8XqzPycw3rry/sjbogvg7o//f/Rh2hf/8nIhHJVD2wIG5/fnBMJKM0Hevm2T2HeXp3I8/vbaKhrev1c6VTizhtdjHnzC9lwfSJzJ8+gfnTJzBnShEzJhVQlJ8bYuTC0PVUPbGuvvjjTyUtKpE4nT0R1j6zl/98+jU6eyMsLpnI+1bM5W2nlvDWU0uYWVwYdogyhEQkU+uBW81sHbFBnG0aLyWZora5g4derOOpXYfYWt+GO8yYVMDbTi3h3PlTKSudStncKcyYVBB2qDK8QespM3sU+Oe4QedXALeHFaRkJ3fnN1sbuHPDTva3dfHec0r5uytPZ8nMSWGHJqM0YjJlZvcT++Y208zqiN35kg/g7v8JbACuASqBDuAvxytYkWSIRp2n9zTysz9X84ddhzDg/IXT+cJly3nX6bM4e+5UcnLUBJ5KTraecvdmM/sGsCl4qjv6B6OLJMOBti5u/cVLVFS3UFY6he/esJKLlpaEHZacoNHczXfjCOcd+EzCIhIJSV8kys831nD3n/ZR3dTBzOJCbn3Pady4aiFzp00IOzwZxljqKXe/G7h7POISGU5Xb4S//mkFexuPcecHz+FD5QvI1Re1tJTUAegiqerF6ma+/PArvHrgKBcsms4XLl/O1WeXUpCnRQJEJPHcnX98eBvb6tv40SfKubxsTtghyRgomZKs1tLew52/e5UHKmopnVrEf37sfK486xTdySIi4+q//lTFr16q5/OXLVcilQGUTEnW+u9XGrj9V9s42tXHpy9eyucuXcakQv2TEJHx9afKw3xrw06uKJvDZy9Ji3naZAT6yyFZ6Sd/3Mc3H9nBufOm8p3rV3D6KZPDDklEskBtcwe3/uIlls6cxL/dsFI3s2QIJVOSVaJR51sbdvKTP+7jyrPm8O9rztMcUCKSFJGo8+n7XiQSdX70iXKK1RKeMfRJStbo6o3wxV9u4ZGtDXzybYv5yvvKdOeMiCTNM3sa2dFwhO/dsJLFmkMqoyiZkqxwrLuPv7pnEy/sa+b2q8/glouXapC5iCTVuhdqKJlUwDXnlIYdiiSYkinJeNGo88UHN/NidQv/vmYlq1cOuo6tiMi4OXS0iyd2HuKv3rFEU65kIH2ikvF+8PRrPLr9ILdffYYSKREJxf99sZ6+qHPDhQtGLixpR8mUZLSndh3iX3+/i9Ur53LzO5aEHY6IZCF354FNNaxaPINTZxWHHY6MAyVTkrGqm9r53P0vc/qcydz5wXM1RkpEQrFxXzNVTR2sWaVWqUylZEoyUkdPH5++70XMjLUfL2dCgaY/EJFwrHuhhslFeVx9tgaeZyolU5KRbv/VNnYdPMp/3HgeC0smhh2OiGSpto5eNrxy
gOtWztOXugymZEoyzlO7DvHrzfv520uX867ls8IOR0Sy2MMv19HTF1UXX4ZTMiUZpS8S5ZuP7GRxyUT+5t2nhh2OiGQxd2fdplrOmTeVs+ZODTscGUdKpiSj/HxjDZWHjvGP15ypuVxEJFRb69p49cBRtUplAf21kYzR2tHDdx/fzdtPK+HysjlhhyNJZmZXmdkuM6s0s9sGOf9dM9scPHabWWvcuUjcufVJDVwy1gMVtUzIz+XaFXPDDkXGmWZAl4zxvcf3cKSzl396b5mmQcgyZpYL3AVcDtQBm8xsvbvv6C/j7p+PK/9Z4Ly4p+h095VJCleyxJ8qD3Px8plMLsoPOxQZZ2qZkoxQeego9z1fzZpVCzmzdErY4UjyrQIq3X2vu/cA64DVw5S/Ebg/KZFJVmpp76G6qYPzFk4POxRJAiVTkhG++chOJubn8sXLl4cdioRjHlAbt18XHDuOmS0ClgB/iDtcZGYVZva8mV03xHW3BGUqGhsbExS2ZKrNda0ArJg/LdQ4JDmUTEnae3LXIZ7a1cjnLl1GSXFh2OFI6lsDPOTukbhji9y9HPgI8D0zO+5WUHdf6+7l7l4+a5am3JDhbaltxQzOma+7+LKBkilJa+7O9x7bzaKSidz0tsVhhyPhqQfib5maHxwbzBoGdPG5e33wcy/wFG8eTyVywrbUtrJ89mSKCzU0ORsomZK09nJtK1vq2rj5HUs0FUJ22wQsM7MlZlZALGE67q48MzsDmA78Oe7YdDMrDLZnAm8Hdgy8VmS03J3Nta2sWKBWqWyhlFnS2r3PVVFcmMcHz58fdigSInfvM7NbgUeBXOBud99uZncAFe7en1itAda5u8ddfibwQzOLEvuCeWf8XYAiJ6q2uZOWjl5WLJgWdiiSJEqmJG0dOtrFhm0NfPSiRWpKF9x9A7BhwLGvDtj/+iDXPQecM67BSVbpH3y+UslU1lC/iKStX2ysoTfifOKti8IORUTkdZtrWinKz2H5nMlhhyJJomRK0lJPX5Sfb6zhXctnsXRWcdjhiIi8bktdK2fPnUp+rv7EZgt90pKWfvdKA41Hu/mk7uATkRTSG4nySn2buviyjJIpSUv3PlfF4pKJvGu55vsRkdSx68BRuvuiGnyeZZRMSdrZVtfGSzWtfPyti8nJ0Rp8IpI6Nte2Ahp8nm2UTEnauee5KiYW5PKhck2HICKpZXNtKyWTCpg/fULYoUgSjSqZMrOrzGyXmVWa2W2DnF9oZk+a2ctmttXMrkl8qCLQdKyb32zdzwfPn8cUrcQuIilmS20rKxZMw0yt5tlkxGTKzHKBu4CrgTLgRjMrG1Dsn4AH3f08YpPifT/RgYoAPPxyPT19UW566+KwQxEReZOjXb1UNh7T4sZZaDQtU6uASnff6+49wDpg9YAyDkwJtqcC+xMXosgbfru1gbPmTmGZ5m8RkRSzra4Nd1i5cFrYoUiSjSaZmgfUxu3XBcfifR34mJnVEZuB+LODPZGZ3WJmFWZW0djYeBLhSjarb+1kc20r15xTGnYoIiLH6Z/5fMV8rcmXbRI1AP1G4B53nw9cA9xnZsc9t7uvdfdydy+fNUu3tMuJ+d22BgDeq2RKRFLQ5ppWFpdMZNrEgrBDkSQbTTJVDyyI258fHIt3M/AggLv/GSgCZiYiQJF+j2xroKx0CotnTgo7FBGR42ypa9WUCFlqNMnUJmCZmS0xswJiA8zXDyhTA1wKYGZnEkum1I8nCbO/tZOXa1p577lqlRKR1HOgrYuDR7o1WWeWGjGZcvc+4FbgUWAnsbv2tpvZHWZ2bVDsi8Bfm9kW4H7gk+7u4xW0ZJ8NQRefxkuJSCraXNsCoGQqS+WNppC7byA2sDz+2FfjtncAb09saCJv2LCtgTNLp7BEXXwikoJePXAUMygrnTJyYck4mgFdUt7+1k5eqmnlveecEnYoIiKDqmnqoHRKEUX5uWGHIiFQMiUp73evHADUxSfDG8VKDZ80s0Yz
2xw8PhV37iYz2xM8bkpu5JIJqpraWVgyMewwJCSj6uYTCdMjW/dzximTWTqrOOxQJEXFrdRwObG58DaZ2fpgCEK8B9z91gHXzgC+BpQTm4D4xeDaliSELhmiprmDS8+YE3YYEhK1TElK6+/ie5/u4pPhjWalhqFcCTzm7s1BAvUYcNU4xSkZ6Fh3H4eP9bBoplqmspWSKUlp6uKTURrNSg0AfxEsxv6QmfXPnzeqa7WCgwyluqkdgEUzdINMtlIyJSltw7YGdfFJovwGWOzu5xJrfbr3RC7WCg4ylJqmDgAWacxU1lIyJSnr0JEuXqxuUauUjMaIKzW4e5O7dwe7PwYuGO21IsOpbo4lUxqAnr2UTEnKenbPYQAuOWN2yJFIGhhxpQYzi8/KryU2CTHEJiS+wsymm9l04IrgmMioVDe1M2NSAVOK8sMORUKiu/kkZT2zp5GZxQWaBE9G5O59Zta/UkMucHf/Sg1AhbuvBz4XrNrQBzQDnwyubTazbxBLyADucPfmpL8JSVvVTR3q4stySqYkJUWjzh/3HOady2aSk2NhhyNpYBQrNdwO3D7EtXcDd49rgJKxqps6uHDx9LDDkBCpm09S0o6GIzS19/DOZRroKyKpq7svwv62ThaW6E6+bKZkSlLSM3tit56/c9nMkCMRERlaXUsn7rBY3XxZTcmUpKRndx/mjFMmM3tKUdihiIgM6fU5ppRMZTUlU5JyOnr6qKhu5uLl6uITkdRWHcwxtVATdmY1JVOScp7f20RvxLlY46VEJMVVN3UwqSCXmcUFYYciIVIyJSnnmd2HKczLoVx3x4hIiqtuamdhySTMdNdxNlMyJSnn2T2NXLS0hKL83LBDEREZVnVzhwafi5IpSS31rZ281tjOxbqLT0RSXCTq1DV3ahkZUTIlqeXZ3bEpETT4XERSXUNbJz2RKIs0+DzrKZmSlPLsnsOcMqWIZbOLww5FRGRYNcGdfOrmEyVTkjIiUeePlbElZDSYU0RSXVX/tAhKprKekilJGVvrWmnr7OWd6uITkTRQ3dxOfq5ROnVC2KFIyJRMScp4ds9hzOAdp2nwuYikvpqmDhbMmEiuFmPPekqmJGU8u6eRs+dOZcYkTX4nIqmvqqmDRTPUxSdKpiRFdPVG2FzbyttOLQk7FBGREbk7NU3tLCrRnXyiZEpSxEs1LfRGnIuWzgg7FElTZnaVme0ys0ozu22Q818wsx1mttXMnjCzRXHnIma2OXisT27kko6a2nto74logWMBIC/sAEQAXtjXjBlcsEjJlJw4M8sF7gIuB+qATWa23t13xBV7GSh39w4z+xvgO8ANwblOd1+ZzJglvVU3tQMomRJALVOSIjbubaasdApTJ+SHHYqkp1VApbvvdfceYB2wOr6Auz/p7h3B7vPA/CTHKBmkOpgWQd18AkqmJAX09EV5qaaFVUvUKiUnbR5QG7dfFxwbys3A7+L2i8yswsyeN7PrBrvAzG4JylQ0NjaOOWBJb1VNHZjB/OmaFkHUzScpYGtdK919US5aosHnMv7M7GNAOfCuuMOL3L3ezJYCfzCzbe7+Wvx17r4WWAtQXl7uSQtYUlJNUztzp06gME8LsssoW6ZGGtgZlPlwMLhzu5n9IrFhSibbuK8ZgAsXTw85Eklj9cCCuP35wbE3MbPLgC8D17p7d/9xd68Pfu4FngLOG89gJf1VN3dovJS8bsRkKm5g59VAGXCjmZUNKLMMuB14u7ufBfxt4kOVTLVxXzPLZhdTUlwYdiiSvjYBy8xsiZkVAGuAN92VZ2bnAT8klkgdijs+3cwKg+2ZwNuB+IHrIsepblIyJW8YTcvUiAM7gb8G7nL3FoD4ikpkOH2RKC9WNWtKBBkTd+8DbgUeBXYCD7r7djO7w8yuDYr9C1AM/HLAFAhnAhVmtgV4ErhzwF2AIm9ytKuX5vYeDT6X141mzNRgAzsvGlBmOYCZ/QnIBb7u7v898InM7BbgFoCFCxeeTLyS
YbbvP0J7T4RVGi8lY+TuG4ANA459NW77siGuew44Z3yjk0xS29wJwILpapmSmETdzZcHLAPeDdwI/MjMpg0s5O5r3b3c3ctnzdJithKbXwrgIt3JJyJpYn9rLJmapzv5JDCaZGo0AzvrgPXu3uvu+4DdxJIrkWFt3NfM4pKJzJlSFHYoIiKjUt+fTE1TMiUxo0mmRhzYCfw/Yq1S/QM4lwN7ExemZKJo1NlU1awpEUQkrexv7aQgL4cSLcougRGTqVEO7HwUaDKzHcQGcP69uzeNV9CSGXYdPEpbZ68m6xSRtFLf2sncqUXk5FjYoUiKGNWknaMY2OnAF4KHyKhs3BvLt3Unn4ikk/2tncxVF5/E0XIyEpoXqpqZN20C83VHjIikkfrWTo2XkjdRMiWhcHde2NesLj4RSSs9fVEOHe1Wy5S8iZIpCcVrje0cPtajKRFEJK0cPNKFu+7kkzdTMiWh6J9fSi1TIpJO6lo0x5QcT8mUhGJTVTMziwtZMlPLMYhI+uifsFPdfBJPyZSEYlNVMxcuno6Zbi0WkfTRn0yVTtVEw/IGJVOSdA1tndS1dFK+WF18IpJe9rd1MrO4gKL83LBDkRSiZEqSrqKqBYALF08PORIRkRNT16JpEeR4SqYk6SqqmplYkEtZ6ZSwQxEROSGasFMGo2RKkm5TVQvnLZxGXq5+/UQkfbg7+1u7lEzJcfTXTJLqaFcvrx44QvkijZcSkfTS0tFLZ29E3XxyHCVTklQv17QSdbhQg88lwczsKjPbZWaVZnbbIOcLzeyB4PxGM1scd+724PguM7syqYFL2tC0CDIUJVOSVBVVzeTmGCsXTgs7FMkgZpYL3AVcDZQBN5pZ2YBiNwMt7n4a8F3g28G1ZcAa4CzgKuD7wfOJvEl9kEypZUoGUjIlSbWpqoWy0ikUF+aFHYpkllVApbvvdfceYB2wekCZ1cC9wfZDwKUWm+hsNbDO3bvdfR9QGTyfyJu80TKlOabkzZRMSdL0RqK8XNtCuaZEkMSbB9TG7dcFxwYt4+59QBtQMsprMbNbzKzCzCoaGxsTGLqki/qWToryc5gxqSDsUCTFKJmSpNm+/whdvVGNl5K05O5r3b3c3ctnzZoVdjgSgv1tsWkRtHKDDKRkSpKmoiq2uHH5IrVMScLVAwvi9ucHxwYtY2Z5wFSgaZTXilDf2qXxUjIoJVOSNJuqmllUMpHZUzTeQBJuE7DMzJaYWQGxAeXrB5RZD9wUbF8P/MHdPTi+JrjbbwmwDHghSXFLGqnX7OcyBI0ClqRwdyqqWnj36bPDDkUykLv3mdmtwKNALnC3u283szuACndfD/wEuM/MKoFmYgkXQbkHgR1AH/AZd4+E8kYkZXX1Rjh8rFvTIsiglExJUuw73E5Te48Gn8u4cfcNwIYBx74at90FfGiIa78FfGtcA5S0dqCtC9AcUzI4dfNJUlRUa3FjEUlfmhZBhqNkSpKioqqZ6RPzOXVWcdihiIicsLogmZo/bWLIkUgqUjIlSVFR1cIFi2bolmIRSUv7WzsxgzlTC8MORVKQkikZd4ePdbP3cLu6+EQkbe1v7WRWcSGFeVppSI6nZErG3aZ9wfxSmqxTRNJUfWsn86Zr8LkMTsmUjLuN+5qZkJ/LOfOmhh2KiMhJ2d/apTv5ZEhKpmTcbdzXzPmLplGQp183EUk/7h5rmVIyJUPQXzcZV20dvbx64AgXLSkJOxQRkZPS1N5DT1+UuVM1LYIMTsmUjKuK6mbcYdUSjZcSkfRU3xKbFmHedE2LIINTMiXjauO+Zgpyc1i5YFrYoYiInBRN2CkjUTIl42rjvmZWLphGUb5uJxaR9FQfJFMaMyVDGVUyZWZXmdkuM6s0s9uGKfcXZuZmVp64ECVdHevu45X6NnXxiUhaq2/tZFJBLlMn5IcdiqSoEZMpM8sF7gKuBsqAG82sbJByk4H/AWxMdJCSnl6qbiESdS5aqmRKRNLX/tZO5k6boBUcZEij
aZlaBVS6+1537wHWAasHKfcN4NtAVwLjkzS2cV8TuTnG+Qs187mIpC/NMSUjGU0yNQ+ojduvC469zszOBxa4+yPDPZGZ3WJmFWZW0djYeMLBSnp5YV8zZ8+byqTCvLBDERE5aTXNHczX7OcyjDEPQDezHODfgC+OVNbd17p7ubuXz5o1a6wvLSmsqzfClto23qLxUiKSxlo7emjr7GVxyaSwQ5EUNppkqh5YELc/PzjWbzJwNvCUmVUBbwHWaxB6dnu5ppWeSFSDz2XcmdkMM3vMzPYEP4/rVzazlWb2ZzPbbmZbzeyGuHP3mNk+M9scPFYm9Q1ISqtu6gBgYYnmmJKhjSaZ2gQsM7MlZlYArAHW95909zZ3n+nui919MfA8cK27V4xLxJIWXtjXjJkWN5akuA14wt2XAU8E+wN1AJ9w97OAq4Dvmdm0uPN/7+4rg8fm8Q5Y0kd1cyyZUsuUDGfEZMrd+4BbgUeBncCD7r7dzO4ws2vHO0BJTxv3NXHmKVN0K7Ekw2rg3mD7XuC6gQXcfbe77wm29wOHAI01kBHVNLUDsHCGWqZkaKMaGezuG4ANA459dYiy7x57WJLOevqivFTTwpoLF4YdimSHOe7eEGwfAOYMV9jMVgEFwGtxh79lZl8laNly9+5BrrsFuAVg4UL9bmeLqqYOZk8uZEKBJh6WoWkGdEm4bfWtdPVGeYvml5IEMbPHzeyVQR5vmqbF3R3wYZ6nFLgP+Et3jwaHbwfOAC4EZgBfGuxa3UCTnWqaOtTFJyPSPeuScBv3NQNwocZLSYK4+2VDnTOzg2ZW6u4NQbJ0aIhyU4BHgC+7+/Nxz93fqtVtZv8F/F0CQ5c0V93czjuXKXmW4allShJu495mls0upqS4MOxQJDusB24Ktm8Cfj2wQHDzzMPAT939oQHnSoOfRmy81SvjGaykj86eCAePdLNYd/LJCJRMSUL1RqJUVDVrSgRJpjuBy81sD3BZsI+ZlZvZj4MyHwYuBj45yBQIPzezbcA2YCbwzaRGLymrprl/WgR188nw1M0nCfVSdQvtPRHeuWxm2KFIlnD3JuDSQY5XAJ8Ktn8G/GyI6y8Z1wAlbVUFd/It0p18MgK1TElCPbvnMLk5xltPVTIlIumtpklzTMnoKJmShHp2TyMrF0zT/FIikvaqm9uZOiGfqRNVn8nwlExJwjS397C1vo2LdeeLiGSA6qYODT6XUVEyJQnzp8rDuMM7l6uLT0TSX3VThwafy6gomZKEeXZPI1OK8jh33tSwQxERGZPeSJT61k4NPpdRUTIlCeHuPLP7MO9YNpO8XP1aiUh6q2/pJBJ1FqmbT0ZBf/UkISoPHePAkS7NFCwiGaE6mGNqkbr5ZBSUTElCPL27EUDzS4lIRqjun2NKLVMyCkqmJCGe3XOYpbMmMX+6Kh4RSX/VTR0U5ecwe7KWxZKRKZmSMevqjbBxX5OmRBCRjFHd1MGiGZOILdkoMjwlUzJmFVUtdPVGuVhTIohIhqhualcXn4yakikZs2f3NJKfa1y0pCTsUERExiwadWqaO5RMyagpmZIxe3p3Ixcsms6kQq2bLSLp79DRbrr7opqwU0ZNyZSMyaEjXbx64CgXL9d4KRHJDFXBnXxaSkZGS8mUjMkfKw8DaPC5iGSMmqZgjqkZapmS0VEyJWPy5K5GSiYVUFY6JexQJEuZ2Qwze8zM9gQ/pw9RLmJmm4PH+rjjS8xso5lVmtkDZlaQvOglFVU1tZOXY8ydVhR2KJImlEzJSevqjfDEzoNccdYccnJ0+7CE5jbgCXdfBjwR7A+m091XBo9r445/G/iuu58GtAA3j2+4kuqqmzuYP32ClsaSUdNvipy0p3Y10tET4ZpzSsMORbLbauDeYPte4LrRXmixSYQuAR46meslM9U0dWjwuZwQJVNy0jZsa2D6xHzeulRTIkio5rh7Q7B9AJgzRLkiM6sws+fN7LrgWAnQ6u59wX4dMG+wi83sluD6isbG
xkTFLinG3alqatfgczkhupddTkp/F9/7V8xVU7iMOzN7HDhlkFNfjt9xdzczH+JpFrl7vZktBf5gZtuAttHG4O5rgbUA5eXlQ72GpLnWjl6OdvWxcIaSKRk9JVNyUp7e3Ui7uvgkSdz9sqHOmdlBMyt19wYzKwUODfEc9cHPvWb2FHAe8H+BaWaWF7ROzQfqE/4GJG1UN8fu5Fusbj45AWpSkJOyYVsD0ybm89ZT1cUnoVsP3BRs3wT8emABM5tuZoXB9kzg7cAOd3fgSeD64a6X7FEdzDGl2c/lRCiZkhMW6+I7xJVlp5CvLj4J353A5Wa2B7gs2MfMys3sx0GZM4EKM9tCLHm60913BOe+BHzBzCqJjaH6SVKjl5RSHcwxtUDdfHIC1M0nJ+yZ3Y0c6+7jmnPVxSfhc/cm4NJBjlcAnwq2nwPOGeL6vcCq8YxR0sfOhiMsnDGRovzcsEORNKJmBTlh/V18b1MXn4hkmC21raxcMC3sMCTNjCqZMrOrzGxXMEPwcRPimdkXzGyHmW01syfMbFHiQ5VU0NUb4fGdh7iibI66+EQkoxw60sX+ti5WKJmSEzTiX0MzywXuAq4GyoAbzaxsQLGXgXJ3P5fY5HffSXSgkhqe3XOYY919vPfcuWGHIiKSUJtrWwHUMiUnbDRNC6uASnff6+49wDpiMw6/zt2fdPeOYPd5YrcXSwZSF5+IZKotda3k5RhnzdVao3JiRpNMzQNq4/aHnCE4cDPwu8FOaAbh9NbdF+HxHQfVxSciGWlzbStnlE7W4HM5YQn9i2hmHwPKgX8Z7Ly7r3X3cncvnzVrViJfWpLgqV2NHO3u00SdIpJxolFna22buvjkpIxmaoR6YEHc/qAzBJvZZcSWdniXu3cnJjxJJff9uZrSqUW847SZYYciIpJQew+3c7S7jxXzp4UdiqSh0bRMbQKWmdkSMysA1hCbcfh1ZnYe8EPgWncfdCkHSW97Dh7lj5WH+dhbFmktPhHJOBp8LmMx4l/FYL2qW4FHgZ3Ag+6+3czuMLNrg2L/AhQDvzSzzWa2foinkzR175+rKMjL4cZVC8MORUQk4bbUtlJcmMeps4rDDkXS0KhmQHf3DcCGAce+Grc95CKkkv6OdPXyq5fquXbFXGZMKgg7HBGRhNtS18q586eSk2NhhyJpSP01MqJfVtTR0RPhk29bHHYoIiIJ19UbYWfDEU3WKSdNyZQMKxp17vtzFRcsms7Z86aGHY6ISMLtaDhCb8Q1XkpOmpIpGdbTuxupaurgJrVKiUiG2qLB5zJGSqZkWPc8V8XsyYVcffYpYYciIjIuNte2csqUIuZMKQo7FElTSqZkSHsbj/H07kY+etEizXguIhlrS22rWqVkTPQXUob00z9Xk59r3HjRgpELi4ikodaOHqqaOjT4XMZEyZQMqqW9h4derOO955Qye7KaviV1mdkMM3vMzPYEP6cPUuY9wRx4/Y8uM7suOHePme2LO7cy2e9BwtM/WeeKBbrBRk6ekikZ1Pce301HTx9/8+7Twg5FZCS3AU+4+zLgiWD/Tdz9SXdf6e4rgUuADuD3cUX+vv+8u29OQsySIrbUtmEG52oZGRkDJVNynN0Hj/KzjTV89KJFnH7K5LDDERnJauDeYPte4LoRyl8P/M7dO8YzKEkPW+paWTa7mOLCUc1hLTIoJVPyJu7ON367g4kFuXz+8uVhhyMyGnPcvSHYPgDMGaH8GuD+Ace+ZWZbzey7ZlY42EVmdouZVZhZRWNj4xhDllTg7myubdXixjJmSqbkTZ7a1cizew7zPy5dpqVjJGWY2eNm9sogj9Xx5dzdAR/meUqBc4itNdrvduAM4EJgBvClwa5197XuXu7u5bNmzRrrW5IUUNfSSXN7jwafy5ipXVNe1xuJ8o1HdrB05iQ+8dbFYYcj8rrh1v80s4NmVuruDUGydGiYp/ow8LC798Y9d3+rVreZ/RfwdwkJWlLe73ccBOAtS2eEHImk
O7VMyet+9nw1exvb+fJ7z6QgT78akjbWAzcF2zcBvx6m7I0M6OILEjDMzIiNt3ol8SFKqnF3HthUw8oF0zhttsaGytjoL6YAsakQvvf4Ht65bCaXnDE77HBETsSdwOVmtge4LNjHzMrN7Mf9hcxsMbAAeHrA9T83s23ANmAm8M1kBC3heqmmld0Hj7HmQs2jJ2Onbj4B4DuP7uJoVy9feV8ZsS/oIunB3ZuASwc5XgF8Km6/Cpg3SLlLxjM+SU0PbKphUkEu718xN+xQJAOoZUp4+OU67n+hhk+9cynL56i5W0Qy29GuXn6zpYH3r5jLJE2JIAmgZCrLbd/fxu2/2sZFS2bw91eeHnY4IiLj7jdbGujsjbBm1cKwQ5EMoWQqi7W09/Dp+15k+sQC7vro+VrMWESywrpNNZxxymRWzNcSMpIY+uuZpSJR53PrXubQkW5+8LELmFk86DyFIiIZZfv+NrbWtbHmwgUaHyoJo87iLPWvv9/Fs3sO8+2/OIeVmrBORLLEA5tqKcjL4brzjrsXQeSkqWUqCz2wqYYfPPUaH7loITdcqDEDIpIdunojPPxyPdecfQrTJmqFB0kctUxlEXfnP56o5LuP7+ady2bytfeXhR2SiEjSbNjWwNGuPn2JlIRTMpUl+iJRvvLrV7j/hVo+eP487vzguZrlXESyhrtz/ws1LC6ZqOVjJOGUTGWB9u4+bv3FSzy5q5Fb33MaX7xiuQZeikhWuftPVWyqauHr79fExJJ4SqYyXOWhY3z+gc1s39/Gtz5wNh+9aFHYIYmIJNWfKg/zzxt2ckXZHC3iLuNCyVSG6uyJ8H+e3MPaZ/ZSlJ/L2o+Xc1nZnLDDEhFJqtrmDm79xUssnTmJf7thJTk5apWSxFMylYGe2HmQr63fTl1LJx88bx63X3MmsyZrHikRyS4dPX3cct+LRKLOjz5RTrGWjpFxot+sDBGNOs9WHubuP+7j6d2NnDa7mHW3vIW3LC0JOzQRkaRzd/7hoa3sOnCEuz95IYtnTgo7JMlgSqbSXGtHDw+9WMfPnq+mqqmDmcUF3Hb1GfzV25fobj0RyUrt3X38y6O7+O3WBm67+gzeffrssEOSDKdkKg01tHXy9K5Gnt7dyJO7DtHVG6V80XQ+f/lyrj67VEmUiGSlaNT5f5vr+fZ/v8rBI918/C2L+PTFS8MOS7KAkqkUF406VU3t7Gg4wpbaVp7ZfZhdB48CUDq1iOsvmM9HVi2ibO6UkCMVEQnPSzUt/M/f7GBLbSsr5k/l+x+9gAsWTQ87LMkSo0qmzOwq4N+BXODH7n7ngPOFwE+BC4Am4AZ3r0psqJnL3Wlu76G2pZO6lg5qmzupbelg14Gj7Gw4QkdPBICC3BwuXDKd6y84k3edPotls4s1X4pkPTP7EPB14ExglbtXDFFu0HrMzJYA64AS4EXg4+7ek4TQZQzaOnvZuLeJ515r4rnXDrP74DFmTy7kf39oBR84b57u2pOkGjGZMrNc4C7gcqAO2GRm6919R1yxm4EWdz/NzNYA3wZuGI+Aw+TuRKJOX/CIRJzuSITeiNPTF6U3EqW7N0pnbyT26InQ1RvhWHcfR7v6ONbdy9GuPo509tLU3kPTsR6a2rtpbu+hN+Jveq1pE/NZPnsyHy5fQNncKZSVTmHZnGIK83JDevciKesV4IPAD4cqMEI99m3gu+6+zsz+k1h99oPxD1sGikSd7r5YndneHaE9qDub2rvZ39rJ/tYu9rd2UtvSya4DR4g6FOXncOHiGXy4fAE3rlrIJN2xJyEYzW/dKqDS3fcCmNk6YDUQn0ytJvbNEOAh4P+Ymbn7mzOEk1Db3MFf3rMJiCUz/QZ9Yn/juLvjQP8ljuP+xn7Ug/3geNRj10TdiXqsey3qTsSdaBQiQSI1Frk5RnFhHpOL8igpLqR0ahFnzZ1CSXEhsycXsmDGRBbMmMC8aROYXJQ/ptcSyRbuvhMYqZV20HrMzHYClwAfCcrdS6wu
S1gy9dn7X+bVhiOJerqEOdnabLBq3eM2BquD++tbiCVMEfc3vpxGnJ5I7MvoSFXspIJc5k6bwNxpE7iibBlvO7WElQun6UumhG40ydQ8oDZuvw64aKgy7t5nZm3EmswPxxcys1uAWwAWLhzdQpOFeTmcPmdy3JMMuhn/Gq8fN4uV6a9kLfhPTlAmdt4wg5wcIyfuXE6OkWsWHDdycyA3J4f8HCM318jLMXJzcijIy6EwN4f8PKMgN5fCvBwmFORSlJ9DUX4uRfm5TC7Mo7gojwn5ueqWEwnHUPVYCdDq7n1xx+cN9gQnU38BLJg+gUg0ehIhjz8btBYd1YVDHhpYB/fXqf11b47FvljGto381+vPHPJzY3XqpMI8igtzmVSQR3FhHjOKCyidOoEpRXmqQyUlJbU91N3XAmsBysvLR/XFaPaUIu766PnjGpeIpDYzexw4ZZBTX3b3XycjhpOpvwD+4aozxi0mEUkNo0mm6oEFcfvzg2ODlakzszxgKrGB6CIiY+bul43xKYaqx5qAaWaWF7RODVa/iYgMazQTEm0ClpnZEjMrANYA6weUWQ/cFGxfD/whEeOlREQSZNB6LKinniRWb0GsHktKS5eIZI4Rk6ng29qtwKPATuBBd99uZneY2bVBsZ8AJWZWCXwBuG28AhYRiWdmHzCzOuCtwCNm9mhwfK6ZbYCh67HgKb4EfCGov0qI1WciIqNmYTUglZeXe0XFoNPBiEiGMrMX3b087DjGSvWXSPYZrv7SuiMiIiIiY6BkSkRERGQMlEyJiIiIjIGSKREREZExCG0Aupk1AtUncMlMBsyonoGy4T2C3mcmOdH3uMjdZ41XMMmi+mtI2fA+s+E9gt7nYIasv0JLpk6UmVVkwl1Aw8mG9wh6n5kkG95jImTL/6dseJ/Z8B5B7/NEqZtPREREZAyUTImIiIiMQTolU2vDDiAJsuE9gt5nJsmG95gI2fL/KRveZza8R9D7PCFpM2ZKREREJBWlU8uUiIiISMpRMiUiIiIyBimdTJnZh8xsu5lFzax8wLnbzazSzHaZ2ZVhxZhoZvZ1M6s3s83B45qwY0oUM7sq+Lwqzey2sOMZL2ZWZWbbgs8vY1bDNbO7zeyQmb0Sd2yGmT1mZnuCn9PDjDHVZFsdlsn1F6gOS2fjXX+ldDIFvAJ8EHgm/qCZlQFrgLOAq4Dvm1lu8sMbN99195XBY0PYwSRC8PncBVwNlAE3Bp9jpnpP8Pll0jwt9xD79xbvNuAJd18GPBHsyxuysQ7LuPoLVIdlgHsYx/orpZMpd9/p7rsGObUaWOfu3e6+D6gEViU3OjlBq4BKd9/r7j3AOmKfo6QJd38GaB5weDVwb7B9L3BdMmNKdarDMorqsDQ23vVXSidTw5gH1Mbt1wXHMsWtZrY1aJbMlG6TTP/M4jnwezN70cxuCTuYcTbH3RuC7QPAnDCDSSOZ/O8hE+svyOzPbKBsqcMSVn/lJSaek2dmjwOnDHLqy+7+62THkwzDvWfgB8A3iP0yfwP438BfJS86SYB3uHu9mc0GHjOzV4NvRRnN3d3Msm6ulWyrw1R/ZYWsq8PGWn+Fnky5+2UncVk9sCBuf35wLC2M9j2b2Y+A345zOMmS1p/ZiXD3+uDnITN7mFj3QKZWRAfNrNTdG8ysFDgUdkDJlm11WJbWX5DGn9mJyqI6LGH1V7p2860H1phZoZktAZYBL4QcU0IEH2i/DxAbwJoJNgHLzGyJmRUQG3y7PuSYEs7MJpnZ5P5t4Aoy5zMczHrgpmD7JiDjWmLGSUbWYRlcf4HqsEyUsPor9Jap4ZjZB4D/D5gFPGJmm939SnffbmYPAjuAPuAz7h4JM9YE+o6ZrSTWTF4FfDrUaBLE3fvM7FbgUSAXuNvdt4cc1niYAzxsZhD79/ULd//vcENKDDO7H3g3MNPM6oCvAXcCD5rZzUA18OHwIkw9WViHZWT9BarDwg1p7Ma7/tJyMiIiIiJjkK7dfCIi
IiIpQcmUiIiIyBgomRIREREZAyVTIiIiImOgZEpERERkDJRMiYiIiIyBkikRERGRMfj/AbZzy5peSV7cAAAAAElFTkSuQmCC\n",
109 | "text/plain": [
110 | ""
111 | ]
112 | },
113 | "metadata": {
114 | "needs_background": "light"
115 | },
116 | "output_type": "display_data"
117 | }
118 | ],
119 | "source": [
120 | "plt.figure(figsize=[10, 4])\n",
121 | "x = np.linspace(-10, 10)\n",
122 | "\n",
123 | "plt.subplot(1, 2, 1)\n",
124 | "plt.plot(x, sigmoid(x))\n",
125 | "plt.title('sigmoid')\n",
126 | "\n",
127 | "plt.subplot(1, 2, 2)\n",
128 | "plt.plot(x, tanh(x))\n",
129 | "plt.title('tanh')"
130 | ]
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "metadata": {},
135 | "source": [
136 | "Notice that the only difference between these two functions is the scale of the y-axis: sigmoid maps into $(0, 1)$ while tanh maps into $(-1, 1)$"
137 | ]
138 | },
139 | {
140 | "cell_type": "markdown",
141 | "metadata": {},
142 | "source": [
143 | "# Formula of Batch Training\n",
144 | "---\n",
145 | "The above shows the formula of a single input vector, however in actual training processes, a batch is trained instead of 1 at a time. The change applied in the formula is trivial, we just need to replace the single vector $x$ with a matrix $X$ with size $n \\times m$, where $n$ is number of features and $m$ is the the batch size -- samples are stacked column wise, and the following result matrix are applied likewise.\n",
146 | "\n",
147 | "$$ Z^{[1]} = W^{[1]}X + b^{[1]} \\tag5 $$\n",
148 | "\n",
149 | "$$ A^{[1]} = \\tanh{Z^{[1]}} \\tag6 $$\n",
150 | "\n",
151 | "$$ Z^{[2]} = W^{[2]}A^{[1]} + b^{[2]} \\tag7 $$\n",
152 | "\n",
153 | "$$ \\hat{Y} = A^{[2]} = \\sigma({Z^{[2]}}) \\tag8 $$\n",
154 | "\n",
155 | "$$ J(W^{[1]}, b^{[1]}, W^{[2]}, b^{[2]}) = \\frac{1}{m} \\sum_{i}^{m}L(y^{(i)}, \\hat{y}^{(i)}) \\tag9 $$\n",
156 | "\n",
157 | "For the dimension of each matrix taken in this example, we have:\n",
158 | "\n",
159 | "- $X$ has dimension $2 \\times m$, as here there are 2 features and $m$ is the batch size\n",
160 | "- $W^{[1]}$ in the case above would have dimension $4 \\times 2$, where the $i$-th row holds the weights of hidden node $i$\n",
161 | "- $b^{[1]}$ has dimension $4 \\times 1$\n",
162 | "- $Z^{[1]}$ and $A^{[1]}$ both have dimension $4 \\times m$\n",
163 | "- $W^{[2]}$ has dimension $1 \\times 4$\n",
164 | "- consequently, $Z^{[2]}$ and $A^{[2]}$ would have dimension $1 \\times m$\n",
165 | "\n",
166 | "Same as in logistic regression, for batch training the cost $J$ is the average loss over all training samples in the batch.\n",
167 | "\n",
168 | "That is all for the forward propagation. To make the network learn, we need the derivatives of the loss with respect to the weight parameters, and then update the weights using gradient descent.\n",
169 | "\n",
170 | "But now it is enough for us to implement the forward propagation first."
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {},
176 | "source": [
177 | "# Generate Sample Dataset\n",
178 | "---\n",
179 | "Here we generate a simple binary classification task with 5000 data points and 20 features for later model validation."
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 4,
185 | "metadata": {},
186 | "outputs": [
187 | {
188 | "name": "stdout",
189 | "output_type": "stream",
190 | "text": [
191 | "train shape (4000, 20)\n",
192 | "test shape (1000, 20)\n"
193 | ]
194 | }
195 | ],
196 | "source": [
197 | "from sklearn import datasets\n",
198 | "\n",
199 | "\n",
200 | "X, y = datasets.make_classification(n_samples=5000, random_state=123)\n",
201 | "\n",
202 | "X_train, X_test = X[:4000], X[4000:]\n",
203 | "y_train, y_test = y[:4000], y[4000:]\n",
204 | "\n",
205 | "print('train shape', X_train.shape)\n",
206 | "print('test shape', X_test.shape)"
207 | ]
208 | },
209 | {
210 | "cell_type": "markdown",
211 | "metadata": {},
212 | "source": [
213 | "# Weights Initialization\n",
214 | "---\n",
215 | "Our neural network has 1 hidden layer and 2 layers in total (hidden layer + output layer), so there are 4 parameter arrays to initialize ($W^{[1]}, b^{[1]}$ and $W^{[2]}, b^{[2]}$). Notice that the weights are initialized with small random values so that the tanh/sigmoid activations stay near their linear region, where the gradients are larger and learning is faster in the beginning phase."
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 5,
221 | "metadata": {},
222 | "outputs": [],
223 | "source": [
224 | "def init_weights(n_input, n_hidden, n_output):\n",
225 | " params = {}\n",
226 | " params['W1'] = np.random.randn(n_hidden, n_input) * 0.01\n",
227 | " params['b1'] = np.zeros((n_hidden, 1))\n",
228 | " params['W2'] = np.random.randn(n_output, n_hidden) * 0.01\n",
229 | " params['b2'] = np.zeros((n_output, 1))\n",
230 | " \n",
231 | " return params"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": 6,
237 | "metadata": {
238 | "scrolled": true
239 | },
240 | "outputs": [
241 | {
242 | "name": "stdout",
243 | "output_type": "stream",
244 | "text": [
245 | "W1 shape (10, 20)\n",
246 | "b1 shape (10, 1)\n",
247 | "W2 shape (1, 10)\n",
248 | "b2 shape (1, 1)\n"
249 | ]
250 | }
251 | ],
252 | "source": [
253 | "params = init_weights(20, 10, 1)\n",
254 | "\n",
255 | "print('W1 shape', params['W1'].shape)\n",
256 | "print('b1 shape', params['b1'].shape)\n",
257 | "print('W2 shape', params['W2'].shape)\n",
258 | "print('b2 shape', params['b2'].shape)"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 | "# Forward Propagation\n",
266 | "---\n",
267 | "Let's implement the forward process following equations $(5) \\sim (8)$."
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": 7,
273 | "metadata": {},
274 | "outputs": [],
275 | "source": [
276 | "def forward(X, params):\n",
277 | " \"\"\"\n",
278 | " X: need to have shape (n_features x m_samples)\n",
279 | " \"\"\"\n",
280 | " W1, b1, W2, b2 = params['W1'], params['b1'], params['W2'], params['b2']\n",
281 | " A0 = X\n",
282 | " \n",
283 | " cache = {}\n",
284 | " Z1 = np.dot(W1, A0) + b1\n",
285 | " A1 = tanh(Z1)\n",
286 | " Z2 = np.dot(W2, A1) + b2\n",
287 | " A2 = sigmoid(Z2)\n",
288 | " \n",
289 | " cache['Z1'] = Z1\n",
290 | " cache['A1'] = A1\n",
291 | " cache['Z2'] = Z2\n",
292 | " cache['A2'] = A2\n",
293 | " return cache"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": 8,
299 | "metadata": {},
300 | "outputs": [
301 | {
302 | "name": "stdout",
303 | "output_type": "stream",
304 | "text": [
305 | "Z1 shape (10, 100)\n",
306 | "A1 shape (10, 100)\n",
307 | "Z2 shape (1, 100)\n",
308 | "A2 shape (1, 100)\n"
309 | ]
310 | }
311 | ],
312 | "source": [
313 | "# get 100 samples\n",
314 | "inp = X[:100].T\n",
315 | "\n",
316 | "cache = forward(inp, params)\n",
317 | "\n",
318 | "print('Z1 shape', cache['Z1'].shape)\n",
319 | "print('A1 shape', cache['A1'].shape)\n",
320 | "print('Z2 shape', cache['Z2'].shape)\n",
321 | "print('A2 shape', cache['A2'].shape)"
322 | ]
323 | },
324 | {
325 | "cell_type": "markdown",
326 | "metadata": {},
327 | "source": [
328 | "# Loss Function\n",
329 | "---\n",
330 | "Following equation $(9)$, let's calculate the loss of each batch."
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 9,
336 | "metadata": {},
337 | "outputs": [],
338 | "source": [
339 | "def loss(Y, Y_hat):\n",
340 | " \"\"\"\n",
341 | " Y: vector of true value\n",
342 | " Y_hat: vector of predicted value\n",
343 | " \"\"\"\n",
344 | " assert Y.shape[0] == 1\n",
345 | " assert Y.shape == Y_hat.shape\n",
346 | " m = Y.shape[1]\n",
347 | " s = Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)\n",
348 | " loss = -np.sum(s) / m\n",
349 | " return loss"
350 | ]
351 | },
352 | {
353 | "cell_type": "code",
354 | "execution_count": 10,
355 | "metadata": {},
356 | "outputs": [
357 | {
358 | "name": "stdout",
359 | "output_type": "stream",
360 | "text": [
361 | "loss 1.4237153959578408\n"
362 | ]
363 | }
364 | ],
365 | "source": [
366 | "Y = np.array([np.random.choice([0, 1]) for i in range(10)]).reshape(1, -1)\n",
367 | "Y_hat = np.random.uniform(0, 1, 10).reshape(1, -1)\n",
368 | "\n",
369 | "l = loss(Y, Y_hat)\n",
370 | "print(f'loss {l}')"
371 | ]
372 | },
373 | {
374 | "cell_type": "markdown",
375 | "metadata": {},
376 | "source": [
377 | "# Back Propagation\n",
378 | "---\n",
379 | "Now it comes to the back propagation which is the key to our weights update. Given the loss function $L$ we defined above, we have gradients as follows:\n",
380 | "\n",
381 | "$$ dZ^{[2]} = A^{[2]} - Y \\tag1 $$\n",
382 | "\n",
383 | "$$ dW^{[2]} = \\frac{1}{m}dZ^{[2]}A^{[1]^T} \\tag2 $$\n",
384 | "\n",
385 | "$$ db^{[2]} = \\frac{1}{m}np.sum(dZ^{[2]}, axis=1, keepdims=True) \\tag3 $$\n",
386 | "\n",
387 | "$$ dZ^{[1]} = W^{[2]^T}dZ^{[2]} * (1 - A^{[1]^2}) \\tag4 $$\n",
388 | "\n",
389 | "$$ dW^{[1]} = \\frac{1}{m}dZ^{[1]}X^{T} \\tag5 $$\n",
390 | "\n",
391 | "$$ db^{[1]} = \\frac{1}{m}np.sum(dZ^{[1]}, axis=1, keepdims=True) \\tag6 $$"
392 | ]
393 | },
394 | {
395 | "cell_type": "markdown",
396 | "metadata": {},
397 | "source": [
398 | "In equation $(4)$, $*$ denotes element-wise multiplication, and the derivative of $\\tanh(x)$ is $1 - \\tanh^2(x)$ (equivalently $1 - A^2$, since $A = \\tanh(Z)$). You can try to derive the equations above yourself, but I basically took them from the internet.\n",
399 | "\n",
400 | "Let's break down the shape of each element, given that the number of units in each layer equals `(n_x, n_h, n_y)` and the batch size equals `m`:\n",
401 | "\n",
402 | "- $A^{[2]}$, $Y$ and $dZ^{[2]}$ has shape `(n_y, m)`\n",
403 | "- Because $A^{[1]}$ has shape `(n_h, m)`, $dW^{[2]}$ would have shape `(n_y, n_h)`\n",
404 | "- $db^{[2]}$ has shape `(n_y, 1)`\n",
405 | "\n",
406 | "- Because $dZ^{[2]}$ has shape `(n_y, m)`, $W^{[2]}$ has shape`(n_y, n_h)`, $dZ^{[1]}$ would have shape `(n_h, m)`\n",
407 | "- In equation $(5)$, $X$ has shape `(n_x, m)`, so $dW^{[1]}$ has shape `(n_h, n_x)`\n",
408 | "- $db^{[1]}$ has shape `(n_h, 1)`\n",
409 | "\n",
410 | "\n",
411 | "Once we understand the formula, implementation should come with ease."
412 | ]
413 | },
414 | {
415 | "cell_type": "code",
416 | "execution_count": 11,
417 | "metadata": {},
418 | "outputs": [],
419 | "source": [
420 | "def backward(params, cache, X, Y):\n",
421 | " \"\"\"\n",
422 | " [From coursera deep-learning course]\n",
423 | " params: we initiate above with W1, b1, W2, b2\n",
424 | " cache: the intermediate caculation we saved with Z1, A1, Z2, A2\n",
425 | " X: shape of (n_x, m)\n",
426 | " Y: shape (n_y, m)\n",
427 | " \"\"\"\n",
428 | " \n",
429 | " m = X.shape[1]\n",
430 | "\n",
431 | " W1 = params['W1']\n",
432 | " W2 = params['W2']\n",
433 | " A1 = cache['A1']\n",
434 | " A2 = cache['A2']\n",
435 | "\n",
436 | " dZ2 = A2 - Y\n",
437 | " dW2 = (1 / m) * np.dot(dZ2, A1.T)\n",
438 | " db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)\n",
439 | " dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))\n",
440 | " dW1 = (1 / m) * np.dot(dZ1, X.T)\n",
441 | " db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)\n",
442 | "\n",
443 | " grads = {\"dW1\": dW1,\n",
444 | " \"db1\": db1,\n",
445 | " \"dW2\": dW2,\n",
446 | " \"db2\": db2}\n",
447 | "\n",
448 | " return grads"
449 | ]
450 | },
451 | {
452 | "cell_type": "markdown",
453 | "metadata": {},
454 | "source": [
455 | "# Batch Loader\n",
456 | "---\n",
457 | "Now let's assemble everything into a class."
458 | ]
459 | },
460 | {
461 | "cell_type": "code",
462 | "execution_count": 12,
463 | "metadata": {},
464 | "outputs": [],
465 | "source": [
466 | "class ShallowNN:\n",
467 | " def __init__(self, n_input, n_hidden, n_output):\n",
468 | " self.n_input = n_input\n",
469 | " self.n_hidden = n_hidden\n",
470 | " self.n_output = n_output\n",
471 | " self.params = {}\n",
472 | " self.cache = {}\n",
473 | " self.grads = {}\n",
474 | " \n",
475 | " def compute_loss(self, Y, Y_hat):\n",
476 | " \"\"\"\n",
477 | " Y: vector of true value\n",
478 | " Y_hat: vector of predicted value\n",
479 | " \"\"\"\n",
480 | " assert Y.shape[0] == 1\n",
481 | " assert Y.shape == Y_hat.shape\n",
482 | " m = Y.shape[1]\n",
483 | " s = Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)\n",
484 | " loss = -np.sum(s) / m\n",
485 | " return loss\n",
486 | " \n",
487 | " \n",
488 | " def init_weights(self):\n",
489 | " self.params['W1'] = np.random.randn(self.n_hidden, self.n_input) * 0.01\n",
490 | " self.params['b1'] = np.zeros((self.n_hidden, 1))\n",
491 | " self.params['W2'] = np.random.randn(self.n_output, self.n_hidden) * 0.01\n",
492 | " self.params['b2'] = np.zeros((self.n_output, 1))\n",
493 | " \n",
494 | " \n",
495 | " def forward(self, X):\n",
496 | " \"\"\"\n",
497 | " X: need to have shape (n_features x m_samples)\n",
498 | " \"\"\"\n",
499 | " W1, b1, W2, b2 = self.params['W1'], self.params['b1'], self.params['W2'], self.params['b2']\n",
500 | " A0 = X\n",
501 | "\n",
502 | " Z1 = np.dot(W1, A0) + b1\n",
503 | " A1 = tanh(Z1)\n",
504 | " Z2 = np.dot(W2, A1) + b2\n",
505 | " A2 = sigmoid(Z2)\n",
506 | "\n",
507 | " self.cache['Z1'] = Z1\n",
508 | " self.cache['A1'] = A1\n",
509 | " self.cache['Z2'] = Z2\n",
510 | " self.cache['A2'] = A2\n",
511 | " \n",
512 | " \n",
513 | " def backward(self, X, Y):\n",
514 | " \"\"\"\n",
515 | " [From coursera deep-learning course]\n",
516 | " params: we initiate above with W1, b1, W2, b2\n",
517 | " cache: the intermediate caculation we saved with Z1, A1, Z2, A2\n",
518 | " X: shape of (n_x, m)\n",
519 | " Y: shape (n_y, m)\n",
520 | " \"\"\"\n",
521 | "\n",
522 | " m = X.shape[1]\n",
523 | "\n",
524 | " W1 = self.params['W1']\n",
525 | " W2 = self.params['W2']\n",
526 | " A1 = self.cache['A1']\n",
527 | " A2 = self.cache['A2']\n",
528 | "\n",
529 | " dZ2 = A2 - Y\n",
530 | " dW2 = (1 / m) * np.dot(dZ2, A1.T)\n",
531 | " db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)\n",
532 | " dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))\n",
533 | " dW1 = (1 / m) * np.dot(dZ1, X.T)\n",
534 | " db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)\n",
535 | "\n",
536 | " self.grads = {\"dW1\": dW1,\n",
537 | " \"db1\": db1,\n",
538 | " \"dW2\": dW2,\n",
539 | " \"db2\": db2}\n",
540 | "\n",
541 | " \n",
542 | " def get_batch_indices(self, X_train, batch_size):\n",
543 | " n = X_train.shape[0]\n",
544 | " indices = [range(i, i+batch_size) for i in range(0, n, batch_size)]\n",
545 | " return indices\n",
546 | " \n",
547 | " \n",
548 | " def update_weights(self, lr):\n",
549 | " W1, b1, W2, b2 = self.params['W1'], self.params['b1'], self.params['W2'], self.params['b2']\n",
550 | " dW1, db1, dW2, db2 = self.grads['dW1'], self.grads['db1'], self.grads['dW2'], self.grads['db2']\n",
551 | "        self.params['W1'] -= lr * dW1\n",
552 | "        self.params['W2'] -= lr * dW2\n",
553 | "        self.params['b1'] -= lr * db1\n",
554 | "        self.params['b2'] -= lr * db2\n",
555 | " \n",
556 | " \n",
557 | " def fit(self, X_train, y_train, batch_size=32, n_iterations=100, lr=0.01):\n",
558 | " self.init_weights()\n",
559 | " \n",
560 | " indices = self.get_batch_indices(X_train, batch_size)\n",
561 | " for i in range(n_iterations):\n",
562 | " for ind in indices:\n",
563 | " X = X_train[ind, :].T\n",
564 | " Y = y_train[ind].reshape(1, batch_size)\n",
565 | " \n",
566 | " self.forward(X)\n",
567 | " self.backward(X, Y)\n",
568 | " self.update_weights(lr)\n",
569 | " \n",
570 | " if i % 10 == 0:\n",
571 | " Y_hat = self.cache['A2']\n",
572 | " loss = self.compute_loss(Y, Y_hat)\n",
573 | " print(f'iteration {i}: loss {loss}')\n",
574 | " \n",
575 | " \n",
576 | " def predict(self, X):\n",
577 | " W1, b1, W2, b2 = self.params['W1'], self.params['b1'], self.params['W2'], self.params['b2']\n",
578 | " A0 = X\n",
579 | "\n",
580 | " Z1 = np.dot(W1, A0) + b1\n",
581 | " A1 = tanh(Z1)\n",
582 | " Z2 = np.dot(W2, A1) + b2\n",
583 | " A2 = sigmoid(Z2)\n",
584 | "\n",
585 | " return A2\n",
586 | "\n",
587 | " \n",
588 | "def accuracy(Y, Y_pred):\n",
589 | " \"\"\"\n",
590 | " Y: vector of true value\n",
591 | " Y_pred: vector of predicted value\n",
592 | " \"\"\"\n",
593 | " def _to_binary(x):\n",
594 | " return 1 if x > .5 else 0\n",
595 | "\n",
596 | " assert Y.shape[0] == 1\n",
597 | " assert Y.shape == Y_pred.shape\n",
598 | " Y_pred = np.vectorize(_to_binary)(Y_pred)\n",
599 | " acc = float(np.dot(Y, Y_pred.T) + np.dot(1 - Y, 1 - Y_pred.T))/Y.size\n",
600 | " return acc"
601 | ]
602 | },
603 | {
604 | "cell_type": "code",
605 | "execution_count": 13,
606 | "metadata": {},
607 | "outputs": [],
608 | "source": [
609 | "model = ShallowNN(20, 10, 1)"
610 | ]
611 | },
612 | {
613 | "cell_type": "code",
614 | "execution_count": 14,
615 | "metadata": {},
616 | "outputs": [
617 | {
618 | "name": "stdout",
619 | "output_type": "stream",
620 | "text": [
621 | "iteration 0: loss 0.19575153437513237\n",
622 | "iteration 10: loss 0.08698150022188056\n",
623 | "iteration 20: loss 0.07983220808062544\n",
624 | "iteration 30: loss 0.07437750278137427\n",
625 | "iteration 40: loss 0.06677985931984107\n",
626 | "iteration 50: loss 0.05925402693910988\n",
627 | "iteration 60: loss 0.054844001823287386\n",
628 | "iteration 70: loss 0.0523565446212034\n",
629 | "iteration 80: loss 0.051366335822876\n",
630 | "iteration 90: loss 0.050606347795966344\n",
631 | "iteration 100: loss 0.04997955343968667\n",
632 | "iteration 110: loss 0.04945583968865451\n",
633 | "iteration 120: loss 0.04878268474552334\n",
634 | "iteration 130: loss 0.04795982774874325\n",
635 | "iteration 140: loss 0.047676519502507106\n",
636 | "iteration 150: loss 0.04813515615412707\n",
637 | "iteration 160: loss 0.04845653653570918\n",
638 | "iteration 170: loss 0.04849591579686214\n",
639 | "iteration 180: loss 0.04844944650406025\n",
640 | "iteration 190: loss 0.048350205002389776\n"
641 | ]
642 | }
643 | ],
644 | "source": [
645 | "model.fit(X_train, y_train, batch_size=100, n_iterations=200, lr=0.01)"
646 | ]
647 | },
648 | {
649 | "cell_type": "code",
650 | "execution_count": 15,
651 | "metadata": {},
652 | "outputs": [
653 | {
654 | "name": "stdout",
655 | "output_type": "stream",
656 | "text": [
657 | "accuracy: 95.1%\n"
658 | ]
659 | }
660 | ],
661 | "source": [
662 | "y_preds = model.predict(X_test.T)\n",
663 | "\n",
664 | "acc = accuracy(y_test.reshape(1, -1), y_preds)\n",
665 | "print(f'accuracy: {acc*100}%')"
666 | ]
667 | },
668 | {
669 | "cell_type": "code",
670 | "execution_count": null,
671 | "metadata": {},
672 | "outputs": [],
673 | "source": []
674 | }
675 | ],
676 | "metadata": {
677 | "kernelspec": {
678 | "display_name": "Python 3",
679 | "language": "python",
680 | "name": "python3"
681 | },
682 | "language_info": {
683 | "codemirror_mode": {
684 | "name": "ipython",
685 | "version": 3
686 | },
687 | "file_extension": ".py",
688 | "mimetype": "text/x-python",
689 | "name": "python",
690 | "nbconvert_exporter": "python",
691 | "pygments_lexer": "ipython3",
692 | "version": "3.8.3"
693 | }
694 | },
695 | "nbformat": 4,
696 | "nbformat_minor": 4
697 | }
698 |
--------------------------------------------------------------------------------
/tensorflow/tf-hands-on.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "%matplotlib inline\n",
10 | "\n",
11 | "import tensorflow as tf\n",
12 | "import numpy as np\n",
13 | "import matplotlib.pyplot as plt"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "training size (60000, 28, 28)\n",
26 | "test size (10000, 28, 28)\n"
27 | ]
28 | }
29 | ],
30 | "source": [
31 | "minist = tf.keras.datasets.mnist\n",
32 | "\n",
33 | "(x_train, y_train), (x_test, y_test) = minist.load_data()\n",
34 | "x_train, x_test = x_train/255., x_test/255.\n",
35 | "\n",
36 | "print('training size', x_train.shape)\n",
37 | "print('test size', x_test.shape)"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 3,
43 | "metadata": {},
44 | "outputs": [
45 | {
46 | "data": {
47 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhQAAAIKCAYAAAB7ptYOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAA8i0lEQVR4nO3deZhcZZn38d+dprMQtiwQQghrEjCgJtIsAgoIOsirBASBjDiRYYwo+4CCODM6o86LI4KigAYJCYqgIyAZRRAwgspiAoYlCSQBEkkICTthSeh03+8fKedtuU+nq+up5Zzq7+e6+urqX5+q85zquzt3Tj31HHN3AQAApOjX6AEAAIDio6EAAADJaCgAAEAyGgoAAJCMhgIAACSjoQAAAMmSGgozO9zMHjezJWZ2frUGBdQTdYyio4aRB1bpOhRm1iJpkaQPSlouaY6kye6+oHrDA2qLOkbRUcPIi00S7ruPpCXu/qQkmdn1kiZJ6raI+9sAH6jBCbtEX7ZGLz3v7ltX+WF7VcfUMFJQwyi6jdVwSkMxStLTXb5eLmnfjd1hoAZrXzs0YZfoy+7wny+rwcP2qo6pYaSghlF0G6vhlIaiLGY2VdJUSRqoTWu9O6DqqGEUHTWMekiZlLlC0uguX29fyv6Gu09z9zZ3b2vVgITdATXRYx1Tw8g5ahi5kNJQzJE01sx2NrP+kk6QNKs6wwLqhjpG0VHDyIWKX/Jw9/Vmdpqk2yS1SJru7vOrNjKgDqhjFB01jLxImkPh7rdIuqVKYwEagjpG0VHDyANWygQAAMloKAAAQDIaCgAAkIyGAgAAJKOhAAAAyWgoAABAMhoKAACQjIYCAAAko6EAAADJaCgAAEAyGgoAAJCMhgIAACSjoQAAAMmSrjaK2lv/gb1CtvJz60L20HtnZt7/3fdOCdl2l/UPWcvsBysYHQAAG3CGAgAAJKOhAAAAyWgoAABAsqQ5FGa2VNIaSR2S1rt7WzUGBdQTdYyio4aRB9WYlHmIuz9fhcfp8zoPmhiyS6d/L2RjWuOPrbObx/zze68O2eNtHSH7/E779TzA5kYdF9zrx+4bsm/81xUh++px/xAyn/toTcZUZ9RwTj3xzfeGbOHfx7/trdYSsvd/bmrIBv3iT9UZWJXxkgcAAEiW2lC4pN+Y2QNmFtsooBioYxQdNYyGS33J40B3X2Fm20i63cwec/e7u25QKu6pkjRQmybuDqiJjdYxNYwCoIbRcElnKNx9Renzakk3SdonY5tp7t7m7m2tGpCyO6Ameqpjahh5Rw0jDyo+Q2FmgyX1c/c1pdsfkvQfVRtZk2v/UJyE/YXLfxSyca1xVcvOjCmYT7a3Z+7nlc74x2Nixt+TdR/eO2SDZj8S9712beZ+iioPdfzmpNCH681hcXLW0On31mM4hbW6Lf7/6KtLP9qAkdRXHmoYGzx79v6Z+e+O/6+QtXv8257JU0ZUXykveYyQdJOZ/fVxfuLut1ZlVED9UMcoOmoYuVBxQ+HuT0p6dxXHAtQddYyio4aRF7xtFAAAJKOhAAAAybh8eZW1bLFFyF5//+4hO/uSn4TskEGvZTxieT3fjJeyJwPdeXlcoe2PX7k0ZLf/8PshG//j00K2y3lMDKy2Z94ff8ab7vpy3HB67cdSGP3ipFXf4c2QHbrNYyG707J/V4BUr43OXrN4aL8yJ2AWHGcoAABAMhoKAACQjIYCAAAko6EAAADJmJRZZcuvGRWyOXtfVvP9/sc2czLzWzeLE9BOWvqhkM3c6Y6QbTH+hfSBoUf//pH/Dtk3FsafEf6/ll13DNljB8VZqxP+dGLItpsTV4AFeuu1j+8bshuO/k43W1tIvv9ynKx/x3FxBeXBy+aHLHvqZ+NxhgIAACSjoQAAAMloKAAAQDIaCgAAkIxJmRVa/4G9MvPrJnwvZP1U3ippJy07NGRz73hHyB45Oe5j9psDMx9zm7lx9cAlL8XJQK3/O
Ttk/eI8ItRAq61v9BAKZ5MfvlHWdm8+EVeuBXpr7Uf2CdmX/2+cBDyutfw/mjOvPDxk2y64p3cDyxnOUAAAgGQ0FAAAIBkNBQAASEZDAQAAkvU4KdPMpkv6iKTV7r5nKRsq6aeSdpK0VNJx7v5S7YbZWJ0HTQzZpdPjxEhJGtMan9LOjHXNjnzs6JC1HPt6yLb6Px6y8T+KlxUfd9nTmePp9/SfQzbk93G79q93hOyGd8VJR/94yBkha5n9YOa+8yQPddx54ITM/H0D/1CrXTatnQaXt4rr6DtiXRdVHmq4r1p54tqQHTIoZlJL5v2nLD0sZNt+p9gTMLOUc4ZihqS3T0c9X9Kd7j5W0p2lr4E8myHqGMU2Q9QwcqzHhsLd75b04tviSZJmlm7PlHRUdYcFVBd1jKKjhpF3lc6hGOHuK0u3n5U0okrjAeqJOkbRUcPIjeRJme7ukuIL/SVmNtXM5prZ3HatS90dUBMbq2NqGEVADaPRKm0oVpnZSEkqfV7d3YbuPs3d29y9rVUDKtwdUBNl1TE1jByjhpEblS69PUvSFEkXlj7fXLURNZjttUfInv/nuHz1uNbs5bQfyGj+f/va+JC9cP3okA176d6Qbfnj+2KWsd9aLN48oiX+4XnhrLjk8TZx1e6iqGsdL/vIoMx8m5ZNa7nbwttkpx1CduzQWWXdd9BT8Q0PzfO+D0lN/Le4UTbZflTI5r/v6pC1e6ykhe3Zj/mXi8eFbLDu7/3gcq7HMxRmdp2keyXtZmbLzexkbSjeD5rZYkmHlb4Gcos6RtFRw8i7Hs9QuPvkbr4Vr2QF5BR1jKKjhpF3rJQJAACS0VAAAIBklU7KbAr9No2T4db/16shu2/3G0P21Pq3Mh/zny84J2RDfv+XkG0zOE7GLsJksX1GLgvZ0voPo5A2GbOm7G3XPrZV7QZSME9/e3DIDhgQl7O/6tXt451fjr/PwF+17LFbyNp+8mjFj3f8jfHSBJK06w1xcn0z4gwFAABIRkMBAACS0VAAAIBkNBQAACBZn56U+eZBcVXM23a/vKz7/tOZZ2fmm/8iTr6pxSqWaG7bzI2TDouqZfiwkK06Jq4cOPS45Zn3v2vcVRnpwJBccdlRIdtm1T09jg9917IjY23+fNifM7ZsCcnfP/HRkI278InM/RRhwn01cIYCAAAko6EAAADJaCgAAEAyGgoAAJCsT0/KfNdX54WsX0aPddKyeO2dQb/4Uy2G1DCtFicdtXvcrsUyQlTdm0NjHcb1IsvX+b6JIfMWy9z26cPiZevf2i5el7lf/zjV7Dfv+27IWjN282xH3Me/Pnl05nhe7IwTVDftF/c94v64EinVir968aT3huymU76ZsWVrSE55+qCQtU+JNdzxXFwVuS/hDAUAAEhGQwEAAJLRUAAAgGQ9NhRmNt3MVpvZo12yr5jZCjObV/o4orbDBNJQxyg6ahh5V86kzBmSvifpmrfll7j7RVUfUY28/Mk4IedfRsThd6p/yB74zfiQ7aDmWoGv3eMkt07FyXC3LozPxVg9WJMxVdkMNbiO162Nk70kqTNj6uDVF1wSslmnTah43+cN+2HI+il7Uuab/lbInumI9fG95w4O2WF3nBWyrf4cf6dG/mZVyGxZ9kqZzy0cFLIRLXGSqM95JPP+TWSGmuBvcT1kXZb8nq99L2PLuOJqlnuX7xSy0Usrv8x5s+rxDIW73y3pxTqMBagZ6hhFRw0j71LmUJxmZg+XTsMNqdqIgPqijlF01DByodKG4gpJu0qaIGmlpG91t6GZTTWzuWY2t13rKtwdUBNl1TE1jByjhpEbFTUU7r7K3TvcvVPSlZL22ci209y9zd3bWhUXAgEapdw6poaRV9Qw8qSilTLNbKS7ryx9ebSk3M9OWR/ndWnLfnGy2L1r4y/bLtc8Ex+vKqOqvX6bbhqyxy7aM2PLB0LyiSc/HLLdz3wqZEW9NG+963jMiVmXRZb2+L+nhWz03iuquu/Zq
+Plwp/79faZ2w6bHyc89r91TsaWcbtxmlvWeLJqZsV5+2duu/eAe0N2/WujytpPsyvi3+J6WHRB/LuXNfG8XDtcGDNWYY16bCjM7DpJB0sabmbLJX1Z0sFmNkEbntOlkj5TuyEC6ahjFB01jLzrsaFw98kZ8VU1GAtQM9Qxio4aRt6xUiYAAEhGQwEAAJL16cuXZ3mhY7OQrX9yaf0H0ktZky8l6fEL3xmyxybFFeN+/caWIXvmsjEh2/yl+yoYHTZm5y/GSYf1MFL5utTypu9/ruxt/2X2MSEbpz9VczgogM6DJmbmX2v7RcWP+cFHTwjZZnOZ61oOzlAAAIBkNBQAACAZDQUAAEhGQwEAAJLRUAAAgGS8y+Ntzv3jx0M2LmNZ6kbKmtm8+p/fzNx2YVt8R8ehjxwfssGHPxmyzcU7OpBPO97MwseQvj5jWma+Z2t59XHuyveHbMvJL4WsqJcXqDfOUAAAgGQ0FAAAIBkNBQAASEZDAQAAkvWdSZkWo34Z/dR3DrwuZJdpXC1GVJZl//HekN3wDxeHbFxr/8z7v+dPU0K23dEL0gcGAA02sX/2/4nbvbxplPde/Z6QbfPSPUlj6ss4QwEAAJLRUAAAgGQ0FAAAIFmPDYWZjTaz2Wa2wMzmm9mZpXyomd1uZotLn4fUfrhA71HDaAbUMfKunEmZ6yWd4+4Pmtnmkh4ws9slfUrSne5+oZmdL+l8SefVbqiJMhZO61RnyA4a9ELIzpqxV8h2vTreV5Jan10TslUHbR2yoccvD9npO9wZsg9vGlfpnPX6iJD9wyOHZ45n+A8GZ+Z9THPUcB/VYvH/PS+Naw3Ztr+ux2gaqk/X8dM/3zNkrTYv6TFH/u75kLEqZuV6PEPh7ivd/cHS7TWSFkoaJWmSpJmlzWZKOqpGYwSSUMNoBtQx8q5XcyjMbCdJEyXdL2mEu68sfetZSfG/zUDOUMNoBtQx8qjshsLMNpN0g6Sz3P3Vrt9zd1fmiwqSmU01s7lmNrdd65IGC6SghtEMKqljahj1UFZDYWat2lDA17r7jaV4lZmNLH1/pKTVWfd192nu3ububa0aUI0xA71GDaMZVFrH1DDqocdJmWZmkq6StNDduy7ROEvSFEkXlj7fXJMR1tlAi0/Jwg9+P2R/eN/AzPsvXrdtyE7acmnF4znzmfeF7NZ7JoRs7Jlcarw7fa2Gm02HZ0yA7oNveO9Lddx50MSQfXvCj0PW3YqYr3SuDdnevz4rZLsvY9XgairnXR4HSPqkpEfM/ndK7QXaULw/M7OTJS2TdFxNRgiko4bRDKhj5FqPDYW7/0GZV8KQJB1a3eEA1UcNoxlQx8i7PnjiEAAAVBsNBQAASNZnLl8+4ndxAv95n4mXBv/GtveW9XjvH/hWZn7gwKVl3f/P62IvN/muqSEbd1JcKXOsmICJvu2Nvd9o9BBQQ2uH9g/ZgQNfz9iyJfP+t72xQ8jGTZ0Tsuz1jlEpzlAAAIBkNBQAACAZDQUAAEhGQwEAAJL1mUmZHYueCNnij+8UsvGnnx6yBcd9N2nfu9/yuZDtdnmcVDbuz3ECJtDXZV2+HED+8JsKAACS0VAAAIBkNBQAACAZDQUAAEjWZyZlZln/5NKQjTk7ZkeevXfSfsYprtDmSY8INJ91d2ydmXdMYD3DvmaLec+G7PTlHwjZ90ffVY/hoEycoQAAAMloKAAAQDIaCgAAkIyGAgAAJOuxoTCz0WY228wWmNl8MzuzlH/FzFaY2bzSxxG1Hy7Qe9Qwio4aRhGU8y6P9ZLOcfcHzWxzSQ+Y2e2l713i7hfVbnhAVVDDBbDtJfdk5kdc8p6Q7aJ5NR5N7vSpGl7/1LKQLd8vbvcR7VWH0aBcPTYU7r5S0srS7TVmtlDSqFoPDKgWahhFRw2jCHo1h8LMdpI0UdL9peg0M3vYzKab2ZBqDw6oNmoYRUcNI6/KbijMbDNJN0g6y91flXSFpF0lTdCGzvlb3dxvqpnNNbO57
VqXPmKgQtQwio4aRp6V1VCYWas2FPG17n6jJLn7KnfvcPdOSVdK2ifrvu4+zd3b3L2tVQOqNW6gV6hhFB01jLwr510eJukqSQvd/eIu+cgumx0t6dHqDw9IRw2j6KhhFEE57/I4QNInJT1iZvNK2QWSJpvZBG24LMVSSZ+pwfiAaqCGUXTUMHKvnHd5/EGSZXzrluoPB6g+ahhFRw2jCFgpEwAAJKOhAAAAyWgoAABAMhoKAACQjIYCAAAko6EAAADJaCgAAEAyc/f67czsOUl/vS7tcEnP123ntcWx1MeO7r51IwdADRdGXo+HGq6dZjoWKb/H020N17Wh+Jsdm81197aG7LzKOJa+qZmeq2Y6Fqn5jqdWmul5aqZjkYp5PLzkAQAAktFQAACAZI1sKKY1cN/VxrH0Tc30XDXTsUjNdzy10kzPUzMdi1TA42nYHAoAANA8eMkDAAAko6EAAADJ6t5QmNnhZva4mS0xs/Prvf9UZjbdzFab2aNdsqFmdruZLS59HtLIMZbLzEab2WwzW2Bm883szFJeyOOpF2o4P6jhyhW5jqnhfKprQ2FmLZIuk/RhSeMlTTaz8fUcQxXMkHT427LzJd3p7mMl3Vn6ugjWSzrH3cdL2k/SqaWfR1GPp+ao4dyhhivQBHU8Q9Rw7tT7DMU+kpa4+5Pu/pak6yVNqvMYkrj73ZJefFs8SdLM0u2Zko6q55gq5e4r3f3B0u01khZKGqWCHk+dUMM5Qg1XrNB1TA3nU70bilGSnu7y9fJSVnQj3H1l6fazkkY0cjCVMLOdJE2UdL+a4HhqiBrOKWq4V5qxjgv/My96DTMps8p8w/twC/VeXDPbTNINks5y91e7fq+Ix4M0RfyZU8Poqog/82ao4Xo3FCskje7y9falrOhWmdlISSp9Xt3g8ZTNzFq1oYivdfcbS3Fhj6cOqOGcoYYr0ox1XNifebPUcL0bijmSxprZzmbWX9IJkmbVeQy1MEvSlNLtKZJubuBYymZmJukqSQvd/eIu3yrk8dQJNZwj1HDFmrGOC/kzb6oadve6fkg6QtIiSU9I+lK991+F8V8naaWkdm143fFkScO0YRbuYkl3SBra6HGWeSwHasNptIclzSt9HFHU46nj80YN5+SDGk567gpbx9RwPj9YehsAACRjUiYAAEhGQwEAAJLRUAAAgGQ0FAAAIBkNBQAASEZDAQAAktFQAACAZDQUAAAgGQ0FAABIRkMBAACS0VAAAIBkSQ2FmR1uZo+b2RIzO79agwLqiTpG0VHDyIOKLw5mZi3acKW6D2rD1d7mSJrs7gu6u09/G+ADNbii/QFr9NLz7r51NR+zt3VMDSMFNYyi21gNb5LwuPtIWuLuT0qSmV0vaZKkbhuKgRqsfe3QhF2iL7vDf76sBg/bqzqmhpGCGkbRbayGU17yGCXp6S5fLy9lQJFQxyg6ahi5kHKGoixmNlXSVEkaqE1rvTug6qhhFB01jHpIOUOxQtLoLl9vX8r+hrtPc/c2d29r1YCE3QE10WMdU8PIOWoYuZDSUMyRNNbMdjaz/pJOkDSrOsMC6oY6RtFRw8iFil/ycPf1ZnaapNsktUia7u7zqzYyoA6oYxQdNYy8SJpD4e63SLqlSmMBGoI6RtFRw8gDVsoEAADJaCgAAEAyGgoAAJCMhgIAACSjoQAAAMloKAAAQDIaCgAAkIyGAgAAJKOhAAAAyWgoAABAMhoKAACQjIYCAAAko6EAAADJkq42CgBAESy6eq+QPfV3V4Xs4hd3Cdkdx7WFrGPBouoMrIlwhgIAACSjoQAAAMloKAAAQLKkORRmtlTSGkkdkta7e3yhCcg56hhFRw0jD6oxKfMQd3++Co8DNBJ1XCMtw4aGzLbcImR/OWa7zPuvHe4hG/PvD4Ws8403KhhdU6GGS1r22C1kNx9yWcjavTVkpw55PGQ/f9eHQrb5ggoH18R4yQMAACRLbShc0m/M7AEzm5q1gZlNNbO5Zja3X
esSdwfUxEbrmBpGAVDDaLjUlzwOdPcVZraNpNvN7DF3v7vrBu4+TdI0SdrChsZzl0DjbbSOqWEUADWMhks6Q+HuK0qfV0u6SdI+1RgUUE/UMYqOGkYeVHyGwswGS+rn7mtKtz8k6T+qNjKgDqjjyvXbc/eQLf7ioJD94zvvCdk5w25L2vc7RpwSsrGfeiDpMYuKGs6w4tkQnbHohJDdvscN9RhNn5HykscISTeZ2V8f5yfufmtVRgXUD3WMoqOGkQsVNxTu/qSkd1dxLEDdUccoOmoYecHbRgEAQDIaCgAAkIzLl9fBW38XV8Fd9onOkH32PXeF7Kwh5V0i950/PD0z33RlfIfYy/vH96HveG3sLfvfNresfaO52N7vDNmSs1tC9rsDvxeyrVsGhKxfxv9bfvXGkJA9uW6bzPFkrVz4o/dfGbKv7j0lZD7nkczHRHPrePmVkC1bPjZuuEcdBtOHcIYCAAAko6EAAADJaCgAAEAyGgoAAJCMSZlV9twp7w3Zd78QL5vbNqAjZFmT16YsPSxkE7f8S8ge+qfvlDvEzP3sP3RyyIamLWaIHGnZeuuQLfrOqMxt/2f/y0O2S2u8zLMUJ2BmufrV0SH7xTEHhqxzQNY+pFN/GSdlZv3+vDkirtI5sJwBoum0jIgTfN/3jvImuKNynKEAAADJaCgAAEAyGgoAAJCMhgIAACRjUmYZrLV/yNYeln0tnhu++M2QbbdJnLx28rIPhmzZRbuFbPCv5oVs9qY7hOyum8Zlj2fsrMz87V6dNyxkQ8u6J4pgxYlxlcD5B3U3kTd7cmQ5fpw1AfOo/UPW8XicIGcTWbYQVbL54BAdMXROxQ+3ei8L2VYPx7+5HQv69sRPzlAAAIBkNBQAACAZDQUAAEjWY0NhZtPNbLWZPdolG2pmt5vZ4tLneOlAIEeoYxQdNYy8K2dS5gxJ35N0TZfsfEl3uvuFZnZ+6evzqj+8fFh5Wrz8+J/O7W5CW5yA+fElHw3Z+mPaQ7bp8/eHLF58XHpm6l4hu39s+Stl/vqNzUM25gdPh2x92Y9YCDPUh+t41JFLk+7/89e2DdnFiw4N2YgvxIrteHxxWft46Z1b9H5gfcsM9eEa7o2OJU+F7F/+5/iQHTM5rmKcZf7fXxqyia+cGbLRTMrcOHe/W9KLb4snSZpZuj1T0lHVHRZQXdQxio4aRt5VOodihLuvLN1+VtKIKo0HqCfqGEVHDSM3kidlursr+8y8JMnMpprZXDOb2651qbsDamJjdUwNowioYTRapQ3FKjMbKUmlz6u729Ddp7l7m7u3tZZ5dUKgTsqqY2oYOUYNIzcqbShmSZpSuj1F0s3VGQ5QV9Qxio4aRm70+C4PM7tO0sGShpvZcklflnShpJ+Z2cmSlkk6rpaDrKfF3903ZI9/7Lsh6+zm/u+4/ZSQ7X7u0pB1PP9Cb4f2v075bNrfjK99fUrIhjx9b9Jj5l1fq+Pg0/F/peNPPT1z09G3d4Rs8PxnQzZ8WZzRHu9ZvjdGxOWN8f/1+RpOtOu598Vwcv3H0cx6bCjcvbunPL5nDMgp6hhFRw0j71gpEwAAJKOhAAAAyWgoAABAsnKW3m5aT3xrv5A9/rG4FOsrnWtD9vHH/j7zMXc7PWOi2po1ZY2n3+DBIXvh2HeFbNJm34z31aDMx9z9v08N2ZgZzT0BE1HWUsRjzo5Zd+qxDHv73uX9ngDV0motIWvvdlUl9IQzFAAAIBkNBQAASEZDAQAAktFQAACAZH1mUmbLiG1CNvPoy0PWmbEGZtYEzP4fXJa5n+5W0Hy7fhPGh2zP6QtD9rURl2bcO656eMC8EzL3s9tX4mOmrGYI/NVf/m3/kK3fNGNGW9YCmBmbfWxs+ZOFT1t+cMgG3fpgObsB/le7x7+GWf8GoDycoQAAAMloKAAAQDIaCgAAkIyGAgAAJOszkzJtYJzI2DagvOmJg87oHx9vx9GZ2y4+ZfuQfeiwOFns7G2mhWyHTeJql
1nTgzo8TjWznw7PHE/Hy4szc6Bliy0y87X7jA1Z6xdXhezh3b9b1n6yVyMsf2rw7Dc3DdnyqTuEzNfHCcgA6oczFAAAIBkNBQAASEZDAQAAkvXYUJjZdDNbbWaPdsm+YmYrzGxe6eOI2g4TSEMdo+ioYeRdOZMyZ0j6nqRr3pZf4u4XVX1ENeJr14Xs/nWtIdt3QHvIbr7j+pClrqZ2x5txEuXijOvmHjLotZDNfStOEt3qGi5J3oMZaoI6LocNiBOQ3zronSE7+/IfZd7/kEF3hmxVR/z9mf3mkJD926JJIbtujxkh226TOMbuDOwXfyefPG6rkO3y+MCQda5dW/Z+CmCG+kgNo5h6PEPh7ndLerEOYwFqhjpG0VHDyLuUORSnmdnDpdNw8b8qQDFQxyg6ahi5UGlDcYWkXSVNkLRS0re629DMpprZXDOb26542hRooLLqmBpGjlHDyI2KGgp3X+XuHe7eKelKSftsZNtp7t7m7m2tGVfJBBql3DqmhpFX1DDypKKVMs1spLuvLH15tKRHN7Z9HnSsWh2yL3/2n0J20ffjJc3fFedA6sevZq+U+bW7jgzZuBlxYtgmq14J2TbXxZdHDxn925BNmR3HPU5zM8eD7hWxjt+u38A4EfGF4yeG7Pf/eWnZj7nHdaeHbPvZcWXLAb+aE7JhI+Mk4utu2ytk5wwr/6nOmij98Kfi8bz36TNCNuKah0LW+cYbZe8775qhhhspexXX8u67xf7x35S+rseGwsyuk3SwpOFmtlzSlyUdbGYTJLmkpZI+U7shAumoYxQdNYy867GhcPfJGfFVNRgLUDPUMYqOGkbesVImAABIRkMBAACS9ZnLl2fpf1ucyHjBzt2+YaUs4/SnsrZbMynu51c73Byydo8936ClGbNE0fSyVsB87OJ3xWxSeRMwJz1+VGY+7ptPhixrUvMmo7cP2btn/SVknx+2IGSvdL4Vsn1vOCdzPCN3j/u+850/Ddm9/xqP+/jJHwnZ85fGVUMHvhAnfnan5XcPlr0t8q3d42TjcldBvuvd14XsyP1Ozt74vod7Na6i4gwFAABIRkMBAACS0VAAAIBkNBQAACAZDQUAAEjWp9/l0UjrB8VertwZxzvPiDPp11dnWMgJ2yT+aj7+7XeH7LEjLwvZ8vXx4k9H/uALIdtp+hOZ+16f8Y6O9sPi8tl7fuPPIfvyNg+E7OpXdwzZj7700ZCNufG+zPG0DB8WsoM/GJcHf/34uJz9TROvDNn2l5Z3LYtfvh73K0nTxu1S1v2Rf7v/Nl7GYMEHplX8eIumZr8Db1x2aTcdzlAAAIBkNBQAACAZDQUAAEhGQwEAAJIxKbNBNr8+Y5bOt+o/DuTT05+PS7M/duR3QvZMxgTMj1/4+ZDt9Iu4nPaLH9g5c99+4uYh+/mecd9bt8TJjXtcHydLjpv2fMg2ffz+zH1n6Xj+hZBtcV1WFu977OfiZNQRxy4rb8fnbNXNN+aXd3/k3oBFg2L4gfqPo1lwhgIAACSjoQAAAMloKAAAQLIeGwozG21ms81sgZnNN7MzS/lQM7vdzBaXPg+p/XCB3qOG0QyoY+RdOZMy10s6x90fNLPNJT1gZrdL+pSkO939QjM7X9L5ks6r3VCby5oT9stI4yqDqIrC1fAVn768rO0GWsw+esrdIRt1xkshm7LF//RiRBkTMH9yRsjGfHFOyDrWN24d120uvydkXt5TK2lFVcdSBYWr47wb/dVYH9d9YlTIPrH5yrIe76nDf5iZf/jdk0PW+dDCsh6zSHo8Q+HuK939wdLtNZIWSholaZKkmaXNZko6qkZjBJJQw2gG1DHyrldzKMxsJ0kTJd0vaYS7/7Vte1bSiOoODag+ahjNgDpGHpXdUJjZZpJukHSWu7/a9Xvu7pK8m/tNNbO5Zja3XfE980C9UMNoBpXUMTWMeiiroTCzVm0o4Gvd/cZSvMrMRpa+P1JSvEShJHef5u5t7t7WmvE6LFAP1DCaQ
aV1TA2jHnqclGlmJukqSQvd/eIu35olaYqkC0ufb67JCJvUK7vwjt16KWIN3/3a7iHbd8AjIRuasVrlBcPnlbWPjzz2scz8L/duH7Jdfh4vDT5mfpxE7A2cgNnsiljHRTTjL/uHbPIe/13Wfdszz3H2HeW8y+MASZ+U9IiZzStlF2hD8f7MzE6WtEzScTUZIZCOGkYzoI6Raz02FO7+B0kZb06TJB1a3eEA1UcNoxlQx8g7zrsDAIBkNBQAACAZly9vkFF3vRGy1tNaQtbXJ/n0Vfccsl3I9v1EvK7yK+9+K2SbPNcasnHfj6s+bvJs5ptatNPap0PWmbkl0HzWzdg2ht+s/ziKiDMUAAAgGQ0FAABIRkMBAACS0VAAAIBkTMpsEPvjvJDNeHWbkE3ePE6me2OPkSHr//TyqowL+dDxwoshG3FpvNRyuVeBYv1KoDxD5sXfvcte2i1kpw55vB7DKRTOUAAAgGQ0FAAAIBkNBQAASEZDAQAAkjEpM0cu+cGxIZt87ndCNvJfl4TshZfflf2g9z2cPC4A6Cs6FiwK2W17bhEz7d2LR12YMKLi4AwFAABIRkMBAACS0VAAAIBkPTYUZjbazGab2QIzm29mZ5byr5jZCjObV/o4ovbDBXqPGkbRUcMognImZa6XdI67P2hmm0t6wMxuL33vEne/qHbDA6qCGkbRUcPIvR4bCndfKWll6fYaM1soaVStB9YXjfpRXMr1+KM+ErKfjvllyA76t8mZjzn077cMWcfLr1QwuuKihlF01DCKoFdzKMxsJ0kTJd1fik4zs4fNbLqZDan24IBqo4ZRdNQw8qrshsLMNpN0g6Sz3P1VSVdI2lXSBG3onL/Vzf2mmtlcM5vbrnXpIwYqRA2j6Khh5FlZDYWZtWpDEV/r7jdKkruvcvcOd++UdKWkfbLu6+7T3L3N3dtaNaBa4wZ6hRpG0VHDyLty3uVhkq6StNDdL+6Sd72G9tGSHq3+8IB01DCKjhpGEZTzLo8DJH1S0iNmNq+UXSBpsplNkOSSlkr6TA3G16d0PP9CyN46ZljI3vGt+FQvPOwHmY955O4nx7DvLcdNDaPoqGHkXjnv8viDJMv41i3VHw5QfdQwio4aRhGwUiYAAEhGQwEAAJLRUAAAgGTlTMpEA2VN1Bw7JWZHau9uHqHPTcAEADQAZygAAEAyGgoAAJCMhgIAACSjoQAAAMnM3eu3M7PnJC0rfTlc0vN123ltcSz1saO7b93IAVDDhZHX46GGa6eZjkXK7/F0W8N1bSj+Zsdmc929rSE7rzKOpW9qpueqmY5Far7jqZVmep6a6VikYh4PL3kAAIBkNBQAACBZIxuKaQ3cd7VxLH1TMz1XzXQsUvMdT6000/PUTMciFfB4GjaHAgAANA9e8gAAAMnq3lCY2eFm9riZLTGz8+u9/1RmNt3MVpvZo12yoWZ2u5ktLn0e0sgxlsvMRpvZbDNbYGbzzezMUl7I46kXajg/qOHKFbmOqeF8qmtDYWYtki6T9GFJ4yVNNrPx9RxDFcyQdPjbsvMl3enuYyXdWfq6CNZLOsfdx0vaT9KppZ9HUY+n5qjh3KGGK9AEdTxD1HDu1PsMxT6Slrj7k+7+lqTrJU2q8xiSuPvdkl58WzxJ0szS7ZmSjqrnmCrl7ivd/cHS7TWSFkoapYIeT51QwzlCDVes0HVMDedTvRuKUZKe7vL18lJWdCPcfWXp9rOSRjRyMJUws50kTZR0v5rgeGqIGs4parhXmrGOC/8zL3oNMymzynzD22YK9dYZM9tM0g2SznL3V7t+r4jHgzRF/JlTw+iqiD/zZqjhejcUKySN7vL19qWs6FaZ2UhJKn1e3eDxlM3MWrWhiK919xtLcWGPpw6o4ZyhhivSjHVc2J95s9RwvRuKOZLGmtnOZtZf0gmSZtV5DLUwS9KU0u0pkm5u4FjKZmYm6SpJC9394i7fKuTx1Ak1nCPUcMWasY4L+TNvqhp297p+SDpC0
iJJT0j6Ur33X4XxXydppaR2bXjd8WRJw7RhFu5iSXdIGtrocZZ5LAdqw2m0hyXNK30cUdTjqePzRg3n5IMaTnruClvH1HA+P1gpEwAAJGNSJgAASEZDAQAAktFQAACAZDQUAAAgGQ0FAABIRkMBAACS0VAAAIBkNBQAACAZDQUAAEhGQwEAAJLRUAAAgGQ0FAAAIFlSQ2Fmh5vZ42a2xMzOr9aggHqijlF01DDyoOKrjZpZizZc+vaD2nD52DmSJrv7gu7u098G+EANrmh/wBq99Ly7b13Nx+xtHVPDSEENo+g2VsObJDzuPpKWuPuTkmRm10uaJKnbhmKgBmtfOzRhl+jL7vCfL6vBw/aqjqlhpKCGUXQbq+GUlzxGSXq6y9fLS9nfMLOpZjbXzOa2a13C7oCa6LGOqWHkHDWMXKj5pEx3n+bube7e1qoBtd4dUHXUMIqOGkY9pDQUKySN7vL19qUMKBLqGEVHDSMXUhqKOZLGmtnOZtZf0gmSZlVnWEDdUMcoOmoYuVDxpEx3X29mp0m6TVKLpOnuPr9qIwPqgDpG0VHDyIuUd3nI3W+RdEuVxgI0BHWMoqOGkQeslAkAAJLRUAAAgGQ0FAAAIBkNBQAASEZDAQAAktFQAACAZDQUAAAgGQ0FAABIRkMBAACS0VAAAIBkNBQAACAZDQUAAEhGQwEAAJIlXW0UAADU17A/DglZP/OQPbf/y3UYTZcx1HVvAACgKdFQAACAZEkveZjZUklrJHVIWu/ubdUYFFBP1DGKjhpGHlRjDsUh7v58FR4HaCTqGEVHDaOhmJRZZbbXHiHr7B+f5hUHDw7Z/NMvD1m7d1RnYD049NFjQzZ40sqQda5dW4/hIGdswICQvfHhd4fsXV96KGSL915XkzEBfcGiq+LJpjk7fCdk7/39qSHbRfNqMaRupc6hcEm/MbMHzGxqNQYENAB1jKKjhtFwqWcoDnT3FWa2jaTbzewxd7+76wal4p4qSQO1aeLugJrYaB1TwygAahgNl3SGwt1XlD6vlnSTpH0ytpnm7m3u3taqeNoUaLSe6pgaRt5Rw8iDihsKMxtsZpv/9bakD0l6tFoDA+qBOkbRUcPIi5SXPEZIusnM/vo4P3H3W6syqpzx98bJZ4s/1T9z20s+cF3IWm19yA4btCZk7R77u051ljPEZLfv+bOQTfjRP4Zs588+E7KO51+oyZjqpM/UcYqWrYeHbPZl3w/Z79fGPynf3PmjIVv/1LLqDAwSNdw0Fl0RTvJrzocuCdmazrgq5hZ3DarJmHqj4obC3Z+UFP+lBQqEOkbRUcPIC1bKBAAAyWgoAABAMhoKAACQjJUyy+BfezFkj+1+YwNGUl/z9p8esr/b93MhG/CrQk/KRBW9b2CcgPz1HYaGrB+TMoHg4IkLQ7Z5v/gGgM8tOzxkw39wb03G1BucoQAAAMloKAAAQDIaCgAAkIyGAgAAJGNSZhlW/G50DHcv//73ro1r5//jLZ+OG1rGneOCaJn2e8+izPzqnX5T3gMAVdBi/B8F+fTmpLgK5fBzngrZuuNbQrZ+5bNVH8/qz+0fsm+MiKti/vjVHUP20hd3CFk/NX5yPL/9AAAgGQ0FAABIRkMBAACS0VAAAIBkTMosww4Xzg3Z0T+bXPb97a32kI196v6kMb3dy8OHZeZ33Ld5yLIunZ7lA48cH7ItZs8PWX0usI4i6PBYDe2bxj8zcZoyUFsnXvjLkJ20xdMhO2yvz4Zs4C+rPylzyqm3hGzCgPib8emvHh2yob9v/KqYWThDAQAAktFQAACAZDQUAAAgWY8NhZlNN7PVZvZol2yomd1uZotLn4fUdphAGuoYRUcNI+/KmZQ5Q9L3JF3TJTtf0p3ufqGZnV/6+rzqDy8fvP2tkHU8vqQBI+neqo+Ny8zf2f/mjLS8KXHPPBMvO73ZG0/2Zlh5MkN9vI4bZfVerSEb/esGDKT4ZogartjKt7YKWaeWhWz9oKwli9N0HjQxZ
JM2+27I2n1QHM/A6o+nVno8Q+Hud0t68W3xJEkzS7dnSjqqusMCqos6RtFRw8i7SudQjHD3laXbz0oaUaXxAPVEHaPoqGHkRvKkTHd3beQSVmY21czmmtncdq1L3R1QExurY2oYRUANo9EqbShWmdlISSp9Xt3dhu4+zd3b3L2tleVskC9l1TE1jByjhpEblTYUsyRNKd2eIilr5h+Qd9Qxio4aRm70+C4PM7tO0sGShpvZcklflnShpJ+Z2cmSlkk6rpaDxN967rPvDdnuJz6Wue2Ilsr/N/KOLzwVso6KH62xqOM03h6Xj1/UvjZk41oHhuzNneO7pNB71HD5Fl+6b8huGhbfVXHFy/HdcVvdtyJk68vcb8tWW2bmz5/7esi22yT+bT77mf1DNuKqB0LW7RyDBuuxoXD37i5acWiVxwLUDHWMoqOGkXeslAkAAJLRUAAAgGQ0FAAAIFk5S2+jTlafFifkTPnsLSE7cYuLQrZ5v/5J+/7qc+8Jma9jMh026FgV3414xhPHh+zW3XmTAeqnZbcxmfmPPnJFyN7wOLH4xi99KGSDnv5TxeNZfPnOmfmj77kyZHe8uXm8/97FXiOEMxQAACAZDQUAAEhGQwEAAJLRUAAAgGRMyixDyx67hWzRSUMytz3owEcr3s8vR8eV3DrVmbFl+RMwl7THNd6Ov+KckO1w06q47zVPlL0fAKglP2BCyE646peZ27YNiGv67n7rmSEb94vKJ2Au/VpcsXju+y/uZuv4T+15P/zHkI3SPRWPJw84QwEAAJLRUAAAgGQ0FAAAIBkNBQAASMakzLfJmvjzqatvCtmkwc/XYO/V7+/OWBJXMxz1jTjxp6iXJUf+bTb0jUYPATlmrXGS+crT2kI299w4ab3VWjIfs93j39KPTXgwZLO+ESdWjvn3h0LWb9ttQnbkEfeFrEWWOZ4J98QJmDtcWOwJmFk4QwEAAJLRUAAAgGQ0FAAAIFmPDYWZTTez1Wb2aJfsK2a2wszmlT6OqO0wgTTUMYqOGkbelTMpc4ak70m65m35Je4er6PdhFrkIetXg5M7WROM2uOue+XWd8QJpe/7xKkh2/LaOMGoycxQH6/jRrkh49LNp+uABoyk8GaoCWv42VPiBMw/nfudkGWtGdzd38drXh0Vsv/c9v6YnRizCw7bN2Qf3PLXITtk0Gshu3/dwMzx7PDxRzLzZtPjv4rufrekF+swFqBmqGMUHTWMvEv5b/ZpZvZw6TRc9oUtgPyjjlF01DByodKG4gpJu0qaIGmlpG91t6GZTTWzuWY2t13rKtwdUBNl1TE1jByjhpEbFTUU7r7K3TvcvVPSlZL22ci209y9zd3bWjWg0nECVVduHVPDyCtqGHlS0UqZZjbS3VeWvjxaUuXX7M4Z++O8kF111OEhO/9TwzLvv8Ntb4Ws5c14CfEUi09uDdljh19R1X30Bc1cx/Xw9B9Gx3D3+o+jLytaDT93SlyZ8p7zvh2yNZ3tIVvQPjhkXzr3M5n7GfhC/Dt8538uDdnVO/0mZFmTN7Mm4WdNEm3rH/crSWcvWRiy7xzzsfiYD8XtiqTHhsLMrpN0sKThZrZc0pclHWxmEyS5pKWSsn+qQE5Qxyg6ahh512ND4e6TM+KrajAWoGaoYxQdNYy8Y6VMAACQjIYCAAAk4/LlZehYsChku3yhAQMpecfirWMY540CNbXZ0+Ut47q5xe1axo8LWdbvGZrL+H+Ikw5nvT4iZP85Lb66M/Jb8XLfmypOoOzOC+e8K2Rnf/d9Ibtku9+X/Zhv12LZly///CPHhGy7hxZUvJ+84gwFAABIRkMBAACS0VAAAIBkNBQAACAZDQUAAEjGuzwKaNXHxjR6CID6lbmifNbM985Bcfl4NL8HbhsfshevHx6ykY/Hd3SkenPEwJCdvvVvM7aMtbnff5wWsuEPvV72vkcvWRGyjrLvXRycoQAAAMloKAAAQDIaCgAAkIyGAgAAJOszkzJtwICQv
fzxiSEbcvP8kHWuWVOTMZVj5Tn7h+zmM/4rY8t4fEAtDZlxb8i+/4UdQ3bKlstCtvjs/iEbc2J1xoX82uHf42TLWkxObNk6Xp5g+TFxFvGY1vh389o1I0M2/Aex1nujGSdgZuEMBQAASEZDAQAAkvXYUJjZaDObbWYLzGy+mZ1Zyoea2e1mtrj0eUjthwv0HjWMZkAdI+/KOUOxXtI57j5e0n6STjWz8ZLOl3Snu4+VdGfpayCPqGE0A+oYudbjpEx3XylpZen2GjNbKGmUpEmSDi5tNlPS7ySdV5NR9tLaj+4Tsi3P/UvI7hrz3ZAdPWdyfMDHqz8pc5OR24ZsxbG7hOynp18Usu02KX8C5qqOdSFrfdPLvn8zKGINF9VF9/1dyA4/9NshG/eZRSHrrMWAmgh1XL7F58TVhBceemnI7l0XV8X82ZHvy3jEJ6oxrKbXqzkUZraTpImS7pc0olTgkvSspBHVHRpQfdQwmgF1jDwqu6Ews80k3SDpLHd/tev33N0lZf6318ymmtlcM5vbrvi/ZaBeqGE0g0rqmBpGPZTVUJhZqzYU8LXufmMpXmVmI0vfHylpddZ93X2au7e5e1srayWgQahhNINK65gaRj2U8y4Pk3SVpIXufnGXb82SNKV0e4qkm6s/PCAdNYxmQB0j78pZKfMASZ+U9IiZzStlF0i6UNLPzOxkScskHVeTEVbg775+V8jOGfZoWfd97IItYvjavqlDCk7YP6689ottfhWyzoxL6WaZsjROhpOkJVfvFrJhN6at+lZAhavhZtKhjMuXv7m2ASMpPOr4bVrGj8vMv3r09SHr8PiK5kmzTgnZmEX3pQ+sjyrnXR5/kDL+ImxwaHWHA1QfNYxmQB0j71gpEwAAJKOhAAAAyWgoAABAsj5z+fJyLTzsBw3ce+zv7l0b3+L16fv/IWRjPr048xGHvd7nJmAiZ3bdZFDIXjgprmY77CpqFb1z3I2/y8yP3iy+A/w9950UsjFnMQGzmjhDAQAAktFQAACAZDQUAAAgGQ0FAABI1pSTMn97xgEhu+ZzcRLYQwdMr8dw9ONXR4dsZftWIZv+YBz3mCs7QrbLH+eFjEs/Iw+uPij+Tr3U+WbIhj/8Wsgyr8wGbMTXbz4mM598YrxU+aBbMlZBRlVxhgIAACSjoQAAAMloKAAAQDIaCgAAkKwpJ2W2/O7BkO38p01DttcZZ4Zs5me+HbI9+8cL/H3gkeMz9/3K77YN2Y4/XRGy9U8tC9lYPZD5mEBRfH7hsSE7dsc/h6zf6+tCFqcfAxu3y3nZq6seed7eIRsmVmKtNc5QAACAZDQUAAAgGQ0FAABI1mNDYWajzWy2mS0ws/lmdmYp/4qZrTCzeaWPI2o/XKD3qGEUHTWMIihnUuZ6See4+4NmtrmkB8zs9tL3LnH3i2o3PKAqqGEUHTWM3OuxoXD3lZJWlm6vMbOFkkbVemDV1vnGGyEbdeE9IbvgwrhEd5bN9GTZ+fqyHhG10iw1XARDP7IoZL/V4Iwt43boHjWMIujVHAoz20nSREn3l6LTzOxhM5tuZkOqPTig2qhhFB01jLwqu6Ews80k3SDpLHd/VdIVknaVNEEbOudvdXO/qWY218zmtiu+9xyoF2oYRUcNI8/KaijMrFUbivhad79Rktx9lbt3uHunpCslZb5W4O7T3L3N3dtaNaBa4wZ6hRpG0VHDyLty3uVhkq6StNDdL+6Sj+yy2dGSHq3+8IB01DCKjhpGEZTzLo8DJH1S0iNmNq+UXSBpsplNkOSSlkr6TA3GB1QDNYyio4aRe+W8y+MPkuLFLKRbqj8coPqoYRQdNYwiYKVMAACQjIYCAAAko6EAAADJaCgAAEAyGgoAAJCMhgIAACSjoQAAAMloKAAAQDJz9/rtzOw5SctKXw6X9Hzddl5bHEt97OjuWzdyANRwYeT1eKjh2mmmY5Hyezzd1nBdG4q/2bHZXHdva8jOq4xj6Zua6blqpmORmu94a
qWZnqdmOhapmMfDSx4AACAZDQUAAEjWyIZiWgP3XW0cS9/UTM9VMx2L1HzHUyvN9Dw107FIBTyehs2hAAAAzYOXPAAAQLK6NxRmdriZPW5mS8zs/HrvP5WZTTez1Wb2aJdsqJndbmaLS5+HNHKM5TKz0WY228wWmNl8MzuzlBfyeOqFGs4ParhyRa5jajif6tpQmFmLpMskfVjSeEmTzWx8PcdQBTMkHf627HxJd7r7WEl3lr4ugvWSznH38ZL2k3Rq6edR1OOpOWo4d6jhCjRBHc8QNZw79T5DsY+kJe7+pLu/Jel6SZPqPIYk7n63pBffFk+SNLN0e6ako+o5pkq5+0p3f7B0e42khZJGqaDHUyfUcI5QwxUrdB1Tw/lU74ZilKSnu3y9vJQV3Qh3X1m6/aykEY0cTCXMbCdJEyXdryY4nhqihnOKGu6VZqzjwv/Mi17DTMqsMt/wtplCvXXGzDaTdIOks9z91a7fK+LxIE0Rf+bUMLoq4s+8GWq43g3FCkmju3y9fSkrulVmNlKSSp9XN3g8ZTOzVm0o4mvd/cZSXNjjqQNqOGeo4Yo0Yx0X9mfeLDVc74ZijqSxZrazmfWXdIKkWXUeQy3MkjSldHuKpJsbOJaymZlJukrSQne/uMu3Cnk8dUIN5wg1XLFmrONC/sybqobdva4fko6QtEjSE5K+VO/9V2H810laKaldG153PFnSMG2YhbtY0h2ShjZ6nGUey4HacBrtYUnzSh9HFPV46vi8UcM5+aCGk567wtYxNZzPD1bKBAAAyZiUCQAAktFQAACAZDQUAAAgGQ0FAABIRkMBAACS0VAAAIBkNBQAACAZDQUAAEj2/wCngSznSBSVjwAAAABJRU5ErkJggg==\n",
48 | "text/plain": [
49 | ""
50 | ]
51 | },
52 | "metadata": {
53 | "needs_background": "light"
54 | },
55 | "output_type": "display_data"
56 | }
57 | ],
58 | "source": [
59 | "plt.figure(figsize=[9, 9])\n",
60 | "\n",
61 | "for i in range(1, 10):\n",
62 | " plt.subplot(3, 3, i)\n",
63 | " plt.imshow(x_train[i])"
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {},
69 | "source": [
70 | "## Model"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 4,
76 | "metadata": {},
77 | "outputs": [],
78 | "source": [
79 | "model = tf.keras.models.Sequential([\n",
80 | " tf.keras.layers.Flatten(input_shape=[28, 28]),\n",
81 | " tf.keras.layers.Dense(128, activation='relu'),\n",
82 | " tf.keras.layers.Dropout(.2),\n",
83 | " tf.keras.layers.Dense(10)\n",
84 | "])"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 5,
90 | "metadata": {},
91 | "outputs": [
92 | {
93 | "name": "stdout",
94 | "output_type": "stream",
95 | "text": [
96 | "Model: \"sequential\"\n",
97 | "_________________________________________________________________\n",
98 | "Layer (type) Output Shape Param # \n",
99 | "=================================================================\n",
100 | "flatten (Flatten) (None, 784) 0 \n",
101 | "_________________________________________________________________\n",
102 | "dense (Dense) (None, 128) 100480 \n",
103 | "_________________________________________________________________\n",
104 | "dropout (Dropout) (None, 128) 0 \n",
105 | "_________________________________________________________________\n",
106 | "dense_1 (Dense) (None, 10) 1290 \n",
107 | "=================================================================\n",
108 | "Total params: 101,770\n",
109 | "Trainable params: 101,770\n",
110 | "Non-trainable params: 0\n",
111 | "_________________________________________________________________\n"
112 | ]
113 | }
114 | ],
115 | "source": [
116 | "model.summary()"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
    123 |     "In the model, we have `784` input features, and in the first layer, we have `128` nodes, so the corresponding weights $W$ and $b$ would have size $(784, 128)$ and $(128, 1)$, which, in total, adds up to\n",
124 | "\n",
125 | "$$ 784 \\times 128 + 128 = 100480$$\n",
126 | "\n",
    127 |     "The same reasoning gives the `dense_1` layer its $128 \\times 10 + 10 = 1290$ parameters.\n",
128 | "\n",
129 | "__Note__: It is possible to bake this tf.nn.softmax in as the activation function for the last layer of the network. While this can make the model output more directly interpretable, this approach is discouraged as it's impossible to provide an exact and numerically stable loss calculation for all models when using a softmax output."
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 6,
135 | "metadata": {},
136 | "outputs": [
137 | {
138 | "name": "stdout",
139 | "output_type": "stream",
140 | "text": [
141 | "[[ 0.15655103 -0.48250246 0.29978698 -0.6595523 -0.27254325 0.18561608\n",
142 | " -0.42510659 0.32197294 -0.18982276 -1.2221566 ]]\n"
143 | ]
144 | }
145 | ],
146 | "source": [
147 | "predictions = model(x_train[:1])\n",
148 | "print(predictions.numpy())"
149 | ]
150 | },
151 | {
152 | "cell_type": "markdown",
153 | "metadata": {},
154 | "source": [
    155 |     "Transform the logits into probabilities using `softmax`."
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 7,
161 | "metadata": {},
162 | "outputs": [
163 | {
164 | "name": "stdout",
165 | "output_type": "stream",
166 | "text": [
167 | "[[0.13328804 0.07034832 0.15381466 0.05893347 0.08678365 0.1372189\n",
168 | " 0.07450415 0.15726532 0.0942677 0.03357577]]\n"
169 | ]
170 | }
171 | ],
172 | "source": [
173 | "predictions = tf.nn.softmax(predictions)\n",
174 | "print(predictions.numpy())"
175 | ]
176 | },
177 | {
178 | "cell_type": "markdown",
179 | "metadata": {},
180 | "source": [
181 | "## Loss Function"
182 | ]
183 | },
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {},
187 | "source": [
188 | "In `SparseCategoricalCrossentropy`, labels are to be provided as integers"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 8,
194 | "metadata": {},
195 | "outputs": [
196 | {
197 | "data": {
198 | "text/plain": [
199 | "2.2661917"
200 | ]
201 | },
202 | "execution_count": 8,
203 | "metadata": {},
204 | "output_type": "execute_result"
205 | }
206 | ],
207 | "source": [
208 | "loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n",
209 | "loss_fn(y_train[:1], predictions).numpy()"
210 | ]
211 | },
212 | {
213 | "cell_type": "markdown",
214 | "metadata": {},
215 | "source": [
216 | "## Train"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 9,
222 | "metadata": {},
223 | "outputs": [],
224 | "source": [
225 | "model.compile(optimizer='adam',\n",
226 | " loss=loss_fn,\n",
227 | " metrics=['accuracy'])"
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": 10,
233 | "metadata": {},
234 | "outputs": [
235 | {
236 | "name": "stdout",
237 | "output_type": "stream",
238 | "text": [
239 | "Epoch 1/5\n",
240 | "1875/1875 [==============================] - 1s 620us/step - loss: 0.2957 - accuracy: 0.9146\n",
241 | "Epoch 2/5\n",
242 | "1875/1875 [==============================] - 1s 610us/step - loss: 0.1434 - accuracy: 0.9573\n",
243 | "Epoch 3/5\n",
244 | "1875/1875 [==============================] - 1s 610us/step - loss: 0.1056 - accuracy: 0.9686\n",
245 | "Epoch 4/5\n",
246 | "1875/1875 [==============================] - 1s 621us/step - loss: 0.0878 - accuracy: 0.9729\n",
247 | "Epoch 5/5\n",
248 | "1875/1875 [==============================] - 1s 620us/step - loss: 0.0737 - accuracy: 0.9772\n"
249 | ]
250 | },
251 | {
252 | "data": {
253 | "text/plain": [
254 | ""
255 | ]
256 | },
257 | "execution_count": 10,
258 | "metadata": {},
259 | "output_type": "execute_result"
260 | }
261 | ],
262 | "source": [
263 | "model.fit(x_train, y_train, epochs=5)"
264 | ]
265 | },
266 | {
267 | "cell_type": "markdown",
268 | "metadata": {},
269 | "source": [
270 | "## Evaluate"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": 11,
276 | "metadata": {},
277 | "outputs": [
278 | {
279 | "name": "stdout",
280 | "output_type": "stream",
281 | "text": [
282 | "313/313 - 0s - loss: 0.0733 - accuracy: 0.9771\n"
283 | ]
284 | },
285 | {
286 | "data": {
287 | "text/plain": [
288 | "[0.0732896476984024, 0.9771000146865845]"
289 | ]
290 | },
291 | "execution_count": 11,
292 | "metadata": {},
293 | "output_type": "execute_result"
294 | }
295 | ],
296 | "source": [
297 | "model.evaluate(x_test, y_test, verbose=2)"
298 | ]
299 | },
300 | {
301 | "cell_type": "markdown",
302 | "metadata": {},
303 | "source": [
304 | "## Predict"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": 12,
310 | "metadata": {},
311 | "outputs": [
312 | {
313 | "data": {
314 | "text/plain": [
315 | "array([7, 2, 1, 0, 4])"
316 | ]
317 | },
318 | "execution_count": 12,
319 | "metadata": {},
320 | "output_type": "execute_result"
321 | }
322 | ],
323 | "source": [
324 | "tf.argmax(model.predict(x_test[:5]), axis=-1).numpy()"
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "metadata": {},
330 | "source": [
    331 |     "## Part 2: Custom Training with the Subclassing API"
332 | ]
333 | },
334 | {
335 | "cell_type": "code",
336 | "execution_count": 1,
337 | "metadata": {},
338 | "outputs": [],
339 | "source": [
340 | "import tensorflow as tf\n",
341 | "\n",
342 | "from tensorflow.keras.layers import Dense, Flatten, Conv2D\n",
343 | "from tensorflow.keras import Model"
344 | ]
345 | },
346 | {
347 | "cell_type": "code",
348 | "execution_count": 2,
349 | "metadata": {},
350 | "outputs": [
351 | {
352 | "name": "stdout",
353 | "output_type": "stream",
354 | "text": [
355 | "train shape (60000, 28, 28, 1)\n",
356 | "test shape (10000, 28, 28, 1)\n"
357 | ]
358 | }
359 | ],
360 | "source": [
361 | "mnist = tf.keras.datasets.mnist\n",
362 | "\n",
363 | "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
364 | "x_train, x_test = x_train / 255.0, x_test / 255.0\n",
365 | "\n",
366 | "# Add a channels dimension\n",
367 | "x_train = x_train[..., tf.newaxis].astype(\"float32\")\n",
368 | "x_test = x_test[..., tf.newaxis].astype(\"float32\")\n",
369 | "\n",
370 | "print('train shape', x_train.shape)\n",
371 | "print('test shape', x_test.shape)"
372 | ]
373 | },
374 | {
375 | "cell_type": "markdown",
376 | "metadata": {},
377 | "source": [
378 | "## Get Batch"
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": 3,
384 | "metadata": {},
385 | "outputs": [],
386 | "source": [
387 | "train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)\n",
388 | "test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)"
389 | ]
390 | },
391 | {
392 | "cell_type": "markdown",
393 | "metadata": {},
394 | "source": [
395 | "## Model"
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": 4,
401 | "metadata": {},
402 | "outputs": [],
403 | "source": [
404 | "class MyModel(Model):\n",
405 | " def __init__(self):\n",
406 | " super(MyModel, self).__init__()\n",
407 | " self.conv1 = Conv2D(32, 3, activation='relu')\n",
408 | " self.flatten = Flatten()\n",
409 | " self.dense1 = Dense(128, activation='relu')\n",
410 | " self.dense2 = Dense(10)\n",
411 | " \n",
412 | " def call(self, x):\n",
413 | " x = self.conv1(x)\n",
414 | " x = self.flatten(x)\n",
415 | " x = self.dense1(x)\n",
416 | " x = self.dense2(x)\n",
    417 |     "        # no softmax here: the loss is computed with from_logits=True\n",
418 | " return x\n",
419 | "\n",
420 | "model = MyModel()"
421 | ]
422 | },
423 | {
424 | "cell_type": "markdown",
425 | "metadata": {},
426 | "source": [
427 | "## Optimizer and Loss"
428 | ]
429 | },
430 | {
431 | "cell_type": "markdown",
432 | "metadata": {},
433 | "source": [
434 | "These metrics accumulate the values over epochs and then print the overall result."
435 | ]
436 | },
437 | {
438 | "cell_type": "code",
439 | "execution_count": 5,
440 | "metadata": {},
441 | "outputs": [],
442 | "source": [
443 | "train_loss = tf.keras.metrics.Mean(name='train_loss')\n",
444 | "train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')\n",
445 | "\n",
446 | "test_loss = tf.keras.metrics.Mean(name='test_loss')\n",
447 | "test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')"
448 | ]
449 | },
450 | {
451 | "cell_type": "code",
452 | "execution_count": 6,
453 | "metadata": {},
454 | "outputs": [],
455 | "source": [
456 | "loss_obj = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n",
457 | "optimizer = tf.keras.optimizers.Adam()"
458 | ]
459 | },
460 | {
461 | "cell_type": "code",
462 | "execution_count": 7,
463 | "metadata": {},
464 | "outputs": [],
465 | "source": [
466 | "# step of one batch\n",
467 | "@tf.function\n",
468 | "def train_step(images, labels):\n",
469 | " with tf.GradientTape() as tape:\n",
470 | " predictions = model(images)\n",
471 | " loss = loss_obj(labels, predictions)\n",
472 | " gradients = tape.gradient(loss, model.trainable_variables)\n",
473 | " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n",
474 | " \n",
475 | " train_loss(loss)\n",
476 | " train_accuracy(labels, predictions)"
477 | ]
478 | },
479 | {
480 | "cell_type": "code",
481 | "execution_count": 8,
482 | "metadata": {},
483 | "outputs": [],
484 | "source": [
485 | "@tf.function\n",
486 | "def test_step(images, labels):\n",
487 | " # training=False is only needed if there are layers with different\n",
488 | " # behavior during training versus inference (e.g. Dropout).\n",
489 | " predictions = model(images, training=False)\n",
490 | " t_loss = loss_obj(labels, predictions)\n",
491 | "\n",
492 | " test_loss(t_loss)\n",
493 | " test_accuracy(labels, predictions)"
494 | ]
495 | },
496 | {
497 | "cell_type": "markdown",
498 | "metadata": {},
499 | "source": [
500 | "## Training"
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "execution_count": 9,
506 | "metadata": {},
507 | "outputs": [
508 | {
509 | "name": "stdout",
510 | "output_type": "stream",
511 | "text": [
512 | "Epoch 1, Loss: 0.1341124325990677, Accuracy: 96.08833312988281, Test Loss: 0.07015062868595123, Test Accuracy: 97.7699966430664\n",
513 | "Epoch 2, Loss: 0.04252898693084717, Accuracy: 98.69166564941406, Test Loss: 0.05851253867149353, Test Accuracy: 98.13999938964844\n",
514 | "Epoch 3, Loss: 0.02277100831270218, Accuracy: 99.24666595458984, Test Loss: 0.05304492637515068, Test Accuracy: 98.38999938964844\n",
515 | "Epoch 4, Loss: 0.012743637897074223, Accuracy: 99.58499908447266, Test Loss: 0.057635437697172165, Test Accuracy: 98.3499984741211\n",
516 | "Epoch 5, Loss: 0.00996498391032219, Accuracy: 99.66999816894531, Test Loss: 0.07684854418039322, Test Accuracy: 98.23999786376953\n"
517 | ]
518 | }
519 | ],
520 | "source": [
521 | "EPOCHS = 5\n",
522 | "for epoch in range(EPOCHS):\n",
    523 |     "    # reset the metric accumulators at the start of each epoch\n",
524 | " train_loss.reset_states()\n",
525 | " train_accuracy.reset_states()\n",
526 | " test_loss.reset_states()\n",
527 | " test_accuracy.reset_states()\n",
528 | " \n",
529 | " for images, labels in train_ds:\n",
530 | " train_step(images, labels)\n",
531 | " \n",
532 | " for images, labels in test_ds:\n",
533 | " test_step(images, labels)\n",
534 | " \n",
535 | " print(\n",
536 | " f'Epoch {epoch + 1}, '\n",
537 | " f'Loss: {train_loss.result()}, '\n",
538 | " f'Accuracy: {train_accuracy.result() * 100}, '\n",
539 | " f'Test Loss: {test_loss.result()}, '\n",
540 | " f'Test Accuracy: {test_accuracy.result() * 100}'\n",
541 | " )"
542 | ]
543 | },
544 | {
545 | "cell_type": "code",
546 | "execution_count": null,
547 | "metadata": {},
548 | "outputs": [],
549 | "source": []
550 | }
551 | ],
552 | "metadata": {
553 | "kernelspec": {
554 | "display_name": "Python 3",
555 | "language": "python",
556 | "name": "python3"
557 | },
558 | "language_info": {
559 | "codemirror_mode": {
560 | "name": "ipython",
561 | "version": 3
562 | },
563 | "file_extension": ".py",
564 | "mimetype": "text/x-python",
565 | "name": "python",
566 | "nbconvert_exporter": "python",
567 | "pygments_lexer": "ipython3",
568 | "version": "3.8.3"
569 | }
570 | },
571 | "nbformat": 4,
572 | "nbformat_minor": 4
573 | }
574 |
--------------------------------------------------------------------------------