├── .gitignore
├── README.md
├── convolution
├── ResNet.ipynb
├── convolution model.ipynb
└── images
│ ├── Convolution_schematic.gif
│ ├── PAD.png
│ ├── SIGNS.png
│ ├── a_pool.png
│ ├── ave-pool.png
│ ├── ave_pool1.png
│ ├── average_pool.png
│ ├── conv.png
│ ├── conv1.png
│ ├── conv_kiank.mp4
│ ├── conv_nn.png
│ ├── images
│ ├── convblock_kiank.png
│ ├── idblock2_kiank.png
│ ├── idblock3_kiank.png
│ ├── my_image.jpg
│ ├── resnet_kiank.png
│ ├── signs_data_kiank.png
│ ├── skip_connection_kiank.png
│ └── vanishing_grad_kiank.png
│ ├── max_pool.png
│ ├── max_pool1.png
│ ├── model.png
│ ├── thumbs_up.jpg
│ └── vert_horiz_kiank.png
├── deep-neural-network
├── Deep NN.ipynb
└── images
│ └── backprop_kiank.png
├── dropout
├── dropout.ipynb
├── images
│ └── dropout1_kiank.mp4
└── model.py
├── examples
└── dataloader.py
├── logistic-regression
└── logistic-regression.ipynb
├── optimization
└── optimization.ipynb
├── regularization
├── model.py
└── regularization.ipynb
├── shallow-neural-network
├── images
│ ├── 1-hidden-nn.png
│ └── multi-layer.png
└── one-hidden-layer-nn.ipynb
└── tensorflow
└── tf-hands-on.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Working files
2 | */.ipynb_checkpoints
3 | Makefile
4 | .idea
5 | start.sh
6 | test.py
7 |
8 | # Byte-compiled / optimized / DLL files
9 | __pycache__/
10 | *.py[cod]
11 | *$py.class
12 |
13 | # C extensions
14 | *.so
15 |
16 | # Distribution / packaging
17 | .Python
18 | build/
19 | develop-eggs/
20 | dist/
21 | downloads/
22 | eggs/
23 | .eggs/
24 | lib/
25 | lib64/
26 | parts/
27 | sdist/
28 | var/
29 | wheels/
30 | pip-wheel-metadata/
31 | share/python-wheels/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 | MANIFEST
36 |
37 | # PyInstaller
38 | # Usually these files are written by a python script from a template
39 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
40 | *.manifest
41 | *.spec
42 |
43 | # Installer logs
44 | pip-log.txt
45 | pip-delete-this-directory.txt
46 |
47 | # Unit test / coverage reports
48 | htmlcov/
49 | .tox/
50 | .nox/
51 | .coverage
52 | .coverage.*
53 | .cache
54 | nosetests.xml
55 | coverage.xml
56 | *.cover
57 | *.py,cover
58 | .hypothesis/
59 | .pytest_cache/
60 |
61 | # Translations
62 | *.mo
63 | *.pot
64 |
65 | # Django stuff:
66 | *.log
67 | local_settings.py
68 | db.sqlite3
69 | db.sqlite3-journal
70 |
71 | # Flask stuff:
72 | instance/
73 | .webassets-cache
74 |
75 | # Scrapy stuff:
76 | .scrapy
77 |
78 | # Sphinx documentation
79 | docs/_build/
80 |
81 | # PyBuilder
82 | target/
83 |
84 | # Jupyter Notebook
85 | .ipynb_checkpoints
86 |
87 | # IPython
88 | profile_default/
89 | ipython_config.py
90 |
91 | # pyenv
92 | .python-version
93 |
94 | # pipenv
95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 |
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 |
108 | # SageMath parsed files
109 | *.sage.py
110 |
111 | # Environments
112 | .env
113 | .venv
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 |
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 |
124 | # Rope project settings
125 | .ropeproject
126 |
127 | # mkdocs documentation
128 | /site
129 |
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 |
135 | # Pyre type checker
136 | .pyre/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # deep-learning
2 | ---
3 | ## Reading List
4 | ---
5 | 1. [Logistic Regression](https://towardsdatascience.com/logistic-regression-step-by-step-implementation-f032a89936ca)
6 | 2. [Shallow Neural Network](https://towardsdatascience.com/building-a-shallow-neural-network-a4e2728441e0)
7 | 3. [Deep Neural Network](https://towardsdatascience.com/code-a-deep-neural-network-a5fd26ec41c4)
8 | 4. [Regularization & Dropout](https://towardsdatascience.com/regularization-dropout-in-deep-learning-5198c2bf6107)
9 | 5. [Optimization Methods](https://towardsdatascience.com/optimization-methods-in-deep-learning-790629f184b1)
10 |
--------------------------------------------------------------------------------
/convolution/convolution model.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Convolutional Neural Network"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Forward\n",
15 | "---\n",
16 | "- Zero Paddings\n",
17 | "- Convolutional Layer\n",
18 | "- Pooling\n",
19 | "- Combination: Conv + Pool"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "Zero Padding pads 0s at the edge of an image, benefits include:\n",
27 | "- It allows you to use a CONV layer without necessarily shrinking the height and width of the volumes. This is important for building deeper networks, since otherwise the height/width would shrink as you go to deeper layers. An important special case is the \"same\" convolution, in which the height/width is exactly preserved after one layer. \n",
28 | "\n",
29 | "- It helps us keep more of the information at the border of an image. Without padding, very few values at the next layer would be affected by pixels at the edges of an image.\n",
30 | "\n",
31 | "
\n",
32 | "\n",
33 | "
from Deep Learning Specialization Course"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "Consider an input of batched images with shape:\n",
41 | "\n",
42 | "$$(m, n_W, n_H, n_C)$$\n",
43 | "\n",
44 | "Where $m$ is the batch size, $n_W$ is the width of the image, $n_H$ is the height and $n_C$ is number of channels -- RGB would have 3 channels.\n",
45 | "\n",
46 | "After padded with size $p$, the size would become\n",
47 | "\n",
48 | "$$(m, n_W + 2p, n_H + 2p, n_C)$$"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 1,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "%matplotlib inline\n",
58 | "\n",
59 | "import numpy as np\n",
60 | "import matplotlib.pyplot as plt"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 2,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "def zero_pads(X, pad):\n",
70 | "    \"\"\"\n",
71 | "    Zero-pad the width and height axes of X (m, n_W, n_H, n_C), returning shape (m, n_W + 2*pad, n_H + 2*pad, n_C).\n",
72 | "    \"\"\"\n",
73 | "    X_pad = np.pad(X, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant', constant_values=0)\n",
74 | "    return X_pad"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 3,
80 | "metadata": {},
81 | "outputs": [
82 | {
83 | "name": "stdout",
84 | "output_type": "stream",
85 | "text": [
86 | "X shape (3, 4, 4, 3)\n",
87 | "X_pad shape (3, 8, 8, 3)\n"
88 | ]
89 | },
90 | {
91 | "data": {
92 | "text/plain": [
93 | "Text(0.5, 1.0, 'paded')"
94 | ]
95 | },
96 | "execution_count": 3,
97 | "metadata": {},
98 | "output_type": "execute_result"
99 | },
100 | {
101 | "data": {
102 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAADHCAYAAAAwLRlnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPFElEQVR4nO3df6zddX3H8efLtijc1lJSlmELQqYjGv8Q06EGwxjOyFB0S7ZFnG4zGrZlEIxsqNvMLHNL949xiGMQfqgDZIaaoAxxXaxjDEELorEtLkAgrda0Cg3l6sDCe3+cc+Vwe9t7as+53085z0dyk/Pr+72vWz73xed+f6aqkCS16wVdB5AkHZhFLUmNs6glqXEWtSQ1zqKWpMZZ1JLUOIt6gSX5lyQfGfVnpRYl+WiS6xZ62eebxV0HmDRV9afj+Kyk5y9n1AsoyaKuM0g6/FjUI5DkFUm+lmR3ks1J3tZ//dNJLk9ya5Jp4Df6r31sYNmLk+xI8oMk70tSSV42sPzH+o/PSLI9yUVJdvaXeU8nP7Ce95I8nOTDSbYkeSzJtUlelGRFkluS7Oq/fkuS1QPLnZTkv5LsSbIBWDlrva9Lcmf/d+XbSc4YdtlJZlEfoiRLgC8B/wH8EnABcH2Sk/sfeSfw98Ay4I5Zy54FfAD4TeBlwBnzfLtfBpYDq4D3Ap9KsmIkP4i0rz8A3gz8CvCrwN/Q64xrgZcCJwA/BS4bWOYG4B56Jft3wB/NvJFkFfDvwMeAY4C/ANYnOXa+ZSedRX3oXgcsBdZV1VNV9VXgFuDc/vs3V9X/VNUzVfV/s5b9feDaqtpcVT8BPjrP9/oZcElV/ayqbgWeAE6eZxnpF3VZVW2rqkfpTTbOraofV9X6qvpJVe3pv/7rAElOAH4N+EhVPVlVt9ObxMx4F3BrVd3a/33YAGwCzh5i2YlmUR+6lwDbquqZgdceoTfrBdg237IDzw/0WYAfV9Xegec/ofc/CWkcBsfjI8BLkhyV5IokjyR5HLgdOLq//+UlwGNVNT1ruRkvBX6vv9ljd5LdwBuA44ZYdqJZ1IfuB8DxSQb/LU8Avt9/fKDLE+4AVg88P37E2aRDMTgeT6A31i+i91fca6vqxcDp/fdDbzyvSDI1a7kZ24B/raqjB76mqmrdEMtONIv60N1Nb2Z7cZIl/Z0j5wA3DrHs54H39HdGHgV4zLRa8udJVic5Bvhr4N/o7Wv5KbC7//rfzny4qh6htyljbZIjkryB3u/CjOuAc5K8Ocmi/s7JM5KsHmLZiWZRH6KqeoregPot4EfAPwN/WFX3D7Hsl4FLgY3AA8Bd/beeHE9a6aDcQG8n+UPAg/R2An4COJLeWL8LuG3WMu8EXgs8Sq/EPzvzRlVtA94O/BWwi94M+y95tof2u+ykizcOaEeSVwDfBV44a1u0tKCSPAy8r6r+s+ssckbduSS/k+SF/cPs/hH4kiUtaZBF3b0/AXbS+9PyaeDPuo0jqTVu+pCkxjmjlqTGWdSS1LixXOZ02bJltXJle9dTefLJNo9627FjR9cR9mvVqlXzf2iBPfbYY0xPT2ehv+/U1FStWOGlVTQeBxrXYynqlStXcskll4xj1YfkwQcf7DrCnNauXdt1hP264IILuo6wj09+8pOdfN8VK1Zw/vnnd/K99fx32WWX7fc9N31IUuMsaklqnEUtSY2zqCWpcRa1JlqSs5J8L8kDST7UdR5pLha1Jlb/Yvefonflw1cC5yZ5ZbeppH1Z1JpkpwIPVNVD/cvV3kjvMpxSUyxqTbJVPPd2U9t59hZqACQ5L8mmJJump6eRumBRSwdQVVdW1ZqqWjM1NTX/AtIYWNSaZN/nufcFXM2z97qUmmFRa5J9E3h5kpOSHAG8A/hix5mkfYzlWh/S4aCq9iY5H/gKsAi4pqo2dxxL2odFrYlWVbcCt3adQzqQoTZ9eFKAJHVn3qL2pABJ6tYwM2pPCpCkDg1T
1POeFCBJGp+RHZ43eAbXnj17RrVaSZp4wxT1UCcFDJ7BtWzZslHlk6SJN0xRe1KAJHVo3uOoPSlAkro11AkvnhQgSd3xWh+S1DiLWpIaZ1FLUuMsaklqnEUtSY2zqCWpcRa1JDXOopakxlnUktQ4i1qSGmdRS1LjLGpJapxFLUmNG+rqeQdr165dXHHFFeNY9SG5+eabu44wp/Xr13cdYb92797ddYR9PP30011HaMqqVaO7M96DDz44snWtXbt2ZOsCWLdu3cjWVVUjW9dCcEYtSY2zqCWpcRa1JDXOopakxlnUktQ4i1qSGmdRa2IlOT7JxiRbkmxOcmHXmaS5jOU4aukwsRe4qKruTbIMuCfJhqra0nUwaZAzak2sqtpRVff2H+8BtgKjO3tEGhGLWgKSnAicAtw96/XzkmxKsml6erqTbJJFrYmXZCmwHnh/VT0++F5VXVlVa6pqzdTUVDcBNfEsak20JEvolfT1VfWFrvNIc7GoNbGSBLga2FpVH+86j7Q/FrUm2WnAu4Ezk9zX/zq761DSbB6ep4lVVXcA6TqHNJ95Z9RJrkmyM8l3FyKQJOm5htn08WngrDHnkCTtx7xFXVW3A48uQBZJ0hzcRi0d5kZ527tR3q5u1LeYG+Vt4ZYvXz6ydS2EkR31MXgG1969e0e1WkmaeCMr6sEzuBYvdqIuSaPicdSS1LhhDs/7HPB14OQk25O8d/yxJEkz5t1GUVXnLkQQSdLc3PQhSY2zqCWpcRa1JDXOopakxlnUktQ4i1qSGmdRS1LjLGpJapxFLUmNs6glqXEWtSQ1zqKWpMZZ1JLUuLFc4X/x4sWsXLlyHKs+JEm6jjCnc845p+sI+/XBD36w6wj7uO2227qO0JRR/q6N8ndk1ON6lGPx8ssvH9m6FoIzaklqnEUtSY2zqCWpcRa1JDXOopakxlnUktQ4i1oTLcmiJN9KckvXWaT9sag16S4EtnYdQjoQi1oTK8lq4C3AVV1nkQ7EotYk+wRwMfDM/j6Q5Lwkm5Jsmp6eXrBg0iCLWhMpyVuBnVV1z4E+V1VXVtWaqlozNTW1QOmk57KoNalOA96W5GHgRuDMJNd1G0mam0WtiVRVH66q1VV1IvAO4KtV9a6OY0lzsqglqXFjucypdDipqq8BX+s4hrRf886okxyfZGOSLUk2J7lwIYJJknqGmVHvBS6qqnuTLAPuSbKhqraMOZskiSFm1FW1o6ru7T/eQ+8srlXjDiZJ6jmonYlJTgROAe4eSxpJ0j6G3pmYZCmwHnh/VT0+x/vnAecBHHnkkSMLKOnALr300pGt68477xzZupYuXTqydQEsX758pOs7nAw1o06yhF5JX19VX5jrM4NncB1xxBGjzChJE22Yoz4CXA1sraqPjz+SJGnQMDPq04B30zvF9r7+19ljziVJ6pt3G3VV3QFkAbJIkubgKeSS1DiLWpIaZ1FLUuMsaklqnEUtSY2zqCWpcRa1JDXOopakxlnUktQ4i1qSGmdRS1LjLGpJapxFLUmNs6glqXFD34rrYJxwwgkjvT3QqIzyNkOjNOpbFo1Si7c/WrRoUdcRmnLDDTd0HWFOL3jBaOeB69atG+n6DifOqCWpcRa1JDXOopakxlnUktQ4i1qSGmdRa6IlOTrJTUnuT7I1yeu7ziTNNpbD86TDyD8Bt1XV7yY5Ajiq60DSbBa1JlaS5cDpwB8DVNVTwFNdZpLm4qYPTbKTgF3AtUm+leSqJFODH0hyXpJNSTZNT093k1ITz6LWJFsMvAa4vKpOAaaBDw1+oKqurKo1VbVmampqrnVIY2dRa5JtB7ZX1d395zfRK26pKRa1JlZV/RDYluTk/ktvBLZ0GEmakzsTNekuAK7vH/HxEPCejvNI+7CoNdGq6j5gTdc5pAOZd9NHkhcl+UaSbyfZnGTtQgSTJPUMM6N+Ejizqp5IsgS4I8mXq+quMWeTJDFEUVdVAU/0ny7pf9U4Q0mSnjXUUR9JFiW5
D9gJbBg4nEmSNGZDFXVVPV1VrwZWA6cmedXszwyewfXoo4+OOKYkTa6DOo66qnYDG4Gz5njv52dwHXPMMSOKJ0ka5qiPY5Mc3X98JPAm4P4x55Ik9Q1z1MdxwGeSLKJX7J+vqlvGG0uSNGOYoz6+A5yyAFkkSXPwWh+S1DiLWpIaZ1FLUuMsaklqnEUtSY2zqCWpcRa1JDXOopakxlnUktQ4i1qSGmdRS1LjLGpJapxFLUmNs6glqXHp3bt2xCtNdgGPjGh1K4EfjWhdo2SugzPKXC+tqmNHtK6hHcS4noT/BqNkrp79juuxFPUoJdlUVWu6zjGbuQ5Oq7nGodWf1VwHp6VcbvqQpMZZ1JLUuMOhqK/sOsB+mOvgtJprHFr9Wc11cJrJ1fw2akmadIfDjFqSJlqzRZ3krCTfS/JAkg91nWdGkmuS7Ezy3a6zzEhyfJKNSbYk2Zzkwq4zASR5UZJvJPl2P9farjONU4tjttWxMSPJoiTfSnJL11lmJDk6yU1J7k+yNcnrO8/U4qaPJIuA/wXeBGwHvgmcW1VbOg0GJDkdeAL4bFW9qus8AEmOA46rqnuTLAPuAX6763+vJAGmquqJJEuAO4ALq+quLnONQ6tjttWxMSPJB4A1wIur6q1d5wFI8hngv6vqqiRHAEdV1e4uM7U6oz4VeKCqHqqqp4Abgbd3nAmAqrodeLTrHIOqakdV3dt/vAfYCqzqNhVUzxP9p0v6X+3NDEajyTHb6tgASLIaeAtwVddZZiRZDpwOXA1QVU91XdLQblGvArYNPN9OI4OrdUlOBE4B7u44CvDzP23vA3YCG6qqiVxj0PyYbW1sAJ8ALgae6TjHoJOAXcC1/U0yVyWZ6jpUq0WtX0CSpcB64P1V9XjXeQCq6umqejWwGjg1SRObiyZNa2MjyVuBnVV1T9dZZlkMvAa4vKpOAaaBzvc3tFrU3weOH3i+uv+a9qO/DXg9cH1VfaHrPLP1/3zcCJzVcZRxaXbMNjo2TgPeluRhepuJzkxyXbeRgN5fQtsH/vK7iV5xd6rVov4m8PIkJ/U35r8D+GLHmZrV32l3NbC1qj7edZ4ZSY5NcnT/8ZH0drTd32mo8WlyzLY6Nqrqw1W1uqpOpPdv9dWqelfHsaiqHwLbkpzcf+mNQOc7Xpss6qraC5wPfIXezo/PV9XmblP1JPkc8HXg5CTbk7y360z0Zifvpjcrua//dXbXoYDjgI1JvkOvyDZUVTOHYY1Sw2O21bHRsguA6/vj9tXAP3Qbp9HD8yRJz2pyRi1JepZFLUmNs6glqXEWtSQ1zqKWpMZZ1JLUOItakhpnUUtS4/4fNlqhXlEo2SUAAAAASUVORK5CYII=\n",
103 | "text/plain": [
104 | ""
105 | ]
106 | },
107 | "metadata": {
108 | "needs_background": "light"
109 | },
110 | "output_type": "display_data"
111 | }
112 | ],
113 | "source": [
114 | "X = np.random.randn(3, 4, 4, 3)\n",
115 | "X_pad = zero_pads(X, 2)\n",
116 | "\n",
117 | "print('X shape', X.shape)\n",
118 | "print('X_pad shape', X_pad.shape)\n",
119 | "\n",
120 | "plt.subplot(1, 2, 1)\n",
121 | "plt.imshow(X[0, :, :, 1], cmap='gray')\n",
122 | "plt.title('origin')\n",
123 | "\n",
124 | "plt.subplot(1, 2, 2)\n",
125 | "plt.imshow(X_pad[0, :, :, 1], cmap='gray')\n",
126 | "plt.title('paded')"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "## One Step of Convolutional Layer\n",
134 | "---\n",
135 | "
\n",
136 | "\n",
137 | "Consider a filter mapped to one piece of the image, with \n",
138 | "\n",
139 | "$$ \\text{filter size:} \\quad (f, f, n_C) $$\n",
140 | "$$ \\text{piece of image} \\quad (f, f, n_C) $$\n",
141 | "\n",
142 | "Where the filter has the depth of the piece of input image.\n",
143 | "\n",
144 | "Another way to look at this is you can think of the filter as the weights $W$, and for each piece of the image, it serves as an input $X$, so in the convolutional process, the formula equals:\n",
145 | "\n",
146 | "$$ Z = sum(W*X) + b $$\n",
147 | "$$ A = g(Z) $$\n",
148 | "\n",
149 | "Where $b$ is the bias and $g$ is the activation function. Doesn't it look very similar to the equations in the dense neural network?"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 4,
155 | "metadata": {},
156 | "outputs": [],
157 | "source": [
158 | "def sigmoid(x):\n",
159 | "    return 1/(1 + np.exp(-x))\n",
160 | "\n",
161 | "\n",
162 | "def one_step_conv(X, W, b):\n",
163 | "    \"\"\"\n",
164 | "    X is one slice of the input and W is the filter; both have shape (f, f, n_C).\n",
165 | "    b is the bias for this specific filter (each filter has its own bias; biases are not shared).\n",
166 | "    Sigmoid is assumed to be the activation function; returns the scalar activation A.\n",
167 | "    \"\"\"\n",
168 | "    assert X.shape == W.shape\n",
169 | "    Z = np.sum(np.multiply(W, X)) + b\n",
170 | "    A = sigmoid(Z)\n",
171 | "    return A"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": 5,
177 | "metadata": {},
178 | "outputs": [
179 | {
180 | "name": "stdout",
181 | "output_type": "stream",
182 | "text": [
183 | "1.7501778145874707e-06\n"
184 | ]
185 | }
186 | ],
187 | "source": [
188 | "X = np.random.randn(10, 10, 3)\n",
189 | "W = np.random.randn(10, 10, 3)\n",
190 | "b = 0\n",
191 | "\n",
192 | "A = one_step_conv(X, W, b)\n",
193 | "print(A)"
194 | ]
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {},
199 | "source": [
200 | "## Convolution\n",
201 | "---\n",
202 | "\n",
203 | "\n",
205 | "\n",
206 | "\n",
207 | "Now the input (here we use `A_prev` ) would be a batch of whole images with size \n",
208 | "\n",
209 | "$$ (m, n_{W_{prev}}, n_{H_{prev}}, n_{C_{prev}}) $$\n",
210 | "\n",
211 | "Filter with size\n",
212 | "\n",
213 | "$$ (n_{C}, f, f, n_{C_{prev}}) $$\n",
214 | "\n",
215 | "Where $n_{C}$ is the number of filters, which would become the depth of the output image.\n",
216 | "\n",
217 | "Bias with size\n",
218 | "\n",
219 | "$$ (n_{C}, 1) $$\n",
220 | "\n",
221 | "And parameters include:\n",
222 | "$$ \\text{padding of each image:} \\enspace pad $$\n",
223 | "\n",
224 | "$$ \\text{moving step:} \\enspace stride $$\n",
225 | "\n",
226 | "So the resulting output would have size:\n",
227 | "\n",
228 | "$$ (m, \\lfloor\\frac{n_{W_{prev}} + 2p - f}{stride}\\rfloor + 1, \\lfloor\\frac{n_{H_{prev}} + 2p - f}{stride}\\rfloor + 1, n_C)$$\n",
229 | "\n",
230 | "Now given an image from the input, we will need to slice it into pieces and multiply with the filter one by one. \n",
231 | "\n",
232 | "Consider a 2D image with size $(n_{W_{prev}}, n_{H_{prev}})$, and stride is $s$, filter size of $f$, then the top-left corner of the output image would have mapping:\n",
233 | "```python\n",
234 | "input[0:(0 + f), 0:(0 + f)] -> output[0, 0]\n",
235 | "```\n",
236 | "\n",
237 | "And\n",
238 | "```python\n",
239 | "input[s:(s + f), 0:(0 + f)] -> output[1, 0]\n",
240 | "```\n",
241 | "\n",
242 | "The pattern would be:\n",
243 | "```python\n",
244 | "input[i*s:(i*s + f), j*s:(j*s + f)] -> output[i, j]\n",
245 | "```\n",
246 | "\n",
247 | "We will make use of this pattern in our implementation to slice the original image and map it to the output."
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": 6,
253 | "metadata": {},
254 | "outputs": [],
255 | "source": [
256 | "def conv(A_prev, filters, bias, parameters):\n",
257 | "    \"\"\"\n",
258 | "    A_prev: batched input images with shape (m, n_W_prev, n_H_prev, n_C_prev); bias has one entry per filter.\n",
259 | "    filters has shape (n_C, f, f, n_C_prev); parameters is a dict with keys 'pad' and 'stride'.\n",
260 | "    \"\"\"\n",
261 | "    \n",
262 | "    m, n_W_prev, n_H_prev, n_C_prev = A_prev.shape\n",
263 | "    pad, stride = parameters['pad'], parameters['stride']\n",
264 | "    n_C, f, f, _ = filters.shape\n",
265 | "    \n",
266 | "    n_W = (n_W_prev + 2*pad - f) // stride + 1\n",
267 | "    n_H = (n_H_prev + 2*pad - f) // stride + 1\n",
268 | "    \n",
269 | "    output = np.zeros((m, n_W, n_H, n_C))\n",
270 | "    padded_A_prev = zero_pads(A_prev, pad)\n",
271 | "    \n",
272 | "    for i in range(m):\n",
273 | "        # take out the image\n",
274 | "        padded_img = padded_A_prev[i]\n",
275 | "        for c in range(n_C):\n",
276 | "            # take out filters and bias for the channel\n",
277 | "            fil = filters[c]\n",
278 | "            b = bias[c]\n",
279 | "            for w in range(n_W):\n",
280 | "                for h in range(n_H):\n",
281 | "                    w_range = (stride*w, stride*w + f)\n",
282 | "                    h_range = (stride*h, stride*h + f)\n",
283 | "                    img_slice = padded_img[w_range[0]:w_range[1], h_range[0]:h_range[1], :]\n",
284 | "                    output[i, w, h, c] = one_step_conv(img_slice, fil, b)\n",
285 | "    \n",
286 | "    return output"
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": 7,
292 | "metadata": {
293 | "scrolled": true
294 | },
295 | "outputs": [
296 | {
297 | "name": "stdout",
298 | "output_type": "stream",
299 | "text": [
300 | "(4, 15, 15, 10)\n"
301 | ]
302 | }
303 | ],
304 | "source": [
305 | "A_prev = np.random.randn(4, 28, 28, 3)\n",
306 | "filters = np.random.randn(10, 3, 3, 3) # filter size (3, 3, 3)\n",
307 | "bias = np.zeros(10)\n",
308 | "parameters = {'pad': 2, 'stride': 2}\n",
309 | "\n",
310 | "Z = conv(A_prev, filters, bias, parameters)\n",
311 | "print(Z.shape)"
312 | ]
313 | },
314 | {
315 | "cell_type": "markdown",
316 | "metadata": {},
317 | "source": [
318 | "# Pooling\n",
319 | "---\n",
320 | "After convolutional layer, it typically follows a pooling layer. The pooling (POOL) layer reduces the height and width of the input. It helps reduce computation, as well as helps make feature detectors more invariant to its position in the input. The two types of pooling layers are: \n",
321 | "\n",
322 | "- Max-pooling layer: slides an ($f, f$) window over the input and stores the max value of the window in the output.\n",
323 | "\n",
324 | "- Average-pooling layer: slides an ($f, f$) window over the input and stores the average value of the window in the output.\n",
325 | "\n",
326 | "\n",
327 | "\n",
328 | " \n",
329 | " | \n",
330 | "\n",
331 | " | \n",
332 | " \n",
333 | " | \n",
334 | " |
\n",
335 | "\n",
336 | "from Deep Learning Specialization Course\n",
337 | "\n",
338 | "The process is pretty much the same as the convolutional layer, with a filter and a stride, at each step, we will take a slice of the whole image and compute one value -- either max or average -- from it.\n",
339 | "\n",
340 | "Given filter size $f$, stride $s$ and input size:\n",
341 | "$$ (m, n_{W_{prev}}, n_{H_{prev}}, n_{C_{prev}}) $$\n",
342 | "\n",
343 | "Output would have size:\n",
344 | "\n",
345 | "$$ (m, \\lfloor\\frac{n_{W_{prev}} - f}{stride}\\rfloor + 1, \\lfloor\\frac{n_{H_{prev}} - f}{stride}\\rfloor + 1, n_C)$$\n",
346 | "$$ n_C = n_{C_{prev}}$$\n",
347 | "\n",
348 | "Note that pooling does not change the depth of an image."
349 | ]
350 | },
351 | {
352 | "cell_type": "code",
353 | "execution_count": 8,
354 | "metadata": {},
355 | "outputs": [],
356 | "source": [
357 | "def pooling(A_prev, parameters, mode='max'):\n",
358 | "    \"\"\"\n",
359 | "    A_prev: batched input images (m, n_W_prev, n_H_prev, n_C_prev); parameters holds 'f' and 'stride'; mode is 'max' or 'average'.\n",
360 | "    \"\"\"\n",
361 | "    m, n_W_prev, n_H_prev, n_C_prev = A_prev.shape\n",
362 | "    f, stride = parameters['f'], parameters['stride']\n",
363 | "    \n",
364 | "    n_W = (n_W_prev - f)//stride + 1\n",
365 | "    n_H = (n_H_prev - f)//stride + 1\n",
366 | "    n_C = n_C_prev\n",
367 | "    \n",
368 | "    output = np.zeros((m, n_W, n_H, n_C))\n",
369 | "    for i in range(m):\n",
370 | "        img = A_prev[i]\n",
371 | "        for w in range(n_W):\n",
372 | "            for h in range(n_H):\n",
373 | "                for c in range(n_C):\n",
374 | "                    w_range = (stride*w, stride*w + f)\n",
375 | "                    h_range = (stride*h, stride*h + f)\n",
376 | "                    img_slice = img[w_range[0]:w_range[1], h_range[0]:h_range[1], c]\n",
377 | "                    if mode == 'max':\n",
378 | "                        output[i, w, h, c] = np.max(img_slice)\n",
379 | "                    elif mode == 'average':\n",
380 | "                        output[i, w, h, c] = np.mean(img_slice)\n",
381 | "    return output"
382 | ]
383 | },
384 | {
385 | "cell_type": "code",
386 | "execution_count": 9,
387 | "metadata": {},
388 | "outputs": [
389 | {
390 | "name": "stdout",
391 | "output_type": "stream",
392 | "text": [
393 | "(4, 14, 14, 3)\n"
394 | ]
395 | }
396 | ],
397 | "source": [
398 | "A_prev = np.random.randn(4, 28, 28, 3)\n",
399 | "parameters = {'f': 2, 'stride': 2}\n",
400 | "\n",
401 | "A = pooling(A_prev, parameters, mode='max')\n",
402 | "print(A.shape)"
403 | ]
404 | },
405 | {
406 | "cell_type": "code",
407 | "execution_count": null,
408 | "metadata": {},
409 | "outputs": [],
410 | "source": []
411 | }
412 | ],
413 | "metadata": {
414 | "kernelspec": {
415 | "display_name": "Python 3",
416 | "language": "python",
417 | "name": "python3"
418 | },
419 | "language_info": {
420 | "codemirror_mode": {
421 | "name": "ipython",
422 | "version": 3
423 | },
424 | "file_extension": ".py",
425 | "mimetype": "text/x-python",
426 | "name": "python",
427 | "nbconvert_exporter": "python",
428 | "pygments_lexer": "ipython3",
429 | "version": "3.8.3"
430 | }
431 | },
432 | "nbformat": 4,
433 | "nbformat_minor": 4
434 | }
435 |
--------------------------------------------------------------------------------
/convolution/images/Convolution_schematic.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/Convolution_schematic.gif
--------------------------------------------------------------------------------
/convolution/images/PAD.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/PAD.png
--------------------------------------------------------------------------------
/convolution/images/SIGNS.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/SIGNS.png
--------------------------------------------------------------------------------
/convolution/images/a_pool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/a_pool.png
--------------------------------------------------------------------------------
/convolution/images/ave-pool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/ave-pool.png
--------------------------------------------------------------------------------
/convolution/images/ave_pool1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/ave_pool1.png
--------------------------------------------------------------------------------
/convolution/images/average_pool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/average_pool.png
--------------------------------------------------------------------------------
/convolution/images/conv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/conv.png
--------------------------------------------------------------------------------
/convolution/images/conv1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/conv1.png
--------------------------------------------------------------------------------
/convolution/images/conv_kiank.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/conv_kiank.mp4
--------------------------------------------------------------------------------
/convolution/images/conv_nn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/conv_nn.png
--------------------------------------------------------------------------------
/convolution/images/images/convblock_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/convblock_kiank.png
--------------------------------------------------------------------------------
/convolution/images/images/idblock2_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/idblock2_kiank.png
--------------------------------------------------------------------------------
/convolution/images/images/idblock3_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/idblock3_kiank.png
--------------------------------------------------------------------------------
/convolution/images/images/my_image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/my_image.jpg
--------------------------------------------------------------------------------
/convolution/images/images/resnet_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/resnet_kiank.png
--------------------------------------------------------------------------------
/convolution/images/images/signs_data_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/signs_data_kiank.png
--------------------------------------------------------------------------------
/convolution/images/images/skip_connection_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/skip_connection_kiank.png
--------------------------------------------------------------------------------
/convolution/images/images/vanishing_grad_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/images/vanishing_grad_kiank.png
--------------------------------------------------------------------------------
/convolution/images/max_pool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/max_pool.png
--------------------------------------------------------------------------------
/convolution/images/max_pool1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/max_pool1.png
--------------------------------------------------------------------------------
/convolution/images/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/model.png
--------------------------------------------------------------------------------
/convolution/images/thumbs_up.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/thumbs_up.jpg
--------------------------------------------------------------------------------
/convolution/images/vert_horiz_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/convolution/images/vert_horiz_kiank.png
--------------------------------------------------------------------------------
/deep-neural-network/Deep NN.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Build a Multi-Layer Neural Network\n"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Weights Initialization\n",
15 | "---\n",
16 | "Firstly, weights need to be initialized for different layers. Note that in general, the input is not considered as a layer, but output is.\n",
17 | "\n",
18 | "For `lth` layer, \n",
19 | "- weight $W^{[l]}$ has shape $(n^{[l]}, n^{[l-1]})$\n",
20 | "- bias $b^{[l]}$ has shape $(n^{[l]}, 1)$\n",
21 | "\n",
22 | "where $n^{[0]}$ equals the number of input features."
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 1,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "import numpy as np"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 2,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "def weights_init(layers_dim):\n",
41 | " params = {}\n",
42 | " \n",
43 | " n = len(layers_dim)\n",
44 | " for i in range(1, n):\n",
45 | " params['W' + str(i)] = np.random.randn(layers_dim[i], layers_dim[i-1])*0.01\n",
46 | " params['b' + str(i)] = np.zeros((layers_dim[i], 1))\n",
47 | " return params"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 3,
53 | "metadata": {},
54 | "outputs": [
55 | {
56 | "data": {
57 | "text/plain": [
58 | "{'W1': array([[-0.00197904, -0.01213369],\n",
59 | " [ 0.00689108, 0.00453008],\n",
60 | " [-0.00691454, 0.01541786],\n",
61 | " [-0.01061402, 0.00787606],\n",
62 | " [ 0.01147524, 0.00290551]]),\n",
63 | " 'b1': array([[0.],\n",
64 | " [0.],\n",
65 | " [0.],\n",
66 | " [0.],\n",
67 | " [0.]]),\n",
68 | " 'W2': array([[-0.00037371, -0.0026616 , 0.00046249, 0.00950304, 0.01676771]]),\n",
69 | " 'b2': array([[0.]])}"
70 | ]
71 | },
72 | "execution_count": 3,
73 | "metadata": {},
74 | "output_type": "execute_result"
75 | }
76 | ],
77 | "source": [
78 | "p = weights_init([2, 5, 1])\n",
79 | "p"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "# Forward\n",
87 | "---\n",
88 | "## Equations of Multi-layer\n",
89 | "---\n",
90 | "$$ Z^{[l]} = W^{[l]}A^{[l-1]} + b^{[l]} $$\n",
91 | "\n",
92 | "$$ A^{[l]} = g^{[l]}(Z^{[l]}) $$\n",
93 | "\n",
94 | "Where $l$ is the `lth` layer."
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 4,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "def sigmoid(x):\n",
104 | " return 1/(1 + np.exp(-x))\n",
105 | "\n",
106 | "\n",
107 | "def relu(x):\n",
108 | " return np.maximum(x, 0)"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 5,
114 | "metadata": {},
115 | "outputs": [
116 | {
117 | "name": "stdout",
118 | "output_type": "stream",
119 | "text": [
120 | "[0.23147522 0.11920292 0.78583498] [0. 0. 1.3]\n"
121 | ]
122 | }
123 | ],
124 | "source": [
125 | "x = np.array([-1.2, -2.0, 1.3])\n",
126 | "\n",
127 | "sx = sigmoid(x)\n",
128 | "rx = relu(x)\n",
129 | "\n",
130 | "print(sx, rx)"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": 6,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": [
139 | "def forward(X, params):\n",
140 | " # intermediate layer use relu as activation\n",
141 | " # last layer use sigmoid\n",
142 | " n_layers = int(len(params)/2)\n",
143 | " A = X\n",
144 | " cache = {}\n",
145 | " for i in range(1, n_layers):\n",
146 | " W, b = params['W'+str(i)], params['b'+str(i)]\n",
147 | " Z = np.dot(W, A) + b\n",
148 | " A = relu(Z)\n",
149 | " cache['Z'+str(i)] = Z\n",
150 | " cache['A'+str(i)] = A\n",
151 | " \n",
152 | " # last layer\n",
153 | " W, b = params['W'+str(i+1)], params['b'+str(i+1)]\n",
154 | " Z = np.dot(W, A) + b\n",
155 | " A = sigmoid(Z)\n",
156 | " cache['Z'+str(i+1)] = Z\n",
157 | " cache['A'+str(i+1)] = A\n",
158 | " \n",
159 | " return cache, A"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 7,
165 | "metadata": {},
166 | "outputs": [],
167 | "source": [
168 | "X = np.array([1., 1.]).reshape(2, 1)\n",
169 | "cache, A = forward(X, p)"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 8,
175 | "metadata": {},
176 | "outputs": [
177 | {
178 | "data": {
179 | "text/plain": [
180 | "{'Z1': array([[-0.01411272],\n",
181 | " [ 0.01142116],\n",
182 | " [ 0.00850332],\n",
183 | " [-0.00273796],\n",
184 | " [ 0.01438075]]),\n",
185 | " 'A1': array([[0. ],\n",
186 | " [0.01142116],\n",
187 | " [0.00850332],\n",
188 | " [0. ],\n",
189 | " [0.01438075]]),\n",
190 | " 'Z2': array([[0.00021467]]),\n",
191 | " 'A2': array([[0.50005367]])}"
192 | ]
193 | },
194 | "execution_count": 8,
195 | "metadata": {},
196 | "output_type": "execute_result"
197 | }
198 | ],
199 | "source": [
200 | "cache"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": 9,
206 | "metadata": {},
207 | "outputs": [
208 | {
209 | "data": {
210 | "text/plain": [
211 | "array([[0.50005367]])"
212 | ]
213 | },
214 | "execution_count": 9,
215 | "metadata": {},
216 | "output_type": "execute_result"
217 | }
218 | ],
219 | "source": [
220 | "A"
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {},
226 | "source": [
227 | "# Cost Function\n",
228 | "---\n",
229 | "Since we still treat this as a binary classification problem, the cost over a batch of $m$ examples is:\n",
230 | "\n",
231 | "$$-\\frac{1}{m} \\sum\\limits_{i = 1}^{m} (y^{(i)}\\log\\left(a^{[L] (i)}\\right) + (1-y^{(i)})\\log\\left(1- a^{[L](i)}\\right)) $$\n",
232 | "\n",
233 | "Where $a$ is the predicted value, and $y$ is the actual one."
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 10,
239 | "metadata": {},
240 | "outputs": [],
241 | "source": [
242 | "def compute_cost(A, Y):\n",
243 | " \"\"\"\n",
244 | " For binary classification, both A and Y would have shape (1, m), where m is the batch size\n",
245 | " \"\"\"\n",
246 | " assert A.shape == Y.shape\n",
247 | " m = A.shape[1]\n",
248 | " s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))\n",
249 | " loss = -s/m\n",
250 | " return np.squeeze(loss)"
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": 11,
256 | "metadata": {},
257 | "outputs": [
258 | {
259 | "name": "stdout",
260 | "output_type": "stream",
261 | "text": [
262 | "0.23101772979827936\n"
263 | ]
264 | }
265 | ],
266 | "source": [
267 | "A = np.array([[0.9, 0.3]])\n",
268 | "Y = np.array([[1, 0]])\n",
269 | "\n",
270 | "loss = compute_cost(A, Y)\n",
271 | "print(loss)"
272 | ]
273 | },
274 | {
275 | "cell_type": "markdown",
276 | "metadata": {},
277 | "source": [
278 | "# Backward Propagation\n",
279 | "---\n",
280 | "![backward propagation](images/backprop_kiank.png)\n",
281 | " **[source]**: https://github.com/enggen/Deep-Learning-Coursera \n",
282 | "\n",
283 | "The backward gradient can be calculated in recurrent fashion:\n",
284 | "\n",
285 | "$$ dZ^{[l]} = dA^{[l]} * g^{[l]'}(Z^{[l]}) $$\n",
286 | "\n",
287 | "$$ dW^{[l]} = \\frac{\\partial \\mathcal{L} }{\\partial W^{[l]}} = \\frac{1}{m} dZ^{[l]} A^{[l-1] T} $$\n",
288 | "$$ db^{[l]} = \\frac{\\partial \\mathcal{L} }{\\partial b^{[l]}} = \\frac{1}{m} \\sum_{i = 1}^{m} dZ^{[l](i)}$$\n",
289 | "$$ dA^{[l-1]} = \\frac{\\partial \\mathcal{L} }{\\partial A^{[l-1]}} = W^{[l] T} dZ^{[l]} $$\n"
290 | ]
291 | },
292 | {
293 | "cell_type": "markdown",
294 | "metadata": {},
295 | "source": [
296 | "First, implementation of derivative of `sigmoid` and `relu` is required."
297 | ]
298 | },
299 | {
300 | "cell_type": "code",
301 | "execution_count": 12,
302 | "metadata": {},
303 | "outputs": [],
304 | "source": [
305 | "def sigmoid_grad(A, Z):\n",
306 | " grad = np.multiply(A, 1-A)\n",
307 | " return grad\n",
308 | "\n",
309 | "\n",
310 | "def relu_grad(A, Z):\n",
311 | " grad = np.zeros(Z.shape)\n",
312 | " grad[Z>0] = 1\n",
313 | " return grad"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": 13,
319 | "metadata": {},
320 | "outputs": [
321 | {
322 | "name": "stdout",
323 | "output_type": "stream",
324 | "text": [
325 | "[[0.87901144 0.29881771]\n",
326 | " [1.68253349 2.73842285]\n",
327 | " [1.97877652 0.12396486]] \n",
328 | "\n",
329 | "[[0.70661733 0.57415347]\n",
330 | " [0.84323972 0.93925618]\n",
331 | " [0.87855068 0.53095159]] \n",
332 | "\n",
333 | "[[0.20730928 0.24450126]\n",
334 | " [0.1321865 0.05705401]\n",
335 | " [0.10669938 0.249042 ]]\n"
336 | ]
337 | }
338 | ],
339 | "source": [
340 | "z = np.random.randn(3, 2)\n",
341 | "a = sigmoid(z)\n",
342 | "g = sigmoid_grad(a, z)\n",
343 | "\n",
344 | "print(z, '\\n')\n",
345 | "print(a, '\\n')\n",
346 | "print(g)"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": 14,
352 | "metadata": {},
353 | "outputs": [
354 | {
355 | "name": "stdout",
356 | "output_type": "stream",
357 | "text": [
358 | "[[ 0.63378157 1.22440607]\n",
359 | " [-0.94572088 0.34021284]\n",
360 | " [-0.14649618 0.22595908]] \n",
361 | "\n",
362 | "[[0.63378157 1.22440607]\n",
363 | " [0. 0.34021284]\n",
364 | " [0. 0.22595908]] \n",
365 | "\n",
366 | "[[1. 1.]\n",
367 | " [0. 1.]\n",
368 | " [0. 1.]]\n"
369 | ]
370 | }
371 | ],
372 | "source": [
373 | "z = np.random.randn(3, 2)\n",
374 | "a = relu(z)\n",
375 | "g = relu_grad(a, z)\n",
376 | "\n",
377 | "print(z, '\\n')\n",
378 | "print(a, '\\n')\n",
379 | "print(g)"
380 | ]
381 | },
382 | {
383 | "cell_type": "markdown",
384 | "metadata": {},
385 | "source": [
386 | "Following the equations above, we have our implementation of backward propagation. Note that except the last layer where `sigmoid` function is used, the rest we all apply `relu` derivative to get the gradients."
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": 15,
392 | "metadata": {},
393 | "outputs": [],
394 | "source": [
395 | "def backward(params, cache, X, Y):\n",
396 | " \"\"\"\n",
397 | " params: weight [W, b]\n",
398 | " cache: result [A, Z]\n",
399 | " Y: shape (1, m)\n",
400 | " \"\"\"\n",
401 | " grad = {}\n",
402 | " n_layers = int(len(params)/2)\n",
403 | " m = Y.shape[1]\n",
404 | " cache['A0'] = X\n",
405 | " \n",
406 | " for l in range(n_layers, 0, -1):\n",
407 | " A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]\n",
408 | " W = params['W'+str(l)]\n",
409 | " if l == n_layers:\n",
410 | " dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)\n",
411 | " \n",
412 | " if l == n_layers:\n",
413 | " dZ = np.multiply(dA, sigmoid_grad(A, Z))\n",
414 | " else:\n",
415 | " dZ = np.multiply(dA, relu_grad(A, Z))\n",
416 | " dW = np.dot(dZ, A_prev.T)/m\n",
417 | " db = np.sum(dZ, axis=1, keepdims=True)/m\n",
418 | " dA = np.dot(W.T, dZ)\n",
419 | "\n",
420 | " grad['dW'+str(l)] = dW\n",
421 | " grad['db'+str(l)] = db\n",
422 | " \n",
423 | " return grad"
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": 16,
429 | "metadata": {},
430 | "outputs": [
431 | {
432 | "data": {
433 | "text/plain": [
434 | "{'dW2': array([[ 0. , -0.00570997, -0.0042512 , 0. , -0.00718961]]),\n",
435 | " 'db2': array([[-0.49994633]]),\n",
436 | " 'dW1': array([[ 0. , 0. ],\n",
437 | " [ 0.00133066, 0.00133066],\n",
438 | " [-0.00023122, -0.00023122],\n",
439 | " [ 0. , 0. ],\n",
440 | " [-0.00838296, -0.00838296]]),\n",
441 | " 'db1': array([[ 0. ],\n",
442 | " [ 0.00133066],\n",
443 | " [-0.00023122],\n",
444 | " [ 0. ],\n",
445 | " [-0.00838296]])}"
446 | ]
447 | },
448 | "execution_count": 16,
449 | "metadata": {},
450 | "output_type": "execute_result"
451 | }
452 | ],
453 | "source": [
454 | "g = backward(p, cache, np.array([[1], [1]]), np.array([[1]]))\n",
455 | "g"
456 | ]
457 | },
458 | {
459 | "cell_type": "markdown",
460 | "metadata": {},
461 | "source": [
462 | "Now, given the gradients and a learning rate $\\alpha$, the weights are updated as follows:\n",
463 | " \n",
464 | "$$ W^{[l]} := W^{[l]} - \\alpha \\, dW^{[l]} $$\n",
465 | "$$ b^{[l]} := b^{[l]} - \\alpha \\, db^{[l]} $$"
466 | ]
467 | },
468 | {
469 | "cell_type": "code",
470 | "execution_count": 17,
471 | "metadata": {},
472 | "outputs": [],
473 | "source": [
474 | "def optimize(params, grads, lr):\n",
475 | " n_layers = int(len(params)/2)\n",
476 | " for i in range(1, n_layers+1):\n",
477 | " dW, db = grads['dW'+str(i)], grads['db'+str(i)]\n",
478 | " params['W'+str(i)] -= lr*dW\n",
479 | " params['b'+str(i)] -= lr*db\n",
480 | " return params"
481 | ]
482 | },
483 | {
484 | "cell_type": "code",
485 | "execution_count": 18,
486 | "metadata": {},
487 | "outputs": [
488 | {
489 | "data": {
490 | "text/plain": [
491 | "{'W1': array([[-0.00197904, -0.01213369],\n",
492 | " [ 0.00556042, 0.00319942],\n",
493 | " [-0.00668332, 0.01564908],\n",
494 | " [-0.01061402, 0.00787606],\n",
495 | " [ 0.0198582 , 0.01128847]]),\n",
496 | " 'b1': array([[ 0. ],\n",
497 | " [-0.00133066],\n",
498 | " [ 0.00023122],\n",
499 | " [ 0. ],\n",
500 | " [ 0.00838296]]),\n",
501 | " 'W2': array([[-0.00037371, 0.00304836, 0.00471369, 0.00950304, 0.02395732]]),\n",
502 | " 'b2': array([[0.49994633]])}"
503 | ]
504 | },
505 | "execution_count": 18,
506 | "metadata": {},
507 | "output_type": "execute_result"
508 | }
509 | ],
510 | "source": [
511 | "s = optimize(p, g, 1)\n",
512 | "s"
513 | ]
514 | },
515 | {
516 | "cell_type": "markdown",
517 | "metadata": {},
518 | "source": [
519 | "# Apply on Dataset\n",
520 | "---\n",
521 | "Let's apply our model to a generated dataset with 200 features.\n",
522 | ]
523 | },
524 | {
525 | "cell_type": "code",
526 | "execution_count": 19,
527 | "metadata": {},
528 | "outputs": [
529 | {
530 | "name": "stdout",
531 | "output_type": "stream",
532 | "text": [
533 | "train shape (8000, 200)\n",
534 | "test shape (2000, 200)\n"
535 | ]
536 | }
537 | ],
538 | "source": [
539 | "from sklearn import datasets\n",
540 | "\n",
541 | "\n",
542 | "X, y = datasets.make_classification(n_samples=10000, n_features=200, random_state=123)\n",
543 | "\n",
544 | "X_train, X_test = X[:8000], X[8000:]\n",
545 | "y_train, y_test = y[:8000], y[8000:]\n",
546 | "\n",
547 | "print('train shape', X_train.shape)\n",
548 | "print('test shape', X_test.shape)"
549 | ]
550 | },
551 | {
552 | "cell_type": "code",
553 | "execution_count": 21,
554 | "metadata": {},
555 | "outputs": [],
556 | "source": [
557 | "def generate_batch(X, batch_size):\n",
558 | "    n = X.shape[0]\n",
559 | "    batches = [range(i, min(i + batch_size, n)) for i in range(0, n, batch_size)]  # cap final batch at n\n",
560 | "    return batches\n",
561 | "\n",
562 | "\n",
563 | "def accuracy(Y, Y_pred):\n",
564 | " \"\"\"\n",
565 | " Y: vector of true value\n",
566 | " Y_pred: vector of predicted value\n",
567 | " \"\"\"\n",
568 | " \n",
569 | " assert Y.shape[0] == 1\n",
570 | " assert Y.shape == Y_pred.shape\n",
571 | " Y_pred = np.round(Y_pred)\n",
572 | " acc = float(np.dot(Y, Y_pred.T) + np.dot(1 - Y, 1 - Y_pred.T))/Y.size\n",
573 | " return acc"
574 | ]
575 | },
576 | {
577 | "cell_type": "code",
578 | "execution_count": 21,
579 | "metadata": {},
580 | "outputs": [],
581 | "source": [
582 | "def train(X_train, y_train, layers: list, batch_size=200, n_iter=100, lr=0.1):\n",
583 | " # prepare batch training\n",
584 | " batches = generate_batch(X_train, batch_size)\n",
585 | " # init weights\n",
586 | " parameters = weights_init(layers)\n",
587 | " for i in range(n_iter):\n",
588 | " for batch in batches:\n",
589 | " X = X_train[batch, :].T\n",
590 | " Y = y_train[batch].reshape(1, -1)\n",
591 | " cache, A = forward(X, parameters)\n",
592 | " grads = backward(parameters, cache, X, Y)\n",
593 | " parameters = optimize(parameters, grads, lr)\n",
594 | " \n",
595 | " if i%10 == 0:\n",
596 | " loss = compute_cost(A, Y)\n",
597 | " print(f'iteration {i}: loss {loss}')\n",
598 | " return parameters"
599 | ]
600 | },
601 | {
602 | "cell_type": "markdown",
603 | "metadata": {},
604 | "source": [
605 | "Let's build a 3-layer neural network, with input of 200 features."
606 | ]
607 | },
608 | {
609 | "cell_type": "code",
610 | "execution_count": 22,
611 | "metadata": {},
612 | "outputs": [
613 | {
614 | "name": "stdout",
615 | "output_type": "stream",
616 | "text": [
617 | "iteration 0: loss 0.6930831830164655\n",
618 | "iteration 10: loss 0.6930082822907631\n",
619 | "iteration 20: loss 0.6929949487286129\n",
620 | "iteration 30: loss 0.6929543341306021\n",
621 | "iteration 40: loss 0.6927636568599188\n",
622 | "iteration 50: loss 0.690073352055835\n",
623 | "iteration 60: loss 0.2531001812337807\n",
624 | "iteration 70: loss 0.127696331048521\n",
625 | "iteration 80: loss 0.08193633942165292\n",
626 | "iteration 90: loss 0.05580582920505571\n"
627 | ]
628 | }
629 | ],
630 | "source": [
631 | "trained_param = train(X_train, y_train, layers=[200, 20, 10, 1], batch_size=200, n_iter=100, lr=0.05)"
632 | ]
633 | },
634 | {
635 | "cell_type": "code",
636 | "execution_count": 23,
637 | "metadata": {},
638 | "outputs": [],
639 | "source": [
640 | "cache, pred = forward(X_test.T, trained_param)"
641 | ]
642 | },
643 | {
644 | "cell_type": "code",
645 | "execution_count": 24,
646 | "metadata": {},
647 | "outputs": [
648 | {
649 | "name": "stdout",
650 | "output_type": "stream",
651 | "text": [
652 | "accuracy: 94.39999999999999%\n"
653 | ]
654 | }
655 | ],
656 | "source": [
657 | "acc = accuracy(y_test.reshape(1, -1), pred)\n",
658 | "\n",
659 | "print(f'accuracy: {acc*100}%')"
660 | ]
661 | },
662 | {
663 | "cell_type": "markdown",
664 | "metadata": {},
665 | "source": [
666 | "# In a Class"
667 | ]
668 | },
669 | {
670 | "cell_type": "code",
671 | "execution_count": 19,
672 | "metadata": {},
673 | "outputs": [],
674 | "source": [
675 | "class deepNN:\n",
676 | " def __init__(self, layers):\n",
677 | " self.layers = layers\n",
678 | " self.params = {}\n",
679 | " \n",
680 | " \n",
681 | " def weights_init(self):\n",
682 | " n = len(self.layers)\n",
683 | " for i in range(1, n):\n",
684 | " self.params['W' + str(i)] = np.random.randn(self.layers[i], self.layers[i-1])*0.01\n",
685 | " self.params['b' + str(i)] = np.zeros((self.layers[i], 1))\n",
686 | " \n",
687 | " @staticmethod\n",
688 | " def sigmoid(x):\n",
689 | " return 1/(1 + np.exp(-x))\n",
690 | "\n",
691 | " @staticmethod\n",
692 | " def relu(x):\n",
693 | " return np.maximum(x, 0)\n",
694 | " \n",
695 | " @staticmethod\n",
696 | " def compute_cost(A, Y):\n",
697 | " \"\"\"\n",
698 | " For binary classification, both A and Y would have shape (1, m), where m is the batch size\n",
699 | " \"\"\"\n",
700 | " assert A.shape == Y.shape\n",
701 | " m = A.shape[1]\n",
702 | " s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))\n",
703 | " loss = -s/m\n",
704 | " return np.squeeze(loss)\n",
705 | " \n",
706 | " @staticmethod\n",
707 | " def sigmoid_grad(A, Z):\n",
708 | " grad = np.multiply(A, 1-A)\n",
709 | " return grad\n",
710 | "\n",
711 | " @staticmethod\n",
712 | " def relu_grad(A, Z):\n",
713 | " grad = np.zeros(Z.shape)\n",
714 | " grad[Z>0] = 1\n",
715 | " return grad\n",
716 | " \n",
717 | " \n",
718 | " def forward(self, X):\n",
719 | " # intermediate layer use relu as activation\n",
720 | " # last layer use sigmoid\n",
721 | " n_layers = int(len(self.params)/2)\n",
722 | " A = X\n",
723 | " cache = {}\n",
724 | " for i in range(1, n_layers):\n",
725 | " W, b = self.params['W'+str(i)], self.params['b'+str(i)]\n",
726 | " Z = np.dot(W, A) + b\n",
727 | " A = self.relu(Z)\n",
728 | " cache['Z'+str(i)] = Z\n",
729 | " cache['A'+str(i)] = A\n",
730 | "\n",
731 | " # last layer\n",
732 | " W, b = self.params['W'+str(i+1)], self.params['b'+str(i+1)]\n",
733 | " Z = np.dot(W, A) + b\n",
734 | " A = self.sigmoid(Z)\n",
735 | " cache['Z'+str(i+1)] = Z\n",
736 | " cache['A'+str(i+1)] = A\n",
737 | "\n",
738 | " return cache, A\n",
739 | " \n",
740 | " def backward(self, cache, X, Y):\n",
741 | " \"\"\"\n",
742 | " cache: result [A, Z]\n",
743 | " Y: shape (1, m)\n",
744 | " \"\"\"\n",
745 | " grad = {}\n",
746 | " n_layers = int(len(self.params)/2)\n",
747 | " m = Y.shape[1]\n",
748 | " cache['A0'] = X\n",
749 | "\n",
750 | " for l in range(n_layers, 0, -1):\n",
751 | " A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]\n",
752 | " W = self.params['W'+str(l)]\n",
753 | " if l == n_layers:\n",
754 | " dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)\n",
755 | "\n",
756 | " if l == n_layers:\n",
757 | " dZ = np.multiply(dA, self.sigmoid_grad(A, Z))\n",
758 | " else:\n",
759 | " dZ = np.multiply(dA, self.relu_grad(A, Z))\n",
760 | " dW = np.dot(dZ, A_prev.T)/m\n",
761 | " db = np.sum(dZ, axis=1, keepdims=True)/m\n",
762 | " dA = np.dot(W.T, dZ)\n",
763 | "\n",
764 | " grad['dW'+str(l)] = dW\n",
765 | " grad['db'+str(l)] = db\n",
766 | "\n",
767 | " return grad\n",
768 | " \n",
769 | " def optimize(self, grads, lr):\n",
770 | " n_layers = int(len(self.params)/2)\n",
771 | " for i in range(1, n_layers+1):\n",
772 | " dW, db = grads['dW'+str(i)], grads['db'+str(i)]\n",
773 | " self.params['W'+str(i)] -= lr*dW\n",
774 | " self.params['b'+str(i)] -= lr*db\n",
775 | " \n",
776 | "    @staticmethod\n",
777 | "    def generate_batch(X, batch_size):\n",
778 | "        n = X.shape[0]\n",
779 | "        batches = [range(i, min(i + batch_size, n)) for i in range(0, n, batch_size)]  # cap final batch at n\n",
780 | "        return batches\n",
781 | " \n",
782 | " \n",
783 | " def train(self, X_train, y_train, batch_size=200, n_iter=100, lr=0.1):\n",
784 | " # prepare batch training\n",
785 | " batches = self.generate_batch(X_train, batch_size)\n",
786 | " # init weights\n",
787 | " self.weights_init()\n",
788 | " for i in range(n_iter):\n",
789 | " for batch in batches:\n",
790 | " X = X_train[batch, :].T\n",
791 | " Y = y_train[batch].reshape(1, -1)\n",
792 | " cache, A = self.forward(X)\n",
793 | " grads = self.backward(cache, X, Y)\n",
794 | " self.optimize(grads, lr)\n",
795 | "\n",
796 | " if i%10 == 0:\n",
797 | " loss = self.compute_cost(A, Y)\n",
798 | " print(f'iteration {i}: loss {loss}')"
799 | ]
800 | },
801 | {
802 | "cell_type": "code",
803 | "execution_count": 5,
804 | "metadata": {},
805 | "outputs": [
806 | {
807 | "name": "stdout",
808 | "output_type": "stream",
809 | "text": [
810 | "train shape (8000, 200)\n",
811 | "test shape (2000, 200)\n"
812 | ]
813 | }
814 | ],
815 | "source": [
816 | "from sklearn import datasets\n",
817 | "\n",
818 | "\n",
819 | "X, y = datasets.make_classification(n_samples=10000, n_features=200, random_state=123)\n",
820 | "\n",
821 | "X_train, X_test = X[:8000], X[8000:]\n",
822 | "y_train, y_test = y[:8000], y[8000:]\n",
823 | "\n",
824 | "print('train shape', X_train.shape)\n",
825 | "print('test shape', X_test.shape)"
826 | ]
827 | },
828 | {
829 | "cell_type": "code",
830 | "execution_count": 20,
831 | "metadata": {},
832 | "outputs": [
833 | {
834 | "name": "stdout",
835 | "output_type": "stream",
836 | "text": [
837 | "iteration 0: loss 0.6930968966284916\n",
838 | "iteration 10: loss 0.6930261983198653\n",
839 | "iteration 20: loss 0.6930234151665605\n",
840 | "iteration 30: loss 0.6930149122135475\n",
841 | "iteration 40: loss 0.6929815230264361\n",
842 | "iteration 50: loss 0.6927740045307099\n",
843 | "iteration 60: loss 0.6880564419952588\n",
844 | "iteration 70: loss 0.2200907541999881\n",
845 | "iteration 80: loss 0.11582658029026635\n",
846 | "iteration 90: loss 0.08402195069870581\n"
847 | ]
848 | }
849 | ],
850 | "source": [
851 | "layers = [200, 20, 10, 1]\n",
852 | "model = deepNN(layers)\n",
853 | "\n",
854 | "model.train(X_train, y_train, batch_size=200, n_iter=100, lr=0.05)"
855 | ]
856 | },
857 | {
858 | "cell_type": "code",
859 | "execution_count": 23,
860 | "metadata": {},
861 | "outputs": [
862 | {
863 | "name": "stdout",
864 | "output_type": "stream",
865 | "text": [
866 | "accuracy: 94.55%\n"
867 | ]
868 | }
869 | ],
870 | "source": [
871 | "_, pred = model.forward(X_test.T)\n",
872 | "acc = accuracy(y_test.reshape(1, -1), pred)\n",
873 | "\n",
874 | "print(f'accuracy: {acc*100}%')"
875 | ]
876 | },
877 | {
878 | "cell_type": "code",
879 | "execution_count": null,
880 | "metadata": {},
881 | "outputs": [],
882 | "source": []
883 | }
884 | ],
885 | "metadata": {
886 | "kernelspec": {
887 | "display_name": "Python 3",
888 | "language": "python",
889 | "name": "python3"
890 | },
891 | "language_info": {
892 | "codemirror_mode": {
893 | "name": "ipython",
894 | "version": 3
895 | },
896 | "file_extension": ".py",
897 | "mimetype": "text/x-python",
898 | "name": "python",
899 | "nbconvert_exporter": "python",
900 | "pygments_lexer": "ipython3",
901 | "version": "3.8.3"
902 | }
903 | },
904 | "nbformat": 4,
905 | "nbformat_minor": 4
906 | }
907 |
--------------------------------------------------------------------------------
/deep-neural-network/images/backprop_kiank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/deep-neural-network/images/backprop_kiank.png
--------------------------------------------------------------------------------
/dropout/dropout.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Dropout\n",
8 | "---\n",
9 | "Dropout prevents overfitting by randomly shutting down some output units. The video from Coursera vividly illustrates the process.\n",
10 | "\n",
11 | "\n",
12 | "\n",
13 | "<video width=\"620\" controls src=\"images/dropout1_kiank.mp4\"></video>\n",
14 | "\n",
15 | "\n",
16 | "In the process above, some units in layer `[2]` are randomly muted in each iteration, so fewer neurons take part in the forward pass and the effective structure of the neural network is simplified. Meanwhile, the trained model becomes more robust: since any specific neuron could be muted during training, the model can no longer rely on particular neurons, and all neurons are forced to learn. "
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "# Forward\n",
24 | "---\n",
25 | "You can think of dropout as adding an extra layer to the forward process.\n",
26 | "\n",
27 | "In the previous sessions, we have the forward equations as following,\n",
28 | "\n",
29 | "__Without Dropout__\n",
30 | "\n",
31 | "$$ Z^{[l]} = W^{[l]}A^{[l-1]} + b^{[l]} $$\n",
32 | "\n",
33 | "$$ A^{[l]} = g^{[l]}(Z^{[l]})$$\n",
34 | "\n",
35 | "Where $g$ is the activation function. Now with dropout an extra layer is applied to $A^{[l]}$.\n",
36 | "\n",
37 | "__Dropout__\n",
38 | "\n",
39 | "$$ Z^{[l]} = W^{[l]}A^{[l-1]} + b^{[l]} $$\n",
40 | "\n",
41 | "$$ A^{[l]} = g^{[l]}(Z^{[l]})$$\n",
42 | "\n",
43 | "$$ A^{[l]} = D^{[l]}(A^{[l]})$$\n",
44 | "\n",
45 | "Where $D$ is the dropout layer. The key factor in the dropout layer is `keep_prob` parameter, which specifies the probability of keeping each unit. Say if `keep_prob = 0.8`, we would have 80% chance of keeping each output unit as it is, and 20% chance set them to 0.\n",
46 | "\n",
47 | "The implementation would be adding an extra mask to the result $A$. Assume we have an output $A^{[l]}$ with four elements as following,\n",
48 | "\n",
49 | "$$ \\begin{pmatrix}\n",
50 | "a_1^{[l]} \\\\\n",
51 | "a_2^{[l]} \\\\\n",
52 | "a_3^{[l]} \\\\\n",
53 | "a_4^{[l]}\n",
54 | "\\end{pmatrix}$$\n",
55 | "\n",
56 | "Suppose we want to mute the third unit while keeping the rest; what we need is a mask matrix of the same shape, combined with an element-wise multiplication, as follows:\n",
57 | "\n",
58 | "$$ \\begin{pmatrix}\n",
59 | "a_1^{[l]} \\\\\n",
60 | "a_2^{[l]} \\\\\n",
61 | "a_3^{[l]} \\\\\n",
62 | "a_4^{[l]}\n",
63 | "\\end{pmatrix} * \n",
64 | "\\begin{pmatrix}\n",
65 | "1 \\\\\n",
66 | "1 \\\\\n",
67 | "0 \\\\\n",
68 | "1\n",
69 | "\\end{pmatrix} = \n",
70 | "\\begin{pmatrix}\n",
71 | "a_1^{[l]} \\\\\n",
72 | "a_2^{[l]} \\\\\n",
73 | "0 \\\\\n",
74 | "a_4^{[l]}\n",
75 | "\\end{pmatrix}\n",
76 | "$$\n",
77 | "\n",
78 | "Let's first initialize some weight parameters."
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": 1,
84 | "metadata": {},
85 | "outputs": [],
86 | "source": [
87 | "import numpy as np\n",
88 | "\n",
89 | "layers = [3, 10, 1]\n",
90 | "def weights_init():\n",
91 | " params = {}\n",
92 | " n = len(layers)\n",
93 | " for i in range(1, n):\n",
94 | " params['W' + str(i)] = np.random.randn(layers[i], layers[i-1])*0.01\n",
95 | " params['b' + str(i)] = np.zeros((layers[i], 1))\n",
96 | " return params"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 2,
102 | "metadata": {},
103 | "outputs": [
104 | {
105 | "data": {
106 | "text/plain": [
107 | "{'W1': array([[ 0.01273373, -0.0029192 , -0.02510443],\n",
108 | " [-0.00323293, 0.01198572, -0.0127173 ],\n",
109 | " [ 0.00092662, 0.00766214, 0.02222858],\n",
110 | " [-0.01368964, -0.02118227, 0.01315665],\n",
111 | " [-0.02065367, 0.01095289, 0.00727299],\n",
112 | " [-0.00980028, -0.02437653, -0.0162406 ],\n",
113 | " [ 0.00637576, -0.02312436, -0.000291 ],\n",
114 | " [-0.0029315 , 0.01407064, 0.00237895],\n",
115 | " [-0.00581215, -0.00695063, 0.00948468],\n",
116 | " [-0.00774545, -0.008947 , 0.01390741]]),\n",
117 | " 'b1': array([[0.],\n",
118 | " [0.],\n",
119 | " [0.],\n",
120 | " [0.],\n",
121 | " [0.],\n",
122 | " [0.],\n",
123 | " [0.],\n",
124 | " [0.],\n",
125 | " [0.],\n",
126 | " [0.]]),\n",
127 | " 'W2': array([[ 0.00217584, 0.01116851, -0.01580682, 0.00626901, -0.0053493 ,\n",
128 | " 0.01537351, 0.00633889, 0.0061288 , 0.01380906, -0.00308319]]),\n",
129 | " 'b2': array([[0.]])}"
130 | ]
131 | },
132 | "execution_count": 2,
133 | "metadata": {},
134 | "output_type": "execute_result"
135 | }
136 | ],
137 | "source": [
138 | "params = weights_init()\n",
139 | "params"
140 | ]
141 | },
142 | {
143 | "cell_type": "markdown",
144 | "metadata": {},
145 | "source": [
146 | "# Forward"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 3,
152 | "metadata": {},
153 | "outputs": [],
154 | "source": [
155 | "keep_probs = [.5]\n",
156 | "\n",
157 | "def sigmoid(x):\n",
158 | " return 1/(1 + np.exp(-x))\n",
159 | "\n",
160 | "\n",
161 | "def relu(x):\n",
162 | " return np.maximum(x, 0)\n",
163 | "\n",
164 | "\n",
165 | "def forward(X):\n",
166 | " # intermediate layer use relu as activation\n",
167 | " # last layer use sigmoid\n",
168 | " n_layers = int(len(params)/2)\n",
169 | " A = X\n",
170 | " cache = {}\n",
171 | " for i in range(1, n_layers):\n",
172 | " W, b = params['W'+str(i)], params['b'+str(i)]\n",
173 | " Z = np.dot(W, A) + b\n",
174 | " A = relu(Z)\n",
175 | " # dropout\n",
176 | " keep_prob = keep_probs[i-1]\n",
177 | " D = np.random.rand(A.shape[0], A.shape[1])\n",
178 | " D = (D < keep_prob).astype(int)\n",
179 | " A = np.multiply(D, A)\n",
180 | " # rescale\n",
181 | " A = A/keep_prob\n",
182 | " \n",
183 | " cache['Z'+str(i)] = Z\n",
184 | " cache['A'+str(i)] = A\n",
185 | " cache['D'+str(i)] = D\n",
186 | "\n",
187 | " # last layer\n",
188 | " W, b = params['W'+str(i+1)], params['b'+str(i+1)]\n",
189 | " Z = np.dot(W, A) + b\n",
190 | " A = sigmoid(Z)\n",
191 | " \n",
192 | " cache['Z'+str(i+1)] = Z\n",
193 | " cache['A'+str(i+1)] = A\n",
194 | "\n",
195 | " return cache, A"
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": 4,
201 | "metadata": {},
202 | "outputs": [],
203 | "source": [
204 | "X = np.array([[1.2], [3], [-2]])\n",
205 | "cache, _ = forward(X)"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": 5,
211 | "metadata": {},
212 | "outputs": [
213 | {
214 | "data": {
215 | "text/plain": [
216 | "{'Z1': array([[ 0.05673173],\n",
217 | " [ 0.05751222],\n",
218 | " [-0.02035879],\n",
219 | " [-0.10628767],\n",
220 | " [-0.00647173],\n",
221 | " [-0.05240875],\n",
222 | " [-0.06114016],\n",
223 | " [ 0.03393622],\n",
224 | " [-0.04679583],\n",
225 | " [-0.06395034]]),\n",
226 | " 'A1': array([[0. ],\n",
227 | " [0.11502445],\n",
228 | " [0. ],\n",
229 | " [0. ],\n",
230 | " [0. ],\n",
231 | " [0. ],\n",
232 | " [0. ],\n",
233 | " [0. ],\n",
234 | " [0. ],\n",
235 | " [0. ]]),\n",
236 | " 'D1': array([[0],\n",
237 | " [1],\n",
238 | " [1],\n",
239 | " [1],\n",
240 | " [1],\n",
241 | " [0],\n",
242 | " [1],\n",
243 | " [0],\n",
244 | " [1],\n",
245 | " [0]]),\n",
246 | " 'Z2': array([[0.00128465]]),\n",
247 | " 'A2': array([[0.50032116]])}"
248 | ]
249 | },
250 | "execution_count": 5,
251 | "metadata": {},
252 | "output_type": "execute_result"
253 | }
254 | ],
255 | "source": [
256 | "cache"
257 | ]
258 | },
259 | {
260 | "cell_type": "markdown",
261 | "metadata": {},
262 | "source": [
263 | "Our layer sizes are set to [3, 10, 1], where 3 is the input layer and 1 is the output layer. In the example above we give the hidden layer a `keep_prob` of `0.5`, so roughly half of its units are muted.\n",
264 | "\n",
265 |     "(__Note__: After dropout, `A` needs to be rescaled as `A = A / keep_prob`; since some of the units are disabled, the remaining units need to be amplified in order to keep the expected value unchanged)"
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "metadata": {},
271 | "source": [
272 | "# Backward\n",
273 | "---\n",
274 |     "The backward process applies the same mask `D` to the corresponding `dA`."
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": 6,
280 | "metadata": {},
281 | "outputs": [],
282 | "source": [
283 | "# dummy code, full version needs to be inside a Class\n",
284 | "def backward(self, cache, X, Y, keep_probs):\n",
285 | " \"\"\"\n",
286 | " cache: result [A, Z]\n",
287 | " Y: shape (1, m)\n",
288 | " \"\"\"\n",
289 | " grad = {}\n",
290 | " n_layers = int(len(self.params)/2)\n",
291 | " m = Y.shape[1]\n",
292 | " cache['A0'] = X\n",
293 | "\n",
294 | " for l in range(n_layers, 0, -1):\n",
295 | " A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]\n",
296 | " W = self.params['W'+str(l)]\n",
297 | " if l == n_layers:\n",
298 | " dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)\n",
299 | "\n",
300 | " if l == n_layers:\n",
301 | " dZ = np.multiply(dA, self.sigmoid_grad(A, Z))\n",
302 | " else:\n",
303 | " # dropout version\n",
304 | " D = cache['D' + str(l)]\n",
305 | " dA = np.multiply(dA, D)\n",
306 | " # rescale\n",
307 | " dA = dA/keep_probs[l-1]\n",
308 | " \n",
309 | " dZ = np.multiply(dA, self.relu_grad(A, Z))\n",
310 | " dW = np.dot(dZ, A_prev.T)/m\n",
311 | " db = np.sum(dZ, axis=1, keepdims=True)/m\n",
312 | " dA = np.dot(W.T, dZ)\n",
313 | "\n",
314 | " grad['dW'+str(l)] = dW\n",
315 | " grad['db'+str(l)] = db\n",
316 | "\n",
317 | " return grad"
318 | ]
319 | },
320 | {
321 | "cell_type": "markdown",
322 | "metadata": {},
323 | "source": [
324 | "Note that in back propagation, $dA$ also needs to be rescaled"
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "metadata": {},
330 | "source": [
331 | "Now let's put everything in a class and apply it on a classification task."
332 | ]
333 | },
334 | {
335 | "cell_type": "code",
336 | "execution_count": 7,
337 | "metadata": {},
338 | "outputs": [],
339 | "source": [
340 | "import numpy as np\n",
341 | "\n",
342 | "\n",
343 | "class deepNN:\n",
344 | " def __init__(self, layers):\n",
345 | " self.layers = layers\n",
346 | " self.params = {}\n",
347 | " self.dropout = []\n",
348 | " self.A = 0\n",
349 | " self.Y = 0\n",
350 | " \n",
351 | " \n",
352 | " def weights_init(self):\n",
353 | " n = len(self.layers)\n",
354 | " for i in range(1, n):\n",
355 | " self.params['W' + str(i)] = np.random.randn(self.layers[i], self.layers[i-1])*0.01\n",
356 | " self.params['b' + str(i)] = np.zeros((self.layers[i], 1))\n",
357 | " \n",
358 | " @staticmethod\n",
359 | " def sigmoid(x):\n",
360 | " return 1/(1 + np.exp(-x))\n",
361 | "\n",
362 | " @staticmethod\n",
363 | " def relu(x):\n",
364 | " return np.maximum(x, 0)\n",
365 | " \n",
366 | " @staticmethod\n",
367 | " def compute_cost(A, Y):\n",
368 | " \"\"\"\n",
369 | " For binary classification, both A and Y would have shape (1, m), where m is the batch size\n",
370 | " \"\"\"\n",
371 | " assert A.shape == Y.shape\n",
372 | " m = A.shape[1]\n",
373 | " s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))\n",
374 | " loss = -s/m\n",
375 | " return np.squeeze(loss)\n",
376 | " \n",
377 | " @staticmethod\n",
378 | " def sigmoid_grad(A, Z):\n",
379 | " grad = np.multiply(A, 1-A)\n",
380 | " return grad\n",
381 | "\n",
382 | " @staticmethod\n",
383 | " def relu_grad(A, Z):\n",
384 | " grad = np.zeros(Z.shape)\n",
385 | " grad[Z>0] = 1\n",
386 | " return grad\n",
387 | " \n",
388 | " \n",
389 | " def forward(self, X):\n",
390 | " # intermediate layer use relu as activation\n",
391 | " # last layer use sigmoid\n",
392 | " n_layers = int(len(self.params)/2)\n",
393 | " A = X\n",
394 | " cache = {}\n",
395 | " for i in range(1, n_layers):\n",
396 | " W, b = self.params['W'+str(i)], self.params['b'+str(i)]\n",
397 | " Z = np.dot(W, A) + b\n",
398 | " A = self.relu(Z)\n",
399 | " \n",
400 | " keep_prob = self.dropout[i-1]\n",
401 | " D = np.random.rand(A.shape[0], A.shape[1])\n",
402 | " D = np.int64(D < keep_prob)\n",
403 | " A = np.multiply(A, D)\n",
404 | " A = A/keep_prob\n",
405 | " \n",
406 | " cache['Z'+str(i)] = Z\n",
407 | " cache['A'+str(i)] = A\n",
408 | " cache['D'+str(i)] = D\n",
409 | "\n",
410 | " # last layer\n",
411 | " W, b = self.params['W'+str(i+1)], self.params['b'+str(i+1)]\n",
412 | " Z = np.dot(W, A) + b\n",
413 | " A = self.sigmoid(Z)\n",
414 | " cache['Z'+str(i+1)] = Z\n",
415 | " cache['A'+str(i+1)] = A\n",
416 | "\n",
417 | " return cache, A\n",
418 | " \n",
419 | " def backward(self, cache, X, Y):\n",
420 | " \"\"\"\n",
421 | " cache: result [A, Z]\n",
422 | " Y: shape (1, m)\n",
423 | " \"\"\"\n",
424 | " grad = {}\n",
425 | " n_layers = int(len(self.params)/2)\n",
426 | " m = Y.shape[1]\n",
427 | " cache['A0'] = X\n",
428 | "\n",
429 | " for l in range(n_layers, 0, -1):\n",
430 | " A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]\n",
431 | " W = self.params['W'+str(l)]\n",
432 | " if l == n_layers:\n",
433 | " dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)\n",
434 | "\n",
435 | " if l == n_layers:\n",
436 | " dZ = np.multiply(dA, self.sigmoid_grad(A, Z))\n",
437 | " else:\n",
438 | " keep_prob = self.dropout[l-1]\n",
439 | " D = cache['D' + str(l)]\n",
440 | " dA = np.multiply(dA, D)\n",
441 | " dA = dA/keep_prob\n",
442 | " dZ = np.multiply(dA, self.relu_grad(A, Z))\n",
443 | " dW = np.dot(dZ, A_prev.T)/m\n",
444 | " db = np.sum(dZ, axis=1, keepdims=True)/m\n",
445 | " dA = np.dot(W.T, dZ)\n",
446 | "\n",
447 | " grad['dW'+str(l)] = dW\n",
448 | " grad['db'+str(l)] = db\n",
449 | "\n",
450 | " return grad\n",
451 | " \n",
452 | " def optimize(self, grads, lr):\n",
453 | " n_layers = int(len(self.params)/2)\n",
454 | " for i in range(1, n_layers+1):\n",
455 | " dW, db = grads['dW'+str(i)], grads['db'+str(i)]\n",
456 | " self.params['W'+str(i)] -= lr*dW\n",
457 | " self.params['b'+str(i)] -= lr*db\n",
458 | " \n",
459 | " @staticmethod\n",
460 | " def generate_batch(X, batch_size):\n",
461 | " n = X.shape[0]\n",
462 | " batches = [range(i, i+batch_size) for i in range(0, n, batch_size)]\n",
463 | " return batches\n",
464 | " \n",
465 | " def train(self, X_train, y_train, batch_size=200, n_iter=100, lr=0.1, dropout:list=[]):\n",
466 | " self.dropout = dropout\n",
467 | " # prepare batch training\n",
468 | " batches = self.generate_batch(X_train, batch_size)\n",
469 | " # init weights\n",
470 | " self.weights_init()\n",
471 | " for i in range(n_iter):\n",
472 | " for batch in batches:\n",
473 | " X = X_train[batch, :].T\n",
474 | " Y = y_train[batch].reshape(1, -1)\n",
475 | " cache, A = self.forward(X)\n",
476 | " grads = self.backward(cache, X, Y)\n",
477 | " self.optimize(grads, lr)\n",
478 | "\n",
479 | " if i%10 == 0:\n",
480 | " loss = self.compute_cost(A, Y)\n",
481 | " print(f'iteration {i}: loss {loss}')\n",
482 | "\n",
483 | "\n",
484 | "def accuracy(Y, Y_pred):\n",
485 | " \"\"\"\n",
486 | " Y: vector of true value\n",
487 | " Y_pred: vector of predicted value\n",
488 | " \"\"\"\n",
489 | " \n",
490 | " assert Y.shape[0] == 1\n",
491 | " assert Y.shape == Y_pred.shape\n",
492 | " Y_pred = np.round(Y_pred)\n",
493 | " acc = float(np.dot(Y, Y_pred.T) + np.dot(1 - Y, 1 - Y_pred.T))/Y.size\n",
494 | " return acc"
495 | ]
496 | },
497 | {
498 | "cell_type": "code",
499 | "execution_count": 8,
500 | "metadata": {},
501 | "outputs": [
502 | {
503 | "name": "stdout",
504 | "output_type": "stream",
505 | "text": [
506 | "train shape (8000, 200)\n",
507 | "test shape (2000, 200)\n"
508 | ]
509 | }
510 | ],
511 | "source": [
512 | "from sklearn import datasets\n",
513 | "\n",
514 | "\n",
515 | "X, y = datasets.make_classification(n_samples=10000, n_features=200, random_state=123)\n",
516 | "\n",
517 | "X_train, X_test = X[:8000], X[8000:]\n",
518 | "y_train, y_test = y[:8000], y[8000:]\n",
519 | "\n",
520 | "print('train shape', X_train.shape)\n",
521 | "print('test shape', X_test.shape)"
522 | ]
523 | },
524 | {
525 | "cell_type": "code",
526 | "execution_count": 9,
527 | "metadata": {},
528 | "outputs": [],
529 | "source": [
530 | "layers = [200, 100, 20, 1]\n",
531 | "dropout_ratio = [.8, .8]\n",
532 | "\n",
533 | "model = deepNN(layers)"
534 | ]
535 | },
536 | {
537 | "cell_type": "code",
538 | "execution_count": 10,
539 | "metadata": {
540 | "scrolled": false
541 | },
542 | "outputs": [
543 | {
544 | "name": "stdout",
545 | "output_type": "stream",
546 | "text": [
547 | "iteration 0: loss 0.6931117424217708\n",
548 | "iteration 10: loss 0.6929932803338776\n",
549 | "iteration 20: loss 0.6930042095652803\n",
550 | "iteration 30: loss 0.6929270740459051\n",
551 | "iteration 40: loss 0.6928159708817516\n",
552 | "iteration 50: loss 0.6925835619958787\n",
553 | "iteration 60: loss 0.6922472029699815\n",
554 | "iteration 70: loss 0.690539000831825\n",
555 | "iteration 80: loss 0.6813485269598472\n",
556 | "iteration 90: loss 0.5140425561651449\n",
557 | "iteration 100: loss 0.26596827522989325\n",
558 | "iteration 110: loss 0.21297544130219212\n",
559 | "iteration 120: loss 0.15886453417841173\n",
560 | "iteration 130: loss 0.14315587310443184\n",
561 | "iteration 140: loss 0.08922784883210816\n"
562 | ]
563 | }
564 | ],
565 | "source": [
566 | "model.train(X_train, y_train, batch_size=200, n_iter=150, lr=0.02, dropout=dropout_ratio)"
567 | ]
568 | },
569 | {
570 | "cell_type": "code",
571 | "execution_count": 11,
572 | "metadata": {},
573 | "outputs": [
574 | {
575 | "name": "stdout",
576 | "output_type": "stream",
577 | "text": [
578 | "accuracy 0.936\n"
579 | ]
580 | }
581 | ],
582 | "source": [
583 | "_, pred = model.forward(X_test.T)\n",
584 | "acc = accuracy(y_test.reshape(1, -1), pred)\n",
585 | "\n",
586 | "print(f'accuracy {acc}')"
587 | ]
588 | },
589 | {
590 | "cell_type": "code",
591 | "execution_count": null,
592 | "metadata": {},
593 | "outputs": [],
594 | "source": []
595 | }
596 | ],
597 | "metadata": {
598 | "kernelspec": {
599 | "display_name": "Python 3",
600 | "language": "python",
601 | "name": "python3"
602 | },
603 | "language_info": {
604 | "codemirror_mode": {
605 | "name": "ipython",
606 | "version": 3
607 | },
608 | "file_extension": ".py",
609 | "mimetype": "text/x-python",
610 | "name": "python",
611 | "nbconvert_exporter": "python",
612 | "pygments_lexer": "ipython3",
613 | "version": "3.8.3"
614 | }
615 | },
616 | "nbformat": 4,
617 | "nbformat_minor": 4
618 | }
619 |
--------------------------------------------------------------------------------
/dropout/images/dropout1_kiank.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/dropout/images/dropout1_kiank.mp4
--------------------------------------------------------------------------------
/dropout/model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
class deepNN:
    """Fully-connected feed-forward network for binary classification.

    Hidden layers use ReLU; the output layer uses a sigmoid. Training is
    plain mini-batch gradient descent on the binary cross-entropy loss.
    """

    def __init__(self, layers):
        # layers: sizes per layer, e.g. [n_features, n_hidden, ..., 1]
        self.layers = layers
        self.params = {}  # populated by weights_init() with 'W1', 'b1', 'W2', ...

    def weights_init(self):
        """Initialise weights with small Gaussian noise and biases with zeros."""
        n = len(self.layers)
        for i in range(1, n):
            self.params['W' + str(i)] = np.random.randn(self.layers[i], self.layers[i-1])*0.01
            self.params['b' + str(i)] = np.zeros((self.layers[i], 1))

    @staticmethod
    def sigmoid(x):
        return 1/(1 + np.exp(-x))

    @staticmethod
    def relu(x):
        return np.maximum(x, 0)

    @staticmethod
    def compute_cost(A, Y):
        """
        Mean binary cross-entropy.

        For binary classification, both A and Y would have shape (1, m),
        where m is the batch size.
        """
        assert A.shape == Y.shape
        m = A.shape[1]
        s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))
        loss = -s/m
        return np.squeeze(loss)

    @staticmethod
    def sigmoid_grad(A, Z):
        # sigmoid'(Z) expressed through the activation A = sigmoid(Z)
        grad = np.multiply(A, 1-A)
        return grad

    @staticmethod
    def relu_grad(A, Z):
        # ReLU'(Z): 1 where Z > 0, else 0
        grad = np.zeros(Z.shape)
        grad[Z>0] = 1
        return grad

    def forward(self, X):
        """Forward pass.

        X: input of shape (n_features, m).
        Returns (cache, A): cache maps 'Z<l>'/'A<l>' per layer, A is the
        final sigmoid activation of shape (1, m).
        """
        # intermediate layers use relu as activation, last layer uses sigmoid
        n_layers = int(len(self.params)/2)
        A = X
        cache = {}
        for i in range(1, n_layers):
            W, b = self.params['W'+str(i)], self.params['b'+str(i)]
            Z = np.dot(W, A) + b
            A = self.relu(Z)
            cache['Z'+str(i)] = Z
            cache['A'+str(i)] = A

        # last layer; indexing by n_layers (instead of the loop variable i+1,
        # which is undefined when there are no hidden layers) also supports
        # a single-layer network
        W, b = self.params['W'+str(n_layers)], self.params['b'+str(n_layers)]
        Z = np.dot(W, A) + b
        A = self.sigmoid(Z)
        cache['Z'+str(n_layers)] = Z
        cache['A'+str(n_layers)] = A

        return cache, A

    def backward(self, cache, X, Y):
        """
        cache: result [A, Z] from forward()
        Y: shape (1, m)
        Returns dict of gradients keyed 'dW<l>', 'db<l>'.
        """
        grad = {}
        n_layers = int(len(self.params)/2)
        m = Y.shape[1]
        cache['A0'] = X

        for l in range(n_layers, 0, -1):
            A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]
            W = self.params['W'+str(l)]
            if l == n_layers:
                # derivative of cross-entropy w.r.t. the final activation
                dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)

            if l == n_layers:
                dZ = np.multiply(dA, self.sigmoid_grad(A, Z))
            else:
                dZ = np.multiply(dA, self.relu_grad(A, Z))
            dW = np.dot(dZ, A_prev.T)/m
            db = np.sum(dZ, axis=1, keepdims=True)/m
            dA = np.dot(W.T, dZ)  # propagate to the previous layer

            grad['dW'+str(l)] = dW
            grad['db'+str(l)] = db

        return grad

    def optimize(self, grads, lr):
        """One vanilla gradient-descent step over all parameters."""
        n_layers = int(len(self.params)/2)
        for i in range(1, n_layers+1):
            dW, db = grads['dW'+str(i)], grads['db'+str(i)]
            self.params['W'+str(i)] -= lr*dW
            self.params['b'+str(i)] -= lr*db

    @staticmethod
    def generate_batch(X, batch_size):
        """Index ranges covering all rows of X; the last batch may be smaller.

        The upper bound is clamped to n: the previous version produced
        range(i, i + batch_size) even past the end of the data, which raised
        IndexError whenever n is not an exact multiple of batch_size.
        """
        n = X.shape[0]
        batches = [range(i, min(i + batch_size, n)) for i in range(0, n, batch_size)]
        return batches

    def train(self, X_train, y_train, batch_size=200, n_iter=100, lr=0.1):
        """Train with mini-batch gradient descent, logging loss every 10 iterations."""
        # prepare batch training
        batches = self.generate_batch(X_train, batch_size)
        # init weights
        self.weights_init()
        for i in range(n_iter):
            for batch in batches:
                X = X_train[batch, :].T
                Y = y_train[batch].reshape(1, -1)
                cache, A = self.forward(X)
                grads = self.backward(cache, X, Y)
                self.optimize(grads, lr)

            if i%10 == 0:
                loss = self.compute_cost(A, Y)
                print(f'iteration {i}: loss {loss}')
128 |
def accuracy(Y, Y_pred):
    """Fraction of correct binary predictions.

    Y: ground-truth row vector of shape (1, m) with entries in {0, 1}
    Y_pred: predicted probabilities, same shape as Y
    """
    assert Y.shape[0] == 1
    assert Y.shape == Y_pred.shape
    labels = np.round(Y_pred)
    correct_pos = np.dot(Y, labels.T)
    correct_neg = np.dot(1 - Y, 1 - labels.T)
    return float(correct_pos + correct_neg) / Y.size
--------------------------------------------------------------------------------
/examples/dataloader.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras.layers import Input, Add, Dense, Activation, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
3 | from tensorflow.keras import Model
4 | import os
5 | import pandas as pd
6 | import numpy as np
7 | import subprocess
8 | import gc
9 | import glob
10 | from tensorflow.keras.applications import ResNet50
11 |
12 |
# key parameter assignment
EPOCH = 20      # training epochs
BATCH_SIZE = 16  # NOTE(review): defined but the datasets below hard-code batch_size=32 — confirm which is intended
PATIENCE = 5    # early-stopping patience
DATA_PATH = '/kaggle/input/state-farm-distracted-driver-detection'


# the 10 distraction classes are named c0..c9 in this dataset
classes = [f'c{i}' for i in range(10)]
seed = 2020
validation_split = 0.2

# driver_imgs_list.csv maps each image to its driver ('subject') and class name
driver_list = pd.read_csv(f'{DATA_PATH}/driver_imgs_list.csv')
drivers = np.unique(driver_list['subject'].values)

# split by driver rather than by image, so the same person never appears in
# both the train and validation sets
split = int(np.floor(validation_split * len(drivers)))
np.random.seed(seed)  # NOTE(review): seed is set but the split below is a deterministic slice — no shuffle occurs
trn_idx, val_idx = drivers[split:], drivers[:split]
print(f'train idx {trn_idx} \n val idx {val_idx}')
31 |
32 |
# mkdirs
# Build the directory layout expected by image_dataset_from_directory:
#   driver_split/train/c0..c9, driver_split/valid/c0..c9, driver_split/test
split_dir = 'driver_split'
if not os.path.exists(split_dir):
    cmd = f'mkdir {split_dir}'
    subprocess.call(cmd, shell=True)
    for d in ['train', 'valid', 'test']:
        cmd = f'mkdir {split_dir}/{d}'
        subprocess.call(cmd, shell=True)
        if d == 'test':
            # the test set is unlabeled, so it gets no class subdirectories
            continue
        for cl in classes:
            cmd = f'mkdir {split_dir}/{d}/{cl}'
            subprocess.call(cmd, shell=True)

# ../driver_split/train/c0-c9
# ../driver_split/valid/c0-c9


# train and valid
# Symlink each image into train/ or valid/ according to its driver's split;
# links avoid copying the image files.
trn_cnt = 0
val_cnt = 0
for i, driver_info in driver_list.iterrows():
    driver = driver_info['subject']
    label = driver_info['classname']
    img_path = driver_info['img']

    if driver in trn_idx:
        if not os.path.exists(f'{split_dir}/train/{label}/{img_path}'):
            os.symlink(os.path.abspath(f'{DATA_PATH}/imgs/train/{label}/{img_path}'), f'{split_dir}/train/{label}/{img_path}')
        trn_cnt += 1
    else:
        if not os.path.exists(f'{split_dir}/valid/{label}/{img_path}'):
            os.symlink(os.path.abspath(f'{DATA_PATH}/imgs/train/{label}/{img_path}'), f'{split_dir}/valid/{label}/{img_path}')
        val_cnt += 1
67 |
68 |
69 |
70 |
# Link the unlabeled test images under test/data so that
# image_dataset_from_directory (with label_mode=None) can read them.
test_data_path = '/kaggle/working/driver_split/test/data'
if not os.path.exists(test_data_path):
    subprocess.call(f'mkdir {test_data_path}', shell=True)

cnt = 0

test_files = []
for file in glob.glob(f'{DATA_PATH}/imgs/test/*.jpg'):
    cnt += 1
    base_name = os.path.basename(file)
    if not os.path.exists(f'{test_data_path}/{base_name}'):
        os.symlink(file, f'{test_data_path}/{base_name}')
    test_files.append(base_name)  # keep file names for matching predictions later

print(f'total {cnt} files linked')


train_dir = f'{split_dir}/train/'
val_dir = f'{split_dir}/valid/'
test_dir = '/kaggle/working/driver_split/test'
91 |
92 |
# tf.data.Dataset object
# Labels are inferred from the c0..c9 subdirectory names and one-hot encoded
# (label_mode='categorical'); images are resized to 224x224 for ResNet50.

train_dataset = tf.keras.preprocessing.image_dataset_from_directory(train_dir,
                                                                    labels='inferred',
                                                                    label_mode='categorical',
                                                                    batch_size=32,
                                                                    image_size=(224, 224))


val_dataset = tf.keras.preprocessing.image_dataset_from_directory(val_dir,
                                                                  labels='inferred',
                                                                  label_mode='categorical',
                                                                  batch_size=32,
                                                                  image_size=(224, 224))


# label_mode=None: the test set is unlabeled, the dataset yields images only
test_dataset = tf.keras.preprocessing.image_dataset_from_directory(test_dir,
                                                                   label_mode=None,
                                                                   batch_size=32,
                                                                   image_size=(224, 224))


# rescale pixel values from [0, 255] to [0, 1]
norm_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1/255.)

norm_train_dataset = train_dataset.map(lambda x, y: (norm_layer(x), y))
norm_val_dataset = val_dataset.map(lambda x, y: (norm_layer(x), y))
norm_test_dataset = test_dataset.map(lambda x: norm_layer(x))

# sanity-check one batch: shapes and the rescaled value range
for b_X, b_y in norm_train_dataset:
    print('batch X shape', b_X.shape)
    print('batch y shape', b_y.shape)
    print(f'max {np.max(b_X[0])} min {np.min(b_X[0])}')
    break


# overlap input-pipeline work with training
norm_train_dataset = norm_train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
norm_val_dataset = norm_val_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
131 |
input_size = (224, 224, 3)

def get_model():
    """Build a 10-class classifier on top of an ImageNet-pretrained ResNet50.

    The ResNet50 classification head is replaced by two Dropout-regularized
    2048-unit dense layers and a 10-way softmax output.

    Returns:
        A tf.keras Model mapping (224, 224, 3) images to 10 class probabilities.
    """
    # Bug fix: Dropout was used below but never imported at the top of the
    # file, so calling this function raised NameError.
    from tensorflow.keras.layers import Dropout

    model_res = ResNet50(include_top=True, input_shape=input_size, weights='imagenet')
    # take the output of the last global average pooling layer, which has
    # fewer parameters than flattening the final conv feature map
    x = model_res.layers[-2].output

    x = Dense(2048)(x)
    x = Activation('relu')(x)
    x = Dropout(.5)(x)

    x = Dense(2048)(x)
    x = Activation('relu')(x)
    x = Dropout(.5)(x)

    x = Dense(10)(x)
    outputs = Activation('softmax')(x)

    model = Model(model_res.input, outputs)
    return model
152 |
153 |
model = get_model()

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


# Early-stop on training loss; keep the checkpoint with the best validation
# accuracy. Use the constants declared at the top of the file instead of
# repeating their values inline.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=PATIENCE)
checkpoint = tf.keras.callbacks.ModelCheckpoint('/tmp/checkpoint', monitor='val_accuracy', save_best_only=True)

model.fit(norm_train_dataset, validation_data=norm_val_dataset, epochs=EPOCH, callbacks=[callback, checkpoint])

# prediction
# Bug fix: the model is trained on images rescaled to [0, 1]
# (norm_train_dataset), but prediction previously ran on the raw
# test_dataset; norm_test_dataset was built above and never used.
# Predict on the normalized set so inputs match the training distribution.
test_pred = model.predict(norm_test_dataset)
--------------------------------------------------------------------------------
/regularization/model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
class deepNN:
    """Fully-connected feed-forward network for binary classification.

    Hidden layers use ReLU; the output layer uses a sigmoid. Training is
    plain mini-batch gradient descent on the binary cross-entropy loss.
    """

    def __init__(self, layers):
        # layers: sizes per layer, e.g. [n_features, n_hidden, ..., 1]
        self.layers = layers
        self.params = {}  # populated by weights_init() with 'W1', 'b1', 'W2', ...

    def weights_init(self):
        """Initialise weights with small Gaussian noise and biases with zeros."""
        n = len(self.layers)
        for i in range(1, n):
            self.params['W' + str(i)] = np.random.randn(self.layers[i], self.layers[i-1])*0.01
            self.params['b' + str(i)] = np.zeros((self.layers[i], 1))

    @staticmethod
    def sigmoid(x):
        return 1/(1 + np.exp(-x))

    @staticmethod
    def relu(x):
        return np.maximum(x, 0)

    @staticmethod
    def compute_cost(A, Y):
        """
        Mean binary cross-entropy.

        For binary classification, both A and Y would have shape (1, m),
        where m is the batch size.
        """
        assert A.shape == Y.shape
        m = A.shape[1]
        s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))
        loss = -s/m
        return np.squeeze(loss)

    @staticmethod
    def sigmoid_grad(A, Z):
        # sigmoid'(Z) expressed through the activation A = sigmoid(Z)
        grad = np.multiply(A, 1-A)
        return grad

    @staticmethod
    def relu_grad(A, Z):
        # ReLU'(Z): 1 where Z > 0, else 0
        grad = np.zeros(Z.shape)
        grad[Z>0] = 1
        return grad

    def forward(self, X):
        """Forward pass.

        X: input of shape (n_features, m).
        Returns (cache, A): cache maps 'Z<l>'/'A<l>' per layer, A is the
        final sigmoid activation of shape (1, m).
        """
        # intermediate layers use relu as activation, last layer uses sigmoid
        n_layers = int(len(self.params)/2)
        A = X
        cache = {}
        for i in range(1, n_layers):
            W, b = self.params['W'+str(i)], self.params['b'+str(i)]
            Z = np.dot(W, A) + b
            A = self.relu(Z)
            cache['Z'+str(i)] = Z
            cache['A'+str(i)] = A

        # last layer; indexing by n_layers (instead of the loop variable i+1,
        # which is undefined when there are no hidden layers) also supports
        # a single-layer network
        W, b = self.params['W'+str(n_layers)], self.params['b'+str(n_layers)]
        Z = np.dot(W, A) + b
        A = self.sigmoid(Z)
        cache['Z'+str(n_layers)] = Z
        cache['A'+str(n_layers)] = A

        return cache, A

    def backward(self, cache, X, Y):
        """
        cache: result [A, Z] from forward()
        Y: shape (1, m)
        Returns dict of gradients keyed 'dW<l>', 'db<l>'.
        """
        grad = {}
        n_layers = int(len(self.params)/2)
        m = Y.shape[1]
        cache['A0'] = X

        for l in range(n_layers, 0, -1):
            A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]
            W = self.params['W'+str(l)]
            if l == n_layers:
                # derivative of cross-entropy w.r.t. the final activation
                dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)

            if l == n_layers:
                dZ = np.multiply(dA, self.sigmoid_grad(A, Z))
            else:
                dZ = np.multiply(dA, self.relu_grad(A, Z))
            dW = np.dot(dZ, A_prev.T)/m
            db = np.sum(dZ, axis=1, keepdims=True)/m
            dA = np.dot(W.T, dZ)  # propagate to the previous layer

            grad['dW'+str(l)] = dW
            grad['db'+str(l)] = db

        return grad

    def optimize(self, grads, lr):
        """One vanilla gradient-descent step over all parameters."""
        n_layers = int(len(self.params)/2)
        for i in range(1, n_layers+1):
            dW, db = grads['dW'+str(i)], grads['db'+str(i)]
            self.params['W'+str(i)] -= lr*dW
            self.params['b'+str(i)] -= lr*db

    @staticmethod
    def generate_batch(X, batch_size):
        """Index ranges covering all rows of X; the last batch may be smaller.

        The upper bound is clamped to n: the previous version produced
        range(i, i + batch_size) even past the end of the data, which raised
        IndexError whenever n is not an exact multiple of batch_size.
        """
        n = X.shape[0]
        batches = [range(i, min(i + batch_size, n)) for i in range(0, n, batch_size)]
        return batches

    def train(self, X_train, y_train, batch_size=200, n_iter=100, lr=0.1):
        """Train with mini-batch gradient descent, logging loss every 10 iterations."""
        # prepare batch training
        batches = self.generate_batch(X_train, batch_size)
        # init weights
        self.weights_init()
        for i in range(n_iter):
            for batch in batches:
                X = X_train[batch, :].T
                Y = y_train[batch].reshape(1, -1)
                cache, A = self.forward(X)
                grads = self.backward(cache, X, Y)
                self.optimize(grads, lr)

            if i%10 == 0:
                loss = self.compute_cost(A, Y)
                print(f'iteration {i}: loss {loss}')
127 |
128 |
def accuracy(Y, Y_pred):
    """Fraction of correct binary predictions.

    Y: ground-truth row vector of shape (1, m) with entries in {0, 1}
    Y_pred: predicted probabilities, same shape as Y
    """
    assert Y.shape[0] == 1
    assert Y.shape == Y_pred.shape
    predicted = np.round(Y_pred)
    # count agreements on positives plus agreements on negatives
    matches = np.dot(Y, predicted.T) + np.dot(1 - Y, 1 - predicted.T)
    return float(matches) / Y.size
--------------------------------------------------------------------------------
/regularization/regularization.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Regularization\n",
8 | "---\n",
9 | "Regularization helps to prevent model from overfitting by adding an extra penalization term at the end of the loss function.\n",
10 | "\n",
11 | "$$J = -\\frac{1}{m} \\sum\\limits_{i = 1}^{m} \\large{(}\\small y^{(i)}\\log\\left(a^{[L](i)}\\right) + (1-y^{(i)})\\log\\left(1- a^{[L](i)}\\right) \\large{)} \\tag{1}$$\n",
12 | "To:\n",
13 | "$$J_{regularized} = \\small \\underbrace{-\\frac{1}{m} \\sum\\limits_{i = 1}^{m} \\large{(}\\small y^{(i)}\\log\\left(a^{[L](i)}\\right) + (1-y^{(i)})\\log\\left(1- a^{[L](i)}\\right) \\large{)} }_\\text{cross-entropy cost} + \\underbrace{\\frac{1}{m} \\frac{\\lambda}{2} \\sum\\limits_l\\sum\\limits_k\\sum\\limits_j W_{k,j}^{[l]2} }_\\text{L2 regularization cost} \\tag{2}$$\n",
14 | "\n",
15 | "Where $m$ is the batch size. The shown regularization is called `L2 regularization`, where `L2` applies square to weights, `L1 regularization` applies absolute value, which has the form of $|W|$.\n",
16 | "\n",
17 |     "The appended extra term would enlarge the loss when either there are too many weights or the weights become too large, and the adjustable factor $\\lambda$ controls how much we want to penalize the weights.\n",
18 | "\n",
19 | "_**1. Why penalizing weights would help to prevent overfitting?**_\n",
20 | "\n",
21 | "An intuitive understanding would be that in the process of minimizing the new loss function, some of the weights would decrease close to zero so that the corresponding neurons would have very small effect to our results, as if we are training on a smaller neural network with fewer neurons."
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "# Forward\n",
29 | "---\n",
30 | "In the forward process, we need only to change the loss function. let's review the cost function we've built in `deepNN`."
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 1,
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "import numpy as np\n",
40 | "from model import deepNN"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 2,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "model = deepNN([2, 4, 1])"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 3,
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "name": "stdout",
59 | "output_type": "stream",
60 | "text": [
61 | "loss: 0.7512649762748712\n"
62 | ]
63 | }
64 | ],
65 | "source": [
66 | "A = np.array([[.3, .5, .7]])\n",
67 | "Y = np.array([[1, 1, 1]])\n",
68 | "\n",
69 | "loss = model.compute_cost(A, Y)\n",
70 | "print(f'loss: {loss}')"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 13,
76 | "metadata": {},
77 | "outputs": [],
78 | "source": [
79 | "def compute_loss(A, Y, parameters, reg=True, lambd=.2):\n",
80 | " \"\"\"\n",
81 | " With L2 regularization\n",
82 | " parameters: dict with 'W1', 'b1', 'W2', ...\n",
83 | " \"\"\"\n",
84 | " assert A.shape == Y.shape\n",
85 | " n_layer = len(parameters)//2\n",
86 | " m = A.shape[1]\n",
87 | " s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))\n",
88 | " loss = -s/m\n",
89 | " if reg:\n",
90 | " p = 0\n",
91 | " for i in range(1, n_layer+1):\n",
92 | " p += np.sum(np.square(parameters['W'+str(i)]))\n",
93 | " loss += (1/m)*(lambd/2)*p\n",
94 | " return np.squeeze(loss)"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 6,
100 | "metadata": {},
101 | "outputs": [
102 | {
103 | "data": {
104 | "text/plain": [
105 | "{'W1': array([[ 0.00224882, -0.00683036],\n",
106 | " [-0.0155842 , 0.00439355],\n",
107 | " [ 0.0026745 , 0.00287223],\n",
108 | " [-0.00977243, 0.00515391]]),\n",
109 | " 'b1': array([[0.],\n",
110 | " [0.],\n",
111 | " [0.],\n",
112 | " [0.]]),\n",
113 | " 'W2': array([[-0.02002206, 0.00227708, 0.00470624, 0.00502016]]),\n",
114 | " 'b2': array([[0.]])}"
115 | ]
116 | },
117 | "execution_count": 6,
118 | "metadata": {},
119 | "output_type": "execute_result"
120 | }
121 | ],
122 | "source": [
123 | "model.weights_init()\n",
124 | "model.params"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 14,
130 | "metadata": {},
131 | "outputs": [
132 | {
133 | "name": "stdout",
134 | "output_type": "stream",
135 | "text": [
136 | "loss: 0.7512951351356093\n"
137 | ]
138 | }
139 | ],
140 | "source": [
141 | "loss = compute_loss(A, Y, model.params)\n",
142 | "print(f'loss: {loss}')"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "# Backward\n",
150 | "---\n",
151 |     "The backward propagation of `L2 regularization` is actually straightforward: we only need to add the gradient of the L2 term.\n",
152 |     "\n",
153 |     "$$ \\underbrace{\\frac{\\partial{J}^{\\text{L2 Reg}}}{\\partial{W}}}_{\\text{new gradient}} = \\underbrace{ \\frac{\\partial{J}^{\\text{old}}}{\\partial{W}} }_{\\text{old gradient}} + \\frac{\\lambda}{m}W$$"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 15,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "def backward(params, cache, X, Y, lambd=0.2):\n",
163 | " \"\"\"\n",
164 | " params: weight [W, b]\n",
165 | " cache: result [A, Z]\n",
166 | " Y: shape (1, m)\n",
167 | " \"\"\"\n",
168 | " grad = {}\n",
169 | " n_layers = int(len(params)/2)\n",
170 | " m = Y.shape[1]\n",
171 | " cache['A0'] = X\n",
172 | " \n",
173 | " for l in range(n_layers, 0, -1):\n",
174 | " A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]\n",
175 | " W = params['W'+str(l)]\n",
176 | " if l == n_layers:\n",
177 | " dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)\n",
178 | " \n",
179 | " if l == n_layers:\n",
180 | " dZ = np.multiply(dA, sigmoid_grad(A, Z))\n",
181 | " else:\n",
182 | " dZ = np.multiply(dA, relu_grad(A, Z))\n",
183 | " \n",
184 | " # with an extra gradient at the end, other terms would remain the same\n",
185 | " dW = np.dot(dZ, A_prev.T)/m + (lambd/m)*W\n",
186 | " \n",
187 | " db = np.sum(dZ, axis=1, keepdims=True)/m\n",
188 | " dA = np.dot(W.T, dZ)\n",
189 | "\n",
190 | " grad['dW'+str(l)] = dW\n",
191 | " grad['db'+str(l)] = db\n",
192 | " \n",
193 | " return grad"
194 | ]
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {},
199 | "source": [
200 | "# Ensemble"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": 36,
206 | "metadata": {},
207 | "outputs": [],
208 | "source": [
209 | "class deepNN:\n",
210 | " def __init__(self, layers):\n",
211 | " self.layers = layers\n",
212 | " self.params = {}\n",
213 | " self.reg = False\n",
214 | " self.lambd = .2\n",
215 | " \n",
216 | " \n",
217 | " def weights_init(self):\n",
218 | " n = len(self.layers)\n",
219 | " for i in range(1, n):\n",
220 | " self.params['W' + str(i)] = np.random.randn(self.layers[i], self.layers[i-1])*0.01\n",
221 | " self.params['b' + str(i)] = np.zeros((self.layers[i], 1))\n",
222 | " \n",
223 | " @staticmethod\n",
224 | " def sigmoid(x):\n",
225 | " return 1/(1 + np.exp(-x))\n",
226 | "\n",
227 | " @staticmethod\n",
228 | " def relu(x):\n",
229 | " return np.maximum(x, 0)\n",
230 | " \n",
231 | " def compute_loss(self, A, Y):\n",
232 | " \"\"\"\n",
233 | " With L2 regularization\n",
234 | " \"\"\"\n",
235 | " assert A.shape == Y.shape\n",
236 | " n_layer = len(self.params)//2\n",
237 | " m = A.shape[1]\n",
238 | " s = np.dot(Y, np.log(A.T)) + np.dot(1-Y, np.log((1 - A).T))\n",
239 | " loss = -s/m\n",
240 | " if self.reg:\n",
241 | " p = 0\n",
242 | " for i in range(1, n_layer+1):\n",
243 | " p += np.sum(np.square(self.params['W'+str(i)]))\n",
244 | " loss += (1/m)*(self.lambd/2)*p\n",
245 | " return np.squeeze(loss)\n",
246 | " \n",
247 | " @staticmethod\n",
248 | " def sigmoid_grad(A, Z):\n",
249 | " grad = np.multiply(A, 1-A)\n",
250 | " return grad\n",
251 | "\n",
252 | " @staticmethod\n",
253 | " def relu_grad(A, Z):\n",
254 | " grad = np.zeros(Z.shape)\n",
255 | " grad[Z>0] = 1\n",
256 | " return grad\n",
257 | " \n",
258 | " \n",
259 | " def forward(self, X):\n",
260 | " # intermediate layer use relu as activation\n",
261 | " # last layer use sigmoid\n",
262 | " n_layers = int(len(self.params)/2)\n",
263 | " A = X\n",
264 | " cache = {}\n",
265 | " for i in range(1, n_layers):\n",
266 | " W, b = self.params['W'+str(i)], self.params['b'+str(i)]\n",
267 | " Z = np.dot(W, A) + b\n",
268 | " A = self.relu(Z)\n",
269 | " cache['Z'+str(i)] = Z\n",
270 | " cache['A'+str(i)] = A\n",
271 | "\n",
272 | " # last layer\n",
273 | " W, b = self.params['W'+str(i+1)], self.params['b'+str(i+1)]\n",
274 | " Z = np.dot(W, A) + b\n",
275 | " A = self.sigmoid(Z)\n",
276 | " cache['Z'+str(i+1)] = Z\n",
277 | " cache['A'+str(i+1)] = A\n",
278 | "\n",
279 | " return cache, A\n",
280 | " \n",
281 | " def backward(self, cache, X, Y):\n",
282 | " \"\"\"\n",
283 | " cache: result [A, Z]\n",
284 | " Y: shape (1, m)\n",
285 | " \"\"\"\n",
286 | " grad = {}\n",
287 | " n_layers = int(len(self.params)/2)\n",
288 | " m = Y.shape[1]\n",
289 | " cache['A0'] = X\n",
290 | "\n",
291 | " for l in range(n_layers, 0, -1):\n",
292 | " A, A_prev, Z = cache['A' + str(l)], cache['A' + str(l-1)], cache['Z' + str(l)]\n",
293 | " W = self.params['W'+str(l)]\n",
294 | " if l == n_layers:\n",
295 | " dA = -np.divide(Y, A) + np.divide(1 - Y, 1 - A)\n",
296 | "\n",
297 | " if l == n_layers:\n",
298 | " dZ = np.multiply(dA, self.sigmoid_grad(A, Z))\n",
299 | " else:\n",
300 | " dZ = np.multiply(dA, self.relu_grad(A, Z))\n",
301 | " \n",
302 | " dW = np.dot(dZ, A_prev.T)/m + (self.lambd/m)*W\n",
303 | " db = np.sum(dZ, axis=1, keepdims=True)/m\n",
304 | " dA = np.dot(W.T, dZ)\n",
305 | "\n",
306 | " grad['dW'+str(l)] = dW\n",
307 | " grad['db'+str(l)] = db\n",
308 | "\n",
309 | " return grad\n",
310 | " \n",
311 | " def optimize(self, grads, lr):\n",
312 | " n_layers = int(len(self.params)/2)\n",
313 | " for i in range(1, n_layers+1):\n",
314 | " dW, db = grads['dW'+str(i)], grads['db'+str(i)]\n",
315 | " self.params['W'+str(i)] -= lr*dW\n",
316 | " self.params['b'+str(i)] -= lr*db\n",
317 | " \n",
318 | " @staticmethod\n",
319 | " def generate_batch(X, batch_size):\n",
320 | " n = X.shape[0]\n",
321 | " batches = [range(i, i+batch_size) for i in range(0, n, batch_size)]\n",
322 | " return batches\n",
323 | " \n",
324 | " \n",
325 | " def train(self, X_train, y_train, batch_size=200, n_iter=100, lr=0.1, reg=True, lambd=.7):\n",
326 | " self.lambd = lambd\n",
327 | " self.reg = reg\n",
328 | " # prepare batch training\n",
329 | " batches = self.generate_batch(X_train, batch_size)\n",
330 | " # init weights\n",
331 | " self.weights_init()\n",
332 | " for i in range(n_iter):\n",
333 | " for batch in batches:\n",
334 | " X = X_train[batch, :].T\n",
335 | " Y = y_train[batch].reshape(1, -1)\n",
336 | " cache, A = self.forward(X)\n",
337 | " grads = self.backward(cache, X, Y)\n",
338 | " self.optimize(grads, lr)\n",
339 | "\n",
340 | " if i%10 == 0:\n",
341 | " loss = self.compute_loss(A, Y)\n",
342 | " print(f'iteration {i}: loss {loss}')"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": 39,
348 | "metadata": {},
349 | "outputs": [],
350 | "source": [
351 | "def accuracy(Y, Y_pred):\n",
352 | " \"\"\"\n",
353 | " Y: vector of true value\n",
354 | " Y_pred: vector of predicted value\n",
355 | " \"\"\"\n",
356 | " \n",
357 | " assert Y.shape[0] == 1\n",
358 | " assert Y.shape == Y_pred.shape\n",
359 | " Y_pred = np.round(Y_pred)\n",
360 | " acc = float(np.dot(Y, Y_pred.T) + np.dot(1 - Y, 1 - Y_pred.T))/Y.size\n",
361 | " return acc"
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": 17,
367 | "metadata": {},
368 | "outputs": [
369 | {
370 | "name": "stdout",
371 | "output_type": "stream",
372 | "text": [
373 | "train shape (8000, 200)\n",
374 | "test shape (2000, 200)\n"
375 | ]
376 | }
377 | ],
378 | "source": [
379 | "from sklearn import datasets\n",
380 | "\n",
381 | "\n",
382 | "X, y = datasets.make_classification(n_samples=10000, n_features=200, random_state=123)\n",
383 | "\n",
384 | "X_train, X_test = X[:8000], X[8000:]\n",
385 | "y_train, y_test = y[:8000], y[8000:]\n",
386 | "\n",
387 | "print('train shape', X_train.shape)\n",
388 | "print('test shape', X_test.shape)"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": 37,
394 | "metadata": {},
395 | "outputs": [],
396 | "source": [
397 | "layers = [200, 100, 20, 1]\n",
398 | "model = deepNN(layers)"
399 | ]
400 | },
401 | {
402 | "cell_type": "code",
403 | "execution_count": 54,
404 | "metadata": {},
405 | "outputs": [
406 | {
407 | "name": "stdout",
408 | "output_type": "stream",
409 | "text": [
410 | "iteration 0: loss 0.6985036506635248\n",
411 | "iteration 10: loss 0.6974289293126693\n",
412 | "iteration 20: loss 0.696563499398262\n",
413 | "iteration 30: loss 0.6955727021117409\n",
414 | "iteration 40: loss 0.6845170595754049\n",
415 | "iteration 50: loss 0.23561800014771372\n",
416 | "iteration 60: loss 0.15567224031891935\n",
417 | "iteration 70: loss 0.12669228589646375\n",
418 | "iteration 80: loss 0.11069865608869393\n",
419 | "iteration 90: loss 0.1007637548980789\n",
420 | "iteration 100: loss 0.09435682482867866\n",
421 | "iteration 110: loss 0.09060941295356366\n",
422 | "iteration 120: loss 0.08884491050012915\n",
423 | "iteration 130: loss 0.08739359237666255\n",
424 | "iteration 140: loss 0.08695416115831198\n"
425 | ]
426 | }
427 | ],
428 | "source": [
429 | "model.train(X_train, y_train, batch_size=200, n_iter=150, lr=0.05, reg=True, lambd=1)"
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "execution_count": 55,
435 | "metadata": {},
436 | "outputs": [
437 | {
438 | "name": "stdout",
439 | "output_type": "stream",
440 | "text": [
441 | "accuracy 0.9425\n"
442 | ]
443 | }
444 | ],
445 | "source": [
446 | "_, pred = model.forward(X_test.T)\n",
447 | "acc = accuracy(y_test.reshape(1, -1), pred)\n",
448 | "\n",
449 | "print(f'accuracy {acc}')"
450 | ]
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": 46,
455 | "metadata": {},
456 | "outputs": [],
457 | "source": [
458 | "from model import deepNN as deepNNOld"
459 | ]
460 | },
461 | {
462 | "cell_type": "code",
463 | "execution_count": 51,
464 | "metadata": {},
465 | "outputs": [],
466 | "source": [
467 | "layers = [200, 100, 20, 1]\n",
468 | "model_unreg = deepNNOld(layers)"
469 | ]
470 | },
471 | {
472 | "cell_type": "markdown",
473 | "metadata": {},
474 | "source": [
475 | "Actually, as the number of iterations goes up, the model continues to overfit, which eventually causes an error in the divide operation; we suspect that in the forward process the result $A$ gets too close to 0.\n",
476 | "\n",
477 | "In contrast, the model above with regularization would not overfit."
478 | ]
479 | },
480 | {
481 | "cell_type": "code",
482 | "execution_count": 52,
483 | "metadata": {},
484 | "outputs": [
485 | {
486 | "name": "stdout",
487 | "output_type": "stream",
488 | "text": [
489 | "iteration 0: loss 0.6930918829042935\n",
490 | "iteration 10: loss 0.6930065395149767\n",
491 | "iteration 20: loss 0.6929575889989992\n",
492 | "iteration 30: loss 0.6926539088979596\n",
493 | "iteration 40: loss 0.6849650117201506\n",
494 | "iteration 50: loss 0.20267451014178056\n",
495 | "iteration 60: loss 0.09037465413243737\n",
496 | "iteration 70: loss 0.04981389115902148\n",
497 | "iteration 80: loss 0.02654689714177362\n",
498 | "iteration 90: loss 0.015971046473694038\n",
499 | "iteration 100: loss 0.010199685977249701\n",
500 | "iteration 110: loss 0.007221608028851772\n",
501 | "iteration 120: loss 0.004961759731198219\n",
502 | "iteration 130: loss 0.0034589244309720397\n",
503 | "iteration 140: loss 0.0025630729230403\n"
504 | ]
505 | }
506 | ],
507 | "source": [
508 | "model_unreg.train(X_train, y_train, batch_size=200, n_iter=150, lr=0.05)"
509 | ]
510 | },
511 | {
512 | "cell_type": "code",
513 | "execution_count": 53,
514 | "metadata": {},
515 | "outputs": [
516 | {
517 | "name": "stdout",
518 | "output_type": "stream",
519 | "text": [
520 | "accuracy 0.9335\n"
521 | ]
522 | }
523 | ],
524 | "source": [
525 | "_, pred = model_unreg.forward(X_test.T)\n",
526 | "acc = accuracy(y_test.reshape(1, -1), pred)\n",
527 | "\n",
528 | "print(f'accuracy {acc}')"
529 | ]
530 | },
531 | {
532 | "cell_type": "code",
533 | "execution_count": null,
534 | "metadata": {},
535 | "outputs": [],
536 | "source": []
537 | }
538 | ],
539 | "metadata": {
540 | "kernelspec": {
541 | "display_name": "Python 3",
542 | "language": "python",
543 | "name": "python3"
544 | },
545 | "language_info": {
546 | "codemirror_mode": {
547 | "name": "ipython",
548 | "version": 3
549 | },
550 | "file_extension": ".py",
551 | "mimetype": "text/x-python",
552 | "name": "python",
553 | "nbconvert_exporter": "python",
554 | "pygments_lexer": "ipython3",
555 | "version": "3.8.3"
556 | }
557 | },
558 | "nbformat": 4,
559 | "nbformat_minor": 4
560 | }
561 |
--------------------------------------------------------------------------------
/shallow-neural-network/images/1-hidden-nn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/shallow-neural-network/images/1-hidden-nn.png
--------------------------------------------------------------------------------
/shallow-neural-network/images/multi-layer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MJeremy2017/deep-learning/57f0dd57bcd7f6d00045fdd28e61f0c9db92e8a4/shallow-neural-network/images/multi-layer.png
--------------------------------------------------------------------------------
/shallow-neural-network/one-hidden-layer-nn.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# One Hidden Layer NN\n",
8 | "---\n",
9 | "We will build a shallow dense neural network with one hidden layer, and the following structure is used for illustration purpose.\n",
10 | "\n",
11 | "
\n",
12 | "\n",
13 | "Where in the graph above, we have an input vector $x = (x_1, x_2)$ containing 2 features, 4 hidden units $a_1, a_2, a_3$ and $a_4$, and one output value $y_1 \in [0, 1]$ (consider this a binary classification task with a predicted probability)"
14 | ]
15 | },
16 | {
17 | "cell_type": "markdown",
18 | "metadata": {},
19 | "source": [
20 | "In each hidden unit, take $a_1$ as an example, a linear operation followed by an activation function is performed. So given input $x = (x_1, x_2)$, inside node $a_1$ we have:\n",
21 | "\n",
22 | "$$z_1 = w_{11}x_1 + w_{12}x_2$$\n",
23 | "$$a_1 = activation(z_1)$$\n",
24 | "\n",
25 | "Here $w_{11}$ denotes weight 1 of node 1, $w_{12}$ denotes weight 2 of node 1. Same for node $a_2$, it would have:\n",
26 | "\n",
27 | "$$z_2 = w_{21}x_1 + w_{22}x_2$$\n",
28 | "$$a_2 = activation(z_2)$$\n",
29 | "\n",
30 | "And same for $a_3$ and $a_4$ and so on ..."
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "# Vectorization of One Input\n",
38 | "---\n",
39 | "Now let's put the weights into matrix and input into a vector to simplify the expression.\n",
40 | "\n",
41 | "$$ z^{[1]} = W^{[1]}x + b^{[1]} \\tag1 $$\n",
42 | "\n",
43 | "$$ a^{[1]} = \tanh{(z^{[1]})} \tag2 $$\n",
44 | "\n",
45 | "$$ z^{[2]} = W^{[2]}a^{[1]} + b^{[2]} \\tag3 $$\n",
46 | "\n",
47 | "$$ \hat{y} = a^{[2]} = \sigma({z^{[2]}}) \tag4 $$\n",
48 | "\n",
49 | "$$ L(y, \\hat{y}) = -[y\\log{\\hat{y}} + (1 - y)\\log{(1 - \\hat{y})}] $$\n",
50 | "\n",
51 | "Here we've assumed the hidden-layer activation function to be $\tanh$ and the output activation function to be $sigmoid$ (note that superscript $[i]$ denotes the $i$th layer). \n",
52 | "\n",
53 | "For the dimension of each matrix, we have:\n",
54 | "\n",
55 | "- $ W^{[1]}$ in the case above would have dimension $4 \times 2$, where the $i$th row holds the weights of node $i$\n",
56 | "- $b^{[1]}$ has dimension $4 \\times 1$\n",
57 | "- $z^{[1]}$ and $a^{[1]}$ both have dimension $4 \times 1$\n",
58 | "- $W^{[2]}$ has dimension $1 \\times 4$\n",
59 | "- consequently, $z^{[2]}$ and $a^{[2]}$ would have dimension $1 \times 1$, which is a single value\n",
60 | "\n",
61 | "The loss function $L$ for a single value would be the same as logistic regression's.\n",
62 | "\n",
63 | "Function $\\tanh$ and $sigmoid$ looks as below."
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 1,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "%matplotlib inline\n",
73 | "\n",
74 | "import numpy as np\n",
75 | "import matplotlib.pyplot as plt"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 2,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "def tanh(x):\n",
85 | " return np.tanh(x)\n",
86 | "\n",
87 | "def sigmoid(x):\n",
88 | " return 1/(1 + np.exp(-x))"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 3,
94 | "metadata": {},
95 | "outputs": [
96 | {
97 | "data": {
98 | "text/plain": [
99 | "Text(0.5, 1.0, 'tanh')"
100 | ]
101 | },
102 | "execution_count": 3,
103 | "metadata": {},
104 | "output_type": "execute_result"
105 | },
106 | {
107 | "data": {
108 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAAEICAYAAAB74HFBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAA0YklEQVR4nO3deZzcVZ3v/9en9ySdtbPQ2RNIgGZJgCa4orKDStBBCW44g4OPeYjeUWdGGMflos5FZ+7ozO+iY1QGRCUgV65R4iAgmyIhDWQhCUmapNd0kk5vSXrvqs/vj/o2FJ3ekq6uby3v5+NR9Hc536pPUZ3TnzrnfM8xd0dERERETk5O2AGIiIiIpDMlUyIiIiJjoGRKREREZAyUTImIiIiMgZIpERERkTFQMiUiIiIyBkqm5ISY2T+a2Y9T7XXNrMrMLktmTCIi8czMzey0sOOQ5MsLOwBJL+7+z9n0uiKS2cysCviUuz8ediySvtQyJSIiIjIGSqZkSGb2JTOrN7OjZrbLzC41s6+b2c/iynzCzKrNrMnMvhLf3RaU/aWZ/Sx4jm1mttzMbjezQ2ZWa2ZXxD3XXDNbb2bNZlZpZn8dd27g63487nW/nKz/JyKSOczsPmAh8BszO2Zm/xDUWQfMrM3MnjGzs+LK32Nmd5nZI0GdttHMTh3wtJeZ2R4zaw3KWlLflIRCyZQMysxOB24FLnT3ycCVQNWAMmXA94GPAqXAVGDegKd6P3AfMB14GXiU2O/dPOAO4IdxZdcBdcBc4Hrgn83skkFiKwN+AHw8KFsCzD/pNysiWcndPw7UAO9392J3/w7wO2AZMBt4Cfj5gMvWAP+TWJ1WCXxrwPn3ARcC5wIfJlZ3SoZTMiVDiQCFQJmZ5bt7lbu/NqDM9cBv3P2P7t4DfBUYuNjjs+7+qLv3Ab8EZgF3unsvseRpsZlNM7MFwNuBL7l7l7tvBn4MfGKQ2K4Hfuvuz7h7N/AVIJqQdy0iWc3d73b3o0Hd8nVghZlNjSvysLu/ENRpPwdWDniKO9291d1rgCcHOS8ZSMmUDMrdK4G/JVaZHDKzdWY2d0CxuUBt3DUdQNOAMgfjtjuBw+4eidsHKA6eq9ndj8aVr+b4lq7BXrd9kNcVETkhZpZrZnea2WtmdoQ3WuNnxhU7ELfdQaz+4gTOSwZSMiVDcvdfuPs7gEXEWpy+PaBIA3Hda2Y2gViX28nYD8wws8lxxxYC9YOUbQAWxL3uxDG8rohkt/jW9I8Aq4HLiA1bWBwc17gnGZaSKRmUmZ1uZpeYWSHQRawVaWBX2kPA+83sbWZWQKwV66QqHXevBZ4D/peZFZnZucDNwM8GKf4Q8D4ze0fwuneg32UROTkHgaXB9mSgm1hL90RAU7LIqOgPkAylELgTOEys2Xo2cHt8AXffDnyW2NinBuAYcIhYZXQybiT2TXA/8DDwtcHmfgle9zPAL4LXbSE2cF1E5ET9L+CfzKwVmEFseEE9sAN4PsS4JI2Y+8DxwiInx8yKgVZgmbvvCzkcERGRpFDLlIyJmb3fzCaa2STgX4FtDJhCQUREJJMpmZKxWk2sW24/sblZ1riaO0VEJIuom09ERERkDNQyJSIiIjIGeWG98MyZM33x4sVhvbyIhODFF1887O6zwo5jrFR/iWSf4eqv0JKpxYsXU1FREdbLi0gIzKw67BgSQfWXSPYZrv5SN5+IiIjIGCiZEhERERkDJVMiIiIiY6BkSkRERGQMlEyJiIiIjMGIyZSZ3W1mh8zslSHOm5n9h5lVmtlWMzs/8WGKiAxtLPWUmd1kZnuCx03Ji1pEMsVoWqbuAa4a5vzVxJYRWQbcAvxg7GGJiJyQeziJesrMZgBfAy4CVgFfM7Pp4xqpiGScEeeZcvdnzGzxMEVWAz8N1mN73symmVmpuzckKkiRTBaNOt19UTp7I3T1RujsjdDTF409IlF6g599Eacv6kSiTl80th/12CMS
hYg77k406jgQdWL77rgTHIttQ+zc69sQt/3G8XhvOhQUKMzP5TPvOW18/secgJOtp4B3A4+5ezOAmT1GLCm7f5xDFhkTd+dIZx9Huno51t1He3cfx7r76OyJxOqNiNMXidIbib5eb7jH6onX64S4uiH204PnDl7j5INLxFtMitXnzePUWcVjfp5ETNo5D6iN268Ljh2XTJnZLcS+FbJw4cIEvLRI6jnW3UddSwd1zZ0cPtZNU3tP7OexHlo6ejjS1cexrl6OdsUqv46eSNghnxQzmFKUnxLJ1CgMVU8Ndfw4qr8kDH2RKK/sP8KfX2tib+MxGtq62N/Wyf7WTrp6o2GHNySzsCMYnRULpqVMMjVq7r4WWAtQXl6ePqmryCBa2nvYvv8IOxra2L7/CHsb26lr6aClo/e4ssWFecyYVMD0SQVMKcpj/rQJFBfmMbkoj4mFeUwsyKUoL4cJBbkU5edSmJdDQV4O+bk5FOTmkJebQ36ukZeTQ16ukZtj5FrsZ06wnZMDOWbBA8wMs9gxI/gZVHBmYFjws/+YxW3H9uUNqr8kWQ60dbFhWwPPvXaYjXubOdrdB8DsyYXMnTaBM06ZzHtOn03p1CKmTMinuDCPSYV5FAd1SX+9kZ8XqzPycw3rry/sjbogvg7o//f/Rh2hf/8nIhHJVD2wIG5/fnBMJKM0Hevm2T2HeXp3I8/vbaKhrev1c6VTizhtdjHnzC9lwfSJzJ8+gfnTJzBnShEzJhVQlJ8bYuTC0PVUPbGuvvjjTyUtKpE4nT0R1j6zl/98+jU6eyMsLpnI+1bM5W2nlvDWU0uYWVwYdogyhEQkU+uBW81sHbFBnG0aLyWZora5g4derOOpXYfYWt+GO8yYVMDbTi3h3PlTKSudStncKcyYVBB2qDK8QespM3sU+Oe4QedXALeHFaRkJ3fnN1sbuHPDTva3dfHec0r5uytPZ8nMSWGHJqM0YjJlZvcT++Y208zqiN35kg/g7v8JbACuASqBDuAvxytYkWSIRp2n9zTysz9X84ddhzDg/IXT+cJly3nX6bM4e+5UcnLUBJ5KTraecvdmM/sGsCl4qjv6B6OLJMOBti5u/cVLVFS3UFY6he/esJKLlpaEHZacoNHczXfjCOcd+EzCIhIJSV8kys831nD3n/ZR3dTBzOJCbn3Pady4aiFzp00IOzwZxljqKXe/G7h7POISGU5Xb4S//mkFexuPcecHz+FD5QvI1Re1tJTUAegiqerF6ma+/PArvHrgKBcsms4XLl/O1WeXUpCnRQJEJPHcnX98eBvb6tv40SfKubxsTtghyRgomZKs1tLew52/e5UHKmopnVrEf37sfK486xTdySIi4+q//lTFr16q5/OXLVcilQGUTEnW+u9XGrj9V9s42tXHpy9eyucuXcakQv2TEJHx9afKw3xrw06uKJvDZy9Ji3naZAT6yyFZ6Sd/3Mc3H9nBufOm8p3rV3D6KZPDDklEskBtcwe3/uIlls6cxL/dsFI3s2QIJVOSVaJR51sbdvKTP+7jyrPm8O9rztMcUCKSFJGo8+n7XiQSdX70iXKK1RKeMfRJStbo6o3wxV9u4ZGtDXzybYv5yvvKdOeMiCTNM3sa2dFwhO/dsJLFmkMqoyiZkqxwrLuPv7pnEy/sa+b2q8/glouXapC5iCTVuhdqKJlUwDXnlIYdiiSYkinJeNGo88UHN/NidQv/vmYlq1cOuo6tiMi4OXS0iyd2HuKv3rFEU65kIH2ikvF+8PRrPLr9ILdffYYSKREJxf99sZ6+qHPDhQtGLixpR8mUZLSndh3iX3+/i9Ur53LzO5aEHY6IZCF354FNNaxaPINTZxWHHY6MAyVTkrGqm9r53P0vc/qcydz5wXM1RkpEQrFxXzNVTR2sWaVWqUylZEoyUkdPH5++70XMjLUfL2dCgaY/EJFwrHuhhslFeVx9tgaeZyolU5KRbv/VNnYdPMp/3HgeC0smhh2OiGSpto5eNrxy
gOtWztOXugymZEoyzlO7DvHrzfv520uX867ls8IOR0Sy2MMv19HTF1UXX4ZTMiUZpS8S5ZuP7GRxyUT+5t2nhh2OiGQxd2fdplrOmTeVs+ZODTscGUdKpiSj/HxjDZWHjvGP15ypuVxEJFRb69p49cBRtUplAf21kYzR2tHDdx/fzdtPK+HysjlhhyNJZmZXmdkuM6s0s9sGOf9dM9scPHabWWvcuUjcufVJDVwy1gMVtUzIz+XaFXPDDkXGmWZAl4zxvcf3cKSzl396b5mmQcgyZpYL3AVcDtQBm8xsvbvv6C/j7p+PK/9Z4Ly4p+h095VJCleyxJ8qD3Px8plMLsoPOxQZZ2qZkoxQeego9z1fzZpVCzmzdErY4UjyrQIq3X2vu/cA64DVw5S/Ebg/KZFJVmpp76G6qYPzFk4POxRJAiVTkhG++chOJubn8sXLl4cdioRjHlAbt18XHDuOmS0ClgB/iDtcZGYVZva8mV03xHW3BGUqGhsbExS2ZKrNda0ArJg/LdQ4JDmUTEnae3LXIZ7a1cjnLl1GSXFh2OFI6lsDPOTukbhji9y9HPgI8D0zO+5WUHdf6+7l7l4+a5am3JDhbaltxQzOma+7+LKBkilJa+7O9x7bzaKSidz0tsVhhyPhqQfib5maHxwbzBoGdPG5e33wcy/wFG8eTyVywrbUtrJ89mSKCzU0ORsomZK09nJtK1vq2rj5HUs0FUJ22wQsM7MlZlZALGE67q48MzsDmA78Oe7YdDMrDLZnAm8Hdgy8VmS03J3Nta2sWKBWqWyhlFnS2r3PVVFcmMcHz58fdigSInfvM7NbgUeBXOBud99uZncAFe7en1itAda5u8ddfibwQzOLEvuCeWf8XYAiJ6q2uZOWjl5WLJgWdiiSJEqmJG0dOtrFhm0NfPSiRWpKF9x9A7BhwLGvDtj/+iDXPQecM67BSVbpH3y+UslU1lC/iKStX2ysoTfifOKti8IORUTkdZtrWinKz2H5nMlhhyJJomRK0lJPX5Sfb6zhXctnsXRWcdjhiIi8bktdK2fPnUp+rv7EZgt90pKWfvdKA41Hu/mk7uATkRTSG4nySn2buviyjJIpSUv3PlfF4pKJvGu55vsRkdSx68BRuvuiGnyeZZRMSdrZVtfGSzWtfPyti8nJ0Rp8IpI6Nte2Ahp8nm2UTEnauee5KiYW5PKhck2HICKpZXNtKyWTCpg/fULYoUgSjSqZMrOrzGyXmVWa2W2DnF9oZk+a2ctmttXMrkl8qCLQdKyb32zdzwfPn8cUrcQuIilmS20rKxZMw0yt5tlkxGTKzHKBu4CrgTLgRjMrG1Dsn4AH3f08YpPifT/RgYoAPPxyPT19UW566+KwQxEReZOjXb1UNh7T4sZZaDQtU6uASnff6+49wDpg9YAyDkwJtqcC+xMXosgbfru1gbPmTmGZ5m8RkRSzra4Nd1i5cFrYoUiSjSaZmgfUxu3XBcfifR34mJnVEZuB+LODPZGZ3WJmFWZW0djYeBLhSjarb+1kc20r15xTGnYoIiLH6Z/5fMV8rcmXbRI1AP1G4B53nw9cA9xnZsc9t7uvdfdydy+fNUu3tMuJ+d22BgDeq2RKRFLQ5ppWFpdMZNrEgrBDkSQbTTJVDyyI258fHIt3M/AggLv/GSgCZiYiQJF+j2xroKx0CotnTgo7FBGR42ypa9WUCFlqNMnUJmCZmS0xswJiA8zXDyhTA1wKYGZnEkum1I8nCbO/tZOXa1p577lqlRKR1HOgrYuDR7o1WWeWGjGZcvc+4FbgUWAnsbv2tpvZHWZ2bVDsi8Bfm9kW4H7gk+7u4xW0ZJ8NQRefxkuJSCraXNsCoGQqS+WNppC7byA2sDz+2FfjtncAb09saCJv2LCtgTNLp7BEXXwikoJePXAUMygrnTJyYck4mgFdUt7+1k5eqmnlveecEnYoIiKDqmnqoHRKEUX5uWGHIiFQMiUp73evHADUxSfDG8VKDZ80s0Yz
2xw8PhV37iYz2xM8bkpu5JIJqpraWVgyMewwJCSj6uYTCdMjW/dzximTWTqrOOxQJEXFrdRwObG58DaZ2fpgCEK8B9z91gHXzgC+BpQTm4D4xeDaliSELhmiprmDS8+YE3YYEhK1TElK6+/ie5/u4pPhjWalhqFcCTzm7s1BAvUYcNU4xSkZ6Fh3H4eP9bBoplqmspWSKUlp6uKTURrNSg0AfxEsxv6QmfXPnzeqa7WCgwyluqkdgEUzdINMtlIyJSltw7YGdfFJovwGWOzu5xJrfbr3RC7WCg4ylJqmDgAWacxU1lIyJSnr0JEuXqxuUauUjMaIKzW4e5O7dwe7PwYuGO21IsOpbo4lUxqAnr2UTEnKenbPYQAuOWN2yJFIGhhxpQYzi8/KryU2CTHEJiS+wsymm9l04IrgmMioVDe1M2NSAVOK8sMORUKiu/kkZT2zp5GZxQWaBE9G5O59Zta/UkMucHf/Sg1AhbuvBz4XrNrQBzQDnwyubTazbxBLyADucPfmpL8JSVvVTR3q4stySqYkJUWjzh/3HOady2aSk2NhhyNpYBQrNdwO3D7EtXcDd49rgJKxqps6uHDx9LDDkBCpm09S0o6GIzS19/DOZRroKyKpq7svwv62ThaW6E6+bKZkSlLSM3tit56/c9nMkCMRERlaXUsn7rBY3XxZTcmUpKRndx/mjFMmM3tKUdihiIgM6fU5ppRMZTUlU5JyOnr6qKhu5uLl6uITkdRWHcwxtVATdmY1JVOScp7f20RvxLlY46VEJMVVN3UwqSCXmcUFYYciIVIyJSnnmd2HKczLoVx3x4hIiqtuamdhySTMdNdxNlMyJSnn2T2NXLS0hKL83LBDEREZVnVzhwafi5IpSS31rZ281tjOxbqLT0RSXCTq1DV3ahkZUTIlqeXZ3bEpETT4XERSXUNbJz2RKIs0+DzrKZmSlPLsnsOcMqWIZbOLww5FRGRYNcGdfOrmEyVTkjIiUeePlbElZDSYU0RSXVX/tAhKprKekilJGVvrWmnr7OWd6uITkTRQ3dxOfq5ROnVC2KFIyJRMScp4ds9hzOAdp2nwuYikvpqmDhbMmEiuFmPPekqmJGU8u6eRs+dOZcYkTX4nIqmvqqmDRTPUxSdKpiRFdPVG2FzbyttOLQk7FBGREbk7NU3tLCrRnXyiZEpSxEs1LfRGnIuWzgg7FElTZnaVme0ys0ozu22Q818wsx1mttXMnjCzRXHnIma2OXisT27kko6a2nto74logWMBIC/sAEQAXtjXjBlcsEjJlJw4M8sF7gIuB+qATWa23t13xBV7GSh39w4z+xvgO8ANwblOd1+ZzJglvVU3tQMomRJALVOSIjbubaasdApTJ+SHHYqkp1VApbvvdfceYB2wOr6Auz/p7h3B7vPA/CTHKBmkOpgWQd18AkqmJAX09EV5qaaFVUvUKiUnbR5QG7dfFxwbys3A7+L2i8yswsyeN7PrBrvAzG4JylQ0NjaOOWBJb1VNHZjB/OmaFkHUzScpYGtdK919US5aosHnMv7M7GNAOfCuuMOL3L3ezJYCfzCzbe7+Wvx17r4WWAtQXl7uSQtYUlJNUztzp06gME8LsssoW6ZGGtgZlPlwMLhzu5n9IrFhSibbuK8ZgAsXTw85Eklj9cCCuP35wbE3MbPLgC8D17p7d/9xd68Pfu4FngLOG89gJf1VN3dovJS8bsRkKm5g59VAGXCjmZUNKLMMuB14u7ufBfxt4kOVTLVxXzPLZhdTUlwYdiiSvjYBy8xsiZkVAGuAN92VZ2bnAT8klkgdijs+3cwKg+2ZwNuB+IHrIsepblIyJW8YTcvUiAM7gb8G7nL3FoD4ikpkOH2RKC9WNWtKBBkTd+8DbgUeBXYCD7r7djO7w8yuDYr9C1AM/HLAFAhnAhVmtgV4ErhzwF2AIm9ytKuX5vYeDT6X141mzNRgAzsvGlBmOYCZ/QnIBb7u7v898InM7BbgFoCFCxeeTLyS
YbbvP0J7T4RVGi8lY+TuG4ANA459NW77siGuew44Z3yjk0xS29wJwILpapmSmETdzZcHLAPeDdwI/MjMpg0s5O5r3b3c3ctnzdJithKbXwrgIt3JJyJpYn9rLJmapzv5JDCaZGo0AzvrgPXu3uvu+4DdxJIrkWFt3NfM4pKJzJlSFHYoIiKjUt+fTE1TMiUxo0mmRhzYCfw/Yq1S/QM4lwN7ExemZKJo1NlU1awpEUQkrexv7aQgL4cSLcougRGTqVEO7HwUaDKzHcQGcP69uzeNV9CSGXYdPEpbZ68m6xSRtFLf2sncqUXk5FjYoUiKGNWknaMY2OnAF4KHyKhs3BvLt3Unn4ikk/2tncxVF5/E0XIyEpoXqpqZN20C83VHjIikkfrWTo2XkjdRMiWhcHde2NesLj4RSSs9fVEOHe1Wy5S8iZIpCcVrje0cPtajKRFEJK0cPNKFu+7kkzdTMiWh6J9fSi1TIpJO6lo0x5QcT8mUhGJTVTMziwtZMlPLMYhI+uifsFPdfBJPyZSEYlNVMxcuno6Zbi0WkfTRn0yVTtVEw/IGJVOSdA1tndS1dFK+WF18IpJe9rd1MrO4gKL83LBDkRSiZEqSrqKqBYALF08PORIRkRNT16JpEeR4SqYk6SqqmplYkEtZ6ZSwQxEROSGasFMGo2RKkm5TVQvnLZxGXq5+/UQkfbg7+1u7lEzJcfTXTJLqaFcvrx44QvkijZcSkfTS0tFLZ29E3XxyHCVTklQv17QSdbhQg88lwczsKjPbZWaVZnbbIOcLzeyB4PxGM1scd+724PguM7syqYFL2tC0CDIUJVOSVBVVzeTmGCsXTgs7FMkgZpYL3AVcDZQBN5pZ2YBiNwMt7n4a8F3g28G1ZcAa4CzgKuD7wfOJvEl9kEypZUoGUjIlSbWpqoWy0ikUF+aFHYpkllVApbvvdfceYB2wekCZ1cC9wfZDwKUWm+hsNbDO3bvdfR9QGTyfyJu80TKlOabkzZRMSdL0RqK8XNtCuaZEkMSbB9TG7dcFxwYt4+59QBtQMsprMbNbzKzCzCoaGxsTGLqki/qWToryc5gxqSDsUCTFKJmSpNm+/whdvVGNl5K05O5r3b3c3ctnzZoVdjgSgv1tsWkRtHKDDKRkSpKmoiq2uHH5IrVMScLVAwvi9ucHxwYtY2Z5wFSgaZTXilDf2qXxUjIoJVOSNJuqmllUMpHZUzTeQBJuE7DMzJaYWQGxAeXrB5RZD9wUbF8P/MHdPTi+JrjbbwmwDHghSXFLGqnX7OcyBI0ClqRwdyqqWnj36bPDDkUykLv3mdmtwKNALnC3u283szuACndfD/wEuM/MKoFmYgkXQbkHgR1AH/AZd4+E8kYkZXX1Rjh8rFvTIsiglExJUuw73E5Te48Gn8u4cfcNwIYBx74at90FfGiIa78FfGtcA5S0dqCtC9AcUzI4dfNJUlRUa3FjEUlfmhZBhqNkSpKioqqZ6RPzOXVWcdihiIicsLogmZo/bWLIkUgqUjIlSVFR1cIFi2bolmIRSUv7WzsxgzlTC8MORVKQkikZd4ePdbP3cLu6+EQkbe1v7WRWcSGFeVppSI6nZErG3aZ9wfxSmqxTRNJUfWsn86Zr8LkMTsmUjLuN+5qZkJ/LOfOmhh2KiMhJ2d/apTv5ZEhKpmTcbdzXzPmLplGQp183EUk/7h5rmVIyJUPQXzcZV20dvbx64AgXLSkJOxQRkZPS1N5DT1+UuVM1LYIMTsmUjKuK6mbcYdUSjZcSkfRU3xKbFmHedE2LIINTMiXjauO+Zgpyc1i5YFrYoYiInBRN2CkjUTIl42rjvmZWLphGUb5uJxaR9FQfJFMaMyVDGVUyZWZXmdkuM6s0s9uGKfcXZuZmVp64ECVdHevu45X6NnXxiUhaq2/tZFJBLlMn5IcdiqSoEZMpM8sF7gKuBsqAG82sbJByk4H/AWxMdJCSnl6qbiESdS5aqmRKRNLX/tZO5k6boBUcZEij
aZlaBVS6+1537wHWAasHKfcN4NtAVwLjkzS2cV8TuTnG+Qs187mIpC/NMSUjGU0yNQ+ojduvC469zszOBxa4+yPDPZGZ3WJmFWZW0djYeMLBSnp5YV8zZ8+byqTCvLBDERE5aTXNHczX7OcyjDEPQDezHODfgC+OVNbd17p7ubuXz5o1a6wvLSmsqzfClto23qLxUiKSxlo7emjr7GVxyaSwQ5EUNppkqh5YELc/PzjWbzJwNvCUmVUBbwHWaxB6dnu5ppWeSFSDz2XcmdkMM3vMzPYEP4/rVzazlWb2ZzPbbmZbzeyGuHP3mNk+M9scPFYm9Q1ISqtu6gBgYYnmmJKhjSaZ2gQsM7MlZlYArAHW95909zZ3n+nui919MfA8cK27V4xLxJIWXtjXjJkWN5akuA14wt2XAU8E+wN1AJ9w97OAq4Dvmdm0uPN/7+4rg8fm8Q5Y0kd1cyyZUsuUDGfEZMrd+4BbgUeBncCD7r7dzO4ws2vHO0BJTxv3NXHmKVN0K7Ekw2rg3mD7XuC6gQXcfbe77wm29wOHAI01kBHVNLUDsHCGWqZkaKMaGezuG4ANA459dYiy7x57WJLOevqivFTTwpoLF4YdimSHOe7eEGwfAOYMV9jMVgEFwGtxh79lZl8laNly9+5BrrsFuAVg4UL9bmeLqqYOZk8uZEKBJh6WoWkGdEm4bfWtdPVGeYvml5IEMbPHzeyVQR5vmqbF3R3wYZ6nFLgP+Et3jwaHbwfOAC4EZgBfGuxa3UCTnWqaOtTFJyPSPeuScBv3NQNwocZLSYK4+2VDnTOzg2ZW6u4NQbJ0aIhyU4BHgC+7+/Nxz93fqtVtZv8F/F0CQ5c0V93czjuXKXmW4allShJu495mls0upqS4MOxQJDusB24Ktm8Cfj2wQHDzzMPAT939oQHnSoOfRmy81SvjGaykj86eCAePdLNYd/LJCJRMSUL1RqJUVDVrSgRJpjuBy81sD3BZsI+ZlZvZj4MyHwYuBj45yBQIPzezbcA2YCbwzaRGLymrprl/WgR188nw1M0nCfVSdQvtPRHeuWxm2KFIlnD3JuDSQY5XAJ8Ktn8G/GyI6y8Z1wAlbVUFd/It0p18MgK1TElCPbvnMLk5xltPVTIlIumtpklzTMnoKJmShHp2TyMrF0zT/FIikvaqm9uZOiGfqRNVn8nwlExJwjS397C1vo2LdeeLiGSA6qYODT6XUVEyJQnzp8rDuMM7l6uLT0TSX3VThwafy6gomZKEeXZPI1OK8jh33tSwQxERGZPeSJT61k4NPpdRUTIlCeHuPLP7MO9YNpO8XP1aiUh6q2/pJBJ1FqmbT0ZBf/UkISoPHePAkS7NFCwiGaE6mGNqkbr5ZBSUTElCPL27EUDzS4lIRqjun2NKLVMyCkqmJCGe3XOYpbMmMX+6Kh4RSX/VTR0U5ecwe7KWxZKRKZmSMevqjbBxX5OmRBCRjFHd1MGiGZOILdkoMjwlUzJmFVUtdPVGuVhTIohIhqhualcXn4yakikZs2f3NJKfa1y0pCTsUERExiwadWqaO5RMyagpmZIxe3p3Ixcsms6kQq2bLSLp79DRbrr7opqwU0ZNyZSMyaEjXbx64CgXL9d4KRHJDFXBnXxaSkZGS8mUjMkfKw8DaPC5iGSMmqZgjqkZapmS0VEyJWPy5K5GSiYVUFY6JexQJEuZ2Qwze8zM9gQ/pw9RLmJmm4PH+rjjS8xso5lVmtkDZlaQvOglFVU1tZOXY8ydVhR2KJImlEzJSevqjfDEzoNccdYccnJ0+7CE5jbgCXdfBjwR7A+m091XBo9r445/G/iuu58GtAA3j2+4kuqqmzuYP32ClsaSUdNvipy0p3Y10tET4ZpzSsMORbLbauDeYPte4LrRXmixSYQuAR46meslM9U0dWjwuZwQJVNy0jZsa2D6xHzeulRTIkio5rh7Q7B9AJgzRLkiM6sws+fN7LrgWAnQ6u59wX4dMG+wi83sluD6isbG
xkTFLinG3alqatfgczkhupddTkp/F9/7V8xVU7iMOzN7HDhlkFNfjt9xdzczH+JpFrl7vZktBf5gZtuAttHG4O5rgbUA5eXlQ72GpLnWjl6OdvWxcIaSKRk9JVNyUp7e3Ui7uvgkSdz9sqHOmdlBMyt19wYzKwUODfEc9cHPvWb2FHAe8H+BaWaWF7ROzQfqE/4GJG1UN8fu5Fusbj45AWpSkJOyYVsD0ybm89ZT1cUnoVsP3BRs3wT8emABM5tuZoXB9kzg7cAOd3fgSeD64a6X7FEdzDGl2c/lRCiZkhMW6+I7xJVlp5CvLj4J353A5Wa2B7gs2MfMys3sx0GZM4EKM9tCLHm60913BOe+BHzBzCqJjaH6SVKjl5RSHcwxtUDdfHIC1M0nJ+yZ3Y0c6+7jmnPVxSfhc/cm4NJBjlcAnwq2nwPOGeL6vcCq8YxR0sfOhiMsnDGRovzcsEORNKJmBTlh/V18b1MXn4hkmC21raxcMC3sMCTNjCqZMrOrzGxXMEPwcRPimdkXzGyHmW01syfMbFHiQ5VU0NUb4fGdh7iibI66+EQkoxw60sX+ti5WKJmSEzTiX0MzywXuAq4GyoAbzaxsQLGXgXJ3P5fY5HffSXSgkhqe3XOYY919vPfcuWGHIiKSUJtrWwHUMiUnbDRNC6uASnff6+49wDpiMw6/zt2fdPeOYPd5YrcXSwZSF5+IZKotda3k5RhnzdVao3JiRpNMzQNq4/aHnCE4cDPwu8FOaAbh9NbdF+HxHQfVxSciGWlzbStnlE7W4HM5YQn9i2hmHwPKgX8Z7Ly7r3X3cncvnzVrViJfWpLgqV2NHO3u00SdIpJxolFna22buvjkpIxmaoR6YEHc/qAzBJvZZcSWdniXu3cnJjxJJff9uZrSqUW847SZYYciIpJQew+3c7S7jxXzp4UdiqSh0bRMbQKWmdkSMysA1hCbcfh1ZnYe8EPgWncfdCkHSW97Dh7lj5WH+dhbFmktPhHJOBp8LmMx4l/FYL2qW4FHgZ3Ag+6+3czuMLNrg2L/AhQDvzSzzWa2foinkzR175+rKMjL4cZVC8MORUQk4bbUtlJcmMeps4rDDkXS0KhmQHf3DcCGAce+Grc95CKkkv6OdPXyq5fquXbFXGZMKgg7HBGRhNtS18q586eSk2NhhyJpSP01MqJfVtTR0RPhk29bHHYoIiIJ19UbYWfDEU3WKSdNyZQMKxp17vtzFRcsms7Z86aGHY6ISMLtaDhCb8Q1XkpOmpIpGdbTuxupaurgJrVKiUiG2qLB5zJGSqZkWPc8V8XsyYVcffYpYYciIjIuNte2csqUIuZMKQo7FElTSqZkSHsbj/H07kY+etEizXguIhlrS22rWqVkTPQXUob00z9Xk59r3HjRgpELi4ikodaOHqqaOjT4XMZEyZQMqqW9h4derOO955Qye7KaviV1mdkMM3vMzPYEP6cPUuY9wRx4/Y8uM7suOHePme2LO7cy2e9BwtM/WeeKBbrBRk6ekikZ1Pce301HTx9/8+7Twg5FZCS3AU+4+zLgiWD/Tdz9SXdf6e4rgUuADuD3cUX+vv+8u29OQsySIrbUtmEG52oZGRkDJVNynN0Hj/KzjTV89KJFnH7K5LDDERnJauDeYPte4LoRyl8P/M7dO8YzKEkPW+paWTa7mOLCUc1hLTIoJVPyJu7ON367g4kFuXz+8uVhhyMyGnPcvSHYPgDMGaH8GuD+Ace+ZWZbzey7ZlY42EVmdouZVZhZRWNj4xhDllTg7myubdXixjJmSqbkTZ7a1cizew7zPy5dpqVjJGWY2eNm9sogj9Xx5dzdAR/meUqBc4itNdrvduAM4EJgBvClwa5197XuXu7u5bNmzRrrW5IUUNfSSXN7jwafy5ipXVNe1xuJ8o1HdrB05iQ+8dbFYYcj8rrh1v80s4NmVuruDUGydGiYp/ow8LC798Y9d3+rVreZ/RfwdwkJWlLe73ccBOAtS2eEHImk
O7VMyet+9nw1exvb+fJ7z6QgT78akjbWAzcF2zcBvx6m7I0M6OILEjDMzIiNt3ol8SFKqnF3HthUw8oF0zhttsaGytjoL6YAsakQvvf4Ht65bCaXnDE77HBETsSdwOVmtge4LNjHzMrN7Mf9hcxsMbAAeHrA9T83s23ANmAm8M1kBC3heqmmld0Hj7HmQs2jJ2Onbj4B4DuP7uJoVy9feV8ZsS/oIunB3ZuASwc5XgF8Km6/Cpg3SLlLxjM+SU0PbKphUkEu718xN+xQJAOoZUp4+OU67n+hhk+9cynL56i5W0Qy29GuXn6zpYH3r5jLJE2JIAmgZCrLbd/fxu2/2sZFS2bw91eeHnY4IiLj7jdbGujsjbBm1cKwQ5EMoWQqi7W09/Dp+15k+sQC7vro+VrMWESywrpNNZxxymRWzNcSMpIY+uuZpSJR53PrXubQkW5+8LELmFk86DyFIiIZZfv+NrbWtbHmwgUaHyoJo87iLPWvv9/Fs3sO8+2/OIeVmrBORLLEA5tqKcjL4brzjrsXQeSkqWUqCz2wqYYfPPUaH7loITdcqDEDIpIdunojPPxyPdecfQrTJmqFB0kctUxlEXfnP56o5LuP7+ady2bytfeXhR2SiEjSbNjWwNGuPn2JlIRTMpUl+iJRvvLrV7j/hVo+eP487vzguZrlXESyhrtz/ws1LC6ZqOVjJOGUTGWB9u4+bv3FSzy5q5Fb33MaX7xiuQZeikhWuftPVWyqauHr79fExJJ4SqYyXOWhY3z+gc1s39/Gtz5wNh+9aFHYIYmIJNWfKg/zzxt2ckXZHC3iLuNCyVSG6uyJ8H+e3MPaZ/ZSlJ/L2o+Xc1nZnLDDEhFJqtrmDm79xUssnTmJf7thJTk5apWSxFMylYGe2HmQr63fTl1LJx88bx63X3MmsyZrHikRyS4dPX3cct+LRKLOjz5RTrGWjpFxot+sDBGNOs9WHubuP+7j6d2NnDa7mHW3vIW3LC0JOzQRkaRzd/7hoa3sOnCEuz95IYtnTgo7JMlgSqbSXGtHDw+9WMfPnq+mqqmDmcUF3Hb1GfzV25fobj0RyUrt3X38y6O7+O3WBm67+gzeffrssEOSDKdkKg01tHXy9K5Gnt7dyJO7DtHVG6V80XQ+f/lyrj67VEmUiGSlaNT5f5vr+fZ/v8rBI918/C2L+PTFS8MOS7KAkqkUF406VU3t7Gg4wpbaVp7ZfZhdB48CUDq1iOsvmM9HVi2ibO6UkCMVEQnPSzUt/M/f7GBLbSsr5k/l+x+9gAsWTQ87LMkSo0qmzOwq4N+BXODH7n7ngPOFwE+BC4Am4AZ3r0psqJnL3Wlu76G2pZO6lg5qmzupbelg14Gj7Gw4QkdPBICC3BwuXDKd6y84k3edPotls4s1X4pkPTP7EPB14ExglbtXDFFu0HrMzJYA64AS4EXg4+7ek4TQZQzaOnvZuLeJ515r4rnXDrP74DFmTy7kf39oBR84b57u2pOkGjGZMrNc4C7gcqAO2GRm6919R1yxm4EWdz/NzNYA3wZuGI+Aw+TuRKJOX/CIRJzuSITeiNPTF6U3EqW7N0pnbyT26InQ1RvhWHcfR7v6ONbdy9GuPo509tLU3kPTsR6a2rtpbu+hN+Jveq1pE/NZPnsyHy5fQNncKZSVTmHZnGIK83JDevciKesV4IPAD4cqMEI99m3gu+6+zsz+k1h99oPxD1sGikSd7r5YndneHaE9qDub2rvZ39rJ/tYu9rd2UtvSya4DR4g6FOXncOHiGXy4fAE3rlrIJN2xJyEYzW/dKqDS3fcCmNk6YDUQn0ytJvbNEOAh4P+Ymbn7mzOEk1Db3MFf3rMJiCUz/QZ9Yn/juLvjQP8ljuP+xn7Ug/3geNRj10TdiXqsey3qTsSdaBQiQSI1Frk5RnFhHpOL8igpLqR0ahFnzZ1CSXEhsycXsmDGRBbMmMC8aROYXJQ/ptcSyRbuvhMYqZV20HrMzHYClwAfCcrdS6wu
S1gy9dn7X+bVhiOJerqEOdnabLBq3eM2BquD++tbiCVMEfc3vpxGnJ5I7MvoSFXspIJc5k6bwNxpE7iibBlvO7WElQun6UumhG40ydQ8oDZuvw64aKgy7t5nZm3EmswPxxcys1uAWwAWLhzdQpOFeTmcPmdy3JMMuhn/Gq8fN4uV6a9kLfhPTlAmdt4wg5wcIyfuXE6OkWsWHDdycyA3J4f8HCM318jLMXJzcijIy6EwN4f8PKMgN5fCvBwmFORSlJ9DUX4uRfm5TC7Mo7gojwn5ueqWEwnHUPVYCdDq7n1xx+cN9gQnU38BLJg+gUg0ehIhjz8btBYd1YVDHhpYB/fXqf11b47FvljGto381+vPHPJzY3XqpMI8igtzmVSQR3FhHjOKCyidOoEpRXmqQyUlJbU91N3XAmsBysvLR/XFaPaUIu766PnjGpeIpDYzexw4ZZBTX3b3XycjhpOpvwD+4aozxi0mEUkNo0mm6oEFcfvzg2ODlakzszxgKrGB6CIiY+bul43xKYaqx5qAaWaWF7RODVa/iYgMazQTEm0ClpnZEjMrANYA6weUWQ/cFGxfD/whEeOlREQSZNB6LKinniRWb0GsHktKS5eIZI4Rk6ng29qtwKPATuBBd99uZneY2bVBsZ8AJWZWCXwBuG28AhYRiWdmHzCzOuCtwCNm9mhwfK6ZbYCh67HgKb4EfCGov0qI1WciIqNmYTUglZeXe0XFoNPBiEiGMrMX3b087DjGSvWXSPYZrv7SuiMiIiIiY6BkSkRERGQMlEyJiIiIjIGSKREREZExCG0Aupk1AtUncMlMBsyonoGy4T2C3mcmOdH3uMjdZ41XMMmi+mtI2fA+s+E9gt7nYIasv0JLpk6UmVVkwl1Aw8mG9wh6n5kkG95jImTL/6dseJ/Z8B5B7/NEqZtPREREZAyUTImIiIiMQTolU2vDDiAJsuE9gt5nJsmG95gI2fL/KRveZza8R9D7PCFpM2ZKREREJBWlU8uUiIiISMpRMiUiIiIyBimdTJnZh8xsu5lFzax8wLnbzazSzHaZ2ZVhxZhoZvZ1M6s3s83B45qwY0oUM7sq+Lwqzey2sOMZL2ZWZWbbgs8vY1bDNbO7zeyQmb0Sd2yGmT1mZnuCn9PDjDHVZFsdlsn1F6gOS2fjXX+ldDIFvAJ8EHgm/qCZlQFrgLOAq4Dvm1lu8sMbN99195XBY0PYwSRC8PncBVwNlAE3Bp9jpnpP8Pll0jwt9xD79xbvNuAJd18GPBHsyxuysQ7LuPoLVIdlgHsYx/orpZMpd9/p7rsGObUaWOfu3e6+D6gEViU3OjlBq4BKd9/r7j3AOmKfo6QJd38GaB5weDVwb7B9L3BdMmNKdarDMorqsDQ23vVXSidTw5gH1Mbt1wXHMsWtZrY1aJbMlG6TTP/M4jnwezN70cxuCTuYcTbH3RuC7QPAnDCDSSOZ/O8hE+svyOzPbKBsqcMSVn/lJSaek2dmjwOnDHLqy+7+62THkwzDvWfgB8A3iP0yfwP438BfJS86SYB3uHu9mc0GHjOzV4NvRRnN3d3Msm6ulWyrw1R/ZYWsq8PGWn+Fnky5+2UncVk9sCBuf35wLC2M9j2b2Y+A345zOMmS1p/ZiXD3+uDnITN7mFj3QKZWRAfNrNTdG8ysFDgUdkDJlm11WJbWX5DGn9mJyqI6LGH1V7p2860H1phZoZktAZYBL4QcU0IEH2i/DxAbwJoJNgHLzGyJmRUQG3y7PuSYEs7MJpnZ5P5t4Aoy5zMczHrgpmD7JiDjWmLGSUbWYRlcf4HqsEyUsPor9Jap4ZjZB4D/D5gFPGJmm939SnffbmYPAjuAPuAz7h4JM9YE+o6ZrSTWTF4FfDrUaBLE3fvM7FbgUSAXuNvdt4cc1niYAzxsZhD79/ULd//vcENKDDO7H3g3MNPM6oCvAXcCD5rZzUA18OHwIkw9WViHZWT9BarDwg1p7Ma7/tJyMiIiIiJjkK7dfCIi
IiIpQcmUiIiIyBgomRIREREZAyVTIiIiImOgZEpERERkDJRMiYiIiIyBkikRERGRMfj/AbZzy5peSV7cAAAAAElFTkSuQmCC\n",
109 | "text/plain": [
110 | ""
111 | ]
112 | },
113 | "metadata": {
114 | "needs_background": "light"
115 | },
116 | "output_type": "display_data"
117 | }
118 | ],
119 | "source": [
120 | "plt.figure(figsize=[10, 4])\n",
121 | "x = np.linspace(-10, 10)\n",
122 | "\n",
123 | "plt.subplot(1, 2, 1)\n",
124 | "plt.plot(x, sigmoid(x))\n",
125 | "plt.title('sigmoid')\n",
126 | "\n",
127 | "plt.subplot(1, 2, 2)\n",
128 | "plt.plot(x, tanh(x))\n",
129 | "plt.title('tanh')"
130 | ]
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "metadata": {},
135 | "source": [
136 | "Notice that the only difference between these two functions is the scale of the y-axis: sigmoid maps into $(0, 1)$ while tanh maps into $(-1, 1)$"
137 | ]
138 | },
139 | {
140 | "cell_type": "markdown",
141 | "metadata": {},
142 | "source": [
143 | "# Formula of Batch Training\n",
144 | "---\n",
145 | "The above shows the formula of a single input vector, however in actual training processes, a batch is trained instead of 1 at a time. The change applied in the formula is trivial, we just need to replace the single vector $x$ with a matrix $X$ with size $n \\times m$, where $n$ is number of features and $m$ is the the batch size -- samples are stacked column wise, and the following result matrix are applied likewise.\n",
146 | "\n",
147 | "$$ Z^{[1]} = W^{[1]}X + b^{[1]} \\tag5 $$\n",
148 | "\n",
149 | "$$ A^{[1]} = \\tanh{Z^{[1]}} \\tag6 $$\n",
150 | "\n",
151 | "$$ Z^{[2]} = W^{[2]}A^{[1]} + b^{[2]} \\tag7 $$\n",
152 | "\n",
153 | "$$ \\hat{Y} = A^{[2]} = \\sigma({Z^{[2]}}) \\tag8 $$\n",
154 | "\n",
155 | "$$ J(W^{[1]}, b^{[1]}, W^{[2]}, b^{[2]}) = \\frac{1}{m} \\sum_{i}^{m}L(y^{(i)}, \\hat{y}^{(i)}) \\tag9 $$\n",
156 | "\n",
157 | "For the dimension of each matrix taken in this example, we have:\n",
158 | "\n",
159 | "- $X$ has dimension $2 \\times m$, as here there are 2 features and $m$ is the batch size\n",
160 | "- $W^{[1]}$ in the case above would have dimension $4 \\times 2$, where the $i$-th row holds the weights of hidden node $i$\n",
161 | "- $b^{[1]}$ has dimension $4 \\times 1$\n",
162 | "- $Z^{[1]}$ and $A^{[1]}$ both have dimension $4 \\times m$\n",
163 | "- $W^{[2]}$ has dimension $1 \\times 4$\n",
164 | "- consequently, $Z^{[2]}$ and $A^{[2]}$ would have dimension $1 \\times m$\n",
165 | "\n",
166 | "Same as in logistic regression, for batch training the cost $J$ is the average loss over all training samples in the batch.\n",
167 | "\n",
168 | "That is all for the forward propagation. To make the network learn, we need the derivatives of the loss with respect to the weight parameters, and then update the weights using gradient descent.\n",
169 | "\n",
170 | "But now it is enough for us to implement the forward propagation first."
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {},
176 | "source": [
177 | "# Generate Sample Dataset\n",
178 | "---\n",
179 | "Here we generate a simple binary classification task with 5000 data points and 20 features for later model validation."
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 4,
185 | "metadata": {},
186 | "outputs": [
187 | {
188 | "name": "stdout",
189 | "output_type": "stream",
190 | "text": [
191 | "train shape (4000, 20)\n",
192 | "test shape (1000, 20)\n"
193 | ]
194 | }
195 | ],
196 | "source": [
197 | "from sklearn import datasets\n",
198 | "\n",
199 | "\n",
200 | "X, y = datasets.make_classification(n_samples=5000, random_state=123)\n",
201 | "\n",
202 | "X_train, X_test = X[:4000], X[4000:]\n",
203 | "y_train, y_test = y[:4000], y[4000:]\n",
204 | "\n",
205 | "print('train shape', X_train.shape)\n",
206 | "print('test shape', X_test.shape)"
207 | ]
208 | },
209 | {
210 | "cell_type": "markdown",
211 | "metadata": {},
212 | "source": [
213 | "# Weights Initialization\n",
214 | "---\n",
215 | "Our neural network has 1 hidden layer and 2 layers in total (hidden layer + output layer), so there are 4 parameter arrays to initialize ($W^{[1]}, b^{[1]}$ and $W^{[2]}, b^{[2]}$). Notice that the weights are initialized with small random values so that the tanh/sigmoid activations stay near their linear region, where the gradients are larger and learning is faster in the beginning phase."
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 5,
221 | "metadata": {},
222 | "outputs": [],
223 | "source": [
224 | "def init_weights(n_input, n_hidden, n_output):\n",
225 | " params = {}\n",
226 | " params['W1'] = np.random.randn(n_hidden, n_input) * 0.01\n",
227 | " params['b1'] = np.zeros((n_hidden, 1))\n",
228 | " params['W2'] = np.random.randn(n_output, n_hidden) * 0.01\n",
229 | " params['b2'] = np.zeros((n_output, 1))\n",
230 | " \n",
231 | " return params"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": 6,
237 | "metadata": {
238 | "scrolled": true
239 | },
240 | "outputs": [
241 | {
242 | "name": "stdout",
243 | "output_type": "stream",
244 | "text": [
245 | "W1 shape (10, 20)\n",
246 | "b1 shape (10, 1)\n",
247 | "W2 shape (1, 10)\n",
248 | "b2 shape (1, 1)\n"
249 | ]
250 | }
251 | ],
252 | "source": [
253 | "params = init_weights(20, 10, 1)\n",
254 | "\n",
255 | "print('W1 shape', params['W1'].shape)\n",
256 | "print('b1 shape', params['b1'].shape)\n",
257 | "print('W2 shape', params['W2'].shape)\n",
258 | "print('b2 shape', params['b2'].shape)"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 | "# Forward Propagation\n",
266 | "---\n",
267 | "Let's implement the forward process following equations $(5) \\sim (8)$."
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": 7,
273 | "metadata": {},
274 | "outputs": [],
275 | "source": [
276 | "def forward(X, params):\n",
277 | " \"\"\"\n",
278 | " X: need to have shape (n_features x m_samples)\n",
279 | " \"\"\"\n",
280 | " W1, b1, W2, b2 = params['W1'], params['b1'], params['W2'], params['b2']\n",
281 | " A0 = X\n",
282 | " \n",
283 | " cache = {}\n",
284 | " Z1 = np.dot(W1, A0) + b1\n",
285 | " A1 = tanh(Z1)\n",
286 | " Z2 = np.dot(W2, A1) + b2\n",
287 | " A2 = sigmoid(Z2)\n",
288 | " \n",
289 | " cache['Z1'] = Z1\n",
290 | " cache['A1'] = A1\n",
291 | " cache['Z2'] = Z2\n",
292 | " cache['A2'] = A2\n",
293 | " return cache"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": 8,
299 | "metadata": {},
300 | "outputs": [
301 | {
302 | "name": "stdout",
303 | "output_type": "stream",
304 | "text": [
305 | "Z1 shape (10, 100)\n",
306 | "A1 shape (10, 100)\n",
307 | "Z2 shape (1, 100)\n",
308 | "A2 shape (1, 100)\n"
309 | ]
310 | }
311 | ],
312 | "source": [
313 | "# get 100 samples\n",
314 | "inp = X[:100].T\n",
315 | "\n",
316 | "cache = forward(inp, params)\n",
317 | "\n",
318 | "print('Z1 shape', cache['Z1'].shape)\n",
319 | "print('A1 shape', cache['A1'].shape)\n",
320 | "print('Z2 shape', cache['Z2'].shape)\n",
321 | "print('A2 shape', cache['A2'].shape)"
322 | ]
323 | },
324 | {
325 | "cell_type": "markdown",
326 | "metadata": {},
327 | "source": [
328 | "# Loss Function\n",
329 | "---\n",
330 | "Following equation $(9)$, let's calculate the loss of each batch."
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 9,
336 | "metadata": {},
337 | "outputs": [],
338 | "source": [
339 | "def loss(Y, Y_hat):\n",
340 | " \"\"\"\n",
341 | " Y: vector of true value\n",
342 | " Y_hat: vector of predicted value\n",
343 | " \"\"\"\n",
344 | " assert Y.shape[0] == 1\n",
345 | " assert Y.shape == Y_hat.shape\n",
346 | " m = Y.shape[1]\n",
347 | " s = Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)\n",
348 | " loss = -np.sum(s) / m\n",
349 | " return loss"
350 | ]
351 | },
352 | {
353 | "cell_type": "code",
354 | "execution_count": 10,
355 | "metadata": {},
356 | "outputs": [
357 | {
358 | "name": "stdout",
359 | "output_type": "stream",
360 | "text": [
361 | "loss 1.4237153959578408\n"
362 | ]
363 | }
364 | ],
365 | "source": [
366 | "Y = np.array([np.random.choice([0, 1]) for i in range(10)]).reshape(1, -1)\n",
367 | "Y_hat = np.random.uniform(0, 1, 10).reshape(1, -1)\n",
368 | "\n",
369 | "l = loss(Y, Y_hat)\n",
370 | "print(f'loss {l}')"
371 | ]
372 | },
373 | {
374 | "cell_type": "markdown",
375 | "metadata": {},
376 | "source": [
377 | "# Back Propagation\n",
378 | "---\n",
379 | "Now it comes to the back propagation which is the key to our weights update. Given the loss function $L$ we defined above, we have gradients as follows:\n",
380 | "\n",
381 | "$$ dZ^{[2]} = A^{[2]} - Y \\tag1 $$\n",
382 | "\n",
383 | "$$ dW^{[2]} = \\frac{1}{m}dZ^{[2]}A^{[1]^T} \\tag2 $$\n",
384 | "\n",
385 | "$$ db^{[2]} = \\frac{1}{m}np.sum(dZ^{[2]}, axis=1, keepdims=True) \\tag3 $$\n",
386 | "\n",
387 | "$$ dZ^{[1]} = W^{[2]^T}dZ^{[2]} * (1 - A^{[1]^2}) \\tag4 $$\n",
388 | "\n",
389 | "$$ dW^{[1]} = \\frac{1}{m}dZ^{[1]}X^{T} \\tag5 $$\n",
390 | "\n",
391 | "$$ db^{[1]} = \\frac{1}{m}np.sum(dZ^{[1]}, axis=1, keepdims=True) \\tag6 $$"
392 | ]
393 | },
394 | {
395 | "cell_type": "markdown",
396 | "metadata": {},
397 | "source": [
398 | "In equation $(4)$, $*$ denotes element-wise multiplication, and the derivative of $\\tanh(x)$ is $1 - \\tanh^2(x)$ (equivalently $1 - A^2$, since $A = \\tanh(Z)$). You can try to derive the equations above yourself, but I basically took them from the internet.\n",
399 | "\n",
400 | "Let's break down the shape of each element, given that the number of units in each layer equals `(n_x, n_h, n_y)` and the batch size equals `m`:\n",
401 | "\n",
402 | "- $A^{[2]}$, $Y$ and $dZ^{[2]}$ has shape `(n_y, m)`\n",
403 | "- Because $A^{[1]}$ has shape `(n_h, m)`, $dW^{[2]}$ would have shape `(n_y, n_h)`\n",
404 | "- $db^{[2]}$ has shape `(n_y, 1)`\n",
405 | "\n",
406 | "- Because $dZ^{[2]}$ has shape `(n_y, m)`, $W^{[2]}$ has shape`(n_y, n_h)`, $dZ^{[1]}$ would have shape `(n_h, m)`\n",
407 | "- In equation $(5)$, $X$ has shape `(n_x, m)`, so $dW^{[1]}$ has shape `(n_h, n_x)`\n",
408 | "- $db^{[1]}$ has shape `(n_h, 1)`\n",
409 | "\n",
410 | "\n",
411 | "Once we understand the formula, implementation should come with ease."
412 | ]
413 | },
414 | {
415 | "cell_type": "code",
416 | "execution_count": 11,
417 | "metadata": {},
418 | "outputs": [],
419 | "source": [
420 | "def backward(params, cache, X, Y):\n",
421 | " \"\"\"\n",
422 | " [From coursera deep-learning course]\n",
423 | " params: we initiate above with W1, b1, W2, b2\n",
424 | " cache: the intermediate caculation we saved with Z1, A1, Z2, A2\n",
425 | " X: shape of (n_x, m)\n",
426 | " Y: shape (n_y, m)\n",
427 | " \"\"\"\n",
428 | " \n",
429 | " m = X.shape[1]\n",
430 | "\n",
431 | " W1 = params['W1']\n",
432 | " W2 = params['W2']\n",
433 | " A1 = cache['A1']\n",
434 | " A2 = cache['A2']\n",
435 | "\n",
436 | " dZ2 = A2 - Y\n",
437 | " dW2 = (1 / m) * np.dot(dZ2, A1.T)\n",
438 | " db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)\n",
439 | " dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))\n",
440 | " dW1 = (1 / m) * np.dot(dZ1, X.T)\n",
441 | " db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)\n",
442 | "\n",
443 | " grads = {\"dW1\": dW1,\n",
444 | " \"db1\": db1,\n",
445 | " \"dW2\": dW2,\n",
446 | " \"db2\": db2}\n",
447 | "\n",
448 | " return grads"
449 | ]
450 | },
451 | {
452 | "cell_type": "markdown",
453 | "metadata": {},
454 | "source": [
455 | "# Batch Loader\n",
456 | "---\n",
457 | "Now let's assemble everything into a class."
458 | ]
459 | },
460 | {
461 | "cell_type": "code",
462 | "execution_count": 12,
463 | "metadata": {},
464 | "outputs": [],
465 | "source": [
466 | "class ShallowNN:\n",
467 | " def __init__(self, n_input, n_hidden, n_output):\n",
468 | " self.n_input = n_input\n",
469 | " self.n_hidden = n_hidden\n",
470 | " self.n_output = n_output\n",
471 | " self.params = {}\n",
472 | " self.cache = {}\n",
473 | " self.grads = {}\n",
474 | " \n",
475 | " def compute_loss(self, Y, Y_hat):\n",
476 | " \"\"\"\n",
477 | " Y: vector of true value\n",
478 | " Y_hat: vector of predicted value\n",
479 | " \"\"\"\n",
480 | " assert Y.shape[0] == 1\n",
481 | " assert Y.shape == Y_hat.shape\n",
482 | " m = Y.shape[1]\n",
483 | " s = Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)\n",
484 | " loss = -np.sum(s) / m\n",
485 | " return loss\n",
486 | " \n",
487 | " \n",
488 | " def init_weights(self):\n",
489 | " self.params['W1'] = np.random.randn(self.n_hidden, self.n_input) * 0.01\n",
490 | " self.params['b1'] = np.zeros((self.n_hidden, 1))\n",
491 | " self.params['W2'] = np.random.randn(self.n_output, self.n_hidden) * 0.01\n",
492 | " self.params['b2'] = np.zeros((self.n_output, 1))\n",
493 | " \n",
494 | " \n",
495 | " def forward(self, X):\n",
496 | " \"\"\"\n",
497 | " X: need to have shape (n_features x m_samples)\n",
498 | " \"\"\"\n",
499 | " W1, b1, W2, b2 = self.params['W1'], self.params['b1'], self.params['W2'], self.params['b2']\n",
500 | " A0 = X\n",
501 | "\n",
502 | " Z1 = np.dot(W1, A0) + b1\n",
503 | " A1 = tanh(Z1)\n",
504 | " Z2 = np.dot(W2, A1) + b2\n",
505 | " A2 = sigmoid(Z2)\n",
506 | "\n",
507 | " self.cache['Z1'] = Z1\n",
508 | " self.cache['A1'] = A1\n",
509 | " self.cache['Z2'] = Z2\n",
510 | " self.cache['A2'] = A2\n",
511 | " \n",
512 | " \n",
513 | " def backward(self, X, Y):\n",
514 | " \"\"\"\n",
515 | " [From coursera deep-learning course]\n",
516 | " params: we initiate above with W1, b1, W2, b2\n",
517 | " cache: the intermediate caculation we saved with Z1, A1, Z2, A2\n",
518 | " X: shape of (n_x, m)\n",
519 | " Y: shape (n_y, m)\n",
520 | " \"\"\"\n",
521 | "\n",
522 | " m = X.shape[1]\n",
523 | "\n",
524 | " W1 = self.params['W1']\n",
525 | " W2 = self.params['W2']\n",
526 | " A1 = self.cache['A1']\n",
527 | " A2 = self.cache['A2']\n",
528 | "\n",
529 | " dZ2 = A2 - Y\n",
530 | " dW2 = (1 / m) * np.dot(dZ2, A1.T)\n",
531 | " db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)\n",
532 | " dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))\n",
533 | " dW1 = (1 / m) * np.dot(dZ1, X.T)\n",
534 | " db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)\n",
535 | "\n",
536 | " self.grads = {\"dW1\": dW1,\n",
537 | " \"db1\": db1,\n",
538 | " \"dW2\": dW2,\n",
539 | " \"db2\": db2}\n",
540 | "\n",
541 | " \n",
542 | " def get_batch_indices(self, X_train, batch_size):\n",
543 | " n = X_train.shape[0]\n",
544 | " indices = [range(i, i+batch_size) for i in range(0, n, batch_size)]\n",
545 | " return indices\n",
546 | " \n",
547 | " \n",
548 | " def update_weights(self, lr):\n",
549 | " W1, b1, W2, b2 = self.params['W1'], self.params['b1'], self.params['W2'], self.params['b2']\n",
550 | " dW1, db1, dW2, db2 = self.grads['dW1'], self.grads['db1'], self.grads['dW2'], self.grads['db2']\n",
551 | "        self.params['W1'] -= lr * dW1\n",
552 | "        self.params['W2'] -= lr * dW2\n",
553 | "        self.params['b1'] -= lr * db1\n",
554 | "        self.params['b2'] -= lr * db2\n",
555 | " \n",
556 | " \n",
557 | " def fit(self, X_train, y_train, batch_size=32, n_iterations=100, lr=0.01):\n",
558 | " self.init_weights()\n",
559 | " \n",
560 | " indices = self.get_batch_indices(X_train, batch_size)\n",
561 | " for i in range(n_iterations):\n",
562 | " for ind in indices:\n",
563 | " X = X_train[ind, :].T\n",
564 | " Y = y_train[ind].reshape(1, batch_size)\n",
565 | " \n",
566 | " self.forward(X)\n",
567 | " self.backward(X, Y)\n",
568 | " self.update_weights(lr)\n",
569 | " \n",
570 | " if i % 10 == 0:\n",
571 | " Y_hat = self.cache['A2']\n",
572 | " loss = self.compute_loss(Y, Y_hat)\n",
573 | " print(f'iteration {i}: loss {loss}')\n",
574 | " \n",
575 | " \n",
576 | " def predict(self, X):\n",
577 | " W1, b1, W2, b2 = self.params['W1'], self.params['b1'], self.params['W2'], self.params['b2']\n",
578 | " A0 = X\n",
579 | "\n",
580 | " Z1 = np.dot(W1, A0) + b1\n",
581 | " A1 = tanh(Z1)\n",
582 | " Z2 = np.dot(W2, A1) + b2\n",
583 | " A2 = sigmoid(Z2)\n",
584 | "\n",
585 | " return A2\n",
586 | "\n",
587 | " \n",
588 | "def accuracy(Y, Y_pred):\n",
589 | " \"\"\"\n",
590 | " Y: vector of true value\n",
591 | " Y_pred: vector of predicted value\n",
592 | " \"\"\"\n",
593 | " def _to_binary(x):\n",
594 | " return 1 if x > .5 else 0\n",
595 | "\n",
596 | " assert Y.shape[0] == 1\n",
597 | " assert Y.shape == Y_pred.shape\n",
598 | " Y_pred = np.vectorize(_to_binary)(Y_pred)\n",
599 | " acc = float(np.dot(Y, Y_pred.T) + np.dot(1 - Y, 1 - Y_pred.T))/Y.size\n",
600 | " return acc"
601 | ]
602 | },
603 | {
604 | "cell_type": "code",
605 | "execution_count": 13,
606 | "metadata": {},
607 | "outputs": [],
608 | "source": [
609 | "model = ShallowNN(20, 10, 1)"
610 | ]
611 | },
612 | {
613 | "cell_type": "code",
614 | "execution_count": 14,
615 | "metadata": {},
616 | "outputs": [
617 | {
618 | "name": "stdout",
619 | "output_type": "stream",
620 | "text": [
621 | "iteration 0: loss 0.19575153437513237\n",
622 | "iteration 10: loss 0.08698150022188056\n",
623 | "iteration 20: loss 0.07983220808062544\n",
624 | "iteration 30: loss 0.07437750278137427\n",
625 | "iteration 40: loss 0.06677985931984107\n",
626 | "iteration 50: loss 0.05925402693910988\n",
627 | "iteration 60: loss 0.054844001823287386\n",
628 | "iteration 70: loss 0.0523565446212034\n",
629 | "iteration 80: loss 0.051366335822876\n",
630 | "iteration 90: loss 0.050606347795966344\n",
631 | "iteration 100: loss 0.04997955343968667\n",
632 | "iteration 110: loss 0.04945583968865451\n",
633 | "iteration 120: loss 0.04878268474552334\n",
634 | "iteration 130: loss 0.04795982774874325\n",
635 | "iteration 140: loss 0.047676519502507106\n",
636 | "iteration 150: loss 0.04813515615412707\n",
637 | "iteration 160: loss 0.04845653653570918\n",
638 | "iteration 170: loss 0.04849591579686214\n",
639 | "iteration 180: loss 0.04844944650406025\n",
640 | "iteration 190: loss 0.048350205002389776\n"
641 | ]
642 | }
643 | ],
644 | "source": [
645 | "model.fit(X_train, y_train, batch_size=100, n_iterations=200, lr=0.01)"
646 | ]
647 | },
648 | {
649 | "cell_type": "code",
650 | "execution_count": 15,
651 | "metadata": {},
652 | "outputs": [
653 | {
654 | "name": "stdout",
655 | "output_type": "stream",
656 | "text": [
657 | "accuracy: 95.1%\n"
658 | ]
659 | }
660 | ],
661 | "source": [
662 | "y_preds = model.predict(X_test.T)\n",
663 | "\n",
664 | "acc = accuracy(y_test.reshape(1, -1), y_preds)\n",
665 | "print(f'accuracy: {acc*100}%')"
666 | ]
667 | },
668 | {
669 | "cell_type": "code",
670 | "execution_count": null,
671 | "metadata": {},
672 | "outputs": [],
673 | "source": []
674 | }
675 | ],
676 | "metadata": {
677 | "kernelspec": {
678 | "display_name": "Python 3",
679 | "language": "python",
680 | "name": "python3"
681 | },
682 | "language_info": {
683 | "codemirror_mode": {
684 | "name": "ipython",
685 | "version": 3
686 | },
687 | "file_extension": ".py",
688 | "mimetype": "text/x-python",
689 | "name": "python",
690 | "nbconvert_exporter": "python",
691 | "pygments_lexer": "ipython3",
692 | "version": "3.8.3"
693 | }
694 | },
695 | "nbformat": 4,
696 | "nbformat_minor": 4
697 | }
698 |
--------------------------------------------------------------------------------
/tensorflow/tf-hands-on.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "%matplotlib inline\n",
10 | "\n",
11 | "import tensorflow as tf\n",
12 | "import numpy as np\n",
13 | "import matplotlib.pyplot as plt"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "training size (60000, 28, 28)\n",
26 | "test size (10000, 28, 28)\n"
27 | ]
28 | }
29 | ],
30 | "source": [
31 | "minist = tf.keras.datasets.mnist\n",
32 | "\n",
33 | "(x_train, y_train), (x_test, y_test) = minist.load_data()\n",
34 | "x_train, x_test = x_train/255., x_test/255.\n",
35 | "\n",
36 | "print('training size', x_train.shape)\n",
37 | "print('test size', x_test.shape)"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 3,
43 | "metadata": {},
44 | "outputs": [
45 | {
46 | "data": {
47 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhQAAAIKCAYAAAB7ptYOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAA8i0lEQVR4nO3deZhcZZn38d+dprMQtiwQQghrEjCgJtIsAgoIOsirBASBjDiRYYwo+4CCODM6o86LI4KigAYJCYqgIyAZRRAwgspiAoYlCSQBEkkICTthSeh03+8fKedtuU+nq+up5Zzq7+e6+urqX5+q85zquzt3Tj31HHN3AQAApOjX6AEAAIDio6EAAADJaCgAAEAyGgoAAJCMhgIAACSjoQAAAMmSGgozO9zMHjezJWZ2frUGBdQTdYyio4aRB1bpOhRm1iJpkaQPSlouaY6kye6+oHrDA2qLOkbRUcPIi00S7ruPpCXu/qQkmdn1kiZJ6raI+9sAH6jBCbtEX7ZGLz3v7ltX+WF7VcfUMFJQwyi6jdVwSkMxStLTXb5eLmnfjd1hoAZrXzs0YZfoy+7wny+rwcP2qo6pYaSghlF0G6vhlIaiLGY2VdJUSRqoTWu9O6DqqGEUHTWMekiZlLlC0uguX29fyv6Gu09z9zZ3b2vVgITdATXRYx1Tw8g5ahi5kNJQzJE01sx2NrP+kk6QNKs6wwLqhjpG0VHDyIWKX/Jw9/Vmdpqk2yS1SJru7vOrNjKgDqhjFB01jLxImkPh7rdIuqVKYwEagjpG0VHDyANWygQAAMloKAAAQDIaCgAAkIyGAgAAJKOhAAAAyWgoAABAMhoKAACQjIYCAAAko6EAAADJaCgAAEAyGgoAAJCMhgIAACSjoQAAAMmSrjaK2lv/gb1CtvJz60L20HtnZt7/3fdOCdl2l/UPWcvsBysYHQAAG3CGAgAAJKOhAAAAyWgoAABAsqQ5FGa2VNIaSR2S1rt7WzUGBdQTdYyio4aRB9WYlHmIuz9fhcfp8zoPmhiyS6d/L2RjWuOPrbObx/zze68O2eNtHSH7/E779TzA5kYdF9zrx+4bsm/81xUh++px/xAyn/toTcZUZ9RwTj3xzfeGbOHfx7/trdYSsvd/bmrIBv3iT9UZWJXxkgcAAEiW2lC4pN+Y2QNmFtsooBioYxQdNYyGS33J40B3X2Fm20i63cwec/e7u25QKu6pkjRQmybuDqiJjdYxNYwCoIbRcElnKNx9Renzakk3SdonY5tp7t7m7m2tGpCyO6Ameqpjahh5Rw0jDyo+Q2FmgyX1c/c1pdsfkvQfVRtZk2v/UJyE/YXLfxSyca1xVcvOjCmYT7a3Z+7nlc74x2Nixt+TdR/eO2SDZj8S9712beZ+iioPdfzmpNCH681hcXLW0On31mM4hbW6Lf7/6KtLP9qAkdRXHmoYGzx79v6Z+e+O/6+QtXv8257JU0ZUXykveYyQdJOZ/fVxfuLut1ZlVED9UMcoOmoYuVBxQ+HuT0p6dxXHAtQddYyio4aRF7xtFAAAJKOhAAAAybh8eZW1bLFFyF5//+4hO/uSn4TskEGvZTxieT3fjJeyJwPdeXlcoe2PX7k0ZLf/8PshG//j00K2y3lMDKy2Z94ff8ab7vpy3HB67cdSGP3ipFXf4c2QHbrNYyG707J/V4BUr43OXrN4aL8yJ2AWHGcoAABAMhoKAACQjIYCAAAko6EAAADJmJRZZcuvGRWyOXtfVvP9/sc2czLzWzeLE9BOWvqhkM3c6Y6QbTH+hfSBoUf//pH/Dtk3FsafEf6/ll13DNljB8VZqxP+dGLItpsTV4AFeuu1j+8bshuO/k43W1tIvv9ynKx/x3FxBeXBy+aHLHvqZ+NxhgIAACSjoQAAAMloKAAAQDIaCgAAkIxJmRVa/4G9MvPrJnwvZP1U3ippJy07NGRz73hHyB45Oe5j9psDMx9zm7lx9cAlL8XJQK3/O
Ttk/eI8ItRAq61v9BAKZ5MfvlHWdm8+EVeuBXpr7Uf2CdmX/2+cBDyutfw/mjOvPDxk2y64p3cDyxnOUAAAgGQ0FAAAIBkNBQAASEZDAQAAkvU4KdPMpkv6iKTV7r5nKRsq6aeSdpK0VNJx7v5S7YbZWJ0HTQzZpdPjxEhJGtMan9LOjHXNjnzs6JC1HPt6yLb6Px6y8T+KlxUfd9nTmePp9/SfQzbk93G79q93hOyGd8VJR/94yBkha5n9YOa+8yQPddx54ITM/H0D/1CrXTatnQaXt4rr6DtiXRdVHmq4r1p54tqQHTIoZlJL5v2nLD0sZNt+p9gTMLOUc4ZihqS3T0c9X9Kd7j5W0p2lr4E8myHqGMU2Q9QwcqzHhsLd75b04tviSZJmlm7PlHRUdYcFVBd1jKKjhpF3lc6hGOHuK0u3n5U0okrjAeqJOkbRUcPIjeRJme7ukuIL/SVmNtXM5prZ3HatS90dUBMbq2NqGEVADaPRKm0oVpnZSEkqfV7d3YbuPs3d29y9rVUDKtwdUBNl1TE1jByjhpEblS69PUvSFEkXlj7fXLURNZjttUfInv/nuHz1uNbs5bQfyGj+f/va+JC9cP3okA176d6Qbfnj+2KWsd9aLN48oiX+4XnhrLjk8TZx1e6iqGsdL/vIoMx8m5ZNa7nbwttkpx1CduzQWWXdd9BT8Q0PzfO+D0lN/Le4UTbZflTI5r/v6pC1e6ykhe3Zj/mXi8eFbLDu7/3gcq7HMxRmdp2keyXtZmbLzexkbSjeD5rZYkmHlb4Gcos6RtFRw8i7Hs9QuPvkbr4Vr2QF5BR1jKKjhpF3rJQJAACS0VAAAIBklU7KbAr9No2T4db/16shu2/3G0P21Pq3Mh/zny84J2RDfv+XkG0zOE7GLsJksX1GLgvZ0voPo5A2GbOm7G3XPrZV7QZSME9/e3DIDhgQl7O/6tXt451fjr/PwF+17LFbyNp+8mjFj3f8jfHSBJK06w1xcn0z4gwFAABIRkMBAACS0VAAAIBkNBQAACBZn56U+eZBcVXM23a/vKz7/tOZZ2fmm/8iTr6pxSqWaG7bzI2TDouqZfiwkK06Jq4cOPS45Zn3v2vcVRnpwJBccdlRIdtm1T09jg9917IjY23+fNifM7ZsCcnfP/HRkI278InM/RRhwn01cIYCAAAko6EAAADJaCgAAEAyGgoAAJCsT0/KfNdX54WsX0aPddKyeO2dQb/4Uy2G1DCtFicdtXvcrsUyQlTdm0NjHcb1IsvX+b6JIfMWy9z26cPiZevf2i5el7lf/zjV7Dfv+27IWjN282xH3Me/Pnl05nhe7IwTVDftF/c94v64EinVir968aT3huymU76ZsWVrSE55+qCQtU+JNdzxXFwVuS/hDAUAAEhGQwEAAJLRUAAAgGQ9NhRmNt3MVpvZo12yr5jZCjObV/o4orbDBNJQxyg6ahh5V86kzBmSvifpmrfll7j7RVUfUY28/Mk4IedfRsThd6p/yB74zfiQ7aDmWoGv3eMkt07FyXC3LozPxVg9WJMxVdkMNbiO162Nk70kqTNj6uDVF1wSslmnTah43+cN+2HI+il7Uuab/lbInumI9fG95w4O2WF3nBWyrf4cf6dG/mZVyGxZ9kqZzy0cFLIRLXGSqM95JPP+TWSGmuBvcT1kXZb8nq99L2PLuOJqlnuX7xSy0Usrv8x5s+rxDIW73y3pxTqMBagZ6hhFRw0j71LmUJxmZg+XTsMNqdqIgPqijlF01DByodKG4gpJu0qaIGmlpG91t6GZTTWzuWY2t13rKtwdUBNl1TE1jByjhpEbFTUU7r7K3TvcvVPSlZL22ci209y9zd3bWhUXAgEapdw6poaRV9Qw8qSilTLNbKS7ryx9ebSk3M9OWR/ndWnLfnGy2L1r4y/bLtc8Ex+vKqOqvX6bbhqyxy7aM2PLB0LyiSc/HLLdz3wqZEW9NG+963jMiVmXRZb2+L+nhWz03iuquu/Zq
+Plwp/79faZ2w6bHyc89r91TsaWcbtxmlvWeLJqZsV5+2duu/eAe0N2/WujytpPsyvi3+J6WHRB/LuXNfG8XDtcGDNWYY16bCjM7DpJB0sabmbLJX1Z0sFmNkEbntOlkj5TuyEC6ahjFB01jLzrsaFw98kZ8VU1GAtQM9Qxio4aRt6xUiYAAEhGQwEAAJL16cuXZ3mhY7OQrX9yaf0H0ktZky8l6fEL3xmyxybFFeN+/caWIXvmsjEh2/yl+yoYHTZm5y/GSYf1MFL5utTypu9/ruxt/2X2MSEbpz9VczgogM6DJmbmX2v7RcWP+cFHTwjZZnOZ61oOzlAAAIBkNBQAACAZDQUAAEhGQwEAAJLRUAAAgGS8y+Ntzv3jx0M2LmNZ6kbKmtm8+p/fzNx2YVt8R8ehjxwfssGHPxmyzcU7OpBPO97MwseQvj5jWma+Z2t59XHuyveHbMvJL4WsqJcXqDfOUAAAgGQ0FAAAIBkNBQAASEZDAQAAkvWdSZkWo34Z/dR3DrwuZJdpXC1GVJZl//HekN3wDxeHbFxr/8z7v+dPU0K23dEL0gcGAA02sX/2/4nbvbxplPde/Z6QbfPSPUlj6ss4QwEAAJLRUAAAgGQ0FAAAIFmPDYWZjTaz2Wa2wMzmm9mZpXyomd1uZotLn4fUfrhA71HDaAbUMfKunEmZ6yWd4+4Pmtnmkh4ws9slfUrSne5+oZmdL+l8SefVbqiJMhZO61RnyA4a9ELIzpqxV8h2vTreV5Jan10TslUHbR2yoccvD9npO9wZsg9vGlfpnPX6iJD9wyOHZ45n+A8GZ+Z9THPUcB/VYvH/PS+Naw3Ztr+ux2gaqk/X8dM/3zNkrTYv6TFH/u75kLEqZuV6PEPh7ivd/cHS7TWSFkoaJWmSpJmlzWZKOqpGYwSSUMNoBtQx8q5XcyjMbCdJEyXdL2mEu68sfetZSfG/zUDOUMNoBtQx8qjshsLMNpN0g6Sz3P3Vrt9zd1fmiwqSmU01s7lmNrdd65IGC6SghtEMKqljahj1UFZDYWat2lDA17r7jaV4lZmNLH1/pKTVWfd192nu3ububa0aUI0xA71GDaMZVFrH1DDqocdJmWZmkq6StNDduy7ROEvSFEkXlj7fXJMR1tlAi0/Jwg9+P2R/eN/AzPsvXrdtyE7acmnF4znzmfeF7NZ7JoRs7Jlcarw7fa2Gm02HZ0yA7oNveO9Lddx50MSQfXvCj0PW3YqYr3SuDdnevz4rZLsvY9XgairnXR4HSPqkpEfM/ndK7QXaULw/M7OTJS2TdFxNRgiko4bRDKhj5FqPDYW7/0GZV8KQJB1a3eEA1UcNoxlQx8i7PnjiEAAAVBsNBQAASNZnLl8+4ndxAv95n4mXBv/GtveW9XjvH/hWZn7gwKVl3f/P62IvN/muqSEbd1JcKXOsmICJvu2Nvd9o9BBQQ2uH9g/ZgQNfz9iyJfP+t72xQ8jGTZ0Tsuz1jlEpzlAAAIBkNBQAACAZDQUAAEhGQwEAAJL1mUmZHYueCNnij+8UsvGnnx6yBcd9N2nfu9/yuZDtdnmcVDbuz3ECJtDXZV2+HED+8JsKAACS0VAAAIBkNBQAACAZDQUAAEjWZyZlZln/5NKQjTk7ZkeevXfSfsYprtDmSY8INJ91d2ydmXdMYD3DvmaLec+G7PTlHwjZ90ffVY/hoEycoQAAAMloKAAAQDIaCgAAkIyGAgAAJOuxoTCz0WY228wWmNl8MzuzlH/FzFaY2bzSxxG1Hy7Qe9Qwio4aRhGU8y6P9ZLOcfcHzWxzSQ+Y2e2l713i7hfVbnhAVVDDBbDtJfdk5kdc8p6Q7aJ5NR5N7vSpGl7/1LKQLd8vbvcR7VWH0aBcPTYU7r5S0srS7TVmtlDSqFoPDKgWahhFRw2jCHo1h8LMdpI0UdL9peg0M3vYzKab2ZBqDw6oNmoYRUcNI6/KbijMbDNJN0g6y91flXSFpF0lTdCGzvlb3dxvqpnNNbO57
VqXPmKgQtQwio4aRp6V1VCYWas2FPG17n6jJLn7KnfvcPdOSVdK2ifrvu4+zd3b3L2tVQOqNW6gV6hhFB01jLwr510eJukqSQvd/eIu+cgumx0t6dHqDw9IRw2j6KhhFEE57/I4QNInJT1iZvNK2QWSJpvZBG24LMVSSZ+pwfiAaqCGUXTUMHKvnHd5/EGSZXzrluoPB6g+ahhFRw2jCFgpEwAAJKOhAAAAyWgoAABAMhoKAACQjIYCAAAko6EAAADJaCgAAEAyc/f67czsOUl/vS7tcEnP123ntcWx1MeO7r51IwdADRdGXo+HGq6dZjoWKb/H020N17Wh+Jsdm81197aG7LzKOJa+qZmeq2Y6Fqn5jqdWmul5aqZjkYp5PLzkAQAAktFQAACAZI1sKKY1cN/VxrH0Tc30XDXTsUjNdzy10kzPUzMdi1TA42nYHAoAANA8eMkDAAAko6EAAADJ6t5QmNnhZva4mS0xs/Prvf9UZjbdzFab2aNdsqFmdruZLS59HtLIMZbLzEab2WwzW2Bm883szFJeyOOpF2o4P6jhyhW5jqnhfKprQ2FmLZIuk/RhSeMlTTaz8fUcQxXMkHT427LzJd3p7mMl3Vn6ugjWSzrH3cdL2k/SqaWfR1GPp+ao4dyhhivQBHU8Q9Rw7tT7DMU+kpa4+5Pu/pak6yVNqvMYkrj73ZJefFs8SdLM0u2Zko6q55gq5e4r3f3B0u01khZKGqWCHk+dUMM5Qg1XrNB1TA3nU70bilGSnu7y9fJSVnQj3H1l6fazkkY0cjCVMLOdJE2UdL+a4HhqiBrOKWq4V5qxjgv/My96DTMps8p8w/twC/VeXDPbTNINks5y91e7fq+Ix4M0RfyZU8Poqog/82ao4Xo3FCskje7y9falrOhWmdlISSp9Xt3g8ZTNzFq1oYivdfcbS3Fhj6cOqOGcoYYr0ox1XNifebPUcL0bijmSxprZzmbWX9IJkmbVeQy1MEvSlNLtKZJubuBYymZmJukqSQvd/eIu3yrk8dQJNZwj1HDFmrGOC/kzb6oadve6fkg6QtIiSU9I+lK991+F8V8naaWkdm143fFkScO0YRbuYkl3SBra6HGWeSwHasNptIclzSt9HFHU46nj80YN5+SDGk567gpbx9RwPj9YehsAACRjUiYAAEhGQwEAAJLRUAAAgGQ0FAAAIBkNBQAASEZDAQAAktFQAACAZDQUAAAgGQ0FAABIRkMBAACS0VAAAIBkSQ2FmR1uZo+b2RIzO79agwLqiTpG0VHDyIOKLw5mZi3acKW6D2rD1d7mSJrs7gu6u09/G+ADNbii/QFr9NLz7r51NR+zt3VMDSMFNYyi21gNb5LwuPtIWuLuT0qSmV0vaZKkbhuKgRqsfe3QhF2iL7vDf76sBg/bqzqmhpGCGkbRbayGU17yGCXp6S5fLy9lQJFQxyg6ahi5kHKGoixmNlXSVEkaqE1rvTug6qhhFB01jHpIOUOxQtLoLl9vX8r+hrtPc/c2d29r1YCE3QE10WMdU8PIOWoYuZDSUMyRNNbMdjaz/pJOkDSrOsMC6oY6RtFRw8iFil/ycPf1ZnaapNsktUia7u7zqzYyoA6oYxQdNYy8SJpD4e63SLqlSmMBGoI6RtFRw8gDVsoEAADJaCgAAEAyGgoAAJCMhgIAACSjoQAAAMloKAAAQDIaCgAAkIyGAgAAJKOhAAAAyWgoAABAMhoKAACQjIYCAAAko6EAAADJkq42CgBAESy6eq+QPfV3V4Xs4hd3Cdkdx7WFrGPBouoMrIlwhgIAACSjoQAAAMloKAAAQLKkORRmtlTSGkkdkta7e3yhCcg56hhFRw0jD6oxKfMQd3++Co8DNBJ1XCMtw4aGzLbcImR/OWa7zPuvHe4hG/PvD4Ws8403KhhdU6GGS1r22C1kNx9yWcjavTVkpw55PGQ/f9eHQrb5ggoH18R4yQMAACRLbShc0m/M7AEzm5q1gZlNNbO5Zja3X
esSdwfUxEbrmBpGAVDDaLjUlzwOdPcVZraNpNvN7DF3v7vrBu4+TdI0SdrChsZzl0DjbbSOqWEUADWMhks6Q+HuK0qfV0u6SdI+1RgUUE/UMYqOGkYeVHyGwswGS+rn7mtKtz8k6T+qNjKgDqjjyvXbc/eQLf7ioJD94zvvCdk5w25L2vc7RpwSsrGfeiDpMYuKGs6w4tkQnbHohJDdvscN9RhNn5HykscISTeZ2V8f5yfufmtVRgXUD3WMoqOGkQsVNxTu/qSkd1dxLEDdUccoOmoYecHbRgEAQDIaCgAAkIzLl9fBW38XV8Fd9onOkH32PXeF7Kwh5V0i950/PD0z33RlfIfYy/vH96HveG3sLfvfNresfaO52N7vDNmSs1tC9rsDvxeyrVsGhKxfxv9bfvXGkJA9uW6bzPFkrVz4o/dfGbKv7j0lZD7nkczHRHPrePmVkC1bPjZuuEcdBtOHcIYCAAAko6EAAADJaCgAAEAyGgoAAJCMSZlV9twp7w3Zd78QL5vbNqAjZFmT16YsPSxkE7f8S8ge+qfvlDvEzP3sP3RyyIamLWaIHGnZeuuQLfrOqMxt/2f/y0O2S2u8zLMUJ2BmufrV0SH7xTEHhqxzQNY+pFN/GSdlZv3+vDkirtI5sJwBoum0jIgTfN/3jvImuKNynKEAAADJaCgAAEAyGgoAAJCMhgIAACRjUmYZrLV/yNYeln0tnhu++M2QbbdJnLx28rIPhmzZRbuFbPCv5oVs9qY7hOyum8Zlj2fsrMz87V6dNyxkQ8u6J4pgxYlxlcD5B3U3kTd7cmQ5fpw1AfOo/UPW8XicIGcTWbYQVbL54BAdMXROxQ+3ei8L2VYPx7+5HQv69sRPzlAAAIBkNBQAACAZDQUAAEjWY0NhZtPNbLWZPdolG2pmt5vZ4tLneOlAIEeoYxQdNYy8K2dS5gxJ35N0TZfsfEl3uvuFZnZ+6evzqj+8fFh5Wrz8+J/O7W5CW5yA+fElHw3Z+mPaQ7bp8/eHLF58XHpm6l4hu39s+Stl/vqNzUM25gdPh2x92Y9YCDPUh+t41JFLk+7/89e2DdnFiw4N2YgvxIrteHxxWft46Z1b9H5gfcsM9eEa7o2OJU+F7F/+5/iQHTM5rmKcZf7fXxqyia+cGbLRTMrcOHe/W9KLb4snSZpZuj1T0lHVHRZQXdQxio4aRt5VOodihLuvLN1+VtKIKo0HqCfqGEVHDSM3kidlursr+8y8JMnMpprZXDOb2651qbsDamJjdUwNowioYTRapQ3FKjMbKUmlz6u729Ddp7l7m7u3tZZ5dUKgTsqqY2oYOUYNIzcqbShmSZpSuj1F0s3VGQ5QV9Qxio4aRm70+C4PM7tO0sGShpvZcklflnShpJ+Z2cmSlkk6rpaDrKfF3903ZI9/7Lsh6+zm/u+4/ZSQ7X7u0pB1PP9Cb4f2v075bNrfjK99fUrIhjx9b9Jj5l1fq+Pg0/F/peNPPT1z09G3d4Rs8PxnQzZ8WZzRHu9ZvjdGxOWN8f/1+RpOtOu598Vwcv3H0cx6bCjcvbunPL5nDMgp6hhFRw0j71gpEwAAJKOhAAAAyWgoAABAsnKW3m5aT3xrv5A9/rG4FOsrnWtD9vHH/j7zMXc7PWOi2po1ZY2n3+DBIXvh2HeFbNJm34z31aDMx9z9v08N2ZgZzT0BE1HWUsRjzo5Zd+qxDHv73uX9ngDV0motIWvvdlUl9IQzFAAAIBkNBQAASEZDAQAAktFQAACAZH1mUmbLiG1CNvPoy0PWmbEGZtYEzP4fXJa5n+5W0Hy7fhPGh2zP6QtD9rURl2bcO656eMC8EzL3s9tX4mOmrGYI/NVf/m3/kK3fNGNGW9YCmBmbfWxs+ZOFT1t+cMgG3fpgObsB/le7x7+GWf8GoDycoQAAAMloKAAAQDIaCgAAkIyGAgAAJOszkzJtYJzI2DagvOmJg87oHx9vx9GZ2y4+ZfuQfeiwOFns7G2mhWyHTeJql
1nTgzo8TjWznw7PHE/Hy4szc6Bliy0y87X7jA1Z6xdXhezh3b9b1n6yVyMsf2rw7Dc3DdnyqTuEzNfHCcgA6oczFAAAIBkNBQAASEZDAQAAkvXYUJjZdDNbbWaPdsm+YmYrzGxe6eOI2g4TSEMdo+ioYeRdOZMyZ0j6nqRr3pZf4u4XVX1ENeJr14Xs/nWtIdt3QHvIbr7j+pClrqZ2x5txEuXijOvmHjLotZDNfStOEt3qGi5J3oMZaoI6LocNiBOQ3zronSE7+/IfZd7/kEF3hmxVR/z9mf3mkJD926JJIbtujxkh226TOMbuDOwXfyefPG6rkO3y+MCQda5dW/Z+CmCG+kgNo5h6PEPh7ndLerEOYwFqhjpG0VHDyLuUORSnmdnDpdNw8b8qQDFQxyg6ahi5UGlDcYWkXSVNkLRS0re629DMpprZXDOb26542hRooLLqmBpGjlHDyI2KGgp3X+XuHe7eKelKSftsZNtp7t7m7m2tGVfJBBql3DqmhpFX1DDypKKVMs1spLuvLH15tKRHN7Z9HnSsWh2yL3/2n0J20ffjJc3fFedA6sevZq+U+bW7jgzZuBlxYtgmq14J2TbXxZdHDxn925BNmR3HPU5zM8eD7hWxjt+u38A4EfGF4yeG7Pf/eWnZj7nHdaeHbPvZcWXLAb+aE7JhI+Mk4utu2ytk5wwr/6nOmij98Kfi8bz36TNCNuKah0LW+cYbZe8775qhhhspexXX8u67xf7x35S+rseGwsyuk3SwpOFmtlzSlyUdbGYTJLmkpZI+U7shAumoYxQdNYy867GhcPfJGfFVNRgLUDPUMYqOGkbesVImAABIRkMBAACS9ZnLl2fpf1ucyHjBzt2+YaUs4/SnsrZbMynu51c73Byydo8936ClGbNE0fSyVsB87OJ3xWxSeRMwJz1+VGY+7ptPhixrUvMmo7cP2btn/SVknx+2IGSvdL4Vsn1vOCdzPCN3j/u+850/Ddm9/xqP+/jJHwnZ85fGVUMHvhAnfnan5XcPlr0t8q3d42TjcldBvuvd14XsyP1Ozt74vod7Na6i4gwFAABIRkMBAACS0VAAAIBkNBQAACAZDQUAAEjWp9/l0UjrB8VertwZxzvPiDPp11dnWMgJ2yT+aj7+7XeH7LEjLwvZ8vXx4k9H/uALIdtp+hOZ+16f8Y6O9sPi8tl7fuPPIfvyNg+E7OpXdwzZj7700ZCNufG+zPG0DB8WsoM/GJcHf/34uJz9TROvDNn2l5Z3LYtfvh73K0nTxu1S1v2Rf7v/Nl7GYMEHplX8eIumZr8Db1x2aTcdzlAAAIBkNBQAACAZDQUAAEhGQwEAAJIxKbNBNr8+Y5bOt+o/DuTT05+PS7M/duR3QvZMxgTMj1/4+ZDt9Iu4nPaLH9g5c99+4uYh+/mecd9bt8TJjXtcHydLjpv2fMg2ffz+zH1n6Xj+hZBtcV1WFu977OfiZNQRxy4rb8fnbNXNN+aXd3/k3oBFg2L4gfqPo1lwhgIAACSjoQAAAMloKAAAQLIeGwozG21ms81sgZnNN7MzS/lQM7vdzBaXPg+p/XCB3qOG0QyoY+RdOZMy10s6x90fNLPNJT1gZrdL+pSkO939QjM7X9L5ks6r3VCby5oT9stI4yqDqIrC1fAVn768rO0GWsw+esrdIRt1xkshm7LF//RiRBkTMH9yRsjGfHFOyDrWN24d120uvydkXt5TK2lFVcdSBYWr47wb/dVYH9d9YlTIPrH5yrIe76nDf5iZf/jdk0PW+dDCsh6zSHo8Q+HuK939wdLtNZIWSholaZKkmaXNZko6qkZjBJJQw2gG1DHyrldzKMxsJ0kTJd0vaYS7/7Vte1bSiOoODag+ahjNgDpGHpXdUJjZZpJukHSWu7/a9Xvu7pK8m/tNNbO5Zja3XfE980C9UMNoBpXUMTWMeiiroTCzVm0o4Gvd/cZSvMrMRpa+P1JSvEShJHef5u5t7t7WmvE6LFAP1DCaQ
aV1TA2jHnqclGlmJukqSQvd/eIu35olaYqkC0ufb67JCJvUK7vwjt16KWIN3/3a7iHbd8AjIRuasVrlBcPnlbWPjzz2scz8L/duH7Jdfh4vDT5mfpxE7A2cgNnsiljHRTTjL/uHbPIe/13Wfdszz3H2HeW8y+MASZ+U9IiZzStlF2hD8f7MzE6WtEzScTUZIZCOGkYzoI6Raz02FO7+B0kZb06TJB1a3eEA1UcNoxlQx8g7zrsDAIBkNBQAACAZly9vkFF3vRGy1tNaQtbXJ/n0Vfccsl3I9v1EvK7yK+9+K2SbPNcasnHfj6s+bvJs5ptatNPap0PWmbkl0HzWzdg2ht+s/ziKiDMUAAAgGQ0FAABIRkMBAACS0VAAAIBkTMpsEPvjvJDNeHWbkE3ePE6me2OPkSHr//TyqowL+dDxwoshG3FpvNRyuVeBYv1KoDxD5sXfvcte2i1kpw55vB7DKRTOUAAAgGQ0FAAAIBkNBQAASEZDAQAAkjEpM0cu+cGxIZt87ndCNvJfl4TshZfflf2g9z2cPC4A6Cs6FiwK2W17bhEz7d2LR12YMKLi4AwFAABIRkMBAACS0VAAAIBkPTYUZjbazGab2QIzm29mZ5byr5jZCjObV/o4ovbDBXqPGkbRUcMognImZa6XdI67P2hmm0t6wMxuL33vEne/qHbDA6qCGkbRUcPIvR4bCndfKWll6fYaM1soaVStB9YXjfpRXMr1+KM+ErKfjvllyA76t8mZjzn077cMWcfLr1QwuuKihlF01DCKoFdzKMxsJ0kTJd1fik4zs4fNbLqZDan24IBqo4ZRdNQw8qrshsLMNpN0g6Sz3P1VSVdI2lXSBG3onL/Vzf2mmtlcM5vbrnXpIwYqRA2j6Khh5FlZDYWZtWpDEV/r7jdKkruvcvcOd++UdKWkfbLu6+7T3L3N3dtaNaBa4wZ6hRpG0VHDyLty3uVhkq6StNDdL+6Sd72G9tGSHq3+8IB01DCKjhpGEZTzLo8DJH1S0iNmNq+UXSBpsplNkOSSlkr6TA3G16d0PP9CyN46ZljI3vGt+FQvPOwHmY955O4nx7DvLcdNDaPoqGHkXjnv8viDJMv41i3VHw5QfdQwio4aRhGwUiYAAEhGQwEAAJLRUAAAgGTlTMpEA2VN1Bw7JWZHau9uHqHPTcAEADQAZygAAEAyGgoAAJCMhgIAACSjoQAAAMnM3eu3M7PnJC0rfTlc0vN123ltcSz1saO7b93IAVDDhZHX46GGa6eZjkXK7/F0W8N1bSj+Zsdmc929rSE7rzKOpW9qpueqmY5Far7jqZVmep6a6VikYh4PL3kAAIBkNBQAACBZIxuKaQ3cd7VxLH1TMz1XzXQsUvMdT6000/PUTMciFfB4GjaHAgAANA9e8gAAAMnq3lCY2eFm9riZLTGz8+u9/1RmNt3MVpvZo12yoWZ2u5ktLn0e0sgxlsvMRpvZbDNbYGbzzezMUl7I46kXajg/qOHKFbmOqeF8qmtDYWYtki6T9GFJ4yVNNrPx9RxDFcyQdPjbsvMl3enuYyXdWfq6CNZLOsfdx0vaT9KppZ9HUY+n5qjh3KGGK9AEdTxD1HDu1PsMxT6Slrj7k+7+lqTrJU2q8xiSuPvdkl58WzxJ0szS7ZmSjqrnmCrl7ivd/cHS7TWSFkoapYIeT51QwzlCDVes0HVMDedTvRuKUZKe7vL18lJWdCPcfWXp9rOSRjRyMJUws50kTZR0v5rgeGqIGs4parhXmrGOC/8zL3oNMymzynzD22YK9dYZM9tM0g2SznL3V7t+r4jHgzRF/JlTw+iqiD/zZqjhejcUKySN7vL19qWs6FaZ2UhJKn1e3eDxlM3MWrWhiK919xtLcWGPpw6o4ZyhhivSjHVc2J95s9RwvRuKOZLGmtnOZtZf0gmSZtV5DLUwS9KU0u0pkm5u4FjKZmYm6SpJC9394i7fKuTx1Ak1nCPUcMWasY4L+TNvqhp297p+SDpC0
iJJT0j6Ur33X4XxXydppaR2bXjd8WRJw7RhFu5iSXdIGtrocZZ5LAdqw2m0hyXNK30cUdTjqePzRg3n5IMaTnruClvH1HA+P1gpEwAAJGNSJgAASEZDAQAAktFQAACAZDQUAAAgGQ0FAABIRkMBAACS0VAAAIBkNBQAACAZDQUAAEhGQwEAAJLRUAAAgGQ0FAAAIFlSQ2Fmh5vZ42a2xMzOr9aggHqijlF01DDyoOKrjZpZizZc+vaD2nD52DmSJrv7gu7u098G+EANrmh/wBq99Ly7b13Nx+xtHVPDSEENo+g2VsObJDzuPpKWuPuTkmRm10uaJKnbhmKgBmtfOzRhl+jL7vCfL6vBw/aqjqlhpKCGUXQbq+GUlzxGSXq6y9fLS9nfMLOpZjbXzOa2a13C7oCa6LGOqWHkHDWMXKj5pEx3n+bube7e1qoBtd4dUHXUMIqOGkY9pDQUKySN7vL19qUMKBLqGEVHDSMXUhqKOZLGmtnOZtZf0gmSZlVnWEDdUMcoOmoYuVDxpEx3X29mp0m6TVKLpOnuPr9qIwPqgDpG0VHDyIuUd3nI3W+RdEuVxgI0BHWMoqOGkQeslAkAAJLRUAAAgGQ0FAAAIBkNBQAASEZDAQAAktFQAACAZDQUAAAgGQ0FAABIRkMBAACS0VAAAIBkNBQAACAZDQUAAEhGQwEAAJIlXW0UAADU17A/DglZP/OQPbf/y3UYTZcx1HVvAACgKdFQAACAZEkveZjZUklrJHVIWu/ubdUYFFBP1DGKjhpGHlRjDsUh7v58FR4HaCTqGEVHDaOhmJRZZbbXHiHr7B+f5hUHDw7Z/NMvD1m7d1RnYD049NFjQzZ40sqQda5dW4/hIGdswICQvfHhd4fsXV96KGSL915XkzEBfcGiq+LJpjk7fCdk7/39qSHbRfNqMaRupc6hcEm/MbMHzGxqNQYENAB1jKKjhtFwqWcoDnT3FWa2jaTbzewxd7+76wal4p4qSQO1aeLugJrYaB1TwygAahgNl3SGwt1XlD6vlnSTpH0ytpnm7m3u3taqeNoUaLSe6pgaRt5Rw8iDihsKMxtsZpv/9bakD0l6tFoDA+qBOkbRUcPIi5SXPEZIusnM/vo4P3H3W6syqpzx98bJZ4s/1T9z20s+cF3IWm19yA4btCZk7R77u051ljPEZLfv+bOQTfjRP4Zs588+E7KO51+oyZjqpM/UcYqWrYeHbPZl3w/Z79fGPynf3PmjIVv/1LLqDAwSNdw0Fl0RTvJrzocuCdmazrgq5hZ3DarJmHqj4obC3Z+UFP+lBQqEOkbRUcPIC1bKBAAAyWgoAABAMhoKAACQjJUyy+BfezFkj+1+YwNGUl/z9p8esr/b93MhG/CrQk/KRBW9b2CcgPz1HYaGrB+TMoHg4IkLQ7Z5v/gGgM8tOzxkw39wb03G1BucoQAAAMloKAAAQDIaCgAAkIyGAgAAJGNSZhlW/G50DHcv//73ro1r5//jLZ+OG1rGneOCaJn2e8+izPzqnX5T3gMAVdBi/B8F+fTmpLgK5fBzngrZuuNbQrZ+5bNVH8/qz+0fsm+MiKti/vjVHUP20hd3CFk/NX5yPL/9AAAgGQ0FAABIRkMBAACS0VAAAIBkTMosww4Xzg3Z0T+bXPb97a32kI196v6kMb3dy8OHZeZ33Ld5yLIunZ7lA48cH7ItZs8PWX0usI4i6PBYDe2bxj8zcZoyUFsnXvjLkJ20xdMhO2yvz4Zs4C+rPylzyqm3hGzCgPib8emvHh2yob9v/KqYWThDAQAAktFQAACAZDQUAAAgWY8NhZlNN7PVZvZol2yomd1uZotLn4fUdphAGuoYRUcNI+/KmZQ5Q9L3JF3TJTtf0p3ufqGZnV/6+rzqDy8fvP2tkHU8vqQBI+neqo+Ny8zf2f/mjLS8KXHPPBMvO73ZG0/2Zlh5MkN9vI4bZfVerSEb/esGDKT4ZogartjKt7YKWaeWhWz9oKwli9N0HjQxZ
JM2+27I2n1QHM/A6o+nVno8Q+Hud0t68W3xJEkzS7dnSjqqusMCqos6RtFRw8i7SudQjHD3laXbz0oaUaXxAPVEHaPoqGHkRvKkTHd3beQSVmY21czmmtncdq1L3R1QExurY2oYRUANo9EqbShWmdlISSp9Xt3dhu4+zd3b3L2tleVskC9l1TE1jByjhpEblTYUsyRNKd2eIilr5h+Qd9Qxio4aRm70+C4PM7tO0sGShpvZcklflnShpJ+Z2cmSlkk6rpaDxN967rPvDdnuJz6Wue2Ilsr/N/KOLzwVso6KH62xqOM03h6Xj1/UvjZk41oHhuzNneO7pNB71HD5Fl+6b8huGhbfVXHFy/HdcVvdtyJk68vcb8tWW2bmz5/7esi22yT+bT77mf1DNuKqB0LW7RyDBuuxoXD37i5acWiVxwLUDHWMoqOGkXeslAkAAJLRUAAAgGQ0FAAAIFk5S2+jTlafFifkTPnsLSE7cYuLQrZ5v/5J+/7qc+8Jma9jMh026FgV3414xhPHh+zW3XmTAeqnZbcxmfmPPnJFyN7wOLH4xi99KGSDnv5TxeNZfPnOmfmj77kyZHe8uXm8/97FXiOEMxQAACAZDQUAAEhGQwEAAJLRUAAAgGRMyixDyx67hWzRSUMytz3owEcr3s8vR8eV3DrVmbFl+RMwl7THNd6Ov+KckO1w06q47zVPlL0fAKglP2BCyE646peZ27YNiGv67n7rmSEb94vKJ2Au/VpcsXju+y/uZuv4T+15P/zHkI3SPRWPJw84QwEAAJLRUAAAgGQ0FAAAIBkNBQAASMakzLfJmvjzqatvCtmkwc/XYO/V7+/OWBJXMxz1jTjxp6iXJUf+bTb0jUYPATlmrXGS+crT2kI299w4ab3VWjIfs93j39KPTXgwZLO+ESdWjvn3h0LWb9ttQnbkEfeFrEWWOZ4J98QJmDtcWOwJmFk4QwEAAJLRUAAAgGQ0FAAAIFmPDYWZTTez1Wb2aJfsK2a2wszmlT6OqO0wgTTUMYqOGkbelTMpc4ak70m65m35Je4er6PdhFrkIetXg5M7WROM2uOue+XWd8QJpe/7xKkh2/LaOMGoycxQH6/jRrkh49LNp+uABoyk8GaoCWv42VPiBMw/nfudkGWtGdzd38drXh0Vsv/c9v6YnRizCw7bN2Qf3PLXITtk0Gshu3/dwMzx7PDxRzLzZtPjv4rufrekF+swFqBmqGMUHTWMvEv5b/ZpZvZw6TRc9oUtgPyjjlF01DByodKG4gpJu0qaIGmlpG91t6GZTTWzuWY2t13rKtwdUBNl1TE1jByjhpEbFTUU7r7K3TvcvVPSlZL22ci209y9zd3bWjWg0nECVVduHVPDyCtqGHlS0UqZZjbS3VeWvjxaUuXX7M4Z++O8kF111OEhO/9TwzLvv8Ntb4Ws5c14CfEUi09uDdljh19R1X30Bc1cx/Xw9B9Gx3D3+o+jLytaDT93SlyZ8p7zvh2yNZ3tIVvQPjhkXzr3M5n7GfhC/Dt8538uDdnVO/0mZFmTN7Mm4WdNEm3rH/crSWcvWRiy7xzzsfiYD8XtiqTHhsLMrpN0sKThZrZc0pclHWxmEyS5pKWSsn+qQE5Qxyg6ahh512ND4e6TM+KrajAWoGaoYxQdNYy8Y6VMAACQjIYCAAAk4/LlZehYsChku3yhAQMpecfirWMY540CNbXZ0+Ut47q5xe1axo8LWdbvGZrL+H+Ikw5nvT4iZP85Lb66M/Jb8XLfmypOoOzOC+e8K2Rnf/d9Ibtku9+X/Zhv12LZly///CPHhGy7hxZUvJ+84gwFAABIRkMBAACS0VAAAIBkNBQAACAZDQUAAEjGuzwKaNXHxjR6CID6lbmifNbM985Bcfl4NL8HbhsfshevHx6ykY/Hd3SkenPEwJCdvvVvM7aMtbnff5wWsuEPvV72vkcvWRGyjrLvXRycoQAAAMloKAAAQDIaCgAAkIyGAgAAJOszkzJtwICQv
fzxiSEbcvP8kHWuWVOTMZVj5Tn7h+zmM/4rY8t4fEAtDZlxb8i+/4UdQ3bKlstCtvjs/iEbc2J1xoX82uHf42TLWkxObNk6Xp5g+TFxFvGY1vh389o1I0M2/Aex1nujGSdgZuEMBQAASEZDAQAAkvXYUJjZaDObbWYLzGy+mZ1Zyoea2e1mtrj0eUjthwv0HjWMZkAdI+/KOUOxXtI57j5e0n6STjWz8ZLOl3Snu4+VdGfpayCPqGE0A+oYudbjpEx3XylpZen2GjNbKGmUpEmSDi5tNlPS7ySdV5NR9tLaj+4Tsi3P/UvI7hrz3ZAdPWdyfMDHqz8pc5OR24ZsxbG7hOynp18Usu02KX8C5qqOdSFrfdPLvn8zKGINF9VF9/1dyA4/9NshG/eZRSHrrMWAmgh1XL7F58TVhBceemnI7l0XV8X82ZHvy3jEJ6oxrKbXqzkUZraTpImS7pc0olTgkvSspBHVHRpQfdQwmgF1jDwqu6Ews80k3SDpLHd/tev33N0lZf6318ymmtlcM5vbrvi/ZaBeqGE0g0rqmBpGPZTVUJhZqzYU8LXufmMpXmVmI0vfHylpddZ93X2au7e5e1srayWgQahhNINK65gaRj2U8y4Pk3SVpIXufnGXb82SNKV0e4qkm6s/PCAdNYxmQB0j78pZKfMASZ+U9IiZzStlF0i6UNLPzOxkScskHVeTEVbg775+V8jOGfZoWfd97IItYvjavqlDCk7YP6689ottfhWyzoxL6WaZsjROhpOkJVfvFrJhN6at+lZAhavhZtKhjMuXv7m2ASMpPOr4bVrGj8vMv3r09SHr8PiK5kmzTgnZmEX3pQ+sjyrnXR5/kDL+ImxwaHWHA1QfNYxmQB0j71gpEwAAJKOhAAAAyWgoAABAsj5z+fJyLTzsBw3ce+zv7l0b3+L16fv/IWRjPr048xGHvd7nJmAiZ3bdZFDIXjgprmY77CpqFb1z3I2/y8yP3iy+A/w9950UsjFnMQGzmjhDAQAAktFQAACAZDQUAAAgGQ0FAABI1pSTMn97xgEhu+ZzcRLYQwdMr8dw9ONXR4dsZftWIZv+YBz3mCs7QrbLH+eFjEs/Iw+uPij+Tr3U+WbIhj/8Wsgyr8wGbMTXbz4mM598YrxU+aBbMlZBRlVxhgIAACSjoQAAAMloKAAAQDIaCgAAkKwpJ2W2/O7BkO38p01DttcZZ4Zs5me+HbI9+8cL/H3gkeMz9/3K77YN2Y4/XRGy9U8tC9lYPZD5mEBRfH7hsSE7dsc/h6zf6+tCFqcfAxu3y3nZq6seed7eIRsmVmKtNc5QAACAZDQUAAAgGQ0FAABI1mNDYWajzWy2mS0ws/lmdmYp/4qZrTCzeaWPI2o/XKD3qGEUHTWMIihnUuZ6See4+4NmtrmkB8zs9tL3LnH3i2o3PKAqqGEUHTWM3OuxoXD3lZJWlm6vMbOFkkbVemDV1vnGGyEbdeE9IbvgwrhEd5bN9GTZ+fqyHhG10iw1XARDP7IoZL/V4Iwt43boHjWMIujVHAoz20nSREn3l6LTzOxhM5tuZkOqPTig2qhhFB01jLwqu6Ews80k3SDpLHd/VdIVknaVNEEbOudvdXO/qWY218zmtiu+9xyoF2oYRUcNI8/KaijMrFUbivhad79Rktx9lbt3uHunpCslZb5W4O7T3L3N3dtaNaBa4wZ6hRpG0VHDyLty3uVhkq6StNDdL+6Sj+yy2dGSHq3+8IB01DCKjhpGEZTzLo8DJH1S0iNmNq+UXSBpsplNkOSSlkr6TA3GB1QDNYyio4aRe+W8y+MPkuLFLKRbqj8coPqoYRQdNYwiYKVMAACQjIYCAAAko6EAAADJaCgAAEAyGgoAAJCMhgIAACSjoQAAAMloKAAAQDJz9/rtzOw5SctKXw6X9Hzddl5bHEt97OjuWzdyANRwYeT1eKjh2mmmY5Hyezzd1nBdG4q/2bHZXHdva8jOq4xj6Zua6blqpmORmu94a
qWZnqdmOhapmMfDSx4AACAZDQUAAEjWyIZiWgP3XW0cS9/UTM9VMx2L1HzHUyvN9Dw107FIBTyehs2hAAAAzYOXPAAAQLK6NxRmdriZPW5mS8zs/HrvP5WZTTez1Wb2aJdsqJndbmaLS5+HNHKM5TKz0WY228wWmNl8MzuzlBfyeOqFGs4ParhyRa5jajif6tpQmFmLpMskfVjSeEmTzWx8PcdQBTMkHf627HxJd7r7WEl3lr4ugvWSznH38ZL2k3Rq6edR1OOpOWo4d6jhCjRBHc8QNZw79T5DsY+kJe7+pLu/Jel6SZPqPIYk7n63pBffFk+SNLN0e6ako+o5pkq5+0p3f7B0e42khZJGqaDHUyfUcI5QwxUrdB1Tw/lU74ZilKSnu3y9vJQV3Qh3X1m6/aykEY0cTCXMbCdJEyXdryY4nhqihnOKGu6VZqzjwv/Mi17DTMqsMt/wtplCvXXGzDaTdIOks9z91a7fK+LxIE0Rf+bUMLoq4s+8GWq43g3FCkmju3y9fSkrulVmNlKSSp9XN3g8ZTOzVm0o4mvd/cZSXNjjqQNqOGeo4Yo0Yx0X9mfeLDVc74ZijqSxZrazmfWXdIKkWXUeQy3MkjSldHuKpJsbOJaymZlJukrSQne/uMu3Cnk8dUIN5wg1XLFmrONC/sybqobdva4fko6QtEjSE5K+VO/9V2H810laKaldG153PFnSMG2YhbtY0h2ShjZ6nGUey4HacBrtYUnzSh9HFPV46vi8UcM5+aCGk567wtYxNZzPD1bKBAAAyZiUCQAAktFQAACAZDQUAAAgGQ0FAABIRkMBAACS0VAAAIBkNBQAACAZDQUAAEj2/wCngSznSBSVjwAAAABJRU5ErkJggg==\n",
48 | "text/plain": [
49 | ""
50 | ]
51 | },
52 | "metadata": {
53 | "needs_background": "light"
54 | },
55 | "output_type": "display_data"
56 | }
57 | ],
58 | "source": [
59 | "plt.figure(figsize=[9, 9])\n",
60 | "\n",
61 | "for i in range(1, 10):\n",
62 | " plt.subplot(3, 3, i)\n",
63 | " plt.imshow(x_train[i])"
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {},
69 | "source": [
70 | "## Model"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 4,
76 | "metadata": {},
77 | "outputs": [],
78 | "source": [
79 | "model = tf.keras.models.Sequential([\n",
80 | " tf.keras.layers.Flatten(input_shape=[28, 28]),\n",
81 | " tf.keras.layers.Dense(128, activation='relu'),\n",
82 | " tf.keras.layers.Dropout(.2),\n",
83 | " tf.keras.layers.Dense(10)\n",
84 | "])"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 5,
90 | "metadata": {},
91 | "outputs": [
92 | {
93 | "name": "stdout",
94 | "output_type": "stream",
95 | "text": [
96 | "Model: \"sequential\"\n",
97 | "_________________________________________________________________\n",
98 | "Layer (type) Output Shape Param # \n",
99 | "=================================================================\n",
100 | "flatten (Flatten) (None, 784) 0 \n",
101 | "_________________________________________________________________\n",
102 | "dense (Dense) (None, 128) 100480 \n",
103 | "_________________________________________________________________\n",
104 | "dropout (Dropout) (None, 128) 0 \n",
105 | "_________________________________________________________________\n",
106 | "dense_1 (Dense) (None, 10) 1290 \n",
107 | "=================================================================\n",
108 | "Total params: 101,770\n",
109 | "Trainable params: 101,770\n",
110 | "Non-trainable params: 0\n",
111 | "_________________________________________________________________\n"
112 | ]
113 | }
114 | ],
115 | "source": [
116 | "model.summary()"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
    123 |     "In the model, we have `784` input features, and in the first layer, we have `128` nodes, so the corresponding weights $W$ and $b$ would have size $(784, 128)$ and $(128, 1)$, which, in total, adds up to\n",
124 | "\n",
125 | "$$ 784 \\times 128 + 128 = 100480$$\n",
126 | "\n",
    127 |     "The same reasoning gives the `dense_1` layer its $128 \\times 10 + 10 = 1290$ parameters.\n",
128 | "\n",
129 | "__Note__: It is possible to bake this tf.nn.softmax in as the activation function for the last layer of the network. While this can make the model output more directly interpretable, this approach is discouraged as it's impossible to provide an exact and numerically stable loss calculation for all models when using a softmax output."
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 6,
135 | "metadata": {},
136 | "outputs": [
137 | {
138 | "name": "stdout",
139 | "output_type": "stream",
140 | "text": [
141 | "[[ 0.15655103 -0.48250246 0.29978698 -0.6595523 -0.27254325 0.18561608\n",
142 | " -0.42510659 0.32197294 -0.18982276 -1.2221566 ]]\n"
143 | ]
144 | }
145 | ],
146 | "source": [
147 | "predictions = model(x_train[:1])\n",
148 | "print(predictions.numpy())"
149 | ]
150 | },
151 | {
152 | "cell_type": "markdown",
153 | "metadata": {},
154 | "source": [
    155 |     "Transform the logits into probabilities using `softmax`."
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 7,
161 | "metadata": {},
162 | "outputs": [
163 | {
164 | "name": "stdout",
165 | "output_type": "stream",
166 | "text": [
167 | "[[0.13328804 0.07034832 0.15381466 0.05893347 0.08678365 0.1372189\n",
168 | " 0.07450415 0.15726532 0.0942677 0.03357577]]\n"
169 | ]
170 | }
171 | ],
172 | "source": [
173 | "predictions = tf.nn.softmax(predictions)\n",
174 | "print(predictions.numpy())"
175 | ]
176 | },
177 | {
178 | "cell_type": "markdown",
179 | "metadata": {},
180 | "source": [
181 | "## Loss Function"
182 | ]
183 | },
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {},
187 | "source": [
188 | "In `SparseCategoricalCrossentropy`, labels are to be provided as integers"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 8,
194 | "metadata": {},
195 | "outputs": [
196 | {
197 | "data": {
198 | "text/plain": [
199 | "2.2661917"
200 | ]
201 | },
202 | "execution_count": 8,
203 | "metadata": {},
204 | "output_type": "execute_result"
205 | }
206 | ],
207 | "source": [
208 | "loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n",
209 | "loss_fn(y_train[:1], predictions).numpy()"
210 | ]
211 | },
212 | {
213 | "cell_type": "markdown",
214 | "metadata": {},
215 | "source": [
216 | "## Train"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 9,
222 | "metadata": {},
223 | "outputs": [],
224 | "source": [
225 | "model.compile(optimizer='adam',\n",
226 | " loss=loss_fn,\n",
227 | " metrics=['accuracy'])"
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": 10,
233 | "metadata": {},
234 | "outputs": [
235 | {
236 | "name": "stdout",
237 | "output_type": "stream",
238 | "text": [
239 | "Epoch 1/5\n",
240 | "1875/1875 [==============================] - 1s 620us/step - loss: 0.2957 - accuracy: 0.9146\n",
241 | "Epoch 2/5\n",
242 | "1875/1875 [==============================] - 1s 610us/step - loss: 0.1434 - accuracy: 0.9573\n",
243 | "Epoch 3/5\n",
244 | "1875/1875 [==============================] - 1s 610us/step - loss: 0.1056 - accuracy: 0.9686\n",
245 | "Epoch 4/5\n",
246 | "1875/1875 [==============================] - 1s 621us/step - loss: 0.0878 - accuracy: 0.9729\n",
247 | "Epoch 5/5\n",
248 | "1875/1875 [==============================] - 1s 620us/step - loss: 0.0737 - accuracy: 0.9772\n"
249 | ]
250 | },
251 | {
252 | "data": {
253 | "text/plain": [
254 | ""
255 | ]
256 | },
257 | "execution_count": 10,
258 | "metadata": {},
259 | "output_type": "execute_result"
260 | }
261 | ],
262 | "source": [
263 | "model.fit(x_train, y_train, epochs=5)"
264 | ]
265 | },
266 | {
267 | "cell_type": "markdown",
268 | "metadata": {},
269 | "source": [
270 | "## Evaluate"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": 11,
276 | "metadata": {},
277 | "outputs": [
278 | {
279 | "name": "stdout",
280 | "output_type": "stream",
281 | "text": [
282 | "313/313 - 0s - loss: 0.0733 - accuracy: 0.9771\n"
283 | ]
284 | },
285 | {
286 | "data": {
287 | "text/plain": [
288 | "[0.0732896476984024, 0.9771000146865845]"
289 | ]
290 | },
291 | "execution_count": 11,
292 | "metadata": {},
293 | "output_type": "execute_result"
294 | }
295 | ],
296 | "source": [
297 | "model.evaluate(x_test, y_test, verbose=2)"
298 | ]
299 | },
300 | {
301 | "cell_type": "markdown",
302 | "metadata": {},
303 | "source": [
304 | "## Predict"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": 12,
310 | "metadata": {},
311 | "outputs": [
312 | {
313 | "data": {
314 | "text/plain": [
315 | "array([7, 2, 1, 0, 4])"
316 | ]
317 | },
318 | "execution_count": 12,
319 | "metadata": {},
320 | "output_type": "execute_result"
321 | }
322 | ],
323 | "source": [
324 | "tf.argmax(model.predict(x_test[:5]), axis=-1).numpy()"
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "metadata": {},
330 | "source": [
    331 |     "## Part 2: Custom Training with the Subclassing API"
332 | ]
333 | },
334 | {
335 | "cell_type": "code",
336 | "execution_count": 1,
337 | "metadata": {},
338 | "outputs": [],
339 | "source": [
340 | "import tensorflow as tf\n",
341 | "\n",
342 | "from tensorflow.keras.layers import Dense, Flatten, Conv2D\n",
343 | "from tensorflow.keras import Model"
344 | ]
345 | },
346 | {
347 | "cell_type": "code",
348 | "execution_count": 2,
349 | "metadata": {},
350 | "outputs": [
351 | {
352 | "name": "stdout",
353 | "output_type": "stream",
354 | "text": [
355 | "train shape (60000, 28, 28, 1)\n",
356 | "test shape (10000, 28, 28, 1)\n"
357 | ]
358 | }
359 | ],
360 | "source": [
361 | "mnist = tf.keras.datasets.mnist\n",
362 | "\n",
363 | "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
364 | "x_train, x_test = x_train / 255.0, x_test / 255.0\n",
365 | "\n",
366 | "# Add a channels dimension\n",
367 | "x_train = x_train[..., tf.newaxis].astype(\"float32\")\n",
368 | "x_test = x_test[..., tf.newaxis].astype(\"float32\")\n",
369 | "\n",
370 | "print('train shape', x_train.shape)\n",
371 | "print('test shape', x_test.shape)"
372 | ]
373 | },
374 | {
375 | "cell_type": "markdown",
376 | "metadata": {},
377 | "source": [
378 | "## Get Batch"
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": 3,
384 | "metadata": {},
385 | "outputs": [],
386 | "source": [
387 | "train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)\n",
388 | "test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)"
389 | ]
390 | },
391 | {
392 | "cell_type": "markdown",
393 | "metadata": {},
394 | "source": [
395 | "## Model"
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": 4,
401 | "metadata": {},
402 | "outputs": [],
403 | "source": [
404 | "class MyModel(Model):\n",
405 | " def __init__(self):\n",
406 | " super(MyModel, self).__init__()\n",
407 | " self.conv1 = Conv2D(32, 3, activation='relu')\n",
408 | " self.flatten = Flatten()\n",
409 | " self.dense1 = Dense(128, activation='relu')\n",
410 | " self.dense2 = Dense(10)\n",
411 | " \n",
412 | " def call(self, x):\n",
413 | " x = self.conv1(x)\n",
414 | " x = self.flatten(x)\n",
415 | " x = self.dense1(x)\n",
416 | " x = self.dense2(x)\n",
    417 |     "        # no softmax here: the loss is computed with from_logits=True\n",
418 | " return x\n",
419 | "\n",
420 | "model = MyModel()"
421 | ]
422 | },
423 | {
424 | "cell_type": "markdown",
425 | "metadata": {},
426 | "source": [
427 | "## Optimizer and Loss"
428 | ]
429 | },
430 | {
431 | "cell_type": "markdown",
432 | "metadata": {},
433 | "source": [
434 | "These metrics accumulate the values over epochs and then print the overall result."
435 | ]
436 | },
437 | {
438 | "cell_type": "code",
439 | "execution_count": 5,
440 | "metadata": {},
441 | "outputs": [],
442 | "source": [
443 | "train_loss = tf.keras.metrics.Mean(name='train_loss')\n",
444 | "train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')\n",
445 | "\n",
446 | "test_loss = tf.keras.metrics.Mean(name='test_loss')\n",
447 | "test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')"
448 | ]
449 | },
450 | {
451 | "cell_type": "code",
452 | "execution_count": 6,
453 | "metadata": {},
454 | "outputs": [],
455 | "source": [
456 | "loss_obj = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n",
457 | "optimizer = tf.keras.optimizers.Adam()"
458 | ]
459 | },
460 | {
461 | "cell_type": "code",
462 | "execution_count": 7,
463 | "metadata": {},
464 | "outputs": [],
465 | "source": [
466 | "# step of one batch\n",
467 | "@tf.function\n",
468 | "def train_step(images, labels):\n",
469 | " with tf.GradientTape() as tape:\n",
470 | " predictions = model(images)\n",
471 | " loss = loss_obj(labels, predictions)\n",
472 | " gradients = tape.gradient(loss, model.trainable_variables)\n",
473 | " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n",
474 | " \n",
475 | " train_loss(loss)\n",
476 | " train_accuracy(labels, predictions)"
477 | ]
478 | },
479 | {
480 | "cell_type": "code",
481 | "execution_count": 8,
482 | "metadata": {},
483 | "outputs": [],
484 | "source": [
485 | "@tf.function\n",
486 | "def test_step(images, labels):\n",
487 | " # training=False is only needed if there are layers with different\n",
488 | " # behavior during training versus inference (e.g. Dropout).\n",
489 | " predictions = model(images, training=False)\n",
490 | " t_loss = loss_obj(labels, predictions)\n",
491 | "\n",
492 | " test_loss(t_loss)\n",
493 | " test_accuracy(labels, predictions)"
494 | ]
495 | },
496 | {
497 | "cell_type": "markdown",
498 | "metadata": {},
499 | "source": [
500 | "## Training"
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "execution_count": 9,
506 | "metadata": {},
507 | "outputs": [
508 | {
509 | "name": "stdout",
510 | "output_type": "stream",
511 | "text": [
512 | "Epoch 1, Loss: 0.1341124325990677, Accuracy: 96.08833312988281, Test Loss: 0.07015062868595123, Test Accuracy: 97.7699966430664\n",
513 | "Epoch 2, Loss: 0.04252898693084717, Accuracy: 98.69166564941406, Test Loss: 0.05851253867149353, Test Accuracy: 98.13999938964844\n",
514 | "Epoch 3, Loss: 0.02277100831270218, Accuracy: 99.24666595458984, Test Loss: 0.05304492637515068, Test Accuracy: 98.38999938964844\n",
515 | "Epoch 4, Loss: 0.012743637897074223, Accuracy: 99.58499908447266, Test Loss: 0.057635437697172165, Test Accuracy: 98.3499984741211\n",
516 | "Epoch 5, Loss: 0.00996498391032219, Accuracy: 99.66999816894531, Test Loss: 0.07684854418039322, Test Accuracy: 98.23999786376953\n"
517 | ]
518 | }
519 | ],
520 | "source": [
521 | "EPOCHS = 5\n",
522 | "for epoch in range(EPOCHS):\n",
    523 |     "    # reset the metric accumulators at the start of each epoch\n",
524 | " train_loss.reset_states()\n",
525 | " train_accuracy.reset_states()\n",
526 | " test_loss.reset_states()\n",
527 | " test_accuracy.reset_states()\n",
528 | " \n",
529 | " for images, labels in train_ds:\n",
530 | " train_step(images, labels)\n",
531 | " \n",
532 | " for images, labels in test_ds:\n",
533 | " test_step(images, labels)\n",
534 | " \n",
535 | " print(\n",
536 | " f'Epoch {epoch + 1}, '\n",
537 | " f'Loss: {train_loss.result()}, '\n",
538 | " f'Accuracy: {train_accuracy.result() * 100}, '\n",
539 | " f'Test Loss: {test_loss.result()}, '\n",
540 | " f'Test Accuracy: {test_accuracy.result() * 100}'\n",
541 | " )"
542 | ]
543 | },
544 | {
545 | "cell_type": "code",
546 | "execution_count": null,
547 | "metadata": {},
548 | "outputs": [],
549 | "source": []
550 | }
551 | ],
552 | "metadata": {
553 | "kernelspec": {
554 | "display_name": "Python 3",
555 | "language": "python",
556 | "name": "python3"
557 | },
558 | "language_info": {
559 | "codemirror_mode": {
560 | "name": "ipython",
561 | "version": 3
562 | },
563 | "file_extension": ".py",
564 | "mimetype": "text/x-python",
565 | "name": "python",
566 | "nbconvert_exporter": "python",
567 | "pygments_lexer": "ipython3",
568 | "version": "3.8.3"
569 | }
570 | },
571 | "nbformat": 4,
572 | "nbformat_minor": 4
573 | }
574 |
--------------------------------------------------------------------------------