├── .gitignore ├── 1 - Linear Regression from Scratch.ipynb ├── 3 - Binary Logistic Regression from Scratch.ipynb ├── 4 - Multinomial Logistic Regression.ipynb ├── LICENSE ├── README.md └── assets ├── gradient-descent-1.png ├── gradient-descent-1.xml ├── gradient-descent-2.png ├── gradient-descent-2.xml ├── gradient-descent-3.png ├── gradient-descent-3.xml └── sigmoid.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /1 - Linear Regression from Scratch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt\n", 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "np.random.seed(1)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "m = np.random.randint(0, 25)\n", 29 | "b = np.random.randint(0, 25)\n", 30 | "xs = np.arange(0, 10, 0.01)\n", 31 | "ys = [(m*x+b)+np.random.normal(scale=5) 
for x in np.arange(0, 10, 0.01)]" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 4, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "image/png": "\n", 42 | "text/plain": [ 43 | "
" 44 | ] 45 | }, 46 | "metadata": {}, 47 | "output_type": "display_data" 48 | } 49 | ], 50 | "source": [ 51 | "plt.scatter(xs, ys)\n", 52 | "plt.show()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 5, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "def calc_error(y_hat, y):\n", 62 | " \"\"\"\n", 63 | " Mean squared error between target, y, and prediction y_hat.\n", 64 | " \"\"\"\n", 65 | " error = 0.5 * (y - y_hat) ** 2\n", 66 | " return error" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 6, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "def calc_errors(y_hats, ys):\n", 76 | " \"\"\"\n", 77 | " Calculates the average error between a list of predictions, y_hats\n", 78 | " and a list of labels, ys.\n", 79 | " \"\"\"\n", 80 | " error = np.mean([calc_error(y_hat, y) for y_hat, y in zip(y_hats, ys)])\n", 81 | " return error" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 7, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/plain": [ 92 | "22869.0" 93 | ] 94 | }, 95 | "execution_count": 7, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "calc_errors([100,200,300], [1,2,3])" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 8, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "pred_m = np.random.normal()\n", 111 | "pred_b = np.random.normal()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 9, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "def predict(pred_m, pred_b, x):\n", 121 | " return pred_m * x + pred_b" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 10, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "def predicts(pred_m, pred_b, xs):\n", 131 | " return [predict(pred_m, pred_b, x) for x in xs]" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 
136 | "execution_count": 11, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "y_hats = predicts(pred_m, pred_b, xs)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 12, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "image/png": "\n", 151 | "text/plain": [ 152 | "
" 153 | ] 154 | }, 155 | "metadata": {}, 156 | "output_type": "display_data" 157 | } 158 | ], 159 | "source": [ 160 | "plt.scatter(xs, ys)\n", 161 | "plt.plot(xs, y_hats, c='r')\n", 162 | "plt.show()" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 13, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": [ 173 | "1141.4820519282366" 174 | ] 175 | }, 176 | "execution_count": 13, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "calc_errors(y_hats, ys)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 14, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "def gradient_step(pred_m, pred_b, xs, ys, eta=0.01):\n", 192 | " \"\"\"\n", 193 | " Performs one gradient descent step using all of the data points\n", 194 | " pred_m (float): predicted m value\n", 195 | " pred_b (float): predicted b value\n", 196 | " xs (list[float]): x values for data points\n", 197 | " ys (list[float]): y values for data points\n", 198 | " eta (float): learning rate\n", 199 | " \"\"\"\n", 200 | " \n", 201 | " N = len(xs)\n", 202 | " grad_m = 0\n", 203 | " grad_b = 0\n", 204 | " \n", 205 | " for x, y in zip(xs, ys):\n", 206 | " grad_m += - (1/N) * x * (y - (pred_m * x + pred_b)) \n", 207 | " grad_b += - (1/N) * (y - (pred_m * x + pred_b))\n", 208 | " \n", 209 | " new_m = pred_m - (eta * grad_m)\n", 210 | " new_b = pred_b - (eta * grad_b)\n", 211 | " \n", 212 | " return new_m, new_b" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 15, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "pred_m, pred_b = gradient_step(pred_m, pred_b, xs, ys)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 16, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "y_hats_init = y_hats" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 17, 236 | 
"metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "y_hats = predicts(pred_m, pred_b, xs)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 18, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/plain": [ 250 | "511.83320752069864" 251 | ] 252 | }, 253 | "execution_count": 18, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | } 257 | ], 258 | "source": [ 259 | "calc_errors(y_hats, ys)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 19, 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "data": { 269 | "image/png": "\n", 270 | "text/plain": [ 271 | "
" 272 | ] 273 | }, 274 | "metadata": {}, 275 | "output_type": "display_data" 276 | } 277 | ], 278 | "source": [ 279 | "plt.scatter(xs, ys)\n", 280 | "plt.plot(xs, y_hats_init, c='g')\n", 281 | "plt.plot(xs, y_hats, c='r')\n", 282 | "plt.show()" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 20, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "errors = []\n", 292 | "\n", 293 | "for _ in range(1000):\n", 294 | " pred_m, pred_b = gradient_step(pred_m, pred_b, xs, ys)\n", 295 | " y_hats = predicts(pred_m, pred_b, xs)\n", 296 | " error = calc_errors(y_hats, ys)\n", 297 | " errors.append(error)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 21, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "y_hats = predicts(pred_m, pred_b, xs)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 22, 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "data": { 316 | "image/png": "\n", 317 | "text/plain": [ 318 | "
" 319 | ] 320 | }, 321 | "metadata": {}, 322 | "output_type": "display_data" 323 | } 324 | ], 325 | "source": [ 326 | "plt.scatter(xs, ys)\n", 327 | "plt.plot(xs, y_hats_init, c='g')\n", 328 | "plt.plot(xs, y_hats, c='r')\n", 329 | "plt.show()" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 23, 335 | "metadata": {}, 336 | "outputs": [ 337 | { 338 | "data": { 339 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAFhJJREFUeJzt3W2MXNd93/Hvf2b2ic+U+CBapEzJpt3YMSwLrCvVLeDWrWMLReUCcWGhqAlXhfpCRZ0iQCG3L9y+CJACtZ0aSIWosWslSJ2oiRMrhhtHkI0afWE5VOsqkiVKlKVYlPgoUXwmd3fm9MWc2R2Sc2eWu0stz+X3AwzuveeemT13L/m7Z8/ch0gpIUmqr8ZKN0CSdHUZ9JJUcwa9JNWcQS9JNWfQS1LNGfSSVHMGvSTVnEEvSTVn0EtSzbVWugEAmzZtSjt37lzpZkhSUZ566qljKaXNo+pdE0G/c+dO9u7du9LNkKSiRMRfLaSeQzeSVHMGvSTVnEEvSTVn0EtSzRn0klRzBr0k1ZxBL0k1V3TQ7zt0ii//+T6Onb6w0k2RpGtW0UH/4pFTfPX7+3nzzPRKN0WSrllFB30QK90ESbrmFR30PSmtdAsk6dpVdNBH7tAnTHpJqlJ20OepPXpJqlZ20DtEL0kjFR30PfboJala4UHf7dI7Ri9J1YoO+rkvY815SapUdtCvdAMkqQBFB70kabSigz7y2I1DN5JUreygz1O/jJWkamUHvYP0kjRS0UHf49CNJFUrOujn73UjSapSdtD3LpiySy9JlYoOek+kl6TRyg76zP68JFUrOui9TbEkjVZ20Mf8mfSSpMHKDvqVboAkFaDooO9x6EaSqhUd9J5HL0mjlR30eFMzSRql7KB3kF6SRio66Hu8MlaSqhUd9J5cKUmjFR30+MxYSRqp6KAPz6SXpJFGBn1E7IiIH0TEcxHxbER8PpffEBGPR8SLeboxl0dEfDUi9kfE0xFxx9XeCJ8wJUnVFtKjnwV+NaX0C8CdwAMR8T7gQeCJlNIu4Im8DPBJYFd+3Q88tOytzrwDgiSNNjLoU0oHU0r/J8+fAp4DbgbuAR7J1R4BPpXn7wF+J3X9CNgQEduWveX4ZawkLcQVjdFHxE7gQ8CTwNaU0kHoHgyALbnazcCrfW87kMsu/az7I2JvROw9evTolbec/puaSZKqLDjoI2IN8EfAr6SUTg6rOqDssk53SunhlNLulNLuzZs3L7QZA3nWjSRVW1DQR8QY3ZD/vZTSt3Lx4d6QTJ4eyeUHgB19b98OvL48zb20Xd2pX8ZKUrWFnHUTwNeA51JKX+5b9RiwJ8/vAb7dV/7ZfPbNncCJ3hDPcvPBI5I0WmsBdT4C/FPgLyPiJ7ns3wK/DjwaEfcBPwc+ndd9F7gb2A+cBT63rC3u4xC9JI02MuhTSv+b6md8fGxA/QQ8sMR2XRE79JJUregrY5m7TbFRL0lVig56HzwiSaOVHfQr3QBJKkDRQT/HLr0kVSo66HtXxnoevSRVKzvo89TvYiWpWtlB7yC9JI1UdND32KOXpGpFB33vCVPmvCRVK
zvo554Za9RLUpWig16SNFotgt7+vCRVKzro54duVrYdknQtKzvofWqsJI1UdtB7Hr0kjVR00Pc4dCNJ1YoOem9TLEmjlR30cw8eWeGGSNI1rOygd4xekkYqOuh7vE2xJFUrOui9TbEkjVZ20PtlrCSNVHTQ+9RYSRqt8KDv8u6VklSt6KD3rBtJGq3soM9TO/SSVK3soLdLL0kjFR30PZ5HL0nVig56h24kabSyg94Hj0jSSGUHvefRS9JIRQd9jx16SapWdNDPD90Y9ZJUpeig7zHmJanayKCPiK9HxJGIeKav7N9HxGsR8ZP8urtv3RciYn9E7IuIX7paDe/+rDxj0ktSpYX06L8BfGJA+VdSSrfn13cBIuJ9wGeA9+f3/JeIaC5XYy/lBVOSNNrIoE8p/RB4c4Gfdw/w+ymlCymll4H9wIeX0L4F8YIpSaq2lDH6fxkRT+ehnY257Gbg1b46B3LZVeEFU5I02mKD/iHgXcDtwEHgS7l80FjKwBiOiPsjYm9E7D169OiiGuGDRyRptEUFfUrpcEqpnVLqAP+V+eGZA8COvqrbgdcrPuPhlNLulNLuzZs3L6YZXjAlSQuwqKCPiG19i/8I6J2R8xjwmYiYiIhbgV3Aj5fWxNEcupGkaq1RFSLim8BHgU0RcQD4IvDRiLid7qjJK8C/AEgpPRsRjwI/BWaBB1JK7avT9P6hG5NekqqMDPqU0r0Dir82pP6vAb+2lEYtlF/GStJoZV8Z6xC9JI1UdtBnduglqVrRQT931o1jN5JUqeyg9zx6SRqp7KBf6QZIUgGKDvoeR24kqVrRQd+7e6UPHpGkamUHfZ4a85JUreygd5BekkYqOuh7HLmRpGpFB33vPHpzXpKqFR3089dLGfWSVKXooHeMXpJGKzroJUmjFR303qZYkkYrO+h7F0z5dawkVSo76Fe6AZJUgKKDvsehG0mqVnTQe5tiSRqt7KDvXTBl0ktSpbKD3kF6SRqp6KDv8awbSapWj6A35yWpUtFB33DsRpJGKjzou9NOxy69JFUpOuh7V8aa85JUreigb8ydR2/SS1KVooPeHr0kjVZ00EP3XHofPCJJ1YoP+kYEHYNekirVIOg9j16Shik+6CPCMXpJGqL8oMcxekkapvigb0R4cqUkDVGDoPfKWEkaZmTQR8TXI+JIRDzTV3ZDRDweES/m6cZcHhHx1YjYHxFPR8QdV7Px0Dvr5mr/FEkq10J69N8APnFJ2YPAEymlXcATeRngk8Cu/LofeGh5mjlE4OmVkjTEyKBPKf0QePOS4nuAR/L8I8Cn+sp/J3X9CNgQEduWq7GDeAdLSRpusWP0W1NKBwHydEsuvxl4ta/egVx21TTs0UvSUMv9Zeyg7vXAFI6I+yNib0TsPXr06OJ/oFfGStJQiw36w70hmTw9kssPADv66m0HXh/0ASmlh1NKu1NKuzdv3rzIZnhlrCSNstigfwzYk+f3AN/uK/9sPvvmTuBEb4jnavHKWEkarjWqQkR8E/gosCkiDgBfBH4deDQi7gN+Dnw6V/8ucDewHzgLfO4qtPkiDe9eKUlDjQz6lNK9Fas+NqBuAh5YaqOuROAYvSQNU4srY815SapWfNA7Ri9Jw9Ug6B2jl6Rhig96714pScPVIOi9MlaShqlB0DtGL0nDFB/03r1SkoYrPugbERV305EkQS2C3h69JA1TfNB7ZawkDVd+0HtlrCQNVXzQe9aNJA1XftA3vDJWkoYpPugdo5ek4YoP+oZnV0rSUMUHvXevlKThahD0jtFL0jDFB30jwtMrJWmIGgS9V8ZK0jDFB313jN6gl6Qq5Qc9+GWsJA1RfNB790pJGq78oG84Ri9JwxQf9F4ZK0nDFR/0jUbQNuclqVLxQd9qBB2/jZWkSsUHfbMRzBr0klSp+KBvNYJ2p7PSzZCka1bxQW+PXpKGKz7ouz16g16SqhQf9M1Gg1lPu5GkSsUHvT16SRqu+KBvNh2jl6Rhig96z
7qRpOGKD3rPupGk4VpLeXNEvAKcAtrAbEppd0TcAPwBsBN4BfjHKaXjS2tmNcfoJWm45ejR/52U0u0ppd15+UHgiZTSLuCJvHzVNBsNe/SSNMTVGLq5B3gkzz8CfOoq/Iw5zQb26CVpiKUGfQL+PCKeioj7c9nWlNJBgDzdssSfMVSz0aDdSSRvVSxJAy1pjB74SErp9YjYAjweEc8v9I35wHA/wC233LLoBrQaAXR79a1mLPpzJKmultSjTym9nqdHgD8GPgwcjohtAHl6pOK9D6eUdqeUdm/evHnRbWj2gt4evSQNtOigj4jVEbG2Nw98HHgGeAzYk6vtAb691EYO09+jlyRdbilDN1uBP46I3uf895TSn0XEXwCPRsR9wM+BTy+9mdV6PXrPvJGkwRYd9CmlnwEfHFD+BvCxpTTqSvR69N7YTJIGK/7K2PFWE4DpWW+DIEmDFB/0k2PdTTg/017hlkjStakGQd/t0Z+fNeglaZAaBH2vR+/QjSQNUn7Q5zF6h24kabDig35izKCXpGGKD3qHbiRpuBoEfbdHf8EvYyVpoOKDfioH/dlpg16SBik+6DeuGgfg+NnpFW6JJF2big/6qfEmU2NN3jxt0EvSIMUHPcANq8d584xBL0mD1CLot6yb4PUT51a6GZJ0TapF0L9361qeP3SKjrcqlqTL1CLo77ztRt46O8Nv/mA/R06dX+nmSNI1ZanPjL0m3P2Bbfzp/3udLz3+Al96/AW2rJ3g/e9Yxy/evJ73v2Md73/Hem7eMEWj4TNlJV1/ahH0460Gv71nN8+8dpIfv/Imz75+gmdfO8kPXzw294jBVeNNdm1dy3u3ruE9W9fynq1ree9Na9mydoL8lCxJqqVaBD1ARPCB7ev5wPb1c2XnZ9rsO3SKnx48yQuHT/HC4VN8//mjPLr3wFyd9VNjvGfrGt69ZS23bVrNrZtWc+vm1ezYuIrxVi1GtiRd52oT9INMjjX54I4NfHDHhovK3zh9gRcOn+aFw6fYd/gU+w6d4n8+c5C3zs7M1Wk2gh0bp9iZw797EFjDLTesYtuGScaaHgQklaHWQV/lxjUT3LVmgrvedeNF5cfPTPPyG2d4+egZXj42/3ryZ29yru/umI2AbeunuHnjFNs3TrF94yp25On2jVNsWz9JywOBpGvEdRn0VTauHmfj6nHuuGXjReUpJQ6fvMDPjp3mwJvnOHD8LAeOn+PA8XP86KU3OHjyNVLfmZ3NRnDTukm2rpvgpvWTbF03yU3rJi+b792QTZKuJoN+ASKCm9Z3w5l3Xb5+erbDwRPncvh3DwKvHT/HoZPnef7QKf7XvqOcGXDTtfVTY90DwvpJNq0ZZ9OaCW5cnadr5qc3rp7w+wJJi2bQL4PxVoN33riad964urLOqfMzHD55nkMnLnDo5Pk8f55DJ89z5OR5XjpymmOnL3BhdvB99ddNtti0ZmIu/DeuHmfD1BgbVo2xYWq8O12Vp1NjrF81xkTLvxgkGfRvm7WTY6ydHOPdW9ZW1kkpcWa6zRunL3Ds9AWOnZ7mjdPT88tnuvMvHjnNW2eneevsDLNDrgaeGmuyYdUY6/sOCOumWqyZGGPNZIu1Ey3WTrZYM9lizUQrt7E332L1eMtrD6QaMOivIRHBmolu0A7766AnpcTpC7O8dXaGE+dmeOvsDG+dm+5bns5lM5w4O8NLR09z+sIsp8/PcurC7ILa1GvP2skWqydarBpvsmq8ydR4i1Vjze7dQ8ebc/Orxlt5fXO+7lj/+5pMjjVpNcLrF6S3iUFfsIiY+0thxxW+t9NJnJme5fSFWU6d777mDgLnZ+bKu9P55XPTbd46O8O5mTZnp7vL52bazLSv7D5DjYCJVpPJsQYTrSYTYw0mWnm+1cjLeb41X2dy7PKy8WaDsWaDsVaDsUYw1mzQaka3vNWglcvG++a7r8jv6c43Pfiopgz661SjMX+Q2LZ+dP1RZtodzk63OTfdPQCczQeAb
ll3ubf+/EybC7MdLszm6UzffC4/P9PhxLmZvO7iuudn2xed5bRcIpgL/VY+GIzn+VajeyDovS5fbly2vtFfL4JWc36+2egejBox6LPm5xvR/ZxG0J2P7gG+N9+IIIL5uhXrG72yRt/8XN35sl7dZmPI+t46ussBcMlyRORpt5ygcl3v2BrDPsMD8JIY9FoWY80G66carJ8au+o/K6XEbCfl4G9zfrbDbLvDTDsx0+4w205Mtztz8zPtDtN98zN9dXvzs3l+umJ+tpPopMRsO9HupMuWz7XbzHYS7U6HdgfanfyeXLfd97q0fLbTwRuvLszQAwnVBwv6lwd8BvQObJccfOZ+blzWjoumuWbV++bePWD9Z/76Dv75375tSb+XUQx6FSciusMuzQZrJurxTzilSw4gnUS73Z3vpO76TiIvJ9LcPLQ7aej67nKi3Rm+vtO3vtO3PvXKOsy9t52nid4USInUnZD65/P2Mbec+srnl3u/h0HrLvv8AZ9B388a+vlVn9G3Tb22Qv7ZFy1fvGJ+faqoP3h9b2bTmokr+reyGPX4XyIVLvLQjmfE6mrwKhxJqjmDXpJqzqCXpJoz6CWp5gx6Sao5g16Sas6gl6SaM+glqeaid7XWijYi4ijwV4t8+ybg2DI2pwRu8/XBbb4+LGWb35lS2jyq0jUR9EsREXtTSrtXuh1vJ7f5+uA2Xx/ejm126EaSas6gl6Saq0PQP7zSDVgBbvP1wW2+Plz1bS5+jF6SNFwdevSSpCGKDvqI+ERE7IuI/RHx4Eq3Z7lExI6I+EFEPBcRz0bE53P5DRHxeES8mKcbc3lExFfz7+HpiLhjZbdgcSKiGRH/NyK+k5dvjYgn8/b+QUSM5/KJvLw/r9+5ku1eiojYEBF/GBHP5/19V533c0T86/xv+pmI+GZETNZxP0fE1yPiSEQ801d2xfs1Ivbk+i9GxJ7FtqfYoI+IJvCbwCeB9wH3RsT7VrZVy2YW+NWU0i8AdwIP5G17EHgipbQLeCIvQ/d3sCu/7gceevubvCw+DzzXt/wfga/k7T0O3JfL7wOOp5TeDXwl1yvVfwb+LKX014AP0t3+Wu7niLgZ+FfA7pTSLwJN4DPUcz9/A/jEJWVXtF8j4gbgi8DfAD4MfLF3cLhiKT9GrLQXcBfwvb7lLwBfWOl2XaVt/Tbw94F9wLZctg3Yl+d/C7i3r/5cvVJewPb8j//vAt+h+1jNY0Dr0v0NfA+4K8+3cr1Y6W1YxDavA16+tO113c/AzcCrwA15v30H+KW67mdgJ/DMYvcrcC/wW33lF9W7klexPXrm/9H0HMhltZL/XP0Q8CSwNaV0ECBPt+Rqdfhd/Abwb4BOXr4ReCulNJuX+7dpbnvz+hO5fmluA44C/y0PWf12RKympvs5pfQa8J+AnwMH6e63p6j/fu650v26bPu75KCPAWW1OoUoItYAfwT8Skrp5LCqA8qK+V1ExD8AjqSUnuovHlA1LWBdSVrAHcBDKaUPAWeY/3N+kKK3Ow873APcCrwDWE132OJSddvPo1Rt57Jtf8lBfwDY0be8HXh9hdqy7CJijG7I/15K6Vu5+HBEbMvrtwFHcnnpv4uPAP8wIl4Bfp/u8M1vABsiovcA+/5tmtvevH498Obb2eBlcgA4kFJ6Mi//Id3gr+t+/nvAyymloymlGeBbwN+k/vu550r367Lt75KD/i+AXfkb+3G6X+o8tsJtWhYREcDXgOdSSl/uW/UY0PvmfQ/dsfte+Wfzt/d3Aid6fyKWIKX0hZTS9pTSTrr78fsppX8C/AD45Vzt0u3t/R5+OdcvrqeXUjoEvBoR781FHwN+Sk33M90hmzsjYlX+N97b3lrv5z5Xul+/B3w8Ijbmv4Y+nsuu3Ep/YbHELzvuBl4AXgL+3Uq3Zxm362/R/RPtaeAn+XU33fHJJ4AX8/SGXD/onoH0EvCXdM9qWPHtWOS2fxT4Tp6/DfgxsB/4H8BELp/My/vz+ttWu
t1L2N7bgb15X/8JsLHO+xn4D8DzwDPA7wITddzPwDfpfg8xQ7dnft9i9ivwz/L27wc+t9j2eGWsJNVcyUM3kqQFMOglqeYMekmqOYNekmrOoJekmjPoJanmDHpJqjmDXpJq7v8DfpMSahQmL64AAAAASUVORK5CYII=\n", 340 | "text/plain": [ 341 | "
" 342 | ] 343 | }, 344 | "metadata": {}, 345 | "output_type": "display_data" 346 | } 347 | ], 348 | "source": [ 349 | "plt.plot(errors)\n", 350 | "plt.show()" 351 | ] 352 | } 353 | ], 354 | "metadata": { 355 | "kernelspec": { 356 | "display_name": "Python 3", 357 | "language": "python", 358 | "name": "python3" 359 | }, 360 | "language_info": { 361 | "codemirror_mode": { 362 | "name": "ipython", 363 | "version": 3 364 | }, 365 | "file_extension": ".py", 366 | "mimetype": "text/x-python", 367 | "name": "python", 368 | "nbconvert_exporter": "python", 369 | "pygments_lexer": "ipython3", 370 | "version": "3.6.5" 371 | } 372 | }, 373 | "nbformat": 4, 374 | "nbformat_minor": 2 375 | } 376 | -------------------------------------------------------------------------------- /4 - Multinomial Logistic Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt\n", 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "np.random.seed(1)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "x0s = np.random.normal(loc=3, size=(1000, 2)) # examples for class 0\n", 29 | "y0s = np.zeros(1000) # labels for class 0\n", 30 | "x1s = np.random.normal(loc=6, size=(1000, 2)) # examples for class 1\n", 31 | "y1s = np.ones(1000) # labels for class 1\n", 32 | "x2s = np.random.normal(loc=9, size=(1000, 2)) # examples for class 2\n", 33 | "y2s = np.full(1000, 2.) 
# labels for class 2" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 4, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "xs = np.concatenate((x0s, x1s, x2s)) #all examples\n", 43 | "ys = np.concatenate((y0s, y1s, y2s)) #all labels" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 5, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "image/png": "\n", 54 | "text/plain": [ 55 | "
" 56 | ] 57 | }, 58 | "metadata": {}, 59 | "output_type": "display_data" 60 | } 61 | ], 62 | "source": [ 63 | "plt.scatter(xs[:,0], xs[:,1], c=ys, alpha=0.25)\n", 64 | "plt.show()" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "$e_n = - \\sum^M_{m=1} y_{n,m} \\log(\\hat{y}_{n,m})$" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 6, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "def calc_error(y_hat, y, eps=1e-10):\n", 81 | " \"\"\"\n", 82 | " Cross-entropy loss between labels, y, and predictions, y_hat \n", 83 | " \"\"\"\n", 84 | " error = - np.sum([_y*np.log(_y_hat + eps) for (_y, _y_hat) in zip(y, y_hat)])\n", 85 | " \n", 86 | " return error" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 7, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "0.510825623599324\n", 99 | "0.2231435511892097\n", 100 | "0.10536051554671516\n", 101 | "-1.000000082690371e-10\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "print(calc_error([0.2, 0.2, 0.6], [0, 0, 1]))\n", 107 | "\n", 108 | "print(calc_error([0.1, 0.1, 0.8], [0, 0, 1]))\n", 109 | "\n", 110 | "print(calc_error([0.05, 0.05, 0.9], [0, 0, 1]))\n", 111 | "\n", 112 | "print(calc_error([0, 0, 1], [0, 0, 1]))" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 8, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "def softmax(x):\n", 122 | " \"\"\"Compute softmax values for each sets of scores in x.\"\"\"\n", 123 | " return np.exp(x) / np.sum(np.exp(x))" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 9, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "array([0.09003057, 0.24472847, 0.66524096])" 135 | ] 136 | }, 137 | "execution_count": 9, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | 
"source": [ 143 | "softmax([1, 2, 3])" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 10, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stderr", 153 | "output_type": "stream", 154 | "text": [ 155 | "/home/ben/miniconda3/envs/pytorch04/lib/python3.6/site-packages/ipykernel_launcher.py:3: RuntimeWarning: overflow encountered in exp\n", 156 | " This is separate from the ipykernel package so we can avoid doing imports until\n", 157 | "/home/ben/miniconda3/envs/pytorch04/lib/python3.6/site-packages/ipykernel_launcher.py:3: RuntimeWarning: invalid value encountered in true_divide\n", 158 | " This is separate from the ipykernel package so we can avoid doing imports until\n" 159 | ] 160 | }, 161 | { 162 | "data": { 163 | "text/plain": [ 164 | "array([nan, nan, nan])" 165 | ] 166 | }, 167 | "execution_count": 10, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "softmax([1000, 2000, 3000])" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "https://stackoverflow.com/questions/42599498/numercially-stable-softmax\n", 181 | "https://stackoverflow.com/questions/34968722/how-to-implement-the-softmax-function-in-python" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 11, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "def softmax(x):\n", 191 | " \"\"\"Compute softmax values for each sets of scores in x.\"\"\"\n", 192 | " e_x = np.exp(x - np.max(x))\n", 193 | " return e_x / np.sum(e_x)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 12, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "array([0.09003057, 0.24472847, 0.66524096])" 205 | ] 206 | }, 207 | "execution_count": 12, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "softmax([1, 2, 3])" 214 | ] 215 | }, 
216 | { 217 | "cell_type": "code", 218 | "execution_count": 13, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "array([0., 0., 1.])" 225 | ] 226 | }, 227 | "execution_count": 13, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "softmax([1000, 2000, 3000])" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 14, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "w = np.random.normal(size=(3,2))\n", 243 | "b = np.random.normal(size=(3,1))" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 17, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "def predict(w, b, x):\n", 253 | " pred_vals = np.matmul(w, x)\n", 254 | " pred_vals = softmax(pred_vals)\n", 255 | " pred_class = np.argmax(pred_vals)\n", 256 | " return pred_vals, pred_class" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 21, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "data": { 266 | "text/plain": [ 267 | "(array([4.13837282e-08, 1.67298610e-04, 9.99832660e-01]), 2)" 268 | ] 269 | }, 270 | "execution_count": 21, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "predict(w, b, xs[0])" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 38, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "def predicts(w, b, xs):\n", 286 | " pred_vals, pred_classes = map(np.array, zip(*[predict(w, b, x) for x in xs]))\n", 287 | " return pred_vals, pred_classes" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 40, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "y_hats, c_hats = predicts(w, b, xs)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 41, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "name": "stdout", 306 | "output_type": 
"stream", 307 | "text": [ 308 | "1005\n" 309 | ] 310 | } 311 | ], 312 | "source": [ 313 | "n_correct = (c_hats == ys).sum()\n", 314 | "\n", 315 | "print(n_correct)" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 42, 321 | "metadata": {}, 322 | "outputs": [ 323 | { 324 | "name": "stdout", 325 | "output_type": "stream", 326 | "text": [ 327 | "0.335\n" 328 | ] 329 | } 330 | ], 331 | "source": [ 332 | "accuracy = n_correct / len(c_hats)\n", 333 | "\n", 334 | "print(accuracy)" 335 | ] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "https://deepnotes.io/softmax-crossentropy\n", 342 | "https://eli.thegreenplace.net/2016/the-softmax-function-and-its-derivative/\n", 343 | "http://cs231n.github.io/convolutional-networks/\n", 344 | "\n", 345 | "\\begin{align*}\n", 346 | "z &= w_0 x_0 + w_1 x_1 + b\\\\\n", 347 | "\\hat{y} &= \\sigma (z)\\\\\n", 348 | "\\sigma (z_j) &= \\frac{e^{z_j}}{\\sum^K_{k=1}e^{z_k}} \\\\\n", 349 | "e_n &= - \\sum^M_{m=1} y_{n,m} \\log(\\hat{y}_{n,m}) \\\\\n", 350 | " &= - (y_{n,0} \\log(\\hat{y}_{n,0}) + y_{n,1} \\log(\\hat{y}_{n,1}) + y_{n,2} \\log(\\hat{y}_{n,2}))\n", 351 | "\\end{align*}\n", 352 | "\n", 353 | "Derivative of softmax:\n", 354 | "\n", 355 | "$$ \\sigma (z_j) = \\frac{e^{z_j}}{\\sum^K_{k=1}e^{z_k}} $$\n", 356 | "\n", 357 | "$$\\frac{d\\sigma (z_j)}{d z_j} = \\frac{d}{d z_j} \\frac{e^{z_j}}{\\sum^K_{k=1}e^{z_k}} $$\n", 358 | "\n", 359 | "When $i = j$:\n", 360 | "\n", 361 | "$$ \\frac{d}{d z_j} \\sigma (z_j) = \\frac{e^{z_j}}{\\sum^K_{k=1}e^{z_k}} = \\frac{e^{z_i} \\sum^K_{k=1} e^{z_k} - e^{z_j}e^{z_i}}{(\\sum^K_{k=1} e^z_k)^2} $$ \n", 362 | "\n", 363 | "$$=\\frac{e^{z_i}(\\sum^K_{k=1}e^{z_k}-e^{z_j})}{(\\sum^K_{k=1}e^{z_k})^2}$$\n", 364 | "\n", 365 | "$$=\\frac{e^{z_j}}{\\sum^K_{k=1}e^{z_k}} \\frac{(\\sum^K_{k=1}e^{z_k}-e^{z_j})}{\\sum^K_{k=1}e^{z_k}}$$\n", 366 | "\n", 367 | "$$=p_i(1-p_j)$$\n", 368 | "\n", 369 | "When $i \\neq j$:\n", 370 | "\n", 371 | "\n", 372 
| "$$ \\frac{d}{d z_j} \\sigma (z_i) = \\frac{d}{d z_j} \\frac{e^{z_i}}{\\sum^K_{k=1}e^{z_k}} = \\frac{0 - e^{z_j}e^{z_i}}{(\\sum^K_{k=1} e^{z_k})^2} $$ \n", 373 | "\n", 374 | "$$=\\frac{-e^{z_j}}{\\sum^K_{k=1}e^{z_k}} \\frac{e^{z_i}}{\\sum^K_{k=1}e^{z_k}}$$\n", 375 | "\n", 376 | "$$=-p_j*p_i$$\n", 377 | "\n", 378 | "For loss function:\n", 379 | "\n", 380 | "$$L= - \\sum_iy_i\\log(p_i)$$\n", 381 | "\n", 382 | "$$\\frac{d L}{d z_i} = -\\sum_ky_k \\frac{d \\log (p_k)}{d z_i}$$\n", 383 | "\n", 384 | "$$=-\\sum_ky_k\\frac{d \\log(p_k)}{d p_k} \\times \\frac{d p_k}{d z_i}$$\n", 385 | "\n", 386 | "\n", 387 | "$$=- \\sum_k y_k \\frac{1}{p_k} \\times \\frac{d p_k}{d z_i}$$\n", 388 | "\n", 389 | "From the softmax derivation earlier:\n", 390 | "\n", 391 | "$$\\frac{d L}{d z_i} = -y_i(1-p_i)-\\sum_{k \\neq i} y_k \\frac{1}{p_k}(-p_kp_i)$$\n", 392 | "\n", 393 | "$$= -y_i + p_i \\sum_k y_k = p_i - y_i \\quad \\text{(the one-hot labels satisfy } \\sum_k y_k = 1 \\text{)}$$" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 44, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "def gradient_step(w, b, xs, ys, eta=0.1):\n", 403 | " \"\"\"\n", 404 | " Performs one gradient descent step using all of the data points\n", 405 | " pred_w0 (float): predicted w0 value\n", 406 | " pred_w1 (float): predicted w1 value\n", 407 | " pred_b (float): predicted b value\n", 408 | " xs (ndarray[float,float]): x values for data points\n", 409 | " ys (ndarray[float]): y values for data points\n", 410 | " eta (float): learning rate\n", 411 | " \"\"\"\n", 412 | " \n", 413 | " N = len(xs) #number of examples, used to average gradients\n", 414 | " grads_w0 = [0, 0, 0]\n", 415 | " grads_w1 = [0, 0, 0]\n", 416 | " grads_b = [0, 0, 0]\n", 417 | " \n", 418 | " for x, y in zip(xs, ys):\n", 419 | " y_hat, _ = predict(w, b, x) #get prediction\n", 420 | " \n", 421 | " #de = ? #derivative of error w.r.t. prediction\n", 422 | " #dz = ? #derivative of softmax\n", 423 | " \n", 424 | " \n", 425 | " #de = (y_hat - y) / (y_hat * (1 - y_hat)) #derivative of error w.r.t. 
prediction \n", 426 | " #dz = y_hat * (1 - y_hat) #derivative of sigmoid\n", 427 | " dedz = y_hat - y #simplified equation for the two derivatives above\n", 428 | " dw0 = x[0] #derivative of sigmoid w.r.t w0\n", 429 | " dw1 = x[1] #derivative of sigmoid w.r.t w1\n", 430 | " grad_w0 += (1/N) * dedz * dw0 #summing averaged gradients for w0\n", 431 | " grad_w1 += (1/N) * dedz * dw1 #summing averaged gradients for w1\n", 432 | " grad_b += (1/N) * dedz #summing averaged gradients for b\n", 433 | " \n", 434 | " new_w0 = pred_w0 - (eta * grad_w0) #parameter update for w0\n", 435 | " new_w1 = pred_w1 - (eta * grad_w1) #parameter update for w1\n", 436 | " new_b = pred_b - (eta * grad_b) #parameter update for b\n", 437 | " \n", 438 | " return new_w0, new_w1, new_b" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": 45, 444 | "metadata": {}, 445 | "outputs": [ 446 | { 447 | "name": "stdout", 448 | "output_type": "stream", 449 | "text": [ 450 | "[4.13837282e-08 1.67298610e-04 9.99832660e-01]\n" 451 | ] 452 | }, 453 | { 454 | "ename": "AssertionError", 455 | "evalue": "", 456 | "output_type": "error", 457 | "traceback": [ 458 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 459 | "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", 460 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mgradient_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mys\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 461 | "\u001b[0;32m\u001b[0m in \u001b[0;36mgradient_step\u001b[0;34m(w, b, xs, ys, eta)\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0my_hat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#get prediction\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_hat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0;31m#de = (y_hat - y) / (y_hat * (1 - y_hat)) #derivative of error w.r.t. prediction\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0;31m#dz = y_hat * (1 - y_hat) #derivative of sigmoid\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 462 | "\u001b[0;31mAssertionError\u001b[0m: " 463 | ] 464 | } 465 | ], 466 | "source": [ 467 | "gradient_step(w, b, xs, ys)" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [] 476 | } 477 | ], 478 | "metadata": { 479 | "kernelspec": { 480 | "display_name": "Python 3", 481 | "language": "python", 482 | "name": "python3" 483 | }, 484 | "language_info": { 485 | "codemirror_mode": { 486 | "name": "ipython", 487 | "version": 3 488 | }, 489 | "file_extension": ".py", 490 | "mimetype": "text/x-python", 491 | "name": "python", 492 | "nbconvert_exporter": "python", 493 | "pygments_lexer": "ipython3", 494 | "version": "3.6.5" 495 | } 496 | }, 497 | "nbformat": 4, 498 | "nbformat_minor": 2 499 | } 500 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Ben Trevett 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | 
of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Gradient Descent 2 | 3 | Let's learn gradient descent by using linear regression, logistic regression and neural networks! 4 | 5 | ## Getting Started 6 | 7 | We'll be making use of `scikit-learn`, `numpy` and `matplotlib`. 
You can install all three with: 8 | 9 | ``` 10 | pip install scikit-learn numpy matplotlib 11 | ``` 12 | 13 | ## Tutorials 14 | 15 | * 1 - Linear Regression from Scratch 16 | 17 | * 2 - Linear Regression with scikit-learn 18 | 19 | * 3 - Logistic Regression from Scratch 20 | 21 | * 4 - Logistic Regression with scikit-learn 22 | 23 | * 5 - Neural Networks: Forward Propagation 24 | 25 | * 6 - Neural Networks: Backward Propagation -------------------------------------------------------------------------------- /assets/gradient-descent-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bentrevett/gradient-descent/9c8aef6727534021484f666c44f5a02f1a2612ba/assets/gradient-descent-1.png -------------------------------------------------------------------------------- /assets/gradient-descent-1.xml: -------------------------------------------------------------------------------- 1 | 7VdNj5swEP01HFcCnJDkmI/d9tKq0lbq2YGBWOtgakw++us7YJvPhGRXOexhc4js5/HzMO/BgEPW+9M3SbPdDxEBd3w3Ojlk4/j+bB7gfwmcNUBmUw0kkkUa8hrglf0DA7oGLVgEeSdQCcEVy7pgKNIUQtXBqJTi2A2LBe+emtEEBsBrSPkQ/cMitdPo3A8a/DuwZGdP9oKFXtnS8C2RokjNeY5P4uqnl/fUcpkLzXc0EscWRJ4dspZCKD3an9bAy9Lasul9L1dW67wlpOqeDb7ecKC8AJtxlZc621pAGi3LkuIs5DTPWeiQ1U7tOQIeDutLcHESi1S90D3jpfBrUUgGEul+wtEsGq29AOf6KIgGWjTZe3VN0Gog9qDkGUOOjShTU8ldSw+LSeBUsUOXnhpvJDVdfcIvwfBg3zU29gLDY11sVbMUOV5gCGZXu8o3iCY9HkVlAmrAg4PWVTdQpeFlPcmXniN6+l0Z/PkH9Zy4N4geJ+jkgqABx7qstjhIVFV2DbA+UOrT0T74Wwi78JRXyi0xwJ9mp2bRsoClwRQ1U5cd4cGJiLXS6hlPwUl1rSYBk6DbKqB0W1YWrCrhdOVMN4jQQoncWKyccpakOOYQl1QHkIrhU3tpYCWy0sAZDVma/C4nm6fJBaOOutoUHKnh9F4fXzGavd9bNicXbN5/vLQd3bHQiF+mY375qBUGDjtaJC9qvd2WCVrwl11u2oXMeo+TxXzgl8kFv5AH+CV4V8NIRQqP6hYx43wtuJDVKSSiMI/LVpQrKd6gtRKEc9jGlsHk5dWR9gWNIBIW8gCRWf7s7cjvdZFHdaM7mxFqSs+tMHM3Xe+ei55LZ+5oWnUeNj4Yjye9lyT7MdAYWWd8ZyfFafMarcObTxXy/B8= -------------------------------------------------------------------------------- /assets/gradient-descent-2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bentrevett/gradient-descent/9c8aef6727534021484f666c44f5a02f1a2612ba/assets/gradient-descent-2.png -------------------------------------------------------------------------------- /assets/gradient-descent-2.xml: -------------------------------------------------------------------------------- 1 | 7VdNc5swEP01HDMDCBPnGNtxemmnM+lMzzIsoImMqBD+6K/vCiQbAXYSTw49hINHelqe1vse+vDIcnt4lrQqvosUuBf66cEjKy8MyX2Evxo4dkAYGiCXLO2g4Ay8sL9gQN+gDUuhdgKVEFyxygUTUZaQKAejUoq9G5YJ7s5a0RxGwEtC+Rj9zVJVdOg8jM/4N2B5YWcO4oduZEOT11yKpjTzeSHJ2qcb3lLLZf5oXdBU7HsQefLIUgqhutb2sASuS2vL1r23vjB6yltCqd7zgvlDO8obsBm3eamjrQWU6aMuKfZKUSK4KNSWYy/A5il/HzuZKNWabhnXqi9FIxlI5PoBezNohA5i3WecLwUXsp2FpBTmWaIZlRSv0BuJkzlsMstg8gpOkVYfgkjSyB2kZnhcClMdSB3xTWGeQWxBySMG7M+Sz4xORU9ti0ngVLGdaxlqnJef6E4z/BQMMwl985GEvuEx38jcdxlqLGAC5qW+hAOe6A0eRWUOasSDmtJjL6zSAfXldIMHd5rw3r+a1ikPGx9fjyfxID5w4rHRZWx7PcXOUOvuaaeHH3J6wmlds+SzzP6/ezEYFJ+EN5pxSBS9z4w36Em+9LyiZzj4mD5rcXnv6nKDoNGEoDHHuiw22MhVW/YOYENA6+NoH/9phB24q1vlHjEgnFWH86BlAUuDKXZMLjvCoxkR66U1MJ6Cg3KtJgGToJs2QLvNrLYYPVt4sxUitFGiNhbTXc7yEtscMk21A6kYnk8eDaxEpQ1c0YSV+S/dWd1FU7vsNVdfdLGeDQ5XfXzBaPZ779mcTNh8uLz0He1Y6IpfZtf8cqsVRg7bW6RuTnr7PRP04C+7vGkXcj9YTh7mI79EE34hn+CXIJgwzHDH4ByvGPqMuy+YghcsmB7Z4yXHFYjWVXfvyNhBnzgX7Zm/PXv6w50l13uPaSdiq3ehLsg56Zqz9eBYvF77+GgSSVOGhR+Gf2TXsiZJkAfDrKIj+SZEvqzoLHIUDebjFSCYUDT4uKLYPV+Mug3mfPkkT/8A -------------------------------------------------------------------------------- /assets/gradient-descent-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bentrevett/gradient-descent/9c8aef6727534021484f666c44f5a02f1a2612ba/assets/gradient-descent-3.png -------------------------------------------------------------------------------- /assets/gradient-descent-3.xml: -------------------------------------------------------------------------------- 1 | 
7VnLctowFP0alun4HXsZSEgXbacz6UzTpbCFUSssKotXv75XtmQk2xBgSNLOkEVGOrpcXXTO0SMZ+KP55pGjxewzyzAdeE62Gfj3A88LAw9+S2BbA17i1EDOSVZD7g54In+wAnXYkmS4tAIFY1SQhQ2mrChwKiwMcc7WdtiUUXvWBcpxB3hKEe2i30kmZjUae9EO/4hJPtMzu1FSj0xQ+ivnbFmo+QaeP61+6uE50rnUFy1nKGNrA/IfBv6IMybq1nwzwlQurV62+nPjPaNN3RwX4pgPqC+0QnSJdcVVXWKr1wIX2Z1cUugVrABwOBNzCj0Xmk39DnSmrBBjNCdUsj5iS04wh1xf8FoNKqLdSPYJpSNGGa9m8TOE42kqMwrOfmFjJEpjPJnqDKout4nU/PiApEu+wpka7i6FWh2cWeSrhXnEbI4F30LAekd5qHiaGWxrjGOKBFnZkkFKeXmTrpnhKyNQiecok3iOyqM8Ejt2hhIWMMXqQyaFrTzBC3kE4jkWnTzAKdoaYQsZUO4v103sabxb52BZTR06Pjoc70eteNeKh0Zdse4ZjO2gSt39SvdOUnpKUVmS9FJi/9e16LYW3/fOFGM7UXCcGM/g07/yeYBPr2WmS20ux+4uZxAa9BAaUViX4QQauaiWvQZIG5D8WNxHv5dMD9yUFXN3EOCFi81uUGfBOg2UWGeyswPcmREwo6yW8ATeCFtqHEMRaFIFSLWp3Raiw+EgvAcELQUrlcRkl5K8gDbFU5lqhbkgcD+5U7BgCyngBUpJkX+TnfuboO+UPaTqvSqWs+HNQR3vEZr2uyFzv0fm7e3FVLQloQN6CQ/p5VwpdBS21ki5bPh2DBEY8FUuL8rFv21tJ0nc0UvQoxf/AnpprtzGAQG7tb5RqrttyubymKhW3SAEb4h4VrBs/9DtAqp4Njt6xDpcfmIhtmqhJXMAMS5mLGcFop9YRYh76iHUc9ttHT+9bNVbv4L046raxRUW91N42pF06vXSD1xLGW582euf/qYH7wuUwgNTqmA9IwI/gV3kyBqeuLYaULmoX51TspEMDKsXX8VFh/pc3jxU2xSX/c5R6ms9isZjB35kEo4yAkS2w0+Ri94iUsgDYVoyHfP2iGa/n8OglzXTz26Pn91L+NntEtjjZ9Mkp5j7Q/jO9j7T0XGPo5N3cbS+ouu93rmwo+Oro1/B0UHrhHaCt3O0/6qO/j/9nHT9XAv9rf3sJYmtDO/Cfk6ufn4FP+tVbFgL387Px/zN7croiYw2bybNaM+b+0KMQnf3r4ja0rt/9/gPfwE= -------------------------------------------------------------------------------- /assets/sigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bentrevett/gradient-descent/9c8aef6727534021484f666c44f5a02f1a2612ba/assets/sigmoid.png --------------------------------------------------------------------------------