├── .gitignore ├── 01-linear-regression.ipynb ├── 02-logistic-regression.ipynb ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /01-linear-regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Linear Regression -- Weight Confidence Intervals" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "image/png": "\n", 18 | "text/plain": [ 19 | "
" 20 | ] 21 | }, 22 | "metadata": { 23 | "needs_background": "light" 24 | }, 25 | "output_type": "display_data" 26 | } 27 | ], 28 | "source": [ 29 | "import matplotlib.pyplot as plt\n", 30 | "%matplotlib inline\n", 31 | "\n", 32 | "from mlxtend.plotting import scatterplotmatrix\n", 33 | "from sklearn.linear_model import LinearRegression\n", 34 | "from sklearn.preprocessing import StandardScaler\n", 35 | "from scipy import stats\n", 36 | "import numpy as np\n", 37 | "\n", 38 | "\n", 39 | "# https://en.wikipedia.org/wiki/Simple_linear_regression#Confidence_intervals\n", 40 | "# This data set gives average masses for women as a function of their height in a sample of American women of age 30–39. \n", 41 | "\n", 42 | "height_in_m = [1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83]\n", 43 | "mass_in_kg = [52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46]\n", 44 | "\n", 45 | "np.random.seed(0)\n", 46 | "rand1 = np.random.normal(size=len(height_in_m), scale=10, loc=5)\n", 47 | "rand2 = np.random.normal(size=len(height_in_m))\n", 48 | "\n", 49 | "X_train = np.array([(i, j, k) for i, j, k in zip(height_in_m, rand1, rand2)])\n", 50 | "y_train = np.array(mass_in_kg)\n", 51 | "\n", 52 | "sc_features = StandardScaler()\n", 53 | "sc_target = StandardScaler()\n", 54 | "\n", 55 | "X_std = sc_features.fit_transform(X_train)\n", 56 | "y_std = sc_target.fit_transform(y_train.reshape(-1, 1)).flatten()\n", 57 | "\n", 58 | "scatterplotmatrix(X_std, names=['Height','Rand 1', 'Rand 2'], \n", 59 | " figsize=(6, 5))\n", 60 | "plt.tight_layout()\n", 61 | "plt.show()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Weight coefficients" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 2, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEECAYAAAAlEzNMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAVQklEQVR4nO3dfbRldX3f8feHGREIIg8zBMvMMNAOSabG8DBiKGkFQQOmZZJqGqZieFphpYUgoWlDo6UJriSG1FhdQSM1GmIMRIzV0WDRWELSAIbhUWYQnSDIBCujImJBYeTbP/YeOJw5994zw933zJ39fq111t0Pv73P99zfrPmc/fDbN1WFJKm/dpt0AZKkyTIIJKnnDAJJ6jmDQJJ6ziCQpJ4zCCSp5xZOuoDttWjRolq+fPmky5CkeeXWW2/9elUtHrVu3gXB8uXLWbdu3aTLkKR5JckDU63z1JAk9ZxBIEk9ZxBIUs8ZBJLUc50FQZL3J3k4yd1TrE+SdyXZmOSuJEd1VYskaWpdHhH8EXDyNOtPAVa0r3OB93RYiyRpCp0FQVX9NfDNaZqsBv64GjcD+yZ5SVf1SJJGm+Q1goOBBwfmN7XLJElzaJIDyjJi2ci/kpPkXJrTRyxbtmyH33D5xX+xw9tqeve/7acmXYKkHTTJI4JNwNKB+SXAQ6MaVtUVVbWqqlYtXjxyhLQkaQdNMgjWAj/f3j3048CjVfXVCdYjSb3U2amhJFcBxwOLkmwC/ivwAoCq+gPgWuC1wEbgceCsrmqRJE2tsyCoqjUzrC/gvK7eX5I0HkcWS1LPGQSS1HMGgST1nEEgST1nEEhSzxkEktRzBoEk9ZxBIEk9ZxBIUs8ZBJLUcwaBJPWcQSBJPWcQSFLPGQSS1HMGgST1nEEgST1nEEhSzxkEktRzBoEk9ZxBIEk9ZxBIUs8ZBJLUcwaBJPWcQSBJPWcQSFLPGQSS1HMGgST1nEEgST1nEEhSzxkEktRzBoEk9ZxBIEk9ZxBIUs91GgRJTk5yb5KNSS4esX5ZkuuT3J7kriSv7bIeSdK2OguCJAuAy4FTgJXAmiQrh5q9BfhwVR0JnAa8u6t6JEmjdXlEcAywsaruq6ongauB1UNtCtinnX4x8FCH9UiSRugyCA4GHhyY39QuG/TrwOlJNgHXAr80akdJzk2yLsm6zZs3d1GrJPVWl0GQEctqaH4N8EdVtQR4LfDBJNvUVFVXVNWqqlq1ePHiDkqVpP7qMgg2AUsH5pew7amfc4APA1TVTcAewKIOa5IkDekyCG4BViQ5NMnuNBeD1w61+QpwIkCSH6EJAs/9SNIc6iwIqmoLcD5wHXAPzd1B65NcmuTUttl/AH4hyZ3AVcCZVTV8+kiS1KGFXe68qq6luQg8uOySgekNwHFd1iBJmp4jiyWp5wwCSeo5g0CSes4gkKSeMwgkqecMAknqOYNAknrOIJCknjMIJKnnDAJJ6jmDQJJ6ziCQpJ4zCCSp5wwCSeo5g0CSes4gkKSeMwgkqecMAknqOYNAknrOIJCknjMIJKnnDAJJ6jmDQJJ6ziCQpJ4zCCSp5wwCSeo5g0CSem6sIEjj9CSXtPPLkhzTbWmSpLkw7hHBu4FjgTXt/GPA5Z1UJEmaUwvHbPeKqjoqye0AVfVIkt07rEuSNEfGPSJ4KskCoACSLAae7qwqSdKcGTcI3gX8T+DAJL8J/B/gtzqrSpI0Z8YKgqr6EPCfgN8Gvgr8dFVdM9N2SU5Ocm+SjUkunqLNv0myIcn6JH+6PcVLkp6/aa8RJNl/YPZh4KrBdVX1zWm2XUBzQfnVwCbgliRrq2rDQJsVwH8GjmuvOxy4Yx9DkrSjZrpYfCvNdYEAy4BH2ul9ga8Ah06z7THAxqq6DyDJ1cBqYMNAm18ALq+qRwCq6uEd+AySpOdh2lNDVXVoVR0GXAf8q6paVFUHAP8S+OgM+z4YeHBgflO7bNDhwOFJ/jbJzUl
O3r7yJUnP17gXi19eVddunamqTwGvnGGbjFhWQ/MLgRXA8TRjFN6XZN9tdpScm2RdknWbN28es2RJ0jjGDYKvJ3lLkuVJDknyZuAbM2yzCVg6ML8EeGhEm49X1VNV9WXgXppgeI6quqKqVlXVqsWLF49ZsiRpHOMGwRpgMc0tpB8DDuTZUcZTuQVYkeTQdvDZacDaoTYfA04ASLKI5lTRfWPWJEmaBWONLG7vDnrT9uy4qrYkOZ/m+sIC4P1VtT7JpcC6qlrbrntNkg3A94H/WFUzHWlIkmbRWEGQ5Hq2Pb9PVb1quu3a6wrXDi27ZGC6gIvalyRpAsZ91tCvDEzvAbwO2DL75UiS5tq4p4ZuHVr0t0lu6KAeSdIcG/fU0OAI492Ao4GDOqlIkjSnxj01NDjCeAvwZeCcroqSJM2dcYPgR6rqu4MLkrywg3okSXNs3HEEN45YdtNsFiJJmoyZnj56EM3zgfZMciTPPjZiH2CvjmuTJM2BmU4N/SRwJs3jIX5vYPljwK91VJMkaQ5NGwRVdSVwZZLXVdWfz1FNkqQ5NNOpodOr6k+A5Um2Gf1bVb83YjNJ0jwy06mhH2h/7t11IZKkyZjp1NB725+/MTflSJLm2rgjixfT/FnJ5YPbVNXZ3ZQlSZor4w4o+zjwN8Bf0jwuWpK0ixg3CPaqql/ttBJJ0kSMO7L4k0le22klkqSJGDcI3kQTBk8k+XaSx5J8u8vCJElzY9y/R/CirguRJE3GuHcNHTVi8aPAA1XlXyqTpHls3IvF7waOAj7fzv8ocCdwQJJfrKpPd1GcJKl7414juB84sqqOrqqjgSOAu4GTgMs6qk2SNAfGDYIfrqr1W2eqagNNMNzXTVmSpLky7qmhe5O8B7i6nf854IvtXyl7qpPKJElzYtwjgjOBjcCFwC8D97XLngJO6KIwSdLcGPf20SeAt7evYd+Z1YokSXNq3NtHVwC/DawE9ti6vKoO66guSdIcGffU0AeA9wBbaE4F/THwwa6KkiTNnXGDYM+q+iyQqnqgqn4deFV3ZUmS5sq4dw19N8luwJeSnA/8A3Bgd2VJkubKuEcEFwJ7ARcARwNvBM7oqihJ0twZ966hW9rJ7wBndVeOJGmuTRsESdZOt76qTp3dciRJc22mI4JjgQeBq4DPAem8IknSnJrpGsFBwK8BLwXeCbwa+HpV3VBVN8y08yQnJ7k3ycYkF0/T7vVJKsmq7SlekvT8TRsEVfX9qvpfVXUG8OM0j5n4qyS/NNOOkywALgdOoRmItibJyhHtXkRzEfpzO1C/JOl5mvGuoSQvTPKvgT8BzgPeBXx0jH0fA2ysqvuq6kmaB9atHtHurTSPsv7u2FVLkmbNtEGQ5ErgRpo/SvMbVfXyqnprVf3DGPs+mOb6wlab2mWD+z8SWFpVn5yhjnOTrEuybvPmzWO8tSRpXDNdLH4j8P+Aw4ELkmeuFQeoqtpnmm1HXViuZ1Y2A9TeQfMU02lV1RXAFQCrVq2qGZpLkrbDtEFQVeMOOBtlE7B0YH4J8NDA/ItoLkL/VRswBwFrk5xaVeuex/tKkrbD8/mPfia3ACuSHJpkd+A04JlxCVX1aFUtqqrlVbUcuBkwBCRpjnUWBFW1BTgfuA64B/hwVa1PcmkSB6JJ0k5i3IfO7ZCquha4dmjZJVO0Pb7LWiRJo3V5akiSNA8YBJLUcwaBJPWcQSBJPWcQSFLPGQSS1HMGgST1nEEgST1nEEhSzxkEktRzBoEk9ZxBIEk9ZxBIUs8ZBJLUcwaBJPWcQSBJPWcQSFLPGQSS1HMGgST1nEEgST1nEEhSzxkEktRzBoEk9ZxBIEk9ZxBIUs8ZBJLUcwaBJPWcQSBJPWcQSFLPGQSS1HMGgST1nEEgST3XaRAkOTnJvUk2Jrl4xPqLkmxIcleSzyY5pMt6JEnb6iwIkiwALgdOAVYCa5KsHGp2O7Cqql4GfAS4rKt6JEmjdXlEcAywsaruq6ongauB1YM
Nqur6qnq8nb0ZWNJhPZKkEboMgoOBBwfmN7XLpnIO8KkO65EkjbCww31nxLIa2TA5HVgFvHKK9ecC5wIsW7ZstuqTJNHtEcEmYOnA/BLgoeFGSU4C3gycWlXfG7WjqrqiqlZV1arFixd3Uqwk9VWXQXALsCLJoUl2B04D1g42SHIk8F6aEHi4w1okSVPoLAiqagtwPnAdcA/w4apan+TSJKe2zX4X2Bu4JskdSdZOsTtJUke6vEZAVV0LXDu07JKB6ZO6fH9J0swcWSxJPWcQSFLPGQSS1HMGgST1nEEgST1nEEhSzxkEktRzBoEk9ZxBIEk9ZxBIUs8ZBJLUcwaBJPWcQSBJPWcQSFLPGQSS1HMGgST1nEEgST1nEEhSzxkEktRzBoEk9ZxBIEk9ZxBIUs8ZBJLUcwaBJPWcQSBJPWcQSFLPGQSS1HMGgST1nEEgST1nEEhSzxkEktRzCyddgDSd5Rf/xaRL2GXd/7af6mS/9ll3uuozjwgkqec6DYIkJye5N8nGJBePWP/CJH/Wrv9ckuVd1iNJ2lZnQZBkAXA5cAqwEliTZOVQs3OAR6rqnwDvAH6nq3okSaN1eURwDLCxqu6rqieBq4HVQ21WA1e20x8BTkySDmuSJA3p8mLxwcCDA/ObgFdM1aaqtiR5FDgA+PpgoyTnAue2s99Jcm8nFe98FjH0u9hZxWM5mEf9BfZZq099dshUK7oMglHf7GsH2lBVVwBXzEZR80mSdVW1atJ1aDz21/xjnzW6PDW0CVg6ML8EeGiqNkkWAi8GvtlhTZKkIV0GwS3AiiSHJtkdOA1YO9RmLXBGO/164H9X1TZHBJKk7nR2aqg9538+cB2wAHh/Va1PcimwrqrWAn8IfDDJRpojgdO6qmee6t3psHnO/pp/7DMgfgGXpH5zZLEk9ZxBIEk9ZxB0KMl3hubPTPL7M2xz6qjHcQy1OT7JJ6dYd2GSvba/2l1fku8nuSPJ3Uk+kWTfWdrv8iR3z8a+hvb7L5LclmRLktfP9v7ng3nYZxcl2ZDkriSfTTLlvfs7E4NgJ1NVa6vqbc9jFxcCBsFoT1TVEVX1UpqbE86bdEEz+ApwJvCnE65jkuZbn90OrKqql9E8LeGyCdczFoNgQpIsTvLnSW5pX8e1y585akjyj5Pc3K6/dOgIY+8kH0nyhSQfSuMC4B8B1ye5fgIfaz65iWZkO0n2br+93Zbk80lWt8uXJ7knyf9Isj7Jp5Ps2a47OsmdSW5i4D+nJHsk+UC7n9uTnNAuPzPJx9pvtV9Ocn777fH2to/3Hy6wqu6vqruAp+fg9zEfzIc+u76qHm9nb6YZP7XTMwi6tWd7WHtHkjuASwfWvRN4R1W9HHgd8L4R278TeGfbZngw3pE03/5XAocBx1XVu9p2J1TVCbP8WXYZaR6IeCLPjmv5LvAzVXUUcALw9oFnXq0ALq+qfwp8i6avAD4AXFBVxw7t/jyAqvpRYA1wZZI92nUvBf4tzXO4fhN4vKqOpPkP7udn91PuWuZpn50DfGp7P+skGATd2npYe0RVHQFcMrDuJOD324BYC+yT5EVD2x8LXNNOD58e+Luq2lRVTwN3AMtnv/xdzp7t7/sbwP7AZ9rlAX4ryV3AX9J86/zBdt2Xq+qOdvpWYHmSFwP7VtUN7fIPDrzHT2ydr6ovAA8Ah7frrq+qx6pqM/Ao8Il2+eex/6YyL/ssyenAKuB3t/sTT4BBMDm7AccOBMXBVfXYdmz/vYHp7+NfmxvHE20gHwLszrOnB94ALAaObtd/Ddj6jXDU7zmMeCZWa7qn5w7u6+mB+aex/6Yy7/osyUnAm4FTq+p7o9rsbAyCyfk0cP7WmSRHjGhzM88e1o476voxYPjIQgOq6lHgAuBXkryA5hlXD1fVU+354Wnv9KiqbwGPJvmJdtEbBlb/9db5JIcDy4C+PC23M/Olz5IcCbyXJgQe3pF9TIJBMDkXAKva28w2AL84os2FwEVJ/g54Cc2
h6UyuAD7lxeLpVdXtwJ00Afshmr5YR/MfwhfG2MVZwOXthccnBpa/G1iQ5PPAnwFn7ui3wiQvT7IJ+FngvUnW78h+dhXzoc9oTgXtDVzTXhscfr7aTslHTOzE0owHeKKqKslpwJqqGv7jPpL0vHhecud2NM0F5dDc/XD2hOuRtAvyiECSes5rBJLUcwZBR5LsmeSGJAuSnJHkS+3rjCna/1iSm9rRjZ9Isk+7fPeBUY93Jjl+YJufay82r09y2cDy85Oc1fmH3MXsQJ/tn+QzbZvPJNmvXf6Gtl/uSnJjkh9rl//Q4ADDJN9OcmG77r8ledXcfdpdg302S6rKVwcvmvud30QzCOa+9ud+7fR+I9rfAryynT4beOvAfj7QTh9IM0BmN+AAmmfRLG7XXQmc2E7vBdw+6d/BfHvtQJ9dBlzcTl8M/E47/c+2tgdOAT43YtsFwP8FDmnnDwE+PenfwXx72Wez8/KIoDtvAD4O/CTwmar6ZlU9QjMy8uQR7X+I5n5m2jZbxw+sBD4LUM19yd+iGbF4GPDFakY8QjO68nVtu8eB+5McM9sfahe3vX22miaAaX/+NEBV3dhuB1M/b+ZE4O+r6oF2mweAA5IcNFsfpifss1lgEHQgzd9oPqyq7qcZ+v7gwOpN7bJhdwOnttM/Cyxtp+8EVidZmORQmjuJlgIbgR9O85CthTT/oJcO7G8d8M9n5xPt+nawz36wqr4K0P48cESbqZ43cxpw1dCy24Djtq/y/rLPZo9B0I1FNN/cYfTw9VG3ap0NnJfkVpqRwU+2y99P8496HfDfgRuBLe23l39HMwDmb4D7gS0D+3uY5kmkGs+O9Nm02hGv5wC/OrR8d5rQv2ZoE/ts+9hns8Qg6MYTPPvck00895v6ErZ9kihV9YWqek1VHU3zrePv2+VbquqXq3ke0WpgX+BL7bpPVNUrqnma4r1bl7f24LmjJzW97e4z4GtJXgLQ/nzmkQJJXkbzRNnVVfWNoe1OAW6rqq8NLbfPto99NksMgg6039YXpHmU7XXAa5Ls196h8Jp22XMkObD9uRvwFuAP2vm9kvxAO/1qmqOBDUPb7Af8e577KOvDaU43aQw70mc0T43denfKGTTnqkmyDPgo8Maq+uKI7daw7SkGsM+2i302iyZ9tXpXfQF/CJzUTp9Nc05/I3DWQJv30fw1I2jufPhi+3obzw72W07zbf8emgvChwxsfxWwoX2dNvT+twGLJv17mE+vHeizA2gu5H+p/bn/QJtHaB4PfgewbmD7vWgeqfziofd+QdvHCyf9e5hPL/tsdl6OLO5ImqcQXlRVb+zTe89nE+6znwGOqqr/MtfvPZ/ZZ7PDU0MdqeZJiden+ctKc20RMO//cc61CffZQuDtE3jfec0+mx0eEUhSz3lEIEk9ZxBIUs8ZBJLUcwaBJPWcQSBJPff/AVsHCCgFNAleAAAAAElFTkSuQmCC\n", 79 | "text/plain": [ 80 | "
" 81 | ] 82 | }, 83 | "metadata": { 84 | "needs_background": "light" 85 | }, 86 | "output_type": "display_data" 87 | } 88 | ], 89 | "source": [ 90 | "lr = LinearRegression()\n", 91 | "lr.fit(X_std, y_std)\n", 92 | "\n", 93 | "fig, ax = plt.subplots()\n", 94 | "ax.bar([0, 1, 2], lr.coef_)\n", 95 | "\n", 96 | "ax.set_xticks([0, 1, 2])\n", 97 | "ax.set_xticklabels([f'Height\\n({lr.coef_[0]:.3f})',\n", 98 | " f'Random 1\\n({lr.coef_[1]:.3f})',\n", 99 | " f'Random 2\\n({lr.coef_[2]:.3f})'])\n", 100 | "plt.ylabel('Magnitude')\n", 101 | "plt.show()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 3, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "-2.1024223302105473e-15" 113 | ] 114 | }, 115 | "execution_count": 3, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "lr.intercept_" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 4, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "[65.4774427]\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "# standardized target y = 0.5 converted back to kg\n", 139 | "print(0.5 * np.sqrt(sc_target.var_) + sc_target.mean_)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 5, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "[72.2763281]\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "# standardized target y = 1.5 converted back to kg\n", 157 | "print(1.5 * np.sqrt(sc_target.var_) + sc_target.mean_)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 6, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "[6.7988854]\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "print(np.sqrt(sc_target.var_))" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | 
"execution_count": 7, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "image/png": "\n", 185 | "text/plain": [ 186 | "
" 187 | ] 188 | }, 189 | "metadata": { 190 | "needs_background": "light" 191 | }, 192 | "output_type": "display_data" 193 | } 194 | ], 195 | "source": [ 196 | "y_pred = lr.predict(X_std)\n", 197 | "plt.scatter(X_std[:, 0], y_std)\n", 198 | "\n", 199 | "x1, x2 = X_std[:, 0].argmin(), X_std[:, 0].argmax()\n", 200 | "plt.plot([X_std[x1, 0], X_std[x2, 0]], [y_std[x1], y_std[x2]])\n", 201 | "\n", 202 | "plt.show()" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 8, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "def std_err_linearregression(y_true, y_pred, x):\n", 212 | " n = len(y_true)\n", 213 | " mse = np.sum((y_true - y_pred)**2) / (n-2)\n", 214 | " std_err = (np.sqrt(mse) / np.sqrt(np.sum((x - np.mean(x, axis=0))**2, axis=0)))\n", 215 | " return std_err\n", 216 | "\n", 217 | "\n", 218 | "def weight_intervals(n, weight, std_err, alpha=0.05):\n", 219 | " t_value = stats.t.ppf(1 - alpha/2, df=n - 2)\n", 220 | " temp = t_value * std_err\n", 221 | " lower = weight - temp\n", 222 | " upper = weight + temp\n", 223 | "\n", 224 | " return lower, upper" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 9, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "y_pred = lr.predict(X_std)\n", 234 | "\n", 235 | "std_err = std_err_linearregression(y_std, y_pred, X_std)\n", 236 | "\n", 237 | "lower, upper = weight_intervals(len(y_std), lr.coef_, std_err)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 25, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "data": { 247 | "image/png": "\n", 248 | "text/plain": [ 249 | "
" 250 | ] 251 | }, 252 | "metadata": { 253 | "needs_background": "light" 254 | }, 255 | "output_type": "display_data" 256 | } 257 | ], 258 | "source": [ 259 | "fig, ax = plt.subplots()\n", 260 | "\n", 261 | "ax.hlines(0, xmin=-0.1, xmax=2.2, linestyle='dashed', color='skyblue')\n", 262 | "ax.errorbar([0, 1, 2], lr.coef_, yerr=upper - lr.coef_, fmt='.k')\n", 263 | "\n", 264 | "ax.set_xticks([0, 1, 2])\n", 265 | "ax.set_xticklabels([f'Height\\n({lr.coef_[0]:.3f})',\n", 266 | " f'Random 1\\n({lr.coef_[1]:.3f})',\n", 267 | " f'Random 2\\n({lr.coef_[2]:.3f})'])\n", 268 | "plt.ylabel('Magnitude');" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [ 277 | "lower, upper" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "---" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 33, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "import statsmodels.api as sm\n", 294 | "\n", 295 | "mod = sm.OLS(y_std, X_std)\n", 296 | "res = mod.fit()\n", 297 | "lower, upper = res.conf_int(0.05)[:, 0], res.conf_int(0.05)[:, 1]" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 34, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "data": { 307 | "text/plain": [ 308 | "(array([ 0.93337272, -0.03899289, -0.03548572]),\n", 309 | " array([1.06536057, 0.09297103, 0.09006354]))" 310 | ] 311 | }, 312 | "execution_count": 34, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "lower, upper" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 35, 324 | "metadata": {}, 325 | "outputs": [ 326 | { 327 | "data": { 328 | "image/png": "\n", 329 | "text/plain": [ 330 | "
" 331 | ] 332 | }, 333 | "metadata": { 334 | "needs_background": "light" 335 | }, 336 | "output_type": "display_data" 337 | } 338 | ], 339 | "source": [ 340 | "fig, ax = plt.subplots()\n", 341 | "\n", 342 | "ax.hlines(0, xmin=-0.1, xmax=2.2, linestyle='dashed', color='skyblue')\n", 343 | "ax.errorbar([0, 1, 2], res.params, yerr=upper - res.params, fmt='.k')\n", 344 | "\n", 345 | "ax.set_xticks([0, 1, 2])\n", 346 | "ax.set_xticklabels([f'Height\\n({lr.coef_[0]:.3f})',\n", 347 | " f'Random 1\\n({lr.coef_[1]:.3f})',\n", 348 | " f'Random 2\\n({lr.coef_[2]:.3f})'])\n", 349 | "plt.ylabel('Magnitude');" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [] 358 | } 359 | ], 360 | "metadata": { 361 | "kernelspec": { 362 | "display_name": "Python 3", 363 | "language": "python", 364 | "name": "python3" 365 | }, 366 | "language_info": { 367 | "codemirror_mode": { 368 | "name": "ipython", 369 | "version": 3 370 | }, 371 | "file_extension": ".py", 372 | "mimetype": "text/x-python", 373 | "name": "python", 374 | "nbconvert_exporter": "python", 375 | "pygments_lexer": "ipython3", 376 | "version": "3.7.1" 377 | } 378 | }, 379 | "nbformat": 4, 380 | "nbformat_minor": 4 381 | } 382 | -------------------------------------------------------------------------------- /02-logistic-regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Logistic Regression -- Weight Confidence Intervals" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "image/png": "\n", 18 | "text/plain": [ 19 | "
" 20 | ] 21 | }, 22 | "metadata": { 23 | "needs_background": "light" 24 | }, 25 | "output_type": "display_data" 26 | } 27 | ], 28 | "source": [ 29 | "import matplotlib.pyplot as plt\n", 30 | "%matplotlib inline\n", 31 | "\n", 32 | "from mlxtend.plotting import scatterplotmatrix\n", 33 | "from sklearn.linear_model import LogisticRegression\n", 34 | "from sklearn.preprocessing import StandardScaler\n", 35 | "from sklearn.datasets import load_iris\n", 36 | "from scipy import stats\n", 37 | "import numpy as np\n", 38 | "\n", 39 | "\n", 40 | "iris = load_iris()\n", 41 | "\n", 42 | "\n", 43 | "X_train, y_train = iris.data[50:150, :3], iris.target[50:150]\n", 44 | "y_train = np.array(50*[0] + 50*[1])\n", 45 | "\n", 46 | "sc_features = StandardScaler()\n", 47 | "sc_target = StandardScaler()\n", 48 | "\n", 49 | "X_std = sc_features.fit_transform(X_train)\n", 50 | "\n", 51 | "fig, axes = scatterplotmatrix(X_std[y_train==0], figsize=(6, 5), alpha=0.5)\n", 52 | "fig, axes = scatterplotmatrix(X_std[y_train==1], fig_axes=(fig, axes), alpha=0.5,\n", 53 | " names=['Sepal Length (std.)','Sepal Width (std.)', 'Petal Length (std.)'])\n", 54 | "\n", 55 | "plt.tight_layout()\n", 56 | "plt.show()" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "import pandas as pd\n", 66 | "\n", 67 | "df1 = pd.DataFrame(X_std)\n", 68 | "df2 = pd.DataFrame(y_train)\n", 69 | "\n", 70 | "df = pd.concat((df1, df2), axis=1)\n", 71 | "df.columns = ['sepal length', 'sepal width', 'petal length', 'species']\n", 72 | "\n", 73 | "df.to_csv('data.csv', index=None)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "## Weight coefficients" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYAAAAEECAYAAAAh5uNxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAXUklEQVR4nO3debRlZXnn8e/PKpRZlCobZbAk4kCrzVDBORjEjlEbtMVWFAPKajppZ2NalGTF6DJBTUxMG42VVkMrjUZApR0ZQjmgIsUgFiBKIypIQhERwRHw6T/2W3C43OFU1T1331v7+1nrrrP3u6fnnvec85z9vnu/J1WFJGl47tV3AJKkfpgAJGmgTACSNFAmAEkaKBOAJA2UCUCSBmp53wFsihUrVtSqVav6DkOSlpQLL7zwxqpaObV8SSWAVatWsW7dur7DkKQlJcn3piu3CUiSBsoEIEkDZQKQpIEyAUjSQJkAJGmgTACSNFAmAEkaKBOAJA3UkroRTNLitur4T/cdwlbrmhOfOe/79AxAkgbKBCBJA2UCkKSBMgFI0kCZACRpoEwAkjRQJgBJGqiJJ4AkH0hyQ5L1I2X3T3JWku+0x/tNOg5J0t0txBnAPwJPn1J2PHBOVe0DnNPmJUkLaOIJoKq+CPxoSvHhwElt+iTg2ZOOQ5J0d331Afy7qroeoD0+YKYVkxyXZF2SdRs2bFiwACVpa7foO4Grak1Vra6q1StX3uNH7SVJm6mvBPCvSR4I0B5v6CkOSRqsvhLAGcDRbfpo4JM9xSFJg7UQl4GeAnwVeHiSa5McC5wIPC3Jd4CntXlJ0gKa+O8BVNWRMyx66qSPLUma2aLvBJYkTYYJQJIGygQgSQNlApCkgTIBSNJAmQAkaaBMAJI0UCYASRooE4AkDZQJQJIGygQgSQNlApCkgTIBSNJAmQAkaaBMAJI0UCYASRooE4AkDZQJQJIGygQgSQNlApCkgTIBSNJA9ZoAkrwmyWVJ1ic5Jcm2fcYjSUPSWwJIsjvwSmB1VT0KWAa8oK94JGlo+m4CWg5sl2Q5sD3ww57jkaTB6C0BVNV1wF8C3weuB26uqjOnrpfkuCTrkqzbsGHDQocpSVutPpuA7gccDjwEeBCwQ5Kjpq5XVWuqanVVrV65cuVChylJW60+m4AOBb5bVRuq6jbgdOAJPcYjSYPSZwL4PvC4JNsnCfBU4Ioe45GkQemzD+B84FTgIuCbLZY1fcUjSUOzvM+DV9WfAn/aZwySNFR9XwYqSeqJCUCSBsoEIEkDZQKQpIEyAUjSQJkAJGmgTACSNFAmAEkaKBOAJA2UCUCSBsoEIEkDZQKQpIEyAUjSQJkAJGmgTACSNFAmAEkaKBOAJA2UCUCSBsoEIEkDZQKQpIHqNQEk2SXJqUm+leSKJI/vMx5JGpLlPR//XcDnquqIJPcGtu85HkkajN4SQJKdgd8CjgGoql8Bv+orHkkamj6bgPYGNgAfTHJxkv+VZIce45GkQekzASwHDgDeW1X7Az8Fjp+6UpLjkqxLsm7Dhg0LHaMkbbX6TADXAtdW1flt/lS6hHA3VbWmqlZX1eqVK1cuaICStDXrLQFU1b8AP0jy8Fb0VODyvuKRpKEZqxM4SYAXAXtX1ZuT7AXsVlVf38LjvwI4uV0BdDXwki3cnyRpTONeBfQe4NfAIcCbgVuA04Df3JKDV9UlwOot2YckafOMmwAeW1UHJLkYoKpuat/aJUlL1Lh9ALclWQYUQJKVdGcEkqQlatwE8LfAx4EHJHkr8GXgzycWlSRp4sZqAqqqk5NcSHelToBnV9UVE41MkjRRsyaAJPcfmb0BOGV0WVX9aFKBSZIma64zgAvp2v0D7AXc1KZ3Ab4PPGSi0UmSJmbWPoCqekhV7Q18HvhPVbWiqnYFngWcvhABSpImY9xO4N+sqs9snKmqzwIHTyYkSdJCGPc+gBuT/DHwYbomoaOAf5tYVJKkiRv
3DOBIYCXdpaCfAB7QyiRJS9S4l4H+CHjVhGORJC2gcQeDO5d2F/Coqjpk3iOSJC2IcfsAXjcyvS3wXOD2+Q9HkrRQxm0CunBK0XlJvjCBeCRJC2TcJqDRO4LvBRwI7DaRiCRJC2LcJqDRO4JvB74LHDupoCRJkzduAnhkVf1itCDJfSYQjyRpgYx7H8BXpin76nwGIklaWHONBrobsDuwXZL96ZqAAHYGtp9wbJKkCZqrCeh3gGOAPYB3jpTfArxxQjFJkhbArAmgqk4CTkry3Ko6bYFikiQtgLmagI6qqg8Dq5K8duryqnrnNJttkvZbw+uA66rqWVu6P0nSeOZqAtqhPe44wRheBVxB168gSVogczUBva89/tkkDp5kD+CZwFuBe5xhSJImZ9w7gVcC/xVYNbpNVb10C4//N8D/AHaa5djHAccB7LXXXlt4OEnSRuPeCPZJ4EvA2cAd83HgJM8CbqiqC5M8Zab1qmoNsAZg9erV9xiRVJK0ecZNANtX1evn+dhPBA5L8gy6EUZ3TvLhqjpqno8jSZrGuHcCf6p9UM+bqnpDVe1RVauAFwD/7Ie/JC2ccRPAq+iSwM+T/CTJLUl+MsnAJEmTNe7vAczYSTsfqmotsHaSx5Ak3d24VwEdME3xzcD3qspfBpOkJWjcTuD3AAcA32zzjwa+Aeya5Per6sxJBCdJmpxx+wCuAfavqgOr6kBgP2A9cCjw9gnFJkmaoHETwCOq6rKNM1V1OV1CuHoyYUmSJm3cJqArk7wX+Eibfz7w7farYLdNJDJJ0kSNewZwDHAV8GrgNcDVrew24LcnEZgkabLGvQz058Bftb+pbp3XiCRJC2Lcy0D3Af4C2Jdu2AYAqmrvCcUlSZqwcZuAPgi8F7idrsnnfwMfmlRQkqTJGzcBbFdV5wCpqu9V1ZuAQyYXliRp0sa9CugXSe4FfCfJy4HrgAdMLixJ0qSNewbwamB74JXAgcCLgaMnFZQkafLGvQrogjZ5K/CSyYUjSVoosyaAJGfMtryqDpvfcCRJC2WuM4DHAz8ATgHOBzLxiCRJC2KuBLAb8DTgSOCFwKeBU0bHBZIkLU2zdgJX1R1V9bmqOhp4HN1wEGuTvGJBopMkTcycncBtwLdn0p0FrAL+Fjh9smFJkiZtrk7gk4BHAZ8F/qyq1i9IVJKkiZvrDODFwE+BhwGvTO7sAw5QVbXzBGOTJE3QrAmgqsa9UWyTJdmTbkyh3YBfA2uq6l2TOp4k6e7GHQpiEm4H/rCqLkqyE3BhkrPar41JkiZsYt/w51JV11fVRW36FuAKYPe+4pGkoektAYxKsgrYn+5mM0nSAug9ASTZETgNeHVV/WSa5cclWZdk3YYNGxY+QEnaSvWaAJJsQ/fhf3JVTXtvQVWtqarVVbV65cqVCxugJG3FeksA6a4pfT9wRVW9s684JGmo+jwDeCLdfQaHJLmk/T2jx3gkaVB6uwy0qr6Mo4tKUm967wSWJPXDBCBJA2UCkKSBMgFI0kCZACRpoEwAkjRQJgBJGigTgCQNlAlAkgbKBCBJA2UCkKSBMgFI0kCZACRpoEwAkjRQJgBJGigTgCQNlAlAkgbKBCBJA2UCkKSBMgFI0kCZACRpoHpNAEmenuTKJFclOb7PWCRpaHpLAEmWAX8H/C6wL3Bkkn37ikeShqbPM4CDgKuq6uqq+hXwEeDwHuORpEHpMwHsDvxgZP7aViZJWgDLezx2pimre6yUHAccB7DXXntt9sFWHf/pzd5Ws7vmxGdOZL/W2eRMqs4mtV9NRp9nANcCe47M7wH8cOpKVbWmqlZX1eqVK1cuWHCStLXrMwFcAOyT5CFJ7g28ADijx3gkaVB6awKqqtuTvBz4PLAM+EBVXdZXPFp8bE6QJqvPPgCq6jPAZ/qMQZKGyjuBJWmgTACSNFAmAEkaKBOAJA2UCUCSBsoEIEkDZQKQpIEyAUjSQJkAJGmgTACSNFAmAEkaKBOAJA2UCUCSBso
EIEkDZQKQpIEyAUjSQJkAJGmgTACSNFAmAEkaKBOAJA2UCUCSBqqXBJDkHUm+leTSJB9PsksfcUjSkPV1BnAW8KiqegzwbeANPcUhSYPVSwKoqjOr6vY2+zVgjz7ikKQhWwx9AC8FPjvTwiTHJVmXZN2GDRsWMCxJ2rotn9SOk5wN7DbNohOq6pNtnROA24GTZ9pPVa0B1gCsXr26JhCqJA3SxBJAVR062/IkRwPPAp5aVX6wS9ICm1gCmE2SpwOvBw6uqp/1EYMkDV1ffQDvBnYCzkpySZK/7ykOSRqsXs4AquqhfRxXknSXxXAVkCSpByYASRqoXpqA+nDNic/sOwRJWlQ8A5CkgTIBSNJAmQAkaaBMAJI0UCYASRooE4AkDZQJQJIGygQgSQNlApCkgcpSGoo/yQbge33HsUBWADf2HYTGZn0tPUOqswdX1cqphUsqAQxJknVVtbrvODQe62vpsc5sApKkwTIBSNJAmQAWrzV9B6BNYn0tPYOvM/sAJGmgPAOQpIEyAUjSQJkAZpDkhCSXJbk0ySVJHjvP+39Kkk+NWz7Px37jyPSqJOsnebyF1mPdXZxkvza9PMlPkxw1svzCJAckOSzJ8TPs+9b2uCrJC0fKj0ny7vn8PxaLJHe0elqf5GNJtp9j/TfOtnxkvWuSrBi3fL4keXaSfUfm1yZZlJebmgCmkeTxwLOAA6rqMcChwA/6jWpejfUGWop6rruvAE9o0/8BuHLjfJIdgL2Bb1TVGVV14hz7WgW8cI51thY/r6r9qupRwK+A359j/cX++n02sO+cay0CJoDpPRC4sap+CVBVN1bVDwGSHJjkC+3b3OeTPLCVr03yN0m+0r7JHNTKD2plF7fHh29OQHMc921Jvp7k20me3Mq3T/JP7VvwR5Ocn2R1khOB7do3rpPb7pcl+Yf2rfnMJNtt0bPXrz7r7jzuSgBPAP4e2K/NHwRcVFV3jH6bT/KQJF9NckGSt4zs60Tgya2eXtPKHpTkc0m+k+TtW/QsLV5fAh4KkOSo9rq+JMn7kiyb7vWb5BOtTi9LctzmHDTJDkk+0Orh4iSHt/Jjkpw+3fOe5Nj2nlvb3j/vTvIE4DDgHS3G32irP2/qe3RRqCr/pvwBOwKXAN8G3gMc3Mq3ofuWt7LNPx/4QJteC/xDm/4tYH2b3hlY3qYPBU5r008BPjXNse9RPsZx/6pNPwM4u02/Dnhfm34UcDuwus3fOrLvVW3Zfm3+n4Cj+q6DJVp3q4Cr2/QpwCOAc4GdgBOAN7dlxwDvbtNnAL/Xpl+2sW6mHqNtczVwX2BbuiFR9uz7+Z6nOtv4Py8HPgn8AfBI4P8C27Rl7xl5nm6dsv392+N2wHpg1zZ/DbBimuPdoxz4842ve2CX9vrZYabnHXhQ28/922vrSyN1+o/AESP7Xss079HF8Lcc3UNV3ZrkQODJwG8DH21ttuvoPkzPSgKwDLh+ZNNT2vZfTLJzkl3o3vwnJdkHKLoXy6Z6+BzHPb09Xkj3IQTwJOBdLZ71SS6dZf/frapLptnHktNn3VXVNUnunWQ3ug//K4ELgMfSnRH8z2k2eyLw3Db9IeBtsxzinKq6GSDJ5cCD2TqaJrdLsvH19yXg/cBxwIHABa2+tgNumGH7VyZ5TpveE9gH+LdNjOE/AocleV2b3xbYq01P97yvAL5QVT9q5R8DHjbL/qd7j/bOBDCDqrqDLnOvTfJN4Gi6yrusqh4/02bTzL8FOLeqnpNkVdvnpsocx/1le7yDu+o0m7D/X45M30H3Zluyeq67rwJHANdXVSX5Gt2H/EHA18Y89kym1tPW8v79eVXtN1qQ7lP/pKp6w2wbJnkK3dnZ46vqZ0nW0n14b6oAz62qK6fs/7FM/7xvyvsLpn+P9s4+gGkkeXj71rfRfnSnflcCK9N1NJJkmyT/fmS957fyJwE3t28N9wWua8uP2cyQ5jrudL4M/Je2/r7Ao0eW3ZZkc85EFr1FUHfnAa+
hSwS0x98D/qWqfjzD+i9o0y8aKb+F7gxkqM4BjkjyAIAk90/y4LZs9PV7X+Cm9uH/COBxm3m8zwOvaImHJPvPsf7XgYOT3C/Jcu46i4MlVHcmgOntSHfqf3lrOtkXeFNV/Yru293bknyDrq35CSPb3ZTkK3Sdf8e2srcDf5HkPLpmh3E8Ncm1G//oToVnO+503kP3gXcp8HrgUuDmtmwNcGnu6gTemvRdd+fRXe3zVYCqur5t+5UZ1n8V8LIkF9B9mG10KXB7km+MdAIPRlVdDvwxcGarx7PoOvjh7q/fzwHL2zpvYeazrKkuHXmPvbNtu00rX9/mZ4vvOrp+g/OBs4HLuev99RHgj1pn8m/MsItFwaEg5kk79XxdVa3rOxaAJMvoOtB+0V6E5wAPax+EGrHY6k5LQ5IdW5/TcuDjdBcVfLzvuDbFommL0rzbHji3nSoH+AM//KV59aYkh9L1OZwJfKLneDaZZwCSNFD2AUjSQJkA5lmS7dLdbbpsSvlrN3ZMJjln5IqGqduvTXJlu4vwko1XQYwsPyJJpY0tkmTXJOcmuTVTxopJcnaS+833/7g1maW+7pPuDuqr0t1FvWqabfdsz/0V6e5CfdXIsue1sl9nZBwY62vLjNZXurtzf5wp4zKlu7v6/HR37n40yb2n2c82SU5K8s1Wf28YWfb09h68KiNjNs203yQvT/KSSf7fk2ICmH8vBU5v16KPupjuTtzHAKfSXWEykxdVNzbKflV1580vSXYCXkl35cFGvwD+hO7O36k+BPz3zfgfhmSm+jqW7vLChwJ/zfQ3aN0O/GFVPZLu8sOX5a5BwNYD/xn44pRtrK8tM1pf7wBePM06bwP+uqr2AW7irqu6Rj0PuE9VPZruKrv/lm4AvmXA3wG/S3cF2ZEjdTrTfj9A975cckwA8+9FdLez301VnVtVP2uzXwP22Ix9v4UucfxiZL8/raovj5aNOAM4cjOOMyTT1hdwOHBSmz6V7tLcu938U1XXV9VFbfoW4Apg9zZ/xdSbilq59bVl7qyvqjqH7pr7O7U6OoSuzqCrw2dPs58CdmhX8GxHNwjdT+hu2Luqqq5uF018BDh8tv229/U1aWNILSUmgHnUTgn3rqpr5lj1WOCzsyz/YGv++ZONHzrpbkzZs6rGHiq6qm4C7pNk13G3GZI56mt32jALVXU73TXeMz6PrYlof+5+drZJrK/Zjfn+2hX4caszgGtpSXmKU4Gf0g0H8n3gL9uwDnfW+5Tt59rvOrrhR5YULwOdXyuA6e72vFO68eFXAwfPsMqLquq61txzGvDiJB+ma4Y4ZjNiuoFu4KpNHRtlCGarr+lu9Z/2krkkO9LV1aur6idbGJP1NbM531+MX28H0Q3L8CDgfsCXkpw9y/Zz7fcGuvGflhTPAObXz2njkCR568aO3I0L2zXDJwCHVRuueKp2h+HGJoX/Q/dC3YluILO1Sa6ha28+I+P9yMS2LS7d02z1dS3dwGK0ZoL7Aj+auoN2n8VpwMlVdfrU5ZvB+prZnfU1ixuBXVqdQdfU+sNp1nsh8Lmquq31s51H98Xsznqfsv1c+12S9WYCmEftFH5Zkm2r6oSNHblwZxPO++g+/Kcd1TDdr0itaNPb0P2wyfqqurmqVlTVqqpaRdeHcNhcd6625qPd6Iat1RSz1Rdde/zRbfoI4J9ryk0z7fl9P3BFVb1zS+OxvmY3Wl+zrFN0Q3Af0YqOZvo+nu8Dh6SzA92Xqm/Rjd66T7vi59504zSdMcZ+H0bX8b+0bOl40v7dY6zx9wOHTlN+NvCvdGPQXEL3otq47JL2uAPdqJWXApfRDee8bJp9raWN7d/mr6H7dnor3TeYfVv5atoY9v5tcn1tC3wMuIpu4K+9W/mDgM+06SfRNQNcOlKvz2jLntPq4pet3j9vfc1vfdENHb2B7pv3tcDvtPK9W51d1erwPq38MO76TYYd27LL6Mbx+aORYzyD7vcA/h9wwkj5tPt
tyy5imt8eWOx/3gk8z9o3/ddW1XSXpy10LO+iSzTn9B3LYmV9LS2Lqb42WowxjcsmoHlWVRfTjcEz7uiRk7TeD5PZWV9LyyKrr41W0N3bseR4BiBJA+UZgCQNlAlAkgbKBCBJA2UCkKSBMgFI0kD9f48bbI6NON7CAAAAAElFTkSuQmCC\n", 91 | "text/plain": [ 92 | "
" 93 | ] 94 | }, 95 | "metadata": { 96 | "needs_background": "light" 97 | }, 98 | "output_type": "display_data" 99 | } 100 | ], 101 | "source": [ 102 | "lor = LogisticRegression(random_state=0, solver='newton-cg', C=1e8)\n", 103 | "\n", 104 | "# set C=1e8 to negate regularization to allow comparison with\n", 105 | "# statsmodels coefficients later\n", 106 | "\n", 107 | "lor.fit(X_std, y_train)\n", 108 | "\n", 109 | "fig, ax = plt.subplots()\n", 110 | "ax.bar([0, 1, 2], lor.coef_.flatten())\n", 111 | "\n", 112 | "ax.set_xticks([0, 1, 2])\n", 113 | "ax.set_xticklabels([f'Sepal Length\\n({lor.coef_.flatten()[0]:.3f})',\n", 114 | " f'Sepal Width\\n({lor.coef_.flatten()[1]:.3f})',\n", 115 | " f'Petal Length\\n({lor.coef_.flatten()[2]:.3f})'])\n", 116 | "plt.ylabel('Magnitude')\n", 117 | "plt.show()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 4, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "train accuracy 0.95\n" 130 | ] 131 | } 132 | ], 133 | "source": [ 134 | "print('train accuracy', lor.score(X_std, y_train))" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 5, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "def std_err_logisticregression(y_true, y_pred_proba, X):\n", 144 | " # based on code from \n", 145 | " # https://stats.stackexchange.com/questions/89484/how-to-compute-the-standard-errors-of-a-logistic-regressions-coefficients\n", 146 | "\n", 147 | " # Design matrix -- add column of 1's at the beginning of your X_train matrix\n", 148 | " X_design = np.hstack([np.ones((X.shape[0], 1)), X])\n", 149 | " \n", 150 | " # Initiate matrix of 0's, fill diagonal with each predicted observation's variance\n", 151 | " V = np.diagflat(np.product(y_pred_proba, axis=1))\n", 152 | "\n", 153 | " # Covariance matrix\n", 154 | " cov = np.linalg.inv(X_design.T @ V @ X_design)\n", 155 | "\n", 156 | " # Standard errors:\n", 
157 | " std_errs = np.sqrt(np.diag(cov))\n", 158 | " \n", 159 | " return std_errs\n", 160 | "\n", 161 | "\n", 162 | "def weight_intervals(n, weight, std_err, alpha=0.05):\n", 163 | " t_value = stats.t.ppf(1 - alpha/2, df=n - 2)\n", 164 | " temp = t_value * std_err\n", 165 | " lower = weight - temp\n", 166 | " upper = weight + temp\n", 167 | "\n", 168 | " return lower, upper" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 6, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "y_pred_proba = lor.predict_proba(X_std)\n", 178 | "std_err = std_err_logisticregression(y_train, y_pred_proba, X_std)\n", 179 | "\n", 180 | "lower, upper = weight_intervals(len(y_train), lor.coef_.flatten(), std_err[1:])" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 7, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "image/png": "\n", 191 | "text/plain": [ 192 | "
" 193 | ] 194 | }, 195 | "metadata": { 196 | "needs_background": "light" 197 | }, 198 | "output_type": "display_data" 199 | } 200 | ], 201 | "source": [ 202 | "fig, ax = plt.subplots()\n", 203 | "\n", 204 | "ax.hlines(0, xmin=-0.1, xmax=2.2, linestyle='dashed', color='skyblue')\n", 205 | "ax.errorbar([0, 1, 2], lor.coef_.flatten(), yerr=upper - lor.coef_.flatten(), fmt='.k')\n", 206 | "\n", 207 | "ax.set_xticks([0, 1, 2])\n", 208 | "ax.set_xticklabels([f'Sepal Length\\n({lor.coef_.flatten()[0]:.3f})',\n", 209 | " f'Sepal Width\\n({lor.coef_.flatten()[1]:.3f})',\n", 210 | " f'Petal Length\\n({lor.coef_.flatten()[2]:.3f})'])\n", 211 | "plt.ylabel('Magnitude');" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 8, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 | "(array([-4.77771221, -1.71641308, 4.43242151]),\n", 223 | " array([-0.30375066, 1.2935979 , 17.16730293]))" 224 | ] 225 | }, 226 | "execution_count": 8, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "lower, upper" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "---" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 9, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "name": "stdout", 249 | "output_type": "stream", 250 | "text": [ 251 | "Optimization terminated successfully.\n", 252 | " Current function value: 0.120691\n", 253 | " Iterations: 10\n", 254 | " Function evaluations: 11\n", 255 | " Gradient evaluations: 20\n", 256 | " Hessian evaluations: 10\n" 257 | ] 258 | } 259 | ], 260 | "source": [ 261 | "import statsmodels.api as sm\n", 262 | "\n", 263 | "\n", 264 | "model = sm.Logit(y_train, X_std)\n", 265 | "res = model.fit(method='ncg')\n", 266 | "lower, upper = res.conf_int(0.05)[:, 0], res.conf_int(0.05)[:, 1]" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 10, 272 
| "metadata": {}, 273 | "outputs": [ 274 | { 275 | "data": { 276 | "text/plain": [ 277 | "(array([-4.78965224, -1.62105453, 4.48828742]),\n", 278 | " array([-0.40139451, 1.25645007, 16.7429816 ]))" 279 | ] 280 | }, 281 | "execution_count": 10, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "lower, upper" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 11, 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "data": { 297 | "image/png": "\n", 298 | "text/plain": [ 299 | "
" 300 | ] 301 | }, 302 | "metadata": { 303 | "needs_background": "light" 304 | }, 305 | "output_type": "display_data" 306 | } 307 | ], 308 | "source": [ 309 | "fig, ax = plt.subplots()\n", 310 | "\n", 311 | "ax.hlines(0, xmin=-0.1, xmax=2.2, linestyle='dashed', color='skyblue')\n", 312 | "ax.errorbar([0, 1, 2], res.params, yerr=upper - res.params, fmt='.k')\n", 313 | "\n", 314 | "ax.set_xticks([0, 1, 2])\n", 315 | "ax.set_xticklabels([f'Sepal Length\\n({res.params[0]:.3f})',\n", 316 | " f'Sepal Width\\n({res.params[1]:.3f})',\n", 317 | " f'Petal Length\\n({res.params[2]:.3f})'])\n", 318 | "plt.ylabel('Magnitude');" 319 | ] 320 | } 321 | ], 322 | "metadata": { 323 | "kernelspec": { 324 | "display_name": "Python 3", 325 | "language": "python", 326 | "name": "python3" 327 | }, 328 | "language_info": { 329 | "codemirror_mode": { 330 | "name": "ipython", 331 | "version": 3 332 | }, 333 | "file_extension": ".py", 334 | "mimetype": "text/x-python", 335 | "name": "python", 336 | "nbconvert_exporter": "python", 337 | "pygments_lexer": "ipython3", 338 | "version": "3.7.1" 339 | } 340 | }, 341 | "nbformat": 4, 342 | "nbformat_minor": 4 343 | } 344 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Sebastian Raschka 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Interpretable Machine Learning Blog 2 | 3 | Code examples for my Interpretable Machine Learning Blog Series 4 | 5 | - Part 1: [https://sebastianraschka.com/blog/2020/interpretable-ml-1.html](https://sebastianraschka.com/blog/2020/interpretable-ml-1.html) --------------------------------------------------------------------------------