├── .gitignore
├── LICENSE.md
├── README.md
├── ml-animations
│   ├── LICENSE
│   ├── README.md
│   ├── gif
│   │   ├── cnn
│   │   │   └── cnn_1d.gif
│   │   ├── decision_tree
│   │   │   ├── decision_tree.gif
│   │   │   ├── decision_tree_1.gif
│   │   │   ├── decision_tree_2.gif
│   │   │   ├── decision_tree_3.gif
│   │   │   └── decision_tree_4.gif
│   │   └── knn
│   │       └── knn.gif
│   └── notebooks
│       ├── cnn_1d.ipynb
│       ├── cross_validation_draft.ipynb
│       ├── decision_tree.ipynb
│       ├── knn_draft.ipynb
│       └── logistic_regression.ipynb
├── ml-clustering
│   └── clustering-mixed-data.ipynb
├── ml-deploy-model
│   ├── README.md
│   ├── data
│   │   ├── README.md
│   │   ├── abalone_train.csv
│   │   ├── abalone_validation.csv
│   │   ├── column_names.csv
│   │   └── raw
│   │       ├── abalone.csv
│   │       └── preprocessing.py
│   ├── deploy-with-flask.ipynb
│   ├── deploy-with-flask
│   │   ├── build_model.ipynb
│   │   ├── web_api
│   │   │   ├── Procfile
│   │   │   ├── abalone_predictor.joblib
│   │   │   ├── app.py
│   │   │   ├── default.profraw
│   │   │   └── requirements.txt
│   │   └── web_application
│   │       ├── Procfile
│   │       ├── abalone_predictor.joblib
│   │       ├── app.py
│   │       ├── requirements.txt
│   │       ├── static
│   │       │   └── style.css
│   │       └── templates
│   │           ├── home.html
│   │           └── prediction.html
│   ├── deploy-with-sagemaker.ipynb
│   ├── deploy-with-sagemaker
│   │   └── xgboost_abalone.ipynb
│   └── docs
│       └── img
│           ├── flask_images
│           │   ├── fl_0.png
│           │   ├── fl_1.png
│           │   ├── fl_10.png
│           │   ├── fl_11.png
│           │   ├── fl_12.png
│           │   ├── fl_13.png
│           │   ├── fl_14.png
│           │   ├── fl_15.png
│           │   ├── fl_16.png
│           │   ├── fl_2.png
│           │   ├── fl_3.png
│           │   ├── fl_4.png
│           │   ├── fl_5.png
│           │   ├── fl_6.png
│           │   ├── fl_7.png
│           │   ├── fl_8.png
│           │   └── fl_9.png
│           ├── ml-deploy.png
│           └── sagemaker_images
│               ├── sm_0.png
│               ├── sm_1.png
│               ├── sm_10.png
│               ├── sm_11.png
│               ├── sm_12.png
│               ├── sm_13.png
│               ├── sm_14.png
│               ├── sm_15.png
│               ├── sm_16.png
│               ├── sm_17.png
│               ├── sm_18.png
│               ├── sm_19.png
│               ├── sm_2.png
│               ├── sm_20.png
│               ├── sm_21.png
│               ├── sm_22.png
│               ├── sm_23.png
│               ├── sm_3.png
│               ├── sm_4.png
│               ├── sm_5.png
│               ├── sm_6.png
│               ├── sm_7.png
│               ├── sm_8.png
│               └── sm_9.png
├── ml-image-generation
│   ├── GANs
│   │   ├── GAN.ipynb
│   │   ├── conditional_GAN.ipynb
│   │   ├── convolutional_GAN_(run_with_google_colab).ipynb
│   │   └── saved_models
│   │       ├── GAN_cgan_generator.h5
│   │       └── convolutional_GAN_images
│   │           ├── image_at_epoch_0001.png
│   │           └── image_at_epoch_0002.png
│   └── autoencoders
│       ├── autoencoders.ipynb
│       ├── checkpoints
│       │   ├── cvae_2L
│       │   │   ├── checkpoint
│       │   │   ├── cvae_2L.data-00000-of-00002
│       │   │   ├── cvae_2L.data-00001-of-00002
│       │   │   └── cvae_2L.index
│       │   ├── cvae_8L
│       │   │   ├── checkpoint
│       │   │   ├── cvae_8L.data-00000-of-00002
│       │   │   ├── cvae_8L.data-00001-of-00002
│       │   │   └── cvae_8L.index
│       │   ├── vae_2L
│       │   │   ├── checkpoint
│       │   │   ├── vae_2L.data-00000-of-00002
│       │   │   ├── vae_2L.data-00001-of-00002
│       │   │   └── vae_2L.index
│       │   └── vae_8L
│       │       ├── checkpoint
│       │       ├── vae_8L.data-00000-of-00002
│       │       ├── vae_8L.data-00001-of-00002
│       │       └── vae_8L.index
│       ├── requirements.txt
│       └── variational_autoencoders.ipynb
└── ml-timeseries
    ├── README.md
    ├── data
    │   ├── README.md
    │   └── sales_data.csv
    ├── docs
    │   └── img
    │       ├── ts_1.png
    │       └── ts_2.png
    └── notebooks
        └── supervised_time_series_intro.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | /ml-timeseries/notebooks/supervised_time_series_intro_notes.md
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Tomas Beuzen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the 
Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A collection of data science and machine learning tutorials 2 | 3 | ![](https://img.shields.io/badge/-tutorial-informational) 4 | ![](https://img.shields.io/badge/-machine--learning-important) 5 | ![](https://img.shields.io/badge/-data--science-lightgrey) 6 | 7 | This repository contains a variety of useful data science/machine learning tutorials I've developed over time. Here's the current list: 8 | 9 | - [animations of machine learning models for pedagogy](ml-animations) 10 | - [deploying machine learning models with Amazon Sagemaker or Flask](ml-deploy-model) 11 | - [image generation with VAEs and GANs](ml-image-generation) 12 | - [supervised learning for time series data](ml-timeseries) -------------------------------------------------------------------------------- /ml-animations/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Tomas Beuzen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ml-animations/README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning Animations 2 | 3 | I find visuals, particularly animations, especially useful for understanding how machine learning algorithms work. 
This repository houses animations that I've developed for teaching purposes. 4 | 5 | ## Decision Tree 6 | 7 | The animation below shows a decision tree being built. For every potential split of the raw data (left panel), the [Gini impurity](https://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity) is calculated (right panel). The split that minimizes the impurity is chosen for the tree, and the process repeats until all data points have been separated into homogeneous groups. 8 | 9 | ![Decision Tree](./gif/decision_tree/decision_tree.gif) 10 | 11 | ## *k*-Nearest Neighbors 12 | 13 | The animation below shows the prediction of an unknown point using increasing values of *k* in the *k*-nearest neighbors algorithm. Only odd values of *k* are shown: in a two-class problem like this one, even values of *k* can produce ties, in which case a tie-breaking rule is needed to predict the query point (for example, predicting a class at random, or using the class of the single closest point). 14 | 15 | ![kNN](./gif/knn/knn.gif) 16 | 17 | ## Convolutional Neural Network 18 | 19 | ### 1D ConvNet 20 | 21 | The animation below shows how a 1D sequence (d=1) of 21 observations (T=21) is "broken into" 4 sequences by a 1D convolutional layer with 4 filters (f=4) of length 3. The input is actually 2D, with shape (T=21, d=1). This is a little confusing given that we are working with a "1D ConvNet", but the "1D" refers to the dimensionality of the filters being passed over the data, not of the data itself - as you can see below, we pass a 1D filter of length 3 over the sequence. The output of the 1D convolutional layer then has shape (T=21, f=4): one output sequence per filter. Note that the ends of the input sequence have been zero-padded so that the filters can be applied there. The filter weights here are just random numbers; the network has not been trained. The layer has 16 parameters: (1 input x 4 filters) * (3 weights per filter) + (4 biases) = 16 parameters. 
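To make the shape and parameter bookkeeping concrete, here is a minimal sketch of the same layer (it mirrors the code in `notebooks/cnn_1d.ipynb`; the variable names `T`, `d`, `f` and `width` are illustrative, not from the notebook):

```python
import numpy as np
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.models import Sequential

T, d, f, width = 21, 1, 4, 3  # sequence length, input channels, filters, filter length

# padding='same' zero-pads the ends of the sequence so that each filter
# produces an output sequence of the same length as the input
model = Sequential([Conv1D(f, kernel_size=width, input_shape=(T, d), padding="same")])

x = np.random.randn(1, T, d)   # one example, shape (batch, T, d)
print(model.predict(x).shape)  # (1, 21, 4): one length-21 output sequence per filter
print(model.count_params())    # 16 = (1 input x 4 filters) * (3 weights) + 4 biases
```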
22 | 23 | ![cnn](./gif/cnn/cnn_1d.gif) 24 | -------------------------------------------------------------------------------- /ml-animations/gif/cnn/cnn_1d.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/cnn/cnn_1d.gif -------------------------------------------------------------------------------- /ml-animations/gif/decision_tree/decision_tree.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/decision_tree/decision_tree.gif -------------------------------------------------------------------------------- /ml-animations/gif/decision_tree/decision_tree_1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/decision_tree/decision_tree_1.gif -------------------------------------------------------------------------------- /ml-animations/gif/decision_tree/decision_tree_2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/decision_tree/decision_tree_2.gif -------------------------------------------------------------------------------- /ml-animations/gif/decision_tree/decision_tree_3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/decision_tree/decision_tree_3.gif -------------------------------------------------------------------------------- /ml-animations/gif/decision_tree/decision_tree_4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/decision_tree/decision_tree_4.gif -------------------------------------------------------------------------------- /ml-animations/gif/knn/knn.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/knn/knn.gif -------------------------------------------------------------------------------- /ml-animations/notebooks/cnn_1d.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Imports" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from tensorflow.keras.layers import Conv1D\n", 18 | "from tensorflow.keras.models import Sequential\n", 19 | "from tensorflow.random import set_seed\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import matplotlib.patches as patches\n", 22 | "import matplotlib.animation as animation\n", 23 | "from IPython.display import HTML, Image\n", 24 | "plt.style.use('ggplot')\n", 25 | "params = {'legend.fontsize': '18',\n", 26 | " 
'axes.labelsize': '20',\n", 27 | " 'axes.labelweight': 'bold',\n", 28 | " 'axes.titlesize':'20',\n", 29 | " 'xtick.labelsize':'18',\n", 30 | " 'ytick.labelsize':'18'}\n", 31 | "plt.rcParams.update(params)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "#### Functions" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "def mk_fig():\n", 48 | " \"\"\"\n", 49 | " Convenience function to plot figure canvas\n", 50 | "\n", 51 | " Returns\n", 52 | " -------\n", 53 | " fig, axes\n", 54 | " Figure and axes objects\n", 55 | " \"\"\"\n", 56 | " fig, axes = plt.subplots(1, 2, figsize=(14, 4.5))\n", 57 | " axes[0].set_xlim(-2, 22)\n", 58 | " axes[0].set_ylim(-1, 1)\n", 59 | " axes[0].set_xlabel('x')\n", 60 | " axes[0].set_ylabel('y')\n", 61 | " axes[0].set_title('Input sequence')\n", 62 | " axes[1].set_xlim(-2, 22)\n", 63 | " axes[1].set_ylim(-1, 1)\n", 64 | " axes[1].set_xlabel('x')\n", 65 | " axes[1].set_title('Conv1D layer output')\n", 66 | " axes[0].plot([-10], [-10], '-k', marker='.', ms=13, label='Original data')\n", 67 | " axes[0].plot([-10], [-10], '-k', marker='.', markerfacecolor='w', ms=13, zorder=1, label='Zero padding')\n", 68 | " axes[0].legend(facecolor='w', loc=3)\n", 69 | " \n", 70 | " return fig, axes" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "#### Data" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 3, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "np.random.seed(1)\n", 87 | "n = 21\n", 88 | "x = (np.sin(np.linspace(0, 3, n)) + np.random.randn(n)*0.1) * 0.8" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "#### Model" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "Model: \"conv1d_model\"\n", 108 | "_________________________________________________________________\n", 109 | "Layer (type) Output Shape Param # \n", 110 | "=================================================================\n", 111 | "conv1d (Conv1D) (None, 21, 4) 16 \n", 112 | "=================================================================\n", 113 | "Total params: 16\n", 114 | "Trainable params: 16\n", 115 | "Non-trainable params: 0\n", 116 | "_________________________________________________________________\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "set_seed(1)\n", 122 | "model = Sequential(name=\"conv1d_model\")\n", 123 | "filters = 4\n", 124 | "kernel_size = 3\n", 125 | "model.add(Conv1D(filters, kernel_size=kernel_size, input_shape=(n, 1), padding='same'))\n", 126 | "x_out = model.predict(x[None,:,None])[0]\n", 127 | "model.summary()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 6, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "array([[-0.4235797 , -0.26317865, 0.20251465],\n", 139 | " [ 0.5078381 , 0.18025243, 0.13268387],\n", 140 | " [ 0.1656707 , 0.60182637, 0.1728267 ],\n", 141 | " [-0.08279335, -0.08209336, 0.14476752]], dtype=float32)" 142 | ] 143 | }, 144 | "execution_count": 6, 145 | "metadata": {}, 146 | "output_type": "execute_result" 147 | } 148 | ], 149 | "source": [ 150 | "model.get_weights()[0][:,0,:].T" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | 
"source": [ 157 | "#### Create and save animations" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 5, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "fig, axes = mk_fig()\n", 167 | "fig.tight_layout()\n", 168 | "ec = [(0.89, 0.29, 0.2, 1), (0.2, 0.54, 0.74, 1), (0.60, 0.56, 0.84, 1), (0.47, 0.47, 0.47, 1)]\n", 169 | "fc = [(0.89, 0.29, 0.2, 0.2), (0.2, 0.54, 0.74, 0.2), (0.60, 0.56, 0.84, 0.2), (0.47, 0.47, 0.47, 0.2)]\n", 170 | "\n", 171 | "def init():\n", 172 | " axes[0].plot(x, '-k', marker='.', ms=13)\n", 173 | " axes[0].plot([-1, 0], [0, x[0]], 'k', marker='.', markerfacecolor='w', ms=13, zorder=1, label='zero padding')\n", 174 | " axes[0].plot([20, 21], [ x[-1], 0], 'k', marker='.', markerfacecolor='w', ms=13, zorder=1)\n", 175 | "\n", 176 | "def animate(i):\n", 177 | " axes[0].set_title(f'Input sequence. Passing filter {i//21 + 1}.')\n", 178 | " [p.remove() for p in reversed(axes[0].patches)];\n", 179 | " p = []\n", 180 | " p.append(patches.Rectangle((i%n-1.5, x[i%n]-0.4), 1, 0.8, linewidth=1, edgecolor=ec[i//21], facecolor=fc[i//21]))\n", 181 | " p.append(patches.Rectangle((i%n-0.5, x[i%n]-0.4), 1, 0.8, linewidth=1, edgecolor=ec[i//21], facecolor=fc[i//21]))\n", 182 | " p.append(patches.Rectangle((i%n+0.5, x[i%n]-0.4), 1, 0.8, linewidth=1, edgecolor=ec[i//21], facecolor=fc[i//21]))\n", 183 | " for _ in p:\n", 184 | " axes[0].add_patch(_)\n", 185 | " axes[1].plot(x_out[:i%n+1,i//21], color=ec[i//21], marker='.', ms=13);\n", 186 | " \n", 187 | "plt.close(fig)\n", 188 | "ani = animation.FuncAnimation(fig,\n", 189 | " animate,\n", 190 | " init_func=init,\n", 191 | " frames=21*filters)\n", 192 | "ani.save('../gif/cnn/cnn_1d.gif', writer='imagemagick', fps=3, dpi=75)\n", 193 | "# HTML(ani.to_jshtml())" 194 | ] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python [conda env:tf]", 200 | "language": "python", 201 | "name": "conda-env-tf-py" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.6.10" 214 | }, 215 | "toc": { 216 | "base_numbering": 1, 217 | "nav_menu": {}, 218 | "number_sections": true, 219 | "sideBar": true, 220 | "skip_h1_title": true, 221 | "title_cell": "Table of Contents", 222 | "title_sidebar": "Contents", 223 | "toc_cell": false, 224 | "toc_position": {}, 225 | "toc_section_display": true, 226 | "toc_window_display": false 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 2 231 | } 232 | -------------------------------------------------------------------------------- /ml-animations/notebooks/cross_validation_draft.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Imports" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 14, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "import matplotlib.animation as animation\n", 19 | "from IPython.display import HTML, Image\n", 20 | "plt.style.use('ggplot')\n", 21 | "params = {'legend.fontsize': '18',\n", 22 | " 'axes.labelsize': '20',\n", 23 | " 'axes.labelweight': 'bold',\n", 24 | " 'axes.titlesize':'20',\n", 25 | " 'xtick.labelsize':'18',\n", 26 | " 
'ytick.labelsize':'18'}\n", 27 | "plt.rcParams.update(params)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "#### Functions" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 15, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "def gini(*args):\n", 44 | " \"\"\"\n", 45 | " Calculates the gini impurity for binary class data.\n", 46 | "\n", 47 | " Parameters\n", 48 | " ----------\n", 49 | " *args : int\n", 50 | " Number of examples in class i\n", 51 | "\n", 52 | " Returns\n", 53 | " -------\n", 54 | " float\n", 55 | " The gini impurity\n", 56 | " \"\"\"\n", 57 | " n = sum(args) # total examples\n", 58 | " gini = 0\n", 59 | " for c in args:\n", 60 | " gini += (c / n) * (1 - (c / n))\n", 61 | " return gini\n", 62 | "\n", 63 | "\n", 64 | "def split(x, y, splits):\n", 65 | " \"\"\"\n", 66 | " Calculates the gini impurity for binary class data.\n", 67 | "\n", 68 | " Parameters\n", 69 | " ----------\n", 70 | " x : int\n", 71 | " Feature values\n", 72 | " y : int\n", 73 | " Corresponding target values\n", 74 | " splits : int\n", 75 | " Vector of splits to calculate gini criterion for\n", 76 | "\n", 77 | " Returns\n", 78 | " -------\n", 79 | " list\n", 80 | " List of gini impurity for each split\n", 81 | " \"\"\"\n", 82 | " gini_splits = []\n", 83 | " for i in splits:\n", 84 | " mask = x < i\n", 85 | " gini_L = gini(sum(y[mask] == 0),\n", 86 | " sum(y[mask] == 1))\n", 87 | " p_L = sum(mask) / len(mask)\n", 88 | " gini_R = gini(sum(y[~mask] == 0),\n", 89 | " sum(y[~mask] == 1))\n", 90 | " p_R = sum(~mask) / len(mask)\n", 91 | " gini_splits.append(gini_L * p_L + gini_R * p_R)\n", 92 | " \n", 93 | " return gini_splits\n", 94 | "\n", 95 | "\n", 96 | "def mk_fig():\n", 97 | " \"\"\"\n", 98 | " Convenience function to plot figure canvas\n", 99 | "\n", 100 | " Returns\n", 101 | " -------\n", 102 | " fig, axes\n", 103 | " Figure and axes objects\n", 104 | " \"\"\"\n", 105 | " fig, axes = plt.subplots(1, 2, figsize=(14, 7))\n", 106 | " axes[0].set_xlim(-1, 11)\n", 107 | " axes[0].set_ylim(-1, 11)\n", 108 | " axes[0].set_xlabel('X1')\n", 109 | " axes[0].set_ylabel('X2')\n", 110 | " axes[0].xaxis.label.set_color('#988ED5')\n", 111 | " axes[0].yaxis.label.set_color('#E8A2A5')\n", 112 | " axes[1].set_xlim(-1, 11)\n", 113 | " axes[1].set_ylim(0, 1)\n", 114 | " axes[1].set_xlabel('Feature Value')\n", 115 | " axes[1].set_ylabel('Gini Impurity')\n", 116 | " \n", 117 | " return fig, axes" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "#### Data" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 16, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "np.random.seed(3)\n", 134 | "x1 = np.random.randint(0, 10, 10)\n", 135 | "x2 = np.random.randint(0, 10, 10)\n", 136 | "y = np.random.randint(0, 2, 10)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "#### Splits" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 17, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "x1_unique = np.unique(x1)\n", 153 | "x1_splits = x1_unique[:-1] + np.diff(x1_unique) / 2\n", 154 | "x1_gini = split(x1, y, x1_splits)\n", 155 | "\n", 156 | "x2_unique = np.unique(x2)\n", 157 | "x2_splits = x2_unique[:-1] + np.diff(x2_unique) / 2\n", 158 | "x2_gini = split(x2, y, x2_splits)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | 
"source": [ 165 | "#### Create and save animations" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "##### First split" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 18, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "fig, axes = mk_fig()\n", 182 | "mask = y == 0\n", 183 | "j = len(x1_splits)\n", 184 | "f = len(x1_splits) + len(x2_splits) + 1\n", 185 | "\n", 186 | "def init():\n", 187 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 188 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 189 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 190 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 191 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 192 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 193 | "\n", 194 | "def animate(i):\n", 195 | " if i <= j: # plot x1 splits\n", 196 | " axes[0].vlines(x1_splits[:i], -1, 11, '#988ED5')\n", 197 | " axes[1].plot(x1_splits[:i], x1_gini[:i], '#988ED5', marker='o', ms=9)\n", 198 | " elif i < f: # plot x2 splits\n", 199 | " axes[0].hlines(x2_splits[:(i-j)], -1, 11, '#E8A2A5')\n", 200 | " axes[1].plot(x2_splits[:(i-j)], x2_gini[:(i-j)], '#E8A2A5', marker='o', ms=9)\n", 201 | " else: # highlight optimum split\n", 202 | " if min(x1_gini) <= min(x2_gini):\n", 203 | " k = np.argmin(x1_gini)\n", 204 | " axes[0].vlines(x1_splits[k], -1, 11, 'k', lw=3)\n", 205 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 206 | " else:\n", 207 | " k = np.argmin(x1_gini)\n", 208 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc=\"None\")\n", 209 | " axes[0].vlines(x1_splits[k], -1, 11, 'k', lw=2)\n", 210 | " \n", 211 | "plt.close(fig)\n", 212 | "ani = animation.FuncAnimation(fig,\n", 213 | " animate,\n", 214 | " init_func=init,\n", 215 | " frames=f + 1,\n", 216 | " interval=600)\n", 217 | "ani.save('../gif/decision_tree/decision_tree_1.gif', writer='imagemagick', fps=1, dpi=150)\n", 218 | "# HTML(ani.to_jshtml())" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "##### Second split" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 19, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "split_1 = 4\n", 235 | "mask_1 = x1 > split_1" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 20, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "x1_unique = np.unique(x1[mask_1])\n", 245 | "x1_splits = x1_unique[:-1] + np.diff(x1_unique) / 2\n", 246 | "x1_gini = split(x1[mask_1], y[mask_1], x1_splits)\n", 247 | "\n", 248 | "x2_unique = np.unique(x2[mask_1])\n", 249 | "x2_splits = x2_unique[:-1] + np.diff(x2_unique) / 2\n", 250 | "x2_gini = split(x2[mask_1], y[mask_1], x2_splits)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 21, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "fig, axes = mk_fig()\n", 260 | "mask = y == 0\n", 261 | "j = len(x1_splits)\n", 262 | "f = len(x1_splits) + len(x2_splits) + 1\n", 263 | "\n", 264 | "def init():\n", 265 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 266 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 267 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 268 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 
splits') # legend place-holders\n", 269 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 270 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 271 | " axes[0].vlines(split_1, -1, 11, 'k', lw=3)\n", 272 | "\n", 273 | "def animate(i):\n", 274 | " if i <= j: # plot x1 splits\n", 275 | " axes[0].vlines(x1_splits[:i], -1, 11, '#988ED5')\n", 276 | " axes[1].plot(x1_splits[:i], x1_gini[:i], '#988ED5', marker='o', ms=9)\n", 277 | " elif i < f: # plot x2 splits\n", 278 | " axes[0].hlines(x2_splits[:(i-j)], split_1, 11, '#E8A2A5')\n", 279 | " axes[1].plot(x2_splits[:(i-j)], x2_gini[:(i-j)], '#E8A2A5', marker='o', ms=9)\n", 280 | " else: # highlight optimum split\n", 281 | " if min(x1_gini) <= min(x2_gini):\n", 282 | " k = np.argmin(x1_gini)\n", 283 | " axes[0].vlines(x1_splits[k], -1, 11, 'k', lw=3)\n", 284 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 285 | " else:\n", 286 | " k = np.argmin(x2_gini)\n", 287 | " axes[0].hlines(x2_splits[k], split_1, 11, 'k', lw=3)\n", 288 | " axes[1].plot(x2_splits[k], x2_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 289 | " \n", 290 | "plt.close(fig)\n", 291 | "ani = animation.FuncAnimation(fig,\n", 292 | " animate,\n", 293 | " init_func=init,\n", 294 | " frames=f + 1,\n", 295 | " interval=600)\n", 296 | "ani.save('../gif/decision_tree/decision_tree_2.gif', writer='imagemagick', fps=1, dpi=150)\n", 297 | "# HTML(ani.to_jshtml())" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "##### Third split" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 22, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "split_2 = 4.5\n", 314 | "mask_2 = (x1 > split_1) & (x2 > split_2)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 23, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "x1_unique = np.unique(x1[mask_2])\n", 324 | "x1_splits = x1_unique[:-1] + np.diff(x1_unique) / 2\n", 325 | "x1_gini = split(x1[mask_2], y[mask_2], x1_splits)\n", 326 | "\n", 327 | "x2_unique = np.unique(x2[mask_2])\n", 328 | "x2_splits = x2_unique[:-1] + np.diff(x2_unique) / 2\n", 329 | "x2_gini = split(x2[mask_2], y[mask_2], x2_splits)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 24, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "fig, axes = mk_fig()\n", 339 | "mask = y == 0\n", 340 | "j = len(x1_splits)\n", 341 | "f = len(x1_splits) + len(x2_splits) + 1\n", 342 | "\n", 343 | "def init():\n", 344 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 345 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 346 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 347 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 348 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 349 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 350 | " axes[0].vlines(split_1, -1, 11, 'k', lw=3)\n", 351 | " axes[0].hlines(split_2, split_1, 11, 'k', lw=3)\n", 352 | "\n", 353 | "def animate(i):\n", 354 | " if i <= j: # plot x1 splits\n", 355 | " axes[0].vlines(x1_splits[:i], split_2, 11, '#988ED5')\n", 356 | " axes[1].plot(x1_splits[:i], x1_gini[:i], '#988ED5', marker='o', ms=9)\n", 357 | " elif i < f: # plot x2 splits\n", 358 | " axes[0].hlines(x2_splits[:(i-j)], split_1, 11, '#E8A2A5')\n", 359 | " axes[1].plot(x2_splits[:(i-j)], x2_gini[:(i-j)], 
'#E8A2A5', marker='o', ms=9)\n", 360 | " else: # highlight optimum split\n", 361 | " if min(x1_gini) <= min(x2_gini):\n", 362 | " k = np.argmin(x1_gini)\n", 363 | " axes[0].vlines(x1_splits[k], split_2, 11, 'k', lw=3)\n", 364 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 365 | " else:\n", 366 | " k = np.argmin(x2_gini)\n", 367 | " axes[0].hlines(x2_splits[k], split_1, 11, 'k', lw=3)\n", 368 | " axes[1].plot(x2_splits[k], x2_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 369 | " \n", 370 | "plt.close(fig)\n", 371 | "ani = animation.FuncAnimation(fig,\n", 372 | " animate,\n", 373 | " init_func=init,\n", 374 | " frames=f + 1,\n", 375 | " interval=600)\n", 376 | "ani.save('../gif/decision_tree/decision_tree_3.gif', writer='imagemagick', fps=1, dpi=150)\n", 377 | "# HTML(ani.to_jshtml())" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "##### Final tree" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 25, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "split_3 = 6.5" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 26, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "fig, axes = mk_fig()\n", 403 | "mask = y == 0\n", 404 | "j = len(x1_splits)\n", 405 | "f = len(x1_splits) + len(x2_splits) + 1\n", 406 | "\n", 407 | "def init():\n", 408 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 409 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 410 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 411 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 412 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 413 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 414 | " axes[0].vlines(split_1, -1, 11, 'k', lw=3)\n", 415 | " axes[0].hlines(split_2, split_1, 11, 'k', lw=3)\n", 416 | " axes[0].hlines(split_3, split_1, 11, 'k', lw=3)\n", 417 | "\n", 418 | "def animate(i):\n", 419 | " return\n", 420 | " \n", 421 | "plt.close(fig)\n", 422 | "ani = animation.FuncAnimation(fig,\n", 423 | " animate,\n", 424 | " init_func=init,\n", 425 | " frames=1,\n", 426 | " interval=600)\n", 427 | "ani.save('../gif/decision_tree/decision_tree_4.gif', writer='imagemagick', fps=1, dpi=150)\n", 428 | "# HTML(ani.to_jshtml())" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "#### View animations" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 27, 441 | "metadata": {}, 442 | "outputs": [ 443 | { 444 | "data": { 445 | "text/html": [ 446 | "" 447 | ], 448 | "text/plain": [ 449 | "" 450 | ] 451 | }, 452 | "execution_count": 27, 453 | "metadata": {}, 454 | "output_type": "execute_result" 455 | } 456 | ], 457 | "source": [ 458 | "Image(url='../gif/decision_tree/decision_tree_1.gif')" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 28, 464 | "metadata": {}, 465 | "outputs": [ 466 | { 467 | "data": { 468 | "text/html": [ 469 | "" 470 | ], 471 | "text/plain": [ 472 | "" 473 | ] 474 | }, 475 | "execution_count": 28, 476 | "metadata": {}, 477 | "output_type": "execute_result" 478 | } 479 | ], 480 | "source": [ 481 | "Image(url='../gif/decision_tree/decision_tree_2.gif')" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 29, 487 | "metadata": {}, 488 | "outputs": [ 489 | { 490 | "data": { 491 | 
"text/html": [ 492 | "" 493 | ], 494 | "text/plain": [ 495 | "" 496 | ] 497 | }, 498 | "execution_count": 29, 499 | "metadata": {}, 500 | "output_type": "execute_result" 501 | } 502 | ], 503 | "source": [ 504 | "Image(url='../gif/decision_tree/decision_tree_3.gif')" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": 30, 510 | "metadata": {}, 511 | "outputs": [ 512 | { 513 | "data": { 514 | "text/html": [ 515 | "" 516 | ], 517 | "text/plain": [ 518 | "" 519 | ] 520 | }, 521 | "execution_count": 30, 522 | "metadata": {}, 523 | "output_type": "execute_result" 524 | } 525 | ], 526 | "source": [ 527 | "Image(url='../gif/decision_tree/decision_tree_4.gif')" 528 | ] 529 | } 530 | ], 531 | "metadata": { 532 | "kernelspec": { 533 | "display_name": "Python 3", 534 | "language": "python", 535 | "name": "python3" 536 | }, 537 | "language_info": { 538 | "codemirror_mode": { 539 | "name": "ipython", 540 | "version": 3 541 | }, 542 | "file_extension": ".py", 543 | "mimetype": "text/x-python", 544 | "name": "python", 545 | "nbconvert_exporter": "python", 546 | "pygments_lexer": "ipython3", 547 | "version": "3.7.4" 548 | } 549 | }, 550 | "nbformat": 4, 551 | "nbformat_minor": 2 552 | } 553 | -------------------------------------------------------------------------------- /ml-animations/notebooks/decision_tree.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Imports" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 14, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "import matplotlib.animation as animation\n", 19 | "from IPython.display import HTML, Image\n", 20 | "plt.style.use('ggplot')\n", 21 | "params = {'legend.fontsize': '18',\n", 22 | " 'axes.labelsize': '20',\n", 23 | " 'axes.labelweight': 'bold',\n", 24 | " 'axes.titlesize':'20',\n", 25 | " 'xtick.labelsize':'18',\n", 26 | " 'ytick.labelsize':'18'}\n", 27 | "plt.rcParams.update(params)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "#### Functions" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 15, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "def gini(*args):\n", 44 | " \"\"\"\n", 45 | " Calculates the gini impurity for binary class data.\n", 46 | "\n", 47 | " Parameters\n", 48 | " ----------\n", 49 | " *args : int\n", 50 | " Number of examples in class i\n", 51 | "\n", 52 | " Returns\n", 53 | " -------\n", 54 | " float\n", 55 | " The gini impurity\n", 56 | " \"\"\"\n", 57 | " n = sum(args) # total examples\n", 58 | " gini = 0\n", 59 | " for c in args:\n", 60 | " gini += (c / n) * (1 - (c / n))\n", 61 | " return gini\n", 62 | "\n", 63 | "\n", 64 | "def split(x, y, splits):\n", 65 | " \"\"\"\n", 66 | " Calculates the gini impurity for binary class data.\n", 67 | "\n", 68 | " Parameters\n", 69 | " ----------\n", 70 | " x : int\n", 71 | " Feature values\n", 72 | " y : int\n", 73 | " Corresponding target values\n", 74 | " splits : int\n", 75 | " Vector of splits to calculate gini criterion for\n", 76 | "\n", 77 | " Returns\n", 78 | " -------\n", 79 | " list\n", 80 | " List of gini impurity for each split\n", 81 | " \"\"\"\n", 82 | " gini_splits = []\n", 83 | " for i in splits:\n", 84 | " mask = x < i\n", 85 | " gini_L = gini(sum(y[mask] == 0),\n", 86 | " sum(y[mask] == 1))\n", 87 | " p_L = 
sum(mask) / len(mask)\n", 88 | " gini_R = gini(sum(y[~mask] == 0),\n", 89 | " sum(y[~mask] == 1))\n", 90 | " p_R = sum(~mask) / len(mask)\n", 91 | " gini_splits.append(gini_L * p_L + gini_R * p_R)\n", 92 | " \n", 93 | " return gini_splits\n", 94 | "\n", 95 | "\n", 96 | "def mk_fig():\n", 97 | " \"\"\"\n", 98 | " Convenience function to plot figure canvas\n", 99 | "\n", 100 | " Returns\n", 101 | " -------\n", 102 | " fig, axes\n", 103 | " Figure and axes objects\n", 104 | " \"\"\"\n", 105 | " fig, axes = plt.subplots(1, 2, figsize=(14, 7))\n", 106 | " axes[0].set_xlim(-1, 11)\n", 107 | " axes[0].set_ylim(-1, 11)\n", 108 | " axes[0].set_xlabel('X1')\n", 109 | " axes[0].set_ylabel('X2')\n", 110 | " axes[0].xaxis.label.set_color('#988ED5')\n", 111 | " axes[0].yaxis.label.set_color('#E8A2A5')\n", 112 | " axes[1].set_xlim(-1, 11)\n", 113 | " axes[1].set_ylim(0, 1)\n", 114 | " axes[1].set_xlabel('Feature Value')\n", 115 | " axes[1].set_ylabel('Gini Impurity')\n", 116 | " \n", 117 | " return fig, axes" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "#### Data" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 16, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "np.random.seed(3)\n", 134 | "x1 = np.random.randint(0, 10, 10)\n", 135 | "x2 = np.random.randint(0, 10, 10)\n", 136 | "y = np.random.randint(0, 2, 10)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "#### Splits" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 17, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "x1_unique = np.unique(x1)\n", 153 | "x1_splits = x1_unique[:-1] + np.diff(x1_unique) / 2\n", 154 | "x1_gini = split(x1, y, x1_splits)\n", 155 | "\n", 156 | "x2_unique = np.unique(x2)\n", 157 | "x2_splits = x2_unique[:-1] + np.diff(x2_unique) / 2\n", 158 | "x2_gini = split(x2, y, x2_splits)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "#### Create and save animations" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "##### First split" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 18, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "fig, axes = mk_fig()\n", 182 | "mask = y == 0\n", 183 | "j = len(x1_splits)\n", 184 | "f = len(x1_splits) + len(x2_splits) + 1\n", 185 | "\n", 186 | "def init():\n", 187 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 188 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 189 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 190 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 191 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 192 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 193 | "\n", 194 | "def animate(i):\n", 195 | " if i <= j: # plot x1 splits\n", 196 | " axes[0].vlines(x1_splits[:i], -1, 11, '#988ED5')\n", 197 | " axes[1].plot(x1_splits[:i], x1_gini[:i], '#988ED5', marker='o', ms=9)\n", 198 | " elif i < f: # plot x2 splits\n", 199 | " axes[0].hlines(x2_splits[:(i-j)], -1, 11, '#E8A2A5')\n", 200 | " axes[1].plot(x2_splits[:(i-j)], x2_gini[:(i-j)], '#E8A2A5', marker='o', ms=9)\n", 201 | " else: # highlight optimum split\n", 202 | " if min(x1_gini) <= min(x2_gini):\n", 203 | " k = np.argmin(x1_gini)\n", 204 | " 
axes[0].vlines(x1_splits[k], -1, 11, 'k', lw=3)\n", 205 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 206 | " else: # x2 gives the lower impurity, so highlight its best (horizontal) split\n", 207 | " k = np.argmin(x2_gini)\n", 208 | " axes[0].hlines(x2_splits[k], -1, 11, 'k', lw=3)\n", 209 | " axes[1].plot(x2_splits[k], x2_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 210 | " \n", 211 | "plt.close(fig)\n", 212 | "ani = animation.FuncAnimation(fig,\n", 213 | " animate,\n", 214 | " init_func=init,\n", 215 | " frames=f + 1,\n", 216 | " interval=600)\n", 217 | "ani.save('../gif/decision_tree/decision_tree_1.gif', writer='imagemagick', fps=1, dpi=150)\n", 218 | "# HTML(ani.to_jshtml())" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "##### Second split" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 19, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "split_1 = 4\n", 235 | "mask_1 = x1 > split_1" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 20, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "x1_unique = np.unique(x1[mask_1])\n", 245 | "x1_splits = x1_unique[:-1] + np.diff(x1_unique) / 2\n", 246 | "x1_gini = split(x1[mask_1], y[mask_1], x1_splits)\n", 247 | "\n", 248 | "x2_unique = np.unique(x2[mask_1])\n", 249 | "x2_splits = x2_unique[:-1] + np.diff(x2_unique) / 2\n", 250 | "x2_gini = split(x2[mask_1], y[mask_1], x2_splits)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 21, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "fig, axes = mk_fig()\n", 260 | "mask = y == 0\n", 261 | "j = len(x1_splits)\n", 262 | "f = len(x1_splits) + len(x2_splits) + 1\n", 263 | "\n", 264 | "def init():\n", 265 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 266 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 267 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 268 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 269 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 270 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 271 | " axes[0].vlines(split_1, -1, 11, 'k', lw=3)\n", 272 | "\n", 273 | "def animate(i):\n", 274 | " if i <= j: # plot x1 splits\n", 275 | " axes[0].vlines(x1_splits[:i], -1, 11, '#988ED5')\n", 276 | " axes[1].plot(x1_splits[:i], x1_gini[:i], '#988ED5', marker='o', ms=9)\n", 277 | " elif i < f: # plot x2 splits\n", 278 | " axes[0].hlines(x2_splits[:(i-j)], split_1, 11, '#E8A2A5')\n", 279 | " axes[1].plot(x2_splits[:(i-j)], x2_gini[:(i-j)], '#E8A2A5', marker='o', ms=9)\n", 280 | " else: # highlight optimum split\n", 281 | " if min(x1_gini) <= min(x2_gini):\n", 282 | " k = np.argmin(x1_gini)\n", 283 | " axes[0].vlines(x1_splits[k], split_2, 11, 'k', lw=3)\n", 284 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 285 | " else:\n", 286 | " k = np.argmin(x2_gini)\n", 287 | " axes[0].hlines(x2_splits[k], split_1, 11, 'k', lw=3)\n", 288 | " axes[1].plot(x2_splits[k], x2_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 289 | " \n", 290 | "plt.close(fig)\n", 291 | "ani = animation.FuncAnimation(fig,\n", 292 | " animate,\n", 293 | " init_func=init,\n", 294 | " frames=f + 1,\n", 295 | " interval=600)\n", 296 | "ani.save('../gif/decision_tree/decision_tree_2.gif', writer='imagemagick', fps=1, dpi=150)\n", 297 | "# HTML(ani.to_jshtml())" 298 | ] 299 | }, 300 | { 301 | "cell_type": 
"markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "##### Third split" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 22, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "split_2 = 4.5\n", 314 | "mask_2 = (x1 > split_1) & (x2 > split_2)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 23, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "x1_unique = np.unique(x1[mask_2])\n", 324 | "x1_splits = x1_unique[:-1] + np.diff(x1_unique) / 2\n", 325 | "x1_gini = split(x1[mask_2], y[mask_2], x1_splits)\n", 326 | "\n", 327 | "x2_unique = np.unique(x2[mask_2])\n", 328 | "x2_splits = x2_unique[:-1] + np.diff(x2_unique) / 2\n", 329 | "x2_gini = split(x2[mask_2], y[mask_2], x2_splits)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 24, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "fig, axes = mk_fig()\n", 339 | "mask = y == 0\n", 340 | "j = len(x1_splits)\n", 341 | "f = len(x1_splits) + len(x2_splits) + 1\n", 342 | "\n", 343 | "def init():\n", 344 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 345 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 346 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 347 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 348 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 349 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 350 | " axes[0].vlines(split_1, -1, 11, 'k', lw=3)\n", 351 | " axes[0].hlines(split_2, split_1, 11, 'k', lw=3)\n", 352 | "\n", 353 | "def animate(i):\n", 354 | " if i <= j: # plot x1 splits\n", 355 | " axes[0].vlines(x1_splits[:i], split_2, 11, '#988ED5')\n", 356 | " axes[1].plot(x1_splits[:i], x1_gini[:i], '#988ED5', marker='o', ms=9)\n", 357 | " elif i < f: # plot x2 splits\n", 358 | " axes[0].hlines(x2_splits[:(i-j)], split_1, 11, '#E8A2A5')\n", 359 | " axes[1].plot(x2_splits[:(i-j)], x2_gini[:(i-j)], '#E8A2A5', marker='o', ms=9)\n", 360 | " else: # highlight optimum split\n", 361 | " if min(x1_gini) <= min(x2_gini):\n", 362 | " k = np.argmin(x1_gini)\n", 363 | " axes[0].vlines(x1_splits[k], split_2, 11, 'k', lw=3)\n", 364 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 365 | " else:\n", 366 | " k = np.argmin(x2_gini)\n", 367 | " axes[0].hlines(x2_splits[k], split_1, 11, 'k', lw=3)\n", 368 | " axes[1].plot(x2_splits[k], x2_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 369 | " \n", 370 | "plt.close(fig)\n", 371 | "ani = animation.FuncAnimation(fig,\n", 372 | " animate,\n", 373 | " init_func=init,\n", 374 | " frames=f + 1,\n", 375 | " interval=600)\n", 376 | "ani.save('../gif/decision_tree/decision_tree_3.gif', writer='imagemagick', fps=1, dpi=150)\n", 377 | "# HTML(ani.to_jshtml())" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "##### Final tree" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 25, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "split_3 = 6.5" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 26, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "fig, axes = mk_fig()\n", 403 | "mask = y == 0\n", 404 | "j = len(x1_splits)\n", 405 | "f = len(x1_splits) + len(x2_splits) + 1\n", 406 | "\n", 407 | "def init():\n", 408 | " axes[0].scatter(x1[mask], x2[mask], s=100, 
c='#E24A33', label='Class 0')\n", 409 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 410 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 411 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 412 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 413 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 414 | " axes[0].vlines(split_1, -1, 11, 'k', lw=3)\n", 415 | " axes[0].hlines(split_2, split_1, 11, 'k', lw=3)\n", 416 | " axes[0].hlines(split_3, split_1, 11, 'k', lw=3)\n", 417 | "\n", 418 | "def animate(i):\n", 419 | " return\n", 420 | " \n", 421 | "plt.close(fig)\n", 422 | "ani = animation.FuncAnimation(fig,\n", 423 | " animate,\n", 424 | " init_func=init,\n", 425 | " frames=1,\n", 426 | " interval=600)\n", 427 | "ani.save('../gif/decision_tree/decision_tree_4.gif', writer='imagemagick', fps=1, dpi=150)\n", 428 | "# HTML(ani.to_jshtml())" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "#### View animations" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 27, 441 | "metadata": {}, 442 | "outputs": [ 443 | { 444 | "data": { 445 | "text/html": [ 446 | "" 447 | ], 448 | "text/plain": [ 449 | "" 450 | ] 451 | }, 452 | "execution_count": 27, 453 | "metadata": {}, 454 | "output_type": "execute_result" 455 | } 456 | ], 457 | "source": [ 458 | "Image(url='../gif/decision_tree/decision_tree_1.gif')" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 28, 464 | "metadata": {}, 465 | "outputs": [ 466 | { 467 | "data": { 468 | "text/html": [ 469 | "" 470 | ], 471 | "text/plain": [ 472 | "" 473 | ] 474 | }, 475 | "execution_count": 28, 476 | "metadata": {}, 477 | "output_type": "execute_result" 478 | } 479 | ], 480 | "source": [ 481 | "Image(url='../gif/decision_tree/decision_tree_2.gif')" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 29, 487 | "metadata": {}, 488 | "outputs": [ 489 | { 490 | "data": { 491 | "text/html": [ 492 | "" 493 | ], 494 | "text/plain": [ 495 | "" 496 | ] 497 | }, 498 | "execution_count": 29, 499 | "metadata": {}, 500 | "output_type": "execute_result" 501 | } 502 | ], 503 | "source": [ 504 | "Image(url='../gif/decision_tree/decision_tree_3.gif')" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": 30, 510 | "metadata": {}, 511 | "outputs": [ 512 | { 513 | "data": { 514 | "text/html": [ 515 | "" 516 | ], 517 | "text/plain": [ 518 | "" 519 | ] 520 | }, 521 | "execution_count": 30, 522 | "metadata": {}, 523 | "output_type": "execute_result" 524 | } 525 | ], 526 | "source": [ 527 | "Image(url='../gif/decision_tree/decision_tree_4.gif')" 528 | ] 529 | } 530 | ], 531 | "metadata": { 532 | "kernelspec": { 533 | "display_name": "Python 3", 534 | "language": "python", 535 | "name": "python3" 536 | }, 537 | "language_info": { 538 | "codemirror_mode": { 539 | "name": "ipython", 540 | "version": 3 541 | }, 542 | "file_extension": ".py", 543 | "mimetype": "text/x-python", 544 | "name": "python", 545 | "nbconvert_exporter": "python", 546 | "pygments_lexer": "ipython3", 547 | "version": "3.7.4" 548 | } 549 | }, 550 | "nbformat": 4, 551 | "nbformat_minor": 2 552 | } 553 | -------------------------------------------------------------------------------- /ml-animations/notebooks/knn_draft.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | 
"metadata": {}, 6 | "source": [ 7 | "#### Imports" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from scipy.stats import mode\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import matplotlib.animation as animation\n", 20 | "from IPython.display import HTML, Image\n", 21 | "plt.style.use('ggplot')\n", 22 | "params = {'legend.fontsize': '18',\n", 23 | " 'axes.labelsize': '20',\n", 24 | " 'axes.labelweight': 'bold',\n", 25 | " 'axes.titlesize':'20',\n", 26 | " 'xtick.labelsize':'18',\n", 27 | " 'ytick.labelsize':'18'}\n", 28 | "plt.rcParams.update(params)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "#### Functions" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "def closest_node(node, nodes, n=1):\n", 45 | " \"\"\"\n", 46 | " Find the closest point in a list to a query point.\n", 47 | "\n", 48 | " Parameters\n", 49 | " ----------\n", 50 | " node : array\n", 51 | " query point\n", 52 | " nodes: array\n", 53 | " array of points to compare to\n", 54 | " n: int\n", 55 | " return n closest pairs\n", 56 | "\n", 57 | " Returns\n", 58 | " -------\n", 59 | " int\n", 60 | " The index of the closest point in the list\n", 61 | " \"\"\"\n", 62 | " nodes = np.asarray(nodes)\n", 63 | " dist = np.sum((nodes - node)**2, axis=1)\n", 64 | " \n", 65 | " return np.argsort(dist)[:n]\n", 66 | "\n", 67 | "def mk_fig():\n", 68 | " \"\"\"\n", 69 | " Convenience function to plot figure canvas\n", 70 | "\n", 71 | " Returns\n", 72 | " -------\n", 73 | " fig, axes\n", 74 | " Figure and axes objects\n", 75 | " \"\"\"\n", 76 | " fig, axes = plt.subplots(1, 1, figsize=(7, 7))\n", 77 | " axes.set_xlim(-1, 11)\n", 78 | " axes.set_ylim(-1, 11)\n", 79 | " axes.set_xlabel('X1')\n", 80 | " axes.set_ylabel('X2')\n", 81 | "\n", 82 | " return fig, axes" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "#### Data" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "np.random.seed(11)\n", 99 | "X = np.array([np.random.randint(0, 10, 9),\n", 100 | " np.random.randint(0, 10, 9)]).T\n", 101 | "y = np.random.randint(0, 2, 9)\n", 102 | "xq = np.array([6, 3]) # query point" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "#### Create and save animations" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 4, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "fig, axes = mk_fig()\n", 119 | "mask = y == 0\n", 120 | "colors = ['#E24A33', '#348ABD']\n", 121 | "\n", 122 | "def init():\n", 123 | " axes.scatter(X[mask, 0], X[mask, 1], s=100, c='#E24A33', label='Class 0', zorder=2)\n", 124 | " axes.scatter(X[~mask, 0], X[~mask, 1], s=100, c='#348ABD', label='Class 1', zorder=2)\n", 125 | " axes.scatter(xq[0], xq[1], s=200, facecolor='None', edgecolor='k', lw=2, label='Unknown', zorder=2)\n", 126 | " axes.legend(facecolor='#F0F0F0', framealpha=1)\n", 127 | "\n", 128 | "def animate(i):\n", 129 | " if i == 0:\n", 130 | " axes.scatter(xq[0], xq[1], s=200, facecolor='None', edgecolor='k', lw=2, label='Unknown', zorder=2)\n", 131 | " if i == 1:\n", 132 | " k = closest_node(xq, X, i)\n", 133 | " axes.plot([xq[0], X[k,0]],\n", 134 | " [xq[1], X[k,1]],\n", 135 | " 'k-', 
zorder=1)\n", 136 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[y[k[0]]], edgecolor='k', lw=2, zorder=2)\n", 137 | " elif i % 2 == 1:\n", 138 | " k = closest_node(xq, X, i)\n", 139 | " axes.plot([np.repeat(xq[0], 2), X[k[-2:],0]],\n", 140 | " [np.repeat(xq[1], 2), X[k[-2:],1]],\n", 141 | " 'k-', zorder=1)\n", 142 | " if sum(y[k] == 0) > sum(y[k] == 1):\n", 143 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[0], edgecolor='k', lw=2, zorder=2)\n", 144 | " elif sum(y[k] == 1) > sum(y[k] == 0):\n", 145 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[1], edgecolor='k', lw=2, zorder=2)\n", 146 | " else: # if equal counts, set to closest point's color\n", 147 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[y[k[0]]], edgecolor='k', lw=2, zorder=2)\n", 148 | " \n", 149 | "plt.close(fig)\n", 150 | "ani = animation.FuncAnimation(fig,\n", 151 | " animate,\n", 152 | " init_func=init,\n", 153 | " frames=10,\n", 154 | " interval=600)\n", 155 | "ani.save('../gif/knn/knn.gif', writer='imagemagick', fps=1, dpi=75)\n", 156 | "# HTML(ani.to_jshtml())" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "#### View animations" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "Image(url='../gif/knn/knn.gif')" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "kernelspec": { 178 | "display_name": "Python 3", 179 | "language": "python", 180 | "name": "python3" 181 | }, 182 | "language_info": { 183 | "codemirror_mode": { 184 | "name": "ipython", 185 | "version": 3 186 | }, 187 | "file_extension": ".py", 188 | "mimetype": "text/x-python", 189 | "name": "python", 190 | "nbconvert_exporter": "python", 191 | "pygments_lexer": "ipython3", 192 | "version": "3.7.4" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 2 197 | } 198 | -------------------------------------------------------------------------------- /ml-animations/notebooks/logistic_regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Imports" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 33, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from scipy.optimize import minimize\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import matplotlib.animation as animation\n", 20 | "from IPython.display import HTML, Image\n", 21 | "plt.style.use('ggplot')\n", 22 | "params = {'legend.fontsize': '18',\n", 23 | " 'axes.labelsize': '20',\n", 24 | " 'axes.labelweight': 'bold',\n", 25 | " 'axes.titlesize':'20',\n", 26 | " 'xtick.labelsize':'18',\n", 27 | " 'ytick.labelsize':'18'}\n", 28 | "plt.rcParams.update(params)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "#### Functions" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 36, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "def loss_lr(w, X, y):\n", 45 | " return np.sum(np.log(1 + np.exp(-y*(X@w))))\n", 46 | "\n", 47 | "def loss_lr_grad(w, X, y):\n", 48 | " return -X.T @ (y/(1+np.exp(y*(X@w))))\n", 49 | "\n", 50 | "def mk_fig():\n", 51 | " \"\"\"\n", 52 | " Convenience function to plot figure canvas\n", 53 | "\n", 54 | " Returns\n", 55 | " -------\n", 56 | " fig, axes\n", 57 | " Figure and axes objects\n", 58 | 
" \"\"\"\n", 59 | " fig, axes = plt.subplots(1, 1, figsize=(7, 7))\n", 60 | " axes.set_xlim(-1, 21)\n", 61 | " axes.set_ylim(-1.5, 1.5)\n", 62 | " axes.set_xlabel('x')\n", 63 | " axes.set_ylabel('y')\n", 64 | "\n", 65 | " return fig, axes" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "#### Data" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 113, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "np.random.seed(1)\n", 82 | "x = np.concatenate((np.random.randint(0, 12, 10),\n", 83 | " np.random.randint(8, 20, 10))\n", 84 | " )\n", 85 | "y = np.concatenate((np.ones((10,)) * -1,\n", 86 | " np.ones(10,))\n", 87 | " )" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 114, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "[]" 99 | ] 100 | }, 101 | "execution_count": 114, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | }, 105 | { 106 | "data": { 107 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAd4AAAHBCAYAAADHHtqNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de3TU9Z3/8ddMrlwSIEwSCMgtgCZBBUFMYjYWEpWbVvxtQUVbPT271do97bYe22699aiI/tweD8jZ0z22rOAqIDcvQLqGKFJCKDS0QoIQwkVMCLcJ5ZpMJjO/P/hl1jQXJsn3+5lM8nyc4znme/nMO+/5Mq98r+Pw+/1+AQAAI5yhLgAAgN6E4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgyJDXUBXrF+/XkeOHNHhw4d16tQpJSYmaunSpR0a48knn9Tp06dbnffWW28pPj7eilIBAJAU5sH73nvvqX///ho9erQuXbrU6XGGDRumuXPntpjep0+frpQHAEALYR28S5YsUXJysiTpZz/7merq6jo1zoABA5Sbm2tlaQAAtCqsz/E2ha4VGhsbdfnyZcvGAwCgNWG9x2uViooKPfzww2psbFTfvn01ZcoUPfTQQ0pISAh1aQCAHqbXB+/w4cM1ffp0DRs2TI2NjSorK1NRUZH27dunhQsXthm+hYWFKiwslCQtWrTIZMkAgDDm6CnfTtR0jrejVzW35o9//KMWL16s6dOn6/HHHw9qnerq6i6/rsvl0pkzZ7o8DlpHf+1Hj+1Hj+1nVY9TUlJanR7W53jtkpOTo8TERO3ZsyfUpQAAehiCtw1JSUk6f/58qMsAAPQwBG8bampqNHDgwFCXAQDoYXpN8J45c0ZVVVXyer2BaRcvXmx12YKCAp09e1aTJ082VR4AoJcI66uaP//888DjHs+fPy+v16u1a9dKkhITE5s9FOPNN99UeXm53nzzTSUlJUmStm7dqqKiIk2cOFGJiYny+XwqKyvTrl27lJycrHnz5pn/pQAAPVpYB29RUZHKy8ubTVu1apUkKT09/ZpPo0pNTdW+fftUXFwcOJ+blJSkb3/727rvvvvUr18/ewoHAPRaPeZ2olDjdqLuj/7ajx7bjx7bj9uJAADoQQheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMigx1AV21fv16HTlyRIcPH9apU6eUmJiopUuXdnicrVu3auPGjaqqqlLfvn01efJkPfTQQ4qPj7ehaqDnuOJp1P/9Y7X+UnNJfkkOSZOG9NPP70hRdESE8Xo8Xp9W/OW0Pjv6NzX6/IpwOjRt1AB995ZERTqD39do9Pm14/gFFR3+mzxen6IjncobM0BZI+LkdDiMjdEddafehGOPHX6/3x/qIrpi3rx56t+/v0aPHq3Dhw+rb9++HQ7ejz/+WMuXL1d6erpycnJ09uxZffzxx0pMTNTChQsVGxt7zTGqq6s7+ysEuFwunTlzpsvjoHX013plJy/pV4XH1dqHiEPSy/nXKSO5n7F6vjpXp6cKjqm+sWVFMREOvT5jpEYMvPa/53N1Xr302dc6WlunBt//To9ySqMGxeqZbw3XwNj291usGKM1od6Ou1NvunuPU1JSWp0e9oealyxZot///vd69tlnlZCQ0OH1z58/r1WrVik1NVXPPfec8vPzNX/+fP34xz/W119/rU2bNtlQNRD+PI2NbYauJPkl/arwuDyNjUbq8fp8bYauJNU3+vVUwTF5fb5W5zfx+f166bOvVXG2+Ye5JDX4pIqzdXrps6/la2efxYoxuqPu1Jtw7nHYB29ycnKX1t+1a5fq6+s1c+ZMOb9xGGrKlClKTk7Wtm3buloi0CO9urW6zdBt4pf02uddPxoUjOWlp9sM3Sb1jX6t2HO63WV2fHVBR2vr2l3maG2dSo5fsHWM7qg79Sacexz2wdtVlZWVkqTx48e3mDdu3DhVV1errq79NxfojfbUXApuuRPBLddVnx79W3DLHWl/uS2H/9ZiD+rvNfikLZVtj2PFGN1Rd+pNOPc47C+u6iq32y1JrR6mTkhIkN/vl9vtbnGsvrCwUIWFhZKkRYsWyeVydbmWyMhIS8ZB6+ivtYI9gOeTjPTd568IarlGv6PdevzO4PbQfc62tycrxmhLKLfj7tSbcO5xrw9ej8cj6Wqj/15UV
FSzZb4pPz9f+fn5gZ+tOBEf6osmejr6a61grxd1ypp/H9d8HUdwfwpEOPzt1uPwBXdO2unztjmOFWO0JZTbcXfqTTj0uMdeXNVV0dHRkiSv19tiXkNDQ7NlAPyvSUOCu1p50lAzVzVPGzUguOVGt79c3pgBirrGJ2OUU8pLbXscK8bojrpTb8K5x70+eJsOMTcdcv4mt9sth8PRqaulgZ7u53ekXHOv1yHp6dzW/+q32ndvSVRMRPsVxUQ49MikxHaXyRoRp1GD2r/laNSgWGVeF2frGN1Rd+pNOPe41wdvamqqJOngwYMt5lVUVCglJSWo+3iB3iY6IkIv51/XZvg23cdr6iEakU6nXp8xss3wbbqP91oP0XA6HHrmW8M1bnBsiz2qKKc0bvDV+0PbeziDFWN0R92pN+Hc4151jvfMmTOqr69XcnJy4JzurbfeqmXLlqmgoEA5OTmBW4p2796tkydPav78+aEsGejWMpL7afUD4/Tq59X6y4lL8unqX/OThvbT07nmn1w1YmCs3p03Tsv3nNZnR/4mr8+vSKdD00YP0COTgn9y1cDYSL1290iVHL+gLZV/U32jTzERTuWlDlDmdcE9EcmKMbqj7tSbcO1x2D+56vPPP9fp01fvyysoKJDX69WcOXMkSYmJicrNzQ0s+8ILL6i8vFxvvvmmkpKSAtM/+ugjrVixQhkZGbr99tvldrv10UcfyeVy6ZVXXuHJVT0E/bUfPbYfPbaf3RdXhf0eb1FRkcrLy5tNW7VqlSQpPT29WfC25Z577lFcXJw2btyoZcuWqU+fPsrKytKCBQs4zAwAsFTY7/F2F+zxdn/013702H702H7cTgQAQA9C8AIAYBDBCwCAQQQvAAAGEbwAABhE8AIAYBDBCwCAQQQvAAAGEbwAABhE8AIAYBDBCwCAQQQvAAAGEbwAABhE8AIAYBDBCwCAQQQvAAAGRYa6AAAAOqKxsVFVVVXav39/4L8DBw5o8eLFuummm0Jd3jURvACAkPL5fDp37pxOnjypAwcOBML0yy+/VFVVVdDj/Pa3v9XSpUttrNQaBC8AwBZer1enTp3SoUOHtG/fPu3du1f79u3T4cOHuzx2SkqK0tLSlJaWpvT0dN1www0aO3asBVXbj+AFAATlwoULKisr0549e7R582a53W7deeedOnDggPbu3Su3292l8ceMGaMbbrhB6enpSktL0/XXX6+hQ4cqNjbWot+geyB4AaAXa2xs1MmTJ1VeXq5du3bp/fff18mTJ4Ne/z//8z/bnR8XF6cJEyYE/ktPT1dKSooGDBggh8PR1fLDEsELAD2M3+/XxYsXdeLECZWXl6uoqEhr1661/HXS0tI0Z84cTZgwQWlpaUpKSlJUVJTlr9PTELwAECY8Ho+OHz+u8vJy7d27Vx999JFKSkosf52srCzNnDlTN998s2644Qb179/f8tfozQheAAghv9+v2tpanT59WjU1NSouLtaGDRv09ddfW/5a9913n6ZPn66bb75Zo0ePVkREhOWvgWsjeAHABj6fT2fPntWJEydUWVmpoqIirVu3zvLXSU5O1owZMzRt2jRNmjRJgwcP7rXnTsMFwQsAHeT1elVTU6MvvvhCRUVFWrVqlXw+n6WvkZGRoby8PE2fPl033HCD4uLiJEkul0tnzpyx9LVgFsELALq6h3ro0CFt2bJFb7/9to4fP27L62RnZysvL0933HGHRo4cqb59+9ryOui+CF4APd6pU6e0ZcsWffDBB9q2bZstrzFz5kzl5eXp1ltv1fDhw3vcvaewDsELIGw1NjaqtLRUmzdv1m9/+1tbXmP27NmaNWuWbrzxRl133XWKjo625XXQexC8ALqly5cv69NPP9WyZcu0Y8cOy8fPy8vT7NmzNXXqVF133XWKjOTjEGawpQEwrrS0VPfcc48tY6elpekf//EflZ+fzy0z6JYIXgCW8fv9eu+99/TUU0/ZMv4///M/a9asWZo4cSJPSELYIngBBM3tdqugoECLFy+29Kpfl8ulV199VZMnT9bgwYPldDotGxvobgheAJKuhur27du1ZMkSlZWVWTburFmz9JOf/EQpKSkaOHAgD3dAr0fwAr3AxYsX9ec//1n/8R//YentNN///vc1b948jRw5UnFxcTzcAQgCwQuEufr6en3xxRdavny5pY8k/M53vqNHHnlE6enp6tOnj2XjAr0dwQt0c42NjTp69Kg2bdqkDz/8UOXl5V0e86677tJjjz2mzMxM7ksFDCN4gRBqaGhQdXW1ioqK9OGHH+pPf/pTl8e85ZZb9MQTT2jatGnsqQLdEMEL2OjChQsqKSnR+vXr9cEHH3R5vGnTpumee+7RnXfeqYSEBAsqBGAawQt0kt/v17Fjx/TBBx9ow4YNOnjwYJfGmzJliu69917l5eVp5MiRXP0L9FAEL9AGj8ejTz/9VOvXr9dHH33UpbFSUlI0d+5czZw5UxMmTODhD0AvRvCiV/L7/Tp8+LA+/PBDbdiwQYcOHerSeHl5ebr//vs1bdo0DRgwwKIqAfREBC96pMbGRh07dkw7d+7Ujh07tHbt2k6Pdd111+m+++7T3LlzNX78eA4BA+gSghdhyev16uDBgyopKdGOHTu0c+dOnT17tlNj3XXXXbrvvvt055138qXkAGxH8KJb8ng82rdvX7NgvXTpUofGSElJUWZmpjIzM3XbbbdpypQpOnfunE0VA0BwCF6ExJUrV1RaWho4FFxSUiKfz9ehMcaMGaOsrKxAsA4dOrTdh+vzfasAugM+iWCL8+fPa9euXYFgLS0t7fAYGRkZgT3WW2+9VS6Xi/OrAMIewYtOOXv2rEpKSgKHgvfv39/hMSZPnhzYY508ebLi4+NtqBQAuheCF62qqqpqFqxHjhzp0PqRkZG67bbbAsE6ceJEHl8IAOoBwevz+bRp0yYVFhbq9OnTio+PV1ZWlubNm6fY2Nhrrj9v3rxWp8fExGjFihVWl9stNN3D2nTR0o4dO3TixIkOjdG/f3/ddtttys7OVmZmpjIyMngoBAAEIeyD9+2339bmzZs1depUzZkzR1VVVdq8ebOOHDmiZ599tt2LbZqkpaUpLy+v2bRwvhCnsbFR+/fvV0lJSSBYa2trOzSGy+VSZmZmYI91/PjxQfUSANC+8E0XScePH1dBQYGmTp2qp556KjA9KSlJy5YtU3FxsXJycq45TlJSknJzc+0s1VJerzdwq01xcbFKSko6fKvN8OHDmwUrzwYGADPCOni3b98uv9+v2bNnN5uel5end999V9u2bQsqeKWrYeb1eoM6PG23hoYG/fWvfw2cXy0pKVFdXV2Hxhg3blzgHGvTrTYAgNAL6+CtrKyUw+HQ2LFjm02Pjo7WqFGjgn7+bklJibZt2yafz6f4+HhlZ2frgQceMPoUowULFuizzz4Levm0tDRlZWUFgnXw4MH2FQcAsExYB6/b7VZ8fHyrF/UMGjRIBw4ckNfrbfd87dixY5WZmakhQ4boypUr2rNnjwoKClReXq6XXnqpzT3gwsJCFRYWSpIW
LVokl8vVpd+lvLy82c8TJ05Ubm6ucnNzdfvtt/PdqxaIjIzs8vuE9tFj+9Fj+9nd47AOXo/H02aoRkdHS5Lq6+vbDd6FCxc2+/mOO+7QiBEjtHLlSm3atEn3339/q+vl5+crPz8/8POZM2c6Wn4ze/bskcvlanUcn8/X5fGhNvsL69Bj+9Fj+1nV45SUlFanh/VlqtHR0fJ6va3O83g8kq7eFtRR9957ryIjIzv1tCUAANoT1sGbkJCg8+fPq6GhocW82tpaxcXFdeq2oMjISA0aNEgXLlywokwAAALCOnhTU1Pl9/tbXETl8Xh09OhRpaamdmpcj8cjt9vNF5oDACwX1sGbnZ0th8OhjRs3Npu+ZcsW1dfXN7uVqKamRlVVVc2Wa2uPdtWqVWpsbNTkyZOtLxoA0KuF9cVVI0aM0N13362CggK9/vrrmjRpUuDJVenp6c2C98UXX9Tp06e1evXqwLS1a9eqoqJCGRkZcrlcqqur0549e1RWVqZx48Zp5syZofi1AAA9WFgHryQ9+uijSkxMVGFhoUpLSxUXF6cZM2Zo/vz513zEYUZGhqqqqrR161ZdvHhRTqdTQ4YM0QMPPKA5c+YErowGAMAqDr/f7w91ET1BdXV1l8fgNgF70V/70WP70WP7cTsRAAA9CMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgUGSoC+gqn8+nTZs2qbCwUKdPn1Z8fLyysrI0b948xcbG2r4+AAAdEfQe74YNG3Tu3Dk7a+mUt99+W8uXL9fw4cP12GOPKTMzU5s3b9arr74qn89n+/oAAHRE0Hu87733nlatWqVbbrlFeXl5mjhxopzO0B6pPn78uAoKCjR16lQ99dRTgelJSUlatmyZiouLlZOTY9v6AAB0VIeS0+fzaffu3Xr11Vf1wx/+UCtXrtTJkyftqu2atm/fLr/fr9mzZzebnpeXp5iYGG3bts3W9QEA6Kig93ijo6Pl8XgCP9fW1mr9+vXasGGD0tPTlZeXp9tuu02RkeZOG1dWVsrhcGjs2LEtah01apQOHTpk6/oAAHRU0Cn5u9/9Trt379b27dv1l7/8RV6vV5Lk9/tVVlamsrIy9e/fX//wD/+g6dOna8SIEbYV3cTtdis+Pl5RUVEt5g0aNEgHDhyQ1+tt84+Brq4PAEBHdWiPNzs7W9nZ2bpy5Yp27typ4uJi7d27N3AR0sWLF7V582Zt3rxZGRkZ+t73vqeRI0faVrzH42kzFKOjoyVJ9fX1bS7TlfULCwtVWFgoSVq0aJFcLleH6/97kZGRloyD1tFf+9Fj+9Fj+9nd407tyvXp00ff+ta3lJOTo8LCQr3zzjtqaGhotkxZWZl+9atf6bnnntP48eMtKfbvRUdHq66urtV5TYfFY2JibFk/Pz9f+fn5gZ/PnDkTVM3tcblcloyD1tFf+9Fj+9Fj+1nV45SUlFand+qy5JqaGr3zzjt64okntGzZshah27SH2NDQoNWrV3fmJYKSkJCg8+fPt3h96eo56Li4uHYPE3d1fQAAOiroVPH5fPrTn/6kTz75RGVlZfL7/c0HioxUTk6OZs2apeTkZC1ZskS7d+9WZWWl5UU3SU1N1V//+lcdOnRIaWlpgekej0dHjx5tNs2O9QEA6Kigg/fxxx/X3/72txbTBw4cqLvuukt33nmn4uPjA9Pz8/O1e/duXb582ZpKW5Gdna3169dr48aNzUJyy5Ytqq+vb3YPbk1NjRobGzVs2LBOrQ8AgBWCDt6/D90xY8Zo1qxZys7OVkRERIvl2zu3apURI0bo7rvvVkFBgV5//XVNmjRJVVVV2rx5s9LT05sF54svvqjTp083O/TdkfUBALBCh05gOp1OTZ06VbNnz77mBVPp6elatWpVl4oLxqOPPqrExEQVFhaqtLRUcXFxmjFjhubPnx/Uk7W6uj4AAB3h8P/9ydo2/Pd//7dmzJihwYMH211TWKquru7yGFytaC/6az96bD96bD+7r2oOeo93wYIFXS4CAIDejmOpAAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYFBnqArpq69at2rhxo6qqqtS3b19NnjxZDz30kOLj44Na/8knn9Tp06dbnffWW28FPQ4AAMEI6+D9+OOPtXz5cqWnp+uxxx7T2bNn9fHHH+vgwYNauHChYmNjgxpn2LBhmjt3bovpffr0sbpkAEAvF7bBe/78ea1atUqpqal67rnn5HRePWqempqq1157TZs2bdL9998f1FgDBgxQbm6uneUCACApjM/x7tq1S/X19Zo5c2YgdCVpypQpSk5O1rZt2zo0XmNjoy5fvmx1mQAANBO2e7yVlZWSpPHjx7eYN27cOG3fvl11dXVBHW6uqKjQww8/rMbGRvXt21dTpkzRQw89pISEBMvrBgD0bmEbvG63W5JaDceEhAT5/X653W6lpKS0O87w4cM1ffp0DRs2TI2NjSorK1NRUZH27dunhQsXthm+hYWFKiwslCQtWrRILperi7+RFBkZack4aB39tR89th89tp/dPQ558F66dEkbN24MevlZs2apf//+8ng8kq426O9FRUVJUmCZ9vzyl79s9vPtt9+u9PR0LV68WKtXr9bjjz/e6nr5+fnKz88P/HzmzJmgf4e2uFwuS8ZB6+iv/eix/eix/azqcVs7ft0ieNesWRP08rm5uerfv7+io6MlSV6vN/D/TRoaGiSpxfRg5eTk6L333tOePXs6tT4AAG0JefAmJSVp9erVHV6v6RCw2+3WkCFDms1zu91yOBxdOkeblJSkL7/8stPrAwDQmrC9qjk1NVWSdPDgwRbzKioqlJKSEvR9vK2pqanRwIEDO70+AACtCdvgvfXWWxUdHa2CggL5fL7A9N27d+vkyZPKyclptvyZM2dUVVUlr9cbmHbx4sVWxy4oKNDZs2c1efJke4oHAPR
aIT/U3Fnx8fGaP3++VqxYoRdffFG333673G63PvroIw0bNkyzZ89utvybb76p8vJyvfnmm0pKSpJ09XGTRUVFmjhxohITE+Xz+VRWVqZdu3YpOTlZ8+bNC8WvBgDowcI2eCXpnnvuUVxcnDZu3Khly5apT58+ysrK0oIFC4I6zJyamqp9+/apuLhY58+fl3T13O63v/1t3XffferXr5/dvwIAoJdx+P1+f6iL6Amqq6u7PAa3CdiL/tqPHtuPHtvP7tuJwvYcLwAA4YjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMCgy1AV0xSeffKL9+/fr8OHDOnHihPx+v1avXt3hcUpLS7Vu3TodO3ZMkZGRuvHGG/Xwww8rKSnJhqqb83k98q9dLpV8ppM+n+R0SpnT5Pg/35MzMri3x+9rlP/PO+TfsUXyeKToaDmy8+W4JUsOZ+/+28qK/lrJqvfKinG623bzzfdKjY1SRERI3yvALg6/3+8PdRGd9eSTT+rChQsaPXq0Tp06pbNnz3Y4eHfu3Knf/OY3GjlypPLy8nT58mVt2rRJTqdTr7zyihISEoIap7q6usP1+6q+kv+Vn0n19S1nxsTI8ct/l3PYiHbH8J8/J9+bL0lfH5EaGv53RlSUNHy0nD96Ro74gR2urSewor9Wsuq9smIcu7Ybl8ulM2fOdHi97vZedWed7TGCZ1WPU1JSWp0e1sF76tQpuVwuOZ1OLVq0SKWlpR0KXq/XqyeffFIRERH6zW9+o9jYWEnS0aNH9fOf/1zTp0/XD37wg6DG6mjw+rxe+X/yYOsfNE1iYuR44702/9r3+3zyLXpaOnKw7TFGj5fzF6/1uj1fK/prJaveKyvGsXO76cwHVnd7r7o7gtd+dgdvWH8aJyUlydmFQCkvL1dtba2mT58eCF1JGjVqlDIyMlRcXCyv12tFqS34177d/geNJNXXy79+RdtjlO64usfSnq+PyL+npBMVhjcr+mtpPRa9V1aM0922m+72XgF2C+vg7arKykpJ0vjx41vMGzdunK5cuaITJ07Y8+Ilnwa3XHFRm7P8xYXNDxO2pqFB/u2FHSish7Cgv1ay6r2yYpxut910s/cKsFuvPm5TW1srSa2ex22a5na7dd1117WYX1hYqMLCqx9MixYtksvl6tBrn/T5glvQ19jm2G6/X9f4+OPRdS8AAA+uSURBVJQkRfl9SuhgfeHOiv5ayar3yopx7NxuIiMjQ/JvoTfpTI/RMXb3OOTBe+nSJW3cuDHo5WfNmqX+/ftb8tr1///wVlRUVIt5TdPq2zgElp+fr/z8/MDPHT4fEOwhcmdEm2M3OhxBDdHgcPa+c0IW9NdKVr1XVoxj53bTqXNj3ey96u44x2s/u8/xdovgXbNmTdDL5+bmWha8MTExkqSGVg67NU1rWsZymdOkwg+uvVz29DZnObLz5f/yi/YPG0ZFyXF7ftvzeyoL+mslq94rK8bpdttNN3uvALuFPHiTkpI6de+tFQYNGiTp6uHk4cOHN5vndrsltX4Y2gqO//M9+bcVXPtKzrmPtD3GLVny/8/o9q9OHT5ajkmZXag0PFnRX0vrsei9smKc7rbddLf3CrBbr764KjU1VZJ08GDLD6CKigr16dNHQ4cOteW1nZGRcvzy36W29qib7l1s5/YJh9Mp54+ekUaPv3r/5TdFRV29JeRHz/S6W4kka/prJaveKyvG6W7bTXd7rwC79Zotuba2VpcvX5bL5QocPk5PT9egQYNUVFSkOXPmNLuPt6ysTNOmTVOkjf/YncNGyPfGe/Kve1va8Znka5ScEVL2dDnmPhLUB40jfqCcv3hN/j075N++RfLUS9ExctyeL8ekzF4Zuk2s6K+VrHqvrBinu203Ld6rRq8UERmy9wqwU1g/QGP37t06duyYJGnbtm2qrq7W/PnzJUn9+vXTjBkzAssuXbpUW7du1fPPP6+MjIzA9B07duiNN94IPLnqypUrgYu9Xn31VVufXPX3uGjCXvTXfvTYfvTYfj3+4qqu2Llzp7Zu3dps2qpVqyRJiYmJzYK3LVlZWYqOjta6deu0YsUKRUVFacKECVqwYIFt53cBAL1XWO/xdifs8XZ/9Nd+9Nh+9Nh+PDISAIAehOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMCgyFAX0BWffPKJ9u/fr8OHD+vEiRPy+/1avXp1h8Z44YUXVF5e3uq8V155RampqVaUCgCApDAP3g0bNujChQsaPXq06uvrdfbs2U6NExcXp+9973stpicnJ3e1RAAAmgnr4H3++eflcrnkdDq1aNGiTgdvbGyscnNzLa4OAICWwvocb1JSkpxOa34Fn8+ny5cvy+/3WzIeAACtCes9Xqu43W5997vflcfjUUxMjG6++WY9+OCDGjZsWKhLAwD0ML0+eJOSknT99ddr5MiRcjqdqqioUEFBgfbu3asXX3xRI0aMaHW9wsJCFRYWSpIWLVqklJQUS+qxahy0jv7ajx7bjx7bz84eO/whPrZ66dIlbdy4MejlZ82apf79+7eYvmjRIpWWlnb4qubW7N+/Xy+88IImTJigZ599tsvjBesXv/iFFi1aZOz1ehv6az96bD96bD+7exzyPd5Lly5pzZo1QS+fm5vbavBaKS0tTenp6SorK5PH41F0dLStrwcA6D1CHrxJSUmW7KVaLTExUWVlZbp48aISEhJCXQ4AoIcI66ua7XTixAlFRETYvnf9Tfn5+cZeqzeiv/ajx/ajx/azu8e9Jnhra2tVVVWl+vr6wLTLly/L5/O1WLa0tFQHDhzQTTfdZPQwM/+g7EV/7UeP7UeP7Wd3j0N+qLkrdu/erWPHjkmSampqJElr166VJPXr108zZswILPvuu+9q69atev7555WRkSFJ2rdvn5YvX67JkycrOTlZTqdThw4d0rZt29p8mh
UAAF0R1sG7c+dObd26tdm0VatWSbp6jvabwdualJQUjRkzRqWlpTp37pwaGxs1ePBg3Xnnnbr//vs5twsAsFzIbycCAKA3Ces93nDn8/m0adMmFRYW6vTp04qPj1dWVpbmzZun2NjYUJfXI8ybN6/V6TExMVqxYoXhasLb+vXrdeTIER0+fFinTp1SYmKili5d2ubyFRUVWrlypSoqKuRwODR+/HgtWLBAo0aNMld0mOlIj5cuXdriiF+Tn/70p8rMzLSz1LBUXV2tbdu26YsvvlBNTY0aGhqUnJysrKwszZo1q8XnbnV1td555x3t379fXq9Xo0eP1rx58zRhwoQu1UHwhtDbb7+tzZs3a+rUqZozZ46qqqq0efNmHTlyRM8++6xlz6Hu7dLS0pSXl9dsWmQkm35Hvffee+rfv79Gjx6tS5cutbvswYMH9etf/1oJCQmBP37+8Ic/6LnnntNLL73U5hPheruO9LjJj370oxbTxo4da3VpPcKnn36qP/zhD5oyZYpycnIUERGhsrIyrVy5Ujt27NDLL78cuKC2pqZGzzzzjCIiInTvvfeqb9++2rJli15++WX98pe/1E033dTpOvj0CZHjx4+roKBAU6dO1VNPPRWYnpSUpGXLlqm4uFg5OTkhrLDnSEpK4tunLLBkyZLAV2X+7Gc/U11dXZvLLlu2TJGRkYHwlaTs7Gz967/+q5YvX65nnnnGSM3hpiM9bsK2HbzMzEzNnTtXffv2DUy76667NHToUK1bt05FRUWBa4PeffddXbp0Sa+++mrgKM0dd9yhn/70p/rd736nN954Qw6Ho1N1sEsVItu3b5ff79fs2bObTc/Ly1NMTIy2bdsWosp6Jq/XG9SHGNoW7PdT19TUqLKyUpmZmc0uUExISFBmZqb27t2rc+fO2VVmWOvMd4D7/f42b41Ec6mpqc1Ct0l2drYk6auvvpIk1dXV6c9//rMyMjKanRqJjY3V9OnTdeLECVVWVna6DvZ4Q6SyslIOh6PFIaHo6GiNGjVKhw4dClFlPU9JSYm2bdsmn8+n+Ph4ZWdn64EHHmj1HyC6rmnbHT9+fIt548aN06effqrDhw/rlltuMV1aj/Too4/qypUrioyMVFpamh544AGNGzcu1GWFlabvch84cKCkqwHc0NDQ6jbcNO3QoUOdPqRP8IaI2+1WfHy8oqKiWswbNGiQDhw4IK/Xy7nILho7dqwyMzM1ZMgQXblyRXv27FFBQYHKy8v10ksvcRGbDWprayWp1dvxmqa53W6jNfVEAwcO1OzZszVmzBjFxMTo2LFj2rRpk5577rkun4PsTXw+n9asWaOIiIjA6b2m7dOubZhP9RDxeDxthmrTyf36+nqCt4sWLlzY7Oc77rhDI0aM0MqVK7Vp0ybdf//9Iaqs52p6Olxrf1R+c9tG1yxYsKDZz1OnTlVOTo6efvppvfXWW1q8eHGIKgsv//Vf/6WKigo9+OCDga8C9Hg8klq/CLNpu25apjM4xxsi0dHR8nq9rc5rekNjYmJMltRr3HvvvYqMjFRpaWmoS+mRmrbbhoaGFvPYtu01dOhQZWVlqaamRtXV1aEup9tbuXKlCgoKlJ+fr7lz5wamN/2B2NpndNN23ZXHCRO8IZKQkKDz58+3+uFUW1uruLg49nZtEhkZqUGDBunChQuhLqVHGjRokKTWD8W1dwgP1khMTJQktu9rWL16tdatW6dvfetb+qd/+qdm89o7nGzFNkzwhkhqaqr8fn+Li6g8Ho+OHj2q1NTUEFXW83k8Hrndbg0YMCDUpfRITRecHDx4sMW8podpjBkzxnRZvcaJEyckie27He+//77WrFmj3NxcPf744y1uCxoxYoSioqJa3YabpnXlM5rgDZHs7Gw5HA5t3Lix2fQtW7aovr6ee3gt0NZf/KtWrVJjY6MmT55suKLeYciQIUpNTVVJSUmzPQa3262SkhJNmDAhcPUoOqeurq7Vc4xHjhxRSUmJhg0bpiFDhoSgsu5vzZo1ev/995Wbm6sf/vCHrT6oKDY2VpMnT1ZZWZmOHj0amF5XV6eioiINHTq0Sw8p4VhmiIwYMUJ33323CgoK9Prrr2vSpEmBJ1elp6cTvBZYu3atKioqlJGRIZfLpbq6Ou3Zs0dlZWUaN26cZs6cGeoSw8rnn3+u06dPS5LOnz8vr9cb+DawxMTEZg9yePTRR/XrX/9azz//fOCBBAUFBfL5fHrkkUfMFx8mgu1xTU2NFi5cqFtvvVVDhw4NXNX86aefyul06gc/+EHIfofurKCgQKtXr5bL5dKNN96oP/7xj83mDxw4MHA1+EMPPaS9e/fq5Zdf1uzZs9WnTx9t2bJFbrdbv/jFLzr98AyJL0kIKZ/Pp40bNwae1RwXF6fs7GzNnz+f21wssGvXLv3P//yPvvrqK128eFFOp1NDhgxRVlaW5syZY/S7lnuCF154QeXl5a3OS09P1wsvvNBs2sGDB5s9q/n666/Xgw8+yGHmdgTb43PnzmnFihWqrKxUbW2tPB6PBg4cqIyMDM2dO1fDhg0zWHX4aO/51lLL7fjrr7/Wu+++q/Ly8sCzmr/zne90+VYtghcAAIM4xwsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYxLcTAeiwr776Sv/2b/8W+Gq6e+65p9m3Di1evDjwzS9xcXF67bXXNHjw4JDUCnQ37PEC6LARI0bo0UcfDfz88ccf68svv5QkFRcXN/u6tSeeeILQBb6B4AXQKfn5+crKypIk+f1+LV26VNXV1XrrrbcCy8yaNUtTpkwJVYlAt8TXAgLotMuXL+vpp5/WqVOnJEmxsbGqq6uTJI0ZM0YvvfSSIiM5owV8E3u8ADqtb9+++vGPf6yIiAhJCoRunz599JOf/ITQBVpB8ALoknHjxmn8+PHNpt1yyy0aMmRIiCoCujeCF0CXfPLJJ9q/f3+zacXFxSotLQ1RRUD3RvAC6LTjx4/r7bffDvw8fPhwSf97sZXb7Q5VaUC3RfAC6BSPx6M33ngjcC9vRkaGXnnllUD4XrhwQUuWLJHP5wtlmUC3Q/AC6JTf//73On78uCSpX79+evLJJxUTE6N/+Zd/CVxUVVZWpnXr1oWyTKDbIXgBdFhxcbGKiooCP3//+9+Xy+WSJI0ePVrz588PzFuzZk2Lc8BAb8Z9vAAAGMQeLwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABv0/pzLTmF2zP6YAAAAASUVORK5CYII=\n", 108 | "text/plain": [ 109 | "
" 110 | ] 111 | }, 112 | "metadata": {}, 113 | "output_type": "display_data" 114 | } 115 | ], 116 | "source": [ 117 | "fig, axes = mk_fig()\n", 118 | "mask = y == -1\n", 119 | "colors = ['#E24A33', '#348ABD']\n", 120 | "\n", 121 | "axes.scatter(x[mask], y[mask], s=100, label='Class 0', zorder=2)\n", 122 | "axes.scatter(x[~mask], y[~mask], s=100, label='Class 1', zorder=2)\n", 123 | "\n", 124 | "w0 = np.array([0])\n", 125 | "w = minimize(lambda w: loss_lr(w, x[:, None], y), w0, jac=lambda w: loss_lr_grad(w, x[:, None], y)).x \n", 126 | "p = 1 / (1+np.exp(-w*x))\n", 127 | "axes.plot(x, p, '-k')" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 115, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "from sklearn.linear_model import LogisticRegression" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 120, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "lr = LogisticRegression(C=1e8)\n", 146 | "lr.fit(x[:, None], y);" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 121, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "array([[0.9953184]])" 158 | ] 159 | }, 160 | "execution_count": 121, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "lr.coef_" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 122, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "data": { 176 | "text/plain": [ 177 | "array([-1., 1., -1., -1., 1., -1., -1., -1., -1., -1., 1., 1., -1.,\n", 178 | " 1., 1., -1., 1., 1., 1., -1.])" 179 | ] 180 | }, 181 | "execution_count": 122, 182 | "metadata": {}, 183 | "output_type": "execute_result" 184 | } 185 | ], 186 | "source": [ 187 | "lr.predict(x[:, None])" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 123, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "data": { 197 | "text/plain": [ 198 | "array([[9.93534766e-01, 6.46523442e-03],\n", 199 | " [2.81490423e-01, 7.18509577e-01],\n", 200 | " [8.85833853e-01, 1.14166147e-01],\n", 201 | " [7.41457200e-01, 2.58542800e-01],\n", 202 | " [2.81490423e-01, 7.18509577e-01],\n", 203 | " [9.93534766e-01, 6.46523442e-03],\n", 204 | " [9.99955118e-01, 4.48823176e-05],\n", 205 | " [9.99955118e-01, 4.48823176e-05],\n", 206 | " [9.99878576e-01, 1.21423660e-04],\n", 207 | " [9.54531158e-01, 4.54688420e-02],\n", 208 | " [1.93972562e-02, 9.80602744e-01],\n", 209 | " [9.97768505e-04, 9.99002231e-01],\n", 210 | " [5.14555334e-01, 4.85444666e-01],\n", 211 | " [1.26485284e-01, 8.73514716e-01],\n", 212 | " [5.08002779e-02, 9.49199722e-01],\n", 213 | " [5.14555334e-01, 4.85444666e-01],\n", 214 | " [1.26485284e-01, 8.73514716e-01],\n", 215 | " [1.36421087e-04, 9.99863579e-01],\n", 216 | " [3.69013075e-04, 9.99630987e-01],\n", 217 | " [5.14555334e-01, 4.85444666e-01]])" 218 | ] 219 | }, 220 | "execution_count": 123, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "lr.predict_proba(x[:, None])" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "#### Create and save animations" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 4, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "fig, axes = mk_fig()\n", 243 | "mask = y == 0\n", 244 | "colors = ['#E24A33', '#348ABD']\n", 245 | "\n", 246 | "def init():\n", 247 | " axes.scatter(X[mask, 0], 
X[mask, 1], s=100, c='#E24A33', label='Class 0', zorder=2)\n", 248 | " axes.scatter(X[~mask, 0], X[~mask, 1], s=100, c='#348ABD', label='Class 1', zorder=2)\n", 249 | " axes.scatter(xq[0], xq[1], s=200, facecolor='None', edgecolor='k', lw=2, label='Unknown', zorder=2)\n", 250 | " axes.legend(facecolor='#F0F0F0', framealpha=1)\n", 251 | "\n", 252 | "def animate(i):\n", 253 | " if i == 0:\n", 254 | " axes.scatter(xq[0], xq[1], s=200, facecolor='None', edgecolor='k', lw=2, label='Unknown', zorder=2)\n", 255 | " if i == 1:\n", 256 | " k = closest_node(xq, X, i)\n", 257 | " axes.plot([xq[0], X[k,0]],\n", 258 | " [xq[1], X[k,1]],\n", 259 | " 'k-', zorder=1)\n", 260 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[y[k[0]]], edgecolor='k', lw=2, zorder=2)\n", 261 | " elif i % 2 == 1:\n", 262 | " k = closest_node(xq, X, i)\n", 263 | " axes.plot([np.repeat(xq[0], 2), X[k[-2:],0]],\n", 264 | " [np.repeat(xq[1], 2), X[k[-2:],1]],\n", 265 | " 'k-', zorder=1)\n", 266 | " if sum(y[k] == 0) > sum(y[k] == 1):\n", 267 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[0], edgecolor='k', lw=2, zorder=2)\n", 268 | " elif sum(y[k] == 1) > sum(y[k] == 0):\n", 269 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[1], edgecolor='k', lw=2, zorder=2)\n", 270 | " else: # if equal counts, set to closest point's color\n", 271 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[y[k[0]]], edgecolor='k', lw=2, zorder=2)\n", 272 | " \n", 273 | "plt.close(fig)\n", 274 | "ani = animation.FuncAnimation(fig,\n", 275 | " animate,\n", 276 | " init_func=init,\n", 277 | " frames=10,\n", 278 | " interval=600)\n", 279 | "ani.save('../gif/knn/knn.gif', writer='imagemagick', fps=1, dpi=75)\n", 280 | "# HTML(ani.to_jshtml())" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "#### View animations" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "Image(url='../gif/knn/knn.gif')" 297 | ] 298 | } 299 | ], 300 | "metadata": { 301 | "kernelspec": { 302 | "display_name": "Python 3", 303 | "language": "python", 304 | "name": "python3" 305 | }, 306 | "language_info": { 307 | "codemirror_mode": { 308 | "name": "ipython", 309 | "version": 3 310 | }, 311 | "file_extension": ".py", 312 | "mimetype": "text/x-python", 313 | "name": "python", 314 | "nbconvert_exporter": "python", 315 | "pygments_lexer": "ipython3", 316 | "version": "3.7.4" 317 | } 318 | }, 319 | "nbformat": 4, 320 | "nbformat_minor": 2 321 | } 322 | -------------------------------------------------------------------------------- /ml-deploy-model/README.md: -------------------------------------------------------------------------------- 1 | # A data scientist's guide to deploying machine learning models 2 | 3 | ![](https://img.shields.io/badge/-tutorial-informational) 4 | ![](https://img.shields.io/badge/-machine--learning-important) 5 | ![](https://img.shields.io/badge/-aws-lightgrey) 6 | 7 |

8 | 9 |

10 | 11 | The aim of this repository is to provide a simple guide to deploying machine learning (ML) models for data scientists familiar with machine learning in a local environment, but interested in learning how to deploy their models. Deployment refers to the act of making your ML model available in a production environment, where it can be accessed and utilised by other software. 12 | 13 | Perhaps surprisingly, deployment is a process that is quite unfamiliar to many data scientists - in large part due to the need for some level of familiarity with software engineering. Fortunately, there are many tools available to help us data scientists with deploying our models. This repository focuses on currently and commonly used tools for ML deployment and is overwhelmingly practical, aiming to provide you with a useful overview of these tools and a foundation for using and expanding upon them in future. Here is a current list of tutorials; click a link to get started (to follow these tutorials, I recommend cloning this repository to your local machine): 14 | 15 | 1. [Building and deploying a machine learning model with Amazon Sagemaker](deploy-with-sagemaker.ipynb) 16 | 2. [Deploying a machine learning model with Flask and Heroku](deploy-with-flask.ipynb) 17 | 18 | ### My recommendations for deploying ML models 19 | 20 | Having gone through the process of deploying an ML model via different methods/tools, I have a slight preference for Amazon SageMaker. Not only is it highly scalable, but everything related to your model/service can be kept in one place: data in S3, notebooks in SageMaker, APIs in API Gateway, etc. However, in saying that, Flask has its use cases too: not only does it provide an easy method of developing a web application (not just an endpoint), but it is also free, and I found it slightly easier to learn than SageMaker. As always with ML, there's no free lunch, and the right tool for the job depends on the job itself. -------------------------------------------------------------------------------- /ml-deploy-model/data/README.md: -------------------------------------------------------------------------------- 1 | This data is modified after that provided by the UCI Machine Learning Repository, available [here](https://archive.ics.uci.edu/ml/datasets/abalone). 2 | 3 | The data has been modified from the original, namely, the "Sex" characteristic has been one-hot-encoded and the "rings" target variable has had +1.5 added to it to represent the abalone age in years. The data has been shuffled and split into training and validation sets. The training set has 3341 rows and the validation set has 836 rows. 4 | 5 | This preprocessing and splitting can be reproduced using the [preprocessing.py](raw/preprocessing.py) script in this repository.
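For example, here is a minimal sketch (my addition; it assumes you run it from this `data` directory) of loading the processed data with pandas, taking the column names from `column_names.csv`:

```python
import pandas as pd

# the train/validation CSVs were written without a header row;
# column_names.csv stores the column order: age (the target) first, then the features
cols = pd.read_csv("column_names.csv").columns.tolist()
train_df = pd.read_csv("abalone_train.csv", names=cols)
valid_df = pd.read_csv("abalone_validation.csv", names=cols)
print(train_df.shape, valid_df.shape)  # expect (3341, 10) and (836, 10)
```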
6 | -------------------------------------------------------------------------------- /ml-deploy-model/data/column_names.csv: -------------------------------------------------------------------------------- 1 | age,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,sex_I,sex_M -------------------------------------------------------------------------------- /ml-deploy-model/data/raw/preprocessing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | model_data = pd.get_dummies(pd.read_csv('abalone.csv'), 5 | drop_first=True) 6 | 7 | model_data[['age']] = model_data[['rings']] + 1.5  # age in years = rings + 1.5 8 | model_data = model_data.drop(columns="rings") 9 | train_data, validation_data = np.split(model_data.sample( 10 | frac=1, random_state=123), [int(0.8 * len(model_data))]) 11 | 12 | pd.concat([train_data['age'], train_data.drop(['age'], axis=1)], 13 | axis=1).to_csv('../abalone_train.csv', index=False, header=False) 14 | pd.concat([validation_data['age'], validation_data.drop(['age'], axis=1)], axis=1).to_csv( 15 | '../abalone_validation.csv', index=False, header=False) 16 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# A tutorial for deploying a model with Flask, Docker and Heroku" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Deployment refers to the act of making your machine learning model available in a production environment, where it can be accessed and utilised by other tools, workflows and software. Deployment is typically one of the last stages in the machine learning workflow and can be one of the most difficult.\n", 15 | "\n", 16 | "Flask is a web framework for Python, meaning that it provides functionality for building APIs and web applications. In this tutorial, we will explore:\n", 17 | "\n", 18 | "1. using Flask to create a simple API to interface with a machine learning model; and,\n", 19 | "2. using Flask to create a simple web application that integrates our API with some basic HTML.\n", 20 | "\n", 21 | "The aim of this tutorial is to introduce you to deploying machine learning models with Flask. I will not be giving an in-depth introduction to Flask here; I only intend to show how easy it is to deploy a model with Flask and to provide a foundation which you can build on to deploy your models in efficient and creative ways in the future.\n", 22 | "\n", 23 | "We'll be deploying a Random Forest regression model, built locally with scikit-learn, to predict the age of abalone based on the classic abalone dataset hosted [here](https://archive.ics.uci.edu/ml/datasets/abalone). We aim to predict the age of abalone from its physical measurements. The data provided in the tutorial has been modified from the original, namely, the \"Sex\" characteristic has been one-hot-encoded and the \"rings\" target variable has had +1.5 added to it to represent the abalone age in years. The data is located in the [data folder](./data). It has been shuffled and split into training and validation sets for you. The training set has 3341 rows and the validation set has 836 rows. 
The data looks like this:\n", 24 | "\n", 25 | "\n", 26 | "\n", 27 | "If you're interested in other ways of deploying your machine learning model, check out [my other tutorial using Amazon SageMaker](deploy-with-sagemaker.ipynb)." 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "# Contents\n", 35 | "\n", 36 | "1. [Requirements](#1)\n", 37 | "2. [Preparing the model we wish to deploy](#2)\n", 38 | "3. [Setting up your directory structure and environment](#3)\n", 39 | "4. [Model deployment](#4)\n", 40 | "5. [Building and deploying a web API](#5)\n", 41 | "6. [Building and deploying a web application](#6)\n", 42 | "7. [End and next steps](#7)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# 1. Requirements " 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "To complete this tutorial, you'll need the following:\n", 57 | "\n", 58 | "- Heroku account. Register [here](https://www.heroku.com/).\n", 59 | "- Heroku CLI. Download [here](https://devcenter.heroku.com/categories/command-line).\n", 60 | "- The Postman app and a free account. Download/register [here](https://www.postman.com/)." 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "# 2. Preparing the model we wish to deploy " 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "In contrast to the [Amazon SageMaker tutorial](deploy-with-sagemaker.ipynb) where we trained and built a model using SageMaker, here we will develop the model we wish to deploy locally. The notebook [build_model.ipynb](deploy-with-flask/build_model.ipynb) in this repository builds a simple Random Forest regression model using the abalone dataset. To simplify things a little, this model is trained on only four input features: `['length', 'diameter', 'height', 'whole_weight']`, as the four-feature model did not seem to be too much worse than the full-featured model for the purpose of this tutorial:\n", 75 | "\n", 76 | "\n", 77 | "\n", 78 | "The [build_model.ipynb](deploy-with-flask/build_model.ipynb) notebook saves the trained model using `joblib` into the appropriate directory location (described in the next section)." 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "# 3. Setting up your directory structure and environment " 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "We'll be needing a specific directory structure to help us easily deploy our machine learning model. As I'll discuss in the next section, we have two options for deploying our model: 1. as a web API service; or, 2. as a web application. 
The directory structure (provided in this repository in the [deploy-with-flask directory](deploy-with-flask)) that we need to follow looks like this:\n", 93 | "\n", 94 | "```shell\n", 95 | "deploy-with-flask\n", 96 | "├── build_model.ipynb # this notebook contains the model building code\n", 97 | "├── web_api\n", 98 | "│ └── abalone_predictor.joblib # this is the machine learning model we have built locally\n", 99 | "│ └── app.py # the file that defines our flask API\n", 100 | "│ └── Procfile # required by Heroku to help start flask app\n", 101 | "│ └── requirements.txt # file containing required packages\n", 102 | "│ \n", 103 | "└── web_application\n", 104 | " └── abalone_predictor.joblib # this is the machine learning model we have built locally\n", 105 | " └── app.py # the file that defines our flask application\n", 106 | " └── Procfile # required by Heroku to help start flask app\n", 107 | " └── requirements.txt # file containing required packages\n", 108 | " └── static # this subdirectory contains CSS style sheets\n", 109 | " │ └── style.css # css style sheet to be used in web application\n", 110 | " └── templates # this subdirectory contains HTML templates to help us build the web application\n", 111 | " └── home.html # html template to be used in web application\n", 112 | " └── prediction.html # html template to be used in web application\n", 113 | "```" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "At this point, you should also set up your development environment. I've provided a [`requirements.txt`](deploy-with-flask/web_api/requirements.txt) file in the repository. I recommend creating a new virtual environment (I use conda, so: `$ conda create -n <env-name> python=3.6`) and then installing the required packages from `requirements.txt` using `pip install -r requirements.txt`." 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "# 4. Model deployment " 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "As mentioned previously, we have two options for deploying our abalone prediction model. We can:\n", 135 | "\n", 136 | "1. develop a RESTful web API that accepts HTTP requests in the form of input data and returns a prediction;\n", 137 | "2. build a web application with an HTML user interface that interacts directly with our API.\n", 138 | "\n", 139 | "We'll explore both options below." 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "# 5. Building and deploying a web API " 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "It is extremely easy to create a RESTful API with Python and Flask. We already have the model we wish to deploy; we just need to create an API that allows users to access our model - by \"access\" I mean we want users to be able to send data to our model and to receive a prediction in return." 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "## 5.1 Building the Flask API" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "All we need to create our API is a single Python file named `app.py`. This file is located in the [`web_api`](deploy-with-flask/web_api) folder in this repository. This tutorial is not a tutorial on how to use Flask; rather, I want to show you how you can easily deploy a model with the help of Flask. 
There are many good online resources for learning about Flask - as a starter, I highly recommend the free [Flask tutorial video series by Corey Schafer](https://www.youtube.com/playlist?list=PL-osiE80TeTs4UjLw5MM6OjgkjFeUxCYH). With that said, let's open up `app.py` and briefly discuss what's going on in the file.\n", 168 | "\n", 169 | "\n", 170 | "" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "The `app.py` module is extremely simple. Each section of the code is numbered and described below:\n", 178 | "\n", 179 | "1. We first create an instance of the Flask class; every Flask application will have this line.\n", 180 | "2. We then paste in a Python function that accepts as input our trained machine learning model and some input data and returns the model prediction.\n", 181 | "3. We then load up our pre-trained model.\n", 182 | "4. The `@` symbol denotes a decorator. You don't need to know too much about decorators to understand what's going on here. Basically, we are defining our home page and populating it with some basic HTML text.\n", 183 | "5. We then define a new route at the URL `/predict` which will accept JSON POST requests, make a prediction with our previously defined prediction function, and then return the result.\n", 184 | "6. This piece of Python code simply allows us to start running our flask application by directly invoking the module with python from the command line - let's do that now!" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "## 5.2 Testing the Flask API" 192 | ] 193 | }, 194 | { 195 | "attachments": {}, 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "Open up a terminal and `cd` to the location of `app.py`. Then, type `python app.py`. You should see something like the following.\n", 200 | "\n", 201 | "\n", 202 | "\n", 203 | "Copy-and-paste the URL `http://127.0.0.1:5000/` into your browser of choice (this is the IP address of your local machine followed by the port, 5000, that Flask runs on by default).\n", 204 | "\n", 205 | "\n", 206 | "\n", 207 | "Great, our Flask app is up and running!\n", 208 | "\n", 209 | "We can open up Postman to make sure that we can send JSON POST requests to our app and receive a prediction in return. To do that:\n", 210 | "\n", 211 | "1. Open up Postman on your computer.\n", 212 | "2. Click \"Create a request\".\n", 213 | "3. Change the request to a \"POST\" request.\n", 214 | "4. Enter the URL `http://127.0.0.1:5000/predict`.\n", 215 | "5. Click the \"Body\" tab, click the \"raw\" radio button, and from the drop-down choose \"JSON\".\n", 216 | "6. Paste the following into the body (feel free to change the numbers if you like):\n", 217 | "\n", 218 | "```\n", 219 | "{\"length\": 0.41,\n", 220 | "\"diameter\": 0.33,\n", 221 | "\"height\": 0.10,\n", 222 | "\"whole_weight\": 0.36}\n", 223 | "```\n", 224 | "\n", 225 | "7. Click \"Send\". You should receive a prediction back (in my case, it was 9.14).\n", 226 | "\n", 227 | "" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "## 5.3 Deploying the API" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "Okay, so we have a working API; we now want to deploy it to the web so others can send requests. We will use Heroku to deploy our app, but you could also use other services such as AWS.\n", 242 | "\n", 243 | "1. 
Head over to [Heroku](https://dashboard.heroku.com/), log in, and click \"Create new app\".\n", 244 | "2. Choose a unique name for your app.\n", 245 | "\n", 246 | "\n", 247 | "\n", 248 | "3. We will be using the Heroku CLI to deploy our model. All we have to do is follow the simple instructions provided (note that for more complex applications, you may choose to containerize everything in a Docker container to deploy to Heroku).\n", 249 | "\n", 250 | "\n", 251 | "\n", 252 | "4. If you follow those commands, you should eventually see something like the following message verifying that your flask app has been deployed:\n", 253 | "\n", 254 | "\n", 255 | "\n", 256 | "5. Your app is now live on the web and anyone can send API requests to it! Let's give it a try in Postman. Open up Postman and repeat the steps outlined above, except now with the URL `https://my-abalone-predictor.herokuapp.com/predict`. If you sent your request correctly, you should receive a model prediction in return. Awesome!\n", 257 | "\n", 258 | "" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "# 6. Building and deploying a web application " 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "In section 5, we deployed our model as an endpoint that can receive JSON requests and return a prediction. Great! However, Flask has the ability to create entire web applications, not just a simple API, and I want to briefly introduce that functionality here. We only need to refactor our code a little bit and link it up with some HTML and CSS to create our web application." 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "We will use Flask to create an HTML form, accept data submitted to the form, and return a prediction using the submitted data. I won't go into too much detail here; I just want to show you what's possible and give you a foundation to build on. Let's open up our web application's [`app.py`](deploy-with-flask/web_application/app.py) file and go through the code step-by-step:\n", 280 | "\n", 281 | "" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "1. We'll be using `wtforms` and `flask_wtf` to help us build our form, so we need to add those to our import list. We're also importing a few useful modules from `flask` itself to help us build our web app.\n", 289 | "2. We create an instance of the Flask class and we also create a `SECRET_KEY`, which basically allows us to store and use information specific to a user in a session (more on that [here](https://flask.palletsprojects.com/en/1.1.x/quickstart/#sessions)).\n", 290 | "3. We again define our prediction function.\n", 291 | "4. Load up our pre-trained model.\n", 292 | "5. We now construct a simple form - there is an input for each of our input features, as well as a submit button.\n", 293 | "6. We want our home page to actually return the form we just created. So we instantiate a form, we validate it (check that each field has some data), and we then redirect the user to a page \"prediction\" where results will be displayed. The home page will be rendered with the help of the `home.html` file located [here](deploy-with-flask/web_application/templates/home.html) and we are passing the `form` to the template so we can use it in the rendering of the page.\n", 294 | "7. 
The \"prediction\" page will store the input data as a dictionary and pass it to our model predict function. The page is rendered wit the help of the `prediction.html` file located [here](deploy-with-flask/web_application/templates/prediction.html), and uses the `results` output by our model prediction function.\n", 295 | "\n", 296 | "With that done, let's test out our application." 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "## 6.2 Testing the web application" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "1. Open up a terminal and `cd` to the location of our web application's `app.py` file.\n", 311 | "2. Type `python app.py` and then copy and paste the URL `http://127.0.0.1:5000/` into your browser. You should see something like the following.\n", 312 | "\n", 313 | "\n", 314 | "\n", 315 | "3. Our web application is working! Let's try and make a prediction:\n", 316 | "\n", 317 | "\n", 318 | "\n", 319 | "\n", 320 | "\n", 321 | "4. Looks like our predictions are working too!\n", 322 | "\n", 323 | "This application is of course extremely simple and Flask is capable of building much more sophisticated web applications (have a look at the [docs](https://flask.palletsprojects.com/en/1.1.x/)), but hopefully this has given you a taste and some ideas as to what's possible with deploying your machine learning model as an application." 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "## 6.3 Deploying the web application" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "We now have a working application, let's deploy it to the web using Heroku.\n", 338 | "\n", 339 | "1. Head over to [Heroku](https://dashboard.heroku.com/), log-in, and click \"Create new app\".\n", 340 | "2. Choose a unique name for your app.\n", 341 | "\n", 342 | "\n", 343 | "\n", 344 | "3. We will again be using the Heroku CLI to deploy our model. Once again, follow the simple instructions provided by Heroku to deploy your web application.\n", 345 | "\n", 346 | "\n", 347 | "\n", 348 | "\n", 349 | "\n", 350 | "4. If you follow those commands, you should eventually see a message verifying that your web application has been deployed!\n", 351 | "5. Open up the provided URL and share your application with the world!" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "# 7. End and next steps " 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "Congratulations! You just deployed a model using Flask and Heroku. Hopefully this tutorial gave you some insight into how a machine leanring model can be deployed using these tools and how you might be able to expand upon the concepts presented to quickly and creatively deploy your models!\n", 366 | "\n", 367 | "I recommend checking out the [Flask docs](https://flask.palletsprojects.com/en/1.1.x/) to learn more about Flask. There is also an excellent and thorough [Flask tutorial by Miguel Grinberg](https://blog.miguelgrinberg.com/post/the-flask-mega-tutorial-part-i-hello-world) that you might find useful." 
368 | ] 369 | } 370 | ], 371 | "metadata": { 372 | "kernelspec": { 373 | "display_name": "Python 3", 374 | "language": "python", 375 | "name": "python3" 376 | }, 377 | "language_info": { 378 | "codemirror_mode": { 379 | "name": "ipython", 380 | "version": 3 381 | }, 382 | "file_extension": ".py", 383 | "mimetype": "text/x-python", 384 | "name": "python", 385 | "nbconvert_exporter": "python", 386 | "pygments_lexer": "ipython3", 387 | "version": "3.7.4" 388 | } 389 | }, 390 | "nbformat": 4, 391 | "nbformat_minor": 4 392 | } 393 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/build_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Build abalone age predictor" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This short and sweet notebook documents the steps taken to build the abalone age prediction model we will deploy using Flask and Heroku." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### Imports" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import joblib\n", 31 | "import pandas as pd\n", 32 | "from sklearn.ensemble import RandomForestRegressor\n", 33 | "from sklearn.metrics import mean_absolute_error" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### Load data" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "train_df = pd.read_csv('../data/abalone_train.csv',\n", 50 | " names = ['age', 'length', 'diameter', 'height',\n", 51 | " 'whole_weight', 'shucked_weight', 'viscera_weight',\n", 52 | " 'shell_weight', 'sex_I', 'sex_M'])\n", 53 | "valid_df = pd.read_csv('../data/abalone_validation.csv',\n", 54 | " names = ['age', 'length', 'diameter', 'height',\n", 55 | " 'whole_weight', 'shucked_weight', 'viscera_weight',\n", 56 | " 'shell_weight', 'sex_I', 'sex_M'])" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "### Build model" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Using all of the features:" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "MAE = 1.52 years\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "model = RandomForestRegressor(n_estimators=100, random_state=123).fit(train_df.drop(columns='age'),\n", 88 | " train_df['age'])\n", 89 | "predicted_age = model.predict(valid_df.drop(columns='age'))\n", 90 | "mae = mean_absolute_error(predicted_age, valid_df['age'])\n", 91 | "print(f\"MAE = {mae:.2f} years\")" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "Using only the features `['length', 'diameter', 'height', 'whole_weight']`:" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 4, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "MAE = 1.88 years\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "features = ['length', 'diameter', 'height', 'whole_weight']\n", 116 | "model = 
RandomForestRegressor(n_estimators=100, random_state=123).fit(train_df[features],\n", 117 | " train_df['age'])\n", 118 | "predicted_age = model.predict(valid_df[features])\n", 119 | "mae = mean_absolute_error(predicted_age, valid_df['age'])\n", 120 | "print(f\"MAE = {mae:.2f} years\")" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "### Save model" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "I'm happy enough with the performance of the reduced-feature model on the validation data. So I'll now re-fit the model on the full dataset to get it ready for deployment, then save the model using joblib." 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 5, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "features = ['length', 'diameter', 'height', 'whole_weight']\n", 144 | "full_X = pd.concat((train_df[features], valid_df[features]))\n", 145 | "full_y = pd.concat((train_df['age'], valid_df['age']))\n", 146 | "model = RandomForestRegressor(n_estimators=100).fit(full_X,\n", 147 | " full_y)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "Save to both the web_api and web_application folders:" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 6, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "with open('web_api/abalone_predictor.joblib', 'wb') as f:\n", 164 | " joblib.dump(model, f)\n", 165 | "with open('web_application/abalone_predictor.joblib', 'wb') as f:\n", 166 | " joblib.dump(model, f)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Prediction function" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "Here we will define a function that accepts input data and returns a prediction. We will use this function to develop our web API and web application using Flask." 
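One caveat worth noting about the function below (my own observation, not from the original notebook): it builds the feature vector from the dictionary's key order, so callers must supply the keys in the training order `['length', 'diameter', 'height', 'whole_weight']`. A stricter sketch that indexes by feature name instead:

```python
FEATURES = ['length', 'diameter', 'height', 'whole_weight']  # order used in training

def return_prediction_strict(model, input_json):
    # index explicitly by feature name so the caller's key order no longer matters
    input_data = [[input_json[f] for f in FEATURES]]
    return model.predict(input_data)[0]
```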
181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 7, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "input_json = {'length': 0.41,\n", 190 | " 'diameter': 0.33,\n", 191 | " 'height': 0.10,\n", 192 | " 'whole_weight': 0.36}" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 8, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "def return_prediction(model, input_json):\n", 202 | " \n", 203 | " input_data = [[input_json[k] for k in input_json.keys()]]\n", 204 | " prediction = model.predict(input_data)[0]\n", 205 | " \n", 206 | " return prediction" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 9, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/plain": [ 217 | "9.14" 218 | ] 219 | }, 220 | "execution_count": 9, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "return_prediction(model, input_json)" 227 | ] 228 | } 229 | ], 230 | "metadata": { 231 | "kernelspec": { 232 | "display_name": "Python 3", 233 | "language": "python", 234 | "name": "python3" 235 | }, 236 | "language_info": { 237 | "codemirror_mode": { 238 | "name": "ipython", 239 | "version": 3 240 | }, 241 | "file_extension": ".py", 242 | "mimetype": "text/x-python", 243 | "name": "python", 244 | "nbconvert_exporter": "python", 245 | "pygments_lexer": "ipython3", 246 | "version": "3.7.4" 247 | } 248 | }, 249 | "nbformat": 4, 250 | "nbformat_minor": 2 251 | } 252 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_api/Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn app:app -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_api/abalone_predictor.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/deploy-with-flask/web_api/abalone_predictor.joblib -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_api/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify 2 | import joblib 3 | 4 | # 1. create an instance of the Flask class 5 | app = Flask(__name__) 6 | 7 | # 2. define a prediction function 8 | def return_prediction(model, input_json): 9 | 10 | input_data = [[input_json[k] for k in input_json.keys()]] 11 | prediction = model.predict(input_data)[0] 12 | 13 | return prediction 14 | 15 | # 3. load our abalone age predictor model 16 | model = joblib.load('abalone_predictor.joblib') 17 | 18 | # 4. set up our home page 19 | @app.route("/") 20 | def index(): 21 | return """ 22 |

<h1>Welcome to our abalone prediction service</h1> 23 | <p>To use this service, make a JSON post request to the /predict url with the following fields:</p> 24 | <ul> 25 | <li>length</li> 26 | <li>diameter</li> 27 | <li>height</li> 28 | <li>whole_weight</li> 29 | </ul>
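<!-- note (comment added for clarity): return_prediction() builds its feature vector from the JSON keys in the order they are sent, so provide the fields in the order listed above -->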
30 | """ 31 | 32 | # 5. define a new route which will accept POST requests and return our model predictions 33 | @app.route('/predict', methods=['POST']) 34 | def abalone_prediction(): 35 | content = request.json 36 | results = return_prediction(model, content) 37 | return jsonify(results) 38 | 39 | # 6. allows us to run flask using $ python app.py 40 | if __name__ == '__main__': 41 | app.run() 42 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_api/default.profraw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/deploy-with-flask/web_api/default.profraw -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_api/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2020.4.5.1 2 | click==7.1.1 3 | Flask==1.1.2 4 | Flask-WTF==0.14.3 5 | gunicorn==20.0.4 6 | itsdangerous==1.1.0 7 | Jinja2==2.11.3 8 | joblib==0.14.1 9 | MarkupSafe==1.1.1 10 | numpy==1.18.2 11 | scikit-learn==0.22.2 12 | scipy==1.4.1 13 | Werkzeug==1.0.1 14 | WTForms==2.2.1 15 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn app:app -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/abalone_predictor.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/deploy-with-flask/web_application/abalone_predictor.joblib -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/app.py: -------------------------------------------------------------------------------- 1 | # 1. Imports 2 | from flask import Flask, render_template, session, url_for, redirect 3 | from flask_wtf import FlaskForm 4 | from wtforms import TextField, SubmitField 5 | import joblib 6 | 7 | # 2. create an instance of the Flask class 8 | app = Flask(__name__) 9 | app.config['SECRET_KEY'] = 'asecretkey' 10 | 11 | # 3. define a prediction function 12 | def return_prediction(model, input_json): 13 | 14 | input_data = [[input_json[k] for k in input_json.keys()]] 15 | prediction = model.predict(input_data)[0] 16 | 17 | return prediction 18 | 19 | # 4. load our abalone age predictor model 20 | model = joblib.load('abalone_predictor.joblib') 21 | 22 | # 5. create a WTForm Class 23 | class PredictForm(FlaskForm): 24 | 25 | length = TextField("Shell length") 26 | diameter = TextField("Shell diameter") 27 | height = TextField("Shell height") 28 | whole_weight = TextField("Whole weight") 29 | submit = SubmitField("Predict") 30 | 31 | # 6. 
set up our home page 32 | @app.route("/", methods=["GET", "POST"]) 33 | def index(): 34 | 35 | # Create instance of the form 36 | form = PredictForm() 37 | 38 | # Validate the form 39 | if form.validate_on_submit(): 40 | session['length'] = form.length.data 41 | session['diameter'] = form.diameter.data 42 | session['height'] = form.height.data 43 | session['whole_weight'] = form.whole_weight.data 44 | return redirect(url_for("prediction")) 45 | 46 | return render_template('home.html', form=form) 47 | 48 | # 7. define a new "prediction" route that processes form input and returns a model prediction 49 | @app.route('/prediction') 50 | def prediction(): 51 | 52 | content = {} 53 | content['length'] = float(session['length']) 54 | content['diameter'] = float(session['diameter']) 55 | content['height'] = float(session['height']) 56 | content['whole_weight'] = float(session['whole_weight']) 57 | results = return_prediction(model, content) 58 | return render_template('prediction.html', results=results) 59 | 60 | # 8. allows us to run flask using $ python app.py 61 | if __name__ == '__main__': 62 | app.run() 63 | 64 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2020.4.5.1 2 | click==7.1.1 3 | Flask==1.1.2 4 | Flask-WTF==0.14.3 5 | gunicorn==20.0.4 6 | itsdangerous==1.1.0 7 | Jinja2==2.11.2 8 | joblib==0.14.1 9 | MarkupSafe==1.1.1 10 | numpy==1.18.2 11 | scikit-learn==0.22.2 12 | scipy==1.4.1 13 | Werkzeug==1.0.1 14 | WTForms==2.2.1 15 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/static/style.css: -------------------------------------------------------------------------------- 1 | 2 | body { 3 | background-color: rgb(238, 238, 238); 4 | color: rgb(10, 9, 34); 5 | } 6 | 7 | label { 8 | width: 150px; 9 | } 10 | 11 | input { 12 | width: 100px; 13 | } 14 | 15 | #submit { 16 | background-color: rgb(53, 157, 53); 17 | width: 100px; 18 | margin-left: 154px; 19 | font-weight: bold; 20 | } -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/templates/home.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Abalone Prediction Service 13 | 14 | 15 | 16 |
17 | <h1>Abalone Age Predictor</h1> 18 | <h2>Please enter your abalone measurements below:</h2> 19 | <form method='POST'> 20 | {# This hidden_tag is a CSRF security feature. #} 21 | {{ form.hidden_tag() }} 22 | {{ form.length.label }} {{form.length}} 23 | <br> 24 | {{ form.diameter.label }} {{form.diameter}} 25 | <br> 26 | {{ form.height.label }} {{form.height}} 27 | <br> 28 | {{ form.whole_weight.label }} {{form.whole_weight}} 29 | <br> 30 | {{ form.submit() }} 31 | </form> 32 |
33 | 34 | 35 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/templates/prediction.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Abalone Prediction Service 13 | 14 | 15 | 16 |
17 | <h1>Thank You. Here is the information you gave:</h1> 18 | <ul> 19 | <li>Shell length: {{session['length']}}</li> 20 | <li>Shell diameter: {{session['diameter']}}</li> 21 | <li>Shell height: {{session['height']}}</li> 22 | <li>Whole weight: {{session['whole_weight']}}</li> 23 | </ul> 24 | <h2>Your predicted abalone age is: {{results}} years</h2> 25 |
26 | 27 | 28 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-sagemaker.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# A tutorial for: building and deploying a model on Amazon Sagemaker" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Deployment refers to the act of making your machine learning model available in a production environment, where it can be accessed and utilised by other tools, workflows and software. Deployment is typically one of the last stages in the machine learning workflow and can be one of the most difficult.\n", 15 | "\n", 16 | "This is where [Amazon SageMaker](https://aws.amazon.com/sagemaker/) comes in. SageMaker is a relatively new Amazon service that supports all of the steps of machine learning model development: data labelling, model building, training, optimization, and deployment. You can choose to use all or any combination of these key features in SageMaker. In addition, SageMaker is based on Jupyter notebooks (which are familiar to most data scientists these days), comes with many built-in state-of-the-art algorithms, and provides a host of ready-to-use examples to get you up and running quickly.\n", 17 | "\n", 18 | "In this tutorial I'll walk you through building and deploying a machine learning model using SageMaker. While the SageMaker docs are quite good, I still found it a little difficult to get going on SageMaker - so this tutorial provides a simple walkthrough of using SageMaker for model building and deployment.\n", 19 | "\n", 20 | "We'll be using SageMaker’s implementation of the XGBoost algorithm to train and host a regression model to predict the age of abalone based on the classic abalone dataset hosted [here](https://archive.ics.uci.edu/ml/datasets/abalone). We aim to predict the age of abalone based on eight physical measurements. The data provided in the tutorial has been modified from the original, namely, the \"Sex\" characteristic has been one-hot-encoded and the \"rings\" target variable has had +1.5 added to it to represent the abalone age in years. The data is located in the [data folder](./data). It has been shuffled, one-hot-encoded and split into training and validation sets for you. The training set has 3341 rows and the validation set has 836 rows. The data looks like this:\n", 21 | "\n", 22 | "\n", 23 | "\n", 24 | "If you're interested in other ways of deploying your machine learning model, check out [my other tutorial using Flask](deploy-with-flask.ipynb)." 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# Contents\n", 32 | "\n", 33 | "1. [Requirements](#1)\n", 34 | "2. [Preparing the data](#2)\n", 35 | "3. [Setting up SageMaker](#3)\n", 36 | "4. [Building and deploying the model](#4)\n", 37 | "5. [Using the model](#5)\n", 38 | "6. [End](#6)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "# 1. Requirements " 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "To complete this tutorial, you'll need the following:\n", 53 | "\n", 54 | "- An AWS account. Register [here](https://console.aws.amazon.com/).\n", 55 | "- The Postman app and a free account. Download/register [here](https://www.postman.com/). (If you'd rather script your API requests in Python, see the sketch just after this list.)"
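As a scripted alternative to Postman, the POST request made later in this tutorial (section 5.3) can be sent with Python's `requests` library. This is a minimal sketch and not part of the original tutorial - the invoke URL below is a hypothetical placeholder, and you'd substitute the one API Gateway generates for you in section 5.2:

```python
import requests

# Hypothetical invoke URL - replace with the one API Gateway gives you in section 5.2.
url = "https://abc123.execute-api.us-west-2.amazonaws.com/test/predictabalone"

# The same example record used in section 5.3; the outer double quotes are part of
# the body, because the Lambda function expects the event to be a JSON string.
payload = '"0.41,0.325,0.1,0.3555,0.146,0.072,0.105,0,1"'

response = requests.post(url, data=payload)
print(response.json())  # e.g., 10 - the predicted abalone age in years
```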
56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "# 2. Preparing the data " 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "Many of the Amazon SageMaker examples provided in the docs use data that are downloaded from online sources, but I wanted to emulate the process of using a custom dataset in this tutorial. To make the data available to SageMaker we need to host it in an S3 bucket:\n", 70 | "\n", 71 | "1. Head over to AWS, log in, and search for S3:\n", 72 | "\n", 73 | "\n", 74 | "\n", 75 | "2. Choose \"Create Bucket\".\n", 76 | "3. Provide a globally unique name for your bucket. I named mine \"deploy-tutorial-tb\".\n", 77 | "4. Leave the remaining settings as default and click \"Create\" in the lower left. You've now created a storage bucket to hold your data.\n", 78 | "\n", 79 | "\n", 80 | "\n", 81 | "5. Click on the bucket you just created and then click \"Upload\" in the top left corner.\n", 82 | "6. Drag and drop the two data files provided with this repository, \"abalone_train.csv\" and \"abalone_validation.csv\", into the upload prompt. Then click \"Upload\" in the lower left corner to complete the upload.\n", 83 | "\n", 84 | "\n", 85 | "\n", 86 | "7. You will now see the data in the bucket. Your data is ready to be accessed by SageMaker.\n", 87 | "\n", 88 | "" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "# 3. Setting up SageMaker " 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "We will now prepare SageMaker to build and deploy a machine learning model.\n", 103 | "\n", 104 | "1. Head back to the main AWS dashboard and search for SageMaker.\n", 105 | "\n", 106 | "\n", 107 | "\n", 108 | "2. Click \"Notebook instances\" in the panel on the left side of the screen. Then click \"Create notebook instance\" in the top right of the screen.\n", 109 | "\n", 110 | "\n", 111 | "\n", 112 | "3. Give your notebook instance a name; I called mine \"deploy-tutorial-tb\".\n", 113 | "4. Scroll down and in the IAM role field select \"Create a new role\" from the drop-down. In the pop-up, select \"Any S3 bucket\" and then click \"Create role\" at the bottom right (feel free to specify only a specific bucket, the one you created previously, if you wish).\n", 114 | "\n", 115 | "\n", 116 | "\n", 117 | "5. Leave all remaining fields as default, scroll to the bottom of the page and click \"Create notebook instance\".\n", 118 | "6. Wait a few minutes for the status of your newly created notebook to change from \"Pending\" to \"InService\".\n", 119 | "\n", 120 | "\n", 121 | "\n", 122 | "7. Click \"Open Jupyter\" to open your notebook instance." 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "We'll now upload the notebook I've created for this tutorial, which is located in the [deploy-with-sagemaker folder](deploy-with-sagemaker) in this repository.\n", 130 | "\n", 131 | "1. Click the \"New\" dropdown button at the top right and select \"Folder\". Click the checkbox next to your newly created folder, and then click \"Rename\" in the menu bar above to give the folder a name such as \"deploy-tutorial\".\n", 132 | "\n", 133 | "2. Click the folder to enter it and then click \"Upload\" in the top right corner. Choose the [xgboost_abalone.ipynb](deploy-with-sagemaker/xgboost_abalone.ipynb) notebook file downloaded from this repo and upload it. Open up the notebook. 
We'll use this notebook to build and deploy the model as described in the following section. At this point, your screen should look something like the following:\n", 134 | "\n", 135 | "\n", 136 | "\n", 137 | "> Note 1: If you get a \"Kernel not found\" error, use the dropdown menu that appeared to choose the \"conda_python3\" kernel and select \"Set kernel\".\n", 138 | "\n", 139 | "> Note 2: Amazon SageMaker has many example notebooks available for you to use - no matter what your use case is, you should be able to find a base notebook to work off. You can check out the available examples by clicking the \"SageMaker Examples\" tab in your opened notebook instance (note that it may take a few minutes for the examples to become available to you; you can periodically click the little refresh button at the top right corner of the screen to refresh the list). You can either \"Preview\" (just to have a look) or \"Use\" (will copy the notebook to your root) these notebooks." 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "# 4. Building and deploying the model " 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "In this section we will build and deploy a model to predict the age of abalone. The notebook we just uploaded has all the details of the dataset and XGBoost model we'll be using for this task. Let's walk through it step-by-step:\n", 154 | "\n", 155 | "1. We first need to point our notebook to the dataset we uploaded to S3 previously. All that is required here is to enter the name of your S3 bucket into the cell and run it (recall that I called mine \"deploy-tutorial-tb\").\n", 156 | "\n", 157 | "\n", 158 | "\n", 159 | "2. SageMaker uses Docker containers to allow users to train and deploy models. There are many pre-built Docker images available, particularly suited for SageMaker's built-in algorithms, and we will use one of those here (note however that you can always upload your own custom Docker container - more on that in the [docs](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms.html)). Run the next cell to load the XGBoost Docker container.\n", 160 | "\n", 161 | "\n", 162 | "\n", 163 | "3. The following cell sets the parameters for, and executes training of, the XGBoost model. You shouldn't need to change anything here, but feel free to take a look at what's going on inside this cell. The cell will periodically print feedback on the status of the training job. When it's finished you should see a \"Completed\" message - training should take around 5 minutes.\n", 164 | "\n", 165 | "\n", 166 | "\n", 167 | "4. Now we need to create a SageMaker model from the training job above. Run the cells under the headings \"4. Create the model\" and \"5. Create endpoint\" to create the model and deploy it to an endpoint that will be available to provide inferences - it will take about 10 minutes to run all of these cells (the notebook provides more details on what each of these cells is doing). You will eventually receive an output that your endpoint has been created.\n", 168 | "5. If you stop the tutorial here, be sure to shut down the endpoint you created by running the cell under the heading \"7. Delete Endpoint\" - else you will continue to be charged by Amazon.\n", 169 | "\n", 170 | "> Note that a key functionality of SageMaker is model tuning. 
I'm not describing any tuning/optimization here, but it is well described in the [SageMaker docs](https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning.html)." 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "# 5. Using the model " 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "Now that we've deployed the model as an endpoint, we want to use it in a production environment. There are two main ways we may want to use our model:\n", 185 | "\n", 186 | "1. To act as a HTTPS endpoint that can provide inferences on a case-by-case basis (e.g., for a web application). We will focus on this use case here.\n", 187 | "2. To get predictions for an entire dataset. You can read about this use case in the [SageMaker docs](https://docs.aws.amazon.com/sagemaker/latest/dg/how-it-works-batch.html).\n", 188 | "\n", 189 | "\n", 190 | "To allow users to interact with our machine learning model we need an API. Put simply, APIs enable applications to work together without having to know exactly how they’re implemented. You can think of a simple wall plug as an API: manufacturers of electronic equipment know that they can attach a particular power cord to their device, which will allow the device to \"interface with\" (i.e., use) the electricity supply through the wall plug - the manufacturers don't need to know how the electricity is supplied or what kind of infrastructure is behind the wall plug; they just know that if their device fits the wall plug, they can access the electricity. We will use a combination of Amazon API Gateway and Amazon Lambda to create an API that will allow users to use our machine learning model for predictions. This section is a shortened summary of this [Amazon blog post](https://aws.amazon.com/blogs/machine-learning/call-an-amazon-sagemaker-model-endpoint-using-amazon-api-gateway-and-aws-lambda/)." 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "### 5.1 Create a Lambda function that calls the SageMaker Runtime Invoke_Endpoint\n", 198 | "\n", 199 | "Our first task is to create a function that will be invoked through an API request. We'll create that function using Amazon Lambda.\n", 200 | "\n", 201 | "1. Head back to the main AWS dashboard and search for Lambda.\n", 202 | "\n", 203 | "\n", 204 | "\n", 205 | "2. In the Lambda dashboard select \"Create function\".\n", 206 | "3. With the \"Author from scratch\" tab selected, give your function a name (I called the function \"predict_abalone\"), choose Python 3.6, and select \"Create a new role with basic Lambda permissions\".\n", 207 | "\n", 208 | "\n", 209 | "\n", 210 | "4. Before we write any code, select the \"Permissions\" tab at the top of the screen, and then click on the new role that was created for you when you created the lambda function.\n", 211 | "\n", 212 | "\n", 213 | "\n", 214 | "5. In the new screen that opens up, click the name of the role called e.g., \"AWSLambdaBasicExecutionRole-\\*\", then click \"Edit policy\", click the JSON tab, and then replace the contents of the JSON with the following. 
Then click \"Review policy\" at the bottom right, and then \"Save changes\".\n", 215 | "\n", 216 | "```\n", 217 | "{\n", 218 | " \"Version\": \"2012-10-17\",\n", 219 | " \"Statement\": [\n", 220 | " {\n", 221 | " \"Sid\": \"VisualEditor0\",\n", 222 | " \"Effect\": \"Allow\",\n", 223 | " \"Action\": \"sagemaker:InvokeEndpoint\",\n", 224 | " \"Resource\": \"*\"\n", 225 | " }\n", 226 | " ]\n", 227 | "}\n", 228 | "```\n", 229 | "\n", 230 | "\n", 231 | "\n", 232 | "6. Now head back to Amazon Lambda and click the \"Configuration\" tab. Scroll down and copy-and-paste the following code into the editor. This is the function we will trigger with an API request.\n", 233 | "\n", 234 | "```python\n", 235 | "import os\n", 236 | "import boto3\n", 237 | "import math\n", 238 | "\n", 239 | "# grab environment variables\n", 240 | "ENDPOINT_NAME = os.environ['ENDPOINT_NAME']\n", 241 | "runtime = boto3.client('runtime.sagemaker')\n", 242 | "\n", 243 | "def lambda_handler(event, context):\n", 244 | " \n", 245 | " \n", 246 | " payload = event\n", 247 | " response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME, \n", 248 | " ContentType='text/csv', \n", 249 | " Body=payload)\n", 250 | " result = response['Body'].read()\n", 251 | " result = result.decode(\"utf-8\")\n", 252 | " result = result.split(',')\n", 253 | " result = [math.ceil(float(i)) for i in result]\n", 254 | " \n", 255 | " return result[0]\n", 256 | "```\n", 257 | "\n", 258 | "\n", 259 | "\n", 260 | "7. `ENDPOINT_NAME` in the code above is an environment variable that holds the name of the SageMaker model endpoint you deployed using the sample notebook. Scroll down the page and click the button \"Manage environment variables\" under the \"Environment variables\" tab. Click \"Add environment variable\". Call the key \"ENDPOINT_NAME\" and the value the name of your endpoint - for example, mine was \"deploy-tutorial-2020-04-13-16-03-41\". Click \"Save\".\n", 261 | "\n", 262 | "" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "### 5.2 Create an API Gateway – Integration request setup\n", 270 | "\n", 271 | "We're almost there, we've created a Python function in Lambda that calls our SageMaker model endpoint. We'll now use API Gateway to help users easily access this function via an API request." 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "1. Head back to the main AWS dashboard and search for Amazon API Gateway.\n", 279 | "\n", 280 | "\n", 281 | "\n", 282 | "2. Under the \"REST API\" tab click \"Build\".\n", 283 | "3. Click the \"New API\" radio button.\n", 284 | "4. Give your API a name like \"PredictAbalone\" and leave other settings as default. Click \"Create API\".\n", 285 | "5. Next, select \"Create Resource\" from the \"Actions\" drop-down menu and give the resource a name like “predictabalone” and click \"Create resource\".\n", 286 | "\n", 287 | "\n", 288 | "\n", 289 | "6. From the \"Actions\" drop-down menu, choose \"Create Method\" and select \"POST\".\n", 290 | "7. On the screen that appears, choose \"Integration type: Lambda Function\" and in the \"Lambda function\" text box, search for and find the function we created earlier (mine was called \"predict_abalone\"). Click \"Save\".\n", 291 | "\n", 292 | "\n", 293 | "\n", 294 | "8. From the \"Actions\" drop-down menu, choose \"Deploy API\". Create a new \"Deploy stage\" called \"Test\" and then click \"Deploy\".\n", 295 | "\n", 296 | "\n", 297 | "\n", 298 | "9. 
Note the invoke URL that was created when you deployed your API. It should be something like: `https://{restapi_id}.execute-api.{region}.amazonaws.com/test/predictabalone`. Note that the resource name \"predictabalone\" is not appended automatically.\n", 299 | "\n", 300 | "For more detailed information on how to create an API with API Gateway, refer to the [documentation](https://docs.aws.amazon.com/apigateway/latest/developerguide/how-to-create-api.html). " 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "### 5.3 Testing our model with Postman\n", 308 | "\n", 309 | "Now that we have a deployed model endpoint and have set up our API, we can test everything out. We'll use Postman to send an API request and (hopefully) receive back a prediction from our model. You can download the latest version of Postman [here](https://www.postman.com/downloads/)." 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "1. Open up the Postman application and then select \"Create a request\".\n", 317 | "2. Choose \"POST\" as the request method and then paste your invoke URL from the previous step into the \"Enter request url\" field.\n", 318 | "3. Select the \"Body\" tab and then the \"raw\" radio button. Paste the following example data into the body field: \n", 319 | "\n", 320 | "`\"0.41,0.325,0.1,0.3555,0.146,0.072,0.105,0,1\"`\n", 321 | "\n", 322 | "4. Finally, click \"Send\" and you should receive a result of `10` back - the predicted age of the abalone for the sent data.\n", 323 | "\n", 324 | "" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "# 6. End and next steps" 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "Congratulations! You just created a model endpoint deployed and hosted by Amazon SageMaker and were able to invoke that endpoint with the help of API Gateway and a Lambda function - so cool! Have fun integrating this endpoint into your other software/workflows/apps!\n", 339 | "\n", 340 | "We only scratched the surface of Amazon SageMaker's capabilities in this tutorial. I highly recommend checking out the [SageMaker docs](https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html) to see what else it can do!" 341 | ] 342 | } 343 | ], 344 | "metadata": { 345 | "kernelspec": { 346 | "display_name": "Python 3", 347 | "language": "python", 348 | "name": "python3" 349 | }, 350 | "language_info": { 351 | "codemirror_mode": { 352 | "name": "ipython", 353 | "version": 3 354 | }, 355 | "file_extension": ".py", 356 | "mimetype": "text/x-python", 357 | "name": "python", 358 | "nbconvert_exporter": "python", 359 | "pygments_lexer": "ipython3", 360 | "version": "3.7.4" 361 | } 362 | }, 363 | "nbformat": 4, 364 | "nbformat_minor": 4 365 | } 366 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-sagemaker/xgboost_abalone.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Predicting abalone age with Amazon SageMaker and the XGBoost algorithm\n", 8 | "\n", 9 | "**Created by: [Tomas Beuzen](https://tomasbeuzen.github.io/). Hosted on [GitHub](xxx).**" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "---\n", 17 | "## Contents\n", 18 | "1. [Introduction](#1)\n", 19 | "2. 
[Setup](#2)\n", 20 | "3. [Training the XGBoost model ](#3)\n", 21 | "4. [Create the model](#4)\n", 22 | "5. [Create endpoint](#5)\n", 23 | "6. [Validate the model for use](#6)\n", 24 | "7. [Delete Endpoint](#7)\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "---\n", 32 | "## 1. Introduction \n", 33 | "\n", 34 | "This notebook demonstrates the use of Amazon SageMaker’s implementation of the XGBoost algorithm to train and host a regression model. It uses the classic abalone dataset, the original version of which can be found [here](https://archive.ics.uci.edu/ml/datasets/abalone). Briefly, the number of \"rings\" present in an abalone shell is a proxy for the age of the abalone (age [years] = rings + 1.5). We aim to predict the age of abalone based on eight physical measurements. The data provided in the tutorial has been modified from the original, namely, the \"Sex\" characteristic has been one-hot-encoded and the \"rings\" target variable has had 1.5 added to it to represent the abalone age in years.\n", 35 | "\n", 36 | "This notebook is adapted from the example provided by Amazon [here](https://github.com/awslabs/amazon-sagemaker-examples/blob/master/introduction_to_amazon_algorithms/xgboost_abalone/xgboost_abalone.ipynb). It has been significantly stripped down and modified to provide a bare minimum example illustrating how to build and deploy a model using SageMaker." 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "---\n", 44 | "## 2. Setup \n", 45 | "\n", 46 | "\n", 47 | "This notebook was created and tested on an ml.m2.medium notebook instance. The following code sets up paths to the S3 bucket we stored our data in previously." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "isConfigCell": true 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "import os\n", 59 | "import boto3\n", 60 | "import re\n", 61 | "import sagemaker\n", 62 | "\n", 63 | "role = sagemaker.get_execution_role()\n", 64 | "region = boto3.Session().region_name\n", 65 | "\n", 66 | "bucket = \"deploy-tutorial-tb\" # <-- insert your bucket name here\n", 67 | "bucket_path = 'https://s3-{}.amazonaws.com/{}'.format(region, bucket)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "---\n", 75 | "## 3. Training the XGBoost model \n", 76 | "\n", 77 | "The following cell loads the Amazon SageMaker XGBoost Docker image." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "from sagemaker.amazon.amazon_estimator import get_image_uri\n", 87 | "container = get_image_uri(region, 'xgboost', '0.90-1')" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "The following cell sets the parameters for, and executes training of, the XGBoost model. You should not have to change any setting here (unless you want to change the name of the job, which is clearly marked in a comment below). Training the model should take around 5 minutes. The code periodically pings the status of the job and prints the output." 
95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "%%time\n", 104 | "import boto3\n", 105 | "from time import gmtime, strftime\n", 106 | "\n", 107 | "job_name = 'deploy-tutorial-' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime()) # <-- feel free to change the name of your job if you wish\n", 108 | "print(\"Training job\", job_name)\n", 109 | "\n", 110 | "create_training_params = \\\n", 111 | "{\n", 112 | " \"AlgorithmSpecification\": {\n", 113 | " \"TrainingImage\": container,\n", 114 | " \"TrainingInputMode\": \"File\"\n", 115 | " },\n", 116 | " \"RoleArn\": role,\n", 117 | " \"OutputDataConfig\": {\n", 118 | " \"S3OutputPath\": bucket_path + \"/xgboost-model\"\n", 119 | " },\n", 120 | " \"ResourceConfig\": {\n", 121 | " \"InstanceCount\": 1,\n", 122 | " \"InstanceType\": \"ml.m5.2xlarge\",\n", 123 | " \"VolumeSizeInGB\": 5\n", 124 | " },\n", 125 | " \"TrainingJobName\": job_name,\n", 126 | " \"HyperParameters\": {\n", 127 | " \"max_depth\":\"5\",\n", 128 | " \"eta\":\"0.2\",\n", 129 | " \"gamma\":\"4\",\n", 130 | " \"min_child_weight\":\"6\",\n", 131 | " \"subsample\":\"0.7\",\n", 132 | " \"silent\":\"0\",\n", 133 | " \"objective\":\"reg:linear\",\n", 134 | " \"num_round\":\"50\"\n", 135 | " },\n", 136 | " \"StoppingCondition\": {\n", 137 | " \"MaxRuntimeInSeconds\": 3600\n", 138 | " },\n", 139 | " \"InputDataConfig\": [\n", 140 | " {\n", 141 | " \"ChannelName\": \"train\",\n", 142 | " \"DataSource\": {\n", 143 | " \"S3DataSource\": {\n", 144 | " \"S3DataType\": \"S3Prefix\",\n", 145 | " \"S3Uri\": bucket_path + '/abalone_train',\n", 146 | " \"S3DataDistributionType\": \"FullyReplicated\"\n", 147 | " }\n", 148 | " },\n", 149 | " \"ContentType\": \"text/csv\",\n", 150 | " \"CompressionType\": \"None\"\n", 151 | " },\n", 152 | " {\n", 153 | " \"ChannelName\": \"validation\",\n", 154 | " \"DataSource\": {\n", 155 | " \"S3DataSource\": {\n", 156 | " \"S3DataType\": \"S3Prefix\",\n", 157 | " \"S3Uri\": bucket_path + '/abalone_validation',\n", 158 | " \"S3DataDistributionType\": \"FullyReplicated\"\n", 159 | " }\n", 160 | " },\n", 161 | " \"ContentType\": \"text/csv\",\n", 162 | " \"CompressionType\": \"None\"\n", 163 | " }\n", 164 | " ]\n", 165 | "}\n", 166 | "\n", 167 | "\n", 168 | "client = boto3.client('sagemaker', region_name=region)\n", 169 | "client.create_training_job(**create_training_params)\n", 170 | "\n", 171 | "import time\n", 172 | "\n", 173 | "status = client.describe_training_job(TrainingJobName=job_name)['TrainingJobStatus']\n", 174 | "print(status)\n", 175 | "while status !='Completed' and status!='Failed':\n", 176 | " time.sleep(60)\n", 177 | " status = client.describe_training_job(TrainingJobName=job_name)['TrainingJobStatus']\n", 178 | " print(status)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "If you see the message \"Completed\", that means training successfully completed and the output model was stored in the output path specified by `create_training_params['OutputDataConfig']` above." 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "---\n", 193 | "## 4. Create the model \n", 194 | "In order to set up hosting, we have to import the model from training to hosting. The cell below creates a SageMaker Model from the training output above." 
195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "%%time\n", 204 | "import boto3\n", 205 | "from time import gmtime, strftime\n", 206 | "\n", 207 | "model_name=job_name + '-model'\n", 208 | "print(model_name)\n", 209 | "\n", 210 | "info = client.describe_training_job(TrainingJobName=job_name)\n", 211 | "model_data = info['ModelArtifacts']['S3ModelArtifacts']\n", 212 | "print(model_data)\n", 213 | "\n", 214 | "primary_container = {\n", 215 | " 'Image': container,\n", 216 | " 'ModelDataUrl': model_data\n", 217 | "}\n", 218 | "\n", 219 | "create_model_response = client.create_model(\n", 220 | " ModelName = model_name,\n", 221 | " ExecutionRoleArn = role,\n", 222 | " PrimaryContainer = primary_container)\n", 223 | "\n", 224 | "print(create_model_response['ModelArn'])" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "---\n", 232 | "## 5. Create endpoint \n", 233 | "\n", 234 | "Now that we've created a model we need to create a HTTPS endpoint where your machine learning model is available to provide inferences.\n", 235 | "\n", 236 | "### Create endpoint configuration\n", 237 | "SageMaker supports configuring REST endpoints in hosting with multiple models, e.g. for A/B testing purposes. In order to support this, we need to create an endpoint configuration which describes the distribution of traffic across the models, whether split, shadowed, or sampled in some way. In addition, and more relevant for the current tutorial, the endpoint configuration describes the instance type required for model deployment. " 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "from time import gmtime, strftime\n", 247 | "\n", 248 | "endpoint_config_name = 'deploy-tutorial-' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n", 249 | "print(endpoint_config_name)\n", 250 | "create_endpoint_config_response = client.create_endpoint_config(\n", 251 | " EndpointConfigName = endpoint_config_name,\n", 252 | " ProductionVariants=[{\n", 253 | " 'InstanceType':'ml.m5.xlarge',\n", 254 | " 'InitialVariantWeight':1,\n", 255 | " 'InitialInstanceCount':1,\n", 256 | " 'ModelName':model_name,\n", 257 | " 'VariantName':'AllTraffic'}])\n", 258 | "\n", 259 | "print(\"Endpoint Config Arn: \" + create_endpoint_config_response['EndpointConfigArn'])" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "### Create endpoint\n", 267 | "Finally we will create the endpoint that serves up the model, using the name and configuration defined above. The end result is an endpoint that can be validated and incorporated into production applications. It will take about 10 minutes to run the cell below and set up the endpoint." 
268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": { 274 | "scrolled": true 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "%%time\n", 279 | "import time\n", 280 | "\n", 281 | "endpoint_name = 'deploy-tutorial-' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n", 282 | "print(endpoint_name)\n", 283 | "create_endpoint_response = client.create_endpoint(\n", 284 | " EndpointName=endpoint_name,\n", 285 | " EndpointConfigName=endpoint_config_name)\n", 286 | "print(create_endpoint_response['EndpointArn'])\n", 287 | "\n", 288 | "resp = client.describe_endpoint(EndpointName=endpoint_name)\n", 289 | "status = resp['EndpointStatus']\n", 290 | "while status=='Creating':\n", 291 | " print(\"Status: \" + status)\n", 292 | " time.sleep(60)\n", 293 | " resp = client.describe_endpoint(EndpointName=endpoint_name)\n", 294 | " status = resp['EndpointStatus']\n", 295 | "\n", 296 | "print(\"Arn: \" + resp['EndpointArn'])\n", 297 | "print(\"Status: \" + status)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "---\n", 305 | "## 6. Validate the model for use \n", 306 | "Now that we've created the endpoint we can test that our model is available to perform inference. Let's try it out by making a single prediction which we call the \"payload\" in the cell below." 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "import math\n", 316 | "\n", 317 | "features = 'length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,sex_I,sex_M'\n", 318 | "payload = '0.41,0.325,0.1,0.3555,0.146,0.072,0.105,0,1'\n", 319 | "\n", 320 | "runtime_client = boto3.client('runtime.sagemaker', region_name=region)\n", 321 | "response = runtime_client.invoke_endpoint(EndpointName=endpoint_name, \n", 322 | " ContentType='text/csv', \n", 323 | " Body=payload)\n", 324 | "result = response['Body'].read()\n", 325 | "result = result.decode(\"utf-8\")\n", 326 | "result = result.split(',')\n", 327 | "result = [math.ceil(float(i)) for i in result]\n", 328 | "print(features)\n", 329 | "print(payload)\n", 330 | "print (f'Prediction: {result[0]:.0f}')" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "## 7. Delete Endpoint \n", 338 | "Once you are done using the endpoint, you can use the following to delete it. 
" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "client.delete_endpoint(EndpointName=endpoint_name)" 348 | ] 349 | } 350 | ], 351 | "metadata": { 352 | "anaconda-cloud": {}, 353 | "celltoolbar": "Raw Cell Format", 354 | "kernelspec": { 355 | "display_name": "Python 3", 356 | "language": "python", 357 | "name": "python3" 358 | }, 359 | "language_info": { 360 | "codemirror_mode": { 361 | "name": "ipython", 362 | "version": 3 363 | }, 364 | "file_extension": ".py", 365 | "mimetype": "text/x-python", 366 | "name": "python", 367 | "nbconvert_exporter": "python", 368 | "pygments_lexer": "ipython3", 369 | "version": "3.7.4" 370 | } 371 | }, 372 | "nbformat": 4, 373 | "nbformat_minor": 2 374 | } 375 | -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_0.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_1.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_10.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_11.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_12.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_13.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_14.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_15.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_15.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_16.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_2.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_3.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_4.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_5.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_6.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_7.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_8.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_9.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_9.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/ml-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/ml-deploy.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_0.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_1.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_10.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_11.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_12.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_13.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_14.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_15.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_15.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_16.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_17.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_18.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_19.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_2.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_20.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_21.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_22.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_23.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_23.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_3.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_4.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_5.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_6.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_7.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_8.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_9.png -------------------------------------------------------------------------------- /ml-image-generation/GANs/saved_models/GAN_cgan_generator.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/GANs/saved_models/GAN_cgan_generator.h5 -------------------------------------------------------------------------------- /ml-image-generation/GANs/saved_models/convolutional_GAN_images/image_at_epoch_0001.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/GANs/saved_models/convolutional_GAN_images/image_at_epoch_0001.png -------------------------------------------------------------------------------- /ml-image-generation/GANs/saved_models/convolutional_GAN_images/image_at_epoch_0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/GANs/saved_models/convolutional_GAN_images/image_at_epoch_0002.png -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_2L/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "cvae_2L" 2 | all_model_checkpoint_paths: "cvae_2L" 3 | -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_2L/cvae_2L.data-00000-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/cvae_2L/cvae_2L.data-00000-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_2L/cvae_2L.data-00001-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/cvae_2L/cvae_2L.data-00001-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_2L/cvae_2L.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/cvae_2L/cvae_2L.index -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_8L/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "cvae_8L" 2 | all_model_checkpoint_paths: "cvae_8L" 3 | -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_8L/cvae_8L.data-00000-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/cvae_8L/cvae_8L.data-00000-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_8L/cvae_8L.data-00001-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/cvae_8L/cvae_8L.data-00001-of-00002 -------------------------------------------------------------------------------- 
/ml-image-generation/autoencoders/checkpoints/cvae_8L/cvae_8L.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/cvae_8L/cvae_8L.index -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_2L/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "vae_2L" 2 | all_model_checkpoint_paths: "vae_2L" 3 | -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_2L/vae_2L.data-00000-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/vae_2L/vae_2L.data-00000-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_2L/vae_2L.data-00001-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/vae_2L/vae_2L.data-00001-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_2L/vae_2L.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/vae_2L/vae_2L.index -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_8L/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "vae_8L" 2 | all_model_checkpoint_paths: "vae_8L" 3 | -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_8L/vae_8L.data-00000-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/vae_8L/vae_8L.data-00000-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_8L/vae_8L.data-00001-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/vae_8L/vae_8L.data-00001-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_8L/vae_8L.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/vae_8L/vae_8L.index 
-------------------------------------------------------------------------------- /ml-image-generation/autoencoders/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.9.0 2 | appnope==0.1.0 3 | astor==0.8.1 4 | attrs==19.3.0 5 | backcall==0.1.0 6 | bleach==3.3.0 7 | cachetools==4.0.0 8 | certifi==2019.11.28 9 | chardet==3.0.4 10 | cycler==0.10.0 11 | decorator==4.4.2 12 | defusedxml==0.6.0 13 | entrypoints==0.3 14 | gast==0.2.2 15 | google-auth==1.13.1 16 | google-auth-oauthlib==0.4.1 17 | google-pasta==0.2.0 18 | grpcio==1.27.2 19 | h5py==2.10.0 20 | idna==2.9 21 | importlib-metadata==1.6.0 22 | ipykernel==5.2.0 23 | ipython==7.13.0 24 | ipython-genutils==0.2.0 25 | ipywidgets==7.5.1 26 | jedi==0.16.0 27 | Jinja2==2.11.3 28 | joblib==0.14.1 29 | json5==0.9.4 30 | jsonschema==3.2.0 31 | jupyter==1.0.0 32 | jupyter-client==6.1.2 33 | jupyter-console==6.1.0 34 | jupyter-contrib-core==0.3.3 35 | jupyter-contrib-nbextensions==0.5.1 36 | jupyter-core==4.6.3 37 | jupyter-highlight-selected-word==0.2.0 38 | jupyter-latex-envs==1.4.6 39 | jupyter-nbextensions-configurator==0.4.1 40 | jupyterlab==2.1.1 41 | jupyterlab-server==1.1.1 42 | Keras-Applications==1.0.8 43 | Keras-Preprocessing==1.1.0 44 | kiwisolver==1.2.0 45 | lxml==4.6.3 46 | Markdown==3.2.1 47 | MarkupSafe==1.1.1 48 | matplotlib==3.1.1 49 | mistune==0.8.4 50 | nbconvert==5.6.1 51 | nbformat==5.0.5 52 | notebook==6.1.5 53 | numpy==1.18.2 54 | oauthlib==3.1.0 55 | opt-einsum==3.2.0 56 | pandas==0.25.2 57 | pandocfilters==1.4.2 58 | parso==0.6.2 59 | pexpect==4.8.0 60 | pickleshare==0.7.5 61 | Pillow==8.1.1 62 | prometheus-client==0.7.1 63 | prompt-toolkit==3.0.5 64 | protobuf==3.11.3 65 | ptyprocess==0.6.0 66 | pyasn1==0.4.8 67 | pyasn1-modules==0.2.8 68 | Pygments==2.6.1 69 | pyparsing==2.4.6 70 | pyrsistent==0.16.0 71 | python-dateutil==2.8.1 72 | pytz==2019.3 73 | PyYAML==5.3.1 74 | pyzmq==19.0.0 75 | qtconsole==4.7.2 76 | QtPy==1.9.0 77 | requests==2.23.0 78 | requests-oauthlib==1.3.0 79 | rsa==4.0 80 | scikit-learn==0.21.3 81 | scipy==1.4.1 82 | seaborn==0.9.0 83 | Send2Trash==1.5.0 84 | six==1.14.0 85 | tensorboard==2.0.2 86 | tensorflow==2.4.0 87 | tensorflow-estimator==2.0.1 88 | termcolor==1.1.0 89 | terminado==0.8.3 90 | testpath==0.4.4 91 | tornado==6.0.4 92 | traitlets==4.3.3 93 | urllib3==1.25.8 94 | wcwidth==0.1.9 95 | webencodings==0.5.1 96 | Werkzeug==1.0.1 97 | widgetsnbextension==3.5.1 98 | wrapt==1.12.1 99 | zipp==3.1.0 100 | -------------------------------------------------------------------------------- /ml-timeseries/README.md: -------------------------------------------------------------------------------- 1 | # Supervised learning for time series 2 | 3 | This repository contains notes and examples on using supervised machine learning to model time series data. -------------------------------------------------------------------------------- /ml-timeseries/data/README.md: -------------------------------------------------------------------------------- 1 | # Data availability 2 | 3 | The file "sales_data.csv" contains the "Retail Sales: Clothing and Clothing Accessory Stores" dataset made available by the Federal Reserve Bank of St. Louis; it can be accessed [here](https://fred.stlouisfed.org/series/MRTSSM448USN). [1] 4 | 5 | #### Citation 6 | 7 | [1] U.S. Census Bureau, Retail Sales: Clothing and Clothing Accessory Stores [MRTSSM448USN], retrieved from FRED, Federal Reserve Bank of St. Louis; https://fred.stlouisfed.org/series/MRTSSM448USN.
8 | -------------------------------------------------------------------------------- /ml-timeseries/data/sales_data.csv: -------------------------------------------------------------------------------- 1 | date,sales 2 | 1992-01-01,6938 3 | 1992-02-01,7524 4 | 1992-03-01,8475 5 | 1992-04-01,9401 6 | 1992-05-01,9558 7 | 1992-06-01,9182 8 | 1992-07-01,9103 9 | 1992-08-01,10513 10 | 1992-09-01,9573 11 | 1992-10-01,10254 12 | 1992-11-01,11187 13 | 1992-12-01,18395 14 | 1993-01-01,7502 15 | 1993-02-01,7524 16 | 1993-03-01,8766 17 | 1993-04-01,9867 18 | 1993-05-01,10063 19 | 1993-06-01,9635 20 | 1993-07-01,9794 21 | 1993-08-01,10628 22 | 1993-09-01,10013 23 | 1993-10-01,10346 24 | 1993-11-01,11760 25 | 1993-12-01,18851 26 | 1994-01-01,7280 27 | 1994-02-01,7902 28 | 1994-03-01,9921 29 | 1994-04-01,9869 30 | 1994-05-01,10009 31 | 1994-06-01,9893 32 | 1994-07-01,9735 33 | 1994-08-01,11157 34 | 1994-09-01,10217 35 | 1994-10-01,10730 36 | 1994-11-01,12354 37 | 1994-12-01,20016 38 | 1995-01-01,7518 39 | 1995-02-01,7961 40 | 1995-03-01,9815 41 | 1995-04-01,10168 42 | 1995-05-01,10620 43 | 1995-06-01,10301 44 | 1995-07-01,9784 45 | 1995-08-01,11264 46 | 1995-09-01,10710 47 | 1995-10-01,10439 48 | 1995-11-01,12751 49 | 1995-12-01,20002 50 | 1996-01-01,7684 51 | 1996-02-01,8991 52 | 1996-03-01,10349 53 | 1996-04-01,10570 54 | 1996-05-01,11405 55 | 1996-06-01,10554 56 | 1996-07-01,10202 57 | 1996-08-01,12134 58 | 1996-09-01,10623 59 | 1996-10-01,11250 60 | 1996-11-01,12875 61 | 1996-12-01,19944 62 | 1997-01-01,8194 63 | 1997-02-01,8835 64 | 1997-03-01,10840 65 | 1997-04-01,10131 66 | 1997-05-01,11505 67 | 1997-06-01,10654 68 | 1997-07-01,10734 69 | 1997-08-01,12461 70 | 1997-09-01,10942 71 | 1997-10-01,11635 72 | 1997-11-01,13244 73 | 1997-12-01,21118 74 | 1998-01-01,8800 75 | 1998-02-01,9499 76 | 1998-03-01,10863 77 | 1998-04-01,11825 78 | 1998-05-01,12239 79 | 1998-06-01,11451 80 | 1998-07-01,11633 81 | 1998-08-01,12971 82 | 1998-09-01,11214 83 | 1998-10-01,12384 84 | 1998-11-01,13854 85 | 1998-12-01,22418 86 | 1999-01-01,9237 87 | 1999-02-01,10171 88 | 1999-03-01,12081 89 | 1999-04-01,12386 90 | 1999-05-01,13167 91 | 1999-06-01,12280 92 | 1999-07-01,12461 93 | 1999-08-01,13734 94 | 1999-09-01,12357 95 | 1999-10-01,12948 96 | 1999-11-01,14643 97 | 1999-12-01,24286 98 | 2000-01-01,9447 99 | 2000-02-01,11170 100 | 2000-03-01,12841 101 | 2000-04-01,13124 102 | 2000-05-01,13735 103 | 2000-06-01,12953 104 | 2000-07-01,12500 105 | 2000-08-01,14610 106 | 2000-09-01,13375 107 | 2000-10-01,13369 108 | 2000-11-01,15675 109 | 2000-12-01,24875 110 | 2001-01-01,10060 111 | 2001-02-01,11450 112 | 2001-03-01,13067 113 | 2001-04-01,13362 114 | 2001-05-01,13787 115 | 2001-06-01,12935 116 | 2001-07-01,12600 117 | 2001-08-01,14818 118 | 2001-09-01,12104 119 | 2001-10-01,13218 120 | 2001-11-01,15352 121 | 2001-12-01,24534 122 | 2002-01-01,10344 123 | 2002-02-01,11730 124 | 2002-03-01,13977 125 | 2002-04-01,13195 126 | 2002-05-01,14150 127 | 2002-06-01,13210 128 | 2002-07-01,12873 129 | 2002-08-01,15113 130 | 2002-09-01,12445 131 | 2002-10-01,14006 132 | 2002-11-01,15911 133 | 2002-12-01,25350 134 | 2003-01-01,10804 135 | 2003-02-01,11662 136 | 2003-03-01,13452 137 | 2003-04-01,13691 138 | 2003-05-01,14730 139 | 2003-06-01,13496 140 | 2003-07-01,13854 141 | 2003-08-01,15522 142 | 2003-09-01,13567 143 | 2003-10-01,14601 144 | 2003-11-01,16555 145 | 2003-12-01,26760 146 | 2004-01-01,11790 147 | 2004-02-01,13344 148 | 2004-03-01,14760 149 | 2004-04-01,15058 150 | 2004-05-01,15379 151 | 2004-06-01,14237 152 | 2004-07-01,14667 
153 | 2004-08-01,15588 154 | 2004-09-01,14224 155 | 2004-10-01,15570 156 | 2004-11-01,17230 157 | 2004-12-01,28406 158 | 2005-01-01,12046 159 | 2005-02-01,13878 160 | 2005-03-01,15727 161 | 2005-04-01,15708 162 | 2005-05-01,15989 163 | 2005-06-01,15559 164 | 2005-07-01,15218 165 | 2005-08-01,16697 166 | 2005-09-01,14960 167 | 2005-10-01,16509 168 | 2005-11-01,18402 169 | 2005-12-01,30276 170 | 2006-01-01,12893 171 | 2006-02-01,14474 172 | 2006-03-01,16386 173 | 2006-04-01,16848 174 | 2006-05-01,17103 175 | 2006-06-01,16505 176 | 2006-07-01,16275 177 | 2006-08-01,17832 178 | 2006-09-01,16767 179 | 2006-10-01,17253 180 | 2006-11-01,19391 181 | 2006-12-01,31462 182 | 2007-01-01,13927 183 | 2007-02-01,15077 184 | 2007-03-01,18045 185 | 2007-04-01,17096 186 | 2007-05-01,18474 187 | 2007-06-01,17289 188 | 2007-07-01,16883 189 | 2007-08-01,18850 190 | 2007-09-01,16765 191 | 2007-10-01,17614 192 | 2007-11-01,20550 193 | 2007-12-01,30635 194 | 2008-01-01,14173 195 | 2008-02-01,15876 196 | 2008-03-01,17770 197 | 2008-04-01,17103 198 | 2008-05-01,19084 199 | 2008-06-01,17007 200 | 2008-07-01,17369 201 | 2008-08-01,19041 202 | 2008-09-01,15882 203 | 2008-10-01,16796 204 | 2008-11-01,18756 205 | 2008-12-01,26726 206 | 2009-01-01,13387 207 | 2009-02-01,14684 208 | 2009-03-01,15563 209 | 2009-04-01,16337 210 | 2009-05-01,17264 211 | 2009-06-01,15434 212 | 2009-07-01,16007 213 | 2009-08-01,17656 214 | 2009-09-01,15630 215 | 2009-10-01,17053 216 | 2009-11-01,18332 217 | 2009-12-01,27128 218 | 2010-01-01,13216 219 | 2010-02-01,14816 220 | 2010-03-01,17390 221 | 2010-04-01,17042 222 | 2010-05-01,17727 223 | 2010-06-01,16138 224 | 2010-07-01,16842 225 | 2010-08-01,17923 226 | 2010-09-01,16232 227 | 2010-10-01,17412 228 | 2010-11-01,20003 229 | 2010-12-01,28545 230 | 2011-01-01,13703 231 | 2011-02-01,15931 232 | 2011-03-01,18252 233 | 2011-04-01,18647 234 | 2011-05-01,18771 235 | 2011-06-01,17783 236 | 2011-07-01,17937 237 | 2011-08-01,19263 238 | 2011-09-01,17999 239 | 2011-10-01,18255 240 | 2011-11-01,20957 241 | 2011-12-01,31108 242 | 2012-01-01,14358 243 | 2012-02-01,17931 244 | 2012-03-01,20162 245 | 2012-04-01,18601 246 | 2012-05-01,19952 247 | 2012-06-01,18717 248 | 2012-07-01,18266 249 | 2012-08-01,20750 250 | 2012-09-01,18391 251 | 2012-10-01,18845 252 | 2012-11-01,22016 253 | 2012-12-01,31504 254 | 2013-01-01,15155 255 | 2013-02-01,17382 256 | 2013-03-01,20575 257 | 2013-04-01,19176 258 | 2013-05-01,20780 259 | 2013-06-01,18939 260 | 2013-07-01,19176 261 | 2013-08-01,21558 262 | 2013-09-01,18111 263 | 2013-10-01,19855 264 | 2013-11-01,22437 265 | 2013-12-01,31578 266 | 2014-01-01,15179 267 | 2014-02-01,17507 268 | 2014-03-01,20070 269 | 2014-04-01,20322 270 | 2014-05-01,21568 271 | 2014-06-01,18947 272 | 2014-07-01,19828 273 | 2014-08-01,21993 274 | 2014-09-01,18646 275 | 2014-10-01,20220 276 | 2014-11-01,23491 277 | 2014-12-01,32638 278 | 2015-01-01,15764 279 | 2015-02-01,17980 280 | 2015-03-01,20752 281 | 2015-04-01,20389 282 | 2015-05-01,22145 283 | 2015-06-01,19667 284 | 2015-07-01,20564 285 | 2015-08-01,22314 286 | 2015-09-01,19151 287 | 2015-10-01,20637 288 | 2015-11-01,23090 289 | 2015-12-01,33345 290 | 2016-01-01,15694 291 | 2016-02-01,18939 292 | 2016-03-01,21492 293 | 2016-04-01,20428 294 | 2016-05-01,21656 295 | 2016-06-01,20160 296 | 2016-07-01,20667 297 | 2016-08-01,22388 298 | 2016-09-01,19790 299 | 2016-10-01,20500 300 | 2016-11-01,23644 301 | 2016-12-01,34482 302 | 2017-01-01,15663 303 | 2017-02-01,17745 304 | 2017-03-01,21028 305 | 2017-04-01,20852 306 | 2017-05-01,21606 307 | 
2017-06-01,20322 308 | 2017-07-01,20535 309 | 2017-08-01,22536 310 | 2017-09-01,19720 311 | 2017-10-01,20307 312 | 2017-11-01,24438 313 | 2017-12-01,33720 314 | 2018-01-01,15881 315 | 2018-02-01,18585 316 | 2018-03-01,22404 317 | 2018-04-01,20616 318 | 2018-05-01,23764 319 | 2018-06-01,21589 320 | 2018-07-01,21919 321 | 2018-08-01,23381 322 | 2018-09-01,20260 323 | 2018-10-01,21473 324 | 2018-11-01,25831 325 | 2018-12-01,34706 326 | 2019-01-01,16410 327 | 2019-02-01,18134 328 | 2019-03-01,22093 329 | 2019-04-01,21597 330 | 2019-05-01,23200 331 | 2019-06-01,21123 332 | 2019-07-01,21714 333 | 2019-08-01,23791 334 | 2019-09-01,19695 335 | 2019-10-01,21113 -------------------------------------------------------------------------------- /ml-timeseries/docs/img/ts_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-timeseries/docs/img/ts_1.png -------------------------------------------------------------------------------- /ml-timeseries/docs/img/ts_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-timeseries/docs/img/ts_2.png --------------------------------------------------------------------------------
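The `supervised_time_series_intro.ipynb` notebook builds on the idea stated in the ml-timeseries README above: reframe a univariate series as a tabular supervised-learning problem by using lagged values as features. A minimal sketch of that reframing on `sales_data.csv` as shipped above (columns `date`, `sales`); the 12-lag window, the chronological split, and the random forest (scikit-learn is assumed available) are illustrative choices, not necessarily what the notebook uses:

```python
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

df = pd.read_csv("ml-timeseries/data/sales_data.csv", parse_dates=["date"])

# Features for each month are the previous 12 months of sales; the target is
# the current month. The first 12 rows have incomplete histories, so drop them.
for lag in range(1, 13):
    df[f"lag_{lag}"] = df["sales"].shift(lag)
df = df.dropna()

# Split chronologically (never shuffle time series): hold out the last 2 years.
train, valid = df.iloc[:-24], df.iloc[-24:]
features = [c for c in df.columns if c.startswith("lag_")]

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(train[features], train["sales"])
print(model.score(valid[features], valid["sales"]))  # R^2 on the held-out years
```

A 12-lag window is a natural starting point here because the December spikes visible in the raw data recur every 12 months, so each row's features span one full seasonal cycle.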