├── .gitignore
├── LICENSE.md
├── README.md
├── ml-animations
│   ├── LICENSE
│   ├── README.md
│   ├── gif
│   │   ├── cnn
│   │   │   └── cnn_1d.gif
│   │   ├── decision_tree
│   │   │   ├── decision_tree.gif
│   │   │   ├── decision_tree_1.gif
│   │   │   ├── decision_tree_2.gif
│   │   │   ├── decision_tree_3.gif
│   │   │   └── decision_tree_4.gif
│   │   └── knn
│   │       └── knn.gif
│   └── notebooks
│       ├── cnn_1d.ipynb
│       ├── cross_validation_draft.ipynb
│       ├── decision_tree.ipynb
│       ├── knn_draft.ipynb
│       └── logistic_regression.ipynb
├── ml-clustering
│   └── clustering-mixed-data.ipynb
├── ml-deploy-model
│   ├── README.md
│   ├── data
│   │   ├── README.md
│   │   ├── abalone_train.csv
│   │   ├── abalone_validation.csv
│   │   ├── column_names.csv
│   │   └── raw
│   │       ├── abalone.csv
│   │       └── preprocessing.py
│   ├── deploy-with-flask.ipynb
│   ├── deploy-with-flask
│   │   ├── build_model.ipynb
│   │   ├── web_api
│   │   │   ├── Procfile
│   │   │   ├── abalone_predictor.joblib
│   │   │   ├── app.py
│   │   │   ├── default.profraw
│   │   │   └── requirements.txt
│   │   └── web_application
│   │       ├── Procfile
│   │       ├── abalone_predictor.joblib
│   │       ├── app.py
│   │       ├── requirements.txt
│   │       ├── static
│   │       │   └── style.css
│   │       └── templates
│   │           ├── home.html
│   │           └── prediction.html
│   ├── deploy-with-sagemaker.ipynb
│   ├── deploy-with-sagemaker
│   │   └── xgboost_abalone.ipynb
│   └── docs
│       └── img
│           ├── flask_images
│           │   ├── fl_0.png
│           │   ├── fl_1.png
│           │   ├── fl_10.png
│           │   ├── fl_11.png
│           │   ├── fl_12.png
│           │   ├── fl_13.png
│           │   ├── fl_14.png
│           │   ├── fl_15.png
│           │   ├── fl_16.png
│           │   ├── fl_2.png
│           │   ├── fl_3.png
│           │   ├── fl_4.png
│           │   ├── fl_5.png
│           │   ├── fl_6.png
│           │   ├── fl_7.png
│           │   ├── fl_8.png
│           │   └── fl_9.png
│           ├── ml-deploy.png
│           └── sagemaker_images
│               ├── sm_0.png
│               ├── sm_1.png
│               ├── sm_10.png
│               ├── sm_11.png
│               ├── sm_12.png
│               ├── sm_13.png
│               ├── sm_14.png
│               ├── sm_15.png
│               ├── sm_16.png
│               ├── sm_17.png
│               ├── sm_18.png
│               ├── sm_19.png
│               ├── sm_2.png
│               ├── sm_20.png
│               ├── sm_21.png
│               ├── sm_22.png
│               ├── sm_23.png
│               ├── sm_3.png
│               ├── sm_4.png
│               ├── sm_5.png
│               ├── sm_6.png
│               ├── sm_7.png
│               ├── sm_8.png
│               └── sm_9.png
├── ml-image-generation
│   ├── GANs
│   │   ├── GAN.ipynb
│   │   ├── conditional_GAN.ipynb
│   │   ├── convolutional_GAN_(run_with_google_colab).ipynb
│   │   └── saved_models
│   │       ├── GAN_cgan_generator.h5
│   │       └── convolutional_GAN_images
│   │           ├── image_at_epoch_0001.png
│   │           └── image_at_epoch_0002.png
│   └── autoencoders
│       ├── autoencoders.ipynb
│       ├── checkpoints
│       │   ├── cvae_2L
│       │   │   ├── checkpoint
│       │   │   ├── cvae_2L.data-00000-of-00002
│       │   │   ├── cvae_2L.data-00001-of-00002
│       │   │   └── cvae_2L.index
│       │   ├── cvae_8L
│       │   │   ├── checkpoint
│       │   │   ├── cvae_8L.data-00000-of-00002
│       │   │   ├── cvae_8L.data-00001-of-00002
│       │   │   └── cvae_8L.index
│       │   ├── vae_2L
│       │   │   ├── checkpoint
│       │   │   ├── vae_2L.data-00000-of-00002
│       │   │   ├── vae_2L.data-00001-of-00002
│       │   │   └── vae_2L.index
│       │   └── vae_8L
│       │       ├── checkpoint
│       │       ├── vae_8L.data-00000-of-00002
│       │       ├── vae_8L.data-00001-of-00002
│       │       └── vae_8L.index
│       ├── requirements.txt
│       └── variational_autoencoders.ipynb
└── ml-timeseries
    ├── README.md
    ├── data
    │   ├── README.md
    │   └── sales_data.csv
    ├── docs
    │   └── img
    │       ├── ts_1.png
    │       └── ts_2.png
    └── notebooks
        └── supervised_time_series_intro.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | /ml-timeseries/notebooks/supervised_time_series_intro_notes.md
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Tomas Beuzen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the 
Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A collection of data science and machine learning tutorials 2 | 3 | ![](https://img.shields.io/badge/-tutorial-informational) 4 | ![](https://img.shields.io/badge/-machine--learning-important) 5 | ![](https://img.shields.io/badge/-data--science-lightgrey) 6 | 7 | This repository contains a variety of useful data science/machine learning tutorials I've developed over time. Here's the current list: 8 | 9 | - [animations of machine learning models for pedagogy](ml-animations) 10 | - [deploying machine learning models with Amazon Sagemaker or Flask](ml-deploy-model) 11 | - [image generation with VAEs and GANs](ml-image-generation) 12 | - [supervised learning for time series data](ml-timeseries) -------------------------------------------------------------------------------- /ml-animations/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Tomas Beuzen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ml-animations/README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning Animations 2 | 3 | I find visuals, particularly animations, especially useful for understanding how machine learning algorithms work. 
This repository houses animations that I've developed for teaching purposes. 4 | 5 | ## Decision Tree 6 | 7 | The animation below shows a decision tree being built. For every potential split of the raw data (left panel), the [Gini impurity](https://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity) is calculated (right panel). The split that minimizes the impurity is chosen for the tree, and the process repeats until all data points have been separated into homogeneous groups. 8 | 9 | ![Decision Tree](./gif/decision_tree/decision_tree.gif) 10 | 11 | ## *k*-Nearest Neighbors 12 | 13 | The animation below shows the prediction of an unknown point using increasing values of *k* in the *k*-nearest neighbors algorithm. Only odd values of *k* are shown: in a two-class problem like this one, even values of *k* can produce ties, in which case a tie-breaking rule is needed to predict the query point (for example, predicting a class at random, or using the class of the single closest point). 14 | 15 | ![kNN](./gif/knn/knn.gif) 16 | 17 | ## Convolutional Neural Network 18 | 19 | ### 1D ConvNet 20 | 21 | The animation below shows how a 1D sequence (d=1) of 21 observations (T=21) is "broken into" 4 sequences by a 1D convolutional layer with 4 filters (f=4) of length 3. The input is actually 2D, with shape (T=21, d=1). This is a little confusing given that we are working with a "1D ConvNet", but the "1D" refers to the dimensionality of the filters being passed over the data, not of the data itself - as you can see below, we pass a 1D filter of length 3 over the sequence. The output of the 1D convolutional layer then has shape (T=21, f=4): one output sequence per filter. Note that the ends of the input sequence have been zero-padded so that the filters can be applied there. The filter weights here are just random numbers; the network has not been trained. The layer has 16 parameters: (1 input x 4 filters) * (3 weights per filter) + (4 biases) = 16 parameters. 
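To make the shape and parameter bookkeeping concrete, here is a minimal sketch of the same layer (it mirrors the code in `notebooks/cnn_1d.ipynb`; the variable names `T`, `d`, `f` and `width` are illustrative, not from the notebook):

```python
import numpy as np
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.models import Sequential

T, d, f, width = 21, 1, 4, 3  # sequence length, input channels, filters, filter length

# padding='same' zero-pads the ends of the sequence so that each filter
# produces an output sequence of the same length as the input
model = Sequential([Conv1D(f, kernel_size=width, input_shape=(T, d), padding="same")])

x = np.random.randn(1, T, d)   # one example, shape (batch, T, d)
print(model.predict(x).shape)  # (1, 21, 4): one length-21 output sequence per filter
print(model.count_params())    # 16 = (1 input x 4 filters) * (3 weights) + 4 biases
```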
22 | 23 | ![cnn](./gif/cnn/cnn_1d.gif) 24 | -------------------------------------------------------------------------------- /ml-animations/gif/cnn/cnn_1d.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/cnn/cnn_1d.gif -------------------------------------------------------------------------------- /ml-animations/gif/decision_tree/decision_tree.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/decision_tree/decision_tree.gif -------------------------------------------------------------------------------- /ml-animations/gif/decision_tree/decision_tree_1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/decision_tree/decision_tree_1.gif -------------------------------------------------------------------------------- /ml-animations/gif/decision_tree/decision_tree_2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/decision_tree/decision_tree_2.gif -------------------------------------------------------------------------------- /ml-animations/gif/decision_tree/decision_tree_3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/decision_tree/decision_tree_3.gif -------------------------------------------------------------------------------- /ml-animations/gif/decision_tree/decision_tree_4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/decision_tree/decision_tree_4.gif -------------------------------------------------------------------------------- /ml-animations/gif/knn/knn.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-animations/gif/knn/knn.gif -------------------------------------------------------------------------------- /ml-animations/notebooks/cnn_1d.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Imports" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from tensorflow.keras.layers import Conv1D\n", 18 | "from tensorflow.keras.models import Sequential\n", 19 | "from tensorflow.random import set_seed\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import matplotlib.patches as patches\n", 22 | "import matplotlib.animation as animation\n", 23 | "from IPython.display import HTML, Image\n", 24 | "plt.style.use('ggplot')\n", 25 | "params = {'legend.fontsize': '18',\n", 26 | " 
'axes.labelsize': '20',\n", 27 | " 'axes.labelweight': 'bold',\n", 28 | " 'axes.titlesize':'20',\n", 29 | " 'xtick.labelsize':'18',\n", 30 | " 'ytick.labelsize':'18'}\n", 31 | "plt.rcParams.update(params)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "#### Functions" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "def mk_fig():\n", 48 | " \"\"\"\n", 49 | " Convenience function to plot figure canvas\n", 50 | "\n", 51 | " Returns\n", 52 | " -------\n", 53 | " fig, axes\n", 54 | " Figure and axes objects\n", 55 | " \"\"\"\n", 56 | " fig, axes = plt.subplots(1, 2, figsize=(14, 4.5))\n", 57 | " axes[0].set_xlim(-2, 22)\n", 58 | " axes[0].set_ylim(-1, 1)\n", 59 | " axes[0].set_xlabel('x')\n", 60 | " axes[0].set_ylabel('y')\n", 61 | " axes[0].set_title('Input sequence')\n", 62 | " axes[1].set_xlim(-2, 22)\n", 63 | " axes[1].set_ylim(-1, 1)\n", 64 | " axes[1].set_xlabel('x')\n", 65 | " axes[1].set_title('Conv1D layer output')\n", 66 | " axes[0].plot([-10], [-10], '-k', marker='.', ms=13, label='Original data')\n", 67 | " axes[0].plot([-10], [-10], '-k', marker='.', markerfacecolor='w', ms=13, zorder=1, label='Zero padding')\n", 68 | " axes[0].legend(facecolor='w', loc=3)\n", 69 | " \n", 70 | " return fig, axes" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "#### Data" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 3, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "np.random.seed(1)\n", 87 | "n = 21\n", 88 | "x = (np.sin(np.linspace(0, 3, n)) + np.random.randn(n)*0.1) * 0.8" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "#### Model" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "Model: \"conv1d_model\"\n", 108 | "_________________________________________________________________\n", 109 | "Layer (type) Output Shape Param # \n", 110 | "=================================================================\n", 111 | "conv1d (Conv1D) (None, 21, 4) 16 \n", 112 | "=================================================================\n", 113 | "Total params: 16\n", 114 | "Trainable params: 16\n", 115 | "Non-trainable params: 0\n", 116 | "_________________________________________________________________\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "set_seed(1)\n", 122 | "model = Sequential(name=\"conv1d_model\")\n", 123 | "filters = 4\n", 124 | "kernel_size = 3\n", 125 | "model.add(Conv1D(filters, kernel_size=kernel_size, input_shape=(n, 1), padding='same'))\n", 126 | "x_out = model.predict(x[None,:,None])[0]\n", 127 | "model.summary()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 6, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "array([[-0.4235797 , -0.26317865, 0.20251465],\n", 139 | " [ 0.5078381 , 0.18025243, 0.13268387],\n", 140 | " [ 0.1656707 , 0.60182637, 0.1728267 ],\n", 141 | " [-0.08279335, -0.08209336, 0.14476752]], dtype=float32)" 142 | ] 143 | }, 144 | "execution_count": 6, 145 | "metadata": {}, 146 | "output_type": "execute_result" 147 | } 148 | ], 149 | "source": [ 150 | "model.get_weights()[0][:,0,:].T" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | 
"source": [ 157 | "#### Create and save animations" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 5, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "fig, axes = mk_fig()\n", 167 | "fig.tight_layout()\n", 168 | "ec = [(0.89, 0.29, 0.2, 1), (0.2, 0.54, 0.74, 1), (0.60, 0.56, 0.84, 1), (0.47, 0.47, 0.47, 1)]\n", 169 | "fc = [(0.89, 0.29, 0.2, 0.2), (0.2, 0.54, 0.74, 0.2), (0.60, 0.56, 0.84, 0.2), (0.47, 0.47, 0.47, 0.2)]\n", 170 | "\n", 171 | "def init():\n", 172 | " axes[0].plot(x, '-k', marker='.', ms=13)\n", 173 | " axes[0].plot([-1, 0], [0, x[0]], 'k', marker='.', markerfacecolor='w', ms=13, zorder=1, label='zero padding')\n", 174 | " axes[0].plot([20, 21], [ x[-1], 0], 'k', marker='.', markerfacecolor='w', ms=13, zorder=1)\n", 175 | "\n", 176 | "def animate(i):\n", 177 | " axes[0].set_title(f'Input sequence. Passing filter {i//21 + 1}.')\n", 178 | " [p.remove() for p in reversed(axes[0].patches)];\n", 179 | " p = []\n", 180 | " p.append(patches.Rectangle((i%n-1.5, x[i%n]-0.4), 1, 0.8, linewidth=1, edgecolor=ec[i//21], facecolor=fc[i//21]))\n", 181 | " p.append(patches.Rectangle((i%n-0.5, x[i%n]-0.4), 1, 0.8, linewidth=1, edgecolor=ec[i//21], facecolor=fc[i//21]))\n", 182 | " p.append(patches.Rectangle((i%n+0.5, x[i%n]-0.4), 1, 0.8, linewidth=1, edgecolor=ec[i//21], facecolor=fc[i//21]))\n", 183 | " for _ in p:\n", 184 | " axes[0].add_patch(_)\n", 185 | " axes[1].plot(x_out[:i%n+1,i//21], color=ec[i//21], marker='.', ms=13);\n", 186 | " \n", 187 | "plt.close(fig)\n", 188 | "ani = animation.FuncAnimation(fig,\n", 189 | " animate,\n", 190 | " init_func=init,\n", 191 | " frames=21*filters)\n", 192 | "ani.save('../gif/cnn/cnn_1d.gif', writer='imagemagick', fps=3, dpi=75)\n", 193 | "# HTML(ani.to_jshtml())" 194 | ] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python [conda env:tf]", 200 | "language": "python", 201 | "name": "conda-env-tf-py" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.6.10" 214 | }, 215 | "toc": { 216 | "base_numbering": 1, 217 | "nav_menu": {}, 218 | "number_sections": true, 219 | "sideBar": true, 220 | "skip_h1_title": true, 221 | "title_cell": "Table of Contents", 222 | "title_sidebar": "Contents", 223 | "toc_cell": false, 224 | "toc_position": {}, 225 | "toc_section_display": true, 226 | "toc_window_display": false 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 2 231 | } 232 | -------------------------------------------------------------------------------- /ml-animations/notebooks/cross_validation_draft.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Imports" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 14, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "import matplotlib.animation as animation\n", 19 | "from IPython.display import HTML, Image\n", 20 | "plt.style.use('ggplot')\n", 21 | "params = {'legend.fontsize': '18',\n", 22 | " 'axes.labelsize': '20',\n", 23 | " 'axes.labelweight': 'bold',\n", 24 | " 'axes.titlesize':'20',\n", 25 | " 'xtick.labelsize':'18',\n", 26 | " 
'ytick.labelsize':'18'}\n", 27 | "plt.rcParams.update(params)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "#### Functions" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 15, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "def gini(*args):\n", 44 | " \"\"\"\n", 45 | " Calculates the gini impurity for binary class data.\n", 46 | "\n", 47 | " Parameters\n", 48 | " ----------\n", 49 | " *args : int\n", 50 | " Number of examples in class i\n", 51 | "\n", 52 | " Returns\n", 53 | " -------\n", 54 | " float\n", 55 | " The gini impurity\n", 56 | " \"\"\"\n", 57 | " n = sum(args) # total examples\n", 58 | " gini = 0\n", 59 | " for c in args:\n", 60 | " gini += (c / n) * (1 - (c / n))\n", 61 | " return gini\n", 62 | "\n", 63 | "\n", 64 | "def split(x, y, splits):\n", 65 | " \"\"\"\n", 66 | " Calculates the gini impurity for binary class data.\n", 67 | "\n", 68 | " Parameters\n", 69 | " ----------\n", 70 | " x : int\n", 71 | " Feature values\n", 72 | " y : int\n", 73 | " Corresponding target values\n", 74 | " splits : int\n", 75 | " Vector of splits to calculate gini criterion for\n", 76 | "\n", 77 | " Returns\n", 78 | " -------\n", 79 | " list\n", 80 | " List of gini impurity for each split\n", 81 | " \"\"\"\n", 82 | " gini_splits = []\n", 83 | " for i in splits:\n", 84 | " mask = x < i\n", 85 | " gini_L = gini(sum(y[mask] == 0),\n", 86 | " sum(y[mask] == 1))\n", 87 | " p_L = sum(mask) / len(mask)\n", 88 | " gini_R = gini(sum(y[~mask] == 0),\n", 89 | " sum(y[~mask] == 1))\n", 90 | " p_R = sum(~mask) / len(mask)\n", 91 | " gini_splits.append(gini_L * p_L + gini_R * p_R)\n", 92 | " \n", 93 | " return gini_splits\n", 94 | "\n", 95 | "\n", 96 | "def mk_fig():\n", 97 | " \"\"\"\n", 98 | " Convenience function to plot figure canvas\n", 99 | "\n", 100 | " Returns\n", 101 | " -------\n", 102 | " fig, axes\n", 103 | " Figure and axes objects\n", 104 | " \"\"\"\n", 105 | " fig, axes = plt.subplots(1, 2, figsize=(14, 7))\n", 106 | " axes[0].set_xlim(-1, 11)\n", 107 | " axes[0].set_ylim(-1, 11)\n", 108 | " axes[0].set_xlabel('X1')\n", 109 | " axes[0].set_ylabel('X2')\n", 110 | " axes[0].xaxis.label.set_color('#988ED5')\n", 111 | " axes[0].yaxis.label.set_color('#E8A2A5')\n", 112 | " axes[1].set_xlim(-1, 11)\n", 113 | " axes[1].set_ylim(0, 1)\n", 114 | " axes[1].set_xlabel('Feature Value')\n", 115 | " axes[1].set_ylabel('Gini Impurity')\n", 116 | " \n", 117 | " return fig, axes" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "#### Data" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 16, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "np.random.seed(3)\n", 134 | "x1 = np.random.randint(0, 10, 10)\n", 135 | "x2 = np.random.randint(0, 10, 10)\n", 136 | "y = np.random.randint(0, 2, 10)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "#### Splits" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 17, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "x1_unique = np.unique(x1)\n", 153 | "x1_splits = x1_unique[:-1] + np.diff(x1_unique) / 2\n", 154 | "x1_gini = split(x1, y, x1_splits)\n", 155 | "\n", 156 | "x2_unique = np.unique(x2)\n", 157 | "x2_splits = x2_unique[:-1] + np.diff(x2_unique) / 2\n", 158 | "x2_gini = split(x2, y, x2_splits)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | 
"source": [ 165 | "#### Create and save animations" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "##### First split" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 18, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "fig, axes = mk_fig()\n", 182 | "mask = y == 0\n", 183 | "j = len(x1_splits)\n", 184 | "f = len(x1_splits) + len(x2_splits) + 1\n", 185 | "\n", 186 | "def init():\n", 187 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 188 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 189 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 190 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 191 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 192 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 193 | "\n", 194 | "def animate(i):\n", 195 | " if i <= j: # plot x1 splits\n", 196 | " axes[0].vlines(x1_splits[:i], -1, 11, '#988ED5')\n", 197 | " axes[1].plot(x1_splits[:i], x1_gini[:i], '#988ED5', marker='o', ms=9)\n", 198 | " elif i < f: # plot x2 splits\n", 199 | " axes[0].hlines(x2_splits[:(i-j)], -1, 11, '#E8A2A5')\n", 200 | " axes[1].plot(x2_splits[:(i-j)], x2_gini[:(i-j)], '#E8A2A5', marker='o', ms=9)\n", 201 | " else: # highlight optimum split\n", 202 | " if min(x1_gini) <= min(x2_gini):\n", 203 | " k = np.argmin(x1_gini)\n", 204 | " axes[0].vlines(x1_splits[k], -1, 11, 'k', lw=3)\n", 205 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 206 | " else:\n", 207 | " k = np.argmin(x1_gini)\n", 208 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc=\"None\")\n", 209 | " axes[0].vlines(x1_splits[k], -1, 11, 'k', lw=2)\n", 210 | " \n", 211 | "plt.close(fig)\n", 212 | "ani = animation.FuncAnimation(fig,\n", 213 | " animate,\n", 214 | " init_func=init,\n", 215 | " frames=f + 1,\n", 216 | " interval=600)\n", 217 | "ani.save('../gif/decision_tree/decision_tree_1.gif', writer='imagemagick', fps=1, dpi=150)\n", 218 | "# HTML(ani.to_jshtml())" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "##### Second split" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 19, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "split_1 = 4\n", 235 | "mask_1 = x1 > split_1" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 20, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "x1_unique = np.unique(x1[mask_1])\n", 245 | "x1_splits = x1_unique[:-1] + np.diff(x1_unique) / 2\n", 246 | "x1_gini = split(x1[mask_1], y[mask_1], x1_splits)\n", 247 | "\n", 248 | "x2_unique = np.unique(x2[mask_1])\n", 249 | "x2_splits = x2_unique[:-1] + np.diff(x2_unique) / 2\n", 250 | "x2_gini = split(x2[mask_1], y[mask_1], x2_splits)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 21, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "fig, axes = mk_fig()\n", 260 | "mask = y == 0\n", 261 | "j = len(x1_splits)\n", 262 | "f = len(x1_splits) + len(x2_splits) + 1\n", 263 | "\n", 264 | "def init():\n", 265 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 266 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 267 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 268 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 
splits') # legend place-holders\n", 269 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 270 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 271 | " axes[0].vlines(split_1, -1, 11, 'k', lw=3)\n", 272 | "\n", 273 | "def animate(i):\n", 274 | " if i <= j: # plot x1 splits\n", 275 | " axes[0].vlines(x1_splits[:i], -1, 11, '#988ED5')\n", 276 | " axes[1].plot(x1_splits[:i], x1_gini[:i], '#988ED5', marker='o', ms=9)\n", 277 | " elif i < f: # plot x2 splits\n", 278 | " axes[0].hlines(x2_splits[:(i-j)], split_1, 11, '#E8A2A5')\n", 279 | " axes[1].plot(x2_splits[:(i-j)], x2_gini[:(i-j)], '#E8A2A5', marker='o', ms=9)\n", 280 | " else: # highlight optimum split\n", 281 | " if min(x1_gini) <= min(x2_gini):\n", 282 | " k = np.argmin(x1_gini)\n", 283 | " axes[0].vlines(x1_splits[k], -1, 11, 'k', lw=3)\n", 284 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 285 | " else:\n", 286 | " k = np.argmin(x2_gini)\n", 287 | " axes[0].hlines(x2_splits[k], split_1, 11, 'k', lw=3)\n", 288 | " axes[1].plot(x2_splits[k], x2_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 289 | " \n", 290 | "plt.close(fig)\n", 291 | "ani = animation.FuncAnimation(fig,\n", 292 | " animate,\n", 293 | " init_func=init,\n", 294 | " frames=f + 1,\n", 295 | " interval=600)\n", 296 | "ani.save('../gif/decision_tree/decision_tree_2.gif', writer='imagemagick', fps=1, dpi=150)\n", 297 | "# HTML(ani.to_jshtml())" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "##### Third split" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 22, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "split_2 = 4.5\n", 314 | "mask_2 = (x1 > split_1) & (x2 > split_2)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 23, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "x1_unique = np.unique(x1[mask_2])\n", 324 | "x1_splits = x1_unique[:-1] + np.diff(x1_unique) / 2\n", 325 | "x1_gini = split(x1[mask_2], y[mask_2], x1_splits)\n", 326 | "\n", 327 | "x2_unique = np.unique(x2[mask_2])\n", 328 | "x2_splits = x2_unique[:-1] + np.diff(x2_unique) / 2\n", 329 | "x2_gini = split(x2[mask_2], y[mask_2], x2_splits)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 24, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "fig, axes = mk_fig()\n", 339 | "mask = y == 0\n", 340 | "j = len(x1_splits)\n", 341 | "f = len(x1_splits) + len(x2_splits) + 1\n", 342 | "\n", 343 | "def init():\n", 344 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 345 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 346 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 347 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 348 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 349 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 350 | " axes[0].vlines(split_1, -1, 11, 'k', lw=3)\n", 351 | " axes[0].hlines(split_2, split_1, 11, 'k', lw=3)\n", 352 | "\n", 353 | "def animate(i):\n", 354 | " if i <= j: # plot x1 splits\n", 355 | " axes[0].vlines(x1_splits[:i], split_2, 11, '#988ED5')\n", 356 | " axes[1].plot(x1_splits[:i], x1_gini[:i], '#988ED5', marker='o', ms=9)\n", 357 | " elif i < f: # plot x2 splits\n", 358 | " axes[0].hlines(x2_splits[:(i-j)], split_1, 11, '#E8A2A5')\n", 359 | " axes[1].plot(x2_splits[:(i-j)], x2_gini[:(i-j)], 
'#E8A2A5', marker='o', ms=9)\n", 360 | " else: # highlight optimum split\n", 361 | " if min(x1_gini) <= min(x2_gini):\n", 362 | " k = np.argmin(x1_gini)\n", 363 | " axes[0].vlines(x1_splits[k], split_2, 11, 'k', lw=3)\n", 364 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 365 | " else:\n", 366 | " k = np.argmin(x2_gini)\n", 367 | " axes[0].hlines(x2_splits[k], split_1, 11, 'k', lw=3)\n", 368 | " axes[1].plot(x2_splits[k], x2_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 369 | " \n", 370 | "plt.close(fig)\n", 371 | "ani = animation.FuncAnimation(fig,\n", 372 | " animate,\n", 373 | " init_func=init,\n", 374 | " frames=f + 1,\n", 375 | " interval=600)\n", 376 | "ani.save('../gif/decision_tree/decision_tree_3.gif', writer='imagemagick', fps=1, dpi=150)\n", 377 | "# HTML(ani.to_jshtml())" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "##### Final tree" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 25, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "split_3 = 6.5" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 26, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "fig, axes = mk_fig()\n", 403 | "mask = y == 0\n", 404 | "j = len(x1_splits)\n", 405 | "f = len(x1_splits) + len(x2_splits) + 1\n", 406 | "\n", 407 | "def init():\n", 408 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 409 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 410 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 411 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 412 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 413 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 414 | " axes[0].vlines(split_1, -1, 11, 'k', lw=3)\n", 415 | " axes[0].hlines(split_2, split_1, 11, 'k', lw=3)\n", 416 | " axes[0].hlines(split_3, split_1, 11, 'k', lw=3)\n", 417 | "\n", 418 | "def animate(i):\n", 419 | " return\n", 420 | " \n", 421 | "plt.close(fig)\n", 422 | "ani = animation.FuncAnimation(fig,\n", 423 | " animate,\n", 424 | " init_func=init,\n", 425 | " frames=1,\n", 426 | " interval=600)\n", 427 | "ani.save('../gif/decision_tree/decision_tree_4.gif', writer='imagemagick', fps=1, dpi=150)\n", 428 | "# HTML(ani.to_jshtml())" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "#### View animations" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 27, 441 | "metadata": {}, 442 | "outputs": [ 443 | { 444 | "data": { 445 | "text/html": [ 446 | "" 447 | ], 448 | "text/plain": [ 449 | "" 450 | ] 451 | }, 452 | "execution_count": 27, 453 | "metadata": {}, 454 | "output_type": "execute_result" 455 | } 456 | ], 457 | "source": [ 458 | "Image(url='../gif/decision_tree/decision_tree_1.gif')" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 28, 464 | "metadata": {}, 465 | "outputs": [ 466 | { 467 | "data": { 468 | "text/html": [ 469 | "" 470 | ], 471 | "text/plain": [ 472 | "" 473 | ] 474 | }, 475 | "execution_count": 28, 476 | "metadata": {}, 477 | "output_type": "execute_result" 478 | } 479 | ], 480 | "source": [ 481 | "Image(url='../gif/decision_tree/decision_tree_2.gif')" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 29, 487 | "metadata": {}, 488 | "outputs": [ 489 | { 490 | "data": { 491 | 
"text/html": [ 492 | "" 493 | ], 494 | "text/plain": [ 495 | "" 496 | ] 497 | }, 498 | "execution_count": 29, 499 | "metadata": {}, 500 | "output_type": "execute_result" 501 | } 502 | ], 503 | "source": [ 504 | "Image(url='../gif/decision_tree/decision_tree_3.gif')" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": 30, 510 | "metadata": {}, 511 | "outputs": [ 512 | { 513 | "data": { 514 | "text/html": [ 515 | "" 516 | ], 517 | "text/plain": [ 518 | "" 519 | ] 520 | }, 521 | "execution_count": 30, 522 | "metadata": {}, 523 | "output_type": "execute_result" 524 | } 525 | ], 526 | "source": [ 527 | "Image(url='../gif/decision_tree/decision_tree_4.gif')" 528 | ] 529 | } 530 | ], 531 | "metadata": { 532 | "kernelspec": { 533 | "display_name": "Python 3", 534 | "language": "python", 535 | "name": "python3" 536 | }, 537 | "language_info": { 538 | "codemirror_mode": { 539 | "name": "ipython", 540 | "version": 3 541 | }, 542 | "file_extension": ".py", 543 | "mimetype": "text/x-python", 544 | "name": "python", 545 | "nbconvert_exporter": "python", 546 | "pygments_lexer": "ipython3", 547 | "version": "3.7.4" 548 | } 549 | }, 550 | "nbformat": 4, 551 | "nbformat_minor": 2 552 | } 553 | -------------------------------------------------------------------------------- /ml-animations/notebooks/decision_tree.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Imports" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 14, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "import matplotlib.animation as animation\n", 19 | "from IPython.display import HTML, Image\n", 20 | "plt.style.use('ggplot')\n", 21 | "params = {'legend.fontsize': '18',\n", 22 | " 'axes.labelsize': '20',\n", 23 | " 'axes.labelweight': 'bold',\n", 24 | " 'axes.titlesize':'20',\n", 25 | " 'xtick.labelsize':'18',\n", 26 | " 'ytick.labelsize':'18'}\n", 27 | "plt.rcParams.update(params)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "#### Functions" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 15, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "def gini(*args):\n", 44 | " \"\"\"\n", 45 | " Calculates the gini impurity for binary class data.\n", 46 | "\n", 47 | " Parameters\n", 48 | " ----------\n", 49 | " *args : int\n", 50 | " Number of examples in class i\n", 51 | "\n", 52 | " Returns\n", 53 | " -------\n", 54 | " float\n", 55 | " The gini impurity\n", 56 | " \"\"\"\n", 57 | " n = sum(args) # total examples\n", 58 | " gini = 0\n", 59 | " for c in args:\n", 60 | " gini += (c / n) * (1 - (c / n))\n", 61 | " return gini\n", 62 | "\n", 63 | "\n", 64 | "def split(x, y, splits):\n", 65 | " \"\"\"\n", 66 | " Calculates the gini impurity for binary class data.\n", 67 | "\n", 68 | " Parameters\n", 69 | " ----------\n", 70 | " x : int\n", 71 | " Feature values\n", 72 | " y : int\n", 73 | " Corresponding target values\n", 74 | " splits : int\n", 75 | " Vector of splits to calculate gini criterion for\n", 76 | "\n", 77 | " Returns\n", 78 | " -------\n", 79 | " list\n", 80 | " List of gini impurity for each split\n", 81 | " \"\"\"\n", 82 | " gini_splits = []\n", 83 | " for i in splits:\n", 84 | " mask = x < i\n", 85 | " gini_L = gini(sum(y[mask] == 0),\n", 86 | " sum(y[mask] == 1))\n", 87 | " p_L = 
sum(mask) / len(mask)\n", 88 | " gini_R = gini(sum(y[~mask] == 0),\n", 89 | " sum(y[~mask] == 1))\n", 90 | " p_R = sum(~mask) / len(mask)\n", 91 | " gini_splits.append(gini_L * p_L + gini_R * p_R)\n", 92 | " \n", 93 | " return gini_splits\n", 94 | "\n", 95 | "\n", 96 | "def mk_fig():\n", 97 | " \"\"\"\n", 98 | " Convenience function to plot figure canvas\n", 99 | "\n", 100 | " Returns\n", 101 | " -------\n", 102 | " fig, axes\n", 103 | " Figure and axes objects\n", 104 | " \"\"\"\n", 105 | " fig, axes = plt.subplots(1, 2, figsize=(14, 7))\n", 106 | " axes[0].set_xlim(-1, 11)\n", 107 | " axes[0].set_ylim(-1, 11)\n", 108 | " axes[0].set_xlabel('X1')\n", 109 | " axes[0].set_ylabel('X2')\n", 110 | " axes[0].xaxis.label.set_color('#988ED5')\n", 111 | " axes[0].yaxis.label.set_color('#E8A2A5')\n", 112 | " axes[1].set_xlim(-1, 11)\n", 113 | " axes[1].set_ylim(0, 1)\n", 114 | " axes[1].set_xlabel('Feature Value')\n", 115 | " axes[1].set_ylabel('Gini Impurity')\n", 116 | " \n", 117 | " return fig, axes" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "#### Data" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 16, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "np.random.seed(3)\n", 134 | "x1 = np.random.randint(0, 10, 10)\n", 135 | "x2 = np.random.randint(0, 10, 10)\n", 136 | "y = np.random.randint(0, 2, 10)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "#### Splits" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 17, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "x1_unique = np.unique(x1)\n", 153 | "x1_splits = x1_unique[:-1] + np.diff(x1_unique) / 2\n", 154 | "x1_gini = split(x1, y, x1_splits)\n", 155 | "\n", 156 | "x2_unique = np.unique(x2)\n", 157 | "x2_splits = x2_unique[:-1] + np.diff(x2_unique) / 2\n", 158 | "x2_gini = split(x2, y, x2_splits)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "#### Create and save animations" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "##### First split" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 18, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "fig, axes = mk_fig()\n", 182 | "mask = y == 0\n", 183 | "j = len(x1_splits)\n", 184 | "f = len(x1_splits) + len(x2_splits) + 1\n", 185 | "\n", 186 | "def init():\n", 187 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 188 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 189 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 190 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 191 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 192 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 193 | "\n", 194 | "def animate(i):\n", 195 | " if i <= j: # plot x1 splits\n", 196 | " axes[0].vlines(x1_splits[:i], -1, 11, '#988ED5')\n", 197 | " axes[1].plot(x1_splits[:i], x1_gini[:i], '#988ED5', marker='o', ms=9)\n", 198 | " elif i < f: # plot x2 splits\n", 199 | " axes[0].hlines(x2_splits[:(i-j)], -1, 11, '#E8A2A5')\n", 200 | " axes[1].plot(x2_splits[:(i-j)], x2_gini[:(i-j)], '#E8A2A5', marker='o', ms=9)\n", 201 | " else: # highlight optimum split\n", 202 | " if min(x1_gini) <= min(x2_gini):\n", 203 | " k = np.argmin(x1_gini)\n", 204 | " 
axes[0].vlines(x1_splits[k], -1, 11, 'k', lw=3)\n", 205 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 206 | " else: # x2 gives the lower impurity, so highlight its best (horizontal) split\n", 207 | " k = np.argmin(x2_gini)\n", 208 | " axes[0].hlines(x2_splits[k], -1, 11, 'k', lw=3)\n", 209 | " axes[1].plot(x2_splits[k], x2_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 210 | " \n", 211 | "plt.close(fig)\n", 212 | "ani = animation.FuncAnimation(fig,\n", 213 | " animate,\n", 214 | " init_func=init,\n", 215 | " frames=f + 1,\n", 216 | " interval=600)\n", 217 | "ani.save('../gif/decision_tree/decision_tree_1.gif', writer='imagemagick', fps=1, dpi=150)\n", 218 | "# HTML(ani.to_jshtml())" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "##### Second split" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 19, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "split_1 = 4\n", 235 | "mask_1 = x1 > split_1" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 20, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "x1_unique = np.unique(x1[mask_1])\n", 245 | "x1_splits = x1_unique[:-1] + np.diff(x1_unique) / 2\n", 246 | "x1_gini = split(x1[mask_1], y[mask_1], x1_splits)\n", 247 | "\n", 248 | "x2_unique = np.unique(x2[mask_1])\n", 249 | "x2_splits = x2_unique[:-1] + np.diff(x2_unique) / 2\n", 250 | "x2_gini = split(x2[mask_1], y[mask_1], x2_splits)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 21, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "fig, axes = mk_fig()\n", 260 | "mask = y == 0\n", 261 | "j = len(x1_splits)\n", 262 | "f = len(x1_splits) + len(x2_splits) + 1\n", 263 | "\n", 264 | "def init():\n", 265 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 266 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 267 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 268 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 269 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 270 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 271 | " axes[0].vlines(split_1, -1, 11, 'k', lw=3)\n", 272 | "\n", 273 | "def animate(i):\n", 274 | " if i <= j: # plot x1 splits\n", 275 | " axes[0].vlines(x1_splits[:i], -1, 11, '#988ED5')\n", 276 | " axes[1].plot(x1_splits[:i], x1_gini[:i], '#988ED5', marker='o', ms=9)\n", 277 | " elif i < f: # plot x2 splits\n", 278 | " axes[0].hlines(x2_splits[:(i-j)], split_1, 11, '#E8A2A5')\n", 279 | " axes[1].plot(x2_splits[:(i-j)], x2_gini[:(i-j)], '#E8A2A5', marker='o', ms=9)\n", 280 | " else: # highlight optimum split\n", 281 | " if min(x1_gini) <= min(x2_gini):\n", 282 | " k = np.argmin(x1_gini)\n", 283 | " axes[0].vlines(x1_splits[k], split_2, 11, 'k', lw=3)\n", 284 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 285 | " else:\n", 286 | " k = np.argmin(x2_gini)\n", 287 | " axes[0].hlines(x2_splits[k], split_1, 11, 'k', lw=3)\n", 288 | " axes[1].plot(x2_splits[k], x2_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 289 | " \n", 290 | "plt.close(fig)\n", 291 | "ani = animation.FuncAnimation(fig,\n", 292 | " animate,\n", 293 | " init_func=init,\n", 294 | " frames=f + 1,\n", 295 | " interval=600)\n", 296 | "ani.save('../gif/decision_tree/decision_tree_2.gif', writer='imagemagick', fps=1, dpi=150)\n", 297 | "# HTML(ani.to_jshtml())" 298 | ] 299 | }, 300 | { 301 | "cell_type": 
"markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "##### Third split" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 22, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "split_2 = 4.5\n", 314 | "mask_2 = (x1 > split_1) & (x2 > split_2)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 23, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "x1_unique = np.unique(x1[mask_2])\n", 324 | "x1_splits = x1_unique[:-1] + np.diff(x1_unique) / 2\n", 325 | "x1_gini = split(x1[mask_2], y[mask_2], x1_splits)\n", 326 | "\n", 327 | "x2_unique = np.unique(x2[mask_2])\n", 328 | "x2_splits = x2_unique[:-1] + np.diff(x2_unique) / 2\n", 329 | "x2_gini = split(x2[mask_2], y[mask_2], x2_splits)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 24, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "fig, axes = mk_fig()\n", 339 | "mask = y == 0\n", 340 | "j = len(x1_splits)\n", 341 | "f = len(x1_splits) + len(x2_splits) + 1\n", 342 | "\n", 343 | "def init():\n", 344 | " axes[0].scatter(x1[mask], x2[mask], s=100, c='#E24A33', label='Class 0')\n", 345 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 346 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 347 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 348 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 349 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 350 | " axes[0].vlines(split_1, -1, 11, 'k', lw=3)\n", 351 | " axes[0].hlines(split_2, split_1, 11, 'k', lw=3)\n", 352 | "\n", 353 | "def animate(i):\n", 354 | " if i <= j: # plot x1 splits\n", 355 | " axes[0].vlines(x1_splits[:i], split_2, 11, '#988ED5')\n", 356 | " axes[1].plot(x1_splits[:i], x1_gini[:i], '#988ED5', marker='o', ms=9)\n", 357 | " elif i < f: # plot x2 splits\n", 358 | " axes[0].hlines(x2_splits[:(i-j)], split_1, 11, '#E8A2A5')\n", 359 | " axes[1].plot(x2_splits[:(i-j)], x2_gini[:(i-j)], '#E8A2A5', marker='o', ms=9)\n", 360 | " else: # highlight optimum split\n", 361 | " if min(x1_gini) <= min(x2_gini):\n", 362 | " k = np.argmin(x1_gini)\n", 363 | " axes[0].vlines(x1_splits[k], split_2, 11, 'k', lw=3)\n", 364 | " axes[1].plot(x1_splits[k], x1_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 365 | " else:\n", 366 | " k = np.argmin(x2_gini)\n", 367 | " axes[0].hlines(x2_splits[k], split_1, 11, 'k', lw=3)\n", 368 | " axes[1].plot(x2_splits[k], x2_gini[k], 'o', mec='k', mfc='None', mew=3, ms=20)\n", 369 | " \n", 370 | "plt.close(fig)\n", 371 | "ani = animation.FuncAnimation(fig,\n", 372 | " animate,\n", 373 | " init_func=init,\n", 374 | " frames=f + 1,\n", 375 | " interval=600)\n", 376 | "ani.save('../gif/decision_tree/decision_tree_3.gif', writer='imagemagick', fps=1, dpi=150)\n", 377 | "# HTML(ani.to_jshtml())" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "##### Final tree" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 25, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "split_3 = 6.5" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 26, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "fig, axes = mk_fig()\n", 403 | "mask = y == 0\n", 404 | "j = len(x1_splits)\n", 405 | "f = len(x1_splits) + len(x2_splits) + 1\n", 406 | "\n", 407 | "def init():\n", 408 | " axes[0].scatter(x1[mask], x2[mask], s=100, 
c='#E24A33', label='Class 0')\n", 409 | " axes[0].scatter(x1[~mask], x2[~mask], s=100, c='#348ABD', label='Class 1')\n", 410 | " axes[0].legend(facecolor='#F0F0F0', framealpha=1)\n", 411 | " axes[1].plot(-1, -1, c='#988ED5', label='X1 splits') # legend place-holders\n", 412 | " axes[1].plot(-1, -1, c='#E8A2A5', label='X2 splits')\n", 413 | " axes[1].legend(facecolor='#F0F0F0', framealpha=1)\n", 414 | " axes[0].vlines(split_1, -1, 11, 'k', lw=3)\n", 415 | " axes[0].hlines(split_2, split_1, 11, 'k', lw=3)\n", 416 | " axes[0].hlines(split_3, split_1, 11, 'k', lw=3)\n", 417 | "\n", 418 | "def animate(i):\n", 419 | " return\n", 420 | " \n", 421 | "plt.close(fig)\n", 422 | "ani = animation.FuncAnimation(fig,\n", 423 | " animate,\n", 424 | " init_func=init,\n", 425 | " frames=1,\n", 426 | " interval=600)\n", 427 | "ani.save('../gif/decision_tree/decision_tree_4.gif', writer='imagemagick', fps=1, dpi=150)\n", 428 | "# HTML(ani.to_jshtml())" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "#### View animations" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 27, 441 | "metadata": {}, 442 | "outputs": [ 443 | { 444 | "data": { 445 | "text/html": [ 446 | "" 447 | ], 448 | "text/plain": [ 449 | "" 450 | ] 451 | }, 452 | "execution_count": 27, 453 | "metadata": {}, 454 | "output_type": "execute_result" 455 | } 456 | ], 457 | "source": [ 458 | "Image(url='../gif/decision_tree/decision_tree_1.gif')" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 28, 464 | "metadata": {}, 465 | "outputs": [ 466 | { 467 | "data": { 468 | "text/html": [ 469 | "" 470 | ], 471 | "text/plain": [ 472 | "" 473 | ] 474 | }, 475 | "execution_count": 28, 476 | "metadata": {}, 477 | "output_type": "execute_result" 478 | } 479 | ], 480 | "source": [ 481 | "Image(url='../gif/decision_tree/decision_tree_2.gif')" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 29, 487 | "metadata": {}, 488 | "outputs": [ 489 | { 490 | "data": { 491 | "text/html": [ 492 | "" 493 | ], 494 | "text/plain": [ 495 | "" 496 | ] 497 | }, 498 | "execution_count": 29, 499 | "metadata": {}, 500 | "output_type": "execute_result" 501 | } 502 | ], 503 | "source": [ 504 | "Image(url='../gif/decision_tree/decision_tree_3.gif')" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": 30, 510 | "metadata": {}, 511 | "outputs": [ 512 | { 513 | "data": { 514 | "text/html": [ 515 | "" 516 | ], 517 | "text/plain": [ 518 | "" 519 | ] 520 | }, 521 | "execution_count": 30, 522 | "metadata": {}, 523 | "output_type": "execute_result" 524 | } 525 | ], 526 | "source": [ 527 | "Image(url='../gif/decision_tree/decision_tree_4.gif')" 528 | ] 529 | } 530 | ], 531 | "metadata": { 532 | "kernelspec": { 533 | "display_name": "Python 3", 534 | "language": "python", 535 | "name": "python3" 536 | }, 537 | "language_info": { 538 | "codemirror_mode": { 539 | "name": "ipython", 540 | "version": 3 541 | }, 542 | "file_extension": ".py", 543 | "mimetype": "text/x-python", 544 | "name": "python", 545 | "nbconvert_exporter": "python", 546 | "pygments_lexer": "ipython3", 547 | "version": "3.7.4" 548 | } 549 | }, 550 | "nbformat": 4, 551 | "nbformat_minor": 2 552 | } 553 | -------------------------------------------------------------------------------- /ml-animations/notebooks/knn_draft.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | 
"metadata": {}, 6 | "source": [ 7 | "#### Imports" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from scipy.stats import mode\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import matplotlib.animation as animation\n", 20 | "from IPython.display import HTML, Image\n", 21 | "plt.style.use('ggplot')\n", 22 | "params = {'legend.fontsize': '18',\n", 23 | " 'axes.labelsize': '20',\n", 24 | " 'axes.labelweight': 'bold',\n", 25 | " 'axes.titlesize':'20',\n", 26 | " 'xtick.labelsize':'18',\n", 27 | " 'ytick.labelsize':'18'}\n", 28 | "plt.rcParams.update(params)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "#### Functions" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "def closest_node(node, nodes, n=1):\n", 45 | " \"\"\"\n", 46 | " Find the closest point in a list to a query point.\n", 47 | "\n", 48 | " Parameters\n", 49 | " ----------\n", 50 | " node : array\n", 51 | " query point\n", 52 | " nodes: array\n", 53 | " array of points to compare to\n", 54 | " n: int\n", 55 | " return n closest pairs\n", 56 | "\n", 57 | " Returns\n", 58 | " -------\n", 59 | " int\n", 60 | " The index of the closest point in the list\n", 61 | " \"\"\"\n", 62 | " nodes = np.asarray(nodes)\n", 63 | " dist = np.sum((nodes - node)**2, axis=1)\n", 64 | " \n", 65 | " return np.argsort(dist)[:n]\n", 66 | "\n", 67 | "def mk_fig():\n", 68 | " \"\"\"\n", 69 | " Convenience function to plot figure canvas\n", 70 | "\n", 71 | " Returns\n", 72 | " -------\n", 73 | " fig, axes\n", 74 | " Figure and axes objects\n", 75 | " \"\"\"\n", 76 | " fig, axes = plt.subplots(1, 1, figsize=(7, 7))\n", 77 | " axes.set_xlim(-1, 11)\n", 78 | " axes.set_ylim(-1, 11)\n", 79 | " axes.set_xlabel('X1')\n", 80 | " axes.set_ylabel('X2')\n", 81 | "\n", 82 | " return fig, axes" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "#### Data" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "np.random.seed(11)\n", 99 | "X = np.array([np.random.randint(0, 10, 9),\n", 100 | " np.random.randint(0, 10, 9)]).T\n", 101 | "y = np.random.randint(0, 2, 9)\n", 102 | "xq = np.array([6, 3]) # query point" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "#### Create and save animations" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 4, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "fig, axes = mk_fig()\n", 119 | "mask = y == 0\n", 120 | "colors = ['#E24A33', '#348ABD']\n", 121 | "\n", 122 | "def init():\n", 123 | " axes.scatter(X[mask, 0], X[mask, 1], s=100, c='#E24A33', label='Class 0', zorder=2)\n", 124 | " axes.scatter(X[~mask, 0], X[~mask, 1], s=100, c='#348ABD', label='Class 1', zorder=2)\n", 125 | " axes.scatter(xq[0], xq[1], s=200, facecolor='None', edgecolor='k', lw=2, label='Unknown', zorder=2)\n", 126 | " axes.legend(facecolor='#F0F0F0', framealpha=1)\n", 127 | "\n", 128 | "def animate(i):\n", 129 | " if i == 0:\n", 130 | " axes.scatter(xq[0], xq[1], s=200, facecolor='None', edgecolor='k', lw=2, label='Unknown', zorder=2)\n", 131 | " if i == 1:\n", 132 | " k = closest_node(xq, X, i)\n", 133 | " axes.plot([xq[0], X[k,0]],\n", 134 | " [xq[1], X[k,1]],\n", 135 | " 'k-', 
zorder=1)\n", 136 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[y[k[0]]], edgecolor='k', lw=2, zorder=2)\n", 137 | " elif i % 2 == 1:\n", 138 | " k = closest_node(xq, X, i)\n", 139 | " axes.plot([np.repeat(xq[0], 2), X[k[-2:],0]],\n", 140 | " [np.repeat(xq[1], 2), X[k[-2:],1]],\n", 141 | " 'k-', zorder=1)\n", 142 | " if sum(y[k] == 0) > sum(y[k] == 1):\n", 143 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[0], edgecolor='k', lw=2, zorder=2)\n", 144 | " elif sum(y[k] == 1) > sum(y[k] == 0):\n", 145 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[1], edgecolor='k', lw=2, zorder=2)\n", 146 | " else: # if equal counts, set to closest point's color\n", 147 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[y[k[0]]], edgecolor='k', lw=2, zorder=2)\n", 148 | " \n", 149 | "plt.close(fig)\n", 150 | "ani = animation.FuncAnimation(fig,\n", 151 | " animate,\n", 152 | " init_func=init,\n", 153 | " frames=10,\n", 154 | " interval=600)\n", 155 | "ani.save('../gif/knn/knn.gif', writer='imagemagick', fps=1, dpi=75)\n", 156 | "# HTML(ani.to_jshtml())" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "#### View animations" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "Image(url='../gif/knn/knn.gif')" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "kernelspec": { 178 | "display_name": "Python 3", 179 | "language": "python", 180 | "name": "python3" 181 | }, 182 | "language_info": { 183 | "codemirror_mode": { 184 | "name": "ipython", 185 | "version": 3 186 | }, 187 | "file_extension": ".py", 188 | "mimetype": "text/x-python", 189 | "name": "python", 190 | "nbconvert_exporter": "python", 191 | "pygments_lexer": "ipython3", 192 | "version": "3.7.4" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 2 197 | } 198 | -------------------------------------------------------------------------------- /ml-animations/notebooks/logistic_regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Imports" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 33, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from scipy.optimize import minimize\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import matplotlib.animation as animation\n", 20 | "from IPython.display import HTML, Image\n", 21 | "plt.style.use('ggplot')\n", 22 | "params = {'legend.fontsize': '18',\n", 23 | " 'axes.labelsize': '20',\n", 24 | " 'axes.labelweight': 'bold',\n", 25 | " 'axes.titlesize':'20',\n", 26 | " 'xtick.labelsize':'18',\n", 27 | " 'ytick.labelsize':'18'}\n", 28 | "plt.rcParams.update(params)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "#### Functions" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 36, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "def loss_lr(w, X, y):\n", 45 | " return np.sum(np.log(1 + np.exp(-y*(X@w))))\n", 46 | "\n", 47 | "def loss_lr_grad(w, X, y):\n", 48 | " return -X.T @ (y/(1+np.exp(y*(X@w))))\n", 49 | "\n", 50 | "def mk_fig():\n", 51 | " \"\"\"\n", 52 | " Convenience function to plot figure canvas\n", 53 | "\n", 54 | " Returns\n", 55 | " -------\n", 56 | " fig, axes\n", 57 | " Figure and axes objects\n", 58 | 
" \"\"\"\n", 59 | " fig, axes = plt.subplots(1, 1, figsize=(7, 7))\n", 60 | " axes.set_xlim(-1, 21)\n", 61 | " axes.set_ylim(-1.5, 1.5)\n", 62 | " axes.set_xlabel('x')\n", 63 | " axes.set_ylabel('y')\n", 64 | "\n", 65 | " return fig, axes" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "#### Data" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 113, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "np.random.seed(1)\n", 82 | "x = np.concatenate((np.random.randint(0, 12, 10),\n", 83 | " np.random.randint(8, 20, 10))\n", 84 | " )\n", 85 | "y = np.concatenate((np.ones((10,)) * -1,\n", 86 | " np.ones(10,))\n", 87 | " )" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 114, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "[]" 99 | ] 100 | }, 101 | "execution_count": 114, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | }, 105 | { 106 | "data": { 107 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAd4AAAHBCAYAAADHHtqNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de3TU9Z3/8ddMrlwSIEwSCMgtgCZBBUFMYjYWEpWbVvxtQUVbPT271do97bYe22699aiI/tweD8jZ0z22rOAqIDcvQLqGKFJCKDS0QoIQwkVMCLcJ5ZpMJjO/P/hl1jQXJsn3+5lM8nyc4znme/nMO+/5Mq98r+Pw+/1+AQAAI5yhLgAAgN6E4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgyJDXUBXrF+/XkeOHNHhw4d16tQpJSYmaunSpR0a48knn9Tp06dbnffWW28pPj7eilIBAJAU5sH73nvvqX///ho9erQuXbrU6XGGDRumuXPntpjep0+frpQHAEALYR28S5YsUXJysiTpZz/7merq6jo1zoABA5Sbm2tlaQAAtCqsz/E2ha4VGhsbdfnyZcvGAwCgNWG9x2uViooKPfzww2psbFTfvn01ZcoUPfTQQ0pISAh1aQCAHqbXB+/w4cM1ffp0DRs2TI2NjSorK1NRUZH27dunhQsXthm+hYWFKiwslCQtWrTIZMkAgDDm6CnfTtR0jrejVzW35o9//KMWL16s6dOn6/HHHw9qnerq6i6/rsvl0pkzZ7o8DlpHf+1Hj+1Hj+1nVY9TUlJanR7W53jtkpOTo8TERO3ZsyfUpQAAehiCtw1JSUk6f/58qMsAAPQwBG8bampqNHDgwFCXAQDoYXpN8J45c0ZVVVXyer2BaRcvXmx12YKCAp09e1aTJ082VR4AoJcI66uaP//888DjHs+fPy+v16u1a9dKkhITE5s9FOPNN99UeXm53nzzTSUlJUmStm7dqqKiIk2cOFGJiYny+XwqKyvTrl27lJycrHnz5pn/pQAAPVpYB29RUZHKy8ubTVu1apUkKT09/ZpPo0pNTdW+fftUXFwcOJ+blJSkb3/727rvvvvUr18/ewoHAPRaPeZ2olDjdqLuj/7ajx7bjx7bj9uJAADoQQheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMIngBADCI4AUAwCCCFwAAgwheAAAMigx1AV21fv16HTlyRIcPH9apU6eUmJiopUuXdnicrVu3auPGjaqqqlLfvn01efJkPfTQQ4qPj7ehaqDnuOJp1P/9Y7X+UnNJfkkOSZOG9NPP70hRdESE8Xo8Xp9W/OW0Pjv6NzX6/IpwOjRt1AB995ZERTqD39do9Pm14/gFFR3+mzxen6IjncobM0BZI+LkdDiMjdEddafehGOPHX6/3x/qIrpi3rx56t+/v0aPHq3Dhw+rb9++HQ7ejz/+WMuXL1d6erpycnJ09uxZffzxx0pMTNTChQsVGxt7zTGqq6s7+ysEuFwunTlzpsvjoHX013plJy/pV4XH1dqHiEPSy/nXKSO5n7F6vjpXp6cKjqm+sWVFMREOvT5jpEYMvPa/53N1Xr302dc6WlunBt//To9ySqMGxeqZbw3XwNj291usGKM1od6Ou1NvunuPU1JSWp0e9oealyxZot///vd69tlnlZCQ0OH1z58/r1WrVik1NVXPPfec8vPzNX/+fP34xz/W119/rU2bNtlQNRD+PI2NbYauJPkl/arwuDyNjUbq8fp8bYauJNU3+vVUwTF5fb5W5zfx+f166bOvVXG2+Ye5JDX4pIqzdXrps6/la2efxYoxuqPu1Jtw7nHYB29ycnKX1t+1a5fq6+s1c+ZMOb9xGGrKlClKTk7Wtm3buloi0CO9urW6zdBt4pf02uddPxoUjOWlp9sM3Sb1jX6t2HO63WV2fHVBR2vr2l3maG2dSo5fsHWM7qg79Sacexz2wdtVlZWVkqTx48e3mDdu3DhVV1errq79NxfojfbUXApuuRPBLddVnx79W3DLHWl/uS2H/9ZiD+rvNfikLZVtj2PFGN1Rd+pNOPc47C+u6iq32y1JrR6mTkhIkN/vl9vtbnGsvrCwUIWFhZKkRYsWyeVydbmWyMhIS8ZB6+ivtYI9gOeTjPTd568IarlGv6PdevzO4PbQfc62tycrxmhLKLfj7tSbcO5xrw9ej8cj6Wqj/15UV
FSzZb4pPz9f+fn5gZ+tOBEf6osmejr6a61grxd1ypp/H9d8HUdwfwpEOPzt1uPwBXdO2unztjmOFWO0JZTbcXfqTTj0uMdeXNVV0dHRkiSv19tiXkNDQ7NlAPyvSUOCu1p50lAzVzVPGzUguOVGt79c3pgBirrGJ2OUU8pLbXscK8bojrpTb8K5x70+eJsOMTcdcv4mt9sth8PRqaulgZ7u53ekXHOv1yHp6dzW/+q32ndvSVRMRPsVxUQ49MikxHaXyRoRp1GD2r/laNSgWGVeF2frGN1Rd+pNOPe41wdvamqqJOngwYMt5lVUVCglJSWo+3iB3iY6IkIv51/XZvg23cdr6iEakU6nXp8xss3wbbqP91oP0XA6HHrmW8M1bnBsiz2qKKc0bvDV+0PbeziDFWN0R92pN+Hc4151jvfMmTOqr69XcnJy4JzurbfeqmXLlqmgoEA5OTmBW4p2796tkydPav78+aEsGejWMpL7afUD4/Tq59X6y4lL8unqX/OThvbT07nmn1w1YmCs3p03Tsv3nNZnR/4mr8+vSKdD00YP0COTgn9y1cDYSL1290iVHL+gLZV/U32jTzERTuWlDlDmdcE9EcmKMbqj7tSbcO1x2D+56vPPP9fp01fvyysoKJDX69WcOXMkSYmJicrNzQ0s+8ILL6i8vFxvvvmmkpKSAtM/+ugjrVixQhkZGbr99tvldrv10UcfyeVy6ZVXXuHJVT0E/bUfPbYfPbaf3RdXhf0eb1FRkcrLy5tNW7VqlSQpPT29WfC25Z577lFcXJw2btyoZcuWqU+fPsrKytKCBQs4zAwAsFTY7/F2F+zxdn/013702H702H7cTgQAQA9C8AIAYBDBCwCAQQQvAAAGEbwAABhE8AIAYBDBCwCAQQQvAAAGEbwAABhE8AIAYBDBCwCAQQQvAAAGEbwAABhE8AIAYBDBCwCAQQQvAAAGRYa6AAAAOqKxsVFVVVXav39/4L8DBw5o8eLFuummm0Jd3jURvACAkPL5fDp37pxOnjypAwcOBML0yy+/VFVVVdDj/Pa3v9XSpUttrNQaBC8AwBZer1enTp3SoUOHtG/fPu3du1f79u3T4cOHuzx2SkqK0tLSlJaWpvT0dN1www0aO3asBVXbj+AFAATlwoULKisr0549e7R582a53W7deeedOnDggPbu3Su3292l8ceMGaMbbrhB6enpSktL0/XXX6+hQ4cqNjbWot+geyB4AaAXa2xs1MmTJ1VeXq5du3bp/fff18mTJ4Ne/z//8z/bnR8XF6cJEyYE/ktPT1dKSooGDBggh8PR1fLDEsELAD2M3+/XxYsXdeLECZWXl6uoqEhr1661/HXS0tI0Z84cTZgwQWlpaUpKSlJUVJTlr9PTELwAECY8Ho+OHz+u8vJy7d27Vx999JFKSkosf52srCzNnDlTN998s2644Qb179/f8tfozQheAAghv9+v2tpanT59WjU1NSouLtaGDRv09ddfW/5a9913n6ZPn66bb75Zo0ePVkREhOWvgWsjeAHABj6fT2fPntWJEydUWVmpoqIirVu3zvLXSU5O1owZMzRt2jRNmjRJgwcP7rXnTsMFwQsAHeT1elVTU6MvvvhCRUVFWrVqlXw+n6WvkZGRoby8PE2fPl033HCD4uLiJEkul0tnzpyx9LVgFsELALq6h3ro0CFt2bJFb7/9to4fP27L62RnZysvL0933HGHRo4cqb59+9ryOui+CF4APd6pU6e0ZcsWffDBB9q2bZstrzFz5kzl5eXp1ltv1fDhw3vcvaewDsELIGw1NjaqtLRUmzdv1m9/+1tbXmP27NmaNWuWbrzxRl133XWKjo625XXQexC8ALqly5cv69NPP9WyZcu0Y8cOy8fPy8vT7NmzNXXqVF133XWKjOTjEGawpQEwrrS0VPfcc48tY6elpekf//EflZ+fzy0z6JYIXgCW8fv9eu+99/TUU0/ZMv4///M/a9asWZo4cSJPSELYIngBBM3tdqugoECLFy+29Kpfl8ulV199VZMnT9bgwYPldDotGxvobgheAJKuhur27du1ZMkSlZWVWTburFmz9JOf/EQpKSkaOHAgD3dAr0fwAr3AxYsX9ec//1n/8R//YentNN///vc1b948jRw5UnFxcTzcAQgCwQuEufr6en3xxRdavny5pY8k/M53vqNHHnlE6enp6tOnj2XjAr0dwQt0c42NjTp69Kg2bdqkDz/8UOXl5V0e86677tJjjz2mzMxM7ksFDCN4gRBqaGhQdXW1ioqK9OGHH+pPf/pTl8e85ZZb9MQTT2jatGnsqQLdEMEL2OjChQsqKSnR+vXr9cEHH3R5vGnTpumee+7RnXfeqYSEBAsqBGAawQt0kt/v17Fjx/TBBx9ow4YNOnjwYJfGmzJliu69917l5eVp5MiRXP0L9FAEL9AGj8ejTz/9VOvXr9dHH33UpbFSUlI0d+5czZw5UxMmTODhD0AvRvCiV/L7/Tp8+LA+/PBDbdiwQYcOHerSeHl5ebr//vs1bdo0DRgwwKIqAfREBC96pMbGRh07dkw7d+7Ujh07tHbt2k6Pdd111+m+++7T3LlzNX78eA4BA+gSghdhyev16uDBgyopKdGOHTu0c+dOnT17tlNj3XXXXbrvvvt055138qXkAGxH8KJb8ng82rdvX7NgvXTpUofGSElJUWZmpjIzM3XbbbdpypQpOnfunE0VA0BwCF6ExJUrV1RaWho4FFxSUiKfz9ehMcaMGaOsrKxAsA4dOrTdh+vzfasAugM+iWCL8+fPa9euXYFgLS0t7fAYGRkZgT3WW2+9VS6Xi/OrAMIewYtOOXv2rEpKSgKHgvfv39/hMSZPnhzYY508ebLi4+NtqBQAuheCF62qqqpqFqxHjhzp0PqRkZG67bbbAsE6ceJEHl8IAOoBwevz+bRp0yYVFhbq9OnTio+PV1ZWlubNm6fY2Nhrrj9v3rxWp8fExGjFihVWl9stNN3D2nTR0o4dO3TixIkOjdG/f3/ddtttys7OVmZmpjIyMngoBAAEIeyD9+2339bmzZs1depUzZkzR1VVVdq8ebOOHDmiZ599tt2LbZqkpaUpLy+v2bRwvhCnsbFR+/fvV0lJSSBYa2trOzSGy+VSZmZmYI91/PjxQfUSANC+8E0XScePH1dBQYGmTp2qp556KjA9KSlJy5YtU3FxsXJycq45TlJSknJzc+0s1VJerzdwq01xcbFKSko6fKvN8OHDmwUrzwYGADPCOni3b98uv9+v2bNnN5uel5end999V9u2bQsqeKWrYeb1eoM6PG23hoYG/fWvfw2cXy0pKVFdXV2Hxhg3blzgHGvTrTYAgNAL6+CtrKyUw+HQ2LFjm02Pjo7WqFGjgn7+bklJibZt2yafz6f4+HhlZ2frgQceMPoUowULFuizzz4Levm0tDRlZWUFgnXw4MH2FQcAsExYB6/b7VZ8fHyrF/UMGjRIBw4ckNfrbfd87dixY5WZmakhQ4boypUr2rNnjwoKClReXq6XXnqpzT3gwsJCFRYWSpIW
LVokl8vVpd+lvLy82c8TJ05Ubm6ucnNzdfvtt/PdqxaIjIzs8vuE9tFj+9Fj+9nd47AOXo/H02aoRkdHS5Lq6+vbDd6FCxc2+/mOO+7QiBEjtHLlSm3atEn3339/q+vl5+crPz8/8POZM2c6Wn4ze/bskcvlanUcn8/X5fGhNvsL69Bj+9Fj+1nV45SUlFanh/VlqtHR0fJ6va3O83g8kq7eFtRR9957ryIjIzv1tCUAANoT1sGbkJCg8+fPq6GhocW82tpaxcXFdeq2oMjISA0aNEgXLlywokwAAALCOnhTU1Pl9/tbXETl8Xh09OhRpaamdmpcj8cjt9vNF5oDACwX1sGbnZ0th8OhjRs3Npu+ZcsW1dfXN7uVqKamRlVVVc2Wa2uPdtWqVWpsbNTkyZOtLxoA0KuF9cVVI0aM0N13362CggK9/vrrmjRpUuDJVenp6c2C98UXX9Tp06e1evXqwLS1a9eqoqJCGRkZcrlcqqur0549e1RWVqZx48Zp5syZofi1AAA9WFgHryQ9+uijSkxMVGFhoUpLSxUXF6cZM2Zo/vz513zEYUZGhqqqqrR161ZdvHhRTqdTQ4YM0QMPPKA5c+YErowGAMAqDr/f7w91ET1BdXV1l8fgNgF70V/70WP70WP7cTsRAAA9CMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgEMELAIBBBC8AAAYRvAAAGETwAgBgUGSoC+gqn8+nTZs2qbCwUKdPn1Z8fLyysrI0b948xcbG2r4+AAAdEfQe74YNG3Tu3Dk7a+mUt99+W8uXL9fw4cP12GOPKTMzU5s3b9arr74qn89n+/oAAHRE0Hu87733nlatWqVbbrlFeXl5mjhxopzO0B6pPn78uAoKCjR16lQ99dRTgelJSUlatmyZiouLlZOTY9v6AAB0VIeS0+fzaffu3Xr11Vf1wx/+UCtXrtTJkyftqu2atm/fLr/fr9mzZzebnpeXp5iYGG3bts3W9QEA6Kig93ijo6Pl8XgCP9fW1mr9+vXasGGD0tPTlZeXp9tuu02RkeZOG1dWVsrhcGjs2LEtah01apQOHTpk6/oAAHRU0Cn5u9/9Trt379b27dv1l7/8RV6vV5Lk9/tVVlamsrIy9e/fX//wD/+g6dOna8SIEbYV3cTtdis+Pl5RUVEt5g0aNEgHDhyQ1+tt84+Brq4PAEBHdWiPNzs7W9nZ2bpy5Yp27typ4uJi7d27N3AR0sWLF7V582Zt3rxZGRkZ+t73vqeRI0faVrzH42kzFKOjoyVJ9fX1bS7TlfULCwtVWFgoSVq0aJFcLleH6/97kZGRloyD1tFf+9Fj+9Fj+9nd407tyvXp00ff+ta3lJOTo8LCQr3zzjtqaGhotkxZWZl+9atf6bnnntP48eMtKfbvRUdHq66urtV5TYfFY2JibFk/Pz9f+fn5gZ/PnDkTVM3tcblcloyD1tFf+9Fj+9Fj+1nV45SUlFand+qy5JqaGr3zzjt64okntGzZshah27SH2NDQoNWrV3fmJYKSkJCg8+fPt3h96eo56Li4uHYPE3d1fQAAOiroVPH5fPrTn/6kTz75RGVlZfL7/c0HioxUTk6OZs2apeTkZC1ZskS7d+9WZWWl5UU3SU1N1V//+lcdOnRIaWlpgekej0dHjx5tNs2O9QEA6Kigg/fxxx/X3/72txbTBw4cqLvuukt33nmn4uPjA9Pz8/O1e/duXb582ZpKW5Gdna3169dr48aNzUJyy5Ytqq+vb3YPbk1NjRobGzVs2LBOrQ8AgBWCDt6/D90xY8Zo1qxZys7OVkRERIvl2zu3apURI0bo7rvvVkFBgV5//XVNmjRJVVVV2rx5s9LT05sF54svvqjTp083O/TdkfUBALBCh05gOp1OTZ06VbNnz77mBVPp6elatWpVl4oLxqOPPqrExEQVFhaqtLRUcXFxmjFjhubPnx/Uk7W6uj4AAB3h8P/9ydo2/Pd//7dmzJihwYMH211TWKquru7yGFytaC/6az96bD96bD+7r2oOeo93wYIFXS4CAIDejmOpAAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYFBnqArpq69at2rhxo6qqqtS3b19NnjxZDz30kOLj44Na/8knn9Tp06dbnffWW28FPQ4AAMEI6+D9+OOPtXz5cqWnp+uxxx7T2bNn9fHHH+vgwYNauHChYmNjgxpn2LBhmjt3bovpffr0sbpkAEAvF7bBe/78ea1atUqpqal67rnn5HRePWqempqq1157TZs2bdL9998f1FgDBgxQbm6uneUCACApjM/x7tq1S/X19Zo5c2YgdCVpypQpSk5O1rZt2zo0XmNjoy5fvmx1mQAANBO2e7yVlZWSpPHjx7eYN27cOG3fvl11dXVBHW6uqKjQww8/rMbGRvXt21dTpkzRQw89pISEBMvrBgD0bmEbvG63W5JaDceEhAT5/X653W6lpKS0O87w4cM1ffp0DRs2TI2NjSorK1NRUZH27dunhQsXthm+hYWFKiwslCQtWrRILperi7+RFBkZack4aB39tR89th89tp/dPQ558F66dEkbN24MevlZs2apf//+8ng8kq426O9FRUVJUmCZ9vzyl79s9vPtt9+u9PR0LV68WKtXr9bjjz/e6nr5+fnKz88P/HzmzJmgf4e2uFwuS8ZB6+iv/eix/eix/azqcVs7ft0ieNesWRP08rm5uerfv7+io6MlSV6vN/D/TRoaGiSpxfRg5eTk6L333tOePXs6tT4AAG0JefAmJSVp9erVHV6v6RCw2+3WkCFDms1zu91yOBxdOkeblJSkL7/8stPrAwDQmrC9qjk1NVWSdPDgwRbzKioqlJKSEvR9vK2pqanRwIEDO70+AACtCdvgvfXWWxUdHa2CggL5fL7A9N27d+vkyZPKyclptvyZM2dUVVUlr9cbmHbx4sVWxy4oKNDZs2c1efJke4oHAPR
aIT/U3Fnx8fGaP3++VqxYoRdffFG333673G63PvroIw0bNkyzZ89utvybb76p8vJyvfnmm0pKSpJ09XGTRUVFmjhxohITE+Xz+VRWVqZdu3YpOTlZ8+bNC8WvBgDowcI2eCXpnnvuUVxcnDZu3Khly5apT58+ysrK0oIFC4I6zJyamqp9+/apuLhY58+fl3T13O63v/1t3XffferXr5/dvwIAoJdx+P1+f6iL6Amqq6u7PAa3CdiL/tqPHtuPHtvP7tuJwvYcLwAA4YjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMIjgBQDAIIIXAACDCF4AAAwieAEAMCgy1AV0xSeffKL9+/fr8OHDOnHihPx+v1avXt3hcUpLS7Vu3TodO3ZMkZGRuvHGG/Xwww8rKSnJhqqb83k98q9dLpV8ppM+n+R0SpnT5Pg/35MzMri3x+9rlP/PO+TfsUXyeKToaDmy8+W4JUsOZ+/+28qK/lrJqvfKinG623bzzfdKjY1SRERI3yvALg6/3+8PdRGd9eSTT+rChQsaPXq0Tp06pbNnz3Y4eHfu3Knf/OY3GjlypPLy8nT58mVt2rRJTqdTr7zyihISEoIap7q6usP1+6q+kv+Vn0n19S1nxsTI8ct/l3PYiHbH8J8/J9+bL0lfH5EaGv53RlSUNHy0nD96Ro74gR2urSewor9Wsuq9smIcu7Ybl8ulM2fOdHi97vZedWed7TGCZ1WPU1JSWp0e1sF76tQpuVwuOZ1OLVq0SKWlpR0KXq/XqyeffFIRERH6zW9+o9jYWEnS0aNH9fOf/1zTp0/XD37wg6DG6mjw+rxe+X/yYOsfNE1iYuR44702/9r3+3zyLXpaOnKw7TFGj5fzF6/1uj1fK/prJaveKyvGsXO76cwHVnd7r7o7gtd+dgdvWH8aJyUlydmFQCkvL1dtba2mT58eCF1JGjVqlDIyMlRcXCyv12tFqS34177d/geNJNXXy79+RdtjlO64usfSnq+PyL+npBMVhjcr+mtpPRa9V1aM0922m+72XgF2C+vg7arKykpJ0vjx41vMGzdunK5cuaITJ07Y8+Ilnwa3XHFRm7P8xYXNDxO2pqFB/u2FHSish7Cgv1ay6r2yYpxut910s/cKsFuvPm5TW1srSa2ex22a5na7dd1117WYX1hYqMLCqx9MixYtksvl6tBrn/T5glvQ19jm2G6/X9f4+OPRdS8AAA+uSURBVJQkRfl9SuhgfeHOiv5ayar3yopx7NxuIiMjQ/JvoTfpTI/RMXb3OOTBe+nSJW3cuDHo5WfNmqX+/ftb8tr1///wVlRUVIt5TdPq2zgElp+fr/z8/MDPHT4fEOwhcmdEm2M3OhxBDdHgcPa+c0IW9NdKVr1XVoxj53bTqXNj3ey96u44x2s/u8/xdovgXbNmTdDL5+bmWha8MTExkqSGVg67NU1rWsZymdOkwg+uvVz29DZnObLz5f/yi/YPG0ZFyXF7ftvzeyoL+mslq94rK8bpdttNN3uvALuFPHiTkpI6de+tFQYNGiTp6uHk4cOHN5vndrsltX4Y2gqO//M9+bcVXPtKzrmPtD3GLVny/8/o9q9OHT5ajkmZXag0PFnRX0vrsei9smKc7rbddLf3CrBbr764KjU1VZJ08GDLD6CKigr16dNHQ4cOteW1nZGRcvzy36W29qib7l1s5/YJh9Mp54+ekUaPv3r/5TdFRV29JeRHz/S6W4kka/prJaveKyvG6W7bTXd7rwC79Zotuba2VpcvX5bL5QocPk5PT9egQYNUVFSkOXPmNLuPt6ysTNOmTVOkjf/YncNGyPfGe/Kve1va8Znka5ScEVL2dDnmPhLUB40jfqCcv3hN/j075N++RfLUS9ExctyeL8ekzF4Zuk2s6K+VrHqvrBinu203Ld6rRq8UERmy9wqwU1g/QGP37t06duyYJGnbtm2qrq7W/PnzJUn9+vXTjBkzAssuXbpUW7du1fPPP6+MjIzA9B07duiNN94IPLnqypUrgYu9Xn31VVufXPX3uGjCXvTXfvTYfvTYfj3+4qqu2Llzp7Zu3dps2qpVqyRJiYmJzYK3LVlZWYqOjta6deu0YsUKRUVFacKECVqwYIFt53cBAL1XWO/xdifs8XZ/9Nd+9Nh+9Nh+PDISAIAehOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMAgghcAAIMIXgAADCJ4AQAwiOAFAMCgyFAX0BWffPKJ9u/fr8OHD+vEiRPy+/1avXp1h8Z44YUXVF5e3uq8V155RampqVaUCgCApDAP3g0bNujChQsaPXq06uvrdfbs2U6NExcXp+9973stpicnJ3e1RAAAmgnr4H3++eflcrnkdDq1aNGiTgdvbGyscnNzLa4OAICWwvocb1JSkpxOa34Fn8+ny5cvy+/3WzIeAACtCes9Xqu43W5997vflcfjUUxMjG6++WY9+OCDGjZsWKhLAwD0ML0+eJOSknT99ddr5MiRcjqdqqioUEFBgfbu3asXX3xRI0aMaHW9wsJCFRYWSpIWLVqklJQUS+qxahy0jv7ajx7bjx7bz84eO/whPrZ66dIlbdy4MejlZ82apf79+7eYvmjRIpWWlnb4qubW7N+/Xy+88IImTJigZ599tsvjBesXv/iFFi1aZOz1ehv6az96bD96bD+7exzyPd5Lly5pzZo1QS+fm5vbavBaKS0tTenp6SorK5PH41F0dLStrwcA6D1CHrxJSUmW7KVaLTExUWVlZbp48aISEhJCXQ4AoIcI66ua7XTixAlFRETYvnf9Tfn5+cZeqzeiv/ajx/ajx/azu8e9Jnhra2tVVVWl+vr6wLTLly/L5/O1WLa0tFQHDhzQTTfdZPQwM/+g7EV/7UeP7UeP7Wd3j0N+qLkrdu/erWPHjkmSampqJElr166VJPXr108zZswILPvuu+9q69atev7555WRkSFJ2rdvn5YvX67JkycrOTlZTqdThw4d0rZt29p8mh
UAAF0R1sG7c+dObd26tdm0VatWSbp6jvabwdualJQUjRkzRqWlpTp37pwaGxs1ePBg3Xnnnbr//vs5twsAsFzIbycCAKA3Ces93nDn8/m0adMmFRYW6vTp04qPj1dWVpbmzZun2NjYUJfXI8ybN6/V6TExMVqxYoXhasLb+vXrdeTIER0+fFinTp1SYmKili5d2ubyFRUVWrlypSoqKuRwODR+/HgtWLBAo0aNMld0mOlIj5cuXdriiF+Tn/70p8rMzLSz1LBUXV2tbdu26YsvvlBNTY0aGhqUnJysrKwszZo1q8XnbnV1td555x3t379fXq9Xo0eP1rx58zRhwoQu1UHwhtDbb7+tzZs3a+rUqZozZ46qqqq0efNmHTlyRM8++6xlz6Hu7dLS0pSXl9dsWmQkm35Hvffee+rfv79Gjx6tS5cutbvswYMH9etf/1oJCQmBP37+8Ic/6LnnntNLL73U5hPheruO9LjJj370oxbTxo4da3VpPcKnn36qP/zhD5oyZYpycnIUERGhsrIyrVy5Ujt27NDLL78cuKC2pqZGzzzzjCIiInTvvfeqb9++2rJli15++WX98pe/1E033dTpOvj0CZHjx4+roKBAU6dO1VNPPRWYnpSUpGXLlqm4uFg5OTkhrLDnSEpK4tunLLBkyZLAV2X+7Gc/U11dXZvLLlu2TJGRkYHwlaTs7Gz967/+q5YvX65nnnnGSM3hpiM9bsK2HbzMzEzNnTtXffv2DUy76667NHToUK1bt05FRUWBa4PeffddXbp0Sa+++mrgKM0dd9yhn/70p/rd736nN954Qw6Ho1N1sEsVItu3b5ff79fs2bObTc/Ly1NMTIy2bdsWosp6Jq/XG9SHGNoW7PdT19TUqLKyUpmZmc0uUExISFBmZqb27t2rc+fO2VVmWOvMd4D7/f42b41Ec6mpqc1Ct0l2drYk6auvvpIk1dXV6c9//rMyMjKanRqJjY3V9OnTdeLECVVWVna6DvZ4Q6SyslIOh6PFIaHo6GiNGjVKhw4dClFlPU9JSYm2bdsmn8+n+Ph4ZWdn64EHHmj1HyC6rmnbHT9+fIt548aN06effqrDhw/rlltuMV1aj/Too4/qypUrioyMVFpamh544AGNGzcu1GWFlabvch84cKCkqwHc0NDQ6jbcNO3QoUOdPqRP8IaI2+1WfHy8oqKiWswbNGiQDhw4IK/Xy7nILho7dqwyMzM1ZMgQXblyRXv27FFBQYHKy8v10ksvcRGbDWprayWp1dvxmqa53W6jNfVEAwcO1OzZszVmzBjFxMTo2LFj2rRpk5577rkun4PsTXw+n9asWaOIiIjA6b2m7dOubZhP9RDxeDxthmrTyf36+nqCt4sWLlzY7Oc77rhDI0aM0MqVK7Vp0ybdf//9Iaqs52p6Olxrf1R+c9tG1yxYsKDZz1OnTlVOTo6efvppvfXWW1q8eHGIKgsv//Vf/6WKigo9+OCDga8C9Hg8klq/CLNpu25apjM4xxsi0dHR8nq9rc5rekNjYmJMltRr3HvvvYqMjFRpaWmoS+mRmrbbhoaGFvPYtu01dOhQZWVlqaamRtXV1aEup9tbuXKlCgoKlJ+fr7lz5wamN/2B2NpndNN23ZXHCRO8IZKQkKDz58+3+uFUW1uruLg49nZtEhkZqUGDBunChQuhLqVHGjRokKTWD8W1dwgP1khMTJQktu9rWL16tdatW6dvfetb+qd/+qdm89o7nGzFNkzwhkhqaqr8fn+Li6g8Ho+OHj2q1NTUEFXW83k8Hrndbg0YMCDUpfRITRecHDx4sMW8podpjBkzxnRZvcaJEyckie27He+//77WrFmj3NxcPf744y1uCxoxYoSioqJa3YabpnXlM5rgDZHs7Gw5HA5t3Lix2fQtW7aovr6ee3gt0NZf/KtWrVJjY6MmT55suKLeYciQIUpNTVVJSUmzPQa3262SkhJNmDAhcPUoOqeurq7Vc4xHjhxRSUmJhg0bpiFDhoSgsu5vzZo1ev/995Wbm6sf/vCHrT6oKDY2VpMnT1ZZWZmOHj0amF5XV6eioiINHTq0Sw8p4VhmiIwYMUJ33323CgoK9Prrr2vSpEmBJ1elp6cTvBZYu3atKioqlJGRIZfLpbq6Ou3Zs0dlZWUaN26cZs6cGeoSw8rnn3+u06dPS5LOnz8vr9cb+DawxMTEZg9yePTRR/XrX/9azz//fOCBBAUFBfL5fHrkkUfMFx8mgu1xTU2NFi5cqFtvvVVDhw4NXNX86aefyul06gc/+EHIfofurKCgQKtXr5bL5dKNN96oP/7xj83mDxw4MHA1+EMPPaS9e/fq5Zdf1uzZs9WnTx9t2bJFbrdbv/jFLzr98AyJL0kIKZ/Pp40bNwae1RwXF6fs7GzNnz+f21wssGvXLv3P//yPvvrqK128eFFOp1NDhgxRVlaW5syZY/S7lnuCF154QeXl5a3OS09P1wsvvNBs2sGDB5s9q/n666/Xgw8+yGHmdgTb43PnzmnFihWqrKxUbW2tPB6PBg4cqIyMDM2dO1fDhg0zWHX4aO/51lLL7fjrr7/Wu+++q/Ly8sCzmr/zne90+VYtghcAAIM4xwsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYxLcTAeiwr776Sv/2b/8W+Gq6e+65p9m3Di1evDjwzS9xcXF67bXXNHjw4JDUCnQ37PEC6LARI0bo0UcfDfz88ccf68svv5QkFRcXN/u6tSeeeILQBb6B4AXQKfn5+crKypIk+f1+LV26VNXV1XrrrbcCy8yaNUtTpkwJVYlAt8TXAgLotMuXL+vpp5/WqVOnJEmxsbGqq6uTJI0ZM0YvvfSSIiM5owV8E3u8ADqtb9+++vGPf6yIiAhJCoRunz599JOf/ITQBVpB8ALoknHjxmn8+PHNpt1yyy0aMmRIiCoCujeCF0CXfPLJJ9q/f3+zacXFxSotLQ1RRUD3RvAC6LTjx4/r7bffDvw8fPhwSf97sZXb7Q5VaUC3RfAC6BSPx6M33ngjcC9vRkaGXnnllUD4XrhwQUuWLJHP5wtlmUC3Q/AC6JTf//73On78uCSpX79+evLJJxUTE6N/+Zd/CVxUVVZWpnXr1oWyTKDbIXgBdFhxcbGKiooCP3//+9+Xy+WSJI0ePVrz588PzFuzZk2Lc8BAb8Z9vAAAGMQeLwAABhG8AAAYRPACAGAQwQsAgEEELwAABhG8AAAYRPACAGAQwQsAgEEELwAABv0/pzLTmF2zP6YAAAAASUVORK5CYII=\n", 108 | "text/plain": [ 109 | "
" 110 | ] 111 | }, 112 | "metadata": {}, 113 | "output_type": "display_data" 114 | } 115 | ], 116 | "source": [ 117 | "fig, axes = mk_fig()\n", 118 | "mask = y == -1\n", 119 | "colors = ['#E24A33', '#348ABD']\n", 120 | "\n", 121 | "axes.scatter(x[mask], y[mask], s=100, label='Class 0', zorder=2)\n", 122 | "axes.scatter(x[~mask], y[~mask], s=100, label='Class 1', zorder=2)\n", 123 | "\n", 124 | "w0 = np.array([0])\n", 125 | "w = minimize(lambda w: loss_lr(w, x[:, None], y), w0, jac=lambda w: loss_lr_grad(w, x[:, None], y)).x \n", 126 | "p = 1 / (1+np.exp(-w*x))\n", 127 | "axes.plot(x, p, '-k')" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 115, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "from sklearn.linear_model import LogisticRegression" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 120, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "lr = LogisticRegression(C=1e8)\n", 146 | "lr.fit(x[:, None], y);" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 121, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "array([[0.9953184]])" 158 | ] 159 | }, 160 | "execution_count": 121, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "lr.coef_" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 122, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "data": { 176 | "text/plain": [ 177 | "array([-1., 1., -1., -1., 1., -1., -1., -1., -1., -1., 1., 1., -1.,\n", 178 | " 1., 1., -1., 1., 1., 1., -1.])" 179 | ] 180 | }, 181 | "execution_count": 122, 182 | "metadata": {}, 183 | "output_type": "execute_result" 184 | } 185 | ], 186 | "source": [ 187 | "lr.predict(x[:, None])" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 123, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "data": { 197 | "text/plain": [ 198 | "array([[9.93534766e-01, 6.46523442e-03],\n", 199 | " [2.81490423e-01, 7.18509577e-01],\n", 200 | " [8.85833853e-01, 1.14166147e-01],\n", 201 | " [7.41457200e-01, 2.58542800e-01],\n", 202 | " [2.81490423e-01, 7.18509577e-01],\n", 203 | " [9.93534766e-01, 6.46523442e-03],\n", 204 | " [9.99955118e-01, 4.48823176e-05],\n", 205 | " [9.99955118e-01, 4.48823176e-05],\n", 206 | " [9.99878576e-01, 1.21423660e-04],\n", 207 | " [9.54531158e-01, 4.54688420e-02],\n", 208 | " [1.93972562e-02, 9.80602744e-01],\n", 209 | " [9.97768505e-04, 9.99002231e-01],\n", 210 | " [5.14555334e-01, 4.85444666e-01],\n", 211 | " [1.26485284e-01, 8.73514716e-01],\n", 212 | " [5.08002779e-02, 9.49199722e-01],\n", 213 | " [5.14555334e-01, 4.85444666e-01],\n", 214 | " [1.26485284e-01, 8.73514716e-01],\n", 215 | " [1.36421087e-04, 9.99863579e-01],\n", 216 | " [3.69013075e-04, 9.99630987e-01],\n", 217 | " [5.14555334e-01, 4.85444666e-01]])" 218 | ] 219 | }, 220 | "execution_count": 123, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "lr.predict_proba(x[:, None])" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "#### Create and save animations" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 4, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "fig, axes = mk_fig()\n", 243 | "mask = y == 0\n", 244 | "colors = ['#E24A33', '#348ABD']\n", 245 | "\n", 246 | "def init():\n", 247 | " axes.scatter(X[mask, 0], 
X[mask, 1], s=100, c='#E24A33', label='Class 0', zorder=2)\n", 248 | " axes.scatter(X[~mask, 0], X[~mask, 1], s=100, c='#348ABD', label='Class 1', zorder=2)\n", 249 | " axes.scatter(xq[0], xq[1], s=200, facecolor='None', edgecolor='k', lw=2, label='Unknown', zorder=2)\n", 250 | " axes.legend(facecolor='#F0F0F0', framealpha=1)\n", 251 | "\n", 252 | "def animate(i):\n", 253 | " if i == 0:\n", 254 | " axes.scatter(xq[0], xq[1], s=200, facecolor='None', edgecolor='k', lw=2, label='Unknown', zorder=2)\n", 255 | " if i == 1:\n", 256 | " k = closest_node(xq, X, i)\n", 257 | " axes.plot([xq[0], X[k,0]],\n", 258 | " [xq[1], X[k,1]],\n", 259 | " 'k-', zorder=1)\n", 260 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[y[k[0]]], edgecolor='k', lw=2, zorder=2)\n", 261 | " elif i % 2 == 1:\n", 262 | " k = closest_node(xq, X, i)\n", 263 | " axes.plot([np.repeat(xq[0], 2), X[k[-2:],0]],\n", 264 | " [np.repeat(xq[1], 2), X[k[-2:],1]],\n", 265 | " 'k-', zorder=1)\n", 266 | " if sum(y[k] == 0) > sum(y[k] == 1):\n", 267 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[0], edgecolor='k', lw=2, zorder=2)\n", 268 | " elif sum(y[k] == 1) > sum(y[k] == 0):\n", 269 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[1], edgecolor='k', lw=2, zorder=2)\n", 270 | " else: # if equal counts, set to closest point's color\n", 271 | " axes.scatter(xq[0], xq[1]+0.01, s=200, facecolor=colors[y[k[0]]], edgecolor='k', lw=2, zorder=2)\n", 272 | " \n", 273 | "plt.close(fig)\n", 274 | "ani = animation.FuncAnimation(fig,\n", 275 | " animate,\n", 276 | " init_func=init,\n", 277 | " frames=10,\n", 278 | " interval=600)\n", 279 | "ani.save('../gif/knn/knn.gif', writer='imagemagick', fps=1, dpi=75)\n", 280 | "# HTML(ani.to_jshtml())" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "#### View animations" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "Image(url='../gif/knn/knn.gif')" 297 | ] 298 | } 299 | ], 300 | "metadata": { 301 | "kernelspec": { 302 | "display_name": "Python 3", 303 | "language": "python", 304 | "name": "python3" 305 | }, 306 | "language_info": { 307 | "codemirror_mode": { 308 | "name": "ipython", 309 | "version": 3 310 | }, 311 | "file_extension": ".py", 312 | "mimetype": "text/x-python", 313 | "name": "python", 314 | "nbconvert_exporter": "python", 315 | "pygments_lexer": "ipython3", 316 | "version": "3.7.4" 317 | } 318 | }, 319 | "nbformat": 4, 320 | "nbformat_minor": 2 321 | } 322 | -------------------------------------------------------------------------------- /ml-deploy-model/README.md: -------------------------------------------------------------------------------- 1 | # A data scientist's guide to deploying machine learning models 2 | 3 | ![](https://img.shields.io/badge/-tutorial-informational) 4 | ![](https://img.shields.io/badge/-machine--learning-important) 5 | ![](https://img.shields.io/badge/-aws-lightgrey) 6 | 7 |

8 | 9 |

10 | 11 | The aim of this repository is to provide a simple guide to deploying machine learning (ML) models for data scientists familiar with machine learning in a local environment, but interested in learning how to deploy their models. Deployment refers to the act of making your ML model available in a production environment, where it can be accessed and utilised by other software. 12 | 13 | Perhaps surprisingly, deployment is a process that is quite unfamiliar to many data scientists - in large part due to the need for some level of familiarity with software engineering. Fortunately, there are many tools available to help us data scientists with deploying our models. This repository focuses on currently and commonly used tools for ML deployment and is overwhelmingly practical, aiming to provide you with a useful overview of these tools and a foundation for using and expanding upon them in future. Here is a current list of tutorials; click a link to get started (to follow these tutorials, I recommend cloning this repository to your local machine): 14 | 15 | 1. [Building and deploying a machine learning model with Amazon Sagemaker](deploy-with-sagemaker.ipynb) 16 | 2. [Deploying a machine learning model with Flask and Heroku](deploy-with-flask.ipynb) 17 | 18 | ### My recommendations for deploying ML models 19 | 20 | Having gone through the process of deploying an ML model via different methods/tools, I have a slight preference for Amazon SageMaker. Not only is it highly scalable, but everything related to your model/service can be kept in one place: data in S3, notebooks in SageMaker, APIs in API Gateway, etc. However, in saying that, Flask has its use cases too: not only does it provide an easy method of developing a web application (not just an endpoint), but it is also free, and I found it slightly easier to learn than SageMaker. As always with ML, there's no free lunch, and the right tool for the job depends on the job itself. -------------------------------------------------------------------------------- /ml-deploy-model/data/README.md: -------------------------------------------------------------------------------- 1 | This data is modified after that provided by the UCI Machine Learning Repository, available [here](https://archive.ics.uci.edu/ml/datasets/abalone). 2 | 3 | The data has been modified from the original, namely, the "Sex" characteristic has been one-hot-encoded and the "rings" target variable has had +1.5 added to it to represent the abalone age in years. The data has been shuffled and split into training and validation sets. The training set has 3341 rows and the validation set has 836 rows. 4 | 5 | This preprocessing and splitting can be reproduced using the [preprocessing.py](raw/preprocessing.py) script in this repository.
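For example, here is a minimal sketch (my addition; it assumes you run it from this `data` directory) of loading the processed data with pandas, taking the column names from `column_names.csv`:

```python
import pandas as pd

# the train/validation CSVs were written without a header row;
# column_names.csv stores the column order: age (the target) first, then the features
cols = pd.read_csv("column_names.csv").columns.tolist()
train_df = pd.read_csv("abalone_train.csv", names=cols)
valid_df = pd.read_csv("abalone_validation.csv", names=cols)
print(train_df.shape, valid_df.shape)  # expect (3341, 10) and (836, 10)
```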
6 | -------------------------------------------------------------------------------- /ml-deploy-model/data/column_names.csv: -------------------------------------------------------------------------------- 1 | age,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,sex_I,sex_M -------------------------------------------------------------------------------- /ml-deploy-model/data/raw/preprocessing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | model_data = pd.get_dummies(pd.read_csv('abalone.csv'), 5 | drop_first=True) 6 | 7 | model_data[['age']] = model_data[['rings']] + 1.5  # age in years = rings + 1.5 8 | model_data = model_data.drop(columns="rings") 9 | train_data, validation_data = np.split(model_data.sample( 10 | frac=1, random_state=123), [int(0.8 * len(model_data))]) 11 | 12 | pd.concat([train_data['age'], train_data.drop(['age'], axis=1)], 13 | axis=1).to_csv('../abalone_train.csv', index=False, header=False) 14 | pd.concat([validation_data['age'], validation_data.drop(['age'], axis=1)], axis=1).to_csv( 15 | '../abalone_validation.csv', index=False, header=False) 16 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# A tutorial for deploying a model with Flask, Docker and Heroku" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Deployment refers to the act of making your machine learning model available in a production environment, where it can be accessed and utilised by other tools, workflows and software. Deployment is typically one of the last stages in the machine learning workflow and can be one of the most difficult.\n", 15 | "\n", 16 | "Flask is a web framework for Python, meaning that it provides functionality for building APIs and web applications. In this tutorial, we will explore:\n", 17 | "\n", 18 | "1. using Flask to create a simple API to interface with a machine learning model; and,\n", 19 | "2. using Flask to create a simple web application that integrates our API with some basic HTML.\n", 20 | "\n", 21 | "The aim of this tutorial is to introduce you to deploying machine learning models with Flask. I will not be giving an in-depth introduction to Flask here; I only intend to show how easy it is to deploy a model with Flask and to provide a foundation which you can build on to deploy your models in efficient and creative ways in the future.\n", 22 | "\n", 23 | "We'll be deploying a Random Forest regression model, built locally with scikit-learn, to predict the age of abalone based on the classic abalone dataset hosted [here](https://archive.ics.uci.edu/ml/datasets/abalone). We aim to predict the age of abalone from its physical measurements. The data provided in the tutorial has been modified from the original, namely, the \"Sex\" characteristic has been one-hot-encoded and the \"rings\" target variable has had +1.5 added to it to represent the abalone age in years. The data is located in the [data folder](./data). It has been shuffled and split into training and validation sets for you. The training set has 3341 rows and the validation set has 836 rows. 
The data looks like this:\n", 24 | "\n", 25 | "\n", 26 | "\n", 27 | "If you're interested in other ways of deploying your machine learning model, check out [my other tutorial using Amazon SageMaker](deploy-with-sagemaker.ipynb)." 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "# Contents\n", 35 | "\n", 36 | "1. [Requirements](#1)\n", 37 | "2. [Preparing the model we wish to deploy](#2)\n", 38 | "3. [Setting up your directory structure and environment](#3)\n", 39 | "4. [Model deployment](#4)\n", 40 | "5. [Building and deploying a web API](#5)\n", 41 | "6. [Building and deploying a web application](#6)\n", 42 | "7. [End and next steps](#7)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# 1. Requirements " 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "To complete this tutorial, you'll need the following:\n", 57 | "\n", 58 | "- Heroku account. Register [here](https://www.heroku.com/).\n", 59 | "- Heroku CLI. Download [here](https://devcenter.heroku.com/categories/command-line).\n", 60 | "- The Postman app and a free account. Download/register [here](https://www.postman.com/)." 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "# 2. Preparing the model we wish to deploy " 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "In contrast to the [Amazon SageMaker tutorial](deploy-with-sagemaker.ipynb) where we trained and built a model using SageMaker, here we will develop the model we wish to deploy locally. The notebook [build_model.ipynb](deploy-with-flask/build_model.ipynb) in this repository builds a simple Random Forest regression model using the abalone dataset. To simplify things a little, this model is trained on only four input features: `['length', 'diameter', 'height', 'whole_weight']`, as the four-feature model did not seem to be too much worse than the full-featured model for the purpose of this tutorial:\n", 75 | "\n", 76 | "\n", 77 | "\n", 78 | "The [build_model.ipynb](deploy-with-flask/build_model.ipynb) notebook saves the trained model using `joblib` into the appropriate directory location (described in the next section)." 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "# 3. Setting up your directory structure and environment " 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "We'll be needing a specific directory structure to help us easily deploy our machine learning model. As I'll discuss in the next section, we have two options for deploying our model: 1. as a web API service; or, 2. as a web application. 
The directory structure (provided in this repository in the [deploy-with-flask directory](deploy-with-flask)) that we need to follow looks like this:\n", 93 | "\n", 94 | "```shell\n", 95 | "deploy-with-flask\n", 96 | "├── build_model.ipynb # this notebook contains the model building code\n", 97 | "├── web_api\n", 98 | "│ └── abalone_predictor.joblib # this is the machine learning model we have built locally\n", 99 | "│ └── app.py # the file that defines our flask API\n", 100 | "│ └── Procfile # required by Heroku to help start flask app\n", 101 | "│ └── requirements.txt # file containing required packages\n", 102 | "│ \n", 103 | "└── web_application\n", 104 | " └── abalone_predictor.joblib # this is the machine learning model we have built locally\n", 105 | " └── app.py # the file that defines our flask application\n", 106 | " └── Procfile # required by Heroku to help start flask app\n", 107 | " └── requirements.txt # file containing required packages\n", 108 | " └── static # this subdirectory contains CSS style sheets\n", 109 | " │ └── style.css # css style sheet to be used in web application\n", 110 | " └── templates # this subdirectory contains HTML templates to help us build the web application\n", 111 | " └── home.html # html template to be used in web application\n", 112 | " └── prediction.html # html template to be used in web application\n", 113 | "```" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "At this point, you should also set up your development environment. I've provided a [`requirements.txt`](deploy-with-flask/web_api/requirements.txt) file in the repository. I recommend creating a new virtual environment (I use conda, so: `$ conda create -n <env-name> python=3.6`) and then installing the required packages from `requirements.txt` using `pip install -r requirements.txt`." 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "# 4. Model deployment " 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "As mentioned previously, we have two options for deploying our abalone prediction model. We can:\n", 135 | "\n", 136 | "1. develop a RESTful web API that accepts HTTP requests in the form of input data and returns a prediction;\n", 137 | "2. build a web application with an HTML user interface that interacts directly with our API.\n", 138 | "\n", 139 | "We'll explore both options below." 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "# 5. Building and deploying a web API " 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "It is extremely easy to create a RESTful API with Python and Flask. We already have the model we wish to deploy; we just need to create an API that allows users to access our model - by \"access\" I mean we want users to be able to send data to our model and to receive a prediction in return." 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "## 5.1 Building the Flask API" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "All we need to create our API is a single Python file named `app.py`. This file is located in the [`web_api`](deploy-with-flask/web_api) folder in this repository. This tutorial is not a tutorial on how to use Flask; rather, I want to show you how you can easily deploy a model with the help of Flask. 
There are many good online resources for learning about Flask - as a starter, I highly recommend the free [Flask tutorial video series by Corey Schafer](https://www.youtube.com/playlist?list=PL-osiE80TeTs4UjLw5MM6OjgkjFeUxCYH). With that said, let's open up `app.py` and briefly discuss what's going on in the file.\n", 168 | "\n", 169 | "\n", 170 | "" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "The `app.py` module is extremely simple. Each section of the code is numbered and described below:\n", 178 | "\n", 179 | "1. We first create an instance of the Flask class; every Flask application will have this line.\n", 180 | "2. We then paste in a Python function that accepts as input our trained machine learning model and some input data and returns the model prediction.\n", 181 | "3. We then load up our pre-trained model.\n", 182 | "4. The `@` symbol denotes a decorator. You don't need to know too much about decorators to understand what's going on here. Basically, we are defining our home page and populating it with some basic HTML text.\n", 183 | "5. We then define a new route at the URL `/predict` which will accept JSON POST requests, make a prediction with our previously defined prediction function, and then return the result.\n", 184 | "6. This piece of Python code simply allows us to start running our flask application by directly invoking the module with python from the command line - let's do that now!" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "## 5.2 Testing the Flask API" 192 | ] 193 | }, 194 | { 195 | "attachments": {}, 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "Open up a terminal and `cd` to the location of `app.py`. Then, type `python app.py`. You should see something like the following.\n", 200 | "\n", 201 | "\n", 202 | "\n", 203 | "Copy-and-paste the URL `http://127.0.0.1:5000/` into your browser of choice (this is the IP address of your local machine followed by the port, 5000, that Flask runs on by default).\n", 204 | "\n", 205 | "\n", 206 | "\n", 207 | "Great, our Flask app is up and running!\n", 208 | "\n", 209 | "We can open up Postman to make sure that we can send JSON POST requests to our app and receive a prediction in return. To do that:\n", 210 | "\n", 211 | "1. Open up Postman on your computer.\n", 212 | "2. Click \"Create a request\".\n", 213 | "3. Change the request to a \"POST\" request.\n", 214 | "4. Enter the URL `http://127.0.0.1:5000/predict`.\n", 215 | "5. Click the \"Body\" tab, click the \"raw\" radio button, and from the drop-down choose \"JSON\".\n", 216 | "6. Paste the following into the body (feel free to change the numbers if you like):\n", 217 | "\n", 218 | "```\n", 219 | "{\"length\": 0.41,\n", 220 | "\"diameter\": 0.33,\n", 221 | "\"height\": 0.10,\n", 222 | "\"whole_weight\": 0.36}\n", 223 | "```\n", 224 | "\n", 225 | "7. Click \"Send\". You should receive a prediction back (in my case, it was 9.14).\n", 226 | "\n", 227 | "" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "## 5.3 Deploying the API" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "Okay, so we have a working API; we now want to deploy it to the web so others can send requests. We will use Heroku to deploy our app, but you could also use other services such as AWS.\n", 242 | "\n", 243 | "1. 
Head over to [Heroku](https://dashboard.heroku.com/), log in, and click \"Create new app\".\n", 244 | "2. Choose a unique name for your app.\n", 245 | "\n", 246 | "\n", 247 | "\n", 248 | "3. We will be using the Heroku CLI to deploy our model. All we have to do is follow the simple instructions provided (note that for more complex applications, you may choose to containerize everything in a Docker container to deploy to Heroku).\n", 249 | "\n", 250 | "\n", 251 | "\n", 252 | "4. If you follow those commands, you should eventually see something like the following message verifying that your flask app has been deployed:\n", 253 | "\n", 254 | "\n", 255 | "\n", 256 | "5. Your app is now live on the web and anyone can send API requests to it! Let's give it a try in Postman. Open up Postman and repeat the steps outlined above, except now with the URL `https://my-abalone-predictor.herokuapp.com/predict`. If you sent your request correctly, you should receive a model prediction in return. Awesome!\n", 257 | "\n", 258 | "" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "# 6. Building and deploying a web application " 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "In section 5, we deployed our model as an endpoint that can receive JSON requests and return a prediction. Great! However, Flask has the ability to create entire web applications, not just a simple API, and I want to briefly introduce that functionality here. We only need to refactor our code a little bit and link it up with some HTML and CSS to create our web application." 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "We will use Flask to create an HTML form, accept data submitted to the form, and return a prediction using the submitted data. I won't go into too much detail here; I just want to show you what's possible and give you a foundation to build on. Let's open up our web application's [`app.py`](deploy-with-flask/web_application/app.py) file and go through the code step-by-step:\n", 280 | "\n", 281 | "" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "1. We'll be using `wtforms` and `flask_wtf` to help us build our form, so we need to add those to our import list. We're also importing a few useful modules from `flask` itself to help us build our web app.\n", 289 | "2. We create an instance of the Flask class and we also create a `SECRET_KEY`, which basically allows us to store and use information specific to a user in a session (more on that [here](https://flask.palletsprojects.com/en/1.1.x/quickstart/#sessions)).\n", 290 | "3. We again define our prediction function.\n", 291 | "4. Load up our pre-trained model.\n", 292 | "5. We now construct a simple form - there is an input for each of our input features, as well as a submit button.\n", 293 | "6. We want our home page to actually return the form we just created. So we instantiate a form, we validate it (check that each field has some data), and we then redirect the user to a page \"prediction\" where results will be displayed. The home page will be rendered with the help of the `home.html` file located [here](deploy-with-flask/web_application/templates/home.html) and we are passing the `form` to the template so we can use it in the rendering of the page.\n", 294 | "7. 
The \"prediction\" page will store the input data as a dictionary and pass it to our model predict function. The page is rendered wit the help of the `prediction.html` file located [here](deploy-with-flask/web_application/templates/prediction.html), and uses the `results` output by our model prediction function.\n", 295 | "\n", 296 | "With that done, let's test out our application." 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "## 6.2 Testing the web application" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "1. Open up a terminal and `cd` to the location of our web application's `app.py` file.\n", 311 | "2. Type `python app.py` and then copy and paste the URL `http://127.0.0.1:5000/` into your browser. You should see something like the following.\n", 312 | "\n", 313 | "\n", 314 | "\n", 315 | "3. Our web application is working! Let's try and make a prediction:\n", 316 | "\n", 317 | "\n", 318 | "\n", 319 | "\n", 320 | "\n", 321 | "4. Looks like our predictions are working too!\n", 322 | "\n", 323 | "This application is of course extremely simple and Flask is capable of building much more sophisticated web applications (have a look at the [docs](https://flask.palletsprojects.com/en/1.1.x/)), but hopefully this has given you a taste and some ideas as to what's possible with deploying your machine learning model as an application." 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "## 6.3 Deploying the web application" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "We now have a working application, let's deploy it to the web using Heroku.\n", 338 | "\n", 339 | "1. Head over to [Heroku](https://dashboard.heroku.com/), log-in, and click \"Create new app\".\n", 340 | "2. Choose a unique name for your app.\n", 341 | "\n", 342 | "\n", 343 | "\n", 344 | "3. We will again be using the Heroku CLI to deploy our model. Once again, follow the simple instructions provided by Heroku to deploy your web application.\n", 345 | "\n", 346 | "\n", 347 | "\n", 348 | "\n", 349 | "\n", 350 | "4. If you follow those commands, you should eventually see a message verifying that your web application has been deployed!\n", 351 | "5. Open up the provided URL and share your application with the world!" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "# 7. End and next steps " 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "Congratulations! You just deployed a model using Flask and Heroku. Hopefully this tutorial gave you some insight into how a machine leanring model can be deployed using these tools and how you might be able to expand upon the concepts presented to quickly and creatively deploy your models!\n", 366 | "\n", 367 | "I recommend checking out the [Flask docs](https://flask.palletsprojects.com/en/1.1.x/) to learn more about Flask. There is also an excellent and thorough [Flask tutorial by Miguel Grinberg](https://blog.miguelgrinberg.com/post/the-flask-mega-tutorial-part-i-hello-world) that you might find useful." 
368 | ] 369 | } 370 | ], 371 | "metadata": { 372 | "kernelspec": { 373 | "display_name": "Python 3", 374 | "language": "python", 375 | "name": "python3" 376 | }, 377 | "language_info": { 378 | "codemirror_mode": { 379 | "name": "ipython", 380 | "version": 3 381 | }, 382 | "file_extension": ".py", 383 | "mimetype": "text/x-python", 384 | "name": "python", 385 | "nbconvert_exporter": "python", 386 | "pygments_lexer": "ipython3", 387 | "version": "3.7.4" 388 | } 389 | }, 390 | "nbformat": 4, 391 | "nbformat_minor": 4 392 | } 393 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/build_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Build abalone age predictor" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This short and sweet notebook documents the steps taken to build the abalone age prediction model we will deploy using Flask and Heroku." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### Imports" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import joblib\n", 31 | "import pandas as pd\n", 32 | "from sklearn.ensemble import RandomForestRegressor\n", 33 | "from sklearn.metrics import mean_absolute_error" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### Load data" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "train_df = pd.read_csv('../data/abalone_train.csv',\n", 50 | " names = ['age', 'length', 'diameter', 'height',\n", 51 | " 'whole_weight', 'shucked_weight', 'viscera_weight',\n", 52 | " 'shell_weight', 'sex_I', 'sex_M'])\n", 53 | "valid_df = pd.read_csv('../data/abalone_validation.csv',\n", 54 | " names = ['age', 'length', 'diameter', 'height',\n", 55 | " 'whole_weight', 'shucked_weight', 'viscera_weight',\n", 56 | " 'shell_weight', 'sex_I', 'sex_M'])" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "### Build model" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Using all of the features:" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "MAE = 1.52 years\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "model = RandomForestRegressor(n_estimators=100, random_state=123).fit(train_df.drop(columns='age'),\n", 88 | " train_df['age'])\n", 89 | "predicted_age = model.predict(valid_df.drop(columns='age'))\n", 90 | "mae = mean_absolute_error(predicted_age, valid_df['age'])\n", 91 | "print(f\"MAE = {mae:.2f} years\")" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "Using only the features `['length', 'diameter', 'height', 'whole_weight']`:" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 4, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "MAE = 1.88 years\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "features = ['length', 'diameter', 'height', 'whole_weight']\n", 116 | "model = 
RandomForestRegressor(n_estimators=100, random_state=123).fit(train_df[features],\n", 117 | " train_df['age'])\n", 118 | "predicted_age = model.predict(valid_df[features])\n", 119 | "mae = mean_absolute_error(predicted_age, valid_df['age'])\n", 120 | "print(f\"MAE = {mae:.2f} years\")" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "### Save model" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "I'm happy enough with the performance of the reduced-feature model on the validation data. So I'll now re-fit the model on the full dataset to get it ready for deployment, then save the model using joblib." 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 5, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "features = ['length', 'diameter', 'height', 'whole_weight']\n", 144 | "full_X = pd.concat((train_df[features], valid_df[features]))\n", 145 | "full_y = pd.concat((train_df['age'], valid_df['age']))\n", 146 | "model = RandomForestRegressor(n_estimators=100).fit(full_X,\n", 147 | " full_y)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "Save to both the web_api and web_application folders:" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 6, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "with open('web_api/abalone_predictor.joblib', 'wb') as f:\n", 164 | " joblib.dump(model, f)\n", 165 | "with open('web_application/abalone_predictor.joblib', 'wb') as f:\n", 166 | " joblib.dump(model, f)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Prediction function" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "Here we will define a function that accepts input data and returns a prediction. We will use this function to develop our web API and web application using Flask." 
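One caveat worth noting about the function below (my own observation, not from the original notebook): it builds the feature vector from the dictionary's key order, so callers must supply the keys in the training order `['length', 'diameter', 'height', 'whole_weight']`. A stricter sketch that indexes by feature name instead:

```python
FEATURES = ['length', 'diameter', 'height', 'whole_weight']  # order used in training

def return_prediction_strict(model, input_json):
    # index explicitly by feature name so the caller's key order no longer matters
    input_data = [[input_json[f] for f in FEATURES]]
    return model.predict(input_data)[0]
```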
181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 7, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "input_json = {'length': 0.41,\n", 190 | " 'diameter': 0.33,\n", 191 | " 'height': 0.10,\n", 192 | " 'whole_weight': 0.36}" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 8, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "def return_prediction(model, input_json):\n", 202 | " \n", 203 | " input_data = [[input_json[k] for k in input_json.keys()]]\n", 204 | " prediction = model.predict(input_data)[0]\n", 205 | " \n", 206 | " return prediction" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 9, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/plain": [ 217 | "9.14" 218 | ] 219 | }, 220 | "execution_count": 9, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "return_prediction(model, input_json)" 227 | ] 228 | } 229 | ], 230 | "metadata": { 231 | "kernelspec": { 232 | "display_name": "Python 3", 233 | "language": "python", 234 | "name": "python3" 235 | }, 236 | "language_info": { 237 | "codemirror_mode": { 238 | "name": "ipython", 239 | "version": 3 240 | }, 241 | "file_extension": ".py", 242 | "mimetype": "text/x-python", 243 | "name": "python", 244 | "nbconvert_exporter": "python", 245 | "pygments_lexer": "ipython3", 246 | "version": "3.7.4" 247 | } 248 | }, 249 | "nbformat": 4, 250 | "nbformat_minor": 2 251 | } 252 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_api/Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn app:app -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_api/abalone_predictor.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/deploy-with-flask/web_api/abalone_predictor.joblib -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_api/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify 2 | import joblib 3 | 4 | # 1. create an instance of the Flask class 5 | app = Flask(__name__) 6 | 7 | # 2. define a prediction function 8 | def return_prediction(model, input_json): 9 | 10 | input_data = [[input_json[k] for k in input_json.keys()]] 11 | prediction = model.predict(input_data)[0] 12 | 13 | return prediction 14 | 15 | # 3. load our abalone age predictor model 16 | model = joblib.load('abalone_predictor.joblib') 17 | 18 | # 4. set up our home page 19 | @app.route("/") 20 | def index(): 21 | return """ 22 |

<h1>Welcome to our abalone prediction service</h1> 23 | <p>To use this service, make a JSON post request to the /predict url with the following fields:</p> 24 | <ul> 25 | <li>length</li> 26 | <li>diameter</li> 27 | <li>height</li> 28 | <li>whole_weight</li> 29 | </ul>
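<!-- note (comment added for clarity): return_prediction() builds its feature vector from the JSON keys in the order they are sent, so provide the fields in the order listed above -->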
30 | """ 31 | 32 | # 5. define a new route which will accept POST requests and return our model predictions 33 | @app.route('/predict', methods=['POST']) 34 | def abalone_prediction(): 35 | content = request.json 36 | results = return_prediction(model, content) 37 | return jsonify(results) 38 | 39 | # 6. allows us to run flask using $ python app.py 40 | if __name__ == '__main__': 41 | app.run() 42 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_api/default.profraw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/deploy-with-flask/web_api/default.profraw -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_api/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2020.4.5.1 2 | click==7.1.1 3 | Flask==1.1.2 4 | Flask-WTF==0.14.3 5 | gunicorn==20.0.4 6 | itsdangerous==1.1.0 7 | Jinja2==2.11.3 8 | joblib==0.14.1 9 | MarkupSafe==1.1.1 10 | numpy==1.18.2 11 | scikit-learn==0.22.2 12 | scipy==1.4.1 13 | Werkzeug==1.0.1 14 | WTForms==2.2.1 15 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn app:app -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/abalone_predictor.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/deploy-with-flask/web_application/abalone_predictor.joblib -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/app.py: -------------------------------------------------------------------------------- 1 | # 1. Imports 2 | from flask import Flask, render_template, session, url_for, redirect 3 | from flask_wtf import FlaskForm 4 | from wtforms import TextField, SubmitField 5 | import joblib 6 | 7 | # 2. create an instance of the Flask class 8 | app = Flask(__name__) 9 | app.config['SECRET_KEY'] = 'asecretkey' 10 | 11 | # 3. define a prediction function 12 | def return_prediction(model, input_json): 13 | 14 | input_data = [[input_json[k] for k in input_json.keys()]] 15 | prediction = model.predict(input_data)[0] 16 | 17 | return prediction 18 | 19 | # 4. load our abalone age predictor model 20 | model = joblib.load('abalone_predictor.joblib') 21 | 22 | # 5. create a WTForm Class 23 | class PredictForm(FlaskForm): 24 | 25 | length = TextField("Shell length") 26 | diameter = TextField("Shell diameter") 27 | height = TextField("Shell height") 28 | whole_weight = TextField("Whole weight") 29 | submit = SubmitField("Predict") 30 | 31 | # 6. 
set up our home page 32 | @app.route("/", methods=["GET", "POST"]) 33 | def index(): 34 | 35 | # Create instance of the form 36 | form = PredictForm() 37 | 38 | # Validate the form 39 | if form.validate_on_submit(): 40 | session['length'] = form.length.data 41 | session['diameter'] = form.diameter.data 42 | session['height'] = form.height.data 43 | session['whole_weight'] = form.whole_weight.data 44 | return redirect(url_for("prediction")) 45 | 46 | return render_template('home.html', form=form) 47 | 48 | # 7. define a new "prediction" route that processes form input and returns a model prediction 49 | @app.route('/prediction') 50 | def prediction(): 51 | 52 | content = {} 53 | content['length'] = float(session['length']) 54 | content['diameter'] = float(session['diameter']) 55 | content['height'] = float(session['height']) 56 | content['whole_weight'] = float(session['whole_weight']) 57 | results = return_prediction(model, content) 58 | return render_template('prediction.html', results=results) 59 | 60 | # 8. allows us to run flask using $ python app.py 61 | if __name__ == '__main__': 62 | app.run() 63 | 64 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2020.4.5.1 2 | click==7.1.1 3 | Flask==1.1.2 4 | Flask-WTF==0.14.3 5 | gunicorn==20.0.4 6 | itsdangerous==1.1.0 7 | Jinja2==2.11.2 8 | joblib==0.14.1 9 | MarkupSafe==1.1.1 10 | numpy==1.18.2 11 | scikit-learn==0.22.2 12 | scipy==1.4.1 13 | Werkzeug==1.0.1 14 | WTForms==2.2.1 15 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/static/style.css: -------------------------------------------------------------------------------- 1 | 2 | body { 3 | background-color: rgb(238, 238, 238); 4 | color: rgb(10, 9, 34); 5 | } 6 | 7 | label { 8 | width: 150px; 9 | } 10 | 11 | input { 12 | width: 100px; 13 | } 14 | 15 | #submit { 16 | background-color: rgb(53, 157, 53); 17 | width: 100px; 18 | margin-left: 154px; 19 | font-weight: bold; 20 | } -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/templates/home.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Abalone Prediction Service 13 | 14 | 15 | 16 |
17 | <h1>Abalone Age Predictor</h1> 18 | <h2>Please enter your abalone measurements below:</h2> 19 | <form method='POST'> 20 | {# This hidden_tag is a CSRF security feature. #} 21 | {{ form.hidden_tag() }} 22 | {{ form.length.label }} {{form.length}} 23 | <br> 24 | {{ form.diameter.label }} {{form.diameter}} 25 | <br> 26 | {{ form.height.label }} {{form.height}} 27 | <br> 28 | {{ form.whole_weight.label }} {{form.whole_weight}} 29 | <br> 30 | {{ form.submit() }} 31 | </form> 32 |
33 | 34 | 35 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-flask/web_application/templates/prediction.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Abalone Prediction Service 13 | 14 | 15 | 16 |
17 | <h1>Thank You. Here is the information you gave:</h1> 18 | <ul> 19 | <li>Shell length: {{session['length']}}</li> 20 | <li>Shell diameter: {{session['diameter']}}</li> 21 | <li>Shell height: {{session['height']}}</li> 22 | <li>Whole weight: {{session['whole_weight']}}</li> 23 | </ul> 24 | <h2>Your predicted abalone age is: {{results}} years</h2> 25 |
26 | 27 | 28 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-sagemaker.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# A tutorial for: building and deploying a model on Amazon Sagemaker" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Deployment refers to the act of making your machine learning model available in a production environment, where it can be accessed and utilised by other tools, workflows and software. Deployment is typically one of the last stages in the machine learning workflow and can be one of the most difficult.\n", 15 | "\n", 16 | "This is where [Amazon SageMaker](https://aws.amazon.com/sagemaker/) comes in. SageMaker is a relatively new Amazon service that supports all of the steps of machine learning model development: data labelling, model building, training, optimization, and deployment. You can choose to use all or any combination of these key features in SageMaker. In addition, SageMaker is based on Jupyter notebooks (which are familiar to most data scientists these days), comes with many built-in state-of-the-art algorithms, and provides a host of ready-to-use examples to get you up and running quickly.\n", 17 | "\n", 18 | "In this tutorial I'll walk you through building and deploying a machine learning model using SageMaker. While the SageMaker docs are quite good, I still found it a little difficult to get going on SageMaker - so this tutorial provides a simple walkthrough of using SageMaker for model building and deployment.\n", 19 | "\n", 20 | "We'll be using SageMaker’s implementation of the XGBoost algorithm to train and host a regression model to predict the age of abalone based on the classic abalone dataset hosted [here](https://archive.ics.uci.edu/ml/datasets/abalone). We aim to predict the age of abalone based on eight physical measurements. The data provided in the tutorial has been modified from the original, namely, the \"Sex\" characteristic has been one-hot-encoded and the \"rings\" target variable has had +1.5 added to it to represent the abalone age in years. The data is located in the [data folder](./data). It has been shuffled, one-hot-encoded and split into training and validation sets for you. The training set has 3341 rows and the validation set has 836 rows. The data looks like this:\n", 21 | "\n", 22 | "\n", 23 | "\n", 24 | "If you're interested in other ways of deploying your machine learning model, check out [my other tutorial using Flask](deploy-with-flask.ipynb)." 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# Contents\n", 32 | "\n", 33 | "1. [Requirements](#1)\n", 34 | "2. [Preparing the data](#2)\n", 35 | "3. [Setting up SageMaker](#3)\n", 36 | "4. [Building and deploying the model](#4)\n", 37 | "5. [Using the model](#5)\n", 38 | "6. [End](#6)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "# 1. Requirements " 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "To complete this tutorial, you'll need the following:\n", 53 | "\n", 54 | "- An AWS account. Register [here](https://console.aws.amazon.com/).\n", 55 | "- The Postman app and a free account. Download/register [here](https://www.postman.com/). (If you'd rather script your API requests in Python, see the sketch just after this list.)"
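As a scripted alternative to Postman, the POST request made later in this tutorial (section 5.3) can be sent with Python's `requests` library. This is a minimal sketch and not part of the original tutorial - the invoke URL below is a hypothetical placeholder, and you'd substitute the one API Gateway generates for you in section 5.2:

```python
import requests

# Hypothetical invoke URL - replace with the one API Gateway gives you in section 5.2.
url = "https://abc123.execute-api.us-west-2.amazonaws.com/test/predictabalone"

# The same example record used in section 5.3; the outer double quotes are part of
# the body, because the Lambda function expects the event to be a JSON string.
payload = '"0.41,0.325,0.1,0.3555,0.146,0.072,0.105,0,1"'

response = requests.post(url, data=payload)
print(response.json())  # e.g., 10 - the predicted abalone age in years
```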
56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "# 2. Preparing the data " 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "Many of the Amazon SageMaker examples provided in the docs use data that are downloaded from online sources, but I wanted to emulate the process of using a custom dataset in this tutorial. To make the data available to SageMaker we need to host it in an S3 bucket:\n", 70 | "\n", 71 | "1. Head over to AWS, log in, and search for S3:\n", 72 | "\n", 73 | "\n", 74 | "\n", 75 | "2. Choose \"Create Bucket\".\n", 76 | "3. Provide a globally unique name for your bucket. I named mine \"deploy-tutorial-tb\".\n", 77 | "4. Leave the remaining settings as default and click \"Create\" in the lower left. You've now created a storage bucket to hold your data.\n", 78 | "\n", 79 | "\n", 80 | "\n", 81 | "5. Click on the bucket you just created and then click \"Upload\" in the top left corner.\n", 82 | "6. Drag and drop the two data files provided with this repository, \"abalone_train.csv\" and \"abalone_validation.csv\", into the upload prompt. Then click \"Upload\" in the lower left corner to complete the upload.\n", 83 | "\n", 84 | "\n", 85 | "\n", 86 | "7. You will now see the data in the bucket. Your data is ready to be accessed by SageMaker.\n", 87 | "\n", 88 | "" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "# 3. Setting up SageMaker " 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "We will now prepare SageMaker to build and deploy a machine learning model.\n", 103 | "\n", 104 | "1. Head back to the main AWS dashboard and search for SageMaker.\n", 105 | "\n", 106 | "\n", 107 | "\n", 108 | "2. Click \"Notebook instances\" in the panel on the left side of the screen. Then click \"Create notebook instance\" in the top right of the screen.\n", 109 | "\n", 110 | "\n", 111 | "\n", 112 | "3. Give your notebook instance a name; I called mine \"deploy-tutorial-tb\".\n", 113 | "4. Scroll down and in the IAM role field select \"Create a new role\" from the drop-down. In the pop-up, select \"Any S3 bucket\" and then click \"Create role\" at the bottom right (feel free to specify only a specific bucket, the one you created previously, if you wish).\n", 114 | "\n", 115 | "\n", 116 | "\n", 117 | "5. Leave all remaining fields as default, scroll to the bottom of the page and click \"Create notebook instance\".\n", 118 | "6. Wait a few minutes for the status of your newly created notebook to change from \"Pending\" to \"InService\".\n", 119 | "\n", 120 | "\n", 121 | "\n", 122 | "7. Click \"Open Jupyter\" to open your notebook instance." 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "We'll now upload the notebook I've created for this tutorial, which is located in the [deploy-with-sagemaker folder](deploy-with-sagemaker) in this repository.\n", 130 | "\n", 131 | "1. Click the \"New\" dropdown button at the top right and select \"Folder\". Click the checkbox next to your newly created folder, and then click \"Rename\" in the menu bar above to give the folder a name such as \"deploy-tutorial\".\n", 132 | "\n", 133 | "2. Click the folder to enter it and then click \"Upload\" in the top right corner. Choose the [xgboost_abalone.ipynb](deploy-with-sagemaker/xgboost_abalone.ipynb) notebook file downloaded from this repo and upload it. Open up the notebook. 
We'll use this notebook to build and deploy the model as described in the following section. At this point, your screen should look something like the following:\n", 134 | "\n", 135 | "\n", 136 | "\n", 137 | "> Note 1: If you get a \"Kernel not found\" error, use the dropdown menu that appeared to choose the \"conda_python3\" kernel and select \"Set kernel\".\n", 138 | "\n", 139 | "> Note 2: Amazon SageMaker has many example notebooks available for you to use - no matter what your use case is, you should be able to find a base notebook to work off. You can check out the available examples by clicking the \"SageMaker Examples\" tab in your opened notebook instance (note that it may take a few minutes for the examples to become available to you; you can periodically click the little refresh button at the top right corner of the screen to refresh the list). You can either \"Preview\" (just to have a look) or \"Use\" (will copy the notebook to your root) these notebooks." 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "# 4. Building and deploying the model " 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "In this section we will build and deploy a model to predict the age of abalone. The notebook we just uploaded has all the details of the dataset and XGBoost model we'll be using for this task. Let's walk through it step-by-step:\n", 154 | "\n", 155 | "1. We first need to point our notebook to the dataset we uploaded to S3 previously. All that is required here is to enter the name of your S3 bucket into the cell and run it (recall that I called mine \"deploy-tutorial-tb\").\n", 156 | "\n", 157 | "\n", 158 | "\n", 159 | "2. SageMaker uses Docker containers to allow users to train and deploy models. There are many pre-built Docker images available, particularly suited for SageMaker's built-in algorithms, and we will use one of those here (note however that you can always upload your own custom Docker container - more on that in the [docs](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms.html)). Run the next cell to load the XGBoost Docker container.\n", 160 | "\n", 161 | "\n", 162 | "\n", 163 | "3. The following cell sets the parameters for, and executes training of, the XGBoost model. You shouldn't need to change anything here, but feel free to take a look at what's going on inside this cell. The cell will periodically print feedback on the status of the training job. When it's finished you should see a \"Completed\" message - training should take around 5 minutes.\n", 164 | "\n", 165 | "\n", 166 | "\n", 167 | "4. Now we need to create a SageMaker model from the training job above. Run the cells under the headings \"4. Create the model\" and \"5. Create endpoint\" to create the model and deploy it to an endpoint that will be available to provide inferences - it will take about 10 minutes to run all of these cells (the notebook provides more details on what each of these cells is doing). You will eventually receive an output that your endpoint has been created.\n", 168 | "5. If you stop the tutorial here, be sure to shut down the endpoint you created by running the cell under the heading \"7. Delete Endpoint\" - else you will continue to be charged by Amazon.\n", 169 | "\n", 170 | "> Note that a key functionality of SageMaker is model tuning. 
I'm not describing any tuning/optimization here, but it is well described in the [SageMaker docs](https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning.html)." 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "# 5. Using the model " 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "Now that we've deployed the model as an endpoint, we want to use it in a production environment. There are two main ways we may want to use our model:\n", 185 | "\n", 186 | "1. To act as a HTTPS endpoint that can provide inferences on a case-by-case basis (e.g., for a web application). We will focus on this use case here.\n", 187 | "2. To get predictions for an entire dataset. You can read about this use case in the [SageMaker docs](https://docs.aws.amazon.com/sagemaker/latest/dg/how-it-works-batch.html).\n", 188 | "\n", 189 | "\n", 190 | "To allow users to interact with our machine learning model we need an API. Put simply, APIs enable applications to work together without having to know exactly how they’re implemented. You can think of a simple wall plug as an API: manufacturers of electronic equipment know that they can attach a particular power cord to their device, which will allow the device to \"interface with\" (i.e., use) the electricity supply through the wall plug - the manufacturers don't need to know how the electricity is supplied or what kind of infrastructure is behind the wall plug; they just know that if their device fits the wall plug, they can access the electricity. We will use a combination of Amazon API Gateway and Amazon Lambda to create an API that will allow users to use our machine learning model for predictions. This section is a shortened summary of this [Amazon blog post](https://aws.amazon.com/blogs/machine-learning/call-an-amazon-sagemaker-model-endpoint-using-amazon-api-gateway-and-aws-lambda/)." 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "### 5.1 Create a Lambda function that calls the SageMaker Runtime Invoke_Endpoint\n", 198 | "\n", 199 | "Our first task is to create a function that will be invoked through an API request. We'll create that function using Amazon Lambda.\n", 200 | "\n", 201 | "1. Head back to the main AWS dashboard and search for Lambda.\n", 202 | "\n", 203 | "\n", 204 | "\n", 205 | "2. In the Lambda dashboard select \"Create function\".\n", 206 | "3. With the \"Author from scratch\" tab selected, give your function a name (I called the function \"predict_abalone\"), choose Python 3.6, and select \"Create a new role with basic Lambda permissions\".\n", 207 | "\n", 208 | "\n", 209 | "\n", 210 | "4. Before we write any code, select the \"Permissions\" tab at the top of the screen, and then click on the new role that was created for you when you created the lambda function.\n", 211 | "\n", 212 | "\n", 213 | "\n", 214 | "5. In the new screen that opens up, click the name of the role called e.g., \"AWSLambdaBasicExecutionRole-\\*\", then click \"Edit policy\", click the JSON tab, and then replace the contents of the JSON with the following. 
Then click \"Review policy\" at the bottom right, and then \"Save changes\".\n", 215 | "\n", 216 | "```\n", 217 | "{\n", 218 | " \"Version\": \"2012-10-17\",\n", 219 | " \"Statement\": [\n", 220 | " {\n", 221 | " \"Sid\": \"VisualEditor0\",\n", 222 | " \"Effect\": \"Allow\",\n", 223 | " \"Action\": \"sagemaker:InvokeEndpoint\",\n", 224 | " \"Resource\": \"*\"\n", 225 | " }\n", 226 | " ]\n", 227 | "}\n", 228 | "```\n", 229 | "\n", 230 | "\n", 231 | "\n", 232 | "6. Now head back to Amazon Lambda and click the \"Configuration\" tab. Scroll down and copy-and-paste the following code into the editor. This is the function we will trigger with an API request.\n", 233 | "\n", 234 | "```python\n", 235 | "import os\n", 236 | "import boto3\n", 237 | "import math\n", 238 | "\n", 239 | "# grab environment variables\n", 240 | "ENDPOINT_NAME = os.environ['ENDPOINT_NAME']\n", 241 | "runtime = boto3.client('runtime.sagemaker')\n", 242 | "\n", 243 | "def lambda_handler(event, context):\n", 244 | " \n", 245 | " \n", 246 | " payload = event\n", 247 | " response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME, \n", 248 | " ContentType='text/csv', \n", 249 | " Body=payload)\n", 250 | " result = response['Body'].read()\n", 251 | " result = result.decode(\"utf-8\")\n", 252 | " result = result.split(',')\n", 253 | " result = [math.ceil(float(i)) for i in result]\n", 254 | " \n", 255 | " return result[0]\n", 256 | "```\n", 257 | "\n", 258 | "\n", 259 | "\n", 260 | "7. `ENDPOINT_NAME` in the code above is an environment variable that holds the name of the SageMaker model endpoint you deployed using the sample notebook. Scroll down the page and click the button \"Manage environment variables\" under the \"Environment variables\" tab. Click \"Add environment variable\". Call the key \"ENDPOINT_NAME\" and the value the name of your endpoint - for example, mine was \"deploy-tutorial-2020-04-13-16-03-41\". Click \"Save\".\n", 261 | "\n", 262 | "" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "### 5.2 Create an API Gateway – Integration request setup\n", 270 | "\n", 271 | "We're almost there, we've created a Python function in Lambda that calls our SageMaker model endpoint. We'll now use API Gateway to help users easily access this function via an API request." 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "1. Head back to the main AWS dashboard and search for Amazon API Gateway.\n", 279 | "\n", 280 | "\n", 281 | "\n", 282 | "2. Under the \"REST API\" tab click \"Build\".\n", 283 | "3. Click the \"New API\" radio button.\n", 284 | "4. Give your API a name like \"PredictAbalone\" and leave other settings as default. Click \"Create API\".\n", 285 | "5. Next, select \"Create Resource\" from the \"Actions\" drop-down menu and give the resource a name like “predictabalone” and click \"Create resource\".\n", 286 | "\n", 287 | "\n", 288 | "\n", 289 | "6. From the \"Actions\" drop-down menu, choose \"Create Method\" and select \"POST\".\n", 290 | "7. On the screen that appears, choose \"Integration type: Lambda Function\" and in the \"Lambda function\" text box, search for and find the function we created earlier (mine was called \"predict_abalone\"). Click \"Save\".\n", 291 | "\n", 292 | "\n", 293 | "\n", 294 | "8. From the \"Actions\" drop-down menu, choose \"Deploy API\". Create a new \"Deploy stage\" called \"Test\" and then click \"Deploy\".\n", 295 | "\n", 296 | "\n", 297 | "\n", 298 | "9. 
Note the invoke URL that was created when you deployed your API. It should be something like: `https://{restapi_id}.execute-api.{region}.amazonaws.com/test/predictabalone`. Note that the resource name \"predictabalone\" is not appended automatically.\n", 299 | "\n", 300 | "For more detailed information on how to create an API with API Gateway, refer to the [documentation](https://docs.aws.amazon.com/apigateway/latest/developerguide/how-to-create-api.html). " 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "### 5.3 Testing our model with Postman\n", 308 | "\n", 309 | "Now that we have a deployed model endpoint and have set up our API, we can test everything out. We'll use Postman to send an API request and (hopefully) receive back a prediction from our model. You can download the latest version of Postman [here](https://www.postman.com/downloads/)." 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "1. Open up the Postman application and then select \"Create a request\".\n", 317 | "2. Choose \"POST\" as the request method and then paste your invoke URL from the previous step into the \"Enter request url\" field.\n", 318 | "3. Select the \"Body\" tab and then the \"raw\" radio button. Paste the following example data into the body field: \n", 319 | "\n", 320 | "`\"0.41,0.325,0.1,0.3555,0.146,0.072,0.105,0,1\"`\n", 321 | "\n", 322 | "4. Finally, click \"Send\" and you should receive a result of `10` back - the predicted age of the abalone for the sent data.\n", 323 | "\n", 324 | "" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "# 6. End and next steps" 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "Congratulations! You just created a model endpoint deployed and hosted by Amazon SageMaker and were able to invoke that endpoint with the help of API Gateway and a Lambda function - so cool! Have fun integrating this endpoint into your other software/workflows/apps!\n", 339 | "\n", 340 | "We only scratched the surface of Amazon SageMaker's capabilities in this tutorial. I highly recommend checking out the [SageMaker docs](https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html) to see what else it can do!" 341 | ] 342 | } 343 | ], 344 | "metadata": { 345 | "kernelspec": { 346 | "display_name": "Python 3", 347 | "language": "python", 348 | "name": "python3" 349 | }, 350 | "language_info": { 351 | "codemirror_mode": { 352 | "name": "ipython", 353 | "version": 3 354 | }, 355 | "file_extension": ".py", 356 | "mimetype": "text/x-python", 357 | "name": "python", 358 | "nbconvert_exporter": "python", 359 | "pygments_lexer": "ipython3", 360 | "version": "3.7.4" 361 | } 362 | }, 363 | "nbformat": 4, 364 | "nbformat_minor": 4 365 | } 366 | -------------------------------------------------------------------------------- /ml-deploy-model/deploy-with-sagemaker/xgboost_abalone.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Predicting abalone age with Amazon SageMaker and the XGBoost algorithm\n", 8 | "\n", 9 | "**Created by: [Tomas Beuzen](https://tomasbeuzen.github.io/). Hosted on [GitHub](xxx).**" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "---\n", 17 | "## Contents\n", 18 | "1. [Introduction](#1)\n", 19 | "2. 
[Setup](#2)\n", 20 | "3. [Training the XGBoost model ](#3)\n", 21 | "4. [Create the model](#4)\n", 22 | "5. [Create endpoint](#5)\n", 23 | "6. [Validate the model for use](#6)\n", 24 | "7. [Delete Endpoint](#7)\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "---\n", 32 | "## 1. Introduction \n", 33 | "\n", 34 | "This notebook demonstrates the use of Amazon SageMaker’s implementation of the XGBoost algorithm to train and host a regression model. It uses the classic abalone dataset, the original version of which can be found [here](https://archive.ics.uci.edu/ml/datasets/abalone). Briefly, the number of \"rings\" present in an abalone shell is a proxy for the age of the abalone (age [years] = rings + 1.5). We aim to predict the age of abalone based on eight physical measurements. The data provided in the tutorial has been modified from the original, namely, the \"Sex\" characteristic has been one-hot-encoded and the \"rings\" target variable has had 1.5 added to it to represent the abalone age in years.\n", 35 | "\n", 36 | "This notebook is adapted from the example provided by Amazon [here](https://github.com/awslabs/amazon-sagemaker-examples/blob/master/introduction_to_amazon_algorithms/xgboost_abalone/xgboost_abalone.ipynb). It has been significantly stripped down and modified to provide a bare minimum example illustrating how to build and deploy a model using SageMaker." 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "---\n", 44 | "## 2. Setup \n", 45 | "\n", 46 | "\n", 47 | "This notebook was created and tested on an ml.m2.medium notebook instance. The following code sets up paths to the S3 bucket we stored our data in previously." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "isConfigCell": true 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "import os\n", 59 | "import boto3\n", 60 | "import re\n", 61 | "import sagemaker\n", 62 | "\n", 63 | "role = sagemaker.get_execution_role()\n", 64 | "region = boto3.Session().region_name\n", 65 | "\n", 66 | "bucket = \"deploy-tutorial-tb\" # <-- insert your bucket name here\n", 67 | "bucket_path = 'https://s3-{}.amazonaws.com/{}'.format(region, bucket)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "---\n", 75 | "## 3. Training the XGBoost model \n", 76 | "\n", 77 | "The following cell loads the Amazon SageMaker XGBoost Docker image." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "from sagemaker.amazon.amazon_estimator import get_image_uri\n", 87 | "container = get_image_uri(region, 'xgboost', '0.90-1')" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "The following cell sets the parameters for, and executes training of, the XGBoost model. You should not have to change any setting here (unless you want to change the name of the job, which is clearly marked in a comment below). Training the model should take around 5 minutes. The code periodically pings the status of the job and prints the output." 
95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "%%time\n", 104 | "import boto3\n", 105 | "from time import gmtime, strftime\n", 106 | "\n", 107 | "job_name = 'deploy-tutorial-' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime()) # <-- feel free to change the name of your job if you wish\n", 108 | "print(\"Training job\", job_name)\n", 109 | "\n", 110 | "create_training_params = \\\n", 111 | "{\n", 112 | " \"AlgorithmSpecification\": {\n", 113 | " \"TrainingImage\": container,\n", 114 | " \"TrainingInputMode\": \"File\"\n", 115 | " },\n", 116 | " \"RoleArn\": role,\n", 117 | " \"OutputDataConfig\": {\n", 118 | " \"S3OutputPath\": bucket_path + \"/xgboost-model\"\n", 119 | " },\n", 120 | " \"ResourceConfig\": {\n", 121 | " \"InstanceCount\": 1,\n", 122 | " \"InstanceType\": \"ml.m5.2xlarge\",\n", 123 | " \"VolumeSizeInGB\": 5\n", 124 | " },\n", 125 | " \"TrainingJobName\": job_name,\n", 126 | " \"HyperParameters\": {\n", 127 | " \"max_depth\":\"5\",\n", 128 | " \"eta\":\"0.2\",\n", 129 | " \"gamma\":\"4\",\n", 130 | " \"min_child_weight\":\"6\",\n", 131 | " \"subsample\":\"0.7\",\n", 132 | " \"silent\":\"0\",\n", 133 | " \"objective\":\"reg:linear\",\n", 134 | " \"num_round\":\"50\"\n", 135 | " },\n", 136 | " \"StoppingCondition\": {\n", 137 | " \"MaxRuntimeInSeconds\": 3600\n", 138 | " },\n", 139 | " \"InputDataConfig\": [\n", 140 | " {\n", 141 | " \"ChannelName\": \"train\",\n", 142 | " \"DataSource\": {\n", 143 | " \"S3DataSource\": {\n", 144 | " \"S3DataType\": \"S3Prefix\",\n", 145 | " \"S3Uri\": bucket_path + '/abalone_train',\n", 146 | " \"S3DataDistributionType\": \"FullyReplicated\"\n", 147 | " }\n", 148 | " },\n", 149 | " \"ContentType\": \"text/csv\",\n", 150 | " \"CompressionType\": \"None\"\n", 151 | " },\n", 152 | " {\n", 153 | " \"ChannelName\": \"validation\",\n", 154 | " \"DataSource\": {\n", 155 | " \"S3DataSource\": {\n", 156 | " \"S3DataType\": \"S3Prefix\",\n", 157 | " \"S3Uri\": bucket_path + '/abalone_validation',\n", 158 | " \"S3DataDistributionType\": \"FullyReplicated\"\n", 159 | " }\n", 160 | " },\n", 161 | " \"ContentType\": \"text/csv\",\n", 162 | " \"CompressionType\": \"None\"\n", 163 | " }\n", 164 | " ]\n", 165 | "}\n", 166 | "\n", 167 | "\n", 168 | "client = boto3.client('sagemaker', region_name=region)\n", 169 | "client.create_training_job(**create_training_params)\n", 170 | "\n", 171 | "import time\n", 172 | "\n", 173 | "status = client.describe_training_job(TrainingJobName=job_name)['TrainingJobStatus']\n", 174 | "print(status)\n", 175 | "while status !='Completed' and status!='Failed':\n", 176 | " time.sleep(60)\n", 177 | " status = client.describe_training_job(TrainingJobName=job_name)['TrainingJobStatus']\n", 178 | " print(status)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "If you see the message \"Completed\", that means training successfully completed and the output model was stored in the output path specified by `create_training_params['OutputDataConfig']` above." 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "---\n", 193 | "## 4. Create the model \n", 194 | "In order to set up hosting, we have to import the model from training to hosting. The cell below creates a SageMaker Model from the training output above." 
195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "%%time\n", 204 | "import boto3\n", 205 | "from time import gmtime, strftime\n", 206 | "\n", 207 | "model_name=job_name + '-model'\n", 208 | "print(model_name)\n", 209 | "\n", 210 | "info = client.describe_training_job(TrainingJobName=job_name)\n", 211 | "model_data = info['ModelArtifacts']['S3ModelArtifacts']\n", 212 | "print(model_data)\n", 213 | "\n", 214 | "primary_container = {\n", 215 | " 'Image': container,\n", 216 | " 'ModelDataUrl': model_data\n", 217 | "}\n", 218 | "\n", 219 | "create_model_response = client.create_model(\n", 220 | " ModelName = model_name,\n", 221 | " ExecutionRoleArn = role,\n", 222 | " PrimaryContainer = primary_container)\n", 223 | "\n", 224 | "print(create_model_response['ModelArn'])" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "---\n", 232 | "## 5. Create endpoint \n", 233 | "\n", 234 | "Now that we've created a model we need to create a HTTPS endpoint where your machine learning model is available to provide inferences.\n", 235 | "\n", 236 | "### Create endpoint configuration\n", 237 | "SageMaker supports configuring REST endpoints in hosting with multiple models, e.g. for A/B testing purposes. In order to support this, we need to create an endpoint configuration which describes the distribution of traffic across the models, whether split, shadowed, or sampled in some way. In addition, and more relevant for the current tutorial, the endpoint configuration describes the instance type required for model deployment. " 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "from time import gmtime, strftime\n", 247 | "\n", 248 | "endpoint_config_name = 'deploy-tutorial-' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n", 249 | "print(endpoint_config_name)\n", 250 | "create_endpoint_config_response = client.create_endpoint_config(\n", 251 | " EndpointConfigName = endpoint_config_name,\n", 252 | " ProductionVariants=[{\n", 253 | " 'InstanceType':'ml.m5.xlarge',\n", 254 | " 'InitialVariantWeight':1,\n", 255 | " 'InitialInstanceCount':1,\n", 256 | " 'ModelName':model_name,\n", 257 | " 'VariantName':'AllTraffic'}])\n", 258 | "\n", 259 | "print(\"Endpoint Config Arn: \" + create_endpoint_config_response['EndpointConfigArn'])" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "### Create endpoint\n", 267 | "Finally we will create the endpoint that serves up the model, using the name and configuration defined above. The end result is an endpoint that can be validated and incorporated into production applications. It will take about 10 minutes to run the cell below and set up the endpoint." 
268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": { 274 | "scrolled": true 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "%%time\n", 279 | "import time\n", 280 | "\n", 281 | "endpoint_name = 'deploy-tutorial-' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n", 282 | "print(endpoint_name)\n", 283 | "create_endpoint_response = client.create_endpoint(\n", 284 | " EndpointName=endpoint_name,\n", 285 | " EndpointConfigName=endpoint_config_name)\n", 286 | "print(create_endpoint_response['EndpointArn'])\n", 287 | "\n", 288 | "resp = client.describe_endpoint(EndpointName=endpoint_name)\n", 289 | "status = resp['EndpointStatus']\n", 290 | "while status=='Creating':\n", 291 | " print(\"Status: \" + status)\n", 292 | " time.sleep(60)\n", 293 | " resp = client.describe_endpoint(EndpointName=endpoint_name)\n", 294 | " status = resp['EndpointStatus']\n", 295 | "\n", 296 | "print(\"Arn: \" + resp['EndpointArn'])\n", 297 | "print(\"Status: \" + status)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "---\n", 305 | "## 6. Validate the model for use \n", 306 | "Now that we've created the endpoint we can test that our model is available to perform inference. Let's try it out by making a single prediction which we call the \"payload\" in the cell below." 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "import math\n", 316 | "\n", 317 | "features = 'length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,sex_I,sex_M'\n", 318 | "payload = '0.41,0.325,0.1,0.3555,0.146,0.072,0.105,0,1'\n", 319 | "\n", 320 | "runtime_client = boto3.client('runtime.sagemaker', region_name=region)\n", 321 | "response = runtime_client.invoke_endpoint(EndpointName=endpoint_name, \n", 322 | " ContentType='text/csv', \n", 323 | " Body=payload)\n", 324 | "result = response['Body'].read()\n", 325 | "result = result.decode(\"utf-8\")\n", 326 | "result = result.split(',')\n", 327 | "result = [math.ceil(float(i)) for i in result]\n", 328 | "print(features)\n", 329 | "print(payload)\n", 330 | "print (f'Prediction: {result[0]:.0f}')" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "## 7. Delete Endpoint \n", 338 | "Once you are done using the endpoint, you can use the following to delete it. 
" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "client.delete_endpoint(EndpointName=endpoint_name)" 348 | ] 349 | } 350 | ], 351 | "metadata": { 352 | "anaconda-cloud": {}, 353 | "celltoolbar": "Raw Cell Format", 354 | "kernelspec": { 355 | "display_name": "Python 3", 356 | "language": "python", 357 | "name": "python3" 358 | }, 359 | "language_info": { 360 | "codemirror_mode": { 361 | "name": "ipython", 362 | "version": 3 363 | }, 364 | "file_extension": ".py", 365 | "mimetype": "text/x-python", 366 | "name": "python", 367 | "nbconvert_exporter": "python", 368 | "pygments_lexer": "ipython3", 369 | "version": "3.7.4" 370 | } 371 | }, 372 | "nbformat": 4, 373 | "nbformat_minor": 2 374 | } 375 | -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_0.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_1.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_10.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_11.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_12.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_13.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_14.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_15.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_15.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_16.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_2.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_3.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_4.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_5.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_6.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_7.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_8.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/flask_images/fl_9.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/flask_images/fl_9.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/ml-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/ml-deploy.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_0.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_1.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_10.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_11.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_12.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_13.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_14.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_15.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_15.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_16.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_17.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_18.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_19.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_2.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_20.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_21.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_22.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_23.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_23.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_3.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_4.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_5.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_6.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_7.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_8.png -------------------------------------------------------------------------------- /ml-deploy-model/docs/img/sagemaker_images/sm_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-deploy-model/docs/img/sagemaker_images/sm_9.png -------------------------------------------------------------------------------- /ml-image-generation/GANs/saved_models/GAN_cgan_generator.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/GANs/saved_models/GAN_cgan_generator.h5 -------------------------------------------------------------------------------- /ml-image-generation/GANs/saved_models/convolutional_GAN_images/image_at_epoch_0001.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/GANs/saved_models/convolutional_GAN_images/image_at_epoch_0001.png -------------------------------------------------------------------------------- /ml-image-generation/GANs/saved_models/convolutional_GAN_images/image_at_epoch_0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/GANs/saved_models/convolutional_GAN_images/image_at_epoch_0002.png -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_2L/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "cvae_2L" 2 | all_model_checkpoint_paths: "cvae_2L" 3 | -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_2L/cvae_2L.data-00000-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/cvae_2L/cvae_2L.data-00000-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_2L/cvae_2L.data-00001-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/cvae_2L/cvae_2L.data-00001-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_2L/cvae_2L.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/cvae_2L/cvae_2L.index -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_8L/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "cvae_8L" 2 | all_model_checkpoint_paths: "cvae_8L" 3 | -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_8L/cvae_8L.data-00000-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/cvae_8L/cvae_8L.data-00000-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/cvae_8L/cvae_8L.data-00001-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/cvae_8L/cvae_8L.data-00001-of-00002 -------------------------------------------------------------------------------- 
/ml-image-generation/autoencoders/checkpoints/cvae_8L/cvae_8L.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/cvae_8L/cvae_8L.index -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_2L/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "vae_2L" 2 | all_model_checkpoint_paths: "vae_2L" 3 | -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_2L/vae_2L.data-00000-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/vae_2L/vae_2L.data-00000-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_2L/vae_2L.data-00001-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/vae_2L/vae_2L.data-00001-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_2L/vae_2L.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/vae_2L/vae_2L.index -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_8L/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "vae_8L" 2 | all_model_checkpoint_paths: "vae_8L" 3 | -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_8L/vae_8L.data-00000-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/vae_8L/vae_8L.data-00000-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_8L/vae_8L.data-00001-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/vae_8L/vae_8L.data-00001-of-00002 -------------------------------------------------------------------------------- /ml-image-generation/autoencoders/checkpoints/vae_8L/vae_8L.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-image-generation/autoencoders/checkpoints/vae_8L/vae_8L.index 
-------------------------------------------------------------------------------- /ml-image-generation/autoencoders/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.9.0 2 | appnope==0.1.0 3 | astor==0.8.1 4 | attrs==19.3.0 5 | backcall==0.1.0 6 | bleach==3.3.0 7 | cachetools==4.0.0 8 | certifi==2019.11.28 9 | chardet==3.0.4 10 | cycler==0.10.0 11 | decorator==4.4.2 12 | defusedxml==0.6.0 13 | entrypoints==0.3 14 | gast==0.2.2 15 | google-auth==1.13.1 16 | google-auth-oauthlib==0.4.1 17 | google-pasta==0.2.0 18 | grpcio==1.27.2 19 | h5py==2.10.0 20 | idna==2.9 21 | importlib-metadata==1.6.0 22 | ipykernel==5.2.0 23 | ipython==7.13.0 24 | ipython-genutils==0.2.0 25 | ipywidgets==7.5.1 26 | jedi==0.16.0 27 | Jinja2==2.11.3 28 | joblib==0.14.1 29 | json5==0.9.4 30 | jsonschema==3.2.0 31 | jupyter==1.0.0 32 | jupyter-client==6.1.2 33 | jupyter-console==6.1.0 34 | jupyter-contrib-core==0.3.3 35 | jupyter-contrib-nbextensions==0.5.1 36 | jupyter-core==4.6.3 37 | jupyter-highlight-selected-word==0.2.0 38 | jupyter-latex-envs==1.4.6 39 | jupyter-nbextensions-configurator==0.4.1 40 | jupyterlab==2.1.1 41 | jupyterlab-server==1.1.1 42 | Keras-Applications==1.0.8 43 | Keras-Preprocessing==1.1.0 44 | kiwisolver==1.2.0 45 | lxml==4.6.3 46 | Markdown==3.2.1 47 | MarkupSafe==1.1.1 48 | matplotlib==3.1.1 49 | mistune==0.8.4 50 | nbconvert==5.6.1 51 | nbformat==5.0.5 52 | notebook==6.1.5 53 | numpy==1.18.2 54 | oauthlib==3.1.0 55 | opt-einsum==3.2.0 56 | pandas==0.25.2 57 | pandocfilters==1.4.2 58 | parso==0.6.2 59 | pexpect==4.8.0 60 | pickleshare==0.7.5 61 | Pillow==8.1.1 62 | prometheus-client==0.7.1 63 | prompt-toolkit==3.0.5 64 | protobuf==3.11.3 65 | ptyprocess==0.6.0 66 | pyasn1==0.4.8 67 | pyasn1-modules==0.2.8 68 | Pygments==2.6.1 69 | pyparsing==2.4.6 70 | pyrsistent==0.16.0 71 | python-dateutil==2.8.1 72 | pytz==2019.3 73 | PyYAML==5.3.1 74 | pyzmq==19.0.0 75 | qtconsole==4.7.2 76 | QtPy==1.9.0 77 | requests==2.23.0 78 | requests-oauthlib==1.3.0 79 | rsa==4.0 80 | scikit-learn==0.21.3 81 | scipy==1.4.1 82 | seaborn==0.9.0 83 | Send2Trash==1.5.0 84 | six==1.14.0 85 | tensorboard==2.0.2 86 | tensorflow==2.4.0 87 | tensorflow-estimator==2.0.1 88 | termcolor==1.1.0 89 | terminado==0.8.3 90 | testpath==0.4.4 91 | tornado==6.0.4 92 | traitlets==4.3.3 93 | urllib3==1.25.8 94 | wcwidth==0.1.9 95 | webencodings==0.5.1 96 | Werkzeug==1.0.1 97 | widgetsnbextension==3.5.1 98 | wrapt==1.12.1 99 | zipp==3.1.0 100 | -------------------------------------------------------------------------------- /ml-timeseries/README.md: -------------------------------------------------------------------------------- 1 | # Supervised learning for time series 2 | 3 | This repository contains notes and examples on using supervised machine learning to model time series data. -------------------------------------------------------------------------------- /ml-timeseries/data/README.md: -------------------------------------------------------------------------------- 1 | # Data availability 2 | 3 | The file "sales_data.csv" contains the "Retail Sales: Clothing and Clothing Accessory Stores" dataset made available by the Federal Reserve Bank of St. Louis; it can be accessed [here](https://fred.stlouisfed.org/series/MRTSSM448USN). [1] 4 | 5 | #### Citation 6 | 7 | [1] U.S. Census Bureau, Retail Sales: Clothing and Clothing Accessory Stores [MRTSSM448USN], retrieved from FRED, Federal Reserve Bank of St. Louis; https://fred.stlouisfed.org/series/MRTSSM448USN.
8 | -------------------------------------------------------------------------------- /ml-timeseries/data/sales_data.csv: -------------------------------------------------------------------------------- 1 | date,sales 2 | 1992-01-01,6938 3 | 1992-02-01,7524 4 | 1992-03-01,8475 5 | 1992-04-01,9401 6 | 1992-05-01,9558 7 | 1992-06-01,9182 8 | 1992-07-01,9103 9 | 1992-08-01,10513 10 | 1992-09-01,9573 11 | 1992-10-01,10254 12 | 1992-11-01,11187 13 | 1992-12-01,18395 14 | 1993-01-01,7502 15 | 1993-02-01,7524 16 | 1993-03-01,8766 17 | 1993-04-01,9867 18 | 1993-05-01,10063 19 | 1993-06-01,9635 20 | 1993-07-01,9794 21 | 1993-08-01,10628 22 | 1993-09-01,10013 23 | 1993-10-01,10346 24 | 1993-11-01,11760 25 | 1993-12-01,18851 26 | 1994-01-01,7280 27 | 1994-02-01,7902 28 | 1994-03-01,9921 29 | 1994-04-01,9869 30 | 1994-05-01,10009 31 | 1994-06-01,9893 32 | 1994-07-01,9735 33 | 1994-08-01,11157 34 | 1994-09-01,10217 35 | 1994-10-01,10730 36 | 1994-11-01,12354 37 | 1994-12-01,20016 38 | 1995-01-01,7518 39 | 1995-02-01,7961 40 | 1995-03-01,9815 41 | 1995-04-01,10168 42 | 1995-05-01,10620 43 | 1995-06-01,10301 44 | 1995-07-01,9784 45 | 1995-08-01,11264 46 | 1995-09-01,10710 47 | 1995-10-01,10439 48 | 1995-11-01,12751 49 | 1995-12-01,20002 50 | 1996-01-01,7684 51 | 1996-02-01,8991 52 | 1996-03-01,10349 53 | 1996-04-01,10570 54 | 1996-05-01,11405 55 | 1996-06-01,10554 56 | 1996-07-01,10202 57 | 1996-08-01,12134 58 | 1996-09-01,10623 59 | 1996-10-01,11250 60 | 1996-11-01,12875 61 | 1996-12-01,19944 62 | 1997-01-01,8194 63 | 1997-02-01,8835 64 | 1997-03-01,10840 65 | 1997-04-01,10131 66 | 1997-05-01,11505 67 | 1997-06-01,10654 68 | 1997-07-01,10734 69 | 1997-08-01,12461 70 | 1997-09-01,10942 71 | 1997-10-01,11635 72 | 1997-11-01,13244 73 | 1997-12-01,21118 74 | 1998-01-01,8800 75 | 1998-02-01,9499 76 | 1998-03-01,10863 77 | 1998-04-01,11825 78 | 1998-05-01,12239 79 | 1998-06-01,11451 80 | 1998-07-01,11633 81 | 1998-08-01,12971 82 | 1998-09-01,11214 83 | 1998-10-01,12384 84 | 1998-11-01,13854 85 | 1998-12-01,22418 86 | 1999-01-01,9237 87 | 1999-02-01,10171 88 | 1999-03-01,12081 89 | 1999-04-01,12386 90 | 1999-05-01,13167 91 | 1999-06-01,12280 92 | 1999-07-01,12461 93 | 1999-08-01,13734 94 | 1999-09-01,12357 95 | 1999-10-01,12948 96 | 1999-11-01,14643 97 | 1999-12-01,24286 98 | 2000-01-01,9447 99 | 2000-02-01,11170 100 | 2000-03-01,12841 101 | 2000-04-01,13124 102 | 2000-05-01,13735 103 | 2000-06-01,12953 104 | 2000-07-01,12500 105 | 2000-08-01,14610 106 | 2000-09-01,13375 107 | 2000-10-01,13369 108 | 2000-11-01,15675 109 | 2000-12-01,24875 110 | 2001-01-01,10060 111 | 2001-02-01,11450 112 | 2001-03-01,13067 113 | 2001-04-01,13362 114 | 2001-05-01,13787 115 | 2001-06-01,12935 116 | 2001-07-01,12600 117 | 2001-08-01,14818 118 | 2001-09-01,12104 119 | 2001-10-01,13218 120 | 2001-11-01,15352 121 | 2001-12-01,24534 122 | 2002-01-01,10344 123 | 2002-02-01,11730 124 | 2002-03-01,13977 125 | 2002-04-01,13195 126 | 2002-05-01,14150 127 | 2002-06-01,13210 128 | 2002-07-01,12873 129 | 2002-08-01,15113 130 | 2002-09-01,12445 131 | 2002-10-01,14006 132 | 2002-11-01,15911 133 | 2002-12-01,25350 134 | 2003-01-01,10804 135 | 2003-02-01,11662 136 | 2003-03-01,13452 137 | 2003-04-01,13691 138 | 2003-05-01,14730 139 | 2003-06-01,13496 140 | 2003-07-01,13854 141 | 2003-08-01,15522 142 | 2003-09-01,13567 143 | 2003-10-01,14601 144 | 2003-11-01,16555 145 | 2003-12-01,26760 146 | 2004-01-01,11790 147 | 2004-02-01,13344 148 | 2004-03-01,14760 149 | 2004-04-01,15058 150 | 2004-05-01,15379 151 | 2004-06-01,14237 152 | 2004-07-01,14667 
153 | 2004-08-01,15588 154 | 2004-09-01,14224 155 | 2004-10-01,15570 156 | 2004-11-01,17230 157 | 2004-12-01,28406 158 | 2005-01-01,12046 159 | 2005-02-01,13878 160 | 2005-03-01,15727 161 | 2005-04-01,15708 162 | 2005-05-01,15989 163 | 2005-06-01,15559 164 | 2005-07-01,15218 165 | 2005-08-01,16697 166 | 2005-09-01,14960 167 | 2005-10-01,16509 168 | 2005-11-01,18402 169 | 2005-12-01,30276 170 | 2006-01-01,12893 171 | 2006-02-01,14474 172 | 2006-03-01,16386 173 | 2006-04-01,16848 174 | 2006-05-01,17103 175 | 2006-06-01,16505 176 | 2006-07-01,16275 177 | 2006-08-01,17832 178 | 2006-09-01,16767 179 | 2006-10-01,17253 180 | 2006-11-01,19391 181 | 2006-12-01,31462 182 | 2007-01-01,13927 183 | 2007-02-01,15077 184 | 2007-03-01,18045 185 | 2007-04-01,17096 186 | 2007-05-01,18474 187 | 2007-06-01,17289 188 | 2007-07-01,16883 189 | 2007-08-01,18850 190 | 2007-09-01,16765 191 | 2007-10-01,17614 192 | 2007-11-01,20550 193 | 2007-12-01,30635 194 | 2008-01-01,14173 195 | 2008-02-01,15876 196 | 2008-03-01,17770 197 | 2008-04-01,17103 198 | 2008-05-01,19084 199 | 2008-06-01,17007 200 | 2008-07-01,17369 201 | 2008-08-01,19041 202 | 2008-09-01,15882 203 | 2008-10-01,16796 204 | 2008-11-01,18756 205 | 2008-12-01,26726 206 | 2009-01-01,13387 207 | 2009-02-01,14684 208 | 2009-03-01,15563 209 | 2009-04-01,16337 210 | 2009-05-01,17264 211 | 2009-06-01,15434 212 | 2009-07-01,16007 213 | 2009-08-01,17656 214 | 2009-09-01,15630 215 | 2009-10-01,17053 216 | 2009-11-01,18332 217 | 2009-12-01,27128 218 | 2010-01-01,13216 219 | 2010-02-01,14816 220 | 2010-03-01,17390 221 | 2010-04-01,17042 222 | 2010-05-01,17727 223 | 2010-06-01,16138 224 | 2010-07-01,16842 225 | 2010-08-01,17923 226 | 2010-09-01,16232 227 | 2010-10-01,17412 228 | 2010-11-01,20003 229 | 2010-12-01,28545 230 | 2011-01-01,13703 231 | 2011-02-01,15931 232 | 2011-03-01,18252 233 | 2011-04-01,18647 234 | 2011-05-01,18771 235 | 2011-06-01,17783 236 | 2011-07-01,17937 237 | 2011-08-01,19263 238 | 2011-09-01,17999 239 | 2011-10-01,18255 240 | 2011-11-01,20957 241 | 2011-12-01,31108 242 | 2012-01-01,14358 243 | 2012-02-01,17931 244 | 2012-03-01,20162 245 | 2012-04-01,18601 246 | 2012-05-01,19952 247 | 2012-06-01,18717 248 | 2012-07-01,18266 249 | 2012-08-01,20750 250 | 2012-09-01,18391 251 | 2012-10-01,18845 252 | 2012-11-01,22016 253 | 2012-12-01,31504 254 | 2013-01-01,15155 255 | 2013-02-01,17382 256 | 2013-03-01,20575 257 | 2013-04-01,19176 258 | 2013-05-01,20780 259 | 2013-06-01,18939 260 | 2013-07-01,19176 261 | 2013-08-01,21558 262 | 2013-09-01,18111 263 | 2013-10-01,19855 264 | 2013-11-01,22437 265 | 2013-12-01,31578 266 | 2014-01-01,15179 267 | 2014-02-01,17507 268 | 2014-03-01,20070 269 | 2014-04-01,20322 270 | 2014-05-01,21568 271 | 2014-06-01,18947 272 | 2014-07-01,19828 273 | 2014-08-01,21993 274 | 2014-09-01,18646 275 | 2014-10-01,20220 276 | 2014-11-01,23491 277 | 2014-12-01,32638 278 | 2015-01-01,15764 279 | 2015-02-01,17980 280 | 2015-03-01,20752 281 | 2015-04-01,20389 282 | 2015-05-01,22145 283 | 2015-06-01,19667 284 | 2015-07-01,20564 285 | 2015-08-01,22314 286 | 2015-09-01,19151 287 | 2015-10-01,20637 288 | 2015-11-01,23090 289 | 2015-12-01,33345 290 | 2016-01-01,15694 291 | 2016-02-01,18939 292 | 2016-03-01,21492 293 | 2016-04-01,20428 294 | 2016-05-01,21656 295 | 2016-06-01,20160 296 | 2016-07-01,20667 297 | 2016-08-01,22388 298 | 2016-09-01,19790 299 | 2016-10-01,20500 300 | 2016-11-01,23644 301 | 2016-12-01,34482 302 | 2017-01-01,15663 303 | 2017-02-01,17745 304 | 2017-03-01,21028 305 | 2017-04-01,20852 306 | 2017-05-01,21606 307 | 
2017-06-01,20322 308 | 2017-07-01,20535 309 | 2017-08-01,22536 310 | 2017-09-01,19720 311 | 2017-10-01,20307 312 | 2017-11-01,24438 313 | 2017-12-01,33720 314 | 2018-01-01,15881 315 | 2018-02-01,18585 316 | 2018-03-01,22404 317 | 2018-04-01,20616 318 | 2018-05-01,23764 319 | 2018-06-01,21589 320 | 2018-07-01,21919 321 | 2018-08-01,23381 322 | 2018-09-01,20260 323 | 2018-10-01,21473 324 | 2018-11-01,25831 325 | 2018-12-01,34706 326 | 2019-01-01,16410 327 | 2019-02-01,18134 328 | 2019-03-01,22093 329 | 2019-04-01,21597 330 | 2019-05-01,23200 331 | 2019-06-01,21123 332 | 2019-07-01,21714 333 | 2019-08-01,23791 334 | 2019-09-01,19695 335 | 2019-10-01,21113 -------------------------------------------------------------------------------- /ml-timeseries/docs/img/ts_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-timeseries/docs/img/ts_1.png -------------------------------------------------------------------------------- /ml-timeseries/docs/img/ts_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/machine-learning-tutorials/d9888c72a6480a2baeb70ec5e49dd0bbb7bbd4cc/ml-timeseries/docs/img/ts_2.png --------------------------------------------------------------------------------
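The `supervised_time_series_intro.ipynb` notebook builds on the idea stated in the ml-timeseries README above: reframe a univariate series as a tabular supervised-learning problem by using lagged values as features. A minimal sketch of that reframing on `sales_data.csv` as shipped above (columns `date`, `sales`); the 12-lag window, the chronological split, and the random forest (scikit-learn is assumed available) are illustrative choices, not necessarily what the notebook uses:

```python
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

df = pd.read_csv("ml-timeseries/data/sales_data.csv", parse_dates=["date"])

# Features for each month are the previous 12 months of sales; the target is
# the current month. The first 12 rows have incomplete histories, so drop them.
for lag in range(1, 13):
    df[f"lag_{lag}"] = df["sales"].shift(lag)
df = df.dropna()

# Split chronologically (never shuffle time series): hold out the last 2 years.
train, valid = df.iloc[:-24], df.iloc[-24:]
features = [c for c in df.columns if c.startswith("lag_")]

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(train[features], train["sales"])
print(model.score(valid[features], valid["sales"]))  # R^2 on the held-out years
```

A 12-lag window is a natural starting point here because the December spikes visible in the raw data recur every 12 months, so each row's features span one full seasonal cycle.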