├── .gitignore ├── .ipynb_checkpoints ├── 0. Beginning-checkpoint.ipynb ├── 1. Hello, World!-checkpoint.ipynb ├── 2.0 First Impressions of Machine Learning-checkpoint.ipynb ├── 2.1 Supervised Learning - Classification-checkpoint.ipynb ├── 2.2 Supervised Learning - Regression-checkpoint.ipynb ├── 2.3 Unsupervised Learning - Transformations and Dimensionality Reduction-checkpoint.ipynb ├── 2.4 Unsupervised Learning - Clustering-checkpoint.ipynb └── 3. Validations and Learning Curves-checkpoint.ipynb ├── 0. Beginning.ipynb ├── 1. Hello, World!.ipynb ├── 2.0 First Impressions of Machine Learning.ipynb ├── 2.1 Supervised Learning - Classification.ipynb ├── 2.2 Supervised Learning - Regression.ipynb ├── 2.3 Unsupervised Learning - Transformations and Dimensionality Reduction.ipynb ├── 2.4 Unsupervised Learning - Clustering.ipynb ├── 2.5 Review of Scikit-learn API.ipynb ├── 3. Validations and Learning Curves.ipynb ├── 4.1 Example - Supervised Spam Classification.ipynb ├── 4.2 Example - Face Recognition.ipynb ├── 5. Where do we go from here.ipynb ├── README.md ├── cheatsheet.txt ├── data ├── SMSSpamCollection └── readme ├── fetch_data.py ├── figures ├── BangPypers.pdf ├── Pic_BP_PDF-01.png ├── Pic_BP_PDF-02.png ├── Pic_BP_PDF-03.png ├── Pic_BP_PDF-04.png ├── Pic_BP_PDF-05.png ├── Pic_BP_PDF-06.png ├── Pic_BP_PDF-07.png ├── Pic_BP_PDF-08.png ├── Pic_BP_PDF-09.png ├── Pic_BP_PDF-10.png ├── Pic_BP_PDF-11.png ├── Pic_BP_PDF-12.png ├── Pic_BP_PDF-13.png ├── Pic_BP_PDF-14.png ├── Pic_BP_PDF-15.png ├── Pic_BP_PDF-16.png ├── Pic_BP_PDF-17.png ├── Pic_BP_PDF-18.png ├── Pic_BP_PDF-19.png ├── Pic_BP_PDF-20.png ├── Pic_BP_PDF-21.png ├── Pic_BP_PDF-22.png ├── Pic_BP_PDF-23.png ├── Pic_BP_PDF-24.png ├── __init__.py ├── __init__.pyc ├── bowjpg ├── cluster_comparison.png ├── iris_setosa.jpg ├── iris_versicolor.jpg ├── iris_virginica.jpg ├── magician.jpg ├── ml_map.png ├── netflix-prize.png ├── petal_sepal.jpg ├── plot.py ├── plot.pyc ├── plot_2d_separator.py ├── plot_2d_separator.pyc ├── plot_digits_datasets.py ├── plot_digits_datasets.pyc ├── supervised_workflow.svg ├── train_test_split.svg ├── train_validation_test2.svg └── unsupervised_workflow.svg └── scripts ├── classify_iris.py ├── cluster_digits.py ├── knn_iris.py ├── knn_regression.py └── plot_digits.py /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | /data/lfw-home 3 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/0. 
Beginning-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/2.3 Unsupervised Learning - Transformations and Dimensionality Reduction-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "# Unsupervised Learning\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "Many instances of unsupervised learning, such as dimensionality reduction, manifold learning and feature extraction, find a new representation of the input data without any additional input.\n", 28 | "\n", 29 | "\n", 30 | "\n", 31 | "The most simple example of this, which can barely be called learning, is rescaling the data to have zero mean and unit variance. This is a helpful preprocessing step for many machine learning models.\n", 32 | "\n", 33 | "Applying such a preprocessing has a very similar interface to the supervised learning algorithms we saw so far.\n", 34 | "Let's load the iris dataset and rescale it:" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "from sklearn.datasets import load_iris\n", 46 | "\n", 47 | "iris = load_iris()\n", 48 | "X, y = iris.data, iris.target\n", 49 | "print(X.shape)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "The iris dataset is not \"centered\" that is it has non-zero mean and the standard deviation is different for each component:\n" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "collapsed": false 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "print(\"mean : %s \" % X.mean(axis=0))\n", 68 | "print(\"standard deviation : %s \" % X.std(axis=0))" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "To use a preprocessing method, we first import the estimator, here StandardScaler and instantiate it:\n", 76 | " " 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "from sklearn.preprocessing import StandardScaler\n", 88 | "scaler = StandardScaler()" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "As with the classification and regression algorithms, we call ``fit`` to learn the model from the data. As this is an unsupervised model, we only pass ``X``, not ``y``. This simply estimates mean and standard deviation." 
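For reference, a minimal end-to-end sketch of the scaling workflow this notebook walks through cell by cell; it only uses `load_iris` and `StandardScaler` as introduced above, and is not part of the notebook's own cells:

```python
# Compact sketch of the scaling steps described above: load the iris data,
# fit a StandardScaler (estimates per-feature mean and std), transform,
# and verify the result.
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

iris = load_iris()
X = iris.data

scaler = StandardScaler()
scaler.fit(X)                  # unsupervised: only X is passed, no y
X_scaled = scaler.transform(X)

print(X_scaled.mean(axis=0))   # approximately zero for every feature
print(X_scaled.std(axis=0))    # approximately one for every feature
```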
96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "scaler.fit(X)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "Now we can rescale our data by applying the ``transform`` (not ``predict``) method:" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "collapsed": true 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "X_scaled = scaler.transform(X)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "``X_scaled`` has the same number of samples and features, but the mean was subtracted and all features were scaled to have unit standard deviation:" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": false 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "print(X_scaled.shape)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "collapsed": false 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "print(\"mean : %s \" % X_scaled.mean(axis=0))\n", 154 | "print(\"standard deviation : %s \" % X_scaled.std(axis=0))" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "Principal Component Analysis\n", 162 | "============================" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "An unsupervised transformation that is somewhat more interesting is Principle Component Analysis (PCA).\n", 170 | "It is a technique to reduce the dimensionality of the data, by creating a linear projection.\n", 171 | "That is, we find new features to represent the data that are a linear combination of the old data (i.e. we rotate it).\n", 172 | "\n", 173 | "The way PCA finds these new directions is by looking for the directions of maximum variance.\n", 174 | "Usually only few components that explain most of the variance in the data are kept. To illustrate how a rotation might look like, we first show it on two dimensional data and keep both principal components.\n", 175 | "\n", 176 | "We create a Gaussian blob that is rotated:" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": false 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "rnd = np.random.RandomState(5)\n", 188 | "X_ = rnd.normal(size=(300, 2))\n", 189 | "X_blob = np.dot(X_, rnd.normal(size=(2, 2))) + rnd.normal(size=2)\n", 190 | "y = X_[:, 0] > 0\n", 191 | "plt.scatter(X_blob[:, 0], X_blob[:, 1], c=y, linewidths=0, s=30)\n", 192 | "plt.xlabel(\"feature 1\")\n", 193 | "plt.ylabel(\"feature 2\")" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "As always, we instantiate our PCA model. By default all directions are kept." 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "from sklearn.decomposition import PCA\n", 212 | "pca = PCA()" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "Then we fit the PCA model with our data. As PCA is an unsupervised algorithm, there is no output ``y``." 
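As a side note (not shown in the notebook's cells), a fitted PCA object exposes the learned directions and how much variance each one explains; a small sketch, recreating the same rotated blob as above:

```python
# Hedged sketch: inspecting a fitted PCA via its standard attributes
# components_ and explained_variance_ratio_ (these attributes are part of
# scikit-learn's PCA, but are not used in the notebook itself).
import numpy as np
from sklearn.decomposition import PCA

rnd = np.random.RandomState(5)
X_blob = np.dot(rnd.normal(size=(300, 2)), rnd.normal(size=(2, 2))) + rnd.normal(size=2)

pca = PCA()
pca.fit(X_blob)                        # unsupervised: only X, no y

print(pca.components_)                 # one row per principal direction (a rotation)
print(pca.explained_variance_ratio_)   # fraction of variance captured by each component
```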
220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "collapsed": false 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "pca.fit(X_blob)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "Then we can transform the data, projected on the principal components:" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "X_pca = pca.transform(X_blob)\n", 249 | "\n", 250 | "plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, linewidths=0, s=30)\n", 251 | "plt.xlabel(\"first principal component\")\n", 252 | "plt.ylabel(\"second principal component\")" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "On the left of the plot you can see the four points that were on the top right before. PCA found fit first component to be along the diagonal, and the second to be perpendicular to it. As PCA finds a rotation, the principal components are always at right angles to each other." 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "Dimensionality Reduction for Visualization with PCA\n", 267 | "-------------------------------------------------------------\n", 268 | "Consider the digits dataset. It cannot be visualized in a single 2D plot, as it has 64 features. We are going to extract 2 dimensions to visualize it in, using the example from the sklearn examples [here](http://scikit-learn.org/stable/auto_examples/manifold/plot_lle_digits.html)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": false 276 | }, 277 | "outputs": [], 278 | "source": [ 279 | "from figures.plot_digits_datasets import digits_plot\n", 280 | "\n", 281 | "digits_plot()" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "Note that this projection was determined *without* any information about the\n", 289 | "labels (represented by the colors): this is the sense in which the learning\n", 290 | "is **unsupervised**. Nevertheless, we see that the projection gives us insight\n", 291 | "into the distribution of the different digits in parameter space." 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "## Manifold Learning\n", 299 | "\n", 300 | "One weakness of PCA is that it cannot detect non-linear features. A set\n", 301 | "of algorithms known as *Manifold Learning* have been developed to address\n", 302 | "this deficiency. 
A canonical dataset used in Manifold learning is the\n", 303 | "*S-curve*, which we briefly saw in an earlier section:" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": { 310 | "collapsed": false 311 | }, 312 | "outputs": [], 313 | "source": [ 314 | "from sklearn.datasets import make_s_curve\n", 315 | "X, y = make_s_curve(n_samples=1000)\n", 316 | "\n", 317 | "from mpl_toolkits.mplot3d import Axes3D\n", 318 | "ax = plt.axes(projection='3d')\n", 319 | "\n", 320 | "ax.scatter3D(X[:, 0], X[:, 1], X[:, 2], c=y)\n", 321 | "ax.view_init(10, -60)" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "This is a 2-dimensional dataset embedded in three dimensions, but it is embedded\n", 329 | "in such a way that PCA cannot discover the underlying data orientation:" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": { 336 | "collapsed": false 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "X_pca = PCA(n_components=2).fit_transform(X)\n", 341 | "plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": {}, 347 | "source": [ 348 | "Manifold learning algorithms, however, available in the ``sklearn.manifold``\n", 349 | "submodule, are able to recover the underlying 2-dimensional manifold:" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": { 356 | "collapsed": false 357 | }, 358 | "outputs": [], 359 | "source": [ 360 | "from sklearn.manifold import Isomap\n", 361 | "\n", 362 | "iso = Isomap(n_neighbors=15, n_components=2)\n", 363 | "X_iso = iso.fit_transform(X)\n", 364 | "plt.scatter(X_iso[:, 0], X_iso[:, 1], c=y)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "##Exercise\n", 372 | "Compare the results of Isomap and PCA on a 5-class subset of the digits dataset (``load_digits(5)``).\n", 373 | "\n", 374 | "__Bonus__: Also compare to TSNE, another popular manifold learning technique." 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": { 381 | "collapsed": true 382 | }, 383 | "outputs": [], 384 | "source": [ 385 | "from sklearn.datasets import load_digits\n", 386 | "\n", 387 | "digits = load_digits(5)\n", 388 | "\n", 389 | "X = digits.data\n", 390 | "# ..." 391 | ] 392 | } 393 | ], 394 | "metadata": { 395 | "kernelspec": { 396 | "display_name": "Python 2", 397 | "language": "python", 398 | "name": "python2" 399 | }, 400 | "language_info": { 401 | "codemirror_mode": { 402 | "name": "ipython", 403 | "version": 2 404 | }, 405 | "file_extension": ".py", 406 | "mimetype": "text/x-python", 407 | "name": "python", 408 | "nbconvert_exporter": "python", 409 | "pygments_lexer": "ipython2", 410 | "version": "2.7.11" 411 | } 412 | }, 413 | "nbformat": 4, 414 | "nbformat_minor": 0 415 | } 416 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/3. Validations and Learning Curves-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /0. 
Beginning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "## Machine Learning in Python: An Introduction to Scikit-Learn\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### What this workshop is about?" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "* Introduction to the basics of Machine Learning, and some tips and tricks\n", 24 | "* Introduction to scikit-learn, utilizing it for your machine learning needs" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "### Today's Workflow\n", 32 | "\n", 33 | "#### Setup and Introduction\n", 34 | "* Getting your machines to a common working baseline.\n", 35 | "\n", 36 | "#### A Gentle Introduction to Machine Learning and Scikit-Learn\n", 37 | "* What is Machine Learning?\n", 38 | "* Core Terminologies \n", 39 | "* Supervised Learning\n", 40 | "* Unsupervised Learning\n", 41 | "* Evaluation of Models\n", 42 | "* How to choose the right model for your dataset\n", 43 | "\n", 44 | "#### Going deeper with Supervised Learning\n", 45 | "* Classification\n", 46 | "* Regression\n", 47 | "\n", 48 | "#### Going deeper with Unsupervised Learning\n", 49 | "* Clustering\n", 50 | "* Dimensionality Reduction\n", 51 | "\n", 52 | "#### Model Validation\n", 53 | "* Validation and Cross Validation" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": true 61 | }, 62 | "outputs": [], 63 | "source": [] 64 | } 65 | ], 66 | "metadata": { 67 | "anaconda-cloud": {}, 68 | "kernelspec": { 69 | "display_name": "Python [default]", 70 | "language": "python", 71 | "name": "python2" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 2 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython2", 83 | "version": "2.7.12" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 0 88 | } 89 | -------------------------------------------------------------------------------- /2.1 Supervised Learning - Classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import numpy as np\n", 14 | "import seaborn" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "To visualize the workings of machine learning algorithms, it is often helpful to study two-dimensional or one-dimensional data, that is data with only one or two features. While in practice, datasets usually have many more features, it is hard to plot high-dimensional data on two-dimensional screens.\n", 22 | "\n", 23 | "We will illustrate some very simple examples before we move on to more \"real world\" data sets." 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Classification\n", 31 | "========\n", 32 | "First, we will look at a two class classification problem in two dimensions. We use the synthetic data generated by the ``make_blobs`` function." 
33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "collapsed": false 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "from sklearn.datasets import make_blobs\n", 44 | "X, y = make_blobs(centers=2, random_state=0)\n", 45 | "print(X.shape)\n", 46 | "print(y.shape)\n", 47 | "print(X[:5, :])\n", 48 | "print(y[:5])" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "As the data is two-dimensional, we can plot each sample as a point in two-dimensional space, with the first feature being the x-axis and the second feature being the y-axis." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "plt.scatter(X[:, 0], X[:, 1], c=y, s=40)\n", 67 | "plt.xlabel(\"first feature\")\n", 68 | "plt.ylabel(\"second feature\")" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "As classification is a supervised task, and we are interested in how well the model generalizes, we split our data into a training set,\n", 76 | "to built the model from, and a test-set, to evaluate how well our model performs on new data. The ``train_test_split`` function form the ``cross_validation`` module does that for us, by randomly splitting of 25% of the data for testing.\n" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "from sklearn.cross_validation import train_test_split\n", 88 | "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "K Nearest Neighbors\n", 96 | "------------------------------------------------\n", 97 | "A popular and easy to understand classifier is K nearest neighbors (kNN). 
It has one of the simplest learning strategies: given a new, unknown observation, look up in your reference database which ones have the closest features and assign the predominant class.\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "from sklearn.neighbors import KNeighborsClassifier" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "This time we set a parameter of the KNeighborsClassifier to tell it we only want to look at one nearest neighbor:" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "collapsed": false 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "knn = KNeighborsClassifier(n_neighbors=9)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "We fit the model with out training data" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": false, 141 | "scrolled": true 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "knn.fit(X_train, y_train)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "collapsed": false 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "from figures import plot_2d_separator" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": { 163 | "collapsed": false 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "plt.scatter(X[:, 0], X[:, 1], c=y, s=40)\n", 168 | "plt.xlabel(\"first feature\")\n", 169 | "plt.ylabel(\"second feature\")\n", 170 | "plot_2d_separator.plot_2d_separator(knn, X)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": { 177 | "collapsed": false 178 | }, 179 | "outputs": [], 180 | "source": [ 181 | "knn.score(X_test, y_test)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "## Using a different classifier" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "Now we'll take a few minutes and try out another learning model. Because of ``scikit-learn``'s uniform interface, the syntax is identical to that of ``LinearSVC`` above.\n", 196 | "\n", 197 | "There are many possibilities of classifiers; you could try any of the methods discussed at . Alternatively, you can explore what's available in ``scikit-learn`` using just the tab-completion feature. For example, import the ``linear_model`` submodule:" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "collapsed": true 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "from sklearn import linear_model" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "And use the tab completion to find what's available. Type ``linear_model.`` and then the tab key to see an interactive list of the functions within this submodule. The ones which begin with capital letters are the models which are available." 
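As one concrete way the exercise below could look, here is a hedged sketch using `LogisticRegression` (any of the suggested estimators would work the same way thanks to the uniform interface); it assumes the `X_train`, `X_test`, `y_train`, `y_test` splits created earlier in this notebook:

```python
# Sketch of the "try a different classifier" exercise: only the import and
# constructor change, the fit/predict/score calls stay identical.
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()
clf.fit(X_train, y_train)            # train on the training split
y_pred = clf.predict(X_test)         # predict labels for unseen points
print(clf.score(X_test, y_test))     # mean accuracy on the test split
```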
216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "Now select a new classifier and try out a classification of the iris data.\n", 223 | "\n", 224 | "Some good choices are\n", 225 | "\n", 226 | "- ``sklearn.svm.LinearSVC`` :\n", 227 | " Support Vector Machines without kernels based on liblinear\n", 228 | "\n", 229 | "- ``sklearn.svm.SVC`` :\n", 230 | " Support Vector Machines with kernels based on libsvm\n", 231 | "\n", 232 | "- ``sklearn.linear_model.LogisticRegression`` :\n", 233 | " Regularized Logistic Regression based on liblinear\n", 234 | "\n", 235 | "- ``sklearn.linear_model.SGDClassifier`` :\n", 236 | " Regularized linear models (SVM or logistic regression) using a Stochastic Gradient Descent algorithm written in Cython\n", 237 | "\n", 238 | "- ``sklearn.neighbors.NeighborsClassifier`` :\n", 239 | " k-Nearest Neighbors classifier based on the ball tree datastructure for low dimensional data and brute force search for high dimensional data\n", 240 | "\n", 241 | "- ``sklearn.naive_bayes.GaussianNB`` :\n", 242 | " Gaussian Naive Bayes model. This is an unsophisticated model which can be trained very quickly. It is often used to obtain baseline results before moving to a more sophisticated classifier.\n", 243 | "\n", 244 | "- ``sklearn.tree.DecisionTreeClassifier`` :\n", 245 | " A classifier based on a series of binary decisions. This is another very fast classifier, which can be very powerful.\n", 246 | "\n", 247 | "Choose one of the above, import it, and use the ``?`` feature to learn about it." 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": { 254 | "collapsed": true 255 | }, 256 | "outputs": [], 257 | "source": [] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "Now instantiate this model as we did with ``LinearSVC`` above. Call it ``clf``." 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": { 270 | "collapsed": true 271 | }, 272 | "outputs": [], 273 | "source": [] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "Now use our data ``X`` and ``y`` to train the model, using ``clf2.fit(X, y)``" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": { 286 | "collapsed": true 287 | }, 288 | "outputs": [], 289 | "source": [] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "Now call the ``predict`` method, and find the classification of ``X_new``." 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "collapsed": true 303 | }, 304 | "outputs": [], 305 | "source": [] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "Now use the code snippet in `Cell 16` to plot the corresponding graph" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": { 318 | "collapsed": true 319 | }, 320 | "outputs": [], 321 | "source": [] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "On the Iris Dataset\n", 328 | "=========\n", 329 | "**Exercise** Apply the KNeighborsClassifier to the ``iris`` dataset. 
Play with different values of the ``n_neighbors`` and observe how training and test score change.\n", 330 | "\n", 331 | "Note: If you finish early, you can try applying a different estimator: `sklearn.svm.SVC`" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": { 338 | "collapsed": false 339 | }, 340 | "outputs": [], 341 | "source": [ 342 | "%load scripts/knn_iris.py" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": { 349 | "collapsed": false 350 | }, 351 | "outputs": [], 352 | "source": [ 353 | "plot_iris_knn()" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "## Support Vector Machines" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": {}, 366 | "source": [ 367 | "Another powerful and highly effective method can be used for both Classification and Regression.\n", 368 | "\n", 369 | "SVMs are a **discriminative** classifier: that is, they draw a boundary between clusters of data." 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": { 376 | "collapsed": false 377 | }, 378 | "outputs": [], 379 | "source": [ 380 | "from sklearn.datasets.samples_generator import make_blobs\n", 381 | "X, y = make_blobs(n_samples=50, centers=2,\n", 382 | " random_state=0, cluster_std=0.60)\n", 383 | "plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='spring');" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "There can be many seperators for the dataset above. How the find the best one?" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": { 397 | "collapsed": false 398 | }, 399 | "outputs": [], 400 | "source": [ 401 | "xfit = np.linspace(-1, 3.5)\n", 402 | "plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='spring')\n", 403 | "\n", 404 | "for m, b, d in [(1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2)]:\n", 405 | " yfit = m * xfit + b\n", 406 | " plt.plot(xfit, yfit, '-k')\n", 407 | " plt.fill_between(xfit, yfit - d, yfit + d, edgecolor='none', color='#AAAAAA', alpha=0.4)\n", 408 | "\n", 409 | "plt.xlim(-1, 3.5);" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": {}, 415 | "source": [ 416 | "### SVM's to the rescue :)" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": null, 422 | "metadata": { 423 | "collapsed": false 424 | }, 425 | "outputs": [], 426 | "source": [ 427 | "from sklearn.svm import SVC # \"Support Vector Classifier\"\n", 428 | "clf = SVC(kernel='linear')\n", 429 | "clf.fit(X, y)" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": null, 435 | "metadata": { 436 | "collapsed": true 437 | }, 438 | "outputs": [], 439 | "source": [ 440 | "def plot_svc_decision_function(clf, ax=None):\n", 441 | " \"\"\"Plot the decision function for a 2D SVC\"\"\"\n", 442 | " if ax is None:\n", 443 | " ax = plt.gca()\n", 444 | " x = np.linspace(plt.xlim()[0], plt.xlim()[1], 30)\n", 445 | " y = np.linspace(plt.ylim()[0], plt.ylim()[1], 30)\n", 446 | " Y, X = np.meshgrid(y, x)\n", 447 | " P = np.zeros_like(X)\n", 448 | " for i, xi in enumerate(x):\n", 449 | " for j, yj in enumerate(y):\n", 450 | " P[i, j] = clf.decision_function([[xi, yj]])\n", 451 | " # plot the margins\n", 452 | " ax.contour(X, Y, P, colors='k',\n", 453 | " levels=[-1, 0, 1], alpha=0.5,\n", 454 | " linestyles=['--', '-', '--'])" 455 | ] 456 | }, 457 | { 458 
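Further down, this notebook asks what happens when the data is not linearly separable (the `make_circles` example) and leaves the answer cells empty. Under the assumption that a kernel change is the intended answer, a minimal sketch:

```python
# Hedged sketch: switching SVC from the linear kernel to the RBF kernel so
# the decision boundary can bend around the inner circle of make_circles.
from sklearn.datasets import make_circles
from sklearn.svm import SVC

X_c, y_c = make_circles(100, factor=.1, noise=.1)

clf_rbf = SVC(kernel='rbf')        # non-linear kernel; kernel='linear' fails here
clf_rbf.fit(X_c, y_c)
print(clf_rbf.score(X_c, y_c))     # near-perfect accuracy on this toy data
```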
| "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": { 461 | "collapsed": false 462 | }, 463 | "outputs": [], 464 | "source": [ 465 | "plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='spring')\n", 466 | "plot_svc_decision_function(clf);" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "metadata": { 473 | "collapsed": false 474 | }, 475 | "outputs": [], 476 | "source": [ 477 | "plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='spring')\n", 478 | "plot_svc_decision_function(clf)\n", 479 | "plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],\n", 480 | " s=200, facecolors='none');" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": null, 486 | "metadata": { 487 | "collapsed": false 488 | }, 489 | "outputs": [], 490 | "source": [ 491 | "from IPython.html.widgets import interact\n", 492 | "\n", 493 | "def plot_svm(N=10):\n", 494 | " X, y = make_blobs(n_samples=200, centers=2,\n", 495 | " random_state=0, cluster_std=0.60)\n", 496 | " X = X[:N]\n", 497 | " y = y[:N]\n", 498 | " clf = SVC(kernel='linear')\n", 499 | " clf.fit(X, y)\n", 500 | " plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='spring')\n", 501 | " plt.xlim(-1, 4)\n", 502 | " plt.ylim(-1, 6)\n", 503 | " plot_svc_decision_function(clf, plt.gca())\n", 504 | " plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],\n", 505 | " s=200, facecolors='none')\n", 506 | " \n", 507 | "interact(plot_svm, N=[10, 200], kernel='linear');" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "#### What if the data is not linear?" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": null, 520 | "metadata": { 521 | "collapsed": false 522 | }, 523 | "outputs": [], 524 | "source": [ 525 | "from sklearn.datasets.samples_generator import make_circles\n", 526 | "X, y = make_circles(100, factor=.1, noise=.1)\n", 527 | "\n", 528 | "clf = SVC(kernel='linear').fit(X, y)\n", 529 | "\n", 530 | "plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='spring')\n", 531 | "plot_svc_decision_function(clf);" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": null, 537 | "metadata": { 538 | "collapsed": true 539 | }, 540 | "outputs": [], 541 | "source": [] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": null, 546 | "metadata": { 547 | "collapsed": true 548 | }, 549 | "outputs": [], 550 | "source": [] 551 | } 552 | ], 553 | "metadata": { 554 | "kernelspec": { 555 | "display_name": "Python [default]", 556 | "language": "python", 557 | "name": "python2" 558 | }, 559 | "language_info": { 560 | "codemirror_mode": { 561 | "name": "ipython", 562 | "version": 2 563 | }, 564 | "file_extension": ".py", 565 | "mimetype": "text/x-python", 566 | "name": "python", 567 | "nbconvert_exporter": "python", 568 | "pygments_lexer": "ipython2", 569 | "version": "2.7.12" 570 | }, 571 | "widgets": { 572 | "state": { 573 | "59dece73c784433691328d95d18a2e4a": { 574 | "views": [ 575 | { 576 | "cell_index": 44 577 | } 578 | ] 579 | } 580 | }, 581 | "version": "1.2.0" 582 | } 583 | }, 584 | "nbformat": 4, 585 | "nbformat_minor": 0 586 | } 587 | -------------------------------------------------------------------------------- /2.5 Review of Scikit-learn API.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "\n", 10 | "### A recap 
on Scikit-learn's estimator interface\n", 11 | "Scikit-learn strives to have a uniform interface across all methods. Given a scikit-learn *estimator*\n", 12 | "object named `model`, the following methods are available (not all for each model):\n", 13 | "\n", 14 | "- Available in **all Estimators**\n", 15 | " + `model.fit()` : fit training data. For supervised learning applications,\n", 16 | " this accepts two arguments: the data `X` and the labels `y` (e.g. `model.fit(X, y)`).\n", 17 | " For unsupervised learning applications, ``fit`` takes only a single argument,\n", 18 | " the data `X` (e.g. `model.fit(X)`).\n", 19 | "- Available in **supervised estimators**\n", 20 | " + `model.predict()` : given a trained model, predict the label of a new set of data.\n", 21 | " This method accepts one argument, the new data `X_new` (e.g. `model.predict(X_new)`),\n", 22 | " and returns the learned label for each object in the array.\n", 23 | " + `model.predict_proba()` : For classification problems, some estimators also provide\n", 24 | " this method, which returns the probability that a new observation has each categorical label.\n", 25 | " In this case, the label with the highest probability is returned by `model.predict()`.\n", 26 | " + `model.decision_function()` : For classification problems, some estimators provide an uncertainty estimate that is not a probability. For binary classification, a decision_function >= 0 means the positive class will be predicted, while < 0 means the negative class.\n", 27 | " + `model.score()` : for classification or regression problems, most (all?) estimators implement\n", 28 | " a score method. Scores are between 0 and 1, with a larger score indicating a better fit.\n", 29 | " + `model.transform()` : For feature selection algorithms, this will reduce the dataset to the selected features. For some classification and regression models such as some linear models and random forests, this method reduces the dataset to the most informative features. These classification and regression models can therefor also be used as feature selection methods.\n", 30 | " \n", 31 | "- Available in **unsupervised estimators**\n", 32 | " + `model.transform()` : given an unsupervised model, transform new data into the new basis.\n", 33 | " This also accepts one argument `X_new`, and returns the new representation of the data based\n", 34 | " on the unsupervised model.\n", 35 | " + `model.fit_transform()` : some estimators implement this method,\n", 36 | " which more efficiently performs a fit and a transform on the same input data.\n", 37 | " + `model.predict()` : for clustering algorithms, the predict method will produce cluster labels for new data points. Not all clustering methods have this functionality.\n", 38 | " + `model.predict_proba()` : Gaussian mixture models (GMMs) provide the probability for each point to be generated by a given mixture component.\n", 39 | " + `model.score()` : Density models like KDE and GMMs provide the likelihood of the data under the model." 
40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "source": [ 48 | "Apart from ``fit``, the two most important functions are arguably ``predict`` to produce a target variable (a ``y``) ``transform``, which produces a new representation of the data (an ``X``).\n", 49 | "The following table shows for which class of models which function applies:\n", 50 | "\n" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "\n", 58 | "\n", 59 | "\n", 60 | "\n", 61 | "\n", 62 | "\n", 63 | "\n", 64 | "
``model.predict``  |  ``model.transform``
Classification     |  Preprocessing
Regression         |  Dimensionality Reduction
Clustering         |  Feature Extraction
                   |  Feature selection
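A short illustrative sketch of the table above, not taken from the notebook itself: supervised models end in `predict()`, preprocessing models end in `transform()`, and both share `fit()`:

```python
# Supervised estimator: fit(X, y) then predict()/score().
# Unsupervised preprocessing estimator: fit(X) then transform().
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

iris = load_iris()
X, y = iris.data, iris.target

clf = LogisticRegression().fit(X, y)   # supervised: needs X and y
print(clf.predict(X[:3]))              # -> predicted class labels
print(clf.score(X, y))                 # -> mean accuracy

scaler = StandardScaler().fit(X)       # unsupervised: only X
print(scaler.transform(X[:3]))         # -> new representation of the data
```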
\n", 65 | "\n", 66 | "\n" 67 | ] 68 | } 69 | ], 70 | "metadata": { 71 | "kernelspec": { 72 | "display_name": "Python 2", 73 | "language": "python", 74 | "name": "python2" 75 | }, 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython", 79 | "version": 2 80 | }, 81 | "file_extension": ".py", 82 | "mimetype": "text/x-python", 83 | "name": "python", 84 | "nbconvert_exporter": "python", 85 | "pygments_lexer": "ipython2", 86 | "version": "2.7.9" 87 | } 88 | }, 89 | "nbformat": 4, 90 | "nbformat_minor": 0 91 | } 92 | -------------------------------------------------------------------------------- /4.1 Example - Supervised Spam Classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Spam Classification" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Some background information for text processing" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "%matplotlib inline\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "import numpy as np" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Bag of words? Bag of whaa...?" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": { 48 | "collapsed": true 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "X = [\"It was a bright cold day in April, and the clocks were striking thirteen\",\n", 53 | " \"The sky above the port was the color of television, tuned to a dead channel\"]" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 4, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "2" 67 | ] 68 | }, 69 | "execution_count": 4, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "len(X)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 5, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "CountVectorizer(analyzer=u'word', binary=False, decode_error=u'strict',\n", 89 | " dtype=, encoding=u'utf-8', input=u'content',\n", 90 | " lowercase=True, max_df=1.0, max_features=None, min_df=1,\n", 91 | " ngram_range=(1, 1), preprocessor=None, stop_words=None,\n", 92 | " strip_accents=None, token_pattern=u'(?u)\\\\b\\\\w\\\\w+\\\\b',\n", 93 | " tokenizer=None, vocabulary=None)" 94 | ] 95 | }, 96 | "execution_count": 5, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "from sklearn.feature_extraction.text import CountVectorizer\n", 103 | "\n", 104 | "vectorizer = CountVectorizer()\n", 105 | "vectorizer.fit(X)\n" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 7, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "{u'above': 0,\n", 119 | " u'and': 1,\n", 120 | " u'april': 2,\n", 121 | " u'bright': 3,\n", 122 | " u'channel': 4,\n", 123 | " u'clocks': 5,\n", 124 | " u'cold': 6,\n", 125 | " u'color': 7,\n", 126 | " u'day': 8,\n", 127 | " u'dead': 9,\n", 128 | " u'in': 10,\n", 129 | " u'it': 11,\n", 130 | " 
u'of': 12,\n", 131 | " u'port': 13,\n", 132 | " u'sky': 14,\n", 133 | " u'striking': 15,\n", 134 | " u'television': 16,\n", 135 | " u'the': 17,\n", 136 | " u'thirteen': 18,\n", 137 | " u'to': 19,\n", 138 | " u'tuned': 20,\n", 139 | " u'was': 21,\n", 140 | " u'were': 22}" 141 | ] 142 | }, 143 | "execution_count": 7, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "vectorizer.vocabulary_" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 8, 155 | "metadata": { 156 | "collapsed": true 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "X_bag_of_words = vectorizer.transform(X)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 9, 166 | "metadata": { 167 | "collapsed": false 168 | }, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": [ 173 | "<2x23 sparse matrix of type ''\n", 174 | "\twith 25 stored elements in Compressed Sparse Row format>" 175 | ] 176 | }, 177 | "execution_count": 9, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "X_bag_of_words" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 10, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "(2, 23)" 197 | ] 198 | }, 199 | "execution_count": 10, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "X_bag_of_words.shape" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 11, 211 | "metadata": { 212 | "collapsed": false 213 | }, 214 | "outputs": [ 215 | { 216 | "data": { 217 | "text/plain": [ 218 | "array([[0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1],\n", 219 | " [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 3, 0, 1, 1, 1, 0]])" 220 | ] 221 | }, 222 | "execution_count": 11, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "X_bag_of_words.toarray()" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 12, 234 | "metadata": { 235 | "collapsed": false 236 | }, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "[u'above',\n", 242 | " u'and',\n", 243 | " u'april',\n", 244 | " u'bright',\n", 245 | " u'channel',\n", 246 | " u'clocks',\n", 247 | " u'cold',\n", 248 | " u'color',\n", 249 | " u'day',\n", 250 | " u'dead',\n", 251 | " u'in',\n", 252 | " u'it',\n", 253 | " u'of',\n", 254 | " u'port',\n", 255 | " u'sky',\n", 256 | " u'striking',\n", 257 | " u'television',\n", 258 | " u'the',\n", 259 | " u'thirteen',\n", 260 | " u'to',\n", 261 | " u'tuned',\n", 262 | " u'was',\n", 263 | " u'were']" 264 | ] 265 | }, 266 | "execution_count": 12, 267 | "metadata": {}, 268 | "output_type": "execute_result" 269 | } 270 | ], 271 | "source": [ 272 | "vectorizer.get_feature_names()" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 13, 278 | "metadata": { 279 | "collapsed": false 280 | }, 281 | "outputs": [ 282 | { 283 | "data": { 284 | "text/plain": [ 285 | "[array([u'and', u'april', u'bright', u'clocks', u'cold', u'day', u'in',\n", 286 | " u'it', u'striking', u'the', u'thirteen', u'was', u'were'], \n", 287 | " dtype=', encoding=u'utf-8', input=u'content',\n", 340 | " lowercase=True, max_df=1.0, max_features=None, min_df=1,\n", 341 | " ngram_range=(1, 1), norm=u'l2', preprocessor=None, smooth_idf=True,\n", 342 | " stop_words=None, strip_accents=None, 
sublinear_tf=False,\n", 343 | " token_pattern=u'(?u)\\\\b\\\\w\\\\w+\\\\b', tokenizer=None, use_idf=True,\n", 344 | " vocabulary=None)" 345 | ] 346 | }, 347 | "execution_count": 16, 348 | "metadata": {}, 349 | "output_type": "execute_result" 350 | } 351 | ], 352 | "source": [ 353 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 354 | "\n", 355 | "tfidf_vectorizer = TfidfVectorizer()\n", 356 | "tfidf_vectorizer.fit(X)" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 15, 362 | "metadata": { 363 | "collapsed": false 364 | }, 365 | "outputs": [ 366 | { 367 | "name": "stdout", 368 | "output_type": "stream", 369 | "text": [ 370 | "[[ 0. 0.29 0.29 0.29 0. 0.29 0.29 0. 0.29 0. 0.29 0.29\n", 371 | " 0. 0. 0. 0.29 0. 0.21 0.29 0. 0. 0.21 0.29]\n", 372 | " [ 0.26 0. 0. 0. 0.26 0. 0. 0.26 0. 0.26 0. 0.\n", 373 | " 0.26 0.26 0.26 0. 0.26 0.55 0. 0.26 0.26 0.18 0. ]]\n" 374 | ] 375 | } 376 | ], 377 | "source": [ 378 | "import numpy as np\n", 379 | "np.set_printoptions(precision=2)\n", 380 | "\n", 381 | "print(tfidf_vectorizer.transform(X).toarray())" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "### Bigrams and N-Grams\n", 389 | "Entirely discarding word order is not always a good idea, as composite phrases often have specific meaning, and modifiers like \"not\" can invert the meaning of words.\n", 390 | "A simple way to include some word order are n-grams, which don't only look at a single token, but at all pairs of neighborhing tokens:" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 17, 396 | "metadata": { 397 | "collapsed": false 398 | }, 399 | "outputs": [ 400 | { 401 | "data": { 402 | "text/plain": [ 403 | "CountVectorizer(analyzer=u'word', binary=False, decode_error=u'strict',\n", 404 | " dtype=, encoding=u'utf-8', input=u'content',\n", 405 | " lowercase=True, max_df=1.0, max_features=None, min_df=1,\n", 406 | " ngram_range=(2, 2), preprocessor=None, stop_words=None,\n", 407 | " strip_accents=None, token_pattern=u'(?u)\\\\b\\\\w\\\\w+\\\\b',\n", 408 | " tokenizer=None, vocabulary=None)" 409 | ] 410 | }, 411 | "execution_count": 17, 412 | "metadata": {}, 413 | "output_type": "execute_result" 414 | } 415 | ], 416 | "source": [ 417 | "# look at sequences of tokens of minimum length 2 and maximum length 2\n", 418 | "bigram_vectorizer = CountVectorizer(ngram_range=(2, 2))\n", 419 | "bigram_vectorizer.fit(X)" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 18, 425 | "metadata": { 426 | "collapsed": false 427 | }, 428 | "outputs": [ 429 | { 430 | "data": { 431 | "text/plain": [ 432 | "[u'above the',\n", 433 | " u'and the',\n", 434 | " u'april and',\n", 435 | " u'bright cold',\n", 436 | " u'clocks were',\n", 437 | " u'cold day',\n", 438 | " u'color of',\n", 439 | " u'day in',\n", 440 | " u'dead channel',\n", 441 | " u'in april',\n", 442 | " u'it was',\n", 443 | " u'of television',\n", 444 | " u'port was',\n", 445 | " u'sky above',\n", 446 | " u'striking thirteen',\n", 447 | " u'television tuned',\n", 448 | " u'the clocks',\n", 449 | " u'the color',\n", 450 | " u'the port',\n", 451 | " u'the sky',\n", 452 | " u'to dead',\n", 453 | " u'tuned to',\n", 454 | " u'was bright',\n", 455 | " u'was the',\n", 456 | " u'were striking']" 457 | ] 458 | }, 459 | "execution_count": 18, 460 | "metadata": {}, 461 | "output_type": "execute_result" 462 | } 463 | ], 464 | "source": [ 465 | "bigram_vectorizer.get_feature_names()" 466 | ] 467 | }, 468 | { 469 | 
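To consolidate the vectorizer variants shown in these cells, a small sketch (an extra illustration, not a notebook cell) comparing how the vocabulary grows with different n-gram settings on the same two-sentence corpus `X`, including the character n-grams discussed a little further down:

```python
# Vocabulary size for different n-gram configurations of CountVectorizer,
# fit on the toy two-sentence corpus X from the cells above.
from sklearn.feature_extraction.text import CountVectorizer

variants = [
    ("unigrams", CountVectorizer()),
    ("bigrams", CountVectorizer(ngram_range=(2, 2))),
    ("uni+bigrams", CountVectorizer(ngram_range=(1, 2))),
    ("char bigrams", CountVectorizer(ngram_range=(2, 2), analyzer="char")),
]
for name, vect in variants:
    vect.fit(X)
    print(name, len(vect.vocabulary_))
```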
"cell_type": "code", 470 | "execution_count": 19, 471 | "metadata": { 472 | "collapsed": false 473 | }, 474 | "outputs": [ 475 | { 476 | "data": { 477 | "text/plain": [ 478 | "array([[0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,\n", 479 | " 1, 0, 1],\n", 480 | " [1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,\n", 481 | " 0, 1, 0]])" 482 | ] 483 | }, 484 | "execution_count": 19, 485 | "metadata": {}, 486 | "output_type": "execute_result" 487 | } 488 | ], 489 | "source": [ 490 | "bigram_vectorizer.transform(X).toarray()" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 20, 496 | "metadata": { 497 | "collapsed": false 498 | }, 499 | "outputs": [ 500 | { 501 | "data": { 502 | "text/plain": [ 503 | "CountVectorizer(analyzer=u'word', binary=False, decode_error=u'strict',\n", 504 | " dtype=, encoding=u'utf-8', input=u'content',\n", 505 | " lowercase=True, max_df=1.0, max_features=None, min_df=1,\n", 506 | " ngram_range=(1, 2), preprocessor=None, stop_words=None,\n", 507 | " strip_accents=None, token_pattern=u'(?u)\\\\b\\\\w\\\\w+\\\\b',\n", 508 | " tokenizer=None, vocabulary=None)" 509 | ] 510 | }, 511 | "execution_count": 20, 512 | "metadata": {}, 513 | "output_type": "execute_result" 514 | } 515 | ], 516 | "source": [ 517 | "gram_vectorizer = CountVectorizer(ngram_range=(1, 2))\n", 518 | "gram_vectorizer.fit(X)" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": 21, 524 | "metadata": { 525 | "collapsed": false 526 | }, 527 | "outputs": [ 528 | { 529 | "data": { 530 | "text/plain": [ 531 | "[u'above',\n", 532 | " u'above the',\n", 533 | " u'and',\n", 534 | " u'and the',\n", 535 | " u'april',\n", 536 | " u'april and',\n", 537 | " u'bright',\n", 538 | " u'bright cold',\n", 539 | " u'channel',\n", 540 | " u'clocks',\n", 541 | " u'clocks were',\n", 542 | " u'cold',\n", 543 | " u'cold day',\n", 544 | " u'color',\n", 545 | " u'color of',\n", 546 | " u'day',\n", 547 | " u'day in',\n", 548 | " u'dead',\n", 549 | " u'dead channel',\n", 550 | " u'in',\n", 551 | " u'in april',\n", 552 | " u'it',\n", 553 | " u'it was',\n", 554 | " u'of',\n", 555 | " u'of television',\n", 556 | " u'port',\n", 557 | " u'port was',\n", 558 | " u'sky',\n", 559 | " u'sky above',\n", 560 | " u'striking',\n", 561 | " u'striking thirteen',\n", 562 | " u'television',\n", 563 | " u'television tuned',\n", 564 | " u'the',\n", 565 | " u'the clocks',\n", 566 | " u'the color',\n", 567 | " u'the port',\n", 568 | " u'the sky',\n", 569 | " u'thirteen',\n", 570 | " u'to',\n", 571 | " u'to dead',\n", 572 | " u'tuned',\n", 573 | " u'tuned to',\n", 574 | " u'was',\n", 575 | " u'was bright',\n", 576 | " u'was the',\n", 577 | " u'were',\n", 578 | " u'were striking']" 579 | ] 580 | }, 581 | "execution_count": 21, 582 | "metadata": {}, 583 | "output_type": "execute_result" 584 | } 585 | ], 586 | "source": [ 587 | "gram_vectorizer.get_feature_names()" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": 22, 593 | "metadata": { 594 | "collapsed": false 595 | }, 596 | "outputs": [ 597 | { 598 | "data": { 599 | "text/plain": [ 600 | "array([[0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,\n", 601 | " 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1,\n", 602 | " 1, 0, 1, 1],\n", 603 | " [1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,\n", 604 | " 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 3, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,\n", 605 | " 0, 1, 0, 0]])" 606 | ] 607 | }, 608 | "execution_count": 22, 609 
| "metadata": {}, 610 | "output_type": "execute_result" 611 | } 612 | ], 613 | "source": [ 614 | "gram_vectorizer.transform(X).toarray()" 615 | ] 616 | }, 617 | { 618 | "cell_type": "markdown", 619 | "metadata": {}, 620 | "source": [ 621 | "### Character N-grams" 622 | ] 623 | }, 624 | { 625 | "cell_type": "markdown", 626 | "metadata": {}, 627 | "source": [ 628 | "Sometimes it is also helpful to not look at words, but instead single character.\n", 629 | "That is particularly useful if you have very noisy data, want to identify the language, or we want to predict something about a single word.\n", 630 | "We can simply look at characters instead of words by setting ``analyzer=\"char\"``.\n", 631 | "Looking at single characters is usually not very informative, but looking at longer n-grams of characters can be:" 632 | ] 633 | }, 634 | { 635 | "cell_type": "code", 636 | "execution_count": 24, 637 | "metadata": { 638 | "collapsed": false 639 | }, 640 | "outputs": [ 641 | { 642 | "data": { 643 | "text/plain": [ 644 | "CountVectorizer(analyzer='char', binary=False, decode_error=u'strict',\n", 645 | " dtype=, encoding=u'utf-8', input=u'content',\n", 646 | " lowercase=True, max_df=1.0, max_features=None, min_df=1,\n", 647 | " ngram_range=(2, 2), preprocessor=None, stop_words=None,\n", 648 | " strip_accents=None, token_pattern=u'(?u)\\\\b\\\\w\\\\w+\\\\b',\n", 649 | " tokenizer=None, vocabulary=None)" 650 | ] 651 | }, 652 | "execution_count": 24, 653 | "metadata": {}, 654 | "output_type": "execute_result" 655 | } 656 | ], 657 | "source": [ 658 | "char_vectorizer = CountVectorizer(ngram_range=(2, 2), analyzer=\"char\")\n", 659 | "char_vectorizer.fit(X)" 660 | ] 661 | }, 662 | { 663 | "cell_type": "code", 664 | "execution_count": 25, 665 | "metadata": { 666 | "collapsed": false 667 | }, 668 | "outputs": [ 669 | { 670 | "name": "stdout", 671 | "output_type": "stream", 672 | "text": [ 673 | "[u' a', u' b', u' c', u' d', u' i', u' o', u' p', u' s', u' t', u' w', u', ', u'a ', u'ab', u'ad', u'an', u'ap', u'as', u'ay', u'bo', u'br', u'ch', u'ck', u'cl', u'co', u'd ', u'da', u'de', u'e ', u'ea', u'ed', u'ee', u'el', u'en', u'er', u'ev', u'f ', u'g ', u'gh', u'ha', u'he', u'hi', u'ht', u'ig', u'ik', u'il', u'in', u'io', u'ir', u'is', u'it', u'ki', u'ks', u'ky', u'l,', u'ld', u'le', u'lo', u'n ', u'n,', u'nd', u'ne', u'ng', u'nn', u'o ', u'oc', u'of', u'ol', u'on', u'or', u'ov', u'po', u'pr', u'r ', u're', u'ri', u'rt', u's ', u'si', u'sk', u'st', u't ', u'te', u'th', u'to', u'tr', u'tu', u'un', u've', u'vi', u'wa', u'we', u'y ']\n" 674 | ] 675 | } 676 | ], 677 | "source": [ 678 | "print(char_vectorizer.get_feature_names())" 679 | ] 680 | }, 681 | { 682 | "cell_type": "markdown", 683 | "metadata": {}, 684 | "source": [ 685 | "## Moving on to the problem at hand" 686 | ] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "execution_count": 26, 691 | "metadata": { 692 | "collapsed": true 693 | }, 694 | "outputs": [], 695 | "source": [ 696 | "import os\n", 697 | "with open(os.path.join(\"data\",\"SMSSpamCollection\")) as f:\n", 698 | " lines = [line.strip().split(\"\\t\") for line in f.readlines()]\n", 699 | "text = [x[1] for x in lines]\n", 700 | "y = [x[0] == \"ham\" for x in lines]" 701 | ] 702 | }, 703 | { 704 | "cell_type": "code", 705 | "execution_count": 28, 706 | "metadata": { 707 | "collapsed": false 708 | }, 709 | "outputs": [], 710 | "source": [ 711 | "from sklearn.cross_validation import train_test_split\n", 712 | "\n", 713 | "text_train, text_test, y_train, y_test = train_test_split(text, y, 
random_state=42)" 714 | ] 715 | }, 716 | { 717 | "cell_type": "code", 718 | "execution_count": 29, 719 | "metadata": { 720 | "collapsed": true 721 | }, 722 | "outputs": [], 723 | "source": [ 724 | "from sklearn.feature_extraction.text import CountVectorizer\n", 725 | "\n", 726 | "vectorizer = CountVectorizer()\n", 727 | "vectorizer.fit(text_train)\n", 728 | "\n", 729 | "X_train = vectorizer.transform(text_train)\n", 730 | "X_test = vectorizer.transform(text_test)" 731 | ] 732 | }, 733 | { 734 | "cell_type": "code", 735 | "execution_count": 30, 736 | "metadata": { 737 | "collapsed": false 738 | }, 739 | "outputs": [ 740 | { 741 | "name": "stdout", 742 | "output_type": "stream", 743 | "text": [ 744 | "7464\n" 745 | ] 746 | } 747 | ], 748 | "source": [ 749 | "print(len(vectorizer.vocabulary_))" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": 31, 755 | "metadata": { 756 | "collapsed": false 757 | }, 758 | "outputs": [ 759 | { 760 | "name": "stdout", 761 | "output_type": "stream", 762 | "text": [ 763 | "[u'00', u'000', u'000pes', u'008704050406', u'0089', u'0121', u'01223585236', u'01223585334', u'02', u'0207', u'02072069400', u'02073162414', u'02085076972', u'03', u'04', u'0430', u'05', u'050703', u'0578', u'06']\n" 764 | ] 765 | } 766 | ], 767 | "source": [ 768 | "print(vectorizer.get_feature_names()[:20])\n" 769 | ] 770 | }, 771 | { 772 | "cell_type": "code", 773 | "execution_count": 32, 774 | "metadata": { 775 | "collapsed": false 776 | }, 777 | "outputs": [ 778 | { 779 | "name": "stdout", 780 | "output_type": "stream", 781 | "text": [ 782 | "[u'getting', u'getzed', u'gf', u'ghodbandar', u'ghost', u'gibbs', u'gibe', u'gift', u'gifted', u'gifts', u'giggle', u'gimme', u'gimmi', u'gin', u'girl', u'girlfrnd', u'girlie', u'girls', u'gist', u'giv']\n" 783 | ] 784 | } 785 | ], 786 | "source": [ 787 | "print(vectorizer.get_feature_names()[3000:3020])" 788 | ] 789 | }, 790 | { 791 | "cell_type": "code", 792 | "execution_count": 33, 793 | "metadata": { 794 | "collapsed": false 795 | }, 796 | "outputs": [ 797 | { 798 | "data": { 799 | "text/plain": [ 800 | "SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,\n", 801 | " eta0=0.0, fit_intercept=True, l1_ratio=0.15,\n", 802 | " learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1,\n", 803 | " penalty='l2', power_t=0.5, random_state=None, shuffle=True,\n", 804 | " verbose=0, warm_start=False)" 805 | ] 806 | }, 807 | "execution_count": 33, 808 | "metadata": {}, 809 | "output_type": "execute_result" 810 | } 811 | ], 812 | "source": [ 813 | "from sklearn.linear_model import SGDClassifier\n", 814 | "\n", 815 | "clf = SGDClassifier()\n", 816 | "clf" 817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": 34, 822 | "metadata": { 823 | "collapsed": false 824 | }, 825 | "outputs": [ 826 | { 827 | "data": { 828 | "text/plain": [ 829 | "SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,\n", 830 | " eta0=0.0, fit_intercept=True, l1_ratio=0.15,\n", 831 | " learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1,\n", 832 | " penalty='l2', power_t=0.5, random_state=None, shuffle=True,\n", 833 | " verbose=0, warm_start=False)" 834 | ] 835 | }, 836 | "execution_count": 34, 837 | "metadata": {}, 838 | "output_type": "execute_result" 839 | } 840 | ], 841 | "source": [ 842 | "clf.fit(X_train, y_train)" 843 | ] 844 | }, 845 | { 846 | "cell_type": "code", 847 | "execution_count": 35, 848 | "metadata": { 849 | "collapsed": false 850 | }, 851 | "outputs": [ 852 | { 853 | 
"data": { 854 | "text/plain": [ 855 | "0.9813486370157819" 856 | ] 857 | }, 858 | "execution_count": 35, 859 | "metadata": {}, 860 | "output_type": "execute_result" 861 | } 862 | ], 863 | "source": [ 864 | "clf.score(X_test, y_test)" 865 | ] 866 | }, 867 | { 868 | "cell_type": "code", 869 | "execution_count": 36, 870 | "metadata": { 871 | "collapsed": false 872 | }, 873 | "outputs": [ 874 | { 875 | "data": { 876 | "text/plain": [ 877 | "0.99880382775119614" 878 | ] 879 | }, 880 | "execution_count": 36, 881 | "metadata": {}, 882 | "output_type": "execute_result" 883 | } 884 | ], 885 | "source": [ 886 | "clf.score(X_train, y_train)" 887 | ] 888 | }, 889 | { 890 | "cell_type": "code", 891 | "execution_count": null, 892 | "metadata": { 893 | "collapsed": true 894 | }, 895 | "outputs": [], 896 | "source": [] 897 | } 898 | ], 899 | "metadata": { 900 | "kernelspec": { 901 | "display_name": "Python 2", 902 | "language": "python", 903 | "name": "python2" 904 | }, 905 | "language_info": { 906 | "codemirror_mode": { 907 | "name": "ipython", 908 | "version": 2 909 | }, 910 | "file_extension": ".py", 911 | "mimetype": "text/x-python", 912 | "name": "python", 913 | "nbconvert_exporter": "python", 914 | "pygments_lexer": "ipython2", 915 | "version": "2.7.11" 916 | } 917 | }, 918 | "nbformat": 4, 919 | "nbformat_minor": 0 920 | } 921 | -------------------------------------------------------------------------------- /5. Where do we go from here.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Story Time : The Netflix Prize\n", 8 | "" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "The Netflix Prize was an open competition for the best collaborative filtering algorithm to predict user ratings for films, based on previous ratings without any other information about the users or films, i.e. without the users or the films being identified except by numbers assigned for the contest." 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | " On 21 September 2009, the grand prize of US$1,000,000 was given to the BellKor's Pragmatic Chaos team which bested Netflix's own algorithm for predicting ratings by 10.06%" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "This competition took 3 years to complete. The winner was not a single algorithm but a complex combination of many algorithms, each one reducing the error ever so slightly." 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "Netflix doesn't use it !!! Because in it's own words it “did not seem to justify the engineering effort needed to bring them into a production environment,”" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "### Ok nice story, but why?" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## ML is not magic!" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "The point is, it's very easy to underestimate the complexity that goes with ML.\n", 65 | "A couple of very important points from the paper, however I urge you to read the paper itself." 
66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "There is a pretty awesome paper named **[\"A Few Useful Things to Know about Machine Learning\"](http://homes.cs.washington.edu/~pedrod/papers/cacm12.pdf)** by Prof. Pedro Domingos, wherein he talks about the pitfalls of ML." 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "* **Sometimes data is not enough.** Quoting Domingos: \"... the need for knowledge in learning should not be surprising. Machine learning is not magic; it can’t get something from nothing. What it does is get more from less. Programming, like all engineering, is a lot of work: we have to build everything from scratch. Learning is more like farming, which lets nature do most of the work. Farmers combine seeds with nutrients to grow crops. Learners combine knowledge with data to grow programs.\"" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "* **More Data > Clever Algorithm.** Quoting Domingos: \"Suppose you’ve constructed the best set of features you can, but the classifiers you’re getting are still not accurate enough. What can you do now? There are two main choices: design a better learning algorithm, or gather more data. [...] As a rule of thumb, a dumb algorithm with lots and lots of data beats a clever one with modest amounts of it. (After all, machine learning is all about letting data do the heavy lifting.)\"" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "* **The CURSE of Dimensionality.** This expression was coined by Bellman in 1961 to refer to the fact that many algorithms that work fine in low dimensions become intractable when the input is high-dimensional. Generalizing correctly becomes exponentially harder as the dimensionality (the number of features) of the examples grows, because a fixed-size training set covers a dwindling fraction of the input space. Even with a moderate dimension of 100 and a huge training set of a trillion examples, the latter covers only a fraction of about $10^{-18}$ of the input space. This is what makes machine learning both necessary and hard.\n", 94 | "\n" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "### Which brings us to ..." 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "## Diving Deeper into ML" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "If you wish to better understand the inner workings of Machine Learning, how each of the prediction algorithms works and what makes it work, then this is the path to follow.\n", 116 | "\n", 117 | "The thing to note here is that this path is filled with math and statistics along with programming. It is very easy to forget that, at ground level, ML is all about math and statistics, especially when we have an elegant library like scikit-learn providing such a splendid abstraction."
118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "However, this path has its own benefits:\n", 125 | " * You get a better understanding of hyperparameters.\n", 126 | " * Visualization becomes easier.\n", 127 | " * Standard algorithms sometimes fail on non-trivial problems :(\n", 128 | " " 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "### How to go about it then?" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "* A highly recommended approach is to start with Andrew Ng's Machine Learning course on [Coursera](https://www.coursera.org/learn/machine-learning). It provides a gentle and solid introduction to the internals of Machine Learning and has lots of programming exercises too. \n", 143 | "* Start playing with the many ML-related notebooks collected in the\n", 144 | "[IPython notebook gallery](https://github.com/ipython/ipython/wiki/A-gallery-of-interesting-IPython-Notebooks).\n", 145 | "* Some examples shared by [Scikit-Learn](http://scikit-learn.org/stable/auto_examples/)\n", 146 | "* [Kaggle](https://www.kaggle.com/)\n", 147 | "* This excellent post about Machine Learning on [GitHub](https://github.com/hangtwenty/dive-into-machine-learning)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "### Finally" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "# THANK YOU" 169 | ] 170 | } 171 | ], 172 | "metadata": { 173 | "kernelspec": { 174 | "display_name": "Python 2", 175 | "language": "python", 176 | "name": "python2" 177 | }, 178 | "language_info": { 179 | "codemirror_mode": { 180 | "name": "ipython", 181 | "version": 2 182 | }, 183 | "file_extension": ".py", 184 | "mimetype": "text/x-python", 185 | "name": "python", 186 | "nbconvert_exporter": "python", 187 | "pygments_lexer": "ipython2", 188 | "version": "2.7.11" 189 | } 190 | }, 191 | "nbformat": 4, 192 | "nbformat_minor": 0 193 | } 194 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning using Scikit-Learn 2 | 3 | ## Introduction 4 | 5 | This repository contains notebooks based on the material I used during the BangPypers July meetup. These notebooks are made keeping in mind that the intended audience has very little or no experience with scikit-learn and/or machine learning but has some knowledge of Python. 6 | 7 | ## Installation 8 | 9 | * Clone this repo: `git clone https://github.com/pfrcks/BangPypers-SKLearn.git` 10 | * If you don't have `python-dev`, install it using `sudo apt-get install python-dev` or whatever the equivalent command is for your distribution. 11 | * Installation is generally a non-trivial process. However, we have the wonderful **conda** environment manager, part of the Anaconda Scientific Distribution. The best course of action is downloading and installing [miniconda](http://conda.pydata.org/miniconda.html). 12 | * Once you have miniconda installed, issue the following commands in your shell: 13 | * `conda install numpy scipy matplotlib scikit-learn ipython-notebook seaborn` 14 | * `conda install -c conda-forge ipywidgets` 15 | * **Note**: The above process requires a good internet connection and some time. 
Please do this before coming to the workshop. 16 | * If you want to simplify the process further, you can go for the full-fledged [Anaconda](https://docs.continuum.io/anaconda/install) package instead of the above method. (This is the preferred method.) 17 | * After installing, issue `conda install -c conda-forge ipywidgets` 18 | * **fetch_data.py** fetches the data required for the Facial Recognition example. The dataset is ~230MB. If you want to follow along during the workshop, you can execute `python fetch_data.py` after cd'ing into the repo directory. In case you don't want to download it, you are welcome to just look at the example during the workshop. 19 | * **NOTE**: This repo is a work in progress. To stay on the latest version, issue a `git pull` before attending the workshop. 20 | * **NOTE**: If you face any problems during installation, please create an issue on GitHub. 21 | * That's it. 22 | 23 | ## Requirements 24 | 25 | * Python 2.7 26 | * Working knowledge of Python 27 | 28 | ## Notes 29 | 30 | * This workshop has been developed for an intended audience with little or no experience of scikit-learn and/or machine learning. 31 | * Please download the repo and fetch the dependencies before coming to the workshop. The installation takes time that is better spent on the workshop itself. 32 | 33 | ## Credits where credit's due 34 | 35 | * These notebooks owe a lot to the notebooks published by [Jake Vanderplas](https://github.com/jakevdp/sklearn_tutorial) and [Andreas Muller](https://www.youtube.com/watch?v=80fZrVMurPM), whose coverage of these topics is much more extensive. If you want to go further with the black-box approach to scikit-learn, I would highly recommend going through their notebooks and screencasts. These tutorials helped me a lot in understanding scikit-learn and its application. 36 | 37 | ## Where to go from here? 38 | 39 | * Kaggle 40 | * Andrew-Ng 41 | * KD-Nuggets post 42 | * Dive into 43 | * Awesome notebooks 44 | * [A visual intro to ML](http://www.r2d3.us/visual-intro-to-machine-learning-part-1/) 45 | -------------------------------------------------------------------------------- /cheatsheet.txt: -------------------------------------------------------------------------------- 1 | Numpy: 2 | - ones : Return a new array of given shape and type, filled with ones. 3 | - arange : Return evenly spaced values within a given interval. arange([start,] stop[, step,], dtype=None) 4 | - asarray : Convert the input to an array. 5 | - random.random : Return random floats in the half-open interval [0.0, 1.0). 6 | - linspace : Returns `num` evenly spaced samples, calculated over the interval [`start`, `stop`]. 7 | - newaxis : Adds a new axis, e.g. turning a 1D array of numbers into a 2D row or column matrix. 8 | - array : Create an array. 9 | - random.normal : Draw random samples from a normal (Gaussian) distribution. 10 | - meshgrid : Create a rectangular grid out of an array of x values and an array of y values. 11 | - random.RandomState : Container for the Mersenne Twister pseudo-random number generator. 12 | - random.RandomState.permutation : Randomly permute a sequence, or return a permuted range. 13 | - random.RandomState.uniform : Draw samples from a uniform distribution. 14 | - squeeze : Remove single-dimensional entries from the shape of an array. 15 | - random.randn : Return a sample (or samples) from the "standard normal" distribution. 16 | - dot : Dot product of two arrays. 
17 | - ravel : Return a contiguous flattened array. 18 | - set_printoptions : Set printing options. 19 | 20 | Matplotlib: 21 | - scatter : Make a scatter plot of x vs y, where x and y are sequence-like objects of the same length. 22 | - contour : Draw contour lines. 23 | - figure : Creates a new figure. 24 | - add_subplot : Add a subplot. 25 | - subplots_adjust : Tune the subplot layout (spacing between subplots). 26 | - pcolormesh : Create a pseudocolor plot of a 2-D array. 27 | - xlim : Get or set the *x* limits of the current axes. 28 | - ylim : Get or set the *y* limits of the current axes. 29 | - axis : Convenience method to get or set axis properties. 30 | - fill_between : Make filled polygons between two curves. 31 | - setp : Set a property on an artist object. 32 | 33 | Scikit-Learn: 34 | - K Neighbors (Classifier/Regressor) : Classifier/Regressor implementing the k-nearest neighbors vote. 35 | - linear_model : The :mod:`sklearn.linear_model` module implements generalized linear models, e.g. SGD, BR, etc. 36 | - Linear Regression : Ordinary least squares Linear Regression. 37 | - ^ normalize? : If True, the regressors X will be normalized before regression. 38 | - coef_ : Estimated coefficients for the linear regression problem. 39 | - intercept_ : Independent term in the linear model. 40 | - residues_ : Get the residues of the fitted model. 41 | - make_blobs : Generate isotropic Gaussian blobs for clustering. 42 | - make_circles : Make a large circle containing a smaller circle in 2d. 43 | - SVM : The :mod:`sklearn.svm` module includes Support Vector Machine algorithms. 44 | - SVC : Support Vector Classification. The implementation is based on libsvm. 45 | - Kernels : The kernel is effectively a similarity measure. 46 | - decision_function : Gives per-class scores for each sample (or a single score per sample in the binary case). 47 | - support_vectors_ : The support vectors of the fitted SVM. 48 | - score : Returns the mean accuracy on the given test data and labels. 49 | - StandardScaler : Standardize features by removing the mean and scaling to unit variance. 50 | - transform : Transform the data based on what is learned from `fit`. 51 | - pca.explained_variance_ : Amount of variance explained by each of the selected components. 52 | - pca.components_ : Principal axes in feature space, representing the directions of maximum variance. 53 | - fit_transform : Fit the model with X and apply the dimensionality reduction on X. 54 | - inverse_transform : Transform data back to its original space, i.e., return an input X_original whose transform would be X. 55 | - KMeans : K-Means clustering. 56 | - fit_predict : Compute cluster centers and predict cluster index for each sample. 57 | - confusion_matrix : Compute confusion matrix to evaluate the accuracy of a classification. 58 | - accuracy_score : Accuracy classification score. 59 | - adjusted_rand_score : The Rand Index computes a similarity measure between two clusterings by considering all pairs of samples and 60 | counting pairs that are assigned in the same or different clusters in the predicted and true clusterings. 61 | - MiniBatchKMeans : Mini-Batch K-Means clustering. 62 | - cross_val_score : Evaluate a score by cross-validation. 63 | - mean_squared_error : Mean squared error regression loss. 64 | - pipeline.make_pipeline : Construct a Pipeline from the given estimators. 65 | - Polynomial Features : Generate a new feature matrix consisting of all polynomial combinations of the features with degree less than 66 | or equal to the specified degree. 67 | - learning_curve.validation_curve : Determine training and test scores for varying parameter values. 
68 | - feature_extraction.CountVectorizer : Convert a collection of text documents to a matrix of token counts 69 | - TfidfVectorizer : Convert a collection of raw documents to a matrix of TF-IDF features. 70 | - SGDClassifier : Linear classifiers (SVM, logistic regression, a.o.) with SGD training. 71 | - RandomizedPCA : Principal component analysis (PCA) using randomized SVD 72 | - RandomForestRegressor : A random forest is a meta estimator that fits a number of classifying decision trees on various sub-samples 73 | of the dataset and use averaging to improve the predictive accuracy and control over-fitting. 74 | -------------------------------------------------------------------------------- /data/readme: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/data/readme -------------------------------------------------------------------------------- /fetch_data.py: -------------------------------------------------------------------------------- 1 | from sklearn import datasets 2 | lfw_people = datasets.fetch_lfw_people(min_faces_per_person=70, resize=0.4, data_home='data') 3 | -------------------------------------------------------------------------------- /figures/BangPypers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/BangPypers.pdf -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-01.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-02.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-03.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-04.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-05.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-06.png 
-------------------------------------------------------------------------------- /figures/Pic_BP_PDF-07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-07.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-08.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-09.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-10.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-11.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-12.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-13.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-14.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-15.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-16.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-17.png 
-------------------------------------------------------------------------------- /figures/Pic_BP_PDF-18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-18.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-19.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-20.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-21.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-22.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-23.png -------------------------------------------------------------------------------- /figures/Pic_BP_PDF-24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/Pic_BP_PDF-24.png -------------------------------------------------------------------------------- /figures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/__init__.py -------------------------------------------------------------------------------- /figures/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/__init__.pyc -------------------------------------------------------------------------------- /figures/bowjpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/bowjpg -------------------------------------------------------------------------------- /figures/cluster_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/cluster_comparison.png 
-------------------------------------------------------------------------------- /figures/iris_setosa.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/iris_setosa.jpg -------------------------------------------------------------------------------- /figures/iris_versicolor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/iris_versicolor.jpg -------------------------------------------------------------------------------- /figures/iris_virginica.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/iris_virginica.jpg -------------------------------------------------------------------------------- /figures/magician.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/magician.jpg -------------------------------------------------------------------------------- /figures/ml_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/ml_map.png -------------------------------------------------------------------------------- /figures/netflix-prize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/netflix-prize.png -------------------------------------------------------------------------------- /figures/petal_sepal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/petal_sepal.jpg -------------------------------------------------------------------------------- /figures/plot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Small helpers for code that is not shown in the notebooks 3 | Taken from Jake Vanderplas. 4 | https://github.com/jakevdp/ 5 | """ 6 | 7 | from sklearn import neighbors, datasets, linear_model 8 | import pylab as pl 9 | import numpy as np 10 | from matplotlib.colors import ListedColormap 11 | import matplotlib.pyplot as plt 12 | from sklearn.linear_model import SGDClassifier 13 | from sklearn.datasets.samples_generator import make_blobs 14 | import warnings 15 | 16 | # Create color maps for 3-class classification problem, as with iris 17 | cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF']) 18 | cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF']) 19 | 20 | 21 | def plot_iris_knn(): 22 | iris = datasets.load_iris() 23 | X = iris.data[:, :2] # we only take the first two features. 
We could 24 | # avoid this ugly slicing by using a two-dim dataset 25 | y = iris.target 26 | 27 | knn = neighbors.KNeighborsClassifier(n_neighbors=3) 28 | knn.fit(X, y) 29 | 30 | x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1 31 | y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1 32 | xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), 33 | np.linspace(y_min, y_max, 100)) 34 | Z = knn.predict(np.c_[xx.ravel(), yy.ravel()]) 35 | 36 | # Put the result into a color plot 37 | Z = Z.reshape(xx.shape) 38 | pl.figure() 39 | pl.pcolormesh(xx, yy, Z, cmap=cmap_light) 40 | 41 | # Plot also the training points 42 | pl.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold) 43 | pl.xlabel('sepal length (cm)') 44 | pl.ylabel('sepal width (cm)') 45 | pl.axis('tight') 46 | 47 | 48 | def plot_polynomial_regression(): 49 | rng = np.random.RandomState(0) 50 | x = 2*rng.rand(100) - 1 51 | f = lambda t: 1.2 * t**2 + .1 * t**3 - .4 * t **5 - .5 * t ** 9 52 | y = f(x) + .4 * rng.normal(size=100) 53 | 54 | x_test = np.linspace(-1, 1, 100) 55 | 56 | pl.figure() 57 | pl.scatter(x, y, s=4) 58 | 59 | X = np.array([x**i for i in range(5)]).T 60 | X_test = np.array([x_test**i for i in range(5)]).T 61 | regr = linear_model.LinearRegression() 62 | regr.fit(X, y) 63 | pl.plot(x_test, regr.predict(X_test), label='4th order') 64 | 65 | X = np.array([x**i for i in range(10)]).T 66 | X_test = np.array([x_test**i for i in range(10)]).T 67 | regr = linear_model.LinearRegression() 68 | regr.fit(X, y) 69 | pl.plot(x_test, regr.predict(X_test), label='9th order') 70 | 71 | pl.legend(loc='best') 72 | pl.axis('tight') 73 | pl.title('Fitting a 4th and a 9th order polynomial') 74 | 75 | pl.figure() 76 | pl.scatter(x, y, s=4) 77 | pl.plot(x_test, f(x_test), label="truth") 78 | pl.axis('tight') 79 | pl.title('Ground truth (9th order polynomial)') 80 | 81 | 82 | def plot_sgd_separator(): 83 | # we create 50 separable points 84 | X, Y = make_blobs(n_samples=50, centers=2, 85 | random_state=0, cluster_std=0.60) 86 | 87 | # fit the model 88 | clf = SGDClassifier(loss="hinge", alpha=0.01, 89 | n_iter=200, fit_intercept=True) 90 | clf.fit(X, Y) 91 | 92 | # plot the line, the points, and the nearest vectors to the plane 93 | xx = np.linspace(-1, 5, 10) 94 | yy = np.linspace(-1, 5, 10) 95 | 96 | X1, X2 = np.meshgrid(xx, yy) 97 | Z = np.empty(X1.shape) 98 | for (i, j), val in np.ndenumerate(X1): 99 | x1 = val 100 | x2 = X2[i, j] 101 | x3 = np.array([x1, x2]) 102 | p = clf.decision_function(x3.reshape(1, -1)) 103 | Z[i, j] = p[0] 104 | levels = [-1.0, 0.0, 1.0] 105 | linestyles = ['dashed', 'solid', 'dashed'] 106 | colors = 'k' 107 | 108 | ax = plt.axes() 109 | ax.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles) 110 | ax.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired) 111 | 112 | ax.axis('tight') 113 | 114 | 115 | def plot_example_decision_tree(): 116 | fig = plt.figure(figsize=(10, 4)) 117 | ax = fig.add_axes([0, 0, 0.8, 1], frameon=False, xticks=[], yticks=[]) 118 | ax.set_title('Example Decision Tree: Animal Classification', size=24) 119 | 120 | def text(ax, x, y, t, size=20, **kwargs): 121 | ax.text(x, y, t, 122 | ha='center', va='center', size=size, 123 | bbox=dict(boxstyle='round', ec='k', fc='w'), **kwargs) 124 | 125 | text(ax, 0.5, 0.9, "How big is\nthe animal?", 20) 126 | text(ax, 0.3, 0.6, "Does the animal\nhave horns?", 18) 127 | text(ax, 0.7, 0.6, "Does the animal\nhave two legs?", 18) 128 | text(ax, 0.12, 0.3, "Are the horns\nlonger than 10cm?", 14) 129 | text(ax, 0.38, 0.3, "Is the animal\nwearing a collar?", 
14) 130 | text(ax, 0.62, 0.3, "Does the animal\nhave wings?", 14) 131 | text(ax, 0.88, 0.3, "Does the animal\nhave a tail?", 14) 132 | 133 | text(ax, 0.4, 0.75, "> 1m", 12, alpha=0.4) 134 | text(ax, 0.6, 0.75, "< 1m", 12, alpha=0.4) 135 | 136 | text(ax, 0.21, 0.45, "yes", 12, alpha=0.4) 137 | text(ax, 0.34, 0.45, "no", 12, alpha=0.4) 138 | 139 | text(ax, 0.66, 0.45, "yes", 12, alpha=0.4) 140 | text(ax, 0.79, 0.45, "no", 12, alpha=0.4) 141 | 142 | ax.plot([0.3, 0.5, 0.7], [0.6, 0.9, 0.6], '-k') 143 | ax.plot([0.12, 0.3, 0.38], [0.3, 0.6, 0.3], '-k') 144 | ax.plot([0.62, 0.7, 0.88], [0.3, 0.6, 0.3], '-k') 145 | ax.plot([0.0, 0.12, 0.20], [0.0, 0.3, 0.0], '--k') 146 | ax.plot([0.28, 0.38, 0.48], [0.0, 0.3, 0.0], '--k') 147 | ax.plot([0.52, 0.62, 0.72], [0.0, 0.3, 0.0], '--k') 148 | ax.plot([0.8, 0.88, 1.0], [0.0, 0.3, 0.0], '--k') 149 | ax.axis([0, 1, 0, 1]) 150 | 151 | 152 | def visualize_tree(estimator, X, y, boundaries=True, 153 | xlim=None, ylim=None): 154 | estimator.fit(X, y) 155 | 156 | if xlim is None: 157 | xlim = (X[:, 0].min() - 0.1, X[:, 0].max() + 0.1) 158 | if ylim is None: 159 | ylim = (X[:, 1].min() - 0.1, X[:, 1].max() + 0.1) 160 | 161 | x_min, x_max = xlim 162 | y_min, y_max = ylim 163 | xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), 164 | np.linspace(y_min, y_max, 100)) 165 | Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()]) 166 | 167 | # Put the result into a color plot 168 | Z = Z.reshape(xx.shape) 169 | plt.figure() 170 | plt.pcolormesh(xx, yy, Z, alpha=0.2, cmap='rainbow') 171 | plt.clim(y.min(), y.max()) 172 | 173 | # Plot also the training points 174 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='rainbow') 175 | plt.axis('off') 176 | 177 | plt.xlim(x_min, x_max) 178 | plt.ylim(y_min, y_max) 179 | plt.clim(y.min(), y.max()) 180 | 181 | # Plot the decision boundaries 182 | def plot_boundaries(i, xlim, ylim): 183 | if i < 0: 184 | return 185 | 186 | tree = estimator.tree_ 187 | 188 | if tree.feature[i] == 0: 189 | plt.plot([tree.threshold[i], tree.threshold[i]], ylim, '-k') 190 | plot_boundaries(tree.children_left[i], 191 | [xlim[0], tree.threshold[i]], ylim) 192 | plot_boundaries(tree.children_right[i], 193 | [tree.threshold[i], xlim[1]], ylim) 194 | 195 | elif tree.feature[i] == 1: 196 | plt.plot(xlim, [tree.threshold[i], tree.threshold[i]], '-k') 197 | plot_boundaries(tree.children_left[i], xlim, 198 | [ylim[0], tree.threshold[i]]) 199 | plot_boundaries(tree.children_right[i], xlim, 200 | [tree.threshold[i], ylim[1]]) 201 | 202 | if boundaries: 203 | plot_boundaries(0, plt.xlim(), plt.ylim()) 204 | 205 | 206 | def plot_tree_interactive(X, y): 207 | from sklearn.tree import DecisionTreeClassifier 208 | 209 | def interactive_tree(depth=1): 210 | clf = DecisionTreeClassifier(max_depth=depth, random_state=0) 211 | visualize_tree(clf, X, y) 212 | 213 | from IPython.html.widgets import interact 214 | return interact(interactive_tree, depth=[1, 5]) 215 | 216 | 217 | def plot_kmeans_interactive(min_clusters=1, max_clusters=6): 218 | from IPython.html.widgets import interact 219 | from sklearn.metrics.pairwise import euclidean_distances 220 | from sklearn.datasets.samples_generator import make_blobs 221 | 222 | with warnings.catch_warnings(): 223 | warnings.filterwarnings('ignore') 224 | 225 | X, y = make_blobs(n_samples=300, centers=4, 226 | random_state=0, cluster_std=0.60) 227 | 228 | def _kmeans_step(frame=0, n_clusters=4): 229 | rng = np.random.RandomState(2) 230 | labels = np.zeros(X.shape[0]) 231 | centers = rng.randn(n_clusters, 2) 232 | 233 | nsteps = frame 
// 3 234 | 235 | for i in range(nsteps + 1): 236 | old_centers = centers 237 | if i < nsteps or frame % 3 > 0: 238 | dist = euclidean_distances(X, centers) 239 | labels = dist.argmin(1) 240 | 241 | if i < nsteps or frame % 3 > 1: 242 | centers = np.array([X[labels == j].mean(0) 243 | for j in range(n_clusters)]) 244 | nans = np.isnan(centers) 245 | centers[nans] = old_centers[nans] 246 | 247 | 248 | # plot the data and cluster centers 249 | plt.scatter(X[:, 0], X[:, 1], c=labels, s=50, cmap='rainbow', 250 | vmin=0, vmax=n_clusters - 1); 251 | plt.scatter(old_centers[:, 0], old_centers[:, 1], marker='o', 252 | c=np.arange(n_clusters), 253 | s=200, cmap='rainbow') 254 | plt.scatter(old_centers[:, 0], old_centers[:, 1], marker='o', 255 | c='black', s=50) 256 | 257 | # plot new centers if third frame 258 | if frame % 3 == 2: 259 | for i in range(n_clusters): 260 | plt.annotate('', centers[i], old_centers[i], 261 | arrowprops=dict(arrowstyle='->', linewidth=1)) 262 | plt.scatter(centers[:, 0], centers[:, 1], marker='o', 263 | c=np.arange(n_clusters), 264 | s=200, cmap='rainbow') 265 | plt.scatter(centers[:, 0], centers[:, 1], marker='o', 266 | c='black', s=50) 267 | 268 | plt.xlim(-4, 4) 269 | plt.ylim(-2, 10) 270 | 271 | if frame % 3 == 1: 272 | plt.text(3.8, 9.5, "1. Reassign points to nearest centroid", 273 | ha='right', va='top', size=14) 274 | elif frame % 3 == 2: 275 | plt.text(3.8, 9.5, "2. Update centroids to cluster means", 276 | ha='right', va='top', size=14) 277 | 278 | 279 | return interact(_kmeans_step, frame=[0, 50], 280 | n_clusters=[min_clusters, max_clusters]) 281 | 282 | 283 | def plot_image_components(x, coefficients=None, mean=0, components=None, 284 | imshape=(8, 8), n_components=6, fontsize=12): 285 | if coefficients is None: 286 | coefficients = x 287 | 288 | if components is None: 289 | components = np.eye(len(coefficients), len(x)) 290 | 291 | mean = np.zeros_like(x) + mean 292 | 293 | 294 | fig = plt.figure(figsize=(1.2 * (5 + n_components), 1.2 * 2)) 295 | g = plt.GridSpec(2, 5 + n_components, hspace=0.3) 296 | 297 | def show(i, j, x, title=None): 298 | ax = fig.add_subplot(g[i, j], xticks=[], yticks=[]) 299 | ax.imshow(x.reshape(imshape), interpolation='nearest') 300 | if title: 301 | ax.set_title(title, fontsize=fontsize) 302 | 303 | show(slice(2), slice(2), x, "True") 304 | 305 | approx = mean.copy() 306 | show(0, 2, np.zeros_like(x) + mean, r'$\mu$') 307 | show(1, 2, approx, r'$1 \cdot \mu$') 308 | 309 | for i in range(0, n_components): 310 | approx = approx + coefficients[i] * components[i] 311 | show(0, i + 3, components[i], r'$c_{0}$'.format(i + 1)) 312 | show(1, i + 3, approx, 313 | r"${0:.2f} \cdot c_{1}$".format(coefficients[i], i + 1)) 314 | plt.gca().text(0, 1.05, '$+$', ha='right', va='bottom', 315 | transform=plt.gca().transAxes, fontsize=fontsize) 316 | 317 | show(slice(2), slice(-2, None), approx, "Approx") 318 | 319 | 320 | def plot_pca_interactive(data, n_components=6): 321 | from sklearn.decomposition import PCA 322 | from IPython.html.widgets import interact 323 | 324 | pca = PCA(n_components=n_components) 325 | Xproj = pca.fit_transform(data) 326 | 327 | def show_decomp(i=0): 328 | plot_image_components(data[i], Xproj[i], 329 | pca.mean_, pca.components_) 330 | 331 | interact(show_decomp, i=(0, data.shape[0] - 1)); 332 | -------------------------------------------------------------------------------- /figures/plot.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/plot.pyc -------------------------------------------------------------------------------- /figures/plot_2d_separator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None): 6 | if eps is None: 7 | eps = X.std() / 2. 8 | x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps 9 | y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps 10 | xx = np.linspace(x_min, x_max, 100) 11 | yy = np.linspace(y_min, y_max, 100) 12 | 13 | X1, X2 = np.meshgrid(xx, yy) 14 | X_grid = np.c_[X1.ravel(), X2.ravel()] 15 | try: 16 | decision_values = classifier.decision_function(X_grid) 17 | levels = [0] 18 | fill_levels = [decision_values.min(), 0, decision_values.max()] 19 | except AttributeError: 20 | # no decision_function 21 | decision_values = classifier.predict_proba(X_grid)[:, 1] 22 | levels = [.5] 23 | fill_levels = [0, .5, 1] 24 | 25 | if ax is None: 26 | ax = plt.gca() 27 | if fill: 28 | ax.contourf(X1, X2, decision_values.reshape(X1.shape), 29 | levels=fill_levels, colors=['blue', 'red']) 30 | else: 31 | ax.contour(X1, X2, decision_values.reshape(X1.shape), levels=levels, 32 | colors="black") 33 | ax.set_xlim(x_min, x_max) 34 | ax.set_ylim(y_min, y_max) 35 | ax.set_xticks(()) 36 | ax.set_yticks(()) 37 | 38 | 39 | if __name__ == '__main__': 40 | from sklearn.datasets import make_blobs 41 | from sklearn.linear_model import LogisticRegression 42 | X, y = make_blobs(centers=2, random_state=42) 43 | clf = LogisticRegression().fit(X, y) 44 | plot_2d_separator(clf, X, fill=True) 45 | plt.scatter(X[:, 0], X[:, 1], c=y) 46 | plt.show() 47 | -------------------------------------------------------------------------------- /figures/plot_2d_separator.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/plot_2d_separator.pyc -------------------------------------------------------------------------------- /figures/plot_digits_datasets.py: -------------------------------------------------------------------------------- 1 | # Taken from example in scikit-learn examples 2 | # Authors: Fabian Pedregosa 3 | # Olivier Grisel 4 | # Mathieu Blondel 5 | # Gael Varoquaux 6 | # License: BSD 3 clause (C) INRIA 2011 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | from matplotlib import offsetbox 11 | from sklearn import (manifold, datasets, decomposition, ensemble, lda, 12 | random_projection) 13 | 14 | def digits_plot(): 15 | digits = datasets.load_digits(n_class=6) 16 | n_digits = 500 17 | X = digits.data[:n_digits] 18 | y = digits.target[:n_digits] 19 | n_samples, n_features = X.shape 20 | n_neighbors = 30 21 | 22 | def plot_embedding(X, title=None): 23 | x_min, x_max = np.min(X, 0), np.max(X, 0) 24 | X = (X - x_min) / (x_max - x_min) 25 | 26 | plt.figure() 27 | ax = plt.subplot(111) 28 | for i in range(X.shape[0]): 29 | plt.text(X[i, 0], X[i, 1], str(digits.target[i]), 30 | color=plt.cm.Set1(y[i] / 10.), 31 | fontdict={'weight': 'bold', 'size': 9}) 32 | 33 | if hasattr(offsetbox, 'AnnotationBbox'): 34 | # only print thumbnails with matplotlib > 1.0 35 | shown_images = np.array([[1., 1.]]) # just something big 36 | for i in range(X.shape[0]): 37 | dist = np.sum((X[i] - 
shown_images) ** 2, 1) 38 | if np.min(dist) < 1e5: 39 | # don't show points that are too close 40 | # set a high threshold to basically turn this off 41 | continue 42 | shown_images = np.r_[shown_images, [X[i]]] 43 | imagebox = offsetbox.AnnotationBbox( 44 | offsetbox.OffsetImage(digits.images[i], cmap=plt.cm.gray_r), 45 | X[i]) 46 | ax.add_artist(imagebox) 47 | plt.xticks([]), plt.yticks([]) 48 | if title is not None: 49 | plt.title(title) 50 | 51 | n_img_per_row = 10 52 | img = np.zeros((10 * n_img_per_row, 10 * n_img_per_row)) 53 | for i in range(n_img_per_row): 54 | ix = 10 * i + 1 55 | for j in range(n_img_per_row): 56 | iy = 10 * j + 1 57 | img[ix:ix + 8, iy:iy + 8] = X[i * n_img_per_row + j].reshape((8, 8)) 58 | 59 | plt.imshow(img, cmap=plt.cm.binary) 60 | plt.xticks([]) 61 | plt.yticks([]) 62 | plt.title('A selection from the 64-dimensional digits dataset') 63 | print("Computing PCA projection") 64 | pca = decomposition.PCA(n_components=2).fit(X) 65 | X_pca = pca.transform(X) 66 | plot_embedding(X_pca, "Principal Components projection of the digits") 67 | plt.figure() 68 | plt.matshow(pca.components_[0, :].reshape(8, 8), cmap="gray") 69 | plt.axis('off') 70 | plt.figure() 71 | plt.matshow(pca.components_[1, :].reshape(8, 8), cmap="gray") 72 | plt.axis('off') 73 | plt.show() 74 | -------------------------------------------------------------------------------- /figures/plot_digits_datasets.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfrcks/Machine-Learning-SKLearn/f7f8a50136cc6f54406859cfe693abf4b4aca19e/figures/plot_digits_datasets.pyc -------------------------------------------------------------------------------- /figures/train_test_split.svg: -------------------------------------------------------------------------------- 1 | 2 | image/svg+xmlAll Data 369 | Training data 395 | Test data 421 | -------------------------------------------------------------------------------- /figures/train_validation_test2.svg: -------------------------------------------------------------------------------- 1 | 2 | image/svg+xmlAll Data 351 | Training 373 | Test 398 | Validation 423 | -------------------------------------------------------------------------------- /figures/unsupervised_workflow.svg: -------------------------------------------------------------------------------- 1 | 2 | image/svg+xmlTraining Data 365 | Test Data 391 | Model 413 | New View 435 | -------------------------------------------------------------------------------- /scripts/classify_iris.py: -------------------------------------------------------------------------------- 1 | from sklearn import neighbors, datasets 2 | 3 | iris = datasets.load_iris() 4 | X, y = iris.data, iris.target 5 | 6 | # create the model 7 | knn = neighbors.KNeighborsClassifier(n_neighbors=5) 8 | 9 | # fit the model 10 | knn.fit(X, y) 11 | 12 | # What kind of iris has 3cm x 5cm sepal and 4cm x 2cm petal? 
13 | # call the "predict" method: 14 | result = knn.predict([[3, 5, 4, 2],]) 15 | 16 | print(iris.target_names[result]) 17 | 18 | knn.predict_proba([[3, 5, 4, 2],]) 19 | -------------------------------------------------------------------------------- /scripts/cluster_digits.py: -------------------------------------------------------------------------------- 1 | from sklearn.cluster import KMeans 2 | kmeans = KMeans(n_clusters=10) 3 | clusters = kmeans.fit_predict(digits.data) 4 | 5 | print(kmeans.cluster_centers_.shape) 6 | 7 | #------------------------------------------------------------ 8 | # visualize the cluster centers 9 | fig = plt.figure(figsize=(8, 3)) 10 | for i in range(10): 11 | ax = fig.add_subplot(2, 5, 1 + i) 12 | ax.imshow(kmeans.cluster_centers_[i].reshape((8, 8)), 13 | cmap=plt.cm.binary) 14 | from sklearn.manifold import Isomap 15 | X_iso = Isomap(n_neighbors=10).fit_transform(digits.data) 16 | 17 | #------------------------------------------------------------ 18 | # visualize the projected data 19 | fig, ax = plt.subplots(1, 2, figsize=(8, 4)) 20 | 21 | ax[0].scatter(X_iso[:, 0], X_iso[:, 1], c=clusters) 22 | ax[1].scatter(X_iso[:, 0], X_iso[:, 1], c=digits.target) 23 | -------------------------------------------------------------------------------- /scripts/knn_iris.py: -------------------------------------------------------------------------------- 1 | from sklearn import neighbors, datasets 2 | import pylab as pl 3 | import numpy as np 4 | from matplotlib.colors import ListedColormap 5 | 6 | 7 | # Create color maps for 3-class classification problem, as with iris 8 | cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF']) 9 | cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF']) 10 | 11 | iris = datasets.load_iris() 12 | X, y = iris.data, iris.target 13 | 14 | # create the model 15 | knn = neighbors.KNeighborsClassifier(n_neighbors=5) 16 | 17 | # fit the model 18 | knn.fit(X, y) 19 | 20 | # What kind of iris has 3cm x 5cm sepal and 4cm x 2cm petal? 21 | # call the "predict" method: 22 | result = knn.predict([[3, 5, 4, 2],]) 23 | print(iris.target_names[result]) 24 | 25 | 26 | def plot_iris_knn(): 27 | iris = datasets.load_iris() 28 | X = iris.data[:, :2] 29 | # we only take the first two features. 
We could 30 | # avoid this ugly slicing by using a two-dim dataset 31 | y = iris.target 32 | 33 | knn = neighbors.KNeighborsClassifier(n_neighbors=3) 34 | knn.fit(X, y) 35 | 36 | x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1 37 | y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1 38 | xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), 39 | np.linspace(y_min, y_max, 100)) 40 | Z = knn.predict(np.c_[xx.ravel(), yy.ravel()]) 41 | 42 | # Put the result into a color plot 43 | Z = Z.reshape(xx.shape) 44 | pl.figure() 45 | pl.pcolormesh(xx, yy, Z, cmap=cmap_light) 46 | 47 | # Plot also the training points 48 | pl.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold) 49 | pl.xlabel('sepal length (cm)') 50 | pl.ylabel('sepal width (cm)') 51 | pl.axis('tight') 52 | -------------------------------------------------------------------------------- /scripts/knn_regression.py: -------------------------------------------------------------------------------- 1 | from sklearn.neighbors import KNeighborsRegressor 2 | kneighbor_regression = KNeighborsRegressor(n_neighbors=1) 3 | kneighbor_regression.fit(X_train, y_train) 4 | 5 | y_pred_train = kneighbor_regression.predict(X_train) 6 | 7 | plt.plot(X_train, y_train, 'o', label="data") 8 | plt.plot(X_train, y_pred_train, 'o', label="prediction") 9 | plt.legend(loc='best') 10 | 11 | #y_pred_test = kneighbor_regression.predict(X_test) 12 | 13 | #plt.plot(X_test, y_test, 'o', label="data") 14 | #plt.plot(X_test, y_pred_test, 'o', label="prediction") 15 | #plt.legend(loc='best') 16 | -------------------------------------------------------------------------------- /scripts/plot_digits.py: -------------------------------------------------------------------------------- 1 | # set up the figure 2 | fig = plt.figure(figsize=(6, 6)) # figure size in inches 3 | fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05) 4 | 5 | # plot the digits: each image is 8x8 pixels 6 | for i in range(64): 7 | ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[]) 8 | ax.imshow(digits.images[i], cmap=plt.cm.binary, interpolation='nearest') 9 | 10 | # label the image with the target value 11 | ax.text(0, 7, str(digits.target[i])) 12 | --------------------------------------------------------------------------------
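A note on the snippet-style scripts above: cluster_digits.py, knn_regression.py and plot_digits.py reference names such as `digits`, `plt`, `X_train` and `y_train` without defining them, so they are meant to be run in a session where that setup already exists. Below is a minimal sketch of such a setup, written to match the names the scripts expect; the 1-D regression data is made up purely for illustration and is not part of the repository.

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_digits

# `digits` is what plot_digits.py and cluster_digits.py expect to find
digits = load_digits()

# A made-up 1-D regression problem so that knn_regression.py has X_train/y_train;
# any small numeric dataset with this shape would do.
rng = np.random.RandomState(42)
X = 30 * rng.rand(100, 1)                   # 100 samples, 1 feature
y = 0.5 * X.ravel() + rng.normal(size=100)  # noisy linear target
X_train, X_test = X[:75], X[75:]
y_train, y_test = y[:75], y[75:]

With these names defined, the bodies of the scripts can be pasted into the same interpreter session (or a notebook cell) and run as-is.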