├── .gitignore
├── 01-introduction.ipynb
├── 02-supervised-learning.ipynb
├── 03-unsupervised-learning.ipynb
├── 04-representing-data-feature-engineering.ipynb
├── 05-model-evaluation-and-improvement.ipynb
├── 06-algorithm-chains-and-pipelines.ipynb
├── 07-working-with-text-data.ipynb
├── 08-conclusion.ipynb
├── README.md
├── cover.jpg
├── data
│   ├── adult.data
│   ├── citibike.csv
│   └── ram_price.csv
├── environment.yml
├── images
│   ├── 05_gridsearch_overview.png
│   ├── api_table.png
│   ├── bag_of_words.png
│   ├── bag_of_words.svg
│   ├── classifier_comparison.png
│   ├── dendrogram.png
│   ├── iris_petal_sepal.png
│   ├── iris_petal_sepal.svg
│   ├── overfitting_underfitting_cartoon.png
│   ├── overfitting_underfitting_cartoon.svg
│   ├── pipeline.png
│   └── pipeline.svg
├── mglearn
│   ├── __init__.py
│   ├── datasets.py
│   ├── make_blobs.py
│   ├── plot_2d_separator.py
│   ├── plot_agglomerative.py
│   ├── plot_animal_tree.py
│   ├── plot_cross_validation.py
│   ├── plot_dbscan.py
│   ├── plot_decomposition.py
│   ├── plot_grid_search.py
│   ├── plot_helpers.py
│   ├── plot_improper_preprocessing.py
│   ├── plot_interactive_tree.py
│   ├── plot_kmeans.py
│   ├── plot_kneighbors_regularization.py
│   ├── plot_knn_classification.py
│   ├── plot_knn_regression.py
│   ├── plot_linear_regression.py
│   ├── plot_linear_svc_regularization.py
│   ├── plot_metrics.py
│   ├── plot_nmf.py
│   ├── plot_nn_graphs.py
│   ├── plot_pca.py
│   ├── plot_rbf_svm_parameters.py
│   ├── plot_ridge.py
│   ├── plot_scaling.py
│   ├── plot_tree_nonmonotonous.py
│   ├── plots.py
│   └── tools.py
└── preamble.py

/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.swp
3 | .ipynb_checkpoints/
--------------------------------------------------------------------------------
/08-conclusion.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {
6 |     "hide_input": false
7 |    },
8 |    "source": [
9 |     "## Outlook\n",
10 |     "### Approaching a machine learning problem\n",
11 |     "### Humans in the loop"
12 |    ]
13 |   },
14 |   {
15 |    "cell_type": "markdown",
16 |    "metadata": {},
17 |    "source": [
18 |     "### From prototype to production"
19 |    ]
20 |   },
21 |   {
22 |    "cell_type": "markdown",
23 |    "metadata": {},
24 |    "source": [
25 |     "### Testing production systems"
26 |    ]
27 |   },
28 |   {
29 |    "cell_type": "markdown",
30 |    "metadata": {},
31 |    "source": [
32 |     "### Building your own estimator"
33 |    ]
34 |   },
35 |   {
36 |    "cell_type": "code",
37 |    "execution_count": 1,
38 |    "metadata": {},
39 |    "outputs": [],
40 |    "source": [
41 |     "from sklearn.base import BaseEstimator, TransformerMixin\n",
42 |     "\n",
43 |     "class MyTransformer(BaseEstimator, TransformerMixin):\n",
44 |     "    def __init__(self, first_parameter=1, second_parameter=2):\n",
45 |     "        # all parameters must be specified in the __init__ function\n",
46 |     "        self.first_parameter = first_parameter\n",
47 |     "        self.second_parameter = second_parameter\n",
48 |     "    \n",
49 |     "    def fit(self, X, y=None):\n",
50 |     "        # fit should only take X and y as parameters\n",
51 |     "        # even if your model is unsupervised, you need to accept a y argument!\n",
52 |     "        \n",
53 |     "        # Model fitting code goes here\n",
54 |     "        print(\"fitting the model right here\")\n",
55 |     "        # fit returns self\n",
56 |     "        return self\n",
57 |     "    \n",
58 |     "    def transform(self, X):\n",
59 |     "        # transform takes as parameter only X\n",
60 |     "        \n",
61 |     "        # apply some transformation to X:\n",
62 |     "        X_transformed = X + 1\n",
63 |     "        return X_transformed"
64 |    ]
65 |   },
66 |   {
67 |    "cell_type": "markdown",
68 |    "metadata": {},
69 |    "source": [
70 |     "### Where to go from here\n",
71 |     "#### Theory\n",
72 |     "#### Other machine learning frameworks and packages\n",
73 |     "#### Ranking, recommender systems, time series, and other kinds of learning\n",
74 |     "#### Probabilistic modeling, inference and probabilistic programming"
75 |    ]
76 |   },
77 |   {
78 |    "cell_type": "markdown",
79 |    "metadata": {},
80 |    "source": [
81 |     "#### Neural Networks"
82 |    ]
83 |   },
84 |   {
85 |    "cell_type": "markdown",
86 |    "metadata": {},
87 |    "source": [
88 |     "#### Scaling to larger datasets"
89 |    ]
90 |   },
91 |   {
92 |    "cell_type": "markdown",
93 |    "metadata": {},
94 |    "source": [
95 |     "#### Honing your skills"
96 |    ]
97 |   },
98 |   {
99 |    "cell_type": "markdown",
100 |    "metadata": {},
101 |    "source": [
102 |     "#### Conclusion"
103 |    ]
104 |   }
105 |  ],
106 |  "metadata": {
107 |   "anaconda-cloud": {},
108 |   "kernelspec": {
109 |    "display_name": "Python [conda env:root] *",
110 |    "language": "python",
111 |    "name": "conda-root-py"
112 |   },
113 |   "language_info": {
114 |    "codemirror_mode": {
115 |     "name": "ipython",
116 |     "version": 3
117 |    },
118 |    "file_extension": ".py",
119 |    "mimetype": "text/x-python",
120 |    "name": "python",
121 |    "nbconvert_exporter": "python",
122 |    "pygments_lexer": "ipython3",
123 |    "version": "3.7.6"
124 |   },
125 |   "toc-autonumbering": false
126 |  },
127 |  "nbformat": 4,
128 |  "nbformat_minor": 4
129 | }
130 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/amueller/introduction_to_ml_with_python/master)
2 | 
3 | # Introduction to Machine Learning with Python
4 | 
5 | This repository holds the code for the book "Introduction to Machine
6 | Learning with Python" by [Andreas Mueller](http://amueller.io) and [Sarah Guido](https://twitter.com/sarah_guido).
7 | You can find details about the book on the [O'Reilly website](http://shop.oreilly.com/product/0636920030515.do).
8 | 
9 | The book requires the current stable version of scikit-learn, that is,
10 | 0.20.0. Most of the book can also be used with previous versions of
11 | scikit-learn, though you need to adjust the imports for everything from the
12 | ``model_selection`` module, mostly ``cross_val_score``, ``train_test_split``
13 | and ``GridSearchCV``.
14 | 
15 | 
16 | This repository provides the notebooks from which the book is created, together
17 | with the ``mglearn`` library of helper functions to create figures and
18 | datasets.
19 | 
20 | For the curious ones, the cover depicts a [hellbender](https://en.wikipedia.org/wiki/Hellbender).
21 | 
22 | All datasets are included in the repository, with the exception of the aclImdb dataset, which you can download from
23 | the page of [Andrew Maas](http://ai.stanford.edu/~amaas/data/sentiment/). See the book for details.
24 | 
25 | If you get ``ImportError: No module named mglearn``, you can try to install mglearn into your Python environment using
26 | the command ``pip install mglearn`` in your terminal or ``!pip install mglearn`` in Jupyter Notebook.
27 | 
28 | 
29 | ## Errata
30 | Please note that the first print of the book is missing the following line when listing the assumed imports:
31 | 
32 | ```python
33 | from IPython.display import display
34 | ```
35 | Please add this line if you see an error involving ``display``.
36 | 
37 | 
38 | The first print of the book used a function called ``plot_group_kfold``.
39 | This has been renamed to ``plot_label_kfold`` because of a rename in
40 | scikit-learn.
41 | 
42 | ## Setup
43 | 
44 | To run the code, you need the packages ``numpy``, ``scipy``, ``scikit-learn``, ``matplotlib``, ``pandas`` and ``pillow``.
45 | Some of the visualizations of decision trees and neural network structures also require ``graphviz``. The chapter
46 | on text processing also requires ``nltk`` and ``spacy``.
47 | 
48 | The easiest way to set up an environment is by installing [Anaconda](https://www.continuum.io/downloads).
49 | 
50 | ### Installing packages with conda
51 | If you already have a Python environment set up, and you are using the ``conda`` package manager, you can get all packages by running
52 | 
53 |     conda install numpy scipy scikit-learn matplotlib pandas pillow graphviz python-graphviz
54 | 
55 | For the chapter on text processing you also need to install ``nltk`` and ``spacy``:
56 | 
57 |     conda install nltk spacy
58 | 
59 | 
60 | ### Installing packages with pip
61 | If you already have a Python environment and are using pip to install packages, you need to run
62 | 
63 |     pip install numpy scipy scikit-learn matplotlib pandas pillow graphviz
64 | 
65 | You also need to install the graphviz C library, which is easiest using a package manager.
66 | If you are using OS X and Homebrew, you can ``brew install graphviz``. If you are on Ubuntu or Debian, you can ``apt-get install graphviz``.
67 | Installing graphviz on Windows can be tricky, so using conda / anaconda is recommended.
68 | For the chapter on text processing you also need to install ``nltk`` and ``spacy``:
69 | 
70 |     pip install nltk spacy
71 | 
72 | ### Downloading the English language model
73 | For the text processing chapter, you need to download the English language model for spacy using
74 | 
75 |     python -m spacy download en
76 | 
77 | ## Submitting Errata
78 | 
79 | If you have errata for the (e-)book, please submit them via the [O'Reilly Website](http://www.oreilly.com/catalog/errata.csp?isbn=0636920030515).
80 | You can submit fixes to the code as pull requests here, but I'd appreciate it if you would also submit them there, as this repository doesn't hold the
81 | "master notebooks".
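If you want to sanity-check your setup before submitting a code fix, the short script below is a minimal smoke test of the bundled helpers. It is only a sketch: it assumes the packages listed under Setup are installed and that ``mglearn`` is importable (either installed via pip or with this repository on your path). ``make_forge`` and ``discrete_scatter`` are the helpers defined in ``mglearn/datasets.py`` and ``mglearn/plot_helpers.py`` in this repository.

```python
import matplotlib.pyplot as plt

from mglearn.datasets import make_forge
from mglearn import discrete_scatter

# generate the small synthetic "forge" classification dataset used in the book
X, y = make_forge()
# plot the two classes with mglearn's scatter helper
discrete_scatter(X[:, 0], X[:, 1], y)
plt.show()
```

If this opens a scatter plot with two marker styles, the core dependencies are working.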
82 | 83 | ![cover](cover.jpg) 84 | -------------------------------------------------------------------------------- /cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/introduction_to_ml_with_python/ea60cf6cf791553b6cca7cf31802c68cb3798ebb/cover.jpg -------------------------------------------------------------------------------- /data/ram_price.csv: -------------------------------------------------------------------------------- 1 | ,date,price 2 | 0,1957.0,411041792.0 3 | 1,1959.0,67947725.0 4 | 2,1960.0,5242880.0 5 | 3,1965.0,2642412.0 6 | 4,1970.0,734003.0 7 | 5,1973.0,399360.0 8 | 6,1974.0,314573.0 9 | 7,1975.0,421888.0 10 | 8,1975.08,180224.0 11 | 9,1975.25,67584.0 12 | 10,1975.75,49920.0 13 | 11,1976.0,40704.0 14 | 12,1976.17,48960.0 15 | 13,1976.42,23040.0 16 | 14,1976.58,32000.0 17 | 15,1977.08,36800.0 18 | 16,1978.17,28000.0 19 | 17,1978.25,29440.0 20 | 18,1978.33,19200.0 21 | 19,1978.5,24000.0 22 | 20,1978.58,16000.0 23 | 21,1978.75,15200.0 24 | 22,1979.0,10528.0 25 | 23,1979.75,6704.0 26 | 24,1980.0,6480.0 27 | 25,1981.0,8800.0 28 | 26,1981.58,4479.0 29 | 27,1982.0,3520.0 30 | 28,1982.17,4464.0 31 | 29,1982.67,1980.0 32 | 30,1983.0,2396.0 33 | 31,1983.67,1980.0 34 | 32,1984.0,1379.0 35 | 33,1984.58,1331.0 36 | 34,1985.0,880.0 37 | 35,1985.33,720.0 38 | 36,1985.42,550.0 39 | 37,1985.5,420.0 40 | 38,1985.58,350.0 41 | 39,1985.67,300.0 42 | 40,1985.83,300.0 43 | 41,1985.92,300.0 44 | 42,1986.0,300.0 45 | 43,1986.08,300.0 46 | 44,1986.17,300.0 47 | 45,1986.25,300.0 48 | 46,1986.33,190.0 49 | 47,1986.42,190.0 50 | 48,1986.5,190.0 51 | 49,1986.58,190.0 52 | 50,1986.67,190.0 53 | 51,1986.75,190.0 54 | 52,1986.92,190.0 55 | 53,1987.0,176.0 56 | 54,1987.08,176.0 57 | 55,1987.17,157.0 58 | 56,1987.25,154.0 59 | 57,1987.33,154.0 60 | 58,1987.42,154.0 61 | 59,1987.5,154.0 62 | 60,1987.58,154.0 63 | 61,1987.67,163.0 64 | 62,1987.75,133.0 65 | 63,1987.83,163.0 66 | 64,1987.92,163.0 67 | 65,1988.0,163.0 68 | 66,1988.08,182.0 69 | 67,1988.17,199.0 70 | 68,1988.33,199.0 71 | 69,1988.42,199.0 72 | 70,1988.5,505.0 73 | 71,1988.58,505.0 74 | 72,1988.67,505.0 75 | 73,1988.75,505.0 76 | 74,1988.83,505.0 77 | 75,1988.92,505.0 78 | 76,1989.0,505.0 79 | 77,1989.08,505.0 80 | 78,1989.17,505.0 81 | 79,1989.25,505.0 82 | 80,1989.42,344.0 83 | 81,1989.5,197.0 84 | 82,1989.58,188.0 85 | 83,1989.67,188.0 86 | 84,1989.75,128.0 87 | 85,1989.83,117.0 88 | 86,1989.92,113.0 89 | 87,1990.0,106.0 90 | 88,1990.17,98.3 91 | 89,1990.33,98.3 92 | 90,1990.42,89.5 93 | 91,1990.5,82.8 94 | 92,1990.58,81.1 95 | 93,1990.67,71.5 96 | 94,1990.75,59.0 97 | 95,1990.83,51.0 98 | 96,1990.92,45.5 99 | 97,1991.0,44.5 100 | 98,1991.08,44.5 101 | 99,1991.17,45.0 102 | 100,1991.25,45.0 103 | 101,1991.33,45.0 104 | 102,1991.42,43.8 105 | 103,1991.5,43.8 106 | 104,1991.58,41.3 107 | 105,1991.67,46.3 108 | 106,1991.75,45.0 109 | 107,1991.83,39.8 110 | 108,1991.92,39.8 111 | 109,1992.0,36.3 112 | 110,1992.08,36.3 113 | 111,1992.17,36.3 114 | 112,1992.25,34.8 115 | 113,1992.33,30.0 116 | 114,1992.42,32.5 117 | 115,1992.5,33.5 118 | 116,1992.58,31.0 119 | 117,1992.67,27.5 120 | 118,1992.75,26.3 121 | 119,1992.83,26.3 122 | 120,1992.92,26.3 123 | 121,1993.0,33.1 124 | 122,1993.08,27.5 125 | 123,1993.17,27.5 126 | 124,1993.25,27.5 127 | 125,1993.33,27.5 128 | 126,1993.42,30.0 129 | 127,1993.5,30.0 130 | 128,1993.58,30.0 131 | 129,1993.67,30.0 132 | 130,1993.75,36.0 133 | 131,1993.83,39.8 134 | 132,1993.92,35.8 135 | 133,1994.0,35.8 136 | 134,1994.08,35.8 137 | 
135,1994.17,36.0 138 | 136,1994.25,37.3 139 | 137,1994.33,37.3 140 | 138,1994.42,37.3 141 | 139,1994.5,38.5 142 | 140,1994.58,37.0 143 | 141,1994.67,34.0 144 | 142,1994.75,33.5 145 | 143,1994.83,32.3 146 | 144,1994.92,32.3 147 | 145,1995.0,32.3 148 | 146,1995.08,32.0 149 | 147,1995.17,32.0 150 | 148,1995.25,31.2 151 | 149,1995.33,31.2 152 | 150,1995.42,31.1 153 | 151,1995.5,31.2 154 | 152,1995.58,30.6 155 | 153,1995.67,33.1 156 | 154,1995.75,33.1 157 | 155,1995.83,30.9 158 | 156,1995.92,30.9 159 | 157,1996.0,29.9 160 | 158,1996.08,28.8 161 | 159,1996.17,26.1 162 | 160,1996.25,24.7 163 | 161,1996.33,17.2 164 | 162,1996.42,14.9 165 | 163,1996.5,11.3 166 | 164,1996.58,9.06 167 | 165,1996.67,8.44 168 | 166,1996.75,8.0 169 | 167,1996.83,5.25 170 | 168,1996.92,5.25 171 | 169,1997.0,4.63 172 | 170,1997.08,3.63 173 | 171,1997.17,3.0 174 | 172,1997.25,3.0 175 | 173,1997.33,3.0 176 | 174,1997.42,3.69 177 | 175,1997.5,4.0 178 | 176,1997.58,4.13 179 | 177,1997.67,3.63 180 | 178,1997.75,3.41 181 | 179,1997.83,3.25 182 | 180,1997.92,2.16 183 | 181,1998.0,2.16 184 | 182,1998.08,0.91 185 | 183,1998.17,0.97 186 | 184,1998.25,1.22 187 | 185,1998.33,1.19 188 | 186,1998.42,0.97 189 | 187,1998.58,1.03 190 | 188,1998.67,0.97 191 | 189,1998.75,1.16 192 | 190,1998.83,0.84 193 | 191,1998.92,0.84 194 | 192,1999.08,1.44 195 | 193,1999.13,0.84 196 | 194,1999.17,1.25 197 | 195,1999.25,1.25 198 | 196,1999.33,0.86 199 | 197,1999.5,0.78 200 | 198,1999.67,0.87 201 | 199,1999.75,1.04 202 | 200,1999.83,1.34 203 | 201,1999.92,2.35 204 | 202,2000.0,1.56 205 | 203,2000.08,1.48 206 | 204,2000.17,1.08 207 | 205,2000.25,0.84 208 | 206,2000.33,0.7 209 | 207,2000.42,0.9 210 | 208,2000.5,0.77 211 | 209,2000.58,0.84 212 | 210,2000.67,1.07 213 | 211,2000.75,1.12 214 | 212,2000.83,1.12 215 | 213,2000.92,0.9 216 | 214,2001.0,0.75 217 | 215,2001.08,0.464 218 | 216,2001.17,0.464 219 | 217,2001.25,0.383 220 | 218,2001.33,0.387 221 | 219,2001.42,0.305 222 | 220,2001.5,0.352 223 | 221,2001.5,0.27 224 | 222,2001.58,0.191 225 | 223,2001.67,0.191 226 | 224,2001.75,0.169 227 | 225,2001.77,0.148 228 | 226,2002.08,0.134 229 | 227,2002.08,0.207 230 | 228,2002.25,0.193 231 | 229,2002.33,0.193 232 | 230,2002.42,0.33 233 | 231,2002.58,0.193 234 | 232,2002.75,0.193 235 | 233,2003.17,0.176 236 | 234,2003.25,0.076 237 | 235,2003.33,0.126 238 | 236,2003.42,0.115 239 | 237,2003.5,0.133 240 | 238,2003.58,0.129 241 | 239,2003.67,0.143 242 | 240,2003.75,0.148 243 | 241,2003.83,0.16 244 | 242,2003.99,0.166 245 | 243,2004.0,0.174 246 | 244,2004.08,0.148 247 | 245,2004.17,0.146 248 | 246,2004.33,0.156 249 | 247,2004.42,0.203 250 | 248,2004.5,0.176 251 | 249,2005.25,0.185 252 | 250,2005.42,0.149 253 | 251,2005.83,0.116 254 | 252,2005.92,0.185 255 | 253,2006.17,0.112 256 | 254,2006.33,0.073 257 | 255,2006.5,0.082 258 | 256,2006.67,0.073 259 | 257,2006.75,0.088 260 | 258,2006.83,0.098 261 | 259,2006.99,0.092 262 | 260,2007.0,0.082 263 | 261,2007.08,0.078 264 | 262,2007.17,0.066 265 | 263,2007.33,0.0464 266 | 264,2007.5,0.0386 267 | 265,2007.67,0.0351 268 | 266,2007.75,0.0322 269 | 267,2007.83,0.0244 270 | 268,2007.92,0.0244 271 | 269,2008.0,0.0232 272 | 270,2008.08,0.022 273 | 271,2008.33,0.022 274 | 272,2008.5,0.0207 275 | 273,2008.58,0.0176 276 | 274,2008.67,0.0146 277 | 275,2008.83,0.011 278 | 276,2008.92,0.0098 279 | 277,2009.0,0.0098 280 | 278,2009.08,0.0107 281 | 279,2009.25,0.0105 282 | 280,2009.42,0.0115 283 | 281,2009.5,0.011 284 | 282,2009.58,0.0127 285 | 283,2009.75,0.0183 286 | 284,2009.92,0.0205 287 | 285,2010.0,0.019 288 | 286,2010.08,0.0202 289 | 
287,2010.17,0.0195 290 | 288,2010.33,0.0242 291 | 289,2010.5,0.021 292 | 290,2010.58,0.022 293 | 291,2010.75,0.0171 294 | 292,2010.83,0.0146 295 | 293,2010.92,0.0122 296 | 294,2011.0,0.01 297 | 295,2011.08,0.0103 298 | 296,2011.33,0.01 299 | 297,2011.42,0.0085 300 | 298,2011.67,0.0054 301 | 299,2011.75,0.0051 302 | 300,2012.0,0.0049 303 | 301,2012.08,0.0049 304 | 302,2012.25,0.005 305 | 303,2012.33,0.0049 306 | 304,2012.58,0.0048 307 | 305,2012.67,0.004 308 | 306,2012.83,0.0037 309 | 307,2013.0,0.0043 310 | 308,2013.08,0.0054 311 | 309,2013.33,0.0067 312 | 310,2013.42,0.0061 313 | 311,2013.58,0.0073 314 | 312,2013.67,0.0065 315 | 313,2013.75,0.0082 316 | 314,2013.83,0.0085 317 | 315,2013.92,0.0079 318 | 316,2014.08,0.0095 319 | 317,2014.17,0.0079 320 | 318,2014.25,0.0073 321 | 319,2014.42,0.0079 322 | 320,2014.58,0.0085 323 | 321,2014.67,0.0085 324 | 322,2014.83,0.0085 325 | 323,2015.0,0.0078 326 | 324,2015.08,0.0073 327 | 325,2015.25,0.0061 328 | 326,2015.33,0.0056 329 | 327,2015.5,0.0049 330 | 328,2015.58,0.0045 331 | 329,2015.67,0.0043 332 | 330,2015.75,0.0042 333 | 331,2015.83,0.0038 334 | 332,2015.92,0.0037 335 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: python-ml 2 | dependencies: 3 | - numpy 4 | - scipy 5 | - scikit-learn 6 | - matplotlib 7 | - pandas 8 | - pillow 9 | - graphviz 10 | - python-graphviz 11 | - imageio 12 | - joblib 13 | -------------------------------------------------------------------------------- /images/05_gridsearch_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/introduction_to_ml_with_python/ea60cf6cf791553b6cca7cf31802c68cb3798ebb/images/05_gridsearch_overview.png -------------------------------------------------------------------------------- /images/api_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/introduction_to_ml_with_python/ea60cf6cf791553b6cca7cf31802c68cb3798ebb/images/api_table.png -------------------------------------------------------------------------------- /images/bag_of_words.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/introduction_to_ml_with_python/ea60cf6cf791553b6cca7cf31802c68cb3798ebb/images/bag_of_words.png -------------------------------------------------------------------------------- /images/classifier_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/introduction_to_ml_with_python/ea60cf6cf791553b6cca7cf31802c68cb3798ebb/images/classifier_comparison.png -------------------------------------------------------------------------------- /images/dendrogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/introduction_to_ml_with_python/ea60cf6cf791553b6cca7cf31802c68cb3798ebb/images/dendrogram.png -------------------------------------------------------------------------------- /images/iris_petal_sepal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/introduction_to_ml_with_python/ea60cf6cf791553b6cca7cf31802c68cb3798ebb/images/iris_petal_sepal.png 
--------------------------------------------------------------------------------
/images/overfitting_underfitting_cartoon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amueller/introduction_to_ml_with_python/ea60cf6cf791553b6cca7cf31802c68cb3798ebb/images/overfitting_underfitting_cartoon.png
--------------------------------------------------------------------------------
/images/overfitting_underfitting_cartoon.svg:
--------------------------------------------------------------------------------
[SVG source omitted. Recoverable text content: x-axis "Model complexity", y-axis "Accuracy"; curve labels "Training" and "Generalization"; annotations "Underfitting", "Overfitting", "Sweet spot".]
--------------------------------------------------------------------------------
/images/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amueller/introduction_to_ml_with_python/ea60cf6cf791553b6cca7cf31802c68cb3798ebb/images/pipeline.png
--------------------------------------------------------------------------------
/images/pipeline.svg:
--------------------------------------------------------------------------------
[SVG source omitted. Recoverable text content: diagram of "pipe = make_pipeline(T1(), T2(), Classifier())". pipe.fit(X, y) chains T1.fit(X, y) and T1.transform(X) -> X1, then T2.fit(X1, y) and T2.transform(X1) -> X2, then Classifier.fit(X2, y). pipe.predict(X') chains T1.transform(X') -> X'1, T2.transform(X'1) -> X'2, Classifier.predict(X'2) -> y'.]
--------------------------------------------------------------------------------
/mglearn/__init__.py:
--------------------------------------------------------------------------------
1 | from . import plots
2 | from . import tools
3 | from .plots import cm3, cm2
4 | from .tools import discrete_scatter
5 | from .plot_helpers import ReBl
6 | 
7 | __version__ = "0.2.0"
8 | 
9 | __all__ = ['tools', 'plots', 'cm3', 'cm2', 'discrete_scatter', 'ReBl']
--------------------------------------------------------------------------------
/mglearn/datasets.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import os
4 | from scipy import signal
5 | from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
6 | from sklearn.datasets import make_blobs
7 | from sklearn.utils import Bunch
8 | 
9 | DATA_PATH = os.path.join(os.path.dirname(__file__), "data")
10 | 
11 | 
12 | def make_forge():
13 |     # a carefully hand-designed dataset lol
14 |     X, y = make_blobs(centers=2, random_state=4, n_samples=30)
15 |     y[np.array([7, 27])] = 0
16 |     mask = np.ones(len(X), dtype=bool)
17 |     mask[np.array([0, 1, 5, 26])] = 0
18 |     X, y = X[mask], y[mask]
19 |     return X, y
20 | 
21 | 
22 | def make_wave(n_samples=100):
23 |     rnd = np.random.RandomState(42)
24 |     x = rnd.uniform(-3, 3, size=n_samples)
25 |     y_no_noise = (np.sin(4 * x) + x)
26 |     y = (y_no_noise + rnd.normal(size=len(x))) / 2
27 |     return x.reshape(-1, 1), y
28 | 
29 | 
30 | def load_boston():
31 |     try:
32 |         from sklearn.datasets import load_boston
33 |         return load_boston()
34 |     except ImportError:
35 |         pass
36 |     data_url = "http://lib.stat.cmu.edu/datasets/boston"
37 |     raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
38 |     data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
39 |     target = raw_df.values[1::2, 2]
40 |     return Bunch(data=data, target=target)
41 | 
42 | 
43 | def load_extended_boston():
44 |     boston = load_boston()
45 |     X = boston.data
46 | 
47 |     X = MinMaxScaler().fit_transform(boston.data)
48 |     X = PolynomialFeatures(degree=2, include_bias=False).fit_transform(X)
49 |     return X, boston.target
50 | 
51 | 
52 | def load_citibike():
53 |     data_mine = pd.read_csv(os.path.join(DATA_PATH, "citibike.csv"))
54 |     data_mine['one'] = 1
55 |     data_mine['starttime'] = pd.to_datetime(data_mine.starttime)
56 |     data_starttime = data_mine.set_index("starttime")
57 |     data_resampled = data_starttime.resample("3h").sum().fillna(0)
58 |     return data_resampled.one
59 | 
60 | 
61 | def make_signals():
62 |     # fix a random state seed
63 |     rng = np.random.RandomState(42)
64 |     n_samples = 2000
65 |     time = np.linspace(0, 8, n_samples)
66 |     # create three signals
67 |     s1 = np.sin(2 * time)  # Signal 1 : sinusoidal signal
68 |     s2 = np.sign(np.sin(3 * time))  # Signal 2 : square signal
69 |     s3 = signal.sawtooth(2 * np.pi * time)  # Signal 3: saw tooth signal
70 | 
71 |     # concatenate the signals, add noise
72 |     S = np.c_[s1, s2, s3]
73 |     S += 0.2 * rng.normal(size=S.shape)
74 | 
75 |     S /= S.std(axis=0)  # Standardize data
76 |     S -= S.min()
77 |     return S
--------------------------------------------------------------------------------
/mglearn/make_blobs.py:
--------------------------------------------------------------------------------
1 | import numbers
2 | import numpy as np
3 | 
4 | from sklearn.utils import check_array, check_random_state
5 | from sklearn.utils import shuffle as shuffle_
6 | from sklearn.utils.deprecation import deprecated
7 | 
8 | 
9 | @deprecated("Please import make_blobs directly from scikit-learn")
10 | def make_blobs(n_samples=100, n_features=2, centers=2, cluster_std=1.0,
11 |                center_box=(-10.0, 10.0), shuffle=True, random_state=None):
12 |     """Generate isotropic Gaussian
blobs for clustering. 13 | 14 | Read more in the :ref:`User Guide `. 15 | 16 | Parameters 17 | ---------- 18 | n_samples : int, or tuple, optional (default=100) 19 | The total number of points equally divided among clusters. 20 | 21 | n_features : int, optional (default=2) 22 | The number of features for each sample. 23 | 24 | centers : int or array of shape [n_centers, n_features], optional 25 | (default=3) 26 | The number of centers to generate, or the fixed center locations. 27 | 28 | cluster_std: float or sequence of floats, optional (default=1.0) 29 | The standard deviation of the clusters. 30 | 31 | center_box: pair of floats (min, max), optional (default=(-10.0, 10.0)) 32 | The bounding box for each cluster center when centers are 33 | generated at random. 34 | 35 | shuffle : boolean, optional (default=True) 36 | Shuffle the samples. 37 | 38 | random_state : int, RandomState instance or None, optional (default=None) 39 | If int, random_state is the seed used by the random number generator; 40 | If RandomState instance, random_state is the random number generator; 41 | If None, the random number generator is the RandomState instance used 42 | by `np.random`. 43 | 44 | Returns 45 | ------- 46 | X : array of shape [n_samples, n_features] 47 | The generated samples. 48 | 49 | y : array of shape [n_samples] 50 | The integer labels for cluster membership of each sample. 51 | 52 | Examples 53 | -------- 54 | >>> from sklearn.datasets.samples_generator import make_blobs 55 | >>> X, y = make_blobs(n_samples=10, centers=3, n_features=2, 56 | ... random_state=0) 57 | >>> print(X.shape) 58 | (10, 2) 59 | >>> y 60 | array([0, 0, 1, 0, 2, 2, 2, 1, 1, 0]) 61 | 62 | See also 63 | -------- 64 | make_classification: a more intricate variant 65 | """ 66 | generator = check_random_state(random_state) 67 | 68 | if isinstance(centers, numbers.Integral): 69 | centers = generator.uniform(center_box[0], center_box[1], 70 | size=(centers, n_features)) 71 | else: 72 | centers = check_array(centers) 73 | n_features = centers.shape[1] 74 | 75 | if isinstance(cluster_std, numbers.Real): 76 | cluster_std = np.ones(len(centers)) * cluster_std 77 | 78 | X = [] 79 | y = [] 80 | 81 | n_centers = centers.shape[0] 82 | if isinstance(n_samples, numbers.Integral): 83 | n_samples_per_center = [int(n_samples // n_centers)] * n_centers 84 | for i in range(n_samples % n_centers): 85 | n_samples_per_center[i] += 1 86 | else: 87 | n_samples_per_center = n_samples 88 | 89 | for i, (n, std) in enumerate(zip(n_samples_per_center, cluster_std)): 90 | X.append(centers[i] + generator.normal(scale=std, 91 | size=(n, n_features))) 92 | y += [i] * n 93 | 94 | X = np.concatenate(X) 95 | y = np.array(y) 96 | 97 | if shuffle: 98 | X, y = shuffle_(X, y, random_state=generator) 99 | 100 | return X, y 101 | -------------------------------------------------------------------------------- /mglearn/plot_2d_separator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from .plot_helpers import cm2, cm3, discrete_scatter 4 | 5 | 6 | def plot_2d_classification(classifier, X, fill=False, ax=None, eps=None, 7 | alpha=1, cm=cm3): 8 | # multiclass 9 | if eps is None: 10 | eps = X.std() / 2. 
11 | 12 | if ax is None: 13 | ax = plt.gca() 14 | 15 | x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps 16 | y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps 17 | xx = np.linspace(x_min, x_max, 1000) 18 | yy = np.linspace(y_min, y_max, 1000) 19 | 20 | X1, X2 = np.meshgrid(xx, yy) 21 | X_grid = np.c_[X1.ravel(), X2.ravel()] 22 | decision_values = classifier.predict(X_grid) 23 | ax.imshow(decision_values.reshape(X1.shape), extent=(x_min, x_max, 24 | y_min, y_max), 25 | aspect='auto', origin='lower', alpha=alpha, cmap=cm) 26 | ax.set_xlim(x_min, x_max) 27 | ax.set_ylim(y_min, y_max) 28 | ax.set_xticks(()) 29 | ax.set_yticks(()) 30 | 31 | 32 | def plot_2d_scores(classifier, X, ax=None, eps=None, alpha=1, cm="viridis", 33 | function=None): 34 | # binary with fill 35 | if eps is None: 36 | eps = X.std() / 2. 37 | 38 | if ax is None: 39 | ax = plt.gca() 40 | 41 | x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps 42 | y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps 43 | xx = np.linspace(x_min, x_max, 100) 44 | yy = np.linspace(y_min, y_max, 100) 45 | 46 | X1, X2 = np.meshgrid(xx, yy) 47 | X_grid = np.c_[X1.ravel(), X2.ravel()] 48 | if function is None: 49 | function = getattr(classifier, "decision_function", 50 | getattr(classifier, "predict_proba")) 51 | else: 52 | function = getattr(classifier, function) 53 | decision_values = function(X_grid) 54 | if decision_values.ndim > 1 and decision_values.shape[1] > 1: 55 | # predict_proba 56 | decision_values = decision_values[:, 1] 57 | grr = ax.imshow(decision_values.reshape(X1.shape), 58 | extent=(x_min, x_max, y_min, y_max), aspect='auto', 59 | origin='lower', alpha=alpha, cmap=cm) 60 | 61 | ax.set_xlim(x_min, x_max) 62 | ax.set_ylim(y_min, y_max) 63 | ax.set_xticks(()) 64 | ax.set_yticks(()) 65 | return grr 66 | 67 | 68 | def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1, 69 | cm=cm2, linewidth=None, threshold=None, 70 | linestyle="solid"): 71 | # binary? 72 | if eps is None: 73 | eps = X.std() / 2. 
74 | 75 | if ax is None: 76 | ax = plt.gca() 77 | 78 | x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps 79 | y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps 80 | xx = np.linspace(x_min, x_max, 1000) 81 | yy = np.linspace(y_min, y_max, 1000) 82 | 83 | X1, X2 = np.meshgrid(xx, yy) 84 | X_grid = np.c_[X1.ravel(), X2.ravel()] 85 | try: 86 | decision_values = classifier.decision_function(X_grid) 87 | levels = [0] if threshold is None else [threshold] 88 | fill_levels = [decision_values.min()] + levels + [ 89 | decision_values.max()] 90 | except AttributeError: 91 | # no decision_function 92 | decision_values = classifier.predict_proba(X_grid)[:, 1] 93 | levels = [.5] if threshold is None else [threshold] 94 | fill_levels = [0] + levels + [1] 95 | if fill: 96 | ax.contourf(X1, X2, decision_values.reshape(X1.shape), 97 | levels=fill_levels, alpha=alpha, cmap=cm) 98 | else: 99 | ax.contour(X1, X2, decision_values.reshape(X1.shape), levels=levels, 100 | colors="black", alpha=alpha, linewidths=linewidth, 101 | linestyles=linestyle, zorder=5) 102 | 103 | ax.set_xlim(x_min, x_max) 104 | ax.set_ylim(y_min, y_max) 105 | ax.set_xticks(()) 106 | ax.set_yticks(()) -------------------------------------------------------------------------------- /mglearn/plot_agglomerative.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from sklearn.datasets import make_blobs 4 | from sklearn.cluster import AgglomerativeClustering 5 | from sklearn.neighbors import KernelDensity 6 | 7 | 8 | def plot_agglomerative_algorithm(): 9 | # generate synthetic two-dimensional data 10 | X, y = make_blobs(random_state=0, n_samples=12) 11 | 12 | agg = AgglomerativeClustering(n_clusters=X.shape[0], compute_full_tree=True).fit(X) 13 | 14 | fig, axes = plt.subplots(X.shape[0] // 5, 5, subplot_kw={'xticks': (), 15 | 'yticks': ()}, 16 | figsize=(20, 8)) 17 | 18 | eps = X.std() / 2 19 | 20 | x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps 21 | y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps 22 | 23 | xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100)) 24 | gridpoints = np.c_[xx.ravel().reshape(-1, 1), yy.ravel().reshape(-1, 1)] 25 | 26 | for i, ax in enumerate(axes.ravel()): 27 | ax.set_xlim(x_min, x_max) 28 | ax.set_ylim(y_min, y_max) 29 | agg.n_clusters = X.shape[0] - i 30 | agg.fit(X) 31 | ax.set_title("Step %d" % i) 32 | ax.scatter(X[:, 0], X[:, 1], s=60, c='grey') 33 | bins = np.bincount(agg.labels_) 34 | for cluster in range(agg.n_clusters): 35 | if bins[cluster] > 1: 36 | points = X[agg.labels_ == cluster] 37 | other_points = X[agg.labels_ != cluster] 38 | 39 | kde = KernelDensity(bandwidth=.5).fit(points) 40 | scores = kde.score_samples(gridpoints) 41 | score_inside = np.min(kde.score_samples(points)) 42 | score_outside = np.max(kde.score_samples(other_points)) 43 | levels = .8 * score_inside + .2 * score_outside 44 | ax.contour(xx, yy, scores.reshape(100, 100), levels=[levels], 45 | colors='k', linestyles='solid', linewidths=2) 46 | 47 | axes[0, 0].set_title("Initialization") 48 | 49 | 50 | def plot_agglomerative(): 51 | X, y = make_blobs(random_state=0, n_samples=12) 52 | agg = AgglomerativeClustering(n_clusters=3) 53 | 54 | eps = X.std() / 2. 
55 | 56 | x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps 57 | y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps 58 | 59 | xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100)) 60 | gridpoints = np.c_[xx.ravel().reshape(-1, 1), yy.ravel().reshape(-1, 1)] 61 | 62 | ax = plt.gca() 63 | for i, x in enumerate(X): 64 | ax.text(x[0] + .1, x[1], "%d" % i, horizontalalignment='left', verticalalignment='center') 65 | 66 | ax.scatter(X[:, 0], X[:, 1], s=60, c='grey') 67 | ax.set_xticks(()) 68 | ax.set_yticks(()) 69 | 70 | for i in range(11): 71 | agg.n_clusters = X.shape[0] - i 72 | agg.fit(X) 73 | 74 | bins = np.bincount(agg.labels_) 75 | for cluster in range(agg.n_clusters): 76 | if bins[cluster] > 1: 77 | points = X[agg.labels_ == cluster] 78 | other_points = X[agg.labels_ != cluster] 79 | 80 | kde = KernelDensity(bandwidth=.5).fit(points) 81 | scores = kde.score_samples(gridpoints) 82 | score_inside = np.min(kde.score_samples(points)) 83 | score_outside = np.max(kde.score_samples(other_points)) 84 | levels = .8 * score_inside + .2 * score_outside 85 | ax.contour(xx, yy, scores.reshape(100, 100), levels=[levels], 86 | colors='k', linestyles='solid', linewidths=1) 87 | 88 | ax.set_xlim(x_min, x_max) 89 | ax.set_ylim(y_min, y_max) 90 | -------------------------------------------------------------------------------- /mglearn/plot_animal_tree.py: -------------------------------------------------------------------------------- 1 | from imageio import imread 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_animal_tree(ax=None): 6 | import graphviz 7 | if ax is None: 8 | ax = plt.gca() 9 | mygraph = graphviz.Digraph(node_attr={'shape': 'box'}, 10 | edge_attr={'labeldistance': "10.5"}, 11 | format="png") 12 | mygraph.node("0", "Has feathers?") 13 | mygraph.node("1", "Can fly?") 14 | mygraph.node("2", "Has fins?") 15 | mygraph.node("3", "Hawk") 16 | mygraph.node("4", "Penguin") 17 | mygraph.node("5", "Dolphin") 18 | mygraph.node("6", "Bear") 19 | mygraph.edge("0", "1", label="True") 20 | mygraph.edge("0", "2", label="False") 21 | mygraph.edge("1", "3", label="True") 22 | mygraph.edge("1", "4", label="False") 23 | mygraph.edge("2", "5", label="True") 24 | mygraph.edge("2", "6", label="False") 25 | mygraph.render("tmp") 26 | ax.imshow(imread("tmp.png")) 27 | ax.set_axis_off() 28 | -------------------------------------------------------------------------------- /mglearn/plot_cross_validation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_group_kfold(): 6 | from sklearn.model_selection import GroupKFold 7 | groups = [0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3] 8 | 9 | plt.figure(figsize=(10, 2)) 10 | plt.title("GroupKFold") 11 | 12 | axes = plt.gca() 13 | axes.set_frame_on(False) 14 | 15 | n_folds = 12 16 | n_samples = 12 17 | n_iter = 3 18 | n_samples_per_fold = 1 19 | 20 | cv = GroupKFold(n_splits=3) 21 | mask = np.zeros((n_iter, n_samples)) 22 | for i, (train, test) in enumerate(cv.split(range(12), groups=groups)): 23 | mask[i, train] = 1 24 | mask[i, test] = 2 25 | 26 | for i in range(n_folds): 27 | # test is grey 28 | colors = ["grey" if x == 2 else "white" for x in mask[:, i]] 29 | # not selected has no hatch 30 | 31 | boxes = axes.barh(y=range(n_iter), width=[1 - 0.1] * n_iter, 32 | left=i * n_samples_per_fold, height=.6, color=colors, 33 | hatch="//", edgecolor="k", align='edge') 34 | for j in np.where(mask[:, i] == 0)[0]: 35 | 
boxes[j].set_hatch("") 36 | 37 | axes.barh(y=[n_iter] * n_folds, width=[1 - 0.1] * n_folds, 38 | left=np.arange(n_folds) * n_samples_per_fold, height=.6, 39 | color="w", edgecolor='k', align="edge") 40 | 41 | for i in range(12): 42 | axes.text((i + .5) * n_samples_per_fold, 3.5, "%d" % 43 | groups[i], horizontalalignment="center") 44 | 45 | axes.invert_yaxis() 46 | axes.set_xlim(0, n_samples + 1) 47 | axes.set_ylabel("CV iterations") 48 | axes.set_xlabel("Data points") 49 | axes.set_xticks(np.arange(n_samples) + .5) 50 | axes.set_xticklabels(np.arange(1, n_samples + 1)) 51 | axes.set_yticks(np.arange(n_iter + 1) + .3) 52 | axes.set_yticklabels( 53 | ["Split %d" % x for x in range(1, n_iter + 1)] + ["Group"]) 54 | plt.legend([boxes[0], boxes[1]], ["Training set", "Test set"], loc=(1, .3)) 55 | plt.tight_layout() 56 | 57 | 58 | def plot_shuffle_split(): 59 | from sklearn.model_selection import ShuffleSplit 60 | plt.figure(figsize=(10, 2)) 61 | plt.title("ShuffleSplit with 10 points" 62 | ", train_size=5, test_size=2, n_splits=4") 63 | 64 | axes = plt.gca() 65 | axes.set_frame_on(False) 66 | 67 | n_folds = 10 68 | n_samples = 10 69 | n_iter = 4 70 | n_samples_per_fold = 1 71 | 72 | ss = ShuffleSplit(n_splits=4, train_size=5, test_size=2, random_state=43) 73 | mask = np.zeros((n_iter, n_samples)) 74 | for i, (train, test) in enumerate(ss.split(range(10))): 75 | mask[i, train] = 1 76 | mask[i, test] = 2 77 | 78 | for i in range(n_folds): 79 | # test is grey 80 | colors = ["grey" if x == 2 else "white" for x in mask[:, i]] 81 | # not selected has no hatch 82 | 83 | boxes = axes.barh(y=range(n_iter), width=[1 - 0.1] * n_iter, 84 | left=i * n_samples_per_fold, height=.6, color=colors, 85 | hatch="//", edgecolor='k', align='edge') 86 | for j in np.where(mask[:, i] == 0)[0]: 87 | boxes[j].set_hatch("") 88 | 89 | axes.invert_yaxis() 90 | axes.set_xlim(0, n_samples + 1) 91 | axes.set_ylabel("CV iterations") 92 | axes.set_xlabel("Data points") 93 | axes.set_xticks(np.arange(n_samples) + .5) 94 | axes.set_xticklabels(np.arange(1, n_samples + 1)) 95 | axes.set_yticks(np.arange(n_iter) + .3) 96 | axes.set_yticklabels(["Split %d" % x for x in range(1, n_iter + 1)]) 97 | # legend hacked for this random state 98 | plt.legend([boxes[1], boxes[0], boxes[2]], [ 99 | "Training set", "Test set", "Not selected"], loc=(1, .3)) 100 | plt.tight_layout() 101 | 102 | 103 | def plot_stratified_cross_validation(): 104 | fig, both_axes = plt.subplots(2, 1, figsize=(12, 5)) 105 | # plt.title("cross_validation_not_stratified") 106 | axes = both_axes[0] 107 | axes.set_title("Standard cross-validation with sorted class labels") 108 | 109 | axes.set_frame_on(False) 110 | 111 | n_folds = 3 112 | n_samples = 150 113 | 114 | n_samples_per_fold = n_samples / float(n_folds) 115 | 116 | for i in range(n_folds): 117 | colors = ["w"] * n_folds 118 | colors[i] = "grey" 119 | axes.barh(y=range(n_folds), width=[n_samples_per_fold - 1] * 120 | n_folds, left=i * n_samples_per_fold, height=.6, 121 | color=colors, hatch="//", edgecolor='k', align='edge') 122 | 123 | axes.barh(y=[n_folds] * n_folds, width=[n_samples_per_fold - 1] * 124 | n_folds, left=np.arange(3) * n_samples_per_fold, height=.6, 125 | color="w", edgecolor='k', align='edge') 126 | 127 | axes.invert_yaxis() 128 | axes.set_xlim(0, n_samples + 1) 129 | axes.set_ylabel("CV iterations") 130 | axes.set_xlabel("Data points") 131 | axes.set_xticks(np.arange(n_samples_per_fold / 2., 132 | n_samples, n_samples_per_fold)) 133 | axes.set_xticklabels(["Fold %d" % x for x in range(1, 
n_folds + 1)]) 134 | axes.set_yticks(np.arange(n_folds + 1) + .3) 135 | axes.set_yticklabels( 136 | ["Split %d" % x for x in range(1, n_folds + 1)] + ["Class label"]) 137 | for i in range(3): 138 | axes.text((i + .5) * n_samples_per_fold, 3.5, "Class %d" % 139 | i, horizontalalignment="center") 140 | 141 | ax = both_axes[1] 142 | ax.set_title("Stratified Cross-validation") 143 | ax.set_frame_on(False) 144 | ax.invert_yaxis() 145 | ax.set_xlim(0, n_samples + 1) 146 | ax.set_ylabel("CV iterations") 147 | ax.set_xlabel("Data points") 148 | 149 | ax.set_yticks(np.arange(n_folds + 1) + .3) 150 | ax.set_yticklabels( 151 | ["Split %d" % x for x in range(1, n_folds + 1)] + ["Class label"]) 152 | 153 | n_subsplit = n_samples_per_fold / 3. 154 | for i in range(n_folds): 155 | test_bars = ax.barh( 156 | y=[i] * n_folds, width=[n_subsplit - 1] * n_folds, 157 | left=np.arange(n_folds) * n_samples_per_fold + i * n_subsplit, 158 | height=.6, color="grey", hatch="//", edgecolor='k', align='edge') 159 | 160 | w = 2 * n_subsplit - 1 161 | ax.barh(y=[0] * n_folds, width=[w] * n_folds, left=np.arange(n_folds) 162 | * n_samples_per_fold + (0 + 1) * n_subsplit, height=.6, color="w", 163 | hatch="//", edgecolor='k', align='edge') 164 | ax.barh(y=[1] * (n_folds + 1), width=[w / 2., w, w, w / 2.], 165 | left=np.maximum(0, np.arange(n_folds + 1) * n_samples_per_fold - 166 | n_subsplit), height=.6, color="w", hatch="//", 167 | edgecolor='k', align='edge') 168 | training_bars = ax.barh(y=[2] * n_folds, width=[w] * n_folds, 169 | left=np.arange(n_folds) * n_samples_per_fold, 170 | height=.6, color="w", hatch="//", edgecolor='k', 171 | align='edge') 172 | 173 | ax.barh(y=[n_folds] * n_folds, width=[n_samples_per_fold - 1] * 174 | n_folds, left=np.arange(n_folds) * n_samples_per_fold, height=.6, 175 | color="w", edgecolor='k', align='edge') 176 | 177 | for i in range(3): 178 | ax.text((i + .5) * n_samples_per_fold, 3.5, "Class %d" % 179 | i, horizontalalignment="center") 180 | ax.set_ylim(4, -0.1) 181 | plt.legend([training_bars[0], test_bars[0]], [ 182 | 'Training data', 'Test data'], loc=(1.05, 1), frameon=False) 183 | 184 | fig.tight_layout() 185 | 186 | 187 | def plot_cross_validation(): 188 | plt.figure(figsize=(12, 2)) 189 | plt.title("cross_validation") 190 | axes = plt.gca() 191 | axes.set_frame_on(False) 192 | 193 | n_folds = 5 194 | n_samples = 25 195 | 196 | n_samples_per_fold = n_samples / float(n_folds) 197 | 198 | for i in range(n_folds): 199 | colors = ["w"] * n_folds 200 | colors[i] = "grey" 201 | bars = plt.barh( 202 | y=range(n_folds), width=[n_samples_per_fold - 0.1] * n_folds, 203 | left=i * n_samples_per_fold, height=.6, color=colors, hatch="//", 204 | edgecolor='k', align='edge') 205 | axes.invert_yaxis() 206 | axes.set_xlim(0, n_samples + 1) 207 | plt.ylabel("CV iterations") 208 | plt.xlabel("Data points") 209 | plt.xticks(np.arange(n_samples_per_fold / 2., n_samples, 210 | n_samples_per_fold), 211 | ["Fold %d" % x for x in range(1, n_folds + 1)]) 212 | plt.yticks(np.arange(n_folds) + .3, 213 | ["Split %d" % x for x in range(1, n_folds + 1)]) 214 | plt.legend([bars[0], bars[4]], ['Training data', 'Test data'], 215 | loc=(1.05, 0.4), frameon=False) 216 | 217 | 218 | def plot_threefold_split(): 219 | plt.figure(figsize=(15, 1)) 220 | axis = plt.gca() 221 | bars = axis.barh([0, 0, 0], [11.9, 2.9, 4.9], left=[0, 12, 15], color=[ 222 | 'white', 'grey', 'grey'], hatch="//", edgecolor='k', 223 | align='edge') 224 | bars[2].set_hatch(r"") 225 | axis.set_yticks(()) 226 | axis.set_frame_on(False) 227 | 
axis.set_ylim(-.1, .8)
228 |     axis.set_xlim(-0.1, 20.1)
229 |     axis.set_xticks([6, 13.3, 17.5])
230 |     axis.set_xticklabels(["training set", "validation set",
231 |                           "test set"], fontdict={'fontsize': 20})
232 |     axis.tick_params(length=0, labeltop=True, labelbottom=False)
233 |     axis.text(6, -.3, "Model fitting",
234 |               fontdict={'fontsize': 13}, horizontalalignment="center")
235 |     axis.text(13.3, -.3, "Parameter selection",
236 |               fontdict={'fontsize': 13}, horizontalalignment="center")
237 |     axis.text(17.5, -.3, "Evaluation",
238 |               fontdict={'fontsize': 13}, horizontalalignment="center")
239 | 
--------------------------------------------------------------------------------
/mglearn/plot_dbscan.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | from sklearn.cluster import DBSCAN
4 | from sklearn.datasets import make_blobs
5 | 
6 | from .plot_helpers import discrete_scatter, cm3
7 | 
8 | 
9 | def plot_dbscan():
10 |     X, y = make_blobs(random_state=0, n_samples=12)
11 | 
12 |     dbscan = DBSCAN()
13 |     clusters = dbscan.fit_predict(X)
14 |     clusters
15 | 
16 |     fig, axes = plt.subplots(3, 4, figsize=(11, 8),
17 |                              subplot_kw={'xticks': (), 'yticks': ()})
18 |     # Plot clusters as red, green and blue, and outliers (-1) as white
19 |     colors = [cm3(1), cm3(0), cm3(2)]
20 |     markers = ['o', '^', 'v']
21 | 
22 |     # iterate over settings of min_samples and eps
23 |     for i, min_samples in enumerate([2, 3, 5]):
24 |         for j, eps in enumerate([1, 1.5, 2, 3]):
25 |             # instantiate DBSCAN with a particular setting
26 |             dbscan = DBSCAN(min_samples=min_samples, eps=eps)
27 |             # get cluster assignments
28 |             clusters = dbscan.fit_predict(X)
29 |             print("min_samples: %d eps: %f cluster: %s"
30 |                   % (min_samples, eps, clusters))
31 |             if np.any(clusters == -1):
32 |                 c = ['w'] + colors
33 |                 m = ['o'] + markers
34 |             else:
35 |                 c = colors
36 |                 m = markers
37 |             discrete_scatter(X[:, 0], X[:, 1], clusters, ax=axes[i, j], c=c,
38 |                              s=8, markers=m)
39 |             inds = dbscan.core_sample_indices_
40 |             # visualize core samples and clusters.
41 | if len(inds): 42 | discrete_scatter(X[inds, 0], X[inds, 1], clusters[inds], 43 | ax=axes[i, j], s=15, c=colors, 44 | markers=markers) 45 | axes[i, j].set_title("min_samples: %d eps: %.1f" 46 | % (min_samples, eps)) 47 | fig.tight_layout() 48 | -------------------------------------------------------------------------------- /mglearn/plot_decomposition.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from matplotlib.offsetbox import OffsetImage, AnnotationBbox 3 | 4 | 5 | def plot_decomposition(people, pca): 6 | image_shape = people.images[0].shape 7 | plt.figure(figsize=(20, 3)) 8 | ax = plt.gca() 9 | 10 | imagebox = OffsetImage(people.images[0], zoom=1.5, cmap="gray") 11 | ab = AnnotationBbox(imagebox, (.05, 0.4), pad=0.0, xycoords='data') 12 | ax.add_artist(ab) 13 | 14 | for i in range(4): 15 | imagebox = OffsetImage(pca.components_[i].reshape(image_shape), zoom=1.5, cmap="viridis") 16 | 17 | ab = AnnotationBbox(imagebox, (.3 + .2 * i, 0.4), 18 | pad=0.0, 19 | xycoords='data' 20 | ) 21 | ax.add_artist(ab) 22 | if i == 0: 23 | plt.text(.18, .25, 'x_%d *' % i, fontdict={'fontsize': 50}) 24 | else: 25 | plt.text(.15 + .2 * i, .25, '+ x_%d *' % i, fontdict={'fontsize': 50}) 26 | 27 | plt.text(.95, .25, '+ ...', fontdict={'fontsize': 50}) 28 | 29 | plt.rc('text', usetex=True) 30 | plt.text(.13, .3, r'\approx', fontdict={'fontsize': 50}) 31 | plt.axis("off") 32 | -------------------------------------------------------------------------------- /mglearn/plot_grid_search.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.svm import SVC 4 | from sklearn.model_selection import GridSearchCV, train_test_split 5 | from sklearn.datasets import load_iris 6 | import pandas as pd 7 | 8 | 9 | def plot_cross_val_selection(): 10 | iris = load_iris() 11 | X_trainval, X_test, y_trainval, y_test = train_test_split(iris.data, 12 | iris.target, 13 | random_state=0) 14 | 15 | param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100], 16 | 'gamma': [0.001, 0.01, 0.1, 1, 10, 100]} 17 | grid_search = GridSearchCV(SVC(), param_grid, cv=5, 18 | return_train_score=True) 19 | grid_search.fit(X_trainval, y_trainval) 20 | results = pd.DataFrame(grid_search.cv_results_)[15:] 21 | 22 | best = np.argmax(results.mean_test_score.values) 23 | plt.figure(figsize=(10, 3)) 24 | plt.xlim(-1, len(results)) 25 | plt.ylim(0, 1.1) 26 | for i, (_, row) in enumerate(results.iterrows()): 27 | scores = row[['split%d_test_score' % i for i in range(5)]] 28 | marker_cv, = plt.plot([i] * 5, scores, '^', c='gray', markersize=5, 29 | alpha=.5) 30 | marker_mean, = plt.plot(i, row.mean_test_score, 'v', c='none', alpha=1, 31 | markersize=10, markeredgecolor='k') 32 | if i == best: 33 | marker_best, = plt.plot(i, row.mean_test_score, 'o', c='red', 34 | fillstyle="none", alpha=1, markersize=20, 35 | markeredgewidth=3) 36 | plt.xticks(range(len(results)), [str(x).strip("{}").replace("'", "") for x 37 | in results['params']], 38 | rotation=90) 39 | plt.ylabel("Validation accuracy") 40 | plt.xlabel("Parameter settings") 41 | plt.legend([marker_cv, marker_mean, marker_best], 42 | ["cv accuracy", "mean accuracy", "best parameter setting"], 43 | loc=(1.05, .4)) 44 | 45 | 46 | def plot_grid_search_overview(): 47 | plt.figure(figsize=(10, 3), dpi=70) 48 | axes = plt.gca() 49 | axes.yaxis.set_visible(False) 50 | axes.xaxis.set_visible(False) 51 | axes.set_frame_on(False) 52 | 53 | 
    # helper: draw one labeled, rounded box and, if a target box is given,
    # an arrow from this box to the target
    def draw(ax, text, start, target=None):
54 |         if target is not None:
55 |             patchB = target.get_bbox_patch()
56 |             end = target.get_position()
57 |         else:
58 |             end = start
59 |             patchB = None
60 |         annotation = ax.annotate(text, end, start, xycoords='axes pixels',
61 |                                  textcoords='axes pixels', size=20,
62 |                                  arrowprops=dict(
63 |                                      arrowstyle="-|>", fc="w", ec="k",
64 |                                      patchB=patchB,
65 |                                      connectionstyle="arc3,rad=0.0"),
66 |                                  bbox=dict(boxstyle="round", fc="w"),
67 |                                  horizontalalignment="center",
68 |                                  verticalalignment="center")
69 |         plt.draw()
70 |         return annotation
71 | 
72 |     step = 100
73 |     grr = 400
74 | 
75 |     final_evaluation = draw(axes, "final evaluation", (5 * step, grr - 3 *
76 |                                                        step))
77 |     retrained_model = draw(axes, "retrained model", (3 * step, grr - 3 * step),
78 |                            final_evaluation)
79 |     best_parameters = draw(axes, "best parameters", (.5 * step, grr - 3 *
80 |                                                      step), retrained_model)
81 |     cross_validation = draw(axes, "cross-validation", (.5 * step, grr - 2 *
82 |                                                        step), best_parameters)
83 |     draw(axes, "parameter grid", (0.0, grr - 0), cross_validation)
84 |     training_data = draw(axes, "training data", (2 * step, grr - step),
85 |                          cross_validation)
86 |     draw(axes, "training data", (2 * step, grr - step), retrained_model)
87 |     test_data = draw(axes, "test data", (5 * step, grr - step),
88 |                      final_evaluation)
89 |     draw(axes, "data set", (3.5 * step, grr - 0.0), training_data)
90 |     draw(axes, "data set", (3.5 * step, grr - 0.0), test_data)
91 |     plt.ylim(0, 1)
92 |     plt.xlim(0, 1.5)
--------------------------------------------------------------------------------
/mglearn/plot_helpers.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib as mpl
3 | import matplotlib.pyplot as plt
4 | from matplotlib.colors import ListedColormap, colorConverter, LinearSegmentedColormap
5 | 
6 | 
7 | cm_cycle = ListedColormap(['#0000aa', '#ff5050', '#50ff50', '#9040a0', '#fff000'])
8 | cm3 = ListedColormap(['#0000aa', '#ff2020', '#50ff50'])
9 | cm2 = ListedColormap(['#0000aa', '#ff2020'])
10 | 
11 | # create a smooth transition from the first to the second color of cm3
12 | # similar to RdBu but with our red and blue, also not going through white,
13 | # which is really bad for greyscale
14 | 
15 | cdict = {'red': [(0.0, 0.0, cm2(0)[0]),
16 |                  (1.0, cm2(1)[0], 1.0)],
17 | 
18 |          'green': [(0.0, 0.0, cm2(0)[1]),
19 |                    (1.0, cm2(1)[1], 1.0)],
20 | 
21 |          'blue': [(0.0, 0.0, cm2(0)[2]),
22 |                   (1.0, cm2(1)[2], 1.0)]}
23 | 
24 | ReBl = LinearSegmentedColormap("ReBl", cdict)
25 | 
26 | 
27 | def discrete_scatter(x1, x2, y=None, markers=None, s=10, ax=None,
28 |                      labels=None, padding=.2, alpha=1, c=None, markeredgewidth=None):
29 |     """Adaptation of matplotlib.pyplot.scatter to plot classes or clusters.
30 | 
31 |     Parameters
32 |     ----------
33 | 
34 |     x1 : nd-array
35 |         input data, first axis
36 | 
37 |     x2 : nd-array
38 |         input data, second axis
39 | 
40 |     y : nd-array
41 |         input data, discrete labels
42 | 
43 |     ax : matplotlib axes, optional
44 |         Axes to plot into; defaults to the current axes.
45 | 
46 |     markers : list of string
47 |         List of markers to use, or None (which cycles through a default list).
48 | 
49 |     s : int or float
50 |         Size of the marker
51 | 
52 |     padding : float
53 |         Fraction of the dataset range to use for padding the axes.
54 | 
55 |     alpha : float
56 |         Alpha value for all points.
57 | """ 58 | if ax is None: 59 | ax = plt.gca() 60 | 61 | if y is None: 62 | y = np.zeros(len(x1)) 63 | 64 | unique_y = np.unique(y) 65 | 66 | if markers is None: 67 | markers = ['o', '^', 'v', 'D', 's', '*', 'p', 'h', 'H', '8', '<', '>'] * 10 68 | 69 | if len(markers) == 1: 70 | markers = markers * len(unique_y) 71 | 72 | if labels is None: 73 | labels = unique_y 74 | 75 | # lines in the matplotlib sense, not actual lines 76 | lines = [] 77 | 78 | current_cycler = mpl.rcParams['axes.prop_cycle'] 79 | 80 | for i, (yy, cycle) in enumerate(zip(unique_y, current_cycler())): 81 | mask = y == yy 82 | # if c is none, use color cycle 83 | if c is None: 84 | color = cycle['color'] 85 | elif len(c) > 1: 86 | color = c[i] 87 | else: 88 | color = c 89 | # use light edge for dark markers 90 | if np.mean(colorConverter.to_rgb(color)) < .4: 91 | markeredgecolor = "grey" 92 | else: 93 | markeredgecolor = "black" 94 | 95 | lines.append(ax.plot(x1[mask], x2[mask], markers[i], markersize=s, 96 | label=labels[i], alpha=alpha, c=color, 97 | markeredgewidth=markeredgewidth, 98 | markeredgecolor=markeredgecolor)[0]) 99 | 100 | if padding != 0: 101 | pad1 = x1.std() * padding 102 | pad2 = x2.std() * padding 103 | xlim = ax.get_xlim() 104 | ylim = ax.get_ylim() 105 | ax.set_xlim(min(x1.min() - pad1, xlim[0]), max(x1.max() + pad1, xlim[1])) 106 | ax.set_ylim(min(x2.min() - pad2, ylim[0]), max(x2.max() + pad2, ylim[1])) 107 | 108 | return lines 109 | -------------------------------------------------------------------------------- /mglearn/plot_improper_preprocessing.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | 4 | def make_bracket(s, xy, textxy, width, ax): 5 | annotation = ax.annotate( 6 | s, xy, textxy, ha="center", va="center", size=20, 7 | arrowprops=dict(arrowstyle="-[", fc="w", ec="k", 8 | lw=2,), bbox=dict(boxstyle="square", fc="w")) 9 | annotation.arrow_patch.get_arrowstyle().widthB = width 10 | 11 | 12 | def plot_improper_processing(): 13 | fig, axes = plt.subplots(2, 1, figsize=(15, 10)) 14 | 15 | for axis in axes: 16 | bars = axis.barh([0, 0, 0], [11.9, 2.9, 4.9], left=[0, 12, 15], 17 | color=['white', 'grey', 'grey'], hatch="//", 18 | align='edge', edgecolor='k') 19 | bars[2].set_hatch(r"") 20 | axis.set_yticks(()) 21 | axis.set_frame_on(False) 22 | axis.set_ylim(-.1, 6) 23 | axis.set_xlim(-0.1, 20.1) 24 | axis.set_xticks(()) 25 | axis.tick_params(length=0, labeltop=True, labelbottom=False) 26 | axis.text(6, -.3, "training folds", 27 | fontdict={'fontsize': 14}, horizontalalignment="center") 28 | axis.text(13.5, -.3, "validation fold", 29 | fontdict={'fontsize': 14}, horizontalalignment="center") 30 | axis.text(17.5, -.3, "test set", 31 | fontdict={'fontsize': 14}, horizontalalignment="center") 32 | 33 | make_bracket("scaler fit", (7.5, 1.3), (7.5, 2.), 15, axes[0]) 34 | make_bracket("SVC fit", (6, 3), (6, 4), 12, axes[0]) 35 | make_bracket("SVC predict", (13.4, 3), (13.4, 4), 2.5, axes[0]) 36 | 37 | axes[0].set_title("Cross validation") 38 | axes[1].set_title("Test set prediction") 39 | 40 | make_bracket("scaler fit", (7.5, 1.3), (7.5, 2.), 15, axes[1]) 41 | make_bracket("SVC fit", (7.5, 3), (7.5, 4), 15, axes[1]) 42 | make_bracket("SVC predict", (17.5, 3), (17.5, 4), 4.8, axes[1]) 43 | 44 | 45 | def plot_proper_processing(): 46 | fig, axes = plt.subplots(2, 1, figsize=(15, 8)) 47 | 48 | for axis in axes: 49 | bars = axis.barh([0, 0, 0], [11.9, 2.9, 4.9], 50 | left=[0, 12, 15], color=['white', 'grey', 'grey'], 51 | 
hatch="//", align='edge', edgecolor='k') 52 | bars[2].set_hatch(r"") 53 | axis.set_yticks(()) 54 | axis.set_frame_on(False) 55 | axis.set_ylim(-.1, 4.5) 56 | axis.set_xlim(-0.1, 20.1) 57 | axis.set_xticks(()) 58 | axis.tick_params(length=0, labeltop=True, labelbottom=False) 59 | axis.text(6, -.3, "training folds", fontdict={'fontsize': 14}, 60 | horizontalalignment="center") 61 | axis.text(13.5, -.3, "validation fold", fontdict={'fontsize': 14}, 62 | horizontalalignment="center") 63 | axis.text(17.5, -.3, "test set", fontdict={'fontsize': 14}, 64 | horizontalalignment="center") 65 | 66 | make_bracket("scaler fit", (6, 1.3), (6, 2.), 12, axes[0]) 67 | make_bracket("SVC fit", (6, 3), (6, 4), 12, axes[0]) 68 | make_bracket("SVC predict", (13.4, 3), (13.4, 4), 2.5, axes[0]) 69 | 70 | axes[0].set_title("Cross validation") 71 | axes[1].set_title("Test set prediction") 72 | 73 | make_bracket("scaler fit", (7.5, 1.3), (7.5, 2.), 15, axes[1]) 74 | make_bracket("SVC fit", (7.5, 3), (7.5, 4), 15, axes[1]) 75 | make_bracket("SVC predict", (17.5, 3), (17.5, 4), 4.8, axes[1]) 76 | fig.subplots_adjust(hspace=.3) 77 | -------------------------------------------------------------------------------- /mglearn/plot_interactive_tree.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from sklearn.tree import DecisionTreeClassifier 5 | 6 | from io import StringIO 7 | from sklearn.tree import export_graphviz 8 | from imageio import imread 9 | from scipy import ndimage 10 | from sklearn.datasets import make_moons 11 | 12 | import re 13 | 14 | from .tools import discrete_scatter 15 | from .plot_helpers import cm2 16 | 17 | 18 | def tree_image(tree, fout=None): 19 | try: 20 | import graphviz 21 | except ImportError: 22 | # make a hacky white plot 23 | x = np.ones((10, 10)) 24 | x[0, 0] = 0 25 | return x 26 | dot_data = StringIO() 27 | export_graphviz(tree, out_file=dot_data, max_depth=3, impurity=False) 28 | data = dot_data.getvalue() 29 | data = re.sub(r"samples = [0-9]+\\n", "", data) 30 | data = re.sub(r"\\nsamples = [0-9]+", "", data) 31 | data = re.sub(r"value", "counts", data) 32 | 33 | graph = graphviz.Source(data, format="png") 34 | if fout is None: 35 | fout = "tmp" 36 | graph.render(fout) 37 | return imread(fout + ".png") 38 | 39 | 40 | def plot_tree_progressive(): 41 | X, y = make_moons(n_samples=100, noise=0.25, random_state=3) 42 | plt.figure() 43 | ax = plt.gca() 44 | discrete_scatter(X[:, 0], X[:, 1], y, ax=ax) 45 | ax.set_xlabel("Feature 0") 46 | ax.set_ylabel("Feature 1") 47 | plt.legend(["Class 0", "Class 1"], loc='best') 48 | 49 | axes = [] 50 | for i in range(3): 51 | fig, ax = plt.subplots(1, 2, figsize=(12, 4), 52 | subplot_kw={'xticks': (), 'yticks': ()}) 53 | axes.append(ax) 54 | axes = np.array(axes) 55 | 56 | for i, max_depth in enumerate([1, 2, 9]): 57 | tree = plot_tree(X, y, max_depth=max_depth, ax=axes[i, 0]) 58 | axes[i, 1].imshow(tree_image(tree)) 59 | axes[i, 1].set_axis_off() 60 | 61 | 62 | def plot_tree_partition(X, y, tree, ax=None): 63 | if ax is None: 64 | ax = plt.gca() 65 | eps = X.std() / 2. 
66 | 67 | x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps 68 | y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps 69 | xx = np.linspace(x_min, x_max, 1000) 70 | yy = np.linspace(y_min, y_max, 1000) 71 | 72 | X1, X2 = np.meshgrid(xx, yy) 73 | X_grid = np.c_[X1.ravel(), X2.ravel()] 74 | 75 | Z = tree.predict(X_grid) 76 | Z = Z.reshape(X1.shape) 77 | faces = tree.apply(X_grid) 78 | faces = faces.reshape(X1.shape) 79 | border = ndimage.laplace(faces) != 0 80 | ax.contourf(X1, X2, Z, alpha=.4, cmap=cm2, levels=[0, .5, 1]) 81 | ax.scatter(X1[border], X2[border], marker='.', s=1) 82 | 83 | discrete_scatter(X[:, 0], X[:, 1], y, ax=ax) 84 | ax.set_xlim(x_min, x_max) 85 | ax.set_ylim(y_min, y_max) 86 | ax.set_xticks(()) 87 | ax.set_yticks(()) 88 | return ax 89 | 90 | 91 | def plot_tree(X, y, max_depth=1, ax=None): 92 | tree = DecisionTreeClassifier(max_depth=max_depth, random_state=0).fit(X, y) 93 | ax = plot_tree_partition(X, y, tree, ax=ax) 94 | ax.set_title("depth = %d" % max_depth) 95 | return tree 96 | -------------------------------------------------------------------------------- /mglearn/plot_kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.datasets import make_blobs 4 | from sklearn.cluster import KMeans 5 | from sklearn.metrics import pairwise_distances 6 | import matplotlib.pyplot as plt 7 | import matplotlib as mpl 8 | from cycler import cycler 9 | 10 | from .tools import discrete_scatter 11 | from .plot_2d_separator import plot_2d_classification 12 | from .plot_helpers import cm3 13 | 14 | 15 | def plot_kmeans_algorithm(): 16 | 17 | X, y = make_blobs(random_state=1) 18 | # we don't want cyan in there 19 | with mpl.rc_context(rc={'axes.prop_cycle': cycler('color', ['#0000aa', 20 | '#ff2020', 21 | '#50ff50'])}): 22 | fig, axes = plt.subplots(3, 3, figsize=(10, 8), subplot_kw={'xticks': (), 'yticks': ()}) 23 | axes = axes.ravel() 24 | axes[0].set_title("Input data") 25 | discrete_scatter(X[:, 0], X[:, 1], ax=axes[0], markers=['o'], c='w') 26 | 27 | axes[1].set_title("Initialization") 28 | init = X[:3, :] 29 | discrete_scatter(X[:, 0], X[:, 1], ax=axes[1], markers=['o'], c='w') 30 | discrete_scatter(init[:, 0], init[:, 1], [0, 1, 2], ax=axes[1], 31 | markers=['^'], markeredgewidth=2) 32 | 33 | axes[2].set_title("Assign Points (1)") 34 | km = KMeans(n_clusters=3, init=init, max_iter=1, n_init=1).fit(X) 35 | centers = km.cluster_centers_ 36 | # need to compute labels by hand.
scikit-learn does two E-steps (point assignments) for max_iter=1 37 | # (and it's totally my fault) 38 | labels = np.argmin(pairwise_distances(init, X), axis=0) 39 | discrete_scatter(X[:, 0], X[:, 1], labels, markers=['o'], 40 | ax=axes[2]) 41 | discrete_scatter(init[:, 0], init[:, 1], [0, 1, 2], 42 | ax=axes[2], markers=['^'], markeredgewidth=2) 43 | 44 | axes[3].set_title("Recompute Centers (1)") 45 | discrete_scatter(X[:, 0], X[:, 1], labels, markers=['o'], 46 | ax=axes[3]) 47 | discrete_scatter(centers[:, 0], centers[:, 1], [0, 1, 2], 48 | ax=axes[3], markers=['^'], markeredgewidth=2) 49 | 50 | axes[4].set_title("Reassign Points (2)") 51 | km = KMeans(n_clusters=3, init=init, max_iter=1, n_init=1).fit(X) 52 | labels = km.labels_ 53 | discrete_scatter(X[:, 0], X[:, 1], labels, markers=['o'], 54 | ax=axes[4]) 55 | discrete_scatter(centers[:, 0], centers[:, 1], [0, 1, 2], 56 | ax=axes[4], markers=['^'], markeredgewidth=2) 57 | 58 | km = KMeans(n_clusters=3, init=init, max_iter=2, n_init=1).fit(X) 59 | axes[5].set_title("Recompute Centers (2)") 60 | centers = km.cluster_centers_ 61 | discrete_scatter(X[:, 0], X[:, 1], labels, markers=['o'], 62 | ax=axes[5]) 63 | discrete_scatter(centers[:, 0], centers[:, 1], [0, 1, 2], 64 | ax=axes[5], markers=['^'], markeredgewidth=2) 65 | 66 | axes[6].set_title("Reassign Points (3)") 67 | labels = km.labels_ 68 | discrete_scatter(X[:, 0], X[:, 1], labels, markers=['o'], 69 | ax=axes[6]) 70 | markers = discrete_scatter(centers[:, 0], centers[:, 1], [0, 1, 2], 71 | ax=axes[6], markers=['^'], 72 | markeredgewidth=2) 73 | 74 | axes[7].set_title("Recompute Centers (3)") 75 | km = KMeans(n_clusters=3, init=init, max_iter=3, n_init=1).fit(X) 76 | centers = km.cluster_centers_ 77 | discrete_scatter(X[:, 0], X[:, 1], labels, markers=['o'], 78 | ax=axes[7]) 79 | discrete_scatter(centers[:, 0], centers[:, 1], [0, 1, 2], 80 | ax=axes[7], markers=['^'], markeredgewidth=2) 81 | axes[8].set_axis_off() 82 | axes[8].legend(markers, ["Cluster 0", "Cluster 1", "Cluster 2"], loc='best') 83 | 84 | 85 | def plot_kmeans_boundaries(): 86 | X, y = make_blobs(random_state=1) 87 | init = X[:3, :] 88 | km = KMeans(n_clusters=3, init=init, max_iter=2, n_init=1).fit(X) 89 | discrete_scatter(X[:, 0], X[:, 1], km.labels_, markers=['o']) 90 | discrete_scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1], 91 | [0, 1, 2], markers=['^'], markeredgewidth=2) 92 | plot_2d_classification(km, X, cm=cm3, alpha=.4) 93 | 94 | 95 | def plot_kmeans_faces(km, pca, X_pca, X_people, y_people, target_names): 96 | n_clusters = 10 97 | image_shape = (87, 65) 98 | fig, axes = plt.subplots(n_clusters, 11, subplot_kw={'xticks': (), 'yticks': ()}, 99 | figsize=(10, 15), gridspec_kw={"hspace": .3}) 100 | 101 | for cluster in range(n_clusters): 102 | center = km.cluster_centers_[cluster] 103 | mask = km.labels_ == cluster 104 | dists = np.sum((X_pca - center) ** 2, axis=1) 105 | dists[~mask] = np.inf 106 | inds = np.argsort(dists)[:5] 107 | dists[~mask] = -np.inf 108 | inds = np.r_[inds, np.argsort(dists)[-5:]] 109 | axes[cluster, 0].imshow(pca.inverse_transform(center).reshape(image_shape), vmin=0, vmax=1) 110 | for image, label, _, ax in zip(X_people[inds], y_people[inds], 111 | km.labels_[inds], axes[cluster, 1:]): 112 | ax.imshow(image.reshape(image_shape), vmin=0, vmax=1) 113 | ax.set_title("%s" % (target_names[label].split()[-1]), fontdict={'fontsize': 9}) 114 | 115 | # add some boxes to illustrate which are similar and which dissimilar 116 | rec = plt.Rectangle([-5, -30], 73, 1295, fill=False, lw=2) 117 | rec =
axes[0, 0].add_patch(rec) 118 | rec.set_clip_on(False) 119 | axes[0, 0].text(0, -40, "Center") 120 | 121 | rec = plt.Rectangle([-5, -30], 385, 1295, fill=False, lw=2) 122 | rec = axes[0, 1].add_patch(rec) 123 | rec.set_clip_on(False) 124 | axes[0, 1].text(0, -40, "Close to center") 125 | 126 | rec = plt.Rectangle([-5, -30], 385, 1295, fill=False, lw=2) 127 | rec = axes[0, 6].add_patch(rec) 128 | rec.set_clip_on(False) 129 | axes[0, 6].text(0, -40, "Far from center") 130 | -------------------------------------------------------------------------------- /mglearn/plot_kneighbors_regularization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from sklearn.neighbors import KNeighborsRegressor 5 | 6 | 7 | def plot_kneighbors_regularization(): 8 | rnd = np.random.RandomState(42) 9 | x = np.linspace(-3, 3, 100) 10 | y_no_noise = np.sin(4 * x) + x 11 | y = y_no_noise + rnd.normal(size=len(x)) 12 | X = x[:, np.newaxis] 13 | fig, axes = plt.subplots(1, 3, figsize=(15, 5)) 14 | 15 | x_test = np.linspace(-3, 3, 1000) 16 | 17 | for n_neighbors, ax in zip([2, 5, 20], axes.ravel()): 18 | kneighbor_regression = KNeighborsRegressor(n_neighbors=n_neighbors) 19 | kneighbor_regression.fit(X, y) 20 | ax.plot(x, y_no_noise, label="true function") 21 | ax.plot(x, y, "o", label="data") 22 | ax.plot(x_test, kneighbor_regression.predict(x_test[:, np.newaxis]), 23 | label="prediction") 24 | ax.legend() 25 | ax.set_title("n_neighbors = %d" % n_neighbors) -------------------------------------------------------------------------------- /mglearn/plot_knn_classification.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from sklearn.metrics import euclidean_distances 5 | from sklearn.neighbors import KNeighborsClassifier 6 | 7 | from .datasets import make_forge 8 | from .plot_helpers import discrete_scatter 9 | 10 | 11 | def plot_knn_classification(n_neighbors=1): 12 | X, y = make_forge() 13 | 14 | X_test = np.array([[8.2, 3.66214339], [9.9, 3.2], [11.2, .5]]) 15 | dist = euclidean_distances(X, X_test) 16 | closest = np.argsort(dist, axis=0) 17 | 18 | for x, neighbors in zip(X_test, closest.T): 19 | for neighbor in neighbors[:n_neighbors]: 20 | plt.arrow(x[0], x[1], X[neighbor, 0] - x[0], 21 | X[neighbor, 1] - x[1], head_width=0, fc='k', ec='k') 22 | 23 | clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X, y) 24 | test_points = discrete_scatter(X_test[:, 0], X_test[:, 1], clf.predict(X_test), markers="*") 25 | training_points = discrete_scatter(X[:, 0], X[:, 1], y) 26 | plt.legend(training_points + test_points, ["training class 0", "training class 1", 27 | "test pred 0", "test pred 1"]) 28 | -------------------------------------------------------------------------------- /mglearn/plot_knn_regression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from sklearn.neighbors import KNeighborsRegressor 5 | from sklearn.metrics import euclidean_distances 6 | 7 | from .datasets import make_wave 8 | from .plot_helpers import cm3 9 | 10 | 11 | def plot_knn_regression(n_neighbors=1): 12 | X, y = make_wave(n_samples=40) 13 | X_test = np.array([[-1.5], [0.9], [1.5]]) 14 | 15 | dist = euclidean_distances(X, X_test) 16 | closest = np.argsort(dist, axis=0) 17 | 18 | plt.figure(figsize=(10, 6)) 19 | 20 | reg =
KNeighborsRegressor(n_neighbors=n_neighbors).fit(X, y) 21 | y_pred = reg.predict(X_test) 22 | 23 | for x, y_, neighbors in zip(X_test, y_pred, closest.T): 24 | for neighbor in neighbors[:n_neighbors]: 25 | plt.arrow(x[0], y_, X[neighbor, 0] - x[0], y[neighbor] - y_, 26 | head_width=0, fc='k', ec='k') 27 | 28 | train, = plt.plot(X, y, 'o', c=cm3(0)) 29 | test, = plt.plot(X_test, -3 * np.ones(len(X_test)), '*', c=cm3(2), 30 | markersize=20) 31 | pred, = plt.plot(X_test, y_pred, '*', c=cm3(0), markersize=20) 32 | plt.vlines(X_test, -3.1, 3.1, linestyle="--") 33 | plt.legend([train, test, pred], 34 | ["training data/target", "test data", "test prediction"], 35 | ncol=3, loc=(.1, 1.025)) 36 | plt.ylim(-3.1, 3.1) 37 | plt.xlabel("Feature") 38 | plt.ylabel("Target") 39 | -------------------------------------------------------------------------------- /mglearn/plot_linear_regression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from sklearn.linear_model import LinearRegression 5 | from sklearn.model_selection import train_test_split 6 | from .datasets import make_wave 7 | from .plot_helpers import cm2 8 | 9 | 10 | def plot_linear_regression_wave(): 11 | X, y = make_wave(n_samples=60) 12 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) 13 | 14 | line = np.linspace(-3, 3, 100).reshape(-1, 1) 15 | 16 | lr = LinearRegression().fit(X_train, y_train) 17 | print("w[0]: %f b: %f" % (lr.coef_[0], lr.intercept_)) 18 | 19 | plt.figure(figsize=(8, 8)) 20 | plt.plot(line, lr.predict(line)) 21 | plt.plot(X, y, 'o', c=cm2(0)) 22 | ax = plt.gca() 23 | ax.spines['left'].set_position('center') 24 | ax.spines['right'].set_color('none') 25 | ax.spines['bottom'].set_position('center') 26 | ax.spines['top'].set_color('none') 27 | ax.set_ylim(-3, 3) 28 | #ax.set_xlabel("Feature") 29 | #ax.set_ylabel("Target") 30 | ax.legend(["model", "training data"], loc="best") 31 | ax.grid(True) 32 | ax.set_aspect('equal') 33 | -------------------------------------------------------------------------------- /mglearn/plot_linear_svc_regularization.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from sklearn.svm import LinearSVC 4 | from sklearn.datasets import make_blobs 5 | 6 | from .plot_helpers import discrete_scatter 7 | 8 | 9 | def plot_linear_svc_regularization(): 10 | X, y = make_blobs(centers=2, random_state=4, n_samples=30) 11 | fig, axes = plt.subplots(1, 3, figsize=(12, 4)) 12 | 13 | # a carefully hand-designed dataset 14 | y[7] = 0 15 | y[27] = 0 16 | x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5 17 | y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5 18 | 19 | for ax, C in zip(axes, [1e-2, 10, 1e3]): 20 | discrete_scatter(X[:, 0], X[:, 1], y, ax=ax) 21 | 22 | svm = LinearSVC(C=C, tol=0.00001, dual=False).fit(X, y) 23 | w = svm.coef_[0] 24 | a = -w[0] / w[1] 25 | xx = np.linspace(6, 13) 26 | yy = a * xx - (svm.intercept_[0]) / w[1] 27 | ax.plot(xx, yy, c='k') 28 | ax.set_xlim(x_min, x_max) 29 | ax.set_ylim(y_min, y_max) 30 | ax.set_xticks(()) 31 | ax.set_yticks(()) 32 | ax.set_title("C = %f" % C) 33 | axes[0].legend(loc="best") 34 | -------------------------------------------------------------------------------- /mglearn/plot_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 |
4 | from .tools import plot_2d_separator, plot_2d_scores, cm, discrete_scatter 5 | from .plot_helpers import ReBl 6 | 7 | 8 | def plot_confusion_matrix_illustration(): 9 | plt.figure(figsize=(8, 8)) 10 | confusion = np.array([[401, 2], [8, 39]]) 11 | plt.text(0.40, .7, confusion[0, 0], size=70, horizontalalignment='right') 12 | plt.text(0.40, .2, confusion[1, 0], size=70, horizontalalignment='right') 13 | plt.text(.90, .7, confusion[0, 1], size=70, horizontalalignment='right') 14 | plt.text(.90, 0.2, confusion[1, 1], size=70, horizontalalignment='right') 15 | plt.xticks([.25, .75], ["predicted 'not nine'", "predicted 'nine'"], size=20) 16 | plt.yticks([.25, .75], ["true 'nine'", "true 'not nine'"], size=20) 17 | plt.plot([.5, .5], [0, 1], '--', c='k') 18 | plt.plot([0, 1], [.5, .5], '--', c='k') 19 | 20 | plt.xlim(0, 1) 21 | plt.ylim(0, 1) 22 | 23 | 24 | def plot_binary_confusion_matrix(): 25 | plt.text(0.45, .6, "TN", size=100, horizontalalignment='right') 26 | plt.text(0.45, .1, "FN", size=100, horizontalalignment='right') 27 | plt.text(.95, .6, "FP", size=100, horizontalalignment='right') 28 | plt.text(.95, 0.1, "TP", size=100, horizontalalignment='right') 29 | plt.xticks([.25, .75], ["predicted negative", "predicted positive"], size=15) 30 | plt.yticks([.25, .75], ["positive class", "negative class"], size=15) 31 | plt.plot([.5, .5], [0, 1], '--', c='k') 32 | plt.plot([0, 1], [.5, .5], '--', c='k') 33 | 34 | plt.xlim(0, 1) 35 | plt.ylim(0, 1) 36 | 37 | 38 | def plot_decision_threshold(): 39 | from sklearn.datasets import make_blobs 40 | from sklearn.svm import SVC 41 | from sklearn.model_selection import train_test_split 42 | 43 | X, y = make_blobs(n_samples=(400, 50), cluster_std=[7.0, 2], 44 | random_state=22) 45 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) 46 | 47 | fig, axes = plt.subplots(2, 3, figsize=(15, 8), subplot_kw={'xticks': (), 'yticks': ()}) 48 | plt.suptitle("decision_threshold") 49 | axes[0, 0].set_title("training data") 50 | discrete_scatter(X_train[:, 0], X_train[:, 1], y_train, ax=axes[0, 0]) 51 | 52 | svc = SVC(gamma=.05).fit(X_train, y_train) 53 | axes[0, 1].set_title("decision with threshold 0") 54 | discrete_scatter(X_train[:, 0], X_train[:, 1], y_train, ax=axes[0, 1]) 55 | plot_2d_scores(svc, X_train, function="decision_function", alpha=.7, 56 | ax=axes[0, 1], cm=ReBl) 57 | plot_2d_separator(svc, X_train, linewidth=3, ax=axes[0, 1]) 58 | axes[0, 2].set_title("decision with threshold -0.8") 59 | discrete_scatter(X_train[:, 0], X_train[:, 1], y_train, ax=axes[0, 2]) 60 | plot_2d_separator(svc, X_train, linewidth=3, ax=axes[0, 2], threshold=-.8) 61 | plot_2d_scores(svc, X_train, function="decision_function", alpha=.7, 62 | ax=axes[0, 2], cm=ReBl) 63 | 64 | axes[1, 0].set_axis_off() 65 | 66 | mask = np.abs(X_train[:, 1] - 7) < 5 67 | n_points = np.sum(mask) 68 | 69 | line = np.linspace(X_train.min(), X_train.max(), 100) 70 | axes[1, 1].set_title("Cross-section with threshold 0") 71 | axes[1, 1].plot(line, svc.decision_function(np.c_[line, 10 * np.ones(100)]), c='k') 72 | dec = svc.decision_function(np.c_[line, 10 * np.ones(100)]) 73 | contour = (dec > 0).reshape(1, -1).repeat(10, axis=0) 74 | axes[1, 1].contourf(line, np.linspace(-1.5, 1.5, 10), contour, alpha=0.4, cmap=cm) 75 | discrete_scatter(X_train[mask, 0], np.zeros(n_points), y_train[mask], ax=axes[1, 1]) 76 | axes[1, 1].set_xlim(X_train.min(), X_train.max()) 77 | axes[1, 1].set_ylim(-1.5, 1.5) 78 | axes[1, 1].set_xticks(()) 79 | axes[1, 1].set_ylabel("Decision value") 80 | 81 | contour2
= (dec > -.8).reshape(1, -1).repeat(10, axis=0) 82 | axes[1, 2].set_title("Cross-section with threshold -0.8") 83 | axes[1, 2].contourf(line, np.linspace(-1.5, 1.5, 10), contour2, alpha=0.4, cmap=cm) 84 | discrete_scatter(X_train[mask, 0], np.zeros(n_points), y_train[mask], alpha=.1, ax=axes[1, 2]) 85 | axes[1, 2].plot(line, svc.decision_function(np.c_[line, 10 * np.ones(100)]), c='k') 86 | axes[1, 2].set_xlim(X_train.min(), X_train.max()) 87 | axes[1, 2].set_ylim(-1.5, 1.5) 88 | axes[1, 2].set_xticks(()) 89 | axes[1, 2].set_ylabel("Decision value") 90 | axes[1, 0].legend(['negative class', 'positive class']) 91 | -------------------------------------------------------------------------------- /mglearn/plot_nmf.py: -------------------------------------------------------------------------------- 1 | from sklearn.decomposition import NMF 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | from joblib import Memory 6 | 7 | try: 8 | memory = Memory(cachedir="cache") 9 | except TypeError: 10 | # joblib.Memory changed its API in 0.12 11 | memory = Memory(location="cache", verbose=0) 12 | 13 | 14 | def plot_nmf_illustration(): 15 | rnd = np.random.RandomState(5) 16 | X_ = rnd.normal(size=(300, 2)) 17 | # Add 8 to make sure every point lies in the positive part of the space 18 | X_blob = np.dot(X_, rnd.normal(size=(2, 2))) + rnd.normal(size=2) + 8 19 | 20 | nmf = NMF(random_state=0) 21 | nmf.fit(X_blob) 22 | X_nmf = nmf.transform(X_blob) 23 | 24 | fig, axes = plt.subplots(1, 2, figsize=(15, 5)) 25 | 26 | axes[0].scatter(X_blob[:, 0], X_blob[:, 1], c=X_nmf[:, 0], linewidths=0, 27 | s=60, cmap='viridis') 28 | axes[0].set_xlabel("feature 1") 29 | axes[0].set_ylabel("feature 2") 30 | axes[0].set_xlim(0, 12) 31 | axes[0].set_ylim(0, 12) 32 | axes[0].arrow(0, 0, nmf.components_[0, 0], nmf.components_[0, 1], width=.1, 33 | head_width=.3, color='k') 34 | axes[0].arrow(0, 0, nmf.components_[1, 0], nmf.components_[1, 1], width=.1, 35 | head_width=.3, color='k') 36 | axes[0].set_aspect('equal') 37 | axes[0].set_title("NMF with two components") 38 | 39 | # second plot 40 | nmf = NMF(random_state=0, n_components=1) 41 | nmf.fit(X_blob) 42 | 43 | axes[1].scatter(X_blob[:, 0], X_blob[:, 1], c=X_nmf[:, 0], linewidths=0, 44 | s=60, cmap='viridis') 45 | axes[1].set_xlabel("feature 1") 46 | axes[1].set_ylabel("feature 2") 47 | axes[1].set_xlim(0, 12) 48 | axes[1].set_ylim(0, 12) 49 | axes[1].arrow(0, 0, nmf.components_[0, 0], nmf.components_[0, 1], width=.1, 50 | head_width=.3, color='k') 51 | 52 | axes[1].set_aspect('equal') 53 | axes[1].set_title("NMF with one component") 54 | 55 | 56 | @memory.cache 57 | def nmf_faces(X_train, X_test): 58 | # Build NMF models with 10, 50, 100 and 500 components 59 | # this list will hold the back-transformed test-data 60 | reduced_images = [] 61 | for n_components in [10, 50, 100, 500]: 62 | # build the NMF model 63 | nmf = NMF(n_components=n_components, random_state=0) 64 | nmf.fit(X_train) 65 | # transform the test data (afterwards has n_components many dimensions) 66 | X_test_nmf = nmf.transform(X_test) 67 | # back-transform the transformed test-data 68 | # (afterwards it's in the original space again) 69 | X_test_back = np.dot(X_test_nmf, nmf.components_) 70 | reduced_images.append(X_test_back) 71 | return reduced_images 72 | 73 | 74 | def plot_nmf_faces(X_train, X_test, image_shape): 75 | reduced_images = nmf_faces(X_train, X_test) 76 | 77 | # plot the first three images in the test set: 78 | fig, axes = plt.subplots(3, 5, figsize=(15, 12), 79 |
subplot_kw={'xticks': (), 'yticks': ()}) 80 | for i, ax in enumerate(axes): 81 | # plot original image 82 | ax[0].imshow(X_test[i].reshape(image_shape), 83 | vmin=0, vmax=1) 84 | # plot the four back-transformed images 85 | for a, X_test_back in zip(ax[1:], reduced_images): 86 | a.imshow(X_test_back[i].reshape(image_shape), vmin=0, vmax=1) 87 | 88 | # label the top row 89 | axes[0, 0].set_title("original image") 90 | for ax, n_components in zip(axes[0, 1:], [10, 50, 100, 500]): 91 | ax.set_title("%d components" % n_components) 92 | -------------------------------------------------------------------------------- /mglearn/plot_nn_graphs.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def plot_logistic_regression_graph(): 4 | import graphviz 5 | lr_graph = graphviz.Digraph(node_attr={'shape': 'circle', 'fixedsize': 'True'}, 6 | graph_attr={'rankdir': 'LR', 'splines': 'line'}) 7 | inputs = graphviz.Digraph(node_attr={'shape': 'circle'}, name="cluster_0") 8 | output = graphviz.Digraph(node_attr={'shape': 'circle'}, name="cluster_2") 9 | 10 | for i in range(4): 11 | inputs.node("x[%d]" % i, labelloc="c") 12 | inputs.body.append('label = "inputs"') 13 | inputs.body.append('color = "white"') 14 | 15 | lr_graph.subgraph(inputs) 16 | 17 | output.body.append('label = "output"') 18 | output.body.append('color = "white"') 19 | output.node("y") 20 | 21 | lr_graph.subgraph(output) 22 | 23 | for i in range(4): 24 | lr_graph.edge("x[%d]" % i, "y", label="w[%d]" % i) 25 | return lr_graph 26 | 27 | 28 | def plot_single_hidden_layer_graph(): 29 | import graphviz 30 | nn_graph = graphviz.Digraph(node_attr={'shape': 'circle', 'fixedsize': 'True'}, 31 | graph_attr={'rankdir': 'LR', 'splines': 'line'}) 32 | 33 | inputs = graphviz.Digraph(node_attr={'shape': 'circle'}, name="cluster_0") 34 | hidden = graphviz.Digraph(node_attr={'shape': 'circle'}, name="cluster_1") 35 | output = graphviz.Digraph(node_attr={'shape': 'circle'}, name="cluster_2") 36 | 37 | for i in range(4): 38 | inputs.node("x[%d]" % i) 39 | 40 | inputs.body.append('label = "inputs"') 41 | inputs.body.append('color = "white"') 42 | 43 | hidden.body.append('label = "hidden layer"') 44 | hidden.body.append('color = "white"') 45 | 46 | for i in range(3): 47 | hidden.node("h%d" % i, label="h[%d]" % i) 48 | 49 | output.node("y") 50 | output.body.append('label = "output"') 51 | output.body.append('color = "white"') 52 | 53 | nn_graph.subgraph(inputs) 54 | nn_graph.subgraph(hidden) 55 | nn_graph.subgraph(output) 56 | 57 | for i in range(4): 58 | for j in range(3): 59 | nn_graph.edge("x[%d]" % i, "h%d" % j) 60 | 61 | for i in range(3): 62 | nn_graph.edge("h%d" % i, "y") 63 | return nn_graph 64 | 65 | 66 | def plot_two_hidden_layer_graph(): 67 | import graphviz 68 | nn_graph = graphviz.Digraph(node_attr={'shape': 'circle', 'fixedsize': 'True'}, 69 | graph_attr={'rankdir': 'LR', 'splines': 'line'}) 70 | 71 | inputs = graphviz.Digraph(node_attr={'shape': 'circle'}, name="cluster_0") 72 | hidden = graphviz.Digraph(node_attr={'shape': 'circle'}, name="cluster_1") 73 | hidden2 = graphviz.Digraph(node_attr={'shape': 'circle'}, name="cluster_2") 74 | 75 | output = graphviz.Digraph(node_attr={'shape': 'circle'}, name="cluster_3") 76 | 77 | for i in range(4): 78 | inputs.node("x[%d]" % i) 79 | 80 | inputs.body.append('label = "inputs"') 81 | inputs.body.append('color = "white"') 82 | 83 | for i in range(3): 84 | hidden.node("h1[%d]" % i) 85 | 86 | for i in range(3): 87 | hidden2.node("h2[%d]" % i) 88 |
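# graphviz subgraph bodies are lists of raw DOT statements, hence the label/color strings appended below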
hidden.body.append('label = "hidden layer 1"') 90 | hidden.body.append('color = "white"') 91 | 92 | hidden2.body.append('label = "hidden layer 2"') 93 | hidden2.body.append('color = "white"') 94 | 95 | output.node("y") 96 | output.body.append('label = "output"') 97 | output.body.append('color = "white"') 98 | 99 | nn_graph.subgraph(inputs) 100 | nn_graph.subgraph(hidden) 101 | nn_graph.subgraph(hidden2) 102 | 103 | nn_graph.subgraph(output) 104 | 105 | for i in range(4): 106 | for j in range(3): 107 | nn_graph.edge("x[%d]" % i, "h1[%d]" % j, label="") 108 | 109 | for i in range(3): 110 | for j in range(3): 111 | nn_graph.edge("h1[%d]" % i, "h2[%d]" % j, label="") 112 | 113 | for i in range(3): 114 | nn_graph.edge("h2[%d]" % i, "y", label="") 115 | 116 | return nn_graph 117 | -------------------------------------------------------------------------------- /mglearn/plot_pca.py: -------------------------------------------------------------------------------- 1 | from sklearn.decomposition import PCA 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | from joblib import Memory 6 | 7 | try: 8 | memory = Memory(cachedir="cache") 9 | except TypeError: 10 | # joblib.Memory changed its API in 0.12 11 | memory = Memory(location="cache", verbose=0) 12 | 13 | def plot_pca_illustration(): 14 | rnd = np.random.RandomState(5) 15 | X_ = rnd.normal(size=(300, 2)) 16 | X_blob = np.dot(X_, rnd.normal(size=(2, 2))) + rnd.normal(size=2) 17 | 18 | pca = PCA() 19 | pca.fit(X_blob) 20 | X_pca = pca.transform(X_blob) 21 | 22 | S = X_pca.std(axis=0) 23 | 24 | fig, axes = plt.subplots(2, 2, figsize=(10, 10)) 25 | axes = axes.ravel() 26 | 27 | axes[0].set_title("Original data") 28 | axes[0].scatter(X_blob[:, 0], X_blob[:, 1], c=X_pca[:, 0], linewidths=0, 29 | s=60, cmap='viridis') 30 | axes[0].set_xlabel("feature 1") 31 | axes[0].set_ylabel("feature 2") 32 | axes[0].arrow(pca.mean_[0], pca.mean_[1], S[0] * pca.components_[0, 0], 33 | S[0] * pca.components_[0, 1], width=.1, head_width=.3, 34 | color='k') 35 | axes[0].arrow(pca.mean_[0], pca.mean_[1], S[1] * pca.components_[1, 0], 36 | S[1] * pca.components_[1, 1], width=.1, head_width=.3, 37 | color='k') 38 | axes[0].text(-1.5, -.5, "Component 2", size=14) 39 | axes[0].text(-4, -4, "Component 1", size=14) 40 | axes[0].set_aspect('equal') 41 | 42 | axes[1].set_title("Transformed data") 43 | axes[1].scatter(X_pca[:, 0], X_pca[:, 1], c=X_pca[:, 0], linewidths=0, 44 | s=60, cmap='viridis') 45 | axes[1].set_xlabel("First principal component") 46 | axes[1].set_ylabel("Second principal component") 47 | axes[1].set_aspect('equal') 48 | axes[1].set_ylim(-8, 8) 49 | 50 | pca = PCA(n_components=1) 51 | pca.fit(X_blob) 52 | X_inverse = pca.inverse_transform(pca.transform(X_blob)) 53 | 54 | axes[2].set_title("Transformed data w/ second component dropped") 55 | axes[2].scatter(X_pca[:, 0], np.zeros(X_pca.shape[0]), c=X_pca[:, 0], 56 | linewidths=0, s=60, cmap='viridis') 57 | axes[2].set_xlabel("First principal component") 58 | axes[2].set_aspect('equal') 59 | axes[2].set_ylim(-8, 8) 60 | 61 | axes[3].set_title("Back-rotation using only first component") 62 | axes[3].scatter(X_inverse[:, 0], X_inverse[:, 1], c=X_pca[:, 0], 63 | linewidths=0, s=60, cmap='viridis') 64 | axes[3].set_xlabel("feature 1") 65 | axes[3].set_ylabel("feature 2") 66 | axes[3].set_aspect('equal') 67 | axes[3].set_xlim(-8, 4) 68 | axes[3].set_ylim(-8, 4) 69 | 70 | 71 | def plot_pca_whitening(): 72 | rnd = np.random.RandomState(5) 73 | X_ = rnd.normal(size=(300, 2)) 74 | X_blob = np.dot(X_, 
rnd.normal(size=(2, 2))) + rnd.normal(size=2) 75 | 76 | pca = PCA(whiten=True) 77 | pca.fit(X_blob) 78 | X_pca = pca.transform(X_blob) 79 | 80 | fig, axes = plt.subplots(1, 2, figsize=(10, 10)) 81 | axes = axes.ravel() 82 | 83 | axes[0].set_title("Original data") 84 | axes[0].scatter(X_blob[:, 0], X_blob[:, 1], c=X_pca[:, 0], linewidths=0, s=60, cmap='viridis') 85 | axes[0].set_xlabel("feature 1") 86 | axes[0].set_ylabel("feature 2") 87 | axes[0].set_aspect('equal') 88 | 89 | axes[1].set_title("Whitened data") 90 | axes[1].scatter(X_pca[:, 0], X_pca[:, 1], c=X_pca[:, 0], linewidths=0, s=60, cmap='viridis') 91 | axes[1].set_xlabel("First principal component") 92 | axes[1].set_ylabel("Second principal component") 93 | axes[1].set_aspect('equal') 94 | axes[1].set_xlim(-3, 4) 95 | 96 | 97 | @memory.cache 98 | def pca_faces(X_train, X_test): 99 | # copied and pasted from nmf_faces; could be refactored 100 | # Build PCA models with 10, 50, 100, 500 components 101 | # this list will hold the back-transformed test-data 102 | reduced_images = [] 103 | for n_components in [10, 50, 100, 500]: 104 | # build the PCA model 105 | pca = PCA(n_components=n_components) 106 | pca.fit(X_train) 107 | # transform the test data (afterwards has n_components many dimensions) 108 | X_test_pca = pca.transform(X_test) 109 | # back-transform the transformed test-data 110 | # (afterwards it's in the original space again) 111 | X_test_back = pca.inverse_transform(X_test_pca) 112 | reduced_images.append(X_test_back) 113 | return reduced_images 114 | 115 | 116 | def plot_pca_faces(X_train, X_test, image_shape): 117 | reduced_images = pca_faces(X_train, X_test) 118 | 119 | # plot the first three images in the test set: 120 | fig, axes = plt.subplots(3, 5, figsize=(15, 12), 121 | subplot_kw={'xticks': (), 'yticks': ()}) 122 | for i, ax in enumerate(axes): 123 | # plot original image 124 | ax[0].imshow(X_test[i].reshape(image_shape), 125 | vmin=0, vmax=1) 126 | # plot the four back-transformed images 127 | for a, X_test_back in zip(ax[1:], reduced_images): 128 | a.imshow(X_test_back[i].reshape(image_shape), vmin=0, vmax=1) 129 | 130 | # label the top row 131 | axes[0, 0].set_title("original image") 132 | for ax, n_components in zip(axes[0, 1:], [10, 50, 100, 500]): 133 | ax.set_title("%d components" % n_components) 134 | -------------------------------------------------------------------------------- /mglearn/plot_rbf_svm_parameters.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from sklearn.svm import SVC 3 | from .plot_2d_separator import plot_2d_separator 4 | from .tools import make_handcrafted_dataset 5 | from .plot_helpers import discrete_scatter 6 | 7 | 8 | def plot_svm(log_C, log_gamma, ax=None): 9 | X, y = make_handcrafted_dataset() 10 | C = 10. ** log_C 11 | gamma = 10.
** log_gamma 12 | svm = SVC(kernel='rbf', C=C, gamma=gamma).fit(X, y) 13 | if ax is None: 14 | ax = plt.gca() 15 | plot_2d_separator(svm, X, ax=ax, eps=.5) 16 | # plot data 17 | discrete_scatter(X[:, 0], X[:, 1], y, ax=ax) 18 | # plot support vectors 19 | sv = svm.support_vectors_ 20 | # class labels of support vectors are given by the sign of the dual coefficients 21 | sv_labels = svm.dual_coef_.ravel() > 0 22 | discrete_scatter(sv[:, 0], sv[:, 1], sv_labels, s=15, markeredgewidth=3, ax=ax) 23 | ax.set_title("C = %.4f gamma = %.4f" % (C, gamma)) 24 | 25 | 26 | def plot_svm_interactive(): 27 | from ipywidgets import interactive, FloatSlider 28 | C_slider = FloatSlider(min=-3, max=3, step=.1, value=0, readout=False) 29 | gamma_slider = FloatSlider(min=-2, max=2, step=.1, value=0, readout=False) 30 | return interactive(plot_svm, log_C=C_slider, log_gamma=gamma_slider) 31 | -------------------------------------------------------------------------------- /mglearn/plot_ridge.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from sklearn.linear_model import Ridge, LinearRegression 5 | from sklearn.model_selection import learning_curve, KFold 6 | 7 | from .datasets import load_extended_boston 8 | 9 | 10 | def plot_learning_curve(est, X, y): 11 | training_set_size, train_scores, test_scores = learning_curve( 12 | est, X, y, train_sizes=np.linspace(.1, 1, 20), cv=KFold(20, shuffle=True, random_state=1)) 13 | estimator_name = est.__class__.__name__ 14 | line = plt.plot(training_set_size, train_scores.mean(axis=1), '--', 15 | label="training " + estimator_name) 16 | plt.plot(training_set_size, test_scores.mean(axis=1), '-', 17 | label="test " + estimator_name, c=line[0].get_color()) 18 | plt.xlabel('Training set size') 19 | plt.ylabel('Score (R^2)') 20 | plt.ylim(0, 1.1) 21 | 22 | 23 | def plot_ridge_n_samples(): 24 | X, y = load_extended_boston() 25 | 26 | plot_learning_curve(Ridge(alpha=1), X, y) 27 | plot_learning_curve(LinearRegression(), X, y) 28 | plt.legend(loc=(0, 1.05), ncol=2, fontsize=11) 29 | -------------------------------------------------------------------------------- /mglearn/plot_scaling.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from sklearn.datasets import make_blobs 4 | from sklearn.preprocessing import (StandardScaler, MinMaxScaler, Normalizer, 5 | RobustScaler) 6 | from .plot_helpers import cm2 7 | 8 | 9 | def plot_scaling(): 10 | X, y = make_blobs(n_samples=50, centers=2, random_state=4, cluster_std=1) 11 | X += 3 12 | 13 | plt.figure(figsize=(15, 8)) 14 | main_ax = plt.subplot2grid((2, 4), (0, 0), rowspan=2, colspan=2) 15 | 16 | main_ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm2, s=60) 17 | maxx = np.abs(X[:, 0]).max() 18 | maxy = np.abs(X[:, 1]).max() 19 | 20 | main_ax.set_xlim(-maxx + 1, maxx + 1) 21 | main_ax.set_ylim(-maxy + 1, maxy + 1) 22 | main_ax.set_title("Original Data") 23 | other_axes = [plt.subplot2grid((2, 4), (i, j)) 24 | for j in range(2, 4) for i in range(2)] 25 | 26 | for ax, scaler in zip(other_axes, [StandardScaler(), RobustScaler(), 27 | MinMaxScaler(), Normalizer(norm='l2')]): 28 | X_ = scaler.fit_transform(X) 29 | ax.scatter(X_[:, 0], X_[:, 1], c=y, cmap=cm2, s=60) 30 | ax.set_xlim(-2, 2) 31 | ax.set_ylim(-2, 2) 32 | ax.set_title(type(scaler).__name__) 33 | 34 | other_axes.append(main_ax) 35 | 36 | for ax in other_axes: 37 |
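# center the spines so every subplot shows a coordinate cross through the origin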
ax.spines['left'].set_position('center') 38 | ax.spines['right'].set_color('none') 39 | ax.spines['bottom'].set_position('center') 40 | ax.spines['top'].set_color('none') 41 | ax.xaxis.set_ticks_position('bottom') 42 | ax.yaxis.set_ticks_position('left') 43 | -------------------------------------------------------------------------------- /mglearn/plot_tree_nonmonotonous.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from sklearn.datasets import make_blobs 3 | from sklearn.tree import DecisionTreeClassifier, export_graphviz 4 | from .tools import discrete_scatter 5 | from .plot_2d_separator import plot_2d_separator 6 | 7 | 8 | def plot_tree_not_monotone(): 9 | import graphviz 10 | # make a simple 2d dataset 11 | X, y = make_blobs(centers=4, random_state=8) 12 | y = y % 2 13 | plt.figure() 14 | discrete_scatter(X[:, 0], X[:, 1], y) 15 | plt.legend(["Class 0", "Class 1"], loc="best") 16 | 17 | # learn a decision tree model 18 | tree = DecisionTreeClassifier(random_state=0).fit(X, y) 19 | plot_2d_separator(tree, X, linestyle="dashed") 20 | 21 | # visualize the tree 22 | export_graphviz(tree, out_file="mytree.dot", impurity=False, filled=True) 23 | with open("mytree.dot") as f: 24 | dot_graph = f.read() 25 | print("Feature importances: %s" % tree.feature_importances_) 26 | return graphviz.Source(dot_graph) 27 | -------------------------------------------------------------------------------- /mglearn/plots.py: -------------------------------------------------------------------------------- 1 | from .plot_linear_svc_regularization import plot_linear_svc_regularization 2 | from .plot_interactive_tree import plot_tree_progressive, plot_tree_partition 3 | from .plot_animal_tree import plot_animal_tree 4 | from .plot_rbf_svm_parameters import plot_svm 5 | from .plot_knn_regression import plot_knn_regression 6 | from .plot_knn_classification import plot_knn_classification 7 | from .plot_2d_separator import plot_2d_classification, plot_2d_separator 8 | from .plot_nn_graphs import (plot_logistic_regression_graph, 9 | plot_single_hidden_layer_graph, 10 | plot_two_hidden_layer_graph) 11 | from .plot_linear_regression import plot_linear_regression_wave 12 | from .plot_tree_nonmonotonous import plot_tree_not_monotone 13 | from .plot_scaling import plot_scaling 14 | from .plot_pca import plot_pca_illustration, plot_pca_whitening, plot_pca_faces 15 | from .plot_decomposition import plot_decomposition 16 | from .plot_nmf import plot_nmf_illustration, plot_nmf_faces 17 | from .plot_helpers import cm2, cm3 18 | from .plot_agglomerative import plot_agglomerative, plot_agglomerative_algorithm 19 | from .plot_kmeans import plot_kmeans_algorithm, plot_kmeans_boundaries, plot_kmeans_faces 20 | from .plot_improper_preprocessing import plot_improper_processing, plot_proper_processing 21 | from .plot_cross_validation import (plot_threefold_split, plot_group_kfold, 22 | plot_shuffle_split, plot_cross_validation, 23 | plot_stratified_cross_validation) 24 | 25 | from .plot_grid_search import plot_grid_search_overview, plot_cross_val_selection 26 | from .plot_metrics import (plot_confusion_matrix_illustration, 27 | plot_binary_confusion_matrix, 28 | plot_decision_threshold) 29 | from .plot_dbscan import plot_dbscan 30 | from .plot_ridge import plot_ridge_n_samples 31 | from .plot_kneighbors_regularization import plot_kneighbors_regularization 32 | 33 | __all__ = ['plot_linear_svc_regularization', 34 | 'plot_animal_tree', 'plot_tree_progressive',
35 | 'plot_tree_partition', 'plot_svm', 36 | 'plot_knn_regression', 37 | 'plot_logistic_regression_graph', 38 | 'plot_single_hidden_layer_graph', 39 | 'plot_two_hidden_layer_graph', 40 | 'plot_2d_classification', 41 | 'plot_2d_separator', 42 | 'plot_knn_classification', 43 | 'plot_linear_regression_wave', 44 | 'plot_tree_not_monotone', 45 | 'plot_scaling', 46 | 'plot_pca_illustration', 47 | 'plot_pca_faces', 48 | 'plot_pca_whitening', 49 | 'plot_decomposition', 50 | 'plot_nmf_illustration', 51 | 'plot_nmf_faces', 52 | 'plot_agglomerative', 53 | 'plot_agglomerative_algorithm', 54 | 'plot_kmeans_boundaries', 55 | 'plot_kmeans_algorithm', 56 | 'plot_kmeans_faces', 57 | 'cm3', 'cm2', 'plot_improper_processing', 'plot_proper_processing', 58 | 'plot_group_kfold', 59 | 'plot_shuffle_split', 60 | 'plot_stratified_cross_validation', 61 | 'plot_threefold_split', 62 | 'plot_cross_validation', 63 | 'plot_grid_search_overview', 64 | 'plot_cross_val_selection', 65 | 'plot_confusion_matrix_illustration', 66 | 'plot_binary_confusion_matrix', 67 | 'plot_decision_threshold', 68 | 'plot_dbscan', 69 | 'plot_ridge_n_samples', 70 | 'plot_kneighbors_regularization' 71 | ] 72 | -------------------------------------------------------------------------------- /mglearn/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.datasets import make_blobs 3 | from sklearn.tree import export_graphviz 4 | import matplotlib.pyplot as plt 5 | from .plot_2d_separator import (plot_2d_separator, plot_2d_classification, 6 | plot_2d_scores) 7 | from .plot_helpers import cm2 as cm, discrete_scatter 8 | 9 | 10 | def visualize_coefficients(coefficients, feature_names, n_top_features=25): 11 | """Visualize coefficients of a linear model. 12 | 13 | Parameters 14 | ---------- 15 | coefficients : nd-array, shape (n_features,) 16 | Model coefficients. 17 | 18 | feature_names : list or nd-array of strings, shape (n_features,) 19 | Feature names for labeling the coefficients. 20 | 21 | n_top_features : int, default=25 22 | How many features to show. The function will show the largest (most 23 | positive) and smallest (most negative) n_top_features coefficients, 24 | for a total of 2 * n_top_features coefficients.
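Notes ----- coefficients may also be passed as a column vector (for example the coef_ attribute of a scikit-learn linear model); it is flattened before plotting.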
25 | """ 26 | coefficients = coefficients.squeeze() 27 | if coefficients.ndim > 1: 28 | # this is not a row or column vector 29 | raise ValueError("coeffients must be 1d array or column vector, got" 30 | " shape {}".format(coefficients.shape)) 31 | coefficients = coefficients.ravel() 32 | 33 | if len(coefficients) != len(feature_names): 34 | raise ValueError("Number of coefficients {} doesn't match number of" 35 | "feature names {}.".format(len(coefficients), 36 | len(feature_names))) 37 | # get coefficients with large absolute values 38 | coef = coefficients.ravel() 39 | positive_coefficients = np.argsort(coef)[-n_top_features:] 40 | negative_coefficients = np.argsort(coef)[:n_top_features] 41 | interesting_coefficients = np.hstack([negative_coefficients, 42 | positive_coefficients]) 43 | # plot them 44 | plt.figure(figsize=(15, 5)) 45 | colors = [cm(1) if c < 0 else cm(0) 46 | for c in coef[interesting_coefficients]] 47 | plt.bar(np.arange(2 * n_top_features), coef[interesting_coefficients], 48 | color=colors) 49 | feature_names = np.array(feature_names) 50 | plt.subplots_adjust(bottom=0.3) 51 | plt.xticks(np.arange(1, 1 + 2 * n_top_features), 52 | feature_names[interesting_coefficients], rotation=60, 53 | ha="right") 54 | plt.ylabel("Coefficient magnitude") 55 | plt.xlabel("Feature") 56 | 57 | 58 | def heatmap(values, xlabel, ylabel, xticklabels, yticklabels, cmap=None, 59 | vmin=None, vmax=None, ax=None, fmt="%0.2f"): 60 | if ax is None: 61 | ax = plt.gca() 62 | # plot the mean cross-validation scores 63 | img = ax.pcolor(values, cmap=cmap, vmin=vmin, vmax=vmax) 64 | img.update_scalarmappable() 65 | ax.set_xlabel(xlabel) 66 | ax.set_ylabel(ylabel) 67 | ax.set_xticks(np.arange(len(xticklabels)) + .5) 68 | ax.set_yticks(np.arange(len(yticklabels)) + .5) 69 | ax.set_xticklabels(xticklabels) 70 | ax.set_yticklabels(yticklabels) 71 | ax.set_aspect(1) 72 | 73 | for p, color, value in zip(img.get_paths(), img.get_facecolors(), 74 | img.get_array()): 75 | x, y = p.vertices[:-2, :].mean(0) 76 | if np.mean(color[:3]) > 0.5: 77 | c = 'k' 78 | else: 79 | c = 'w' 80 | ax.text(x, y, fmt % value, color=c, ha="center", va="center") 81 | return img 82 | 83 | 84 | def make_handcrafted_dataset(): 85 | # a carefully hand-designed dataset lol 86 | X, y = make_blobs(centers=2, random_state=4, n_samples=30) 87 | y[np.array([7, 27])] = 0 88 | mask = np.ones(len(X), dtype=bool) 89 | mask[np.array([0, 1, 5, 26])] = 0 90 | X, y = X[mask], y[mask] 91 | return X, y 92 | 93 | 94 | def print_topics(topics, feature_names, sorting, topics_per_chunk=6, 95 | n_words=20): 96 | for i in range(0, len(topics), topics_per_chunk): 97 | # for each chunk: 98 | these_topics = topics[i: i + topics_per_chunk] 99 | # maybe we have less than topics_per_chunk left 100 | len_this_chunk = len(these_topics) 101 | # print topic headers 102 | print(("topic {:<8}" * len_this_chunk).format(*these_topics)) 103 | print(("-------- {0:<5}" * len_this_chunk).format("")) 104 | # print top n_words frequent words 105 | for i in range(n_words): 106 | try: 107 | print(("{:<14}" * len_this_chunk).format( 108 | *feature_names[sorting[these_topics, i]])) 109 | except: 110 | pass 111 | print("\n") 112 | 113 | 114 | def get_tree(tree, **kwargs): 115 | try: 116 | # python3 117 | from io import StringIO 118 | except ImportError: 119 | # python2 120 | from StringIO import StringIO 121 | f = StringIO() 122 | export_graphviz(tree, f, **kwargs) 123 | import graphviz 124 | return graphviz.Source(f.getvalue()) 125 | 126 | __all__ = ['plot_2d_separator', 
'plot_2d_classification', 'plot_2d_scores', 127 | 'cm', 'visualize_coefficients', 'print_topics', 'heatmap', 128 | 'discrete_scatter'] 129 | -------------------------------------------------------------------------------- /preamble.py: -------------------------------------------------------------------------------- 1 | from IPython.display import set_matplotlib_formats, display 2 | import pandas as pd 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import mglearn 6 | from cycler import cycler 7 | 8 | set_matplotlib_formats('pdf', 'png') 9 | plt.rcParams['savefig.dpi'] = 300 10 | plt.rcParams['image.cmap'] = "viridis" 11 | plt.rcParams['image.interpolation'] = "none" 12 | plt.rcParams['savefig.bbox'] = "tight" 13 | plt.rcParams['lines.linewidth'] = 2 14 | plt.rcParams['legend.numpoints'] = 1 15 | plt.rc('axes', prop_cycle=( 16 | cycler('color', mglearn.plot_helpers.cm_cycle.colors) + 17 | cycler('linestyle', ['-', '-', "--", (0, (3, 3)), (0, (1.5, 1.5))]))) 18 | 19 | np.set_printoptions(precision=3, suppress=True) 20 | 21 | pd.set_option("display.max_columns", 8) 22 | pd.set_option('display.precision', 2) 23 | 24 | __all__ = ['np', 'mglearn', 'display', 'plt', 'pd'] 25 | --------------------------------------------------------------------------------
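For quick experimentation outside the notebooks, here is a minimal usage sketch for two of the helpers above (assuming ``mglearn`` is importable; the data is synthetic and purely illustrative):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from mglearn.tools import discrete_scatter, heatmap

# one marker and color per class; returns the Line2D artists for the legend
X, y = make_blobs(centers=3, random_state=42)
discrete_scatter(X[:, 0], X[:, 1], y)
plt.legend(["class 0", "class 1", "class 2"], loc="best")

# annotated matrix plot, e.g. for mean cross-validation scores over a grid
plt.figure()
scores = np.random.RandomState(0).uniform(size=(3, 4))
heatmap(scores, xlabel="gamma", ylabel="C",
        xticklabels=["0.01", "0.1", "1", "10"],
        yticklabels=["0.1", "1", "10"], cmap="viridis")
plt.show()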