├── file.txt
├── .gitignore
├── requirements.txt
├── README.md
├── step-0-prototype.ipynb
└── lineapy-trial-prototype.ipynb
/file.txt:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # IDEs
2 | .idea
3 | .vscode
4 |
5 | ## OS configs
6 | .DS_Store
7 |
8 | # Project
9 | data/*
10 | models/*
11 | reports/*
12 |
13 | # Python
14 | __pycache__
15 | .ipynb_checkpoints
16 |
17 | # Venv
18 | dvc-venv
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | dvc>=2.8.3,<3
2 | joblib>=1.0.1,<2
3 | jupyter>=1.0.0,<2
4 | jupyter_contrib_nbextensions>=0.5.1,<1
5 | matplotlib>=3.4.3,<4
6 | numpy>=1.21.2,<2
7 | pandas>=1.3.2,<2
8 | pytest>=6.2.4,<7
9 | python-box>=5.4.1,<6
10 | pyyaml>=5.4.1,<6
11 | scikit-learn>=0.24.2,<2
12 | scipy>=1.7.1,<2
13 | tqdm>=4.62.2,<5
14 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # course-ds-base
2 |
3 | ## Preparation
4 |
5 | ### 1. Fork / Clone this repository
6 |
7 | ```bash
8 | git clone https://github.com/iterative/course-ds-base.git
9 | cd course-ds-base
10 | ```
11 |
12 |
13 | ### 2. Create and activate virtual environment
14 |
15 | Create a virtual environment named `dvc-venv` (you may use another name):
16 | ```bash
17 | python3 -m venv dvc-venv
18 | echo "export PYTHONPATH=$PWD" >> dvc-venv/bin/activate
19 | source dvc-venv/bin/activate
20 | ```
21 | Install Python libraries:
22 |
23 | ```bash
24 | pip install --upgrade pip setuptools wheel
25 | pip install -r requirements.txt
26 | ```
27 |
28 | Add Virtual Environment to Jupyter Notebook
29 |
30 | ```bash
31 | python -m ipykernel install --user --name=dvc-venv
32 | ```
33 |
34 | Configure ToC for jupyter notebook (optional)
35 |
36 | ```bash
37 | jupyter contrib nbextension install --user
38 | jupyter nbextension enable toc2/main
39 | ```
40 |
41 | ### 3. Run Jupyter Notebook
42 |
43 | ```bash
44 | jupyter notebook
45 | ```
46 |
47 |
--------------------------------------------------------------------------------
/step-0-prototype.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "ExecuteTime": {
8 | "end_time": "2019-06-16T21:17:31.460557Z",
9 | "start_time": "2019-06-16T21:17:29.395297Z"
10 | }
11 | },
12 | "outputs": [],
13 | "source": [
14 | "import itertools\n",
15 | "import matplotlib.pyplot as plt\n",
16 | "import numpy as np\n",
17 | "import pandas as pd\n",
18 | "from sklearn.metrics import confusion_matrix, f1_score\n",
19 | "from sklearn.linear_model import LogisticRegression\n",
20 | "from sklearn.model_selection import train_test_split"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "# Load dataset"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {
34 | "ExecuteTime": {
35 | "end_time": "2019-06-16T21:17:31.485189Z",
36 | "start_time": "2019-06-16T21:17:31.473720Z"
37 | }
38 | },
39 | "outputs": [],
40 | "source": [
41 | "# Get data \n",
42 | "\n",
43 | "import pandas as pd\n",
44 | "from sklearn.datasets import load_iris\n",
45 | "\n",
46 | "data = load_iris(as_frame=True)\n",
47 | "dataset = data.frame\n",
48 | "dataset.head()"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "# print labels for target values \n",
58 | "\n",
59 | "[print(f'{target}: {label}') for target, label in zip(data.target.unique(), data.target_names)]"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {
66 | "ExecuteTime": {
67 | "end_time": "2019-06-16T21:17:32.328046Z",
68 | "start_time": "2019-06-16T21:17:32.323611Z"
69 | }
70 | },
71 | "outputs": [],
72 | "source": [
73 | "# feature names: remove the ' (cm)' unit marker and snake_case the rest (str.strip(' (cm)') would strip a character set, not the literal suffix)\n",
74 | "\n",
75 | "dataset.columns = [colname.replace(' (cm)', '').replace(' ', '_') for colname in dataset.columns.tolist()]\n",
76 | "\n",
77 | "feature_names = dataset.columns.tolist()[:4]\n",
78 | "feature_names"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 | "# Feature engineering"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {
92 | "ExecuteTime": {
93 | "end_time": "2019-06-16T21:21:02.150708Z",
94 | "start_time": "2019-06-16T21:21:02.144518Z"
95 | }
96 | },
97 | "outputs": [],
98 | "source": [
99 | "dataset['sepal_length_to_sepal_width'] = dataset['sepal_length'] / dataset['sepal_width']\n",
100 | "dataset['petal_length_to_petal_width'] = dataset['petal_length'] / dataset['petal_width']\n",
101 | "\n",
102 | "dataset = dataset[[\n",
103 | " 'sepal_length', 'sepal_width', 'petal_length', 'petal_width',\n",
104 | "# 'sepal_length_in_square', 'sepal_width_in_square', 'petal_length_in_square', 'petal_width_in_square',\n",
105 | " 'sepal_length_to_sepal_width', 'petal_length_to_petal_width',\n",
106 | " 'target'\n",
107 | "]]"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": null,
113 | "metadata": {
114 | "ExecuteTime": {
115 | "end_time": "2019-06-16T21:21:02.987144Z",
116 | "start_time": "2019-06-16T21:21:02.976092Z"
117 | }
118 | },
119 | "outputs": [],
120 | "source": [
121 | "dataset.head()"
122 | ]
123 | },
124 | {
125 | "cell_type": "markdown",
126 | "metadata": {},
127 | "source": [
128 | "# Split dataset"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": null,
134 | "metadata": {
135 | "ExecuteTime": {
136 | "end_time": "2019-06-16T21:21:06.361378Z",
137 | "start_time": "2019-06-16T21:21:06.358647Z"
138 | }
139 | },
140 | "outputs": [],
141 | "source": [
142 | "test_size=0.2"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "## Split train/test"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {
156 | "ExecuteTime": {
157 | "end_time": "2019-06-16T21:21:07.438133Z",
158 | "start_time": "2019-06-16T21:21:07.431649Z"
159 | }
160 | },
161 | "outputs": [],
162 | "source": [
163 | "train_dataset, test_dataset = train_test_split(dataset, test_size=test_size, random_state=42)\n",
164 | "train_dataset.shape, test_dataset.shape"
165 | ]
166 | },
167 | {
168 | "cell_type": "markdown",
169 | "metadata": {},
170 | "source": [
171 | "# Train"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": null,
177 | "metadata": {
178 | "ExecuteTime": {
179 | "end_time": "2019-06-16T21:21:10.932148Z",
180 | "start_time": "2019-06-16T21:21:10.927844Z"
181 | }
182 | },
183 | "outputs": [],
184 | "source": [
185 | "# Get X and Y\n",
186 | "\n",
187 | "y_train = train_dataset.loc[:, 'target'].values.astype('int32')\n",
188 | "X_train = train_dataset.drop('target', axis=1).values.astype('float32')"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": null,
194 | "metadata": {
195 | "ExecuteTime": {
196 | "end_time": "2019-06-16T21:21:55.427365Z",
197 | "start_time": "2019-06-16T21:21:55.416431Z"
198 | }
199 | },
200 | "outputs": [],
201 | "source": [
202 | "# Create an instance of the Logistic Regression classifier and fit it to the training data\n",
203 | "\n",
204 | "logreg = LogisticRegression(C=0.001, solver='lbfgs', multi_class='multinomial', max_iter=100)\n",
205 | "logreg.fit(X_train, y_train)"
206 | ]
207 | },
208 | {
209 | "cell_type": "markdown",
210 | "metadata": {},
211 | "source": [
212 | "# Evaluate"
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": null,
218 | "metadata": {
219 | "ExecuteTime": {
220 | "end_time": "2019-06-16T21:21:55.875303Z",
221 | "start_time": "2019-06-16T21:21:55.864724Z"
222 | }
223 | },
224 | "outputs": [],
225 | "source": [
226 | "def plot_confusion_matrix(cm,\n",
227 | " target_names,\n",
228 | " title='Confusion matrix',\n",
229 | " cmap=None,\n",
230 | " normalize=True):\n",
231 | " \"\"\"\n",
232 | " given a sklearn confusion matrix (cm), make a nice plot\n",
233 | "\n",
234 | " Arguments\n",
235 | " ---------\n",
236 | " cm: confusion matrix from sklearn.metrics.confusion_matrix\n",
237 | "\n",
238 | " target_names: given classification classes such as [0, 1, 2]\n",
239 | " the class names, for example: ['high', 'medium', 'low']\n",
240 | "\n",
241 | " title: the text to display at the top of the matrix\n",
242 | "\n",
243 | " cmap: the gradient of the values displayed from matplotlib.pyplot.cm\n",
244 | " see http://matplotlib.org/examples/color/colormaps_reference.html\n",
245 | " plt.get_cmap('jet') or plt.cm.Blues\n",
246 | "\n",
247 | " normalize: If False, plot the raw numbers\n",
248 | " If True, plot the proportions\n",
249 | "\n",
250 | " Usage\n",
251 | " -----\n",
252 | " plot_confusion_matrix(cm = cm, # confusion matrix created by\n",
253 | " # sklearn.metrics.confusion_matrix\n",
254 | " normalize = True, # show proportions\n",
255 | " target_names = y_labels_vals, # list of names of the classes\n",
256 | " title = best_estimator_name) # title of graph\n",
257 | "\n",
258 | " Citation\n",
259 | " --------\n",
260 | " http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html\n",
261 | "\n",
262 | " \"\"\"\n",
263 | "\n",
264 | " accuracy = np.trace(cm) / float(np.sum(cm))\n",
265 | " misclass = 1 - accuracy\n",
266 | "\n",
267 | " if cmap is None:\n",
268 | " cmap = plt.get_cmap('Blues')\n",
269 | "\n",
270 | " plt.figure(figsize=(8, 6))\n",
271 | " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n",
272 | " plt.title(title)\n",
273 | " plt.colorbar()\n",
274 | "\n",
275 | " if target_names is not None:\n",
276 | " tick_marks = np.arange(len(target_names))\n",
277 | " plt.xticks(tick_marks, target_names, rotation=45)\n",
278 | " plt.yticks(tick_marks, target_names)\n",
279 | "\n",
280 | " if normalize:\n",
281 | " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n",
282 | "\n",
283 | " thresh = cm.max() / 1.5 if normalize else cm.max() / 2\n",
284 | " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n",
285 | " if normalize:\n",
286 | " plt.text(j, i, \"{:0.4f}\".format(cm[i, j]),\n",
287 | " horizontalalignment=\"center\",\n",
288 | " color=\"white\" if cm[i, j] > thresh else \"black\")\n",
289 | " else:\n",
290 | " plt.text(j, i, \"{:,}\".format(cm[i, j]),\n",
291 | " horizontalalignment=\"center\",\n",
292 | " color=\"white\" if cm[i, j] > thresh else \"black\")\n",
293 | "\n",
294 | " plt.tight_layout()\n",
295 | " plt.ylabel('True label')\n",
296 | " plt.xlabel('Predicted label\\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))\n",
297 | " plt.show()"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": null,
303 | "metadata": {
304 | "ExecuteTime": {
305 | "end_time": "2019-06-16T21:21:56.090756Z",
306 | "start_time": "2019-06-16T21:21:56.086966Z"
307 | }
308 | },
309 | "outputs": [],
310 | "source": [
311 | "# Get X and Y\n",
312 | "\n",
313 | "y_test = test_dataset.loc[:, 'target'].values.astype('int32')\n",
314 | "X_test = test_dataset.drop('target', axis=1).values.astype('float32')"
315 | ]
316 | },
317 | {
318 | "cell_type": "code",
319 | "execution_count": null,
320 | "metadata": {
321 | "ExecuteTime": {
322 | "end_time": "2019-06-16T21:21:56.270245Z",
323 | "start_time": "2019-06-16T21:21:56.265054Z"
324 | }
325 | },
326 | "outputs": [],
327 | "source": [
328 | "prediction = logreg.predict(X_test)\n",
329 | "cm = confusion_matrix(y_true=y_test, y_pred=prediction)  # sklearn order is (y_true, y_pred): rows = true labels, columns = predictions\n",
330 | "f1 = f1_score(y_true = y_test, y_pred = prediction, average='macro')"
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": null,
336 | "metadata": {
337 | "ExecuteTime": {
338 | "end_time": "2019-06-16T21:21:56.493617Z",
339 | "start_time": "2019-06-16T21:21:56.489929Z"
340 | }
341 | },
342 | "outputs": [],
343 | "source": [
344 | "# f1 score value\n",
345 | "f1"
346 | ]
347 | },
348 | {
349 | "cell_type": "code",
350 | "execution_count": null,
351 | "metadata": {
352 | "ExecuteTime": {
353 | "end_time": "2019-06-16T21:21:56.966279Z",
354 | "start_time": "2019-06-16T21:21:56.726149Z"
355 | }
356 | },
357 | "outputs": [],
358 | "source": [
359 | "plot_confusion_matrix(cm, data.target_names, normalize=False)"
360 | ]
361 | },
362 | {
363 | "cell_type": "code",
364 | "execution_count": null,
365 | "metadata": {},
366 | "outputs": [],
367 | "source": []
368 | }
369 | ],
370 | "metadata": {
371 | "kernelspec": {
372 | "display_name": "Python 3 (ipykernel)",
373 | "language": "python",
374 | "name": "python3"
375 | },
376 | "language_info": {
377 | "codemirror_mode": {
378 | "name": "ipython",
379 | "version": 3
380 | },
381 | "file_extension": ".py",
382 | "mimetype": "text/x-python",
383 | "name": "python",
384 | "nbconvert_exporter": "python",
385 | "pygments_lexer": "ipython3",
386 | "version": "3.9.2"
387 | },
388 | "toc": {
389 | "base_numbering": 1,
390 | "nav_menu": {},
391 | "number_sections": true,
392 | "sideBar": true,
393 | "skip_h1_title": false,
394 | "title_cell": "Table of Contents",
395 | "title_sidebar": "Contents",
396 | "toc_cell": false,
397 | "toc_position": {},
398 | "toc_section_display": true,
399 | "toc_window_display": true
400 | },
401 | "varInspector": {
402 | "cols": {
403 | "lenName": 16,
404 | "lenType": 16,
405 | "lenVar": 40
406 | },
407 | "kernels_config": {
408 | "python": {
409 | "delete_cmd_postfix": "",
410 | "delete_cmd_prefix": "del ",
411 | "library": "var_list.py",
412 | "varRefreshCmd": "print(var_dic_list())"
413 | },
414 | "r": {
415 | "delete_cmd_postfix": ") ",
416 | "delete_cmd_prefix": "rm(",
417 | "library": "var_list.r",
418 | "varRefreshCmd": "cat(var_dic_list()) "
419 | }
420 | },
421 | "types_to_exclude": [
422 | "module",
423 | "function",
424 | "builtin_function_or_method",
425 | "instance",
426 | "_Feature"
427 | ],
428 | "window_display": false
429 | }
430 | },
431 | "nbformat": 4,
432 | "nbformat_minor": 4
433 | }
434 |
--------------------------------------------------------------------------------
/lineapy-trial-prototype.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "Requirement already satisfied: lineapy in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (0.2.3)\n",
13 | "Requirement already satisfied: jinja2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2.11.2)\n",
14 | "Requirement already satisfied: pandas in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.3.2)\n",
15 | "Requirement already satisfied: pydantic in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.8.2)\n",
16 | "Requirement already satisfied: networkx in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2.5)\n",
17 | "Requirement already satisfied: SQLAlchemy<2.0.0,>=1.4 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.4.47)\n",
18 | "Requirement already satisfied: requests in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2.25.1)\n",
19 | "Requirement already satisfied: alembic==1.8.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.8.0)\n",
20 | "Requirement already satisfied: IPython>=7.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (7.19.0)\n",
21 | "Requirement already satisfied: isort in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (5.9.3)\n",
22 | "Requirement already satisfied: rich in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (12.4.4)\n",
23 | "Requirement already satisfied: click>=8.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (8.1.3)\n",
24 | "Requirement already satisfied: pyyaml in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (5.3.1)\n",
25 | "Requirement already satisfied: fsspec in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2022.7.1)\n",
26 | "Requirement already satisfied: nbconvert<7.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (6.0.7)\n",
27 | "Requirement already satisfied: nbformat in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (5.0.8)\n",
28 | "Requirement already satisfied: cloudpickle in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.6.0)\n",
29 | "Requirement already satisfied: asttokens in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2.2.1)\n",
30 | "Requirement already satisfied: black in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (21.7b0)\n",
31 | "Requirement already satisfied: typing-extensions>=4.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (4.3.0)\n",
32 | "Requirement already satisfied: importlib-metadata in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from alembic==1.8.0->lineapy) (2.0.0)\n",
33 | "Requirement already satisfied: Mako in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from alembic==1.8.0->lineapy) (1.2.4)\n",
34 | "Requirement already satisfied: importlib-resources in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from alembic==1.8.0->lineapy) (5.7.1)\n",
35 | "Requirement already satisfied: appnope in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (0.1.0)\n",
36 | "Requirement already satisfied: jedi>=0.10 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (0.17.1)\n",
37 | "Requirement already satisfied: pygments in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (2.7.2)\n",
38 | "Requirement already satisfied: pickleshare in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (0.7.5)\n",
39 | "Requirement already satisfied: traitlets>=4.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (5.0.5)\n",
40 | "Requirement already satisfied: pexpect>4.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (4.8.0)\n",
41 | "Requirement already satisfied: decorator in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (4.4.2)\n",
42 | "Requirement already satisfied: backcall in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (0.2.0)\n",
43 | "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (3.0.8)\n",
44 | "Requirement already satisfied: setuptools>=18.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (50.3.1.post20201107)\n",
45 | "Requirement already satisfied: bleach in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (3.2.1)\n",
46 | "Requirement already satisfied: testpath in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.4.4)\n",
47 | "Requirement already satisfied: jupyter-core in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (4.6.3)\n",
48 | "Requirement already satisfied: pandocfilters>=1.4.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (1.4.3)\n",
49 | "Requirement already satisfied: defusedxml in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.6.0)\n",
50 | "Requirement already satisfied: mistune<2,>=0.8.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.8.4)\n",
51 | "Requirement already satisfied: jupyterlab-pygments in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.1.2)\n",
52 | "Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.5.1)\n",
53 | "Requirement already satisfied: entrypoints>=0.2.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.3)\n",
54 | "Requirement already satisfied: MarkupSafe>=0.23 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jinja2->lineapy) (1.1.1)\n",
55 | "Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbformat->lineapy) (3.2.0)\n",
56 | "Requirement already satisfied: ipython-genutils in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbformat->lineapy) (0.2.0)\n",
57 | "Requirement already satisfied: greenlet!=0.4.17 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from SQLAlchemy<2.0.0,>=1.4->lineapy) (2.0.2)\n",
58 | "Requirement already satisfied: six in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from asttokens->lineapy) (1.15.0)\n",
59 | "Requirement already satisfied: tomli<2.0.0,>=0.2.6 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (1.2.3)\n",
60 | "Requirement already satisfied: regex>=2020.1.8 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (2020.10.15)\n",
61 | "Requirement already satisfied: mypy-extensions>=0.4.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (0.4.3)\n",
62 | "Requirement already satisfied: appdirs in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (1.4.4)\n",
63 | "Requirement already satisfied: pathspec<1,>=0.8.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (0.9.0)\n",
64 | "Requirement already satisfied: python-dateutil>=2.7.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas->lineapy) (2.8.1)\n",
65 | "Requirement already satisfied: pytz>=2017.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas->lineapy) (2022.1)\n",
66 | "Requirement already satisfied: numpy>=1.17.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas->lineapy) (1.18.5)\n",
67 | "Requirement already satisfied: idna<3,>=2.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from requests->lineapy) (2.10)\n",
68 | "Requirement already satisfied: certifi>=2017.4.17 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from requests->lineapy) (2020.6.20)\n",
69 | "Requirement already satisfied: chardet<5,>=3.0.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from requests->lineapy) (3.0.4)\n",
70 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from requests->lineapy) (1.25.11)\n",
71 | "Requirement already satisfied: commonmark<0.10.0,>=0.9.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from rich->lineapy) (0.9.1)\n",
72 | "Requirement already satisfied: parso<0.8.0,>=0.7.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jedi>=0.10->IPython>=7.0.0->lineapy) (0.7.0)\n"
73 | ]
74 | },
75 | {
76 | "name": "stdout",
77 | "output_type": "stream",
78 | "text": [
79 | "Requirement already satisfied: attrs>=17.4.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat->lineapy) (20.3.0)\n",
80 | "Requirement already satisfied: pyrsistent>=0.14.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat->lineapy) (0.17.3)\n",
81 | "Requirement already satisfied: jupyter-client>=6.1.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (6.1.7)\n",
82 | "Requirement already satisfied: async-generator in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (1.10)\n",
83 | "Requirement already satisfied: nest-asyncio in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (1.5.1)\n",
84 | "Requirement already satisfied: ptyprocess>=0.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pexpect>4.3->IPython>=7.0.0->lineapy) (0.6.0)\n",
85 | "Requirement already satisfied: wcwidth in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->IPython>=7.0.0->lineapy) (0.2.5)\n",
86 | "Requirement already satisfied: webencodings in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from bleach->nbconvert<7.0.0->lineapy) (0.5.1)\n",
87 | "Requirement already satisfied: packaging in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from bleach->nbconvert<7.0.0->lineapy) (20.4)\n",
88 | "Requirement already satisfied: zipp>=0.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from importlib-metadata->alembic==1.8.0->lineapy) (3.4.0)\n",
89 | "Requirement already satisfied: tornado>=4.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jupyter-client>=6.1.5->nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (6.1)\n",
90 | "Requirement already satisfied: pyzmq>=13 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jupyter-client>=6.1.5->nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (19.0.2)\n",
91 | "Requirement already satisfied: pyparsing>=2.0.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from packaging->bleach->nbconvert<7.0.0->lineapy) (2.4.7)\n",
92 | "\n",
93 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n",
94 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
95 | ]
96 | }
97 | ],
98 | "source": [
99 | "! pip install lineapy"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 2,
105 | "metadata": {},
106 | "outputs": [
107 | {
108 | "name": "stdout",
109 | "output_type": "stream",
110 | "text": [
111 | "Requirement already satisfied: pandas==1.3.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (1.3.2)\n",
112 | "Requirement already satisfied: python-dateutil>=2.7.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas==1.3.2) (2.8.1)\n",
113 | "Requirement already satisfied: numpy>=1.17.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas==1.3.2) (1.18.5)\n",
114 | "Requirement already satisfied: pytz>=2017.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas==1.3.2) (2022.1)\n",
115 | "Requirement already satisfied: six>=1.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas==1.3.2) (1.15.0)\n",
116 | "\n",
117 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n",
118 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
119 | ]
120 | }
121 | ],
122 | "source": [
123 | "! python -m pip install pandas==1.3.2"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": 3,
129 | "metadata": {},
130 | "outputs": [],
131 | "source": [
132 | "%load_ext lineapy"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 4,
138 | "metadata": {},
139 | "outputs": [
140 | {
141 | "data": {
142 | "text/plain": [
143 | "lineapy_config(home_dir=PosixPath('/Users/jenif/.lineapy'), database_url='sqlite:////Users/jenif/.lineapy/db.sqlite', artifact_storage_dir=PosixPath('/Users/jenif/.lineapy/linea_pickles'), customized_annotation_folder=PosixPath('/Users/jenif/.lineapy/custom-annotations'), do_not_track=False, logging_level='INFO', logging_file=PosixPath('/Users/jenif/.lineapy/lineapy.log'), storage_options=None, mlflow_registry_uri=None, mlflow_tracking_uri=None, default_ml_models_storage_backend=None)"
144 | ]
145 | },
146 | "execution_count": 4,
147 | "metadata": {},
148 | "output_type": "execute_result"
149 | }
150 | ],
151 | "source": [
152 | "lineapy.options"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 5,
158 | "metadata": {
159 | "ExecuteTime": {
160 | "end_time": "2019-06-16T21:17:31.460557Z",
161 | "start_time": "2019-06-16T21:17:29.395297Z"
162 | }
163 | },
164 | "outputs": [],
165 | "source": [
166 | "import lineapy\n",
167 | "import joblib\n",
168 | "import json\n",
169 | "import itertools\n",
170 | "import matplotlib.pyplot as plt\n",
171 | "import numpy as np\n",
172 | "import pandas as pd\n",
173 | "from sklearn.metrics import confusion_matrix, f1_score\n",
174 | "from sklearn.linear_model import LogisticRegression\n",
175 | "from sklearn.model_selection import train_test_split\n"
176 | ]
177 | },
178 | {
179 | "cell_type": "markdown",
180 | "metadata": {},
181 | "source": [
182 | "# Load dataset"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 6,
188 | "metadata": {
189 | "ExecuteTime": {
190 | "end_time": "2019-06-16T21:17:31.485189Z",
191 | "start_time": "2019-06-16T21:17:31.473720Z"
192 | }
193 | },
194 | "outputs": [
195 | {
196 | "data": {
197 | "text/html": [
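198 | "<div>\n",
199 | "<style scoped>\n",
200 | "    .dataframe tbody tr th:only-of-type {\n",
201 | "        vertical-align: middle;\n",
202 | "    }\n",
203 | "\n",
204 | "    .dataframe tbody tr th {\n",
205 | "        vertical-align: top;\n",
206 | "    }\n",
207 | "\n",
208 | "    .dataframe thead th {\n",
209 | "        text-align: right;\n",
210 | "    }\n",
211 | "</style>\n",
212 | "<table border=\"1\" class=\"dataframe\">\n",
213 | "  <thead>\n",
214 | "    <tr style=\"text-align: right;\">\n",
215 | "      <th></th>\n",
216 | "      <th>sepal length (cm)</th>\n",
217 | "      <th>sepal width (cm)</th>\n",
218 | "      <th>petal length (cm)</th>\n",
219 | "      <th>petal width (cm)</th>\n",
220 | "      <th>target</th>\n",
221 | "    </tr>\n",
222 | "  </thead>\n",
223 | "  <tbody>\n",
224 | "    <tr>\n",
225 | "      <th>0</th>\n",
226 | "      <td>5.1</td>\n",
227 | "      <td>3.5</td>\n",
228 | "      <td>1.4</td>\n",
229 | "      <td>0.2</td>\n",
230 | "      <td>0</td>\n",
231 | "    </tr>\n",
232 | "    <tr>\n",
233 | "      <th>1</th>\n",
234 | "      <td>4.9</td>\n",
235 | "      <td>3.0</td>\n",
236 | "      <td>1.4</td>\n",
237 | "      <td>0.2</td>\n",
238 | "      <td>0</td>\n",
239 | "    </tr>\n",
240 | "    <tr>\n",
241 | "      <th>2</th>\n",
242 | "      <td>4.7</td>\n",
243 | "      <td>3.2</td>\n",
244 | "      <td>1.3</td>\n",
245 | "      <td>0.2</td>\n",
246 | "      <td>0</td>\n",
247 | "    </tr>\n",
248 | "    <tr>\n",
249 | "      <th>3</th>\n",
250 | "      <td>4.6</td>\n",
251 | "      <td>3.1</td>\n",
252 | "      <td>1.5</td>\n",
253 | "      <td>0.2</td>\n",
254 | "      <td>0</td>\n",
255 | "    </tr>\n",
256 | "    <tr>\n",
257 | "      <th>4</th>\n",
258 | "      <td>5.0</td>\n",
259 | "      <td>3.6</td>\n",
260 | "      <td>1.4</td>\n",
261 | "      <td>0.2</td>\n",
262 | "      <td>0</td>\n",
263 | "    </tr>\n",
264 | "  </tbody>\n",
265 | "</table>\n",
266 | "</div>"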
267 | ],
268 | "text/plain": [
269 | " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n",
270 | "0 5.1 3.5 1.4 0.2 \n",
271 | "1 4.9 3.0 1.4 0.2 \n",
272 | "2 4.7 3.2 1.3 0.2 \n",
273 | "3 4.6 3.1 1.5 0.2 \n",
274 | "4 5.0 3.6 1.4 0.2 \n",
275 | "\n",
276 | " target \n",
277 | "0 0 \n",
278 | "1 0 \n",
279 | "2 0 \n",
280 | "3 0 \n",
281 | "4 0 "
282 | ]
283 | },
284 | "execution_count": 6,
285 | "metadata": {},
286 | "output_type": "execute_result"
287 | }
288 | ],
289 | "source": [
290 | "# Get data \n",
291 | "\n",
292 | "import pandas as pd\n",
293 | "from sklearn.datasets import load_iris\n",
294 | "\n",
295 | "data = load_iris(as_frame=True)\n",
296 | "dataset = data.frame\n",
297 | "dataset.head()"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": 7,
303 | "metadata": {},
304 | "outputs": [
305 | {
306 | "name": "stdout",
307 | "output_type": "stream",
308 | "text": [
309 | "0: setosa\n",
310 | "1: versicolor\n",
311 | "2: virginica\n"
312 | ]
313 | },
314 | {
315 | "data": {
316 | "text/plain": [
317 | "[None, None, None]"
318 | ]
319 | },
320 | "execution_count": 7,
321 | "metadata": {},
322 | "output_type": "execute_result"
323 | }
324 | ],
325 | "source": [
326 | "# print labels for target values \n",
327 | "\n",
328 | "[print(f'{target}: {label}') for target, label in zip(data.target.unique(), data.target_names)]"
329 | ]
330 | },
331 | {
332 | "cell_type": "code",
333 | "execution_count": 8,
334 | "metadata": {
335 | "ExecuteTime": {
336 | "end_time": "2019-06-16T21:17:32.328046Z",
337 | "start_time": "2019-06-16T21:17:32.323611Z"
338 | }
339 | },
340 | "outputs": [
341 | {
342 | "data": {
343 | "text/plain": [
344 | "['sepal_length', 'sepal_width', 'petal_length', 'petal_width']"
345 | ]
346 | },
347 | "execution_count": 8,
348 | "metadata": {},
349 | "output_type": "execute_result"
350 | }
351 | ],
352 | "source": [
353 | "# feature names: drop the ' (cm)' unit suffix and convert to snake_case\n",
354 | "# (str.replace, not str.strip: strip() treats its argument as a set of characters, not a suffix)\n",
355 | "dataset.columns = [colname.replace(' (cm)', '').replace(' ', '_') for colname in dataset.columns.tolist()]\n",
356 | "\n",
357 | "feature_names = dataset.columns.tolist()[:4]\n",
358 | "feature_names"
359 | ]
360 | },
361 | {
362 | "cell_type": "code",
363 | "execution_count": 9,
364 | "metadata": {},
365 | "outputs": [],
366 | "source": [
367 | "#save raw data as artifact\n",
368 | "dataset_csv = './data/raw/iris.csv'\n",
369 | "dataset.to_csv(dataset_csv, index=False)\n"
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": 10,
375 | "metadata": {},
376 | "outputs": [
377 | {
378 | "data": {
379 | "text/html": [
380 | "\n",
381 | "\n",
394 | "
\n",
395 | " \n",
396 | " \n",
397 | " | \n",
398 | " sepal_length | \n",
399 | " sepal_width | \n",
400 | " petal_length | \n",
401 | " petal_width | \n",
402 | " target | \n",
403 | "
\n",
404 | " \n",
405 | " \n",
406 | " \n",
407 | " | 0 | \n",
408 | " 5.1 | \n",
409 | " 3.5 | \n",
410 | " 1.4 | \n",
411 | " 0.2 | \n",
412 | " 0 | \n",
413 | "
\n",
414 | " \n",
415 | " | 1 | \n",
416 | " 4.9 | \n",
417 | " 3.0 | \n",
418 | " 1.4 | \n",
419 | " 0.2 | \n",
420 | " 0 | \n",
421 | "
\n",
422 | " \n",
423 | " | 2 | \n",
424 | " 4.7 | \n",
425 | " 3.2 | \n",
426 | " 1.3 | \n",
427 | " 0.2 | \n",
428 | " 0 | \n",
429 | "
\n",
430 | " \n",
431 | " | 3 | \n",
432 | " 4.6 | \n",
433 | " 3.1 | \n",
434 | " 1.5 | \n",
435 | " 0.2 | \n",
436 | " 0 | \n",
437 | "
\n",
438 | " \n",
439 | " | 4 | \n",
440 | " 5.0 | \n",
441 | " 3.6 | \n",
442 | " 1.4 | \n",
443 | " 0.2 | \n",
444 | " 0 | \n",
445 | "
\n",
446 | " \n",
447 | " | ... | \n",
448 | " ... | \n",
449 | " ... | \n",
450 | " ... | \n",
451 | " ... | \n",
452 | " ... | \n",
453 | "
\n",
454 | " \n",
455 | " | 145 | \n",
456 | " 6.7 | \n",
457 | " 3.0 | \n",
458 | " 5.2 | \n",
459 | " 2.3 | \n",
460 | " 2 | \n",
461 | "
\n",
462 | " \n",
463 | " | 146 | \n",
464 | " 6.3 | \n",
465 | " 2.5 | \n",
466 | " 5.0 | \n",
467 | " 1.9 | \n",
468 | " 2 | \n",
469 | "
\n",
470 | " \n",
471 | " | 147 | \n",
472 | " 6.5 | \n",
473 | " 3.0 | \n",
474 | " 5.2 | \n",
475 | " 2.0 | \n",
476 | " 2 | \n",
477 | "
\n",
478 | " \n",
479 | " | 148 | \n",
480 | " 6.2 | \n",
481 | " 3.4 | \n",
482 | " 5.4 | \n",
483 | " 2.3 | \n",
484 | " 2 | \n",
485 | "
\n",
486 | " \n",
487 | " | 149 | \n",
488 | " 5.9 | \n",
489 | " 3.0 | \n",
490 | " 5.1 | \n",
491 | " 1.8 | \n",
492 | " 2 | \n",
493 | "
\n",
494 | " \n",
495 | "
\n",
496 | "
150 rows × 5 columns
\n",
497 | "
"
498 | ],
499 | "text/plain": [
500 | " sepal_length sepal_width petal_length petal_width target\n",
501 | "0 5.1 3.5 1.4 0.2 0\n",
502 | "1 4.9 3.0 1.4 0.2 0\n",
503 | "2 4.7 3.2 1.3 0.2 0\n",
504 | "3 4.6 3.1 1.5 0.2 0\n",
505 | "4 5.0 3.6 1.4 0.2 0\n",
506 | ".. ... ... ... ... ...\n",
507 | "145 6.7 3.0 5.2 2.3 2\n",
508 | "146 6.3 2.5 5.0 1.9 2\n",
509 | "147 6.5 3.0 5.2 2.0 2\n",
510 | "148 6.2 3.4 5.4 2.3 2\n",
511 | "149 5.9 3.0 5.1 1.8 2\n",
512 | "\n",
513 | "[150 rows x 5 columns]"
514 | ]
515 | },
516 | "execution_count": 10,
517 | "metadata": {},
518 | "output_type": "execute_result"
519 | }
520 | ],
521 | "source": [
522 | "dataset"
523 | ]
524 | },
525 | {
526 | "cell_type": "code",
527 | "execution_count": 11,
528 | "metadata": {},
529 | "outputs": [
530 | {
531 | "name": "stdout",
532 | "output_type": "stream",
533 | "text": [
534 | "1.3.2\n"
535 | ]
536 | }
537 | ],
538 | "source": [
539 | "print(pd.__version__)"
540 | ]
541 | },
542 | {
543 | "cell_type": "code",
544 | "execution_count": 12,
545 | "metadata": {},
546 | "outputs": [
547 | {
548 | "data": {
549 | "text/plain": [
550 | "LineaArtifact(name='iris-raw', _version=4)"
551 | ]
552 | },
553 | "execution_count": 12,
554 | "metadata": {},
555 | "output_type": "execute_result"
556 | }
557 | ],
558 | "source": [
559 | "#save raw data as artifact to lineapy\n",
560 | "lineapy.save(dataset, \"iris-raw\")"
561 | ]
562 | },
563 | {
564 | "cell_type": "markdown",
565 | "metadata": {},
566 | "source": [
567 | "# Features engineering"
568 | ]
569 | },
570 | {
571 | "cell_type": "code",
572 | "execution_count": 13,
573 | "metadata": {
574 | "ExecuteTime": {
575 | "end_time": "2019-06-16T21:21:02.150708Z",
576 | "start_time": "2019-06-16T21:21:02.144518Z"
577 | }
578 | },
579 | "outputs": [],
580 | "source": [
581 | "dataset['sepal_length_to_sepal_width'] = dataset['sepal_length'] / dataset['sepal_width']\n",
582 | "dataset['petal_length_to_petal_width'] = dataset['petal_length'] / dataset['petal_width']\n",
583 | "\n",
584 | "dataset = dataset[[\n",
585 | " 'sepal_length', 'sepal_width', 'petal_length', 'petal_width',\n",
586 | "# 'sepal_length_in_square', 'sepal_width_in_square', 'petal_length_in_square', 'petal_width_in_square',\n",
587 | " 'sepal_length_to_sepal_width', 'petal_length_to_petal_width',\n",
588 | " 'target'\n",
589 | "]]"
590 | ]
591 | },
592 | {
593 | "cell_type": "code",
594 | "execution_count": 14,
595 | "metadata": {
596 | "ExecuteTime": {
597 | "end_time": "2019-06-16T21:21:02.987144Z",
598 | "start_time": "2019-06-16T21:21:02.976092Z"
599 | }
600 | },
601 | "outputs": [
602 | {
603 | "data": {
604 | "text/html": [
605 | "\n",
606 | "\n",
619 | "
\n",
620 | " \n",
621 | " \n",
622 | " | \n",
623 | " sepal_length | \n",
624 | " sepal_width | \n",
625 | " petal_length | \n",
626 | " petal_width | \n",
627 | " sepal_length_to_sepal_width | \n",
628 | " petal_length_to_petal_width | \n",
629 | " target | \n",
630 | "
\n",
631 | " \n",
632 | " \n",
633 | " \n",
634 | " | 0 | \n",
635 | " 5.1 | \n",
636 | " 3.5 | \n",
637 | " 1.4 | \n",
638 | " 0.2 | \n",
639 | " 1.457143 | \n",
640 | " 7.0 | \n",
641 | " 0 | \n",
642 | "
\n",
643 | " \n",
644 | " | 1 | \n",
645 | " 4.9 | \n",
646 | " 3.0 | \n",
647 | " 1.4 | \n",
648 | " 0.2 | \n",
649 | " 1.633333 | \n",
650 | " 7.0 | \n",
651 | " 0 | \n",
652 | "
\n",
653 | " \n",
654 | " | 2 | \n",
655 | " 4.7 | \n",
656 | " 3.2 | \n",
657 | " 1.3 | \n",
658 | " 0.2 | \n",
659 | " 1.468750 | \n",
660 | " 6.5 | \n",
661 | " 0 | \n",
662 | "
\n",
663 | " \n",
664 | " | 3 | \n",
665 | " 4.6 | \n",
666 | " 3.1 | \n",
667 | " 1.5 | \n",
668 | " 0.2 | \n",
669 | " 1.483871 | \n",
670 | " 7.5 | \n",
671 | " 0 | \n",
672 | "
\n",
673 | " \n",
674 | " | 4 | \n",
675 | " 5.0 | \n",
676 | " 3.6 | \n",
677 | " 1.4 | \n",
678 | " 0.2 | \n",
679 | " 1.388889 | \n",
680 | " 7.0 | \n",
681 | " 0 | \n",
682 | "
\n",
683 | " \n",
684 | "
\n",
685 | "
"
686 | ],
687 | "text/plain": [
688 | " sepal_length sepal_width petal_length petal_width \\\n",
689 | "0 5.1 3.5 1.4 0.2 \n",
690 | "1 4.9 3.0 1.4 0.2 \n",
691 | "2 4.7 3.2 1.3 0.2 \n",
692 | "3 4.6 3.1 1.5 0.2 \n",
693 | "4 5.0 3.6 1.4 0.2 \n",
694 | "\n",
695 | " sepal_length_to_sepal_width petal_length_to_petal_width target \n",
696 | "0 1.457143 7.0 0 \n",
697 | "1 1.633333 7.0 0 \n",
698 | "2 1.468750 6.5 0 \n",
699 | "3 1.483871 7.5 0 \n",
700 | "4 1.388889 7.0 0 "
701 | ]
702 | },
703 | "execution_count": 14,
704 | "metadata": {},
705 | "output_type": "execute_result"
706 | }
707 | ],
708 | "source": [
709 | "dataset.head()"
710 | ]
711 | },
712 | {
713 | "cell_type": "code",
714 | "execution_count": 15,
715 | "metadata": {},
716 | "outputs": [],
717 | "source": [
718 | "# Save features\n",
719 | "features_path = './data/processed/featured_iris.csv'\n",
720 | "dataset.to_csv(features_path, index=False)"
721 | ]
722 | },
723 | {
724 | "cell_type": "code",
725 | "execution_count": 16,
726 | "metadata": {},
727 | "outputs": [
728 | {
729 | "data": {
730 | "text/plain": [
731 | "LineaArtifact(name='iris-preprocessed', _version=4)"
732 | ]
733 | },
734 | "execution_count": 16,
735 | "metadata": {},
736 | "output_type": "execute_result"
737 | }
738 | ],
739 | "source": [
740 | "#save features to lineapy\n",
741 | "lineapy.save(dataset, \"iris-preprocessed\")"
742 | ]
743 | },
744 | {
745 | "cell_type": "markdown",
746 | "metadata": {},
747 | "source": [
748 | "# Split dataset"
749 | ]
750 | },
751 | {
752 | "cell_type": "code",
753 | "execution_count": 17,
754 | "metadata": {
755 | "ExecuteTime": {
756 | "end_time": "2019-06-16T21:21:06.361378Z",
757 | "start_time": "2019-06-16T21:21:06.358647Z"
758 | }
759 | },
760 | "outputs": [],
761 | "source": [
762 | "test_size=0.2"
763 | ]
764 | },
765 | {
766 | "cell_type": "markdown",
767 | "metadata": {},
768 | "source": [
769 | "## Split into train/test"
770 | ]
771 | },
772 | {
773 | "cell_type": "code",
774 | "execution_count": 18,
775 | "metadata": {
776 | "ExecuteTime": {
777 | "end_time": "2019-06-16T21:21:07.438133Z",
778 | "start_time": "2019-06-16T21:21:07.431649Z"
779 | }
780 | },
781 | "outputs": [
782 | {
783 | "data": {
784 | "text/plain": [
785 | "((120, 7), (30, 7))"
786 | ]
787 | },
788 | "execution_count": 18,
789 | "metadata": {},
790 | "output_type": "execute_result"
791 | }
792 | ],
793 | "source": [
794 | "train_dataset, test_dataset = train_test_split(dataset, test_size=test_size, random_state=42)\n",
795 | "train_dataset.shape, test_dataset.shape"
796 | ]
797 | },
798 | {
799 | "cell_type": "code",
800 | "execution_count": 19,
801 | "metadata": {},
802 | "outputs": [],
803 | "source": [
804 | "# Save train and test sets (index=False, like the other CSV artifacts, so the shuffled row index is not written as an extra column)\n",
805 | "trainset_path = './data/processed/train_iris.csv'\n",
806 | "testset_path = './data/processed/test_iris.csv'\n",
807 | "\n",
808 | "train_dataset.to_csv(trainset_path, index=False)\n",
809 | "test_dataset.to_csv(testset_path, index=False)"
810 | ]
811 | },
812 | {
813 | "cell_type": "code",
814 | "execution_count": 20,
815 | "metadata": {},
816 | "outputs": [
817 | {
818 | "data": {
819 | "text/plain": [
820 | "LineaArtifact(name='test-dataset', _version=4)"
821 | ]
822 | },
823 | "execution_count": 20,
824 | "metadata": {},
825 | "output_type": "execute_result"
826 | }
827 | ],
828 | "source": [
829 | "#save train and test sets to lineapy\n",
830 | "lineapy.save(train_dataset, \"train-dataset\")\n",
831 | "lineapy.save(test_dataset, \"test-dataset\")"
832 | ]
833 | },
834 | {
835 | "cell_type": "markdown",
836 | "metadata": {},
837 | "source": [
838 | "# Train"
839 | ]
840 | },
841 | {
842 | "cell_type": "code",
843 | "execution_count": 21,
844 | "metadata": {
845 | "ExecuteTime": {
846 | "end_time": "2019-06-16T21:21:10.932148Z",
847 | "start_time": "2019-06-16T21:21:10.927844Z"
848 | }
849 | },
850 | "outputs": [],
851 | "source": [
852 | "# Get X and Y\n",
853 | "\n",
854 | "y_train = train_dataset.loc[:, 'target'].values.astype('int32')\n",
855 | "X_train = train_dataset.drop('target', axis=1).values.astype('float32')"
856 | ]
857 | },
858 | {
859 | "cell_type": "code",
860 | "execution_count": 22,
861 | "metadata": {
862 | "ExecuteTime": {
863 | "end_time": "2019-06-16T21:21:55.427365Z",
864 | "start_time": "2019-06-16T21:21:55.416431Z"
865 | }
866 | },
867 | "outputs": [
868 | {
869 | "data": {
870 | "text/plain": [
871 | "LogisticRegression(C=0.001, multi_class='multinomial')"
872 | ]
873 | },
874 | "execution_count": 22,
875 | "metadata": {},
876 | "output_type": "execute_result"
877 | }
878 | ],
879 | "source": [
880 | "# Create an instance of the Logistic Regression classifier and fit it to the training data\n",
881 | "\n",
882 | "logreg = LogisticRegression(C=0.001, solver='lbfgs', multi_class='multinomial', max_iter=100)\n",
883 | "logreg.fit(X_train, y_train)"
884 | ]
885 | },
886 | {
887 | "cell_type": "code",
888 | "execution_count": 23,
889 | "metadata": {},
890 | "outputs": [
891 | {
892 | "data": {
893 | "text/plain": [
894 | "['./models/model.joblib']"
895 | ]
896 | },
897 | "execution_count": 23,
898 | "metadata": {},
899 | "output_type": "execute_result"
900 | }
901 | ],
902 | "source": [
903 | "model_path= './models/model.joblib'\n",
904 | "joblib.dump(logreg, model_path)"
905 | ]
906 | },
907 | {
908 | "cell_type": "code",
909 | "execution_count": 24,
910 | "metadata": {},
911 | "outputs": [
912 | {
913 | "data": {
914 | "text/plain": [
915 | "LineaArtifact(name='logreg-model', _version=3)"
916 | ]
917 | },
918 | "execution_count": 24,
919 | "metadata": {},
920 | "output_type": "execute_result"
921 | }
922 | ],
923 | "source": [
924 | "# save the fitted model object (not just its file path) to lineapy\n",
925 | "lineapy.save(logreg, \"logreg-model\")"
926 | ]
927 | },
928 | {
929 | "cell_type": "markdown",
930 | "metadata": {},
931 | "source": [
932 | "# Evaluate"
933 | ]
934 | },
935 | {
936 | "cell_type": "code",
937 | "execution_count": 32,
938 | "metadata": {
939 | "ExecuteTime": {
940 | "end_time": "2019-06-16T21:21:55.875303Z",
941 | "start_time": "2019-06-16T21:21:55.864724Z"
942 | }
943 | },
944 | "outputs": [],
945 | "source": [
946 | "def plot_confusion_matrix(cm,\n",
947 | "                          target_names,\n",
948 | "                          title='Confusion matrix',\n",
949 | "                          cmap=None,\n",
950 | "                          normalize=True):\n",
951 | "    \"\"\"\n",
952 | "    given a sklearn confusion matrix (cm), make a nice annotated plot\n",
953 | "\n",
954 | "    Arguments\n",
955 | "    ---------\n",
956 | "    cm:           confusion matrix from sklearn.metrics.confusion_matrix\n",
957 | "\n",
958 | "    target_names: given classification classes such as [0, 1, 2]\n",
959 | "                  the class names, for example: ['high', 'medium', 'low']\n",
960 | "\n",
961 | "    title:        the text to display at the top of the matrix\n",
962 | "\n",
963 | "    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm\n",
964 | "                  see http://matplotlib.org/examples/color/colormaps_reference.html\n",
965 | "                  plt.get_cmap('jet') or plt.cm.Blues ('Blues' is used when None)\n",
966 | "\n",
967 | "    normalize:    If False, plot the raw numbers\n",
968 | "                  If True, plot the proportions (each row divided by its sum)\n",
969 | "\n",
970 | "    Returns\n",
971 | "    -------\n",
972 | "    the current matplotlib figure (plt.gcf())\n",
973 | "\n",
974 | "    Usage\n",
975 | "    -----\n",
976 | "    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by\n",
977 | "                                                              # sklearn.metrics.confusion_matrix\n",
978 | "                          normalize    = True,                # show proportions\n",
979 | "                          target_names = y_labels_vals,       # list of names of the classes\n",
980 | "                          title        = best_estimator_name) # title of graph\n",
981 | "\n",
982 | "    Citation\n",
983 | "    --------\n",
984 | "    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html\n",
985 | "    \"\"\"\n",
986 | "\n",
987 | "    accuracy = np.trace(cm) / float(np.sum(cm))\n",
988 | "    misclass = 1 - accuracy\n",
989 | "\n",
990 | "    if cmap is None:\n",
991 | "        cmap = plt.get_cmap('Blues')\n",
992 | "\n",
993 | "    plt.figure(figsize=(8, 6))\n",
994 | "    plt.imshow(cm, interpolation='nearest', cmap=cmap)\n",
995 | "    plt.title(title)\n",
996 | "    plt.colorbar()\n",
997 | "\n",
998 | "    if target_names is not None:\n",
999 | "        tick_marks = np.arange(len(target_names))\n",
1000 | "        plt.xticks(tick_marks, target_names, rotation=45)\n",
1001 | "        plt.yticks(tick_marks, target_names)\n",
1002 | "\n",
1003 | "    if normalize:\n",
1004 | "        row_totals = cm.sum(axis=1)[:, np.newaxis]\n",
1005 | "        row_totals[row_totals == 0] = 1  # empty rows stay 0 instead of becoming NaN (0/0)\n",
1006 | "        cm = cm.astype('float') / row_totals\n",
1007 | "\n",
1008 | "    thresh = cm.max() / 1.5 if normalize else cm.max() / 2\n",
1009 | "    cell_format = \"{:0.4f}\" if normalize else \"{:,}\"\n",
1010 | "    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n",
1011 | "        plt.text(j, i, cell_format.format(cm[i, j]),\n",
1012 | "                 horizontalalignment=\"center\",\n",
1013 | "                 color=\"white\" if cm[i, j] > thresh else \"black\")\n",
1014 | "\n",
1015 | "    plt.ylabel('True label')\n",
1016 | "    plt.xlabel('Predicted label\\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))\n",
1017 | "    plt.tight_layout()  # called after the axis labels are set so they are included in the layout\n",
1018 | "    return plt.gcf()"
1019 | ]
1020 | },
1021 | {
1022 | "cell_type": "code",
1023 | "execution_count": 33,
1024 | "metadata": {
1025 | "ExecuteTime": {
1026 | "end_time": "2019-06-16T21:21:56.090756Z",
1027 | "start_time": "2019-06-16T21:21:56.086966Z"
1028 | }
1029 | },
1030 | "outputs": [],
1031 | "source": [
1032 | "# Get X and Y\n",
1033 | "\n",
1034 | "y_test = test_dataset.loc[:, 'target'].values.astype('int32')\n",
1035 | "X_test = test_dataset.drop('target', axis=1).values.astype('float32')"
1036 | ]
1037 | },
1038 | {
1039 | "cell_type": "code",
1040 | "execution_count": 34,
1041 | "metadata": {
1042 | "ExecuteTime": {
1043 | "end_time": "2019-06-16T21:21:56.270245Z",
1044 | "start_time": "2019-06-16T21:21:56.265054Z"
1045 | }
1046 | },
1047 | "outputs": [],
1048 | "source": [
1049 | "prediction = logreg.predict(X_test)\n",
1050 | "cm = confusion_matrix(y_test, prediction)  # (y_true, y_pred): rows are true labels, columns are predictions\n",
1051 | "f1 = f1_score(y_true = y_test, y_pred = prediction, average='macro')"
1052 | ]
1053 | },
1054 | {
1055 | "cell_type": "code",
1056 | "execution_count": 35,
1057 | "metadata": {
1058 | "ExecuteTime": {
1059 | "end_time": "2019-06-16T21:21:56.493617Z",
1060 | "start_time": "2019-06-16T21:21:56.489929Z"
1061 | }
1062 | },
1063 | "outputs": [
1064 | {
1065 | "data": {
1066 | "text/plain": [
1067 | "0.9305555555555555"
1068 | ]
1069 | },
1070 | "execution_count": 35,
1071 | "metadata": {},
1072 | "output_type": "execute_result"
1073 | }
1074 | ],
1075 | "source": [
1076 | "# f1 score value\n",
1077 | "f1"
1078 | ]
1079 | },
1080 | {
1081 | "cell_type": "code",
1082 | "execution_count": 36,
1083 | "metadata": {},
1084 | "outputs": [],
1085 | "source": [
1086 | "# Save metrics\n",
1087 | "metrics_file = './reports/metrics.json'\n",
1088 | "\n",
1089 | "metrics = {\n",
1090 | " 'f1': f1\n",
1091 | "}\n",
1092 | "\n",
1093 | "with open(metrics_file, 'w') as mf:\n",
1094 | " json.dump(\n",
1095 | " obj=metrics,\n",
1096 | " fp=mf,\n",
1097 | " indent=4\n",
1098 | " )\n"
1099 | ]
1100 | },
1101 | {
1102 | "cell_type": "code",
1103 | "execution_count": 37,
1104 | "metadata": {},
1105 | "outputs": [
1106 | {
1107 | "data": {
1108 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAe0AAAHCCAYAAADCTpEYAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAA26klEQVR4nO3dd7xcVdXG8d+ThEDoJaEkAem9SkCaiBTpTYGggBSRLkrxVZSXqoiKCAKKCNJfQpcqoCgqCJIQQiDUKCBJaIFICaEkrPePvS9Obm7L3DJ35zxfPvNh5pwz56yZyZ01a+999lFEYGZmZr1fn0YHYGZmZh3jpG1mZlYIJ20zM7NCOGmbmZkVwknbzMysEE7aZmZmhXDSNmsASQMk3SbpLUnXd2I/+0i6pytjaxRJn5X0TKPjMOvN5PO0zVon6SvAscCqwDvAGOCHEXF/J/e7H/ANYJOImN7ZOHs7SQGsFBHjGx2LWclcaZu1QtKxwDnAGcASwDLAL4Fdu2D3nwKerULC7ghJ/Rodg1kJnLTNWiBpIeA04MiIuCkipkbERxFxW0R8O28zt6RzJE3Kt3MkzZ3XbSFpgqTjJL0m6WVJB+Z1pwInAcMlvSvpa5JOkXRVzfGXlRRNyUzSAZL+JekdSc9L2qdm+f01z9tE0sjc7D5S0iY16+6TdLqkB/J+7pE0sJXX3xT//9TEv5ukHSQ9K+lNSd+r2X5DSQ9K+k/e9nxJ/fO6v+bNHsuvd3jN/r8j6RXg0qZl+Tkr5GN8Oj8eLGmypC0687malc5J26xlGwPzADe3sc33gY2AdYF1gA2BE2vWLwksBAwBvgZcIGmRiDiZVL1fGxHzR8QlbQUiaT7gF8D2EbEAsAmpmb75dosCd+RtFwPOBu6QtFjNZl8BDgQWB/oDx7dx6CVJ78EQ0o+M3wD7AusDnwVOkrR83nYGcAwwkPTebQUcARARm+dt1smv99qa/S9KanU4pPbAEfFP4DvA1ZLmBS4FLouI+9qI12yO56Rt1rLFgMntNF/vA5wWEa9FxOvAqcB+Nes/yus/iog7gXeBVeqM52NgTUkDIuLliBjXwjY7As9FxJURMT0irgGeBnau2ebSiHg2IqYB15F+cLTmI1L//UfACFJCPjci3snHHwesDRARj0TEQ/m4LwC/Bj7Xgdd0ckR8kOOZSUT8BngO+AewFOlHklmlOWmbtewNYGA7fa2DgRdrHr+Yl32yj2ZJ/z1g/tkNJCKmAsOBw4CXJd0hadUOxNMU05Cax6/MRjxvRMSMfL8pqb5as35a0/MlrSzpdkmvSHqb1JLQYtN7jdcj4v12tvkNsCZwXkR80M62ZnM8J22zlj0IvA/s1sY2k0hNu02WycvqMRWYt+bxkrUrI+LuiNiGVHE+TUpm7cXTFNPEOmOaHb8ixbVSRCwIfA9QO89p89QVSfOTBgJeApySm//NKs1J26wFEfEWqR/3gjwAa15Jc0naXtJP8mbXACdKGpQHdJ0EXNXaPtsxBthc0jJ5ENwJTSskLSFpl9y3/QGpmX1GC/u4E1hZ0lck9ZM0HFgduL3OmGbHAsDbwLu5FeDwZutfBZaf5VltOxd4JCIOJvXVX9jpKM0K56Rt1oqIOJt0jvaJwOvAS8BRwO/yJj8ARgFjgceB0XlZPcf6A3Bt3tcjzJxo+wDHkSrpN0l9xUe0sI83gJ3ytm8A/wPsFBGT64lpNh1PGuT2DqkV4Npm608BLs+jy/dqb2eSdgW2I3UJQPocPt00at6sqjy5ipmZWSFcaZuZmRXCSdvMzKwQTtpmZmaFcNI2MzMrhJO2mZlZIXxlnTqo/3yhAZ7nYU6w3opLNDoEM2vB6NGPTI6IQd19nL4Lfipi+iyz6M62mPb63RGxXReE1CYn7TpowKLMvfGxjQ7DusADtx/T6BDMrAUD5lLzKXm7RUyfxtyrtDt1QLveH3NBe9P2dgknbTMzqzCByukpdtI2M7P
qEqD2psnvPZy0zcys2gqqtMuJ1MzMrOJcaZuZWbW5edzMzKwEHohmZmZWjoIq7XJ+XpiZmVWcK20zM6su4eZxMzOzMqio5nEnbTMzq7aCKu1yIjUzM6s4V9pmZlZtbh43MzMrQVnnaZcTqZmZWcW50jYzs+ryVb7MzMwKUlDzuJO2mZlVmPu0zczMrBu40jYzs2rr4z5tMzOz3s9zj5uZmRWkoNHj5fy8MDMzqzhX2mZmVmFljR530jYzs2orqHncSdvMzKqtoEq7nEjNzMwKJem3kl6T9ETNskUl/UHSc/n/i7S3HydtMzOrLqlrbu27DNiu2bLvAvdGxErAvflxm5y0zcys2tSn87d2RMRfgTebLd4VuDzfvxzYrb39uE/bzMyqrWsGog2UNKrm8UURcVE7z1kiIl4GiIiXJS3e3kGctM3MzDpvckQM6+6DOGmbmVmFNfQ87VclLZWr7KWA19p7gvu0zcys2npmIFpLbgX2z/f3B25p7wmutM3MrLp66IIhkq4BtiD1fU8ATgbOBK6T9DXg38Ce7e3HSdvMzKybRcSXW1m11ezsx0nbzMwqzHOPm5mZlcNzj5uZmRWioEq7nEjNzMwqzpW2mZlVm5vHzczMCqCyBqKVE6mZmVnFudI2M7Nqc/O4mZlZGeSkbWZm1vuJspK2+7TNzMwK4UrbzMyqS/lWCCdtMzOrMLl53Mpy4THb8OKIQxl14X6fLFtk/rm5/Ywv8vglB3D7GV9k4fnnbmCEVq977r6LtddYhTVWXZGf/uTMRodjneTPs3tI6vStpzhpG1f+4Ul2PfHmmZYdP3xD7hvzEmt97TLuG/MSx++1QYOis3rNmDGDbx19JLfc9nseHfsk14+4hqeefLLRYVmd/HkaOGkb8MATE3nznfdnWrbTxstz1R/TF8JVf3ySnTdZoRGhWSeMfPhhVlhhRZZbfnn69+/PnsP35vbbbml0WFYnf57dx5W2FW/xhefllTenAvDKm1MZtNC8DY7IZtekSRMZOnTpTx4PGTKUiRMnNjAi6wx/nt3HSbuHSTpA0uBGx2HWm0TELMtKGnBjM/Pn2U3URbceMkckbeAAwEm7C732n/dYctH5AFhy0fl4/a33GhyRza4hQ4YyYcJLnzyeOHECgwf7z6RU/jwNenHSljSfpDskPSbpCUnDJa0v6S+SHpF0t6SlJO0BDAOuljRG0gBJW0l6VNLjkn4rae68zzMlPSlprKSz8rKdJf0jb/9HSUs08nX3Fnc89C/23Xp1APbdenVuf/BfDY7IZtewDTZg/PjneOH55/nwww+5/toR7LjTLo0Oy+rkz7N7iM43jfdki0dvPk97O2BSROwIIGkh4PfArhHxuqThwA8j4iBJRwHHR8QoSfMAlwFbRcSzkq4ADs//3x1YNSJC0sL5OPcDG+VlBwP/AxzXPBhJhwCHADDPIt33qhvg8u9uz2fXXpqBC87D+CsP5vSrHuSsa0dy1fd2ZP9t1+Cl195hnx/e3ugwbTb169ePn597PjvvuC0zZsxg/wMOYvU11mh0WFYnf57dp6RuBrXUT9IbSFoZuBu4DrgdmAL8HWgq+foCL0fEFyTdx3+T9jrAeRGxed7PVsCRwF7AI8Ao4A7g9oj4UNJawM+ApYD+wPMRsV1bsfVZaOmYe+Nju/T1WmNMuf2YRodgZi0YMJceiYhh3X2cfostHwvu8INO72fKVfv0SLy9tnk8Ip4F1gceB34EfAkYFxHr5ttaEfGFFp7a4k+miJgObAjcCOwG3JVXnQecHxFrAYcC83TpCzEzM+sivbZ5PI8GfzMirpL0LqlpepCkjSPiQUlzAStHxDjgHWCB/NSngWUlrRgR44H9gL9Imh+YNyLulPQQMD5vvxDQdN7E/j308szMrJcoqXm81yZtYC3gp5I+Bj4CDgemA7/I/dv9gHOAcaQ+7AslTQM2Bg4ErpfUDxgJXAgsCtyS+7wFNLWLnpK3nQg8BCzXEy/OzMx6AV8
wpGtExN2kPu3mNm9h2xtJzd5N7gXWa7bZy6Tm8ebPvQXwtEJmZhVVUqXda/u0zczMbGa9ttI2MzPrbirs0pxO2mZmVmlO2mZmZqUoJ2e7T9vMzKwUrrTNzKy65OZxMzOzYpSUtN08bmZmVghX2mZmVmklVdpO2mZmVlk+T9vMzKwk5eRs92mbmZmVwpW2mZlVl0/5MjMzK4eTtpmZWSFKStru0zYzMyuEK20zM6u2cgptJ20zM6u2kprHnbTNzKyypLImV3GftpmZWSFcaZuZWaWVVGk7aZuZWaU5aZuZmZWinJztPm0zM7NSuNI2M7NKc/O4mZlZCXzBEDMzszIIKChnu0/bzMysFE7aZmZWYfpkVrTO3No9inSMpHGSnpB0jaR56onWSdvMzCpN6vyt7f1rCHA0MCwi1gT6AnvXE6uTtpmZWffrBwyQ1A+YF5hU707MzMwqq4tGjw+UNKrm8UURcRFAREyUdBbwb2AacE9E3FPPQZy0zcysujrQvN1BkyNiWIuHkBYBdgWWA/4DXC9p34i4anYP4uZxMzOrLAF9+qjTt3ZsDTwfEa9HxEfATcAm9cTrpG1mZta9/g1sJGlepbb4rYCn6tmRm8fNzKzSuntylYj4h6QbgNHAdOBR4KJ69uWkbWZmldYT05hGxMnAyZ3dj5O2mZlVV9cNROsR7tM2MzMrhCttMzOrrHTBkHJKbSdtMzOrsI7NHd5bOGmbmVmlFZSz3adtZmZWClfaZmZWaW4eNzMzK0Fhp3w5aZuZWWWVNnrcfdpmZmaFcKVtZmaVVlCh7aRtZmbVVlLzuJO2mZlVWkE5233aZmZmpXClbWZm1SU3j8/x1ltxCR64/ZhGh2FdYLvzH2h0CNaF7jpq00aHYIVJp3w1OoqOc9I2M7MKK+uCIe7TNjMzK4QrbTMzq7SCCm0nbTMzqzY3j5uZmVmXc6VtZmbV5at8mZmZlaG0q3w5aZuZWaWVlLTdp21mZlYIV9pmZlZpBRXaTtpmZlZtJTWPO2mbmVl1FTZ63H3aZmZmhXClbWZmlaXCLhjipG1mZpVWUM520jYzs2rrU1DWdp+2mZlZIVxpm5lZpRVUaDtpm5lZdUk+T9vMzKwYfcrJ2e7TNjMzK4UrbTMzqzQ3j5uZmRWioJztpG1mZtUl0qxopXCftpmZWSFcaZuZWaWVNHrcSdvMzKpLZV0wxM3jZmZmhXClbWZmlVZQoe2kbWZm1SXKusqXk7aZmVVaQTnbfdpmZmalcKVtZmaVVtLocSdtMzOrrHRpzkZH0XGtJm1J5wHR2vqIOLpbIjIzM+tBc8pAtFE9FoWZmZm1q9WkHRGX1z6WNF9ETO3+kMzMzHpOOXV2B0aPS9pY0pPAU/nxOpJ+2e2RmZmZ9QDlqUw7c+vAMRaWdIOkpyU9JWnjemLtyEC0c4BtgVsBIuIxSZvXczAzM7PeJE2u0iOHOhe4KyL2kNQfmLeenXRo9HhEvNTsl8SMeg5mZmZWNZIWBDYHDgCIiA+BD+vZV0cmV3lJ0iZASOov6XhyU7mZmVnRuqBpvAPN48sDrwOXSnpU0sWS5qsn3I4k7cOAI4EhwERg3fzYzMyseE3nanfmBgyUNKrmdkjNIfoBnwZ+FRHrAVOB79YTa7vN4xExGdinnp2bmZn1dl00I9rkiBjWyroJwISI+Ed+fAN1Ju2OjB5fXtJtkl6X9JqkWyQtX8/BzMzMqiYiXiF1Na+SF20FPFnPvjoyEO3/gAuA3fPjvYFrgM/Uc0AzM7PeogdHj38DuDqPHP8XcGA9O+lI0lZEXFnz+CpJR9VzMDMzs96mJy4YEhFjgNaazzusrbnHF813/yzpu8AI0lzkw4E7OntgMzOz3qCkGdHaqrQfISXpptdzaM26AE7vrqDMzMxsVm3NPb5cTwZiZmbW06SyrvLVkfO0kbSmpL0kfbXp1t2BWePcc/ddrL3GKqyx6or89CdnNjoc64SlFxnAxfus88ntjsM/wx7rLdX
osKxO/tvsHl10nnaPaHcgmqSTgS2A1YE7ge2B+4ErujUya4gZM2bwraOP5I7f/4EhQ4ey2UYbsNNOu7Da6qs3OjSrw0tTpnHw1Y8BaYTsDQdvwN/Gv9ngqKwe/tvsPj0xEK2rdKTS3oN0TtkrEXEgsA4wd7dGZQ0z8uGHWWGFFVlu+eXp378/ew7fm9tvu6XRYVkX+PTSCzPxrfd59Z0PGh2K1cF/mwYdS9rTIuJjYHqe9Pw10jyqNgeaNGkiQ4cu/cnjIUOGMnHixAZGZF1ly1UG8qdnXm90GFYn/212n5KaxzuStEdJWhj4DWlE+Wjg4e4MqiWSTpO0dR3P20LS7d0R05woImZZVlLTkbWsXx+x6fKLct9zbzQ6FKuT/za7hxB91PlbT+nI3ONH5LsXSroLWDAixnZHMEr/ApUr++ZxnNQdx2whhn4RMb0njtUbDRkylAkTXvrk8cSJExg8eHADI7Ku8JllF+HZ195lynsfNToUq5P/Ng3aqLQlfbr5DVgU6Jfvt0rSjyUdUfP4FEnHSfq2pJGSxko6Na9bVtJTkn5JquKXlnSZpCckPS7pmLzdZZL2yPc3kPR3SY9JeljSApLmkXRpfs6jkj7fQlyLSvpdPv5Dktauie8iSfdQ8QF2wzbYgPHjn+OF55/nww8/5PprR7DjTrs0OizrpK1WGci9z0xudBjWCf7b7CZd0DTeW0aP/6yNdQFs2cb6EcA5wC/z472AM4HNgA1JE7bcKmlz4N/AKsCBEXGEpPWBIRGxJkBumv9Enrf1WmB4RIzM/ezTgG8CRMRaklYF7pG0crO4TgUejYjdJG1JStDr5nXrA5tFxLQ2Xtccr1+/fvz83PPZecdtmTFjBvsfcBCrr7FGo8OyTpi7Xx/WX2ZhfnbvPxsdinWC/za7T0ndDG1NrjJLpdpREfGopMUlDQYGAVOAtYEvAI/mzeYHViIl7Rcj4qG8/F/A8pLOI02Xek+z3a8CvBwRI/Ox3gaQtBlwXl72tKQXgeZJezPgS3mbP0laTNJCed2tbSXsfG3UQwCWXmaZDr8XJdpu+x3YbvsdGh2GdZEPpn/Mrr/u8WEo1g38t9k9OjRhSS/RnbHeQDpdbDip8hbwo4hYN99WjIhL8rZTm54UEVNIp5XdBxwJXNxsvyJV+s115KdSS9s07WtqC+v+u1HERRExLCKGDRo4qAOHMjMz61rdmbRHkC7juQcpgd8NHCRpfgBJQyQt3vxJkgYCfSLiRuB/geb9508DgyVtkLdfQFI/4K/APnnZysAywDPNnlu7zRaki5a/3elXamZmRRKpebyzt57SkUtz1iUixklaAJgYES8DL0taDXgwv8B3gX2BGc2eOgS4VFLTD4oTmu33Q0nDgfMkDSD1Z29N6j+/UNLjwHTggIj4oNmbeUre91jgPWD/LnvBZmZWpB66nnaX6Mg0piJVp8tHxGmSlgGWjIh2O8kiYq1mj88Fzm1h0zVrtnmMWatrIuKAmvsjgY1a2M8BzRdExH2kpnYi4k1g1xa2OaWl+M3MbM5XUtLuSPP4L4GNgS/nx+8AF3RbRGZmZtaijjSPfyYiPi3pUUgDxfJpV2ZmZkVL51mXU2p3JGl/JKkveZS1pEHALDOWmZmZlaik5vGOJO1fADcDi0v6IWk0+IndGpWZmVkPKajQ7tDc41dLeoR0eU4Bu0XEU90emZmZmc2kI6PHlyGdHnVb7bKI+Hd3BmZmZtbdBD16la7O6kjz+B2k/mwB8wDLkSYt8aS3ZmZWvJKmMe1I8/hM51rnK3wd2m0RmZmZ9aCCCu3Z/4EREaOBDbohFjMzM2tDR/q0j6152Ic0W9nr3RaRmZlZD5E0x/VpL1Bzfzqpj/vG7gnHzMysZxWUs9tO2nlSlfkj4ts9FI+ZmVmPKmlylVb7tCX1i4gZtHDxDjMzM+t5bVXaD5MS9hhJtwLXA1ObVkbETd0cm5mZWbeaE8/TXhR4A9iS/56vHYCTtpmZFa+gnN1
m0l48jxx/gv8m6ybRrVGZmZn1BJXVp91W0u4LzM/MybqJk7aZmVkPaytpvxwRp/VYJGZmZg2gFmvT3qmtpF3OqzAzM6tDGojW6Cg6rq1pTLfqsSjMzMysXa1W2hHxZk8GYmZm1gglVdodOeXLzMxsjqWCzvly0jYzs8qak/q0zczMrBdxpW1mZtWlOWdGNDMzsznenDb3uJmZ2RzJfdpmZmbWLVxpm5lZpRXUOu6kbWZmVSb6FDRrt5O2mZlVliir0naftpmZWSFcaZuZWXWprNHjTtpmZlZpPk/bzMysAO7TNjMzs27hStvMzCrNzeNmZmaFKChnO2mbmVl1iZ7rJ5bUFxgFTIyInerZh/u0zczMesY3gac6swMnbTMzqy6BpE7f2j2MNBTYEbi4M+G6edzMzCqth7q0zwH+B1igMztxpW1mZtZ5AyWNqrkd0rRC0k7AaxHxSGcP4krbzMwqS3TZKV+TI2JYK+s2BXaRtAMwD7CgpKsiYt/ZPYgrbTMzqzR1wa0tEXFCRAyNiGWBvYE/1ZOwwZW2mZlVnM/TNjMzs1lExH3AffU+30nbzMwqrGOnbPUWTtpmZlZZPTkjWldw0jYzs0orqdIu6QeGmZlZpbnSNjOzSiunznbStoo7f491Gh2CdaFFNjiq0SFYaVRW87iTtpmZVVZpA9FKitXMzKzSXGmbmVmluXnczMysEOWkbCdtMzOruIIKbfdpm5mZlcKVtpmZVVYaPV5Oqe2kbWZmlVZS87iTtpmZVZhQQZW2+7TNzMwK4UrbzMwqzc3jZmZmBfBANDMzs1KorErbfdpmZmaFcKVtZmaVVlKl7aRtZmaV5lO+zMzMrMu50jYzs8oS0KecQttJ28zMqq2k5nEnbTMzq7SSBqK5T9vMzKwQrrTNzKzS3DxuZmZWAA9EMzMzK4YvzWlmZmbdwJW2mZlVV2EXDHHSNjOzSisoZztpm5lZdaWBaOWkbfdpm5mZFcKVtpmZVVo5dbaTtpmZVV1BWdtJ28zMKs3naZuZmVmXc6VtZmaVVtDgcSdtMzOrtoJytpO2mZlVXEFZ233aZmZmhXClbWZmlSXKGj3upG1mZtXlC4aYmZmVo6Cc7T5tMzOzUrjSNjOzaiuo1HbSNjOzClNRA9HcPG5mZlYIV9pmZlZpHj1uZmZWAFFUl7aTtpmZVVxBWdt92mZmZoVw0jYzs0pTF/zX5v6lpSX9WdJTksZJ+ma9sTpp2yzuufsu1l5jFdZYdUV++pMzGx2O1emVSRM4aK8d2OXz67PbVhtw1SW/bHRINpsuPHkfXrz3R4y6/nufLPvi1uvxyA3fZ+ojv+DTqy/TwOjmHFLnb+2YDhwXEasBGwFHSlq9nlidtG0mM2bM4FtHH8ktt/2eR8c+yfUjruGpJ59sdFhWh759+3H8/57BrX9+hKtv+RMjLr+Ifz77dKPDstlw5W0PseuRF8y0bNw/J7H3cb/h/tH/bFBUcx51wa0tEfFyRIzO998BngKG1BOrk7bNZOTDD7PCCiuy3PLL079/f/Ycvje333ZLo8OyOgxaYklWX2tdAOabfwGWW3EVXn1lUmODstnywOh/8uZb78207JnnX+W5F19rUETWWZKWBdYD/lHP8520bSaTJk1k6NClP3k8ZMhQJk6c2MCIrCtMfOlFnh43lrXXG9boUMx6l64os1OpPVDSqJrbIbMcSpofuBH4VkS8XU+4DU/akgZLuqGO590paeF2tjlN0tZ1B1dBETHLMpU084DN4r2p73LMofvynVPOZP4FFmx0OGa9ThcNRJscEcNqbhfNdAxpLlLCvjoibqo31oafpx0Rk4A9mi+X1C8iprfxvB06sO+TOhle5QwZMpQJE1765PHEiRMYPHhwAyOyzvjoo4845pB92XG3vdh6+10bHY5ZryO6f0Y0pcrnEuCpiDi7M/vq0Upb0o8lHVHz+BRJx0l6Ij8+QNL1km4D7pE0r6TrJI2VdK2kf0galrd9QdJAScv
mYfS/yUPp75E0IG9zmaQ98v0NJP1d0mOSHpa0QH7u3ySNzrdNevL96I2GbbAB48c/xwvPP8+HH37I9deOYMeddml0WFaHiODkbx/J8iutwv6HfKPR4ZhV2abAfsCWksbkW7uFZ0t6utIeAZwDNJ17shdwGHBgzTYbA2tHxJuSjgemRMTaktYExrSy35WAL0fE1yVdB3wJuKpppaT+wLXA8IgYKWlBYBrwGrBNRLwvaSXgGqDSnX79+vXj5+eez847bsuMGTPY/4CDWH2NNRodltXh0ZEPctuN17DSqmuwx7bp9+jR3zmZzbfctsGRWUdd/qMD+Oz6KzFw4fkZf9fpnH7hnUx5aypnf2dPBi4yPzf94jDGPjORXZqNMLfZ090dgBFxf1cdpkeTdkQ8KmlxSYOBQcAU4N/NNvtDRLyZ728GnJuf+4Sksa3s+vmIGJPvPwIs22z9KsDLETEy7+ttAEnzAedLWheYAazcWux5UMEhAEsvM2efG7nd9juw3fZ1/Qi0XuTTG27C4y+90+gwrBP2P+GyFpff+ufWvgqtLgUN22lEn/YNpD7sJUmVd3NTa+539K38oOb+DGBAs/UCZh1hBccArwLrkLoK3m/tAHlQwUUA668/rKV9mZlZgXw97baNAPYmJe72Ro3fT2pCJ88es1adx3waGCxpg7yvBST1AxYiVeAfk/ob+ta5fzMzs27X40k7IsYBCwATI+Lldjb/JTAoN4t/BxgLvFXHMT8EhgPnSXoM+AMwT97//pIeIjWNT219L2ZmNifqgWlMu0xDTvmKiLVq7r8ArJnvXwZcVrPp+8C+eaDYCsC9wIt522XzNpObnp+Xn1Vz/4Ca+yNJc77Weg5Yu+bxCXW9IDMzK1Y5jeO94DztdswL/DmflC7g8Fw1m5mZdY2CsnavTtp5YvVKn4JlZmbWpFcnbTMzs+6Upg4vp9R20jYzs+rq4YFkndXwC4aYmZlZx7jSNjOzSiuo0HbSNjOziisoaztpm5lZhamogWju0zYzMyuEK20zM6u0kkaPO2mbmVlliaK6tJ20zcys4grK2u7TNjMzK4QrbTMzq7SSRo87aZuZWaV5IJqZmVkhCsrZ7tM2MzMrhSttMzOrrsKu8uWkbWZmFVdO1nbSNjOzyhJlVdru0zYzMyuEK20zM6u0ggptJ20zM6u2kprHnbTNzKzSSpoRzX3aZmZmhXClbWZm1VZOoe2kbWZm1VZQznbSNjOz6lJhM6K5T9vMzKwQrrTNzKzSSho97qRtZmbVVk7OdvO4mZlZKVxpm5lZpRVUaDtpm5lZtZU0etxJ28zMKkxFDURzn7aZmVkhXGmbmVllibKax11pm5mZFcKVtpmZVZorbTMzM+tyrrTNzKzSSho97qRtZmbVVdhVvpy0zcysskRZM6K5T9vMzKwQrrTNzKzaCiq1nbTNzKzSPBDNzMysECUNRHOftpmZWSFcaZuZWaUVVGi70jYzs4pTF9zaO4S0naRnJI2X9N16Q3WlbWZmldbdA9Ek9QUuALYBJgAjJd0aEU/O7r5caZuZmXWvDYHxEfGviPgQGAHsWs+OXGmbmVll9dD1tIcAL9U8ngB8pp4dOWnXYfToRyYPmEsvNjqOHjAQmNzoIKxL+LOcc1Tls/xUTxxk9OhH7h4wlwZ2wa7mkTSq5vFFEXFRvt/Sz4Ko5yBO2nWIiEGNjqEnSBoVEcMaHYd1nj/LOYc/y64VEdv1wGEmAEvXPB4KTKpnR+7TNjMz614jgZUkLSepP7A3cGs9O3KlbWZm1o0iYrqko4C7gb7AbyNiXD37ctK2tlzU/iZWCH+Wcw5/lgWKiDuBOzu7H0XU1RduZmZmPcx92mZmZoVw0jYzMyuEk7aZzUQq6UKFZtXipG1mn5CkyANdJO0nabNGx2Szr6UfXv4xNmdw0ra6SPKZB3OgmoS9Helc0mcaG5HNrqYfXpI+L2m4pC9D+myduMvnpG2zTdIRwCWSTpX02UbHY11L0ob
AQcBjEfF6XuYv+0Lk5LwjcB7wAXCupO/XrPNnWTAnbZstko4E9gTOJ1255gxJOzc2KuuMFr7E3wD+DawtaVPwl31JJC0FHA98CZgBvAgcI+mn8N/WFCuTmzitwyQtCCwC7EKqxAAuB74t6eOIuKNhwVldmvVh70S6iMEbwMnACcDO+bN90F/2vVdNk/i8EfGypP2AQcBpEbFebj15SNK0iDipweFaJ7jStg6RtG5EvE1qchtMStxfJM2f2xc4UtJ8rsaKIwBJhwFnAMOAm4DdgXNJzatfyV/61gvVJOydgBGSFoiICcACwEN5s36k1rH7GxWndQ0nbWuXpG8Cp0kaGhFvkf7dTCNVZVsAo4ADImKqq7EySFo1f9l/LGkwadDZVyLiVGA74HRgU+BXwMvA842L1trSNOgM+CFwXkS8k1d9BCwi6RfAlcDVEXGPf1iXzdOYWpsk7Qp8D9g2Iv4jacmIeEXSr4GlgNWB3SLiiYYGah0maX7gHOBj4ND8pX8ZqRJ7NCJmSPoisENEHCypf0R82LiIrTlJSwBLRsRj+fGhwIyIuFjS3BHxQV7+WdLf6RsRcW/jIrau4krbWiSp6d/Gp4DRwIqSTgNul/T3iDgUOAzYyAm7OO+REvQMUvIGmAgcByyUHy8GzJ3/HXzU0wFau/YCpkmaN1fOiwBNp3Y1JeyNgX9HxHVO2HMOV9rWIkmLRMQUSYsA15G+4C8H7gAuBs6IiDENDNFmU7NBZ32A1YBvAxMj4vuSLgSWBN4BVgUO9A+y3ikn6iWAE0lN308AvwCmAscCnwEuA74WEX9tUJjWDZy0bRaSDgF2BV4AxkTEb2rW7Qr8CNgqIl5uTIQ2u5ol7OVIXaEvSFqd9CX/SkScKGkNYAjwbES80LiIrSXNPsd5Sf3YfUk/rCcDpwEL59tpEXF7YyK17uKkbTOR9CXgFNIpXSuTBpq9QfpF/0XgVGBPV2BlknQMMJzUNTYO+AEwL/AtYDpwmAcT9m55JP98pAGg00in5y0IXBERj0haCJg7Il6rTfI2Z3CfdsW1MJJ0QeDiiBhJOvXnV6RTvJYG/gJs54RdptzHORzYBtiElKSPjYjHgZ8D75OaXK2Xafo7lbQ5MII0OPRMYDPSj+wpwFGSNo2ItyLiNfBEKnMiJ+0KkzQXsHW+f5SkLUh//EdKWi0ipkXEaNIgl4ER8WpEvNiwgG22tPCD7F3SgLO5ImI6aSDhZyR9Lf8Q+3ZEvNLTcVrrmj7DPMJ/E+DrwLak7qtnSLOebU46RW8i8J/GRGo9xUm72voCu0t6ADgUeCEifgdcCJwnaQtJewKLA5MaF6bNrmZ9n/tLWpvUlPoBaXrShSJiBqk15X0An9bVu+TpSC+U1Dcv2hzYB1goIt4DbgSezcu2iIgTI2JcY6K1nuJpTCssIt6XNAL4Aqnp+yWlq3f9mtR0ejzpS/7rETGxcZHa7KpJ2EcChwDDI2K8pD8BRwPPSfqAdOrQro2L1FqTpyM9B/iUpCkRcaakgaREvkceSHgz6XvcLSQV4YFoFZa/AOYiJegfk5pPz8iTp8wbEe9JmisifJ5uISQtBrwVEdNzpTYC+Gptt4akL5BGiK8MXBoRzzYmWmuNpL65JQRJlwDrkc7YmCLpFNK4hP3zD7FPJlOxOZ+TdkXlCmxHYDzwFHAF6Tzs8aTJNHYnDVZ6x4NZyiBpRVLlfDbwIWmClNuAL0TE200zm0kaGBGTGxmrtS9/nm9FxOv5HPrVSLMPTpH0I1IL2WbABxHxcSNjtZ7jPu0KkrQ36fKahwCLAp+LiKnAwaSJNRYlzUP9thN2OSJiPGm0/2rANpGuhf0Y8HNJ/XLCPgi4UtI8noO696kZJb4h6dzr/5O0YEQcRuq/vkHSohFxArB3HizqhF0hrrQrJs87/QXSNXbXB/YgzTE9XdJyEfF8/oKf3tBArcNqRxjnx6cCywKXkC728Q3gs6Sqe2dgP5+213vl7osTSK1fRwNjgOM
i4k1JV5OmFt7Cf6PV5KRdIZKOAOYmjRb+MfBwRDSd8vV1YEXgJPePlaPZKPHdgVcj4u+STiSdX38j8GdSy8p7wNMR8VzDArZ25Yu3PBwRv5Q0N3AVaQKcvSPiHaXL5I5pZIzWOB49XhH5KkAHALtHxERJywKrS1oG2Il0ytdXnLDLUpOwjyVdMOKrefkPJB1PuuSmgJs8oLB3k7QD0J90gZ4FlK6L/U7+sf0ocBLpXPoxnumsutynXQGSBgDbA/8LfCDpcNJgs3VJ01huQUrYPsezELX90ZLWJHVzbEI6lWtrSftHxFmk+eN3Ip0lYL2UpHWBo0jdVs8AGwLrS1qANLnRn4AdcouYZzqrMFfaFRAR0yTdSbrQxwTSl8KLwDWkeYs/cv9YOZo1ie9IGv0/iXR61yukqUgXk7RYRPwwjxZ/r3ERW3P5oi3rRsTN+dS8bwEfR8Sjef3KpIGh3yRdcW0XYCvSvAlWYe7TrghJ8wBrAf/MA1r2IX0p7Ogv9DJJ2gb4Pmk+8cWBrwG/joinJB0IDI2I0xsZo7VM0vqkls6ncxP4QaQuqosi4pK8zUqkFpJ3gDWAs0gX63mqQWFbL+CkXTFK11E+kPTL/sseRVwmSRsBNwPfiohrm637GnAkaZS4uzx6qVxt3wX8OCJ+K2k/0lSlf4uIK2q2WwK4Fjg6IsY2JlrrLdw8Xj3zAB8De/kXezlaGHg0mjT17MmSfhcRH+SxC8sA25Fmy3LC7qUkLQ58hXQWx8GSPo6IyyR9TOq7VkRcDhARr0raPiKmNTJm6x1caVeQR56WpVkf9rakaymPASYDZwArkc4KeE9Sf6Cvv+B7t3wRkGuAl0gzEV4KnB0RV0v6KvBopEumms3ESdusEPkUrp2BUcDGpAk4/kGq1jYiTbjhZN2LSRoMDIiIf+a5/y8FfgIEcBlwWm3TuFlzPuXLrACSVgfWjIjPka6b/DZwP+nUvROAvwGDGhehtUfSfMD3gJ/lgYJNn+HSEXE/aSDa8w0M0QrgStusl5O0AbApaWrSxUnn7e4SER9J2gv4Y0S82cAQrYMkLQisTbqoy83A50jdGztExDN5G3dfWatcaZv1YnkSlc+RkvZE0jnYx+aEfQBpwpx5GhehzY58EZ77gd2AkaSLgCxBGqfQtI0TtrXKlbZZL1VzTfN+wO+BKaTJcZYHXiMl8r08Srxsklb2Nc2to5y0zXohSVuSKuyREXF7nkhlTdJ5vYuTLp86OiJebGCY1gmS+tReVtPN4tYRPk/brHd6gVRR/yTPjDUd2BV4ICL+0sjArGs0vw62E7Z1hCtts14sz0G9N+mSqicA1wP7AtP9JW9WPU7aZr1cvqaygOOB69z/aVZdTtpmvZz7Os2siZO2mZlZIXyetpmZWSGctM3MzArhpG1mZlYIJ20zM7NCOGmbmZkVwknbrBtImiFpjKQnJF0vad5O7OsySXvk+xfny3S2tu0Wkjap4xgv5Os7d2h5s23enc1jnZKvDW5ms8lJ26x7TIuIdSNiTeBD4LDalZL61rPTiDg4Ip5sY5MtgNlO2mZWBidts+73N2DFXAX/WdL/AY9L6ivpp5JGShor6VBIk6lIOl/Sk5LuIF0ghLzuPknD8v3tJI2W9JikeyUtS/pxcEyu8j8raZCkG/MxRkraND93MUn3SHpU0q9JM661SdLvJD0iaZykQ5qt+1mO5V5Jg/KyFSTdlZ/zN0mrdsm7aVZhvmCIWTfKl9XcnnR1LoANgTUj4vmc+N6KiA3yVKUPSLoHWA9YBViLdK3lJ4HfNtvvIOA3wOZ5X4tGxJuSLgTejYiz8nb/B/w8Iu6XtAxwN7AacDJwf0ScJmlHYKYk3IqD8jEGACMl3RgRb5CuBT06Io6TdFLe91HARcBhEfGcpM8AvwS2rONtNLPMSdusewyQNCbf/xtwCanZ+uGIeD4v/wKwdlN/NbAQsBKwOXBNRMwAJkn6Uwv73wj4a9O+IuLNVuLYGlhd+qS
QXlDSAvkYX8zPvUPSlA68pqMl7Z7vL51jfQP4GLg2L78KuEnS/Pn1Xl9z7Lk7cAwza4OTtln3mBYR69YuyMlrau0i4BsRcXez7XYA2ptfWB3YBlIX2MYRMa2FWDo8h7GkLUg/ADaOiPck3QfM08rmkY/7n+bvgZl1jvu0zRrnbuBwSXNBugynpPmAvwJ75z7vpYDPt/DcB4HPSVouP3fRvPwdYIGa7e4hNVWTt1s33/0rsE9etj2wSDuxLgRMyQl7VVKl36QP0NRa8BVSs/vbwPOS9szHkKR12jmGmbXDSduscS4m9VePlvQE8GtS69fNwHPA48CvgL80f2JEvE7qh75J0mP8t3n6NmD3poFowNHAsDzQ7Un+O4r9VGBzSaNJzfT/bifWu4B+ksYCpwMP1aybCqwh6RFSn/Vpefk+wNdyfOOAXTvwnphZG3yVLzMzs0K40jYzMyuEk7aZmVkhnLTNupikuSVdK2m8pH/kSU9a2m547mseJ+knNcsPk/R47pe+v2naUkmfyhOVjMnPOazmOZfkSVbGSrohn3LVFa9lF0nfreN5n0wC0xMkrZ/fs/GSfqGa88yabXdC3uYZSdvWLO8v6SJJz0p6WtKXatbtpTTRzbh83juSPp8/h6bb+5J26/YXapXnPm2rBEn9ImJ6Dx3rCGDtiDhM0t7A7hExvNk2iwGPAutHxOuSLgeuiIh7JS2YR18jaRfgiIjYTlJ/0t/sBzkpPwFsEhGTmj3nbOC1iDizJ15vS/IpYcdHxKgeOt7DwDdJA+TuBH4REb9vts3qwDWkCW4GA38EVo6IGZJOBfpGxImS+gCLRsRkSSsB1wFbRsQUSYtHxGvN9rsoMB4YGhHvdfNLtYpzpW0NpVamxlSzKTrzsvklXZorqrFN1ZBqLlghaQ9Jl+X7l0k6W9KfgR9L2lDS35Wm7vy7pFXydn0lnVWz329I2krSzTX73UbSTR18WbsCl+f7NwBbtVD5LQ88m0eBQ0ogXwJoSr7ZfOTzqSPiw4j4IC+fm5q/35qELWBA03MkDZN0cfMAJS2bK8qLlS5qcrWkrSU9IOk5SRvm7Q6QdH6+v2fe9jFJf23tvWvhWL+SNCp/xqfWLD8zV7BjJZ3V2jHao3Ra3IIR8WCkKuQKYLcWNt0VGBERH+RJacaTEjjAQcCP8nv5cURMzsu/DlwQEVPyuteY1R7A752wrSd4chVrtFmmxiQlo5mm6Mzb/i9p2s+1ACS1d24xwMrA1rmaWjDvc7qkrYEzSInyEGA5YL28blFgCnCBpEE5sR4IXJqPey1pmtHmzo6IK4AhwEsAeX9vAYsBk2u2HQ+sqtR0PoGUZPo3rZR0JHBsXrZlzfKlgTuAFYFvR8SkmnWXAjuQTiM7Lh9/FHBwK+/NisCe+fWPJJ1jvRmwC/A9Zk18JwHbRsRESQvnZS29d819P3/GfYF7Ja2dX/PuwKoRETX7m+UY+cfVtS3sF9IFUobk/TWZkJc1N4SZT1WbAAypOfbpSpPI/BM4KiJeJf37QdIDQF/glIi4i5ntDZzdSnxmXcpJ2xqtpakxB9HyFJ1bk74gycs7MvXm9Xk6UEgThFyemzwDmKtmvxc2NZ83HU/SlcC+ORluDHw1r5+pqbsFLfWnztQPlZtaDyclo4+Bv5Oq76b1F5B+NHwFOBHYPy9/iTT16WDgd5JuyMmFiDgwJ8bzgOHkHxlteD4iHs+vdRxwb06gjwPLtrD9A8Blkq4DmlodWnzvmtkrt6L0A5YCVif9sHgfuFjpoii3t3aMiHgGWLe1F9FCKwa0PNtba9v1A4YCD0TEsZKOBc4C9svrViL9OBgK/E3SmhHxn3zspUhzxN89667Nup6bx61hNPPUmOuQ+njnofUpOltbXrus+dSatdOGng78OV8uc+eabVvb76XAvsCXScl/eo77Ws08CKnp9tX8vAmkHyBNFwxZCJglmUXEbRHxmYjYGHiGNKFKcyNooak3V9jjgM82Wz6D9EP
gS82f04IPau5/XPP4Y1r4QR8Rh5F+QCwNjFHql29zOlWlGduOB7aKiLVJrQTz5PdyQ+DG/Pruau0YklZp5f0ek6vkCaSE2mQoMIlZffK5NNvuDeA90qQ2ANcDn655zi0R8VH+EfkMKYk32Qu4OSI+au09MOtKTtrWSK1NjdnaFJ3Np+Rsah5/VdJqSgOImqr21o43Md8/oGb5PcBhOcF+crycGCeRkshlTRtHxPB8rezmtyvyJreSK2NSf+efooURn5IWr3kdR5BmSCO3BDTZkZzMJQ3N3QhNz9kUeEbJinm5SD9Ins6PN5R0BV1A0goR8Y+IOInU1L80rbx3NRYk/XB6S9ISpCueoTSQbqGIuBP4FrmSbukYEfFMK+/3uhHxn4h4GXhH0kb59X8VuKWFl3AraXrYufO/rZVIF3AJ0kxyW+TttiK1BAD8jjyNrKSBpObyf9Xs88ukwW1mPcLN49ZId5G+8MeSKpiHIE3RmZtTb8qJ+DVgG+AHpCbjJ4AZpKk4bwK+S2pefYk0orq1051+QmoePxaovXLWxaQv47GSPiL1p5+f110NDIqIJ+m4S4ArJY0nVdifNOlLGhP/vYjGufrvfNynRcSz+f5Ruc/9I1LfetMPgNWAn0kKUoV7VkQ8nt+jy3OfvYDHgMPzc5YBZrpYSCf8NP+gEHBvPs4TtP7eERGPSXqU1CrwL1LzN6T50W+R1NSyckwbx+iIw0k/rAYAv8+3ptH3wyLipIgYl5vdnwSmA0fWdJ18h/SZnQM0jWGA1Oz9BaUpYGeQxhG8kfe9LOmHyyzTzJp1F5/yZdYGpZHTj0bEJY2OpR6SfgpcGRFjGx2LmXWek7ZZK5QugDEV2KbmVCszs4Zx0jYzMyuEB6KZmZkVwknbzMysEE7aZmZmhXDSNjMzK4STtpmZWSGctM3MzArx/8JpoYjLihYHAAAAAElFTkSuQmCC\n",
1109 | "text/plain": [
1110 | ""
1111 | ]
1112 | },
1113 | "metadata": {
1114 | "needs_background": "light"
1115 | },
1116 | "output_type": "display_data"
1117 | }
1118 | ],
1119 | "source": [
1120 | "cm_plot = plot_confusion_matrix(cm, data.target_names, normalize=False)"
1121 | ]
1122 | },
1123 | {
1124 | "cell_type": "code",
1125 | "execution_count": 39,
1126 | "metadata": {},
1127 | "outputs": [],
1128 | "source": [
1129 | "# Save confusion matrix image\n",
1130 | "confusion_matrix_image = './reports/confusion_matrix.png'\n",
1131 | "cm_plot.savefig(confusion_matrix_image)"
1132 | ]
1133 | },
1134 | {
1135 | "cell_type": "code",
1136 | "execution_count": 40,
1137 | "metadata": {},
1138 | "outputs": [
1139 | {
1140 | "data": {
1141 | "text/plain": [
1142 | "LineaArtifact(name='plot-confusion-matrix', _version=1)"
1143 | ]
1144 | },
1145 | "execution_count": 40,
1146 | "metadata": {},
1147 | "output_type": "execute_result"
1148 | }
1149 | ],
1150 | "source": [
1151 | "# Save the plot_confusion_matrix function as a LineaPy artifact\n",
1152 | "lineapy.save(plot_confusion_matrix, \"plot-confusion-matrix\")"
1153 | ]
1154 | },
1155 | {
1156 | "cell_type": "code",
1157 | "execution_count": null,
1158 | "metadata": {},
1159 | "outputs": [],
1160 | "source": [
1161 | "#commenting for change\n"
1162 | ]
1163 | }
1164 | ],
1165 | "metadata": {
1166 | "kernelspec": {
1167 | "display_name": "Python 3",
1168 | "language": "python",
1169 | "name": "python3"
1170 | },
1171 | "language_info": {
1172 | "codemirror_mode": {
1173 | "name": "ipython",
1174 | "version": 3
1175 | },
1176 | "file_extension": ".py",
1177 | "mimetype": "text/x-python",
1178 | "name": "python",
1179 | "nbconvert_exporter": "python",
1180 | "pygments_lexer": "ipython3",
1181 | "version": "3.8.5"
1182 | },
1183 | "toc": {
1184 | "base_numbering": 1,
1185 | "nav_menu": {},
1186 | "number_sections": true,
1187 | "sideBar": true,
1188 | "skip_h1_title": false,
1189 | "title_cell": "Table of Contents",
1190 | "title_sidebar": "Contents",
1191 | "toc_cell": false,
1192 | "toc_position": {},
1193 | "toc_section_display": true,
1194 | "toc_window_display": true
1195 | },
1196 | "varInspector": {
1197 | "cols": {
1198 | "lenName": 16,
1199 | "lenType": 16,
1200 | "lenVar": 40
1201 | },
1202 | "kernels_config": {
1203 | "python": {
1204 | "delete_cmd_postfix": "",
1205 | "delete_cmd_prefix": "del ",
1206 | "library": "var_list.py",
1207 | "varRefreshCmd": "print(var_dic_list())"
1208 | },
1209 | "r": {
1210 | "delete_cmd_postfix": ") ",
1211 | "delete_cmd_prefix": "rm(",
1212 | "library": "var_list.r",
1213 | "varRefreshCmd": "cat(var_dic_list()) "
1214 | }
1215 | },
1216 | "types_to_exclude": [
1217 | "module",
1218 | "function",
1219 | "builtin_function_or_method",
1220 | "instance",
1221 | "_Feature"
1222 | ],
1223 | "window_display": false
1224 | }
1225 | },
1226 | "nbformat": 4,
1227 | "nbformat_minor": 4
1228 | }
1229 |
--------------------------------------------------------------------------------