├── file.txt ├── .gitignore ├── requirements.txt ├── README.md ├── step-0-prototype.ipynb └── lineapy-trial-prototype.ipynb /file.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDEs 2 | .idea 3 | .vscode 4 | 5 | ## OS configs 6 | .DS_Store 7 | 8 | # Project 9 | data/* 10 | models/* 11 | reports/* 12 | 13 | # Python 14 | __pycache__ 15 | .ipynb_checkpoints 16 | 17 | # Venv 18 | dvc-venv -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | dvc>=2.8.3,<3 2 | joblib>=1.0.1,<2 3 | jupyter>=1.0.0,<2 4 | jupyter_contrib_nbextensions>=0.5.1,<1 5 | matplotlib>=3.4.3,<4 6 | numpy>=1.21.2,<2 7 | pandas>=1.3.2,<2 8 | pytest>=6.2.4,<7 9 | python-box>=5.4.1,<6 10 | pyyaml>=5.4.1,<6 11 | scikit-learn>=0.24.2,<2 12 | scipy>=1.7.1,<2 13 | tqdm>=4.62.2,<5 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # course-ds-base 2 | 3 | ## Preparation 4 | 5 | ### 1. Fork / Clone this repository 6 | 7 | ```bash 8 | git clone https://github.com/iterative/course-ds-base.git 9 | cd course-ds-base 10 | ``` 11 | 12 | 13 | ### 2. 
Create and activate virtual environment 14 | 15 | Create virtual environment named `dvc-venv` (you may use another name) 16 | ```bash 17 | python3 -m venv dvc-venv 18 | echo "export PYTHONPATH=$PWD" >> dvc-venv/bin/activate 19 | source dvc-venv/bin/activate 20 | ``` 21 | Install python libraries 22 | 23 | ```bash 24 | pip install --upgrade pip setuptools wheel 25 | pip install -r requirements.txt 26 | ``` 27 | 28 | Add Virtual Environment to Jupyter Notebook 29 | 30 | ```bash 31 | python -m ipykernel install --user --name=dvc-venv 32 | ``` 33 | 34 | Configure ToC for jupyter notebook (optional) 35 | 36 | ```bash 37 | jupyter contrib nbextension install --user 38 | jupyter nbextension enable toc2/main 39 | ``` 40 | 41 | ### 3. Run Jupyter Notebook 42 | 43 | ```bash 44 | jupyter notebook 45 | ``` 46 | 47 | -------------------------------------------------------------------------------- /step-0-prototype.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-06-16T21:17:31.460557Z", 9 | "start_time": "2019-06-16T21:17:29.395297Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import itertools\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "from sklearn.metrics import confusion_matrix, f1_score\n", 19 | "from sklearn.linear_model import LogisticRegression\n", 20 | "from sklearn.model_selection import train_test_split" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "# Load dataset" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "ExecuteTime": { 35 | "end_time": "2019-06-16T21:17:31.485189Z", 36 | "start_time": "2019-06-16T21:17:31.473720Z" 37 | } 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "# Get data \n", 42 | "\n", 43 | 
"import pandas as pd\n", 44 | "from sklearn.datasets import load_iris\n", 45 | "\n", 46 | "data = load_iris(as_frame=True)\n", 47 | "dataset = data.frame\n", 48 | "dataset.head()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "# print labels for target values \n", 58 | "\n", 59 | "[print(f'{target}: {label}') for target, label in zip(data.target.unique(), data.target_names)]" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "ExecuteTime": { 67 | "end_time": "2019-06-16T21:17:32.328046Z", 68 | "start_time": "2019-06-16T21:17:32.323611Z" 69 | } 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "# feature names: remove the literal ' (cm)' unit text (str.strip would strip a character set, not a suffix) and use underscores\n", 74 | "\n", 75 | "dataset.columns = [colname.replace(' (cm)', '').replace(' ', '_') for colname in dataset.columns.tolist()]\n", 76 | "\n", 77 | "feature_names = dataset.columns.tolist()[:4]\n", 78 | "feature_names" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "# Features engineering" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "ExecuteTime": { 93 | "end_time": "2019-06-16T21:21:02.150708Z", 94 | "start_time": "2019-06-16T21:21:02.144518Z" 95 | } 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "dataset['sepal_length_to_sepal_width'] = dataset['sepal_length'] / dataset['sepal_width']\n", 100 | "dataset['petal_length_to_petal_width'] = dataset['petal_length'] / dataset['petal_width']\n", 101 | "\n", 102 | "dataset = dataset[[\n", 103 | " 'sepal_length', 'sepal_width', 'petal_length', 'petal_width',\n", 104 | "# 'sepal_length_in_square', 'sepal_width_in_square', 'petal_length_in_square', 'petal_width_in_square',\n", 105 | " 'sepal_length_to_sepal_width', 'petal_length_to_petal_width',\n", 106 | " 'target'\n", 107 | "]]" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | 
"metadata": { 114 | "ExecuteTime": { 115 | "end_time": "2019-06-16T21:21:02.987144Z", 116 | "start_time": "2019-06-16T21:21:02.976092Z" 117 | } 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "dataset.head()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "# Split dataset" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "ExecuteTime": { 136 | "end_time": "2019-06-16T21:21:06.361378Z", 137 | "start_time": "2019-06-16T21:21:06.358647Z" 138 | } 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "test_size=0.2" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "## Split into train/test" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "ExecuteTime": { 157 | "end_time": "2019-06-16T21:21:07.438133Z", 158 | "start_time": "2019-06-16T21:21:07.431649Z" 159 | } 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "train_dataset, test_dataset = train_test_split(dataset, test_size=test_size, random_state=42)\n", 164 | "train_dataset.shape, test_dataset.shape" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "# Train" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": { 178 | "ExecuteTime": { 179 | "end_time": "2019-06-16T21:21:10.932148Z", 180 | "start_time": "2019-06-16T21:21:10.927844Z" 181 | } 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "# Get X and Y\n", 186 | "\n", 187 | "y_train = train_dataset.loc[:, 'target'].values.astype('int32')\n", 188 | "X_train = train_dataset.drop('target', axis=1).values.astype('float32')" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": { 195 | "ExecuteTime": { 196 | "end_time": "2019-06-16T21:21:55.427365Z", 197 | "start_time": 
"2019-06-16T21:21:55.416431Z" 198 | } 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "# Create an instance of Logistic Regression Classifier CV and fit the data\n", 203 | "\n", 204 | "logreg = LogisticRegression(C=0.001, solver='lbfgs', multi_class='multinomial', max_iter=100)\n", 205 | "logreg.fit(X_train, y_train)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "# Evaluate" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "ExecuteTime": { 220 | "end_time": "2019-06-16T21:21:55.875303Z", 221 | "start_time": "2019-06-16T21:21:55.864724Z" 222 | } 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "def plot_confusion_matrix(cm,\n", 227 | " target_names,\n", 228 | " title='Confusion matrix',\n", 229 | " cmap=None,\n", 230 | " normalize=True):\n", 231 | " \"\"\"\n", 232 | " given a sklearn confusion matrix (cm), make a nice plot\n", 233 | "\n", 234 | " Arguments\n", 235 | " ---------\n", 236 | " cm: confusion matrix from sklearn.metrics.confusion_matrix\n", 237 | "\n", 238 | " target_names: given classification classes such as [0, 1, 2]\n", 239 | " the class names, for example: ['high', 'medium', 'low']\n", 240 | "\n", 241 | " title: the text to display at the top of the matrix\n", 242 | "\n", 243 | " cmap: the gradient of the values displayed from matplotlib.pyplot.cm\n", 244 | " see http://matplotlib.org/examples/color/colormaps_reference.html\n", 245 | " plt.get_cmap('jet') or plt.cm.Blues\n", 246 | "\n", 247 | " normalize: If False, plot the raw numbers\n", 248 | " If True, plot the proportions\n", 249 | "\n", 250 | " Usage\n", 251 | " -----\n", 252 | " plot_confusion_matrix(cm = cm, # confusion matrix created by\n", 253 | " # sklearn.metrics.confusion_matrix\n", 254 | " normalize = True, # show proportions\n", 255 | " target_names = y_labels_vals, # list of names of the classes\n", 256 | " title = best_estimator_name) # title of graph\n", 
257 | "\n", 258 | " Citation\n", 259 | " --------\n", 260 | " http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html\n", 261 | "\n", 262 | " \"\"\"\n", 263 | "\n", 264 | " accuracy = np.trace(cm) / float(np.sum(cm))\n", 265 | " misclass = 1 - accuracy\n", 266 | "\n", 267 | " if cmap is None:\n", 268 | " cmap = plt.get_cmap('Blues')\n", 269 | "\n", 270 | " plt.figure(figsize=(8, 6))\n", 271 | " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", 272 | " plt.title(title)\n", 273 | " plt.colorbar()\n", 274 | "\n", 275 | " if target_names is not None:\n", 276 | " tick_marks = np.arange(len(target_names))\n", 277 | " plt.xticks(tick_marks, target_names, rotation=45)\n", 278 | " plt.yticks(tick_marks, target_names)\n", 279 | "\n", 280 | " if normalize:\n", 281 | " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", 282 | "\n", 283 | " thresh = cm.max() / 1.5 if normalize else cm.max() / 2\n", 284 | " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", 285 | " if normalize:\n", 286 | " plt.text(j, i, \"{:0.4f}\".format(cm[i, j]),\n", 287 | " horizontalalignment=\"center\",\n", 288 | " color=\"white\" if cm[i, j] > thresh else \"black\")\n", 289 | " else:\n", 290 | " plt.text(j, i, \"{:,}\".format(cm[i, j]),\n", 291 | " horizontalalignment=\"center\",\n", 292 | " color=\"white\" if cm[i, j] > thresh else \"black\")\n", 293 | "\n", 294 | " plt.tight_layout()\n", 295 | " plt.ylabel('True label')\n", 296 | " plt.xlabel('Predicted label\\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))\n", 297 | " plt.show()" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": { 304 | "ExecuteTime": { 305 | "end_time": "2019-06-16T21:21:56.090756Z", 306 | "start_time": "2019-06-16T21:21:56.086966Z" 307 | } 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "# Get X and Y\n", 312 | "\n", 313 | "y_test = test_dataset.loc[:, 'target'].values.astype('int32')\n", 
314 | "X_test = test_dataset.drop('target', axis=1).values.astype('float32')" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": { 321 | "ExecuteTime": { 322 | "end_time": "2019-06-16T21:21:56.270245Z", 323 | "start_time": "2019-06-16T21:21:56.265054Z" 324 | } 325 | }, 326 | "outputs": [], 327 | "source": [ 328 | "prediction = logreg.predict(X_test)\n", 329 | "cm = confusion_matrix(y_true=y_test, y_pred=prediction)  # rows = true labels, columns = predicted labels (matches the plot axes)\n", 330 | "f1 = f1_score(y_true = y_test, y_pred = prediction, average='macro')" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": { 337 | "ExecuteTime": { 338 | "end_time": "2019-06-16T21:21:56.493617Z", 339 | "start_time": "2019-06-16T21:21:56.489929Z" 340 | } 341 | }, 342 | "outputs": [], 343 | "source": [ 344 | "# f1 score value\n", 345 | "f1" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": { 352 | "ExecuteTime": { 353 | "end_time": "2019-06-16T21:21:56.966279Z", 354 | "start_time": "2019-06-16T21:21:56.726149Z" 355 | } 356 | }, 357 | "outputs": [], 358 | "source": [ 359 | "plot_confusion_matrix(cm, data.target_names, normalize=False)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": null, 365 | "metadata": {}, 366 | "outputs": [], 367 | "source": [] 368 | } 369 | ], 370 | "metadata": { 371 | "kernelspec": { 372 | "display_name": "Python 3 (ipykernel)", 373 | "language": "python", 374 | "name": "python3" 375 | }, 376 | "language_info": { 377 | "codemirror_mode": { 378 | "name": "ipython", 379 | "version": 3 380 | }, 381 | "file_extension": ".py", 382 | "mimetype": "text/x-python", 383 | "name": "python", 384 | "nbconvert_exporter": "python", 385 | "pygments_lexer": "ipython3", 386 | "version": "3.9.2" 387 | }, 388 | "toc": { 389 | "base_numbering": 1, 390 | "nav_menu": {}, 391 | "number_sections": true, 392 | "sideBar": true, 393 | "skip_h1_title": false, 394 | "title_cell": "Table of
Contents", 395 | "title_sidebar": "Contents", 396 | "toc_cell": false, 397 | "toc_position": {}, 398 | "toc_section_display": true, 399 | "toc_window_display": true 400 | }, 401 | "varInspector": { 402 | "cols": { 403 | "lenName": 16, 404 | "lenType": 16, 405 | "lenVar": 40 406 | }, 407 | "kernels_config": { 408 | "python": { 409 | "delete_cmd_postfix": "", 410 | "delete_cmd_prefix": "del ", 411 | "library": "var_list.py", 412 | "varRefreshCmd": "print(var_dic_list())" 413 | }, 414 | "r": { 415 | "delete_cmd_postfix": ") ", 416 | "delete_cmd_prefix": "rm(", 417 | "library": "var_list.r", 418 | "varRefreshCmd": "cat(var_dic_list()) " 419 | } 420 | }, 421 | "types_to_exclude": [ 422 | "module", 423 | "function", 424 | "builtin_function_or_method", 425 | "instance", 426 | "_Feature" 427 | ], 428 | "window_display": false 429 | } 430 | }, 431 | "nbformat": 4, 432 | "nbformat_minor": 4 433 | } 434 | -------------------------------------------------------------------------------- /lineapy-trial-prototype.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Requirement already satisfied: lineapy in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (0.2.3)\n", 13 | "Requirement already satisfied: jinja2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2.11.2)\n", 14 | "Requirement already satisfied: pandas in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.3.2)\n", 15 | "Requirement already satisfied: pydantic in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.8.2)\n", 16 | "Requirement already satisfied: networkx in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2.5)\n", 17 | "Requirement already satisfied: SQLAlchemy<2.0.0,>=1.4 in 
/Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.4.47)\n", 18 | "Requirement already satisfied: requests in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2.25.1)\n", 19 | "Requirement already satisfied: alembic==1.8.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.8.0)\n", 20 | "Requirement already satisfied: IPython>=7.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (7.19.0)\n", 21 | "Requirement already satisfied: isort in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (5.9.3)\n", 22 | "Requirement already satisfied: rich in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (12.4.4)\n", 23 | "Requirement already satisfied: click>=8.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (8.1.3)\n", 24 | "Requirement already satisfied: pyyaml in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (5.3.1)\n", 25 | "Requirement already satisfied: fsspec in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2022.7.1)\n", 26 | "Requirement already satisfied: nbconvert<7.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (6.0.7)\n", 27 | "Requirement already satisfied: nbformat in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (5.0.8)\n", 28 | "Requirement already satisfied: cloudpickle in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.6.0)\n", 29 | "Requirement already satisfied: asttokens in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2.2.1)\n", 30 | "Requirement already satisfied: black in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (21.7b0)\n", 31 | "Requirement already satisfied: typing-extensions>=4.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (4.3.0)\n", 32 | "Requirement already satisfied: 
importlib-metadata in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from alembic==1.8.0->lineapy) (2.0.0)\n", 33 | "Requirement already satisfied: Mako in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from alembic==1.8.0->lineapy) (1.2.4)\n", 34 | "Requirement already satisfied: importlib-resources in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from alembic==1.8.0->lineapy) (5.7.1)\n", 35 | "Requirement already satisfied: appnope in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (0.1.0)\n", 36 | "Requirement already satisfied: jedi>=0.10 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (0.17.1)\n", 37 | "Requirement already satisfied: pygments in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (2.7.2)\n", 38 | "Requirement already satisfied: pickleshare in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (0.7.5)\n", 39 | "Requirement already satisfied: traitlets>=4.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (5.0.5)\n", 40 | "Requirement already satisfied: pexpect>4.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (4.8.0)\n", 41 | "Requirement already satisfied: decorator in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (4.4.2)\n", 42 | "Requirement already satisfied: backcall in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (0.2.0)\n", 43 | "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (3.0.8)\n", 44 | "Requirement already satisfied: setuptools>=18.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (50.3.1.post20201107)\n", 45 | "Requirement already 
satisfied: bleach in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (3.2.1)\n", 46 | "Requirement already satisfied: testpath in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.4.4)\n", 47 | "Requirement already satisfied: jupyter-core in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (4.6.3)\n", 48 | "Requirement already satisfied: pandocfilters>=1.4.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (1.4.3)\n", 49 | "Requirement already satisfied: defusedxml in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.6.0)\n", 50 | "Requirement already satisfied: mistune<2,>=0.8.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.8.4)\n", 51 | "Requirement already satisfied: jupyterlab-pygments in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.1.2)\n", 52 | "Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.5.1)\n", 53 | "Requirement already satisfied: entrypoints>=0.2.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.3)\n", 54 | "Requirement already satisfied: MarkupSafe>=0.23 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jinja2->lineapy) (1.1.1)\n", 55 | "Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbformat->lineapy) (3.2.0)\n", 56 | "Requirement already satisfied: ipython-genutils in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbformat->lineapy) (0.2.0)\n", 57 | "Requirement already satisfied: greenlet!=0.4.17 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from SQLAlchemy<2.0.0,>=1.4->lineapy) (2.0.2)\n", 58 | "Requirement 
already satisfied: six in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from asttokens->lineapy) (1.15.0)\n", 59 | "Requirement already satisfied: tomli<2.0.0,>=0.2.6 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (1.2.3)\n", 60 | "Requirement already satisfied: regex>=2020.1.8 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (2020.10.15)\n", 61 | "Requirement already satisfied: mypy-extensions>=0.4.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (0.4.3)\n", 62 | "Requirement already satisfied: appdirs in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (1.4.4)\n", 63 | "Requirement already satisfied: pathspec<1,>=0.8.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (0.9.0)\n", 64 | "Requirement already satisfied: python-dateutil>=2.7.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas->lineapy) (2.8.1)\n", 65 | "Requirement already satisfied: pytz>=2017.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas->lineapy) (2022.1)\n", 66 | "Requirement already satisfied: numpy>=1.17.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas->lineapy) (1.18.5)\n", 67 | "Requirement already satisfied: idna<3,>=2.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from requests->lineapy) (2.10)\n", 68 | "Requirement already satisfied: certifi>=2017.4.17 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from requests->lineapy) (2020.6.20)\n", 69 | "Requirement already satisfied: chardet<5,>=3.0.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from requests->lineapy) (3.0.4)\n", 70 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from requests->lineapy) (1.25.11)\n", 71 | "Requirement already satisfied: commonmark<0.10.0,>=0.9.0 in 
/Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from rich->lineapy) (0.9.1)\n", 72 | "Requirement already satisfied: parso<0.8.0,>=0.7.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jedi>=0.10->IPython>=7.0.0->lineapy) (0.7.0)\n" 73 | ] 74 | }, 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "Requirement already satisfied: attrs>=17.4.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat->lineapy) (20.3.0)\n", 80 | "Requirement already satisfied: pyrsistent>=0.14.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat->lineapy) (0.17.3)\n", 81 | "Requirement already satisfied: jupyter-client>=6.1.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (6.1.7)\n", 82 | "Requirement already satisfied: async-generator in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (1.10)\n", 83 | "Requirement already satisfied: nest-asyncio in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (1.5.1)\n", 84 | "Requirement already satisfied: ptyprocess>=0.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pexpect>4.3->IPython>=7.0.0->lineapy) (0.6.0)\n", 85 | "Requirement already satisfied: wcwidth in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->IPython>=7.0.0->lineapy) (0.2.5)\n", 86 | "Requirement already satisfied: webencodings in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from bleach->nbconvert<7.0.0->lineapy) (0.5.1)\n", 87 | "Requirement already satisfied: packaging in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from bleach->nbconvert<7.0.0->lineapy) (20.4)\n", 88 | "Requirement already satisfied: zipp>=0.5 in 
/Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from importlib-metadata->alembic==1.8.0->lineapy) (3.4.0)\n", 89 | "Requirement already satisfied: tornado>=4.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jupyter-client>=6.1.5->nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (6.1)\n", 90 | "Requirement already satisfied: pyzmq>=13 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jupyter-client>=6.1.5->nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (19.0.2)\n", 91 | "Requirement already satisfied: pyparsing>=2.0.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from packaging->bleach->nbconvert<7.0.0->lineapy) (2.4.7)\n", 92 | "\n", 93 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n", 94 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "! 
pip install lineapy" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 2, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "Requirement already satisfied: pandas==1.3.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (1.3.2)\n", 112 | "Requirement already satisfied: python-dateutil>=2.7.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas==1.3.2) (2.8.1)\n", 113 | "Requirement already satisfied: numpy>=1.17.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas==1.3.2) (1.18.5)\n", 114 | "Requirement already satisfied: pytz>=2017.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas==1.3.2) (2022.1)\n", 115 | "Requirement already satisfied: six>=1.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas==1.3.2) (1.15.0)\n", 116 | "\n", 117 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n", 118 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" 119 | ] 120 | } 121 | ], 122 | "source": [ 123 | "! 
python -m pip install pandas==1.3.2" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 3, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "%load_ext lineapy" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 4, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "text/plain": [ 143 | "lineapy_config(home_dir=PosixPath('/Users/jenif/.lineapy'), database_url='sqlite:////Users/jenif/.lineapy/db.sqlite', artifact_storage_dir=PosixPath('/Users/jenif/.lineapy/linea_pickles'), customized_annotation_folder=PosixPath('/Users/jenif/.lineapy/custom-annotations'), do_not_track=False, logging_level='INFO', logging_file=PosixPath('/Users/jenif/.lineapy/lineapy.log'), storage_options=None, mlflow_registry_uri=None, mlflow_tracking_uri=None, default_ml_models_storage_backend=None)" 144 | ] 145 | }, 146 | "execution_count": 4, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "lineapy.options" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 5, 158 | "metadata": { 159 | "ExecuteTime": { 160 | "end_time": "2019-06-16T21:17:31.460557Z", 161 | "start_time": "2019-06-16T21:17:29.395297Z" 162 | } 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "import lineapy\n", 167 | "import joblib\n", 168 | "import json\n", 169 | "import itertools\n", 170 | "import matplotlib.pyplot as plt\n", 171 | "import numpy as np\n", 172 | "import pandas as pd\n", 173 | "from sklearn.metrics import confusion_matrix, f1_score\n", 174 | "from sklearn.linear_model import LogisticRegression\n", 175 | "from sklearn.model_selection import train_test_split\n" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "# Load dataset" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 6, 188 | "metadata": { 189 | "ExecuteTime": { 190 | "end_time": "2019-06-16T21:17:31.485189Z", 
191 | "start_time": "2019-06-16T21:17:31.473720Z" 192 | } 193 | }, 194 | "outputs": [ 195 | { 196 | "data": { 197 | "text/html": [ 198 | "
\n", 199 | "\n", 212 | "\n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
\n", 266 | "
" 267 | ], 268 | "text/plain": [ 269 | " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n", 270 | "0 5.1 3.5 1.4 0.2 \n", 271 | "1 4.9 3.0 1.4 0.2 \n", 272 | "2 4.7 3.2 1.3 0.2 \n", 273 | "3 4.6 3.1 1.5 0.2 \n", 274 | "4 5.0 3.6 1.4 0.2 \n", 275 | "\n", 276 | " target \n", 277 | "0 0 \n", 278 | "1 0 \n", 279 | "2 0 \n", 280 | "3 0 \n", 281 | "4 0 " 282 | ] 283 | }, 284 | "execution_count": 6, 285 | "metadata": {}, 286 | "output_type": "execute_result" 287 | } 288 | ], 289 | "source": [ 290 | "# Get data \n", 291 | "\n", 292 | "import pandas as pd\n", 293 | "from sklearn.datasets import load_iris\n", 294 | "\n", 295 | "data = load_iris(as_frame=True)\n", 296 | "dataset = data.frame\n", 297 | "dataset.head()" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 7, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "name": "stdout", 307 | "output_type": "stream", 308 | "text": [ 309 | "0: setosa\n", 310 | "1: versicolor\n", 311 | "2: virginica\n" 312 | ] 313 | }, 314 | { 315 | "data": { 316 | "text/plain": [ 317 | "[None, None, None]" 318 | ] 319 | }, 320 | "execution_count": 7, 321 | "metadata": {}, 322 | "output_type": "execute_result" 323 | } 324 | ], 325 | "source": [ 326 | "# print labels for target values \n", 327 | "\n", 328 | "[print(f'{target}: {label}') for target, label in zip(data.target.unique(), data.target_names)]" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 8, 334 | "metadata": { 335 | "ExecuteTime": { 336 | "end_time": "2019-06-16T21:17:32.328046Z", 337 | "start_time": "2019-06-16T21:17:32.323611Z" 338 | } 339 | }, 340 | "outputs": [ 341 | { 342 | "data": { 343 | "text/plain": [ 344 | "['sepal_length', 'sepal_width', 'petal_length', 'petal_width']" 345 | ] 346 | }, 347 | "execution_count": 8, 348 | "metadata": {}, 349 | "output_type": "execute_result" 350 | } 351 | ], 352 | "source": [ 353 | "# feature names\n", 354 | "\n", 355 | "dataset.columns = [colname.strip(' 
(cm)').replace(' ', '_') for colname in dataset.columns.tolist()]\n", 356 | "\n", 357 | "feature_names = dataset.columns.tolist()[:4]\n", 358 | "feature_names" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 9, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "#save raw data as artifact\n", 368 | "dataset_csv = './data/raw/iris.csv'\n", 369 | "dataset.to_csv(dataset_csv, index=False)\n" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 10, 375 | "metadata": {}, 376 | "outputs": [ 377 | { 378 | "data": { 379 | "text/html": [ 380 | "
\n", 381 | "\n", 394 | "\n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | "
sepal_lengthsepal_widthpetal_lengthpetal_widthtarget
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
..................
1456.73.05.22.32
1466.32.55.01.92
1476.53.05.22.02
1486.23.45.42.32
1495.93.05.11.82
\n", 496 | "

150 rows × 5 columns

\n", 497 | "
" 498 | ], 499 | "text/plain": [ 500 | " sepal_length sepal_width petal_length petal_width target\n", 501 | "0 5.1 3.5 1.4 0.2 0\n", 502 | "1 4.9 3.0 1.4 0.2 0\n", 503 | "2 4.7 3.2 1.3 0.2 0\n", 504 | "3 4.6 3.1 1.5 0.2 0\n", 505 | "4 5.0 3.6 1.4 0.2 0\n", 506 | ".. ... ... ... ... ...\n", 507 | "145 6.7 3.0 5.2 2.3 2\n", 508 | "146 6.3 2.5 5.0 1.9 2\n", 509 | "147 6.5 3.0 5.2 2.0 2\n", 510 | "148 6.2 3.4 5.4 2.3 2\n", 511 | "149 5.9 3.0 5.1 1.8 2\n", 512 | "\n", 513 | "[150 rows x 5 columns]" 514 | ] 515 | }, 516 | "execution_count": 10, 517 | "metadata": {}, 518 | "output_type": "execute_result" 519 | } 520 | ], 521 | "source": [ 522 | "dataset" 523 | ] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "execution_count": 11, 528 | "metadata": {}, 529 | "outputs": [ 530 | { 531 | "name": "stdout", 532 | "output_type": "stream", 533 | "text": [ 534 | "1.3.2\n" 535 | ] 536 | } 537 | ], 538 | "source": [ 539 | "print(pd.__version__)" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": 12, 545 | "metadata": {}, 546 | "outputs": [ 547 | { 548 | "data": { 549 | "text/plain": [ 550 | "LineaArtifact(name='iris-raw', _version=4)" 551 | ] 552 | }, 553 | "execution_count": 12, 554 | "metadata": {}, 555 | "output_type": "execute_result" 556 | } 557 | ], 558 | "source": [ 559 | "#save raw data as artifact to lineapy\n", 560 | "lineapy.save(dataset, \"iris-raw\")" 561 | ] 562 | }, 563 | { 564 | "cell_type": "markdown", 565 | "metadata": {}, 566 | "source": [ 567 | "# Features engineering" 568 | ] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": 13, 573 | "metadata": { 574 | "ExecuteTime": { 575 | "end_time": "2019-06-16T21:21:02.150708Z", 576 | "start_time": "2019-06-16T21:21:02.144518Z" 577 | } 578 | }, 579 | "outputs": [], 580 | "source": [ 581 | "dataset['sepal_length_to_sepal_width'] = dataset['sepal_length'] / dataset['sepal_width']\n", 582 | "dataset['petal_length_to_petal_width'] = dataset['petal_length'] / 
dataset['petal_width']\n", 583 | "\n", 584 | "dataset = dataset[[\n", 585 | " 'sepal_length', 'sepal_width', 'petal_length', 'petal_width',\n", 586 | "# 'sepal_length_in_square', 'sepal_width_in_square', 'petal_length_in_square', 'petal_width_in_square',\n", 587 | " 'sepal_length_to_sepal_width', 'petal_length_to_petal_width',\n", 588 | " 'target'\n", 589 | "]]" 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": 14, 595 | "metadata": { 596 | "ExecuteTime": { 597 | "end_time": "2019-06-16T21:21:02.987144Z", 598 | "start_time": "2019-06-16T21:21:02.976092Z" 599 | } 600 | }, 601 | "outputs": [ 602 | { 603 | "data": { 604 | "text/html": [ 605 | "
\n", 606 | "\n", 619 | "\n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | "
sepal_lengthsepal_widthpetal_lengthpetal_widthsepal_length_to_sepal_widthpetal_length_to_petal_widthtarget
05.13.51.40.21.4571437.00
14.93.01.40.21.6333337.00
24.73.21.30.21.4687506.50
34.63.11.50.21.4838717.50
45.03.61.40.21.3888897.00
\n", 685 | "
" 686 | ], 687 | "text/plain": [ 688 | " sepal_length sepal_width petal_length petal_width \\\n", 689 | "0 5.1 3.5 1.4 0.2 \n", 690 | "1 4.9 3.0 1.4 0.2 \n", 691 | "2 4.7 3.2 1.3 0.2 \n", 692 | "3 4.6 3.1 1.5 0.2 \n", 693 | "4 5.0 3.6 1.4 0.2 \n", 694 | "\n", 695 | " sepal_length_to_sepal_width petal_length_to_petal_width target \n", 696 | "0 1.457143 7.0 0 \n", 697 | "1 1.633333 7.0 0 \n", 698 | "2 1.468750 6.5 0 \n", 699 | "3 1.483871 7.5 0 \n", 700 | "4 1.388889 7.0 0 " 701 | ] 702 | }, 703 | "execution_count": 14, 704 | "metadata": {}, 705 | "output_type": "execute_result" 706 | } 707 | ], 708 | "source": [ 709 | "dataset.head()" 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": 15, 715 | "metadata": {}, 716 | "outputs": [], 717 | "source": [ 718 | "# Save features\n", 719 | "features_path = './data/processed/featured_iris.csv'\n", 720 | "dataset.to_csv(features_path, index=False)" 721 | ] 722 | }, 723 | { 724 | "cell_type": "code", 725 | "execution_count": 16, 726 | "metadata": {}, 727 | "outputs": [ 728 | { 729 | "data": { 730 | "text/plain": [ 731 | "LineaArtifact(name='iris-preprocessed', _version=4)" 732 | ] 733 | }, 734 | "execution_count": 16, 735 | "metadata": {}, 736 | "output_type": "execute_result" 737 | } 738 | ], 739 | "source": [ 740 | "#save features to lineapy\n", 741 | "lineapy.save(dataset, \"iris-preprocessed\")" 742 | ] 743 | }, 744 | { 745 | "cell_type": "markdown", 746 | "metadata": {}, 747 | "source": [ 748 | "# Split dataset" 749 | ] 750 | }, 751 | { 752 | "cell_type": "code", 753 | "execution_count": 17, 754 | "metadata": { 755 | "ExecuteTime": { 756 | "end_time": "2019-06-16T21:21:06.361378Z", 757 | "start_time": "2019-06-16T21:21:06.358647Z" 758 | } 759 | }, 760 | "outputs": [], 761 | "source": [ 762 | "test_size=0.2" 763 | ] 764 | }, 765 | { 766 | "cell_type": "markdown", 767 | "metadata": {}, 768 | "source": [ 769 | "## Split train/test" 770 | ] 771 | }, 772 | { 773 | "cell_type": "code", 774 | 
"execution_count": 18, 775 | "metadata": { 776 | "ExecuteTime": { 777 | "end_time": "2019-06-16T21:21:07.438133Z", 778 | "start_time": "2019-06-16T21:21:07.431649Z" 779 | } 780 | }, 781 | "outputs": [ 782 | { 783 | "data": { 784 | "text/plain": [ 785 | "((120, 7), (30, 7))" 786 | ] 787 | }, 788 | "execution_count": 18, 789 | "metadata": {}, 790 | "output_type": "execute_result" 791 | } 792 | ], 793 | "source": [ 794 | "train_dataset, test_dataset = train_test_split(dataset, test_size=test_size, random_state=42)\n", 795 | "train_dataset.shape, test_dataset.shape" 796 | ] 797 | }, 798 | { 799 | "cell_type": "code", 800 | "execution_count": 19, 801 | "metadata": {}, 802 | "outputs": [], 803 | "source": [ 804 | "# Save train and test sets\n", 805 | "trainset_path = './data/processed/train_iris.csv'\n", 806 | "testset_path = './data/processed/test_iris.csv'\n", 807 | "\n", 808 | "train_dataset.to_csv(trainset_path)\n", 809 | "test_dataset.to_csv(testset_path)" 810 | ] 811 | }, 812 | { 813 | "cell_type": "code", 814 | "execution_count": 20, 815 | "metadata": {}, 816 | "outputs": [ 817 | { 818 | "data": { 819 | "text/plain": [ 820 | "LineaArtifact(name='test-dataset', _version=4)" 821 | ] 822 | }, 823 | "execution_count": 20, 824 | "metadata": {}, 825 | "output_type": "execute_result" 826 | } 827 | ], 828 | "source": [ 829 | "#save train and test sets to lineapy\n", 830 | "lineapy.save(train_dataset, \"train-dataset\")\n", 831 | "lineapy.save(test_dataset, \"test-dataset\")" 832 | ] 833 | }, 834 | { 835 | "cell_type": "markdown", 836 | "metadata": {}, 837 | "source": [ 838 | "# Train" 839 | ] 840 | }, 841 | { 842 | "cell_type": "code", 843 | "execution_count": 21, 844 | "metadata": { 845 | "ExecuteTime": { 846 | "end_time": "2019-06-16T21:21:10.932148Z", 847 | "start_time": "2019-06-16T21:21:10.927844Z" 848 | } 849 | }, 850 | "outputs": [], 851 | "source": [ 852 | "# Get X and Y\n", 853 | "\n", 854 | "y_train = train_dataset.loc[:, 'target'].values.astype('int32')\n", 855 
| "X_train = train_dataset.drop('target', axis=1).values.astype('float32')" 856 | ] 857 | }, 858 | { 859 | "cell_type": "code", 860 | "execution_count": 22, 861 | "metadata": { 862 | "ExecuteTime": { 863 | "end_time": "2019-06-16T21:21:55.427365Z", 864 | "start_time": "2019-06-16T21:21:55.416431Z" 865 | } 866 | }, 867 | "outputs": [ 868 | { 869 | "data": { 870 | "text/plain": [ 871 | "LogisticRegression(C=0.001, multi_class='multinomial')" 872 | ] 873 | }, 874 | "execution_count": 22, 875 | "metadata": {}, 876 | "output_type": "execute_result" 877 | } 878 | ], 879 | "source": [ 880 | "# Create an instance of Logistic Regression Classifier CV and fit the data\n", 881 | "\n", 882 | "logreg = LogisticRegression(C=0.001, solver='lbfgs', multi_class='multinomial', max_iter=100)\n", 883 | "logreg.fit(X_train, y_train)" 884 | ] 885 | }, 886 | { 887 | "cell_type": "code", 888 | "execution_count": 23, 889 | "metadata": {}, 890 | "outputs": [ 891 | { 892 | "data": { 893 | "text/plain": [ 894 | "['./models/model.joblib']" 895 | ] 896 | }, 897 | "execution_count": 23, 898 | "metadata": {}, 899 | "output_type": "execute_result" 900 | } 901 | ], 902 | "source": [ 903 | "model_path= './models/model.joblib'\n", 904 | "joblib.dump(logreg, model_path)" 905 | ] 906 | }, 907 | { 908 | "cell_type": "code", 909 | "execution_count": 24, 910 | "metadata": {}, 911 | "outputs": [ 912 | { 913 | "data": { 914 | "text/plain": [ 915 | "LineaArtifact(name='logreg-model', _version=3)" 916 | ] 917 | }, 918 | "execution_count": 24, 919 | "metadata": {}, 920 | "output_type": "execute_result" 921 | } 922 | ], 923 | "source": [ 924 | "#save model to lineapy\n", 925 | "lineapy.save(model_path, \"logreg-model\")" 926 | ] 927 | }, 928 | { 929 | "cell_type": "markdown", 930 | "metadata": {}, 931 | "source": [ 932 | "# Evaluate" 933 | ] 934 | }, 935 | { 936 | "cell_type": "code", 937 | "execution_count": 32, 938 | "metadata": { 939 | "ExecuteTime": { 940 | "end_time": "2019-06-16T21:21:55.875303Z", 941 | 
"start_time": "2019-06-16T21:21:55.864724Z" 942 | } 943 | }, 944 | "outputs": [], 945 | "source": [ 946 | "def plot_confusion_matrix(cm,\n", 947 | " target_names,\n", 948 | " title='Confusion matrix',\n", 949 | " cmap=None,\n", 950 | " normalize=True):\n", 951 | " \"\"\"\n", 952 | " given a sklearn confusion matrix (cm), make a nice plot\n", 953 | "\n", 954 | " Arguments\n", 955 | " ---------\n", 956 | " cm: confusion matrix from sklearn.metrics.confusion_matrix\n", 957 | "\n", 958 | " target_names: given classification classes such as [0, 1, 2]\n", 959 | " the class names, for example: ['high', 'medium', 'low']\n", 960 | "\n", 961 | " title: the text to display at the top of the matrix\n", 962 | "\n", 963 | " cmap: the gradient of the values displayed from matplotlib.pyplot.cm\n", 964 | " see http://matplotlib.org/examples/color/colormaps_reference.html\n", 965 | " plt.get_cmap('jet') or plt.cm.Blues\n", 966 | "\n", 967 | " normalize: If False, plot the raw numbers\n", 968 | " If True, plot the proportions\n", 969 | "\n", 970 | " Usage\n", 971 | " -----\n", 972 | " plot_confusion_matrix(cm = cm, # confusion matrix created by\n", 973 | " # sklearn.metrics.confusion_matrix\n", 974 | " normalize = True, # show proportions\n", 975 | " target_names = y_labels_vals, # list of names of the classes\n", 976 | " title = best_estimator_name) # title of graph\n", 977 | "\n", 978 | " Citation\n", 979 | " ---------\n", 980 | " http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html\n", 981 | "\n", 982 | " \"\"\"\n", 983 | "\n", 984 | " accuracy = np.trace(cm) / float(np.sum(cm))\n", 985 | " misclass = 1 - accuracy\n", 986 | "\n", 987 | " if cmap is None:\n", 988 | " cmap = plt.get_cmap('Blues')\n", 989 | "\n", 990 | " plt.figure(figsize=(8, 6))\n", 991 | " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", 992 | " plt.title(title)\n", 993 | " plt.colorbar()\n", 994 | "\n", 995 | " if target_names is not None:\n", 996 | " tick_marks = 
np.arange(len(target_names))\n", 997 | " plt.xticks(tick_marks, target_names, rotation=45)\n", 998 | " plt.yticks(tick_marks, target_names)\n", 999 | "\n", 1000 | " if normalize:\n", 1001 | " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", 1002 | "\n", 1003 | " thresh = cm.max() / 1.5 if normalize else cm.max() / 2\n", 1004 | " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", 1005 | " if normalize:\n", 1006 | " plt.text(j, i, \"{:0.4f}\".format(cm[i, j]),\n", 1007 | " horizontalalignment=\"center\",\n", 1008 | " color=\"white\" if cm[i, j] > thresh else \"black\")\n", 1009 | " else:\n", 1010 | " plt.text(j, i, \"{:,}\".format(cm[i, j]),\n", 1011 | " horizontalalignment=\"center\",\n", 1012 | " color=\"white\" if cm[i, j] > thresh else \"black\")\n", 1013 | "\n", 1014 | " plt.tight_layout()\n", 1015 | " plt.ylabel('True label')\n", 1016 | " plt.xlabel('Predicted label\\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))\n", 1017 | " \n", 1018 | " return plt.gcf()" 1019 | ] 1020 | }, 1021 | { 1022 | "cell_type": "code", 1023 | "execution_count": 33, 1024 | "metadata": { 1025 | "ExecuteTime": { 1026 | "end_time": "2019-06-16T21:21:56.090756Z", 1027 | "start_time": "2019-06-16T21:21:56.086966Z" 1028 | } 1029 | }, 1030 | "outputs": [], 1031 | "source": [ 1032 | "# Get X and Y\n", 1033 | "\n", 1034 | "y_test = test_dataset.loc[:, 'target'].values.astype('int32')\n", 1035 | "X_test = test_dataset.drop('target', axis=1).values.astype('float32')" 1036 | ] 1037 | }, 1038 | { 1039 | "cell_type": "code", 1040 | "execution_count": 34, 1041 | "metadata": { 1042 | "ExecuteTime": { 1043 | "end_time": "2019-06-16T21:21:56.270245Z", 1044 | "start_time": "2019-06-16T21:21:56.265054Z" 1045 | } 1046 | }, 1047 | "outputs": [], 1048 | "source": [ 1049 | "prediction = logreg.predict(X_test)\n", 1050 | "cm = confusion_matrix(y_test, prediction)  # sklearn expects (y_true, y_pred): rows = true labels, columns = predictions\n", 1051 | "f1 = f1_score(y_true = y_test, y_pred = prediction, average='macro')" 1052 | ] 1053 
| }, 1054 | { 1055 | "cell_type": "code", 1056 | "execution_count": 35, 1057 | "metadata": { 1058 | "ExecuteTime": { 1059 | "end_time": "2019-06-16T21:21:56.493617Z", 1060 | "start_time": "2019-06-16T21:21:56.489929Z" 1061 | } 1062 | }, 1063 | "outputs": [ 1064 | { 1065 | "data": { 1066 | "text/plain": [ 1067 | "0.9305555555555555" 1068 | ] 1069 | }, 1070 | "execution_count": 35, 1071 | "metadata": {}, 1072 | "output_type": "execute_result" 1073 | } 1074 | ], 1075 | "source": [ 1076 | "# f1 score value\n", 1077 | "f1" 1078 | ] 1079 | }, 1080 | { 1081 | "cell_type": "code", 1082 | "execution_count": 36, 1083 | "metadata": {}, 1084 | "outputs": [], 1085 | "source": [ 1086 | "# Save metrics\n", 1087 | "metrics_file = './reports/metrics.json'\n", 1088 | "\n", 1089 | "metrics = {\n", 1090 | " 'f1': f1\n", 1091 | "}\n", 1092 | "\n", 1093 | "with open(metrics_file, 'w') as mf:\n", 1094 | " json.dump(\n", 1095 | " obj=metrics,\n", 1096 | " fp=mf,\n", 1097 | " indent=4\n", 1098 | " )\n" 1099 | ] 1100 | }, 1101 | { 1102 | "cell_type": "code", 1103 | "execution_count": 37, 1104 | "metadata": {}, 1105 | "outputs": [ 1106 | { 1107 | "data": { 1108 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAe0AAAHCCAYAAADCTpEYAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAA26klEQVR4nO3dd7xcVdXG8d+ThEDoJaEkAem9SkCaiBTpTYGggBSRLkrxVZSXqoiKCAKKCNJfQpcqoCgqCJIQQiDUKCBJaIFICaEkrPePvS9Obm7L3DJ35zxfPvNh5pwz56yZyZ01a+999lFEYGZmZr1fn0YHYGZmZh3jpG1mZlYIJ20zM7NCOGmbmZkVwknbzMysEE7aZmZmhXDSNmsASQMk3SbpLUnXd2I/+0i6pytjaxRJn5X0TKPjMOvN5PO0zVon6SvAscCqwDvAGOCHEXF/J/e7H/ANYJOImN7ZOHs7SQGsFBHjGx2LWclcaZu1QtKxwDnAGcASwDLAL4Fdu2D3nwKerULC7ghJ/Rodg1kJnLTNWiBpIeA04MiIuCkipkbERxFxW0R8O28zt6RzJE3Kt3MkzZ3XbSFpgqTjJL0m6WVJB+Z1pwInAcMlvSvpa5JOkXRVzfGXlRRNyUzSAZL+JekdSc9L2qdm+f01z9tE0sjc7D5S0iY16+6TdLqkB/J+7pE0sJXX3xT//9TEv5ukHSQ9K+lNSd+r2X5DSQ9K+k/e9nxJ/fO6v+bNHsuvd3jN/r8j6RXg0qZl+Tkr5GN8Oj8eLGmypC0687malc5J26xlGwPzADe3sc33gY2AdYF1gA2BE2vWLwksBAwBvgZcIGmRiDiZVL1fGxHzR8QlbQUiaT7gF8D2EbEAsAmpmb75dosCd+RtFwPOBu6QtFjNZl8BDgQWB/oDx7dx6CVJ78EQ0o+M3wD7AusDnwVOkrR83nYGcAwwkPTebQUcARARm+dt1smv99qa/S9KanU4pPbAEfFP4DvA1ZLmBS4FLouI+9qI12yO56Rt1rLFgMntNF/vA5wWEa9FxOvAqcB+Nes/yus/iog7gXeBVeqM52NgTUkDIuLliBjXwjY7As9FxJURMT0irgGeBnau2ebSiHg2IqYB15F+cLTmI1L//UfACFJCPjci3snHHwesDRARj0TEQ/m4LwC/Bj7Xgdd0ckR8kOOZSUT8BngO+AewFOlHklmlOWmbtewNYGA7fa2DgRdrHr+Yl32yj2ZJ/z1g/tkNJCKmAsOBw4CXJd0hadUOxNMU05Cax6/MRjxvRMSMfL8pqb5as35a0/MlrSzpdkmvSHqb1JLQYtN7jdcj4v12tvkNsCZwXkR80M62ZnM8J22zlj0IvA/s1sY2k0hNu02WycvqMRWYt+bxkrUrI+LuiNiGVHE+TUpm7cXTFNPEOmOaHb8ixbVSRCwIfA9QO89p89QVSfOTBgJeApySm//NKs1J26wFEfEWqR/3gjwAa15Jc0naXtJP8mbXACdKGpQHdJ0EXNXaPtsxBthc0jJ5ENwJTSskLSFpl9y3/QGpmX1GC/u4E1hZ0lck9ZM0HFgduL3OmGbHAsDbwLu5FeDwZutfBZaf5VltOxd4JCIOJvXVX9jpKM0K56Rt1oqIOJt0jvaJwOvAS8BRwO/yJj8ARgFjgceB0XlZPcf6A3Bt3tcjzJxo+wDHkSrpN0l9xUe0sI83gJ3ytm8A/wPsFBGT64lpNh1PGuT2DqkV4Npm608BLs+jy/dqb2eSdgW2I3UJQPocPt00at6sqjy5ipmZWSFcaZuZmRXCSdvMzKwQTtpmZmaFcNI2MzMrhJO2mZlZIXxlnTqo/3yhAZ7nYU6w3opLNDoEM2vB6NGPTI6IQd19nL4Lfipi+iyz6M62mPb63RGxXReE1CYn7TpowKLMvfGxjQ7DusADtx/T6BDMrAUD5lLzKXm7RUyfxtyrtDt1QLveH3NBe9P2dgknbTMzqzCByukpdtI2M7PqEqD2psnvPZy0zcys2gq
qtMuJ1MzMrOJcaZuZWbW5edzMzKwEHohmZmZWjoIq7XJ+XpiZmVWcK20zM6su4eZxMzOzMqio5nEnbTMzq7aCKu1yIjUzM6s4V9pmZlZtbh43MzMrQVnnaZcTqZmZWcW50jYzs+ryVb7MzMwKUlDzuJO2mZlVmPu0zczMrBu40jYzs2rr4z5tMzOz3s9zj5uZmRWkoNHj5fy8MDMzqzhX2mZmVmFljR530jYzs2orqHncSdvMzKqtoEq7nEjNzMwKJem3kl6T9ETNskUl/UHSc/n/i7S3HydtMzOrLqlrbu27DNiu2bLvAvdGxErAvflxm5y0zcys2tSn87d2RMRfgTebLd4VuDzfvxzYrb39uE/bzMyqrWsGog2UNKrm8UURcVE7z1kiIl4GiIiXJS3e3kGctM3MzDpvckQM6+6DOGmbmVmFNfQ87VclLZWr7KWA19p7gvu0zcys2npmIFpLbgX2z/f3B25p7wmutM3MrLp66IIhkq4BtiD1fU8ATgbOBK6T9DXg38Ce7e3HSdvMzKybRcSXW1m11ezsx0nbzMwqzHOPm5mZlcNzj5uZmRWioEq7nEjNzMwqzpW2mZlVm5vHzczMCqCyBqKVE6mZmVnFudI2M7Nqc/O4mZlZGeSkbWZm1vuJspK2+7TNzMwK4UrbzMyqS/lWCCdtMzOrMLl53Mpy4THb8OKIQxl14X6fLFtk/rm5/Ywv8vglB3D7GV9k4fnnbmCEVq977r6LtddYhTVWXZGf/uTMRodjneTPs3tI6vStpzhpG1f+4Ul2PfHmmZYdP3xD7hvzEmt97TLuG/MSx++1QYOis3rNmDGDbx19JLfc9nseHfsk14+4hqeefLLRYVmd/HkaOGkb8MATE3nznfdnWrbTxstz1R/TF8JVf3ySnTdZoRGhWSeMfPhhVlhhRZZbfnn69+/PnsP35vbbbml0WFYnf57dx5W2FW/xhefllTenAvDKm1MZtNC8DY7IZtekSRMZOnTpTx4PGTKUiRMnNjAi6wx/nt3HSbuHSTpA0uBGx2HWm0TELMtKGnBjM/Pn2U3URbceMkckbeAAwEm7C732n/dYctH5AFhy0fl4/a33GhyRza4hQ4YyYcJLnzyeOHECgwf7z6RU/jwNenHSljSfpDskPSbpCUnDJa0v6S+SHpF0t6SlJO0BDAOuljRG0gBJW0l6VNLjkn4rae68zzMlPSlprKSz8rKdJf0jb/9HSUs08nX3Fnc89C/23Xp1APbdenVuf/BfDY7IZtewDTZg/PjneOH55/nwww+5/toR7LjTLo0Oy+rkz7N7iM43jfdki0dvPk97O2BSROwIIGkh4PfArhHxuqThwA8j4iBJRwHHR8QoSfMAlwFbRcSzkq4ADs//3x1YNSJC0sL5OPcDG+VlBwP/AxzXPBhJhwCHADDPIt33qhvg8u9uz2fXXpqBC87D+CsP5vSrHuSsa0dy1fd2ZP9t1+Cl195hnx/e3ugwbTb169ePn597PjvvuC0zZsxg/wMOYvU11mh0WFYnf57dp6RuBrXUT9IbSFoZuBu4DrgdmAL8HWgq+foCL0fEFyTdx3+T9jrAeRGxed7PVsCRwF7AI8Ao4A7g9oj4UNJawM+ApYD+wPMRsV1bsfVZaOmYe+Nju/T1WmNMuf2YRodgZi0YMJceiYhh3X2cfostHwvu8INO72fKVfv0SLy9tnk8Ip4F1gceB34EfAkYFxHr5ttaEfGFFp7a4k+miJgObAjcCOwG3JVXnQecHxFrAYcC83TpCzEzM+sivbZ5PI8GfzMirpL0LqlpepCkjSPiQUlzAStHxDjgHWCB/NSngWUlrRgR44H9gL9Imh+YNyLulPQQMD5vvxDQdN7E/j308szMrJcoqXm81yZtYC3gp5I+Bj4CDgemA7/I/dv9gHOAcaQ+7AslTQM2Bg4ErpfUDxgJXAgsCtyS+7wFNLWLnpK3nQg8BCzXEy/OzMx6AV8wpGtExN2kPu3mNm9h2xt
Jzd5N7gXWa7bZy6Tm8ebPvQXwtEJmZhVVUqXda/u0zczMbGa9ttI2MzPrbirs0pxO2mZmVmlO2mZmZqUoJ2e7T9vMzKwUrrTNzKy65OZxMzOzYpSUtN08bmZmVghX2mZmVmklVdpO2mZmVlk+T9vMzKwk5eRs92mbmZmVwpW2mZlVl0/5MjMzK4eTtpmZWSFKStru0zYzMyuEK20zM6u2cgptJ20zM6u2kprHnbTNzKyypLImV3GftpmZWSFcaZuZWaWVVGk7aZuZWaU5aZuZmZWinJztPm0zM7NSuNI2M7NKc/O4mZlZCXzBEDMzszIIKChnu0/bzMysFE7aZmZWYfpkVrTO3No9inSMpHGSnpB0jaR56onWSdvMzCpN6vyt7f1rCHA0MCwi1gT6AnvXE6uTtpmZWffrBwyQ1A+YF5hU707MzMwqq4tGjw+UNKrm8UURcRFAREyUdBbwb2AacE9E3FPPQZy0zcysujrQvN1BkyNiWIuHkBYBdgWWA/4DXC9p34i4anYP4uZxMzOrLAF9+qjTt3ZsDTwfEa9HxEfATcAm9cTrpG1mZta9/g1sJGlepbb4rYCn6tmRm8fNzKzSuntylYj4h6QbgNHAdOBR4KJ69uWkbWZmldYT05hGxMnAyZ3dj5O2mZlVV9cNROsR7tM2MzMrhCttMzOrrHTBkHJKbSdtMzOrsI7NHd5bOGmbmVmlFZSz3adtZmZWClfaZmZWaW4eNzMzK0Fhp3w5aZuZWWWVNnrcfdpmZmaFcKVtZmaVVlCh7aRtZmbVVlLzuJO2mZlVWkE5233aZmZmpXClbWZm1SU3j8/x1ltxCR64/ZhGh2FdYLvzH2h0CNaF7jpq00aHYIVJp3w1OoqOc9I2M7MKK+uCIe7TNjMzK4QrbTMzq7SCCm0nbTMzqzY3j5uZmVmXc6VtZmbV5at8mZmZlaG0q3w5aZuZWaWVlLTdp21mZlYIV9pmZlZpBRXaTtpmZlZtJTWPO2mbmVl1FTZ63H3aZmZmhXClbWZmlaXCLhjipG1mZpVWUM520jYzs2rrU1DWdp+2mZlZIVxpm5lZpRVUaDtpm5lZdUk+T9vMzKwYfcrJ2e7TNjMzK4UrbTMzqzQ3j5uZmRWioJztpG1mZtUl0qxopXCftpmZWSFcaZuZWaWVNHrcSdvMzKpLZV0wxM3jZmZmhXClbWZmlVZQoe2kbWZm1SXKusqXk7aZmVVaQTnbfdpmZmalcKVtZmaVVtLocSdtMzOrrHRpzkZH0XGtJm1J5wHR2vqIOLpbIjIzM+tBc8pAtFE9FoWZmZm1q9WkHRGX1z6WNF9ETO3+kMzMzHpOOXV2B0aPS9pY0pPAU/nxOpJ+2e2RmZmZ9QDlqUw7c+vAMRaWdIOkpyU9JWnjemLtyEC0c4BtgVsBIuIxSZvXczAzM7PeJE2u0iOHOhe4KyL2kNQfmLeenXRo9HhEvNTsl8SMeg5mZmZWNZIWBDYHDgCIiA+BD+vZV0cmV3lJ0iZASOov6XhyU7mZmVnRuqBpvAPN48sDrwOXSnpU0sWS5qsn3I4k7cOAI4EhwERg3fzYzMyseE3nanfmBgyUNKrmdkjNIfoBnwZ+FRHrAVOB79YTa7vN4xExGdinnp2bmZn1dl00I9rkiBjWyroJwISI+Ed+fAN1Ju2OjB5fXtJtkl6X9JqkWyQtX8/BzMzMqiYiXiF1Na+SF20FPFnPvjoyEO3/gAuA3fPjvYFrgM/Uc0AzM7PeogdHj38DuDqPHP8XcGA9O+lI0lZEXFnz+CpJR9VzMDMzs96mJy4YEhFjgNaazzusrbnHF813/yzpu8AI0lzkw4E7OntgMzOz3qCkGdHaqrQfISXpptdzaM26AE7vrqDMzMxsVm3NPb5cTwZiZmbW06SyrvLVkfO0kbSmpL0kfbXp1t2BWePcc/ddrL3GKqyx6or89CdnNjoc64SlFxnAxfus88ntjsM/wx7rLdXosKxO/tvsHl10nnaPaHc
gmqSTgS2A1YE7ge2B+4ErujUya4gZM2bwraOP5I7f/4EhQ4ey2UYbsNNOu7Da6qs3OjSrw0tTpnHw1Y8BaYTsDQdvwN/Gv9ngqKwe/tvsPj0xEK2rdKTS3oN0TtkrEXEgsA4wd7dGZQ0z8uGHWWGFFVlu+eXp378/ew7fm9tvu6XRYVkX+PTSCzPxrfd59Z0PGh2K1cF/mwYdS9rTIuJjYHqe9Pw10jyqNgeaNGkiQ4cu/cnjIUOGMnHixAZGZF1ly1UG8qdnXm90GFYn/212n5KaxzuStEdJWhj4DWlE+Wjg4e4MqiWSTpO0dR3P20LS7d0R05woImZZVlLTkbWsXx+x6fKLct9zbzQ6FKuT/za7hxB91PlbT+nI3ONH5LsXSroLWDAixnZHMEr/ApUr++ZxnNQdx2whhn4RMb0njtUbDRkylAkTXvrk8cSJExg8eHADI7Ku8JllF+HZ195lynsfNToUq5P/Ng3aqLQlfbr5DVgU6Jfvt0rSjyUdUfP4FEnHSfq2pJGSxko6Na9bVtJTkn5JquKXlnSZpCckPS7pmLzdZZL2yPc3kPR3SY9JeljSApLmkXRpfs6jkj7fQlyLSvpdPv5Dktauie8iSfdQ8QF2wzbYgPHjn+OF55/nww8/5PprR7DjTrs0OizrpK1WGci9z0xudBjWCf7b7CZd0DTeW0aP/6yNdQFs2cb6EcA5wC/z472AM4HNgA1JE7bcKmlz4N/AKsCBEXGEpPWBIRGxJkBumv9Enrf1WmB4RIzM/ezTgG8CRMRaklYF7pG0crO4TgUejYjdJG1JStDr5nXrA5tFxLQ2Xtccr1+/fvz83PPZecdtmTFjBvsfcBCrr7FGo8OyTpi7Xx/WX2ZhfnbvPxsdinWC/za7T0ndDG1NrjJLpdpREfGopMUlDQYGAVOAtYEvAI/mzeYHViIl7Rcj4qG8/F/A8pLOI02Xek+z3a8CvBwRI/Ox3gaQtBlwXl72tKQXgeZJezPgS3mbP0laTNJCed2tbSXsfG3UQwCWXmaZDr8XJdpu+x3YbvsdGh2GdZEPpn/Mrr/u8WEo1g38t9k9OjRhSS/RnbHeQDpdbDip8hbwo4hYN99WjIhL8rZTm54UEVNIp5XdBxwJXNxsvyJV+s115KdSS9s07WtqC+v+u1HERRExLCKGDRo4qAOHMjMz61rdmbRHkC7juQcpgd8NHCRpfgBJQyQt3vxJkgYCfSLiRuB/geb9508DgyVtkLdfQFI/4K/APnnZysAywDPNnlu7zRaki5a/3elXamZmRRKpebyzt57SkUtz1iUixklaAJgYES8DL0taDXgwv8B3gX2BGc2eOgS4VFLTD4oTmu33Q0nDgfMkDSD1Z29N6j+/UNLjwHTggIj4oNmbeUre91jgPWD/LnvBZmZWpB66nnaX6Mg0piJVp8tHxGmSlgGWjIh2O8kiYq1mj88Fzm1h0zVrtnmMWatrIuKAmvsjgY1a2M8BzRdExH2kpnYi4k1g1xa2OaWl+M3MbM5XUtLuSPP4L4GNgS/nx+8AF3RbRGZmZtaijjSPfyYiPi3pUUgDxfJpV2ZmZkVL51mXU2p3JGl/JKkveZS1pEHALDOWmZmZlaik5vGOJO1fADcDi0v6IWk0+IndGpWZmVkPKajQ7tDc41dLeoR0eU4Bu0XEU90emZmZmc2kI6PHlyGdHnVb7bKI+Hd3BmZmZtbdBD16la7O6kjz+B2k/mwB8wDLkSYt8aS3ZmZWvJKmMe1I8/hM51rnK3wd2m0RmZmZ9aCCCu3Z/4EREaOBDbohFjMzM2tDR/q0j6152Ic0W9nr3RaRmZlZD5E0x/VpL1Bzfzqpj/vG7gnHzMysZxWUs9tO2nlSlfkj4ts9FI+ZmVmPKmlylVb7tCX1i4gZtHDxDjMzM+t5bVXaD5MS9hhJtwLXA1ObVkbETd0cm5mZWbeaE8/TXhR4A9iS/56vHYCTtpmZFa+gnN1m0l48jxx/gv8m6ybRrVG
ZmZn1BJXVp91W0u4LzM/MybqJk7aZmVkPaytpvxwRp/VYJGZmZg2gFmvT3qmtpF3OqzAzM6tDGojW6Cg6rq1pTLfqsSjMzMysXa1W2hHxZk8GYmZm1gglVdodOeXLzMxsjqWCzvly0jYzs8qak/q0zczMrBdxpW1mZtWlOWdGNDMzsznenDb3uJmZ2RzJfdpmZmbWLVxpm5lZpRXUOu6kbWZmVSb6FDRrt5O2mZlVliir0naftpmZWSFcaZuZWXWprNHjTtpmZlZpPk/bzMysAO7TNjMzs27hStvMzCrNzeNmZmaFKChnO2mbmVl1iZ7rJ5bUFxgFTIyInerZh/u0zczMesY3gac6swMnbTMzqy6BpE7f2j2MNBTYEbi4M+G6edzMzCqth7q0zwH+B1igMztxpW1mZtZ5AyWNqrkd0rRC0k7AaxHxSGcP4krbzMwqS3TZKV+TI2JYK+s2BXaRtAMwD7CgpKsiYt/ZPYgrbTMzqzR1wa0tEXFCRAyNiGWBvYE/1ZOwwZW2mZlVnM/TNjMzs1lExH3AffU+30nbzMwqrGOnbPUWTtpmZlZZPTkjWldw0jYzs0orqdIu6QeGmZlZpbnSNjOzSiunznbStoo7f491Gh2CdaFFNjiq0SFYaVRW87iTtpmZVVZpA9FKitXMzKzSXGmbmVmluXnczMysEOWkbCdtMzOruIIKbfdpm5mZlcKVtpmZVVYaPV5Oqe2kbWZmlVZS87iTtpmZVZhQQZW2+7TNzMwK4UrbzMwqzc3jZmZmBfBANDMzs1KorErbfdpmZmaFcKVtZmaVVlKl7aRtZmaV5lO+zMzMrMu50jYzs8oS0KecQttJ28zMqq2k5nEnbTMzq7SSBqK5T9vMzKwQrrTNzKzS3DxuZmZWAA9EMzMzK4YvzWlmZmbdwJW2mZlVV2EXDHHSNjOzSisoZztpm5lZdaWBaOWkbfdpm5mZFcKVtpmZVVo5dbaTtpmZVV1BWdtJ28zMKs3naZuZmVmXc6VtZmaVVtDgcSdtMzOrtoJytpO2mZlVXEFZ233aZmZmhXClbWZmlSXKGj3upG1mZtXlC4aYmZmVo6Cc7T5tMzOzUrjSNjOzaiuo1HbSNjOzClNRA9HcPG5mZlYIV9pmZlZpHj1uZmZWAFFUl7aTtpmZVVxBWdt92mZmZoVw0jYzs0pTF/zX5v6lpSX9WdJTksZJ+ma9sTpp2yzuufsu1l5jFdZYdUV++pMzGx2O1emVSRM4aK8d2OXz67PbVhtw1SW/bHRINpsuPHkfXrz3R4y6/nufLPvi1uvxyA3fZ+ojv+DTqy/TwOjmHFLnb+2YDhwXEasBGwFHSlq9nlidtG0mM2bM4FtHH8ktt/2eR8c+yfUjruGpJ59sdFhWh759+3H8/57BrX9+hKtv+RMjLr+Ifz77dKPDstlw5W0PseuRF8y0bNw/J7H3cb/h/tH/bFBUcx51wa0tEfFyRIzO998BngKG1BOrk7bNZOTDD7PCCiuy3PLL079/f/Ycvje333ZLo8OyOgxaYklWX2tdAOabfwGWW3EVXn1lUmODstnywOh/8uZb78207JnnX+W5F19rUETWWZKWBdYD/lHP8520bSaTJk1k6NClP3k8ZMhQJk6c2MCIrCtMfOlFnh43lrXXG9boUMx6l64os1OpPVDSqJrbIbMcSpofuBH4VkS8XU+4DU/akgZLuqGO590paeF2tjlN0tZ1B1dBETHLMpU084DN4r2p73LMofvynVPOZP4FFmx0OGa9ThcNRJscEcNqbhfNdAxpLlLCvjoibqo31oafpx0Rk4A9mi+X1C8iprfxvB06sO+TOhle5QwZMpQJE1765PHEiRMYPHhwAyOyzvjoo4845pB92XG3vdh6+10bHY5ZryO6f0Y0pcrnEuCpiDi7M/vq0Upb0o8lHVHz+BRJx0l6Ij8+QNL1km4D7pE0r6TrJI2VdK2kf0galrd9QdJAScvmYfS/yUPp75E0IG9zmaQ
98v0NJP1d0mOSHpa0QH7u3ySNzrdNevL96I2GbbAB48c/xwvPP8+HH37I9deOYMeddml0WFaHiODkbx/J8iutwv6HfKPR4ZhV2abAfsCWksbkW7uFZ0t6utIeAZwDNJ17shdwGHBgzTYbA2tHxJuSjgemRMTaktYExrSy35WAL0fE1yVdB3wJuKpppaT+wLXA8IgYKWlBYBrwGrBNRLwvaSXgGqDSnX79+vXj5+eez847bsuMGTPY/4CDWH2NNRodltXh0ZEPctuN17DSqmuwx7bp9+jR3zmZzbfctsGRWUdd/qMD+Oz6KzFw4fkZf9fpnH7hnUx5aypnf2dPBi4yPzf94jDGPjORXZqNMLfZ090dgBFxf1cdpkeTdkQ8KmlxSYOBQcAU4N/NNvtDRLyZ728GnJuf+4Sksa3s+vmIGJPvPwIs22z9KsDLETEy7+ttAEnzAedLWheYAazcWux5UMEhAEsvM2efG7nd9juw3fZ1/Qi0XuTTG27C4y+90+gwrBP2P+GyFpff+ufWvgqtLgUN22lEn/YNpD7sJUmVd3NTa+539K38oOb+DGBAs/UCZh1hBccArwLrkLoK3m/tAHlQwUUA668/rKV9mZlZgXw97baNAPYmJe72Ro3fT2pCJ88es1adx3waGCxpg7yvBST1AxYiVeAfk/ob+ta5fzMzs27X40k7IsYBCwATI+Lldjb/JTAoN4t/BxgLvFXHMT8EhgPnSXoM+AMwT97//pIeIjWNT219L2ZmNifqgWlMu0xDTvmKiLVq7r8ArJnvXwZcVrPp+8C+eaDYCsC9wIt522XzNpObnp+Xn1Vz/4Ca+yNJc77Weg5Yu+bxCXW9IDMzK1Y5jeO94DztdswL/DmflC7g8Fw1m5mZdY2CsnavTtp5YvVKn4JlZmbWpFcnbTMzs+6Upg4vp9R20jYzs+rq4YFkndXwC4aYmZlZx7jSNjOzSiuo0HbSNjOziisoaztpm5lZhamogWju0zYzMyuEK20zM6u0kkaPO2mbmVlliaK6tJ20zcys4grK2u7TNjMzK4QrbTMzq7SSRo87aZuZWaV5IJqZmVkhCsrZ7tM2MzMrhSttMzOrrsKu8uWkbWZmFVdO1nbSNjOzyhJlVdru0zYzMyuEK20zM6u0ggptJ20zM6u2kprHnbTNzKzSSpoRzX3aZmZmhXClbWZm1VZOoe2kbWZm1VZQznbSNjOz6lJhM6K5T9vMzKwQrrTNzKzSSho97qRtZmbVVk7OdvO4mZlZKVxpm5lZpRVUaDtpm5lZtZU0etxJ28zMKkxFDURzn7aZmVkhXGmbmVllibKax11pm5mZFcKVtpmZVZorbTMzM+tyrrTNzKzSSho97qRtZmbVVdhVvpy0zcysskRZM6K5T9vMzKwQrrTNzKzaCiq1nbTNzKzSPBDNzMysECUNRHOftpmZWSFcaZuZWaUVVGi70jYzs4pTF9zaO4S0naRnJI2X9N16Q3WlbWZmldbdA9Ek9QUuALYBJgAjJd0aEU/O7r5caZuZmXWvDYHxEfGviPgQGAHsWs+OXGmbmVll9dD1tIcAL9U8ngB8pp4dOWnXYfToRyYPmEsvNjqOHjAQmNzoIKxL+LOcc1Tls/xUTxxk9OhH7h4wlwZ2wa7mkTSq5vFFEXFRvt/Sz4Ko5yBO2nWIiEGNjqEnSBoVEcMaHYd1nj/LOYc/y64VEdv1wGEmAEvXPB4KTKpnR+7TNjMz614jgZUkLSepP7A3cGs9O3KlbWZm1o0iYrqko4C7gb7AbyNiXD37ctK2tlzU/iZWCH+Wcw5/lgWKiDuBOzu7H0XU1RduZmZmPcx92mZmZoVw0jYzMyuEk7aZzUQq6UKFZtXipG1mn5CkyANdJO0nabNGx2Szr6UfXv4xNmdw0ra6SPKZB3OgmoS9Helc0mcaG5HNrqYfXpI+L2m4pC9D+myduMvnpG2zTdIRwCWSTpX02UbHY11L0obAQcBjEfF6XuYv+0Lk5Lw
jcB7wAXCupO/XrPNnWTAnbZstko4E9gTOJ1255gxJOzc2KuuMFr7E3wD+DawtaVPwl31JJC0FHA98CZgBvAgcI+mn8N/WFCuTmzitwyQtCCwC7EKqxAAuB74t6eOIuKNhwVldmvVh70S6iMEbwMnACcDO+bN90F/2vVdNk/i8EfGypP2AQcBpEbFebj15SNK0iDipweFaJ7jStg6RtG5EvE1qchtMStxfJM2f2xc4UtJ8rsaKIwBJhwFnAMOAm4DdgXNJzatfyV/61gvVJOydgBGSFoiICcACwEN5s36k1rH7GxWndQ0nbWuXpG8Cp0kaGhFvkf7dTCNVZVsAo4ADImKqq7EySFo1f9l/LGkwadDZVyLiVGA74HRgU+BXwMvA842L1trSNOgM+CFwXkS8k1d9BCwi6RfAlcDVEXGPf1iXzdOYWpsk7Qp8D9g2Iv4jacmIeEXSr4GlgNWB3SLiiYYGah0maX7gHOBj4ND8pX8ZqRJ7NCJmSPoisENEHCypf0R82LiIrTlJSwBLRsRj+fGhwIyIuFjS3BHxQV7+WdLf6RsRcW/jIrau4krbWiSp6d/Gp4DRwIqSTgNul/T3iDgUOAzYyAm7OO+REvQMUvIGmAgcByyUHy8GzJ3/HXzU0wFau/YCpkmaN1fOiwBNp3Y1JeyNgX9HxHVO2HMOV9rWIkmLRMQUSYsA15G+4C8H7gAuBs6IiDENDNFmU7NBZ32A1YBvAxMj4vuSLgSWBN4BVgUO9A+y3ikn6iWAE0lN308AvwCmAscCnwEuA74WEX9tUJjWDZy0bRaSDgF2BV4AxkTEb2rW7Qr8CNgqIl5uTIQ2u5ol7OVIXaEvSFqd9CX/SkScKGkNYAjwbES80LiIrSXNPsd5Sf3YfUk/rCcDpwEL59tpEXF7YyK17uKkbTOR9CXgFNIpXSuTBpq9QfpF/0XgVGBPV2BlknQMMJzUNTYO+AEwL/AtYDpwmAcT9m55JP98pAGg00in5y0IXBERj0haCJg7Il6rTfI2Z3CfdsW1MJJ0QeDiiBhJOvXnV6RTvJYG/gJs54RdptzHORzYBtiElKSPjYjHgZ8D75OaXK2Xafo7lbQ5MII0OPRMYDPSj+wpwFGSNo2ItyLiNfBEKnMiJ+0KkzQXsHW+f5SkLUh//EdKWi0ipkXEaNIgl4ER8WpEvNiwgG22tPCD7F3SgLO5ImI6aSDhZyR9Lf8Q+3ZEvNLTcVrrmj7DPMJ/E+DrwLak7qtnSLOebU46RW8i8J/GRGo9xUm72voCu0t6ADgUeCEifgdcCJwnaQtJewKLA5MaF6bNrmZ9n/tLWpvUlPoBaXrShSJiBqk15X0An9bVu+TpSC+U1Dcv2hzYB1goIt4DbgSezcu2iIgTI2JcY6K1nuJpTCssIt6XNAL4Aqnp+yWlq3f9mtR0ejzpS/7rETGxcZHa7KpJ2EcChwDDI2K8pD8BRwPPSfqAdOrQro2L1FqTpyM9B/iUpCkRcaakgaREvkceSHgz6XvcLSQV4YFoFZa/AOYiJegfk5pPz8iTp8wbEe9JmisifJ5uISQtBrwVEdNzpTYC+Gptt4akL5BGiK8MXBoRzzYmWmuNpL65JQRJlwDrkc7YmCLpFNK4hP3zD7FPJlOxOZ+TdkXlCmxHYDzwFHAF6Tzs8aTJNHYnDVZ6x4NZyiBpRVLlfDbwIWmClNuAL0TE200zm0kaGBGTGxmrtS9/nm9FxOv5HPrVSLMPTpH0I1IL2WbABxHxcSNjtZ7jPu0KkrQ36fKahwCLAp+LiKnAwaSJNRYlzUP9thN2OSJiPGm0/2rANpGuhf0Y8HNJ/XLCPgi4UtI8noO696kZJb4h6dzr/5O0YEQcRuq/vkHSohFxArB3HizqhF0hrrQrJs87/QXSNXbXB/YgzTE9XdJyEfF8/oKf3tBArcNqRxjnx6cCywKXkC728Q3gs6Sqe2dgP5+213vl7osTSK1fRwNjgOMi4k1JV5OmFt7Cf6PV5KR
dIZKOAOYmjRb+MfBwRDSd8vV1YEXgJPePlaPZKPHdgVcj4u+STiSdX38j8GdSy8p7wNMR8VzDArZ25Yu3PBwRv5Q0N3AVaQKcvSPiHaXL5I5pZIzWOB49XhH5KkAHALtHxERJywKrS1oG2Il0ytdXnLDLUpOwjyVdMOKrefkPJB1PuuSmgJs8oLB3k7QD0J90gZ4FlK6L/U7+sf0ocBLpXPoxnumsutynXQGSBgDbA/8LfCDpcNJgs3VJ01huQUrYPsezELX90ZLWJHVzbEI6lWtrSftHxFmk+eN3Ip0lYL2UpHWBo0jdVs8AGwLrS1qANLnRn4AdcouYZzqrMFfaFRAR0yTdSbrQxwTSl8KLwDWkeYs/cv9YOZo1ie9IGv0/iXR61yukqUgXk7RYRPwwjxZ/r3ERW3P5oi3rRsTN+dS8bwEfR8Sjef3KpIGh3yRdcW0XYCvSvAlWYe7TrghJ8wBrAf/MA1r2IX0p7Ogv9DJJ2gb4Pmk+8cWBrwG/joinJB0IDI2I0xsZo7VM0vqkls6ncxP4QaQuqosi4pK8zUqkFpJ3gDWAs0gX63mqQWFbL+CkXTFK11E+kPTL/sseRVwmSRsBNwPfiohrm637GnAkaZS4uzx6qVxt3wX8OCJ+K2k/0lSlf4uIK2q2WwK4Fjg6IsY2JlrrLdw8Xj3zAB8De/kXezlaGHg0mjT17MmSfhcRH+SxC8sA25Fmy3LC7qUkLQ58hXQWx8GSPo6IyyR9TOq7VkRcDhARr0raPiKmNTJm6x1caVeQR56WpVkf9rakaymPASYDZwArkc4KeE9Sf6Cvv+B7t3wRkGuAl0gzEV4KnB0RV0v6KvBopEumms3ESdusEPkUrp2BUcDGpAk4/kGq1jYiTbjhZN2LSRoMDIiIf+a5/y8FfgIEcBlwWm3TuFlzPuXLrACSVgfWjIjPka6b/DZwP+nUvROAvwGDGhehtUfSfMD3gJ/lgYJNn+HSEXE/aSDa8w0M0QrgStusl5O0AbApaWrSxUnn7e4SER9J2gv4Y0S82cAQrYMkLQisTbqoy83A50jdGztExDN5G3dfWatcaZv1YnkSlc+RkvZE0jnYx+aEfQBpwpx5GhehzY58EZ77gd2AkaSLgCxBGqfQtI0TtrXKlbZZL1VzTfN+wO+BKaTJcZYHXiMl8r08Srxsklb2Nc2to5y0zXohSVuSKuyREXF7nkhlTdJ5vYuTLp86OiJebGCY1gmS+tReVtPN4tYRPk/brHd6gVRR/yTPjDUd2BV4ICL+0sjArGs0vw62E7Z1hCtts14sz0G9N+mSqicA1wP7AtP9JW9WPU7aZr1cvqaygOOB69z/aVZdTtpmvZz7Os2siZO2mZlZIXyetpmZWSGctM3MzArhpG1mZlYIJ20zM7NCOGmbmZkVwknbrBtImiFpjKQnJF0vad5O7OsySXvk+xfny3S2tu0Wkjap4xgv5Os7d2h5s23enc1jnZKvDW5ms8lJ26x7TIuIdSNiTeBD4LDalZL61rPTiDg4Ip5sY5MtgNlO2mZWBidts+73N2DFXAX/WdL/AY9L6ivpp5JGShor6VBIk6lIOl/Sk5LuIF0ghLzuPknD8v3tJI2W9JikeyUtS/pxcEyu8j8raZCkG/MxRkraND93MUn3SHpU0q9JM661SdLvJD0iaZykQ5qt+1mO5V5Jg/KyFSTdlZ/zN0mrdsm7aVZhvmCIWTfKl9XcnnR1LoANgTUj4vmc+N6KiA3yVKUPSLoHWA9YBViLdK3lJ4HfNtvvIOA3wOZ5X4tGxJuSLgTejYiz8nb/B/w8Iu6XtAxwN7AacDJwf0ScJmlHYKYk3IqD8jEGACMl3RgRb5CuBT06Io6TdFLe91HARcBhEfGcpM8AvwS2rONtNLPMSdusewyQNCbf/xtwCanZ+uGIeD4v/wKwdlN/NbAQsBKwOXBNRMwAJkn6Uwv73wj4a9O+IuLNVuLYGlhd+qSQXlDSAvkYX8zPvUPSlA6
8pqMl7Z7vL51jfQP4GLg2L78KuEnS/Pn1Xl9z7Lk7cAwza4OTtln3mBYR69YuyMlrau0i4BsRcXez7XYA2ptfWB3YBlIX2MYRMa2FWDo8h7GkLUg/ADaOiPck3QfM08rmkY/7n+bvgZl1jvu0zRrnbuBwSXNBugynpPmAvwJ75z7vpYDPt/DcB4HPSVouP3fRvPwdYIGa7e4hNVWTt1s33/0rsE9etj2wSDuxLgRMyQl7VVKl36QP0NRa8BVSs/vbwPOS9szHkKR12jmGmbXDSduscS4m9VePlvQE8GtS69fNwHPA48CvgL80f2JEvE7qh75J0mP8t3n6NmD3poFowNHAsDzQ7Un+O4r9VGBzSaNJzfT/bifWu4B+ksYCpwMP1aybCqwh6RFSn/Vpefk+wNdyfOOAXTvwnphZG3yVLzMzs0K40jYzMyuEk7aZmVkhnLTNupikuSVdK2m8pH/kSU9a2m547mseJ+knNcsPk/R47pe+v2naUkmfyhOVjMnPOazmOZfkSVbGSrohn3LVFa9lF0nfreN5n0wC0xMkrZ/fs/GSfqGa88yabXdC3uYZSdvWLO8v6SJJz0p6WtKXatbtpTTRzbh83juSPp8/h6bb+5J26/YXapXnPm2rBEn9ImJ6Dx3rCGDtiDhM0t7A7hExvNk2iwGPAutHxOuSLgeuiIh7JS2YR18jaRfgiIjYTlJ/0t/sBzkpPwFsEhGTmj3nbOC1iDizJ15vS/IpYcdHxKgeOt7DwDdJA+TuBH4REb9vts3qwDWkCW4GA38EVo6IGZJOBfpGxImS+gCLRsRkSSsB1wFbRsQUSYtHxGvN9rsoMB4YGhHvdfNLtYpzpW0NpVamxlSzKTrzsvklXZorqrFN1ZBqLlghaQ9Jl+X7l0k6W9KfgR9L2lDS35Wm7vy7pFXydn0lnVWz329I2krSzTX73UbSTR18WbsCl+f7NwBbtVD5LQ88m0eBQ0ogXwJoSr7ZfOTzqSPiw4j4IC+fm5q/35qELWBA03MkDZN0cfMAJS2bK8qLlS5qcrWkrSU9IOk5SRvm7Q6QdH6+v2fe9jFJf23tvWvhWL+SNCp/xqfWLD8zV7BjJZ3V2jHao3Ra3IIR8WCkKuQKYLcWNt0VGBERH+RJacaTEjjAQcCP8nv5cURMzsu/DlwQEVPyuteY1R7A752wrSd4chVrtFmmxiQlo5mm6Mzb/i9p2s+1ACS1d24xwMrA1rmaWjDvc7qkrYEzSInyEGA5YL28blFgCnCBpEE5sR4IXJqPey1pmtHmzo6IK4AhwEsAeX9vAYsBk2u2HQ+sqtR0PoGUZPo3rZR0JHBsXrZlzfKlgTuAFYFvR8SkmnWXAjuQTiM7Lh9/FHBwK+/NisCe+fWPJJ1jvRmwC/A9Zk18JwHbRsRESQvnZS29d819P3/GfYF7Ja2dX/PuwKoRETX7m+UY+cfVtS3sF9IFUobk/TWZkJc1N4SZT1WbAAypOfbpSpPI/BM4KiJeJf37QdIDQF/glIi4i5ntDZzdSnxmXcpJ2xqtpakxB9HyFJ1bk74gycs7MvXm9Xk6UEgThFyemzwDmKtmvxc2NZ83HU/SlcC+ORluDHw1r5+pqbsFLfWnztQPlZtaDyclo4+Bv5Oq76b1F5B+NHwFOBHYPy9/iTT16WDgd5JuyMmFiDgwJ8bzgOHkHxlteD4iHs+vdRxwb06gjwPLtrD9A8Blkq4DmlodWnzvmtkrt6L0A5YCVif9sHgfuFjpoii3t3aMiHgGWLe1F9FCKwa0PNtba9v1A4YCD0TEsZKOBc4C9svrViL9OBgK/E3SmhHxn3zspUhzxN89667Nup6bx61hNPPUmOuQ+njnofUpOltbXrus+dSatdOGng78OV8uc+eabVvb76XAvsCXScl/eo77Ws08CKnp9tX8vAmkHyBNFwxZCJglmUXEbRHxmYjYGHiGNKFKcyNooak3V9jjgM82Wz6D9EPgS82f04IPau5/XPP4Y1r
4QR8Rh5F+QCwNjFHql29zOlWlGduOB7aKiLVJrQTz5PdyQ+DG/Pruau0YklZp5f0ek6vkCaSE2mQoMIlZffK5NNvuDeA90qQ2ANcDn655zi0R8VH+EfkMKYk32Qu4OSI+au09MOtKTtrWSK1NjdnaFJ3Np+Rsah5/VdJqSgOImqr21o43Md8/oGb5PcBhOcF+crycGCeRkshlTRtHxPB8rezmtyvyJreSK2NSf+efooURn5IWr3kdR5BmSCO3BDTZkZzMJQ3N3QhNz9kUeEbJinm5SD9Ins6PN5R0BV1A0goR8Y+IOInU1L80rbx3NRYk/XB6S9ISpCueoTSQbqGIuBP4FrmSbukYEfFMK+/3uhHxn4h4GXhH0kb59X8VuKWFl3AraXrYufO/rZVIF3AJ0kxyW+TttiK1BAD8jjyNrKSBpObyf9Xs88ukwW1mPcLN49ZId5G+8MeSKpiHIE3RmZtTb8qJ+DVgG+AHpCbjJ4AZpKk4bwK+S2pefYk0orq1051+QmoePxaovXLWxaQv47GSPiL1p5+f110NDIqIJ+m4S4ArJY0nVdifNOlLGhP/vYjGufrvfNynRcSz+f5Ruc/9I1LfetMPgNWAn0kKUoV7VkQ8nt+jy3OfvYDHgMPzc5YBZrpYSCf8NP+gEHBvPs4TtP7eERGPSXqU1CrwL1LzN6T50W+R1NSyckwbx+iIw0k/rAYAv8+3ptH3wyLipIgYl5vdnwSmA0fWdJ18h/SZnQM0jWGA1Oz9BaUpYGeQxhG8kfe9LOmHyyzTzJp1F5/yZdYGpZHTj0bEJY2OpR6SfgpcGRFjGx2LmXWek7ZZK5QugDEV2KbmVCszs4Zx0jYzMyuEB6KZmZkVwknbzMysEE7aZmZmhXDSNjMzK4STtpmZWSGctM3MzArx/8JpoYjLihYHAAAAAElFTkSuQmCC\n", 1109 | "text/plain": [ 1110 | "
" 1111 | ] 1112 | }, 1113 | "metadata": { 1114 | "needs_background": "light" 1115 | }, 1116 | "output_type": "display_data" 1117 | } 1118 | ], 1119 | "source": [ 1120 | "cm_plot = plot_confusion_matrix(cm, data.target_names, normalize=False)" 1121 | ] 1122 | }, 1123 | { 1124 | "cell_type": "code", 1125 | "execution_count": 39, 1126 | "metadata": {}, 1127 | "outputs": [], 1128 | "source": [ 1129 | "# Save confusion matrix image\n", 1130 | "confusion_matrix_image = './reports/confusion_matrix.png'\n", 1131 | "cm_plot.savefig(confusion_matrix_image)" 1132 | ] 1133 | }, 1134 | { 1135 | "cell_type": "code", 1136 | "execution_count": 40, 1137 | "metadata": {}, 1138 | "outputs": [ 1139 | { 1140 | "data": { 1141 | "text/plain": [ 1142 | "LineaArtifact(name='plot-confusion-matrix', _version=1)" 1143 | ] 1144 | }, 1145 | "execution_count": 40, 1146 | "metadata": {}, 1147 | "output_type": "execute_result" 1148 | } 1149 | ], 1150 | "source": [ 1151 | "#save confusion matrix to lineapy\n", 1152 | "lineapy.save(plot_confusion_matrix, \"plot-confusion-matrix\")" 1153 | ] 1154 | }, 1155 | { 1156 | "cell_type": "code", 1157 | "execution_count": null, 1158 | "metadata": {}, 1159 | "outputs": [], 1160 | "source": [ 1161 | "#commenting for change\n" 1162 | ] 1163 | } 1164 | ], 1165 | "metadata": { 1166 | "kernelspec": { 1167 | "display_name": "Python 3", 1168 | "language": "python", 1169 | "name": "python3" 1170 | }, 1171 | "language_info": { 1172 | "codemirror_mode": { 1173 | "name": "ipython", 1174 | "version": 3 1175 | }, 1176 | "file_extension": ".py", 1177 | "mimetype": "text/x-python", 1178 | "name": "python", 1179 | "nbconvert_exporter": "python", 1180 | "pygments_lexer": "ipython3", 1181 | "version": "3.8.5" 1182 | }, 1183 | "toc": { 1184 | "base_numbering": 1, 1185 | "nav_menu": {}, 1186 | "number_sections": true, 1187 | "sideBar": true, 1188 | "skip_h1_title": false, 1189 | "title_cell": "Table of Contents", 1190 | "title_sidebar": "Contents", 1191 | "toc_cell": false, 1192 
| "toc_position": {}, 1193 | "toc_section_display": true, 1194 | "toc_window_display": true 1195 | }, 1196 | "varInspector": { 1197 | "cols": { 1198 | "lenName": 16, 1199 | "lenType": 16, 1200 | "lenVar": 40 1201 | }, 1202 | "kernels_config": { 1203 | "python": { 1204 | "delete_cmd_postfix": "", 1205 | "delete_cmd_prefix": "del ", 1206 | "library": "var_list.py", 1207 | "varRefreshCmd": "print(var_dic_list())" 1208 | }, 1209 | "r": { 1210 | "delete_cmd_postfix": ") ", 1211 | "delete_cmd_prefix": "rm(", 1212 | "library": "var_list.r", 1213 | "varRefreshCmd": "cat(var_dic_list()) " 1214 | } 1215 | }, 1216 | "types_to_exclude": [ 1217 | "module", 1218 | "function", 1219 | "builtin_function_or_method", 1220 | "instance", 1221 | "_Feature" 1222 | ], 1223 | "window_display": false 1224 | } 1225 | }, 1226 | "nbformat": 4, 1227 | "nbformat_minor": 4 1228 | } 1229 | --------------------------------------------------------------------------------