└── PyCaret Classification Crash Course.ipynb
/PyCaret Classification Crash Course.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 1. Install and Import Dependencies"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "%pip install pycaret pandas shap"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "import pandas as pd\n",
26 | "from pycaret.classification import compare_models, load_model, predict_model, save_model, setup"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "# 2. Load Data"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 3,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "df = pd.read_csv('heart.csv')"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 4,
48 | "metadata": {},
49 | "outputs": [
50 | {
51 | "data": {
52 | "text/html": [
53 | "
\n",
54 | "\n",
67 | "
\n",
68 | " \n",
69 | " \n",
70 | " | \n",
71 | " age | \n",
72 | " sex | \n",
73 | " cp | \n",
74 | " trestbps | \n",
75 | " chol | \n",
76 | " fbs | \n",
77 | " restecg | \n",
78 | " thalach | \n",
79 | " exang | \n",
80 | " oldpeak | \n",
81 | " slope | \n",
82 | " ca | \n",
83 | " thal | \n",
84 | " target | \n",
85 | "
\n",
86 | " \n",
87 | " \n",
88 | " \n",
89 | " 0 | \n",
90 | " 63 | \n",
91 | " 1 | \n",
92 | " 3 | \n",
93 | " 145 | \n",
94 | " 233 | \n",
95 | " 1 | \n",
96 | " 0 | \n",
97 | " 150 | \n",
98 | " 0 | \n",
99 | " 2.3 | \n",
100 | " 0 | \n",
101 | " 0 | \n",
102 | " 1 | \n",
103 | " 1 | \n",
104 | "
\n",
105 | " \n",
106 | " 1 | \n",
107 | " 37 | \n",
108 | " 1 | \n",
109 | " 2 | \n",
110 | " 130 | \n",
111 | " 250 | \n",
112 | " 0 | \n",
113 | " 1 | \n",
114 | " 187 | \n",
115 | " 0 | \n",
116 | " 3.5 | \n",
117 | " 0 | \n",
118 | " 0 | \n",
119 | " 2 | \n",
120 | " 1 | \n",
121 | "
\n",
122 | " \n",
123 | " 2 | \n",
124 | " 41 | \n",
125 | " 0 | \n",
126 | " 1 | \n",
127 | " 130 | \n",
128 | " 204 | \n",
129 | " 0 | \n",
130 | " 0 | \n",
131 | " 172 | \n",
132 | " 0 | \n",
133 | " 1.4 | \n",
134 | " 2 | \n",
135 | " 0 | \n",
136 | " 2 | \n",
137 | " 1 | \n",
138 | "
\n",
139 | " \n",
140 | " 3 | \n",
141 | " 56 | \n",
142 | " 1 | \n",
143 | " 1 | \n",
144 | " 120 | \n",
145 | " 236 | \n",
146 | " 0 | \n",
147 | " 1 | \n",
148 | " 178 | \n",
149 | " 0 | \n",
150 | " 0.8 | \n",
151 | " 2 | \n",
152 | " 0 | \n",
153 | " 2 | \n",
154 | " 1 | \n",
155 | "
\n",
156 | " \n",
157 | " 4 | \n",
158 | " 57 | \n",
159 | " 0 | \n",
160 | " 0 | \n",
161 | " 120 | \n",
162 | " 354 | \n",
163 | " 0 | \n",
164 | " 1 | \n",
165 | " 163 | \n",
166 | " 1 | \n",
167 | " 0.6 | \n",
168 | " 2 | \n",
169 | " 0 | \n",
170 | " 2 | \n",
171 | " 1 | \n",
172 | "
\n",
173 | " \n",
174 | "
\n",
175 | "
"
176 | ],
177 | "text/plain": [
178 | " age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n",
179 | "0 63 1 3 145 233 1 0 150 0 2.3 0 \n",
180 | "1 37 1 2 130 250 0 1 187 0 3.5 0 \n",
181 | "2 41 0 1 130 204 0 0 172 0 1.4 2 \n",
182 | "3 56 1 1 120 236 0 1 178 0 0.8 2 \n",
183 | "4 57 0 0 120 354 0 1 163 1 0.6 2 \n",
184 | "\n",
185 | " ca thal target \n",
186 | "0 0 1 1 \n",
187 | "1 0 2 1 \n",
188 | "2 0 2 1 \n",
189 | "3 0 2 1 \n",
190 | "4 0 2 1 "
191 | ]
192 | },
193 | "execution_count": 4,
194 | "metadata": {},
195 | "output_type": "execute_result"
196 | }
197 | ],
198 | "source": [
199 | "df.head()"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 5,
205 | "metadata": {
206 | "collapsed": true
207 | },
208 | "outputs": [
209 | {
210 | "data": {
211 | "text/plain": [
212 | "age int64\n",
213 | "sex int64\n",
214 | "cp int64\n",
215 | "trestbps int64\n",
216 | "chol int64\n",
217 | "fbs int64\n",
218 | "restecg int64\n",
219 | "thalach int64\n",
220 | "exang int64\n",
221 | "oldpeak float64\n",
222 | "slope int64\n",
223 | "ca int64\n",
224 | "thal int64\n",
225 | "target int64\n",
226 | "dtype: object"
227 | ]
228 | },
229 | "execution_count": 5,
230 | "metadata": {},
231 | "output_type": "execute_result"
232 | }
233 | ],
234 | "source": [
235 | "df.dtypes"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {},
241 | "source": [
242 | "# 3. Train and Evaluate Model"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": 6,
248 | "metadata": {},
249 | "outputs": [],
250 | "source": [
251 | "cat_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'thal']"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": 8,
257 | "metadata": {
258 | "collapsed": true
259 | },
260 | "outputs": [
261 | {
262 | "data": {
263 | "text/html": [
264 | " | Description | Value |
\n",
266 | " \n",
267 | " 0 | \n",
268 | " session_id | \n",
269 | " 8760 | \n",
270 | "
\n",
271 | " \n",
272 | " 1 | \n",
273 | " Target | \n",
274 | " target | \n",
275 | "
\n",
276 | " \n",
277 | " 2 | \n",
278 | " Target Type | \n",
279 | " Binary | \n",
280 | "
\n",
281 | " \n",
282 | " 3 | \n",
283 | " Label Encoded | \n",
284 | " 0: 0, 1: 1 | \n",
285 | "
\n",
286 | " \n",
287 | " 4 | \n",
288 | " Original Data | \n",
289 | " (303, 14) | \n",
290 | "
\n",
291 | " \n",
292 | " 5 | \n",
293 | " Missing Values | \n",
294 | " False | \n",
295 | "
\n",
296 | " \n",
297 | " 6 | \n",
298 | " Numeric Features | \n",
299 | " 5 | \n",
300 | "
\n",
301 | " \n",
302 | " 7 | \n",
303 | " Categorical Features | \n",
304 | " 8 | \n",
305 | "
\n",
306 | " \n",
307 | " 8 | \n",
308 | " Ordinal Features | \n",
309 | " False | \n",
310 | "
\n",
311 | " \n",
312 | " 9 | \n",
313 | " High Cardinality Features | \n",
314 | " False | \n",
315 | "
\n",
316 | " \n",
317 | " 10 | \n",
318 | " High Cardinality Method | \n",
319 | " None | \n",
320 | "
\n",
321 | " \n",
322 | " 11 | \n",
323 | " Transformed Train Set | \n",
324 | " (212, 27) | \n",
325 | "
\n",
326 | " \n",
327 | " 12 | \n",
328 | " Transformed Test Set | \n",
329 | " (91, 27) | \n",
330 | "
\n",
331 | " \n",
332 | " 13 | \n",
333 | " Shuffle Train-Test | \n",
334 | " True | \n",
335 | "
\n",
336 | " \n",
337 | " 14 | \n",
338 | " Stratify Train-Test | \n",
339 | " False | \n",
340 | "
\n",
341 | " \n",
342 | " 15 | \n",
343 | " Fold Generator | \n",
344 | " StratifiedKFold | \n",
345 | "
\n",
346 | " \n",
347 | " 16 | \n",
348 | " Fold Number | \n",
349 | " 10 | \n",
350 | "
\n",
351 | " \n",
352 | " 17 | \n",
353 | " CPU Jobs | \n",
354 | " -1 | \n",
355 | "
\n",
356 | " \n",
357 | " 18 | \n",
358 | " Use GPU | \n",
359 | " False | \n",
360 | "
\n",
361 | " \n",
362 | " 19 | \n",
363 | " Log Experiment | \n",
364 | " False | \n",
365 | "
\n",
366 | " \n",
367 | " 20 | \n",
368 | " Experiment Name | \n",
369 | " clf-default-name | \n",
370 | "
\n",
371 | " \n",
372 | " 21 | \n",
373 | " USI | \n",
374 | " 8cc5 | \n",
375 | "
\n",
376 | " \n",
377 | " 22 | \n",
378 | " Imputation Type | \n",
379 | " simple | \n",
380 | "
\n",
381 | " \n",
382 | " 23 | \n",
383 | " Iterative Imputation Iteration | \n",
384 | " None | \n",
385 | "
\n",
386 | " \n",
387 | " 24 | \n",
388 | " Numeric Imputer | \n",
389 | " mean | \n",
390 | "
\n",
391 | " \n",
392 | " 25 | \n",
393 | " Iterative Imputation Numeric Model | \n",
394 | " None | \n",
395 | "
\n",
396 | " \n",
397 | " 26 | \n",
398 | " Categorical Imputer | \n",
399 | " constant | \n",
400 | "
\n",
401 | " \n",
402 | " 27 | \n",
403 | " Iterative Imputation Categorical Model | \n",
404 | " None | \n",
405 | "
\n",
406 | " \n",
407 | " 28 | \n",
408 | " Unknown Categoricals Handling | \n",
409 | " least_frequent | \n",
410 | "
\n",
411 | " \n",
412 | " 29 | \n",
413 | " Normalize | \n",
414 | " False | \n",
415 | "
\n",
416 | " \n",
417 | " 30 | \n",
418 | " Normalize Method | \n",
419 | " None | \n",
420 | "
\n",
421 | " \n",
422 | " 31 | \n",
423 | " Transformation | \n",
424 | " False | \n",
425 | "
\n",
426 | " \n",
427 | " 32 | \n",
428 | " Transformation Method | \n",
429 | " None | \n",
430 | "
\n",
431 | " \n",
432 | " 33 | \n",
433 | " PCA | \n",
434 | " False | \n",
435 | "
\n",
436 | " \n",
437 | " 34 | \n",
438 | " PCA Method | \n",
439 | " None | \n",
440 | "
\n",
441 | " \n",
442 | " 35 | \n",
443 | " PCA Components | \n",
444 | " None | \n",
445 | "
\n",
446 | " \n",
447 | " 36 | \n",
448 | " Ignore Low Variance | \n",
449 | " False | \n",
450 | "
\n",
451 | " \n",
452 | " 37 | \n",
453 | " Combine Rare Levels | \n",
454 | " False | \n",
455 | "
\n",
456 | " \n",
457 | " 38 | \n",
458 | " Rare Level Threshold | \n",
459 | " None | \n",
460 | "
\n",
461 | " \n",
462 | " 39 | \n",
463 | " Numeric Binning | \n",
464 | " False | \n",
465 | "
\n",
466 | " \n",
467 | " 40 | \n",
468 | " Remove Outliers | \n",
469 | " False | \n",
470 | "
\n",
471 | " \n",
472 | " 41 | \n",
473 | " Outliers Threshold | \n",
474 | " None | \n",
475 | "
\n",
476 | " \n",
477 | " 42 | \n",
478 | " Remove Multicollinearity | \n",
479 | " False | \n",
480 | "
\n",
481 | " \n",
482 | " 43 | \n",
483 | " Multicollinearity Threshold | \n",
484 | " None | \n",
485 | "
\n",
486 | " \n",
487 | " 44 | \n",
488 | " Clustering | \n",
489 | " False | \n",
490 | "
\n",
491 | " \n",
492 | " 45 | \n",
493 | " Clustering Iteration | \n",
494 | " None | \n",
495 | "
\n",
496 | " \n",
497 | " 46 | \n",
498 | " Polynomial Features | \n",
499 | " False | \n",
500 | "
\n",
501 | " \n",
502 | " 47 | \n",
503 | " Polynomial Degree | \n",
504 | " None | \n",
505 | "
\n",
506 | " \n",
507 | " 48 | \n",
508 | " Trignometry Features | \n",
509 | " False | \n",
510 | "
\n",
511 | " \n",
512 | " 49 | \n",
513 | " Polynomial Threshold | \n",
514 | " None | \n",
515 | "
\n",
516 | " \n",
517 | " 50 | \n",
518 | " Group Features | \n",
519 | " False | \n",
520 | "
\n",
521 | " \n",
522 | " 51 | \n",
523 | " Feature Selection | \n",
524 | " False | \n",
525 | "
\n",
526 | " \n",
527 | " 52 | \n",
528 | " Feature Selection Method | \n",
529 | " classic | \n",
530 | "
\n",
531 | " \n",
532 | " 53 | \n",
533 | " Features Selection Threshold | \n",
534 | " None | \n",
535 | "
\n",
536 | " \n",
537 | " 54 | \n",
538 | " Feature Interaction | \n",
539 | " False | \n",
540 | "
\n",
541 | " \n",
542 | " 55 | \n",
543 | " Feature Ratio | \n",
544 | " False | \n",
545 | "
\n",
546 | " \n",
547 | " 56 | \n",
548 | " Interaction Threshold | \n",
549 | " None | \n",
550 | "
\n",
551 | " \n",
552 | " 57 | \n",
553 | " Fix Imbalance | \n",
554 | " False | \n",
555 | "
\n",
556 | " \n",
557 | " 58 | \n",
558 | " Fix Imbalance Method | \n",
559 | " SMOTE | \n",
560 | "
\n",
561 | "
"
562 | ],
563 | "text/plain": [
564 | ""
565 | ]
566 | },
567 | "metadata": {},
568 | "output_type": "display_data"
569 | }
570 | ],
571 | "source": [
572 | "experiment = setup(df, target='target', categorical_features=cat_features, session_id=8760)  # fixed seed (matches the recorded run) so re-runs are reproducible"
573 | ]
574 | },
575 | {
576 | "cell_type": "code",
577 | "execution_count": 9,
578 | "metadata": {},
579 | "outputs": [
580 | {
581 | "data": {
582 | "text/html": [
583 | " | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) |
\n",
603 | " \n",
604 | " ridge | \n",
605 | " Ridge Classifier | \n",
606 | " 0.8299 | \n",
607 | " 0.0000 | \n",
608 | " 0.8788 | \n",
609 | " 0.8338 | \n",
610 | " 0.8488 | \n",
611 | " 0.6518 | \n",
612 | " 0.6672 | \n",
613 | " 0.0030 | \n",
614 | "
\n",
615 | " \n",
616 | " lda | \n",
617 | " Linear Discriminant Analysis | \n",
618 | " 0.8251 | \n",
619 | " 0.9058 | \n",
620 | " 0.8697 | \n",
621 | " 0.8310 | \n",
622 | " 0.8428 | \n",
623 | " 0.6426 | \n",
624 | " 0.6581 | \n",
625 | " 0.0030 | \n",
626 | "
\n",
627 | " \n",
628 | " lr | \n",
629 | " Logistic Regression | \n",
630 | " 0.8206 | \n",
631 | " 0.9015 | \n",
632 | " 0.8606 | \n",
633 | " 0.8248 | \n",
634 | " 0.8384 | \n",
635 | " 0.6334 | \n",
636 | " 0.6440 | \n",
637 | " 0.3060 | \n",
638 | "
\n",
639 | " \n",
640 | " nb | \n",
641 | " Naive Bayes | \n",
642 | " 0.8203 | \n",
643 | " 0.8805 | \n",
644 | " 0.8788 | \n",
645 | " 0.8131 | \n",
646 | " 0.8424 | \n",
647 | " 0.6330 | \n",
648 | " 0.6423 | \n",
649 | " 0.0030 | \n",
650 | "
\n",
651 | " \n",
652 | " et | \n",
653 | " Extra Trees Classifier | \n",
654 | " 0.8108 | \n",
655 | " 0.8845 | \n",
656 | " 0.8371 | \n",
657 | " 0.8333 | \n",
658 | " 0.8269 | \n",
659 | " 0.6169 | \n",
660 | " 0.6327 | \n",
661 | " 0.0850 | \n",
662 | "
\n",
663 | " \n",
664 | " rf | \n",
665 | " Random Forest Classifier | \n",
666 | " 0.8065 | \n",
667 | " 0.8818 | \n",
668 | " 0.8455 | \n",
669 | " 0.8115 | \n",
670 | " 0.8229 | \n",
671 | " 0.6065 | \n",
672 | " 0.6165 | \n",
673 | " 0.0950 | \n",
674 | "
\n",
675 | " \n",
676 | " ada | \n",
677 | " Ada Boost Classifier | \n",
678 | " 0.7779 | \n",
679 | " 0.8087 | \n",
680 | " 0.8265 | \n",
681 | " 0.7883 | \n",
682 | " 0.8027 | \n",
683 | " 0.5453 | \n",
684 | " 0.5547 | \n",
685 | " 0.0130 | \n",
686 | "
\n",
687 | " \n",
688 | " lightgbm | \n",
689 | " Light Gradient Boosting Machine | \n",
690 | " 0.7729 | \n",
691 | " 0.8655 | \n",
692 | " 0.8182 | \n",
693 | " 0.7879 | \n",
694 | " 0.7966 | \n",
695 | " 0.5337 | \n",
696 | " 0.5443 | \n",
697 | " 0.0080 | \n",
698 | "
\n",
699 | " \n",
700 | " gbc | \n",
701 | " Gradient Boosting Classifier | \n",
702 | " 0.7496 | \n",
703 | " 0.8426 | \n",
704 | " 0.7932 | \n",
705 | " 0.7640 | \n",
706 | " 0.7726 | \n",
707 | " 0.4884 | \n",
708 | " 0.4968 | \n",
709 | " 0.0100 | \n",
710 | "
\n",
711 | " \n",
712 | " dt | \n",
713 | " Decision Tree Classifier | \n",
714 | " 0.6740 | \n",
715 | " 0.6681 | \n",
716 | " 0.7152 | \n",
717 | " 0.7021 | \n",
718 | " 0.7032 | \n",
719 | " 0.3368 | \n",
720 | " 0.3415 | \n",
721 | " 0.0030 | \n",
722 | "
\n",
723 | " \n",
724 | " knn | \n",
725 | " K Neighbors Classifier | \n",
726 | " 0.6654 | \n",
727 | " 0.6676 | \n",
728 | " 0.6977 | \n",
729 | " 0.6983 | \n",
730 | " 0.6956 | \n",
731 | " 0.3233 | \n",
732 | " 0.3263 | \n",
733 | " 0.2130 | \n",
734 | "
\n",
735 | " \n",
736 | " qda | \n",
737 | " Quadratic Discriminant Analysis | \n",
738 | " 0.6565 | \n",
739 | " 0.6511 | \n",
740 | " 0.6621 | \n",
741 | " 0.7063 | \n",
742 | " 0.6636 | \n",
743 | " 0.3035 | \n",
744 | " 0.3284 | \n",
745 | " 0.0040 | \n",
746 | "
\n",
747 | " \n",
748 | " svm | \n",
749 | " SVM - Linear Kernel | \n",
750 | " 0.6227 | \n",
751 | " 0.0000 | \n",
752 | " 0.8189 | \n",
753 | " 0.6447 | \n",
754 | " 0.6919 | \n",
755 | " 0.2151 | \n",
756 | " 0.2302 | \n",
757 | " 0.0060 | \n",
758 | "
\n",
759 | "
"
760 | ],
761 | "text/plain": [
762 | ""
763 | ]
764 | },
765 | "metadata": {},
766 | "output_type": "display_data"
767 | }
768 | ],
769 | "source": [
770 | "best_model = compare_models()"
771 | ]
772 | },
773 | {
774 | "cell_type": "markdown",
775 | "metadata": {},
776 | "source": [
777 | "# 4. Test Model"
778 | ]
779 | },
780 | {
781 | "cell_type": "code",
782 | "execution_count": 13,
783 | "metadata": {},
784 | "outputs": [
785 | {
786 | "data": {
787 | "text/html": [
788 | "\n",
789 | "\n",
802 | "
\n",
803 | " \n",
804 | " \n",
805 | " | \n",
806 | " age | \n",
807 | " sex | \n",
808 | " cp | \n",
809 | " trestbps | \n",
810 | " chol | \n",
811 | " fbs | \n",
812 | " restecg | \n",
813 | " thalach | \n",
814 | " exang | \n",
815 | " oldpeak | \n",
816 | " slope | \n",
817 | " ca | \n",
818 | " thal | \n",
819 | " target | \n",
820 | " Label | \n",
821 | "
\n",
822 | " \n",
823 | " \n",
824 | " \n",
825 | " 298 | \n",
826 | " 57 | \n",
827 | " 0 | \n",
828 | " 0 | \n",
829 | " 140 | \n",
830 | " 241 | \n",
831 | " 0 | \n",
832 | " 1 | \n",
833 | " 123 | \n",
834 | " 1 | \n",
835 | " 0.2 | \n",
836 | " 1 | \n",
837 | " 0 | \n",
838 | " 3 | \n",
839 | " 0 | \n",
840 | " 0 | \n",
841 | "
\n",
842 | " \n",
843 | " 299 | \n",
844 | " 45 | \n",
845 | " 1 | \n",
846 | " 3 | \n",
847 | " 110 | \n",
848 | " 264 | \n",
849 | " 0 | \n",
850 | " 1 | \n",
851 | " 132 | \n",
852 | " 0 | \n",
853 | " 1.2 | \n",
854 | " 1 | \n",
855 | " 0 | \n",
856 | " 3 | \n",
857 | " 0 | \n",
858 | " 1 | \n",
859 | "
\n",
860 | " \n",
861 | " 300 | \n",
862 | " 68 | \n",
863 | " 1 | \n",
864 | " 0 | \n",
865 | " 144 | \n",
866 | " 193 | \n",
867 | " 1 | \n",
868 | " 1 | \n",
869 | " 141 | \n",
870 | " 0 | \n",
871 | " 3.4 | \n",
872 | " 1 | \n",
873 | " 2 | \n",
874 | " 3 | \n",
875 | " 0 | \n",
876 | " 0 | \n",
877 | "
\n",
878 | " \n",
879 | " 301 | \n",
880 | " 57 | \n",
881 | " 1 | \n",
882 | " 0 | \n",
883 | " 130 | \n",
884 | " 131 | \n",
885 | " 0 | \n",
886 | " 1 | \n",
887 | " 115 | \n",
888 | " 1 | \n",
889 | " 1.2 | \n",
890 | " 1 | \n",
891 | " 1 | \n",
892 | " 3 | \n",
893 | " 0 | \n",
894 | " 0 | \n",
895 | "
\n",
896 | " \n",
897 | " 302 | \n",
898 | " 57 | \n",
899 | " 0 | \n",
900 | " 1 | \n",
901 | " 130 | \n",
902 | " 236 | \n",
903 | " 0 | \n",
904 | " 0 | \n",
905 | " 174 | \n",
906 | " 0 | \n",
907 | " 0.0 | \n",
908 | " 1 | \n",
909 | " 1 | \n",
910 | " 2 | \n",
911 | " 0 | \n",
912 | " 1 | \n",
913 | "
\n",
914 | " \n",
915 | "
\n",
916 | "
"
917 | ],
918 | "text/plain": [
919 | " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n",
920 | "298 57 0 0 140 241 0 1 123 1 0.2 \n",
921 | "299 45 1 3 110 264 0 1 132 0 1.2 \n",
922 | "300 68 1 0 144 193 1 1 141 0 3.4 \n",
923 | "301 57 1 0 130 131 0 1 115 1 1.2 \n",
924 | "302 57 0 1 130 236 0 0 174 0 0.0 \n",
925 | "\n",
926 | " slope ca thal target Label \n",
927 | "298 1 0 3 0 0 \n",
928 | "299 1 0 3 0 1 \n",
929 | "300 1 2 3 0 0 \n",
930 | "301 1 1 3 0 0 \n",
931 | "302 1 1 2 0 1 "
932 | ]
933 | },
934 | "execution_count": 13,
935 | "metadata": {},
936 | "output_type": "execute_result"
937 | }
938 | ],
939 | "source": [
940 | "predict_model(best_model, data=df.tail())"
941 | ]
942 | },
943 | {
944 | "cell_type": "markdown",
945 | "metadata": {},
946 | "source": [
947 | "# 5. Save and Load Model"
948 | ]
949 | },
950 | {
951 | "cell_type": "code",
952 | "execution_count": 16,
953 | "metadata": {
954 | "collapsed": true
955 | },
956 | "outputs": [
957 | {
958 | "name": "stdout",
959 | "output_type": "stream",
960 | "text": [
961 | "Transformation Pipeline and Model Succesfully Saved\n"
962 | ]
963 | },
964 | {
965 | "data": {
966 | "text/plain": [
967 | "(Pipeline(memory=None,\n",
968 | " steps=[('dtypes',\n",
969 | " DataTypes_Auto_infer(categorical_features=['sex', 'cp', 'fbs',\n",
970 | " 'restecg', 'exang',\n",
971 | " 'thal'],\n",
972 | " display_types=True, features_todrop=[],\n",
973 | " id_columns=[],\n",
974 | " ml_usecase='classification',\n",
975 | " numerical_features=[], target='target',\n",
976 | " time_features=[])),\n",
977 | " ('imputer',\n",
978 | " Simple_Imputer(categorical_strategy='not_available',\n",
979 | " fill_value_categorical=Non...\n",
980 | " ('fix_perfect', Remove_100(target='target')),\n",
981 | " ('clean_names', Clean_Colum_Names()),\n",
982 | " ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'),\n",
983 | " ('dfs', 'passthrough'), ('pca', 'passthrough'),\n",
984 | " ['trained_model',\n",
985 | " RidgeClassifier(alpha=1.0, class_weight=None, copy_X=True,\n",
986 | " fit_intercept=True, max_iter=None,\n",
987 | " normalize=False, random_state=8760,\n",
988 | " solver='auto', tol=0.001)]],\n",
989 | " verbose=False),\n",
990 | " 'ridge-model.pkl')"
991 | ]
992 | },
993 | "execution_count": 16,
994 | "metadata": {},
995 | "output_type": "execute_result"
996 | }
997 | ],
998 | "source": [
999 | "save_model(best_model, model_name='ridge-model')"
1000 | ]
1001 | },
1002 | {
1003 | "cell_type": "code",
1004 | "execution_count": 18,
1005 | "metadata": {},
1006 | "outputs": [
1007 | {
1008 | "name": "stdout",
1009 | "output_type": "stream",
1010 | "text": [
1011 | "Transformation Pipeline and Model Successfully Loaded\n"
1012 | ]
1013 | }
1014 | ],
1015 | "source": [
1016 | "model = load_model('ridge-model')"
1017 | ]
1018 | },
1019 | {
1020 | "cell_type": "code",
1021 | "execution_count": 19,
1022 | "metadata": {},
1023 | "outputs": [
1024 | {
1025 | "data": {
1026 | "text/plain": [
1027 | "array([0, 1, 0, 0, 1])"
1028 | ]
1029 | },
1030 | "execution_count": 19,
1031 | "metadata": {},
1032 | "output_type": "execute_result"
1033 | }
1034 | ],
1035 | "source": [
1036 | "model.predict(df.tail())"
1037 | ]
1038 | }
1039 | ],
1040 | "metadata": {
1041 | "kernelspec": {
1042 | "display_name": "pycaret",
1043 | "language": "python",
1044 | "name": "pycaret"
1045 | },
1046 | "language_info": {
1047 | "codemirror_mode": {
1048 | "name": "ipython",
1049 | "version": 3
1050 | },
1051 | "file_extension": ".py",
1052 | "mimetype": "text/x-python",
1053 | "name": "python",
1054 | "nbconvert_exporter": "python",
1055 | "pygments_lexer": "ipython3",
1056 | "version": "3.7.3"
1057 | }
1058 | },
1059 | "nbformat": 4,
1060 | "nbformat_minor": 2
1061 | }
1062 |
--------------------------------------------------------------------------------