├── .gitignore
├── LICENSE
├── README.md
├── figs
├── Danmini_Doorbell-heatmap.png
├── Ecobee_Thermostat-heatmap.png
├── Ennio_Doorbell-heatmap.png
├── Philips_B120N10_Baby_Monitor-heatmap.png
├── Provision_PT_737E_Security_Camera-heatmap.png
├── Provision_PT_838_Security_Camera-heatmap.png
├── Samsung_SNH_1011_N_Webcam-heatmap.png
├── SimpleHome_XCS7_1002_WHT_Security_Camera-heatmap.png
└── SimpleHome_XCS7_1003_WHT_Security_Camera-heatmap.png
├── models
├── Danmini_Doorbell
│ ├── Danmini_Doorbell_without_scaling_unbalanced_model.pkl
│ └── report.txt
└── generic_without_scaling_unbalanced_model.pkl
├── reports
├── Botnet-detection-on-IoT-devices.pdf
├── BotnetDetection-IoTDevices-Presentation.pdf
├── benign_profile.html
├── gafgyt_profile.html
├── mirai_profile.html
├── model_training_results.docx
├── pycaret-model.ipynb
└── s3-preprocessing&training.html
├── s1-data-wrangling.ipynb
├── s2-eda.ipynb
├── s3-preprocessing&training.ipynb
├── s4-modeling.ipynb
└── scripts
├── models.py
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Dineshkumar Sundaram
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Botnet detection on IoT Devices
2 | 
3 | ### Introduction:
4 | Internet of Things (IoT) devices are widely used in modern homes and in every part of our lives. Because they are not very sophisticated, they become easy targets for denial-of-service attacks. IoT devices can be used as bots to launch a distributed DoS attack.
5 |
6 | The rapid growth of IoT devices, which can be more easily compromised than desktop computers, has led to an increase in the occurrence of IoT-based botnet attacks. A botnet attack is a type of DDoS attack in which the attacker uses a large number of IoT devices to overwhelm a specific target. This type of attack is hard to detect, since each device keeps functioning normally and its owner will not notice that it is taking part in an attack; in some cases the device may only suffer delays in its functionality.
7 |
8 | Botnets such as Mirai are typically constructed in several distinct operational steps:
9 | - propagation
10 | - infection
11 | - C&C communication
12 | - execution of attacks.
13 |
14 |
15 |
16 | ### Dataset:
17 | [Download](https://archive.ics.uci.edu/ml/datasets/detection_of_IoT_botnet_attacks_N_BaIoT)
18 | The N-BaIoT dataset was collected from a real network traffic of nine IoT devices. The data contains both benign and attack traffic. The dataset is separated where each device has its files, each file contains a type of traffic such as normal traffic or attacks. There are ten classes of attacks that were generated using two families of botnet attack codes from the github (Mirai, Bashlite). N-BaIoT dataset has 115 features, all of these features are statistical analysis, which is extracted from the packet traffic for various periods.
19 |
20 | The dataset contains normal & attack traffic for the following nine devices.
21 | - Danmini - Doorbell
22 | - Ennio - Doorbell
23 | - Ecobee - Thermostat
24 | - Philips B120N/10 - Baby Monitor
25 | - Provision PT-737E - Security Camera
26 | - Provision PT-838 - Security Camera
27 | - Simple Home XCS7-1002-WHT - Security Camera
28 | - Simple Home XCS7-1003-WHT - Security Camera
29 | - Samsung SNH 1011 N - Web cam
30 |
31 | #### Feature information:
32 | ##### Stream aggregation:
33 | - H: ("Source IP" in N-BaIoT paper) Stats summarizing the recent traffic from this packet's host (IP)
34 | - MI: ("Source MAC-IP" in N-BaIoT paper) Stats summarizing the recent traffic from this packet's host (IP + MAC)
35 | - HH: ("Channel" in N-BaIoT paper) Stats summarizing the recent traffic going from this packet's host (IP) to the packet's destination host.
36 | - HH_jit: ("Channel jitter" in N-BaIoT paper) Stats summarizing the jitter of the traffic going from this packet's host (IP) to the packet's destination host.
37 | - HpHp: ("Socket" in N-BaIoT paper) Stats summarizing the recent traffic going from this packet's host+port (IP) to the packet's destination host+port. Example 192.168.4.2:1242 -> 192.168.4.12:80
38 |
39 | - Time-frame (The decay factor Lambda used in the damped window):
40 | - How much recent history of the stream is captured in these statistics
41 | - L5, L3, L1, L0.1 and L0.01
42 |
43 | - The statistics extracted from the packet stream:
44 | - weight: The weight of the stream (can be viewed as the number of items observed in recent history)
45 | - mean: …
46 | - std: …
47 | - radius: The root squared sum of the two streams' variances
48 | - magnitude: The root squared sum of the two streams' means
49 | - cov: An approximated covariance between two streams
50 | - pcc: An approximated correlation coefficient between two streams
51 |
52 | ### EDA
53 |
54 | | Device | Chart |
55 | | --- | --- |
56 | | Ennio Door bell | 
57 | | Danmini Doorbell | 
58 | | Ecobee Thermostat | 
59 | | Ennio Door bell | 
60 | | Danmini Doorbell | 
61 | | Ecobee Thermostat | 
62 | | Ennio Door bell | 
63 | | Danmini Doorbell | 
64 | | Ecobee Thermostat | 
65 |
66 | ### Pre processing & Training
67 |
68 | ### Modeling
69 |
70 | ### Deployment
71 |
72 | ### Future works
73 |
74 | ### Credits & Links
75 |
76 |
--------------------------------------------------------------------------------
/figs/Danmini_Doorbell-heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/figs/Danmini_Doorbell-heatmap.png
--------------------------------------------------------------------------------
/figs/Ecobee_Thermostat-heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/figs/Ecobee_Thermostat-heatmap.png
--------------------------------------------------------------------------------
/figs/Ennio_Doorbell-heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/figs/Ennio_Doorbell-heatmap.png
--------------------------------------------------------------------------------
/figs/Philips_B120N10_Baby_Monitor-heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/figs/Philips_B120N10_Baby_Monitor-heatmap.png
--------------------------------------------------------------------------------
/figs/Provision_PT_737E_Security_Camera-heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/figs/Provision_PT_737E_Security_Camera-heatmap.png
--------------------------------------------------------------------------------
/figs/Provision_PT_838_Security_Camera-heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/figs/Provision_PT_838_Security_Camera-heatmap.png
--------------------------------------------------------------------------------
/figs/Samsung_SNH_1011_N_Webcam-heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/figs/Samsung_SNH_1011_N_Webcam-heatmap.png
--------------------------------------------------------------------------------
/figs/SimpleHome_XCS7_1002_WHT_Security_Camera-heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/figs/SimpleHome_XCS7_1002_WHT_Security_Camera-heatmap.png
--------------------------------------------------------------------------------
/figs/SimpleHome_XCS7_1003_WHT_Security_Camera-heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/figs/SimpleHome_XCS7_1003_WHT_Security_Camera-heatmap.png
--------------------------------------------------------------------------------
/models/Danmini_Doorbell/Danmini_Doorbell_without_scaling_unbalanced_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/models/Danmini_Doorbell/Danmini_Doorbell_without_scaling_unbalanced_model.pkl
--------------------------------------------------------------------------------
/models/Danmini_Doorbell/report.txt:
--------------------------------------------------------------------------------
1 | Classification Report on Test Set
2 |
3 |
4 | precision recall f1-score support
5 |
6 | benign 1.00 1.00 1.00 10449
7 | gafgyt 1.00 1.00 1.00 66325
8 | mirai 1.00 1.00 1.00 137069
9 |
10 | accuracy 1.00 213843
11 | macro avg 1.00 1.00 1.00 213843
12 | weighted avg 1.00 1.00 1.00 213843
13 |
14 |
15 |
16 | Confusion Matrix on Test Set
17 |
18 |
19 | [[ 10447 2 0]
20 | [ 1 66324 0]
21 | [ 0 0 137069]]
22 |
23 |
--------------------------------------------------------------------------------
/models/generic_without_scaling_unbalanced_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/models/generic_without_scaling_unbalanced_model.pkl
--------------------------------------------------------------------------------
/reports/Botnet-detection-on-IoT-devices.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/reports/Botnet-detection-on-IoT-devices.pdf
--------------------------------------------------------------------------------
/reports/BotnetDetection-IoTDevices-Presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/reports/BotnetDetection-IoTDevices-Presentation.pdf
--------------------------------------------------------------------------------
/reports/model_training_results.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dineshh912/IoT-botnet-attack-detection/93b88f6ff52b6e09324eaf8a0199ab04dadc998d/reports/model_training_results.docx
--------------------------------------------------------------------------------
/reports/pycaret-model.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# Importing necssary modules\n",
10 | "import pandas as pd\n",
11 | "from datetime import datetime\n",
12 | "from scripts.utils import load_data, load_data_multi_label, load_data_all"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 2,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "# Data folder path and Extention of the data files\n",
22 | "base_directory = '../rawdata'\n",
23 | "file_extension = \"*.csv\""
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 3,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "danmini_doorbell_df = load_data(base_directory, file_extension, 'Danmini_Doorbell')"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 4,
38 | "metadata": {},
39 | "outputs": [
40 | {
41 | "data": {
42 | "text/plain": [
43 | "(1018298, 117)"
44 | ]
45 | },
46 | "execution_count": 4,
47 | "metadata": {},
48 | "output_type": "execute_result"
49 | }
50 | ],
51 | "source": [
52 | "danmini_doorbell_df.shape"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 6,
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "df = danmini_doorbell_df.sample(frac=0.3)"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 7,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "data": {
71 | "text/plain": [
72 | "(305489, 117)"
73 | ]
74 | },
75 | "execution_count": 7,
76 | "metadata": {},
77 | "output_type": "execute_result"
78 | }
79 | ],
80 | "source": [
81 | "df.shape"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 8,
87 | "metadata": {},
88 | "outputs": [
89 | {
90 | "data": {
91 | "text/plain": [
92 | "'2.1.2'"
93 | ]
94 | },
95 | "execution_count": 8,
96 | "metadata": {},
97 | "output_type": "execute_result"
98 | }
99 | ],
100 | "source": [
101 | "# check version\n",
102 | "from pycaret.utils import version\n",
103 | "version()"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": 9,
109 | "metadata": {},
110 | "outputs": [],
111 | "source": [
112 | "from pycaret.classification import *"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": 10,
118 | "metadata": {},
119 | "outputs": [
120 | {
121 | "name": "stdout",
122 | "output_type": "stream",
123 | "text": [
124 | "Setup Succesfully Completed!\n"
125 | ]
126 | },
127 | {
128 | "data": {
129 | "text/html": [
130 | "
| Description | Value |
\n",
132 | " \n",
133 | " 0 | \n",
134 | " session_id | \n",
135 | " 123 | \n",
136 | "
\n",
137 | " \n",
138 | " 1 | \n",
139 | " Target Type | \n",
140 | " Multiclass | \n",
141 | "
\n",
142 | " \n",
143 | " 2 | \n",
144 | " Label Encoded | \n",
145 | " benign: 0, gafgyt: 1, mirai: 2 | \n",
146 | "
\n",
147 | " \n",
148 | " 3 | \n",
149 | " Original Data | \n",
150 | " (305489, 117) | \n",
151 | "
\n",
152 | " \n",
153 | " 4 | \n",
154 | " Missing Values | \n",
155 | " False | \n",
156 | "
\n",
157 | " \n",
158 | " 5 | \n",
159 | " Numeric Features | \n",
160 | " 115 | \n",
161 | "
\n",
162 | " \n",
163 | " 6 | \n",
164 | " Categorical Features | \n",
165 | " 1 | \n",
166 | "
\n",
167 | " \n",
168 | " 7 | \n",
169 | " Ordinal Features | \n",
170 | " False | \n",
171 | "
\n",
172 | " \n",
173 | " 8 | \n",
174 | " High Cardinality Features | \n",
175 | " False | \n",
176 | "
\n",
177 | " \n",
178 | " 9 | \n",
179 | " High Cardinality Method | \n",
180 | " None | \n",
181 | "
\n",
182 | " \n",
183 | " 10 | \n",
184 | " Sampled Data | \n",
185 | " (305489, 117) | \n",
186 | "
\n",
187 | " \n",
188 | " 11 | \n",
189 | " Transformed Train Set | \n",
190 | " (213842, 116) | \n",
191 | "
\n",
192 | " \n",
193 | " 12 | \n",
194 | " Transformed Test Set | \n",
195 | " (91647, 116) | \n",
196 | "
\n",
197 | " \n",
198 | " 13 | \n",
199 | " Numeric Imputer | \n",
200 | " mean | \n",
201 | "
\n",
202 | " \n",
203 | " 14 | \n",
204 | " Categorical Imputer | \n",
205 | " constant | \n",
206 | "
\n",
207 | " \n",
208 | " 15 | \n",
209 | " Normalize | \n",
210 | " False | \n",
211 | "
\n",
212 | " \n",
213 | " 16 | \n",
214 | " Normalize Method | \n",
215 | " None | \n",
216 | "
\n",
217 | " \n",
218 | " 17 | \n",
219 | " Transformation | \n",
220 | " False | \n",
221 | "
\n",
222 | " \n",
223 | " 18 | \n",
224 | " Transformation Method | \n",
225 | " None | \n",
226 | "
\n",
227 | " \n",
228 | " 19 | \n",
229 | " PCA | \n",
230 | " False | \n",
231 | "
\n",
232 | " \n",
233 | " 20 | \n",
234 | " PCA Method | \n",
235 | " None | \n",
236 | "
\n",
237 | " \n",
238 | " 21 | \n",
239 | " PCA Components | \n",
240 | " None | \n",
241 | "
\n",
242 | " \n",
243 | " 22 | \n",
244 | " Ignore Low Variance | \n",
245 | " False | \n",
246 | "
\n",
247 | " \n",
248 | " 23 | \n",
249 | " Combine Rare Levels | \n",
250 | " False | \n",
251 | "
\n",
252 | " \n",
253 | " 24 | \n",
254 | " Rare Level Threshold | \n",
255 | " None | \n",
256 | "
\n",
257 | " \n",
258 | " 25 | \n",
259 | " Numeric Binning | \n",
260 | " False | \n",
261 | "
\n",
262 | " \n",
263 | " 26 | \n",
264 | " Remove Outliers | \n",
265 | " False | \n",
266 | "
\n",
267 | " \n",
268 | " 27 | \n",
269 | " Outliers Threshold | \n",
270 | " None | \n",
271 | "
\n",
272 | " \n",
273 | " 28 | \n",
274 | " Remove Multicollinearity | \n",
275 | " False | \n",
276 | "
\n",
277 | " \n",
278 | " 29 | \n",
279 | " Multicollinearity Threshold | \n",
280 | " None | \n",
281 | "
\n",
282 | " \n",
283 | " 30 | \n",
284 | " Clustering | \n",
285 | " False | \n",
286 | "
\n",
287 | " \n",
288 | " 31 | \n",
289 | " Clustering Iteration | \n",
290 | " None | \n",
291 | "
\n",
292 | " \n",
293 | " 32 | \n",
294 | " Polynomial Features | \n",
295 | " False | \n",
296 | "
\n",
297 | " \n",
298 | " 33 | \n",
299 | " Polynomial Degree | \n",
300 | " None | \n",
301 | "
\n",
302 | " \n",
303 | " 34 | \n",
304 | " Trignometry Features | \n",
305 | " False | \n",
306 | "
\n",
307 | " \n",
308 | " 35 | \n",
309 | " Polynomial Threshold | \n",
310 | " None | \n",
311 | "
\n",
312 | " \n",
313 | " 36 | \n",
314 | " Group Features | \n",
315 | " False | \n",
316 | "
\n",
317 | " \n",
318 | " 37 | \n",
319 | " Feature Selection | \n",
320 | " False | \n",
321 | "
\n",
322 | " \n",
323 | " 38 | \n",
324 | " Features Selection Threshold | \n",
325 | " None | \n",
326 | "
\n",
327 | " \n",
328 | " 39 | \n",
329 | " Feature Interaction | \n",
330 | " False | \n",
331 | "
\n",
332 | " \n",
333 | " 40 | \n",
334 | " Feature Ratio | \n",
335 | " False | \n",
336 | "
\n",
337 | " \n",
338 | " 41 | \n",
339 | " Interaction Threshold | \n",
340 | " None | \n",
341 | "
\n",
342 | " \n",
343 | " 42 | \n",
344 | " Fix Imbalance | \n",
345 | " False | \n",
346 | "
\n",
347 | " \n",
348 | " 43 | \n",
349 | " Fix Imbalance Method | \n",
350 | " SMOTE | \n",
351 | "
\n",
352 | "
"
353 | ],
354 | "text/plain": [
355 | ""
356 | ]
357 | },
358 | "metadata": {},
359 | "output_type": "display_data"
360 | }
361 | ],
362 | "source": [
363 | "clf1 = setup(df, target = 'label', session_id=123, experiment_name='doorbell-1')"
364 | ]
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": 11,
369 | "metadata": {},
370 | "outputs": [
371 | {
372 | "data": {
373 | "application/vnd.jupyter.widget-view+json": {
374 | "model_id": "acf36808e95b4f9180d69eb1f167ad83",
375 | "version_major": 2,
376 | "version_minor": 0
377 | },
378 | "text/plain": [
379 | "IntProgress(value=0, description='Processing: ', max=176)"
380 | ]
381 | },
382 | "metadata": {},
383 | "output_type": "display_data"
384 | },
385 | {
386 | "data": {
387 | "text/html": [
388 | "\n",
389 | "\n",
402 | "
\n",
403 | " \n",
404 | " \n",
405 | " | \n",
406 | " | \n",
407 | " | \n",
408 | "
\n",
409 | " \n",
410 | " | \n",
411 | " | \n",
412 | " | \n",
413 | "
\n",
414 | " \n",
415 | " \n",
416 | " \n",
417 | " Initiated | \n",
418 | " . . . . . . . . . . . . . . . . . . | \n",
419 | " 11:21:37 | \n",
420 | "
\n",
421 | " \n",
422 | " Status | \n",
423 | " . . . . . . . . . . . . . . . . . . | \n",
424 | " Finalizing Model | \n",
425 | "
\n",
426 | " \n",
427 | " ETC | \n",
428 | " . . . . . . . . . . . . . . . . . . | \n",
429 | " Almost Finished | \n",
430 | "
\n",
431 | " \n",
432 | "
\n",
433 | "
"
434 | ],
435 | "text/plain": [
436 | " \n",
437 | " \n",
438 | "Initiated . . . . . . . . . . . . . . . . . . 11:21:37\n",
439 | "Status . . . . . . . . . . . . . . . . . . Finalizing Model\n",
440 | "ETC . . . . . . . . . . . . . . . . . . Almost Finished"
441 | ]
442 | },
443 | "metadata": {},
444 | "output_type": "display_data"
445 | },
446 | {
447 | "data": {
448 | "text/html": [
449 | "\n",
450 | "\n",
463 | "
\n",
464 | " \n",
465 | " \n",
466 | " | \n",
467 | " Model | \n",
468 | " Accuracy | \n",
469 | " AUC | \n",
470 | " Recall | \n",
471 | " Prec. | \n",
472 | " F1 | \n",
473 | " Kappa | \n",
474 | " MCC | \n",
475 | " TT (Sec) | \n",
476 | "
\n",
477 | " \n",
478 | " \n",
479 | " \n",
480 | " 0 | \n",
481 | " Random Forest Classifier | \n",
482 | " 1.0000 | \n",
483 | " 0.0 | \n",
484 | " 0.9999 | \n",
485 | " 1.0000 | \n",
486 | " 1.0000 | \n",
487 | " 0.9999 | \n",
488 | " 0.9999 | \n",
489 | " 2.8086 | \n",
490 | "
\n",
491 | " \n",
492 | " 1 | \n",
493 | " Decision Tree Classifier | \n",
494 | " 0.9998 | \n",
495 | " 0.0 | \n",
496 | " 0.9997 | \n",
497 | " 0.9998 | \n",
498 | " 0.9998 | \n",
499 | " 0.9997 | \n",
500 | " 0.9997 | \n",
501 | " 21.2064 | \n",
502 | "
\n",
503 | " \n",
504 | " 2 | \n",
505 | " K Neighbors Classifier | \n",
506 | " 0.9980 | \n",
507 | " 0.0 | \n",
508 | " 0.9935 | \n",
509 | " 0.9980 | \n",
510 | " 0.9980 | \n",
511 | " 0.9960 | \n",
512 | " 0.9960 | \n",
513 | " 25.6996 | \n",
514 | "
\n",
515 | " \n",
516 | " 3 | \n",
517 | " Ridge Classifier | \n",
518 | " 0.9969 | \n",
519 | " 0.0 | \n",
520 | " 0.9958 | \n",
521 | " 0.9969 | \n",
522 | " 0.9969 | \n",
523 | " 0.9936 | \n",
524 | " 0.9936 | \n",
525 | " 1.2116 | \n",
526 | "
\n",
527 | " \n",
528 | " 4 | \n",
529 | " Ada Boost Classifier | \n",
530 | " 0.9245 | \n",
531 | " 0.0 | \n",
532 | " 0.9202 | \n",
533 | " 0.9340 | \n",
534 | " 0.9216 | \n",
535 | " 0.8392 | \n",
536 | " 0.8522 | \n",
537 | " 144.0179 | \n",
538 | "
\n",
539 | " \n",
540 | " 5 | \n",
541 | " Quadratic Discriminant Analysis | \n",
542 | " 0.6834 | \n",
543 | " 0.0 | \n",
544 | " 0.8271 | \n",
545 | " 0.8491 | \n",
546 | " 0.6724 | \n",
547 | " 0.4799 | \n",
548 | " 0.5712 | \n",
549 | " 5.3659 | \n",
550 | "
\n",
551 | " \n",
552 | " 6 | \n",
553 | " Naive Bayes | \n",
554 | " 0.6585 | \n",
555 | " 0.0 | \n",
556 | " 0.3543 | \n",
557 | " 0.7312 | \n",
558 | " 0.5410 | \n",
559 | " 0.0693 | \n",
560 | " 0.1829 | \n",
561 | " 0.8091 | \n",
562 | "
\n",
563 | " \n",
564 | " 7 | \n",
565 | " SVM - Linear Kernel | \n",
566 | " 0.4204 | \n",
567 | " 0.0 | \n",
568 | " 0.3930 | \n",
569 | " 0.4682 | \n",
570 | " 0.3959 | \n",
571 | " 0.0762 | \n",
572 | " 0.1060 | \n",
573 | " 6.0382 | \n",
574 | "
\n",
575 | " \n",
576 | " 8 | \n",
577 | " Logistic Regression | \n",
578 | " 0.0486 | \n",
579 | " 0.0 | \n",
580 | " 0.3333 | \n",
581 | " 0.0024 | \n",
582 | " 0.0045 | \n",
583 | " 0.0000 | \n",
584 | " 0.0000 | \n",
585 | " 4.2906 | \n",
586 | "
\n",
587 | " \n",
588 | "
\n",
589 | "
"
590 | ],
591 | "text/plain": [
592 | " Model Accuracy AUC Recall Prec. F1 \\\n",
593 | "0 Random Forest Classifier 1.0000 0.0 0.9999 1.0000 1.0000 \n",
594 | "1 Decision Tree Classifier 0.9998 0.0 0.9997 0.9998 0.9998 \n",
595 | "2 K Neighbors Classifier 0.9980 0.0 0.9935 0.9980 0.9980 \n",
596 | "3 Ridge Classifier 0.9969 0.0 0.9958 0.9969 0.9969 \n",
597 | "4 Ada Boost Classifier 0.9245 0.0 0.9202 0.9340 0.9216 \n",
598 | "5 Quadratic Discriminant Analysis 0.6834 0.0 0.8271 0.8491 0.6724 \n",
599 | "6 Naive Bayes 0.6585 0.0 0.3543 0.7312 0.5410 \n",
600 | "7 SVM - Linear Kernel 0.4204 0.0 0.3930 0.4682 0.3959 \n",
601 | "8 Logistic Regression 0.0486 0.0 0.3333 0.0024 0.0045 \n",
602 | "\n",
603 | " Kappa MCC TT (Sec) \n",
604 | "0 0.9999 0.9999 2.8086 \n",
605 | "1 0.9997 0.9997 21.2064 \n",
606 | "2 0.9960 0.9960 25.6996 \n",
607 | "3 0.9936 0.9936 1.2116 \n",
608 | "4 0.8392 0.8522 144.0179 \n",
609 | "5 0.4799 0.5712 5.3659 \n",
610 | "6 0.0693 0.1829 0.8091 \n",
611 | "7 0.0762 0.1060 6.0382 \n",
612 | "8 0.0000 0.0000 4.2906 "
613 | ]
614 | },
615 | "metadata": {},
616 | "output_type": "display_data"
617 | },
618 | {
619 | "ename": "KeyboardInterrupt",
620 | "evalue": "",
621 | "output_type": "error",
622 | "traceback": [
623 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
624 | "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
625 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mbest_model\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcompare_models\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
626 | "\u001b[1;32me:\\springboard\\venv\\lib\\site-packages\\pycaret\\classification.py\u001b[0m in \u001b[0;36mcompare_models\u001b[1;34m(exclude, include, fold, round, sort, n_select, budget_time, turbo, verbose)\u001b[0m\n\u001b[0;32m 2455\u001b[0m \u001b[0mtime_start\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2456\u001b[0m \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Fitting Model\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2457\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mXtrain\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mytrain\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2458\u001b[0m \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Evaluating Metrics\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2459\u001b[0m \u001b[0mtime_end\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
627 | "\u001b[1;32me:\\springboard\\venv\\lib\\site-packages\\sklearn\\ensemble\\_gb.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight, monitor)\u001b[0m\n\u001b[0;32m 496\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 497\u001b[0m \u001b[1;31m# fit the boosting stages\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 498\u001b[1;33m n_stages = self._fit_stages(\n\u001b[0m\u001b[0;32m 499\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mraw_predictions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_rng\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mX_val\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_val\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 500\u001b[0m sample_weight_val, begin_at_stage, monitor, X_idx_sorted)\n",
628 | "\u001b[1;32me:\\springboard\\venv\\lib\\site-packages\\sklearn\\ensemble\\_gb.py\u001b[0m in \u001b[0;36m_fit_stages\u001b[1;34m(self, X, y, raw_predictions, sample_weight, random_state, X_val, y_val, sample_weight_val, begin_at_stage, monitor, X_idx_sorted)\u001b[0m\n\u001b[0;32m 553\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 554\u001b[0m \u001b[1;31m# fit next stage of trees\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 555\u001b[1;33m raw_predictions = self._fit_stage(\n\u001b[0m\u001b[0;32m 556\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mraw_predictions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msample_mask\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 557\u001b[0m random_state, X_idx_sorted, X_csc, X_csr)\n",
629 | "\u001b[1;32me:\\springboard\\venv\\lib\\site-packages\\sklearn\\ensemble\\_gb.py\u001b[0m in \u001b[0;36m_fit_stage\u001b[1;34m(self, i, X, y, raw_predictions, sample_weight, sample_mask, random_state, X_idx_sorted, X_csc, X_csr)\u001b[0m\n\u001b[0;32m 209\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 210\u001b[0m \u001b[0mX\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mX_csr\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mX_csr\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32melse\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 211\u001b[1;33m tree.fit(X, residual, sample_weight=sample_weight,\n\u001b[0m\u001b[0;32m 212\u001b[0m check_input=False, X_idx_sorted=X_idx_sorted)\n\u001b[0;32m 213\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
630 | "\u001b[1;32me:\\springboard\\venv\\lib\\site-packages\\sklearn\\tree\\_classes.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight, check_input, X_idx_sorted)\u001b[0m\n\u001b[0;32m 1240\u001b[0m \"\"\"\n\u001b[0;32m 1241\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1242\u001b[1;33m super().fit(\n\u001b[0m\u001b[0;32m 1243\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1244\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
631 | "\u001b[1;32me:\\springboard\\venv\\lib\\site-packages\\sklearn\\tree\\_classes.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight, check_input, X_idx_sorted)\u001b[0m\n\u001b[0;32m 373\u001b[0m min_impurity_split)\n\u001b[0;32m 374\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 375\u001b[1;33m \u001b[0mbuilder\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbuild\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtree_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mX_idx_sorted\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 376\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 377\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mn_outputs_\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m1\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mis_classifier\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
632 | "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
633 | ]
634 | }
635 | ],
636 | "source": [
637 | "best_model = compare_models()"
638 | ]
639 | },
640 | {
641 | "cell_type": "code",
642 | "execution_count": 12,
643 | "metadata": {},
644 | "outputs": [
645 | {
646 | "data": {
647 | "text/html": [
648 | "\n",
649 | "\n",
662 | "
\n",
663 | " \n",
664 | " \n",
665 | " | \n",
666 | " Name | \n",
667 | " Reference | \n",
668 | " Turbo | \n",
669 | "
\n",
670 | " \n",
671 | " ID | \n",
672 | " | \n",
673 | " | \n",
674 | " | \n",
675 | "
\n",
676 | " \n",
677 | " \n",
678 | " \n",
679 | " lr | \n",
680 | " Logistic Regression | \n",
681 | " sklearn.linear_model.LogisticRegression | \n",
682 | " True | \n",
683 | "
\n",
684 | " \n",
685 | " knn | \n",
686 | " K Neighbors Classifier | \n",
687 | " sklearn.neighbors.KNeighborsClassifier | \n",
688 | " True | \n",
689 | "
\n",
690 | " \n",
691 | " nb | \n",
692 | " Naive Bayes | \n",
693 | " sklearn.naive_bayes.GaussianNB | \n",
694 | " True | \n",
695 | "
\n",
696 | " \n",
697 | " dt | \n",
698 | " Decision Tree Classifier | \n",
699 | " sklearn.tree.DecisionTreeClassifier | \n",
700 | " True | \n",
701 | "
\n",
702 | " \n",
703 | " svm | \n",
704 | " SVM - Linear Kernel | \n",
705 | " sklearn.linear_model.SGDClassifier | \n",
706 | " True | \n",
707 | "
\n",
708 | " \n",
709 | " rbfsvm | \n",
710 | " SVM - Radial Kernel | \n",
711 | " sklearn.svm.SVC | \n",
712 | " False | \n",
713 | "
\n",
714 | " \n",
715 | " gpc | \n",
716 | " Gaussian Process Classifier | \n",
717 | " sklearn.gaussian_process.GPC | \n",
718 | " False | \n",
719 | "
\n",
720 | " \n",
721 | " mlp | \n",
722 | " MLP Classifier | \n",
723 | " sklearn.neural_network.MLPClassifier | \n",
724 | " False | \n",
725 | "
\n",
726 | " \n",
727 | " ridge | \n",
728 | " Ridge Classifier | \n",
729 | " sklearn.linear_model.RidgeClassifier | \n",
730 | " True | \n",
731 | "
\n",
732 | " \n",
733 | " rf | \n",
734 | " Random Forest Classifier | \n",
735 | " sklearn.ensemble.RandomForestClassifier | \n",
736 | " True | \n",
737 | "
\n",
738 | " \n",
739 | " qda | \n",
740 | " Quadratic Discriminant Analysis | \n",
741 | " sklearn.discriminant_analysis.QDA | \n",
742 | " True | \n",
743 | "
\n",
744 | " \n",
745 | " ada | \n",
746 | " Ada Boost Classifier | \n",
747 | " sklearn.ensemble.AdaBoostClassifier | \n",
748 | " True | \n",
749 | "
\n",
750 | " \n",
751 | " gbc | \n",
752 | " Gradient Boosting Classifier | \n",
753 | " sklearn.ensemble.GradientBoostingClassifier | \n",
754 | " True | \n",
755 | "
\n",
756 | " \n",
757 | " lda | \n",
758 | " Linear Discriminant Analysis | \n",
759 | " sklearn.discriminant_analysis.LDA | \n",
760 | " True | \n",
761 | "
\n",
762 | " \n",
763 | " et | \n",
764 | " Extra Trees Classifier | \n",
765 | " sklearn.ensemble.ExtraTreesClassifier | \n",
766 | " True | \n",
767 | "
\n",
768 | " \n",
769 | " xgboost | \n",
770 | " Extreme Gradient Boosting | \n",
771 | " xgboost.readthedocs.io | \n",
772 | " True | \n",
773 | "
\n",
774 | " \n",
775 | " lightgbm | \n",
776 | " Light Gradient Boosting Machine | \n",
777 | " github.com/microsoft/LightGBM | \n",
778 | " True | \n",
779 | "
\n",
780 | " \n",
781 | " catboost | \n",
782 | " CatBoost Classifier | \n",
783 | " catboost.ai | \n",
784 | " True | \n",
785 | "
\n",
786 | " \n",
787 | "
\n",
788 | "
"
789 | ],
790 | "text/plain": [
791 | " Name \\\n",
792 | "ID \n",
793 | "lr Logistic Regression \n",
794 | "knn K Neighbors Classifier \n",
795 | "nb Naive Bayes \n",
796 | "dt Decision Tree Classifier \n",
797 | "svm SVM - Linear Kernel \n",
798 | "rbfsvm SVM - Radial Kernel \n",
799 | "gpc Gaussian Process Classifier \n",
800 | "mlp MLP Classifier \n",
801 | "ridge Ridge Classifier \n",
802 | "rf Random Forest Classifier \n",
803 | "qda Quadratic Discriminant Analysis \n",
804 | "ada Ada Boost Classifier \n",
805 | "gbc Gradient Boosting Classifier \n",
806 | "lda Linear Discriminant Analysis \n",
807 | "et Extra Trees Classifier \n",
808 | "xgboost Extreme Gradient Boosting \n",
809 | "lightgbm Light Gradient Boosting Machine \n",
810 | "catboost CatBoost Classifier \n",
811 | "\n",
812 | " Reference Turbo \n",
813 | "ID \n",
814 | "lr sklearn.linear_model.LogisticRegression True \n",
815 | "knn sklearn.neighbors.KNeighborsClassifier True \n",
816 | "nb sklearn.naive_bayes.GaussianNB True \n",
817 | "dt sklearn.tree.DecisionTreeClassifier True \n",
818 | "svm sklearn.linear_model.SGDClassifier True \n",
819 | "rbfsvm sklearn.svm.SVC False \n",
820 | "gpc sklearn.gaussian_process.GPC False \n",
821 | "mlp sklearn.neural_network.MLPClassifier False \n",
822 | "ridge sklearn.linear_model.RidgeClassifier True \n",
823 | "rf sklearn.ensemble.RandomForestClassifier True \n",
824 | "qda sklearn.discriminant_analysis.QDA True \n",
825 | "ada sklearn.ensemble.AdaBoostClassifier True \n",
826 | "gbc sklearn.ensemble.GradientBoostingClassifier True \n",
827 | "lda sklearn.discriminant_analysis.LDA True \n",
828 | "et sklearn.ensemble.ExtraTreesClassifier True \n",
829 | "xgboost xgboost.readthedocs.io True \n",
830 | "lightgbm github.com/microsoft/LightGBM True \n",
831 | "catboost catboost.ai True "
832 | ]
833 | },
834 | "execution_count": 12,
835 | "metadata": {},
836 | "output_type": "execute_result"
837 | }
838 | ],
839 | "source": [
840 | "models()"
841 | ]
842 | },
843 | {
844 | "cell_type": "code",
845 | "execution_count": 13,
846 | "metadata": {},
847 | "outputs": [
848 | {
849 | "data": {
850 | "text/html": [
851 | " | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
\n",
855 | " \n",
856 | " 0 | \n",
857 | " 0.9999 | \n",
858 | " 0.0000 | \n",
859 | " 0.9999 | \n",
860 | " 0.9999 | \n",
861 | " 0.9999 | \n",
862 | " 0.9999 | \n",
863 | " 0.9999 | \n",
864 | "
\n",
865 | " \n",
866 | " 1 | \n",
867 | " 0.9999 | \n",
868 | " 0.0000 | \n",
869 | " 0.9998 | \n",
870 | " 0.9999 | \n",
871 | " 0.9999 | \n",
872 | " 0.9998 | \n",
873 | " 0.9998 | \n",
874 | "
\n",
875 | " \n",
876 | " 2 | \n",
877 | " 0.9999 | \n",
878 | " 0.0000 | \n",
879 | " 0.9998 | \n",
880 | " 0.9999 | \n",
881 | " 0.9999 | \n",
882 | " 0.9998 | \n",
883 | " 0.9998 | \n",
884 | "
\n",
885 | " \n",
886 | " 3 | \n",
887 | " 1.0000 | \n",
888 | " 0.0000 | \n",
889 | " 1.0000 | \n",
890 | " 1.0000 | \n",
891 | " 1.0000 | \n",
892 | " 1.0000 | \n",
893 | " 1.0000 | \n",
894 | "
\n",
895 | " \n",
896 | " 4 | \n",
897 | " 1.0000 | \n",
898 | " 0.0000 | \n",
899 | " 1.0000 | \n",
900 | " 1.0000 | \n",
901 | " 1.0000 | \n",
902 | " 1.0000 | \n",
903 | " 1.0000 | \n",
904 | "
\n",
905 | " \n",
906 | " Mean | \n",
907 | " 0.9999 | \n",
908 | " 0.0000 | \n",
909 | " 0.9999 | \n",
910 | " 0.9999 | \n",
911 | " 0.9999 | \n",
912 | " 0.9999 | \n",
913 | " 0.9999 | \n",
914 | "
\n",
915 | " \n",
916 | " SD | \n",
917 | " 0.0000 | \n",
918 | " 0.0000 | \n",
919 | " 0.0001 | \n",
920 | " 0.0000 | \n",
921 | " 0.0000 | \n",
922 | " 0.0001 | \n",
923 | " 0.0001 | \n",
924 | "
\n",
925 | "
"
926 | ],
927 | "text/plain": [
928 | ""
929 | ]
930 | },
931 | "metadata": {},
932 | "output_type": "display_data"
933 | }
934 | ],
935 | "source": [
936 | "rf = create_model('rf', fold=5)"
937 | ]
938 | },
939 | {
940 | "cell_type": "code",
941 | "execution_count": 14,
942 | "metadata": {},
943 | "outputs": [
944 | {
945 | "data": {
946 | "text/html": [
947 | " | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
\n",
951 | " \n",
952 | " 0 | \n",
953 | " 0.9998 | \n",
954 | " 0.0000 | \n",
955 | " 0.9998 | \n",
956 | " 0.9998 | \n",
957 | " 0.9998 | \n",
958 | " 0.9995 | \n",
959 | " 0.9995 | \n",
960 | "
\n",
961 | " \n",
962 | " 1 | \n",
963 | " 1.0000 | \n",
964 | " 0.0000 | \n",
965 | " 1.0000 | \n",
966 | " 1.0000 | \n",
967 | " 1.0000 | \n",
968 | " 1.0000 | \n",
969 | " 1.0000 | \n",
970 | "
\n",
971 | " \n",
972 | " 2 | \n",
973 | " 1.0000 | \n",
974 | " 0.0000 | \n",
975 | " 0.9997 | \n",
976 | " 1.0000 | \n",
977 | " 1.0000 | \n",
978 | " 0.9999 | \n",
979 | " 0.9999 | \n",
980 | "
\n",
981 | " \n",
982 | " 3 | \n",
983 | " 0.9997 | \n",
984 | " 0.0000 | \n",
985 | " 0.9998 | \n",
986 | " 0.9997 | \n",
987 | " 0.9997 | \n",
988 | " 0.9994 | \n",
989 | " 0.9994 | \n",
990 | "
\n",
991 | " \n",
992 | " 4 | \n",
993 | " 0.9999 | \n",
994 | " 0.0000 | \n",
995 | " 0.9999 | \n",
996 | " 0.9999 | \n",
997 | " 0.9999 | \n",
998 | " 0.9997 | \n",
999 | " 0.9997 | \n",
1000 | "
\n",
1001 | " \n",
1002 | " 5 | \n",
1003 | " 1.0000 | \n",
1004 | " 0.0000 | \n",
1005 | " 0.9999 | \n",
1006 | " 1.0000 | \n",
1007 | " 1.0000 | \n",
1008 | " 0.9999 | \n",
1009 | " 0.9999 | \n",
1010 | "
\n",
1011 | " \n",
1012 | " 6 | \n",
1013 | " 0.9996 | \n",
1014 | " 0.0000 | \n",
1015 | " 0.9991 | \n",
1016 | " 0.9996 | \n",
1017 | " 0.9996 | \n",
1018 | " 0.9992 | \n",
1019 | " 0.9992 | \n",
1020 | "
\n",
1021 | " \n",
1022 | " 7 | \n",
1023 | " 0.9999 | \n",
1024 | " 0.0000 | \n",
1025 | " 0.9999 | \n",
1026 | " 0.9999 | \n",
1027 | " 0.9999 | \n",
1028 | " 0.9998 | \n",
1029 | " 0.9998 | \n",
1030 | "
\n",
1031 | " \n",
1032 | " 8 | \n",
1033 | " 0.9998 | \n",
1034 | " 0.0000 | \n",
1035 | " 0.9995 | \n",
1036 | " 0.9998 | \n",
1037 | " 0.9998 | \n",
1038 | " 0.9995 | \n",
1039 | " 0.9995 | \n",
1040 | "
\n",
1041 | " \n",
1042 | " 9 | \n",
1043 | " 0.9999 | \n",
1044 | " 0.0000 | \n",
1045 | " 0.9996 | \n",
1046 | " 0.9999 | \n",
1047 | " 0.9999 | \n",
1048 | " 0.9998 | \n",
1049 | " 0.9998 | \n",
1050 | "
\n",
1051 | " \n",
1052 | " Mean | \n",
1053 | " 0.9998 | \n",
1054 | " 0.0000 | \n",
1055 | " 0.9997 | \n",
1056 | " 0.9998 | \n",
1057 | " 0.9998 | \n",
1058 | " 0.9997 | \n",
1059 | " 0.9997 | \n",
1060 | "
\n",
1061 | " \n",
1062 | " SD | \n",
1063 | " 0.0001 | \n",
1064 | " 0.0000 | \n",
1065 | " 0.0002 | \n",
1066 | " 0.0001 | \n",
1067 | " 0.0001 | \n",
1068 | " 0.0002 | \n",
1069 | " 0.0002 | \n",
1070 | "
\n",
1071 | "
"
1072 | ],
1073 | "text/plain": [
1074 | ""
1075 | ]
1076 | },
1077 | "metadata": {},
1078 | "output_type": "display_data"
1079 | }
1080 | ],
1081 | "source": [
1082 | "dt = create_model('dt')"
1083 | ]
1084 | },
1085 | {
1086 | "cell_type": "code",
1087 | "execution_count": 15,
1088 | "metadata": {},
1089 | "outputs": [
1090 | {
1091 | "data": {
1092 | "text/html": [
1093 | " | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
\n",
1097 | " \n",
1098 | " 0 | \n",
1099 | " 1.0000 | \n",
1100 | " 0.0000 | \n",
1101 | " 1.0000 | \n",
1102 | " 1.0000 | \n",
1103 | " 1.0000 | \n",
1104 | " 1.0000 | \n",
1105 | " 1.0000 | \n",
1106 | "
\n",
1107 | " \n",
1108 | " 1 | \n",
1109 | " 1.0000 | \n",
1110 | " 0.0000 | \n",
1111 | " 0.9999 | \n",
1112 | " 1.0000 | \n",
1113 | " 1.0000 | \n",
1114 | " 0.9999 | \n",
1115 | " 0.9999 | \n",
1116 | "
\n",
1117 | " \n",
1118 | " 2 | \n",
1119 | " 1.0000 | \n",
1120 | " 0.0000 | \n",
1121 | " 0.9997 | \n",
1122 | " 1.0000 | \n",
1123 | " 1.0000 | \n",
1124 | " 0.9999 | \n",
1125 | " 0.9999 | \n",
1126 | "
\n",
1127 | " \n",
1128 | " 3 | \n",
1129 | " 1.0000 | \n",
1130 | " 0.0000 | \n",
1131 | " 1.0000 | \n",
1132 | " 1.0000 | \n",
1133 | " 1.0000 | \n",
1134 | " 0.9999 | \n",
1135 | " 0.9999 | \n",
1136 | "
\n",
1137 | " \n",
1138 | " 4 | \n",
1139 | " 0.9999 | \n",
1140 | " 0.0000 | \n",
1141 | " 0.9999 | \n",
1142 | " 0.9999 | \n",
1143 | " 0.9999 | \n",
1144 | " 0.9998 | \n",
1145 | " 0.9998 | \n",
1146 | "
\n",
1147 | " \n",
1148 | " 5 | \n",
1149 | " 1.0000 | \n",
1150 | " 0.0000 | \n",
1151 | " 1.0000 | \n",
1152 | " 1.0000 | \n",
1153 | " 1.0000 | \n",
1154 | " 1.0000 | \n",
1155 | " 1.0000 | \n",
1156 | "
\n",
1157 | " \n",
1158 | " 6 | \n",
1159 | " 0.9999 | \n",
1160 | " 0.0000 | \n",
1161 | " 0.9996 | \n",
1162 | " 0.9999 | \n",
1163 | " 0.9999 | \n",
1164 | " 0.9998 | \n",
1165 | " 0.9998 | \n",
1166 | "
\n",
1167 | " \n",
1168 | " 7 | \n",
1169 | " 1.0000 | \n",
1170 | " 0.0000 | \n",
1171 | " 1.0000 | \n",
1172 | " 1.0000 | \n",
1173 | " 1.0000 | \n",
1174 | " 1.0000 | \n",
1175 | " 1.0000 | \n",
1176 | "
\n",
1177 | " \n",
1178 | " 8 | \n",
1179 | " 1.0000 | \n",
1180 | " 0.0000 | \n",
1181 | " 1.0000 | \n",
1182 | " 1.0000 | \n",
1183 | " 1.0000 | \n",
1184 | " 1.0000 | \n",
1185 | " 1.0000 | \n",
1186 | "
\n",
1187 | " \n",
1188 | " 9 | \n",
1189 | " 1.0000 | \n",
1190 | " 0.0000 | \n",
1191 | " 1.0000 | \n",
1192 | " 1.0000 | \n",
1193 | " 1.0000 | \n",
1194 | " 1.0000 | \n",
1195 | " 1.0000 | \n",
1196 | "
\n",
1197 | " \n",
1198 | " Mean | \n",
1199 | " 1.0000 | \n",
1200 | " 0.0000 | \n",
1201 | " 0.9999 | \n",
1202 | " 1.0000 | \n",
1203 | " 1.0000 | \n",
1204 | " 0.9999 | \n",
1205 | " 0.9999 | \n",
1206 | "
\n",
1207 | " \n",
1208 | " SD | \n",
1209 | " 0.0000 | \n",
1210 | " 0.0000 | \n",
1211 | " 0.0001 | \n",
1212 | " 0.0000 | \n",
1213 | " 0.0000 | \n",
1214 | " 0.0001 | \n",
1215 | " 0.0001 | \n",
1216 | "
\n",
1217 | "
"
1218 | ],
1219 | "text/plain": [
1220 | ""
1221 | ]
1222 | },
1223 | "metadata": {},
1224 | "output_type": "display_data"
1225 | }
1226 | ],
1227 | "source": [
1228 | "tuned_rf = tune_model(rf)"
1229 | ]
1230 | },
1231 | {
1232 | "cell_type": "code",
1233 | "execution_count": 16,
1234 | "metadata": {},
1235 | "outputs": [
1236 | {
1237 | "data": {
1238 | "text/html": [
1239 | " | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
\n",
1243 | " \n",
1244 | " 0 | \n",
1245 | " 0.9999 | \n",
1246 | " 0.0000 | \n",
1247 | " 0.9997 | \n",
1248 | " 0.9999 | \n",
1249 | " 0.9999 | \n",
1250 | " 0.9998 | \n",
1251 | " 0.9998 | \n",
1252 | "
\n",
1253 | " \n",
1254 | " 1 | \n",
1255 | " 1.0000 | \n",
1256 | " 0.0000 | \n",
1257 | " 0.9999 | \n",
1258 | " 1.0000 | \n",
1259 | " 1.0000 | \n",
1260 | " 0.9999 | \n",
1261 | " 0.9999 | \n",
1262 | "
\n",
1263 | " \n",
1264 | " 2 | \n",
1265 | " 0.9999 | \n",
1266 | " 0.0000 | \n",
1267 | " 0.9996 | \n",
1268 | " 0.9999 | \n",
1269 | " 0.9999 | \n",
1270 | " 0.9997 | \n",
1271 | " 0.9997 | \n",
1272 | "
\n",
1273 | " \n",
1274 | " 3 | \n",
1275 | " 0.9998 | \n",
1276 | " 0.0000 | \n",
1277 | " 0.9999 | \n",
1278 | " 0.9998 | \n",
1279 | " 0.9998 | \n",
1280 | " 0.9996 | \n",
1281 | " 0.9996 | \n",
1282 | "
\n",
1283 | " \n",
1284 | " 4 | \n",
1285 | " 0.9996 | \n",
1286 | " 0.0000 | \n",
1287 | " 0.9997 | \n",
1288 | " 0.9996 | \n",
1289 | " 0.9996 | \n",
1290 | " 0.9992 | \n",
1291 | " 0.9992 | \n",
1292 | "
\n",
1293 | " \n",
1294 | " 5 | \n",
1295 | " 1.0000 | \n",
1296 | " 0.0000 | \n",
1297 | " 0.9997 | \n",
1298 | " 1.0000 | \n",
1299 | " 1.0000 | \n",
1300 | " 0.9999 | \n",
1301 | " 0.9999 | \n",
1302 | "
\n",
1303 | " \n",
1304 | " 6 | \n",
1305 | " 0.9999 | \n",
1306 | " 0.0000 | \n",
1307 | " 0.9996 | \n",
1308 | " 0.9999 | \n",
1309 | " 0.9999 | \n",
1310 | " 0.9997 | \n",
1311 | " 0.9997 | \n",
1312 | "
\n",
1313 | " \n",
1314 | " 7 | \n",
1315 | " 0.9999 | \n",
1316 | " 0.0000 | \n",
1317 | " 0.9999 | \n",
1318 | " 0.9999 | \n",
1319 | " 0.9999 | \n",
1320 | " 0.9998 | \n",
1321 | " 0.9998 | \n",
1322 | "
\n",
1323 | " \n",
1324 | " 8 | \n",
1325 | " 0.9999 | \n",
1326 | " 0.0000 | \n",
1327 | " 0.9994 | \n",
1328 | " 0.9999 | \n",
1329 | " 0.9999 | \n",
1330 | " 0.9998 | \n",
1331 | " 0.9998 | \n",
1332 | "
\n",
1333 | " \n",
1334 | " 9 | \n",
1335 | " 0.9997 | \n",
1336 | " 0.0000 | \n",
1337 | " 0.9990 | \n",
1338 | " 0.9997 | \n",
1339 | " 0.9997 | \n",
1340 | " 0.9994 | \n",
1341 | " 0.9994 | \n",
1342 | "
\n",
1343 | " \n",
1344 | " Mean | \n",
1345 | " 0.9999 | \n",
1346 | " 0.0000 | \n",
1347 | " 0.9996 | \n",
1348 | " 0.9999 | \n",
1349 | " 0.9999 | \n",
1350 | " 0.9997 | \n",
1351 | " 0.9997 | \n",
1352 | "
\n",
1353 | " \n",
1354 | " SD | \n",
1355 | " 0.0001 | \n",
1356 | " 0.0000 | \n",
1357 | " 0.0003 | \n",
1358 | " 0.0001 | \n",
1359 | " 0.0001 | \n",
1360 | " 0.0002 | \n",
1361 | " 0.0002 | \n",
1362 | "
\n",
1363 | "
"
1364 | ],
1365 | "text/plain": [
1366 | ""
1367 | ]
1368 | },
1369 | "metadata": {},
1370 | "output_type": "display_data"
1371 | }
1372 | ],
1373 | "source": [
1374 | "tuned_dt = tune_model(dt)"
1375 | ]
1376 | },
1377 | {
1378 | "cell_type": "code",
1379 | "execution_count": 17,
1380 | "metadata": {},
1381 | "outputs": [
1382 | {
1383 | "data": {
1384 | "text/html": [
1385 | "\n",
1386 | "\n",
1399 | "
\n",
1400 | " \n",
1401 | " \n",
1402 | " | \n",
1403 | " Parameters | \n",
1404 | "
\n",
1405 | " \n",
1406 | " \n",
1407 | " \n",
1408 | " bootstrap | \n",
1409 | " True | \n",
1410 | "
\n",
1411 | " \n",
1412 | " ccp_alpha | \n",
1413 | " 0 | \n",
1414 | "
\n",
1415 | " \n",
1416 | " class_weight | \n",
1417 | " None | \n",
1418 | "
\n",
1419 | " \n",
1420 | " criterion | \n",
1421 | " gini | \n",
1422 | "
\n",
1423 | " \n",
1424 | " max_depth | \n",
1425 | " None | \n",
1426 | "
\n",
1427 | " \n",
1428 | " max_features | \n",
1429 | " auto | \n",
1430 | "
\n",
1431 | " \n",
1432 | " max_leaf_nodes | \n",
1433 | " None | \n",
1434 | "
\n",
1435 | " \n",
1436 | " max_samples | \n",
1437 | " None | \n",
1438 | "
\n",
1439 | " \n",
1440 | " min_impurity_decrease | \n",
1441 | " 0 | \n",
1442 | "
\n",
1443 | " \n",
1444 | " min_impurity_split | \n",
1445 | " None | \n",
1446 | "
\n",
1447 | " \n",
1448 | " min_samples_leaf | \n",
1449 | " 1 | \n",
1450 | "
\n",
1451 | " \n",
1452 | " min_samples_split | \n",
1453 | " 2 | \n",
1454 | "
\n",
1455 | " \n",
1456 | " min_weight_fraction_leaf | \n",
1457 | " 0 | \n",
1458 | "
\n",
1459 | " \n",
1460 | " n_estimators | \n",
1461 | " 10 | \n",
1462 | "
\n",
1463 | " \n",
1464 | " n_jobs | \n",
1465 | " -1 | \n",
1466 | "
\n",
1467 | " \n",
1468 | " oob_score | \n",
1469 | " False | \n",
1470 | "
\n",
1471 | " \n",
1472 | " random_state | \n",
1473 | " 123 | \n",
1474 | "
\n",
1475 | " \n",
1476 | " verbose | \n",
1477 | " 0 | \n",
1478 | "
\n",
1479 | " \n",
1480 | " warm_start | \n",
1481 | " False | \n",
1482 | "
\n",
1483 | " \n",
1484 | "
\n",
1485 | "
"
1486 | ],
1487 | "text/plain": [
1488 | " Parameters\n",
1489 | "bootstrap True\n",
1490 | "ccp_alpha 0\n",
1491 | "class_weight None\n",
1492 | "criterion gini\n",
1493 | "max_depth None\n",
1494 | "max_features auto\n",
1495 | "max_leaf_nodes None\n",
1496 | "max_samples None\n",
1497 | "min_impurity_decrease 0\n",
1498 | "min_impurity_split None\n",
1499 | "min_samples_leaf 1\n",
1500 | "min_samples_split 2\n",
1501 | "min_weight_fraction_leaf 0\n",
1502 | "n_estimators 10\n",
1503 | "n_jobs -1\n",
1504 | "oob_score False\n",
1505 | "random_state 123\n",
1506 | "verbose 0\n",
1507 | "warm_start False"
1508 | ]
1509 | },
1510 | "metadata": {},
1511 | "output_type": "display_data"
1512 | }
1513 | ],
1514 | "source": [
1515 | "evaluate_model(rf)"
1516 | ]
1517 | },
1518 | {
1519 | "cell_type": "code",
1520 | "execution_count": 18,
1521 | "metadata": {},
1522 | "outputs": [
1523 | {
1524 | "data": {
1525 | "text/html": [
1526 | "\n",
1527 | "\n",
1540 | "
\n",
1541 | " \n",
1542 | " \n",
1543 | " | \n",
1544 | " Parameters | \n",
1545 | "
\n",
1546 | " \n",
1547 | " \n",
1548 | " \n",
1549 | " ccp_alpha | \n",
1550 | " 0 | \n",
1551 | "
\n",
1552 | " \n",
1553 | " class_weight | \n",
1554 | " None | \n",
1555 | "
\n",
1556 | " \n",
1557 | " criterion | \n",
1558 | " gini | \n",
1559 | "
\n",
1560 | " \n",
1561 | " max_depth | \n",
1562 | " None | \n",
1563 | "
\n",
1564 | " \n",
1565 | " max_features | \n",
1566 | " None | \n",
1567 | "
\n",
1568 | " \n",
1569 | " max_leaf_nodes | \n",
1570 | " None | \n",
1571 | "
\n",
1572 | " \n",
1573 | " min_impurity_decrease | \n",
1574 | " 0 | \n",
1575 | "
\n",
1576 | " \n",
1577 | " min_impurity_split | \n",
1578 | " None | \n",
1579 | "
\n",
1580 | " \n",
1581 | " min_samples_leaf | \n",
1582 | " 1 | \n",
1583 | "
\n",
1584 | " \n",
1585 | " min_samples_split | \n",
1586 | " 2 | \n",
1587 | "
\n",
1588 | " \n",
1589 | " min_weight_fraction_leaf | \n",
1590 | " 0 | \n",
1591 | "
\n",
1592 | " \n",
1593 | " presort | \n",
1594 | " deprecated | \n",
1595 | "
\n",
1596 | " \n",
1597 | " random_state | \n",
1598 | " 123 | \n",
1599 | "
\n",
1600 | " \n",
1601 | " splitter | \n",
1602 | " best | \n",
1603 | "
\n",
1604 | " \n",
1605 | "
\n",
1606 | "
"
1607 | ],
1608 | "text/plain": [
1609 | " Parameters\n",
1610 | "ccp_alpha 0\n",
1611 | "class_weight None\n",
1612 | "criterion gini\n",
1613 | "max_depth None\n",
1614 | "max_features None\n",
1615 | "max_leaf_nodes None\n",
1616 | "min_impurity_decrease 0\n",
1617 | "min_impurity_split None\n",
1618 | "min_samples_leaf 1\n",
1619 | "min_samples_split 2\n",
1620 | "min_weight_fraction_leaf 0\n",
1621 | "presort deprecated\n",
1622 | "random_state 123\n",
1623 | "splitter best"
1624 | ]
1625 | },
1626 | "metadata": {},
1627 | "output_type": "display_data"
1628 | }
1629 | ],
1630 | "source": [
1631 | "evaluate_model(dt)"
1632 | ]
1633 | },
1634 | {
1635 | "cell_type": "code",
1636 | "execution_count": null,
1637 | "metadata": {},
1638 | "outputs": [],
1639 | "source": []
1640 | }
1641 | ],
1642 | "metadata": {
1643 | "kernelspec": {
1644 | "display_name": "Python 3",
1645 | "language": "python",
1646 | "name": "python3"
1647 | },
1648 | "language_info": {
1649 | "codemirror_mode": {
1650 | "name": "ipython",
1651 | "version": 3
1652 | },
1653 | "file_extension": ".py",
1654 | "mimetype": "text/x-python",
1655 | "name": "python",
1656 | "nbconvert_exporter": "python",
1657 | "pygments_lexer": "ipython3",
1658 | "version": "3.8.5"
1659 | }
1660 | },
1661 | "nbformat": 4,
1662 | "nbformat_minor": 4
1663 | }
1664 |
--------------------------------------------------------------------------------
/s4-modeling.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
 9 |     "# Importing necessary modules\n",
10 | "from scripts.utils import load_device_data_v2\n",
11 | "from scripts.models import rf_classifier"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
 20 |     "# Data folder path and Extension of the data files\n",
21 | "base_directory = '../rawdata'\n",
22 | "file_extension = \"*.csv\""
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "### Loading Device Data into Dataframe\n",
30 | "###### Door Bells"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": null,
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "danmini_doorbell_df = load_device_data_v2(base_directory, file_extension, 'Danmini_Doorbell')\n",
40 | "ennio_doorbell_df = load_device_data_v2(base_directory, file_extension, 'Ennio_Doorbell')"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "###### Thermostat"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": null,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "ecobee_thermostat_df = load_device_data_v2(base_directory, file_extension, 'Ecobee_Thermostat')"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "###### Web cam"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "samsung_cam_df = load_device_data_v2(base_directory, file_extension, 'Samsung_SNH_1011_N_Webcam')"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "###### Baby monitor"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": null,
85 | "metadata": {},
86 | "outputs": [],
87 | "source": [
88 | "baby_monitor_df = load_device_data_v2(base_directory, file_extension, 'Philips_B120N10_Baby_Monitor')"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "###### Security Cam"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "provision_cam1_df = load_device_data_v2(base_directory, file_extension, 'Provision_PT_737E_Security_Camera')\n",
105 | "provision_cam2_df = load_device_data_v2(base_directory, file_extension, 'Provision_PT_838_Security_Camera')\n",
106 | "simplehome_cam1_df = load_device_data_v2(base_directory, file_extension, 'SimpleHome_XCS7_1002_WHT_Security_Camera')\n",
107 | "simplehome_cam2_df = load_device_data_v2(base_directory, file_extension, 'SimpleHome_XCS7_1003_WHT_Security_Camera')"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "##### Model Training"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": null,
120 | "metadata": {},
121 | "outputs": [],
122 | "source": [
123 | "dataframe = {\"Danmini_Doorbell\": danmini_doorbell_df, \n",
124 | " \"Ecobee_Thermostat\": ecobee_thermostat_df,\n",
125 | " \"Ennio_Doorbell\": ennio_doorbell_df,\n",
126 | " \"Philips_B120N10_Baby_Monitor\": baby_monitor_df,\n",
127 | " \"Provision_PT_737E_Security_Camera\": provision_cam1_df,\n",
128 | " \"Provision_PT_838_Security_Camera\": provision_cam2_df,\n",
129 | " \"Samsung_SNH_1011_N_Webcam\": samsung_cam_df,\n",
130 | " \"SimpleHome_XCS7_1002_WHT_Security_Camera\": simplehome_cam1_df,\n",
131 | " \"SimpleHome_XCS7_1003_WHT_Security_Camera\": simplehome_cam2_df\n",
132 | " }"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": null,
138 | "metadata": {},
139 | "outputs": [],
140 | "source": [
141 | "for k in dataframe:\n",
142 | " print(\"----------------------xxxxxxx----------------------\")\n",
143 | " print(k)\n",
144 | " print(\"----------------------xxxxxxx----------------------\")\n",
145 | " results = rf_classifier(dataframe[k], k)\n",
146 | " print(results)\n",
147 | " print(\"---------------------xxxxxxx-----------------------\")"
148 | ]
149 | }
150 | ],
151 | "metadata": {
152 | "kernelspec": {
153 | "display_name": "Python 3",
154 | "language": "python",
155 | "name": "python3"
156 | },
157 | "language_info": {
158 | "codemirror_mode": {
159 | "name": "ipython",
160 | "version": 3
161 | },
162 | "file_extension": ".py",
163 | "mimetype": "text/x-python",
164 | "name": "python",
165 | "nbconvert_exporter": "python",
166 | "pygments_lexer": "ipython3",
167 | "version": "3.7.8"
168 | }
169 | },
170 | "nbformat": 4,
171 | "nbformat_minor": 4
172 | }
173 |
--------------------------------------------------------------------------------
/scripts/models.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from sklearn.model_selection import train_test_split
4 | from sklearn.ensemble import RandomForestClassifier
5 | from sklearn.preprocessing import StandardScaler
6 | from sklearn.metrics import (
7 | f1_score, classification_report,
8 | confusion_matrix, roc_curve,
9 | roc_auc_score, accuracy_score,
10 | log_loss)
11 | from sklearn import __version__ as sklearn_version
12 | from sklearn.neighbors import KNeighborsClassifier
13 | from imblearn.under_sampling import NearMiss
14 | from datetime import datetime
15 | import os
16 | import pickle
17 |
18 |
19 |
def rf_classifier(data, device_name, scaling=False):
    """Train a RandomForest on one device's traffic data and persist the artifacts.

    Splits off a 30% validation set (saved to CSV, never used for training),
    trains a RandomForestClassifier on the remainder, writes the pickled model
    and a text report of test-set metrics under ``models/<device_name>/``.

    Parameters
    ----------
    data : pandas.DataFrame
        Device traffic with feature columns plus 'label' and 'device' columns.
    device_name : str
        Device name; used for the output folder and model file name.
    scaling : bool, optional
        If True, standardize features with StandardScaler fitted on the
        training split only. Default False.

    Returns
    -------
    str
        Summary message embedding the results dict (shapes, accuracy, F1,
        top-20 feature importances).
    """
    # Hold out 30% of rows as a validation set, untouched by training.
    # NOTE(review): no random_state here, so the validation split (and hence
    # the trained model) differs between runs — confirm this is intended.
    validation_data = data.sample(frac=0.30)
    data_df = data.drop(validation_data.index)

    results = {}

    # Features vs. target: everything except the label/device metadata columns.
    X = data_df.drop(['label', 'device'], axis=1)
    y = data_df['label']
    results['original_shape'] = [X.shape, y.shape]

    # The train/test split is identical with or without scaling, so do it once.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.30, random_state=47)

    if scaling:
        # Fit the scaler on the training split only to avoid test-set leakage.
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
        model_name = f'{device_name}_with_scaling_unbalanced_model.pkl'
    else:
        model_name = f'{device_name}_without_scaling_unbalanced_model.pkl'

    # Train the classifier (fit mutates and returns clf itself).
    clf = RandomForestClassifier()
    clf.fit(X_train, y_train)

    # Evaluate on the held-out test split.
    y_pred = clf.predict(X_test)
    ac = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    cm = confusion_matrix(y_test, y_pred)
    cr = classification_report(y_test, y_pred)

    # Feature importances, largest first.
    importances = pd.DataFrame(
        {'feature': X.columns,
         'importance': np.round(clf.feature_importances_, 3)})
    importances = (importances
                   .sort_values('importance', ascending=False)
                   .set_index('feature'))

    results['feature_importance'] = [importances.head(20)]
    results['Accuracy Test Data'] = ac
    results['F1 Score Test Data'] = f1

    # Tag the model with version metadata before pickling so the environment
    # it was built under can be recovered later.
    best_model = clf
    best_model.version = 1.0
    best_model.pandas_version = pd.__version__
    best_model.numpy_version = np.__version__
    best_model.sklearn_version = sklearn_version
    best_model.build_datetime = datetime.now()

    # makedirs (not mkdir) so the parent 'models/' directory is created too,
    # and exist_ok avoids a race/failure when the folder already exists.
    modelpath = f'models/{device_name}'
    os.makedirs(modelpath, exist_ok=True)

    # An existing model file is deliberately left untouched (re-runs do not
    # overwrite a previously saved model).
    iotmodel_path = os.path.join(modelpath, model_name)
    if not os.path.exists(iotmodel_path):
        with open(iotmodel_path, 'wb') as f:
            pickle.dump(best_model, f)

    # Context manager guarantees the report file is closed even on error.
    with open(f'{modelpath}/report.txt', 'w') as f:
        f.write(f'''Classification Report on Test Set
    \n \n {cr}\n \n
    Confusion Matrix on Test Set
    \n \n {cm} \n \n''')

    validation_data.to_csv(f'{modelpath}/{device_name}_validation_data.csv')
    return f'Model trained and saved successfully \n {results}'
--------------------------------------------------------------------------------
/scripts/utils.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | from glob import glob
4 |
5 |
6 |
def load_device_data_v2(file_path, file_ext, device, labels=3, size=1):
    """
    Create a data frame containing only the data for the specified device.

    ``file_path`` should be the directory where the unzipped data files are
    stored. Assumes the file structure is::

        device name (folder)
            mirai_attacks (folder)
            gafgyt_attacks (folder)
            benign_traffic.csv

    Parameters
    ----------
    file_path : str
        The directory in which the data files are stored.
    file_ext : str
        Glob pattern of the files to load (e.g. ``'*.csv'``).
    device : str
        Device name (folder name).
    labels : int
        3 for the benign/mirai/gafgyt classes; any other value produces
        the per-attack-type (11-class) labelling.
    size : float
        Fraction of each file to load; 1 loads everything.

    Returns
    -------
    device_data : pandas.DataFrame
        The concatenated device data, or the error message as a string
        if loading failed.
    """
    try:
        # Collect one frame per matching file, concatenate at the end.
        dfs = []

        # Walk the whole tree and match files against the glob pattern.
        for path, subdir, files in os.walk(file_path):
            for file in glob(os.path.join(path, file_ext)):
                # Split on the OS path separator (was hard-coded to '\\',
                # which only worked on Windows). parts[1] is the device
                # folder when file_path is a single relative component.
                parts = file.split(os.sep)
                # Keep only files that belong to the requested device.
                if parts[1] != device:
                    continue

                data = pd.read_csv(file)
                data['device'] = parts[1]

                if labels == 3:
                    # 3-class labelling: benign / mirai / gafgyt.
                    data['label'] = parts[2].split('_')[0]
                else:
                    # 11-class labelling. The benign file sits directly in
                    # the device folder (path depth 3); attack files are one
                    # level deeper inside the *_attacks folders.
                    if len(parts) == 3:
                        data['label'] = parts[2].split('_')[0]
                    else:
                        data['label'] = parts[2].split('_')[0] + '_' \
                            + parts[3].split('.')[0]

                # Load everything, or only a random sample of each file.
                if size != 1:
                    dfs.append(data.sample(frac=size))
                else:
                    dfs.append(data)

        device_data = pd.concat(dfs, ignore_index=True)

        return device_data
    except Exception as e:
        # NOTE(review): returning the error message as a string preserves
        # the original contract; callers must check the result type.
        return str(e)
69 |
70 |
71 |
def load_all_data_v2(file_path, file_ext, device, labels=3, size=1):
    """
    Create a data frame containing the data of every device under ``file_path``.

    ``file_path`` should be the directory where the unzipped data files are
    stored. Assumes the file structure is::

        device name (folder)
            mirai_attacks (folder)
            gafgyt_attacks (folder)
            benign_traffic.csv

    Parameters
    ----------
    file_path : str
        The directory in which the data files are stored.
    file_ext : str
        Glob pattern of the files to load (e.g. ``'*.csv'``).
    device : str
        Unused; kept for signature compatibility with
        :func:`load_device_data_v2`.
    labels : int
        3 for the benign/mirai/gafgyt classes; any other value produces
        the per-attack-type (11-class) labelling.
    size : float
        Fraction of each file to load; 1 loads everything.

    Returns
    -------
    device_data : pandas.DataFrame
        The concatenated data of all devices, or the error message as a
        string if loading failed.
    """
    try:
        # Collect one frame per matching file, concatenate at the end.
        dfs = []

        # Walk the whole tree and match files against the glob pattern.
        for path, subdir, files in os.walk(file_path):
            for file in glob(os.path.join(path, file_ext)):
                data = pd.read_csv(file)
                # Split on the OS path separator (was hard-coded to '\\',
                # which only worked on Windows). parts[1] is the device
                # folder when file_path is a single relative component.
                parts = file.split(os.sep)
                data['device'] = parts[1]

                if labels == 3:
                    # 3-class labelling: benign / mirai / gafgyt.
                    data['label'] = parts[2].split('_')[0]
                else:
                    # 11-class labelling. The benign file sits directly in
                    # the device folder (path depth 3); attack files are one
                    # level deeper inside the *_attacks folders.
                    if len(parts) == 3:
                        data['label'] = parts[2].split('_')[0]
                    else:
                        data['label'] = parts[2].split('_')[0] + '_' \
                            + parts[3].split('.')[0]

                # Load everything, or only a random sample of each file.
                if size != 1:
                    dfs.append(data.sample(frac=size))
                else:
                    # BUGFIX: was `dfs.append(dat)` (NameError), so the
                    # size == 1 path always failed and returned the
                    # exception text instead of a data frame.
                    dfs.append(data)

        device_data = pd.concat(dfs, ignore_index=True)

        return device_data
    except Exception as e:
        # NOTE(review): returning the error message as a string preserves
        # the original contract; callers must check the result type.
        return str(e)
132 |
133 |
def load_data_labels(PATH, EXT):
    """
    Create three data frames — benign, mirai and gafgyt — from all the
    data files in a given directory.

    The directory should be where the unzipped data files are stored.
    Assumes the file structure is::

        device name (folder)
            mirai_attacks (folder)
            gafgyt_attacks (folder)
            benign_traffic.csv

    Parameters
    ----------
    PATH : str
        The directory in which the data files are stored.
    EXT : str
        Glob pattern of the files to load (e.g. ``'*.csv'``).

    Returns
    -------
    benign_data : pandas.DataFrame
        All the benign data.
    mirai_data : pandas.DataFrame
        All the mirai data, labelled ``Mirai_<attack>``.
    gafgyt_data : pandas.DataFrame
        All the gafgyt data, labelled ``Gafgyt_<attack>``.
    """
    try:
        benign_dfs = []
        mirai_dfs = []
        gafgyt_dfs = []
        for path, subdir, files in os.walk(PATH):
            for file in glob(os.path.join(path, EXT)):
                # Split on the OS path separator (was hard-coded to '\\',
                # which only worked on Windows). parts[1] is the device
                # folder, parts[3] the attack csv inside a *_attacks folder.
                parts = file.split(os.sep)
                if 'benign_traffic' in file:
                    data = pd.read_csv(file)
                    data['label'] = 'Benign'
                    data['device'] = parts[1]
                    benign_dfs.append(data)
                if 'mirai_attacks' in file:
                    data = pd.read_csv(file)
                    data['label'] = 'Mirai_' + parts[3].split('.')[0]
                    data['device'] = parts[1]
                    mirai_dfs.append(data)
                if 'gafgyt_attacks' in file:
                    data = pd.read_csv(file)
                    data['label'] = 'Gafgyt_' + parts[3].split('.')[0]
                    data['device'] = parts[1]
                    gafgyt_dfs.append(data)

        benign_data = pd.concat(benign_dfs, ignore_index=True)
        mirai_data = pd.concat(mirai_dfs, ignore_index=True)
        gafgyt_data = pd.concat(gafgyt_dfs, ignore_index=True)

        return benign_data, mirai_data, gafgyt_data
    except Exception as e:
        # NOTE(review): returning the error message as a string preserves
        # the original contract; callers must check the result type.
        return str(e)
187 |
188 |
def load_device_data(PATH, EXT, device):
    """
    Create a data frame consisting of one device's data with 3-class labels.

    The directory should be where the unzipped data files are stored.
    Assumes the file structure is::

        device name (folder)
            mirai_attacks (folder)
            gafgyt_attacks (folder)
            benign_traffic.csv

    Parameters
    ----------
    PATH : str
        The directory in which the data files are stored.
    EXT : str
        Glob pattern of the files to load (e.g. ``'*.csv'``).
    device : str
        Device name (folder name).

    Returns
    -------
    device_data : pandas.DataFrame
        The device's data labelled benign / mirai / gafgyt, or the error
        message as a string if loading failed.
    """
    try:
        dfs = []
        for path, subdir, files in os.walk(PATH):
            for file in glob(os.path.join(path, EXT)):
                # Split on the OS path separator (was hard-coded to '\\',
                # which only worked on Windows). parts[1] is the device
                # folder, parts[2] either benign_traffic.csv or an
                # *_attacks folder — its first '_' token is the class.
                parts = file.split(os.sep)
                if parts[1] == device:
                    data = pd.read_csv(file)
                    data['label'] = parts[2].split('_')[0]
                    data['device'] = parts[1]
                    dfs.append(data)

        device_data = pd.concat(dfs, ignore_index=True)

        return device_data

    except Exception as e:
        # NOTE(review): returning the error message as a string preserves
        # the original contract; callers must check the result type.
        return str(e)
227 |
228 |
229 |
def load_device_data_multi_label(PATH, EXT, device):
    """
    Create a data frame consisting of one device's data with 11-class labels.

    The directory should be where the unzipped data files are stored.
    Assumes the file structure is::

        device name (folder)
            mirai_attacks (folder)
            gafgyt_attacks (folder)
            benign_traffic.csv

    Parameters
    ----------
    PATH : str
        The directory in which the data files are stored.
    EXT : str
        Glob pattern of the files to load (e.g. ``'*.csv'``).
    device : str
        Device name (folder name).

    Returns
    -------
    device_data : pandas.DataFrame
        The device's data with 11 different classes (benign plus one class
        per attack type), or the error message as a string if loading failed.
    """
    try:
        dfs = []
        for path, subdir, files in os.walk(PATH):
            for file in glob(os.path.join(path, EXT)):
                # Split on the OS path separator (was hard-coded to '\\',
                # which only worked on Windows).
                parts = file.split(os.sep)
                if parts[1] == device:
                    data = pd.read_csv(file)
                    # Benign file sits directly in the device folder (depth
                    # 3); attack files are one level deeper, so the label
                    # combines the attack family and the attack csv name.
                    if len(parts) == 3:
                        data['label'] = parts[2].split('_')[0]
                    else:
                        data['label'] = parts[2].split('_')[0] + '_' + parts[3].split('.')[0]
                    data['device'] = parts[1]
                    dfs.append(data)

        device_data = pd.concat(dfs, ignore_index=True)

        return device_data

    except Exception as e:
        # NOTE(review): returning the error message as a string preserves
        # the original contract; callers must check the result type.
        return str(e)
273 |
274 |
def load_all_data(PATH, EXT):
    """
    Create a data frame consisting of all device data with device-specific
    11-class labels (``<device>_<class>`` / ``<device>_<family>_<attack>``).

    The directory should be where the unzipped data files are stored.
    Assumes the file structure is::

        device name (folder)
            mirai_attacks (folder)
            gafgyt_attacks (folder)
            benign_traffic.csv

    Parameters
    ----------
    PATH : str
        The directory in which the data files are stored.
    EXT : str
        Glob pattern of the files to load (e.g. ``'*.csv'``).

    Returns
    -------
    device_data : pandas.DataFrame
        All device data; labels are prefixed with the device name, or the
        error message as a string if loading failed.
    """
    try:
        dfs = []
        for path, subdir, files in os.walk(PATH):
            for file in glob(os.path.join(path, EXT)):
                data = pd.read_csv(file)
                # Split on the OS path separator (was hard-coded to '\\',
                # which only worked on Windows).
                parts = file.split(os.sep)
                # Benign file sits directly in the device folder (depth 3);
                # attack files are one level deeper in *_attacks folders.
                if len(parts) == 3:
                    data['label'] = parts[1] + '_' + parts[2].split('_')[0]
                else:
                    data['label'] = parts[1] + '_' + parts[2].split('_')[0] + '_' + parts[3].split('.')[0]

                data['device'] = parts[1]
                dfs.append(data)
        device_data = pd.concat(dfs, ignore_index=True)

        return device_data
    except Exception as e:
        # NOTE(review): returning the error message as a string preserves
        # the original contract; callers must check the result type.
        return str(e)
314 |
315 |
def load_all_data_class(PATH, EXT):
    """
    Create a data frame of a 15% sample of every device's data with the
    3-class labelling (benign / mirai / gafgyt), not device-specific.

    The directory should be where the unzipped data files are stored.
    Assumes the file structure is::

        device name (folder)
            mirai_attacks (folder)
            gafgyt_attacks (folder)
            benign_traffic.csv

    Parameters
    ----------
    PATH : str
        The directory in which the data files are stored.
    EXT : str
        Glob pattern of the files to load (e.g. ``'*.csv'``).

    Returns
    -------
    device_data : pandas.DataFrame
        A 15% random sample of each file, concatenated, or the error
        message as a string if loading failed.
    """
    try:
        dfs = []
        for path, subdir, files in os.walk(PATH):
            for file in glob(os.path.join(path, EXT)):
                data = pd.read_csv(file)
                # Split on the OS path separator (was hard-coded to '\\',
                # which only worked on Windows). The original if/else on
                # len(parts) had byte-identical branches, so it is collapsed:
                # the first '_' token of parts[2] is the class either way.
                parts = file.split(os.sep)
                data['label'] = parts[2].split('_')[0]
                data['device'] = parts[1]
                # Keep only a 15% random sample of each file to bound memory.
                sampled_data = data.sample(frac=0.15)
                dfs.append(sampled_data)
        device_data = pd.concat(dfs, ignore_index=True)
        return device_data

    except Exception as e:
        # NOTE(review): returning the error message as a string preserves
        # the original contract; callers must check the result type.
        return str(e)
355 |
356 |
def load_all_data_multi_class(PATH, EXT):
    """
    Create a data frame of a 15% sample of every device's data with the
    11-class labelling (benign plus one class per attack type), not
    device-specific.

    The directory should be where the unzipped data files are stored.
    Assumes the file structure is::

        device name (folder)
            mirai_attacks (folder)
            gafgyt_attacks (folder)
            benign_traffic.csv

    Parameters
    ----------
    PATH : str
        The directory in which the data files are stored.
    EXT : str
        Glob pattern of the files to load (e.g. ``'*.csv'``).

    Returns
    -------
    device_data : pandas.DataFrame
        A 15% random sample of each file, concatenated, or the error
        message as a string if loading failed.
    """
    try:
        dfs = []
        for path, subdir, files in os.walk(PATH):
            for file in glob(os.path.join(path, EXT)):
                data = pd.read_csv(file)
                # Split on the OS path separator (was hard-coded to '\\',
                # which only worked on Windows).
                parts = file.split(os.sep)
                # Benign file sits directly in the device folder (depth 3);
                # attack files are one level deeper, so the label combines
                # the attack family and the attack csv name.
                if len(parts) == 3:
                    data['label'] = parts[2].split('_')[0]
                else:
                    data['label'] = parts[2].split('_')[0] + '_' + parts[3].split('.')[0]

                data['device'] = parts[1]
                # Keep only a 15% random sample of each file to bound memory.
                sampled_data = data.sample(frac=0.15)
                dfs.append(sampled_data)
        device_data = pd.concat(dfs, ignore_index=True)

        return device_data
    except Exception as e:
        # NOTE(review): returning the error message as a string preserves
        # the original contract; callers must check the result type.
        return str(e)
--------------------------------------------------------------------------------