├── Datasets
│   ├── KDDCup99
│   │   ├── kddcup.data.gz
│   │   ├── kddcup.data_10_percent.gz
│   │   ├── kddcup.newtestdata_10_percent_unlabeled.gz
│   │   ├── kddcup.testdata.unlabeled.gz
│   │   └── kddcup.testdata.unlabeled_10_percent.gz
│   └── NSL-KDD
│       ├── KDDTest-21.csv
│       ├── KDDTrain+_20Percent.csv
│       └── KDDTrain+_20Percent_Description.xlsx
├── IDSUsingAutoEnoderNeuralNetwork.ipynb
├── IDSUsingSimpleDeepNeuralNetwork.ipynb
├── IDSUsingTraditionalMLTechniques.ipynb
└── Project-UtilityFunctions
    ├── __pycache__
    │   └── lstm.cpython-37.pyc
    ├── classificationlibrary.py
    ├── dataformatinglibrary.py
    ├── datainspectionlibrary.py
    ├── dataloadinglibrary.py
    ├── datapreprocessinglibrary.py
    ├── defineInputs.py
    ├── featureencodinglibrary.py
    ├── featurescalinglibrary.py
    ├── featureselectionlibrary.py
    ├── findcombinations.py
    ├── lstm.py
    └── util.py
/Datasets/KDDCup99/kddcup.data.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.data.gz
--------------------------------------------------------------------------------
/Datasets/KDDCup99/kddcup.data_10_percent.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.data_10_percent.gz
--------------------------------------------------------------------------------
/Datasets/KDDCup99/kddcup.newtestdata_10_percent_unlabeled.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.newtestdata_10_percent_unlabeled.gz
--------------------------------------------------------------------------------
/Datasets/KDDCup99/kddcup.testdata.unlabeled.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.testdata.unlabeled.gz
--------------------------------------------------------------------------------
/Datasets/KDDCup99/kddcup.testdata.unlabeled_10_percent.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.testdata.unlabeled_10_percent.gz
--------------------------------------------------------------------------------
/Datasets/NSL-KDD/KDDTrain+_20Percent_Description.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/NSL-KDD/KDDTrain+_20Percent_Description.xlsx
--------------------------------------------------------------------------------
/IDSUsingSimpleDeepNeuralNetwork.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Import the required libraries and the utility modules"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 12,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np\n",
17 | "import pandas as pd\n",
18 | "\n",
19 | "from sklearn import metrics\n",
20 | "from sklearn.model_selection import train_test_split\n",
21 | "from sklearn.preprocessing import LabelEncoder\n",
22 | "\n",
23 | "from tensorflow.keras.models import Sequential\n",
24 | "from tensorflow.keras.models import load_model\n",
25 | "from tensorflow.keras.layers import Dense, Activation, Dropout\n",
26 | "from tensorflow.keras.callbacks import EarlyStopping\n",
27 | "from tensorflow.keras.callbacks import ModelCheckpoint\n",
28 | "from tensorflow.keras.utils import plot_model\n",
29 | "from tensorflow.python.keras.utils.np_utils import to_categorical\n",
30 | "\n",
31 | "import matplotlib.pyplot as plt\n",
32 | "\n",
33 | "#Custom libraries\n",
34 | "#Data formating library\n",
35 | "from dataloadinglibrary import loadCSV\n",
36 | "\n",
37 | "from datainspectionlibrary import getStatisticsOfData\n",
38 | "\n",
39 | "from dataformatinglibrary import createExcelFromArray\n",
40 | "\n",
41 | "from defineInputs import getLabelName\n",
42 | "from defineInputs import getPathToTrainingAndTestingDataSets\n",
43 | "from defineInputs import modelPerformanceReport\n",
44 | "from defineInputs import defineArrayForPreProcessing\n",
45 | "from defineInputs import getPathToGenerateModels\n",
46 | "\n",
47 | "from util import performPreprocessing"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "### Load the training dataset and check the statistics"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 2,
60 | "metadata": {
61 | "scrolled": true
62 | },
63 | "outputs": [
64 | {
65 | "name": "stdout",
66 | "output_type": "stream",
67 | "text": [
68 | "***** Start checking the statistics of the dataSet *****\n",
69 | "\n",
70 | "***** Shape (number of rows and columns) in the dataset: (25191, 42)\n",
71 | "***** Total number of features in the dataset: 41\n",
72 | "***** Number of categorical features in the dataset: 3\n",
73 | "***** Number of numerical features in the dataset: 38\n",
74 | "\n",
75 | "***** Names of categorical features in dataset *****\n",
76 | "\n",
77 | "| Categorical features in dataset |\n",
78 | "|-----------------------------------|\n",
79 | "| Protocol_type |\n",
80 | "| Service |\n",
81 | "| Flag |\n",
82 | "\n",
83 | "\n",
84 | "***** Names of numerical features in dataset *****\n",
85 | "\n",
86 | "| Numerical features in the dataset |\n",
87 | "|-------------------------------------|\n",
88 | "| Duration |\n",
89 | "| Src_bytes |\n",
90 | "| Dst_bytes |\n",
91 | "| Land |\n",
92 | "| Wrong_fragment |\n",
93 | "| Urgent |\n",
94 | "| Hot |\n",
95 | "| Num_failed_logins |\n",
96 | "| Logged_in |\n",
97 | "| Num_compromised |\n",
98 | "| Root_shell |\n",
99 | "| Su_attempted |\n",
100 | "| Num_root |\n",
101 | "| Num_file_creations |\n",
102 | "| Num_shells |\n",
103 | "| Num_access_files |\n",
104 | "| Num_outbound_cmds |\n",
105 | "| Is_hot_login |\n",
106 | "| Is_guest_login |\n",
107 | "| Count |\n",
108 | "| Srv_count |\n",
109 | "| Serror_rate |\n",
110 | "| Srv_serror_rate |\n",
111 | "| Rerror_rate |\n",
112 | "| Srv_rerror_rate |\n",
113 | "| Same_srv_rate |\n",
114 | "| Diff_srv_rate |\n",
115 | "| Srv_diff_host_rate |\n",
116 | "| Dst_host_count |\n",
117 | "| Dst_host_srv_count |\n",
118 | "| Dst_host_same_srv_rate |\n",
119 | "| Dst_host_diff_srv_rate |\n",
120 | "| Dst_host_same_src_port_rate |\n",
121 | "| Dst_host_srv_diff_host_rate |\n",
122 | "| Dst_host_serror_rate |\n",
123 | "| Dst_host_srv_serror_rate |\n",
124 | "| Dst_host_rerror_rate |\n",
125 | "| Dst_host_srv_rerror_rate |\n",
126 | "\n",
127 | "\n",
128 | "***** Are there any missing values in the data set: False\n",
129 | "Total number of records in the dataset: 25191\n",
130 | "Unique records in the dataset: 25191\n",
131 | "\n",
132 | "***** Are there any duplicate records in the data set: False\n",
133 | "\n",
134 | "****** Number of different values for label that are present in the dataset: 22\n",
135 | "\n",
136 | "****** Here is the list of unique label types present in the dataset ***** \n",
137 | "\n",
138 | "| Unique label types in the dataset |\n",
139 | "|-------------------------------------|\n",
140 | "| normal |\n",
141 | "| neptune |\n",
142 | "| warezclient |\n",
143 | "| ipsweep |\n",
144 | "| portsweep |\n",
145 | "| teardrop |\n",
146 | "| nmap |\n",
147 | "| satan |\n",
148 | "| smurf |\n",
149 | "| pod |\n",
150 | "| back |\n",
151 | "| guess_passwd |\n",
152 | "| ftp_write |\n",
153 | "| multihop |\n",
154 | "| rootkit |\n",
155 | "| buffer_overflow |\n",
156 | "| imap |\n",
157 | "| warezmaster |\n",
158 | "| phf |\n",
159 | "| land |\n",
160 | "| loadmodule |\n",
161 | "| spy |\n",
162 | "\n",
163 | "\n",
164 | "****** Here is the list of unique values present in each categorical feature in the dataset *****\n",
165 | "\n",
166 | "\n",
167 | "attack_type: 22 \n",
168 | "| distinct values |\n",
169 | "|-------------------|\n",
170 | "| normal |\n",
171 | "| neptune |\n",
172 | "| warezclient |\n",
173 | "| ipsweep |\n",
174 | "| portsweep |\n",
175 | "| teardrop |\n",
176 | "| nmap |\n",
177 | "| satan |\n",
178 | "| smurf |\n",
179 | "| pod |\n",
180 | "| back |\n",
181 | "| guess_passwd |\n",
182 | "| ftp_write |\n",
183 | "| multihop |\n",
184 | "| rootkit |\n",
185 | "| buffer_overflow |\n",
186 | "| imap |\n",
187 | "| warezmaster |\n",
188 | "| phf |\n",
189 | "| land |\n",
190 | "| loadmodule |\n",
191 | "| spy |\n",
192 | "\n",
193 | "\n",
194 | "Protocol_type: 3 \n",
195 | "| distinct values |\n",
196 | "|-------------------|\n",
197 | "| udp |\n",
198 | "| tcp |\n",
199 | "| icmp |\n",
200 | "\n",
201 | "\n",
202 | "Service: 66 \n",
203 | "| distinct values |\n",
204 | "|-------------------|\n",
205 | "| other |\n",
206 | "| private |\n",
207 | "| http |\n",
208 | "| remote_job |\n",
209 | "| ftp_data |\n",
210 | "| name |\n",
211 | "| netbios_ns |\n",
212 | "| eco_i |\n",
213 | "| mtp |\n",
214 | "| telnet |\n",
215 | "| finger |\n",
216 | "| domain_u |\n",
217 | "| supdup |\n",
218 | "| uucp_path |\n",
219 | "| Z39_50 |\n",
220 | "| smtp |\n",
221 | "| csnet_ns |\n",
222 | "| uucp |\n",
223 | "| netbios_dgm |\n",
224 | "| urp_i |\n",
225 | "| auth |\n",
226 | "| domain |\n",
227 | "| ftp |\n",
228 | "| bgp |\n",
229 | "| ldap |\n",
230 | "| ecr_i |\n",
231 | "| gopher |\n",
232 | "| vmnet |\n",
233 | "| systat |\n",
234 | "| http_443 |\n",
235 | "| efs |\n",
236 | "| whois |\n",
237 | "| imap4 |\n",
238 | "| iso_tsap |\n",
239 | "| echo |\n",
240 | "| klogin |\n",
241 | "| link |\n",
242 | "| sunrpc |\n",
243 | "| login |\n",
244 | "| kshell |\n",
245 | "| sql_net |\n",
246 | "| time |\n",
247 | "| hostnames |\n",
248 | "| exec |\n",
249 | "| ntp_u |\n",
250 | "| discard |\n",
251 | "| nntp |\n",
252 | "| courier |\n",
253 | "| ctf |\n",
254 | "| ssh |\n",
255 | "| daytime |\n",
256 | "| shell |\n",
257 | "| netstat |\n",
258 | "| pop_3 |\n",
259 | "| nnsp |\n",
260 | "| IRC |\n",
261 | "| pop_2 |\n",
262 | "| printer |\n",
263 | "| tim_i |\n",
264 | "| pm_dump |\n",
265 | "| red_i |\n",
266 | "| netbios_ssn |\n",
267 | "| rje |\n",
268 | "| X11 |\n",
269 | "| urh_i |\n",
270 | "| http_8001 |\n",
271 | "\n",
272 | "\n",
273 | "Flag: 11 \n",
274 | "| distinct values |\n",
275 | "|-------------------|\n",
276 | "| SF |\n",
277 | "| S0 |\n",
278 | "| REJ |\n",
279 | "| RSTR |\n",
280 | "| SH |\n",
281 | "| RSTO |\n",
282 | "| S1 |\n",
283 | "| RSTOS0 |\n",
284 | "| S3 |\n",
285 | "| S2 |\n",
286 | "| OTH |\n",
287 | "\n",
288 | "\n",
289 | "****** Label distribution in the dataset *****\n",
290 | "\n",
291 | "normal 13448\n",
292 | "neptune 8282\n",
293 | "ipsweep 710\n",
294 | "satan 691\n",
295 | "portsweep 587\n",
296 | "smurf 529\n",
297 | "nmap 301\n",
298 | "back 196\n",
299 | "teardrop 188\n",
300 | "warezclient 181\n",
301 | "pod 38\n",
302 | "guess_passwd 10\n",
303 | "warezmaster 7\n",
304 | "buffer_overflow 6\n",
305 | "imap 5\n",
306 | "rootkit 4\n",
307 | "multihop 2\n",
308 | "phf 2\n",
309 | "loadmodule 1\n",
310 | "ftp_write 1\n",
311 | "land 1\n",
312 | "spy 1\n",
313 | "Name: attack_type, dtype: int64\n",
314 | "\n",
315 | "\n",
316 | "***** End checking the statistics of the dataSet *****\n",
317 | "\n",
318 | "***** Here is how to training dataset looks like before performing any pre-processing *****\n"
319 | ]
320 | },
321 | {
322 | "data": {
323 | "text/html": [
324 | "
\n",
325 | "\n",
338 | "
\n",
339 | " \n",
340 | " \n",
341 | " | \n",
342 | " Duration | \n",
343 | " Protocol_type | \n",
344 | " Service | \n",
345 | " Flag | \n",
346 | " Src_bytes | \n",
347 | " Dst_bytes | \n",
348 | " Land | \n",
349 | " Wrong_fragment | \n",
350 | " Urgent | \n",
351 | " Hot | \n",
352 | " ... | \n",
353 | " Dst_host_srv_count | \n",
354 | " Dst_host_same_srv_rate | \n",
355 | " Dst_host_diff_srv_rate | \n",
356 | " Dst_host_same_src_port_rate | \n",
357 | " Dst_host_srv_diff_host_rate | \n",
358 | " Dst_host_serror_rate | \n",
359 | " Dst_host_srv_serror_rate | \n",
360 | " Dst_host_rerror_rate | \n",
361 | " Dst_host_srv_rerror_rate | \n",
362 | " attack_type | \n",
363 | "
\n",
364 | " \n",
365 | " \n",
366 | " \n",
367 | " 0 | \n",
368 | " 0 | \n",
369 | " udp | \n",
370 | " other | \n",
371 | " SF | \n",
372 | " 146 | \n",
373 | " 0 | \n",
374 | " 0 | \n",
375 | " 0 | \n",
376 | " 0 | \n",
377 | " 0 | \n",
378 | " ... | \n",
379 | " 1 | \n",
380 | " 0.00 | \n",
381 | " 0.60 | \n",
382 | " 0.88 | \n",
383 | " 0.00 | \n",
384 | " 0.00 | \n",
385 | " 0.00 | \n",
386 | " 0.0 | \n",
387 | " 0.00 | \n",
388 | " normal | \n",
389 | "
\n",
390 | " \n",
391 | " 1 | \n",
392 | " 0 | \n",
393 | " tcp | \n",
394 | " private | \n",
395 | " S0 | \n",
396 | " 0 | \n",
397 | " 0 | \n",
398 | " 0 | \n",
399 | " 0 | \n",
400 | " 0 | \n",
401 | " 0 | \n",
402 | " ... | \n",
403 | " 26 | \n",
404 | " 0.10 | \n",
405 | " 0.05 | \n",
406 | " 0.00 | \n",
407 | " 0.00 | \n",
408 | " 1.00 | \n",
409 | " 1.00 | \n",
410 | " 0.0 | \n",
411 | " 0.00 | \n",
412 | " neptune | \n",
413 | "
\n",
414 | " \n",
415 | " 2 | \n",
416 | " 0 | \n",
417 | " tcp | \n",
418 | " http | \n",
419 | " SF | \n",
420 | " 232 | \n",
421 | " 8153 | \n",
422 | " 0 | \n",
423 | " 0 | \n",
424 | " 0 | \n",
425 | " 0 | \n",
426 | " ... | \n",
427 | " 255 | \n",
428 | " 1.00 | \n",
429 | " 0.00 | \n",
430 | " 0.03 | \n",
431 | " 0.04 | \n",
432 | " 0.03 | \n",
433 | " 0.01 | \n",
434 | " 0.0 | \n",
435 | " 0.01 | \n",
436 | " normal | \n",
437 | "
\n",
438 | " \n",
439 | " 3 | \n",
440 | " 0 | \n",
441 | " tcp | \n",
442 | " http | \n",
443 | " SF | \n",
444 | " 199 | \n",
445 | " 420 | \n",
446 | " 0 | \n",
447 | " 0 | \n",
448 | " 0 | \n",
449 | " 0 | \n",
450 | " ... | \n",
451 | " 255 | \n",
452 | " 1.00 | \n",
453 | " 0.00 | \n",
454 | " 0.00 | \n",
455 | " 0.00 | \n",
456 | " 0.00 | \n",
457 | " 0.00 | \n",
458 | " 0.0 | \n",
459 | " 0.00 | \n",
460 | " normal | \n",
461 | "
\n",
462 | " \n",
463 | " 4 | \n",
464 | " 0 | \n",
465 | " tcp | \n",
466 | " private | \n",
467 | " REJ | \n",
468 | " 0 | \n",
469 | " 0 | \n",
470 | " 0 | \n",
471 | " 0 | \n",
472 | " 0 | \n",
473 | " 0 | \n",
474 | " ... | \n",
475 | " 19 | \n",
476 | " 0.07 | \n",
477 | " 0.07 | \n",
478 | " 0.00 | \n",
479 | " 0.00 | \n",
480 | " 0.00 | \n",
481 | " 0.00 | \n",
482 | " 1.0 | \n",
483 | " 1.00 | \n",
484 | " neptune | \n",
485 | "
\n",
486 | " \n",
487 | "
\n",
488 | "
5 rows × 42 columns
\n",
489 | "
"
490 | ],
491 | "text/plain": [
492 | " Duration Protocol_type Service Flag Src_bytes Dst_bytes Land \\\n",
493 | "0 0 udp other SF 146 0 0 \n",
494 | "1 0 tcp private S0 0 0 0 \n",
495 | "2 0 tcp http SF 232 8153 0 \n",
496 | "3 0 tcp http SF 199 420 0 \n",
497 | "4 0 tcp private REJ 0 0 0 \n",
498 | "\n",
499 | " Wrong_fragment Urgent Hot ... Dst_host_srv_count \\\n",
500 | "0 0 0 0 ... 1 \n",
501 | "1 0 0 0 ... 26 \n",
502 | "2 0 0 0 ... 255 \n",
503 | "3 0 0 0 ... 255 \n",
504 | "4 0 0 0 ... 19 \n",
505 | "\n",
506 | " Dst_host_same_srv_rate Dst_host_diff_srv_rate \\\n",
507 | "0 0.00 0.60 \n",
508 | "1 0.10 0.05 \n",
509 | "2 1.00 0.00 \n",
510 | "3 1.00 0.00 \n",
511 | "4 0.07 0.07 \n",
512 | "\n",
513 | " Dst_host_same_src_port_rate Dst_host_srv_diff_host_rate \\\n",
514 | "0 0.88 0.00 \n",
515 | "1 0.00 0.00 \n",
516 | "2 0.03 0.04 \n",
517 | "3 0.00 0.00 \n",
518 | "4 0.00 0.00 \n",
519 | "\n",
520 | " Dst_host_serror_rate Dst_host_srv_serror_rate Dst_host_rerror_rate \\\n",
521 | "0 0.00 0.00 0.0 \n",
522 | "1 1.00 1.00 0.0 \n",
523 | "2 0.03 0.01 0.0 \n",
524 | "3 0.00 0.00 0.0 \n",
525 | "4 0.00 0.00 1.0 \n",
526 | "\n",
527 | " Dst_host_srv_rerror_rate attack_type \n",
528 | "0 0.00 normal \n",
529 | "1 0.00 neptune \n",
530 | "2 0.01 normal \n",
531 | "3 0.00 normal \n",
532 | "4 1.00 neptune \n",
533 | "\n",
534 | "[5 rows x 42 columns]"
535 | ]
536 | },
537 | "execution_count": 2,
538 | "metadata": {},
539 | "output_type": "execute_result"
540 | }
541 | ],
542 | "source": [
543 | "#Define file names and call loadCSV to load the CSV files\n",
544 | "trainingFileNameWithAbsolutePath, testingFileNameWithAbsolutePath = getPathToTrainingAndTestingDataSets()\n",
545 | "trainingDataSet = loadCSV(trainingFileNameWithAbsolutePath)\n",
546 | "difficultyLevel = trainingDataSet.pop('difficulty_level')\n",
547 | "labelName = getLabelName()\n",
548 | "label = trainingDataSet[labelName]\n",
549 | "\n",
550 | "#Look at the statistics of the dataSet\n",
551 | "getStatisticsOfData(trainingDataSet)\n",
552 | "print(\"\\n***** Here is how to training dataset looks like before performing any pre-processing *****\")\n",
553 | "trainingDataSet.head()"
554 | ]
555 | },
556 | {
557 | "cell_type": "markdown",
558 | "metadata": {},
559 | "source": [
560 | "### Load the testing dataset and check the statistics"
561 | ]
562 | },
563 | {
564 | "cell_type": "code",
565 | "execution_count": 3,
566 | "metadata": {},
567 | "outputs": [
568 | {
569 | "name": "stdout",
570 | "output_type": "stream",
571 | "text": [
572 | "***** Start checking the statistics of the dataSet *****\n",
573 | "\n",
574 | "***** Shape (number of rows and columns) in the dataset: (11850, 42)\n",
575 | "***** Total number of features in the dataset: 41\n",
576 | "***** Number of categorical features in the dataset: 3\n",
577 | "***** Number of numerical features in the dataset: 38\n",
578 | "\n",
579 | "***** Names of categorical features in dataset *****\n",
580 | "\n",
581 | "| Categorical features in dataset |\n",
582 | "|-----------------------------------|\n",
583 | "| Protocol_type |\n",
584 | "| Service |\n",
585 | "| Flag |\n",
586 | "\n",
587 | "\n",
588 | "***** Names of numerical features in dataset *****\n",
589 | "\n",
590 | "| Numerical features in the dataset |\n",
591 | "|-------------------------------------|\n",
592 | "| Duration |\n",
593 | "| Src_bytes |\n",
594 | "| Dst_bytes |\n",
595 | "| Land |\n",
596 | "| Wrong_fragment |\n",
597 | "| Urgent |\n",
598 | "| Hot |\n",
599 | "| Num_failed_logins |\n",
600 | "| Logged_in |\n",
601 | "| Num_compromised |\n",
602 | "| Root_shell |\n",
603 | "| Su_attempted |\n",
604 | "| Num_root |\n",
605 | "| Num_file_creations |\n",
606 | "| Num_shells |\n",
607 | "| Num_access_files |\n",
608 | "| Num_outbound_cmds |\n",
609 | "| Is_hot_login |\n",
610 | "| Is_guest_login |\n",
611 | "| Count |\n",
612 | "| Srv_count |\n",
613 | "| Serror_rate |\n",
614 | "| Srv_serror_rate |\n",
615 | "| Rerror_rate |\n",
616 | "| Srv_rerror_rate |\n",
617 | "| Same_srv_rate |\n",
618 | "| Diff_srv_rate |\n",
619 | "| Srv_diff_host_rate |\n",
620 | "| Dst_host_count |\n",
621 | "| Dst_host_srv_count |\n",
622 | "| Dst_host_same_srv_rate |\n",
623 | "| Dst_host_diff_srv_rate |\n",
624 | "| Dst_host_same_src_port_rate |\n",
625 | "| Dst_host_srv_diff_host_rate |\n",
626 | "| Dst_host_serror_rate |\n",
627 | "| Dst_host_srv_serror_rate |\n",
628 | "| Dst_host_rerror_rate |\n",
629 | "| Dst_host_srv_rerror_rate |\n",
630 | "\n",
631 | "\n",
632 | "***** Are there any missing values in the data set: False\n",
633 | "Total number of records in the dataset: 11850\n",
634 | "Unique records in the dataset: 11850\n",
635 | "\n",
636 | "***** Are there any duplicate records in the data set: False\n",
637 | "\n",
638 | "****** Number of different values for label that are present in the dataset: 38\n",
639 | "\n",
640 | "****** Here is the list of unique label types present in the dataset ***** \n",
641 | "\n",
642 | "| Unique label types in the dataset |\n",
643 | "|-------------------------------------|\n",
644 | "| guess_passwd |\n",
645 | "| snmpguess |\n",
646 | "| processtable |\n",
647 | "| normal |\n",
648 | "| nmap |\n",
649 | "| back |\n",
650 | "| neptune |\n",
651 | "| satan |\n",
652 | "| saint |\n",
653 | "| mscan |\n",
654 | "| apache2 |\n",
655 | "| httptunnel |\n",
656 | "| warezmaster |\n",
657 | "| ipsweep |\n",
658 | "| smurf |\n",
659 | "| mailbomb |\n",
660 | "| teardrop |\n",
661 | "| portsweep |\n",
662 | "| snmpgetattack |\n",
663 | "| multihop |\n",
664 | "| worm |\n",
665 | "| land |\n",
666 | "| sendmail |\n",
667 | "| buffer_overflow |\n",
668 | "| pod |\n",
669 | "| rootkit |\n",
670 | "| xlock |\n",
671 | "| xterm |\n",
672 | "| xsnoop |\n",
673 | "| ps |\n",
674 | "| named |\n",
675 | "| ftp_write |\n",
676 | "| loadmodule |\n",
677 | "| phf |\n",
678 | "| udpstorm |\n",
679 | "| perl |\n",
680 | "| sqlattack |\n",
681 | "| imap |\n",
682 | "\n",
683 | "\n",
684 | "****** Here is the list of unique values present in each categorical feature in the dataset *****\n",
685 | "\n",
686 | "\n",
687 | "attack_type: 38 \n",
688 | "| distinct values |\n",
689 | "|-------------------|\n",
690 | "| guess_passwd |\n",
691 | "| snmpguess |\n",
692 | "| processtable |\n",
693 | "| normal |\n",
694 | "| nmap |\n",
695 | "| back |\n",
696 | "| neptune |\n",
697 | "| satan |\n",
698 | "| saint |\n",
699 | "| mscan |\n",
700 | "| apache2 |\n",
701 | "| httptunnel |\n",
702 | "| warezmaster |\n",
703 | "| ipsweep |\n",
704 | "| smurf |\n",
705 | "| mailbomb |\n",
706 | "| teardrop |\n",
707 | "| portsweep |\n",
708 | "| snmpgetattack |\n",
709 | "| multihop |\n",
710 | "| worm |\n",
711 | "| land |\n",
712 | "| sendmail |\n",
713 | "| buffer_overflow |\n",
714 | "| pod |\n",
715 | "| rootkit |\n",
716 | "| xlock |\n",
717 | "| xterm |\n",
718 | "| xsnoop |\n",
719 | "| ps |\n",
720 | "| named |\n",
721 | "| ftp_write |\n",
722 | "| loadmodule |\n",
723 | "| phf |\n",
724 | "| udpstorm |\n",
725 | "| perl |\n",
726 | "| sqlattack |\n",
727 | "| imap |\n",
728 | "\n",
729 | "\n",
730 | "Protocol_type: 3 \n",
731 | "| distinct values |\n",
732 | "|-------------------|\n",
733 | "| tcp |\n",
734 | "| udp |\n",
735 | "| icmp |\n",
736 | "\n",
737 | "\n",
738 | "Service: 62 \n",
739 | "| distinct values |\n",
740 | "|-------------------|\n",
741 | "| telnet |\n",
742 | "| private |\n",
743 | "| http |\n",
744 | "| imap4 |\n",
745 | "| ftp_data |\n",
746 | "| other |\n",
747 | "| ctf |\n",
748 | "| pop_3 |\n",
749 | "| ftp |\n",
750 | "| domain_u |\n",
751 | "| domain |\n",
752 | "| eco_i |\n",
753 | "| ecr_i |\n",
754 | "| finger |\n",
755 | "| name |\n",
756 | "| smtp |\n",
757 | "| vmnet |\n",
758 | "| mtp |\n",
759 | "| bgp |\n",
760 | "| exec |\n",
761 | "| sunrpc |\n",
762 | "| uucp_path |\n",
763 | "| iso_tsap |\n",
764 | "| echo |\n",
765 | "| auth |\n",
766 | "| hostnames |\n",
767 | "| courier |\n",
768 | "| uucp |\n",
769 | "| daytime |\n",
770 | "| nntp |\n",
771 | "| netstat |\n",
772 | "| urp_i |\n",
773 | "| http_443 |\n",
774 | "| csnet_ns |\n",
775 | "| login |\n",
776 | "| klogin |\n",
777 | "| whois |\n",
778 | "| time |\n",
779 | "| link |\n",
780 | "| discard |\n",
781 | "| gopher |\n",
782 | "| supdup |\n",
783 | "| netbios_ns |\n",
784 | "| systat |\n",
785 | "| netbios_dgm |\n",
786 | "| kshell |\n",
787 | "| efs |\n",
788 | "| nnsp |\n",
789 | "| ssh |\n",
790 | "| netbios_ssn |\n",
791 | "| Z39_50 |\n",
792 | "| IRC |\n",
793 | "| ntp_u |\n",
794 | "| X11 |\n",
795 | "| pm_dump |\n",
796 | "| ldap |\n",
797 | "| remote_job |\n",
798 | "| sql_net |\n",
799 | "| shell |\n",
800 | "| tim_i |\n",
801 | "| pop_2 |\n",
802 | "| tftp_u |\n",
803 | "\n",
804 | "\n",
805 | "Flag: 11 \n",
806 | "| distinct values |\n",
807 | "|-------------------|\n",
808 | "| SF |\n",
809 | "| S3 |\n",
810 | "| SH |\n",
811 | "| REJ |\n",
812 | "| S0 |\n",
813 | "| RSTO |\n",
814 | "| RSTR |\n",
815 | "| RSTOS0 |\n",
816 | "| S1 |\n",
817 | "| S2 |\n",
818 | "| OTH |\n",
819 | "\n",
820 | "\n",
821 | "****** Label distribution in the dataset *****\n",
822 | "\n",
823 | "normal 2152\n",
824 | "neptune 1579\n",
825 | "guess_passwd 1231\n",
826 | "mscan 996\n",
827 | "warezmaster 944\n",
828 | "apache2 737\n",
829 | "satan 727\n",
830 | "processtable 685\n",
831 | "smurf 627\n",
832 | "back 359\n",
833 | "snmpguess 331\n",
834 | "saint 309\n",
835 | "mailbomb 293\n",
836 | "snmpgetattack 178\n",
837 | "portsweep 156\n",
838 | "ipsweep 141\n",
839 | "httptunnel 133\n",
840 | "nmap 73\n",
841 | "pod 41\n",
842 | "buffer_overflow 20\n",
843 | "multihop 18\n",
844 | "named 17\n",
845 | "ps 15\n",
846 | "sendmail 14\n",
847 | "rootkit 13\n",
848 | "xterm 13\n",
849 | "teardrop 12\n",
850 | "xlock 9\n",
851 | "land 7\n",
852 | "xsnoop 4\n",
853 | "ftp_write 3\n",
854 | "worm 2\n",
855 | "perl 2\n",
856 | "phf 2\n",
857 | "loadmodule 2\n",
858 | "sqlattack 2\n",
859 | "udpstorm 2\n",
860 | "imap 1\n",
861 | "Name: attack_type, dtype: int64\n",
862 | "\n",
863 | "\n",
864 | "***** End checking the statistics of the dataSet *****\n",
865 | "\n",
866 | "***** Here is how to testing dataset looks like before performing any pre-processing *****\n"
867 | ]
868 | },
869 | {
870 | "data": {
871 | "text/html": [
872 | "\n",
873 | "\n",
886 | "
\n",
887 | " \n",
888 | " \n",
889 | " | \n",
890 | " Duration | \n",
891 | " Protocol_type | \n",
892 | " Service | \n",
893 | " Flag | \n",
894 | " Src_bytes | \n",
895 | " Dst_bytes | \n",
896 | " Land | \n",
897 | " Wrong_fragment | \n",
898 | " Urgent | \n",
899 | " Hot | \n",
900 | " ... | \n",
901 | " Dst_host_srv_count | \n",
902 | " Dst_host_same_srv_rate | \n",
903 | " Dst_host_diff_srv_rate | \n",
904 | " Dst_host_same_src_port_rate | \n",
905 | " Dst_host_srv_diff_host_rate | \n",
906 | " Dst_host_serror_rate | \n",
907 | " Dst_host_srv_serror_rate | \n",
908 | " Dst_host_rerror_rate | \n",
909 | " Dst_host_srv_rerror_rate | \n",
910 | " attack_type | \n",
911 | "
\n",
912 | " \n",
913 | " \n",
914 | " \n",
915 | " 0 | \n",
916 | " 13 | \n",
917 | " tcp | \n",
918 | " telnet | \n",
919 | " SF | \n",
920 | " 118 | \n",
921 | " 2425 | \n",
922 | " 0 | \n",
923 | " 0 | \n",
924 | " 0 | \n",
925 | " 0 | \n",
926 | " ... | \n",
927 | " 10 | \n",
928 | " 0.38 | \n",
929 | " 0.12 | \n",
930 | " 0.04 | \n",
931 | " 0.0 | \n",
932 | " 0.00 | \n",
933 | " 0.00 | \n",
934 | " 0.12 | \n",
935 | " 0.3 | \n",
936 | " guess_passwd | \n",
937 | "
\n",
938 | " \n",
939 | " 1 | \n",
940 | " 0 | \n",
941 | " udp | \n",
942 | " private | \n",
943 | " SF | \n",
944 | " 44 | \n",
945 | " 0 | \n",
946 | " 0 | \n",
947 | " 0 | \n",
948 | " 0 | \n",
949 | " 0 | \n",
950 | " ... | \n",
951 | " 254 | \n",
952 | " 1.00 | \n",
953 | " 0.01 | \n",
954 | " 0.01 | \n",
955 | " 0.0 | \n",
956 | " 0.00 | \n",
957 | " 0.00 | \n",
958 | " 0.00 | \n",
959 | " 0.0 | \n",
960 | " snmpguess | \n",
961 | "
\n",
962 | " \n",
963 | " 2 | \n",
964 | " 0 | \n",
965 | " tcp | \n",
966 | " telnet | \n",
967 | " S3 | \n",
968 | " 0 | \n",
969 | " 44 | \n",
970 | " 0 | \n",
971 | " 0 | \n",
972 | " 0 | \n",
973 | " 0 | \n",
974 | " ... | \n",
975 | " 79 | \n",
976 | " 0.31 | \n",
977 | " 0.61 | \n",
978 | " 0.00 | \n",
979 | " 0.0 | \n",
980 | " 0.21 | \n",
981 | " 0.68 | \n",
982 | " 0.60 | \n",
983 | " 0.0 | \n",
984 | " processtable | \n",
985 | "
\n",
986 | " \n",
987 | " 3 | \n",
988 | " 0 | \n",
989 | " udp | \n",
990 | " private | \n",
991 | " SF | \n",
992 | " 53 | \n",
993 | " 55 | \n",
994 | " 0 | \n",
995 | " 0 | \n",
996 | " 0 | \n",
997 | " 0 | \n",
998 | " ... | \n",
999 | " 255 | \n",
1000 | " 1.00 | \n",
1001 | " 0.00 | \n",
1002 | " 0.87 | \n",
1003 | " 0.0 | \n",
1004 | " 0.00 | \n",
1005 | " 0.00 | \n",
1006 | " 0.00 | \n",
1007 | " 0.0 | \n",
1008 | " normal | \n",
1009 | "
\n",
1010 | " \n",
1011 | " 4 | \n",
1012 | " 0 | \n",
1013 | " tcp | \n",
1014 | " private | \n",
1015 | " SH | \n",
1016 | " 0 | \n",
1017 | " 0 | \n",
1018 | " 0 | \n",
1019 | " 0 | \n",
1020 | " 0 | \n",
1021 | " 0 | \n",
1022 | " ... | \n",
1023 | " 1 | \n",
1024 | " 0.06 | \n",
1025 | " 1.00 | \n",
1026 | " 1.00 | \n",
1027 | " 0.0 | \n",
1028 | " 1.00 | \n",
1029 | " 1.00 | \n",
1030 | " 0.00 | \n",
1031 | " 0.0 | \n",
1032 | " nmap | \n",
1033 | "
\n",
1034 | " \n",
1035 | "
\n",
1036 | "
5 rows × 42 columns
\n",
1037 | "
"
1038 | ],
1039 | "text/plain": [
1040 | " Duration Protocol_type Service Flag Src_bytes Dst_bytes Land \\\n",
1041 | "0 13 tcp telnet SF 118 2425 0 \n",
1042 | "1 0 udp private SF 44 0 0 \n",
1043 | "2 0 tcp telnet S3 0 44 0 \n",
1044 | "3 0 udp private SF 53 55 0 \n",
1045 | "4 0 tcp private SH 0 0 0 \n",
1046 | "\n",
1047 | " Wrong_fragment Urgent Hot ... Dst_host_srv_count \\\n",
1048 | "0 0 0 0 ... 10 \n",
1049 | "1 0 0 0 ... 254 \n",
1050 | "2 0 0 0 ... 79 \n",
1051 | "3 0 0 0 ... 255 \n",
1052 | "4 0 0 0 ... 1 \n",
1053 | "\n",
1054 | " Dst_host_same_srv_rate Dst_host_diff_srv_rate \\\n",
1055 | "0 0.38 0.12 \n",
1056 | "1 1.00 0.01 \n",
1057 | "2 0.31 0.61 \n",
1058 | "3 1.00 0.00 \n",
1059 | "4 0.06 1.00 \n",
1060 | "\n",
1061 | " Dst_host_same_src_port_rate Dst_host_srv_diff_host_rate \\\n",
1062 | "0 0.04 0.0 \n",
1063 | "1 0.01 0.0 \n",
1064 | "2 0.00 0.0 \n",
1065 | "3 0.87 0.0 \n",
1066 | "4 1.00 0.0 \n",
1067 | "\n",
1068 | " Dst_host_serror_rate Dst_host_srv_serror_rate Dst_host_rerror_rate \\\n",
1069 | "0 0.00 0.00 0.12 \n",
1070 | "1 0.00 0.00 0.00 \n",
1071 | "2 0.21 0.68 0.60 \n",
1072 | "3 0.00 0.00 0.00 \n",
1073 | "4 1.00 1.00 0.00 \n",
1074 | "\n",
1075 | " Dst_host_srv_rerror_rate attack_type \n",
1076 | "0 0.3 guess_passwd \n",
1077 | "1 0.0 snmpguess \n",
1078 | "2 0.0 processtable \n",
1079 | "3 0.0 normal \n",
1080 | "4 0.0 nmap \n",
1081 | "\n",
1082 | "[5 rows x 42 columns]"
1083 | ]
1084 | },
1085 | "execution_count": 3,
1086 | "metadata": {},
1087 | "output_type": "execute_result"
1088 | }
1089 | ],
1090 | "source": [
1091 | "#Define file names and call loadCSV to load the CSV files\n",
1092 | "testingDataSet = loadCSV(testingFileNameWithAbsolutePath)\n",
1093 | "difficultyLevel = testingDataSet.pop('difficulty_level')\n",
1094 | "\n",
1095 | "#Look at the statistics of the dataSet\n",
1096 | "getStatisticsOfData(testingDataSet)\n",
1097 | "print(\"\\n***** Here is how to testing dataset looks like before performing any pre-processing *****\")\n",
1098 | "testingDataSet.head()"
1099 | ]
1100 | },
1101 | {
1102 | "cell_type": "markdown",
1103 | "metadata": {},
1104 | "source": [
1105 | "### Perform pre-processing"
1106 | ]
1107 | },
1108 | {
1109 | "cell_type": "code",
1110 | "execution_count": 4,
1111 | "metadata": {
1112 | "scrolled": true
1113 | },
1114 | "outputs": [
1115 | {
1116 | "name": "stdout",
1117 | "output_type": "stream",
1118 | "text": [
1119 | "[['ExtraTreesClassifier', 'OneHotEncoder', 'Standardization']]\n",
1120 | "***************************************************************************************************************************\n",
1121 | "********************************************* Building Model- 0 As Below *************************************************\n",
1122 | "\t -- Feature Selection: \t ExtraTreesClassifier \n",
1123 | "\t -- Feature Encoding: \t OneHotEncoder \n",
1124 | "\t -- Feature Scaling: \t Standardization \n",
1125 | "\n",
1126 | "completeDataSet.shape: (37041, 43)\n",
1127 | "completeDataSet.head: Duration Protocol_type Service Flag Src_bytes Dst_bytes Land \\\n",
1128 | "0 0 udp other SF 146 0 0 \n",
1129 | "1 0 tcp private S0 0 0 0 \n",
1130 | "2 0 tcp http SF 232 8153 0 \n",
1131 | "3 0 tcp http SF 199 420 0 \n",
1132 | "4 0 tcp private REJ 0 0 0 \n",
1133 | "\n",
1134 | " Wrong_fragment Urgent Hot ... Dst_host_same_srv_rate \\\n",
1135 | "0 0 0 0 ... 0.00 \n",
1136 | "1 0 0 0 ... 0.10 \n",
1137 | "2 0 0 0 ... 1.00 \n",
1138 | "3 0 0 0 ... 1.00 \n",
1139 | "4 0 0 0 ... 0.07 \n",
1140 | "\n",
1141 | " Dst_host_diff_srv_rate Dst_host_same_src_port_rate \\\n",
1142 | "0 0.60 0.88 \n",
1143 | "1 0.05 0.00 \n",
1144 | "2 0.00 0.03 \n",
1145 | "3 0.00 0.00 \n",
1146 | "4 0.07 0.00 \n",
1147 | "\n",
1148 | " Dst_host_srv_diff_host_rate Dst_host_serror_rate \\\n",
1149 | "0 0.00 0.00 \n",
1150 | "1 0.00 1.00 \n",
1151 | "2 0.04 0.03 \n",
1152 | "3 0.00 0.00 \n",
1153 | "4 0.00 0.00 \n",
1154 | "\n",
1155 | " Dst_host_srv_serror_rate Dst_host_rerror_rate Dst_host_srv_rerror_rate \\\n",
1156 | "0 0.00 0.0 0.00 \n",
1157 | "1 1.00 0.0 0.00 \n",
1158 | "2 0.01 0.0 0.01 \n",
1159 | "3 0.00 0.0 0.00 \n",
1160 | "4 0.00 1.0 1.00 \n",
1161 | "\n",
1162 | " attack_type difficulty_level \n",
1163 | "0 normal 15 \n",
1164 | "1 neptune 19 \n",
1165 | "2 normal 21 \n",
1166 | "3 normal 21 \n",
1167 | "4 neptune 21 \n",
1168 | "\n",
1169 | "[5 rows x 43 columns]\n",
1170 | "\n",
1171 | "****** Start performing feature selection using ExtraTreesClassifier *****\n",
1172 | "****** Falls under wrapper methods (feature importance) *****\n",
1173 | "****** Start label encoding on the categorical features in the given dataset *****\n",
1174 | "****** Number of features before label encoding: 43\n",
1175 | "****** Number of categorical features in the dataset: 3\n",
1176 | "****** Categorical feature names in the dataset: ['Protocol_type' 'Service' 'Flag']\n",
1177 | "\n",
1178 | "****** Here is the list of unique values present in each categorical feature in the dataset *****\n",
1179 | "\n",
1180 | "\n",
1181 | "Protocol_type: 3 \n",
1182 | "| distinct values |\n",
1183 | "|-------------------|\n",
1184 | "| udp |\n",
1185 | "| tcp |\n",
1186 | "| icmp |\n",
1187 | "\n",
1188 | "\n",
1189 | "Service: 67 \n",
1190 | "| distinct values |\n",
1191 | "|-------------------|\n",
1192 | "| other |\n",
1193 | "| private |\n",
1194 | "| http |\n",
1195 | "| remote_job |\n",
1196 | "| ftp_data |\n",
1197 | "| name |\n",
1198 | "| netbios_ns |\n",
1199 | "| eco_i |\n",
1200 | "| mtp |\n",
1201 | "| telnet |\n",
1202 | "| finger |\n",
1203 | "| domain_u |\n",
1204 | "| supdup |\n",
1205 | "| uucp_path |\n",
1206 | "| Z39_50 |\n",
1207 | "| smtp |\n",
1208 | "| csnet_ns |\n",
1209 | "| uucp |\n",
1210 | "| netbios_dgm |\n",
1211 | "| urp_i |\n",
1212 | "| auth |\n",
1213 | "| domain |\n",
1214 | "| ftp |\n",
1215 | "| bgp |\n",
1216 | "| ldap |\n",
1217 | "| ecr_i |\n",
1218 | "| gopher |\n",
1219 | "| vmnet |\n",
1220 | "| systat |\n",
1221 | "| http_443 |\n",
1222 | "| efs |\n",
1223 | "| whois |\n",
1224 | "| imap4 |\n",
1225 | "| iso_tsap |\n",
1226 | "| echo |\n",
1227 | "| klogin |\n",
1228 | "| link |\n",
1229 | "| sunrpc |\n",
1230 | "| login |\n",
1231 | "| kshell |\n",
1232 | "| sql_net |\n",
1233 | "| time |\n",
1234 | "| hostnames |\n",
1235 | "| exec |\n",
1236 | "| ntp_u |\n",
1237 | "| discard |\n",
1238 | "| nntp |\n",
1239 | "| courier |\n",
1240 | "| ctf |\n",
1241 | "| ssh |\n",
1242 | "| daytime |\n",
1243 | "| shell |\n",
1244 | "| netstat |\n",
1245 | "| pop_3 |\n",
1246 | "| nnsp |\n",
1247 | "| IRC |\n",
1248 | "| pop_2 |\n",
1249 | "| printer |\n",
1250 | "| tim_i |\n",
1251 | "| pm_dump |\n",
1252 | "| red_i |\n",
1253 | "| netbios_ssn |\n",
1254 | "| rje |\n",
1255 | "| X11 |\n",
1256 | "| urh_i |\n",
1257 | "| http_8001 |\n",
1258 | "| tftp_u |\n",
1259 | "\n",
1260 | "\n",
1261 | "Flag: 11 \n",
1262 | "| distinct values |\n",
1263 | "|-------------------|\n",
1264 | "| SF |\n",
1265 | "| S0 |\n",
1266 | "| REJ |\n",
1267 | "| RSTR |\n",
1268 | "| SH |\n",
1269 | "| RSTO |\n",
1270 | "| S1 |\n",
1271 | "| RSTOS0 |\n",
1272 | "| S3 |\n",
1273 | "| S2 |\n",
1274 | "| OTH |\n",
1275 | "\n",
1276 | "****** Number of features after label encoding: 43\n",
1277 | "****** End label encoding on the categorical features in the given dataset *****\n",
1278 | "\n",
1279 | "****** ExtraTreesClassification is in progress *****\n",
1280 | "\n",
1281 | " selectedFeatures after ExtraTreesClassification: difficulty_level 0.076128\n",
1282 | "Same_srv_rate 0.071428\n",
1283 | "Dst_host_srv_serror_rate 0.049446\n",
1284 | "Service 0.046810\n",
1285 | "Dst_host_serror_rate 0.046286\n",
1286 | "Flag 0.044061\n",
1287 | "Dst_host_same_srv_rate 0.043586\n",
1288 | "Serror_rate 0.042794\n",
1289 | "Protocol_type 0.041901\n",
1290 | "Dst_host_srv_count 0.041828\n",
1291 | "Srv_serror_rate 0.040107\n",
1292 | "Dst_host_same_src_port_rate 0.037406\n",
1293 | "Count 0.036696\n",
1294 | "Logged_in 0.035569\n",
1295 | "Dst_host_rerror_rate 0.030801\n",
1296 | "Dst_host_diff_srv_rate 0.029853\n",
1297 | "Src_bytes 0.028388\n",
1298 | "Diff_srv_rate 0.027244\n",
1299 | "Dst_host_count 0.027063\n",
1300 | "Rerror_rate 0.024310\n",
1301 | "dtype: float64\n",
1302 | "****** Completed ExtraTreesClassification *****\n",
1303 | "\n",
1304 | "***** Number of columns in the dataSet after feature selection: 21\n",
1305 | "***** Columns in the dataSet after feature selection: \n",
1306 | " Index(['Protocol_type', 'Service', 'Flag', 'Src_bytes', 'Logged_in', 'Count',\n",
1307 | " 'Serror_rate', 'Srv_serror_rate', 'Rerror_rate', 'Same_srv_rate',\n",
1308 | " 'Diff_srv_rate', 'Dst_host_count', 'Dst_host_srv_count',\n",
1309 | " 'Dst_host_same_srv_rate', 'Dst_host_diff_srv_rate',\n",
1310 | " 'Dst_host_same_src_port_rate', 'Dst_host_serror_rate',\n",
1311 | " 'Dst_host_srv_serror_rate', 'Dst_host_rerror_rate', 'difficulty_level',\n",
1312 | " 'attack_type'],\n",
1313 | " dtype='object')\n",
1314 | "****** End performing feature selection using ExtraTreesClassifier *****\n",
1315 | "****** Start one hot encoding on the categorical features in the given dataset *****\n",
1316 | "****** Number of features before one hot encoding: 21\n",
1317 | "****** Number of categorical features in the dataset: 0\n",
1318 | "****** Categorical feature names in the dataset: []\n",
1319 | "\n",
1320 | "****** Here is the list of unique values present in each categorical feature in the dataset *****\n",
1321 | "\n",
1322 | "\n",
1323 | "attack_type: 40 \n",
1324 | "| distinct values |\n",
1325 | "|-------------------|\n",
1326 | "| normal |\n",
1327 | "| neptune |\n",
1328 | "| warezclient |\n",
1329 | "| ipsweep |\n",
1330 | "| portsweep |\n",
1331 | "| teardrop |\n",
1332 | "| nmap |\n",
1333 | "| satan |\n",
1334 | "| smurf |\n",
1335 | "| pod |\n",
1336 | "| back |\n",
1337 | "| guess_passwd |\n",
1338 | "| ftp_write |\n",
1339 | "| multihop |\n",
1340 | "| rootkit |\n",
1341 | "| buffer_overflow |\n",
1342 | "| imap |\n",
1343 | "| warezmaster |\n",
1344 | "| phf |\n",
1345 | "| land |\n",
1346 | "| loadmodule |\n",
1347 | "| spy |\n",
1348 | "| snmpguess |\n",
1349 | "| processtable |\n",
1350 | "| saint |\n",
1351 | "| mscan |\n",
1352 | "| apache2 |\n",
1353 | "| httptunnel |\n",
1354 | "| mailbomb |\n",
1355 | "| snmpgetattack |\n",
1356 | "| worm |\n",
1357 | "| sendmail |\n",
1358 | "| xlock |\n",
1359 | "| xterm |\n",
1360 | "| xsnoop |\n",
1361 | "| ps |\n",
1362 | "| named |\n",
1363 | "| udpstorm |\n",
1364 | "| perl |\n",
1365 | "| sqlattack |\n",
1366 | "\n",
1367 | "****** Number of features after one hot encoding: 21\n",
1368 | "****** End one hot encoding on the categorical features in the given dataset *****\n",
1369 | "\n",
1370 | "****** Start feature scaling of the features present in the dataset using StandardScalar *****\n",
1371 | "[[2 41 9 ... 0.0 15 'normal']\n",
1372 | " [1 46 5 ... 0.0 19 'neptune']\n",
1373 | " [1 22 9 ... 0.0 21 'normal']\n",
1374 | " ...\n",
1375 | " [1 57 2 ... 0.85 13 'mscan']\n",
1376 | " [1 54 1 ... 0.88 15 'mscan']\n",
1377 | " [2 46 9 ... 0.0 17 'snmpguess']]\n",
1378 | "\n",
1379 | "****** Number of features in the dataset before performing scaling: 20\n",
1380 | "\n",
1381 | "****** Features in the dataset before performing scaling ***** \n",
1382 | " [[2 41 9 ... 0.0 0.0 15]\n",
1383 | " [1 46 5 ... 1.0 0.0 19]\n",
1384 | " [1 22 9 ... 0.01 0.0 21]\n",
1385 | " ...\n",
1386 | " [1 57 2 ... 0.08 0.85 13]\n",
1387 | " [1 54 1 ... 0.0 0.88 15]\n",
1388 | " [2 46 9 ... 0.0 0.0 17]]\n",
1389 | "\n",
1390 | "****** Number of features in the dataset after performing scaling: 20\n",
1391 | "\n",
1392 | "****** Features in the dataset after performing scaling ***** \n",
1393 | " [[ 2.03857058 0.6299765 0.73536923 ... -0.54981386 -0.48776502\n",
1394 | " -0.85380057]\n",
1395 | " [-0.15478617 0.93890397 -0.66099165 ... 1.89967409 -0.48776502\n",
1396 | " 0.22813874]\n",
1397 | " [-0.15478617 -0.54394786 0.73536923 ... -0.52531898 -0.48776502\n",
1398 | " 0.76910839]\n",
1399 | " ...\n",
1400 | " [-0.15478617 1.61854439 -1.70826232 ... -0.35385482 2.03171007\n",
1401 | " -1.39477022]\n",
1402 | " [-0.15478617 1.43318791 -2.05735254 ... -0.54981386 2.12063272\n",
1403 | " -0.85380057]\n",
1404 | " [ 2.03857058 0.93890397 0.73536923 ... -0.54981386 -0.48776502\n",
1405 | " -0.31283092]]\n",
1406 | "scaledFeatures.head(): Protocol_type Service Flag Src_bytes Logged_in Count \\\n",
1407 | "0 2.038571 0.629977 0.735369 -0.011190 -0.732914 -0.581217 \n",
1408 | "1 -0.154786 0.938904 -0.660992 -0.011262 -0.732914 0.275339 \n",
1409 | "2 -0.154786 -0.543948 0.735369 -0.011147 1.364417 -0.643512 \n",
1410 | "3 -0.154786 -0.543948 0.735369 -0.011163 1.364417 -0.448840 \n",
1411 | "4 -0.154786 0.938904 -2.057353 -0.011262 -0.732914 0.259766 \n",
1412 | "\n",
1413 | " Serror_rate Srv_serror_rate Rerror_rate Same_srv_rate ... \\\n",
1414 | "0 -0.556584 -0.552030 -0.460806 -1.421427 ... \n",
1415 | "1 1.851192 1.851769 -0.460806 -1.491319 ... \n",
1416 | "2 -0.075029 -0.071270 -0.460806 0.721924 ... \n",
1417 | "3 -0.556584 -0.552030 -0.460806 0.721924 ... \n",
1418 | "4 -0.556584 -0.552030 2.274941 -1.235049 ... \n",
1419 | "\n",
1420 | " Dst_host_count Dst_host_srv_count Dst_host_same_srv_rate \\\n",
1421 | "0 0.656445 -1.050270 -1.193023 \n",
1422 | "1 0.656445 -0.821669 -0.966271 \n",
1423 | "2 -1.709884 1.272317 1.074493 \n",
1424 | "3 0.656445 1.272317 1.074493 \n",
1425 | "4 0.656445 -0.885678 -1.034297 \n",
1426 | "\n",
1427 | " Dst_host_diff_srv_rate Dst_host_same_src_port_rate Dst_host_serror_rate \\\n",
1428 | "0 2.187298 2.137976 -0.561390 \n",
1429 | "1 -0.237144 -0.498320 1.904034 \n",
1430 | "2 -0.457548 -0.408446 -0.487427 \n",
1431 | "3 -0.457548 -0.498320 -0.561390 \n",
1432 | "4 -0.148983 -0.498320 -0.561390 \n",
1433 | "\n",
1434 | " Dst_host_srv_serror_rate Dst_host_rerror_rate difficulty_level \\\n",
1435 | "0 -0.549814 -0.487765 -0.853801 \n",
1436 | "1 1.899674 -0.487765 0.228139 \n",
1437 | "2 -0.525319 -0.487765 0.769108 \n",
1438 | "3 -0.549814 -0.487765 0.769108 \n",
1439 | "4 -0.549814 2.476323 0.769108 \n",
1440 | "\n",
1441 | " attack_type \n",
1442 | "0 normal \n",
1443 | "1 neptune \n",
1444 | "2 normal \n",
1445 | "3 normal \n",
1446 | "4 neptune \n",
1447 | "\n",
1448 | "[5 rows x 21 columns]\n",
1449 | "scaledFeatures.shape: (37041, 21)\n",
1450 | "\n",
1451 | "****** End of feature scaling of the features present in the dataset using StandardScalar *****\n",
1452 | "\n",
1453 | "features.shape: (37041, 20)\n",
1454 | "label.shape: (37041,)\n"
1455 | ]
1456 | },
1457 | {
1458 | "data": {
1459 | "text/html": [
1460 | "\n",
1461 | "\n",
1474 | "
\n",
1475 | " \n",
1476 | " \n",
1477 | " | \n",
1478 | " Protocol_type | \n",
1479 | " Service | \n",
1480 | " Flag | \n",
1481 | " Src_bytes | \n",
1482 | " Logged_in | \n",
1483 | " Count | \n",
1484 | " Serror_rate | \n",
1485 | " Srv_serror_rate | \n",
1486 | " Rerror_rate | \n",
1487 | " Same_srv_rate | \n",
1488 | " ... | \n",
1489 | " Dst_host_count | \n",
1490 | " Dst_host_srv_count | \n",
1491 | " Dst_host_same_srv_rate | \n",
1492 | " Dst_host_diff_srv_rate | \n",
1493 | " Dst_host_same_src_port_rate | \n",
1494 | " Dst_host_serror_rate | \n",
1495 | " Dst_host_srv_serror_rate | \n",
1496 | " Dst_host_rerror_rate | \n",
1497 | " difficulty_level | \n",
1498 | " attack_type | \n",
1499 | "
\n",
1500 | " \n",
1501 | " \n",
1502 | " \n",
1503 | " 0 | \n",
1504 | " 2.038571 | \n",
1505 | " 0.629977 | \n",
1506 | " 0.735369 | \n",
1507 | " -0.011190 | \n",
1508 | " -0.732914 | \n",
1509 | " -0.581217 | \n",
1510 | " -0.556584 | \n",
1511 | " -0.552030 | \n",
1512 | " -0.460806 | \n",
1513 | " -1.421427 | \n",
1514 | " ... | \n",
1515 | " 0.656445 | \n",
1516 | " -1.050270 | \n",
1517 | " -1.193023 | \n",
1518 | " 2.187298 | \n",
1519 | " 2.137976 | \n",
1520 | " -0.561390 | \n",
1521 | " -0.549814 | \n",
1522 | " -0.487765 | \n",
1523 | " -0.853801 | \n",
1524 | " normal | \n",
1525 | "
\n",
1526 | " \n",
1527 | " 1 | \n",
1528 | " -0.154786 | \n",
1529 | " 0.938904 | \n",
1530 | " -0.660992 | \n",
1531 | " -0.011262 | \n",
1532 | " -0.732914 | \n",
1533 | " 0.275339 | \n",
1534 | " 1.851192 | \n",
1535 | " 1.851769 | \n",
1536 | " -0.460806 | \n",
1537 | " -1.491319 | \n",
1538 | " ... | \n",
1539 | " 0.656445 | \n",
1540 | " -0.821669 | \n",
1541 | " -0.966271 | \n",
1542 | " -0.237144 | \n",
1543 | " -0.498320 | \n",
1544 | " 1.904034 | \n",
1545 | " 1.899674 | \n",
1546 | " -0.487765 | \n",
1547 | " 0.228139 | \n",
1548 | " neptune | \n",
1549 | "
\n",
1550 | " \n",
1551 | " 2 | \n",
1552 | " -0.154786 | \n",
1553 | " -0.543948 | \n",
1554 | " 0.735369 | \n",
1555 | " -0.011147 | \n",
1556 | " 1.364417 | \n",
1557 | " -0.643512 | \n",
1558 | " -0.075029 | \n",
1559 | " -0.071270 | \n",
1560 | " -0.460806 | \n",
1561 | " 0.721924 | \n",
1562 | " ... | \n",
1563 | " -1.709884 | \n",
1564 | " 1.272317 | \n",
1565 | " 1.074493 | \n",
1566 | " -0.457548 | \n",
1567 | " -0.408446 | \n",
1568 | " -0.487427 | \n",
1569 | " -0.525319 | \n",
1570 | " -0.487765 | \n",
1571 | " 0.769108 | \n",
1572 | " normal | \n",
1573 | "
\n",
1574 | " \n",
1575 | " 3 | \n",
1576 | " -0.154786 | \n",
1577 | " -0.543948 | \n",
1578 | " 0.735369 | \n",
1579 | " -0.011163 | \n",
1580 | " 1.364417 | \n",
1581 | " -0.448840 | \n",
1582 | " -0.556584 | \n",
1583 | " -0.552030 | \n",
1584 | " -0.460806 | \n",
1585 | " 0.721924 | \n",
1586 | " ... | \n",
1587 | " 0.656445 | \n",
1588 | " 1.272317 | \n",
1589 | " 1.074493 | \n",
1590 | " -0.457548 | \n",
1591 | " -0.498320 | \n",
1592 | " -0.561390 | \n",
1593 | " -0.549814 | \n",
1594 | " -0.487765 | \n",
1595 | " 0.769108 | \n",
1596 | " normal | \n",
1597 | "
\n",
1598 | " \n",
1599 | " 4 | \n",
1600 | " -0.154786 | \n",
1601 | " 0.938904 | \n",
1602 | " -2.057353 | \n",
1603 | " -0.011262 | \n",
1604 | " -0.732914 | \n",
1605 | " 0.259766 | \n",
1606 | " -0.556584 | \n",
1607 | " -0.552030 | \n",
1608 | " 2.274941 | \n",
1609 | " -1.235049 | \n",
1610 | " ... | \n",
1611 | " 0.656445 | \n",
1612 | " -0.885678 | \n",
1613 | " -1.034297 | \n",
1614 | " -0.148983 | \n",
1615 | " -0.498320 | \n",
1616 | " -0.561390 | \n",
1617 | " -0.549814 | \n",
1618 | " 2.476323 | \n",
1619 | " 0.769108 | \n",
1620 | " neptune | \n",
1621 | "
\n",
1622 | " \n",
1623 | "
\n",
1624 | "
5 rows × 21 columns
\n",
1625 | "
"
1626 | ],
1627 | "text/plain": [
1628 | " Protocol_type Service Flag Src_bytes Logged_in Count \\\n",
1629 | "0 2.038571 0.629977 0.735369 -0.011190 -0.732914 -0.581217 \n",
1630 | "1 -0.154786 0.938904 -0.660992 -0.011262 -0.732914 0.275339 \n",
1631 | "2 -0.154786 -0.543948 0.735369 -0.011147 1.364417 -0.643512 \n",
1632 | "3 -0.154786 -0.543948 0.735369 -0.011163 1.364417 -0.448840 \n",
1633 | "4 -0.154786 0.938904 -2.057353 -0.011262 -0.732914 0.259766 \n",
1634 | "\n",
1635 | " Serror_rate Srv_serror_rate Rerror_rate Same_srv_rate ... \\\n",
1636 | "0 -0.556584 -0.552030 -0.460806 -1.421427 ... \n",
1637 | "1 1.851192 1.851769 -0.460806 -1.491319 ... \n",
1638 | "2 -0.075029 -0.071270 -0.460806 0.721924 ... \n",
1639 | "3 -0.556584 -0.552030 -0.460806 0.721924 ... \n",
1640 | "4 -0.556584 -0.552030 2.274941 -1.235049 ... \n",
1641 | "\n",
1642 | " Dst_host_count Dst_host_srv_count Dst_host_same_srv_rate \\\n",
1643 | "0 0.656445 -1.050270 -1.193023 \n",
1644 | "1 0.656445 -0.821669 -0.966271 \n",
1645 | "2 -1.709884 1.272317 1.074493 \n",
1646 | "3 0.656445 1.272317 1.074493 \n",
1647 | "4 0.656445 -0.885678 -1.034297 \n",
1648 | "\n",
1649 | " Dst_host_diff_srv_rate Dst_host_same_src_port_rate Dst_host_serror_rate \\\n",
1650 | "0 2.187298 2.137976 -0.561390 \n",
1651 | "1 -0.237144 -0.498320 1.904034 \n",
1652 | "2 -0.457548 -0.408446 -0.487427 \n",
1653 | "3 -0.457548 -0.498320 -0.561390 \n",
1654 | "4 -0.148983 -0.498320 -0.561390 \n",
1655 | "\n",
1656 | " Dst_host_srv_serror_rate Dst_host_rerror_rate difficulty_level \\\n",
1657 | "0 -0.549814 -0.487765 -0.853801 \n",
1658 | "1 1.899674 -0.487765 0.228139 \n",
1659 | "2 -0.525319 -0.487765 0.769108 \n",
1660 | "3 -0.549814 -0.487765 0.769108 \n",
1661 | "4 -0.549814 2.476323 0.769108 \n",
1662 | "\n",
1663 | " attack_type \n",
1664 | "0 normal \n",
1665 | "1 neptune \n",
1666 | "2 normal \n",
1667 | "3 normal \n",
1668 | "4 neptune \n",
1669 | "\n",
1670 | "[5 rows x 21 columns]"
1671 | ]
1672 | },
1673 | "execution_count": 4,
1674 | "metadata": {},
1675 | "output_type": "execute_result"
1676 | }
1677 | ],
1678 | "source": [
1679 | "arrayOfModels = defineArrayForPreProcessing()\n",
1680 | "completeEncodedAndScaledDataset = performPreprocessing(trainingDataSet, testingDataSet, arrayOfModels)\n",
1681 | "completeEncodedAndScaledDataset.head()"
1682 | ]
1683 | },
1684 | {
1685 | "cell_type": "markdown",
1686 | "metadata": {},
1687 | "source": [
1688 | "### After preprocessing, check the shape of the dataset"
1689 | ]
1690 | },
1691 | {
1692 | "cell_type": "code",
1693 | "execution_count": 5,
1694 | "metadata": {},
1695 | "outputs": [
1696 | {
1697 | "name": "stdout",
1698 | "output_type": "stream",
1699 | "text": [
1700 | "(37041, 20) (37041,)\n",
1701 | "Number of unique values in label: 40\n",
1702 | "Unique values in label: ['apache2' 'back' 'buffer_overflow' 'ftp_write' 'guess_passwd'\n",
1703 | " 'httptunnel' 'imap' 'ipsweep' 'land' 'loadmodule' 'mailbomb' 'mscan'\n",
1704 | " 'multihop' 'named' 'neptune' 'nmap' 'normal' 'perl' 'phf' 'pod'\n",
1705 | " 'portsweep' 'processtable' 'ps' 'rootkit' 'saint' 'satan' 'sendmail'\n",
1706 | " 'smurf' 'snmpgetattack' 'snmpguess' 'spy' 'sqlattack' 'teardrop'\n",
1707 | " 'udpstorm' 'warezclient' 'warezmaster' 'worm' 'xlock' 'xsnoop' 'xterm']\n"
1708 | ]
1709 | }
1710 | ],
1711 | "source": [
1712 | "x = completeEncodedAndScaledDataset.drop('attack_type',axis=1)\n",
1713 | "y = completeEncodedAndScaledDataset['attack_type']\n",
1714 | "print(x.shape, y.shape)\n",
1715 | "print('Number of unique values in label: ',len(np.unique(y)))\n",
1716 | "print('Unique values in label: ',np.unique(y))\n",
1717 | "#print(y.value_counts())"
1718 | ]
1719 | },
1720 | {
1721 | "cell_type": "markdown",
1722 | "metadata": {},
1723 | "source": [
1724 | "### Encode the categorical label values"
1725 | ]
1726 | },
1727 | {
1728 | "cell_type": "code",
1729 | "execution_count": 6,
1730 | "metadata": {},
1731 | "outputs": [
1732 | {
1733 | "name": "stdout",
1734 | "output_type": "stream",
1735 | "text": [
1736 | "(37041, 20) (37041, 40)\n"
1737 | ]
1738 | },
1739 | {
1740 | "name": "stderr",
1741 | "output_type": "stream",
1742 | "text": [
1743 | "D:\\Anaconda3\\envs\\tf_gpu\\lib\\site-packages\\ipykernel_launcher.py:2: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
1744 | " \n"
1745 | ]
1746 | }
1747 | ],
1748 | "source": [
1749 | "onehot = pd.get_dummies(y)\n",
1750 | "y = onehot.as_matrix()\n",
1751 | "print(x.shape, y.shape)"
1752 | ]
1753 | },
1754 | {
1755 | "cell_type": "markdown",
1756 | "metadata": {},
1757 | "source": [
1758 | "## Build a neural Network model"
1759 | ]
1760 | },
1761 | {
1762 | "cell_type": "code",
1763 | "execution_count": 7,
1764 | "metadata": {},
1765 | "outputs": [],
1766 | "source": [
1767 | "'''\n",
1768 | "This function is used to define, compile and filt a neural network\n",
1769 | "'''\n",
1770 | "def nn_model(trainx, trainy, valx,valy,bt_size,epochs, layers):\n",
1771 | " model = Sequential()\n",
1772 | " model.add(Dense(layers[0],activation='relu', input_shape=(trainx.shape[1],)))\n",
1773 | " for l in layers[1:]:\n",
1774 | " model.add(Dense(l, activation='relu' ))\n",
1775 | " model.add(Dropout(0.30))\n",
1776 | " model.add(Dense(trainy.shape[1], activation='softmax'))\n",
1777 | " model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n",
1778 | " hist=model.fit(trainx, trainy, batch_size=bt_size, epochs=epochs, shuffle=True, validation_data=(valx,valy), verbose=True)\n",
1779 | " return hist"
1780 | ]
1781 | },
1782 | {
1783 | "cell_type": "code",
1784 | "execution_count": 8,
1785 | "metadata": {
1786 | "scrolled": true
1787 | },
1788 | "outputs": [
1789 | {
1790 | "name": "stdout",
1791 | "output_type": "stream",
1792 | "text": [
1793 | "WARNING:tensorflow:From D:\\Anaconda3\\envs\\tf_gpu\\lib\\site-packages\\tensorflow\\python\\ops\\resource_variable_ops.py:435: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
1794 | "Instructions for updating:\n",
1795 | "Colocations handled automatically by placer.\n",
1796 | "WARNING:tensorflow:From D:\\Anaconda3\\envs\\tf_gpu\\lib\\site-packages\\tensorflow\\python\\keras\\layers\\core.py:143: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n",
1797 | "Instructions for updating:\n",
1798 | "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n",
1799 | "Train on 27780 samples, validate on 9261 samples\n",
1800 | "WARNING:tensorflow:From D:\\Anaconda3\\envs\\tf_gpu\\lib\\site-packages\\tensorflow\\python\\ops\\math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
1801 | "Instructions for updating:\n",
1802 | "Use tf.cast instead.\n",
1803 | "Epoch 1/100\n",
1804 | "27780/27780 [==============================] - 12s 420us/sample - loss: 1.0481 - acc: 0.7006 - val_loss: 0.6489 - val_acc: 0.7742\n",
1805 | "Epoch 2/100\n",
1806 | "27780/27780 [==============================] - 11s 381us/sample - loss: 0.7296 - acc: 0.7751 - val_loss: 0.5398 - val_acc: 0.8213\n",
1807 | "Epoch 3/100\n",
1808 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.6316 - acc: 0.7992 - val_loss: 0.4577 - val_acc: 0.8402\n",
1809 | "Epoch 4/100\n",
1810 | "27780/27780 [==============================] - 11s 392us/sample - loss: 0.5731 - acc: 0.8157 - val_loss: 0.4002 - val_acc: 0.8790\n",
1811 | "Epoch 5/100\n",
1812 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5312 - acc: 0.8364 - val_loss: 0.3762 - val_acc: 0.8743\n",
1813 | "Epoch 6/100\n",
1814 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.5000 - acc: 0.8528 - val_loss: 0.3834 - val_acc: 0.8959\n",
1815 | "Epoch 7/100\n",
1816 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5053 - acc: 0.8568 - val_loss: 0.3448 - val_acc: 0.9031\n",
1817 | "Epoch 8/100\n",
1818 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.4810 - acc: 0.8643 - val_loss: 0.3433 - val_acc: 0.9037\n",
1819 | "Epoch 9/100\n",
1820 | "27780/27780 [==============================] - 11s 395us/sample - loss: 0.4779 - acc: 0.8639 - val_loss: 0.3262 - val_acc: 0.9022\n",
1821 | "Epoch 10/100\n",
1822 | "27780/27780 [==============================] - 37s 1ms/sample - loss: 0.4537 - acc: 0.8704 - val_loss: 0.3192 - val_acc: 0.9094\n",
1823 | "Epoch 11/100\n",
1824 | "27780/27780 [==============================] - 40s 1ms/sample - loss: 0.4726 - acc: 0.8733 - val_loss: 0.3233 - val_acc: 0.9058\n",
1825 | "Epoch 12/100\n",
1826 | "27780/27780 [==============================] - 42s 2ms/sample - loss: 0.4438 - acc: 0.8746 - val_loss: 0.2998 - val_acc: 0.9097\n",
1827 | "Epoch 13/100\n",
1828 | "27780/27780 [==============================] - 11s 413us/sample - loss: 0.4329 - acc: 0.8796 - val_loss: 0.3063 - val_acc: 0.9161\n",
1829 | "Epoch 14/100\n",
1830 | "27780/27780 [==============================] - 10s 361us/sample - loss: 0.4481 - acc: 0.8753 - val_loss: 0.2901 - val_acc: 0.9099\n",
1831 | "Epoch 15/100\n",
1832 | "27780/27780 [==============================] - 10s 364us/sample - loss: 0.4404 - acc: 0.8772 - val_loss: 0.3386 - val_acc: 0.9068\n",
1833 | "Epoch 16/100\n",
1834 | "27780/27780 [==============================] - 10s 367us/sample - loss: 0.4451 - acc: 0.8808 - val_loss: 0.2942 - val_acc: 0.9142\n",
1835 | "Epoch 17/100\n",
1836 | "27780/27780 [==============================] - 10s 370us/sample - loss: 0.4482 - acc: 0.8839 - val_loss: 0.2815 - val_acc: 0.9166\n",
1837 | "Epoch 18/100\n",
1838 | "27780/27780 [==============================] - 10s 372us/sample - loss: 0.4619 - acc: 0.8762 - val_loss: 0.3048 - val_acc: 0.9119\n",
1839 | "Epoch 19/100\n",
1840 | "27780/27780 [==============================] - 10s 373us/sample - loss: 0.4768 - acc: 0.8789 - val_loss: 0.2868 - val_acc: 0.9173\n",
1841 | "Epoch 20/100\n",
1842 | "27780/27780 [==============================] - 10s 374us/sample - loss: 0.4557 - acc: 0.8812 - val_loss: 0.2839 - val_acc: 0.9148\n",
1843 | "Epoch 21/100\n",
1844 | "27780/27780 [==============================] - 10s 373us/sample - loss: 0.4341 - acc: 0.8857 - val_loss: 0.2716 - val_acc: 0.9219\n",
1845 | "Epoch 22/100\n",
1846 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.4442 - acc: 0.8855 - val_loss: 0.2975 - val_acc: 0.9151\n",
1847 | "Epoch 23/100\n",
1848 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.4499 - acc: 0.8844 - val_loss: 0.2735 - val_acc: 0.9175\n",
1849 | "Epoch 24/100\n",
1850 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.4499 - acc: 0.8874 - val_loss: 0.2904 - val_acc: 0.9228\n",
1851 | "Epoch 25/100\n",
1852 | "27780/27780 [==============================] - 10s 378us/sample - loss: 0.4782 - acc: 0.8844 - val_loss: 0.3537 - val_acc: 0.9121\n",
1853 | "Epoch 26/100\n",
1854 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.5170 - acc: 0.8828 - val_loss: 0.2650 - val_acc: 0.9230\n",
1855 | "Epoch 27/100\n",
1856 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.4159 - acc: 0.8907 - val_loss: 0.2711 - val_acc: 0.9246\n",
1857 | "Epoch 28/100\n",
1858 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.4607 - acc: 0.8807 - val_loss: 0.3042 - val_acc: 0.9160\n",
1859 | "Epoch 29/100\n",
1860 | "27780/27780 [==============================] - 11s 381us/sample - loss: 0.4366 - acc: 0.8857 - val_loss: 0.2859 - val_acc: 0.9171\n",
1861 | "Epoch 30/100\n",
1862 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.4303 - acc: 0.8923 - val_loss: 0.2832 - val_acc: 0.9194\n",
1863 | "Epoch 31/100\n",
1864 | "27780/27780 [==============================] - 11s 380us/sample - loss: 0.5191 - acc: 0.8786 - val_loss: 0.4336 - val_acc: 0.8996\n",
1865 | "Epoch 32/100\n",
1866 | "27780/27780 [==============================] - 11s 381us/sample - loss: 0.5173 - acc: 0.8720 - val_loss: 0.3143 - val_acc: 0.9112\n",
1867 | "Epoch 33/100\n",
1868 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.4416 - acc: 0.8854 - val_loss: 0.2853 - val_acc: 0.9193\n",
1869 | "Epoch 34/100\n",
1870 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.4670 - acc: 0.8879 - val_loss: 0.2971 - val_acc: 0.9171\n",
1871 | "Epoch 35/100\n",
1872 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.4538 - acc: 0.8838 - val_loss: 0.2842 - val_acc: 0.9210\n",
1873 | "Epoch 36/100\n",
1874 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.4627 - acc: 0.8860 - val_loss: 0.3164 - val_acc: 0.9176\n",
1875 | "Epoch 37/100\n",
1876 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.4645 - acc: 0.8835 - val_loss: 0.3689 - val_acc: 0.9066\n",
1877 | "Epoch 38/100\n",
1878 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.4621 - acc: 0.8809 - val_loss: 0.3259 - val_acc: 0.9129\n",
1879 | "Epoch 39/100\n",
1880 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4537 - acc: 0.8845 - val_loss: 0.2831 - val_acc: 0.9199\n",
1881 | "Epoch 40/100\n",
1882 | "27780/27780 [==============================] - 11s 394us/sample - loss: 0.4641 - acc: 0.8784 - val_loss: 0.2876 - val_acc: 0.9129\n",
1883 | "Epoch 41/100\n",
1884 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4442 - acc: 0.8835 - val_loss: 0.2862 - val_acc: 0.9189\n",
1885 | "Epoch 42/100\n",
1886 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4541 - acc: 0.8864 - val_loss: 0.2863 - val_acc: 0.9191\n",
1887 | "Epoch 43/100\n",
1888 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4861 - acc: 0.8815 - val_loss: 0.3277 - val_acc: 0.9136\n",
1889 | "Epoch 44/100\n",
1890 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4576 - acc: 0.8830 - val_loss: 0.3660 - val_acc: 0.9008\n",
1891 | "Epoch 45/100\n",
1892 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5172 - acc: 0.8793 - val_loss: 0.3160 - val_acc: 0.9036\n",
1893 | "Epoch 46/100\n",
1894 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5235 - acc: 0.8769 - val_loss: 0.3028 - val_acc: 0.9205\n",
1895 | "Epoch 47/100\n",
1896 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.4917 - acc: 0.8840 - val_loss: 0.2869 - val_acc: 0.9187\n",
1897 | "Epoch 48/100\n",
1898 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.4863 - acc: 0.8767 - val_loss: 0.3165 - val_acc: 0.9057\n",
1899 | "Epoch 49/100\n",
1900 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.4999 - acc: 0.8771 - val_loss: 0.2654 - val_acc: 0.9218\n",
1901 | "Epoch 50/100\n",
1902 | "27780/27780 [==============================] - 11s 405us/sample - loss: 0.4832 - acc: 0.8861 - val_loss: 0.2689 - val_acc: 0.9221\n",
1903 | "Epoch 51/100\n",
1904 | "27780/27780 [==============================] - 11s 390us/sample - loss: 0.4703 - acc: 0.8832 - val_loss: 0.2785 - val_acc: 0.9167\n"
1905 | ]
1906 | },
1907 | {
1908 | "name": "stdout",
1909 | "output_type": "stream",
1910 | "text": [
1911 | "Epoch 52/100\n",
1912 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.5021 - acc: 0.8807 - val_loss: 0.2870 - val_acc: 0.9158\n",
1913 | "Epoch 53/100\n",
1914 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.4962 - acc: 0.8730 - val_loss: 0.2757 - val_acc: 0.9199\n",
1915 | "Epoch 54/100\n",
1916 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.4674 - acc: 0.8850 - val_loss: 0.2959 - val_acc: 0.9132\n",
1917 | "Epoch 55/100\n",
1918 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.5025 - acc: 0.8792 - val_loss: 0.3102 - val_acc: 0.9173\n",
1919 | "Epoch 56/100\n",
1920 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5713 - acc: 0.8688 - val_loss: 0.2990 - val_acc: 0.9135\n",
1921 | "Epoch 57/100\n",
1922 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.4880 - acc: 0.8781 - val_loss: 0.3754 - val_acc: 0.9047\n",
1923 | "Epoch 58/100\n",
1924 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.5401 - acc: 0.8740 - val_loss: 0.3097 - val_acc: 0.9072\n",
1925 | "Epoch 59/100\n",
1926 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.4993 - acc: 0.8759 - val_loss: 0.3168 - val_acc: 0.9186\n",
1927 | "Epoch 60/100\n",
1928 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.4789 - acc: 0.8816 - val_loss: 0.2970 - val_acc: 0.9136\n",
1929 | "Epoch 61/100\n",
1930 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.5057 - acc: 0.8786 - val_loss: 0.3339 - val_acc: 0.9128\n",
1931 | "Epoch 62/100\n",
1932 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.5387 - acc: 0.8769 - val_loss: 0.3822 - val_acc: 0.9097\n",
1933 | "Epoch 63/100\n",
1934 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5181 - acc: 0.8760 - val_loss: 0.3005 - val_acc: 0.9182\n",
1935 | "Epoch 64/100\n",
1936 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5243 - acc: 0.8725 - val_loss: 0.3749 - val_acc: 0.9021\n",
1937 | "Epoch 65/100\n",
1938 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.5467 - acc: 0.8649 - val_loss: 0.4062 - val_acc: 0.8889\n",
1939 | "Epoch 66/100\n",
1940 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.5542 - acc: 0.8632 - val_loss: 0.3906 - val_acc: 0.9081\n",
1941 | "Epoch 67/100\n",
1942 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5404 - acc: 0.8688 - val_loss: 0.3076 - val_acc: 0.9142\n",
1943 | "Epoch 68/100\n",
1944 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5097 - acc: 0.8747 - val_loss: 0.3242 - val_acc: 0.9136\n",
1945 | "Epoch 69/100\n",
1946 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5479 - acc: 0.8698 - val_loss: 0.3221 - val_acc: 0.9144\n",
1947 | "Epoch 70/100\n",
1948 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5271 - acc: 0.8699 - val_loss: 0.3466 - val_acc: 0.9121\n",
1949 | "Epoch 71/100\n",
1950 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.5463 - acc: 0.8725 - val_loss: 0.3034 - val_acc: 0.9151\n",
1951 | "Epoch 72/100\n",
1952 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5350 - acc: 0.8666 - val_loss: 0.3085 - val_acc: 0.9122\n",
1953 | "Epoch 73/100\n",
1954 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5153 - acc: 0.8709 - val_loss: 0.2951 - val_acc: 0.9156\n",
1955 | "Epoch 74/100\n",
1956 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5131 - acc: 0.8686 - val_loss: 0.3406 - val_acc: 0.8987\n",
1957 | "Epoch 75/100\n",
1958 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.6295 - acc: 0.8508 - val_loss: 0.3472 - val_acc: 0.9013\n",
1959 | "Epoch 76/100\n",
1960 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5793 - acc: 0.8554 - val_loss: 0.3403 - val_acc: 0.9096\n",
1961 | "Epoch 77/100\n",
1962 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5163 - acc: 0.8666 - val_loss: 0.3244 - val_acc: 0.9090\n",
1963 | "Epoch 78/100\n",
1964 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5066 - acc: 0.8669 - val_loss: 0.3062 - val_acc: 0.9177\n",
1965 | "Epoch 79/100\n",
1966 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5329 - acc: 0.8664 - val_loss: 0.3231 - val_acc: 0.8989\n",
1967 | "Epoch 80/100\n",
1968 | "27780/27780 [==============================] - 11s 388us/sample - loss: 0.5266 - acc: 0.8626 - val_loss: 0.3232 - val_acc: 0.9050\n",
1969 | "Epoch 81/100\n",
1970 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5326 - acc: 0.8676 - val_loss: 0.3178 - val_acc: 0.9121\n",
1971 | "Epoch 82/100\n",
1972 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.5326 - acc: 0.8677 - val_loss: 0.2966 - val_acc: 0.9139\n",
1973 | "Epoch 83/100\n",
1974 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.5424 - acc: 0.8681 - val_loss: 0.3101 - val_acc: 0.9135\n",
1975 | "Epoch 84/100\n",
1976 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5325 - acc: 0.8724 - val_loss: 0.2976 - val_acc: 0.9140\n",
1977 | "Epoch 85/100\n",
1978 | "27780/27780 [==============================] - 11s 388us/sample - loss: 0.5753 - acc: 0.8688 - val_loss: 0.2812 - val_acc: 0.9152\n",
1979 | "Epoch 86/100\n",
1980 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5258 - acc: 0.8711 - val_loss: 0.3018 - val_acc: 0.9160\n",
1981 | "Epoch 87/100\n",
1982 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5738 - acc: 0.8590 - val_loss: 0.3412 - val_acc: 0.9003\n",
1983 | "Epoch 88/100\n",
1984 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5871 - acc: 0.8603 - val_loss: 0.3327 - val_acc: 0.9085\n",
1985 | "Epoch 89/100\n",
1986 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5534 - acc: 0.8593 - val_loss: 0.3462 - val_acc: 0.9055\n",
1987 | "Epoch 90/100\n",
1988 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5189 - acc: 0.8657 - val_loss: 0.3108 - val_acc: 0.9120\n",
1989 | "Epoch 91/100\n",
1990 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5696 - acc: 0.8617 - val_loss: 0.3282 - val_acc: 0.9000\n",
1991 | "Epoch 92/100\n",
1992 | "27780/27780 [==============================] - 11s 390us/sample - loss: 0.5057 - acc: 0.8669 - val_loss: 0.3007 - val_acc: 0.9076\n",
1993 | "Epoch 93/100\n",
1994 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5720 - acc: 0.8626 - val_loss: 0.3421 - val_acc: 0.9071\n",
1995 | "Epoch 94/100\n",
1996 | "27780/27780 [==============================] - 11s 390us/sample - loss: 0.5298 - acc: 0.8645 - val_loss: 0.3073 - val_acc: 0.9088\n",
1997 | "Epoch 95/100\n",
1998 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.5363 - acc: 0.8654 - val_loss: 0.3450 - val_acc: 0.9090\n",
1999 | "Epoch 96/100\n",
2000 | "27780/27780 [==============================] - 11s 391us/sample - loss: 0.5624 - acc: 0.8659 - val_loss: 0.3341 - val_acc: 0.9159\n",
2001 | "Epoch 97/100\n",
2002 | "27780/27780 [==============================] - 11s 391us/sample - loss: 0.5754 - acc: 0.8666 - val_loss: 0.3329 - val_acc: 0.9079\n",
2003 | "Epoch 98/100\n",
2004 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.6255 - acc: 0.8611 - val_loss: 0.4500 - val_acc: 0.9037\n",
2005 | "Epoch 99/100\n",
2006 | "27780/27780 [==============================] - 11s 388us/sample - loss: 0.6341 - acc: 0.8553 - val_loss: 0.4839 - val_acc: 0.8996\n",
2007 | "Epoch 100/100\n",
2008 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.7025 - acc: 0.8531 - val_loss: 0.5064 - val_acc: 0.8945\n"
2009 | ]
2010 | }
2011 | ],
2012 | "source": [
2013 | "trainx, testx, trainy, testy = train_test_split(x,y, test_size=0.25, random_state=42)\n",
2014 | "layers=[trainx.shape[1],800,500,400,300,200,100,50,10]\n",
2015 | "hist = nn_model(trainx, trainy, testx, testy,16,100,layers)"
2016 | ]
2017 | },
2018 | {
2019 | "cell_type": "code",
2020 | "execution_count": 9,
2021 | "metadata": {},
2022 | "outputs": [
2023 | {
2024 | "name": "stdout",
2025 | "output_type": "stream",
2026 | "text": [
2027 | "MAX Accuracy during training: 89.22966122627258\n",
2028 | "MAX Accuracy during validation: 92.46301651000977\n"
2029 | ]
2030 | }
2031 | ],
2032 | "source": [
2033 | "print('MAX Accuracy during training: ',max(hist.history['acc'])*100)\n",
2034 | "print('MAX Accuracy during validation: ',max(hist.history['val_acc'])*100)"
2035 | ]
2036 | },
2037 | {
2038 | "cell_type": "markdown",
2039 | "metadata": {},
2040 | "source": [
2041 | "## Plot the training accuracy and testing accuracy"
2042 | ]
2043 | },
2044 | {
2045 | "cell_type": "code",
2046 | "execution_count": 13,
2047 | "metadata": {},
2048 | "outputs": [
2049 | {
2050 | "data": {
2051 | "text/plain": [
2052 | "[]"
2053 | ]
2054 | },
2055 | "execution_count": 13,
2056 | "metadata": {},
2057 | "output_type": "execute_result"
2058 | },
2059 | {
2060 | "data": {
2061 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJztnXeYFFX2/t/L4JBB4qrkKCBIGhEBxYQCBnDBFZV1WBXWhIoEcdeImL6LPyMGVHRNYEJExEVFxcwwRMlJkCEODDAwA0zo8/vjdNF5umame3qoej/P0091Vd2qOlXV/d57zz33XiMiIIQQ4g4qJNoAQgghZQdFnxBCXARFnxBCXARFnxBCXARFnxBCXARFnxBCXARFnxBCXARFnxBCXARFnxBCXETFRBsQTL169aRZs2aJNoMQQk4oFi9evFdE6kdLV+5Ev1mzZkhPT0+0GYQQckJhjNlqJx3dO4QQ4iIo+oQQ4iIo+oQQ4iIo+oQQ4iIo+oQQ4iIo+oQQ4iIo+oQQ4iIo+gQAsGMH8OKLwMGDibaEEBJPKPouZ9064OabgWbNgFGjgMmTE20RISSeUPRPcAoLgdzcotN8+y3Qsydw4EDg9t9/Bzp0AN57DxgxAujTB3jjDSA/P372nkgcPKiZ4oIFwG+/JdoaQmIDRf8E5/bbgfbtgSNHwu8/eBBITQV+/RWYNStw3/vv63L9emDKFGDMGGDnTuDzz+Nja24u8Mcf8Tl3rHnlFeDkk4G2bYHzzwfOOQf47rtEW0VI6aHon8D88Qfw+uvA1q3Ayy+HTzNunPrr69QBPvnEt10EmDlTBa1xY93Wvz/QqBHw6quxt/XwYb3W6acH2lEUIsDChbqMJZMmAVdeCSxaFDnNnDlAkybAu+8CX30FtGwJjBwZOXMVAZYti72thMQcESlXn27dugmxx4gRIsnJImedJVK/vsihQ4H7v/5aBBAZN05k9GhNe/Cg7lu1Sve99FLgMY88ots3bQq9XlqaSK9eIm++WbRdkyeLDBsmsn27ruflifTvL1KhgsgZZ4gkJYm8+270+3vzTbXl7bcDt3s8Iu+9J3L//SJ33CFy880iK1dGP59F48Z6XkDkr38VWb8+9Pz16okMH+7b9u23mn78+PDnfOAB3T9lin07InH4sEhOTunPQ9wFgHSxobEJF/ngD0U/PB5P4PrWrSInnSRy660iv/6qb/Lxx337Dx0SadZMpE0bkdxckZ9+0jTvvaf7H31UxBiRHTsCz5uRoaJ8772+bceOqaglJek5Lrsssp1ZWSJVqmi6WrVEXn9dxRMQefVVteuCC/Tar70W+Tz5+SItW+pxrVuLFBT49r3/vm6vUEGkdm29Xrt2IkePFv0MRUR27dJjH35YPzVq6DPyZ+NGTfPKK4Hbb75Zr5meHrj9nXc0feXKIqecEirYwe8uGn36iHTubO9+REQWLBC54QaRwsLiXSfRHD0q8sUXibbCOVD0T0A8HpE5c8KXPC+/XKRrV5Hff9dtt9+uor91q64PGKACeOCAyIoVKhrGqNiLqCCceqrIVVfpepcuIj17hrdj4ECtOSxeLDJxokjbtvpLSU0VueIKkSZNIt/Df/6jaT/+WMXLKlE/9JAvTW6uyCWXaCaya1f487z1lh5nZRhWzSAnR6RRI7Xfygi++ELTTJwY2S6LOXM07Q8/6PqLL+r6xo2+NO+9p9uWLg08dv9+fYYdOoh89ZXIkSMiP/6oNajzzxeZP1+P+7//8x3z++8ip50mMmlSdNtENBO2ntnYsfaOOfdcTb9qlb305YXHHlO7ly9PtCXOgKJfDlizRqR9e5Hvv4+eNitL5Oqr9Y00barrFq+9pturVhWpVElL6ZUqacnTIj1d0/TpoyLUoIHIZ58FXuP227VU/PvvmvY//wlvy9y5PuExRqRHD5FZs3TfE0/o9gMHQo8rKFDb+/TR9cJCLek
/9VRoaXfZMj3P1Kmh58nPF2nVSjOuggKRjh1FTj9dvz/8cKBoW1xzjd732rXh78nioYe0tG65wtat0/O9/LIvzZ136rPOzw89/osvtEQP6LOsXl1rIvv26f5LLxWpU0fdaLt26fOwakgvvFC0bSL6PACRiy/WZ//tt0WnX7HC966mTYt+/pKSmxu+JvHLLyILFxb/fIWFWhMFNIOPJVu3qkvwtttEUlJEHnwwtue3Q36+ujjnzy+7a1L0ywFXXKFPuEOH8AJisWCBll4rVhQZNUqXgwerUG7frm6SPn1Edu7UEj2gQhLsd7/qKt03ZIhIZmbodb77Tvf36hVauvWnsFB9+1On6jX9sUrKVg3Cn08/1X2ffFLUU1E8HpHmzdXXH8zbb+t5Zs7U9Y8+0vWnnlKh/dvfQo/ZuVPk5JP1ORXlThkwQNsV/O1o0kR9+xbdu4ucd17kcxw+rOJ/550iF12kGYfFokVq64QJmllWqSLy229ae/J3r0XisstUDA8f1sykUaPAAkAwt96qBYAaNUT++c+iz+3PkiX6+7FqS8eOqXi//nro7+q337QQcdZZvnYaEZHp0/V32Lat/etazJvny6zuvtv+cUePhmY+hYX6W37mGZGzz/adt3p1rbGeemrxXWylZdo0teGss8ru2hT9BPP99/p0L7xQwvqHLXbuVGFo1UobSkXUPWCVPq+6SkuWlsvH49GSf7gSclaWZiCRfmQFBfonAEQ6dSrZfW3dGloytrjgAhXQojI4f+65J7Bx2bKxTRuRM8/0/bkLCzXjtPzmW7aEP59VIwpu+LXweFS8UlMDt994o2YYBQUqKsnJ2vhdUqzM1xhfxnXkiLqAKlbUzDcchw6pgN91l66npamopqTobyItLfDZHjyowpaaqu4yu+/08899wlipkj7vSpV826pUEXn6aX0eM2fqM2/aVKRaNZGGDdXtN22a3l/Vqrr0f4d2GDxYpG5drc1dcEH09JmZImPGqC3Jyfp/ueAC/Z1YNS9Az/fEE1oDKijwue8i/WbiQV6eFmgsu375pWyuS9FPIB6P5vCNGqkP+txzVWzD/THuvVfdDf5+/MJCdRNYboGnnoqdbSNH6jkfeaRkx3s8WvO47bbA7cuXF9/WH3/UY6ZP922zfPkffxyY9sMPdfsDD0Q+X2Ghlo779Qu/38qwXnwxcPv06br9t9/0Y7e2EomVKzUTmTw5cPvBgyqeF14Y/riZM/Xa/i6dadPUtWWJWqtWPhfWlCm6beFCfS4VKmgNoSiOHdNztG2rpfpx47SWM3as1qiWLPHVUNu2VUE/+2yR3bv1HTdp4hOzSy7R5xRsczR27tTMb8wY/T3Wrh25oOLxaIBCjRp6f8OG6X9m6FCRc87RmtGYMVoICufaW7w49DdWFIcOlV6kX39drzljhv5XrrmmdOezC0U/gXzwgT5ZK7QxPV3/PP4RMSLaMFijRvgfxe7dWi3t1s1+ydkOv/yiP0R/l0Rx6dVLMzJ/RozQEqLl27ZDQYGWvC13TU6OliTPOiu0Cu/xiPz8c/RnMWqU2hEu8sUSqGAf9J49uv3RR0WefVa/Z2TYv49w5OWF3z5pkkR0raWmamYR7tgdOzRKqEEDFckFC7T2062bPhurMTta+5HV0P7ll5HTeDwqkg0aaDtTbq5v365d6tIaOlSfcWZm8TN7q11o7VpfxvXnn+HTWu/syitL1lCdn6+1kTvvtJd++HDNXEr6/o8d04y9e3d9jmPGaOEt0v3FEop+GeAfRmhx7JhIixba+Oi/PzU1tKHREoDgKBGLffuil9wSwS23qDhZpbP8fBWiv/+9+OcaMUJdFEeO+J5HcCNtcZg9W88RrgFtwgQtYR45ErqvSxf14197rdbQ4sX27SoqEyYEbs/PV3fH9dcXffzmzVoCt2qBb7yh2+2I765dWsgoKuTWH7shoM2baztSJHJz1TWVna3nbNHC19j/889q9+zZocd5PPpegkN2i0ufPlq
QiMaKFVo4A4oOJy6KV1/V4+fO1fU//tD3HVzgiwcU/Tjz3/+q8PlXaz0ekX/8I3xJavt2jeo45RStQufkaAegAQPK1u5YYPlJt23T9QULJKxLxg5WpNAbb6j4WyGlJSU7W4U9WFRFtITatWv448aP1xDY005Tf3M8ueIK/R34l+itZ/jhh9GPz8rS9oHTTgvsE9CyZdHP7+ab9dlEi3AqLn/7m5ZuI3HXXXLcPWV1jLMatLOzfbWsYKy2h2idAaNhZfb+NZb9+0U2bAhMd/nlWgv2D22OxvbtIk8+qfeYmqq1o7PPDnRXDR6shaJ4d7ij6MeRvDz1bVphlFbJ9N//lpCYdH9WrdIffY0a2pkGUL/2iYYlUFZpZuxYFcziNuaJqIugRg1tSKxYsXRuJ4tzz1W3hz9WW0SkCBer93JwnH08sGojn37q2zZmTPGeoccTKiLXX68ZQTjWrdNS7OjRJbO5KCyX0Z49ofuys/X9XnqpCvuQIeqq8a9ttWwZmtF6POoiadYssqvMLp99Fvpfu+IK/b1Z4aLWb/rJJ/U3Ur16oIswJ0eDMT76SCO0li4Vuekmrb0DIjVr6n+7WzdfQIaF1Xb17LOlu49oUPTjiBWO9frr2shWvbqvNDNiRNEhWtu2+SJRgv3iJwpZWRLgSmjbVqRv35Kfb+hQPZ9dv2s0rN7G/mGr69dLkdX2I0d8DZSlcS/ZIT9fxdmq5X36qYpGuPDV4vD884E1MH/eeEP3BXf8iwVWpFq43rWWz/633yIfP3iwNi77Y4V0vvpq6e3bvTswM9+4UX8fdevK8eCAHj18NScrk/jmG985Hn3UVyiwPpUra9+XzZuLvr7Ho/0uKlXS3vPxgqIfJwoK1MfYubMvjr5VKzne2GSn0TUrS6NfIvnyTwQaNlQf/oYNeu/PPVfyc333nVaJw/UtKAlWBM6MGb5t1tANy5ZFPq5vX/WVl8W4N//+t/p6b7xR7erWLbp4RCMtTc/10Ueh+6wOZ/EYqiE7W0X04YcDt3s82jnRamyOxMSJand2tu+4Xr20bcXuUBTR8Hd9jR6tpfytW33uWP8CweHDWoK3akW5uRp9d+ml+vuZNUsz0d277V9/715ty/jLX8JnyrGAoh8jPJ7AH+yMGaF/rG3btNTrpkGy+vXTjO+ZZ/R5hBugLVEUFGh7y403+raNHq0ls6Iy5R9+iH8V3GLzZp/Y3HVXbMTt2DEtTYYbvuH88zVjjRft24c2EFuD1EXzyVvurp9/1vX//U/X7fRgtsuwYdqOcviwuvmGDtXtHo/+d4cMCfxtXHKJ1uJFtE8KoC6g0rBypbq6unaNj1bEVPQB9AOwDsBGABPC7G8KYD6AFQC+B9DIb18qgA3eT2q0a5UH0V+yRAWjZ09tfK1XT+S++zTsqmNHdWecaINbxZpx41Rg+vTRP3x5Y/Bg9bF6PBoX3rq1xnWXJ6ZO1R7OseScc0R69w7c5vFoQ+LIkbG9lj9WI2ZwA2adOoENqOGw+k+89JL679u105J5rEr5Ij4303336TJcj3J/nntO061bp7ZYIZilZc4crRUNGRJ7DYmZ6ANIArAJQAsAyQCWA2gflOYjS9ABXAjgHe/3OgA2e5e1vd9rF3W9RIt+QYG6a2rUUEH75z9FBg3SqrgVzhWpx6ebsIZKAMomHK24WKFzb7+trqiqVX0Nz07m7ru1n4J/4+e2bRK2U1ossSK6rAEAt21TV5mdns3+mZIltsHjRpWWJUv0vElJGgYaTcAtt+X550uJI9MiMXmynvNf/4rdOUViK/rnAJjnt34fgPuC0qyySvcADIBs7/drAbzql+5VANcWdb1Ei77l+w3ukbl5s0ZYXHNNbDtLnahYf6LyGoHk7z5p1qxoX76TsHou+zcYWh234vmeFi70iePhw1rKN8Z+O8X552sN+uSTtdEz1uPVWJ20APsD07Vpo+lbtSpdP4FgPB4N+IhFOKo/dkXfzsxZDQFs81vP8G7zZzm
Awd7vVwGoYYypa/PYcoPHAzz+uE4/OGhQ4L7mzXXS8BkzgIoVE2NfeaJdOyApSWfkOuecRFsTSvPmQO/eQL9+QHo60KlToi0qGy64QJfffOPbtny5Ljt2jN91O3UCTjpJ/x/du+usbJMm6Xuwe/zatcChQ8AzzwDGxNa+ihWBs8/W3+vQofaOGTBAl2PH6m89Vhij05NedJHOxrZgQezObQc78hXu8UvQ+lgALxpjhgP4AcB2AAU2j4UxZiSAkQDQpEkTGybFh88/B1auBN55B6jAiSSLpHJloEcPoHPn2P4hYskPP8RePMo79eoBXboA8+cD99+v21asAJo2BWrVit91K1UCzjwT+PhjoEEDYN48oG9f+8d37qzLW24BOnSIj42vvKKZSpUq9tLfeqtOj5maGntbTjpJn1WPHsC116ru1KkT++uEJVpVADbcO0HpqwPIkBPMvePx6GiGLVrQfWOXggI2aJdHxo3TkENrCI927TScON5Mnaruz+DhuO2wb5+GlRY1jLQTWbpUO+VZ0USlATF07ywC0NoY09wYkwxgKIDZ/gmMMfWMMda57gMwzft9HoBLjDG1jTG1AVzi3Vbu+PprdQNMmED3jV2SklgjKo9cdBGQlwf89BNw9Ciwbp2WwuPNiBHq3jnllOIfW6cO8NxzQO3asberPNO5M/Dww/rcZswom2tG/cuKSAGAO6BivQbAhyKyyhgz0RhzpTfZ+QDWGWPWA/gLgMe8x2YBeBSacSwCMNG7rdzx8svAaacBN9yQaEsIKR29ewPJyerXX71a26rc0qZxIjJ+vLp5brsN2LEj/tczWisoP6SkpEh6enqZX7dFC23omT69zC9NSMy54ALg4EFg1Cjgxhu1tN+mTaKtIpFYv15L/X36AHPnlqwtyhizWERSoqVj5RxAbi6wZYtG7RDiBC6+GFi6FPj2W224bNky0RaRomjTRt1bV1wR/2tR9KGlIBENQyRFkJcH9OoFvPBCoi0hUbjoIl1+8IGGapbXCCviY8QIdfHEO+KMog9gzRpdulr08/K0RWn37shppk0DfvkFeO+9MjOLlIyUFKBmTSA/v2waccmJA0Uf2tiVlAS0bp1oSxLI++8Djzyidcxw5OYCEydqMSQ9XQOeSbmlYkVfRy024hJ/KPrQkn6rVhrx4EpEgGef1e8zZuh6MC++COzcCTz0EFBYCPz4o71zFxZqhkHKHMvFw5I+8YeiDxV9R7t2li4F7rkH2Ls3/P4fftC++n36AH/8AaSlBe4/eBB48kmgf3+NL0tOBr77Lvp1V67UkIQ2bYDt20t/H6RY/OMf2t2/d+9EW0LKE64X/fx8YMOGE1j0CwvVP7VtG5CdHVpK//xz4NxzdUCTnj2BjRtDz/Hss0DdulrKT04OjVudPBnYvx947DENBenRQ8NCIiECvPQScNZZwJ49mmkMHBhY4t+8GfjwQw0iLw07dgDDhtnLhEpKZqa2Z+Tnx+8acaB6dW0YZAc6EoCdbrtl+SnrYRhWr5YTd7jkggIdktB/DrdatXTGiJkzdYaTChV06qKZM3V+uHr1RH75xXeOTZt0OERrnNdBg3S2CWtYwY0bdXjCv/3Nd8xDD+kxkfrM33uv2tKvn8iuXTrDtTWIeH6+yNNP6/i/gNq/Y0fkeyxqUPWFC3UWa0CnNipJ/387jBzps9XOOAEej06t5LYxBUhCAWfOsscnn+hTSE8v08uGx+PRiXcXL7aX3pqJ/cEH9bjJk0WGD9eZK6xMYNAg3yAs69frOLGVKunknps2+eaOy8jQNB98oMfNn68Cfc45mpH8+afvutakqLNmhb+HU08VufzywIF5rEHEGzXS5eWXa6ZUpYpmRJ9/HnquF19UW7/6KnTf22/rvmbN1ObKlTWTKe6YvF9+qc8uEocP6+QKZ56pg6S0aRN9olnr+fz978WzhZBSQNG3yaRJ+hQsXUwY+fm+CTs7dIguXnPmaNqbbw5/rvnzdXKA4IHAMzN
FbrpJBaxCBR2Z67rrfPtzcnSm95tv9k1eOn164DmOHlWRveuu0GtbVaepUwO3ezwit9yi0yu9/77v/lavFunUSY8ZM8Y3+4c1k7cxOtC6/6wg1nx6F1zgm1jXmhrp+ef13D/+KHLDDUULem6uTlqalBR54Pc339Tz/vCDfurW1UHfw01EazFunC/TLe0ce4TYhKJvk+uuE2nSpEwvGcrhwzrBKCBy4YW6/N//Iqf/4w+daqhz5+hz0UVi+3YVp9atRZYvD9x3/fUq/ElJgRmCPxdeqKXfYKwplCJNmhtuWM4jR3SmeEBrFs89p2J/ySW+WUGef17TZmXpVFjt2ulxFh6PPsNKlXyZSFKSLt96K7wt1jRNFSpohhSO3r21dG9lUps2iZx1lh6Xmipy8GDoMe3b61ybTZpoBu6fYRWXhQt1FhRCokDRt0mXLuoVSBgHDoj06KHC88orOrv1aaeJ9O0bPv2sWeo+qVVL/e3xwKpFNG4ssn9/+DSPPqpprJK2xV//KtK0acmmPpoxQ10pgMh552mtw+NRX3rt2iJ792p7RVKSyKJFocfv3q02d+yoNY2sLH2OFSqEToV25Ig+5z591GefnOxzcVmsWaO2PPVU4Pa8PJEHHtDzNm+u7RYWf/yhxzz9tLajAOrGKgn79qnry5rcleNYkyKg6Edg3z5fAbGwUF3Ko0fH6WI7dmip+fffw+8/eFAF/6STAkXpiSf01fiXwHfv1sZUQEuyS5bEyWjRjCc1NXDOvWB++klCJg8tKFBxvvHGkl97/XqRRx4Ryc72bVuxQgW2e3e95kMPRT4+WBgPH9baQ3JyYLuB5Q6aP19L70lJoT+EceO0vSNSA/GCBVojue8+3zarprN2rWZY/fppRlZUY3UkRo5UuwYP1nMOGaIZISFhoOhHoHNn1YC8PN88qsHu55jg8YhceqleoGlTkT17AvdnZ6sLoGJFkU8/DdyXlSVSrZoKr4gK7CmnqHBNmlQ6d0GsOHZMo3pGjPBtW7xY7/fdd2N/vVtv1XN37Vr8+9+/X6t0gMioUfp8GzUS6dXLVyP5+9/1fqyaS16etj8MGlT0uQcP1ozu0CFd799fpGVL33nXr1fhDjcL9oIFkdsSfv1V7b3nHj3X5MmawZxxhtYgWOonQVD0I1C9ut712LFxnjD6hRf05Lfdpo2e552nQimijZfnnKNi4F9S9mfUKK0BTJqky5YttcRbnvjHP9SHbkX2/N//6T2XpFQbjb17tQaxdm3Jjs/J0YZnQBtiAZF583z7V69WUU1N1VqA1ag+Z07R57XE+fnn9RqVK+sUUP707atRU/4ur717NRM/7TSRbdsC0+fna22uYcPAGs+cOdoGA2h7yrvvatWVEKHohyUnR++4bl1d9u2ry717Y3yh1asDQwjfe08vNHy4VtkrVNAqf1ERIJs2aTpAZMCAyL71RLJliwqXFUHUr582sJZnvv1Wa17nnRfa7mC5zwDNaHv3tjd3Zs+e6tufNSs0MxERee013e7vkrMKBVWqqMBb4l5YqO4tIPzvIz9fw1XbtJHj0U0pKSKPPx4aqUVcBUU/DJY75+WXfbX9Bg1ifJHdu9UFUa9eoC/Y6rBUsaKWBIPdPeF46imRJ58s31X5u+7SzGnFCnWP3HFHoi2KTkGBr9blz6FDImlpWlMpzjO3GmxbtFC3XHCHsr179b3fe69vW7du+iP83/+0xnfZZVr1tH6YgwYV3RheUCDy88+aQfTqpccMG1Z84fd4RL75xueeIicsFP0wWDXxL74Q2bBBC9sXXhiDExcU6B//yiv1z21MqJ++oEBjzzdsiMEFyxG7d6vQWSXPmTMTbVHZU1Cg7htAZODA8Gn69dPagMejGSSgIaMiWgqxahjNm4u8807xxduKpiqu8FsZ1pVXliziipQbKPphsGrfVu/bFSuid66MSna29i4FNJRy/HiRVatKbesJxYMPynFXg1uHHrCigSJFBUybpvvT0rQT2kk
nBYa7vvSSL2S3pFjCf8MN9gQ8J0ddXVaY7JQp4dN5PJoRxTNirLQsWhSftqQTCIp+GF59Ve84uN2sxGzZojHhSUnakGfH/+tEDh7UhpIy7mNRrjh2TEUzUme5rCwV+rvu0l7Af/1rfOx44AH9kX/5ZfS0Vmb9/fcadVS5cmh48f796mqyotCC76+gIPHux/nz9T/YuLH2k/Bn27bAfhQOhqIfBmtUgdIUpo6zcqU2CNSqFX5sGLexeHHk/ghEuewydf8BIrNnx+caVue+aH7LTZs08srqcb17t2ZGZ5yhVeAVK0S+/lqjxipWFPnnP9XuRx7xnePoUZFzz1WX1Pz58bmfaGzapGNNtWmjobPNm6vQFxZqB7lKlbTNwwVQ9MNw++36uyg1Ho/+qerW1V6bhNjhv/+V49ED8exrYYXOFjWK4JVXavzy9u2+bdaYRv6fhg21wVhE5OqrNdpo61b9D9x0kxzvuQ3oUBb+IabxJjtbM6natbV3elqaSM2aGtZ63nly3OVqjL3AiRMcin4YhgzRsbtKzbx5+uiefTYGJyOu4cABFdrx4+N/nZo1Ra65Jvz+2bMl7PASIjrWzwcfaP+RTz8N7AewZYuK/jXX+Hoe//vf2jZwzz0qruefH/v72bVL2xN+/FHdVtOmaY2jZ09163zzjS/tzz9rYEHNmjpY3qJFauc778TernKGXdE3mrb8kJKSIunp6XE593nn6YQS339fipN4PDo5SFYWsHYtUKlSrMwjbiAjA2jQIP5zc44fDzz9tE6a07y5b/vhw8AZZwA1agBLlhTfjkceAR5+WCeV7t8f+Owz3ywtEybohDuHDulkO+E4ehT46itg1y6dnCYnB2jcGGjRAmjfXr/7s2AB0Ldv+AlsTj0VmDQJuPHGwO2bNwNVqwKnnKL/11NP1bkj33+/ePfqT2Ym8OabOitN9erFOzYrC1i1SuetrFWr5DZEwRizWERSoia0kzOU5SeeJf02bQLnAikRM2ZoyeGEnHWFuIaMDG04Du43MWaM/n5/+qlk583JUb/56adrjcIfK/xz4cLIxz/+eKD7yGrjsKK//IfC3r9fXUetWmmt46uvtCS/aVPRk+sEk5qqfn87oaw7d2pPZ//oJ49H5Ior1MZftBP0AAAU40lEQVTBgyNHRuXmaue/KVNE7r5b23As1xego8bGMSwWdO+EUquWjm5QLHJytJFy+3Ydqa1VK43YYe9HUt4ZPlzdMdOn6+916VJ1h4wcWbrzZmWFn4BiyxY53vsxEikpGuW1bZsKd2GhZlA//KBjVfmPiHrddWpvUZmIHayJgfxnjIvEgAGa9oEHfNustphzz9XlxIm+fQcPaoPxxRdr9JMl8FWr6lAZ112nbrSxYyVu41J5oegHceSI3u2kScU8MDU1sGRiZzwWQsoDmzfrsBiARuG0a6eNyPHqS+HxaInafxA+fzIy1JYnngi/339EVGucpEcfLb1dWVmamdx/f9HpvvvO96wAHT4jI0NLi717a8Y5bJju++ADHTLbGtOlQwct3c+Zo8cEl+gLCkTOPlt76sd83BeFoh/E1q2+92ibLVu0pHHNNVp6uf9+jYxgz0VyolBYqG6Xbt30DxA8C1qsufjiyP01XnpJbVi9OvLx+/apgAIaahmrGnXv3jo8SiQ8Hp0cp1EjjQq69FL973fqpLUlqyd9bq7WVqwCYN++9udaXbFC3VnDh5f+fsJA0Q8iLU2KHx59xx3qF41Zby5CEoTHEzpJTDwYP15L6uE6w1xyiYZTRis0bd+u8dVbt8bOLqstIdLcCJYLyJplLTvbNw6SNWubRUaGjvhakv45//qXnjMO/Roo+kF8/rlEbWMKYPdu9dGVZkIQQtyGFeiwdGng9gMHtAA1blxi7Fq6VO2aNk3b6EaPVn/7m2/q0OAtW4a21e3Zo6GesexxnJur7YJt2hSvMdoGdkW/Ytzih8oZu3fr8i9/sXnACy8Ax44B48bFzSZCHEfXrrp
csgTo3Nm3/csvNexy4MDE2NWpk4Zu3nqr/q+Tk4HatQPDOOfO1VBUi/r1gWHDYmtHlSrAiy8C/fppSO2//hXb89ugQplfMUEUS/QPHdIXc9VVQNu2cbWLEEfRsqWvD4A/n32mItqjR2LsMga45x6gZ0/glVeAnTv1s2wZ8PjjwKOPqhCXBZdeCgwerH0Mtmwpm2v64aqSfs2aQOXKNhK/8AJw4IB2NiGE2KdCBaBLl0DRz8vTUvTVVweWpMuasWP140+nTvopa555Rms/d98NzJpVppd2VUnfVil/3jzgwQeBQYO05y0hpHh07aol6MJCXf/+eyA7O3GunfJI48aqM599BsyeXaaXpuj7s2KFlkY6dADefrtM7CLEcXTtChw5Aqxbp6X8hx7S4QcuvjjRlpUvRo/WITEGD1bf/pEjZXJZW6JvjOlnjFlnjNlojAnxeRhjmhhjvjPGLDXGrDDGDPBub2aMOWKMWeb9vBLrG7BLVNHfsQO47DL1R86Zo0tCSPHxb8ydMAH47Tfg9dcjj8fjVpKTtRZ0/fXAE08AHTsC8+fH/bJRRd8YkwRgCoD+ANoDuNYY0z4o2f0APhSRLgCGAnjJb98mEens/dwSI7uLTVTRv/NOYP9+4IsvgEaNyswuQhxH27Yq8E8/rb7rUaOAIUMSbVX5pF494K23gG++0cbme+/VQeLiiJ2G3O4ANorIZgAwxswAMBDAar80AqCm93stADtiaWRpyc/Xge4iir6IjuZ39dWBYWaEkOKTlKT/o19/1Xax//wn0RaVfy66SN3Le/b4Ri2NE3bO3hDANr/1DO82fx4GMMwYkwFgLoBRfvuae90+C4wx55bG2JKyZ48uI4r+li3A3r3A2WeXlUmEOJvevTUO/sMPOfy4XapUAZo2jftl7Ii+CbMteBD+awG8JSKNAAwA8I4xpgKAnQCaeN0+9wB43xhTM+hYGGNGGmPSjTHpmZmZxbsDG0SN0V+4UJfdu8f82oS4ksceAzZtApo1S7QlJAg7op8BwH9mg0YIdd/cBOBDABCRXwFUBlBPRI6JyD7v9sUANgFoE3wBEZkqIikiklK/fv3i30UUoop+WpoG8HfsGPNrE+JKTjpJS/qk3GFH9BcBaG2MaW6MSYY21AYHlv4J4CIAMMa0g4p+pjGmvrchGMaYFgBaA9gcK+PtYquk37Wr/lAJIcTBRBV9ESkAcAeAeQDWQKN0VhljJhpjrvQmGwNghDFmOYDpAIZ7BwA6D8AK7/aPAdwiIlnxuJGiKFL08/M1tIz+fEKIC7A1DIOIzIU20Ppve9Dv+2oAvcIc9wmAT0ppY6nZvVuntaxWLczOlSt13k768wkhLsAVPXKLjNG3GnFZ0ieEuACKflqadpBglAEhxAVQ9Bcu1FK+CReZSgghzsIVor9zZwTRz84G1qyhP58Q4hocL/p79gD79gFtQnoHAEhP1yEY6M8nhLgEx4v+8uW6DDtPQlqaLjluPiHEJVD0W7cG6tQpU5sIISRROF70ly0DGjYE6tYNs3PpUt/Y34QQ4gIcL/rLl0cYLfnwYR1ds0OHsjaJEEIShqNF/9gxYO3aCK6dNWt0ecYZZWoTIYQkEkeL/urVQEFBBNFftUqXFH1CiItwtOgvW6bLiKJfqRLQsmWZ2kQIIYnE0aK/fLlORtOqVZidq1bpXJ5JSWVuFyGEJArHi/6ZZ0bQ9VWr6NohhLgOx4q+iIp+WNfOoUPAn39S9AkhrsOxop+RAezfH0H0V6/WJUWfEOIyHCv6URtxAYo+IcR1OFb0reEXzjwzzM5Vq3Qi9ObNy9QmQghJNI4W/ZYtgRo1wuxctQpo146RO4QQ1+Fo0Q/r2gF0XlwOv0AIcSGOFP38fGDjxgi6fuAAsH07/fmEEFfiSNHPytKQzbCzZTFyhxDiYhwp+vv26TLsMPmM3CGEuBhHi37YMfRXrQKqVgWaNi1TmwghpDzgTtFv3x6o4MhbJ4SQInGk8kUUfRFgxQp
G7hBCXIu7RP/PP4E9e4Du3cvcJkIIKQ84VvSTk4Fq1YJ2pKXpkqJPCHEpjhX9unUBY4J2pKXpxCkdOybELkIISTSOFv0QFi4EunTRagAhhLgQ94h+QQGweDFdO4QQV+Me0V+9GsjNpegTQlyNe0TfasQ9++wyt4cQQsoLjhN9kSJEv3ZtHW+ZEEJciuNE/9Ahdd+HFf3u3cOE9BBCiHtwnOiH7ZiVk6Nj6NOfTwhxObZE3xjTzxizzhiz0RgzIcz+JsaY74wxS40xK4wxA/z23ec9bp0x5tJYGh+OsKK/dClQWEjRJ4S4norREhhjkgBMAdAXQAaARcaY2SKy2i/Z/QA+FJGXjTHtAcwF0Mz7fSiAMwCcBuAbY0wbESmM9Y1YhBX9hQt1edZZ8bosIYScENgp6XcHsFFENotIHoAZAAYGpREANb3fawHY4f0+EMAMETkmIn8A2Og9X9wIK/ppaTqUcthZVQghxD3YEf2GALb5rWd4t/nzMIBhxpgMaCl/VDGOhTFmpDEm3RiTnpmZadP08EQUfYZqEkKILdEPF+4iQevXAnhLRBoBGADgHWNMBZvHQkSmikiKiKTUr1/fhkmRsUS/dm3vhtxcYMsWjrdDCCGw4dOHls4b+603gs99Y3ETgH4AICK/GmMqA6hn89iYsm8fcPLJQEXrzjZt0mXr1vG8LCGEnBDYKekvAtDaGNPcGJMMbZidHZTmTwAXAYAxph2AygAyvemGGmMqGWOaA2gNIC1WxocjpGPWhg26pOgTQkj0kr6IFBhj7gAwD0ASgGkissoYMxFAuojMBjAGwGvGmNFQ981wEREAq4wxHwJYDaAAwO3xjNwBihD9Vq3ieVlCCDkhsOPegYjMhTbQ+m970O/7agC9Ihz7GIDHSmFjsdi3D2jQwG/Dhg26oWbNiMcQQohbcFyP3KysMCV9unYIIQSAA0V/3z6gTh2/DRR9Qgg5jqNEPz8fyM72K+kfPgzs3EnRJ4QQL44S/awsXR4XfYZrEkJIAI4S/ZDeuAzXJISQANwh+gzXJIQQAG4Q/VNPBapXT5hNhBBSnnC+6LOUTwghx3G+6NOfTwghx3Gc6CcnA9WqQWM3d++m6BNCiB+OE/26db1zn2/cqBsp+oQQchxHij4AhmsSQkgYnCv6VkmfDbmEEHIc54r+hg1Aw4ZA1aoJtYkQQsoTzhZ9unYIISQAx4i+CEWfEEKi4RjRP3QIKCjwin5eHpCZCTRpkmizCCGkXOEY0c/LA/r3B9q2hQ6pDHC2LEIICcLWdIknAvXqAXOtCR3/9Io+x9whhJAAHFPSD+AwRZ8QQsJB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBfhbNGvVi2xdhBCSDnDuaJfpQqQlJRoSwghpFzhXNGna4cQQkKg6BNCiItwrujTn08IISE4V/RZ0ieEkBAo+oQQ4iIo+oQQ4iIo+oQQ4iJsib4xpp8xZp0xZqMxZkKY/c8YY5Z5P+uNMQf89hX67ZsdS+MjQtEnhJCwRJ05yxiTBGAKgL4AMgAsMsbMFpHVVhoRGe2XfhSALn6nOCIinWNnsg0o+oQQEhY7Jf3uADaKyGYRyQMwA8DAItJfC2B6LIwrEYWFwJEjFH1CCAmDHdFvCGCb33qGd1sIxpimAJoD+NZvc2VjTLox5jdjzKAIx430pknPzMy0aXoEcnJ0SdEnhJAQ7Ii+CbNNIqQdCuBjESn029ZERFIAXAfgWWNMy5CTiUwVkRQRSalfv74Nk4qAI2wSQkhE7Ih+BoDGfuuNAOyIkHYoglw7IrLDu9wM4HsE+vtjD0WfEEIiYkf0FwFobYxpboxJhgp7SBSOMeZ0ALUB/Oq3rbYxppL3ez0AvQCsDj42plD0CSEkIlGjd0SkwBhzB4B5AJIATBORVcaYiQDSRcTKAK4FMENE/F0/7QC8aozxQDOYJ/2jfuICRZ8QQiISVfQBQETmApgbtO3BoPWHwxz3C4C
OpbCv+FD0CSEkIs7rkUvRJ4SQiFD0CSHERVD0CSHERVD0CSHERThT9CtWBJKTE20JIYSUO5wp+tWrAyZcR2JCCHE3zhV9QgghIVD0CSHERVD0CSHERThP9HNyKPqEEBIB54k+S/qEEBIRij4hhLgIij4hhLgIij4hhLgIZ4m+CEWfEEKKwFmif/Qo4PFQ9AkhJALOEn0OtkYIIUVC0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBfhTNGvVi2xdhBCSDnFmaJftWpi7SCEkHKK80S/WjWggrNuixBCYoWz1JGDrRFCSJFQ9AkhxEVQ9AkhxEVQ9AkhxEVQ9AkhxEVQ9AkhxEVQ9AkhxEVQ9AkhxEXYEn1jTD9jzDpjzEZjzIQw+58xxizzftYbYw747Us1xmzwflJjaXwIFH1CCCmSitESGGOSAEwB0BdABoBFxpjZIrLaSiMio/3SjwLQxfu9DoCHAKQAEACLvcfuj+ldAEBenn4o+oQQEhE7Jf3uADaKyGYRyQMwA8DAItJfC2C69/ulAL4WkSyv0H8NoF9pDI5ITo4uKfqEEBIRO6LfEMA2v/UM77YQjDFNATQH8G1xjjXGjDTGpBtj0jMzM+3YHZ5rrgHatSv58YQQ4nCiuncAmDDbJELaoQA+FpHC4hwrIlMBTAWAlJSUSOcumtq1gRkzSnQoIYS4BTsl/QwAjf3WGwHYESHtUPhcO8U9lhBCSJyxI/qLALQ2xjQ3xiRDhX12cCJjzOkAagP41W/zPACXGGNqG2NqA7jEu40QQkgCiOreEZECY8wdULFOAjBNRFYZYyYCSBcRKwO4FsAMERG/Y7OMMY9CMw4AmCgiWbG9BUIIIXYxfhpdLkhJSZH09PREm0EIIScUxpjFIpISLZ2zeuQSQggpEoo+IYS4CIo+IYS4CIo+IYS4iHLXkGuMyQSwtRSnqAdgb4zMOVFw4z0D7rxvN94z4M77Lu49NxWR+tESlTvRLy3GmHQ7LdhOwo33DLjzvt14z4A77zte90z3DiGEuAiKPiGEuAgniv7URBuQANx4z4A779uN9wy4877jcs+O8+kTQgiJjBNL+oQQQiLgGNGPNo+vUzDGNDbGfGeMWWOMWWWMucu7vY4x5mvvXMRfe0c1dRTGmCRjzFJjzBzvenNjzELvPX/gHQXWURhjTjbGfGyMWet95+c4/V0bY0Z7f9srjTHTjTGVnfiujTHTjDF7jDEr/baFfbdGed6rbyuMMV1Lel1HiL7fPL79AbQHcK0xpn1irYobBQDGiEg7AD0A3O691wkA5otIawDzvetO4y4Aa/zWnwLwjPee9wO4KSFWxZfnAPxPRNoC6AS9f8e+a2NMQwB3AkgRkQ7QkX2Hwpnv+i2ETh8b6d32B9Da+xkJ4OWSXtQRoo/iz+N7wiIiO0Vkiff7IagINITe73+9yf4LYFBiLIwPxphGAC4D8Lp33QC4EMDH3iROvOeaAM4D8AYAiEieiByAw981dMj3KsaYigCqAtgJB75rEfkBQPBQ85He7UAAb4vyG4CTjTGnluS6ThF92/P4OgljTDMAXQAsBPAXEdkJaMYAoEHiLIsLzwIYD8DjXa8L4ICIFHjXnfjOWwDIBPCm1631ujGmGhz8rkVkO4DJAP6Eiv1BAIvh/HdtEendxkzjnCL6xZnH1xEYY6oD+ATA3SKSnWh74okx5nIAe0Rksf/mMEmd9s4rAugK4GUR6QIgBw5y5YTD68MeCKA5gNMAVIO6NoJx2ruORsx+704RfVfNxWuMOQkq+O+JyEzv5t1Wdc+73JMo++JALwBXGmO2QF13F0JL/id7XQCAM995BoAMEVnoXf8Ymgk4+V1fDOAPEckUkXwAMwH0hPPftUWkdxszjXOK6Nuax9cJeH3ZbwBYIyL/z2/XbACp3u+pAD4ra9vihYjcJyKNRKQZ9N1+KyLXA/gOwBBvMkfdMwCIyC4A27zzTwPARQBWw8H
vGurW6WGMqer9rVv37Oh37UekdzsbwA3eKJ4eAA5abqBiIyKO+AAYAGA9gE0A/p1oe+J4n72h1boVAJZ5PwOgPu75ADZ4l3USbWuc7v98AHO831sASAOwEcBHACol2r443G9nAOne9z0LQG2nv2sAjwBYC2AlgHcAVHLiuwYwHdpukQ8tyd8U6d1C3TtTvPr2OzS6qUTXZY9cQghxEU5x7xBCCLEBRZ8QQlwERZ8QQlwERZ8QQlwERZ8QQlwERZ8QQlwERZ8QQlwERZ8QQlzE/wfVmUdfBOl8tgAAAABJRU5ErkJggg==\n",
2062 | "text/plain": [
2063 | ""
2064 | ]
2065 | },
2066 | "metadata": {
2067 | "needs_background": "light"
2068 | },
2069 | "output_type": "display_data"
2070 | }
2071 | ],
2072 | "source": [
2073 | "plt.plot(range(100), hist.history['acc'], 'r', label='Train acc')\n",
2074 | "plt.plot(range(100), hist.history['val_acc'], 'b', label='Test acc')"
2075 | ]
2076 | },
2077 | {
2078 | "cell_type": "markdown",
2079 | "metadata": {},
2080 | "source": [
2081 | "## Plot the training loss and testing loss"
2082 | ]
2083 | },
2084 | {
2085 | "cell_type": "code",
2086 | "execution_count": 14,
2087 | "metadata": {},
2088 | "outputs": [
2089 | {
2090 | "data": {
2091 | "text/plain": [
2092 | ""
2093 | ]
2094 | },
2095 | "execution_count": 14,
2096 | "metadata": {},
2097 | "output_type": "execute_result"
2098 | },
2099 | {
2100 | "data": {
2101 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbAAAAEICAYAAAA+16EyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzsnXeYFEXzx7/FceQjJ+FIkgRB0hGULEFABMQACCgiIComDOCrAoq+hp9ijiCiovBiQBFBkmQkHJJBooQjHuEIwhFu6/dHzbCze5tv4119nmef3enp6a6Znemaqq7uJmaGoiiKosQauSItgKIoiqIEgiowRVEUJSZRBaYoiqLEJKrAFEVRlJhEFZiiKIoSk6gCUxRFUWISVWAxBBGNIaLJkZYjqxBRZSJiIsodQRl8vpZEtIiIBoVaJiXnQERziahvpOWIdbwqMCLaS0QXiOgsEaUR0QoiGkpEPim/cDVWruohomuIaAYRHTL2VfazzL1EdJSIClrSBhHRoqAJHiSIqI1xjh85pS8jogE+lsFEVC0kAgaI5bx+ckqvZ6QvipBoQSdQRUlEk4jolSDKsZeI2nvY34aIbER0zvL5NQj1BvU8ggURzbac52UiumTZ/jSQMpm5IzN/G4AsL1nqTieiK5bttYHIYpTbg4g2e8nzMxE9HWgdocBXC+w2Zk4AUAnA6wBGAPgiZFIFDxuA3wHckYUycgN4PDjiuCdICv5fAPf6q6jDSYDnmQrgJiIqYUm7D8CO4EilBMAhZi5k+dwWaYFC9ZLMzJ3N8wTwLYA3Lec9NFxyGLKMtsjyBIBFFlkahareaMUvFyIzn2bmGQB6AbiPiOoAABHdSkTriOgMER0gojGWw5YY32nGW8KNRFSViP4gohNEdJyIviWiouYBRDSCiA4aVt92ImpnpOciopFEtNs4dhoRFXdXDzMfZeaPAazx/9Jc5f8APG2VzwoRXUdE84jopCHr3ZZ9Dm/URDSAiJZZtpmIHiGinQB2GmnvGdfwDBGtJaKWfsiaBmASgNHuMhDRQCLaRkSniGgOEVUy0s3rt8G4fr2IaDER3WHsb2HI28XYbk9E643fuYjoBSLaR0THiOhrIipi7DMt4weIaD+AP1zIdIfx1l/HjdiXAPwMoLeRPw7A3ZDGxFrOTUS0hohOG983WfZVMc7nLBHNA1DS6dhmJN6FNCLaQERt3F1Dp+PyEtG7JFb+IeN3XmNfGyJKIaKnjOtymIju96VcF/V8T0RHjHNbQkTXG+lDAPQF8CxZLCEiKkdEPxJRKhH9Q0SPWcoaYzw7XxvXYwsRJRn7vgFQEcCvRnnP+imnp2c0kPNw8AqQxUqzXN8RRHQEwJdGelciWk92j9ENluNdti1ZwXgW9hLRfww5xhNRCSKaZVz/U0T0KxGVtxxz1TNC4tVZTETvGDLvIaKOWZCnvlHeKeO/vdWy704i2mGc/34iepCIygGYAqAW2a25gu5rcHsN1hv/6woiamDZ94hR11ki2kVE3Y30ukT0p3HMMSKa4PfJMrPHD4C9ANq7SN8P4CHjdxsAdSEK8QYARwH0MPZVBsAAcluOrQagA4C8AEpBlM+7xr6aAA4AKGc5vqrx+wkAKwEkGsd+BmCKu3os9eU29lV2Sh8JYKa3cwfwE4BXjLRBkLceAChoyHq/UUdDAMcBXG/sXwRgkKW8AQCWWbYZwDwAxQHkN9L6AShhlPcUgCMA8hn7xgCY7EbWNgBSAJQFcAZATSN9GYABxu8eAHYBqGWU/wKAFU7yVLNsvwzgA+P3fwDsBvCGZd97xu+BRrnXAihkXK9vnP6Xr43rld/6XxnXbpe1XjfndROAVUZaFwBznP6L4gBOAehvlNvH2C5h7P8TwDjIfdMKwFnzWgIoD+CEUW4uyL15AkApV/+jk3wvQ+7J0pB7eQWAsRbZrxh54o3yzwMo5qYsT/UMBJBgyP8
ugPWWfZNg3J/Gdi4AawGMApDH+F/2ALjFch+lG/LEAXgNwEpvz7zzf+Jmn9tn1N/zcHNPXs1jub5vGOXlhzyDxwA0Nc7tPuN88sJz29ICQJoP7aErGdsbcvzXuN75jXvhduN3Ycgz8YPlGOtzOQjAZePaxAF4FMABH2QZCmC+U1pxSPt7l1HWTQBOQrxncQBOA6hv5C0FoJ6lbdjspb6fATztIr08gHNGGfGGXIchz3tZSJtY2cibCHvbNBvAMAAEoACAm7ydc6a6fbhILm9m4yZ93s0x7wJ4x6kBy6RYLPl7AFhn/K5m3IDtAcQ75dsGoJ1l+xrjj8/tqR64UWC+njuAOsYfXwqOjWYvAEudjvkMwGjj9yJ4V2A3e5HhlOUmGwMvCsz4/SaA/7l4UGYDeMByTC5Ig1rJIo+1sWgHYKPx+3fj3Fca24sB9DR+LwDwsOW4mi7+l2st+820pwFsBZDo4fyt57XTKHsq5G3d+l/0B7Da6dg/jWteEdLAFLTs+w52BTYChsK17J8D4D5X/6NTvt0Auli2bwGw1yL7BTi+vB0D0MxNWW7rccpX1Lh+RYztSXBUYE0B7Hc65jkAX1ruo/mWfbUBXHC+7738JzaIxW9+7vb2jPp7Hm7uyat5DDkuwXjBM9I+gfECYUnbDqA1PLQtvn7cyNge8kKQx8NxSQBSLdvOCuxvy77CxnmX9CKLKwX2IIBfndL+B+kGyQVRZn1heRaMPFlRYI8CmOuUts0oszSkDbsVQF6nPDMAvA2gbCD/BTNnKQqxvHExQERNiWihYS6fNi5sSXcHElFpIppqmPJnAEw28zPzLshb3BgAx4x85YxDKwGYbpjZacZFygBQJgvn4RVm3gxgJsRis1IJQFNTHkOmvpC3Dl85YN0w3E3bDLM6DUAReLiWbngDwC1EVM+FvO9ZZD0Jefsp71yAwZ8AahBRGQD1IVZUBSIqCaAJ7G7bcgD2WY7bB1Fe1v/F4TwNngHwETOn+Hhe30De2NoCmO60z1kGU47yxr5TzPyv0z6TSgDucvofW0AaX2+4Ovdylu0TzHzFsn0eYqX6DBHFEdHrhlvuDETBAO7vi0oAyjmdz3/g+H8ccZIpH/nXd3OImYtaPtMsdbt8RgM4D19IZeZ0y3YlAE85nXsFiNXlqW3JKkeZ+ZK5QUQFiWiC4To7A3GdezpP5/8DAAoZblLTrbfBBzkqAejodP63AriGmW0AukPaqIMkXR/1/TlJN7h99pj5GMTL8jSAo0Q0nYiqGHmGQa7JBsP92MvfigNSYETUGNIwmP0530G0aQVmLgLgU0jDCMibhDOvGek3MHNhiNvMzA9m/o6ZW0D+DIY0yIA0gp2dHpx8zHzQTT3BZDSAwXBs7A8AWOwkTyFmfsjY/y/ENDZxpdiuyk3S3zUC0r9TjJmLQiw/cnGcW5j5BMQKHuu06wCAB53kzc/MK9yUcx7iinoc8nZ2CeIiGw5gNzMfN7IegvxXJqbFc9TVeVroCOAFMvrZfOAbAA8DmGXIZsVZBlOOgxB3RjEnv35Fy+8DEAvMel0KMvPrPsjk6twP+XCcP9wDaXjaQ15oKhvp7p6xAwD+cTqfBGbu4mN9WXmWPD2j/p4HII25p2fI1bm/6lR/AWaeAnhsW7KKsxzPAqgCoInRxt0cUKHMi9gepOH8QuqKAwBmuGiTRhrlLTXugzKQ9vsbN/L7g6dnD8z8MzO3hbSdqQDeN9L3M/N9kP/0GQCTjZdln/FLgRFRYSLqCnHhTGbmTcauBAAnmTmdiJpAblSTVIi74VpLWgLEZ5pmdGw+Y6mjJhHdTNIRng5xwWQYuz8F8CrZAw9KmR2CbuoBEeWD+L8BIK+x7TfG29v/ADxmSZ4JsVD6E1G88WlMRLWM/esB9CSiAiQd0Q94qSYB0vCnAshNRKMg7oRAGAfxf9eypH0K4Dmyd5wXIaK7LPuPwun6QVyFw4xvQNxc1m1AOoCfJAmUKATpC/ifk+Xhii0
AOgH4iIi6eTshZv4H4gp63sXuWZD/4h4iym28zdWG9HHuA5AM4CUiykNELQBYo+YmA7iNiG4xrIR8xptvojeZIOf+gnEvloT0O2VlrF5uo37zEw+5Ly5C+uUKQK6vFef/bTWAMyQBC/mNc6pjvHj6gqv7wFc8PaP+ngcgz9A9xjl0gvz/nhgPYKjhFSLDErqViBK8tC3BJgGifE+RRM+OClE9znwPoDkR3W48B3lIgpuqms87ESVAXK9nYT//owDKElF+L+W7uj+nQ6KEbzPqHAzpi5tPRJWIqJNR7kXIS30GABBRbyIqy+JPPG2U763NcMSbjxFi5l8wTvY0xK30CIA4S547ISbjWUij/iEsfTWQTuxUiK+8GYDrIW/25yA36FOw93PcAHkAz0JcXDNh73TNBXn7327s3w3gv+7qYbsP3eFjyf8fALO9nHt7y3YFyI2/yJJWE8BvRr0nIK4Cs5O0JIC5hqzLIa4L5z4wq38/DjI84QzEanjWKgN87AOzpD1r1DHAktYfwCajjgMAJlr2mZ2v1j6NW4wyWhvbdYztXpbjckEe0APGdZgMI1ABroN4HNIg/QNHIW/uXs/Lsu9qH5ix3QJyX502vltY9l0LYCnknpuHzPdoU4hSPmmcw28AKhr7FsF9H1g+yBvlYePzPuxBN67+k6v/p4uyFiHz/ToZ4nL8xbiP9gG413rvAKgOeY7SAPxspJmRZUcgfRAr3d1HLv6P7pAgrTS47vPw9J+4fUYDPI8kyIvOWYi1MAWOfWCZ5IC8FK0xyjkMadQT4LltaQngnA/t4SS47gPb65SWCHGxnzOuxUNwbHuc+8Cs97FPffZw0QfG9jZ0HqQ9Og5pg2pC+hwXGNflNMSb0sjyv00zrksanPrIjDw/I/P9OdPY1xHABth1hFluVaOeM5D7cC6AKsa+jyH3p3mN7vF2/Z0/ZBSkKIqiKDGFTiWlKIqixCSqwBRFUZSYRBWYoiiKEpOoAlMURVFikogtZxFMSpYsyZUrV460GIqiKDHD2rVrjzNzqUjLkRWyhQKrXLkykpOTIy2GoihKzEBEzrNnxBzqQlQURVFiElVgiqIoSkyiCkxRFEWJSbJFH5iiKLHL5cuXkZKSgvT0dO+ZFb/Jly8fEhMTER8fH2lRgo4qMEVRIkpKSgoSEhJQuXJlEPm18ILiBWbGiRMnkJKSgipVqng/IMZQF6KiKBElPT0dJUqUUOUVAogIJUqUyLbWrSowRVEijiqv0JGdr23OVmBvvQX8/HOkpVAURVECIGcrsPffB375JdJSKIoSQU6cOIH69eujfv36KFu2LMqXL391+9KlSz6Vcf/992P79u0B1T9//nz06NEjoGNzOjk7iKNAAeC888r0iqLkJEqUKIH169cDAMaMGYNChQrh6aefdshjLqCYK5frd/4vv/wy5HIqmcnZFliBAsCFC5GWQlGUKGTXrl2oU6cOhg4dioYNG+Lw4cMYMmQIkpKScP311+Pll1++mrdFixZYv349rly5gqJFi2LkyJGoV68ebrzxRhw7diyg+ufNm4f69eujbt26GDx48FVr8JlnnkHt2rVxww03YMSIEQCAqVOnok6dOqhXrx7atm2b9ZOPEXK2BZY/v1pgihJNPPEEYFhDQaN+feDddwM6dOvWrfjyyy/x6aefAgBef/11FC9eHFeuXEHbtm1x5513onbt2g7HnD59Gq1bt8brr7+O4cOHY+LEiRg5cqRf9Z4/fx4DBw7EokWLULVqVfTt2xeff/457rrrLsyaNQtbtmwBESEtLQ0A8NJLL2HRokUoU6bM1bScgFpgqsAURXFD1apV0bhx46vbU6ZMQcOGDdGwYUNs27YNW7duzXRM/vz50blzZwBAo0aNsHfvXr/r3bZtG6pXr46qVasCAO69914sWbIExYsXR65cuTB48GBMnz4dBQsWBAA0b94c9957LyZMmACbzRbAmcYmOdsCK1AASE2NtBSKopgEaCmFClN
BAMDOnTvx3nvvYfXq1ShatCj69evncnxVnjx5rv6Oi4vDlStX/K6XmV2mx8fHIzk5GfPmzcPUqVPxySefYO7cuRg/fjxWrVqFmTNnol69eti4cSOKFSvmd72xhlpg2gemKIoPnDlzBgkJCShcuDAOHz6MOXPmhKyu2rVrY+fOndizZw8AYPLkyWjdujXOnj2LM2fOoGvXrnjnnXewbt06AMCePXvQrFkzjB07FsWKFcPBgwdDJls0oRaYuhAVRfGBhg0bonbt2qhTpw6uvfZaNG/ePGhlz5kzB4mJiVe3p0+fji+++AI9e/ZERkYGmjZtisGDB+PYsWPo2bMnLl68CJvNhnHjxgEAnnzySfzzzz9gZnTs2BF16tQJmmzRDLkzVWOJpKQkDmhBy2HDgClTgBMngi+Uoig+sW3bNtSqVSvSYmRrXF1jIlrLzEkREikohNWFSEQTiegYEW12s5+I6H0i2kVEG4moYUgFUgtMURQlZgl3H9gkAJ087O8MoLrxGQLgk5BKU6AAkJ4O5KCoHUVRlOxCWBUYMy8BcNJDlu4AvmZhJYCiRHRNyAQqUEC+s+lMzYqiKNmZaItCLA/ggGU7xUjLBBENIaJkIkpODTQUPn9++VY3oqIoSswRbQrM1bz/LqNMmPlzZk5i5qRSpUoFVptpgakCUxRFiTmiTYGlAKhg2U4EcChktakCUxRFiVmiTYHNAHCvEY3YDMBpZj4cstpUgSlKjicYy6kAwMSJE3HkyBGX+/r164efde3BoBPWgcxENAVAGwAliSgFwGgA8QDAzJ8CmAWgC4BdAM4DuD+kApkKTGfjUJQciy/LqfjCxIkT0bBhQ5QtWzbYIipuCHcUYh9mvoaZ45k5kZm/YOZPDeUFI/rwEWauysx1mTmA0cl+oEEciqJ44KuvvkKTJk1Qv359PPzww7DZbLhy5Qr69++PunXrok6dOnj//ffxv//9D+vXr0evXr18ttxsNhuGDx+OOnXqoG7duvjhhx8AAAcPHkSLFi1Qv3591KlTBytWrHBZp6JTScm3KjBFiQqiaTWVzZs3Y/r06VixYgVy586NIUOGYOrUqahatSqOHz+OTZs2AQDS0tJQtGhRfPDBB/jwww9Rv359n8r//vvvsXXrVmzYsAGpqalo3LgxWrVqhcmTJ+O2227DiBEjkJGRgQsXLmDt2rWZ6lRUgcm3KjBFUZyYP38+1qxZg6QkmW3pwoULqFChAm655RZs374djz/+OLp06YKOHTsGVP6yZctwzz33IC4uDmXLlkWLFi2QnJyMxo0b48EHH0R6ejp69OiBevXqoVq1akGpM7uhCgxQBaYoUUI0rabCzBg4cCDGjh2bad/GjRsxe/ZsvP/++/jxxx/x+eefB1S+K26++WYsWrQIv/32G/r27YvnnnsOffv2DUqd2Y1oi0IMLxrEoSiKG9q3b49p06bh+PHjACRacf/+/UhNTQUz46677sJLL72Ev/76CwCQkJCAs2fP+lx+q1atMHXqVGRkZODo0aNYvnw5kpKSsG/fPpQtWxZDhgzBgAEDsG7dOrd15nRytgWmQRyKorihbt26GD16NNq3bw+bzYb4+Hh8+umniIuLwwMPPABmBhHhjTfeAADcf//9GDRoEPLnz4/Vq1c7LGwJAIMGDcKwYcMAAFWqVMHixYuxcuVK1KtXD0SEcePGoXTp0pg4cSLGjRuH+Ph4FCpUCJMnT8aBAwdc1pnTydnLqdhsQFwcMHo0MGZM0OVSFMU7upxK6NHlVLIjuXIB+fKpBaYoihKD5GwFBkg/mPaBKYqixByqwHRRS0WJONmhKyNayc7XVhVY/vyqwBQlguTLlw8nTpzI1g1tpGBmnDhxAvny5Yu0KCEhZ0chAmqBKUqESUxMREpKCgJe10/xSL58+ZCYmBhpMUKCKjBVYIoSUeLj41GlSpVIi6HEIOpC1CAORVGUmEQVmFpgiqIoMYkqMA3iUBRFiUlUgakFpiiKEpOEVYE
RUSci2k5Eu4hopIv9lYhoARFtJKJFRBT60BlVYIqiKDFJ2BQYEcUB+AhAZwC1AfQhotpO2d4C8DUz3wDgZQCvhVwwDeJQFEWJScJpgTUBsIuZ9zDzJQBTAXR3ylMbwALj90IX+4OPaYHpIEpFUZSYIpwKrDyAA5btFCPNygYAdxi/bweQQEQlQiqVuaRKenpIq1EURVGCSzgVGLlIczZ7ngbQmojWAWgN4CCAKy4LIxpCRMlElJylEfy6KrOiKEpMEk4FlgKggmU7EcAhawZmPsTMPZm5AYDnjbTTrgpj5s+ZOYmZk0qVKhW4VLoqs6IoSkwSTgW2BkB1IqpCRHkA9AYww5qBiEoSkSnTcwAmhlwqtcAURVFikrApMGa+AmAYgDkAtgGYxsxbiOhlIupmZGsDYDsR7QBQBsCrIRfM7ANTBaYoihJThHUyX2aeBWCWU9ooy+8fAPwQTpnUAlMURYlNdCYOVWCKoigxiSowDeJQFEWJSVSBqQWmKIoSk6gC0yAORVGUmEQVmFpgiqIoMYkqMFVgiqIoMYkqMNOFqEEciqIoMYUqsLg4IG9etcAURVFiDFVggFhhqsAURVFiClVggK7KrCiKEoOoAgNUgSmKosQgqsAAUWAaxKEoihJTqAID1AJTFEWJQVSBARrEoSiKEoOoAgPUAlMURYlBVIEB2gemKIoSg6gCA9QCUxRFiUHCrsCIqBMRbSeiXUQ00sX+ikS0kIjWEdFGIuoScqFUgSmKosQcYVVgRBQH4CMAnQHUBtCHiGo7ZXsBwDRmbgCgN4CPQy6YBnEoiqLEHOG2wJoA2MXMe5j5EoCpALo75WEAhY3fRQAcCrlUpgXGHPKqFEVRlOAQbgVWHsABy3aKkWZlDIB+RJQCYBaAR10VRERDiCiZiJJTU1OzJlWBAqK8Ll3KWjmKoihK2Ai3AiMXac5mTx8Ak5g5EUAXAN8QUSY5mflzZk5i5qRSpUplTSpdE0xRFCXmCLcCSwFQwbKdiMwuwgcATAMAZv4TQD4AJUMqlSowRVGUmCPcCmwNgOpEVIWI8kCCNGY45dkPoB0AEFEtiALLoo/QC+ailqrAFEVRYoawKjBmvgJgGIA5ALZBog23ENHLRNTNyPYUgMFEtAHAFAADmEMcXaEWmKIoSsyRO9wVMvMsSHCGNW2U5fdWAM3DKpSpwHQ2DkVRlJhBZ+IA1AJTFEWJQVSBAdoHpiiKEoOoAgPUAlMURYlBVIEB2gemKIoSg6gCA9QCUxRFiUFUgQGqwBRFUWIQVWCABnEoiqLEIKrAACB3biA+XhWYoihKDKEKzKRAAQ3iUBRFiSFUgZnoqsyKoigxhSowE1VgiqIoMYUqMJMCBYBz5yIthaIoiuIjqsBMypcHUlIiLYWiKIriI6rATCpXBvbujbQUiqIoio+oAjOpVAk4cULdiIqiKDGCKjCTypXle9++iIqhKIqi+EZYFRgRdSKi7US0i4hGutj/DhGtNz47iCgtbMKZCkzdiIqiKDFB2FZkJqI4AB8B6AAgBcAaIpphrMAMAGDmJy35HwXQIFzyqQJTFCWm2L4dSE0FbrwRiIuLtDQRIZwWWBMAu5h5DzNfAjAVQHcP+fsAmBIWyQCgTBkgXz5VYIqixAYTJgDt2wPMkZYkYoRTgZUHcMCynWKkZYKIKgGoAuAPd4UR0RAiSiai5NTU1KxLRwRUrKh9YIqixAYbNwK1a8tcrjmUcCowcpHm7tWhN4AfmDnDXWHM/DkzJzFzUqlSpYIioIbSK4oSM2zaBNxwQ6SliCjhVGApACpYthMBHHKTtzfC6T40UQWmKEoscPw4cPgwULdupCWJKOFUYGsAVCeiKkSUB6KkZjhnIqKaAIoB+DOMsgmVK0un6L//hr1qRVEUn9m0Sb7VAgsPzHwFwDAAcwBsAzCNmbcQ0ctE1M2StQ+AqcwR6JnUsWCKosQCGzfKdw63wMLa+8fMswDMckob5bQ9JpwyOVCpknzv2yedo4q
iKNHIxo1AqVISPZ2D0Zk4rOhYMEVRYgEzgINcxcblHFSBWSlbFsiTRxWYoijRS0YGsHlzjncfAqrAHMmVS9yIqsAURYlW9uwBLlzI8QEcgCqwzGgovaIo0YwGcFxFFZgzlSppFKKiKNHLxo3iLdJAM1VgmahcGTh6VEx0RVGUaGPTJqBaNaBAgUhLEnFytALr3Rt4+22nRB0LpihKNLNxo/Z/GeRoBbZhA7BypVOihtIrihKtnDsnQRza/wUghyuwChWAAwecElWBKYoSrWzZIsunqAUGIIcrsMREICXFKfGaa4D4eHUhKooSfZhzIKoFBiCHK7AKFWRC5ytXLIm5csm6YGqBKYoSTRw9Cnz5JVCoEFClSqSliQpytAJLTARsNuCQ86Iu114ry3UrihIeNm0CevQAzp+PtCTRyW+/idX111/ABx/Ii7aSsxVYBWN1skxuxEaN5IFKTw+7TIqSI5k7F/jlF2DZskhLEn289x7Qtat0byQnAwMGRFqiqCFHK7DERPnOFMjRuLH4FTdsCLtMipIjOXhQvpcsiawc0chnnwE33QSsWgVcf32kpYkqcrQCc2uBNW4s32vWhFUeRcmxmAps8eLIyhFtHDsGbNsGdOsG5MsXaWmijhytwAoXlv7QTBZYYqLMTL96dUTkUpQch6nAVq/WWXCsLF0q361aRVaOKCWsCoyIOhHRdiLaRUQj3eS5m4i2EtEWIvoutPKIFZbJAiMSK0wtMEUJDwcPAiVKAJcu6YujlSVLgPz5pV9eyUTYFBgRxQH4CEBnALUB9CGi2k55qgN4DkBzZr4ewBOhlisx0YUFBogC274dOH061CIoSs6GWUKB77hDXh61H8zOkiXS/5UnT6QliUrCaYE1AbCLmfcw8yUAUwF0d8ozGMBHzHwKAJj5WKiFcjkbBwA0aSIP1tq1oRZBUXI2J06I5XX99TLDhCowIS1NAsnUfeiWcCqw8gCsqiLFSLNSA0ANIlpORCuJqJO7wohoCBElE1FyampqwEJVqAAcOQJcvuy0IylJvtWNqCihxez/KldOGusVK1w8kDmQZcvkJVoVmFvCqcDIRRo7becGUB1AGwB9AEwgoqKuCmPmz5k5iZmTSpUqFbBQiYl2D4YDJUrIgGYivRR/AAAgAElEQVRVYIoSWkwFVr68NNbnz8uA3ZzOkiXiOmzaNNKSRC3hVGApACpYthMBOKuNFAC/MPNlZv4HwHaIQgsZbkPpAXEjaoeyooQWqwJr2VJ+R6Mb8eLF8Na3ZIm0Qfnzh7feGCKcCmwNgOpEVIWI8gDoDWCGU56fAbQFACIqCXEp7gmlUG4HMwMSyHHggMxBpihKaDAV2DXXAGXKADVrRpcCYwaefFLkO3w4PHWeOyezbqj70CNhU2DMfAXAMABzAGwDMI2ZtxDRy0TUzcg2B8AJItoKYCGAZ5j5RCjl8miB6YBmRQk9hw4BpUvLKhAA0Lq1jH/KyIisXCZjxwLvvgucOgVMmRKeOv/8U86/devw1BejhHUcGDPPYuYazFyVmV810kYx8wzjNzPzcGauzcx1mXlqqGUqXBhISHBjgTVsKJNmqhtRUULHwYPiPjRp2VKGr2zeHJ76L11yP4nwxx8Do0cD990ngV3ffBMemZYsAeLigBtvDE99MUqOnonDxG0ofcGCEtqrFpgSKdLTpYHNzjgrMNPzEa5AjmHDZKyVM3PmyL5u3YAJE4D+/YH16wNTrBs2+D45+KFDwNdfi8JMSPC/rhyEKjC4WdjS5KabgOXLs38jokQnt90GDB4caSlCi7MCq15d5nhbty70dWdkAD/9JArG7IszmTRJ+uSmTgVy5wZ69xaraPJk12WdOwcMGSLjcqycPCnK6M03vctz4gTQoYMc88EHAZ1STkIVGDxYYADQuTNw9qwu86CEn4wMue+yswv74kXg+HFHBZYrF1CvXngssORkURqA40TCzMDChUC7dvYowNKlgVtuAb79VhYSdOb334Hx44H
p0x3T16+X1S1+/dWzLGfOAJ06Abt3AzNm2C1RxS2qwCAW2NGjboysdu1kLMasWWGXS8nh7N4tbqfdu52WDc9GmFF95co5pjdsKA2/K0URTGbPlumrChZ0VGB//y2NQtu2jvn79xd3jatZ883IyfXrHdPNZZmSkz1HNPfrJ1bn999nrldxiSowiAXmcjAzIK6M1q1lRVRFCSebNsn35cvA/v2RlSVUWMeAWWnQAPj3X2DnztDWP3u2jLVq3dpRKS1cKN9t2jjm79ZN+qVcBXOYCszZ9blhg7ggAbHSXLFmjVhoY8eK21jxCVVgsI8Fc9sPduut8ka2J6RD0hTFkY0b7b937Ahv3StWiDvrl19CW487BdawoXyHsh8sNVUUR+fOosC2b7f3Xy1cKG+2117reEyBAjLp8A8/OEYupqXJ/5U3r7x4WC3mDRuAm2+WcWSzZ7uW5Y03gCJFgEceCe45ZnNUgcE+FsxtP1iXLvKtbkQlnGzaJFOaAaG3RExOnAAGDQKaNwfmzQN69fLc/5uaKn3EgeJOgdWuLa77UPaDzZsnrhdTgQFiRdlswKJF4sYjFzPg9esn5zxzpj1t+XIpq29fcfv+/bekX74MbN0K1K8vLwRz5mR2B2/fLoEkDz8s43oUn1EFBh8UWPXqQLVqqsCU8LJpk7iwEhLCo8AOHRLFMWkS8PTT4nGoVEncZmaDbMWcaLZzZ/lt5Z13gKFDvYeOHzwoVkuxYo7p8fFA3bqhVWCzZ8sLQqNGYvGZ/WBbtkhgibt+qDZtxJr6zrJc4dKlIvNDD8m22Q/299/SuV6vnrwIp6UBK1c6lvfWW6KsH3886KeY3VEFBmkfChf24EIExI24cKH7AY+KEkz+/VeCN264QV6gnF2IaWmZw76zyujRMtvE6tXA//2fKK/ff5eGuVOnzOHhmzZJA718uUTmmaxdKwrws8+k0T5zxn2dhw6J9eXK0mnQQFyIzsoxEM6ckQCMhx4SpWqziTV0yy0SGh8fL1bn4sX2/i93CiwuTkLqZ82ScHdALLekJLG08uWzuz7NAI569SQ8Pi7O0Y14+LCM+RowQEL2Fb9QBWZQoQKwb5+HDF26yI1v3tyKEkq2bJGGu25doEaNzBbYQw8BLVr43rivXi1h2e7e0rZuBSZOlD4Ys/8JAKpUkYb66FHg+ecdj/npJ1E8deoAzz4rbrUrV2TcWunSMovF0qWiCI65WdrPeQyYlYYNRUFkNYBlxw6Z0X3KFODTT8Vd+Ouv4v7s3Nmer3Vrue7ffy/nXamS+zL79hX34I8/ykttcrLMIJI7t/xnVgWWN6/M71ikiPxnVk/OO+/INXv66aydY06FmWP+06hRI84q3bsz167tIUN6OnOBAswPPZTluhTFKxMmMAPMO3cyv/gic65czBcvyj6bjbl0adm/bp1v5d1zj+Tv3dv1/m7dmAsXZk5Ndb1/0CC5/0+ftqfVrcvcqhXzypVS9ogRzP/3f/L7++8lz8yZzPnySV5TfitVq7qX6c8/pazp0zPvs9kkfc+ezOkLFjBPmsT85ZfM774r51WyJPPChcw//cRcsCAzkXyOHbMfu2yZ1AcwDxzoWiZrPTVqMLdpw/zHH3LMzJmyb8gQ5qJFJU+HDswNG9qPe/11ybtgAXPPnp7/kxADIJmjoP3OyifiAgTjEwwF9swzzHnyMF+54iFTt27MFSowZ2RkuT5F8chjj4nCyMhg/uYbeVS3bZN9O3bYG9qxY72Xdfo0c/78zCVKyDGLFjnuX7JE0v/7X/dlrFoleT75RLZ37pTtd9+V7fvuY46Pl3q6dZPG22TGDMn76quOZdpsotyGD3dd57//iuJ+8cXM+0ylkTs389ChzPv3M//4I3ODBvZrY34aNGDeu9d+7MaNzFWqMN98s2OZFy+K/IBcc2+89JIowcGD5fvUKUn/5BMpY+9eedG4/37Huk25ChViHjOG+dw573WFAFVgUfIJhgIbP16uhvMLnQNTpki
mefOyXJ8SY1gb5HDQti1zkyby27RwZsyQ7YkTZbtsWXseT3zxheT/4w/mSpXEGrp8WfZducLcrBlz+fKiMNxhszHXq2e3Jt58095IMzMfOsSckCCN8v79mY+/4w5RVrt329NOnJAy3n7bfb21azN37Zo5/ZZbmMuUYX74YVGcplKoXl0sr9275WHeu9f1W+nly8znz2dOb9dOyklJcS+TianEieTamJj/16efOip5ZrmOffowP/4489Gj3usIIarAouQTDAW2eLFcjd9/95DpwgXm4sWZe/XKcn1KDGCzyQ3RuDHztde6doGFqt4SJZgfeEC2zYb+rbdke+BAuQ9feknSjxzxXF6rVuLustnEhWY2qj/8wHz99bL9xRfe5frwQ8mbnCxKz/m5W7pU3HCuOHBAlFunTvaXgU2bpLypU93X2a8fc7lyjmnr1rGDxbh3L/N//sP87bdeXCg+8P339uvuC02aiCyPPmpPMy3HZs1k38KFWZMpRKgCi5JPMBTY4cNyNd5/30vGxx8XX6O7voLszrx50qAGwn//y3zNNcwtWjAPGOC54Yo0f/8tcgJ219sPP4Sn7kOHpL733rOnlSjB/OCD8rtGDebbbrM35BMnui9rzx7J88orsm32y5gWy3XXyf/gi4V56pS42Lp2dSzTV955hx36x37/XbaXLnV/zLhxmZX0PfeIMjRddpHk3XdFvmnTHNNr1bJf45MnIyObF1SBRcknGArMZhMPyLBhXjKaPux33slynTGH2Rj60u/iihYtxO3VqhVzsWLSGGb1jTlU9Owpnf8ffSSupvLlmbt0CU/dc+bwVZefSbNm4lY8elT2vfGG3LSJiSKrO8x+mn377Gk7d8q5fP21/9f/vvvsDfPWrf4de/kyc/36zHnzilXbvDl79dubgRzduklf3j//MMfFMT/1lH91h4qzZ+XF7MIFx3QzaKZChcjI5QPZQYGFNYyeiDoR0XYi2kVEI13sH0BEqUS03vgMCp9sEq3sdcaeunVl7rQvvgjO+JRYwpxWKNAptXbvlrDlxYtlaYkLF7yMXYggq1fL2L+HH5bZyO+7T8ZEBXvslSvMORDr1rWnmaH0K1bIdosWctN27QrMnSuzujvDLGOM2rYFKla0p1erJnN79u8v45L8YcgQ+b7uOqBWLf+OzZ1bQu8fekgGX/79twwIdp7I10qzZsB774m8TZoATz0ls9U/+aR/dYeKQoWA556TsV9WGjSQ73r1QlZ1Tmt+XBE2BUZEcQA+AtAZQG0AfYiotous/2Pm+sZnQrjkA3xUYIBMtbN5s32Zi7VrZYxJdr+jzGUiAlE6//4rgzarVpVts/Hbti04sgWTI0dkvFRSkj3t/vtl8OvXX4e+/o0bpWEvWdKeVr26yDR3rowratRI0rt2lXWoXM2Ovny5vDTce2/wZLvxRuD22wOfNaJKFRn7tGCBjMNKSZHz8cRjj0n+kydFAfbr537sWLRQv758h1CBTZ4sw1NPnQpZFVFPOC2wJgB2MfMeZr4EYCqA7mGs3ys1akjb7HXh1N69ZdqZ556TN+GkJOCee4A//giLnBEhNdU+J14gCsy02mJBga1dK9/W9ZiqVZNpkyZOdP+i4soKsnL5slgQkyd7Xh5l40ZH6wuQmxOQF6XGje2N/s03i4VonZfP5KOPxNK54w7PcvkDkSiRoUODU1YuH5ug1q1lsPCwYcBLL2W97lDTtKn8TyGaWT49HXjhBRlfXqRISKqICcKpwMoDsM42mGKkOXMHEW0koh+IqEJ4RBNq1JC2afduLxkTEkSJLVwoU+G8/ba8EY4Zk32tsJkzxQJp314mjfR3nSbzopoKrHhxma0hGhXYmjXSuJpuIJMHHgB27XI9ue3bbwNFi8oEse6YORMYN05cd9ddJ25oZ0V24IDMo9eypWN69erynZYmL00m+fPLfzJ9uqMC3btXZpR48EFxc2UHKlaUVYorhLVZCIyEBPHQNG0akuI//lgmKHn
jDd/fAbIj4Tx1F5Odwbm1/xVAZWa+AcB8AF+5LYxoCBElE1FyampqUAQ0X3J9ciO+9ZYosJ07geHDgf/8Rxq2BQuCIkvU8fPP0nD06CGTkzrPi+cNU4FVq2ZPq1UrOhVYcrLI5tzw33GHNEwTJzqmT54sUwFlZIhycrdo4cSJ4hr88UdRdoMGAa+84phn2jT57t3bMd163awKDJDpn1JSRDmavPuuKOHHHvN8rkrMkZYGvPoq0LGjvLvkaMIVLQLgRgBzLNvPAXjOQ/44AKd9KTsYUYjMzGlpEjj0+usBHJyeLpFqzZuHf9BrqDl3TgahPvqoTJcDMK9Y4V8ZDz0kkYdWhg61T7kTLdhsMkD2vvtc7x88WAbOPvKIzIgxZ47MBtG2rYyPypdPxjo5z9Zy6JBEz40caa+na1eZ4sgawdaoEXNSkuu6r7mG3YZl9+ghM3fs2yf7CxZk7t/f79NXop+RI9mvWcTcAY1C9Is1AKoTURUiygOgN4AZ1gxEdI1lsxuAsL6eFykiE0IHtHZg3rxihS1fnv2ssDlzxOl+++32CU79nWB19267+9CkVi15nfS0zHqo+eknx0UbU1JEHmsAh5VXXpH+zvHjZYLW226TJUimT5fAinHjJFrxnXccj/vmG7HQ7r9ftk3r6PhxcfUBYs2vXQv06eO67tq1ZXZ656VHAKnPZpM+ts8+k6CZp57y71ooUc/Bg2Jc9+1rjxPJ0YRTWwLoAmAHgN0AnjfSXgbQzfj9GoAtADYAWAjgOl/KDZYFxszcsqUMVwoI0wpr1kxGRmcX+vcX6+nyZeYzZ/jqOCR/qFo18wwm8+bx1YlNI8GFC2IBFi9un1bInKnizz89H3v4MPOoUWJtHTxoT7fZmG+/Xaw0c85Bm425Zs3MN5aZbk4HZY7ZcjeN0Z49YvW54+WX+eocex06eJZfiUkefVTmUfjnn6yXhWxggUVcgGB8gqnAHnhA5t8MGHNSRUBG4w8f7nrOtVBz4YLj4NVAuXRJlNe999rTihWTOeh85fJlcbP95z+O6Skpcp0+/DDrcgaCObclILO/M4uMuXNn7T87eVL++4QE5jVrxN3qbsaMDz6QfatWyawYrVoFXu+FC/KiADDPnRt4OUpUcvGiTMjSp09wyssOCiwHx6+4pkYNWbro9OkACxg0SIIA3nhDoqbGjQM+/DBzvn/+CV3E4sWLQLt2EumW1YHC06fLQJO777anVazoX7n790u0nbMLsVw5CYqIVCDHl1/Kudxwg0S3Mct/V6eORPcFSrFiEo1YooQsmDh6tAy7uOuuzHnvvVeCRYYNk4G97tyHvpAvn7gqR4zQ3v1syJw5wIkTMgxOMYi0Bg3GJ5gW2PTp8gK7Zk2QCuzYUTrqz561p5lTBX3+eZAqsWCziRkJiK/h7ruzVl7z5vJWbw1K6NaNuU4d38uYO5ddLuPBLO4z52UtwsG+feKuGzXKbjUvWiTuxEGDglPHrl32wAvrkhrOPPIIX10aJKfOsal45e67mUuVEqdIMIBaYNkPv0LpfWHMGOmo//hj2T53zj4dz3vvBd8K+/BDGV/0wgsSVDJtmix3Hghr10pQyrBhjoNNKlUSC8xX2Z3HgFlxDqX/80+xVDwNjTh3TqwVbyPOMzLc7/vqK5F/wAAJyiheXELhT550H8DhL1WriiXWrp3ngIphw+S7QwfH2TcUxeD0aWDGDBldER8faWmiiEhr0GB8gmmBpafLi/no0UErUtYuMq2wJ56Qt+3BgznThK1ZZf58CdXu3l0spn//lclE69cPbNLce++VcOy0NMf0t97iTOHcqanu+42eekomcHW1EKi5Qm1amliP5vIUDRs6rv67cqVcx/Ll+Wq/Ve7ccm5Dhsjs8VYWLJAOA1d/ZEZG5gUNn33WXm5yssfLEhImTGDesCH89SoxgbkE3MqVwSsT2cACi7gAwfgEU4ExS9sWrI5SZrbPqN2rl2jHhx6Sxr5ECc8zifvDhg0ye3q
dOhIpaDJ1KgfkrjxyRFyQjzySed/330uZ69fLts0mF83dufToIQsTuuKXX/hq1N/8+fK7b19RTm3aiBJ+7TXZLl9elOqrrzJ/9ZUEXHTsKFF3+fLJ8iMZGaIMcucW+XPnlnWnrCxcKPVMnmxP27tX1nDKkyd8634pio/cfDNztWrBHTKpCixKPsFWYF26+NfF4xOdOsnlTky0WxYjRkijmdVowX37ZNG/8uUzr4Zrs8nYgFKlMltSnjBDsp0tG2bm1atl3y+/yPbmzezReqlbV9avcsWOHXw1Qq9tW+kzSk8X5QKI3IB0ALhb/+nQIeZbb+Wr61sBoth27ZI+rZYt7U++zcZ8112i7J1XIO7fX/4nRYkiDhyQ994xY4JbriqwKPkEW4GNHi16xRp3kWXWrJEZHmbNsqf9849U5BxebuLK5ebMiRMSsl2kSGZLw2TtWnkCnn3WfTn//CNju156ST5lyrhvzI8cYYfVP81FBxMSMi//brOJG/KJJ1yXdfmyuBdbtpQyrMvLf/SRKKDx472/etpskq9IEQnxv3xZ0idMkHK//FKs3n79ZHvEiMxlZGRE16wgisLMb74pt+zOncEtVxVYlHyCrcB++02uzOLFQS3WtULq3j3zdELMsnBh1arMrVszb9vmuG/7dlEaXbuK0siTx3WEn5UBAyTf7t2O6Rs2iMsuLo6vWlFm/5K7/jmbTVx25qKCnTqJ5fPqq3x1TJOJudT1Bx+4l61uXclTooRMW+Vclz84X+OMDOabbpJrnJTEVxfkVEWlxAi33hoCjxBnDwWmUYguMFfRMJf7Chqupo0eNkyiFF991R7Vd/myjLs6eFCW1qhXT5aQ+OILoHlzmcJo+HBg+3aJoFu4UJab8MQrr8iCgiONdURtNolUrFdPJup9/HEZr5WRIZ+LF2UhRFcQ2ceCXbwoa1F16AA8+qiMfRozxp7XUwSiibm0yhNPyHgp57r8wfka58oFfPKJjGXbvl2mjXrhBf/LVZQIcfAgULlypKWIUiKtQYPxCbYFxiwxCXfeGfRiM2Oz2d1ajzwi0YKPPSbb33wj7ro+feyW0XXXiU8hkH6zMWOkjNmzJbACkDFjJ074X1aHDrIs/IIFUs6vv0q6GVVoTvb71VeyvX27+7LefVemP3E1SW2wWLw4+D4YRQkDpUoxP/hg8MtFNrDAIi5AMD6hUGC9ejFXrBj0Yl2TkSHuOMDu5nLuM1q5Uj5ZcX2dO2cPQ8+VS6L2Ai1v0CBROiNGyLx/Zofh2bPyxJUpI31kI0dKXRcv8oEDzFu2uCjLZpPADUVRHEhPl8f15ZeDX3Z2UGDqQnRD06biUfN32auAyJVL1hd7+22ZyqhNG+D//i+zQE2bZs31VbCgDJ6uXBmYPVtmQw+0vEqVZM6tX3+VZebNtbMKFZKya9aU8l9/XdYRy5MHI0a4WRyYyPuy8oqSAzHbn3LlIitHtKIKzA1Nmsj3mjVhrHT4cOnzmjlT+qtCwR13yDyMHTtmrZyKFeV769bMZTVqBCxaJJO3NW8OdO8OQFYq8WcCD0XJ6Rw8KN/lXa1dryBErWTs06ABEBcngRy33RbGiuvWDWNlWcBcFwyQAA5niESxWZTb8ePAhQsyLU7RomGQUVFiHFOBqQXmGrXA3FCggOiSoEciZhdMBVasmFhcPmBOb2g+lIqieObQIflWC8w1qsA80KSJKDCbLdKSRCHly4uJ2r69fHvBZpOlIAD7Q6koimcOHpTu4eLFIy1JdKIKzANNmsiK97t2RVqSKCQ+Hhg/Hhg1yqfsJ0/aXwRUgSmKbxw8KO5DHbbomrArMCLqRETbiWgXEY30kO9OImIiCtLaFv7TtKl8qxvRDfffL4s/+sDx4/bfqsCiE2YZ056WFmlJFJNDh9R96ImwKjAiigPwEYDOAGoD6ENEtV3kSwDwGIBV4ZTPmVq1JPJcFVjWsS7vpQosOpk/H7j9duDOOz0vpaaED9MCU1wTbgusCYBdzLy
HmS8BmAqgu4t8YwG8CcDLioWhJS5O1jZcskRDv7OKqcBy5VIFFq1MmCCe4QULgBdfjLQ0CrMoMLXA3BNuBVYewAHLdoqRdhUiagCgAjPP9FQQEQ0homQiSk71tHpvFundG9iwQRbWVQLH/Itq1lQFFo0cPy7uw4cfBgYPBl57TaaNVCLHmTPA+fOqwDwRbgXmqivyqm1DRLkAvAPAw/rrxkHMnzNzEjMnlSpVKogiOjJwoIzZHTVKrbCsYPaB3XCDKrBoZPJk4NIl4IEHgPffl5ER995rn4tZCT86Bsw74VZgKQAqWLYTAVibswQAdQAsIqK9AJoBmBHJQI48ecSdsmqVzJCkBEZqKpCQAFSpAhw+rEMToglmWeigSRMZ+5gvH/Djj8DZs8C330ZaupyLzsLhnXArsDUAqhNRFSLKA6A3gBnmTmY+zcwlmbkyM1cGsBJAN2ZODrOcDtx3nzS8phWWkSEPvFplvpOaCpQqJW+Tly87RiUqoefSJfdu8NWrgc2bxfoyqVRJPA87doRHPiUzpqdCLTD3hFWBMfMVAMMAzAGwDcA0Zt5CRC8TUbdwyuIP8fGirNaulWW1mjYFBg0Cxo4Ftm2LtHSxganAzLdJdSOGl4kTZVavrVsz7/viC5l5pndvx/Tq1YGdO8Mjn5IZtcC8E/ZxYMw8i5lrMHNVZn7VSBvFzDNc5G0TaevLpF8/eaBHjRIX2HvvSfr06ZGVK1Y4ftxugQGqwAJh2jTgq68CO3bBAvl2VmDnzgFTpgC9egGFCzvuq1FDLDD1MkSGgwdlprb8+SMtSfSiM3H4SO7cwNdfA2+8IQv7PvaY9BmoAvON1FSgZElVYFnhtdeA0aP9P45ZFgcAMrsEV60SJeZsfQHywpaWZp8CTAkvhw6p+9AbqsD8oFkz4Nln7Utf3X67uBX374+sXNEOs92FWLaspGVFge3aJbPZB3upm6NHZbmXaMRmkxenffv8nyljyxZ7n6OzAjMtsnr1Mh9Xo4brY5TwoGPAvKMKLAvcfrt863gZz5w7B1y8KAosTx75zooCmzNHlmSZNi14MgLAPfcAXboEt8xgsW+fLEUDyJJx/mBaX9de61qBFS8OlC6d+bjq1eVbFVhk0Fk4vKMKLAvUrCnTTakb0TPm2785XK9cuawpsGXL5DuYwxqOHZOGfuvWMK3C7Sd//23/vX69f8cuXChRhR06uFZgtWu7niy2cmVxnWsgR/jJyJD7UC0wz6gCyyK33y5TTWXnfoKDB7M2bsuchaNkSfnOqgJbvlwa1i1bgAMHvOf3hRkz7Oe4dGlwygwmZrRrwYIyM4yv2GyimNu2FZfgiROO96qpwFwRH+/aalNCz9Gj8t+pAvOMKrAscvvt8rb066+RliQ0HD4sjdjUqYGXYSqwYFhg+/eL0jLHLP3+e+ByWZk+XayUggWBxYuDU2Yw2bZNXgBuvNE/C2zzZlnKpm1b8RgAdoWUmirWca1a7o/XUPrIoLNw+IYqsCzSqBFQoUL2dSOuXi2DYP11W1lxVmDly8sb5pUr/pe1fLl8Dx4s1z0YCuzMGZmJvWdPoHlzsaijjW3bRNHUry+Wp6/XbuFC+W7TJnNQhhnA4c4CA+SYnTt9s8A3bRKFGQirVkmE7+HDgR0fTq5ccT2eLpjoSsy+oQosixCJFTZzpjQEt94KvPCCBC1kB9atk++szInnqg/MZhMl5i/Ll4uVVK8e0KmTKJ7LlwOXDQBmzRIl3bMn0KqVNMQnT2atzGDCbFdg9erJvbV9u2/HLlwoFnTFivY+LX8UWPXqMqGsN4v5wgXgllskEMYfdu4E7rpLInxHjpT6Xn3VHrASaZzHwNlsQP/+wPXX+7fM0o4dco85l92unYwxTXdad0MtMN9QBRYEnnkGeOIJ4Lrr5EF/9VVRYsFk/frIhHgHQ4Glpkr0oTn8ICtjwZYvFzda7txA585iPf35Z+CyAWI9lykj5bZqJWnR1A92/LgoVNMCA3yziG0
2sSbbtJFt5z6trVtlfkpPb/mm1ebNjTh+vFhPmzbZG19vzJ4tynPWLBnftn69zBbywgtyrnv2+FZOqLZUyqMAABRqSURBVJgwQaIzv/tOtpmB4cPt7nQz3RtHjsi6r6+/7pi+aRPwxx8y32THjo4vTQcPynJOZcpk/TyyM6rAgkBiIvD228BPP0mD/9BDwFtviXUQDJilsR46NDjl+YNVgQU6I4M5BsyMdAtUgZ05IyHkzZvLdrt2osiyEo2Yni4NaPfu0mA0aQLkzRtdbkQzgOO666QfK08e3wI5NmwATp2S/i8Tc3YNwHMEookvofTp6eL+u/Za2Z4zx7tsADBpkvTr7doFjBkj1uVPP4nVePq0LKzpbJmEky++kICXvn2BPn1ExvfeAx5/HOjRQ4Zx+LLw56xZ4iX45hvHZ+jHH+Xaf/ihuFBbtJAXpwMHpK+3bFm5JxUPMHPMfxo1asTRxL//MteqxXzNNcypqVkvb+tWZoA5Xz7m8+ezXp6vpKZKvRUryvfRo4GV07Urc/369u1Dh6S8jz/2r5w5c+S4uXPtaa1bO5btL7/+KmX+/rtjmUlJgZcZbD79VGTcu1e2GzZk7tDB+3FvvCHHHThgTxs+nDl/fuaMDOayZZnvv99zGRkZct899ZT7PB98IPX88Qdz+fLMd93lXbaMDOYSJZjvvdf1fvN/GTzYe1mh4PBhZiLm0aOZX3mFOXdukadPH5H9f/+T7YULvZfVs6fkBZhXr7anX389c6tW8nvRIuYiRez5AOYmTUJxZnYAJHMUtN9Z+agFFgIKFBD3wokTMulvVpcOMTvi09PtY6DCgWl93XmnfAfqRjx+3B5CD4hbJpCVmZctk+OaNbOndeokrqdAO/9/+gkoUsTRSmnVCvjrL7H4ooFt2+SeqmAsRFSvnpyzN4v4hx9kRfHERHtajRrSv7Rxo7i2PPV/AXK9q1Vzb4Glp4trrGVLcVV26iSz3nsLMlm/Xp6PDh1c7+/aFXjuOXFNBjr/Y1b49Ve5vj17As8/D6xcKRN5T5ok16RrV+mLnTLFfsyhQ7Jyxd699rRLl4C5c4G77xbL2cy/fbsE49xxh2y3bi3/88yZwOefi7Xn7HJUXBBpDRqMT7RZYCbjxsmbVOPGzCtWSJrNxrxyJfN//+u7dXbXXWLN5cnD/PTToZPXGfMNfvly+f7668DKqVpV3lytlCvHPHCgf+XcfDNzgwaOaevXi2wTJ/ovV0YGc+nSmWWbP1/KnD3b/zJDQceOYnWZvPuuyHfokPtj9u6VPK+/7pi+cKGkjxkj37/95r3+nj2Zr7tOfmdkMA8dynzHHVLGo49KOfPny/7vv7ffM554/XXJd/iw+zyXLzO3bSsW45YtjvsyMpgnTGAeNYr5hReYX3xRzs1m834+vtClC3OVKp7Lu+ce5uLFmS9dEnk6dpRzGjTInmfePEmbMYO5e3d5jq9ckeff2ToON8gGFljEBQjGJ1oVmM3G/M030lgDzJ07M1eqxFddBL64R2w25lKlxNVy883MdeuGXOyr9O4t7sP0dLs7JRAKF2Z+7DHHtKQk5k6dfC/j0iXmggWlwbRis0mj4IvbyplVq+R/+PZbx/Rz58Rl9Nxz/pcZCipWlMbSZNEi7wr2rbckz65djukHD0p6/fryvWeP9/pHjmSOjxeFYja8FSvKPQEwt2hhb+hPnWKOixOF4ol27Zjr1PFe9+HDzCVLyv1y+bI93Xw5BJhz5bLL0rAh8+TJjnn95exZ5rx5mZ94wnO+GTOkzlmzmN9/X35XrSrHHjsmeZ54QrbPnbO7Hf/4g7lRI+amTQOXMRioAouST7QqMJOzZ5mff14UUefOzJMmMT/wgDzoO3d6PnbLFr5qYZhvrZ7evINJzZrMPXrI7woVmPv187+M9HSReexYx/Tu3f1TxqtXSzlTp2bed//9zEWL+t9ovfiiNH4nTmTe16yZNDIZGe6PT06W6xPKfsmzZzNfv5Mn+ap1dei
QWLK33SZK3iq/s7XKLIqmYEE53uwL88YXX/BVCzwujrlXLynn3Dm5Bs7Xr3lzz32I589Lo/7kk97rZrY3/P/9r2z/9Zco1O7d7Yrz/Hnmzz4TSxFgHjLEt7Jd8cMP7FP/Vnq63HctW0o/4a232vurX35Z8lSvbn9R+/df5kKFpP8SEA9HJFEFFiWfaFdgrjh8WBqQvn095/vwQ776pvzXX/L7q69CL9/Zs/JW+9JLst2mDfONN/pfjvnG/8knjumPPspcoIDU4wtPPSVWkflma2XaNKlj2TL/ZGvYUBpbV3z8sZT5+OPu3Ujdu7OD+yxYHDsmjR2zKAhAGlUrlSoxV6smDWJcnOR56y3Zt3+/Y4PvTIMGfNVa8YWlSyV/fLy41dLSPOcfO5Y9Bv3Mncs+uy9N7rxTXOirVomSKlfOtQs+I0PuLSLmtWsd96WlidL1Rv/+4hr05YVo4EA5l5Il7e7Qzp2Zy5Rh3rRJ9n3wgT1/v3581XJ0to7DjSqwQCoEOgHYDmAXgJEu9g8FsAnAegDLANT2VmYsKjBm5meflQdt0yZ72u7dzBcv2rfvvFPcNTabvc/G6k4KFcuW8VXfPbNYjKVL+1+O2Ufl3ACvWCHpn37qvYxLl6Ru0xp0xnRbPf+873KZitVdI2+zifvH+jZtZd8+sd6AwF2rrti2jblYMbF+9+0TFzSQuQ/IjGzr1o15xw75LlhQjnnnHdm3Y4frOnr1kv2+WtRHjkj+3LlFgXhjzRrJP3ky84YNzA8+KFayeV8/+6woQ1+UicnRo6Ik8uSRZ8bTS0NamtwvVtfm3r2i9K69ljklxf2xly/L9e/f3ze5liyRe+/nn+1pZr9XUpL95dNk1ixJq1fPt/JDiSow/5VXHIDdAK4FkAfABmcFBaCw5Xc3AL97KzdWFdjx48wJCdIYbd1qb5R69JAHz2aTh/a+++zH9O0rrkhXrp+PPpL9V65kXTYzNNrsZDb7Ps6ccZ1/7FjpwHfGfJgXL3ZMt9nEEqhTx3vH+y+/SBm//OI+T4sWvlsUzMzjx0uZGza4z5ORIdfe+S2aWQIHiMS1evPNvtfridRUaWBLlpSQ6sREeYGJi3N8qWGW/2XlSvv23r1i0fbowXzTTZ4byBdf9Ky8nbHZmNu3z2xFuyMjQ+7RwoX56vAPawh6gwYyVMFfTEt7xAjvec3/d+pUua41a8o1TUhgrlHDvRv+jz9cv3B5wlkR22ziHgeYa9d23HfpktzzH33ke/mhQhWY/wrsRgBzLNvPAXjOQ/4+AGZ7KzdWFRizvL2bHdEJCfImDchb9ObN8vvLL+35v/pK0v76y7Ec0yUJML/5ZtblGjhQGiFTuZiNx7p1mfPu3y+NecGCYkFa+e47OW7r1szHmX0rixZ5luX22+WN2trH48yrr7LXqDYrPXqIgvCmPC9flv8kVy7mBQsk7eJFcRHdeqsEpxQo4Fk2X7hwQdyZ+fIx//mnWK5ly8o51ajhWxlvvslX3VOvvOI+3+TJksdqNQSbESNEabz1lvSRmRGtAwawyz5RX9m507dIwytXJFClQgUJlsibV16ili6V+7RWLdcuzvvuk7y+urbdYd7bzzyTtXJCiSow/xXYnQAmWLb7A/jQRb5HDEvtAIDqbsoaAiAZQHLFihX9+d+iitOnZTDjk0/Km6LNJg1m7twSeQgw//OPPb85CNg5PPrRR+VNvVUrcbNs3uy97rlzxV3iSrnUry9hwSZr17LbN9PXXpN9BQtKX5nVOjSjs1z1XZ0/L30N1gjCDz+U4AkzuOXYMbkWw4d7Phezf3DSJM/5mKXzvWBBCQf3hbNnpcErXVpcj2ZQwW+/2cPGrdZQIPTvL+VMm2ZP271b6vVVTvPtHmD++2/3+Y4flyAHd9Z0KLDZ5B43FWxWr5cvLF5sfzn86Sd7+qJF8rKXlCT3gsnq1fIi5mnQtq+kp4sL2pcoz0ihCsx/BXaXCwX2gYf89wD4ylu5sWyBueLkSXu
4faVKmffXry/ppi9/715RWoMH2/sKGjXybBXYbBKpBjDffbfjvosXpY/C6qpJS2OXkVM2m3Sqt2xpd9tYZ9h48UVpFNy5NZ9+WhRvSoo9ypJIzu/AAXt/jrWf0N35lC0rof/eMIMIfv3Ve16TLVvE0mrZUj6VK8s5mf1D//d/vpfljBmOPWZM5n1m36c/cr73XuCyhJKMDOkLq1Eja2Hu/vDWW44vBSY//8xXg3RM2Zo0kXvo9OnwyBZpVIH5r8D8dSHmAnDaW7nZTYExS2d5fLzrwb6rVkn02XXXiYUycKAosP37Zb9pFTzzjPuIMXMsUc2aojA2brTvM11MztZWiRKZw5PN8Pbx46Wx7dBBrBvzzXPoUFGo7ti9W+o3+wx695a388KFRbbrrvN9WqcBA8Siu3LFHswwbpycx6pVco7r1ombKF8+e6Sfr5jXxdkCrl5drGZvZGRIJ771Pzl/XpRh7dpZd0PGCsHoow0Gjz/OV/tWJ07ksEX4RguqwPxXYLkB7AFQxRLEcb1TnuqW37f5cpGzowJjln4Qd7N1LF4sbpBatcSCMd8kTfr25avuk4YNZcYC61tvp07iEktJkb63O+6Q9H/+EeVx442Z35KbNJEBqFaGDZM+A7NR3rdPysubV4IJqlQRGT3RtStf7R8xG7clS+x9er52eJuuvUaN7ANb3X26dvWtTGeGDZOxP1aX6MCBojg9WUr798usEmYE2pEjkj5qlKT5MqeeElzS0+XZKFZM+ntvvNE/azfWUQUWmBLrAmCH0cf1vJH2MoBuxu/3AGwxwugXOis4V5/sqsC88fvvYqUVKGBvEE2uXJFQ41GjpF8MkPDpy5ft/UVmFJrZiCYni9IpXNi1775PH0eX5sWLYpX16uWYb80a6bNq3lwUmbdZMvbsYf7888yNx9y5Mtbq1CmfLgefPCmRZlWryvi13bulv2fdOnHTff+99IX8/HPgExMzZ446+/JLuX7u+h2/+07kKlhQ3LIFCsgYrvnz5fqEY1iE4pqdO+WFy9W4seyOKrAo+eRUBcYs47V8GUhrRoH17i2h2QkJdsVw6pQ0sMWLS57vvnNdxgsviEVnhnSb/QieBqReuhS8+el84cKF8NbHLANSXQ3WTk+XMVCAvN2bA1dXrBArDpD/IVwzqyiuWbo083RiOQFVYFHyyckKzB/MIAlABpNaeeklSbeOOXNm0iTJs327PVqydOnwdchHK+Z8jNZJgQ8ckPBtc9yS8zXauFFcqxMmhFdWRTHJDgost9tp6pVsx4gRshTEZ5/JCtJWnn5aFtDztCR81aryPX68rCT711+yDHzuHH4XEclyIkuXynX57TdZvPD8eeD77+3L0VipW1cWlFQUJXBIFHFsk5SUxMnJyZEWI9tz+LB9NeWqVWWdpP79VYEBsqruo4/K7zx5ZH2xceO8r7elKJGCiNYyc1Kk5cgK2vQoPlO2LDB2LFCxolhqqrjs9O0rC0Q2bgy0awcUKhRpiRQl+6MWmKIoSg4kO1hguSItgKIoiqIEgiowRVEUJSZRBaYoiqL8f3v3F2JVFcVx/PtjJimNMPtHjZYKUklQSoT9IcR60JLsocgoEil6CbIownqJHnoIon8UQqhlEFaYlPQQhAn1kqQJZVkk9scpU6O0KMik1cPeQ5dhhrDmzOHs/fvAcO/ecy6sxbqcdc/e5850khuYmZl1khuYmZl1khuYmZl1khuYmZl1khuYmZl1UhFfZJZ0EPjmP778VODHMQynC2rMGerMu8acoc68jzXncyLitKaCGQ9FNLD/Q9K2rn8b/VjVmDPUmXeNOUOdedeYs5cQzcysk9zAzMysk9zA4Pm2A2hBjTlDnXnXmDPUmXd1OVe/B2ZmZt3kKzAzM+skNzAzM+ukahuYpIWSvpC0W9LKtuNpiqRpkrZI2iXpU0kr8vwUSe9I+jI/ntx2rGNNUp+kHZLeyuMZkrbmnF+VNKHtGMeapMmSNkj6PNf80tJrLen
e/N7eKWm9pONLrLWktZIOSNrZMzdibZU8k89vH0ua217kzamygUnqA54DFgGzgZslzW43qsYcBe6LiPOBecBdOdeVwOaImAVszuPSrAB29YwfA57MOf8M3N5KVM16Gng7Is4DLiTlX2ytJQ0AdwMXR8QFQB+wlDJr/SKwcNjcaLVdBMzKP3cCq8YpxnFVZQMDLgF2R8SeiDgCvAIsaTmmRkTEvoj4KD//lXRCGyDluy4ftg64vp0ImyFpKnAtsDqPBSwANuRDSsz5JOBKYA1ARByJiEMUXmugHzhBUj8wEdhHgbWOiPeAn4ZNj1bbJcBLkXwATJZ05vhEOn5qbWADwN6e8WCeK5qk6cAcYCtwRkTsg9TkgNPbi6wRTwEPAH/l8SnAoYg4mscl1nwmcBB4IS+drpY0iYJrHRHfAY8D35Ia12FgO+XXeshota3iHFdrA9MIc0V/n0DSicDrwD0R8Uvb8TRJ0mLgQERs750e4dDSat4PzAVWRcQc4DcKWi4cSd7zWQLMAM4CJpGWz4Yrrdb/pob3e7UNbBCY1jOeCnzfUiyNk3QcqXm9HBEb8/T+oSWF/HigrfgacDlwnaSvScvDC0hXZJPzMhOUWfNBYDAitubxBlJDK7nWVwNfRcTBiPgT2AhcRvm1HjJabas4x9XawD4EZuU7lSaQNn03tRxTI/LezxpgV0Q80fOrTcCy/HwZ8OZ4x9aUiHgwIqZGxHRSbd+NiFuALcAN+bCicgaIiB+AvZLOzVNXAZ9RcK1JS4fzJE3M7/WhnIuudY/RarsJuC3fjTgPODy01FiSav8Sh6RrSJ/K+4C1EfFoyyE1QtIVwPvAJ/yzH/QQaR/sNeBs0kngxogYvkHceZLmA/dHxGJJM0lXZFOAHcCtEfFHm/GNNUkXkW5cmQDsAZaTPqgWW2tJjwA3ke643QHcQdrvKarWktYD80n/NmU/8DDwBiPUNjfzZ0l3Lf4OLI+IbW3E3aRqG5iZmXVbrUuIZmbWcW5gZmbWSW5gZmbWSW5gZmbWSW5gZmbWSW5gZmbWSW5gZmbWSX8DlZ9Tmzov1loAAAAASUVORK5CYII=\n",
2102 | "text/plain": [
2103 | ""
2104 | ]
2105 | },
2106 | "metadata": {
2107 | "needs_background": "light"
2108 | },
2109 | "output_type": "display_data"
2110 | }
2111 | ],
2112 | "source": [
2113 | "plt.plot(range(100), hist.history['loss'], 'r', label='Train Loss')\n",
2114 | "plt.plot(range(100), hist.history['val_loss'], 'b', label='Test Loss')\n",
2115 | "plt.title(\"Dataset1: Neural Network Model on Latent Features: Train-Test Loss \")\n",
2116 | "plt.legend()"
2117 | ]
2118 | },
2119 | {
2120 | "cell_type": "code",
2121 | "execution_count": null,
2122 | "metadata": {},
2123 | "outputs": [],
2124 | "source": []
2125 | }
2126 | ],
2127 | "metadata": {
2128 | "kernelspec": {
2129 | "display_name": "Python 3",
2130 | "language": "python",
2131 | "name": "python3"
2132 | },
2133 | "language_info": {
2134 | "codemirror_mode": {
2135 | "name": "ipython",
2136 | "version": 3
2137 | },
2138 | "file_extension": ".py",
2139 | "mimetype": "text/x-python",
2140 | "name": "python",
2141 | "nbconvert_exporter": "python",
2142 | "pygments_lexer": "ipython3",
2143 | "version": "3.7.1"
2144 | }
2145 | },
2146 | "nbformat": 4,
2147 | "nbformat_minor": 2
2148 | }
2149 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/__pycache__/lstm.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Project-UtilityFunctions/__pycache__/lstm.cpython-37.pyc
--------------------------------------------------------------------------------
/Project-UtilityFunctions/classificationlibrary.py:
--------------------------------------------------------------------------------
1 | #Libraries for feature encoding
2 | from sklearn.preprocessing import LabelEncoder
3 |
4 | #Libraries for classification
5 | from sklearn.linear_model import LogisticRegression
6 | from sklearn.tree import DecisionTreeClassifier
7 | from sklearn.neighbors import KNeighborsClassifier
8 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
9 | from sklearn.naive_bayes import GaussianNB
10 | from sklearn.svm import SVC
11 | from sklearn.ensemble import RandomForestClassifier #RandomForestClassifier: Falls under wrapper methods (feature importance)
12 | from sklearn.ensemble import ExtraTreesClassifier #ExtraTreesClassifier: Falls under wrapper methods (feature importance)
13 | from sklearn.neighbors import KNeighborsClassifier
14 |
15 | #Libraries to measure the accuracy
16 | from sklearn import metrics
17 | from sklearn.metrics import accuracy_score
18 |
19 | #import pandas library
20 | import pandas as pd
21 |
#This function is used to perform classification using DecisionTreeClassifier
def classifyUsingDecisionTreeClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train and evaluate a DecisionTreeClassifier.

    Both arguments are DataFrames whose last column is the class label and
    whose remaining columns are the (already encoded/scaled) features.
    Returns (classifier, training accuracy, testing accuracy).
    """
    print("****** Start classification training using DecisionTreeClassifier *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values
    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    # Fit ONE encoder on the union of train/test labels so both sets share a
    # consistent label->integer mapping. Fitting separate encoders (as before)
    # silently mis-aligns the encodings whenever the two sets do not contain
    # the exact same label values, corrupting the reported test accuracy.
    labelencoder = LabelEncoder()
    labelencoder.fit(list(ytrain) + list(ytest))
    ytrain = labelencoder.transform(ytrain)
    ytest = labelencoder.transform(ytest)

    classifier = DecisionTreeClassifier()
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainingAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainingAccuracy)

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testingAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testingAccuracy)
    print("\n****** End classification training using DecisionTreeClassifier *****\n")
    return classifier, trainingAccuracy, testingAccuracy
48 |
#This function is used to perform classification using LogisticRegression
def classifyUsingLogisticRegression(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train and evaluate a LogisticRegression classifier.

    Both arguments are DataFrames whose last column is the class label and
    whose remaining columns are the (already encoded/scaled) features.
    Returns (classifier, training accuracy, testing accuracy).
    """
    print("****** Start classification training using LogisticRegression *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values
    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    # Fit ONE encoder on the union of train/test labels so both sets share a
    # consistent label->integer mapping. Fitting separate encoders (as before)
    # silently mis-aligns the encodings whenever the two sets do not contain
    # the exact same label values, corrupting the reported test accuracy.
    labelencoder = LabelEncoder()
    labelencoder.fit(list(ytrain) + list(ytest))
    ytrain = labelencoder.transform(ytrain)
    ytest = labelencoder.transform(ytest)

    classifier = LogisticRegression()
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainingAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainingAccuracy)

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testingAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testingAccuracy)
    print("\n****** End classification training using LogisticRegression *****\n")
    return classifier, trainingAccuracy, testingAccuracy
75 |
#This function is used to perform classification using LinearDiscriminantAnalysis
def classifyUsingLinearDiscriminantAnalysis(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train and evaluate a LinearDiscriminantAnalysis classifier.

    Both arguments are DataFrames whose last column is the class label and
    whose remaining columns are the (already encoded/scaled) features.
    Returns (classifier, training accuracy, testing accuracy).
    """
    print("****** Start classification training using LinearDiscriminantAnalysis *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values
    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    # Fit ONE encoder on the union of train/test labels so both sets share a
    # consistent label->integer mapping. Fitting separate encoders (as before)
    # silently mis-aligns the encodings whenever the two sets do not contain
    # the exact same label values, corrupting the reported test accuracy.
    labelencoder = LabelEncoder()
    labelencoder.fit(list(ytrain) + list(ytest))
    ytrain = labelencoder.transform(ytrain)
    ytest = labelencoder.transform(ytest)

    classifier = LinearDiscriminantAnalysis()
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainingAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainingAccuracy)

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testingAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testingAccuracy)
    print("\n****** End classification training using LinearDiscriminantAnalysis *****\n")
    return classifier, trainingAccuracy, testingAccuracy
102 |
#This function is used to perform classification using GaussianNaiveBayes
def classifyUsingGaussianNB(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train and evaluate a Gaussian Naive Bayes classifier.

    Both arguments are DataFrames whose last column is the class label and
    whose remaining columns are the (already encoded/scaled) features.
    Returns (classifier, training accuracy, testing accuracy).
    """
    # Original messages misspelled "Guassian"; corrected here.
    print("****** Start classification training using GaussianNaiveBayes *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values
    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    # Fit ONE encoder on the union of train/test labels so both sets share a
    # consistent label->integer mapping. Fitting separate encoders (as before)
    # silently mis-aligns the encodings whenever the two sets do not contain
    # the exact same label values, corrupting the reported test accuracy.
    labelencoder = LabelEncoder()
    labelencoder.fit(list(ytrain) + list(ytest))
    ytrain = labelencoder.transform(ytrain)
    ytest = labelencoder.transform(ytest)

    classifier = GaussianNB()
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainingAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainingAccuracy)

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testingAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testingAccuracy)
    print("\n****** End classification training using GaussianNaiveBayes *****\n")
    return classifier, trainingAccuracy, testingAccuracy
129 |
#This function is used to perform classification using RandomForestClassifier
def classifyUsingRandomForestClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train and evaluate a RandomForestClassifier (100 trees).

    Both arguments are DataFrames whose last column is the class label and
    whose remaining columns are the (already encoded/scaled) features.
    Returns (classifier, training accuracy, testing accuracy).
    """
    print("****** Start classification training using RandomForestClassifier *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values
    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    # Fit ONE encoder on the union of train/test labels so both sets share a
    # consistent label->integer mapping. Fitting separate encoders (as before)
    # silently mis-aligns the encodings whenever the two sets do not contain
    # the exact same label values, corrupting the reported test accuracy.
    labelencoder = LabelEncoder()
    labelencoder.fit(list(ytrain) + list(ytest))
    ytrain = labelencoder.transform(ytrain)
    ytest = labelencoder.transform(ytest)

    classifier = RandomForestClassifier(n_estimators=100)
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainingAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainingAccuracy)

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testingAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testingAccuracy)
    print("\n****** End classification training using RandomForestClassifier *****\n")
    return classifier, trainingAccuracy, testingAccuracy
156 |
#This function is used to perform classification using ExtraTreesClassifier
def classifyUsingExtraTreesClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train an ExtraTrees ensemble (100 trees) and report train/test accuracy.

    Both datasets must have the label as their LAST column.
    Returns (classifier, trainAccuracy, testAccuracy).
    """
    print("****** Start classification training using ExtraTreesClassifier *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values

    print("trainingEncodedAndScaledDataset.shape: ", trainingEncodedAndScaledDataset.shape)

    #Fit the encoder on the TRAINING labels only so train and test share one mapping.
    labelencoder_ytrain = LabelEncoder()
    ytrain = labelencoder_ytrain.fit_transform(ytrain)

    classifier = ExtraTreesClassifier(n_estimators=100)
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainAccuracy)

    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    print("testingEncodedAndScaledDataset.shape: ", testingEncodedAndScaledDataset.shape)

    #BUGFIX: do not fit a second LabelEncoder on the test labels - that can
    #assign different integers to the same class names. Reuse the training
    #mapping; labels unseen in training become -1 and count as errors.
    mapping = {className: index for index, className in enumerate(labelencoder_ytrain.classes_)}
    ytest = [mapping.get(value, -1) for value in ytest]

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testAccuracy)
    print("\n****** End classification training using ExtraTreesClassifier *****\n")
    return classifier, trainAccuracy, testAccuracy
187 |
def classifyUsingKNNClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train a 1-nearest-neighbour classifier and report train/test accuracy.

    Both datasets must have the label as their LAST column.
    Returns (classifier, trainAccuracy, testAccuracy).
    """
    print("****** Start classification training using KNeighborsClassifier *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values

    #Fit the encoder on the TRAINING labels only so train and test share one mapping.
    labelencoder_ytrain = LabelEncoder()
    ytrain = labelencoder_ytrain.fit_transform(ytrain)

    classifier = KNeighborsClassifier(n_neighbors=1)
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainAccuracy)

    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    print("testingEncodedAndScaledDataset.shape: ", testingEncodedAndScaledDataset.shape)

    #BUGFIX: do not fit a second LabelEncoder on the test labels - that can
    #assign different integers to the same class names. Reuse the training
    #mapping; labels unseen in training become -1 and count as errors.
    mapping = {className: index for index, className in enumerate(labelencoder_ytrain.classes_)}
    ytest = [mapping.get(value, -1) for value in ytest]

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testAccuracy)
    print("\n****** End classification training using KNeighborsClassifier *****\n")
    return classifier, trainAccuracy, testAccuracy
215 |
def findingOptimumNumberOfNeighboursForKNN(trainingEncodedAndScaledDataset):
    """Search odd K values (1..149) with 10-fold CV and plot the error curve.

    Prints the mean cross-validated accuracy for each K, reports the K with
    the lowest misclassification error, and shows a matplotlib plot.
    """
    print("****** Start finding optimum number of neighbours for KNN *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, len(trainingEncodedAndScaledDataset.columns)-1].values

    labelencoder_ytrain = LabelEncoder()
    ytrain = labelencoder_ytrain.fit_transform(ytrain)

    # Candidate neighbour counts: odd values only (avoids voting ties).
    neighbors = list(range(1, 150, 2))

    # Mean 10-fold cross-validation accuracy for every candidate K.
    cv_scores = []
    for k in neighbors:
        meanScore = cross_val_score(KNeighborsClassifier(n_neighbors=k),
                                    xtrain, ytrain, cv=10, scoring='accuracy').mean()
        cv_scores.append(meanScore)
        print("With number of neighbours as {}, average score is {}".format(k, meanScore))

    # Convert accuracy into misclassification error and pick the minimiser.
    mse = [1 - score for score in cv_scores]
    optimal_k = neighbors[mse.index(min(mse))]
    print("The optimal number of neighbors is {}".format(optimal_k))

    # Visualise misclassification error as a function of K.
    plt.plot(neighbors, mse)
    plt.xlabel("Number of Neighbors K")
    plt.ylabel("Misclassification Error")
    plt.show()

    print("****** End finding optimum number of neighbours for KNN *****")
--------------------------------------------------------------------------------
/Project-UtilityFunctions/dataformatinglibrary.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | #Libraries for printing tables in readable format
4 | from tabulate import tabulate
5 |
6 | #Library for creating an excel sheet
7 | import xlsxwriter
8 |
#This function writes a 2-D array to an excel workbook, one element per row.
def createExcelFromArray(array, fileName):
    """Write each row of *array* into a new worksheet of workbook *fileName*.

    Row i of the worksheet receives array[i], starting at column 0.
    """
    workbook = xlsxwriter.Workbook(fileName)
    worksheet = workbook.add_worksheet()

    #BUGFIX(naming): the enumerate index is the worksheet ROW and the
    #constant 0 is the starting COLUMN; the original code had the two
    #names swapped, which made the write_row call read backwards.
    firstColumn = 0
    for rowIndex, rowData in enumerate(array):
        worksheet.write_row(rowIndex, firstColumn, rowData)

    workbook.close()
18 |
def printList (list, heading):
    """Print the elements of *list* as a one-column org-mode table.

    Does nothing for an empty list.
    """
    #BUGFIX(side effect): build the table rows in a new list instead of
    #overwriting the caller's elements with their str() forms in place.
    rows = [str(item).strip("[]").split(", ") for item in list]
    if rows:
        print(tabulate(rows, headers=[heading], tablefmt='orgtbl')+"\n")
24 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/datainspectionlibrary.py:
--------------------------------------------------------------------------------
1 | #Data formating library
2 | from dataformatinglibrary import printList
3 |
4 | #Data pre-processing library
5 | from datapreprocessinglibrary import checkForMissingValues
6 | from datapreprocessinglibrary import checkForDulicateRecords
7 |
8 | #Utility functions
9 | from defineInputs import getLabelName
10 |
11 | #Libraries for feature selection
12 | #SelectKBest, Chi2: Falls under filter methods (univariate selection)
13 | from sklearn.feature_selection import SelectKBest
14 | from sklearn.feature_selection import chi2
15 | from sklearn.feature_selection import SelectFromModel
16 | from sklearn.ensemble import RandomForestClassifier #RandomForestClassifier: Falls under wrapper methods (feature importance)
17 | from sklearn.ensemble import ExtraTreesClassifier #ExtraTreesClassifier: Falls under wrapper methods (feature importance)
18 |
19 | import numpy as np
20 |
#This function is used to check the statistics of a given dataSet
def getStatisticsOfData (dataSet):
    """Print summary statistics of the dataset.

    Reports shape, feature counts split by categorical/numerical type,
    missing-value and duplicate checks, and the label distribution.
    Duplicates are dropped only on the local reference used for the label
    statistics; the caller's DataFrame is not modified.
    """
    print("***** Start checking the statistics of the dataSet *****\n")

    labelName = getLabelName()
    #Number of rows and columns in the dataset
    print("***** Shape (number of rows and columns) in the dataset: ", dataSet.shape)

    #Total number of features in the dataset (the label column is excluded)
    numberOfColumnsInTheDataset = len(dataSet.drop([labelName],axis=1).columns)
    print("***** Total number of features in the dataset: ",numberOfColumnsInTheDataset)

    #Categorical features = all feature columns minus the numeric ones
    categoricalFeaturesInTheDataset = list(set(dataSet.drop([labelName],axis=1).columns) - set(dataSet.drop([labelName],axis=1)._get_numeric_data().columns))
    print("***** Number of categorical features in the dataset: ",len(categoricalFeaturesInTheDataset))

    #Total number of numerical features in the dataset
    numericalFeaturesInTheDataset = list(dataSet.drop([labelName],axis=1)._get_numeric_data().columns)
    print("***** Number of numerical features in the dataset: ",len(numericalFeaturesInTheDataset))

    #Names of categorical features in the dataset
    print("\n***** Names of categorical features in dataset *****\n")
    printList(categoricalFeaturesInTheDataset,'Categorical features in dataset')

    #Names of numerical features in the dataset
    print("\n***** Names of numerical features in dataset *****\n")
    printList(numericalFeaturesInTheDataset,'Numerical features in the dataset')

    #Checking for any missing values in the data set
    anyMissingValuesInTheDataset = checkForMissingValues(dataSet)
    print("\n***** Are there any missing values in the data set: ", anyMissingValuesInTheDataset)

    anyDuplicateRecordsInTheDataset = checkForDulicateRecords(dataSet)
    print("\n***** Are there any duplicate records in the data set: ", anyDuplicateRecordsInTheDataset)
    #Drop duplicates on the local reference so the label statistics below
    #count each record only once (the caller's DataFrame is untouched)
    if (anyDuplicateRecordsInTheDataset):
        dataSet = dataSet.drop_duplicates()
        print("Number of records in the dataSet after removing the duplicates: ", len(dataSet.index))

    #How many number of different values for label that are present in the dataset
    print('\n****** Number of different values for label that are present in the dataset: ',dataSet[labelName].nunique())
    #What are the different values for label in the dataset
    print('\n****** Here is the list of unique label types present in the dataset ***** \n')
    printList(list(dataSet[labelName].unique()),'Unique label types in the dataset')

    #What are the different values in each of the categorical features in the dataset.
    #Recomputed over ALL columns here, so the (categorical) label is included too.
    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    categoricalFeaturesInTheDataset = list(set(dataSet.columns) - set(dataSet._get_numeric_data().columns))
    for feature in categoricalFeaturesInTheDataset:
        uniq = np.unique(dataSet[feature])
        print('\n{}: {} '.format(feature,len(uniq)))
        printList(dataSet[feature].unique(),'distinct values')

    print('\n****** Label distribution in the dataset *****\n')
    print(dataSet[labelName].value_counts())
    print()

    print("\n***** End checking the statistics of the dataSet *****")
--------------------------------------------------------------------------------
/Project-UtilityFunctions/dataloadinglibrary.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
#This function is used to load a CSV file, given its absolute path,
#into a pandas DataFrame
def loadCSV (fileNameWithAbsolutePath):
    """Read the CSV at the given absolute path and return it as a DataFrame."""
    return pd.read_csv(fileNameWithAbsolutePath)
8 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/datapreprocessinglibrary.py:
--------------------------------------------------------------------------------
1 | from sklearn.model_selection import train_test_split
2 | from defineInputs import getLabelName
3 |
#This function is used to check for missing values in a given dataSet
def checkForMissingValues (dataSet):
    """Return True when the dataset contains at least one null/NaN cell."""
    return dataSet.isnull().any().any()
8 |
#This function is used to check for duplicate records in a given dataSet
def checkForDulicateRecords (dataSet):
    """Print record counts and return True when duplicate rows exist."""
    totalRecordsInDataset = len(dataSet.index)
    numberOfUniqueRecordsInDataset = len(dataSet.drop_duplicates().index)
    print('Total number of records in the dataset: {}\nUnique records in the dataset: {}'.format(totalRecordsInDataset,numberOfUniqueRecordsInDataset))
    return totalRecordsInDataset != numberOfUniqueRecordsInDataset
16 |
#Split the complete dataSet into training dataSet and testing dataSet
def splitCompleteDataSetIntoTrainingSetAndTestingSet(completeDataSet):
    """Split the dataset 60%/40% into train/test with a fixed seed (42).

    Returns (trainFeatures, testFeatures, trainLabels, testLabels).
    """
    labelName = getLabelName()
    label = completeDataSet[labelName]
    features = completeDataSet.drop(labelName, axis=1)
    featuresTrain, featuresTest, labelTrain, labelTest = train_test_split(
        features, label, test_size=0.4, random_state=42)
    print("features.shape: ", features.shape)
    print("label.shape: ", label.shape)
    return featuresTrain, featuresTest, labelTrain, labelTest
26 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/defineInputs.py:
--------------------------------------------------------------------------------
#This function is to maintain the name of the label at a single place
def getLabelName():
    """Return the column name used as the classification label."""
    labelColumnName = 'attack_type'
    return labelColumnName
4 |
def getPathToTrainingAndTestingDataSets():
    """Return (trainingCsvPath, testingCsvPath) for the NSL-KDD dataset."""
    datasetDirectory = "D:\\Learning\\DeepLearning\\Project-AttackDetectionSystem\\Datasets\\NSL-KDD\\"
    trainingFileNameWithAbsolutePath = datasetDirectory + "KDDTrain+_20Percent.csv"
    testingFileNameWithAbsolutePath = datasetDirectory + "KDDTest-21.csv"
    return trainingFileNameWithAbsolutePath, testingFileNameWithAbsolutePath
9 |
def modelPerformanceReport():
    """Return the absolute path of the model-performance report workbook."""
    # Use a local name distinct from the function name (the original
    # shadowed the function with a same-named local variable).
    reportPath = 'D:\\Learning\\DeepLearning\\Project-AttackDetectionSystem\\ModelsAndTheirPerformanceReports\\ModelsPerformance031442020.1.xlsx'
    return reportPath
13 |
def getPathToGenerateModels():
    """Return the directory where generated models are written."""
    return 'D:\\Learning\\DeepLearning\\Project-AttackDetectionSystem\\ModelsAndTheirPerformanceReports\\'
17 |
### Models with the below configuration will be generated
def defineArrayOfModels():
    """Return the model-configuration table: one header row, then one row per model."""
    headerRow = [
        "FeatureSelectionTechnique",
        "FeatureEncodingTechnique",
        "FeatureNormalizationTechnique",
        "ClassificationTechnique",
        "TrainAccuraccy",
        "TestAccuraccy",
        "ModelName",
        "ModelFileName",
    ]
    # Every model shares the same pre-processing; only the classifier differs.
    classifierNames = [
        "DecisonTree",
        "RandomForestClassifier",
        "ExtraTreesClassifier",
        "KNN",
    ]
    arrayOfModels = [headerRow]
    for classifierName in classifierNames:
        arrayOfModels.append([
            "ExtraTreesClassifier",
            "OneHotEncoder",
            "Standardization",
            classifierName,
        ])
    print(arrayOfModels)
    return arrayOfModels
58 |
def defineArrayForPreProcessing():
    """Return the pre-processing configuration rows (selection, encoding, scaling)."""
    arrayOfModels = [["ExtraTreesClassifier", "OneHotEncoder", "Standardization"]]
    print(arrayOfModels)
    return arrayOfModels
69 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/featureencodinglibrary.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 | #Libraries for feature encoding
5 | from sklearn.preprocessing import LabelEncoder
6 | import category_encoders as ce
7 |
8 | #Utility functions
9 | from defineInputs import getLabelName
10 | from dataformatinglibrary import printList
11 |
#This function is used to perform one hot encoding on the categorical features in the given dataset
def featureEncodingUsingOneHotEncoder(dataSetForFeatureEncoding):
    """One-hot encode every categorical feature (label excluded) via pd.get_dummies.

    Returns a new DataFrame with the dummy columns added and the label moved
    back to the last position.
    """
    print("****** Start one hot encoding on the categorical features in the given dataset *****")

    labelName = getLabelName()
    #Extract the categorical features, leave the label
    categoricalColumnsInTheDataSet = dataSetForFeatureEncoding.drop([labelName],axis=1).select_dtypes(['object'])
    #Get the names of the categorical features
    categoricalColumnNames = categoricalColumnsInTheDataSet.columns.values

    print("****** Number of features before one hot encoding: ",len(dataSetForFeatureEncoding.columns))
    print("****** Number of categorical features in the dataset: ",len(categoricalColumnNames))
    print("****** Categorical feature names in the dataset: ",categoricalColumnNames)

    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    #Note: this set difference runs over ALL columns, so the (categorical)
    #label column's distinct values are listed here as well.
    categoricalFeaturesInTheDataset = list(set(dataSetForFeatureEncoding.columns) - set(dataSetForFeatureEncoding._get_numeric_data().columns))
    for feature in categoricalFeaturesInTheDataset:
        uniq = np.unique(dataSetForFeatureEncoding[feature])
        print('\n{}: {} '.format(feature,len(uniq)))
        printList(dataSetForFeatureEncoding[feature].unique(),'distinct values')

    #Using get_dummies function to get the dummy variables for the categorical columns
    onHotEncodedDataSet=pd.get_dummies(dataSetForFeatureEncoding, columns=categoricalColumnNames, prefix=categoricalColumnNames)

    #Move the label column to the end
    label = onHotEncodedDataSet.pop(labelName)
    onHotEncodedDataSet[labelName] = label
    numberOfColumnsInOneHotEncodedDataset = len(onHotEncodedDataSet.columns)
    print("****** Number of features after one hot encoding: ",numberOfColumnsInOneHotEncodedDataset)

    print("****** End one hot encoding on the categorical features in the given dataset *****\n")
    return onHotEncodedDataSet
45 |
#This function is used to perform label encoding on the categorical features in the given dataset
def featureEncodingUsingLabelEncoder(dataSetForFeatureEncoding):
    """Label-encode every categorical feature (label column excluded) in place.

    The input DataFrame is modified and also returned.
    """
    print("****** Start label encoding on the categorical features in the given dataset *****")

    labelName = getLabelName()
    # Names of the object-typed feature columns, with the label excluded.
    categoricalColumnNames = (dataSetForFeatureEncoding
                              .drop([labelName], axis=1)
                              .select_dtypes(['object'])
                              .columns.values)

    print("****** Number of features before label encoding: ", len(dataSetForFeatureEncoding.columns))
    print("****** Number of categorical features in the dataset: ", len(categoricalColumnNames))
    print("****** Categorical feature names in the dataset: ", categoricalColumnNames)

    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    labelEncoder = LabelEncoder()
    for feature in categoricalColumnNames:
        distinctValues = np.unique(dataSetForFeatureEncoding[feature])
        print('\n{}: {} '.format(feature, len(distinctValues)))
        printList(dataSetForFeatureEncoding[feature].unique(), 'distinct values')
        # Replace the string categories of this column with integer codes.
        dataSetForFeatureEncoding[feature] = labelEncoder.fit_transform(dataSetForFeatureEncoding[feature])
    print("****** Number of features after label encoding: ", len(dataSetForFeatureEncoding.columns))

    print("****** End label encoding on the categorical features in the given dataset *****\n")
    return dataSetForFeatureEncoding
71 |
#This function is used to perform binary encoding on the categorical features in the given dataset
def featureEncodingUsingBinaryEncoder(dataSetForFeatureEncoding):
    """Binary-encode every categorical feature (label excluded) with category_encoders.

    Each categorical column is replaced by a set of binary (0/1) columns
    produced by ce.BinaryEncoder; the label column is re-attached as the
    last column of the returned DataFrame.
    """
    print("****** Start binary encoding on the categorical features in the given dataset *****")

    labelName = getLabelName()
    #Extract the categorical features, leave the label
    categoricalColumnsInTheDataSet = dataSetForFeatureEncoding.drop([labelName],axis=1).select_dtypes(['object'])
    #Get the names of the categorical features
    categoricalColumnNames = categoricalColumnsInTheDataSet.columns.values

    print("****** Number of features before binary encoding: ",len(dataSetForFeatureEncoding.columns))
    print("****** Number of categorical features in the dataset: ",len(categoricalColumnNames))
    print("****** Categorical feature names in the dataset: ",categoricalColumnNames)

    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    #Keep ONLY the label column (drop every column not named labelName) -> single-column DataFrame
    label = dataSetForFeatureEncoding.drop(dataSetForFeatureEncoding.loc[:, ~dataSetForFeatureEncoding.columns.isin([labelName])].columns, axis = 1)
    for feature in categoricalColumnNames:
        uniq = np.unique(dataSetForFeatureEncoding[feature])
        print('\n{}: {} '.format(feature,len(uniq)))
        printList(dataSetForFeatureEncoding[feature].unique(),'distinct values')
        #Single-column DataFrame holding just this feature
        featureColumns = dataSetForFeatureEncoding.drop(dataSetForFeatureEncoding.loc[:, ~dataSetForFeatureEncoding.columns.isin([feature])].columns, axis = 1)
        binaryEncoder = ce.BinaryEncoder(cols = [feature])
        binaryEncodedFeature = binaryEncoder.fit_transform(featureColumns, label)
        #Append the encoded columns, then drop the original categorical column
        dataSetForFeatureEncoding = dataSetForFeatureEncoding.join(binaryEncodedFeature)
        dataSetForFeatureEncoding = dataSetForFeatureEncoding.drop(feature, axis=1)

    #Move the label to the end: drop it, then re-attach it as the last column
    dataSetForFeatureEncoding = dataSetForFeatureEncoding.drop(labelName, axis=1)
    dataSetForFeatureEncoding[labelName] = label
    print("****** Number of features after binary encoding: ",len(dataSetForFeatureEncoding.columns))

    print("****** End binary encoding on the categorical features in the given dataset *****\n")
    return dataSetForFeatureEncoding
104 |
#This function is used to perform frequency encoding on the categorical features in the given dataset
def featureEncodingUsingFrequencyEncoder(dataSetForFeatureEncoding):
    """Frequency-encode every categorical feature (label excluded).

    Each category value is replaced by its relative frequency
    (group count / total rows). The label column is re-attached as the
    last column of the returned DataFrame.
    """
    print("****** Start frequency encoding on the categorical features in the given dataset *****")

    labelName = getLabelName()
    #Extract the categorical features, leave the label
    categoricalColumnsInTheDataSet = dataSetForFeatureEncoding.drop([labelName],axis=1).select_dtypes(['object'])
    #Get the names of the categorical features
    categoricalColumnNames = categoricalColumnsInTheDataSet.columns.values

    #BUGFIX: this message previously said "label encoding" (copy-paste error)
    print("****** Number of features before frequency encoding: ",len(dataSetForFeatureEncoding.columns))
    print("****** Number of categorical features in the dataset: ",len(categoricalColumnNames))
    print("****** Categorical feature names in the dataset: ",categoricalColumnNames)

    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    #Keep ONLY the label column as a single-column DataFrame
    label = dataSetForFeatureEncoding.drop(dataSetForFeatureEncoding.loc[:, ~dataSetForFeatureEncoding.columns.isin([labelName])].columns, axis = 1)
    for feature in categoricalColumnNames:
        uniq = np.unique(dataSetForFeatureEncoding[feature])
        print('\n{}: {} '.format(feature,len(uniq)))
        printList(dataSetForFeatureEncoding[feature].unique(),'distinct values')
        #Relative frequency of each category value in this column
        frequencyEncoder = dataSetForFeatureEncoding.groupby(feature).size()/len(dataSetForFeatureEncoding)
        dataSetForFeatureEncoding.loc[:,feature+"_Encoded"] = dataSetForFeatureEncoding[feature].map(frequencyEncoder)
        dataSetForFeatureEncoding = dataSetForFeatureEncoding.drop(feature, axis=1)

    #Move the label to the end: drop it, then re-attach it as the last column
    dataSetForFeatureEncoding = dataSetForFeatureEncoding.drop(labelName, axis=1)
    dataSetForFeatureEncoding[labelName] = label
    print("****** Number of features after frequency encoding: ",len(dataSetForFeatureEncoding.columns))

    print("****** End frequency encoding on the categorical features in the given dataset *****\n")
    return dataSetForFeatureEncoding
--------------------------------------------------------------------------------
/Project-UtilityFunctions/featurescalinglibrary.py:
--------------------------------------------------------------------------------
1 | #Utility functions
2 | from defineInputs import getLabelName
3 |
4 | import pandas as pd
5 | import numpy as np
6 |
7 | #Libraries for feature scaling
8 | from sklearn.preprocessing import MinMaxScaler
9 | from sklearn.preprocessing import StandardScaler
10 | from sklearn.preprocessing import Binarizer
11 | from sklearn.preprocessing import Normalizer
12 |
13 |
#This function is used to perform min-max feature scaling on the features in the given dataset
#Formula for Min-Max scaler feature scaling is (Xi-Xmin)/(Xmax-Xmin)
def featureScalingUsingMinMaxScaler(dataSetForFeatureScaling):
    """Scale all feature columns into [0, 1] using sklearn's MinMaxScaler.

    Assumes the label column (getLabelName()) is the LAST column of the
    given DataFrame. Returns a new DataFrame of scaled features with the
    label re-attached as the last column (index reset to 0..n-1).

    NOTE(review): `pop` below removes the label column from the CALLER's
    DataFrame in place - confirm callers do not rely on it afterwards.
    """
    print("****** Start feature scaling of the features present in the dataset using MinMaxScaler *****")

    numberOfColumnsInEncodedDataset = len(dataSetForFeatureScaling.columns)
    #Snapshot taken BEFORE the pop below, so this array still contains the label column
    dataSetInArrayFormat = dataSetForFeatureScaling.values

    #Remove the label column from the dataset
    labelName = getLabelName()
    label = dataSetForFeatureScaling.pop(labelName)

    print(dataSetInArrayFormat)
    #Drop the trailing label column from the snapshot; everything else is a feature
    features = dataSetInArrayFormat[:,0:numberOfColumnsInEncodedDataset-1]
    print("\n****** Number of features in the dataset before performing scaling: ",np.size(features,1))
    print("\n****** Features in the dataset before performing scaling ***** \n",features)

    #Perform feature scaling
    scaler=MinMaxScaler(feature_range=(0,1))
    scaledFeatures=scaler.fit_transform(features)
    print("\n****** Number of features in the dataset after performing scaling: ",np.size(scaledFeatures,1))
    print("\n****** Features in the dataset after performing scaling ***** \n",scaledFeatures)

    #Convert from array format to dataframe; columns now exclude the popped label
    scaledFeatures = pd.DataFrame(scaledFeatures, columns=dataSetForFeatureScaling.columns)
    scaledFeatures = scaledFeatures.reset_index(drop=True)
    label = label.reset_index(drop=True)
    scaledFeatures[labelName]=label

    print("\n****** End of feature scaling of the features present in the dataset using MinMaxScaler *****\n")
    return scaledFeatures
45 |
#This function is used to perform StandardScaler feature scaling on the features in the given dataset
#This is also called as Z-score normalization
#Formula for StandardScaler feature scaling is z = (x – mean) / standard-deviation.
def featureScalingUsingStandardScalar(dataSetForFeatureScaling):
    """Standardize all feature columns (zero mean, unit variance).

    Assumes the label column (getLabelName()) is the LAST column of the
    given DataFrame. Returns a new DataFrame of scaled features with the
    label re-attached as the last column (index reset to 0..n-1).

    NOTE(review): `pop` below removes the label column from the CALLER's
    DataFrame in place - confirm callers do not rely on it afterwards.
    """
    print("****** Start feature scaling of the features present in the dataset using StandardScalar *****")

    numberOfColumnsInEncodedDataset = len(dataSetForFeatureScaling.columns)
    #Snapshot taken BEFORE the pop below, so this array still contains the label column
    dataSetInArrayFormat = dataSetForFeatureScaling.values

    #Remove the label column from the dataset
    labelName = getLabelName()
    label = dataSetForFeatureScaling.pop(labelName)

    print(dataSetInArrayFormat)
    #Drop the trailing label column from the snapshot; everything else is a feature
    features = dataSetInArrayFormat[:,0:numberOfColumnsInEncodedDataset-1]
    print("\n****** Number of features in the dataset before performing scaling: ",np.size(features,1))
    print("\n****** Features in the dataset before performing scaling ***** \n",features)

    #Perform feature scaling
    scaler=StandardScaler()
    scaledFeatures=scaler.fit_transform(features)
    print("\n****** Number of features in the dataset after performing scaling: ",np.size(scaledFeatures,1))
    print("\n****** Features in the dataset after performing scaling ***** \n",scaledFeatures)

    #Convert from array format to dataframe; columns now exclude the popped label
    scaledFeatures = pd.DataFrame(scaledFeatures, columns=dataSetForFeatureScaling.columns)
    scaledFeatures = scaledFeatures.reset_index(drop=True)
    label = label.reset_index(drop=True)
    scaledFeatures[labelName]=label
    print("scaledFeatures.head(): ",scaledFeatures.head())
    print("scaledFeatures.shape: ",scaledFeatures.shape)

    print("\n****** End of feature scaling of the features present in the dataset using StandardScalar *****\n")
    return scaledFeatures
80 |
#This function is used to perform Binarizing feature scaling on the features in the given dataset
#It is used for binary thresholding of an array like matrix.
def featureScalingUsingBinarizer(dataSetForFeatureScaling):
    """Threshold every feature column to 0/1 using sklearn's Binarizer.

    Assumes the label column (getLabelName()) is the LAST column of the
    given DataFrame. Returns a new DataFrame of binarized features with
    the label re-attached as the last column (index reset to 0..n-1).

    NOTE(review): `pop` removes the label column from the CALLER's
    DataFrame in place - confirm callers do not rely on it afterwards.
    """
    print("****** Start feature scaling of the features present in the dataset using Binarizer *****")

    numberOfColumnsInEncodedDataset = len(dataSetForFeatureScaling.columns)
    #Snapshot taken BEFORE the pop below, so this array still contains the label column
    dataSetInArrayFormat = dataSetForFeatureScaling.values

    #Remove the label column from the dataset
    labelName = getLabelName()
    label = dataSetForFeatureScaling.pop(labelName)

    print(dataSetInArrayFormat)
    #Drop the trailing label column from the snapshot; everything else is a feature
    features = dataSetInArrayFormat[:,0:numberOfColumnsInEncodedDataset-1]
    print("\n****** Number of features in the dataset before performing scaling: ",np.size(features,1))
    print("\n****** Features in the dataset before performing scaling ***** \n",features)

    #Perform feature scaling
    #BUGFIX: 'threshold' must be passed by keyword - recent scikit-learn
    #versions reject the positional form Binarizer(0.0).
    scaledFeatures=Binarizer(threshold=0.0).fit(features).transform(features)
    print("\n****** Number of features in the dataset after performing scaling: ",np.size(scaledFeatures,1))
    print("\n****** Features in the dataset after performing scaling ***** \n",scaledFeatures)

    #Convert from array format to dataframe; columns now exclude the popped label
    scaledFeatures = pd.DataFrame(scaledFeatures, columns=dataSetForFeatureScaling.columns)
    scaledFeatures = scaledFeatures.reset_index(drop=True)
    label = label.reset_index(drop=True)
    scaledFeatures[labelName]=label

    print("\n****** End of feature scaling of the features present in the dataset using Binarizer *****\n")
    return scaledFeatures
111 |
#This function is used to perform Normalizing feature scaling on the features in the given dataset
#It is used to rescale each sample.
#Each sample (i.e. each row of the data matrix) with at least one non zero component
#is rescaled independently of other samples so that its norm (l1 or l2) equals one.
def featureScalingUsingNormalizer(dataSetForFeatureScaling):
    """Rescale each ROW of the feature matrix to unit norm via sklearn's Normalizer.

    Assumes the label column (getLabelName()) is the LAST column of the
    given DataFrame. Returns a new DataFrame of normalized features with
    the label re-attached as the last column (index reset to 0..n-1).

    NOTE(review): `pop` below removes the label column from the CALLER's
    DataFrame in place - confirm callers do not rely on it afterwards.
    """
    print("****** Start feature scaling of the features present in the dataset using Normalizer *****")

    numberOfColumnsInEncodedDataset = len(dataSetForFeatureScaling.columns)
    #Snapshot taken BEFORE the pop below, so this array still contains the label column
    dataSetInArrayFormat = dataSetForFeatureScaling.values

    #Remove the label column from the dataset
    labelName = getLabelName()
    label = dataSetForFeatureScaling.pop(labelName)

    print(dataSetInArrayFormat)

    #Drop the trailing label column from the snapshot; everything else is a feature
    features = dataSetInArrayFormat[:,0:numberOfColumnsInEncodedDataset-1]
    print("\n****** Number of features in the dataset before performing scaling: ",np.size(features,1))
    print("\n****** Features in the dataset before performing scaling ***** \n",features)

    #Perform feature scaling
    scaledFeatures=Normalizer().fit(features).transform(features)
    print("\n****** Number of features in the dataset after performing scaling: ",np.size(scaledFeatures,1))
    print("\n****** Features in the dataset after performing scaling ***** \n",scaledFeatures)

    #Convert from array format to dataframe; columns now exclude the popped label
    scaledFeatures = pd.DataFrame(scaledFeatures, columns=dataSetForFeatureScaling.columns)
    scaledFeatures = scaledFeatures.reset_index(drop=True)
    label = label.reset_index(drop=True)
    scaledFeatures[labelName]=label

    print("\n****** End of feature scaling of the features present in the dataset using Normalizer *****\n")
    return scaledFeatures
145 |
146 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/featureselectionlibrary.py:
--------------------------------------------------------------------------------
1 | #Utility functions
2 | from defineInputs import getLabelName
3 |
4 | from featureencodinglibrary import featureEncodingUsingLabelEncoder
5 | from dataformatinglibrary import printList
6 |
7 | #Matplotlib is a plotting library for the Python programming language and its numerical mathematics extension NumPy
8 | import matplotlib.pyplot as plt
9 | from matplotlib.pyplot import figure
10 | import seaborn as sns
11 | import numpy as np
12 | import pandas as pd
13 | import math
14 | import scipy.stats as ss
15 | from collections import Counter
16 | from sklearn.ensemble import RandomForestClassifier #RandomForestClassifier: Falls under wrapper methods (feature importance)
17 | from sklearn.ensemble import ExtraTreesClassifier #ExtraTreesClassifier: Falls under wrapper methods (feature importance)
18 | from sklearn.feature_selection import SelectKBest
19 | from sklearn.feature_selection import chi2
20 | from sklearn.preprocessing import LabelEncoder
21 |
#This function is used to calculate the conditional entropy between a given feature and the target
def conditional_entropy(x, y):
    """Return the conditional entropy H(X|Y) of sequence *x* given *y*."""
    y_counts = Counter(y)
    pair_counts = Counter(zip(x, y))
    total = sum(y_counts.values())
    result = 0
    for pair, pair_occurrences in pair_counts.items():
        p_xy = pair_occurrences / total
        p_y = y_counts[pair[1]] / total
        # log(p_y / p_xy) == -log(p(x|y)); accumulate its expectation.
        result += p_xy * math.log(p_y / p_xy)
    return result
34 |
#This function is used to perform feature selection using TheilU
#In TheilU we calculate the uncertainty coefficient between the given feature and the target
def theil_u(x, y):
    """Return Theil's uncertainty coefficient U(x|y), a value in [0, 1]."""
    h_x_given_y = conditional_entropy(x, y)
    value_counts = Counter(x)
    total = sum(value_counts.values())
    # Marginal entropy of x from its empirical distribution.
    h_x = ss.entropy([count / total for count in value_counts.values()])
    if h_x == 0:
        # x carries no entropy at all, so it is trivially fully "explained".
        return 1
    return (h_x - h_x_given_y) / h_x
47 |
def featureSelectionUsingTheilU(dataSetForFeatureSelection):
    """Keep the columns whose uncertainty coefficient with the label is >= 0.5.

    Computes Theil's U between the label and every column, plots all the
    coefficients as a heatmap, and returns a dataframe containing only the
    sufficiently informative columns. The label column scores 1.0 against
    itself and is therefore always retained.
    """
    print("\n****** Start performing feature selection using TheilU *****")
    print("****** Falls under the group of techniques that use correlation matrix with Heatmap *****")

    labelName = getLabelName()
    label = dataSetForFeatureSelection[labelName]

    theilu = pd.DataFrame(index=[labelName], columns=dataSetForFeatureSelection.columns)
    columns = dataSetForFeatureSelection.columns
    # Work on a copy so the caller's dataframe is not mutated: the original
    # implementation aliased the input and popped columns from it in place.
    dataSetAfterFeatuerSelection = dataSetForFeatureSelection.copy()

    for j in range(0, len(columns)):
        u = theil_u(label.tolist(), dataSetForFeatureSelection[columns[j]].tolist())
        theilu.loc[:, columns[j]] = u
        if u < 0.50:
            # Weakly related to the label -> drop the column.
            dataSetAfterFeatuerSelection.pop(columns[j])

    print('***** Ploting the uncertainty coefficient between the target and each feature *****')
    theilu.fillna(value=np.nan, inplace=True)
    plt.figure(figsize=(30, 1))
    sns.heatmap(theilu, annot=True, fmt='.2f')
    plt.show()

    print('***** Number of columns in the dataSet after feature selection: ', len(dataSetAfterFeatuerSelection.columns))
    print('***** Columns in the dataSet after feature selection: \n', dataSetAfterFeatuerSelection.columns)
    print("****** End performing feature selection using TheilU *****")
    return dataSetAfterFeatuerSelection
76 |
#This function is used to perform feature selection using Chi-squared test
def featureSelectionUsingChisquaredTest(dataSetForFeatureSelection):
    """Keep the 10 features with the highest chi-squared score vs the label."""
    print("\n****** Start performing feature selection using ChisquaredTest *****")
    print("****** Falls under filter methods (univariate selection) *****")

    numberOfFeatureToBeSelected = 10
    labelName = getLabelName()

    # Chi-squared requires non-negative numeric input, so label-encode first.
    dataSetForFeatureSelection = featureEncodingUsingLabelEncoder(dataSetForFeatureSelection)

    features = dataSetForFeatureSelection.drop([labelName], axis=1)
    label = dataSetForFeatureSelection[labelName]

    # Score every feature against the label and keep the top ten.
    selector = SelectKBest(score_func=chi2, k=numberOfFeatureToBeSelected)
    fitBestfeatures = selector.fit(features, label)

    # One table of (feature, score) for readable reporting.
    scoresOfBestFeatures = pd.concat(
        [pd.DataFrame(features.columns), pd.DataFrame(fitBestfeatures.scores_)],
        axis=1)
    scoresOfBestFeatures.columns = ['Features', 'Score']
    print("\n***** Scores for each feature in the dataset are *****")
    print(scoresOfBestFeatures.nlargest(numberOfFeatureToBeSelected, 'Score'))

    # Drop every feature the selector did not retain; the label column is
    # untouched, so it survives into the returned dataframe.
    mask = fitBestfeatures.get_support()
    rejected = [features.columns[j] for j in range(len(mask)) if not mask[j]]
    dataSetAfterFeatuerSelection = dataSetForFeatureSelection.drop(rejected, axis=1)

    print('***** Number of columns in the dataSet after feature selection: ', len(dataSetAfterFeatuerSelection.columns))
    print('***** Columns in the dataSet after feature selection: \n', dataSetAfterFeatuerSelection.columns)
    print("****** End performing feature selection using ChisquaredTest *****")

    return dataSetAfterFeatuerSelection
115 |
#This function is used to perform feature selection using RandomForestClassifier
def featureSelectionUsingRandomForestClassifier(dataSetForFeatureSelection):
    """Keep the features a random forest ranks above the mean importance."""
    print("\n****** Start performing feature selection using RandomForestClassifier *****")
    print("****** Falls under wrapper methods (feature importance) *****")

    labelName = getLabelName()

    # Encode categoricals before fitting the forest.
    dataSetForFeatureSelection = featureEncodingUsingLabelEncoder(dataSetForFeatureSelection)
    features = dataSetForFeatureSelection.drop([labelName], axis=1)
    label = dataSetForFeatureSelection[labelName]

    labelTransformed = LabelEncoder().fit_transform(label)

    print("****** RandomForestClassification is in progress *****")
    trainedforest = RandomForestClassifier(n_estimators=700).fit(features, labelTransformed)
    importances = trainedforest.feature_importances_
    # Only features scoring strictly above the mean importance are kept.
    keepCount = int((importances > np.mean(importances)).sum())
    featureImportances = pd.Series(importances, index=features.columns)
    selectedFeatures = featureImportances.nlargest(keepCount)
    print("\n selectedFeatures after RandomForestClassification: ", selectedFeatures)
    print("****** Completed RandomForestClassification *****")

    # Retain the selected columns (in their original dataframe order) and
    # append the label column at the end, matching the original layout.
    selectedFeaturesNames = selectedFeatures.keys()
    keptColumns = [column for column in features.columns if column in selectedFeaturesNames]
    dataSetAfterFeatuerSelection = dataSetForFeatureSelection[keptColumns].copy()
    dataSetAfterFeatuerSelection[labelName] = label

    print('\n***** Number of columns in the dataSet after feature selection: ', len(dataSetAfterFeatuerSelection.columns))
    print('***** Columns in the dataSet after feature selection: \n', dataSetAfterFeatuerSelection.columns)
    print("****** End performing feature selection using RandomForestClassifier *****")
    return dataSetAfterFeatuerSelection
158 |
#This function is used to perform feature selection using ExtraTreesClassifier
def featureSelectionUsingExtraTreesClassifier(dataSetForFeatureSelection):
    """Keep the features an ExtraTrees ensemble ranks above the mean importance."""
    print("\n****** Start performing feature selection using ExtraTreesClassifier *****")
    print("****** Falls under wrapper methods (feature importance) *****")

    labelName = getLabelName()

    # Encode categoricals before fitting the ensemble.
    dataSetForFeatureSelection = featureEncodingUsingLabelEncoder(dataSetForFeatureSelection)
    features = dataSetForFeatureSelection.drop([labelName], axis=1)
    label = dataSetForFeatureSelection[labelName]

    labelTransformed = LabelEncoder().fit_transform(label)

    print("****** ExtraTreesClassification is in progress *****")
    trainedforest = ExtraTreesClassifier(n_estimators=700).fit(features, labelTransformed)
    importances = trainedforest.feature_importances_
    # Only features scoring strictly above the mean importance are kept.
    keepCount = int((importances > np.mean(importances)).sum())
    featureImportances = pd.Series(importances, index=features.columns)
    selectedFeatures = featureImportances.nlargest(keepCount)
    print("\n selectedFeatures after ExtraTreesClassification: ", selectedFeatures)
    print("****** Completed ExtraTreesClassification *****")

    # Retain the selected columns (in their original dataframe order) and
    # append the label column at the end, matching the original layout.
    selectedFeaturesNames = selectedFeatures.keys()
    keptColumns = [column for column in features.columns if column in selectedFeaturesNames]
    dataSetAfterFeatuerSelection = dataSetForFeatureSelection[keptColumns].copy()
    dataSetAfterFeatuerSelection[labelName] = label

    print('\n***** Number of columns in the dataSet after feature selection: ', len(dataSetAfterFeatuerSelection.columns))
    print('***** Columns in the dataSet after feature selection: \n', dataSetAfterFeatuerSelection.columns)
    print("****** End performing feature selection using ExtraTreesClassifier *****")
    return dataSetAfterFeatuerSelection
201 |
202 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/findcombinations.py:
--------------------------------------------------------------------------------
# Python3 program to print the combinations that take
# exactly one element from each of the given arrays.
def print1(arr):
    """Print every combination picking one element from each list in *arr*.

    Output format: a "[" line, the combination's elements quoted and
    comma-separated on one line, then a "]," line between consecutive
    combinations (odometer-style enumeration; no trailing "]," after the
    final combination).
    """
    n = len(arr)
    # positions[k] is the index of the element currently chosen from arr[k].
    positions = [0] * n

    while True:
        print("[")

        # Emit the current combination.
        for k in range(n):
            print(f"'{arr[k][positions[k]]}'", end=",")
        print()

        # Find the rightmost array with elements left after its current one.
        cursor = n - 1
        while cursor >= 0 and positions[cursor] + 1 >= len(arr[cursor]):
            cursor -= 1

        if cursor < 0:
            # Every position is exhausted; the enumeration is complete.
            return

        # Advance that position; everything to its right restarts at 0.
        positions[cursor] += 1
        for k in range(cursor + 1, n):
            positions[k] = 0
        print("],")
47 |
48 |
# Driver Code: enumerate every pipeline combination of feature-selection,
# feature-encoding, feature-scaling and classification technique.
arr = [
    ['TheilsU', 'Chi-SquaredTest', 'RandomForestClassifier', 'ExtraTreesClassifier'],
    ['OneHotEncoder', 'LabelEncoder', 'BinaryEncoder', 'FrequencyEncoder'],
    ['Min-Max', 'Standardization', 'Binarizing', 'Normalizing'],
    ['DecisonTree', 'RandomForestClassifier', 'ExtraTreesClassifier',
     'LogisticRegressionRegression', 'LinearDiscriminantAnalysis', 'GuassianNaiveBayes'],
]

print1(arr)

# This code is contributed by mohit kumar
81 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/lstm.py:
--------------------------------------------------------------------------------
1 | import time
2 | import warnings
3 | import numpy as np
4 | from numpy import newaxis
5 | from keras.layers.core import Dense, Activation, Dropout
6 | from keras.layers.recurrent import LSTM
7 | from keras.models import Sequential
8 | import matplotlib.pyplot as plt
9 |
10 |
11 | warnings.filterwarnings("ignore")
12 |
def plot_results_multiple(predicted_data, true_data, prediction_len):
    """Plot the true series and each predicted sequence, shifted to its start.

    predicted_data: iterable of per-window prediction lists, each assumed to
    be prediction_len long; true_data: the full ground-truth series.
    """
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    # Pad each prediction with None so it lines up with its window's start.
    # Fixed: xrange does not exist in Python 3; also removed a leftover
    # debug print.
    for i, data in enumerate(predicted_data):
        padding = [None for p in range(i * prediction_len)]
        plt.plot(padding + data, label='Prediction')
    plt.legend()
    plt.show()
24 |
def load_data(filename, seq_len, normalise_window):
    """Load a newline-separated series and window it into train/test arrays.

    Returns [x_train, y_train, x_test, y_test] where each x has shape
    (samples, seq_len, 1) and each y is the window's final value. The first
    90% of windows form the (shuffled) training split.
    """
    # Use a context manager so the file handle is always closed
    # (the original open(...).read() leaked the handle).
    with open(filename, 'r') as f:
        data = f.read().split('\n')

    sequence_length = seq_len + 1
    result = []
    # Sliding windows of seq_len inputs plus one target value each.
    for index in range(len(data) - sequence_length):
        result.append(data[index: index + sequence_length])

    if normalise_window:
        result = normalise_windows(result)

    result = np.array(result)

    # 90/10 train/test split; only the training rows are shuffled.
    row = round(0.9 * result.shape[0])
    train = result[:int(row), :]
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[int(row):, :-1]
    y_test = result[int(row):, -1]

    # Add the trailing feature dimension the LSTM expects: (samples, timesteps, 1).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    return [x_train, y_train, x_test, y_test]
51 |
def normalise_windows(window_data):
    """Normalise each window to its relative change from the first value."""
    # p / window[0] - 1 expresses every point relative to the window's
    # opening value (0 at the start, +1 for a doubling, etc.).
    return [[(float(point) / float(window[0])) - 1 for point in window]
            for window in window_data]
58 |
def build_model(layers):
    """Build and compile a stacked two-layer LSTM regression model.

    layers: sequence of four sizes
        [input feature dim, units of LSTM 1, units of LSTM 2, output dim].
    Returns the compiled Keras model (MSE loss, RMSprop optimizer).

    NOTE(review): input_dim/output_dim are Keras 1.x-era arguments; newer
    Keras versions expect input_shape/units — confirm the installed version
    before touching this.
    """
    model = Sequential()

    # First LSTM returns the full sequence so it can feed the second LSTM.
    model.add(LSTM(
        input_dim=layers[0],
        output_dim=layers[1],
        return_sequences=True))
    model.add(Dropout(0.2))  # regularisation between recurrent layers

    # Second LSTM collapses the sequence down to its final state.
    model.add(LSTM(
        layers[2],
        return_sequences=False))
    model.add(Dropout(0.2))

    # Linear output head for regression.
    model.add(Dense(
        output_dim=layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print ("Compilation Time : ", time.time() - start)
    return model
81 |
def predict_point_by_point(model, data):
    """Predict one step ahead for every input window; return a flat 1-D array."""
    # Each row of `data` is a full window of true history, so every
    # prediction is only ever one timestep ahead.
    return np.reshape(model.predict(data), (-1,))
87 |
def predict_sequence_full(model, data, window_size):
    """Predict the whole series by feeding each prediction back as input.

    Starts from the first true window, then repeatedly drops the oldest step
    and appends the newest prediction, so errors compound over the horizon.
    Returns a list of len(data) scalar predictions.
    """
    curr_frame = data[0]
    predicted = []
    # Fixed: xrange does not exist in Python 3 — use range.
    for i in range(len(data)):
        predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
        # Slide the window forward by one predicted step.
        curr_frame = curr_frame[1:]
        curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], axis=0)
    return predicted
97 |
def predict_sequences_multiple(model, data, window_size, prediction_len):
    """Predict prediction_len steps at a time, then jump to the next window.

    For each block of prediction_len windows, starts from true data and
    feeds predictions back in for prediction_len steps. Returns a list of
    prediction lists, one per block.
    """
    prediction_seqs = []
    # Fixed: xrange does not exist in Python 3, and len(data)/prediction_len
    # is a float there, which range() rejects — use floor division.
    for i in range(len(data) // prediction_len):
        curr_frame = data[i * prediction_len]
        predicted = []
        for j in range(prediction_len):
            predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
            # Slide the window forward by one predicted step.
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs
--------------------------------------------------------------------------------
/Project-UtilityFunctions/util.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | #Utils
4 | import operator
5 |
6 | #Seaborn is an open source Python library providing high level API for visualizing the data
7 | import seaborn as sns
8 | import matplotlib.pyplot as plt
9 |
10 | #library for saving the trained models to files
11 | import joblib
12 |
13 | from defineInputs import getPathToTrainingAndTestingDataSets
14 | from defineInputs import getPathToGenerateModels
15 |
16 | #Data loading library
17 | from dataloadinglibrary import loadCSV
18 |
19 | from defineInputs import getLabelName
20 |
21 | #Data pre-processing library
22 | from datapreprocessinglibrary import splitCompleteDataSetIntoTrainingSetAndTestingSet
23 |
24 | #Feature selection library
25 | from featureselectionlibrary import featureSelectionUsingTheilU
26 | from featureselectionlibrary import featureSelectionUsingChisquaredTest
27 | from featureselectionlibrary import featureSelectionUsingRandomForestClassifier
28 | from featureselectionlibrary import featureSelectionUsingExtraTreesClassifier
29 |
30 | #feature encoding library
31 | from featureencodinglibrary import featureEncodingUsingOneHotEncoder
32 | from featureencodinglibrary import featureEncodingUsingLabelEncoder
33 | from featureencodinglibrary import featureEncodingUsingBinaryEncoder
34 | from featureencodinglibrary import featureEncodingUsingFrequencyEncoder
35 |
36 | #feature scaling library
37 | from featurescalinglibrary import featureScalingUsingMinMaxScaler
38 | from featurescalinglibrary import featureScalingUsingStandardScalar
39 | from featurescalinglibrary import featureScalingUsingBinarizer
40 | from featurescalinglibrary import featureScalingUsingNormalizer
41 |
42 | from classificationlibrary import classifyUsingDecisionTreeClassifier
43 | from classificationlibrary import classifyUsingLogisticRegression
44 | from classificationlibrary import classifyUsingLinearDiscriminantAnalysis
45 | from classificationlibrary import classifyUsingGaussianNB
46 | from classificationlibrary import classifyUsingRandomForestClassifier
47 | from classificationlibrary import classifyUsingExtraTreesClassifier
48 | from classificationlibrary import classifyUsingKNNClassifier
49 | from classificationlibrary import findingOptimumNumberOfNeighboursForKNN
50 |
def compareModels(arrayOfModels):
    """Plot every built model's accuracy and return the best one.

    Skips index 0 (header row). Results are keyed by the classifier name
    (element 3) with its testing accuracy (element 5). Returns a one-entry
    dict {best model name: best accuracy}.
    """
    modelsAndAccuracies = {}
    for entry in arrayOfModels[1:]:
        modelsAndAccuracies[entry[3]] = entry[5]

    # Single lookup of the winning entry instead of recomputing the max.
    bestName = max(modelsAndAccuracies.items(), key=operator.itemgetter(1))[0]
    bestModelAndItsAccuracy = {bestName: modelsAndAccuracies[bestName]}

    sns.set_style("whitegrid")
    plt.figure(figsize=(5, 5))
    plt.ylabel("Algorithms", fontsize=10)
    plt.xlabel("Accuracy %", fontsize=10)
    plt.title("Comparing the models based on the accuries achieved", fontsize=15)
    sns.barplot(x=list(modelsAndAccuracies.values()), y=list(modelsAndAccuracies.keys()))
    plt.show()
    return bestModelAndItsAccuracy
66 |
### Below function is responsible for performing pre-processing, training, evaluation, persisting model
def performPreprocessingBuildModelsAndEvaluateAccuracy(trainingDataSet, testingDataSet, arrayOfModels):
    """Run the full pipeline for every model recipe and persist each model.

    Each arrayOfModels[i] is a list [featureSelection, featureEncoding,
    featureScaling, classifier]; after this call the same list additionally
    holds the training accuracy, testing accuracy, model name and model file
    path (appended in place), and each trained classifier is saved with
    joblib.

    NOTE(review): the loop starts at index 1, so arrayOfModels[0] is never
    processed — presumably a header row; confirm with the caller.
    NOTE(review): the trainingDataSet/testingDataSet parameters are
    immediately overwritten by re-loading the CSVs from disk, so the passed
    arguments are effectively ignored.
    """
    for i in range(1,len(arrayOfModels)):
        print('***************************************************************************************************************************')
        print('********************************************* Building Model-', i ,' As Below *************************************************')
        print('\t -- Feature Selection: \t ', arrayOfModels[i][0], ' \n\t -- Feature Encoding: \t ', arrayOfModels[i][1], ' \n\t -- Feature Scaling: \t ', arrayOfModels[i][2], ' \n\t -- Classification: \t ', arrayOfModels[i][3], '\n')

        # Reload fresh copies for every recipe so earlier pipelines'
        # in-place mutations cannot leak into this iteration.
        trainingFileNameWithAbsolutePath, testingFileNameWithAbsolutePath = getPathToTrainingAndTestingDataSets()
        trainingDataSet = loadCSV(trainingFileNameWithAbsolutePath)
        testingDataSet = loadCSV(testingFileNameWithAbsolutePath)

        labelName = getLabelName()
        label = trainingDataSet[labelName]

        #Combining the test and training datasets to preprocess them together, because in some datasets
        #the values in the categorical columns of the test and train datasets differ, which causes issues
        #when applying classification techniques
        completeDataSet = pd.concat(( trainingDataSet, testingDataSet ))

        #difficultyLevel = completeDataSet.pop('difficulty_level')

        print("completeDataSet.shape: ",completeDataSet.shape)
        print("completeDataSet.head: ",completeDataSet.head())

        #Feature Selection: dispatch on the technique named in the recipe
        if arrayOfModels[i][0] == 'TheilsU':
            #Perform feature selection using TheilU
            completeDataSetAfterFeatuerSelection = featureSelectionUsingTheilU(completeDataSet)
        elif arrayOfModels[i][0] == 'Chi-SquaredTest':
            #Perform feature selection using Chi-squared Test
            completeDataSetAfterFeatuerSelection = featureSelectionUsingChisquaredTest(completeDataSet)
        elif arrayOfModels[i][0] == 'RandomForestClassifier':
            #Perform feature selection using RandomForestClassifier
            completeDataSetAfterFeatuerSelection = featureSelectionUsingRandomForestClassifier(completeDataSet)
        elif arrayOfModels[i][0] == 'ExtraTreesClassifier':
            #Perform feature selection using ExtraTreesClassifier
            completeDataSetAfterFeatuerSelection = featureSelectionUsingExtraTreesClassifier(completeDataSet)

        #Feature Encoding: convert categorical values into numeric features
        if arrayOfModels[i][1] == 'LabelEncoder':
            #Perform label encoding to convert categorical values into label encoded features
            completeEncodedDataSet = featureEncodingUsingLabelEncoder(completeDataSetAfterFeatuerSelection)
        elif arrayOfModels[i][1] == 'OneHotEncoder':
            #Perform OneHot encoding to convert categorical values into one-hot encoded features
            completeEncodedDataSet = featureEncodingUsingOneHotEncoder(completeDataSetAfterFeatuerSelection)
        elif arrayOfModels[i][1] == 'FrequencyEncoder':
            #Perform Frequency encoding to convert categorical values into frequency encoded features
            completeEncodedDataSet = featureEncodingUsingFrequencyEncoder(completeDataSetAfterFeatuerSelection)
        elif arrayOfModels[i][1] == 'BinaryEncoder':
            #Perform Binary encoding to convert categorical values into binary encoded features
            completeEncodedDataSet = featureEncodingUsingBinaryEncoder(completeDataSetAfterFeatuerSelection)

        #Feature Scaling: bring every feature into a comparable range
        if arrayOfModels[i][2] == 'Min-Max':
            #Perform MinMaxScaler to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingMinMaxScaler(completeEncodedDataSet)
        elif arrayOfModels[i][2] == 'Binarizing':
            #Perform Binarizing to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingBinarizer(completeEncodedDataSet)
        elif arrayOfModels[i][2] == 'Normalizing':
            #Perform Normalizing to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingNormalizer(completeEncodedDataSet)
        elif arrayOfModels[i][2] == 'Standardization':
            #Perform Standardization to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingStandardScalar(completeEncodedDataSet)

        #Split the complete dataSet back into a training dataSet and a testing dataSet
        featuresInPreProcessedTrainingDataSet,featuresInPreProcessedTestingDataSet,labelInPreProcessedTrainingDataSet,labelInPreProcessedTestingDataSet = splitCompleteDataSetIntoTrainingSetAndTestingSet(completeEncodedAndScaledDataset)

        trainingEncodedAndScaledDataset = pd.concat([featuresInPreProcessedTrainingDataSet, labelInPreProcessedTrainingDataSet], axis=1, sort=False)
        testingEncodedAndScaledDataset = pd.concat([featuresInPreProcessedTestingDataSet, labelInPreProcessedTestingDataSet], axis=1, sort=False)

        #Classification: train the classifier named in the recipe and score it
        if arrayOfModels[i][3] == 'DecisonTree':
            #Perform classification using DecisionTreeClassifier
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingDecisionTreeClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif arrayOfModels[i][3] == 'RandomForestClassifier':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingRandomForestClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif arrayOfModels[i][3] == 'ExtraTreesClassifier':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingExtraTreesClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif arrayOfModels[i][3] == 'LogisticRegressionRegression':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingLogisticRegression(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif arrayOfModels[i][3] == 'LinearDiscriminantAnalysis':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingLinearDiscriminantAnalysis(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif arrayOfModels[i][3] == 'GuassianNaiveBayes':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingGaussianNB(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif arrayOfModels[i][3] == 'KNN':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingKNNClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)

        # Record the scores and the persisted model's identity on the recipe.
        arrayOfModels[i].append(trainingAccuracyScore)
        arrayOfModels[i].append(testingAccuracyScore)

        modelName = arrayOfModels[i][0]+"_"+arrayOfModels[i][1]+"_"+arrayOfModels[i][2]+"_"+arrayOfModels[i][3]
        modelFileName = getPathToGenerateModels() + modelName+".pkl"
        arrayOfModels[i].append(modelName)
        arrayOfModels[i].append(modelFileName)
        #Save the model to file
        joblib.dump(classifier, modelFileName)
165 |
def performPreprocessing(trainingDataSet, testingDataSet, arrayOfModels):
    """Run feature selection, encoding and scaling for every recipe.

    Same pipeline as performPreprocessingBuildModelsAndEvaluateAccuracy but
    without the classification/persistence stage; returns the encoded and
    scaled complete dataset.

    NOTE(review): the trainingDataSet/testingDataSet parameters are
    immediately overwritten by re-loading the CSVs from disk, so the passed
    arguments are effectively ignored. The split train/test dataframes built
    at the end of the loop are never used or returned — confirm whether they
    were meant to be. The return statement sits at function level, so only
    the LAST recipe's dataset is returned — confirm this is intended.
    """
    for i in range(0,len(arrayOfModels)):
        print('***************************************************************************************************************************')
        print('********************************************* Building Model-', i ,' As Below *************************************************')
        print('\t -- Feature Selection: \t ', arrayOfModels[i][0], ' \n\t -- Feature Encoding: \t ', arrayOfModels[i][1], ' \n\t -- Feature Scaling: \t ', arrayOfModels[i][2], '\n')

        # Reload fresh copies for every recipe so earlier pipelines'
        # in-place mutations cannot leak into this iteration.
        trainingFileNameWithAbsolutePath, testingFileNameWithAbsolutePath = getPathToTrainingAndTestingDataSets()
        trainingDataSet = loadCSV(trainingFileNameWithAbsolutePath)
        testingDataSet = loadCSV(testingFileNameWithAbsolutePath)

        labelName = getLabelName()
        label = trainingDataSet[labelName]

        #Combining the test and training datasets to preprocess them together, because in some datasets
        #the values in the categorical columns of the test and train datasets differ, which causes issues
        #when applying classification techniques
        completeDataSet = pd.concat(( trainingDataSet, testingDataSet ))

        #difficultyLevel = completeDataSet.pop('difficulty_level')

        print("completeDataSet.shape: ",completeDataSet.shape)
        print("completeDataSet.head: ",completeDataSet.head())

        #Feature Selection: dispatch on the technique named in the recipe
        if arrayOfModels[i][0] == 'TheilsU':
            #Perform feature selection using TheilU
            completeDataSetAfterFeatuerSelection = featureSelectionUsingTheilU(completeDataSet)
        elif arrayOfModels[i][0] == 'Chi-SquaredTest':
            #Perform feature selection using Chi-squared Test
            completeDataSetAfterFeatuerSelection = featureSelectionUsingChisquaredTest(completeDataSet)
        elif arrayOfModels[i][0] == 'RandomForestClassifier':
            #Perform feature selection using RandomForestClassifier
            completeDataSetAfterFeatuerSelection = featureSelectionUsingRandomForestClassifier(completeDataSet)
        elif arrayOfModels[i][0] == 'ExtraTreesClassifier':
            #Perform feature selection using ExtraTreesClassifier
            completeDataSetAfterFeatuerSelection = featureSelectionUsingExtraTreesClassifier(completeDataSet)

        #Feature Encoding: convert categorical values into numeric features
        if arrayOfModels[i][1] == 'LabelEncoder':
            #Perform label encoding to convert categorical values into label encoded features
            completeEncodedDataSet = featureEncodingUsingLabelEncoder(completeDataSetAfterFeatuerSelection)
        elif arrayOfModels[i][1] == 'OneHotEncoder':
            #Perform OneHot encoding to convert categorical values into one-hot encoded features
            completeEncodedDataSet = featureEncodingUsingOneHotEncoder(completeDataSetAfterFeatuerSelection)
        elif arrayOfModels[i][1] == 'FrequencyEncoder':
            #Perform Frequency encoding to convert categorical values into frequency encoded features
            completeEncodedDataSet = featureEncodingUsingFrequencyEncoder(completeDataSetAfterFeatuerSelection)
        elif arrayOfModels[i][1] == 'BinaryEncoder':
            #Perform Binary encoding to convert categorical values into binary encoded features
            completeEncodedDataSet = featureEncodingUsingBinaryEncoder(completeDataSetAfterFeatuerSelection)

        #Feature Scaling: bring every feature into a comparable range
        if arrayOfModels[i][2] == 'Min-Max':
            #Perform MinMaxScaler to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingMinMaxScaler(completeEncodedDataSet)
        elif arrayOfModels[i][2] == 'Binarizing':
            #Perform Binarizing to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingBinarizer(completeEncodedDataSet)
        elif arrayOfModels[i][2] == 'Normalizing':
            #Perform Normalizing to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingNormalizer(completeEncodedDataSet)
        elif arrayOfModels[i][2] == 'Standardization':
            #Perform Standardization to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingStandardScalar(completeEncodedDataSet)

        #Split the complete dataSet back into a training dataSet and a testing dataSet
        featuresInPreProcessedTrainingDataSet,featuresInPreProcessedTestingDataSet,labelInPreProcessedTrainingDataSet,labelInPreProcessedTestingDataSet = splitCompleteDataSetIntoTrainingSetAndTestingSet(completeEncodedAndScaledDataset)

        trainingEncodedAndScaledDataset = pd.concat([featuresInPreProcessedTrainingDataSet, labelInPreProcessedTrainingDataSet], axis=1, sort=False)
        testingEncodedAndScaledDataset = pd.concat([featuresInPreProcessedTestingDataSet, labelInPreProcessedTestingDataSet], axis=1, sort=False)

    return completeEncodedAndScaledDataset
--------------------------------------------------------------------------------