├── Datasets ├── KDDCup99 │ ├── kddcup.data.gz │ ├── kddcup.data_10_percent.gz │ ├── kddcup.newtestdata_10_percent_unlabeled.gz │ ├── kddcup.testdata.unlabeled.gz │ └── kddcup.testdata.unlabeled_10_percent.gz └── NSL-KDD │ ├── KDDTest-21.csv │ ├── KDDTrain+_20Percent.csv │ └── KDDTrain+_20Percent_Description.xlsx ├── IDSUsingAutoEnoderNeuralNetwork.ipynb ├── IDSUsingSimpleDeepNeuralNetwork.ipynb ├── IDSUsingTraditionalMLTechniques.ipynb └── Project-UtilityFunctions ├── __pycache__ └── lstm.cpython-37.pyc ├── classificationlibrary.py ├── dataformatinglibrary.py ├── datainspectionlibrary.py ├── dataloadinglibrary.py ├── datapreprocessinglibrary.py ├── defineInputs.py ├── featureencodinglibrary.py ├── featurescalinglibrary.py ├── featureselectionlibrary.py ├── findcombinations.py ├── lstm.py └── util.py /Datasets/KDDCup99/kddcup.data.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.data.gz -------------------------------------------------------------------------------- /Datasets/KDDCup99/kddcup.data_10_percent.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.data_10_percent.gz -------------------------------------------------------------------------------- /Datasets/KDDCup99/kddcup.newtestdata_10_percent_unlabeled.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.newtestdata_10_percent_unlabeled.gz -------------------------------------------------------------------------------- /Datasets/KDDCup99/kddcup.testdata.unlabeled.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.testdata.unlabeled.gz -------------------------------------------------------------------------------- /Datasets/KDDCup99/kddcup.testdata.unlabeled_10_percent.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.testdata.unlabeled_10_percent.gz -------------------------------------------------------------------------------- /Datasets/NSL-KDD/KDDTrain+_20Percent_Description.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/NSL-KDD/KDDTrain+_20Percent_Description.xlsx -------------------------------------------------------------------------------- /IDSUsingSimpleDeepNeuralNetwork.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Import the required libraries and the utility modules" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 12, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "\n", 19 | "from sklearn import metrics\n", 20 | "from sklearn.model_selection import train_test_split\n", 21 | "from sklearn.preprocessing import LabelEncoder\n", 22 | "\n", 23 | "from tensorflow.keras.models import Sequential\n", 24 | "from tensorflow.keras.models import load_model\n", 25 | "from tensorflow.keras.layers import Dense, Activation, Dropout\n", 26 | "from tensorflow.keras.callbacks 
import EarlyStopping\n", 27 | "from tensorflow.keras.callbacks import ModelCheckpoint\n", 28 | "from tensorflow.keras.utils import plot_model\n", 29 | "from tensorflow.python.keras.utils.np_utils import to_categorical\n", 30 | "\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "\n", 33 | "#Custom libraries\n", 34 | "#Data formating library\n", 35 | "from dataloadinglibrary import loadCSV\n", 36 | "\n", 37 | "from datainspectionlibrary import getStatisticsOfData\n", 38 | "\n", 39 | "from dataformatinglibrary import createExcelFromArray\n", 40 | "\n", 41 | "from defineInputs import getLabelName\n", 42 | "from defineInputs import getPathToTrainingAndTestingDataSets\n", 43 | "from defineInputs import modelPerformanceReport\n", 44 | "from defineInputs import defineArrayForPreProcessing\n", 45 | "from defineInputs import getPathToGenerateModels\n", 46 | "\n", 47 | "from util import performPreprocessing" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "### Load the training dataset and check the statistics" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 2, 60 | "metadata": { 61 | "scrolled": true 62 | }, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "***** Start checking the statistics of the dataSet *****\n", 69 | "\n", 70 | "***** Shape (number of rows and columns) in the dataset: (25191, 42)\n", 71 | "***** Total number of features in the dataset: 41\n", 72 | "***** Number of categorical features in the dataset: 3\n", 73 | "***** Number of numerical features in the dataset: 38\n", 74 | "\n", 75 | "***** Names of categorical features in dataset *****\n", 76 | "\n", 77 | "| Categorical features in dataset |\n", 78 | "|-----------------------------------|\n", 79 | "| Protocol_type |\n", 80 | "| Service |\n", 81 | "| Flag |\n", 82 | "\n", 83 | "\n", 84 | "***** Names of numerical features in dataset *****\n", 85 | "\n", 86 | "| Numerical features in 
the dataset |\n", 87 | "|-------------------------------------|\n", 88 | "| Duration |\n", 89 | "| Src_bytes |\n", 90 | "| Dst_bytes |\n", 91 | "| Land |\n", 92 | "| Wrong_fragment |\n", 93 | "| Urgent |\n", 94 | "| Hot |\n", 95 | "| Num_failed_logins |\n", 96 | "| Logged_in |\n", 97 | "| Num_compromised |\n", 98 | "| Root_shell |\n", 99 | "| Su_attempted |\n", 100 | "| Num_root |\n", 101 | "| Num_file_creations |\n", 102 | "| Num_shells |\n", 103 | "| Num_access_files |\n", 104 | "| Num_outbound_cmds |\n", 105 | "| Is_hot_login |\n", 106 | "| Is_guest_login |\n", 107 | "| Count |\n", 108 | "| Srv_count |\n", 109 | "| Serror_rate |\n", 110 | "| Srv_serror_rate |\n", 111 | "| Rerror_rate |\n", 112 | "| Srv_rerror_rate |\n", 113 | "| Same_srv_rate |\n", 114 | "| Diff_srv_rate |\n", 115 | "| Srv_diff_host_rate |\n", 116 | "| Dst_host_count |\n", 117 | "| Dst_host_srv_count |\n", 118 | "| Dst_host_same_srv_rate |\n", 119 | "| Dst_host_diff_srv_rate |\n", 120 | "| Dst_host_same_src_port_rate |\n", 121 | "| Dst_host_srv_diff_host_rate |\n", 122 | "| Dst_host_serror_rate |\n", 123 | "| Dst_host_srv_serror_rate |\n", 124 | "| Dst_host_rerror_rate |\n", 125 | "| Dst_host_srv_rerror_rate |\n", 126 | "\n", 127 | "\n", 128 | "***** Are there any missing values in the data set: False\n", 129 | "Total number of records in the dataset: 25191\n", 130 | "Unique records in the dataset: 25191\n", 131 | "\n", 132 | "***** Are there any duplicate records in the data set: False\n", 133 | "\n", 134 | "****** Number of different values for label that are present in the dataset: 22\n", 135 | "\n", 136 | "****** Here is the list of unique label types present in the dataset ***** \n", 137 | "\n", 138 | "| Unique label types in the dataset |\n", 139 | "|-------------------------------------|\n", 140 | "| normal |\n", 141 | "| neptune |\n", 142 | "| warezclient |\n", 143 | "| ipsweep |\n", 144 | "| portsweep |\n", 145 | "| teardrop |\n", 146 | "| nmap |\n", 147 | "| satan |\n", 148 | "| smurf 
|\n", 149 | "| pod |\n", 150 | "| back |\n", 151 | "| guess_passwd |\n", 152 | "| ftp_write |\n", 153 | "| multihop |\n", 154 | "| rootkit |\n", 155 | "| buffer_overflow |\n", 156 | "| imap |\n", 157 | "| warezmaster |\n", 158 | "| phf |\n", 159 | "| land |\n", 160 | "| loadmodule |\n", 161 | "| spy |\n", 162 | "\n", 163 | "\n", 164 | "****** Here is the list of unique values present in each categorical feature in the dataset *****\n", 165 | "\n", 166 | "\n", 167 | "attack_type: 22 \n", 168 | "| distinct values |\n", 169 | "|-------------------|\n", 170 | "| normal |\n", 171 | "| neptune |\n", 172 | "| warezclient |\n", 173 | "| ipsweep |\n", 174 | "| portsweep |\n", 175 | "| teardrop |\n", 176 | "| nmap |\n", 177 | "| satan |\n", 178 | "| smurf |\n", 179 | "| pod |\n", 180 | "| back |\n", 181 | "| guess_passwd |\n", 182 | "| ftp_write |\n", 183 | "| multihop |\n", 184 | "| rootkit |\n", 185 | "| buffer_overflow |\n", 186 | "| imap |\n", 187 | "| warezmaster |\n", 188 | "| phf |\n", 189 | "| land |\n", 190 | "| loadmodule |\n", 191 | "| spy |\n", 192 | "\n", 193 | "\n", 194 | "Protocol_type: 3 \n", 195 | "| distinct values |\n", 196 | "|-------------------|\n", 197 | "| udp |\n", 198 | "| tcp |\n", 199 | "| icmp |\n", 200 | "\n", 201 | "\n", 202 | "Service: 66 \n", 203 | "| distinct values |\n", 204 | "|-------------------|\n", 205 | "| other |\n", 206 | "| private |\n", 207 | "| http |\n", 208 | "| remote_job |\n", 209 | "| ftp_data |\n", 210 | "| name |\n", 211 | "| netbios_ns |\n", 212 | "| eco_i |\n", 213 | "| mtp |\n", 214 | "| telnet |\n", 215 | "| finger |\n", 216 | "| domain_u |\n", 217 | "| supdup |\n", 218 | "| uucp_path |\n", 219 | "| Z39_50 |\n", 220 | "| smtp |\n", 221 | "| csnet_ns |\n", 222 | "| uucp |\n", 223 | "| netbios_dgm |\n", 224 | "| urp_i |\n", 225 | "| auth |\n", 226 | "| domain |\n", 227 | "| ftp |\n", 228 | "| bgp |\n", 229 | "| ldap |\n", 230 | "| ecr_i |\n", 231 | "| gopher |\n", 232 | "| vmnet |\n", 233 | "| systat |\n", 234 | "| 
http_443 |\n", 235 | "| efs |\n", 236 | "| whois |\n", 237 | "| imap4 |\n", 238 | "| iso_tsap |\n", 239 | "| echo |\n", 240 | "| klogin |\n", 241 | "| link |\n", 242 | "| sunrpc |\n", 243 | "| login |\n", 244 | "| kshell |\n", 245 | "| sql_net |\n", 246 | "| time |\n", 247 | "| hostnames |\n", 248 | "| exec |\n", 249 | "| ntp_u |\n", 250 | "| discard |\n", 251 | "| nntp |\n", 252 | "| courier |\n", 253 | "| ctf |\n", 254 | "| ssh |\n", 255 | "| daytime |\n", 256 | "| shell |\n", 257 | "| netstat |\n", 258 | "| pop_3 |\n", 259 | "| nnsp |\n", 260 | "| IRC |\n", 261 | "| pop_2 |\n", 262 | "| printer |\n", 263 | "| tim_i |\n", 264 | "| pm_dump |\n", 265 | "| red_i |\n", 266 | "| netbios_ssn |\n", 267 | "| rje |\n", 268 | "| X11 |\n", 269 | "| urh_i |\n", 270 | "| http_8001 |\n", 271 | "\n", 272 | "\n", 273 | "Flag: 11 \n", 274 | "| distinct values |\n", 275 | "|-------------------|\n", 276 | "| SF |\n", 277 | "| S0 |\n", 278 | "| REJ |\n", 279 | "| RSTR |\n", 280 | "| SH |\n", 281 | "| RSTO |\n", 282 | "| S1 |\n", 283 | "| RSTOS0 |\n", 284 | "| S3 |\n", 285 | "| S2 |\n", 286 | "| OTH |\n", 287 | "\n", 288 | "\n", 289 | "****** Label distribution in the dataset *****\n", 290 | "\n", 291 | "normal 13448\n", 292 | "neptune 8282\n", 293 | "ipsweep 710\n", 294 | "satan 691\n", 295 | "portsweep 587\n", 296 | "smurf 529\n", 297 | "nmap 301\n", 298 | "back 196\n", 299 | "teardrop 188\n", 300 | "warezclient 181\n", 301 | "pod 38\n", 302 | "guess_passwd 10\n", 303 | "warezmaster 7\n", 304 | "buffer_overflow 6\n", 305 | "imap 5\n", 306 | "rootkit 4\n", 307 | "multihop 2\n", 308 | "phf 2\n", 309 | "loadmodule 1\n", 310 | "ftp_write 1\n", 311 | "land 1\n", 312 | "spy 1\n", 313 | "Name: attack_type, dtype: int64\n", 314 | "\n", 315 | "\n", 316 | "***** End checking the statistics of the dataSet *****\n", 317 | "\n", 318 | "***** Here is how to training dataset looks like before performing any pre-processing *****\n" 319 | ] 320 | }, 321 | { 322 | "data": { 323 | "text/html": [ 324 
| "
\n", 325 | "\n", 338 | "\n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | "
DurationProtocol_typeServiceFlagSrc_bytesDst_bytesLandWrong_fragmentUrgentHot...Dst_host_srv_countDst_host_same_srv_rateDst_host_diff_srv_rateDst_host_same_src_port_rateDst_host_srv_diff_host_rateDst_host_serror_rateDst_host_srv_serror_rateDst_host_rerror_rateDst_host_srv_rerror_rateattack_type
00udpotherSF14600000...10.000.600.880.000.000.000.00.00normal
10tcpprivateS0000000...260.100.050.000.001.001.000.00.00neptune
20tcphttpSF23281530000...2551.000.000.030.040.030.010.00.01normal
30tcphttpSF1994200000...2551.000.000.000.000.000.000.00.00normal
40tcpprivateREJ000000...190.070.070.000.000.000.001.01.00neptune
\n", 488 | "

5 rows × 42 columns

\n", 489 | "
" 490 | ], 491 | "text/plain": [ 492 | " Duration Protocol_type Service Flag Src_bytes Dst_bytes Land \\\n", 493 | "0 0 udp other SF 146 0 0 \n", 494 | "1 0 tcp private S0 0 0 0 \n", 495 | "2 0 tcp http SF 232 8153 0 \n", 496 | "3 0 tcp http SF 199 420 0 \n", 497 | "4 0 tcp private REJ 0 0 0 \n", 498 | "\n", 499 | " Wrong_fragment Urgent Hot ... Dst_host_srv_count \\\n", 500 | "0 0 0 0 ... 1 \n", 501 | "1 0 0 0 ... 26 \n", 502 | "2 0 0 0 ... 255 \n", 503 | "3 0 0 0 ... 255 \n", 504 | "4 0 0 0 ... 19 \n", 505 | "\n", 506 | " Dst_host_same_srv_rate Dst_host_diff_srv_rate \\\n", 507 | "0 0.00 0.60 \n", 508 | "1 0.10 0.05 \n", 509 | "2 1.00 0.00 \n", 510 | "3 1.00 0.00 \n", 511 | "4 0.07 0.07 \n", 512 | "\n", 513 | " Dst_host_same_src_port_rate Dst_host_srv_diff_host_rate \\\n", 514 | "0 0.88 0.00 \n", 515 | "1 0.00 0.00 \n", 516 | "2 0.03 0.04 \n", 517 | "3 0.00 0.00 \n", 518 | "4 0.00 0.00 \n", 519 | "\n", 520 | " Dst_host_serror_rate Dst_host_srv_serror_rate Dst_host_rerror_rate \\\n", 521 | "0 0.00 0.00 0.0 \n", 522 | "1 1.00 1.00 0.0 \n", 523 | "2 0.03 0.01 0.0 \n", 524 | "3 0.00 0.00 0.0 \n", 525 | "4 0.00 0.00 1.0 \n", 526 | "\n", 527 | " Dst_host_srv_rerror_rate attack_type \n", 528 | "0 0.00 normal \n", 529 | "1 0.00 neptune \n", 530 | "2 0.01 normal \n", 531 | "3 0.00 normal \n", 532 | "4 1.00 neptune \n", 533 | "\n", 534 | "[5 rows x 42 columns]" 535 | ] 536 | }, 537 | "execution_count": 2, 538 | "metadata": {}, 539 | "output_type": "execute_result" 540 | } 541 | ], 542 | "source": [ 543 | "#Define file names and call loadCSV to load the CSV files\n", 544 | "trainingFileNameWithAbsolutePath, testingFileNameWithAbsolutePath = getPathToTrainingAndTestingDataSets()\n", 545 | "trainingDataSet = loadCSV(trainingFileNameWithAbsolutePath)\n", 546 | "difficultyLevel = trainingDataSet.pop('difficulty_level')\n", 547 | "labelName = getLabelName()\n", 548 | "label = trainingDataSet[labelName]\n", 549 | "\n", 550 | "#Look at the statistics of the dataSet\n", 551 | 
"getStatisticsOfData(trainingDataSet)\n", 552 | "print(\"\\n***** Here is how to training dataset looks like before performing any pre-processing *****\")\n", 553 | "trainingDataSet.head()" 554 | ] 555 | }, 556 | { 557 | "cell_type": "markdown", 558 | "metadata": {}, 559 | "source": [ 560 | "### Load the testing dataset and check the statistics" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": 3, 566 | "metadata": {}, 567 | "outputs": [ 568 | { 569 | "name": "stdout", 570 | "output_type": "stream", 571 | "text": [ 572 | "***** Start checking the statistics of the dataSet *****\n", 573 | "\n", 574 | "***** Shape (number of rows and columns) in the dataset: (11850, 42)\n", 575 | "***** Total number of features in the dataset: 41\n", 576 | "***** Number of categorical features in the dataset: 3\n", 577 | "***** Number of numerical features in the dataset: 38\n", 578 | "\n", 579 | "***** Names of categorical features in dataset *****\n", 580 | "\n", 581 | "| Categorical features in dataset |\n", 582 | "|-----------------------------------|\n", 583 | "| Protocol_type |\n", 584 | "| Service |\n", 585 | "| Flag |\n", 586 | "\n", 587 | "\n", 588 | "***** Names of numerical features in dataset *****\n", 589 | "\n", 590 | "| Numerical features in the dataset |\n", 591 | "|-------------------------------------|\n", 592 | "| Duration |\n", 593 | "| Src_bytes |\n", 594 | "| Dst_bytes |\n", 595 | "| Land |\n", 596 | "| Wrong_fragment |\n", 597 | "| Urgent |\n", 598 | "| Hot |\n", 599 | "| Num_failed_logins |\n", 600 | "| Logged_in |\n", 601 | "| Num_compromised |\n", 602 | "| Root_shell |\n", 603 | "| Su_attempted |\n", 604 | "| Num_root |\n", 605 | "| Num_file_creations |\n", 606 | "| Num_shells |\n", 607 | "| Num_access_files |\n", 608 | "| Num_outbound_cmds |\n", 609 | "| Is_hot_login |\n", 610 | "| Is_guest_login |\n", 611 | "| Count |\n", 612 | "| Srv_count |\n", 613 | "| Serror_rate |\n", 614 | "| Srv_serror_rate |\n", 615 | "| Rerror_rate |\n", 
616 | "| Srv_rerror_rate |\n", 617 | "| Same_srv_rate |\n", 618 | "| Diff_srv_rate |\n", 619 | "| Srv_diff_host_rate |\n", 620 | "| Dst_host_count |\n", 621 | "| Dst_host_srv_count |\n", 622 | "| Dst_host_same_srv_rate |\n", 623 | "| Dst_host_diff_srv_rate |\n", 624 | "| Dst_host_same_src_port_rate |\n", 625 | "| Dst_host_srv_diff_host_rate |\n", 626 | "| Dst_host_serror_rate |\n", 627 | "| Dst_host_srv_serror_rate |\n", 628 | "| Dst_host_rerror_rate |\n", 629 | "| Dst_host_srv_rerror_rate |\n", 630 | "\n", 631 | "\n", 632 | "***** Are there any missing values in the data set: False\n", 633 | "Total number of records in the dataset: 11850\n", 634 | "Unique records in the dataset: 11850\n", 635 | "\n", 636 | "***** Are there any duplicate records in the data set: False\n", 637 | "\n", 638 | "****** Number of different values for label that are present in the dataset: 38\n", 639 | "\n", 640 | "****** Here is the list of unique label types present in the dataset ***** \n", 641 | "\n", 642 | "| Unique label types in the dataset |\n", 643 | "|-------------------------------------|\n", 644 | "| guess_passwd |\n", 645 | "| snmpguess |\n", 646 | "| processtable |\n", 647 | "| normal |\n", 648 | "| nmap |\n", 649 | "| back |\n", 650 | "| neptune |\n", 651 | "| satan |\n", 652 | "| saint |\n", 653 | "| mscan |\n", 654 | "| apache2 |\n", 655 | "| httptunnel |\n", 656 | "| warezmaster |\n", 657 | "| ipsweep |\n", 658 | "| smurf |\n", 659 | "| mailbomb |\n", 660 | "| teardrop |\n", 661 | "| portsweep |\n", 662 | "| snmpgetattack |\n", 663 | "| multihop |\n", 664 | "| worm |\n", 665 | "| land |\n", 666 | "| sendmail |\n", 667 | "| buffer_overflow |\n", 668 | "| pod |\n", 669 | "| rootkit |\n", 670 | "| xlock |\n", 671 | "| xterm |\n", 672 | "| xsnoop |\n", 673 | "| ps |\n", 674 | "| named |\n", 675 | "| ftp_write |\n", 676 | "| loadmodule |\n", 677 | "| phf |\n", 678 | "| udpstorm |\n", 679 | "| perl |\n", 680 | "| sqlattack |\n", 681 | "| imap |\n", 682 | "\n", 683 | "\n", 684 
| "****** Here is the list of unique values present in each categorical feature in the dataset *****\n", 685 | "\n", 686 | "\n", 687 | "attack_type: 38 \n", 688 | "| distinct values |\n", 689 | "|-------------------|\n", 690 | "| guess_passwd |\n", 691 | "| snmpguess |\n", 692 | "| processtable |\n", 693 | "| normal |\n", 694 | "| nmap |\n", 695 | "| back |\n", 696 | "| neptune |\n", 697 | "| satan |\n", 698 | "| saint |\n", 699 | "| mscan |\n", 700 | "| apache2 |\n", 701 | "| httptunnel |\n", 702 | "| warezmaster |\n", 703 | "| ipsweep |\n", 704 | "| smurf |\n", 705 | "| mailbomb |\n", 706 | "| teardrop |\n", 707 | "| portsweep |\n", 708 | "| snmpgetattack |\n", 709 | "| multihop |\n", 710 | "| worm |\n", 711 | "| land |\n", 712 | "| sendmail |\n", 713 | "| buffer_overflow |\n", 714 | "| pod |\n", 715 | "| rootkit |\n", 716 | "| xlock |\n", 717 | "| xterm |\n", 718 | "| xsnoop |\n", 719 | "| ps |\n", 720 | "| named |\n", 721 | "| ftp_write |\n", 722 | "| loadmodule |\n", 723 | "| phf |\n", 724 | "| udpstorm |\n", 725 | "| perl |\n", 726 | "| sqlattack |\n", 727 | "| imap |\n", 728 | "\n", 729 | "\n", 730 | "Protocol_type: 3 \n", 731 | "| distinct values |\n", 732 | "|-------------------|\n", 733 | "| tcp |\n", 734 | "| udp |\n", 735 | "| icmp |\n", 736 | "\n", 737 | "\n", 738 | "Service: 62 \n", 739 | "| distinct values |\n", 740 | "|-------------------|\n", 741 | "| telnet |\n", 742 | "| private |\n", 743 | "| http |\n", 744 | "| imap4 |\n", 745 | "| ftp_data |\n", 746 | "| other |\n", 747 | "| ctf |\n", 748 | "| pop_3 |\n", 749 | "| ftp |\n", 750 | "| domain_u |\n", 751 | "| domain |\n", 752 | "| eco_i |\n", 753 | "| ecr_i |\n", 754 | "| finger |\n", 755 | "| name |\n", 756 | "| smtp |\n", 757 | "| vmnet |\n", 758 | "| mtp |\n", 759 | "| bgp |\n", 760 | "| exec |\n", 761 | "| sunrpc |\n", 762 | "| uucp_path |\n", 763 | "| iso_tsap |\n", 764 | "| echo |\n", 765 | "| auth |\n", 766 | "| hostnames |\n", 767 | "| courier |\n", 768 | "| uucp |\n", 769 | "| daytime 
|\n", 770 | "| nntp |\n", 771 | "| netstat |\n", 772 | "| urp_i |\n", 773 | "| http_443 |\n", 774 | "| csnet_ns |\n", 775 | "| login |\n", 776 | "| klogin |\n", 777 | "| whois |\n", 778 | "| time |\n", 779 | "| link |\n", 780 | "| discard |\n", 781 | "| gopher |\n", 782 | "| supdup |\n", 783 | "| netbios_ns |\n", 784 | "| systat |\n", 785 | "| netbios_dgm |\n", 786 | "| kshell |\n", 787 | "| efs |\n", 788 | "| nnsp |\n", 789 | "| ssh |\n", 790 | "| netbios_ssn |\n", 791 | "| Z39_50 |\n", 792 | "| IRC |\n", 793 | "| ntp_u |\n", 794 | "| X11 |\n", 795 | "| pm_dump |\n", 796 | "| ldap |\n", 797 | "| remote_job |\n", 798 | "| sql_net |\n", 799 | "| shell |\n", 800 | "| tim_i |\n", 801 | "| pop_2 |\n", 802 | "| tftp_u |\n", 803 | "\n", 804 | "\n", 805 | "Flag: 11 \n", 806 | "| distinct values |\n", 807 | "|-------------------|\n", 808 | "| SF |\n", 809 | "| S3 |\n", 810 | "| SH |\n", 811 | "| REJ |\n", 812 | "| S0 |\n", 813 | "| RSTO |\n", 814 | "| RSTR |\n", 815 | "| RSTOS0 |\n", 816 | "| S1 |\n", 817 | "| S2 |\n", 818 | "| OTH |\n", 819 | "\n", 820 | "\n", 821 | "****** Label distribution in the dataset *****\n", 822 | "\n", 823 | "normal 2152\n", 824 | "neptune 1579\n", 825 | "guess_passwd 1231\n", 826 | "mscan 996\n", 827 | "warezmaster 944\n", 828 | "apache2 737\n", 829 | "satan 727\n", 830 | "processtable 685\n", 831 | "smurf 627\n", 832 | "back 359\n", 833 | "snmpguess 331\n", 834 | "saint 309\n", 835 | "mailbomb 293\n", 836 | "snmpgetattack 178\n", 837 | "portsweep 156\n", 838 | "ipsweep 141\n", 839 | "httptunnel 133\n", 840 | "nmap 73\n", 841 | "pod 41\n", 842 | "buffer_overflow 20\n", 843 | "multihop 18\n", 844 | "named 17\n", 845 | "ps 15\n", 846 | "sendmail 14\n", 847 | "rootkit 13\n", 848 | "xterm 13\n", 849 | "teardrop 12\n", 850 | "xlock 9\n", 851 | "land 7\n", 852 | "xsnoop 4\n", 853 | "ftp_write 3\n", 854 | "worm 2\n", 855 | "perl 2\n", 856 | "phf 2\n", 857 | "loadmodule 2\n", 858 | "sqlattack 2\n", 859 | "udpstorm 2\n", 860 | "imap 1\n", 861 | "Name: 
attack_type, dtype: int64\n", 862 | "\n", 863 | "\n", 864 | "***** End checking the statistics of the dataSet *****\n", 865 | "\n", 866 | "***** Here is how to testing dataset looks like before performing any pre-processing *****\n" 867 | ] 868 | }, 869 | { 870 | "data": { 871 | "text/html": [ 872 | "
\n", 873 | "\n", 886 | "\n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | "
DurationProtocol_typeServiceFlagSrc_bytesDst_bytesLandWrong_fragmentUrgentHot...Dst_host_srv_countDst_host_same_srv_rateDst_host_diff_srv_rateDst_host_same_src_port_rateDst_host_srv_diff_host_rateDst_host_serror_rateDst_host_srv_serror_rateDst_host_rerror_rateDst_host_srv_rerror_rateattack_type
013tcptelnetSF11824250000...100.380.120.040.00.000.000.120.3guess_passwd
10udpprivateSF4400000...2541.000.010.010.00.000.000.000.0snmpguess
20tcptelnetS30440000...790.310.610.000.00.210.680.600.0processtable
30udpprivateSF53550000...2551.000.000.870.00.000.000.000.0normal
40tcpprivateSH000000...10.061.001.000.01.001.000.000.0nmap
\n", 1036 | "

5 rows × 42 columns

\n", 1037 | "
" 1038 | ], 1039 | "text/plain": [ 1040 | " Duration Protocol_type Service Flag Src_bytes Dst_bytes Land \\\n", 1041 | "0 13 tcp telnet SF 118 2425 0 \n", 1042 | "1 0 udp private SF 44 0 0 \n", 1043 | "2 0 tcp telnet S3 0 44 0 \n", 1044 | "3 0 udp private SF 53 55 0 \n", 1045 | "4 0 tcp private SH 0 0 0 \n", 1046 | "\n", 1047 | " Wrong_fragment Urgent Hot ... Dst_host_srv_count \\\n", 1048 | "0 0 0 0 ... 10 \n", 1049 | "1 0 0 0 ... 254 \n", 1050 | "2 0 0 0 ... 79 \n", 1051 | "3 0 0 0 ... 255 \n", 1052 | "4 0 0 0 ... 1 \n", 1053 | "\n", 1054 | " Dst_host_same_srv_rate Dst_host_diff_srv_rate \\\n", 1055 | "0 0.38 0.12 \n", 1056 | "1 1.00 0.01 \n", 1057 | "2 0.31 0.61 \n", 1058 | "3 1.00 0.00 \n", 1059 | "4 0.06 1.00 \n", 1060 | "\n", 1061 | " Dst_host_same_src_port_rate Dst_host_srv_diff_host_rate \\\n", 1062 | "0 0.04 0.0 \n", 1063 | "1 0.01 0.0 \n", 1064 | "2 0.00 0.0 \n", 1065 | "3 0.87 0.0 \n", 1066 | "4 1.00 0.0 \n", 1067 | "\n", 1068 | " Dst_host_serror_rate Dst_host_srv_serror_rate Dst_host_rerror_rate \\\n", 1069 | "0 0.00 0.00 0.12 \n", 1070 | "1 0.00 0.00 0.00 \n", 1071 | "2 0.21 0.68 0.60 \n", 1072 | "3 0.00 0.00 0.00 \n", 1073 | "4 1.00 1.00 0.00 \n", 1074 | "\n", 1075 | " Dst_host_srv_rerror_rate attack_type \n", 1076 | "0 0.3 guess_passwd \n", 1077 | "1 0.0 snmpguess \n", 1078 | "2 0.0 processtable \n", 1079 | "3 0.0 normal \n", 1080 | "4 0.0 nmap \n", 1081 | "\n", 1082 | "[5 rows x 42 columns]" 1083 | ] 1084 | }, 1085 | "execution_count": 3, 1086 | "metadata": {}, 1087 | "output_type": "execute_result" 1088 | } 1089 | ], 1090 | "source": [ 1091 | "#Define file names and call loadCSV to load the CSV files\n", 1092 | "testingDataSet = loadCSV(testingFileNameWithAbsolutePath)\n", 1093 | "difficultyLevel = testingDataSet.pop('difficulty_level')\n", 1094 | "\n", 1095 | "#Look at the statistics of the dataSet\n", 1096 | "getStatisticsOfData(testingDataSet)\n", 1097 | "print(\"\\n***** Here is how to testing dataset looks like before performing any 
pre-processing *****\")\n", 1098 | "testingDataSet.head()" 1099 | ] 1100 | }, 1101 | { 1102 | "cell_type": "markdown", 1103 | "metadata": {}, 1104 | "source": [ 1105 | "### Perform pre-processing" 1106 | ] 1107 | }, 1108 | { 1109 | "cell_type": "code", 1110 | "execution_count": 4, 1111 | "metadata": { 1112 | "scrolled": true 1113 | }, 1114 | "outputs": [ 1115 | { 1116 | "name": "stdout", 1117 | "output_type": "stream", 1118 | "text": [ 1119 | "[['ExtraTreesClassifier', 'OneHotEncoder', 'Standardization']]\n", 1120 | "***************************************************************************************************************************\n", 1121 | "********************************************* Building Model- 0 As Below *************************************************\n", 1122 | "\t -- Feature Selection: \t ExtraTreesClassifier \n", 1123 | "\t -- Feature Encoding: \t OneHotEncoder \n", 1124 | "\t -- Feature Scaling: \t Standardization \n", 1125 | "\n", 1126 | "completeDataSet.shape: (37041, 43)\n", 1127 | "completeDataSet.head: Duration Protocol_type Service Flag Src_bytes Dst_bytes Land \\\n", 1128 | "0 0 udp other SF 146 0 0 \n", 1129 | "1 0 tcp private S0 0 0 0 \n", 1130 | "2 0 tcp http SF 232 8153 0 \n", 1131 | "3 0 tcp http SF 199 420 0 \n", 1132 | "4 0 tcp private REJ 0 0 0 \n", 1133 | "\n", 1134 | " Wrong_fragment Urgent Hot ... Dst_host_same_srv_rate \\\n", 1135 | "0 0 0 0 ... 0.00 \n", 1136 | "1 0 0 0 ... 0.10 \n", 1137 | "2 0 0 0 ... 1.00 \n", 1138 | "3 0 0 0 ... 1.00 \n", 1139 | "4 0 0 0 ... 
0.07 \n", 1140 | "\n", 1141 | " Dst_host_diff_srv_rate Dst_host_same_src_port_rate \\\n", 1142 | "0 0.60 0.88 \n", 1143 | "1 0.05 0.00 \n", 1144 | "2 0.00 0.03 \n", 1145 | "3 0.00 0.00 \n", 1146 | "4 0.07 0.00 \n", 1147 | "\n", 1148 | " Dst_host_srv_diff_host_rate Dst_host_serror_rate \\\n", 1149 | "0 0.00 0.00 \n", 1150 | "1 0.00 1.00 \n", 1151 | "2 0.04 0.03 \n", 1152 | "3 0.00 0.00 \n", 1153 | "4 0.00 0.00 \n", 1154 | "\n", 1155 | " Dst_host_srv_serror_rate Dst_host_rerror_rate Dst_host_srv_rerror_rate \\\n", 1156 | "0 0.00 0.0 0.00 \n", 1157 | "1 1.00 0.0 0.00 \n", 1158 | "2 0.01 0.0 0.01 \n", 1159 | "3 0.00 0.0 0.00 \n", 1160 | "4 0.00 1.0 1.00 \n", 1161 | "\n", 1162 | " attack_type difficulty_level \n", 1163 | "0 normal 15 \n", 1164 | "1 neptune 19 \n", 1165 | "2 normal 21 \n", 1166 | "3 normal 21 \n", 1167 | "4 neptune 21 \n", 1168 | "\n", 1169 | "[5 rows x 43 columns]\n", 1170 | "\n", 1171 | "****** Start performing feature selection using ExtraTreesClassifier *****\n", 1172 | "****** Falls under wrapper methods (feature importance) *****\n", 1173 | "****** Start label encoding on the categorical features in the given dataset *****\n", 1174 | "****** Number of features before label encoding: 43\n", 1175 | "****** Number of categorical features in the dataset: 3\n", 1176 | "****** Categorical feature names in the dataset: ['Protocol_type' 'Service' 'Flag']\n", 1177 | "\n", 1178 | "****** Here is the list of unique values present in each categorical feature in the dataset *****\n", 1179 | "\n", 1180 | "\n", 1181 | "Protocol_type: 3 \n", 1182 | "| distinct values |\n", 1183 | "|-------------------|\n", 1184 | "| udp |\n", 1185 | "| tcp |\n", 1186 | "| icmp |\n", 1187 | "\n", 1188 | "\n", 1189 | "Service: 67 \n", 1190 | "| distinct values |\n", 1191 | "|-------------------|\n", 1192 | "| other |\n", 1193 | "| private |\n", 1194 | "| http |\n", 1195 | "| remote_job |\n", 1196 | "| ftp_data |\n", 1197 | "| name |\n", 1198 | "| netbios_ns |\n", 1199 | "| eco_i 
|\n", 1200 | "| mtp |\n", 1201 | "| telnet |\n", 1202 | "| finger |\n", 1203 | "| domain_u |\n", 1204 | "| supdup |\n", 1205 | "| uucp_path |\n", 1206 | "| Z39_50 |\n", 1207 | "| smtp |\n", 1208 | "| csnet_ns |\n", 1209 | "| uucp |\n", 1210 | "| netbios_dgm |\n", 1211 | "| urp_i |\n", 1212 | "| auth |\n", 1213 | "| domain |\n", 1214 | "| ftp |\n", 1215 | "| bgp |\n", 1216 | "| ldap |\n", 1217 | "| ecr_i |\n", 1218 | "| gopher |\n", 1219 | "| vmnet |\n", 1220 | "| systat |\n", 1221 | "| http_443 |\n", 1222 | "| efs |\n", 1223 | "| whois |\n", 1224 | "| imap4 |\n", 1225 | "| iso_tsap |\n", 1226 | "| echo |\n", 1227 | "| klogin |\n", 1228 | "| link |\n", 1229 | "| sunrpc |\n", 1230 | "| login |\n", 1231 | "| kshell |\n", 1232 | "| sql_net |\n", 1233 | "| time |\n", 1234 | "| hostnames |\n", 1235 | "| exec |\n", 1236 | "| ntp_u |\n", 1237 | "| discard |\n", 1238 | "| nntp |\n", 1239 | "| courier |\n", 1240 | "| ctf |\n", 1241 | "| ssh |\n", 1242 | "| daytime |\n", 1243 | "| shell |\n", 1244 | "| netstat |\n", 1245 | "| pop_3 |\n", 1246 | "| nnsp |\n", 1247 | "| IRC |\n", 1248 | "| pop_2 |\n", 1249 | "| printer |\n", 1250 | "| tim_i |\n", 1251 | "| pm_dump |\n", 1252 | "| red_i |\n", 1253 | "| netbios_ssn |\n", 1254 | "| rje |\n", 1255 | "| X11 |\n", 1256 | "| urh_i |\n", 1257 | "| http_8001 |\n", 1258 | "| tftp_u |\n", 1259 | "\n", 1260 | "\n", 1261 | "Flag: 11 \n", 1262 | "| distinct values |\n", 1263 | "|-------------------|\n", 1264 | "| SF |\n", 1265 | "| S0 |\n", 1266 | "| REJ |\n", 1267 | "| RSTR |\n", 1268 | "| SH |\n", 1269 | "| RSTO |\n", 1270 | "| S1 |\n", 1271 | "| RSTOS0 |\n", 1272 | "| S3 |\n", 1273 | "| S2 |\n", 1274 | "| OTH |\n", 1275 | "\n", 1276 | "****** Number of features after label encoding: 43\n", 1277 | "****** End label encoding on the categorical features in the given dataset *****\n", 1278 | "\n", 1279 | "****** ExtraTreesClassification is in progress *****\n", 1280 | "\n", 1281 | " selectedFeatures after ExtraTreesClassification: 
difficulty_level 0.076128\n", 1282 | "Same_srv_rate 0.071428\n", 1283 | "Dst_host_srv_serror_rate 0.049446\n", 1284 | "Service 0.046810\n", 1285 | "Dst_host_serror_rate 0.046286\n", 1286 | "Flag 0.044061\n", 1287 | "Dst_host_same_srv_rate 0.043586\n", 1288 | "Serror_rate 0.042794\n", 1289 | "Protocol_type 0.041901\n", 1290 | "Dst_host_srv_count 0.041828\n", 1291 | "Srv_serror_rate 0.040107\n", 1292 | "Dst_host_same_src_port_rate 0.037406\n", 1293 | "Count 0.036696\n", 1294 | "Logged_in 0.035569\n", 1295 | "Dst_host_rerror_rate 0.030801\n", 1296 | "Dst_host_diff_srv_rate 0.029853\n", 1297 | "Src_bytes 0.028388\n", 1298 | "Diff_srv_rate 0.027244\n", 1299 | "Dst_host_count 0.027063\n", 1300 | "Rerror_rate 0.024310\n", 1301 | "dtype: float64\n", 1302 | "****** Completed ExtraTreesClassification *****\n", 1303 | "\n", 1304 | "***** Number of columns in the dataSet after feature selection: 21\n", 1305 | "***** Columns in the dataSet after feature selection: \n", 1306 | " Index(['Protocol_type', 'Service', 'Flag', 'Src_bytes', 'Logged_in', 'Count',\n", 1307 | " 'Serror_rate', 'Srv_serror_rate', 'Rerror_rate', 'Same_srv_rate',\n", 1308 | " 'Diff_srv_rate', 'Dst_host_count', 'Dst_host_srv_count',\n", 1309 | " 'Dst_host_same_srv_rate', 'Dst_host_diff_srv_rate',\n", 1310 | " 'Dst_host_same_src_port_rate', 'Dst_host_serror_rate',\n", 1311 | " 'Dst_host_srv_serror_rate', 'Dst_host_rerror_rate', 'difficulty_level',\n", 1312 | " 'attack_type'],\n", 1313 | " dtype='object')\n", 1314 | "****** End performing feature selection using ExtraTreesClassifier *****\n", 1315 | "****** Start one hot encoding on the categorical features in the given dataset *****\n", 1316 | "****** Number of features before one hot encoding: 21\n", 1317 | "****** Number of categorical features in the dataset: 0\n", 1318 | "****** Categorical feature names in the dataset: []\n", 1319 | "\n", 1320 | "****** Here is the list of unique values present in each categorical feature in the dataset *****\n", 1321 | 
"\n", 1322 | "\n", 1323 | "attack_type: 40 \n", 1324 | "| distinct values |\n", 1325 | "|-------------------|\n", 1326 | "| normal |\n", 1327 | "| neptune |\n", 1328 | "| warezclient |\n", 1329 | "| ipsweep |\n", 1330 | "| portsweep |\n", 1331 | "| teardrop |\n", 1332 | "| nmap |\n", 1333 | "| satan |\n", 1334 | "| smurf |\n", 1335 | "| pod |\n", 1336 | "| back |\n", 1337 | "| guess_passwd |\n", 1338 | "| ftp_write |\n", 1339 | "| multihop |\n", 1340 | "| rootkit |\n", 1341 | "| buffer_overflow |\n", 1342 | "| imap |\n", 1343 | "| warezmaster |\n", 1344 | "| phf |\n", 1345 | "| land |\n", 1346 | "| loadmodule |\n", 1347 | "| spy |\n", 1348 | "| snmpguess |\n", 1349 | "| processtable |\n", 1350 | "| saint |\n", 1351 | "| mscan |\n", 1352 | "| apache2 |\n", 1353 | "| httptunnel |\n", 1354 | "| mailbomb |\n", 1355 | "| snmpgetattack |\n", 1356 | "| worm |\n", 1357 | "| sendmail |\n", 1358 | "| xlock |\n", 1359 | "| xterm |\n", 1360 | "| xsnoop |\n", 1361 | "| ps |\n", 1362 | "| named |\n", 1363 | "| udpstorm |\n", 1364 | "| perl |\n", 1365 | "| sqlattack |\n", 1366 | "\n", 1367 | "****** Number of features after one hot encoding: 21\n", 1368 | "****** End one hot encoding on the categorical features in the given dataset *****\n", 1369 | "\n", 1370 | "****** Start feature scaling of the features present in the dataset using StandardScalar *****\n", 1371 | "[[2 41 9 ... 0.0 15 'normal']\n", 1372 | " [1 46 5 ... 0.0 19 'neptune']\n", 1373 | " [1 22 9 ... 0.0 21 'normal']\n", 1374 | " ...\n", 1375 | " [1 57 2 ... 0.85 13 'mscan']\n", 1376 | " [1 54 1 ... 0.88 15 'mscan']\n", 1377 | " [2 46 9 ... 0.0 17 'snmpguess']]\n", 1378 | "\n", 1379 | "****** Number of features in the dataset before performing scaling: 20\n", 1380 | "\n", 1381 | "****** Features in the dataset before performing scaling ***** \n", 1382 | " [[2 41 9 ... 0.0 0.0 15]\n", 1383 | " [1 46 5 ... 1.0 0.0 19]\n", 1384 | " [1 22 9 ... 0.01 0.0 21]\n", 1385 | " ...\n", 1386 | " [1 57 2 ... 
0.08 0.85 13]\n", 1387 | " [1 54 1 ... 0.0 0.88 15]\n", 1388 | " [2 46 9 ... 0.0 0.0 17]]\n", 1389 | "\n", 1390 | "****** Number of features in the dataset after performing scaling: 20\n", 1391 | "\n", 1392 | "****** Features in the dataset after performing scaling ***** \n", 1393 | " [[ 2.03857058 0.6299765 0.73536923 ... -0.54981386 -0.48776502\n", 1394 | " -0.85380057]\n", 1395 | " [-0.15478617 0.93890397 -0.66099165 ... 1.89967409 -0.48776502\n", 1396 | " 0.22813874]\n", 1397 | " [-0.15478617 -0.54394786 0.73536923 ... -0.52531898 -0.48776502\n", 1398 | " 0.76910839]\n", 1399 | " ...\n", 1400 | " [-0.15478617 1.61854439 -1.70826232 ... -0.35385482 2.03171007\n", 1401 | " -1.39477022]\n", 1402 | " [-0.15478617 1.43318791 -2.05735254 ... -0.54981386 2.12063272\n", 1403 | " -0.85380057]\n", 1404 | " [ 2.03857058 0.93890397 0.73536923 ... -0.54981386 -0.48776502\n", 1405 | " -0.31283092]]\n", 1406 | "scaledFeatures.head(): Protocol_type Service Flag Src_bytes Logged_in Count \\\n", 1407 | "0 2.038571 0.629977 0.735369 -0.011190 -0.732914 -0.581217 \n", 1408 | "1 -0.154786 0.938904 -0.660992 -0.011262 -0.732914 0.275339 \n", 1409 | "2 -0.154786 -0.543948 0.735369 -0.011147 1.364417 -0.643512 \n", 1410 | "3 -0.154786 -0.543948 0.735369 -0.011163 1.364417 -0.448840 \n", 1411 | "4 -0.154786 0.938904 -2.057353 -0.011262 -0.732914 0.259766 \n", 1412 | "\n", 1413 | " Serror_rate Srv_serror_rate Rerror_rate Same_srv_rate ... \\\n", 1414 | "0 -0.556584 -0.552030 -0.460806 -1.421427 ... \n", 1415 | "1 1.851192 1.851769 -0.460806 -1.491319 ... \n", 1416 | "2 -0.075029 -0.071270 -0.460806 0.721924 ... \n", 1417 | "3 -0.556584 -0.552030 -0.460806 0.721924 ... \n", 1418 | "4 -0.556584 -0.552030 2.274941 -1.235049 ... 
\n", 1419 | "\n", 1420 | " Dst_host_count Dst_host_srv_count Dst_host_same_srv_rate \\\n", 1421 | "0 0.656445 -1.050270 -1.193023 \n", 1422 | "1 0.656445 -0.821669 -0.966271 \n", 1423 | "2 -1.709884 1.272317 1.074493 \n", 1424 | "3 0.656445 1.272317 1.074493 \n", 1425 | "4 0.656445 -0.885678 -1.034297 \n", 1426 | "\n", 1427 | " Dst_host_diff_srv_rate Dst_host_same_src_port_rate Dst_host_serror_rate \\\n", 1428 | "0 2.187298 2.137976 -0.561390 \n", 1429 | "1 -0.237144 -0.498320 1.904034 \n", 1430 | "2 -0.457548 -0.408446 -0.487427 \n", 1431 | "3 -0.457548 -0.498320 -0.561390 \n", 1432 | "4 -0.148983 -0.498320 -0.561390 \n", 1433 | "\n", 1434 | " Dst_host_srv_serror_rate Dst_host_rerror_rate difficulty_level \\\n", 1435 | "0 -0.549814 -0.487765 -0.853801 \n", 1436 | "1 1.899674 -0.487765 0.228139 \n", 1437 | "2 -0.525319 -0.487765 0.769108 \n", 1438 | "3 -0.549814 -0.487765 0.769108 \n", 1439 | "4 -0.549814 2.476323 0.769108 \n", 1440 | "\n", 1441 | " attack_type \n", 1442 | "0 normal \n", 1443 | "1 neptune \n", 1444 | "2 normal \n", 1445 | "3 normal \n", 1446 | "4 neptune \n", 1447 | "\n", 1448 | "[5 rows x 21 columns]\n", 1449 | "scaledFeatures.shape: (37041, 21)\n", 1450 | "\n", 1451 | "****** End of feature scaling of the features present in the dataset using StandardScalar *****\n", 1452 | "\n", 1453 | "features.shape: (37041, 20)\n", 1454 | "label.shape: (37041,)\n" 1455 | ] 1456 | }, 1457 | { 1458 | "data": { 1459 | "text/html": [ 1460 | "
\n", 1461 | "\n", 1474 | "\n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " 
\n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | "
Protocol_typeServiceFlagSrc_bytesLogged_inCountSerror_rateSrv_serror_rateRerror_rateSame_srv_rate...Dst_host_countDst_host_srv_countDst_host_same_srv_rateDst_host_diff_srv_rateDst_host_same_src_port_rateDst_host_serror_rateDst_host_srv_serror_rateDst_host_rerror_ratedifficulty_levelattack_type
02.0385710.6299770.735369-0.011190-0.732914-0.581217-0.556584-0.552030-0.460806-1.421427...0.656445-1.050270-1.1930232.1872982.137976-0.561390-0.549814-0.487765-0.853801normal
1-0.1547860.938904-0.660992-0.011262-0.7329140.2753391.8511921.851769-0.460806-1.491319...0.656445-0.821669-0.966271-0.237144-0.4983201.9040341.899674-0.4877650.228139neptune
2-0.154786-0.5439480.735369-0.0111471.364417-0.643512-0.075029-0.071270-0.4608060.721924...-1.7098841.2723171.074493-0.457548-0.408446-0.487427-0.525319-0.4877650.769108normal
3-0.154786-0.5439480.735369-0.0111631.364417-0.448840-0.556584-0.552030-0.4608060.721924...0.6564451.2723171.074493-0.457548-0.498320-0.561390-0.549814-0.4877650.769108normal
4-0.1547860.938904-2.057353-0.011262-0.7329140.259766-0.556584-0.5520302.274941-1.235049...0.656445-0.885678-1.034297-0.148983-0.498320-0.561390-0.5498142.4763230.769108neptune
\n", 1624 | "

5 rows × 21 columns

\n", 1625 | "
" 1626 | ], 1627 | "text/plain": [ 1628 | " Protocol_type Service Flag Src_bytes Logged_in Count \\\n", 1629 | "0 2.038571 0.629977 0.735369 -0.011190 -0.732914 -0.581217 \n", 1630 | "1 -0.154786 0.938904 -0.660992 -0.011262 -0.732914 0.275339 \n", 1631 | "2 -0.154786 -0.543948 0.735369 -0.011147 1.364417 -0.643512 \n", 1632 | "3 -0.154786 -0.543948 0.735369 -0.011163 1.364417 -0.448840 \n", 1633 | "4 -0.154786 0.938904 -2.057353 -0.011262 -0.732914 0.259766 \n", 1634 | "\n", 1635 | " Serror_rate Srv_serror_rate Rerror_rate Same_srv_rate ... \\\n", 1636 | "0 -0.556584 -0.552030 -0.460806 -1.421427 ... \n", 1637 | "1 1.851192 1.851769 -0.460806 -1.491319 ... \n", 1638 | "2 -0.075029 -0.071270 -0.460806 0.721924 ... \n", 1639 | "3 -0.556584 -0.552030 -0.460806 0.721924 ... \n", 1640 | "4 -0.556584 -0.552030 2.274941 -1.235049 ... \n", 1641 | "\n", 1642 | " Dst_host_count Dst_host_srv_count Dst_host_same_srv_rate \\\n", 1643 | "0 0.656445 -1.050270 -1.193023 \n", 1644 | "1 0.656445 -0.821669 -0.966271 \n", 1645 | "2 -1.709884 1.272317 1.074493 \n", 1646 | "3 0.656445 1.272317 1.074493 \n", 1647 | "4 0.656445 -0.885678 -1.034297 \n", 1648 | "\n", 1649 | " Dst_host_diff_srv_rate Dst_host_same_src_port_rate Dst_host_serror_rate \\\n", 1650 | "0 2.187298 2.137976 -0.561390 \n", 1651 | "1 -0.237144 -0.498320 1.904034 \n", 1652 | "2 -0.457548 -0.408446 -0.487427 \n", 1653 | "3 -0.457548 -0.498320 -0.561390 \n", 1654 | "4 -0.148983 -0.498320 -0.561390 \n", 1655 | "\n", 1656 | " Dst_host_srv_serror_rate Dst_host_rerror_rate difficulty_level \\\n", 1657 | "0 -0.549814 -0.487765 -0.853801 \n", 1658 | "1 1.899674 -0.487765 0.228139 \n", 1659 | "2 -0.525319 -0.487765 0.769108 \n", 1660 | "3 -0.549814 -0.487765 0.769108 \n", 1661 | "4 -0.549814 2.476323 0.769108 \n", 1662 | "\n", 1663 | " attack_type \n", 1664 | "0 normal \n", 1665 | "1 neptune \n", 1666 | "2 normal \n", 1667 | "3 normal \n", 1668 | "4 neptune \n", 1669 | "\n", 1670 | "[5 rows x 21 columns]" 1671 | ] 1672 | }, 
1673 | "execution_count": 4, 1674 | "metadata": {}, 1675 | "output_type": "execute_result" 1676 | } 1677 | ], 1678 | "source": [ 1679 | "arrayOfModels = defineArrayForPreProcessing()\n", 1680 | "completeEncodedAndScaledDataset = performPreprocessing(trainingDataSet, testingDataSet, arrayOfModels)\n", 1681 | "completeEncodedAndScaledDataset.head()" 1682 | ] 1683 | }, 1684 | { 1685 | "cell_type": "markdown", 1686 | "metadata": {}, 1687 | "source": [ 1688 | "### After preprocessing, check the shape of the dataset" 1689 | ] 1690 | }, 1691 | { 1692 | "cell_type": "code", 1693 | "execution_count": 5, 1694 | "metadata": {}, 1695 | "outputs": [ 1696 | { 1697 | "name": "stdout", 1698 | "output_type": "stream", 1699 | "text": [ 1700 | "(37041, 20) (37041,)\n", 1701 | "Number of unique values in label: 40\n", 1702 | "Unique values in label: ['apache2' 'back' 'buffer_overflow' 'ftp_write' 'guess_passwd'\n", 1703 | " 'httptunnel' 'imap' 'ipsweep' 'land' 'loadmodule' 'mailbomb' 'mscan'\n", 1704 | " 'multihop' 'named' 'neptune' 'nmap' 'normal' 'perl' 'phf' 'pod'\n", 1705 | " 'portsweep' 'processtable' 'ps' 'rootkit' 'saint' 'satan' 'sendmail'\n", 1706 | " 'smurf' 'snmpgetattack' 'snmpguess' 'spy' 'sqlattack' 'teardrop'\n", 1707 | " 'udpstorm' 'warezclient' 'warezmaster' 'worm' 'xlock' 'xsnoop' 'xterm']\n" 1708 | ] 1709 | } 1710 | ], 1711 | "source": [ 1712 | "x = completeEncodedAndScaledDataset.drop('attack_type',axis=1)\n", 1713 | "y = completeEncodedAndScaledDataset['attack_type']\n", 1714 | "print(x.shape, y.shape)\n", 1715 | "print('Number of unique values in label: ',len(np.unique(y)))\n", 1716 | "print('Unique values in label: ',np.unique(y))\n", 1717 | "#print(y.value_counts())" 1718 | ] 1719 | }, 1720 | { 1721 | "cell_type": "markdown", 1722 | "metadata": {}, 1723 | "source": [ 1724 | "### Encode the categorical label values" 1725 | ] 1726 | }, 1727 | { 1728 | "cell_type": "code", 1729 | "execution_count": 6, 1730 | "metadata": {}, 1731 | "outputs": [ 1732 | { 1733 | 
"name": "stdout", 1734 | "output_type": "stream", 1735 | "text": [ 1736 | "(37041, 20) (37041, 40)\n" 1737 | ] 1738 | }, 1739 | { 1740 | "name": "stderr", 1741 | "output_type": "stream", 1742 | "text": [ 1743 | "D:\\Anaconda3\\envs\\tf_gpu\\lib\\site-packages\\ipykernel_launcher.py:2: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", 1744 | " \n" 1745 | ] 1746 | } 1747 | ], 1748 | "source": [ 1749 | "onehot = pd.get_dummies(y)\n", 1750 | "y = onehot.as_matrix()\n", 1751 | "print(x.shape, y.shape)" 1752 | ] 1753 | }, 1754 | { 1755 | "cell_type": "markdown", 1756 | "metadata": {}, 1757 | "source": [ 1758 | "## Build a neural Network model" 1759 | ] 1760 | }, 1761 | { 1762 | "cell_type": "code", 1763 | "execution_count": 7, 1764 | "metadata": {}, 1765 | "outputs": [], 1766 | "source": [ 1767 | "'''\n", 1768 | "This function is used to define, compile and filt a neural network\n", 1769 | "'''\n", 1770 | "def nn_model(trainx, trainy, valx,valy,bt_size,epochs, layers):\n", 1771 | " model = Sequential()\n", 1772 | " model.add(Dense(layers[0],activation='relu', input_shape=(trainx.shape[1],)))\n", 1773 | " for l in layers[1:]:\n", 1774 | " model.add(Dense(l, activation='relu' ))\n", 1775 | " model.add(Dropout(0.30))\n", 1776 | " model.add(Dense(trainy.shape[1], activation='softmax'))\n", 1777 | " model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n", 1778 | " hist=model.fit(trainx, trainy, batch_size=bt_size, epochs=epochs, shuffle=True, validation_data=(valx,valy), verbose=True)\n", 1779 | " return hist" 1780 | ] 1781 | }, 1782 | { 1783 | "cell_type": "code", 1784 | "execution_count": 8, 1785 | "metadata": { 1786 | "scrolled": true 1787 | }, 1788 | "outputs": [ 1789 | { 1790 | "name": "stdout", 1791 | "output_type": "stream", 1792 | "text": [ 1793 | "WARNING:tensorflow:From D:\\Anaconda3\\envs\\tf_gpu\\lib\\site-packages\\tensorflow\\python\\ops\\resource_variable_ops.py:435: colocate_with 
(from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n", 1794 | "Instructions for updating:\n", 1795 | "Colocations handled automatically by placer.\n", 1796 | "WARNING:tensorflow:From D:\\Anaconda3\\envs\\tf_gpu\\lib\\site-packages\\tensorflow\\python\\keras\\layers\\core.py:143: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n", 1797 | "Instructions for updating:\n", 1798 | "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n", 1799 | "Train on 27780 samples, validate on 9261 samples\n", 1800 | "WARNING:tensorflow:From D:\\Anaconda3\\envs\\tf_gpu\\lib\\site-packages\\tensorflow\\python\\ops\\math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", 1801 | "Instructions for updating:\n", 1802 | "Use tf.cast instead.\n", 1803 | "Epoch 1/100\n", 1804 | "27780/27780 [==============================] - 12s 420us/sample - loss: 1.0481 - acc: 0.7006 - val_loss: 0.6489 - val_acc: 0.7742\n", 1805 | "Epoch 2/100\n", 1806 | "27780/27780 [==============================] - 11s 381us/sample - loss: 0.7296 - acc: 0.7751 - val_loss: 0.5398 - val_acc: 0.8213\n", 1807 | "Epoch 3/100\n", 1808 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.6316 - acc: 0.7992 - val_loss: 0.4577 - val_acc: 0.8402\n", 1809 | "Epoch 4/100\n", 1810 | "27780/27780 [==============================] - 11s 392us/sample - loss: 0.5731 - acc: 0.8157 - val_loss: 0.4002 - val_acc: 0.8790\n", 1811 | "Epoch 5/100\n", 1812 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5312 - acc: 0.8364 - val_loss: 0.3762 - val_acc: 0.8743\n", 1813 | "Epoch 6/100\n", 1814 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.5000 - acc: 0.8528 - val_loss: 0.3834 - val_acc: 0.8959\n", 1815 | "Epoch 7/100\n", 1816 | "27780/27780 
[==============================] - 11s 385us/sample - loss: 0.5053 - acc: 0.8568 - val_loss: 0.3448 - val_acc: 0.9031\n", 1817 | "Epoch 8/100\n", 1818 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.4810 - acc: 0.8643 - val_loss: 0.3433 - val_acc: 0.9037\n", 1819 | "Epoch 9/100\n", 1820 | "27780/27780 [==============================] - 11s 395us/sample - loss: 0.4779 - acc: 0.8639 - val_loss: 0.3262 - val_acc: 0.9022\n", 1821 | "Epoch 10/100\n", 1822 | "27780/27780 [==============================] - 37s 1ms/sample - loss: 0.4537 - acc: 0.8704 - val_loss: 0.3192 - val_acc: 0.9094\n", 1823 | "Epoch 11/100\n", 1824 | "27780/27780 [==============================] - 40s 1ms/sample - loss: 0.4726 - acc: 0.8733 - val_loss: 0.3233 - val_acc: 0.9058\n", 1825 | "Epoch 12/100\n", 1826 | "27780/27780 [==============================] - 42s 2ms/sample - loss: 0.4438 - acc: 0.8746 - val_loss: 0.2998 - val_acc: 0.9097\n", 1827 | "Epoch 13/100\n", 1828 | "27780/27780 [==============================] - 11s 413us/sample - loss: 0.4329 - acc: 0.8796 - val_loss: 0.3063 - val_acc: 0.9161\n", 1829 | "Epoch 14/100\n", 1830 | "27780/27780 [==============================] - 10s 361us/sample - loss: 0.4481 - acc: 0.8753 - val_loss: 0.2901 - val_acc: 0.9099\n", 1831 | "Epoch 15/100\n", 1832 | "27780/27780 [==============================] - 10s 364us/sample - loss: 0.4404 - acc: 0.8772 - val_loss: 0.3386 - val_acc: 0.9068\n", 1833 | "Epoch 16/100\n", 1834 | "27780/27780 [==============================] - 10s 367us/sample - loss: 0.4451 - acc: 0.8808 - val_loss: 0.2942 - val_acc: 0.9142\n", 1835 | "Epoch 17/100\n", 1836 | "27780/27780 [==============================] - 10s 370us/sample - loss: 0.4482 - acc: 0.8839 - val_loss: 0.2815 - val_acc: 0.9166\n", 1837 | "Epoch 18/100\n", 1838 | "27780/27780 [==============================] - 10s 372us/sample - loss: 0.4619 - acc: 0.8762 - val_loss: 0.3048 - val_acc: 0.9119\n", 1839 | "Epoch 19/100\n", 1840 | "27780/27780 
[==============================] - 10s 373us/sample - loss: 0.4768 - acc: 0.8789 - val_loss: 0.2868 - val_acc: 0.9173\n", 1841 | "Epoch 20/100\n", 1842 | "27780/27780 [==============================] - 10s 374us/sample - loss: 0.4557 - acc: 0.8812 - val_loss: 0.2839 - val_acc: 0.9148\n", 1843 | "Epoch 21/100\n", 1844 | "27780/27780 [==============================] - 10s 373us/sample - loss: 0.4341 - acc: 0.8857 - val_loss: 0.2716 - val_acc: 0.9219\n", 1845 | "Epoch 22/100\n", 1846 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.4442 - acc: 0.8855 - val_loss: 0.2975 - val_acc: 0.9151\n", 1847 | "Epoch 23/100\n", 1848 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.4499 - acc: 0.8844 - val_loss: 0.2735 - val_acc: 0.9175\n", 1849 | "Epoch 24/100\n", 1850 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.4499 - acc: 0.8874 - val_loss: 0.2904 - val_acc: 0.9228\n", 1851 | "Epoch 25/100\n", 1852 | "27780/27780 [==============================] - 10s 378us/sample - loss: 0.4782 - acc: 0.8844 - val_loss: 0.3537 - val_acc: 0.9121\n", 1853 | "Epoch 26/100\n", 1854 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.5170 - acc: 0.8828 - val_loss: 0.2650 - val_acc: 0.9230\n", 1855 | "Epoch 27/100\n", 1856 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.4159 - acc: 0.8907 - val_loss: 0.2711 - val_acc: 0.9246\n", 1857 | "Epoch 28/100\n", 1858 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.4607 - acc: 0.8807 - val_loss: 0.3042 - val_acc: 0.9160\n", 1859 | "Epoch 29/100\n", 1860 | "27780/27780 [==============================] - 11s 381us/sample - loss: 0.4366 - acc: 0.8857 - val_loss: 0.2859 - val_acc: 0.9171\n", 1861 | "Epoch 30/100\n", 1862 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.4303 - acc: 0.8923 - val_loss: 0.2832 - val_acc: 0.9194\n", 1863 | "Epoch 31/100\n", 1864 | 
"27780/27780 [==============================] - 11s 380us/sample - loss: 0.5191 - acc: 0.8786 - val_loss: 0.4336 - val_acc: 0.8996\n", 1865 | "Epoch 32/100\n", 1866 | "27780/27780 [==============================] - 11s 381us/sample - loss: 0.5173 - acc: 0.8720 - val_loss: 0.3143 - val_acc: 0.9112\n", 1867 | "Epoch 33/100\n", 1868 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.4416 - acc: 0.8854 - val_loss: 0.2853 - val_acc: 0.9193\n", 1869 | "Epoch 34/100\n", 1870 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.4670 - acc: 0.8879 - val_loss: 0.2971 - val_acc: 0.9171\n", 1871 | "Epoch 35/100\n", 1872 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.4538 - acc: 0.8838 - val_loss: 0.2842 - val_acc: 0.9210\n", 1873 | "Epoch 36/100\n", 1874 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.4627 - acc: 0.8860 - val_loss: 0.3164 - val_acc: 0.9176\n", 1875 | "Epoch 37/100\n", 1876 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.4645 - acc: 0.8835 - val_loss: 0.3689 - val_acc: 0.9066\n", 1877 | "Epoch 38/100\n", 1878 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.4621 - acc: 0.8809 - val_loss: 0.3259 - val_acc: 0.9129\n", 1879 | "Epoch 39/100\n", 1880 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4537 - acc: 0.8845 - val_loss: 0.2831 - val_acc: 0.9199\n", 1881 | "Epoch 40/100\n", 1882 | "27780/27780 [==============================] - 11s 394us/sample - loss: 0.4641 - acc: 0.8784 - val_loss: 0.2876 - val_acc: 0.9129\n", 1883 | "Epoch 41/100\n", 1884 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4442 - acc: 0.8835 - val_loss: 0.2862 - val_acc: 0.9189\n", 1885 | "Epoch 42/100\n", 1886 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4541 - acc: 0.8864 - val_loss: 0.2863 - val_acc: 0.9191\n", 1887 | "Epoch 43/100\n", 
1888 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4861 - acc: 0.8815 - val_loss: 0.3277 - val_acc: 0.9136\n", 1889 | "Epoch 44/100\n", 1890 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4576 - acc: 0.8830 - val_loss: 0.3660 - val_acc: 0.9008\n", 1891 | "Epoch 45/100\n", 1892 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5172 - acc: 0.8793 - val_loss: 0.3160 - val_acc: 0.9036\n", 1893 | "Epoch 46/100\n", 1894 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5235 - acc: 0.8769 - val_loss: 0.3028 - val_acc: 0.9205\n", 1895 | "Epoch 47/100\n", 1896 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.4917 - acc: 0.8840 - val_loss: 0.2869 - val_acc: 0.9187\n", 1897 | "Epoch 48/100\n", 1898 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.4863 - acc: 0.8767 - val_loss: 0.3165 - val_acc: 0.9057\n", 1899 | "Epoch 49/100\n", 1900 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.4999 - acc: 0.8771 - val_loss: 0.2654 - val_acc: 0.9218\n", 1901 | "Epoch 50/100\n", 1902 | "27780/27780 [==============================] - 11s 405us/sample - loss: 0.4832 - acc: 0.8861 - val_loss: 0.2689 - val_acc: 0.9221\n", 1903 | "Epoch 51/100\n", 1904 | "27780/27780 [==============================] - 11s 390us/sample - loss: 0.4703 - acc: 0.8832 - val_loss: 0.2785 - val_acc: 0.9167\n" 1905 | ] 1906 | }, 1907 | { 1908 | "name": "stdout", 1909 | "output_type": "stream", 1910 | "text": [ 1911 | "Epoch 52/100\n", 1912 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.5021 - acc: 0.8807 - val_loss: 0.2870 - val_acc: 0.9158\n", 1913 | "Epoch 53/100\n", 1914 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.4962 - acc: 0.8730 - val_loss: 0.2757 - val_acc: 0.9199\n", 1915 | "Epoch 54/100\n", 1916 | "27780/27780 [==============================] - 11s 
383us/sample - loss: 0.4674 - acc: 0.8850 - val_loss: 0.2959 - val_acc: 0.9132\n", 1917 | "Epoch 55/100\n", 1918 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.5025 - acc: 0.8792 - val_loss: 0.3102 - val_acc: 0.9173\n", 1919 | "Epoch 56/100\n", 1920 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5713 - acc: 0.8688 - val_loss: 0.2990 - val_acc: 0.9135\n", 1921 | "Epoch 57/100\n", 1922 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.4880 - acc: 0.8781 - val_loss: 0.3754 - val_acc: 0.9047\n", 1923 | "Epoch 58/100\n", 1924 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.5401 - acc: 0.8740 - val_loss: 0.3097 - val_acc: 0.9072\n", 1925 | "Epoch 59/100\n", 1926 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.4993 - acc: 0.8759 - val_loss: 0.3168 - val_acc: 0.9186\n", 1927 | "Epoch 60/100\n", 1928 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.4789 - acc: 0.8816 - val_loss: 0.2970 - val_acc: 0.9136\n", 1929 | "Epoch 61/100\n", 1930 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.5057 - acc: 0.8786 - val_loss: 0.3339 - val_acc: 0.9128\n", 1931 | "Epoch 62/100\n", 1932 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.5387 - acc: 0.8769 - val_loss: 0.3822 - val_acc: 0.9097\n", 1933 | "Epoch 63/100\n", 1934 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5181 - acc: 0.8760 - val_loss: 0.3005 - val_acc: 0.9182\n", 1935 | "Epoch 64/100\n", 1936 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5243 - acc: 0.8725 - val_loss: 0.3749 - val_acc: 0.9021\n", 1937 | "Epoch 65/100\n", 1938 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.5467 - acc: 0.8649 - val_loss: 0.4062 - val_acc: 0.8889\n", 1939 | "Epoch 66/100\n", 1940 | "27780/27780 [==============================] - 
11s 384us/sample - loss: 0.5542 - acc: 0.8632 - val_loss: 0.3906 - val_acc: 0.9081\n", 1941 | "Epoch 67/100\n", 1942 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5404 - acc: 0.8688 - val_loss: 0.3076 - val_acc: 0.9142\n", 1943 | "Epoch 68/100\n", 1944 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5097 - acc: 0.8747 - val_loss: 0.3242 - val_acc: 0.9136\n", 1945 | "Epoch 69/100\n", 1946 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5479 - acc: 0.8698 - val_loss: 0.3221 - val_acc: 0.9144\n", 1947 | "Epoch 70/100\n", 1948 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5271 - acc: 0.8699 - val_loss: 0.3466 - val_acc: 0.9121\n", 1949 | "Epoch 71/100\n", 1950 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.5463 - acc: 0.8725 - val_loss: 0.3034 - val_acc: 0.9151\n", 1951 | "Epoch 72/100\n", 1952 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5350 - acc: 0.8666 - val_loss: 0.3085 - val_acc: 0.9122\n", 1953 | "Epoch 73/100\n", 1954 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5153 - acc: 0.8709 - val_loss: 0.2951 - val_acc: 0.9156\n", 1955 | "Epoch 74/100\n", 1956 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5131 - acc: 0.8686 - val_loss: 0.3406 - val_acc: 0.8987\n", 1957 | "Epoch 75/100\n", 1958 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.6295 - acc: 0.8508 - val_loss: 0.3472 - val_acc: 0.9013\n", 1959 | "Epoch 76/100\n", 1960 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5793 - acc: 0.8554 - val_loss: 0.3403 - val_acc: 0.9096\n", 1961 | "Epoch 77/100\n", 1962 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5163 - acc: 0.8666 - val_loss: 0.3244 - val_acc: 0.9090\n", 1963 | "Epoch 78/100\n", 1964 | "27780/27780 
[==============================] - 11s 387us/sample - loss: 0.5066 - acc: 0.8669 - val_loss: 0.3062 - val_acc: 0.9177\n", 1965 | "Epoch 79/100\n", 1966 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5329 - acc: 0.8664 - val_loss: 0.3231 - val_acc: 0.8989\n", 1967 | "Epoch 80/100\n", 1968 | "27780/27780 [==============================] - 11s 388us/sample - loss: 0.5266 - acc: 0.8626 - val_loss: 0.3232 - val_acc: 0.9050\n", 1969 | "Epoch 81/100\n", 1970 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5326 - acc: 0.8676 - val_loss: 0.3178 - val_acc: 0.9121\n", 1971 | "Epoch 82/100\n", 1972 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.5326 - acc: 0.8677 - val_loss: 0.2966 - val_acc: 0.9139\n", 1973 | "Epoch 83/100\n", 1974 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.5424 - acc: 0.8681 - val_loss: 0.3101 - val_acc: 0.9135\n", 1975 | "Epoch 84/100\n", 1976 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5325 - acc: 0.8724 - val_loss: 0.2976 - val_acc: 0.9140\n", 1977 | "Epoch 85/100\n", 1978 | "27780/27780 [==============================] - 11s 388us/sample - loss: 0.5753 - acc: 0.8688 - val_loss: 0.2812 - val_acc: 0.9152\n", 1979 | "Epoch 86/100\n", 1980 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5258 - acc: 0.8711 - val_loss: 0.3018 - val_acc: 0.9160\n", 1981 | "Epoch 87/100\n", 1982 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5738 - acc: 0.8590 - val_loss: 0.3412 - val_acc: 0.9003\n", 1983 | "Epoch 88/100\n", 1984 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5871 - acc: 0.8603 - val_loss: 0.3327 - val_acc: 0.9085\n", 1985 | "Epoch 89/100\n", 1986 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5534 - acc: 0.8593 - val_loss: 0.3462 - val_acc: 0.9055\n", 1987 | "Epoch 90/100\n", 1988 | 
"27780/27780 [==============================] - 11s 387us/sample - loss: 0.5189 - acc: 0.8657 - val_loss: 0.3108 - val_acc: 0.9120\n", 1989 | "Epoch 91/100\n", 1990 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5696 - acc: 0.8617 - val_loss: 0.3282 - val_acc: 0.9000\n", 1991 | "Epoch 92/100\n", 1992 | "27780/27780 [==============================] - 11s 390us/sample - loss: 0.5057 - acc: 0.8669 - val_loss: 0.3007 - val_acc: 0.9076\n", 1993 | "Epoch 93/100\n", 1994 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5720 - acc: 0.8626 - val_loss: 0.3421 - val_acc: 0.9071\n", 1995 | "Epoch 94/100\n", 1996 | "27780/27780 [==============================] - 11s 390us/sample - loss: 0.5298 - acc: 0.8645 - val_loss: 0.3073 - val_acc: 0.9088\n", 1997 | "Epoch 95/100\n", 1998 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.5363 - acc: 0.8654 - val_loss: 0.3450 - val_acc: 0.9090\n", 1999 | "Epoch 96/100\n", 2000 | "27780/27780 [==============================] - 11s 391us/sample - loss: 0.5624 - acc: 0.8659 - val_loss: 0.3341 - val_acc: 0.9159\n", 2001 | "Epoch 97/100\n", 2002 | "27780/27780 [==============================] - 11s 391us/sample - loss: 0.5754 - acc: 0.8666 - val_loss: 0.3329 - val_acc: 0.9079\n", 2003 | "Epoch 98/100\n", 2004 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.6255 - acc: 0.8611 - val_loss: 0.4500 - val_acc: 0.9037\n", 2005 | "Epoch 99/100\n", 2006 | "27780/27780 [==============================] - 11s 388us/sample - loss: 0.6341 - acc: 0.8553 - val_loss: 0.4839 - val_acc: 0.8996\n", 2007 | "Epoch 100/100\n", 2008 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.7025 - acc: 0.8531 - val_loss: 0.5064 - val_acc: 0.8945\n" 2009 | ] 2010 | } 2011 | ], 2012 | "source": [ 2013 | "trainx, testx, trainy, testy = train_test_split(x,y, test_size=0.25, random_state=42)\n", 2014 | 
"layers=[trainx.shape[1],800,500,400,300,200,100,50,10]\n", 2015 | "hist = nn_model(trainx, trainy, testx, testy,16,100,layers)" 2016 | ] 2017 | }, 2018 | { 2019 | "cell_type": "code", 2020 | "execution_count": 9, 2021 | "metadata": {}, 2022 | "outputs": [ 2023 | { 2024 | "name": "stdout", 2025 | "output_type": "stream", 2026 | "text": [ 2027 | "MAX Accuracy during training: 89.22966122627258\n", 2028 | "MAX Accuracy during validation: 92.46301651000977\n" 2029 | ] 2030 | } 2031 | ], 2032 | "source": [ 2033 | "print('MAX Accuracy during training: ',max(hist.history['acc'])*100)\n", 2034 | "print('MAX Accuracy during validation: ',max(hist.history['val_acc'])*100)" 2035 | ] 2036 | }, 2037 | { 2038 | "cell_type": "markdown", 2039 | "metadata": {}, 2040 | "source": [ 2041 | "## Polt the training accuracy and testing accuracy" 2042 | ] 2043 | }, 2044 | { 2045 | "cell_type": "code", 2046 | "execution_count": 13, 2047 | "metadata": {}, 2048 | "outputs": [ 2049 | { 2050 | "data": { 2051 | "text/plain": [ 2052 | "[]" 2053 | ] 2054 | }, 2055 | "execution_count": 13, 2056 | "metadata": {}, 2057 | "output_type": "execute_result" 2058 | }, 2059 | { 2060 | "data": { 2061 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJztnXeYFFX2/t/L4JBB4qrkKCBIGhEBxYQCBnDBFZV1WBXWhIoEcdeImL6LPyMGVHRNYEJExEVFxcwwRMlJkCEODDAwA0zo8/vjdNF5umame3qoej/P0091Vd2qOlXV/d57zz33XiMiIIQQ4g4qJNoAQgghZQdFnxBCXARFnxBCXARFnxBCXARFnxBCXARFnxBCXARFnxBCXARFnxBCXARFnxBCXETFRBsQTL169aRZs2aJNoMQQk4oFi9evFdE6kdLV+5Ev1mzZkhPT0+0GYQQckJhjNlqJx3dO4QQ4iIo+oQQ4iIo+oQQ4iIo+oQQ4iIo+oQQ4iIo+oQQ4iIo+oQQ4iIo+gQAsGMH8OKLwMGDibaEEBJPKPouZ9064OabgWbNgFGjgMmTE20RISSeUPRPcAoLgdzcotN8+y3Qsydw4EDg9t9/Bzp0AN57DxgxAujTB3jjDSA/P372nkgcPKiZ4oIFwG+/JdoaQmIDRf8E5/bbgfbtgSNHwu8/eBBITQV+/RWYNStw3/vv63L9emDKFGDMGGDnTuDzz+Nja24u8Mcf8Tl3rHnlFeDkk4G2bYHzzwfOOQf47rtEW0VI6aHon8D88Qfw+uvA1q3Ayy+HTzNunPrr69QBPvnEt10EmDlTBa1xY93Wvz/QqBHw6quxt/XwYb3W6acH2lEUIsDChbqMJZMmAVdeCSxaFDnNnDlAkybAu+8CX30FtGwJjBwZOXMVAZYti72thMQcESlXn27dugmxx4gRIsnJImedJVK/vsihQ4H7v/5aBBAZN05k9GhNe/Cg7lu1Sve99FLgMY88ots3bQq9XlqaSK9eIm++WbRdkyeLDBsmsn27ruflifTvL1KhgsgZZ4gkJYm8+270+3vzTbXl7bcDt3s8Iu+9J3L//SJ33CFy880iK1dGP59F48Z6XkDkr38VWb8+9Pz16okMH+7b9u23mn78+PDnfOAB3T9lin07InH4sEhOTunPQ9wFgHSxobEJF/ngD0U/PB5P4PrWrSInnSRy660iv/6qb/Lxx337Dx0SadZMpE0bkdxckZ9+0jTvvaf7H31UxBiRHTsCz5uRoaJ8772+bceOqaglJek5Lrsssp1ZWSJVqmi6WrVEXn9dxRMQefVVteuCC/Tar70W+Tz5+SItW+pxrVuLFBT49r3/vm6vUEGkdm29Xrt2IkePFv0MRUR27dJjH35YPzVq6DPyZ+NGTfPKK4Hbb75Zr5meHrj9nXc0feXKIqecEirYwe8uGn36iHTubO9+REQWLBC54QaRwsLiXSfRHD0q8sUXibbCOVD0T0A8HpE5c8KXPC+/XKRrV5Hff9dtt9+uor91q64PGKACeOCAyIoVKhrGqNiLqCCceqrIVVfpepcuIj17hrdj4ECtOSxeLDJxokjbtvpLSU0VueIKkSZNIt/Df/6jaT/+WMXLKlE/9JAvTW6uyCWXaCaya1f487z1lh5nZRhWzSAnR6RRI7Xfygi++ELTTJwY2S6LOXM07Q8/6PqLL+r6xo2+NO+9p9uWLg08dv9+fYYdOoh89ZXIkSMiP/6oNajzzxeZP1+P+7//8x3z++8ip50mMmlSdNtENBO2ntnYsfaOOfdcTb9qlb305YXHHlO7ly9PtCXOgKJfDlizRqR9e5Hvv4+eNitL5Oqr9Y00barrFq+9pturVhWpVElL6ZUqacnTIj1d0/TpoyLUoIHIZ58FXuP227VU/PvvmvY//wlvy9y5PuExRqRHD5FZs3TfE0/o9gMHQo8rKFDb+/TR9cJCLek/9VRoaXfZMj3P1Kmh58n
PF2nVSjOuggKRjh1FTj9dvz/8cKBoW1xzjd732rXh78nioYe0tG65wtat0/O9/LIvzZ136rPOzw89/osvtEQP6LOsXl1rIvv26f5LLxWpU0fdaLt26fOwakgvvFC0bSL6PACRiy/WZ//tt0WnX7HC966mTYt+/pKSmxu+JvHLLyILFxb/fIWFWhMFNIOPJVu3qkvwtttEUlJEHnwwtue3Q36+ujjnzy+7a1L0ywFXXKFPuEOH8AJisWCBll4rVhQZNUqXgwerUG7frm6SPn1Edu7UEj2gQhLsd7/qKt03ZIhIZmbodb77Tvf36hVauvWnsFB9+1On6jX9sUrKVg3Cn08/1X2ffFLUU1E8HpHmzdXXH8zbb+t5Zs7U9Y8+0vWnnlKh/dvfQo/ZuVPk5JP1ORXlThkwQNsV/O1o0kR9+xbdu4ucd17kcxw+rOJ/550iF12kGYfFokVq64QJmllWqSLy229ae/J3r0XisstUDA8f1sykUaPAAkAwt96qBYAaNUT++c+iz+3PkiX6+7FqS8eOqXi//nro7+q337QQcdZZvnYaEZHp0/V32Lat/etazJvny6zuvtv+cUePhmY+hYX6W37mGZGzz/adt3p1rbGeemrxXWylZdo0teGss8ru2hT9BPP99/p0L7xQwvqHLXbuVGFo1UobSkXUPWCVPq+6SkuWlsvH49GSf7gSclaWZiCRfmQFBfonAEQ6dSrZfW3dGloytrjgAhXQojI4f+65J7Bx2bKxTRuRM8/0/bkLCzXjtPzmW7aEP59VIwpu+LXweFS8UlMDt994o2YYBQUqKsnJ2vhdUqzM1xhfxnXkiLqAKlbUzDcchw6pgN91l66npamopqTobyItLfDZHjyowpaaqu4yu+/08899wlipkj7vSpV826pUEXn6aX0eM2fqM2/aVKRaNZGGDdXtN22a3l/Vqrr0f4d2GDxYpG5drc1dcEH09JmZImPGqC3Jyfp/ueAC/Z1YNS9Az/fEE1oDKijwue8i/WbiQV6eFmgsu375pWyuS9FPIB6P5vCNGqkP+txzVWzD/THuvVfdDf5+/MJCdRNYboGnnoqdbSNH6jkfeaRkx3s8WvO47bbA7cuXF9/WH3/UY6ZP922zfPkffxyY9sMPdfsDD0Q+X2Ghlo779Qu/38qwXnwxcPv06br9t9/0Y7e2EomVKzUTmTw5cPvBgyqeF14Y/riZM/Xa/i6dadPUtWWJWqtWPhfWlCm6beFCfS4VKmgNoSiOHdNztG2rpfpx47SWM3as1qiWLPHVUNu2VUE/+2yR3bv1HTdp4hOzSy7R5xRsczR27tTMb8wY/T3Wrh25oOLxaIBCjRp6f8OG6X9m6FCRc87RmtGYMVoICufaW7w49DdWFIcOlV6kX39drzljhv5XrrmmdOezC0U/gXzwgT5ZK7QxPV3/PP4RMSLaMFijRvgfxe7dWi3t1s1+ydkOv/yiP0R/l0Rx6dVLMzJ/RozQEqLl27ZDQYGWvC13TU6OliTPOiu0Cu/xiPz8c/RnMWqU2hEu8sUSqGAf9J49uv3RR0WefVa/Z2TYv49w5OWF3z5pkkR0raWmamYR7tgdOzRKqEEDFckFC7T2062bPhurMTta+5HV0P7ll5HTeDwqkg0aaDtTbq5v365d6tIaOlSfcWZm8TN7q11o7VpfxvXnn+HTWu/syitL1lCdn6+1kTvvtJd++HDNXEr6/o8d04y9e3d9jmPGaOEt0v3FEop+GeAfRmhx7JhIixba+Oi/PzU1tKHREoDgKBGLffuil9wSwS23qDhZpbP8fBWiv/+9+OcaMUJdFEeO+J5HcCNtcZg9W88RrgFtwgQtYR45ErqvSxf14197rdbQ4sX27SoqEyYEbs/PV3fH9dcXffzmzVoCt2qBb7yh2+2I765dWsgoKuTWH7shoM2baztSJHJz1TWVna3nbNHC19j/889q9+zZocd5PPpegkN2i0ufPlqQiMaKFVo4A4oOJy6KV1/
V4+fO1fU//tD3HVzgiwcU/Tjz3/+q8PlXaz0ekX/8I3xJavt2jeo45RStQufkaAegAQPK1u5YYPlJt23T9QULJKxLxg5WpNAbb6j4WyGlJSU7W4U9WFRFtITatWv448aP1xDY005Tf3M8ueIK/R34l+itZ/jhh9GPz8rS9oHTTgvsE9CyZdHP7+ab9dlEi3AqLn/7m5ZuI3HXXXLcPWV1jLMatLOzfbWsYKy2h2idAaNhZfb+NZb9+0U2bAhMd/nlWgv2D22OxvbtIk8+qfeYmqq1o7PPDnRXDR6shaJ4d7ij6MeRvDz1bVphlFbJ9N//lpCYdH9WrdIffY0a2pkGUL/2iYYlUFZpZuxYFcziNuaJqIugRg1tSKxYsXRuJ4tzz1W3hz9WW0SkCBer93JwnH08sGojn37q2zZmTPGeoccTKiLXX68ZQTjWrdNS7OjRJbO5KCyX0Z49ofuys/X9XnqpCvuQIeqq8a9ttWwZmtF6POoiadYssqvMLp99Fvpfu+IK/b1Z4aLWb/rJJ/U3Ur16oIswJ0eDMT76SCO0li4Vuekmrb0DIjVr6n+7WzdfQIaF1Xb17LOlu49oUPTjiBWO9frr2shWvbqvNDNiRNEhWtu2+SJRgv3iJwpZWRLgSmjbVqRv35Kfb+hQPZ9dv2s0rN7G/mGr69dLkdX2I0d8DZSlcS/ZIT9fxdmq5X36qYpGuPDV4vD884E1MH/eeEP3BXf8iwVWpFq43rWWz/633yIfP3iwNi77Y4V0vvpq6e3bvTswM9+4UX8fdevK8eCAHj18NScrk/jmG985Hn3UVyiwPpUra9+XzZuLvr7Ho/0uKlXS3vPxgqIfJwoK1MfYubMvjr5VKzne2GSn0TUrS6NfIvnyTwQaNlQf/oYNeu/PPVfyc333nVaJw/UtKAlWBM6MGb5t1tANy5ZFPq5vX/WVl8W4N//+t/p6b7xR7erWLbp4RCMtTc/10Ueh+6wOZ/EYqiE7W0X04YcDt3s82jnRamyOxMSJand2tu+4Xr20bcXuUBTR8Hd9jR6tpfytW33uWP8CweHDWoK3akW5uRp9d+ml+vuZNUsz0d277V9/715ty/jLX8JnyrGAoh8jPJ7AH+yMGaF/rG3btNTrpkGy+vXTjO+ZZ/R5hBugLVEUFGh7y403+raNHq0ls6Iy5R9+iH8V3GLzZp/Y3HVXbMTt2DEtTYYbvuH88zVjjRft24c2EFuD1EXzyVvurp9/1vX//U/X7fRgtsuwYdqOcviwuvmGDtXtHo/+d4cMCfxtXHKJ1uJFtE8KoC6g0rBypbq6unaNj1bEVPQB9AOwDsBGABPC7G8KYD6AFQC+B9DIb18qgA3eT2q0a5UH0V+yRAWjZ09tfK1XT+S++zTsqmNHdWecaINbxZpx41Rg+vTRP3x5Y/Bg9bF6PBoX3rq1xnWXJ6ZO1R7OseScc0R69w7c5vFoQ+LIkbG9lj9WI2ZwA2adOoENqOGw+k+89JL679u105J5rEr5Ij4303336TJcj3J/nntO061bp7ZYIZilZc4crRUNGRJ7DYmZ6ANIArAJQAsAyQCWA2gflOYjS9ABXAjgHe/3OgA2e5e1vd9rF3W9RIt+QYG6a2rUUEH75z9FBg3SqrgVzhWpx6ebsIZKAMomHK24WKFzb7+trqiqVX0Nz07m7ru1n4J/4+e2bRK2U1ossSK6rAEAt21TV5mdns3+mZIltsHjRpWWJUv0vElJGgYaTcAtt+X550uJI9MiMXmynvNf/4rdOUViK/rnAJjnt34fgPuC0qyySvcADIBs7/drAbzql+5VANcWdb1Ei77l+w3ukbl5s0ZYXHNNbDtLnahYf6LyGoHk7z5p1qxoX76TsHou+zcYWh234vmeFi70iePhw1rKN8Z+O8X552sN+uSTtdEz1uPVWJ20APsD07Vpo+lbtSpdP4FgPB4N+IhFOKo/dkXfzsxZDQFs81vP8G7zZzmAwd7vVwGoYYypa/PYcoP
HAzz+uE4/OGhQ4L7mzXXS8BkzgIoVE2NfeaJdOyApSWfkOuecRFsTSvPmQO/eQL9+QHo60KlToi0qGy64QJfffOPbtny5Ljt2jN91O3UCTjpJ/x/du+usbJMm6Xuwe/zatcChQ8AzzwDGxNa+ihWBs8/W3+vQofaOGTBAl2PH6m89Vhij05NedJHOxrZgQezObQc78hXu8UvQ+lgALxpjhgP4AcB2AAU2j4UxZiSAkQDQpEkTGybFh88/B1auBN55B6jAiSSLpHJloEcPoHPn2P4hYskPP8RePMo79eoBXboA8+cD99+v21asAJo2BWrVit91K1UCzjwT+PhjoEEDYN48oG9f+8d37qzLW24BOnSIj42vvKKZSpUq9tLfeqtOj5maGntbTjpJn1WPHsC116ru1KkT++uEJVpVADbcO0HpqwPIkBPMvePx6GiGLVrQfWOXggI2aJdHxo3TkENrCI927TScON5Mnaruz+DhuO2wb5+GlRY1jLQTWbpUO+VZ0USlATF07ywC0NoY09wYkwxgKIDZ/gmMMfWMMda57gMwzft9HoBLjDG1jTG1AVzi3Vbu+PprdQNMmED3jV2SklgjKo9cdBGQlwf89BNw9Ciwbp2WwuPNiBHq3jnllOIfW6cO8NxzQO3asberPNO5M/Dww/rcZswom2tG/cuKSAGAO6BivQbAhyKyyhgz0RhzpTfZ+QDWGWPWA/gLgMe8x2YBeBSacSwCMNG7rdzx8svAaacBN9yQaEsIKR29ewPJyerXX71a26rc0qZxIjJ+vLp5brsN2LEj/tczWisoP6SkpEh6enqZX7dFC23omT69zC9NSMy54ALg4EFg1Cjgxhu1tN+mTaKtIpFYv15L/X36AHPnlqwtyhizWERSoqVj5RxAbi6wZYtG7RDiBC6+GFi6FPj2W224bNky0RaRomjTRt1bV1wR/2tR9KGlIBENQyRFkJcH9OoFvPBCoi0hUbjoIl1+8IGGapbXCCviY8QIdfHEO+KMog9gzRpdulr08/K0RWn37shppk0DfvkFeO+9MjOLlIyUFKBmTSA/v2waccmJA0Uf2tiVlAS0bp1oSxLI++8Djzyidcxw5OYCEydqMSQ9XQOeSbmlYkVfRy024hJ/KPrQkn6rVhrx4EpEgGef1e8zZuh6MC++COzcCTz0EFBYCPz4o71zFxZqhkHKHMvFw5I+8YeiDxV9R7t2li4F7rkH2Ls3/P4fftC++n36AH/8AaSlBe4/eBB48kmgf3+NL0tOBr77Lvp1V67UkIQ2bYDt20t/H6RY/OMf2t2/d+9EW0LKE64X/fx8YMOGE1j0CwvVP7VtG5CdHVpK//xz4NxzdUCTnj2BjRtDz/Hss0DdulrKT04OjVudPBnYvx947DENBenRQ8NCIiECvPQScNZZwJ49mmkMHBhY4t+8GfjwQw0iLw07dgDDhtnLhEpKZqa2Z+Tnx+8acaB6dW0YZAc6EoCdbrtl+SnrYRhWr5YTd7jkggIdktB/DrdatXTGiJkzdYaTChV06qKZM3V+uHr1RH75xXeOTZt0OERrnNdBg3S2CWtYwY0bdXjCv/3Nd8xDD+kxkfrM33uv2tKvn8iuXTrDtTWIeH6+yNNP6/i/gNq/Y0fkeyxqUPWFC3UWa0CnNipJ/387jBzps9XOOAEej06t5LYxBUhCAWfOsscnn+hTSE8v08uGx+PRiXcXL7aX3pqJ/cEH9bjJk0WGD9eZK6xMYNAg3yAs69frOLGVKunknps2+eaOy8jQNB98oMfNn68Cfc45mpH8+afvutakqLNmhb+HU08VufzywIF5rEHEGzXS5eWXa6ZUpYpmRJ9/HnquF19UW7/6KnTf22/rvmbN1ObKlTWTKe6YvF9+qc8uEocP6+QKZ56pg6S0aRN9olnr+fz978WzhZBSQNG3yaRJ+hQsXUwY+fm+CTs7dIguXnPmaNqbbw5/rvnzdXKA4IHAMzNFbrpJBaxCBR2Z67rrfPt
zcnSm95tv9k1eOn164DmOHlWRveuu0GtbVaepUwO3ezwit9yi0yu9/77v/lavFunUSY8ZM8Y3+4c1k7cxOtC6/6wg1nx6F1zgm1jXmhrp+ef13D/+KHLDDUULem6uTlqalBR54Pc339Tz/vCDfurW1UHfw01EazFunC/TLe0ce4TYhKJvk+uuE2nSpEwvGcrhwzrBKCBy4YW6/N//Iqf/4w+daqhz5+hz0UVi+3YVp9atRZYvD9x3/fUq/ElJgRmCPxdeqKXfYKwplCJNmhtuWM4jR3SmeEBrFs89p2J/ySW+WUGef17TZmXpVFjt2ulxFh6PPsNKlXyZSFKSLt96K7wt1jRNFSpohhSO3r21dG9lUps2iZx1lh6Xmipy8GDoMe3b61ybTZpoBu6fYRWXhQt1FhRCokDRt0mXLuoVSBgHDoj06KHC88orOrv1aaeJ9O0bPv2sWeo+qVVL/e3xwKpFNG4ssn9/+DSPPqpprJK2xV//KtK0acmmPpoxQ10pgMh552mtw+NRX3rt2iJ792p7RVKSyKJFocfv3q02d+yoNY2sLH2OFSqEToV25Ig+5z591GefnOxzcVmsWaO2PPVU4Pa8PJEHHtDzNm+u7RYWf/yhxzz9tLajAOrGKgn79qnry5rcleNYkyKg6Edg3z5fAbGwUF3Ko0fH6WI7dmip+fffw+8/eFAF/6STAkXpiSf01fiXwHfv1sZUQEuyS5bEyWjRjCc1NXDOvWB++klCJg8tKFBxvvHGkl97/XqRRx4Ryc72bVuxQgW2e3e95kMPRT4+WBgPH9baQ3JyYLuB5Q6aP19L70lJoT+EceO0vSNSA/GCBVojue8+3zarprN2rWZY/fppRlZUY3UkRo5UuwYP1nMOGaIZISFhoOhHoHNn1YC8PN88qsHu55jg8YhceqleoGlTkT17AvdnZ6sLoGJFkU8/DdyXlSVSrZoKr4gK7CmnqHBNmlQ6d0GsOHZMo3pGjPBtW7xY7/fdd2N/vVtv1XN37Vr8+9+/X6t0gMioUfp8GzUS6dXLVyP5+9/1fqyaS16etj8MGlT0uQcP1ozu0CFd799fpGVL33nXr1fhDjcL9oIFkdsSfv1V7b3nHj3X5MmawZxxhtYgWOonQVD0I1C9ut712LFxnjD6hRf05Lfdpo2e552nQimijZfnnKNi4F9S9mfUKK0BTJqky5YttcRbnvjHP9SHbkX2/N//6T2XpFQbjb17tQaxdm3Jjs/J0YZnQBtiAZF583z7V69WUU1N1VqA1ag+Z07R57XE+fnn9RqVK+sUUP707atRU/4ur717NRM/7TSRbdsC0+fna22uYcPAGs+cOdoGA2h7yrvvatWVEKHohyUnR++4bl1d9u2ry717Y3yh1asDQwjfe08vNHy4VtkrVNAqf1ERIJs2aTpAZMCAyL71RLJliwqXFUHUr582sJZnvv1Wa17nnRfa7mC5zwDNaHv3tjd3Zs+e6tufNSs0MxERee013e7vkrMKBVWqqMBb4l5YqO4tIPzvIz9fw1XbtJHj0U0pKSKPPx4aqUVcBUU/DJY75+WXfbX9Bg1ifJHdu9UFUa9eoC/Y6rBUsaKWBIPdPeF46imRJ58s31X5u+7SzGnFCnWP3HFHoi2KTkGBr9blz6FDImlpWlMpzjO3GmxbtFC3XHCHsr179b3fe69vW7du+iP83/+0xnfZZVr1tH6YgwYV3RheUCDy88+aQfTqpccMG1Z84fd4RL75xueeIicsFP0wWDXxL74Q2bBBC9sXXhiDExcU6B//yiv1z21MqJ++oEBjzzdsiMEFyxG7d6vQWSXPmTMTbVHZU1Cg7htAZODA8Gn69dPagMejGSSgIaMiWgqxahjNm4u8807xxduKpiqu8FsZ1pVXliziipQbKPphsGrfVu/bFSuid66MSna29i4FNJRy/HiRVatKbesJxYMPynFXg1uHHrCigSJFBUybpvvT0rQT2kknBYa7vvSSL2S3pFjCf8M
N9gQ8J0ddXVaY7JQp4dN5PJoRxTNirLQsWhSftqQTCIp+GF59Ve84uN2sxGzZojHhSUnakGfH/+tEDh7UhpIy7mNRrjh2TEUzUme5rCwV+rvu0l7Af/1rfOx44AH9kX/5ZfS0Vmb9/fcadVS5cmh48f796mqyotCC76+gIPHux/nz9T/YuLH2k/Bn27bAfhQOhqIfBmtUgdIUpo6zcqU2CNSqFX5sGLexeHHk/ghEuewydf8BIrNnx+caVue+aH7LTZs08srqcb17t2ZGZ5yhVeAVK0S+/lqjxipWFPnnP9XuRx7xnePoUZFzz1WX1Pz58bmfaGzapGNNtWmjobPNm6vQFxZqB7lKlbTNwwVQ9MNw++36uyg1Ho/+qerW1V6bhNjhv/+V49ED8exrYYXOFjWK4JVXavzy9u2+bdaYRv6fhg21wVhE5OqrNdpo61b9D9x0kxzvuQ3oUBb+IabxJjtbM6natbV3elqaSM2aGtZ63nly3OVqjL3AiRMcin4YhgzRsbtKzbx5+uiefTYGJyOu4cABFdrx4+N/nZo1Ra65Jvz+2bMl7PASIjrWzwcfaP+RTz8N7AewZYuK/jXX+Hoe//vf2jZwzz0qruefH/v72bVL2xN+/FHdVtOmaY2jZ09163zzjS/tzz9rYEHNmjpY3qJFauc778TernKGXdE3mrb8kJKSIunp6XE593nn6YQS339fipN4PDo5SFYWsHYtUKlSrMwjbiAjA2jQIP5zc44fDzz9tE6a07y5b/vhw8AZZwA1agBLlhTfjkceAR5+WCeV7t8f+Owz3ywtEybohDuHDulkO+E4ehT46itg1y6dnCYnB2jcGGjRAmjfXr/7s2AB0Ldv+AlsTj0VmDQJuPHGwO2bNwNVqwKnnKL/11NP1bkj33+/ePfqT2Ym8OabOitN9erFOzYrC1i1SuetrFWr5DZEwRizWERSoia0kzOU5SeeJf02bQLnAikRM2ZoyeGEnHWFuIaMDG04Du43MWaM/n5/+qlk583JUb/56adrjcIfK/xz4cLIxz/+eKD7yGrjsKK//IfC3r9fXUetWmmt46uvtCS/aVPRk+sEk5qqfn87oaw7d2pPZ//oJ49H5Ior1MZftBP0AAAU40lEQVTBgyNHRuXmaue/KVNE7r5b23As1xego8bGMSwWdO+EUquWjm5QLHJytJFy+3Ydqa1VK43YYe9HUt4ZPlzdMdOn6+916VJ1h4wcWbrzZmWFn4BiyxY53vsxEikpGuW1bZsKd2GhZlA//KBjVfmPiHrddWpvUZmIHayJgfxnjIvEgAGa9oEHfNustphzz9XlxIm+fQcPaoPxxRdr9JMl8FWr6lAZ112nbrSxYyVu41J5oegHceSI3u2kScU8MDU1sGRiZzwWQsoDmzfrsBiARuG0a6eNyPHqS+HxaInafxA+fzIy1JYnngi/339EVGucpEcfLb1dWVmamdx/f9HpvvvO96wAHT4jI0NLi717a8Y5bJju++ADHTLbGtOlQwct3c+Zo8cEl+gLCkTOPlt76sd83BeFoh/E1q2+92ibLVu0pHHNNVp6uf9+jYxgz0VyolBYqG6Xbt30DxA8C1qsufjiyP01XnpJbVi9OvLx+/apgAIaahmrGnXv3jo8SiQ8Hp0cp1EjjQq69FL973fqpLUlqyd9bq7WVqwCYN++9udaXbFC3VnDh5f+fsJA0Q8iLU2KHx59xx3qF41Zby5CEoTHEzpJTDwYP15L6uE6w1xyiYZTRis0bd+u8dVbt8bOLqstIdLcCJYLyJplLTvbNw6SNWubRUaGjvhakv45//qXnjMO/Roo+kF8/rlEbWMKYPdu9dGVZkIQQtyGFeiwdGng9gMHtAA1blxi7Fq6VO2aNk3b6EaPVn/7m2/q0OAtW4a21e3Zo6GesexxnJur7YJt2hSvMdoGdkW/Ytzih8oZu3fr8i9/sXnACy8Ax44B48bFzSZCHEfXrrpcsgTo3Nm3/csvNexy4MD
E2NWpk4Zu3nqr/q+Tk4HatQPDOOfO1VBUi/r1gWHDYmtHlSrAiy8C/fppSO2//hXb89ugQplfMUEUS/QPHdIXc9VVQNu2cbWLEEfRsqWvD4A/n32mItqjR2LsMga45x6gZ0/glVeAnTv1s2wZ8PjjwKOPqhCXBZdeCgwerH0Mtmwpm2v64aqSfs2aQOXKNhK/8AJw4IB2NiGE2KdCBaBLl0DRz8vTUvTVVweWpMuasWP140+nTvopa555Rms/d98NzJpVppd2VUnfVil/3jzgwQeBQYO05y0hpHh07aol6MJCXf/+eyA7O3GunfJI48aqM599BsyeXaaXpuj7s2KFlkY6dADefrtM7CLEcXTtChw5Aqxbp6X8hx7S4QcuvjjRlpUvRo/WITEGD1bf/pEjZXJZW6JvjOlnjFlnjNlojAnxeRhjmhhjvjPGLDXGrDDGDPBub2aMOWKMWeb9vBLrG7BLVNHfsQO47DL1R86Zo0tCSPHxb8ydMAH47Tfg9dcjj8fjVpKTtRZ0/fXAE08AHTsC8+fH/bJRRd8YkwRgCoD+ANoDuNYY0z4o2f0APhSRLgCGAnjJb98mEens/dwSI7uLTVTRv/NOYP9+4IsvgEaNyswuQhxH27Yq8E8/rb7rUaOAIUMSbVX5pF494K23gG++0cbme+/VQeLiiJ2G3O4ANorIZgAwxswAMBDAar80AqCm93stADtiaWRpyc/Xge4iir6IjuZ39dWBYWaEkOKTlKT/o19/1Xax//wn0RaVfy66SN3Le/b4Ri2NE3bO3hDANr/1DO82fx4GMMwYkwFgLoBRfvuae90+C4wx55bG2JKyZ48uI4r+li3A3r3A2WeXlUmEOJvevTUO/sMPOfy4XapUAZo2jftl7Ii+CbMteBD+awG8JSKNAAwA8I4xpgKAnQCaeN0+9wB43xhTM+hYGGNGGmPSjTHpmZmZxbsDG0SN0V+4UJfdu8f82oS4ksceAzZtApo1S7QlJAg7op8BwH9mg0YIdd/cBOBDABCRXwFUBlBPRI6JyD7v9sUANgFoE3wBEZkqIikiklK/fv3i30UUoop+WpoG8HfsGPNrE+JKTjpJS/qk3GFH9BcBaG2MaW6MSYY21AYHlv4J4CIAMMa0g4p+pjGmvrchGMaYFgBaA9gcK+PtYquk37Wr/lAJIcTBRBV9ESkAcAeAeQDWQKN0VhljJhpjrvQmGwNghDFmOYDpAIZ7BwA6D8AK7/aPAdwiIlnxuJGiKFL08/M1tIz+fEKIC7A1DIOIzIU20Ppve9Dv+2oAvcIc9wmAT0ppY6nZvVuntaxWLczOlSt13k768wkhLsAVPXKLjNG3GnFZ0ieEuACKflqadpBglAEhxAVQ9Bcu1FK+CReZSgghzsIVor9zZwTRz84G1qyhP58Q4hocL/p79gD79gFtQnoHAEhP1yEY6M8nhLgEx4v+8uW6DDtPQlqaLjluPiHEJVD0W7cG6tQpU5sIISRROF70ly0DGjYE6tYNs3PpUt/Y34QQ4gIcL/rLl0cYLfnwYR1ds0OHsjaJEEIShqNF/9gxYO3aCK6dNWt0ecYZZWoTIYQkEkeL/urVQEFBBNFftUqXFH1CiItwtOgvW6bLiKJfqRLQsmWZ2kQIIYnE0aK/fLlORtOqVZidq1bpXJ5JSWVuFyGEJArHi/6ZZ0bQ9VWr6NohhLgOx4q+iIp+WNfOoUPAn39S9AkhrsOxop+RAezfH0H0V6/WJUWfEOIyHCv6URtxAYo+IcR1OFb0reEXzjwzzM5Vq3Qi9ObNy9QmQghJNI4W/ZYtgRo1wuxctQpo146RO4QQ1+Fo0Q/r2gF0XlwOv0AIcSGOFP38fGDjxgi6fuAAsH07/fmEEFfiSNHPytKQzbCzZTFyhxDiYhwp+vv26TLsMPmM3CGEuBhHi37YMfRXrQKqVgWaNi1TmwghpDzgTtFv3x6o4MhbJ4SQInGk8kUUfRFgxQpG7hBCXIu7RP/PP4E9e4D
u3cvcJkIIKQ84VvSTk4Fq1YJ2pKXpkqJPCHEpjhX9unUBY4J2pKXpxCkdOybELkIISTSOFv0QFi4EunTRagAhhLgQ94h+QQGweDFdO4QQV+Me0V+9GsjNpegTQlyNe0TfasQ9++wyt4cQQsoLjhN9kSJEv3ZtHW+ZEEJciuNE/9Ahdd+HFf3u3cOE9BBCiHtwnOiH7ZiVk6Nj6NOfTwhxObZE3xjTzxizzhiz0RgzIcz+JsaY74wxS40xK4wxA/z23ec9bp0x5tJYGh+OsKK/dClQWEjRJ4S4norREhhjkgBMAdAXQAaARcaY2SKy2i/Z/QA+FJGXjTHtAcwF0Mz7fSiAMwCcBuAbY0wbESmM9Y1YhBX9hQt1edZZ8bosIYScENgp6XcHsFFENotIHoAZAAYGpREANb3fawHY4f0+EMAMETkmIn8A2Og9X9wIK/ppaTqUcthZVQghxD3YEf2GALb5rWd4t/nzMIBhxpgMaCl/VDGOhTFmpDEm3RiTnpmZadP08EQUfYZqEkKILdEPF+4iQevXAnhLRBoBGADgHWNMBZvHQkSmikiKiKTUr1/fhkmRsUS/dm3vhtxcYMsWjrdDCCGw4dOHls4b+603gs99Y3ETgH4AICK/GmMqA6hn89iYsm8fcPLJQEXrzjZt0mXr1vG8LCGEnBDYKekvAtDaGNPcGJMMbZidHZTmTwAXAYAxph2AygAyvemGGmMqGWOaA2gNIC1WxocjpGPWhg26pOgTQkj0kr6IFBhj7gAwD0ASgGkissoYMxFAuojMBjAGwGvGmNFQ981wEREAq4wxHwJYDaAAwO3xjNwBihD9Vq3ieVlCCDkhsOPegYjMhTbQ+m970O/7agC9Ihz7GIDHSmFjsdi3D2jQwG/Dhg26oWbNiMcQQohbcFyP3KysMCV9unYIIQSAA0V/3z6gTh2/DRR9Qgg5jqNEPz8fyM72K+kfPgzs3EnRJ4QQL44S/awsXR4XfYZrEkJIAI4S/ZDeuAzXJISQANwh+gzXJIQQAG4Q/VNPBapXT5hNhBBSnnC+6LOUTwghx3G+6NOfTwghx3Gc6CcnA9WqQWM3d++m6BNCiB+OE/26db1zn2/cqBsp+oQQchxHij4AhmsSQkgYnCv6VkmfDbmEEHIc54r+hg1Aw4ZA1aoJtYkQQsoTzhZ9unYIISQAx4i+CEWfEEKi4RjRP3QIKCjwin5eHpCZCTRpkmizCCGkXOEY0c/LA/r3B9q2hQ6pDHC2LEIICcLWdIknAvXqAXOtCR3/9Io+x9whhJAAHFPSD+AwRZ8QQsJB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBfhbNGvVi2xdhBCSDnDuaJfpQqQlJRoSwghpFzhXNGna4cQQkKg6BNCiItwrujTn08IISE4V/RZ0ieEkBAo+oQQ4iIo+oQQ4iIo+oQQ4iJsib4xpp8xZp0xZqMxZkKY/c8YY5Z5P+uNMQf89hX67ZsdS+MjQtEnhJCwRJ05yxiTBGAKgL4AMgAsMsbMFpHVVhoRGe2XfhSALn6nOCIinWNnsg0o+oQQEhY7Jf3uADaKyGYRyQMwA8DAItJfC2B6LIwrEYWFwJEjFH1CCAmDHdFvCGCb33qGd1sIxpimAJoD+NZvc2VjTLox5jdjzKAIx430pknPzMy0aXoEcnJ0SdEnhJAQ7Ii+CbNNIqQdCuBjESn029ZERFIAXAfgWWNMy5CTiUwVkRQRSalfv74Nk4qAI2wSQkhE7Ih+BoDGfuuNAOyIkHYoglw7IrLDu9wM4HsE+vtjD0WfEEIiYkf0FwFobYxpboxJhgp7SBSOMeZ0ALUB/Oq3rbYxppL3ez0AvQCsDj42plD0CSEkIlGjd0SkwBhzB4B5AJIATBORVcaYiQDSRcTKAK4FMENE/F0/7QC8aozxQDOYJ/2jfuICRZ8QQiISVfQBQETmApgbtO3BoPWHwxz3C4COpbCv+FD0CSEkIs7rkUv
RJ4SQiFD0CSHERVD0CSHERVD0CSHERThT9CtWBJKTE20JIYSUO5wp+tWrAyZcR2JCCHE3zhV9QgghIVD0CSHERVD0CSHERThP9HNyKPqEEBIB54k+S/qEEBIRij4hhLgIij4hhLgIij4hhLgIZ4m+CEWfEEKKwFmif/Qo4PFQ9AkhJALOEn0OtkYIIUVC0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBfhTNGvVi2xdhBCSDnFmaJftWpi7SCEkHKK80S/WjWggrNuixBCYoWz1JGDrRFCSJFQ9AkhxEVQ9AkhxEVQ9AkhxEVQ9AkhxEVQ9AkhxEVQ9AkhxEVQ9AkhxEXYEn1jTD9jzDpjzEZjzIQw+58xxizzftYbYw747Us1xmzwflJjaXwIFH1CCCmSitESGGOSAEwB0BdABoBFxpjZIrLaSiMio/3SjwLQxfu9DoCHAKQAEACLvcfuj+ldAEBenn4o+oQQEhE7Jf3uADaKyGYRyQMwA8DAItJfC2C69/ulAL4WkSyv0H8NoF9pDI5ITo4uKfqEEBIRO6LfEMA2v/UM77YQjDFNATQH8G1xjjXGjDTGpBtj0jMzM+3YHZ5rrgHatSv58YQQ4nCiuncAmDDbJELaoQA+FpHC4hwrIlMBTAWAlJSUSOcumtq1gRkzSnQoIYS4BTsl/QwAjf3WGwHYESHtUPhcO8U9lhBCSJyxI/qLALQ2xjQ3xiRDhX12cCJjzOkAagP41W/zPACXGGNqG2NqA7jEu40QQkgCiOreEZECY8wdULFOAjBNRFYZYyYCSBcRKwO4FsAMERG/Y7OMMY9CMw4AmCgiWbG9BUIIIXYxfhpdLkhJSZH09PREm0EIIScUxpjFIpISLZ2zeuQSQggpEoo+IYS4CIo+IYS4CIo+IYS4iHLXkGuMyQSwtRSnqAdgb4zMOVFw4z0D7rxvN94z4M77Lu49NxWR+tESlTvRLy3GmHQ7LdhOwo33DLjzvt14z4A77zte90z3DiGEuAiKPiGEuAgniv7URBuQANx4z4A779uN9wy4877jcs+O8+kTQgiJjBNL+oQQQiLgGNGPNo+vUzDGNDbGfGeMWWOMWWWMucu7vY4x5mvvXMRfe0c1dRTGmCRjzFJjzBzvenNjzELvPX/gHQXWURhjTjbGfGyMWet95+c4/V0bY0Z7f9srjTHTjTGVnfiujTHTjDF7jDEr/baFfbdGed6rbyuMMV1Lel1HiL7fPL79AbQHcK0xpn1irYobBQDGiEg7AD0A3O691wkA5otIawDzvetO4y4Aa/zWnwLwjPee9wO4KSFWxZfnAPxPRNoC6AS9f8e+a2NMQwB3AkgRkQ7QkX2Hwpnv+i2ETh8b6d32B9Da+xkJ4OWSXtQRoo/iz+N7wiIiO0Vkiff7IagINITe73+9yf4LYFBiLIwPxphGAC4D8Lp33QC4EMDH3iROvOeaAM4D8AYAiEieiByAw981dMj3KsaYigCqAtgJB75rEfkBQPBQ85He7UAAb4vyG4CTjTGnluS6ThF92/P4OgljTDMAXQAsBPAXEdkJaMYAoEHiLIsLzwIYD8DjXa8L4ICIFHjXnfjOWwDIBPCm1631ujGmGhz8rkVkO4DJAP6Eiv1BAIvh/HdtEendxkzjnCL6xZnH1xEYY6oD+ATA3SKSnWh74okx5nIAe0Rksf/mMEmd9s4rAugK4GUR6QIgBw5y5YTD68MeCKA5gNMAVIO6NoJx2ruORsx+704RfVfNxWuMOQkq+O+JyEzv5t1Wdc+73JMo++JALwBXGmO2QF13F0JL/id7XQCAM995BoAMEVnoXf8Ymgk4+V1fDOAPEckUkXwAMwH0hPPftUWkdxszjXOK6Nuax9cJeH3ZbwBYIyL/z2/XbACp3u+pAD4ra9vihYjcJyKNRKQZ9N1+KyLXA/gOwBBvMkfdMwCIyC4A27zzTwPARQBWw8HvGurW6WGMqer9rVv37Oh
37UekdzsbwA3eKJ4eAA5abqBiIyKO+AAYAGA9gE0A/p1oe+J4n72h1boVAJZ5PwOgPu75ADZ4l3USbWuc7v98AHO831sASAOwEcBHACol2r443G9nAOne9z0LQG2nv2sAjwBYC2AlgHcAVHLiuwYwHdpukQ8tyd8U6d1C3TtTvPr2OzS6qUTXZY9cQghxEU5x7xBCCLEBRZ8QQlwERZ8QQlwERZ8QQlwERZ8QQlwERZ8QQlwERZ8QQlwERZ8QQlzE/wfVmUdfBOl8tgAAAABJRU5ErkJggg==\n", 2062 | "text/plain": [ 2063 | "
" 2064 | ] 2065 | }, 2066 | "metadata": { 2067 | "needs_background": "light" 2068 | }, 2069 | "output_type": "display_data" 2070 | } 2071 | ], 2072 | "source": [ 2073 | "plt.plot(range(100), hist.history['acc'], 'r', label='Train acc')\n", 2074 | "plt.plot(range(100), hist.history['val_acc'], 'b', label='Test acc')" 2075 | ] 2076 | }, 2077 | { 2078 | "cell_type": "markdown", 2079 | "metadata": {}, 2080 | "source": [ 2081 | "## Polt the training loss and testing loss" 2082 | ] 2083 | }, 2084 | { 2085 | "cell_type": "code", 2086 | "execution_count": 14, 2087 | "metadata": {}, 2088 | "outputs": [ 2089 | { 2090 | "data": { 2091 | "text/plain": [ 2092 | "" 2093 | ] 2094 | }, 2095 | "execution_count": 14, 2096 | "metadata": {}, 2097 | "output_type": "execute_result" 2098 | }, 2099 | { 2100 | "data": { 2101 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbAAAAEICAYAAAA+16EyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzsnXeYFEXzx7/FceQjJ+FIkgRB0hGULEFABMQACCgiIComDOCrAoq+hp9ijiCiovBiQBFBkmQkHJJBooQjHuEIwhFu6/dHzbCze5tv4119nmef3enp6a6Znemaqq7uJmaGoiiKosQauSItgKIoiqIEgiowRVEUJSZRBaYoiqLEJKrAFEVRlJhEFZiiKIoSk6gCUxRFUWISVWAxBBGNIaLJkZYjqxBRZSJiIsodQRl8vpZEtIiIBoVaJiXnQERziahvpOWIdbwqMCLaS0QXiOgsEaUR0QoiGkpEPim/cDVWruohomuIaAYRHTL2VfazzL1EdJSIClrSBhHRoqAJHiSIqI1xjh85pS8jogE+lsFEVC0kAgaI5bx+ckqvZ6QvipBoQSdQRUlEk4jolSDKsZeI2nvY34aIbER0zvL5NQj1BvU8ggURzbac52UiumTZ/jSQMpm5IzN/G4AsL1nqTieiK5bttYHIYpTbg4g2e8nzMxE9HWgdocBXC+w2Zk4AUAnA6wBGAPgiZFIFDxuA3wHckYUycgN4PDjiuCdICv5fAPf6q6jDSYDnmQrgJiIqYUm7D8CO4EilBMAhZi5k+dwWaYFC9ZLMzJ3N8wTwLYA3Lec9NFxyGLKMtsjyBIBFFlkahareaMUvFyIzn2bmGQB6AbiPiOoAABHdSkTriOgMER0gojGWw5YY32nGW8KNRFSViP4gohNEdJyIviWiouYBRDSCiA4aVt92ImpnpOciopFEtNs4dhoRFXdXDzMfZeaPAazx/9Jc5f8APG2VzwoRXUdE84jopCHr3ZZ9Dm/URDSAiJZZtpmIHiGinQB2GmnvGdfwDBGtJaKWfsiaBmASgNHuMhDRQCLaRkSniGgOEVUy0s3rt8G4fr2IaDER3WHsb2HI28XYbk9E643fuYjoBSLaR0THiOhrIipi7DMt4weIaD+AP1zIdIfx1l/HjdiXAPwMoLeRPw7A
3ZDGxFrOTUS0hohOG983WfZVMc7nLBHNA1DS6dhmJN6FNCLaQERt3F1Dp+PyEtG7JFb+IeN3XmNfGyJKIaKnjOtymIju96VcF/V8T0RHjHNbQkTXG+lDAPQF8CxZLCEiKkdEPxJRKhH9Q0SPWcoaYzw7XxvXYwsRJRn7vgFQEcCvRnnP+imnp2c0kPNw8AqQxUqzXN8RRHQEwJdGelciWk92j9ENluNdti1ZwXgW9hLRfww5xhNRCSKaZVz/U0T0KxGVtxxz1TNC4tVZTETvGDLvIaKOWZCnvlHeKeO/vdWy704i2mGc/34iepCIygGYAqAW2a25gu5rcHsN1hv/6woiamDZ94hR11ki2kVE3Y30ukT0p3HMMSKa4PfJMrPHD4C9ANq7SN8P4CHjdxsAdSEK8QYARwH0MPZVBsAAcluOrQagA4C8AEpBlM+7xr6aAA4AKGc5vqrx+wkAKwEkGsd+BmCKu3os9eU29lV2Sh8JYKa3cwfwE4BXjLRBkLceAChoyHq/UUdDAMcBXG/sXwRgkKW8AQCWWbYZwDwAxQHkN9L6AShhlPcUgCMA8hn7xgCY7EbWNgBSAJQFcAZATSN9GYABxu8eAHYBqGWU/wKAFU7yVLNsvwzgA+P3fwDsBvCGZd97xu+BRrnXAihkXK9vnP6Xr43rld/6XxnXbpe1XjfndROAVUZaFwBznP6L4gBOAehvlNvH2C5h7P8TwDjIfdMKwFnzWgIoD+CEUW4uyL15AkApV/+jk3wvQ+7J0pB7eQWAsRbZrxh54o3yzwMo5qYsT/UMBJBgyP8ugPWWfZNg3J/Gdi4AawGMApDH+F/2ALjFch+lG/LEAXgNwEpvz7zzf+Jmn9tn1N/zcHNPXs1jub5vGOXlhzyDxwA0Nc7tPuN88sJz29ICQJoP7aErGdsbcvzXuN75jXvhduN3Ycgz8YPlGOtzOQjAZePaxAF4FMABH2QZCmC+U1pxSPt7l1HWTQBOQrxncQBOA6hv5C0FoJ6lbdjspb6fATztIr08gHNGGfGGXIchz3tZSJtY2cibCHvbNBvAMAAEoACAm7ydc6a6fbhILm9m4yZ93s0x7wJ4x6kBy6RYLPl7AFhn/K5m3IDtAcQ75dsGoJ1l+xrjj8/tqR64UWC+njuAOsYfXwqOjWYvAEudjvkMwGjj9yJ4V2A3e5HhlOUmGwMvCsz4/SaA/7l4UGYDeMByTC5Ig1rJIo+1sWgHYKPx+3fj3Fca24sB9DR+LwDwsOW4mi7+l2st+820pwFsBZDo4fyt57XTKHsq5G3d+l/0B7Da6dg/jWteEdLAFLTs+w52BTYChsK17J8D4D5X/6NTvt0Auli2bwGw1yL7BTi+vB0D0MxNWW7rccpX1Lh+RYztSXBUYE0B7Hc65jkAX1ruo/mWfbUBXHC+7738JzaIxW9+7vb2jPp7Hm7uyat5DDkuwXjBM9I+gfECYUnbDqA1PLQtvn7cyNge8kKQx8NxSQBSLdvOCuxvy77CxnmX9CKLKwX2IIBfndL+B+kGyQVRZn1heRaMPFlRYI8CmOuUts0oszSkDbsVQF6nPDMAvA2gbCD/BTNnKQqxvHExQERNiWihYS6fNi5sSXcHElFpIppqmPJnAEw28zPzLshb3BgAx4x85YxDKwGYbpjZacZFygBQJgvn4RVm3gxgJsRis1IJQFNTHkOmvpC3Dl85YN0w3E3bDLM6DUAReLiWbngDwC1EVM+FvO9ZZD0Jefsp71yAwZ8AahBRGQD1IVZUBSIqCaAJ7G7bcgD2WY7bB1Fe1v/F4TwNngHwETOn+Hhe30De2NoCmO60z1kGU47yxr5TzPyv0z6TSgDucvofW0AaX2+4Ovdylu0TzHzFsn0eYqX6DBHFEdHrhlvuDETBAO7vi0oAyjmdz3/g+H8ccZIpH/nXd3OImYtaPtMsdbt8RgM4D19IZeZ0y3YlAE85nXsFiNXlqW3JKkeZ+ZK5QUQFiWiC4To7A3GdezpP
5/8DAAoZblLTrbfBBzkqAejodP63AriGmW0AukPaqIMkXR/1/TlJN7h99pj5GMTL8jSAo0Q0nYiqGHmGQa7JBsP92MvfigNSYETUGNIwmP0530G0aQVmLgLgU0jDCMibhDOvGek3MHNhiNvMzA9m/o6ZW0D+DIY0yIA0gp2dHpx8zHzQTT3BZDSAwXBs7A8AWOwkTyFmfsjY/y/ENDZxpdiuyk3S3zUC0r9TjJmLQiw/cnGcW5j5BMQKHuu06wCAB53kzc/MK9yUcx7iinoc8nZ2CeIiGw5gNzMfN7IegvxXJqbFc9TVeVroCOAFMvrZfOAbAA8DmGXIZsVZBlOOgxB3RjEnv35Fy+8DEAvMel0KMvPrPsjk6twP+XCcP9wDaXjaQ15oKhvp7p6xAwD+cTqfBGbu4mN9WXmWPD2j/p4HII25p2fI1bm/6lR/AWaeAnhsW7KKsxzPAqgCoInRxt0cUKHMi9gepOH8QuqKAwBmuGiTRhrlLTXugzKQ9vsbN/L7g6dnD8z8MzO3hbSdqQDeN9L3M/N9kP/0GQCTjZdln/FLgRFRYSLqCnHhTGbmTcauBAAnmTmdiJpAblSTVIi74VpLWgLEZ5pmdGw+Y6mjJhHdTNIRng5xwWQYuz8F8CrZAw9KmR2CbuoBEeWD+L8BIK+x7TfG29v/ADxmSZ4JsVD6E1G88WlMRLWM/esB9CSiAiQd0Q94qSYB0vCnAshNRKMg7oRAGAfxf9eypH0K4Dmyd5wXIaK7LPuPwun6QVyFw4xvQNxc1m1AOoCfJAmUKATpC/ifk+Xhii0AOgH4iIi6eTshZv4H4gp63sXuWZD/4h4iym28zdWG9HHuA5AM4CUiykNELQBYo+YmA7iNiG4xrIR8xptvojeZIOf+gnEvloT0O2VlrF5uo37zEw+5Ly5C+uUKQK6vFef/bTWAMyQBC/mNc6pjvHj6gqv7wFc8PaP+ngcgz9A9xjl0gvz/nhgPYKjhFSLDErqViBK8tC3BJgGifE+RRM+OClE9znwPoDkR3W48B3lIgpuqms87ESVAXK9nYT//owDKElF+L+W7uj+nQ6KEbzPqHAzpi5tPRJWIqJNR7kXIS30GABBRbyIqy+JPPG2U763NcMSbjxFi5l8wTvY0xK30CIA4S547ISbjWUij/iEsfTWQTuxUiK+8GYDrIW/25yA36FOw93PcAHkAz0JcXDNh73TNBXn7327s3w3gv+7qYbsP3eFjyf8fALO9nHt7y3YFyI2/yJJWE8BvRr0nIK4Cs5O0JIC5hqzLIa4L5z4wq38/DjI84QzEanjWKgN87AOzpD1r1DHAktYfwCajjgMAJlr2mZ2v1j6NW4wyWhvbdYztXpbjckEe0APGdZgMI1ABroN4HNIg/QNHIW/uXs/Lsu9qH5ix3QJyX502vltY9l0LYCnknpuHzPdoU4hSPmmcw28AKhr7FsF9H1g+yBvlYePzPuxBN67+k6v/p4uyFiHz/ToZ4nL8xbiP9gG413rvAKgOeY7SAPxspJmRZUcgfRAr3d1HLv6P7pAgrTS47vPw9J+4fUYDPI8kyIvOWYi1MAWOfWCZ5IC8FK0xyjkMadQT4LltaQngnA/t4SS47gPb65SWCHGxnzOuxUNwbHuc+8Cs97FPffZw0QfG9jZ0HqQ9Og5pg2pC+hwXGNflNMSb0sjyv00zrksanPrIjDw/I/P9OdPY1xHABth1hFluVaOeM5D7cC6AKsa+jyH3p3mN7vF2/Z0/ZBSkKIqiKDGFTiWlKIqixCSqwBRFUZSYRBWYoiiKEpOoAlMURVFikogtZxFMSpYsyZUrV460GIqiKDHD2rVrjzNzqUjLkRWyhQKrXLkykpOTIy2GoihKzEBEzrNnxBzqQlQURVFiElVgiqIoSkyiCkxRFEWJSbJFH5iiKLHL5cuXkZKSgvT0dO+ZFb/Jly8fEhMTER8fH2lRgo4qMEVRIkpKSgoSEhJQuXJlEPm18ILiBWbGiRMn
kJKSgipVqng/IMZQF6KiKBElPT0dJUqUUOUVAogIJUqUyLbWrSowRVEijiqv0JGdr23OVmBvvQX8/HOkpVAURVECIGcrsPffB375JdJSKIoSQU6cOIH69eujfv36KFu2LMqXL391+9KlSz6Vcf/992P79u0B1T9//nz06NEjoGNzOjk7iKNAAeC888r0iqLkJEqUKIH169cDAMaMGYNChQrh6aefdshjLqCYK5frd/4vv/wy5HIqmcnZFliBAsCFC5GWQlGUKGTXrl2oU6cOhg4dioYNG+Lw4cMYMmQIkpKScP311+Pll1++mrdFixZYv349rly5gqJFi2LkyJGoV68ebrzxRhw7diyg+ufNm4f69eujbt26GDx48FVr8JlnnkHt2rVxww03YMSIEQCAqVOnok6dOqhXrx7atm2b9ZOPEXK2BZY/v1pgihJNPPEEYFhDQaN+feDddwM6dOvWrfjyyy/x6aefAgBef/11FC9eHFeuXEHbtm1x5513onbt2g7HnD59Gq1bt8brr7+O4cOHY+LEiRg5cqRf9Z4/fx4DBw7EokWLULVqVfTt2xeff/457rrrLsyaNQtbtmwBESEtLQ0A8NJLL2HRokUoU6bM1bScgFpgqsAURXFD1apV0bhx46vbU6ZMQcOGDdGwYUNs27YNW7duzXRM/vz50blzZwBAo0aNsHfvXr/r3bZtG6pXr46qVasCAO69914sWbIExYsXR65cuTB48GBMnz4dBQsWBAA0b94c9957LyZMmACbzRbAmcYmOdsCK1AASE2NtBSKopgEaCmFClNBAMDOnTvx3nvvYfXq1ShatCj69evncnxVnjx5rv6Oi4vDlStX/K6XmV2mx8fHIzk5GfPmzcPUqVPxySefYO7cuRg/fjxWrVqFmTNnol69eti4cSOKFSvmd72xhlpg2gemKIoPnDlzBgkJCShcuDAOHz6MOXPmhKyu2rVrY+fOndizZw8AYPLkyWjdujXOnj2LM2fOoGvXrnjnnXewbt06AMCePXvQrFkzjB07FsWKFcPBgwdDJls0oRaYuhAVRfGBhg0bonbt2qhTpw6uvfZaNG/ePGhlz5kzB4mJiVe3p0+fji+++AI9e/ZERkYGmjZtisGDB+PYsWPo2bMnLl68CJvNhnHjxgEAnnzySfzzzz9gZnTs2BF16tQJmmzRDLkzVWOJpKQkDmhBy2HDgClTgBMngi+Uoig+sW3bNtSqVSvSYmRrXF1jIlrLzEkREikohNWFSEQTiegYEW12s5+I6H0i2kVEG4moYUgFUgtMURQlZgl3H9gkAJ087O8MoLrxGQLgk5BKU6AAkJ4O5KCoHUVRlOxCWBUYMy8BcNJDlu4AvmZhJYCiRHRNyAQqUEC+s+lMzYqiKNmZaItCLA/ggGU7xUjLBBENIaJkIkpODTQUPn9++VY3oqIoSswRbQrM1bz/LqNMmPlzZk5i5qRSpUoFVptpgakCUxRFiTmiTYGlAKhg2U4EcChktakCUxRFiVmiTYHNAHCvEY3YDMBpZj4cstpUgSlKjicYy6kAwMSJE3HkyBGX+/r164efde3BoBPWgcxENAVAGwAliSgFwGgA8QDAzJ8CmAWgC4BdAM4DuD+kApkKTGfjUJQciy/LqfjCxIkT0bBhQ5QtWzbYIipuCHcUYh9mvoaZ45k5kZm/YOZPDeUFI/rwEWauysx1mTmA0cl+oEEciqJ44KuvvkKTJk1Qv359PPzww7DZbLhy5Qr69++PunXrok6dOnj//ffxv//9D+vXr0evXr18ttxsNhuGDx+OOnXqoG7duvjhhx8AAAcPHkSLFi1Qv3591KlTBytWrHBZp6JTScm3KjBFiQqiaTWVzZs3Y/r06VixYgVy586NIUOGYOrUqahatSqOHz+OTZs2AQDS0tJQtGhRfPDBB/jwww9Rv359n8r//vvvsXXrVmzYsAGpqalo3LgxWrVqhcmTJ+O2227DiBEjkJGRgQsXLmDt2rWZ
6lRUgcm3KjBFUZyYP38+1qxZg6QkmW3pwoULqFChAm655RZs374djz/+OLp06YKOHTsGVP6yZctwzz33IC4uDmXLlkWLFi2QnJyMxo0b48EHH0R6ejp69OiBevXqoVq1akGpM7uhCgxQBaYoUUI0rabCzBg4cCDGjh2bad/GjRsxe/ZsvP/++/jxxx/x+eefB1S+K26++WYsWrQIv/32G/r27YvnnnsOffv2DUqd2Y1oi0IMLxrEoSiKG9q3b49p06bh+PHjACRacf/+/UhNTQUz46677sJLL72Ev/76CwCQkJCAs2fP+lx+q1atMHXqVGRkZODo0aNYvnw5kpKSsG/fPpQtWxZDhgzBgAEDsG7dOrd15nRytgWmQRyKorihbt26GD16NNq3bw+bzYb4+Hh8+umniIuLwwMPPABmBhHhjTfeAADcf//9GDRoEPLnz4/Vq1c7LGwJAIMGDcKwYcMAAFWqVMHixYuxcuVK1KtXD0SEcePGoXTp0pg4cSLGjRuH+Ph4FCpUCJMnT8aBAwdc1pnTydnLqdhsQFwcMHo0MGZM0OVSFMU7upxK6NHlVLIjuXIB+fKpBaYoihKD5GwFBkg/mPaBKYqixByqwHRRS0WJONmhKyNayc7XVhVY/vyqwBQlguTLlw8nTpzI1g1tpGBmnDhxAvny5Yu0KCEhZ0chAmqBKUqESUxMREpKCgJe10/xSL58+ZCYmBhpMUKCKjBVYIoSUeLj41GlSpVIi6HEIOpC1CAORVGUmEQVmFpgiqIoMYkqMA3iUBRFiUlUgakFpiiKEpOEVYERUSci2k5Eu4hopIv9lYhoARFtJKJFRBT60BlVYIqiKDFJ2BQYEcUB+AhAZwC1AfQhotpO2d4C8DUz3wDgZQCvhVwwDeJQFEWJScJpgTUBsIuZ9zDzJQBTAXR3ylMbwALj90IX+4OPaYHpIEpFUZSYIpwKrDyAA5btFCPNygYAdxi/bweQQEQlQiqVuaRKenpIq1EURVGCSzgVGLlIczZ7ngbQmojWAWgN4CCAKy4LIxpCRMlElJylEfy6KrOiKEpMEk4FlgKggmU7EcAhawZmPsTMPZm5AYDnjbTTrgpj5s+ZOYmZk0qVKhW4VLoqs6IoSkwSTgW2BkB1IqpCRHkA9AYww5qBiEoSkSnTcwAmhlwqtcAURVFikrApMGa+AmAYgDkAtgGYxsxbiOhlIupmZGsDYDsR7QBQBsCrIRfM7ANTBaYoihJThHUyX2aeBWCWU9ooy+8fAPwQTpnUAlMURYlNdCYOVWCKoigxiSowDeJQFEWJSVSBqQWmKIoSk6gC0yAORVGUmEQVmFpgiqIoMYkqMFVgiqIoMYkqMNOFqEEciqIoMYUqsLg4IG9etcAURVFiDFVggFhhqsAURVFiClVggK7KrCiKEoOoAgNUgSmKosQgqsAAUWAaxKEoihJTqAID1AJTFEWJQVSBARrEoSiKEoOoAgPUAlMURYlBVIEB2gemKIoSg6gCA9QCUxRFiUHCrsCIqBMRbSeiXUQ00sX+ikS0kIjWEdFGIuoScqFUgSmKosQcYVVgRBQH4CMAnQHUBtCHiGo7ZXsBwDRmbgCgN4CPQy6YBnEoiqLEHOG2wJoA2MXMe5j5EoCpALo75WEAhY3fRQAcCrlUpgXGHPKqFEVRlOAQbgVWHsABy3aKkWZlDIB+RJQCYBaAR10VRERDiCiZiJJTU1OzJlWBAqK8Ll3KWjmKoihK2Ai3AiMXac5mTx8Ak5g5EUAXAN8QUSY5mflzZk5i5qRSpUplTSpdE0xRFCXmCLcCSwFQwbKdiMwuwgcATAMAZv4TQD4AJUMqlSowRVGUmCPcCmwNgOpEVIWI8kCCNGY45dkPoB0AEFEtiALLoo/QC+ailqrAFEVRYoawKjBmvgJgGIA5ALZBog23ENHLRNTNyPYUgMFEtAHAFAADmEMcXaEWmKIoSsyRO9wVMvMsSHCGNW2U5fdWAM3DKpSpwHQ2DkVR
lJhBZ+IA1AJTFEWJQVSBAdoHpiiKEoOoAgPUAlMURYlBVIEB2gemKIoSg6gCA9QCUxRFiUFUgQGqwBRFUWIQVWCABnEoiqLEIKrAACB3biA+XhWYoihKDKEKzKRAAQ3iUBRFiSFUgZnoqsyKoigxhSowE1VgiqIoMYUqMJMCBYBz5yIthaIoiuIjqsBMypcHUlIiLYWiKIriI6rATCpXBvbujbQUiqIoio+oAjOpVAk4cULdiIqiKDGCKjCTypXle9++iIqhKIqi+EZYFRgRdSKi7US0i4hGutj/DhGtNz47iCgtbMKZCkzdiIqiKDFB2FZkJqI4AB8B6AAgBcAaIpphrMAMAGDmJy35HwXQIFzyqQJTFCWm2L4dSE0FbrwRiIuLtDQRIZwWWBMAu5h5DzNfAjAVQHcP+fsAmBIWyQCgTBkgXz5VYIqixAYTJgDt2wPMkZYkYoRTgZUHcMCynWKkZYKIKgGoAuAPd4UR0RAiSiai5NTU1KxLRwRUrKh9YIqixAYbNwK1a8tcrjmUcCowcpHm7tWhN4AfmDnDXWHM/DkzJzFzUqlSpYIioIbSK4oSM2zaBNxwQ6SliCjhVGApACpYthMBHHKTtzfC6T40UQWmKEoscPw4cPgwULdupCWJKOFUYGsAVCeiKkSUB6KkZjhnIqKaAIoB+DOMsgmVK0un6L//hr1qRVEUn9m0Sb7VAgsPzHwFwDAAcwBsAzCNmbcQ0ctE1M2StQ+AqcwR6JnUsWCKosQCGzfKdw63wMLa+8fMswDMckob5bQ9JpwyOVCpknzv2yedo4qiKNHIxo1AqVISPZ2D0Zk4rOhYMEVRYgEzgINcxcblHFSBWSlbFsiTRxWYoijRS0YGsHlzjncfAqrAHMmVS9yIqsAURYlW9uwBLlzI8QEcgCqwzGgovaIo0YwGcFxFFZgzlSppFKKiKNHLxo3iLdJAM1VgmahcGTh6VEx0RVGUaGPTJqBaNaBAgUhLEnFytALr3Rt4+22nRB0LpihKNLNxo/Z/GeRoBbZhA7BypVOihtIrihKtnDsnQRza/wUghyuwChWAAwecElWBKYoSrWzZIsunqAUGIIcrsMREICXFKfGaa4D4eHUhKooSfZhzIKoFBiCHK7AKFWRC5ytXLIm5csm6YGqBKYoSTRw9Cnz5JVCoEFClSqSliQpytAJLTARsNuCQ86Iu114ry3UrihIeNm0CevQAzp+PtCTRyW+/idX111/ABx/Ii7aSsxVYBWN1skxuxEaN5IFKTw+7TIqSI5k7F/jlF2DZskhLEn289x7Qtat0byQnAwMGRFqiqCFHK7DERPnOFMjRuLH4FTdsCLtMipIjOXhQvpcsiawc0chnnwE33QSsWgVcf32kpYkqcrQCc2uBNW4s32vWhFUeRcmxmAps8eLIyhFtHDsGbNsGdOsG5MsXaWmijhytwAoXlv7QTBZYYqLMTL96dUTkUpQch6nAVq/WWXCsLF0q361aRVaOKCWsCoyIOhHRdiLaRUQj3eS5m4i2EtEWIvoutPKIFZbJAiMSK0wtMEUJDwcPAiVKAJcu6YujlSVLgPz5pV9eyUTYFBgRxQH4CEBnALUB9CGi2k55qgN4DkBzZr4ewBOhlisx0YUFBogC274dOH061CIoSs6GWUKB77hDXh61H8zOkiXS/5UnT6QliUrCaYE1AbCLmfcw8yUAUwF0d8ozGMBHzHwKAJj5WKiFcjkbBwA0aSIP1tq1oRZBUXI2J06I5XX99TLDhCowIS1NAsnUfeiWcCqw8gCsqiLFSLNSA0ANIlpORCuJqJO7wohoCBElE1FyampqwEJVqAAcOQJcvuy0IylJvtWNqCihxez/KldOGusVK1w8kDmQZcvkJVoVmFvCqcDIRRo7becGUB1AGwB9AEwgoqKuCmPmz5k5iZmTSpUqFbBQiYl2D4YDJUrIgGYivRR/AAAgAElEQVRVYIoSWkwFVr68NNbnz8uA3ZzOkiXiOmzaNNKS
RC3hVGApACpYthMBOKuNFAC/MPNlZv4HwHaIQgsZbkPpAXEjaoeyooQWqwJr2VJ+R6Mb8eLF8Na3ZIm0Qfnzh7feGCKcCmwNgOpEVIWI8gDoDWCGU56fAbQFACIqCXEp7gmlUG4HMwMSyHHggMxBpihKaDAV2DXXAGXKADVrRpcCYwaefFLkO3w4PHWeOyezbqj70CNhU2DMfAXAMABzAGwDMI2ZtxDRy0TUzcg2B8AJItoKYCGAZ5j5RCjl8miB6YBmRQk9hw4BpUvLKhAA0Lq1jH/KyIisXCZjxwLvvgucOgVMmRKeOv/8U86/devw1BejhHUcGDPPYuYazFyVmV810kYx8wzjNzPzcGauzcx1mXlqqGUqXBhISHBjgTVsKJNmqhtRUULHwYPiPjRp2VKGr2zeHJ76L11yP4nwxx8Do0cD990ngV3ffBMemZYsAeLigBtvDE99MUqOnonDxG0ofcGCEtqrFpgSKdLTpYHNzjgrMNPzEa5AjmHDZKyVM3PmyL5u3YAJE4D+/YH16wNTrBs2+D45+KFDwNdfi8JMSPC/rhyEKjC4WdjS5KabgOXLs38jokQnt90GDB4caSlCi7MCq15d5nhbty70dWdkAD/9JArG7IszmTRJ+uSmTgVy5wZ69xaraPJk12WdOwcMGSLjcqycPCnK6M03vctz4gTQoYMc88EHAZ1STkIVGDxYYADQuTNw9qwu86CEn4wMue+yswv74kXg+HFHBZYrF1CvXngssORkURqA40TCzMDChUC7dvYowNKlgVtuAb79VhYSdOb334Hx44Hp0x3T16+X1S1+/dWzLGfOAJ06Abt3AzNm2C1RxS2qwCAW2NGjboysdu1kLMasWWGXS8nh7N4tbqfdu52WDc9GmFF95co5pjdsKA2/K0URTGbPlumrChZ0VGB//y2NQtu2jvn79xd3jatZ883IyfXrHdPNZZmSkz1HNPfrJ1bn999nrldxiSowiAXmcjAzIK6M1q1lRVRFCSebNsn35cvA/v2RlSVUWMeAWWnQAPj3X2DnztDWP3u2jLVq3dpRKS1cKN9t2jjm79ZN+qVcBXOYCszZ9blhg7ggAbHSXLFmjVhoY8eK21jxCVVgsI8Fc9sPduut8ka2J6RD0hTFkY0b7b937Ahv3StWiDvrl19CW487BdawoXyHsh8sNVUUR+fOosC2b7f3Xy1cKG+2117reEyBAjLp8A8/OEYupqXJ/5U3r7x4WC3mDRuAm2+WcWSzZ7uW5Y03gCJFgEceCe45ZnNUgcE+FsxtP1iXLvKtbkQlnGzaJFOaAaG3RExOnAAGDQKaNwfmzQN69fLc/5uaKn3EgeJOgdWuLa77UPaDzZsnrhdTgQFiRdlswKJF4sYjFzPg9esn5zxzpj1t+XIpq29fcfv+/bekX74MbN0K1K8vLwRz5mR2B2/fLoEkDz8s43oUn1EFBh8UWPXqQLVqqsCU8LJpk7iwEhLCo8AOHRLFMWkS8PTT4nGoVEncZmaDbMWcaLZzZ/lt5Z13gKFDvYeOHzwoVkuxYo7p8fFA3bqhVWCzZ8sLQqNGYvGZ/WBbtkhgibt+qDZtxJr6zrJc4dKlIvNDD8m22Q/299/SuV6vnrwIp6UBK1c6lvfWW6KsH3886KeY3VEFBmkfChf24EIExI24cKH7AY+KEkz+/VeCN264QV6gnF2IaWmZw76zyujRMtvE6tXA//2fKK/ff5eGuVOnzOHhmzZJA718uUTmmaxdKwrws8+k0T5zxn2dhw6J9eXK0mnQQFyIzsoxEM6ckQCMhx4SpWqziTV0yy0SGh8fL1bn4sX2/i93CiwuTkLqZ82ScHdALLekJLG08uWzuz7NAI569SQ8Pi7O0Y14+LCM+RowQEL2Fb9QBWZQoQKwb5+HDF26yI1v3tyKEkq2bJGGu25doEaNzBbYQw8BLVr43rivXi1h2e7e0rZuBSZOlD4Ys/8JAKpUkYb66FHg+ecdj/npJ1E8deoA
zz4rbrUrV2TcWunSMovF0qWiCI65WdrPeQyYlYYNRUFkNYBlxw6Z0X3KFODTT8Vd+Ouv4v7s3Nmer3Vrue7ffy/nXamS+zL79hX34I8/ykttcrLMIJI7t/xnVgWWN6/M71ikiPxnVk/OO+/INXv66aydY06FmWP+06hRI84q3bsz167tIUN6OnOBAswPPZTluhTFKxMmMAPMO3cyv/gic65czBcvyj6bjbl0adm/bp1v5d1zj+Tv3dv1/m7dmAsXZk5Ndb1/0CC5/0+ftqfVrcvcqhXzypVS9ogRzP/3f/L7++8lz8yZzPnySV5TfitVq7qX6c8/pazp0zPvs9kkfc+ezOkLFjBPmsT85ZfM774r51WyJPPChcw//cRcsCAzkXyOHbMfu2yZ1AcwDxzoWiZrPTVqMLdpw/zHH3LMzJmyb8gQ5qJFJU+HDswNG9qPe/11ybtgAXPPnp7/kxADIJmjoP3OyifiAgTjEwwF9swzzHnyMF+54iFTt27MFSowZ2RkuT5F8chjj4nCyMhg/uYbeVS3bZN9O3bYG9qxY72Xdfo0c/78zCVKyDGLFjnuX7JE0v/7X/dlrFoleT75RLZ37pTtd9+V7fvuY46Pl3q6dZPG22TGDMn76quOZdpsotyGD3dd57//iuJ+8cXM+0ylkTs389ChzPv3M//4I3ODBvZrY34aNGDeu9d+7MaNzFWqMN98s2OZFy+K/IBcc2+89JIowcGD5fvUKUn/5BMpY+9eedG4/37Huk25ChViHjOG+dw573WFAFVgUfIJhgIbP16uhvMLnQNTpkimefOyXJ8SY1gb5HDQti1zkyby27RwZsyQ7YkTZbtsWXseT3zxheT/4w/mSpXEGrp8WfZducLcrBlz+fKiMNxhszHXq2e3Jt58095IMzMfOsSckCCN8v79mY+/4w5RVrt329NOnJAy3n7bfb21azN37Zo5/ZZbmMuUYX74YVGcplKoXl0sr9275WHeu9f1W+nly8znz2dOb9dOyklJcS+TianEieTamJj/16efOip5ZrmOffowP/4489Gj3usIIarAouQTDAW2eLFcjd9/95DpwgXm4sWZe/XKcn1KDGCzyQ3RuDHztde6doGFqt4SJZgfeEC2zYb+rbdke+BAuQ9feknSjxzxXF6rVuLustnEhWY2qj/8wHz99bL9xRfe5frwQ8mbnCxKz/m5W7pU3HCuOHBAlFunTvaXgU2bpLypU93X2a8fc7lyjmnr1rGDxbh3L/N//sP87bdeXCg+8P339uvuC02aiCyPPmpPMy3HZs1k38KFWZMpRKgCi5JPMBTY4cNyNd5/30vGxx8XX6O7voLszrx50qAGwn//y3zNNcwtWjAPGOC54Yo0f/8tcgJ219sPP4Sn7kOHpL733rOnlSjB/OCD8rtGDebbbrM35BMnui9rzx7J88orsm32y5gWy3XXyf/gi4V56pS42Lp2dSzTV955hx36x37/XbaXLnV/zLhxmZX0PfeIMjRddpHk3XdFvmnTHNNr1bJf45MnIyObF1SBRcknGArMZhMPyLBhXjKaPux33slynTGH2Rj60u/iihYtxO3VqhVzsWLSGGb1jTlU9Owpnf8ffSSupvLlmbt0CU/dc+bwVZefSbNm4lY8elT2vfGG3LSJiSKrO8x+mn377Gk7d8q5fP21/9f/vvvsDfPWrf4de/kyc/36zHnzilXbvDl79dubgRzduklf3j//MMfFMT/1lH91h4qzZ+XF7MIFx3QzaKZChcjI5QPZQYGFNYyeiDoR0XYi2kVEI13sH0BEqUS03vgMCp9sEq3sdcaeunVl7rQvvgjO+JRYwpxWKNAptXbvlrDlxYtlaYkLF7yMXYggq1fL2L+HH5bZyO+7T8ZEBXvslSvMORDr1rWnmaH0K1bIdosWctN27QrMnSuzujvDLGOM2rYFKla0p1erJnN79u8v45L8YcgQ+b7uOqBWLf+OzZ1bQu8fekgGX/79twwIdp7I
10qzZsB774m8TZoATz0ls9U/+aR/dYeKQoWA556TsV9WGjSQ73r1QlZ1Tmt+XBE2BUZEcQA+AtAZQG0AfYiotous/2Pm+sZnQrjkA3xUYIBMtbN5s32Zi7VrZYxJdr+jzGUiAlE6//4rgzarVpVts/Hbti04sgWTI0dkvFRSkj3t/vtl8OvXX4e+/o0bpWEvWdKeVr26yDR3rowratRI0rt2lXWoXM2Ovny5vDTce2/wZLvxRuD22wOfNaJKFRn7tGCBjMNKSZHz8cRjj0n+kydFAfbr537sWLRQv758h1CBTZ4sw1NPnQpZFVFPOC2wJgB2MfMeZr4EYCqA7mGs3ys1akjb7HXh1N69ZdqZ556TN+GkJOCee4A//giLnBEhNdU+J14gCsy02mJBga1dK9/W9ZiqVZNpkyZOdP+i4soKsnL5slgQkyd7Xh5l40ZH6wuQmxOQF6XGje2N/s03i4VonZfP5KOPxNK54w7PcvkDkSiRoUODU1YuH5ug1q1lsPCwYcBLL2W97lDTtKn8TyGaWT49HXjhBRlfXqRISKqICcKpwMoDsM42mGKkOXMHEW0koh+IqEJ4RBNq1JC2afduLxkTEkSJLVwoU+G8/ba8EY4Zk32tsJkzxQJp314mjfR3nSbzopoKrHhxma0hGhXYmjXSuJpuIJMHHgB27XI9ue3bbwNFi8oEse6YORMYN05cd9ddJ25oZ0V24IDMo9eypWN69erynZYmL00m+fPLfzJ9uqMC3btXZpR48EFxc2UHKlaUVYorhLVZCIyEBPHQNG0akuI//lgmKHnjDd/fAbIj4Tx1F5Odwbm1/xVAZWa+AcB8AF+5LYxoCBElE1FyampqUAQ0X3J9ciO+9ZYosJ07geHDgf/8Rxq2BQuCIkvU8fPP0nD06CGTkzrPi+cNU4FVq2ZPq1UrOhVYcrLI5tzw33GHNEwTJzqmT54sUwFlZIhycrdo4cSJ4hr88UdRdoMGAa+84phn2jT57t3bMd163awKDJDpn1JSRDmavPuuKOHHHvN8rkrMkZYGvPoq0LGjvLvkaMIVLQLgRgBzLNvPAXjOQ/44AKd9KTsYUYjMzGlpEjj0+usBHJyeLpFqzZuHf9BrqDl3TgahPvqoTJcDMK9Y4V8ZDz0kkYdWhg61T7kTLdhsMkD2vvtc7x88WAbOPvKIzIgxZ47MBtG2rYyPypdPxjo5z9Zy6JBEz40caa+na1eZ4sgawdaoEXNSkuu6r7mG3YZl9+ghM3fs2yf7CxZk7t/f79NXop+RI9mvWcTcAY1C9Is1AKoTURUiygOgN4AZ1gxEdI1lsxuAsL6eFykiE0IHtHZg3rxihS1fnv2ssDlzxOl+++32CU79nWB19267+9CkVi15nfS0zHqo+eknx0UbU1JEHmsAh5VXXpH+zvHjZYLW226TJUimT5fAinHjJFrxnXccj/vmG7HQ7r9ftk3r6PhxcfUBYs2vXQv06eO67tq1ZXZ656VHAKnPZpM+ts8+k6CZp57y71ooUc/Bg2Jc9+1rjxPJ0YRTWwLoAmAHgN0AnjfSXgbQzfj9GoAtADYAWAjgOl/KDZYFxszcsqUMVwoI0wpr1kxGRmcX+vcX6+nyZeYzZ/jqOCR/qFo18wwm8+bx1YlNI8GFC2IBFi9un1bInKnizz89H3v4MPOoUWJtHTxoT7fZmG+/Xaw0c85Bm425Zs3MN5aZbk4HZY7ZcjeN0Z49YvW54+WX+eocex06eJZfiUkefVTmUfjnn6yXhWxggUVcgGB8gqnAHnhA5t8MGHNSRUBG4w8f7nrOtVBz4YLj4NVAuXRJlNe999rTihWTOeh85fJlcbP95z+O6Skpcp0+/DDrcgaCObclILO/M4uMuXNn7T87eVL++4QE5jVrxN3qbsaMDz6QfatWyawYrVoFXu+FC/KiADDPnRt4OUpUcvGiTMjSp09wyssOCiwHx6+4pkYNWbro9OkACxg0SIIA3nhDoqbGjQM+/DBz
vn/+CV3E4sWLQLt2EumW1YHC06fLQJO777anVazoX7n790u0nbMLsVw5CYqIVCDHl1/Kudxwg0S3Mct/V6eORPcFSrFiEo1YooQsmDh6tAy7uOuuzHnvvVeCRYYNk4G97tyHvpAvn7gqR4zQ3v1syJw5wIkTMgxOMYi0Bg3GJ5gW2PTp8gK7Zk2QCuzYUTrqz561p5lTBX3+eZAqsWCziRkJiK/h7ruzVl7z5vJWbw1K6NaNuU4d38uYO5ddLuPBLO4z52UtwsG+feKuGzXKbjUvWiTuxEGDglPHrl32wAvrkhrOPPIIX10aJKfOsal45e67mUuVEqdIMIBaYNkPv0LpfWHMGOmo//hj2T53zj4dz3vvBd8K+/BDGV/0wgsSVDJtmix3Hghr10pQyrBhjoNNKlUSC8xX2Z3HgFlxDqX/80+xVDwNjTh3TqwVbyPOMzLc7/vqK5F/wAAJyiheXELhT550H8DhL1WriiXWrp3ngIphw+S7QwfH2TcUxeD0aWDGDBldER8faWmiiEhr0GB8gmmBpafLi/no0UErUtYuMq2wJ56Qt+3BgznThK1ZZf58CdXu3l0spn//lclE69cPbNLce++VcOy0NMf0t97iTOHcqanu+42eekomcHW1EKi5Qm1amliP5vIUDRs6rv67cqVcx/Ll+Wq/Ve7ccm5Dhsjs8VYWLJAOA1d/ZEZG5gUNn33WXm5yssfLEhImTGDesCH89SoxgbkE3MqVwSsT2cACi7gAwfgEU4ExS9sWrI5SZrbPqN2rl2jHhx6Sxr5ECc8zifvDhg0ye3qdOhIpaDJ1KgfkrjxyRFyQjzySed/330uZ69fLts0mF83dufToIQsTuuKXX/hq1N/8+fK7b19RTm3aiBJ+7TXZLl9elOqrrzJ/9ZUEXHTsKFF3+fLJ8iMZGaIMcucW+XPnlnWnrCxcKPVMnmxP27tX1nDKkyd8634pio/cfDNztWrBHTKpCixKPsFWYF26+NfF4xOdOsnlTky0WxYjRkijmdVowX37ZNG/8uUzr4Zrs8nYgFKlMltSnjBDsp0tG2bm1atl3y+/yPbmzezReqlbV9avcsWOHXw1Qq9tW+kzSk8X5QKI3IB0ALhb/+nQIeZbb+Wr61sBoth27ZI+rZYt7U++zcZ8112i7J1XIO7fX/4nRYkiDhyQ994xY4JbriqwKPkEW4GNHi16xRp3kWXWrJEZHmbNsqf9849U5BxebuLK5ebMiRMSsl2kSGZLw2TtWnkCnn3WfTn//CNju156ST5lyrhvzI8cYYfVP81FBxMSMi//brOJG/KJJ1yXdfmyuBdbtpQyrMvLf/SRKKDx472/etpskq9IEQnxv3xZ0idMkHK//FKs3n79ZHvEiMxlZGRE16wgisLMb74pt+zOncEtVxVYlHyCrcB++02uzOLFQS3WtULq3j3zdELMsnBh1arMrVszb9vmuG/7dlEaXbuK0siTx3WEn5UBAyTf7t2O6Rs2iMsuLo6vWlFm/5K7/jmbTVx25qKCnTqJ5fPqq3x1TJOJudT1Bx+4l61uXclTooRMW+Vclz84X+OMDOabbpJrnJTEVxfkVEWlxAi33hoCjxBnDwWmUYguMFfRMJf7Chqupo0eNkyiFF991R7Vd/myjLs6eFCW1qhXT5aQ+OILoHlzmcJo+HBg+3aJoFu4UJab8MQrr8iCgiONdURtNolUrFdPJup9/HEZr5WRIZ+LF2UhRFcQ2ceCXbwoa1F16AA8+qiMfRozxp7XUwSiibm0yhNPyHgp57r8wfka58oFfPKJjGXbvl2mjXrhBf/LVZQIcfAgULlypKWIUiKtQYPxCbYFxiwxCXfeGfRiM2Oz2d1ajzwi0YKPPSbb33wj7ro+feyW0XXXiU8hkH6zMWOkjNmzJbACkDFjJ074X1aHDrIs/IIFUs6vv0q6GVVoTvb71VeyvX27+7LefVemP3E1SW2wWLw4+D4YRQkDpUoxP/hg8MtFNrDAIi5A
MD6hUGC9ejFXrBj0Yl2TkSHuOMDu5nLuM1q5Uj5ZcX2dO2cPQ8+VS6L2Ai1v0CBROiNGyLx/Zofh2bPyxJUpI31kI0dKXRcv8oEDzFu2uCjLZpPADUVRHEhPl8f15ZeDX3Z2UGDqQnRD06biUfN32auAyJVL1hd7+22ZyqhNG+D//i+zQE2bZs31VbCgDJ6uXBmYPVtmQw+0vEqVZM6tX3+VZebNtbMKFZKya9aU8l9/XdYRy5MHI0a4WRyYyPuy8oqSAzHbn3LlIitHtKIKzA1Nmsj3mjVhrHT4cOnzmjlT+qtCwR13yDyMHTtmrZyKFeV769bMZTVqBCxaJJO3NW8OdO8OQFYq8WcCD0XJ6Rw8KN/lXa1dryBErWTs06ABEBcngRy33RbGiuvWDWNlWcBcFwyQAA5niESxWZTb8ePAhQsyLU7RomGQUVFiHFOBqQXmGrXA3FCggOiSoEciZhdMBVasmFhcPmBOb2g+lIqieObQIflWC8w1qsA80KSJKDCbLdKSRCHly4uJ2r69fHvBZpOlIAD7Q6koimcOHpTu4eLFIy1JdKIKzANNmsiK97t2RVqSKCQ+Hhg/Hhg1yqfsJ0/aXwRUgSmKbxw8KO5DHbbomrArMCLqRETbiWgXEY30kO9OImIiCtLaFv7TtKl8qxvRDfffL4s/+sDx4/bfqsCiE2YZ056WFmlJFJNDh9R96ImwKjAiigPwEYDOAGoD6ENEtV3kSwDwGIBV4ZTPmVq1JPJcFVjWsS7vpQosOpk/H7j9duDOOz0vpaaED9MCU1wTbgusCYBdzLyHmS8BmAqgu4t8YwG8CcDLioWhJS5O1jZcskRDv7OKqcBy5VIFFq1MmCCe4QULgBdfjLQ0CrMoMLXA3BNuBVYewAHLdoqRdhUiagCgAjPP9FQQEQ0homQiSk71tHpvFundG9iwQRbWVQLH/Itq1lQFFo0cPy7uw4cfBgYPBl57TaaNVCLHmTPA+fOqwDwRbgXmqivyqm1DRLkAvAPAw/rrxkHMnzNzEjMnlSpVKogiOjJwoIzZHTVKrbCsYPaB3XCDKrBoZPJk4NIl4IEHgPffl5ER995rn4tZCT86Bsw74VZgKQAqWLYTAVibswQAdQAsIqK9AJoBmBHJQI48ecSdsmqVzJCkBEZqKpCQAFSpAhw+rEMToglmWeigSRMZ+5gvH/Djj8DZs8C330ZaupyLzsLhnXArsDUAqhNRFSLKA6A3gBnmTmY+zcwlmbkyM1cGsBJAN2ZODrOcDtx3nzS8phWWkSEPvFplvpOaCpQqJW+Tly87RiUqoefSJfdu8NWrgc2bxfoyqVRJPA87doRHPiUzpqdCLTD3hFWBMfMVAMMAzAGwDcA0Zt5CRC8TUbdwyuIP8fGirNaulWW1mjYFBg0Cxo4Ftm2LtHSxganAzLdJdSOGl4kTZVavrVsz7/viC5l5pndvx/Tq1YGdO8Mjn5IZtcC8E/ZxYMw8i5lrMHNVZn7VSBvFzDNc5G0TaevLpF8/eaBHjRIX2HvvSfr06ZGVK1Y4ftxugQGqwAJh2jTgq68CO3bBAvl2VmDnzgFTpgC9egGFCzvuq1FDLDD1MkSGgwdlprb8+SMtSfSiM3H4SO7cwNdfA2+8IQv7PvaY9BmoAvON1FSgZElVYFnhtdeA0aP9P45ZFgcAMrsEV60SJeZsfQHywpaWZp8CTAkvhw6p+9AbqsD8oFkz4Nln7Utf3X67uBX374+sXNEOs92FWLaspGVFge3aJbPZB3upm6NHZbmXaMRmkxenffv8nyljyxZ7n6OzAjMtsnr1Mh9Xo4brY5TwoGPAvKMKLAvcfrt863gZz5w7B1y8KAosTx75zooCmzNHlmSZNi14MgLAPfcAXboEt8xgsW+fLEUDyJJx/mBaX9de61qBFS8OlC6d+bjq1eVbFVhk0Fk4vKMKLAvUrCnTTakb0TPm2785XK9cuawpsGXL5DuYwxqOHZOGfuvWMK3C7Sd//23/
vX69f8cuXChRhR06uFZgtWu7niy2cmVxnWsgR/jJyJD7UC0wz6gCyyK33y5TTWXnfoKDB7M2bsuchaNkSfnOqgJbvlwa1i1bgAMHvOf3hRkz7Oe4dGlwygwmZrRrwYIyM4yv2GyimNu2FZfgiROO96qpwFwRH+/aalNCz9Gj8t+pAvOMKrAscvvt8rb066+RliQ0HD4sjdjUqYGXYSqwYFhg+/eL0jLHLP3+e+ByWZk+XayUggWBxYuDU2Yw2bZNXgBuvNE/C2zzZlnKpm1b8RgAdoWUmirWca1a7o/XUPrIoLNw+IYqsCzSqBFQoUL2dSOuXi2DYP11W1lxVmDly8sb5pUr/pe1fLl8Dx4s1z0YCuzMGZmJvWdPoHlzsaijjW3bRNHUry+Wp6/XbuFC+W7TJnNQhhnA4c4CA+SYnTt9s8A3bRKFGQirVkmE7+HDgR0fTq5ccT2eLpjoSsy+oQosixCJFTZzpjQEt94KvPCCBC1kB9atk++szInnqg/MZhMl5i/Ll4uVVK8e0KmTKJ7LlwOXDQBmzRIl3bMn0KqVNMQnT2atzGDCbFdg9erJvbV9u2/HLlwoFnTFivY+LX8UWPXqMqGsN4v5wgXgllskEMYfdu4E7rpLInxHjpT6Xn3VHrASaZzHwNlsQP/+wPXX+7fM0o4dco85l92unYwxTXdad0MtMN9QBRYEnnkGeOIJ4Lrr5EF/9VVRYsFk/frIhHgHQ4Glpkr0oTn8ICtjwZYvFzda7txA585iPf35Z+CyAWI9lykj5bZqJWnR1A92/LgoVNMCA3yziG02sSbbtJFt5z6trVtlfkpPb/mm1ebNjTh+vFhPmzbZG19vzJ4tynPWLBnftn69zBbywgtyrnv2+FZOqLZUyqMAABRqSURBVJgwQaIzv/tOtpmB4cPt7nQz3RtHjsi6r6+/7pi+aRPwxx8y32THjo4vTQcPynJOZcpk/TyyM6rAgkBiIvD228BPP0mD/9BDwFtviXUQDJilsR46NDjl+YNVgQU6I4M5BsyMdAtUgZ05IyHkzZvLdrt2osiyEo2Yni4NaPfu0mA0aQLkzRtdbkQzgOO666QfK08e3wI5NmwATp2S/i8Tc3YNwHMEookvofTp6eL+u/Za2Z4zx7tsADBpkvTr7doFjBkj1uVPP4nVePq0LKzpbJmEky++kICXvn2BPn1ExvfeAx5/HOjRQ4Zx+LLw56xZ4iX45hvHZ+jHH+Xaf/ihuFBbtJAXpwMHpK+3bFm5JxUPMHPMfxo1asTRxL//MteqxXzNNcypqVkvb+tWZoA5Xz7m8+ezXp6vpKZKvRUryvfRo4GV07Urc/369u1Dh6S8jz/2r5w5c+S4uXPtaa1bO5btL7/+KmX+/rtjmUlJgZcZbD79VGTcu1e2GzZk7tDB+3FvvCHHHThgTxs+nDl/fuaMDOayZZnvv99zGRkZct899ZT7PB98IPX88Qdz+fLMd93lXbaMDOYSJZjvvdf1fvN/GTzYe1mh4PBhZiLm0aOZX3mFOXdukadPH5H9f/+T7YULvZfVs6fkBZhXr7anX389c6tW8nvRIuYiRez5AOYmTUJxZnYAJHMUtN9Z+agFFgIKFBD3wokTMulvVpcOMTvi09PtY6DCgWl93XmnfAfqRjx+3B5CD4hbJpCVmZctk+OaNbOndeokrqdAO/9/+gkoUsTRSmnVCvjrL7H4ooFt2+SeqmAsRFSvnpyzN4v4hx9kRfHERHtajRrSv7Rxo7i2PPV/AXK9q1Vzb4Glp4trrGVLcVV26iSz3nsLMlm/Xp6PDh1c7+/aFXjuOXFNBjr/Y1b49Ve5vj17As8/D6xcKRN5T5ok16RrV+mLnTLFfsyhQ7Jyxd699rRLl4C5c4G77xbL2cy/fbsE49xxh2y3bi3/88yZwOefi7Xn7HJUXBBpDRqMT7RZYCbjxsmbVOPGzCtWSJrNxrxyJfN//+u7dXbXXWLN5cnD/PTToZPXGfMNfvly+f76
68DKqVpV3lytlCvHPHCgf+XcfDNzgwaOaevXi2wTJ/ovV0YGc+nSmWWbP1/KnD3b/zJDQceOYnWZvPuuyHfokPtj9u6VPK+/7pi+cKGkjxkj37/95r3+nj2Zr7tOfmdkMA8dynzHHVLGo49KOfPny/7vv7ffM554/XXJd/iw+zyXLzO3bSsW45YtjvsyMpgnTGAeNYr5hReYX3xRzs1m834+vtClC3OVKp7Lu+ce5uLFmS9dEnk6dpRzGjTInmfePEmbMYO5e3d5jq9ckeff2ToON8gGFljEBQjGJ1oVmM3G/M030lgDzJ07M1eqxFddBL64R2w25lKlxNVy883MdeuGXOyr9O4t7sP0dLs7JRAKF2Z+7DHHtKQk5k6dfC/j0iXmggWlwbRis0mj4IvbyplVq+R/+PZbx/Rz58Rl9Nxz/pcZCipWlMbSZNEi7wr2rbckz65djukHD0p6/fryvWeP9/pHjmSOjxeFYja8FSvKPQEwt2hhb+hPnWKOixOF4ol27Zjr1PFe9+HDzCVLyv1y+bI93Xw5BJhz5bLL0rAh8+TJjnn95exZ5rx5mZ94wnO+GTOkzlmzmN9/X35XrSrHHjsmeZ54QrbPnbO7Hf/4g7lRI+amTQOXMRioAouST7QqMJOzZ5mff14UUefOzJMmMT/wgDzoO3d6PnbLFr5qYZhvrZ7evINJzZrMPXrI7woVmPv187+M9HSReexYx/Tu3f1TxqtXSzlTp2bed//9zEWL+t9ovfiiNH4nTmTe16yZNDIZGe6PT06W6xPKfsmzZzNfv5Mn+ap1deiQWLK33SZK3iq/s7XKLIqmYEE53uwL88YXX/BVCzwujrlXLynn3Dm5Bs7Xr3lzz32I589Lo/7kk97rZrY3/P/9r2z/9Zco1O7d7Yrz/Hnmzz4TSxFgHjLEt7Jd8cMP7FP/Vnq63HctW0o/4a232vurX35Z8lSvbn9R+/df5kKFpP8SEA9HJFEFFiWfaFdgrjh8WBqQvn095/vwQ776pvzXX/L7q69CL9/Zs/JW+9JLst2mDfONN/pfjvnG/8knjumPPspcoIDU4wtPPSVWkflma2XaNKlj2TL/ZGvYUBpbV3z8sZT5+OPu3Ujdu7OD+yxYHDsmjR2zKAhAGlUrlSoxV6smDWJcnOR56y3Zt3+/Y4PvTIMGfNVa8YWlSyV/fLy41dLSPOcfO5Y9Bv3Mncs+uy9N7rxTXOirVomSKlfOtQs+I0PuLSLmtWsd96WlidL1Rv/+4hr05YVo4EA5l5Il7e7Qzp2Zy5Rh3rRJ9n3wgT1/v3581XJ0to7DjSqwQCoEOgHYDmAXgJEu9g8FsAnAegDLANT2VmYsKjBm5meflQdt0yZ72u7dzBcv2rfvvFPcNTabvc/G6k4KFcuW8VXfPbNYjKVL+1+O2Ufl3ACvWCHpn37qvYxLl6Ru0xp0xnRbPf+873KZitVdI2+zifvH+jZtZd8+sd6AwF2rrti2jblYMbF+9+0TFzSQuQ/IjGzr1o15xw75LlhQjnnnHdm3Y4frOnr1kv2+WtRHjkj+3LlFgXhjzRrJP3ky84YNzA8+KFayeV8/+6woQ1+UicnRo6Ik8uSRZ8bTS0NamtwvVtfm3r2i9K69ljklxf2xly/L9e/f3ze5liyRe+/nn+1pZr9XUpL95dNk1ixJq1fPt/JDiSow/5VXHIDdAK4FkAfABmcFBaCw5Xc3AL97KzdWFdjx48wJCdIYbd1qb5R69JAHz2aTh/a+++zH9O0rrkhXrp+PPpL9V65kXTYzNNrsZDb7Ps6ccZ1/7FjpwHfGfJgXL3ZMt9nEEqhTx3vH+y+/SBm//OI+T4sWvlsUzMzjx0uZGza4z5ORIdfe+S2aWQIHiMS1evPNvtfridRUaWBLlpSQ6sREeYGJi3N8qWGW/2XlSvv23r1i0fbowXzTTZ4byBdf9Ky8nbHZmNu3z2xFuyMjQ+7RwoX56vAPawh6gwYyVMFfTEt7xAjvec3/
d+pUua41a8o1TUhgrlHDvRv+jz9cv3B5wlkR22ziHgeYa9d23HfpktzzH33ke/mhQhWY/wrsRgBzLNvPAXjOQ/4+AGZ7KzdWFRizvL2bHdEJCfImDchb9ObN8vvLL+35v/pK0v76y7Ec0yUJML/5ZtblGjhQGiFTuZiNx7p1mfPu3y+NecGCYkFa+e47OW7r1szHmX0rixZ5luX22+WN2trH48yrr7LXqDYrPXqIgvCmPC9flv8kVy7mBQsk7eJFcRHdeqsEpxQo4Fk2X7hwQdyZ+fIx//mnWK5ly8o51ajhWxlvvslX3VOvvOI+3+TJksdqNQSbESNEabz1lvSRmRGtAwawyz5RX9m507dIwytXJFClQgUJlsibV16ili6V+7RWLdcuzvvuk7y+urbdYd7bzzyTtXJCiSow/xXYnQAmWLb7A/jQRb5HDEvtAIDqbsoaAiAZQHLFihX9+d+iitOnZTDjk0/Km6LNJg1m7twSeQgw//OPPb85CNg5PPrRR+VNvVUrcbNs3uy97rlzxV3iSrnUry9hwSZr17LbN9PXXpN9BQtKX5nVOjSjs1z1XZ0/L30N1gjCDz+U4AkzuOXYMbkWw4d7Phezf3DSJM/5mKXzvWBBCQf3hbNnpcErXVpcj2ZQwW+/2cPGrdZQIPTvL+VMm2ZP271b6vVVTvPtHmD++2/3+Y4flyAHd9Z0KLDZ5B43FWxWr5cvLF5sfzn86Sd7+qJF8rKXlCT3gsnq1fIi5mnQtq+kp4sL2pcoz0ihCsx/BXaXCwX2gYf89wD4ylu5sWyBueLkSXu4faVKmffXry/ppi9/715RWoMH2/sKGjXybBXYbBKpBjDffbfjvosXpY/C6qpJS2OXkVM2m3Sqt2xpd9tYZ9h48UVpFNy5NZ9+WhRvSoo9ypJIzu/AAXt/jrWf0N35lC0rof/eMIMIfv3Ve16TLVvE0mrZUj6VK8s5mf1D//d/vpfljBmOPWZM5n1m36c/cr73XuCyhJKMDOkLq1Eja2Hu/vDWW44vBSY//8xXg3RM2Zo0kXvo9OnwyBZpVIH5r8D8dSHmAnDaW7nZTYExS2d5fLzrwb6rVkn02XXXiYUycKAosP37Zb9pFTzzjPuIMXMsUc2aojA2brTvM11MztZWiRKZw5PN8Pbx46Wx7dBBrBvzzXPoUFGo7ti9W+o3+wx695a388KFRbbrrvN9WqcBA8Siu3LFHswwbpycx6pVco7r1ombKF8+e6Sfr5jXxdkCrl5drGZvZGRIJ771Pzl/XpRh7dpZd0PGCsHoow0Gjz/OV/tWJ07ksEX4RguqwPxXYLkB7AFQxRLEcb1TnuqW37f5cpGzowJjln4Qd7N1LF4sbpBatcSCMd8kTfr25avuk4YNZcYC61tvp07iEktJkb63O+6Q9H/+EeVx442Z35KbNJEBqFaGDZM+A7NR3rdPysubV4IJqlQRGT3RtStf7R8xG7clS+x9er52eJuuvUaN7ANb3X26dvWtTGeGDZOxP1aX6MCBojg9WUr798usEmYE2pEjkj5qlKT5MqeeElzS0+XZKFZM+ntvvNE/azfWUQUWmBLrAmCH0cf1vJH2MoBuxu/3AGwxwugXOis4V5/sqsC88fvvYqUVKGBvEE2uXJFQ41GjpF8MkPDpy5ft/UVmFJrZiCYni9IpXNi1775PH0eX5sWLYpX16uWYb80a6bNq3lwUmbdZMvbsYf7888yNx9y5Mtbq1CmfLgefPCmRZlWryvi13bulv2fdOnHTff+99IX8/HPgExMzZ446+/JLuX7u+h2/+07kKlhQ3LIFCsgYrvnz5fqEY1iE4pqdO+WFy9W4seyOKrAo+eRUBcYs47V8GUhrRoH17i2h2QkJdsVw6pQ0sMWLS57vvnNdxgsviEVnhnSb/QieBqReuhS8+el84cKF8NbHLANSXQ3WTk+XMVCAvN2bA1dXrBArDpD/IVwzqyiuWbo083RiOQFVYFHyyckKzB/MIAlA
BpNaeeklSbeOOXNm0iTJs327PVqydOnwdchHK+Z8jNZJgQ8ckPBtc9yS8zXauFFcqxMmhFdWRTHJDgost9tp6pVsx4gRshTEZ5/JCtJWnn5aFtDztCR81aryPX68rCT711+yDHzuHH4XEclyIkuXynX57TdZvPD8eeD77+3L0VipW1cWlFQUJXBIFHFsk5SUxMnJyZEWI9tz+LB9NeWqVWWdpP79VYEBsqruo4/K7zx5ZH2xceO8r7elKJGCiNYyc1Kk5cgK2vQoPlO2LDB2LFCxolhqqrjs9O0rC0Q2bgy0awcUKhRpiRQl+6MWmKIoSg4kO1hguSItgKIoiqIEgiowRVEUJSZRBaYoiqL8f3v3F2JVFcVx/PtjJimNMPtHjZYKUklQSoT9IcR60JLsocgoEil6CbIownqJHnoIon8UQqhlEFaYlPQQhAn1kqQJZVkk9scpU6O0KMik1cPeQ5dhhrDmzOHs/fvAcO/ecy6sxbqcdc/e5850khuYmZl1khuYmZl1khuYmZl1khuYmZl1khuYmZl1UhFfZJZ0EPjmP778VODHMQynC2rMGerMu8acoc68jzXncyLitKaCGQ9FNLD/Q9K2rn8b/VjVmDPUmXeNOUOdedeYs5cQzcysk9zAzMysk9zA4Pm2A2hBjTlDnXnXmDPUmXd1OVe/B2ZmZt3kKzAzM+skNzAzM+ukahuYpIWSvpC0W9LKtuNpiqRpkrZI2iXpU0kr8vwUSe9I+jI/ntx2rGNNUp+kHZLeyuMZkrbmnF+VNKHtGMeapMmSNkj6PNf80tJrLene/N7eKWm9pONLrLWktZIOSNrZMzdibZU8k89vH0ua217kzamygUnqA54DFgGzgZslzW43qsYcBe6LiPOBecBdOdeVwOaImAVszuPSrAB29YwfA57MOf8M3N5KVM16Gng7Is4DLiTlX2ytJQ0AdwMXR8QFQB+wlDJr/SKwcNjcaLVdBMzKP3cCq8YpxnFVZQMDLgF2R8SeiDgCvAIsaTmmRkTEvoj4KD//lXRCGyDluy4ftg64vp0ImyFpKnAtsDqPBSwANuRDSsz5JOBKYA1ARByJiEMUXmugHzhBUj8wEdhHgbWOiPeAn4ZNj1bbJcBLkXwATJZ05vhEOn5qbWADwN6e8WCeK5qk6cAcYCtwRkTsg9TkgNPbi6wRTwEPAH/l8SnAoYg4mscl1nwmcBB4IS+drpY0iYJrHRHfAY8D35Ia12FgO+XXeshota3iHFdrA9MIc0V/n0DSicDrwD0R8Uvb8TRJ0mLgQERs750e4dDSat4PzAVWRcQc4DcKWi4cSd7zWQLMAM4CJpGWz4Yrrdb/pob3e7UNbBCY1jOeCnzfUiyNk3QcqXm9HBEb8/T+oSWF/HigrfgacDlwnaSvScvDC0hXZJPzMhOUWfNBYDAitubxBlJDK7nWVwNfRcTBiPgT2AhcRvm1HjJabas4x9XawD4EZuU7lSaQNn03tRxTI/LezxpgV0Q80fOrTcCy/HwZ8OZ4x9aUiHgwIqZGxHRSbd+NiFuALcAN+bCicgaIiB+AvZLOzVNXAZ9RcK1JS4fzJE3M7/WhnIuudY/RarsJuC3fjTgPODy01FiSav8Sh6RrSJ/K+4C1EfFoyyE1QtIVwPvAJ/yzH/QQaR/sNeBs0kngxogYvkHceZLmA/dHxGJJM0lXZFOAHcCtEfFHm/GNNUkXkW5cmQDsAZaTPqgWW2tJjwA3ke643QHcQdrvKarWktYD80n/NmU/8DDwBiPUNjfzZ0l3Lf4OLI+IbW3E3aRqG5iZmXVbrUuIZmbWcW5gZmbWSW5gZmbWSW5gZmbWSW5gZmbWSW5gZmbWSW5gZmbWSX8DlZ9Tmzov1loAAAAASUVORK5CYII=\n", 2102 | "text/plain": [ 2103 | "
" 2104 | ] 2105 | }, 2106 | "metadata": { 2107 | "needs_background": "light" 2108 | }, 2109 | "output_type": "display_data" 2110 | } 2111 | ], 2112 | "source": [ 2113 | "plt.plot(range(100), hist.history['loss'], 'r', label='Train Loss')\n", 2114 | "plt.plot(range(100), hist.history['val_loss'], 'b', label='Test Loss')\n", 2115 | "plt.title(\"Dataset1: Neural Network Model on Latent Features: Train-Test Loss \")\n", 2116 | "plt.legend()" 2117 | ] 2118 | }, 2119 | { 2120 | "cell_type": "code", 2121 | "execution_count": null, 2122 | "metadata": {}, 2123 | "outputs": [], 2124 | "source": [] 2125 | } 2126 | ], 2127 | "metadata": { 2128 | "kernelspec": { 2129 | "display_name": "Python 3", 2130 | "language": "python", 2131 | "name": "python3" 2132 | }, 2133 | "language_info": { 2134 | "codemirror_mode": { 2135 | "name": "ipython", 2136 | "version": 3 2137 | }, 2138 | "file_extension": ".py", 2139 | "mimetype": "text/x-python", 2140 | "name": "python", 2141 | "nbconvert_exporter": "python", 2142 | "pygments_lexer": "ipython3", 2143 | "version": "3.7.1" 2144 | } 2145 | }, 2146 | "nbformat": 4, 2147 | "nbformat_minor": 2 2148 | } 2149 | -------------------------------------------------------------------------------- /Project-UtilityFunctions/__pycache__/lstm.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Project-UtilityFunctions/__pycache__/lstm.cpython-37.pyc -------------------------------------------------------------------------------- /Project-UtilityFunctions/classificationlibrary.py: -------------------------------------------------------------------------------- 1 | #Libraries for feature encoding 2 | from sklearn.preprocessing import LabelEncoder 3 | 4 | #Libraries for classification 5 | from sklearn.linear_model import LogisticRegression 6 | from sklearn.tree import DecisionTreeClassifier 7 | 
from sklearn.neighbors import KNeighborsClassifier 8 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis 9 | from sklearn.naive_bayes import GaussianNB 10 | from sklearn.svm import SVC 11 | from sklearn.ensemble import RandomForestClassifier #RandomForestClassifier: Falls under wrapper methods (feature importance) 12 | from sklearn.ensemble import ExtraTreesClassifier #ExtraTreesClassifier: Falls under wrapper methods (feature importance) 13 | from sklearn.neighbors import KNeighborsClassifier 14 | 15 | #Libraries to measure the accuracy 16 | from sklearn import metrics 17 | from sklearn.metrics import accuracy_score 18 | 19 | #import pandas library 20 | import pandas as pd 21 | 22 | #This function is used to perform classification using DecisionTreeClassifier 23 | def classifyUsingDecisionTreeClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset): 24 | print("****** Start classification training using DecisionTreeClassifier *****") 25 | xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values 26 | ytrain = trainingEncodedAndScaledDataset.iloc[:, len(trainingEncodedAndScaledDataset.columns)-1].values 27 | 28 | labelencoder_ytrain = LabelEncoder() 29 | ytrain = labelencoder_ytrain.fit_transform(ytrain) 30 | 31 | classifier = DecisionTreeClassifier() 32 | classifier.fit(xtrain,ytrain) 33 | 34 | ytrainpred = classifier.predict(xtrain) 35 | print("\n*** Classification accuracy score during model training: ", metrics.accuracy_score(ytrain, ytrainpred)) 36 | 37 | xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values 38 | ytest = testingEncodedAndScaledDataset.iloc[:, len(testingEncodedAndScaledDataset.columns)-1].values 39 | 40 | labelencoder_ytest = LabelEncoder() 41 | ytest = labelencoder_ytest.fit_transform(ytest) 42 | 43 | # Predicting the Test set results 44 | ytestpred = classifier.predict(xtest) 45 | print("*** Classification accuracy score during model testing: ", metrics.accuracy_score(ytest, ytestpred)) 46 | 
print("\n****** End classification training using DecisionTreeClassifier *****\n") 47 | return classifier, metrics.accuracy_score(ytrain, ytrainpred), metrics.accuracy_score(ytest, ytestpred) 48 | 49 | #This function is used to perform classification using LogisticRegression 50 | def classifyUsingLogisticRegression(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset): 51 | print("****** Start classification training using LogisticRegression *****") 52 | xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values 53 | ytrain = trainingEncodedAndScaledDataset.iloc[:, len(trainingEncodedAndScaledDataset.columns)-1].values 54 | 55 | labelencoder_ytrain = LabelEncoder() 56 | ytrain = labelencoder_ytrain.fit_transform(ytrain) 57 | 58 | classifier = LogisticRegression() 59 | classifier.fit(xtrain,ytrain) 60 | 61 | ytrainpred = classifier.predict(xtrain) 62 | print("\n*** Classification accuracy score during model training: ", metrics.accuracy_score(ytrain, ytrainpred)) 63 | 64 | xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values 65 | ytest = testingEncodedAndScaledDataset.iloc[:, len(testingEncodedAndScaledDataset.columns)-1].values 66 | 67 | labelencoder_ytest = LabelEncoder() 68 | ytest = labelencoder_ytest.fit_transform(ytest) 69 | 70 | # Predicting the Test set results 71 | ytestpred = classifier.predict(xtest) 72 | print("*** Classification accuracy score during model testing: ", metrics.accuracy_score(ytest, ytestpred)) 73 | print("\n****** End classification training using LogisticRegression *****\n") 74 | return classifier, metrics.accuracy_score(ytrain, ytrainpred), metrics.accuracy_score(ytest, ytestpred) 75 | 76 | #This function is used to perform classification using LinearDiscriminantAnalysis 77 | def classifyUsingLinearDiscriminantAnalysis(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset): 78 | print("****** Start classification training using LinearDiscriminantAnalysis *****") 79 | xtrain = 
trainingEncodedAndScaledDataset.iloc[:, :-1].values 80 | ytrain = trainingEncodedAndScaledDataset.iloc[:, len(trainingEncodedAndScaledDataset.columns)-1].values 81 | 82 | labelencoder_ytrain = LabelEncoder() 83 | ytrain = labelencoder_ytrain.fit_transform(ytrain) 84 | 85 | classifier = LinearDiscriminantAnalysis() 86 | classifier.fit(xtrain,ytrain) 87 | 88 | ytrainpred = classifier.predict(xtrain) 89 | print("\n*** Classification accuracy score during model training: ", metrics.accuracy_score(ytrain, ytrainpred)) 90 | 91 | xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values 92 | ytest = testingEncodedAndScaledDataset.iloc[:, len(testingEncodedAndScaledDataset.columns)-1].values 93 | 94 | labelencoder_ytest = LabelEncoder() 95 | ytest = labelencoder_ytest.fit_transform(ytest) 96 | 97 | # Predicting the Test set results 98 | ytestpred = classifier.predict(xtest) 99 | print("*** Classification accuracy score during model testing: ", metrics.accuracy_score(ytest, ytestpred)) 100 | print("\n****** End classification training using LinearDiscriminantAnalysis *****\n") 101 | return classifier, metrics.accuracy_score(ytrain, ytrainpred), metrics.accuracy_score(ytest, ytestpred) 102 | 103 | #This function is used to perform classification using GuassianNaiveBayes 104 | def classifyUsingGaussianNB(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset): 105 | print("****** Start classification training using GuassianNaiveBayes *****") 106 | xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values 107 | ytrain = trainingEncodedAndScaledDataset.iloc[:, len(trainingEncodedAndScaledDataset.columns)-1].values 108 | 109 | labelencoder_ytrain = LabelEncoder() 110 | ytrain = labelencoder_ytrain.fit_transform(ytrain) 111 | 112 | classifier = GaussianNB() 113 | classifier.fit(xtrain,ytrain) 114 | 115 | ytrainpred = classifier.predict(xtrain) 116 | print("\n*** Classification accuracy score during model training: ", metrics.accuracy_score(ytrain, ytrainpred)) 117 | 
118 | xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values 119 | ytest = testingEncodedAndScaledDataset.iloc[:, len(testingEncodedAndScaledDataset.columns)-1].values 120 | 121 | labelencoder_ytest = LabelEncoder() 122 | ytest = labelencoder_ytest.fit_transform(ytest) 123 | 124 | # Predicting the Test set results 125 | ytestpred = classifier.predict(xtest) 126 | print("*** Classification accuracy score during model testing: ", metrics.accuracy_score(ytest, ytestpred)) 127 | print("\n****** End classification training using GuassianNaiveBayes *****\n") 128 | return classifier, metrics.accuracy_score(ytrain, ytrainpred), metrics.accuracy_score(ytest, ytestpred) 129 | 130 | #This function is used to perform classification using RandomForestClassifier 131 | def classifyUsingRandomForestClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset): 132 | print("****** Start classification training using RandomForestClassifier *****") 133 | xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values 134 | ytrain = trainingEncodedAndScaledDataset.iloc[:, len(trainingEncodedAndScaledDataset.columns)-1].values 135 | 136 | labelencoder_ytrain = LabelEncoder() 137 | ytrain = labelencoder_ytrain.fit_transform(ytrain) 138 | 139 | classifier = RandomForestClassifier(n_estimators=100) 140 | classifier.fit(xtrain,ytrain) 141 | 142 | ytrainpred = classifier.predict(xtrain) 143 | print("\n*** Classification accuracy score during model training: ", metrics.accuracy_score(ytrain, ytrainpred)) 144 | 145 | xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values 146 | ytest = testingEncodedAndScaledDataset.iloc[:, len(testingEncodedAndScaledDataset.columns)-1].values 147 | 148 | labelencoder_ytest = LabelEncoder() 149 | ytest = labelencoder_ytest.fit_transform(ytest) 150 | 151 | # Predicting the Test set results 152 | ytestpred = classifier.predict(xtest) 153 | print("*** Classification accuracy score during model testing: ", metrics.accuracy_score(ytest, 
ytestpred)) 154 | print("\n****** End classification training using RandomForestClassifier *****\n") 155 | return classifier, metrics.accuracy_score(ytrain, ytrainpred), metrics.accuracy_score(ytest, ytestpred) 156 | 157 | #This function is used to perform classification using RandomForestClassifier 158 | def classifyUsingExtraTreesClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset): 159 | print("****** Start classification training using ExtraTreesClassifier *****") 160 | xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values 161 | ytrain = trainingEncodedAndScaledDataset.iloc[:, len(trainingEncodedAndScaledDataset.columns)-1].values 162 | 163 | print("trainingEncodedAndScaledDataset.shape: ",trainingEncodedAndScaledDataset.shape) 164 | 165 | labelencoder_ytrain = LabelEncoder() 166 | ytrain = labelencoder_ytrain.fit_transform(ytrain) 167 | 168 | classifier = ExtraTreesClassifier(n_estimators=100) 169 | classifier.fit(xtrain,ytrain) 170 | 171 | ytrainpred = classifier.predict(xtrain) 172 | print("\n*** Classification accuracy score during model training: ", metrics.accuracy_score(ytrain, ytrainpred)) 173 | 174 | xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values 175 | ytest = testingEncodedAndScaledDataset.iloc[:, len(testingEncodedAndScaledDataset.columns)-1].values 176 | 177 | print("testingEncodedAndScaledDataset.shape: ",testingEncodedAndScaledDataset.shape) 178 | 179 | labelencoder_ytest = LabelEncoder() 180 | ytest = labelencoder_ytest.fit_transform(ytest) 181 | 182 | # Predicting the Test set results 183 | ytestpred = classifier.predict(xtest) 184 | print("*** Classification accuracy score during model testing: ", metrics.accuracy_score(ytest, ytestpred)) 185 | print("\n****** End classification training using ExtraTreesClassifier *****\n") 186 | return classifier, metrics.accuracy_score(ytrain, ytrainpred), metrics.accuracy_score(ytest, ytestpred) 187 | 188 | def 
#This function trains a 1-nearest-neighbour classifier on the training frame and
#evaluates it on the testing frame. The last column of each frame is assumed to
#be the label; every column before it is a feature.
#Returns (fitted classifier, training accuracy, testing accuracy).
def classifyUsingKNNClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    #sklearn is imported locally so this module can be loaded without it installed
    from sklearn.preprocessing import LabelEncoder
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn import metrics

    print("****** Start classification training using KNeighborsClassifier *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values

    labelencoder_ytrain = LabelEncoder()
    ytrain = labelencoder_ytrain.fit_transform(ytrain)

    classifier = KNeighborsClassifier(n_neighbors=1)
    classifier.fit(xtrain, ytrain)

    #Fix: compute each accuracy once and reuse it for both the log line and the return value
    ytrainpred = classifier.predict(xtrain)
    trainAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainAccuracy)

    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    print("testingEncodedAndScaledDataset.shape: ", testingEncodedAndScaledDataset.shape)

    #NOTE(review): the test labels are encoded with a freshly fitted encoder, so the
    #mapping only matches training when both sets contain the same label values -- confirm
    labelencoder_ytest = LabelEncoder()
    ytest = labelencoder_ytest.fit_transform(ytest)

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testAccuracy)
    print("\n****** End classification training using KNeighborsClassifier *****\n")
    return classifier, trainAccuracy, testAccuracy

#This function runs 10-fold cross validation for every odd K in [1, 149] and
#reports the K with the lowest misclassification error, plotting error vs K.
#Fix: the optimal K is now returned instead of only being printed (backward
#compatible -- previous callers ignored the None return).
def findingOptimumNumberOfNeighboursForKNN(trainingEncodedAndScaledDataset):
    from sklearn.preprocessing import LabelEncoder
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.model_selection import cross_val_score
    import matplotlib.pyplot as plt

    print("****** Start finding optimum number of neighbours for KNN *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values

    labelencoder_ytrain = LabelEncoder()
    ytrain = labelencoder_ytrain.fit_transform(ytrain)

    # creating odd list of K for KNN (even K can produce voting ties)
    neighbors = list(range(1, 150, 2))

    # empty list that will hold cv scores
    cv_scores = []

    # perform 10-fold cross validation
    for k in neighbors:
        knn = KNeighborsClassifier(n_neighbors=k)
        scores = cross_val_score(knn, xtrain, ytrain, cv=10, scoring='accuracy')
        cv_scores.append(scores.mean())
        print("With number of neighbours as {}, average score is {}".format(k, scores.mean()))

    # changing to misclassification error
    mse = [1 - x for x in cv_scores]

    # determining best k
    optimal_k = neighbors[mse.index(min(mse))]
    print("The optimal number of neighbors is {}".format(optimal_k))

    # plot misclassification error vs k
    plt.plot(neighbors, mse)
    plt.xlabel("Number of Neighbors K")
    plt.ylabel("Misclassification Error")
    plt.show()

    print("****** End finding optimum number of neighbours for KNN *****")
    return optimal_k

#This function writes a 2-D array to a new .xlsx workbook, one worksheet row per entry.
def createExcelFromArray(array, fileName):
    #xlsxwriter is imported locally so this module can be loaded without it installed
    import xlsxwriter

    workbook = xlsxwriter.Workbook(fileName)
    worksheet = workbook.add_worksheet()

    #Fix: the enumerate index is the ROW being written (column is always 0);
    #the previous variable names had row and col swapped
    for rowIndex, rowData in enumerate(array):
        worksheet.write_row(rowIndex, 0, rowData)

    workbook.close()

#This function pretty-prints a list as a single-column org-mode table.
#Fix: operates on a stringified copy so the caller's list is no longer
#mutated in place, and the parameter no longer shadows the builtin `list`.
def printList(listToPrint, heading):
    from tabulate import tabulate

    items = [str(item) for item in listToPrint]
    if len(items) > 0:
        print(tabulate([item.strip("[]").split(", ") for item in items], headers=[heading], tablefmt='orgtbl') + "\n")
import pandas as pd
import numpy as np

#This function prints descriptive statistics of a dataSet: shape, the
#categorical/numerical feature breakdown, missing-value and duplicate checks,
#and the label distribution. It returns nothing; dropping duplicates below
#rebinds a local name only, the caller's frame is left untouched.
def getStatisticsOfData(dataSet):
    #Project helpers imported locally so this module can be loaded standalone
    from defineInputs import getLabelName
    from dataformatinglibrary import printList

    print("***** Start checking the statistics of the dataSet *****\n")

    labelName = getLabelName()
    #Number of rows and columns in the dataset
    print("***** Shape (number of rows and columns) in the dataset: ", dataSet.shape)

    #Total number of features in the dataset (label column excluded)
    numberOfColumnsInTheDataset = len(dataSet.drop([labelName], axis=1).columns)
    print("***** Total number of features in the dataset: ", numberOfColumnsInTheDataset)

    #Categorical features = all non-numeric columns, label excluded
    categoricalFeaturesInTheDataset = list(set(dataSet.drop([labelName], axis=1).columns) - set(dataSet.drop([labelName], axis=1)._get_numeric_data().columns))
    print("***** Number of categorical features in the dataset: ", len(categoricalFeaturesInTheDataset))

    #Numerical features, label excluded
    numericalFeaturesInTheDataset = list(dataSet.drop([labelName], axis=1)._get_numeric_data().columns)
    print("***** Number of numerical features in the dataset: ", len(numericalFeaturesInTheDataset))

    #Names of categorical features in the dataset
    print("\n***** Names of categorical features in dataset *****\n")
    printList(categoricalFeaturesInTheDataset, 'Categorical features in dataset')

    #Names of numerical features in the dataset
    print("\n***** Names of numerical features in dataset *****\n")
    printList(numericalFeaturesInTheDataset, 'Numerical features in the dataset')

    #Checking for any missing values in the data set
    anyMissingValuesInTheDataset = checkForMissingValues(dataSet)
    print("\n***** Are there any missing values in the data set: ", anyMissingValuesInTheDataset)

    #Check if there are any duplicate records in the data set
    anyDuplicateRecordsInTheDataset = checkForDulicateRecords(dataSet)
    print("\n***** Are there any duplicate records in the data set: ", anyDuplicateRecordsInTheDataset)
    if anyDuplicateRecordsInTheDataset:
        dataSet = dataSet.drop_duplicates()
        print("Number of records in the dataSet after removing the duplicates: ", len(dataSet.index))

    #How many number of different values for label that are present in the dataset
    print('\n****** Number of different values for label that are present in the dataset: ', dataSet[labelName].nunique())
    #What are the different values for label in the dataset
    print('\n****** Here is the list of unique label types present in the dataset ***** \n')
    printList(list(dataSet[labelName].unique()), 'Unique label types in the dataset')

    #What are the different values in each of the categorical features in the dataset.
    #Recomputed here WITHOUT dropping the label, so the label column is included
    #in this breakdown (the earlier list deliberately excluded it).
    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    categoricalFeaturesInTheDataset = list(set(dataSet.columns) - set(dataSet._get_numeric_data().columns))
    for feature in categoricalFeaturesInTheDataset:
        uniq = np.unique(dataSet[feature])
        print('\n{}: {} '.format(feature, len(uniq)))
        printList(dataSet[feature].unique(), 'distinct values')

    print('\n****** Label distribution in the dataset *****\n')
    print(dataSet[labelName].value_counts())
    print()

    print("\n***** End checking the statistics of the dataSet *****")

#This function loads a CSV file into a pandas DataFrame.
def loadCSV(fileNameWithAbsolutePath):
    dataSet = pd.read_csv(fileNameWithAbsolutePath)
    return dataSet

#This function reports whether the dataSet contains any missing (NaN) values.
def checkForMissingValues(dataSet):
    anyMissingValuesInTheDataset = dataSet.isnull().values.any()
    return anyMissingValuesInTheDataset

#This function reports whether the dataSet contains duplicate rows.
def checkForDulicateRecords(dataSet):
    totalRecordsInDataset = len(dataSet.index)
    numberOfUniqueRecordsInDataset = len(dataSet.drop_duplicates().index)
    #Duplicates exist whenever dropping them changes the row count
    anyDuplicateRecordsInTheDataset = totalRecordsInDataset != numberOfUniqueRecordsInDataset
    print('Total number of records in the dataset: {}\nUnique records in the dataset: {}'.format(totalRecordsInDataset, numberOfUniqueRecordsInDataset))
    return anyDuplicateRecordsInTheDataset
#Split the complete dataSet into a training set and a testing set (60/40 split,
#fixed random_state so the split is reproducible across runs).
#Returns (train features, test features, train labels, test labels).
def splitCompleteDataSetIntoTrainingSetAndTestingSet(completeDataSet):
    #Imported locally so this module can be loaded without sklearn installed
    from sklearn.model_selection import train_test_split

    labelName = getLabelName()
    label = completeDataSet[labelName]
    features = completeDataSet.drop(labelName, axis=1)
    featuresInPreProcessedTrainingDataSet, featuresInPreProcessedTestingDataSet, labelInPreProcessedTrainingDataSet, labelInPreProcessedTestingDataSet = train_test_split(features, label, test_size=0.4, random_state=42)
    print("features.shape: ", features.shape)
    print("label.shape: ", label.shape)
    return featuresInPreProcessedTrainingDataSet, featuresInPreProcessedTestingDataSet, labelInPreProcessedTrainingDataSet, labelInPreProcessedTestingDataSet

#This function is to maintain the name of the label at a single place
def getLabelName():
    return 'attack_type'

#Absolute paths of the NSL-KDD training and testing CSV files.
def getPathToTrainingAndTestingDataSets():
    trainingFileNameWithAbsolutePath = "D:\\Learning\\DeepLearning\\Project-AttackDetectionSystem\\Datasets\\NSL-KDD\\KDDTrain+_20Percent.csv"
    testingFileNameWithAbsolutePath = "D:\\Learning\\DeepLearning\\Project-AttackDetectionSystem\\Datasets\\NSL-KDD\\KDDTest-21.csv"
    return trainingFileNameWithAbsolutePath, testingFileNameWithAbsolutePath

#Absolute path of the excel sheet the model performance report is written to.
#Fix: the local variable no longer shadows the function's own name.
def modelPerformanceReport():
    reportFileNameWithAbsolutePath = 'D:\\Learning\\DeepLearning\\Project-AttackDetectionSystem\\ModelsAndTheirPerformanceReports\\ModelsPerformance031442020.1.xlsx'
    return reportFileNameWithAbsolutePath

#Directory where the generated models are saved.
def getPathToGenerateModels():
    generatedModelsPath = 'D:\\Learning\\DeepLearning\\Project-AttackDetectionSystem\\ModelsAndTheirPerformanceReports\\'
    return generatedModelsPath

### Models with the below configuration will be generated
#Row 0 is the report header; every following row is one model configuration of
#(feature selection, feature encoding, feature scaling, classifier).
def defineArrayOfModels():
    arrayOfModels = [
        [
            "FeatureSelectionTechnique",
            "FeatureEncodingTechnique",
            "FeatureNormalizationTechnique",
            "ClassificationTechnique",
            "TrainAccuraccy",  #NOTE(review): spelling kept as-is; downstream reports may key on it
            "TestAccuraccy",
            "ModelName",
            "ModelFileName"
        ],
        [
            "ExtraTreesClassifier",
            "OneHotEncoder",
            "Standardization",
            "DecisonTree"
        ],
        [
            "ExtraTreesClassifier",
            "OneHotEncoder",
            "Standardization",
            "RandomForestClassifier"
        ],
        [
            "ExtraTreesClassifier",
            "OneHotEncoder",
            "Standardization",
            "ExtraTreesClassifier"
        ],
        [
            "ExtraTreesClassifier",
            "OneHotEncoder",
            "Standardization",
            "KNN"
        ]
    ]
    print(arrayOfModels)
    return arrayOfModels

#Single pre-processing pipeline configuration (no classifier step).
def defineArrayForPreProcessing():
    arrayOfModels = [
        [
            "ExtraTreesClassifier",
            "OneHotEncoder",
            "Standardization",
        ]
    ]
    print(arrayOfModels)
    return arrayOfModels
import pandas as pd
import numpy as np

#This function is used to perform one hot encoding on the categorical features in the given dataset.
#Returns a NEW frame with dummy columns, the label moved to the last position.
def featureEncodingUsingOneHotEncoder(dataSetForFeatureEncoding):
    #Project helpers imported locally so this module can be loaded standalone
    from defineInputs import getLabelName
    from dataformatinglibrary import printList

    print("****** Start one hot encoding on the categorical features in the given dataset *****")

    labelName = getLabelName()
    #Extract the categorical features, leave the label
    categoricalColumnsInTheDataSet = dataSetForFeatureEncoding.drop([labelName], axis=1).select_dtypes(['object'])
    #Get the names of the categorical features
    categoricalColumnNames = categoricalColumnsInTheDataSet.columns.values

    print("****** Number of features before one hot encoding: ", len(dataSetForFeatureEncoding.columns))
    print("****** Number of categorical features in the dataset: ", len(categoricalColumnNames))
    print("****** Categorical feature names in the dataset: ", categoricalColumnNames)

    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    #Label column deliberately included in this listing (it is categorical too)
    categoricalFeaturesInTheDataset = list(set(dataSetForFeatureEncoding.columns) - set(dataSetForFeatureEncoding._get_numeric_data().columns))
    for feature in categoricalFeaturesInTheDataset:
        uniq = np.unique(dataSetForFeatureEncoding[feature])
        print('\n{}: {} '.format(feature, len(uniq)))
        printList(dataSetForFeatureEncoding[feature].unique(), 'distinct values')

    #Using get_dummies function to get the dummy variables for the categorical columns
    onHotEncodedDataSet = pd.get_dummies(dataSetForFeatureEncoding, columns=categoricalColumnNames, prefix=categoricalColumnNames)

    #Move the label column to the end
    label = onHotEncodedDataSet.pop(labelName)
    onHotEncodedDataSet[labelName] = label
    print("****** Number of features after one hot encoding: ", len(onHotEncodedDataSet.columns))

    print("****** End one hot encoding on the categorical features in the given dataset *****\n")
    return onHotEncodedDataSet

#This function is used to perform label encoding on the categorical features in the given dataset.
#NOTE: the encoding is applied in place, so the caller's frame is modified as well.
def featureEncodingUsingLabelEncoder(dataSetForFeatureEncoding):
    from sklearn.preprocessing import LabelEncoder
    from defineInputs import getLabelName
    from dataformatinglibrary import printList

    print("****** Start label encoding on the categorical features in the given dataset *****")

    labelName = getLabelName()
    #Extract the categorical features, leave the label
    categoricalColumnsInTheDataSet = dataSetForFeatureEncoding.drop([labelName], axis=1).select_dtypes(['object'])
    categoricalColumnNames = categoricalColumnsInTheDataSet.columns.values

    print("****** Number of features before label encoding: ", len(dataSetForFeatureEncoding.columns))
    print("****** Number of categorical features in the dataset: ", len(categoricalColumnNames))
    print("****** Categorical feature names in the dataset: ", categoricalColumnNames)

    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    labelEncoder = LabelEncoder()
    for feature in categoricalColumnNames:
        uniq = np.unique(dataSetForFeatureEncoding[feature])
        print('\n{}: {} '.format(feature, len(uniq)))
        printList(dataSetForFeatureEncoding[feature].unique(), 'distinct values')
        #fit_transform refits the encoder per feature, so reusing one instance is safe
        dataSetForFeatureEncoding[feature] = labelEncoder.fit_transform(dataSetForFeatureEncoding[feature])
    print("****** Number of features after label encoding: ", len(dataSetForFeatureEncoding.columns))

    print("****** End label encoding on the categorical features in the given dataset *****\n")
    return dataSetForFeatureEncoding

#This function is used to perform binary encoding on the categorical features in the given dataset.
#Each categorical column is replaced by its binary-encoded expansion; the label
#is re-attached as the last column of the returned frame.
def featureEncodingUsingBinaryEncoder(dataSetForFeatureEncoding):
    import category_encoders as ce
    from defineInputs import getLabelName
    from dataformatinglibrary import printList

    print("****** Start binary encoding on the categorical features in the given dataset *****")

    labelName = getLabelName()
    #Extract the categorical features, leave the label
    categoricalColumnsInTheDataSet = dataSetForFeatureEncoding.drop([labelName], axis=1).select_dtypes(['object'])
    categoricalColumnNames = categoricalColumnsInTheDataSet.columns.values

    print("****** Number of features before binary encoding: ", len(dataSetForFeatureEncoding.columns))
    print("****** Number of categorical features in the dataset: ", len(categoricalColumnNames))
    print("****** Categorical feature names in the dataset: ", categoricalColumnNames)

    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    #Single-column frame holding only the label (used as the supervised target)
    label = dataSetForFeatureEncoding[[labelName]]
    for feature in categoricalColumnNames:
        uniq = np.unique(dataSetForFeatureEncoding[feature])
        print('\n{}: {} '.format(feature, len(uniq)))
        printList(dataSetForFeatureEncoding[feature].unique(), 'distinct values')
        featureColumns = dataSetForFeatureEncoding[[feature]]
        binaryEncoder = ce.BinaryEncoder(cols=[feature])
        binaryEncodedFeature = binaryEncoder.fit_transform(featureColumns, label)
        #Swap the raw column for its encoded expansion
        dataSetForFeatureEncoding = dataSetForFeatureEncoding.join(binaryEncodedFeature)
        dataSetForFeatureEncoding = dataSetForFeatureEncoding.drop(feature, axis=1)

    #Move the label column to the end
    dataSetForFeatureEncoding = dataSetForFeatureEncoding.drop(labelName, axis=1)
    dataSetForFeatureEncoding[labelName] = label
    print("****** Number of features after binary encoding: ", len(dataSetForFeatureEncoding.columns))

    print("****** End binary encoding on the categorical features in the given dataset *****\n")
    return dataSetForFeatureEncoding

#This function is used to perform frequency encoding on the categorical features in the given dataset.
#Each category is mapped to its relative frequency in the dataset.
def featureEncodingUsingFrequencyEncoder(dataSetForFeatureEncoding):
    from defineInputs import getLabelName
    from dataformatinglibrary import printList

    print("****** Start frequency encoding on the categorical features in the given dataset *****")

    labelName = getLabelName()
    #Extract the categorical features, leave the label
    categoricalColumnsInTheDataSet = dataSetForFeatureEncoding.drop([labelName], axis=1).select_dtypes(['object'])
    categoricalColumnNames = categoricalColumnsInTheDataSet.columns.values

    #Fix: this log line previously said "label encoding" (copy/paste slip)
    print("****** Number of features before frequency encoding: ", len(dataSetForFeatureEncoding.columns))
    print("****** Number of categorical features in the dataset: ", len(categoricalColumnNames))
    print("****** Categorical feature names in the dataset: ", categoricalColumnNames)

    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    label = dataSetForFeatureEncoding[[labelName]]
    for feature in categoricalColumnNames:
        uniq = np.unique(dataSetForFeatureEncoding[feature])
        print('\n{}: {} '.format(feature, len(uniq)))
        printList(dataSetForFeatureEncoding[feature].unique(), 'distinct values')
        #group size / total rows = relative frequency of each category
        frequencyEncoder = dataSetForFeatureEncoding.groupby(feature).size() / len(dataSetForFeatureEncoding)
        dataSetForFeatureEncoding.loc[:, feature + "_Encoded"] = dataSetForFeatureEncoding[feature].map(frequencyEncoder)
        dataSetForFeatureEncoding = dataSetForFeatureEncoding.drop(feature, axis=1)

    #Move the label column to the end
    dataSetForFeatureEncoding = dataSetForFeatureEncoding.drop(labelName, axis=1)
    dataSetForFeatureEncoding[labelName] = label
    print("****** Number of features after frequency encoding: ", len(dataSetForFeatureEncoding.columns))

    print("****** End frequency encoding on the categorical features in the given dataset *****\n")
    return dataSetForFeatureEncoding
import pandas as pd
import numpy as np

#All scalers below share one shape: the label column is popped off
#(NOTE(review): pop mutates the caller's frame -- confirm callers expect this),
#every remaining column is scaled, and a new frame is returned with the label
#re-attached as the last column. The label is assumed to be the LAST column.

#This function is used to perform min-max feature scaling on the features in the given dataset
#Formula for Min-Max scalar feature scaling is (Xi-Xmin)/(Xmax-Xmin)
def featureScalingUsingMinMaxScaler(dataSetForFeatureScaling):
    from sklearn.preprocessing import MinMaxScaler
    from defineInputs import getLabelName

    print("****** Start feature scaling of the features present in the dataset using MinMaxScaler *****")

    numberOfColumnsInEncodedDataset = len(dataSetForFeatureScaling.columns)
    #Snapshot of the raw values taken BEFORE the label is popped below
    dataSetInArrayFormat = dataSetForFeatureScaling.values

    #Remove the label column from the dataset
    labelName = getLabelName()
    label = dataSetForFeatureScaling.pop(labelName)

    print(dataSetInArrayFormat)
    features = dataSetInArrayFormat[:, 0:numberOfColumnsInEncodedDataset - 1]
    print("\n****** Number of features in the dataset before performing scaling: ", np.size(features, 1))
    print("\n****** Features in the dataset before performing scaling ***** \n", features)

    #Perform feature scaling
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaledFeatures = scaler.fit_transform(features)
    print("\n****** Number of features in the dataset after performing scaling: ", np.size(scaledFeatures, 1))
    print("\n****** Features in the dataset after performing scaling ***** \n", scaledFeatures)

    #Convert from array format to dataframe; indexes are reset so the label
    #re-aligns positionally with the scaled rows
    scaledFeatures = pd.DataFrame(scaledFeatures, columns=dataSetForFeatureScaling.columns)
    scaledFeatures = scaledFeatures.reset_index(drop=True)
    label = label.reset_index(drop=True)
    scaledFeatures[labelName] = label

    print("\n****** End of feature scaling of the features present in the dataset using MinMaxScaler *****\n")
    return scaledFeatures

#This function is used to perform StandardScalar feature scaling on the features in the given dataset
#This is also called as Z-score normalization
#Formula for StandardScalar feature scaling is z = (x - mean) / standard-deviation
def featureScalingUsingStandardScalar(dataSetForFeatureScaling):
    from sklearn.preprocessing import StandardScaler
    from defineInputs import getLabelName

    print("****** Start feature scaling of the features present in the dataset using StandardScalar *****")

    numberOfColumnsInEncodedDataset = len(dataSetForFeatureScaling.columns)
    dataSetInArrayFormat = dataSetForFeatureScaling.values

    #Remove the label column from the dataset
    labelName = getLabelName()
    label = dataSetForFeatureScaling.pop(labelName)

    print(dataSetInArrayFormat)
    features = dataSetInArrayFormat[:, 0:numberOfColumnsInEncodedDataset - 1]
    print("\n****** Number of features in the dataset before performing scaling: ", np.size(features, 1))
    print("\n****** Features in the dataset before performing scaling ***** \n", features)

    #Perform feature scaling
    scaler = StandardScaler()
    scaledFeatures = scaler.fit_transform(features)
    print("\n****** Number of features in the dataset after performing scaling: ", np.size(scaledFeatures, 1))
    print("\n****** Features in the dataset after performing scaling ***** \n", scaledFeatures)

    #Convert from array format to dataframe
    scaledFeatures = pd.DataFrame(scaledFeatures, columns=dataSetForFeatureScaling.columns)
    scaledFeatures = scaledFeatures.reset_index(drop=True)
    label = label.reset_index(drop=True)
    scaledFeatures[labelName] = label
    print("scaledFeatures.head(): ", scaledFeatures.head())
    print("scaledFeatures.shape: ", scaledFeatures.shape)

    print("\n****** End of feature scaling of the features present in the dataset using StandardScalar *****\n")
    return scaledFeatures

#This function is used to perform Binarizing feature scaling on the features in the given dataset
#It is used for binary thresholding of an array like matrix.
def featureScalingUsingBinarizer(dataSetForFeatureScaling):
    from sklearn.preprocessing import Binarizer
    from defineInputs import getLabelName

    print("****** Start feature scaling of the features present in the dataset using Binarizer *****")

    numberOfColumnsInEncodedDataset = len(dataSetForFeatureScaling.columns)
    dataSetInArrayFormat = dataSetForFeatureScaling.values

    #Remove the label column from the dataset
    labelName = getLabelName()
    label = dataSetForFeatureScaling.pop(labelName)

    print(dataSetInArrayFormat)
    features = dataSetInArrayFormat[:, 0:numberOfColumnsInEncodedDataset - 1]
    print("\n****** Number of features in the dataset before performing scaling: ", np.size(features, 1))
    print("\n****** Features in the dataset before performing scaling ***** \n", features)

    #Perform feature scaling
    #Fix: threshold is keyword-only in current scikit-learn; Binarizer(0.0) raises
    scaledFeatures = Binarizer(threshold=0.0).fit(features).transform(features)
    print("\n****** Number of features in the dataset after performing scaling: ", np.size(scaledFeatures, 1))
    print("\n****** Features in the dataset after performing scaling ***** \n", scaledFeatures)

    #Convert from array format to dataframe
    scaledFeatures = pd.DataFrame(scaledFeatures, columns=dataSetForFeatureScaling.columns)
    scaledFeatures = scaledFeatures.reset_index(drop=True)
    label = label.reset_index(drop=True)
    scaledFeatures[labelName] = label

    print("\n****** End of feature scaling of the features present in the dataset using Binarizer *****\n")
    return scaledFeatures

#This function is used to perform Normalizing feature scaling on the features in the given dataset
#It is used to rescale each sample.
#Each sample (i.e. each row of the data matrix) with at least one non zero component
#is rescaled independently of other samples so that its norm (l1 or l2) equals one.
def featureScalingUsingNormalizer(dataSetForFeatureScaling):
    from sklearn.preprocessing import Normalizer
    from defineInputs import getLabelName

    print("****** Start feature scaling of the features present in the dataset using Normalizer *****")

    numberOfColumnsInEncodedDataset = len(dataSetForFeatureScaling.columns)
    dataSetInArrayFormat = dataSetForFeatureScaling.values

    #Remove the label column from the dataset
    labelName = getLabelName()
    label = dataSetForFeatureScaling.pop(labelName)

    print(dataSetInArrayFormat)

    features = dataSetInArrayFormat[:, 0:numberOfColumnsInEncodedDataset - 1]
    print("\n****** Number of features in the dataset before performing scaling: ", np.size(features, 1))
    print("\n****** Features in the dataset before performing scaling ***** \n", features)

    #Perform feature scaling
    scaledFeatures = Normalizer().fit(features).transform(features)
    print("\n****** Number of features in the dataset after performing scaling: ", np.size(scaledFeatures, 1))
    print("\n****** Features in the dataset after performing scaling ***** \n", scaledFeatures)

    #Convert from array format to dataframe
    scaledFeatures = pd.DataFrame(scaledFeatures, columns=dataSetForFeatureScaling.columns)
    scaledFeatures = scaledFeatures.reset_index(drop=True)
    label = label.reset_index(drop=True)
    scaledFeatures[labelName] = label

    print("\n****** End of feature scaling of the features present in the dataset using Normalizer *****\n")
    return scaledFeatures
#Utility functions 2 | from defineInputs import getLabelName 3 | 4 | from featureencodinglibrary import featureEncodingUsingLabelEncoder 5 | from dataformatinglibrary import printList 6 | 7 | #Matplotlib is a plotting library for the Python programming language and its numerical mathematics extension NumPy 8 | import matplotlib.pyplot as plt 9 | from matplotlib.pyplot import figure 10 | import seaborn as sns 11 | import numpy as np 12 | import pandas as pd 13 | import math 14 | import scipy.stats as ss 15 | from collections import Counter 16 | from sklearn.ensemble import RandomForestClassifier #RandomForestClassifier: Falls under wrapper methods (feature importance) 17 | from sklearn.ensemble import ExtraTreesClassifier #ExtraTreesClassifier: Falls under wrapper methods (feature importance) 18 | from sklearn.feature_selection import SelectKBest 19 | from sklearn.feature_selection import chi2 20 | from sklearn.preprocessing import LabelEncoder 21 | 22 | #This function is used to calculate the conditional entropy between a given feature and the target 23 | def conditional_entropy(x,y): 24 | # entropy of x given y 25 | y_counter = Counter(y) 26 | xy_counter = Counter(list(zip(x,y))) 27 | total_occurrences = sum(y_counter.values()) 28 | entropy = 0 29 | for xy in xy_counter.keys(): 30 | p_xy = xy_counter[xy] / total_occurrences 31 | p_y = y_counter[xy[1]] / total_occurrences 32 | entropy += p_xy * math.log(p_y/p_xy) 33 | return entropy 34 | 35 | #This function is used to perform feature selection using TheilU 36 | #In TheilU we calculate the uncertainty coefficient between the given feature and the target 37 | def theil_u(x,y): 38 | s_xy = conditional_entropy(x,y) 39 | x_counter = Counter(x) 40 | total_occurrences = sum(x_counter.values()) 41 | p_x = list(map(lambda n: n/total_occurrences, x_counter.values())) 42 | s_x = ss.entropy(p_x) 43 | if s_x == 0: 44 | return 1 45 | else: 46 | return (s_x - s_xy) / s_x 47 | 48 | def 
featureSelectionUsingTheilU(dataSetForFeatureSelection): 49 | print("\n****** Start performing feature selection using TheilU *****") 50 | print("****** Falls under the group of techniques that use correlation matrix with Heatmap *****") 51 | 52 | labelName = getLabelName() 53 | label = dataSetForFeatureSelection[labelName] 54 | 55 | theilu = pd.DataFrame(index=[labelName],columns=dataSetForFeatureSelection.columns) 56 | columns = dataSetForFeatureSelection.columns 57 | dataSetAfterFeatuerSelection = dataSetForFeatureSelection 58 | 59 | for j in range(0,len(columns)): 60 | u = theil_u(label.tolist(),dataSetForFeatureSelection[columns[j]].tolist()) 61 | theilu.loc[:,columns[j]] = u 62 | if u < 0.50: 63 | dataSetAfterFeatuerSelection.pop(columns[j]) 64 | 65 | print('***** Ploting the uncertainty coefficient between the target and each feature *****') 66 | theilu.fillna(value=np.nan,inplace=True) 67 | plt.figure(figsize=(30,1)) 68 | sns.heatmap(theilu,annot=True,fmt='.2f') 69 | plt.show() 70 | 71 | numberOfFeaturesInTheDatasetAfterFeatureSelection = len(dataSetAfterFeatuerSelection.columns) 72 | print('***** Number of columns in the dataSet after feature selection: ', len(dataSetAfterFeatuerSelection.columns)) 73 | print('***** Columns in the dataSet after feature selection: \n', dataSetAfterFeatuerSelection.columns) 74 | print("****** End performing feature selection using TheilU *****") 75 | return dataSetAfterFeatuerSelection 76 | 77 | #This function is used to perform feature selection using Chi-squared test 78 | def featureSelectionUsingChisquaredTest(dataSetForFeatureSelection): 79 | print("\n****** Start performing feature selection using ChisquaredTest *****") 80 | print("****** Falls under filter methods (univariate selection) *****") 81 | 82 | numberOfFeatureToBeSelected = 10 83 | labelName = getLabelName() 84 | 85 | #To be able to apply Chi-squared test 86 | dataSetForFeatureSelection = featureEncodingUsingLabelEncoder(dataSetForFeatureSelection) 87 | 
dataSetAfterFeatuerSelection = dataSetForFeatureSelection 88 | 89 | #features = dataSetForFeatureSelection.iloc[:,0:len(dataSetForFeatureSelection.columns)-1] 90 | features = dataSetForFeatureSelection.drop([labelName],axis=1) 91 | label = dataSetForFeatureSelection[labelName] 92 | 93 | #Apply SelectKBest class to extract top 10 best features 94 | bestfeatures = SelectKBest(score_func=chi2, k=numberOfFeatureToBeSelected) 95 | fitBestfeatures = bestfeatures.fit(features,label) 96 | columns = pd.DataFrame(features.columns) 97 | scores = pd.DataFrame(fitBestfeatures.scores_) 98 | #concat two dataframes for better visualization 99 | scoresOfBestFeatures = pd.concat([columns,scores],axis=1) 100 | scoresOfBestFeatures.columns = ['Features','Score'] 101 | print("\n***** Scores for each feature in the dataset are *****") 102 | print(scoresOfBestFeatures.nlargest(numberOfFeatureToBeSelected,'Score')) 103 | 104 | mask = fitBestfeatures.get_support() 105 | for j in range(0,len(mask)): 106 | if (mask[j] == False): 107 | dataSetAfterFeatuerSelection.pop(features.columns[j]) 108 | 109 | numberOfFeaturesInTheDatasetAfterFeatureSelection = len(dataSetAfterFeatuerSelection.columns) 110 | print('***** Number of columns in the dataSet after feature selection: ', len(dataSetAfterFeatuerSelection.columns)) 111 | print('***** Columns in the dataSet after feature selection: \n', dataSetAfterFeatuerSelection.columns) 112 | print("****** End performing feature selection using ChisquaredTest *****") 113 | 114 | return dataSetAfterFeatuerSelection 115 | 116 | #This function is used to perform feature selection using RandomForestClassifier 117 | def featureSelectionUsingRandomForestClassifier(dataSetForFeatureSelection): 118 | print("\n****** Start performing feature selection using RandomForestClassifier *****") 119 | print("****** Falls under wrapper methods (feature importance) *****") 120 | 121 | labelName = getLabelName() 122 | 123 | #Applying feature encoding before applying the 
RandomForestClassification 124 | dataSetForFeatureSelection = featureEncodingUsingLabelEncoder(dataSetForFeatureSelection) 125 | dataSetAfterFeatuerSelection = dataSetForFeatureSelection 126 | #features = dataSetForFeatureSelection.iloc[:,0:len(dataSetForFeatureSelection.columns)-1] 127 | features = dataSetForFeatureSelection.drop([labelName],axis=1) 128 | label = dataSetForFeatureSelection[labelName] 129 | 130 | labelencoder = LabelEncoder() 131 | labelTransformed = labelencoder.fit_transform(label) 132 | 133 | print("****** RandomForestClassification is in progress *****") 134 | #Train using RamdomForestClassifier 135 | trainedforest = RandomForestClassifier(n_estimators=700).fit(features,labelTransformed) 136 | importances = trainedforest.feature_importances_ #array with importances of each feature 137 | idx = np.arange(0, features.shape[1]) #create an index array, with the number of features 138 | features_to_keep = idx[importances > np.mean(importances)] #only keep features whose importance is greater than the mean importance 139 | featureImportances = pd.Series(importances, index= features.columns) 140 | selectedFeatures = featureImportances.nlargest(len(features_to_keep)) 141 | print("\n selectedFeatures after RandomForestClassification: ", selectedFeatures) 142 | print("****** Completed RandomForestClassification *****") 143 | 144 | #Plot the feature Importance to see which features have been considered as most important for our model to make its predictions 145 | #figure(num=None, figsize=(20, 22), dpi=80, facecolor='w', edgecolor='k') 146 | #selectedFeatures.plot(kind='barh') 147 | 148 | selectedFeaturesNames = selectedFeatures.keys() 149 | dataSetForFeatureSelection = dataSetForFeatureSelection.drop(selectedFeaturesNames,axis=1) 150 | dataSetAfterFeatuerSelection = dataSetAfterFeatuerSelection.drop(dataSetForFeatureSelection.columns, axis=1) 151 | dataSetAfterFeatuerSelection[labelName] = label 152 | 153 | 
numberOfFeaturesInTheDatasetAfterFeatureSelection = len(dataSetAfterFeatuerSelection.columns) 154 | print('\n***** Number of columns in the dataSet after feature selection: ', len(dataSetAfterFeatuerSelection.columns)) 155 | print('***** Columns in the dataSet after feature selection: \n', dataSetAfterFeatuerSelection.columns) 156 | print("****** End performing feature selection using RandomForestClassifier *****") 157 | return dataSetAfterFeatuerSelection 158 | 159 | #This function is used to perform feature selection using ExtraTreesClassifier 160 | def featureSelectionUsingExtraTreesClassifier(dataSetForFeatureSelection): 161 | print("\n****** Start performing feature selection using ExtraTreesClassifier *****") 162 | print("****** Falls under wrapper methods (feature importance) *****") 163 | 164 | labelName = getLabelName() 165 | 166 | #Applying feature encoding before applying the ExtraTreesClassification 167 | dataSetForFeatureSelection = featureEncodingUsingLabelEncoder(dataSetForFeatureSelection) 168 | dataSetAfterFeatuerSelection = dataSetForFeatureSelection 169 | #features = dataSetForFeatureSelection.iloc[:,0:len(dataSetForFeatureSelection.columns)-1] 170 | features = dataSetForFeatureSelection.drop([labelName],axis=1) 171 | label = dataSetForFeatureSelection[labelName] 172 | 173 | labelencoder = LabelEncoder() 174 | labelTransformed = labelencoder.fit_transform(label) 175 | 176 | print("****** ExtraTreesClassification is in progress *****") 177 | #Train using ExtraTreesClassifier 178 | trainedforest = ExtraTreesClassifier(n_estimators=700).fit(features,labelTransformed) 179 | importances = trainedforest.feature_importances_ #array with importances of each feature 180 | idx = np.arange(0, features.shape[1]) #create an index array, with the number of features 181 | features_to_keep = idx[importances > np.mean(importances)] #only keep features whose importance is greater than the mean importance 182 | featureImportances = pd.Series(importances, index= 
features.columns) 183 | selectedFeatures = featureImportances.nlargest(len(features_to_keep)) 184 | print("\n selectedFeatures after ExtraTreesClassification: ", selectedFeatures) 185 | print("****** Completed ExtraTreesClassification *****") 186 | 187 | #Plot the feature Importance to see which features have been considered as most important for our model to make its predictions 188 | #figure(num=None, figsize=(20, 22), dpi=80, facecolor='w', edgecolor='k') 189 | #selectedFeatures.plot(kind='barh') 190 | 191 | selectedFeaturesNames = selectedFeatures.keys() 192 | dataSetForFeatureSelection = dataSetForFeatureSelection.drop(selectedFeaturesNames,axis=1) 193 | dataSetAfterFeatuerSelection = dataSetAfterFeatuerSelection.drop(dataSetForFeatureSelection.columns, axis=1) 194 | dataSetAfterFeatuerSelection[labelName] = label 195 | 196 | numberOfFeaturesInTheDatasetAfterFeatureSelection = len(dataSetAfterFeatuerSelection.columns) 197 | print('\n***** Number of columns in the dataSet after feature selection: ', len(dataSetAfterFeatuerSelection.columns)) 198 | print('***** Columns in the dataSet after feature selection: \n', dataSetAfterFeatuerSelection.columns) 199 | print("****** End performing feature selection using ExtraTreesClassifier *****") 200 | return dataSetAfterFeatuerSelection 201 | 202 | -------------------------------------------------------------------------------- /Project-UtilityFunctions/findcombinations.py: -------------------------------------------------------------------------------- 1 | # Python3 program to find combinations from n 2 | # arrays such that one element from each 3 | # array is present 4 | 5 | # function to prcombinations that contain 6 | # one element from each of the given arrays 7 | def print1(arr): 8 | 9 | # number of arrays 10 | n = len(arr) 11 | 12 | # to keep track of next element 13 | # in each of the n arrays 14 | indices = [0 for i in range(n)] 15 | 16 | while (1): 17 | print("[") 18 | 19 | # prcurrent combination 20 | for i in 
range(n): 21 | print("'"+arr[i][indices[i]], end = "',") 22 | print() 23 | 24 | # find the rightmost array that has more 25 | # elements left after the current element 26 | # in that array 27 | next = n - 1 28 | while (next >= 0 and 29 | (indices[next] + 1 >= len(arr[next]))): 30 | next-=1 31 | 32 | # no such array is found so no more 33 | # combinations left 34 | if (next < 0): 35 | return 36 | 37 | # if found move to next element in that 38 | # array 39 | indices[next] += 1 40 | 41 | # for all arrays to the right of this 42 | # array current index again points to 43 | # first element 44 | for i in range(next + 1, n): 45 | indices[i] = 0 46 | print("],") 47 | 48 | 49 | # Driver Code 50 | 51 | # initializing a vector with 3 empty vectors 52 | arr = [[] for i in range(4)] 53 | 54 | # now entering data 55 | # [[1, 2, 3], [4], [5, 6]] 56 | arr[0].append('TheilsU') 57 | arr[0].append('Chi-SquaredTest') 58 | arr[0].append('RandomForestClassifier') 59 | arr[0].append('ExtraTreesClassifier') 60 | 61 | arr[1].append('OneHotEncoder') 62 | arr[1].append('LabelEncoder') 63 | arr[1].append('BinaryEncoder') 64 | arr[1].append('FrequencyEncoder') 65 | 66 | arr[2].append('Min-Max') 67 | arr[2].append('Standardization') 68 | arr[2].append('Binarizing') 69 | arr[2].append('Normalizing') 70 | 71 | arr[3].append('DecisonTree') 72 | arr[3].append('RandomForestClassifier') 73 | arr[3].append('ExtraTreesClassifier') 74 | arr[3].append('LogisticRegressionRegression') 75 | arr[3].append('LinearDiscriminantAnalysis') 76 | arr[3].append('GuassianNaiveBayes') 77 | 78 | print1(arr) 79 | 80 | # This code is contributed by mohit kumar 81 | -------------------------------------------------------------------------------- /Project-UtilityFunctions/lstm.py: -------------------------------------------------------------------------------- 1 | import time 2 | import warnings 3 | import numpy as np 4 | from numpy import newaxis 5 | from keras.layers.core import Dense, Activation, Dropout 6 | from 
keras.layers.recurrent import LSTM 7 | from keras.models import Sequential 8 | import matplotlib.pyplot as plt 9 | 10 | 11 | warnings.filterwarnings("ignore") 12 | 13 | def plot_results_multiple(predicted_data, true_data, prediction_len): 14 | fig = plt.figure(facecolor='white') 15 | ax = fig.add_subplot(111) 16 | ax.plot(true_data, label='True Data') 17 | print ('yo') 18 | #Pad the list of predictions to shift it in the graph to it's correct start 19 | for i, data in enumerate(predicted_data): 20 | padding = [None for p in xrange(i * prediction_len)] 21 | plt.plot(padding + data, label='Prediction') 22 | plt.legend() 23 | plt.show() 24 | 25 | def load_data(filename, seq_len, normalise_window): 26 | f = open(filename, 'r').read() 27 | data = f.split('\n') 28 | 29 | sequence_length = seq_len + 1 30 | result = [] 31 | for index in range(len(data) - sequence_length): 32 | result.append(data[index: index + sequence_length]) 33 | 34 | if normalise_window: 35 | result = normalise_windows(result) 36 | 37 | result = np.array(result) 38 | 39 | row = round(0.9 * result.shape[0]) 40 | train = result[:int(row), :] 41 | np.random.shuffle(train) 42 | x_train = train[:, :-1] 43 | y_train = train[:, -1] 44 | x_test = result[int(row):, :-1] 45 | y_test = result[int(row):, -1] 46 | 47 | x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1)) 48 | x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1)) 49 | 50 | return [x_train, y_train, x_test, y_test] 51 | 52 | def normalise_windows(window_data): 53 | normalised_data = [] 54 | for window in window_data: 55 | normalised_window = [((float(p) / float(window[0])) - 1) for p in window] 56 | normalised_data.append(normalised_window) 57 | return normalised_data 58 | 59 | def build_model(layers): 60 | model = Sequential() 61 | 62 | model.add(LSTM( 63 | input_dim=layers[0], 64 | output_dim=layers[1], 65 | return_sequences=True)) 66 | model.add(Dropout(0.2)) 67 | 68 | model.add(LSTM( 69 | layers[2], 70 | 
return_sequences=False)) 71 | model.add(Dropout(0.2)) 72 | 73 | model.add(Dense( 74 | output_dim=layers[3])) 75 | model.add(Activation("linear")) 76 | 77 | start = time.time() 78 | model.compile(loss="mse", optimizer="rmsprop") 79 | print ("Compilation Time : ", time.time() - start) 80 | return model 81 | 82 | def predict_point_by_point(model, data): 83 | #Predict each timestep given the last sequence of true data, in effect only predicting 1 step ahead each time 84 | predicted = model.predict(data) 85 | predicted = np.reshape(predicted, (predicted.size,)) 86 | return predicted 87 | 88 | def predict_sequence_full(model, data, window_size): 89 | #Shift the window by 1 new prediction each time, re-run predictions on new window 90 | curr_frame = data[0] 91 | predicted = [] 92 | for i in xrange(len(data)): 93 | predicted.append(model.predict(curr_frame[newaxis,:,:])[0,0]) 94 | curr_frame = curr_frame[1:] 95 | curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0) 96 | return predicted 97 | 98 | def predict_sequences_multiple(model, data, window_size, prediction_len): 99 | #Predict sequence of 50 steps before shifting prediction run forward by 50 steps 100 | prediction_seqs = [] 101 | for i in xrange(len(data)/prediction_len): 102 | curr_frame = data[i*prediction_len] 103 | predicted = [] 104 | for j in xrange(prediction_len): 105 | predicted.append(model.predict(curr_frame[newaxis,:,:])[0,0]) 106 | curr_frame = curr_frame[1:] 107 | curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0) 108 | prediction_seqs.append(predicted) 109 | return prediction_seqs -------------------------------------------------------------------------------- /Project-UtilityFunctions/util.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | #Utils 4 | import operator 5 | 6 | #Seaborn is an open source Python library providing high level API for visualizing the data 7 | import seaborn as sns 8 
| import matplotlib.pyplot as plt 9 | 10 | #library for saving the trained models to files 11 | import joblib 12 | 13 | from defineInputs import getPathToTrainingAndTestingDataSets 14 | from defineInputs import getPathToGenerateModels 15 | 16 | #Data loading library 17 | from dataloadinglibrary import loadCSV 18 | 19 | from defineInputs import getLabelName 20 | 21 | #Data pre-processing library 22 | from datapreprocessinglibrary import splitCompleteDataSetIntoTrainingSetAndTestingSet 23 | 24 | #Feature selection library 25 | from featureselectionlibrary import featureSelectionUsingTheilU 26 | from featureselectionlibrary import featureSelectionUsingChisquaredTest 27 | from featureselectionlibrary import featureSelectionUsingRandomForestClassifier 28 | from featureselectionlibrary import featureSelectionUsingExtraTreesClassifier 29 | 30 | #feature encoding library 31 | from featureencodinglibrary import featureEncodingUsingOneHotEncoder 32 | from featureencodinglibrary import featureEncodingUsingLabelEncoder 33 | from featureencodinglibrary import featureEncodingUsingBinaryEncoder 34 | from featureencodinglibrary import featureEncodingUsingFrequencyEncoder 35 | 36 | #feature scaling library 37 | from featurescalinglibrary import featureScalingUsingMinMaxScaler 38 | from featurescalinglibrary import featureScalingUsingStandardScalar 39 | from featurescalinglibrary import featureScalingUsingBinarizer 40 | from featurescalinglibrary import featureScalingUsingNormalizer 41 | 42 | from classificationlibrary import classifyUsingDecisionTreeClassifier 43 | from classificationlibrary import classifyUsingLogisticRegression 44 | from classificationlibrary import classifyUsingLinearDiscriminantAnalysis 45 | from classificationlibrary import classifyUsingGaussianNB 46 | from classificationlibrary import classifyUsingRandomForestClassifier 47 | from classificationlibrary import classifyUsingExtraTreesClassifier 48 | from classificationlibrary import classifyUsingKNNClassifier 49 
from classificationlibrary import findingOptimumNumberOfNeighboursForKNN

def compareModels(arrayOfModels):
    """Bar-plot the testing accuracy of every trained model and return the best one.

    Each row of arrayOfModels holds [selection, encoding, scaling, classifier,
    trainAcc, testAcc, ...]; index 0 is skipped (presumably a header row —
    confirm with the caller). Returns {bestModelName: bestAccuracy}.
    """
    accuracyByModel = {row[3]: row[5] for row in arrayOfModels[1:]}
    bestName = max(accuracyByModel.items(), key=operator.itemgetter(1))[0]
    bestModelAndItsAccuracy = {bestName: accuracyByModel[bestName]}
    sns.set_style("whitegrid")
    plt.figure(figsize=(5,5))
    plt.ylabel("Algorithms",fontsize=10)
    plt.xlabel("Accuracy %",fontsize=10)
    plt.title("Comparing the models based on the accuries achieved",fontsize=15)
    sns.barplot(x=list(accuracyByModel.values()), y=list(accuracyByModel.keys()))
    plt.show()
    return bestModelAndItsAccuracy

### Below function is responsible for performing pre-processing, training, evaluation, persisting model
def performPreprocessingBuildModelsAndEvaluateAccuracy(trainingDataSet, testingDataSet, arrayOfModels):
    """For every model row (index 0 skipped) run selection -> encoding -> scaling ->
    classification, append the accuracies and model/file names to the row, and
    persist the trained classifier with joblib.
    """
    for i in range(1, len(arrayOfModels)):
        modelSpec = arrayOfModels[i]  # alias: appends below mutate the caller's row
        print('***************************************************************************************************************************')
        print('********************************************* Building Model-', i ,' As Below *************************************************')
        print('\t -- Feature Selection: \t ', modelSpec[0], ' \n\t -- Feature Encoding: \t ', modelSpec[1], ' \n\t -- Feature Scaling: \t ', modelSpec[2], ' \n\t -- Classification: \t ', modelSpec[3], '\n')

        #the incoming dataSet arguments are ignored: every iteration reloads from disk
        trainingFileNameWithAbsolutePath, testingFileNameWithAbsolutePath = getPathToTrainingAndTestingDataSets()
        trainingDataSet = loadCSV(trainingFileNameWithAbsolutePath)
        testingDataSet = loadCSV(testingFileNameWithAbsolutePath)

        labelName = getLabelName()
        label = trainingDataSet[labelName]

        #Train and test are preprocessed together: in some datasets the categorical
        #columns contain values that appear in only one of the two sets, which
        #breaks the classification techniques later on
        completeDataSet = pd.concat(( trainingDataSet, testingDataSet ))

        #difficultyLevel = completeDataSet.pop('difficulty_level')

        print("completeDataSet.shape: ",completeDataSet.shape)
        print("completeDataSet.head: ",completeDataSet.head())

        #Feature Selection
        if modelSpec[0] == 'TheilsU':
            completeDataSetAfterFeatuerSelection = featureSelectionUsingTheilU(completeDataSet)
        elif modelSpec[0] == 'Chi-SquaredTest':
            completeDataSetAfterFeatuerSelection = featureSelectionUsingChisquaredTest(completeDataSet)
        elif modelSpec[0] == 'RandomForestClassifier':
            completeDataSetAfterFeatuerSelection = featureSelectionUsingRandomForestClassifier(completeDataSet)
        elif modelSpec[0] == 'ExtraTreesClassifier':
            completeDataSetAfterFeatuerSelection = featureSelectionUsingExtraTreesClassifier(completeDataSet)

        #Feature Encoding: convert categorical values into numeric features
        if modelSpec[1] == 'LabelEncoder':
            completeEncodedDataSet = featureEncodingUsingLabelEncoder(completeDataSetAfterFeatuerSelection)
        elif modelSpec[1] == 'OneHotEncoder':
            completeEncodedDataSet = featureEncodingUsingOneHotEncoder(completeDataSetAfterFeatuerSelection)
        elif modelSpec[1] == 'FrequencyEncoder':
            completeEncodedDataSet = featureEncodingUsingFrequencyEncoder(completeDataSetAfterFeatuerSelection)
        elif modelSpec[1] == 'BinaryEncoder':
            completeEncodedDataSet = featureEncodingUsingBinaryEncoder(completeDataSetAfterFeatuerSelection)

        #Feature Scaling: bring the features of the dataset into the same range
        if modelSpec[2] == 'Min-Max':
            completeEncodedAndScaledDataset = featureScalingUsingMinMaxScaler(completeEncodedDataSet)
        elif modelSpec[2] == 'Binarizing':
            completeEncodedAndScaledDataset = featureScalingUsingBinarizer(completeEncodedDataSet)
        elif modelSpec[2] == 'Normalizing':
            completeEncodedAndScaledDataset = featureScalingUsingNormalizer(completeEncodedDataSet)
        elif modelSpec[2] == 'Standardization':
            completeEncodedAndScaledDataset = featureScalingUsingStandardScalar(completeEncodedDataSet)

        #Split the complete dataSet back into a training dataSet and a testing dataSet
        featuresInPreProcessedTrainingDataSet,featuresInPreProcessedTestingDataSet,labelInPreProcessedTrainingDataSet,labelInPreProcessedTestingDataSet = splitCompleteDataSetIntoTrainingSetAndTestingSet(completeEncodedAndScaledDataset)

        trainingEncodedAndScaledDataset = pd.concat([featuresInPreProcessedTrainingDataSet, labelInPreProcessedTrainingDataSet], axis=1, sort=False)
        testingEncodedAndScaledDataset = pd.concat([featuresInPreProcessedTestingDataSet, labelInPreProcessedTestingDataSet], axis=1, sort=False)

        #Classification
        if modelSpec[3] == 'DecisonTree':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingDecisionTreeClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif modelSpec[3] == 'RandomForestClassifier':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingRandomForestClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif modelSpec[3] == 'ExtraTreesClassifier':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingExtraTreesClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif modelSpec[3] == 'LogisticRegressionRegression':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingLogisticRegression(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif modelSpec[3] == 'LinearDiscriminantAnalysis':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingLinearDiscriminantAnalysis(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif modelSpec[3] == 'GuassianNaiveBayes':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingGaussianNB(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif modelSpec[3] == 'KNN':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingKNNClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)

        #Record the results on the model row, then persist the trained classifier
        modelSpec.append(trainingAccuracyScore)
        modelSpec.append(testingAccuracyScore)

        modelName = modelSpec[0]+"_"+modelSpec[1]+"_"+modelSpec[2]+"_"+modelSpec[3]
        modelFileName = getPathToGenerateModels() + modelName+".pkl"
        modelSpec.append(modelName)
        modelSpec.append(modelFileName)
        #Save the model to file
        joblib.dump(classifier, modelFileName)

def performPreprocessing(trainingDataSet, testingDataSet, arrayOfModels): 167 | for i in range(0,len(arrayOfModels)): 168 | print('***************************************************************************************************************************') 169 | print('********************************************* Building Model-', i ,' As Below *************************************************') 170 | print('\t -- Feature Selection: \t ', arrayOfModels[i][0], ' \n\t -- Feature Encoding: \t ', arrayOfModels[i][1], ' \n\t -- Feature Scaling: \t ', arrayOfModels[i][2], '\n') 171 | 172 | trainingFileNameWithAbsolutePath, testingFileNameWithAbsolutePath = getPathToTrainingAndTestingDataSets() 173 | trainingDataSet = loadCSV(trainingFileNameWithAbsolutePath) 174 | testingDataSet = loadCSV(testingFileNameWithAbsolutePath) 175 | 176 | labelName = getLabelName() 177 | label = trainingDataSet[labelName] 178 | 179 | #Combining the test and training datasets for preprocessing then together, because we observed that in sme datasets 180 | #the values in the categorical columns in test dataset and train dataset are being different this causes issues while 181 | #applying classification techniques 182 | completeDataSet = pd.concat(( trainingDataSet, testingDataSet )) 183 | 184 | #difficultyLevel = completeDataSet.pop('difficulty_level') 185 | 186 | print("completeDataSet.shape: ",completeDataSet.shape) 187 | print("completeDataSet.head: ",completeDataSet.head()) 188 | 189 | #Feature Selection 190 | if arrayOfModels[i][0] == 'TheilsU': 191 | #Perform feature selection using TheilU 192 | completeDataSetAfterFeatuerSelection = featureSelectionUsingTheilU(completeDataSet) 193 | elif arrayOfModels[i][0] == 'Chi-SquaredTest': 194 | #Perform feature selection using Chi-squared Test 195 | completeDataSetAfterFeatuerSelection = featureSelectionUsingChisquaredTest(completeDataSet) 196 | elif arrayOfModels[i][0] == 'RandomForestClassifier': 197 | #Perform feature selection using 
RandomForestClassifier 198 | completeDataSetAfterFeatuerSelection = featureSelectionUsingRandomForestClassifier(completeDataSet) 199 | elif arrayOfModels[i][0] == 'ExtraTreesClassifier': 200 | #Perform feature selection using ExtraTreesClassifier 201 | completeDataSetAfterFeatuerSelection = featureSelectionUsingExtraTreesClassifier(completeDataSet) 202 | 203 | #Feature Encoding 204 | if arrayOfModels[i][1] == 'LabelEncoder': 205 | #Perform lable encoding to convert categorical values into label encoded features 206 | completeEncodedDataSet = featureEncodingUsingLabelEncoder(completeDataSetAfterFeatuerSelection) 207 | elif arrayOfModels[i][1] == 'OneHotEncoder': 208 | #Perform OnHot encoding to convert categorical values into one-hot encoded features 209 | completeEncodedDataSet = featureEncodingUsingOneHotEncoder(completeDataSetAfterFeatuerSelection) 210 | elif arrayOfModels[i][1] == 'FrequencyEncoder': 211 | #Perform Frequency encoding to convert categorical values into frequency encoded features 212 | completeEncodedDataSet = featureEncodingUsingFrequencyEncoder(completeDataSetAfterFeatuerSelection) 213 | elif arrayOfModels[i][1] == 'BinaryEncoder': 214 | #Perform Binary encoding to convert categorical values into binary encoded features 215 | completeEncodedDataSet = featureEncodingUsingBinaryEncoder(completeDataSetAfterFeatuerSelection) 216 | 217 | #Feature Scaling 218 | if arrayOfModels[i][2] == 'Min-Max': 219 | #Perform MinMaxScaler to scale the features of the dataset into same range 220 | completeEncodedAndScaledDataset = featureScalingUsingMinMaxScaler(completeEncodedDataSet) 221 | elif arrayOfModels[i][2] == 'Binarizing': 222 | #Perform Binarizing to scale the features of the dataset into same range 223 | completeEncodedAndScaledDataset = featureScalingUsingBinarizer(completeEncodedDataSet) 224 | elif arrayOfModels[i][2] == 'Normalizing': 225 | #Perform Normalizing to scale the features of the dataset into same range 226 | completeEncodedAndScaledDataset 
= featureScalingUsingNormalizer(completeEncodedDataSet) 227 | elif arrayOfModels[i][2] == 'Standardization': 228 | #Perform Standardization to scale the features of the dataset into same range 229 | completeEncodedAndScaledDataset = featureScalingUsingStandardScalar(completeEncodedDataSet) 230 | 231 | #Split the complete dataSet into training dataSet and testing dataSet 232 | featuresInPreProcessedTrainingDataSet,featuresInPreProcessedTestingDataSet,labelInPreProcessedTrainingDataSet,labelInPreProcessedTestingDataSet = splitCompleteDataSetIntoTrainingSetAndTestingSet(completeEncodedAndScaledDataset) 233 | 234 | trainingEncodedAndScaledDataset = pd.concat([featuresInPreProcessedTrainingDataSet, labelInPreProcessedTrainingDataSet], axis=1, sort=False) 235 | testingEncodedAndScaledDataset = pd.concat([featuresInPreProcessedTestingDataSet, labelInPreProcessedTestingDataSet], axis=1, sort=False) 236 | 237 | return completeEncodedAndScaledDataset --------------------------------------------------------------------------------