├── Datasets
│   ├── KDDCup99
│   │   ├── kddcup.data.gz
│   │   ├── kddcup.data_10_percent.gz
│   │   ├── kddcup.newtestdata_10_percent_unlabeled.gz
│   │   ├── kddcup.testdata.unlabeled.gz
│   │   └── kddcup.testdata.unlabeled_10_percent.gz
│   └── NSL-KDD
│       ├── KDDTest-21.csv
│       ├── KDDTrain+_20Percent.csv
│       └── KDDTrain+_20Percent_Description.xlsx
├── IDSUsingAutoEnoderNeuralNetwork.ipynb
├── IDSUsingSimpleDeepNeuralNetwork.ipynb
├── IDSUsingTraditionalMLTechniques.ipynb
└── Project-UtilityFunctions
    ├── __pycache__
    │   └── lstm.cpython-37.pyc
    ├── classificationlibrary.py
    ├── dataformatinglibrary.py
    ├── datainspectionlibrary.py
    ├── dataloadinglibrary.py
    ├── datapreprocessinglibrary.py
    ├── defineInputs.py
    ├── featureencodinglibrary.py
    ├── featurescalinglibrary.py
    ├── featureselectionlibrary.py
    ├── findcombinations.py
    ├── lstm.py
    └── util.py
/Datasets/KDDCup99/kddcup.data.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.data.gz
--------------------------------------------------------------------------------
/Datasets/KDDCup99/kddcup.data_10_percent.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.data_10_percent.gz
--------------------------------------------------------------------------------
/Datasets/KDDCup99/kddcup.newtestdata_10_percent_unlabeled.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.newtestdata_10_percent_unlabeled.gz
--------------------------------------------------------------------------------
/Datasets/KDDCup99/kddcup.testdata.unlabeled.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.testdata.unlabeled.gz
--------------------------------------------------------------------------------
/Datasets/KDDCup99/kddcup.testdata.unlabeled_10_percent.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/KDDCup99/kddcup.testdata.unlabeled_10_percent.gz
--------------------------------------------------------------------------------
/Datasets/NSL-KDD/KDDTrain+_20Percent_Description.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Datasets/NSL-KDD/KDDTrain+_20Percent_Description.xlsx
--------------------------------------------------------------------------------
/IDSUsingSimpleDeepNeuralNetwork.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Import the required libraries and the utility modules"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 12,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np\n",
17 | "import pandas as pd\n",
18 | "\n",
19 | "from sklearn import metrics\n",
20 | "from sklearn.model_selection import train_test_split\n",
21 | "from sklearn.preprocessing import LabelEncoder\n",
22 | "\n",
23 | "from tensorflow.keras.models import Sequential\n",
24 | "from tensorflow.keras.models import load_model\n",
25 | "from tensorflow.keras.layers import Dense, Activation, Dropout\n",
26 | "from tensorflow.keras.callbacks import EarlyStopping\n",
27 | "from tensorflow.keras.callbacks import ModelCheckpoint\n",
28 | "from tensorflow.keras.utils import plot_model\n",
29 | "from tensorflow.python.keras.utils.np_utils import to_categorical\n",
30 | "\n",
31 | "import matplotlib.pyplot as plt\n",
32 | "\n",
33 | "#Custom libraries\n",
34 | "#Data formating library\n",
35 | "from dataloadinglibrary import loadCSV\n",
36 | "\n",
37 | "from datainspectionlibrary import getStatisticsOfData\n",
38 | "\n",
39 | "from dataformatinglibrary import createExcelFromArray\n",
40 | "\n",
41 | "from defineInputs import getLabelName\n",
42 | "from defineInputs import getPathToTrainingAndTestingDataSets\n",
43 | "from defineInputs import modelPerformanceReport\n",
44 | "from defineInputs import defineArrayForPreProcessing\n",
45 | "from defineInputs import getPathToGenerateModels\n",
46 | "\n",
47 | "from util import performPreprocessing"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "### Load the training dataset and check the statistics"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 2,
60 | "metadata": {
61 | "scrolled": true
62 | },
63 | "outputs": [
64 | {
65 | "name": "stdout",
66 | "output_type": "stream",
67 | "text": [
68 | "***** Start checking the statistics of the dataSet *****\n",
69 | "\n",
70 | "***** Shape (number of rows and columns) in the dataset: (25191, 42)\n",
71 | "***** Total number of features in the dataset: 41\n",
72 | "***** Number of categorical features in the dataset: 3\n",
73 | "***** Number of numerical features in the dataset: 38\n",
74 | "\n",
75 | "***** Names of categorical features in dataset *****\n",
76 | "\n",
77 | "| Categorical features in dataset |\n",
78 | "|-----------------------------------|\n",
79 | "| Protocol_type |\n",
80 | "| Service |\n",
81 | "| Flag |\n",
82 | "\n",
83 | "\n",
84 | "***** Names of numerical features in dataset *****\n",
85 | "\n",
86 | "| Numerical features in the dataset |\n",
87 | "|-------------------------------------|\n",
88 | "| Duration |\n",
89 | "| Src_bytes |\n",
90 | "| Dst_bytes |\n",
91 | "| Land |\n",
92 | "| Wrong_fragment |\n",
93 | "| Urgent |\n",
94 | "| Hot |\n",
95 | "| Num_failed_logins |\n",
96 | "| Logged_in |\n",
97 | "| Num_compromised |\n",
98 | "| Root_shell |\n",
99 | "| Su_attempted |\n",
100 | "| Num_root |\n",
101 | "| Num_file_creations |\n",
102 | "| Num_shells |\n",
103 | "| Num_access_files |\n",
104 | "| Num_outbound_cmds |\n",
105 | "| Is_hot_login |\n",
106 | "| Is_guest_login |\n",
107 | "| Count |\n",
108 | "| Srv_count |\n",
109 | "| Serror_rate |\n",
110 | "| Srv_serror_rate |\n",
111 | "| Rerror_rate |\n",
112 | "| Srv_rerror_rate |\n",
113 | "| Same_srv_rate |\n",
114 | "| Diff_srv_rate |\n",
115 | "| Srv_diff_host_rate |\n",
116 | "| Dst_host_count |\n",
117 | "| Dst_host_srv_count |\n",
118 | "| Dst_host_same_srv_rate |\n",
119 | "| Dst_host_diff_srv_rate |\n",
120 | "| Dst_host_same_src_port_rate |\n",
121 | "| Dst_host_srv_diff_host_rate |\n",
122 | "| Dst_host_serror_rate |\n",
123 | "| Dst_host_srv_serror_rate |\n",
124 | "| Dst_host_rerror_rate |\n",
125 | "| Dst_host_srv_rerror_rate |\n",
126 | "\n",
127 | "\n",
128 | "***** Are there any missing values in the data set: False\n",
129 | "Total number of records in the dataset: 25191\n",
130 | "Unique records in the dataset: 25191\n",
131 | "\n",
132 | "***** Are there any duplicate records in the data set: False\n",
133 | "\n",
134 | "****** Number of different values for label that are present in the dataset: 22\n",
135 | "\n",
136 | "****** Here is the list of unique label types present in the dataset ***** \n",
137 | "\n",
138 | "| Unique label types in the dataset |\n",
139 | "|-------------------------------------|\n",
140 | "| normal |\n",
141 | "| neptune |\n",
142 | "| warezclient |\n",
143 | "| ipsweep |\n",
144 | "| portsweep |\n",
145 | "| teardrop |\n",
146 | "| nmap |\n",
147 | "| satan |\n",
148 | "| smurf |\n",
149 | "| pod |\n",
150 | "| back |\n",
151 | "| guess_passwd |\n",
152 | "| ftp_write |\n",
153 | "| multihop |\n",
154 | "| rootkit |\n",
155 | "| buffer_overflow |\n",
156 | "| imap |\n",
157 | "| warezmaster |\n",
158 | "| phf |\n",
159 | "| land |\n",
160 | "| loadmodule |\n",
161 | "| spy |\n",
162 | "\n",
163 | "\n",
164 | "****** Here is the list of unique values present in each categorical feature in the dataset *****\n",
165 | "\n",
166 | "\n",
167 | "attack_type: 22 \n",
168 | "| distinct values |\n",
169 | "|-------------------|\n",
170 | "| normal |\n",
171 | "| neptune |\n",
172 | "| warezclient |\n",
173 | "| ipsweep |\n",
174 | "| portsweep |\n",
175 | "| teardrop |\n",
176 | "| nmap |\n",
177 | "| satan |\n",
178 | "| smurf |\n",
179 | "| pod |\n",
180 | "| back |\n",
181 | "| guess_passwd |\n",
182 | "| ftp_write |\n",
183 | "| multihop |\n",
184 | "| rootkit |\n",
185 | "| buffer_overflow |\n",
186 | "| imap |\n",
187 | "| warezmaster |\n",
188 | "| phf |\n",
189 | "| land |\n",
190 | "| loadmodule |\n",
191 | "| spy |\n",
192 | "\n",
193 | "\n",
194 | "Protocol_type: 3 \n",
195 | "| distinct values |\n",
196 | "|-------------------|\n",
197 | "| udp |\n",
198 | "| tcp |\n",
199 | "| icmp |\n",
200 | "\n",
201 | "\n",
202 | "Service: 66 \n",
203 | "| distinct values |\n",
204 | "|-------------------|\n",
205 | "| other |\n",
206 | "| private |\n",
207 | "| http |\n",
208 | "| remote_job |\n",
209 | "| ftp_data |\n",
210 | "| name |\n",
211 | "| netbios_ns |\n",
212 | "| eco_i |\n",
213 | "| mtp |\n",
214 | "| telnet |\n",
215 | "| finger |\n",
216 | "| domain_u |\n",
217 | "| supdup |\n",
218 | "| uucp_path |\n",
219 | "| Z39_50 |\n",
220 | "| smtp |\n",
221 | "| csnet_ns |\n",
222 | "| uucp |\n",
223 | "| netbios_dgm |\n",
224 | "| urp_i |\n",
225 | "| auth |\n",
226 | "| domain |\n",
227 | "| ftp |\n",
228 | "| bgp |\n",
229 | "| ldap |\n",
230 | "| ecr_i |\n",
231 | "| gopher |\n",
232 | "| vmnet |\n",
233 | "| systat |\n",
234 | "| http_443 |\n",
235 | "| efs |\n",
236 | "| whois |\n",
237 | "| imap4 |\n",
238 | "| iso_tsap |\n",
239 | "| echo |\n",
240 | "| klogin |\n",
241 | "| link |\n",
242 | "| sunrpc |\n",
243 | "| login |\n",
244 | "| kshell |\n",
245 | "| sql_net |\n",
246 | "| time |\n",
247 | "| hostnames |\n",
248 | "| exec |\n",
249 | "| ntp_u |\n",
250 | "| discard |\n",
251 | "| nntp |\n",
252 | "| courier |\n",
253 | "| ctf |\n",
254 | "| ssh |\n",
255 | "| daytime |\n",
256 | "| shell |\n",
257 | "| netstat |\n",
258 | "| pop_3 |\n",
259 | "| nnsp |\n",
260 | "| IRC |\n",
261 | "| pop_2 |\n",
262 | "| printer |\n",
263 | "| tim_i |\n",
264 | "| pm_dump |\n",
265 | "| red_i |\n",
266 | "| netbios_ssn |\n",
267 | "| rje |\n",
268 | "| X11 |\n",
269 | "| urh_i |\n",
270 | "| http_8001 |\n",
271 | "\n",
272 | "\n",
273 | "Flag: 11 \n",
274 | "| distinct values |\n",
275 | "|-------------------|\n",
276 | "| SF |\n",
277 | "| S0 |\n",
278 | "| REJ |\n",
279 | "| RSTR |\n",
280 | "| SH |\n",
281 | "| RSTO |\n",
282 | "| S1 |\n",
283 | "| RSTOS0 |\n",
284 | "| S3 |\n",
285 | "| S2 |\n",
286 | "| OTH |\n",
287 | "\n",
288 | "\n",
289 | "****** Label distribution in the dataset *****\n",
290 | "\n",
291 | "normal 13448\n",
292 | "neptune 8282\n",
293 | "ipsweep 710\n",
294 | "satan 691\n",
295 | "portsweep 587\n",
296 | "smurf 529\n",
297 | "nmap 301\n",
298 | "back 196\n",
299 | "teardrop 188\n",
300 | "warezclient 181\n",
301 | "pod 38\n",
302 | "guess_passwd 10\n",
303 | "warezmaster 7\n",
304 | "buffer_overflow 6\n",
305 | "imap 5\n",
306 | "rootkit 4\n",
307 | "multihop 2\n",
308 | "phf 2\n",
309 | "loadmodule 1\n",
310 | "ftp_write 1\n",
311 | "land 1\n",
312 | "spy 1\n",
313 | "Name: attack_type, dtype: int64\n",
314 | "\n",
315 | "\n",
316 | "***** End checking the statistics of the dataSet *****\n",
317 | "\n",
318 | "***** Here is how to training dataset looks like before performing any pre-processing *****\n"
319 | ]
320 | },
321 | {
322 | "data": {
323 | "text/html": [
324 | "
\n",
325 | "\n",
338 | "
\n",
339 | " \n",
340 | " \n",
341 | " | \n",
342 | " Duration | \n",
343 | " Protocol_type | \n",
344 | " Service | \n",
345 | " Flag | \n",
346 | " Src_bytes | \n",
347 | " Dst_bytes | \n",
348 | " Land | \n",
349 | " Wrong_fragment | \n",
350 | " Urgent | \n",
351 | " Hot | \n",
352 | " ... | \n",
353 | " Dst_host_srv_count | \n",
354 | " Dst_host_same_srv_rate | \n",
355 | " Dst_host_diff_srv_rate | \n",
356 | " Dst_host_same_src_port_rate | \n",
357 | " Dst_host_srv_diff_host_rate | \n",
358 | " Dst_host_serror_rate | \n",
359 | " Dst_host_srv_serror_rate | \n",
360 | " Dst_host_rerror_rate | \n",
361 | " Dst_host_srv_rerror_rate | \n",
362 | " attack_type | \n",
363 | "
\n",
364 | " \n",
365 | " \n",
366 | " \n",
367 | " 0 | \n",
368 | " 0 | \n",
369 | " udp | \n",
370 | " other | \n",
371 | " SF | \n",
372 | " 146 | \n",
373 | " 0 | \n",
374 | " 0 | \n",
375 | " 0 | \n",
376 | " 0 | \n",
377 | " 0 | \n",
378 | " ... | \n",
379 | " 1 | \n",
380 | " 0.00 | \n",
381 | " 0.60 | \n",
382 | " 0.88 | \n",
383 | " 0.00 | \n",
384 | " 0.00 | \n",
385 | " 0.00 | \n",
386 | " 0.0 | \n",
387 | " 0.00 | \n",
388 | " normal | \n",
389 | "
\n",
390 | " \n",
391 | " 1 | \n",
392 | " 0 | \n",
393 | " tcp | \n",
394 | " private | \n",
395 | " S0 | \n",
396 | " 0 | \n",
397 | " 0 | \n",
398 | " 0 | \n",
399 | " 0 | \n",
400 | " 0 | \n",
401 | " 0 | \n",
402 | " ... | \n",
403 | " 26 | \n",
404 | " 0.10 | \n",
405 | " 0.05 | \n",
406 | " 0.00 | \n",
407 | " 0.00 | \n",
408 | " 1.00 | \n",
409 | " 1.00 | \n",
410 | " 0.0 | \n",
411 | " 0.00 | \n",
412 | " neptune | \n",
413 | "
\n",
414 | " \n",
415 | " 2 | \n",
416 | " 0 | \n",
417 | " tcp | \n",
418 | " http | \n",
419 | " SF | \n",
420 | " 232 | \n",
421 | " 8153 | \n",
422 | " 0 | \n",
423 | " 0 | \n",
424 | " 0 | \n",
425 | " 0 | \n",
426 | " ... | \n",
427 | " 255 | \n",
428 | " 1.00 | \n",
429 | " 0.00 | \n",
430 | " 0.03 | \n",
431 | " 0.04 | \n",
432 | " 0.03 | \n",
433 | " 0.01 | \n",
434 | " 0.0 | \n",
435 | " 0.01 | \n",
436 | " normal | \n",
437 | "
\n",
438 | " \n",
439 | " 3 | \n",
440 | " 0 | \n",
441 | " tcp | \n",
442 | " http | \n",
443 | " SF | \n",
444 | " 199 | \n",
445 | " 420 | \n",
446 | " 0 | \n",
447 | " 0 | \n",
448 | " 0 | \n",
449 | " 0 | \n",
450 | " ... | \n",
451 | " 255 | \n",
452 | " 1.00 | \n",
453 | " 0.00 | \n",
454 | " 0.00 | \n",
455 | " 0.00 | \n",
456 | " 0.00 | \n",
457 | " 0.00 | \n",
458 | " 0.0 | \n",
459 | " 0.00 | \n",
460 | " normal | \n",
461 | "
\n",
462 | " \n",
463 | " 4 | \n",
464 | " 0 | \n",
465 | " tcp | \n",
466 | " private | \n",
467 | " REJ | \n",
468 | " 0 | \n",
469 | " 0 | \n",
470 | " 0 | \n",
471 | " 0 | \n",
472 | " 0 | \n",
473 | " 0 | \n",
474 | " ... | \n",
475 | " 19 | \n",
476 | " 0.07 | \n",
477 | " 0.07 | \n",
478 | " 0.00 | \n",
479 | " 0.00 | \n",
480 | " 0.00 | \n",
481 | " 0.00 | \n",
482 | " 1.0 | \n",
483 | " 1.00 | \n",
484 | " neptune | \n",
485 | "
\n",
486 | " \n",
487 | "
\n",
488 | "
5 rows × 42 columns
\n",
489 | "
"
490 | ],
491 | "text/plain": [
492 | " Duration Protocol_type Service Flag Src_bytes Dst_bytes Land \\\n",
493 | "0 0 udp other SF 146 0 0 \n",
494 | "1 0 tcp private S0 0 0 0 \n",
495 | "2 0 tcp http SF 232 8153 0 \n",
496 | "3 0 tcp http SF 199 420 0 \n",
497 | "4 0 tcp private REJ 0 0 0 \n",
498 | "\n",
499 | " Wrong_fragment Urgent Hot ... Dst_host_srv_count \\\n",
500 | "0 0 0 0 ... 1 \n",
501 | "1 0 0 0 ... 26 \n",
502 | "2 0 0 0 ... 255 \n",
503 | "3 0 0 0 ... 255 \n",
504 | "4 0 0 0 ... 19 \n",
505 | "\n",
506 | " Dst_host_same_srv_rate Dst_host_diff_srv_rate \\\n",
507 | "0 0.00 0.60 \n",
508 | "1 0.10 0.05 \n",
509 | "2 1.00 0.00 \n",
510 | "3 1.00 0.00 \n",
511 | "4 0.07 0.07 \n",
512 | "\n",
513 | " Dst_host_same_src_port_rate Dst_host_srv_diff_host_rate \\\n",
514 | "0 0.88 0.00 \n",
515 | "1 0.00 0.00 \n",
516 | "2 0.03 0.04 \n",
517 | "3 0.00 0.00 \n",
518 | "4 0.00 0.00 \n",
519 | "\n",
520 | " Dst_host_serror_rate Dst_host_srv_serror_rate Dst_host_rerror_rate \\\n",
521 | "0 0.00 0.00 0.0 \n",
522 | "1 1.00 1.00 0.0 \n",
523 | "2 0.03 0.01 0.0 \n",
524 | "3 0.00 0.00 0.0 \n",
525 | "4 0.00 0.00 1.0 \n",
526 | "\n",
527 | " Dst_host_srv_rerror_rate attack_type \n",
528 | "0 0.00 normal \n",
529 | "1 0.00 neptune \n",
530 | "2 0.01 normal \n",
531 | "3 0.00 normal \n",
532 | "4 1.00 neptune \n",
533 | "\n",
534 | "[5 rows x 42 columns]"
535 | ]
536 | },
537 | "execution_count": 2,
538 | "metadata": {},
539 | "output_type": "execute_result"
540 | }
541 | ],
542 | "source": [
543 | "#Define file names and call loadCSV to load the CSV files\n",
544 | "trainingFileNameWithAbsolutePath, testingFileNameWithAbsolutePath = getPathToTrainingAndTestingDataSets()\n",
545 | "trainingDataSet = loadCSV(trainingFileNameWithAbsolutePath)\n",
546 | "difficultyLevel = trainingDataSet.pop('difficulty_level')\n",
547 | "labelName = getLabelName()\n",
548 | "label = trainingDataSet[labelName]\n",
549 | "\n",
550 | "#Look at the statistics of the dataSet\n",
551 | "getStatisticsOfData(trainingDataSet)\n",
552 | "print(\"\\n***** Here is how to training dataset looks like before performing any pre-processing *****\")\n",
553 | "trainingDataSet.head()"
554 | ]
555 | },
556 | {
557 | "cell_type": "markdown",
558 | "metadata": {},
559 | "source": [
560 | "### Load the testing dataset and check the statistics"
561 | ]
562 | },
563 | {
564 | "cell_type": "code",
565 | "execution_count": 3,
566 | "metadata": {},
567 | "outputs": [
568 | {
569 | "name": "stdout",
570 | "output_type": "stream",
571 | "text": [
572 | "***** Start checking the statistics of the dataSet *****\n",
573 | "\n",
574 | "***** Shape (number of rows and columns) in the dataset: (11850, 42)\n",
575 | "***** Total number of features in the dataset: 41\n",
576 | "***** Number of categorical features in the dataset: 3\n",
577 | "***** Number of numerical features in the dataset: 38\n",
578 | "\n",
579 | "***** Names of categorical features in dataset *****\n",
580 | "\n",
581 | "| Categorical features in dataset |\n",
582 | "|-----------------------------------|\n",
583 | "| Protocol_type |\n",
584 | "| Service |\n",
585 | "| Flag |\n",
586 | "\n",
587 | "\n",
588 | "***** Names of numerical features in dataset *****\n",
589 | "\n",
590 | "| Numerical features in the dataset |\n",
591 | "|-------------------------------------|\n",
592 | "| Duration |\n",
593 | "| Src_bytes |\n",
594 | "| Dst_bytes |\n",
595 | "| Land |\n",
596 | "| Wrong_fragment |\n",
597 | "| Urgent |\n",
598 | "| Hot |\n",
599 | "| Num_failed_logins |\n",
600 | "| Logged_in |\n",
601 | "| Num_compromised |\n",
602 | "| Root_shell |\n",
603 | "| Su_attempted |\n",
604 | "| Num_root |\n",
605 | "| Num_file_creations |\n",
606 | "| Num_shells |\n",
607 | "| Num_access_files |\n",
608 | "| Num_outbound_cmds |\n",
609 | "| Is_hot_login |\n",
610 | "| Is_guest_login |\n",
611 | "| Count |\n",
612 | "| Srv_count |\n",
613 | "| Serror_rate |\n",
614 | "| Srv_serror_rate |\n",
615 | "| Rerror_rate |\n",
616 | "| Srv_rerror_rate |\n",
617 | "| Same_srv_rate |\n",
618 | "| Diff_srv_rate |\n",
619 | "| Srv_diff_host_rate |\n",
620 | "| Dst_host_count |\n",
621 | "| Dst_host_srv_count |\n",
622 | "| Dst_host_same_srv_rate |\n",
623 | "| Dst_host_diff_srv_rate |\n",
624 | "| Dst_host_same_src_port_rate |\n",
625 | "| Dst_host_srv_diff_host_rate |\n",
626 | "| Dst_host_serror_rate |\n",
627 | "| Dst_host_srv_serror_rate |\n",
628 | "| Dst_host_rerror_rate |\n",
629 | "| Dst_host_srv_rerror_rate |\n",
630 | "\n",
631 | "\n",
632 | "***** Are there any missing values in the data set: False\n",
633 | "Total number of records in the dataset: 11850\n",
634 | "Unique records in the dataset: 11850\n",
635 | "\n",
636 | "***** Are there any duplicate records in the data set: False\n",
637 | "\n",
638 | "****** Number of different values for label that are present in the dataset: 38\n",
639 | "\n",
640 | "****** Here is the list of unique label types present in the dataset ***** \n",
641 | "\n",
642 | "| Unique label types in the dataset |\n",
643 | "|-------------------------------------|\n",
644 | "| guess_passwd |\n",
645 | "| snmpguess |\n",
646 | "| processtable |\n",
647 | "| normal |\n",
648 | "| nmap |\n",
649 | "| back |\n",
650 | "| neptune |\n",
651 | "| satan |\n",
652 | "| saint |\n",
653 | "| mscan |\n",
654 | "| apache2 |\n",
655 | "| httptunnel |\n",
656 | "| warezmaster |\n",
657 | "| ipsweep |\n",
658 | "| smurf |\n",
659 | "| mailbomb |\n",
660 | "| teardrop |\n",
661 | "| portsweep |\n",
662 | "| snmpgetattack |\n",
663 | "| multihop |\n",
664 | "| worm |\n",
665 | "| land |\n",
666 | "| sendmail |\n",
667 | "| buffer_overflow |\n",
668 | "| pod |\n",
669 | "| rootkit |\n",
670 | "| xlock |\n",
671 | "| xterm |\n",
672 | "| xsnoop |\n",
673 | "| ps |\n",
674 | "| named |\n",
675 | "| ftp_write |\n",
676 | "| loadmodule |\n",
677 | "| phf |\n",
678 | "| udpstorm |\n",
679 | "| perl |\n",
680 | "| sqlattack |\n",
681 | "| imap |\n",
682 | "\n",
683 | "\n",
684 | "****** Here is the list of unique values present in each categorical feature in the dataset *****\n",
685 | "\n",
686 | "\n",
687 | "attack_type: 38 \n",
688 | "| distinct values |\n",
689 | "|-------------------|\n",
690 | "| guess_passwd |\n",
691 | "| snmpguess |\n",
692 | "| processtable |\n",
693 | "| normal |\n",
694 | "| nmap |\n",
695 | "| back |\n",
696 | "| neptune |\n",
697 | "| satan |\n",
698 | "| saint |\n",
699 | "| mscan |\n",
700 | "| apache2 |\n",
701 | "| httptunnel |\n",
702 | "| warezmaster |\n",
703 | "| ipsweep |\n",
704 | "| smurf |\n",
705 | "| mailbomb |\n",
706 | "| teardrop |\n",
707 | "| portsweep |\n",
708 | "| snmpgetattack |\n",
709 | "| multihop |\n",
710 | "| worm |\n",
711 | "| land |\n",
712 | "| sendmail |\n",
713 | "| buffer_overflow |\n",
714 | "| pod |\n",
715 | "| rootkit |\n",
716 | "| xlock |\n",
717 | "| xterm |\n",
718 | "| xsnoop |\n",
719 | "| ps |\n",
720 | "| named |\n",
721 | "| ftp_write |\n",
722 | "| loadmodule |\n",
723 | "| phf |\n",
724 | "| udpstorm |\n",
725 | "| perl |\n",
726 | "| sqlattack |\n",
727 | "| imap |\n",
728 | "\n",
729 | "\n",
730 | "Protocol_type: 3 \n",
731 | "| distinct values |\n",
732 | "|-------------------|\n",
733 | "| tcp |\n",
734 | "| udp |\n",
735 | "| icmp |\n",
736 | "\n",
737 | "\n",
738 | "Service: 62 \n",
739 | "| distinct values |\n",
740 | "|-------------------|\n",
741 | "| telnet |\n",
742 | "| private |\n",
743 | "| http |\n",
744 | "| imap4 |\n",
745 | "| ftp_data |\n",
746 | "| other |\n",
747 | "| ctf |\n",
748 | "| pop_3 |\n",
749 | "| ftp |\n",
750 | "| domain_u |\n",
751 | "| domain |\n",
752 | "| eco_i |\n",
753 | "| ecr_i |\n",
754 | "| finger |\n",
755 | "| name |\n",
756 | "| smtp |\n",
757 | "| vmnet |\n",
758 | "| mtp |\n",
759 | "| bgp |\n",
760 | "| exec |\n",
761 | "| sunrpc |\n",
762 | "| uucp_path |\n",
763 | "| iso_tsap |\n",
764 | "| echo |\n",
765 | "| auth |\n",
766 | "| hostnames |\n",
767 | "| courier |\n",
768 | "| uucp |\n",
769 | "| daytime |\n",
770 | "| nntp |\n",
771 | "| netstat |\n",
772 | "| urp_i |\n",
773 | "| http_443 |\n",
774 | "| csnet_ns |\n",
775 | "| login |\n",
776 | "| klogin |\n",
777 | "| whois |\n",
778 | "| time |\n",
779 | "| link |\n",
780 | "| discard |\n",
781 | "| gopher |\n",
782 | "| supdup |\n",
783 | "| netbios_ns |\n",
784 | "| systat |\n",
785 | "| netbios_dgm |\n",
786 | "| kshell |\n",
787 | "| efs |\n",
788 | "| nnsp |\n",
789 | "| ssh |\n",
790 | "| netbios_ssn |\n",
791 | "| Z39_50 |\n",
792 | "| IRC |\n",
793 | "| ntp_u |\n",
794 | "| X11 |\n",
795 | "| pm_dump |\n",
796 | "| ldap |\n",
797 | "| remote_job |\n",
798 | "| sql_net |\n",
799 | "| shell |\n",
800 | "| tim_i |\n",
801 | "| pop_2 |\n",
802 | "| tftp_u |\n",
803 | "\n",
804 | "\n",
805 | "Flag: 11 \n",
806 | "| distinct values |\n",
807 | "|-------------------|\n",
808 | "| SF |\n",
809 | "| S3 |\n",
810 | "| SH |\n",
811 | "| REJ |\n",
812 | "| S0 |\n",
813 | "| RSTO |\n",
814 | "| RSTR |\n",
815 | "| RSTOS0 |\n",
816 | "| S1 |\n",
817 | "| S2 |\n",
818 | "| OTH |\n",
819 | "\n",
820 | "\n",
821 | "****** Label distribution in the dataset *****\n",
822 | "\n",
823 | "normal 2152\n",
824 | "neptune 1579\n",
825 | "guess_passwd 1231\n",
826 | "mscan 996\n",
827 | "warezmaster 944\n",
828 | "apache2 737\n",
829 | "satan 727\n",
830 | "processtable 685\n",
831 | "smurf 627\n",
832 | "back 359\n",
833 | "snmpguess 331\n",
834 | "saint 309\n",
835 | "mailbomb 293\n",
836 | "snmpgetattack 178\n",
837 | "portsweep 156\n",
838 | "ipsweep 141\n",
839 | "httptunnel 133\n",
840 | "nmap 73\n",
841 | "pod 41\n",
842 | "buffer_overflow 20\n",
843 | "multihop 18\n",
844 | "named 17\n",
845 | "ps 15\n",
846 | "sendmail 14\n",
847 | "rootkit 13\n",
848 | "xterm 13\n",
849 | "teardrop 12\n",
850 | "xlock 9\n",
851 | "land 7\n",
852 | "xsnoop 4\n",
853 | "ftp_write 3\n",
854 | "worm 2\n",
855 | "perl 2\n",
856 | "phf 2\n",
857 | "loadmodule 2\n",
858 | "sqlattack 2\n",
859 | "udpstorm 2\n",
860 | "imap 1\n",
861 | "Name: attack_type, dtype: int64\n",
862 | "\n",
863 | "\n",
864 | "***** End checking the statistics of the dataSet *****\n",
865 | "\n",
866 | "***** Here is how to testing dataset looks like before performing any pre-processing *****\n"
867 | ]
868 | },
869 | {
870 | "data": {
871 | "text/html": [
872 | "\n",
873 | "\n",
886 | "
\n",
887 | " \n",
888 | " \n",
889 | " | \n",
890 | " Duration | \n",
891 | " Protocol_type | \n",
892 | " Service | \n",
893 | " Flag | \n",
894 | " Src_bytes | \n",
895 | " Dst_bytes | \n",
896 | " Land | \n",
897 | " Wrong_fragment | \n",
898 | " Urgent | \n",
899 | " Hot | \n",
900 | " ... | \n",
901 | " Dst_host_srv_count | \n",
902 | " Dst_host_same_srv_rate | \n",
903 | " Dst_host_diff_srv_rate | \n",
904 | " Dst_host_same_src_port_rate | \n",
905 | " Dst_host_srv_diff_host_rate | \n",
906 | " Dst_host_serror_rate | \n",
907 | " Dst_host_srv_serror_rate | \n",
908 | " Dst_host_rerror_rate | \n",
909 | " Dst_host_srv_rerror_rate | \n",
910 | " attack_type | \n",
911 | "
\n",
912 | " \n",
913 | " \n",
914 | " \n",
915 | " 0 | \n",
916 | " 13 | \n",
917 | " tcp | \n",
918 | " telnet | \n",
919 | " SF | \n",
920 | " 118 | \n",
921 | " 2425 | \n",
922 | " 0 | \n",
923 | " 0 | \n",
924 | " 0 | \n",
925 | " 0 | \n",
926 | " ... | \n",
927 | " 10 | \n",
928 | " 0.38 | \n",
929 | " 0.12 | \n",
930 | " 0.04 | \n",
931 | " 0.0 | \n",
932 | " 0.00 | \n",
933 | " 0.00 | \n",
934 | " 0.12 | \n",
935 | " 0.3 | \n",
936 | " guess_passwd | \n",
937 | "
\n",
938 | " \n",
939 | " 1 | \n",
940 | " 0 | \n",
941 | " udp | \n",
942 | " private | \n",
943 | " SF | \n",
944 | " 44 | \n",
945 | " 0 | \n",
946 | " 0 | \n",
947 | " 0 | \n",
948 | " 0 | \n",
949 | " 0 | \n",
950 | " ... | \n",
951 | " 254 | \n",
952 | " 1.00 | \n",
953 | " 0.01 | \n",
954 | " 0.01 | \n",
955 | " 0.0 | \n",
956 | " 0.00 | \n",
957 | " 0.00 | \n",
958 | " 0.00 | \n",
959 | " 0.0 | \n",
960 | " snmpguess | \n",
961 | "
\n",
962 | " \n",
963 | " 2 | \n",
964 | " 0 | \n",
965 | " tcp | \n",
966 | " telnet | \n",
967 | " S3 | \n",
968 | " 0 | \n",
969 | " 44 | \n",
970 | " 0 | \n",
971 | " 0 | \n",
972 | " 0 | \n",
973 | " 0 | \n",
974 | " ... | \n",
975 | " 79 | \n",
976 | " 0.31 | \n",
977 | " 0.61 | \n",
978 | " 0.00 | \n",
979 | " 0.0 | \n",
980 | " 0.21 | \n",
981 | " 0.68 | \n",
982 | " 0.60 | \n",
983 | " 0.0 | \n",
984 | " processtable | \n",
985 | "
\n",
986 | " \n",
987 | " 3 | \n",
988 | " 0 | \n",
989 | " udp | \n",
990 | " private | \n",
991 | " SF | \n",
992 | " 53 | \n",
993 | " 55 | \n",
994 | " 0 | \n",
995 | " 0 | \n",
996 | " 0 | \n",
997 | " 0 | \n",
998 | " ... | \n",
999 | " 255 | \n",
1000 | " 1.00 | \n",
1001 | " 0.00 | \n",
1002 | " 0.87 | \n",
1003 | " 0.0 | \n",
1004 | " 0.00 | \n",
1005 | " 0.00 | \n",
1006 | " 0.00 | \n",
1007 | " 0.0 | \n",
1008 | " normal | \n",
1009 | "
\n",
1010 | " \n",
1011 | " 4 | \n",
1012 | " 0 | \n",
1013 | " tcp | \n",
1014 | " private | \n",
1015 | " SH | \n",
1016 | " 0 | \n",
1017 | " 0 | \n",
1018 | " 0 | \n",
1019 | " 0 | \n",
1020 | " 0 | \n",
1021 | " 0 | \n",
1022 | " ... | \n",
1023 | " 1 | \n",
1024 | " 0.06 | \n",
1025 | " 1.00 | \n",
1026 | " 1.00 | \n",
1027 | " 0.0 | \n",
1028 | " 1.00 | \n",
1029 | " 1.00 | \n",
1030 | " 0.00 | \n",
1031 | " 0.0 | \n",
1032 | " nmap | \n",
1033 | "
\n",
1034 | " \n",
1035 | "
\n",
1036 | "
5 rows × 42 columns
\n",
1037 | "
"
1038 | ],
1039 | "text/plain": [
1040 | " Duration Protocol_type Service Flag Src_bytes Dst_bytes Land \\\n",
1041 | "0 13 tcp telnet SF 118 2425 0 \n",
1042 | "1 0 udp private SF 44 0 0 \n",
1043 | "2 0 tcp telnet S3 0 44 0 \n",
1044 | "3 0 udp private SF 53 55 0 \n",
1045 | "4 0 tcp private SH 0 0 0 \n",
1046 | "\n",
1047 | " Wrong_fragment Urgent Hot ... Dst_host_srv_count \\\n",
1048 | "0 0 0 0 ... 10 \n",
1049 | "1 0 0 0 ... 254 \n",
1050 | "2 0 0 0 ... 79 \n",
1051 | "3 0 0 0 ... 255 \n",
1052 | "4 0 0 0 ... 1 \n",
1053 | "\n",
1054 | " Dst_host_same_srv_rate Dst_host_diff_srv_rate \\\n",
1055 | "0 0.38 0.12 \n",
1056 | "1 1.00 0.01 \n",
1057 | "2 0.31 0.61 \n",
1058 | "3 1.00 0.00 \n",
1059 | "4 0.06 1.00 \n",
1060 | "\n",
1061 | " Dst_host_same_src_port_rate Dst_host_srv_diff_host_rate \\\n",
1062 | "0 0.04 0.0 \n",
1063 | "1 0.01 0.0 \n",
1064 | "2 0.00 0.0 \n",
1065 | "3 0.87 0.0 \n",
1066 | "4 1.00 0.0 \n",
1067 | "\n",
1068 | " Dst_host_serror_rate Dst_host_srv_serror_rate Dst_host_rerror_rate \\\n",
1069 | "0 0.00 0.00 0.12 \n",
1070 | "1 0.00 0.00 0.00 \n",
1071 | "2 0.21 0.68 0.60 \n",
1072 | "3 0.00 0.00 0.00 \n",
1073 | "4 1.00 1.00 0.00 \n",
1074 | "\n",
1075 | " Dst_host_srv_rerror_rate attack_type \n",
1076 | "0 0.3 guess_passwd \n",
1077 | "1 0.0 snmpguess \n",
1078 | "2 0.0 processtable \n",
1079 | "3 0.0 normal \n",
1080 | "4 0.0 nmap \n",
1081 | "\n",
1082 | "[5 rows x 42 columns]"
1083 | ]
1084 | },
1085 | "execution_count": 3,
1086 | "metadata": {},
1087 | "output_type": "execute_result"
1088 | }
1089 | ],
1090 | "source": [
1091 | "#Define file names and call loadCSV to load the CSV files\n",
1092 | "testingDataSet = loadCSV(testingFileNameWithAbsolutePath)\n",
1093 | "difficultyLevel = testingDataSet.pop('difficulty_level')\n",
1094 | "\n",
1095 | "#Look at the statistics of the dataSet\n",
1096 | "getStatisticsOfData(testingDataSet)\n",
1097 | "print(\"\\n***** Here is how to testing dataset looks like before performing any pre-processing *****\")\n",
1098 | "testingDataSet.head()"
1099 | ]
1100 | },
1101 | {
1102 | "cell_type": "markdown",
1103 | "metadata": {},
1104 | "source": [
1105 | "### Perform pre-processing"
1106 | ]
1107 | },
1108 | {
1109 | "cell_type": "code",
1110 | "execution_count": 4,
1111 | "metadata": {
1112 | "scrolled": true
1113 | },
1114 | "outputs": [
1115 | {
1116 | "name": "stdout",
1117 | "output_type": "stream",
1118 | "text": [
1119 | "[['ExtraTreesClassifier', 'OneHotEncoder', 'Standardization']]\n",
1120 | "***************************************************************************************************************************\n",
1121 | "********************************************* Building Model- 0 As Below *************************************************\n",
1122 | "\t -- Feature Selection: \t ExtraTreesClassifier \n",
1123 | "\t -- Feature Encoding: \t OneHotEncoder \n",
1124 | "\t -- Feature Scaling: \t Standardization \n",
1125 | "\n",
1126 | "completeDataSet.shape: (37041, 43)\n",
1127 | "completeDataSet.head: Duration Protocol_type Service Flag Src_bytes Dst_bytes Land \\\n",
1128 | "0 0 udp other SF 146 0 0 \n",
1129 | "1 0 tcp private S0 0 0 0 \n",
1130 | "2 0 tcp http SF 232 8153 0 \n",
1131 | "3 0 tcp http SF 199 420 0 \n",
1132 | "4 0 tcp private REJ 0 0 0 \n",
1133 | "\n",
1134 | " Wrong_fragment Urgent Hot ... Dst_host_same_srv_rate \\\n",
1135 | "0 0 0 0 ... 0.00 \n",
1136 | "1 0 0 0 ... 0.10 \n",
1137 | "2 0 0 0 ... 1.00 \n",
1138 | "3 0 0 0 ... 1.00 \n",
1139 | "4 0 0 0 ... 0.07 \n",
1140 | "\n",
1141 | " Dst_host_diff_srv_rate Dst_host_same_src_port_rate \\\n",
1142 | "0 0.60 0.88 \n",
1143 | "1 0.05 0.00 \n",
1144 | "2 0.00 0.03 \n",
1145 | "3 0.00 0.00 \n",
1146 | "4 0.07 0.00 \n",
1147 | "\n",
1148 | " Dst_host_srv_diff_host_rate Dst_host_serror_rate \\\n",
1149 | "0 0.00 0.00 \n",
1150 | "1 0.00 1.00 \n",
1151 | "2 0.04 0.03 \n",
1152 | "3 0.00 0.00 \n",
1153 | "4 0.00 0.00 \n",
1154 | "\n",
1155 | " Dst_host_srv_serror_rate Dst_host_rerror_rate Dst_host_srv_rerror_rate \\\n",
1156 | "0 0.00 0.0 0.00 \n",
1157 | "1 1.00 0.0 0.00 \n",
1158 | "2 0.01 0.0 0.01 \n",
1159 | "3 0.00 0.0 0.00 \n",
1160 | "4 0.00 1.0 1.00 \n",
1161 | "\n",
1162 | " attack_type difficulty_level \n",
1163 | "0 normal 15 \n",
1164 | "1 neptune 19 \n",
1165 | "2 normal 21 \n",
1166 | "3 normal 21 \n",
1167 | "4 neptune 21 \n",
1168 | "\n",
1169 | "[5 rows x 43 columns]\n",
1170 | "\n",
1171 | "****** Start performing feature selection using ExtraTreesClassifier *****\n",
1172 | "****** Falls under wrapper methods (feature importance) *****\n",
1173 | "****** Start label encoding on the categorical features in the given dataset *****\n",
1174 | "****** Number of features before label encoding: 43\n",
1175 | "****** Number of categorical features in the dataset: 3\n",
1176 | "****** Categorical feature names in the dataset: ['Protocol_type' 'Service' 'Flag']\n",
1177 | "\n",
1178 | "****** Here is the list of unique values present in each categorical feature in the dataset *****\n",
1179 | "\n",
1180 | "\n",
1181 | "Protocol_type: 3 \n",
1182 | "| distinct values |\n",
1183 | "|-------------------|\n",
1184 | "| udp |\n",
1185 | "| tcp |\n",
1186 | "| icmp |\n",
1187 | "\n",
1188 | "\n",
1189 | "Service: 67 \n",
1190 | "| distinct values |\n",
1191 | "|-------------------|\n",
1192 | "| other |\n",
1193 | "| private |\n",
1194 | "| http |\n",
1195 | "| remote_job |\n",
1196 | "| ftp_data |\n",
1197 | "| name |\n",
1198 | "| netbios_ns |\n",
1199 | "| eco_i |\n",
1200 | "| mtp |\n",
1201 | "| telnet |\n",
1202 | "| finger |\n",
1203 | "| domain_u |\n",
1204 | "| supdup |\n",
1205 | "| uucp_path |\n",
1206 | "| Z39_50 |\n",
1207 | "| smtp |\n",
1208 | "| csnet_ns |\n",
1209 | "| uucp |\n",
1210 | "| netbios_dgm |\n",
1211 | "| urp_i |\n",
1212 | "| auth |\n",
1213 | "| domain |\n",
1214 | "| ftp |\n",
1215 | "| bgp |\n",
1216 | "| ldap |\n",
1217 | "| ecr_i |\n",
1218 | "| gopher |\n",
1219 | "| vmnet |\n",
1220 | "| systat |\n",
1221 | "| http_443 |\n",
1222 | "| efs |\n",
1223 | "| whois |\n",
1224 | "| imap4 |\n",
1225 | "| iso_tsap |\n",
1226 | "| echo |\n",
1227 | "| klogin |\n",
1228 | "| link |\n",
1229 | "| sunrpc |\n",
1230 | "| login |\n",
1231 | "| kshell |\n",
1232 | "| sql_net |\n",
1233 | "| time |\n",
1234 | "| hostnames |\n",
1235 | "| exec |\n",
1236 | "| ntp_u |\n",
1237 | "| discard |\n",
1238 | "| nntp |\n",
1239 | "| courier |\n",
1240 | "| ctf |\n",
1241 | "| ssh |\n",
1242 | "| daytime |\n",
1243 | "| shell |\n",
1244 | "| netstat |\n",
1245 | "| pop_3 |\n",
1246 | "| nnsp |\n",
1247 | "| IRC |\n",
1248 | "| pop_2 |\n",
1249 | "| printer |\n",
1250 | "| tim_i |\n",
1251 | "| pm_dump |\n",
1252 | "| red_i |\n",
1253 | "| netbios_ssn |\n",
1254 | "| rje |\n",
1255 | "| X11 |\n",
1256 | "| urh_i |\n",
1257 | "| http_8001 |\n",
1258 | "| tftp_u |\n",
1259 | "\n",
1260 | "\n",
1261 | "Flag: 11 \n",
1262 | "| distinct values |\n",
1263 | "|-------------------|\n",
1264 | "| SF |\n",
1265 | "| S0 |\n",
1266 | "| REJ |\n",
1267 | "| RSTR |\n",
1268 | "| SH |\n",
1269 | "| RSTO |\n",
1270 | "| S1 |\n",
1271 | "| RSTOS0 |\n",
1272 | "| S3 |\n",
1273 | "| S2 |\n",
1274 | "| OTH |\n",
1275 | "\n",
1276 | "****** Number of features after label encoding: 43\n",
1277 | "****** End label encoding on the categorical features in the given dataset *****\n",
1278 | "\n",
1279 | "****** ExtraTreesClassification is in progress *****\n",
1280 | "\n",
1281 | " selectedFeatures after ExtraTreesClassification: difficulty_level 0.076128\n",
1282 | "Same_srv_rate 0.071428\n",
1283 | "Dst_host_srv_serror_rate 0.049446\n",
1284 | "Service 0.046810\n",
1285 | "Dst_host_serror_rate 0.046286\n",
1286 | "Flag 0.044061\n",
1287 | "Dst_host_same_srv_rate 0.043586\n",
1288 | "Serror_rate 0.042794\n",
1289 | "Protocol_type 0.041901\n",
1290 | "Dst_host_srv_count 0.041828\n",
1291 | "Srv_serror_rate 0.040107\n",
1292 | "Dst_host_same_src_port_rate 0.037406\n",
1293 | "Count 0.036696\n",
1294 | "Logged_in 0.035569\n",
1295 | "Dst_host_rerror_rate 0.030801\n",
1296 | "Dst_host_diff_srv_rate 0.029853\n",
1297 | "Src_bytes 0.028388\n",
1298 | "Diff_srv_rate 0.027244\n",
1299 | "Dst_host_count 0.027063\n",
1300 | "Rerror_rate 0.024310\n",
1301 | "dtype: float64\n",
1302 | "****** Completed ExtraTreesClassification *****\n",
1303 | "\n",
1304 | "***** Number of columns in the dataSet after feature selection: 21\n",
1305 | "***** Columns in the dataSet after feature selection: \n",
1306 | " Index(['Protocol_type', 'Service', 'Flag', 'Src_bytes', 'Logged_in', 'Count',\n",
1307 | " 'Serror_rate', 'Srv_serror_rate', 'Rerror_rate', 'Same_srv_rate',\n",
1308 | " 'Diff_srv_rate', 'Dst_host_count', 'Dst_host_srv_count',\n",
1309 | " 'Dst_host_same_srv_rate', 'Dst_host_diff_srv_rate',\n",
1310 | " 'Dst_host_same_src_port_rate', 'Dst_host_serror_rate',\n",
1311 | " 'Dst_host_srv_serror_rate', 'Dst_host_rerror_rate', 'difficulty_level',\n",
1312 | " 'attack_type'],\n",
1313 | " dtype='object')\n",
1314 | "****** End performing feature selection using ExtraTreesClassifier *****\n",
1315 | "****** Start one hot encoding on the categorical features in the given dataset *****\n",
1316 | "****** Number of features before one hot encoding: 21\n",
1317 | "****** Number of categorical features in the dataset: 0\n",
1318 | "****** Categorical feature names in the dataset: []\n",
1319 | "\n",
1320 | "****** Here is the list of unique values present in each categorical feature in the dataset *****\n",
1321 | "\n",
1322 | "\n",
1323 | "attack_type: 40 \n",
1324 | "| distinct values |\n",
1325 | "|-------------------|\n",
1326 | "| normal |\n",
1327 | "| neptune |\n",
1328 | "| warezclient |\n",
1329 | "| ipsweep |\n",
1330 | "| portsweep |\n",
1331 | "| teardrop |\n",
1332 | "| nmap |\n",
1333 | "| satan |\n",
1334 | "| smurf |\n",
1335 | "| pod |\n",
1336 | "| back |\n",
1337 | "| guess_passwd |\n",
1338 | "| ftp_write |\n",
1339 | "| multihop |\n",
1340 | "| rootkit |\n",
1341 | "| buffer_overflow |\n",
1342 | "| imap |\n",
1343 | "| warezmaster |\n",
1344 | "| phf |\n",
1345 | "| land |\n",
1346 | "| loadmodule |\n",
1347 | "| spy |\n",
1348 | "| snmpguess |\n",
1349 | "| processtable |\n",
1350 | "| saint |\n",
1351 | "| mscan |\n",
1352 | "| apache2 |\n",
1353 | "| httptunnel |\n",
1354 | "| mailbomb |\n",
1355 | "| snmpgetattack |\n",
1356 | "| worm |\n",
1357 | "| sendmail |\n",
1358 | "| xlock |\n",
1359 | "| xterm |\n",
1360 | "| xsnoop |\n",
1361 | "| ps |\n",
1362 | "| named |\n",
1363 | "| udpstorm |\n",
1364 | "| perl |\n",
1365 | "| sqlattack |\n",
1366 | "\n",
1367 | "****** Number of features after one hot encoding: 21\n",
1368 | "****** End one hot encoding on the categorical features in the given dataset *****\n",
1369 | "\n",
1370 | "****** Start feature scaling of the features present in the dataset using StandardScalar *****\n",
1371 | "[[2 41 9 ... 0.0 15 'normal']\n",
1372 | " [1 46 5 ... 0.0 19 'neptune']\n",
1373 | " [1 22 9 ... 0.0 21 'normal']\n",
1374 | " ...\n",
1375 | " [1 57 2 ... 0.85 13 'mscan']\n",
1376 | " [1 54 1 ... 0.88 15 'mscan']\n",
1377 | " [2 46 9 ... 0.0 17 'snmpguess']]\n",
1378 | "\n",
1379 | "****** Number of features in the dataset before performing scaling: 20\n",
1380 | "\n",
1381 | "****** Features in the dataset before performing scaling ***** \n",
1382 | " [[2 41 9 ... 0.0 0.0 15]\n",
1383 | " [1 46 5 ... 1.0 0.0 19]\n",
1384 | " [1 22 9 ... 0.01 0.0 21]\n",
1385 | " ...\n",
1386 | " [1 57 2 ... 0.08 0.85 13]\n",
1387 | " [1 54 1 ... 0.0 0.88 15]\n",
1388 | " [2 46 9 ... 0.0 0.0 17]]\n",
1389 | "\n",
1390 | "****** Number of features in the dataset after performing scaling: 20\n",
1391 | "\n",
1392 | "****** Features in the dataset after performing scaling ***** \n",
1393 | " [[ 2.03857058 0.6299765 0.73536923 ... -0.54981386 -0.48776502\n",
1394 | " -0.85380057]\n",
1395 | " [-0.15478617 0.93890397 -0.66099165 ... 1.89967409 -0.48776502\n",
1396 | " 0.22813874]\n",
1397 | " [-0.15478617 -0.54394786 0.73536923 ... -0.52531898 -0.48776502\n",
1398 | " 0.76910839]\n",
1399 | " ...\n",
1400 | " [-0.15478617 1.61854439 -1.70826232 ... -0.35385482 2.03171007\n",
1401 | " -1.39477022]\n",
1402 | " [-0.15478617 1.43318791 -2.05735254 ... -0.54981386 2.12063272\n",
1403 | " -0.85380057]\n",
1404 | " [ 2.03857058 0.93890397 0.73536923 ... -0.54981386 -0.48776502\n",
1405 | " -0.31283092]]\n",
1406 | "scaledFeatures.head(): Protocol_type Service Flag Src_bytes Logged_in Count \\\n",
1407 | "0 2.038571 0.629977 0.735369 -0.011190 -0.732914 -0.581217 \n",
1408 | "1 -0.154786 0.938904 -0.660992 -0.011262 -0.732914 0.275339 \n",
1409 | "2 -0.154786 -0.543948 0.735369 -0.011147 1.364417 -0.643512 \n",
1410 | "3 -0.154786 -0.543948 0.735369 -0.011163 1.364417 -0.448840 \n",
1411 | "4 -0.154786 0.938904 -2.057353 -0.011262 -0.732914 0.259766 \n",
1412 | "\n",
1413 | " Serror_rate Srv_serror_rate Rerror_rate Same_srv_rate ... \\\n",
1414 | "0 -0.556584 -0.552030 -0.460806 -1.421427 ... \n",
1415 | "1 1.851192 1.851769 -0.460806 -1.491319 ... \n",
1416 | "2 -0.075029 -0.071270 -0.460806 0.721924 ... \n",
1417 | "3 -0.556584 -0.552030 -0.460806 0.721924 ... \n",
1418 | "4 -0.556584 -0.552030 2.274941 -1.235049 ... \n",
1419 | "\n",
1420 | " Dst_host_count Dst_host_srv_count Dst_host_same_srv_rate \\\n",
1421 | "0 0.656445 -1.050270 -1.193023 \n",
1422 | "1 0.656445 -0.821669 -0.966271 \n",
1423 | "2 -1.709884 1.272317 1.074493 \n",
1424 | "3 0.656445 1.272317 1.074493 \n",
1425 | "4 0.656445 -0.885678 -1.034297 \n",
1426 | "\n",
1427 | " Dst_host_diff_srv_rate Dst_host_same_src_port_rate Dst_host_serror_rate \\\n",
1428 | "0 2.187298 2.137976 -0.561390 \n",
1429 | "1 -0.237144 -0.498320 1.904034 \n",
1430 | "2 -0.457548 -0.408446 -0.487427 \n",
1431 | "3 -0.457548 -0.498320 -0.561390 \n",
1432 | "4 -0.148983 -0.498320 -0.561390 \n",
1433 | "\n",
1434 | " Dst_host_srv_serror_rate Dst_host_rerror_rate difficulty_level \\\n",
1435 | "0 -0.549814 -0.487765 -0.853801 \n",
1436 | "1 1.899674 -0.487765 0.228139 \n",
1437 | "2 -0.525319 -0.487765 0.769108 \n",
1438 | "3 -0.549814 -0.487765 0.769108 \n",
1439 | "4 -0.549814 2.476323 0.769108 \n",
1440 | "\n",
1441 | " attack_type \n",
1442 | "0 normal \n",
1443 | "1 neptune \n",
1444 | "2 normal \n",
1445 | "3 normal \n",
1446 | "4 neptune \n",
1447 | "\n",
1448 | "[5 rows x 21 columns]\n",
1449 | "scaledFeatures.shape: (37041, 21)\n",
1450 | "\n",
1451 | "****** End of feature scaling of the features present in the dataset using StandardScalar *****\n",
1452 | "\n",
1453 | "features.shape: (37041, 20)\n",
1454 | "label.shape: (37041,)\n"
1455 | ]
1456 | },
1457 | {
1458 | "data": {
1459 | "text/html": [
1460 | "\n",
1461 | "\n",
1474 | "
\n",
1475 | " \n",
1476 | " \n",
1477 | " | \n",
1478 | " Protocol_type | \n",
1479 | " Service | \n",
1480 | " Flag | \n",
1481 | " Src_bytes | \n",
1482 | " Logged_in | \n",
1483 | " Count | \n",
1484 | " Serror_rate | \n",
1485 | " Srv_serror_rate | \n",
1486 | " Rerror_rate | \n",
1487 | " Same_srv_rate | \n",
1488 | " ... | \n",
1489 | " Dst_host_count | \n",
1490 | " Dst_host_srv_count | \n",
1491 | " Dst_host_same_srv_rate | \n",
1492 | " Dst_host_diff_srv_rate | \n",
1493 | " Dst_host_same_src_port_rate | \n",
1494 | " Dst_host_serror_rate | \n",
1495 | " Dst_host_srv_serror_rate | \n",
1496 | " Dst_host_rerror_rate | \n",
1497 | " difficulty_level | \n",
1498 | " attack_type | \n",
1499 | "
\n",
1500 | " \n",
1501 | " \n",
1502 | " \n",
1503 | " 0 | \n",
1504 | " 2.038571 | \n",
1505 | " 0.629977 | \n",
1506 | " 0.735369 | \n",
1507 | " -0.011190 | \n",
1508 | " -0.732914 | \n",
1509 | " -0.581217 | \n",
1510 | " -0.556584 | \n",
1511 | " -0.552030 | \n",
1512 | " -0.460806 | \n",
1513 | " -1.421427 | \n",
1514 | " ... | \n",
1515 | " 0.656445 | \n",
1516 | " -1.050270 | \n",
1517 | " -1.193023 | \n",
1518 | " 2.187298 | \n",
1519 | " 2.137976 | \n",
1520 | " -0.561390 | \n",
1521 | " -0.549814 | \n",
1522 | " -0.487765 | \n",
1523 | " -0.853801 | \n",
1524 | " normal | \n",
1525 | "
\n",
1526 | " \n",
1527 | " 1 | \n",
1528 | " -0.154786 | \n",
1529 | " 0.938904 | \n",
1530 | " -0.660992 | \n",
1531 | " -0.011262 | \n",
1532 | " -0.732914 | \n",
1533 | " 0.275339 | \n",
1534 | " 1.851192 | \n",
1535 | " 1.851769 | \n",
1536 | " -0.460806 | \n",
1537 | " -1.491319 | \n",
1538 | " ... | \n",
1539 | " 0.656445 | \n",
1540 | " -0.821669 | \n",
1541 | " -0.966271 | \n",
1542 | " -0.237144 | \n",
1543 | " -0.498320 | \n",
1544 | " 1.904034 | \n",
1545 | " 1.899674 | \n",
1546 | " -0.487765 | \n",
1547 | " 0.228139 | \n",
1548 | " neptune | \n",
1549 | "
\n",
1550 | " \n",
1551 | " 2 | \n",
1552 | " -0.154786 | \n",
1553 | " -0.543948 | \n",
1554 | " 0.735369 | \n",
1555 | " -0.011147 | \n",
1556 | " 1.364417 | \n",
1557 | " -0.643512 | \n",
1558 | " -0.075029 | \n",
1559 | " -0.071270 | \n",
1560 | " -0.460806 | \n",
1561 | " 0.721924 | \n",
1562 | " ... | \n",
1563 | " -1.709884 | \n",
1564 | " 1.272317 | \n",
1565 | " 1.074493 | \n",
1566 | " -0.457548 | \n",
1567 | " -0.408446 | \n",
1568 | " -0.487427 | \n",
1569 | " -0.525319 | \n",
1570 | " -0.487765 | \n",
1571 | " 0.769108 | \n",
1572 | " normal | \n",
1573 | "
\n",
1574 | " \n",
1575 | " 3 | \n",
1576 | " -0.154786 | \n",
1577 | " -0.543948 | \n",
1578 | " 0.735369 | \n",
1579 | " -0.011163 | \n",
1580 | " 1.364417 | \n",
1581 | " -0.448840 | \n",
1582 | " -0.556584 | \n",
1583 | " -0.552030 | \n",
1584 | " -0.460806 | \n",
1585 | " 0.721924 | \n",
1586 | " ... | \n",
1587 | " 0.656445 | \n",
1588 | " 1.272317 | \n",
1589 | " 1.074493 | \n",
1590 | " -0.457548 | \n",
1591 | " -0.498320 | \n",
1592 | " -0.561390 | \n",
1593 | " -0.549814 | \n",
1594 | " -0.487765 | \n",
1595 | " 0.769108 | \n",
1596 | " normal | \n",
1597 | "
\n",
1598 | " \n",
1599 | " 4 | \n",
1600 | " -0.154786 | \n",
1601 | " 0.938904 | \n",
1602 | " -2.057353 | \n",
1603 | " -0.011262 | \n",
1604 | " -0.732914 | \n",
1605 | " 0.259766 | \n",
1606 | " -0.556584 | \n",
1607 | " -0.552030 | \n",
1608 | " 2.274941 | \n",
1609 | " -1.235049 | \n",
1610 | " ... | \n",
1611 | " 0.656445 | \n",
1612 | " -0.885678 | \n",
1613 | " -1.034297 | \n",
1614 | " -0.148983 | \n",
1615 | " -0.498320 | \n",
1616 | " -0.561390 | \n",
1617 | " -0.549814 | \n",
1618 | " 2.476323 | \n",
1619 | " 0.769108 | \n",
1620 | " neptune | \n",
1621 | "
\n",
1622 | " \n",
1623 | "
\n",
1624 | "
5 rows × 21 columns
\n",
1625 | "
"
1626 | ],
1627 | "text/plain": [
1628 | " Protocol_type Service Flag Src_bytes Logged_in Count \\\n",
1629 | "0 2.038571 0.629977 0.735369 -0.011190 -0.732914 -0.581217 \n",
1630 | "1 -0.154786 0.938904 -0.660992 -0.011262 -0.732914 0.275339 \n",
1631 | "2 -0.154786 -0.543948 0.735369 -0.011147 1.364417 -0.643512 \n",
1632 | "3 -0.154786 -0.543948 0.735369 -0.011163 1.364417 -0.448840 \n",
1633 | "4 -0.154786 0.938904 -2.057353 -0.011262 -0.732914 0.259766 \n",
1634 | "\n",
1635 | " Serror_rate Srv_serror_rate Rerror_rate Same_srv_rate ... \\\n",
1636 | "0 -0.556584 -0.552030 -0.460806 -1.421427 ... \n",
1637 | "1 1.851192 1.851769 -0.460806 -1.491319 ... \n",
1638 | "2 -0.075029 -0.071270 -0.460806 0.721924 ... \n",
1639 | "3 -0.556584 -0.552030 -0.460806 0.721924 ... \n",
1640 | "4 -0.556584 -0.552030 2.274941 -1.235049 ... \n",
1641 | "\n",
1642 | " Dst_host_count Dst_host_srv_count Dst_host_same_srv_rate \\\n",
1643 | "0 0.656445 -1.050270 -1.193023 \n",
1644 | "1 0.656445 -0.821669 -0.966271 \n",
1645 | "2 -1.709884 1.272317 1.074493 \n",
1646 | "3 0.656445 1.272317 1.074493 \n",
1647 | "4 0.656445 -0.885678 -1.034297 \n",
1648 | "\n",
1649 | " Dst_host_diff_srv_rate Dst_host_same_src_port_rate Dst_host_serror_rate \\\n",
1650 | "0 2.187298 2.137976 -0.561390 \n",
1651 | "1 -0.237144 -0.498320 1.904034 \n",
1652 | "2 -0.457548 -0.408446 -0.487427 \n",
1653 | "3 -0.457548 -0.498320 -0.561390 \n",
1654 | "4 -0.148983 -0.498320 -0.561390 \n",
1655 | "\n",
1656 | " Dst_host_srv_serror_rate Dst_host_rerror_rate difficulty_level \\\n",
1657 | "0 -0.549814 -0.487765 -0.853801 \n",
1658 | "1 1.899674 -0.487765 0.228139 \n",
1659 | "2 -0.525319 -0.487765 0.769108 \n",
1660 | "3 -0.549814 -0.487765 0.769108 \n",
1661 | "4 -0.549814 2.476323 0.769108 \n",
1662 | "\n",
1663 | " attack_type \n",
1664 | "0 normal \n",
1665 | "1 neptune \n",
1666 | "2 normal \n",
1667 | "3 normal \n",
1668 | "4 neptune \n",
1669 | "\n",
1670 | "[5 rows x 21 columns]"
1671 | ]
1672 | },
1673 | "execution_count": 4,
1674 | "metadata": {},
1675 | "output_type": "execute_result"
1676 | }
1677 | ],
1678 | "source": [
1679 | "arrayOfModels = defineArrayForPreProcessing()\n",
1680 | "completeEncodedAndScaledDataset = performPreprocessing(trainingDataSet, testingDataSet, arrayOfModels)\n",
1681 | "completeEncodedAndScaledDataset.head()"
1682 | ]
1683 | },
1684 | {
1685 | "cell_type": "markdown",
1686 | "metadata": {},
1687 | "source": [
1688 | "### After preprocessing, check the shape of the dataset"
1689 | ]
1690 | },
1691 | {
1692 | "cell_type": "code",
1693 | "execution_count": 5,
1694 | "metadata": {},
1695 | "outputs": [
1696 | {
1697 | "name": "stdout",
1698 | "output_type": "stream",
1699 | "text": [
1700 | "(37041, 20) (37041,)\n",
1701 | "Number of unique values in label: 40\n",
1702 | "Unique values in label: ['apache2' 'back' 'buffer_overflow' 'ftp_write' 'guess_passwd'\n",
1703 | " 'httptunnel' 'imap' 'ipsweep' 'land' 'loadmodule' 'mailbomb' 'mscan'\n",
1704 | " 'multihop' 'named' 'neptune' 'nmap' 'normal' 'perl' 'phf' 'pod'\n",
1705 | " 'portsweep' 'processtable' 'ps' 'rootkit' 'saint' 'satan' 'sendmail'\n",
1706 | " 'smurf' 'snmpgetattack' 'snmpguess' 'spy' 'sqlattack' 'teardrop'\n",
1707 | " 'udpstorm' 'warezclient' 'warezmaster' 'worm' 'xlock' 'xsnoop' 'xterm']\n"
1708 | ]
1709 | }
1710 | ],
1711 | "source": [
1712 | "x = completeEncodedAndScaledDataset.drop('attack_type',axis=1)\n",
1713 | "y = completeEncodedAndScaledDataset['attack_type']\n",
1714 | "print(x.shape, y.shape)\n",
1715 | "print('Number of unique values in label: ',len(np.unique(y)))\n",
1716 | "print('Unique values in label: ',np.unique(y))\n",
1717 | "#print(y.value_counts())"
1718 | ]
1719 | },
1720 | {
1721 | "cell_type": "markdown",
1722 | "metadata": {},
1723 | "source": [
1724 | "### Encode the categorical label values"
1725 | ]
1726 | },
1727 | {
1728 | "cell_type": "code",
1729 | "execution_count": 6,
1730 | "metadata": {},
1731 | "outputs": [
1732 | {
1733 | "name": "stdout",
1734 | "output_type": "stream",
1735 | "text": [
1736 | "(37041, 20) (37041, 40)\n"
1737 | ]
1738 | },
1739 | {
1740 | "name": "stderr",
1741 | "output_type": "stream",
1742 | "text": [
1743 | "D:\\Anaconda3\\envs\\tf_gpu\\lib\\site-packages\\ipykernel_launcher.py:2: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
1744 | " \n"
1745 | ]
1746 | }
1747 | ],
1748 | "source": [
1749 | "onehot = pd.get_dummies(y)\n",
1750 | "y = onehot.as_matrix()\n",
1751 | "print(x.shape, y.shape)"
1752 | ]
1753 | },
1754 | {
1755 | "cell_type": "markdown",
1756 | "metadata": {},
1757 | "source": [
1758 | "## Build a neural Network model"
1759 | ]
1760 | },
1761 | {
1762 | "cell_type": "code",
1763 | "execution_count": 7,
1764 | "metadata": {},
1765 | "outputs": [],
1766 | "source": [
1767 | "'''\n",
1768 | "This function is used to define, compile and filt a neural network\n",
1769 | "'''\n",
1770 | "def nn_model(trainx, trainy, valx,valy,bt_size,epochs, layers):\n",
1771 | " model = Sequential()\n",
1772 | " model.add(Dense(layers[0],activation='relu', input_shape=(trainx.shape[1],)))\n",
1773 | " for l in layers[1:]:\n",
1774 | " model.add(Dense(l, activation='relu' ))\n",
1775 | " model.add(Dropout(0.30))\n",
1776 | " model.add(Dense(trainy.shape[1], activation='softmax'))\n",
1777 | " model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n",
1778 | " hist=model.fit(trainx, trainy, batch_size=bt_size, epochs=epochs, shuffle=True, validation_data=(valx,valy), verbose=True)\n",
1779 | " return hist"
1780 | ]
1781 | },
1782 | {
1783 | "cell_type": "code",
1784 | "execution_count": 8,
1785 | "metadata": {
1786 | "scrolled": true
1787 | },
1788 | "outputs": [
1789 | {
1790 | "name": "stdout",
1791 | "output_type": "stream",
1792 | "text": [
1793 | "WARNING:tensorflow:From D:\\Anaconda3\\envs\\tf_gpu\\lib\\site-packages\\tensorflow\\python\\ops\\resource_variable_ops.py:435: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
1794 | "Instructions for updating:\n",
1795 | "Colocations handled automatically by placer.\n",
1796 | "WARNING:tensorflow:From D:\\Anaconda3\\envs\\tf_gpu\\lib\\site-packages\\tensorflow\\python\\keras\\layers\\core.py:143: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n",
1797 | "Instructions for updating:\n",
1798 | "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n",
1799 | "Train on 27780 samples, validate on 9261 samples\n",
1800 | "WARNING:tensorflow:From D:\\Anaconda3\\envs\\tf_gpu\\lib\\site-packages\\tensorflow\\python\\ops\\math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
1801 | "Instructions for updating:\n",
1802 | "Use tf.cast instead.\n",
1803 | "Epoch 1/100\n",
1804 | "27780/27780 [==============================] - 12s 420us/sample - loss: 1.0481 - acc: 0.7006 - val_loss: 0.6489 - val_acc: 0.7742\n",
1805 | "Epoch 2/100\n",
1806 | "27780/27780 [==============================] - 11s 381us/sample - loss: 0.7296 - acc: 0.7751 - val_loss: 0.5398 - val_acc: 0.8213\n",
1807 | "Epoch 3/100\n",
1808 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.6316 - acc: 0.7992 - val_loss: 0.4577 - val_acc: 0.8402\n",
1809 | "Epoch 4/100\n",
1810 | "27780/27780 [==============================] - 11s 392us/sample - loss: 0.5731 - acc: 0.8157 - val_loss: 0.4002 - val_acc: 0.8790\n",
1811 | "Epoch 5/100\n",
1812 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5312 - acc: 0.8364 - val_loss: 0.3762 - val_acc: 0.8743\n",
1813 | "Epoch 6/100\n",
1814 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.5000 - acc: 0.8528 - val_loss: 0.3834 - val_acc: 0.8959\n",
1815 | "Epoch 7/100\n",
1816 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5053 - acc: 0.8568 - val_loss: 0.3448 - val_acc: 0.9031\n",
1817 | "Epoch 8/100\n",
1818 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.4810 - acc: 0.8643 - val_loss: 0.3433 - val_acc: 0.9037\n",
1819 | "Epoch 9/100\n",
1820 | "27780/27780 [==============================] - 11s 395us/sample - loss: 0.4779 - acc: 0.8639 - val_loss: 0.3262 - val_acc: 0.9022\n",
1821 | "Epoch 10/100\n",
1822 | "27780/27780 [==============================] - 37s 1ms/sample - loss: 0.4537 - acc: 0.8704 - val_loss: 0.3192 - val_acc: 0.9094\n",
1823 | "Epoch 11/100\n",
1824 | "27780/27780 [==============================] - 40s 1ms/sample - loss: 0.4726 - acc: 0.8733 - val_loss: 0.3233 - val_acc: 0.9058\n",
1825 | "Epoch 12/100\n",
1826 | "27780/27780 [==============================] - 42s 2ms/sample - loss: 0.4438 - acc: 0.8746 - val_loss: 0.2998 - val_acc: 0.9097\n",
1827 | "Epoch 13/100\n",
1828 | "27780/27780 [==============================] - 11s 413us/sample - loss: 0.4329 - acc: 0.8796 - val_loss: 0.3063 - val_acc: 0.9161\n",
1829 | "Epoch 14/100\n",
1830 | "27780/27780 [==============================] - 10s 361us/sample - loss: 0.4481 - acc: 0.8753 - val_loss: 0.2901 - val_acc: 0.9099\n",
1831 | "Epoch 15/100\n",
1832 | "27780/27780 [==============================] - 10s 364us/sample - loss: 0.4404 - acc: 0.8772 - val_loss: 0.3386 - val_acc: 0.9068\n",
1833 | "Epoch 16/100\n",
1834 | "27780/27780 [==============================] - 10s 367us/sample - loss: 0.4451 - acc: 0.8808 - val_loss: 0.2942 - val_acc: 0.9142\n",
1835 | "Epoch 17/100\n",
1836 | "27780/27780 [==============================] - 10s 370us/sample - loss: 0.4482 - acc: 0.8839 - val_loss: 0.2815 - val_acc: 0.9166\n",
1837 | "Epoch 18/100\n",
1838 | "27780/27780 [==============================] - 10s 372us/sample - loss: 0.4619 - acc: 0.8762 - val_loss: 0.3048 - val_acc: 0.9119\n",
1839 | "Epoch 19/100\n",
1840 | "27780/27780 [==============================] - 10s 373us/sample - loss: 0.4768 - acc: 0.8789 - val_loss: 0.2868 - val_acc: 0.9173\n",
1841 | "Epoch 20/100\n",
1842 | "27780/27780 [==============================] - 10s 374us/sample - loss: 0.4557 - acc: 0.8812 - val_loss: 0.2839 - val_acc: 0.9148\n",
1843 | "Epoch 21/100\n",
1844 | "27780/27780 [==============================] - 10s 373us/sample - loss: 0.4341 - acc: 0.8857 - val_loss: 0.2716 - val_acc: 0.9219\n",
1845 | "Epoch 22/100\n",
1846 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.4442 - acc: 0.8855 - val_loss: 0.2975 - val_acc: 0.9151\n",
1847 | "Epoch 23/100\n",
1848 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.4499 - acc: 0.8844 - val_loss: 0.2735 - val_acc: 0.9175\n",
1849 | "Epoch 24/100\n",
1850 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.4499 - acc: 0.8874 - val_loss: 0.2904 - val_acc: 0.9228\n",
1851 | "Epoch 25/100\n",
1852 | "27780/27780 [==============================] - 10s 378us/sample - loss: 0.4782 - acc: 0.8844 - val_loss: 0.3537 - val_acc: 0.9121\n",
1853 | "Epoch 26/100\n",
1854 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.5170 - acc: 0.8828 - val_loss: 0.2650 - val_acc: 0.9230\n",
1855 | "Epoch 27/100\n",
1856 | "27780/27780 [==============================] - 10s 377us/sample - loss: 0.4159 - acc: 0.8907 - val_loss: 0.2711 - val_acc: 0.9246\n",
1857 | "Epoch 28/100\n",
1858 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.4607 - acc: 0.8807 - val_loss: 0.3042 - val_acc: 0.9160\n",
1859 | "Epoch 29/100\n",
1860 | "27780/27780 [==============================] - 11s 381us/sample - loss: 0.4366 - acc: 0.8857 - val_loss: 0.2859 - val_acc: 0.9171\n",
1861 | "Epoch 30/100\n",
1862 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.4303 - acc: 0.8923 - val_loss: 0.2832 - val_acc: 0.9194\n",
1863 | "Epoch 31/100\n",
1864 | "27780/27780 [==============================] - 11s 380us/sample - loss: 0.5191 - acc: 0.8786 - val_loss: 0.4336 - val_acc: 0.8996\n",
1865 | "Epoch 32/100\n",
1866 | "27780/27780 [==============================] - 11s 381us/sample - loss: 0.5173 - acc: 0.8720 - val_loss: 0.3143 - val_acc: 0.9112\n",
1867 | "Epoch 33/100\n",
1868 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.4416 - acc: 0.8854 - val_loss: 0.2853 - val_acc: 0.9193\n",
1869 | "Epoch 34/100\n",
1870 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.4670 - acc: 0.8879 - val_loss: 0.2971 - val_acc: 0.9171\n",
1871 | "Epoch 35/100\n",
1872 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.4538 - acc: 0.8838 - val_loss: 0.2842 - val_acc: 0.9210\n",
1873 | "Epoch 36/100\n",
1874 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.4627 - acc: 0.8860 - val_loss: 0.3164 - val_acc: 0.9176\n",
1875 | "Epoch 37/100\n",
1876 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.4645 - acc: 0.8835 - val_loss: 0.3689 - val_acc: 0.9066\n",
1877 | "Epoch 38/100\n",
1878 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.4621 - acc: 0.8809 - val_loss: 0.3259 - val_acc: 0.9129\n",
1879 | "Epoch 39/100\n",
1880 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4537 - acc: 0.8845 - val_loss: 0.2831 - val_acc: 0.9199\n",
1881 | "Epoch 40/100\n",
1882 | "27780/27780 [==============================] - 11s 394us/sample - loss: 0.4641 - acc: 0.8784 - val_loss: 0.2876 - val_acc: 0.9129\n",
1883 | "Epoch 41/100\n",
1884 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4442 - acc: 0.8835 - val_loss: 0.2862 - val_acc: 0.9189\n",
1885 | "Epoch 42/100\n",
1886 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4541 - acc: 0.8864 - val_loss: 0.2863 - val_acc: 0.9191\n",
1887 | "Epoch 43/100\n",
1888 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4861 - acc: 0.8815 - val_loss: 0.3277 - val_acc: 0.9136\n",
1889 | "Epoch 44/100\n",
1890 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.4576 - acc: 0.8830 - val_loss: 0.3660 - val_acc: 0.9008\n",
1891 | "Epoch 45/100\n",
1892 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5172 - acc: 0.8793 - val_loss: 0.3160 - val_acc: 0.9036\n",
1893 | "Epoch 46/100\n",
1894 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5235 - acc: 0.8769 - val_loss: 0.3028 - val_acc: 0.9205\n",
1895 | "Epoch 47/100\n",
1896 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.4917 - acc: 0.8840 - val_loss: 0.2869 - val_acc: 0.9187\n",
1897 | "Epoch 48/100\n",
1898 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.4863 - acc: 0.8767 - val_loss: 0.3165 - val_acc: 0.9057\n",
1899 | "Epoch 49/100\n",
1900 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.4999 - acc: 0.8771 - val_loss: 0.2654 - val_acc: 0.9218\n",
1901 | "Epoch 50/100\n",
1902 | "27780/27780 [==============================] - 11s 405us/sample - loss: 0.4832 - acc: 0.8861 - val_loss: 0.2689 - val_acc: 0.9221\n",
1903 | "Epoch 51/100\n",
1904 | "27780/27780 [==============================] - 11s 390us/sample - loss: 0.4703 - acc: 0.8832 - val_loss: 0.2785 - val_acc: 0.9167\n"
1905 | ]
1906 | },
1907 | {
1908 | "name": "stdout",
1909 | "output_type": "stream",
1910 | "text": [
1911 | "Epoch 52/100\n",
1912 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.5021 - acc: 0.8807 - val_loss: 0.2870 - val_acc: 0.9158\n",
1913 | "Epoch 53/100\n",
1914 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.4962 - acc: 0.8730 - val_loss: 0.2757 - val_acc: 0.9199\n",
1915 | "Epoch 54/100\n",
1916 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.4674 - acc: 0.8850 - val_loss: 0.2959 - val_acc: 0.9132\n",
1917 | "Epoch 55/100\n",
1918 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.5025 - acc: 0.8792 - val_loss: 0.3102 - val_acc: 0.9173\n",
1919 | "Epoch 56/100\n",
1920 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5713 - acc: 0.8688 - val_loss: 0.2990 - val_acc: 0.9135\n",
1921 | "Epoch 57/100\n",
1922 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.4880 - acc: 0.8781 - val_loss: 0.3754 - val_acc: 0.9047\n",
1923 | "Epoch 58/100\n",
1924 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.5401 - acc: 0.8740 - val_loss: 0.3097 - val_acc: 0.9072\n",
1925 | "Epoch 59/100\n",
1926 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.4993 - acc: 0.8759 - val_loss: 0.3168 - val_acc: 0.9186\n",
1927 | "Epoch 60/100\n",
1928 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.4789 - acc: 0.8816 - val_loss: 0.2970 - val_acc: 0.9136\n",
1929 | "Epoch 61/100\n",
1930 | "27780/27780 [==============================] - 11s 382us/sample - loss: 0.5057 - acc: 0.8786 - val_loss: 0.3339 - val_acc: 0.9128\n",
1931 | "Epoch 62/100\n",
1932 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.5387 - acc: 0.8769 - val_loss: 0.3822 - val_acc: 0.9097\n",
1933 | "Epoch 63/100\n",
1934 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5181 - acc: 0.8760 - val_loss: 0.3005 - val_acc: 0.9182\n",
1935 | "Epoch 64/100\n",
1936 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5243 - acc: 0.8725 - val_loss: 0.3749 - val_acc: 0.9021\n",
1937 | "Epoch 65/100\n",
1938 | "27780/27780 [==============================] - 11s 383us/sample - loss: 0.5467 - acc: 0.8649 - val_loss: 0.4062 - val_acc: 0.8889\n",
1939 | "Epoch 66/100\n",
1940 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.5542 - acc: 0.8632 - val_loss: 0.3906 - val_acc: 0.9081\n",
1941 | "Epoch 67/100\n",
1942 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5404 - acc: 0.8688 - val_loss: 0.3076 - val_acc: 0.9142\n",
1943 | "Epoch 68/100\n",
1944 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5097 - acc: 0.8747 - val_loss: 0.3242 - val_acc: 0.9136\n",
1945 | "Epoch 69/100\n",
1946 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5479 - acc: 0.8698 - val_loss: 0.3221 - val_acc: 0.9144\n",
1947 | "Epoch 70/100\n",
1948 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5271 - acc: 0.8699 - val_loss: 0.3466 - val_acc: 0.9121\n",
1949 | "Epoch 71/100\n",
1950 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.5463 - acc: 0.8725 - val_loss: 0.3034 - val_acc: 0.9151\n",
1951 | "Epoch 72/100\n",
1952 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5350 - acc: 0.8666 - val_loss: 0.3085 - val_acc: 0.9122\n",
1953 | "Epoch 73/100\n",
1954 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5153 - acc: 0.8709 - val_loss: 0.2951 - val_acc: 0.9156\n",
1955 | "Epoch 74/100\n",
1956 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5131 - acc: 0.8686 - val_loss: 0.3406 - val_acc: 0.8987\n",
1957 | "Epoch 75/100\n",
1958 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.6295 - acc: 0.8508 - val_loss: 0.3472 - val_acc: 0.9013\n",
1959 | "Epoch 76/100\n",
1960 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5793 - acc: 0.8554 - val_loss: 0.3403 - val_acc: 0.9096\n",
1961 | "Epoch 77/100\n",
1962 | "27780/27780 [==============================] - 11s 385us/sample - loss: 0.5163 - acc: 0.8666 - val_loss: 0.3244 - val_acc: 0.9090\n",
1963 | "Epoch 78/100\n",
1964 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5066 - acc: 0.8669 - val_loss: 0.3062 - val_acc: 0.9177\n",
1965 | "Epoch 79/100\n",
1966 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5329 - acc: 0.8664 - val_loss: 0.3231 - val_acc: 0.8989\n",
1967 | "Epoch 80/100\n",
1968 | "27780/27780 [==============================] - 11s 388us/sample - loss: 0.5266 - acc: 0.8626 - val_loss: 0.3232 - val_acc: 0.9050\n",
1969 | "Epoch 81/100\n",
1970 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5326 - acc: 0.8676 - val_loss: 0.3178 - val_acc: 0.9121\n",
1971 | "Epoch 82/100\n",
1972 | "27780/27780 [==============================] - 11s 384us/sample - loss: 0.5326 - acc: 0.8677 - val_loss: 0.2966 - val_acc: 0.9139\n",
1973 | "Epoch 83/100\n",
1974 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.5424 - acc: 0.8681 - val_loss: 0.3101 - val_acc: 0.9135\n",
1975 | "Epoch 84/100\n",
1976 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5325 - acc: 0.8724 - val_loss: 0.2976 - val_acc: 0.9140\n",
1977 | "Epoch 85/100\n",
1978 | "27780/27780 [==============================] - 11s 388us/sample - loss: 0.5753 - acc: 0.8688 - val_loss: 0.2812 - val_acc: 0.9152\n",
1979 | "Epoch 86/100\n",
1980 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5258 - acc: 0.8711 - val_loss: 0.3018 - val_acc: 0.9160\n",
1981 | "Epoch 87/100\n",
1982 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5738 - acc: 0.8590 - val_loss: 0.3412 - val_acc: 0.9003\n",
1983 | "Epoch 88/100\n",
1984 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5871 - acc: 0.8603 - val_loss: 0.3327 - val_acc: 0.9085\n",
1985 | "Epoch 89/100\n",
1986 | "27780/27780 [==============================] - 11s 386us/sample - loss: 0.5534 - acc: 0.8593 - val_loss: 0.3462 - val_acc: 0.9055\n",
1987 | "Epoch 90/100\n",
1988 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5189 - acc: 0.8657 - val_loss: 0.3108 - val_acc: 0.9120\n",
1989 | "Epoch 91/100\n",
1990 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5696 - acc: 0.8617 - val_loss: 0.3282 - val_acc: 0.9000\n",
1991 | "Epoch 92/100\n",
1992 | "27780/27780 [==============================] - 11s 390us/sample - loss: 0.5057 - acc: 0.8669 - val_loss: 0.3007 - val_acc: 0.9076\n",
1993 | "Epoch 93/100\n",
1994 | "27780/27780 [==============================] - 11s 387us/sample - loss: 0.5720 - acc: 0.8626 - val_loss: 0.3421 - val_acc: 0.9071\n",
1995 | "Epoch 94/100\n",
1996 | "27780/27780 [==============================] - 11s 390us/sample - loss: 0.5298 - acc: 0.8645 - val_loss: 0.3073 - val_acc: 0.9088\n",
1997 | "Epoch 95/100\n",
1998 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.5363 - acc: 0.8654 - val_loss: 0.3450 - val_acc: 0.9090\n",
1999 | "Epoch 96/100\n",
2000 | "27780/27780 [==============================] - 11s 391us/sample - loss: 0.5624 - acc: 0.8659 - val_loss: 0.3341 - val_acc: 0.9159\n",
2001 | "Epoch 97/100\n",
2002 | "27780/27780 [==============================] - 11s 391us/sample - loss: 0.5754 - acc: 0.8666 - val_loss: 0.3329 - val_acc: 0.9079\n",
2003 | "Epoch 98/100\n",
2004 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.6255 - acc: 0.8611 - val_loss: 0.4500 - val_acc: 0.9037\n",
2005 | "Epoch 99/100\n",
2006 | "27780/27780 [==============================] - 11s 388us/sample - loss: 0.6341 - acc: 0.8553 - val_loss: 0.4839 - val_acc: 0.8996\n",
2007 | "Epoch 100/100\n",
2008 | "27780/27780 [==============================] - 11s 389us/sample - loss: 0.7025 - acc: 0.8531 - val_loss: 0.5064 - val_acc: 0.8945\n"
2009 | ]
2010 | }
2011 | ],
2012 | "source": [
2013 | "trainx, testx, trainy, testy = train_test_split(x,y, test_size=0.25, random_state=42)\n",
2014 | "layers=[trainx.shape[1],800,500,400,300,200,100,50,10]\n",
2015 | "hist = nn_model(trainx, trainy, testx, testy,16,100,layers)"
2016 | ]
2017 | },
2018 | {
2019 | "cell_type": "code",
2020 | "execution_count": 9,
2021 | "metadata": {},
2022 | "outputs": [
2023 | {
2024 | "name": "stdout",
2025 | "output_type": "stream",
2026 | "text": [
2027 | "MAX Accuracy during training: 89.22966122627258\n",
2028 | "MAX Accuracy during validation: 92.46301651000977\n"
2029 | ]
2030 | }
2031 | ],
2032 | "source": [
2033 | "print('MAX Accuracy during training: ',max(hist.history['acc'])*100)\n",
2034 | "print('MAX Accuracy during validation: ',max(hist.history['val_acc'])*100)"
2035 | ]
2036 | },
2037 | {
2038 | "cell_type": "markdown",
2039 | "metadata": {},
2040 | "source": [
2041 | "## Plot the training accuracy and testing accuracy"
2042 | ]
2043 | },
2044 | {
2045 | "cell_type": "code",
2046 | "execution_count": 13,
2047 | "metadata": {},
2048 | "outputs": [
2049 | {
2050 | "data": {
2051 | "text/plain": [
2052 | "[]"
2053 | ]
2054 | },
2055 | "execution_count": 13,
2056 | "metadata": {},
2057 | "output_type": "execute_result"
2058 | },
2059 | {
2060 | "data": {
2061 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJztnXeYFFX2/t/L4JBB4qrkKCBIGhEBxYQCBnDBFZV1WBXWhIoEcdeImL6LPyMGVHRNYEJExEVFxcwwRMlJkCEODDAwA0zo8/vjdNF5umame3qoej/P0091Vd2qOlXV/d57zz33XiMiIIQQ4g4qJNoAQgghZQdFnxBCXARFnxBCXARFnxBCXARFnxBCXARFnxBCXARFnxBCXARFnxBCXARFnxBCXETFRBsQTL169aRZs2aJNoMQQk4oFi9evFdE6kdLV+5Ev1mzZkhPT0+0GYQQckJhjNlqJx3dO4QQ4iIo+oQQ4iIo+oQQ4iIo+oQQ4iIo+oQQ4iIo+oQQ4iIo+oQQ4iIo+gQAsGMH8OKLwMGDibaEEBJPKPouZ9064OabgWbNgFGjgMmTE20RISSeUPRPcAoLgdzcotN8+y3Qsydw4EDg9t9/Bzp0AN57DxgxAujTB3jjDSA/P372nkgcPKiZ4oIFwG+/JdoaQmIDRf8E5/bbgfbtgSNHwu8/eBBITQV+/RWYNStw3/vv63L9emDKFGDMGGDnTuDzz+Nja24u8Mcf8Tl3rHnlFeDkk4G2bYHzzwfOOQf47rtEW0VI6aHon8D88Qfw+uvA1q3Ayy+HTzNunPrr69QBPvnEt10EmDlTBa1xY93Wvz/QqBHw6quxt/XwYb3W6acH2lEUIsDChbqMJZMmAVdeCSxaFDnNnDlAkybAu+8CX30FtGwJjBwZOXMVAZYti72thMQcESlXn27dugmxx4gRIsnJImedJVK/vsihQ4H7v/5aBBAZN05k9GhNe/Cg7lu1Sve99FLgMY88ots3bQq9XlqaSK9eIm++WbRdkyeLDBsmsn27ruflifTvL1KhgsgZZ4gkJYm8+270+3vzTbXl7bcDt3s8Iu+9J3L//SJ33CFy880iK1dGP59F48Z6XkDkr38VWb8+9Pz16okMH+7b9u23mn78+PDnfOAB3T9lin07InH4sEhOTunPQ9wFgHSxobEJF/ngD0U/PB5P4PrWrSInnSRy660iv/6qb/Lxx337Dx0SadZMpE0bkdxckZ9+0jTvvaf7H31UxBiRHTsCz5uRoaJ8772+bceOqaglJek5Lrsssp1ZWSJVqmi6WrVEXn9dxRMQefVVteuCC/Tar70W+Tz5+SItW+pxrVuLFBT49r3/vm6vUEGkdm29Xrt2IkePFv0MRUR27dJjH35YPzVq6DPyZ+NGTfPKK4Hbb75Zr5meHrj9nXc0feXKIqecEirYwe8uGn36iHTubO9+REQWLBC54QaRwsLiXSfRHD0q8sUXibbCOVD0T0A8HpE5c8KXPC+/XKRrV5Hff9dtt9+uor91q64PGKACeOCAyIoVKhrGqNiLqCCceqrIVVfpepcuIj17hrdj4ECtOSxeLDJxokjbtvpLSU0VueIKkSZNIt/Df/6jaT/+WMXLKlE/9JAvTW6uyCWXaCaya1f487z1lh5nZRhWzSAnR6RRI7Xfygi++ELTTJwY2S6LOXM07Q8/6PqLL+r6xo2+NO+9p9uWLg08dv9+fYYdOoh89ZXIkSMiP/6oNajzzxeZP1+P+7//8x3z++8ip50mMmlSdNtENBO2ntnYsfaOOfdcTb9qlb305YXHHlO7ly9PtCXOgKJfDlizRqR9e5Hvv4+eNitL5Oqr9Y00barrFq+9pturVhWpVElL6ZUqacnTIj1d0/TpoyLUoIHIZ58FXuP227VU/PvvmvY//wlvy9y5PuExRqRHD5FZs3TfE0/o9gMHQo8rKFDb+/TR9cJCLek
/9VRoaXfZMj3P1Kmh58nPF2nVSjOuggKRjh1FTj9dvz/8cKBoW1xzjd732rXh78nioYe0tG65wtat0/O9/LIvzZ136rPOzw89/osvtEQP6LOsXl1rIvv26f5LLxWpU0fdaLt26fOwakgvvFC0bSL6PACRiy/WZ//tt0WnX7HC966mTYt+/pKSmxu+JvHLLyILFxb/fIWFWhMFNIOPJVu3qkvwtttEUlJEHnwwtue3Q36+ujjnzy+7a1L0ywFXXKFPuEOH8AJisWCBll4rVhQZNUqXgwerUG7frm6SPn1Edu7UEj2gQhLsd7/qKt03ZIhIZmbodb77Tvf36hVauvWnsFB9+1On6jX9sUrKVg3Cn08/1X2ffFLUU1E8HpHmzdXXH8zbb+t5Zs7U9Y8+0vWnnlKh/dvfQo/ZuVPk5JP1ORXlThkwQNsV/O1o0kR9+xbdu4ucd17kcxw+rOJ/550iF12kGYfFokVq64QJmllWqSLy229ae/J3r0XisstUDA8f1sykUaPAAkAwt96qBYAaNUT++c+iz+3PkiX6+7FqS8eOqXi//nro7+q337QQcdZZvnYaEZHp0/V32Lat/etazJvny6zuvtv+cUePhmY+hYX6W37mGZGzz/adt3p1rbGeemrxXWylZdo0teGss8ru2hT9BPP99/p0L7xQwvqHLXbuVGFo1UobSkXUPWCVPq+6SkuWlsvH49GSf7gSclaWZiCRfmQFBfonAEQ6dSrZfW3dGloytrjgAhXQojI4f+65J7Bx2bKxTRuRM8/0/bkLCzXjtPzmW7aEP59VIwpu+LXweFS8UlMDt994o2YYBQUqKsnJ2vhdUqzM1xhfxnXkiLqAKlbUzDcchw6pgN91l66npamopqTobyItLfDZHjyowpaaqu4yu+/08899wlipkj7vSpV826pUEXn6aX0eM2fqM2/aVKRaNZGGDdXtN22a3l/Vqrr0f4d2GDxYpG5drc1dcEH09JmZImPGqC3Jyfp/ueAC/Z1YNS9Az/fEE1oDKijwue8i/WbiQV6eFmgsu375pWyuS9FPIB6P5vCNGqkP+txzVWzD/THuvVfdDf5+/MJCdRNYboGnnoqdbSNH6jkfeaRkx3s8WvO47bbA7cuXF9/WH3/UY6ZP922zfPkffxyY9sMPdfsDD0Q+X2Ghlo779Qu/38qwXnwxcPv06br9t9/0Y7e2EomVKzUTmTw5cPvBgyqeF14Y/riZM/Xa/i6dadPUtWWJWqtWPhfWlCm6beFCfS4VKmgNoSiOHdNztG2rpfpx47SWM3as1qiWLPHVUNu2VUE/+2yR3bv1HTdp4hOzSy7R5xRsczR27tTMb8wY/T3Wrh25oOLxaIBCjRp6f8OG6X9m6FCRc87RmtGYMVoICufaW7w49DdWFIcOlV6kX39drzljhv5XrrmmdOezC0U/gXzwgT5ZK7QxPV3/PP4RMSLaMFijRvgfxe7dWi3t1s1+ydkOv/yiP0R/l0Rx6dVLMzJ/RozQEqLl27ZDQYGWvC13TU6OliTPOiu0Cu/xiPz8c/RnMWqU2hEu8sUSqGAf9J49uv3RR0WefVa/Z2TYv49w5OWF3z5pkkR0raWmamYR7tgdOzRKqEEDFckFC7T2062bPhurMTta+5HV0P7ll5HTeDwqkg0aaDtTbq5v365d6tIaOlSfcWZm8TN7q11o7VpfxvXnn+HTWu/syitL1lCdn6+1kTvvtJd++HDNXEr6/o8d04y9e3d9jmPGaOEt0v3FEop+GeAfRmhx7JhIixba+Oi/PzU1tKHREoDgKBGLffuil9wSwS23qDhZpbP8fBWiv/+9+OcaMUJdFEeO+J5HcCNtcZg9W88RrgFtwgQtYR45ErqvSxf14197rdbQ4sX27SoqEyYEbs/PV3fH9dcXffzmzVoCt2qBb7yh2+2I765dWsgoKuTWH7shoM2baztSJHJz1TWVna3nbNHC19j/889q9+zZocd5PPpegkN2i0ufPlq
QiMaKFVo4A4oOJy6KV1/V4+fO1fU//tD3HVzgiwcU/Tjz3/+q8PlXaz0ekX/8I3xJavt2jeo45RStQufkaAegAQPK1u5YYPlJt23T9QULJKxLxg5WpNAbb6j4WyGlJSU7W4U9WFRFtITatWv448aP1xDY005Tf3M8ueIK/R34l+itZ/jhh9GPz8rS9oHTTgvsE9CyZdHP7+ab9dlEi3AqLn/7m5ZuI3HXXXLcPWV1jLMatLOzfbWsYKy2h2idAaNhZfb+NZb9+0U2bAhMd/nlWgv2D22OxvbtIk8+qfeYmqq1o7PPDnRXDR6shaJ4d7ij6MeRvDz1bVphlFbJ9N//lpCYdH9WrdIffY0a2pkGUL/2iYYlUFZpZuxYFcziNuaJqIugRg1tSKxYsXRuJ4tzz1W3hz9WW0SkCBer93JwnH08sGojn37q2zZmTPGeoccTKiLXX68ZQTjWrdNS7OjRJbO5KCyX0Z49ofuys/X9XnqpCvuQIeqq8a9ttWwZmtF6POoiadYssqvMLp99Fvpfu+IK/b1Z4aLWb/rJJ/U3Ur16oIswJ0eDMT76SCO0li4Vuekmrb0DIjVr6n+7WzdfQIaF1Xb17LOlu49oUPTjiBWO9frr2shWvbqvNDNiRNEhWtu2+SJRgv3iJwpZWRLgSmjbVqRv35Kfb+hQPZ9dv2s0rN7G/mGr69dLkdX2I0d8DZSlcS/ZIT9fxdmq5X36qYpGuPDV4vD884E1MH/eeEP3BXf8iwVWpFq43rWWz/633yIfP3iwNi77Y4V0vvpq6e3bvTswM9+4UX8fdevK8eCAHj18NScrk/jmG985Hn3UVyiwPpUra9+XzZuLvr7Ho/0uKlXS3vPxgqIfJwoK1MfYubMvjr5VKzne2GSn0TUrS6NfIvnyTwQaNlQf/oYNeu/PPVfyc333nVaJw/UtKAlWBM6MGb5t1tANy5ZFPq5vX/WVl8W4N//+t/p6b7xR7erWLbp4RCMtTc/10Ueh+6wOZ/EYqiE7W0X04YcDt3s82jnRamyOxMSJand2tu+4Xr20bcXuUBTR8Hd9jR6tpfytW33uWP8CweHDWoK3akW5uRp9d+ml+vuZNUsz0d277V9/715ty/jLX8JnyrGAoh8jPJ7AH+yMGaF/rG3btNTrpkGy+vXTjO+ZZ/R5hBugLVEUFGh7y403+raNHq0ls6Iy5R9+iH8V3GLzZp/Y3HVXbMTt2DEtTYYbvuH88zVjjRft24c2EFuD1EXzyVvurp9/1vX//U/X7fRgtsuwYdqOcviwuvmGDtXtHo/+d4cMCfxtXHKJ1uJFtE8KoC6g0rBypbq6unaNj1bEVPQB9AOwDsBGABPC7G8KYD6AFQC+B9DIb18qgA3eT2q0a5UH0V+yRAWjZ09tfK1XT+S++zTsqmNHdWecaINbxZpx41Rg+vTRP3x5Y/Bg9bF6PBoX3rq1xnWXJ6ZO1R7OseScc0R69w7c5vFoQ+LIkbG9lj9WI2ZwA2adOoENqOGw+k+89JL679u105J5rEr5Ij4303336TJcj3J/nntO061bp7ZYIZilZc4crRUNGRJ7DYmZ6ANIArAJQAsAyQCWA2gflOYjS9ABXAjgHe/3OgA2e5e1vd9rF3W9RIt+QYG6a2rUUEH75z9FBg3SqrgVzhWpx6ebsIZKAMomHK24WKFzb7+trqiqVX0Nz07m7ru1n4J/4+e2bRK2U1ossSK6rAEAt21TV5mdns3+mZIltsHjRpWWJUv0vElJGgYaTcAtt+X550uJI9MiMXmynvNf/4rdOUViK/rnAJjnt34fgPuC0qyySvcADIBs7/drAbzql+5VANcWdb1Ei77l+w3ukbl5s0ZYXHNNbDtLnahYf6LyGoHk7z5p1qxoX76TsHou+zcYWh234vmeFi70iePhw1rKN8Z+O8X552sN+uSTtdEz1uPVWJ20APsD07Vpo+lbtSpdP4FgPB4N+IhFOKo/dkXfzsxZDQFs81vP8G7zZzm
Awd7vVwGoYYypa/PYcoPHAzz+uE4/OGhQ4L7mzXXS8BkzgIoVE2NfeaJdOyApSWfkOuecRFsTSvPmQO/eQL9+QHo60KlToi0qGy64QJfffOPbtny5Ljt2jN91O3UCTjpJ/x/du+usbJMm6Xuwe/zatcChQ8AzzwDGxNa+ihWBs8/W3+vQofaOGTBAl2PH6m89Vhij05NedJHOxrZgQezObQc78hXu8UvQ+lgALxpjhgP4AcB2AAU2j4UxZiSAkQDQpEkTGybFh88/B1auBN55B6jAiSSLpHJloEcPoHPn2P4hYskPP8RePMo79eoBXboA8+cD99+v21asAJo2BWrVit91K1UCzjwT+PhjoEEDYN48oG9f+8d37qzLW24BOnSIj42vvKKZSpUq9tLfeqtOj5maGntbTjpJn1WPHsC116ru1KkT++uEJVpVADbcO0HpqwPIkBPMvePx6GiGLVrQfWOXggI2aJdHxo3TkENrCI927TScON5Mnaruz+DhuO2wb5+GlRY1jLQTWbpUO+VZ0USlATF07ywC0NoY09wYkwxgKIDZ/gmMMfWMMda57gMwzft9HoBLjDG1jTG1AVzi3Vbu+PprdQNMmED3jV2SklgjKo9cdBGQlwf89BNw9Ciwbp2WwuPNiBHq3jnllOIfW6cO8NxzQO3asberPNO5M/Dww/rcZswom2tG/cuKSAGAO6BivQbAhyKyyhgz0RhzpTfZ+QDWGWPWA/gLgMe8x2YBeBSacSwCMNG7rdzx8svAaacBN9yQaEsIKR29ewPJyerXX71a26rc0qZxIjJ+vLp5brsN2LEj/tczWisoP6SkpEh6enqZX7dFC23omT69zC9NSMy54ALg4EFg1Cjgxhu1tN+mTaKtIpFYv15L/X36AHPnlqwtyhizWERSoqVj5RxAbi6wZYtG7RDiBC6+GFi6FPj2W224bNky0RaRomjTRt1bV1wR/2tR9KGlIBENQyRFkJcH9OoFvPBCoi0hUbjoIl1+8IGGapbXCCviY8QIdfHEO+KMog9gzRpdulr08/K0RWn37shppk0DfvkFeO+9MjOLlIyUFKBmTSA/v2waccmJA0Uf2tiVlAS0bp1oSxLI++8Djzyidcxw5OYCEydqMSQ9XQOeSbmlYkVfRy024hJ/KPrQkn6rVhrx4EpEgGef1e8zZuh6MC++COzcCTz0EFBYCPz4o71zFxZqhkHKHMvFw5I+8YeiDxV9R7t2li4F7rkH2Ls3/P4fftC++n36AH/8AaSlBe4/eBB48kmgf3+NL0tOBr77Lvp1V67UkIQ2bYDt20t/H6RY/OMf2t2/d+9EW0LKE64X/fx8YMOGE1j0CwvVP7VtG5CdHVpK//xz4NxzdUCTnj2BjRtDz/Hss0DdulrKT04OjVudPBnYvx947DENBenRQ8NCIiECvPQScNZZwJ49mmkMHBhY4t+8GfjwQw0iLw07dgDDhtnLhEpKZqa2Z+Tnx+8acaB6dW0YZAc6EoCdbrtl+SnrYRhWr5YTd7jkggIdktB/DrdatXTGiJkzdYaTChV06qKZM3V+uHr1RH75xXeOTZt0OERrnNdBg3S2CWtYwY0bdXjCv/3Nd8xDD+kxkfrM33uv2tKvn8iuXTrDtTWIeH6+yNNP6/i/gNq/Y0fkeyxqUPWFC3UWa0CnNipJ/387jBzps9XOOAEej06t5LYxBUhCAWfOsscnn+hTSE8v08uGx+PRiXcXL7aX3pqJ/cEH9bjJk0WGD9eZK6xMYNAg3yAs69frOLGVKunknps2+eaOy8jQNB98oMfNn68Cfc45mpH8+afvutakqLNmhb+HU08VufzywIF5rEHEGzXS5eWXa6ZUpYpmRJ9/HnquF19UW7/6KnTf22/rvmbN1ObKlTWTKe6YvF9+qc8uEocP6+QKZ56pg6S0aRN9olnr+fz978WzhZBSQNG3yaRJ+hQsXUwY+fm+CTs7dIguXnPmaNqbbw5/rvnzdXKA4IHAMzN
FbrpJBaxCBR2Z67rrfPtzcnSm95tv9k1eOn164DmOHlWRveuu0GtbVaepUwO3ezwit9yi0yu9/77v/lavFunUSY8ZM8Y3+4c1k7cxOtC6/6wg1nx6F1zgm1jXmhrp+ef13D/+KHLDDUULem6uTlqalBR54Pc339Tz/vCDfurW1UHfw01EazFunC/TLe0ce4TYhKJvk+uuE2nSpEwvGcrhwzrBKCBy4YW6/N//Iqf/4w+daqhz5+hz0UVi+3YVp9atRZYvD9x3/fUq/ElJgRmCPxdeqKXfYKwplCJNmhtuWM4jR3SmeEBrFs89p2J/ySW+WUGef17TZmXpVFjt2ulxFh6PPsNKlXyZSFKSLt96K7wt1jRNFSpohhSO3r21dG9lUps2iZx1lh6Xmipy8GDoMe3b61ybTZpoBu6fYRWXhQt1FhRCokDRt0mXLuoVSBgHDoj06KHC88orOrv1aaeJ9O0bPv2sWeo+qVVL/e3xwKpFNG4ssn9/+DSPPqpprJK2xV//KtK0acmmPpoxQ10pgMh552mtw+NRX3rt2iJ792p7RVKSyKJFocfv3q02d+yoNY2sLH2OFSqEToV25Ig+5z591GefnOxzcVmsWaO2PPVU4Pa8PJEHHtDzNm+u7RYWf/yhxzz9tLajAOrGKgn79qnry5rcleNYkyKg6Edg3z5fAbGwUF3Ko0fH6WI7dmip+fffw+8/eFAF/6STAkXpiSf01fiXwHfv1sZUQEuyS5bEyWjRjCc1NXDOvWB++klCJg8tKFBxvvHGkl97/XqRRx4Ryc72bVuxQgW2e3e95kMPRT4+WBgPH9baQ3JyYLuB5Q6aP19L70lJoT+EceO0vSNSA/GCBVojue8+3zarprN2rWZY/fppRlZUY3UkRo5UuwYP1nMOGaIZISFhoOhHoHNn1YC8PN88qsHu55jg8YhceqleoGlTkT17AvdnZ6sLoGJFkU8/DdyXlSVSrZoKr4gK7CmnqHBNmlQ6d0GsOHZMo3pGjPBtW7xY7/fdd2N/vVtv1XN37Vr8+9+/X6t0gMioUfp8GzUS6dXLVyP5+9/1fqyaS16etj8MGlT0uQcP1ozu0CFd799fpGVL33nXr1fhDjcL9oIFkdsSfv1V7b3nHj3X5MmawZxxhtYgWOonQVD0I1C9ut712LFxnjD6hRf05Lfdpo2e552nQimijZfnnKNi4F9S9mfUKK0BTJqky5YttcRbnvjHP9SHbkX2/N//6T2XpFQbjb17tQaxdm3Jjs/J0YZnQBtiAZF583z7V69WUU1N1VqA1ag+Z07R57XE+fnn9RqVK+sUUP707atRU/4ur717NRM/7TSRbdsC0+fna22uYcPAGs+cOdoGA2h7yrvvatWVEKHohyUnR++4bl1d9u2ry717Y3yh1asDQwjfe08vNHy4VtkrVNAqf1ERIJs2aTpAZMCAyL71RLJliwqXFUHUr582sJZnvv1Wa17nnRfa7mC5zwDNaHv3tjd3Zs+e6tufNSs0MxERee013e7vkrMKBVWqqMBb4l5YqO4tIPzvIz9fw1XbtJHj0U0pKSKPPx4aqUVcBUU/DJY75+WXfbX9Bg1ifJHdu9UFUa9eoC/Y6rBUsaKWBIPdPeF46imRJ58s31X5u+7SzGnFCnWP3HFHoi2KTkGBr9blz6FDImlpWlMpzjO3GmxbtFC3XHCHsr179b3fe69vW7du+iP83/+0xnfZZVr1tH6YgwYV3RheUCDy88+aQfTqpccMG1Z84fd4RL75xueeIicsFP0wWDXxL74Q2bBBC9sXXhiDExcU6B//yiv1z21MqJ++oEBjzzdsiMEFyxG7d6vQWSXPmTMTbVHZU1Cg7htAZODA8Gn69dPagMejGSSgIaMiWgqxahjNm4u8807xxduKpiqu8FsZ1pVXliziipQbKPphsGrfVu/bFSuid66MSna29i4FNJRy/HiRVatKbesJxYMPynFXg1uHHrCigSJFBUybpvvT0rQT2kk
nBYa7vvSSL2S3pFjCf8MN9gQ8J0ddXVaY7JQp4dN5PJoRxTNirLQsWhSftqQTCIp+GF59Ve84uN2sxGzZojHhSUnakGfH/+tEDh7UhpIy7mNRrjh2TEUzUme5rCwV+rvu0l7Af/1rfOx44AH9kX/5ZfS0Vmb9/fcadVS5cmh48f796mqyotCC76+gIPHux/nz9T/YuLH2k/Bn27bAfhQOhqIfBmtUgdIUpo6zcqU2CNSqFX5sGLexeHHk/ghEuewydf8BIrNnx+caVue+aH7LTZs08srqcb17t2ZGZ5yhVeAVK0S+/lqjxipWFPnnP9XuRx7xnePoUZFzz1WX1Pz58bmfaGzapGNNtWmjobPNm6vQFxZqB7lKlbTNwwVQ9MNw++36uyg1Ho/+qerW1V6bhNjhv/+V49ED8exrYYXOFjWK4JVXavzy9u2+bdaYRv6fhg21wVhE5OqrNdpo61b9D9x0kxzvuQ3oUBb+IabxJjtbM6natbV3elqaSM2aGtZ63nly3OVqjL3AiRMcin4YhgzRsbtKzbx5+uiefTYGJyOu4cABFdrx4+N/nZo1Ra65Jvz+2bMl7PASIjrWzwcfaP+RTz8N7AewZYuK/jXX+Hoe//vf2jZwzz0qruefH/v72bVL2xN+/FHdVtOmaY2jZ09163zzjS/tzz9rYEHNmjpY3qJFauc778TernKGXdE3mrb8kJKSIunp6XE593nn6YQS339fipN4PDo5SFYWsHYtUKlSrMwjbiAjA2jQIP5zc44fDzz9tE6a07y5b/vhw8AZZwA1agBLlhTfjkceAR5+WCeV7t8f+Owz3ywtEybohDuHDulkO+E4ehT46itg1y6dnCYnB2jcGGjRAmjfXr/7s2AB0Ldv+AlsTj0VmDQJuPHGwO2bNwNVqwKnnKL/11NP1bkj33+/ePfqT2Ym8OabOitN9erFOzYrC1i1SuetrFWr5DZEwRizWERSoia0kzOU5SeeJf02bQLnAikRM2ZoyeGEnHWFuIaMDG04Du43MWaM/n5/+qlk583JUb/56adrjcIfK/xz4cLIxz/+eKD7yGrjsKK//IfC3r9fXUetWmmt46uvtCS/aVPRk+sEk5qqfn87oaw7d2pPZ//oJ49H5Ior1MZftBP0AAAU40lEQVTBgyNHRuXmaue/KVNE7r5b23As1xego8bGMSwWdO+EUquWjm5QLHJytJFy+3Ydqa1VK43YYe9HUt4ZPlzdMdOn6+916VJ1h4wcWbrzZmWFn4BiyxY53vsxEikpGuW1bZsKd2GhZlA//KBjVfmPiHrddWpvUZmIHayJgfxnjIvEgAGa9oEHfNustphzz9XlxIm+fQcPaoPxxRdr9JMl8FWr6lAZ112nbrSxYyVu41J5oegHceSI3u2kScU8MDU1sGRiZzwWQsoDmzfrsBiARuG0a6eNyPHqS+HxaInafxA+fzIy1JYnngi/339EVGucpEcfLb1dWVmamdx/f9HpvvvO96wAHT4jI0NLi717a8Y5bJju++ADHTLbGtOlQwct3c+Zo8cEl+gLCkTOPlt76sd83BeFoh/E1q2+92ibLVu0pHHNNVp6uf9+jYxgz0VyolBYqG6Xbt30DxA8C1qsufjiyP01XnpJbVi9OvLx+/apgAIaahmrGnXv3jo8SiQ8Hp0cp1EjjQq69FL973fqpLUlqyd9bq7WVqwCYN++9udaXbFC3VnDh5f+fsJA0Q8iLU2KHx59xx3qF41Zby5CEoTHEzpJTDwYP15L6uE6w1xyiYZTRis0bd+u8dVbt8bOLqstIdLcCJYLyJplLTvbNw6SNWubRUaGjvhakv45//qXnjMO/Roo+kF8/rlEbWMKYPdu9dGVZkIQQtyGFeiwdGng9gMHtAA1blxi7Fq6VO2aNk3b6EaPVn/7m2/q0OAtW4a21e3Zo6GesexxnJur7YJt2hSvMdoGdkW/Ytzih8oZu3fr8i9/sXnACy8Ax44B48bFzSZCHEfXrrp
csgTo3Nm3/csvNexy4MDE2NWpk4Zu3nqr/q+Tk4HatQPDOOfO1VBUi/r1gWHDYmtHlSrAiy8C/fppSO2//hXb89ugQplfMUEUS/QPHdIXc9VVQNu2cbWLEEfRsqWvD4A/n32mItqjR2LsMga45x6gZ0/glVeAnTv1s2wZ8PjjwKOPqhCXBZdeCgwerH0Mtmwpm2v64aqSfs2aQOXKNhK/8AJw4IB2NiGE2KdCBaBLl0DRz8vTUvTVVweWpMuasWP140+nTvopa555Rms/d98NzJpVppd2VUnfVil/3jzgwQeBQYO05y0hpHh07aol6MJCXf/+eyA7O3GunfJI48aqM599BsyeXaaXpuj7s2KFlkY6dADefrtM7CLEcXTtChw5Aqxbp6X8hx7S4QcuvjjRlpUvRo/WITEGD1bf/pEjZXJZW6JvjOlnjFlnjNlojAnxeRhjmhhjvjPGLDXGrDDGDPBub2aMOWKMWeb9vBLrG7BLVNHfsQO47DL1R86Zo0tCSPHxb8ydMAH47Tfg9dcjj8fjVpKTtRZ0/fXAE08AHTsC8+fH/bJRRd8YkwRgCoD+ANoDuNYY0z4o2f0APhSRLgCGAnjJb98mEens/dwSI7uLTVTRv/NOYP9+4IsvgEaNyswuQhxH27Yq8E8/rb7rUaOAIUMSbVX5pF494K23gG++0cbme+/VQeLiiJ2G3O4ANorIZgAwxswAMBDAar80AqCm93stADtiaWRpyc/Xge4iir6IjuZ39dWBYWaEkOKTlKT/o19/1Xax//wn0RaVfy66SN3Le/b4Ri2NE3bO3hDANr/1DO82fx4GMMwYkwFgLoBRfvuae90+C4wx55bG2JKyZ48uI4r+li3A3r3A2WeXlUmEOJvevTUO/sMPOfy4XapUAZo2jftl7Ii+CbMteBD+awG8JSKNAAwA8I4xpgKAnQCaeN0+9wB43xhTM+hYGGNGGmPSjTHpmZmZxbsDG0SN0V+4UJfdu8f82oS4ksceAzZtApo1S7QlJAg7op8BwH9mg0YIdd/cBOBDABCRXwFUBlBPRI6JyD7v9sUANgFoE3wBEZkqIikiklK/fv3i30UUoop+WpoG8HfsGPNrE+JKTjpJS/qk3GFH9BcBaG2MaW6MSYY21AYHlv4J4CIAMMa0g4p+pjGmvrchGMaYFgBaA9gcK+PtYquk37Wr/lAJIcTBRBV9ESkAcAeAeQDWQKN0VhljJhpjrvQmGwNghDFmOYDpAIZ7BwA6D8AK7/aPAdwiIlnxuJGiKFL08/M1tIz+fEKIC7A1DIOIzIU20Ppve9Dv+2oAvcIc9wmAT0ppY6nZvVuntaxWLczOlSt13k768wkhLsAVPXKLjNG3GnFZ0ieEuACKflqadpBglAEhxAVQ9Bcu1FK+CReZSgghzsIVor9zZwTRz84G1qyhP58Q4hocL/p79gD79gFtQnoHAEhP1yEY6M8nhLgEx4v+8uW6DDtPQlqaLjluPiHEJVD0W7cG6tQpU5sIISRROF70ly0DGjYE6tYNs3PpUt/Y34QQ4gIcL/rLl0cYLfnwYR1ds0OHsjaJEEIShqNF/9gxYO3aCK6dNWt0ecYZZWoTIYQkEkeL/urVQEFBBNFftUqXFH1CiItwtOgvW6bLiKJfqRLQsmWZ2kQIIYnE0aK/fLlORtOqVZidq1bpXJ5JSWVuFyGEJArHi/6ZZ0bQ9VWr6NohhLgOx4q+iIp+WNfOoUPAn39S9AkhrsOxop+RAezfH0H0V6/WJUWfEOIyHCv6URtxAYo+IcR1OFb0reEXzjwzzM5Vq3Qi9ObNy9QmQghJNI4W/ZYtgRo1wuxctQpo146RO4QQ1+Fo0Q/r2gF0XlwOv0AIcSGOFP38fGDjxgi6fuAAsH07/fmEEFfiSNHPytKQzbCzZTFyhxDiYhwp+vv26TLsMPmM3CGEuBhHi37YMfRXrQKqVgWaNi1TmwghpDzgTtFv3x6o4MhbJ4SQInGk8kUUfRFgxQp
G7hBCXIu7RP/PP4E9e4Du3cvcJkIIKQ84VvSTk4Fq1YJ2pKXpkqJPCHEpjhX9unUBY4J2pKXpxCkdOybELkIISTSOFv0QFi4EunTRagAhhLgQ94h+QQGweDFdO4QQV+Me0V+9GsjNpegTQlyNe0TfasQ9++wyt4cQQsoLjhN9kSJEv3ZtHW+ZEEJciuNE/9Ahdd+HFf3u3cOE9BBCiHtwnOiH7ZiVk6Nj6NOfTwhxObZE3xjTzxizzhiz0RgzIcz+JsaY74wxS40xK4wxA/z23ec9bp0x5tJYGh+OsKK/dClQWEjRJ4S4norREhhjkgBMAdAXQAaARcaY2SKy2i/Z/QA+FJGXjTHtAcwF0Mz7fSiAMwCcBuAbY0wbESmM9Y1YhBX9hQt1edZZ8bosIYScENgp6XcHsFFENotIHoAZAAYGpREANb3fawHY4f0+EMAMETkmIn8A2Og9X9wIK/ppaTqUcthZVQghxD3YEf2GALb5rWd4t/nzMIBhxpgMaCl/VDGOhTFmpDEm3RiTnpmZadP08EQUfYZqEkKILdEPF+4iQevXAnhLRBoBGADgHWNMBZvHQkSmikiKiKTUr1/fhkmRsUS/dm3vhtxcYMsWjrdDCCGw4dOHls4b+603gs99Y3ETgH4AICK/GmMqA6hn89iYsm8fcPLJQEXrzjZt0mXr1vG8LCGEnBDYKekvAtDaGNPcGJMMbZidHZTmTwAXAYAxph2AygAyvemGGmMqGWOaA2gNIC1WxocjpGPWhg26pOgTQkj0kr6IFBhj7gAwD0ASgGkissoYMxFAuojMBjAGwGvGmNFQ981wEREAq4wxHwJYDaAAwO3xjNwBihD9Vq3ieVlCCDkhsOPegYjMhTbQ+m970O/7agC9Ihz7GIDHSmFjsdi3D2jQwG/Dhg26oWbNiMcQQohbcFyP3KysMCV9unYIIQSAA0V/3z6gTh2/DRR9Qgg5jqNEPz8fyM72K+kfPgzs3EnRJ4QQL44S/awsXR4XfYZrEkJIAI4S/ZDeuAzXJISQANwh+gzXJIQQAG4Q/VNPBapXT5hNhBBSnnC+6LOUTwghx3G+6NOfTwghx3Gc6CcnA9WqQWM3d++m6BNCiB+OE/26db1zn2/cqBsp+oQQchxHij4AhmsSQkgYnCv6VkmfDbmEEHIc54r+hg1Aw4ZA1aoJtYkQQsoTzhZ9unYIISQAx4i+CEWfEEKi4RjRP3QIKCjwin5eHpCZCTRpkmizCCGkXOEY0c/LA/r3B9q2hQ6pDHC2LEIICcLWdIknAvXqAXOtCR3/9Io+x9whhJAAHFPSD+AwRZ8QQsJB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBfhbNGvVi2xdhBCSDnDuaJfpQqQlJRoSwghpFzhXNGna4cQQkKg6BNCiItwrujTn08IISE4V/RZ0ieEkBAo+oQQ4iIo+oQQ4iIo+oQQ4iJsib4xpp8xZp0xZqMxZkKY/c8YY5Z5P+uNMQf89hX67ZsdS+MjQtEnhJCwRJ05yxiTBGAKgL4AMgAsMsbMFpHVVhoRGe2XfhSALn6nOCIinWNnsg0o+oQQEhY7Jf3uADaKyGYRyQMwA8DAItJfC2B6LIwrEYWFwJEjFH1CCAmDHdFvCGCb33qGd1sIxpimAJoD+NZvc2VjTLox5jdjzKAIx430pknPzMy0aXoEcnJ0SdEnhJAQ7Ii+CbNNIqQdCuBjESn029ZERFIAXAfgWWNMy5CTiUwVkRQRSalfv74Nk4qAI2wSQkhE7Ih+BoDGfuuNAOyIkHYoglw7IrLDu9wM4HsE+vtjD0WfEEIiYkf0FwFobYxpboxJhgp7SBSOMeZ0ALUB/Oq3rbYxppL3ez0AvQCsDj42plD0CSEkIlGjd0SkwBhzB4B5AJIATBORVcaYiQDSRcTKAK4FMENE/F0/7QC8aozxQDOYJ/2jfuICRZ8QQiISVfQBQETmApgbtO3BoPWHwxz3C4C
OpbCv+FD0CSEkIs7rkUvRJ4SQiFD0CSHERVD0CSHERVD0CSHERThT9CtWBJKTE20JIYSUO5wp+tWrAyZcR2JCCHE3zhV9QgghIVD0CSHERVD0CSHERThP9HNyKPqEEBIB54k+S/qEEBIRij4hhLgIij4hhLgIij4hhLgIZ4m+CEWfEEKKwFmif/Qo4PFQ9AkhJALOEn0OtkYIIUVC0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBdB0SeEEBfhTNGvVi2xdhBCSDnFmaJftWpi7SCEkHKK80S/WjWggrNuixBCYoWz1JGDrRFCSJFQ9AkhxEVQ9AkhxEVQ9AkhxEVQ9AkhxEVQ9AkhxEVQ9AkhxEVQ9AkhxEXYEn1jTD9jzDpjzEZjzIQw+58xxizzftYbYw747Us1xmzwflJjaXwIFH1CCCmSitESGGOSAEwB0BdABoBFxpjZIrLaSiMio/3SjwLQxfu9DoCHAKQAEACLvcfuj+ldAEBenn4o+oQQEhE7Jf3uADaKyGYRyQMwA8DAItJfC2C69/ulAL4WkSyv0H8NoF9pDI5ITo4uKfqEEBIRO6LfEMA2v/UM77YQjDFNATQH8G1xjjXGjDTGpBtj0jMzM+3YHZ5rrgHatSv58YQQ4nCiuncAmDDbJELaoQA+FpHC4hwrIlMBTAWAlJSUSOcumtq1gRkzSnQoIYS4BTsl/QwAjf3WGwHYESHtUPhcO8U9lhBCSJyxI/qLALQ2xjQ3xiRDhX12cCJjzOkAagP41W/zPACXGGNqG2NqA7jEu40QQkgCiOreEZECY8wdULFOAjBNRFYZYyYCSBcRKwO4FsAMERG/Y7OMMY9CMw4AmCgiWbG9BUIIIXYxfhpdLkhJSZH09PREm0EIIScUxpjFIpISLZ2zeuQSQggpEoo+IYS4CIo+IYS4CIo+IYS4iHLXkGuMyQSwtRSnqAdgb4zMOVFw4z0D7rxvN94z4M77Lu49NxWR+tESlTvRLy3GmHQ7LdhOwo33DLjzvt14z4A77zte90z3DiGEuAiKPiGEuAgniv7URBuQANx4z4A779uN9wy4877jcs+O8+kTQgiJjBNL+oQQQiLgGNGPNo+vUzDGNDbGfGeMWWOMWWWMucu7vY4x5mvvXMRfe0c1dRTGmCRjzFJjzBzvenNjzELvPX/gHQXWURhjTjbGfGyMWet95+c4/V0bY0Z7f9srjTHTjTGVnfiujTHTjDF7jDEr/baFfbdGed6rbyuMMV1Lel1HiL7fPL79AbQHcK0xpn1irYobBQDGiEg7AD0A3O691wkA5otIawDzvetO4y4Aa/zWnwLwjPee9wO4KSFWxZfnAPxPRNoC6AS9f8e+a2NMQwB3AkgRkQ7QkX2Hwpnv+i2ETh8b6d32B9Da+xkJ4OWSXtQRoo/iz+N7wiIiO0Vkiff7IagINITe73+9yf4LYFBiLIwPxphGAC4D8Lp33QC4EMDH3iROvOeaAM4D8AYAiEieiByAw981dMj3KsaYigCqAtgJB75rEfkBQPBQ85He7UAAb4vyG4CTjTGnluS6ThF92/P4OgljTDMAXQAsBPAXEdkJaMYAoEHiLIsLzwIYD8DjXa8L4ICIFHjXnfjOWwDIBPCm1631ujGmGhz8rkVkO4DJAP6Eiv1BAIvh/HdtEendxkzjnCL6xZnH1xEYY6oD+ATA3SKSnWh74okx5nIAe0Rksf/mMEmd9s4rAugK4GUR6QIgBw5y5YTD68MeCKA5gNMAVIO6NoJx2ruORsx+704RfVfNxWuMOQkq+O+JyEzv5t1Wdc+73JMo++JALwBXGmO2QF13F0JL/id7XQCAM995BoAMEVnoXf8Ymgk4+V1fDOAPEckUkXwAMwH0hPPftUWkdxszjXOK6Nuax9cJeH3ZbwBYIyL/z2/XbACp3u+pAD4ra9vihYjcJyKNRKQZ9N1+KyLXA/gOwBBvMkfdMwCIyC4A27zzTwPARQBWw8H
vGurW6WGMqer9rVv37Oh37UekdzsbwA3eKJ4eAA5abqBiIyKO+AAYAGA9gE0A/p1oe+J4n72h1boVAJZ5PwOgPu75ADZ4l3USbWuc7v98AHO831sASAOwEcBHACol2r443G9nAOne9z0LQG2nv2sAjwBYC2AlgHcAVHLiuwYwHdpukQ8tyd8U6d1C3TtTvPr2OzS6qUTXZY9cQghxEU5x7xBCCLEBRZ8QQlwERZ8QQlwERZ8QQlwERZ8QQlwERZ8QQlwERZ8QQlwERZ8QQlzE/wfVmUdfBOl8tgAAAABJRU5ErkJggg==\n",
2062 | "text/plain": [
2063 | ""
2064 | ]
2065 | },
2066 | "metadata": {
2067 | "needs_background": "light"
2068 | },
2069 | "output_type": "display_data"
2070 | }
2071 | ],
2072 | "source": [
2073 | "plt.plot(range(100), hist.history['acc'], 'r', label='Train acc')\n",
2074 | "plt.plot(range(100), hist.history['val_acc'], 'b', label='Test acc')"
2075 | ]
2076 | },
2077 | {
2078 | "cell_type": "markdown",
2079 | "metadata": {},
2080 | "source": [
2081 | "## Plot the training loss and testing loss"
2082 | ]
2083 | },
2084 | {
2085 | "cell_type": "code",
2086 | "execution_count": 14,
2087 | "metadata": {},
2088 | "outputs": [
2089 | {
2090 | "data": {
2091 | "text/plain": [
2092 | ""
2093 | ]
2094 | },
2095 | "execution_count": 14,
2096 | "metadata": {},
2097 | "output_type": "execute_result"
2098 | },
2099 | {
2100 | "data": {
2101 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbAAAAEICAYAAAA+16EyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzsnXeYFEXzx7/FceQjJ+FIkgRB0hGULEFABMQACCgiIComDOCrAoq+hp9ijiCiovBiQBFBkmQkHJJBooQjHuEIwhFu6/dHzbCze5tv4119nmef3enp6a6Znemaqq7uJmaGoiiKosQauSItgKIoiqIEgiowRVEUJSZRBaYoiqLEJKrAFEVRlJhEFZiiKIoSk6gCUxRFUWISVWAxBBGNIaLJkZYjqxBRZSJiIsodQRl8vpZEtIiIBoVaJiXnQERziahvpOWIdbwqMCLaS0QXiOgsEaUR0QoiGkpEPim/cDVWruohomuIaAYRHTL2VfazzL1EdJSIClrSBhHRoqAJHiSIqI1xjh85pS8jogE+lsFEVC0kAgaI5bx+ckqvZ6QvipBoQSdQRUlEk4jolSDKsZeI2nvY34aIbER0zvL5NQj1BvU8ggURzbac52UiumTZ/jSQMpm5IzN/G4AsL1nqTieiK5bttYHIYpTbg4g2e8nzMxE9HWgdocBXC+w2Zk4AUAnA6wBGAPgiZFIFDxuA3wHckYUycgN4PDjiuCdICv5fAPf6q6jDSYDnmQrgJiIqYUm7D8CO4EilBMAhZi5k+dwWaYFC9ZLMzJ3N8wTwLYA3Lec9NFxyGLKMtsjyBIBFFlkahareaMUvFyIzn2bmGQB6AbiPiOoAABHdSkTriOgMER0gojGWw5YY32nGW8KNRFSViP4gohNEdJyIviWiouYBRDSCiA4aVt92ImpnpOciopFEtNs4dhoRFXdXDzMfZeaPAazx/9Jc5f8APG2VzwoRXUdE84jopCHr3ZZ9Dm/URDSAiJZZtpmIHiGinQB2GmnvGdfwDBGtJaKWfsiaBmASgNHuMhDRQCLaRkSniGgOEVUy0s3rt8G4fr2IaDER3WHsb2HI28XYbk9E643fuYjoBSLaR0THiOhrIipi7DMt4weIaD+AP1zIdIfx1l/HjdiXAPwMoLeRPw7A3ZDGxFrOTUS0hohOG983WfZVMc7nLBHNA1DS6dhmJN6FNCLaQERt3F1Dp+PyEtG7JFb+IeN3XmNfGyJKIaKnjOtymIju96VcF/V8T0RHjHNbQkTXG+lDAPQF8CxZLCEiKkdEPxJRKhH9Q0SPWcoaYzw7XxvXYwsRJRn7vgFQEcCvRnnP+imnp2c0kPNw8AqQxUqzXN8RRHQEwJdGelciWk92j9ENluNdti1ZwXgW9hLRfww5xhNRCSKaZVz/U0T0KxGVtxxz1TNC4tVZTETvGDLvIaKOWZCnvlHeKeO/vdWy704i2mGc/34iepCIygGYAqAW2a25gu5rcHsN1hv/6woiamDZ94hR11ki2kVE3Y30ukT0p3HMMSKa4PfJMrPHD4C9ANq7SN8P4CHjdxsAdSEK8QYARwH0MPZVBsAAcluOrQagA4C8AEpBlM+7xr6aAA4AKGc5vqrx+wkAKwEkGsd+BmCKu3os9eU29lV2Sh8JYKa3cwfwE4BXjLRBkLceAChoyHq/UUdDAMcBXG/sXwRgkKW8AQCWWbYZwDwAxQHkN9L6AShhlPcUgCMA8hn7xgCY7EbWNgBSAJQFcAZATSN9GYABxu8eAHYBqGWU/wKAFU7yVLNsvwzgA+P3fwDsBvCGZd97xu+BRrnXAihkXK9vnP6Xr43rld/6XxnXbpe1XjfndROAVUZaFwBznP6L4gBOAehvlNvH2C5h7P8TwDjIfdMKwFnzWgIoD+CEUW4uyL15AkApV/+jk3wvQ+7J0pB7eQWAsRbZrxh54o3yzwMo5qYsT/UMBJBgyP8
ugPWWfZNg3J/Gdi4AawGMApDH+F/2ALjFch+lG/LEAXgNwEpvz7zzf+Jmn9tn1N/zcHNPXs1jub5vGOXlhzyDxwA0Nc7tPuN88sJz29ICQJoP7aErGdsbcvzXuN75jXvhduN3Ycgz8YPlGOtzOQjAZePaxAF4FMABH2QZCmC+U1pxSPt7l1HWTQBOQrxncQBOA6hv5C0FoJ6lbdjspb6fATztIr08gHNGGfGGXIchz3tZSJtY2cibCHvbNBvAMAAEoACAm7ydc6a6fbhILm9m4yZ93s0x7wJ4x6kBy6RYLPl7AFhn/K5m3IDtAcQ75dsGoJ1l+xrjj8/tqR64UWC+njuAOsYfXwqOjWYvAEudjvkMwGjj9yJ4V2A3e5HhlOUmGwMvCsz4/SaA/7l4UGYDeMByTC5Ig1rJIo+1sWgHYKPx+3fj3Fca24sB9DR+LwDwsOW4mi7+l2st+820pwFsBZDo4fyt57XTKHsq5G3d+l/0B7Da6dg/jWteEdLAFLTs+w52BTYChsK17J8D4D5X/6NTvt0Auli2bwGw1yL7BTi+vB0D0MxNWW7rccpX1Lh+RYztSXBUYE0B7Hc65jkAX1ruo/mWfbUBXHC+7738JzaIxW9+7vb2jPp7Hm7uyat5DDkuwXjBM9I+gfECYUnbDqA1PLQtvn7cyNge8kKQx8NxSQBSLdvOCuxvy77CxnmX9CKLKwX2IIBfndL+B+kGyQVRZn1heRaMPFlRYI8CmOuUts0oszSkDbsVQF6nPDMAvA2gbCD/BTNnKQqxvHExQERNiWihYS6fNi5sSXcHElFpIppqmPJnAEw28zPzLshb3BgAx4x85YxDKwGYbpjZacZFygBQJgvn4RVm3gxgJsRis1IJQFNTHkOmvpC3Dl85YN0w3E3bDLM6DUAReLiWbngDwC1EVM+FvO9ZZD0Jefsp71yAwZ8AahBRGQD1IVZUBSIqCaAJ7G7bcgD2WY7bB1Fe1v/F4TwNngHwETOn+Hhe30De2NoCmO60z1kGU47yxr5TzPyv0z6TSgDucvofW0AaX2+4Ovdylu0TzHzFsn0eYqX6DBHFEdHrhlvuDETBAO7vi0oAyjmdz3/g+H8ccZIpH/nXd3OImYtaPtMsdbt8RgM4D19IZeZ0y3YlAE85nXsFiNXlqW3JKkeZ+ZK5QUQFiWiC4To7A3GdezpP5/8DAAoZblLTrbfBBzkqAejodP63AriGmW0AukPaqIMkXR/1/TlJN7h99pj5GMTL8jSAo0Q0nYiqGHmGQa7JBsP92MvfigNSYETUGNIwmP0530G0aQVmLgLgU0jDCMibhDOvGek3MHNhiNvMzA9m/o6ZW0D+DIY0yIA0gp2dHpx8zHzQTT3BZDSAwXBs7A8AWOwkTyFmfsjY/y/ENDZxpdiuyk3S3zUC0r9TjJmLQiw/cnGcW5j5BMQKHuu06wCAB53kzc/MK9yUcx7iinoc8nZ2CeIiGw5gNzMfN7IegvxXJqbFc9TVeVroCOAFMvrZfOAbAA8DmGXIZsVZBlOOgxB3RjEnv35Fy+8DEAvMel0KMvPrPsjk6twP+XCcP9wDaXjaQ15oKhvp7p6xAwD+cTqfBGbu4mN9WXmWPD2j/p4HII25p2fI1bm/6lR/AWaeAnhsW7KKsxzPAqgCoInRxt0cUKHMi9gepOH8QuqKAwBmuGiTRhrlLTXugzKQ9vsbN/L7g6dnD8z8MzO3hbSdqQDeN9L3M/N9kP/0GQCTjZdln/FLgRFRYSLqCnHhTGbmTcauBAAnmTmdiJpAblSTVIi74VpLWgLEZ5pmdGw+Y6mjJhHdTNIRng5xwWQYuz8F8CrZAw9KmR2CbuoBEeWD+L8BIK+x7TfG29v/ADxmSZ4JsVD6E1G88WlMRLWM/esB9CSiAiQd0Q94qSYB0vCnAshNRKMg7oRAGAfxf9eypH0K4Dmyd5wXIaK7LPuPwun6QVyFw4xvQNxc1m1AOoCfJAmUKATpC/ifk+Xhii0
AOgH4iIi6eTshZv4H4gp63sXuWZD/4h4iym28zdWG9HHuA5AM4CUiykNELQBYo+YmA7iNiG4xrIR8xptvojeZIOf+gnEvloT0O2VlrF5uo37zEw+5Ly5C+uUKQK6vFef/bTWAMyQBC/mNc6pjvHj6gqv7wFc8PaP+ngcgz9A9xjl0gvz/nhgPYKjhFSLDErqViBK8tC3BJgGifE+RRM+OClE9znwPoDkR3W48B3lIgpuqms87ESVAXK9nYT//owDKElF+L+W7uj+nQ6KEbzPqHAzpi5tPRJWIqJNR7kXIS30GABBRbyIqy+JPPG2U763NcMSbjxFi5l8wTvY0xK30CIA4S547ISbjWUij/iEsfTWQTuxUiK+8GYDrIW/25yA36FOw93PcAHkAz0JcXDNh73TNBXn7327s3w3gv+7qYbsP3eFjyf8fALO9nHt7y3YFyI2/yJJWE8BvRr0nIK4Cs5O0JIC5hqzLIa4L5z4wq38/DjI84QzEanjWKgN87AOzpD1r1DHAktYfwCajjgMAJlr2mZ2v1j6NW4wyWhvbdYztXpbjckEe0APGdZgMI1ABroN4HNIg/QNHIW/uXs/Lsu9qH5ix3QJyX502vltY9l0LYCnknpuHzPdoU4hSPmmcw28AKhr7FsF9H1g+yBvlYePzPuxBN67+k6v/p4uyFiHz/ToZ4nL8xbiP9gG413rvAKgOeY7SAPxspJmRZUcgfRAr3d1HLv6P7pAgrTS47vPw9J+4fUYDPI8kyIvOWYi1MAWOfWCZ5IC8FK0xyjkMadQT4LltaQngnA/t4SS47gPb65SWCHGxnzOuxUNwbHuc+8Cs97FPffZw0QfG9jZ0HqQ9Og5pg2pC+hwXGNflNMSb0sjyv00zrksanPrIjDw/I/P9OdPY1xHABth1hFluVaOeM5D7cC6AKsa+jyH3p3mN7vF2/Z0/ZBSkKIqiKDGFTiWlKIqixCSqwBRFUZSYRBWYoiiKEpOoAlMURVFikogtZxFMSpYsyZUrV460GIqiKDHD2rVrjzNzqUjLkRWyhQKrXLkykpOTIy2GoihKzEBEzrNnxBzqQlQURVFiElVgiqIoSkyiCkxRFEWJSbJFH5iiKLHL5cuXkZKSgvT0dO+ZFb/Jly8fEhMTER8fH2lRgo4qMEVRIkpKSgoSEhJQuXJlEPm18ILiBWbGiRMnkJKSgipVqng/IMZQF6KiKBElPT0dJUqUUOUVAogIJUqUyLbWrSowRVEijiqv0JGdr23OVmBvvQX8/HOkpVAURVECIGcrsPffB375JdJSKIoSQU6cOIH69eujfv36KFu2LMqXL391+9KlSz6Vcf/992P79u0B1T9//nz06NEjoGNzOjk7iKNAAeC888r0iqLkJEqUKIH169cDAMaMGYNChQrh6aefdshjLqCYK5frd/4vv/wy5HIqmcnZFliBAsCFC5GWQlGUKGTXrl2oU6cOhg4dioYNG+Lw4cMYMmQIkpKScP311+Pll1++mrdFixZYv349rly5gqJFi2LkyJGoV68ebrzxRhw7diyg+ufNm4f69eujbt26GDx48FVr8JlnnkHt2rVxww03YMSIEQCAqVOnok6dOqhXrx7atm2b9ZOPEXK2BZY/v1pgihJNPPEEYFhDQaN+feDddwM6dOvWrfjyyy/x6aefAgBef/11FC9eHFeuXEHbtm1x5513onbt2g7HnD59Gq1bt8brr7+O4cOHY+LEiRg5cqRf9Z4/fx4DBw7EokWLULVqVfTt2xeff/457rrrLsyaNQtbtmwBESEtLQ0A8NJLL2HRokUoU6bM1bScgFpgqsAURXFD1apV0bhx46vbU6ZMQcOGDdGwYUNs27YNW7duzXRM/vz50blzZwBAo0aNsHfvXr/r3bZtG6pXr46qVasCAO69914sWbIExYsXR65cuTB48GBMnz4dBQsWBAA0b94c9957LyZMmACbzRbAmcYmOdsCK1AASE2NtBSKopgEaCmFClN
BAMDOnTvx3nvvYfXq1ShatCj69evncnxVnjx5rv6Oi4vDlStX/K6XmV2mx8fHIzk5GfPmzcPUqVPxySefYO7cuRg/fjxWrVqFmTNnol69eti4cSOKFSvmd72xhlpg2gemKIoPnDlzBgkJCShcuDAOHz6MOXPmhKyu2rVrY+fOndizZw8AYPLkyWjdujXOnj2LM2fOoGvXrnjnnXewbt06AMCePXvQrFkzjB07FsWKFcPBgwdDJls0oRaYuhAVRfGBhg0bonbt2qhTpw6uvfZaNG/ePGhlz5kzB4mJiVe3p0+fji+++AI9e/ZERkYGmjZtisGDB+PYsWPo2bMnLl68CJvNhnHjxgEAnnzySfzzzz9gZnTs2BF16tQJmmzRDLkzVWOJpKQkDmhBy2HDgClTgBMngi+Uoig+sW3bNtSqVSvSYmRrXF1jIlrLzEkREikohNWFSEQTiegYEW12s5+I6H0i2kVEG4moYUgFUgtMURQlZgl3H9gkAJ087O8MoLrxGQLgk5BKU6AAkJ4O5KCoHUVRlOxCWBUYMy8BcNJDlu4AvmZhJYCiRHRNyAQqUEC+s+lMzYqiKNmZaItCLA/ggGU7xUjLBBENIaJkIkpODTQUPn9++VY3oqIoSswRbQrM1bz/LqNMmPlzZk5i5qRSpUoFVptpgakCUxRFiTmiTYGlAKhg2U4EcChktakCUxRFiVmiTYHNAHCvEY3YDMBpZj4cstpUgSlKjicYy6kAwMSJE3HkyBGX+/r164efde3BoBPWgcxENAVAGwAliSgFwGgA8QDAzJ8CmAWgC4BdAM4DuD+kApkKTGfjUJQciy/LqfjCxIkT0bBhQ5QtWzbYIipuCHcUYh9mvoaZ45k5kZm/YOZPDeUFI/rwEWauysx1mTmA0cl+oEEciqJ44KuvvkKTJk1Qv359PPzww7DZbLhy5Qr69++PunXrok6dOnj//ffxv//9D+vXr0evXr18ttxsNhuGDx+OOnXqoG7duvjhhx8AAAcPHkSLFi1Qv3591KlTBytWrHBZp6JTScm3KjBFiQqiaTWVzZs3Y/r06VixYgVy586NIUOGYOrUqahatSqOHz+OTZs2AQDS0tJQtGhRfPDBB/jwww9Rv359n8r//vvvsXXrVmzYsAGpqalo3LgxWrVqhcmTJ+O2227DiBEjkJGRgQsXLmDt2rWZ6lRUgcm3KjBFUZyYP38+1qxZg6QkmW3pwoULqFChAm655RZs374djz/+OLp06YKOHTsGVP6yZctwzz33IC4uDmXLlkWLFi2QnJyMxo0b48EHH0R6ejp69OiBevXqoVq1akGpM7uhCgxQBaYoUUI0rabCzBg4cCDGjh2bad/GjRsxe/ZsvP/++/jxxx/x+eefB1S+K26++WYsWrQIv/32G/r27YvnnnsOffv2DUqd2Y1oi0IMLxrEoSiKG9q3b49p06bh+PHjACRacf/+/UhNTQUz46677sJLL72Ev/76CwCQkJCAs2fP+lx+q1atMHXqVGRkZODo0aNYvnw5kpKSsG/fPpQtWxZDhgzBgAEDsG7dOrd15nRytgWmQRyKorihbt26GD16NNq3bw+bzYb4+Hh8+umniIuLwwMPPABmBhHhjTfeAADcf//9GDRoEPLnz4/Vq1c7LGwJAIMGDcKwYcMAAFWqVMHixYuxcuVK1KtXD0SEcePGoXTp0pg4cSLGjRuH+Ph4FCpUCJMnT8aBAwdc1pnTydnLqdhsQFwcMHo0MGZM0OVSFMU7upxK6NHlVLIjuXIB+fKpBaYoihKD5GwFBkg/mPaBKYqixByqwHRRS0WJONmhKyNayc7XVhVY/vyqwBQlguTLlw8nTpzI1g1tpGBmnDhxAvny5Yu0KCEhZ0chAmqBKUqESUxMREpKCgJe10/xSL58+ZCYmBhpMUKCKjBVYIoSUeLj41GlSpVIi6HEIOpC1CAORVGUmEQVmFpgiqIoMYkqMA3iUBRFiUlUgakFpiiKEpOEVYE
RUSci2k5Eu4hopIv9lYhoARFtJKJFRBT60BlVYIqiKDFJ2BQYEcUB+AhAZwC1AfQhotpO2d4C8DUz3wDgZQCvhVwwDeJQFEWJScJpgTUBsIuZ9zDzJQBTAXR3ylMbwALj90IX+4OPaYHpIEpFUZSYIpwKrDyAA5btFCPNygYAdxi/bweQQEQlQiqVuaRKenpIq1EURVGCSzgVGLlIczZ7ngbQmojWAWgN4CCAKy4LIxpCRMlElJylEfy6KrOiKEpMEk4FlgKggmU7EcAhawZmPsTMPZm5AYDnjbTTrgpj5s+ZOYmZk0qVKhW4VLoqs6IoSkwSTgW2BkB1IqpCRHkA9AYww5qBiEoSkSnTcwAmhlwqtcAURVFikrApMGa+AmAYgDkAtgGYxsxbiOhlIupmZGsDYDsR7QBQBsCrIRfM7ANTBaYoihJThHUyX2aeBWCWU9ooy+8fAPwQTpnUAlMURYlNdCYOVWCKoigxiSowDeJQFEWJSVSBqQWmKIoSk6gC0yAORVGUmEQVmFpgiqIoMYkqMFVgiqIoMYkqMNOFqEEciqIoMYUqsLg4IG9etcAURVFiDFVggFhhqsAURVFiClVggK7KrCiKEoOoAgNUgSmKosQgqsAAUWAaxKEoihJTqAID1AJTFEWJQVSBARrEoSiKEoOoAgPUAlMURYlBVIEB2gemKIoSg6gCA9QCUxRFiUHCrsCIqBMRbSeiXUQ00sX+ikS0kIjWEdFGIuoScqFUgSmKosQcYVVgRBQH4CMAnQHUBtCHiGo7ZXsBwDRmbgCgN4CPQy6YBnEoiqLEHOG2wJoA2MXMe5j5EoCpALo75WEAhY3fRQAcCrlUpgXGHPKqFEVRlOAQbgVWHsABy3aKkWZlDIB+RJQCYBaAR10VRERDiCiZiJJTU1OzJlWBAqK8Ll3KWjmKoihK2Ai3AiMXac5mTx8Ak5g5EUAXAN8QUSY5mflzZk5i5qRSpUplTSpdE0xRFCXmCLcCSwFQwbKdiMwuwgcATAMAZv4TQD4AJUMqlSowRVGUmCPcCmwNgOpEVIWI8kCCNGY45dkPoB0AEFEtiALLoo/QC+ailqrAFEVRYoawKjBmvgJgGIA5ALZBog23ENHLRNTNyPYUgMFEtAHAFAADmEMcXaEWmKIoSsyRO9wVMvMsSHCGNW2U5fdWAM3DKpSpwHQ2DkVRlJhBZ+IA1AJTFEWJQVSBAdoHpiiKEoOoAgPUAlMURYlBVIEB2gemKIoSg6gCA9QCUxRFiUFUgQGqwBRFUWIQVWCABnEoiqLEIKrAACB3biA+XhWYoihKDKEKzKRAAQ3iUBRFiSFUgZnoqsyKoigxhSowE1VgiqIoMYUqMJMCBYBz5yIthaIoiuIjqsBMypcHUlIiLYWiKIriI6rATCpXBvbujbQUiqIoio+oAjOpVAk4cULdiIqiKDGCKjCTypXle9++iIqhKIqi+EZYFRgRdSKi7US0i4hGutj/DhGtNz47iCgtbMKZCkzdiIqiKDFB2FZkJqI4AB8B6AAgBcAaIpphrMAMAGDmJy35HwXQIFzyqQJTFCWm2L4dSE0FbrwRiIuLtDQRIZwWWBMAu5h5DzNfAjAVQHcP+fsAmBIWyQCgTBkgXz5VYIqixAYTJgDt2wPMkZYkYoRTgZUHcMCynWKkZYKIKgGoAuAPd4UR0RAiSiai5NTU1KxLRwRUrKh9YIqixAYbNwK1a8tcrjmUcCowcpHm7tWhN4AfmDnDXWHM/DkzJzFzUqlSpYIioIbSK4oSM2zaBNxwQ6SliCjhVGApACpYthMBHHKTtzfC6T40UQWmKEoscPw4cPgwULdupCWJKOFUYGsAVCeiKkSUB6KkZjhnIqKaAIoB+DOMsgmVK0un6L//hr1qRVEUn9m0Sb7VAgsPzHwFwDAAcwBsAzCNmbcQ0ctE1M2StQ+AqcwR6JnUsWCKosQCGzfKdw63wMLa+8fMswDMckob5bQ9JpwyOVCpknzv2yedo4q
iKNHIxo1AqVISPZ2D0Zk4rOhYMEVRYgEzgINcxcblHFSBWSlbFsiTRxWYoijRS0YGsHlzjncfAqrAHMmVS9yIqsAURYlW9uwBLlzI8QEcgCqwzGgovaIo0YwGcFxFFZgzlSppFKKiKNHLxo3iLdJAM1VgmahcGTh6VEx0RVGUaGPTJqBaNaBAgUhLEnFytALr3Rt4+22nRB0LpihKNLNxo/Z/GeRoBbZhA7BypVOihtIrihKtnDsnQRza/wUghyuwChWAAwecElWBKYoSrWzZIsunqAUGIIcrsMREICXFKfGaa4D4eHUhKooSfZhzIKoFBiCHK7AKFWRC5ytXLIm5csm6YGqBKYoSTRw9Cnz5JVCoEFClSqSliQpytAJLTARsNuCQ86Iu114ry3UrihIeNm0CevQAzp+PtCTRyW+/idX111/ABx/Ii7aSsxVYBWN1skxuxEaN5IFKTw+7TIqSI5k7F/jlF2DZskhLEn289x7Qtat0byQnAwMGRFqiqCFHK7DERPnOFMjRuLH4FTdsCLtMipIjOXhQvpcsiawc0chnnwE33QSsWgVcf32kpYkqcrQCc2uBNW4s32vWhFUeRcmxmAps8eLIyhFtHDsGbNsGdOsG5MsXaWmijhytwAoXlv7QTBZYYqLMTL96dUTkUpQch6nAVq/WWXCsLF0q361aRVaOKCWsCoyIOhHRdiLaRUQj3eS5m4i2EtEWIvoutPKIFZbJAiMSK0wtMEUJDwcPAiVKAJcu6YujlSVLgPz5pV9eyUTYFBgRxQH4CEBnALUB9CGi2k55qgN4DkBzZr4ewBOhlisx0YUFBogC274dOH061CIoSs6GWUKB77hDXh61H8zOkiXS/5UnT6QliUrCaYE1AbCLmfcw8yUAUwF0d8ozGMBHzHwKAJj5WKiFcjkbBwA0aSIP1tq1oRZBUXI2J06I5XX99TLDhCowIS1NAsnUfeiWcCqw8gCsqiLFSLNSA0ANIlpORCuJqJO7wohoCBElE1FyampqwEJVqAAcOQJcvuy0IylJvtWNqCihxez/KldOGusVK1w8kDmQZcvkJVoVmFvCqcDIRRo7becGUB1AGwB9AEwgoqKuCmPmz5k5iZmTSpUqFbBQiYl2D4YDJUrIgGYivRR/AAAgAElEQVRVYIoSWkwFVr68NNbnz8uA3ZzOkiXiOmzaNNKSRC3hVGApACpYthMBOKuNFAC/MPNlZv4HwHaIQgsZbkPpAXEjaoeyooQWqwJr2VJ+R6Mb8eLF8Na3ZIm0Qfnzh7feGCKcCmwNgOpEVIWI8gDoDWCGU56fAbQFACIqCXEp7gmlUG4HMwMSyHHggMxBpihKaDAV2DXXAGXKADVrRpcCYwaefFLkO3w4PHWeOyezbqj70CNhU2DMfAXAMABzAGwDMI2ZtxDRy0TUzcg2B8AJItoKYCGAZ5j5RCjl8miB6YBmRQk9hw4BpUvLKhAA0Lq1jH/KyIisXCZjxwLvvgucOgVMmRKeOv/8U86/devw1BejhHUcGDPPYuYazFyVmV810kYx8wzjNzPzcGauzcx1mXlqqGUqXBhISHBjgTVsKJNmqhtRUULHwYPiPjRp2VKGr2zeHJ76L11yP4nwxx8Do0cD990ngV3ffBMemZYsAeLigBtvDE99MUqOnonDxG0ofcGCEtqrFpgSKdLTpYHNzjgrMNPzEa5AjmHDZKyVM3PmyL5u3YAJE4D+/YH16wNTrBs2+D45+KFDwNdfi8JMSPC/rhyEKjC4WdjS5KabgOXLs38jokQnt90GDB4caSlCi7MCq15d5nhbty70dWdkAD/9JArG7IszmTRJ+uSmTgVy5wZ69xaraPJk12WdOwcMGSLjcqycPCnK6M03vctz4gTQoYMc88EHAZ1STkIVGDxYYADQuTNw9qwu86CEn4wMue+yswv74kXg+HFHBZYrF1CvXngssORkURqA40TCzMDChUC7dvYowNKlgVtuAb79VhYSdOb334Hx44H
p0x3T16+X1S1+/dWzLGfOAJ06Abt3AzNm2C1RxS2qwCAW2NGjboysdu1kLMasWWGXS8nh7N4tbqfdu52WDc9GmFF95co5pjdsKA2/K0URTGbPlumrChZ0VGB//y2NQtu2jvn79xd3jatZ883IyfXrHdPNZZmSkz1HNPfrJ1bn999nrldxiSowiAXmcjAzIK6M1q1lRVRFCSebNsn35cvA/v2RlSVUWMeAWWnQAPj3X2DnztDWP3u2jLVq3dpRKS1cKN9t2jjm79ZN+qVcBXOYCszZ9blhg7ggAbHSXLFmjVhoY8eK21jxCVVgsI8Fc9sPduut8ka2J6RD0hTFkY0b7b937Ahv3StWiDvrl19CW487BdawoXyHsh8sNVUUR+fOosC2b7f3Xy1cKG+2117reEyBAjLp8A8/OEYupqXJ/5U3r7x4WC3mDRuAm2+WcWSzZ7uW5Y03gCJFgEceCe45ZnNUgcE+FsxtP1iXLvKtbkQlnGzaJFOaAaG3RExOnAAGDQKaNwfmzQN69fLc/5uaKn3EgeJOgdWuLa77UPaDzZsnrhdTgQFiRdlswKJF4sYjFzPg9esn5zxzpj1t+XIpq29fcfv+/bekX74MbN0K1K8vLwRz5mR2B2/fLoEkDz8s43oUn1EFBh8UWPXqQLVqqsCU8LJpk7iwEhLCo8AOHRLFMWkS8PTT4nGoVEncZmaDbMWcaLZzZ/lt5Z13gKFDvYeOHzwoVkuxYo7p8fFA3bqhVWCzZ8sLQqNGYvGZ/WBbtkhgibt+qDZtxJr6zrJc4dKlIvNDD8m22Q/299/SuV6vnrwIp6UBK1c6lvfWW6KsH3886KeY3VEFBmkfChf24EIExI24cKH7AY+KEkz+/VeCN264QV6gnF2IaWmZw76zyujRMtvE6tXA//2fKK/ff5eGuVOnzOHhmzZJA718uUTmmaxdKwrws8+k0T5zxn2dhw6J9eXK0mnQQFyIzsoxEM6ckQCMhx4SpWqziTV0yy0SGh8fL1bn4sX2/i93CiwuTkLqZ82ScHdALLekJLG08uWzuz7NAI569SQ8Pi7O0Y14+LCM+RowQEL2Fb9QBWZQoQKwb5+HDF26yI1v3tyKEkq2bJGGu25doEaNzBbYQw8BLVr43rivXi1h2e7e0rZuBSZOlD4Ys/8JAKpUkYb66FHg+ecdj/npJ1E8deoAzz4rbrUrV2TcWunSMovF0qWiCI65WdrPeQyYlYYNRUFkNYBlxw6Z0X3KFODTT8Vd+Ouv4v7s3Nmer3Vrue7ffy/nXamS+zL79hX34I8/ykttcrLMIJI7t/xnVgWWN6/M71ikiPxnVk/OO+/INXv66aydY06FmWP+06hRI84q3bsz167tIUN6OnOBAswPPZTluhTFKxMmMAPMO3cyv/gic65czBcvyj6bjbl0adm/bp1v5d1zj+Tv3dv1/m7dmAsXZk5Ndb1/0CC5/0+ftqfVrcvcqhXzypVS9ogRzP/3f/L7++8lz8yZzPnySV5TfitVq7qX6c8/pazp0zPvs9kkfc+ezOkLFjBPmsT85ZfM774r51WyJPPChcw//cRcsCAzkXyOHbMfu2yZ1AcwDxzoWiZrPTVqMLdpw/zHH3LMzJmyb8gQ5qJFJU+HDswNG9qPe/11ybtgAXPPnp7/kxADIJmjoP3OyifiAgTjEwwF9swzzHnyMF+54iFTt27MFSowZ2RkuT5F8chjj4nCyMhg/uYbeVS3bZN9O3bYG9qxY72Xdfo0c/78zCVKyDGLFjnuX7JE0v/7X/dlrFoleT75RLZ37pTtd9+V7fvuY46Pl3q6dZPG22TGDMn76quOZdpsotyGD3dd57//iuJ+8cXM+0ylkTs389ChzPv3M//4I3ODBvZrY34aNGDeu9d+7MaNzFWqMN98s2OZFy+K/IBcc2+89JIowcGD5fvUKUn/5BMpY+9eedG4/37Huk25ChViHjOG+dw573WFAFVgUfIJhgIbP16uhvMLnQNTpki
mefOyXJ8SY1gb5HDQti1zkyby27RwZsyQ7YkTZbtsWXseT3zxheT/4w/mSpXEGrp8WfZducLcrBlz+fKiMNxhszHXq2e3Jt58095IMzMfOsSckCCN8v79mY+/4w5RVrt329NOnJAy3n7bfb21azN37Zo5/ZZbmMuUYX74YVGcplKoXl0sr9275WHeu9f1W+nly8znz2dOb9dOyklJcS+TianEieTamJj/16efOip5ZrmOffowP/4489Gj3usIIarAouQTDAW2eLFcjd9/95DpwgXm4sWZe/XKcn1KDGCzyQ3RuDHztde6doGFqt4SJZgfeEC2zYb+rbdke+BAuQ9feknSjxzxXF6rVuLustnEhWY2qj/8wHz99bL9xRfe5frwQ8mbnCxKz/m5W7pU3HCuOHBAlFunTvaXgU2bpLypU93X2a8fc7lyjmnr1rGDxbh3L/N//sP87bdeXCg+8P339uvuC02aiCyPPmpPMy3HZs1k38KFWZMpRKgCi5JPMBTY4cNyNd5/30vGxx8XX6O7voLszrx50qAGwn//y3zNNcwtWjAPGOC54Yo0f/8tcgJ219sPP4Sn7kOHpL733rOnlSjB/OCD8rtGDebbbrM35BMnui9rzx7J88orsm32y5gWy3XXyf/gi4V56pS42Lp2dSzTV955hx36x37/XbaXLnV/zLhxmZX0PfeIMjRddpHk3XdFvmnTHNNr1bJf45MnIyObF1SBRcknGArMZhMPyLBhXjKaPux33slynTGH2Rj60u/iihYtxO3VqhVzsWLSGGb1jTlU9Owpnf8ffSSupvLlmbt0CU/dc+bwVZefSbNm4lY8elT2vfGG3LSJiSKrO8x+mn377Gk7d8q5fP21/9f/vvvsDfPWrf4de/kyc/36zHnzilXbvDl79dubgRzduklf3j//MMfFMT/1lH91h4qzZ+XF7MIFx3QzaKZChcjI5QPZQYGFNYyeiDoR0XYi2kVEI13sH0BEqUS03vgMCp9sEq3sdcaeunVl7rQvvgjO+JRYwpxWKNAptXbvlrDlxYtlaYkLF7yMXYggq1fL2L+HH5bZyO+7T8ZEBXvslSvMORDr1rWnmaH0K1bIdosWctN27QrMnSuzujvDLGOM2rYFKla0p1erJnN79u8v45L8YcgQ+b7uOqBWLf+OzZ1bQu8fekgGX/79twwIdp7I10qzZsB774m8TZoATz0ls9U/+aR/dYeKQoWA556TsV9WGjSQ73r1QlZ1Tmt+XBE2BUZEcQA+AtAZQG0AfYiotous/2Pm+sZnQrjkA3xUYIBMtbN5s32Zi7VrZYxJdr+jzGUiAlE6//4rgzarVpVts/Hbti04sgWTI0dkvFRSkj3t/vtl8OvXX4e+/o0bpWEvWdKeVr26yDR3rowratRI0rt2lXWoXM2Ovny5vDTce2/wZLvxRuD22wOfNaJKFRn7tGCBjMNKSZHz8cRjj0n+kydFAfbr537sWLRQv758h1CBTZ4sw1NPnQpZFVFPOC2wJgB2MfMeZr4EYCqA7mGs3ys1akjb7HXh1N69ZdqZ556TN+GkJOCee4A//giLnBEhNdU+J14gCsy02mJBga1dK9/W9ZiqVZNpkyZOdP+i4soKsnL5slgQkyd7Xh5l40ZH6wuQmxOQF6XGje2N/s03i4VonZfP5KOPxNK54w7PcvkDkSiRoUODU1YuH5ug1q1lsPCwYcBLL2W97lDTtKn8TyGaWT49HXjhBRlfXqRISKqICcKpwMoDsM42mGKkOXMHEW0koh+IqEJ4RBNq1JC2afduLxkTEkSJLVwoU+G8/ba8EY4Zk32tsJkzxQJp314mjfR3nSbzopoKrHhxma0hGhXYmjXSuJpuIJMHHgB27XI9ue3bbwNFi8oEse6YORMYN05cd9ddJ25oZ0V24IDMo9eypWN69erynZYmL00m+fPLfzJ9uqMC3btXZpR48EFxc2UHKlaUVYorhLVZCIyEBPHQNG0akuI//lgmKHn
jDd/fAbIj4Tx1F5Odwbm1/xVAZWa+AcB8AF+5LYxoCBElE1FyampqUAQ0X3J9ciO+9ZYosJ07geHDgf/8Rxq2BQuCIkvU8fPP0nD06CGTkzrPi+cNU4FVq2ZPq1UrOhVYcrLI5tzw33GHNEwTJzqmT54sUwFlZIhycrdo4cSJ4hr88UdRdoMGAa+84phn2jT57t3bMd163awKDJDpn1JSRDmavPuuKOHHHvN8rkrMkZYGvPoq0LGjvLvkaMIVLQLgRgBzLNvPAXjOQ/44AKd9KTsYUYjMzGlpEjj0+usBHJyeLpFqzZuHf9BrqDl3TgahPvqoTJcDMK9Y4V8ZDz0kkYdWhg61T7kTLdhsMkD2vvtc7x88WAbOPvKIzIgxZ47MBtG2rYyPypdPxjo5z9Zy6JBEz40caa+na1eZ4sgawdaoEXNSkuu6r7mG3YZl9+ghM3fs2yf7CxZk7t/f79NXop+RI9mvWcTcAY1C9Is1AKoTURUiygOgN4AZ1gxEdI1lsxuAsL6eFykiE0IHtHZg3rxihS1fnv2ssDlzxOl+++32CU79nWB19267+9CkVi15nfS0zHqo+eknx0UbU1JEHmsAh5VXXpH+zvHjZYLW226TJUimT5fAinHjJFrxnXccj/vmG7HQ7r9ftk3r6PhxcfUBYs2vXQv06eO67tq1ZXZ656VHAKnPZpM+ts8+k6CZp57y71ooUc/Bg2Jc9+1rjxPJ0YRTWwLoAmAHgN0AnjfSXgbQzfj9GoAtADYAWAjgOl/KDZYFxszcsqUMVwoI0wpr1kxGRmcX+vcX6+nyZeYzZ/jqOCR/qFo18wwm8+bx1YlNI8GFC2IBFi9un1bInKnizz89H3v4MPOoUWJtHTxoT7fZmG+/Xaw0c85Bm425Zs3MN5aZbk4HZY7ZcjeN0Z49YvW54+WX+eocex06eJZfiUkefVTmUfjnn6yXhWxggUVcgGB8gqnAHnhA5t8MGHNSRUBG4w8f7nrOtVBz4YLj4NVAuXRJlNe999rTihWTOeh85fJlcbP95z+O6Skpcp0+/DDrcgaCObclILO/M4uMuXNn7T87eVL++4QE5jVrxN3qbsaMDz6QfatWyawYrVoFXu+FC/KiADDPnRt4OUpUcvGiTMjSp09wyssOCiwHx6+4pkYNWbro9OkACxg0SIIA3nhDoqbGjQM+/DBzvn/+CV3E4sWLQLt2EumW1YHC06fLQJO777anVazoX7n790u0nbMLsVw5CYqIVCDHl1/Kudxwg0S3Mct/V6eORPcFSrFiEo1YooQsmDh6tAy7uOuuzHnvvVeCRYYNk4G97tyHvpAvn7gqR4zQ3v1syJw5wIkTMgxOMYi0Bg3GJ5gW2PTp8gK7Zk2QCuzYUTrqz561p5lTBX3+eZAqsWCziRkJiK/h7ruzVl7z5vJWbw1K6NaNuU4d38uYO5ddLuPBLO4z52UtwsG+feKuGzXKbjUvWiTuxEGDglPHrl32wAvrkhrOPPIIX10aJKfOsal45e67mUuVEqdIMIBaYNkPv0LpfWHMGOmo//hj2T53zj4dz3vvBd8K+/BDGV/0wgsSVDJtmix3Hghr10pQyrBhjoNNKlUSC8xX2Z3HgFlxDqX/80+xVDwNjTh3TqwVbyPOMzLc7/vqK5F/wAAJyiheXELhT550H8DhL1WriiXWrp3ngIphw+S7QwfH2TcUxeD0aWDGDBldER8faWmiiEhr0GB8gmmBpafLi/no0UErUtYuMq2wJ56Qt+3BgznThK1ZZf58CdXu3l0spn//lclE69cPbNLce++VcOy0NMf0t97iTOHcqanu+42eekomcHW1EKi5Qm1amliP5vIUDRs6rv67cqVcx/Ll+Wq/Ve7ccm5Dhsjs8VYWLJAOA1d/ZEZG5gUNn33WXm5yssfLEhImTGDesCH89SoxgbkE3MqVwSsT2cACi7gAwfgEU4ExS9sWrI5SZrbPqN2rl2jHhx6Sxr5ECc8zifvDhg0ye3q
dOhIpaDJ1KgfkrjxyRFyQjzySed/330uZ69fLts0mF83dufToIQsTuuKXX/hq1N/8+fK7b19RTm3aiBJ+7TXZLl9elOqrrzJ/9ZUEXHTsKFF3+fLJ8iMZGaIMcucW+XPnlnWnrCxcKPVMnmxP27tX1nDKkyd8634pio/cfDNztWrBHTKpCixKPsFWYF26+NfF4xOdOsnlTky0WxYjRkijmdVowX37ZNG/8uUzr4Zrs8nYgFKlMltSnjBDsp0tG2bm1atl3y+/yPbmzezReqlbV9avcsWOHXw1Qq9tW+kzSk8X5QKI3IB0ALhb/+nQIeZbb+Wr61sBoth27ZI+rZYt7U++zcZ8112i7J1XIO7fX/4nRYkiDhyQ994xY4JbriqwKPkEW4GNHi16xRp3kWXWrJEZHmbNsqf9849U5BxebuLK5ebMiRMSsl2kSGZLw2TtWnkCnn3WfTn//CNju156ST5lyrhvzI8cYYfVP81FBxMSMi//brOJG/KJJ1yXdfmyuBdbtpQyrMvLf/SRKKDx472/etpskq9IEQnxv3xZ0idMkHK//FKs3n79ZHvEiMxlZGRE16wgisLMb74pt+zOncEtVxVYlHyCrcB++02uzOLFQS3WtULq3j3zdELMsnBh1arMrVszb9vmuG/7dlEaXbuK0siTx3WEn5UBAyTf7t2O6Rs2iMsuLo6vWlFm/5K7/jmbTVx25qKCnTqJ5fPqq3x1TJOJudT1Bx+4l61uXclTooRMW+Vclz84X+OMDOabbpJrnJTEVxfkVEWlxAi33hoCjxBnDwWmUYguMFfRMJf7Chqupo0eNkyiFF991R7Vd/myjLs6eFCW1qhXT5aQ+OILoHlzmcJo+HBg+3aJoFu4UJab8MQrr8iCgiONdURtNolUrFdPJup9/HEZr5WRIZ+LF2UhRFcQ2ceCXbwoa1F16AA8+qiMfRozxp7XUwSiibm0yhNPyHgp57r8wfka58oFfPKJjGXbvl2mjXrhBf/LVZQIcfAgULlypKWIUiKtQYPxCbYFxiwxCXfeGfRiM2Oz2d1ajzwi0YKPPSbb33wj7ro+feyW0XXXiU8hkH6zMWOkjNmzJbACkDFjJ074X1aHDrIs/IIFUs6vv0q6GVVoTvb71VeyvX27+7LefVemP3E1SW2wWLw4+D4YRQkDpUoxP/hg8MtFNrDAIi5AMD6hUGC9ejFXrBj0Yl2TkSHuOMDu5nLuM1q5Uj5ZcX2dO2cPQ8+VS6L2Ai1v0CBROiNGyLx/Zofh2bPyxJUpI31kI0dKXRcv8oEDzFu2uCjLZpPADUVRHEhPl8f15ZeDX3Z2UGDqQnRD06biUfN32auAyJVL1hd7+22ZyqhNG+D//i+zQE2bZs31VbCgDJ6uXBmYPVtmQw+0vEqVZM6tX3+VZebNtbMKFZKya9aU8l9/XdYRy5MHI0a4WRyYyPuy8oqSAzHbn3LlIitHtKIKzA1Nmsj3mjVhrHT4cOnzmjlT+qtCwR13yDyMHTtmrZyKFeV769bMZTVqBCxaJJO3NW8OdO8OQFYq8WcCD0XJ6Rw8KN/lXa1dryBErWTs06ABEBcngRy33RbGiuvWDWNlWcBcFwyQAA5niESxWZTb8ePAhQsyLU7RomGQUVFiHFOBqQXmGrXA3FCggOiSoEciZhdMBVasmFhcPmBOb2g+lIqieObQIflWC8w1qsA80KSJKDCbLdKSRCHly4uJ2r69fHvBZpOlIAD7Q6koimcOHpTu4eLFIy1JdKIKzANNmsiK97t2RVqSKCQ+Hhg/Hhg1yqfsJ0/aXwRUgSmKbxw8KO5DHbbomrArMCLqRETbiWgXEY30kO9OImIiCtLaFv7TtKl8qxvRDfffL4s/+sDx4/bfqsCiE2YZ056WFmlJFJNDh9R96ImwKjAiigPwEYDOAGoD6ENEtV3kSwDwGIBV4ZTPmVq1JPJcFVjWsS7vpQosOpk/H7j9duDOOz0vpaaED9MCU1wTbgusCYBdzLy
HmS8BmAqgu4t8YwG8CcDLioWhJS5O1jZcskRDv7OKqcBy5VIFFq1MmCCe4QULgBdfjLQ0CrMoMLXA3BNuBVYewAHLdoqRdhUiagCgAjPP9FQQEQ0homQiSk71tHpvFundG9iwQRbWVQLH/Itq1lQFFo0cPy7uw4cfBgYPBl57TaaNVCLHmTPA+fOqwDwRbgXmqivyqm1DRLkAvAPAw/rrxkHMnzNzEjMnlSpVKogiOjJwoIzZHTVKrbCsYPaB3XCDKrBoZPJk4NIl4IEHgPffl5ER995rn4tZCT86Bsw74VZgKQAqWLYTAVibswQAdQAsIqK9AJoBmBHJQI48ecSdsmqVzJCkBEZqKpCQAFSpAhw+rEMToglmWeigSRMZ+5gvH/Djj8DZs8C330ZaupyLzsLhnXArsDUAqhNRFSLKA6A3gBnmTmY+zcwlmbkyM1cGsBJAN2ZODrOcDtx3nzS8phWWkSEPvFplvpOaCpQqJW+Tly87RiUqoefSJfdu8NWrgc2bxfoyqVRJPA87doRHPiUzpqdCLTD3hFWBMfMVAMMAzAGwDcA0Zt5CRC8TUbdwyuIP8fGirNaulWW1mjYFBg0Cxo4Ftm2LtHSxganAzLdJdSOGl4kTZVavrVsz7/viC5l5pndvx/Tq1YGdO8Mjn5IZtcC8E/ZxYMw8i5lrMHNVZn7VSBvFzDNc5G0TaevLpF8/eaBHjRIX2HvvSfr06ZGVK1Y4ftxugQGqwAJh2jTgq68CO3bBAvl2VmDnzgFTpgC9egGFCzvuq1FDLDD1MkSGgwdlprb8+SMtSfSiM3H4SO7cwNdfA2+8IQv7PvaY9BmoAvON1FSgZElVYFnhtdeA0aP9P45ZFgcAMrsEV60SJeZsfQHywpaWZp8CTAkvhw6p+9AbqsD8oFkz4Nln7Utf3X67uBX374+sXNEOs92FWLaspGVFge3aJbPZB3upm6NHZbmXaMRmkxenffv8nyljyxZ7n6OzAjMtsnr1Mh9Xo4brY5TwoGPAvKMKLAvcfrt863gZz5w7B1y8KAosTx75zooCmzNHlmSZNi14MgLAPfcAXboEt8xgsW+fLEUDyJJx/mBaX9de61qBFS8OlC6d+bjq1eVbFVhk0Fk4vKMKLAvUrCnTTakb0TPm2785XK9cuawpsGXL5DuYwxqOHZOGfuvWMK3C7Sd//23/vX69f8cuXChRhR06uFZgtWu7niy2cmVxnWsgR/jJyJD7UC0wz6gCyyK33y5TTWXnfoKDB7M2bsuchaNkSfnOqgJbvlwa1i1bgAMHvOf3hRkz7Oe4dGlwygwmZrRrwYIyM4yv2GyimNu2FZfgiROO96qpwFwRH+/aalNCz9Gj8t+pAvOMKrAscvvt8rb066+RliQ0HD4sjdjUqYGXYSqwYFhg+/eL0jLHLP3+e+ByWZk+XayUggWBxYuDU2Yw2bZNXgBuvNE/C2zzZlnKpm1b8RgAdoWUmirWca1a7o/XUPrIoLNw+IYqsCzSqBFQoUL2dSOuXi2DYP11W1lxVmDly8sb5pUr/pe1fLl8Dx4s1z0YCuzMGZmJvWdPoHlzsaijjW3bRNHUry+Wp6/XbuFC+W7TJnNQhhnA4c4CA+SYnTt9s8A3bRKFGQirVkmE7+HDgR0fTq5ccT2eLpjoSsy+oQosixCJFTZzpjQEt94KvPCCBC1kB9atk++szInnqg/MZhMl5i/Ll4uVVK8e0KmTKJ7LlwOXDQBmzRIl3bMn0KqVNMQnT2atzGDCbFdg9erJvbV9u2/HLlwoFnTFivY+LX8UWPXqMqGsN4v5wgXgllskEMYfdu4E7rpLInxHjpT6Xn3VHrASaZzHwNlsQP/+wPXX+7fM0o4dco85l92unYwxTXdad0MtMN9QBRYEnnkGeOIJ4Lrr5EF/9VVRYsFk/frIhHgHQ4Glpkr0oTn8ICtjwZYvFzda7txA585iPf35Z+CyAWI9lykj5bZqJWnR1A92/LgoVNMCA3yziG0
2sSbbtJFt5z6trVtlfkpPb/mm1ebNjTh+vFhPmzbZG19vzJ4tynPWLBnftn69zBbywgtyrnv2+FZOqLZUyqMAABRqSURBVJgwQaIzv/tOtpmB4cPt7nQz3RtHjsi6r6+/7pi+aRPwxx8y32THjo4vTQcPynJOZcpk/TyyM6rAgkBiIvD228BPP0mD/9BDwFtviXUQDJilsR46NDjl+YNVgQU6I4M5BsyMdAtUgZ05IyHkzZvLdrt2osiyEo2Yni4NaPfu0mA0aQLkzRtdbkQzgOO666QfK08e3wI5NmwATp2S/i8Tc3YNwHMEookvofTp6eL+u/Za2Z4zx7tsADBpkvTr7doFjBkj1uVPP4nVePq0LKzpbJmEky++kICXvn2BPn1ExvfeAx5/HOjRQ4Zx+LLw56xZ4iX45hvHZ+jHH+Xaf/ihuFBbtJAXpwMHpK+3bFm5JxUPMHPMfxo1asTRxL//MteqxXzNNcypqVkvb+tWZoA5Xz7m8+ezXp6vpKZKvRUryvfRo4GV07Urc/369u1Dh6S8jz/2r5w5c+S4uXPtaa1bO5btL7/+KmX+/rtjmUlJgZcZbD79VGTcu1e2GzZk7tDB+3FvvCHHHThgTxs+nDl/fuaMDOayZZnvv99zGRkZct899ZT7PB98IPX88Qdz+fLMd93lXbaMDOYSJZjvvdf1fvN/GTzYe1mh4PBhZiLm0aOZX3mFOXdukadPH5H9f/+T7YULvZfVs6fkBZhXr7anX389c6tW8nvRIuYiRez5AOYmTUJxZnYAJHMUtN9Z+agFFgIKFBD3wokTMulvVpcOMTvi09PtY6DCgWl93XmnfAfqRjx+3B5CD4hbJpCVmZctk+OaNbOndeokrqdAO/9/+gkoUsTRSmnVCvjrL7H4ooFt2+SeqmAsRFSvnpyzN4v4hx9kRfHERHtajRrSv7Rxo7i2PPV/AXK9q1Vzb4Glp4trrGVLcVV26iSz3nsLMlm/Xp6PDh1c7+/aFXjuOXFNBjr/Y1b49Ve5vj17As8/D6xcKRN5T5ok16RrV+mLnTLFfsyhQ7Jyxd699rRLl4C5c4G77xbL2cy/fbsE49xxh2y3bi3/88yZwOefi7Xn7HJUXBBpDRqMT7RZYCbjxsmbVOPGzCtWSJrNxrxyJfN//+u7dXbXXWLN5cnD/PTToZPXGfMNfvly+f7668DKqVpV3lytlCvHPHCgf+XcfDNzgwaOaevXi2wTJ/ovV0YGc+nSmWWbP1/KnD3b/zJDQceOYnWZvPuuyHfokPtj9u6VPK+/7pi+cKGkjxkj37/95r3+nj2Zr7tOfmdkMA8dynzHHVLGo49KOfPny/7vv7ffM554/XXJd/iw+zyXLzO3bSsW45YtjvsyMpgnTGAeNYr5hReYX3xRzs1m834+vtClC3OVKp7Lu+ce5uLFmS9dEnk6dpRzGjTInmfePEmbMYO5e3d5jq9ckeff2ToON8gGFljEBQjGJ1oVmM3G/M030lgDzJ07M1eqxFddBL64R2w25lKlxNVy883MdeuGXOyr9O4t7sP0dLs7JRAKF2Z+7DHHtKQk5k6dfC/j0iXmggWlwbRis0mj4IvbyplVq+R/+PZbx/Rz58Rl9Nxz/pcZCipWlMbSZNEi7wr2rbckz65djukHD0p6/fryvWeP9/pHjmSOjxeFYja8FSvKPQEwt2hhb+hPnWKOixOF4ol27Zjr1PFe9+HDzCVLyv1y+bI93Xw5BJhz5bLL0rAh8+TJjnn95exZ5rx5mZ94wnO+GTOkzlmzmN9/X35XrSrHHjsmeZ54QrbPnbO7Hf/4g7lRI+amTQOXMRioAouST7QqMJOzZ5mff14UUefOzJMmMT/wgDzoO3d6PnbLFr5qYZhvrZ7evINJzZrMPXrI7woVmPv187+M9HSReexYx/Tu3f1TxqtXSzlTp2bed//9zEWL+t9ovfiiNH4nTmTe16yZNDIZGe6PT06W6xPKfsmzZzNfv5Mn+ap1dei
QWLK33SZK3iq/s7XKLIqmYEE53uwL88YXX/BVCzwujrlXLynn3Dm5Bs7Xr3lzz32I589Lo/7kk97rZrY3/P/9r2z/9Zco1O7d7Yrz/Hnmzz4TSxFgHjLEt7Jd8cMP7FP/Vnq63HctW0o/4a232vurX35Z8lSvbn9R+/df5kKFpP8SEA9HJFEFFiWfaFdgrjh8WBqQvn095/vwQ776pvzXX/L7q69CL9/Zs/JW+9JLst2mDfONN/pfjvnG/8knjumPPspcoIDU4wtPPSVWkflma2XaNKlj2TL/ZGvYUBpbV3z8sZT5+OPu3Ujdu7OD+yxYHDsmjR2zKAhAGlUrlSoxV6smDWJcnOR56y3Zt3+/Y4PvTIMGfNVa8YWlSyV/fLy41dLSPOcfO5Y9Bv3Mncs+uy9N7rxTXOirVomSKlfOtQs+I0PuLSLmtWsd96WlidL1Rv/+4hr05YVo4EA5l5Il7e7Qzp2Zy5Rh3rRJ9n3wgT1/v3581XJ0to7DjSqwQCoEOgHYDmAXgJEu9g8FsAnAegDLANT2VmYsKjBm5meflQdt0yZ72u7dzBcv2rfvvFPcNTabvc/G6k4KFcuW8VXfPbNYjKVL+1+O2Ufl3ACvWCHpn37qvYxLl6Ru0xp0xnRbPf+873KZitVdI2+zifvH+jZtZd8+sd6AwF2rrti2jblYMbF+9+0TFzSQuQ/IjGzr1o15xw75LlhQjnnnHdm3Y4frOnr1kv2+WtRHjkj+3LlFgXhjzRrJP3ky84YNzA8+KFayeV8/+6woQ1+UicnRo6Ik8uSRZ8bTS0NamtwvVtfm3r2i9K69ljklxf2xly/L9e/f3ze5liyRe+/nn+1pZr9XUpL95dNk1ixJq1fPt/JDiSow/5VXHIDdAK4FkAfABmcFBaCw5Xc3AL97KzdWFdjx48wJCdIYbd1qb5R69JAHz2aTh/a+++zH9O0rrkhXrp+PPpL9V65kXTYzNNrsZDb7Ps6ccZ1/7FjpwHfGfJgXL3ZMt9nEEqhTx3vH+y+/SBm//OI+T4sWvlsUzMzjx0uZGza4z5ORIdfe+S2aWQIHiMS1evPNvtfridRUaWBLlpSQ6sREeYGJi3N8qWGW/2XlSvv23r1i0fbowXzTTZ4byBdf9Ky8nbHZmNu3z2xFuyMjQ+7RwoX56vAPawh6gwYyVMFfTEt7xAjvec3/d+pUua41a8o1TUhgrlHDvRv+jz9cv3B5wlkR22ziHgeYa9d23HfpktzzH33ke/mhQhWY/wrsRgBzLNvPAXjOQ/4+AGZ7KzdWFRizvL2bHdEJCfImDchb9ObN8vvLL+35v/pK0v76y7Ec0yUJML/5ZtblGjhQGiFTuZiNx7p1mfPu3y+NecGCYkFa+e47OW7r1szHmX0rixZ5luX22+WN2trH48yrr7LXqDYrPXqIgvCmPC9flv8kVy7mBQsk7eJFcRHdeqsEpxQo4Fk2X7hwQdyZ+fIx//mnWK5ly8o51ajhWxlvvslX3VOvvOI+3+TJksdqNQSbESNEabz1lvSRmRGtAwawyz5RX9m507dIwytXJFClQgUJlsibV16ili6V+7RWLdcuzvvuk7y+urbdYd7bzzyTtXJCiSow/xXYnQAmWLb7A/jQRb5HDEvtAIDqbsoaAiAZQHLFihX9+d+iitOnZTDjk0/Km6LNJg1m7twSeQgw//OPPb85CNg5PPrRR+VNvVUrcbNs3uy97rlzxV3iSrnUry9hwSZr17LbN9PXXpN9BQtKX5nVOjSjs1z1XZ0/L30N1gjCDz+U4AkzuOXYMbkWw4d7Phezf3DSJM/5mKXzvWBBCQf3hbNnpcErXVpcj2ZQwW+/2cPGrdZQIPTvL+VMm2ZP271b6vVVTvPtHmD++2/3+Y4flyAHd9Z0KLDZ5B43FWxWr5cvLF5sfzn86Sd7+qJF8rKXlCT3gsnq1fIi5mnQtq+kp4sL2pcoz0ihCsx/BXaXCwX2gYf89wD4ylu5sWyBueLkSXu
4faVKmffXry/ppi9/715RWoMH2/sKGjXybBXYbBKpBjDffbfjvosXpY/C6qpJS2OXkVM2m3Sqt2xpd9tYZ9h48UVpFNy5NZ9+WhRvSoo9ypJIzu/AAXt/jrWf0N35lC0rof/eMIMIfv3Ve16TLVvE0mrZUj6VK8s5mf1D//d/vpfljBmOPWZM5n1m36c/cr73XuCyhJKMDOkLq1Eja2Hu/vDWW44vBSY//8xXg3RM2Zo0kXvo9OnwyBZpVIH5r8D8dSHmAnDaW7nZTYExS2d5fLzrwb6rVkn02XXXiYUycKAosP37Zb9pFTzzjPuIMXMsUc2aojA2brTvM11MztZWiRKZw5PN8Pbx46Wx7dBBrBvzzXPoUFGo7ti9W+o3+wx695a388KFRbbrrvN9WqcBA8Siu3LFHswwbpycx6pVco7r1ombKF8+e6Sfr5jXxdkCrl5drGZvZGRIJ771Pzl/XpRh7dpZd0PGCsHoow0Gjz/OV/tWJ07ksEX4RguqwPxXYLkB7AFQxRLEcb1TnuqW37f5cpGzowJjln4Qd7N1LF4sbpBatcSCMd8kTfr25avuk4YNZcYC61tvp07iEktJkb63O+6Q9H/+EeVx442Z35KbNJEBqFaGDZM+A7NR3rdPysubV4IJqlQRGT3RtStf7R8xG7clS+x9er52eJuuvUaN7ANb3X26dvWtTGeGDZOxP1aX6MCBojg9WUr798usEmYE2pEjkj5qlKT5MqeeElzS0+XZKFZM+ntvvNE/azfWUQUWmBLrAmCH0cf1vJH2MoBuxu/3AGwxwugXOis4V5/sqsC88fvvYqUVKGBvEE2uXJFQ41GjpF8MkPDpy5ft/UVmFJrZiCYni9IpXNi1775PH0eX5sWLYpX16uWYb80a6bNq3lwUmbdZMvbsYf7888yNx9y5Mtbq1CmfLgefPCmRZlWryvi13bulv2fdOnHTff+99IX8/HPgExMzZ446+/JLuX7u+h2/+07kKlhQ3LIFCsgYrvnz5fqEY1iE4pqdO+WFy9W4seyOKrAo+eRUBcYs47V8GUhrRoH17i2h2QkJdsVw6pQ0sMWLS57vvnNdxgsviEVnhnSb/QieBqReuhS8+el84cKF8NbHLANSXQ3WTk+XMVCAvN2bA1dXrBArDpD/IVwzqyiuWbo083RiOQFVYFHyyckKzB/MIAlABpNaeeklSbeOOXNm0iTJs327PVqydOnwdchHK+Z8jNZJgQ8ckPBtc9yS8zXauFFcqxMmhFdWRTHJDgost9tp6pVsx4gRshTEZ5/JCtJWnn5aFtDztCR81aryPX68rCT711+yDHzuHH4XEclyIkuXynX57TdZvPD8eeD77+3L0VipW1cWlFQUJXBIFHFsk5SUxMnJyZEWI9tz+LB9NeWqVWWdpP79VYEBsqruo4/K7zx5ZH2xceO8r7elKJGCiNYyc1Kk5cgK2vQoPlO2LDB2LFCxolhqqrjs9O0rC0Q2bgy0awcUKhRpiRQl+6MWmKIoSg4kO1hguSItgKIoiqIEgiowRVEUJSZRBaYoiqL8f3v3F2JVFcVx/PtjJimNMPtHjZYKUklQSoT9IcR60JLsocgoEil6CbIownqJHnoIon8UQqhlEFaYlPQQhAn1kqQJZVkk9scpU6O0KMik1cPeQ5dhhrDmzOHs/fvAcO/ecy6sxbqcdc/e5850khuYmZl1khuYmZl1khuYmZl1khuYmZl1khuYmZl1UhFfZJZ0EPjmP778VODHMQynC2rMGerMu8acoc68jzXncyLitKaCGQ9FNLD/Q9K2rn8b/VjVmDPUmXeNOUOdedeYs5cQzcysk9zAzMysk9zA4Pm2A2hBjTlDnXnXmDPUmXd1OVe/B2ZmZt3kKzAzM+skNzAzM+ukahuYpIWSvpC0W9LKtuNpiqRpkrZI2iXpU0kr8vwUSe9I+jI/ntx2rGNNUp+kHZLeyuMZkrbmnF+VNKHtGMeapMmSNkj6PNf80tJrLen
e/N7eKWm9pONLrLWktZIOSNrZMzdibZU8k89vH0ua217kzamygUnqA54DFgGzgZslzW43qsYcBe6LiPOBecBdOdeVwOaImAVszuPSrAB29YwfA57MOf8M3N5KVM16Gng7Is4DLiTlX2ytJQ0AdwMXR8QFQB+wlDJr/SKwcNjcaLVdBMzKP3cCq8YpxnFVZQMDLgF2R8SeiDgCvAIsaTmmRkTEvoj4KD//lXRCGyDluy4ftg64vp0ImyFpKnAtsDqPBSwANuRDSsz5JOBKYA1ARByJiEMUXmugHzhBUj8wEdhHgbWOiPeAn4ZNj1bbJcBLkXwATJZ05vhEOn5qbWADwN6e8WCeK5qk6cAcYCtwRkTsg9TkgNPbi6wRTwEPAH/l8SnAoYg4mscl1nwmcBB4IS+drpY0iYJrHRHfAY8D35Ia12FgO+XXeshota3iHFdrA9MIc0V/n0DSicDrwD0R8Uvb8TRJ0mLgQERs750e4dDSat4PzAVWRcQc4DcKWi4cSd7zWQLMAM4CJpGWz4Yrrdb/pob3e7UNbBCY1jOeCnzfUiyNk3QcqXm9HBEb8/T+oSWF/HigrfgacDlwnaSvScvDC0hXZJPzMhOUWfNBYDAitubxBlJDK7nWVwNfRcTBiPgT2AhcRvm1HjJabas4x9XawD4EZuU7lSaQNn03tRxTI/LezxpgV0Q80fOrTcCy/HwZ8OZ4x9aUiHgwIqZGxHRSbd+NiFuALcAN+bCicgaIiB+AvZLOzVNXAZ9RcK1JS4fzJE3M7/WhnIuudY/RarsJuC3fjTgPODy01FiSav8Sh6RrSJ/K+4C1EfFoyyE1QtIVwPvAJ/yzH/QQaR/sNeBs0kngxogYvkHceZLmA/dHxGJJM0lXZFOAHcCtEfFHm/GNNUkXkW5cmQDsAZaTPqgWW2tJjwA3ke643QHcQdrvKarWktYD80n/NmU/8DDwBiPUNjfzZ0l3Lf4OLI+IbW3E3aRqG5iZmXVbrUuIZmbWcW5gZmbWSW5gZmbWSW5gZmbWSW5gZmbWSW5gZmbWSW5gZmbWSX8DlZ9Tmzov1loAAAAASUVORK5CYII=\n",
2102 | "text/plain": [
2103 | ""
2104 | ]
2105 | },
2106 | "metadata": {
2107 | "needs_background": "light"
2108 | },
2109 | "output_type": "display_data"
2110 | }
2111 | ],
2112 | "source": [
2113 | "plt.plot(range(100), hist.history['loss'], 'r', label='Train Loss')\n",
2114 | "plt.plot(range(100), hist.history['val_loss'], 'b', label='Test Loss')\n",
2115 | "plt.title(\"Dataset1: Neural Network Model on Latent Features: Train-Test Loss \")\n",
2116 | "plt.legend()"
2117 | ]
2118 | },
2119 | {
2120 | "cell_type": "code",
2121 | "execution_count": null,
2122 | "metadata": {},
2123 | "outputs": [],
2124 | "source": []
2125 | }
2126 | ],
2127 | "metadata": {
2128 | "kernelspec": {
2129 | "display_name": "Python 3",
2130 | "language": "python",
2131 | "name": "python3"
2132 | },
2133 | "language_info": {
2134 | "codemirror_mode": {
2135 | "name": "ipython",
2136 | "version": 3
2137 | },
2138 | "file_extension": ".py",
2139 | "mimetype": "text/x-python",
2140 | "name": "python",
2141 | "nbconvert_exporter": "python",
2142 | "pygments_lexer": "ipython3",
2143 | "version": "3.7.1"
2144 | }
2145 | },
2146 | "nbformat": 4,
2147 | "nbformat_minor": 2
2148 | }
2149 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/__pycache__/lstm.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LearnDeepLearningOrg/NetworkIntrusionDetection/11e638a3ad91dff8d343ddbab624a1e5f2eb66d7/Project-UtilityFunctions/__pycache__/lstm.cpython-37.pyc
--------------------------------------------------------------------------------
/Project-UtilityFunctions/classificationlibrary.py:
--------------------------------------------------------------------------------
1 | #Libraries for feature encoding
2 | from sklearn.preprocessing import LabelEncoder
3 |
4 | #Libraries for classification
5 | from sklearn.linear_model import LogisticRegression
6 | from sklearn.tree import DecisionTreeClassifier
7 | from sklearn.neighbors import KNeighborsClassifier
8 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
9 | from sklearn.naive_bayes import GaussianNB
10 | from sklearn.svm import SVC
11 | from sklearn.ensemble import RandomForestClassifier #RandomForestClassifier: Falls under wrapper methods (feature importance)
12 | from sklearn.ensemble import ExtraTreesClassifier #ExtraTreesClassifier: Falls under wrapper methods (feature importance)
13 | from sklearn.neighbors import KNeighborsClassifier
14 |
15 | #Libraries to measure the accuracy
16 | from sklearn import metrics
17 | from sklearn.metrics import accuracy_score
18 |
19 | #import pandas library
20 | import pandas as pd
21 |
#This function is used to perform classification using DecisionTreeClassifier
def classifyUsingDecisionTreeClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train and evaluate a DecisionTreeClassifier.

    Both arguments are DataFrames whose last column is the class label and
    whose remaining columns are the (already encoded/scaled) features.
    Returns (classifier, training accuracy, testing accuracy).
    """
    print("****** Start classification training using DecisionTreeClassifier *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values
    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    # Fit ONE encoder on the union of train/test labels so both sets share a
    # consistent label->integer mapping. Fitting separate encoders (as before)
    # silently mis-aligns the encodings whenever the two sets do not contain
    # the exact same label values, corrupting the reported test accuracy.
    labelencoder = LabelEncoder()
    labelencoder.fit(list(ytrain) + list(ytest))
    ytrain = labelencoder.transform(ytrain)
    ytest = labelencoder.transform(ytest)

    classifier = DecisionTreeClassifier()
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainingAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainingAccuracy)

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testingAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testingAccuracy)
    print("\n****** End classification training using DecisionTreeClassifier *****\n")
    return classifier, trainingAccuracy, testingAccuracy
48 |
#This function is used to perform classification using LogisticRegression
def classifyUsingLogisticRegression(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train and evaluate a LogisticRegression classifier.

    Both arguments are DataFrames whose last column is the class label and
    whose remaining columns are the (already encoded/scaled) features.
    Returns (classifier, training accuracy, testing accuracy).
    """
    print("****** Start classification training using LogisticRegression *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values
    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    # Fit ONE encoder on the union of train/test labels so both sets share a
    # consistent label->integer mapping. Fitting separate encoders (as before)
    # silently mis-aligns the encodings whenever the two sets do not contain
    # the exact same label values, corrupting the reported test accuracy.
    labelencoder = LabelEncoder()
    labelencoder.fit(list(ytrain) + list(ytest))
    ytrain = labelencoder.transform(ytrain)
    ytest = labelencoder.transform(ytest)

    classifier = LogisticRegression()
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainingAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainingAccuracy)

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testingAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testingAccuracy)
    print("\n****** End classification training using LogisticRegression *****\n")
    return classifier, trainingAccuracy, testingAccuracy
75 |
#This function is used to perform classification using LinearDiscriminantAnalysis
def classifyUsingLinearDiscriminantAnalysis(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train and evaluate a LinearDiscriminantAnalysis classifier.

    Both arguments are DataFrames whose last column is the class label and
    whose remaining columns are the (already encoded/scaled) features.
    Returns (classifier, training accuracy, testing accuracy).
    """
    print("****** Start classification training using LinearDiscriminantAnalysis *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values
    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    # Fit ONE encoder on the union of train/test labels so both sets share a
    # consistent label->integer mapping. Fitting separate encoders (as before)
    # silently mis-aligns the encodings whenever the two sets do not contain
    # the exact same label values, corrupting the reported test accuracy.
    labelencoder = LabelEncoder()
    labelencoder.fit(list(ytrain) + list(ytest))
    ytrain = labelencoder.transform(ytrain)
    ytest = labelencoder.transform(ytest)

    classifier = LinearDiscriminantAnalysis()
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainingAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainingAccuracy)

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testingAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testingAccuracy)
    print("\n****** End classification training using LinearDiscriminantAnalysis *****\n")
    return classifier, trainingAccuracy, testingAccuracy
102 |
#This function is used to perform classification using GaussianNaiveBayes
def classifyUsingGaussianNB(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train and evaluate a Gaussian Naive Bayes classifier.

    Both arguments are DataFrames whose last column is the class label and
    whose remaining columns are the (already encoded/scaled) features.
    Returns (classifier, training accuracy, testing accuracy).
    """
    # Original messages misspelled "Guassian"; corrected here.
    print("****** Start classification training using GaussianNaiveBayes *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values
    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    # Fit ONE encoder on the union of train/test labels so both sets share a
    # consistent label->integer mapping. Fitting separate encoders (as before)
    # silently mis-aligns the encodings whenever the two sets do not contain
    # the exact same label values, corrupting the reported test accuracy.
    labelencoder = LabelEncoder()
    labelencoder.fit(list(ytrain) + list(ytest))
    ytrain = labelencoder.transform(ytrain)
    ytest = labelencoder.transform(ytest)

    classifier = GaussianNB()
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainingAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainingAccuracy)

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testingAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testingAccuracy)
    print("\n****** End classification training using GaussianNaiveBayes *****\n")
    return classifier, trainingAccuracy, testingAccuracy
129 |
#This function is used to perform classification using RandomForestClassifier
def classifyUsingRandomForestClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train and evaluate a RandomForestClassifier (100 trees).

    Both arguments are DataFrames whose last column is the class label and
    whose remaining columns are the (already encoded/scaled) features.
    Returns (classifier, training accuracy, testing accuracy).
    """
    print("****** Start classification training using RandomForestClassifier *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values
    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    # Fit ONE encoder on the union of train/test labels so both sets share a
    # consistent label->integer mapping. Fitting separate encoders (as before)
    # silently mis-aligns the encodings whenever the two sets do not contain
    # the exact same label values, corrupting the reported test accuracy.
    labelencoder = LabelEncoder()
    labelencoder.fit(list(ytrain) + list(ytest))
    ytrain = labelencoder.transform(ytrain)
    ytest = labelencoder.transform(ytest)

    classifier = RandomForestClassifier(n_estimators=100)
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainingAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainingAccuracy)

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testingAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testingAccuracy)
    print("\n****** End classification training using RandomForestClassifier *****\n")
    return classifier, trainingAccuracy, testingAccuracy
156 |
#This function is used to perform classification using ExtraTreesClassifier
def classifyUsingExtraTreesClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train an ExtraTrees ensemble (100 trees) and report train/test accuracy.

    Both datasets must have the label as their LAST column.
    Returns (classifier, trainAccuracy, testAccuracy).
    """
    print("****** Start classification training using ExtraTreesClassifier *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values

    print("trainingEncodedAndScaledDataset.shape: ", trainingEncodedAndScaledDataset.shape)

    #Fit the encoder on the TRAINING labels only so train and test share one mapping.
    labelencoder_ytrain = LabelEncoder()
    ytrain = labelencoder_ytrain.fit_transform(ytrain)

    classifier = ExtraTreesClassifier(n_estimators=100)
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainAccuracy)

    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    print("testingEncodedAndScaledDataset.shape: ", testingEncodedAndScaledDataset.shape)

    #BUGFIX: do not fit a second LabelEncoder on the test labels - that can
    #assign different integers to the same class names. Reuse the training
    #mapping; labels unseen in training become -1 and count as errors.
    mapping = {className: index for index, className in enumerate(labelencoder_ytrain.classes_)}
    ytest = [mapping.get(value, -1) for value in ytest]

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testAccuracy)
    print("\n****** End classification training using ExtraTreesClassifier *****\n")
    return classifier, trainAccuracy, testAccuracy
187 |
def classifyUsingKNNClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset):
    """Train a 1-nearest-neighbour classifier and report train/test accuracy.

    Both datasets must have the label as their LAST column.
    Returns (classifier, trainAccuracy, testAccuracy).
    """
    print("****** Start classification training using KNeighborsClassifier *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, -1].values

    #Fit the encoder on the TRAINING labels only so train and test share one mapping.
    labelencoder_ytrain = LabelEncoder()
    ytrain = labelencoder_ytrain.fit_transform(ytrain)

    classifier = KNeighborsClassifier(n_neighbors=1)
    classifier.fit(xtrain, ytrain)

    ytrainpred = classifier.predict(xtrain)
    trainAccuracy = metrics.accuracy_score(ytrain, ytrainpred)
    print("\n*** Classification accuracy score during model training: ", trainAccuracy)

    xtest = testingEncodedAndScaledDataset.iloc[:, :-1].values
    ytest = testingEncodedAndScaledDataset.iloc[:, -1].values

    print("testingEncodedAndScaledDataset.shape: ", testingEncodedAndScaledDataset.shape)

    #BUGFIX: do not fit a second LabelEncoder on the test labels - that can
    #assign different integers to the same class names. Reuse the training
    #mapping; labels unseen in training become -1 and count as errors.
    mapping = {className: index for index, className in enumerate(labelencoder_ytrain.classes_)}
    ytest = [mapping.get(value, -1) for value in ytest]

    # Predicting the Test set results
    ytestpred = classifier.predict(xtest)
    testAccuracy = metrics.accuracy_score(ytest, ytestpred)
    print("*** Classification accuracy score during model testing: ", testAccuracy)
    print("\n****** End classification training using KNeighborsClassifier *****\n")
    return classifier, trainAccuracy, testAccuracy
215 |
def findingOptimumNumberOfNeighboursForKNN(trainingEncodedAndScaledDataset):
    """Search odd K values (1..149) with 10-fold CV and plot the error curve.

    Prints the mean cross-validated accuracy for each K, reports the K with
    the lowest misclassification error, and shows a matplotlib plot.
    """
    print("****** Start finding optimum number of neighbours for KNN *****")
    xtrain = trainingEncodedAndScaledDataset.iloc[:, :-1].values
    ytrain = trainingEncodedAndScaledDataset.iloc[:, len(trainingEncodedAndScaledDataset.columns)-1].values

    labelencoder_ytrain = LabelEncoder()
    ytrain = labelencoder_ytrain.fit_transform(ytrain)

    # Candidate neighbour counts: odd values only (avoids voting ties).
    neighbors = list(range(1, 150, 2))

    # Mean 10-fold cross-validation accuracy for every candidate K.
    cv_scores = []
    for k in neighbors:
        meanScore = cross_val_score(KNeighborsClassifier(n_neighbors=k),
                                    xtrain, ytrain, cv=10, scoring='accuracy').mean()
        cv_scores.append(meanScore)
        print("With number of neighbours as {}, average score is {}".format(k, meanScore))

    # Convert accuracy into misclassification error and pick the minimiser.
    mse = [1 - score for score in cv_scores]
    optimal_k = neighbors[mse.index(min(mse))]
    print("The optimal number of neighbors is {}".format(optimal_k))

    # Visualise misclassification error as a function of K.
    plt.plot(neighbors, mse)
    plt.xlabel("Number of Neighbors K")
    plt.ylabel("Misclassification Error")
    plt.show()

    print("****** End finding optimum number of neighbours for KNN *****")
--------------------------------------------------------------------------------
/Project-UtilityFunctions/dataformatinglibrary.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | #Libraries for printing tables in readable format
4 | from tabulate import tabulate
5 |
6 | #Library for creating an excel sheet
7 | import xlsxwriter
8 |
#This function writes a 2-D array to an excel workbook, one element per row.
def createExcelFromArray(array, fileName):
    """Write each row of *array* into a new worksheet of workbook *fileName*.

    Row i of the worksheet receives array[i], starting at column 0.
    """
    workbook = xlsxwriter.Workbook(fileName)
    worksheet = workbook.add_worksheet()

    #BUGFIX(naming): the enumerate index is the worksheet ROW and the
    #constant 0 is the starting COLUMN; the original code had the two
    #names swapped, which made the write_row call read backwards.
    firstColumn = 0
    for rowIndex, rowData in enumerate(array):
        worksheet.write_row(rowIndex, firstColumn, rowData)

    workbook.close()
18 |
def printList (list, heading):
    """Print the elements of *list* as a one-column org-mode table.

    Does nothing for an empty list.
    """
    #BUGFIX(side effect): build the table rows in a new list instead of
    #overwriting the caller's elements with their str() forms in place.
    rows = [str(item).strip("[]").split(", ") for item in list]
    if rows:
        print(tabulate(rows, headers=[heading], tablefmt='orgtbl')+"\n")
24 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/datainspectionlibrary.py:
--------------------------------------------------------------------------------
1 | #Data formating library
2 | from dataformatinglibrary import printList
3 |
4 | #Data pre-processing library
5 | from datapreprocessinglibrary import checkForMissingValues
6 | from datapreprocessinglibrary import checkForDulicateRecords
7 |
8 | #Utility functions
9 | from defineInputs import getLabelName
10 |
11 | #Libraries for feature selection
12 | #SelectKBest, Chi2: Falls under filter methods (univariate selection)
13 | from sklearn.feature_selection import SelectKBest
14 | from sklearn.feature_selection import chi2
15 | from sklearn.feature_selection import SelectFromModel
16 | from sklearn.ensemble import RandomForestClassifier #RandomForestClassifier: Falls under wrapper methods (feature importance)
17 | from sklearn.ensemble import ExtraTreesClassifier #ExtraTreesClassifier: Falls under wrapper methods (feature importance)
18 |
19 | import numpy as np
20 |
#This function is used to check the statistics of a given dataSet
def getStatisticsOfData (dataSet):
    """Print summary statistics of the dataset.

    Reports shape, feature counts split by categorical/numerical type,
    missing-value and duplicate checks, and the label distribution.
    Duplicates are dropped only on the local reference used for the label
    statistics; the caller's DataFrame is not modified.
    """
    print("***** Start checking the statistics of the dataSet *****\n")

    labelName = getLabelName()
    #Number of rows and columns in the dataset
    print("***** Shape (number of rows and columns) in the dataset: ", dataSet.shape)

    #Total number of features in the dataset (the label column is excluded)
    numberOfColumnsInTheDataset = len(dataSet.drop([labelName],axis=1).columns)
    print("***** Total number of features in the dataset: ",numberOfColumnsInTheDataset)

    #Categorical features = all feature columns minus the numeric ones
    categoricalFeaturesInTheDataset = list(set(dataSet.drop([labelName],axis=1).columns) - set(dataSet.drop([labelName],axis=1)._get_numeric_data().columns))
    print("***** Number of categorical features in the dataset: ",len(categoricalFeaturesInTheDataset))

    #Total number of numerical features in the dataset
    numericalFeaturesInTheDataset = list(dataSet.drop([labelName],axis=1)._get_numeric_data().columns)
    print("***** Number of numerical features in the dataset: ",len(numericalFeaturesInTheDataset))

    #Names of categorical features in the dataset
    print("\n***** Names of categorical features in dataset *****\n")
    printList(categoricalFeaturesInTheDataset,'Categorical features in dataset')

    #Names of numerical features in the dataset
    print("\n***** Names of numerical features in dataset *****\n")
    printList(numericalFeaturesInTheDataset,'Numerical features in the dataset')

    #Checking for any missing values in the data set
    anyMissingValuesInTheDataset = checkForMissingValues(dataSet)
    print("\n***** Are there any missing values in the data set: ", anyMissingValuesInTheDataset)

    anyDuplicateRecordsInTheDataset = checkForDulicateRecords(dataSet)
    print("\n***** Are there any duplicate records in the data set: ", anyDuplicateRecordsInTheDataset)
    #Drop duplicates on the local reference so the label statistics below
    #count each record only once (the caller's DataFrame is untouched)
    if (anyDuplicateRecordsInTheDataset):
        dataSet = dataSet.drop_duplicates()
        print("Number of records in the dataSet after removing the duplicates: ", len(dataSet.index))

    #How many number of different values for label that are present in the dataset
    print('\n****** Number of different values for label that are present in the dataset: ',dataSet[labelName].nunique())
    #What are the different values for label in the dataset
    print('\n****** Here is the list of unique label types present in the dataset ***** \n')
    printList(list(dataSet[labelName].unique()),'Unique label types in the dataset')

    #What are the different values in each of the categorical features in the dataset.
    #Recomputed over ALL columns here, so the (categorical) label is included too.
    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    categoricalFeaturesInTheDataset = list(set(dataSet.columns) - set(dataSet._get_numeric_data().columns))
    for feature in categoricalFeaturesInTheDataset:
        uniq = np.unique(dataSet[feature])
        print('\n{}: {} '.format(feature,len(uniq)))
        printList(dataSet[feature].unique(),'distinct values')

    print('\n****** Label distribution in the dataset *****\n')
    print(dataSet[labelName].value_counts())
    print()

    print("\n***** End checking the statistics of the dataSet *****")
--------------------------------------------------------------------------------
/Project-UtilityFunctions/dataloadinglibrary.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
#This function is used to load a CSV file, given its absolute path,
#into a pandas DataFrame
def loadCSV (fileNameWithAbsolutePath):
    """Read the CSV at the given absolute path and return it as a DataFrame."""
    return pd.read_csv(fileNameWithAbsolutePath)
8 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/datapreprocessinglibrary.py:
--------------------------------------------------------------------------------
1 | from sklearn.model_selection import train_test_split
2 | from defineInputs import getLabelName
3 |
#This function is used to check for missing values in a given dataSet
def checkForMissingValues (dataSet):
    """Return True when the dataset contains at least one null/NaN cell."""
    return dataSet.isnull().any().any()
8 |
#This function is used to check for duplicate records in a given dataSet
def checkForDulicateRecords (dataSet):
    """Print record counts and return True when duplicate rows exist."""
    totalRecordsInDataset = len(dataSet.index)
    numberOfUniqueRecordsInDataset = len(dataSet.drop_duplicates().index)
    print('Total number of records in the dataset: {}\nUnique records in the dataset: {}'.format(totalRecordsInDataset,numberOfUniqueRecordsInDataset))
    return totalRecordsInDataset != numberOfUniqueRecordsInDataset
16 |
#Split the complete dataSet into training dataSet and testing dataSet
def splitCompleteDataSetIntoTrainingSetAndTestingSet(completeDataSet):
    """Split the dataset 60%/40% into train/test with a fixed seed (42).

    Returns (trainFeatures, testFeatures, trainLabels, testLabels).
    """
    labelName = getLabelName()
    label = completeDataSet[labelName]
    features = completeDataSet.drop(labelName, axis=1)
    featuresTrain, featuresTest, labelTrain, labelTest = train_test_split(
        features, label, test_size=0.4, random_state=42)
    print("features.shape: ", features.shape)
    print("label.shape: ", label.shape)
    return featuresTrain, featuresTest, labelTrain, labelTest
26 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/defineInputs.py:
--------------------------------------------------------------------------------
#This function is to maintain the name of the label at a single place
def getLabelName():
    """Return the column name used as the classification label."""
    labelColumnName = 'attack_type'
    return labelColumnName
4 |
def getPathToTrainingAndTestingDataSets():
    """Return (trainingCsvPath, testingCsvPath) for the NSL-KDD dataset."""
    datasetDirectory = "D:\\Learning\\DeepLearning\\Project-AttackDetectionSystem\\Datasets\\NSL-KDD\\"
    trainingFileNameWithAbsolutePath = datasetDirectory + "KDDTrain+_20Percent.csv"
    testingFileNameWithAbsolutePath = datasetDirectory + "KDDTest-21.csv"
    return trainingFileNameWithAbsolutePath, testingFileNameWithAbsolutePath
9 |
def modelPerformanceReport():
    """Return the absolute path of the model-performance report workbook."""
    # Use a local name distinct from the function name (the original
    # shadowed the function with a same-named local variable).
    reportPath = 'D:\\Learning\\DeepLearning\\Project-AttackDetectionSystem\\ModelsAndTheirPerformanceReports\\ModelsPerformance031442020.1.xlsx'
    return reportPath
13 |
def getPathToGenerateModels():
    """Return the directory where generated models are written."""
    return 'D:\\Learning\\DeepLearning\\Project-AttackDetectionSystem\\ModelsAndTheirPerformanceReports\\'
17 |
### Models with the below configuration will be generated
def defineArrayOfModels():
    """Return the model-configuration table: one header row, then one row per model."""
    headerRow = [
        "FeatureSelectionTechnique",
        "FeatureEncodingTechnique",
        "FeatureNormalizationTechnique",
        "ClassificationTechnique",
        "TrainAccuraccy",
        "TestAccuraccy",
        "ModelName",
        "ModelFileName",
    ]
    # Every model shares the same pre-processing; only the classifier differs.
    classifierNames = [
        "DecisonTree",
        "RandomForestClassifier",
        "ExtraTreesClassifier",
        "KNN",
    ]
    arrayOfModels = [headerRow]
    for classifierName in classifierNames:
        arrayOfModels.append([
            "ExtraTreesClassifier",
            "OneHotEncoder",
            "Standardization",
            classifierName,
        ])
    print(arrayOfModels)
    return arrayOfModels
58 |
def defineArrayForPreProcessing():
    """Return the pre-processing configuration rows (selection, encoding, scaling)."""
    arrayOfModels = [["ExtraTreesClassifier", "OneHotEncoder", "Standardization"]]
    print(arrayOfModels)
    return arrayOfModels
69 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/featureencodinglibrary.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 | #Libraries for feature encoding
5 | from sklearn.preprocessing import LabelEncoder
6 | import category_encoders as ce
7 |
8 | #Utility functions
9 | from defineInputs import getLabelName
10 | from dataformatinglibrary import printList
11 |
#This function is used to perform one hot encoding on the categorical features in the given dataset
def featureEncodingUsingOneHotEncoder(dataSetForFeatureEncoding):
    """One-hot encode every categorical feature (label excluded) via pd.get_dummies.

    Returns a new DataFrame with the dummy columns added and the label moved
    back to the last position.
    """
    print("****** Start one hot encoding on the categorical features in the given dataset *****")

    labelName = getLabelName()
    #Extract the categorical features, leave the label
    categoricalColumnsInTheDataSet = dataSetForFeatureEncoding.drop([labelName],axis=1).select_dtypes(['object'])
    #Get the names of the categorical features
    categoricalColumnNames = categoricalColumnsInTheDataSet.columns.values

    print("****** Number of features before one hot encoding: ",len(dataSetForFeatureEncoding.columns))
    print("****** Number of categorical features in the dataset: ",len(categoricalColumnNames))
    print("****** Categorical feature names in the dataset: ",categoricalColumnNames)

    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    #Note: this set difference runs over ALL columns, so the (categorical)
    #label column's distinct values are listed here as well.
    categoricalFeaturesInTheDataset = list(set(dataSetForFeatureEncoding.columns) - set(dataSetForFeatureEncoding._get_numeric_data().columns))
    for feature in categoricalFeaturesInTheDataset:
        uniq = np.unique(dataSetForFeatureEncoding[feature])
        print('\n{}: {} '.format(feature,len(uniq)))
        printList(dataSetForFeatureEncoding[feature].unique(),'distinct values')

    #Using get_dummies function to get the dummy variables for the categorical columns
    onHotEncodedDataSet=pd.get_dummies(dataSetForFeatureEncoding, columns=categoricalColumnNames, prefix=categoricalColumnNames)

    #Move the label column to the end
    label = onHotEncodedDataSet.pop(labelName)
    onHotEncodedDataSet[labelName] = label
    numberOfColumnsInOneHotEncodedDataset = len(onHotEncodedDataSet.columns)
    print("****** Number of features after one hot encoding: ",numberOfColumnsInOneHotEncodedDataset)

    print("****** End one hot encoding on the categorical features in the given dataset *****\n")
    return onHotEncodedDataSet
45 |
#This function is used to perform label encoding on the categorical features in the given dataset
def featureEncodingUsingLabelEncoder(dataSetForFeatureEncoding):
    """Label-encode every categorical feature (label column excluded) in place.

    The input DataFrame is modified and also returned.
    """
    print("****** Start label encoding on the categorical features in the given dataset *****")

    labelName = getLabelName()
    # Names of the object-typed feature columns, with the label excluded.
    categoricalColumnNames = (dataSetForFeatureEncoding
                              .drop([labelName], axis=1)
                              .select_dtypes(['object'])
                              .columns.values)

    print("****** Number of features before label encoding: ", len(dataSetForFeatureEncoding.columns))
    print("****** Number of categorical features in the dataset: ", len(categoricalColumnNames))
    print("****** Categorical feature names in the dataset: ", categoricalColumnNames)

    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    labelEncoder = LabelEncoder()
    for feature in categoricalColumnNames:
        distinctValues = np.unique(dataSetForFeatureEncoding[feature])
        print('\n{}: {} '.format(feature, len(distinctValues)))
        printList(dataSetForFeatureEncoding[feature].unique(), 'distinct values')
        # Replace the string categories of this column with integer codes.
        dataSetForFeatureEncoding[feature] = labelEncoder.fit_transform(dataSetForFeatureEncoding[feature])
    print("****** Number of features after label encoding: ", len(dataSetForFeatureEncoding.columns))

    print("****** End label encoding on the categorical features in the given dataset *****\n")
    return dataSetForFeatureEncoding
71 |
#This function is used to perform binary encoding on the categorical features in the given dataset
def featureEncodingUsingBinaryEncoder(dataSetForFeatureEncoding):
    """Binary-encode every categorical feature (label excluded) with category_encoders.

    Each categorical column is replaced by a set of binary (0/1) columns
    produced by ce.BinaryEncoder; the label column is re-attached as the
    last column of the returned DataFrame.
    """
    print("****** Start binary encoding on the categorical features in the given dataset *****")

    labelName = getLabelName()
    #Extract the categorical features, leave the label
    categoricalColumnsInTheDataSet = dataSetForFeatureEncoding.drop([labelName],axis=1).select_dtypes(['object'])
    #Get the names of the categorical features
    categoricalColumnNames = categoricalColumnsInTheDataSet.columns.values

    print("****** Number of features before binary encoding: ",len(dataSetForFeatureEncoding.columns))
    print("****** Number of categorical features in the dataset: ",len(categoricalColumnNames))
    print("****** Categorical feature names in the dataset: ",categoricalColumnNames)

    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    #Keep ONLY the label column (drop every column not named labelName) -> single-column DataFrame
    label = dataSetForFeatureEncoding.drop(dataSetForFeatureEncoding.loc[:, ~dataSetForFeatureEncoding.columns.isin([labelName])].columns, axis = 1)
    for feature in categoricalColumnNames:
        uniq = np.unique(dataSetForFeatureEncoding[feature])
        print('\n{}: {} '.format(feature,len(uniq)))
        printList(dataSetForFeatureEncoding[feature].unique(),'distinct values')
        #Single-column DataFrame holding just this feature
        featureColumns = dataSetForFeatureEncoding.drop(dataSetForFeatureEncoding.loc[:, ~dataSetForFeatureEncoding.columns.isin([feature])].columns, axis = 1)
        binaryEncoder = ce.BinaryEncoder(cols = [feature])
        binaryEncodedFeature = binaryEncoder.fit_transform(featureColumns, label)
        #Append the encoded columns, then drop the original categorical column
        dataSetForFeatureEncoding = dataSetForFeatureEncoding.join(binaryEncodedFeature)
        dataSetForFeatureEncoding = dataSetForFeatureEncoding.drop(feature, axis=1)

    #Move the label to the end: drop it, then re-attach it as the last column
    dataSetForFeatureEncoding = dataSetForFeatureEncoding.drop(labelName, axis=1)
    dataSetForFeatureEncoding[labelName] = label
    print("****** Number of features after binary encoding: ",len(dataSetForFeatureEncoding.columns))

    print("****** End binary encoding on the categorical features in the given dataset *****\n")
    return dataSetForFeatureEncoding
104 |
#This function is used to perform frequency encoding on the categorical features in the given dataset
def featureEncodingUsingFrequencyEncoder(dataSetForFeatureEncoding):
    """Frequency-encode every categorical feature (label excluded).

    Each category value is replaced by its relative frequency
    (group count / total rows). The label column is re-attached as the
    last column of the returned DataFrame.
    """
    print("****** Start frequency encoding on the categorical features in the given dataset *****")

    labelName = getLabelName()
    #Extract the categorical features, leave the label
    categoricalColumnsInTheDataSet = dataSetForFeatureEncoding.drop([labelName],axis=1).select_dtypes(['object'])
    #Get the names of the categorical features
    categoricalColumnNames = categoricalColumnsInTheDataSet.columns.values

    #BUGFIX: this message previously said "label encoding" (copy-paste error)
    print("****** Number of features before frequency encoding: ",len(dataSetForFeatureEncoding.columns))
    print("****** Number of categorical features in the dataset: ",len(categoricalColumnNames))
    print("****** Categorical feature names in the dataset: ",categoricalColumnNames)

    print('\n****** Here is the list of unique values present in each categorical feature in the dataset *****\n')
    #Keep ONLY the label column as a single-column DataFrame
    label = dataSetForFeatureEncoding.drop(dataSetForFeatureEncoding.loc[:, ~dataSetForFeatureEncoding.columns.isin([labelName])].columns, axis = 1)
    for feature in categoricalColumnNames:
        uniq = np.unique(dataSetForFeatureEncoding[feature])
        print('\n{}: {} '.format(feature,len(uniq)))
        printList(dataSetForFeatureEncoding[feature].unique(),'distinct values')
        #Relative frequency of each category value in this column
        frequencyEncoder = dataSetForFeatureEncoding.groupby(feature).size()/len(dataSetForFeatureEncoding)
        dataSetForFeatureEncoding.loc[:,feature+"_Encoded"] = dataSetForFeatureEncoding[feature].map(frequencyEncoder)
        dataSetForFeatureEncoding = dataSetForFeatureEncoding.drop(feature, axis=1)

    #Move the label to the end: drop it, then re-attach it as the last column
    dataSetForFeatureEncoding = dataSetForFeatureEncoding.drop(labelName, axis=1)
    dataSetForFeatureEncoding[labelName] = label
    print("****** Number of features after frequency encoding: ",len(dataSetForFeatureEncoding.columns))

    print("****** End frequency encoding on the categorical features in the given dataset *****\n")
    return dataSetForFeatureEncoding
--------------------------------------------------------------------------------
/Project-UtilityFunctions/featurescalinglibrary.py:
--------------------------------------------------------------------------------
1 | #Utility functions
2 | from defineInputs import getLabelName
3 |
4 | import pandas as pd
5 | import numpy as np
6 |
7 | #Libraries for feature scaling
8 | from sklearn.preprocessing import MinMaxScaler
9 | from sklearn.preprocessing import StandardScaler
10 | from sklearn.preprocessing import Binarizer
11 | from sklearn.preprocessing import Normalizer
12 |
13 |
#This function is used to perform min-max feature scaling on the features in the given dataset
#Formula for Min-Max scaler feature scaling is (Xi-Xmin)/(Xmax-Xmin)
def featureScalingUsingMinMaxScaler(dataSetForFeatureScaling):
    """Scale all feature columns into [0, 1] using sklearn's MinMaxScaler.

    Assumes the label column (getLabelName()) is the LAST column of the
    given DataFrame. Returns a new DataFrame of scaled features with the
    label re-attached as the last column (index reset to 0..n-1).

    NOTE(review): `pop` below removes the label column from the CALLER's
    DataFrame in place - confirm callers do not rely on it afterwards.
    """
    print("****** Start feature scaling of the features present in the dataset using MinMaxScaler *****")

    numberOfColumnsInEncodedDataset = len(dataSetForFeatureScaling.columns)
    #Snapshot taken BEFORE the pop below, so this array still contains the label column
    dataSetInArrayFormat = dataSetForFeatureScaling.values

    #Remove the label column from the dataset
    labelName = getLabelName()
    label = dataSetForFeatureScaling.pop(labelName)

    print(dataSetInArrayFormat)
    #Drop the trailing label column from the snapshot; everything else is a feature
    features = dataSetInArrayFormat[:,0:numberOfColumnsInEncodedDataset-1]
    print("\n****** Number of features in the dataset before performing scaling: ",np.size(features,1))
    print("\n****** Features in the dataset before performing scaling ***** \n",features)

    #Perform feature scaling
    scaler=MinMaxScaler(feature_range=(0,1))
    scaledFeatures=scaler.fit_transform(features)
    print("\n****** Number of features in the dataset after performing scaling: ",np.size(scaledFeatures,1))
    print("\n****** Features in the dataset after performing scaling ***** \n",scaledFeatures)

    #Convert from array format to dataframe; columns now exclude the popped label
    scaledFeatures = pd.DataFrame(scaledFeatures, columns=dataSetForFeatureScaling.columns)
    scaledFeatures = scaledFeatures.reset_index(drop=True)
    label = label.reset_index(drop=True)
    scaledFeatures[labelName]=label

    print("\n****** End of feature scaling of the features present in the dataset using MinMaxScaler *****\n")
    return scaledFeatures
45 |
#This function is used to perform StandardScaler feature scaling on the features in the given dataset
#This is also called as Z-score normalization
#Formula for StandardScaler feature scaling is z = (x – mean) / standard-deviation.
def featureScalingUsingStandardScalar(dataSetForFeatureScaling):
    """Standardize all feature columns (zero mean, unit variance).

    Assumes the label column (getLabelName()) is the LAST column of the
    given DataFrame. Returns a new DataFrame of scaled features with the
    label re-attached as the last column (index reset to 0..n-1).

    NOTE(review): `pop` below removes the label column from the CALLER's
    DataFrame in place - confirm callers do not rely on it afterwards.
    """
    print("****** Start feature scaling of the features present in the dataset using StandardScalar *****")

    numberOfColumnsInEncodedDataset = len(dataSetForFeatureScaling.columns)
    #Snapshot taken BEFORE the pop below, so this array still contains the label column
    dataSetInArrayFormat = dataSetForFeatureScaling.values

    #Remove the label column from the dataset
    labelName = getLabelName()
    label = dataSetForFeatureScaling.pop(labelName)

    print(dataSetInArrayFormat)
    #Drop the trailing label column from the snapshot; everything else is a feature
    features = dataSetInArrayFormat[:,0:numberOfColumnsInEncodedDataset-1]
    print("\n****** Number of features in the dataset before performing scaling: ",np.size(features,1))
    print("\n****** Features in the dataset before performing scaling ***** \n",features)

    #Perform feature scaling
    scaler=StandardScaler()
    scaledFeatures=scaler.fit_transform(features)
    print("\n****** Number of features in the dataset after performing scaling: ",np.size(scaledFeatures,1))
    print("\n****** Features in the dataset after performing scaling ***** \n",scaledFeatures)

    #Convert from array format to dataframe; columns now exclude the popped label
    scaledFeatures = pd.DataFrame(scaledFeatures, columns=dataSetForFeatureScaling.columns)
    scaledFeatures = scaledFeatures.reset_index(drop=True)
    label = label.reset_index(drop=True)
    scaledFeatures[labelName]=label
    print("scaledFeatures.head(): ",scaledFeatures.head())
    print("scaledFeatures.shape: ",scaledFeatures.shape)

    print("\n****** End of feature scaling of the features present in the dataset using StandardScalar *****\n")
    return scaledFeatures
80 |
#This function is used to perform Binarizing feature scaling on the features in the given dataset
#It is used for binary thresholding of an array like matrix.
def featureScalingUsingBinarizer(dataSetForFeatureScaling):
    """Threshold every feature column to 0/1 using sklearn's Binarizer.

    Assumes the label column (getLabelName()) is the LAST column of the
    given DataFrame. Returns a new DataFrame of binarized features with
    the label re-attached as the last column (index reset to 0..n-1).

    NOTE(review): `pop` removes the label column from the CALLER's
    DataFrame in place - confirm callers do not rely on it afterwards.
    """
    print("****** Start feature scaling of the features present in the dataset using Binarizer *****")

    numberOfColumnsInEncodedDataset = len(dataSetForFeatureScaling.columns)
    #Snapshot taken BEFORE the pop below, so this array still contains the label column
    dataSetInArrayFormat = dataSetForFeatureScaling.values

    #Remove the label column from the dataset
    labelName = getLabelName()
    label = dataSetForFeatureScaling.pop(labelName)

    print(dataSetInArrayFormat)
    #Drop the trailing label column from the snapshot; everything else is a feature
    features = dataSetInArrayFormat[:,0:numberOfColumnsInEncodedDataset-1]
    print("\n****** Number of features in the dataset before performing scaling: ",np.size(features,1))
    print("\n****** Features in the dataset before performing scaling ***** \n",features)

    #Perform feature scaling
    #BUGFIX: 'threshold' must be passed by keyword - recent scikit-learn
    #versions reject the positional form Binarizer(0.0).
    scaledFeatures=Binarizer(threshold=0.0).fit(features).transform(features)
    print("\n****** Number of features in the dataset after performing scaling: ",np.size(scaledFeatures,1))
    print("\n****** Features in the dataset after performing scaling ***** \n",scaledFeatures)

    #Convert from array format to dataframe; columns now exclude the popped label
    scaledFeatures = pd.DataFrame(scaledFeatures, columns=dataSetForFeatureScaling.columns)
    scaledFeatures = scaledFeatures.reset_index(drop=True)
    label = label.reset_index(drop=True)
    scaledFeatures[labelName]=label

    print("\n****** End of feature scaling of the features present in the dataset using Binarizer *****\n")
    return scaledFeatures
111 |
#This function is used to perform Normalizing feature scaling on the features in the given dataset
#It is used to rescale each sample.
#Each sample (i.e. each row of the data matrix) with at least one non zero component
#is rescaled independently of other samples so that its norm (l1 or l2) equals one.
def featureScalingUsingNormalizer(dataSetForFeatureScaling):
    """Rescale each ROW of the feature matrix to unit norm via sklearn's Normalizer.

    Assumes the label column (getLabelName()) is the LAST column of the
    given DataFrame. Returns a new DataFrame of normalized features with
    the label re-attached as the last column (index reset to 0..n-1).

    NOTE(review): `pop` below removes the label column from the CALLER's
    DataFrame in place - confirm callers do not rely on it afterwards.
    """
    print("****** Start feature scaling of the features present in the dataset using Normalizer *****")

    numberOfColumnsInEncodedDataset = len(dataSetForFeatureScaling.columns)
    #Snapshot taken BEFORE the pop below, so this array still contains the label column
    dataSetInArrayFormat = dataSetForFeatureScaling.values

    #Remove the label column from the dataset
    labelName = getLabelName()
    label = dataSetForFeatureScaling.pop(labelName)

    print(dataSetInArrayFormat)

    #Drop the trailing label column from the snapshot; everything else is a feature
    features = dataSetInArrayFormat[:,0:numberOfColumnsInEncodedDataset-1]
    print("\n****** Number of features in the dataset before performing scaling: ",np.size(features,1))
    print("\n****** Features in the dataset before performing scaling ***** \n",features)

    #Perform feature scaling
    scaledFeatures=Normalizer().fit(features).transform(features)
    print("\n****** Number of features in the dataset after performing scaling: ",np.size(scaledFeatures,1))
    print("\n****** Features in the dataset after performing scaling ***** \n",scaledFeatures)

    #Convert from array format to dataframe; columns now exclude the popped label
    scaledFeatures = pd.DataFrame(scaledFeatures, columns=dataSetForFeatureScaling.columns)
    scaledFeatures = scaledFeatures.reset_index(drop=True)
    label = label.reset_index(drop=True)
    scaledFeatures[labelName]=label

    print("\n****** End of feature scaling of the features present in the dataset using Normalizer *****\n")
    return scaledFeatures
145 |
146 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/featureselectionlibrary.py:
--------------------------------------------------------------------------------
1 | #Utility functions
2 | from defineInputs import getLabelName
3 |
4 | from featureencodinglibrary import featureEncodingUsingLabelEncoder
5 | from dataformatinglibrary import printList
6 |
7 | #Matplotlib is a plotting library for the Python programming language and its numerical mathematics extension NumPy
8 | import matplotlib.pyplot as plt
9 | from matplotlib.pyplot import figure
10 | import seaborn as sns
11 | import numpy as np
12 | import pandas as pd
13 | import math
14 | import scipy.stats as ss
15 | from collections import Counter
16 | from sklearn.ensemble import RandomForestClassifier #RandomForestClassifier: Falls under wrapper methods (feature importance)
17 | from sklearn.ensemble import ExtraTreesClassifier #ExtraTreesClassifier: Falls under wrapper methods (feature importance)
18 | from sklearn.feature_selection import SelectKBest
19 | from sklearn.feature_selection import chi2
20 | from sklearn.preprocessing import LabelEncoder
21 |
#This function is used to calculate the conditional entropy between a given feature and the target
def conditional_entropy(x, y):
    """Return the conditional entropy H(X|Y) of sequence *x* given *y*."""
    y_counts = Counter(y)
    pair_counts = Counter(zip(x, y))
    total = sum(y_counts.values())
    result = 0
    for pair, pair_occurrences in pair_counts.items():
        p_xy = pair_occurrences / total
        p_y = y_counts[pair[1]] / total
        # log(p_y / p_xy) == -log(p(x|y)); accumulate its expectation.
        result += p_xy * math.log(p_y / p_xy)
    return result
34 |
#This function is used to perform feature selection using TheilU
#In TheilU we calculate the uncertainty coefficient between the given feature and the target
def theil_u(x, y):
    """Return Theil's uncertainty coefficient U(x|y), a value in [0, 1]."""
    h_x_given_y = conditional_entropy(x, y)
    value_counts = Counter(x)
    total = sum(value_counts.values())
    # Marginal entropy of x from its empirical distribution.
    h_x = ss.entropy([count / total for count in value_counts.values()])
    if h_x == 0:
        # x carries no entropy at all, so it is trivially fully "explained".
        return 1
    return (h_x - h_x_given_y) / h_x
47 |
def featureSelectionUsingTheilU(dataSetForFeatureSelection):
    """Keep the columns whose uncertainty coefficient with the label is >= 0.5.

    Computes Theil's U between the label and every column, plots all the
    coefficients as a heatmap, and returns a dataframe containing only the
    sufficiently informative columns. The label column scores 1.0 against
    itself and is therefore always retained.
    """
    print("\n****** Start performing feature selection using TheilU *****")
    print("****** Falls under the group of techniques that use correlation matrix with Heatmap *****")

    labelName = getLabelName()
    label = dataSetForFeatureSelection[labelName]

    theilu = pd.DataFrame(index=[labelName], columns=dataSetForFeatureSelection.columns)
    columns = dataSetForFeatureSelection.columns
    # Work on a copy so the caller's dataframe is not mutated: the original
    # implementation aliased the input and popped columns from it in place.
    dataSetAfterFeatuerSelection = dataSetForFeatureSelection.copy()

    for j in range(0, len(columns)):
        u = theil_u(label.tolist(), dataSetForFeatureSelection[columns[j]].tolist())
        theilu.loc[:, columns[j]] = u
        if u < 0.50:
            # Weakly related to the label -> drop the column.
            dataSetAfterFeatuerSelection.pop(columns[j])

    print('***** Ploting the uncertainty coefficient between the target and each feature *****')
    theilu.fillna(value=np.nan, inplace=True)
    plt.figure(figsize=(30, 1))
    sns.heatmap(theilu, annot=True, fmt='.2f')
    plt.show()

    print('***** Number of columns in the dataSet after feature selection: ', len(dataSetAfterFeatuerSelection.columns))
    print('***** Columns in the dataSet after feature selection: \n', dataSetAfterFeatuerSelection.columns)
    print("****** End performing feature selection using TheilU *****")
    return dataSetAfterFeatuerSelection
76 |
#This function is used to perform feature selection using Chi-squared test
def featureSelectionUsingChisquaredTest(dataSetForFeatureSelection):
    """Keep the 10 features with the highest chi-squared score vs the label."""
    print("\n****** Start performing feature selection using ChisquaredTest *****")
    print("****** Falls under filter methods (univariate selection) *****")

    numberOfFeatureToBeSelected = 10
    labelName = getLabelName()

    # Chi-squared requires non-negative numeric input, so label-encode first.
    dataSetForFeatureSelection = featureEncodingUsingLabelEncoder(dataSetForFeatureSelection)

    features = dataSetForFeatureSelection.drop([labelName], axis=1)
    label = dataSetForFeatureSelection[labelName]

    # Score every feature against the label and keep the top ten.
    selector = SelectKBest(score_func=chi2, k=numberOfFeatureToBeSelected)
    fitBestfeatures = selector.fit(features, label)

    # One table of (feature, score) for readable reporting.
    scoresOfBestFeatures = pd.concat(
        [pd.DataFrame(features.columns), pd.DataFrame(fitBestfeatures.scores_)],
        axis=1)
    scoresOfBestFeatures.columns = ['Features', 'Score']
    print("\n***** Scores for each feature in the dataset are *****")
    print(scoresOfBestFeatures.nlargest(numberOfFeatureToBeSelected, 'Score'))

    # Drop every feature the selector did not retain; the label column is
    # untouched, so it survives into the returned dataframe.
    mask = fitBestfeatures.get_support()
    rejected = [features.columns[j] for j in range(len(mask)) if not mask[j]]
    dataSetAfterFeatuerSelection = dataSetForFeatureSelection.drop(rejected, axis=1)

    print('***** Number of columns in the dataSet after feature selection: ', len(dataSetAfterFeatuerSelection.columns))
    print('***** Columns in the dataSet after feature selection: \n', dataSetAfterFeatuerSelection.columns)
    print("****** End performing feature selection using ChisquaredTest *****")

    return dataSetAfterFeatuerSelection
115 |
#This function is used to perform feature selection using RandomForestClassifier
def featureSelectionUsingRandomForestClassifier(dataSetForFeatureSelection):
    """Keep the features a random forest ranks above the mean importance."""
    print("\n****** Start performing feature selection using RandomForestClassifier *****")
    print("****** Falls under wrapper methods (feature importance) *****")

    labelName = getLabelName()

    # Encode categoricals before fitting the forest.
    dataSetForFeatureSelection = featureEncodingUsingLabelEncoder(dataSetForFeatureSelection)
    features = dataSetForFeatureSelection.drop([labelName], axis=1)
    label = dataSetForFeatureSelection[labelName]

    labelTransformed = LabelEncoder().fit_transform(label)

    print("****** RandomForestClassification is in progress *****")
    trainedforest = RandomForestClassifier(n_estimators=700).fit(features, labelTransformed)
    importances = trainedforest.feature_importances_
    # Only features scoring strictly above the mean importance are kept.
    keepCount = int((importances > np.mean(importances)).sum())
    featureImportances = pd.Series(importances, index=features.columns)
    selectedFeatures = featureImportances.nlargest(keepCount)
    print("\n selectedFeatures after RandomForestClassification: ", selectedFeatures)
    print("****** Completed RandomForestClassification *****")

    # Retain the selected columns (in their original dataframe order) and
    # append the label column at the end, matching the original layout.
    selectedFeaturesNames = selectedFeatures.keys()
    keptColumns = [column for column in features.columns if column in selectedFeaturesNames]
    dataSetAfterFeatuerSelection = dataSetForFeatureSelection[keptColumns].copy()
    dataSetAfterFeatuerSelection[labelName] = label

    print('\n***** Number of columns in the dataSet after feature selection: ', len(dataSetAfterFeatuerSelection.columns))
    print('***** Columns in the dataSet after feature selection: \n', dataSetAfterFeatuerSelection.columns)
    print("****** End performing feature selection using RandomForestClassifier *****")
    return dataSetAfterFeatuerSelection
158 |
#This function is used to perform feature selection using ExtraTreesClassifier
def featureSelectionUsingExtraTreesClassifier(dataSetForFeatureSelection):
    """Keep the features an ExtraTrees ensemble ranks above the mean importance."""
    print("\n****** Start performing feature selection using ExtraTreesClassifier *****")
    print("****** Falls under wrapper methods (feature importance) *****")

    labelName = getLabelName()

    # Encode categoricals before fitting the ensemble.
    dataSetForFeatureSelection = featureEncodingUsingLabelEncoder(dataSetForFeatureSelection)
    features = dataSetForFeatureSelection.drop([labelName], axis=1)
    label = dataSetForFeatureSelection[labelName]

    labelTransformed = LabelEncoder().fit_transform(label)

    print("****** ExtraTreesClassification is in progress *****")
    trainedforest = ExtraTreesClassifier(n_estimators=700).fit(features, labelTransformed)
    importances = trainedforest.feature_importances_
    # Only features scoring strictly above the mean importance are kept.
    keepCount = int((importances > np.mean(importances)).sum())
    featureImportances = pd.Series(importances, index=features.columns)
    selectedFeatures = featureImportances.nlargest(keepCount)
    print("\n selectedFeatures after ExtraTreesClassification: ", selectedFeatures)
    print("****** Completed ExtraTreesClassification *****")

    # Retain the selected columns (in their original dataframe order) and
    # append the label column at the end, matching the original layout.
    selectedFeaturesNames = selectedFeatures.keys()
    keptColumns = [column for column in features.columns if column in selectedFeaturesNames]
    dataSetAfterFeatuerSelection = dataSetForFeatureSelection[keptColumns].copy()
    dataSetAfterFeatuerSelection[labelName] = label

    print('\n***** Number of columns in the dataSet after feature selection: ', len(dataSetAfterFeatuerSelection.columns))
    print('***** Columns in the dataSet after feature selection: \n', dataSetAfterFeatuerSelection.columns)
    print("****** End performing feature selection using ExtraTreesClassifier *****")
    return dataSetAfterFeatuerSelection
201 |
202 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/findcombinations.py:
--------------------------------------------------------------------------------
# Python3 program to print the combinations that take
# exactly one element from each of the given arrays.
def print1(arr):
    """Print every combination picking one element from each list in *arr*.

    Output format: a "[" line, the combination's elements quoted and
    comma-separated on one line, then a "]," line between consecutive
    combinations (odometer-style enumeration; no trailing "]," after the
    final combination).
    """
    n = len(arr)
    # positions[k] is the index of the element currently chosen from arr[k].
    positions = [0] * n

    while True:
        print("[")

        # Emit the current combination.
        for k in range(n):
            print(f"'{arr[k][positions[k]]}'", end=",")
        print()

        # Find the rightmost array with elements left after its current one.
        cursor = n - 1
        while cursor >= 0 and positions[cursor] + 1 >= len(arr[cursor]):
            cursor -= 1

        if cursor < 0:
            # Every position is exhausted; the enumeration is complete.
            return

        # Advance that position; everything to its right restarts at 0.
        positions[cursor] += 1
        for k in range(cursor + 1, n):
            positions[k] = 0
        print("],")
47 |
48 |
# Driver Code: enumerate every pipeline combination of feature-selection,
# feature-encoding, feature-scaling and classification technique.
arr = [
    ['TheilsU', 'Chi-SquaredTest', 'RandomForestClassifier', 'ExtraTreesClassifier'],
    ['OneHotEncoder', 'LabelEncoder', 'BinaryEncoder', 'FrequencyEncoder'],
    ['Min-Max', 'Standardization', 'Binarizing', 'Normalizing'],
    ['DecisonTree', 'RandomForestClassifier', 'ExtraTreesClassifier',
     'LogisticRegressionRegression', 'LinearDiscriminantAnalysis', 'GuassianNaiveBayes'],
]

print1(arr)

# This code is contributed by mohit kumar
81 |
--------------------------------------------------------------------------------
/Project-UtilityFunctions/lstm.py:
--------------------------------------------------------------------------------
1 | import time
2 | import warnings
3 | import numpy as np
4 | from numpy import newaxis
5 | from keras.layers.core import Dense, Activation, Dropout
6 | from keras.layers.recurrent import LSTM
7 | from keras.models import Sequential
8 | import matplotlib.pyplot as plt
9 |
10 |
11 | warnings.filterwarnings("ignore")
12 |
def plot_results_multiple(predicted_data, true_data, prediction_len):
    """Plot the true series and each predicted sequence, shifted to its start.

    predicted_data: iterable of per-window prediction lists, each assumed to
    be prediction_len long; true_data: the full ground-truth series.
    """
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    # Pad each prediction with None so it lines up with its window's start.
    # Fixed: xrange does not exist in Python 3; also removed a leftover
    # debug print.
    for i, data in enumerate(predicted_data):
        padding = [None for p in range(i * prediction_len)]
        plt.plot(padding + data, label='Prediction')
    plt.legend()
    plt.show()
24 |
def load_data(filename, seq_len, normalise_window):
    """Load a newline-separated series and window it into train/test arrays.

    Returns [x_train, y_train, x_test, y_test] where each x has shape
    (samples, seq_len, 1) and each y is the window's final value. The first
    90% of windows form the (shuffled) training split.
    """
    # Use a context manager so the file handle is always closed
    # (the original open(...).read() leaked the handle).
    with open(filename, 'r') as f:
        data = f.read().split('\n')

    sequence_length = seq_len + 1
    result = []
    # Sliding windows of seq_len inputs plus one target value each.
    for index in range(len(data) - sequence_length):
        result.append(data[index: index + sequence_length])

    if normalise_window:
        result = normalise_windows(result)

    result = np.array(result)

    # 90/10 train/test split; only the training rows are shuffled.
    row = round(0.9 * result.shape[0])
    train = result[:int(row), :]
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[int(row):, :-1]
    y_test = result[int(row):, -1]

    # Add the trailing feature dimension the LSTM expects: (samples, timesteps, 1).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    return [x_train, y_train, x_test, y_test]
51 |
def normalise_windows(window_data):
    """Normalise each window to its relative change from the first value."""
    # p / window[0] - 1 expresses every point relative to the window's
    # opening value (0 at the start, +1 for a doubling, etc.).
    return [[(float(point) / float(window[0])) - 1 for point in window]
            for window in window_data]
58 |
def build_model(layers):
    """Build and compile a stacked two-layer LSTM regression model.

    layers: sequence of four sizes
        [input feature dim, units of LSTM 1, units of LSTM 2, output dim].
    Returns the compiled Keras model (MSE loss, RMSprop optimizer).

    NOTE(review): input_dim/output_dim are Keras 1.x-era arguments; newer
    Keras versions expect input_shape/units — confirm the installed version
    before touching this.
    """
    model = Sequential()

    # First LSTM returns the full sequence so it can feed the second LSTM.
    model.add(LSTM(
        input_dim=layers[0],
        output_dim=layers[1],
        return_sequences=True))
    model.add(Dropout(0.2))  # regularisation between recurrent layers

    # Second LSTM collapses the sequence down to its final state.
    model.add(LSTM(
        layers[2],
        return_sequences=False))
    model.add(Dropout(0.2))

    # Linear output head for regression.
    model.add(Dense(
        output_dim=layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print ("Compilation Time : ", time.time() - start)
    return model
81 |
def predict_point_by_point(model, data):
    """Predict one step ahead for every input window; return a flat 1-D array."""
    # Each row of `data` is a full window of true history, so every
    # prediction is only ever one timestep ahead.
    return np.reshape(model.predict(data), (-1,))
87 |
def predict_sequence_full(model, data, window_size):
    """Predict the whole series by feeding each prediction back as input.

    Starts from the first true window, then repeatedly drops the oldest step
    and appends the newest prediction, so errors compound over the horizon.
    Returns a list of len(data) scalar predictions.
    """
    curr_frame = data[0]
    predicted = []
    # Fixed: xrange does not exist in Python 3 — use range.
    for i in range(len(data)):
        predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
        # Slide the window forward by one predicted step.
        curr_frame = curr_frame[1:]
        curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], axis=0)
    return predicted
97 |
def predict_sequences_multiple(model, data, window_size, prediction_len):
    """Predict prediction_len steps at a time, then jump to the next window.

    For each block of prediction_len windows, starts from true data and
    feeds predictions back in for prediction_len steps. Returns a list of
    prediction lists, one per block.
    """
    prediction_seqs = []
    # Fixed: xrange does not exist in Python 3, and len(data)/prediction_len
    # is a float there, which range() rejects — use floor division.
    for i in range(len(data) // prediction_len):
        curr_frame = data[i * prediction_len]
        predicted = []
        for j in range(prediction_len):
            predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
            # Slide the window forward by one predicted step.
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs
--------------------------------------------------------------------------------
/Project-UtilityFunctions/util.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | #Utils
4 | import operator
5 |
6 | #Seaborn is an open source Python library providing high level API for visualizing the data
7 | import seaborn as sns
8 | import matplotlib.pyplot as plt
9 |
10 | #library for saving the trained models to files
11 | import joblib
12 |
13 | from defineInputs import getPathToTrainingAndTestingDataSets
14 | from defineInputs import getPathToGenerateModels
15 |
16 | #Data loading library
17 | from dataloadinglibrary import loadCSV
18 |
19 | from defineInputs import getLabelName
20 |
21 | #Data pre-processing library
22 | from datapreprocessinglibrary import splitCompleteDataSetIntoTrainingSetAndTestingSet
23 |
24 | #Feature selection library
25 | from featureselectionlibrary import featureSelectionUsingTheilU
26 | from featureselectionlibrary import featureSelectionUsingChisquaredTest
27 | from featureselectionlibrary import featureSelectionUsingRandomForestClassifier
28 | from featureselectionlibrary import featureSelectionUsingExtraTreesClassifier
29 |
30 | #feature encoding library
31 | from featureencodinglibrary import featureEncodingUsingOneHotEncoder
32 | from featureencodinglibrary import featureEncodingUsingLabelEncoder
33 | from featureencodinglibrary import featureEncodingUsingBinaryEncoder
34 | from featureencodinglibrary import featureEncodingUsingFrequencyEncoder
35 |
36 | #feature scaling library
37 | from featurescalinglibrary import featureScalingUsingMinMaxScaler
38 | from featurescalinglibrary import featureScalingUsingStandardScalar
39 | from featurescalinglibrary import featureScalingUsingBinarizer
40 | from featurescalinglibrary import featureScalingUsingNormalizer
41 |
42 | from classificationlibrary import classifyUsingDecisionTreeClassifier
43 | from classificationlibrary import classifyUsingLogisticRegression
44 | from classificationlibrary import classifyUsingLinearDiscriminantAnalysis
45 | from classificationlibrary import classifyUsingGaussianNB
46 | from classificationlibrary import classifyUsingRandomForestClassifier
47 | from classificationlibrary import classifyUsingExtraTreesClassifier
48 | from classificationlibrary import classifyUsingKNNClassifier
49 | from classificationlibrary import findingOptimumNumberOfNeighboursForKNN
50 |
def compareModels(arrayOfModels):
    """Plot every built model's accuracy and return the best one.

    Skips index 0 (header row). Results are keyed by the classifier name
    (element 3) with its testing accuracy (element 5). Returns a one-entry
    dict {best model name: best accuracy}.
    """
    modelsAndAccuracies = {}
    for entry in arrayOfModels[1:]:
        modelsAndAccuracies[entry[3]] = entry[5]

    # Single lookup of the winning entry instead of recomputing the max.
    bestName = max(modelsAndAccuracies.items(), key=operator.itemgetter(1))[0]
    bestModelAndItsAccuracy = {bestName: modelsAndAccuracies[bestName]}

    sns.set_style("whitegrid")
    plt.figure(figsize=(5, 5))
    plt.ylabel("Algorithms", fontsize=10)
    plt.xlabel("Accuracy %", fontsize=10)
    plt.title("Comparing the models based on the accuries achieved", fontsize=15)
    sns.barplot(x=list(modelsAndAccuracies.values()), y=list(modelsAndAccuracies.keys()))
    plt.show()
    return bestModelAndItsAccuracy
66 |
### Below function is responsible for performing pre-processing, training, evaluation, persisting model
def performPreprocessingBuildModelsAndEvaluateAccuracy(trainingDataSet, testingDataSet, arrayOfModels):
    """Run the full pipeline for every model recipe and persist each model.

    Each arrayOfModels[i] is a list [featureSelection, featureEncoding,
    featureScaling, classifier]; after this call the same list additionally
    holds the training accuracy, testing accuracy, model name and model file
    path (appended in place), and each trained classifier is saved with
    joblib.

    NOTE(review): the loop starts at index 1, so arrayOfModels[0] is never
    processed — presumably a header row; confirm with the caller.
    NOTE(review): the trainingDataSet/testingDataSet parameters are
    immediately overwritten by re-loading the CSVs from disk, so the passed
    arguments are effectively ignored.
    """
    for i in range(1,len(arrayOfModels)):
        print('***************************************************************************************************************************')
        print('********************************************* Building Model-', i ,' As Below *************************************************')
        print('\t -- Feature Selection: \t ', arrayOfModels[i][0], ' \n\t -- Feature Encoding: \t ', arrayOfModels[i][1], ' \n\t -- Feature Scaling: \t ', arrayOfModels[i][2], ' \n\t -- Classification: \t ', arrayOfModels[i][3], '\n')

        # Reload fresh copies for every recipe so earlier pipelines'
        # in-place mutations cannot leak into this iteration.
        trainingFileNameWithAbsolutePath, testingFileNameWithAbsolutePath = getPathToTrainingAndTestingDataSets()
        trainingDataSet = loadCSV(trainingFileNameWithAbsolutePath)
        testingDataSet = loadCSV(testingFileNameWithAbsolutePath)

        labelName = getLabelName()
        label = trainingDataSet[labelName]

        #Combining the test and training datasets to preprocess them together, because in some datasets
        #the values in the categorical columns of the test and train datasets differ, which causes issues
        #when applying classification techniques
        completeDataSet = pd.concat(( trainingDataSet, testingDataSet ))

        #difficultyLevel = completeDataSet.pop('difficulty_level')

        print("completeDataSet.shape: ",completeDataSet.shape)
        print("completeDataSet.head: ",completeDataSet.head())

        #Feature Selection: dispatch on the technique named in the recipe
        if arrayOfModels[i][0] == 'TheilsU':
            #Perform feature selection using TheilU
            completeDataSetAfterFeatuerSelection = featureSelectionUsingTheilU(completeDataSet)
        elif arrayOfModels[i][0] == 'Chi-SquaredTest':
            #Perform feature selection using Chi-squared Test
            completeDataSetAfterFeatuerSelection = featureSelectionUsingChisquaredTest(completeDataSet)
        elif arrayOfModels[i][0] == 'RandomForestClassifier':
            #Perform feature selection using RandomForestClassifier
            completeDataSetAfterFeatuerSelection = featureSelectionUsingRandomForestClassifier(completeDataSet)
        elif arrayOfModels[i][0] == 'ExtraTreesClassifier':
            #Perform feature selection using ExtraTreesClassifier
            completeDataSetAfterFeatuerSelection = featureSelectionUsingExtraTreesClassifier(completeDataSet)

        #Feature Encoding: convert categorical values into numeric features
        if arrayOfModels[i][1] == 'LabelEncoder':
            #Perform label encoding to convert categorical values into label encoded features
            completeEncodedDataSet = featureEncodingUsingLabelEncoder(completeDataSetAfterFeatuerSelection)
        elif arrayOfModels[i][1] == 'OneHotEncoder':
            #Perform OneHot encoding to convert categorical values into one-hot encoded features
            completeEncodedDataSet = featureEncodingUsingOneHotEncoder(completeDataSetAfterFeatuerSelection)
        elif arrayOfModels[i][1] == 'FrequencyEncoder':
            #Perform Frequency encoding to convert categorical values into frequency encoded features
            completeEncodedDataSet = featureEncodingUsingFrequencyEncoder(completeDataSetAfterFeatuerSelection)
        elif arrayOfModels[i][1] == 'BinaryEncoder':
            #Perform Binary encoding to convert categorical values into binary encoded features
            completeEncodedDataSet = featureEncodingUsingBinaryEncoder(completeDataSetAfterFeatuerSelection)

        #Feature Scaling: bring every feature into a comparable range
        if arrayOfModels[i][2] == 'Min-Max':
            #Perform MinMaxScaler to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingMinMaxScaler(completeEncodedDataSet)
        elif arrayOfModels[i][2] == 'Binarizing':
            #Perform Binarizing to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingBinarizer(completeEncodedDataSet)
        elif arrayOfModels[i][2] == 'Normalizing':
            #Perform Normalizing to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingNormalizer(completeEncodedDataSet)
        elif arrayOfModels[i][2] == 'Standardization':
            #Perform Standardization to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingStandardScalar(completeEncodedDataSet)

        #Split the complete dataSet back into a training dataSet and a testing dataSet
        featuresInPreProcessedTrainingDataSet,featuresInPreProcessedTestingDataSet,labelInPreProcessedTrainingDataSet,labelInPreProcessedTestingDataSet = splitCompleteDataSetIntoTrainingSetAndTestingSet(completeEncodedAndScaledDataset)

        trainingEncodedAndScaledDataset = pd.concat([featuresInPreProcessedTrainingDataSet, labelInPreProcessedTrainingDataSet], axis=1, sort=False)
        testingEncodedAndScaledDataset = pd.concat([featuresInPreProcessedTestingDataSet, labelInPreProcessedTestingDataSet], axis=1, sort=False)

        #Classification: train the classifier named in the recipe and score it
        if arrayOfModels[i][3] == 'DecisonTree':
            #Perform classification using DecisionTreeClassifier
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingDecisionTreeClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif arrayOfModels[i][3] == 'RandomForestClassifier':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingRandomForestClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif arrayOfModels[i][3] == 'ExtraTreesClassifier':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingExtraTreesClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif arrayOfModels[i][3] == 'LogisticRegressionRegression':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingLogisticRegression(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif arrayOfModels[i][3] == 'LinearDiscriminantAnalysis':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingLinearDiscriminantAnalysis(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif arrayOfModels[i][3] == 'GuassianNaiveBayes':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingGaussianNB(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)
        elif arrayOfModels[i][3] == 'KNN':
            classifier, trainingAccuracyScore, testingAccuracyScore = classifyUsingKNNClassifier(trainingEncodedAndScaledDataset, testingEncodedAndScaledDataset)

        # Record the scores and the persisted model's identity on the recipe.
        arrayOfModels[i].append(trainingAccuracyScore)
        arrayOfModels[i].append(testingAccuracyScore)

        modelName = arrayOfModels[i][0]+"_"+arrayOfModels[i][1]+"_"+arrayOfModels[i][2]+"_"+arrayOfModels[i][3]
        modelFileName = getPathToGenerateModels() + modelName+".pkl"
        arrayOfModels[i].append(modelName)
        arrayOfModels[i].append(modelFileName)
        #Save the model to file
        joblib.dump(classifier, modelFileName)
165 |
def performPreprocessing(trainingDataSet, testingDataSet, arrayOfModels):
    """Run feature selection, encoding and scaling for every recipe.

    Same pipeline as performPreprocessingBuildModelsAndEvaluateAccuracy but
    without the classification/persistence stage; returns the encoded and
    scaled complete dataset.

    NOTE(review): the trainingDataSet/testingDataSet parameters are
    immediately overwritten by re-loading the CSVs from disk, so the passed
    arguments are effectively ignored. The split train/test dataframes built
    at the end of the loop are never used or returned — confirm whether they
    were meant to be. The return statement sits at function level, so only
    the LAST recipe's dataset is returned — confirm this is intended.
    """
    for i in range(0,len(arrayOfModels)):
        print('***************************************************************************************************************************')
        print('********************************************* Building Model-', i ,' As Below *************************************************')
        print('\t -- Feature Selection: \t ', arrayOfModels[i][0], ' \n\t -- Feature Encoding: \t ', arrayOfModels[i][1], ' \n\t -- Feature Scaling: \t ', arrayOfModels[i][2], '\n')

        # Reload fresh copies for every recipe so earlier pipelines'
        # in-place mutations cannot leak into this iteration.
        trainingFileNameWithAbsolutePath, testingFileNameWithAbsolutePath = getPathToTrainingAndTestingDataSets()
        trainingDataSet = loadCSV(trainingFileNameWithAbsolutePath)
        testingDataSet = loadCSV(testingFileNameWithAbsolutePath)

        labelName = getLabelName()
        label = trainingDataSet[labelName]

        #Combining the test and training datasets to preprocess them together, because in some datasets
        #the values in the categorical columns of the test and train datasets differ, which causes issues
        #when applying classification techniques
        completeDataSet = pd.concat(( trainingDataSet, testingDataSet ))

        #difficultyLevel = completeDataSet.pop('difficulty_level')

        print("completeDataSet.shape: ",completeDataSet.shape)
        print("completeDataSet.head: ",completeDataSet.head())

        #Feature Selection: dispatch on the technique named in the recipe
        if arrayOfModels[i][0] == 'TheilsU':
            #Perform feature selection using TheilU
            completeDataSetAfterFeatuerSelection = featureSelectionUsingTheilU(completeDataSet)
        elif arrayOfModels[i][0] == 'Chi-SquaredTest':
            #Perform feature selection using Chi-squared Test
            completeDataSetAfterFeatuerSelection = featureSelectionUsingChisquaredTest(completeDataSet)
        elif arrayOfModels[i][0] == 'RandomForestClassifier':
            #Perform feature selection using RandomForestClassifier
            completeDataSetAfterFeatuerSelection = featureSelectionUsingRandomForestClassifier(completeDataSet)
        elif arrayOfModels[i][0] == 'ExtraTreesClassifier':
            #Perform feature selection using ExtraTreesClassifier
            completeDataSetAfterFeatuerSelection = featureSelectionUsingExtraTreesClassifier(completeDataSet)

        #Feature Encoding: convert categorical values into numeric features
        if arrayOfModels[i][1] == 'LabelEncoder':
            #Perform label encoding to convert categorical values into label encoded features
            completeEncodedDataSet = featureEncodingUsingLabelEncoder(completeDataSetAfterFeatuerSelection)
        elif arrayOfModels[i][1] == 'OneHotEncoder':
            #Perform OneHot encoding to convert categorical values into one-hot encoded features
            completeEncodedDataSet = featureEncodingUsingOneHotEncoder(completeDataSetAfterFeatuerSelection)
        elif arrayOfModels[i][1] == 'FrequencyEncoder':
            #Perform Frequency encoding to convert categorical values into frequency encoded features
            completeEncodedDataSet = featureEncodingUsingFrequencyEncoder(completeDataSetAfterFeatuerSelection)
        elif arrayOfModels[i][1] == 'BinaryEncoder':
            #Perform Binary encoding to convert categorical values into binary encoded features
            completeEncodedDataSet = featureEncodingUsingBinaryEncoder(completeDataSetAfterFeatuerSelection)

        #Feature Scaling: bring every feature into a comparable range
        if arrayOfModels[i][2] == 'Min-Max':
            #Perform MinMaxScaler to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingMinMaxScaler(completeEncodedDataSet)
        elif arrayOfModels[i][2] == 'Binarizing':
            #Perform Binarizing to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingBinarizer(completeEncodedDataSet)
        elif arrayOfModels[i][2] == 'Normalizing':
            #Perform Normalizing to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingNormalizer(completeEncodedDataSet)
        elif arrayOfModels[i][2] == 'Standardization':
            #Perform Standardization to scale the features of the dataset into the same range
            completeEncodedAndScaledDataset = featureScalingUsingStandardScalar(completeEncodedDataSet)

        #Split the complete dataSet back into a training dataSet and a testing dataSet
        featuresInPreProcessedTrainingDataSet,featuresInPreProcessedTestingDataSet,labelInPreProcessedTrainingDataSet,labelInPreProcessedTestingDataSet = splitCompleteDataSetIntoTrainingSetAndTestingSet(completeEncodedAndScaledDataset)

        trainingEncodedAndScaledDataset = pd.concat([featuresInPreProcessedTrainingDataSet, labelInPreProcessedTrainingDataSet], axis=1, sort=False)
        testingEncodedAndScaledDataset = pd.concat([featuresInPreProcessedTestingDataSet, labelInPreProcessedTestingDataSet], axis=1, sort=False)

    return completeEncodedAndScaledDataset
--------------------------------------------------------------------------------