├── .gitignore ├── 0_Data_wrangling.ipynb ├── 1_Data_cleaning.ipynb ├── 2_Data_analysis.ipynb ├── LICENSE ├── Plots_technical_background.ipynb ├── README.md ├── data ├── JavascriptKeywords.txt ├── README.md ├── SQLCollection.txt ├── SQLKeywords.txt ├── ShellCollection.txt ├── XSSCollection.txt ├── non-maliciousCollection.txt ├── payloads.csv ├── tfidf_2grams_randomforest.p ├── trained_classifier_custom_4_features.p ├── trained_classifier_custom_5_features.p ├── trained_classifier_custom_6_features.p ├── trained_classifier_custom_7_features.p ├── trained_classifier_custom_8_features.p ├── trained_classifier_custom_9_features.p ├── trained_classifier_custom_all_features.p ├── trained_classifiers.p └── trained_classifiers_custom.p ├── demo-server ├── .angular-cli.json ├── .editorconfig ├── .gitignore ├── README.md ├── e2e │ ├── app.e2e-spec.ts │ ├── app.po.ts │ └── tsconfig.e2e.json ├── karma.conf.js ├── npm-debug.log.179372140 ├── package.json ├── protractor.conf.js ├── pyserver.py ├── server.js ├── server │ └── routes │ │ └── api.js ├── src │ ├── app │ │ ├── app.component.css │ │ ├── app.component.html │ │ ├── app.component.spec.ts │ │ ├── app.component.ts │ │ ├── app.module.ts │ │ ├── classifier.service.spec.ts │ │ ├── classifier.service.ts │ │ └── classifier │ │ │ ├── classifier.component.css │ │ │ ├── classifier.component.html │ │ │ ├── classifier.component.spec.ts │ │ │ └── classifier.component.ts │ ├── assets │ │ └── .gitkeep │ ├── environments │ │ ├── environment.prod.ts │ │ └── environment.ts │ ├── favicon.ico │ ├── index.html │ ├── main.ts │ ├── polyfills.ts │ ├── styles.css │ ├── test.ts │ ├── tsconfig.app.json │ ├── tsconfig.spec.json │ └── typings.d.ts ├── tfidf_2grams_randomforest.p ├── tsconfig.json └── tslint.json ├── images ├── features │ ├── distinct-bytes.png │ ├── js-keywords.png │ ├── length.png │ ├── max-byte.png │ ├── mean-byte.png │ ├── min-byte.png │ ├── non-printable.png │ ├── punctuation.png │ ├── sql-keywords.png │ └── std-byte.png ├── 
presentation_tables │ ├── custom_features_table.png │ ├── f1_result_table.png │ └── ngrams_example.png └── report_images │ ├── 1gram_tfidf_randomforest_learningcurve.png │ ├── 1grams_count_pca.png │ ├── 3gram_count_multinomial_learningcurve.png │ ├── 3grams_tfidf_pca.png │ ├── SVM_kernel.png │ ├── bagofwords_example.png │ ├── classifier_example.png │ ├── classifiers_f1score.png │ ├── custom_pca.png │ ├── custom_svm_learningcurve.png │ ├── data_distribution.png │ ├── logistic.png │ ├── optimizing_example.png │ ├── roc_curves.png │ ├── roc_graph_topbot3.png │ └── website.png └── report.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | data/trained_classifiers.p 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # IPython Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | -------------------------------------------------------------------------------- /0_Data_wrangling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Wrangling\n", 8 | "The functions below transform data from different formats into a single common format and \n", 9 | "append the transformed data to either ShellCollection.txt, SQLCollection.txt, XSSCollection.txt or non-maliciousCollection.txt, depending on type. \n", 10 | "The last function in the notebook combines the text files into a single .csv file\n", 11 | "\n", 12 | "P.S! The source data files aren't included, so no need to run these scripts. 
You should start testing the project by starting in the Data Cleaning notebook!\n", 13 | "\n", 14 | "Source to original data: \n", 15 | "https://github.com/foospidy/payloads/blob/master/get.sh \n", 16 | "http://www.isi.csic.es/dataset/" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "## Step1\n", 24 | "import dependencies" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 4, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import numpy as np\n", 36 | "import pandas as pd\n", 37 | "import csv\n", 38 | "import re\n", 39 | "import json\n", 40 | "from IPython.display import display" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "# Step2\n", 48 | "transform data from source data set formats into the right format" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 112, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "Number of SQL injection data points: 286\n", 61 | "First 5 SQL injection data points:\n" 62 | ] 63 | }, 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "432 1;DROP TABLE users\n", 68 | "433 1'; DROP TABLE users-- 1\n", 69 | "434 ' OR 1=1 -- 1\n", 70 | "435 ' OR '1'='1\n", 71 | "760 ’ or ‘1’=’1\n", 72 | "Name: Payload, dtype: object" 73 | ] 74 | }, 75 | "metadata": {}, 76 | "output_type": "display_data" 77 | }, 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "Number of XSS injection data points: 1115\n", 83 | "First 5 XSS injection data points:\n" 84 | ] 85 | }, 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "0 script>alert(123)\n", 90 | "1 \n", 91 | "2 javascript:alert(\"hellox worldss\")\n", 92 | "3 \n", 93 | "4 \n", 94 | "Name: Payload, dtype: object" 95 | ] 96 | }, 97 | "metadata": {}, 98 | "output_type": "display_data" 99 | } 100 | ], 101 | "source": [ 102 | "def 
from_google_spreadsheet_to_collections(file):\n", 103 | " '''Converts web traffic payloads from csv file to right format into collections \n", 104 | "\n", 105 | " the input format of the data points are:\n", 106 | " ,,\n", 107 | " '''\n", 108 | " \n", 109 | " df = pd.read_csv(\"data/{}.csv\".format(file))\n", 110 | " \n", 111 | " #extract injection data\n", 112 | " sql_data = df['Payload'][df['Injection Type'] == 'SQL']\n", 113 | " xss_data = df['Payload'][df['Injection Type'] == 'XSS']\n", 114 | "\n", 115 | " print('Number of SQL injection data points: ' + str(len(sql_data)))\n", 116 | " print('First 5 SQL injection data points:')\n", 117 | " display(sql_data[:5])\n", 118 | "\n", 119 | " print('Number of XSS injection data points: ' + str(len(xss_data)))\n", 120 | " print('First 5 XSS injection data points:')\n", 121 | " display(xss_data[:5])\n", 122 | " \n", 123 | " with open(\"data/SQLCollection.txt\", \"a\") as myfile:\n", 124 | " for sql_row in sql_data:\n", 125 | " myfile.write('{}\\n'.format(sql_row.encode(\"utf-8\")))\n", 126 | " \n", 127 | " with open(\"data/XSSCollection.txt\",\"a\") as myfile:\n", 128 | " for xss_row in xss_data:\n", 129 | " myfile.write('{}\\n'.format(xss_row.encode(\"utf-8\")))\n", 130 | " pass \n", 131 | "\n", 132 | "#IPS_payload_data is our spreadsheet of payloads gathered so far\n", 133 | "from_google_spreadsheet_to_collections('IPS_payload_data')\n" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 124, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "raw data in source file format: Directory Traversal - For Unix##/../../../../file##0\n", 146 | "\n", 147 | "modified data in right format: /../../../../file\n", 148 | " 91\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "def from_xsuperbug_to_collections(src_file, dest_file):\n", 154 | " '''Converts web traffic payloads from xsuperbug's format to the right format into 
collections \n", 155 | " \n", 156 | " the input format of the data points are:\n", 157 | " ####\n", 158 | " '''\n", 159 | " \n", 160 | " lines = open(\"data/{}\".format(src_file),\"r\").readlines()\n", 161 | " print('raw data in source file format: ' + lines[0])\n", 162 | " lines = [ re.search(r'(.*)##(.*)##[0-9]',line).group(2) for line in lines]\n", 163 | " print('modified data in right format: ' + lines[0])\n", 164 | " print(' ' + str(len(lines)))\n", 165 | " \n", 166 | " with open(\"data/{}\".format(dest_file), \"a\") as myfile:\n", 167 | " for line in lines:\n", 168 | " myfile.write('{}\\n'.format(line.encode(\"utf-8\")))\n", 169 | " \n", 170 | "#from_xsuperbug_to_collections('timetoparseSQL.txt','SQLCollection.txt')\n", 171 | "#from_xsuperbug_to_collections('timetoparseXSS.txt','XSSCollection.txt')\n", 172 | "from_xsuperbug_to_collections('timetoparseCMD.txt','ShellCollection.txt')" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 109, 178 | "metadata": { 179 | "collapsed": true 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "def from_cnets_to_collection(src_file, dest_file):\n", 184 | " '''Converts web traffic payloads from CNetS' web traffic data set format to the right format into collections\n", 185 | " \n", 186 | " source data set found here: http://cnets.indiana.edu/resources/data-repository/\n", 187 | " the input file is in JSON format and the input format of the data points are:\n", 188 | " {\"count\": , \"timestamp\": , \"from\": \"/\", \"to\": \"/\"}\n", 189 | " '''\n", 190 | " raw_data = []\n", 191 | " \n", 192 | " with open(\"data/{}.json\".format(src_file)) as f:\n", 193 | " for line in f.readlines():\n", 194 | " raw_data.append(json.loads(line))\n", 195 | " \n", 196 | " #Extract 'from' and 'to' columns\n", 197 | " data = pd.Series([obj['from'] for obj in raw_data] + [obj['to'] for obj in raw_data]) \n", 198 | " \n", 199 | " #Remove empty elements\n", 200 | " data = data[data != '']\n", 201 | " \n", 202 | " \n", 
203 | " #Extract data containing payloads, i.e. containing the '=' sign followed by a word\n", 204 | " data = data[ [re.match(r'(.*)=(.+)',x) != None for x in data] ]\n", 205 | " \n", 206 | " payloads = []\n", 207 | " \n", 208 | " #extract each input from the entire payload string\n", 209 | " for payload in data:\n", 210 | " temp = payload.split('&')\n", 211 | " payloads = payloads + [substring.split('=')[1] for substring in temp if len(substring.split('=')) > 1]\n", 212 | " \n", 213 | " #write to destination file\n", 214 | " with open(\"data/{}\".format(dest_file), \"a\") as myfile:\n", 215 | " for payload in payloads:\n", 216 | " if payload != '':\n", 217 | " myfile.write('{}\\n'.format(payload))\n", 218 | "\n", 219 | "#There are 21 files with non-malicious payloads, each with its date as name\n", 220 | "for i in range(1,22):\n", 221 | " date = '0' + str(i) if i < 10 else str(i)\n", 222 | " from_cnets_to_collection('2009-11-{}'.format(date),'non-maliciousCollection.txt')\n" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 22, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "name": "stdout", 232 | "output_type": "stream", 233 | "text": [ 234 | "Total payloads found: 2929\n", 235 | "First 20 payloads:\n" 236 | ] 237 | }, 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "['',\n", 242 | " '(173-95)',\n", 243 | " '1331994300888',\n", 244 | " 'results',\n", 245 | " '1331736235',\n", 246 | " '(63245-posstr(chr(97),chr(97))+53475)',\n", 247 | " 'otcymtc0',\n", 248 | " 'ping\\\\x0c-w\\\\x0c7000\\\\x0c-n\\\\x0c1\\\\x0c1.2.3.4',\n", 249 | " '1331910623.41',\n", 250 | " 'rm\\\\x09q92179245',\n", 251 | " 'del\\\\x0cq61251932',\n", 252 | " '1331749591',\n", 253 | " 'ping,-w,11000,-n,1,4.3.2.1|rem,',\n", 254 | " 'ota3nzyw@',\n", 255 | " 'register',\n", 256 | " 'http://xxxxxxxx/1',\n", 257 | " 'linpha_order_sql_injection.nasl_1332008614',\n", 258 | " '\\\\x0ddel q74771226 #',\n", 259 | " '1331884972314',\n", 260 | " 'all']" 261 | ] 262 | }, 
263 | "metadata": {}, 264 | "output_type": "display_data" 265 | } 266 | ], 267 | "source": [ 268 | "def from_fsecurify_to_collection(src_file, dest_file):\n", 269 | " '''Extracts payload data inputs from address strings\n", 270 | " \n", 271 | " source data set found here: \n", 272 | " https://raw.githubusercontent.com/faizann24/Fwaf-Machine-Learning-driven-Web-Application-Firewall/master/goodqueries.txt\n", 273 | " \n", 274 | " the format of the data points are:\n", 275 | " ?\n", 276 | " example: folder1/folder2?var1=payloadData\n", 277 | " '''\n", 278 | " payloads = []\n", 279 | " \n", 280 | " with open(\"data/{}\".format(src_file)) as f:\n", 281 | " for line in f.readlines():\n", 282 | " splitted_address = line.split('?')\n", 283 | " \n", 284 | " #if there is payload\n", 285 | " if len(splitted_address) > 1:\n", 286 | " total_payload = splitted_address[1]\n", 287 | " temp = total_payload.split('&')\n", 288 | " \n", 289 | " #Add all input data from payload \n", 290 | " #exclude input that contains http://192.168.202 (these were strange local queries)\n", 291 | " #exclude input that contains the word 'select' AND 'union' (these were actually malicious)\n", 292 | " payloads = payloads + [substring.split('=')[1].strip('\\n') for substring in temp \n", 293 | " if len(substring.split('=')) > 1 and\n", 294 | " 'http://192.168.202' not in substring.split('=')[1] and\n", 295 | " ('select' not in substring.split('=')[1] or 'union' not in substring.split('=')[1])\n", 296 | " ]\n", 297 | " #remove duplicates\n", 298 | " payloads = list(set(payloads))\n", 299 | " \n", 300 | " #write to destination file\n", 301 | " with open(\"data/{}\".format(dest_file), \"a\") as myfile:\n", 302 | " for payload in payloads:\n", 303 | " if payload != '':\n", 304 | " myfile.write('{}\\n'.format(payload))\n", 305 | " \n", 306 | " print('Total payloads found: '+str(len(payloads)))\n", 307 | " print('First 20 payloads:')\n", 308 | " display(payloads[:20])\n", 309 | "\n", 310 | " \n", 311 | 
"from_fsecurify_to_collection('goodqueries.txt','non-maliciousCollection.txt')" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 35, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "name": "stdout", 321 | "output_type": "stream", 322 | "text": [ 323 | "Total number of data points gathered: 19344\n", 324 | "First 20 data points:\n" 325 | ] 326 | }, 327 | { 328 | "data": { 329 | "text/plain": [ 330 | "['shuster',\n", 331 | " '3288111380573813',\n", 332 | " '7315',\n", 333 | " 'Calle+Montesol+30%2C+',\n", 334 | " 'woolwine0',\n", 335 | " '25742430W',\n", 336 | " 'Ramiro+De+Maeztu%2C+S%2FN+8C',\n", 337 | " '02054707W',\n", 338 | " 'violet%40sunmiles.cf',\n", 339 | " 'oxley%40muevetuweb.lb',\n", 340 | " 'arena%40productosgarantizados.uy',\n", 341 | " 'cioffi',\n", 342 | " '7961872329809538',\n", 343 | " '8622247853302054',\n", 344 | " 'roquemore%40veocime.bu',\n", 345 | " 'oke',\n", 346 | " 'oblong0',\n", 347 | " 'Del+Barrio',\n", 348 | " 'cEgaJOSo',\n", 349 | " 'shashi']" 350 | ] 351 | }, 352 | "metadata": {}, 353 | "output_type": "display_data" 354 | }, 355 | { 356 | "name": "stdout", 357 | "output_type": "stream", 358 | "text": [ 359 | "Total number of data points gathered: 20035\n", 360 | "First 20 data points:\n" 361 | ] 362 | }, 363 | { 364 | "data": { 365 | "text/plain": [ 366 | "['Sigrid',\n", 367 | " 'Calle+Osa+Mayor+S%2FN%2C+4-G',\n", 368 | " '09348',\n", 369 | " 'Ataquines',\n", 370 | " '7739977532136253',\n", 371 | " '7315',\n", 372 | " '2340',\n", 373 | " 'Presentaci%F3n',\n", 374 | " 'Mallabia',\n", 375 | " '7539210731606782',\n", 376 | " 'kirkland',\n", 377 | " 'Kimberley',\n", 378 | " '54375556Z',\n", 379 | " '51928951B',\n", 380 | " 'darfeuil%40naturalchild.lc',\n", 381 | " 'hsaio%40suecas.com.ec',\n", 382 | " '4587377607226524',\n", 383 | " 'medeiros',\n", 384 | " 'toshi',\n", 385 | " 'Edesio']" 386 | ] 387 | }, 388 | "metadata": {}, 389 | "output_type": "display_data" 390 | } 391 | ], 392 | "source": [ 393 | "def 
from_CSIC2010_to_collection(src_file, dest_file):\n", 394 | " '''Extracts payload data inputs from CSIC2010 HTTP packet dataset\n", 395 | " \n", 396 | " source dataset found here: http://www.isi.csic.es/dataset/\n", 397 | " input format from source is a complete HTTP packet\n", 398 | " '''\n", 399 | " \n", 400 | " payloads = []\n", 401 | " payload_next_line = False\n", 402 | " \n", 403 | " with open(\"data/{}\".format(src_file)) as f:\n", 404 | " for line in f.readlines():\n", 405 | " \n", 406 | " #Extract inputs from payload if first row in a GET packet\n", 407 | " if line.startswith('GET') and len(line.split('?')) > 1:\n", 408 | " \n", 409 | " #extract total payload string\n", 410 | " total_payload = (line.split('?')[1]).split(' ')[0]\n", 411 | " \n", 412 | " #add each input value separately to payloads\n", 413 | " inputs = total_payload.split('&')\n", 414 | " payloads = payloads + [input.split('=')[1] for input in inputs if len(input.split('=')) > 1]\n", 415 | " \n", 416 | " if line.startswith('Content-Length'):\n", 417 | " #notify that this is a HTTP POST packet and the next line will contain the payload\n", 418 | " payload_next_line = True\n", 419 | " \n", 420 | " elif payload_next_line and len(line) > 2:\n", 421 | " #Current line is a payload of a HTTP POST packet\n", 422 | " \n", 423 | " #add each input value separately to payloads\n", 424 | " inputs = line.split('&')\n", 425 | " payloads = payloads + [input.split('=')[1].strip('\\n') for input in inputs if len(input.split('=')) > 1]\n", 426 | " \n", 427 | " payload_next_line = False\n", 428 | " \n", 429 | " payloads = list(set(payloads))\n", 430 | "\n", 431 | " #write to destination file\n", 432 | " with open(\"data/{}\".format(dest_file), \"a\") as myfile:\n", 433 | " for payload in payloads:\n", 434 | " if payload != '':\n", 435 | " myfile.write('{}\\n'.format(payload))\n", 436 | "\n", 437 | " print('Total number of data points gathered: ' + str(len(payloads)))\n", 438 | " print('First 20 data 
points:')\n", 439 | " display(payloads[:20])\n", 440 | " \n", 441 | "from_CSIC2010_to_collection('normalTrafficTraining.txt','non-maliciousCollection.txt')\n", 442 | "from_CSIC2010_to_collection('normalTrafficTest.txt','non-maliciousCollection.txt')" 443 | ] 444 | } 445 | ], 446 | "metadata": { 447 | "kernelspec": { 448 | "display_name": "Python 3", 449 | "language": "python", 450 | "name": "python3" 451 | }, 452 | "language_info": { 453 | "codemirror_mode": { 454 | "name": "ipython", 455 | "version": 3 456 | }, 457 | "file_extension": ".py", 458 | "mimetype": "text/x-python", 459 | "name": "python", 460 | "nbconvert_exporter": "python", 461 | "pygments_lexer": "ipython3", 462 | "version": "3.5.3" 463 | } 464 | }, 465 | "nbformat": 4, 466 | "nbformat_minor": 2 467 | } 468 | -------------------------------------------------------------------------------- /1_Data_cleaning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data cleaning\n", 8 | "After gathering all the data (see the report and 0_Data_wrangling.ipynb) it is time to clean the data.\n", 9 | "The cleaning phase follows these steps:\n", 10 | "- Load all the collection txt files into a labeled pandas dataframe\n", 11 | "- Remove empty data points, and remove malicious data points of length 1\n", 12 | "- Remove any duplicates\n", 13 | "- Shuffle the dataset to remove any ordering bias\n", 14 | "- Store into a .csv file for visualization and also store the dataframe into a .pickle file to easily allow further computations on the data set" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Step1\n", 22 | "Import dependencies" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "import numpy as np\n", 34 | "import pandas 
as pd\n", 35 | "import csv\n", 36 | "from IPython.display import display" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Step2\n", 44 | "Compute a pandas dataframe with the payloads from the different collections \n", 45 | "dataframe columns: \n", 46 | "$is\\_malicious | injection\\_type | payload$ \n", 47 | "example: \n", 48 | "$1 | SQL | ' OR 1=1 LIMIT 1 #$" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "First 5 lines of SQL\n" 61 | ] 62 | }, 63 | { 64 | "data": { 65 | "text/html": [ 66 | "
\n", 67 | "\n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | "
payloadis_maliciousinjection_type
0'\\n1SQL
1a' or 1=1-- \\n1SQL
2\"a\"\" or 1=1--\"\\n1SQL
3or a = a\\n1SQL
4a' or 'a' = 'a\\n1SQL
\n", 109 | "
" 110 | ], 111 | "text/plain": [ 112 | " payload is_malicious injection_type\n", 113 | "0 '\\n 1 SQL\n", 114 | "1 a' or 1=1-- \\n 1 SQL\n", 115 | "2 \"a\"\" or 1=1--\"\\n 1 SQL\n", 116 | "3 or a = a\\n 1 SQL\n", 117 | "4 a' or 'a' = 'a\\n 1 SQL" 118 | ] 119 | }, 120 | "metadata": {}, 121 | "output_type": "display_data" 122 | }, 123 | { 124 | "name": "stdout", 125 | "output_type": "stream", 126 | "text": [ 127 | "First 5 lines of XSS\n" 128 | ] 129 | }, 130 | { 131 | "data": { 132 | "text/html": [ 133 | "
\n", 134 | "\n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | "
payloadis_maliciousinjection_type
0data:text/html;alert(1)/*,<svg%20onload=eval(...1XSS
1'\">*/--></title></style></textarea></script%0A...1XSS
2\" onclick=alert(1)//<button ‘ onclick=alert(1)...1XSS
3';alert(String.fromCharCode(88,83,83))//';aler...1XSS
4\">><marquee><img src=x onerror=confirm(1)></ma...1XSS
\n", 176 | "
" 177 | ], 178 | "text/plain": [ 179 | " payload is_malicious \\\n", 180 | "0 data:text/html;alert(1)/*,*/-->>\n", 208 | "\n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | "
payloadis_maliciousinjection_type
0() { 0; }; touch /tmp/blns.shellshock1.fail;\\n1SHELL
1() { _; } >_[$($())] { touch /tmp/blns.shellsh...1SHELL
2<<< %s(un='%s') = %u\\n1SHELL
3'+++ATH0\\n1SHELL
4/dev/null; touch /tmp/blns.fail ; echo\\n1SHELL
\n", 250 | "" 251 | ], 252 | "text/plain": [ 253 | " payload is_malicious \\\n", 254 | "0 () { 0; }; touch /tmp/blns.shellshock1.fail;\\n 1 \n", 255 | "1 () { _; } >_[$($())] { touch /tmp/blns.shellsh... 1 \n", 256 | "2 <<< %s(un='%s') = %u\\n 1 \n", 257 | "3 '+++ATH0\\n 1 \n", 258 | "4 /dev/null; touch /tmp/blns.fail ; echo\\n 1 \n", 259 | "\n", 260 | " injection_type \n", 261 | "0 SHELL \n", 262 | "1 SHELL \n", 263 | "2 SHELL \n", 264 | "3 SHELL \n", 265 | "4 SHELL " 266 | ] 267 | }, 268 | "metadata": {}, 269 | "output_type": "display_data" 270 | }, 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "First 5 lines of LEGAL\n" 276 | ] 277 | }, 278 | { 279 | "data": { 280 | "text/html": [ 281 | "
\n", 282 | "\n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | "
payloadis_maliciousinjection_type
0569993989\\n0LEGAL
146201\\n0LEGAL
2Indianapolis\\n0LEGAL
320354328\\n0LEGAL
4A8Cyj4uzrSgkGg4szKuHeI\\n0LEGAL
\n", 324 | "
" 325 | ], 326 | "text/plain": [ 327 | " payload is_malicious injection_type\n", 328 | "0 569993989\\n 0 LEGAL\n", 329 | "1 46201\\n 0 LEGAL\n", 330 | "2 Indianapolis\\n 0 LEGAL\n", 331 | "3 20354328\\n 0 LEGAL\n", 332 | "4 A8Cyj4uzrSgkGg4szKuHeI\\n 0 LEGAL" 333 | ] 334 | }, 335 | "metadata": {}, 336 | "output_type": "display_data" 337 | } 338 | ], 339 | "source": [ 340 | "def from_txt_to_dataframe(src_file,is_malicious,injection_type):\n", 341 | " \n", 342 | " #read file\n", 343 | " payloads_txt = open('data/{}.txt'.format(src_file),'r',encoding='UTF-8').readlines()\n", 344 | " \n", 345 | " #create dataframe\n", 346 | " payloads = pd.DataFrame(payloads_txt,columns=['payload'])\n", 347 | " payloads['is_malicious'] = [is_malicious]*len(payloads)\n", 348 | " payloads['injection_type'] = [injection_type]*len(payloads)\n", 349 | "\n", 350 | " print('First 5 lines of ' + injection_type)\n", 351 | " display(payloads.head())\n", 352 | " \n", 353 | " return payloads\n", 354 | " \n", 355 | "#concatenate all payload dataframes together\n", 356 | "payloads = pd.DataFrame(columns=['payload','is_malicious','injection_type'])\n", 357 | "payloads = payloads.append(from_txt_to_dataframe('SQLCollection',1,'SQL'))\n", 358 | "payloads = payloads.append(from_txt_to_dataframe('XSSCollection',1,'XSS'))\n", 359 | "payloads = payloads.append(from_txt_to_dataframe('ShellCollection',1,'SHELL'))\n", 360 | "payloads = payloads.append(from_txt_to_dataframe('non-maliciousCollection',0,'LEGAL'))\n", 361 | "payloads = payloads.reset_index(drop=True)" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "## Step3\n", 369 | "Clean the data by\n", 370 | "- removing ending '\\n'\n", 371 | "- removing duplicates\n", 372 | "- removing empty data points\n", 373 | "- removing malicious data points with length 1\n", 374 | "- shuffling the data set" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 3, 380 | "metadata": {}, 381 | 
"outputs": [ 382 | { 383 | "name": "stdout", 384 | "output_type": "stream", 385 | "text": [ 386 | "Empty data points removed: 1\n", 387 | "Malicious data points of size 1 removed: 3\n", 388 | "Duplicate data points removed: 26072\n", 389 | "null/NaN data points removed: 3\n" 390 | ] 391 | } 392 | ], 393 | "source": [ 394 | "#Remove ending \\n and white spaces\n", 395 | "payloads['payload'] = payloads['payload'].str.strip('\\n')\n", 396 | "payloads['payload'] = payloads['payload'].str.strip()\n", 397 | "\n", 398 | "#Remove any empty data points\n", 399 | "rows_before = len(payloads['payload'])\n", 400 | "payloads = payloads[payloads['payload'].str.len() != 0]\n", 401 | "print('Empty data points removed: ' + str(rows_before - len(payloads)))\n", 402 | "\n", 403 | "#Remove any malicious data points of size 1\n", 404 | "rows_before = len(payloads['payload'])\n", 405 | "payloads = payloads[(payloads['is_malicious'] == 0) | ((payloads['is_malicious'] == 1) & (payloads['payload'].str.len() > 1))]\n", 406 | "print('Malicious data points of size 1 removed: ' + str(rows_before-len(payloads)))\n", 407 | "\n", 408 | "#Remove duplicates\n", 409 | "rows_before = len(payloads['payload'])\n", 410 | "payloads = payloads.drop_duplicates(subset='payload', keep='last')\n", 411 | "print('Duplicate data points removed: ' + str(rows_before-len(payloads)))\n", 412 | "\n", 413 | "#Reformat rows that have the format b'' into \n", 414 | "payloads['payload'] = [payload[2:-1] if payload.startswith(\"b'\") or payload.startswith('b\"') \n", 415 | " else payload for payload in payloads['payload']]\n", 416 | "\n", 417 | "#Shuffle dataset and reset indices again\n", 418 | "payloads = payloads.sample(frac=1).reset_index(drop=True)\n", 419 | "payloads.index.name = 'index'\n", 420 | "\n", 421 | "#Remove payloads that cant be saved into .csv using pandas, e.g. they will be null/NA/NaN\n", 422 | "payloads.to_csv('data/payloads.csv',encoding='UTF-8')\n", 423 | "#reload dataframe from saved .csv. 
The dataframe will contain a few null values\n", 424 | "payloads = pd.read_csv(\"data/payloads.csv\",index_col='index',encoding='UTF-8') \n", 425 | "rows_before = len(payloads['payload'])\n", 426 | "payloads = payloads[~payloads['payload'].isnull()]\n", 427 | "print('null/NaN data points removed: ' + str(rows_before-len(payloads)))\n", 428 | "\n", 429 | "#Lastly, save to .csv\n", 430 | "payloads.to_csv('data/payloads.csv',encoding='UTF-8')" 431 | ] 432 | } 433 | ], 434 | "metadata": { 435 | "kernelspec": { 436 | "display_name": "Python 3", 437 | "language": "python", 438 | "name": "python3" 439 | }, 440 | "language_info": { 441 | "codemirror_mode": { 442 | "name": "ipython", 443 | "version": 3 444 | }, 445 | "file_extension": ".py", 446 | "mimetype": "text/x-python", 447 | "name": "python", 448 | "nbconvert_exporter": "python", 449 | "pygments_lexer": "ipython3", 450 | "version": "3.5.3" 451 | } 452 | }, 453 | "nbformat": 4, 454 | "nbformat_minor": 2 455 | } 456 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 grananqvist 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Plots_technical_background.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### The code in this notebook is entirely for making example plots to use in the technical background of the report \n", 8 | "### This notebook doesn't have any relation to the overall firewall project" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 4, 14 | "metadata": { 15 | "collapsed": true 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "import numpy as np\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "from sklearn.linear_model import LinearRegression" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### Logistic regression" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 72, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAtQAAAFACAYAAACcMus4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl4FFXa///PyQIE1ABGGGSLigKujKCIwpAICiPwoCOg\nsji44cIScJsRZgT1iyOugIzro8NPwW1cQR0dBRfkUTAoIIsoaEBAQEAQWSRJn98f1Z3uhCzdXZ1U\nd/r9uq6+uk511d13V8Lpm8qpU8ZaKwAAAADRSfE6AQAAACCRUVADAAAALlBQAwAAAC5QUAMAAAAu\nUFADAAAALlBQAwAAAC5QUAMAAAAuUFADAAAALlBQAwAAAC6keZ1ApLKysmx2drbXaQBAVJYsWbLd\nWnuU13nUFPpsAIks3D474Qrq7Oxs5efne50GAETFGLPe6xxqEn02gEQWbp/NkA8AAADABQpqAAAA\nwAUKagAAAMAFCmoAAADABQpqAAAAwAUKagAAAMAFCmoAAADABQpqAAAAwAUKagAAAMAFCmoAAADA\nBQpqAAAAwAUKagAAAMAFCmoAAADABQpqAAAAwAUKagAAAMAFCmoAAADABQpqAAAAwAUKagAAAMAF\nCmoAAADABQpqAAAAwAUKagAAAMAFCmoAAADABQpqAAAAwAUKagAAAMAFCmoAAADABQpqAAAAwAUK\nagAAAMAFCmoAAADABQpqAAAAwAUKagAAAMAFCmoAAADABQpqAAAAwAUKagAAAMAFCmoAAADABQpq\nAAAAwAUKagAAAMAFCmoAAADABQpqAAAAwAUKagAAAMAFCmoAAFDrWXtoO7DOWsnnO7RdWazQeKH7\nB9aXbZd9/0hzrer9K3qfsttV9hxpfrGIE7pv6HGPNmZ5+5R3DMq2qzrmVUmLbPPwGWOeltRX0jZr\n7cnlvG4kTZN0gaR9koZba7+ornwAAEBymjRJ2rVLeughyRinWOrSxXmtVy/pqaeko46SunWTGjWS\nfv5Z+ve/pbZtpQ8/PDTWO+9InTtLU6c66846S1q5UsrMlAYMcJ7nzpXq1HHer1cvafduqWFDZ/9I\ncx03Lrhvee8/dqy0aJFUr57UoUP5+0pO3MxMJ5cHH5RuvDHYDie30PxiESfwORo2dGJ++qn0ww/S\niBFOrEhjVnX8Aseg7GtLl1Z+3MJRbQW1pJmSZkh6poLX/yjpeP+js6RH/c8AAAAxYa1TRE2b5rQf\neihYgErSvn3Spk3OY+lS6dRTpeXLndd+9zvnjGlKSjDWzz87+wb2t1ZavNhZ3rtXmj5dysqStm8P\n5vDbb07svDxne2PCz3XcOKedl+fkUvb9Jec9JacoLG/fMWOC23Xo4OTy0UfBQjKc3Mrm5+xn9fGH\nVsuX+fT7U336arlPo0da2V99MtYXPE3v8wUf1soW++T7wadNi3x6bZHVye19+nq1T0ZWh8mndx6W\nfv7Zqn07afXX0vDLrezX/twqOeVurVRvndV7s6QpO6S/3Go1ZYr0/mxp6GBnm/nPWd37k3TrrdJ9\n91p98JzUrq304TSr+3+Sbr5Zuv8+q4+elwZfVvGxKMvYaM6lhxvcmGxJb1ZwhvpxSR9aa5/3t9dI\nyrHW/lhZzE6dOtn8/PxqyBYAqp8xZom1tpPXedQU+mzEg8AZx0CxKZUuMsvToYO0ZEmwmA6NNXbs\nofuddpq0bFnFOeTlBc+ARppr6L7B97c6TL8qU7t1hH7RNQN3K2/4bj0z4xct+M8e1dc+ZWi/cjvv\nV+/u+6X9+/XpBwe0dsV+Zch51NMBpatQrZoVKrtFkUxhoVRYKBUVVfhsCwvlO1ikVFtc+QepJYwU\nVp/tZUH9pqR7rLWf+NvzJP3FWntIz2uMGSFphCS1atWq4/r166stZwCoTslQUNNnIx5ZW7o4DozV\nLVswBxQXV/xa2ViB7VNTK35/n6/qYlo+n/Tjj7LfF+iSbpvUR
NvUVFv1t2u2yWzbKm3bJm3bJrt9\nu+zuX5Si6qvh4Ai3oK7OIR8xY619QtITknO2w+N0AACVoM9GvAmc9Q01dmzl+3TsWPkZ6rJOP73y\neOPG+c8yW5+0fr20YoUz8HrdOqmgwHls2CAdPCgj6aXQnZ8sHcv4H/HAJyOfUuRTilJSjVLTU2RS\nUpwDZ4zzXE7bGqPdv6Tol73OvtYfx8rI+j+dlVHDhs74dhP6v5Hylk1gH2nLVqOdO1US48gjneE7\nMkY//iht3xHc/8gso6OPljZvln7aHlyfdZRR8+ZyxsOEwcuCepOkliHtFv51AAAAMRE6hCIwdCJ0\nyMYpp0hffRXcPjCGeunSQ4vqssM9xoxx1j38cOnhHoEx1Kkq0klaqYEtPlOzaYv1w/PL1fLXVTL7\n9sXks/2WVl8H6mZq895M/aIj5Ds8Uxv3ZKrViYfrzJz6+uCzDC38op5+3yVDhWkZendBhpq0ytDX\nGzLU7JgMrfq+nlofl67V69J18SXpGndLmkyddCktTUoPefYv27R03To+TVP/maaTT0vT0mWmZAx2\nyVjsUVUPbSl7HAP7lj1+JTH/HPlwmcDPumQc+RBnm+nTD32tQwtp6fZD1+cNlrQ0vP+6eFlQz5E0\nyhjzgpyLEXdXNX4aAAAgEsY4szWEjkOeOjV4UV+vXtLOnaVn+eje3ZnlIzOz9BlqY5zXO3cuPcvG\nokXOyebGRxRpbNd8nfbT+zoif55O3rdYGb590kZ/gG1V57u3fpZ2HJatlue0lGnaVPaoJnp5gfM8\naHRTPfxiE839NEsnnZ2pB6enqY6k28vM8jHA/zlzrTRnnJTvn62i3umSyZSOLjM7x2G7pV8aSqZj\nFcdSUoMsaWSes1/3nNJxund3jnVVhW/Z49iwofSHP5Q/y0e4MQNxy/6sH3rIeS0wY0d5rwUuyqxo\nn3BU2xhqY8zzknIkZUnaKmmipHRJstY+5p82b4ak3nKmzbuivPHTZXGBC4BElgxjqEPRZyNelJ3B\nIlD+BC70C7we2q5sDHVgX+3eLTtnrvTaa9L8eTK7d1edTFaWdPLJ0kknSe3aScccI2VnS61bS4cd\nVm6uFeVe3mepaN/AckXP4YplnNC8A+tSUqKPGZpfee2KXqtofbh9drWdobbWVjrZiHUq+ZHV9f4A\nAAABZYuyssNwK2sfEmvfXqeAfukl6d13ZQ4erHjjFi2Cp2I7dnQK6SZNos41nM9S1WsVPYcr1nFi\nGbO8fcI5PlUd46okxEWJAAAAnlu2THr8cWnWLGnPnvK3ad5cOu88qWdPKSfHaaPWo6AGAACoiLXO\nLf2mTHHuhlKe3/9eGjhQ6t9fat8+utOqSGgU1AAAAGVZK736qnTnncFbJ4Y64QRp+HCnkG7TpsbT\nQ3yhoAYAAAi1YIFzb+rPPiu9Pi3NKaCvvdaZloIz0fCjoAYAAJCcu3uMHevMmReqfn1nLrdx46RW\nrbzJDXGNghoAACQ3n0964gnpL3+RfvkluL5OHWnUKOm225yp7oAKUFADAIDk9cMP0tCh0scfl14/\neLA0ebIzPzRQBQpqAACQnN54Q7rySudWiQHHH+9MjZeb611eSDgV3AMIAACglioqcu41feGFwWI6\nJUUaP96Z0YNiGhHiDDUAAEgeu3ZJl1wi/fe/wXUtW0qzZ0vdunmXFxIaBTUAAEgO69ZJfftKX38d\nXNe/v/T001Ljxt7lhYTHkA8AAFD7LVsmdelSupi+/Xbn5i0U03CJM9QAAKB2+/xzqVcv6eefnXbd\nus5Z6cGDvc0LtQYFNQAAqL0WLpQuuCA4v3RmpvTWW9I553ibF2oVCmoAAFA7ff651Lu39OuvTrtx\nY+m996TTT/c2L9Q6FNQAAKD2+eYb58x0oJhu0kR6/33plFO8zQu1EgU1AACoXTZvls4/X9q+3Wk3\nbix98IF04one5oVai1k+A
ABA7bFnjzPMY/16p12/vjNmmmIa1YiCGgAA1A4+n3T55dJXXznttDTp\n5Zels87yNi/UehTUAACgdpg8WXr99WD7ySelP/7Ru3yQNCioAQBA4ps717lRS8DYsdLw4Z6lg+RC\nQQ0AABLbunXS0KHBdm6udN993uWDpENBDQAAEldhoXPHw8CNW1q3ll580Rk/DdQQCmoAAJC47rxT\nWrzYWQ5chHjUUd7mhKRDQQ0AABLTggXS3XcH2//v/0mdOnmXD5IWBTUAAEg8u3c746Z9Pqedmyvd\ncou3OSFpUVADAIDEc+ut0oYNznKjRtIzz0gplDXwBr95AAAgsXz8sfTEE8H2Y49JLVp4lw+SHgU1\nAABIHAcOSNdcE2z37y8NHOhdPoAoqAEAQCKZPFn65htn+fDDpX/+UzLG25yQ9CioAQBAYli5Urrn\nnmB7yhSpeXPv8gH8KKgBAED8s1YaM0YqKnLa55wjXXuttzkBfhTUAAAg/r3xhjR/vrOcmupciMis\nHogT/CYCAID4duCAdNNNwfb110snn+xdPkAZFNQAACC+TZ0qffeds9y4sXTHHd7mA5RBQQ0AAOLX\njz86txQPuPNOp6gG4ggFNQAAiF+TJkl79zrLJ53EhYiISxTUAAAgPn37rfTUU8H2gw9KaWne5QNU\ngIIaAADEp9tvl4qLneXcXOm887zNB6gABTUAAIg/S5dKL7wQbN99N3dERNyioAYAAPFnwoTg8oUX\nSmed5V0uQBUoqAEAQHz55BPp7bedZWNKz/IBxCEKagAAEF8mTQouDxvmzO4BxDEKagAAED8+/VSa\nN89ZTk11LkwE4hwFNQAAiB933RVcHjJEOu4473IBwkRBDQAA4kN+vvSf/zjLxki33eZtPkCYKKgB\nAEB8CL34cNAgqV0773IBIkBBDQAAvLd8ufTGG8F26LR5QJyjoAYAAN67557g8kUXSaec4l0uQIQo\nqAEAgLfWr5deeinYHj/eu1yAKFBQAwAAb02dKhUXO8u5uVKnTt7mA0SIghoAAHjn55+lJ58Mtm+5\nxbtcgChRUAMAAO88/ri0d6+zfNJJUu/e3uYDRIGCGgAAeOO336Rp04Ltm2925p8GEgwFNQAA8MZz\nz0lbtjjLzZpJl13mbT5AlCioAQBAzbPWuRgxIC9PqlvXu3wAFyioAQBAzVuwwLmZiyTVry+NGOFt\nPoALFNQAAKDmTZ8eXB42TGrUyLtcAJcoqAEAQM3asEF6/fVge9Qo73IBYoCCGgAA1KxHHw3eyOXc\nc6WTT/Y2H8AlCmoAAFBz9u8vfSOX0aO9ywWIEQpqAABQc55/Xtqxw1lu3Vrq18/bfIAYoKAGAAA1\nw1rp4YeD7ZEjpdRU7/IBYoSCGgAA1IxPPpGWLnWWMzKkq67yNh8gRiioAQBAzZgxI7g8dKjUuLF3\nuQAxREENAACq37Zt0muvBdtMlYdahIIaAABUv5kzpcJCZ7lLF+nUUz1NB4glCmoAAFC9fL7SU+Vd\ne613uQDVgIIaAABUrw8+kNaudZYzM6WBA73NB4gxCmoAAFC9nngiuHz55VL9+t7lAlQDCmoAAFB9\nyl6MOGKEd7kA1YSCGgAAVJ/QixHPPls6+WRP0wGqAwU1AACoHj5f6eEenJ1GLUVBDQAAqscHH0jr\n1jnLXIyIWoyCGgAAVA8uRkSSoKAGAACxx8WISCIU1AAAIPa4GBFJhIIaAADEFhcjIslQUAMAgNgK\nvRixYUNp0CBv8wGqGQU1AACIrccfDy4PGyZlZHiXC1ADKKgBAEDsbN3KxYhIOhTUAAAgdmbOlIqK\nnGUuRkSSqNaC2hjT2xizxhiz1hjz13JezzHG7DbGLPU/bq/OfAAAQDXy+aQnnwy2r73Wu1yAGpRW\nXYGNMamS/inpPEkbJX1ujJljrV1VZtMF1tq+1ZUHAACoIWUvRuTOiEgSVZ6hNsaMNsY0iiL
2mZLW\nWmu/s9YelPSCpP5RxAEAxAkX3wlIBlyMiCQVzpCPpnLOLr/kH8JhwozdXNIPIe2N/nVlnW2MWW6M\n+Y8x5qTyAhljRhhj8o0x+T/99FOYbw8AqAZVfifQZycpLkZEEquyoLbW/k3S8ZKekjRc0rfGmLuN\nMcfF4P2/kNTKWnuqpIclvV5BDk9YaztZazsdddRRMXhbAEA0wvlOoM9OUlyMiCQW1kWJ1loraYv/\nUSSpkaSXjTH3VrLbJkktQ9ot/OtC4/5irf3Vv/y2pHRjTFb46QMAalqU3wmozbgYEUkunDHUecaY\nJZLulbRQ0inW2usldZR0cSW7fi7peGPMMcaYOpIulTSnTOzfBf5caIw505/Pjqg+CQCg2rn4TkBt\nNn8+FyMiqYUzy0djSX+y1q4PXWmt9RljKpydw1pbZIwZJeldSamSnrbWrjTGXOd//TFJAyRdb4wp\nkrRf0qX+Mx8AgPgU1XcCarknngguX345FyMi6ZhEq187depk8/PzvU4DAKJijFlire3kdR41hT47\nCWzdKrVoERw/vWKFdFK5cwwACSfcPps7JQIAgOiFXox4zjkU00hKFNQAACA6ZS9GZKo8JCkKagAA\nEJ1587gYERAFNQAAiNZjjwWXhw/nYkQkLQpqAAAQuc2bpTfeCLaZexpJjIIaAABE7qmnpOJiZzkn\nR2rXztN0AC9RUAMAgMgUFZWee/q667zLBYgDFNQAACAy//mPtHGjs3zUUdJFF3mbD+AxCmoAABCZ\n0IsRr7xSqlPHu1yAOEBBDQAAwldQ4JyhDmDuaYCCGgAARODJJyVrneVevaRjj/U2HyAOUFADAIDw\nHDzozO4RwMWIgCQKagAAEK433pC2bnWWjz5a6tvX23yAOEFBDQAAwhN6MeI110hpad7lAsQRCmoA\nAFC1Vauk+fOd5ZQU6eqrvc0HiCMU1AAAoGoPPxxc7t9fatHCu1yAOENBDQAAKvfzz9IzzwTbeXne\n5QLEIQpqAABQuaeflvbtc5ZPOUX6wx+8zQeIMxTUAACgYsXF0owZwfaYMZIx3uUDxCEKagAAULG3\n3nLujihJjRtLgwd7mg4QjyioAQBAxaZPDy5fc41Uv753uQBxioIaAACUb+VKad48ZzklRbrhBm/z\nAeIUBTUAAChf6FR5F14otWrlXS5AHKOgBgAAh9qxQ3r22WB7zBjvcgHiHAU1AAA41COPBKfKO+00\npsoDKkFBDQAAStu/v/Rwj5tvZqo8oBIU1AAAoLRnnpF++slZbtlSuuQSb/MB4hwFNQAACCoulh54\nINgeO1ZKT/cuHyABUFADAICgOXOkb791ljMznbmnAVSKghoAAATdd19w+brrpMMP9y4XIEFQUAMA\nAMdHH0mffuos16nDVHlAmCioAQCA4847g8uXXy4dfbR3uQAJhIIaAABICxdK8+c7y6mp0m23eZsP\nkEAoqAEAgHTXXcHloUOlY4/1LhcgwVBQAwCQ7BYvlt5911lOSZHGj/c2HyDBUFADAJDsQs9OX3qp\ndMIJ3uUCJCAKagAAktnnn0tvvuksGyNNmOBtPkACoqAGACCZhV58OHCgdOKJ3uUCJCgKagAAktX7\n70vz5jnLqamlp80DEDYKagAAkpG10l//GmxfeaXUtq13+QAJjIIaAIBk9Mor0pIlznK9etLtt3ub\nD5DAKKgBAEg2RUWlLz4cPVpq0cK7fIAER0ENAECyefRR6ZtvnOXMzNJDPwBEjIIaAIBksn176eEd\nt90mNW7sXT5ALUBBDQBAMvn736Vdu5zlNm2ksWO9zQeoBSioAQBIFkuXSk88EWw/+KBUt653+QC1\nBAU1AADJwFppzBjJ53PavXpJfft6mxNQS1BQAwCQDJ59VlqwwFlOS5OmTnVuNQ7ANQpqAABqu23b\npHHjgu0xY6R27bzLB6hlKKgBAKjtxoyRdu50lrOzucU
4EGMU1AAA1GZz50ovvhhsP/641KCBd/kA\ntRAFNQAAtdXu3dL11wfbf/6zdP753uUD1FIU1AAA1EbWStddJ23a5LSbNJEeeMDbnIBaioIaAIDa\n6NlnpRdeCLYfeUQ68kjv8gFqMQpqAABqm7VrpZEjg+2rrpIuvti7fIBajoIaAIDapLBQGjxY+vVX\np33CCc6c0wCqDQU1AAC1ydix0uefO8vp6dLzz0uHHeZtTkAtR0ENAEBt8eSTzljpgH/8Qzr9dO/y\nAZIEBTUAALXBwoWlx01fcol0443e5QMkEQpqAAAS3YYNzkWHhYVOu0MH6amnJGO8zQtIEhTUAAAk\nsp9+cm7WsnWr087Kkl5/nbshAjWIghoAgES1Z490wQXSmjVOu04d6eWXpdatvc0LSDIU1AAAJKLf\nfpMuukjKz3faxkizZ0vdu3ubF5CEKKgBAEg0+/ZJ//M/0rx5wXWPPSYNGOBdTkASS/M6AQAAEIFf\nfpH69pUWLAiuu+suacQI73ICkhwFNQAAiWL7dmfMdODGLZJ0553ShAne5QSAghoAgISwcqUzzOO7\n74LrHniAuaaBOMAYagAA4t1bb0ldugSLaWOcMdMU00BcoKAGACBeFRVJd9zhnJnes8dZ16CB9Npr\n0rXXepsbgBIM+QAAIB5t2CANGSJ98klwXatW0ty50qmnepcXgENwhhoAgHhirTRzpnTaaaWL6e7d\nnYsRKaaBuENBDQBAvPj6ayk3V7riCmnXLmddaqozLd68eVKTJt7mB6BcDPkAAMBrO3ZId98tzZgh\nHTwYXH/MMc7dD7t08S43AFWioAYAwCt79kjTp0v33uvcsCUgNVW66Sbp9tudixABxDUKagAAatrW\nrU4h/cgjwaEdAV26OFPiMVYaSBgU1AAA1ARrnduFP/mk9O9/S7/9Vvr1tm2dYR8XXeTMMw0gYVBQ\nAwBQnVatkl5+2RkL/c03h77epo30l79Iw4dLaXwtA4mIf7kAAMRSUZGUn+/c3fCVV6TVq8vf7swz\npVtvlS680BkzDSBhUVADAOBGUZG0YoUzZ/T770sffFD6AsNQhx/u3Kzlmmuk00+v2TwBVBsKagAA\nwrVvnzNX9MqV0rJl0qJF0pIl0v79Fe9Tv77Up480YIDzzKwdQK1DQQ0AQKhdu6T166WCguDz2rVO\nEf39987FhVVp0ULq0UPq10/64x+dohpArVWtBbUxprekaZJSJf2vtfaeMq8b/+sXSNonabi19ovq\nzAmIFWtLX4gf+I41RvL5gq+Ftiu6cD90X6n0/uUx5tD3jzTXinIv265o36qew1UdcQLKO/aRxi1v\nn9B2VcenohioIYWFzvCLwGP3bmnnTmnbNmfqum3bSj82bXK2iVSrVs6Y6JwcqWdP6YQT+IEDSaTa\nCmpjTKqkf0o6T9JGSZ8bY+ZYa1eFbPZHScf7H50lPep/Tnizv5qtCfMmaMPuDWqV2UqTe0zWkFOG\nhLWdJE2YN0Hrd69XqklVsS3WYXUO096De2VlZWTUoE4D7T24V60yW+mC4y/QSytf0o79O8rNpXVm\n65K4ef/Jq3C7gCMzjtS0P04rN184Jk1yTmI99FCweArcyKxuXWnNGumSS5x2ZqY0Z460fbt09dXO\nvmVjvfOO1LmzNHWq054zx/luN0b605+kxYud9+jc2Vnu1cv5zm/Y8NB44eQ6blxw37LvL0ljxzp/\nya5XT+rQ4dB9ly511mdmOnk8+KB0443Bdjh5heYWyzjvvuvUNcY4ef7yi1SnjnOScOLE0p89HJUd\nP6n81wLHp6J9ksr+/c7QiKIid48DB5zH/v3B59Dl0HW//hosnisbihEFX4rR90ematmRRdrYMlOn\n9r1SO045Tjctu0/rd7+s1B2vqfiFYh2ZcaQOFB3Q3sK9kqQG6Q1UL62edu7fqcYZjUu9FsrIyMqq\ndWbrKvv2gAbpDfR
4v8fpswEPVecZ6jMlrbXWfidJxpgXJPWXFFpQ95f0jLXWSvrMGNPQGNPMWvtj\nNeZV7WZ/NVsj5o7QvsJ9kqT1u9drxNwRklSqwytvuytev0LGGB0sdm49W2yLJUm/Hvy1ZD8rW9Je\nv3u9Hs1/tNJ8AnF91lcSrzI79u/QlW9ceUi+cFjrFFHTpjnthx4KFqCSdOSRzl2Ep08v3Zakn38+\n9Ozmzz87+y5aFJymdunS4Ps9/HBwefFi5/m335xt8vIqP/NZXq7jxjntvDzn7G3o+wcEcu/Qofx9\nA+s7dHDy+OijYBEZTl5lc4t1nMBnycpy/iMjOf9hGDvW+WzhxK3q+I0ZU/pYlXd8yjvmSWfVKueA\nJJD9adL6RkZHnHCKjj65i5SdLWVn66307zV05V3aZQNF+m6l754hs/DQPrtsEby3cG9JAV1ZgWzl\n/IklnL49NPafX/uzJPpswCvGhjMWLJrAxgyQ1Ntae7W/PUxSZ2vtqJBt3pR0j7X2E397nqS/WGvz\nK4rbqVMnm59f4ctxIXtqttbvXn/I+taZrVUwtqDK7eJF2XwRFDjjGCiapEMLrLLGjHHOAJct4qwN\nFnqRyMsLngGNNNfQfSt6/zFjnG1uvPHQfQNnkkPXR5pXRbnFOo6buBXFDsSQyn+tvOMT2CclxSyx\n1nYK790TXydjKu7Qa0JKinTEEc4jM9N5btRIatpUatJEatJEY5ZM1iqzXdsaSFsOk35qIMnQZ9cW\nu3bt0vbt21VYWOh1Kohz6enpysrKUsOQPycaE16fnRAFtTFmhKQRktSqVauO69fHb4cmSSl3pJSc\nZQhlZOSb6Ktyu3hRNl+UZq3zXR3g8x+q0HWhKhsXXTZWOKoaZ11Z/LL7lvf+gW0q2reinCPJq6L3\njmUct3HLix0aI9zjExzLXfsL6tA+u0N6escv27Vzblji5lG3rpSR4YxDysgovVz2uUGDYPF82GFV\n/sDps2u31atXKzs7W/Xq1ZNhXDsqYK3VgQMHVFBQoPbt25esD7fPrs4hH5sktQxpt/Cvi3QbWWuf\nkPSE5Jyhjm2asdcqs1W5ZzFaZbYKa7t4UTZfBAXOWoYaO7byfcaOrfwMdaTGjYvsDHVF+1b0/mPH\nBs9Ql903cAbWTV4V5RbrOG7iVhQ7ECOwXPa18o5P6D613SF9dpz/VZE+u/bLyMjwOgXEOWOMq9+T\nCM+JReRzSccbY44xxtSRdKmkOWW2mSPpcuM4S9LuRB8/LUmTe0xW/fTSUyTVT69fcmFgZdulp6Sr\nTmqdmOfbMAZuAAAYXUlEQVSUnpKuVBP+nbjqpNY5JF84QocABMYhjxnjDJmYPt0ZMx0q0J4+3SlS\nQ/8oVHa4xejR4Q03DYzRHTeu8hm8yss1Ly+4r89X+v3HjCk9dKVjx/L3DawP5Br6HE5eZXOLdZyA\nrKzg8ujRzmcLN27Z2GWPwdixzqOy41PeMUf8icc+O1KpJpU+G/BQtZ2httYWGWNGSXpXzrR5T1tr\nVxpjrvO//pikt+VMmbdWzrR5V1RXPjUpcFFIVbN8VLRdYB2zfMQnY5zZGkLH4k6dGrwQrrJZPho1\nKn1m1BhnXefOpWf5kKqe5aN7dyePqqbXK5tr4Cxpw4bOkISy7x8QmOWjvH0DFwxmZjp5hM7OEU5e\nZXOLZZy6dcuf5aNxY2eWj8D24Zyhrur4SZUfn4r2QXypjj6bWT6SU0FBgW6++Wa9/PLLEe+7ZcsW\nPfroo7rjjjsOeW3p0qU6ePCgzjzzzEq3CzVz5kxNnjxZzZs3V3FxsZ599lllZ2dHnJdbM2fOVNu2\nbdUlMBVWhI455hhdfvnlJZ+3T58+ysjICOsY33zzzerbt69ycnLKfb1Tp06K1XV51TaGurokwkWJ\nSA7MQ8081JW9VtH6ZBhDHYo+G15bvXp1qTGx1clNQV2ZmTNn6tdff9WoUaOq3ricf
WbNmqVly5bp\nvvvui+r9fT6fUiK90CdGOnXqpGbNmmnu3LnatWuX+vXrp6ZNm1ZbQV329yXcPtubowPUAmWLstCi\nLSWl/HZlsUJfD92/vEd57x9prpW9f2XvU3Z9Rc+R5hbLOJUd+2iuSars+FV1fNy8L4AYqqxTjeYR\npq+++kpdu3bVOeeco3/84x+SpA0bNuicc87RBRdcoEsvvVQzZ85UQUGBBgwYIEm64oor1K1bN+Xk\n5KigoECPPvqopk2bpvPPP7/UdosXL1bXrl2Vk5NTabG8a9cuBU6gfvfdd+rVq5dycnI0zj8Obdeu\nXTr//PPVu3dvDR8+XJP8fyo98cQTdcUVV+jGG2/U9u3bdeGFF+rcc8/VkCFDVFxcrM8++0ydO3dW\nbm6uJk2apMLCQvXr1085OTnKycnRgQMHNGnSJL355puSpJtuukldu3bVueeeq4KCAklS+/bt9ec/\n/1kdOnTQ7Nmzy82/Xbt2WrNmjebMmaN+/fqVrP/ggw901lln6ayzztIzzzwjSVq2bJnOOOMM9e3b\nV8uXL5fkXHA4evRo5ebmqmfPntq4cWPYP79wcetxAACAajJ+/Hg9+eSTateunXr16qXLLrtM999/\nvyZOnKjzzz9fgwcPLrV9YWGh1qxZo4ULF8oYI5/Pp+uvv77kbHOgEJWkcePG6YUXXlDLli3l8x06\nw8u0adP0r3/9S9u2bdP//d//SZL++te/6pFHHtFxxx2n66+/Xvn5+frwww81YMAAjRgxQuPHjy/Z\nf+PGjVq4cKEaNWqkm2++WWPGjNG5556rKVOm6LXXXtOyZcs0ceJEXXDBBfL5fPr+++9Vv359zZ07\nV9Zahc6qkp+fr02bNumTTz7RggULdOedd+rpp5/Wli1b9LD/hgvnnXeehgw5dOjSxRdfrFdeeUXL\nli3T3//+dy3235Thtttu05tvvqnMzEx16dJFAwcO1N/+9jfNmjVLxx9/vLp27SpJeuutt9SoUSN9\n8MEHWrRoke655x7NmDEjyp9o+ThDDQAAUE22bNmi9u3byxij008/XevWrdPatWvVsWNHSSp5DkhP\nT9fIkSM1bNgw5eXlad++fRXGPnjwoFq2dCZLK29IRl5enpYsWaI+ffpo5cqVkqSvv/5aV111lXJy\ncrR48WJt3LixwnzatGmjRo0aSZJWrVqliRMnKicnR6+++qq2bNmikSNH6u2339aQIUP0zjvv6Ljj\njtPZZ5+toUOH6m9/+5uKi4M3k1u7dq3OOOMMSdIZZ5yhb7/9VpJ07LHH6ogjjtARRxxRavtQnTt3\n1ocffihjjA477LCS9cXFxcrKylJ6erratGmjzZs3a8uWLWrbtq1SUlJKPsuqVav02muvKScnR7fe\neqt27dpV4TGNFmeoAQBA7efRNWNNmzbV6tWr1a5dO33xxRe67rrr1KZNG3355Zfq2bNnyXNAcXGx\nBg0apCFDhujuu+/Wq6++qvT09HKLzbp162rTpk1q3rx5peOcJ06cqAEDBqh3795q27at7r//frVu\n3VrWWhUXF2vt2rX68ssv1bFjR3355ZdKS3PKw9B47dq100UXXaRu3bpJcs6kFxUVacaMGTp48KA6\nduyoHj16aPTo0UpJSdGIESO0cOHCkv3btGmj119/XZL0+eef6/jjj5ckhTM3uDFGf/rTn3TssceW\nWp+SkqLt27crMzNT3377rY4++mg1bdpU3377rdq0aaMvvvhCF198sdq1a6dBgwbp73//e0nusUZB\nDQAAECMLFiwoKZB79uypyZMn6+qrr5a1Vn369FF2drZuvfVWXXbZZXrggQeUkZGh9PT0kv337Nmj\n/v37yxgjY4xmz56tAwcO6PLLL9eiRYt09913l2z74IMPatCgQUpPT1efPn10yy23lJtTs2bN1Lx5\nc3322WeaMmWKrrvuOh04cECpqal6+umndfXVV
2vgwIH697//raysLJ144omHxJgwYYKuueYaTZw4\nUZJ077336pNPPtGrr76qoqIiDR8+XOvXr9dVV12l1NRUNWjQQKeffrrmz58vKXhxYdeuXZWWlqZ/\n/etfER3X6667TpJKDXm5++671adPHxljNGrUKGVkZOiuu+7S4MGD1aRJk5Kz6/369dP8+fOVm5sr\nY4yGDBmiq666KqL3rwqzfABADWKWD6Bm1eQsH+EqKioqOQs8ePBg5eXlqXPnzp7l4/P5ZK1Vamqq\nxo8fr9NOO02XBOZ+TTLRzvLBGWoAAIAatH79eg0fPlxFRUU67bTTPC2mJWn//v3q3bu3rLVq0qRJ\nySwfCB8FNYAat3XrVu3cudPrNKpd48aN1bRpU6/TABBnjjvuOC1YsMDrNEo0aNAgrvJJRBTUAGrc\nzp07dcIJJyg1NdXrVKpNcXGxvvnmGwpqAEgCTJsHwBO1uZiWav/nAwAEUVADAAAALlBQA0gqw4cP\n14oVK2IS6+uvv9Yf/vAHnX322Zo3b15MYgKoXehzkgMFNYC4NPur2cqemq2UO1KUPTVbs7+a7XVK\nhxg/fryeeuopvfPOO7r99tu9TgeAC/Q5cIOLEgHEndlfzdaIuSO0r9C55e763es1Yu4ISdKQU4aE\nHcdaq1GjRmn58uVKS0vTSy+9VPLa1q1bdemll6qoqEhNmzbViy++qIKCAg0bNkx169bVCSecoMcf\nf1xXXHGF1q5dq9TUVM2cOVPZ2dklMTZv3lxyt6/GjRtr+/btysrKisERAFCT6HPgFgU1gLgzYd6E\nki+2gH2F+zRh3oSIvtzmzp2rlJSUkumgfD5fyWuNGjXSe++9p7S0NOXl5Wn+/PnasGGDhg4dqhtu\nuEE+n0+FhYVas2aNFi5cKGNMqf3LxsvMzNTOnTv5cgMSEH0O3GLIB4C4s2H3hojWV2T16tXq3r17\nSTslJdjl7dixQwMGDFD37t319ttva/PmzRo0aJC+//57DRkyRLNmzVJ6erpGjhypYcOGKS8vT/v2\nlf7CDY23e/duNW7cOKL8AMQH+hy4RUENIO60ymwV0fqKtG/fXh9//HFJO/TsznPPPae+ffvqo48+\nKrlDWFpamu677z7Nnj1bU6ZMUXFxsQYNGqRZs2apadOmevXVV0vFb9asmdatW6c9e/ZwpghIYPQ5\ncIshHwDizuQek0uNZ5Sk+un1NbnH5Iji9OvXT++88466du2q9PT0UuMZe/TooWHDhmnu3LnKyMiQ\nJM2ZM0czZsyQJPXq1Ut79uxR//79ZYyRMUazZ5e+SGny5MkaPny4iouLdccdd0T7cQF4jD4Hbhlr\nrdc5RKRTp042Pz/f6zQAuLB69Wq1b9++0m1mfzVbE+ZN0IbdG9Qqs5Um95gc0VjGeFDe5zTGLLHW\ndvIopRpHnw2vhdPfSLWjz4F7ZX9fwu2zOUMNIC4NOWUIX2YAagx9DtxgDDUAAADgAgU1AAAA4AIF\nNQAAAOACBTUAAADgAgU1gLhUdgKiWE1INHz4cK1YsSImsaZPn67s7GwNGDAgJvEAeIc+B25QUAOI\nO5MmSePGBb/QrHXakyZ5mdWhLr30Us2bN8/rNAC4RJ8DtyioAcQVa6Vdu6Rp04JfcOPGOe1duyI7\na2St1ciRI9WtWzfl5ubqp59+Knlt69atys3NVbdu3TRgwAAVFxdr3bp1Ovvss5Wbm6trr71WknTF\nFVeoW7duysnJUUFBQan4TZo0UWpqaiw+NgCP0OcgFpiHGkBcMUZ66CFnedo05yFJeXnOemPCjzV3\n7lylpKRowYIFkkrfBrhRo0Z67733lJaWpry8PM2fP18bNmzQ0KFDdcMNN8jn86mwsFBr1qzRwoUL\nZYwptT+A2oE+B7HAGWoAcSf0Cy4g0i82ybnjVffu3UvaKSnBLm/Hjh0aMGCAunfvrrffflubN2/W\noEGD9P333
2vIkCGaNWuW0tPTNXLkSA0bNkx5eXnat29feW8DIMHR58AtCmoAcSfwJ9dQoeMbw9W+\nfXt9/PHHJe3Qsz3PPfec+vbtq48++ki9e/eWtVZpaWm67777NHv2bE2ZMkXFxcUaNGiQZs2apaZN\nm+rVV19187EAxCn6HLhFQQ0groSOX8zLk3w+5zl0fGO4+vXrp6KiInXt2lW5ubnasWNHyWs9evTQ\ntGnT1L9//5JxjnPmzFG3bt3UrVs39erVS3v27FHPnj2Vk5Oj9957Tz179iwV/4UXXtDQoUO1YMEC\n9ezZkz/PAgmIPgexYGys5oWpIZ06dbL5+flepwHAhdWrV6t9+/YVvj5pknMxUOBProEvvIYN4++q\n+8qU9zmNMUustZ08SqnG0WfDa1X1N1Lt6XPgXtnfl3D7bC5KBBB3Jk1yvtAC4xcD4xsjHc8IAOGg\nz4FbDPkAEJfKfpHxxQagOtHnwA0KagCeSLThZpGq7Z8PSCT8e0Q43PyeUFADqHHp6ek6cOCA12lU\nqwMHDig9Pd3rNICkV69ePe3YsYOiGpWy1mrHjh2qV69eVPszhhpAjcvKyjrkDmC1UbNmzbxOAUh6\nLVq00MaNG0vdtRAoT7169dSiRYuo9qWgBlDjGjZsqIYNG3qdBoAkkJ6ermOOOcbrNFDLMeQDAAAA\ncIGCGgAAAHAh4W7sYozZI2lNDEJlSdpOnKSKE0+5ECd547S21h4VgzgJgT6bOLUoTjzlQpyaixNW\nn52IY6jXxOIuY8aYfOIkV5x4yoU4yRsnCdFnE6dWxImnXIhTc3HCxZAPAAAAwAUKagAAAMCFRCyo\nnyAOcTyMQRziIDLxdvyJQxwvYxAn8eKEJeEuSgQAAADiSSKeoQYAAADiBgU1AAAA4EJCFtTGmBeN\nMUv9jwJjzFIXsUYbY742xqw0xtwbZYxJxphNITldEG0+/ng3GWOsMSYryv3vMsYs9+fyX2PM0VHE\nuM9/XJYbY14zxkR1n2hjzED/sfUZYyKevsYY09sYs8YYs9YY89coc3jaGLPNGLMimv1D4rQ0xnxg\njFnl/0x5UcapZ4xZbIxZ5o9zh4ucUo0xXxpj3ow2hj9OgTHmK//vTL6LOA2NMS/7f3dWG2O6RLh/\n25B/R0uNMb8YY8ZGmcs4//FdYYx53hhTL8o4ef4YK6PNJdnRZ1e5v+s+2x/H8347Fn22P47rfjse\n+2x/PNf9drz02f4YMem3E77PttYm9EPSA5Juj3LfXEnvS6rrbzeJMs4kSTfH6PO0lPSupPWSsqKM\ncUTI8hhJj0UR43xJaf7lKZKmRJlLe0ltJX0oqVOE+6ZKWifpWEl1JC2TdGIUOfxB0umSVrj82TST\ndLp/+XBJ30SZj5F0mH85XdIiSWdFmdONkp6T9KbLz1YQ7e9bmTj/n6Sr/ct1JDV0EStV0hY5k+pH\num9zSd9LyvC3X5I0PIo4J0taIam+nHn735fUxu1xSuYHfXa5MVz32f59Pe23Y9Vn+2O57rfjsc/2\nx3Ddb8djnx3yOxBxv10b+uyEPEMdYIwxkgZJej7KENdLusda+5skWWu3xSo3Fx6SdKukqK8Wtdb+\nEtJsEE0sa+1/rbVF/uZnklpEmctqa220d0k7U9Jaa+131tqDkl6Q1D+KHD6WtDPKHELj/Git/cK/\nvEfSajmdQKRxrLX2V38z3f+I+GdkjGkhqY+k/4103+pgjMmU8yX4lCRZaw9aa3e5CNlD0jpr7foo\n90+TlGGMSZPTuW6OIkZ7SYustfv8/x4+kvSnKPNJevTZ5YtFn+2P43W/HZM+25+D63473vpsKb76\n7WrosyV3/XZC99kJXVBL6iZpq7X22yj3P0FSN2PMImPMR8aYM1zkMtr/Z7anjTGNoglgjOkvaZO1\ndpmLPAKxJhtjfpA0RNLtLsNdKek/bnOKQnNJP4S0NyqKzrA6GGOyJf1ezpm
KaPZP9f/Ze5uk96y1\n0cSZKueL3BdNDmVYSe8bY5YYY0ZEGeMYST9J+pf/z5n/a4xp4CKnSxVl4WWt3STpfkkbJP0oabe1\n9r9RhFohp4840hhTX9IFcs5IIjr02RXHimWfLXnTb9NnVy1W/XY89tlSlP12beiz4/bW48aY9yX9\nrpyXJlhr3/AvX6YqfnCVxZHz+RtLOkvSGZJeMsYca/1/N4ggzqOS7pLzC36XnD9pXhlFPuPl/Mmu\nSlUdH2vtBEkTjDG3SRolaWKkMfzbTJBUJGl2tLlU+WESjDHmMEmvSBpb5sxS2Ky1xZI6+Mc4vmaM\nOdlaG/ZYQWNMX0nbrLVLjDE50eRQRldr7SZjTBNJ7xljvvafIYpEmpw/0Y621i4yxkyT9FdJf480\nGWNMHUn/I+m2SPf1799IzpmxYyTtkvRvY8xQa+2sSOJYa1cbY6ZI+q+kvZKWSiqOJqfajj67crHo\ns8OJ49+GfjtEPPTZ/jxi2W/HVZ8tueu3a0WfXRPjSqrjIecXYaukFi5ivCMpN6S9TtJRLvPKVhRj\nviSdIud/vgX+R5Gc/6n9zmU+raLJx7/vcEmfSqofg5/Xh4p8DHUXSe+GtG+TdFtN/lzKiZMuZ7zk\njW5jhcS8XRGO55T0DzlnfwrkjFfbJ2lWjPKZFGk+/v1+J6kgpN1N0ltR5tBf0n9dfIaBkp4KaV8u\n6ZEYHJu7Jd0Qq599Mj3os8OOG3Wf7d/fs347ln22m59NmRhx0Wf796uWfjse+mz//lH327Whz07k\nIR89JX1trd3oIsbrci5ykTHmBDkD8rdHGsQY0yykeZGcPzlExFr7lbW2ibU221qbLecf3enW2i1R\n5HN8SLO/pK+jiNFbzp+l/sdauy/S/WPkc0nHG2OO8f/P91JJczzKJTD+8ylJq621D7qIc5T/LIeM\nMRmSzlOEPyNr7W3W2hb+35VLJc231g6NMp8GxpjDA8tyzrhF8zu8RdIPxpi2/lU9JK2KJieFcSaz\nChsknWWMqe//ufWQM34yYv4zQDLGtJIzFu85F3klM/rsivNx3Wf743jdb9NnVyJW/Xac9tmSu347\n8fvsmqjaq+Mhaaak61zGqCNplpxfxC8knRtlnGclfSVpuZzOo1kMPl+Bor9i/BX/Z1ouaa6k5lHE\nWCtnLNxS/yPaq84vkvNF85ucs1PvRrj/BXKuzF4n50+R0eTwvJwxWYX+XK6KMk5XOX8iXh5yXC6I\nIs6pkr70x1mhKGc8CImXI3dXix8r52r8ZZJWRnuc/bE6SMr3f7bXJTWKIkYDSTskZbo8LnfI+dJb\n4f83WjfKOAvkfMksk9TDTU7J/KDPrnRf1322P47n/XYs+mx/HNf9drz22f6YUffb8dZn++O47rcT\nvc/m1uMAAACAC4k85AMAAADwHAU1AAAA4AIFNQAAAOACBTUAAADgAgU1AAAA4AIFNZKeMaalMeZ7\nY0xjf7uRv53tbWYAgLLosxGPKKiR9Ky1P8i5FfE9/lX3SHrCWlvgWVIAgHLRZyMeMQ81IMkYky5p\niaSnJV0jqYO1ttDbrAAA5aHPRrxJ8zoBIB5YawuNMbdIekfS+XTMABC/6LMRbxjyAQT9Uc6tbk/2\nOhEAQJXosxE3KKgBScaYDpLOk3SWpHHGmGYepwQAqAB9NuINBTWSnjHGyLnAZay1doOk+yTd721W\nAIDy0GcjHlFQA84FLRuste/5249Iam+M6e5hTgCA8tFnI+4wywcAAADgAmeoAQAAABcoqAEAAAAX\nKKgBAAAAFyioAQAAABcoqAEAAAAXKKgBAAAAFyioAQAAABf+f+Zt+Zfc/x7dAAAAAElFTkSuQmCC\n", 39 | "text/plain": [ 40 | "" 41 | ] 42 | }, 43 | "metadata": {}, 44 | "output_type": "display_data" 45 | 
} 46 | ], 47 | "source": [ 48 | "xmin, xmax = -7, 5\n", 49 | "n = 50\n", 50 | "np.random.seed(0)\n", 51 | "X = np.random.normal(size=n)\n", 52 | "y = (X > 0).astype(np.float)\n", 53 | "X[X >= 0] *= 4\n", 54 | "X[X >= 0] += 2\n", 55 | "X[X < 0] -= 2\n", 56 | "\n", 57 | "# plot separated\n", 58 | "f, axarr = plt.subplots(1,2, sharey=True)\n", 59 | "f.set_figheight(5)\n", 60 | "f.set_figwidth(12)\n", 61 | "\n", 62 | "'''\n", 63 | "Figure 0\n", 64 | "'''\n", 65 | "for t,marker,c in zip([0.0,1.0],\"ox\",\"gb\"):\n", 66 | " # plot each class on its own to get different colored markers\n", 67 | " axarr[0].scatter(X[y == t],\n", 68 | " np.zeros(len(X[y==t])),\n", 69 | " marker=marker,\n", 70 | " c=c)\n", 71 | " \n", 72 | "X_test = np.linspace(-7, 10, 300)\n", 73 | "\n", 74 | "axarr[0].set_ylabel('y')\n", 75 | "axarr[0].set_xlabel('X')\n", 76 | "axarr[0].set_xticks(range(-7, 10))\n", 77 | "axarr[0].set_yticks([0, 0.5, 1])\n", 78 | "axarr[0].set_ylim(-.25, 1.25)\n", 79 | "axarr[0].set_xlim(-7, 10)\n", 80 | "axarr[0].legend(('class 0', 'class 1'),\n", 81 | " loc=\"lower right\", fontsize='small')\n", 82 | "\n", 83 | "\n", 84 | "'''\n", 85 | "Figure 1\n", 86 | "'''\n", 87 | "for t,marker,c in zip([0.0,1.0],\"ox\",\"gb\"):\n", 88 | " # plot each class on its own to get different colored markers\n", 89 | " axarr[1].scatter(X[y == t],\n", 90 | " y[y==t],\n", 91 | " marker=marker,\n", 92 | " c=c)\n", 93 | " \n", 94 | "#plt.scatter(X, y, c=y,color='black', zorder=20)\n", 95 | "X_test = np.linspace(-7, 10, 300)\n", 96 | "\n", 97 | "\n", 98 | "def model(x):\n", 99 | " return 1 / (1 + np.exp(-x))\n", 100 | "\n", 101 | "loss = model(X_test)\n", 102 | "axarr[1].plot(X_test, loss, color='red', linewidth=3)\n", 103 | "\n", 104 | "\n", 105 | "axarr[1].set_ylabel('y')\n", 106 | "axarr[1].set_xlabel('X')\n", 107 | "axarr[1].set_xticks(range(-7, 10))\n", 108 | "axarr[1].set_yticks([0, 0.5, 1])\n", 109 | "axarr[1].set_ylim(-.25, 1.25)\n", 110 | "axarr[1].set_xlim(-7, 10)\n", 111 | 
"axarr[1].legend(('Logistic Regression Model', 'class 0', 'class 1'),\n", 112 | " loc=\"lower right\", fontsize='small')\n", 113 | "plt.show()\n", 114 | "\n", 115 | "f.savefig('images/report_images/logistic.png', bbox_inches='tight')" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "### Support vector machine\n", 123 | "This is matlab code on how the image demonstrating linear vs RBF kernel was made (svm_kernel.png)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": true 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "'''\n", 135 | "clear\n", 136 | "clc\n", 137 | "load('d2.mat');\n", 138 | "\n", 139 | "hold on\n", 140 | "gscatter(X(:,1),X(:,2),Y,'rb','x+',6);\n", 141 | "\n", 142 | "SVMstruct = svmtrain(X,Y,'boxconstraint',1,'autoscale',false,'kernel_function','RBF');\n", 143 | " \n", 144 | "% Make a grid of values to classify the entire space\n", 145 | "x1_axis = linspace(min(X(:,1)), max(X(:,1)), 1000)';\n", 146 | "x2_axis = linspace(min(X(:,2)), max(X(:,2)), 1000)';\n", 147 | "\n", 148 | "[x1_space, x2_space] = meshgrid(x1_axis, x2_axis);\n", 149 | "\n", 150 | "for i = 1:size(x1_space, 2)\n", 151 | " point_in_space = [x1_space(:, i), x2_space(:, i)];\n", 152 | " class(:, i) = svmclassify(SVMstruct, point_in_space);\n", 153 | "end\n", 154 | "\n", 155 | "% Plot the SVM boundary\n", 156 | "hold on\n", 157 | "contour(x1_space, x2_space, class, [0 0], 'k');\n", 158 | "legend('-1','1','RBF boundary');\n", 159 | "xlabel('x1');\n", 160 | "ylabel('x2');\n", 161 | "title('Data points and decision boundary');\n", 162 | "hold off;\n", 163 | "'''" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 28, 169 | "metadata": { 170 | "collapsed": true 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "import pickle\n", 175 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 176 | "from sklearn.ensemble 
import RandomForestClassifier\n", 177 | "\n", 178 | "def get2Grams(payload_obj):\n", 179 | " '''Divides a string into 2-grams\n", 180 | " \n", 181 | " Example: input - payload: \"