├── README.md └── RansomwareD.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Ransomware Detection Using ML 2 | 3 | ### Machine Learning Algorithms used are: 4 | 5 | 1. Random Forest 6 | 2. Decision Tree 7 | 3. Logistic Regression 8 | 9 | ### Additional Libraries Used: 10 | 11 | * pefile 12 | * pickle 13 | * joblib 14 | * mlxtend 15 | * statsmodels 16 | * sklearn 17 | 18 | ### Concepts Used: 19 | 20 | * Multicollinearity 21 | * Ensemble Technique 22 | * Extra Tree Classifier 23 | -------------------------------------------------------------------------------- /RansomwareD.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "id": "2bca8f3e", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "Collecting pefile\n", 14 | " Using cached pefile-2021.5.24.tar.gz (66 kB)\n", 15 | "Requirement already satisfied: future in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from pefile) (0.18.2)\n", 16 | "Building wheels for collected packages: pefile\n", 17 | " Building wheel for pefile (setup.py): started\n", 18 | " Building wheel for pefile (setup.py): finished with status 'done'\n", 19 | " Created wheel for pefile: filename=pefile-2021.5.24-py3-none-any.whl size=62578 sha256=cf20a74be7fc5f7210d0a6f4e3714ed405c5d8da883caaeb0d173f1927786bf5\n", 20 | " Stored in directory: c:\\users\\vajha\\appdata\\local\\pip\\cache\\wheels\\43\\04\\fc\\d9305103f7d512f2df35b1878e1009e8217e713b767aee8f13\n", 21 | "Successfully built pefile\n", 22 | "Installing collected packages: pefile\n", 23 | "Successfully installed pefile-2021.5.24\n", 24 | "Note: you may need to restart the kernel to use updated packages.\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "pip install pefile" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | 
"id": "817480c1", 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "Collecting mlxtend\n", 43 | " Using cached mlxtend-0.18.0-py2.py3-none-any.whl (1.3 MB)\n", 44 | "Requirement already satisfied: setuptools in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from mlxtend) (52.0.0.post20210125)\n", 45 | "Requirement already satisfied: joblib>=0.13.2 in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from mlxtend) (1.0.1)\n", 46 | "Requirement already satisfied: scikit-learn>=0.20.3 in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from mlxtend) (0.24.1)\n", 47 | "Requirement already satisfied: scipy>=1.2.1 in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from mlxtend) (1.6.2)\n", 48 | "Requirement already satisfied: pandas>=0.24.2 in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from mlxtend) (1.2.4)\n", 49 | "Requirement already satisfied: matplotlib>=3.0.0 in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from mlxtend) (3.3.4)\n", 50 | "Requirement already satisfied: numpy>=1.16.2 in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from mlxtend) (1.20.1)\n", 51 | "Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (2.8.1)\n", 52 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (2.4.7)\n", 53 | "Requirement already satisfied: cycler>=0.10 in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (0.10.0)\n", 54 | "Requirement already satisfied: pillow>=6.2.0 in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (8.2.0)\n", 55 | "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (1.3.1)\n", 56 | "Requirement already satisfied: six in 
c:\\users\\vajha\\anaconda3\\lib\\site-packages (from cycler>=0.10->matplotlib>=3.0.0->mlxtend) (1.15.0)\n", 57 | "Requirement already satisfied: pytz>=2017.3 in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from pandas>=0.24.2->mlxtend) (2021.1)\n", 58 | "Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\vajha\\anaconda3\\lib\\site-packages (from scikit-learn>=0.20.3->mlxtend) (2.1.0)\n", 59 | "Installing collected packages: mlxtend\n", 60 | "Successfully installed mlxtend-0.18.0\n", 61 | "Note: you may need to restart the kernel to use updated packages.\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "pip install mlxtend" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 3, 72 | "id": "b94512f2", 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "import os\n", 77 | "import pandas as pd\n", 78 | "import numpy as np\n", 79 | "from matplotlib import pyplot as plt\n", 80 | "import pickle\n", 81 | "import pefile\n", 82 | "import sklearn.ensemble as ek\n", 83 | "from sklearn import tree, linear_model\n", 84 | "from sklearn.feature_selection import SelectFromModel\n", 85 | "import joblib\n", 86 | "from sklearn.naive_bayes import GaussianNB\n", 87 | "from sklearn.metrics import confusion_matrix\n", 88 | "from sklearn.pipeline import make_pipeline\n", 89 | "from sklearn import preprocessing\n", 90 | "from sklearn import svm\n", 91 | "from sklearn.linear_model import LogisticRegression\n", 92 | "from statsmodels.stats.outliers_influence import variance_inflation_factor as vif\n", 93 | "from sklearn.model_selection import train_test_split\n", 94 | "from mlxtend.plotting import plot_confusion_matrix\n", 95 | "dataset=pd.read_csv(\"Ransomware.csv\",sep='|')" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "id": "cde9b494", 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/html": [ 107 | "
\n", 108 | "\n", 121 | "\n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " 
\n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | "
Namemd5MachineSizeOfOptionalHeaderCharacteristicsMajorLinkerVersionMinorLinkerVersionSizeOfCodeSizeOfInitializedDataSizeOfUninitializedData...ResourcesNbResourcesMeanEntropyResourcesMinEntropyResourcesMaxEntropyResourcesMeanSizeResourcesMinSizeResourcesMaxSizeLoadConfigurationSizeVersionInformationSizelegitimate
0memtest.exe631ea355665f28d4707448e442fbf5b8332224258903619841157120...43.2628232.5688443.5379398797.000000216180320161
1ose.exe9d10f99a6712e28f8acd5641e3a7ea6b332224333090130560199680...24.2504613.4207445.080177837.000000518115672181
2setup.exe4d92f518527353c0db88a70fddcfd3903322243330905171206215680...114.4263242.8464495.27181331102.27272710427037672181
3DW20.EXEa41e524f8d45f0074fd07805ff0c9b12332224258905857283691520...104.3642912.6693146.4007201457.00000090426472181
4dwtrig20.exec87e561258f2f8650cef999bf643a731332224258902949122472960...24.3061003.4215985.1906031074.500000849130072181
..................................................................
138042VirusShare_8e292b418568d6e7b87f2a32aee7074b8e292b418568d6e7b87f2a32aee7074b3322242581102058242237440...74.1227361.3702607.67709114900.71428616816547200
138043VirusShare_260d9e2258aed4c8a3bbd703ec895822260d9e2258aed4c8a3bbd703ec89582233222433167225378881853440...263.3776632.0316195.0500746905.84615444676240150
138044VirusShare_8d088a51b7d225c9f5d11d239791ec3f8d088a51b7d225c9f5d11d239791ec3f3322242581001182723804160...226.8254062.6170267.99048714981.909091482264872140
138045VirusShare_4286dccf67ca220fe67635388229a9f34286dccf67ca220fe67635388229a9f33322243316622549152168960...103.4216272.0609644.739744601.600000162216000
138046VirusShare_d7648eae45f09b3adb75127f43be6d11d7648eae45f09b3adb75127f43be6d113322242581101116164684800...44.4072521.9804826.11537496625.000000203184647200
\n", 415 | "

138047 rows × 57 columns

\n", 416 | "
" 417 | ], 418 | "text/plain": [ 419 | " Name \\\n", 420 | "0 memtest.exe \n", 421 | "1 ose.exe \n", 422 | "2 setup.exe \n", 423 | "3 DW20.EXE \n", 424 | "4 dwtrig20.exe \n", 425 | "... ... \n", 426 | "138042 VirusShare_8e292b418568d6e7b87f2a32aee7074b \n", 427 | "138043 VirusShare_260d9e2258aed4c8a3bbd703ec895822 \n", 428 | "138044 VirusShare_8d088a51b7d225c9f5d11d239791ec3f \n", 429 | "138045 VirusShare_4286dccf67ca220fe67635388229a9f3 \n", 430 | "138046 VirusShare_d7648eae45f09b3adb75127f43be6d11 \n", 431 | "\n", 432 | " md5 Machine SizeOfOptionalHeader \\\n", 433 | "0 631ea355665f28d4707448e442fbf5b8 332 224 \n", 434 | "1 9d10f99a6712e28f8acd5641e3a7ea6b 332 224 \n", 435 | "2 4d92f518527353c0db88a70fddcfd390 332 224 \n", 436 | "3 a41e524f8d45f0074fd07805ff0c9b12 332 224 \n", 437 | "4 c87e561258f2f8650cef999bf643a731 332 224 \n", 438 | "... ... ... ... \n", 439 | "138042 8e292b418568d6e7b87f2a32aee7074b 332 224 \n", 440 | "138043 260d9e2258aed4c8a3bbd703ec895822 332 224 \n", 441 | "138044 8d088a51b7d225c9f5d11d239791ec3f 332 224 \n", 442 | "138045 4286dccf67ca220fe67635388229a9f3 332 224 \n", 443 | "138046 d7648eae45f09b3adb75127f43be6d11 332 224 \n", 444 | "\n", 445 | " Characteristics MajorLinkerVersion MinorLinkerVersion SizeOfCode \\\n", 446 | "0 258 9 0 361984 \n", 447 | "1 3330 9 0 130560 \n", 448 | "2 3330 9 0 517120 \n", 449 | "3 258 9 0 585728 \n", 450 | "4 258 9 0 294912 \n", 451 | "... ... ... ... ... \n", 452 | "138042 258 11 0 205824 \n", 453 | "138043 33167 2 25 37888 \n", 454 | "138044 258 10 0 118272 \n", 455 | "138045 33166 2 25 49152 \n", 456 | "138046 258 11 0 111616 \n", 457 | "\n", 458 | " SizeOfInitializedData SizeOfUninitializedData ... ResourcesNb \\\n", 459 | "0 115712 0 ... 4 \n", 460 | "1 19968 0 ... 2 \n", 461 | "2 621568 0 ... 11 \n", 462 | "3 369152 0 ... 10 \n", 463 | "4 247296 0 ... 2 \n", 464 | "... ... ... ... ... \n", 465 | "138042 223744 0 ... 7 \n", 466 | "138043 185344 0 ... 26 \n", 467 | "138044 380416 0 ... 
22 \n", 468 | "138045 16896 0 ... 10 \n", 469 | "138046 468480 0 ... 4 \n", 470 | "\n", 471 | " ResourcesMeanEntropy ResourcesMinEntropy ResourcesMaxEntropy \\\n", 472 | "0 3.262823 2.568844 3.537939 \n", 473 | "1 4.250461 3.420744 5.080177 \n", 474 | "2 4.426324 2.846449 5.271813 \n", 475 | "3 4.364291 2.669314 6.400720 \n", 476 | "4 4.306100 3.421598 5.190603 \n", 477 | "... ... ... ... \n", 478 | "138042 4.122736 1.370260 7.677091 \n", 479 | "138043 3.377663 2.031619 5.050074 \n", 480 | "138044 6.825406 2.617026 7.990487 \n", 481 | "138045 3.421627 2.060964 4.739744 \n", 482 | "138046 4.407252 1.980482 6.115374 \n", 483 | "\n", 484 | " ResourcesMeanSize ResourcesMinSize ResourcesMaxSize \\\n", 485 | "0 8797.000000 216 18032 \n", 486 | "1 837.000000 518 1156 \n", 487 | "2 31102.272727 104 270376 \n", 488 | "3 1457.000000 90 4264 \n", 489 | "4 1074.500000 849 1300 \n", 490 | "... ... ... ... \n", 491 | "138042 14900.714286 16 81654 \n", 492 | "138043 6905.846154 44 67624 \n", 493 | "138044 14981.909091 48 22648 \n", 494 | "138045 601.600000 16 2216 \n", 495 | "138046 96625.000000 20 318464 \n", 496 | "\n", 497 | " LoadConfigurationSize VersionInformationSize legitimate \n", 498 | "0 0 16 1 \n", 499 | "1 72 18 1 \n", 500 | "2 72 18 1 \n", 501 | "3 72 18 1 \n", 502 | "4 72 18 1 \n", 503 | "... ... ... ... \n", 504 | "138042 72 0 0 \n", 505 | "138043 0 15 0 \n", 506 | "138044 72 14 0 \n", 507 | "138045 0 0 0 \n", 508 | "138046 72 0 0 \n", 509 | "\n", 510 | "[138047 rows x 57 columns]" 511 | ] 512 | }, 513 | "execution_count": 4, 514 | "metadata": {}, 515 | "output_type": "execute_result" 516 | } 517 | ], 518 | "source": [ 519 | "dataset" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 5, 525 | "id": "9e106963", 526 | "metadata": {}, 527 | "outputs": [ 528 | { 529 | "data": { 530 | "text/html": [ 531 | "
\n", 532 | "\n", 545 | "\n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " 
\n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | "
MachineSizeOfOptionalHeaderCharacteristicsMajorLinkerVersionMinorLinkerVersionSizeOfCodeSizeOfInitializedDataSizeOfUninitializedDataAddressOfEntryPointBaseOfCode...ResourcesNbResourcesMeanEntropyResourcesMinEntropyResourcesMaxEntropyResourcesMeanSizeResourcesMinSizeResourcesMaxSizeLoadConfigurationSizeVersionInformationSizelegitimate
count138047.000000138047.000000138047.000000138047.000000138047.0000001.380470e+051.380470e+051.380470e+051.380470e+051.380470e+05...138047.000000138047.000000138047.000000138047.0000001.380470e+051.380470e+051.380470e+051.380470e+05138047.000000138047.000000
mean4259.069274225.8456324444.1459948.6197743.8192862.425956e+054.504867e+051.009525e+051.719561e+055.779845e+04...22.0507004.0001272.4345415.5216105.545093e+041.818082e+042.465903e+054.656750e+0512.3631150.299340
std10880.3472455.1213998186.7825244.08875711.8626755.754485e+062.101599e+071.635288e+073.430553e+065.527658e+06...136.4942441.1129810.8155771.5974037.799163e+066.502369e+062.124860e+072.608987e+076.7988780.457971
min332.000000224.0000002.0000000.0000000.0000000.000000e+000.000000e+000.000000e+000.000000e+000.000000e+00...0.0000000.0000000.0000000.0000000.000000e+000.000000e+000.000000e+000.000000e+000.0000000.000000
25%332.000000224.000000258.0000008.0000000.0000003.020800e+042.457600e+040.000000e+001.272100e+044.096000e+03...5.0000003.4585052.1787484.8287069.560000e+024.800000e+012.216000e+030.000000e+0013.0000000.000000
50%332.000000224.000000258.0000009.0000000.0000001.136640e+052.631680e+050.000000e+005.288300e+044.096000e+03...6.0000003.7298242.4584925.3175522.708154e+034.800000e+019.640000e+037.200000e+0115.0000000.000000
75%332.000000224.0000008226.00000010.0000000.0000001.203200e+053.850240e+050.000000e+006.157800e+044.096000e+03...13.0000004.2330512.6968336.5022396.558429e+031.320000e+022.378000e+047.200000e+0116.0000001.000000
max34404.000000352.00000049551.000000255.000000255.0000001.818587e+094.294966e+094.294941e+091.074484e+092.028711e+09...7694.0000007.9997237.9997238.0000002.415919e+092.415919e+094.294903e+094.294967e+0926.0000001.000000
\n", 767 | "

8 rows × 55 columns

\n", 768 | "
" 769 | ], 770 | "text/plain": [ 771 | " Machine SizeOfOptionalHeader Characteristics \\\n", 772 | "count 138047.000000 138047.000000 138047.000000 \n", 773 | "mean 4259.069274 225.845632 4444.145994 \n", 774 | "std 10880.347245 5.121399 8186.782524 \n", 775 | "min 332.000000 224.000000 2.000000 \n", 776 | "25% 332.000000 224.000000 258.000000 \n", 777 | "50% 332.000000 224.000000 258.000000 \n", 778 | "75% 332.000000 224.000000 8226.000000 \n", 779 | "max 34404.000000 352.000000 49551.000000 \n", 780 | "\n", 781 | " MajorLinkerVersion MinorLinkerVersion SizeOfCode \\\n", 782 | "count 138047.000000 138047.000000 1.380470e+05 \n", 783 | "mean 8.619774 3.819286 2.425956e+05 \n", 784 | "std 4.088757 11.862675 5.754485e+06 \n", 785 | "min 0.000000 0.000000 0.000000e+00 \n", 786 | "25% 8.000000 0.000000 3.020800e+04 \n", 787 | "50% 9.000000 0.000000 1.136640e+05 \n", 788 | "75% 10.000000 0.000000 1.203200e+05 \n", 789 | "max 255.000000 255.000000 1.818587e+09 \n", 790 | "\n", 791 | " SizeOfInitializedData SizeOfUninitializedData AddressOfEntryPoint \\\n", 792 | "count 1.380470e+05 1.380470e+05 1.380470e+05 \n", 793 | "mean 4.504867e+05 1.009525e+05 1.719561e+05 \n", 794 | "std 2.101599e+07 1.635288e+07 3.430553e+06 \n", 795 | "min 0.000000e+00 0.000000e+00 0.000000e+00 \n", 796 | "25% 2.457600e+04 0.000000e+00 1.272100e+04 \n", 797 | "50% 2.631680e+05 0.000000e+00 5.288300e+04 \n", 798 | "75% 3.850240e+05 0.000000e+00 6.157800e+04 \n", 799 | "max 4.294966e+09 4.294941e+09 1.074484e+09 \n", 800 | "\n", 801 | " BaseOfCode ... ResourcesNb ResourcesMeanEntropy \\\n", 802 | "count 1.380470e+05 ... 138047.000000 138047.000000 \n", 803 | "mean 5.779845e+04 ... 22.050700 4.000127 \n", 804 | "std 5.527658e+06 ... 136.494244 1.112981 \n", 805 | "min 0.000000e+00 ... 0.000000 0.000000 \n", 806 | "25% 4.096000e+03 ... 5.000000 3.458505 \n", 807 | "50% 4.096000e+03 ... 6.000000 3.729824 \n", 808 | "75% 4.096000e+03 ... 13.000000 4.233051 \n", 809 | "max 2.028711e+09 ... 
7694.000000 7.999723 \n", 810 | "\n", 811 | " ResourcesMinEntropy ResourcesMaxEntropy ResourcesMeanSize \\\n", 812 | "count 138047.000000 138047.000000 1.380470e+05 \n", 813 | "mean 2.434541 5.521610 5.545093e+04 \n", 814 | "std 0.815577 1.597403 7.799163e+06 \n", 815 | "min 0.000000 0.000000 0.000000e+00 \n", 816 | "25% 2.178748 4.828706 9.560000e+02 \n", 817 | "50% 2.458492 5.317552 2.708154e+03 \n", 818 | "75% 2.696833 6.502239 6.558429e+03 \n", 819 | "max 7.999723 8.000000 2.415919e+09 \n", 820 | "\n", 821 | " ResourcesMinSize ResourcesMaxSize LoadConfigurationSize \\\n", 822 | "count 1.380470e+05 1.380470e+05 1.380470e+05 \n", 823 | "mean 1.818082e+04 2.465903e+05 4.656750e+05 \n", 824 | "std 6.502369e+06 2.124860e+07 2.608987e+07 \n", 825 | "min 0.000000e+00 0.000000e+00 0.000000e+00 \n", 826 | "25% 4.800000e+01 2.216000e+03 0.000000e+00 \n", 827 | "50% 4.800000e+01 9.640000e+03 7.200000e+01 \n", 828 | "75% 1.320000e+02 2.378000e+04 7.200000e+01 \n", 829 | "max 2.415919e+09 4.294903e+09 4.294967e+09 \n", 830 | "\n", 831 | " VersionInformationSize legitimate \n", 832 | "count 138047.000000 138047.000000 \n", 833 | "mean 12.363115 0.299340 \n", 834 | "std 6.798878 0.457971 \n", 835 | "min 0.000000 0.000000 \n", 836 | "25% 13.000000 0.000000 \n", 837 | "50% 15.000000 0.000000 \n", 838 | "75% 16.000000 1.000000 \n", 839 | "max 26.000000 1.000000 \n", 840 | "\n", 841 | "[8 rows x 55 columns]" 842 | ] 843 | }, 844 | "execution_count": 5, 845 | "metadata": {}, 846 | "output_type": "execute_result" 847 | } 848 | ], 849 | "source": [ 850 | "dataset.describe()" 851 | ] 852 | }, 853 | { 854 | "cell_type": "code", 855 | "execution_count": 6, 856 | "id": "f3db099c", 857 | "metadata": {}, 858 | "outputs": [ 859 | { 860 | "data": { 861 | "text/plain": [ 862 | "Name 0\n", 863 | "md5 0\n", 864 | "Machine 0\n", 865 | "SizeOfOptionalHeader 0\n", 866 | "Characteristics 0\n", 867 | "MajorLinkerVersion 0\n", 868 | "MinorLinkerVersion 0\n", 869 | "SizeOfCode 0\n", 870 | 
"SizeOfInitializedData 0\n", 871 | "SizeOfUninitializedData 0\n", 872 | "AddressOfEntryPoint 0\n", 873 | "BaseOfCode 0\n", 874 | "BaseOfData 0\n", 875 | "ImageBase 0\n", 876 | "SectionAlignment 0\n", 877 | "FileAlignment 0\n", 878 | "MajorOperatingSystemVersion 0\n", 879 | "MinorOperatingSystemVersion 0\n", 880 | "MajorImageVersion 0\n", 881 | "MinorImageVersion 0\n", 882 | "MajorSubsystemVersion 0\n", 883 | "MinorSubsystemVersion 0\n", 884 | "SizeOfImage 0\n", 885 | "SizeOfHeaders 0\n", 886 | "CheckSum 0\n", 887 | "Subsystem 0\n", 888 | "DllCharacteristics 0\n", 889 | "SizeOfStackReserve 0\n", 890 | "SizeOfStackCommit 0\n", 891 | "SizeOfHeapReserve 0\n", 892 | "SizeOfHeapCommit 0\n", 893 | "LoaderFlags 0\n", 894 | "NumberOfRvaAndSizes 0\n", 895 | "SectionsNb 0\n", 896 | "SectionsMeanEntropy 0\n", 897 | "SectionsMinEntropy 0\n", 898 | "SectionsMaxEntropy 0\n", 899 | "SectionsMeanRawsize 0\n", 900 | "SectionsMinRawsize 0\n", 901 | "SectionMaxRawsize 0\n", 902 | "SectionsMeanVirtualsize 0\n", 903 | "SectionsMinVirtualsize 0\n", 904 | "SectionMaxVirtualsize 0\n", 905 | "ImportsNbDLL 0\n", 906 | "ImportsNb 0\n", 907 | "ImportsNbOrdinal 0\n", 908 | "ExportNb 0\n", 909 | "ResourcesNb 0\n", 910 | "ResourcesMeanEntropy 0\n", 911 | "ResourcesMinEntropy 0\n", 912 | "ResourcesMaxEntropy 0\n", 913 | "ResourcesMeanSize 0\n", 914 | "ResourcesMinSize 0\n", 915 | "ResourcesMaxSize 0\n", 916 | "LoadConfigurationSize 0\n", 917 | "VersionInformationSize 0\n", 918 | "legitimate 0\n", 919 | "dtype: int64" 920 | ] 921 | }, 922 | "execution_count": 6, 923 | "metadata": {}, 924 | "output_type": "execute_result" 925 | } 926 | ], 927 | "source": [ 928 | "dataset.isnull().sum()" 929 | ] 930 | }, 931 | { 932 | "cell_type": "code", 933 | "execution_count": 7, 934 | "id": "48a57329", 935 | "metadata": {}, 936 | "outputs": [], 937 | "source": [ 938 | "#Classifying Data Based on - Legitimate OR Malware" 939 | ] 940 | }, 941 | { 942 | "cell_type": "code", 943 | "execution_count": 8, 944 | "id": 
"52e76632", 945 | "metadata": {}, 946 | "outputs": [ 947 | { 948 | "data": { 949 | "text/plain": [ 950 | "legitimate\n", 951 | "0 96724\n", 952 | "1 41323\n", 953 | "dtype: int64" 954 | ] 955 | }, 956 | "execution_count": 8, 957 | "metadata": {}, 958 | "output_type": "execute_result" 959 | } 960 | ], 961 | "source": [ 962 | "dataset.groupby(dataset['legitimate']).size()\n", 963 | "#1 means legitimate, 0 means malware" 964 | ] 965 | }, 966 | { 967 | "cell_type": "code", 968 | "execution_count": 9, 969 | "id": "77eefc0b", 970 | "metadata": {}, 971 | "outputs": [ 972 | { 973 | "data": { 974 | "text/plain": [ 975 | "([,\n", 976 | " ],\n", 977 | " [Text(0.6484073958497663, 0.8885763045497695, 'Legitimate'),\n", 978 | " Text(-0.6484073958497659, -0.8885763045497698, 'Malware')],\n", 979 | " [Text(0.35367676137259974, 0.4846779842998742, '30%'),\n", 980 | " Text(-0.35367676137259957, -0.48467798429987435, '70%')])" 981 | ] 982 | }, 983 | "execution_count": 9, 984 | "metadata": {}, 985 | "output_type": "execute_result" 986 | }, 987 | { 988 | "data": { 989 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAOgAAADnCAYAAAAU/xqtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAZPklEQVR4nO3deZwU5Z3H8c/TPcBwWYgI4tleQLglHhjFaOKR3TGaQ1EMAUUlmxizrmaTNiab0uzGMYd3YqIx0WhE1M2uaxqUrKIYXdBIRDSKUZgoiAoCBTPDnP3sH08Th8kMc9Ddv6e6f+/Xq18MM9NV34b5Tj1VXfWUsdailPJTQjqAUqpzWlClPKYFVcpjWlClPKYFVcpjWlClPKYFVcpjWlClPKYFVcpjWlClPKYFVcpjWlClPKYFVcpjWlClPKYFVcpjWlClPKYFVcpjWlClPKYFVcpjWlClPKYFVcpjWlClPKYFVcpjWlClPKYFVcpjWlClPKYFVcpjFdIBVOdS6Uw/YBSwNzAM2KuDx47PB0AjUJd71Lf5uO2jFngbeAP4C/BOTXWV3qDHU0ZvnuSHVDozCJgMHAFMyT3GUvhfovXAm8ArwEvACuClmuqqtQVer+oGLaiA3JbxWOBIPizj4fi1y/EB8BSwEFhYU121TjhPWdKCFkkqnRkBVAGfBk4GBskm6rGV5MoKPFNTXdUsnKcsaEELKQxSwNl3tvzDuO+1fHEWYIQT5ctW4HE+3LrqcLhAtKD5FgYDgRnARcAxANts/1cmNN45TjRX4VjgMeAnwIKa6qqscJ6SogXNlzCYAnwJV87Bbb9kLfb4xpveXcfeI0WyFc8a4GfAnTXVVR8UaiXGmFpr7W7tIhhj9gVuttaeZYyZDOxrrV2Q+9oZwFhrbXUesl4G3G6tre/V87WguyEMkrhCXgZ8dFff+suWTy25pmXWCcWI5YEG4AHgJzXVVc/le+H5KGi75Z0PHGmt/Wq+ltlm2TW5ZW/s1fO1oL0QBn2B84FvAod05ymb7OAXpzT+fHIBU/nqj7jh77ya6qrGfCywo4IaYw7NrWdv3FtHF1trX8t9/jdAErfPfLm1dpAxJgX8DncE/Q2gP7AOuDb38ZHW2q8aY+4CtgNjgIOAC4DZuKPwy6y15+fWfxtwVO65D1lrv2uM+RrwI2AVsNFae5Ix5lTgaqAf7u2tC6y1tZ29Vp8O6/svDAYQBpcBq4Gf081yAuzJtglDiQo27PPYkcCvgFdT6cz0Aq7nduBSa+1Hga8DP819/ibgJmvtUcA77Z9krW0C/g2Yb62dbK2d38Gy9wQ+AfwL8AhwAzAOmJAbHgNcZa09EpgIfNwYM9Fae3NunSflyjkM+DZwsrV2Cu6X1+W7elFa0O4IgwRhMAf3G+8GYL+eLsIYkrMrFr2a92zxcTAwP5XOPJNKZ47O54KNMYOAjwEPGmNexP3y3LG/fyzwYO7j+3q5ikesG2quBN6z1q601mZxJ3ekct8z3RizHPgTrrxjO1jO1Nznn8nlnI3bKndKT/XrShh8HFfKI3Z3UdOTT/a7oeXs3c8Ubx8DlqbSmXlAuqa66u08LDMBbLHWTs7DsjqyY2iebfPxjr9XGGMOxm21j7LWbs4Niys7WI4Bfm+tndHdFesWtDNhcAhh8FvgSfJQToB92DxxEPVb87GsmDPAecCqVDrzH7nTHHvNWrsVWGOMORvAOJNyX14KfD738bmdLGIb7Y6899AeuPOcI2PMCOAfOln2UuA4Y8xhuZwDjDGjdrVgLWh7YZAkDP4VN3z5bD4XbQz9zk0ufjmfy4y5/sC3gDdS6cxFqXSmuz+PA4wxa9s8Lge+AFxojFmB+787M/e9lwGXG2Oeww17ow6WtxgYa4x50RhzTk9fhLV2BW5o+wrwS+CZNl++HVhojFlsrd2AO7g4zxjzEq6wY3a1bD2K21YYfAR3QOOYQq2iJjti6YlNN0wt1PJj7ilgVk111Vv5WqAxZgCw3Vp
rjTHnAjOstWd29TxfaEFhx/uZ3wC+izv8XTDWUjem8a5kI3072kdRbgv31ZrqqnvzsTBjzDTgVtywegswx1r7Rj6WXQxa0DA4CJhPAbea7X2z+aLn5rd+Iq9HMkvQA8A/1VRXbZYOIqm890HD4B+B5RSxnADnJx/TK0G6Nh1YnkpnjpIOIqk8t6BuSHsNcCUCV5hkLVsOb7xnUCtJfZura03AFTXVVbdKB5FQfgUNg+HAPNyZIWIuafra8kx26hTJDDHzAHBRTXXVNukgxVReQ9wwGAMsQ7icABdWLKiTzhAz04GnUunMcOkgxVQ+BQ2D43HvT6WEkwAwybw52pDVayd75gjg6VQ6c6B0kGIpj4KGwVnA74Gh0lF2SBo7fFpi5SvSOWJoFPCHVDqzyzf4S0XpF9RdffIAHZ8bKeriZGaTdIaYOgC3Jd3lNbiloLQLGgZp3InuXs4FNDXxarcvV1N/ZxiwOJXOnCgdpJBKt6DufNprpWPsSh/TesAU8/oq6RwxNhhYmEpnzpAOUiilWdAwuBz4gXSM7phb8bt3pTPEXCXwn6l0ZpZ0kEIovYK6fc4fS8forhMTK/aXzlACKoBfpdKZ06WD5FtpFTQMLsDtc8ZGpWk+dLR5a410jhKQAOal0pnJ0kHyqXQKGgan4a69i525FZm8XV5V5gYBj6TSmX2lg+RLaRQ0DCbi5p2J5bmtpyWe31s6QwnZH1fSgdJB8iH+BQ2DEbiZ1nZnygpRg0zD2P3Nhr+bcU712hTgNz2YocFb8X4BYdAPeBiI/alfFyUzsbmIOCbOJCZH8ncl3gV1R2uLei1noZyZfDaQzlCCrkilM3OlQ+yO+BY0DD4PXCIdI1+GUDthGFs2SOcoQT9JpTMnSYforXgWNAwOAe6UjpFPxpA4v+Kx16RzlKAK4O5UOrOHdJDeiF9B3X1R5gMlNyQ8O/lUf+kMJeoA4HrpEL0Rv4LC93H3+yg5w9kycTB1Hc3bqnbfhal05jTpED0Vr4KGwTG4G9iUJGPoe17yCb1GtHDuiNtQNz4FdUPbO4lT5l44L/l4Sb8+YbEb6sbph+FK3F2jStqB5v2JlTRul85RwmI11I3HrH5hMBZ374u+0lGK4VvNc5bd13qyF+/v2pYm3r3vm9iWZshmGTD6OIZM+wKt27ex8eHraNn6HhV7jGDYZ9IkKwfRsPbPbFr0U0yyD8PO+Ff67Lkv2YZaNjx8HcOnX4MxXlw7/zYwvqa6yvsbWcVlC3o7ZVJOgNnJRS3SGf4m2YcR536ffefcysgLbmb7mhdoXPcaW5c+SGVqEvvNvYPK1CS2LnW34Nz6/H+x92euZMgJs9j2pwUAbHn2foJjp/tSTojRUNf/gobBdOA46RjFNMqsHV9BixezzxtjSPR17/7YbAtkW8EY6t9YxsDxnwRg4PhPUv+Xpe77ExXYliZsSyMmUUHz5vW0bvuAygMniL2GTlyYSme8/7nyu6DuwJDX05YUgjEE/5hY9pJ0jh1stpV3fnUpa2+ZSWVqMv32HU1r3RYqBrlJEisGDSVbtwWAYOrZfPDorWz948MMnnI6W5b8miHTZgqm36Vq6QBd8bug8BWgLCfWurBiYb10hh1MIsm+F9zC/l+5i8b1r9O0oabT7+074hBGzvox+8y4lpboXZK5Em94+Do2PvIjWuu8uhfS8al0pko6xK74W9AwGAJ8RzqGlAlmzRjfJrZOVA6i8oAJbF+9nOTAIbTUullDW2o3kRg4ZKfvtdYSPTuf4LgZbHnmPoYcfx4Dx53E1hceEUi+S9f6fFmat8Fwb6t4M9F0sSWM3fukxIvid+NurY/INtQCkG1upOGvL9Jnr/0ZcNgx1L38OAB1Lz/OgMN2Puhc9/Lj9D/0SJKVg7DNjWASYIz72C8TgPOkQ3TGz7dZwmAo8BZQElfF99b/tY5dMqP52ydIZmh6fw0bMzeAzYLNMmDMNIYcN4PW7VvZ+HA1LVs3ULHH3gw780qS/d0
189nmBt5/6GpGTP8eJllBw9svs2nRbZhkBcPO+AZ9hu4n+ZI6sgoYW1Nd5dWIBfwt6Hdwtwcsay02se6wxnu9+2kuUefUVFc9IB2iPf+GuGHQH7hUOoYPKkx2v6PNq69K5ygTV6XSGW/eqN3Bv4LCHEAn0cqZW5F5XzpDmZgIfFo6RHt+FdTd+foK6Rg+mZZ4KfbzLcXI16UDtOdXQeEM4GDpED7pZ1oOHmtq3pTOUSampdIZr953962gF0oH8NHcit+tlc5QRry6x4s/BQ2DkcCnpGP46JTECyOkM5SRL/p0sMifgsJsICkdwkcDTeOYg8y7uhUtjkOA46VD7OBTQS+QDuCzi5MZ3Q8tHm+GuX4UNAyOA0ZJx/DZp5NLy/a0RwFnp9KZSukQ4EtB4SzpAL7bg7pxI9ik74kWR4C7dYQ4Xwpasrcwz5fcxNarpHOUES+GufIFDYPxlOk1nz11VnJJWV88UGSnptIZ8aPn8gX1ZCgRB8OIJgTUbpHOUSYqgFOlQ2hBY8QY+sxM/q9ObF0806QDyBY0DIZTordxKJQZFU/0kc5QRkSvxQXpgrrfUN6ctREH+7FxwgAa6qRzlInRqXRmuGQA6YJ6c8ZGXBhD/7OSS1ZK5ygjoj+j0gX1fl5SH81KLvJuao4SJrofKlfQMBgAHCG2/hg71Lwzvg8tTdI5yoTofqjkFnQq7lC26iFj2OP0xP95M7F1iZuUSmcGS61csqBe3BworuZUPNognaFMJIGPSa1csqDe3awjTsaZmjEJsq3SOcqE2H6oZEHHC6479hLGDvtEYrn4xNZlYozUimUK6iYHGy2y7hJyccWCSDpDmThIasVSW9AUZXS/z0L5qHn9cPBx5vGSIzazolRB9eLsPKgw2ZFTE3/Wia0Lb7jUBdxSBdWpNfNkbjKzQTpDmRDZikoVdB+h9Zac4xMvi+0flRktqOq5vqYlNd6sfkM6RxkQ+UUoVVDxK9VLydyKzDrpDGWgrLagWtA8OjnxwkjpDGWgrAqqQ9w8GmCaRh1s3nlLOkeJK6sh7hCh9Zasi5ML1khnKHFDJFYqVdB+QustWacnl+4lnaHEiUw1I1VQPYsozwZTP24fNr0nnaOEiVwaWfyChkFCZL0lzhjMnIqFOrF14ZTNFlS3ngXyueTTYhcWlwGRgkpstrWgBbIXWyev7veFTdI5SlEWsw02F329EgXVCa8KxBiMwepd0Aoggd0ms97iqwf0EikVNyKzVwgcJIqywPair1ep3SPyMyt1NLVWaL1K9ZbIvr0WVKnuKf4RIrSgSnVXWW1BPxBar1K9VVZb0LeF1qtUb5XVFlQvjVJxs15ipboFVap7RKaV0S2oUt3zpsRKtaBKdW07IDLvk1RBVyN06pRSvbCaMBI5PVXo3ixRA6DXLqq4EBneguyF0y8KrlupnlgptWLJgq4QXLdSPfGc1Ip1C6pU17SgSnlqLWH0rtTK5QoaRu8juPOtVDc9L7ly6dn1nhBev1JdERveghZUqa6I/oxqQZXq3Ebgj5IBZAvq9kNfEc2gVOcW5ebQEiO9BQX4X+kASnXiUekAPhT0v6UDKNUBCzwmHcKHgi4BxN5nUqoTL+R2wUTJF9SN8X8rHUOpduZJBwAfCuo8KB1AqTayaEF3sgTQe1sqXzxBGInMQdSeHwV1w9wHpGMolXOvdIAd/Cio8wvpAErhpjfx5piIPwUNo5eAZdIxVNl7kDASudVgR/wpqHObdABV9m6SDtCWbwWdjzv/USkJTxNGy6VDtOVXQd1kYndIx1Bl60bpAO35VVDnZvQGv6r41uDhaaf+FdRNL6FbUVVst0hfudIR/wrqXAc0Sodob9XGVib/rPZvjz2u3cqNSxvZtN1yyj11HH5LLafcU8fm7W6O42feamHibbUcdUctb2xy//dbGiyn3VuHtSLzIKuOrQd+Jh2iI8bbH5QwuBW4RDpGZ1qzlv2ur2XZRQP5yfNNDO1vSB/fj+o/NLJ5u+W
6Uyr53Px6rju5HzVbLI++0cKPT6vkiscaOGN0BR9PVUi/BPWhSwijn0qH6IivW1CAaqBJOkRnHl/TyqFDExw0JMHDq1qYPakPALMn9eG/V7UA0CcJ21ugvtnSJwlvbsqybltWy+mXNXi8S+VvQcNoLZ4OOwDuf7mZGeNdKd+rzTJysPunHDk4wft1bjh75fH9mPtIAzcua+KrR/flqica+N5J/cQyqw5dTRg1S4fojL8FdUI8fF+0qdXyP6taOHvsrreEk/dJsvSigSyePZDVm7PsOziBBc55qJ6Zv93Oe7XeHZMoN68C90iH2BW/CxpGm4FvS8dob+FfWpgyMsGIQe6fb8SgBOu3ubKt35Zl+MCd/1mttfz7kka+c0I/rn6qkatP7MfMiX24eZm3I/hycYWPR27b8rugzh14Ngv9vDbDW4AzRlVw9wo3Srp7RTNnjt55y3r3imaqDq9gz/6G+mZIGPeo93ZgVRYeJowWSofoir9HcdsKg2m4a0bF1TdbDrihltVfG0RQaQD4oD7L9Ie281ZkOTAwPHj2AIb2N3/7/qr76lk0cwB9koan/9rCVxY00DcJ8z7fn1F7JSVfTrmqB8YRRjXSQboSj4IChME9wEzpGKokfJMw+oF0iO6IwxB3h3/GvaGs1O5YAVwvHaK74lPQMNoEXCwdQ8VaEzCHMGqRDtJd8SkoQBhlgF9Kx1Cx9R3fLifrSrwK6vwL8JZ0CBU7TwA/lA7RU/EraBhtBWYDrdJRVGxsAmYRRjE5Ivqh+BUUIIyeBL4lHUPFxsWE0TrpEL0Rz4ICucPk/ykdQ3nvesLIm1n6eiq+BXUuAF6TDqG89SjwDekQuyM+Jyp0JgzG4G5TPlg6ivLKKuAYwiiSDrI74r4FhTB6DTgXiM17W6rgtgBnxL2cUAoFBQijBehJDMppBs4hjF6XDpIPpVFQgDC6C7hSOoYSlQW+SBgtkg6SL6VTUIAwqsZN26nKjwXmEkbzpYPkU2kV1LkM+I10CFV0lxNGd0qHyLfSK6g7W2QWcJdwElU8IWF0o3SIQii9gsKO+43OAX4uHUUV3LcIo6ulQxRK/N8H7UoY3Ii7llSVlizwZcLodukghVSaW9C2wugy4FrpGCqvmoBzS72cUA5b0B3C4CLgp0Cfrr5Vea0O+Cxh9HvpIMVQPgUFCIOTgIeAodJRVK/UAJ8hjFZIBymW0h/ithVGi4GpQEmcZVJmngCOLKdyQrkVFCCM/oIr6WPSUVS33QCcShh9IB2k2MpriNtWGBjcqYHXADo5rZ/qgS8RRvdKB5FSvgXdwU2KfS9woHQUtZPngJm5EU/ZKr8hbnth9DQwCbhfOooC3GWDIXBcuZcTdAu6szA4C3ey/UjpKGXqddzVKM9JB/FFXregxhhrjLmnzd8rjDEbjDG/6+J5J3b1PUURRg8BH8Hdl1R/cxVPE/B94Agt587yfavnOmC8Maa/tXY7cApQ9NnUjDFJa23vpuV0V+F/OXcvmNuBcfnMpv7OY8ClOpztWCH2QRcCVbmPZwDzdnzBGHO0MeZZY8yfcn+Obv9kY8xKY8wQ43xgjJmV+/w9xpiTjTEpY8zTxpjlucfHcl8/0Riz2BhzH7DSGJM0xvzQGPO8MeYlY8yXevQqwuhZ4AjgCqDsDu8XwV+BzxFGn9Jydq4QBb0fONcYUwlMBJa1+dprwAnW2iOAf8MNa9p7BjgOt+VaDUzLfX4qsBR4HzjFWjsFOIedL9A+GrjKWjsWuBCIrLVHAUcBFxtjDu7RKwmjZsLoeuDQXNb6Hj1fdWQLcBXwEcLov4SzeC/fQ1ystS8ZY1K4reeCdl8OgLuNMYfj9vE6Oi/2aeAE3G/Y24C5xpj9gE3W2lpjTADcaoyZjJtdflSb5z5nrV2T+/hUYKIx5qw26z4cWENPuWHvVYTBrcB3ceXP+79didsG3AL8kDDaIpwlNgr1Q/Y/wI+AE4G92nz+e8Bia+1ncyV+soPnLgEuwb0veRX
wWeAsXHHB3ZvlPdxbIwmgoc1z69p8bIBLrbX5O2MojNYD/0QY/CCX4wJgYN6WX5q24op5fe4OdaoHCvU+6C+Ba6y1K9t9PuDDg0bnd/REa+3bwDDgcGvtauAPwNf5sKABsN5a6yaI6vwsoMeALxtj+gAYY0YZY/JTpjBaTRhdyoe/RN7Ny3JLyyrga8D+hNG3tZy9U5CCWmvXWmtv6uBLPwCuNcY8w65Pr1vGhye0Pw3shysquEvGZhtjluKGt3V//3QAfgH8GVhujHkZN7tCfkcMYbSJMPo+cBBuBodlXTyj1GWBR4DTcPuYtxBG24QzxZqeqJBvYTAad/e1LwL7C6cpltdwBwd/TRj1fB9fdUoLWihhkAA+AczEve00TDZQ3tUA84H7CaMXZaOULi1oMbiyHgOcjivrJNlAvdKCG8IvAh7VM36KQwsqIQz2x51ldWzuMRb/LlxoxQ1dFwO/B57M3TxZFZEW1AdhsAfuJItjgcnAaOAwoF+REjQDrwLLgRdyf75IGOmJGcK0oL5yw+KDcEeqR+U+3gcYAeyNm1dpT2AAnW9964Go3WM9bv9xTZs/1xFGvTt3WRWUFrRUuEIn2zwaCSO9JWPMaUGV8phvByaUUm1oQZXymBZUKY9pQZXymBZUKY9pQZXymBZUKY9pQZXymBZUKY9pQZXymBZUKY9pQZXymBZUKY9pQZXymBZUKY9pQZXymBZUKY9pQZXymBZUKY9pQZXymBZUKY9pQZXymBZUKY9pQZXymBZUKY9pQZXymBZUKY/9P8Iv7ysch+PKAAAAAElFTkSuQmCC\n", 990 | "text/plain": [ 991 | "
" 992 | ] 993 | }, 994 | "metadata": {}, 995 | "output_type": "display_data" 996 | } 997 | ], 998 | "source": [ 999 | "type_classify=['Legitimate','Malware']\n", 1000 | "count_classify=[41323,96724]\n", 1001 | "plt.pie(count_classify, labels=type_classify, autopct='%0.f%%')" 1002 | ] 1003 | }, 1004 | { 1005 | "cell_type": "code", 1006 | "execution_count": 10, 1007 | "id": "4f846ba0", 1008 | "metadata": {}, 1009 | "outputs": [], 1010 | "source": [ 1011 | "# Total Number of Columns in Dataset" 1012 | ] 1013 | }, 1014 | { 1015 | "cell_type": "code", 1016 | "execution_count": 11, 1017 | "id": "cb1a7785", 1018 | "metadata": {}, 1019 | "outputs": [ 1020 | { 1021 | "data": { 1022 | "text/plain": [ 1023 | "57" 1024 | ] 1025 | }, 1026 | "execution_count": 11, 1027 | "metadata": {}, 1028 | "output_type": "execute_result" 1029 | } 1030 | ], 1031 | "source": [ 1032 | "dataset.shape[1]" 1033 | ] 1034 | }, 1035 | { 1036 | "cell_type": "code", 1037 | "execution_count": 12, 1038 | "id": "7c62eedf", 1039 | "metadata": {}, 1040 | "outputs": [], 1041 | "source": [ 1042 | "# Creating Legitimate and Malware Dataset from Main Dataset" 1043 | ] 1044 | }, 1045 | { 1046 | "cell_type": "code", 1047 | "execution_count": 13, 1048 | "id": "bda12c84", 1049 | "metadata": {}, 1050 | "outputs": [ 1051 | { 1052 | "data": { 1053 | "text/html": [ 1054 | "
\n", 1055 | "\n", 1068 | "\n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " 
\n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | 
" \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | "
Namemd5MachineSizeOfOptionalHeaderCharacteristicsMajorLinkerVersionMinorLinkerVersionSizeOfCodeSizeOfInitializedDataSizeOfUninitializedData...ExportNbResourcesNbResourcesMeanEntropyResourcesMinEntropyResourcesMaxEntropyResourcesMeanSizeResourcesMinSizeResourcesMaxSizeLoadConfigurationSizeVersionInformationSize
0memtest.exe631ea355665f28d4707448e442fbf5b8332224258903619841157120...043.2628232.5688443.5379398797.00000021618032016
1ose.exe9d10f99a6712e28f8acd5641e3a7ea6b332224333090130560199680...024.2504613.4207445.080177837.00000051811567218
2setup.exe4d92f518527353c0db88a70fddcfd3903322243330905171206215680...1114.4263242.8464495.27181331102.2727271042703767218
3DW20.EXEa41e524f8d45f0074fd07805ff0c9b12332224258905857283691520...1104.3642912.6693146.4007201457.0000009042647218
4dwtrig20.exec87e561258f2f8650cef999bf643a731332224258902949122472960...124.3061003.4215985.1906031074.50000084913007218
..................................................................
41318mfc80.dll1f5afd468eb5e09e9ed75a087529eab53322248450809461761597440...01232.6072510.9609535.130762327.1707322015927216
41319mfc80u.dlle2c48cd0132d4d1dc7d0df9a6bef686a3322248450809461761546240...01232.6072320.9609535.130762327.2357722015927216
41320mfcm80.dll83362ee950ad18adb85b54409155c37833222484508053248163840...2513.5242683.5242683.524268892.0000008928927216
41321mfcm80u.dll26aafee5c30020c99120ee113d751f7e33222484508052736112640...2513.5420713.5420713.542071892.0000008928927216
41322vcomp.dll73dbaa64d589f3262615550dd6881fee33222484508040960204800...11263.0043832.4065123.592623610.33333312414127216
\n", 1362 | "

41323 rows × 56 columns

\n", 1363 | "
" 1364 | ], 1365 | "text/plain": [ 1366 | " Name md5 Machine \\\n", 1367 | "0 memtest.exe 631ea355665f28d4707448e442fbf5b8 332 \n", 1368 | "1 ose.exe 9d10f99a6712e28f8acd5641e3a7ea6b 332 \n", 1369 | "2 setup.exe 4d92f518527353c0db88a70fddcfd390 332 \n", 1370 | "3 DW20.EXE a41e524f8d45f0074fd07805ff0c9b12 332 \n", 1371 | "4 dwtrig20.exe c87e561258f2f8650cef999bf643a731 332 \n", 1372 | "... ... ... ... \n", 1373 | "41318 mfc80.dll 1f5afd468eb5e09e9ed75a087529eab5 332 \n", 1374 | "41319 mfc80u.dll e2c48cd0132d4d1dc7d0df9a6bef686a 332 \n", 1375 | "41320 mfcm80.dll 83362ee950ad18adb85b54409155c378 332 \n", 1376 | "41321 mfcm80u.dll 26aafee5c30020c99120ee113d751f7e 332 \n", 1377 | "41322 vcomp.dll 73dbaa64d589f3262615550dd6881fee 332 \n", 1378 | "\n", 1379 | " SizeOfOptionalHeader Characteristics MajorLinkerVersion \\\n", 1380 | "0 224 258 9 \n", 1381 | "1 224 3330 9 \n", 1382 | "2 224 3330 9 \n", 1383 | "3 224 258 9 \n", 1384 | "4 224 258 9 \n", 1385 | "... ... ... ... \n", 1386 | "41318 224 8450 8 \n", 1387 | "41319 224 8450 8 \n", 1388 | "41320 224 8450 8 \n", 1389 | "41321 224 8450 8 \n", 1390 | "41322 224 8450 8 \n", 1391 | "\n", 1392 | " MinorLinkerVersion SizeOfCode SizeOfInitializedData \\\n", 1393 | "0 0 361984 115712 \n", 1394 | "1 0 130560 19968 \n", 1395 | "2 0 517120 621568 \n", 1396 | "3 0 585728 369152 \n", 1397 | "4 0 294912 247296 \n", 1398 | "... ... ... ... \n", 1399 | "41318 0 946176 159744 \n", 1400 | "41319 0 946176 154624 \n", 1401 | "41320 0 53248 16384 \n", 1402 | "41321 0 52736 11264 \n", 1403 | "41322 0 40960 20480 \n", 1404 | "\n", 1405 | " SizeOfUninitializedData ... ExportNb ResourcesNb \\\n", 1406 | "0 0 ... 0 4 \n", 1407 | "1 0 ... 0 2 \n", 1408 | "2 0 ... 1 11 \n", 1409 | "3 0 ... 1 10 \n", 1410 | "4 0 ... 1 2 \n", 1411 | "... ... ... ... ... \n", 1412 | "41318 0 ... 0 123 \n", 1413 | "41319 0 ... 0 123 \n", 1414 | "41320 0 ... 25 1 \n", 1415 | "41321 0 ... 25 1 \n", 1416 | "41322 0 ... 
112 6 \n", 1417 | "\n", 1418 | " ResourcesMeanEntropy ResourcesMinEntropy ResourcesMaxEntropy \\\n", 1419 | "0 3.262823 2.568844 3.537939 \n", 1420 | "1 4.250461 3.420744 5.080177 \n", 1421 | "2 4.426324 2.846449 5.271813 \n", 1422 | "3 4.364291 2.669314 6.400720 \n", 1423 | "4 4.306100 3.421598 5.190603 \n", 1424 | "... ... ... ... \n", 1425 | "41318 2.607251 0.960953 5.130762 \n", 1426 | "41319 2.607232 0.960953 5.130762 \n", 1427 | "41320 3.524268 3.524268 3.524268 \n", 1428 | "41321 3.542071 3.542071 3.542071 \n", 1429 | "41322 3.004383 2.406512 3.592623 \n", 1430 | "\n", 1431 | " ResourcesMeanSize ResourcesMinSize ResourcesMaxSize \\\n", 1432 | "0 8797.000000 216 18032 \n", 1433 | "1 837.000000 518 1156 \n", 1434 | "2 31102.272727 104 270376 \n", 1435 | "3 1457.000000 90 4264 \n", 1436 | "4 1074.500000 849 1300 \n", 1437 | "... ... ... ... \n", 1438 | "41318 327.170732 20 1592 \n", 1439 | "41319 327.235772 20 1592 \n", 1440 | "41320 892.000000 892 892 \n", 1441 | "41321 892.000000 892 892 \n", 1442 | "41322 610.333333 124 1412 \n", 1443 | "\n", 1444 | " LoadConfigurationSize VersionInformationSize \n", 1445 | "0 0 16 \n", 1446 | "1 72 18 \n", 1447 | "2 72 18 \n", 1448 | "3 72 18 \n", 1449 | "4 72 18 \n", 1450 | "... ... ... \n", 1451 | "41318 72 16 \n", 1452 | "41319 72 16 \n", 1453 | "41320 72 16 \n", 1454 | "41321 72 16 \n", 1455 | "41322 72 16 \n", 1456 | "\n", 1457 | "[41323 rows x 56 columns]" 1458 | ] 1459 | }, 1460 | "execution_count": 13, 1461 | "metadata": {}, 1462 | "output_type": "execute_result" 1463 | } 1464 | ], 1465 | "source": [ 1466 | "legit=dataset[0:41323].drop([\"legitimate\"],axis=1) # here axis =1 means vertical \n", 1467 | "legit" 1468 | ] 1469 | }, 1470 | { 1471 | "cell_type": "code", 1472 | "execution_count": 14, 1473 | "id": "5adc7421", 1474 | "metadata": {}, 1475 | "outputs": [ 1476 | { 1477 | "data": { 1478 | "text/html": [ 1479 | "
\n", 1480 | "\n", 1493 | "\n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " 
\n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | " \n", 1675 | " \n", 1676 | " \n", 1677 | " \n", 1678 | " \n", 1679 | " \n", 1680 | " \n", 1681 | " \n", 1682 | " \n", 1683 | " \n", 1684 | " \n", 1685 | " \n", 1686 | " \n", 1687 | " \n", 1688 | " \n", 1689 | " \n", 1690 | " \n", 1691 | " \n", 1692 | " \n", 1693 | " \n", 1694 | " \n", 1695 | " \n", 1696 | " \n", 1697 | " \n", 1698 | " \n", 1699 | " \n", 1700 | " \n", 1701 | " \n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | 
" \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | " \n", 1784 | " \n", 1785 | " \n", 1786 | "
Namemd5MachineSizeOfOptionalHeaderCharacteristicsMajorLinkerVersionMinorLinkerVersionSizeOfCodeSizeOfInitializedDataSizeOfUninitializedData...ResourcesNbResourcesMeanEntropyResourcesMinEntropyResourcesMaxEntropyResourcesMeanSizeResourcesMinSizeResourcesMaxSizeLoadConfigurationSizeVersionInformationSizelegitimate
41323VirusShare_4a400b747afe6547e09ce0b02dae7f1c4a400b747afe6547e09ce0b02dae7f1c3322242581103548162570240...73.9144151.4416887.6770917298.42857116284387200
41324VirusShare_9bd57c8252948bd2fa651ad372bd4f139bd57c8252948bd2fa651ad372bd4f1333222427160240641648641024...63.1991071.9713355.214816452.000000349580150
41325VirusShare_d1456165e9358b8f61f93a5f2042f39cd1456165e9358b8f61f93a5f2042f39c3322242581001187843819520...186.5309462.4584927.99268818523.444444483394572140
41326VirusShare_e4214cc73afbba0f52bb72d5db8f8bb1e4214cc73afbba0f52bb72d5db8f8bb13322242581001745923000320...155.7323932.8523647.98772612706.1333331186050072140
41327VirusShare_710890c07b3f93b90635f8bff6c34605710890c07b3f93b90635f8bff6c34605332224258904756483486720...592.8278260.9609537.2123292637.03389820676247200
..................................................................
138042VirusShare_8e292b418568d6e7b87f2a32aee7074b8e292b418568d6e7b87f2a32aee7074b3322242581102058242237440...74.1227361.3702607.67709114900.71428616816547200
138043VirusShare_260d9e2258aed4c8a3bbd703ec895822260d9e2258aed4c8a3bbd703ec89582233222433167225378881853440...263.3776632.0316195.0500746905.84615444676240150
138044VirusShare_8d088a51b7d225c9f5d11d239791ec3f8d088a51b7d225c9f5d11d239791ec3f3322242581001182723804160...226.8254062.6170267.99048714981.909091482264872140
138045VirusShare_4286dccf67ca220fe67635388229a9f34286dccf67ca220fe67635388229a9f33322243316622549152168960...103.4216272.0609644.739744601.600000162216000
138046VirusShare_d7648eae45f09b3adb75127f43be6d11d7648eae45f09b3adb75127f43be6d113322242581101116164684800...44.4072521.9804826.11537496625.000000203184647200
\n", 1787 | "

96724 rows × 57 columns

\n", 1788 | "
" 1789 | ], 1790 | "text/plain": [ 1791 | " Name \\\n", 1792 | "41323 VirusShare_4a400b747afe6547e09ce0b02dae7f1c \n", 1793 | "41324 VirusShare_9bd57c8252948bd2fa651ad372bd4f13 \n", 1794 | "41325 VirusShare_d1456165e9358b8f61f93a5f2042f39c \n", 1795 | "41326 VirusShare_e4214cc73afbba0f52bb72d5db8f8bb1 \n", 1796 | "41327 VirusShare_710890c07b3f93b90635f8bff6c34605 \n", 1797 | "... ... \n", 1798 | "138042 VirusShare_8e292b418568d6e7b87f2a32aee7074b \n", 1799 | "138043 VirusShare_260d9e2258aed4c8a3bbd703ec895822 \n", 1800 | "138044 VirusShare_8d088a51b7d225c9f5d11d239791ec3f \n", 1801 | "138045 VirusShare_4286dccf67ca220fe67635388229a9f3 \n", 1802 | "138046 VirusShare_d7648eae45f09b3adb75127f43be6d11 \n", 1803 | "\n", 1804 | " md5 Machine SizeOfOptionalHeader \\\n", 1805 | "41323 4a400b747afe6547e09ce0b02dae7f1c 332 224 \n", 1806 | "41324 9bd57c8252948bd2fa651ad372bd4f13 332 224 \n", 1807 | "41325 d1456165e9358b8f61f93a5f2042f39c 332 224 \n", 1808 | "41326 e4214cc73afbba0f52bb72d5db8f8bb1 332 224 \n", 1809 | "41327 710890c07b3f93b90635f8bff6c34605 332 224 \n", 1810 | "... ... ... ... \n", 1811 | "138042 8e292b418568d6e7b87f2a32aee7074b 332 224 \n", 1812 | "138043 260d9e2258aed4c8a3bbd703ec895822 332 224 \n", 1813 | "138044 8d088a51b7d225c9f5d11d239791ec3f 332 224 \n", 1814 | "138045 4286dccf67ca220fe67635388229a9f3 332 224 \n", 1815 | "138046 d7648eae45f09b3adb75127f43be6d11 332 224 \n", 1816 | "\n", 1817 | " Characteristics MajorLinkerVersion MinorLinkerVersion SizeOfCode \\\n", 1818 | "41323 258 11 0 354816 \n", 1819 | "41324 271 6 0 24064 \n", 1820 | "41325 258 10 0 118784 \n", 1821 | "41326 258 10 0 174592 \n", 1822 | "41327 258 9 0 475648 \n", 1823 | "... ... ... ... ... \n", 1824 | "138042 258 11 0 205824 \n", 1825 | "138043 33167 2 25 37888 \n", 1826 | "138044 258 10 0 118272 \n", 1827 | "138045 33166 2 25 49152 \n", 1828 | "138046 258 11 0 111616 \n", 1829 | "\n", 1830 | " SizeOfInitializedData SizeOfUninitializedData ... 
ResourcesNb \\\n", 1831 | "41323 257024 0 ... 7 \n", 1832 | "41324 164864 1024 ... 6 \n", 1833 | "41325 381952 0 ... 18 \n", 1834 | "41326 300032 0 ... 15 \n", 1835 | "41327 348672 0 ... 59 \n", 1836 | "... ... ... ... ... \n", 1837 | "138042 223744 0 ... 7 \n", 1838 | "138043 185344 0 ... 26 \n", 1839 | "138044 380416 0 ... 22 \n", 1840 | "138045 16896 0 ... 10 \n", 1841 | "138046 468480 0 ... 4 \n", 1842 | "\n", 1843 | " ResourcesMeanEntropy ResourcesMinEntropy ResourcesMaxEntropy \\\n", 1844 | "41323 3.914415 1.441688 7.677091 \n", 1845 | "41324 3.199107 1.971335 5.214816 \n", 1846 | "41325 6.530946 2.458492 7.992688 \n", 1847 | "41326 5.732393 2.852364 7.987726 \n", 1848 | "41327 2.827826 0.960953 7.212329 \n", 1849 | "... ... ... ... \n", 1850 | "138042 4.122736 1.370260 7.677091 \n", 1851 | "138043 3.377663 2.031619 5.050074 \n", 1852 | "138044 6.825406 2.617026 7.990487 \n", 1853 | "138045 3.421627 2.060964 4.739744 \n", 1854 | "138046 4.407252 1.980482 6.115374 \n", 1855 | "\n", 1856 | " ResourcesMeanSize ResourcesMinSize ResourcesMaxSize \\\n", 1857 | "41323 7298.428571 16 28438 \n", 1858 | "41324 452.000000 34 958 \n", 1859 | "41325 18523.444444 48 33945 \n", 1860 | "41326 12706.133333 118 60500 \n", 1861 | "41327 2637.033898 20 67624 \n", 1862 | "... ... ... ... \n", 1863 | "138042 14900.714286 16 81654 \n", 1864 | "138043 6905.846154 44 67624 \n", 1865 | "138044 14981.909091 48 22648 \n", 1866 | "138045 601.600000 16 2216 \n", 1867 | "138046 96625.000000 20 318464 \n", 1868 | "\n", 1869 | " LoadConfigurationSize VersionInformationSize legitimate \n", 1870 | "41323 72 0 0 \n", 1871 | "41324 0 15 0 \n", 1872 | "41325 72 14 0 \n", 1873 | "41326 72 14 0 \n", 1874 | "41327 72 0 0 \n", 1875 | "... ... ... ... 
\n", 1876 | "138042 72 0 0 \n", 1877 | "138043 0 15 0 \n", 1878 | "138044 72 14 0 \n", 1879 | "138045 0 0 0 \n", 1880 | "138046 72 0 0 \n", 1881 | "\n", 1882 | "[96724 rows x 57 columns]" 1883 | ] 1884 | }, 1885 | "execution_count": 14, 1886 | "metadata": {}, 1887 | "output_type": "execute_result" 1888 | } 1889 | ], 1890 | "source": [ 1891 | "mal=dataset[41323::]\n", 1892 | "maldata=dataset[41323::].drop([\"legitimate\"],axis=1)\n", 1893 | "mal" 1894 | ] 1895 | }, 1896 | { 1897 | "cell_type": "code", 1898 | "execution_count": 15, 1899 | "id": "325f7c48", 1900 | "metadata": {}, 1901 | "outputs": [ 1902 | { 1903 | "name": "stdout", 1904 | "output_type": "stream", 1905 | "text": [ 1906 | "The shape of legit database is 41323 samples and 56 features\n", 1907 | "The shape of malware database is 96724 samples and 57 features\n" 1908 | ] 1909 | } 1910 | ], 1911 | "source": [ 1912 | "print(\"The shape of legit database is %s samples and %s features\"%(legit.shape[0],legit.shape[1])) \n", 1913 | "print(\"The shape of malware database is %s samples and %s features\"%(mal.shape[0],mal.shape[1])) " 1914 | ] 1915 | }, 1916 | { 1917 | "cell_type": "code", 1918 | "execution_count": 16, 1919 | "id": "09e246a0", 1920 | "metadata": {}, 1921 | "outputs": [ 1922 | { 1923 | "name": "stdout", 1924 | "output_type": "stream", 1925 | "text": [ 1926 | "Index(['Name', 'md5', 'Machine', 'SizeOfOptionalHeader', 'Characteristics',\n", 1927 | " 'MajorLinkerVersion', 'MinorLinkerVersion', 'SizeOfCode',\n", 1928 | " 'SizeOfInitializedData', 'SizeOfUninitializedData',\n", 1929 | " 'AddressOfEntryPoint', 'BaseOfCode', 'BaseOfData', 'ImageBase',\n", 1930 | " 'SectionAlignment', 'FileAlignment', 'MajorOperatingSystemVersion',\n", 1931 | " 'MinorOperatingSystemVersion', 'MajorImageVersion', 'MinorImageVersion',\n", 1932 | " 'MajorSubsystemVersion', 'MinorSubsystemVersion', 'SizeOfImage',\n", 1933 | " 'SizeOfHeaders', 'CheckSum', 'Subsystem', 'DllCharacteristics',\n", 1934 | " 'SizeOfStackReserve', 
'SizeOfStackCommit', 'SizeOfHeapReserve',\n", 1935 | " 'SizeOfHeapCommit', 'LoaderFlags', 'NumberOfRvaAndSizes', 'SectionsNb',\n", 1936 | " 'SectionsMeanEntropy', 'SectionsMinEntropy', 'SectionsMaxEntropy',\n", 1937 | " 'SectionsMeanRawsize', 'SectionsMinRawsize', 'SectionMaxRawsize',\n", 1938 | " 'SectionsMeanVirtualsize', 'SectionsMinVirtualsize',\n", 1939 | " 'SectionMaxVirtualsize', 'ImportsNbDLL', 'ImportsNb',\n", 1940 | " 'ImportsNbOrdinal', 'ExportNb', 'ResourcesNb', 'ResourcesMeanEntropy',\n", 1941 | " 'ResourcesMinEntropy', 'ResourcesMaxEntropy', 'ResourcesMeanSize',\n", 1942 | " 'ResourcesMinSize', 'ResourcesMaxSize', 'LoadConfigurationSize',\n", 1943 | " 'VersionInformationSize', 'legitimate'],\n", 1944 | " dtype='object')\n", 1945 | "Index(['Name', 'md5', 'Machine', 'SizeOfOptionalHeader', 'Characteristics',\n", 1946 | " 'MajorLinkerVersion', 'MinorLinkerVersion', 'SizeOfCode',\n", 1947 | " 'SizeOfInitializedData', 'SizeOfUninitializedData',\n", 1948 | " 'AddressOfEntryPoint', 'BaseOfCode', 'BaseOfData', 'ImageBase',\n", 1949 | " 'SectionAlignment', 'FileAlignment', 'MajorOperatingSystemVersion',\n", 1950 | " 'MinorOperatingSystemVersion', 'MajorImageVersion', 'MinorImageVersion',\n", 1951 | " 'MajorSubsystemVersion', 'MinorSubsystemVersion', 'SizeOfImage',\n", 1952 | " 'SizeOfHeaders', 'CheckSum', 'Subsystem', 'DllCharacteristics',\n", 1953 | " 'SizeOfStackReserve', 'SizeOfStackCommit', 'SizeOfHeapReserve',\n", 1954 | " 'SizeOfHeapCommit', 'LoaderFlags', 'NumberOfRvaAndSizes', 'SectionsNb',\n", 1955 | " 'SectionsMeanEntropy', 'SectionsMinEntropy', 'SectionsMaxEntropy',\n", 1956 | " 'SectionsMeanRawsize', 'SectionsMinRawsize', 'SectionMaxRawsize',\n", 1957 | " 'SectionsMeanVirtualsize', 'SectionsMinVirtualsize',\n", 1958 | " 'SectionMaxVirtualsize', 'ImportsNbDLL', 'ImportsNb',\n", 1959 | " 'ImportsNbOrdinal', 'ExportNb', 'ResourcesNb', 'ResourcesMeanEntropy',\n", 1960 | " 'ResourcesMinEntropy', 'ResourcesMaxEntropy', 'ResourcesMeanSize',\n", 1961 | " 
'ResourcesMinSize', 'ResourcesMaxSize', 'LoadConfigurationSize',\n", 1962 | " 'VersionInformationSize', 'legitimate'],\n", 1963 | " dtype='object')\n" 1964 | ] 1965 | } 1966 | ], 1967 | "source": [ 1968 | "# List the features, i.e. the column names\n", 1969 | "print(dataset.columns) # NOTE: legit and maldata have the 'legitimate' column dropped; mal (printed next) still contains it\n", 1970 | "print(mal.columns)" 1971 | ] 1972 | }, 1973 | { 1974 | "cell_type": "code", 1975 | "execution_count": 17, 1976 | "id": "55644b80", 1977 | "metadata": {}, 1978 | "outputs": [ 1979 | { 1980 | "data": { 1981 | "text/html": [ 1982 | "
\n", 1983 | "\n", 1996 | "\n", 1997 | " \n", 1998 | " \n", 1999 | " \n", 2000 | " \n", 2001 | " \n", 2002 | " \n", 2003 | " \n", 2004 | " \n", 2005 | " \n", 2006 | " \n", 2007 | " \n", 2008 | " \n", 2009 | " \n", 2010 | " \n", 2011 | " \n", 2012 | " \n", 2013 | " \n", 2014 | " \n", 2015 | " \n", 2016 | " \n", 2017 | " \n", 2018 | " \n", 2019 | " \n", 2020 | " \n", 2021 | " \n", 2022 | " \n", 2023 | " \n", 2024 | " \n", 2025 | " \n", 2026 | " \n", 2027 | " \n", 2028 | " \n", 2029 | " \n", 2030 | " \n", 2031 | " \n", 2032 | " \n", 2033 | " \n", 2034 | " \n", 2035 | " \n", 2036 | " \n", 2037 | " \n", 2038 | " \n", 2039 | " \n", 2040 | " \n", 2041 | " \n", 2042 | " \n", 2043 | " \n", 2044 | " \n", 2045 | " \n", 2046 | " \n", 2047 | " \n", 2048 | " \n", 2049 | " \n", 2050 | " \n", 2051 | " \n", 2052 | " \n", 2053 | " \n", 2054 | " \n", 2055 | " \n", 2056 | " \n", 2057 | " \n", 2058 | " \n", 2059 | " \n", 2060 | " \n", 2061 | " \n", 2062 | " \n", 2063 | " \n", 2064 | " \n", 2065 | " \n", 2066 | " \n", 2067 | " \n", 2068 | " \n", 2069 | " \n", 2070 | " \n", 2071 | " \n", 2072 | " \n", 2073 | " \n", 2074 | " \n", 2075 | " \n", 2076 | " \n", 2077 | " \n", 2078 | " \n", 2079 | " \n", 2080 | " \n", 2081 | " \n", 2082 | " \n", 2083 | " \n", 2084 | " \n", 2085 | " \n", 2086 | " \n", 2087 | " \n", 2088 | " \n", 2089 | " \n", 2090 | " \n", 2091 | " \n", 2092 | " \n", 2093 | " \n", 2094 | " \n", 2095 | " \n", 2096 | " \n", 2097 | " \n", 2098 | " \n", 2099 | " \n", 2100 | " \n", 2101 | " \n", 2102 | " \n", 2103 | " \n", 2104 | " \n", 2105 | " \n", 2106 | " \n", 2107 | " \n", 2108 | " \n", 2109 | " \n", 2110 | " \n", 2111 | " \n", 2112 | " \n", 2113 | " \n", 2114 | " \n", 2115 | " \n", 2116 | " \n", 2117 | " \n", 2118 | " \n", 2119 | " \n", 2120 | " \n", 2121 | " \n", 2122 | " \n", 2123 | " \n", 2124 | " \n", 2125 | " \n", 2126 | " \n", 2127 | " \n", 2128 | " \n", 2129 | " \n", 2130 | " \n", 2131 | " \n", 2132 | " \n", 2133 | " \n", 2134 | " \n", 2135 | " \n", 2136 | " \n", 2137 | " 
\n", 2138 | " \n", 2139 | " \n", 2140 | " \n", 2141 | " \n", 2142 | " \n", 2143 | " \n", 2144 | " \n", 2145 | " \n", 2146 | " \n", 2147 | " \n", 2148 | " \n", 2149 | " \n", 2150 | " \n", 2151 | " \n", 2152 | " \n", 2153 | " \n", 2154 | " \n", 2155 | " \n", 2156 | " \n", 2157 | " \n", 2158 | " \n", 2159 | " \n", 2160 | " \n", 2161 | " \n", 2162 | " \n", 2163 | " \n", 2164 | " \n", 2165 | " \n", 2166 | " \n", 2167 | " \n", 2168 | " \n", 2169 | " \n", 2170 | " \n", 2171 | " \n", 2172 | " \n", 2173 | " \n", 2174 | " \n", 2175 | " \n", 2176 | " \n", 2177 | " \n", 2178 | " \n", 2179 | " \n", 2180 | " \n", 2181 | " \n", 2182 | " \n", 2183 | " \n", 2184 | " \n", 2185 | " \n", 2186 | " \n", 2187 | " \n", 2188 | " \n", 2189 | " \n", 2190 | " \n", 2191 | " \n", 2192 | " \n", 2193 | " \n", 2194 | " \n", 2195 | " \n", 2196 | " \n", 2197 | " \n", 2198 | " \n", 2199 | " \n", 2200 | " \n", 2201 | " \n", 2202 | " \n", 2203 | " \n", 2204 | " \n", 2205 | " \n", 2206 | " \n", 2207 | " \n", 2208 | " \n", 2209 | " \n", 2210 | " \n", 2211 | " \n", 2212 | " \n", 2213 | " \n", 2214 | " \n", 2215 | " \n", 2216 | " \n", 2217 | " \n", 2218 | " \n", 2219 | " \n", 2220 | " \n", 2221 | " \n", 2222 | " \n", 2223 | " \n", 2224 | " \n", 2225 | " \n", 2226 | " \n", 2227 | " \n", 2228 | " \n", 2229 | " \n", 2230 | " \n", 2231 | " \n", 2232 | " \n", 2233 | " \n", 2234 | " \n", 2235 | " \n", 2236 | " \n", 2237 | " \n", 2238 | " \n", 2239 | " \n", 2240 | " \n", 2241 | " \n", 2242 | " \n", 2243 | " \n", 2244 | " \n", 2245 | " \n", 2246 | " \n", 2247 | " \n", 2248 | " \n", 2249 | " \n", 2250 | " \n", 2251 | " \n", 2252 | " \n", 2253 | " \n", 2254 | " \n", 2255 | " \n", 2256 | " \n", 2257 | " \n", 2258 | " \n", 2259 | " \n", 2260 | " \n", 2261 | " \n", 2262 | " \n", 2263 | " \n", 2264 | " \n", 2265 | "
Namemd5MachineSizeOfOptionalHeaderCharacteristicsMajorLinkerVersionMinorLinkerVersionSizeOfCodeSizeOfInitializedDataSizeOfUninitializedData...ResourcesNbResourcesMeanEntropyResourcesMinEntropyResourcesMaxEntropyResourcesMeanSizeResourcesMinSizeResourcesMaxSizeLoadConfigurationSizeVersionInformationSizelegitimate
41323VirusShare_4a400b747afe6547e09ce0b02dae7f1c4a400b747afe6547e09ce0b02dae7f1c3322242581103548162570240...73.9144151.4416887.6770917298.42857116284387200
41324VirusShare_9bd57c8252948bd2fa651ad372bd4f139bd57c8252948bd2fa651ad372bd4f1333222427160240641648641024...63.1991071.9713355.214816452.000000349580150
41325VirusShare_d1456165e9358b8f61f93a5f2042f39cd1456165e9358b8f61f93a5f2042f39c3322242581001187843819520...186.5309462.4584927.99268818523.444444483394572140
41326VirusShare_e4214cc73afbba0f52bb72d5db8f8bb1e4214cc73afbba0f52bb72d5db8f8bb13322242581001745923000320...155.7323932.8523647.98772612706.1333331186050072140
41327VirusShare_710890c07b3f93b90635f8bff6c34605710890c07b3f93b90635f8bff6c34605332224258904756483486720...592.8278260.9609537.2123292637.03389820676247200
41328VirusShare_3c2eb01508703752dca01957ea451a403c2eb01508703752dca01957ea451a4033222425990157696624640...133.9432961.8144436.1220452708.153846132964072140
41329VirusShare_3fb2d0ac00c5dff6c4fd5dfe6ba52c3f3fb2d0ac00c5dff6c4fd5dfe6ba52c3f332224259838272499223060480...213.9874632.6421596.47370014288.00000076270376000
41330VirusShare_ad1ca9a4d572c0a2793c4cea29b20887ad1ca9a4d572c0a2793c4cea29b208873322242581001203203850240...63.7298242.4584925.3175522739.50000048964072150
41331VirusShare_7414edb3d0be66aa0816e6ed4b6b0a217414edb3d0be66aa0816e6ed4b6b0a2133222425910023398413777920...184.3283222.3232207.06841376158.2777789134273572190
41332VirusShare_e57b4f294c142d050a784b67e2cf1f2ee57b4f294c142d050a784b67e2cf1f2e33222427160491525611520...00.0000000.0000000.0000000.00000000000
\n", 2266 | "

10 rows × 57 columns

\n", 2267 | "
" 2268 | ], 2269 | "text/plain": [ 2270 | " Name \\\n", 2271 | "41323 VirusShare_4a400b747afe6547e09ce0b02dae7f1c \n", 2272 | "41324 VirusShare_9bd57c8252948bd2fa651ad372bd4f13 \n", 2273 | "41325 VirusShare_d1456165e9358b8f61f93a5f2042f39c \n", 2274 | "41326 VirusShare_e4214cc73afbba0f52bb72d5db8f8bb1 \n", 2275 | "41327 VirusShare_710890c07b3f93b90635f8bff6c34605 \n", 2276 | "41328 VirusShare_3c2eb01508703752dca01957ea451a40 \n", 2277 | "41329 VirusShare_3fb2d0ac00c5dff6c4fd5dfe6ba52c3f \n", 2278 | "41330 VirusShare_ad1ca9a4d572c0a2793c4cea29b20887 \n", 2279 | "41331 VirusShare_7414edb3d0be66aa0816e6ed4b6b0a21 \n", 2280 | "41332 VirusShare_e57b4f294c142d050a784b67e2cf1f2e \n", 2281 | "\n", 2282 | " md5 Machine SizeOfOptionalHeader \\\n", 2283 | "41323 4a400b747afe6547e09ce0b02dae7f1c 332 224 \n", 2284 | "41324 9bd57c8252948bd2fa651ad372bd4f13 332 224 \n", 2285 | "41325 d1456165e9358b8f61f93a5f2042f39c 332 224 \n", 2286 | "41326 e4214cc73afbba0f52bb72d5db8f8bb1 332 224 \n", 2287 | "41327 710890c07b3f93b90635f8bff6c34605 332 224 \n", 2288 | "41328 3c2eb01508703752dca01957ea451a40 332 224 \n", 2289 | "41329 3fb2d0ac00c5dff6c4fd5dfe6ba52c3f 332 224 \n", 2290 | "41330 ad1ca9a4d572c0a2793c4cea29b20887 332 224 \n", 2291 | "41331 7414edb3d0be66aa0816e6ed4b6b0a21 332 224 \n", 2292 | "41332 e57b4f294c142d050a784b67e2cf1f2e 332 224 \n", 2293 | "\n", 2294 | " Characteristics MajorLinkerVersion MinorLinkerVersion SizeOfCode \\\n", 2295 | "41323 258 11 0 354816 \n", 2296 | "41324 271 6 0 24064 \n", 2297 | "41325 258 10 0 118784 \n", 2298 | "41326 258 10 0 174592 \n", 2299 | "41327 258 9 0 475648 \n", 2300 | "41328 259 9 0 157696 \n", 2301 | "41329 259 83 82 724992 \n", 2302 | "41330 258 10 0 120320 \n", 2303 | "41331 259 10 0 233984 \n", 2304 | "41332 271 6 0 49152 \n", 2305 | "\n", 2306 | " SizeOfInitializedData SizeOfUninitializedData ... ResourcesNb \\\n", 2307 | "41323 257024 0 ... 7 \n", 2308 | "41324 164864 1024 ... 6 \n", 2309 | "41325 381952 0 ... 
18 \n", 2310 | "41326 300032 0 ... 15 \n", 2311 | "41327 348672 0 ... 59 \n", 2312 | "41328 62464 0 ... 13 \n", 2313 | "41329 2306048 0 ... 21 \n", 2314 | "41330 385024 0 ... 6 \n", 2315 | "41331 1377792 0 ... 18 \n", 2316 | "41332 561152 0 ... 0 \n", 2317 | "\n", 2318 | " ResourcesMeanEntropy ResourcesMinEntropy ResourcesMaxEntropy \\\n", 2319 | "41323 3.914415 1.441688 7.677091 \n", 2320 | "41324 3.199107 1.971335 5.214816 \n", 2321 | "41325 6.530946 2.458492 7.992688 \n", 2322 | "41326 5.732393 2.852364 7.987726 \n", 2323 | "41327 2.827826 0.960953 7.212329 \n", 2324 | "41328 3.943296 1.814443 6.122045 \n", 2325 | "41329 3.987463 2.642159 6.473700 \n", 2326 | "41330 3.729824 2.458492 5.317552 \n", 2327 | "41331 4.328322 2.323220 7.068413 \n", 2328 | "41332 0.000000 0.000000 0.000000 \n", 2329 | "\n", 2330 | " ResourcesMeanSize ResourcesMinSize ResourcesMaxSize \\\n", 2331 | "41323 7298.428571 16 28438 \n", 2332 | "41324 452.000000 34 958 \n", 2333 | "41325 18523.444444 48 33945 \n", 2334 | "41326 12706.133333 118 60500 \n", 2335 | "41327 2637.033898 20 67624 \n", 2336 | "41328 2708.153846 132 9640 \n", 2337 | "41329 14288.000000 76 270376 \n", 2338 | "41330 2739.500000 48 9640 \n", 2339 | "41331 76158.277778 9 1342735 \n", 2340 | "41332 0.000000 0 0 \n", 2341 | "\n", 2342 | " LoadConfigurationSize VersionInformationSize legitimate \n", 2343 | "41323 72 0 0 \n", 2344 | "41324 0 15 0 \n", 2345 | "41325 72 14 0 \n", 2346 | "41326 72 14 0 \n", 2347 | "41327 72 0 0 \n", 2348 | "41328 72 14 0 \n", 2349 | "41329 0 0 0 \n", 2350 | "41330 72 15 0 \n", 2351 | "41331 72 19 0 \n", 2352 | "41332 0 0 0 \n", 2353 | "\n", 2354 | "[10 rows x 57 columns]" 2355 | ] 2356 | }, 2357 | "execution_count": 17, 2358 | "metadata": {}, 2359 | "output_type": "execute_result" 2360 | } 2361 | ], 2362 | "source": [ 2363 | "#first 10 data points from malware database:\n", 2364 | "mal.head(10)" 2365 | ] 2366 | }, 2367 | { 2368 | "cell_type": "code", 2369 | "execution_count": 18, 2370 | "id": 
"d243486f", 2371 | "metadata": {}, 2372 | "outputs": [ 2373 | { 2374 | "data": { 2375 | "text/html": [ 2376 | "
\n", 2377 | "\n", 2390 | "\n", 2391 | " \n", 2392 | " \n", 2393 | " \n", 2394 | " \n", 2395 | " \n", 2396 | " \n", 2397 | " \n", 2398 | " \n", 2399 | " \n", 2400 | " \n", 2401 | " \n", 2402 | " \n", 2403 | " \n", 2404 | " \n", 2405 | " \n", 2406 | " \n", 2407 | " \n", 2408 | " \n", 2409 | " \n", 2410 | " \n", 2411 | " \n", 2412 | " \n", 2413 | " \n", 2414 | " \n", 2415 | " \n", 2416 | " \n", 2417 | " \n", 2418 | " \n", 2419 | " \n", 2420 | " \n", 2421 | " \n", 2422 | " \n", 2423 | " \n", 2424 | " \n", 2425 | " \n", 2426 | " \n", 2427 | " \n", 2428 | " \n", 2429 | " \n", 2430 | " \n", 2431 | " \n", 2432 | " \n", 2433 | " \n", 2434 | " \n", 2435 | " \n", 2436 | " \n", 2437 | " \n", 2438 | " \n", 2439 | " \n", 2440 | " \n", 2441 | " \n", 2442 | " \n", 2443 | "
Namemd5MachineSizeOfOptionalHeaderCharacteristicsMajorLinkerVersionMinorLinkerVersionSizeOfCodeSizeOfInitializedDataSizeOfUninitializedData...ExportNbResourcesNbResourcesMeanEntropyResourcesMinEntropyResourcesMaxEntropyResourcesMeanSizeResourcesMinSizeResourcesMaxSizeLoadConfigurationSizeVersionInformationSize
0memtest.exe631ea355665f28d4707448e442fbf5b8332224258903619841157120...043.2628232.5688443.5379398797.021618032016
\n", 2444 | "

1 rows × 56 columns

\n", 2445 | "
" 2446 | ], 2447 | "text/plain": [ 2448 | " Name md5 Machine \\\n", 2449 | "0 memtest.exe 631ea355665f28d4707448e442fbf5b8 332 \n", 2450 | "\n", 2451 | " SizeOfOptionalHeader Characteristics MajorLinkerVersion \\\n", 2452 | "0 224 258 9 \n", 2453 | "\n", 2454 | " MinorLinkerVersion SizeOfCode SizeOfInitializedData \\\n", 2455 | "0 0 361984 115712 \n", 2456 | "\n", 2457 | " SizeOfUninitializedData ... ExportNb ResourcesNb ResourcesMeanEntropy \\\n", 2458 | "0 0 ... 0 4 3.262823 \n", 2459 | "\n", 2460 | " ResourcesMinEntropy ResourcesMaxEntropy ResourcesMeanSize \\\n", 2461 | "0 2.568844 3.537939 8797.0 \n", 2462 | "\n", 2463 | " ResourcesMinSize ResourcesMaxSize LoadConfigurationSize \\\n", 2464 | "0 216 18032 0 \n", 2465 | "\n", 2466 | " VersionInformationSize \n", 2467 | "0 16 \n", 2468 | "\n", 2469 | "[1 rows x 56 columns]" 2470 | ] 2471 | }, 2472 | "execution_count": 18, 2473 | "metadata": {}, 2474 | "output_type": "execute_result" 2475 | } 2476 | ], 2477 | "source": [ 2478 | "#datapoint of legit to have a good comparison \n", 2479 | "legit.take([0]) #1st datapoint" 2480 | ] 2481 | }, 2482 | { 2483 | "cell_type": "code", 2484 | "execution_count": 19, 2485 | "id": "fd741615", 2486 | "metadata": {}, 2487 | "outputs": [ 2488 | { 2489 | "data": { 2490 | "text/html": [ 2491 | "
\n", 2492 | "\n", 2505 | "\n", 2506 | " \n", 2507 | " \n", 2508 | " \n", 2509 | " \n", 2510 | " \n", 2511 | " \n", 2512 | " \n", 2513 | " \n", 2514 | " \n", 2515 | " \n", 2516 | " \n", 2517 | " \n", 2518 | " \n", 2519 | " \n", 2520 | " \n", 2521 | " \n", 2522 | " \n", 2523 | " \n", 2524 | " \n", 2525 | " \n", 2526 | " \n", 2527 | " \n", 2528 | " \n", 2529 | " \n", 2530 | " \n", 2531 | " \n", 2532 | " \n", 2533 | " \n", 2534 | " \n", 2535 | " \n", 2536 | " \n", 2537 | " \n", 2538 | " \n", 2539 | " \n", 2540 | " \n", 2541 | " \n", 2542 | " \n", 2543 | " \n", 2544 | " \n", 2545 | " \n", 2546 | " \n", 2547 | " \n", 2548 | " \n", 2549 | " \n", 2550 | " \n", 2551 | " \n", 2552 | " \n", 2553 | " \n", 2554 | " \n", 2555 | " \n", 2556 | " \n", 2557 | " \n", 2558 | "
Namemd5MachineSizeOfOptionalHeaderCharacteristicsMajorLinkerVersionMinorLinkerVersionSizeOfCodeSizeOfInitializedDataSizeOfUninitializedData...ResourcesNbResourcesMeanEntropyResourcesMinEntropyResourcesMaxEntropyResourcesMeanSizeResourcesMinSizeResourcesMaxSizeLoadConfigurationSizeVersionInformationSizelegitimate
41323VirusShare_4a400b747afe6547e09ce0b02dae7f1c4a400b747afe6547e09ce0b02dae7f1c3322242581103548162570240...73.9144151.4416887.6770917298.42857116284387200
\n", 2559 | "

1 rows × 57 columns

\n", 2560 | "
" 2561 | ], 2562 | "text/plain": [ 2563 | " Name \\\n", 2564 | "41323 VirusShare_4a400b747afe6547e09ce0b02dae7f1c \n", 2565 | "\n", 2566 | " md5 Machine SizeOfOptionalHeader \\\n", 2567 | "41323 4a400b747afe6547e09ce0b02dae7f1c 332 224 \n", 2568 | "\n", 2569 | " Characteristics MajorLinkerVersion MinorLinkerVersion SizeOfCode \\\n", 2570 | "41323 258 11 0 354816 \n", 2571 | "\n", 2572 | " SizeOfInitializedData SizeOfUninitializedData ... ResourcesNb \\\n", 2573 | "41323 257024 0 ... 7 \n", 2574 | "\n", 2575 | " ResourcesMeanEntropy ResourcesMinEntropy ResourcesMaxEntropy \\\n", 2576 | "41323 3.914415 1.441688 7.677091 \n", 2577 | "\n", 2578 | " ResourcesMeanSize ResourcesMinSize ResourcesMaxSize \\\n", 2579 | "41323 7298.428571 16 28438 \n", 2580 | "\n", 2581 | " LoadConfigurationSize VersionInformationSize legitimate \n", 2582 | "41323 72 0 0 \n", 2583 | "\n", 2584 | "[1 rows x 57 columns]" 2585 | ] 2586 | }, 2587 | "execution_count": 19, 2588 | "metadata": {}, 2589 | "output_type": "execute_result" 2590 | } 2591 | ], 2592 | "source": [ 2593 | "#datapoint of malware to have a good comparison \n", 2594 | "mal.take([0]) #1st datapoint" 2595 | ] 2596 | }, 2597 | { 2598 | "cell_type": "code", 2599 | "execution_count": 20, 2600 | "id": "4dd1e87b", 2601 | "metadata": {}, 2602 | "outputs": [], 2603 | "source": [ 2604 | "# Feature Extraction" 2605 | ] 2606 | }, 2607 | { 2608 | "cell_type": "code", 2609 | "execution_count": 21, 2610 | "id": "d1ecc40f", 2611 | "metadata": {}, 2612 | "outputs": [], 2613 | "source": [ 2614 | "x=dataset.drop(['Name','md5','legitimate'],axis=1).values #independent features\n", 2615 | "y=dataset['legitimate'].values #dependent variable" 2616 | ] 2617 | }, 2618 | { 2619 | "cell_type": "code", 2620 | "execution_count": 22, 2621 | "id": "e26ddd4d", 2622 | "metadata": {}, 2623 | "outputs": [], 2624 | "source": [ 2625 | "extratrees=ek.ExtraTreesClassifier().fit(x,y)\n", 2626 | "model=SelectFromModel(extratrees,prefit=True)\n", 2627 | 
"x_new=model.transform(x)\n", 2628 | "nbfeatures=x_new.shape[1]" 2629 | ] 2630 | }, 2631 | { 2632 | "cell_type": "code", 2633 | "execution_count": 23, 2634 | "id": "3306769b", 2635 | "metadata": {}, 2636 | "outputs": [ 2637 | { 2638 | "data": { 2639 | "text/plain": [ 2640 | "14" 2641 | ] 2642 | }, 2643 | "execution_count": 23, 2644 | "metadata": {}, 2645 | "output_type": "execute_result" 2646 | } 2647 | ], 2648 | "source": [ 2649 | "nbfeatures" 2650 | ] 2651 | }, 2652 | { 2653 | "cell_type": "code", 2654 | "execution_count": 24, 2655 | "id": "a1bc47cc", 2656 | "metadata": {}, 2657 | "outputs": [ 2658 | { 2659 | "data": { 2660 | "text/plain": [ 2661 | "([,\n", 2662 | " ],\n", 2663 | " [Text(0.7884607600756525, 0.7670264857362649, 'Important Features'),\n", 2664 | " Text(-0.7884607959827531, -0.7670264488257517, 'Not Important Features')],\n", 2665 | " [Text(0.4300695054958104, 0.4183780831288717, '25%'),\n", 2666 | " Text(-0.43006952508150165, -0.4183780629958645, '75%')])" 2667 | ] 2668 | }, 2669 | "execution_count": 24, 2670 | "metadata": {}, 2671 | "output_type": "execute_result" 2672 | }, 2673 | { 2674 | "data": { 2675 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAWoAAADnCAYAAAA3gRxRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAdi0lEQVR4nO3deZgU5b328e9vVlYbF1yISysquKOixl2M0Si8HjUqRzFqjkmOGjUalzM55k1K41GMr0uMIW4nLtEYlehxGReMwQ1U3FARBY84cQ0RkJZl9nneP6pGB5wZZunup6r6/lxXXzP0VFfdPczc83RV11PmnENEROKrzHcAERHpnopaRCTmVNQiIjGnohYRiTkVtYhIzKmoRURiTkUtIhJzKmoRkZhTUYuIxJyKWkQk5lTUIiIxp6IWEYk5FbWISMypqEVEYk5FLSIScypqEZGYU1GLiMScilpEJOZU1CIiMaeiFhGJORW1iEjMqahFRGJORS0iEnMqahGRmFNRi4jEnIpaRCTmVNQiIjFX4TuASN4EmUHApsBmHW4jgKHAkOg2uMNHgNZmV/7ZVo1/HAgsB1ZEt8VA3Wq3D+omj28pynMR6cCcc74ziPROkCkHtgV2AXaNPm4NDO/L6lpc2cdbNt7xjR4s2gp8Qlja/wu8BMwE3qybPL6tL9sW6QkVtcRfkBkGjItuuwM7AgPztfomV1G3dePt2X6sYhkwi7C0ZwIv1E0evzQP0UQAFbXEUZApA3YDDgO+A4ylgMdTGlzlu6Mbb9sqj6t0wFvAQ8DUusnjX83juqUEqaglPoLMnsAk4Bhg/WJtdoUb8PZ2jX/YpoCbWABMJSztlwq4HUkpFbX4FWRGAScAxwNb+IiQc4Pe3Knx5h2KtLm/A38B7q6bPH5WkbYpCaeiluILMlWExfxjwt0aXi12Q1/btfGGnT1s+mXgN4Sl3exh+5IQKmopniCzHnAacDqwoec0X1rohr28R+MUn38wPgGmANfXTR6/2GMOiSkVtRRekNkGOAf4HjDAc5qv+cit9+I+jdfu4TsHUA/cAVxTN3n8XN9hJD5U1FI4QSYL/IpwN0dsz4Jd0Lbh8wc2XbWn7xyreRC4oG7y+Hm+g4h/KmrJvyAzHPg5cCpQ5TnNGs1r23jGIU2/3tt3jk60ADcAQd3k8Yt8hxF/VNSSP0FmCHBudBvqOU2PvdG2+bOHN/3Xvr5zdCMHXAr8pm7y+EbfYaT4YvtyVBImyBwDvAMEJKikARqpjPtoJQNcDryTran9V99hpPg0KZP0T7gfegpwqOckfdboquJe1O2ywF3ZmtozgZPrJo9/13MeKRKNqKVvgkwZQeYsYA4JLmmAhviPqFe3F/Batqb2NN9BpDhU1NJ7QWYz4BnCkzUGr2Hp2GuI//HOzgwGpmRrah/J1tRu5DuMFJaKWnonyBwJzAbi+C6JPmmg2neE/jgUeDNbU3tMvlZoZsvzta4ebi9rZsf3cx1nm9mgLr72lJnNM7PZ0e3oPqx/jJkd1p+M/aGilp4JMtUEmd8C9wHDPKfJq3pXZb4z9NO6wD3Zmto7sjW1w3yH6Q0zqyDc996vogbOBjot6sgk59yY6Da1D+sfQzibY49ZKC8dq6KWNQsyWxLOs3yG7yiFUE/ii7rdJODlbE3t6HyszMwOMLOnzeweM5tvZpPNbJKZzTKzN81sZLTcrWZ2vZk9Gy03Ibp/gJndEi37mpmNi+4/2czuNbOHgGnAZGDfaLR7TjTCftbMXo1ue3XI85SZTTWzd8zszqgMzyK8ks90M5vew+c23Mz+YmYvRbe9o/t3N7OZUd6ZZjbKzKqAi4GJUcaJZhaY2Xkd1jcnyp01s7fNbArwKrCJmZ0fbeMNM7soWn6wmdWa2evRYyd2l1fv+pDuBZlDgT8Da/mOUij1VKdpwDISeD5bU3tM3eTxf83D+nYCtgGWEE7XerNzbncz+wlwJuFIFsJR8f7R9qeb2ZaEk27hnNvBzEY
D08xs62j5PYEdnXNLzOwA4DznXHvBDwK+7ZxrMLOtgLv4avKunYHtCOdHmQHs7Zy71sx+CoxzznV1YtCdZlYfff4twuMrVzvnnjOzTYHHo+f5DrCfc67FzA4CLnXOfdfMfgGMdc6dEWUMuvmejQK+75w73cwOBrYivOCFAQ+a2X6EVyP6xDk3Plpfppv1aUQt3QgypxFOfp/akgZY6QakZUTdbhjwaLam9t/zsK6XnHOfOucagfcIR8AAbxKWc7t7nHNtzrl3CQt9NLAP8EcA59w7hFO8thf1E865JV1ssxK4yczeBO4lvOxau1nOuY+cc22Ex0qyX394pzru+lgMHARcZ2azCU/XX8vMhhK+Z/1eM5sDXE34R6G3/u6ceyH6/ODo9hrhCHs0YXG/CRxkZpeb2b7OuVx3K9SIWr4uyBjwa+C8NS2aBvVUpXHAUgFcn62pHQWc149rOnY8E7Ktw7/bWLU/Vn+LoyMcQXZlRTdfOwdYSDiaLwMausjTSt87rAzY0zlX3/FOM/stMN05d6SZZYGnunh8C6sOdDtONtbxuRlwmXPuhtVXYGa7Eu73vszMpjnnLu4urMhXgswA4B5KpKQB6qku952hgM4BHsjW1A4p8HaOMbOyaL/1FsA8wrdwTgKIdnlsGt2/umWsejZrBvg0GjV/D+jJ/8/q61iTaXQ45mJmYzps++Po85O7WX8d4UWVMbNdgM272M7jwL+Z2ZBo2W+Y2fpmNgJY6Zy7A/h/7evqiopavhJkMsCTQK/fvpRk9S7VRQ0wAXguW1NbyMubzQOeBh4FTnXONRCesVoe7cK4Gzg52oWyujeAlujA2jnR404ysxcId5V0N/pudyPwaE8PJgJnAWOjA3xzCScQg/CV5GVmNoNV/0BMB7ZtP5hIeJWedaJdJ6cB8zvbiHNuGvAn4Pno+zCVsPB3AGZFj78QuKS7sJqUSUJhSU8jPOhRUk5pOnf2k227jvGdowjmAgfWTR6/MJ8rNbNbgYf7+LY36QGNqAWCzFqEL9FKrqQBGqgulWM12wLTszW1sbm6jvSMirrUhSU9DYjDFU68qHdVpVLUEL4FbXo+d4M4507WaLqwVNSl7KuRdMmWNEADVZW+MxTZaODxpJ3FWMpU1KUqvBL4A8A3fUfxrYGSGlG3GwPUZmtqEz+pVilQUZei8H3StwIH+A0SD42uMpHT5+XBXsB92ZraUvxDlSgq6tL0X8BxvkPERQnu+ujoYOAK3yGkeyrqUhNkTgR+5jtGnDRSsiPqdmdna2pP8B1CuqaiLiVBZm/gJt8x4qaJykRPSJ0nN2Zranf2HUI6p6IuFUFmOOEEN6U+evyaJir0PYGBwP3Zmtr1fAeRr1NRl4Lw4OFtgC7ZtBrnaAJL2+x5fbUZcHe2pjbtp9Qnjoq6NJxLwi9AW0CdzT1Ryg5EBxdjR0WddkFmD+BS3zHiymFNvjPE0DnZmtoJvkPIV1TUaRZOtHQX4UTs0gkVdZd+n62pTfUFI5JERZ1u19D1PLkCtGHNvjPE1MZoF0hsqKjTKsiMY9WJz6UTrZSpqLv2w2xN7TjfIURFnU5Bphq43neMJGhTUXfHgJuzNbWDfAcpdSrqdLqQry4iKt1ooazFd4aY24I1XH1ECk9FnTZBZhvgP3zHSIpWylXUa/aTbE1tyc+y6JOKOn1+j84+7LFmylt9Z0iAMmBKtqZWJwZ5oqJOkyAzAdjfd4wkaaZCRd0zO1NiFz2OExV1WgSZMnRiS681OxV1L1ys08v9UFGnx/GEl6CXXmiios13hgQZDZzoO0QpMuec7wzSX0GmEpiHTm7ptTlt2WcnNF26bzG21fLFZyyqvYrW5Z9jVsaQMYew1th/Yelzd7L89ccpG5QBYO39TmTgyN1o+GguS6ZNwcorWe/w86lcewRtDcv57IHLWf/YizE/c0n9Hdi6bvJ4ndFZRLoETzr8OyrpPmmksngjlbJy1h53CtUbbklb40o+ve1sBmTDKaC
Hjj2CzB5HrbL4Fy/dz/AjfkZL7p8se+0R1jnwByyd+Wcyex7rq6QhnGHvR8B1vgKUIu36SLrwIrX/6TtGUjW64hV1xZB1qN5wSwDKqgdRue4mtC5b3OXyVlaBa2nCtTRiZRU0f/4prcsWM2BT73u4LtRJMMWlok6+E9A8033WQJWXfX8tuYU0LVxA9YhRACx79WE++cMZLHrkGloblgOQ+eYxLH7sOr54+QGG7jKBpc/czrB9Y3HFrA2B032HKCXaR51k4QUB5gDb+o6SVI+07v7U6c1nH1DMbbY11bPwTzVk9pzIoFF70bric8oGrgVmLH32DlqXL2G9w85e5TENH85h5fznGbrzYSx99g6srJy1DzyF8sFrFzN6R+8DI+smj1eBFIFG1Ml2MCrpfmko8rlBrrWFz+6/lMHbHsCgUXsBUD54baysHLMyhu50CE2fzl/1Mc6Rm3k3mb2PY+mMPzFsn+MZvN04vnjloaJmX83mwCE+A5QSFXWynek7QNI1uKqiHZVzzrH40d9Que4mrLX7kV/e37J8yZefr5z/PJXrbbbK41bMeZKBI8dSPmAIrrkRrAzMws/9OtV3gFKhd30kVZDZAl1eq9/qKd4FyBs/nsuKt6ZTOTzLJ7eEf2PX3u9EVrz9DE0LF4AZFZn1WeeQM758TFtzA8vnPMkGx/4KgLV2O4LP7r8UK69gvcMvKFr2LkzI1tRuXDd5/Ee+g6Sdijq5TkGviPqtnqqifQ8HbLwdm/3Hw1+7f+DI3bp8TFnlADY87rKv1rHJ9ow45XcFydcH5cAPgV/6DpJ2+kVPruN9B0iDeletiYb65wfZmloN+ApMRZ1EQWYvIOs7RhrUU63fgf4ZAfwf3yHSTj+kyaTRdJ7UU6XRYP+d7DtA2qmokybIVADH+o6RFivdAP0O9N9B2ZraAb5DpJl+SJPn28Bw3yHSop4qTdvZf4OAA32HSDMVdfIcteZFpKfqqdauj/yY4DtAmqmok+dg3wHSpMFpH3WeqKgLSEWdJEFmFLCp7xhp0qCDifmySbamdiffIdJKRZ0sGk3nWQNVlb4zpIjeplcgKupkUVHnmYo6r7T7o0BU1EkRXm7rAN8x0qbRVRZ3+rx02y1bU5vxHSKNVNTJsRswxHeItGlARZ1HZcDOvkOkkYo6OXb1HSCNmlTU+baL7wBppKJODv0CFEAjlcWb57Q06Oe0AFTUyaFfgDxzjhZHmX4H8kuv/ApAP6RJEGSq0SW3CsH7JVJSaOtsTe1g3yHSRkWdDDugizzknYMm3xlSqAwY4ztE2qiok2GM7wBp5DAVdWFoN12eqaiTYWvfAdKojbJm3xlSSqeS55mKOhk29x0gjdowFXVhfMN3gLRRUSeDiroANKIumI18B0gbFXUyaMa8AmilrMV3hpRSUeeZijrugkwVsJ7vGGnUQrmKujCG68rk+aWijr8RgPkOkUYq6oIxYAPfIdJERR1/uj5igTRT0eo7Q4pp90ceqajjT2d5FUizU1EXkIo6j1TU8aeiLpAmKtp8Z0gxFXUeqajjT0VdICrqgtLPbR6pqONPP/AF0kilirpw9K6PPFJRx5+KukAaqXK+M6SYrkWZRyrq+BvoO0BaNbpKFXXhaESdR/pmxp/e61sg+5W9sceC6klLfOdIowaqWuCfvmOkhoo6/hp8B0grMwYaTq9YCmAQjXq1nkf6ZsafilqSSO9RzyMVdfypqCWJdJmzPFJRx5+KWpJose8AaaKijr963wFE+kBFnUcq6vj73HcAkT5QUeeRijr+PvYdQKQPVNR5pKKOv38AOtVZkkZFnUcq6rgLci3AQt8xRHqhDe2yyysVdTJo94ckyQcEOb0KzCMVdTKoqCVJ5vkOkDYq6mT4wHcAkV6Y7ztA2qiok2Gu7wAivaCizjMVdTK85TuASC9o10eeqaiTQUUtSaIRdZ6pqJMgyC1B+6klGZain9W8U1Enx6u+A4j0wAsEOV05J89U1Mnxiu8AIj0
w03eANFJRJ8ezvgOI9MDzvgOkkYo6OZ5HU55KvLUCL/oOkUYq6qQIck3ADN8xRLoxhyC3zHeINFJRJ8uTvgOIdEO75wpERZ0sf/MdQKQbj/gOkFYq6mR5Bcj5DiHSiXpguu8QaaWiTpIg1wo86juGSCeeJMjpQswFoqJOnrt9BxDpxP2+A6SZijp5HgW+8B1CpINW4EHfIdJMRZ00Qa4ReMB3DJEOnibILfIdIs1U1Mmk3R8SJ7f4DpB2KupkmkY4S5mIb0uBv/gOkXYq6iQKcs3Anb5jiAB/IshpaoMCq/AdQPrsd8CPfYeYt6iViVO/+j1d8HkbF4+rZmmD46ZXmxk+yAC49FvVHLZVJTM+aOG02gaqK+Cu7w5iy3XKWNrgmDh1JY9NGoSZ+Xoq0jf/7TtAKTDnNHVsYgWZJ4CDfMdo19rm+MZVy3nxB4O5ZXYTQ6qM8/aqXmWZo+5eyeUHVVO31PHY/7Zw5SEDOPfxBg4fVcH+WY0bEuY1gtwuvkOUAu36SLbrfAfo6Mn3Wxm5ThmbDev6x6qyHOpbYGWzo7Ic3lvSxsfL2lTSyXST7wClQr8dyfYQUAdk/cYI/XlOM8dtX/nlv6+b1cTtrzczdkQ5Vx48gLUHGj/bp5ofPdTAwEr445EDOW9aA78aV93NWiWm/gnc6jtEqdCIOsmCXBswxXcMgKZWx4PzWjhm2/Bv/2ljq3jvrCHMPnUwGw0xzp0Wnl08ZsNyXvjBYKafNJgFn7cxYmgZDpg4dSUn3FfPwuVtHp+F9MI1OohYPCrq5LuRGLxV79F3W9hlozI2GBL+SG0wpIzyMqPMjB/uWsWsj1tXWd45xyXPNPJ/96vmoqcbueiAak7YsZJrX2zyEV96J0d4MFuKREWddEEuB/zGd4y7Vtvt8emyr0bG97/dzPbrr/qjdtvrzYzfqoK1Bxorm6HMwtvK5qJFlr77HUFO0xgUkfZRp8PVwE+AYT42vrLZ8cSCVm6YMPDL+y74ayOz/9GKAdlhZdwwYcAqy9/2ejPTThgEwE+/WcV376mnqhzu+u7A1Vcv8bISuMZ3iFKjt+elRZC5ELjEdwxJvSsJcuf5DlFqtOsjPa4B/uE7hKTaIjQY8EJFnRZBbgXwK98xJNV+SZBb6jtEKVJRp8sNwGzfISSV3iL8+RIPVNRpEl6q6zRABx4k386Jfr7EAxV12gS5F4CbfceQVKklyD3hO0QpU1GnUw3wme8QkgorgDN9hyh1Kuo0CnJLgAt8x5BU+E+C3Pu+Q5Q6FXV63Qbo5ar0x3PAb32HEJ3wkm5BZkPgDWC47yiSOMuBnQhyC3wHEY2o0y3I/QP4N98xJJF+qpKODxV12gW5h9FMZ9I79xPkdFGAGFFRl4bzgDm+Q0gizANO8h1CVqWiLgVBrgH4V8L9jiJdWQ4cSZBb5juIrEpFXSqC3FvA8YAuoSJd+T5B7m3fIeTrVNSlJMg9RHgyjMjqriDITfUdQjqnoi41Qe4K4A++Y0isPAr8zHcI6ZqKujSdBjzjO4TEwgvA0ZpwKd50wkupCjLrEp55Ntp3FPFmLrBvNOWAxJhG1KUqyC0Gvg3UeU4ifnwAHKKSTgYVdSkLch8B3wI+8R1FimoRcHD0/y8JoKIudeFpwuNQWZeKzwhLep7vINJzKmqBIDefsKw/9R1FCuojYD+C3Gu+g0jvrLGozcyZ2ZUd/n2emQVreMwRZrZtF18LzKyol5s3s5PNbEQ/Hj/GzA7r4msHmFnOzGZHt7/2cRtnm9mgvmbst7Cs9wHe9ZZBCuk9wgOH7/gOIr3XkxF1I3CUma3Xi/UeAXRa1MVmZuXAyUCfixoYA3Ra1JFnnXNjottBfdzG2UCvitrMKvq4rc6Fu0H2InzLlqTHW4QlXec7iPRNT4q6BbgROGf1L5jZZmb2pJm9EX3c1Mz2Ag4HrohGmCO7WrGZPWVmV5vZM2b2tpntZmb3mdm7ZnZJtEzWzN4xs9ui7UxtH3ma2bfM7DU
ze9PM/mBm1dH9dWb2CzN7DjgOGAvcGeUZGH3tJTObY2Y3mpl1yHO5mc0ys/lmtq+ZVQEXAxOjx0/syTfWzE6I1jPbzG6I/mBgZr83s5fN7C0zuyi67yzCPyTTzWx6dN/yDus62sxujT6/1cyuipa73MxGmtljZvaKmT1rZqOj5Y6Jnt/rZtbz90wHuUXAgcCDPX6MxNkLwP4EOe3WSrCe7qP+HTDJzDKr3X8dcLtzbkfgTuBa59xMwl/y86MR5ntrWHeTc24/4HrgAeDHwPbAyWa2brTMKODGaDtfAKeb2QDgVmCic24HoILwRI52Dc65fZxzdwAvA5OiPPXAdc653Zxz2wMDgQkdHlfhnNudcIT7S+dcE/AL4O7o8Xd38hz27bDr40Iz2waYCOztnBsDtAKTomUvdM6NBXYE9jezHZ1z1xIezBvnnBu3hu8XwNbAQc65cwn/iJ7pnNuVcJa8KdEyvwAOcc7tRPiHs+eCXD1wFOH/iSTXncAB0VsxJcF69NLZOfeFmd0OnAXUd/jSnoS/0AB/BH7dhwztI7c3gbecc58CmNkCYBNgKfChc25GtNwdUY4ngPedc/Oj+28jLPlron93VqjtxpnZBYS7GtYhfGn4UPS1+6KPrwDZHj6HZ51zX5a9mZ0B7Aq8FA3WBwL/jL58rJn9iPB7vxHhLqI3eriddvc651rNbAjhrop7o+0AVEcfZwC3mtk9HZ5Tz4Vnqp1GkJlP+P+a390sUkhtwM8Jcpf5DiL50ZtfvmuAV4FbulmmL6c5NkYf2zp83v7v9nyrr9cBRvdWdHZnNBKfAox1zn0YHRgd0EmeVvpeTgbc5pxbZf4EM9uccNS7m3Pu82h3xoBOHg+rPufVl2l/bmXA0mjUvuqDnTvVzPYAxgOzzWyMc673I6sgdzVB5kXCP3wb9/rxUmyfA8cT5B7zHUTyp8dvz3POLQHuAU7pcPdMwnmOIXxp/1z0+TJgaD4CRjY1sz2jz4+LtvMOkDWzLaP7vwc83cXjO+ZpL71F0Yj06B5sv7fP50ngaDNbH8DM1jGzzYC1CEs2Z2YbAId2s42FZraNmZUBR3a2EefcF8D7ZnZMtB0zs52iz0c65150zv2C8ASHTXqRf1VBbiawMzCtz+uQYngNGKuSTp/evo/6SqDjuz/OAr5vZm8QFuVPovv/DJwfHejr8mBiL7wNnBRtZx3g9865BuD7hC/73yQcgXe1T/VW4Hozm004Yr6JcFfL/wAv9WD704Fte3ow0Tk3F/g5MC3K/ASwkXPudcJfprcIZ7Cb0eFhNwKPth9MJJyO9GHgb3T//uZJwClm9nq03n+J7r8iOsg6h3ACptd78Dy7Fh5kPBT4JZrTOm5agcuAPXSdw3SK/aRMZpYFHo4O/EkcBJn9gZuBLde0qBTcAuBEgtyMNS4piaUzE6X3gtzThO9auZJwNCd+/Dewk0o6/WI/opaYCzK7ExaGXvEUz3vAWQS5R3wHkeLQiFr6J8jNInwrYgA0+A2TeisJj31sp5IuLRpRS/4EmSwwmfBkH8mve4FzCXIf+g4ixaeilvwLMnsSniSzj+8oKfAKcAFB7m++g4g/KmopnCAzAbgE2Ml3lAR6CbiIIFfrO4j4p6KWwgsy3wHOJ5zsSbr3ImFBP+o7iMSHilqKJ8jsSljYRwPlntPEiSM86/MqgpzO/pSvUVFL8YUHHc8ATgA28BvGqyXA7cAUgpwu2CBdUlGLP0GmAjgEOIlwKtbq7h+QCm3AU4Rndt5HkGvsfnERFbXERZAZRvi2vkmEU7emaddIC+GcLfcB/0OQW+g5jySMilriJ8isDRxMePmz7wDr+w3UJ8v5qpwfJMh97jmPJJiKWuItyBjhmY/fAfYGvgkM8xmpC0sIp999Jrq9Gl18QaTfVNSSLGFxjya8DuYuhCU+GhhexBQfAnMJp5WdC8wC5hDk9MskBaGilnQIMkOALaLbyOjjxkCmk9vq+78d4SyALYQXdlgU3T4
jvJblJ8BHhBermEuQW1bgZyOyChW1lJ4gMyj6rAVoIcjpQggSaypqEZGY0zSnIiIxp6IWEYk5FbWISMypqEVEYk5FLSIScypqEZGYU1GLiMScilpEJOZU1CIiMaeiFhGJORW1iEjMqahFRGJORS0iEnMqahGRmFNRi4jEnIpaRCTmVNQiIjGnohYRiTkVtYhIzKmoRURiTkUtIhJzKmoRkZhTUYuIxJyKWkQk5lTUIiIxp6IWEYk5FbWISMz9f6bjxUcYMcfAAAAAAElFTkSuQmCC\n", 2676 | "text/plain": [ 2677 | "
" 2678 | ] 2679 | }, 2680 | "metadata": {}, 2681 | "output_type": "display_data" 2682 | } 2683 | ], 2684 | "source": [ 2685 | "dataset.columns.size\n", 2686 | "imp_features_visual=['Important Features','Not Important Features']\n", 2687 | "imp_features_visual_val=[nbfeatures,57-nbfeatures]\n", 2688 | "plt.pie(imp_features_visual_val, labels=imp_features_visual, autopct='%0.f%%')" 2689 | ] 2690 | }, 2691 | { 2692 | "cell_type": "code", 2693 | "execution_count": 25, 2694 | "id": "65c60c3c", 2695 | "metadata": {}, 2696 | "outputs": [], 2697 | "source": [ 2698 | "x_train,x_test,y_train,y_test=train_test_split(x_new,y,test_size=0.2)" 2699 | ] 2700 | }, 2701 | { 2702 | "cell_type": "code", 2703 | "execution_count": 26, 2704 | "id": "bc4902c0", 2705 | "metadata": {}, 2706 | "outputs": [], 2707 | "source": [ 2708 | "features=[]\n", 2709 | "index=np.argsort(extratrees.feature_importances_)[::1][:nbfeatures]" 2710 | ] 2711 | }, 2712 | { 2713 | "cell_type": "code", 2714 | "execution_count": 27, 2715 | "id": "d12c9fbb", 2716 | "metadata": {}, 2717 | "outputs": [ 2718 | { 2719 | "name": "stdout", 2720 | "output_type": "stream", 2721 | "text": [ 2722 | "1. feature LoaderFlags (0.000003)\n", 2723 | "2. feature NumberOfRvaAndSizes (0.000049)\n", 2724 | "3. feature SizeOfHeapCommit (0.000331)\n", 2725 | "4. feature BaseOfCode (0.000807)\n", 2726 | "5. feature SizeOfUninitializedData (0.000878)\n", 2727 | "6. feature ResourcesMeanSize (0.001154)\n", 2728 | "7. feature BaseOfData (0.001165)\n", 2729 | "8. feature ResourcesMaxSize (0.001197)\n", 2730 | "9. feature SectionsMeanVirtualsize (0.001212)\n", 2731 | "10. feature SizeOfImage (0.001226)\n", 2732 | "11. feature SectionMaxRawsize (0.001275)\n", 2733 | "12. feature SizeOfInitializedData (0.001280)\n", 2734 | "13. feature SectionMaxVirtualsize (0.001295)\n", 2735 | "14. feature SectionsMeanRawsize (0.001400)\n" 2736 | ] 2737 | } 2738 | ], 2739 | "source": [ 2740 | "for f in range(nbfeatures):\n", 2741 | " print(\"%d. 
feature %s (%f)\"%(f+1,dataset.columns[2+index[f]],extratrees.feature_importances_[index[f]]))\n", 2742 | " features.append(dataset.columns[2+f])" 2743 | ] 2744 | }, 2745 | { 2746 | "cell_type": "code", 2747 | "execution_count": 28, 2748 | "id": "ffe54b76", 2749 | "metadata": {}, 2750 | "outputs": [], 2751 | "source": [ 2752 | "model ={ \"RandomForest\":ek.RandomForestClassifier(n_estimators=50),\n", 2753 | " \"DecisionTree\":tree.DecisionTreeClassifier(max_depth=10),\n", 2754 | " \"LogisticRegression\":LogisticRegression()\n", 2755 | " }" 2756 | ] 2757 | }, 2758 | { 2759 | "cell_type": "code", 2760 | "execution_count": 29, 2761 | "id": "0ab0113e", 2762 | "metadata": {}, 2763 | "outputs": [ 2764 | { 2765 | "name": "stdout", 2766 | "output_type": "stream", 2767 | "text": [ 2768 | "RandomForest : 0.9940963419051069\n", 2769 | "DecisionTree : 0.9900760593987685\n", 2770 | "LogisticRegression : 0.6964505613908004\n" 2771 | ] 2772 | }, 2773 | { 2774 | "name": "stderr", 2775 | "output_type": "stream", 2776 | "text": [ 2777 | "C:\\Users\\vajha\\anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=2):\n", 2778 | "ABNORMAL_TERMINATION_IN_LNSRCH.\n", 2779 | "\n", 2780 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n", 2781 | " https://scikit-learn.org/stable/modules/preprocessing.html\n", 2782 | "Please also refer to the documentation for alternative solver options:\n", 2783 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", 2784 | " n_iter_i = _check_optimize_result(\n" 2785 | ] 2786 | } 2787 | ], 2788 | "source": [ 2789 | "results={}\n", 2790 | "for algo in model:\n", 2791 | " clf=model[algo]\n", 2792 | " clf.fit(x_train,y_train)\n", 2793 | " score=clf.score(x_test,y_test)\n", 2794 | " print(\"%s : %s\"%(algo,score))\n", 2795 | " results[algo]=score" 2796 | ] 2797 | }, 2798 | { 2799 | "cell_type": "code", 2800 | "execution_count": 30, 
2801 | "id": "4189adb6", 2802 | "metadata": {}, 2803 | "outputs": [ 2804 | { 2805 | "data": { 2806 | "text/plain": [ 2807 | "'RandomForest'" 2808 | ] 2809 | }, 2810 | "execution_count": 30, 2811 | "metadata": {}, 2812 | "output_type": "execute_result" 2813 | } 2814 | ], 2815 | "source": [ 2816 | "winner=max(results,key=results.get)\n", 2817 | "winner" 2818 | ] 2819 | }, 2820 | { 2821 | "cell_type": "code", 2822 | "execution_count": 31, 2823 | "id": "4fb1c869", 2824 | "metadata": {}, 2825 | "outputs": [ 2826 | { 2827 | "name": "stdout", 2828 | "output_type": "stream", 2829 | "text": [ 2830 | "False positive rate : 0.114760 %\n", 2831 | "False negative rate : 0.162137 %\n" 2832 | ] 2833 | } 2834 | ], 2835 | "source": [ 2836 | "clf=model[winner]\n", 2837 | "res=clf.predict(x_new)\n", 2838 | "mt=confusion_matrix(y,res)\n", 2839 | "print(\"False positive rate : %f %%\" % ((mt[0][1] / float(sum(mt[0])))*100))\n", 2840 | "print(\"False negative rate : %f %%\" % ((mt[1][0] / float(sum(mt[1])))*100))" 2841 | ] 2842 | }, 2843 | { 2844 | "cell_type": "code", 2845 | "execution_count": 32, 2846 | "id": "93c1012f", 2847 | "metadata": {}, 2848 | "outputs": [], 2849 | "source": [ 2850 | "# Check for Multicollinearity" 2851 | ] 2852 | }, 2853 | { 2854 | "cell_type": "code", 2855 | "execution_count": 33, 2856 | "id": "978fd992", 2857 | "metadata": {}, 2858 | "outputs": [ 2859 | { 2860 | "data": { 2861 | "text/html": [ 2862 | "
\n", 2863 | "\n", 2876 | "\n", 2877 | " \n", 2878 | " \n", 2879 | " \n", 2880 | " \n", 2881 | " \n", 2882 | " \n", 2883 | " \n", 2884 | " \n", 2885 | " \n", 2886 | " \n", 2887 | " \n", 2888 | " \n", 2889 | " \n", 2890 | " \n", 2891 | " \n", 2892 | " \n", 2893 | " \n", 2894 | " \n", 2895 | " \n", 2896 | " \n", 2897 | " \n", 2898 | " \n", 2899 | " \n", 2900 | " \n", 2901 | " \n", 2902 | " \n", 2903 | " \n", 2904 | " \n", 2905 | " \n", 2906 | " \n", 2907 | " \n", 2908 | " \n", 2909 | " \n", 2910 | " \n", 2911 | " \n", 2912 | " \n", 2913 | " \n", 2914 | " \n", 2915 | " \n", 2916 | " \n", 2917 | " \n", 2918 | " \n", 2919 | " \n", 2920 | " \n", 2921 | " \n", 2922 | " \n", 2923 | " \n", 2924 | " \n", 2925 | " \n", 2926 | " \n", 2927 | " \n", 2928 | " \n", 2929 | " \n", 2930 | " \n", 2931 | " \n", 2932 | " \n", 2933 | " \n", 2934 | " \n", 2935 | " \n", 2936 | " \n", 2937 | " \n", 2938 | " \n", 2939 | " \n", 2940 | " \n", 2941 | " \n", 2942 | " \n", 2943 | " \n", 2944 | " \n", 2945 | " \n", 2946 | " \n", 2947 | " \n", 2948 | " \n", 2949 | " \n", 2950 | " \n", 2951 | " \n", 2952 | " \n", 2953 | " \n", 2954 | " \n", 2955 | " \n", 2956 | " \n", 2957 | " \n", 2958 | " \n", 2959 | " \n", 2960 | " \n", 2961 | " \n", 2962 | " \n", 2963 | " \n", 2964 | " \n", 2965 | " \n", 2966 | " \n", 2967 | " \n", 2968 | " \n", 2969 | " \n", 2970 | " \n", 2971 | " \n", 2972 | " \n", 2973 | " \n", 2974 | " \n", 2975 | " \n", 2976 | " \n", 2977 | " \n", 2978 | " \n", 2979 | " \n", 2980 | " \n", 2981 | " \n", 2982 | " \n", 2983 | " \n", 2984 | " \n", 2985 | " \n", 2986 | " \n", 2987 | " \n", 2988 | " \n", 2989 | " \n", 2990 | " \n", 2991 | " \n", 2992 | " \n", 2993 | " \n", 2994 | " \n", 2995 | " \n", 2996 | " \n", 2997 | " \n", 2998 | " \n", 2999 | " \n", 3000 | " \n", 3001 | " \n", 3002 | " \n", 3003 | " \n", 3004 | " \n", 3005 | " \n", 3006 | " \n", 3007 | " \n", 3008 | " \n", 3009 | " \n", 3010 | " \n", 3011 | " \n", 3012 | " \n", 3013 | " \n", 3014 | " \n", 3015 | " \n", 3016 | " \n", 3017 | " 
\n", 3018 | " \n", 3019 | " \n", 3020 | " \n", 3021 | " \n", 3022 | " \n", 3023 | " \n", 3024 | " \n", 3025 | " \n", 3026 | " \n", 3027 | " \n", 3028 | " \n", 3029 | " \n", 3030 | " \n", 3031 | " \n", 3032 | " \n", 3033 | " \n", 3034 | " \n", 3035 | " \n", 3036 | " \n", 3037 | " \n", 3038 | " \n", 3039 | " \n", 3040 | " \n", 3041 | " \n", 3042 | " \n", 3043 | " \n", 3044 | " \n", 3045 | " \n", 3046 | " \n", 3047 | " \n", 3048 | " \n", 3049 | " \n", 3050 | " \n", 3051 | " \n", 3052 | " \n", 3053 | " \n", 3054 | " \n", 3055 | " \n", 3056 | " \n", 3057 | " \n", 3058 | " \n", 3059 | " \n", 3060 | " \n", 3061 | " \n", 3062 | " \n", 3063 | " \n", 3064 | " \n", 3065 | " \n", 3066 | " \n", 3067 | " \n", 3068 | " \n", 3069 | " \n", 3070 | " \n", 3071 | " \n", 3072 | " \n", 3073 | " \n", 3074 | " \n", 3075 | " \n", 3076 | " \n", 3077 | " \n", 3078 | " \n", 3079 | " \n", 3080 | " \n", 3081 | " \n", 3082 | " \n", 3083 | " \n", 3084 | " \n", 3085 | " \n", 3086 | " \n", 3087 | " \n", 3088 | " \n", 3089 | " \n", 3090 | " \n", 3091 | " \n", 3092 | " \n", 3093 | " \n", 3094 | " \n", 3095 | " \n", 3096 | " \n", 3097 | " \n", 3098 | " \n", 3099 | " \n", 3100 | " \n", 3101 | " \n", 3102 | " \n", 3103 | " \n", 3104 | " \n", 3105 | " \n", 3106 | " \n", 3107 | " \n", 3108 | " \n", 3109 | " \n", 3110 | " \n", 3111 | " \n", 3112 | " \n", 3113 | " \n", 3114 | " \n", 3115 | " \n", 3116 | " \n", 3117 | " \n", 3118 | " \n", 3119 | " \n", 3120 | " \n", 3121 | " \n", 3122 | " \n", 3123 | " \n", 3124 | " \n", 3125 | " \n", 3126 | " \n", 3127 | " \n", 3128 | " \n", 3129 | " \n", 3130 | " \n", 3131 | " \n", 3132 | " \n", 3133 | " \n", 3134 | " \n", 3135 | " \n", 3136 | " \n", 3137 | " \n", 3138 | " \n", 3139 | " \n", 3140 | " \n", 3141 | " \n", 3142 | " \n", 3143 | " \n", 3144 | " \n", 3145 | " \n", 3146 | " \n", 3147 | " \n", 3148 | " \n", 3149 | " \n", 3150 | " \n", 3151 | " \n", 3152 | " \n", 3153 | " \n", 3154 | " \n", 3155 | " \n", 3156 | " \n", 3157 | " \n", 3158 | " \n", 3159 | " \n", 3160 | 
" \n", 3161 | " \n", 3162 | " \n", 3163 | " \n", 3164 | " \n", 3165 | " \n", 3166 | " \n", 3167 | " \n", 3168 | " \n", 3169 | "
MachineSizeOfOptionalHeaderCharacteristicsMajorLinkerVersionMinorLinkerVersionSizeOfCodeSizeOfInitializedDataSizeOfUninitializedDataAddressOfEntryPointBaseOfCode...ExportNbResourcesNbResourcesMeanEntropyResourcesMinEntropyResourcesMaxEntropyResourcesMeanSizeResourcesMinSizeResourcesMaxSizeLoadConfigurationSizeVersionInformationSize
033222425890361984115712061354096...043.2628232.5688443.5379398797.00000021618032016
1332224333090130560199680817784096...024.2504613.4207445.080177837.00000051811567218
233222433309051712062156803508964096...1114.4263242.8464495.27181331102.2727271042703767218
33322242589058572836915204512584096...1104.3642912.6693146.4007201457.0000009042647218
43322242589029491224729602173814096...124.3061003.4215985.1906031074.50000084913007218
..................................................................
13804233222425811020582422374401232914096...074.1227361.3702607.67709114900.7142861681654720
13804333222433167225378881853440400004096...0263.3776632.0316195.0500746905.8461544467624015
1380443322242581001182723804160596104096...0226.8254062.6170267.99048714981.90909148226487214
1380453322243316622549152168960512164096...0103.4216272.0609644.739744601.60000016221600
1380463322242581101116164684800227314096...044.4072521.9804826.11537496625.00000020318464720
\n", 3170 | "

138047 rows × 54 columns

\n", 3171 | "
" 3172 | ], 3173 | "text/plain": [ 3174 | " Machine SizeOfOptionalHeader Characteristics MajorLinkerVersion \\\n", 3175 | "0 332 224 258 9 \n", 3176 | "1 332 224 3330 9 \n", 3177 | "2 332 224 3330 9 \n", 3178 | "3 332 224 258 9 \n", 3179 | "4 332 224 258 9 \n", 3180 | "... ... ... ... ... \n", 3181 | "138042 332 224 258 11 \n", 3182 | "138043 332 224 33167 2 \n", 3183 | "138044 332 224 258 10 \n", 3184 | "138045 332 224 33166 2 \n", 3185 | "138046 332 224 258 11 \n", 3186 | "\n", 3187 | " MinorLinkerVersion SizeOfCode SizeOfInitializedData \\\n", 3188 | "0 0 361984 115712 \n", 3189 | "1 0 130560 19968 \n", 3190 | "2 0 517120 621568 \n", 3191 | "3 0 585728 369152 \n", 3192 | "4 0 294912 247296 \n", 3193 | "... ... ... ... \n", 3194 | "138042 0 205824 223744 \n", 3195 | "138043 25 37888 185344 \n", 3196 | "138044 0 118272 380416 \n", 3197 | "138045 25 49152 16896 \n", 3198 | "138046 0 111616 468480 \n", 3199 | "\n", 3200 | " SizeOfUninitializedData AddressOfEntryPoint BaseOfCode ... \\\n", 3201 | "0 0 6135 4096 ... \n", 3202 | "1 0 81778 4096 ... \n", 3203 | "2 0 350896 4096 ... \n", 3204 | "3 0 451258 4096 ... \n", 3205 | "4 0 217381 4096 ... \n", 3206 | "... ... ... ... ... \n", 3207 | "138042 0 123291 4096 ... \n", 3208 | "138043 0 40000 4096 ... \n", 3209 | "138044 0 59610 4096 ... \n", 3210 | "138045 0 51216 4096 ... \n", 3211 | "138046 0 22731 4096 ... \n", 3212 | "\n", 3213 | " ExportNb ResourcesNb ResourcesMeanEntropy ResourcesMinEntropy \\\n", 3214 | "0 0 4 3.262823 2.568844 \n", 3215 | "1 0 2 4.250461 3.420744 \n", 3216 | "2 1 11 4.426324 2.846449 \n", 3217 | "3 1 10 4.364291 2.669314 \n", 3218 | "4 1 2 4.306100 3.421598 \n", 3219 | "... ... ... ... ... 
\n", 3220 | "138042 0 7 4.122736 1.370260 \n", 3221 | "138043 0 26 3.377663 2.031619 \n", 3222 | "138044 0 22 6.825406 2.617026 \n", 3223 | "138045 0 10 3.421627 2.060964 \n", 3224 | "138046 0 4 4.407252 1.980482 \n", 3225 | "\n", 3226 | " ResourcesMaxEntropy ResourcesMeanSize ResourcesMinSize \\\n", 3227 | "0 3.537939 8797.000000 216 \n", 3228 | "1 5.080177 837.000000 518 \n", 3229 | "2 5.271813 31102.272727 104 \n", 3230 | "3 6.400720 1457.000000 90 \n", 3231 | "4 5.190603 1074.500000 849 \n", 3232 | "... ... ... ... \n", 3233 | "138042 7.677091 14900.714286 16 \n", 3234 | "138043 5.050074 6905.846154 44 \n", 3235 | "138044 7.990487 14981.909091 48 \n", 3236 | "138045 4.739744 601.600000 16 \n", 3237 | "138046 6.115374 96625.000000 20 \n", 3238 | "\n", 3239 | " ResourcesMaxSize LoadConfigurationSize VersionInformationSize \n", 3240 | "0 18032 0 16 \n", 3241 | "1 1156 72 18 \n", 3242 | "2 270376 72 18 \n", 3243 | "3 4264 72 18 \n", 3244 | "4 1300 72 18 \n", 3245 | "... ... ... ... \n", 3246 | "138042 81654 72 0 \n", 3247 | "138043 67624 0 15 \n", 3248 | "138044 22648 72 14 \n", 3249 | "138045 2216 0 0 \n", 3250 | "138046 318464 72 0 \n", 3251 | "\n", 3252 | "[138047 rows x 54 columns]" 3253 | ] 3254 | }, 3255 | "execution_count": 33, 3256 | "metadata": {}, 3257 | "output_type": "execute_result" 3258 | } 3259 | ], 3260 | "source": [ 3261 | "mc=dataset.drop([\"Name\",'md5','legitimate'],axis=1) #independent features\n", 3262 | "mc" 3263 | ] 3264 | }, 3265 | { 3266 | "cell_type": "code", 3267 | "execution_count": 34, 3268 | "id": "6adb0606", 3269 | "metadata": {}, 3270 | "outputs": [ 3271 | { 3272 | "name": "stdout", 3273 | "output_type": "stream", 3274 | "text": [ 3275 | "Variance Inflation Factor for Machine: 1.19\n", 3276 | "Variance Inflation Factor for SizeOfOptionalHeader: 0.02\n", 3277 | "Variance Inflation Factor for Characteristics: 1.43\n", 3278 | "Variance Inflation Factor for MajorLinkerVersion: 1.19\n", 3279 | "Variance Inflation Factor for 
MinorLinkerVersion: 1.5\n", 3280 | "Variance Inflation Factor for SizeOfCode: 5.13\n", 3281 | "Variance Inflation Factor for SizeOfInitializedData: 1.57\n", 3282 | "Variance Inflation Factor for SizeOfUninitializedData: 1.0\n", 3283 | "Variance Inflation Factor for AddressOfEntryPoint: 1.07\n", 3284 | "Variance Inflation Factor for BaseOfCode: 4.27\n", 3285 | "Variance Inflation Factor for BaseOfData: 1.92\n", 3286 | "Variance Inflation Factor for ImageBase: 1.0\n", 3287 | "Variance Inflation Factor for SectionAlignment: 2.06\n", 3288 | "Variance Inflation Factor for FileAlignment: 1.09\n", 3289 | "Variance Inflation Factor for MajorOperatingSystemVersion: 1.0\n", 3290 | "Variance Inflation Factor for MinorOperatingSystemVersion: 4.16\n", 3291 | "Variance Inflation Factor for MajorImageVersion: 203.26\n", 3292 | "Variance Inflation Factor for MinorImageVersion: 186.8\n", 3293 | "Variance Inflation Factor for MajorSubsystemVersion: 0.6\n", 3294 | "Variance Inflation Factor for MinorSubsystemVersion: 17345.88\n", 3295 | "Variance Inflation Factor for SizeOfImage: 2.86\n", 3296 | "Variance Inflation Factor for SizeOfHeaders: 1.05\n", 3297 | "Variance Inflation Factor for CheckSum: 1.04\n", 3298 | "Variance Inflation Factor for Subsystem: 0.65\n", 3299 | "Variance Inflation Factor for DllCharacteristics: 1.63\n", 3300 | "Variance Inflation Factor for SizeOfStackReserve: 1.31\n", 3301 | "Variance Inflation Factor for SizeOfStackCommit: 1.03\n", 3302 | "Variance Inflation Factor for SizeOfHeapReserve: 0.57\n", 3303 | "Variance Inflation Factor for SizeOfHeapCommit: 140.51\n", 3304 | "Variance Inflation Factor for LoaderFlags: 143.64\n", 3305 | "Variance Inflation Factor for NumberOfRvaAndSizes: 4.65\n", 3306 | "Variance Inflation Factor for SectionsNb: 1.15\n", 3307 | "Variance Inflation Factor for SectionsMeanEntropy: 1.03\n", 3308 | "Variance Inflation Factor for SectionsMinEntropy: 1.18\n", 3309 | "Variance Inflation Factor for SectionsMaxEntropy: 0.7\n", 3310 | 
"Variance Inflation Factor for SectionsMeanRawsize: 30.3\n", 3311 | "Variance Inflation Factor for SectionsMinRawsize: 619.0\n", 3312 | "Variance Inflation Factor for SectionMaxRawsize: 26.68\n", 3313 | "Variance Inflation Factor for SectionsMeanVirtualsize: 138.58\n", 3314 | "Variance Inflation Factor for SectionsMinVirtualsize: 622.11\n", 3315 | "Variance Inflation Factor for SectionMaxVirtualsize: 146.14\n", 3316 | "Variance Inflation Factor for ImportsNbDLL: 1.42\n", 3317 | "Variance Inflation Factor for ImportsNb: 1.2\n", 3318 | "Variance Inflation Factor for ImportsNbOrdinal: 1.28\n", 3319 | "Variance Inflation Factor for ExportNb: 1.06\n", 3320 | "Variance Inflation Factor for ResourcesNb: 1.24\n", 3321 | "Variance Inflation Factor for ResourcesMeanEntropy: 0.89\n", 3322 | "Variance Inflation Factor for ResourcesMinEntropy: 0.88\n", 3323 | "Variance Inflation Factor for ResourcesMaxEntropy: 1.16\n", 3324 | "Variance Inflation Factor for ResourcesMeanSize: 13.04\n", 3325 | "Variance Inflation Factor for ResourcesMinSize: 7.14\n", 3326 | "Variance Inflation Factor for ResourcesMaxSize: 4.39\n", 3327 | "Variance Inflation Factor for LoadConfigurationSize: 1.0\n" 3328 | ] 3329 | } 3330 | ], 3331 | "source": [ 3332 | "for i in range(len(mc.columns[:-1])):\n", 3333 | " v=vif(np.matrix(mc[:-1]),i)\n", 3334 | " print(\"Variance Inflation Factor for {}: {}\".format(mc.columns[i],round(v,2)))" 3335 | ] 3336 | }, 3337 | { 3338 | "cell_type": "code", 3339 | "execution_count": 35, 3340 | "id": "5cc83368", 3341 | "metadata": {}, 3342 | "outputs": [ 3343 | { 3344 | "name": "stdout", 3345 | "output_type": "stream", 3346 | "text": [ 3347 | "Variance Inflation Factor for MajorImageVersion : 203.26\n", 3348 | "Variance Inflation Factor for MinorImageVersion : 186.8\n", 3349 | "Variance Inflation Factor for MinorSubsystemVersion : 17345.88\n", 3350 | "Variance Inflation Factor for SizeOfHeapCommit : 140.51\n", 3351 | "Variance Inflation Factor for LoaderFlags : 143.64\n", 3352 
| "Variance Inflation Factor for SectionsMeanRawsize : 30.3\n", 3353 | "Variance Inflation Factor for SectionsMinRawsize : 619.0\n", 3354 | "Variance Inflation Factor for SectionMaxRawsize : 26.68\n", 3355 | "Variance Inflation Factor for SectionsMeanVirtualsize : 138.58\n", 3356 | "Variance Inflation Factor for SectionsMinVirtualsize : 622.11\n", 3357 | "Variance Inflation Factor for SectionMaxVirtualsize : 146.14\n", 3358 | "Variance Inflation Factor for ResourcesMeanSize : 13.04\n", 3359 | "12\n" 3360 | ] 3361 | } 3362 | ], 3363 | "source": [ 3364 | "count=0\n", 3365 | "for i in range(len(mc.columns[:-1])):\n", 3366 | " v=vif(np.matrix(mc[:-1]),i)\n", 3367 | " if v>10:\n", 3368 | " print(\"Variance Inflation Factor for {} : {}\".format(mc.columns[i],round(v,2)))\n", 3369 | " count=count+1\n", 3370 | "print(count) " 3371 | ] 3372 | }, 3373 | { 3374 | "cell_type": "code", 3375 | "execution_count": 38, 3376 | "id": "28be04aa", 3377 | "metadata": {}, 3378 | "outputs": [], 3379 | "source": [ 3380 | "# Remove Multicollinearity" 3381 | ] 3382 | }, 3383 | { 3384 | "cell_type": "code", 3385 | "execution_count": 39, 3386 | "id": "57f8e502", 3387 | "metadata": {}, 3388 | "outputs": [], 3389 | "source": [ 3390 | "x=dataset.drop(['Name','md5','legitimate','MajorImageVersion','MinorImageVersion','MinorSubsystemVersion','SizeOfHeapCommit','LoaderFlags','SectionsMeanRawsize','SectionsMeanVirtualsize','ResourcesMeanSize'],axis=1).values\n", 3391 | "y=dataset['legitimate'].values #dependent variable" 3392 | ] 3393 | }, 3394 | { 3395 | "cell_type": "code", 3396 | "execution_count": 40, 3397 | "id": "74d1f980", 3398 | "metadata": {}, 3399 | "outputs": [], 3400 | "source": [ 3401 | "extratrees=ek.ExtraTreesClassifier().fit(x,y)\n", 3402 | "model=SelectFromModel(extratrees,prefit=True)\n", 3403 | "x_new=model.transform(x)\n", 3404 | "nbfeatures=x_new.shape[1]" 3405 | ] 3406 | }, 3407 | { 3408 | "cell_type": "code", 3409 | "execution_count": 41, 3410 | "id": "9a959102", 3411 | 
"metadata": {}, 3412 | "outputs": [ 3413 | { 3414 | "data": { 3415 | "text/plain": [ 3416 | "12" 3417 | ] 3418 | }, 3419 | "execution_count": 41, 3420 | "metadata": {}, 3421 | "output_type": "execute_result" 3422 | } 3423 | ], 3424 | "source": [ 3425 | "nbfeatures" 3426 | ] 3427 | }, 3428 | { 3429 | "cell_type": "code", 3430 | "execution_count": 42, 3431 | "id": "3cbf22c4", 3432 | "metadata": {}, 3433 | "outputs": [ 3434 | { 3435 | "data": { 3436 | "text/plain": [ 3437 | "([,\n", 3438 | " ],\n", 3439 | " [Text(0.8680545570066952, 0.675633988236168, 'Important Features'),\n", 3440 | " Text(-0.8680544937492721, -0.675634069509298, 'Not Important Features')],\n", 3441 | " [Text(0.4734843038218337, 0.3685276299470007, '21%'),\n", 3442 | " Text(-0.47348426931778476, -0.36852767427779887, '79%')])" 3443 | ] 3444 | }, 3445 | "execution_count": 42, 3446 | "metadata": {}, 3447 | "output_type": "execute_result" 3448 | }, 3449 | { 3450 | "data": { 3451 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAADnCAYAAAAU2k2EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAeBElEQVR4nO3deZhcVb3u8e+vuzMPBSFAAgIlCfMUITJkkghHzrERmQMGAceDM6Cc24B6N6AYVDyKyHSvCiigGEZpRjFIIIKBEAhDEjB2mAMkUJl7qFrnj71z6IROp7pTVav2qvfzPPV0d3XV3m9leGvV2pM55xARkfDU+Q4gIiLloYIXEQmUCl5EJFAqeBGRQKngRUQCpYIXEQmUCl5EJFAqeBGRQKngRUQCpYIXEQmUCl5EJFAqeBGRQKngRUQCpYIXEQmUCl5EJFAqeBGRQKngRUQCpYIXEQmUCl5EJFAqeBGRQKngRUQCpYIXEQmUCl5EJFAqeBGRQKngRUQCpYIXEQmUCl5EJFANvgOIVESUGQCMAkYDOwMjgK2S2xbAkOQ2ACgAHV3cVgFLgDc3uL0B/JMol6vY6xEpgjnnfGcQKZ0o0wcYAxwM7AvsQlzq2wFW5rW/Cjyb3OYlX58nyq0t83pFuqSCl3SLMiOBccSFfgiwP/EovFp0AHOAma+64Q9OaL1sVsu0Ro30pSJU8JI+UeYA4FPAUcBHPKcp2rxCduan2i4eBzwBPADcDTzWMq1R/wmlLFTwUv2iTF/gcOJSPxL4kN9AvTOt/aRZV+WPGrfB3S8DfwT+0DKtcY6HWBIwFbxUr3ikfjpwMvHG0FQbt/ayN15n+MhuHrKQuOxvapnW+EKFYknAVPBSXaLMEGAq8J/EG0uD0OYaWnZtvT7bg6c8DvwcmN4yrbGjLKEkeCp4qQ5RZifgHOA0YLDnNCU3tzBq5tFtF03sxVNfBS4HrmmZ1vhuiWNJ4FTw4leU2QU4FzgF6OM5Tdlc1H7KrF/nP7nh/HtPrAauB37eMq1xQYli
SeBU8OJHlNkbOB84Aaj3nKbsDlp7+VtLGLZNCRZVAH4HfK9lWuMrJVieBEwFL5UVZbLAj4HjKf+BR1Wh1TUs2q31+p1LvNi1xFM3F2vqRjZGBS+VEWUGAk3E8+z9PaepqCcLuzx8XNsFk8q0+HeBHwG/bJnWqCNmZT062ZiUX5Q5CZgPfI8aK3eAO/OHlHPbwpbEn4gWZpuajy3jeiSFNIKX8onn2a8AerP3SBCcw41tvXLpUjLDK7TK6cDXWqY1vlWh9UkV0wheSi/K1BFlziE+JL9myx2glT4vVbDcId628Xy2qfmUCq5TqpQKXkor3og6g3jaoJ/fMP4943Z+w8NqtwJ+l21qbs42NafytA5SGip4KZ0o83ngGaBcGxRT5878uL4eV/9J4LlsU/MUjxnEI83By+aLMlsAvwWO9hukujiH27/1qnffZegw31mIT3twjk57UFs0gpfNE2X2IZ5rP9pzkqqzhr4Lq6TcAc4EZmSbmrs72ZkERgUvvRdlTgT+TnwpPNnA3MLoN31n2MAEYE62qVlTaDVCBS89F2WMKHMB8altB/mOU61uL4yvpitLrTMCeDDb1Hym7yBSfpqDl56JMv2JT3p1gu8o1cw5Cvu1XrNiOYMzvrN041LieXmVQKA0gpfiRZnBxJeZU7lvwmr6Lajycgf4NvCbbFNzg+8gUh4qeClOvKfMA8Bkz0lSYU5h17QcSXo6cEu2qbnmTiFRC1TwsmlRZmvig5cO9h0lLW7Pjx/oO0MPHAXcl21q3qxPHGa2skR5il1f1sw+s5nLONPMuvy7MrOHzGyBmc1Nbsf3YvljzOyTm5Nxc6jgpXtRZnvgYQK6fF65OUf+vsLY3Xzn6KFJwEPZpuZq2a2zW2bWAGSBzSp44t1Hu3sznuqcG5Pcpvdi+WOIDzgrmsVK0s0qeNm4KLMdMBPY3XeUNFnJgPkrGTjUd45eGAPcnW1q3qw9o8zsUDP7m5ndbGYLzWyamU01s3+Y2TwzG5U87lozu8rMZiaPOzK5v7+Z/TZ57FNmNjm5/3Qz+5OZ/Rm4H5gGTExG12clI/qZZjYnuY3rlOchM5tuZvPN7IakRL8JbAfMMLMZRb62rc3sFjObndzGJ/cfaGazkryzzGw3M+sLXAhMSTJOMbPIzL7TaXnPJrmzZvaCmV0BzAF2MLNzknU8Y2YXJI8fZGbNZvZ08txuj1LWxhXpWpTJAPcCH/YdJW2eKOz6ju8Mm+Eg4NZsU/OnWqY1tm3GcvYD9gCWAYuA/++cO9DMvgV8g3jkDPEo/GPEx1LMMLPRwNcAnHP7mNnuwP1mtmvy+EOAfZ1zy8zsUOA7zrl1bwwDgX9zzq01s12Am4CxyfM+AuwFvA48Cox3zl1mZmcDk51zG/s7u8HM1iTfHwb8Avhv59wjZrYjcF/yOucDk5xzHWZ2OHCxc+44M/s+MNY59/UkY9TNn9luwOecc181s08AuwAHEl8Y504zmwRsDbzunGtMltfttJoKXj4oyvQD7gD28R0ljW7PT0j7sQGfID5Z2ckt0xoLvVzGbOfcGwBm9k/iETfAPNbfUH+zc64AvGhmi4g/LU4AfgngnJtvZouBdQX/gHNu2UbW2Qe43MzGAPlOzwH4h3Pu1STPXOI3lkeKeB1TnXNPrPshKe89zf73YmRDzWwIkAGuS95YHL27vvBi59xjyfefSG5PJT8PJi78mcBPzewS4C7n3MzuFqiCl/VFmTrgBuJRlfSQc3T8pbB/CFNaJxKPvr/Sy+e3dvq+0OnnAuv3zob74Du6v5Tjqm5+dxawhPjTQx3xZQ27ypOn991XBxzinFvT+U4z+yUwwzl3jJllgYc28vwO1p8a77z3UufXZsCPnHNXb7gAMzuAeF7/R2Z2v3Puwu7CinR2OXCc7xBptYKBL6xiwGDfOUrkjGxT80VlXscJZlaXzMvvDCwg3qg/FSCZmtkxuX9DK4AhnX7OAG8knwg+S3EXc99wGZtyP/D1dT8k
nxbWrfu15PvTu1l+C7B/8tz92fgU6H3A581scPLY7c1sGzPbDljtnPs98NN1y9oYFby8L8r8F70fsQnweGH3pb4zlNh3y3zxkAXA34B7gDOcc2uJrwJWb2bziE+HcbpzrrWL5z4DdCQbHM9KnneamT1GPD3T3Wh/nWuAe4rdyAp8ExibbPh8Hjgjuf/HxCPqR1n/jWUG8ZTO3GSD6C3AsGSa6CvAwq5W4py7H7gR+Hvy5zCd+I1iH+AfyfPPB37QXVidqkBiUWYy8YFMxYx6ZCO+2vatOXcXDup2VJVCa4DxLdMan9rkI3vAzK4lnkfuze6HUgSN4GXd7pA3oXLfLM7RNqMwJoT59w0NAG7LNjVv5TuI9IwKvtZFmQbij8Hb+o6SdjkGzV9DvzQdwdoTOxHvWdPdBtAecc6drtF7eang5RLi3dJkMz1W2HNju++F4j+Ac32HkOKp4GtZlDkWONt3jFDcmp9Q7WePLIULs03NE32HkOJoI2utijLbAs8BmlctAedo3b31WtdK31o4K+NLwL4t0xrXbPKR4pVG8LXrKlTuJfMuQ16okXIHGE18jhWpcir4WhRlPoMukl1Sswp75XxnqLCzsk3NH/UdQrqngq81UWYr4Oe+Y4Tm1vyELXxnqLB64qtB9fUdRDZOBV97fkZ8RjopEedYM7Ow7x6+c3iwN3Ce7xCycSr4WhJlPgac6jtGaJYy9IV2Gmp1JHtetql5L98hpGsq+FoRZYz45ERSYjML+yz3ncGjPsTHUkgVUsHXjpN4/+IHUkK35ifW+t5IjdmmZp1eugqp4GtBlOkL/NB3jBA5x6q/F/YM8fwzPaVRfBVSwdeGr6FL75XF22Tmd9DQm6v3hOagbFOzriNQZVTwoYsyWwDf9R0jVA8X9lvhO0MVuTjb1KyrxFURFXz4vg0M8x0iVLfmJ2iX0/ftCnzRdwh5n85FE7IoMxB4BRV8WTjHitGtvxuQp16j1ve1AKNbpjXmfQcRjeBD9zlU7mWzhC3nq9w/IItOg1E19I8zVFGmjvgq81ImD+XHrC7HcjuWv807zT8jv/JdzOoYPOYIho79NKvmP0LukRtpX/oKI079Gf1G7gLA2lefZ9n9V2D1fRh+1Dn02XI7CmtX8vYdl7DNiRdiVrJrdBTrTOJrj4pnGsGH62hglO8QISvb/HtdPVtO/gLbf+kqRnz2p6yY00zbOy/Td/hObH3MefTbYf0DR5fPvo2tjz6XLSadyoqn7gbgvVl/IHPIiT7KHWBCtqn5AB8rlvWp4MP1bd8BQuYcuSfcbruVY9kNg4fRb8RoAOr6DaTPVjuQX7GUPsN3oM9WH/rA462uAdfRhutoxeoaaH/3DfIrltJ/x33KEa9Y+vRYBVTwIYoyY4FxvmOE7HW2WlCgruwXKe/ILaFtySL6bbfx95LMwSew9N7LWf7EHQzZ/0jee/h6tph4SrmjbcqJ2abmkb5D1DrNwYfpNN8BQvfX/EfKfjWjQtsa3r7tYoYd9iXqurmWd99td2bkqZcCsPaVZ6kfHG9Xf/uOS7C6erb8+BeoH7RlueNuqA/wn0BU6RXL+zSCD02UaSA+74yU0a35iSPKuXyX7+Dt2y5m0J6HMnC34j6MOefIzfojmfEn896jN7LFhM8waK/JLH/yz+WM2p3P+FqxxFTw4fl3YLjvECErON6b60btWq7lO+dYes8v6LPVDgw98Jiin7fq2QcZMGos9f0H49pbwerALP7ej120sdUvTdGE57O+A4TuNTd8gaPuoHItv/W151n13Az6bJ3l9d9+A4AtJ52Ky7ez7IGrya/J8db0C+i7zYfZdspFABTa17Ly2QfZ9sT456EfPZq3b7sYq29g+FH/Va6oxTgJeNJngFqmI1lDEmWGAm8CA3xHCdlvO4742wUdp+n0uMV5Gci2TGtU0XigKZqwHIvKvexuy0/YzneGFNkROMR3iFqlgg/L0b4DhK7gbOkzbufRvnOkjDb6e6KCD0V8
UY/DfMcI3ctum4Xg5/DQFCt+S7GUlAo+HBOBwb5DhO6BwgHtvjOk0IeyTc27+A5Ri1Tw4TjCd4BacFt+wva+M6TUZN8BapEKPhyanimzvLO3n3dZncCtd1TwHqjgQxBlhgFjfMcI3WI34kXfGVLsUN8BapEKPgzj0d9l2d1XGNvhO0OKjcg2Ne/uO0StUSmEQYeDV8Dt+fE7+M6QcpqmqTAVfBg+4jtA6PLOlixwO37Yd46U0ymsK0wFH4b9fQcI3SI38iXfGQKw16YfIqWkgk+7KLM18MHL/EhJ3Vs4sOA7QwB2zzY1q3MqSH/Y6afpmQq4LT9hR98ZAjAA2Nl3iFqigk8/FXyZdbi61xe57XbynSMQmqapIBV8+ukQ8DJ7yW3/L98ZAqKCryAVfPpp6qDM7skfqHOZl44KvoJU8OmnfbPL7PbC+KzvDAHRJ84KUsGnnwq+jNpd/auL3QjtpVQ62/gOUEtU8GkWn4NmkO8YIVvoPtTiO0NgtvYdoJao4NNNo/cya84frIt7lNbAbFOzBiUVooJPN52bvMzuLIzT6QlKT6P4ClHBp9sQ3wFC1u7qF7/qttYFtktPBV8hKvh0G+A7QMhecDu+7DtDoLShtUJU8Ommgi+ju/KH6P9HeQzzHaBW6B9wuqngy+jO/LjRvjMEqq/vALVCBZ9uA30HCFWra/jXmwzb1neOQDX4DlArVPDpphF8mTzvsq/6zhCwet8BaoXeSdOtj+8AoRpjLx2wqN/UZb5zhGgNfTvgLd8xaoIKPt3afAcIlRkDDacpsDIYRKsOHqsQTdGk21rfAUR6Ie87QK1Qwadbq+8AIr3Q7jtArVDBp9tK3wFEekHbNipEBZ9uy30HEOmFd3wHqBUq+HRTwUsaqeArRAWfbtrXTNJIBV8hKvh008mwJG06gPd8h6gVKvh0exPtSSPpsowop4uYV4gKPs3i/yiv+I4h0gOv+Q5QS1Tw6adpGkmTBb4D1BIVfPot9h1ApAfm+w5QS1Tw6dfiO4BID6jgK0gFn37zfAcQ6QEVfAWp4NPvSd8BRIpUABb6DlFLVPBpF+VeRgeOSDosJsqt8R2ilqjgwzDHdwCRIsz2HaDWqODDoGkaSYOZvgPUGhV8GFTwkgaP+A5Qa1TwYXjMdwCRTcgBz/gOUWtU8CGIcq8Bz/uOIdKNWUS5gu8QtUYFH457fQcQ6YamZzxQwYdDBS/V7CHfAWqRCj4cDwOrfYcQ6cLbaDuRFyr4UES5VjRKkurUrPl3P1TwYbnHdwCRLtzhO0CtUsGH5Tbi832IVIsVaPuQN+acrp4VlCgzAzjUx6oXvJNnyvT3TzWy6N0CF07ux+RsA2c0r2VlmyO7RR03HDuAof2MR1/u4CvNa+nXADcdN5DRw+p4b61jyvTV3Dt1IGbm42VIad1IlJvqO0St0gg+PDf6WvFuw+uZe8Zg5p4xmCe/PIiBfYxjdu/DF/+8hmmH9WPeVwZzzO4N/OTR+DKyl/69jVtOHMDFH+/PlbPbALjob62cN6Gfyj0cN/sOUMtU8OG5GfB+xr4H/5Vn1LA6dtqijgXvFJi0Uz0A/7ZzA7e80AFAn3pY0wGr2x196uGfywq8tqLAx7INPqNL6bwJ3O07RC1TwYcmyuWI5+K9+sOz7Zy8dx8A9t6mnjsXxKX+p+fbeWV5vJng3An9+PKf1/Lzx9v4+oF9Of+va7locj9vmaXkfk2Ua/cdopap4MP0W58rb8s77lzQwQl7xiPx33y6P7+a3cYB16xkRSv0rY+nX8aMqOexLw5ixmmDWPRuge2G1OGAKdNXc8qta1iyUtuLU6wAXOM7RK3TZ+EwPUh85Zxdfaz8nhc72H9kHdsOjscPuw+v5/7PDgJg4dI8zS+uP6hzzvGDh1v54/ED+fo9a7jg0H60vOe47PE2fnhY/4rnl5K4O7kYjXikEXyIopwDLvW1+ps6Tc8AvLUqHokXnOMHD7dxxti+6z3+uqfbadylgS0HGKvb
oc7i22p9uE+zq3wHEI3gQ3Y9cBGwTSVXurrd8cCiPFcfOeB/77tpXju/mh239bF7NPC5MX3We/x1T7dz/ykDATj74L4cd/Ma+tbDTccNQFKpBR10VxW0H3zIosx3iUtepJLOJMr9wncI0RRN6K4AVvkOITXlDeBq3yEkpoIPWZRbBvzGdwypKdOIcmt9h5CYCj58PwVafYeQmvA62jWyqqjgQxfvqvZL3zGkJmj0XmVU8LXhh8BS3yEkaBq9VyEVfC2Icu+hvWmkvL6XXHRGqogKvnZcAbzkO4QEaRaeT48hXVPB14r4pE9NvmNIcPLA15Kjp6XKqOBrSZS7BfiL7xgSlCuJcnN9h5CuqeBrzxeBlb5DSBCWAN/1HUI2TgVfa6LcYuD/+I4hQTgnuf6AVCkVfG26EnjIdwhJtbuIcr/zHUK6p4KvRfEGsS8Cq31HkVR6h/jfj1Q5FXytinL/BM71HUNS6UtEuSW+Q8imqeBr2y+Bu3yHkFS5kih3u+8QUhwVfC2Lp2pOAxb7jiKp8Axwtu8QUjwVfK2LTyl8IjrjpHQvB0zRycTSRQUvEOX+AXzNdwypWnnicp/vO4j0zCYL3sycmV3a6efvmFm0ieccbWZ7buR3kZl9p8dJN4OZnW5m223G88eY2Sc38rtDzSxnZnOTW6+OFDWzM81sYG8zbrYo92t0oWTp2llEuft8h5CeK2YE3woca2bDe7Dco4EuC77SzKweOB3odcEDY4AuCz4x0zk3Jrkd3st1nAn0qODNrNQXTf8m8ECJlynpdiVRTtcTSKliCr6D+DzPZ234CzPbycweNLNnkq87mtk44CjgJ8mIdtTGFmxmD5nZf5vZw2b2gpl91MxuNbMXzewHyWOyZjbfzK5L1jN93UjXzA4zs6fMbJ6Z/cbM+iX3t5jZ983sEeBkYCxwQ5JnQPK72Wb2rJldY2bWKc8lZvYPM1toZhPNrC9wITAlef6UYv5gzeyUZDlzzezq5I0GM7vSzJ4ws+fM7ILkvm8SvwHNMLMZyX0rOy3reDO7Nvn+WjP7WfK4S8xslJnda2ZPmtlMM9s9edwJyet72sweLiZzckKyY4HZRT1eQvcX4jd9Sali5+B/BUw1s8wG918OXO+c2xe4AbjMOTcLuBM4JxnR/nMTy25zzk0inh64g3gueG/gdDPbKnnMbsA1yXqWA181s/7AtcAU59w+QAPwlU7LXeucm+Cc+z3wBDA1ybMGuNw591Hn3N7AAODITs9rcM4dSDyi/r/OuTbg+8Afk+f/sYvXMLHTFM35ZrYHMAUY75wbQzyHOTV57PnOubHAvsDHzGxf59xlxBdMmOycm7yJPy+AXYHDnXPfJn7z/YZz7gDgO8SnBSbJfIRzbj/iN9ziRLmVxJ9WFhT9HAnRc8AJRLkO30Gk94oqeOfccuB6PvhufghwY/L974AJvchwZ/J1HvCcc+4N51wrsAjYIfndK865R5Pvf5+sZzfgX865hcn91wGTOi23qyJeZ7KZPW5m84CPA3t1+t2tydcngWyRr6HzFM0PgcOAA4DZZjY3+Xnn5LEnmtkc4Klkvb2ZyvqTcy5vZoOBccCfkvVcDYxMHvMocK2ZfQmo79HSo9w7wBHAa73IJuk3HzgsuVCMpFhP5nB/Dsyh+xP79+ac0Ot2zyuw/q56Bd7Pt+FyHWCbWO6qru5MRv5XAGOdc68kG4z7d5EnT8/+fNZbDXCdc269I0XN7MPEo+yPOufeTaZd+nfxfFj/NW/4mHWvrQ54L/mUsP6TnTvDzA4CGoG5ZjbGOVf8Zfui3GKizL8DDwNbFv08SbsXgY/rSNUwFL2bpHNuGXAz8IVOd88CTkq+nwo8kny/AhhSioCJHc3skOT7k5P1zAeyZjY6uf+zwN828vzOedaV5TvJCPj4Itbf09fzIHC8mW0DYGbDzGwnYChxOefMbFvgP7pZxxIz28PM6oBjulpJ8snqX2Z2QrIeM7P9ku9HOeced859n/jc
ITt0tYxuRblniUfyy3r8XEmjRcTl/obvIFIaPd0P/lKg89403wQ+Z2bPEBfst5L7/wCck2wA3ehG1h54ATgtWc8w4Ern3Frgc8TTE/OIR/wb283vWuCqZBqjFfh/xFNCt1PcBsUZwJ7FbmR1zj1PfJ7s+5PMDwAjnXNPE0/NPAf8hngaZZ1rgHvWbWQlvvrSXcBfge7+w00FvmBmTyfL/XRy/0+Sjc/PEo/Cny7idX5QlJtNPPWl//RhawEmE+Ve9R1ESsecq+4rbZlZFrgr2SAqvkSZUcR7VWQ9J5HSWwgcQZRr8R1ESktHskpx4rNPTiD+NCXheBQ4ROUepqofwUuViTLDgXuIjy2QdPsTcKrOLxMujeClZ+JdKCcRH/cg6fVTdPKw4GkEL70XZc4GfkxP97MXnzqAM4lyv/IdRMpPBS+bJ8ocRnxQ2Vabeqh49wZwElGuuFNXSOqp4GXzRZks8S6n+/kNIt2YAZysA5hqi+bgZfPFe2AcTHwJQI0YqksH8D3gcJV77dEIXkoryhxBfDqLkZt6qJRdCzCVKDfLdxDxQyN4Ka34whB7EZ8UTvzIEx91vrfKvbZpBC/lE2WOJD59xPa+o9SQJ4EvE+Xm+A4i/mkEL+UT5e4iPq3zRcAaz2lCtwo4GzhI5S7raAQvlRFldiLeZ/5E31EC44h3U20iyi32HUaqiwpeKivKTCS+tsD+npOE4G7gfKLcXN9BpDqp4KXyokwdcAJwLtp3vjceAc4jys30HUSqmwpe/IoyjcRFP953lBSYBVxMlGv2HUTSQQUv1SHKTALOI76ClLyvjXiO/RdEuSd9h5F0UcFLdYkyewCfB04FtvGcxqclxLuYXkWUe9N3GEknFbxUpyjTh/iC4V8gvnZtLZyxsh24l/ggsduJcm2e80jKqeCl+kWZkcTXnj2W+Jw35jdQSeWBh4gvvjGdKLfUbxwJiQpe0iXKjAA+BXwSOAwY4jdQrywhPrvjA8CdyUVUREpOBS/pFU/jTAAmAgclt2o8L/17xKP0vwJ/Jco95zWN1AwVvIQlyozi/bIfC+wCbF3BBC3AvOT2TPJ1PlGuUMEMIoAKXmpBlBkC7AyM6vR1eyCT3IZ2+trQxRIcsBZYDbxFfGWk15Ov675/GXiOKLe8nC9FpCdU8CKdRZmBxCVfIC72DqJcq99QIr2jghcRCZROFywiEigVvIhIoFTwIiKBUsGLiARKBS8iEigVvIhIoFTwIiKBUsGLiARKBS8iEigVvIhIoFTwIiKBUsGLiARKBS8iEigVvIhIoFTwIiKBUsGLiARKBS8iEigVvIhIoFTwIiKBUsGLiARKBS8iEigVvIhIoFTwIiKBUsGLiARKBS8iEigVvIhIoFTwIiKB+h/BIyvZDHErBAAAAABJRU5ErkJggg==\n", 3452 | "text/plain": [ 3453 | "
" 3454 | ] 3455 | }, 3456 | "metadata": {}, 3457 | "output_type": "display_data" 3458 | } 3459 | ], 3460 | "source": [ 3461 | "dataset.columns.size\n", 3462 | "imp_features_visual=['Important Features','Not Important Features']\n", 3463 | "imp_features_visual_val=[nbfeatures,57-nbfeatures]\n", 3464 | "plt.pie(imp_features_visual_val, labels=imp_features_visual, autopct='%0.f%%')" 3465 | ] 3466 | }, 3467 | { 3468 | "cell_type": "code", 3469 | "execution_count": 43, 3470 | "id": "b018fe70", 3471 | "metadata": {}, 3472 | "outputs": [], 3473 | "source": [ 3474 | "x_train,x_test,y_train,y_test=train_test_split(x_new,y,test_size=0.2)" 3475 | ] 3476 | }, 3477 | { 3478 | "cell_type": "code", 3479 | "execution_count": 44, 3480 | "id": "3d0336b6", 3481 | "metadata": {}, 3482 | "outputs": [], 3483 | "source": [ 3484 | "features=[]\n", 3485 | "index=np.argsort(extratrees.feature_importances_)[::1][:nbfeatures]" 3486 | ] 3487 | }, 3488 | { 3489 | "cell_type": "code", 3490 | "execution_count": 45, 3491 | "id": "bc732e5b", 3492 | "metadata": {}, 3493 | "outputs": [ 3494 | { 3495 | "name": "stdout", 3496 | "output_type": "stream", 3497 | "text": [ 3498 | "1. feature SizeOfStackReserve (0.000042)\n", 3499 | "2. feature SizeOfUninitializedData (0.000998)\n", 3500 | "3. feature BaseOfCode (0.001076)\n", 3501 | "4. feature SizeOfInitializedData (0.001339)\n", 3502 | "5. feature MinorImageVersion (0.001343)\n", 3503 | "6. feature SectionsNb (0.001391)\n", 3504 | "7. feature SectionsMinEntropy (0.001397)\n", 3505 | "8. feature BaseOfData (0.001520)\n", 3506 | "9. feature ImportsNbOrdinal (0.001540)\n", 3507 | "10. feature DllCharacteristics (0.001577)\n", 3508 | "11. feature SizeOfCode (0.001702)\n", 3509 | "12. feature AddressOfEntryPoint (0.002303)\n" 3510 | ] 3511 | } 3512 | ], 3513 | "source": [ 3514 | "for f in range(nbfeatures):\n", 3515 | " print(\"%d. 
feature %s (%f)\"%(f+1,dataset.columns[2+index[f]],extratrees.feature_importances_[index[f]]))\n", 3516 | " features.append(dataset.columns[2+f])" 3517 | ] 3518 | }, 3519 | { 3520 | "cell_type": "code", 3521 | "execution_count": 46, 3522 | "id": "8019777c", 3523 | "metadata": {}, 3524 | "outputs": [], 3525 | "source": [ 3526 | "model ={ \"RandomForest\":ek.RandomForestClassifier(n_estimators=50),\n", 3527 | " \"DecisionTree\":tree.DecisionTreeClassifier(max_depth=10),\n", 3528 | " \"LogisticRegression\":LogisticRegression()\n", 3529 | " }" 3530 | ] 3531 | }, 3532 | { 3533 | "cell_type": "code", 3534 | "execution_count": 47, 3535 | "id": "26424604", 3536 | "metadata": {}, 3537 | "outputs": [ 3538 | { 3539 | "name": "stdout", 3540 | "output_type": "stream", 3541 | "text": [ 3542 | "RandomForest : 0.9942774357116987\n", 3543 | "DecisionTree : 0.9903295907279971\n", 3544 | "LogisticRegression : 0.6968489677653025\n" 3545 | ] 3546 | }, 3547 | { 3548 | "name": "stderr", 3549 | "output_type": "stream", 3550 | "text": [ 3551 | "C:\\Users\\vajha\\anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=2):\n", 3552 | "ABNORMAL_TERMINATION_IN_LNSRCH.\n", 3553 | "\n", 3554 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n", 3555 | " https://scikit-learn.org/stable/modules/preprocessing.html\n", 3556 | "Please also refer to the documentation for alternative solver options:\n", 3557 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", 3558 | " n_iter_i = _check_optimize_result(\n" 3559 | ] 3560 | } 3561 | ], 3562 | "source": [ 3563 | "results={}\n", 3564 | "for algo in model:\n", 3565 | " clf=model[algo]\n", 3566 | " clf.fit(x_train,y_train)\n", 3567 | " score=clf.score(x_test,y_test)\n", 3568 | " print(\"%s : %s\"%(algo,score))\n", 3569 | " results[algo]=score" 3570 | ] 3571 | }, 3572 | { 3573 | "cell_type": "code", 3574 | "execution_count": 48, 
3575 | "id": "295df33e", 3576 | "metadata": {}, 3577 | "outputs": [ 3578 | { 3579 | "data": { 3580 | "text/plain": [ 3581 | "'RandomForest'" 3582 | ] 3583 | }, 3584 | "execution_count": 48, 3585 | "metadata": {}, 3586 | "output_type": "execute_result" 3587 | } 3588 | ], 3589 | "source": [ 3590 | "winner=max(results,key=results.get)\n", 3591 | "winner" 3592 | ] 3593 | }, 3594 | { 3595 | "cell_type": "code", 3596 | "execution_count": 49, 3597 | "id": "b4203503", 3598 | "metadata": {}, 3599 | "outputs": [ 3600 | { 3601 | "name": "stdout", 3602 | "output_type": "stream", 3603 | "text": [ 3604 | "False positive rate : 0.102353 %\n", 3605 | "False negative rate : 0.174237 %\n" 3606 | ] 3607 | } 3608 | ], 3609 | "source": [ 3610 | "clf=model[winner]\n", 3611 | "res=clf.predict(x_new)\n", 3612 | "mt=confusion_matrix(y,res)\n", 3613 | "print(\"False positive rate : %f %%\" % ((mt[0][1] / float(sum(mt[0])))*100))\n", 3614 | "print(\"False negative rate : %f %%\" % ((mt[1][0] / float(sum(mt[1])))*100))" 3615 | ] 3616 | }, 3617 | { 3618 | "cell_type": "code", 3619 | "execution_count": 50, 3620 | "id": "395ebe6a", 3621 | "metadata": {}, 3622 | "outputs": [], 3623 | "source": [ 3624 | "# Confusion Matrix" 3625 | ] 3626 | }, 3627 | { 3628 | "cell_type": "code", 3629 | "execution_count": 51, 3630 | "id": "619f2146", 3631 | "metadata": {}, 3632 | "outputs": [ 3633 | { 3634 | "data": { 3635 | "text/plain": [ 3636 | "array([[96625, 99],\n", 3637 | " [ 72, 41251]], dtype=int64)" 3638 | ] 3639 | }, 3640 | "execution_count": 51, 3641 | "metadata": {}, 3642 | "output_type": "execute_result" 3643 | } 3644 | ], 3645 | "source": [ 3646 | "cf=confusion_matrix(y,res)\n", 3647 | "cf" 3648 | ] 3649 | }, 3650 | { 3651 | "cell_type": "code", 3652 | "execution_count": 52, 3653 | "id": "87bbf42c", 3654 | "metadata": {}, 3655 | "outputs": [ 3656 | { 3657 | "data": { 3658 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAVMAAAEWCAYAAADb3nSrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAfFklEQVR4nO3dd5gV5fnG8e/DLggI0hEFARUFUWNsaDACtgR7iaJYoogtllh/atRYorFgiQ1jBRQiKokaC4IFaYoiGGn2RlWkqICgsMvz++OdxcOyu2d3edk57t6f69qLM3OmPDNn5p533jm7mLsjIiLrp1baBYiIVAcKUxGRCBSmIiIRKExFRCJQmIqIRKAwFRGJINUwNbN6Zva8mX1vZsPWYzknmNnLMWtLg5m9ZGYnp11HGsxsmZltlXYdkL0WM5thZj2qrqLcYWZXmNnDKa27vZm5meWnsf6s3D3rD3A8MAlYBnwFvAT8tjzzZlnuScBEIH99l7UhfoAegANPFxu/UzJ+dDmXcy0wJMXtaJ/Um58MG3AP8CHQegOu90Lga+B7YACwUeTlnwKM38D7bhBwQxV9Tj2AOeu5jKbA88k+nwdcWo55rgemAQXAtRto274EVgLNi41/Lzk225djGWsdx7n2k7VlamYXAXcCNwKbAm2B+4DDs81bDu2Aj929IMKyNpQFQFcza5Yx7mTg41grsKBK7hLMzIAHCCdud3efu4HW83vgcmA/wkmwFXDdhliXrOX/gLrAZsD2wBvlmOdT4FLgxQ1YF8AXQO+iATPbEai3gddZIevV6s1yJWhEaI0eU8Y0GxHCdl7ycydJC4TkSgtcDHxDaNX2Sd67jnClWpWsoy/FWnCs26I6BfgcWEr4YE7IGD8+Y76uwDuEq/M7QNeM90YTrsRvJMt5mWJXy+ItBeB+4JxkXF4y7moyWqbAXcBsYAkwGdg7Gd+z2HZOyajj70kdK4AOybjTkvf/Cfw7Y/m3AK8BVolWQdF+3Ah4FHgXaJbxfifgFWAx8BHQKxm/OzCfjJYA8AfgvXKs83Hgxozh/YCvy5jegQ7J64OA95PPZy5wSSnzrPW5F3uvxG1K3mtGaL0tSY6PG4odP558Hmckn9vK5LN7Pnn/S2D/5PW1wDBgSFLvNGBb4C+EY3428LuMZfcBPkim/Rw4Mxm/cXIcrE7WtQzYnNAVdznwGbAIeApoWsZ+vB74V2VaVsk2XJtlmmtJzlFCaA9J6vou2ZebljLfl8BVwDsZ424DriSjZQocDPwv+WxmZ9ZDRh4A+wDTMt57FZiYMTweOCJ5XbT/libH1ZHFjqE3gH8kx8oNhPPkNmAW4fi/H6iXdf9l2XE9CU3/UpvVwN+At4CWQAvgTeD6jDAqSKapTThJlgNNin8wpQxn7ryNkx3cMXlvM2D74icV4TbnW0IXQj7hSvgtSXgQAuszwgFfLxm+uZRt60EIzq7A2xkn+kjgNNYO0xMJJ2k+4eLxNVC3pO3KqGMWofWQn+yf0fwcpvUJrd9TgL2BhUCbSp4kRfvx38DbQOOM9zYmHLR9kjp2SdZVtG/fBw7MmP6ZZPvaEk6gtqWscwpwbMZw86SGZqVMnxmmX/HzxagJsEsp86z53IuNz7ZNTyQ/9YHOybTrhGnyehDFbvNZN0x/BH6frOsxwoX+yuQzPR34ImPeg4GtCV0t3Qnnwy6Zx1uxdV1AOL/aEE7yB4ChZXzWhxIC+dRKHCcVDdMzCRel+oRGxq7AJqXM9yWwP+HCtl0y/WzC3WlmmPYAdiRcRH5FCLOiUGzPz3lQl3DxaZ4Mf01ozDUknNcr+PmcP4afL0zHAj8Am2UcQwXAecly6hEahM8RsqRhso03Zdt/2W4tmwELvezb8BOAv7n7N+6+gNDiPCnj/VXJ+6vcfTjhitsxy3pLsxrYwczquftX7j6jhGkOBj5x98HuXuDuQwl9g4dmTDPQ3T929xWEK/2vy1qpu78JNDWzjsAfCSdM8WmGuPu
iZJ23Ew78bNs5yN1nJPOsKra85YSAvoNwkJ/n7nOyLC+b3wFPuft3GeMOAb5094FJHe8C/wGOTt5/NKkDM2tKCI3H3X2Wuzd291mlrKsB4c6gSNHrhuWocxXQ2cw2cfdvk5oqotRtMrM8Quv6Gndf7u7vJ9u4Psa5+8jkPBlGaFTcnHymTwDtzawxgLu/6O6feTCGcGe0dxnLPhO40t3nuPtPhDA7uqTbUTPrADxICKTLzaxPMn4jM1tpZo3WczuLW0XIiA7uXujuk919SZZ5BhPOoQMI5+Va3UzuPtrdp7n7anefCgwlXHQoNt2PhOc43YDdgKmE1uhewJ6EDFiUTDvM3ecly3wS+ATokrG4ee5+T/L5/Ui4AF7o7ovdfSmhi/O4bDsjW5guAppn6UfYHJiZMTwzGbdmGcXCeDnhRKsQd/+BcFU5C/jKzF40s07lqKeoptYZw19Xop7BwLmE24tnir9pZheb2QfJNxO+I3SRNM+yzNllvenuEwm3gkYI/RIlT5eXJT9lnZiHANeY2akZ49oBe5jZd0U/hAtkq+T9IcChZtYA6EUIjq+ybBeEi+YmGcNFr5eWY94/EO4AZprZGDP7TTnmyVTWNrUgtEAy932Zn0M5zM94vYLQACnMGIbkGDOzA83sLTNbnNR1EGUfJ+2AZzK24wOgkPD8ori+wCvuPpZw0bs+CdQ9gf+5+/clzLM+BhPu0p4ws3lm1s/MapdjnuMJLcJ1GiVmtoeZvW5mC8zse8L5Xtr+GUO4cHRLXo8mBG/3ZLhomX80s/cy9uEOxZaZ+fm3ILS0J2dMPyIZX6ZsYTqBkNRHlDHNPMIHXqRtMq4yfiBsSJFWmW8mV/8DCLf4HwIPlaOeoprW90HLYOBsYHjSalwjCbDLCGHTxN0bE1piVlR6KcssbXzRcs8htHDnER4QlLwQ9+3dvUHyM66MRb5JaKHfZWbHJ+NmA2OSVmbRTwN3/1Oy7LmE4+BIwh3H4LJqzjCD8K2HIjsB84taC2Vx93fc/XBC19GzlHEhKUVZ27SAcFvXJmP6Lcoqp4LrLpWZbURoId9G6FtsDAyn7ONkNqGbJXNb6nrJDw7zCduGu39B6KbrBzxM6GqLKrnbvM7dOxO6wg4htDrLmmcmoRvkIODpEiZ5nHCLvYW7NyL0V1oJ08G6YTqGYmFqZu0IOXEu4ba/MTC92DIz9/tCwgVw+4z93cjdsza4ygzT5Ep2NdDfzI4ws/pmVju5uvZLJhsKXGVmLcyseTL9kGwrLsV7QDcza5vckvyl6A0z29TMDjOzjYGfCC2fwhKWMRzY1syON7N8MzuW0C/2QiVrAtYcnN0JfWHFNSQcxAuAfDO7mrVbZfMJt3rlfmJvZtsSOsNPJITYpWb268pV/7Pk1vIo4EEzO5qwX7Y1s5OSz7a2me1uZttlzPYYIcx3pIRWeSkeA/qaWWcza0J4+DAo20xmVif53nCj5DZ5CSV/zhmzWN3Mn7K2KWkxPg1cmxzPnSg7AOYTvokQQx3CxXEBUGBmBxK6XjLX1azY7fj9wN+TUCA5zw4vZflPA8cm52oeYd9NIfTRlnpRSPZPXUIe5Cf7MS/bxpjZPma2Y8a6VlH2Z1WkL7BvcrdZXENgsbv/aGZdCK3Y0rxJ6ErrQnj4NIPkrgQYm0yzMWHbFyQ19yG0TEvk7qsJ4fsPM2uZzNM6+XZKmbKe3O5+B3AR4WRYQLhSnktoMUA44ScR+iymEZ4U35BtuaWs6xXgyWRZk1k7AGsRHnzMIzx1605oKRZfxiLCFfJiQjfFpcAh7r6wMjUVW/Z4dy+p1T2S8N3bjwldCj+y9q1D0S8kLDKzrP1/SbfKEOAWd5/i7p8AVwCDk9bNekn287GEcOtBOKGPI+zbrwnfHMhczzMkt5tFJ0BywVtmZm1LWccIQqvodcI+mQlcU84STwK+NLMlhNu8E8uYtiuhJVH8p6xtOpfQDfM1oaU9lHCBLskjhP7b78zs2XLWX6K
k/+3PhJb2t4SgeC7j/Q+TWj5P1rc54VsizwEvm9lSwsOoPUpZ/oRkmdckyx9JaFz8ARhqZjuXUtpDhH3Wm9BYWMHazz1K04rwUHMJofthDOVoSCV9xpNKefts4G/Jtl5NGXclybH4LjDD3VcmoycAM939m2Sa94Hbk/HzCQ2CbF8Xu4zwdbG3kmPwVcrxnMfco93FSDVmZp8Rvsbzatq1xGZmtwCt3P3ktGuRXy79br5kZWZ/INwqjUq7lhjMrJOZ/cqCLoTbzvJ2X4iUKDd/x1VyhpmNJvQ5n5T0J1UHDQm305sTvlh/O/DfVCuSXzzd5ouIRKDbfBGRCGrkbb7l13OrU55fxJFcsfN2JX5pQHLUzJlfsnDhwtK+H1ot1cwwrdOQjTr2SrsMqYA33r437RKkAvbaY7e0S6hyus0XEYlAYSoiEoHCVEQkAoWpiEgEClMRkQgUpiIiEShMRUQiUJiKiESgMBURiUBhKiISgcJURCQChamISAQKUxGRCBSmIiIRKExFRCJQmIqIRKAwFRGJQGEqIhKBwlREJAKFqYhIBApTEZEIFKYiIhEoTEVEIlCYiohEoDAVEYlAYSoiEoHCVEQkAoWpiEgEClMRkQgUpiIiEShMRUQiUJiKiESgMBURiUBhKiISgcJURCQChamISAQKUxGRCBSmIiIRKExFRCJQmIqIRKAwFRGJQGEqIhKBwlREJAKFqYhIBApTEZEIFKYiIhEoTEVEIlCYiohEoDAVEYkgP+0CZF3n9O5Bn6O6YmYMfPoN7n18NAB/Oq47Zx3bjYLC1YwYN50r7/ovADtsszn3XtWbhhvXZfVq57cn9qNWLeNf/fqyVZvmFK52ho+dxl/vfg6AEw/dgxsvPIJ533wPwP1PjmHQMxNS2daa5N6772LggIdwd/qcejrnnX8BU6dM4bxzzuKHZcto1749Ax/7F5tssknapUolVIswNbOewF1AHvCwu9+cckmV1nnrzehzVFf2PulWVq4q5Ln+Z/PS+Bm0btmYQ3rsyO69bmLlqgJaNGkAQF5eLQbccDJ9//oY0z6eS9NGG7OqoJCN6uRz52OvMXbSJ9TOz+OlB87jd3t15uU33gfgPyPf5cJbhqW5qTXKjOnTGTjgIca9OZE6depw2ME9OfCgg/nTmadxc7/b2Ltbdx4dOIB/3H4r11x3fdrlSiX84m/zzSwP6A8cCHQGeptZ53SrqrxOW7Zi4rQvWfHjKgoLVzNu8qccvs9OnHHM3tw28BVWrioAYMG3ywDY/zedmP7JXKZ9PBeAxd//wOrVzoofVzF20icArCoo5L0PZ9O6ZeNUtkngww8/oEuXPalfvz75+fns3a07//3vM3zy8Uf8du9uAOy7/wE8+8x/Uq5UKusXH6ZAF+BTd//c3VcCTwCHp1xTpc34bB6/3aUDTRttTL26ten52+1p06oJHdq1ZK+dt2bsY5fw8sPns2vntgBs07Yl7vBc/3N48/HLuOjk/ddZZqMG9Tio2468PvGjNeMO3+/XTHzyLzx+a1/abNq4qjavxtp++x0YP34sixYtYvny5Yx4aThzZs+m8/Y78MLzofvl6X8PY87s2SlXKpVVHcK0NZB5BM5Jxq3FzM4ws0lmNskLVlRZcRX10RfzuX3QK7zwz3N5rv85TP14LgUFheTn1aLJJvXp9sfbuOIfzzKk36kA5Ofl0XXnrehz5SD2O/UODtt3J3p02XbN8vLyavHozadw39DRfDl3EQDDx06n08HX0OXYmxj19kc89LeTUtnWmqTTdttx8SWXcUjPAzjs4J786lc7kZ+fzwMPDeCBf/ana5ddWbZsKXXq1Em7VKmk6hCmVsI4X2eE+4Puvpu772b59aqgrMp79NkJdD3+Fg7oeyfffv8Dn85awNz53/Hsa1MAmDRjJqtXO82bNGDuN98xbvKnLPruB1b8uIoR42ewc6ct1iyr/1W9+WzWgjUPsSB0BRR1Fwx4+g123q5tlW5fTXXKqX2
Z8M67vPr6WJo0bUqHDtvQsVMnXnjpZd6cOJlex/Zmy622TrtMqaTqEKZzgC0yhtsA81KqJYqih0tbtGrC4fvuxFMjJvH86KlrWpwd2rakTu18Fn67jFfefJ8dtmlNvbq1ycurxd67duCDz78G4JqzD6FRw3pccuva/XCtmv/8tPiQ7jvy0RdfV9GW1WzffPMNALNmzeK/zz5Nr+N6rxm3evVqbr7xBk4/46w0S5T1UB2e5r8DbGNmWwJzgeOA49Mtaf0Mve00mjYOT+UvuPkpvlu6gkefncAD157ApGFXsHJVIaddPRiA75au4O4hoxg/5FLcnZHjZzAiefp/+ek9+fDzr5kw9DLg569And27Bwd335GCwkK+/X45p18zJM3NrTF69/oDixcvonZ+be68uz9NmjTh3rvv4oH7+wNw+BFH8cdT+qRcpVSWua9zR/yLY2YHAXcSvho1wN3/Xtb0teq39I069qqK0iSSb9+5N+0SpAL22mM3Jk+eVFIXXLVVHVqmuPtwYHjadYhIzVUd+kxFRFKnMBURiUBhKiISgcJURCQChamISAQKUxGRCBSmIiIRKExFRCJQmIqIRKAwFRGJQGEqIhKBwlREJAKFqYhIBApTEZEIFKYiIhEoTEVEIlCYiohEoDAVEYlAYSoiEoHCVEQkAoWpiEgEClMRkQgUpiIiEShMRUQiUJiKiESgMBURiUBhKiISQX7aBQCYWdOy3nf3xVVVi4hIZeREmAKTAQcMaAt8m7xuDMwCtkytMhGRcsiJ23x339LdtwJGAoe6e3N3bwYcAjydbnUiItnlRJhm2N3dhxcNuPtLQPcU6xERKZdcuc0vstDMrgKGEG77TwQWpVuSiEh2udYy7Q20AJ5Jflok40REclpOtUyTp/bnm1kDd1+Wdj0iIuWVUy1TM+tqZu8D7yfDO5nZfSmXJSKSVU6FKfAP4Pck/aTuPgXolmpFIiLlkGthirvPLjaqMJVCREQqIKf6TIHZZtYVcDOrA/wZ+CDlmkREssq1lulZwDlAa2AO8Gvg7DQLEhEpj1xrmXZ09xMyR5jZXsAbKdUjIlIuudYyvaec40REckpOtEzN7DdAV6CFmV2U8dYmQF46VYmIlF9OhClQB2hAqKdhxvglwNGpVCQiUgE5EabuPgYYY2aD3H1m2vWIiFRUrvWZPmxmjYsGzKyJmY1MsR4RkXLJtTBt7u7fFQ24+7dAy/TKEREpn1wL09Vm1rZowMzaEf4Un4hITsuJPtMMVwLjzWxMMtwNOCPFekREyiWnwtTdR5jZLsCehP8D6kJ3Xxh7PTtv15Y33r439mJlA7pr3GdplyAVMH/ZT2mXUOVy4jbfzDol/+5C+A/15gFzgbbJOBGRnJYrLdOLgdOB20t4z4F9q7YcEZGKyYkwdffTk3/3SbsWEZHKyIkwNbOjynrf3fXfPYtITsuJMAUOTf5tSfgd/VHJ8D7AaEBhKiI5LSfC1N37AJjZC0Bnd/8qGd4M6J9mbSIi5ZETT/MztC8K0sR8YNu0ihERKa+caJlmGJ38Lv5QwlP844DX0y1JRCS7nApTdz/XzI7k5/+R9EF3fybNmkREyiOnwjTxLrDU3V81s/pm1tDdl6ZdlIhIWXKqz9TMTgf+DTyQjGoNPJtaQSIi5ZRTYUr4n0n3IvyFfdz9E/Qn+ETkFyDXwvQnd19ZNGBm+ehP8InIL0CuhekYM7sCqGdmBwDDgOdTrklEJKtcC9PLgAXANOBMYDhwVaoViYiUQ848zTezWsBUd98BeCjtekREKiJnWqbuvhqYkvnfloiI/FLkTMs0sRkww8wmAj8UjXT3w9IrSUQku1wL0+vSLkBEpDJyIkzNrC5wFtCB8PDpEXcvSLcqEZHyy5U+00eB3QhBeiAl//clIiI5KydapoS/YbojgJk9AkxMuR4RkQrJlZbpqqIXur0XkV+iXGmZ7mRmS5LXRvgNqCXJa3f3TdIrTUQku5wIU3fPS7sGEZH1kSu
3+SIiv2gKUxGRCBSmIiIRKExFRCJQmIqIRKAwFRGJQGEqIhKBwlREJAKFqYhIBApTEZEIFKYiIhEoTEVEIlCYiohEoDAVEYlAYSoiEoHCVEQkAoWpiEgEClMRkQgUpiIiEShMRUQiUJiKiESgMBURiUBhKiISgcJURCSC/LQLkMr5+KOPOOn4Y9cMf/HF5/z1mr8xb95chr/4PHVq12HLrbfmwYcH0rhx4/QKrSFWFxZyz5+OoFHzVpxy40NMHT2cVx+9mwWzPuOc+56mTccdAfhk0nhGPHQrBQWryM+vzYFnXk6HXX4DwAMXHs/SRQuovVFdAPr2G0SDJs34fMpEXuh/A19//hG9/3onO3Y/MLXtlNJVi5apmQ0ws2/MbHratVSVbTt25O3J7/H25Pd4c+Jk6tevz2FHHMl++x/A5Pem887/prLNNtty6y03pV1qjfDG04No2bbDmuFWW27LSdfdR/tf7b7WdPUbNeHkvz/IhY8M55jLb+Wpmy5Z6/3jrryD8x96nvMfep4GTZoB0HjTzTnmsn7stN+hG35DpNKqRZgCg4CeaReRltdHvcaWW21Nu3bt2P+A35GfH244uuyxJ3PnzEm5uurv+wVf8eFbo9n9oF5rxrVs14EWbbdaZ9rW22zPJs03BWDT9ttQsOonClb+VObym7Zqw2Zbd8JqVZfTtXqqFrf57j7WzNqnXUdahj35BL2O7b3O+McGDeDoY44tYQ6J6fn+N3DgmZfx0/JlFZpv+tgRbN6hM/l1Nlozbli/y6hVK48duv2efU88BzOLXa5sIDXmUmdmZ5jZJDObtGDhgrTLiWblypW8+MJzHHX0MWuNv+Wmv5OXn89xx5+QUmU1wwcTRtGgcTPabLtDheab/8XHvPRgP4688Po144674g4ufGQ4Z901lC+nvsO7rzwbuVrZkGpMmLr7g+6+m7vv1qJ5i7TLiWbkiJf49c67sOmmm64ZN+SxRxn+4gsMeuxfatlsYDOnT+b9N1/j5t7dGXr9BXz2vwk8ceNFZc7z/YKvGHzN2fT6y200a91uzfhGLVoBsFH9Buy032HM+WDKBq1d4qoWt/k12VNPDl3rFv/lkSO4/bZbePm1MdSvXz/FymqGnqf/Hz1P/z8APnvvLcY99QjHXXFHqdOvWLaEgX85nd+fdgntd9h1zfjCwgJ+XLaEjRs1pbBgFR++NYoOu+y1weuXeBSmv2DLly9n1KuvcO99D6wZd+H55/LTTz9xSM8DgPAQ6p777k+rxBpr+riXee6e6/jh+8UMuuI0Ntt6O/r2G8Sbzwxm0byZjBrcn1GD+wPhK1B16tZjwKV9KCwsYHVhIR123YsuB4f+7tkfTmXw1X9ixbIlfDhhFK8MuouLBo5Ic/OkBObuadew3sxsKNADaA7MB65x90dKm37XXXfzN96eVEXVSQx3jfss7RKkAu456wjmfDStRvUxVYuWqbuv+yhbRKQK1ZgHUCIiG5LCVEQkAoWpiEgEClMRkQgUpiIiEShMRUQiUJiKiESgMBURiUBhKiISgcJURCQChamISAQKUxGRCBSmIiIRKExFRCJQmIqIRKAwFRGJQGEqIhKBwlREJAKFqYhIBApTEZEIFKYiIhEoTEVEIlCYiohEoDAVEYlAYSoiEoHCVEQkAoWpiEgEClMRkQgUpiIiEShMRUQiUJiKiESgMBURiUBhKiISgcJURCQChamISAQKUxGRCBSmIiIRKExFRCJQmIqIRKAwFRGJQGEqIhKBwlREJAKFqYhIBApTEZEIFKYiIhEoTEVEIlCYiohEoDAVEYlAYSoiEoG5e9o1VDkzWwDMTLuODaA5sDDtIqRCqutn1s7dW6RdRFWqkWFaXZnZJHffLe06pPz0mVUfus0XEYlAYSoiEoHCtHp5MO0CpML0mVUT6jMVEYlALVMRkQgUpiIiEShMqwkz62lmH5nZp2Z2edr1SNnMbICZfWNm09OuReJQmFYDZpYH9AcOBDoDvc2
sc7pVSRaDgJ5pFyHxKEyrhy7Ap+7+ubuvBJ4ADk+5JimDu48FFqddh8SjMK0eWgOzM4bnJONEpIooTKsHK2GcvvMmUoUUptXDHGCLjOE2wLyUahGpkRSm1cM7wDZmtqWZ1QGOA55LuSaRGkVhWg24ewFwLjAS+AB4yt1npFuVlMXMhgITgI5mNsfM+qZdk6wf/TqpiEgEapmKiESgMBURiUBhKiISgcJURCQChamISAQKU6lSZnakmbmZdcoy3QVmVn891nOKmd1b2flFKkphKlWtNzCe8IsFZbkAqHSYilQ1halUGTNrAOwF9CUJUzPLM7PbzGyamU01s/PM7M/A5sDrZvZ6Mt2yjOUcbWaDkteHmtnbZvY/M3vVzDat6u0SAchPuwCpUY4ARrj7x2a22Mx2AfYAtgR2dvcCM2vq7ovN7CJgH3dfmGWZ44E93d3N7DTgUuDiDbkRIiVRmEpV6g3cmbx+IhneCrg/+ZVY3L2if+OzDfCkmW0G1AG+iFOqSMUoTKVKmFkzYF9gBzNzII/wZwInU74/F5g5Td2M1/cAd7j7c2bWA7g2Rr0iFaU+U6kqRwOPuXs7d2/v7lsQWpHvAmeZWT6AmTVNpl8KNMyYf76ZbWdmtYAjM8Y3AuYmr0/eoFsgUgaFqVSV3sAzxcb9h/CgaRYw1cymAMcn7z0IvFT0AAq4HHgBGAV8lbGMa4FhZjYOyNa/KrLB6K9GiYhEoJapiEgEClMRkQgUpiIiEShMRUQiUJiKiESgMBURiUBhKiISwf8DZW2v4ZXJlsUAAAAASUVORK5CYII=\n", 3659 | "text/plain": [ 3660 | "
" 3661 | ] 3662 | }, 3663 | "metadata": { 3664 | "needs_background": "light" 3665 | }, 3666 | "output_type": "display_data" 3667 | } 3668 | ], 3669 | "source": [ 3670 | "plot_confusion_matrix(conf_mat=cf)\n", 3671 | "plt.xlabel(\"Actual\")\n", 3672 | "plt.ylabel(\"Predicted\")\n", 3673 | "plt.title(\"Confusion Matrix - Key: 0 is Legitimate & 1 is Malware\")\n", 3674 | "plt.show()" 3675 | ] 3676 | }, 3677 | { 3678 | "cell_type": "code", 3679 | "execution_count": null, 3680 | "id": "eadbd20c", 3681 | "metadata": {}, 3682 | "outputs": [], 3683 | "source": [] 3684 | } 3685 | ], 3686 | "metadata": { 3687 | "kernelspec": { 3688 | "display_name": "Python 3", 3689 | "language": "python", 3690 | "name": "python3" 3691 | }, 3692 | "language_info": { 3693 | "codemirror_mode": { 3694 | "name": "ipython", 3695 | "version": 3 3696 | }, 3697 | "file_extension": ".py", 3698 | "mimetype": "text/x-python", 3699 | "name": "python", 3700 | "nbconvert_exporter": "python", 3701 | "pygments_lexer": "ipython3", 3702 | "version": "3.8.8" 3703 | } 3704 | }, 3705 | "nbformat": 4, 3706 | "nbformat_minor": 5 3707 | } 3708 | --------------------------------------------------------------------------------