├── Preproccess - Selecting Attribute.ipynb ├── Preproccess - Selecting Attribute.py ├── README.md ├── RF Grid CV - CICIDS2018 - Fix.ipynb └── RF Grid CV - CICIDS2018 - Fix.py /Preproccess - Selecting Attribute.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from sklearn.model_selection import train_test_split\n", 12 | "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n", 13 | "from sklearn.metrics import classification_report\n", 14 | "from sklearn.svm import OneClassSVM\n", 15 | "from sklearn.pipeline import Pipeline" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "class dataset:\n", 25 | " pass\n", 26 | "sample_data = pd.read_csv(\"D:\\KULIAH\\Semester 8\\Dataset\\Thursday-15-02-2018_TrafficForML_CICFlowMeter.csv\")\n", 27 | "sample_data.to_pickle('D:\\KULIAH\\Semester 8\\Dataset\\Thursday-15-02-2018_TrafficForML_CICFlowMeter.pkl')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": { 34 | "scrolled": true 35 | }, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "\n", 42 | "Int64Index: 1040548 entries, 0 to 1048574\n", 43 | "Data columns (total 12 columns):\n", 44 | "URG Flag Cnt 1040548 non-null int64\n", 45 | "SYN Flag Cnt 1040548 non-null int64\n", 46 | "RST Flag Cnt 1040548 non-null int64\n", 47 | "PSH Flag Cnt 1040548 non-null int64\n", 48 | "Protocol 1040548 non-null int64\n", 49 | "Pkt Size Avg 1040548 non-null float64\n", 50 | "Flow Pkts/s 1040548 non-null float64\n", 51 | "FIN Flag Cnt 1040548 non-null int64\n", 52 | "ECE Flag Cnt 1040548 non-null int64\n", 53 | "ACK Flag Cnt 1040548 non-null int64\n", 54 | "Dst Port 1040548 
non-null int64\n", 55 | "Label 1040548 non-null object\n", 56 | "dtypes: float64(2), int64(9), object(1)\n", 57 | "memory usage: 103.2+ MB\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "df = pd.read_pickle('D:\\KULIAH\\Semester 8\\Dataset\\Thursday-15-02-2018_TrafficForML_CICFlowMeter.pkl')\n", 63 | "df = df[['URG Flag Cnt','SYN Flag Cnt','RST Flag Cnt','PSH Flag Cnt','Protocol',\n", 64 | " 'Pkt Size Avg','Flow Pkts/s','FIN Flag Cnt','ECE Flag Cnt','ACK Flag Cnt','Dst Port','Label']]\n", 65 | "df[\"Flow Pkts/s\"] = pd.to_numeric(df[\"Flow Pkts/s\"], errors='coerce')\n", 66 | "df.dropna(inplace=True)\n", 67 | "df.info(verbose=True)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 5, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "dataset.train = df.groupby('Label')\n", 77 | " .apply(pd.DataFrame.sample, frac=0.8)\n", 78 | " .reset_index(level='Label', drop=True)\n", 79 | "dataset.test = df.drop(dataset.train.index)\n", 80 | "dataset.label = dataset.train.Label.copy()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 6, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/html": [ 91 | "
\n", 92 | "\n", 105 | "\n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " 
\n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | 
" \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 
| " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 
719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " 
\n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | 
" \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | "
URG Flag CntSYN Flag CntRST Flag CntPSH Flag CntProtocolPkt Size AvgFlow Pkts/sFIN Flag CntECE Flag CntACK Flag CntDst PortLabel
7402100016225.9000000.441997000443Benign
60061600001778.5000007220.21660600053Benign
89755200006164.6666671.006194001443Benign
72952400001749.5000004.7219260005355Benign
869560000060.0000005221.93211500180Benign
41020600116171.5000005.6435600103389Benign
779155100060.000000222222.22222200152170Benign
970174000067.7500008639.30885500149807Benign
127130100060.00000062500.00000000152155Benign
44946400001762.500000150.81818900053Benign
6882900001780.00000013.93980800053Benign
14042200016240.2592590.230985000443Benign
5469230001686.1515150.27684500080Benign
777338000017101.250000144.51911300053Benign
836805000060.5416670.215965001443Benign
686674100060.00000041666.66666700151644Benign
103528400001777.0000002100.84033600053Benign
66746800001762.0000004464.28571400053Benign
4566060001692.7142868284.02366900080Benign
141822000000.0000000.0266350000Benign
28284800001783.25000064.12003300053Benign
4603680001672.0909098.965744000445Benign
926736000060.00000014705.88235300180Benign
15054200001796.000000968.52300200053Benign
72217700001797.00000025.42103600053Benign
1543540100641.0000003811.944091001443Benign
101969500001784.5000003521.12676100053Benign
925399000060.00000054054.05405400151556Benign
16377400001793.0000002143.62272200053Benign
66126600001784.5000006230.52959500053Benign
.......................................
4614000016140.5555560.16573000080DoS attacks-Slowloris
35238000160.0000000.98431600080DoS attacks-Slowloris
4430100016140.5555560.16856800080DoS attacks-Slowloris
31367010066.0000000.03734900180DoS attacks-Slowloris
34474000160.0000000.98583800080DoS attacks-Slowloris
29827010068.0000001000000.00000000180DoS attacks-Slowloris
4894500016140.5555560.16691800080DoS attacks-Slowloris
4935500016140.5555560.16776400080DoS attacks-Slowloris
36744000160.0000000.98957900080DoS attacks-Slowloris
32487010065.3333330.14392600180DoS attacks-Slowloris
5051100016140.0000000.15745200080DoS attacks-Slowloris
4153900016148.8235290.15958500080DoS attacks-Slowloris
287290001627.3333330.08998900080DoS attacks-Slowloris
29085010066.0000000.04000000180DoS attacks-Slowloris
40738000160.0000000.99216000080DoS attacks-Slowloris
37315000160.0000000.98613600080DoS attacks-Slowloris
3859900016140.5555560.16672000080DoS attacks-Slowloris
51777000160.0000000.98697300080DoS attacks-Slowloris
38897000060.00000010928.96174910080DoS attacks-Slowloris
31789010068.0000001000000.00000000180DoS attacks-Slowloris
4243400016140.5555560.16766400080DoS attacks-Slowloris
37049000160.0000000.99348700080DoS attacks-Slowloris
5058400016148.8235290.15883100080DoS attacks-Slowloris
30137010068.0000001000000.00000000180DoS attacks-Slowloris
29974010066.0000000.04000000180DoS attacks-Slowloris
46070000160.0000000.99216000080DoS attacks-Slowloris
4521700016140.5555560.16486600080DoS attacks-Slowloris
30018010066.0000000.03806500180DoS attacks-Slowloris
30007010066.0000000.03806500180DoS attacks-Slowloris
5380600016140.5555560.16762200080DoS attacks-Slowloris
\n", 1041 | "

832438 rows × 12 columns

\n", 1042 | "
" 1043 | ], 1044 | "text/plain": [ 1045 | " URG Flag Cnt SYN Flag Cnt RST Flag Cnt PSH Flag Cnt Protocol \\\n", 1046 | "74021 0 0 0 1 6 \n", 1047 | "600616 0 0 0 0 17 \n", 1048 | "897552 0 0 0 0 6 \n", 1049 | "729524 0 0 0 0 17 \n", 1050 | "869560 0 0 0 0 6 \n", 1051 | "410206 0 0 1 1 6 \n", 1052 | "779155 1 0 0 0 6 \n", 1053 | "970174 0 0 0 0 6 \n", 1054 | "127130 1 0 0 0 6 \n", 1055 | "449464 0 0 0 0 17 \n", 1056 | "68829 0 0 0 0 17 \n", 1057 | "140422 0 0 0 1 6 \n", 1058 | "546923 0 0 0 1 6 \n", 1059 | "777338 0 0 0 0 17 \n", 1060 | "836805 0 0 0 0 6 \n", 1061 | "686674 1 0 0 0 6 \n", 1062 | "1035284 0 0 0 0 17 \n", 1063 | "667468 0 0 0 0 17 \n", 1064 | "456606 0 0 0 1 6 \n", 1065 | "141822 0 0 0 0 0 \n", 1066 | "282848 0 0 0 0 17 \n", 1067 | "460368 0 0 0 1 6 \n", 1068 | "926736 0 0 0 0 6 \n", 1069 | "150542 0 0 0 0 17 \n", 1070 | "722177 0 0 0 0 17 \n", 1071 | "154354 0 1 0 0 6 \n", 1072 | "1019695 0 0 0 0 17 \n", 1073 | "925399 0 0 0 0 6 \n", 1074 | "163774 0 0 0 0 17 \n", 1075 | "661266 0 0 0 0 17 \n", 1076 | "... ... ... ... ... ... 
\n", 1077 | "46140 0 0 0 1 6 \n", 1078 | "35238 0 0 0 1 6 \n", 1079 | "44301 0 0 0 1 6 \n", 1080 | "31367 0 1 0 0 6 \n", 1081 | "34474 0 0 0 1 6 \n", 1082 | "29827 0 1 0 0 6 \n", 1083 | "48945 0 0 0 1 6 \n", 1084 | "49355 0 0 0 1 6 \n", 1085 | "36744 0 0 0 1 6 \n", 1086 | "32487 0 1 0 0 6 \n", 1087 | "50511 0 0 0 1 6 \n", 1088 | "41539 0 0 0 1 6 \n", 1089 | "28729 0 0 0 1 6 \n", 1090 | "29085 0 1 0 0 6 \n", 1091 | "40738 0 0 0 1 6 \n", 1092 | "37315 0 0 0 1 6 \n", 1093 | "38599 0 0 0 1 6 \n", 1094 | "51777 0 0 0 1 6 \n", 1095 | "38897 0 0 0 0 6 \n", 1096 | "31789 0 1 0 0 6 \n", 1097 | "42434 0 0 0 1 6 \n", 1098 | "37049 0 0 0 1 6 \n", 1099 | "50584 0 0 0 1 6 \n", 1100 | "30137 0 1 0 0 6 \n", 1101 | "29974 0 1 0 0 6 \n", 1102 | "46070 0 0 0 1 6 \n", 1103 | "45217 0 0 0 1 6 \n", 1104 | "30018 0 1 0 0 6 \n", 1105 | "30007 0 1 0 0 6 \n", 1106 | "53806 0 0 0 1 6 \n", 1107 | "\n", 1108 | " Pkt Size Avg Flow Pkts/s FIN Flag Cnt ECE Flag Cnt \\\n", 1109 | "74021 225.900000 0.441997 0 0 \n", 1110 | "600616 78.500000 7220.216606 0 0 \n", 1111 | "897552 164.666667 1.006194 0 0 \n", 1112 | "729524 49.500000 4.721926 0 0 \n", 1113 | "869560 0.000000 5221.932115 0 0 \n", 1114 | "410206 171.500000 5.643560 0 1 \n", 1115 | "779155 0.000000 222222.222222 0 0 \n", 1116 | "970174 7.750000 8639.308855 0 0 \n", 1117 | "127130 0.000000 62500.000000 0 0 \n", 1118 | "449464 62.500000 150.818189 0 0 \n", 1119 | "68829 80.000000 13.939808 0 0 \n", 1120 | "140422 240.259259 0.230985 0 0 \n", 1121 | "546923 86.151515 0.276845 0 0 \n", 1122 | "777338 101.250000 144.519113 0 0 \n", 1123 | "836805 0.541667 0.215965 0 0 \n", 1124 | "686674 0.000000 41666.666667 0 0 \n", 1125 | "1035284 77.000000 2100.840336 0 0 \n", 1126 | "667468 62.000000 4464.285714 0 0 \n", 1127 | "456606 92.714286 8284.023669 0 0 \n", 1128 | "141822 0.000000 0.026635 0 0 \n", 1129 | "282848 83.250000 64.120033 0 0 \n", 1130 | "460368 72.090909 8.965744 0 0 \n", 1131 | "926736 0.000000 14705.882353 0 0 \n", 1132 | "150542 
96.000000 968.523002 0 0 \n", 1133 | "722177 97.000000 25.421036 0 0 \n", 1134 | "154354 41.000000 3811.944091 0 0 \n", 1135 | "1019695 84.500000 3521.126761 0 0 \n", 1136 | "925399 0.000000 54054.054054 0 0 \n", 1137 | "163774 93.000000 2143.622722 0 0 \n", 1138 | "661266 84.500000 6230.529595 0 0 \n", 1139 | "... ... ... ... ... \n", 1140 | "46140 140.555556 0.165730 0 0 \n", 1141 | "35238 0.000000 0.984316 0 0 \n", 1142 | "44301 140.555556 0.168568 0 0 \n", 1143 | "31367 6.000000 0.037349 0 0 \n", 1144 | "34474 0.000000 0.985838 0 0 \n", 1145 | "29827 8.000000 1000000.000000 0 0 \n", 1146 | "48945 140.555556 0.166918 0 0 \n", 1147 | "49355 140.555556 0.167764 0 0 \n", 1148 | "36744 0.000000 0.989579 0 0 \n", 1149 | "32487 5.333333 0.143926 0 0 \n", 1150 | "50511 140.000000 0.157452 0 0 \n", 1151 | "41539 148.823529 0.159585 0 0 \n", 1152 | "28729 27.333333 0.089989 0 0 \n", 1153 | "29085 6.000000 0.040000 0 0 \n", 1154 | "40738 0.000000 0.992160 0 0 \n", 1155 | "37315 0.000000 0.986136 0 0 \n", 1156 | "38599 140.555556 0.166720 0 0 \n", 1157 | "51777 0.000000 0.986973 0 0 \n", 1158 | "38897 0.000000 10928.961749 1 0 \n", 1159 | "31789 8.000000 1000000.000000 0 0 \n", 1160 | "42434 140.555556 0.167664 0 0 \n", 1161 | "37049 0.000000 0.993487 0 0 \n", 1162 | "50584 148.823529 0.158831 0 0 \n", 1163 | "30137 8.000000 1000000.000000 0 0 \n", 1164 | "29974 6.000000 0.040000 0 0 \n", 1165 | "46070 0.000000 0.992160 0 0 \n", 1166 | "45217 140.555556 0.164866 0 0 \n", 1167 | "30018 6.000000 0.038065 0 0 \n", 1168 | "30007 6.000000 0.038065 0 0 \n", 1169 | "53806 140.555556 0.167622 0 0 \n", 1170 | "\n", 1171 | " ACK Flag Cnt Dst Port Label \n", 1172 | "74021 0 443 Benign \n", 1173 | "600616 0 53 Benign \n", 1174 | "897552 1 443 Benign \n", 1175 | "729524 0 5355 Benign \n", 1176 | "869560 1 80 Benign \n", 1177 | "410206 0 3389 Benign \n", 1178 | "779155 1 52170 Benign \n", 1179 | "970174 1 49807 Benign \n", 1180 | "127130 1 52155 Benign \n", 1181 | "449464 0 53 Benign 
\n", 1182 | "68829 0 53 Benign \n", 1183 | "140422 0 443 Benign \n", 1184 | "546923 0 80 Benign \n", 1185 | "777338 0 53 Benign \n", 1186 | "836805 1 443 Benign \n", 1187 | "686674 1 51644 Benign \n", 1188 | "1035284 0 53 Benign \n", 1189 | "667468 0 53 Benign \n", 1190 | "456606 0 80 Benign \n", 1191 | "141822 0 0 Benign \n", 1192 | "282848 0 53 Benign \n", 1193 | "460368 0 445 Benign \n", 1194 | "926736 1 80 Benign \n", 1195 | "150542 0 53 Benign \n", 1196 | "722177 0 53 Benign \n", 1197 | "154354 1 443 Benign \n", 1198 | "1019695 0 53 Benign \n", 1199 | "925399 1 51556 Benign \n", 1200 | "163774 0 53 Benign \n", 1201 | "661266 0 53 Benign \n", 1202 | "... ... ... ... \n", 1203 | "46140 0 80 DoS attacks-Slowloris \n", 1204 | "35238 0 80 DoS attacks-Slowloris \n", 1205 | "44301 0 80 DoS attacks-Slowloris \n", 1206 | "31367 1 80 DoS attacks-Slowloris \n", 1207 | "34474 0 80 DoS attacks-Slowloris \n", 1208 | "29827 1 80 DoS attacks-Slowloris \n", 1209 | "48945 0 80 DoS attacks-Slowloris \n", 1210 | "49355 0 80 DoS attacks-Slowloris \n", 1211 | "36744 0 80 DoS attacks-Slowloris \n", 1212 | "32487 1 80 DoS attacks-Slowloris \n", 1213 | "50511 0 80 DoS attacks-Slowloris \n", 1214 | "41539 0 80 DoS attacks-Slowloris \n", 1215 | "28729 0 80 DoS attacks-Slowloris \n", 1216 | "29085 1 80 DoS attacks-Slowloris \n", 1217 | "40738 0 80 DoS attacks-Slowloris \n", 1218 | "37315 0 80 DoS attacks-Slowloris \n", 1219 | "38599 0 80 DoS attacks-Slowloris \n", 1220 | "51777 0 80 DoS attacks-Slowloris \n", 1221 | "38897 0 80 DoS attacks-Slowloris \n", 1222 | "31789 1 80 DoS attacks-Slowloris \n", 1223 | "42434 0 80 DoS attacks-Slowloris \n", 1224 | "37049 0 80 DoS attacks-Slowloris \n", 1225 | "50584 0 80 DoS attacks-Slowloris \n", 1226 | "30137 1 80 DoS attacks-Slowloris \n", 1227 | "29974 1 80 DoS attacks-Slowloris \n", 1228 | "46070 0 80 DoS attacks-Slowloris \n", 1229 | "45217 0 80 DoS attacks-Slowloris \n", 1230 | "30018 1 80 DoS attacks-Slowloris \n", 1231 | "30007 1 80 DoS 
attacks-Slowloris \n", 1232 | "53806 0 80 DoS attacks-Slowloris \n", 1233 | "\n", 1234 | "[832438 rows x 12 columns]" 1235 | ] 1236 | }, 1237 | "execution_count": 6, 1238 | "metadata": {}, 1239 | "output_type": "execute_result" 1240 | } 1241 | ], 1242 | "source": [ 1243 | "dataset.train" 1244 | ] 1245 | }, 1246 | { 1247 | "cell_type": "code", 1248 | "execution_count": 7, 1249 | "metadata": {}, 1250 | "outputs": [ 1251 | { 1252 | "data": { 1253 | "text/plain": [ 1254 | "array(['Benign', 'DoS attacks-GoldenEye', 'DoS attacks-Slowloris'],\n", 1255 | " dtype=object)" 1256 | ] 1257 | }, 1258 | "execution_count": 7, 1259 | "metadata": {}, 1260 | "output_type": "execute_result" 1261 | } 1262 | ], 1263 | "source": [ 1264 | "dataset.label.unique()" 1265 | ] 1266 | }, 1267 | { 1268 | "cell_type": "code", 1269 | "execution_count": 8, 1270 | "metadata": {}, 1271 | "outputs": [], 1272 | "source": [ 1273 | "d1 = dataset.train.replace('Benign', 0)" 1274 | ] 1275 | }, 1276 | { 1277 | "cell_type": "code", 1278 | "execution_count": 9, 1279 | "metadata": {}, 1280 | "outputs": [], 1281 | "source": [ 1282 | "d2 = d1.replace('DoS attacks-GoldenEye', 1)" 1283 | ] 1284 | }, 1285 | { 1286 | "cell_type": "code", 1287 | "execution_count": 10, 1288 | "metadata": {}, 1289 | "outputs": [], 1290 | "source": [ 1291 | "d3 = d2.replace('DoS attacks-Slowloris', 1)" 1292 | ] 1293 | }, 1294 | { 1295 | "cell_type": "code", 1296 | "execution_count": 11, 1297 | "metadata": {}, 1298 | "outputs": [], 1299 | "source": [ 1300 | "d6_label = d3.Label.copy()" 1301 | ] 1302 | }, 1303 | { 1304 | "cell_type": "code", 1305 | "execution_count": 12, 1306 | "metadata": {}, 1307 | "outputs": [ 1308 | { 1309 | "data": { 1310 | "text/plain": [ 1311 | "array([0, 1], dtype=int64)" 1312 | ] 1313 | }, 1314 | "execution_count": 12, 1315 | "metadata": {}, 1316 | "output_type": "execute_result" 1317 | } 1318 | ], 1319 | "source": [ 1320 | "d6_label.unique()" 1321 | ] 1322 | }, 1323 | { 1324 | "cell_type": "code", 1325 | 
"execution_count": 13, 1326 | "metadata": {}, 1327 | "outputs": [ 1328 | { 1329 | "data": { 1330 | "text/plain": [ 1331 | "0 790440\n", 1332 | "1 41998\n", 1333 | "Name: Label, dtype: int64" 1334 | ] 1335 | }, 1336 | "execution_count": 13, 1337 | "metadata": {}, 1338 | "output_type": "execute_result" 1339 | } 1340 | ], 1341 | "source": [ 1342 | "d6_label.value_counts()" 1343 | ] 1344 | }, 1345 | { 1346 | "cell_type": "code", 1347 | "execution_count": 14, 1348 | "metadata": {}, 1349 | "outputs": [], 1350 | "source": [ 1351 | "dataset.test_label = dataset.test.Label.copy() #ra kanggo" 1352 | ] 1353 | }, 1354 | { 1355 | "cell_type": "code", 1356 | "execution_count": 16, 1357 | "metadata": {}, 1358 | "outputs": [ 1359 | { 1360 | "data": { 1361 | "text/plain": [ 1362 | "array(['Benign', 'DoS attacks-GoldenEye', 'DoS attacks-Slowloris'],\n", 1363 | " dtype=object)" 1364 | ] 1365 | }, 1366 | "execution_count": 16, 1367 | "metadata": {}, 1368 | "output_type": "execute_result" 1369 | } 1370 | ], 1371 | "source": [ 1372 | "dataset.test_label.unique() #ra kanggo" 1373 | ] 1374 | }, 1375 | { 1376 | "cell_type": "code", 1377 | "execution_count": 15, 1378 | "metadata": {}, 1379 | "outputs": [], 1380 | "source": [ 1381 | "a1_label = dataset.test.Label.copy()" 1382 | ] 1383 | }, 1384 | { 1385 | "cell_type": "code", 1386 | "execution_count": 16, 1387 | "metadata": {}, 1388 | "outputs": [ 1389 | { 1390 | "data": { 1391 | "text/plain": [ 1392 | "array(['Benign', 'DoS attacks-GoldenEye', 'DoS attacks-Slowloris'],\n", 1393 | " dtype=object)" 1394 | ] 1395 | }, 1396 | "execution_count": 16, 1397 | "metadata": {}, 1398 | "output_type": "execute_result" 1399 | } 1400 | ], 1401 | "source": [ 1402 | "a1_label.unique()" 1403 | ] 1404 | }, 1405 | { 1406 | "cell_type": "code", 1407 | "execution_count": 17, 1408 | "metadata": {}, 1409 | "outputs": [], 1410 | "source": [ 1411 | "a1 = dataset.test.replace('Benign', 0)" 1412 | ] 1413 | }, 1414 | { 1415 | "cell_type": "code", 1416 | 
"execution_count": 18, 1417 | "metadata": {}, 1418 | "outputs": [], 1419 | "source": [ 1420 | "a2 = a1.replace('DoS attacks-GoldenEye', 1)" 1421 | ] 1422 | }, 1423 | { 1424 | "cell_type": "code", 1425 | "execution_count": 19, 1426 | "metadata": {}, 1427 | "outputs": [], 1428 | "source": [ 1429 | "a3 = a2.replace('DoS attacks-Slowloris', 1)" 1430 | ] 1431 | }, 1432 | { 1433 | "cell_type": "code", 1434 | "execution_count": 20, 1435 | "metadata": {}, 1436 | "outputs": [], 1437 | "source": [ 1438 | "a5_label = a3.Label.copy()" 1439 | ] 1440 | }, 1441 | { 1442 | "cell_type": "code", 1443 | "execution_count": 21, 1444 | "metadata": {}, 1445 | "outputs": [ 1446 | { 1447 | "data": { 1448 | "text/plain": [ 1449 | "array([0, 1], dtype=int64)" 1450 | ] 1451 | }, 1452 | "execution_count": 21, 1453 | "metadata": {}, 1454 | "output_type": "execute_result" 1455 | } 1456 | ], 1457 | "source": [ 1458 | "a5_label.unique()" 1459 | ] 1460 | }, 1461 | { 1462 | "cell_type": "code", 1463 | "execution_count": 22, 1464 | "metadata": {}, 1465 | "outputs": [ 1466 | { 1467 | "data": { 1468 | "text/plain": [ 1469 | "0 197610\n", 1470 | "1 10500\n", 1471 | "Name: Label, dtype: int64" 1472 | ] 1473 | }, 1474 | "execution_count": 22, 1475 | "metadata": {}, 1476 | "output_type": "execute_result" 1477 | } 1478 | ], 1479 | "source": [ 1480 | "a5_label.value_counts()" 1481 | ] 1482 | }, 1483 | { 1484 | "cell_type": "code", 1485 | "execution_count": 23, 1486 | "metadata": {}, 1487 | "outputs": [ 1488 | { 1489 | "name": "stdout", 1490 | "output_type": "stream", 1491 | "text": [ 1492 | "Length of Categories for Protocol are 3\n", 1493 | "Categories for Protocol are Int64Index([0, 6, 17], dtype='int64') \n", 1494 | "\n" 1495 | ] 1496 | } 1497 | ], 1498 | "source": [ 1499 | "category_variables = [\"Protocol\"]\n", 1500 | "\n", 1501 | "for cv in category_variables:\n", 1502 | " d3[cv] = d3[cv].astype(\"category\")\n", 1503 | " a3[cv] = a3[cv].astype(\"category\")\n", 1504 | " \n", 1505 | " print(\"Length 
of Categories for {} are {}\".format(cv , len(d3[cv].cat.categories)))\n", 1506 | " print(\"Categories for {} are {} \\n\".format(cv ,d3[cv].cat.categories))" 1507 | ] 1508 | }, 1509 | { 1510 | "cell_type": "code", 1511 | "execution_count": 24, 1512 | "metadata": {}, 1513 | "outputs": [], 1514 | "source": [ 1515 | "dummy_variables_2labels = category_variables\n", 1516 | " \n", 1517 | "class preprocessing:\n", 1518 | " train_labels = pd.get_dummies(d3, columns = dummy_variables_2labels, prefix=dummy_variables_2labels)\n", 1519 | " test_labels = pd.get_dummies(a3, columns = dummy_variables_2labels, prefix=dummy_variables_2labels)" 1520 | ] 1521 | }, 1522 | { 1523 | "cell_type": "code", 1524 | "execution_count": 27, 1525 | "metadata": {}, 1526 | "outputs": [ 1527 | { 1528 | "name": "stdout", 1529 | "output_type": "stream", 1530 | "text": [ 1531 | "\n", 1532 | "Int64Index: 208110 entries, 0 to 1048571\n", 1533 | "Data columns (total 14 columns):\n", 1534 | "URG Flag Cnt 208110 non-null int64\n", 1535 | "SYN Flag Cnt 208110 non-null int64\n", 1536 | "RST Flag Cnt 208110 non-null int64\n", 1537 | "PSH Flag Cnt 208110 non-null int64\n", 1538 | "Pkt Size Avg 208110 non-null float64\n", 1539 | "Flow Pkts/s 208110 non-null float64\n", 1540 | "FIN Flag Cnt 208110 non-null int64\n", 1541 | "ECE Flag Cnt 208110 non-null int64\n", 1542 | "ACK Flag Cnt 208110 non-null int64\n", 1543 | "Dst Port 208110 non-null int64\n", 1544 | "Label 208110 non-null int64\n", 1545 | "Protocol_0 208110 non-null uint8\n", 1546 | "Protocol_6 208110 non-null uint8\n", 1547 | "Protocol_17 208110 non-null uint8\n", 1548 | "dtypes: float64(2), int64(9), uint8(3)\n", 1549 | "memory usage: 19.6 MB\n" 1550 | ] 1551 | } 1552 | ], 1553 | "source": [ 1554 | "preprocessing.test_labels.info(verbose=True)" 1555 | ] 1556 | }, 1557 | { 1558 | "cell_type": "code", 1559 | "execution_count": 29, 1560 | "metadata": {}, 1561 | "outputs": [ 1562 | { 1563 | "data": { 1564 | "text/html": [ 1565 | "
\n", 1566 | "\n", 1579 | "\n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | "
URG Flag CntSYN Flag CntRST Flag CntPSH Flag CntProtocolPkt Size AvgFlow Pkts/sFIN Flag CntECE Flag CntACK Flag CntDst PortLabel
7402100016225.9000000.4419970004430
60061600001778.5000007220.216606000530
89755200006164.6666671.0061940014430
72952400001749.5000004.72192600053550
869560000060.0000005221.932115001800
\n", 1675 | "
" 1676 | ], 1677 | "text/plain": [ 1678 | " URG Flag Cnt SYN Flag Cnt RST Flag Cnt PSH Flag Cnt Protocol \\\n", 1679 | "74021 0 0 0 1 6 \n", 1680 | "600616 0 0 0 0 17 \n", 1681 | "897552 0 0 0 0 6 \n", 1682 | "729524 0 0 0 0 17 \n", 1683 | "869560 0 0 0 0 6 \n", 1684 | "\n", 1685 | " Pkt Size Avg Flow Pkts/s FIN Flag Cnt ECE Flag Cnt ACK Flag Cnt \\\n", 1686 | "74021 225.900000 0.441997 0 0 0 \n", 1687 | "600616 78.500000 7220.216606 0 0 0 \n", 1688 | "897552 164.666667 1.006194 0 0 1 \n", 1689 | "729524 49.500000 4.721926 0 0 0 \n", 1690 | "869560 0.000000 5221.932115 0 0 1 \n", 1691 | "\n", 1692 | " Dst Port Label \n", 1693 | "74021 443 0 \n", 1694 | "600616 53 0 \n", 1695 | "897552 443 0 \n", 1696 | "729524 5355 0 \n", 1697 | "869560 80 0 " 1698 | ] 1699 | }, 1700 | "execution_count": 29, 1701 | "metadata": {}, 1702 | "output_type": "execute_result" 1703 | } 1704 | ], 1705 | "source": [ 1706 | "d3.head()" 1707 | ] 1708 | }, 1709 | { 1710 | "cell_type": "code", 1711 | "execution_count": 28, 1712 | "metadata": {}, 1713 | "outputs": [ 1714 | { 1715 | "data": { 1716 | "text/html": [ 1717 | "
\n", 1718 | "\n", 1731 | "\n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | " \n", 1784 | " \n", 1785 | " \n", 1786 | " \n", 1787 | " \n", 1788 | " \n", 1789 | " \n", 1790 | " \n", 1791 | " \n", 1792 | " \n", 1793 | " \n", 1794 | " \n", 1795 | " \n", 1796 | " \n", 1797 | " \n", 1798 | " \n", 1799 | " \n", 1800 | " \n", 1801 | " \n", 1802 | " \n", 1803 | " \n", 1804 | " \n", 1805 | " \n", 1806 | " \n", 1807 | " \n", 1808 | " \n", 1809 | " \n", 1810 | " \n", 1811 | " \n", 1812 | " \n", 1813 | " \n", 1814 | " \n", 1815 | " \n", 1816 | " \n", 1817 | " \n", 1818 | " \n", 1819 | " \n", 1820 | " \n", 1821 | " \n", 1822 | " \n", 1823 | " \n", 1824 | " \n", 1825 | " \n", 1826 | " \n", 1827 | " \n", 1828 | " \n", 1829 | " \n", 1830 | " \n", 1831 | " \n", 1832 | " \n", 1833 | " \n", 1834 | " \n", 1835 | " \n", 1836 | " \n", 1837 | " \n", 1838 | "
URG Flag CntSYN Flag CntRST Flag CntPSH Flag CntPkt Size AvgFlow Pkts/sFIN Flag CntECE Flag CntACK Flag CntDst PortLabelProtocol_0Protocol_6Protocol_17
000000.0000000.02663300000100
220001240.0454550.740696000220010
2600000.0000000.02663300000100
310001181.1600000.944924000220010
3200000.0000000.02663300000100
\n", 1839 | "
" 1840 | ], 1841 | "text/plain": [ 1842 | " URG Flag Cnt SYN Flag Cnt RST Flag Cnt PSH Flag Cnt Pkt Size Avg \\\n", 1843 | "0 0 0 0 0 0.000000 \n", 1844 | "22 0 0 0 1 240.045455 \n", 1845 | "26 0 0 0 0 0.000000 \n", 1846 | "31 0 0 0 1 181.160000 \n", 1847 | "32 0 0 0 0 0.000000 \n", 1848 | "\n", 1849 | " Flow Pkts/s FIN Flag Cnt ECE Flag Cnt ACK Flag Cnt Dst Port Label \\\n", 1850 | "0 0.026633 0 0 0 0 0 \n", 1851 | "22 0.740696 0 0 0 22 0 \n", 1852 | "26 0.026633 0 0 0 0 0 \n", 1853 | "31 0.944924 0 0 0 22 0 \n", 1854 | "32 0.026633 0 0 0 0 0 \n", 1855 | "\n", 1856 | " Protocol_0 Protocol_6 Protocol_17 \n", 1857 | "0 1 0 0 \n", 1858 | "22 0 1 0 \n", 1859 | "26 1 0 0 \n", 1860 | "31 0 1 0 \n", 1861 | "32 1 0 0 " 1862 | ] 1863 | }, 1864 | "execution_count": 28, 1865 | "metadata": {}, 1866 | "output_type": "execute_result" 1867 | } 1868 | ], 1869 | "source": [ 1870 | "preprocessing.test_labels.head()" 1871 | ] 1872 | }, 1873 | { 1874 | "cell_type": "code", 1875 | "execution_count": 47, 1876 | "metadata": {}, 1877 | "outputs": [], 1878 | "source": [ 1879 | "preprocessing.train_labels.to_csv(\"D:\\KULIAH\\Semester 8\\Dataset\\preprocessed_train_4_new.csv\")\n", 1880 | "preprocessing.test_labels.to_csv(\"D:\\KULIAH\\Semester 8\\Dataset\\preprocessed_test_4_new.csv\")" 1881 | ] 1882 | }, 1883 | { 1884 | "cell_type": "code", 1885 | "execution_count": null, 1886 | "metadata": {}, 1887 | "outputs": [], 1888 | "source": [ 1889 | "preprocessing.train_labels.head()" 1890 | ] 1891 | } 1892 | ], 1893 | "metadata": { 1894 | "kernelspec": { 1895 | "display_name": "TensorFlow-GPU", 1896 | "language": "python", 1897 | "name": "tf-gpu" 1898 | }, 1899 | "language_info": { 1900 | "codemirror_mode": { 1901 | "name": "ipython", 1902 | "version": 3 1903 | }, 1904 | "file_extension": ".py", 1905 | "mimetype": "text/x-python", 1906 | "name": "python", 1907 | "nbconvert_exporter": "python", 1908 | "pygments_lexer": "ipython3", 1909 | "version": "3.6.7" 1910 | } 1911 | }, 1912 | "nbformat": 
#!/usr/bin/env python
# coding: utf-8
"""Select flow attributes from one CSE-CIC-IDS2018 capture and split it.

Reads the Thursday-15-02-2018 CICFlowMeter CSV, keeps eleven feature columns
plus ``Label``, draws an 80 % sample of every label group as the training set
(the remaining rows become the test set) and recodes the training labels to
0 for ``Benign`` and 1 for the two DoS attack classes.
"""

# In[1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import classification_report
from sklearn.svm import OneClassSVM
from sklearn.pipeline import Pipeline

# Raw strings: the Windows paths contain backslash sequences such as "\K" and
# "\S" that are not valid escapes and trigger warnings in ordinary literals.
RAW_CSV_PATH = r"D:\KULIAH\Semester 8\Dataset\Thursday-15-02-2018_TrafficForML_CICFlowMeter.csv"
RAW_PKL_PATH = r"D:\KULIAH\Semester 8\Dataset\Thursday-15-02-2018_TrafficForML_CICFlowMeter.pkl"

# Columns kept for modelling; "Label" is the target.
SELECTED_COLUMNS = [
    'URG Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'Protocol',
    'Pkt Size Avg', 'Flow Pkts/s', 'FIN Flag Cnt', 'ECE Flag Cnt', 'ACK Flag Cnt',
    'Dst Port', 'Label',
]


# In[2]:
class dataset:
    """Plain namespace holding the train/test frames and label series."""
    pass


# Convert the CSV to a pickle once so later reads do not re-parse the CSV.
sample_data = pd.read_csv(RAW_CSV_PATH)
sample_data.to_pickle(RAW_PKL_PATH)

# In[3]:
df = pd.read_pickle(RAW_PKL_PATH)
# .copy() makes the subset an independent frame, so the column assignment
# below does not raise SettingWithCopyWarning.
df = df[SELECTED_COLUMNS].copy()
# Values that cannot be parsed as numbers become NaN and are dropped next.
df["Flow Pkts/s"] = pd.to_numeric(df["Flow Pkts/s"], errors='coerce')
df.dropna(inplace=True)
df.info(verbose=True)

# In[5]:
# The method chain must sit inside parentheses: written as bare indented
# continuation lines it is an IndentationError and the script never starts.
dataset.train = (
    df.groupby('Label')
    .apply(pd.DataFrame.sample, frac=0.8)
    .reset_index(level='Label', drop=True)
)
# Everything that was not sampled into the training set is the test set.
dataset.test = df.drop(dataset.train.index)
dataset.label = dataset.train.Label.copy()

# In[6]:
# Notebook cells displayed these values implicitly; a script has to print.
print(dataset.train)

# In[7]:
print(dataset.label.unique())

# In[8]:
d1 = dataset.train.replace('Benign', 0)

# In[9]:
d2 = d1.replace('DoS attacks-GoldenEye', 1)

# In[10]:
d3 = d2.replace('DoS attacks-Slowloris', 1)

# In[11]:
d6_label = d3.Label.copy()

# In[12]:
print(d6_label.unique())

# In[13]:
d6_label.value_counts() 57 | 58 | # In[14]: 59 | dataset.test_label = dataset.test.Label.copy() #ra kanggo 60 | 61 | # In[16]: 62 | dataset.test_label.unique() #ra kanggo 63 | 64 | # In[15]: 65 | a1_label = dataset.test.Label.copy() 66 | 67 | # In[16]: 68 | a1_label.unique() 69 | 70 | # In[17]: 71 | a1 = dataset.test.replace('Benign', 0) 72 | 73 | # In[18]: 74 | a2 = a1.replace('DoS attacks-GoldenEye', 1) 75 | 76 | # In[19]: 77 | a3 = a2.replace('DoS attacks-Slowloris', 1) 78 | 79 | # In[20]: 80 | a5_label = a3.Label.copy() 81 | 82 | # In[21]: 83 | a5_label.unique() 84 | 85 | # In[22]: 86 | a5_label.value_counts() 87 | 88 | # In[23]: 89 | category_variables = ["Protocol"] 90 | for cv in category_variables: 91 | d3[cv] = d3[cv].astype("category") 92 | a3[cv] = a3[cv].astype("category") 93 | 94 | print("Length of Categories for {} are {}".format(cv , len(d3[cv].cat.categories))) 95 | print("Categories for {} are {} \n".format(cv ,d3[cv].cat.categories)) 96 | 97 | # In[24]: 98 | dummy_variables_2labels = category_variables 99 | 100 | class preprocessing: 101 | train_labels = pd.get_dummies(d3, columns = dummy_variables_2labels, prefix=dummy_variables_2labels) 102 | test_labels = pd.get_dummies(a3, columns = dummy_variables_2labels, prefix=dummy_variables_2labels) 103 | 104 | # In[27]: 105 | preprocessing.test_labels.info(verbose=True) 106 | 107 | # In[29]: 108 | d3.head() 109 | 110 | # In[28]: 111 | preprocessing.test_labels.head() 112 | 113 | # In[47]: 114 | preprocessing.train_labels.to_csv("D:\KULIAH\Semester 8\Dataset\preprocessed_train_4_new.csv") 115 | preprocessing.test_labels.to_csv("D:\KULIAH\Semester 8\Dataset\preprocessed_test_4_new.csv") 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intrusion-Detection-System with CSE-CIC-IDS-2018 2 | 3 | This is a machine learning analysis of CIC-IDS-2018 using the random forest algorithm.
It uses only the "Thursday-15-02-2018_TrafficForML_CICFlowMeter.csv" file to analyze DDoS attacks. I applied the model to classify DDoS attacks in a Software-Defined Network, utilizing sFlow with Django + Django-Channels. 4 | More here: https://github.com/nadhirfr/rf-ids 5 | 6 | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/H2H146AUD) 7 | 8 | Credit: 9 | - https://www.unb.ca/cic/datasets/ids-2018.html 10 | - https://registry.opendata.aws/cse-cic-ids2018 11 | -------------------------------------------------------------------------------- /RF Grid CV - CICIDS2018 - Fix.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | " \n", 12 | "from sklearn.model_selection import train_test_split\n", 13 | "from sklearn import preprocessing\n", 14 | "from sklearn.ensemble import RandomForestClassifier\n", 15 | "from sklearn.pipeline import make_pipeline, Pipeline\n", 16 | "from sklearn.model_selection import GridSearchCV\n", 17 | "from sklearn.metrics import mean_squared_error, r2_score\n", 18 | "from sklearn.externals import joblib " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | "
URG Flag CntSYN Flag CntRST Flag CntPSH Flag CntPkt Size AvgFlow Pkts/sFIN Flag CntECE Flag CntACK Flag CntDst PortLabelProtocol_0Protocol_6Protocol_17
00001225.9000000.4419970004430010
1000078.5000007220.216606000530001
20000164.6666671.0061940014430010
3000049.5000004.72192600053550001
400000.0000005221.932115001800010
\n", 151 | "
" 152 | ], 153 | "text/plain": [ 154 | " URG Flag Cnt SYN Flag Cnt RST Flag Cnt PSH Flag Cnt Pkt Size Avg \\\n", 155 | "0 0 0 0 1 225.900000 \n", 156 | "1 0 0 0 0 78.500000 \n", 157 | "2 0 0 0 0 164.666667 \n", 158 | "3 0 0 0 0 49.500000 \n", 159 | "4 0 0 0 0 0.000000 \n", 160 | "\n", 161 | " Flow Pkts/s FIN Flag Cnt ECE Flag Cnt ACK Flag Cnt Dst Port Label \\\n", 162 | "0 0.441997 0 0 0 443 0 \n", 163 | "1 7220.216606 0 0 0 53 0 \n", 164 | "2 1.006194 0 0 1 443 0 \n", 165 | "3 4.721926 0 0 0 5355 0 \n", 166 | "4 5221.932115 0 0 1 80 0 \n", 167 | "\n", 168 | " Protocol_0 Protocol_6 Protocol_17 \n", 169 | "0 0 1 0 \n", 170 | "1 0 0 1 \n", 171 | "2 0 1 0 \n", 172 | "3 0 0 1 \n", 173 | "4 0 1 0 " 174 | ] 175 | }, 176 | "execution_count": 2, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "traindata = pd.read_csv('D:\\KULIAH\\Semester 8\\Dataset\\preprocessed_train_4_new.csv')\n", 183 | "testdata = pd.read_csv('D:\\KULIAH\\Semester 8\\Dataset\\preprocessed_test_4_new.csv')\n", 184 | "\n", 185 | "traindata.pop(\"Unnamed: 0\")\n", 186 | "testdata.pop(\"Unnamed: 0\")\n", 187 | "\n", 188 | "traindata.head()" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 3, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "\n", 201 | "RangeIndex: 832438 entries, 0 to 832437\n", 202 | "Data columns (total 13 columns):\n", 203 | "URG Flag Cnt 832438 non-null int64\n", 204 | "SYN Flag Cnt 832438 non-null int64\n", 205 | "RST Flag Cnt 832438 non-null int64\n", 206 | "PSH Flag Cnt 832438 non-null int64\n", 207 | "Pkt Size Avg 832438 non-null float64\n", 208 | "Flow Pkts/s 832438 non-null float64\n", 209 | "FIN Flag Cnt 832438 non-null int64\n", 210 | "ECE Flag Cnt 832438 non-null int64\n", 211 | "ACK Flag Cnt 832438 non-null int64\n", 212 | "Dst Port 832438 non-null int64\n", 213 | "Protocol_0 832438 non-null int64\n", 214 | "Protocol_6 
832438 non-null int64\n", 215 | "Protocol_17 832438 non-null int64\n", 216 | "dtypes: float64(2), int64(11)\n", 217 | "memory usage: 82.6 MB\n" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "Y = traindata.pop('Label')\n", 223 | "X = traindata.iloc[:,0:13]\n", 224 | "C = testdata.pop('Label')\n", 225 | "T = testdata.iloc[:,0:13]\n", 226 | "\n", 227 | "X.info(verbose=True)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 4, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "data": { 237 | "text/plain": [ 238 | "array([0, 0, 0, ..., 0, 0, 0], dtype=int64)" 239 | ] 240 | }, 241 | "execution_count": 4, 242 | "metadata": {}, 243 | "output_type": "execute_result" 244 | } 245 | ], 246 | "source": [ 247 | "_traindata = np.array(X)\n", 248 | "_trainlabel = np.array(Y)\n", 249 | "\n", 250 | "_testdata = np.array(T)\n", 251 | "_testlabel = np.array(C)\n", 252 | "\n", 253 | "_testlabel" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 5, 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "# 5. 
Declare data preprocessing steps\n", 263 | "pipeline = make_pipeline(RandomForestClassifier())\n", 264 | "\n", 265 | "# Add a dict of estimator and estimator related parameters in this list\n", 266 | "hyperparameters = {\n", 267 | " 'randomforestclassifier__n_estimators': [25,50,75,100],\n", 268 | " 'randomforestclassifier__max_features' : [None, \"log2\", \"auto\"]\n", 269 | " }\n" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 6, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "name": "stdout", 279 | "output_type": "stream", 280 | "text": [ 281 | "Fitting 5 folds for each of 12 candidates, totalling 60 fits\n" 282 | ] 283 | }, 284 | { 285 | "name": "stderr", 286 | "output_type": "stream", 287 | "text": [ 288 | "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", 289 | "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 56.7min\n", 290 | "[Parallel(n_jobs=-1)]: Done 60 out of 60 | elapsed: 67.7min finished\n" 291 | ] 292 | }, 293 | { 294 | "data": { 295 | "text/plain": [ 296 | "GridSearchCV(cv=5, error_score='raise-deprecating',\n", 297 | " estimator=Pipeline(memory=None,\n", 298 | " steps=[('randomforestclassifier', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", 299 | " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", 300 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 301 | " min_sam...bs=None,\n", 302 | " oob_score=False, random_state=None, verbose=0,\n", 303 | " warm_start=False))]),\n", 304 | " fit_params=None, iid='warn', n_jobs=-1,\n", 305 | " param_grid={'randomforestclassifier__n_estimators': [25, 50, 75, 100], 'randomforestclassifier__max_features': [None, 'log2', 'auto']},\n", 306 | " pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n", 307 | " scoring=None, verbose=1)" 308 | ] 309 | }, 310 | "execution_count": 6, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "# 7. 
Tune model using cross-validation pipeline\n", 317 | "clf = GridSearchCV(pipeline, hyperparameters, cv=5,verbose=1,n_jobs=-1)\n", 318 | "clf.fit(_traindata, _trainlabel)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 7, 324 | "metadata": { 325 | "scrolled": true 326 | }, 327 | "outputs": [ 328 | { 329 | "name": "stdout", 330 | "output_type": "stream", 331 | "text": [ 332 | "{'randomforestclassifier__max_features': 'log2', 'randomforestclassifier__n_estimators': 100}\n", 333 | "Pipeline(memory=None,\n", 334 | " steps=[('randomforestclassifier', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", 335 | " max_depth=None, max_features='log2', max_leaf_nodes=None,\n", 336 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 337 | " min_samples_leaf=1, min_samples_split=2,\n", 338 | " min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,\n", 339 | " oob_score=False, random_state=None, verbose=0,\n", 340 | " warm_start=False))])\n" 341 | ] 342 | } 343 | ], 344 | "source": [ 345 | "print(clf.best_params_)\n", 346 | "print(clf.best_estimator_)\n", 347 | "# print(clf.cv_results_ )" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 8, 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | "name": "stdout", 357 | "output_type": "stream", 358 | "text": [ 359 | "0.9985620550719694\n" 360 | ] 361 | } 362 | ], 363 | "source": [ 364 | "print(clf.best_score_ )" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 9, 370 | "metadata": {}, 371 | "outputs": [ 372 | { 373 | "name": "stdout", 374 | "output_type": "stream", 375 | "text": [ 376 | "True\n", 377 | "0.9985680649656432\n" 378 | ] 379 | } 380 | ], 381 | "source": [ 382 | "print (clf.refit)\n", 383 | " \n", 384 | "# 9. 
Evaluate model pipeline on test data\n", 385 | "pred = clf.predict(_testdata)\n", 386 | "\n", 387 | "\n", 388 | "from sklearn.metrics import accuracy_score\n", 389 | "print(accuracy_score(_testlabel, pred))" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": 10, 395 | "metadata": {}, 396 | "outputs": [ 397 | { 398 | "name": "stdout", 399 | "output_type": "stream", 400 | "text": [ 401 | " precision recall f1-score support\n", 402 | "\n", 403 | " 0 1.00 1.00 1.00 197610\n", 404 | " 1 0.98 0.99 0.99 10500\n", 405 | "\n", 406 | " micro avg 1.00 1.00 1.00 208110\n", 407 | " macro avg 0.99 0.99 0.99 208110\n", 408 | "weighted avg 1.00 1.00 1.00 208110\n", 409 | "\n", 410 | "[[197430 180]\n", 411 | " [ 118 10382]]\n" 412 | ] 413 | } 414 | ], 415 | "source": [ 416 | "from sklearn.metrics import confusion_matrix,classification_report\n", 417 | "\n", 418 | "cm = confusion_matrix(_testlabel, pred)\n", 419 | "print(classification_report(_testlabel, pred))\n", 420 | "print(cm)" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 11, 426 | "metadata": {}, 427 | "outputs": [ 428 | { 429 | "data": { 430 | "text/plain": [ 431 | "['rf_gridcv_tanpa_scaler_100-est-log2.pkl']" 432 | ] 433 | }, 434 | "execution_count": 11, 435 | "metadata": {}, 436 | "output_type": "execute_result" 437 | } 438 | ], 439 | "source": [ 440 | "# 10. 
Save model for future use\n", 441 | "joblib.dump(clf, 'rf_gridcv_tanpa_scaler_100-est-log2.pkl')" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 9, 447 | "metadata": {}, 448 | "outputs": [], 449 | "source": [ 450 | "clf2 = joblib.load('v-rf_gridcv_robust.pkl')\n", 451 | "\n" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": 10, 457 | "metadata": {}, 458 | "outputs": [ 459 | { 460 | "name": "stdout", 461 | "output_type": "stream", 462 | "text": [ 463 | "{'randomforestclassifier__max_depth': None, 'randomforestclassifier__max_features': 'log2'}\n", 464 | "Pipeline(memory=None,\n", 465 | " steps=[('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,\n", 466 | " with_scaling=True)), ('randomforestclassifier', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", 467 | " max_depth=None, max_features='log2', max_leaf_nodes=None,\n", 468 | " ...obs=None,\n", 469 | " oob_score=False, random_state=None, verbose=0,\n", 470 | " warm_start=False))])\n" 471 | ] 472 | } 473 | ], 474 | "source": [ 475 | "print(clf2.best_params_)\n", 476 | "print(clf2.best_estimator_)" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [] 485 | } 486 | ], 487 | "metadata": { 488 | "kernelspec": { 489 | "display_name": "Python 3", 490 | "language": "python", 491 | "name": "python3" 492 | }, 493 | "language_info": { 494 | "codemirror_mode": { 495 | "name": "ipython", 496 | "version": 3 497 | }, 498 | "file_extension": ".py", 499 | "mimetype": "text/x-python", 500 | "name": "python", 501 | "nbconvert_exporter": "python", 502 | "pygments_lexer": "ipython3", 503 | "version": "3.7.1" 504 | } 505 | }, 506 | "nbformat": 4, 507 | "nbformat_minor": 2 508 | } 509 | -------------------------------------------------------------------------------- /RF Grid CV - CICIDS2018 - Fix.py: 
#!/usr/bin/env python
# coding: utf-8
"""Grid-search a random forest on the preprocessed CSE-CIC-IDS2018 split.

Loads the preprocessed train/test CSV files, tunes ``n_estimators`` and
``max_features`` of a ``RandomForestClassifier`` with 5-fold cross-validation,
reports accuracy, a classification report and a confusion matrix on the test
split, and saves the fitted search object with joblib.
"""

# In[1]:
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report

try:
    # Standalone joblib; sklearn.externals.joblib was removed in scikit-learn 0.23.
    import joblib
except ImportError:
    # Old scikit-learn installs that still bundle joblib.
    from sklearn.externals import joblib

# Raw strings: the Windows paths contain backslash sequences such as "\K" and
# "\p" that are not valid escapes and trigger warnings in ordinary literals.
TRAIN_CSV_PATH = r'D:\KULIAH\Semester 8\Dataset\preprocessed_train_4_new.csv'
TEST_CSV_PATH = r'D:\KULIAH\Semester 8\Dataset\preprocessed_test_4_new.csv'
MODEL_PATH = 'rf_gridcv_tanpa_scaler_100-est-log2.pkl'
ROBUST_MODEL_PATH = Path('v-rf_gridcv_robust.pkl')


# In[2]:
traindata = pd.read_csv(TRAIN_CSV_PATH)
testdata = pd.read_csv(TEST_CSV_PATH)

# "Unnamed: 0" is the index column that was written along with the data.
traindata.pop("Unnamed: 0")
testdata.pop("Unnamed: 0")

# Notebook cells displayed these values implicitly; a script has to print.
print(traindata.head())


# In[3]:
# Separate the target column from the features of each split.
Y = traindata.pop('Label')
X = traindata.iloc[:, 0:13]
C = testdata.pop('Label')
T = testdata.iloc[:, 0:13]

X.info(verbose=True)


# In[4]:
_traindata = np.array(X)
_trainlabel = np.array(Y)

_testdata = np.array(T)
_testlabel = np.array(C)

print(_testlabel)


# In[5]:
# 5. Build the pipeline; the classifier is its only step.
pipeline = make_pipeline(RandomForestClassifier())

# Parameter grid, keyed by "<pipeline step name>__<parameter>".
# "sqrt" is what "auto" meant for classifiers; "auto" itself was removed in
# scikit-learn 1.3.
hyperparameters = {
    'randomforestclassifier__n_estimators': [25, 50, 75, 100],
    'randomforestclassifier__max_features': [None, "log2", "sqrt"],
}


# In[6]:
# 7. Tune model using cross-validation pipeline
clf = GridSearchCV(pipeline, hyperparameters, cv=5, verbose=1, n_jobs=-1)
clf.fit(_traindata, _trainlabel)


# In[7]:
print(clf.best_params_)
print(clf.best_estimator_)


# In[8]:
print(clf.best_score_)


# In[9]:
print(clf.refit)

# 9. Evaluate model pipeline on test data
pred = clf.predict(_testdata)
print(accuracy_score(_testlabel, pred))


# In[10]:
cm = confusion_matrix(_testlabel, pred)
print(classification_report(_testlabel, pred))
print(cm)


# In[11]:
# 10. Save model for future use
joblib.dump(clf, MODEL_PATH)


# In[9]:
# Optionally inspect a previously saved search object. This script never
# writes that file, so skip the step when it is absent instead of crashing.
# NOTE: joblib.load unpickles the file; only load model files you trust.
if ROBUST_MODEL_PATH.exists():
    clf2 = joblib.load(str(ROBUST_MODEL_PATH))

    # In[10]:
    print(clf2.best_params_)
    print(clf2.best_estimator_)
else:
    print("{} not found; skipping saved-model inspection".format(ROBUST_MODEL_PATH))