├── 1-Data_pre-processing_CAN.ipynb ├── 2-CNN_Model_Development&Hyperparameter Optimization.ipynb ├── 3-Ensemble_Models-CAN.ipynb ├── CAN.png ├── LICENSE ├── Paper_2201.11812.pdf ├── README.md ├── data ├── Car_Hacking_5%.csv └── README.md ├── framework.png └── supplementary_code ├── CAR_IDS_SVC.ipynb └── README.md /1-Data_pre-processing_CAN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles \n", 8 | "This is the code for the paper entitled \"**A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles**\" accepted in IEEE International Conference on Communications (IEEE ICC). \n", 9 | "Authors: Li Yang (lyang339@uwo.ca) and Abdallah Shami (Abdallah.Shami@uwo.ca) \n", 10 | "Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University\n", 11 | "\n", 12 | "**Notebook 1: Data pre-processing** \n", 13 | "Procedures: \n", 14 | "  1): Read the dataset \n", 15 | "  2): Transform the tabular data into images \n", 16 | "  3): Display the transformed images \n", 17 | "  4): Split the training and test set " 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Import libraries" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import numpy as np\n", 34 | "import pandas as pd\n", 35 | "import os\n", 36 | "import cv2\n", 37 | "import math\n", 38 | "import random\n", 39 | "import matplotlib.pyplot as plt\n", 40 | "import shutil\n", 41 | "from sklearn.preprocessing import QuantileTransformer\n", 42 | "from PIL import Image\n", 43 | "import warnings\n", 44 | "warnings.filterwarnings(\"ignore\")" 45 | ] 46 | }, 47 | { 48 | "cell_type": 
"markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Read the Car-Hacking/CAN-Intrusion dataset\n", 52 | "The complete Car-Hacking dataset is publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset \n", 53 | "In this repository, due to the file size limit of GitHub, we use the 5% subset." 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "metadata": { 60 | "collapsed": true 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "#Read dataset\n", 65 | "df=pd.read_csv('data/Car_Hacking_5%.csv')" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 4, 71 | "metadata": { 72 | "scrolled": true 73 | }, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/html": [ 78 | "
\n", 79 | "\n", 92 | "\n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 
| " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | "
CAN IDDATA[0]DATA[1]DATA[2]DATA[3]DATA[4]DATA[5]DATA[6]DATA[7]Label
0120141393935000154R
180964187127201732020R
21349216001360000R
3120141393935000154R
420000032228R
.................................
818435848532521041170012R
818436108825500025513490R
8184378485321001041170092R
81843813492169001370000R
818439790533481033300111R
\n", 254 | "

818440 rows × 10 columns

\n", 255 | "
" 256 | ], 257 | "text/plain": [ 258 | " CAN ID DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] DATA[5] DATA[6] \\\n", 259 | "0 1201 41 39 39 35 0 0 0 \n", 260 | "1 809 64 187 127 20 17 32 0 \n", 261 | "2 1349 216 0 0 136 0 0 0 \n", 262 | "3 1201 41 39 39 35 0 0 0 \n", 263 | "4 2 0 0 0 0 0 3 2 \n", 264 | "... ... ... ... ... ... ... ... ... \n", 265 | "818435 848 5 32 52 104 117 0 0 \n", 266 | "818436 1088 255 0 0 0 255 134 9 \n", 267 | "818437 848 5 32 100 104 117 0 0 \n", 268 | "818438 1349 216 90 0 137 0 0 0 \n", 269 | "818439 790 5 33 48 10 33 30 0 \n", 270 | "\n", 271 | " DATA[7] Label \n", 272 | "0 154 R \n", 273 | "1 20 R \n", 274 | "2 0 R \n", 275 | "3 154 R \n", 276 | "4 228 R \n", 277 | "... ... ... \n", 278 | "818435 12 R \n", 279 | "818436 0 R \n", 280 | "818437 92 R \n", 281 | "818438 0 R \n", 282 | "818439 111 R \n", 283 | "\n", 284 | "[818440 rows x 10 columns]" 285 | ] 286 | }, 287 | "execution_count": 4, 288 | "metadata": {}, 289 | "output_type": "execute_result" 290 | } 291 | ], 292 | "source": [ 293 | "df" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 5, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "data": { 303 | "text/plain": [ 304 | "R 701832\n", 305 | "RPM 32539\n", 306 | "gear 29944\n", 307 | "DoS 29501\n", 308 | "Fuzzy 24624\n", 309 | "Name: Label, dtype: int64" 310 | ] 311 | }, 312 | "execution_count": 5, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "# The labels of the dataset. \"R\" indicates normal patterns, and there are four types of attack (DoS, fuzzy, gear spoofing, and RPM spoofing attacks)\n", 319 | "df.Label.value_counts()" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "metadata": { 325 | "collapsed": true 326 | }, 327 | "source": [ 328 | "## Data Transformation\n", 329 | "Convert tabular data to images\n", 330 | "Procedures:\n", 331 | "1. 
Use quantile transform to transform the original data samples into the scale of [0,255], representing pixel values\n", 332 | "2. Generate images for each category (Normal, DoS, Fuzzy, Gear, RPM), each image consists of 27 data samples with 9 features. Thus, the size of each image is 9*9*3, length 9, width 9, and 3 color channels (RGB)." 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 6, 338 | "metadata": { 339 | "collapsed": true 340 | }, 341 | "outputs": [], 342 | "source": [ 343 | "# Transform all features into the scale of [0,1]. random_state pins the row subsampling QuantileTransformer uses to estimate the quantiles, so re-runs produce the same pixel values\n", 344 | "numeric_features = df.dtypes[df.dtypes != 'object'].index\n", 345 | "scaler = QuantileTransformer(random_state=0) \n", 346 | "df[numeric_features] = scaler.fit_transform(df[numeric_features])" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 7, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "# Multiply the feature values by 255 to transform them into the scale of [0,255] (run this cell once: it overwrites df, so re-running it alone multiplies the values by 255 again)\n", 356 | "df[numeric_features] = df[numeric_features].apply(\n", 357 | " lambda x: (x*255))" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 8, 363 | "metadata": {}, 364 | "outputs": [ 365 | { 366 | "data": { 367 | "text/html": [ 368 | "
\n", 369 | "\n", 382 | "\n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | "
CAN IDDATA[0]DATA[1]DATA[2]DATA[3]DATA[4]DATA[5]DATA[6]DATA[7]
count818440.000000818440.000000818440.000000818440.000000818440.000000818440.000000818440.000000818440.000000818440.000000
mean127.458603113.635407108.05550089.524039109.930495105.682464112.27309684.94544093.094805
std73.78040289.99327593.448831100.589117103.63269095.71642090.993393101.365609100.186463
min0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%66.8768770.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
50%122.650150126.096096115.5030030.000000130.818318127.755255129.5420420.0000000.000000
75%190.548048192.462462193.611111199.099099190.675676193.355856190.165165192.207207190.675676
max255.000000255.000000255.000000255.000000255.000000255.000000255.000000255.000000255.000000
\n", 496 | "
" 497 | ], 498 | "text/plain": [ 499 | " CAN ID DATA[0] DATA[1] DATA[2] \\\n", 500 | "count 818440.000000 818440.000000 818440.000000 818440.000000 \n", 501 | "mean 127.458603 113.635407 108.055500 89.524039 \n", 502 | "std 73.780402 89.993275 93.448831 100.589117 \n", 503 | "min 0.000000 0.000000 0.000000 0.000000 \n", 504 | "25% 66.876877 0.000000 0.000000 0.000000 \n", 505 | "50% 122.650150 126.096096 115.503003 0.000000 \n", 506 | "75% 190.548048 192.462462 193.611111 199.099099 \n", 507 | "max 255.000000 255.000000 255.000000 255.000000 \n", 508 | "\n", 509 | " DATA[3] DATA[4] DATA[5] DATA[6] \\\n", 510 | "count 818440.000000 818440.000000 818440.000000 818440.000000 \n", 511 | "mean 109.930495 105.682464 112.273096 84.945440 \n", 512 | "std 103.632690 95.716420 90.993393 101.365609 \n", 513 | "min 0.000000 0.000000 0.000000 0.000000 \n", 514 | "25% 0.000000 0.000000 0.000000 0.000000 \n", 515 | "50% 130.818318 127.755255 129.542042 0.000000 \n", 516 | "75% 190.675676 193.355856 190.165165 192.207207 \n", 517 | "max 255.000000 255.000000 255.000000 255.000000 \n", 518 | "\n", 519 | " DATA[7] \n", 520 | "count 818440.000000 \n", 521 | "mean 93.094805 \n", 522 | "std 100.186463 \n", 523 | "min 0.000000 \n", 524 | "25% 0.000000 \n", 525 | "50% 0.000000 \n", 526 | "75% 190.675676 \n", 527 | "max 255.000000 " 528 | ] 529 | }, 530 | "execution_count": 8, 531 | "metadata": {}, 532 | "output_type": "execute_result" 533 | } 534 | ], 535 | "source": [ 536 | "df.describe()" 537 | ] 538 | }, 539 | { 540 | "cell_type": "markdown", 541 | "metadata": { 542 | "collapsed": true 543 | }, 544 | "source": [ 545 | "All features are in the same scale of [0,255]" 546 | ] 547 | }, 548 | { 549 | "cell_type": "markdown", 550 | "metadata": {}, 551 | "source": [ 552 | "### Generate images for each class" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": 9, 558 | "metadata": { 559 | "collapsed": true 560 | }, 561 | "outputs": [], 562 | "source": [ 563 | 
"df0=df[df['Label']=='R'].drop(['Label'],axis=1)\n",
564 | "df1=df[df['Label']=='RPM'].drop(['Label'],axis=1)\n",
565 | "df2=df[df['Label']=='gear'].drop(['Label'],axis=1)\n",
566 | "df3=df[df['Label']=='DoS'].drop(['Label'],axis=1)\n",
567 | "df4=df[df['Label']=='Fuzzy'].drop(['Label'],axis=1)"
568 | ]
569 | },
570 | {
571 | "cell_type": "code",
572 | "execution_count": 30,
573 | "metadata": {},
574 | "outputs": [],
575 | "source": [
576 | "def save_class_images(frame, image_path):\n",
577 | "    \"\"\"Save the rows of `frame` as 9*9 RGB PNG images inside `image_path`.\n",
578 | "\n",
579 | "    Each image is built from 27 consecutive rows of 9 values (27*9 = 243 = 9*9*3),\n",
580 | "    truncated to uint8. The file is named after the position of the row that follows\n",
581 | "    the group (27.png, 55.png, 83.png, ...); that row itself is not put into any image,\n",
582 | "    and rows left over at the end that cannot fill an image are ignored.\n",
583 | "    This reproduces the grouping of the per-class loops this helper replaces.\n",
584 | "    NOTE(review): skipping one row in every 28 looks unintended -- confirm before changing it,\n",
585 | "    because the image count and the file names depend on it.\n",
586 | "    \"\"\"\n",
587 | "    rows_per_image = 27\n",
588 | "    # exist_ok=True: a bare os.makedirs raises FileExistsError when the cell is run a second time\n",
589 | "    os.makedirs(image_path, exist_ok=True)\n",
590 | "    # Convert the frame once instead of doing one .iloc lookup and one np.append per row\n",
591 | "    values = np.asarray(frame.values, dtype=float)\n",
592 | "    for start in range(0, len(values) - rows_per_image, rows_per_image + 1):\n",
593 | "        stop = start + rows_per_image\n",
594 | "        pixels = values[start:stop].reshape(9, 9, 3).astype(np.uint8)\n",
595 | "        Image.fromarray(pixels).save(image_path + str(stop) + '.png')"
596 | ]
597 | },
598 | {
599 | "cell_type": "code",
600 | "execution_count": 31,
601 | "metadata": {},
602 | "outputs": [],
603 | "source": [
604 | "# Generate 9*9 color images for every class: 0 Normal, 1 RPM spoofing, 2 Gear spoofing, 3 DoS attack, 4 Fuzzy attack\n",
605 | "for class_id, class_frame in enumerate([df0, df1, df2, df3, df4]):\n",
606 | "    save_class_images(class_frame, \"train/\" + str(class_id) + \"/\")"
607 | ]
608 | },
609 | {
610 | "cell_type": "markdown",
611 | "metadata": {},
612 | "source": [
613 | "## Split the training and test set "
614 | ]
615 | },
616 | {
617 | "cell_type": "code",
618 | "execution_count": 56,
619 | "metadata": {},
620 | "outputs": [
621 | {
622 | "name": "stdout",
623 | "output_type": "stream",
624 | "text": [
625 | "29227\n"
626 | ]
627 | }
628 | ],
629 | "source": [
630 | "# Collect the paths of all generated images; sorted so the order does not depend on the file system\n",
631 | "Train_Dir='./train/'\n",
632 | "Val_Dir='./test/'\n",
633 | "allimgs=[]\n",
634 | "for subdir in sorted(os.listdir(Train_Dir)):\n",
635 | "    for filename in sorted(os.listdir(os.path.join(Train_Dir,subdir))):\n",
636 | "        filepath=os.path.join(Train_Dir,subdir,filename)\n",
637 | "        allimgs.append(filepath)\n",
638 | "print(len(allimgs)) # Print the total number of images"
639 | ]
640 | },
641 | {
642 | "cell_type": "code",
643 | "execution_count": 58,
644 | "metadata": {},
645 | "outputs": [],
646 | "source": [
647 | "#split a test set from the dataset, train/test size = 80%/20%\n",
648 | "Numbers=len(allimgs)//5 \t#size of test set (20%)\n",
649 | "\n",
650 | "def mymovefile(srcfile,dstfile):\n",
651 | "    \"\"\"Move `srcfile` to `dstfile`, creating the destination folder when it is missing.\n",
652 | "\n",
653 | "    Prints a message and moves nothing when `srcfile` is not an existing file.\n",
654 | "    \"\"\"\n",
655 | "    if not os.path.isfile(srcfile):\n",
656 | "        print (\"%s not exist!\"%(srcfile))\n",
657 | "    else:\n",
658 | "        fpath,fname=os.path.split(dstfile)\n",
659 | "        # fpath is empty when dstfile has no folder part; os.makedirs('') would raise\n",
660 | "        if fpath:\n",
661 | "            os.makedirs(fpath,exist_ok=True)\n",
662 | "        shutil.move(srcfile,dstfile)"
663 | ]
664 | },
665 | {
666 | "cell_type": "code",
667 | "execution_count": 59,
668 | "metadata": {
669 | "scrolled": true
670 | },
671 | "outputs": [
672 | {
673 | "data": {
674 | "text/plain": [
675 | "5845"
676 | ]
677 | },
678 | "execution_count": 59,
679 | "metadata": {},
680 | "output_type": "execute_result"
681 | }
682 | ],
683 | "source": [
684 | "# The size of 
test set\n",
783 | "Numbers"
784 | ]
785 | },
786 | {
787 | "cell_type": "code",
788 | "execution_count": 60,
789 | "metadata": {},
790 | "outputs": [
791 | {
792 | "name": "stdout",
793 | "output_type": "stream",
794 | "text": [
795 | "Finish creating test set\n"
796 | ]
797 | }
798 | ],
799 | "source": [
800 | "# Create the test set: move `Numbers` randomly chosen images from Train_Dir to the same class folder under Val_Dir\n",
801 | "# The fixed seed and the sorted candidate list make the split repeatable across runs\n",
802 | "random.seed(0)\n",
803 | "val_imgs=random.sample(sorted(allimgs),Numbers)\n",
804 | "for img in val_imgs:\n",
805 | "    dest_path=img.replace(Train_Dir,Val_Dir)\n",
806 | "    mymovefile(img,dest_path)\n",
807 | "print('Finish creating test set')"
808 | ]
809 | },
810 | {
811 | "cell_type": "code",
812 | "execution_count": 61,
813 | "metadata": {
814 | "collapsed": true
815 | },
816 | "outputs": [],
817 | "source": [
818 | "#resize the images 224*224 for better CNN training\n",
819 | "def get_224(folder,dstdir):\n",
820 | "    \"\"\"Resize every image under `folder` to 224*224 and write it to the same relative path under `dstdir`.\n",
821 | "\n",
822 | "    Files that OpenCV cannot read are reported and skipped.\n",
823 | "    \"\"\"\n",
824 | "    # List all files first, so files written during the run are never picked up by the walk\n",
825 | "    imgfilepaths=[]\n",
826 | "    for root,dirs,imgs in os.walk(folder):\n",
827 | "        for thisimg in imgs:\n",
828 | "            imgfilepaths.append(os.path.join(root,thisimg))\n",
829 | "    for thisimg_path in imgfilepaths:\n",
830 | "        # relpath maps the file into dstdir without relying on a text replace of the folder name\n",
831 | "        new_file_path=os.path.join(dstdir,os.path.relpath(thisimg_path,folder))\n",
832 | "        os.makedirs(os.path.dirname(new_file_path),exist_ok=True)\n",
833 | "        img=cv2.imread(thisimg_path)\n",
834 | "        if img is None:\n",
835 | "            # cv2.imread returns None instead of raising when a file cannot be decoded\n",
836 | "            print('Skip unreadable file: %s'%(thisimg_path))\n",
837 | "            continue\n",
838 | "        img=cv2.resize(img,(224,224))\n",
839 | "        cv2.imwrite(new_file_path,img)\n",
840 | "    print('Finish resizing')"
841 | ]
842 | },
843 | {
844 | "cell_type": "code",
845 | "execution_count": 62,
846 | "metadata": {},
847 | "outputs": [
848 | {
849 | "name": "stdout",
850 | "output_type": "stream",
851 | "text": [
852 | "Finish resizing\n"
853 | ]
854 | }
855 | ],
856 | "source": [
857 | "DATA_DIR_224='./train_224/'\n",
858 | "get_224(folder='./train/',dstdir=DATA_DIR_224)"
859 | ]
860 | },
861 | {
862 | "cell_type": "code",
863 | "execution_count": 63,
864 | "metadata": {},
865 | 
"outputs": [ 858 | { 859 | "name": "stdout", 860 | "output_type": "stream", 861 | "text": [ 862 | "Finish resizing\n" 863 | ] 864 | } 865 | ], 866 | "source": [ 867 | "DATA_DIR2_224='./test_224/'\n", 868 | "get_224(folder='./test/',dstdir=DATA_DIR2_224)" 869 | ] 870 | }, 871 | { 872 | "cell_type": "markdown", 873 | "metadata": {}, 874 | "source": [ 875 | "### Display samples for each category" 876 | ] 877 | }, 878 | { 879 | "cell_type": "code", 880 | "execution_count": 2, 881 | "metadata": {}, 882 | "outputs": [ 883 | { 884 | "data": { 885 | "image/png": "\n", 886 | "text/plain": [ 887 | "
"
888 | ]
889 | },
890 | "metadata": {
891 | "needs_background": "light"
892 | },
893 | "output_type": "display_data"
894 | }
895 | ],
896 | "source": [
897 | "# Show one sample image per category.\n",
898 | "# The first file (by name) found in each class folder is used, because the random train/test split decides which file names remain in ./train_224/\n",
899 | "class_titles = [\"Normal\", \"RPM Spoofing\", \"Gear Spoofing\", \"DoS Attack\", \"Fuzzy Attack\"]\n",
900 | "\n",
901 | "plt.figure(figsize=(10, 10))\n",
902 | "for class_id, title in enumerate(class_titles):\n",
903 | "    class_dir = os.path.join('./train_224/', str(class_id))\n",
904 | "    sample_name = sorted(os.listdir(class_dir))[0]\n",
905 | "    plt.subplot(1, 5, class_id + 1)\n",
906 | "    plt.imshow(Image.open(os.path.join(class_dir, sample_name)))\n",
907 | "    plt.title(title)\n",
908 | "plt.show() # display it"
909 | ]
910 | },
911 | {
912 | "cell_type": "code",
913 | "execution_count": null,
914 | "metadata": {},
915 | "outputs": [],
916 | "source": []
917 | }
918 | ],
919 | "metadata": {
920 | "anaconda-cloud": {},
921 | "kernelspec": {
922 | "display_name": "Python 3",
923 | "language": "python",
924 | "name": "python3"
925 | },
926 | "language_info": {
927 | "codemirror_mode": {
928 | "name": "ipython",
929 | "version": 3
930 | },
931 | "file_extension": ".py",
932 | "mimetype": "text/x-python",
933 | "name": "python",
934 | "nbconvert_exporter": "python",
935 | "pygments_lexer": "ipython3",
936 | "version": "3.6.8"
937 | }
938 | },
939 | "nbformat": 4,
940 | "nbformat_minor": 2
941 | }
942 | 
--------------------------------------------------------------------------------
/CAN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/c2aacb76cc184dc1ea29f2c6b97e5bbde8221f71/CAN.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Western OC2 Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Paper_2201.11812.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/c2aacb76cc184dc1ea29f2c6b97e5bbde8221f71/Paper_2201.11812.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intrusion-Detection-System-Using-CNN-and-Transfer-Learning 2 | 3 | This is the code for the paper entitled "**[A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles](https://arxiv.org/pdf/2201.11812.pdf)**" published in **IEEE International Conference on Communications (IEEE ICC)**, doi: [10.1109/ICC45855.2022.9838780](https://ieeexplore.ieee.org/document/9838780). 4 | - Authors: Li Yang and Abdallah Shami 5 | - Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University 6 | 7 | This repository introduces how to use **convolutional neural networks (CNNs)** and **transfer learning** techniques to develop **intrusion detection systems**. **Ensemble learning** and **hyperparameter optimization techniques** are also used to achieve optimized model performance. 
8 | 9 | - Another **intrusion detection system development code** using **decision tree-based machine learning algorithms (Decision tree, random forest, XGBoost, stacking, etc.)** can be found in: [Intrusion-Detection-System-Using-Machine-Learning](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-Machine-Learning) 10 | 11 | - A comprehensive **hyperparameter optimization** tutorial code can be found in: [Hyperparameter-Optimization-of-Machine-Learning-Algorithms](https://github.com/LiYangHart/Hyperparameter-Optimization-of-Machine-Learning-Algorithms) 12 | 13 | ## Abstract of The Paper 14 | Modern vehicles, including autonomous vehicles and connected vehicles, are increasingly connected to the external world, which enables various functionalities and services. However, the improving connectivity also increases the attack surfaces of the Internet of Vehicles (IoV), causing its vulnerabilities to cyber-threats. Due to the lack of authentication and encryption procedures in vehicular networks, Intrusion Detection Systems (IDSs) are essential approaches to protect modern vehicle systems from network attacks. In this paper, a transfer learning and ensemble learning-based IDS is proposed for IoV systems using convolutional neural networks (CNNs) and hyper-parameter optimization techniques. In the experiments, the proposed IDS has demonstrated over 99.25% detection rates and F1-scores on two well-known public benchmark IoV security datasets: the Car-Hacking dataset and the CICIDS2017 dataset. This shows the effectiveness of the proposed IDS for cyber-attack detection in both intra-vehicle and external vehicular networks. 15 | 16 |

17 | 18 | 19 |

20 | 21 | ## Implementation 22 | ### CNN Models 23 | * VGG16 24 | * VGG19 25 | * Xception 26 | * Inception 27 | * Resnet 28 | * InceptionResnet 29 | 30 | ### Ensemble Learning Models 31 | * Bagging 32 | * Probability Averaging 33 | * Concatenation 34 | 35 | ### Hyperparameter Optimization Methods 36 | * Random Search (RS) 37 | * Bayesian Optimization - Tree-structured Parzen Estimator (BO-TPE) 38 | 39 | ### Dataset 40 | 1. CAN-intrusion/Car-Hacking dataset, a benchmark network security dataset for intra-vehicle intrusion detection 41 | * Publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset 42 | * Can be processed using the same code 43 | 44 | 2. CICIDS2017 dataset, a popular network traffic dataset for intrusion detection problems 45 | * Publicly available at: https://www.unb.ca/cic/datasets/ids-2017.html 46 | 47 | For the purpose of displaying the experimental results in Jupyter Notebook, the sampled subset of the CAN-intrusion dataset is used in the sample code. The subsets are in the "[data](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/tree/main/data)" folder. 48 | 49 | ### Code 50 | * [1-Data_pre-processing_CAN.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/1-Data_pre-processing_CAN.ipynb): code for data pre-processing and transformation (tabular data to images). 51 | * [2-CNN_Model_Development&Hyperparameter Optimization.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/2-CNN_Model_Development%26Hyperparameter%20Optimization.ipynb): code for the development of CNN models and their hyperparameter optimization. 52 | * [3-Ensemble_Models-CAN.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/3-Ensemble_Models-CAN.ipynb): code for the construction of three ensemble learning techniques. 
53 | 54 | Libraries 55 | * Python 3.5+ 56 | * [Keras 2.1.0+](https://keras.io/) 57 | * [Tensorflow 1.10.0+](https://www.tensorflow.org/install/gpu) 58 | * [OpenCV-python](https://docs.opencv.org/4.x/d6/d00/tutorial_py_root.html) 59 | * [hyperopt](https://github.com/hyperopt/hyperopt) 60 | 61 | ## Contact-Info 62 | Please feel free to contact us for any questions or cooperation opportunities. We will be happy to help. 63 | * Email: [liyanghart@gmail.com](mailto:liyanghart@gmail.com) or [Abdallah.Shami@uwo.ca](mailto:Abdallah.Shami@uwo.ca) 64 | * GitHub: [LiYangHart](https://github.com/LiYangHart) and [Western OC2 Lab](https://github.com/Western-OC2-Lab/) 65 | * LinkedIn: [Li Yang](https://www.linkedin.com/in/li-yang-phd-65a190176/) 66 | * Google Scholar: [Li Yang](https://scholar.google.com.eg/citations?user=XEfM7bIAAAAJ&hl=en) and [OC2 Lab](https://scholar.google.com.eg/citations?user=oiebNboAAAAJ&hl=en) 67 | 68 | ## Citation 69 | If you find this repository useful in your research, please cite this article as: 70 | 71 | L. Yang and A. Shami, "A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles," ICC 2022 - IEEE International Conference on Communications, 2022, pp. 2774-2779, doi: 10.1109/ICC45855.2022.9838780. 
72 | 73 | ``` 74 | @INPROCEEDINGS{9838780, 75 | author={Yang, Li and Shami, Abdallah}, 76 | booktitle={ICC 2022 - IEEE International Conference on Communications}, 77 | title={A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles}, 78 | year={2022}, 79 | pages={2774-2779}, 80 | doi={10.1109/ICC45855.2022.9838780}} 81 | ``` 82 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # The sampled datasets used for the experiments in the sample code 2 | 3 | **Car_Hacking_5%.csv**: The 5% randomly sampled subset of the [Car Hacking dataset](https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset) 4 | -------------------------------------------------------------------------------- /framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/c2aacb76cc184dc1ea29f2c6b97e5bbde8221f71/framework.png -------------------------------------------------------------------------------- /supplementary_code/CAR_IDS_SVC.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "CAR_IDS_LOGISTIC_SVM (1).ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "id": "lGiyP2dR6Jw-" 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "import numpy as np\n", 28 | "import pandas as pd\n", 29 | "from sklearn.linear_model import LogisticRegression\n", 30 | "from sklearn.ensemble import 
RandomForestClassifier" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "source": [ 36 | "def changecolumn(dataset, AttackType): # read one capture CSV, keep a seeded 10% sample, name the 12 columns, label every row\n", 37 | " df = pd.read_csv(dataset).sample(frac = 0.1, random_state = 20, replace = False).reset_index(drop=True) # NOTE(review): read_csv treats the first row as a header before the rename below - confirm the CSVs have one\n", 38 | " df.columns = [\"Timestamp\", \"CAN ID\", \"Byte\", \"DATA[0]\",\"DATA[1]\",\"DATA[2]\",\"DATA[3]\",\"DATA[4]\",\"DATA[5]\",\"DATA[6]\",\"DATA[7]\",\"AttackType\"]\n", 39 | " df['AttackType'] = np.where(df['AttackType'] == 'T',AttackType, 'Normal Message') # flag 'T' -> the given attack label; every other value -> 'Normal Message'\n", 40 | " df = df.dropna() # dropna() returns a new frame; keep it so rows with missing fields are really removed\n", 41 | " return df\n", 42 | "\n", 43 | "dfDos = changecolumn('DoS_dataset.csv','DoS Attack')\n", 44 | "dfFuzzy = changecolumn('Fuzzy_dataset.csv','Fuzzy Attack')\n", 45 | "dfGear = changecolumn('gear_dataset.csv','Gear Spoofing Attack')\n", 46 | "dfRPM = changecolumn('RPM_dataset.csv','RPM Spoofing Attack')\n", 47 | "frames = [dfDos, dfFuzzy, dfGear, dfRPM]\n", 48 | "df = pd.concat(frames)\n", 49 | "print(df.head(10))\n", 50 | "print(df.shape)\n" 51 | ], 52 | "metadata": { 53 | "colab": { 54 | "base_uri": "https://localhost:8080/" 55 | }, 56 | "id": "K4qXdKi-756E", 57 | "outputId": "fcf9fee4-8ccb-48a1-99bc-401628828b0f" 58 | }, 59 | "execution_count": 2, 60 | "outputs": [ 61 | { 62 | "output_type": "stream", 63 | "name": "stdout", 64 | "text": [ 65 | " Timestamp CAN ID Byte DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] DATA[5] \\\n", 66 | "0 1.478200e+09 0000 8 00 00 00 00 00 00 \n", 67 | "1 1.478201e+09 0131 8 1b 80 00 00 3f 7f \n", 68 | "2 1.478199e+09 00a1 8 80 89 00 00 24 00 \n", 69 | "3 1.478200e+09 0260 8 18 21 22 30 08 8f \n", 70 | "4 1.478201e+09 02c0 8 14 00 00 00 00 00 \n", 71 | "5 1.478200e+09 0130 8 0b 80 00 ff 08 80 \n", 72 | "6 1.478200e+09 0370 8 00 20 00 00 00 00 \n", 73 | "7 1.478199e+09 04f0 8 00 00 00 80 00 69 \n", 74 | "8 1.478199e+09 0130 8 05 80 00 ff 0b 80 \n", 75 | "9 1.478198e+09 0131 8 f7 7f 00 00 4c 7f \n", 76 | "\n", 77 | " DATA[6] DATA[7] AttackType \n", 78 | "0 00 00 DoS Attack \n", 79 | "1 0e a6 Normal Message 
\n", 80 | "2 00 00 Normal Message \n", 81 | "3 70 05 Normal Message \n", 82 | "4 00 00 Normal Message \n", 83 | "5 04 88 Normal Message \n", 84 | "6 00 00 Normal Message \n", 85 | "7 d1 13 Normal Message \n", 86 | "8 0c ed Normal Message \n", 87 | "9 0d e7 Normal Message \n", 88 | "(1656947, 12)\n" 89 | ] 90 | } 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "source": [ 96 | "print(df.dtypes)\n", 97 | "df = df.dropna()\n", 98 | "def changecolumntype(df):\n", 99 | " for column in df[['CAN ID', 'DATA[0]', 'DATA[1]', 'DATA[2]', 'DATA[3]', 'DATA[4]', 'DATA[5]', 'DATA[6]', 'DATA[7]']]:\n", 100 | " df[column] = df[column].apply(lambda x: int(str(x), base=16))\n", 101 | " return df\n", 102 | "\n", 103 | "df = changecolumntype(df)\n", 104 | "print(df.dtypes)\n", 105 | "df.head(10)" 106 | ], 107 | "metadata": { 108 | "colab": { 109 | "base_uri": "https://localhost:8080/", 110 | "height": 1000 111 | }, 112 | "id": "XZTb7XOpJhQw", 113 | "outputId": "2755474a-cd7b-4575-cca0-7b7017da8297" 114 | }, 115 | "execution_count": 3, 116 | "outputs": [ 117 | { 118 | "output_type": "stream", 119 | "name": "stdout", 120 | "text": [ 121 | "Timestamp float64\n", 122 | "CAN ID object\n", 123 | "Byte int64\n", 124 | "DATA[0] object\n", 125 | "DATA[1] object\n", 126 | "DATA[2] object\n", 127 | "DATA[3] object\n", 128 | "DATA[4] object\n", 129 | "DATA[5] object\n", 130 | "DATA[6] object\n", 131 | "DATA[7] object\n", 132 | "AttackType object\n", 133 | "dtype: object\n", 134 | "Timestamp float64\n", 135 | "CAN ID int64\n", 136 | "Byte int64\n", 137 | "DATA[0] int64\n", 138 | "DATA[1] int64\n", 139 | "DATA[2] int64\n", 140 | "DATA[3] int64\n", 141 | "DATA[4] int64\n", 142 | "DATA[5] int64\n", 143 | "DATA[6] int64\n", 144 | "DATA[7] int64\n", 145 | "AttackType object\n", 146 | "dtype: object\n" 147 | ] 148 | }, 149 | { 150 | "output_type": "execute_result", 151 | "data": { 152 | "text/html": [ 153 | "\n", 154 | "
\n", 155 | "
\n", 156 | "
\n", 157 | "\n", 170 | "\n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " 
\n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | "
TimestampCAN IDByteDATA[0]DATA[1]DATA[2]DATA[3]DATA[4]DATA[5]DATA[6]DATA[7]AttackType
01.478200e+090800000000DoS Attack
11.478201e+09305827128006312714166Normal Message
21.478199e+0916181281370036000Normal Message
31.478200e+0960882433344881431125Normal Message
41.478201e+097048200000000Normal Message
51.478200e+09304811128025581284136Normal Message
61.478200e+098808032000000Normal Message
71.478199e+0912648000128010520919Normal Message
81.478199e+093048512802551112812237Normal Message
91.478198e+093058247127007612713231Normal Message
\n", 341 | "
\n", 342 | " \n", 352 | " \n", 353 | " \n", 390 | "\n", 391 | " \n", 415 | "
\n", 416 | "
\n", 417 | " " 418 | ], 419 | "text/plain": [ 420 | " Timestamp CAN ID Byte DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] \\\n", 421 | "0 1.478200e+09 0 8 0 0 0 0 0 \n", 422 | "1 1.478201e+09 305 8 27 128 0 0 63 \n", 423 | "2 1.478199e+09 161 8 128 137 0 0 36 \n", 424 | "3 1.478200e+09 608 8 24 33 34 48 8 \n", 425 | "4 1.478201e+09 704 8 20 0 0 0 0 \n", 426 | "5 1.478200e+09 304 8 11 128 0 255 8 \n", 427 | "6 1.478200e+09 880 8 0 32 0 0 0 \n", 428 | "7 1.478199e+09 1264 8 0 0 0 128 0 \n", 429 | "8 1.478199e+09 304 8 5 128 0 255 11 \n", 430 | "9 1.478198e+09 305 8 247 127 0 0 76 \n", 431 | "\n", 432 | " DATA[5] DATA[6] DATA[7] AttackType \n", 433 | "0 0 0 0 DoS Attack \n", 434 | "1 127 14 166 Normal Message \n", 435 | "2 0 0 0 Normal Message \n", 436 | "3 143 112 5 Normal Message \n", 437 | "4 0 0 0 Normal Message \n", 438 | "5 128 4 136 Normal Message \n", 439 | "6 0 0 0 Normal Message \n", 440 | "7 105 209 19 Normal Message \n", 441 | "8 128 12 237 Normal Message \n", 442 | "9 127 13 231 Normal Message " 443 | ] 444 | }, 445 | "metadata": {}, 446 | "execution_count": 3 447 | } 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "source": [ 453 | "df['Message'] = df.iloc[:,3:11].apply(lambda x: ''.join(x.astype(str)), axis = 1)\n", 454 | "df.head(10)" 455 | ], 456 | "metadata": { 457 | "colab": { 458 | "base_uri": "https://localhost:8080/", 459 | "height": 601 460 | }, 461 | "id": "ym4-oGjemqFD", 462 | "outputId": "2dc335e5-788b-41ff-d48c-1a2602f67391" 463 | }, 464 | "execution_count": 4, 465 | "outputs": [ 466 | { 467 | "output_type": "execute_result", 468 | "data": { 469 | "text/html": [ 470 | "\n", 471 | "
\n", 472 | "
\n", 473 | "
\n", 474 | "\n", 487 | "\n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " 
\n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | "
TimestampCAN IDByteDATA[0]DATA[1]DATA[2]DATA[3]DATA[4]DATA[5]DATA[6]DATA[7]AttackTypeMessage
01.478200e+090800000000DoS Attack00000000
11.478201e+09305827128006312714166Normal Message27128006312714166
21.478199e+0916181281370036000Normal Message1281370036000
31.478200e+0960882433344881431125Normal Message2433344881431125
41.478201e+097048200000000Normal Message200000000
51.478200e+09304811128025581284136Normal Message11128025581284136
61.478200e+098808032000000Normal Message032000000
71.478199e+0912648000128010520919Normal Message000128010520919
81.478199e+093048512802551112812237Normal Message512802551112812237
91.478198e+093058247127007612713231Normal Message247127007612713231
\n", 669 | "
\n", 670 | " \n", 680 | " \n", 681 | " \n", 718 | "\n", 719 | " \n", 743 | "
\n", 744 | "
\n", 745 | " " 746 | ], 747 | "text/plain": [ 748 | " Timestamp CAN ID Byte DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] \\\n", 749 | "0 1.478200e+09 0 8 0 0 0 0 0 \n", 750 | "1 1.478201e+09 305 8 27 128 0 0 63 \n", 751 | "2 1.478199e+09 161 8 128 137 0 0 36 \n", 752 | "3 1.478200e+09 608 8 24 33 34 48 8 \n", 753 | "4 1.478201e+09 704 8 20 0 0 0 0 \n", 754 | "5 1.478200e+09 304 8 11 128 0 255 8 \n", 755 | "6 1.478200e+09 880 8 0 32 0 0 0 \n", 756 | "7 1.478199e+09 1264 8 0 0 0 128 0 \n", 757 | "8 1.478199e+09 304 8 5 128 0 255 11 \n", 758 | "9 1.478198e+09 305 8 247 127 0 0 76 \n", 759 | "\n", 760 | " DATA[5] DATA[6] DATA[7] AttackType Message \n", 761 | "0 0 0 0 DoS Attack 00000000 \n", 762 | "1 127 14 166 Normal Message 27128006312714166 \n", 763 | "2 0 0 0 Normal Message 1281370036000 \n", 764 | "3 143 112 5 Normal Message 2433344881431125 \n", 765 | "4 0 0 0 Normal Message 200000000 \n", 766 | "5 128 4 136 Normal Message 11128025581284136 \n", 767 | "6 0 0 0 Normal Message 032000000 \n", 768 | "7 105 209 19 Normal Message 000128010520919 \n", 769 | "8 128 12 237 Normal Message 512802551112812237 \n", 770 | "9 127 13 231 Normal Message 247127007612713231 " 771 | ] 772 | }, 773 | "metadata": {}, 774 | "execution_count": 4 775 | } 776 | ] 777 | }, 778 | { 779 | "cell_type": "code", 780 | "source": [ 781 | "#df['Message'] = df['Message'].map(lambda x: int(x))\n", 782 | "df['Message'] = df['Message'].astype(float)\n", 783 | "df.head(10)" 784 | ], 785 | "metadata": { 786 | "colab": { 787 | "base_uri": "https://localhost:8080/", 788 | "height": 601 789 | }, 790 | "id": "fUPHSmKPAP6_", 791 | "outputId": "4d196f89-62c4-4c91-eb66-cb3a8172af9f" 792 | }, 793 | "execution_count": 5, 794 | "outputs": [ 795 | { 796 | "output_type": "execute_result", 797 | "data": { 798 | "text/html": [ 799 | "\n", 800 | "
\n", 801 | "
\n", 802 | "
\n", 803 | "\n", 816 | "\n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " 
\n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | "
TimestampCAN IDByteDATA[0]DATA[1]DATA[2]DATA[3]DATA[4]DATA[5]DATA[6]DATA[7]AttackTypeMessage
01.478200e+090800000000DoS Attack0.000000e+00
11.478201e+09305827128006312714166Normal Message2.712801e+16
21.478199e+0916181281370036000Normal Message1.281370e+12
31.478200e+0960882433344881431125Normal Message2.433345e+15
41.478201e+097048200000000Normal Message2.000000e+08
51.478200e+09304811128025581284136Normal Message1.112803e+16
61.478200e+098808032000000Normal Message3.200000e+07
71.478199e+0912648000128010520919Normal Message1.280105e+11
81.478199e+093048512802551112812237Normal Message5.128026e+17
91.478198e+093058247127007612713231Normal Message2.471270e+17
\n", 998 | "
\n", 999 | " \n", 1009 | " \n", 1010 | " \n", 1047 | "\n", 1048 | " \n", 1072 | "
\n", 1073 | "
\n", 1074 | " " 1075 | ], 1076 | "text/plain": [ 1077 | " Timestamp CAN ID Byte DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] \\\n", 1078 | "0 1.478200e+09 0 8 0 0 0 0 0 \n", 1079 | "1 1.478201e+09 305 8 27 128 0 0 63 \n", 1080 | "2 1.478199e+09 161 8 128 137 0 0 36 \n", 1081 | "3 1.478200e+09 608 8 24 33 34 48 8 \n", 1082 | "4 1.478201e+09 704 8 20 0 0 0 0 \n", 1083 | "5 1.478200e+09 304 8 11 128 0 255 8 \n", 1084 | "6 1.478200e+09 880 8 0 32 0 0 0 \n", 1085 | "7 1.478199e+09 1264 8 0 0 0 128 0 \n", 1086 | "8 1.478199e+09 304 8 5 128 0 255 11 \n", 1087 | "9 1.478198e+09 305 8 247 127 0 0 76 \n", 1088 | "\n", 1089 | " DATA[5] DATA[6] DATA[7] AttackType Message \n", 1090 | "0 0 0 0 DoS Attack 0.000000e+00 \n", 1091 | "1 127 14 166 Normal Message 2.712801e+16 \n", 1092 | "2 0 0 0 Normal Message 1.281370e+12 \n", 1093 | "3 143 112 5 Normal Message 2.433345e+15 \n", 1094 | "4 0 0 0 Normal Message 2.000000e+08 \n", 1095 | "5 128 4 136 Normal Message 1.112803e+16 \n", 1096 | "6 0 0 0 Normal Message 3.200000e+07 \n", 1097 | "7 105 209 19 Normal Message 1.280105e+11 \n", 1098 | "8 128 12 237 Normal Message 5.128026e+17 \n", 1099 | "9 127 13 231 Normal Message 2.471270e+17 " 1100 | ] 1101 | }, 1102 | "metadata": {}, 1103 | "execution_count": 5 1104 | } 1105 | ] 1106 | }, 1107 | { 1108 | "cell_type": "code", 1109 | "source": [ 1110 | "import datetime\n", 1111 | "newdf = df.copy(deep = True) # snapshot taken while Timestamp is still the numeric epoch value\n", 1112 | "dateformat = \"%Y-%m-%d %H:%M:%S.%f\"\n", 1113 | "df['Timestamp'] = df['Timestamp'].apply(lambda x: datetime.datetime.fromtimestamp(float(x), tz=datetime.timezone.utc).strftime(dateformat)) # explicit UTC: without tz the string depends on the host's local timezone\n", 1114 | "print(df.dtypes)\n", 1115 | "df.head(100)" 1116 | ], 1117 | "metadata": { 1118 | "id": "DMt9KCx9ql_W", 1119 | "colab": { 1120 | "base_uri": "https://localhost:8080/", 1121 | "height": 921 1122 | }, 1123 | "outputId": "68acd251-e138-489f-8e43-ae6b6632c6af" 1124 | }, 1125 | "execution_count": 6, 1126 | "outputs": [ 1127 | { 1128 | "output_type": "stream", 1129 | "name": "stdout", 1130 | "text": [ 1131 | 
"Timestamp object\n", 1132 | "CAN ID int64\n", 1133 | "Byte int64\n", 1134 | "DATA[0] int64\n", 1135 | "DATA[1] int64\n", 1136 | "DATA[2] int64\n", 1137 | "DATA[3] int64\n", 1138 | "DATA[4] int64\n", 1139 | "DATA[5] int64\n", 1140 | "DATA[6] int64\n", 1141 | "DATA[7] int64\n", 1142 | "AttackType object\n", 1143 | "Message float64\n", 1144 | "dtype: object\n" 1145 | ] 1146 | }, 1147 | { 1148 | "output_type": "execute_result", 1149 | "data": { 1150 | "text/html": [ 1151 | "\n", 1152 | "
\n", 1153 | "
\n", 1154 | "
\n", 1155 | "\n", 1168 | "\n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " 
\n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | "
TimestampCAN IDByteDATA[0]DATA[1]DATA[2]DATA[3]DATA[4]DATA[5]DATA[6]DATA[7]AttackTypeMessage
02016-11-03 19:08:43.0441570800000000DoS Attack0.000000e+00
12016-11-03 19:24:35.989254305827128006312714166Normal Message2.712801e+16
22016-11-03 18:54:13.78868116181281370036000Normal Message1.281370e+12
32016-11-03 19:06:50.28611960882433344881431125Normal Message2.433345e+15
42016-11-03 19:26:04.1397147048200000000Normal Message2.000000e+08
..........................................
952016-11-03 19:05:13.3464160800000000DoS Attack0.000000e+00
972016-11-03 19:15:01.1463057048200000000Normal Message2.000000e+08
982016-11-03 18:56:54.7611378098220190127201732020Normal Message2.201901e+17
992016-11-03 18:52:14.511839497880000000Normal Message8.000000e+07
1002016-11-03 19:16:38.7902567048200000000Normal Message2.000000e+08
\n", 1366 | "

100 rows × 13 columns

\n", 1367 | "
\n", 1368 | " \n", 1378 | " \n", 1379 | " \n", 1416 | "\n", 1417 | " \n", 1441 | "
\n", 1442 | "
\n", 1443 | " " 1444 | ], 1445 | "text/plain": [ 1446 | " Timestamp CAN ID Byte DATA[0] DATA[1] DATA[2] \\\n", 1447 | "0 2016-11-03 19:08:43.044157 0 8 0 0 0 \n", 1448 | "1 2016-11-03 19:24:35.989254 305 8 27 128 0 \n", 1449 | "2 2016-11-03 18:54:13.788681 161 8 128 137 0 \n", 1450 | "3 2016-11-03 19:06:50.286119 608 8 24 33 34 \n", 1451 | "4 2016-11-03 19:26:04.139714 704 8 20 0 0 \n", 1452 | ".. ... ... ... ... ... ... \n", 1453 | "95 2016-11-03 19:05:13.346416 0 8 0 0 0 \n", 1454 | "97 2016-11-03 19:15:01.146305 704 8 20 0 0 \n", 1455 | "98 2016-11-03 18:56:54.761137 809 8 220 190 127 \n", 1456 | "99 2016-11-03 18:52:14.511839 497 8 8 0 0 \n", 1457 | "100 2016-11-03 19:16:38.790256 704 8 20 0 0 \n", 1458 | "\n", 1459 | " DATA[3] DATA[4] DATA[5] DATA[6] DATA[7] AttackType Message \n", 1460 | "0 0 0 0 0 0 DoS Attack 0.000000e+00 \n", 1461 | "1 0 63 127 14 166 Normal Message 2.712801e+16 \n", 1462 | "2 0 36 0 0 0 Normal Message 1.281370e+12 \n", 1463 | "3 48 8 143 112 5 Normal Message 2.433345e+15 \n", 1464 | "4 0 0 0 0 0 Normal Message 2.000000e+08 \n", 1465 | ".. ... ... ... ... ... ... ... 
\n", 1466 | "95 0 0 0 0 0 DoS Attack 0.000000e+00 \n", 1467 | "97 0 0 0 0 0 Normal Message 2.000000e+08 \n", 1468 | "98 20 17 32 0 20 Normal Message 2.201901e+17 \n", 1469 | "99 0 0 0 0 0 Normal Message 8.000000e+07 \n", 1470 | "100 0 0 0 0 0 Normal Message 2.000000e+08 \n", 1471 | "\n", 1472 | "[100 rows x 13 columns]" 1473 | ] 1474 | }, 1475 | "metadata": {}, 1476 | "execution_count": 6 1477 | } 1478 | ] 1479 | }, 1480 | { 1481 | "cell_type": "code", 1482 | "source": [ 1483 | "#df = newdf.copy(deep = True)\n", 1484 | "from sklearn import preprocessing\n", 1485 | "#print(df['AttackType'].unique())\n", 1486 | "#print(df['AttackType'].value_counts())\n", 1487 | "encoder = preprocessing.LabelEncoder()\n", 1488 | "#df1 = df[['AttackType']].copy()\n", 1489 | "df['AttackType']= encoder.fit_transform(df['AttackType'].values)\n", 1490 | "# df = df.drop(['AttackType'], axis = 1)\n", 1491 | "# df1\n", 1492 | "#df = pd.concat([df.iloc[:,0:11],df1, df.iloc[:, 11:]], axis=1)\n", 1493 | "#df = pd.get_dummies(df, columns =['AttackType'], prefix = '', prefix_sep = '')\n", 1494 | "print(df.head(10))\n", 1495 | "# print(df['AttackType Encode'])\n", 1496 | "print(df['AttackType'])\n", 1497 | "print(df.shape)\n", 1498 | "#print(df.shape)" 1499 | ], 1500 | "metadata": { 1501 | "colab": { 1502 | "base_uri": "https://localhost:8080/" 1503 | }, 1504 | "id": "vEUXBabOBtpQ", 1505 | "outputId": "076885d8-3a39-446d-96b6-0816f653f48a" 1506 | }, 1507 | "execution_count": 7, 1508 | "outputs": [ 1509 | { 1510 | "output_type": "stream", 1511 | "name": "stdout", 1512 | "text": [ 1513 | " Timestamp CAN ID Byte DATA[0] DATA[1] DATA[2] \\\n", 1514 | "0 2016-11-03 19:08:43.044157 0 8 0 0 0 \n", 1515 | "1 2016-11-03 19:24:35.989254 305 8 27 128 0 \n", 1516 | "2 2016-11-03 18:54:13.788681 161 8 128 137 0 \n", 1517 | "3 2016-11-03 19:06:50.286119 608 8 24 33 34 \n", 1518 | "4 2016-11-03 19:26:04.139714 704 8 20 0 0 \n", 1519 | "5 2016-11-03 19:03:07.624543 304 8 11 128 0 \n", 1520 | "6 2016-11-03 
19:06:31.658461 880 8 0 32 0 \n", 1521 | "7 2016-11-03 18:55:47.812754 1264 8 0 0 0 \n", 1522 | "8 2016-11-03 18:46:48.226079 304 8 5 128 0 \n", 1523 | "9 2016-11-03 18:40:52.891089 305 8 247 127 0 \n", 1524 | "\n", 1525 | " DATA[3] DATA[4] DATA[5] DATA[6] DATA[7] AttackType Message \n", 1526 | "0 0 0 0 0 0 0 0.000000e+00 \n", 1527 | "1 0 63 127 14 166 3 2.712801e+16 \n", 1528 | "2 0 36 0 0 0 3 1.281370e+12 \n", 1529 | "3 48 8 143 112 5 3 2.433345e+15 \n", 1530 | "4 0 0 0 0 0 3 2.000000e+08 \n", 1531 | "5 255 8 128 4 136 3 1.112803e+16 \n", 1532 | "6 0 0 0 0 0 3 3.200000e+07 \n", 1533 | "7 128 0 105 209 19 3 1.280105e+11 \n", 1534 | "8 255 11 128 12 237 3 5.128026e+17 \n", 1535 | "9 0 76 127 13 231 3 2.471270e+17 \n", 1536 | "0 0\n", 1537 | "1 3\n", 1538 | "2 3\n", 1539 | "3 3\n", 1540 | "4 3\n", 1541 | " ..\n", 1542 | "462165 3\n", 1543 | "462166 3\n", 1544 | "462167 3\n", 1545 | "462168 3\n", 1546 | "462169 4\n", 1547 | "Name: AttackType, Length: 1636855, dtype: int64\n", 1548 | "(1636855, 13)\n" 1549 | ] 1550 | } 1551 | ] 1552 | }, 1553 | { 1554 | "cell_type": "code", 1555 | "source": [ 1556 | "df.columns" 1557 | ], 1558 | "metadata": { 1559 | "colab": { 1560 | "base_uri": "https://localhost:8080/" 1561 | }, 1562 | "id": "jPQUX0V2PPkm", 1563 | "outputId": "45225c4a-9642-444e-bb72-1ff1c5f3e0cc" 1564 | }, 1565 | "execution_count": 8, 1566 | "outputs": [ 1567 | { 1568 | "output_type": "execute_result", 1569 | "data": { 1570 | "text/plain": [ 1571 | "Index(['Timestamp', 'CAN ID', 'Byte', 'DATA[0]', 'DATA[1]', 'DATA[2]',\n", 1572 | " 'DATA[3]', 'DATA[4]', 'DATA[5]', 'DATA[6]', 'DATA[7]', 'AttackType',\n", 1573 | " 'Message'],\n", 1574 | " dtype='object')" 1575 | ] 1576 | }, 1577 | "metadata": {}, 1578 | "execution_count": 8 1579 | } 1580 | ] 1581 | }, 1582 | { 1583 | "cell_type": "code", 1584 | "source": [ 1585 | "X = df.iloc[:, np.r_[1, 3:11]] # features: column 1 (CAN ID) plus columns 3-10 (DATA[0]..DATA[7]); the stray leading ':' selected nothing\n", 1586 | "#X = df[['CAN ID', 'DATA[0]', 'DATA[1]', 'DATA[2]', 'DATA[3]', 'DATA[4]', 'DATA[5]', 'DATA[6]', 'DATA[7]']]\n", 
1587 | "Y = df['AttackType'] # 1-D Series target; a one-column DataFrame makes scikit-learn emit DataConversionWarning in fit()\n", 1588 | "X,Y" 1589 | ], 1590 | "metadata": { 1591 | "colab": { 1592 | "base_uri": "https://localhost:8080/" 1593 | }, 1594 | "id": "-E_zgBKtbX4C", 1595 | "outputId": "fe0d954e-9b94-46d9-f3a1-3e8503d67c3d" 1596 | }, 1597 | "execution_count": 9, 1598 | "outputs": [ 1599 | { 1600 | "output_type": "execute_result", 1601 | "data": { 1602 | "text/plain": [ 1603 | "( CAN ID DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] DATA[5] DATA[6] \\\n", 1604 | " 0 0 0 0 0 0 0 0 0 \n", 1605 | " 1 305 27 128 0 0 63 127 14 \n", 1606 | " 2 161 128 137 0 0 36 0 0 \n", 1607 | " 3 608 24 33 34 48 8 143 112 \n", 1608 | " 4 704 20 0 0 0 0 0 0 \n", 1609 | " ... ... ... ... ... ... ... ... ... \n", 1610 | " 462165 809 220 183 126 20 17 32 0 \n", 1611 | " 462166 305 242 127 0 0 58 127 12 \n", 1612 | " 462167 305 242 127 0 0 64 127 6 \n", 1613 | " 462168 704 21 0 0 0 0 0 0 \n", 1614 | " 462169 790 69 41 36 255 41 36 0 \n", 1615 | " \n", 1616 | " DATA[7] \n", 1617 | " 0 0 \n", 1618 | " 1 166 \n", 1619 | " 2 0 \n", 1620 | " 3 5 \n", 1621 | " 4 0 \n", 1622 | " ... ... \n", 1623 | " 462165 20 \n", 1624 | " 462166 131 \n", 1625 | " 462167 22 \n", 1626 | " 462168 0 \n", 1627 | " 462169 255 \n", 1628 | " \n", 1629 | " [1636855 rows x 9 columns], AttackType\n", 1630 | " 0 0\n", 1631 | " 1 3\n", 1632 | " 2 3\n", 1633 | " 3 3\n", 1634 | " 4 3\n", 1635 | " ... 
...\n", 1636 | " 462165 3\n", 1637 | " 462166 3\n", 1638 | " 462167 3\n", 1639 | " 462168 3\n", 1640 | " 462169 4\n", 1641 | " \n", 1642 | " [1636855 rows x 1 columns])" 1643 | ] 1644 | }, 1645 | "metadata": {}, 1646 | "execution_count": 9 1647 | } 1648 | ] 1649 | }, 1650 | { 1651 | "cell_type": "code", 1652 | "source": [ 1653 | "from sklearn.model_selection import train_test_split\n", 1654 | "from sklearn.svm import SVC\n", 1655 | "from sklearn.pipeline import Pipeline\n", 1656 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.25, random_state = 20)" 1657 | ], 1658 | "metadata": { 1659 | "id": "7_qOxOWjbeX8" 1660 | }, 1661 | "execution_count": 10, 1662 | "outputs": [] 1663 | }, 1664 | { 1665 | "cell_type": "code", 1666 | "source": [ 1667 | "val = encoder.inverse_transform(df['AttackType'])\n", 1668 | "(unique, counts) = np.unique(val, return_counts=True)\n", 1669 | "frequencies = np.asarray((unique, counts)).T\n", 1670 | "print(frequencies)\n", 1671 | "print(df['AttackType'].value_counts())" 1672 | ], 1673 | "metadata": { 1674 | "colab": { 1675 | "base_uri": "https://localhost:8080/" 1676 | }, 1677 | "id": "oUcMhUpnh4Tq", 1678 | "outputId": "6fe79717-8b48-42ea-b5a8-11fe00f2d568" 1679 | }, 1680 | "execution_count": 23, 1681 | "outputs": [ 1682 | { 1683 | "output_type": "stream", 1684 | "name": "stdout", 1685 | "text": [ 1686 | "[['DoS Attack' 58469]\n", 1687 | " ['Fuzzy Attack' 49258]\n", 1688 | " ['Gear Spooing Attack' 60016]\n", 1689 | " ['Normal Message' 1403673]\n", 1690 | " ['RPM Spoofing Attack' 65439]]\n", 1691 | "3 1403673\n", 1692 | "4 65439\n", 1693 | "2 60016\n", 1694 | "0 58469\n", 1695 | "1 49258\n", 1696 | "Name: AttackType, dtype: int64\n" 1697 | ] 1698 | } 1699 | ] 1700 | }, 1701 | { 1702 | "cell_type": "code", 1703 | "source": [ 1704 | "model = Pipeline([\n", 1705 | " ('svc', SVC(random_state=20))\n", 1706 | " ])\n", 1707 | "model.fit(X_train, Y_train)\n", 1708 | "pred = model.predict(X_test)\n", 1709 | "pred" 1710 | ], 
1711 | "metadata": { 1712 | "colab": { 1713 | "base_uri": "https://localhost:8080/" 1714 | }, 1715 | "id": "NkzbFP_GbjSe", 1716 | "outputId": "bf0d413b-a09f-49d3-82fb-87ce53b04aae" 1717 | }, 1718 | "execution_count": null, 1719 | "outputs": [ 1720 | { 1721 | "output_type": "stream", 1722 | "name": "stderr", 1723 | "text": [ 1724 | "/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", 1725 | " y = column_or_1d(y, warn=True)\n" 1726 | ] 1727 | } 1728 | ] 1729 | }, 1730 | { 1731 | "cell_type": "code", 1732 | "source": [ 1733 | "from sklearn.metrics import confusion_matrix\n", 1734 | "from sklearn.metrics import classification_report\n", 1735 | "from sklearn.metrics import accuracy_score\n", 1736 | "accuracy = accuracy_score(Y_test, pred)\n", 1737 | "print('accuracy : \\n', accuracy)\n", 1738 | "matrix = confusion_matrix(Y_test,pred)\n", 1739 | "print('Confusion matrix : \\n',matrix)\n", 1740 | "matrix = classification_report(Y_test,pred)\n", 1741 | "print('Classification report : \\n',matrix)" 1742 | ], 1743 | "metadata": { 1744 | "id": "zCFivudpci-x" 1745 | }, 1746 | "execution_count": null, 1747 | "outputs": [] 1748 | }, 1749 | { 1750 | "cell_type": "code", 1751 | "source": [ 1752 | "dfC = df['AttackType'].value_counts() # AttackType holds LabelEncoder integer codes by now, so the counts are keyed by code\n", 1753 | "dfcount = dfC.rename(index = dict(enumerate(encoder.classes_))).to_frame() # map codes back to label names; indexing by hard-coded label strings matched no integer key and produced only NaN\n", 1754 | "#dfcount_reset = dfcount.reset_index()\n", 1755 | "dfcount.columns = ['Injected Messages']\n", 1756 | "#dfcount_reset.set_index('Attack Type')\n", 1757 | "#dfcount_reset.dropna()\n", 1758 | "print(\"\\n\",dfcount)\n", 1759 | "\n", 1760 | "#index = ['Normal Message','RPM Spoofing Attack','Gear Spooing Attack','DoS Attack','Fuzzy Attack']\n", 1761 | "# df2frame = dfDos['AttackType'].value_counts()\n", 1762 | "# df2frame_count 
= pd.DataFrame(df2frame)\n", 1763 | "# df2frame_count_reset = df2frame_count.reset_index()\n", 1764 | "# df2frame_count_reset.columns = ['No Of Normal Message','No Of Injected Messages']\n", 1765 | "# print(\"\\n\",df2frame_count_reset)" 1766 | ], 1767 | "metadata": { 1768 | "id": "z-aD9lxNQ2hX" 1769 | }, 1770 | "execution_count": null, 1771 | "outputs": [] 1772 | }, 1773 | { 1774 | "cell_type": "code", 1775 | "source": [ 1776 | "import matplotlib.pyplot as plt\n", 1777 | "# dffinal = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],\n", 1778 | "# 'radius': [2439.7, 6051.8, 6378.1]},\n", 1779 | "# index=['Mercury', 'Venus', 'Earth'])\n", 1780 | "#plts = dfcount.plot.bar(x='Attack Type', y='Injected Messages', rot=0, figsize=(12, 8))\n", 1781 | "plot = dfcount.plot.pie(y='Injected Messages', figsize=(12, 8))" 1782 | ], 1783 | "metadata": { 1784 | "id": "7Am7edMcou4Y" 1785 | }, 1786 | "execution_count": null, 1787 | "outputs": [] 1788 | } 1789 | ] 1790 | } -------------------------------------------------------------------------------- /supplementary_code/README.md: -------------------------------------------------------------------------------- 1 | # The code in this folder shows an example of the pre-processing of the Car-Hacking dataset. 2 | --------------------------------------------------------------------------------