├── 1-Data_pre-processing_CAN.ipynb
├── 2-CNN_Model_Development&Hyperparameter Optimization.ipynb
├── 3-Ensemble_Models-CAN.ipynb
├── CAN.png
├── LICENSE
├── Paper_2201.11812.pdf
├── README.md
├── data
├── Car_Hacking_5%.csv
└── README.md
├── framework.png
└── supplementary_code
├── CAR_IDS_SVC.ipynb
└── README.md
/1-Data_pre-processing_CAN.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles \n",
8 | "This is the code for the paper entitled \"**A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles**\" accepted in IEEE International Conference on Communications (IEEE ICC). \n",
9 | "Authors: Li Yang (lyang339@uwo.ca) and Abdallah Shami (Abdallah.Shami@uwo.ca) \n",
10 | "Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University\n",
11 | "\n",
12 | "**Notebook 1: Data pre-processing** \n",
13 | "Procedures: \n",
14 | " 1): Read the dataset \n",
15 | " 2): Transform the tabular data into images \n",
16 | " 3): Display the transformed images \n",
17 | " 4): Split the training and test set "
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "## Import libraries"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 1,
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "import numpy as np\n",
34 | "import pandas as pd\n",
35 | "import os\n",
36 | "import cv2\n",
37 | "import math\n",
38 | "import random\n",
39 | "import matplotlib.pyplot as plt\n",
40 | "import shutil\n",
41 | "from sklearn.preprocessing import QuantileTransformer\n",
42 | "from PIL import Image\n",
43 | "import warnings\n",
44 | "warnings.filterwarnings(\"ignore\")"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "## Read the Car-Hacking/CAN-Intrusion dataset\n",
52 | "The complete Car-Hacking dataset is publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset \n",
53 | "In this repository, due to the file size limit of GitHub, we use the 5% subset."
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 3,
59 | "metadata": {
60 | "collapsed": true
61 | },
62 | "outputs": [],
63 | "source": [
64 | "#Read dataset\n",
65 | "df=pd.read_csv('data/Car_Hacking_5%.csv')"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 4,
71 | "metadata": {
72 | "scrolled": true
73 | },
74 | "outputs": [
75 | {
76 | "data": {
77 | "text/html": [
78 | "
\n",
79 | "\n",
92 | "
\n",
93 | " \n",
94 | " \n",
95 | " \n",
96 | " CAN ID \n",
97 | " DATA[0] \n",
98 | " DATA[1] \n",
99 | " DATA[2] \n",
100 | " DATA[3] \n",
101 | " DATA[4] \n",
102 | " DATA[5] \n",
103 | " DATA[6] \n",
104 | " DATA[7] \n",
105 | " Label \n",
106 | " \n",
107 | " \n",
108 | " \n",
109 | " \n",
110 | " 0 \n",
111 | " 1201 \n",
112 | " 41 \n",
113 | " 39 \n",
114 | " 39 \n",
115 | " 35 \n",
116 | " 0 \n",
117 | " 0 \n",
118 | " 0 \n",
119 | " 154 \n",
120 | " R \n",
121 | " \n",
122 | " \n",
123 | " 1 \n",
124 | " 809 \n",
125 | " 64 \n",
126 | " 187 \n",
127 | " 127 \n",
128 | " 20 \n",
129 | " 17 \n",
130 | " 32 \n",
131 | " 0 \n",
132 | " 20 \n",
133 | " R \n",
134 | " \n",
135 | " \n",
136 | " 2 \n",
137 | " 1349 \n",
138 | " 216 \n",
139 | " 0 \n",
140 | " 0 \n",
141 | " 136 \n",
142 | " 0 \n",
143 | " 0 \n",
144 | " 0 \n",
145 | " 0 \n",
146 | " R \n",
147 | " \n",
148 | " \n",
149 | " 3 \n",
150 | " 1201 \n",
151 | " 41 \n",
152 | " 39 \n",
153 | " 39 \n",
154 | " 35 \n",
155 | " 0 \n",
156 | " 0 \n",
157 | " 0 \n",
158 | " 154 \n",
159 | " R \n",
160 | " \n",
161 | " \n",
162 | " 4 \n",
163 | " 2 \n",
164 | " 0 \n",
165 | " 0 \n",
166 | " 0 \n",
167 | " 0 \n",
168 | " 0 \n",
169 | " 3 \n",
170 | " 2 \n",
171 | " 228 \n",
172 | " R \n",
173 | " \n",
174 | " \n",
175 | " ... \n",
176 | " ... \n",
177 | " ... \n",
178 | " ... \n",
179 | " ... \n",
180 | " ... \n",
181 | " ... \n",
182 | " ... \n",
183 | " ... \n",
184 | " ... \n",
185 | " ... \n",
186 | " \n",
187 | " \n",
188 | " 818435 \n",
189 | " 848 \n",
190 | " 5 \n",
191 | " 32 \n",
192 | " 52 \n",
193 | " 104 \n",
194 | " 117 \n",
195 | " 0 \n",
196 | " 0 \n",
197 | " 12 \n",
198 | " R \n",
199 | " \n",
200 | " \n",
201 | " 818436 \n",
202 | " 1088 \n",
203 | " 255 \n",
204 | " 0 \n",
205 | " 0 \n",
206 | " 0 \n",
207 | " 255 \n",
208 | " 134 \n",
209 | " 9 \n",
210 | " 0 \n",
211 | " R \n",
212 | " \n",
213 | " \n",
214 | " 818437 \n",
215 | " 848 \n",
216 | " 5 \n",
217 | " 32 \n",
218 | " 100 \n",
219 | " 104 \n",
220 | " 117 \n",
221 | " 0 \n",
222 | " 0 \n",
223 | " 92 \n",
224 | " R \n",
225 | " \n",
226 | " \n",
227 | " 818438 \n",
228 | " 1349 \n",
229 | " 216 \n",
230 | " 90 \n",
231 | " 0 \n",
232 | " 137 \n",
233 | " 0 \n",
234 | " 0 \n",
235 | " 0 \n",
236 | " 0 \n",
237 | " R \n",
238 | " \n",
239 | " \n",
240 | " 818439 \n",
241 | " 790 \n",
242 | " 5 \n",
243 | " 33 \n",
244 | " 48 \n",
245 | " 10 \n",
246 | " 33 \n",
247 | " 30 \n",
248 | " 0 \n",
249 | " 111 \n",
250 | " R \n",
251 | " \n",
252 | " \n",
253 | "
\n",
254 | "
818440 rows × 10 columns
\n",
255 | "
"
256 | ],
257 | "text/plain": [
258 | " CAN ID DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] DATA[5] DATA[6] \\\n",
259 | "0 1201 41 39 39 35 0 0 0 \n",
260 | "1 809 64 187 127 20 17 32 0 \n",
261 | "2 1349 216 0 0 136 0 0 0 \n",
262 | "3 1201 41 39 39 35 0 0 0 \n",
263 | "4 2 0 0 0 0 0 3 2 \n",
264 | "... ... ... ... ... ... ... ... ... \n",
265 | "818435 848 5 32 52 104 117 0 0 \n",
266 | "818436 1088 255 0 0 0 255 134 9 \n",
267 | "818437 848 5 32 100 104 117 0 0 \n",
268 | "818438 1349 216 90 0 137 0 0 0 \n",
269 | "818439 790 5 33 48 10 33 30 0 \n",
270 | "\n",
271 | " DATA[7] Label \n",
272 | "0 154 R \n",
273 | "1 20 R \n",
274 | "2 0 R \n",
275 | "3 154 R \n",
276 | "4 228 R \n",
277 | "... ... ... \n",
278 | "818435 12 R \n",
279 | "818436 0 R \n",
280 | "818437 92 R \n",
281 | "818438 0 R \n",
282 | "818439 111 R \n",
283 | "\n",
284 | "[818440 rows x 10 columns]"
285 | ]
286 | },
287 | "execution_count": 4,
288 | "metadata": {},
289 | "output_type": "execute_result"
290 | }
291 | ],
292 | "source": [
293 | "df"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": 5,
299 | "metadata": {},
300 | "outputs": [
301 | {
302 | "data": {
303 | "text/plain": [
304 | "R 701832\n",
305 | "RPM 32539\n",
306 | "gear 29944\n",
307 | "DoS 29501\n",
308 | "Fuzzy 24624\n",
309 | "Name: Label, dtype: int64"
310 | ]
311 | },
312 | "execution_count": 5,
313 | "metadata": {},
314 | "output_type": "execute_result"
315 | }
316 | ],
317 | "source": [
318 | "# The labels of the dataset. \"R\" indicates normal patterns, and there are four types of attack (DoS, fuzzy, gear spoofing, and RPM spoofing attacks)\n",
319 | "df.Label.value_counts()"
320 | ]
321 | },
322 | {
323 | "cell_type": "markdown",
324 | "metadata": {
325 | "collapsed": true
326 | },
327 | "source": [
328 | "## Data Transformation\n",
329 | "Convert tabular data to images\n",
330 | "Procedures:\n",
331 | "1. Use quantile transform to transform the original data samples into the scale of [0,255], representing pixel values\n",
332 | "2. Generate images for each category (Normal, DoS, Fuzzy, Gear, RPM), each image consists of 27 data samples with 9 features. Thus, the size of each image is 9*9*3, length 9, width 9, and 3 color channels (RGB)."
333 | ]
334 | },
335 | {
336 | "cell_type": "code",
337 | "execution_count": 6,
338 | "metadata": {
339 | "collapsed": true
340 | },
341 | "outputs": [],
342 | "source": [
343 | "# Transform all features into the scale of [0,1]\n",
344 | "numeric_features = df.dtypes[df.dtypes != 'object'].index\n",
345 | "scaler = QuantileTransformer() \n",
346 | "df[numeric_features] = scaler.fit_transform(df[numeric_features])"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": 7,
352 | "metadata": {},
353 | "outputs": [],
354 | "source": [
355 | "# Multiply the feature values by 255 to transform them into the scale of [0,255]\n",
356 | "df[numeric_features] = df[numeric_features].apply(\n",
357 | " lambda x: (x*255))"
358 | ]
359 | },
360 | {
361 | "cell_type": "code",
362 | "execution_count": 8,
363 | "metadata": {},
364 | "outputs": [
365 | {
366 | "data": {
367 | "text/html": [
368 | "\n",
369 | "\n",
382 | "
\n",
383 | " \n",
384 | " \n",
385 | " \n",
386 | " CAN ID \n",
387 | " DATA[0] \n",
388 | " DATA[1] \n",
389 | " DATA[2] \n",
390 | " DATA[3] \n",
391 | " DATA[4] \n",
392 | " DATA[5] \n",
393 | " DATA[6] \n",
394 | " DATA[7] \n",
395 | " \n",
396 | " \n",
397 | " \n",
398 | " \n",
399 | " count \n",
400 | " 818440.000000 \n",
401 | " 818440.000000 \n",
402 | " 818440.000000 \n",
403 | " 818440.000000 \n",
404 | " 818440.000000 \n",
405 | " 818440.000000 \n",
406 | " 818440.000000 \n",
407 | " 818440.000000 \n",
408 | " 818440.000000 \n",
409 | " \n",
410 | " \n",
411 | " mean \n",
412 | " 127.458603 \n",
413 | " 113.635407 \n",
414 | " 108.055500 \n",
415 | " 89.524039 \n",
416 | " 109.930495 \n",
417 | " 105.682464 \n",
418 | " 112.273096 \n",
419 | " 84.945440 \n",
420 | " 93.094805 \n",
421 | " \n",
422 | " \n",
423 | " std \n",
424 | " 73.780402 \n",
425 | " 89.993275 \n",
426 | " 93.448831 \n",
427 | " 100.589117 \n",
428 | " 103.632690 \n",
429 | " 95.716420 \n",
430 | " 90.993393 \n",
431 | " 101.365609 \n",
432 | " 100.186463 \n",
433 | " \n",
434 | " \n",
435 | " min \n",
436 | " 0.000000 \n",
437 | " 0.000000 \n",
438 | " 0.000000 \n",
439 | " 0.000000 \n",
440 | " 0.000000 \n",
441 | " 0.000000 \n",
442 | " 0.000000 \n",
443 | " 0.000000 \n",
444 | " 0.000000 \n",
445 | " \n",
446 | " \n",
447 | " 25% \n",
448 | " 66.876877 \n",
449 | " 0.000000 \n",
450 | " 0.000000 \n",
451 | " 0.000000 \n",
452 | " 0.000000 \n",
453 | " 0.000000 \n",
454 | " 0.000000 \n",
455 | " 0.000000 \n",
456 | " 0.000000 \n",
457 | " \n",
458 | " \n",
459 | " 50% \n",
460 | " 122.650150 \n",
461 | " 126.096096 \n",
462 | " 115.503003 \n",
463 | " 0.000000 \n",
464 | " 130.818318 \n",
465 | " 127.755255 \n",
466 | " 129.542042 \n",
467 | " 0.000000 \n",
468 | " 0.000000 \n",
469 | " \n",
470 | " \n",
471 | " 75% \n",
472 | " 190.548048 \n",
473 | " 192.462462 \n",
474 | " 193.611111 \n",
475 | " 199.099099 \n",
476 | " 190.675676 \n",
477 | " 193.355856 \n",
478 | " 190.165165 \n",
479 | " 192.207207 \n",
480 | " 190.675676 \n",
481 | " \n",
482 | " \n",
483 | " max \n",
484 | " 255.000000 \n",
485 | " 255.000000 \n",
486 | " 255.000000 \n",
487 | " 255.000000 \n",
488 | " 255.000000 \n",
489 | " 255.000000 \n",
490 | " 255.000000 \n",
491 | " 255.000000 \n",
492 | " 255.000000 \n",
493 | " \n",
494 | " \n",
495 | "
\n",
496 | "
"
497 | ],
498 | "text/plain": [
499 | " CAN ID DATA[0] DATA[1] DATA[2] \\\n",
500 | "count 818440.000000 818440.000000 818440.000000 818440.000000 \n",
501 | "mean 127.458603 113.635407 108.055500 89.524039 \n",
502 | "std 73.780402 89.993275 93.448831 100.589117 \n",
503 | "min 0.000000 0.000000 0.000000 0.000000 \n",
504 | "25% 66.876877 0.000000 0.000000 0.000000 \n",
505 | "50% 122.650150 126.096096 115.503003 0.000000 \n",
506 | "75% 190.548048 192.462462 193.611111 199.099099 \n",
507 | "max 255.000000 255.000000 255.000000 255.000000 \n",
508 | "\n",
509 | " DATA[3] DATA[4] DATA[5] DATA[6] \\\n",
510 | "count 818440.000000 818440.000000 818440.000000 818440.000000 \n",
511 | "mean 109.930495 105.682464 112.273096 84.945440 \n",
512 | "std 103.632690 95.716420 90.993393 101.365609 \n",
513 | "min 0.000000 0.000000 0.000000 0.000000 \n",
514 | "25% 0.000000 0.000000 0.000000 0.000000 \n",
515 | "50% 130.818318 127.755255 129.542042 0.000000 \n",
516 | "75% 190.675676 193.355856 190.165165 192.207207 \n",
517 | "max 255.000000 255.000000 255.000000 255.000000 \n",
518 | "\n",
519 | " DATA[7] \n",
520 | "count 818440.000000 \n",
521 | "mean 93.094805 \n",
522 | "std 100.186463 \n",
523 | "min 0.000000 \n",
524 | "25% 0.000000 \n",
525 | "50% 0.000000 \n",
526 | "75% 190.675676 \n",
527 | "max 255.000000 "
528 | ]
529 | },
530 | "execution_count": 8,
531 | "metadata": {},
532 | "output_type": "execute_result"
533 | }
534 | ],
535 | "source": [
536 | "df.describe()"
537 | ]
538 | },
539 | {
540 | "cell_type": "markdown",
541 | "metadata": {
542 | "collapsed": true
543 | },
544 | "source": [
545 | "All features are in the same scale of [0,255]"
546 | ]
547 | },
548 | {
549 | "cell_type": "markdown",
550 | "metadata": {},
551 | "source": [
552 | "### Generate images for each class"
553 | ]
554 | },
555 | {
556 | "cell_type": "code",
557 | "execution_count": 9,
558 | "metadata": {
559 | "collapsed": true
560 | },
561 | "outputs": [],
562 | "source": [
563 | "df0=df[df['Label']=='R'].drop(['Label'],axis=1)\n",
564 | "df1=df[df['Label']=='RPM'].drop(['Label'],axis=1)\n",
565 | "df2=df[df['Label']=='gear'].drop(['Label'],axis=1)\n",
566 | "df3=df[df['Label']=='DoS'].drop(['Label'],axis=1)\n",
567 | "df4=df[df['Label']=='Fuzzy'].drop(['Label'],axis=1)"
568 | ]
569 | },
570 | {
571 | "cell_type": "code",
572 | "execution_count": 30,
573 | "metadata": {},
574 | "outputs": [],
575 | "source": [
576 | "# Generate 9*9 color images for class 0 (Normal)\n",
577 | "count=0\n",
578 | "ims = []\n",
579 | "\n",
580 | "image_path = \"train/0/\"\n",
581 | "os.makedirs(image_path)\n",
582 | "\n",
583 | "for i in range(0, len(df0)): \n",
584 | " count=count+1\n",
585 | " if count<=27: \n",
586 | " im=df0.iloc[i].values\n",
587 | " ims=np.append(ims,im)\n",
588 | " else:\n",
589 | " ims=np.array(ims).reshape(9,9,3)\n",
590 | " array = np.array(ims, dtype=np.uint8)\n",
591 | " new_image = Image.fromarray(array)\n",
592 | " new_image.save(image_path+str(i)+'.png')\n",
593 | " count=0\n",
594 | " ims = []"
595 | ]
596 | },
597 | {
598 | "cell_type": "code",
599 | "execution_count": 31,
600 | "metadata": {},
601 | "outputs": [],
602 | "source": [
603 | "# Generate 9*9 color images for class 1 (RPM spoofing)\n",
604 | "count=0\n",
605 | "ims = []\n",
606 | "\n",
607 | "image_path = \"train/1/\"\n",
608 | "os.makedirs(image_path)\n",
609 | "\n",
610 | "for i in range(0, len(df1)): \n",
611 | " count=count+1\n",
612 | " if count<=27: \n",
613 | " im=df1.iloc[i].values\n",
614 | " ims=np.append(ims,im)\n",
615 | " else:\n",
616 | " ims=np.array(ims).reshape(9,9,3)\n",
617 | " array = np.array(ims, dtype=np.uint8)\n",
618 | " new_image = Image.fromarray(array)\n",
619 | " new_image.save(image_path+str(i)+'.png')\n",
620 | " count=0\n",
621 | " ims = []"
622 | ]
623 | },
624 | {
625 | "cell_type": "code",
626 | "execution_count": 33,
627 | "metadata": {},
628 | "outputs": [],
629 | "source": [
630 | "# Generate 9*9 color images for class 2 (Gear spoofing)\n",
631 | "count=0\n",
632 | "ims = []\n",
633 | "\n",
634 | "image_path = \"train/2/\"\n",
635 | "os.makedirs(image_path)\n",
636 | "\n",
637 | "for i in range(0, len(df2)): \n",
638 | " count=count+1\n",
639 | " if count<=27: \n",
640 | " im=df2.iloc[i].values\n",
641 | " ims=np.append(ims,im)\n",
642 | " else:\n",
643 | " ims=np.array(ims).reshape(9,9,3)\n",
644 | " array = np.array(ims, dtype=np.uint8)\n",
645 | " new_image = Image.fromarray(array)\n",
646 | " new_image.save(image_path+str(i)+'.png')\n",
647 | " count=0\n",
648 | " ims = []"
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": 34,
654 | "metadata": {
655 | "collapsed": true
656 | },
657 | "outputs": [],
658 | "source": [
659 | "# Generate 9*9 color images for class 3 (DoS attack)\n",
660 | "count=0\n",
661 | "ims = []\n",
662 | "\n",
663 | "image_path = \"train/3/\"\n",
664 | "os.makedirs(image_path)\n",
665 | "\n",
666 | "\n",
667 | "for i in range(0, len(df3)): \n",
668 | " count=count+1\n",
669 | " if count<=27: \n",
670 | " im=df3.iloc[i].values\n",
671 | " ims=np.append(ims,im)\n",
672 | " else:\n",
673 | " ims=np.array(ims).reshape(9,9,3)\n",
674 | " array = np.array(ims, dtype=np.uint8)\n",
675 | " new_image = Image.fromarray(array)\n",
676 | " new_image.save(image_path+str(i)+'.png')\n",
677 | " count=0\n",
678 | " ims = []"
679 | ]
680 | },
681 | {
682 | "cell_type": "code",
683 | "execution_count": 35,
684 | "metadata": {
685 | "collapsed": true
686 | },
687 | "outputs": [],
688 | "source": [
689 | "# Generate 9*9 color images for class 4 (Fuzzy attack)\n",
690 | "count=0\n",
691 | "ims = []\n",
692 | "\n",
693 | "image_path = \"train/4/\"\n",
694 | "os.makedirs(image_path)\n",
695 | "\n",
696 | "\n",
697 | "for i in range(0, len(df4)): \n",
698 | " count=count+1\n",
699 | " if count<=27: \n",
700 | " im=df4.iloc[i].values\n",
701 | " ims=np.append(ims,im)\n",
702 | " else:\n",
703 | " ims=np.array(ims).reshape(9,9,3)\n",
704 | " array = np.array(ims, dtype=np.uint8)\n",
705 | " new_image = Image.fromarray(array)\n",
706 | " new_image.save(image_path+str(i)+'.png')\n",
707 | " count=0\n",
708 | " ims = []"
709 | ]
710 | },
711 | {
712 | "cell_type": "markdown",
713 | "metadata": {},
714 | "source": [
715 | "## Split the training and test set "
716 | ]
717 | },
718 | {
719 | "cell_type": "code",
720 | "execution_count": 56,
721 | "metadata": {},
722 | "outputs": [
723 | {
724 | "name": "stdout",
725 | "output_type": "stream",
726 | "text": [
727 | "29227\n"
728 | ]
729 | }
730 | ],
731 | "source": [
732 | "# Set the train/test folders and collect the paths of all generated images\n",
733 | "Train_Dir='./train/'\n",
734 | "Val_Dir='./test/'\n",
735 | "allimgs=[]\n",
736 | "for subdir in os.listdir(Train_Dir):\n",
737 | " for filename in os.listdir(os.path.join(Train_Dir,subdir)):\n",
738 | " filepath=os.path.join(Train_Dir,subdir,filename)\n",
739 | " allimgs.append(filepath)\n",
740 | "print(len(allimgs)) # Print the total number of images"
741 | ]
742 | },
743 | {
744 | "cell_type": "code",
745 | "execution_count": 58,
746 | "metadata": {},
747 | "outputs": [],
748 | "source": [
749 | "#split a test set from the dataset, train/test size = 80%/20%\n",
750 | "Numbers=len(allimgs)//5 \t#size of test set (20%)\n",
751 | "\n",
752 | "def mymovefile(srcfile,dstfile):\n",
753 | " if not os.path.isfile(srcfile):\n",
754 | " print (\"%s not exist!\"%(srcfile))\n",
755 | " else:\n",
756 | " fpath,fname=os.path.split(dstfile) \n",
757 | " if not os.path.exists(fpath):\n",
758 | " os.makedirs(fpath) \n",
759 | " shutil.move(srcfile,dstfile) \n",
760 | " #print (\"move %s -> %s\"%(srcfile,dstfile))"
761 | ]
762 | },
763 | {
764 | "cell_type": "code",
765 | "execution_count": 59,
766 | "metadata": {
767 | "scrolled": true
768 | },
769 | "outputs": [
770 | {
771 | "data": {
772 | "text/plain": [
773 | "5845"
774 | ]
775 | },
776 | "execution_count": 59,
777 | "metadata": {},
778 | "output_type": "execute_result"
779 | }
780 | ],
781 | "source": [
782 | "# The size of test set\n",
783 | "Numbers"
784 | ]
785 | },
786 | {
787 | "cell_type": "code",
788 | "execution_count": 60,
789 | "metadata": {},
790 | "outputs": [
791 | {
792 | "name": "stdout",
793 | "output_type": "stream",
794 | "text": [
795 | "Finish creating test set\n"
796 | ]
797 | }
798 | ],
799 | "source": [
800 | "# Create the test set\n",
801 | "val_imgs=random.sample(allimgs,Numbers)\n",
802 | "for img in val_imgs:\n",
803 | " dest_path=img.replace(Train_Dir,Val_Dir)\n",
804 | " mymovefile(img,dest_path)\n",
805 | "print('Finish creating test set')"
806 | ]
807 | },
808 | {
809 | "cell_type": "code",
810 | "execution_count": 61,
811 | "metadata": {
812 | "collapsed": true
813 | },
814 | "outputs": [],
815 | "source": [
816 | "# Resize the images to 224*224 for better CNN training\n",
817 | "def get_224(folder,dstdir):\n",
818 | " imgfilepaths=[]\n",
819 | " for root,dirs,imgs in os.walk(folder):\n",
820 | " for thisimg in imgs:\n",
821 | " thisimg_path=os.path.join(root,thisimg)\n",
822 | " imgfilepaths.append(thisimg_path)\n",
823 | " for thisimg_path in imgfilepaths:\n",
824 | " dir_name,filename=os.path.split(thisimg_path)\n",
825 | " dir_name=dir_name.replace(folder,dstdir)\n",
826 | " new_file_path=os.path.join(dir_name,filename)\n",
827 | " if not os.path.exists(dir_name):\n",
828 | " os.makedirs(dir_name)\n",
829 | " img=cv2.imread(thisimg_path)\n",
830 | " img=cv2.resize(img,(224,224))\n",
831 | " cv2.imwrite(new_file_path,img)\n",
832 | " print('Finish resizing'.format(folder=folder))"
833 | ]
834 | },
835 | {
836 | "cell_type": "code",
837 | "execution_count": 62,
838 | "metadata": {},
839 | "outputs": [
840 | {
841 | "name": "stdout",
842 | "output_type": "stream",
843 | "text": [
844 | "Finish resizing\n"
845 | ]
846 | }
847 | ],
848 | "source": [
849 | "DATA_DIR_224='./train_224/'\n",
850 | "get_224(folder='./train/',dstdir=DATA_DIR_224)"
851 | ]
852 | },
853 | {
854 | "cell_type": "code",
855 | "execution_count": 63,
856 | "metadata": {},
857 | "outputs": [
858 | {
859 | "name": "stdout",
860 | "output_type": "stream",
861 | "text": [
862 | "Finish resizing\n"
863 | ]
864 | }
865 | ],
866 | "source": [
867 | "DATA_DIR2_224='./test_224/'\n",
868 | "get_224(folder='./test/',dstdir=DATA_DIR2_224)"
869 | ]
870 | },
871 | {
872 | "cell_type": "markdown",
873 | "metadata": {},
874 | "source": [
875 | "### Display samples for each category"
876 | ]
877 | },
878 | {
879 | "cell_type": "code",
880 | "execution_count": 2,
881 | "metadata": {},
882 | "outputs": [
883 | {
884 | "data": {
885 | "image/png": "\n",
886 | "text/plain": [
887 | ""
888 | ]
889 | },
890 | "metadata": {
891 | "needs_background": "light"
892 | },
893 | "output_type": "display_data"
894 | }
895 | ],
896 | "source": [
897 | "# Read the images for each category, the file name may vary (27.png, 83.png...)\n",
898 | "img1 = Image.open('./train_224/0/27.png')\n",
899 | "img2 = Image.open('./train_224/1/83.png')\n",
900 | "img3 = Image.open('./train_224/2/27.png')\n",
901 | "img4 = Image.open('./train_224/3/27.png')\n",
902 | "img5 = Image.open('./train_224/4/27.png')\n",
903 | "\n",
904 | "plt.figure(figsize=(10, 10)) \n",
905 | "plt.subplot(1,5,1)\n",
906 | "plt.imshow(img1)\n",
907 | "plt.title(\"Normal\")\n",
908 | "plt.subplot(1,5,2)\n",
909 | "plt.imshow(img2)\n",
910 | "plt.title(\"RPM Spoofing\")\n",
911 | "plt.subplot(1,5,3)\n",
912 | "plt.imshow(img3)\n",
913 | "plt.title(\"Gear Spoofing\")\n",
914 | "plt.subplot(1,5,4)\n",
915 | "plt.imshow(img4)\n",
916 | "plt.title(\"DoS Attack\")\n",
917 | "plt.subplot(1,5,5)\n",
918 | "plt.imshow(img5)\n",
919 | "plt.title(\"Fuzzy Attack\")\n",
920 | "plt.show() # display it"
921 | ]
922 | },
923 | {
924 | "cell_type": "code",
925 | "execution_count": null,
926 | "metadata": {},
927 | "outputs": [],
928 | "source": []
929 | }
930 | ],
931 | "metadata": {
932 | "anaconda-cloud": {},
933 | "kernelspec": {
934 | "display_name": "Python 3",
935 | "language": "python",
936 | "name": "python3"
937 | },
938 | "language_info": {
939 | "codemirror_mode": {
940 | "name": "ipython",
941 | "version": 3
942 | },
943 | "file_extension": ".py",
944 | "mimetype": "text/x-python",
945 | "name": "python",
946 | "nbconvert_exporter": "python",
947 | "pygments_lexer": "ipython3",
948 | "version": "3.6.8"
949 | }
950 | },
951 | "nbformat": 4,
952 | "nbformat_minor": 2
953 | }
954 |
--------------------------------------------------------------------------------
/CAN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/c2aacb76cc184dc1ea29f2c6b97e5bbde8221f71/CAN.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Western OC2 Lab
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Paper_2201.11812.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/c2aacb76cc184dc1ea29f2c6b97e5bbde8221f71/Paper_2201.11812.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Intrusion-Detection-System-Using-CNN-and-Transfer-Learning
2 |
3 | This is the code for the paper entitled "**[A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles](https://arxiv.org/pdf/2201.11812.pdf)**" published in **IEEE International Conference on Communications (IEEE ICC)**, doi: [10.1109/ICC45855.2022.9838780](https://ieeexplore.ieee.org/document/9838780).
4 | - Authors: Li Yang and Abdallah Shami
5 | - Organization: The Optimized Computing and Communications (OC2) Lab, ECE Department, Western University
6 |
7 | This repository introduces how to use **convolutional neural networks (CNNs)** and **transfer learning** techniques to develop **intrusion detection systems**. **Ensemble learning** and **hyperparameter optimization techniques** are also used to achieve optimized model performance.
8 |
9 | - Another **intrusion detection system development code** using **decision tree-based machine learning algorithms (Decision tree, random forest, XGBoost, stacking, etc.)** can be found in: [Intrusion-Detection-System-Using-Machine-Learning](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-Machine-Learning)
10 |
11 | - A comprehensive **hyperparameter optimization** tutorial code can be found in: [Hyperparameter-Optimization-of-Machine-Learning-Algorithms](https://github.com/LiYangHart/Hyperparameter-Optimization-of-Machine-Learning-Algorithms)
12 |
13 | ## Abstract of The Paper
14 | Modern vehicles, including autonomous vehicles and connected vehicles, are increasingly connected to the external world, which enables various functionalities and services. However, the improving connectivity also increases the attack surfaces of the Internet of Vehicles (IoV), causing its vulnerabilities to cyber-threats. Due to the lack of authentication and encryption procedures in vehicular networks, Intrusion Detection Systems (IDSs) are essential approaches to protect modern vehicle systems from network attacks. In this paper, a transfer learning and ensemble learning-based IDS is proposed for IoV systems using convolutional neural networks (CNNs) and hyper-parameter optimization techniques. In the experiments, the proposed IDS has demonstrated over 99.25% detection rates and F1-scores on two well-known public benchmark IoV security datasets: the Car-Hacking dataset and the CICIDS2017 dataset. This shows the effectiveness of the proposed IDS for cyber-attack detection in both intra-vehicle and external vehicular networks.
15 |
16 |
17 |
18 |
19 |
20 |
21 | ## Implementation
22 | ### CNN Models
23 | * VGG16
24 | * VGG19
25 | * Xception
26 | * Inception
27 | * Resnet
28 | * InceptionResnet
29 |
30 | ### Ensemble Learning Models
31 | * Bagging
32 | * Probability Averaging
33 | * Concatenation
34 |
35 | ### Hyperparameter Optimization Methods
36 | * Random Search (RS)
37 | * Bayesian Optimization - Tree Parzen Estimator (BO-TPE)
38 |
39 | ### Dataset
40 | 1. CAN-intrusion/Car-Hacking dataset, a benchmark network security dataset for intra-vehicle intrusion detection
41 | * Publicly available at: https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset
42 | * Can be processed using the same code
43 |
44 | 2. CICIDS2017 dataset, a popular network traffic dataset for intrusion detection problems
45 | * Publicly available at: https://www.unb.ca/cic/datasets/ids-2017.html
46 |
47 | For the purpose of displaying the experimental results in Jupyter Notebook, the sampled subset of the CAN-intrusion dataset is used in the sample code. The subsets are in the "[data](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/tree/main/data)" folder.
48 |
49 | ### Code
50 | * [1-Data_pre-processing_CAN.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/1-Data_pre-processing_CAN.ipynb): code for data pre-processing and transformation (tabular data to images).
51 | * [2-CNN_Model_Development&Hyperparameter Optimization.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/2-CNN_Model_Development%26Hyperparameter%20Optimization.ipynb): code for the development of CNN models and their hyperparameter optimization.
52 | * [3-Ensemble_Models-CAN.ipynb](https://github.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/blob/main/3-Ensemble_Models-CAN.ipynb): code for the construction of three ensemble learning techniques.
53 |
54 | Libraries
55 | * Python 3.5+
56 | * [Keras 2.1.0+](https://keras.io/)
57 | * [Tensorflow 1.10.0+](https://www.tensorflow.org/install/gpu)
58 | * [OpenCV-python](https://docs.opencv.org/4.x/d6/d00/tutorial_py_root.html)
59 | * [hyperopt](https://github.com/hyperopt/hyperopt)
60 |
61 | ## Contact-Info
62 | Please feel free to contact us for any questions or cooperation opportunities. We will be happy to help.
63 | * Email: [liyanghart@gmail.com](mailto:liyanghart@gmail.com) or [Abdallah.Shami@uwo.ca](mailto:Abdallah.Shami@uwo.ca)
64 | * GitHub: [LiYangHart](https://github.com/LiYangHart) and [Western OC2 Lab](https://github.com/Western-OC2-Lab/)
65 | * LinkedIn: [Li Yang](https://www.linkedin.com/in/li-yang-phd-65a190176/)
66 | * Google Scholar: [Li Yang](https://scholar.google.com.eg/citations?user=XEfM7bIAAAAJ&hl=en) and [OC2 Lab](https://scholar.google.com.eg/citations?user=oiebNboAAAAJ&hl=en)
67 |
68 | ## Citation
69 | If you find this repository useful in your research, please cite this article as:
70 |
71 | L. Yang and A. Shami, "A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles," ICC 2022 - IEEE International Conference on Communications, 2022, pp. 2774-2779, doi: 10.1109/ICC45855.2022.9838780.
72 |
73 | ```
74 | @INPROCEEDINGS{9838780,
75 | author={Yang, Li and Shami, Abdallah},
76 | booktitle={ICC 2022 - IEEE International Conference on Communications},
77 | title={A Transfer Learning and Optimized CNN Based Intrusion Detection System for Internet of Vehicles},
78 | year={2022},
79 | pages={2774-2779},
80 | doi={10.1109/ICC45855.2022.9838780}}
81 | ```
82 |
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | # The sampled datasets used for the experiments in the sample code
2 |
3 | **Car_Hacking_5%.csv**: The 5% randomly sampled subset of the [Car Hacking dataset](https://ocslab.hksecurity.net/Datasets/CAN-intrusion-dataset)
4 |
--------------------------------------------------------------------------------
/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Western-OC2-Lab/Intrusion-Detection-System-Using-CNN-and-Transfer-Learning/c2aacb76cc184dc1ea29f2c6b97e5bbde8221f71/framework.png
--------------------------------------------------------------------------------
/supplementary_code/CAR_IDS_SVC.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "CAR_IDS_LOGISTIC_SVM (1).ipynb",
7 | "provenance": [],
8 | "collapsed_sections": []
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | }
17 | },
18 | "cells": [
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {
23 | "id": "lGiyP2dR6Jw-"
24 | },
25 | "outputs": [],
26 | "source": [
27 | "import numpy as np\n",
28 | "import pandas as pd\n",
29 | "from sklearn.linear_model import LogisticRegression\n",
30 | "from sklearn.ensemble import RandomForestClassifier"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "source": [
36 | "def changecolumn(dataset, AttackType):  # dataset: CSV path; AttackType: label given to rows whose flag is 'T'; returns the labelled sample\n",
37 | "    df = pd.read_csv(dataset).sample(frac = 0.1, random_state = 20, replace = False).reset_index(drop=True)  # reproducible 10% sample, renumbered from 0\n",
38 | "    df.columns = [\"Timestamp\", \"CAN ID\", \"Byte\", \"DATA[0]\",\"DATA[1]\",\"DATA[2]\",\"DATA[3]\",\"DATA[4]\",\"DATA[5]\",\"DATA[6]\",\"DATA[7]\",\"AttackType\"]  # positional rename to the 12 expected fields\n",
39 | "    df['AttackType'] = np.where(df['AttackType'] == 'T',AttackType, 'Normal Message')  # flag 'T' -> attack label, anything else -> normal\n",
40 | "    df = df.dropna()  # dropna() is not in-place: keep its result so rows with missing fields are really removed\n",
41 | "    return df\n",
42 | "\n",
43 | "dfDos = changecolumn('DoS_dataset.csv','DoS Attack')\n",
44 | "dfFuzzy = changecolumn('Fuzzy_dataset.csv','Fuzzy Attack')\n",
45 | "dfGear = changecolumn('gear_dataset.csv','Gear Spoofing Attack')  # label spelling fixed (was 'Spooing'); alphabetical class order is unchanged\n",
46 | "dfRPM = changecolumn('RPM_dataset.csv','RPM Spoofing Attack')\n",
47 | "frames = [dfDos, dfFuzzy, dfGear, dfRPM]\n",
48 | "df = pd.concat(frames, ignore_index=True)  # one unique 0..n-1 row index instead of four overlapping ones\n",
49 | "print(df.head(10))\n",
50 | "print(df.shape)\n"
51 | ],
52 | "metadata": {
53 | "colab": {
54 | "base_uri": "https://localhost:8080/"
55 | },
56 | "id": "K4qXdKi-756E",
57 | "outputId": "fcf9fee4-8ccb-48a1-99bc-401628828b0f"
58 | },
59 | "execution_count": 2,
60 | "outputs": [
61 | {
62 | "output_type": "stream",
63 | "name": "stdout",
64 | "text": [
65 | " Timestamp CAN ID Byte DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] DATA[5] \\\n",
66 | "0 1.478200e+09 0000 8 00 00 00 00 00 00 \n",
67 | "1 1.478201e+09 0131 8 1b 80 00 00 3f 7f \n",
68 | "2 1.478199e+09 00a1 8 80 89 00 00 24 00 \n",
69 | "3 1.478200e+09 0260 8 18 21 22 30 08 8f \n",
70 | "4 1.478201e+09 02c0 8 14 00 00 00 00 00 \n",
71 | "5 1.478200e+09 0130 8 0b 80 00 ff 08 80 \n",
72 | "6 1.478200e+09 0370 8 00 20 00 00 00 00 \n",
73 | "7 1.478199e+09 04f0 8 00 00 00 80 00 69 \n",
74 | "8 1.478199e+09 0130 8 05 80 00 ff 0b 80 \n",
75 | "9 1.478198e+09 0131 8 f7 7f 00 00 4c 7f \n",
76 | "\n",
77 | " DATA[6] DATA[7] AttackType \n",
78 | "0 00 00 DoS Attack \n",
79 | "1 0e a6 Normal Message \n",
80 | "2 00 00 Normal Message \n",
81 | "3 70 05 Normal Message \n",
82 | "4 00 00 Normal Message \n",
83 | "5 04 88 Normal Message \n",
84 | "6 00 00 Normal Message \n",
85 | "7 d1 13 Normal Message \n",
86 | "8 0c ed Normal Message \n",
87 | "9 0d e7 Normal Message \n",
88 | "(1656947, 12)\n"
89 | ]
90 | }
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "source": [
96 | "print(df.dtypes)  # dtypes before conversion (the stored output shows the hex fields as object)\n",
97 | "df = df.dropna()  # rows with missing fields would make the int(..., base=16) parsing below fail\n",
98 | "def changecolumntype(df):  # parse the hex-string CAN ID and DATA[0..7] columns into ints; edits df in place and returns it\n",
99 | "    for name in df[['CAN ID', 'DATA[0]', 'DATA[1]', 'DATA[2]', 'DATA[3]', 'DATA[4]', 'DATA[5]', 'DATA[6]', 'DATA[7]']].columns:\n",
100 | "        df[name] = df[name].map(lambda raw: int(str(raw), 16))\n",
101 | "    return df\n",
102 | "\n",
103 | "df = changecolumntype(df)  # NOTE(review): not idempotent - re-running this cell re-parses the already-decimal values as hex\n",
104 | "print(df.dtypes)  # dtypes after conversion\n",
105 | "df.head(10)"
106 | ],
107 | "metadata": {
108 | "colab": {
109 | "base_uri": "https://localhost:8080/",
110 | "height": 1000
111 | },
112 | "id": "XZTb7XOpJhQw",
113 | "outputId": "2755474a-cd7b-4575-cca0-7b7017da8297"
114 | },
115 | "execution_count": 3,
116 | "outputs": [
117 | {
118 | "output_type": "stream",
119 | "name": "stdout",
120 | "text": [
121 | "Timestamp float64\n",
122 | "CAN ID object\n",
123 | "Byte int64\n",
124 | "DATA[0] object\n",
125 | "DATA[1] object\n",
126 | "DATA[2] object\n",
127 | "DATA[3] object\n",
128 | "DATA[4] object\n",
129 | "DATA[5] object\n",
130 | "DATA[6] object\n",
131 | "DATA[7] object\n",
132 | "AttackType object\n",
133 | "dtype: object\n",
134 | "Timestamp float64\n",
135 | "CAN ID int64\n",
136 | "Byte int64\n",
137 | "DATA[0] int64\n",
138 | "DATA[1] int64\n",
139 | "DATA[2] int64\n",
140 | "DATA[3] int64\n",
141 | "DATA[4] int64\n",
142 | "DATA[5] int64\n",
143 | "DATA[6] int64\n",
144 | "DATA[7] int64\n",
145 | "AttackType object\n",
146 | "dtype: object\n"
147 | ]
148 | },
149 | {
150 | "output_type": "execute_result",
151 | "data": {
152 | "text/html": [
153 | "\n",
154 | " \n",
155 | "
\n",
156 | "
\n",
157 | "\n",
170 | "
\n",
171 | " \n",
172 | " \n",
173 | " \n",
174 | " Timestamp \n",
175 | " CAN ID \n",
176 | " Byte \n",
177 | " DATA[0] \n",
178 | " DATA[1] \n",
179 | " DATA[2] \n",
180 | " DATA[3] \n",
181 | " DATA[4] \n",
182 | " DATA[5] \n",
183 | " DATA[6] \n",
184 | " DATA[7] \n",
185 | " AttackType \n",
186 | " \n",
187 | " \n",
188 | " \n",
189 | " \n",
190 | " 0 \n",
191 | " 1.478200e+09 \n",
192 | " 0 \n",
193 | " 8 \n",
194 | " 0 \n",
195 | " 0 \n",
196 | " 0 \n",
197 | " 0 \n",
198 | " 0 \n",
199 | " 0 \n",
200 | " 0 \n",
201 | " 0 \n",
202 | " DoS Attack \n",
203 | " \n",
204 | " \n",
205 | " 1 \n",
206 | " 1.478201e+09 \n",
207 | " 305 \n",
208 | " 8 \n",
209 | " 27 \n",
210 | " 128 \n",
211 | " 0 \n",
212 | " 0 \n",
213 | " 63 \n",
214 | " 127 \n",
215 | " 14 \n",
216 | " 166 \n",
217 | " Normal Message \n",
218 | " \n",
219 | " \n",
220 | " 2 \n",
221 | " 1.478199e+09 \n",
222 | " 161 \n",
223 | " 8 \n",
224 | " 128 \n",
225 | " 137 \n",
226 | " 0 \n",
227 | " 0 \n",
228 | " 36 \n",
229 | " 0 \n",
230 | " 0 \n",
231 | " 0 \n",
232 | " Normal Message \n",
233 | " \n",
234 | " \n",
235 | " 3 \n",
236 | " 1.478200e+09 \n",
237 | " 608 \n",
238 | " 8 \n",
239 | " 24 \n",
240 | " 33 \n",
241 | " 34 \n",
242 | " 48 \n",
243 | " 8 \n",
244 | " 143 \n",
245 | " 112 \n",
246 | " 5 \n",
247 | " Normal Message \n",
248 | " \n",
249 | " \n",
250 | " 4 \n",
251 | " 1.478201e+09 \n",
252 | " 704 \n",
253 | " 8 \n",
254 | " 20 \n",
255 | " 0 \n",
256 | " 0 \n",
257 | " 0 \n",
258 | " 0 \n",
259 | " 0 \n",
260 | " 0 \n",
261 | " 0 \n",
262 | " Normal Message \n",
263 | " \n",
264 | " \n",
265 | " 5 \n",
266 | " 1.478200e+09 \n",
267 | " 304 \n",
268 | " 8 \n",
269 | " 11 \n",
270 | " 128 \n",
271 | " 0 \n",
272 | " 255 \n",
273 | " 8 \n",
274 | " 128 \n",
275 | " 4 \n",
276 | " 136 \n",
277 | " Normal Message \n",
278 | " \n",
279 | " \n",
280 | " 6 \n",
281 | " 1.478200e+09 \n",
282 | " 880 \n",
283 | " 8 \n",
284 | " 0 \n",
285 | " 32 \n",
286 | " 0 \n",
287 | " 0 \n",
288 | " 0 \n",
289 | " 0 \n",
290 | " 0 \n",
291 | " 0 \n",
292 | " Normal Message \n",
293 | " \n",
294 | " \n",
295 | " 7 \n",
296 | " 1.478199e+09 \n",
297 | " 1264 \n",
298 | " 8 \n",
299 | " 0 \n",
300 | " 0 \n",
301 | " 0 \n",
302 | " 128 \n",
303 | " 0 \n",
304 | " 105 \n",
305 | " 209 \n",
306 | " 19 \n",
307 | " Normal Message \n",
308 | " \n",
309 | " \n",
310 | " 8 \n",
311 | " 1.478199e+09 \n",
312 | " 304 \n",
313 | " 8 \n",
314 | " 5 \n",
315 | " 128 \n",
316 | " 0 \n",
317 | " 255 \n",
318 | " 11 \n",
319 | " 128 \n",
320 | " 12 \n",
321 | " 237 \n",
322 | " Normal Message \n",
323 | " \n",
324 | " \n",
325 | " 9 \n",
326 | " 1.478198e+09 \n",
327 | " 305 \n",
328 | " 8 \n",
329 | " 247 \n",
330 | " 127 \n",
331 | " 0 \n",
332 | " 0 \n",
333 | " 76 \n",
334 | " 127 \n",
335 | " 13 \n",
336 | " 231 \n",
337 | " Normal Message \n",
338 | " \n",
339 | " \n",
340 | "
\n",
341 | "
\n",
342 | "
\n",
345 | " \n",
346 | " \n",
348 | " \n",
349 | " \n",
350 | " \n",
351 | " \n",
352 | " \n",
353 | " \n",
390 | "\n",
391 | " \n",
415 | "
\n",
416 | "
\n",
417 | " "
418 | ],
419 | "text/plain": [
420 | " Timestamp CAN ID Byte DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] \\\n",
421 | "0 1.478200e+09 0 8 0 0 0 0 0 \n",
422 | "1 1.478201e+09 305 8 27 128 0 0 63 \n",
423 | "2 1.478199e+09 161 8 128 137 0 0 36 \n",
424 | "3 1.478200e+09 608 8 24 33 34 48 8 \n",
425 | "4 1.478201e+09 704 8 20 0 0 0 0 \n",
426 | "5 1.478200e+09 304 8 11 128 0 255 8 \n",
427 | "6 1.478200e+09 880 8 0 32 0 0 0 \n",
428 | "7 1.478199e+09 1264 8 0 0 0 128 0 \n",
429 | "8 1.478199e+09 304 8 5 128 0 255 11 \n",
430 | "9 1.478198e+09 305 8 247 127 0 0 76 \n",
431 | "\n",
432 | " DATA[5] DATA[6] DATA[7] AttackType \n",
433 | "0 0 0 0 DoS Attack \n",
434 | "1 127 14 166 Normal Message \n",
435 | "2 0 0 0 Normal Message \n",
436 | "3 143 112 5 Normal Message \n",
437 | "4 0 0 0 Normal Message \n",
438 | "5 128 4 136 Normal Message \n",
439 | "6 0 0 0 Normal Message \n",
440 | "7 105 209 19 Normal Message \n",
441 | "8 128 12 237 Normal Message \n",
442 | "9 127 13 231 Normal Message "
443 | ]
444 | },
445 | "metadata": {},
446 | "execution_count": 3
447 | }
448 | ]
449 | },
450 | {
451 | "cell_type": "code",
452 | "source": [
453 | "df['Message'] = df.iloc[:, 3:11].astype(str).apply(''.join, axis=1)  # DATA[0]..DATA[7] decimal values glued together, no padding or separator\n",
454 | "df.head(10)"
455 | ],
456 | "metadata": {
457 | "colab": {
458 | "base_uri": "https://localhost:8080/",
459 | "height": 601
460 | },
461 | "id": "ym4-oGjemqFD",
462 | "outputId": "2dc335e5-788b-41ff-d48c-1a2602f67391"
463 | },
464 | "execution_count": 4,
465 | "outputs": [
466 | {
467 | "output_type": "execute_result",
468 | "data": {
469 | "text/html": [
470 | "\n",
471 | " \n",
472 | "
\n",
473 | "
\n",
474 | "\n",
487 | "
\n",
488 | " \n",
489 | " \n",
490 | " \n",
491 | " Timestamp \n",
492 | " CAN ID \n",
493 | " Byte \n",
494 | " DATA[0] \n",
495 | " DATA[1] \n",
496 | " DATA[2] \n",
497 | " DATA[3] \n",
498 | " DATA[4] \n",
499 | " DATA[5] \n",
500 | " DATA[6] \n",
501 | " DATA[7] \n",
502 | " AttackType \n",
503 | " Message \n",
504 | " \n",
505 | " \n",
506 | " \n",
507 | " \n",
508 | " 0 \n",
509 | " 1.478200e+09 \n",
510 | " 0 \n",
511 | " 8 \n",
512 | " 0 \n",
513 | " 0 \n",
514 | " 0 \n",
515 | " 0 \n",
516 | " 0 \n",
517 | " 0 \n",
518 | " 0 \n",
519 | " 0 \n",
520 | " DoS Attack \n",
521 | " 00000000 \n",
522 | " \n",
523 | " \n",
524 | " 1 \n",
525 | " 1.478201e+09 \n",
526 | " 305 \n",
527 | " 8 \n",
528 | " 27 \n",
529 | " 128 \n",
530 | " 0 \n",
531 | " 0 \n",
532 | " 63 \n",
533 | " 127 \n",
534 | " 14 \n",
535 | " 166 \n",
536 | " Normal Message \n",
537 | " 27128006312714166 \n",
538 | " \n",
539 | " \n",
540 | " 2 \n",
541 | " 1.478199e+09 \n",
542 | " 161 \n",
543 | " 8 \n",
544 | " 128 \n",
545 | " 137 \n",
546 | " 0 \n",
547 | " 0 \n",
548 | " 36 \n",
549 | " 0 \n",
550 | " 0 \n",
551 | " 0 \n",
552 | " Normal Message \n",
553 | " 1281370036000 \n",
554 | " \n",
555 | " \n",
556 | " 3 \n",
557 | " 1.478200e+09 \n",
558 | " 608 \n",
559 | " 8 \n",
560 | " 24 \n",
561 | " 33 \n",
562 | " 34 \n",
563 | " 48 \n",
564 | " 8 \n",
565 | " 143 \n",
566 | " 112 \n",
567 | " 5 \n",
568 | " Normal Message \n",
569 | " 2433344881431125 \n",
570 | " \n",
571 | " \n",
572 | " 4 \n",
573 | " 1.478201e+09 \n",
574 | " 704 \n",
575 | " 8 \n",
576 | " 20 \n",
577 | " 0 \n",
578 | " 0 \n",
579 | " 0 \n",
580 | " 0 \n",
581 | " 0 \n",
582 | " 0 \n",
583 | " 0 \n",
584 | " Normal Message \n",
585 | " 200000000 \n",
586 | " \n",
587 | " \n",
588 | " 5 \n",
589 | " 1.478200e+09 \n",
590 | " 304 \n",
591 | " 8 \n",
592 | " 11 \n",
593 | " 128 \n",
594 | " 0 \n",
595 | " 255 \n",
596 | " 8 \n",
597 | " 128 \n",
598 | " 4 \n",
599 | " 136 \n",
600 | " Normal Message \n",
601 | " 11128025581284136 \n",
602 | " \n",
603 | " \n",
604 | " 6 \n",
605 | " 1.478200e+09 \n",
606 | " 880 \n",
607 | " 8 \n",
608 | " 0 \n",
609 | " 32 \n",
610 | " 0 \n",
611 | " 0 \n",
612 | " 0 \n",
613 | " 0 \n",
614 | " 0 \n",
615 | " 0 \n",
616 | " Normal Message \n",
617 | " 032000000 \n",
618 | " \n",
619 | " \n",
620 | " 7 \n",
621 | " 1.478199e+09 \n",
622 | " 1264 \n",
623 | " 8 \n",
624 | " 0 \n",
625 | " 0 \n",
626 | " 0 \n",
627 | " 128 \n",
628 | " 0 \n",
629 | " 105 \n",
630 | " 209 \n",
631 | " 19 \n",
632 | " Normal Message \n",
633 | " 000128010520919 \n",
634 | " \n",
635 | " \n",
636 | " 8 \n",
637 | " 1.478199e+09 \n",
638 | " 304 \n",
639 | " 8 \n",
640 | " 5 \n",
641 | " 128 \n",
642 | " 0 \n",
643 | " 255 \n",
644 | " 11 \n",
645 | " 128 \n",
646 | " 12 \n",
647 | " 237 \n",
648 | " Normal Message \n",
649 | " 512802551112812237 \n",
650 | " \n",
651 | " \n",
652 | " 9 \n",
653 | " 1.478198e+09 \n",
654 | " 305 \n",
655 | " 8 \n",
656 | " 247 \n",
657 | " 127 \n",
658 | " 0 \n",
659 | " 0 \n",
660 | " 76 \n",
661 | " 127 \n",
662 | " 13 \n",
663 | " 231 \n",
664 | " Normal Message \n",
665 | " 247127007612713231 \n",
666 | " \n",
667 | " \n",
668 | "
\n",
669 | "
\n",
670 | "
\n",
673 | " \n",
674 | " \n",
676 | " \n",
677 | " \n",
678 | " \n",
679 | " \n",
680 | " \n",
681 | " \n",
718 | "\n",
719 | " \n",
743 | "
\n",
744 | "
\n",
745 | " "
746 | ],
747 | "text/plain": [
748 | " Timestamp CAN ID Byte DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] \\\n",
749 | "0 1.478200e+09 0 8 0 0 0 0 0 \n",
750 | "1 1.478201e+09 305 8 27 128 0 0 63 \n",
751 | "2 1.478199e+09 161 8 128 137 0 0 36 \n",
752 | "3 1.478200e+09 608 8 24 33 34 48 8 \n",
753 | "4 1.478201e+09 704 8 20 0 0 0 0 \n",
754 | "5 1.478200e+09 304 8 11 128 0 255 8 \n",
755 | "6 1.478200e+09 880 8 0 32 0 0 0 \n",
756 | "7 1.478199e+09 1264 8 0 0 0 128 0 \n",
757 | "8 1.478199e+09 304 8 5 128 0 255 11 \n",
758 | "9 1.478198e+09 305 8 247 127 0 0 76 \n",
759 | "\n",
760 | " DATA[5] DATA[6] DATA[7] AttackType Message \n",
761 | "0 0 0 0 DoS Attack 00000000 \n",
762 | "1 127 14 166 Normal Message 27128006312714166 \n",
763 | "2 0 0 0 Normal Message 1281370036000 \n",
764 | "3 143 112 5 Normal Message 2433344881431125 \n",
765 | "4 0 0 0 Normal Message 200000000 \n",
766 | "5 128 4 136 Normal Message 11128025581284136 \n",
767 | "6 0 0 0 Normal Message 032000000 \n",
768 | "7 105 209 19 Normal Message 000128010520919 \n",
769 | "8 128 12 237 Normal Message 512802551112812237 \n",
770 | "9 127 13 231 Normal Message 247127007612713231 "
771 | ]
772 | },
773 | "metadata": {},
774 | "execution_count": 4
775 | }
776 | ]
777 | },
778 | {
779 | "cell_type": "code",
780 | "source": [
781 | "#df['Message'] = df['Message'].map(lambda x: int(x))\n",
782 | "df['Message'] = df['Message'].astype(float)  # NOTE(review): these digit strings can reach 24 digits, beyond float64's ~15-17 significant digits, so values are rounded\n",
783 | "df.head(10)"
784 | ],
785 | "metadata": {
786 | "colab": {
787 | "base_uri": "https://localhost:8080/",
788 | "height": 601
789 | },
790 | "id": "fUPHSmKPAP6_",
791 | "outputId": "4d196f89-62c4-4c91-eb66-cb3a8172af9f"
792 | },
793 | "execution_count": 5,
794 | "outputs": [
795 | {
796 | "output_type": "execute_result",
797 | "data": {
798 | "text/html": [
799 | "\n",
800 | " \n",
801 | "
\n",
802 | "
\n",
803 | "\n",
816 | "
\n",
817 | " \n",
818 | " \n",
819 | " \n",
820 | " Timestamp \n",
821 | " CAN ID \n",
822 | " Byte \n",
823 | " DATA[0] \n",
824 | " DATA[1] \n",
825 | " DATA[2] \n",
826 | " DATA[3] \n",
827 | " DATA[4] \n",
828 | " DATA[5] \n",
829 | " DATA[6] \n",
830 | " DATA[7] \n",
831 | " AttackType \n",
832 | " Message \n",
833 | " \n",
834 | " \n",
835 | " \n",
836 | " \n",
837 | " 0 \n",
838 | " 1.478200e+09 \n",
839 | " 0 \n",
840 | " 8 \n",
841 | " 0 \n",
842 | " 0 \n",
843 | " 0 \n",
844 | " 0 \n",
845 | " 0 \n",
846 | " 0 \n",
847 | " 0 \n",
848 | " 0 \n",
849 | " DoS Attack \n",
850 | " 0.000000e+00 \n",
851 | " \n",
852 | " \n",
853 | " 1 \n",
854 | " 1.478201e+09 \n",
855 | " 305 \n",
856 | " 8 \n",
857 | " 27 \n",
858 | " 128 \n",
859 | " 0 \n",
860 | " 0 \n",
861 | " 63 \n",
862 | " 127 \n",
863 | " 14 \n",
864 | " 166 \n",
865 | " Normal Message \n",
866 | " 2.712801e+16 \n",
867 | " \n",
868 | " \n",
869 | " 2 \n",
870 | " 1.478199e+09 \n",
871 | " 161 \n",
872 | " 8 \n",
873 | " 128 \n",
874 | " 137 \n",
875 | " 0 \n",
876 | " 0 \n",
877 | " 36 \n",
878 | " 0 \n",
879 | " 0 \n",
880 | " 0 \n",
881 | " Normal Message \n",
882 | " 1.281370e+12 \n",
883 | " \n",
884 | " \n",
885 | " 3 \n",
886 | " 1.478200e+09 \n",
887 | " 608 \n",
888 | " 8 \n",
889 | " 24 \n",
890 | " 33 \n",
891 | " 34 \n",
892 | " 48 \n",
893 | " 8 \n",
894 | " 143 \n",
895 | " 112 \n",
896 | " 5 \n",
897 | " Normal Message \n",
898 | " 2.433345e+15 \n",
899 | " \n",
900 | " \n",
901 | " 4 \n",
902 | " 1.478201e+09 \n",
903 | " 704 \n",
904 | " 8 \n",
905 | " 20 \n",
906 | " 0 \n",
907 | " 0 \n",
908 | " 0 \n",
909 | " 0 \n",
910 | " 0 \n",
911 | " 0 \n",
912 | " 0 \n",
913 | " Normal Message \n",
914 | " 2.000000e+08 \n",
915 | " \n",
916 | " \n",
917 | " 5 \n",
918 | " 1.478200e+09 \n",
919 | " 304 \n",
920 | " 8 \n",
921 | " 11 \n",
922 | " 128 \n",
923 | " 0 \n",
924 | " 255 \n",
925 | " 8 \n",
926 | " 128 \n",
927 | " 4 \n",
928 | " 136 \n",
929 | " Normal Message \n",
930 | " 1.112803e+16 \n",
931 | " \n",
932 | " \n",
933 | " 6 \n",
934 | " 1.478200e+09 \n",
935 | " 880 \n",
936 | " 8 \n",
937 | " 0 \n",
938 | " 32 \n",
939 | " 0 \n",
940 | " 0 \n",
941 | " 0 \n",
942 | " 0 \n",
943 | " 0 \n",
944 | " 0 \n",
945 | " Normal Message \n",
946 | " 3.200000e+07 \n",
947 | " \n",
948 | " \n",
949 | " 7 \n",
950 | " 1.478199e+09 \n",
951 | " 1264 \n",
952 | " 8 \n",
953 | " 0 \n",
954 | " 0 \n",
955 | " 0 \n",
956 | " 128 \n",
957 | " 0 \n",
958 | " 105 \n",
959 | " 209 \n",
960 | " 19 \n",
961 | " Normal Message \n",
962 | " 1.280105e+11 \n",
963 | " \n",
964 | " \n",
965 | " 8 \n",
966 | " 1.478199e+09 \n",
967 | " 304 \n",
968 | " 8 \n",
969 | " 5 \n",
970 | " 128 \n",
971 | " 0 \n",
972 | " 255 \n",
973 | " 11 \n",
974 | " 128 \n",
975 | " 12 \n",
976 | " 237 \n",
977 | " Normal Message \n",
978 | " 5.128026e+17 \n",
979 | " \n",
980 | " \n",
981 | " 9 \n",
982 | " 1.478198e+09 \n",
983 | " 305 \n",
984 | " 8 \n",
985 | " 247 \n",
986 | " 127 \n",
987 | " 0 \n",
988 | " 0 \n",
989 | " 76 \n",
990 | " 127 \n",
991 | " 13 \n",
992 | " 231 \n",
993 | " Normal Message \n",
994 | " 2.471270e+17 \n",
995 | " \n",
996 | " \n",
997 | "
\n",
998 | "
\n",
999 | "
\n",
1002 | " \n",
1003 | " \n",
1005 | " \n",
1006 | " \n",
1007 | " \n",
1008 | " \n",
1009 | " \n",
1010 | " \n",
1047 | "\n",
1048 | " \n",
1072 | "
\n",
1073 | "
\n",
1074 | " "
1075 | ],
1076 | "text/plain": [
1077 | " Timestamp CAN ID Byte DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] \\\n",
1078 | "0 1.478200e+09 0 8 0 0 0 0 0 \n",
1079 | "1 1.478201e+09 305 8 27 128 0 0 63 \n",
1080 | "2 1.478199e+09 161 8 128 137 0 0 36 \n",
1081 | "3 1.478200e+09 608 8 24 33 34 48 8 \n",
1082 | "4 1.478201e+09 704 8 20 0 0 0 0 \n",
1083 | "5 1.478200e+09 304 8 11 128 0 255 8 \n",
1084 | "6 1.478200e+09 880 8 0 32 0 0 0 \n",
1085 | "7 1.478199e+09 1264 8 0 0 0 128 0 \n",
1086 | "8 1.478199e+09 304 8 5 128 0 255 11 \n",
1087 | "9 1.478198e+09 305 8 247 127 0 0 76 \n",
1088 | "\n",
1089 | " DATA[5] DATA[6] DATA[7] AttackType Message \n",
1090 | "0 0 0 0 DoS Attack 0.000000e+00 \n",
1091 | "1 127 14 166 Normal Message 2.712801e+16 \n",
1092 | "2 0 0 0 Normal Message 1.281370e+12 \n",
1093 | "3 143 112 5 Normal Message 2.433345e+15 \n",
1094 | "4 0 0 0 Normal Message 2.000000e+08 \n",
1095 | "5 128 4 136 Normal Message 1.112803e+16 \n",
1096 | "6 0 0 0 Normal Message 3.200000e+07 \n",
1097 | "7 105 209 19 Normal Message 1.280105e+11 \n",
1098 | "8 128 12 237 Normal Message 5.128026e+17 \n",
1099 | "9 127 13 231 Normal Message 2.471270e+17 "
1100 | ]
1101 | },
1102 | "metadata": {},
1103 | "execution_count": 5
1104 | }
1105 | ]
1106 | },
1107 | {
1108 | "cell_type": "code",
1109 | "source": [
1110 | "import datetime\n",
1111 | "newdf = df.copy(deep = True)  # snapshot taken before Timestamp is overwritten with text\n",
1112 | "dateformat = \"%Y-%m-%d %H:%M:%S.%f\"\n",
1113 | "df['Timestamp'] = df['Timestamp'].apply(lambda x: datetime.datetime.fromtimestamp(float(x), tz=datetime.timezone.utc).strftime(dateformat))  # pinned to UTC so the text does not depend on the machine's local time zone\n",
1114 | "print(df.dtypes)\n",
1115 | "df.head(100)"
1116 | ],
1117 | "metadata": {
1118 | "id": "DMt9KCx9ql_W",
1119 | "colab": {
1120 | "base_uri": "https://localhost:8080/",
1121 | "height": 921
1122 | },
1123 | "outputId": "68acd251-e138-489f-8e43-ae6b6632c6af"
1124 | },
1125 | "execution_count": 6,
1126 | "outputs": [
1127 | {
1128 | "output_type": "stream",
1129 | "name": "stdout",
1130 | "text": [
1131 | "Timestamp object\n",
1132 | "CAN ID int64\n",
1133 | "Byte int64\n",
1134 | "DATA[0] int64\n",
1135 | "DATA[1] int64\n",
1136 | "DATA[2] int64\n",
1137 | "DATA[3] int64\n",
1138 | "DATA[4] int64\n",
1139 | "DATA[5] int64\n",
1140 | "DATA[6] int64\n",
1141 | "DATA[7] int64\n",
1142 | "AttackType object\n",
1143 | "Message float64\n",
1144 | "dtype: object\n"
1145 | ]
1146 | },
1147 | {
1148 | "output_type": "execute_result",
1149 | "data": {
1150 | "text/html": [
1151 | "\n",
1152 | " \n",
1153 | "
\n",
1154 | "
\n",
1155 | "\n",
1168 | "
\n",
1169 | " \n",
1170 | " \n",
1171 | " \n",
1172 | " Timestamp \n",
1173 | " CAN ID \n",
1174 | " Byte \n",
1175 | " DATA[0] \n",
1176 | " DATA[1] \n",
1177 | " DATA[2] \n",
1178 | " DATA[3] \n",
1179 | " DATA[4] \n",
1180 | " DATA[5] \n",
1181 | " DATA[6] \n",
1182 | " DATA[7] \n",
1183 | " AttackType \n",
1184 | " Message \n",
1185 | " \n",
1186 | " \n",
1187 | " \n",
1188 | " \n",
1189 | " 0 \n",
1190 | " 2016-11-03 19:08:43.044157 \n",
1191 | " 0 \n",
1192 | " 8 \n",
1193 | " 0 \n",
1194 | " 0 \n",
1195 | " 0 \n",
1196 | " 0 \n",
1197 | " 0 \n",
1198 | " 0 \n",
1199 | " 0 \n",
1200 | " 0 \n",
1201 | " DoS Attack \n",
1202 | " 0.000000e+00 \n",
1203 | " \n",
1204 | " \n",
1205 | " 1 \n",
1206 | " 2016-11-03 19:24:35.989254 \n",
1207 | " 305 \n",
1208 | " 8 \n",
1209 | " 27 \n",
1210 | " 128 \n",
1211 | " 0 \n",
1212 | " 0 \n",
1213 | " 63 \n",
1214 | " 127 \n",
1215 | " 14 \n",
1216 | " 166 \n",
1217 | " Normal Message \n",
1218 | " 2.712801e+16 \n",
1219 | " \n",
1220 | " \n",
1221 | " 2 \n",
1222 | " 2016-11-03 18:54:13.788681 \n",
1223 | " 161 \n",
1224 | " 8 \n",
1225 | " 128 \n",
1226 | " 137 \n",
1227 | " 0 \n",
1228 | " 0 \n",
1229 | " 36 \n",
1230 | " 0 \n",
1231 | " 0 \n",
1232 | " 0 \n",
1233 | " Normal Message \n",
1234 | " 1.281370e+12 \n",
1235 | " \n",
1236 | " \n",
1237 | " 3 \n",
1238 | " 2016-11-03 19:06:50.286119 \n",
1239 | " 608 \n",
1240 | " 8 \n",
1241 | " 24 \n",
1242 | " 33 \n",
1243 | " 34 \n",
1244 | " 48 \n",
1245 | " 8 \n",
1246 | " 143 \n",
1247 | " 112 \n",
1248 | " 5 \n",
1249 | " Normal Message \n",
1250 | " 2.433345e+15 \n",
1251 | " \n",
1252 | " \n",
1253 | " 4 \n",
1254 | " 2016-11-03 19:26:04.139714 \n",
1255 | " 704 \n",
1256 | " 8 \n",
1257 | " 20 \n",
1258 | " 0 \n",
1259 | " 0 \n",
1260 | " 0 \n",
1261 | " 0 \n",
1262 | " 0 \n",
1263 | " 0 \n",
1264 | " 0 \n",
1265 | " Normal Message \n",
1266 | " 2.000000e+08 \n",
1267 | " \n",
1268 | " \n",
1269 | " ... \n",
1270 | " ... \n",
1271 | " ... \n",
1272 | " ... \n",
1273 | " ... \n",
1274 | " ... \n",
1275 | " ... \n",
1276 | " ... \n",
1277 | " ... \n",
1278 | " ... \n",
1279 | " ... \n",
1280 | " ... \n",
1281 | " ... \n",
1282 | " ... \n",
1283 | " \n",
1284 | " \n",
1285 | " 95 \n",
1286 | " 2016-11-03 19:05:13.346416 \n",
1287 | " 0 \n",
1288 | " 8 \n",
1289 | " 0 \n",
1290 | " 0 \n",
1291 | " 0 \n",
1292 | " 0 \n",
1293 | " 0 \n",
1294 | " 0 \n",
1295 | " 0 \n",
1296 | " 0 \n",
1297 | " DoS Attack \n",
1298 | " 0.000000e+00 \n",
1299 | " \n",
1300 | " \n",
1301 | " 97 \n",
1302 | " 2016-11-03 19:15:01.146305 \n",
1303 | " 704 \n",
1304 | " 8 \n",
1305 | " 20 \n",
1306 | " 0 \n",
1307 | " 0 \n",
1308 | " 0 \n",
1309 | " 0 \n",
1310 | " 0 \n",
1311 | " 0 \n",
1312 | " 0 \n",
1313 | " Normal Message \n",
1314 | " 2.000000e+08 \n",
1315 | " \n",
1316 | " \n",
1317 | " 98 \n",
1318 | " 2016-11-03 18:56:54.761137 \n",
1319 | " 809 \n",
1320 | " 8 \n",
1321 | " 220 \n",
1322 | " 190 \n",
1323 | " 127 \n",
1324 | " 20 \n",
1325 | " 17 \n",
1326 | " 32 \n",
1327 | " 0 \n",
1328 | " 20 \n",
1329 | " Normal Message \n",
1330 | " 2.201901e+17 \n",
1331 | " \n",
1332 | " \n",
1333 | " 99 \n",
1334 | " 2016-11-03 18:52:14.511839 \n",
1335 | " 497 \n",
1336 | " 8 \n",
1337 | " 8 \n",
1338 | " 0 \n",
1339 | " 0 \n",
1340 | " 0 \n",
1341 | " 0 \n",
1342 | " 0 \n",
1343 | " 0 \n",
1344 | " 0 \n",
1345 | " Normal Message \n",
1346 | " 8.000000e+07 \n",
1347 | " \n",
1348 | " \n",
1349 | " 100 \n",
1350 | " 2016-11-03 19:16:38.790256 \n",
1351 | " 704 \n",
1352 | " 8 \n",
1353 | " 20 \n",
1354 | " 0 \n",
1355 | " 0 \n",
1356 | " 0 \n",
1357 | " 0 \n",
1358 | " 0 \n",
1359 | " 0 \n",
1360 | " 0 \n",
1361 | " Normal Message \n",
1362 | " 2.000000e+08 \n",
1363 | " \n",
1364 | " \n",
1365 | "
\n",
1366 | "
100 rows × 13 columns
\n",
1367 | "
\n",
1368 | "
\n",
1371 | " \n",
1372 | " \n",
1374 | " \n",
1375 | " \n",
1376 | " \n",
1377 | " \n",
1378 | " \n",
1379 | " \n",
1416 | "\n",
1417 | " \n",
1441 | "
\n",
1442 | "
\n",
1443 | " "
1444 | ],
1445 | "text/plain": [
1446 | " Timestamp CAN ID Byte DATA[0] DATA[1] DATA[2] \\\n",
1447 | "0 2016-11-03 19:08:43.044157 0 8 0 0 0 \n",
1448 | "1 2016-11-03 19:24:35.989254 305 8 27 128 0 \n",
1449 | "2 2016-11-03 18:54:13.788681 161 8 128 137 0 \n",
1450 | "3 2016-11-03 19:06:50.286119 608 8 24 33 34 \n",
1451 | "4 2016-11-03 19:26:04.139714 704 8 20 0 0 \n",
1452 | ".. ... ... ... ... ... ... \n",
1453 | "95 2016-11-03 19:05:13.346416 0 8 0 0 0 \n",
1454 | "97 2016-11-03 19:15:01.146305 704 8 20 0 0 \n",
1455 | "98 2016-11-03 18:56:54.761137 809 8 220 190 127 \n",
1456 | "99 2016-11-03 18:52:14.511839 497 8 8 0 0 \n",
1457 | "100 2016-11-03 19:16:38.790256 704 8 20 0 0 \n",
1458 | "\n",
1459 | " DATA[3] DATA[4] DATA[5] DATA[6] DATA[7] AttackType Message \n",
1460 | "0 0 0 0 0 0 DoS Attack 0.000000e+00 \n",
1461 | "1 0 63 127 14 166 Normal Message 2.712801e+16 \n",
1462 | "2 0 36 0 0 0 Normal Message 1.281370e+12 \n",
1463 | "3 48 8 143 112 5 Normal Message 2.433345e+15 \n",
1464 | "4 0 0 0 0 0 Normal Message 2.000000e+08 \n",
1465 | ".. ... ... ... ... ... ... ... \n",
1466 | "95 0 0 0 0 0 DoS Attack 0.000000e+00 \n",
1467 | "97 0 0 0 0 0 Normal Message 2.000000e+08 \n",
1468 | "98 20 17 32 0 20 Normal Message 2.201901e+17 \n",
1469 | "99 0 0 0 0 0 Normal Message 8.000000e+07 \n",
1470 | "100 0 0 0 0 0 Normal Message 2.000000e+08 \n",
1471 | "\n",
1472 | "[100 rows x 13 columns]"
1473 | ]
1474 | },
1475 | "metadata": {},
1476 | "execution_count": 6
1477 | }
1478 | ]
1479 | },
1480 | {
1481 | "cell_type": "code",
1482 | "source": [
1483 | "#df = newdf.copy(deep = True)\n",
1484 | "from sklearn import preprocessing\n",
1485 | "#print(df['AttackType'].unique())\n",
1486 | "#print(df['AttackType'].value_counts())\n",
1487 | "encoder = preprocessing.LabelEncoder()  # kept in a variable: a later cell calls encoder.inverse_transform\n",
1488 | "#df1 = df[['AttackType']].copy()\n",
1489 | "df['AttackType']= encoder.fit_transform(df['AttackType'].values)  # replaces the text labels with integer codes, overwriting the column\n",
1490 | "# df = df.drop(['AttackType'], axis = 1)\n",
1491 | "# df1\n",
1492 | "#df = pd.concat([df.iloc[:,0:11],df1, df.iloc[:, 11:]], axis=1)\n",
1493 | "#df = pd.get_dummies(df, columns =['AttackType'], prefix = '', prefix_sep = '')\n",
1494 | "print(df.head(10))\n",
1495 | "# print(df['AttackType Encode'])\n",
1496 | "print(df['AttackType'])\n",
1497 | "print(df.shape)\n",
1498 | "#print(df.shape)"
1499 | ],
1500 | "metadata": {
1501 | "colab": {
1502 | "base_uri": "https://localhost:8080/"
1503 | },
1504 | "id": "vEUXBabOBtpQ",
1505 | "outputId": "076885d8-3a39-446d-96b6-0816f653f48a"
1506 | },
1507 | "execution_count": 7,
1508 | "outputs": [
1509 | {
1510 | "output_type": "stream",
1511 | "name": "stdout",
1512 | "text": [
1513 | " Timestamp CAN ID Byte DATA[0] DATA[1] DATA[2] \\\n",
1514 | "0 2016-11-03 19:08:43.044157 0 8 0 0 0 \n",
1515 | "1 2016-11-03 19:24:35.989254 305 8 27 128 0 \n",
1516 | "2 2016-11-03 18:54:13.788681 161 8 128 137 0 \n",
1517 | "3 2016-11-03 19:06:50.286119 608 8 24 33 34 \n",
1518 | "4 2016-11-03 19:26:04.139714 704 8 20 0 0 \n",
1519 | "5 2016-11-03 19:03:07.624543 304 8 11 128 0 \n",
1520 | "6 2016-11-03 19:06:31.658461 880 8 0 32 0 \n",
1521 | "7 2016-11-03 18:55:47.812754 1264 8 0 0 0 \n",
1522 | "8 2016-11-03 18:46:48.226079 304 8 5 128 0 \n",
1523 | "9 2016-11-03 18:40:52.891089 305 8 247 127 0 \n",
1524 | "\n",
1525 | " DATA[3] DATA[4] DATA[5] DATA[6] DATA[7] AttackType Message \n",
1526 | "0 0 0 0 0 0 0 0.000000e+00 \n",
1527 | "1 0 63 127 14 166 3 2.712801e+16 \n",
1528 | "2 0 36 0 0 0 3 1.281370e+12 \n",
1529 | "3 48 8 143 112 5 3 2.433345e+15 \n",
1530 | "4 0 0 0 0 0 3 2.000000e+08 \n",
1531 | "5 255 8 128 4 136 3 1.112803e+16 \n",
1532 | "6 0 0 0 0 0 3 3.200000e+07 \n",
1533 | "7 128 0 105 209 19 3 1.280105e+11 \n",
1534 | "8 255 11 128 12 237 3 5.128026e+17 \n",
1535 | "9 0 76 127 13 231 3 2.471270e+17 \n",
1536 | "0 0\n",
1537 | "1 3\n",
1538 | "2 3\n",
1539 | "3 3\n",
1540 | "4 3\n",
1541 | " ..\n",
1542 | "462165 3\n",
1543 | "462166 3\n",
1544 | "462167 3\n",
1545 | "462168 3\n",
1546 | "462169 4\n",
1547 | "Name: AttackType, Length: 1636855, dtype: int64\n",
1548 | "(1636855, 13)\n"
1549 | ]
1550 | }
1551 | ]
1552 | },
1553 | {
1554 | "cell_type": "code",
1555 | "source": [
1556 | "df.columns  # column order matters: the next cell selects the features by position"
1557 | ],
1558 | "metadata": {
1559 | "colab": {
1560 | "base_uri": "https://localhost:8080/"
1561 | },
1562 | "id": "jPQUX0V2PPkm",
1563 | "outputId": "45225c4a-9642-444e-bb72-1ff1c5f3e0cc"
1564 | },
1565 | "execution_count": 8,
1566 | "outputs": [
1567 | {
1568 | "output_type": "execute_result",
1569 | "data": {
1570 | "text/plain": [
1571 | "Index(['Timestamp', 'CAN ID', 'Byte', 'DATA[0]', 'DATA[1]', 'DATA[2]',\n",
1572 | " 'DATA[3]', 'DATA[4]', 'DATA[5]', 'DATA[6]', 'DATA[7]', 'AttackType',\n",
1573 | " 'Message'],\n",
1574 | " dtype='object')"
1575 | ]
1576 | },
1577 | "metadata": {},
1578 | "execution_count": 8
1579 | }
1580 | ]
1581 | },
1582 | {
1583 | "cell_type": "code",
1584 | "source": [
1585 | "X = df.iloc[:, np.r_[1, 3:11]]  # positions 1 and 3..10: CAN ID and DATA[0]..DATA[7] (the stray leading ':' added nothing and was removed)\n",
1586 | "#X = df[['CAN ID', 'DATA[0]', 'DATA[1]', 'DATA[2]', 'DATA[3]', 'DATA[4]', 'DATA[5]', 'DATA[6]', 'DATA[7]']]\n",
1587 | "Y = df[['AttackType']]  # one-column frame holding the encoded class\n",
1588 | "X,Y"
1589 | ],
1590 | "metadata": {
1591 | "colab": {
1592 | "base_uri": "https://localhost:8080/"
1593 | },
1594 | "id": "-E_zgBKtbX4C",
1595 | "outputId": "fe0d954e-9b94-46d9-f3a1-3e8503d67c3d"
1596 | },
1597 | "execution_count": 9,
1598 | "outputs": [
1599 | {
1600 | "output_type": "execute_result",
1601 | "data": {
1602 | "text/plain": [
1603 | "( CAN ID DATA[0] DATA[1] DATA[2] DATA[3] DATA[4] DATA[5] DATA[6] \\\n",
1604 | " 0 0 0 0 0 0 0 0 0 \n",
1605 | " 1 305 27 128 0 0 63 127 14 \n",
1606 | " 2 161 128 137 0 0 36 0 0 \n",
1607 | " 3 608 24 33 34 48 8 143 112 \n",
1608 | " 4 704 20 0 0 0 0 0 0 \n",
1609 | " ... ... ... ... ... ... ... ... ... \n",
1610 | " 462165 809 220 183 126 20 17 32 0 \n",
1611 | " 462166 305 242 127 0 0 58 127 12 \n",
1612 | " 462167 305 242 127 0 0 64 127 6 \n",
1613 | " 462168 704 21 0 0 0 0 0 0 \n",
1614 | " 462169 790 69 41 36 255 41 36 0 \n",
1615 | " \n",
1616 | " DATA[7] \n",
1617 | " 0 0 \n",
1618 | " 1 166 \n",
1619 | " 2 0 \n",
1620 | " 3 5 \n",
1621 | " 4 0 \n",
1622 | " ... ... \n",
1623 | " 462165 20 \n",
1624 | " 462166 131 \n",
1625 | " 462167 22 \n",
1626 | " 462168 0 \n",
1627 | " 462169 255 \n",
1628 | " \n",
1629 | " [1636855 rows x 9 columns], AttackType\n",
1630 | " 0 0\n",
1631 | " 1 3\n",
1632 | " 2 3\n",
1633 | " 3 3\n",
1634 | " 4 3\n",
1635 | " ... ...\n",
1636 | " 462165 3\n",
1637 | " 462166 3\n",
1638 | " 462167 3\n",
1639 | " 462168 3\n",
1640 | " 462169 4\n",
1641 | " \n",
1642 | " [1636855 rows x 1 columns])"
1643 | ]
1644 | },
1645 | "metadata": {},
1646 | "execution_count": 9
1647 | }
1648 | ]
1649 | },
1650 | {
1651 | "cell_type": "code",
1652 | "source": [
1653 | "from sklearn.model_selection import train_test_split\n",
1654 | "from sklearn.svm import SVC\n",
1655 | "from sklearn.pipeline import Pipeline\n",
1656 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.25, random_state = 20)  # 75/25 split with a fixed seed; no stratify argument is passed"
1657 | ],
1658 | "metadata": {
1659 | "id": "7_qOxOWjbeX8"
1660 | },
1661 | "execution_count": 10,
1662 | "outputs": []
1663 | },
1664 | {
1665 | "cell_type": "code",
1666 | "source": [
1667 | "val = encoder.inverse_transform(df['AttackType'])  # decode the encoded class ids back to their labels\n",
1668 | "unique, counts = np.unique(val, return_counts=True)\n",
1669 | "frequencies = np.transpose(np.asarray((unique, counts)))  # one (label, count) row per class\n",
1670 | "print(frequencies)\n",
1671 | "print(df['AttackType'].value_counts())"
1672 | ],
1673 | "metadata": {
1674 | "colab": {
1675 | "base_uri": "https://localhost:8080/"
1676 | },
1677 | "id": "oUcMhUpnh4Tq",
1678 | "outputId": "6fe79717-8b48-42ea-b5a8-11fe00f2d568"
1679 | },
1680 | "execution_count": 23,
1681 | "outputs": [
1682 | {
1683 | "output_type": "stream",
1684 | "name": "stdout",
1685 | "text": [
1686 | "[['DoS Attack' 58469]\n",
1687 | " ['Fuzzy Attack' 49258]\n",
1688 | " ['Gear Spooing Attack' 60016]\n",
1689 | " ['Normal Message' 1403673]\n",
1690 | " ['RPM Spoofing Attack' 65439]]\n",
1691 | "3 1403673\n",
1692 | "4 65439\n",
1693 | "2 60016\n",
1694 | "0 58469\n",
1695 | "1 49258\n",
1696 | "Name: AttackType, dtype: int64\n"
1697 | ]
1698 | }
1699 | ]
1700 | },
1701 | {
1702 | "cell_type": "code",
1703 | "source": [
1704 | "model = Pipeline([\n",
1705 | " ('svc', SVC(random_state=20))\n",
1706 | " ])\n",
1707 | "model.fit(X_train, Y_train.values.ravel())  # Y_train is a one-column frame; SVC wants y of shape (n_samples,), so flatten it to avoid DataConversionWarning\n",
1708 | "pred = model.predict(X_test)\n",
1709 | "pred"
1710 | ],
1711 | "metadata": {
1712 | "colab": {
1713 | "base_uri": "https://localhost:8080/"
1714 | },
1715 | "id": "NkzbFP_GbjSe",
1716 | "outputId": "bf0d413b-a09f-49d3-82fb-87ce53b04aae"
1717 | },
1718 | "execution_count": null,
1719 | "outputs": [
1720 | {
1721 | "output_type": "stream",
1722 | "name": "stderr",
1723 | "text": [
1724 | "/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
1725 | " y = column_or_1d(y, warn=True)\n"
1726 | ]
1727 | }
1728 | ]
1729 | },
1730 | {
1731 | "cell_type": "code",
1732 | "source": [
1733 | "from sklearn.metrics import confusion_matrix\n",
1734 | "from sklearn.metrics import classification_report\n",
1735 | "from sklearn.metrics import accuracy_score\n",
1736 | "accuracy = accuracy_score(Y_test, pred)  # fraction of test rows predicted correctly\n",
1737 | "print('accuracy : \\n', accuracy)\n",
1738 | "matrix = confusion_matrix(Y_test,pred)\n",
1739 | "print('Confusion matrix : \\n',matrix)\n",
1740 | "report = classification_report(Y_test,pred)  # own name, so `matrix` keeps holding the confusion matrix\n",
1741 | "print('Classification report : \\n',report)"
1742 | ],
1743 | "metadata": {
1744 | "id": "zCFivudpci-x"
1745 | },
1746 | "execution_count": null,
1747 | "outputs": []
1748 | },
1749 | {
1750 | "cell_type": "code",
1751 | "source": [
1752 | "dfC = df['AttackType'].value_counts()\n",
1753 | "# dfC is indexed by the encoded class ids; passing string labels as `index=` to pd.DataFrame would reindex it and give all-NaN rows, so decode the ids instead.\n",
1754 | "dfcount = pd.DataFrame(dfC.values, index=encoder.inverse_transform(dfC.index))\n",
1755 | "dfcount.columns = ['Injected Messages']\n",
1756 | "#dfcount_reset.set_index('Attack Type')\n",
1757 | "#dfcount_reset.dropna()\n",
1758 | "print(\"\\n\",dfcount)\n",
1759 | "\n",
1760 | "#index = ['Normal Message','RPM Spoofing Attack','Gear Spooing Attack','DoS Attack','Fuzzy Attack']\n",
1761 | "# df2frame = dfDos['AttackType'].value_counts()\n",
1762 | "# df2frame_count = pd.DataFrame(df2frame)\n",
1763 | "# df2frame_count_reset = df2frame_count.reset_index()\n",
1764 | "# df2frame_count_reset.columns = ['No Of Normal Message','No Of Injected Messages']\n",
1765 | "# print(\"\\n\",df2frame_count_reset)"
1766 | ],
1767 | "metadata": {
1768 | "id": "z-aD9lxNQ2hX"
1769 | },
1770 | "execution_count": null,
1771 | "outputs": []
1772 | },
1773 | {
1774 | "cell_type": "code",
1775 | "source": [
1776 | "import matplotlib.pyplot as plt\n",
1777 | "# Pie chart of the 'Injected Messages' column of `dfcount`, one wedge per row.\n",
1778 | "# A bar chart of the same column can be drawn with:\n",
1779 | "#   dfcount.plot.bar(y='Injected Messages', rot=0, figsize=(12, 8))\n",
1780 | "plot = dfcount.plot(\n",
1781 | "    kind='pie', y='Injected Messages', figsize=(12, 8))"
1782 | ],
1783 | "metadata": {
1784 | "id": "7Am7edMcou4Y"
1785 | },
1786 | "execution_count": null,
1787 | "outputs": []
1788 | }
1789 | ]
1790 | }
--------------------------------------------------------------------------------
/supplementary_code/README.md:
--------------------------------------------------------------------------------
1 | # The code in this folder shows an example of the pre-processing of the Car-Hacking dataset.
2 |
--------------------------------------------------------------------------------