├── img ├── test.txt ├── hulk.gif ├── portscan.gif ├── slowloris.png ├── InputFormat.png ├── golden-eye.png ├── fine-tune-goldeneye.png └── fine-tune-goldeneye-2.png ├── PcapSamples ├── README.md ├── hulk.pcap ├── text.pcap ├── xmas.pcap ├── GoldenEye.pcap ├── nmap_fin.pcap ├── nmap_null.pcap ├── portscan.pcap ├── http_slowloris.pcap ├── discovery_scan_dcerpc_endpoint_mapper.pcapng └── DE_byt3bl33d3r_remote_eventservice_crash.pcapng ├── README.md └── NetworkPcapAnalysis.ipynb /img/test.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /PcapSamples/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /img/hulk.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/img/hulk.gif -------------------------------------------------------------------------------- /img/portscan.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/img/portscan.gif -------------------------------------------------------------------------------- /img/slowloris.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/img/slowloris.png -------------------------------------------------------------------------------- /img/InputFormat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/img/InputFormat.png -------------------------------------------------------------------------------- /img/golden-eye.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/img/golden-eye.png -------------------------------------------------------------------------------- /PcapSamples/hulk.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/PcapSamples/hulk.pcap -------------------------------------------------------------------------------- /PcapSamples/text.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/PcapSamples/text.pcap -------------------------------------------------------------------------------- /PcapSamples/xmas.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/PcapSamples/xmas.pcap -------------------------------------------------------------------------------- /PcapSamples/GoldenEye.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/PcapSamples/GoldenEye.pcap -------------------------------------------------------------------------------- /PcapSamples/nmap_fin.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/PcapSamples/nmap_fin.pcap -------------------------------------------------------------------------------- /PcapSamples/nmap_null.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/PcapSamples/nmap_null.pcap -------------------------------------------------------------------------------- /PcapSamples/portscan.pcap: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/PcapSamples/portscan.pcap -------------------------------------------------------------------------------- /img/fine-tune-goldeneye.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/img/fine-tune-goldeneye.png -------------------------------------------------------------------------------- /img/fine-tune-goldeneye-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/img/fine-tune-goldeneye-2.png -------------------------------------------------------------------------------- /PcapSamples/http_slowloris.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/PcapSamples/http_slowloris.pcap -------------------------------------------------------------------------------- /PcapSamples/discovery_scan_dcerpc_endpoint_mapper.pcapng: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/PcapSamples/discovery_scan_dcerpc_endpoint_mapper.pcapng -------------------------------------------------------------------------------- /PcapSamples/DE_byt3bl33d3r_remote_eventservice_crash.pcapng: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPs-ESIR-S9/PcapFileAnalysis/HEAD/PcapSamples/DE_byt3bl33d3r_remote_eventservice_crash.pcapng -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

ESIR-S9 - AI Project : Network Traffic Analysis 🦈

2 | Yazid BENJAMAA (@Xacone) & Thomas DELAPART (@Thomega35) 3 |

4 | The project's purpose is to predict whether a network activity is malicious or not. This classification is achieved by analyzing each packet's content and context in a network capture file (pcap) and then returning a summary of the attacks that were detected. 5 |

We also built a small Flask web app, with Google Colab as its backend, that lets you visualize the classification results; more details below. 6 | 7 |

Model and input format

8 | 9 | We took the following model from HuggingFace : rdpahalavan/bert-network-packet-flow-header-payload which classifies a single network packet into one of these categories : 10 | ``` 11 | ['Analysis', 'Backdoor', 'Bot', 'DDoS', 'DoS', 'DoS GoldenEye', 'DoS Hulk', 'DoS SlowHTTPTest', 'DoS Slowloris', 'Exploits', 'FTP Patator', 'Fuzzers', 'Generic', 'Heartbleed', 'Infiltration', 'Normal', 'Port Scan', 'Reconnaissance', 'SSH Patator', 'Shellcode', 'Web Attack - Brute Force', 'Web Attack - SQL Injection', 'Web Attack - XSS', 'Worms'] 12 | ``` 13 | 14 | Each input represents a network packet which respects the following structure : 15 |

16 | 17 | 18 | The model is based on BERT (Bidirectional Encoder Representations from Transformers), which is itself based on the Transformer neural network architecture. We chose BERT because it is well suited to analyzing pcap files, where the bidirectional context of packet data within the network flow is important. 19 | 20 | Each IP packet in a loaded pcap file is converted to the format described above before being processed by the model; pcap/packet manipulation is done using Scapy. 21 | 22 | The pcap files used for testing & fine-tuning the model were taken from the following sources, which provide a wide range of samples containing benign/malicious activities :
23 | TII-SSRC-23 Dataset - Network traffic for intrusion detection research
24 | Network datasets
25 | Network Forensics and Network Security Monitoring - Publicly available PCAP files 26 | 27 |

Fine-tuning

28 | 29 | There were many attempts at fine-tuning, and half of them gave satisfying results. After adding more labeled training samples, the model became much more proficient at detecting the same attack or attacks of the same family (acting at the same TCP/IP layer), but it returned inconsistent and false results for the other attacks, either detecting nothing at all or a bunch of other attacks that had nothing to do with the content of the pcap file. Note that we also filtered the packets that were taken into account during training (e.g. only `GET` or `POST` requests for HTTP DoS attack samples). 30 | 31 | The notebook provides the function `trainFromPcapFile(file_path, label, application_filter)`, which allows adding transformed training samples (packets) retrieved from a pcap file, with the ability to select packets based on filter patterns. 32 | ```python 33 | trainFromPcapFile("/content/sample_data/dvwa_sql_injection.pcap", 21, b"GET /") # Transforms and adds the packets from the pcap file, labels them with 21 (Web Attack - SQL Injection) and keeps only GET requests. 34 | ``` 35 | 36 | We also tried removing certain parameters such as the backward and forward packets (which seemed irrelevant to us in a normal packet capture sequence). This improved the detection of some attacks, such as web attacks and port scans, but also distorted certain other results. 37 | We aren't able to make a definitive statement on the effectiveness of fine-tuning; however, we truly believe that more effort and testing could lead to a more performant and balanced fine-tuned model. 38 | 39 |

Detecting Applicative (Layer 7) Denial of Service Attacks & Used Tools

40 | 41 | The model does such a great job in detecting DoS attacks through the network.
42 | Two simple HTTP Denial of Service (DoS) tools were used to test its ability to detect the attacks they generate : 43 | Hulk & GoldenEye. 44 | 45 |

Hulk attacks detection

46 | 47 | 48 | 49 |

20 ports TCP SYN Scanning (Considered a "normal" activity)

50 | 51 | 52 | 53 | We clearly see that the model has no problem detecting malicious anomalous flows in the network packet capture; it succeeds in precisely identifying the anomaly type and the tool that was used. 54 | 55 |

GoldenEye Attacks Detection

56 | 57 | 58 | 59 |

GoldenEye Attacks Detection after Fine-Tuning the Model

60 | 61 | Applying the fine-tuning methods described above produced more relevant & explicit results when analyzing network traffic that suffered a DoS attack. 62 | Compared to the model's state before fine-tuning, we see that more DoS-related packets were detected (500 malicious packets out of 1600 w/ fine-tuning VS 60 malicious packets out of 3000 by default). 63 | 64 | 65 | 66 | 67 | 68 | You can also experiment with it using your own pcap samples or the ones provided in this repository. 69 | 70 |

Example of Illogical Results Caused by Fine-Tuning

71 | 72 | Here we are trying to detect a DoS attack carried out with a tool named Slowloris. 73 | Logically, the model should either predict that many packets are DoS/DDoS-related or, if it fails, predict that they are normal. 74 | Instead, the model predicted the presence of a completely unrelated attack, an SSH brute-force with the Patator tool, even though there is no communication to TCP port 22. 75 | 76 | 77 | 78 |

How to set up the app on Google Colab

79 | 80 | Once executed, the following cell will print a link that routes to the app : 81 | 82 | ```python 83 | from google.colab.output import eval_js 84 | print(eval_js("google.colab.kernel.proxyPort(5000)")) 85 | ``` 86 | 87 | Then execute the next cell, which fires up the backend. It is a Flask-based application w/ two endpoints : 88 | ```python 89 | [....] 90 | 91 | @app.route("/") 92 | def home(): 93 | return index 94 | 95 | @app.route('/upload', methods=['POST']) 96 | def upload_file(): 97 | 98 | [...] 99 | 100 | if __name__ == "__main__": 101 | app.run() 102 | [....] 103 | ``` 104 | 105 | Then click the link; you should land on the app. 106 |
107 | The provided notebook can use Colab's default GPU w/ PyTorch in order to make training/prediction faster : 108 | 109 | ```python 110 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 111 | model = model.to(device) 112 | ``` 113 | 114 |

To conclude

115 | 116 | Working on this small project has been fun and instructive, and even if it's only a POC in the end, this project and its model can be applied and show their usefulness to many practical cases dealing with detectability in computer networks. 117 | 118 | 119 | -------------------------------------------------------------------------------- /NetworkPcapAnalysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "**

AI Project : Network Traffic Analysis

**\n", 7 | "

Yazid BENJAMAA & Thomas DELAPART - ESIR3 SI

\n", 8 | "\n", 9 | "Project Repo & report : https://github.com/TPs-ESIR-S9/PcapFileAnalysis\n", 10 | "\n", 11 | "This project's purpose is to predict wether a network activiy is malicious or not, this classification is achieved by analysis each packet content and context in a network capture file (pcap) and then returning a brief of the attacks that were detected." 12 | ], 13 | "metadata": { 14 | "id": "2Usss5c2Vtjv" 15 | } 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "mIMq4hhdr0Xg" 21 | }, 22 | "source": [ 23 | "Installing necessary python packages:\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "id": "vT0s9-D0bJ7g" 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "!pip install torch torchvision torchaudio\n", 35 | "!pip install scapy\n", 36 | "!pip install flask\n", 37 | "!pip install flask_ngrok" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "source": [ 43 | "Necessary imports for model manipulation & Flask app deployment:\n", 44 | "\n", 45 | "---\n", 46 | "\n" 47 | ], 48 | "metadata": { 49 | "id": "easPfS34smXN" 50 | } 51 | }, 52 | { 53 | "cell_type": "code", 54 | "source": [ 55 | "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n", 56 | "from torch.utils.data import DataLoader, TensorDataset\n", 57 | "from torch.nn import CrossEntropyLoss\n", 58 | "from torch.optim import Adam\n", 59 | "import torch\n", 60 | "from scapy.all import *\n", 61 | "import os\n", 62 | "import matplotlib.pyplot as plt\n", 63 | "from flask import Flask, render_template, request, redirect, url_for, send_file, render_template_string\n", 64 | "from werkzeug.utils import secure_filename\n", 65 | "from io import BytesIO\n", 66 | "import base64" 67 | ], 68 | "metadata": { 69 | "id": "K4e_wMNQr6tr" 70 | }, 71 | "execution_count": null, 72 | "outputs": [] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "source": [ 77 | "The model has 24 output classes:\n" 78 | ], 79 
| "metadata": { 80 | "id": "b1mH32oHsl5I" 81 | } 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "id": "P2sg99F0xNax" 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "classes = [\n", 92 | " 'Analysis',\n", 93 | " 'Backdoor',\n", 94 | " 'Bot',\n", 95 | " 'DDoS',\n", 96 | " 'DoS',\n", 97 | " 'DoS GoldenEye',\n", 98 | " 'DoS Hulk',\n", 99 | " 'DoS SlowHTTPTest',\n", 100 | " 'DoS Slowloris',\n", 101 | " 'Exploits',\n", 102 | " 'FTP Patator',\n", 103 | " 'Fuzzers',\n", 104 | " 'Generic',\n", 105 | " 'Heartbleed',\n", 106 | " 'Infiltration',\n", 107 | " 'Normal',\n", 108 | " 'Port Scan',\n", 109 | " 'Reconnaissance',\n", 110 | " 'SSH Patator',\n", 111 | " 'Shellcode',\n", 112 | " 'Web Attack - Brute Force',\n", 113 | " 'Web Attack - SQL Injection',\n", 114 | " 'Web Attack - XSS',\n", 115 | " 'Worms']\n", 116 | "\n", 117 | "print(len(classes))\n" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "source": [ 123 | "We retrieve the model we are using from HugginFace, the model employs the BERT model fine-tuned for classifying network packet flow based on header and payload information.\n" 124 | ], 125 | "metadata": { 126 | "id": "cNlpA0msFT-L" 127 | } 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "id": "_cwFuaYrqXVN" 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "tokenizer = AutoTokenizer.from_pretrained(\"rdpahalavan/bert-network-packet-flow-header-payload\")\n", 138 | "model = AutoModelForSequenceClassification.from_pretrained(\"rdpahalavan/bert-network-packet-flow-header-payload\")" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "source": [ 144 | " Details and architecture of the pre-trained BERT model." 
145 | ], 146 | "metadata": { 147 | "id": "_e5eoXO3F-Pk" 148 | } 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": { 154 | "id": "7RiF52uasO0j", 155 | "collapsed": true 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "print(model)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "source": [ 165 | "Input embeddings of the pre-trained BERT model previously loaded." 166 | ], 167 | "metadata": { 168 | "id": "l9O69SjNF2kb" 169 | } 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": { 175 | "id": "tRYBhYFZKZq2", 176 | "collapsed": true 177 | }, 178 | "outputs": [], 179 | "source": [ 180 | "print(model.get_input_embeddings())" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "source": [ 186 | "Configuration settings of the pre-trained BERT model, revealing details such as model architecture, hyperparameters, and other configuration parameters." 187 | ], 188 | "metadata": { 189 | "id": "yJS4yJbEGM4M" 190 | } 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "id": "Fb5b9NsEsnTS", 197 | "collapsed": true 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "print(model.config)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "source": [ 207 | "Information about the tokenizer and the model input names. The first line prints details about the tokenizer, while the second line displays the input names expected by the model according to the tokenizer." 
208 | ], 209 | "metadata": { 210 | "id": "BDqOPjzvGbTh" 211 | } 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "id": "Yo1sc5YJs30f", 218 | "collapsed": true 219 | }, 220 | "outputs": [], 221 | "source": [ 222 | "print(tokenizer)\n", 223 | "print(tokenizer.model_input_names)" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "source": [ 229 | "`processing_packet_conversion` performs packet decimal conversion and aditionnal changes to fit the input structure of the model representing a packet, The following diagram shows the structure to be respected:" 230 | ], 231 | "metadata": { 232 | "id": "5f7-7-wiGhH1" 233 | } 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": { 238 | "id": "MsDQYUNKLxh2" 239 | }, 240 | "source": [ 241 | "![Data-Format.png]()" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": { 248 | "collapsed": true, 249 | "id": "-plbVMUhOXc4" 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "# Initialize dictionaries and lists for packet analysis.\n", 254 | "packets_brief = {}\n", 255 | "forward_packets = {}\n", 256 | "backward_packets = {}\n", 257 | "protocols = []\n", 258 | "protocol_counts = {}\n", 259 | "\n", 260 | "def processing_packet_conversion(packet):\n", 261 | " # Clone the packet for processing without modifying the original.\n", 262 | " packet_2 = packet\n", 263 | "\n", 264 | " while packet_2:\n", 265 | " # Extract and count protocol layers in the packet.\n", 266 | " layer = packet_2[0]\n", 267 | " if layer.name not in protocol_counts:\n", 268 | " protocol_counts[layer.name] = 0\n", 269 | " else:\n", 270 | " protocol_counts[layer.name] += 1\n", 271 | " protocols.append(layer.name)\n", 272 | "\n", 273 | " # Break if there are no more payload layers.\n", 274 | " if not layer.payload:\n", 275 | " break\n", 276 | " packet_2 = layer.payload\n", 277 | "\n", 278 | " # Extract relevant information for feature 
creation.\n", 279 | " src_ip = packet[IP].src\n", 280 | " dst_ip = packet[IP].dst\n", 281 | " src_port = packet.sport\n", 282 | " dst_port = packet.dport\n", 283 | " ip_length = len(packet[IP])\n", 284 | " ip_ttl = packet[IP].ttl\n", 285 | " ip_tos = packet[IP].tos\n", 286 | " tcp_data_offset = packet[TCP].dataofs\n", 287 | " tcp_flags = packet[TCP].flags\n", 288 | "\n", 289 | " # Process payload content and create a feature string.\n", 290 | " payload_bytes = bytes(packet.payload)\n", 291 | " payload_length = len(payload_bytes)\n", 292 | " payload_content = payload_bytes.decode('utf-8', 'replace')\n", 293 | " payload_decimal = ' '.join(str(byte) for byte in payload_bytes)\n", 294 | " final_data = \"0\" + \" \" + \"0\" + \" \" + \"195\" + \" \" + \"-1\" + \" \" + str(src_port) + \" \" + str(dst_port) + \" \" + str(ip_length) + \" \" + str(payload_length) + \" \" + str(ip_ttl) + \" \" + str(ip_tos) + \" \" + str(tcp_data_offset) + \" \" + str(int(tcp_flags)) + \" \" + \"-1\" + \" \" + str(payload_decimal)\n", 295 | " return final_data\n" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "source": [ 301 | "Generate a graph giving an overview of the predictions that have been made." 302 | ], 303 | "metadata": { 304 | "id": "70MOYThyHfq3" 305 | } 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "source": [ 310 | "The `trainFromPcapFile` function processes network packets from a pcap file, extracts relevant features, tokenizes the input using a specified tokenizer, and utilizes a pre-trained model for classifying packet content. It prints predictions and probabilities for non-normal packet classes, updating a dictionary with counts for each identified attack type. 
The function also tracks the total number of processed packets.\n" 311 | ], 312 | "metadata": { 313 | "id": "UBk0zL9xU-Wg" 314 | } 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": { 320 | "id": "PzfCTPFwKi2l" 321 | }, 322 | "outputs": [], 323 | "source": [ 324 | "def trainFromPcapFile(file_path, label, application_filter):\n", 325 | "\n", 326 | " training_set = []\n", 327 | " train_labels = []\n", 328 | "\n", 329 | " with PcapReader(file_path) as pcap:\n", 330 | " for pkt in pcap:\n", 331 | " if IP in pkt and TCP in pkt: # IPv4 and TCP\n", 332 | " payload_bytes_to_filter = bytes(pkt.payload)\n", 333 | " if application_filter is None or application_filter in payload_bytes_to_filter:\n", 334 | " input_line = processing_packet_conversion(pkt)\n", 335 | " if input_line is not None:\n", 336 | " truncated_line = input_line[:1024]\n", 337 | " training_set.append(truncated_line)\n", 338 | " train_labels.append(label)\n", 339 | "\n", 340 | " tokenized_input = tokenizer(training_set, padding=True, truncation=True, return_tensors=\"pt\")\n", 341 | " tokenized_input['labels'] = torch.tensor(train_labels)\n", 342 | "\n", 343 | " # Move input tensors to the specified device\n", 344 | " tokenized_input = {key: value.to(device) for key, value in tokenized_input.items()}\n", 345 | "\n", 346 | " # Data loader\n", 347 | " dataset = TensorDataset(tokenized_input[\"input_ids\"], tokenized_input[\"attention_mask\"], tokenized_input[\"labels\"])\n", 348 | " dataloader = DataLoader(dataset, batch_size=4, shuffle=True)\n", 349 | "\n", 350 | " num_training_samples = len(dataloader.dataset)\n", 351 | " print(f\"Number of training samples: {num_training_samples}\")\n", 352 | "\n", 353 | " # Optimizer and loss function\n", 354 | " optimizer = Adam(model.parameters(), lr=1e-5)\n", 355 | " criterion = CrossEntropyLoss()\n", 356 | "\n", 357 | " # Training loop\n", 358 | " num_epochs = 3\n", 359 | " for epoch in range(num_epochs):\n", 360 | " 
print(f\"Epoch {epoch + 1}/{num_epochs}\")\n", 361 | " total_loss = 0.0\n", 362 | " correct_predictions = 0\n", 363 | " total_samples = 0\n", 364 | "\n", 365 | " for iteration, batch in enumerate(dataloader, 1):\n", 366 | " input_ids, attention_mask, labels = batch\n", 367 | "\n", 368 | " # Move batch tensors to the specified device\n", 369 | " input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)\n", 370 | "\n", 371 | " optimizer.zero_grad()\n", 372 | " outputs = model(input_ids, attention_mask=attention_mask, labels=labels)\n", 373 | " loss = outputs.loss\n", 374 | " loss.backward()\n", 375 | " optimizer.step()\n", 376 | "\n", 377 | " # Calculate accuracy\n", 378 | " predictions = torch.argmax(outputs.logits, dim=1)\n", 379 | " correct_predictions += (predictions == labels).sum().item()\n", 380 | " total_samples += labels.size(0)\n", 381 | " print(f\"Total samples: {total_samples}\")\n", 382 | "\n", 383 | " model.save_pretrained(\"fine_tuned_model\")\n", 384 | "\n", 385 | "# trainFromPcapFile(\"/content/sample_data/nmap.pcap\", 16) # Port scanning\n", 386 | "# trainFromPcapFile(\"/content/sample_data/portscan.pcap\", 16) # Port scanning\n", 387 | "# trainFromPcapFile(\"/content/sample_data/hulk.pcap\", 6) # Hulk\n", 388 | "# trainFromPcapFile(\"/content/sample_data/dvwa_sql_injection.pcap.pcapng\", 21, b\"GET /\")\n" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "source": [ 394 | "The `predictingRowsCategory` function reads packets from a pcap file, processes IPv4 and TCP packets, extracts features, tokenizes them using a specified tokenizer, and predicts their class using a pre-trained model. If the predicted class is a non-normal packet, it updates a dictionary with counts for each identified attack type. Prediction details are printed, and the total number of processed packets is tracked. The processed packets' textual representations are stored in the `text_data` list." 
395 | ], 396 | "metadata": { 397 | "id": "pVnzwqYZVPbe" 398 | } 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": { 404 | "id": "BqWv5GzHdfxx" 405 | }, 406 | "outputs": [], 407 | "source": [ 408 | "text_data = []\n", 409 | "\n", 410 | "def predictingRowsCategory(file_path, filter):\n", 411 | " packets_nbr = 0 # Initialize packet counter\n", 412 | " with PcapReader(file_path) as pcap:\n", 413 | " for pkt in pcap :\n", 414 | " if IP in pkt : # Check for IPv4 packets\n", 415 | " if TCP in pkt:\n", 416 | "\n", 417 | " input_line = processing_packet_conversion(pkt) # Process packet data\n", 418 | " if input_line is not None:\n", 419 | "\n", 420 | " truncated_line = input_line[:1024] # Limit input length\n", 421 | " tokens = tokenizer(truncated_line, return_tensors=\"pt\") # Tokenize input\n", 422 | " outputs = model(**tokens) # Pass tokens through the model\n", 423 | " logits = outputs.logits\n", 424 | " probabilities = logits.softmax(dim=1) # Calculate class probabilities\n", 425 | " predicted_class = torch.argmax(probabilities, dim=1).item() # Get predicted class index\n", 426 | "\n", 427 | " predictedAttack = classes[predicted_class] # Map index to corresponding attack class\n", 428 | "\n", 429 | " if predictedAttack != \"Normal\":\n", 430 | " # Update or add count for non-normal packets in packets_brief dictionary\n", 431 | " if predictedAttack not in packets_brief :\n", 432 | " packets_brief[predictedAttack] = 1\n", 433 | " else :\n", 434 | " packets_brief[predictedAttack] += 1\n", 435 | "\n", 436 | " # Print prediction details\n", 437 | " print(\"Predicted class:\", predicted_class)\n", 438 | " print(\"predicted class is : \", classes[predicted_class])\n", 439 | " print(\"Class probabilities:\", probabilities.tolist())\n", 440 | "\n", 441 | " packets_nbr += 1 # Increment packet counter\n" 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "source": [ 447 | "Predictions test" 448 | ], 449 | "metadata": { 450 | 
"id": "Kjtlpl4wY5B7" 451 | } 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": { 457 | "collapsed": true, 458 | "id": "p0ikQlEVFU50" 459 | }, 460 | "outputs": [], 461 | "source": [ 462 | "predictingRowsCategory(\"/content/sample_data/hulk.pcap\", b\"HTTP\")\n", 463 | "\n", 464 | "import matplotlib.pyplot as plt\n", 465 | "\n", 466 | "keys = list(packets_brief.keys())\n", 467 | "vals = list(packets_brief.values())\n", 468 | "\n", 469 | "plt.bar(keys, vals)\n", 470 | "\n", 471 | "plt.xlabel('Attacks')\n", 472 | "plt.ylabel('Values')\n", 473 | "plt.title('Detected possible attacks')\n", 474 | "plt.show()" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": { 480 | "id": "XwbDUjaHcpF-" 481 | }, 482 | "source": [ 483 | "We specify to Pytorch that we wish to prioritize training on GPU and not on CPU." 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": null, 489 | "metadata": { 490 | "id": "CO4N3tcOaQGh" 491 | }, 492 | "outputs": [], 493 | "source": [ 494 | "print(torch.cuda.get_device_name(0))\n", 495 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 496 | "model = model.to(device)" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "source": [ 502 | "# Initialize an empty list to store textual data.\n", 503 | "text_data = []\n", 504 | "\n", 505 | "# Function for predicting packet categories on GPU and updating briefs.\n", 506 | "def predictingRowsCategoryOnGPU(file_path, filter, debug):\n", 507 | " packets_brief.clear() # Clear the dictionary tracking packet briefs.\n", 508 | "\n", 509 | " packets_nbr = 0 # Initialize packet counter.\n", 510 | " with PcapReader(file_path) as pcap: # Iterate through packets in the pcap file.\n", 511 | " for pkt in pcap:\n", 512 | " if IP in pkt: # Check for IPv4 packets.\n", 513 | " if TCP in pkt: # Ensure the packet is TCP.\n", 514 | "\n", 515 | " # Filter packets based on payload content.\n", 516 | " 
payload_bytes_to_filter = bytes(pkt.payload)\n", 517 | " if filter in payload_bytes_to_filter:\n", 518 | "\n", 519 | " # Process and truncate packet data.\n", 520 | " input_line = processing_packet_conversion(pkt)\n", 521 | " if input_line is not None:\n", 522 | " truncated_line = input_line[:1024]\n", 523 | "\n", 524 | " # Tokenize the truncated input and move it to the specified device.\n", 525 | " tokens = tokenizer(truncated_line, return_tensors=\"pt\")\n", 526 | " tokens = {key: value.to(device) for key, value in tokens.items()}\n", 527 | "\n", 528 | " # Pass tokens through the pre-trained model for prediction.\n", 529 | " outputs = model(**tokens)\n", 530 | "\n", 531 | " logits = outputs.logits\n", 532 | " probabilities = logits.softmax(dim=1)\n", 533 | " predicted_class = torch.argmax(probabilities, dim=1).item()\n", 534 | "\n", 535 | " predictedAttack = classes[predicted_class]\n", 536 | "\n", 537 | " # Update packet brief dictionary for non-normal packets.\n", 538 | " if predictedAttack != \"Normal\":\n", 539 | " if predictedAttack not in packets_brief:\n", 540 | " packets_brief[predictedAttack] = 1\n", 541 | " else:\n", 542 | " packets_brief[predictedAttack] += 1\n", 543 | "\n", 544 | " # Append truncated line to the textual data list.\n", 545 | " text_data.append(truncated_line)\n", 546 | "\n", 547 | " # Print prediction details when debugging is enabled.\n", 548 | " if debug:\n", 549 | " print(\"Predicted class:\", predicted_class)\n", 550 | " print(\"Predicted class is: \", classes[predicted_class])\n", 551 | " print(\"Class probabilities:\", probabilities.tolist())\n", 552 | "\n", 553 | " packets_nbr += 1 # Increment packet counter.\n" 554 | ], 555 | "metadata": { 556 | "id": "FwgCdHwfZ0ge" 557 | }, 558 | "execution_count": null, 559 | "outputs": [] 560 | }, 561 | { 562 | "cell_type": "markdown", 563 | "source": [ 564 | " `predictingRowsCategoryOnGPUByGettingRidOfParameters` function processes network packets from a pcap file, filters based on 
specified criteria, modifies the input by excluding certain tokens, tokenizes the modified input using a pre-trained tokenizer on a GPU, and predicts the packet class using a pre-trained model. If the predicted class is non-normal, it updates a dictionary with counts for each identified attack type. Optionally, it prints prediction details when debugging is enabled. The total number of processed packets is tracked, and the resulting brief is stored in the packets_brief dictionary." 565 | ], 566 | "metadata": { 567 | "id": "D-Gm9QvEVdd-" 568 | } 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": null, 573 | "metadata": { 574 | "id": "C2cjhdonuGE6" 575 | }, 576 | "outputs": [], 577 | "source": [ 578 | "def predictingRowsCategoryOnGPUByGettingRidOfParameters(file_path, filter, debug, tokens_array):\n", 579 | " packets_brief.clear() # Clear the dictionary tracking packet briefs.\n", 580 | "\n", 581 | " packets_nbr = 0 # Initialize packet counter.\n", 582 | " with PcapReader(file_path) as pcap: # Iterate through packets in the pcap file.\n", 583 | " for pkt in pcap:\n", 584 | " if IP in pkt: # Check for IPv4 packets.\n", 585 | " if TCP in pkt: # Ensure the packet is TCP.\n", 586 | " payload_bytes_to_filter = bytes(pkt.payload)\n", 587 | " if filter in payload_bytes_to_filter: # Apply payload filtering criteria.\n", 588 | "\n", 589 | " # Process and truncate packet data.\n", 590 | " input_line = processing_packet_conversion(pkt)\n", 591 | " if input_line is not None:\n", 592 | " truncated_line = input_line[:1024]\n", 593 | "\n", 594 | " # Remove specified tokens from the truncated line.\n", 595 | " tokens_to_exclude = tokens_array\n", 596 | " tokens_list = truncated_line.split()\n", 597 | " modified_tokens_list = [token for i, token in enumerate(tokens_list) if i not in tokens_to_exclude]\n", 598 | " modified_truncated_line = ' '.join(modified_tokens_list)\n", 599 | "\n", 600 | " # Tokenize the modified input and move it to the specified device.\n", 601 
| " tokens = tokenizer(modified_truncated_line, return_tensors=\"pt\")\n", 602 | " tokens = {key: value.to(device) for key, value in tokens.items()}\n", 603 | "\n", 604 | " # Pass tokens through the pre-trained model.\n", 605 | " outputs = model(**tokens)\n", 606 | "\n", 607 | " # Extract prediction details.\n", 608 | " logits = outputs.logits\n", 609 | " probabilities = logits.softmax(dim=1)\n", 610 | " predicted_class = torch.argmax(probabilities, dim=1).item()\n", 611 | " predictedAttack = classes[predicted_class]\n", 612 | "\n", 613 | " # Update packet brief dictionary for non-normal packets.\n", 614 | " if predictedAttack != \"Normal\":\n", 615 | " if predictedAttack not in packets_brief:\n", 616 | " packets_brief[predictedAttack] = 1\n", 617 | " else:\n", 618 | " packets_brief[predictedAttack] += 1\n", 619 | "\n", 620 | " # Print prediction details when debugging is enabled.\n", 621 | " if debug:\n", 622 | " print(\"Predicted class:\", predicted_class)\n", 623 | " print(\"Predicted class is: \", classes[predicted_class])\n", 624 | " print(\"Class probabilities:\", probabilities.tolist())\n", 625 | "\n", 626 | " packets_nbr += 1 # Increment packet counter.\n" 627 | ] 628 | }, 629 | { 630 | "cell_type": "markdown", 631 | "source": [ 632 | "Predictions test on GPU" 633 | ], 634 | "metadata": { 635 | "id": "yFnhWy2zWwd4" 636 | } 637 | }, 638 | { 639 | "cell_type": "code", 640 | "execution_count": null, 641 | "metadata": { 642 | "collapsed": true, 643 | "id": "qDSC070Gdvrc" 644 | }, 645 | "outputs": [], 646 | "source": [ 647 | "predictingRowsCategoryOnGPU(\"/content/sample_data/nmap.pcap\", b\"\", False)\n", 648 | "\n", 649 | "keys = list(packets_brief.keys())\n", 650 | "vals = list(packets_brief.values())\n", 651 | "\n", 652 | "plt.bar(keys, vals, color='red', width=0.7)\n", 653 | "\n", 654 | "plt.xlabel('Attacks', weight='bold')\n", 655 | "plt.ylabel('Number of packets', weight='bold')\n", 656 | "plt.title('Detected possible attacks')\n", 657 | "plt.show()" 658 
| ] 659 | }, 660 | { 661 | "cell_type": "markdown", 662 | "source": [ 663 | "Network visualisation & graph-creation functions:" 664 | ], 665 | "metadata": { 666 | "id": "u5Yd3mn29Ler" 667 | } 668 | }, 669 | { 670 | "cell_type": "code", 671 | "source": [ 672 | "import matplotlib.pyplot as plt\n", 673 | "import networkx as nx\n", 674 | "from scapy.all import *\n", 675 | "\n", 676 | "def create_network_graph(pcap_file):\n", 677 | " packets = rdpcap(pcap_file)\n", 678 | " G = nx.DiGraph()\n", 679 | " for packet in packets:\n", 680 | " src_ip = packet[IP].src\n", 681 | " dst_ip = packet[IP].dst\n", 682 | " G.add_edge(src_ip, dst_ip)\n", 683 | " return G\n", 684 | "\n", 685 | "def visualize_network_graph(pcap_file_path):\n", 686 | "\n", 687 | " network_graph = create_network_graph(pcap_file_path)\n", 688 | "\n", 689 | " pos = nx.spring_layout(network_graph)\n", 690 | " nx.draw(network_graph, pos, with_labels=True, font_size=8, node_size=1000, node_color='skyblue', font_color='black', font_weight='bold')\n", 691 | " #plt.show()\n", 692 | "\n", 693 | " img_bytes = BytesIO()\n", 694 | " plt.savefig(img_bytes, format='png')\n", 695 | " img_bytes.seek(0)\n", 696 | "\n", 697 | " encoded_image = base64.b64encode(img_bytes.getvalue()).decode('utf-8')\n", 698 | " plt.close()\n", 699 | "\n", 700 | " return encoded_image\n", 701 | "\n", 702 | "def visualize_destination_ports_plot(pcap_file_path, top_n=20):\n", 703 | "\n", 704 | " packets = rdpcap(pcap_file_path)\n", 705 | "\n", 706 | " destination_ports = {}\n", 707 | "\n", 708 | " for packet in packets:\n", 709 | " if IP in packet and TCP in packet:\n", 710 | " dst_ip = packet[IP].dst\n", 711 | " dst_port = packet[TCP].dport\n", 712 | " destination_ports[(dst_ip, dst_port)] = destination_ports.get((dst_ip, dst_port), 0) + 1\n", 713 | " sorted_ports = sorted(destination_ports.items(), key=lambda x: x[1], reverse=True)\n", 714 | "\n", 715 | " plt.figure(figsize=(10, 6))\n", 716 | "\n", 717 | " top_ports = sorted_ports[:top_n]\n", 
718 | "\n", 719 | " destinations, counts = zip(*top_ports)\n", 720 | " dst_labels = [f\"{ip}:{port}\" for (ip, port) in destinations]\n", 721 | "\n", 722 | " plt.bar(dst_labels, counts, color='skyblue')\n", 723 | " plt.xlabel('Destination IP and TCP Ports')\n", 724 | " plt.ylabel('Count')\n", 725 | " plt.title(f'Top {top_n} Most Contacted TCP Ports')\n", 726 | " plt.xticks(rotation=45, ha='right')\n", 727 | " plt.tight_layout()\n", 728 | "\n", 729 | " img_bytes = BytesIO()\n", 730 | " plt.savefig(img_bytes, format='png')\n", 731 | " img_bytes.seek(0)\n", 732 | "\n", 733 | " encoded_image = base64.b64encode(img_bytes.getvalue()).decode('utf-8')\n", 734 | " plt.close()\n", 735 | "\n", 736 | " return encoded_image\n" 737 | ], 738 | "metadata": { 739 | "id": "K8onRkXzzgaF" 740 | }, 741 | "execution_count": null, 742 | "outputs": [] 743 | }, 744 | { 745 | "cell_type": "markdown", 746 | "metadata": { 747 | "id": "1zW-mUi9abGd" 748 | }, 749 | "source": [ 750 | "Expose & print the local link for the web app (print it before launching the web app!)" 751 | ] 752 | }, 753 | { 754 | "cell_type": "code", 755 | "execution_count": null, 756 | "metadata": { 757 | "id": "-eR58WgdF8Di" 758 | }, 759 | "outputs": [], 760 | "source": [ 761 | "from google.colab.output import eval_js\n", 762 | "print(eval_js(\"google.colab.kernel.proxyPort(5000)\"))" 763 | ] 764 | }, 765 | { 766 | "cell_type": "markdown", 767 | "source": [ 768 | "The code below builds the whole Flask web app (front & back)." 769 | ], 770 | "metadata": { 771 | "id": "fq58ezPsa7B3" 772 | } 773 | }, 774 | { 775 | "cell_type": "code", 776 | "execution_count": null, 777 | "metadata": { 778 | "id": "k5rDAwM1FE02" 779 | }, 780 | "outputs": [], 781 | "source": [ 782 | "app = Flask(__name__, template_folder='/content/sample_data/')\n", 783 | "\n", 784 | "index = \"\"\"\n", 785 | "\n", 786 | "\n", 787 | "\n", 788 | " \n", 789 | " \n", 790 | " PCAP File Processor\n", 791 | " \n", 830 | "\n", 831 | "\n", 832 | "

Malicious PCAP File Analysis 🦈

\n", 833 | "
\n", 834 | " \n", 835 | "
\n", 836 | " \n", 837 | "

\n", 838 | " \n", 839 | " \n", 840 | "

\n", 841 | " \n", 842 | "
\n", 843 | "\n", 844 | "\n", 845 | "\"\"\"\n", 846 | "response = \"\"\"\n", 847 | "\n", 848 | "\n", 849 | "\n", 850 | " \n", 851 | " \n", 852 | " PCAP File Processor\n", 853 | " \n", 909 | "\n", 910 | "\n", 911 | "

Malicious PCAP File Analysis 🦈

\n", 912 | "

{{ alert_text }}

\n", 913 | "
\n", 914 | " {% if graph1 %}\n", 915 | "
\n", 916 | "

Identified Attacks 🚨​​​

\n", 917 | " \"Graph\n", 918 | "
\n", 919 | " {% endif %}\n", 920 | " {% if graph2 %}\n", 921 | "
\n", 922 | "

Protocols 🔎​

\n", 923 | " \"Protocols\n", 924 | "
\n", 925 | " {% endif %}\n", 926 | " \n", 927 | " \n", 928 | " {% if graph3 %}\n", 929 | "
\n", 930 | "

Network Endpoints 🌐​

\n", 931 | " \"Graph\n", 932 | "
\n", 933 | " {% endif %}\n", 934 | " {% if graph4 %}\n", 935 | "
\n", 936 | "

TCP Ports 🛜

\n", 937 | " \"Graph\n", 938 | "
\n", 939 | " {% endif %}\n", 940 | "
\n", 941 | "\n", 942 | "\n", 943 | "\"\"\"\n", 944 | "\n", 945 | "app.config['UPLOAD_FOLDER'] = '/content/sample_data'\n", 946 | "app.config['ALLOWED_EXTENSIONS'] = {'pcap', 'pcapng'}\n", 947 | "\n", 948 | "os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)\n", 949 | "\n", 950 | "def generate_graph(data, title, graph_color, xtext, ytext):\n", 951 | " plt.bar(data.keys(), data.values(), color=graph_color, width=0.7)\n", 952 | " #plt.ylim(0, 50)\n", 953 | " plt.xlabel(xtext, weight='bold')\n", 954 | " plt.xticks(rotation=45, ha='right')\n", 955 | " plt.ylabel(ytext, weight='bold')\n", 956 | " plt.title(title)\n", 957 | "\n", 958 | " img_bytes = BytesIO()\n", 959 | " plt.savefig(img_bytes, format='png')\n", 960 | " img_bytes.seek(0)\n", 961 | "\n", 962 | " # Convert the image to base64 encoding\n", 963 | " encoded_image = base64.b64encode(img_bytes.getvalue()).decode('utf-8')\n", 964 | " plt.close()\n", 965 | "\n", 966 | " return encoded_image\n", 967 | "\n", 968 | "def allowed_file(filename):\n", 969 | " return '.' 
in filename and filename.rsplit('.', 1)[1].lower() in app.config['ALLOWED_EXTENSIONS']\n", 970 | "\n", 971 | "@app.route(\"/\")\n", 972 | "def home():\n", 973 | " return index\n", 974 | "\n", 975 | "@app.route('/upload', methods=['POST'])\n", 976 | "def upload_file():\n", 977 | "\n", 978 | " packets_brief.clear()\n", 979 | " protocol_counts.clear()\n", 980 | " text_data.clear()\n", 981 | "\n", 982 | " if 'file' not in request.files:\n", 983 | " return redirect(request.url)\n", 984 | "\n", 985 | " file = request.files['file']\n", 986 | "\n", 987 | " filter_value = request.form['filter']\n", 988 | "\n", 989 | "\n", 990 | " # debug_bool = False\n", 991 | " # if filter_value == 'on':\n", 992 | " # debug_bool = True\n", 993 | "\n", 994 | "\n", 995 | " if file.filename == '':\n", 996 | " return redirect(request.url)\n", 997 | "\n", 998 | " if file and allowed_file(file.filename):\n", 999 | " filename = secure_filename(file.filename)\n", 1000 | " file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)\n", 1001 | " file.save(file_path)\n", 1002 | "\n", 1003 | "\n", 1004 | " print(\"FILTER VALUE : \", filter_value)\n", 1005 | " if len(filter_value) > 0 :\n", 1006 | " predictingRowsCategoryOnGPU(file_path, filter_value.encode('utf-8'), False) # Will take care of saving data in packets_brief\n", 1007 | " #predictingRowsCategoryOnGPUByGettingRidOfParameters(file_path, filter_value.encode('utf-8'), debug_bool)\n", 1008 | " else:\n", 1009 | " predictingRowsCategoryOnGPU(file_path, b\"\", False) # Will take care of saving data in packets_brief\n", 1010 | " #predictingRowsCategoryOnGPUByGettingRidOfParameters(file_path, filter_value.encode('utf-8'), debug_bool)\n", 1011 | "\n", 1012 | " # Generate first graph\n", 1013 | " keys1 = list(packets_brief.keys())\n", 1014 | " vals1 = list(packets_brief.values())\n", 1015 | " graph1 = generate_graph(dict(zip(keys1, vals1)), 'Identified Known Attacks​', '#ef6666', \"Attacks\", \"Number of Malicious Packets\")\n", 1016 | "\n", 1017 
| " # Generate Second graph\n", 1018 | " keys2 = list(protocol_counts.keys())\n", 1019 | " vals2 = list(protocol_counts.values())\n", 1020 | " graph2 = generate_graph(dict(zip(keys2, vals2)), 'Identified Protocols​', '#341f97', \"Protocols\", \"Number of Packets\")\n", 1021 | "\n", 1022 | " # Generate Third graph\n", 1023 | "\n", 1024 | " graphh3 = visualize_network_graph(file_path)\n", 1025 | "\n", 1026 | " # Generate Fourth graph\n", 1027 | "\n", 1028 | " graph4 = visualize_destination_ports_plot(file_path)\n", 1029 | "\n", 1030 | " if len(packets_brief) > 0 :\n", 1031 | " return render_template_string(response, graph1=graph1, graph2=graph2, graph3=graphh3, graph4=graph4, alert_color=\"#c0392b\", alert_text=f\"{filename} contains malicious network activity !\")\n", 1032 | "\n", 1033 | " return render_template_string(response, graph1=graph1, graph2=graph2, graph3=graphh3, graph4=graph4, alert_color=\"#27ae60\", alert_text=f\"{filename} is clear ! 👌\")\n", 1034 | "\n", 1035 | " # keys = list(packets_brief.keys())\n", 1036 | " # vals = list(packets_brief.values())\n", 1037 | "\n", 1038 | " # plt.bar(keys, vals, color='red', width=0.7)\n", 1039 | "\n", 1040 | " # plt.xlabel('Attacks', weight='bold')\n", 1041 | " # plt.ylabel('Number of packets', weight='bold')\n", 1042 | " # plt.title('Detected possible attacks')\n", 1043 | "\n", 1044 | " # img_bytes = BytesIO()\n", 1045 | " # plt.savefig(img_bytes, format='png')\n", 1046 | " # img_bytes.seek(0)\n", 1047 | "\n", 1048 | " # return send_file(img_bytes, mimetype='image/png')\n", 1049 | "\n", 1050 | "if __name__ == \"__main__\":\n", 1051 | " app.run()" 1052 | ] 1053 | }, 1054 | { 1055 | "cell_type": "code", 1056 | "source": [ 1057 | "import matplotlib.pyplot as plt\n", 1058 | "from scapy.all import *\n", 1059 | "\n", 1060 | "def create_destination_ports_graph(pcap_file):\n", 1061 | " packets = rdpcap(pcap_file)\n", 1062 | "\n", 1063 | " # Dictionary to store the count of destination ports\n", 1064 | " destination_ports = 
{}\n", 1065 | "\n", 1066 | " for packet in packets:\n", 1067 | " # Check if the packet has IP and TCP layers\n", 1068 | " if IP in packet and TCP in packet:\n", 1069 | " dst_ip = packet[IP].dst\n", 1070 | " dst_port = packet[TCP].dport\n", 1071 | "\n", 1072 | " # Update the count of destination ports\n", 1073 | " destination_ports[(dst_ip, dst_port)] = destination_ports.get((dst_ip, dst_port), 0) + 1\n", 1074 | "\n", 1075 | " # Sort destination ports based on their count in descending order\n", 1076 | " sorted_ports = sorted(destination_ports.items(), key=lambda x: x[1], reverse=True)\n", 1077 | "\n", 1078 | " return sorted_ports\n", 1079 | "\n", 1080 | "def visualize_destination_ports_plot(sorted_ports, top_n=20):\n", 1081 | " plt.figure(figsize=(10, 6))\n", 1082 | "\n", 1083 | " # Take only the top n destination ports\n", 1084 | " top_ports = sorted_ports[:top_n]\n", 1085 | "\n", 1086 | " destinations, counts = zip(*top_ports)\n", 1087 | " dst_labels = [f\"{ip}:{port}\" for (ip, port) in destinations]\n", 1088 | "\n", 1089 | " plt.bar(dst_labels, counts, color='skyblue')\n", 1090 | " plt.xlabel('Destination IP and TCP Ports')\n", 1091 | " plt.ylabel('Count')\n", 1092 | " plt.title(f'Top {top_n} Most Contacted TCP Ports')\n", 1093 | " plt.xticks(rotation=45, ha='right')\n", 1094 | " plt.tight_layout()\n", 1095 | " plt.show()\n", 1096 | "\n", 1097 | "if __name__ == \"__main__\":\n", 1098 | " pcap_file_path = \"/content/sample_data/hulk.pcap\"\n", 1099 | " sorted_ports = create_destination_ports_graph(pcap_file_path)\n", 1100 | " visualize_destination_ports_plot(sorted_ports, top_n=20)" 1101 | ], 1102 | "metadata": { 1103 | "id": "wXY1Au8D9hIV" 1104 | }, 1105 | "execution_count": null, 1106 | "outputs": [] 1107 | } 1108 | ], 1109 | "metadata": { 1110 | "accelerator": "GPU", 1111 | "colab": { 1112 | "provenance": [], 1113 | "gpuType": "T4" 1114 | }, 1115 | "kernelspec": { 1116 | "display_name": "Python 3", 1117 | "name": "python3" 1118 | }, 1119 | "language_info": { 
1120 | "name": "python" 1121 | } 1122 | }, 1123 | "nbformat": 4, 1124 | "nbformat_minor": 0 1125 | } --------------------------------------------------------------------------------