├── requirements.txt ├── .gitignore ├── bccc.jpg ├── NTLFlowLyzer ├── writers │ ├── __init__.py │ ├── strategy.py │ ├── writer.py │ └── csv_writer.py ├── network_flow_capturer │ ├── __init__.py │ ├── packet.py │ ├── flow.py │ └── network_flow_capturer.py ├── __init__.py ├── features │ ├── __init__.py │ ├── feature.py │ ├── count_related.py │ ├── utils.py │ ├── subflow_related.py │ ├── rate_related.py │ ├── bulk_related.py │ ├── IAT_related.py │ ├── time_related.py │ └── flag_related.py ├── config.json ├── config_loader.py ├── __main__.py ├── network_flow_analyzer.py └── feature_extractor.py ├── setup.py ├── LICENSE └── docs └── index.md /requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | multipledispatch 3 | dpkt -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | build.log 3 | build 4 | dist 5 | *.csv 6 | *.swp 7 | -------------------------------------------------------------------------------- /bccc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahlashkari/NTLFlowLyzer/HEAD/bccc.jpg -------------------------------------------------------------------------------- /NTLFlowLyzer/writers/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | from .strategy import Strategy 4 | from .writer import Writer 5 | from .csv_writer import CSVWriter 6 | -------------------------------------------------------------------------------- /NTLFlowLyzer/network_flow_capturer/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from .network_flow_capturer import NetworkFlowCapturer 4 | from .flow import Flow 5 | from .packet import Packet 6 | 
class Strategy(ABC):
    """Interface every output back-end must implement.

    ``Writer.write`` forwards four positional arguments
    (``file_address, data, writing_mode, only_headers``) to the strategy, and
    ``CSVWriter.write`` accepts exactly those, so the abstract signature
    declares all four. The two extra parameters carry defaults, which keeps
    existing two-argument calls valid.
    """

    @abstractmethod
    def write(self, file_address: str, data: list, writing_mode: str = 'w',
              only_headers: bool = False) -> None:
        """Persist ``data`` at ``file_address``.

        Args:
            file_address: Destination path of the output.
            data: Rows to write.
            writing_mode: Mode string handed to the back-end when it opens
                the destination.
            only_headers: When true, the back-end is asked to emit only the
                header row.
        """
        pass
import utils 4 | from .feature import Feature 5 | from .len_related import * 6 | from .count_related import * 7 | from .time_related import * 8 | from .rate_related import * 9 | from .bulk_related import * 10 | from .flag_related import * 11 | from .IAT_related import * 12 | from .subflow_related import * -------------------------------------------------------------------------------- /NTLFlowLyzer/features/feature.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from abc import ABC, abstractmethod 4 | from ..network_flow_capturer import Flow 5 | 6 | class Feature(ABC): 7 | name: str 8 | @abstractmethod 9 | def extract(self, flow: Flow) -> float: 10 | pass 11 | 12 | def set_floating_point_unit(self, floating_point_unit: str) -> None: 13 | self.floating_point_unit = floating_point_unit 14 | -------------------------------------------------------------------------------- /NTLFlowLyzer/writers/writer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from .strategy import Strategy 4 | from .csv_writer import CSVWriter 5 | 6 | class Writer(object): 7 | strategy: Strategy 8 | 9 | def __init__(self, strategy: Strategy = None) -> None: 10 | if strategy is not None: 11 | self.strategy = strategy 12 | else: 13 | self.strategy = CSVWriter() 14 | 15 | def write(self, file_address: str, data: list, writing_mode: str = 'w', 16 | only_headers: bool = False) -> None: 17 | self.strategy.write(file_address, data, writing_mode, only_headers) -------------------------------------------------------------------------------- /NTLFlowLyzer/features/count_related.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from ..network_flow_capturer import Flow 4 | from .feature import Feature 5 | 6 | 7 | class PacketsCount(Feature): 8 | name = "packets_count" 9 | def extract(self, flow: 
class CSVWriter(Strategy):
    """Strategy that serialises rows (dict-like records) into a CSV file."""

    def write(self, file_address: str, data: list, writing_mode: str = 'w',
              only_headers: bool = False) -> None:
        """Write the header row or the data rows of ``data`` to ``file_address``.

        The file is opened with ``writing_mode`` before ``data`` is inspected.
        Column order is taken from the keys of the first record. With
        ``only_headers`` set, just that key list is written; otherwise one row
        per record is written and no header.
        """
        with open(file_address, writing_mode, newline='') as output:
            csv_out = csv.writer(output)
            # Nothing to derive columns from: leave the (opened) file as is.
            if len(data) == 0:
                return

            columns = list(data[0].keys())
            if only_headers:
                csv_out.writerow(columns)
                return

            for record in data:
                csv_out.writerow([record[column] for column in columns])
def calculate_flow_payload_bytes(flow: Flow):
    """Return the summed payload bytes of every packet in ``flow``."""
    return sum(packet.get_payloadbytes() for packet in flow.get_packets())


def calculate_fwd_flow_payload_bytes(flow: Flow):
    """Return the summed payload bytes of the forward packets of ``flow``."""
    return sum(packet.get_payloadbytes() for packet in flow.get_forwardpackets())


def calculate_bwd_flow_payload_bytes(flow: Flow):
    """Return the summed payload bytes of the backward packets of ``flow``."""
    return sum(packet.get_payloadbytes() for packet in flow.get_backwardpackets())


def calculate_IAT(packets: list):
    """Return the inter-arrival times between consecutive ``packets``.

    Each element is ``float(later_timestamp - earlier_timestamp)`` for one
    adjacent pair, in input order.

    NOTE(review): with fewer than two packets there is no pair, and the
    function returns the timestamps themselves converted to float (``[]`` for
    no packets, ``[float(ts)]`` for one). That is the long-standing behaviour
    and is kept here; confirm callers really want a raw timestamp in the
    single-packet case.
    """
    times = [packet.get_timestamp() for packet in packets]
    if len(times) < 2:
        return [float(t) for t in times]
    # Subtract first, convert second, so the arithmetic happens in the
    # timestamps' own numeric type.
    return [float(later - earlier) for earlier, later in zip(times, times[1:])]


def calculate_flow_duration(flow: Flow):
    """Return ``flow``'s last-seen time minus its start time, as a float."""
    return float(flow.get_flow_last_seen() - flow.get_flow_start_time())


def calculate_flow_header_bytes(packets: list):
    """Return the summed header size of ``packets`` (0 for an empty list)."""
    return sum(packet.get_header_size() for packet in packets)
class SubflowFwdPackets(Feature):
    """Forward packet count divided by the flow's subflow count."""
    name = "subflow_fwd_packets"

    def extract(self, flow: Flow) -> float:
        """Return forward packets per subflow, or 0 when there is no subflow."""
        subflow_count = flow.get_subflow_count()
        if subflow_count <= 0:
            return 0
        return len(flow.get_forwardpackets()) / subflow_count


class SubflowBwdPackets(Feature):
    """Backward packet count divided by the flow's subflow count."""
    name = "subflow_bwd_packets"

    def extract(self, flow: Flow) -> float:
        """Return backward packets per subflow, or 0 when there is no subflow."""
        subflow_count = flow.get_subflow_count()
        if subflow_count <= 0:
            return 0
        return len(flow.get_backwardpackets()) / subflow_count


class SubflowFwdBytes(Feature):
    """Forward payload bytes divided by the flow's subflow count."""
    name = "subflow_fwd_bytes"

    def extract(self, flow: Flow) -> float:
        """Return forward payload bytes per subflow, or 0 when there is no subflow."""
        subflow_count = flow.get_subflow_count()
        if subflow_count <= 0:
            return 0
        return utils.calculate_fwd_flow_payload_bytes(flow) / subflow_count


class SubflowBwdBytes(Feature):
    """Backward payload bytes divided by the flow's subflow count."""
    name = "subflow_bwd_bytes"

    def extract(self, flow: Flow) -> float:
        """Return backward payload bytes per subflow, or 0 when there is no subflow."""
        subflow_count = flow.get_subflow_count()
        if subflow_count <= 0:
            return 0
        # This feature previously summed the *forward* payload (copy-paste
        # from SubflowFwdBytes), so both byte features reported the same value.
        return utils.calculate_bwd_flow_payload_bytes(flow) / subflow_count
10000 22 | self.check_flows_ending_min_flows = 2000 23 | self.capturer_updating_flows_min_value = 2000 24 | self.max_rows_number = 900000 25 | self.batch_address = "" 26 | self.vxlan_ip = "" 27 | self.continues_batch_address = "" 28 | self.continues_pcap_prefix = "" 29 | self.batch_address_output = "" 30 | self.number_of_continues_files = 0 31 | self.base_number_continues_files = 1 32 | self.read_config_file() 33 | 34 | def read_config_file(self): 35 | try: 36 | with open(self.config_file_address) as config_file: 37 | for key, value in json.loads(config_file.read()).items(): 38 | setattr(self, key, value) 39 | if self.pcap_file_address is None and self.batch_address is None and self.continues_batch_address is None: 40 | raise Exception("Please specify the 'pcap_file_address' or 'batch_address' or 'continues_batch_address' in the config file.") 41 | except Exception as error: 42 | print(f">> Error was detected while reading {self.config_file_address}: {str(error)}. "\ 43 | "Default values will be applied.") 44 | exit(-1) 45 | -------------------------------------------------------------------------------- /NTLFlowLyzer/features/rate_related.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from ..network_flow_capturer import Flow 4 | from .feature import Feature 5 | from . 
class BytesRate(Feature):
    """Payload bytes of the whole flow per unit of flow duration."""
    name = "bytes_rate"

    def extract(self, flow: Flow) -> float:
        """Return total payload bytes / duration, or 0 for a zero duration."""
        try:
            payload = utils.calculate_flow_payload_bytes(flow)
            return payload / utils.calculate_flow_duration(flow)
        except ZeroDivisionError:
            return 0


# TODO: we should calculate the duration for just forward packets, not whole packets, the same is true for others
class FwdBytesRate(Feature):
    """Forward payload bytes per unit of (whole-flow) duration."""
    name = "fwd_bytes_rate"

    def extract(self, flow: Flow) -> float:
        """Return forward payload bytes / duration, or 0 for a zero duration."""
        try:
            payload = utils.calculate_fwd_flow_payload_bytes(flow)
            return payload / utils.calculate_flow_duration(flow)
        except ZeroDivisionError:
            return 0


class BwdBytesRate(Feature):
    """Backward payload bytes per unit of (whole-flow) duration."""
    name = "bwd_bytes_rate"

    def extract(self, flow: Flow) -> float:
        """Return backward payload bytes / duration, or 0 for a zero duration."""
        try:
            payload = utils.calculate_bwd_flow_payload_bytes(flow)
            return payload / utils.calculate_flow_duration(flow)
        except ZeroDivisionError:
            return 0


class PacketsRate(Feature):
    """Packets of the whole flow per unit of flow duration."""
    name = "packets_rate"

    def extract(self, flow: Flow) -> float:
        """Return packet count / duration, or 0 for a zero duration."""
        try:
            packet_count = len(flow.get_packets())
            return packet_count / utils.calculate_flow_duration(flow)
        except ZeroDivisionError:
            return 0


class BwdPacketsRate(Feature):
    """Backward packets per unit of (whole-flow) duration."""
    name = "bwd_packets_rate"

    def extract(self, flow: Flow) -> float:
        """Return backward packet count / duration, or 0 for a zero duration."""
        try:
            packet_count = len(flow.get_backwardpackets())
            return packet_count / utils.calculate_flow_duration(flow)
        except ZeroDivisionError:
            return 0


class FwdPacketsRate(Feature):
    """Forward packets per unit of (whole-flow) duration."""
    name = "fwd_packets_rate"

    def extract(self, flow: Flow) -> float:
        """Return forward packet count / duration, or 0 for a zero duration."""
        try:
            packet_count = len(flow.get_forwardpackets())
            return packet_count / utils.calculate_flow_duration(flow)
        except ZeroDivisionError:
            return 0


class DownUpRate(Feature):
    """Ratio of backward packets to forward packets."""
    name = "down_up_rate"

    def extract(self, flow: Flow) -> float:
        """Return backward count / forward count, or 0 without forward packets."""
        forward_count = len(flow.get_forwardpackets())
        if forward_count <= 0:
            return 0
        return len(flow.get_backwardpackets()) / forward_count
def args_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for NTLFlowLyzer.

    Options: ``-c/--config-file`` (path, stored as ``config_file``) and the
    boolean switches ``-o/--online-capturing``, ``-b/--batch-mode`` and
    ``-cb/--continues-batch-mode``.
    """
    parser = argparse.ArgumentParser(prog='NTLFlowLyzer')
    parser.add_argument('-c', '--config-file', action='store', help='Json config file address.')
    parser.add_argument('-o', '--online-capturing', action='store_true',
                        help='Capturing mode. The default mode is offline capturing.')
    parser.add_argument('-b', '--batch-mode', action='store_true',
                        help='Analyze all the files in the given directory. The default is False.')
    parser.add_argument('-cb', '--continues-batch-mode', action='store_true',
                        help='Continues batch mode. Analyze files in the given directory continuously.'
                        ' Default is False.')
    return parser


def find_pcap_files(directory):
    """Return the regular files directly inside ``directory``, sorted by path.

    No filtering by extension is done: every file matched by
    ``directory + '/*'`` is returned. Sub-directories are skipped because each
    returned path is later opened as a capture file, and the result is sorted
    because ``glob`` makes no ordering promise.
    """
    import os.path  # local import: this module only imports argparse and glob

    file_pattern = directory + '/*'
    return sorted(path for path in glob.glob(file_pattern) if os.path.isfile(path))
class Packet():
    """One captured packet: addressing, TCP flags, timing and size fields.

    Instances compare by timestamp (``<``) and report their ``length`` field
    through ``len()``.
    """

    def __init__(self, src_ip="", src_port=0, dst_ip="", dst_port=0, protocol=None, flags=0,
                 timestamp=0, forward=True, length=0, payloadbytes=0, header_size=0,
                 window_size=0, seq_number=0, ack_number=0):
        self.src_ip = src_ip
        self.src_port = src_port
        self.dst_ip = dst_ip
        self.dst_port = dst_port
        self.protocol = protocol
        self.__tcp_flags = flags
        self.timestamp = timestamp
        self.forward = forward
        self.length = length
        self.payloadbytes = payloadbytes
        self.header_size = header_size
        # Fixed at construction time: header plus payload.
        self.__segment_size = self.header_size + self.payloadbytes
        self.window_size = window_size
        self.seq_number = seq_number
        self.ack_number = ack_number

    def __len__(self):
        """Return the packet length, so ``len(packet)`` works.

        ``__len__`` must return an integer; returning the bound method
        ``get_length`` (without calling it) made ``len()`` raise TypeError.
        """
        return self.get_length()

    def __lt__(self, o: object):
        """Order by timestamp against anything exposing ``get_timestamp()``.

        The comparison is strict: with ``<=`` two packets sharing a timestamp
        were each "less than" the other, which breaks the ordering contract
        and lets sorting reorder equal-timestamp packets.
        """
        return (self.timestamp < o.get_timestamp())

    def get_src_ip(self):
        return self.src_ip

    def get_dst_ip(self):
        return self.dst_ip

    def get_src_port(self):
        return self.src_port

    def get_dst_port(self):
        return self.dst_port

    def get_protocol(self):
        return self.protocol

    # Each has_flag* method returns the stored flags masked with the matching
    # dpkt.tcp.TH_* constant: 0 when the flag is clear, the (non-zero) mask
    # value when it is set -- not a bool.
    def has_flagFIN(self):
        return (self.__tcp_flags & dpkt.tcp.TH_FIN)

    def has_flagPSH(self):
        return (self.__tcp_flags & dpkt.tcp.TH_PUSH)

    def has_flagURG(self):
        return (self.__tcp_flags & dpkt.tcp.TH_URG)

    def has_flagECE(self):
        return (self.__tcp_flags & dpkt.tcp.TH_ECE)

    def has_flagSYN(self):
        return (self.__tcp_flags & dpkt.tcp.TH_SYN)

    def has_flagACK(self):
        return (self.__tcp_flags & dpkt.tcp.TH_ACK)

    def has_flagCWR(self):
        return (self.__tcp_flags & dpkt.tcp.TH_CWR)

    def has_flagRST(self):
        return (self.__tcp_flags & dpkt.tcp.TH_RST)

    def get_seq_number(self) -> int:
        return self.seq_number

    def get_ack_number(self) -> int:
        return self.ack_number

    def is_forward(self):
        return self.forward

    def get_timestamp(self):
        return self.timestamp

    def get_length(self):
        return self.length

    def get_payloadbytes(self):
        return self.payloadbytes

    def get_header_size(self):
        return self.header_size

    def get_window_size(self):
        return self.window_size

    def get_segment_size(self):
        return self.__segment_size
flow.get_fBulkStateCount()) 20 | return 0 21 | 22 | 23 | class AvgFwdBulkRate(Feature): 24 | name = "avg_fwd_bulk_rate" 25 | def extract(self, flow: Flow) -> float: 26 | if flow.get_fBulkDuration() != 0: 27 | return float(flow.get_fBulkSizeTotal() / flow.get_fBulkDuration()) 28 | return 0 29 | 30 | 31 | class AvgBwdBytesPerBulk(Feature): 32 | name = "avg_bwd_bytes_per_bulk" 33 | def extract(self, flow: Flow) -> float: 34 | if flow.bbulkStateCount != 0: 35 | return float(flow.get_bBulkSizeTotal() / flow.bbulkStateCount) 36 | return 0 37 | 38 | 39 | class AvgBwdPacketsPerBulk(Feature): 40 | name = "avg_bwd_packets_bulk_rate" 41 | def extract(self, flow: Flow) -> float: 42 | if flow.get_bBulkStateCount() != 0: 43 | return float(flow.get_bBulkPacketCount() / flow.get_bBulkStateCount()) 44 | return 0 45 | 46 | 47 | class AvgBwdBulkRate(Feature): 48 | name = "avg_bwd_bulk_rate" 49 | def extract(self, flow: Flow) -> float: 50 | if flow.get_bBulkDuration() != 0: 51 | return float(flow.get_bBulkSizeTotal() / flow.get_bBulkDuration()) 52 | return 0 53 | 54 | 55 | class FwdBulkStateCount(Feature): 56 | name = "fwd_bulk_state_count" 57 | def extract(self, flow: Flow) -> float: 58 | return flow.fbulkStateCount 59 | 60 | 61 | class FwdBulkSizeTotal(Feature): 62 | name = "fwd_bulk_total_size" 63 | def extract(self, flow: Flow) -> float: 64 | return flow.fbulkSizeTotal 65 | 66 | 67 | class FwdBulkPacketCount(Feature): 68 | name = "fwd_bulk_per_packet" 69 | def extract(self, flow: Flow) -> float: 70 | return flow.fbulkPacketCount 71 | 72 | 73 | class FwdBulkDuration(Feature): 74 | name = "fwd_bulk_duration" 75 | def extract(self, flow: Flow) -> float: 76 | return flow.fbulkDuration 77 | 78 | 79 | class BwdBulkStateCount(Feature): 80 | name = "bwd_bulk_state_count" 81 | def extract(self, flow: Flow) -> float: 82 | return flow.bbulkStateCount 83 | 84 | 85 | class BwdBulkSizeTotal(Feature): 86 | name = "bwd_bulk_total_size" 87 | def extract(self, flow: Flow) -> float: 88 | return 
flow.bbulkSizeTotal 89 | 90 | 91 | class BwdBulkPacketCount(Feature): 92 | name = "bwd_bulk_per_packet" 93 | def extract(self, flow: Flow) -> float: 94 | return flow.bbulkPacketCount 95 | 96 | 97 | class BwdBulkDuration(Feature): 98 | name ="bwd_bulk_duration" 99 | def extract(self, flow: Flow) -> float: 100 | return flow.bbulkDuration 101 | -------------------------------------------------------------------------------- /NTLFlowLyzer/network_flow_analyzer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import dpkt 4 | import socket 5 | import multiprocessing 6 | import warnings 7 | from collections import defaultdict, Counter 8 | from multiprocessing import Process, Manager, Pool 9 | from .network_flow_capturer import NetworkFlowCapturer 10 | from .feature_extractor import FeatureExtractor 11 | from .writers import Writer, CSVWriter 12 | from .config_loader import ConfigLoader 13 | 14 | from dpkt import ethernet 15 | 16 | 17 | class NTLFlowLyzer(object): 18 | def __init__(self, config: ConfigLoader, online_capturing: bool, continues_batch_mode: bool): 19 | self.__config = config 20 | self.__continues_batch_mode = continues_batch_mode 21 | warnings.filterwarnings("ignore") 22 | 23 | def run(self): 24 | with warnings.catch_warnings(): 25 | warnings.simplefilter("ignore") 26 | print(">> Analyzing the", self.__config.pcap_file_address, "...") 27 | f = open(self.__config.pcap_file_address, 'rb') 28 | pcap = dpkt.pcap.Reader(f) 29 | packets_temp = [1 for ts, buf in pcap] 30 | total_packets = len(packets_temp) 31 | print(f">> The input PCAP file contains {total_packets} packets.") 32 | 33 | with Manager() as manager: 34 | self.__flows = manager.list() 35 | self.__data = manager.list() 36 | number_of_writer_threads = 1 37 | number_of_required_threads = 3 38 | number_of_extractor_threads = self.__config.number_of_threads - number_of_writer_threads 39 | if self.__config.number_of_threads < 
number_of_required_threads: 40 | print(">> At least 3 threads are required. " 41 | "There should be one for the capturer, one for the writer, " 42 | "and one or more for the feature extractor." 43 | "\nWe set the number of threads based on your CPU cores.") 44 | number_of_extractor_threads = multiprocessing.cpu_count() - number_of_writer_threads 45 | if multiprocessing.cpu_count() < number_of_required_threads: 46 | number_of_extractor_threads = number_of_required_threads - number_of_writer_threads 47 | 48 | self.__capturer_thread_finish = manager.Value('i', False) 49 | self.__extractor_thread_finish = manager.Value('i', False) 50 | self.__writed_rows = manager.Value('i', 0) 51 | self.__output_file_index = manager.Value('i', 1) 52 | 53 | self.__data_lock = manager.Lock() 54 | self.__flows_lock = manager.Lock() 55 | self.__feature_extractor_watchdog_lock = manager.Lock() 56 | self.__writed_rows_lock = manager.Lock() 57 | self.__output_file_index_lock = manager.Lock() 58 | 59 | capturer = NetworkFlowCapturer( 60 | max_flow_duration=self.__config.max_flow_duration, 61 | activity_timeout=self.__config.activity_timeout, 62 | check_flows_ending_min_flows=self.__config.check_flows_ending_min_flows, 63 | capturer_updating_flows_min_value=self.__config.capturer_updating_flows_min_value, 64 | read_packets_count_value_log_info=self.__config.read_packets_count_value_log_info, 65 | vxlan_ip=self.__config.vxlan_ip, 66 | continues_batch_address=self.__config.continues_batch_address, 67 | continues_pcap_prefix=self.__config.continues_pcap_prefix, 68 | number_of_continues_files=self.__config.number_of_continues_files, 69 | continues_batch_mode=self.__continues_batch_mode, 70 | base_number_continues_files=self.__config.base_number_continues_files) 71 | writer_thread = Process(target=self.writer) 72 | writer_thread.start() 73 | with Pool(processes=number_of_extractor_threads) as pool: 74 | pool.starmap_async(capturer.capture, 75 | [(self.__config.pcap_file_address, self.__flows, 76 | 
self.__flows_lock, self.__capturer_thread_finish,)]) 77 | self.feature_extractor(pool) 78 | pool.close() 79 | pool.join() 80 | with self.__feature_extractor_watchdog_lock: 81 | self.__extractor_thread_finish.set(True) 82 | 83 | writer_thread.join() 84 | print(">> Results are ready!") 85 | 86 | def feature_extractor(self, pool: Pool): 87 | feature_extractor = FeatureExtractor(self.__config.floating_point_unit) 88 | while 1: 89 | if len(self.__flows) >= self.__config.feature_extractor_min_flows: 90 | temp_flows = [] 91 | with self.__flows_lock: 92 | temp_flows.extend(self.__flows) 93 | self.__flows[:] = [] 94 | print(f">> Extracting features of {len(temp_flows)} number of flows...") 95 | pool.starmap_async(feature_extractor.execute, 96 | [(self.__data, self.__data_lock, temp_flows, 97 | self.__config.features_ignore_list, self.__config.label)]) 98 | del temp_flows 99 | if self.__capturer_thread_finish.get(): 100 | if len(self.__flows) == 0: 101 | return 102 | 103 | temp_flows = [] 104 | with self.__flows_lock: 105 | temp_flows.extend(self.__flows) 106 | self.__flows[:] = [] 107 | print(f">> Extracting features of the last {len(temp_flows)} number of flows...") 108 | pool.starmap_async(feature_extractor.execute, 109 | [(self.__data, self.__data_lock, temp_flows, 110 | self.__config.features_ignore_list, self.__config.label)]) 111 | del temp_flows 112 | 113 | 114 | def writer(self): 115 | writer = Writer(CSVWriter()) 116 | header_writing_mode = 'w' 117 | data_writing_mode = 'a+' 118 | file_address = self.__config.output_file_address 119 | write_headers = True 120 | while 1: 121 | if len(self.__data) >= self.__config.writer_min_rows: 122 | with self.__writed_rows_lock and self.__output_file_index_lock: 123 | if self.__writed_rows.get() > self.__config.max_rows_number: 124 | new_file_address = self.__config.output_file_address + str(self.__output_file_index.get()) 125 | print(f">> {file_address} has reached its maximum number of rows.") 126 | print(f">> The 
class Flow(object):
    """State and running statistics for one bidirectional packet flow.

    A flow is identified by the source/destination address, the
    source/destination port and the protocol of the packet it was created
    from.  Besides storing its packets, the flow keeps incremental counters
    for sub-flows, active/idle periods, bulk transfers per direction and
    FIN/RST flags; they are refreshed on every ``add_packet`` call.

    Timestamps are handled as floats; ``__str__`` passes the start time to
    ``datetime.fromtimestamp``, i.e. they are treated as POSIX seconds.
    """

    def __init__(self, packet, activity_timeout):
        """Create the flow from its first packet.

        The packet only seeds the identity and the clocks; it is NOT stored
        here, the caller has to hand it to ``add_packet`` as well.

        Args:
            packet: first packet of the flow (provides the 5-tuple and the
                start timestamp).
            activity_timeout: maximum silence, in timestamp units, used by
                ``actvity_timeout``.
        """
        first_seen = float(packet.get_timestamp())

        self.src_ip = packet.get_src_ip()
        self.dst_ip = packet.get_dst_ip()
        self.src_port = packet.get_src_port()
        self.dst_port = packet.get_dst_port()
        self.protocol = packet.get_protocol()
        self.__activity_timeout = activity_timeout
        self.flow_start_time = first_seen
        self.flow_last_seen = first_seen
        self.packets = []

        # Sub-flow tracking: timestamp of the previous packet (-1 = none yet)
        # and the number of gaps longer than one time unit.
        self.sflastts = -1
        self.sfcount = 0

        # Active/idle tracking.
        self.flow_active = []
        self.flow_idle = []
        self.start_active_time = first_seen
        self.end_active_time = first_seen

        self.__number_of_fwd_fin_flags = 0
        self.__number_of_bwd_fin_flags = 0

        self.__has_rst_flag = False

        # Forward bulk counters; the *Helper fields describe the candidate
        # bulk that has not reached four packets yet.
        self.fbulkDuration = 0
        self.fbulkPacketCount = 0
        self.fbulkSizeTotal = 0
        self.fbulkStateCount = 0
        self.fbulkPacketCountHelper = 0
        self.fbulkStartHelper = 0
        self.fbulkSizeHelper = 0
        self.flastBulkTS = 0
        # Backward bulk counters, same layout as the forward ones.
        self.bbulkDuration = 0
        self.bbulkPacketCount = 0
        self.bbulkSizeTotal = 0
        self.bbulkStateCount = 0
        self.bbulkPacketCountHelper = 0
        self.bbulkStartHelper = 0
        self.bbulkSizeHelper = 0
        self.blastBulkTS = 0

    def __str__(self):
        """Return ``src_sport_dst_dport_protocol_starttime`` for this flow."""
        return "_".join([str(self.src_ip), str(self.src_port), str(self.dst_ip), str(self.dst_port),
                         str(self.protocol), str(datetime.fromtimestamp(float(self.flow_start_time)))])

    def get_src_ip(self):
        return self.src_ip

    def get_dst_ip(self):
        return self.dst_ip

    def get_src_port(self):
        return self.src_port

    def get_dst_port(self):
        return self.dst_port

    def get_protocol(self):
        return self.protocol

    def get_packets(self) -> List["Packet"]:
        """Return the stored packets in insertion order (the live list)."""
        return self.packets

    def get_flow_start_time(self):
        return self.flow_start_time

    def get_flow_last_seen(self):
        """Return the timestamp of the most recently stored packet.

        A flow that holds no packet yet falls back to ``flow_last_seen``
        (the creation timestamp) instead of raising ``IndexError``.
        """
        if self.packets:
            return self.packets[-1].get_timestamp()
        return self.flow_last_seen

    def get_forwardpackets(self) -> List["Packet"]:
        """Return the packets whose ``is_forward()`` is true."""
        return [packet for packet in self.packets if packet.is_forward()]

    def get_backwardpackets(self) -> List["Packet"]:
        """Return the packets whose ``is_forward()`` is false."""
        return [packet for packet in self.packets if not packet.is_forward()]

    def get_timestamp(self):
        """Return the flow start time (alias of ``get_flow_start_time``)."""
        return self.flow_start_time

    def total_packets_payloadbytes(self):
        """Return the summed payload size of every stored packet."""
        return sum(packet.get_payloadbytes() for packet in self.packets)

    def update_subflow(self, packet_time):
        """Count a new sub-flow when the gap to the previous packet exceeds 1."""
        if self.sflastts == -1:
            self.sflastts = packet_time
        if packet_time - self.sflastts > 1:
            self.sfcount += 1
        self.sflastts = packet_time

    def get_subflow_count(self):
        return self.sfcount

    def updateFlowBulk(self, packet):
        """Feed *packet* to the bulk tracker of its own direction.

        Each tracker also receives the last bulk timestamp of the opposite
        direction so it can drop a candidate that was interrupted.
        """
        if packet.is_forward():
            self.updateForwardBulk(packet, self.blastBulkTS)
        else:
            self.updateBackwardBulk(packet, self.flastBulkTS)

    def updateForwardBulk(self, packet, tsOflastBulkInOther):
        """Update the forward bulk counters with *packet*.

        Packets without payload are ignored.  A candidate starts with the
        first payload packet, is restarted when the gap to the previous
        forward payload packet exceeds 1, and is counted as a bulk once it
        holds four packets; later packets extend that bulk.

        Args:
            packet: forward packet being added.
            tsOflastBulkInOther: last bulk timestamp of the backward side.
        """
        size = packet.get_payloadbytes()
        # Backward payload seen after the candidate started interrupts it.
        if tsOflastBulkInOther > self.fbulkStartHelper:
            self.fbulkStartHelper = 0
        if size <= 0:
            return

        now = float(packet.get_timestamp())
        if self.fbulkStartHelper == 0 or now - self.flastBulkTS > 1:
            # No candidate yet, or too long a pause: start a new candidate.
            self.fbulkStartHelper = now
            self.fbulkPacketCountHelper = 1
            self.fbulkSizeHelper = size
            self.flastBulkTS = now
            return

        # Add to the current candidate/bulk.
        self.fbulkPacketCountHelper += 1
        self.fbulkSizeHelper += size
        if self.fbulkPacketCountHelper == 4:
            # The candidate just became a bulk: account for all four packets.
            self.fbulkStateCount += 1
            self.fbulkPacketCount += self.fbulkPacketCountHelper
            self.fbulkSizeTotal += self.fbulkSizeHelper
            self.fbulkDuration += now - self.fbulkStartHelper
        elif self.fbulkPacketCountHelper > 4:
            # Existing bulk keeps growing: account for this packet only.
            self.fbulkPacketCount += 1
            self.fbulkSizeTotal += size
            self.fbulkDuration += now - self.flastBulkTS
        self.flastBulkTS = now

    def updateBackwardBulk(self, packet, tsOflastBulkInOther):
        """Update the backward bulk counters with *packet*.

        Mirror image of ``updateForwardBulk``.  The pause between packets is
        measured against ``blastBulkTS`` (the backward timestamp) and that
        same attribute is refreshed when a candidate is restarted.

        Args:
            packet: backward packet being added.
            tsOflastBulkInOther: last bulk timestamp of the forward side.
        """
        size = packet.get_payloadbytes()
        # Forward payload seen after the candidate started interrupts it.
        if tsOflastBulkInOther > self.bbulkStartHelper:
            self.bbulkStartHelper = 0
        if size <= 0:
            return

        now = float(packet.get_timestamp())
        if self.bbulkStartHelper == 0 or now - self.blastBulkTS > 1:
            # No candidate yet, or too long a pause: start a new candidate.
            self.bbulkStartHelper = now
            self.bbulkPacketCountHelper = 1
            self.bbulkSizeHelper = size
            self.blastBulkTS = now
            return

        # Add to the current candidate/bulk.
        self.bbulkPacketCountHelper += 1
        self.bbulkSizeHelper += size
        if self.bbulkPacketCountHelper == 4:
            # The candidate just became a bulk: account for all four packets.
            self.bbulkStateCount += 1
            self.bbulkPacketCount += self.bbulkPacketCountHelper
            self.bbulkSizeTotal += self.bbulkSizeHelper
            self.bbulkDuration += now - self.bbulkStartHelper
        elif self.bbulkPacketCountHelper > 4:
            # Existing bulk keeps growing: account for this packet only.
            self.bbulkPacketCount += 1
            self.bbulkSizeTotal += size
            self.bbulkDuration += now - self.blastBulkTS
        self.blastBulkTS = now

    def get_fBulkStateCount(self):
        return self.fbulkStateCount

    def get_fBulkSizeTotal(self):
        return self.fbulkSizeTotal

    def get_fBulkPacketCount(self):
        return self.fbulkPacketCount

    def get_fBulkDuration(self):
        return self.fbulkDuration

    def get_bBulkStateCount(self):
        return self.bbulkStateCount

    def get_bBulkSizeTotal(self):
        return self.bbulkSizeTotal

    def get_bBulkPacketCount(self):
        return self.bbulkPacketCount

    def get_bBulkDuration(self):
        return self.bbulkDuration

    def update_active_idle_time(self, current_time, active_thr=100):
        """Close the current active period when the flow was silent too long.

        When more than *active_thr* elapsed since the last packet, the
        finished active period (``end - start``, recorded only if positive)
        and the idle gap are appended to their lists and a new active period
        starts at *current_time*.  In every case the end of the active
        period moves to *current_time*.
        """
        if current_time - self.end_active_time > active_thr:
            active_duration = self.end_active_time - self.start_active_time
            if active_duration > 0:
                self.flow_active.append(active_duration)
            self.flow_idle.append(current_time - self.end_active_time)
            self.start_active_time = current_time
        self.end_active_time = current_time

    def get_flow_idle(self):
        return self.flow_idle

    def get_flow_active(self):
        return self.flow_active

    def add_packet(self, packet):
        """Store *packet* and refresh every running statistic of the flow."""
        time = float(packet.get_timestamp())
        self.packets.append(packet)
        self.flow_last_seen = time
        self.update_active_idle_time(time)
        self.update_subflow(time)
        self.updateFlowBulk(packet)

        if packet.has_flagFIN():
            if packet.is_forward():
                self.__number_of_fwd_fin_flags += 1
            else:
                self.__number_of_bwd_fin_flags += 1

        if packet.has_flagRST():
            self.__has_rst_flag = True

    def has_two_FIN_flags(self):
        """Return True once a FIN was seen in both directions."""
        return self.__number_of_fwd_fin_flags >= 1 and self.__number_of_bwd_fin_flags >= 1

    def has_flagRST(self):
        """Return True once any stored packet carried the RST flag."""
        return self.__has_rst_flag

    def actvity_timeout(self, packet: "Packet"):
        """Return True when *packet* arrives later than the activity timeout.

        The silence is measured from the last packet of the flow to the
        timestamp of *packet*.
        """
        silence = float(packet.get_timestamp()) - float(self.get_flow_last_seen())
        return silence > self.__activity_timeout
# Every feature below reduces the values returned by ``utils.calculate_IAT``
# (presumably packet inter-arrival times; the helper is defined outside this
# file) for all, forward-only or backward-only packets of a flow.  The
# reduction patterns are shared through the private helpers that follow.

_HANDLED_ERRORS = (ZeroDivisionError, ValueError)


def _skewness(samples):
    """Return ``scipy.stats.skew`` of *samples* converted to ``float``."""
    return float(stats.skew(samples))


def _formatted_if_any(feature, samples, statistic):
    """Format ``statistic(samples)`` with the feature's unit; 0 if *samples* is falsy."""
    if not samples:
        return 0
    return format(statistic(samples), feature.floating_point_unit)


def _raw_if_any(samples, statistic):
    """Return ``statistic(samples)`` unformatted; 0 if *samples* is falsy."""
    return statistic(samples) if samples else 0


def _formatted_or_zero(feature, samples, statistic):
    """Format ``statistic(samples)``; 0 on ZeroDivisionError/ValueError."""
    try:
        return format(statistic(samples), feature.floating_point_unit)
    except _HANDLED_ERRORS:
        return 0


def _formatted_mode(feature, samples):
    """Format the first element of ``scipy.stats.mode``; 0 if empty or on error."""
    try:
        if len(samples) > 0:
            return format(float(stats.mode(samples)[0]), feature.floating_point_unit)
    except _HANDLED_ERRORS:
        pass
    return 0


# ---------------------------------------------------------------------------
# All packets of the flow
# ---------------------------------------------------------------------------

class PacketsIATMean(Feature):
    name = "packets_IAT_mean"

    def extract(self, flow: Flow) -> float:
        return _formatted_if_any(self, utils.calculate_IAT(flow.get_packets()), statistics.mean)


class PacketsIATStd(Feature):
    name = "packet_IAT_std"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_packets()), statistics.pstdev)


class PacketsIATMax(Feature):
    name = "packet_IAT_max"

    def extract(self, flow: Flow) -> float:
        return _raw_if_any(utils.calculate_IAT(flow.get_packets()), max)


class PacketsIATMin(Feature):
    name = "packet_IAT_min"

    def extract(self, flow: Flow) -> float:
        return _raw_if_any(utils.calculate_IAT(flow.get_packets()), min)


class PacketsIATSum(Feature):
    name = "packet_IAT_total"

    def extract(self, flow: Flow) -> float:
        return _raw_if_any(utils.calculate_IAT(flow.get_packets()), sum)


class PacketsIATMedian(Feature):
    name = "packets_IAT_median"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_packets()), statistics.median)


class PacketsIATSkewness(Feature):
    name = "packets_IAT_skewness"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_packets()), _skewness)


class PacketsIATCoV(Feature):
    name = "packets_IAT_cov"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_packets()), stats.variation)


class PacketsIATMode(Feature):
    name = "packets_IAT_mode"

    def extract(self, flow: Flow) -> float:
        return _formatted_mode(self, utils.calculate_IAT(flow.get_packets()))


class PacketsIATVariance(Feature):
    name = "packets_IAT_variance"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_packets()), statistics.pvariance)


# ---------------------------------------------------------------------------
# Forward packets only
# ---------------------------------------------------------------------------

class FwdPacketsIATMean(Feature):
    name = "fwd_packets_IAT_mean"

    def extract(self, flow: Flow) -> float:
        return _formatted_if_any(self, utils.calculate_IAT(flow.get_forwardpackets()), statistics.mean)


class FwdPacketsIATStd(Feature):
    name = "fwd_packets_IAT_std"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_forwardpackets()), statistics.pstdev)


class FwdPacketsIATMax(Feature):
    name = "fwd_packets_IAT_max"

    def extract(self, flow: Flow) -> float:
        return _raw_if_any(utils.calculate_IAT(flow.get_forwardpackets()), max)


class FwdPacketsIATMin(Feature):
    name = "fwd_packets_IAT_min"

    def extract(self, flow: Flow) -> float:
        return _raw_if_any(utils.calculate_IAT(flow.get_forwardpackets()), min)


class FwdPacketsIATSum(Feature):
    name = "fwd_packets_IAT_total"

    def extract(self, flow: Flow) -> float:
        return _raw_if_any(utils.calculate_IAT(flow.get_forwardpackets()), sum)


class FwdPacketsIATMedian(Feature):
    name = "fwd_packets_IAT_median"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_forwardpackets()), statistics.median)


class FwdPacketsIATSkewness(Feature):
    name = "fwd_packets_IAT_skewness"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_forwardpackets()), _skewness)


class FwdPacketsIATCoV(Feature):
    name = "fwd_packets_IAT_cov"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_forwardpackets()), stats.variation)


class FwdPacketsIATMode(Feature):
    name = "fwd_packets_IAT_mode"

    def extract(self, flow: Flow) -> float:
        return _formatted_mode(self, utils.calculate_IAT(flow.get_forwardpackets()))


class FwdPacketsIATVariance(Feature):
    name = "fwd_packets_IAT_variance"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_forwardpackets()), statistics.pvariance)


# ---------------------------------------------------------------------------
# Backward packets only
# ---------------------------------------------------------------------------

class BwdPacketsIATMean(Feature):
    name = "bwd_packets_IAT_mean"

    def extract(self, flow: Flow) -> float:
        return _formatted_if_any(self, utils.calculate_IAT(flow.get_backwardpackets()), statistics.mean)


class BwdPacketsIATStd(Feature):
    name = "bwd_packets_IAT_std"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_backwardpackets()), statistics.pstdev)


class BwdPacketsIATMax(Feature):
    name = "bwd_packets_IAT_max"

    def extract(self, flow: Flow) -> float:
        return _raw_if_any(utils.calculate_IAT(flow.get_backwardpackets()), max)


class BwdPacketsIATMin(Feature):
    name = "bwd_packets_IAT_min"

    def extract(self, flow: Flow) -> float:
        return _raw_if_any(utils.calculate_IAT(flow.get_backwardpackets()), min)


class BwdPacketsIATSum(Feature):
    name = "bwd_packets_IAT_total"

    def extract(self, flow: Flow) -> float:
        return _raw_if_any(utils.calculate_IAT(flow.get_backwardpackets()), sum)


class BwdPacketsIATMedian(Feature):
    name = "bwd_packets_IAT_median"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_backwardpackets()), statistics.median)


class BwdPacketsIATSkewness(Feature):
    name = "bwd_packets_IAT_skewness"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_backwardpackets()), _skewness)


class BwdPacketsIATCoV(Feature):
    name = "bwd_packets_IAT_cov"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_backwardpackets()), stats.variation)


class BwdPacketsIATMode(Feature):
    name = "bwd_packets_IAT_mode"

    def extract(self, flow: Flow) -> float:
        return _formatted_mode(self, utils.calculate_IAT(flow.get_backwardpackets()))


class BwdPacketsIATVariance(Feature):
    name = "bwd_packets_IAT_variance"

    def extract(self, flow: Flow) -> float:
        return _formatted_or_zero(self, utils.calculate_IAT(flow.get_backwardpackets()), statistics.pvariance)
class NetworkFlowCapturer:
    """Read pcap files with dpkt and group their TCP packets into flows.

    Packets exchanged with the configured VXLAN address are decapsulated
    first.  Flows that ended are buffered in a private list and moved to the
    shared ``flows`` list handed to ``capture``/``pcap_parser`` under the
    supplied lock.  Packet totals over all parsed files are kept in
    ``ip_packets``, ``tcp_packets``, ``udp_packets`` and ``all_packets``.
    """

    def __init__(self, max_flow_duration: int, activity_timeout: int,
                 check_flows_ending_min_flows: int, capturer_updating_flows_min_value: int,
                 read_packets_count_value_log_info: int, vxlan_ip: str,
                 continues_batch_address: str, continues_pcap_prefix: str,
                 number_of_continues_files: int, continues_batch_mode: bool,
                 base_number_continues_files: int):
        """Store the configuration and reset all counters.

        Args:
            max_flow_duration: a flow older than this ends on its next packet.
            activity_timeout: a flow silent for longer than this ends.
            check_flows_ending_min_flows: number of ongoing flows from which
                idle flows are swept.
            capturer_updating_flows_min_value: number of finished flows from
                which they are flushed to the shared list.
            read_packets_count_value_log_info: progress is printed every this
                many packets.
            vxlan_ip: address whose traffic is treated as VXLAN-encapsulated.
            continues_batch_address: directory of the batch pcap files.
            continues_pcap_prefix: file-name prefix of the batch pcap files.
            number_of_continues_files: number of batch files to read.
            continues_batch_mode: read the numbered batch files instead of
                the single pcap passed to ``capture``.
            base_number_continues_files: index of the first batch file.
        """
        self.__finished_flows = []
        self.__ongoing_flows = {}
        self.__max_flow_duration = max_flow_duration
        self.__activity_timeout = activity_timeout
        self.__check_flows_ending_min_flows = check_flows_ending_min_flows
        self.__capturer_updating_flows_min_value = capturer_updating_flows_min_value
        self.__read_packets_count_value_log_info = read_packets_count_value_log_info
        self.__vxlan_ip = vxlan_ip
        self.__continues_batch_address = continues_batch_address
        self.__continues_pcap_prefix = continues_pcap_prefix
        self.__number_of_continues_files = number_of_continues_files
        self.__continues_batch_mode = continues_batch_mode
        self.__base_number_continues_files = base_number_continues_files
        self.flows_counter = 0
        self.tcp_packets = 0
        self.udp_packets = 0
        self.ip_packets = 0
        self.all_packets = 0
        # Packets read from the file currently being parsed; defined here so
        # it exists even when no file is parsed at all.
        self.packet_counter = 0

    @staticmethod
    def __percentage(part, total) -> float:
        """Return *part* as a percentage of *total*; 0.0 when *total* is 0."""
        return (part / total) * 100 if total else 0.0

    @staticmethod
    def __flow_key(src_ip, src_port, dst_ip, dst_port, protocol) -> str:
        """Build the dictionary key ``src_sport_dst_dport_protocol``."""
        return "_".join(str(part) for part in (src_ip, src_port, dst_ip, dst_port, protocol))

    def __involves_vxlan(self, ip) -> bool:
        """Return True when the VXLAN address is the source or destination."""
        return (socket.inet_ntoa(ip.src) == self.__vxlan_ip) or (socket.inet_ntoa(ip.dst) == self.__vxlan_ip)

    def __is_vxlan_tunnel(self, ip) -> bool:
        """Return True when one end is the VXLAN address and the other starts with ``10.0.``."""
        src = socket.inet_ntoa(ip.src)
        dst = socket.inet_ntoa(ip.dst)
        return (src == self.__vxlan_ip and dst[0:5] == "10.0.") or \
               (dst == self.__vxlan_ip and src[0:5] == "10.0.")

    def pcap_summary(self, address):
        """Print IP/TCP/UDP statistics of one pcap file and add them to the totals.

        When the file cannot be opened or is not a readable pcap, the error
        is printed and the method returns without touching the totals.
        Percentages are reported as 0.00% for a file without packets.
        """
        ip_count, tcp_count, udp_count = 0, 0, 0
        total_packets = 0
        app_protocol_count = defaultdict(int)
        pcap_file = None
        try:
            pcap_file = open(address, 'rb')
            pcap = dpkt.pcap.Reader(pcap_file)
        except Exception as e:
            if pcap_file is not None:
                pcap_file.close()
            print(f">>> There was an error in reading the pcap file at ==> '{address}'.\n",
                  f">>> Here is the error message: '{e}'")
            return

        with pcap_file:
            for _ts, buf in pcap:
                total_packets += 1
                try:
                    eth = dpkt.ethernet.Ethernet(buf)
                    while True:
                        if not isinstance(eth.data, dpkt.ip.IP):
                            break
                        ip = eth.data
                        if not self.__involves_vxlan(ip):
                            break
                        if not self.__is_vxlan_tunnel(ip):
                            break
                        if len(eth.data.data.data) == 0:
                            break

                        # To understand what is happening here, I recommend you to check the packets in wireshark
                        new_buf = eth.data.data.data
                        if isinstance(eth.data.data, dpkt.icmp.ICMP):
                            new_buf = eth.data.data.data.data.data.data
                        new_buf = new_buf[8:]  # Passing the vxlan
                        eth = dpkt.ethernet.Ethernet(new_buf)

                    if not isinstance(eth.data, dpkt.ip.IP):
                        continue
                    ip = eth.data

                    if self.__involves_vxlan(ip) and len(eth.data.data.data) == 0:
                        continue
                    ip_count += 1

                    if isinstance(ip.data, dpkt.udp.UDP):
                        udp_count += 1
                        app_protocol_count[ip.data.dport] += 1
                        continue

                    if isinstance(ip.data, dpkt.tcp.TCP):
                        tcp_count += 1
                        app_protocol_count[ip.data.dport] += 1

                except Exception as e:
                    # A single malformed packet must not abort the summary.
                    print(f"ERROR in packet number {total_packets}")
                    print(e)
                    continue

        print(50 * "=")
        print("Number and percentage of IP packets:")
        print(f" Total IP packets: {ip_count}")
        print(f" Percentage of IP packets: {self.__percentage(ip_count, total_packets):.2f}%\n")

        print("Number and percentage of TCP packets:")
        print(f" Total TCP packets: {tcp_count}")
        print(f" Percentage of TCP packets: {self.__percentage(tcp_count, total_packets):.2f}%\n")

        print("Number and percentage of UDP packets:")
        print(f" Total UDP packets: {udp_count}")
        print(f" Percentage of UDP packets: {self.__percentage(udp_count, total_packets):.2f}%\n")

        top_protocols = Counter(app_protocol_count).most_common(10)
        print("Top 10 Application Layer Protocols:")
        for port, count in top_protocols:
            protocol_name = self.get_protocol_name(port)
            print(f" Port {port} ({protocol_name}): {count} packets, {self.__percentage(count, total_packets):.2f}%")

        print(50 * "=")
        self.ip_packets += ip_count
        self.tcp_packets += tcp_count
        self.udp_packets += udp_count
        self.all_packets += total_packets

    def get_protocol_name(self, port):
        """Return the service name of a few well-known ports, else "Unknown"."""
        protocol_names = {
            80: "HTTP",
            443: "HTTPS",
            21: "FTP",
            22: "SSH",
            25: "SMTP",
            110: "POP3",
            143: "IMAP",
            53: "DNS",
            137: "NetBIOS-NS",
            3389: "RDP",
        }
        return protocol_names.get(port, "Unknown")

    def pcap_parser(self, pcap_file: str, flows: list, flows_lock):
        """Parse one pcap file and feed its TCP packets into the flow table.

        Prints the file summary first, then walks the packets once more.
        Non-IP and non-TCP packets are skipped, as is traffic that involves
        the VXLAN address without being tunnel traffic.  Errors raised while
        handling a single packet are printed and that packet is skipped.

        Args:
            pcap_file: path of the pcap file.
            flows: shared list that receives finished flows.
            flows_lock: context manager guarding *flows*.
        """
        print(f">> Analyzing {pcap_file}")
        self.packet_counter = 0
        self.pcap_summary(pcap_file)
        with open(pcap_file, 'rb') as f:
            pcap = dpkt.pcap.Reader(f)
            for ts, buf in pcap:
                self.packet_counter += 1
                try:
                    new_buf = buf
                    eth = dpkt.ethernet.Ethernet(buf)

                    while True:
                        if not isinstance(eth.data, dpkt.ip.IP):
                            break
                        ip = eth.data
                        if not self.__involves_vxlan(ip):
                            break
                        if not self.__is_vxlan_tunnel(ip):
                            break

                        # This part is for the decapsulation of VXLAN tag that is used in AWS traffic mirroring service.
                        # To understand what is happening here, I recommend you to check the packets in wireshark.
                        new_buf = eth.data.data.data
                        if isinstance(eth.data.data, dpkt.icmp.ICMP):
                            new_buf = eth.data.data.data.data.data.data

                        new_buf = new_buf[8:]  # Passing the vxlan
                        eth = dpkt.ethernet.Ethernet(new_buf)

                    if not isinstance(eth.data, dpkt.ip.IP):
                        continue
                    ip = eth.data

                    if not isinstance(ip.data, dpkt.tcp.TCP):
                        continue

                    if self.__involves_vxlan(ip) and not self.__is_vxlan_tunnel(ip):
                        continue

                    tcp_layer = ip.data
                    nlflyzer_packet = Packet(
                        src_ip=socket.inet_ntoa(ip.src),
                        src_port=tcp_layer.sport,
                        dst_ip=socket.inet_ntoa(ip.dst),
                        dst_port=tcp_layer.dport,
                        protocol='TCP',
                        flags=tcp_layer.flags,
                        timestamp=ts,
                        length=len(new_buf),
                        payloadbytes=len(tcp_layer.data),
                        header_size=len(ip.data) - len(tcp_layer.data),
                        window_size=tcp_layer.win,
                        seq_number=tcp_layer.seq,
                        ack_number=tcp_layer.ack)

                    self.__add_packet_to_flow(nlflyzer_packet, flows, flows_lock)

                    if self.packet_counter % self.__read_packets_count_value_log_info == 0:
                        print(f">> {self.packet_counter} number of packets has been processed...")

                except Exception as e:
                    # A single malformed packet must not abort the parsing.
                    print(f"!! Exception happened!")
                    print(f"packet number: {self.packet_counter}")
                    print(e)
                    print(30*"*")
                    continue

    def capture(self, pcap_file: str, flows: list, flows_lock, thread_finished) -> None:
        """Parse the configured pcap file(s) and publish every flow.

        In batch mode the numbered files ``<prefix><i>`` inside the batch
        directory are parsed and *pcap_file* is ignored; otherwise only
        *pcap_file* is parsed.  Afterwards all finished and still ongoing
        flows are appended to *flows* under *flows_lock*, a report is printed
        and ``thread_finished.set(True)`` signals completion.
        """
        print(">> Parser has started...")
        if self.__continues_batch_mode is True:
            print(">> Continues Batch mode is on!")
            first = self.__base_number_continues_files
            for i in range(first, first + self.__number_of_continues_files):
                filename = self.__continues_pcap_prefix + str(i)
                continues_pcap_file = os.path.join(self.__continues_batch_address, filename)
                self.pcap_parser(pcap_file=continues_pcap_file, flows=flows, flows_lock=flows_lock)

        else:
            self.pcap_parser(pcap_file=pcap_file, flows=flows, flows_lock=flows_lock)

        print(f">> End of parsing pcap file(s).")
        # all_packets accumulates over every file; packet_counter only covers
        # the file parsed last.
        print(f">>> {self.all_packets} packets analyzed and {self.flows_counter} flows created in total.")

        with flows_lock:
            flows.extend(self.__finished_flows)
            flows.extend(self.__ongoing_flows.values())

        print(50 * "#")
        print(">> Parser Report:")
        print(50 * "#")
        print(">>> Number and percentage of IP packets:")
        print(f" Total IP packets: {self.ip_packets}")
        print(f" Percentage of IP packets: {self.__percentage(self.ip_packets, self.all_packets):.2f}%\n")

        print(">>> Number and percentage of TCP packets:")
        print(f" Total TCP packets: {self.tcp_packets}")
        print(f" Percentage of TCP packets: {self.__percentage(self.tcp_packets, self.all_packets):.2f}%\n")

        print(">>> Number and percentage of UDP packets:")
        print(f" Total UDP packets: {self.udp_packets}")
        print(f" Percentage of UDP packets: {self.__percentage(self.udp_packets, self.all_packets):.2f}%\n")
        print(50 * "#")

        print(">> Preparing the output file...")

        thread_finished.set(True)

    def __add_packet_to_flow(self, packet: "Packet", flows: list, flows_lock) -> None:
        """Attach *packet* to its ongoing flow or start a new flow with it.

        When the matching flow has ended, it is moved to the finished list
        and a new flow is started from *packet*; at that point finished
        flows are flushed to *flows* and idle ongoing flows are swept if the
        configured thresholds are reached.
        """
        flow_id_dict = self.__search_for_flow(packet)

        if flow_id_dict is None:
            self.__create_new_flow(packet)
            return

        flow = self.__ongoing_flows[flow_id_dict]
        if not self.flow_is_ended(flow, packet):
            flow.add_packet(packet)
            return

        self.__finished_flows.append(flow)
        del self.__ongoing_flows[flow_id_dict]
        # NOTE(review): if the packet matched the ended flow in the reverse
        # direction, __search_for_flow already set packet.forward = False, so
        # the new flow starts with a packet marked as backward - confirm this
        # is intended.
        self.__create_new_flow(packet)

        if len(self.__finished_flows) >= self.__capturer_updating_flows_min_value:
            with flows_lock:
                flows.extend(self.__finished_flows)
                self.__finished_flows.clear()

        if len(self.__ongoing_flows) >= self.__check_flows_ending_min_flows:
            # Iterate over a copy because entries are deleted on the way.
            for oflow_id, oflow in list(self.__ongoing_flows.items()):
                if oflow.actvity_timeout(packet):
                    self.__finished_flows.append(oflow)
                    del self.__ongoing_flows[oflow_id]

    def flow_is_ended(self, flow, packet):
        """Return True when *flow* must be closed before accepting *packet*.

        A flow ends when it is older than the maximum duration, was silent
        for longer than the activity timeout, saw FIN in both directions or
        saw RST.  Differences are taken directly on the float timestamps.
        """
        packet_time = float(packet.get_timestamp())
        flow_duration = packet_time - float(flow.get_flow_start_time())
        active_time = packet_time - float(flow.get_flow_last_seen())
        return bool(flow_duration > self.__max_flow_duration
                    or active_time > self.__activity_timeout
                    or flow.has_two_FIN_flags()
                    or flow.has_flagRST())

    def __search_for_flow(self, packet) -> object:
        """Return the key of the ongoing flow *packet* belongs to, or None.

        The reverse direction is checked first; on such a match the packet
        is marked with ``packet.forward = False``.
        """
        flow_id_dict = self.__flow_key(packet.get_src_ip(), packet.get_src_port(),
                                       packet.get_dst_ip(), packet.get_dst_port(),
                                       packet.get_protocol())

        alternative_flow_id_dict = self.__flow_key(packet.get_dst_ip(), packet.get_dst_port(),
                                                   packet.get_src_ip(), packet.get_src_port(),
                                                   packet.get_protocol())

        if alternative_flow_id_dict in self.__ongoing_flows:
            packet.forward = False
            return alternative_flow_id_dict

        if flow_id_dict in self.__ongoing_flows:
            return flow_id_dict
        return None

    def __create_new_flow(self, packet) -> None:
        """Start a flow from *packet* and register it as ongoing."""
        self.flows_counter += 1
        new_flow = Flow(packet, self.__activity_timeout)
        new_flow.add_packet(packet)
        flow_id_dict = self.__flow_key(packet.get_src_ip(), packet.get_src_port(),
                                       packet.get_dst_ip(), packet.get_dst_port(),
                                       packet.get_protocol())
        self.__ongoing_flows[flow_id_dict] = new_flow
42 | BwdPayloadBytesStd(), 43 | BwdPayloadBytesVariance(), 44 | BwdPayloadBytesMedian(), 45 | BwdPayloadBytesSkewness(), 46 | BwdPayloadBytesCov(), 47 | BwdPayloadBytesMode(), 48 | 49 | TotalHeaderBytes(), 50 | MaxHeaderBytes(), 51 | MinHeaderBytes(), 52 | MeanHeaderBytes(), 53 | StdHeaderBytes(), 54 | MedianHeaderBytes(), 55 | SkewnessHeaderBytes(), 56 | CoVHeaderBytes(), 57 | ModeHeaderBytes(), 58 | VarianceHeaderBytes(), 59 | FwdTotalHeaderBytes(), 60 | FwdMaxHeaderBytes(), 61 | FwdMinHeaderBytes(), 62 | FwdMeanHeaderBytes(), 63 | FwdStdHeaderBytes(), 64 | FwdMedianHeaderBytes(), 65 | FwdSkewnessHeaderBytes(), 66 | FwdCoVHeaderBytes(), 67 | FwdModeHeaderBytes(), 68 | FwdVarianceHeaderBytes(), 69 | BwdTotalHeaderBytes(), 70 | BwdMaxHeaderBytes(), 71 | BwdMinHeaderBytes(), 72 | BwdMeanHeaderBytes(), 73 | BwdStdHeaderBytes(), 74 | BwdMedianHeaderBytes(), 75 | BwdSkewnessHeaderBytes(), 76 | BwdCoVHeaderBytes(), 77 | BwdModeHeaderBytes(), 78 | BwdVarianceHeaderBytes(), 79 | 80 | FwdSegmentSizeMean(), 81 | FwdSegmentSizeMax(), 82 | FwdSegmentSizeMin(), 83 | FwdSegmentSizeStd(), 84 | FwdSegmentSizeVariance(), 85 | FwdSegmentSizeMedian(), 86 | FwdSegmentSizeSkewness(), 87 | FwdSegmentSizeCov(), 88 | FwdSegmentSizeMode(), 89 | BwdSegmentSizeMean(), 90 | BwdSegmentSizeMax(), 91 | BwdSegmentSizeMin(), 92 | BwdSegmentSizeStd(), 93 | BwdSegmentSizeVariance(), 94 | BwdSegmentSizeMedian(), 95 | BwdSegmentSizeSkewness(), 96 | BwdSegmentSizeCov(), 97 | BwdSegmentSizeMode(), 98 | SegmentSizeMean(), 99 | SegmentSizeMax(), 100 | SegmentSizeMin(), 101 | SegmentSizeStd(), 102 | SegmentSizeVariance(), 103 | SegmentSizeMedian(), 104 | SegmentSizeSkewness(), 105 | SegmentSizeCov(), 106 | SegmentSizeMode(), 107 | 108 | FwdInitWinBytes(), 109 | BwdInitWinBytes(), 110 | 111 | ActiveMin(), 112 | ActiveMax(), 113 | ActiveMean(), 114 | ActiveStd(), 115 | ActiveMedian(), 116 | ActiveSkewness(), 117 | ActiveCoV(), 118 | ActiveMode(), 119 | ActiveVariance(), 120 | 121 | IdleMin(), 122 | 
IdleMax(), 123 | IdleMean(), 124 | IdleStd(), 125 | IdleMedian(), 126 | IdleSkewness(), 127 | IdleCoV(), 128 | IdleMode(), 129 | IdleVariance(), 130 | 131 | BytesRate(), 132 | FwdBytesRate(), 133 | BwdBytesRate(), 134 | PacketsRate(), 135 | BwdPacketsRate(), 136 | FwdPacketsRate(), 137 | DownUpRate(), 138 | 139 | AvgFwdBytesPerBulk(), 140 | AvgFwdPacketsPerBulk(), 141 | AvgFwdBulkRate(), 142 | AvgBwdBytesPerBulk(), 143 | AvgBwdPacketsPerBulk(), 144 | AvgBwdBulkRate(), 145 | FwdBulkStateCount(), 146 | FwdBulkSizeTotal(), 147 | FwdBulkPacketCount(), 148 | FwdBulkDuration(), 149 | BwdBulkStateCount(), 150 | BwdBulkSizeTotal(), 151 | BwdBulkPacketCount(), 152 | BwdBulkDuration(), 153 | 154 | FINFlagCounts(), 155 | PSHFlagCounts(), 156 | URGFlagCounts(), 157 | ECEFlagCounts(), 158 | SYNFlagCounts(), 159 | ACKFlagCounts(), 160 | CWRFlagCounts(), 161 | RSTFlagCounts(), 162 | FwdFINFlagCounts(), 163 | FwdPSHFlagCounts(), 164 | FwdURGFlagCounts(), 165 | FwdECEFlagCounts(), 166 | FwdSYNFlagCounts(), 167 | FwdACKFlagCounts(), 168 | FwdCWRFlagCounts(), 169 | FwdRSTFlagCounts(), 170 | BwdFINFlagCounts(), 171 | BwdPSHFlagCounts(), 172 | BwdURGFlagCounts(), 173 | BwdECEFlagCounts(), 174 | BwdSYNFlagCounts(), 175 | BwdACKFlagCounts(), 176 | BwdCWRFlagCounts(), 177 | BwdRSTFlagCounts(), 178 | 179 | FINFlagPercentageInTotal(), 180 | PSHFlagPercentageInTotal(), 181 | URGFlagPercentageInTotal(), 182 | ECEFlagPercentageInTotal(), 183 | SYNFlagPercentageInTotal(), 184 | ACKFlagPercentageInTotal(), 185 | CWRFlagPercentageInTotal(), 186 | RSTFlagPercentageInTotal(), 187 | FwdFINFlagPercentageInTotal(), 188 | FwdPSHFlagPercentageInTotal(), 189 | FwdURGFlagPercentageInTotal(), 190 | FwdECEFlagPercentageInTotal(), 191 | FwdSYNFlagPercentageInTotal(), 192 | FwdACKFlagPercentageInTotal(), 193 | FwdCWRFlagPercentageInTotal(), 194 | FwdRSTFlagPercentageInTotal(), 195 | BwdFINFlagPercentageInTotal(), 196 | BwdPSHFlagPercentageInTotal(), 197 | BwdURGFlagPercentageInTotal(), 198 | 
BwdECEFlagPercentageInTotal(), 199 | BwdSYNFlagPercentageInTotal(), 200 | BwdACKFlagPercentageInTotal(), 201 | BwdCWRFlagPercentageInTotal(), 202 | BwdRSTFlagPercentageInTotal(), 203 | FwdFINFlagPercentageInFwdPackets(), 204 | FwdPSHFlagPercentageInFwdPackets(), 205 | FwdURGFlagPercentageInFwdPackets(), 206 | FwdECEFlagPercentageInFwdPackets(), 207 | FwdSYNFlagPercentageInFwdPackets(), 208 | FwdACKFlagPercentageInFwdPackets(), 209 | FwdCWRFlagPercentageInFwdPackets(), 210 | FwdRSTFlagPercentageInFwdPackets(), 211 | BwdFINFlagPercentageInBwdPackets(), 212 | BwdPSHFlagPercentageInBwdPackets(), 213 | BwdURGFlagPercentageInBwdPackets(), 214 | BwdECEFlagPercentageInBwdPackets(), 215 | BwdSYNFlagPercentageInBwdPackets(), 216 | BwdACKFlagPercentageInBwdPackets(), 217 | BwdCWRFlagPercentageInBwdPackets(), 218 | BwdRSTFlagPercentageInBwdPackets(), 219 | 220 | PacketsIATMean(), 221 | PacketsIATStd(), 222 | PacketsIATMax(), 223 | PacketsIATMin(), 224 | PacketsIATSum(), 225 | PacketsIATMedian(), 226 | PacketsIATSkewness(), 227 | PacketsIATCoV(), 228 | PacketsIATMode(), 229 | PacketsIATVariance(), 230 | FwdPacketsIATMean(), 231 | FwdPacketsIATStd(), 232 | FwdPacketsIATMax(), 233 | FwdPacketsIATMin(), 234 | FwdPacketsIATSum(), 235 | FwdPacketsIATMedian(), 236 | FwdPacketsIATSkewness(), 237 | FwdPacketsIATCoV(), 238 | FwdPacketsIATMode(), 239 | FwdPacketsIATVariance(), 240 | BwdPacketsIATMean(), 241 | BwdPacketsIATStd(), 242 | BwdPacketsIATMax(), 243 | BwdPacketsIATMin(), 244 | BwdPacketsIATSum(), 245 | BwdPacketsIATMedian(), 246 | BwdPacketsIATSkewness(), 247 | BwdPacketsIATCoV(), 248 | BwdPacketsIATMode(), 249 | BwdPacketsIATVariance(), 250 | 251 | SubflowFwdPackets(), 252 | SubflowBwdPackets(), 253 | SubflowFwdBytes(), 254 | SubflowBwdBytes(), 255 | 256 | DeltaStart(), 257 | HandshakeDuration(), 258 | HandshakeState(), 259 | 260 | PacketsDeltaTimeMin(), 261 | PacketsDeltaTimeMax(), 262 | PacketsDeltaTimeMean(), 263 | PacketsDeltaTimeMode(), 264 | PacketsDeltaTimeVariance(), 
265 | PacketsDeltaTimeStd(), 266 | PacketsDeltaTimeMedian(), 267 | PacketsDeltaTimeSkewness(), 268 | PacketsDeltaTimeCoV(), 269 | BwdPacketsDeltaTimeMin(), 270 | BwdPacketsDeltaTimeMax(), 271 | BwdPacketsDeltaTimeMean(), 272 | BwdPacketsDeltaTimeMode(), 273 | BwdPacketsDeltaTimeVariance(), 274 | BwdPacketsDeltaTimeStd(), 275 | BwdPacketsDeltaTimeMedian(), 276 | BwdPacketsDeltaTimeSkewness(), 277 | BwdPacketsDeltaTimeCoV(), 278 | FwdPacketsDeltaTimeMin(), 279 | FwdPacketsDeltaTimeMax(), 280 | FwdPacketsDeltaTimeMean(), 281 | FwdPacketsDeltaTimeMode(), 282 | FwdPacketsDeltaTimeVariance(), 283 | FwdPacketsDeltaTimeStd(), 284 | FwdPacketsDeltaTimeMedian(), 285 | FwdPacketsDeltaTimeSkewness(), 286 | FwdPacketsDeltaTimeCoV(), 287 | 288 | PacketsDeltaLenMin(), 289 | PacketsDeltaLenMax(), 290 | PacketsDeltaLenMean(), 291 | PacketsDeltaLenMode(), 292 | PacketsDeltaLenVariance(), 293 | PacketsDeltaLenStd(), 294 | PacketsDeltaLenMedian(), 295 | PacketsDeltaLenSkewness(), 296 | PacketsDeltaLenCoV(), 297 | BwdPacketsDeltaLenMin(), 298 | BwdPacketsDeltaLenMax(), 299 | BwdPacketsDeltaLenMean(), 300 | BwdPacketsDeltaLenMode(), 301 | BwdPacketsDeltaLenVariance(), 302 | BwdPacketsDeltaLenStd(), 303 | BwdPacketsDeltaLenMedian(), 304 | BwdPacketsDeltaLenSkewness(), 305 | BwdPacketsDeltaLenCoV(), 306 | FwdPacketsDeltaLenMin(), 307 | FwdPacketsDeltaLenMax(), 308 | FwdPacketsDeltaLenMean(), 309 | FwdPacketsDeltaLenMode(), 310 | FwdPacketsDeltaLenVariance(), 311 | FwdPacketsDeltaLenStd(), 312 | FwdPacketsDeltaLenMedian(), 313 | FwdPacketsDeltaLenSkewness(), 314 | FwdPacketsDeltaLenCoV(), 315 | 316 | HeaderBytesDeltaLenMin(), 317 | HeaderBytesDeltaLenMax(), 318 | HeaderBytesDeltaLenMean(), 319 | HeaderBytesDeltaLenMode(), 320 | HeaderBytesDeltaLenVariance(), 321 | HeaderBytesDeltaLenStd(), 322 | HeaderBytesDeltaLenMedian(), 323 | HeaderBytesDeltaLenSkewness(), 324 | HeaderBytesDeltaLenCoV(), 325 | BwdHeaderBytesDeltaLenMin(), 326 | BwdHeaderBytesDeltaLenMax(), 327 | 
BwdHeaderBytesDeltaLenMean(), 328 | BwdHeaderBytesDeltaLenMode(), 329 | BwdHeaderBytesDeltaLenVariance(), 330 | BwdHeaderBytesDeltaLenStd(), 331 | BwdHeaderBytesDeltaLenMedian(), 332 | BwdHeaderBytesDeltaLenSkewness(), 333 | BwdHeaderBytesDeltaLenCoV(), 334 | FwdHeaderBytesDeltaLenMin(), 335 | FwdHeaderBytesDeltaLenMax(), 336 | FwdHeaderBytesDeltaLenMean(), 337 | FwdHeaderBytesDeltaLenMode(), 338 | FwdHeaderBytesDeltaLenVariance(), 339 | FwdHeaderBytesDeltaLenStd(), 340 | FwdHeaderBytesDeltaLenMedian(), 341 | FwdHeaderBytesDeltaLenSkewness(), 342 | FwdHeaderBytesDeltaLenCoV(), 343 | 344 | PayloadBytesDeltaLenMin(), 345 | PayloadBytesDeltaLenMax(), 346 | PayloadBytesDeltaLenMean(), 347 | PayloadBytesDeltaLenMode(), 348 | PayloadBytesDeltaLenVariance(), 349 | PayloadBytesDeltaLenStd(), 350 | PayloadBytesDeltaLenMedian(), 351 | PayloadBytesDeltaLenSkewness(), 352 | PayloadBytesDeltaLenCoV(), 353 | BwdPayloadBytesDeltaLenMin(), 354 | BwdPayloadBytesDeltaLenMax(), 355 | BwdPayloadBytesDeltaLenMean(), 356 | BwdPayloadBytesDeltaLenMode(), 357 | BwdPayloadBytesDeltaLenVariance(), 358 | BwdPayloadBytesDeltaLenStd(), 359 | BwdPayloadBytesDeltaLenMedian(), 360 | BwdPayloadBytesDeltaLenSkewness(), 361 | BwdPayloadBytesDeltaLenCoV(), 362 | FwdPayloadBytesDeltaLenMin(), 363 | FwdPayloadBytesDeltaLenMax(), 364 | FwdPayloadBytesDeltaLenMean(), 365 | FwdPayloadBytesDeltaLenMode(), 366 | FwdPayloadBytesDeltaLenVariance(), 367 | FwdPayloadBytesDeltaLenStd(), 368 | FwdPayloadBytesDeltaLenMedian(), 369 | FwdPayloadBytesDeltaLenSkewness(), 370 | FwdPayloadBytesDeltaLenCoV(), 371 | ] 372 | 373 | def execute(self, data: list, data_lock, flows: List[Flow], features_ignore_list: list = [], 374 | label: str = "") -> list: 375 | with warnings.catch_warnings(): 376 | warnings.simplefilter("ignore") 377 | 378 | self.__extracted_data = [] 379 | for flow in flows: 380 | features_of_flow = {} 381 | features_of_flow["flow_id"] = str(flow) 382 | features_of_flow["timestamp"] = 
datetime.fromtimestamp(float(flow.get_timestamp())) 383 | features_of_flow["src_ip"] = flow.get_src_ip() 384 | features_of_flow["src_port"] = flow.get_src_port() 385 | features_of_flow["dst_ip"] = flow.get_dst_ip() 386 | features_of_flow["dst_port"] = flow.get_dst_port() 387 | features_of_flow["protocol"] = flow.get_protocol() 388 | feature: Feature 389 | for feature in self.__features: 390 | if feature.name in features_ignore_list: 391 | continue 392 | feature.set_floating_point_unit(self.floating_point_unit) 393 | try: 394 | features_of_flow[feature.name] = feature.extract(flow) 395 | except Exception as e: 396 | print(f">>> Error occured in extracting the '{feature.name}' for '{flow}' flow.") 397 | print(f">>> Error message: {e}") 398 | print(110*"=") 399 | features_of_flow[feature.name] = None 400 | continue 401 | features_of_flow["label"] = label 402 | self.__extracted_data.append(features_of_flow.copy()) 403 | # print(len(features_of_flow)) 404 | with data_lock: 405 | data.extend(self.__extracted_data) 406 | -------------------------------------------------------------------------------- /NTLFlowLyzer/features/time_related.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from datetime import datetime 4 | import statistics 5 | from enum import Enum 6 | from typing import List 7 | from scipy import stats 8 | 9 | from NTLFlowLyzer.network_flow_capturer.packet import Packet 10 | from ..network_flow_capturer import Flow 11 | from .feature import Feature 12 | from . 
class ActiveSkewness(Feature):
    """Skewness of the values returned by ``flow.get_flow_active()``."""
    name = "active_skewness"

    def extract(self, flow: Flow) -> float:
        """Return the formatted skewness, or 0 when the flow reports no active values."""
        if flow.get_flow_active():
            skewness = float(stats.skew(flow.get_flow_active()))
            return format(skewness, self.floating_point_unit)
        return 0
class IdleMedian(Feature):
    """Median of the values returned by ``flow.get_flow_idle()``."""
    name = "idle_median"
    def extract(self, flow: Flow) -> float:
        # Guard on the idle list itself (the original tested the *active* list,
        # so an empty idle list reached statistics.median and raised).
        if not flow.get_flow_idle():
            return 0
        return format(statistics.median(flow.get_flow_idle()), self.floating_point_unit)


class IdleSkewness(Feature):
    """Skewness of the values returned by ``flow.get_flow_idle()``."""
    name = "idle_skewness"
    def extract(self, flow: Flow) -> float:
        # Guard on the idle list, not the active list.
        if not flow.get_flow_idle():
            return 0
        return format(float(stats.skew(flow.get_flow_idle())), self.floating_point_unit)


class IdleCoV(Feature):
    """Coefficient of variation of the values returned by ``flow.get_flow_idle()``."""
    name = "idle_cov"
    def extract(self, flow: Flow) -> float:
        # Guard on the idle list, not the active list.
        if not flow.get_flow_idle():
            return 0
        return format(stats.variation(flow.get_flow_idle()), self.floating_point_unit)


class IdleMode(Feature):
    """Mode of the values returned by ``flow.get_flow_idle()``."""
    name = "idle_mode"
    def extract(self, flow: Flow) -> float:
        # Guard on the idle list, not the active list.
        if not flow.get_flow_idle():
            return 0
        return format(float(stats.mode(flow.get_flow_idle())[0]), self.floating_point_unit)


class IdleVariance(Feature):
    """Population variance of the values returned by ``flow.get_flow_idle()``."""
    name = "idle_variance"
    def extract(self, flow: Flow) -> float:
        # Guard on the idle list, not the active list.
        if not flow.get_flow_idle():
            return 0
        return format(statistics.pvariance(flow.get_flow_idle()), self.floating_point_unit)


class PacketsDeltaTimeBase(Feature):
    """Shared helpers that compute gaps between consecutive packet timestamps."""

    def __get_packets_delta_time(self, packets: List[Packet]):
        """Return the gaps, in milliseconds, between time-ordered packets.

        The result has ``len(packets) - 1`` entries (empty for fewer than two packets).
        """
        packets_sorted = sorted(
            datetime.fromtimestamp(float(packet.get_timestamp())) for packet in packets
        )
        packets_del_time = []
        for pkt_prev, pkt in zip(packets_sorted[:-1], packets_sorted[1:]):
            gap = pkt - pkt_prev
            # ``timedelta.microseconds`` is only the sub-second component; the
            # days and seconds components must be added or every gap of one
            # second or more is truncated. For sub-second gaps this equals the
            # previous ``microseconds / 1000`` exactly.
            packets_del_time.append(gap.days * 86400000 + gap.seconds * 1000 + gap.microseconds / 1000)
        return packets_del_time

    def get_receiving_delta(self, flow: Flow) -> list:
        """Gaps between the flow's backward packets."""
        return self.__get_packets_delta_time(flow.get_backwardpackets())

    def get_sending_delta(self, flow: Flow) -> list:
        """Gaps between the flow's forward packets."""
        return self.__get_packets_delta_time(flow.get_forwardpackets())

    def get_all_delta(self, flow: Flow) -> list:
        """Gaps between all packets of the flow."""
        return self.__get_packets_delta_time(flow.get_packets())


class HandshakingStates(Enum):
    """Progress of the SYN / SYN-ACK / ACK state machine walked by ``Handshake``."""
    Ideal = 0
    CLIENT_SENT_HANDSHAKE_REQUEST = 1
    SERVER_ACKNWOLEDGED_CLIENT_HANDSHAKE_REQUEST = 2
    END_OF_HANDSHAKING = 3


class Handshake(Feature):
    """Base class that walks a flow's packets through the handshake state machine.

    After ``extract_data_from_handshaking_process`` runs, ``delta``,
    ``duration`` and ``final_state`` hold the results for that flow.
    """
    name = "handshake"
    delta = None
    duration = None
    # Class-level default so the attribute always exists.
    final_state = HandshakingStates.Ideal

    def extract_data_from_handshaking_process(self, flow: Flow):
        """Populate ``delta``, ``duration`` and ``final_state`` from ``flow``.

        ``delta`` and ``duration`` are formatted numbers when a full handshake
        followed by at least one more packet is seen; otherwise they are the
        explanatory strings assigned below.
        """
        # Reset first: the same instance may be used for several flows, and the
        # non-TCP early return below previously left ``final_state`` unset
        # (AttributeError) or holding the previous flow's state.
        self.final_state = HandshakingStates.Ideal

        if flow.get_protocol() != "TCP":
            self.delta = "not a tcp connection"
            self.duration = "not a tcp connection"
            return

        packets = flow.get_packets()
        last_handshake_packet_time = 0
        first_handshake_packet_time = 0
        first_not_handshake_packet_time = 0
        STATE = HandshakingStates.Ideal
        seq_number = 0
        ack_number = 0
        for packet in packets:
            if STATE == HandshakingStates.END_OF_HANDSHAKING:
                # First packet after the completed handshake: compute and stop.
                # NOTE(review): if an earlier non-matching packet already set
                # first_not_handshake_packet_time, delta can be negative — confirm intent.
                if first_not_handshake_packet_time == 0:
                    first_not_handshake_packet_time = packet.get_timestamp()
                self.delta = format(first_not_handshake_packet_time - last_handshake_packet_time,
                        self.floating_point_unit)
                self.duration = format(last_handshake_packet_time - first_handshake_packet_time,
                        self.floating_point_unit)
                return

            if STATE == HandshakingStates.Ideal and packet.has_flagSYN():
                # Step 1: SYN.
                first_handshake_packet_time = packet.get_timestamp()
                seq_number = packet.get_seq_number()
                STATE = HandshakingStates.CLIENT_SENT_HANDSHAKE_REQUEST
                self.final_state = STATE

            elif STATE == HandshakingStates.CLIENT_SENT_HANDSHAKE_REQUEST \
                    and packet.has_flagSYN() \
                    and packet.has_flagACK() and seq_number == packet.get_ack_number() - 1:
                # Step 2: SYN-ACK acknowledging the recorded sequence number.
                seq_number = packet.get_seq_number()
                ack_number = packet.get_ack_number()
                STATE = HandshakingStates.SERVER_ACKNWOLEDGED_CLIENT_HANDSHAKE_REQUEST
                self.final_state = STATE

            elif STATE == HandshakingStates.SERVER_ACKNWOLEDGED_CLIENT_HANDSHAKE_REQUEST \
                    and packet.has_flagACK() and seq_number == packet.get_ack_number() - 1 \
                    and ack_number == packet.get_seq_number():
                # Step 3: final ACK matching both recorded numbers.
                last_handshake_packet_time = packet.get_timestamp()
                STATE = HandshakingStates.END_OF_HANDSHAKING
                self.final_state = STATE

            elif first_not_handshake_packet_time == 0:
                first_not_handshake_packet_time = packet.get_timestamp()

        # Reached when the loop ends without hitting the return above.
        self.delta = "not a complete handshake"
        self.duration = "not a complete handshake"

    def extract(self, flow: Flow) -> float:
        pass
class PacketsDeltaTimeMin(PacketsDeltaTimeBase):
    """Smallest gap between consecutive packets of the whole flow."""
    # Was "min_bwd_packets_delta_time", which collided with
    # BwdPacketsDeltaTimeMin and let one value overwrite the other in the row.
    name = "min_packets_delta_time"
    def extract(self, flow: Flow) -> float:
        """Return the formatted minimum gap, or 0 when there are fewer than two packets."""
        packets_del_time = super().get_all_delta(flow)
        if len(packets_del_time) > 0:
            return format(min(packets_del_time), self.floating_point_unit)
        return 0


class PacketsDeltaTimeMax(PacketsDeltaTimeBase):
    """Largest gap between consecutive packets of the whole flow."""
    # Was "max_bwd_packets_delta_time", which collided with
    # BwdPacketsDeltaTimeMax and let one value overwrite the other in the row.
    name = "max_packets_delta_time"
    def extract(self, flow: Flow) -> float:
        """Return the formatted maximum gap, or 0 when there are fewer than two packets."""
        packets_del_time = super().get_all_delta(flow)
        if len(packets_del_time) > 0:
            return format(max(packets_del_time), self.floating_point_unit)
        return 0
class PacketsDeltaTimeStd(PacketsDeltaTimeBase):
    """Population standard deviation of the gaps between all packets of the flow."""
    name = "std_packets_delta_time"

    def extract(self, flow: Flow) -> float:
        """Return the formatted standard deviation, or 0 when there are no gaps."""
        deltas = super().get_all_delta(flow)
        if len(deltas) == 0:
            return 0
        spread = statistics.pstdev(deltas)
        return format(spread, self.floating_point_unit)
class BwdPacketsDeltaTimeMedian(PacketsDeltaTimeBase):
    """Median of the gaps between the flow's backward packets."""
    name = "median_bwd_packets_delta_time"

    def extract(self, flow: Flow) -> float:
        """Return the formatted median, or 0 when there are no gaps."""
        bwd_deltas = super().get_receiving_delta(flow)
        if len(bwd_deltas) == 0:
            return 0
        middle = statistics.median(bwd_deltas)
        return format(middle, self.floating_point_unit)
class FwdPacketsDeltaTimeMode(PacketsDeltaTimeBase):
    """Mode of the gaps between the flow's forward packets."""
    name = "mode_fwd_packets_delta_time"

    def extract(self, flow: Flow) -> float:
        """Return the formatted mode, or 0 when there are no gaps."""
        fwd_deltas = super().get_sending_delta(flow)
        if len(fwd_deltas) == 0:
            return 0
        most_common = float(stats.mode(fwd_deltas)[0])
        return format(most_common, self.floating_point_unit)
class FINFlagCounts(Feature):
    """Number of packets in the flow for which ``has_flagFIN()`` is true."""
    name = "fin_flag_counts"

    def extract(self, flow: Flow) -> int:
        """Count the flow's packets that carry the FIN flag."""
        return sum(1 for pkt in flow.get_packets() if pkt.has_flagFIN())
class CWRFlagCounts(Feature):
    """Number of packets in the flow for which ``has_flagCWR()`` is true."""
    name = "cwr_flag_counts"

    def extract(self, flow: Flow) -> int:
        """Count the flow's packets that carry the CWR flag."""
        return sum(1 for pkt in flow.get_packets() if pkt.has_flagCWR())
def _ratio_to_total_packets(counts: int, flow: Flow) -> float:
    """Return ``counts`` divided by the number of packets in ``flow``.

    A flow without packets yields 0.0 instead of raising
    ZeroDivisionError, matching the zero-length guard that the
    per-direction percentage features of this module already apply.
    """
    total = len(flow.get_packets())
    if total == 0:
        return 0.0
    return counts / total


class FwdACKFlagCounts(Feature):
    """Number of forward packets with the ACK flag set."""
    name = "fwd_ack_flag_counts"

    def extract(self, flow: Flow) -> int:
        return sum(1 for packet in flow.get_forwardpackets() if packet.has_flagACK())


class FwdCWRFlagCounts(Feature):
    """Number of forward packets with the CWR flag set."""
    name = "fwd_cwr_flag_counts"

    def extract(self, flow: Flow) -> int:
        return sum(1 for packet in flow.get_forwardpackets() if packet.has_flagCWR())


class FwdRSTFlagCounts(Feature):
    """Number of forward packets with the RST flag set."""
    name = "fwd_rst_flag_counts"

    def extract(self, flow: Flow) -> int:
        return sum(1 for packet in flow.get_forwardpackets() if packet.has_flagRST())


class BwdFINFlagCounts(Feature):
    """Number of backward packets with the FIN flag set."""
    name = "bwd_fin_flag_counts"

    def extract(self, flow: Flow) -> int:
        return sum(1 for packet in flow.get_backwardpackets() if packet.has_flagFIN())


class BwdPSHFlagCounts(Feature):
    """Number of backward packets with the PSH flag set."""
    name = "bwd_psh_flag_counts"

    def extract(self, flow: Flow) -> int:
        return sum(1 for packet in flow.get_backwardpackets() if packet.has_flagPSH())


class BwdURGFlagCounts(Feature):
    """Number of backward packets with the URG flag set."""
    name = "bwd_urg_flag_counts"

    def extract(self, flow: Flow) -> int:
        return sum(1 for packet in flow.get_backwardpackets() if packet.has_flagURG())


class BwdECEFlagCounts(Feature):
    """Number of backward packets with the ECE flag set."""
    name = "bwd_ece_flag_counts"

    def extract(self, flow: Flow) -> int:
        return sum(1 for packet in flow.get_backwardpackets() if packet.has_flagECE())


class BwdSYNFlagCounts(Feature):
    """Number of backward packets with the SYN flag set."""
    name = "bwd_syn_flag_counts"

    def extract(self, flow: Flow) -> int:
        return sum(1 for packet in flow.get_backwardpackets() if packet.has_flagSYN())


class BwdACKFlagCounts(Feature):
    """Number of backward packets with the ACK flag set."""
    name = "bwd_ack_flag_counts"

    def extract(self, flow: Flow) -> int:
        return sum(1 for packet in flow.get_backwardpackets() if packet.has_flagACK())


class BwdCWRFlagCounts(Feature):
    """Number of backward packets with the CWR flag set."""
    name = "bwd_cwr_flag_counts"

    def extract(self, flow: Flow) -> int:
        return sum(1 for packet in flow.get_backwardpackets() if packet.has_flagCWR())


class BwdRSTFlagCounts(Feature):
    """Number of backward packets with the RST flag set."""
    name = "bwd_rst_flag_counts"

    def extract(self, flow: Flow) -> int:
        return sum(1 for packet in flow.get_backwardpackets() if packet.has_flagRST())


# NOTE: despite the "percentage" naming, the features below return a plain
# ratio (count / total packets); the value is not multiplied by 100.

class FINFlagPercentageInTotal(Feature):
    """Ratio of FIN-flagged packets to all packets of the flow."""
    name = "fin_flag_percentage_in_total"

    def extract(self, flow: Flow) -> float:
        counts = sum(1 for packet in flow.get_packets() if packet.has_flagFIN())
        return _ratio_to_total_packets(counts, flow)


class PSHFlagPercentageInTotal(Feature):
    """Ratio of PSH-flagged packets to all packets of the flow."""
    name = "psh_flag_percentage_in_total"

    def extract(self, flow: Flow) -> float:
        counts = sum(1 for packet in flow.get_packets() if packet.has_flagPSH())
        return _ratio_to_total_packets(counts, flow)


class URGFlagPercentageInTotal(Feature):
    """Ratio of URG-flagged packets to all packets of the flow."""
    name = "urg_flag_percentage_in_total"

    def extract(self, flow: Flow) -> float:
        counts = sum(1 for packet in flow.get_packets() if packet.has_flagURG())
        return _ratio_to_total_packets(counts, flow)


class ECEFlagPercentageInTotal(Feature):
    """Ratio of ECE-flagged packets to all packets of the flow."""
    name = "ece_flag_percentage_in_total"

    def extract(self, flow: Flow) -> float:
        counts = sum(1 for packet in flow.get_packets() if packet.has_flagECE())
        return _ratio_to_total_packets(counts, flow)


class SYNFlagPercentageInTotal(Feature):
    """Ratio of SYN-flagged packets to all packets of the flow."""
    name = "syn_flag_percentage_in_total"

    def extract(self, flow: Flow) -> float:
        counts = sum(1 for packet in flow.get_packets() if packet.has_flagSYN())
        return _ratio_to_total_packets(counts, flow)


class ACKFlagPercentageInTotal(Feature):
    """Ratio of ACK-flagged packets to all packets of the flow."""
    name = "ack_flag_percentage_in_total"

    def extract(self, flow: Flow) -> float:
        counts = sum(1 for packet in flow.get_packets() if packet.has_flagACK())
        return _ratio_to_total_packets(counts, flow)


class CWRFlagPercentageInTotal(Feature):
    """Ratio of CWR-flagged packets to all packets of the flow."""
    name = "cwr_flag_percentage_in_total"

    def extract(self, flow: Flow) -> float:
        counts = sum(1 for packet in flow.get_packets() if packet.has_flagCWR())
        return _ratio_to_total_packets(counts, flow)


class RSTFlagPercentageInTotal(Feature):
    """Ratio of RST-flagged packets to all packets of the flow."""
    name = "rst_flag_percentage_in_total"

    def extract(self, flow: Flow) -> float:
        counts = sum(1 for packet in flow.get_packets() if packet.has_flagRST())
        return _ratio_to_total_packets(counts, flow)


class FwdFINFlagPercentageInTotal(Feature):
    """Ratio of FIN-flagged forward packets to all packets of the flow."""
    name = "fwd_fin_flag_percentage_in_total"

    def extract(self, flow: Flow) -> float:
        # Numerator counts forward packets only; the denominator is the
        # flow's total packet count (both directions).
        counts = sum(1 for packet in flow.get_forwardpackets() if packet.has_flagFIN())
        return _ratio_to_total_packets(counts, flow)


class FwdPSHFlagPercentageInTotal(Feature):
    """Ratio of PSH-flagged forward packets to all packets of the flow."""
    name = "fwd_psh_flag_percentage_in_total"

    def extract(self, flow: Flow) -> float:
        # Numerator counts forward packets only; the denominator is the
        # flow's total packet count (both directions).
        counts = sum(1 for packet in flow.get_forwardpackets() if packet.has_flagPSH())
        return _ratio_to_total_packets(counts, flow)
347 | class FwdURGFlagPercentageInTotal(Feature): 348 | name = "fwd_urg_flag_percentage_in_total" 349 | def extract(self, flow: Flow) -> float: 350 | counts = 0 351 | for packet in flow.get_forwardpackets(): 352 | if packet.has_flagURG(): 353 | counts+=1 354 | return float(counts / len(flow.get_packets())) 355 | 356 | 357 | class FwdECEFlagPercentageInTotal(Feature): 358 | name = "fwd_ece_flag_percentage_in_total" 359 | def extract(self, flow: Flow) -> float: 360 | counts = 0 361 | for packet in flow.get_forwardpackets(): 362 | if packet.has_flagECE(): 363 | counts+=1 364 | return float(counts / len(flow.get_packets())) 365 | 366 | 367 | class FwdSYNFlagPercentageInTotal(Feature): 368 | name = "fwd_syn_flag_percentage_in_total" 369 | def extract(self, flow: Flow) -> float: 370 | counts = 0 371 | for packet in flow.get_forwardpackets(): 372 | if packet.has_flagSYN(): 373 | counts+=1 374 | return float(counts / len(flow.get_packets())) 375 | 376 | 377 | class FwdACKFlagPercentageInTotal(Feature): 378 | name = "fwd_ack_flag_percentage_in_total" 379 | def extract(self, flow: Flow) -> float: 380 | counts = 0 381 | for packet in flow.get_forwardpackets(): 382 | if packet.has_flagACK(): 383 | counts+=1 384 | return float(counts / len(flow.get_packets())) 385 | 386 | 387 | class FwdCWRFlagPercentageInTotal(Feature): 388 | name = "fwd_cwr_flag_percentage_in_total" 389 | def extract(self, flow: Flow) -> float: 390 | counts = 0 391 | for packet in flow.get_forwardpackets(): 392 | if packet.has_flagCWR(): 393 | counts+=1 394 | return float(counts / len(flow.get_packets())) 395 | 396 | 397 | class FwdRSTFlagPercentageInTotal(Feature): 398 | name = "fwd_rst_flag_percentage_in_total" 399 | def extract(self, flow: Flow) -> float: 400 | counts = 0 401 | for packet in flow.get_forwardpackets(): 402 | if packet.has_flagRST(): 403 | counts+=1 404 | return float(counts / len(flow.get_packets())) 405 | 406 | 407 | class BwdFINFlagPercentageInTotal(Feature): 408 | name = 
"bwd_fin_flag_percentage_in_total" 409 | def extract(self, flow: Flow) -> float: 410 | counts = 0 411 | for packet in flow.get_backwardpackets(): 412 | if packet.has_flagFIN(): 413 | counts += 1 414 | return float(counts / len(flow.get_packets())) 415 | 416 | 417 | class BwdPSHFlagPercentageInTotal(Feature): 418 | name = "bwd_psh_flag_percentage_in_total" 419 | def extract(self, flow: Flow) -> float: 420 | counts = 0 421 | for packet in flow.get_backwardpackets(): 422 | if packet.has_flagPSH(): 423 | counts+=1 424 | return float(counts / len(flow.get_packets())) 425 | 426 | 427 | class BwdURGFlagPercentageInTotal(Feature): 428 | name = "bwd_urg_flag_percentage_in_total" 429 | def extract(self, flow: Flow) -> float: 430 | counts = 0 431 | for packet in flow.get_backwardpackets(): 432 | if packet.has_flagURG(): 433 | counts+=1 434 | return float(counts / len(flow.get_packets())) 435 | 436 | 437 | class BwdECEFlagPercentageInTotal(Feature): 438 | name = "bwd_ece_flag_percentage_in_total" 439 | def extract(self, flow: Flow) -> float: 440 | counts = 0 441 | for packet in flow.get_backwardpackets(): 442 | if packet.has_flagECE(): 443 | counts+=1 444 | return float(counts / len(flow.get_packets())) 445 | 446 | 447 | class BwdSYNFlagPercentageInTotal(Feature): 448 | name = "bwd_syn_flag_percentage_in_total" 449 | def extract(self, flow: Flow) -> float: 450 | counts = 0 451 | for packet in flow.get_backwardpackets(): 452 | if packet.has_flagSYN(): 453 | counts+=1 454 | return float(counts / len(flow.get_packets())) 455 | 456 | 457 | class BwdACKFlagPercentageInTotal(Feature): 458 | name = "bwd_ack_flag_percentage_in_total" 459 | def extract(self, flow: Flow) -> float: 460 | counts = 0 461 | for packet in flow.get_backwardpackets(): 462 | if packet.has_flagACK(): 463 | counts+=1 464 | return float(counts / len(flow.get_packets())) 465 | 466 | 467 | class BwdCWRFlagPercentageInTotal(Feature): 468 | name = "bwd_cwr_flag_percentage_in_total" 469 | def extract(self, flow: Flow) 
-> float: 470 | counts = 0 471 | for packet in flow.get_backwardpackets(): 472 | if packet.has_flagCWR(): 473 | counts+=1 474 | return float(counts / len(flow.get_packets())) 475 | 476 | 477 | class BwdRSTFlagPercentageInTotal(Feature): 478 | name = "bwd_rst_flag_percentage_in_total" 479 | def extract(self, flow: Flow) -> float: 480 | counts = 0 481 | for packet in flow.get_backwardpackets(): 482 | if packet.has_flagRST(): 483 | counts+=1 484 | return float(counts / len(flow.get_packets())) 485 | 486 | 487 | class FwdFINFlagPercentageInFwdPackets(Feature): 488 | name = "fwd_fin_flag_percentage_in_fwd_packets" 489 | def extract(self, flow: Flow) -> float: 490 | counts = 0 491 | for packet in flow.get_forwardpackets(): 492 | if packet.has_flagFIN(): 493 | counts += 1 494 | if len(flow.get_forwardpackets()) == 0: 495 | return 0 496 | return float(counts / len(flow.get_forwardpackets())) 497 | 498 | 499 | class FwdPSHFlagPercentageInFwdPackets(Feature): 500 | name = "fwd_psh_flag_percentage_in_fwd_packets" 501 | def extract(self, flow: Flow) -> float: 502 | counts = 0 503 | for packet in flow.get_forwardpackets(): 504 | if packet.has_flagPSH(): 505 | counts+=1 506 | if len(flow.get_forwardpackets()) == 0: 507 | return 0 508 | return float(counts / len(flow.get_forwardpackets())) 509 | 510 | 511 | class FwdURGFlagPercentageInFwdPackets(Feature): 512 | name = "fwd_urg_flag_percentage_in_fwd_packets" 513 | def extract(self, flow: Flow) -> float: 514 | counts = 0 515 | for packet in flow.get_forwardpackets(): 516 | if packet.has_flagURG(): 517 | counts+=1 518 | if len(flow.get_forwardpackets()) == 0: 519 | return 0 520 | return float(counts / len(flow.get_forwardpackets())) 521 | 522 | 523 | class FwdECEFlagPercentageInFwdPackets(Feature): 524 | name = "fwd_ece_flag_percentage_in_fwd_packets" 525 | def extract(self, flow: Flow) -> float: 526 | counts = 0 527 | for packet in flow.get_forwardpackets(): 528 | if packet.has_flagECE(): 529 | counts+=1 530 | if 
len(flow.get_forwardpackets()) == 0: 531 | return 0 532 | return float(counts / len(flow.get_forwardpackets())) 533 | 534 | 535 | class FwdSYNFlagPercentageInFwdPackets(Feature): 536 | name = "fwd_syn_flag_percentage_in_fwd_packets" 537 | def extract(self, flow: Flow) -> float: 538 | counts = 0 539 | for packet in flow.get_forwardpackets(): 540 | if packet.has_flagSYN(): 541 | counts+=1 542 | if len(flow.get_forwardpackets()) == 0: 543 | return 0 544 | return float(counts / len(flow.get_forwardpackets())) 545 | 546 | 547 | class FwdACKFlagPercentageInFwdPackets(Feature): 548 | name = "fwd_ack_flag_percentage_in_fwd_packets" 549 | def extract(self, flow: Flow) -> float: 550 | counts = 0 551 | for packet in flow.get_forwardpackets(): 552 | if packet.has_flagACK(): 553 | counts+=1 554 | if len(flow.get_forwardpackets()) == 0: 555 | return 0 556 | return float(counts / len(flow.get_forwardpackets())) 557 | 558 | 559 | class FwdCWRFlagPercentageInFwdPackets(Feature): 560 | name = "fwd_cwr_flag_percentage_in_fwd_packets" 561 | def extract(self, flow: Flow) -> float: 562 | counts = 0 563 | for packet in flow.get_forwardpackets(): 564 | if packet.has_flagCWR(): 565 | counts+=1 566 | if len(flow.get_forwardpackets()) == 0: 567 | return 0 568 | return float(counts / len(flow.get_forwardpackets())) 569 | 570 | 571 | class FwdRSTFlagPercentageInFwdPackets(Feature): 572 | name = "fwd_rst_flag_percentage_in_fwd_packets" 573 | def extract(self, flow: Flow) -> float: 574 | counts = 0 575 | for packet in flow.get_forwardpackets(): 576 | if packet.has_flagRST(): 577 | counts+=1 578 | if len(flow.get_forwardpackets()) == 0: 579 | return 0 580 | return float(counts / len(flow.get_forwardpackets())) 581 | 582 | 583 | class BwdFINFlagPercentageInBwdPackets(Feature): 584 | name = "bwd_fin_flag_percentage_in_bwd_packets" 585 | def extract(self, flow: Flow) -> float: 586 | counts = 0 587 | for packet in flow.get_backwardpackets(): 588 | if packet.has_flagFIN(): 589 | counts += 1 590 | if 
len(flow.get_backwardpackets()) == 0: 591 | return 0 592 | return float(counts / len(flow.get_backwardpackets())) 593 | 594 | 595 | class BwdPSHFlagPercentageInBwdPackets(Feature): 596 | name = "bwd_psh_flag_percentage_in_bwd_packets" 597 | def extract(self, flow: Flow) -> float: 598 | counts = 0 599 | for packet in flow.get_backwardpackets(): 600 | if packet.has_flagPSH(): 601 | counts+=1 602 | if len(flow.get_backwardpackets()) == 0: 603 | return 0 604 | return float(counts / len(flow.get_backwardpackets())) 605 | 606 | 607 | class BwdURGFlagPercentageInBwdPackets(Feature): 608 | name = "bwd_urg_flag_percentage_in_bwd_packets" 609 | def extract(self, flow: Flow) -> float: 610 | counts = 0 611 | for packet in flow.get_backwardpackets(): 612 | if packet.has_flagURG(): 613 | counts+=1 614 | if len(flow.get_backwardpackets()) == 0: 615 | return 0 616 | return float(counts / len(flow.get_backwardpackets())) 617 | 618 | 619 | class BwdECEFlagPercentageInBwdPackets(Feature): 620 | name = "bwd_ece_flag_percentage_in_bwd_packets" 621 | def extract(self, flow: Flow) -> float: 622 | counts = 0 623 | for packet in flow.get_backwardpackets(): 624 | if packet.has_flagECE(): 625 | counts+=1 626 | if len(flow.get_backwardpackets()) == 0: 627 | return 0 628 | return float(counts / len(flow.get_backwardpackets())) 629 | 630 | 631 | class BwdSYNFlagPercentageInBwdPackets(Feature): 632 | name = "bwd_syn_flag_percentage_in_bwd_packets" 633 | def extract(self, flow: Flow) -> float: 634 | counts = 0 635 | for packet in flow.get_backwardpackets(): 636 | if packet.has_flagSYN(): 637 | counts+=1 638 | if len(flow.get_backwardpackets()) == 0: 639 | return 0 640 | return float(counts / len(flow.get_backwardpackets())) 641 | 642 | 643 | class BwdACKFlagPercentageInBwdPackets(Feature): 644 | name = "bwd_ack_flag_percentage_in_bwd_packets" 645 | def extract(self, flow: Flow) -> float: 646 | counts = 0 647 | for packet in flow.get_backwardpackets(): 648 | if packet.has_flagACK(): 649 | 
counts+=1 650 | if len(flow.get_backwardpackets()) == 0: 651 | return 0 652 | return float(counts / len(flow.get_backwardpackets())) 653 | 654 | 655 | class BwdCWRFlagPercentageInBwdPackets(Feature): 656 | name = "bwd_cwr_flag_percentage_in_bwd_packets" 657 | def extract(self, flow: Flow) -> float: 658 | counts = 0 659 | for packet in flow.get_backwardpackets(): 660 | if packet.has_flagCWR(): 661 | counts+=1 662 | if len(flow.get_backwardpackets()) == 0: 663 | return 0 664 | return float(counts / len(flow.get_backwardpackets())) 665 | 666 | 667 | class BwdRSTFlagPercentageInBwdPackets(Feature): 668 | name = "bwd_rst_flag_percentage_in_bwd_packets" 669 | def extract(self, flow: Flow) -> float: 670 | counts = 0 671 | for packet in flow.get_backwardpackets(): 672 | if packet.has_flagRST(): 673 | counts+=1 674 | if len(flow.get_backwardpackets()) == 0: 675 | return 0 676 | return float(counts / len(flow.get_backwardpackets())) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. 
You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. 
The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 
91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 
122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 
155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 
186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. 
This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ![](https://github.com/ahlashkari/NTLFlowLyzer/blob/master/bccc.jpg) 2 | 3 | # NTLFlowLyzer 4 | 5 | As part of the Understanding Cybersecurity Series (UCS), NTLFlowLyzer is a Python open-source project to extract network layer features from TCP-based network traffic for Anomaly Profiling (AP), which is the second component of the [**NetFlowLyzer**](https://github.com/ahlashkari/NetFlowLyzer).
6 | 7 | NTLFlowLyzer generates bidirectional flows from the Network and Transport Layers of network traffic, where the first packet determines the forward (source to destination) and backward (destination to source) directions; hence, the statistical time-related features can be calculated separately in the forward and backward directions. Additional functionalities include selecting features from the list of existing features, adding new features, and controlling the duration of flow timeout. Moreover, TCP flows are terminated upon connection teardown (by FIN or RST packet), reaching the flow's maximum duration, or being inactive for a certain amount of time (timeout). 8 | 9 | 10 | # Table of Contents 11 | 12 | - [NTLFlowLyzer](#ntlflowlyzer) 13 | - [Table of Contents](#table-of-contents) 14 | - [Installation](#installation) 15 | - [Execution](#execution) 16 | - [Architecture](#architecture) 17 | - [Extracted Features](#extracted-features) 18 | - [Definitions](#definitions) 19 | - [Statistical Information Calculation](#statistical-information-calculation) 20 | - [Output](#output) 21 | - [Copyright (c) 2023](#copyright-c-2023) 22 | - [Contributing](#contributing) 23 | - [Project Team members](#project-team-members) 24 | - [Acknowledgment](#acknowledgment) 25 | 26 | # Installation 27 | 28 | Before installing or running the NTLFlowLyzer package, it's essential to set up the necessary requirements on your system. Begin by ensuring you have both `Python` and `pip` installed and functioning properly (execute the `pip3 --version` command). Then, execute the following command: 29 | 30 | ```bash 31 | pip3 install -r requirements.txt 32 | ``` 33 | 34 | You are now ready to install NTLFlowLyzer.
To proceed, execute the following command in the package's root directory (where the setup.py file is located), which will install the NTLFlowLyzer package on your system: 35 | 36 | ### On Linux: 37 | ```bash 38 | python3 setup.py install 39 | ``` 40 | 41 | ### On Windows: 42 | ```bash 43 | pip3 install . 44 | ``` 45 | 46 | After successfully installing the package, confirm the installation by running the following command: 47 | 48 | ```bash 49 | ntlflowlyzer --version 50 | ``` 51 | 52 | 53 | # Execution 54 | 55 | The core aspect of running NTLFlowLyzer involves preparing the configuration file. This file is designed to facilitate users in customizing the program's behavior with minimal complexity and cost, thus enhancing program scalability. Below, we outline how to prepare the configuration file and subsequently demonstrate how to execute NTLFlowLyzer using it. 56 | 57 | ## Configuration File 58 | 59 | The configuration file is formatted in `JSON`, comprising key-value pairs that enable customization of the package. While some keys are mandatory, others are optional. Below, each key is explained along with its corresponding value: 60 | 61 | * **pcap_file_address** [Required] 62 | 63 | This key specifies the input PCAP file address. The format of the value should be a string. 64 | 65 | **Note**: At this version of NTLFlowLyzer, we only support the `PCAP` format. For other formats such as `PCAPNG`, you must convert them to `PCAP`. To convert `PCAPNG` to `PCAP`, you can use Wireshark. If you prefer command-line tools, you can use the following command: 66 | 67 | ```bash 68 | tshark -F pcap -r {pcapng_file} -w {pcap_file} 69 | ``` 70 | 71 | Replace `{pcapng_file}` with the path to your PCAPNG file and `{pcap_file}` with the desired output PCAP file name. 72 | 73 | * **output_file_address** [Required] 74 | 75 | This key specifies the output CSV file address. The format of the value should be a string. 
76 | 77 | * **label** [Optional] 78 | 79 | This key specifies the value of the `label` column in the output CSV file. The format of the value should be a string. The default value is `Unknown`. 80 | 81 | 82 | * **number_of_threads** [Optional] 83 | 84 | This key specifies the number of threads to be used for all processes, including flow extraction, feature calculation, and output writing. The value must be an integer of at least `3`. The default value is `4`. 85 | 86 | It's important to consider that the optimal value for this option varies based on the system configuration and the composition of the input PCAP file. For instance, if the PCAP file contains a large number of packets (e.g., more than 5 million) and they are all TCP packets, increasing the number of threads might be beneficial. However, if the packets represent a small number of flows and all related packets are contiguous, adding more threads could potentially slow down the program since there are fewer distinct flows. 87 | 88 | As a rule of thumb, the ideal value for this option typically falls between half the number of CPU cores (CPU count) and twice the CPU count. This helps balance computational resources without overwhelming the system. (`0.5 * cpu_count < best_option < 2 * cpu_count`) 89 | 90 | 91 | * **feature_extractor_min_flows** [Optional] 92 | 93 | This key determines the minimum number of finished flows required for the feature extractor thread to initiate its work and extract features from these finished flows. The value must be an integer. The default value is `4000`. 94 | 95 | Selecting a high value for this option will consume more RAM since more flows will be stored in memory, potentially slowing down the entire program. Conversely, choosing a low value for this option can slow down the execution process, as it involves locking the finished flows list and then copying those flows for feature extraction.
These two processes, locking and copying, are slow and can impede other program components. 96 | 97 | 98 | * **writer_min_rows** [Optional] 99 | 100 | This key specifies the minimum number of ready flows (i.e., finished flows from which features have been extracted) required for the writer thread to begin its work of writing the flows to the CSV file. The value must be an integer. The default value is `6000`. 101 | 102 | Opting for a high value for this option will increase RAM usage since more flows will be stored in memory, potentially slowing down the overall program performance. Conversely, selecting a low value for this option can slow down the execution process, involving locking the finished flows list, copying those flows for the writing process, and performing I/O operations to write to the file. These three processes — locking, copying, and I/O — are slow and may impede other program components. 103 | 104 | * **read_packets_count_value_log_info** [Optional] 105 | 106 | This key determines the minimum number of processed packets (i.e., the number of packets read from the PCAP file and assigned to a flow) required for the logger to log. The value must be an integer. The default value is `10,000`. This means that after processing every `10,000` packets, the program will print a statement indicating the number of packets analyzed. 107 | 108 | 109 | * **check_flows_ending_min_flows** [Optional] 110 | 111 | This key specifies the minimum number of ongoing flows (i.e., created flows that have not yet finished) required for checking if they have reached the timeout or maximum flow time value. The value must be an integer. The default value is `2000`. This indicates that if the number of ongoing flows exceeds `2000`, the program will proceed to check all flows for timeout or maximum flow time. 
112 | 113 | 114 | * **capturer_updating_flows_min_value** [Optional] 115 | 116 | This key determines the minimum number of finished flows required to be added to the queue for feature extraction. The value must be an integer. The default value is `2000`. This means that if the number of finished flows exceeds `2000`, the program will move them to a separate list for the feature extractor. 117 | 118 | 119 | * **max_flow_duration** [Optional] 120 | 121 | This key sets the maximum duration of a flow in seconds. The value must be an integer. The default value is `120,000`. This means that if the flow duration exceeds `120,000` seconds, the program will terminate the flow and initiate a new one. 122 | 123 | 124 | * **activity_timeout** [Optional] 125 | 126 | This key defines the flow activity timeout in seconds. The value must be an integer. The default value is `5000`. This means that if `5000` seconds have elapsed since the last packet of the flow, the program will terminate the flow. 127 | 128 | 129 | * **floating_point_unit** [Optional] 130 | 131 | This key specifies the floating-point precision used when formatting the extracted feature values. The value must be in the format `.[UNIT]f`, where `[UNIT]` is the number of decimal places. The default value is `.4f`, which means that the feature values will be rounded to the fourth decimal place. 132 | 133 | 134 | * **max_rows_number** [Optional] 135 | 136 | This key defines the maximum number of rows in the output CSV file. The value must be an integer. The default value is `900,000`. This means that if there are more than `900,000` flows to be written to the CSV file, the program will close the current CSV file and create a new one for the remaining flows. 137 | 138 | 139 | * **features_ignore_list** [Optional] 140 | 141 | This key specifies the features that you do not want to extract. The value must be a list of string values, where each string represents a feature name. The default value is an empty list.
If you include a feature name in this list, the program will skip extracting that feature, and it will not appear in the output CSV file. 142 | 143 | 144 | An example of a configuration file would be like this: 145 | 146 | ```json 147 | { 148 | "pcap_file_address": "/mnt/c/dataset/my_pcap_file.pcap", 149 | "output_file_address": "./output-of-my_pcap_file.csv", 150 | "label": "Benign", 151 | "number_of_threads": 4, 152 | "feature_extractor_min_flows": 2500, 153 | "writer_min_rows": 1000, 154 | "read_packets_count_value_log_info": 1000000, 155 | "check_flows_ending_min_flows": 20000, 156 | "capturer_updating_flows_min_value": 5000, 157 | "max_flow_duration": 120000, 158 | "activity_timeout": 300, 159 | "floating_point_unit": ".4f", 160 | "max_rows_number": 800000, 161 | "features_ignore_list": ["duration", "src_ip"] 162 | } 163 | ``` 164 | 165 | 166 | In general, we recommend adjusting the values of the following options: `number_of_threads`, `feature_extractor_min_flows`, `writer_min_rows`, `check_flows_ending_min_flows`, and `capturer_updating_flows_min_value`, based on your system configuration. This is particularly important if your PCAP file is large (usually more than 4 GB with over 1 million TCP packets), to optimize program efficiency. 167 | 168 | 169 | ## Argument Parser 170 | 171 | You can use `-h` to see different options of the program. 172 | 173 | To execute NTLFlowLyzer, simply run the following command: 174 | 175 | ```bash 176 | ntlflowlyzer -c YOUR_CONFIG_FILE 177 | ``` 178 | 179 | Replace `YOUR_CONFIG_FILE` with the path to your configuration file. 180 | 181 | 182 | Moreover, this project has been successfully tested on Ubuntu 20.04, Ubuntu 22.04, Windows 10, and Windows 11. It should work on other versions of Ubuntu OS (or even Debian OS) as long as your system has the necessary Python3 packages (you can find the required packages listed in the `requirements.txt` file). 
183 | 184 | 185 | # Architecture 186 | 187 | 188 | ![](./Architecture.svg) 189 | 190 | 191 | ---- 192 | 193 | # Extracted Features 194 | 195 | We currently have 348 features that are as follows (features' explanation will be added): 196 | 197 | 1. flow_id 198 | 1. src_ip 199 | 1. src_port 200 | 1. dst_ip 201 | 1. dst_port 202 | 1. protocol 203 | 1. timestamp 204 | 1. Duration 205 | 1. PacketsCount 206 | 1. FwdPacketsCount 207 | 1. BwdPacketsCount 208 | 1. TotalPayloadBytes 209 | 1. FwdTotalPayloadBytes 210 | 1. BwdTotalPayloadBytes 211 | 1. PayloadBytesMax 212 | 1. PayloadBytesMin 213 | 1. PayloadBytesMean 214 | 1. PayloadBytesStd 215 | 1. PayloadBytesVariance 216 | 1. PayloadBytesMedian 217 | 1. PayloadBytesSkewness 218 | 1. PayloadBytesCov 219 | 1. PayloadBytesMode 220 | 1. FwdPayloadBytesMax 221 | 1. FwdPayloadBytesMin 222 | 1. FwdPayloadBytesMean 223 | 1. FwdPayloadBytesStd 224 | 1. FwdPayloadBytesVariance 225 | 1. FwdPayloadBytesMedian 226 | 1. FwdPayloadBytesSkewness 227 | 1. FwdPayloadBytesCov 228 | 1. FwdPayloadBytesMode 229 | 1. BwdPayloadBytesMax 230 | 1. BwdPayloadBytesMin 231 | 1. BwdPayloadBytesMean 232 | 1. BwdPayloadBytesStd 233 | 1. BwdPayloadBytesVariance 234 | 1. BwdPayloadBytesMedian 235 | 1. BwdPayloadBytesSkewness 236 | 1. BwdPayloadBytesCov 237 | 1. BwdPayloadBytesMode 238 | 1. TotalHeaderBytes 239 | 1. MaxHeaderBytes 240 | 1. MinHeaderBytes 241 | 1. MeanHeaderBytes 242 | 1. StdHeaderBytes 243 | 1. MedianHeaderBytes 244 | 1. SkewnessHeaderBytes 245 | 1. CoVHeaderBytes 246 | 1. ModeHeaderBytes 247 | 1. VarianceHeaderBytes 248 | 1. FwdTotalHeaderBytes 249 | 1. FwdMaxHeaderBytes 250 | 1. FwdMinHeaderBytes 251 | 1. FwdMeanHeaderBytes 252 | 1. FwdStdHeaderBytes 253 | 1. FwdMedianHeaderBytes 254 | 1. FwdSkewnessHeaderBytes 255 | 1. FwdCoVHeaderBytes 256 | 1. FwdModeHeaderBytes 257 | 1. FwdVarianceHeaderBytes 258 | 1. BwdTotalHeaderBytes 259 | 1. BwdMaxHeaderBytes 260 | 1. BwdMinHeaderBytes 261 | 1. BwdMeanHeaderBytes 262 | 1. 
BwdStdHeaderBytes 263 | 1. BwdMedianHeaderBytes 264 | 1. BwdSkewnessHeaderBytes 265 | 1. BwdCoVHeaderBytes 266 | 1. BwdModeHeaderBytes 267 | 1. BwdVarianceHeaderBytes 268 | 1. FwdSegmentSizeMean 269 | 1. FwdSegmentSizeMax 270 | 1. FwdSegmentSizeMin 271 | 1. FwdSegmentSizeStd 272 | 1. FwdSegmentSizeVariance 273 | 1. FwdSegmentSizeMedian 274 | 1. FwdSegmentSizeSkewness 275 | 1. FwdSegmentSizeCov 276 | 1. FwdSegmentSizeMode 277 | 1. BwdSegmentSizeMean 278 | 1. BwdSegmentSizeMax 279 | 1. BwdSegmentSizeMin 280 | 1. BwdSegmentSizeStd 281 | 1. BwdSegmentSizeVariance 282 | 1. BwdSegmentSizeMedian 283 | 1. BwdSegmentSizeSkewness 284 | 1. BwdSegmentSizeCov 285 | 1. BwdSegmentSizeMode 286 | 1. SegmentSizeMean 287 | 1. SegmentSizeMax 288 | 1. SegmentSizeMin 289 | 1. SegmentSizeStd 290 | 1. SegmentSizeVariance 291 | 1. SegmentSizeMedian 292 | 1. SegmentSizeSkewness 293 | 1. SegmentSizeCov 294 | 1. SegmentSizeMode 295 | 1. FwdInitWinBytes 296 | 1. BwdInitWinBytes 297 | 1. ActiveMin 298 | 1. ActiveMax 299 | 1. ActiveMean 300 | 1. ActiveStd 301 | 1. ActiveMedian 302 | 1. ActiveSkewness 303 | 1. ActiveCoV 304 | 1. ActiveMode 305 | 1. ActiveVariance 306 | 1. IdleMin 307 | 1. IdleMax 308 | 1. IdleMean 309 | 1. IdleStd 310 | 1. IdleMedian 311 | 1. IdleSkewness 312 | 1. IdleCoV 313 | 1. IdleMode 314 | 1. IdleVariance 315 | 1. BytesRate 316 | 1. FwdBytesRate 317 | 1. BwdBytesRate 318 | 1. PacketsRate 319 | 1. BwdPacketsRate 320 | 1. FwdPacketsRate 321 | 1. DownUpRate 322 | 1. AvgFwdBytesPerBulk 323 | 1. AvgFwdPacketsPerBulk 324 | 1. AvgFwdBulkRate 325 | 1. AvgBwdBytesPerBulk 326 | 1. AvgBwdPacketsPerBulk 327 | 1. AvgBwdBulkRate 328 | 1. FwdBulkStateCount 329 | 1. FwdBulkSizeTotal 330 | 1. FwdBulkPacketCount 331 | 1. FwdBulkDuration 332 | 1. BwdBulkStateCount 333 | 1. BwdBulkSizeTotal 334 | 1. BwdBulkPacketCount 335 | 1. BwdBulkDuration 336 | 1. FINFlagCounts 337 | 1. PSHFlagCounts 338 | 1. URGFlagCounts 339 | 1. ECEFlagCounts 340 | 1. SYNFlagCounts 341 | 1. ACKFlagCounts 342 | 1. 
CWRFlagCounts 343 | 1. RSTFlagCounts 344 | 1. FwdFINFlagCounts 345 | 1. FwdPSHFlagCounts 346 | 1. FwdURGFlagCounts 347 | 1. FwdECEFlagCounts 348 | 1. FwdSYNFlagCounts 349 | 1. FwdACKFlagCounts 350 | 1. FwdCWRFlagCounts 351 | 1. FwdRSTFlagCounts 352 | 1. BwdFINFlagCounts 353 | 1. BwdPSHFlagCounts 354 | 1. BwdURGFlagCounts 355 | 1. BwdECEFlagCounts 356 | 1. BwdSYNFlagCounts 357 | 1. BwdACKFlagCounts 358 | 1. BwdCWRFlagCounts 359 | 1. BwdRSTFlagCounts 360 | 1. FINFlagPercentageInTotal 361 | 1. PSHFlagPercentageInTotal 362 | 1. URGFlagPercentageInTotal 363 | 1. ECEFlagPercentageInTotal 364 | 1. SYNFlagPercentageInTotal 365 | 1. ACKFlagPercentageInTotal 366 | 1. CWRFlagPercentageInTotal 367 | 1. RSTFlagPercentageInTotal 368 | 1. FwdFINFlagPercentageInTotal 369 | 1. FwdPSHFlagPercentageInTotal 370 | 1. FwdURGFlagPercentageInTotal 371 | 1. FwdECEFlagPercentageInTotal 372 | 1. FwdSYNFlagPercentageInTotal 373 | 1. FwdACKFlagPercentageInTotal 374 | 1. FwdCWRFlagPercentageInTotal 375 | 1. FwdRSTFlagPercentageInTotal 376 | 1. BwdFINFlagPercentageInTotal 377 | 1. BwdPSHFlagPercentageInTotal 378 | 1. BwdURGFlagPercentageInTotal 379 | 1. BwdECEFlagPercentageInTotal 380 | 1. BwdSYNFlagPercentageInTotal 381 | 1. BwdACKFlagPercentageInTotal 382 | 1. BwdCWRFlagPercentageInTotal 383 | 1. BwdRSTFlagPercentageInTotal 384 | 1. FwdFINFlagPercentageInFwdPackets 385 | 1. FwdPSHFlagPercentageInFwdPackets 386 | 1. FwdURGFlagPercentageInFwdPackets 387 | 1. FwdECEFlagPercentageInFwdPackets 388 | 1. FwdSYNFlagPercentageInFwdPackets 389 | 1. FwdACKFlagPercentageInFwdPackets 390 | 1. FwdCWRFlagPercentageInFwdPackets 391 | 1. FwdRSTFlagPercentageInFwdPackets 392 | 1. BwdFINFlagPercentageInBwdPackets 393 | 1. BwdPSHFlagPercentageInBwdPackets 394 | 1. BwdURGFlagPercentageInBwdPackets 395 | 1. BwdECEFlagPercentageInBwdPackets 396 | 1. BwdSYNFlagPercentageInBwdPackets 397 | 1. BwdACKFlagPercentageInBwdPackets 398 | 1. BwdCWRFlagPercentageInBwdPackets 399 | 1. BwdRSTFlagPercentageInBwdPackets 400 | 1. 
PacketsIATMean 401 | 1. PacketsIATStd 402 | 1. PacketsIATMax 403 | 1. PacketsIATMin 404 | 1. PacketsIATSum 405 | 1. PacketsIATMedian 406 | 1. PacketsIATSkewness 407 | 1. PacketsIATCoV 408 | 1. PacketsIATMode 409 | 1. PacketsIATVariance 410 | 1. FwdPacketsIATMean 411 | 1. FwdPacketsIATStd 412 | 1. FwdPacketsIATMax 413 | 1. FwdPacketsIATMin 414 | 1. FwdPacketsIATSum 415 | 1. FwdPacketsIATMedian 416 | 1. FwdPacketsIATSkewness 417 | 1. FwdPacketsIATCoV 418 | 1. FwdPacketsIATMode 419 | 1. FwdPacketsIATVariance 420 | 1. BwdPacketsIATMean 421 | 1. BwdPacketsIATStd 422 | 1. BwdPacketsIATMax 423 | 1. BwdPacketsIATMin 424 | 1. BwdPacketsIATSum 425 | 1. BwdPacketsIATMedian 426 | 1. BwdPacketsIATSkewness 427 | 1. BwdPacketsIATCoV 428 | 1. BwdPacketsIATMode 429 | 1. BwdPacketsIATVariance 430 | 1. SubflowFwdPackets 431 | 1. SubflowBwdPackets 432 | 1. SubflowFwdBytes 433 | 1. SubflowBwdBytes 434 | 1. DeltaStart 435 | 1. HandshakeDuration 436 | 1. HandshakeState 437 | 1. PacketsDeltaTimeMin 438 | 1. PacketsDeltaTimeMax 439 | 1. PacketsDeltaTimeMean 440 | 1. PacketsDeltaTimeMode 441 | 1. PacketsDeltaTimeVariance 442 | 1. PacketsDeltaTimeStd 443 | 1. PacketsDeltaTimeMedian 444 | 1. PacketsDeltaTimeSkewness 445 | 1. PacketsDeltaTimeCoV 446 | 1. BwdPacketsDeltaTimeMin 447 | 1. BwdPacketsDeltaTimeMax 448 | 1. BwdPacketsDeltaTimeMean 449 | 1. BwdPacketsDeltaTimeMode 450 | 1. BwdPacketsDeltaTimeVariance 451 | 1. BwdPacketsDeltaTimeStd 452 | 1. BwdPacketsDeltaTimeMedian 453 | 1. BwdPacketsDeltaTimeSkewness 454 | 1. BwdPacketsDeltaTimeCoV 455 | 1. FwdPacketsDeltaTimeMin 456 | 1. FwdPacketsDeltaTimeMax 457 | 1. FwdPacketsDeltaTimeMean 458 | 1. FwdPacketsDeltaTimeMode 459 | 1. FwdPacketsDeltaTimeVariance 460 | 1. FwdPacketsDeltaTimeStd 461 | 1. FwdPacketsDeltaTimeMedian 462 | 1. FwdPacketsDeltaTimeSkewness 463 | 1. FwdPacketsDeltaTimeCoV 464 | 1. PacketsDeltaLenMin 465 | 1. PacketsDeltaLenMax 466 | 1. PacketsDeltaLenMean 467 | 1. PacketsDeltaLenMode 468 | 1. PacketsDeltaLenVariance 469 | 1. 
PacketsDeltaLenStd 470 | 1. PacketsDeltaLenMedian 471 | 1. PacketsDeltaLenSkewness 472 | 1. PacketsDeltaLenCoV 473 | 1. BwdPacketsDeltaLenMin 474 | 1. BwdPacketsDeltaLenMax 475 | 1. BwdPacketsDeltaLenMean 476 | 1. BwdPacketsDeltaLenMode 477 | 1. BwdPacketsDeltaLenVariance 478 | 1. BwdPacketsDeltaLenStd 479 | 1. BwdPacketsDeltaLenMedian 480 | 1. BwdPacketsDeltaLenSkewness 481 | 1. BwdPacketsDeltaLenCoV 482 | 1. FwdPacketsDeltaLenMin 483 | 1. FwdPacketsDeltaLenMax 484 | 1. FwdPacketsDeltaLenMean 485 | 1. FwdPacketsDeltaLenMode 486 | 1. FwdPacketsDeltaLenVariance 487 | 1. FwdPacketsDeltaLenStd 488 | 1. FwdPacketsDeltaLenMedian 489 | 1. FwdPacketsDeltaLenSkewness 490 | 1. FwdPacketsDeltaLenCoV 491 | 1. HeaderBytesDeltaLenMin 492 | 1. HeaderBytesDeltaLenMax 493 | 1. HeaderBytesDeltaLenMean 494 | 1. HeaderBytesDeltaLenMode 495 | 1. HeaderBytesDeltaLenVariance 496 | 1. HeaderBytesDeltaLenStd 497 | 1. HeaderBytesDeltaLenMedian 498 | 1. HeaderBytesDeltaLenSkewness 499 | 1. HeaderBytesDeltaLenCoV 500 | 1. BwdHeaderBytesDeltaLenMin 501 | 1. BwdHeaderBytesDeltaLenMax 502 | 1. BwdHeaderBytesDeltaLenMean 503 | 1. BwdHeaderBytesDeltaLenMode 504 | 1. BwdHeaderBytesDeltaLenVariance 505 | 1. BwdHeaderBytesDeltaLenStd 506 | 1. BwdHeaderBytesDeltaLenMedian 507 | 1. BwdHeaderBytesDeltaLenSkewness 508 | 1. BwdHeaderBytesDeltaLenCoV 509 | 1. FwdHeaderBytesDeltaLenMin 510 | 1. FwdHeaderBytesDeltaLenMax 511 | 1. FwdHeaderBytesDeltaLenMean 512 | 1. FwdHeaderBytesDeltaLenMode 513 | 1. FwdHeaderBytesDeltaLenVariance 514 | 1. FwdHeaderBytesDeltaLenStd 515 | 1. FwdHeaderBytesDeltaLenMedian 516 | 1. FwdHeaderBytesDeltaLenSkewness 517 | 1. FwdHeaderBytesDeltaLenCoV 518 | 1. PayloadBytesDeltaLenMin 519 | 1. PayloadBytesDeltaLenMax 520 | 1. PayloadBytesDeltaLenMean 521 | 1. PayloadBytesDeltaLenMode 522 | 1. PayloadBytesDeltaLenVariance 523 | 1. PayloadBytesDeltaLenStd 524 | 1. PayloadBytesDeltaLenMedian 525 | 1. PayloadBytesDeltaLenSkewness 526 | 1. PayloadBytesDeltaLenCoV 527 | 1. 
BwdPayloadBytesDeltaLenMin 528 | 1. BwdPayloadBytesDeltaLenMax 529 | 1. BwdPayloadBytesDeltaLenMean 530 | 1. BwdPayloadBytesDeltaLenMode 531 | 1. BwdPayloadBytesDeltaLenVariance 532 | 1. BwdPayloadBytesDeltaLenStd 533 | 1. BwdPayloadBytesDeltaLenMedian 534 | 1. BwdPayloadBytesDeltaLenSkewness 535 | 1. BwdPayloadBytesDeltaLenCoV 536 | 1. FwdPayloadBytesDeltaLenMin 537 | 1. FwdPayloadBytesDeltaLenMax 538 | 1. FwdPayloadBytesDeltaLenMean 539 | 1. FwdPayloadBytesDeltaLenMode 540 | 1. FwdPayloadBytesDeltaLenVariance 541 | 1. FwdPayloadBytesDeltaLenStd 542 | 1. FwdPayloadBytesDeltaLenMedian 543 | 1. FwdPayloadBytesDeltaLenSkewness 544 | 1. FwdPayloadBytesDeltaLenCoV 545 | 546 | ## Definitions 547 | 548 | In this section, we provide clear definitions for several categories of features essential for understanding network traffic analysis within the context of the NTLFlowLyzer. 549 | 550 | ### 1.1. Flow Bulk Calculation 551 | 552 | Flow bulk is determined based on certain criteria such as the number of packets transferred within a short time interval 553 | or a specific pattern of packet transmission. In NTLFlowLyzer, flow bulk is identified based on the following criteria: 554 | * A bulk transfer is initiated when a certain number of consecutive packets are observed within a short time frame (1 555 | second in this case). 556 | * The bulk transfer is considered terminated if the gap between subsequent packets exceeds the defined threshold (1 557 | second). 558 | * During a bulk transfer, packet count, total size of transferred data, and duration are accumulated. 559 | * The flow maintains separate attributes (fbulkDuration, fbulkPacketCount, fbulkSizeTotal, fbulkStateCount) to track 560 | these bulk transfer characteristics. 561 | 562 | *Example*: For a flow, the bulk transfer is initiated when 5 consecutive packets are transferred within 1 second.
During the bulk transfer, the flow's `fbulkPacketCount` is incremented, and the total size of the data transferred is accumulated in `fbulkSizeTotal`. If the gap between packets exceeds 1 second, the bulk transfer is terminated, and `fbulkDuration` records the total duration. 563 | 564 | 565 | ### 1.2. Subflow Calculation 566 | Subflows are identified based on changes in packet transmission patterns within the flow. In NTLFlowLyzer, subflows are 567 | calculated as follows: 568 | 569 | * Subflow count (sfcount) is incremented whenever there’s a gap of more than 1 second between consecutive packets. 570 | 571 | * This indicates a potential change in the transmission pattern, suggesting the beginning of a new subflow. 572 | 573 | * The subflow count provides insights into how many distinct patterns of packet transmission occurred within the flow. 574 | 575 | *Example*: A flow's subflow count (`sfcount`) is incremented each time there's a gap of more than 1 second between packets. This indicates a change in the transmission pattern and the start of a new subflow. If a flow contains several subflows, `sfcount` tracks how many distinct subflows exist. 576 | 577 | 578 | ### 1.3 Time 579 | 580 | #### 1.3.1 Idle Time 581 | Idle time refers to the duration during which there is no activity observed within the network flow. It represents periods of 582 | inactivity or low activity between packet transmissions. Idle time is calculated based on the timestamps of consecutive packets 583 | within the flow. Whenever there is a gap between the arrival times of successive packets exceeding a predefined threshold 584 | (in this case, 100 seconds), it is considered as an idle period. The start and end times of each idle period are recorded to 585 | determine the duration of idle time. The cumulative idle time for the flow is computed by summing up the durations of all 586 | idle periods observed. 
587 | 588 | *Example*: Idle time is calculated as the total duration where no packets are transferred in a flow. For example, if there’s a 120-second gap between packets, the idle time is 120 seconds, and this value is added to the cumulative `idleTime` for the flow. 589 | 590 | 591 | 592 | #### 1.3.2 Active Time 593 | 594 | Active time represents the duration during which packet transmissions occur within the network flow. It indicates periods 595 | of activity or high activity characterized by the exchange of packets. Active time is calculated based on the timestamps of 596 | packets within the flow. Whenever packet transmissions occur, the start and end times of the active period are recorded. The 597 | duration of active time is computed as the difference between the start and end times of each active period. The cumulative 598 | active time for the flow is calculated by summing up the durations of all active periods observed. 599 | 600 | *Example*: Active time represents the period when packet transmissions occur. For instance, if there’s consistent packet exchange over a span of 300 seconds, this is recorded as an active period. The cumulative `activeTime` for the flow sums the durations of all such active periods. 601 | 602 | 603 | 604 | ### 1.4. Packet Delta Time (DT) 605 | 606 | Packet delta time (DT) is the time difference between consecutive packets in a flow, measured for both forward and backward directions. 607 | 608 | *Example*: Packet delta time (DT) measures the time between consecutive packets. If packet A is received at time 100ms and packet B is received at 150ms, the `packetDeltaTime` between them would be 50ms. 609 | 610 | 611 | ### 1.5. Payload 612 | The size of the TCP payload, which is the data portion of the packet, excluding the header. 613 | 614 | *Example*: The TCP payload size for each packet in a flow is recorded. For example, a packet may have a payload size of 512 bytes, which is stored in the `payloadSize` feature. 
615 | 616 | 617 | ### 1.6. Header 618 | The size of the TCP header, which contains control information like source and destination addresses, sequence numbers, and flags. 619 | 620 | *Example*: The TCP header size for a packet might be 40 bytes, representing control information such as sequence numbers and flags. This value is stored as `headerSize`. 621 | 622 | 623 | ### 1.7. Payload Delta Length (DL) 624 | Payload delta length (DL) is the difference in the TCP payload size between consecutive packets in a flow. 625 | 626 | *Example*: The payload delta length (DL) between two consecutive packets is the difference in their payload sizes. For instance, if packet A has a payload of 500 bytes and packet B has 700 bytes, the `payloadDeltaLength` would be 200 bytes. 627 | 628 | 629 | ### 1.8. Header Delta Length (DL) 630 | Header delta length (DL) is the difference in the TCP header size between consecutive packets in a flow. 631 | 632 | *Example*: The header delta length (DL) represents the difference in header sizes between consecutive packets. For example, if packet A has a header size of 40 bytes and packet B has a header size of 60 bytes, the `headerDeltaLength` would be 20 bytes. 633 | 634 | 635 | ### 1.9. Packet Delta Length (DL) 636 | Packet delta length (DL) is the difference in the total packet size, including the whole packet, between consecutive packets in a flow. 637 | 638 | *Example*: Packet delta length (DL) is the difference in total packet size, including payload and header, between consecutive packets. If packet A has a total size of 800 bytes and packet B has 1000 bytes, the `packetDeltaLength` is 200 bytes. 639 | 640 | 641 | ### 1.10. Flag Count 642 | The number of occurrences of each TCP flag (e.g., SYN, ACK, FIN) in a flow. 643 | 644 | *Example*: The number of TCP flags (such as SYN, ACK, FIN) is tracked. For instance, a flow may have 10 SYN flags and 15 ACK flags, recorded as `flagCount`. 645 | 646 | 647 | ### 1.11. 
Flag Count Percentage in Total 648 | The percentage of each TCP flag type relative to the total number of flags in all packets in a flow. 649 | 650 | *Example*: The percentage of SYN flags relative to all flags in the flow is calculated. If a flow contains 100 flags and 10 of them are SYN, the `flagCountPercentageTotal` for SYN is 10%. 651 | 652 | 653 | 654 | ### 1.12. Flag Count Percentage in Direction 655 | The percentage of each TCP flag type in either the forward or backward direction in a flow. 656 | 657 | *Example*: The percentage of SYN flags in the forward direction is tracked separately. If a flow has 50 SYN flags in the forward direction out of 200 total forward packets, the `flagCountPercentageDirection` for SYN is 25%. 658 | 659 | 660 | ### 1.13. Rate 661 | The rate of packet transmission in a flow is calculated as the total number of packets or the size of packets/headers/payloads divided by the total duration of the flow. This metric indicates how many packets/bytes are being transmitted over a specific time frame. 662 | 663 | *Example*: If a flow contains 120 packets transmitted over a duration of 60 seconds, the `packetRate` would be 2 packets per second. 664 | 665 | 666 | ### 1.14. Inter-Arrival Time (IAT) 667 | Inter-Arrival Time (IAT) measures the time intervals between consecutive packet arrivals in a flow. 668 | 669 | *Example*: Given a series of packets with the following timestamps: 670 | - Arrival time of Packet 1 at the destination: 0ms 671 | - Arrival time of Packet 2 at the destination: 50ms 672 | - Arrival time of Packet 3 at the destination: 120ms 673 | 674 | The IAT values calculated would be: 675 | - IAT between Packet 1 and Packet 2: \(50ms - 0ms = 50ms\) 676 | - IAT between Packet 2 and Packet 3: \(120ms - 50ms = 70ms\) 677 | 678 | Thus, the `packets_IAT_mean` would be the mean of these IAT values, which equals 60ms in this case.
679 | 680 | 681 | ## Statistical Information Calculation 682 | 683 | We use different libraries to calculate various mathematical equations. Below you can see the libraries and their brief definition based on their documentation: 684 | 685 | + [**statistics**](https://docs.python.org/3/library/statistics.html) 686 | 687 | This module provides functions for calculating mathematical statistics of numeric (Real-valued) data. 688 | 689 | The module is not intended to be a competitor to third-party libraries such as NumPy, SciPy, or proprietary full-featured statistics packages aimed at professional statisticians such as Minitab, SAS and Matlab. It is aimed at the level of graphing and scientific calculators. 690 | 691 | 692 | Nine mathematical functions are used to extract different features. You can see how those functions are calculated in the NTLFlowLyzer below: 693 | 694 | 1. Min 695 | 696 | You know what it means :). The 'min' function (Python built-in) calculates the minimum value in a given list. 697 | 698 | 1. Max 699 | 700 | Same as min. The 'max' function (Python built-in) calculates the maximum value in a given list. 701 | 702 | 1. Mean 703 | 704 | The ['mean'](https://docs.python.org/3/library/statistics.html#statistics.mean) function from 'statistics' library (Python built-in) calculates the mean value of a given list. According to the library documentation: 705 | 706 | The arithmetic mean is the sum of the data divided by the number of data points. It is commonly called “the average”, although it is only one of many different mathematical averages. It is a measure of the central location of the data. 707 | 708 | This runs faster than the mean() function and it always returns a float. The data may be a sequence or iterable. If the input dataset is empty, raises a StatisticsError. 709 | 710 | 711 | 1.
Standard Deviation 712 | 713 | The ['pstdev'](https://docs.python.org/3/library/statistics.html#statistics.pstdev) function from 'statistics' library (Python built-in) calculates the population standard deviation of a given list. According to the library documentation: 714 | 715 | Return the population standard deviation (the square root of the population variance). See pvariance() for arguments and other details. 716 | 717 | 718 | 719 | 720 | ---- 721 | 722 | 723 | # Output 724 | 725 | 726 | | flow_id | timestamp | src_ip | src_port | dst_ip | dst_port | protocol | duration | packets_count | fwd_packets_count | bwd_packets_count | total_payload_bytes | fwd_total_payload_bytes | bwd_total_payload_bytes | payload_bytes_max | payload_bytes_min | payload_bytes_mean | payload_bytes_std | payload_bytes_variance | fwd_payload_bytes_max | fwd_payload_bytes_min | fwd_payload_bytes_mean | fwd_payload_bytes_std | fwd_payload_bytes_variance | bwd_payload_bytes_max | bwd_payload_bytes_min | bwd_payload_bytes_mean | bwd_payload_bytes_std | bwd_payload_bytes_variance | total_header_bytes | max_header_bytes | min_header_bytes | mean_header_bytes | std_header_bytes | fwd_total_header_bytes | fwd_max_header_bytes | fwd_min_header_bytes | fwd_mean_header_bytes | fwd_std_header_bytes | bwd_total_header_bytes | bwd_max_header_bytes | bwd_min_header_bytes | bwd_mean_header_bytes | bwd_std_header_bytes | fwd_avg_segment_size | bwd_avg_segment_size | avg_segment_size | fwd_init_win_bytes | bwd_init_win_bytes | active_min | active_max | active_mean | active_std | idle_min | idle_max | idle_mean | idle_std | bytes_rate | fwd_bytes_rate | bwd_bytes_rate | packets_rate | bwd_packets_rate | fwd_packets_rate | down_up_rate | avg_fwd_bytes_per_bulk | avg_fwd_packets_per_bulk | avg_fwd_bulk_rate | avg_bwd_bytes_per_bulk | avg_bwd_packets_bulk_rate | avg_bwd_bulk_rate | fwd_bulk_state_count | fwd_bulk_total_size | fwd_bulk_per_packet | fwd_bulk_duration | bwd_bulk_state_count | bwd_bulk_total_size |
bwd_bulk_per_packet | bwd_bulk_duration | fin_flag_counts | psh_flag_counts | urg_flag_counts | ece_flag_counts | syn_flag_counts | ack_flag_counts | cwr_flag_counts | rst_flag_counts | fwd_fin_flag_counts | fwd_psh_flag_counts | fwd_urg_flag_counts | fwd_ece_flag_counts | fwd_syn_flag_counts | fwd_ack_flag_counts | fwd_cwr_flag_counts | fwd_rst_flag_counts | bwd_fin_flag_counts | bwd_psh_flag_counts | bwd_urg_flag_counts | bwd_ece_flag_counts | bwd_syn_flag_counts | bwd_ack_flag_counts | bwd_cwr_flag_counts | bwd_rst_flag_counts | packets_IAT_mean | packet_IAT_std | packet_IAT_max | packet_IAT_min | packet_IAT_total | fwd_packets_IAT_mean | fwd_packets_IAT_std | fwd_packets_IAT_max | fwd_packets_IAT_min | fwd_packets_IAT_total | bwd_packets_IAT_mean | bwd_packets_IAT_std | bwd_packets_IAT_max | bwd_packets_IAT_min | bwd_packets_IAT_total | subflow_fwd_packets | subflow_bwd_packets | subflow_fwd_bytes | subflow_bwd_bytes | 727 | | :-----------------------------------------------------------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :------------: | :----------------: | 
:----------------: | :------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :----------------: | :------------: | :----------------: | :----------------: | :----------------: 728 | | 192.168.43.116_52807_94.182.113.152_443_TCP_2022-07-27 18:15:06.851907 | 2022-07-27 14:15:06.851907 | 192.168.43.116 | 52807 | 94.182.113.152 | 443 | TCP | 35.190285 | 160 | 57 | 103 | 107851 | 6506 | 101345 | 1400 | 0 | 674.0687500000000227373675443232059478759765625000000000000000000000 | 641.5775491111246537911938503384590148925781250000000000000000000000 | 411621.7515234375023283064365386962890625000000000000000000000000000000 | 1400 | 0 | 674.0687500000000227373675443232059478759765625000000000000000000000 | 
641.5775491111246537911938503384590148925781250000000000000000000000 | 28619.4890735610933916177600622177124023437500000000000000000000000000 | 1400 | 0 | 674.0687500000000227373675443232059478759765625000000000000000000000 | 641.5775491111246537911938503384590148925781250000000000000000000000 | 354057.9468375907163135707378387451171875000000000000000000000000000000 | 3224 | 32 | 20 | 20.1499999999999985789145284797996282577514648437500000000000000000 | 1.3332291625973382576120229714433662593364715576171875000000000000 | 1152 | 32 | 20 | 20.2105263157894725623009435366839170455932617187500000000000000000 | 1.5754346891679753550619125235243700444698333740234375000000000000 | 2072 | 32 | 20 | 20.1165048543689337634532421361654996871948242187500000000000000000 | 1.1766413520421838967422445421107113361358642578125000000000000000 | 114.14035087719299 | 983.9320388349514 | 674.06875 | 64240 | 64240 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3064.794729568118 | 184.88057144180559 | 2879.914158126312 | 4.5467094114185205 | 2.9269441836006727 | 1.6197652278178478 | 1.8070175438596492 | 1402.0 | 8.0 | 135714.63143119888 | 24633.25 | 20.0 | 1305955.0159710534 | 2 | 2804 | 16 | 0.020661 | 4 | 98533 | 80 | 0.075449 | 2 | 87 | 0 | 0 | 2 | 159 | 0 | 0 | 1 | 31 | 0 | 0 | 1 | 56 | 0 | 0 | 1 | 56 | 0 | 0 | 1 | 103 | 0 | 0 | 0.2213225471698113400176310960887349210679531097412109375000000000 | 2.3877912454750056525654144934378564357757568359375000000000000000 | 29.947797 | 0.0 | 35.190285 | 0.6283979464285713856241954999859444797039031982421875000000000000 | 3.9915945479897789738288338412530720233917236328125000000000000000 | 29.947841 | 5.7e-05 | 35.190285 | 0.3447076274509803806012087079579941928386688232421875000000000000 | 2.9789979621974613799295639182673767209053039550781250000000000000 | 29.991346 | 0.0 | 35.160177999999995 | 28.5 | 51.5 | 3253.0 | 3253.0 | 729 | | 192.168.43.116_64362_104.21.69.158_443_UDP_2022-07-27 18:14:09.705289 | 2022-07-27 14:14:09.705289 | 192.168.43.116 | 64362 
| 104.21.69.158 | 443 | UDP | 12.018215 | 1834 | 375 | 1459 | 1665985 | 37224 | 1628761 | 1250 | 23 | 908.3887677208288096153410151600837707519531250000000000000000000000 | 474.2885746274578195880167186260223388671875000000000000000000000000 | 224949.6520221456012222915887832641601562500000000000000000000000000000 | 1250 | 23 | 908.3887677208288096153410151600837707519531250000000000000000000000 | 474.2885746274578195880167186260223388671875000000000000000000000000 | 23478.7703040000014880206435918807983398437500000000000000000000000000 | 1250 | 23 | 908.3887677208288096153410151600837707519531250000000000000000000000 | 474.2885746274578195880167186260223388671875000000000000000000000000 | 65212.9882110095422831363976001739501953125000000000000000000000000000 | 14672 | 8 | 8 | 8.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 3000 | 8 | 8 | 8.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 11672 | 8 | 8 | 8.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 99.264 | 1116.354352296093 | 908.3887677208288 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 138621.66719433793 | 3097.2985588958095 | 135524.3686354421 | 152.60169667458936 | 121.39905967733145 | 31.202636997257912 | 3.8906666666666667 | 1750.5 | 7.916666666666667 | 483352.1249913712 | 10010.292517006803 | 8.82312925170068 | 2534980.3525684644 | 12 | 21006 | 95 | 0.043459 | 147 | 1471513 | 1297 | 0.580483 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0065565821058374247967681647253357368754222989082336425781250000 | 0.1664041060697670770807121698453556746244430541992187500000000000 | 6.901232 | 0.0 | 12.018215000000003 | 0.0321342647058823552286277447365137049928307533264160156250000000 | 
0.3685040949375026908541030934429727494716644287109375000000000000 | 6.901232 | 8.5e-05 | 12.018215000000003 | 0.0081416961591220856492290280925772094633430242538452148437500000 | 0.1911619764558343259608363950974307954311370849609375000000000000 | 7.041971 | 0.0 | 11.870592999999998 | 187.5 | 729.5 | 18612.0 | 18612.0 | 730 | | 192.168.43.116_52790_104.21.69.158_443_TCP_2022-07-27 18:14:08.578480 | 2022-07-27 14:14:08.578480 | 192.168.43.116 | 52790 | 104.21.69.158 | 443 | TCP | 0.343462 | 14 | 6 | 8 | 4846 | 305 | 4541 | 1400 | 0 | 346.1428571428571672186080832034349441528320312500000000000000000000 | 561.3693534745268607366597279906272888183593750000000000000000000000 | 315135.5510204082238487899303436279296875000000000000000000000000000000 | 1400 | 0 | 346.1428571428571672186080832034349441528320312500000000000000000000 | 561.3693534745268607366597279906272888183593750000000000000000000000 | 12920.1388888888886867789551615715026855468750000000000000000000000000 | 1400 | 0 | 346.1428571428571672186080832034349441528320312500000000000000000000 | 561.3693534745268607366597279906272888183593750000000000000000000000 | 427336.9843750000000000000000000000000000000000000000000000000000000000 | 304 | 32 | 20 | 21.7142857142857153007753368001431226730346679687500000000000000000 | 4.1991252733425907806008581246715039014816284179687500000000000000 | 132 | 32 | 20 | 22.0000000000000000000000000000000000000000000000000000000000000000 | 4.4721359549995796101029554847627878189086914062500000000000000000 | 172 | 32 | 20 | 21.5000000000000000000000000000000000000000000000000000000000000000 | 3.9686269665968860742566448607249185442924499511718750000000000000 | 50.833333333333336 | 567.625 | 346.14285714285717 | 64240 | 65535 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 14109.27555304517 | 888.0167238297105 | 13221.25882921546 | 40.76142338890474 | 23.292241936516994 | 17.469181452387748 | 1.3333333333333333 | 0 | 0 | 0 | 4541.0 | 4.0 | 1795571.3720838276 | 0 | 0 | 0 | 0 | 1 | 4541 | 4 | 
0.002529 | 2 | 3 | 0 | 0 | 2 | 13 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 5 | 0 | 0 | 1 | 2 | 0 | 0 | 1 | 8 | 0 | 0 | 0.0264201538461538466828759652571534388698637485504150390625000000 | 0.0349830592470702014806782642608595779165625572204589843750000000 | 0.105479 | 9.1e-05 | 0.343462 | 0.0495307999999999998275157508942356798797845840454101562500000000 | 0.0517288412528252999900146846812276635318994522094726562500000000 | 0.119035 | 0.002165 | 0.24765399999999999 | 0.0339975714285714256113202225151326274499297142028808593750000000 | 0.0321096063145201032762443560386600438505411148071289062500000000 | 0.088577 | 0.000417 | 0.237983 | 0 | 0 | 0 | 0 | 731 | | 192.168.43.116_52765_142.250.186.133_443_TCP_2022-07-27 18:14:04.374890 | 2022-07-27 14:14:04.374890 | 192.168.43.116 | 52765 | 142.250.186.133 | 443 | TCP | 100.345666 | 276 | 91 | 185 | 204871 | 38998 | 165873 | 1400 | 0 | 742.2862318840579973766580224037170410156250000000000000000000000000 | 656.5600251477645770137314684689044952392578125000000000000000000000 | 431071.0666220331913791596889495849609375000000000000000000000000000000 | 1400 | 0 | 742.2862318840579973766580224037170410156250000000000000000000000000 | 656.5600251477645770137314684689044952392578125000000000000000000000 | 363470.2035985992406494915485382080078125000000000000000000000000000000 | 1400 | 0 | 742.2862318840579973766580224037170410156250000000000000000000000000 | 656.5600251477645770137314684689044952392578125000000000000000000000 | 392090.0106939371908083558082580566406250000000000000000000000000000000 | 5592 | 32 | 20 | 20.2608695652173906864845775999128818511962890625000000000000000000 | 1.7499662432607050455146691092522814869880676269531250000000000000 | 1820 | 20 | 20 | 20.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 3772 | 32 | 20 | 20.3891891891891887667043192777782678604125976562500000000000000000 | 
2.1257474086279701808166464616078883409500122070312500000000000000 | 428.54945054945057 | 896.6108108108108 | 742.286231884058 | 65527 | 2174 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2041.652700775338 | 388.63661535715954 | 1653.0160854181786 | 2.750492482654906 | 1.8436272075766582 | 0.9068652750782481 | 2.032967032967033 | 7021.666666666667 | 7.0 | 48624.59229439288 | 31408.8 | 25.2 | 258988.2498453927 | 3 | 21065 | 21 | 0.433217 | 5 | 157044 | 126 | 0.606375 | 0 | 114 | 0 | 0 | 0 | 276 | 0 | 0 | 0 | 27 | 0 | 0 | 0 | 91 | 0 | 0 | 0 | 87 | 0 | 0 | 0 | 185 | 0 | 0 | 0.3648933309090909293814775082864798605442047119140625000000000000 | 3.4364095339008131624325415032217279076576232910156250000000000000 | 45.010319 | 0.0 | 100.345666 | 1.1146453555555555503531195427058264613151550292968750000000000000 | 5.9432496691043290582001645816490054130554199218750000000000000000 | 45.046832 | 0.0 | 100.31808199999999 | 0.5450481086956522336350872137700207531452178955078125000000000000 | 4.1926245931902839103599944792222231626510620117187500000000000000 | 45.030952 | 0.0 | 100.28885200000002 | 18.2 | 37.0 | 7799.6 | 7799.6 | 732 | | 192.168.43.116_54924_142.250.185.106_443_UDP_2022-07-27 18:14:08.127456 | 2022-07-27 14:14:08.127456 | 192.168.43.116 | 54924 | 142.250.185.106 | 443 | UDP | 0.291493 | 18 | 9 | 9 | 6376 | 2440 | 3936 | 1250 | 25 | 354.2222222222222285381576512008905410766601562500000000000000000000 | 469.3852428153653590925387106835842132568359375000000000000000000000 | 220322.5061728395230602473020553588867187500000000000000000000000000000 | 1250 | 25 | 354.2222222222222285381576512008905410766601562500000000000000000000 | 469.3852428153653590925387106835842132568359375000000000000000000000 | 184884.3209876543260179460048675537109375000000000000000000000000000000 | 1250 | 25 | 354.2222222222222285381576512008905410766601562500000000000000000000 | 469.3852428153653590925387106835842132568359375000000000000000000000 | 
241945.7777777777810115367174148559570312500000000000000000000000000000 | 144 | 8 | 8 | 8.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 72 | 8 | 8 | 8.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 72 | 8 | 8 | 8.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 271.1111111111111 | 437.3333333333333 | 354.22222222222223 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 21873.595592346985 | 8370.698438727517 | 13502.89715361947 | 61.7510540561866 | 30.8755270280933 | 30.8755270280933 | 1.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0171466470588235303518231944508443120867013931274414062500000000 | 0.0263312681938975730322471946465157088823616504669189453125000000 | 0.100056 | 0.000165 | 0.29149300000000006 | 0.0288457499999999998629984787612556829117238521575927734375000000 | 0.0361429524587781811351128169462754158303141593933105468750000000 | 0.101686 | 0.000165 | 0.230766 | 0.0221667500000000024240609519665667903609573841094970703125000000 | 0.0238617413224077208611095812784697045572102069854736328125000000 | 0.065875 | 0.000285 | 0.17733400000000002 | 0 | 0 | 0 | 0 | 733 | | 192.168.43.116_52794_151.101.114.133_443_TCP_2022-07-27 18:14:11.191157 | 2022-07-27 14:14:11.191157 | 192.168.43.116 | 52794 | 151.101.114.133 | 443 | TCP | 91.000385 | 36 | 15 | 21 | 8425 | 2223 | 6202 | 1400 | 0 | 234.0277777777777714618423487991094589233398437500000000000000000000 | 394.9627609809921864325588103383779525756835937500000000000000000000 | 155995.5825617283699102699756622314453125000000000000000000000000000000 | 1400 | 0 | 234.0277777777777714618423487991094589233398437500000000000000000000 | 
394.9627609809921864325588103383779525756835937500000000000000000000 | 42528.1599999999962165020406246185302734375000000000000000000000000000 | 1400 | 0 | 234.0277777777777714618423487991094589233398437500000000000000000000 | 394.9627609809921864325588103383779525756835937500000000000000000000 | 228023.6507936507987324148416519165039062500000000000000000000000000000 | 768 | 32 | 20 | 21.3333333333333321490954403998330235481262207031250000000000000000 | 3.7712361663282534252061850565951317548751831054687500000000000000 | 312 | 32 | 20 | 20.8000000000000007105427357601001858711242675781250000000000000000 | 2.9933259094191528859596473921556025743484497070312500000000000000 | 456 | 32 | 20 | 21.7142857142857153007753368001431226730346679687500000000000000000 | 4.1991252733425907806008581246715039014816284179687500000000000000 | 148.2 | 295.3333333333333 | 234.02777777777777 | 64240 | 65535 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 92.58202588923113 | 24.428468077360332 | 68.1535578118708 | 0.3956027219005722 | 0.23076825444200044 | 0.16483446745857175 | 1.4 | 0 | 0 | 0 | 4796.0 | 4.0 | 4715830.87512291 | 0 | 0 | 0 | 0 | 1 | 4796 | 4 | 0.001017 | 0 | 16 | 0 | 0 | 2 | 35 | 0 | 0 | 0 | 8 | 0 | 0 | 1 | 14 | 0 | 0 | 0 | 8 | 0 | 0 | 1 | 21 | 0 | 0 | 2.6000109999999998500186393357580527663230895996093750000000000000 | 10.4399773333921093154685877379961311817169189453125000000000000000 | 45.008387 | 0.0 | 91.000385 | 6.4987426428571426484381845511961728334426879882812500000000000000 | 15.7475691556839763052266789600253105163574218750000000000000000000 | 45.138092 | 0.000215 | 90.98239699999999 | 4.5450002500000001859348230937030166387557983398437500000000000000 | 13.5120536883376498593634096323512494564056396484375000000000000000 | 45.135349 | 0.0 | 90.900005 | 7.5 | 10.5 | 1111.5 | 1111.5 | 734 | | 192.168.43.116_52834_80.66.179.18_443_TCP_2022-07-27 18:15:26.541156 | 2022-07-27 14:15:26.541156 | 192.168.43.116 | 52834 | 80.66.179.18 | 443 | TCP | 2.823269 | 2754 | 497 | 2257 | 
3134937 | 3564 | 3131373 | 1400 | 0 | 1138.3213507625273450685199350118637084960937500000000000000000000000 | 542.6697782601822837023064494132995605468750000000000000000000000000 | 294490.4882369554252363741397857666015625000000000000000000000000000000 | 1400 | 0 | 1138.3213507625273450685199350118637084960937500000000000000000000000 | 542.6697782601822837023064494132995605468750000000000000000000000000 | 1736.3027420053517744236160069704055786132812500000000000000000000000 | 1400 | 0 | 1138.3213507625273450685199350118637084960937500000000000000000000000 | 542.6697782601822837023064494132995605468750000000000000000000000000 | 15162.4217387779335695086047053337097167968750000000000000000000000000 | 55832 | 40 | 20 | 20.2730573710965877864964568288996815681457519531250000000000000000 | 2.0750848349489388056099414825439453125000000000000000000000000000 | 10680 | 40 | 20 | 21.4889336016096592629764927551150321960449218750000000000000000000 | 4.6655096345973641192017566936556249856948852539062500000000000000 | 45152 | 32 | 20 | 20.0053167922020378455272293649613857269287109375000000000000000000 | 0.2525336376507842373051460072019835934042930603027343750000000000 | 7.17102615694165 | 1387.4049623393885 | 1138.3213507625273 | 64240 | 29200 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1110392.5980839941 | 1262.3664270035906 | 1109130.2316569907 | 975.4649663209564 | 799.4278972354389 | 176.03706908551754 | 4.541247484909457 | 582.0 | 4.0 | 32786.88524590164 | 195188.4375 | 140.0625 | 2555729.8648243896 | 1 | 582 | 4 | 0.017751 | 16 | 3123015 | 2241 | 1.221966 | 0 | 413 | 0 | 0 | 2 | 2753 | 0 | 0 | 0 | 26 | 0 | 0 | 1 | 496 | 0 | 0 | 0 | 387 | 0 | 0 | 1 | 2257 | 0 | 0 | 0.0010255245187068653175271881750063585059251636266708374023437500 | 0.0093568105415945523190002219848793174605816602706909179687500000 | 0.305383 | 0.0 | 2.823268999999997 | 0.0056920745967741935220085558455593854887410998344421386718750000 | 0.0216513998968292861735385201882309047505259513854980468750000000 | 
0.305383 | 6.5e-05 | 2.823268999999999 | 0.0012158900709219858018300675084333306585904210805892944335937500 | 0.0119827414129954849114634285456304496619850397109985351562500000 | 0.357711 | 0.0 | 2.7430479999999964 | 0 | 0 | 0 | 0 | 735 | | 192.168.43.116_52838_152.199.21.118_443_TCP_2022-07-27 18:15:54.171015 | 2022-07-27 14:15:54.171015 | 192.168.43.116 | 52838 | 152.199.21.118 | 443 | TCP | 4.655009 | 1686 | 281 | 1405 | 1935209 | 3671 | 1931538 | 1400 | 0 | 1147.8107947805456205969676375389099121093750000000000000000000000000 | 531.9658190592804203333798795938491821289062500000000000000000000000 | 282987.6326474110246635973453521728515625000000000000000000000000000000 | 1400 | 0 | 1147.8107947805456205969676375389099121093750000000000000000000000000 | 531.9658190592804203333798795938491821289062500000000000000000000000 | 4004.6364661035190692928154021501541137695312500000000000000000000000 | 1400 | 0 | 1147.8107947805456205969676375389099121093750000000000000000000000000 | 531.9658190592804203333798795938491821289062500000000000000000000000 | 29748.1937138587381923571228981018066406250000000000000000000000000000 | 34344 | 32 | 20 | 20.3701067615658359954977640882134437561035156250000000000000000000 | 2.0746812101701999075942239869618788361549377441406250000000000000 | 6232 | 32 | 20 | 22.1779359430604969816158700268715620040893554687500000000000000000 | 4.6251298732739556385240575764328241348266601562500000000000000000 | 28112 | 32 | 20 | 20.0085409252669030877314071403816342353820800781250000000000000000 | 0.3200283671777114924417162455938523635268211364746093750000000000 | 13.064056939501778 | 1374.7601423487545 | 1147.8107947805456 | 64240 | 65535 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 415726.15649078233 | 788.6128684176551 | 414937.54362236464 | 362.19049200549347 | 301.82541000457786 | 60.36508200091558 | 5.0 | 713.0 | 5.5 | 24195.737749423104 | 275433.28571428574 | 198.14285714285714 | 2019396.558496036 | 2 | 1426 | 11 | 0.058936 | 7 | 1928033 | 1387 | 
0.954757 | 0 | 418 | 0 | 0 | 2 | 1685 | 0 | 0 | 0 | 24 | 0 | 0 | 1 | 280 | 0 | 0 | 0 | 394 | 0 | 0 | 1 | 1405 | 0 | 0 | 0.0027626166172106825930088191967115562874823808670043945312500000 | 0.0454314267416226630347253490072034765034914016723632812500000000 | 1.620274 | 0.0 | 4.655008999999995 | 0.0166250321428571441739752145849706721492111682891845703125000000 | 0.1108640178950371091293192193916183896362781524658203125000000000 | 1.620274 | 5.3e-05 | 4.655008999999995 | 0.0032447108262108263591894097288559351000003516674041748046875000 | 0.0513123862702731040053016897672932827845215797424316406250000000 | 1.641034 | 0.0 | 4.5555739999999965 | 281.0 | 1405.0 | 3671.0 | 3671.0 | 736 | | 192.168.43.116_52775_142.250.184.229_443_TCP_2022-07-27 18:14:06.005934 | 2022-07-27 14:14:06.005934 | 192.168.43.116 | 52775 | 142.250.184.229 | 443 | TCP | 14.783576 | 11 | 5 | 6 | 193 | 64 | 129 | 73 | 0 | 17.5454545454545467464413377456367015838623046875000000000000000000 | 25.5427044783068275535242719342932105064392089843750000000000000000 | 652.4297520661157250287942588329315185546875000000000000000000000000 | 73 | 0 | 17.5454545454545467464413377456367015838623046875000000000000000000 | 25.5427044783068275535242719342932105064392089843750000000000000000 | 255.7599999999999909050529822707176208496093750000000000000000000000 | 73 | 0 | 17.5454545454545467464413377456367015838623046875000000000000000000 | 25.5427044783068275535242719342932105064392089843750000000000000000 | 948.5833333333333712289459072053432464599609375000000000000000000000 | 244 | 32 | 20 | 22.1818181818181834330516721820458769798278808593750000000000000000 | 4.6283352950392204760987624467816203832626342773437500000000000000 | 100 | 20 | 20 | 20.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 144 | 32 | 20 | 24.0000000000000000000000000000000000000000000000000000000000000000 | 
5.6568542494923805818984874349553138017654418945312500000000000000 | 12.8 | 21.5 | 17.545454545454547 | 508 | 374 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13.055028093338175 | 4.329128486910069 | 8.725899606428106 | 0.744068958687668 | 0.40585579564781893 | 0.3382131630398491 | 1.2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 11 | 0 | 1 | 1 | 2 | 0 | 0 | 0 | 5 | 0 | 1 | 0 | 2 | 0 | 0 | 0 | 6 | 0 | 0 | 1.4783576000000000494338792123016901314258575439453125000000000000 | 4.3623927038047867199566098861396312713623046875000000000000000000 | 14.565012 | 5.4e-05 | 14.783575999999996 | 3.6958940000000000125623955682385712862014770507812500000000000000 | 6.3448243181175234539637131092604249715805053710937500000000000000 | 14.685235 | 5.4e-05 | 14.783576 | 2.9326550000000000117950094136176630854606628417968750000000000000 | 5.8162435191021701896829654288012534379959106445312500000000000000 | 14.565012 | 0.000331 | 14.663274999999999 | 5.0 | 6.0 | 64.0 | 64.0 | 737 | | 192.168.43.116_52786_172.67.75.39_443_TCP_2022-07-27 18:15:40.490110 | 2022-07-27 14:15:40.490110 | 192.168.43.116 | 52786 | 172.67.75.39 | 443 | TCP | 0.108553 | 3 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 0 | 0 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 0 | 0 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 60 | 20 | 20 | 20.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 20 | 20 | 20 
| 20.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 40 | 20 | 20 | 20.0000000000000000000000000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 0.0 | 0.0 | 0.0 | 1020 | 95 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 27.636269840538723 | 18.424179893692482 | 9.212089946846241 | 2.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0.0542764999999999983582021911843185080215334892272949218750000000 | 0.0051684999999999994835242489443771773949265480041503906250000000 | 0.059445 | 0.049108 | 0.108553 | 1658945740.4901099205017089843750000000000000000000000000000000000000000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 1658945740.49011 | 1658945740.49011 | 1658945740.49011 | 0.0491079999999999988746779422399413306266069412231445312500000000 | 0.0000000000000000000000000000000000000000000000000000000000000000 | 0.049108 | 0.049108 | 0.049108 | 0 | 0 | 0 | 0 | 738 | 739 | 740 | ---- 741 | 742 | 743 | # Copyright (c) 2023 744 | 745 | For citation in your works and also understanding NTLFlowLyzer completely, you can find below published papers: 746 | 747 | - "NTLFlowLyzer: Towards generating an intrusion detection dataset and intruders behavior profiling through network and transport layers traffic analysis and pattern extraction", MohammadMoein Shafi, Arash Habibi Lashkari, Arousha Haghighian Roudsari, Computers & Security, 2024, 104160, ISSN 0167-4048, https://doi.org/10.1016/j.cose.2024.104160. 748 | 749 | # Contributing 750 | 751 | Any contribution is welcome in form of pull requests. 
752 | 753 | 754 | # Project Team members 755 | 756 | * [**Arash Habibi Lashkari:**](http://ahlashkari.com/index.asp) Founder and supervisor 757 | 758 | * [**Moein Shafi:**](https://github.com/moein-shafi) Graduate student, Researcher and developer - York University (2 years, 2022-2024) 759 | 760 | * [**Mohamed Aziz El Fadhel:**](https://github.com/MohamedAzizFadhel) Mitacs Global Research Intern, Researcher and developer - York University (4 months, 2024) 761 | 762 | * [**Sepideh Niktabe:**](https://github.com/sepideh2020) Graduate student, Researcher and developer - York University (6 months, 2022-2023) 763 | 764 | * [**Mehrsa Khoshpasand:**](https://github.com/Khoshpasand-mehrsa) Research Assistant (RA) - York University (3 months, 2022) 765 | 766 | * [**Parisa Ghanad:**](https://github.com/parishisit) Volunteer Researcher and developer - Amirkabir University (4 months, 2022) 767 | 768 | 769 | # Acknowledgment 770 | 771 | This project has been made possible through funding from the Natural Sciences and Engineering Research Council of Canada — NSERC (#RGPIN-2020-04701) and Canada Research Chair (Tier II) - (#CRC-2021-00340) to Arash Habibi Lashkari. 772 | --------------------------------------------------------------------------------