├── requirements.txt ├── tests ├── packet_captures │ ├── PING1.pcap │ ├── http1.pcap │ ├── tcp_syn_only.pcap │ └── tcp_syn_flow_expiration.pcap ├── test_flows.py ├── test_nethash.py ├── groundtruth_tcp_syn_only.py ├── groundtruth_tcp_syn_flow_expiration.py ├── groundtruth_PING1.py ├── http1.py └── test_system.py ├── .gitignore ├── flowRecorder ├── config │ └── config.yaml ├── baseclass.py ├── nethash.py ├── config.py ├── flowRecorder.py ├── .pylintrc └── flows.py ├── README.RST ├── setup.py └── LICENSE /requirements.txt: -------------------------------------------------------------------------------- 1 | dpkt 2 | pcapy 3 | hashlib 4 | numpy 5 | -------------------------------------------------------------------------------- /tests/packet_captures/PING1.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drnpkr/flowRecorder/HEAD/tests/packet_captures/PING1.pcap -------------------------------------------------------------------------------- /tests/packet_captures/http1.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drnpkr/flowRecorder/HEAD/tests/packet_captures/http1.pcap -------------------------------------------------------------------------------- /tests/packet_captures/tcp_syn_only.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drnpkr/flowRecorder/HEAD/tests/packet_captures/tcp_syn_only.pcap -------------------------------------------------------------------------------- /tests/packet_captures/tcp_syn_flow_expiration.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drnpkr/flowRecorder/HEAD/tests/packet_captures/tcp_syn_flow_expiration.pcap -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | docs/build/ 2 | docs/_build 3 | docs/_static 4 | docs/_templates 5 | *.pyc 6 | flowRecorder/*.pyc 7 | tests/temp/ 8 | tests/.cache/ 9 | tests/*.pyc 10 | tests/__pycache__/ 11 | *.egg-info/ 12 | build/ 13 | dist/ 14 | .cache/ 15 | -------------------------------------------------------------------------------- /flowRecorder/config/config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Config for flowRecorder 3 | # Written in YAML 4 | # 5 | #========== SYSLOG ================================== 6 | # Set to 1 if want to log to syslog: 7 | syslog_enabled: 0 8 | # 9 | # Where to send syslog (use localhost to keep on-box): 10 | loghost: localhost 11 | # 12 | # Port number to send on: 13 | logport: 514 14 | # 15 | # Facility to use: 16 | logfacility: 19 17 | # 18 | # Syslog Log Format: 19 | syslog_format: "sev=%(levelname)s module=%(module)s func=%(funcName)s %(message)s" 20 | # 21 | # Syslog Logging Levels Per Module 22 | # Values can be one of CRITICAL|ERROR|WARNING|INFO|DEBUG: 23 | flowRecorder_logging_level_s: INFO 24 | config_logging_level_s: INFO 25 | flows_logging_level_s: INFO 26 | # 27 | #========== CONSOLE LOGGING ========================= 28 | # Set to 1 if want to log to console: 29 | console_log_enabled: 1 30 | # 31 | # Set to 1 if you want colorised logs in terminal: 32 | coloredlogs_enabled: 1 33 | # 34 | # Console Log Format: 35 | #console_format: "%(asctime)s %(module)s[%(process)d] %(funcName)s %(levelname)s %(message)s" 36 | console_format: "%(asctime)s.%(msecs)03d %(name)s[%(process)d] %(funcName)s %(levelname)s %(message)s" 37 | # 38 | # Console Logging Levels Per Module 39 | # Values can be one of CRITICAL|ERROR|WARNING|INFO|DEBUG: 40 | flowRecorder_logging_level_c: DEBUG 41 | config_logging_level_c: DEBUG 42 | flows_logging_level_c: DEBUG 43 | # 44 | #========== PACKET CAPTURE ========================== 45 | # Max 
size of packet to capture: 46 | maxlen: 65535 47 | # Promiscuous mode?: 48 | promiscuous: 1 49 | # Read timeout in milliseconds: 50 | read_timeout: 100 51 | # 52 | #========== FLOW EXPIRATION ========================== 53 | # Threshold time in seconds between packets with same flow keys 54 | # whereby will be considered a new separate flow: 55 | flow_expiration: 3600 56 | # 57 | # Frequency of information status messages 58 | # The value defines after how many processed packets an info status message is going to be shown 59 | infoFrequency: 10000 60 | # -------------------------------------------------------------------------------- /README.RST: -------------------------------------------------------------------------------- 1 | flowRecorder 2 | ============ 3 | 4 | Use flowRecorder to turn packets into flow records. 5 | The tool can work in two modes: 6 | 7 | 1. Live packet capture from a NIC 8 | 2. Parsing packets from a PCAP file. 9 | 10 | The program can take a number of arguments: 11 | 12 | -d, --direction sets whether the packets will be organised into flows in uni- or bidirection 13 | 14 | -i, --interface interface_name sets the networking interface card from which the packets will be sniffed 15 | 16 | -f, --file file_name sets the name of the PCAP file 17 | 18 | -o, --out file_name sets the name of the CSV file into which the results will be saved 19 | 20 | 21 | **Examples:** 22 | 23 | 1. To read in a PCAP file and process the packets into flows 24 | in one direction, and save the results into a CSV file the following 25 | command can be used: 26 | 27 | :: 28 | 29 | python ~/flowRecorder/flowRecorder/flowRecorder.py -d u -f p.pcap -o results.csv 30 | 31 | 2. 
To start capturing the packets from a NIC (en0) and organize them 32 | into flow records in bidirection, the following command can be used: 33 | 34 | :: 35 | 36 | sudo python ~/flowRecorder/flowRecorder/flowRecorder.py -d b -i en0 -o results.csv 37 | 38 | Notes 39 | ===== 40 | * Only creates flows for IP packets 41 | * Uses 5-tuple of ip_src, ip_dst, proto, src_port, dst_port flow keys as 42 | common properties to identify TCP/UDP flows 43 | * Uses 3-tuple of ip_src, ip_dst, proto flow keys as 44 | common properties to identify non-TCP/UDP flows 45 | * Packet lengths are calculated on sizes of IP packets (i.e. ignores 46 | Ethernet framing size) 47 | * Flows are considered to have expired if more than a threshold of time 48 | has passed since previous packet and when this occurs further packets 49 | will be considered as a new flow and the previous packets moved to the 50 | flow archive. Threshold for flow expiration is in the config file. 51 | 52 | Dependencies 53 | ============ 54 | 55 | flowRecorder depends on the following libraries: 56 | 57 | - dpkt 58 | - pcapy 59 | - hashlib 60 | - numpy 61 | 62 | These can be installed with (Python 2.x): 63 | 64 | :: 65 | 66 | sudo apt-get update 67 | sudo apt-get install libpcap-dev 68 | 69 | sudo apt install python-pip python-pytest python-yaml 70 | 71 | pip install dpkt pcapy numpy 72 | 73 | These can be installed with (Python 3.x): 74 | 75 | :: 76 | 77 | sudo apt-get update 78 | sudo apt-get install libpcap-dev 79 | 80 | sudo apt install python3-pip python-pytest3 python-yaml 81 | 82 | pip3 install dpkt pcapy numpy 83 | 84 | 85 | Testing 86 | ======= 87 | 88 | Run python 2.x self tests with: 89 | 90 | :: 91 | 92 | cd ~/flowRecorder/tests/; py.test 93 | 94 | Run python 3.x self tests with: 95 | 96 | :: 97 | 98 | cd ~/flowRecorder/tests/; py.test-3 99 | 100 | Known issues 101 | ============ 102 | 103 | The program is not optimized for processing large PCAP files. 
For 104 | example, processing 500K packets takes approximately 40 minutes. The 105 | processing time mainly depends on the selected directionality and the 106 | computing resources. 107 | 108 | **The tool is under testing. Please report any issues/bugs to the 109 | developers.** 110 | -------------------------------------------------------------------------------- /flowRecorder/baseclass.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 10 | # implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
class BaseClass(object):
    """
    This class provides the common logging set-up method for
    inheritance by other classes.

    The inheriting class must set self.config to an object that
    exposes get_value(key) before calling configure_logging.
    """
    def __init__(self):
        """
        Initialise the BaseClass class (it holds no state of its own)
        """

    def configure_logging(self, name, s_name, c_name):
        """
        Configure logging for the class that has inherited
        this method.

        name is the logger name passed to logging.getLogger.
        s_name is the config key holding the syslog logging level.
        c_name is the config key holding the console logging level.

        Sets self.logger, and self.syslog_handler and/or
        self.console_handler when those handlers are created.
        """
        # Set up Logging:
        self.logger = logging.getLogger(name)

        # Get logging config values from config class:
        _logging_level_s = self.config.get_value(s_name)
        _logging_level_c = self.config.get_value(c_name)

        _syslog_enabled = self.config.get_value('syslog_enabled')
        _loghost = self.config.get_value('loghost')
        _logport = self.config.get_value('logport')
        _logfacility = self.config.get_value('logfacility')
        _syslog_format = self.config.get_value('syslog_format')
        _console_log_enabled = self.config.get_value('console_log_enabled')
        _coloredlogs_enabled = self.config.get_value('coloredlogs_enabled')
        _console_format = self.config.get_value('console_format')

        self.logger.propagate = False

        # Let every record through the logger so that the per-handler
        # levels below do the filtering. Without this the logger level
        # is unset and falls back to an ancestor's level (WARNING on a
        # default root logger), so INFO/DEBUG records would be dropped
        # before reaching a handler configured to show them:
        self.logger.setLevel(logging.DEBUG)

        # Syslog:
        if _syslog_enabled:
            # Log to syslog on host specified in config.yaml:
            self.syslog_handler = logging.handlers.SysLogHandler(
                address=(_loghost, _logport),
                facility=_logfacility)
            syslog_formatter = logging.Formatter(_syslog_format)
            self.syslog_handler.setFormatter(syslog_formatter)
            self.syslog_handler.setLevel(_logging_level_s)
            # Add syslog log handler to logger:
            self.logger.addHandler(self.syslog_handler)
        # Console logging:
        if _console_log_enabled:
            # Log to the console:
            if _coloredlogs_enabled:
                # Colourise the logs to make them easier to understand:
                coloredlogs.install(level=_logging_level_c,
                                    logger=self.logger,
                                    fmt=_console_format,
                                    datefmt='%H:%M:%S')
            else:
                # Add console log handler to logger:
                self.console_handler = logging.StreamHandler()
                console_formatter = logging.Formatter(_console_format)
                self.console_handler.setFormatter(console_formatter)
                self.console_handler.setLevel(_logging_level_c)
                self.logger.addHandler(self.console_handler)
Flow class packet_dir method works correctly 53 | """ 54 | # For each packet in the pcap process the contents: 55 | mode = 'b' 56 | packet_number = 1 57 | flows_instance = flows_module.Flows(config, mode) 58 | with open(TEST_PCAP_HTTP1, 'rb') as pcap_file: 59 | pcap_file_handle = dpkt.pcap.Reader(pcap_file) 60 | for timestamp, pcap_packet in pcap_file_handle: 61 | #*** Instantiate an instance of Packet class: 62 | packet = flows_module.Packet(logger, timestamp, pcap_packet, mode) 63 | flows_instance.flow.update(packet) 64 | flow_dict = flows_instance.flow_cache[packet.flow_hash] 65 | logger.info("flow ip_src=%s", flow_dict['src_ip']) 66 | logger.info("pkt=%s ground_truth=%s", packet_number - 1, pkts.DIRECTION[packet_number - 1]) 67 | logger.info("packet_dir=%s", flows_instance.flow.packet_dir(packet, flow_dict)) 68 | if pkts.DIRECTION[packet_number - 1] == 'c2s': 69 | assert flows_instance.flow.packet_dir(packet, flow_dict) == 'f' 70 | else: 71 | assert flows_instance.flow.packet_dir(packet, flow_dict) == 'b' 72 | packet_number += 1 73 | 74 | # TBD: check mode=u 75 | 76 | #================= HELPER FUNCTIONS =========================================== 77 | 78 | def pkt_test(packet, pkts, pkt_num): 79 | """ 80 | Passed a Packet object, a packets file and packet number 81 | and check parameters match 82 | """ 83 | assert packet.length == pkts.LEN[pkt_num - 1] 84 | assert packet.ip_src == pkts.IP_SRC[pkt_num - 1] 85 | assert packet.ip_dst == pkts.IP_DST[pkt_num - 1] 86 | assert packet.proto == pkts.PROTO[pkt_num - 1] 87 | assert packet.tp_src == pkts.TP_SRC[pkt_num - 1] 88 | assert packet.tp_dst == pkts.TP_DST[pkt_num - 1] 89 | assert packet.tp_seq_src == pkts.TP_SEQ_SRC[pkt_num - 1] 90 | assert packet.tp_seq_dst == pkts.TP_SEQ_DST[pkt_num - 1] 91 | assert packet.tcp_syn() == pkts.TCP_SYN[pkt_num - 1] 92 | assert packet.tcp_fin() == pkts.TCP_FIN[pkt_num - 1] 93 | assert packet.tcp_rst() == pkts.TCP_RST[pkt_num - 1] 94 | assert packet.tcp_psh() == pkts.TCP_PSH[pkt_num - 
1] 95 | assert packet.tcp_ack() == pkts.TCP_ACK[pkt_num - 1] 96 | 97 | def mac_addr(address): 98 | """ 99 | Convert a MAC address to a readable/printable string 100 | """ 101 | return ':'.join('%02x' % ord(b) for b in address) 102 | 103 | def _ipv4_t2i(ip_text): 104 | """ 105 | Turns an IPv4 address in text format into an integer. 106 | Borrowed from rest_router.py code 107 | """ 108 | if ip_text == 0: 109 | return ip_text 110 | assert isinstance(ip_text, str) 111 | return struct.unpack('!I', addrconv.ipv4.text_to_bin(ip_text))[0] 112 | -------------------------------------------------------------------------------- /tests/test_nethash.py: -------------------------------------------------------------------------------- 1 | """ 2 | nethash.py Unit Tests 3 | """ 4 | 5 | #*** Handle tests being in different directory branch to app code: 6 | import sys 7 | import struct 8 | 9 | sys.path.insert(0, '../flowRecorder') 10 | 11 | #*** For timestamps: 12 | import datetime 13 | 14 | import logging 15 | 16 | # Import dpkt for packet parsing: 17 | import dpkt 18 | 19 | # flowRecorder imports: 20 | import flowRecorder 21 | import config 22 | import flows as flows_module 23 | import nethash 24 | 25 | # test packet imports: 26 | import http1 as pkts 27 | 28 | logger = logging.getLogger(__name__) 29 | 30 | # test packet imports: 31 | import http1 as pkts 32 | 33 | # Instantiate Config class: 34 | config = config.Config() 35 | 36 | # Test 5-Tuple: 37 | IP_A = '192.168.0.1' 38 | IP_B = '192.168.0.2' 39 | TP_A = 12345 40 | TP_B = 443 41 | TCP = 6 42 | TIMESTAMP = '1538857982.301350' 43 | 44 | # Test packet capture files: 45 | TEST_PCAP_HTTP1 = '../tests/packet_captures/http1.pcap' 46 | 47 | #======================== nethash.py Unit Tests ============================ 48 | def test_hash_b6(): 49 | """ 50 | Test bidirectional 6-tuple hashing 51 | """ 52 | # Test that TCP tuples of packets in both directions on 53 | # a flow generate the same hash: 54 | hash1 = nethash.hash_b6((IP_A, 
def test_hash_b5():
    """
    Check bidirectional 5-tuple hashing
    """
    # Swapping source and destination of a TCP 5-tuple must not
    # change the bidirectional hash:
    forward_hash = nethash.hash_b5((IP_A, IP_B, TCP, TP_A, TP_B))
    reverse_hash = nethash.hash_b5((IP_B, IP_A, TCP, TP_B, TP_A))
    assert forward_hash == reverse_hash

    # Every packet of the single-flow capture must carry the same
    # bidirectional flow hash as the first packet:
    first_hash = 0
    with open(TEST_PCAP_HTTP1, 'rb') as pcap_file:
        reader = dpkt.pcap.Reader(pcap_file)
        for position, (timestamp, pcap_packet) in enumerate(reader):
            packet = flows_module.Packet(logger, timestamp,
                                         pcap_packet, 'b')
            if position == 0:
                first_hash = packet.flow_hash
            else:
                assert packet.flow_hash == first_hash
            logger.info("packet.flow_hash=%s, flow_hash_packet_1=%s",
                        packet.flow_hash, first_hash)


def test_hash_b4():
    """
    Check bidirectional 4-tuple hashing
    """
    # Swapping the two IP addresses must not change the
    # bidirectional hash:
    forward_hash = nethash.hash_b4((IP_A, IP_B, TCP, TIMESTAMP))
    reverse_hash = nethash.hash_b4((IP_B, IP_A, TCP, TIMESTAMP))
    assert forward_hash == reverse_hash

    # TBD: more tests here...
def test_hash_b3():
    """
    Check bidirectional 3-tuple hashing
    """
    # Swapping the two IP addresses must not change the
    # bidirectional hash:
    forward_hash = nethash.hash_b3((IP_A, IP_B, TCP))
    reverse_hash = nethash.hash_b3((IP_B, IP_A, TCP))
    assert forward_hash == reverse_hash

    # Test reading in a packet capture of a single flow and ensuring
    # all packets have same b3 flow hash

    # TBD: needs a non-TCP or UDP packet capture of a flow (i.e. IPsec
    # or similar...


def test_hash_u6():
    """
    Check unidirectional 6-tuple hashing
    """
    # The two directions of a flow must hash to different values:
    forward_hash = nethash.hash_u6((IP_A, IP_B, TCP, TP_A, TP_B, TIMESTAMP))
    reverse_hash = nethash.hash_u6((IP_B, IP_A, TCP, TP_B, TP_A, TIMESTAMP))
    assert forward_hash != reverse_hash


def test_hash_u5():
    """
    Check unidirectional 5-tuple hashing
    """
    # The two directions of a flow must hash to different values:
    forward_hash = nethash.hash_u5((IP_A, IP_B, TCP, TP_A, TP_B))
    reverse_hash = nethash.hash_u5((IP_B, IP_A, TCP, TP_B, TP_A))
    assert forward_hash != reverse_hash

    # Read the single-flow capture: packet 1 supplies the forward
    # hash, packet 2 the backward hash, and each later packet must
    # match the hash for its ground-truth direction:
    with open(TEST_PCAP_HTTP1, 'rb') as pcap_file:
        reader = dpkt.pcap.Reader(pcap_file)
        for number, (timestamp, pcap_packet) in enumerate(reader, 1):
            packet = flows_module.Packet(logger, timestamp,
                                         pcap_packet, 'u')
            if number == 1:
                flow_hash_packet_forward = packet.flow_hash
            elif number == 2:
                flow_hash_packet_backward = packet.flow_hash
            elif pkts.DIRECTION[number - 1] == 'c2s':
                assert packet.flow_hash == flow_hash_packet_forward
            else:
                assert packet.flow_hash == flow_hash_packet_backward


def test_hash_u4():
    """
    Check unidirectional 4-tuple hashing
    """
    # The two directions of a flow must hash to different values:
    forward_hash = nethash.hash_u4((IP_A, IP_B, TCP, TIMESTAMP))
    reverse_hash = nethash.hash_u4((IP_B, IP_A, TCP, TIMESTAMP))
    assert forward_hash != reverse_hash

    # TBD: more tests here...


def test_hash_u3():
    """
    Check unidirectional 3-tuple hashing
    """
    # The two directions of a flow must hash to different values:
    forward_hash = nethash.hash_u3((IP_A, IP_B, TCP))
    reverse_hash = nethash.hash_u3((IP_B, IP_A, TCP))
    assert forward_hash != reverse_hash

    # Test reading in a packet capture of a single flow and ensuring
    # all packets per direction have same u3 flow hash

    # TBD: needs a non-TCP or UDP packet capture of a flow (i.e. IPsec
    # or similar...
def _is_reverse_direction(ip_a, ip_b, tp_src=0, tp_dst=0):
    """
    Decide whether a packet's addressing must be transposed to reach
    the arbitrary but consistent orientation used by the
    bidirectional hashes.

    The values are compared with '>' exactly as passed in. The larger
    IP goes first; when the IPs are equal the larger port goes first;
    when everything is equal the tuple is left as is. Callers without
    ports rely on the equal defaults so only the IPs decide.

    Returns True if source and destination should be swapped.
    """
    if ip_a > ip_b:
        return False
    if ip_b > ip_a:
        return True
    if tp_src > tp_dst:
        return False
    return tp_dst > tp_src


def hash_b6(flow_6_tuple):
    """
    Generate a predictable bidirectional flow_hash for a TCP or UDP
    6-tuple that includes timestamp for flow start. The hash is the
    same no matter which direction the traffic is travelling for all
    packets that are part of that flow.

    Pass this function a 6-tuple:
    (ip_src, ip_dst, ip_proto, tp_src, tp_dst, timestamp)
    """
    ip_a = flow_6_tuple[0]
    ip_b = flow_6_tuple[1]
    proto = int(flow_6_tuple[2])
    tp_src = flow_6_tuple[3]
    tp_dst = flow_6_tuple[4]
    timestamp = flow_6_tuple[5]

    if _is_reverse_direction(ip_a, ip_b, tp_src, tp_dst):
        # Transpose IPs and port numbers for reverse packets:
        return hash_tuple((ip_b, ip_a, proto, tp_dst, tp_src, timestamp))
    return hash_tuple((ip_a, ip_b, proto, tp_src, tp_dst, timestamp))


def hash_b5(flow_5_tuple):
    """
    Generate a predictable bidirectional flow_hash for a TCP or UDP
    5-tuple. The hash is the same no matter which direction the
    traffic is travelling for all packets that are part of that flow.

    Pass this function a 5-tuple:
    (ip_src, ip_dst, ip_proto, tp_src, tp_dst)
    """
    ip_a = flow_5_tuple[0]
    ip_b = flow_5_tuple[1]
    proto = int(flow_5_tuple[2])
    tp_src = flow_5_tuple[3]
    tp_dst = flow_5_tuple[4]

    if _is_reverse_direction(ip_a, ip_b, tp_src, tp_dst):
        # Transpose IPs and port numbers for reverse packets:
        return hash_tuple((ip_b, ip_a, proto, tp_dst, tp_src))
    return hash_tuple((ip_a, ip_b, proto, tp_src, tp_dst))


def hash_b4(flow_4_tuple):
    """
    Generate a predictable bidirectional flow_hash for a 4-tuple
    without transport-layer ports that includes timestamp for flow
    start. The hash is the same no matter which direction the traffic
    is travelling for all packets that are part of that flow.

    Pass this function a 4-tuple:
    (ip_src, ip_dst, ip_proto, timestamp)
    """
    ip_a = flow_4_tuple[0]
    ip_b = flow_4_tuple[1]
    proto = int(flow_4_tuple[2])
    timestamp = flow_4_tuple[3]

    if _is_reverse_direction(ip_a, ip_b):
        # Transpose IPs for reverse packets:
        return hash_tuple((ip_b, ip_a, proto, timestamp))
    return hash_tuple((ip_a, ip_b, proto, timestamp))


def hash_b3(flow_3_tuple):
    """
    Generate a predictable bidirectional flow_hash for a 3-tuple
    without transport-layer ports. The hash is the same no matter
    which direction the traffic is travelling for all packets that
    are part of that flow.

    Pass this function a 3-tuple:
    (ip_src, ip_dst, ip_proto)
    """
    ip_a = flow_3_tuple[0]
    ip_b = flow_3_tuple[1]
    proto = int(flow_3_tuple[2])

    if _is_reverse_direction(ip_a, ip_b):
        # Transpose IPs for reverse packets:
        return hash_tuple((ip_b, ip_a, proto))
    return hash_tuple((ip_a, ip_b, proto))


def hash_u6(flow_6_tuple):
    """
    Generate a unidirectional flow_hash for a TCP or UDP
    6-tuple. The protocol is converted to an int before hashing.

    Pass this function a 6-tuple:
    (ip_src, ip_dst, ip_proto, tp_src, tp_dst, timestamp)
    """
    return hash_tuple((flow_6_tuple[0], flow_6_tuple[1],
                       int(flow_6_tuple[2]), flow_6_tuple[3],
                       flow_6_tuple[4], flow_6_tuple[5]))


def hash_u5(flow_5_tuple):
    """
    Generate a unidirectional flow_hash for a TCP or UDP
    5-tuple. The protocol is converted to an int before hashing.

    Pass this function a 5-tuple:
    (ip_src, ip_dst, ip_proto, tp_src, tp_dst)
    """
    return hash_tuple((flow_5_tuple[0], flow_5_tuple[1],
                       int(flow_5_tuple[2]), flow_5_tuple[3],
                       flow_5_tuple[4]))


def hash_u4(flow_4_tuple):
    """
    Generate a unidirectional flow_hash for a 4-tuple without
    transport-layer ports. The protocol is converted to an int
    before hashing.

    Pass this function a 4-tuple:
    (ip_src, ip_dst, ip_proto, timestamp)
    """
    return hash_tuple((flow_4_tuple[0], flow_4_tuple[1],
                       int(flow_4_tuple[2]), flow_4_tuple[3]))


def hash_u3(flow_3_tuple):
    """
    Generate a unidirectional flow_hash for a 3-tuple without
    transport-layer ports. The protocol is converted to an int
    before hashing.

    Pass this function a 3-tuple:
    (ip_src, ip_dst, ip_proto)
    """
    return hash_tuple((flow_3_tuple[0], flow_3_tuple[1],
                       int(flow_3_tuple[2])))


def hash_tuple(hash_tuple):
    """
    Simple function to hash a tuple with MD5.
    Returns the hex digest of the UTF-8 encoded str() of the tuple.

    The parameter shares the function's name; it is kept unchanged so
    that existing keyword callers continue to work.
    """
    return hashlib.md5(str(hash_tuple).encode('utf-8')).hexdigest()
"""
Packets with metadata to use in testing of flowRecorder

This flow is a single TCP SYN packet

To create test packet data, capture packet in Wireshark and:

  For the packet summary:
    Right-click packet in top pane, Copy -> Summary (text).
    Edit pasted text as appropriate

  For the packet hex:
    Right-click packet in top pane, Copy -> Bytes -> Hex Stream

  For the packet timestamp:
    Expand 'Frame' in the middle pane,
    right-click 'Epoch Time' Copy -> Value

Packet capture file is 'tcp_syn_only.pcap'
"""

import binascii

name = 'groundtruth_tcp_syn_only.py'
capture_file = 'tcp_syn_only.pcap'

#======================== Per-packet values ===================
# Each list holds one entry per packet in the capture; this capture
# has a single packet:
#
#*** Packet 1 - TCP SYN
# 1 1538798807.496015 10.0.2.15 10.0.2.2 TCP 74 49440 443 [SYN] Seq=0 Win=29200 Len=0 MSS=1460 SACK_PERM=1 TSval=3618186009 TSecr=0 WS=128

#*** Raw packet data:
RAW = [
    binascii.unhexlify("525400123502080027fc133d08004510003c410440004006e1970a00020f0a000202c12001bbcaf098b800000000a0027210183f0000020405b40402080ad7a923190000000001030307"),
]
#*** Packet lengths in bytes (60 is the IP packet length; the
#*** Wireshark summary above shows 74 for the whole frame):
LEN = [60]
#*** Ethernet parameters:
ETH_SRC = ['08:00:27:fc:13:3d']
ETH_DST = ['52:54:00:12:35:02']
ETH_TYPE = [2048]
#*** IP addresses:
IP_SRC = ['10.0.2.15']
IP_DST = ['10.0.2.2']
#*** IP protocol number in decimal:
PROTO = [6]
#*** Transport-layer port numbers in decimal:
TP_SRC = [49440]
TP_DST = [443]
#*** Transport-layer sequence numbers in decimal:
TP_SEQ_SRC = [0]
TP_SEQ_DST = [0]
#*** TCP FLAGS:
TCP_SYN = [1]
TCP_FIN = [0]
TCP_RST = [0]
TCP_PSH = [0]
TCP_ACK = [0]
#*** HEX-encoded payload
PAYLOAD = [""]
#*** Packet direction, c2s (client to server) or s2c
DIRECTION = ["c2s"]

#*** Metadata for whole flow:
FLOW_IP_CLIENT = '10.0.2.15'
FLOW_IP_SERVER = '10.0.2.2'

#======================== Per-flow values =====================
# Each list holds one entry per expected flow; values are strings.

# Unidirectional flow values:
# Flow 1:
UNIDIR_SRC_IP = ['10.0.2.15']
UNIDIR_SRC_PORT = ['49440']
UNIDIR_DST_IP = ['10.0.2.2']
UNIDIR_DST_PORT = ['443']
UNIDIR_PROTO = ['6']
UNIDIR_PKTTOTALCOUNT = ['1']
UNIDIR_OCTETTOTALCOUNT = ['60']
UNIDIR_MIN_PS = ['60']
UNIDIR_MAX_PS = ['60']
UNIDIR_AVG_PS = ['60']
UNIDIR_STD_DEV_PS = ['0']
UNIDIR_FLOWSTART = ['1538798807.496015']
UNIDIR_FLOWEND = ['1538798807.496015']
UNIDIR_FLOWDURATION = ['0']
UNIDIR_MIN_PIAT = ['0']
UNIDIR_MAX_PIAT = ['0']
UNIDIR_AVG_PIAT = ['0']
UNIDIR_STD_DEV_PIAT = ['0']

# Bidirectional:
# Combined Flow 1:
BIDIR_SRC_IP = ['10.0.2.15']
BIDIR_SRC_PORT = ['49440']
BIDIR_DST_IP = ['10.0.2.2']
BIDIR_DST_PORT = ['443']
BIDIR_PROTO = ['6']
BIDIR_PKTTOTALCOUNT = ['1']
BIDIR_OCTETTOTALCOUNT = ['60']
BIDIR_MIN_PS = ['60']
BIDIR_MAX_PS = ['60']
BIDIR_AVG_PS = ['60']
BIDIR_STD_DEV_PS = ['0']
BIDIR_FLOWSTART = ['1538798807.496015']
BIDIR_FLOWEND = ['1538798807.496015']
BIDIR_FLOWDURATION = ['0']
BIDIR_MIN_PIAT = ['0']
BIDIR_MAX_PIAT = ['0']
BIDIR_AVG_PIAT = ['0']
BIDIR_STD_DEV_PIAT = ['0']

# Forward Flow values (the address/port/protocol lists are
# declared but left empty):
BIDIR_F_SRC_IP = []
BIDIR_F_SRC_PORT = []
BIDIR_F_DST_IP = []
BIDIR_F_DST_PORT = []
BIDIR_F_PROTO = []
BIDIR_F_PKTTOTALCOUNT = ['1']
BIDIR_F_OCTETTOTALCOUNT = ['60']
BIDIR_F_MIN_PS = ['60']
BIDIR_F_MAX_PS = ['60']
BIDIR_F_AVG_PS = ['60']
BIDIR_F_STD_DEV_PS = ['0']
BIDIR_F_FLOWSTART = ['1538798807.496015']
BIDIR_F_FLOWEND = ['1538798807.496015']
BIDIR_F_FLOWDURATION = ['0']
BIDIR_F_MIN_PIAT = ['0']
BIDIR_F_MAX_PIAT = ['0']
BIDIR_F_AVG_PIAT = ['0']
BIDIR_F_STD_DEV_PIAT = ['0']

# Backward Flow values (no backward packets in this capture; the
# address/port/protocol lists are declared but left empty):
BIDIR_B_SRC_IP = []
BIDIR_B_SRC_PORT = []
BIDIR_B_DST_IP = []
BIDIR_B_DST_PORT = []
BIDIR_B_PROTO = []
BIDIR_B_PKTTOTALCOUNT = ['0']
BIDIR_B_OCTETTOTALCOUNT = ['0']
BIDIR_B_MIN_PS = ['0']
BIDIR_B_MAX_PS = ['0']
BIDIR_B_AVG_PS = ['0']
BIDIR_B_STD_DEV_PS = ['0']
BIDIR_B_FLOWSTART = ['0']
BIDIR_B_FLOWEND = ['0']
BIDIR_B_FLOWDURATION = ['0']
BIDIR_B_MIN_PIAT = ['0']
BIDIR_B_MAX_PIAT = ['0']
BIDIR_B_AVG_PIAT = ['0']
BIDIR_B_STD_DEV_PIAT = ['0']
import logging
import logging.handlers
import coloredlogs

import sys
import os

#*** For logging configuration:
from baseclass import BaseClass

#*** YAML for config and policy file parsing:
import yaml

#*** Default config file location parameters:
CONFIG_DIR_DEFAULT = "config"
CONFIG_DIR_USER = "config/user"
CONFIG_FILENAME = "config.yaml"

class Config(BaseClass):
    """
    This class provides methods to ingest the configuration
    file and provides access to the config keys/values.
    Config file is YAML format in config subdirectory, and is
    called 'config.yaml'

    The default config file is loaded first; an optional user config
    file may then override values for keys that already exist in the
    default config.
    """
    def __init__(self, dir_default=CONFIG_DIR_DEFAULT,
                 dir_user=CONFIG_DIR_USER,
                 config_filename=CONFIG_FILENAME):
        """
        Set up bootstrap logging and ingest the default and user
        config files.

        dir_default and dir_user are directories relative to the
        directory holding this module; config_filename is the name of
        the YAML file looked up in each of them.
        """
        #*** Set up basic logging, as can't use
        #*** inherited method due to chicken and egg issue
        #*** (set up properly later)
        logging.basicConfig(level=logging.DEBUG)
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)
        #*** Log to syslog on localhost
        self.handler = logging.handlers.SysLogHandler(
            address=('localhost', 514), facility=19)
        formatter = logging.Formatter(
            'sev=%(levelname)s module=%(name)s func=%(funcName)s %(message)s')
        self.handler.setFormatter(formatter)
        self.logger.addHandler(self.handler)
        coloredlogs.install(
            level="DEBUG",
            logger=self.logger,
            fmt="%(asctime)s.%(msecs)03d %(name)s[%(process)d] %(funcName)s %(levelname)s %(message)s",
            datefmt='%H:%M:%S')

        self.logger.debug("dir_default=%s dir_user=%s config_filename=%s",
                          dir_default, dir_user, config_filename)

        #*** Defaults must be loaded first, as the user config is only
        #*** allowed to override keys that exist in the default config:
        self.ingest_config_default(config_filename, dir_default)
        self.ingest_config_user(config_filename, dir_user)

    def ingest_config_default(self, config_filename, dir_default):
        """
        Ingest default config file into self._config_yaml
        """
        #*** Get working directory:
        working_directory = os.path.dirname(__file__)
        #*** Build the full path and filename for the config file:
        fullpathname = os.path.join(working_directory,
                                    dir_default,
                                    config_filename)
        self._config_yaml = self.ingest_config_file(fullpathname)

    def ingest_config_user(self, config_filename, dir_user):
        """
        Ingest user config file that overrides values set in the
        default config file.

        Returns 1 if the user config was skipped (file does not exist,
        is not a YAML mapping, or is empty), otherwise None.
        """
        #*** Get working directory:
        working_directory = os.path.dirname(__file__)
        #*** Build the full path and filename for the config file:
        fullpathname = os.path.join(working_directory,
                                    dir_user,
                                    config_filename)
        #*** File doesn't have to exist, so check if it exists:
        if not os.path.isfile(fullpathname):
            self.logger.info("User-defined config does not exist, skipping")
            return 1

        #*** Ingest user-defined config file:
        _user_config_yaml = self.ingest_config_file(fullpathname)
        #*** Go through all keys checking key exists in default yaml.
        #*** If doesn't exist, raise warning
        #*** If does exist, overwrite the value in internal config
        if not isinstance(_user_config_yaml, dict):
            self.logger.info("User-defined config missing, skipping")
            return 1
        if not _user_config_yaml:
            self.logger.info("User-defined config is empty, skipping")
            return 1
        #*** Use items(), not iteritems(): dict.iteritems() does not
        #*** exist on Python 3 and would raise AttributeError here:
        for key, value in _user_config_yaml.items():
            if key in self._config_yaml:
                self.logger.info("Overriding a default config parameter"
                                 " with key=%s value=%s", key, value)
                self._config_yaml[key] = value
            else:
                self.logger.error("key=%s does not exist in default "
                                  "config so not importing, value=%s",
                                  key, value)

    def ingest_config_file(self, fullpath):
        """
        Passed full path to a YAML-formatted config file
        and ingest into a dictionary

        Returns whatever yaml.safe_load produces for the file.
        Exits the program (sys.exit) if the file cannot be opened or
        is not valid YAML.
        """
        _config = {}
        self.logger.info("Ingesting config file=%s", fullpath)
        try:
            with open(fullpath, 'r') as file_:
                _config = yaml.safe_load(file_)
        except (IOError, OSError) as exception:
            #*** IO exception:
            self.logger.critical("Failed to open config file %s, "
                                 "error=%s", fullpath, exception)
            sys.exit("Exiting config module. Please create config file")
        except yaml.YAMLError as exception:
            #*** YAML exception:
            if hasattr(exception, 'problem_mark'):
                #*** Mark positions are zero-based, so add one for
                #*** human-readable line and character numbers:
                mark = exception.problem_mark
                self.logger.critical("Failed to open config file %s, "
                                     "error=%s on line=%s character=%s. Exiting",
                                     fullpath, exception, mark.line+1,
                                     mark.column+1)
            else:
                self.logger.critical("Failed to open config file=%s, "
                                     "error=%s. Exiting", fullpath, exception)
            sys.exit("Exiting config module. Please fix config file")
        return _config

    def get_value(self, config_key):
        """
        Passed a key and see if it exists in the config YAML. If it does
        then return the value, if not return 0
        """
        try:
            return self._config_yaml[config_key]
        except KeyError:
            self.logger.error("Config file key %s does "
                              "not exist", config_key)
            return 0

    def inherit_logging(self, config):
        """
        Call base class method to set up logging properly for
        this class now that it is running

        config is stored on self.config before configure_logging is
        called with this module's syslog and console level key names.
        """
        self.config = config
        #*** Set up Logging with inherited base class method:
        self.configure_logging(__name__, "config_logging_level_s",
                               "config_logging_level_c")
        self.logger.info("Config logging now fully configured")
"""
flowRecorder - a packet parser tool by Adrian Pekar

flowRecorder creates metadata about flows from packets,
either from live capture or from capture file
"""

import time

# Import sys and getopt for command line argument parsing:
import sys
import getopt

# Logging:
import logging

# Colorise the logs:
import coloredlogs

# For live packet capture:
import pcapy

# Import dpkt for packet parsing:
import dpkt

# flowRecorder project imports:
import config
import flows

# flowRecorder, for logging configuration:
from baseclass import BaseClass

VERSION = "0.2.0"

# Exit status used for command line usage errors (matches the status
# already used when getopt rejects the options):
EXIT_USAGE_ERROR = 2

# Configure Logging:
logger = logging.getLogger(__name__)
coloredlogs.install(level='DEBUG', logger=logger,
                    fmt="%(asctime)s %(module)s[%(process)d] %(funcName)s " +
                    "%(levelname)s %(message)s",
                    datefmt='%H:%M:%S')

class FlowRecorder(BaseClass):
    """
    This class provides core methods for flowRecorder
    """
    def __init__(self, CLI_arguments):
        """
        Initialise the FlowRecorder class

        CLI_arguments is the list of command line arguments without
        the program name (the caller passes sys.argv[1:]).

        Exits with status 0 after printing help or version, and with
        a non-zero status on any usage error.
        """
        # Instantiate config class which imports configuration file
        # config.yaml and provides access to keys/values:
        self.config = config.Config()

        # Now set config module to log properly:
        self.config.inherit_logging(self.config)

        # Set up Logging with inherited base class method:
        self.configure_logging(__name__, "flowRecorder_logging_level_s",
                               "flowRecorder_logging_level_c")

        # Parse command line parameters:
        self.input_filename = ""
        self.interface = ""
        self.output_filename = ""
        # Direction parameter recorded in mode:
        self.mode = ""
        try:
            # Positional arguments are not used, so are discarded:
            opts, _ = getopt.getopt(CLI_arguments, "d:f:hi:o:v",
                                    ["direction=",
                                     "file=",
                                     "help",
                                     "interface=",
                                     "out=",
                                     "version"])
        except getopt.GetoptError as err:
            logger.critical('flowRecorder: Error with options: %s', err)
            print_help()
            sys.exit(EXIT_USAGE_ERROR)
        for opt, arg in opts:
            if opt in ("-d", "--direction"):
                self.mode = arg
            elif opt in ("-f", "--file"):
                self.input_filename = arg
            elif opt in ("-h", "--help"):
                print_help()
                sys.exit()
            elif opt in ("-i", "--interface"):
                self.interface = arg
            elif opt in ("-o", "--out"):
                self.output_filename = arg
            elif opt in ("-v", "--version"):
                print("\n\n flowRecorder version", VERSION, "\n")
                sys.exit()
            else:
                print("ERROR: unhandled argument", opt)
                sys.exit(EXIT_USAGE_ERROR)

        # Assume bidirectional if not specified:
        if not self.mode:
            logger.info("Direction not specified. Defaulting to bidirectional")
            self.mode = 'b'
        elif self.mode not in ('b', 'u'):
            # Sanity check direction input:
            logger.critical("Invalid direction %s", self.mode)
            sys.exit(EXIT_USAGE_ERROR)

        # Must have a file OR interface specified:
        if self.input_filename and self.interface:
            logger.critical("file and interface specified. Choose only one")
            sys.exit(EXIT_USAGE_ERROR)
        if not self.input_filename and not self.interface:
            logger.critical("An input file or interface must be specified")
            sys.exit(EXIT_USAGE_ERROR)

        # Must have an output file specified:
        if not self.output_filename:
            logger.critical("Output filename not set")
            sys.exit(EXIT_USAGE_ERROR)

        # Instantiate Flows Class:
        self.flows = flows.Flows(self.config, self.mode)

    def run(self):
        """
        Run flowRecorder

        Ingests packets from the input file if one was given, otherwise
        from live capture, then writes the flow results to the output
        file and logs timings.
        """
        self.logger.info("Starting flowRecorder")
        time0 = time.time()
        if self.input_filename:
            # File Mode
            self._run_file(time0)
        else:
            # Live Packet Capture Mode
            self._run_live()
        # Write results to file:
        time3 = time.time()
        self.flows.write(self.output_filename)
        time4 = time.time()
        self.logger.info("Wrote results in %s seconds", time4 - time3)
        self.flows.stats()
        time5 = time.time()
        self.logger.info("Finished, total time %s seconds", time5 - time0)

    def _run_file(self, time0):
        """
        Read in packet capture file

        time0 is the time (as returned by time.time()) at which the
        run started; it is only used to log how long opening took.
        """
        self.logger.info("Opening PCAP file=%s", self.input_filename)
        # Open the PCAP file:
        with open(self.input_filename, 'rb') as pcap_file:
            pcap_file_handle = dpkt.pcap.Reader(pcap_file)
            time1 = time.time()
            self.logger.info("Opened PCAP in %s seconds", time1 - time0)
            try:
                # Process PCAP packets into flows:
                self.flows.ingest_pcap(pcap_file_handle)
            except (KeyboardInterrupt, SystemExit):
                # Stop ingesting but carry on, so that the flows
                # gathered so far are still written out by run():
                self.logger.info("SIGINT (Ctrl-c) detected.")
            time2 = time.time()
            self.logger.info("Processed in %s seconds", time2 - time1)

    def _run_live(self):
        """
        Run live packet capture

        Captures on self.interface until interrupted (Ctrl-c), passing
        each packet to self.flows.ingest_packet.
        """
        self.logger.info("Running live packet capture")
        # Retrieve parameters from config:
        maxlen = self.config.get_value("maxlen")
        promiscuous = self.config.get_value("promiscuous")
        read_timeout = self.config.get_value("read_timeout")
        # Instantiate sniffer:
        sniffer = pcapy.open_live(self.interface, maxlen, promiscuous,
                                  read_timeout)
        # Start sniffing:
        sniffing = True
        while sniffing:
            self.logger.info("Start sniffing on interface %s", self.interface)
            self.logger.info("Sniffing can be aborted via pressing Ctrl-c")
            try:
                # NOTE(review): a packet count of 0 presumably means
                # "no limit" - confirm against the pcapy documentation
                sniffer.loop(0, self.flows.ingest_packet)
            except (KeyboardInterrupt, SystemExit):
                self.logger.info("SIGINT (Ctrl-c) detected.")
                sniffing = False

def print_help():
    """
    Print out the help instructions
    """
    print("""
flowRecorder
-------

flowRecorder parses packets and generates flow records. It has two modes:

1) Live packet capture from a NIC
OR
2) Parsing packets from a PCAP file.

Example Usage:

To read in a PCAP file and process the packets into flows in one direction,
and save the results into a CSV file the following command can be used:

  python3 flowRecorder.py -d u -f p.pcap -o results.csv

To start capturing the packets from a NIC (en0) and organize them into flow
records in bidirection, the following command can be used:

  sudo python3 flowRecorder.py -d b -i en0 -o results.csv

Options:
 -d  --direction    Unidirectional (u) or Bidirectional (b) flows
                    (default is b)
 -f  --file         Input PCAP filename
 -h  --help         Display this help and exit
 -i  --interface    Name of interface (NIC) to capture from
 -o  --out          Output filename for flow results CSV export
 -v  --version      Show version information and exit
 """)

if __name__ == "__main__":
    # Instantiate the FlowRecorder class with command line arguments
    # from position 1:
    flowRecorder = FlowRecorder(sys.argv[1:])
    # Start flowRecorder:
    flowRecorder.run()
"""A setuptools based setup module.
See:
https://packaging.python.org/en/latest/distributing.html
https://github.com/pypa/sampleproject
"""

# Always prefer setuptools over distutils
from setuptools import setup, find_packages
from os import path
# io.open is needed for projects that support Python 2.7
# It ensures open() defaults to text mode with universal newlines,
# and accepts an argument to specify the text encoding
# Python 3 only projects can skip this import
from io import open

# NOTE(review): flowRecorder/flowRecorder.py declares VERSION = "0.2.0";
# confirm which of the two version numbers is current.
VERSION = '0.1.0'

here = path.abspath(path.dirname(__file__))

# Get the long description from the README file.
# The repository ships README.RST (reStructuredText), not README.md:
with open(path.join(here, 'README.RST'), encoding='utf-8') as f:
    long_description = f.read()

# Arguments marked as "Required" below must be included for upload to PyPI.
# Fields marked as "Optional" may be commented out.

setup(
    # Name of the project as registered on PyPI; determines how users
    # install it (pip install flowRecorder).
    # https://packaging.python.org/specifications/core-metadata/#name
    name='flowRecorder',  # Required

    license="Apache License 2.0",

    # Versions should comply with PEP 440:
    # https://www.python.org/dev/peps/pep-0440/
    version=VERSION,  # Required

    # One-line description, corresponds to the "Summary" metadata field:
    # https://packaging.python.org/specifications/core-metadata/#summary
    description='A tool that organises packets into flow records',  # Required

    # Longer description shown on the PyPI project page, read from the
    # README above. Corresponds to the "Description" metadata field:
    # https://packaging.python.org/specifications/core-metadata/#description-optional
    long_description=long_description,  # Optional

    # Format of long_description; valid values are text/plain, text/x-rst
    # and text/markdown. The README is reStructuredText.
    # https://packaging.python.org/specifications/core-metadata/#description-content-type-optional
    long_description_content_type='text/x-rst',  # Optional

    # Project homepage, corresponds to the "Home-Page" metadata field:
    # https://packaging.python.org/specifications/core-metadata/#home-page-optional
    url='https://github.com/drnpkr/flowRecorder/',  # Optional

    # Name of the author / owning organisation.
    author='Adrian Pekar',  # Optional

    # Email address corresponding to the author listed above.
    author_email='adrian.pekar@gmail.com',  # Optional

    # Classifiers help users find the project by categorising it.
    # Every entry must be a valid classifier from
    # https://pypi.org/classifiers/
    classifiers=[  # Optional
        # How mature is this project? Common values are
        #   3 - Alpha
        #   4 - Beta
        #   5 - Production/Stable
        'Development Status :: 3 - Alpha',

        # Indicate who your project is intended for
        'Intended Audience :: Science/Research',
        'Topic :: System :: Networking',
        'Topic :: Utilities',

        # Pick your license as you wish
        'License :: OSI Approved :: Apache Software License',

        # Specify the Python versions you support here. In particular, ensure
        # that you indicate whether you support Python 2, Python 3 or both.
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3 :: Only',
    ],

    # Keywords shown on the project page. Note that this is a string of
    # words separated by whitespace, not a list.
    keywords='packet parsing ipfix flow-record flow',  # Optional

    # Packages to distribute.
    # NOTE(review): the flowRecorder directory does not appear to contain
    # an __init__.py, in which case find_packages() will not pick it up -
    # confirm the intended package layout.
    packages=find_packages(exclude=['contrib', 'docs', 'tests']),  # Required

    # Packages that the project depends on to run. Each entry must be an
    # installable project, so standard library modules (socket, hashlib)
    # must not be listed here. pyyaml and coloredlogs are imported by the
    # config and flowRecorder modules.
    # https://packaging.python.org/en/latest/requirements.html
    install_requires=['dpkt',
                      'pcapy',
                      'pyyaml',
                      'coloredlogs',
                      'pandas',
                      'numpy'],  # Optional

    # Additional URLs that are relevant to the project, rendered as links
    # on PyPI. Corresponds to the "Project-URL" metadata fields:
    # https://packaging.python.org/specifications/core-metadata/#project-url-multiple-use
    project_urls={  # Optional
        'Documentation': 'https://github.com/drnpkr/flowRecorder/',
        'Source': 'https://github.com/drnpkr/flowRecorder/',
    },
)
13 | 14 | """ 15 | Packets with metadata to use in testing of flowRecorder 16 | 17 | This flow is a two TCP SYN packets with same source port between 18 | same IP addresses separated by 3601 seconds for testing flow 19 | expiration 20 | 21 | To create test packet data, capture packet in Wireshark and: 22 | 23 | For the packet summary: 24 | Right-click packet in top pane, Copy -> Summary (text). 25 | Edit pasted text as appropriate 26 | 27 | For the packet hex: 28 | Right-click packet in top pane, Copy -> Bytes -> Hex Stream 29 | 30 | For the packet timestamp: 31 | Expand 'Frame' in the middle pane, 32 | right-click 'Epoch Time' Copy -> Value 33 | 34 | Packet capture file is 'tcp_syn_flow_expiration.pcap' 35 | """ 36 | 37 | import binascii 38 | 39 | name = 'groundtruth_tcp_syn_flow_expiration.py' 40 | capture_file = 'tcp_syn_flow_expiration.pcap' 41 | 42 | #======================== Initiate Lists ====================== 43 | #*** Raw packet data: 44 | RAW = [] 45 | #*** Packet on the wire lengths in bytes: 46 | LEN = [] 47 | #*** Ethernet parameters: 48 | ETH_SRC = [] 49 | ETH_DST = [] 50 | ETH_TYPE = [] 51 | #*** IP addresses: 52 | IP_SRC = [] 53 | IP_DST = [] 54 | #*** IP protocol number in decimal: 55 | PROTO = [] 56 | #*** Transport-layer protocol numbers in decimal: 57 | TP_SRC = [] 58 | TP_DST = [] 59 | #*** Transport-layer sequence numbers in decimal: 60 | TP_SEQ_SRC = [] 61 | TP_SEQ_DST = [] 62 | #*** TCP FLAGS: 63 | TCP_SYN = [] 64 | TCP_FIN = [] 65 | TCP_RST = [] 66 | TCP_PSH = [] 67 | TCP_ACK = [] 68 | #*** HEX-encoded payload 69 | PAYLOAD = [] 70 | #*** Packet direction, c2s (client to server) or s2c 71 | DIRECTION = [] 72 | 73 | # Unidir Flow values: 74 | UNIDIR_SRC_IP = [] 75 | UNIDIR_SRC_PORT = [] 76 | UNIDIR_DST_IP = [] 77 | UNIDIR_DST_PORT = [] 78 | UNIDIR_PROTO = [] 79 | UNIDIR_PKTTOTALCOUNT = [] 80 | UNIDIR_OCTETTOTALCOUNT = [] 81 | UNIDIR_MIN_PS = [] 82 | UNIDIR_MAX_PS = [] 83 | UNIDIR_AVG_PS = [] 84 | UNIDIR_STD_DEV_PS = [] 85 | UNIDIR_FLOWSTART 
= [] 86 | UNIDIR_FLOWEND = [] 87 | UNIDIR_FLOWDURATION = [] 88 | UNIDIR_MIN_PIAT = [] 89 | UNIDIR_MAX_PIAT = [] 90 | UNIDIR_AVG_PIAT = [] 91 | UNIDIR_STD_DEV_PIAT = [] 92 | # Bidirectional: 93 | # Combined Flow values: 94 | BIDIR_SRC_IP = [] 95 | BIDIR_SRC_PORT = [] 96 | BIDIR_DST_IP = [] 97 | BIDIR_DST_PORT = [] 98 | BIDIR_PROTO = [] 99 | BIDIR_PKTTOTALCOUNT = [] 100 | BIDIR_OCTETTOTALCOUNT = [] 101 | BIDIR_MIN_PS = [] 102 | BIDIR_MAX_PS = [] 103 | BIDIR_AVG_PS = [] 104 | BIDIR_STD_DEV_PS = [] 105 | BIDIR_FLOWSTART = [] 106 | BIDIR_FLOWEND = [] 107 | BIDIR_FLOWDURATION = [] 108 | BIDIR_MIN_PIAT = [] 109 | BIDIR_MAX_PIAT = [] 110 | BIDIR_AVG_PIAT = [] 111 | BIDIR_STD_DEV_PIAT = [] 112 | # Forward Flow values: 113 | BIDIR_F_SRC_IP = [] 114 | BIDIR_F_SRC_PORT = [] 115 | BIDIR_F_DST_IP = [] 116 | BIDIR_F_DST_PORT = [] 117 | BIDIR_F_PROTO = [] 118 | BIDIR_F_PKTTOTALCOUNT = [] 119 | BIDIR_F_OCTETTOTALCOUNT = [] 120 | BIDIR_F_MIN_PS = [] 121 | BIDIR_F_MAX_PS = [] 122 | BIDIR_F_AVG_PS = [] 123 | BIDIR_F_STD_DEV_PS = [] 124 | BIDIR_F_FLOWSTART = [] 125 | BIDIR_F_FLOWEND = [] 126 | BIDIR_F_FLOWDURATION = [] 127 | BIDIR_F_MIN_PIAT = [] 128 | BIDIR_F_MAX_PIAT = [] 129 | BIDIR_F_AVG_PIAT = [] 130 | BIDIR_F_STD_DEV_PIAT = [] 131 | # Backward Flow values: 132 | BIDIR_B_SRC_IP = [] 133 | BIDIR_B_SRC_PORT = [] 134 | BIDIR_B_DST_IP = [] 135 | BIDIR_B_DST_PORT = [] 136 | BIDIR_B_PROTO = [] 137 | BIDIR_B_PKTTOTALCOUNT = [] 138 | BIDIR_B_OCTETTOTALCOUNT = [] 139 | BIDIR_B_MIN_PS = [] 140 | BIDIR_B_MAX_PS = [] 141 | BIDIR_B_AVG_PS = [] 142 | BIDIR_B_STD_DEV_PS = [] 143 | BIDIR_B_FLOWSTART = [] 144 | BIDIR_B_FLOWEND = [] 145 | BIDIR_B_FLOWDURATION = [] 146 | BIDIR_B_MIN_PIAT = [] 147 | BIDIR_B_MAX_PIAT = [] 148 | BIDIR_B_AVG_PIAT = [] 149 | BIDIR_B_STD_DEV_PIAT = [] 150 | 151 | #*** Packet 1 - TCP SYN 152 | # 1 1538857982.301350 10.0.2.15 10.0.2.2 TCP 54 40508 443 [SYN] Seq=0 Win=8192 Len=0 153 | 
RAW.append(binascii.unhexlify("525400123502080027fc133d08004500002800010000400662bf0a00020f0a0002029e3c01bb000030390000000050022000a7a10000")) 154 | LEN.append(40) 155 | ETH_SRC.append('08:00:27:fc:13:3d') 156 | ETH_DST.append('52:54:00:12:35:02') 157 | ETH_TYPE.append(2048) 158 | IP_SRC.append('10.0.2.15') 159 | IP_DST.append('10.0.2.2') 160 | PROTO.append(6) 161 | TP_SRC.append(40508) 162 | TP_DST.append(443) 163 | TP_SEQ_SRC.append(0) 164 | TP_SEQ_DST.append(0) 165 | TCP_SYN.append(1) 166 | TCP_FIN.append(0) 167 | TCP_RST.append(0) 168 | TCP_PSH.append(0) 169 | TCP_ACK.append(0) 170 | PAYLOAD.append("") 171 | DIRECTION.append("c2s") 172 | 173 | #*** Packet 2 - TCP SYN 174 | # 2 1538861583.416666 10.0.2.15 10.0.2.2 TCP 54 [TCP Retransmission] 40508 443 [SYN] Seq=0 Win=8192 Len=0 175 | RAW.append(binascii.unhexlify("525400123502080027fc133d08004500002800010000400662bf0a00020f0a0002029e3c01bb000030390000000050022000a7a10000")) 176 | LEN.append(40) 177 | ETH_SRC.append('08:00:27:fc:13:3d') 178 | ETH_DST.append('52:54:00:12:35:02') 179 | ETH_TYPE.append(2048) 180 | IP_SRC.append('10.0.2.15') 181 | IP_DST.append('10.0.2.2') 182 | PROTO.append(6) 183 | TP_SRC.append(40508) 184 | TP_DST.append(443) 185 | TP_SEQ_SRC.append(0) 186 | TP_SEQ_DST.append(0) 187 | TCP_SYN.append(1) 188 | TCP_FIN.append(0) 189 | TCP_RST.append(0) 190 | TCP_PSH.append(0) 191 | TCP_ACK.append(0) 192 | PAYLOAD.append("") 193 | DIRECTION.append("c2s") 194 | 195 | #*** Metadata for whole flow: 196 | FLOW_IP_CLIENT = '10.0.2.15' 197 | FLOW_IP_SERVER = '10.0.2.2' 198 | 199 | # Unidirectional flow values: 200 | # Flow 1: 201 | UNIDIR_SRC_IP.append('10.0.2.15') 202 | UNIDIR_SRC_PORT.append('40508') 203 | UNIDIR_DST_IP.append('10.0.2.2') 204 | UNIDIR_DST_PORT.append('443') 205 | UNIDIR_PROTO.append('6') 206 | UNIDIR_PKTTOTALCOUNT.append('1') 207 | UNIDIR_OCTETTOTALCOUNT.append('40') 208 | UNIDIR_MIN_PS.append('40') 209 | UNIDIR_MAX_PS.append('40') 210 | UNIDIR_AVG_PS.append('40') 211 | 
UNIDIR_STD_DEV_PS.append('0') 212 | UNIDIR_FLOWSTART.append('1538857982.30135') 213 | UNIDIR_FLOWEND.append('1538857982.30135') 214 | UNIDIR_FLOWDURATION.append('0') 215 | UNIDIR_MIN_PIAT.append('0') 216 | UNIDIR_MAX_PIAT.append('0') 217 | UNIDIR_AVG_PIAT.append('0') 218 | UNIDIR_STD_DEV_PIAT.append('0') 219 | # Flow 2: 220 | UNIDIR_SRC_IP.append('10.0.2.15') 221 | UNIDIR_SRC_PORT.append('40508') 222 | UNIDIR_DST_IP.append('10.0.2.2') 223 | UNIDIR_DST_PORT.append('443') 224 | UNIDIR_PROTO.append('6') 225 | UNIDIR_PKTTOTALCOUNT.append('1') 226 | UNIDIR_OCTETTOTALCOUNT.append('40') 227 | UNIDIR_MIN_PS.append('40') 228 | UNIDIR_MAX_PS.append('40') 229 | UNIDIR_AVG_PS.append('40') 230 | UNIDIR_STD_DEV_PS.append('0') 231 | UNIDIR_FLOWSTART.append('1538861583.416666') 232 | UNIDIR_FLOWEND.append('1538861583.416666') 233 | UNIDIR_FLOWDURATION.append('0') 234 | UNIDIR_MIN_PIAT.append('0') 235 | UNIDIR_MAX_PIAT.append('0') 236 | UNIDIR_AVG_PIAT.append('0') 237 | UNIDIR_STD_DEV_PIAT.append('0') 238 | # Bidirectional Combined Flow 1: 239 | BIDIR_SRC_IP.append('10.0.2.15') 240 | BIDIR_SRC_PORT.append('40508') 241 | BIDIR_DST_IP.append('10.0.2.2') 242 | BIDIR_DST_PORT.append('443') 243 | BIDIR_PROTO.append('6') 244 | BIDIR_PKTTOTALCOUNT.append('1') 245 | BIDIR_OCTETTOTALCOUNT.append('40') 246 | BIDIR_MIN_PS.append('40') 247 | BIDIR_MAX_PS.append('40') 248 | BIDIR_AVG_PS.append('40') 249 | BIDIR_STD_DEV_PS.append('0') 250 | BIDIR_FLOWSTART.append('1538857982.30135') 251 | BIDIR_FLOWEND.append('1538857982.30135') 252 | BIDIR_FLOWDURATION.append('0') 253 | BIDIR_MIN_PIAT.append('0') 254 | BIDIR_MAX_PIAT.append('0') 255 | BIDIR_AVG_PIAT.append('0') 256 | BIDIR_STD_DEV_PIAT.append('0') 257 | BIDIR_F_PKTTOTALCOUNT.append('1') 258 | BIDIR_F_OCTETTOTALCOUNT.append('40') 259 | BIDIR_F_MIN_PS.append('40') 260 | BIDIR_F_MAX_PS.append('40') 261 | BIDIR_F_AVG_PS.append('40') 262 | BIDIR_F_STD_DEV_PS.append('0') 263 | BIDIR_F_FLOWSTART.append('1538857982.30135') 264 | 
BIDIR_F_FLOWEND.append('1538857982.30135') 265 | BIDIR_F_FLOWDURATION.append('0') 266 | BIDIR_F_MIN_PIAT.append('0') 267 | BIDIR_F_MAX_PIAT.append('0') 268 | BIDIR_F_AVG_PIAT.append('0') 269 | BIDIR_F_STD_DEV_PIAT.append('0') 270 | BIDIR_B_PKTTOTALCOUNT.append('0') 271 | BIDIR_B_OCTETTOTALCOUNT.append('0') 272 | BIDIR_B_MIN_PS.append('0') 273 | BIDIR_B_MAX_PS.append('0') 274 | BIDIR_B_AVG_PS.append('0') 275 | BIDIR_B_STD_DEV_PS.append('0') 276 | BIDIR_B_FLOWSTART.append('0') 277 | BIDIR_B_FLOWEND.append('0') 278 | BIDIR_B_FLOWDURATION.append('0') 279 | BIDIR_B_MIN_PIAT.append('0') 280 | BIDIR_B_MAX_PIAT.append('0') 281 | BIDIR_B_AVG_PIAT.append('0') 282 | BIDIR_B_STD_DEV_PIAT.append('0') 283 | # Bidirectional Combined Flow 2: 284 | BIDIR_SRC_IP.append('10.0.2.15') 285 | BIDIR_SRC_PORT.append('40508') 286 | BIDIR_DST_IP.append('10.0.2.2') 287 | BIDIR_DST_PORT.append('443') 288 | BIDIR_PROTO.append('6') 289 | BIDIR_PKTTOTALCOUNT.append('1') 290 | BIDIR_OCTETTOTALCOUNT.append('40') 291 | BIDIR_MIN_PS.append('40') 292 | BIDIR_MAX_PS.append('40') 293 | BIDIR_AVG_PS.append('40') 294 | BIDIR_STD_DEV_PS.append('0') 295 | BIDIR_FLOWSTART.append('1538861583.416666') 296 | BIDIR_FLOWEND.append('1538861583.416666') 297 | BIDIR_FLOWDURATION.append('0') 298 | BIDIR_MIN_PIAT.append('0') 299 | BIDIR_MAX_PIAT.append('0') 300 | BIDIR_AVG_PIAT.append('0') 301 | BIDIR_STD_DEV_PIAT.append('0') 302 | BIDIR_F_PKTTOTALCOUNT.append('1') 303 | BIDIR_F_OCTETTOTALCOUNT.append('40') 304 | BIDIR_F_MIN_PS.append('40') 305 | BIDIR_F_MAX_PS.append('40') 306 | BIDIR_F_AVG_PS.append('40') 307 | BIDIR_F_STD_DEV_PS.append('0') 308 | BIDIR_F_FLOWSTART.append('1538861583.416666') 309 | BIDIR_F_FLOWEND.append('1538861583.416666') 310 | BIDIR_F_FLOWDURATION.append('0') 311 | BIDIR_F_MIN_PIAT.append('0') 312 | BIDIR_F_MAX_PIAT.append('0') 313 | BIDIR_F_AVG_PIAT.append('0') 314 | BIDIR_F_STD_DEV_PIAT.append('0') 315 | BIDIR_B_PKTTOTALCOUNT.append('0') 316 | BIDIR_B_OCTETTOTALCOUNT.append('0') 317 | 
BIDIR_B_MIN_PS.append('0') 318 | BIDIR_B_MAX_PS.append('0') 319 | BIDIR_B_AVG_PS.append('0') 320 | BIDIR_B_STD_DEV_PS.append('0') 321 | BIDIR_B_FLOWSTART.append('0') 322 | BIDIR_B_FLOWEND.append('0') 323 | BIDIR_B_FLOWDURATION.append('0') 324 | BIDIR_B_MIN_PIAT.append('0') 325 | BIDIR_B_MAX_PIAT.append('0') 326 | BIDIR_B_AVG_PIAT.append('0') 327 | BIDIR_B_STD_DEV_PIAT.append('0') 328 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /tests/groundtruth_PING1.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 
3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 10 | # implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | """ 15 | Packets with metadata to use in testing of flowRecorder 16 | 17 | This flow is two separate ICMP echo request/reply (PING) pairs 18 | between same source and destination 19 | 20 | To create test packet data, capture packet in Wireshark and: 21 | 22 | For the packet summary: 23 | Right-click packet in top pane, Copy -> Summary (text). 24 | Edit pasted text as appropriate 25 | 26 | For the packet hex: 27 | Right-click packet in top pane, Copy -> Bytes -> Hex Stream 28 | 29 | For the packet timestamp: 30 | Expand 'Frame' in the middle pane, 31 | right-click 'Epoch Time' Copy -> Value 32 | 33 | Packet capture file is 'PING1.pcap' 34 | """ 35 | 36 | import binascii 37 | 38 | name = 'groundtruth_PING1.py' 39 | capture_file = 'PING1.pcap' 40 | 41 | #======================== Initiate Lists ====================== 42 | #*** Raw packet data: 43 | RAW = [] 44 | #*** Packet on the wire lengths in bytes: 45 | LEN = [] 46 | #*** Ethernet parameters: 47 | ETH_SRC = [] 48 | ETH_DST = [] 49 | ETH_TYPE = [] 50 | #*** IP addresses: 51 | IP_SRC = [] 52 | IP_DST = [] 53 | #*** IP protocol number in decimal: 54 | PROTO = [] 55 | #*** Transport-layer port numbers in decimal: 56 | TP_SRC = [] 57 | TP_DST = [] 58 | #*** Transport-layer sequence numbers in decimal: 59 | TP_SEQ_SRC = [] 60 | TP_SEQ_DST = [] 61 | #*** TCP FLAGS: 62 | TCP_SYN = [] 63 | TCP_FIN = [] 64 | TCP_RST = [] 65 | TCP_PSH = [] 66 | TCP_ACK = [] 67 | #*** HEX-encoded payload 68 | PAYLOAD = [] 69 | #*** Packet direction, c2s (client to server) or s2c 70 | 
DIRECTION = [] 71 | 72 | # Unidir Flow values: 73 | UNIDIR_SRC_IP = [] 74 | UNIDIR_SRC_PORT = [] 75 | UNIDIR_DST_IP = [] 76 | UNIDIR_DST_PORT = [] 77 | UNIDIR_PROTO = [] 78 | UNIDIR_PKTTOTALCOUNT = [] 79 | UNIDIR_OCTETTOTALCOUNT = [] 80 | UNIDIR_MIN_PS = [] 81 | UNIDIR_MAX_PS = [] 82 | UNIDIR_AVG_PS = [] 83 | UNIDIR_STD_DEV_PS = [] 84 | UNIDIR_FLOWSTART = [] 85 | UNIDIR_FLOWEND = [] 86 | UNIDIR_FLOWDURATION = [] 87 | UNIDIR_MIN_PIAT = [] 88 | UNIDIR_MAX_PIAT = [] 89 | UNIDIR_AVG_PIAT = [] 90 | UNIDIR_STD_DEV_PIAT = [] 91 | # Bidirectional: 92 | # Combined Flow values: 93 | BIDIR_SRC_IP = [] 94 | BIDIR_SRC_PORT = [] 95 | BIDIR_DST_IP = [] 96 | BIDIR_DST_PORT = [] 97 | BIDIR_PROTO = [] 98 | BIDIR_PKTTOTALCOUNT = [] 99 | BIDIR_OCTETTOTALCOUNT = [] 100 | BIDIR_MIN_PS = [] 101 | BIDIR_MAX_PS = [] 102 | BIDIR_AVG_PS = [] 103 | BIDIR_STD_DEV_PS = [] 104 | BIDIR_FLOWSTART = [] 105 | BIDIR_FLOWEND = [] 106 | BIDIR_FLOWDURATION = [] 107 | BIDIR_MIN_PIAT = [] 108 | BIDIR_MAX_PIAT = [] 109 | BIDIR_AVG_PIAT = [] 110 | BIDIR_STD_DEV_PIAT = [] 111 | # Forward Flow values: 112 | BIDIR_F_SRC_IP = [] 113 | BIDIR_F_SRC_PORT = [] 114 | BIDIR_F_DST_IP = [] 115 | BIDIR_F_DST_PORT = [] 116 | BIDIR_F_PROTO = [] 117 | BIDIR_F_PKTTOTALCOUNT = [] 118 | BIDIR_F_OCTETTOTALCOUNT = [] 119 | BIDIR_F_MIN_PS = [] 120 | BIDIR_F_MAX_PS = [] 121 | BIDIR_F_AVG_PS = [] 122 | BIDIR_F_STD_DEV_PS = [] 123 | BIDIR_F_FLOWSTART = [] 124 | BIDIR_F_FLOWEND = [] 125 | BIDIR_F_FLOWDURATION = [] 126 | BIDIR_F_MIN_PIAT = [] 127 | BIDIR_F_MAX_PIAT = [] 128 | BIDIR_F_AVG_PIAT = [] 129 | BIDIR_F_STD_DEV_PIAT = [] 130 | # Backward Flow values: 131 | BIDIR_B_SRC_IP = [] 132 | BIDIR_B_SRC_PORT = [] 133 | BIDIR_B_DST_IP = [] 134 | BIDIR_B_DST_PORT = [] 135 | BIDIR_B_PROTO = [] 136 | BIDIR_B_PKTTOTALCOUNT = [] 137 | BIDIR_B_OCTETTOTALCOUNT = [] 138 | BIDIR_B_MIN_PS = [] 139 | BIDIR_B_MAX_PS = [] 140 | BIDIR_B_AVG_PS = [] 141 | BIDIR_B_STD_DEV_PS = [] 142 | BIDIR_B_FLOWSTART = [] 143 | BIDIR_B_FLOWEND = [] 144 | 
BIDIR_B_FLOWDURATION = [] 145 | BIDIR_B_MIN_PIAT = [] 146 | BIDIR_B_MAX_PIAT = [] 147 | BIDIR_B_AVG_PIAT = [] 148 | BIDIR_B_STD_DEV_PIAT = [] 149 | 150 | #*** Packet 1 - Echo Request 151 | # 1 0.000000000 10.0.2.15 10.0.2.2 ICMP 98 Echo (ping) request id=0x1ac1, seq=1/256, ttl=64 (reply in 2) 152 | RAW.append(binascii.unhexlify("525400123502080027fc133d080045000054ec934000400136050a00020f0a0002020800c96c1ac10001b4c8b55b00000000e2d9080000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637")) 153 | LEN.append(84) 154 | ETH_SRC.append('08:00:27:fc:13:3d') 155 | ETH_DST.append('52:54:00:12:35:02') 156 | ETH_TYPE.append(2048) 157 | IP_SRC.append('10.0.2.15') 158 | IP_DST.append('10.0.2.2') 159 | PROTO.append(1) 160 | TP_SRC.append(0) 161 | TP_DST.append(0) 162 | TP_SEQ_SRC.append(0) 163 | TP_SEQ_DST.append(0) 164 | TCP_SYN.append(0) 165 | TCP_FIN.append(0) 166 | TCP_RST.append(0) 167 | TCP_PSH.append(0) 168 | TCP_ACK.append(0) 169 | PAYLOAD.append("") 170 | DIRECTION.append("c2s") 171 | 172 | #*** Packet 2 - Echo Reply 173 | # 2 0.000188156 10.0.2.2 10.0.2.15 ICMP 98 Echo (ping) reply id=0x1ac1, seq=1/256, ttl=64 (request in 1) 174 | RAW.append(binascii.unhexlify("080027fc133d525400123502080045000054c47b400040015e1d0a0002020a00020f0000d16c1ac10001b4c8b55b00000000e2d9080000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637")) 175 | LEN.append(84) 176 | ETH_SRC.append('52:54:00:12:35:02') 177 | ETH_DST.append('08:00:27:fc:13:3d') 178 | ETH_TYPE.append(2048) 179 | IP_SRC.append('10.0.2.2') 180 | IP_DST.append('10.0.2.15') 181 | PROTO.append(1) 182 | TP_SRC.append(0) 183 | TP_DST.append(0) 184 | TP_SEQ_SRC.append(0) 185 | TP_SEQ_DST.append(0) 186 | TCP_SYN.append(0) 187 | TCP_FIN.append(0) 188 | TCP_RST.append(0) 189 | TCP_PSH.append(0) 190 | TCP_ACK.append(0) 191 | PAYLOAD.append("") 192 | DIRECTION.append("s2c") 193 | 194 | #*** Packet 3 - Echo Request 195 | # 3 1.018124169 10.0.2.15 10.0.2.2 ICMP 98 
Echo (ping) request id=0x1ac1, seq=2/512, ttl=64 (reply in 4) 196 | RAW.append(binascii.unhexlify("525400123502080027fc133d080045000054ed734000400135250a00020f0a00020208000d251ac10002b5c8b55b000000009d20090000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637")) 197 | LEN.append(84) 198 | ETH_SRC.append('08:00:27:fc:13:3d') 199 | ETH_DST.append('52:54:00:12:35:02') 200 | ETH_TYPE.append(2048) 201 | IP_SRC.append('10.0.2.15') 202 | IP_DST.append('10.0.2.2') 203 | PROTO.append(1) 204 | TP_SRC.append(0) 205 | TP_DST.append(0) 206 | TP_SEQ_SRC.append(0) 207 | TP_SEQ_DST.append(0) 208 | TCP_SYN.append(0) 209 | TCP_FIN.append(0) 210 | TCP_RST.append(0) 211 | TCP_PSH.append(0) 212 | TCP_ACK.append(0) 213 | PAYLOAD.append("") 214 | DIRECTION.append("c2s") 215 | 216 | #*** Packet 4 - Echo Reply 217 | # 4 1.018347117 10.0.2.2 10.0.2.15 ICMP 98 Echo (ping) reply id=0x1ac1, seq=2/512, ttl=64 (request in 3) 218 | RAW.append(binascii.unhexlify("080027fc133d525400123502080045000054c47c400040015e1c0a0002020a00020f000015251ac10002b5c8b55b000000009d20090000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637")) 219 | LEN.append(84) 220 | ETH_SRC.append('52:54:00:12:35:02') 221 | ETH_DST.append('08:00:27:fc:13:3d') 222 | ETH_TYPE.append(2048) 223 | IP_SRC.append('10.0.2.2') 224 | IP_DST.append('10.0.2.15') 225 | PROTO.append(1) 226 | TP_SRC.append(0) 227 | TP_DST.append(0) 228 | TP_SEQ_SRC.append(0) 229 | TP_SEQ_DST.append(0) 230 | TCP_SYN.append(0) 231 | TCP_FIN.append(0) 232 | TCP_RST.append(0) 233 | TCP_PSH.append(0) 234 | TCP_ACK.append(0) 235 | PAYLOAD.append("4745540d0a") 236 | DIRECTION.append("s2c") 237 | 238 | #*** Metadata for whole flow: 239 | FLOW_IP_CLIENT = '10.0.2.15' 240 | FLOW_IP_SERVER = '10.0.2.2' 241 | 242 | # Unidirectional flow values: 243 | # Flow 1: 244 | UNIDIR_SRC_IP.append('10.0.2.15') 245 | UNIDIR_SRC_PORT.append('0') 246 | UNIDIR_DST_IP.append('10.0.2.2') 247 | 
UNIDIR_DST_PORT.append('0') 248 | UNIDIR_PROTO.append('1') 249 | UNIDIR_PKTTOTALCOUNT.append('2') 250 | UNIDIR_OCTETTOTALCOUNT.append('168') 251 | UNIDIR_MIN_PS.append('84') 252 | UNIDIR_MAX_PS.append('84') 253 | UNIDIR_AVG_PS.append('84') 254 | UNIDIR_STD_DEV_PS.append('0') 255 | UNIDIR_FLOWSTART.append('1538640052.580081') 256 | UNIDIR_FLOWEND.append('1538640053.598205') 257 | UNIDIR_FLOWDURATION.append('1.01812') 258 | UNIDIR_MIN_PIAT.append('1.01812005') 259 | UNIDIR_MAX_PIAT.append('1.01812005') 260 | UNIDIR_AVG_PIAT.append('1.01812005') 261 | UNIDIR_STD_DEV_PIAT.append('0') 262 | # Flow 2: 263 | UNIDIR_SRC_IP.append('10.0.2.2') 264 | UNIDIR_SRC_PORT.append('0') 265 | UNIDIR_DST_IP.append('10.0.2.15') 266 | UNIDIR_DST_PORT.append('0') 267 | UNIDIR_PROTO.append('1') 268 | UNIDIR_PKTTOTALCOUNT.append('2') 269 | UNIDIR_OCTETTOTALCOUNT.append('168') 270 | UNIDIR_MIN_PS.append('84') 271 | UNIDIR_MAX_PS.append('84') 272 | UNIDIR_AVG_PS.append('84') 273 | UNIDIR_STD_DEV_PS.append('0') 274 | UNIDIR_FLOWSTART.append('1538640052.580269') 275 | UNIDIR_FLOWEND.append('1538640053.598428') 276 | UNIDIR_FLOWDURATION.append('1.018159') 277 | UNIDIR_MIN_PIAT.append('1.018159866') 278 | UNIDIR_MAX_PIAT.append('1.018159866') 279 | UNIDIR_AVG_PIAT.append('1.018159866') 280 | UNIDIR_STD_DEV_PIAT.append('0') 281 | # Bidirectional Combined Flow 1: 282 | BIDIR_SRC_IP.append('10.0.2.15') 283 | BIDIR_SRC_PORT.append('0') 284 | BIDIR_DST_IP.append('10.0.2.2') 285 | BIDIR_DST_PORT.append('0') 286 | BIDIR_PROTO.append('1') 287 | BIDIR_PKTTOTALCOUNT.append('4') 288 | BIDIR_OCTETTOTALCOUNT.append('336') 289 | BIDIR_MIN_PS.append('84') 290 | BIDIR_MAX_PS.append('84') 291 | BIDIR_AVG_PS.append('84') 292 | BIDIR_STD_DEV_PS.append('0') 293 | BIDIR_FLOWSTART.append('1538640052.580081') 294 | BIDIR_FLOWEND.append('1538640053.598428') 295 | BIDIR_FLOWDURATION.append('1.018339') 296 | BIDIR_MIN_PIAT.append('0.000180006') 297 | BIDIR_MAX_PIAT.append('1.017940044') 298 | 
BIDIR_AVG_PIAT.append('0.339446624') 299 | BIDIR_STD_DEV_PIAT.append('0.479767299') 300 | BIDIR_F_PKTTOTALCOUNT.append('2') 301 | BIDIR_F_OCTETTOTALCOUNT.append('168') 302 | BIDIR_F_MIN_PS.append('84') 303 | BIDIR_F_MAX_PS.append('84') 304 | BIDIR_F_AVG_PS.append('84') 305 | BIDIR_F_STD_DEV_PS.append('0') 306 | BIDIR_F_FLOWSTART.append('1538640052.580081') 307 | BIDIR_F_FLOWEND.append('1538640053.598205') 308 | BIDIR_F_FLOWDURATION.append('1.018120') 309 | BIDIR_F_MIN_PIAT.append('1.01812005') 310 | BIDIR_F_MAX_PIAT.append('1.01812005') 311 | BIDIR_F_AVG_PIAT.append('1.01812005') 312 | BIDIR_F_STD_DEV_PIAT.append('0') 313 | BIDIR_B_PKTTOTALCOUNT.append('2') 314 | BIDIR_B_OCTETTOTALCOUNT.append('168') 315 | BIDIR_B_MIN_PS.append('84') 316 | BIDIR_B_MAX_PS.append('84') 317 | BIDIR_B_AVG_PS.append('84') 318 | BIDIR_B_STD_DEV_PS.append('0') 319 | BIDIR_B_FLOWSTART.append('1538640052.580269') 320 | BIDIR_B_FLOWEND.append('1538640053.598428') 321 | BIDIR_B_FLOWDURATION.append('1.018159') 322 | BIDIR_B_MIN_PIAT.append('1.018159866') 323 | BIDIR_B_MAX_PIAT.append('1.018159866') 324 | BIDIR_B_AVG_PIAT.append('1.018159866') 325 | BIDIR_B_STD_DEV_PIAT.append('0') 326 | 327 | 328 | 329 | 330 | 331 | -------------------------------------------------------------------------------- /tests/http1.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 10 | # implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | 14 | """ 15 | Packets with metadata to use in testing of flowRecorder 16 | 17 | This flow is IPv4 + TCP + HTTP with a GET returning a "HTTP/1.1 18 | 400 Bad Request" 19 | 20 | To create test packet data, capture packet in Wireshark and: 21 | 22 | For the packet summary: 23 | Right-click packet in top pane, Copy -> Summary (text). 24 | Edit pasted text as appropriate 25 | 26 | For the packet hex: 27 | Right-click packet in top pane, Copy -> Bytes -> Hex Stream 28 | 29 | For the packet timestamp: 30 | Expand 'Frame' in the middle pane, 31 | right-click 'Epoch Time' Copy -> Value 32 | 33 | Packet capture file is 'http1.pcap' 34 | """ 35 | 36 | import binascii 37 | 38 | name = 'http1.py' 39 | capture_file = 'http1.pcap' 40 | 41 | #======================== IPv4 + TCP + HTTP port 80 flow ====================== 42 | #*** Raw packet data: 43 | RAW = [] 44 | #*** Packet on the wire lengths in bytes: 45 | LEN = [] 46 | #*** Ethernet parameters: 47 | ETH_SRC = [] 48 | ETH_DST = [] 49 | ETH_TYPE = [] 50 | #*** IP addresses: 51 | IP_SRC = [] 52 | IP_DST = [] 53 | #*** IP protocol number in decimal: 54 | PROTO = [] 55 | #*** Transport-layer port numbers in decimal: 56 | TP_SRC = [] 57 | TP_DST = [] 58 | #*** Transport-layer sequence numbers in decimal: 59 | TP_SEQ_SRC = [] 60 | TP_SEQ_DST = [] 61 | #*** TCP FLAGS: 62 | TCP_SYN = [] 63 | TCP_FIN = [] 64 | TCP_RST = [] 65 | TCP_PSH = [] 66 | TCP_ACK = [] 67 | #*** HEX-encoded payload 68 | PAYLOAD = [] 69 | #*** Packet direction, c2s (client to server) or s2c 70 | DIRECTION = [] 71 | 72 | # Unidir Flow values: 73 | UNIDIR_SRC_IP = [] 74 | UNIDIR_SRC_PORT = [] 75 | UNIDIR_DST_IP = [] 76 | UNIDIR_DST_PORT = [] 77 | UNIDIR_PROTO = [] 78 | UNIDIR_PKTTOTALCOUNT = [] 79 | UNIDIR_OCTETTOTALCOUNT = [] 80 | UNIDIR_MIN_PS = [] 81 | UNIDIR_MAX_PS = [] 82 | UNIDIR_AVG_PS = [] 83 | UNIDIR_STD_DEV_PS = [] 84 | UNIDIR_FLOWSTART = [] 85 | UNIDIR_FLOWEND = [] 86 | UNIDIR_FLOWDURATION = [] 87 | UNIDIR_MIN_PIAT = [] 88 | UNIDIR_MAX_PIAT = [] 
89 | UNIDIR_AVG_PIAT = [] 90 | UNIDIR_STD_DEV_PIAT = [] 91 | # Bidirectional: 92 | # Combined Flow values: 93 | BIDIR_SRC_IP = [] 94 | BIDIR_SRC_PORT = [] 95 | BIDIR_DST_IP = [] 96 | BIDIR_DST_PORT = [] 97 | BIDIR_PROTO = [] 98 | BIDIR_PKTTOTALCOUNT = [] 99 | BIDIR_OCTETTOTALCOUNT = [] 100 | BIDIR_MIN_PS = [] 101 | BIDIR_MAX_PS = [] 102 | BIDIR_AVG_PS = [] 103 | BIDIR_STD_DEV_PS = [] 104 | BIDIR_FLOWSTART = [] 105 | BIDIR_FLOWEND = [] 106 | BIDIR_FLOWDURATION = [] 107 | BIDIR_MIN_PIAT = [] 108 | BIDIR_MAX_PIAT = [] 109 | BIDIR_AVG_PIAT = [] 110 | BIDIR_STD_DEV_PIAT = [] 111 | # Forward Flow values: 112 | BIDIR_F_SRC_IP = [] 113 | BIDIR_F_SRC_PORT = [] 114 | BIDIR_F_DST_IP = [] 115 | BIDIR_F_DST_PORT = [] 116 | BIDIR_F_PROTO = [] 117 | BIDIR_F_PKTTOTALCOUNT = [] 118 | BIDIR_F_OCTETTOTALCOUNT = [] 119 | BIDIR_F_MIN_PS = [] 120 | BIDIR_F_MAX_PS = [] 121 | BIDIR_F_AVG_PS = [] 122 | BIDIR_F_STD_DEV_PS = [] 123 | BIDIR_F_FLOWSTART = [] 124 | BIDIR_F_FLOWEND = [] 125 | BIDIR_F_FLOWDURATION = [] 126 | BIDIR_F_MIN_PIAT = [] 127 | BIDIR_F_MAX_PIAT = [] 128 | BIDIR_F_AVG_PIAT = [] 129 | BIDIR_F_STD_DEV_PIAT = [] 130 | # Backward Flow values: 131 | BIDIR_B_SRC_IP = [] 132 | BIDIR_B_SRC_PORT = [] 133 | BIDIR_B_DST_IP = [] 134 | BIDIR_B_DST_PORT = [] 135 | BIDIR_B_PROTO = [] 136 | BIDIR_B_PKTTOTALCOUNT = [] 137 | BIDIR_B_OCTETTOTALCOUNT = [] 138 | BIDIR_B_MIN_PS = [] 139 | BIDIR_B_MAX_PS = [] 140 | BIDIR_B_AVG_PS = [] 141 | BIDIR_B_STD_DEV_PS = [] 142 | BIDIR_B_FLOWSTART = [] 143 | BIDIR_B_FLOWEND = [] 144 | BIDIR_B_FLOWDURATION = [] 145 | BIDIR_B_MIN_PIAT = [] 146 | BIDIR_B_MAX_PIAT = [] 147 | BIDIR_B_AVG_PIAT = [] 148 | BIDIR_B_STD_DEV_PIAT = [] 149 | 150 | #*** Packet 1 - TCP handshake packet 1 151 | # 10.1.0.1 10.1.0.2 TCP 74 43297 > http [SYN] Seq=0 Win=29200 Len=0 MSS=1460 SACK_PERM=1 TSval=5982511 TSecr=0 WS=64 152 | 
RAW.append(binascii.unhexlify("080027c8db910800272ad6dd08004510003c19fd400040060cab0a0100010a010002a9210050c37250d200000000a002721014330000020405b40402080a005b492f0000000001030306")) 153 | LEN.append(60) 154 | ETH_SRC.append('08:00:27:2a:d6:dd') 155 | ETH_DST.append('08:00:27:c8:db:91') 156 | ETH_TYPE.append(2048) 157 | IP_SRC.append('10.1.0.1') 158 | IP_DST.append('10.1.0.2') 159 | PROTO.append(6) 160 | TP_SRC.append(43297) 161 | TP_DST.append(80) 162 | TP_SEQ_SRC.append(3279048914) 163 | TP_SEQ_DST.append(0) 164 | TCP_SYN.append(1) 165 | TCP_FIN.append(0) 166 | TCP_RST.append(0) 167 | TCP_PSH.append(0) 168 | TCP_ACK.append(0) 169 | PAYLOAD.append("") 170 | DIRECTION.append("c2s") 171 | 172 | #*** Packet 2 - TCP handshake packet 2 173 | # 10.1.0.2 10.1.0.1 TCP 74 http > 43297 [SYN, ACK] Seq=0 Ack=1 Win=28960 Len=0 MSS=1460 SACK_PERM=1 TSval=5977583 TSecr=5982511 WS=64 174 | RAW.append(binascii.unhexlify("0800272ad6dd080027c8db9108004500003c00004000400626b80a0100020a0100010050a9219e5c9d99c37250d3a0127120494a0000020405b40402080a005b35ef005b492f01030306")) 175 | LEN.append(60) 176 | ETH_SRC.append('08:00:27:c8:db:91') 177 | ETH_DST.append('08:00:27:2a:d6:dd') 178 | ETH_TYPE.append(2048) 179 | IP_SRC.append('10.1.0.2') 180 | IP_DST.append('10.1.0.1') 181 | PROTO.append(6) 182 | TP_SRC.append(80) 183 | TP_DST.append(43297) 184 | TP_SEQ_SRC.append(2656869785) 185 | TP_SEQ_DST.append(3279048915) 186 | TCP_SYN.append(1) 187 | TCP_FIN.append(0) 188 | TCP_RST.append(0) 189 | TCP_PSH.append(0) 190 | TCP_ACK.append(1) 191 | PAYLOAD.append("") 192 | DIRECTION.append("s2c") 193 | 194 | #*** Packet 3 - TCP handshake packet 3 195 | # 10.1.0.1 10.1.0.2 TCP 66 43297 > http [ACK] Seq=1 Ack=1 Win=29248 Len=0 TSval=5982512 TSecr=5977583 196 | RAW.append(binascii.unhexlify("080027c8db910800272ad6dd08004510003419fe400040060cb20a0100010a010002a9210050c37250d39e5c9d9a801001c9142b00000101080a005b4930005b35ef")) 197 | LEN.append(52) 198 | ETH_SRC.append('08:00:27:2a:d6:dd') 199 | 
ETH_DST.append('08:00:27:c8:db:91') 200 | ETH_TYPE.append(2048) 201 | IP_SRC.append('10.1.0.1') 202 | IP_DST.append('10.1.0.2') 203 | PROTO.append(6) 204 | TP_SRC.append(43297) 205 | TP_DST.append(80) 206 | TP_SEQ_SRC.append(3279048915) 207 | TP_SEQ_DST.append(2656869786) 208 | TCP_SYN.append(0) 209 | TCP_FIN.append(0) 210 | TCP_RST.append(0) 211 | TCP_PSH.append(0) 212 | TCP_ACK.append(1) 213 | PAYLOAD.append("") 214 | DIRECTION.append("c2s") 215 | 216 | #*** Packet 4 - Client to server payload 1 "GET\r\n" 217 | # 10.1.0.1 10.1.0.2 TCP 71 [TCP segment of a reassembled PDU] [PSH + ACK] 218 | RAW.append(binascii.unhexlify("080027c8db910800272ad6dd08004510003919ff400040060cac0a0100010a010002a9210050c37250d39e5c9d9a801801c9143000000101080a005b4d59005b35ef4745540d0a")) 219 | LEN.append(57) 220 | ETH_SRC.append('08:00:27:2a:d6:dd') 221 | ETH_DST.append('08:00:27:c8:db:91') 222 | ETH_TYPE.append(2048) 223 | IP_SRC.append('10.1.0.1') 224 | IP_DST.append('10.1.0.2') 225 | PROTO.append(6) 226 | TP_SRC.append(43297) 227 | TP_DST.append(80) 228 | TP_SEQ_SRC.append(3279048915) 229 | TP_SEQ_DST.append(2656869786) 230 | TCP_SYN.append(0) 231 | TCP_FIN.append(0) 232 | TCP_RST.append(0) 233 | TCP_PSH.append(1) 234 | TCP_ACK.append(1) 235 | PAYLOAD.append("4745540d0a") 236 | DIRECTION.append("c2s") 237 | 238 | #*** Packet 5 - TCP ACK server to client 239 | # 10.1.0.2 10.1.0.1 TCP 66 http > 43297 [ACK] Seq=1 Ack=6 Win=28992 Len=0 TSval=5978648 TSecr=5983577 240 | RAW.append(binascii.unhexlify("0800272ad6dd080027c8db91080045000034a875400040067e4a0a0100020a0100010050a9219e5c9d9ac37250d8801001c5df1800000101080a005b3a18005b4d59")) 241 | LEN.append(52) 242 | ETH_SRC.append('08:00:27:c8:db:91') 243 | ETH_DST.append('08:00:27:2a:d6:dd') 244 | ETH_TYPE.append(2048) 245 | IP_SRC.append('10.1.0.2') 246 | IP_DST.append('10.1.0.1') 247 | PROTO.append(6) 248 | TP_SRC.append(80) 249 | TP_DST.append(43297) 250 | TP_SEQ_SRC.append(2656869786) 251 | TP_SEQ_DST.append(3279048920) 252 | 
TCP_SYN.append(0) 253 | TCP_FIN.append(0) 254 | TCP_RST.append(0) 255 | TCP_PSH.append(0) 256 | TCP_ACK.append(1) 257 | PAYLOAD.append("") 258 | DIRECTION.append("s2c") 259 | 260 | #*** Packet 6 - Server to client response 261 | # 10.1.0.2 10.1.0.1 HTTP 162 HTTP/1.1 400 Bad Request (text/plain) [PSH + ACK] 262 | RAW.append(binascii.unhexlify("0800272ad6dd080027c8db91080045000094a876400040067de90a0100020a0100010050a9219e5c9d9ac37250d8801801c5792f00000101080a005b3a18005b4d59485454502f312e31203430302042616420526571756573740d0a436f6e74656e742d4c656e6774683a2032320d0a436f6e74656e742d547970653a20746578742f706c61696e0d0a0d0a4d616c666f726d656420526571756573742d4c696e65")) 263 | LEN.append(148) 264 | ETH_SRC.append('08:00:27:c8:db:91') 265 | ETH_DST.append('08:00:27:2a:d6:dd') 266 | ETH_TYPE.append(2048) 267 | IP_SRC.append('10.1.0.2') 268 | IP_DST.append('10.1.0.1') 269 | PROTO.append(6) 270 | TP_SRC.append(80) 271 | TP_DST.append(43297) 272 | TP_SEQ_SRC.append(2656869786) 273 | TP_SEQ_DST.append(3279048920) 274 | TCP_SYN.append(0) 275 | TCP_FIN.append(0) 276 | TCP_RST.append(0) 277 | TCP_PSH.append(1) 278 | TCP_ACK.append(1) 279 | PAYLOAD.append("485454502f312e31203430302042616420526571756573740d0a436f6e74656e742d4c656e6774683a2032320d0a436f6e74656e742d547970653a20746578742f706c61696e0d0a0d0a4d616c666f726d656420526571756573742d4c696e65") 280 | DIRECTION.append("s2c") 281 | 282 | #*** Packet 7- Client to server ACK 283 | # 10.1.0.1 10.1.0.2 TCP 66 43297 > http [ACK] Seq=6 Ack=97 Win=29248 Len=0 TSval=5983577 TSecr=5978648 284 | RAW.append(binascii.unhexlify("080027c8db910800272ad6dd0800451000341a00400040060cb00a0100010a010002a9210050c37250d89e5c9dfa801001c9142b00000101080a005b4d59005b3a18")) 285 | LEN.append(52) 286 | ETH_SRC.append('08:00:27:2a:d6:dd') 287 | ETH_DST.append('08:00:27:c8:db:91') 288 | ETH_TYPE.append(2048) 289 | IP_SRC.append('10.1.0.1') 290 | IP_DST.append('10.1.0.2') 291 | PROTO.append(6) 292 | TP_SRC.append(43297) 293 | TP_DST.append(80) 294 | 
TP_SEQ_SRC.append(3279048920) 295 | TP_SEQ_DST.append(2656869882) 296 | TCP_SYN.append(0) 297 | TCP_FIN.append(0) 298 | TCP_RST.append(0) 299 | TCP_PSH.append(0) 300 | TCP_ACK.append(1) 301 | PAYLOAD.append("") 302 | DIRECTION.append("c2s") 303 | 304 | #*** Metadata for whole flow: 305 | FLOW_IP_CLIENT = '10.1.0.1' 306 | FLOW_IP_SERVER = '10.1.0.2' 307 | 308 | # Unidirectional flow values: 309 | # Flow 1: 310 | UNIDIR_SRC_IP.append('10.1.0.1') 311 | UNIDIR_SRC_PORT.append('43297') 312 | UNIDIR_DST_IP.append('10.1.0.2') 313 | UNIDIR_DST_PORT.append('80') 314 | UNIDIR_PROTO.append('6') 315 | UNIDIR_PKTTOTALCOUNT.append('4') 316 | UNIDIR_OCTETTOTALCOUNT.append('221') 317 | UNIDIR_MIN_PS.append('52') 318 | UNIDIR_MAX_PS.append('60') 319 | UNIDIR_AVG_PS.append('55.25') 320 | UNIDIR_STD_DEV_PS.append('3.418698583') 321 | UNIDIR_FLOWSTART.append('1458782847.829442') 322 | UNIDIR_FLOWEND.append('1458782852.091702') 323 | UNIDIR_FLOWDURATION.append('4.262260') 324 | UNIDIR_MIN_PIAT.append('0.000980') 325 | UNIDIR_MAX_PIAT.append('4.260270') 326 | UNIDIR_AVG_PIAT.append('1.420753') 327 | UNIDIR_STD_DEV_PIAT.append('2.007841') 328 | # Flow 2: 329 | UNIDIR_SRC_IP.append('10.1.0.2') 330 | UNIDIR_SRC_PORT.append('80') 331 | UNIDIR_DST_IP.append('10.1.0.1') 332 | UNIDIR_DST_PORT.append('43297') 333 | UNIDIR_PROTO.append('6') 334 | UNIDIR_PKTTOTALCOUNT.append('3') 335 | UNIDIR_OCTETTOTALCOUNT.append('260') 336 | UNIDIR_MIN_PS.append('52') 337 | UNIDIR_MAX_PS.append('148') 338 | UNIDIR_AVG_PS.append('86.66666667') 339 | UNIDIR_STD_DEV_PS.append('43.49201715') 340 | UNIDIR_FLOWSTART.append('1458782847.830399') 341 | UNIDIR_FLOWEND.append('1458782852.091692') 342 | UNIDIR_FLOWDURATION.append('4.261300') 343 | UNIDIR_MIN_PIAT.append('0.000149965') 344 | UNIDIR_MAX_PIAT.append('4.261150') 345 | UNIDIR_AVG_PIAT.append('2.130650') 346 | UNIDIR_STD_DEV_PIAT.append('2.130500078') 347 | # Bidirectional Combined Flow 1: 348 | BIDIR_SRC_IP.append('10.1.0.1') 349 | 
BIDIR_SRC_PORT.append('43297') 350 | BIDIR_DST_IP.append('10.1.0.2') 351 | BIDIR_DST_PORT.append('80') 352 | BIDIR_PROTO.append('6') 353 | BIDIR_PKTTOTALCOUNT.append('7') 354 | BIDIR_OCTETTOTALCOUNT.append('481') 355 | BIDIR_MIN_PS.append('52') 356 | BIDIR_MAX_PS.append('148') 357 | BIDIR_AVG_PS.append('68.71428571') 358 | BIDIR_STD_DEV_PS.append('32.54322087') 359 | BIDIR_FLOWSTART.append('1458782847.829442') 360 | BIDIR_FLOWEND.append('1458782852.091702') 361 | BIDIR_FLOWDURATION.append('4.262260') 362 | BIDIR_MIN_PIAT.append('0.000010014') 363 | BIDIR_MAX_PIAT.append('4.260269880') 364 | BIDIR_AVG_PIAT.append('0.710376660') 365 | BIDIR_STD_DEV_PIAT.append('1.587560556') 366 | BIDIR_F_PKTTOTALCOUNT.append('4') 367 | BIDIR_F_OCTETTOTALCOUNT.append('221') 368 | BIDIR_F_MIN_PS.append('52') 369 | BIDIR_F_MAX_PS.append('60') 370 | BIDIR_F_AVG_PS.append('55.25') 371 | BIDIR_F_STD_DEV_PS.append('3.418698583') 372 | BIDIR_F_FLOWSTART.append('1458782847.829442') 373 | BIDIR_F_FLOWEND.append('1458782852.091702') 374 | BIDIR_F_FLOWDURATION.append('4.262260') 375 | BIDIR_F_MIN_PIAT.append('0.000980') 376 | BIDIR_F_MAX_PIAT.append('4.260270') 377 | BIDIR_F_AVG_PIAT.append('1.420753') 378 | BIDIR_F_STD_DEV_PIAT.append('2.007841') 379 | BIDIR_B_PKTTOTALCOUNT.append('3') 380 | BIDIR_B_OCTETTOTALCOUNT.append('260') 381 | BIDIR_B_MIN_PS.append('52') 382 | BIDIR_B_MAX_PS.append('148') 383 | BIDIR_B_AVG_PS.append('86.66666667') 384 | BIDIR_B_STD_DEV_PS.append('43.49201715') 385 | BIDIR_B_FLOWSTART.append('1458782847.830399') 386 | BIDIR_B_FLOWEND.append('1458782852.091692') 387 | BIDIR_B_FLOWDURATION.append('4.261300') 388 | BIDIR_B_MIN_PIAT.append('0.000149965') 389 | BIDIR_B_MAX_PIAT.append('4.261150') 390 | BIDIR_B_AVG_PIAT.append('2.130650') 391 | BIDIR_B_STD_DEV_PIAT.append('2.130500078') 392 | 393 | 394 | 395 | 396 | 397 | -------------------------------------------------------------------------------- /flowRecorder/.pylintrc: 
-------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | #*** Custom Pylint Configuration File for nmeta 4 | 5 | #*** Disables bad-continuation checking 6 | 7 | # Specify a configuration file. 8 | #rcfile= 9 | 10 | # Python code to execute, usually for sys.path manipulation such as 11 | # pygtk.require(). 12 | #init-hook= 13 | 14 | # Add files or directories to the blacklist. They should be base names, not 15 | # paths. 16 | ignore=CVS 17 | 18 | # Pickle collected data for later comparisons. 19 | persistent=yes 20 | 21 | # List of plugins (as comma separated values of python modules names) to load, 22 | # usually to register additional checkers. 23 | load-plugins= 24 | 25 | # Use multiple processes to speed up Pylint. 26 | jobs=1 27 | 28 | # Allow loading of arbitrary C extensions. Extensions are imported into the 29 | # active Python interpreter and may run arbitrary code. 30 | unsafe-load-any-extension=no 31 | 32 | # A comma-separated list of package or module names from where C extensions may 33 | # be loaded. Extensions are loaded into the active Python interpreter and may 34 | # run arbitrary code. 35 | extension-pkg-whitelist= 36 | 37 | # Allow optimization of some AST trees. This will activate a peephole AST 38 | # optimizer, which will apply various small optimizations. For instance, it can 39 | # be used to obtain the result of joining multiple strings with the addition 40 | # operator. Joining a lot of strings can lead to a maximum recursion error in 41 | # Pylint and this flag can prevent that. It has one side effect, the resulting 42 | # AST will be different than the one from reality. 43 | optimize-ast=no 44 | 45 | 46 | [MESSAGES CONTROL] 47 | 48 | # Only show warnings with the listed confidence levels. Leave empty to show 49 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 50 | confidence= 51 | 52 | # Enable the message, report, category or checker with the given id(s).
You can 53 | # either give multiple identifiers separated by commas (,) or put this option 54 | # multiple times. See also the "--disable" option for examples. 55 | #enable= 56 | 57 | # Disable the message, report, category or checker with the given id(s). You 58 | # can either give multiple identifiers separated by comma (,) or put this 59 | # option multiple times (only on the command line, not in the configuration 60 | # file where it should appear only once). You can also use "--disable=all" to 61 | # disable everything first and then reenable specific checks. For example, if 62 | # you want to run only the similarities checker, you can use "--disable=all 63 | # --enable=similarities". If you want to run only the classes checker, but have 64 | # no Warning level messages displayed, use "--disable=all --enable=classes 65 | # --disable=W" 66 | disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,bad-continuation,suppressed-message,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating 67 | 68 | 69 | [REPORTS] 70 | 71 | # Set the output format. Available formats are text, parseable, colorized, msvs 72 | # (visual studio) and html. You can also give a reporter class, eg 73 | # mypackage.mymodule.MyReporterClass.
74 | output-format=text 75 | 76 | # Put messages in a separate file for each module / package specified on the 77 | # command line instead of printing them on stdout. Reports (if any) will be 78 | # written in a file named "pylint_global.[txt|html]". 79 | files-output=no 80 | 81 | # Tells whether to display a full report or only the messages 82 | reports=yes 83 | 84 | # Python expression which should return a note less than 10 (10 is the highest 85 | # note). You have access to the variables errors warning, statement which 86 | # respectively contain the number of errors / warnings messages and the total 87 | # number of statements analyzed. This is used by the global evaluation report 88 | # (RP0004). 89 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 90 | 91 | # Template used to display messages. This is a python new-style format string 92 | # used to format the message information. See doc for all details 93 | #msg-template= 94 | 95 | 96 | [TYPECHECK] 97 | 98 | # Tells whether missing members accessed in mixin class should be ignored. A 99 | # mixin class is detected if its name ends with "mixin" (case insensitive). 100 | ignore-mixin-members=yes 101 | 102 | # List of module names for which member attributes should not be checked 103 | # (useful for modules/projects where namespaces are manipulated during runtime 104 | # and thus existing member attributes cannot be deduced by static analysis). It 105 | # supports qualified module names, as well as Unix pattern matching. 106 | ignored-modules= 107 | 108 | # List of classes names for which member attributes should not be checked 109 | # (useful for classes with attributes dynamically set). This option also works 110 | # with qualified names. 111 | ignored-classes= 112 | 113 | # List of members which are set dynamically and missed by pylint inference 114 | # system, and so shouldn't trigger E1101 when accessed. Python regular 115 | # expressions are accepted.
116 | generated-members= 117 | 118 | 119 | [SIMILARITIES] 120 | 121 | # Minimum lines number of a similarity. 122 | min-similarity-lines=4 123 | 124 | # Ignore comments when computing similarities. 125 | ignore-comments=yes 126 | 127 | # Ignore docstrings when computing similarities. 128 | ignore-docstrings=yes 129 | 130 | # Ignore imports when computing similarities. 131 | ignore-imports=no 132 | 133 | 134 | [LOGGING] 135 | 136 | # Logging modules to check that the string format arguments are in logging 137 | # function parameter format 138 | logging-modules=logging 139 | 140 | 141 | [MISCELLANEOUS] 142 | 143 | # List of note tags to take in consideration, separated by a comma. 144 | notes=FIXME,XXX,TODO 145 | 146 | 147 | [VARIABLES] 148 | 149 | # Tells whether we should check for unused import in __init__ files. 150 | init-import=no 151 | 152 | # A regular expression matching the name of dummy variables (i.e. expectedly 153 | # not used). 154 | dummy-variables-rgx=_$|dummy 155 | 156 | # List of additional names supposed to be defined in builtins. Remember that 157 | # you should avoid to define new builtins when possible. 158 | additional-builtins= 159 | 160 | # List of strings which can identify a callback function by name. A callback 161 | # name must start or end with one of those strings. 162 | callbacks=cb_,_cb 163 | 164 | 165 | [FORMAT] 166 | 167 | # Maximum number of characters on a single line. 168 | max-line-length=100 169 | 170 | # Regexp for a line that is allowed to be longer than the limit. 171 | ignore-long-lines=^\s*(# )??$ 172 | 173 | # Allow the body of an if to be on the same line as the test if there is no 174 | # else. 175 | single-line-if-stmt=no 176 | 177 | # List of optional constructs for which whitespace checking is disabled. `dict- 178 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 179 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 180 | # `empty-line` allows space-only lines. 
181 | no-space-check=trailing-comma,dict-separator 182 | 183 | # Maximum number of lines in a module 184 | max-module-lines=1000 185 | 186 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 187 | # tab). 188 | indent-string=' ' 189 | 190 | # Number of spaces of indent required inside a hanging or continued line. 191 | indent-after-paren=4 192 | 193 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 194 | expected-line-ending-format= 195 | 196 | 197 | [SPELLING] 198 | 199 | # Spelling dictionary name. Available dictionaries: none. To make it working 200 | # install python-enchant package. 201 | spelling-dict= 202 | 203 | # List of comma separated words that should not be checked. 204 | spelling-ignore-words= 205 | 206 | # A path to a file that contains private dictionary; one word per line. 207 | spelling-private-dict-file= 208 | 209 | # Tells whether to store unknown words to indicated private dictionary in 210 | # --spelling-private-dict-file option instead of raising a message. 211 | spelling-store-unknown-words=no 212 | 213 | 214 | [BASIC] 215 | 216 | # List of builtins function names that should not be used, separated by a comma 217 | bad-functions=map,filter,input 218 | 219 | # Good variable names which should always be accepted, separated by a comma 220 | good-names=i,j,k,ex,Run,_ 221 | 222 | # Bad variable names which should always be refused, separated by a comma 223 | bad-names=foo,bar,baz,toto,tutu,tata 224 | 225 | # Colon-delimited sets of names that determine each other's naming style when 226 | # the name regexes allow several styles. 
227 | name-group= 228 | 229 | # Include a hint for the correct naming format with invalid-name 230 | include-naming-hint=no 231 | 232 | # Regular expression matching correct function names 233 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 234 | 235 | # Naming hint for function names 236 | function-name-hint=[a-z_][a-z0-9_]{2,30}$ 237 | 238 | # Regular expression matching correct variable names 239 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 240 | 241 | # Naming hint for variable names 242 | variable-name-hint=[a-z_][a-z0-9_]{2,30}$ 243 | 244 | # Regular expression matching correct constant names 245 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 246 | 247 | # Naming hint for constant names 248 | const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 249 | 250 | # Regular expression matching correct attribute names 251 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 252 | 253 | # Naming hint for attribute names 254 | attr-name-hint=[a-z_][a-z0-9_]{2,30}$ 255 | 256 | # Regular expression matching correct argument names 257 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 258 | 259 | # Naming hint for argument names 260 | argument-name-hint=[a-z_][a-z0-9_]{2,30}$ 261 | 262 | # Regular expression matching correct class attribute names 263 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 264 | 265 | # Naming hint for class attribute names 266 | class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 267 | 268 | # Regular expression matching correct inline iteration names 269 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 270 | 271 | # Naming hint for inline iteration names 272 | inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ 273 | 274 | # Regular expression matching correct class names 275 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 276 | 277 | # Naming hint for class names 278 | class-name-hint=[A-Z_][a-zA-Z0-9]+$ 279 | 280 | # Regular expression matching correct module names 281 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 282 | 283 | # Naming hint for module names 284 | 
module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 285 | 286 | # Regular expression matching correct method names 287 | method-rgx=[a-z_][a-z0-9_]{2,30}$ 288 | 289 | # Naming hint for method names 290 | method-name-hint=[a-z_][a-z0-9_]{2,30}$ 291 | 292 | # Regular expression which should only match function or class names that do 293 | # not require a docstring. 294 | no-docstring-rgx=^_ 295 | 296 | # Minimum line length for functions/classes that require docstrings, shorter 297 | # ones are exempt. 298 | docstring-min-length=-1 299 | 300 | 301 | [ELIF] 302 | 303 | # Maximum number of nested blocks for function / method body 304 | max-nested-blocks=5 305 | 306 | 307 | [CLASSES] 308 | 309 | # List of method names used to declare (i.e. assign) instance attributes. 310 | defining-attr-methods=__init__,__new__,setUp 311 | 312 | # List of valid names for the first argument in a class method. 313 | valid-classmethod-first-arg=cls 314 | 315 | # List of valid names for the first argument in a metaclass class method. 316 | valid-metaclass-classmethod-first-arg=mcs 317 | 318 | # List of member names, which should be excluded from the protected access 319 | # warning. 320 | exclude-protected=_asdict,_fields,_replace,_source,_make 321 | 322 | 323 | [IMPORTS] 324 | 325 | # Deprecated modules which should not be used, separated by a comma 326 | deprecated-modules=regsub,TERMIOS,Bastion,rexec 327 | 328 | # Create a graph of every (i.e. 
internal and external) dependencies in the 329 | # given file (report RP0402 must not be disabled) 330 | import-graph= 331 | 332 | # Create a graph of external dependencies in the given file (report RP0402 must 333 | # not be disabled) 334 | ext-import-graph= 335 | 336 | # Create a graph of internal dependencies in the given file (report RP0402 must 337 | # not be disabled) 338 | int-import-graph= 339 | 340 | 341 | [DESIGN] 342 | 343 | # Maximum number of arguments for function / method 344 | max-args=5 345 | 346 | # Argument names that match this expression will be ignored. Defaults to names 347 | # with a leading underscore 348 | ignored-argument-names=_.* 349 | 350 | # Maximum number of locals for function / method body 351 | max-locals=15 352 | 353 | # Maximum number of return / yield for function / method body 354 | max-returns=6 355 | 356 | # Maximum number of branches for function / method body 357 | max-branches=12 358 | 359 | # Maximum number of statements in function / method body 360 | max-statements=50 361 | 362 | # Maximum number of parents for a class (see R0901). 363 | max-parents=7 364 | 365 | # Maximum number of attributes for a class (see R0902). 366 | max-attributes=7 367 | 368 | # Minimum number of public methods for a class (see R0903). 369 | min-public-methods=2 370 | 371 | # Maximum number of public methods for a class (see R0904). 372 | max-public-methods=20 373 | 374 | # Maximum number of boolean expressions in an if statement 375 | max-bool-expr=5 376 | 377 | 378 | [EXCEPTIONS] 379 | 380 | # Exceptions that will emit a warning when being caught.
Defaults to 381 | # "Exception" 382 | overgeneral-exceptions=Exception 383 | 384 | -------------------------------------------------------------------------------- /tests/test_system.py: -------------------------------------------------------------------------------- 1 | """ 2 | flowRecorder system tests 3 | """ 4 | 5 | # Handle tests being in different directory branch to app code: 6 | import sys 7 | import struct 8 | 9 | # For file handling: 10 | import os 11 | import csv 12 | 13 | # For system calls to run commands: 14 | import subprocess 15 | 16 | # flowRecorder imports: 17 | import config 18 | 19 | # test packet imports: 20 | import http1 as groundtruth_http1 21 | import groundtruth_PING1 22 | import groundtruth_tcp_syn_only 23 | import groundtruth_tcp_syn_flow_expiration 24 | 25 | sys.path.insert(0, '../flowRecorder') 26 | 27 | import logging 28 | 29 | # Instantiate Config class: 30 | config = config.Config() 31 | 32 | logger = logging.getLogger(__name__) 33 | 34 | PYTHON2 = 'python2' 35 | PYTHON3 = 'python3' 36 | FLOWRECORDER = "../flowRecorder/flowRecorder.py" 37 | TEST_PCAP_HTTP1 = 'packet_captures/http1.pcap' 38 | TEST_PCAP_PING1 = 'packet_captures/PING1.pcap' 39 | TEST_PCAP_TCP_SYN_ONLY = 'packet_captures/tcp_syn_only.pcap' 40 | TEST_PCAP_TCP_SYN_FLOW_EXPIRATION = 'packet_captures/tcp_syn_flow_expiration.pcap' 41 | RESULT_FILE = 'temp/temp_test_output.csv' 42 | UNIDIR = 'u' 43 | BIDIR = 'b' 44 | # MARGIN is used to allow for small differences in results due to 45 | # use of float type, rounding etc. 
Applies on both sides of result: 46 | MARGIN = 0.0001 47 | 48 | #======================== data.py Unit Tests ============================ 49 | 50 | def test_http1_unidir(): 51 | """ 52 | Test output for unidirectional processing of http1.pcap file 53 | """ 54 | for python_ver in (PYTHON2, PYTHON3): 55 | # System call to remove old result file if exists: 56 | if os.path.isfile(RESULT_FILE): 57 | logger.info("deleting RESULT_FILE=%s", RESULT_FILE) 58 | os.remove(RESULT_FILE) 59 | 60 | # Run flowRecorder to generate output file: 61 | try: 62 | result = subprocess.check_output([python_ver, FLOWRECORDER, 63 | "-f" , TEST_PCAP_HTTP1, 64 | "-d", UNIDIR, 65 | "-o", RESULT_FILE]) 66 | logger.info("flowRecorder result is %s", result) 67 | except subprocess.CalledProcessError as e: 68 | logger.critical("Stdout output: %s", e.output) 69 | 70 | # Check results file exists: 71 | assert os.path.isfile(RESULT_FILE) 72 | 73 | # Call helper function to validate the results file values: 74 | validate_results_file_unidir(RESULT_FILE, groundtruth_http1, 2) 75 | 76 | def test_http1_bidir(): 77 | """ 78 | Test output for bidirectional processing of http1.pcap file 79 | """ 80 | for python_ver in (PYTHON2, PYTHON3): 81 | # System call to remove old result file if exists: 82 | if os.path.isfile(RESULT_FILE): 83 | logger.info("deleting RESULT_FILE=%s", RESULT_FILE) 84 | os.remove(RESULT_FILE) 85 | 86 | # Run flowRecorder to generate output file: 87 | try: 88 | result = subprocess.check_output([python_ver, FLOWRECORDER, 89 | "-f" , TEST_PCAP_HTTP1, 90 | "-d", BIDIR, 91 | "-o", RESULT_FILE]) 92 | logger.info("flowRecorder result is %s", result) 93 | except subprocess.CalledProcessError as e: 94 | logger.critical("Stdout output: %s", e.output) 95 | 96 | # Check results file exists: 97 | assert os.path.isfile(RESULT_FILE) 98 | 99 | # Call helper function to validate the results file values: 100 | validate_results_file_bidir(RESULT_FILE, groundtruth_http1, 1) 101 | 102 | def test_PING1_unidir(): 
103 | """ 104 | Test output for unidirectional processing of PING1.pcap file 105 | """ 106 | for python_ver in (PYTHON2, PYTHON3): 107 | # System call to remove old result file if exists: 108 | if os.path.isfile(RESULT_FILE): 109 | logger.info("deleting RESULT_FILE=%s", RESULT_FILE) 110 | os.remove(RESULT_FILE) 111 | 112 | # Run flowRecorder to generate output file: 113 | try: 114 | result = subprocess.check_output([python_ver, FLOWRECORDER, 115 | "-f" , TEST_PCAP_PING1, 116 | "-d", UNIDIR, 117 | "-o", RESULT_FILE]) 118 | logger.info("flowRecorder result is %s", result) 119 | except subprocess.CalledProcessError as e: 120 | logger.critical("Stdout output: %s", e.output) 121 | 122 | # Check results file exists: 123 | assert os.path.isfile(RESULT_FILE) 124 | 125 | # Call helper function to validate the results file values: 126 | validate_results_file_unidir(RESULT_FILE, groundtruth_PING1, 2) 127 | 128 | def test_PING1_bidir(): 129 | """ 130 | Test output for bidirectional processing of PING1.pcap file 131 | """ 132 | for python_ver in (PYTHON2, PYTHON3): 133 | # System call to remove old result file if exists: 134 | if os.path.isfile(RESULT_FILE): 135 | logger.info("deleting RESULT_FILE=%s", RESULT_FILE) 136 | os.remove(RESULT_FILE) 137 | 138 | # Run flowRecorder to generate output file: 139 | try: 140 | result = subprocess.check_output([python_ver, FLOWRECORDER, 141 | "-f" , TEST_PCAP_PING1, 142 | "-d", BIDIR, 143 | "-o", RESULT_FILE]) 144 | logger.info("flowRecorder result is %s", result) 145 | except subprocess.CalledProcessError as e: 146 | logger.critical("Stdout output: %s", e.output) 147 | 148 | # Check results file exists: 149 | assert os.path.isfile(RESULT_FILE) 150 | 151 | # Call helper function to validate the results file values: 152 | validate_results_file_bidir(RESULT_FILE, groundtruth_PING1, 1) 153 | 154 | def test_tcp_syn_only_unidir(): 155 | """ 156 | Test output for unidirectional processing of tcp_syn_only.pcap file 157 | """ 158 | for python_ver 
in (PYTHON2, PYTHON3): 159 | # System call to remove old result file if exists: 160 | if os.path.isfile(RESULT_FILE): 161 | logger.info("deleting RESULT_FILE=%s", RESULT_FILE) 162 | os.remove(RESULT_FILE) 163 | 164 | # Run flowRecorder to generate output file: 165 | try: 166 | result = subprocess.check_output([python_ver, FLOWRECORDER, 167 | "-f" , TEST_PCAP_TCP_SYN_ONLY, 168 | "-d", UNIDIR, 169 | "-o", RESULT_FILE]) 170 | logger.info("flowRecorder result is %s", result) 171 | except subprocess.CalledProcessError as e: 172 | logger.critical("Stdout output: %s", e.output) 173 | 174 | # Check results file exists: 175 | assert os.path.isfile(RESULT_FILE) 176 | 177 | # Call helper function to validate the results file values: 178 | validate_results_file_unidir(RESULT_FILE, groundtruth_tcp_syn_only, 1) 179 | 180 | def test_tcp_syn_only_bidir(): 181 | """ 182 | Test output for bidirectional processing of tcp_syn_only.pcap file 183 | """ 184 | for python_ver in (PYTHON2, PYTHON3): 185 | # System call to remove old result file if exists: 186 | if os.path.isfile(RESULT_FILE): 187 | logger.info("deleting RESULT_FILE=%s", RESULT_FILE) 188 | os.remove(RESULT_FILE) 189 | 190 | # Run flowRecorder to generate output file: 191 | try: 192 | result = subprocess.check_output([python_ver, FLOWRECORDER, 193 | "-f" , TEST_PCAP_TCP_SYN_ONLY, 194 | "-d", BIDIR, 195 | "-o", RESULT_FILE]) 196 | logger.info("flowRecorder result is %s", result) 197 | except subprocess.CalledProcessError as e: 198 | logger.critical("Stdout output: %s", e.output) 199 | 200 | # Check results file exists: 201 | assert os.path.isfile(RESULT_FILE) 202 | 203 | # Call helper function to validate the results file values: 204 | validate_results_file_bidir(RESULT_FILE, groundtruth_tcp_syn_only, 1) 205 | 206 | def test_tcp_flow_expiration_unidir(): 207 | """ 208 | Test output for unidirectional processing of tcp_syn_flow_expiration.pcap file 209 | """ 210 | for python_ver in (PYTHON2, PYTHON3): 211 | # System call to remove old
result file if exists: 212 | if os.path.isfile(RESULT_FILE): 213 | logger.info("deleting RESULT_FILE=%s", RESULT_FILE) 214 | os.remove(RESULT_FILE) 215 | 216 | # Run flowRecorder to generate output file: 217 | try: 218 | result = subprocess.check_output([python_ver, FLOWRECORDER, 219 | "-f" , TEST_PCAP_TCP_SYN_FLOW_EXPIRATION, 220 | "-d", UNIDIR, 221 | "-o", RESULT_FILE]) 222 | logger.info("flowRecorder result is %s", result) 223 | except subprocess.CalledProcessError as e: 224 | logger.critical("Stdout output: %s", e.output) 225 | 226 | # Check results file exists: 227 | assert os.path.isfile(RESULT_FILE) 228 | 229 | # Call helper function to validate the results file values: 230 | validate_results_file_unidir(RESULT_FILE, groundtruth_tcp_syn_flow_expiration, 2) 231 | 232 | def test_tcp_flow_expiration_bidir(): 233 | """ 234 | Test output for bidirectional processing of tcp_syn_flow_expiration.pcap file 235 | """ 236 | for python_ver in (PYTHON2, PYTHON3): 237 | # System call to remove old result file if exists: 238 | if os.path.isfile(RESULT_FILE): 239 | logger.info("deleting RESULT_FILE=%s", RESULT_FILE) 240 | os.remove(RESULT_FILE) 241 | 242 | # Run flowRecorder to generate output file: 243 | try: 244 | result = subprocess.check_output([python_ver, FLOWRECORDER, 245 | "-f" , TEST_PCAP_TCP_SYN_FLOW_EXPIRATION, 246 | "-d", BIDIR, 247 | "-o", RESULT_FILE]) 248 | logger.info("flowRecorder result is %s", result) 249 | except subprocess.CalledProcessError as e: 250 | logger.critical("Stdout output: %s", e.output) 251 | 252 | # Check results file exists: 253 | assert os.path.isfile(RESULT_FILE) 254 | 255 | # Call helper function to validate the results file values: 256 | validate_results_file_bidir(RESULT_FILE, groundtruth_tcp_syn_flow_expiration, 2) 257 | 258 | #================= HELPER FUNCTIONS =========================================== 259 | 260 | def validate_results_file_unidir(filename, ground_truth, results_length): 261 | """ 262 | Validate a unidirectional results file
against ground truth values from 263 | a separate ground truth object 264 | """ 265 | logger.debug("Validating unidir results filename=%s against %s", filename, ground_truth.name) 266 | # Read in results file: 267 | with open(filename) as csv_file: 268 | csv_reader = list(csv.DictReader(csv_file)) 269 | # Validate results file has correct number of rows (excl header): 270 | assert len(csv_reader) == results_length 271 | row_number = 0 272 | # Iterate through rows of result data, checking values: 273 | for row in csv_reader: 274 | logger.debug("Validating row=%s", row_number) 275 | assert row['src_ip'] == ground_truth.UNIDIR_SRC_IP[row_number] 276 | assert row['src_port'] == ground_truth.UNIDIR_SRC_PORT[row_number] 277 | assert row['dst_ip'] == ground_truth.UNIDIR_DST_IP[row_number] 278 | assert row['dst_port'] == ground_truth.UNIDIR_DST_PORT[row_number] 279 | assert row['proto'] == ground_truth.UNIDIR_PROTO[row_number] 280 | assert row['pktTotalCount'] == ground_truth.UNIDIR_PKTTOTALCOUNT[row_number] 281 | assert row['octetTotalCount'] == ground_truth.UNIDIR_OCTETTOTALCOUNT[row_number] 282 | assert row['min_ps'] == ground_truth.UNIDIR_MIN_PS[row_number] 283 | assert row['max_ps'] == ground_truth.UNIDIR_MAX_PS[row_number] 284 | # Average needs leeway to cope with floats/division/rounding etc: 285 | assert float(row['avg_ps']) < float(ground_truth.UNIDIR_AVG_PS[row_number]) + MARGIN 286 | assert float(row['avg_ps']) > float(ground_truth.UNIDIR_AVG_PS[row_number]) - MARGIN 287 | # Std Dev needs leeway to cope with floats/division/rounding etc: 288 | assert float(row['std_dev_ps']) < float(ground_truth.UNIDIR_STD_DEV_PS[row_number]) + MARGIN 289 | assert float(row['std_dev_ps']) > float(ground_truth.UNIDIR_STD_DEV_PS[row_number]) - MARGIN 290 | assert row['flowStart'] == ground_truth.UNIDIR_FLOWSTART[row_number] 291 | assert row['flowEnd'] == ground_truth.UNIDIR_FLOWEND[row_number] 292 | # Flow Duration needs leeway to cope with floats/division/rounding etc: 293 | 
assert float(row['flowDuration']) < float(ground_truth.UNIDIR_FLOWDURATION[row_number]) + MARGIN 294 | assert float(row['flowDuration']) > float(ground_truth.UNIDIR_FLOWDURATION[row_number]) - MARGIN 295 | # Inter-packet arrival times need leeway to cope with floats/division/rounding etc: 296 | assert float(row['min_piat']) < float(ground_truth.UNIDIR_MIN_PIAT[row_number]) + MARGIN 297 | assert float(row['min_piat']) > float(ground_truth.UNIDIR_MIN_PIAT[row_number]) - MARGIN 298 | assert float(row['max_piat']) < float(ground_truth.UNIDIR_MAX_PIAT[row_number]) + MARGIN 299 | assert float(row['max_piat']) > float(ground_truth.UNIDIR_MAX_PIAT[row_number]) - MARGIN 300 | assert float(row['avg_piat']) < float(ground_truth.UNIDIR_AVG_PIAT[row_number]) + MARGIN 301 | assert float(row['avg_piat']) > float(ground_truth.UNIDIR_AVG_PIAT[row_number]) - MARGIN 302 | assert float(row['std_dev_piat']) < float(ground_truth.UNIDIR_STD_DEV_PIAT[row_number]) + MARGIN 303 | assert float(row['std_dev_piat']) > float(ground_truth.UNIDIR_STD_DEV_PIAT[row_number]) - MARGIN 304 | row_number += 1 305 | 306 | def validate_results_file_bidir(filename, ground_truth, results_length): 307 | """ 308 | Validate a bidirectional results file against ground truth values from 309 | a separate ground truth object 310 | """ 311 | logger.debug("Validating bidir results filename=%s against %s", filename, ground_truth.name) 312 | # Read in results file: 313 | with open(filename) as csv_file: 314 | csv_reader = list(csv.DictReader(csv_file)) 315 | # Validate results file has correct number of rows (excl header): 316 | assert len(csv_reader) == results_length 317 | row_number = 0 318 | # Iterate through rows of result data, checking values: 319 | for row in csv_reader: 320 | logger.debug("Validating row=%s", row_number) 321 | # Combined values: 322 | assert row['src_ip'] == ground_truth.BIDIR_SRC_IP[row_number] 323 | assert row['src_port'] == ground_truth.BIDIR_SRC_PORT[row_number] 324 | assert row['dst_ip'] 
== ground_truth.BIDIR_DST_IP[row_number] 325 | assert row['dst_port'] == ground_truth.BIDIR_DST_PORT[row_number] 326 | assert row['proto'] == ground_truth.BIDIR_PROTO[row_number] 327 | assert row['min_ps'] == ground_truth.BIDIR_MIN_PS[row_number] 328 | assert row['max_ps'] == ground_truth.BIDIR_MAX_PS[row_number] 329 | # Average needs leeway to cope with floats/division/rounding etc: 330 | assert float(row['avg_ps']) < float(ground_truth.BIDIR_AVG_PS[row_number]) + MARGIN 331 | assert float(row['avg_ps']) > float(ground_truth.BIDIR_AVG_PS[row_number]) - MARGIN 332 | # Std Dev needs leeway to cope with floats/division/rounding etc: 333 | assert float(row['std_dev_ps']) < float(ground_truth.BIDIR_STD_DEV_PS[row_number]) + MARGIN 334 | assert float(row['std_dev_ps']) > float(ground_truth.BIDIR_STD_DEV_PS[row_number]) - MARGIN 335 | assert row['flowStart'] == ground_truth.BIDIR_FLOWSTART[row_number] 336 | assert row['flowEnd'] == ground_truth.BIDIR_FLOWEND[row_number] 337 | # Flow Duration needs leeway to cope with floats/division/rounding etc: 338 | assert float(row['flowDuration']) < float(ground_truth.BIDIR_FLOWDURATION[row_number]) + MARGIN 339 | assert float(row['flowDuration']) > float(ground_truth.BIDIR_FLOWDURATION[row_number]) - MARGIN 340 | # Inter-packet arrival times need leeway to cope with floats/division/rounding etc: 341 | assert float(row['min_piat']) < float(ground_truth.BIDIR_MIN_PIAT[row_number]) + MARGIN 342 | assert float(row['min_piat']) > float(ground_truth.BIDIR_MIN_PIAT[row_number]) - MARGIN 343 | assert float(row['max_piat']) < float(ground_truth.BIDIR_MAX_PIAT[row_number]) + MARGIN 344 | assert float(row['max_piat']) > float(ground_truth.BIDIR_MAX_PIAT[row_number]) - MARGIN 345 | assert float(row['avg_piat']) < float(ground_truth.BIDIR_AVG_PIAT[row_number]) + MARGIN 346 | assert float(row['avg_piat']) > float(ground_truth.BIDIR_AVG_PIAT[row_number]) - MARGIN 347 | assert float(row['std_dev_piat']) < 
float(ground_truth.BIDIR_STD_DEV_PIAT[row_number]) + MARGIN 348 | assert float(row['std_dev_piat']) > float(ground_truth.BIDIR_STD_DEV_PIAT[row_number]) - MARGIN 349 | # Forward values: 350 | assert row['f_pktTotalCount'] == ground_truth.BIDIR_F_PKTTOTALCOUNT[row_number] 351 | assert row['f_octetTotalCount'] == ground_truth.BIDIR_F_OCTETTOTALCOUNT[row_number] 352 | assert row['f_min_ps'] == ground_truth.BIDIR_F_MIN_PS[row_number] 353 | assert row['f_max_ps'] == ground_truth.BIDIR_F_MAX_PS[row_number] 354 | # Average needs leeway to cope with floats/division/rounding etc: 355 | assert float(row['f_avg_ps']) < float(ground_truth.BIDIR_F_AVG_PS[row_number]) + MARGIN 356 | assert float(row['f_avg_ps']) > float(ground_truth.BIDIR_F_AVG_PS[row_number]) - MARGIN 357 | # Std Dev needs leeway to cope with floats/division/rounding etc: 358 | assert float(row['f_std_dev_ps']) < float(ground_truth.BIDIR_F_STD_DEV_PS[row_number]) + MARGIN 359 | assert float(row['f_std_dev_ps']) > float(ground_truth.BIDIR_F_STD_DEV_PS[row_number]) - MARGIN 360 | assert row['f_flowStart'] == ground_truth.BIDIR_F_FLOWSTART[row_number] 361 | assert row['f_flowEnd'] == ground_truth.BIDIR_F_FLOWEND[row_number] 362 | # Flow Duration needs leeway to cope with floats/division/rounding etc: 363 | assert float(row['f_flowDuration']) < float(ground_truth.BIDIR_F_FLOWDURATION[row_number]) + MARGIN 364 | assert float(row['f_flowDuration']) > float(ground_truth.BIDIR_F_FLOWDURATION[row_number]) - MARGIN 365 | # Inter-packet arrival times need leeway to cope with floats/division/rounding etc: 366 | assert float(row['f_min_piat']) < float(ground_truth.BIDIR_F_MIN_PIAT[row_number]) + MARGIN 367 | assert float(row['f_min_piat']) > float(ground_truth.BIDIR_F_MIN_PIAT[row_number]) - MARGIN 368 | assert float(row['f_max_piat']) < float(ground_truth.BIDIR_F_MAX_PIAT[row_number]) + MARGIN 369 | assert float(row['f_max_piat']) > float(ground_truth.BIDIR_F_MAX_PIAT[row_number]) - MARGIN 370 | assert 
float(row['f_avg_piat']) < float(ground_truth.BIDIR_F_AVG_PIAT[row_number]) + MARGIN 371 | assert float(row['f_avg_piat']) > float(ground_truth.BIDIR_F_AVG_PIAT[row_number]) - MARGIN 372 | assert float(row['f_std_dev_piat']) < float(ground_truth.BIDIR_F_STD_DEV_PIAT[row_number]) + MARGIN 373 | assert float(row['f_std_dev_piat']) > float(ground_truth.BIDIR_F_STD_DEV_PIAT[row_number]) - MARGIN 374 | # Backward values: 375 | assert row['b_pktTotalCount'] == ground_truth.BIDIR_B_PKTTOTALCOUNT[row_number] 376 | assert row['b_octetTotalCount'] == ground_truth.BIDIR_B_OCTETTOTALCOUNT[row_number] 377 | assert row['b_min_ps'] == ground_truth.BIDIR_B_MIN_PS[row_number] 378 | assert row['b_max_ps'] == ground_truth.BIDIR_B_MAX_PS[row_number] 379 | # Average needs leeway to cope with floats/division/rounding etc: 380 | assert float(row['b_avg_ps']) < float(ground_truth.BIDIR_B_AVG_PS[row_number]) + MARGIN 381 | assert float(row['b_avg_ps']) > float(ground_truth.BIDIR_B_AVG_PS[row_number]) - MARGIN 382 | # Std Dev needs leeway to cope with floats/division/rounding etc: 383 | assert float(row['b_std_dev_ps']) < float(ground_truth.BIDIR_B_STD_DEV_PS[row_number]) + MARGIN 384 | assert float(row['b_std_dev_ps']) > float(ground_truth.BIDIR_B_STD_DEV_PS[row_number]) - MARGIN 385 | assert row['b_flowStart'] == ground_truth.BIDIR_B_FLOWSTART[row_number] 386 | assert row['b_flowEnd'] == ground_truth.BIDIR_B_FLOWEND[row_number] 387 | # Flow Duration needs leeway to cope with floats/division/rounding etc: 388 | assert float(row['b_flowDuration']) < float(ground_truth.BIDIR_B_FLOWDURATION[row_number]) + MARGIN 389 | assert float(row['b_flowDuration']) > float(ground_truth.BIDIR_B_FLOWDURATION[row_number]) - MARGIN 390 | # Inter-packet arrival times need leeway to cope with floats/division/rounding etc: 391 | assert float(row['b_min_piat']) < float(ground_truth.BIDIR_B_MIN_PIAT[row_number]) + MARGIN 392 | assert float(row['b_min_piat']) > float(ground_truth.BIDIR_B_MIN_PIAT[row_number]) - 
MARGIN 393 | assert float(row['b_max_piat']) < float(ground_truth.BIDIR_B_MAX_PIAT[row_number]) + MARGIN 394 | assert float(row['b_max_piat']) > float(ground_truth.BIDIR_B_MAX_PIAT[row_number]) - MARGIN 395 | assert float(row['b_avg_piat']) < float(ground_truth.BIDIR_B_AVG_PIAT[row_number]) + MARGIN 396 | assert float(row['b_avg_piat']) > float(ground_truth.BIDIR_B_AVG_PIAT[row_number]) - MARGIN 397 | assert float(row['b_std_dev_piat']) < float(ground_truth.BIDIR_B_STD_DEV_PIAT[row_number]) + MARGIN 398 | assert float(row['b_std_dev_piat']) > float(ground_truth.BIDIR_B_STD_DEV_PIAT[row_number]) - MARGIN 399 | row_number += 1 400 | 401 | #f_pktTotalCount,f_octetTotalCount,f_min_ps,f_max_ps,f_avg_ps,f_std_dev_ps, 402 | #f_flowStart,f_flowEnd,f_flowDuration,f_min_piat,f_max_piat,f_avg_piat,f_std_dev_piat, 403 | #b_pktTotalCount,b_octetTotalCount,b_min_ps,b_max_ps,b_avg_ps,vstd_dev_ps, 404 | #b_flowStart,b_flowEnd,b_flowDuration,b_min_piat,b_max_piat,b_avg_piat,b_std_dev_piat 405 | -------------------------------------------------------------------------------- /flowRecorder/flows.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 10 | # implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | 14 | """ 15 | flows.py 16 | 17 | This data library represents network flows 18 | 19 | It stores cummulative information (not individual packets) 20 | about flows in a MongoDB collection 21 | """ 22 | # For Python 2.x compatibility: 23 | from __future__ import division 24 | 25 | # General imports: 26 | import sys 27 | 28 | # For CSV operations: 29 | import csv 30 | 31 | # For packet methods: 32 | import socket 33 | 34 | # For flows dictionary: 35 | from collections import OrderedDict 36 | 37 | # For math operations: 38 | import numpy as np 39 | 40 | # Import dpkt for packet parsing: 41 | import dpkt 42 | 43 | # For logging configuration: 44 | from baseclass import BaseClass 45 | 46 | # For flow hashing: 47 | import nethash 48 | 49 | class Flows(BaseClass): 50 | """ 51 | The Flows class represents cummulative information about flows 52 | (not individual packets) 53 | """ 54 | def __init__(self, config, mode): 55 | """ 56 | Initialise the Flows Class 57 | Args: 58 | config: Config class object 59 | mode: the mode in which the packets should be organised 60 | into flow records. 'u' is for unidirectional, 'b' is for 61 | bidirectional. 
62 | """ 63 | # Required for BaseClass: 64 | self.config = config 65 | # Set up Logging with inherited base class method: 66 | self.configure_logging(__name__, "flows_logging_level_s", 67 | "flows_logging_level_c") 68 | # Mode is u for unidirectional or b for bidirectional: 69 | self.mode = mode 70 | # Python dictionaries to hold current and archived flow records: 71 | self.flow_cache = OrderedDict() 72 | self.flow_archive = OrderedDict() 73 | 74 | # Create a Flow object for flow operations: 75 | self.flow = Flow(config, self.logger, self.flow_cache, self.flow_archive, mode) 76 | 77 | # Counter for packets that we ignored for various reasons: 78 | self.packets_ignored = 0 79 | 80 | # Counter for all the processed packets: 81 | self.packets_processed = 0 82 | 83 | def ingest_pcap(self, dpkt_reader): 84 | """ 85 | ingest packet data from dpkt reader of pcap file 86 | into flows. 87 | Args: 88 | dpkt_reader: dpkt pcap reader object (dpkt.pcap.Reader) 89 | """ 90 | 91 | infoFrequency = self.config.get_value("infoFrequency") 92 | 93 | # Process each packet in the pcap: 94 | for timestamp, packet in dpkt_reader: 95 | # Instantiate an instance of Packet class with packet info: 96 | packet = Packet(self.logger, timestamp, packet, self.mode) 97 | if packet.ingested: 98 | # Update the flow with packet info: 99 | self.flow.update(packet) 100 | self.packets_processed += 1 101 | if self.packets_processed % infoFrequency == 0: 102 | self.logger.info("Already processed %d packets", self.packets_processed) 103 | else: 104 | self.packets_ignored += 1 105 | 106 | def ingest_packet(self, hdr, packet): 107 | """ 108 | ingest a packet from pcapy (live capture) into flows. 
109 | """ 110 | # Get timestamp from header: 111 | sec, ms = hdr.getts() 112 | timestamp = sec + ms / 1000000 113 | 114 | # Instantiate an instance of Packet class with packet info: 115 | packet = Packet(self.logger, timestamp, packet, self.mode) 116 | 117 | infoFrequency = self.config.get_value("infoFrequency") 118 | 119 | if packet.ingested: 120 | # Update the flow with packet info: 121 | self.flow.update(packet) 122 | self.packets_processed += 1 123 | if self.packets_processed % infoFrequency == 0: 124 | self.logger.info("Already processed %d packets", self.packets_processed) 125 | else: 126 | self.packets_ignored += 1 127 | 128 | def write(self, file_name): 129 | """ 130 | Write all flow records out to CSV file 131 | """ 132 | with open(file_name, mode='w') as csv_file: 133 | if self.mode == 'u': 134 | # Unidirectional fields: 135 | fieldnames = ['src_ip', 'src_port', 'dst_ip', 'dst_port', 136 | 'proto', 'pktTotalCount', 'octetTotalCount', 137 | 'min_ps', 'max_ps', 'avg_ps', 'std_dev_ps', 138 | 'flowStart', 'flowEnd', 'flowDuration', 139 | 'min_piat', 'max_piat', 'avg_piat', 'std_dev_piat'] 140 | else: 141 | # Bidirectional fields: 142 | fieldnames = ['src_ip', 'src_port', 'dst_ip', 'dst_port', 143 | 'proto', 'pktTotalCount', 'octetTotalCount', 144 | 'min_ps', 'max_ps', 'avg_ps', 'std_dev_ps', 145 | 'flowStart', 'flowEnd', 'flowDuration', 146 | 'min_piat', 'max_piat', 'avg_piat', 'std_dev_piat', 147 | 'f_pktTotalCount', 'f_octetTotalCount', 148 | 'f_min_ps', 'f_max_ps', 'f_avg_ps', 'f_std_dev_ps', 149 | 'f_flowStart', 'f_flowEnd', 'f_flowDuration', 150 | 'f_min_piat', 'f_max_piat', 'f_avg_piat', 151 | 'f_std_dev_piat', 152 | 'b_pktTotalCount', 'b_octetTotalCount', 153 | 'b_min_ps', 'b_max_ps', 'b_avg_ps', 'b_std_dev_ps', 154 | 'b_flowStart', 'b_flowEnd', 'b_flowDuration', 155 | 'b_min_piat', 'b_max_piat', 'b_avg_piat', 156 | 'b_std_dev_piat' 157 | ] 158 | writer = csv.DictWriter(csv_file, fieldnames=fieldnames, extrasaction='ignore') 159 | # Write header: 160 | 
writer.writeheader() 161 | # Write archive flows as rows: 162 | for flow_dict in self.flow_archive.items(): 163 | writer.writerow(flow_dict[1]) 164 | # Write current flows as rows: 165 | for flow_dict in self.flow_cache.items(): 166 | writer.writerow(flow_dict[1]) 167 | 168 | def stats(self): 169 | """ 170 | Log the stats for flows 171 | """ 172 | self.logger.info("Result statistics") 173 | self.logger.info("-----------------") 174 | self.logger.info("Flow Records: %s", len(self.flow_cache)) 175 | self.logger.info("Additional Archived Flow Records: %s", len(self.flow_archive)) 176 | self.logger.info("Ignored Packets: %s", self.packets_ignored) 177 | self.logger.info("Processed Packets: %s", self.packets_processed) 178 | 179 | class Flow(object): 180 | """ 181 | An object that represents summary for an individual flow 182 | Designed to be instantiated once by the Flows class 183 | and set to different flow context by packet object 184 | """ 185 | def __init__(self, config, logger, flow_cache, flow_archive, mode): 186 | """ 187 | Initialise with references to logger and flow_cache dictionary 188 | and mode of operation. 189 | Parameters: 190 | logger: logger object 191 | flow_cache: reference to dictionary of flows 192 | mode: b (bidirectional) or u (unidirectional). 
193 | """ 194 | self.logger = logger 195 | self.flow_cache = flow_cache 196 | self.flow_archive = flow_archive 197 | self.mode = mode 198 | # Get value from config: 199 | self.flow_expiration = config.get_value("flow_expiration") 200 | self.logger.info("Flows will expire after %s seconds of inactivity", self.flow_expiration) 201 | self.logger.debug("Flow object instantiated in mode=%s", mode) 202 | 203 | def update(self, packet): 204 | """ 205 | Add or update flow in in flow_cache dictionary 206 | """ 207 | if packet.flow_hash in self.flow_cache: 208 | # Found existing flow in dict, update it: 209 | if self._is_current_flow(packet, self.flow_cache[packet.flow_hash]): 210 | # Update standard flow parameters: 211 | self._update_found(packet) 212 | if self.mode == 'b': 213 | # Also update bidirectional flow parameters: 214 | self._update_found_bidir(packet) 215 | else: 216 | # Expired flow so archive it: 217 | self._archive_flow(packet) 218 | # Delete from dict: 219 | self.flow_cache.pop(packet.flow_hash, None) 220 | # Now create as a new flow based on current packet: 221 | self._create_new(packet) 222 | if self.mode == 'b': 223 | self._create_new_bidir(packet) 224 | else: 225 | # Flow doesn't exist yet, create it: 226 | self._create_new(packet) 227 | if self.mode == 'b': 228 | self._create_new_bidir(packet) 229 | 230 | def _update_found(self, packet): 231 | """ 232 | Update existing flow in flow_cache dictionary with standard 233 | (non-bidirectional) parameters 234 | """ 235 | flow_hash = packet.flow_hash 236 | flow_dict = self.flow_cache[flow_hash] 237 | # Store size of this packet: 238 | flow_dict['length'].append(packet.length) 239 | # Update the count of packets and octets: 240 | flow_dict['pktTotalCount'] += 1 241 | flow_dict['octetTotalCount'] += packet.length 242 | # Update the min/max/avg/std_dev of the packet sizes: 243 | flow_dict['min_ps'] = min(flow_dict['length']) 244 | flow_dict['max_ps'] = max(flow_dict['length']) 245 | flow_dict['avg_ps'] = 
flow_dict['octetTotalCount'] / flow_dict['pktTotalCount'] 246 | flow_dict['std_dev_ps'] = np.std(flow_dict['length']) 247 | # Store the timestamps of the newly captured packet: 248 | flow_dict['times'].append(packet.timestamp) 249 | # As we have now at least 2 packets in the flow, we can calculate the packet-inter-arrival-time. 250 | # We decrement the packet counter every single time, otherwise it would start from 2 251 | # The first piat will be the current timestamp minus the timestamp of the previous packet: 252 | flow_dict['iats'].append(flow_dict['times'][-1] \ 253 | - flow_dict['times'][-2]) 254 | # Update the flow end/duration (the start does not change) 255 | flow_dict['flowEnd'] = packet.timestamp 256 | flow_dict['flowDuration'] = (packet.timestamp - flow_dict['flowStart']) 257 | # at last update the min/max/avg/std_dev of packet-inter-arrival-times 258 | flow_dict['min_piat'] = min(flow_dict['iats']) 259 | flow_dict['max_piat'] = max(flow_dict['iats']) 260 | flow_dict['avg_piat'] = sum(flow_dict['iats']) / (flow_dict['pktTotalCount'] - 1) 261 | flow_dict['std_dev_piat'] = np.std(flow_dict['iats']) 262 | 263 | def _update_found_bidir(self, packet): 264 | """ 265 | Update existing flow in flow_cache dictionary with 266 | bidirectional parameters (separately to standard parameters) 267 | """ 268 | flow_hash = packet.flow_hash 269 | flow_dict = self.flow_cache[flow_hash] 270 | # Determine packet direction (f=forward, r=reverse): 271 | direction = self.packet_dir(packet, flow_dict) 272 | # Update keys dependant on the direction (f or b): 273 | if direction == 'f': 274 | # Forward (f) direction 275 | # Store size of this packet: 276 | flow_dict['f_length'].append(packet.length) 277 | # Update the count of packets and octets: 278 | flow_dict['f_pktTotalCount'] += 1 279 | flow_dict['f_octetTotalCount'] += packet.length 280 | # Update the min/max/avg/std_dev of the packet sizes: 281 | flow_dict['f_min_ps'] = min(flow_dict['f_length']) 282 | flow_dict['f_max_ps'] 
= max(flow_dict['f_length']) 283 | flow_dict['f_avg_ps'] = flow_dict['f_octetTotalCount'] / flow_dict['f_pktTotalCount'] 284 | flow_dict['f_std_dev_ps'] = np.std(flow_dict['f_length']) 285 | # Store the timestamps of the newly captured packets: 286 | flow_dict['f_times'].append(packet.timestamp) 287 | # Do inter-packet arrival time if have at least 2 packets: 288 | if (flow_dict['f_pktTotalCount'] > 1): 289 | flow_dict['f_iats'].append(flow_dict['f_times'][-1] \ 290 | - flow_dict['f_times'][-2]) 291 | # Update the flow end/duration (the start does not change) 292 | flow_dict['f_flowEnd'] = packet.timestamp 293 | flow_dict['f_flowDuration'] = (packet.timestamp - flow_dict['f_flowStart']) 294 | # at last update the min/max/avg/std_dev of packet-inter-arrival-times 295 | flow_dict['f_min_piat'] = min(flow_dict['f_iats']) 296 | flow_dict['f_max_piat'] = max(flow_dict['f_iats']) 297 | flow_dict['f_avg_piat'] = sum(flow_dict['f_iats']) / (flow_dict['f_pktTotalCount'] - 1) 298 | flow_dict['f_std_dev_piat'] = np.std(flow_dict['f_iats']) 299 | else: 300 | # Backward (b) direction 301 | # Note: this may be the first time we've see backwards dir packet. 
302 | # Store size of this packet: 303 | flow_dict['b_length'].append(packet.length) 304 | # Update the count of packets and octets: 305 | flow_dict['b_pktTotalCount'] += 1 306 | flow_dict['b_octetTotalCount'] += packet.length 307 | # Update the min/max/avg/std_dev of the packet sizes: 308 | flow_dict['b_min_ps'] = min(flow_dict['b_length']) 309 | flow_dict['b_max_ps'] = max(flow_dict['b_length']) 310 | flow_dict['b_avg_ps'] = flow_dict['b_octetTotalCount'] / flow_dict['b_pktTotalCount'] 311 | flow_dict['b_std_dev_ps'] = np.std(flow_dict['b_length']) 312 | # Store the timestamps of the newly captured packets: 313 | flow_dict['b_times'].append(packet.timestamp) 314 | # Do inter-packet arrival time if have at least 2 packets: 315 | if (flow_dict['b_pktTotalCount'] < 2): 316 | # First time, so set some stuff: 317 | flow_dict['b_flowStart'] = packet.timestamp 318 | else: 319 | # Not first time: 320 | flow_dict['b_iats'].append(flow_dict['b_times'][-1] \ 321 | - flow_dict['b_times'][-2]) 322 | flow_dict['b_flowDuration'] = (packet.timestamp - flow_dict['b_flowStart']) 323 | # Update the min/max/avg/std_dev of packet-inter-arrival-times: 324 | flow_dict['b_min_piat'] = min(flow_dict['b_iats']) 325 | flow_dict['b_max_piat'] = max(flow_dict['b_iats']) 326 | flow_dict['b_avg_piat'] = sum(flow_dict['b_iats']) / (flow_dict['b_pktTotalCount'] - 1) 327 | flow_dict['b_std_dev_piat'] = np.std(flow_dict['b_iats']) 328 | # Update the flow end/duration (the start does not change): 329 | flow_dict['b_flowEnd'] = packet.timestamp 330 | 331 | def _create_new(self, packet): 332 | """ 333 | Create new flow in flow_cache dictionary with standard 334 | (non-bidirectional) parameters 335 | """ 336 | flow_hash = packet.flow_hash 337 | # Create new key etc in flow dict for this flow: 338 | # Initialise the new flow key: 339 | self.flow_cache[flow_hash] = {} 340 | flow_dict = self.flow_cache[flow_hash] 341 | # Store the flow parameters for packet header values: 342 | flow_dict['src_ip'] = 
packet.ip_src 343 | flow_dict['dst_ip'] = packet.ip_dst 344 | flow_dict['proto'] = packet.proto 345 | flow_dict['src_port'] = packet.tp_src 346 | flow_dict['dst_port'] = packet.tp_dst 347 | # Store the size of the first packet: 348 | flow_dict['length'] = [] 349 | flow_dict['length'].append(packet.length) 350 | # Store the packet size and number of octets: 351 | flow_dict['pktTotalCount'] = 1 352 | flow_dict['octetTotalCount'] = packet.length 353 | # Set the min/max/avg/std_dev of packet sizes 354 | # (in case there will be no more packets belonging to the flow): 355 | flow_dict['min_ps'] = packet.length 356 | flow_dict['max_ps'] = packet.length 357 | flow_dict['avg_ps'] = packet.length 358 | flow_dict['std_dev_ps'] = np.std(flow_dict['length']) 359 | # Store the timestamps of the packets: 360 | flow_dict['times'] = [] 361 | flow_dict['times'].append(packet.timestamp) 362 | flow_dict['iats'] = [] 363 | # store the flow start/end/duration 364 | flow_dict['flowStart'] = packet.timestamp 365 | flow_dict['flowEnd'] = packet.timestamp 366 | flow_dict['flowDuration'] = 0 367 | # Set the min/max/avg/std_dev of packet-inter arrival times 368 | # (in case there will be no more packets belonging to the flow): 369 | flow_dict['min_piat'] = 0 370 | flow_dict['max_piat'] = 0 371 | flow_dict['avg_piat'] = 0 372 | flow_dict['std_dev_piat'] = 0 373 | 374 | def _create_new_bidir(self, packet): 375 | """ 376 | Add bidir parameters to new flow in flow_cache dictionary 377 | """ 378 | flow_hash = packet.flow_hash 379 | flow_dict = self.flow_cache[flow_hash] 380 | # Set up keys in preparation: 381 | flow_dict['f_length'] = [] 382 | flow_dict['f_times'] = [] 383 | flow_dict['f_iats'] = [] 384 | flow_dict['b_length'] = [] 385 | flow_dict['b_times'] = [] 386 | flow_dict['b_iats'] = [] 387 | flow_dict['b_pktTotalCount'] = 0 388 | flow_dict['b_octetTotalCount'] = 0 389 | flow_dict['b_min_ps'] = 0 390 | flow_dict['b_max_ps'] = 0 391 | flow_dict['b_avg_ps'] = 0 392 | flow_dict['b_std_dev_ps'] 
= 0 393 | flow_dict['b_flowStart'] = 0 394 | flow_dict['b_flowEnd'] = 0 395 | flow_dict['b_flowDuration'] = 0 396 | flow_dict['b_min_piat'] = 0 397 | flow_dict['b_max_piat'] = 0 398 | flow_dict['b_avg_piat'] = 0 399 | flow_dict['b_std_dev_piat'] = 0 400 | # Determine packet direction (f=forward, r=reverse): 401 | direction = self.packet_dir(packet, flow_dict) 402 | # Update keys dependant on the direction (f or b): 403 | if direction == 'f': 404 | # Forward (f) direction 405 | # Store the size of the first packet: 406 | flow_dict['f_length'].append(packet.length) 407 | # Store the packet size and number of octets: 408 | flow_dict['f_pktTotalCount'] = 1 409 | flow_dict['f_octetTotalCount'] = packet.length 410 | # Set the min/max/avg/std_dev of packet sizes 411 | # (in case there will be no more packets belonging to the flow): 412 | flow_dict['f_min_ps'] = packet.length 413 | flow_dict['f_max_ps'] = packet.length 414 | flow_dict['f_avg_ps'] = packet.length 415 | flow_dict['f_std_dev_ps'] = np.std(flow_dict['f_length']) 416 | # Store the timestamps of the packets: 417 | flow_dict['f_times'].append(packet.timestamp) 418 | # store the flow start/end/duration 419 | flow_dict['f_flowStart'] = packet.timestamp 420 | flow_dict['f_flowEnd'] = packet.timestamp 421 | flow_dict['f_flowDuration'] = 0 422 | # Set the min/max/avg/std_dev of packet-inter arrival times 423 | # (in case there will be no more packets belonging to the flow): 424 | flow_dict['f_min_piat'] = 0 425 | flow_dict['f_max_piat'] = 0 426 | flow_dict['f_avg_piat'] = 0 427 | flow_dict['f_std_dev_piat'] = 0 428 | else: 429 | # Backward (b) direction 430 | # Store the size of the first packet: 431 | flow_dict['b_length'].append(packet.length) 432 | # Store the packet size and number of octets: 433 | flow_dict['b_pktTotalCount'] = 1 434 | flow_dict['b_octetTotalCount'] = packet.length 435 | # Set the min/max/avg/std_dev of packet sizes 436 | # (in case there will be no more packets belonging to the flow): 437 | 
flow_dict['b_min_ps'] = packet.length 438 | flow_dict['b_max_ps'] = packet.length 439 | flow_dict['b_avg_ps'] = packet.length 440 | flow_dict['b_std_dev_ps'] = np.std(flow_dict['b_length']) 441 | # Store the timestamps of the packets: 442 | flow_dict['b_times'].append(packet.timestamp) 443 | # store the flow start/end/duration 444 | flow_dict['b_flowStart'] = packet.timestamp 445 | flow_dict['b_flowEnd'] = packet.timestamp 446 | flow_dict['b_flowDuration'] = 0 447 | # Set the min/max/avg/std_dev of packet-inter arrival times 448 | # (in case there will be no more packets belonging to the flow): 449 | flow_dict['b_min_piat'] = 0 450 | flow_dict['b_max_piat'] = 0 451 | flow_dict['b_avg_piat'] = 0 452 | flow_dict['b_std_dev_piat'] = 0 453 | 454 | def _is_current_flow(self, packet, flow_dict): 455 | """ 456 | Check if flow is current or has expired. 457 | Only check if the flow hash is already known 458 | True = flow has not expired 459 | False = flow has expired, i.e. PIAT from previous packet 460 | in flow is greater than flow expiration threshold 461 | """ 462 | if flow_dict['iats']: 463 | if (packet.timestamp - flow_dict['times'][-1]) > self.flow_expiration: 464 | # Flow has expired: 465 | return False 466 | else: 467 | # Flow has not expired: 468 | return True 469 | elif flow_dict['pktTotalCount'] == 1: 470 | # Was only 1 packet so no PIAT so use packet timestamp 471 | if (packet.timestamp - flow_dict['flowStart']) > self.flow_expiration: 472 | # Flow has expired: 473 | return False 474 | else: 475 | # Flow has not expired: 476 | return True 477 | else: 478 | # No packets??? 
479 | self.logger.warning("Strange condition...") 480 | return True 481 | 482 | def _archive_flow(self, packet): 483 | """ 484 | Move a flow record to archive dictionary, indexed by a 485 | longer more unique key 486 | """ 487 | flow_hash = packet.flow_hash 488 | flow_dict = self.flow_cache[flow_hash] 489 | start_timestamp = flow_dict['flowStart'] 490 | ip_src = flow_dict['src_ip'] 491 | ip_dst = flow_dict['dst_ip'] 492 | proto = flow_dict['proto'] 493 | tp_src = flow_dict['src_port'] 494 | tp_dst = flow_dict['dst_port'] 495 | # Create new more-specific hash key for archiving: 496 | if self.mode == 'b': 497 | if proto == 6 or proto == 17: 498 | # Generate a directional 6-tuple flow_hash: 499 | new_hash = nethash.hash_b6((ip_src, 500 | ip_dst, proto, tp_src, 501 | tp_dst, start_timestamp)) 502 | else: 503 | # Generate a directional 4-tuple flow_hash: 504 | new_hash = nethash.hash_b4((ip_src, 505 | ip_dst, proto, 506 | start_timestamp)) 507 | elif self.mode == 'u': 508 | if proto == 6 or proto == 17: 509 | # Generate a directional 6-tuple flow_hash: 510 | new_hash = nethash.hash_u6((ip_src, 511 | ip_dst, proto, tp_src, 512 | tp_dst, start_timestamp)) 513 | else: 514 | # Generate a directional 4-tuple flow_hash: 515 | new_hash = nethash.hash_u4((ip_src, 516 | ip_dst, proto, 517 | start_timestamp)) 518 | # Check key isn't already used in archive: 519 | if new_hash in self.flow_archive: 520 | self.logger.warning("archive duplicate flow key=%s", new_hash) 521 | return 522 | # Copy to flow archive: 523 | self.flow_archive[new_hash] = flow_dict 524 | 525 | # Delete from current flows: 526 | 527 | 528 | def packet_dir(self, packet, flow_dict): 529 | """ 530 | Determine packet direction (f=forward, r=reverse) 531 | """ 532 | if packet.ip_src == flow_dict['src_ip']: 533 | return 'f' 534 | elif packet.ip_src == flow_dict['dst_ip']: 535 | return 'b' 536 | else: 537 | self.logger.critical("Uh oh, something went wrong. 
Exiting") 538 | sys.exit() 539 | 540 | class Packet(object): 541 | """ 542 | An object that represents a packet 543 | """ 544 | def __init__(self, logger, timestamp, packet, mode): 545 | """ 546 | Parameters: 547 | timestamp: when packet was recorded 548 | packet: dpkt object 549 | mode: b (bidirectional) or u (unidirectional). Used for 550 | hash calculation 551 | """ 552 | self.logger = logger 553 | #*** Initialise packet variables: 554 | self.flow_hash = 0 555 | self.timestamp = timestamp 556 | # self.length = len(packet) 557 | self.ip_src = 0 558 | self.ip_dst = 0 559 | self.proto = 0 560 | self.tp_src = 0 561 | self.tp_dst = 0 562 | self.tp_flags = 0 563 | self.tp_seq_src = 0 564 | self.tp_seq_dst = 0 565 | self.ingested = False 566 | 567 | try: 568 | # Read packet into dpkt to parse headers: 569 | eth = dpkt.ethernet.Ethernet(packet) 570 | except: 571 | # Skip Packet if unable to parse: 572 | self.logger.error("failed to unpack packet, skipping...") 573 | return 574 | 575 | # Get the IP packet 576 | ip = eth.data 577 | 578 | # Get the length of IPv4 packet: 579 | if isinstance(eth.data, dpkt.ip.IP): 580 | self.length = ip.len 581 | # Get the length of IPv6 packet: 582 | elif isinstance(eth.data, dpkt.ip6.IP6): 583 | self.length = len(ip.data) 584 | # Ignore if non-IP packet: 585 | else: 586 | return 587 | 588 | # Handle IPv4 and IPv6: 589 | try: 590 | self.ip_src = socket.inet_ntop(socket.AF_INET, ip.src) 591 | self.ip_dst = socket.inet_ntop(socket.AF_INET, ip.dst) 592 | except ValueError: 593 | self.ip_src = socket.inet_ntop(socket.AF_INET6, ip.src) 594 | self.ip_dst = socket.inet_ntop(socket.AF_INET6, ip.dst) 595 | # Transport layer: 596 | self.proto = ip.p 597 | if ip.p == 6: 598 | # TCP 599 | tcp = ip.data 600 | self.tp_src = tcp.sport 601 | self.tp_dst = tcp.dport 602 | self.tp_flags = tcp.flags 603 | self.tp_seq_src = tcp.seq 604 | self.tp_seq_dst = tcp.ack 605 | elif ip.p == 17: 606 | # UDP 607 | udp = ip.data 608 | self.tp_src = udp.sport 609 | 
self.tp_dst = udp.dport 610 | self.tp_flags = "" 611 | self.tp_seq_src = 0 612 | self.tp_seq_dst = 0 613 | else: 614 | # Not a transport layer that we understand, keep going: 615 | pass 616 | 617 | if mode == 'b': 618 | if self.proto == 6 or self.proto == 17: 619 | # Generate a directional 5-tuple flow_hash: 620 | self.flow_hash = nethash.hash_b5((self.ip_src, 621 | self.ip_dst, self.proto, self.tp_src, 622 | self.tp_dst)) 623 | else: 624 | # Generate a directional 3-tuple flow_hash: 625 | self.flow_hash = nethash.hash_b3((self.ip_src, 626 | self.ip_dst, self.proto)) 627 | elif mode == 'u': 628 | if self.proto == 6 or self.proto == 17: 629 | # Generate a directional 5-tuple flow_hash: 630 | self.flow_hash = nethash.hash_u5((self.ip_src, 631 | self.ip_dst, self.proto, self.tp_src, 632 | self.tp_dst)) 633 | else: 634 | # Generate a directional 3-tuple flow_hash: 635 | self.flow_hash = nethash.hash_u3((self.ip_src, 636 | self.ip_dst, self.proto)) 637 | else: 638 | logger.critical("unsupported mode=%s", mode) 639 | sys.exit() 640 | # Yay, packet has been ingested: 641 | self.ingested = True 642 | 643 | def tcp_fin(self): 644 | """ 645 | Does the current packet have the TCP FIN flag set? 646 | """ 647 | return self.tp_flags & dpkt.tcp.TH_FIN != 0 648 | 649 | def tcp_syn(self): 650 | """ 651 | Does the current packet have the TCP SYN flag set? 652 | """ 653 | return self.tp_flags & dpkt.tcp.TH_SYN != 0 654 | 655 | def tcp_rst(self): 656 | """ 657 | Does the current packet have the TCP RST flag set? 658 | """ 659 | return self.tp_flags & dpkt.tcp.TH_RST != 0 660 | 661 | def tcp_psh(self): 662 | """ 663 | Does the current packet have the TCP PSH flag set? 664 | """ 665 | return self.tp_flags & dpkt.tcp.TH_PUSH != 0 666 | 667 | def tcp_ack(self): 668 | """ 669 | Does the current packet have the TCP ACK flag set? 670 | """ 671 | return self.tp_flags & dpkt.tcp.TH_ACK != 0 672 | 673 | def tcp_urg(self): 674 | """ 675 | Does the current packet have the TCP URG flag set? 
676 | """ 677 | return self.tp_flags & dpkt.tcp.TH_URG != 0 678 | 679 | def tcp_ece(self): 680 | """ 681 | Does the current packet have the TCP ECE flag set? 682 | """ 683 | return self.tp_flags & dpkt.tcp.TH_ECE != 0 684 | 685 | def tcp_cwr(self): 686 | """ 687 | Does the current packet have the TCP CWR flag set? 688 | """ 689 | return self.tp_flags & dpkt.tcp.TH_CWR != 0 690 | 691 | --------------------------------------------------------------------------------