├── logstash.conf
├── LICENSE
├── .gitignore
├── README.md
└── tcpdump_aggregate.py


/logstash.conf:
--------------------------------------------------------------------------------
# udp tcpdump stream via 5141
input {
  udp {
    type => "tcpdump"
    port => 5141
  }
}

########################
#10-tag.conf
########################

filter {
  if [type] == "tcpdump" {
    mutate {
      add_tag => ["tcpdump"]
    }
  }
}

#########################
#12-tcpdump.conf
#########################

filter {
  if "tcpdump" in [tags] {
    json {
      source => "message"
    }
  }
}

#########################
#30-outputs.conf
#########################

output {
  if "tcpdump" in [tags] {
    elasticsearch {
      hosts => ["elasticsearch:9200"]
      index => "tcpdump-%{+YYYY.MM.dd}"
    }
    stdout { codec => rubydebug }
  }
}
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 Fritz Larco

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# elk-tcpdump
Code to aggregate tcpdump traffic and send it to ELK (Elasticsearch-Logstash-Kibana)

This allows one to capture a host's network traffic statistics:
- Source IP/Host/Port to Target IP/Host/Port
- Aggregate count of packets over time
- Aggregate length of packets over time

This is ideal to run on firewalls (such as pfSense) to monitor traffic with a stack such as ELK.

# Instructions

This has only been tested on Ubuntu 14.04 and FreeBSD. The following packages are required:
- tcpdump
- host

Then clone this repo:
```
git clone https://github.com/flarco/elk-tcpdump.git
cd elk-tcpdump
```

To start collecting tcpdump statistics, run the following on the host:
```shell
tcpdump -U -i eth0 -nn -tttt port not 5141 | python tcpdump_aggregate.py "192.168.2.3:5141"

# this is an example on a firewall with 2 NICs
tcpdump -U -i eth0 -i eth1 -nn -tttt port not 5141 | python tcpdump_aggregate.py "192.168.2.3:5141"
```

In the example above, the tcpdump aggregates are sent to host '192.168.2.3' on port 5141 via UDP at the interval specified in the script (default: 20 seconds).
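The interval is controlled by the `SEC_INTERVAL` constant near the top of `tcpdump_aggregate.py`; edit it there to send more or less frequently:

```python
SEC_INTERVAL = 20  # UDP send / Aggregate reset interval
```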

Here is an example of the received data on 192.168.2.3:5141:
```shell
root@d54ea1457852:/tmp# netcat -ul 5141
{"source_IP": "172.17.0.3", "source_PORT": 22, "target_IP": "172.17.0.1", "target_PORT": 54686, "type": "TCP", "count": 1, "length": 212, "source_HOST": "172.17.0.3", "target_HOST": "172.17.0.1", "time": "2016-09-08 23:27:40.090202"}
{"source_IP": "172.17.0.1", "source_PORT": 54692, "target_IP": "172.17.0.3", "target_PORT": 22, "type": "TCP", "count": 24, "length": 0, "source_HOST": "NXDOMAIN", "target_HOST": "NXDOMAIN", "time": "2016-09-08 23:28:29.073292"}
{"source_IP": "172.17.0.1", "source_PORT": 54690, "target_IP": "172.17.0.3", "target_PORT": 22, "type": "TCP", "count": 1, "length": 52, "source_HOST": "172.17.0.1", "target_HOST": "172.17.0.3", "time": "2016-09-08 23:28:29.073292"}
{"source_IP": "172.17.0.3", "source_PORT": 22, "target_IP": "172.17.0.1", "target_PORT": 54690, "type": "TCP", "count": 1, "length": 0, "source_HOST": "172.17.0.3", "target_HOST": "172.17.0.1", "time": "2016-09-08 23:28:29.073292"}
{"source_IP": "172.17.0.3", "source_PORT": 22, "target_IP": "172.17.0.1", "target_PORT": 54692, "type": "TCP", "count": 24, "length": 3888, "source_HOST": "172.17.0.3", "target_HOST": "172.17.0.1", "time": "2016-09-08 23:28:29.073292"}
{"source_IP": "172.17.0.1", "source_PORT": 54686, "target_IP": "172.17.0.3", "target_PORT": 22, "type": "TCP", "count": 1, "length": 0, "source_HOST": "172.17.0.1", "target_HOST": "172.17.0.3", "time": "2016-09-08 23:28:29.073292"}
```

The `source_HOST` / `target_HOST` fields are resolved by attempting a lookup with the Linux `host` command, and the results are cached. If no hostname is returned, the IP is stored instead.

With this process, we can use Logstash to parse the data and ingest it into Elasticsearch, then view it in Kibana.

We can use the `logstash.conf` file for this. Make sure to [create the index-template](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html) prior to ingesting into Elasticsearch!
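As a starting point, here is a minimal index-template sketch. The template name, the `localhost:9200` endpoint, and the field types below are assumptions, not part of this repo; adjust them (and the request syntax) for your Elasticsearch version:

```shell
# assumption: Elasticsearch 5.x reachable on localhost:9200; field types are suggestions
curl -XPUT 'localhost:9200/_template/tcpdump' -H 'Content-Type: application/json' -d '
{
  "template": "tcpdump-*",
  "mappings": {
    "tcpdump": {
      "properties": {
        "source_IP":   { "type": "ip" },
        "target_IP":   { "type": "ip" },
        "source_HOST": { "type": "keyword" },
        "target_HOST": { "type": "keyword" },
        "count":       { "type": "long" },
        "length":      { "type": "long" }
      }
    }
  }
}'
```

This keeps the IP fields queryable as real IP addresses and the counters numeric, so Kibana can aggregate on them.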

------

Here is an example of Kibana using this data:

![image](https://cloud.githubusercontent.com/assets/7671010/18371213/895bf418-7600-11e6-846d-a70a67efdd13.png)
--------------------------------------------------------------------------------
/tcpdump_aggregate.py:
--------------------------------------------------------------------------------
import sys, re, datetime, threading, socket, json
from subprocess import Popen, PIPE, STDOUT
from collections import namedtuple
import linecache, traceback


args = sys.argv[1:]
utc_delta = datetime.datetime.utcnow() - datetime.datetime.now()

if len(args) < 1:
    print('USAGE: python tcpdump_aggregate.py "TARGET_IP:UDP_PORT"')
    print('EXAMPLE: python tcpdump_aggregate.py "127.0.0.1:5141"')
    print('EXAMPLE with tcpdump: tcpdump -U -i eth0 -nn -tttt port not 5141 | python tcpdump_aggregate.py "127.0.0.1:5141"')
    sys.exit(1)


UDP_IP = args[0].split(':')[0]
UDP_PORT = int(args[0].split(':')[1])
SEC_INTERVAL = 20  # UDP send / Aggregate reset interval


def get_exception_message(append_message=''):
    """Obtain and return the current exception message"""

    exc_type, exc_obj, tb = sys.exc_info()
    f = tb.tb_frame
    lineno = tb.tb_lineno
    filename = f.f_code.co_filename
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, f.f_globals)
    message = '-' * 70 + '\n' + 'EXCEPTION IN ({}, LINE {} "{}"): {} \n---\n{}'.format(
        filename, lineno, line.strip(), exc_obj, traceback.format_exc()) + '\n' + append_message
    return message


def run_command(command, show_output=False):
    """Run a shell command and return its output"""

    process = Popen(command, shell=True, stdout=PIPE, stderr=STDOUT)

    stdout_array = []

    for line in iter(process.stdout.readline, ''):
        if show_output: sys.stdout.write(line)
        stdout_array.append(line.rstrip())

    process.wait()
    stdout = '\n'.join(stdout_array) + process.stdout.read()

    return stdout


def parse_ip_port(address):
    """Parse the IP and PORT from a tcpdump address string"""
    if ':' in address:
        # IPv6
        if '.' in address:  # there is a PORT after the last dot
            IP = address.split('.')[0]
            PORT = int(address.split('.')[-1])
        else:  # there is no PORT
            IP = address
            PORT = ''
    else:
        # IPv4
        deli = "."
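        # tcpdump with -nn prints IPv4 endpoints as e.g. "192.168.1.3.36409":
        # four address octets plus the port, all dot-separated (5 fields when a port is present)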
        arr = address.split(deli)
        if len(arr) == 5:
            IP = deli.join(arr[:-1])
            PORT = int(arr[-1])
        else:
            IP = address
            PORT = ''

    return (IP, PORT)


host_cache = {}  # cache of IP-to-host mapping
threads = {}  # one thread per IP
thread_lock = threading.Lock()
command_lock = threading.Lock()


def get_host(IP):
    """Attempt to obtain the hostname of the IP; starts a thread on a cache miss."""
    global host_cache, threads, thread_lock
    with thread_lock:
        if IP in host_cache:
            return host_cache[IP]

        # kick off a resolver thread
        if IP not in threads:
            threads[IP] = threading.Thread(target=get_host_process, args=(IP,))
            threads[IP].start()

    return IP


def get_host_process(IP):
    """Thread worker to obtain the hostname of the IP"""
    global host_cache, thread_lock
    with command_lock:
        # print('Getting host for ' + IP)
        host = run_command('host ' + IP).split()[-1][:-1]  # remove the trailing dot
        if 'XDOMAIN' in host:
            host = 'NXDOMAIN'
        with thread_lock:
            host_cache[IP] = host


def parse_packet(line):
    """
    Parse one packet and return a dictionary record with the following keys:
      source_IP, source_PORT, source_HOST,
      target_IP, target_PORT, target_HOST,
      length, type, flags, seq, ack, win
    """
    if ', length ' not in line: return
    if '>' not in line: return
    if 'ff:ff:ff:ff:ff:ff' in line: return  # skip broadcast frames

    packet = {}

    arr1 = line.split(' > ')
    part1 = arr1[0]  # 2015-10-29 12:53:44.030124 IP 192.168.1.3.36409
    part1_arr = part1.split()

    arr2 = arr1[1].split(', length ')
    part2 = arr2[0]  # 192.168.1.193.4849: UDP
    part2_arr = part2.split()

    source = part1_arr[-1]
    target = part2_arr[0][:-1]  # drop the trailing colon

    part3 = ' '.join(part2_arr[1:])
    part3_arr = part3.split(',')

    packet['length'] = int(re.sub("[^0-9]", "", arr2[1].split(":")[0].split()[0]))

    packet['time'] = datetime.datetime.strptime(' '.join(part1_arr[0:2]), '%Y-%m-%d %H:%M:%S.%f')

    if part3 == 'UDP':
        packet['type'] = 'UDP'
    else:
        packet['type'] = 'TCP'

    # convert the capture timestamp to UTC
    packet['time'] = packet['time'] + datetime.timedelta(seconds=round(utc_delta.total_seconds()))

    (packet['source_IP'], packet['source_PORT']) = parse_ip_port(source)
    (packet['target_IP'], packet['target_PORT']) = parse_ip_port(target)

    packet['source_HOST'] = get_host(packet['source_IP'])
    packet['target_HOST'] = get_host(packet['target_IP'])

    for k in 'flags seq ack win'.split():
        packet[k] = ''

    for p in part3_arr:
        p = p.strip()
        if p.startswith('Flags'): packet['flags'] = p.split()[-1]
        if p.startswith('seq'): packet['seq'] = p.split()[-1]
        if p.startswith('ack'): packet['ack'] = int(p.split()[-1])
        if p.startswith('win'): packet['win'] = int(p.split()[-1])

    return packet


class Packet_Aggregate:
    """An Aggregate of packets with the length/count fields summed.
    Also has a method to send the aggregate via UDP/IP
    to a Logstash destination."""

    UDP_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    combo_keys = 'source_IP source_PORT target_IP target_PORT type'.split()
    Combo = namedtuple('Combo', combo_keys)

    def __init__(self):
        self.combo_count = {}
        self.combo_length_sum = {}
        self.combo_host = {}

    def ingest(self, packet):
        combo_key = self.Combo(*[packet[key] for key in self.combo_keys])

        length = packet['length']
        self.combo_count[combo_key] = self.combo_count.get(combo_key, 0) + 1
        self.combo_length_sum[combo_key] = self.combo_length_sum.get(combo_key, 0) + length

        self.combo_host[combo_key] = dict(
            source_HOST=packet['source_HOST'],
            target_HOST=packet['target_HOST'],
        )

    def send_udp(self, server, port):
        time = str(datetime.datetime.utcnow())

        for combo, count in self.combo_count.iteritems():
            combo_record = combo._asdict()
            combo_record['count'] = count
            combo_record['length'] = self.combo_length_sum[combo]

            for k, v in self.combo_host[combo].iteritems():
                combo_record[k] = v

            combo_record['time'] = time

            # print(str(combo_record))

            self.UDP_sock.sendto(json.dumps(combo_record), (server, port))


def main_buffer():
    global line_count

    line_count = 0
    update_time_marker = lambda: datetime.datetime.now() + datetime.timedelta(seconds=SEC_INTERVAL)

    time_marker = update_time_marker()
    aggregate = Packet_Aggregate()

    print("Running packet capture live with Aggregate...")

    for line in sys.stdin:
        line_count += 1
        packet = parse_packet(line)
        if not packet:
            continue

        aggregate.ingest(packet)

        if datetime.datetime.now() > time_marker:
            aggregate.send_udp(UDP_IP, UDP_PORT)
            aggregate = Packet_Aggregate()  # reset the aggregate for the next interval
            time_marker = update_time_marker()


try:
    line_count = 0
    main_buffer()

except KeyboardInterrupt:
    print("Exiting...")
except:
    print(get_exception_message())

print('read ' + str(line_count) + ' lines')
--------------------------------------------------------------------------------