├── logstash.conf
├── LICENSE
├── .gitignore
├── README.md
└── tcpdump_aggregate.py


/logstash.conf:
--------------------------------------------------------------------------------
# udp tcpdump stream via 5141
input {
  udp {
    type => "tcpdump"
    port => 5141
  }
}

########################
#10-tag.conf
########################

filter {
  if [type] == "tcpdump" {
    mutate {
      add_tag => ["tcpdump"]
    }
  }
}

#########################
#12-tcpdump.conf
#########################

filter {
  if "tcpdump" in [tags] {
    json {
      source => "message"
    }
  }
}

#########################
#30-outputs.conf
#########################

output {
  if "tcpdump" in [tags] {
    elasticsearch {
      hosts => ["elasticsearch:9200"]
      index => "tcpdump-%{+YYYY.MM.dd}"
    }
    stdout { codec => rubydebug }
  }
}
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 Fritz Larco

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# elk-tcpdump
Code to aggregate tcpdump traffic and send it to ELK (Elasticsearch-Logstash-Kibana)

This allows one to capture a host's network traffic statistics:
- Source IP/Host/Port to Target IP/Host/Port
- Aggregate count of packets over time
- Aggregate length of packets over time

This is ideal to run on firewalls (such as pfSense) to monitor traffic with a stack such as ELK.

# Instructions

This has only been tested on Ubuntu 14.04 and FreeBSD. The following packages are required:
- tcpdump
- host

Then clone this repo:
```
git clone https://github.com/flarco/elk-tcpdump.git
cd elk-tcpdump
```

To start collecting tcpdump statistics, run the following on the host:
```shell
tcpdump -U -i eth0 -nn -tttt port not 5141 | python tcpdump_aggregate.py "192.168.2.3:5141"

# this is an example on a firewall with 2 NICs
tcpdump -U -i eth0 -i eth1 -nn -tttt port not 5141 | python tcpdump_aggregate.py "192.168.2.3:5141"
```

In the example above, the tcpdump aggregates are sent to host '192.168.2.3' on port 5141 via UDP at the interval specified in the script (default: 20 seconds).
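The interval is controlled by the `SEC_INTERVAL` constant near the top of `tcpdump_aggregate.py`; edit it there to send more or less frequently:

```python
SEC_INTERVAL = 20  # UDP send / Aggregate reset interval
```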

Here is an example of the received data on 192.168.2.3:5141:
```shell
root@d54ea1457852:/tmp# netcat -ul 5141
{"source_IP": "172.17.0.3", "source_PORT": 22, "target_IP": "172.17.0.1", "target_PORT": 54686, "type": "TCP", "count": 1, "length": 212, "source_HOST": "172.17.0.3", "target_HOST": "172.17.0.1", "time": "2016-09-08 23:27:40.090202"}
{"source_IP": "172.17.0.1", "source_PORT": 54692, "target_IP": "172.17.0.3", "target_PORT": 22, "type": "TCP", "count": 24, "length": 0, "source_HOST": "NXDOMAIN", "target_HOST": "NXDOMAIN", "time": "2016-09-08 23:28:29.073292"}
{"source_IP": "172.17.0.1", "source_PORT": 54690, "target_IP": "172.17.0.3", "target_PORT": 22, "type": "TCP", "count": 1, "length": 52, "source_HOST": "172.17.0.1", "target_HOST": "172.17.0.3", "time": "2016-09-08 23:28:29.073292"}
{"source_IP": "172.17.0.3", "source_PORT": 22, "target_IP": "172.17.0.1", "target_PORT": 54690, "type": "TCP", "count": 1, "length": 0, "source_HOST": "172.17.0.3", "target_HOST": "172.17.0.1", "time": "2016-09-08 23:28:29.073292"}
{"source_IP": "172.17.0.3", "source_PORT": 22, "target_IP": "172.17.0.1", "target_PORT": 54692, "type": "TCP", "count": 24, "length": 3888, "source_HOST": "172.17.0.3", "target_HOST": "172.17.0.1", "time": "2016-09-08 23:28:29.073292"}
{"source_IP": "172.17.0.1", "source_PORT": 54686, "target_IP": "172.17.0.3", "target_PORT": 22, "type": "TCP", "count": 1, "length": 0, "source_HOST": "172.17.0.1", "target_HOST": "172.17.0.3", "time": "2016-09-08 23:28:29.073292"}
```

The `source_HOST` / `target_HOST` fields are resolved by attempting a lookup with the Linux `host` command, and the results are cached. If no hostname is returned, the IP is stored instead.

With this process, we can use Logstash to parse the data and ingest it into Elasticsearch, then view it in Kibana.

We can use the `logstash.conf` file for this. Make sure to [create the index-template](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html) prior to ingesting into Elasticsearch!
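As a starting point, here is a minimal index-template sketch. The template name, the `localhost:9200` endpoint, and the field types below are assumptions, not part of this repo; adjust them (and the request syntax) for your Elasticsearch version:

```shell
# assumption: Elasticsearch 5.x reachable on localhost:9200; field types are suggestions
curl -XPUT 'localhost:9200/_template/tcpdump' -H 'Content-Type: application/json' -d '
{
  "template": "tcpdump-*",
  "mappings": {
    "tcpdump": {
      "properties": {
        "source_IP":   { "type": "ip" },
        "target_IP":   { "type": "ip" },
        "source_HOST": { "type": "keyword" },
        "target_HOST": { "type": "keyword" },
        "count":       { "type": "long" },
        "length":      { "type": "long" }
      }
    }
  }
}'
```

This keeps the IP fields queryable as real IP addresses and the counters numeric, so Kibana can aggregate on them.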

------

Here is an example of Kibana using this data:

![image](https://cloud.githubusercontent.com/assets/7671010/18371213/895bf418-7600-11e6-846d-a70a67efdd13.png)
--------------------------------------------------------------------------------
/tcpdump_aggregate.py:
--------------------------------------------------------------------------------
import sys, re, datetime, threading, socket, json
from subprocess import Popen, PIPE, STDOUT
from collections import namedtuple
import linecache, traceback


args = sys.argv[1:]
utc_delta = datetime.datetime.utcnow() - datetime.datetime.now()

if len(args) < 1:
    print('USAGE: python tcpdump_aggregate.py "TARGET_IP:UDP_PORT"')
    print('EXAMPLE: python tcpdump_aggregate.py "127.0.0.1:5141"')
    print('EXAMPLE with tcpdump: tcpdump -U -i eth0 -nn -tttt port not 5141 | python tcpdump_aggregate.py "127.0.0.1:5141"')
    sys.exit(1)


UDP_IP = args[0].split(':')[0]
UDP_PORT = int(args[0].split(':')[1])
SEC_INTERVAL = 20  # UDP send / Aggregate reset interval


def get_exception_message(append_message=''):
    """Obtain and return the current exception message"""

    exc_type, exc_obj, tb = sys.exc_info()
    f = tb.tb_frame
    lineno = tb.tb_lineno
    filename = f.f_code.co_filename
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, f.f_globals)
    message = '-' * 70 + '\n' + 'EXCEPTION IN ({}, LINE {} "{}"): {} \n---\n{}'.format(
        filename, lineno, line.strip(), exc_obj, traceback.format_exc()) + '\n' + append_message
    return message


def run_command(command, show_output=False):
    """Run a shell command and return its output"""

    process = Popen(command, shell=True, stdout=PIPE, stderr=STDOUT)

    stdout_array = []

    for line in iter(process.stdout.readline, ''):
        if show_output: sys.stdout.write(line)
        stdout_array.append(line.rstrip())

    process.wait()
    stdout = '\n'.join(stdout_array) + process.stdout.read()

    return stdout


def parse_ip_port(address):
    """Parse the IP and PORT from a tcpdump address string"""
    if ':' in address:
        # IPv6
        if '.' in address:  # there is a PORT after the last dot
            IP = address.split('.')[0]
            PORT = int(address.split('.')[-1])
        else:  # there is no PORT
            IP = address
            PORT = ''
    else:
        # IPv4
        deli = "."
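        # tcpdump with -nn prints IPv4 endpoints as e.g. "192.168.1.3.36409":
        # four address octets plus the port, all dot-separated (5 fields when a port is present)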
        arr = address.split(deli)
        if len(arr) == 5:
            IP = deli.join(arr[:-1])
            PORT = int(arr[-1])
        else:
            IP = address
            PORT = ''

    return (IP, PORT)


host_cache = {}  # cache of IP-to-host mapping
threads = {}  # one thread per IP
thread_lock = threading.Lock()
command_lock = threading.Lock()


def get_host(IP):
    """Attempt to obtain the hostname of the IP; starts a thread on a cache miss."""
    global host_cache, threads, thread_lock
    with thread_lock:
        if IP in host_cache:
            return host_cache[IP]

        # kick off a resolver thread
        if IP not in threads:
            threads[IP] = threading.Thread(target=get_host_process, args=(IP,))
            threads[IP].start()

    return IP


def get_host_process(IP):
    """Thread worker to obtain the hostname of the IP"""
    global host_cache, thread_lock
    with command_lock:
        # print('Getting host for ' + IP)
        host = run_command('host ' + IP).split()[-1][:-1]  # remove the trailing dot
        if 'XDOMAIN' in host:
            host = 'NXDOMAIN'
        with thread_lock:
            host_cache[IP] = host


def parse_packet(line):
    """
    Parse one packet and return a dictionary record with the following keys:
      source_IP, source_PORT, source_HOST,
      target_IP, target_PORT, target_HOST,
      length, type, flags, seq, ack, win
    """
    if ', length ' not in line: return
    if '>' not in line: return
    if 'ff:ff:ff:ff:ff:ff' in line: return  # skip broadcast frames

    packet = {}

    arr1 = line.split(' > ')
    part1 = arr1[0]  # 2015-10-29 12:53:44.030124 IP 192.168.1.3.36409
    part1_arr = part1.split()

    arr2 = arr1[1].split(', length ')
    part2 = arr2[0]  # 192.168.1.193.4849: UDP
    part2_arr = part2.split()

    source = part1_arr[-1]
    target = part2_arr[0][:-1]  # drop the trailing colon

    part3 = ' '.join(part2_arr[1:])
    part3_arr = part3.split(',')

    packet['length'] = int(re.sub("[^0-9]", "", arr2[1].split(":")[0].split()[0]))

    packet['time'] = datetime.datetime.strptime(' '.join(part1_arr[0:2]), '%Y-%m-%d %H:%M:%S.%f')

    if part3 == 'UDP':
        packet['type'] = 'UDP'
    else:
        packet['type'] = 'TCP'

    # convert the capture timestamp to UTC
    packet['time'] = packet['time'] + datetime.timedelta(seconds=round(utc_delta.total_seconds()))

    (packet['source_IP'], packet['source_PORT']) = parse_ip_port(source)
    (packet['target_IP'], packet['target_PORT']) = parse_ip_port(target)

    packet['source_HOST'] = get_host(packet['source_IP'])
    packet['target_HOST'] = get_host(packet['target_IP'])

    for k in 'flags seq ack win'.split():
        packet[k] = ''

    for p in part3_arr:
        p = p.strip()
        if p.startswith('Flags'): packet['flags'] = p.split()[-1]
        if p.startswith('seq'): packet['seq'] = p.split()[-1]
        if p.startswith('ack'): packet['ack'] = int(p.split()[-1])
        if p.startswith('win'): packet['win'] = int(p.split()[-1])

    return packet


class Packet_Aggregate:
    """An Aggregate of packets with the length/count fields summed.
    Also has a method to send the aggregate via UDP/IP
    to a Logstash destination."""

    UDP_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    combo_keys = 'source_IP source_PORT target_IP target_PORT type'.split()
    Combo = namedtuple('Combo', combo_keys)

    def __init__(self):
        self.combo_count = {}
        self.combo_length_sum = {}
        self.combo_host = {}

    def ingest(self, packet):
        combo_key = self.Combo(*[packet[key] for key in self.combo_keys])

        length = packet['length']
        self.combo_count[combo_key] = self.combo_count.get(combo_key, 0) + 1
        self.combo_length_sum[combo_key] = self.combo_length_sum.get(combo_key, 0) + length

        self.combo_host[combo_key] = dict(
            source_HOST=packet['source_HOST'],
            target_HOST=packet['target_HOST'],
        )

    def send_udp(self, server, port):
        time = str(datetime.datetime.utcnow())

        for combo, count in self.combo_count.iteritems():
            combo_record = combo._asdict()
            combo_record['count'] = count
            combo_record['length'] = self.combo_length_sum[combo]

            for k, v in self.combo_host[combo].iteritems():
                combo_record[k] = v

            combo_record['time'] = time

            # print(str(combo_record))

            self.UDP_sock.sendto(json.dumps(combo_record), (server, port))


def main_buffer():
    global line_count

    line_count = 0
    update_time_marker = lambda: datetime.datetime.now() + datetime.timedelta(seconds=SEC_INTERVAL)

    time_marker = update_time_marker()
    aggregate = Packet_Aggregate()

    print("Running packet capture live with Aggregate...")

    for line in sys.stdin:
        line_count += 1
        packet = parse_packet(line)
        if not packet:
            continue

        aggregate.ingest(packet)

        if datetime.datetime.now() > time_marker:
            aggregate.send_udp(UDP_IP, UDP_PORT)
            aggregate = Packet_Aggregate()  # reset the aggregate for the next interval
            time_marker = update_time_marker()


try:
    line_count = 0
    main_buffer()

except KeyboardInterrupt:
    print("Exiting...")
except:
    print(get_exception_message())

print('read ' + str(line_count) + ' lines')
--------------------------------------------------------------------------------