├── netflow_4M.pcap ├── for_later └── sflow.pcap ├── Dockerfile ├── LICENSE ├── overflowd.py └── README.md /netflow_4M.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dakami/overflowd/HEAD/netflow_4M.pcap -------------------------------------------------------------------------------- /for_later/sflow.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dakami/overflowd/HEAD/for_later/sflow.pcap -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2.7.12 2 | MAINTAINER Karl Newell 3 | 4 | RUN pip install dpkt PyNacl 5 | COPY overflowd.py /overflowd/overflowd.py 6 | WORKDIR /workdir 7 | ENTRYPOINT ["/overflowd/overflowd.py"] 8 | CMD ["-h"] 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2016, dakami 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
# ---------------------------------------------------------------------------
# Tail of /LICENSE (BSD 3-Clause), carried over verbatim from the repository
# dump this script is embedded in:
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ---------------------------------------------------------------------------
# /overflowd.py:
# ---------------------------------------------------------------------------
#!/usr/bin/env python
"""Overflowd: sample NetFlow v5 records and emit signed "flow seen" reports.

Flow records are read either from a pcap file (``-f``) or from a UDP socket
(``-u``).  Each record is reported with a probability proportional to its
packet count (``-r``); a report is a dict carrying the flow data, some
metadata and an Ed25519 signature, and is currently just printed.
"""

import json
import optparse
import socket
import struct
import sys
import time
from base64 import b64encode
from random import SystemRandom
from socket import inet_ntoa

import dpkt
import nacl.encoding
import nacl.signing

sr = SystemRandom()


def r():
    """Return a random float in [0.0, 1.0) from the OS entropy source."""
    return sr.random()


# Parsed command-line state; populated by main().
opts = None
remainder = None

# A fresh signing key is generated on every start (it is not persisted).
signing_key = nacl.signing.SigningKey.generate()
# NOTE: despite the name, this holds the HEX encoding of the verify key.
verify_key_base64 = signing_key.verify_key.encode(encoder=nacl.encoding.HexEncoder)

# NetFlow v5 layout as consumed by parsenf(): a 24-byte header followed by
# 48-byte records, of which only the first 44 bytes are decoded.
_NF5_HEADER_SIZE = 24
_NF5_RECORD_STRIDE = 48
# saddr, daddr, <next hop skipped>, input idx, output idx, packets, bytes,
# start time, end time, sport, dport, <pad skipped>, flags, protocol, tos,
# source AS, destination AS.
_NF5_RECORD = struct.Struct('!4s4s4xHHIIIIHHxBBBHH')


def parsenf(buf):
    """Decode the flow records of one NetFlow v5 export datagram.

    Args:
        buf: raw datagram payload (bytes).

    Returns:
        A list of dicts, one per decodable record, with the keys ``saddr``,
        ``daddr``, ``pcount``, ``bcount``, ``stime``, ``etime``, ``sport``,
        ``dport``, ``flags``, ``protocol`` and ``tos``.  The list is empty
        when the payload is shorter than a header, is not version 5, or
        announces a record count outside 1..999.  Records that are cut off
        by the end of the payload are dropped.
    """
    # A payload too short for the header used to raise struct.error here,
    # which took the whole collector down on a single runt datagram.
    if len(buf) < _NF5_HEADER_SIZE:
        return []

    version, count = struct.unpack_from('!HH', buf, 0)
    if version != 5:
        return []
    if count <= 0 or count >= 1000:
        return []

    seen = []
    for i in range(count):
        base = _NF5_HEADER_SIZE + i * _NF5_RECORD_STRIDE
        if base + _NF5_RECORD.size > len(buf):
            # Truncated datagram: every later record is truncated as well.
            break
        (saddr, daddr, _input_idx, _output_idx, pcount, bcount, stime, etime,
         sport, dport, flags, protocol, tos, _src_as, _dst_as) = \
            _NF5_RECORD.unpack_from(buf, base)
        seen.append({
            'saddr': inet_ntoa(saddr),
            'daddr': inet_ntoa(daddr),
            'pcount': pcount,
            'bcount': bcount,
            'stime': stime,
            'etime': etime,
            'sport': sport,
            'dport': dport,
            'flags': flags,
            'protocol': protocol,
            'tos': tos,
        })
    return seen


def read_from_udp():
    """Receive NetFlow datagrams on the configured UDP port, forever.

    The port comes from ``opts.udpport`` (default 7777).  Every decoded
    flow is handed to maybe_report().
    """
    # The original always bound 7777 and silently ignored -u/--udpport.
    port = int(getattr(opts, 'udpport', None) or 7777)
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.bind(("", port))
        while True:
            data, _addr = s.recvfrom(1500)
            for sf in parsenf(data):
                maybe_report(sf)
    finally:
        s.close()


def read_from_pcap():
    """Replay the pcap file named by ``opts.pcapfile`` through the reporter.

    Each captured frame is decoded as Ethernet/IP/UDP and its payload is
    parsed as NetFlow; frames that are not IP/UDP are skipped.
    """
    with open(opts.pcapfile, 'rb') as f:
        for _timestamp, buf in dpkt.pcap.Reader(f):
            try:
                payload = dpkt.ethernet.Ethernet(buf).ip.udp.data
            except (AttributeError, dpkt.dpkt.UnpackError):
                # Not an Ethernet/IP/UDP frame (e.g. ARP) or a cut-off
                # capture; one such frame must not abort the whole replay.
                continue
            for sf in parsenf(payload):
                maybe_report(sf)


def maybe_report(sf):
    """Report flow *sf* with probability ``min(1, opts.rate * pcount)``.

    Args:
        sf: a flow dict as produced by parsenf().

    The report (flow data, metadata, signature) is passed to notify().
    """
    pcount = sf['pcount']
    # A flow with no packets has zero reporting odds (and would otherwise
    # divide by zero below).
    if pcount <= 0:
        return
    # float(): optparse may hand the rate over as a string.
    if r() / float(pcount) > float(opts.rate):
        return

    report = {}
    flowdata = {}
    flowdata['sourcetype'] = {"type": "Netflow", "version": 5}
    flowdata['data'] = sf
    report['flowdata'] = flowdata

    # NOTE(review): the contact block is assembled but has never been
    # attached to the report; left as-is so the emitted format does not
    # change -- confirm whether it should be included (and signed).
    contact = {}
    contact['email'] = "dan@whiteops.com"
    contact['identity'] = "White Ops"

    metadata = {}
    metadata['class'] = "INFORMATIONAL"
    metadata['info'] = "FLOWSEEN"
    metadata['time'] = time.time()
    report['metadata'] = metadata

    # sign() needs bytes; json.dumps emits ASCII by default, so encoding is
    # a no-op on Python 2 and the required conversion on Python 3.  The
    # signed message (signature + payload) is what gets base64-encoded.
    signature = {}
    signature['key'] = verify_key_base64
    signature['signature'] = b64encode(
        signing_key.sign(json.dumps(report).encode('utf-8')))
    report['signature'] = signature

    notify(report)


def notify(report):
    """Deliver *report*; for now it is only printed to stdout."""
    # XXX TODO, send UDP, HTTP, HTTPS notifications
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(report)


def main():
    """Parse the command line and run the pcap or UDP reader."""
    global opts, remainder
    usage = """
Overflowd (Traffic Intelligence Distribution Engine)
Dan Kaminsky, Chief Scientist, whiteops.com
with: Cosmo Mielke and Jeff Ward"""
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-f", "--pcapfile", dest="pcapfile", help="Load from PCAP")
    # type= makes optparse convert and validate command-line values.
    parser.add_option("-u", "--udpport", dest="udpport", type="int", default=7777,
                      help="Stream from UDP (7777)")
    parser.add_option("-r", "--rate", dest="rate", type="float", default=0.00001,
                      help="Odds flow will be reported scaled by packet count (0.00001)")
    opts, remainder = parser.parse_args()
    if opts.pcapfile:
        read_from_pcap()
    else:
        read_from_udp()


if __name__ == '__main__':
    main()

# ---------------------------------------------------------------------------
# /README.md:
# ---------------------------------------------------------------------------
# Head of /README.md, carried over verbatim from the repository dump:
#
# # Overflowd
# Overflowd (Traffic Intelligence Distribution Engine)
#
# # TL;DR:
#
# Netflow to those suffering from network flows: Proactively delivering
# anti-spoof and contact data.
7 | 8 | # Quick Start 9 | # ./overflowd.py -h 10 | Usage: 11 | Overflowd (Traffic Intelligence Distribution Engine) 12 | Dan Kaminsky, Chief Scientist, whiteops.com 13 | with: Cosmo Mielke and Jeff Ward 14 | 15 | Options: 16 | -h, --help show this help message and exit 17 | -f PCAPFILE, --pcapfile=PCAPFILE 18 | Load from PCAP 19 | -u UDPPORT, --udpport=UDPPORT 20 | Stream from UDP (7777) 21 | -r RATE, --rate=RATE Odds flow will be reported scaled by packet count 22 | (0.000001) 23 | 24 | # ./overflowd.py -f netflow_4M.pcap | head 25 | {'flowdata': {'data': {'bcount': 682512, 'protocol': 6, 'tos': 0, 'etime': 1325314888, 'daddr': '122.166.77.74', 'pcount': 17001, 'flags': 16, 'stime': 1325252876, 'saddr': '122.166.82.196', 'dport': 20999, 'sport': 4568}, 'sourcetype': {'version': 5, 'type': 'Netflow'}}, 'signature': {'key': 'd52b9644ba6ffd2bdaa6505e649fd80ca80fad72baf2f46f5c83ab8a2a354df3', 'signature': 'z5yMEHH0pYe++uOiNhWzLkCyXsTQiMokNMZ3AWi8v8+0cuTy6ScCPS/RB0PXDCprmPLaC0AJpFCEW9S5bbB7CHsiZmxvd2RhdGEiOiB7ImRhdGEiOiB7ImJjb3VudCI6IDY4MjUxMiwgInByb3RvY29sIjogNiwgInRvcyI6IDAsICJldGltZSI6IDEzMjUzMTQ4ODgsICJkYWRkciI6ICIxMjIuMTY2Ljc3Ljc0IiwgInBjb3VudCI6IDE3MDAxLCAiZmxhZ3MiOiAxNiwgInN0aW1lIjogMTMyNTI1Mjg3NiwgInNhZGRyIjogIjEyMi4xNjYuODIuMTk2IiwgImRwb3J0IjogMjA5OTksICJzcG9ydCI6IDQ1Njh9LCAic291cmNldHlwZSI6IHsidmVyc2lvbiI6IDUsICJ0eXBlIjogIk5ldGZsb3cifX0sICJtZXRhZGF0YSI6IHsiaW5mbyI6ICJGTE9XU0VFTiIsICJjbGFzcyI6ICJJTkZPUk1BVElPTkFMIiwgInRpbWUiOiAxNDc3Nzc4MDI3LjEzODEwOX19'}, 'metadata': {'info': 'FLOWSEEN', 'class': 'INFORMATIONAL', 'time': 1477778027.138109}} 26 | {'flowdata': {'data': {'bcount': 1395502, 'protocol': 6, 'tos': 0, 'etime': 1325838753, 'daddr': '122.166.251.246', 'pcount': 6130, 'flags': 0, 'stime': 1325834529, 'saddr': '122.166.218.109', 'dport': 445, 'sport': 3183}, 'sourcetype': {'version': 5, 'type': 'Netflow'}}, 'signature': {'key': 'd52b9644ba6ffd2bdaa6505e649fd80ca80fad72baf2f46f5c83ab8a2a354df3', 'signature': 
'2MVQ2fhHpeC83cE3Dt1wK08z9/dxK19PNj7P7I4yCno1zMtw1qTvLH45sTXWsCicT7bo8DF0Uj1HeJ4gDPLiCHsiZmxvd2RhdGEiOiB7ImRhdGEiOiB7ImJjb3VudCI6IDEzOTU1MDIsICJwcm90b2NvbCI6IDYsICJ0b3MiOiAwLCAiZXRpbWUiOiAxMzI1ODM4NzUzLCAiZGFkZHIiOiAiMTIyLjE2Ni4yNTEuMjQ2IiwgInBjb3VudCI6IDYxMzAsICJmbGFncyI6IDAsICJzdGltZSI6IDEzMjU4MzQ1MjksICJzYWRkciI6ICIxMjIuMTY2LjIxOC4xMDkiLCAiZHBvcnQiOiA0NDUsICJzcG9ydCI6IDMxODN9LCAic291cmNldHlwZSI6IHsidmVyc2lvbiI6IDUsICJ0eXBlIjogIk5ldGZsb3cifX0sICJtZXRhZGF0YSI6IHsiaW5mbyI6ICJGTE9XU0VFTiIsICJjbGFzcyI6ICJJTkZPUk1BVElPTkFMIiwgInRpbWUiOiAxNDc3Nzc4MDI3LjE4MTE2OH19'}, 'metadata': {'info': 'FLOWSEEN', 'class': 'INFORMATIONAL', 'time': 1477778027.181168}} 27 | {'flowdata': {'data': {'bcount': 17227833, 'protocol': 6, 'tos': 0, 'etime': 1325317896, 'daddr': '122.166.80.208', 'pcount': 15726, 'flags': 24, 'stime': 1325257892, 'saddr': '122.166.72.234', 'dport': 1227, 'sport': 139}, 'sourcetype': {'version': 5, 'type': 'Netflow'}}, 'signature': {'key': 'd52b9644ba6ffd2bdaa6505e649fd80ca80fad72baf2f46f5c83ab8a2a354df3', 'signature': 'brw1G8hurkaLEFhWCrRnW0uM/kEPeoeBWDdkLeveIefuUzxPO30UHhRMSynrMAyam9tPWi0xudNrEjF8/LTwD3siZmxvd2RhdGEiOiB7ImRhdGEiOiB7ImJjb3VudCI6IDE3MjI3ODMzLCAicHJvdG9jb2wiOiA2LCAidG9zIjogMCwgImV0aW1lIjogMTMyNTMxNzg5NiwgImRhZGRyIjogIjEyMi4xNjYuODAuMjA4IiwgInBjb3VudCI6IDE1NzI2LCAiZmxhZ3MiOiAyNCwgInN0aW1lIjogMTMyNTI1Nzg5MiwgInNhZGRyIjogIjEyMi4xNjYuNzIuMjM0IiwgImRwb3J0IjogMTIyNywgInNwb3J0IjogMTM5fSwgInNvdXJjZXR5cGUiOiB7InZlcnNpb24iOiA1LCAidHlwZSI6ICJOZXRmbG93In19LCAibWV0YWRhdGEiOiB7ImluZm8iOiAiRkxPV1NFRU4iLCAiY2xhc3MiOiAiSU5GT1JNQVRJT05BTCIsICJ0aW1lIjogMTQ3Nzc3ODAyNy4yNTU3NTJ9fQ=='}, 'metadata': {'info': 'FLOWSEEN', 'class': 'INFORMATIONAL', 'time': 1477778027.255752}} 28 | {'flowdata': {'data': {'bcount': 63671628, 'protocol': 47, 'tos': 0, 'etime': 1325317896, 'daddr': '122.166.14.93', 'pcount': 60572, 'flags': 16, 'stime': 1325255892, 'saddr': '3.138.170.99', 'dport': 0, 'sport': 0}, 'sourcetype': {'version': 5, 'type': 'Netflow'}}, 'signature': {'key': 
'd52b9644ba6ffd2bdaa6505e649fd80ca80fad72baf2f46f5c83ab8a2a354df3', 'signature': '0eCPikLp4ywGUlvdHs/b+dFOgDdBbGuWUIdLD3tkZ5a3iGvW6pOodmtMSMpQFVfST03db+ZzfMCL0HcuGesxAXsiZmxvd2RhdGEiOiB7ImRhdGEiOiB7ImJjb3VudCI6IDYzNjcxNjI4LCAicHJvdG9jb2wiOiA0NywgInRvcyI6IDAsICJldGltZSI6IDEzMjUzMTc4OTYsICJkYWRkciI6ICIxMjIuMTY2LjE0LjkzIiwgInBjb3VudCI6IDYwNTcyLCAiZmxhZ3MiOiAxNiwgInN0aW1lIjogMTMyNTI1NTg5MiwgInNhZGRyIjogIjMuMTM4LjE3MC45OSIsICJkcG9ydCI6IDAsICJzcG9ydCI6IDB9LCAic291cmNldHlwZSI6IHsidmVyc2lvbiI6IDUsICJ0eXBlIjogIk5ldGZsb3cifX0sICJtZXRhZGF0YSI6IHsiaW5mbyI6ICJGTE9XU0VFTiIsICJjbGFzcyI6ICJJTkZPUk1BVElPTkFMIiwgInRpbWUiOiAxNDc3Nzc4MDI3LjI1NjE1M319'}, 'metadata': {'info': 'FLOWSEEN', 'class': 'INFORMATIONAL', 'time': 1477778027.256153}} 29 | 30 | # What's going on here 31 | 32 | Our networks are increasingly under attack, we don't always quite know from 33 | where, and even if so, who do we talk to? Abuse management is *hard*, just in 34 | terms of a communications coordination problem. Can we make it easier? 35 | 36 | What if potentially malicious network traffic arrived with tracing data, not 37 | just from the networks attacking us (who might *ahem* might not be too 38 | communicative) but from all the networks bringing us their noise? 39 | 40 | It would require all the networks in the middle to have monitoring frameworks. 41 | Well, they do. Everyone's running some protocol that ends in "flow". But the 42 | data from Netflow, SFlow, QFlow, etc. either goes to local analysts, or 43 | giant overcentralized data pits*. What if, one out of a million packets 44 | caused a tracer message to go to the source and destination of traffic? 45 | 46 | Purely through stochastic dynamics, you'd end up with metadata -- the nastier 47 | the flood, the faster the context would arrive. And you wouldn't need everyone 48 | to deploy all at once (which is good because this is a thing that does not 49 | happen). Over time, more participants, better data distribution. 
Some 50 | participants, some data distribution. 51 | 52 | It'd certainly be easier to trace spoofed flows, manage asymmetric routing 53 | issues (traceroute is pleasantly naive), and honestly, just figure out who to 54 | talk to. 55 | 56 | And we could potentially send traffic with more frequency, if we had 57 | strong reason to believe a particular flow was sketchy. 58 | 59 | Think of this as a much more distributed Netflow. Privacy issues aren't 60 | really there; either you're receiving data you already know (since 61 | metadata follows the path of the original flows) or that you want to know 62 | (that somebody is spoofing traffic as you, and roughly, here's from where). 63 | 64 | This is experimental work on one of the more annoying and difficult tasks 65 | we have maintaining the Internet. We can do better than this particular 66 | version of Overflowd, but it's a good place to start a conversation. 67 | 68 | # Notes 69 | 70 | 1. Netflow data from https://traces.simpleweb.org/traces/netflow/ 71 | 2. Key management eventually works out through some formal mechanism, but 72 | just having a consistent key, that's used in quantity, over time, over 73 | many networks is its own mechanism. 74 | 3. * Heh, giant centralized data pits (you know who you are) -- I should clarify, 75 | I don't dislike what you're up to, I'm just taking that whole "data sharing" 76 | thing as a really important aspect. I'm trying a clever thing here -- unblocking 77 | the "who do we share data with" problem, by just sending it towards the 78 | destination networks and letting *the Internet itself route the metadata to 79 | where it needs to be*. 80 | 81 | 82 | # TODO 83 | 84 | 1. Actually send updates. Starting with 65535/udp, then HTTP/HTTPS to source 85 | and dest. 86 | 2. Persist signing key, create encryption mode --------------------------------------------------------------------------------