├── README.md ├── analyzers ├── general_stats.py ├── interactive_session.py ├── jabber.py ├── nested_tunnels.py ├── scp.py └── x11_tunneling.py ├── cipher.py ├── conversation.py ├── handshake.py ├── miscnet.py ├── packetfilter.py ├── sshflow.py └── statistics.py /README.md: -------------------------------------------------------------------------------- 1 | sshflow 2 | ======= 3 | 4 | This tool is a proof-of-concept to analyze packet capture files 5 | by looking for SSH handshakes and then profiling those sessions 6 | to guess what (if anything) is being tunneled. 7 | 8 | Plugins 9 | ------- 10 | 11 | Presently, the tool detects interactive sessions, nested tunnels, 12 | X11 forwarding, and server-to-client/client-to-server file copies. 13 | 14 | Usage 15 | ----- 16 | 17 | You need Python 2.7 and the dpkt library. 18 | 19 | `$ ./sshflow packetcapture.pcap` 20 | 21 | Assumptions 22 | ----------- 23 | 24 | It makes a number of assumptions: 25 | - aes-ctr with hmac-md5 is used as a cipher suite. This is out 26 | of sheer laziness. 27 | 28 | - Only a single channel (http://www.ietf.org/rfc/rfc4254.txt) is 29 | in active use per SSH connection. 30 | 31 | - That all the SSH connections in a .pcap will always have a unique 32 | (client ip, client ephemeral port, server ip, server port) 33 | 34 | -------------------------------------------------------------------------------- /analyzers/general_stats.py: -------------------------------------------------------------------------------- 1 | import conversation 2 | 3 | def analyze(c): 4 | print "General statistics" 5 | print " Detected ciphersuite: " + str(c.ciphersuite()) 6 | print " Smallest possible packet for ciphersuite: " + str(c.ciphersuite().smallestPacket()) 7 | print " Packets sent by client: " + str(c.clientPacketCount()) 8 | print " Packets sent by server: " + str(c.serverPacketCount()) 9 | print " Average client packet length: " + str(c.clientAverageLength()) 10 | print " Average server packet length: " + str(c.serverAverageLength()) 11 | print " Total bytes (of SSH data) sent by client: " + str(c.clientTotalLength()) 12 | print " Total bytes (of SSH data) sent by server: " + str(c.serverTotalLength()) 13 | print " Most common client packet size: " + str(c.clientLengthMode(5)) 14 | print " Most common server packet size: " + str(c.serverLengthMode(5)) 15 | print " Average time between client packets: " + str(c.clientAverageTime()) 16 | print " Average time between server packets: " + str(c.serverAverageTime()) 17 | -------------------------------------------------------------------------------- /analyzers/interactive_session.py: -------------------------------------------------------------------------------- 1 | # Tries to determine if an SSH conversation is an interactive 2 | # session or not 3 | 4 | import conversation 5 | import cipher 6 | 7 | def analyze(c): 8 | clm = c.clientLengthMode(1)[0][0] 9 | slm = c.serverLengthMode(1)[0][0] 10 | smallest = c.ciphersuite().smallestPacket() 11 | if slm == smallest and clm == smallest: 12 | print 13 | print "-> Likely an interactive shell session" 14 | -------------------------------------------------------------------------------- /analyzers/jabber.py: -------------------------------------------------------------------------------- 1 | # Tries to determine if an SSH conversation is an interactive 2 | # session or not 3 | 4 | import conversation 5 | import cipher 6 | 7 | def analyze(c): 8 | clm = c.clientLengthMode(1)[0][0] 9 | slm = c.serverLengthMode(1)[0][0] 10 | smallest = c.ciphersuite().smallestPacket() 11 | if clm == 176+smallest and slm == 1400+smallest: 12 | print 13 | print "-> Jabber tunneled over SOCKS5" 14 | -------------------------------------------------------------------------------- /analyzers/nested_tunnels.py: -------------------------------------------------------------------------------- 1 | # Tries to determine if an SSH session is a tunnel for another 2 | # ssh session 3 | # In an interactive session (many of the smallest possible packets), 4 | # the smallest packet will be double that of the normal smallest packet 5 | 6 | import conversation 7 | import cipher 8 | 9 | def analyze(c): 10 | smallest = c.ciphersuite().smallestPacket() 11 | clm = c.clientLengthMode(1)[0][0] 12 | slm = c.serverLengthMode(1)[0][0] 13 | 14 | # Guessing that greater than 4 nested tunnels is unlikely :) 15 | for layers in [ 2, 3, 4 ]: 16 | if (clm == layers * smallest) or (slm == layers * smallest): 17 | print "-> Possibly nested tunnels (%s layers detected)" % layers 18 | -------------------------------------------------------------------------------- /analyzers/scp.py: -------------------------------------------------------------------------------- 1 | # Tries to determine if a user is tunneling an X11 application over SSH 2 | 3 | import conversation 4 | import cipher 5 | 6 | def analyze(c): 7 | small = c.ciphersuite().smallestPacket() 8 | if c.serverLengthMode(2)[0][0] == small and c.clientAverageLength() > 1000 and c.clientTotalLength() / c.serverTotalLength() > 5: 9 | print "-> Likely a file copy from client to server" 10 | if c.clientLengthMode(2)[0][0] == small and c.serverAverageLength() > 1000 and c.serverTotalLength() / c.clientTotalLength() > 5: 11 | print "-> Likely a file copy from server to client" 12 | -------------------------------------------------------------------------------- /analyzers/x11_tunneling.py: -------------------------------------------------------------------------------- 1 | # Tries to determine if an SSH session is being used to tunnel 2 | # an X11 application 3 | 4 | import conversation 5 | import cipher 6 | 7 | def analyze(c): 8 | # Should both be below 0.05 seconds. 9 | cat = c.clientAverageTime() 10 | sat = c.serverAverageTime() 11 | 12 | # Should be smallest packet for ciphersuite 13 | clm1 = c.clientLengthMode(2)[0][0] 14 | clm2 = c.clientLengthMode(2)[1][0] 15 | 16 | # These should be 'smallest packet' and 'smallest packet + 16', respectively 17 | slm1 = c.serverLengthMode(2)[0][0] 18 | slm2 = c.serverLengthMode(2)[1][0] 19 | 20 | # And... smallest packet 21 | small = c.ciphersuite().smallestPacket() 22 | 23 | if cat < 0.05 and sat < 0.05 and clm2 == small and slm2 == small and clm1 == small+32 and slm1 == small+16: 24 | print "-> Detected X11 forwarding" 25 | -------------------------------------------------------------------------------- /cipher.py: -------------------------------------------------------------------------------- 1 | # A list of smallest application-layer packet sizes for various cipher suites 2 | 3 | class Ciphersuite: 4 | def __init__(self, cipher, mac, compression): 5 | if type(cipher) != str or type(mac) != str or type(compression) != bool: 6 | raise TypeError("Ciphersuite constructor takes two string arguments and one boolean") 7 | self.cipher = cipher 8 | self.mac = mac 9 | self.compression = compression 10 | 11 | def smallestPacket(self): 12 | # Assume aes-ctr without compression (it's common!) 13 | # TODO make this really do something useful 14 | return 48 15 | 16 | def blockSize(self): 17 | # Assume aes-ctr without compression (it's common!) 18 | # TODO make this really do something useful 19 | return 16 20 | 21 | def __str__(self): 22 | calgo = None 23 | if self.compression: 24 | calgo = "none" 25 | else: 26 | calgo = "zlib@openssh.com" 27 | 28 | return self.cipher + " " + self.mac + " " + calgo 29 | -------------------------------------------------------------------------------- /conversation.py: -------------------------------------------------------------------------------- 1 | import dpkt 2 | from collections import Counter 3 | from statistics import StatsEntity 4 | from miscnet import rtoq 5 | import cipher 6 | 7 | # This class does a lot of heavy lifting with statistics generation 8 | # It acts as a central repository of information for analyzing 9 | # modules to use. 10 | 11 | class Conversation: 12 | def __init__(self, caddr, cport, saddr, sport, csuite): 13 | # Client address and port 14 | self.caddr = caddr 15 | self.cport = cport 16 | # Server address and port 17 | self.saddr = saddr 18 | self.sport = sport 19 | # Cipher suite - we assume the same cipher suite is in use in both 20 | # directions because seriously, who doesn't do that? :) 21 | # Needs to be of type cipher 22 | self.csuite = csuite 23 | # Client stats 24 | self.cstat = StatsEntity() 25 | # Server stats 26 | self.sstat = StatsEntity() 27 | 28 | def addPacket(self, ts, eth): 29 | if type(eth) != dpkt.ethernet.Ethernet: 30 | raise TypeError("Expected dpkt.ethernet.Ethernet, received " + str(type(p)) + " instead") 31 | 32 | ip = eth.data 33 | tcp = ip.data 34 | app = tcp.data 35 | 36 | # Decide if this is a server or client packet 37 | stat = None 38 | # client 39 | if ip.src == self.caddr and tcp.sport == self.cport: 40 | stat = self.cstat 41 | # server 42 | elif ip.src == self.saddr and tcp.sport == self.sport: 43 | stat = self.sstat 44 | else: 45 | raise ValueError("Conversation '" + str(self) + "' received a packet that was not a part of the conversation") 46 | 47 | # Packet counts 48 | stat.pcount = stat.pcount + 1 49 | 50 | # Packet sizes (of application layer data) 51 | l = len(app) 52 | if l in stat.psizes: 53 | stat.psizes[l] = stat.psizes[l] + 1 54 | else: 55 | stat.psizes[l] = 1 56 | 57 | if stat.lasttimestamp == None: 58 | stat.lasttimestamp = ts 59 | else: 60 | # time between packets 61 | time = ts - stat.lasttimestamp 62 | if time in stat.timings: 63 | stat.timings[time] = stat.timings[time] + 1 64 | else: 65 | stat.timings[time] = 1 66 | stat.lasttimestamp = ts 67 | 68 | # Returns the sum of a dictionary where the key 69 | # is a size or a latency, and the value is the number 70 | # of occurences of that value 71 | def sumdict(self, d): 72 | total = 0 73 | for value in d.keys(): 74 | total = total + value * d[value] 75 | return total 76 | 77 | # Returns the average of a dictionary where the key 78 | # is a size or a latency, and the value is the number 79 | # of occurences of that value 80 | def averagedict(self, d): 81 | return self.sumdict(d) / len(d.keys()) 82 | 83 | 84 | # Plain old getters 85 | def clientPacketCount(self): 86 | return self.cstat.pcount 87 | def serverPacketCount(self): 88 | return self.sstat.pcount 89 | 90 | # Length getters 91 | def clientAverageLength(self): 92 | return self.averagedict(self.cstat.psizes) 93 | def serverAverageLength(self): 94 | return self.averagedict(self.sstat.psizes) 95 | 96 | def clientLengthMode(self, n): 97 | return Counter(self.cstat.psizes).most_common(n) 98 | def serverLengthMode(self, n): 99 | return Counter(self.sstat.psizes).most_common(n) 100 | 101 | def clientTotalLength(self): 102 | return self.sumdict(self.cstat.psizes) 103 | def serverTotalLength(self): 104 | return self.sumdict(self.sstat.psizes) 105 | 106 | def clientLengths(self): 107 | return self.cstat.psizes 108 | def serverLengths(self): 109 | return self.sstat.psizes 110 | 111 | # Timing getters 112 | def clientAverageTime(self): 113 | return self.averagedict(self.cstat.timings) 114 | def serverAverageTime(self): 115 | return self.averagedict(self.sstat.timings) 116 | 117 | def clientTotalTime(self): 118 | return self.sumdict(self.cstat.timings) 119 | def serverTotalTime(self): 120 | return self.sumdict(self.sstat.timings) 121 | 122 | def clientTimingMode(self, n): 123 | return Counter(self.cstat.timings).most_common(n) 124 | def serverTimingMode(self, n): 125 | return Counter(self.sstat.timings).most_common(n) 126 | 127 | def clientTimings(self): 128 | return self.cstat.timings 129 | def serverTimings(self): 130 | return self.sstat.timings 131 | 132 | # Just return the ciphersuite method, woohoo method chaining 133 | def ciphersuite(self): 134 | return self.csuite 135 | 136 | def __str__(self): 137 | return "%s:%s -> %s:%s" % (rtoq(self.caddr), self.cport, rtoq(self.saddr), self.sport) 138 | -------------------------------------------------------------------------------- /handshake.py: -------------------------------------------------------------------------------- 1 | import re 2 | import dpkt 3 | import sys 4 | import os 5 | from miscnet import rtoq 6 | from cipher import Ciphersuite 7 | from conversation import Conversation 8 | 9 | class HandshakeScanner: 10 | def __init__(self): 11 | # This will let the scanner instance keep track 12 | # of partially-open handshakes 13 | self.handshakes = {} 14 | 15 | # This function assumes that this is a TCP/IP packet. 16 | # The main function should check this. 17 | def checkPacket(self, ip): 18 | # All we require is that we get Ethernet frames 19 | if type(ip) != dpkt.ip.IP: 20 | raise TypeError("Expected dpkt.ip.IP, received " + str(type(ip)) + " instead") 21 | 22 | tcp = ip.data 23 | 24 | # Here be dragons 25 | 26 | # The SSH server sends its version number before the client does. Because 27 | # we want to index the handshakes dictionary by (clientaddr, clientport, serveraddr, serverport) 28 | # (which is totally arbitrary), we build an 'index tuple' and the reverse of it, to detect 29 | # the client responding to the server 30 | index_tuple = ip.dst, tcp.dport, ip.src, tcp.sport 31 | reverse_tuple = ip.src, tcp.sport, ip.dst, tcp.dport 32 | # If we see evidence of an SSH handshake 33 | if re.search('^SSH-2.0-', str(tcp.data)): 34 | # If this is the first SSH packet we have seen (from this pair of hosts+ports), 35 | # this will be the server sending its version number to the client (tcp connection 36 | # is already established at this point) 37 | # TODO check if TCP Fast Open affects the SSH handshake ordering (suspecting no) 38 | 39 | # This is true when the client sends its version number to the server, after 40 | # the server has sent its own version number 41 | if (reverse_tuple in self.handshakes) and type(self.handshakes[reverse_tuple]) == bool and self.handshakes[reverse_tuple] == False: 42 | self.handshakes[reverse_tuple] = True 43 | # Returns a Conversation object. 44 | # Right now, we don't detect the ciphersuite, 45 | # we just assume that the ciphersuite is "aes128-ctr hmac-md5 none" 46 | return Conversation(ip.src, tcp.sport, ip.dst, tcp.dport, Ciphersuite("aes128-ctr", "hmac-md5", False)) 47 | 48 | # This is less specific, so it needs to come after the reverse_tuple check 49 | # When the client responds, the index_tuple will never be in self.handshakes - 50 | # only the reverse_tuple will be (because the server sends its version first) 51 | if index_tuple not in self.handshakes: 52 | # This will get changed to 'True' when the client responds 53 | self.handshakes[index_tuple] = False 54 | return None 55 | else: 56 | return None 57 | 58 | def getHandshakes(self): 59 | return self.handshakes 60 | -------------------------------------------------------------------------------- /miscnet.py: -------------------------------------------------------------------------------- 1 | import socket 2 | 3 | # Raw 4-byte address to dotted-quad 4 | def rtoq(buf): 5 | return socket.inet_ntoa(buf) 6 | -------------------------------------------------------------------------------- /packetfilter.py: -------------------------------------------------------------------------------- 1 | import dpkt 2 | import conversation 3 | 4 | class PacketFilter: 5 | def __init__(self): 6 | # This will become a dictionary indexed by 7 | # (caddr, cport, saddr, sport) tuples with 8 | # values that are Conversation objects. 9 | # We use this dictionary to look up the converations that 10 | # packets belong to. 11 | self.conversations = {} 12 | 13 | def addConversation(self, c): 14 | self.conversations[c.caddr, c.cport, c.saddr, c.sport] = c 15 | 16 | # This function assumes all packets are TCP/IP 17 | # The "main" function should have checked this 18 | # This function sorts Ethernet frames into 19 | # Conversations 20 | # Does nothing with packets that are not part of 21 | # a Conversation 22 | def filterPacket(self, ts, eth): 23 | if type(eth) != dpkt.ethernet.Ethernet: 24 | raise TypeError("Expected dpkt.ethernet.Ethernet, received " + str(type(eth)) + " instead") 25 | ip = eth.data 26 | tcp = ip.data 27 | 28 | # We check one (or both) of these against the lookup table 29 | # 'conversations' and add the packet to the conversation 30 | tuples = [ (ip.src, tcp.sport, ip.dst, tcp.dport), (ip.dst, tcp.dport, ip.src, tcp.sport) ] 31 | for t in tuples: 32 | if t in self.conversations: 33 | c = self.conversations[t] 34 | c.addPacket(ts, eth) 35 | break 36 | 37 | def getConversations(self): 38 | return self.conversations.values() 39 | -------------------------------------------------------------------------------- /sshflow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2.7 2 | import dpkt 3 | import sys 4 | import re 5 | import os 6 | 7 | # Written by us 8 | from conversation import Conversation 9 | from handshake import HandshakeScanner 10 | from packetfilter import PacketFilter 11 | 12 | print "| sshflow" 13 | 14 | # Main! 15 | if len(sys.argv) != 2: 16 | print "error: you must specify a single PCAP file on the command line" 17 | exit(1) 18 | 19 | try: 20 | f = open(sys.argv[1], 'rb') 21 | except IOError: 22 | print "error: couldn't open the file '%s'" % (sys.argv[1]) 23 | exit(1) 24 | 25 | try: 26 | pcap = dpkt.pcap.Reader(f) 27 | except ValueError: 28 | print "error: file couldn't be parsed by dpkt. if exporting from wireshark, use 'Wireshark/tcpdump/... - libpcap' file type" 29 | exit(1) 30 | 31 | 32 | print "loading analyzers" 33 | # Modify python's path to look for modules in the 'analyzers' subdirectory 34 | # Probably an enormous hack. 35 | sys.path.append(sys.path[0] + "/analyzers") 36 | analyzers = [] 37 | for fname in os.listdir(sys.path[0] + "/analyzers"): 38 | # We only want to load modules with a .py extension 39 | if not re.search("\.py$", fname): 40 | continue 41 | name = os.path.splitext(fname)[0] 42 | print " " + name 43 | analyzers.append(__import__(name)) 44 | 45 | 46 | print "generating statistics from pcap file, please wait..." 47 | 48 | hs = HandshakeScanner() 49 | filt = PacketFilter() 50 | # Number of packets processed 51 | pcount = 0 52 | for ts, buf in pcap: 53 | pcount = pcount + 1 54 | 55 | # Parse the Ethernet frame 56 | # If the packet is not TCP/IP, we just 57 | # skip over it 58 | try: 59 | eth = dpkt.ethernet.Ethernet(buf) 60 | except dpkt.dpkt.UnpackError: 61 | continue 62 | 63 | if type(eth.data) != dpkt.ip.IP: 64 | continue 65 | ip = eth.data 66 | if type(ip.data) != dpkt.tcp.TCP: 67 | continue 68 | tcp = ip.data 69 | # empty messages are usually ACKS. These are, 70 | # as far as I can tell, not very useful for 71 | # statistics 72 | if len(tcp.data) == 0: 73 | continue 74 | 75 | # Tuple of (clientaddr, clientport, serveraddr, serverport, Ciphersuite) 76 | conv = hs.checkPacket(ip) 77 | if conv != None: 78 | print " SSH handshake: " + str(conv) 79 | filt.addConversation(conv) 80 | # Else, because if it's a handshake packet we don't care about 81 | # it for statistics. 82 | else: 83 | filt.filterPacket(ts, eth) 84 | 85 | print "" 86 | 87 | if len(filt.getConversations()) == 0: 88 | print "processed " + str(pcount) + " packets, no SSH handshakes found" 89 | exit(2) 90 | else: 91 | print "processed " + str(pcount) + " packets, analysis follows..." 92 | 93 | # Main analysis loop 94 | for c in filt.getConversations(): 95 | print 96 | print "--- analysis of conversation: " + str(c) + " ---" 97 | for a in analyzers: 98 | a.analyze(c) 99 | 100 | print "--- end of analyses ---" 101 | -------------------------------------------------------------------------------- /statistics.py: -------------------------------------------------------------------------------- 1 | # Yep, plain old data structure. 2 | class StatsEntity: 3 | def __init__(self): 4 | self.lasttimestamp = None 5 | self.pcount = 0 6 | self.timings = {} 7 | self.psizes = {} 8 | --------------------------------------------------------------------------------