├── CHANGELOG ├── countermeasures ├── PadToMTU.py ├── PadFixed.py ├── PadRFCFixed.py ├── PadRFCRand.py ├── PadRand.py ├── MiceElephants.py ├── PadRoundExponential.py ├── PadRoundLinear.py ├── DirectTargetSampling.py ├── Folklore.py └── WrightStyleMorphing.py ├── classifiers ├── LiberatoreClassifier.py ├── WrightClassifier.py ├── BandwidthClassifier.py ├── TimeClassifier.py ├── HerrmannClassifier.py ├── wekaAPI.py ├── VNGClassifier.py ├── VNGPlusPlusClassifier.py ├── JaccardClassifier.py ├── ESORICSClassifier.py └── PanchenkoClassifier.py ├── LICENSE ├── test_Trace.py ├── Webpage.py ├── Packet.py ├── pcapparser.py ├── README.md ├── arffWriter.py ├── parseResultsFile.py ├── Datastore.py ├── Trace.py ├── config.py └── main.py /CHANGELOG: -------------------------------------------------------------------------------- 1 | Version 0.1: Initial release 2 | -------------------------------------------------------------------------------- /countermeasures/PadToMTU.py: -------------------------------------------------------------------------------- 1 | # This is a Python framework to compliment "Peek-a-Boo, I Still See You: Why Efficient Traffic Analysis Countermeasures Fail". 2 | # Copyright (C) 2012 Kevin P. Dyer (kpdyer.com) 3 | # See LICENSE for more details. 
class LiberatoreClassifier:
    """Liberatore-Levine classifier: packet-length histogram features fed to
    Weka's Naive Bayes with kernel density estimation."""

    @staticmethod
    def traceToInstance( trace ):
        """Turn a trace into a feature dict: its packet-length histogram plus
        the webpage label stored under the 'class' key."""
        features = trace.getHistogram()
        features['class'] = 'webpage' + str(trace.getId())
        return features

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files for both sets and run Naive Bayes (-K enables
        kernel density estimation) through the Weka command line."""
        trainFile, testFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
        return wekaAPI.execute(trainFile, testFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
class WrightClassifier:
    """Wright et al. classifier: a direction-agnostic, normalised
    packet-length histogram fed to Weka's Naive Bayes."""

    @staticmethod
    def traceToInstance( trace ):
        """Build the feature dict from the normalised histogram over both
        directions; the webpage label goes under 'class'."""
        features = trace.getHistogram(None, True)
        features['class'] = 'webpage' + str(trace.getId())
        return features

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files and invoke Naive Bayes (-K: kernel density)."""
        trainFile, testFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
        return wekaAPI.execute(trainFile, testFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
class PadRFCFixed:
    """Countermeasure: RFC-style fixed session padding.  One padding amount
    (a multiple of 8 in [8, 248]) is drawn per trace and added to every
    packet, capped at the MTU."""

    @staticmethod
    def applyCountermeasure(trace):
        # Single per-trace draw: every packet in the session grows by the
        # same amount, mimicking one fixed cipher-padding choice.
        pad = random.choice(range(8, 256, 8))

        padded = Trace(trace.getId())
        for pkt in trace.getPackets():
            capped = min(pkt.getLength() + pad, Packet.MTU)
            padded.addPacket(Packet(pkt.getDirection(), pkt.getTime(), capped))
        return padded
class PadRand:
    """Countermeasure: per-packet random padding.  Each packet grows by an
    independent random multiple of 8 chosen from its remaining headroom
    below the MTU."""

    @staticmethod
    def applyCountermeasure(trace):
        padded = Trace(trace.getId())
        for pkt in trace.getPackets():
            headroom = Packet.MTU - pkt.getLength()
            if headroom > 0:
                # Fresh draw per packet from [0, headroom) in steps of 8.
                newLength = pkt.getLength() + random.choice(range(0, headroom, 8))
            else:
                # Already at (or past) the MTU: clamp to the MTU.
                newLength = Packet.MTU
            padded.addPacket(Packet(pkt.getDirection(), pkt.getTime(), newLength))
        return padded
class BandwidthClassifier:
    """Classifier on exactly two features: total bytes transferred upstream
    and downstream."""

    @staticmethod
    def traceToInstance( trace ):
        """Return {'bandwidthUp', 'bandwidthDown', 'class'} for the trace."""
        return {
            'bandwidthUp': trace.getBandwidth(Packet.UP),
            'bandwidthDown': trace.getBandwidth(Packet.DOWN),
            'class': 'webpage' + str(trace.getId()),
        }

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files and invoke Naive Bayes (-K: kernel density)."""
        trainFile, testFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
        return wekaAPI.execute(trainFile, testFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
class TimeClassifier:
    """Classifier on a single feature: total trace duration, taken as the
    timestamp of the latest packet (0 for an empty trace)."""

    @staticmethod
    def traceToInstance( trace ):
        """Return {'time': <largest packet timestamp>, 'class': <label>}."""
        lastSeen = 0
        for pkt in trace.getPackets():
            lastSeen = max(lastSeen, pkt.getTime())
        return {'time': lastSeen, 'class': 'webpage' + str(trace.getId())}

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files and invoke Naive Bayes (-K: kernel density)."""
        trainFile, testFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
        return wekaAPI.execute(trainFile, testFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
class MiceElephants:
    """Countermeasure: collapse packet sizes into two buckets -- "mice"
    (<= 128 bytes) and "elephants" (<= 1500 bytes)."""

    @staticmethod
    def applyCountermeasure(trace):
        morphed = Trace(trace.getId())
        for pkt in trace.getPackets():
            morphed.addPacket(Packet(pkt.getDirection(), pkt.getTime(),
                                     MiceElephants.calcLength(pkt.getLength())))
        return morphed

    @staticmethod
    def calcLength(packetLength):
        """Round a length up to the smallest bucket that fits it.

        NOTE(review): lengths above 1500 fall through and map to 0 --
        presumably unreachable since Packet.MTU is 1500; confirm.
        """
        for bucket in (128, 1500):
            if packetLength <= bucket:
                return bucket
        return 0
class PadRoundExponential:
    """Countermeasure: round every packet length up to the next
    power-of-two-style bucket (128, 256, 512, 1024) with 1500 as the
    terminal bucket."""

    @staticmethod
    def applyCountermeasure(trace):
        rounded = Trace(trace.getId())
        for pkt in trace.getPackets():
            rounded.addPacket(Packet(pkt.getDirection(), pkt.getTime(),
                                     PadRoundExponential.calcLength(pkt.getLength())))
        return rounded

    @staticmethod
    def calcLength(packetLength):
        """Return the smallest bucket >= packetLength, or 0 if none fits."""
        for bucket in (128, 256, 512, 1024, 1500):
            if packetLength <= bucket:
                return bucket
        return 0
class PadRoundLinear:
    """Countermeasure: round every packet length up to the next multiple of
    128 bytes, with 1500 (the MTU payload) as the final bucket."""

    # Buckets 128, 256, ..., 1408 plus the terminal 1500.
    # BUG FIX: the original did `VALID_PACKETS = range(...)` followed by
    # `.append(1500)`, which only works on Python 2 where range() returns a
    # list; list(range(...)) + [1500] is equivalent and works on Python 3.
    # Hoisted to a class constant so it is built once, not per call.
    VALID_PACKETS = list(range(128, 1500, 128)) + [1500]

    @staticmethod
    def applyCountermeasure(trace):
        """Return a new Trace whose packet lengths are rounded up to buckets;
        direction and timing are preserved."""
        newTrace = Trace(trace.getId())
        for packet in trace.getPackets():
            newPacket = Packet( packet.getDirection(),
                                packet.getTime(),
                                PadRoundLinear.calcLength(packet.getLength()) )
            newTrace.addPacket( newPacket )

        return newTrace

    @staticmethod
    def calcLength(packetLength):
        """Return the smallest bucket >= packetLength, or 0 if none fits
        (lengths above 1500, which should not occur for MTU-bounded traces)."""
        for val in PadRoundLinear.VALID_PACKETS:
            if packetLength <= val:
                return val
        return 0
class PcapParserTestCase(unittest.TestCase):
    """Regression test for pcapparser.readfile against one known capture
    (month=3, day=14, hour=22, webpageId=8)."""

    def test_readfile(self):
        actualTrace = pcapparser.readfile( month=3, day=14, hour=22, webpageId=8 )

        expectedTrace = Trace(8)
        expectedTrace.addPacket( Packet( Packet.UP  , 0  , 148 ) )
        expectedTrace.addPacket( Packet( Packet.DOWN, 0  , 100 ) )
        expectedTrace.addPacket( Packet( Packet.UP  , 0  , 52  ) )
        expectedTrace.addPacket( Packet( Packet.UP  , 3  , 500 ) )
        expectedTrace.addPacket( Packet( Packet.DOWN, 18 , 244 ) )
        expectedTrace.addPacket( Packet( Packet.UP  , 35 , 436 ) )
        expectedTrace.addPacket( Packet( Packet.DOWN, 75 , 52  ) )
        expectedTrace.addPacket( Packet( Packet.DOWN, 118, 292 ) )
        expectedTrace.addPacket( Packet( Packet.UP  , 158, 52  ) )

        # BUG FIX: the original test built expectedTrace but never compared
        # it to actualTrace, so it could never fail.  Compare the traces
        # packet by packet on every observable Packet field.
        expectedPackets = expectedTrace.getPackets()
        actualPackets = actualTrace.getPackets()
        self.assertEqual( len(expectedPackets), len(actualPackets) )
        for expected, actual in zip(expectedPackets, actualPackets):
            self.assertEqual( expected.getDirection(), actual.getDirection() )
            self.assertEqual( expected.getTime(), actual.getTime() )
            self.assertEqual( expected.getLength(), actual.getLength() )

if __name__ == '__main__':
    unittest.main()
# TF-N implementation of Multinomial Naive Bayes Classifier
class HerrmannClassifier:
    """Herrmann et al. classifier: packet-length histogram with a TF
    (log-dampening) transformation and cosine normalisation, classified by
    Weka's multinomial Naive Bayes."""

    @staticmethod
    def traceToInstance( trace ):
        """Return the TF-transformed, cosine-normalised histogram of the
        trace, with the webpage label under 'class'."""
        instance = trace.getHistogram()

        # TF transformation: dampen raw counts logarithmically (base 2).
        for attribute in instance:
            instance[attribute] = math.log( 1 + instance[attribute], 2 )

        # Euclidean length for cosine normalisation (Section 4.5.2).
        euclideanLength = math.sqrt( sum( v * v for v in instance.values() ) )

        # BUG FIX: an empty histogram (a trace with no packets) previously
        # raised ZeroDivisionError here; leave such instances unnormalised.
        if euclideanLength > 0:
            for attribute in instance:
                instance[attribute] /= euclideanLength

        instance['class'] = 'webpage'+str(trace.getId())
        return instance

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files and invoke multinomial Naive Bayes via Weka."""
        [trainingFile,testingFile] = arffWriter.writeArffFiles( runID, trainingSet, testingSet )
        return wekaAPI.execute( trainingFile, testingFile, "weka.classifiers.bayes.NaiveBayesMultinomial", [] )
class Webpage:
    """A web page identity together with the set of network traces observed
    when loading it."""

    def __init__( self, id ):
        self.__id = int(id)       # numeric webpage identifier
        self.__traceSet = []      # traces observed for this page, in insertion order

    def addTrace( self, trace ):
        """Append one observed trace."""
        self.__traceSet.append( trace )

    def getTrace( self, n ):
        """Return the n-th trace (insertion order)."""
        return self.__traceSet[n]

    def getTraces( self ):
        """Return the list of all traces."""
        return self.__traceSet

    def getId( self ):
        """Return the numeric webpage id."""
        return self.__id

    def getBandwidth(self):
        """Total bytes across all traces (sum of each trace's bandwidth)."""
        totalBandwidth = 0
        for trace in self.getTraces():
            totalBandwidth += trace.getBandwidth()
        return totalBandwidth

    def getHistogram( self, direction = None, normalize = False ):
        """Aggregate packet-length histogram over all traces.

        direction: optional direction filter forwarded to each trace.
        normalize: when True, convert counts to fractions of all packets.
        """
        histogram = {}
        totalPackets = 0
        for trace in self.getTraces():
            traceHistogram = trace.getHistogram( direction, False )
            for key, count in traceHistogram.items():
                histogram[key] = histogram.get( key, 0 ) + count
                totalPackets += count

        # BUG FIX: normalising with zero packets (no traces, or only empty
        # traces) previously raised ZeroDivisionError; an empty histogram
        # is returned unchanged instead.
        if normalize and totalPackets > 0:
            for key in histogram:
                histogram[key] = (histogram[key] * 1.0) / totalPackets

        return histogram
class Packet:
    """One observed packet: direction, timestamp, and on-the-wire length."""

    # Direction constants.
    UP = 0      # client -> server
    DOWN = 1    # server -> client

    # Header size constants (bytes).
    HEADER_ETHERNET = 0  # is actually 14 on the LAN; modelled as 0 here
    HEADER_IP = 20
    HEADER_TCP_REQUIRED = 20
    HEADER_TCP_OPTIONAL = 12
    HEADER_TCP = HEADER_TCP_REQUIRED + HEADER_TCP_OPTIONAL

    # SSHv1 packet framing overhead.
    HEADER_SSH_PACKET_FIELD_LENGTH = 4
    HEADER_SSH_PACKET_TYPE = 1
    HEADER_SSH_PADDING = 7  # payload is already 0 mod 8
    HEADER_SSH_CRC = 4
    HEADER_SSH = (HEADER_SSH_PACKET_FIELD_LENGTH + HEADER_SSH_PACKET_TYPE
                  + HEADER_SSH_PADDING + HEADER_SSH_CRC)

    HEADER_LENGTH = HEADER_ETHERNET + HEADER_IP + HEADER_TCP

    MTU = 1500 + HEADER_ETHERNET

    def __init__( self, direction, time, length ):
        # Coerce everything to int so string inputs (e.g. values parsed from
        # a database row) behave identically to numeric ones.
        self.__direction, self.__time, self.__length = (
            int(direction), int(time), int(length))

    def getDirection(self):
        """Packet.UP or Packet.DOWN."""
        return self.__direction

    def getLength(self):
        """Length in bytes."""
        return self.__length

    def getTime(self):
        """Timestamp (relative units chosen by the producer of the trace)."""
        return self.__time

    def setLength(self, length):
        """Overwrite the length (coerced to int)."""
        self.__length = int(length)

    def setTime(self, time):
        """Overwrite the timestamp (coerced to int)."""
        self.__time = int(time)
def execute( trainingFile, testingFile, classifier, args ):
    # Run a Weka classifier from the command line and parse its CSV
    # prediction output.
    #
    # trainingFile / testingFile: ARFF file paths (see arffWriter).
    # classifier: fully-qualified Weka class name, e.g.
    #             "weka.classifiers.bayes.NaiveBayes".
    # args: extra classifier options appended verbatim (e.g. ['-K']).
    # Returns [accuracy, debugInfo]: accuracy as a percentage, debugInfo as
    # a list of [actualClass, predictedClass] pairs, one per test instance.
    myArgs = ["java",
              "-Xmx" + str(config.JVM_MEMORY_SIZE),
              "-classpath", '$CLASSPATH:'+config.WEKA_JAR,
              classifier,
              "-t", trainingFile,
              "-T", testingFile,
              '-v',  # suppress training-set statistics in the output
              '-classifications','weka.classifiers.evaluation.output.prediction.CSV'
              ]

    for arg in args:
        myArgs.append( arg )

    # NOTE(review): shell=True with a joined command string is what makes
    # the $CLASSPATH reference above expand, but it also means any shell
    # metacharacters in the file paths are interpreted -- only safe for
    # trusted, locally generated paths.
    pp = subprocess.Popen(' '.join(myArgs), shell=True, stdout=subprocess.PIPE)

    totalPredictions = 0
    totalCorrectPredictions = 0
    debugInfo = []
    parsing = False
    for line in pp.stdout:
        line = line.rstrip()

        if parsing == True:
            # A blank line marks the end of the prediction table.
            if line=='': break;
            # CSV row: inst#,actual,predicted,error,prediction -- the class
            # columns look like "1:webpage42" (index:label), so take the
            # label after the colon.
            lineBits = line.split(',')
            actualClass = lineBits[1].split(':')[1]
            predictedClass = lineBits[2].split(':')[1]
            debugInfo.append([actualClass,predictedClass])
            totalPredictions += 1.0
            if actualClass == predictedClass:
                totalCorrectPredictions += 1.0

        # Predictions begin on the line after this exact CSV header.
        if line == 'inst#,actual,predicted,error,prediction':
            parsing = True

    # NOTE(review): if Weka produced no predictions (e.g. the JVM failed to
    # start) this raises ZeroDivisionError -- presumably an intentional
    # fail-fast, but confirm before relying on it.
    accuracy = totalCorrectPredictions / totalPredictions * 100.0

    return [accuracy,debugInfo]
class VNGClassifier:
    """Classifier over burst features: each contiguous run of same-direction
    packets is summed, and the byte total is rounded to a 600-byte bin.
    Features count how many bursts fell in each (direction, bin) pair."""

    @staticmethod
    def roundArbitrary(x, base):
        """Round x to the nearest multiple of base."""
        return int(base * round(float(x)/base))

    @staticmethod
    def traceToInstance( trace ):
        """Return the burst-size feature dict for a trace, with the webpage
        label under 'class'."""
        instance = {}

        def record(direction, byteCount):
            # One feature per (direction, rounded burst size); the value is
            # the number of bursts that landed in that bin.
            key = 'S' + str(direction) + '-' + str(
                VNGClassifier.roundArbitrary(byteCount, 600))
            instance[key] = instance.get(key, 0) + 1

        burstDirection = None
        burstBytes = 0
        for pkt in trace.getPackets():
            if burstDirection is None:
                burstDirection = pkt.getDirection()
            if pkt.getDirection() != burstDirection:
                # Direction flipped: close the current burst, start a new one.
                record(burstDirection, burstBytes)
                burstDirection = pkt.getDirection()
                burstBytes = 0
            burstBytes += pkt.getLength()

        # Flush the trailing burst (skipped entirely for an empty trace).
        if burstBytes > 0:
            record(burstDirection, burstBytes)

        instance['class'] = 'webpage' + str(trace.getId())
        return instance

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files and invoke Naive Bayes (-K: kernel density)."""
        trainFile, testFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
        return wekaAPI.execute(trainFile, testFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
def readfile( month, day, hour, webpageId ):
    # Load one Liberatore/Levine capture into a Trace.
    #
    # month/day/hour select the 2006-MM-DDTHH* capture directory and
    # webpageId selects the per-page capture file inside it.
    # Returns a Trace; the Trace is empty when no matching pcap exists.
    strId = '.'.join([str(month), str(day), str(hour), str(webpageId)])  # NOTE(review): unused

    trace = Trace(webpageId)
    start = 0  # timestamp of the first packet; all deltas are relative to it

    absPath = __constructAbsolutePath( month, day, hour, webpageId )

    if absPath:
        # Python 2 file() -- this module predates Python 3.
        pcapReader = dpkt.pcap.Reader( file( absPath, "rb") )

        for ts, buf in pcapReader:
            eth = dpkt.ethernet.Ethernet(buf)
            ip = eth.data
            tcp = ip.data

            if start==0: start = ts
            # Direction heuristic: anything sent from TCP port 22 (the SSH
            # server) is DOWN; everything else is UP.
            direction = Packet.UP
            if (tcp.sport==22):
                direction = Packet.DOWN
            # Milliseconds since the first packet of the capture.
            delta = int(round(((ts - start) * 1000),0))
            # IP total length plus the (modelled) ethernet header size.
            length = ip.len + Packet.HEADER_ETHERNET

            trace.addPacket( Packet(direction, delta, length ) )

    return trace

def __constructAbsolutePath( month, day, hour, webpageId ):
    # Resolve the capture file under config.PCAP_ROOT matching the pattern
    # 2006-MM-DDTHH*/*-<webpageId>.  Returns the first match or None.
    # Raises when the pcap root itself is missing (misconfiguration).
    if not os.path.exists(config.PCAP_ROOT):
        raise Exception('Directory ('+config.PCAP_ROOT+') does not exist')

    monthStr = '%02d' % month
    dayStr = '%02d' % day
    hourStr = '%02d' % hour
    path = os.path.join(config.PCAP_ROOT, '2006-'+monthStr
                        +'-'+dayStr
                        +'T'+hourStr
                        +'*/*'
                        +'-'+str(webpageId))

    pathList = glob.glob(path)

    absFilePath = None
    if len(pathList)>0:
        absFilePath = pathList[0]

    return absFilePath
Output from main.py is placed in the 'output' directory.
def writeArffFiles( runID, trainingSet, testingSet ):
    """Write one training and one testing ARFF file for Weka.

    Both files share a single attribute list and class enumeration (the
    union over both instance sets) so that Weka sees identical columns.
    Instances are dicts of feature -> value with the label under 'class'.
    Returns [trainingFilePath, testingFilePath].
    """
    trainingFilename = 'datafile-'+runID+'-train'
    testingFilename = 'datafile-'+runID+'-test'

    allInstances = list(trainingSet) + list(testingSet)

    # First-seen order is preserved: attribute order fixes the ARFF column
    # order and the class list fixes the @ATTRIBUTE class enumeration.
    classes = __orderedUnique( [instance['class'] for instance in allInstances] )
    attributes = __orderedUnique( [key for instance in allInstances
                                       for key in instance] )

    trainingFile = __writeArffFile( trainingSet, trainingFilename, classes, attributes )
    testingFile = __writeArffFile( testingSet, testingFilename, classes, attributes )

    return [trainingFile, testingFile]


def __orderedUnique( items ):
    # Deduplicate while keeping first-seen order; the companion set makes
    # membership checks O(1) instead of the original O(n) list scans.
    seen = set()
    ordered = []
    for item in items:
        if item not in seen:
            seen.add(item)
            ordered.append(item)
    return ordered


def __writeArffFile( inputArray, outputFile, classes, attributes ):
    # Render one instance set to <config.CACHE_DIR>/<outputFile>.arff and
    # return the full path.
    arffFile = []
    arffFile.append('@RELATION sites')
    for attribute in attributes:
        if attribute!='class':
            arffFile.append('@ATTRIBUTE '+str(attribute)+' real')
    arffFile.append('@ATTRIBUTE class {'+','.join(classes)+'}')
    arffFile.append('@DATA')

    for instance in inputArray:
        tmpBuf = []
        for attribute in attributes:
            if attribute!='class':
                # Missing attributes and zero counts both render as '0'.
                val = '0'
                if instance.get(attribute) not in [None,0]:
                    val = str(instance[attribute])
                tmpBuf.append(val)
        tmpBuf.append(instance['class'])

        # BUG FIX: itertools.imap is Python 2 only (removed in Python 3);
        # the builtin map() is equivalent here on both versions.
        arffFile.append( ','.join(map(str, tmpBuf)) )

    outputFile = os.path.join(config.CACHE_DIR, outputFile+'.arff')
    f = open( outputFile, 'w' )
    f.write( "\n".join( arffFile ) )
    f.close()

    return outputFile
class VNGPlusPlusClassifier:
    """VNG++ classifier: burst-size features (same-direction runs of packets
    summed and rounded to 600-byte bins) augmented with per-direction
    bandwidth totals and the total trace time."""

    @staticmethod
    def roundArbitrary(x, base):
        """Round x to the nearest multiple of base."""
        return int(base * round(float(x)/base))

    @staticmethod
    def traceToInstance( trace ):
        """Return the VNG++ feature dict for a trace, with the webpage label
        under 'class'."""
        instance = {}

        def tally(direction, byteCount):
            # One feature per (direction, rounded burst size), counting bursts.
            key = 'S' + str(direction) + '-' + str(
                VNGPlusPlusClassifier.roundArbitrary(byteCount, 600))
            instance[key] = instance.get(key, 0) + 1

        burstDirection = None
        burstBytes = 0
        for packet in trace.getPackets():
            if burstDirection is None:
                burstDirection = packet.getDirection()
            if packet.getDirection() != burstDirection:
                # Direction flipped: close the burst and start the next one.
                tally(burstDirection, burstBytes)
                burstDirection = packet.getDirection()
                burstBytes = 0
            burstBytes += packet.getLength()
        if burstBytes > 0:
            tally(burstDirection, burstBytes)

        # Coarse aggregate features layered on top of the burst histogram.
        instance['bandwidthUp'] = trace.getBandwidth( Packet.UP )
        instance['bandwidthDown'] = trace.getBandwidth( Packet.DOWN )

        lastSeen = 0
        for packet in trace.getPackets():
            lastSeen = max(lastSeen, packet.getTime())
        instance['time'] = lastSeen

        instance['class'] = 'webpage' + str(trace.getId())
        return instance

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files and invoke Naive Bayes (-K: kernel density)."""
        trainFile, testFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
        return wekaAPI.execute(trainFile, testFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
def parseResultsFile(filename):
    """Aggregate one results CSV produced by main.py.

    The file has a comma-separated header line (including 'accuracy',
    'overhead' as numerator/denominator pairs, and 'timeElapsedTotal'),
    followed by ', '-separated data rows.
    Returns [meanAccuracy, overhead, timeElapsed, trials] as strings, or
    None when the file has no data rows.
    """
    resultsFile = open(filename, 'r')
    contents = resultsFile.read()
    resultsFile.close()  # BUG FIX: the handle was never closed before

    # BUG FIX: a trailing newline used to yield a bogus empty row that
    # crashed the accuracy aggregation; blank lines are skipped now.
    lines = [line for line in contents.split("\n") if line != '']
    if len(lines) <= 1:
        return None

    columns = lines[0].split(',')
    data = {}
    for column in columns:
        data[column] = []

    for line in lines[1:]:
        items = line.split(', ')
        # Renamed inner index so it no longer shadows the row variable.
        for col in range(len(items)):
            data[columns[col]].append(items[col])

    # BUG FIX: `data.keys()[0]` is Python 2 only (dict views are not
    # indexable on Python 3); all columns have the same row count, so the
    # first declared column works and is deterministic.
    trials = str(len(data[columns[0]]))

    for key in data:
        if key == 'accuracy':
            values = [float(v) for v in data[key]]
            meanAccuracy = '%.1f' % round(sum(values) / len(values), 1)

        elif key == 'overhead':
            # Rows look like "numerator/denominator"; overhead is the
            # percentage increase of the summed ratio over 100%.
            numeratorSum = 0
            denominatorSum = 0
            for entry in data[key]:
                numerator, denominator = entry.split('/')
                numeratorSum += int(numerator)
                denominatorSum += int(denominator)

            overhead = str( round(( numeratorSum * 100.0 / denominatorSum ) - 100,1) )

        elif key == 'timeElapsedTotal':
            values = [float(v) for v in data[key]]
            # statlib's stats.mean is the plain arithmetic mean; computing it
            # with builtins removes the third-party dependency here.
            timeElapsed = str(round(sum(values) / len(values), 1))

    return [meanAccuracy, overhead, timeElapsed, trials]
class JaccardClassifier:
    """Set-of-packet-lengths classifier: a trace is represented by the set of
    packet lengths it contains and matched with a Jaccard-style coefficient."""

    @staticmethod
    def traceToInstance( trace ):
        """Map a trace to the set of observed packet lengths (as dict keys),
        plus the 'class' label."""
        instance = {}
        for p in trace.getPackets():
            instance[p.getLength()] = 1

        instance['class'] = 'webpage'+str(trace.getId())
        return instance

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Train on trainingSet, classify testingSet.

        Returns [accuracy, debugInfo] where accuracy is a percentage and
        debugInfo is a list of [actualClass, guessedClass] pairs.
        """
        # Per class, count in how many training traces each length occurs.
        bagOfLengths = {}
        for instance in trainingSet:
            if not bagOfLengths.get(instance['class']):
                bagOfLengths[instance['class']] = {}
            for attribute in instance:
                if attribute!='class':
                    if not bagOfLengths[instance['class']].get(attribute):
                        bagOfLengths[instance['class']][attribute] = 0
                    bagOfLengths[instance['class']][attribute] += 1

        # Keep only lengths seen in at least half the training traces.
        # BUGFIX: iterate over a snapshot of the keys -- deleting from a dict
        # while iterating its live key view raises RuntimeError on Python 3
        # (Python 2's .keys() returned a copy, which hid the problem).
        for className in bagOfLengths:
            for length in list(bagOfLengths[className].keys()):
                if bagOfLengths[className][length] < (config.NUM_TRAINING_TRACES/2.0):
                    del bagOfLengths[className][length]

        correctlyClassified = 0
        debugInfo = []
        for instance in testingSet:
            guess = JaccardClassifier.doClassify(bagOfLengths, instance)
            if guess == instance['class']:
                correctlyClassified += 1
            debugInfo.append([instance['class'], guess])

        accuracy = 100.0 * correctlyClassified / len(testingSet)

        return [accuracy, debugInfo]

    @staticmethod
    def doClassify(bagOfLengths, instance):
        """Return the class whose filtered length bag is most similar to
        instance; ties keep the first class encountered."""
        guess = None
        bestSimilarity = 0
        for className in bagOfLengths:
            # NOTE(review): this is |A ∩ B| / (|A| + |B|), not the true
            # Jaccard index |A ∩ B| / |A ∪ B|; kept as-is to preserve the
            # published implementation's behavior.
            intersection = 0
            for attribute in instance:
                if attribute!='class' and attribute in bagOfLengths[className]:
                    intersection += 1
            union = (len(instance) - 1) + len(bagOfLengths[className])
            if union == 0:
                similarity = 0
            else:
                similarity = 1.0 * intersection / union
            if guess == None or similarity > bestSimilarity:
                bestSimilarity = similarity
                guess = className

        return guess
class DirectTargetSampling:
    """Countermeasure: re-sample packet lengths from a target page's
    packet-length distribution so the morphed trace mimics the target."""

    # Stop secondary sampling once the L1 distance to the target
    # distribution falls to or below this threshold.
    L1_THRESHHOLD = 0.3

    @staticmethod
    def buildMetadata( srcWebpage, targetWebpage ):
        """Precompute the target page's normalized length histograms:
        [bidirectional, uplink-only, downlink-only]."""
        return [targetWebpage.getHistogram( None, True ),
                targetWebpage.getHistogram( Packet.UP, True ),
                targetWebpage.getHistogram( Packet.DOWN, True )]

    @staticmethod
    def applyCountermeasure( trace, metadata ):
        """Return a new trace sampled from the target distributions.

        Primary phase replaces each real packet with one or more sampled
        packets; secondary phase pads along the most skewed histogram
        dimension until the L1 distance drops below L1_THRESHHOLD.
        """
        distBi, distUp, distDown = metadata

        morphed = Trace(trace.getId())

        # Primary sampling.
        clock = 0
        for original in trace.getPackets():
            clock = original.getTime()
            perDirection = distUp if original.getDirection() == Packet.UP else distDown
            for sampled in DirectTargetSampling.morphPacket( original, perDirection ):
                morphed.addPacket( sampled )

        # Secondary sampling: one padding packet per 10ms step.
        while morphed.calcL1Distance( distBi ) > DirectTargetSampling.L1_THRESHHOLD:
            clock += 10
            direction, length = morphed.getMostSkewedDimension( distBi )
            morphed.addPacket( Packet( direction, clock, length ) )

        return morphed

    @staticmethod
    def morphPacket( packet, targetDistribution ):
        """Sample target-sized packets until they carry at least as much
        payload as the original packet (per-packet header penalty excluded)."""
        penalty = config.PACKET_PENALTY

        sampled = DirectTargetSampling.generatePacket( targetDistribution, packet )
        result = [sampled]

        # Payload carried so far; clamped at zero because a sampled packet
        # can be smaller than the header penalty (e.g. an ACK).
        carried = max( sampled.getLength() - penalty, 0 )
        remaining = (packet.getLength() - penalty) - carried

        while remaining > 0:
            sampled = DirectTargetSampling.generatePacket( targetDistribution, packet )
            result.append( sampled )
            remaining -= max( sampled.getLength() - penalty, 0 )

        return result

    @staticmethod
    def generatePacket( targetDistribution, packet ):
        """Clone packet's direction/time with a length drawn from
        targetDistribution ('direction-length' keys); falls back to 1500
        when the draw returns nothing."""
        drawn = DirectTargetSampling.sampleFromDistribution( targetDistribution )
        if drawn is None:
            length = 1500
        else:
            length = int(drawn.split('-')[1])
        return Packet( packet.getDirection(), packet.getTime(), length )

    @staticmethod
    def sampleFromDistribution( distribution ):
        """Weighted random draw of a histogram key.

        Returns None for an empty distribution; if the draw lands exactly on
        the total weight, the last key iterated is returned.
        """
        total = sum(distribution.values())
        n = random.uniform(0, total)

        key = None
        for key in distribution:
            if n < distribution[key]:
                return key
            n -= distribution[key]

        return key
class Folklore:
    # "Folklore" countermeasure (BuFLO-style): emit fixed-length packets in
    # both directions on a fixed clock, queueing the real payload in per-
    # direction FIFO buffers and draining it into the cover packets.

    # Every emitted packet has this total length (bytes).
    FIXED_PACKET_LEN = 1000
    # One scheduling tick every 20 ms.
    TIMER_CLOCK_SPEED = 20
    # Minimum running time of the cover traffic in ms; 0 means stop as soon
    # as the source trace and both buffers are drained.
    MILLISECONDS_TO_RUN = 0

    class Buffer:
        # Minimal FIFO queue of Packet objects awaiting transmission.
        def __init__(self):
            self.__array = []

        def queue(self):
            # Expose the underlying list; callers use it read-only to peek
            # at the oldest packet.
            return self.__array

        def add(self,p):
            self.__array.append(p)

        def remove(self):
            # Pop and return the oldest packet, or None when empty.
            if len(self.__array)==0:
                return None
            else:
                p = self.__array[0]
                del self.__array[0]
                return p

        def hasPackets(self):
            return (len(self.__array)>0)

    @staticmethod
    def packFromBuffer(outgoingCellCapacity, buffer):
        # Drain up to outgoingCellCapacity payload bytes from the buffer for
        # one outgoing cell.  A packet too large for the remaining capacity
        # is split: the unsent remainder is re-queued (note: at the TAIL of
        # the FIFO, not the head).
        # NOTE(review): the `elif` tests p.getLength() <= capacity while the
        # `if` tests p.getLength() - HEADER_LENGTH > capacity, so a packet
        # whose length falls between `capacity` and `capacity + HEADER_LENGTH`
        # hits the final `else` after having been removed -- it appears to be
        # dropped.  Left byte-identical on purpose; confirm against the paper.
        while outgoingCellCapacity>0:
            p = buffer.remove()
            if p and (p.getLength()-Packet.HEADER_LENGTH) > outgoingCellCapacity:
                newP = Packet( p.getDirection(), p.getTime(), (p.getLength()-Packet.HEADER_LENGTH)-outgoingCellCapacity )
                buffer.add(newP)
                break
            elif p and p.getLength() <= outgoingCellCapacity:
                outgoingCellCapacity -= (p.getLength()-Packet.HEADER_LENGTH)
            else:
                break

    @staticmethod
    def applyCountermeasure( trace ):
        # Public entry point: return only the transformed trace.
        return Folklore.doCountermeasure(trace)[0]

    @staticmethod
    def maxLatency( trace ):
        # Worst-case queueing delay (ms) any buffered packet experienced.
        latencyList = Folklore.doCountermeasure(trace)[1]
        maxLatency = 0
        if len(latencyList)>0:
            maxLatency = max(latencyList)
        return maxLatency

    @staticmethod
    def doCountermeasure( trace ):
        # Returns [newTrace, latencySamples].
        # Median trace length in the herrmann dataset is 3500ms
        # Median throughput is 62000 bytes/second
        # 40*1500 = 60000 bytes/second

        newTrace = Trace(trace.getId())

        latency = []                      # per-tick head-of-line delays observed
        timer = 0                         # simulated clock, in ms
        bufferUP = Folklore.Buffer()
        bufferDOWN = Folklore.Buffer()
        packetCursor = 0                  # next unread packet in the source trace

        # Terminate only if (1) our clock is up, (2) we have no more packets from the source
        # and (3) our buffers are empty
        while timer <= Folklore.MILLISECONDS_TO_RUN \
            or packetCursor < trace.getPacketCount() \
            or bufferUP.hasPackets() \
            or bufferDOWN.hasPackets():

            # calculate max latency: sample how long the oldest queued packet
            # in each direction has been waiting
            if bufferUP.hasPackets():
                earliestPacket = bufferUP.queue()[0]
                latency.append( timer - earliestPacket.getTime() )
            if bufferDOWN.hasPackets():
                earliestPacket = bufferDOWN.queue()[0]
                latency.append( timer - earliestPacket.getTime() )

            # add to buffer: all packets that appeared since last clock
            while packetCursor < trace.getPacketCount()\
                and trace.getPackets()[packetCursor].getTime()<=timer:
                packet = trace.getPackets()[packetCursor]

                if packet.getDirection() == Packet.UP:
                    bufferUP.add( packet )
                elif packet.getDirection() == Packet.DOWN:
                    bufferDOWN.add( packet )

                # increment position in source buffer
                packetCursor += 1

            # check buffer UP: purge at most Packet.MTU bytes
            Folklore.packFromBuffer(Folklore.FIXED_PACKET_LEN-Packet.HEADER_LENGTH, bufferUP)

            # check buffer DOWN: purge at most Packet.MTU bytes
            Folklore.packFromBuffer(Folklore.FIXED_PACKET_LEN-Packet.HEADER_LENGTH, bufferDOWN)

            # send a byte in both directions: one fixed-length cover packet
            # is emitted per direction every tick, regardless of payload
            newTrace.addPacket( Packet(Packet.DOWN, timer, Folklore.FIXED_PACKET_LEN ) )
            newTrace.addPacket( Packet(Packet.UP , timer, Folklore.FIXED_PACKET_LEN ) )

            # go to the next clock cycle
            timer += Folklore.TIMER_CLOCK_SPEED

        return [newTrace,latency]
class Datastore:
    # Data-access layer: loads Webpage/Trace objects either from the
    # Liberatore-Levine (LL) pcap logs on disk or from the Herrmann MySQL
    # database, with an optional memcached layer in front (gated by the
    # module-level ENABLE_CACHE flag).

    @staticmethod
    def getWebpagesLL( webpageIds, traceIndexStart, traceIndexEnd ):
        # Load one Webpage per id from the LL pcap dataset, each populated
        # with traces traceIndexStart .. traceIndexEnd-1.
        webpages = []
        for webpageId in webpageIds:
            webpage = Webpage(webpageId)
            for traceIndex in range(traceIndexStart, traceIndexEnd):
                trace = Datastore.getTraceLL( webpageId, traceIndex )
                webpage.addTrace(trace)
            webpages.append(webpage)

        return webpages

    @staticmethod
    def getTraceLL( webpageId, traceIndex ):
        # Fetch a single LL trace, trying memcached first; traceIndex selects
        # a capture session from config.DATA_SET.
        # NOTE: with ENABLE_CACHE False the cached value is ignored and the
        # pcap is always re-parsed, but the cache is still (re)written.
        key = '.'.join(['Webpage',
                        'LL',
                        str(webpageId),
                        str(traceIndex)])

        trace = mc.get(key)
        if ENABLE_CACHE and trace:
            trace = cPickle.loads(trace)
        else:
            dateTime = config.DATA_SET[traceIndex]
            trace = pcapparser.readfile(dateTime['month'],
                                        dateTime['day'],
                                        dateTime['hour'],
                                        webpageId)

            mc.set(key,cPickle.dumps(trace,protocol=cPickle.HIGHEST_PROTOCOL))

        return trace

    @staticmethod
    def getWebpagesHerrmann( webpageIds, traceIndexStart, traceIndexEnd ):
        # Same shape as getWebpagesLL, but backed by the Herrmann MySQL DB.
        webpages = []
        for webpageId in webpageIds:
            webpage = Webpage(webpageId)
            for traceIndex in range(traceIndexStart, traceIndexEnd):
                trace = Datastore.getTraceHerrmann( webpageId, traceIndex )
                webpage.addTrace(trace)
            webpages.append(webpage)

        return webpages

    @staticmethod
    def getTraceHerrmann( webpageId, traceIndex ):
        # Fetch one trace from the Herrmann database (dataset 4 or 5
        # depending on config.DATA_SOURCE), memcached in front.
        # In the DB, negative packet sizes denote uplink, positive downlink;
        # abstime is converted to integer milliseconds.
        if config.DATA_SOURCE == 1:
            datasourceId = 4
        elif config.DATA_SOURCE == 2:
            datasourceId = 5

        key = '.'.join(['Webpage',
                        'H',
                        str(datasourceId),
                        str(webpageId),
                        str(traceIndex)])

        trace = mc.get(key)
        if ENABLE_CACHE and trace:
            trace = cPickle.loads(trace)
        else:
            connection = MySQLdb.connect(host=config.MYSQL_HOST,
                                         user=config.MYSQL_USER,
                                         passwd=config.MYSQL_PASSWD,
                                         db=config.MYSQL_DB )

            cursor = connection.cursor()
            # NOTE(review): SQL is built by string concatenation.  The
            # interpolated values are internal integers (dataset id, webpage
            # id, trace index), not user input, but parameterized queries
            # (cursor.execute(sql, params)) would still be safer.
            command = """SELECT packets.trace_id,
                                packets.size,
                                ROUND(packets.abstime*1000)
                         FROM (SELECT id
                               FROM traces
                               WHERE site_id = (SELECT id
                                                FROM sites
                                                WHERE dataset_id = """+str(datasourceId)+"""
                                                ORDER BY id
                                                LIMIT """+str(webpageId)+""",1)
                               ORDER BY id
                               LIMIT """+str(traceIndex)+""",1) traces,
                              packets
                         WHERE traces.id = packets.trace_id
                         ORDER BY packets.trace_id, packets.abstime"""
            cursor.execute( command )

            data = cursor.fetchall()
            trace = Trace(webpageId)
            for item in data:
                direction = Packet.UP
                if int(item[1])>0:
                    direction = Packet.DOWN
                time = item[2]
                length = int(math.fabs(item[1]))

                trace.addPacket( Packet( direction, time, length ) )
            connection.close()

            mc.set(key,cPickle.dumps(trace,protocol=cPickle.HIGHEST_PROTOCOL))

        return trace
class Trace:
    """An ordered sequence of Packets for one page load, with per-direction
    'direction-length' histograms maintained incrementally by addPacket."""

    def __init__(self,id):
        self.__packetArray = []     # packets in arrival order
        self.__id = id              # webpage id this trace belongs to
        self.__histogramUp = {}     # 'dir-len' -> count, uplink packets
        self.__histogramDown = {}   # 'dir-len' -> count, downlink packets
        self.__packetsUp = 0
        self.__packetsDown = 0
        self.__filePath = None
        self.__year = 0
        self.__month = 0
        self.__day = 0
        self.__hour = 0

    def getId(self): return self.__id

    def getPacketCount( self, direction = None ):
        """Number of packets, optionally restricted to one direction."""
        return len(self.getPackets(direction))

    def getPackets( self, direction = None ):
        """Return the packets in order, optionally filtered by direction."""
        retArray = []
        for packet in self.__packetArray:
            if direction == None or packet.getDirection() == direction:
                retArray.append( packet )
        return retArray

    def addPacket( self, packet ):
        """Append a packet, updating counters and histograms.

        Header-only packets (ACKs) are silently dropped when
        config.IGNORE_ACK is set.
        """
        # Completely ignore ACK packets
        if config.IGNORE_ACK and packet.getLength() == Packet.HEADER_LENGTH:
            return self.__packetArray

        key = str(packet.getDirection())+'-'+str(packet.getLength())

        if packet.getDirection()==Packet.UP:
            self.__packetsUp += 1
            if not self.__histogramUp.get( key ):
                self.__histogramUp[key] = 0
            self.__histogramUp[key] += 1
        elif packet.getDirection()==Packet.DOWN:
            self.__packetsDown += 1
            if not self.__histogramDown.get( key ):
                self.__histogramDown[key] = 0
            self.__histogramDown[key] += 1

        return self.__packetArray.append( packet )

    def getBandwidth( self, direction = None ):
        """Total bytes transferred, excluding header-only (ACK) packets."""
        totalBandwidth = 0
        for packet in self.getPackets():
            if (direction == None or direction == packet.getDirection()) and packet.getLength() != Packet.HEADER_LENGTH:
                totalBandwidth += packet.getLength()

        return totalBandwidth

    def getTime( self, direction = None ):
        """Timestamp of the last matching packet; 0 for an empty trace."""
        timeCursor = 0
        for packet in self.getPackets():
            if direction == None or direction == packet.getDirection():
                timeCursor = packet.getTime()

        return timeCursor

    def getHistogram( self, direction = None, normalize = False ):
        """Packet-length histogram keyed 'direction-length'.

        direction selects up/down/both; normalize=True turns counts into a
        probability distribution.
        """
        if direction == Packet.UP:
            histogram = dict(self.__histogramUp)
            totalPackets = self.__packetsUp
        elif direction == Packet.DOWN:
            histogram = dict(self.__histogramDown)
            totalPackets = self.__packetsDown
        else:
            # Merge both directions; keys never collide because they embed
            # the direction.
            histogram = dict(self.__histogramUp)
            for key in self.__histogramDown:
                histogram[key] = self.__histogramDown[key]
            totalPackets = self.__packetsDown + self.__packetsUp

        if normalize==True:
            for key in histogram:
                histogram[key] = (histogram[key] * 1.0) / totalPackets

        return histogram

    def calcL1Distance( self, targetDistribution, filterDirection=None ):
        """L1 (Manhattan) distance between this trace's normalized histogram
        and targetDistribution, over the union of their keys."""
        localDistribution = self.getHistogram( filterDirection, True )

        # BUGFIX: take a real list copy of the keys.  On Python 3,
        # dict.keys() returns a view object with no .append(), so the
        # original code raised AttributeError here (Python 2 returned a
        # list, masking the problem).
        keys = list(localDistribution.keys())
        for key in targetDistribution:
            if key not in keys:
                keys.append( key )

        distance = 0
        for key in keys:
            l = localDistribution.get(key)
            r = targetDistribution.get(key)

            if l == None and r == None: continue
            if l == None: l = 0
            if r == None: r = 0

            distance += math.fabs( l - r )

        return distance

    def getMostSkewedDimension( self, targetDistribution ):
        """Return [direction, length] for the histogram bin where the target
        distribution exceeds this trace by the largest margin."""
        localDistribution = self.getHistogram( None, True )

        keys = targetDistribution.keys()

        worstKey = None
        worstKeyDistance = 0

        for key in keys:
            l = localDistribution.get(key)
            r = targetDistribution.get(key)

            if l == None: l = 0
            if r == None: r = 0

            if worstKey==None or (r - l) > worstKeyDistance:
                worstKeyDistance = r - l
                worstKey = key

        bits = worstKey.split('-')

        return [int(bits[0]),int(bits[1])]
class ESORICSClassifier:
    # ESORICS-style classifier: compares the ordered sequences of "large"
    # packet lengths (uplink > 300 bytes; downlink 300-1450 bytes) using
    # normalized Levenshtein edit distance, weighting uplink 0.6 and
    # downlink 0.4.  Lower accumulated distance = better match.
    # NOTE: Python 2 only (uses unicode()/unichr()); requires the
    # third-party python-Levenshtein package.

    @staticmethod
    def traceToInstance( trace ):
        # This classifier consumes raw traces; no feature extraction here.
        return trace

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        # Build, per class ('Webpage<id>') and per direction, one filtered
        # length sequence per training trace.
        candidateSequences = {}
        for trace in trainingSet:
            for d in [Packet.UP, Packet.DOWN]:
                if not candidateSequences.get('Webpage'+str(trace.getId())):
                    candidateSequences['Webpage'+str(trace.getId())] = {}
                    candidateSequences['Webpage'+str(trace.getId())][Packet.UP] = []
                    candidateSequences['Webpage'+str(trace.getId())][Packet.DOWN] = []

                # Start a fresh sequence for this (trace, direction) pair.
                candidateSequences['Webpage'+str(trace.getId())][d].append([])
                for p in trace.getPackets():
                    if p.getDirection()==d:
                        if d == Packet.UP and p.getLength() > 300:
                            candidateSequences['Webpage'+str(trace.getId())][d][-1].append(p.getLength())
                        elif d == Packet.DOWN and p.getLength() > 300 and p.getLength() < 1450:
                            candidateSequences['Webpage'+str(trace.getId())][d][-1].append(p.getLength())

        # Score every test trace; returns [accuracy%, [[actual, guess], ...]].
        correctlyClassified = 0
        debugInfo = []
        for instance in testingSet:
            actual = 'Webpage'+str(instance.getId())
            guess = ESORICSClassifier.doClassify(candidateSequences, instance)
            if guess == actual:
                correctlyClassified += 1
            debugInfo.append([actual, guess])

        accuracy = 100.0 * correctlyClassified / len(testingSet)

        return [accuracy, debugInfo]

    @staticmethod
    def doClassify(candidateSequences, instance):
        # Return the class name whose training sequences have the smallest
        # accumulated weighted edit distance to `instance`.
        guess = None

        # Extract the test trace's filtered length sequences (same filters
        # as in classify()).
        targetSequenceUp = []
        targetSequenceDown = []
        for p in instance.getPackets():
            if p.getDirection()==Packet.UP and p.getLength() > 300:
                targetSequenceUp.append(p.getLength())
            elif p.getDirection()==Packet.DOWN and p.getLength() > 300 and p.getLength() < 1450:
                targetSequenceDown.append(p.getLength())

        # Accumulate, per class, the sum of normalized distances (0..1 each)
        # over all training sequences; empty sequences count as distance 1.
        similarity = {}
        for className in candidateSequences:
            if not similarity.get(className):
                similarity[className] = 0
            for direction in [Packet.UP, Packet.DOWN]:
                for i in range(len(candidateSequences[className][direction])):
                    if direction == Packet.UP:
                        distance = ESORICSClassifier.levenshtein(targetSequenceUp, candidateSequences[className][direction][i])
                        maxLen = max(len(targetSequenceUp), len(candidateSequences[className][direction][i]))
                        if len(targetSequenceUp) == 0 or len(candidateSequences[className][direction][i]) == 0:
                            distance = 1.0
                        else:
                            distance /= 1.0 * maxLen

                        similarity[className] += 0.6 * distance
                    elif direction == Packet.DOWN:
                        distance = ESORICSClassifier.levenshtein(targetSequenceDown, candidateSequences[className][direction][i])
                        maxLen = max(len(targetSequenceDown), len(candidateSequences[className][direction][i]))
                        if len(targetSequenceDown) == 0 or len(candidateSequences[className][direction][i]) == 0:
                            distance = 1.0
                        else:
                            distance /= 1.0 * maxLen
                        similarity[className] += 0.4 * distance

        # Pick the class with the smallest accumulated distance.
        # NOTE(review): `<=` means later-iterated classes win ties, and the
        # initial bestSimilarity assumes per-class totals never exceed
        # config.NUM_TRAINING_TRACES (each trace contributes at most
        # 0.6 + 0.4 = 1.0, so the bound holds).
        bestSimilarity = config.NUM_TRAINING_TRACES
        for className in similarity:
            if guess == None or similarity[className] <= bestSimilarity:
                bestSimilarity = similarity[className]
                guess = className

        return guess

    @staticmethod
    # from http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance
    def levenshtein(s1, s2):
        # Edit distance between two length sequences, computed by encoding
        # each length as a single unicode character (Python 2 only).
        s1 = ESORICSClassifier.encode(s1)
        s2 = ESORICSClassifier.encode(s2)
        return Levenshtein.distance(unicode(s1), unicode(s2))

    @staticmethod
    def encode(list):
        # Map each packet length (an int) to the unicode code point of the
        # same value, producing a string Levenshtein.distance can consume.
        strList = []
        for val in list:
            #appVal = config.PACKET_RANGE2.index(val)
            appVal = unichr(val)
            strList.append(appVal)

        return ''.join(strList)
class PanchenkoClassifier:
    # Panchenko et al. classifier: packet-length histogram plus hand-crafted
    # "marker" features (burst size/count markers, HTML marker, unique
    # sizes, percentages, counts, total bandwidth), classified with a
    # LibSVM RBF kernel in Weka.

    @staticmethod
    def roundArbitrary(x, base):
        # Round x to the nearest multiple of base.
        return int(base * round(float(x)/base))

    @staticmethod
    def roundNumberMarker(n):
        # Coarsen a burst packet count into the buckets used by the paper
        # (3, 6, 9, or the exact value outside those ranges).
        if n==4 or n==5: return 3
        elif n==7 or n==8: return 6
        elif n==10 or n==11 or n==12 or n==13: return 9
        else: return n

    @staticmethod
    def traceToInstance( trace ):
        # Convert a Trace into a feature dictionary for ARFF export.
        # An empty trace yields only the class label.
        if trace.getPacketCount()==0:
            instance = {}
            instance['class'] = 'webpage'+str(trace.getId())
            return instance

        # Start from the raw 'direction-length' histogram.
        instance = trace.getHistogram()

        # Size/Number Markers: walk the stream accumulating contiguous
        # same-direction bursts; on each direction flip record one size
        # marker ('S<dir>-<bytes rounded to 600>') and one count marker
        # ('N<dir>-<coarsened packet count>').
        directionCursor = None
        dataCursor = 0
        numberCursor = 0
        for packet in trace.getPackets():
            if directionCursor == None:
                directionCursor = packet.getDirection()

            if packet.getDirection()!=directionCursor:
                dataKey = 'S'+str(directionCursor)+'-'+str( PanchenkoClassifier.roundArbitrary(dataCursor, 600) )
                if not instance.get( dataKey ):
                    instance[dataKey] = 0
                instance[dataKey] += 1

                numberKey = 'N'+str(directionCursor)+'-'+str( PanchenkoClassifier.roundNumberMarker(numberCursor) )
                if not instance.get( numberKey ):
                    instance[numberKey] = 0
                instance[numberKey] += 1

                directionCursor = packet.getDirection()
                dataCursor = 0
                numberCursor = 0

            dataCursor += packet.getLength()
            numberCursor += 1

        # Flush the trailing burst's markers, if any.
        if dataCursor>0:
            key = 'S'+str(directionCursor)+'-'+str( PanchenkoClassifier.roundArbitrary(dataCursor, 600) )
            if not instance.get( key ):
                instance[key] = 0
            instance[key] += 1

        if numberCursor>0:
            numberKey = 'N'+str(directionCursor)+'-'+str( PanchenkoClassifier.roundNumberMarker(numberCursor) )
            if not instance.get( numberKey ):
                instance[numberKey] = 0
            instance[numberKey] += 1

        # HTML Markers: sum downlink bytes until the second uplink packet
        # after some downlink traffic -- a proxy for the size of the initial
        # HTML document.
        counterUP = 0
        counterDOWN = 0
        htmlMarker = 0
        for packet in trace.getPackets():
            if packet.getDirection() == Packet.UP:
                counterUP += 1
                if counterUP>1 and counterDOWN>0: break
            elif packet.getDirection() == Packet.DOWN:
                counterDOWN += 1
                htmlMarker += packet.getLength()

        htmlMarker = PanchenkoClassifier.roundArbitrary( htmlMarker, 600 )
        instance['H'+str(htmlMarker)] = 1

        # Occurring Packet Sizes: number of distinct lengths per direction,
        # rounded to a multiple of 2.
        packetsUp = []
        packetsDown = []
        for packet in trace.getPackets():
            if packet.getDirection()==Packet.UP and packet.getLength() not in packetsUp:
                packetsUp.append( packet.getLength() )
            if packet.getDirection()==Packet.DOWN and packet.getLength() not in packetsDown:
                packetsDown.append( packet.getLength() )
        instance['uniquePacketSizesUp'] = PanchenkoClassifier.roundArbitrary( len( packetsUp ), 2)
        instance['uniquePacketSizesDown'] = PanchenkoClassifier.roundArbitrary( len( packetsDown ), 2)

        # Percentage Incoming Packets (rounded to a multiple of 5).
        instance['percentageUp'] = PanchenkoClassifier.roundArbitrary( 100.0 * trace.getPacketCount( Packet.UP ) / trace.getPacketCount(), 5)
        instance['percentageDown'] = PanchenkoClassifier.roundArbitrary( 100.0 * trace.getPacketCount( Packet.DOWN ) / trace.getPacketCount(), 5)

        # Number of Packets (rounded to a multiple of 15).
        instance['numberUp'] = PanchenkoClassifier.roundArbitrary( trace.getPacketCount( Packet.UP ), 15)
        instance['numberDown'] = PanchenkoClassifier.roundArbitrary( trace.getPacketCount( Packet.DOWN ), 15)

        # Total Bytes Transmitted, rounded to a multiple of 10000 and
        # encoded as one-hot keys per direction.
        bandwidthUp = PanchenkoClassifier.roundArbitrary( trace.getBandwidth( Packet.UP ), 10000)
        bandwidthDown = PanchenkoClassifier.roundArbitrary( trace.getBandwidth( Packet.DOWN ), 10000)
        instance['0-B'+str(bandwidthUp)] = 1
        instance['1-B'+str(bandwidthDown)] = 1

        # Label
        instance['class'] = 'webpage'+str(trace.getId())

        return instance

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        # Write ARFF train/test files and run Weka LibSVM with the RBF
        # hyperparameters from the paper.
        [trainingFile,testingFile] = arffWriter.writeArffFiles( runID, trainingSet, testingSet )
        return wekaAPI.execute( trainingFile,
                                testingFile,
                                "weka.Run weka.classifiers.functions.LibSVM",
                                ['-K','2', # RBF kernel
                                 '-G','0.0000019073486328125', # Gamma
                                 '-C','131072'] ) # Cost
4 | 5 | import os 6 | import sys 7 | from Packet import Packet 8 | 9 | # Set the following to a directory that contains 10 | # * weka-X-Y-Z (see WEKA_ROOT to change the weka version) 11 | # * pcap-logs (a diretory that contains all of the LL pcap files) 12 | # * [optional] (a directory that contains custom/local python modules) 13 | BASE_DIR = '' 14 | 15 | # Enviromental settings 16 | JVM_MEMORY_SIZE = '4192m' 17 | 18 | WEKA_ROOT = os.path.join(BASE_DIR ,'weka-3-7-5') 19 | WEKA_JAR = os.path.join(WEKA_ROOT ,'weka.jar') 20 | PCAP_ROOT = os.path.join(BASE_DIR ,'pcap-logs') 21 | PYTHON_ROOT = os.path.join(BASE_DIR ,'python2.4') 22 | PYTHONPATH = os.path.join(PYTHON_ROOT,'lib/python') 23 | CACHE_DIR = './cache' 24 | COUNTERMEASURE_DIR = './countermeasures' 25 | CLASSIFIERS_DIR = './classifiers' 26 | OUTPUT_DIR = './output' 27 | 28 | #Specify options for Herrmann MySQL database 29 | MYSQL_HOST = 'localhost' 30 | MYSQL_DB = 'fingerprints' 31 | MYSQL_USER = 'fingerprints' 32 | MYSQL_PASSWD = 'fingerprints' 33 | 34 | sys.path.append(PYTHONPATH) 35 | sys.path.append(COUNTERMEASURE_DIR) 36 | sys.path.append(CLASSIFIERS_DIR) 37 | 38 | COUNTERMEASURE = 0 39 | CLASSIFIER = 0 40 | BUCKET_SIZE = 2 41 | DATA_SOURCE = 1 42 | NUM_TRAINING_TRACES = 16 43 | NUM_TESTING_TRACES = 4 44 | NUM_TRIALS = 1 45 | TOP_N = 775 46 | PACKET_PENALTY = 68 47 | IGNORE_ACK = True 48 | 49 | # Liberatore and Levine Training and Testing configuration 50 | DATA_SET = [ 51 | #{'month':2,'day':10,'hour':13}, 52 | #{'month':2,'day':11,'hour':11}, 53 | #{'month':2,'day':13,'hour':8}, 54 | #{'month':2,'day':13,'hour':19}, 55 | #{'month':2,'day':14,'hour':9}, 56 | #{'month':2,'day':14,'hour':23}, 57 | #{'month':2,'day':15,'hour':8}, 58 | #{'month':2,'day':16,'hour':12}, 59 | #{'month':2,'day':20,'hour':10}, 60 | #{'month':2,'day':20,'hour':16}, 61 | #{'month':2,'day':20,'hour':22}, 62 | #{'month':2,'day':21,'hour':4}, 63 | #{'month':2,'day':21,'hour':10}, 64 | #{'month':2,'day':21,'hour':16}, 65 | 
            # --- tail of the crawl-schedule list (opened earlier in this file) ---
            # Each entry names one crawl snapshot of the Herrmann dataset by
            # month/day/hour.  Commented entries are snapshots deliberately
            # excluded from the experiments.
            #{'month':2,'day':21,'hour':22},
            #{'month':2,'day':22,'hour':4},
            #{'month':2,'day':22,'hour':10},
            #{'month':2,'day':22,'hour':16},
            #{'month':2,'day':22,'hour':22},
            #{'month':2,'day':23,'hour':4},
            #{'month':2,'day':23,'hour':10},
            #{'month':2,'day':20,'hour':10},
            #{'month':2,'day':20,'hour':16},
            #{'month':2,'day':20,'hour':22},
            #{'month':2,'day':21,'hour':4},
            #{'month':2,'day':21,'hour':10},
            #{'month':2,'day':21,'hour':16},
            #{'month':2,'day':21,'hour':22},
            #{'month':2,'day':22,'hour':4},
            #{'month':2,'day':22,'hour':10},
            #{'month':2,'day':22,'hour':16},
            #{'month':2,'day':22,'hour':22},
            #{'month':2,'day':23,'hour':4},
            #{'month':2,'day':23,'hour':10},
            {'month':3,'day':6,'hour':16},
            {'month':3,'day':6,'hour':22},
            {'month':3,'day':7,'hour':4},
            {'month':3,'day':7,'hour':10},
            {'month':3,'day':7,'hour':16},
            {'month':3,'day':7,'hour':22},
            {'month':3,'day':8,'hour':4},
            {'month':3,'day':8,'hour':10},
            {'month':3,'day':8,'hour':16},
            {'month':3,'day':8,'hour':22},
            {'month':3,'day':9,'hour':4},
            {'month':3,'day':9,'hour':16},
            {'month':3,'day':9,'hour':22},
            {'month':3,'day':10,'hour':4},
            {'month':3,'day':10,'hour':10},
            {'month':3,'day':10,'hour':16},
            {'month':3,'day':10,'hour':22},
            {'month':3,'day':11,'hour':4},
            {'month':3,'day':11,'hour':10},
            {'month':3,'day':11,'hour':16},
            {'month':3,'day':11,'hour':22},
            {'month':3,'day':12,'hour':4},
            {'month':3,'day':12,'hour':10},
            {'month':3,'day':12,'hour':16},
            {'month':3,'day':12,'hour':22},
            {'month':3,'day':13,'hour':16},
            {'month':3,'day':13,'hour':22},
            {'month':3,'day':14,'hour':4},
            {'month':3,'day':14,'hour':10},
            {'month':3,'day':14,'hour':16},
            {'month':3,'day':14,'hour':22},
            {'month':3,'day':15,'hour':4},
            {'month':3,'day':15,'hour':10},
            {'month':3,'day':15,'hour':16},
            {'month':3,'day':15,'hour':22},
            {'month':3,'day':16,'hour':4},
            {'month':3,'day':16,'hour':10},
            {'month':3,'day':16,'hour':16},
            {'month':3,'day':16,'hour':22},
            {'month':3,'day':17,'hour':4},
            {'month':3,'day':17,'hour':10},
            {'month':3,'day':17,'hour':16},
            {'month':3,'day':17,'hour':22},
            {'month':3,'day':20,'hour':10},
            {'month':3,'day':20,'hour':16},
            {'month':3,'day':20,'hour':22},
            {'month':3,'day':21,'hour':4},
            {'month':3,'day':21,'hour':10},
            {'month':3,'day':21,'hour':16},
            {'month':3,'day':21,'hour':22},
            {'month':3,'day':22,'hour':4},
            {'month':3,'day':22,'hour':10},
            {'month':3,'day':22,'hour':16},
            {'month':3,'day':22,'hour':22},
            {'month':3,'day':23,'hour':4},
            {'month':3,'day':23,'hour':10},
            {'month':3,'day':23,'hour':16},
            {'month':3,'day':23,'hour':22},
            {'month':3,'day':24,'hour':10},
            {'month':3,'day':24,'hour':16},
            {'month':3,'day':24,'hour':22},
            {'month':3,'day':25,'hour':4},
            {'month':3,'day':25,'hour':10},
            {'month':3,'day':25,'hour':16},
            {'month':3,'day':25,'hour':22},
            {'month':3,'day':26,'hour':4},
            {'month':3,'day':26,'hour':10},
            {'month':3,'day':26,'hour':16},
            {'month':3,'day':26,'hour':22},
            {'month':3,'day':27,'hour':4},
            {'month':3,'day':27,'hour':10},
            {'month':3,'day':27,'hour':16},
            {'month':3,'day':28,'hour':16},
            {'month':3,'day':28,'hour':22},
            {'month':3,'day':29,'hour':4},
            {'month':3,'day':29,'hour':10},
            {'month':3,'day':29,'hour':16},
            {'month':3,'day':29,'hour':22},
            {'month':3,'day':30,'hour':4},
            {'month':3,'day':30,'hour':10},
            {'month':3,'day':30,'hour':16},
            {'month':3,'day':30,'hour':22},
            {'month':3,'day':31,'hour':4},
            {'month':3,'day':31,'hour':10},
            {'month':3,'day':31,'hour':16},
            {'month':3,'day':31,'hour':22},
            {'month':4,'day':1,'hour':4},
            {'month':4,'day':1,'hour':10},
            {'month':4,'day':1,'hour':16},
            {'month':4,'day':1,'hour':22},
            {'month':4,'day':2,'hour':4},
            {'month':4,'day':2,'hour':10},
            {'month':4,'day':2,'hour':16},
            {'month':4,'day':2,'hour':22},
            {'month':4,'day':3,'hour':4},
            {'month':4,'day':3,'hour':10},
            {'month':4,'day':3,'hour':16},
            {'month':4,'day':3,'hour':22},
            {'month':4,'day':4,'hour':4},
            {'month':4,'day':4,'hour':10},
            {'month':4,'day':4,'hour':16},
            {'month':4,'day':4,'hour':22},
            {'month':4,'day':5,'hour':4},
            {'month':4,'day':5,'hour':10},
            {'month':4,'day':5,'hour':16},
            {'month':4,'day':5,'hour':22},
            {'month':4,'day':6,'hour':4},
            {'month':4,'day':6,'hour':10},
            {'month':4,'day':6,'hour':16},
            {'month':4,'day':6,'hour':22},
            {'month':4,'day':7,'hour':4},
            {'month':4,'day':7,'hour':10},
            {'month':4,'day':7,'hour':16},
            {'month':4,'day':7,'hour':22},
            {'month':4,'day':8,'hour':4},
            {'month':4,'day':8,'hour':10},
            {'month':4,'day':8,'hour':16},
            #{'month':4,'day':13,'hour':22},
            #{'month':4,'day':14,'hour':4},
            #{'month':4,'day':14,'hour':10},
            #{'month':4,'day':14,'hour':16},
            #{'month':4,'day':14,'hour':22},
            #{'month':4,'day':15,'hour':4},
            #{'month':4,'day':15,'hour':10},
            #{'month':4,'day':15,'hour':16},
            #{'month':4,'day':15,'hour':22},
            #{'month':4,'day':16,'hour':4},
            #{'month':4,'day':16,'hour':16},
            #{'month':4,'day':16,'hour':22},
            #{'month':4,'day':18,'hour':16},
            #{'month':4,'day':18,'hour':22},
            #{'month':4,'day':19,'hour':4},
            #{'month':4,'day':19,'hour':10},
            #{'month':4,'day':19,'hour':16},
            #{'month':4,'day':19,'hour':22},
            #{'month':4,'day':20,'hour':4},
            #{'month':4,'day':20,'hour':10},
            #{'month':4,'day':20,'hour':16},
            #{'month':4,'day':20,'hour':22},
            #{'month':4,'day':21,'hour':4},
            #{'month':4,'day':21,'hour':10},
            #{'month':4,'day':21,'hour':16},
            #{'month':4,'day':21,'hour':22},
            #{'month':4,'day':22,'hour':4},
            #{'month':4,'day':22,'hour':16},
            #{'month':4,'day':22,'hour':22},
            #{'month':4,'day':23,'hour':4},
            #{'month':4,'day':23,'hour':10},
            #{'month':4,'day':23,'hour':16},
            #{'month':4,'day':23,'hour':22},
            #{'month':4,'day':24,'hour':4},
            #{'month':4,'day':24,'hour':16},
            #{'month':4,'day':24,'hour':22},
           ]

# packet range (LL)
# Candidate packet lengths, stepping from the TCP/IP header size up to the MTU.
# PACKET_RANGE (8-byte buckets) is the morphing/histogram alphabet used by the
# countermeasures; PACKET_RANGE2 uses finer 4-byte buckets.
PACKET_RANGE = range(Packet.HEADER_LENGTH,Packet.MTU+1,8)
PACKET_RANGE2 = range(Packet.HEADER_LENGTH,Packet.MTU+1,4)

# packet range (H)

# Security Strategy Enum
# Integer codes selecting the padding/morphing countermeasure (see
# main.intToCountermeasure for the mapping to classes; values > 10 select
# Folklore variants there).
NONE = 0
PAD_TO_MTU = 1
RFC_COMPLIANT_FIXED_PAD = 2
RFC_COMPLIANT_RANDOM_PAD = 3
RANDOM_PAD = 4
PAD_ROUND_EXPONENTIAL = 5
PAD_ROUND_LINEAR = 6
MICE_ELEPHANTS = 7
DIRECT_TARGET_SAMPLING = 8
WRIGHT_STYLE_MORPHING = 9
FIXED_PAD = 10

# Classifier enum
# Integer codes selecting the attack classifier (see main.intToClassifier).
# Note the gaps: 7-9 and 11-13 are intentionally unused.
LIBERATORE_CLASSIFIER = 0
WRIGHT_CLASSIFIER = 1
JACCARD_CLASSIFIER = 2
PANCHENKO_CLASSIFIER = 3
BANDWIDTH_CLASSIFIER = 4
ESORICS_CLASSIFIER = 5
HERRMANN_CLASSIFIER = 6
TIME_CLASSIFIER = 10
VNG_CLASSIFIER = 14
VNG_PLUS_PLUS_CLASSIFIER = 15

### Sanity
def sanity():
    # Best-effort environment check: warn (but do not abort -- the sys.exit
    # calls are deliberately commented out) when Weka or BASE_DIR is missing.
    if not os.path.exists(WEKA_JAR):
        print 'Weka does not exist in path: '+str(WEKA_JAR)
        print 'Please install Weka properly.'
        #sys.exit()

    if BASE_DIR == '':
        print "!!!!"
        print "Please open config.py and set your BASE_DIR."
        #sys.exit()

# Run the environment check at import time so misconfiguration is reported
# as soon as config is imported.
sanity()
###
-------------------------------------------------------------------------------- /countermeasures/WrightStyleMorphing.py: --------------------------------------------------------------------------------
# This is a Python framework to compliment "Peek-a-Boo, I Still See You: Why Efficient Traffic Analysis Countermeasures Fail".
# Copyright (C) 2012 Kevin P. Dyer (kpdyer.com)
# See LICENSE for more details.

import random
import math
import config

from Webpage import Webpage
from Trace import Trace
from Packet import Packet

from cvxopt.base import matrix, sparse, spdiag

from cvxopt import solvers

# Silence the solver and relax tolerances: we solve one LP per
# (source page, target page, direction) and only need a coarse answer.
solvers.options['show_progress'] = False
solvers.options['LPX_K_MSGLEV'] = 0

solvers.options['abstol'] = 1e-4
solvers.options['reltol'] = 1e-4

solvers.options['maxiters'] = 200

# cost_matrix is the vector of cost coeffs
# cost_matrix[i,j] = |len_i - len_j|: the byte overhead of morphing a packet
# from the i-th to the j-th bucket of config.PACKET_RANGE.  It is then
# reshaped to an N x 1 vector (N = n^2, column-major) to match the
# vectorized morphing matrix used in the LP below.
n = len(config.PACKET_RANGE)
N = n**2
cost_matrix = matrix(0.0, (n,n), 'd')
for i in range(n):
    for j in range(n):
        cost_matrix[i,j] = math.fabs(config.PACKET_RANGE[i] - config.PACKET_RANGE[j])

cost_matrix = matrix(cost_matrix, (N,1), 'd')

class WrightStyleMorphing:
    """Traffic-morphing countermeasure (Wright et al.): rewrite each trace's
    packet-length distribution so it resembles a chosen target webpage,
    using a cost-minimizing morphing matrix computed by linear programming.
    """

    # L1 distance below which the morphed trace is considered "close enough"
    # to the target's bidirectional length distribution.
    # NOTE(review): name is misspelled ("THRESHHOLD"); kept for compatibility.
    L1_THRESHHOLD = 0.3

    @staticmethod
    def buildMetadata( srcWebpage, targetWebpage ):
        """Precompute, for one (source, target) webpage pair, the target
        length histograms and the per-direction morphing matrices.
        Returns a 7-element list consumed by applyCountermeasure.
        The matrices may be None when the LP has no solution; the caller
        then falls back to direct sampling from the target histogram.
        """
        targetDistributionBi = targetWebpage.getHistogram( None, True )
        targetDistributionUp = targetWebpage.getHistogram( Packet.UP, True )
        targetDistributionDown = targetWebpage.getHistogram( Packet.DOWN, True )
        srcDistributionUp = srcWebpage.getHistogram( Packet.UP, True )
        srcDistributionDown = srcWebpage.getHistogram( Packet.DOWN, True )
        morphingMatrixUp = WrightStyleMorphing.buildMorphingMatrix(srcWebpage.getId(), targetWebpage.getId(), Packet.UP, srcDistributionUp, targetDistributionUp)
        morphingMatrixDown = WrightStyleMorphing.buildMorphingMatrix(srcWebpage.getId(), targetWebpage.getId(), Packet.DOWN, srcDistributionDown, targetDistributionDown)

        return [targetDistributionBi, targetDistributionUp, targetDistributionDown, srcDistributionUp, srcDistributionDown, morphingMatrixUp, morphingMatrixDown]

    @staticmethod
    def applyCountermeasure( trace, metadata ):
        """Morph one trace toward the target distribution.

        Phase 1 (primary sampling): each original packet is replaced by one
        or more packets whose lengths are drawn from the morphing-matrix
        column for the original length (or directly from the target
        histogram when no matrix is available).
        Phase 2 (secondary sampling): extra packets are appended until the
        trace's L1 distance to the target's bidirectional histogram drops
        below L1_THRESHHOLD.
        Returns a new Trace; the input trace is not modified.
        """
        [targetDistributionBi,
        targetDistributionUp,
        targetDistributionDown,
        srcDistributionUp,
        srcDistributionDown,
        morphingMatrixUp,
        morphingMatrixDown] = metadata

        newTrace = Trace(trace.getId())

        # primary sampling
        timeCursor = 0
        for packet in trace.getPackets():
            # NOTE(review): this assigns the packet *length* to the time
            # cursor -- packet.getTime() looks like the intended call.
            # Confirm against Trace/Packet semantics before changing.
            timeCursor = packet.getLength()
            # Map the length to its bucket index in config.PACKET_RANGE
            # (8-byte buckets starting at the header length).
            index = (packet.getLength()-Packet.HEADER_LENGTH)/8

            targetDistribution = None
            morphingColumn = None
            if packet.getDirection()==Packet.UP:
                if morphingMatrixUp:
                    morphingColumn = morphingMatrixUp[:,index]
                else:
                    targetDistribution = targetDistributionUp
                targetDistributionSecondary = targetDistributionUp
            else:
                if morphingMatrixDown:
                    morphingColumn = morphingMatrixDown[:,index]
                else:
                    targetDistribution = targetDistributionDown
                targetDistributionSecondary = targetDistributionDown

            # NOTE(review): truth-testing a cvxopt matrix slice relies on
            # cvxopt's __nonzero__ semantics; `is not None` would be the
            # explicit form -- verify before changing.
            if morphingColumn:
                # Turn the matrix column into the same {"<dir>-<len>": prob}
                # dict shape the histogram-based path uses.
                targetDistribution = {}
                for i in range(len(morphingColumn)):
                    key = str(packet.getDirection())+'-'+str( i*8 + Packet.HEADER_LENGTH )
                    targetDistribution[key] = morphingColumn[i]

            packets = WrightStyleMorphing.morphPacket( packet, targetDistribution, targetDistributionSecondary )
            for newPacket in packets:
                newTrace.addPacket( newPacket )

        # secondary sampling
        while True:
            l1Distance = newTrace.calcL1Distance( targetDistributionBi )
            if l1Distance <= WrightStyleMorphing.L1_THRESHHOLD:
                break

            # Space the padding packets 10 time units apart and always pad
            # the dimension currently most under-represented.
            timeCursor += 10
            newDirection, newLen = newTrace.getMostSkewedDimension( targetDistributionBi )
            packet = Packet( newDirection, timeCursor, newLen )
            newTrace.addPacket( packet )

        return newTrace

    @staticmethod
    def morphPacket( packet, targetDistributionPrimary, targetDistributionSecondary ):
        """Replace one packet by a list of morphed packets.

        The first replacement is drawn from the primary distribution; if its
        payload (length minus the per-packet header penalty) does not cover
        the original payload, further packets are drawn from the secondary
        distribution until the residual payload is exhausted.
        """
        packetPenalty = config.PACKET_PENALTY

        packetList = []
        newPacket = WrightStyleMorphing.generatePacket( targetDistributionPrimary, packet )
        packetList.append( newPacket )

        dataSent = newPacket.getLength() - packetPenalty
        dataSent = max( dataSent, 0 ) # Can have 'negative' dataSent if newPacket is ACK
                                      # and packet is not ACK
        residual = (packet.getLength() - packetPenalty) - dataSent

        # Now sample from secondary
        # NOTE(review): if the secondary distribution keeps yielding
        # header-only packets (dataSent == 0) this loop does not terminate;
        # presumably the distributions always contain payload lengths.
        while residual > 0:
            newPacket = WrightStyleMorphing.generatePacket( targetDistributionSecondary, packet )
            packetList.append( newPacket )

            dataSent = (newPacket.getLength() - packetPenalty)
            dataSent = max( dataSent, 0 )
            residual -= dataSent

        return packetList

    @staticmethod
    def generatePacket( targetDistribution, packet ):
        """Sample a length from the distribution and build a packet with the
        original packet's direction and timestamp.  Keys are
        "<direction>-<length>" strings; on an empty distribution the length
        falls back to 1500 (the Ethernet MTU).
        """
        sample = WrightStyleMorphing.sampleFromDistribution( targetDistribution )
        if sample == None:
            newLen = 1500
        else:
            bits = sample.split('-')
            newLen = int(bits[1])
        packet = Packet( packet.getDirection(), packet.getTime(), newLen )

        return packet

    @staticmethod
    def sampleFromDistribution( distribution ):
        """Draw one key from {key: weight} proportionally to its weight.

        Weights need not sum to 1.  Returns None for an empty distribution;
        if floating-point rounding exhausts the loop, the last key iterated
        is returned.
        """
        total = 0
        for key in distribution:
            total += distribution[key]
        n = random.uniform(0,total)

        key = None
        for key in distribution:
            if n < distribution[key]:
                return key
            n -= distribution[key]

        return key

    @staticmethod
    def buildMorphingMatrix(srcID, targetID, direction, srcDist, targetDist):
        """Vectorize the source/target histograms over config.PACKET_RANGE
        (missing buckets become 0) and solve for the morphing matrix.
        srcID/targetID are currently unused (presumably kept for logging
        or caching by the original author).
        Returns the n x n matrix, or None when the LP is infeasible.
        """
        srcVec = matrix(0, ( len(config.PACKET_RANGE) , 1), 'd' )
        targetVec = matrix(0, ( len(config.PACKET_RANGE) , 1), 'd' )

        for i in range(len(config.PACKET_RANGE)):
            key = str(direction)+'-'+str(config.PACKET_RANGE[i])

            if not srcDist.get( key ):
                srcVec[i] = 0
            else:
                srcVec[i] = srcDist[key]

            if not targetDist.get( key ):
                targetVec[i] = 0
            else:
                targetVec[i] = targetDist[key]

        A = WrightStyleMorphing.what_is_the_matrix( srcVec, targetVec )

        return A

    @staticmethod
    def what_is_the_matrix(X, Z):
        """find the optimal morphing matrix A such that A * src_dist = target_dist

        Solves, over the vectorized n x n matrix a (N = n^2 variables):
            minimize    cost_matrix' * a        (expected byte overhead)
            subject to  each column of A sums to 1   (columns are PDFs)
                        A * X = Z                    (morphs src to target)
                        a >= 0
        via cvxopt's LP interface with the GLPK backend.  Returns A as an
        n x n matrix, or None when the solver finds no feasible point.
        """
        n = len(Z)
        N = n**2

        # Equality Constraints
        A_list = []
        b_list = []
        # -- the columns of the matrix must be valid PDF's
        A_pdf = matrix(0.0, (n,N), 'd')
        for i in range(n):
            A_pdf[i,n*i:n*i+n] = 1.0
        b_pdf = matrix(1.0, (n,1), 'd')
        A_list.append(A_pdf)
        b_list.append(b_pdf)

        # -- the matrix must morph X to Z
        # Row i of A_morph is the vectorization of a matrix whose i-th row
        # is X', so (A_morph * a)_i = (A * X)_i.
        A_morph = matrix(0.0, (n,N), 'd')
        for i in range(n):
            matrix_vers = matrix(0.0, (n,n), 'd')
            matrix_vers[i,:] = X.T
            row = matrix(matrix_vers, (1,N), 'd')
            A_morph[i,:] = row
        b_morph = matrix(Z, (n,1), 'd')
        A_list.append(A_morph)
        b_list.append(b_morph)

        # concatenate all our equality constraints into one coeff matrix and one b vector
        A = sparse(A_list)
        b = matrix(b_list)

        # Inequality Constraints -- in order to be a valid PDF, each cell a_ij must be 0 <= a_ij <= 1
        G_list = []
        h_list = []
        G_lt = spdiag(matrix(1.0, (N,1), 'd'))
        h_lt = matrix(1.0, (N,1), 'd')
        # cvw: as mentioned in the comment above in find_the_one(), the "less than" constraints are
        #      in fact redundant given that we already require the columns to sum to 1.0 and require
        #      (below) that each prob is >= 0.  yay for smaller KKT matrices.
        #G_list.append(G_lt)
        #h_list.append(h_lt)

        G_gt = spdiag(matrix(-1.0, (N,1), 'd'))
        h_gt = matrix(0.0, (N,1), 'd')
        G_list.append(G_gt)
        h_list.append(h_gt)

        # cvw: I guess we could add some more constraints if we really wanted to..
        #      i.e. only downgrade the bit rate 10% of the time or less
        #      but for now these'll do

        G = sparse(G_list)
        h = matrix(h_list)

        # now run the cvxopt solver to get our answer
        ans = solvers.lp(cost_matrix, G=G, h=h, A=A, b=b, solver='glpk')
        A = None
        if ans['x']:
            # cost is the achieved objective value (unused beyond debugging).
            cost = cost_matrix.T * ans['x']

            # A is the morphing matrix
            A = matrix(ans['x'], (n,n), 'd')

        return A
-------------------------------------------------------------------------------- /main.py: --------------------------------------------------------------------------------
# This is a Python framework to compliment "Peek-a-Boo, I Still See You: Why Efficient Traffic Analysis Countermeasures Fail".
# Copyright (C) 2012 Kevin P. Dyer (kpdyer.com)
# See LICENSE for more details.

import sys
import config
import time
import os
import random
import getopt
import string
import itertools

# custom
from Datastore import Datastore
from Webpage import Webpage

# countermeasures
from PadToMTU import PadToMTU
from PadRFCFixed import PadRFCFixed
from PadRFCRand import PadRFCRand
from PadRand import PadRand
from PadRoundExponential import PadRoundExponential
from PadRoundLinear import PadRoundLinear
from MiceElephants import MiceElephants
from DirectTargetSampling import DirectTargetSampling
from Folklore import Folklore
from WrightStyleMorphing import WrightStyleMorphing

# classifiers
from LiberatoreClassifier import LiberatoreClassifier
from WrightClassifier import WrightClassifier
from BandwidthClassifier import BandwidthClassifier
from HerrmannClassifier import HerrmannClassifier
from TimeClassifier import TimeClassifier
from PanchenkoClassifier import PanchenkoClassifier
from VNGPlusPlusClassifier import VNGPlusPlusClassifier
from VNGClassifier import VNGClassifier
from JaccardClassifier import JaccardClassifier
from ESORICSClassifier import ESORICSClassifier

def intToCountermeasure(n):
    """Map a config countermeasure code to its class.

    Codes 1-9 select a specific countermeasure class; any code > 10 selects
    Folklore, whose FIXED_PACKET_LEN / TIMER_CLOCK_SPEED /
    MILLISECONDS_TO_RUN class attributes are then configured (as a side
    effect) by the code value.  Returns None for code 0 (no countermeasure).
    """
    countermeasure = None
    if n == config.PAD_TO_MTU:
        countermeasure = PadToMTU
    elif n == config.RFC_COMPLIANT_FIXED_PAD:
        countermeasure = PadRFCFixed
    elif n == config.RFC_COMPLIANT_RANDOM_PAD:
        countermeasure = PadRFCRand
    elif n == config.RANDOM_PAD:
        countermeasure = PadRand
    elif n == config.PAD_ROUND_EXPONENTIAL:
        countermeasure = PadRoundExponential
    elif n == config.PAD_ROUND_LINEAR:
        countermeasure = PadRoundLinear
    elif n == config.MICE_ELEPHANTS:
        countermeasure = MiceElephants
    elif n == config.DIRECT_TARGET_SAMPLING:
        countermeasure = DirectTargetSampling
    elif n == config.WRIGHT_STYLE_MORPHING:
        countermeasure = WrightStyleMorphing
    elif n > 10:
        countermeasure = Folklore

    # FIXED_PACKET_LEN: 1000,1250,1500
    # Codes 11-22 enumerate the cross product of packet length, timer speed
    # and run duration; 23-28 are hand-picked extra combinations.
    if n in [11,12,13,14]:
        Folklore.FIXED_PACKET_LEN = 1000
    elif n in [15,16,17,18]:
        Folklore.FIXED_PACKET_LEN = 1250
    elif n in [19,20,21,22]:
        Folklore.FIXED_PACKET_LEN = 1500

    if n in [11,12,13,17,18,19]:
        Folklore.TIMER_CLOCK_SPEED = 20
    elif n in [14,15,16,20,21,22]:
        Folklore.TIMER_CLOCK_SPEED = 40

    if n in [11,14,17,20]:
        Folklore.MILLISECONDS_TO_RUN = 0
    elif n in [12,15,18,21]:
        Folklore.MILLISECONDS_TO_RUN = 5000
    elif n in [13,16,19,22]:
        Folklore.MILLISECONDS_TO_RUN = 10000

    if n==23:
        Folklore.MILLISECONDS_TO_RUN = 0
        Folklore.FIXED_PACKET_LEN = 1250
        Folklore.TIMER_CLOCK_SPEED = 40
    elif n==24:
        Folklore.MILLISECONDS_TO_RUN = 0
        Folklore.FIXED_PACKET_LEN = 1500
        Folklore.TIMER_CLOCK_SPEED = 20
    elif n==25:
        Folklore.MILLISECONDS_TO_RUN = 5000
        Folklore.FIXED_PACKET_LEN = 1000
        Folklore.TIMER_CLOCK_SPEED = 40
    elif n==26:
        Folklore.MILLISECONDS_TO_RUN = 5000
        Folklore.FIXED_PACKET_LEN = 1500
        Folklore.TIMER_CLOCK_SPEED = 20
    elif n==27:
        Folklore.MILLISECONDS_TO_RUN = 10000
        Folklore.FIXED_PACKET_LEN = 1000
        Folklore.TIMER_CLOCK_SPEED = 40
    elif n==28:
        Folklore.MILLISECONDS_TO_RUN = 10000
        Folklore.FIXED_PACKET_LEN = 1250
        Folklore.TIMER_CLOCK_SPEED = 20


    return countermeasure

def intToClassifier(n):
    """Map a config classifier code to its class; None for unknown codes."""
    classifier = None
    if n == config.LIBERATORE_CLASSIFIER:
        classifier = LiberatoreClassifier
    elif n == config.WRIGHT_CLASSIFIER:
        classifier = WrightClassifier
    elif n == config.BANDWIDTH_CLASSIFIER:
        classifier = BandwidthClassifier
    elif n == config.HERRMANN_CLASSIFIER:
        classifier = HerrmannClassifier
    elif n == config.TIME_CLASSIFIER:
        classifier = TimeClassifier
    elif n == config.PANCHENKO_CLASSIFIER:
        classifier = PanchenkoClassifier
    elif n == config.VNG_PLUS_PLUS_CLASSIFIER:
        classifier = VNGPlusPlusClassifier
    elif n == config.VNG_CLASSIFIER:
        classifier = VNGClassifier
    elif n == config.JACCARD_CLASSIFIER:
        classifier = JaccardClassifier
    elif n == config.ESORICS_CLASSIFIER:
        classifier = ESORICSClassifier

    return classifier

def usage():
    # Print the command-line help text.
    # NOTE(review): the help text below lists "-t" twice; per the option
    # handling in run(), the "number of trials" option is actually "-n"
    # and "-t" is the number of training traces.  The string is left
    # unchanged here; fix the first "-t" entry to read "-n".
    print """
    -N [int] : use [int] websites from the dataset
               from which we will use to sample a privacy
               set k in each experiment (default 775)

    -k [int] : the size of the privacy set (default 2)

    -d [int]: dataset to use
        0: Liberatore and Levine Dataset (OpenSSH)
        1: Herrmann et al. Dataset (OpenSSH)
        2: Herrmann et al. Dataset (Tor)
        (default 1)

    -C [int] : classifier to run
        0: Liberatore Classifer
        1: Wright et al. Classifier
        2: Jaccard Classifier
        3: Panchenko et al. Classifier
        5: Lu et al. Edit Distance Classifier
        6: Herrmann et al. Classifier
        4: Dyer et al. Bandwidth (BW) Classifier
        10: Dyer et al. Time Classifier
        14: Dyer et al. Variable n-gram (VNG) Classifier
        15: Dyer et al. VNG++ Classifier
        (default 0)

    -c [int]: countermeasure to use
        0: None
        1: Pad to MTU
        2: Session Random 255
        3: Packet Random 255
        4: Pad Random MTU
        5: Exponential Pad
        6: Linear Pad
        7: Mice-Elephants Pad
        8: Direct Target Sampling
        9: Traffic Morphing
        (default 0)

    -t [int]: number of trials to run per experiment (default 1)

    -t [int]: number of training traces to use per experiment (default 16)

    -T [int]: number of testing traces to use per experiment (default 4)
    """

def run():
    """Command-line entry point: parse options into config, then run
    NUM_TRIALS experiments.  Each trial samples BUCKET_SIZE webpage ids,
    applies the selected countermeasure to training/testing traces, runs
    the selected classifier, and appends accuracy/overhead/timing rows to
    the <outputFilename>.output CSV (debug rows to .debug).
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "t:T:N:k:c:C:d:n:r:h")
    except getopt.GetoptError, err:
        print str(err) # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    # Random 8-char run id (can be overridden with -r) used to namespace
    # the intermediate ARFF/weka files.
    char_set = string.ascii_lowercase + string.digits
    runID = ''.join(random.sample(char_set,8))

    for o, a in opts:
        if o in ("-k"):
            config.BUCKET_SIZE = int(a)
        elif o in ("-C"):
            config.CLASSIFIER = int(a)
        elif o in ("-d"):
            config.DATA_SOURCE = int(a)
        elif o in ("-c"):
            config.COUNTERMEASURE = int(a)
        elif o in ("-N"):
            config.TOP_N = int(a)
        elif o in ("-t"):
            config.NUM_TRAINING_TRACES = int(a)
        elif o in ("-T"):
            config.NUM_TESTING_TRACES = int(a)
        elif o in ("-n"):
            config.NUM_TRIALS = int(a)
        elif o in ("-r"):
            runID = str(a)
        else:
            # unrecognized option (including -h): show help and abort
            usage()
            sys.exit(2)

    # Encode the whole parameter combination in the results file name so
    # different configurations never clobber each other.
    outputFilenameArray = ['results',
                           'k'+str(config.BUCKET_SIZE),
                           'c'+str(config.COUNTERMEASURE),
                           'd'+str(config.DATA_SOURCE),
                           'C'+str(config.CLASSIFIER),
                           'N'+str(config.TOP_N),
                           't'+str(config.NUM_TRAINING_TRACES),
                           'T'+str(config.NUM_TESTING_TRACES),
                          ]
    outputFilename = os.path.join(config.OUTPUT_DIR,'.'.join(outputFilenameArray))

    if not os.path.exists(config.CACHE_DIR):
        os.mkdir(config.CACHE_DIR)

    # Create the results files with a CSV banner on first use; subsequent
    # runs append.
    if not os.path.exists(outputFilename+'.output'):
        banner = ['accuracy','overhead','timeElapsedTotal','timeElapsedClassifier']
        f = open( outputFilename+'.output', 'w' )
        f.write(','.join(banner))
        f.close()
    if not os.path.exists(outputFilename+'.debug'):
        f = open( outputFilename+'.debug', 'w' )
        f.close()

    # Valid range for the trace index at which the train/test split is
    # seeded, per dataset.
    # NOTE(review): any other DATA_SOURCE value leaves startIndex/endIndex
    # unbound and raises NameError below -- consider validating earlier.
    if config.DATA_SOURCE == 0:
        startIndex = config.NUM_TRAINING_TRACES
        endIndex = len(config.DATA_SET)-config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 1:
        maxTracesPerWebsiteH = 160
        startIndex = config.NUM_TRAINING_TRACES
        endIndex = maxTracesPerWebsiteH-config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 2:
        maxTracesPerWebsiteH = 18
        startIndex = config.NUM_TRAINING_TRACES
        endIndex = maxTracesPerWebsiteH-config.NUM_TESTING_TRACES

    for i in range(config.NUM_TRIALS):
        startStart = time.time()

        # Sample a privacy set of BUCKET_SIZE pages from the top N.
        # NOTE(review): range(0, TOP_N - 1) yields only TOP_N - 1 ids,
        # excluding id TOP_N - 1 -- looks like an off-by-one; confirm
        # against the datastore's id space.
        webpageIds = range(0, config.TOP_N - 1)
        random.shuffle( webpageIds )
        webpageIds = webpageIds[0:config.BUCKET_SIZE]

        seed = random.randint( startIndex, endIndex )

        preCountermeasureOverhead = 0
        postCountermeasureOverhead = 0

        classifier = intToClassifier(config.CLASSIFIER)
        countermeasure = intToCountermeasure(config.COUNTERMEASURE)

        trainingSet = []
        testingSet = []

        # First sampled page becomes the morphing target for the
        # distribution-matching countermeasures (DTS / morphing).
        targetWebpage = None

        for webpageId in webpageIds:
            if config.DATA_SOURCE == 0:
                webpageTrain = Datastore.getWebpagesLL( [webpageId], seed-config.NUM_TRAINING_TRACES, seed )
                webpageTest = Datastore.getWebpagesLL( [webpageId], seed, seed+config.NUM_TESTING_TRACES )
            elif config.DATA_SOURCE == 1 or config.DATA_SOURCE == 2:
                webpageTrain = Datastore.getWebpagesHerrmann( [webpageId], seed-config.NUM_TRAINING_TRACES, seed )
                webpageTest = Datastore.getWebpagesHerrmann( [webpageId], seed, seed+config.NUM_TESTING_TRACES )

            webpageTrain = webpageTrain[0]
            webpageTest = webpageTest[0]

            if targetWebpage == None:
                targetWebpage = webpageTrain

            preCountermeasureOverhead += webpageTrain.getBandwidth()
            preCountermeasureOverhead += webpageTest.getBandwidth()

            metadata = None
            if config.COUNTERMEASURE in [config.DIRECT_TARGET_SAMPLING, config.WRIGHT_STYLE_MORPHING]:
                metadata = countermeasure.buildMetadata( webpageTrain, targetWebpage )

            # i==0 -> training webpage, i==1 -> testing webpage.
            # NOTE(review): this rebinds the outer trial counter `i`;
            # harmless for a for-range loop but worth renaming.
            i = 0
            for w in [webpageTrain, webpageTest]:
                for trace in w.getTraces():
                    if countermeasure:
                        if config.COUNTERMEASURE in [config.DIRECT_TARGET_SAMPLING, config.WRIGHT_STYLE_MORPHING]:
                            # The target page itself is left unmorphed.
                            if w.getId()!=targetWebpage.getId():
                                traceWithCountermeasure = countermeasure.applyCountermeasure( trace, metadata )
                            else:
                                traceWithCountermeasure = trace
                        else:
                            traceWithCountermeasure = countermeasure.applyCountermeasure( trace )
                    else:
                        traceWithCountermeasure = trace

                    postCountermeasureOverhead += traceWithCountermeasure.getBandwidth()
                    instance = classifier.traceToInstance( traceWithCountermeasure )

                    if instance:
                        if i==0:
                            trainingSet.append( instance )
                        elif i==1:
                            testingSet.append( instance )
                i+=1

        ###################

        startClass = time.time()

        [accuracy,debugInfo] = classifier.classify( runID, trainingSet, testingSet )

        end = time.time()

        # Overhead is reported as "post/pre" total bytes.
        overhead = str(postCountermeasureOverhead)+'/'+str(preCountermeasureOverhead)

        output = [accuracy,overhead]

        output.append( '%.2f' % (end-startStart) )
        output.append( '%.2f' % (end-startClass) )

        summary = ', '.join(itertools.imap(str, output))

        f = open( outputFilename+'.output', 'a' )
        f.write( "\n"+summary )
        f.close()

        f = open( outputFilename+'.debug', 'a' )
        for entry in debugInfo:
            f.write( entry[0]+','+entry[1]+"\n" )
        f.close()

if __name__ == '__main__':
    run()
--------------------------------------------------------------------------------