├── CHANGELOG ├── countermeasures ├── PadToMTU.py ├── PadFixed.py ├── PadRFCFixed.py ├── PadRFCRand.py ├── PadRand.py ├── MiceElephants.py ├── PadRoundExponential.py ├── PadRoundLinear.py ├── DirectTargetSampling.py ├── Folklore.py └── WrightStyleMorphing.py ├── classifiers ├── LiberatoreClassifier.py ├── WrightClassifier.py ├── BandwidthClassifier.py ├── TimeClassifier.py ├── HerrmannClassifier.py ├── wekaAPI.py ├── VNGClassifier.py ├── VNGPlusPlusClassifier.py ├── JaccardClassifier.py ├── ESORICSClassifier.py └── PanchenkoClassifier.py ├── LICENSE ├── test_Trace.py ├── Webpage.py ├── Packet.py ├── pcapparser.py ├── README.md ├── arffWriter.py ├── parseResultsFile.py ├── Datastore.py ├── Trace.py ├── config.py └── main.py /CHANGELOG: -------------------------------------------------------------------------------- 1 | Version 0.1: Initial release 2 | -------------------------------------------------------------------------------- /countermeasures/PadToMTU.py: -------------------------------------------------------------------------------- 1 | # This is a Python framework to compliment "Peek-a-Boo, I Still See You: Why Efficient Traffic Analysis Countermeasures Fail". 2 | # Copyright (C) 2012 Kevin P. Dyer (kpdyer.com) 3 | # See LICENSE for more details. 
class LiberatoreClassifier:
    """Liberatore-Levine classifier: packet-length histogram features fed to
    Weka's Naive Bayes with kernel density estimation."""

    @staticmethod
    def traceToInstance( trace ):
        """Turn a trace into a feature dict: its packet-length histogram plus
        the webpage label stored under the 'class' key."""
        features = trace.getHistogram()
        features['class'] = 'webpage' + str(trace.getId())
        return features

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files for both sets and run Naive Bayes (-K enables
        kernel density estimation) through the Weka command line."""
        trainFile, testFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
        return wekaAPI.execute(trainFile, testFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
class WrightClassifier:
    """Wright et al. classifier: a direction-agnostic, normalised
    packet-length histogram fed to Weka's Naive Bayes."""

    @staticmethod
    def traceToInstance( trace ):
        """Build the feature dict from the normalised histogram over both
        directions; the webpage label goes under 'class'."""
        features = trace.getHistogram(None, True)
        features['class'] = 'webpage' + str(trace.getId())
        return features

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files and invoke Naive Bayes (-K: kernel density)."""
        trainFile, testFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
        return wekaAPI.execute(trainFile, testFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
class PadRFCFixed:
    """Countermeasure: RFC-style fixed session padding.  One padding amount
    (a multiple of 8 in [8, 248]) is drawn per trace and added to every
    packet, capped at the MTU."""

    @staticmethod
    def applyCountermeasure(trace):
        # Single per-trace draw: every packet in the session grows by the
        # same amount, mimicking one fixed cipher-padding choice.
        pad = random.choice(range(8, 256, 8))

        padded = Trace(trace.getId())
        for pkt in trace.getPackets():
            capped = min(pkt.getLength() + pad, Packet.MTU)
            padded.addPacket(Packet(pkt.getDirection(), pkt.getTime(), capped))
        return padded
class PadRand:
    """Countermeasure: per-packet random padding.  Each packet grows by an
    independent random multiple of 8 chosen from its remaining headroom
    below the MTU."""

    @staticmethod
    def applyCountermeasure(trace):
        padded = Trace(trace.getId())
        for pkt in trace.getPackets():
            headroom = Packet.MTU - pkt.getLength()
            if headroom > 0:
                # Fresh draw per packet from [0, headroom) in steps of 8.
                newLength = pkt.getLength() + random.choice(range(0, headroom, 8))
            else:
                # Already at (or past) the MTU: clamp to the MTU.
                newLength = Packet.MTU
            padded.addPacket(Packet(pkt.getDirection(), pkt.getTime(), newLength))
        return padded
class BandwidthClassifier:
    """Classifier on exactly two features: total bytes transferred upstream
    and downstream."""

    @staticmethod
    def traceToInstance( trace ):
        """Return {'bandwidthUp', 'bandwidthDown', 'class'} for the trace."""
        return {
            'bandwidthUp': trace.getBandwidth(Packet.UP),
            'bandwidthDown': trace.getBandwidth(Packet.DOWN),
            'class': 'webpage' + str(trace.getId()),
        }

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files and invoke Naive Bayes (-K: kernel density)."""
        trainFile, testFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
        return wekaAPI.execute(trainFile, testFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
class TimeClassifier:
    """Classifier on a single feature: total trace duration, taken as the
    timestamp of the latest packet (0 for an empty trace)."""

    @staticmethod
    def traceToInstance( trace ):
        """Return {'time': <largest packet timestamp>, 'class': <label>}."""
        lastSeen = 0
        for pkt in trace.getPackets():
            lastSeen = max(lastSeen, pkt.getTime())
        return {'time': lastSeen, 'class': 'webpage' + str(trace.getId())}

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files and invoke Naive Bayes (-K: kernel density)."""
        trainFile, testFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
        return wekaAPI.execute(trainFile, testFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
class MiceElephants:
    """Countermeasure: collapse packet sizes into two buckets -- "mice"
    (<= 128 bytes) and "elephants" (<= 1500 bytes)."""

    @staticmethod
    def applyCountermeasure(trace):
        morphed = Trace(trace.getId())
        for pkt in trace.getPackets():
            morphed.addPacket(Packet(pkt.getDirection(), pkt.getTime(),
                                     MiceElephants.calcLength(pkt.getLength())))
        return morphed

    @staticmethod
    def calcLength(packetLength):
        """Round a length up to the smallest bucket that fits it.

        NOTE(review): lengths above 1500 fall through and map to 0 --
        presumably unreachable since Packet.MTU is 1500; confirm.
        """
        for bucket in (128, 1500):
            if packetLength <= bucket:
                return bucket
        return 0
class PadRoundExponential:
    """Countermeasure: round every packet length up to the next
    power-of-two-style bucket (128, 256, 512, 1024) with 1500 as the
    terminal bucket."""

    @staticmethod
    def applyCountermeasure(trace):
        rounded = Trace(trace.getId())
        for pkt in trace.getPackets():
            rounded.addPacket(Packet(pkt.getDirection(), pkt.getTime(),
                                     PadRoundExponential.calcLength(pkt.getLength())))
        return rounded

    @staticmethod
    def calcLength(packetLength):
        """Return the smallest bucket >= packetLength, or 0 if none fits."""
        for bucket in (128, 256, 512, 1024, 1500):
            if packetLength <= bucket:
                return bucket
        return 0
class PadRoundLinear:
    """Countermeasure: round every packet length up to the next multiple of
    128 bytes, with 1500 (the MTU payload) as the final bucket."""

    # Buckets 128, 256, ..., 1408 plus the terminal 1500.
    # BUG FIX: the original did `VALID_PACKETS = range(...)` followed by
    # `.append(1500)`, which only works on Python 2 where range() returns a
    # list; list(range(...)) + [1500] is equivalent and works on Python 3.
    # Hoisted to a class constant so it is built once, not per call.
    VALID_PACKETS = list(range(128, 1500, 128)) + [1500]

    @staticmethod
    def applyCountermeasure(trace):
        """Return a new Trace whose packet lengths are rounded up to buckets;
        direction and timing are preserved."""
        newTrace = Trace(trace.getId())
        for packet in trace.getPackets():
            newPacket = Packet( packet.getDirection(),
                                packet.getTime(),
                                PadRoundLinear.calcLength(packet.getLength()) )
            newTrace.addPacket( newPacket )

        return newTrace

    @staticmethod
    def calcLength(packetLength):
        """Return the smallest bucket >= packetLength, or 0 if none fits
        (lengths above 1500, which should not occur for MTU-bounded traces)."""
        for val in PadRoundLinear.VALID_PACKETS:
            if packetLength <= val:
                return val
        return 0
class PcapParserTestCase(unittest.TestCase):
    """Regression test for pcapparser.readfile against one known capture
    (month=3, day=14, hour=22, webpageId=8)."""

    def test_readfile(self):
        actualTrace = pcapparser.readfile( month=3, day=14, hour=22, webpageId=8 )

        expectedTrace = Trace(8)
        expectedTrace.addPacket( Packet( Packet.UP  , 0  , 148 ) )
        expectedTrace.addPacket( Packet( Packet.DOWN, 0  , 100 ) )
        expectedTrace.addPacket( Packet( Packet.UP  , 0  , 52  ) )
        expectedTrace.addPacket( Packet( Packet.UP  , 3  , 500 ) )
        expectedTrace.addPacket( Packet( Packet.DOWN, 18 , 244 ) )
        expectedTrace.addPacket( Packet( Packet.UP  , 35 , 436 ) )
        expectedTrace.addPacket( Packet( Packet.DOWN, 75 , 52  ) )
        expectedTrace.addPacket( Packet( Packet.DOWN, 118, 292 ) )
        expectedTrace.addPacket( Packet( Packet.UP  , 158, 52  ) )

        # BUG FIX: the original test built expectedTrace but never compared
        # it to actualTrace, so it could never fail.  Compare the traces
        # packet by packet on every observable Packet field.
        expectedPackets = expectedTrace.getPackets()
        actualPackets = actualTrace.getPackets()
        self.assertEqual( len(expectedPackets), len(actualPackets) )
        for expected, actual in zip(expectedPackets, actualPackets):
            self.assertEqual( expected.getDirection(), actual.getDirection() )
            self.assertEqual( expected.getTime(), actual.getTime() )
            self.assertEqual( expected.getLength(), actual.getLength() )

if __name__ == '__main__':
    unittest.main()
# TF-N implementation of Multinomial Naive Bayes Classifier
class HerrmannClassifier:
    """Herrmann et al. classifier: packet-length histogram with a TF
    (log-dampening) transformation and cosine normalisation, classified by
    Weka's multinomial Naive Bayes."""

    @staticmethod
    def traceToInstance( trace ):
        """Return the TF-transformed, cosine-normalised histogram of the
        trace, with the webpage label under 'class'."""
        instance = trace.getHistogram()

        # TF transformation: dampen raw counts logarithmically (base 2).
        for attribute in instance:
            instance[attribute] = math.log( 1 + instance[attribute], 2 )

        # Euclidean length for cosine normalisation (Section 4.5.2).
        euclideanLength = math.sqrt( sum( v * v for v in instance.values() ) )

        # BUG FIX: an empty histogram (a trace with no packets) previously
        # raised ZeroDivisionError here; leave such instances unnormalised.
        if euclideanLength > 0:
            for attribute in instance:
                instance[attribute] /= euclideanLength

        instance['class'] = 'webpage'+str(trace.getId())
        return instance

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files and invoke multinomial Naive Bayes via Weka."""
        [trainingFile,testingFile] = arffWriter.writeArffFiles( runID, trainingSet, testingSet )
        return wekaAPI.execute( trainingFile, testingFile, "weka.classifiers.bayes.NaiveBayesMultinomial", [] )
class Webpage:
    """A web page identity together with the set of network traces observed
    when loading it."""

    def __init__( self, id ):
        self.__id = int(id)       # numeric webpage identifier
        self.__traceSet = []      # traces observed for this page, in insertion order

    def addTrace( self, trace ):
        """Append one observed trace."""
        self.__traceSet.append( trace )

    def getTrace( self, n ):
        """Return the n-th trace (insertion order)."""
        return self.__traceSet[n]

    def getTraces( self ):
        """Return the list of all traces."""
        return self.__traceSet

    def getId( self ):
        """Return the numeric webpage id."""
        return self.__id

    def getBandwidth(self):
        """Total bytes across all traces (sum of each trace's bandwidth)."""
        totalBandwidth = 0
        for trace in self.getTraces():
            totalBandwidth += trace.getBandwidth()
        return totalBandwidth

    def getHistogram( self, direction = None, normalize = False ):
        """Aggregate packet-length histogram over all traces.

        direction: optional direction filter forwarded to each trace.
        normalize: when True, convert counts to fractions of all packets.
        """
        histogram = {}
        totalPackets = 0
        for trace in self.getTraces():
            traceHistogram = trace.getHistogram( direction, False )
            for key, count in traceHistogram.items():
                histogram[key] = histogram.get( key, 0 ) + count
                totalPackets += count

        # BUG FIX: normalising with zero packets (no traces, or only empty
        # traces) previously raised ZeroDivisionError; an empty histogram
        # is returned unchanged instead.
        if normalize and totalPackets > 0:
            for key in histogram:
                histogram[key] = (histogram[key] * 1.0) / totalPackets

        return histogram
class Packet:
    """One observed packet: direction, timestamp, and on-the-wire length."""

    # Direction constants.
    UP = 0      # client -> server
    DOWN = 1    # server -> client

    # Header size constants (bytes).
    HEADER_ETHERNET = 0  # is actually 14 on the LAN; modelled as 0 here
    HEADER_IP = 20
    HEADER_TCP_REQUIRED = 20
    HEADER_TCP_OPTIONAL = 12
    HEADER_TCP = HEADER_TCP_REQUIRED + HEADER_TCP_OPTIONAL

    # SSHv1 packet framing overhead.
    HEADER_SSH_PACKET_FIELD_LENGTH = 4
    HEADER_SSH_PACKET_TYPE = 1
    HEADER_SSH_PADDING = 7  # payload is already 0 mod 8
    HEADER_SSH_CRC = 4
    HEADER_SSH = (HEADER_SSH_PACKET_FIELD_LENGTH + HEADER_SSH_PACKET_TYPE
                  + HEADER_SSH_PADDING + HEADER_SSH_CRC)

    HEADER_LENGTH = HEADER_ETHERNET + HEADER_IP + HEADER_TCP

    MTU = 1500 + HEADER_ETHERNET

    def __init__( self, direction, time, length ):
        # Coerce everything to int so string inputs (e.g. values parsed from
        # a database row) behave identically to numeric ones.
        self.__direction, self.__time, self.__length = (
            int(direction), int(time), int(length))

    def getDirection(self):
        """Packet.UP or Packet.DOWN."""
        return self.__direction

    def getLength(self):
        """Length in bytes."""
        return self.__length

    def getTime(self):
        """Timestamp (relative units chosen by the producer of the trace)."""
        return self.__time

    def setLength(self, length):
        """Overwrite the length (coerced to int)."""
        self.__length = int(length)

    def setTime(self, time):
        """Overwrite the timestamp (coerced to int)."""
        self.__time = int(time)
def execute( trainingFile, testingFile, classifier, args ):
    # Run a Weka classifier from the command line and parse its CSV
    # prediction output.
    #
    # trainingFile / testingFile: ARFF file paths (see arffWriter).
    # classifier: fully-qualified Weka class name, e.g.
    #             "weka.classifiers.bayes.NaiveBayes".
    # args: extra classifier options appended verbatim (e.g. ['-K']).
    # Returns [accuracy, debugInfo]: accuracy as a percentage, debugInfo as
    # a list of [actualClass, predictedClass] pairs, one per test instance.
    myArgs = ["java",
              "-Xmx" + str(config.JVM_MEMORY_SIZE),
              "-classpath", '$CLASSPATH:'+config.WEKA_JAR,
              classifier,
              "-t", trainingFile,
              "-T", testingFile,
              '-v',  # suppress training-set statistics in the output
              '-classifications','weka.classifiers.evaluation.output.prediction.CSV'
              ]

    for arg in args:
        myArgs.append( arg )

    # NOTE(review): shell=True with a joined command string is what makes
    # the $CLASSPATH reference above expand, but it also means any shell
    # metacharacters in the file paths are interpreted -- only safe for
    # trusted, locally generated paths.
    pp = subprocess.Popen(' '.join(myArgs), shell=True, stdout=subprocess.PIPE)

    totalPredictions = 0
    totalCorrectPredictions = 0
    debugInfo = []
    parsing = False
    for line in pp.stdout:
        line = line.rstrip()

        if parsing == True:
            # A blank line marks the end of the prediction table.
            if line=='': break;
            # CSV row: inst#,actual,predicted,error,prediction -- the class
            # columns look like "1:webpage42" (index:label), so take the
            # label after the colon.
            lineBits = line.split(',')
            actualClass = lineBits[1].split(':')[1]
            predictedClass = lineBits[2].split(':')[1]
            debugInfo.append([actualClass,predictedClass])
            totalPredictions += 1.0
            if actualClass == predictedClass:
                totalCorrectPredictions += 1.0

        # Predictions begin on the line after this exact CSV header.
        if line == 'inst#,actual,predicted,error,prediction':
            parsing = True

    # NOTE(review): if Weka produced no predictions (e.g. the JVM failed to
    # start) this raises ZeroDivisionError -- presumably an intentional
    # fail-fast, but confirm before relying on it.
    accuracy = totalCorrectPredictions / totalPredictions * 100.0

    return [accuracy,debugInfo]
class VNGClassifier:
    """Classifier over burst features: each contiguous run of same-direction
    packets is summed, and the byte total is rounded to a 600-byte bin.
    Features count how many bursts fell in each (direction, bin) pair."""

    @staticmethod
    def roundArbitrary(x, base):
        """Round x to the nearest multiple of base."""
        return int(base * round(float(x)/base))

    @staticmethod
    def traceToInstance( trace ):
        """Return the burst-size feature dict for a trace, with the webpage
        label under 'class'."""
        instance = {}

        def record(direction, byteCount):
            # One feature per (direction, rounded burst size); the value is
            # the number of bursts that landed in that bin.
            key = 'S' + str(direction) + '-' + str(
                VNGClassifier.roundArbitrary(byteCount, 600))
            instance[key] = instance.get(key, 0) + 1

        burstDirection = None
        burstBytes = 0
        for pkt in trace.getPackets():
            if burstDirection is None:
                burstDirection = pkt.getDirection()
            if pkt.getDirection() != burstDirection:
                # Direction flipped: close the current burst, start a new one.
                record(burstDirection, burstBytes)
                burstDirection = pkt.getDirection()
                burstBytes = 0
            burstBytes += pkt.getLength()

        # Flush the trailing burst (skipped entirely for an empty trace).
        if burstBytes > 0:
            record(burstDirection, burstBytes)

        instance['class'] = 'webpage' + str(trace.getId())
        return instance

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files and invoke Naive Bayes (-K: kernel density)."""
        trainFile, testFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
        return wekaAPI.execute(trainFile, testFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
def readfile( month, day, hour, webpageId ):
    # Load one Liberatore/Levine capture into a Trace.
    #
    # month/day/hour select the 2006-MM-DDTHH* capture directory and
    # webpageId selects the per-page capture file inside it.
    # Returns a Trace; the Trace is empty when no matching pcap exists.
    strId = '.'.join([str(month), str(day), str(hour), str(webpageId)])  # NOTE(review): unused

    trace = Trace(webpageId)
    start = 0  # timestamp of the first packet; all deltas are relative to it

    absPath = __constructAbsolutePath( month, day, hour, webpageId )

    if absPath:
        # Python 2 file() -- this module predates Python 3.
        pcapReader = dpkt.pcap.Reader( file( absPath, "rb") )

        for ts, buf in pcapReader:
            eth = dpkt.ethernet.Ethernet(buf)
            ip = eth.data
            tcp = ip.data

            if start==0: start = ts
            # Direction heuristic: anything sent from TCP port 22 (the SSH
            # server) is DOWN; everything else is UP.
            direction = Packet.UP
            if (tcp.sport==22):
                direction = Packet.DOWN
            # Milliseconds since the first packet of the capture.
            delta = int(round(((ts - start) * 1000),0))
            # IP total length plus the (modelled) ethernet header size.
            length = ip.len + Packet.HEADER_ETHERNET

            trace.addPacket( Packet(direction, delta, length ) )

    return trace

def __constructAbsolutePath( month, day, hour, webpageId ):
    # Resolve the capture file under config.PCAP_ROOT matching the pattern
    # 2006-MM-DDTHH*/*-<webpageId>.  Returns the first match or None.
    # Raises when the pcap root itself is missing (misconfiguration).
    if not os.path.exists(config.PCAP_ROOT):
        raise Exception('Directory ('+config.PCAP_ROOT+') does not exist')

    monthStr = '%02d' % month
    dayStr = '%02d' % day
    hourStr = '%02d' % hour
    path = os.path.join(config.PCAP_ROOT, '2006-'+monthStr
                        +'-'+dayStr
                        +'T'+hourStr
                        +'*/*'
                        +'-'+str(webpageId))

    pathList = glob.glob(path)

    absFilePath = None
    if len(pathList)>0:
        absFilePath = pathList[0]

    return absFilePath
Output from main.py is placed in the 'output' directory.
def writeArffFiles( runID, trainingSet, testingSet ):
    """Write one training and one testing ARFF file for Weka.

    Both files share a single attribute list and class enumeration (the
    union over both instance sets) so that Weka sees identical columns.
    Instances are dicts of feature -> value with the label under 'class'.
    Returns [trainingFilePath, testingFilePath].
    """
    trainingFilename = 'datafile-'+runID+'-train'
    testingFilename = 'datafile-'+runID+'-test'

    allInstances = list(trainingSet) + list(testingSet)

    # First-seen order is preserved: attribute order fixes the ARFF column
    # order and the class list fixes the @ATTRIBUTE class enumeration.
    classes = __orderedUnique( [instance['class'] for instance in allInstances] )
    attributes = __orderedUnique( [key for instance in allInstances
                                       for key in instance] )

    trainingFile = __writeArffFile( trainingSet, trainingFilename, classes, attributes )
    testingFile = __writeArffFile( testingSet, testingFilename, classes, attributes )

    return [trainingFile, testingFile]


def __orderedUnique( items ):
    # Deduplicate while keeping first-seen order; the companion set makes
    # membership checks O(1) instead of the original O(n) list scans.
    seen = set()
    ordered = []
    for item in items:
        if item not in seen:
            seen.add(item)
            ordered.append(item)
    return ordered


def __writeArffFile( inputArray, outputFile, classes, attributes ):
    # Render one instance set to <config.CACHE_DIR>/<outputFile>.arff and
    # return the full path.
    arffFile = []
    arffFile.append('@RELATION sites')
    for attribute in attributes:
        if attribute!='class':
            arffFile.append('@ATTRIBUTE '+str(attribute)+' real')
    arffFile.append('@ATTRIBUTE class {'+','.join(classes)+'}')
    arffFile.append('@DATA')

    for instance in inputArray:
        tmpBuf = []
        for attribute in attributes:
            if attribute!='class':
                # Missing attributes and zero counts both render as '0'.
                val = '0'
                if instance.get(attribute) not in [None,0]:
                    val = str(instance[attribute])
                tmpBuf.append(val)
        tmpBuf.append(instance['class'])

        # BUG FIX: itertools.imap is Python 2 only (removed in Python 3);
        # the builtin map() is equivalent here on both versions.
        arffFile.append( ','.join(map(str, tmpBuf)) )

    outputFile = os.path.join(config.CACHE_DIR, outputFile+'.arff')
    f = open( outputFile, 'w' )
    f.write( "\n".join( arffFile ) )
    f.close()

    return outputFile
class VNGPlusPlusClassifier:
    """VNG++ classifier: burst-size features (same-direction runs of packets
    summed and rounded to 600-byte bins) augmented with per-direction
    bandwidth totals and the total trace time."""

    @staticmethod
    def roundArbitrary(x, base):
        """Round x to the nearest multiple of base."""
        return int(base * round(float(x)/base))

    @staticmethod
    def traceToInstance( trace ):
        """Return the VNG++ feature dict for a trace, with the webpage label
        under 'class'."""
        instance = {}

        def tally(direction, byteCount):
            # One feature per (direction, rounded burst size), counting bursts.
            key = 'S' + str(direction) + '-' + str(
                VNGPlusPlusClassifier.roundArbitrary(byteCount, 600))
            instance[key] = instance.get(key, 0) + 1

        burstDirection = None
        burstBytes = 0
        for packet in trace.getPackets():
            if burstDirection is None:
                burstDirection = packet.getDirection()
            if packet.getDirection() != burstDirection:
                # Direction flipped: close the burst and start the next one.
                tally(burstDirection, burstBytes)
                burstDirection = packet.getDirection()
                burstBytes = 0
            burstBytes += packet.getLength()
        if burstBytes > 0:
            tally(burstDirection, burstBytes)

        # Coarse aggregate features layered on top of the burst histogram.
        instance['bandwidthUp'] = trace.getBandwidth( Packet.UP )
        instance['bandwidthDown'] = trace.getBandwidth( Packet.DOWN )

        lastSeen = 0
        for packet in trace.getPackets():
            lastSeen = max(lastSeen, packet.getTime())
        instance['time'] = lastSeen

        instance['class'] = 'webpage' + str(trace.getId())
        return instance

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Write ARFF files and invoke Naive Bayes (-K: kernel density)."""
        trainFile, testFile = arffWriter.writeArffFiles(runID, trainingSet, testingSet)
        return wekaAPI.execute(trainFile, testFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
def parseResultsFile(filename):
    """Aggregate one results CSV produced by main.py.

    The file has a comma-separated header line (including 'accuracy',
    'overhead' as numerator/denominator pairs, and 'timeElapsedTotal'),
    followed by ', '-separated data rows.
    Returns [meanAccuracy, overhead, timeElapsed, trials] as strings, or
    None when the file has no data rows.
    """
    resultsFile = open(filename, 'r')
    contents = resultsFile.read()
    resultsFile.close()  # BUG FIX: the handle was never closed before

    # BUG FIX: a trailing newline used to yield a bogus empty row that
    # crashed the accuracy aggregation; blank lines are skipped now.
    lines = [line for line in contents.split("\n") if line != '']
    if len(lines) <= 1:
        return None

    columns = lines[0].split(',')
    data = {}
    for column in columns:
        data[column] = []

    for line in lines[1:]:
        items = line.split(', ')
        # Renamed inner index so it no longer shadows the row variable.
        for col in range(len(items)):
            data[columns[col]].append(items[col])

    # BUG FIX: `data.keys()[0]` is Python 2 only (dict views are not
    # indexable on Python 3); all columns have the same row count, so the
    # first declared column works and is deterministic.
    trials = str(len(data[columns[0]]))

    for key in data:
        if key == 'accuracy':
            values = [float(v) for v in data[key]]
            meanAccuracy = '%.1f' % round(sum(values) / len(values), 1)

        elif key == 'overhead':
            # Rows look like "numerator/denominator"; overhead is the
            # percentage increase of the summed ratio over 100%.
            numeratorSum = 0
            denominatorSum = 0
            for entry in data[key]:
                numerator, denominator = entry.split('/')
                numeratorSum += int(numerator)
                denominatorSum += int(denominator)

            overhead = str( round(( numeratorSum * 100.0 / denominatorSum ) - 100,1) )

        elif key == 'timeElapsedTotal':
            values = [float(v) for v in data[key]]
            # statlib's stats.mean is the plain arithmetic mean; computing it
            # with builtins removes the third-party dependency here.
            timeElapsed = str(round(sum(values) / len(values), 1))

    return [meanAccuracy, overhead, timeElapsed, trials]
class JaccardClassifier:
    """Set-of-packet-lengths classifier: a trace is represented by the set of
    packet lengths it contains and matched with a Jaccard-style coefficient."""

    @staticmethod
    def traceToInstance( trace ):
        """Map a trace to the set of observed packet lengths (as dict keys),
        plus the 'class' label."""
        instance = {}
        for p in trace.getPackets():
            instance[p.getLength()] = 1

        instance['class'] = 'webpage'+str(trace.getId())
        return instance

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        """Train on trainingSet, classify testingSet.

        Returns [accuracy, debugInfo] where accuracy is a percentage and
        debugInfo is a list of [actualClass, guessedClass] pairs.
        """
        # Per class, count in how many training traces each length occurs.
        bagOfLengths = {}
        for instance in trainingSet:
            if not bagOfLengths.get(instance['class']):
                bagOfLengths[instance['class']] = {}
            for attribute in instance:
                if attribute!='class':
                    if not bagOfLengths[instance['class']].get(attribute):
                        bagOfLengths[instance['class']][attribute] = 0
                    bagOfLengths[instance['class']][attribute] += 1

        # Keep only lengths seen in at least half the training traces.
        # BUGFIX: iterate over a snapshot of the keys -- deleting from a dict
        # while iterating its live key view raises RuntimeError on Python 3
        # (Python 2's .keys() returned a copy, which hid the problem).
        for className in bagOfLengths:
            for length in list(bagOfLengths[className].keys()):
                if bagOfLengths[className][length] < (config.NUM_TRAINING_TRACES/2.0):
                    del bagOfLengths[className][length]

        correctlyClassified = 0
        debugInfo = []
        for instance in testingSet:
            guess = JaccardClassifier.doClassify(bagOfLengths, instance)
            if guess == instance['class']:
                correctlyClassified += 1
            debugInfo.append([instance['class'], guess])

        accuracy = 100.0 * correctlyClassified / len(testingSet)

        return [accuracy, debugInfo]

    @staticmethod
    def doClassify(bagOfLengths, instance):
        """Return the class whose filtered length bag is most similar to
        instance; ties keep the first class encountered."""
        guess = None
        bestSimilarity = 0
        for className in bagOfLengths:
            # NOTE(review): this is |A ∩ B| / (|A| + |B|), not the true
            # Jaccard index |A ∩ B| / |A ∪ B|; kept as-is to preserve the
            # published implementation's behavior.
            intersection = 0
            for attribute in instance:
                if attribute!='class' and attribute in bagOfLengths[className]:
                    intersection += 1
            union = (len(instance) - 1) + len(bagOfLengths[className])
            if union == 0:
                similarity = 0
            else:
                similarity = 1.0 * intersection / union
            if guess == None or similarity > bestSimilarity:
                bestSimilarity = similarity
                guess = className

        return guess
class DirectTargetSampling:
    """Countermeasure: re-sample packet lengths from a target page's
    packet-length distribution so the morphed trace mimics the target."""

    # Stop secondary sampling once the L1 distance to the target
    # distribution falls to or below this threshold.
    L1_THRESHHOLD = 0.3

    @staticmethod
    def buildMetadata( srcWebpage, targetWebpage ):
        """Precompute the target page's normalized length histograms:
        [bidirectional, uplink-only, downlink-only]."""
        return [targetWebpage.getHistogram( None, True ),
                targetWebpage.getHistogram( Packet.UP, True ),
                targetWebpage.getHistogram( Packet.DOWN, True )]

    @staticmethod
    def applyCountermeasure( trace, metadata ):
        """Return a new trace sampled from the target distributions.

        Primary phase replaces each real packet with one or more sampled
        packets; secondary phase pads along the most skewed histogram
        dimension until the L1 distance drops below L1_THRESHHOLD.
        """
        distBi, distUp, distDown = metadata

        morphed = Trace(trace.getId())

        # Primary sampling.
        clock = 0
        for original in trace.getPackets():
            clock = original.getTime()
            perDirection = distUp if original.getDirection() == Packet.UP else distDown
            for sampled in DirectTargetSampling.morphPacket( original, perDirection ):
                morphed.addPacket( sampled )

        # Secondary sampling: one padding packet per 10ms step.
        while morphed.calcL1Distance( distBi ) > DirectTargetSampling.L1_THRESHHOLD:
            clock += 10
            direction, length = morphed.getMostSkewedDimension( distBi )
            morphed.addPacket( Packet( direction, clock, length ) )

        return morphed

    @staticmethod
    def morphPacket( packet, targetDistribution ):
        """Sample target-sized packets until they carry at least as much
        payload as the original packet (per-packet header penalty excluded)."""
        penalty = config.PACKET_PENALTY

        sampled = DirectTargetSampling.generatePacket( targetDistribution, packet )
        result = [sampled]

        # Payload carried so far; clamped at zero because a sampled packet
        # can be smaller than the header penalty (e.g. an ACK).
        carried = max( sampled.getLength() - penalty, 0 )
        remaining = (packet.getLength() - penalty) - carried

        while remaining > 0:
            sampled = DirectTargetSampling.generatePacket( targetDistribution, packet )
            result.append( sampled )
            remaining -= max( sampled.getLength() - penalty, 0 )

        return result

    @staticmethod
    def generatePacket( targetDistribution, packet ):
        """Clone packet's direction/time with a length drawn from
        targetDistribution ('direction-length' keys); falls back to 1500
        when the draw returns nothing."""
        drawn = DirectTargetSampling.sampleFromDistribution( targetDistribution )
        if drawn is None:
            length = 1500
        else:
            length = int(drawn.split('-')[1])
        return Packet( packet.getDirection(), packet.getTime(), length )

    @staticmethod
    def sampleFromDistribution( distribution ):
        """Weighted random draw of a histogram key.

        Returns None for an empty distribution; if the draw lands exactly on
        the total weight, the last key iterated is returned.
        """
        total = sum(distribution.values())
        n = random.uniform(0, total)

        key = None
        for key in distribution:
            if n < distribution[key]:
                return key
            n -= distribution[key]

        return key
class Folklore:
    # "Folklore" countermeasure (BuFLO-style): emit fixed-length packets in
    # both directions on a fixed clock, queueing the real payload in per-
    # direction FIFO buffers and draining it into the cover packets.

    # Every emitted packet has this total length (bytes).
    FIXED_PACKET_LEN = 1000
    # One scheduling tick every 20 ms.
    TIMER_CLOCK_SPEED = 20
    # Minimum running time of the cover traffic in ms; 0 means stop as soon
    # as the source trace and both buffers are drained.
    MILLISECONDS_TO_RUN = 0

    class Buffer:
        # Minimal FIFO queue of Packet objects awaiting transmission.
        def __init__(self):
            self.__array = []

        def queue(self):
            # Expose the underlying list; callers use it read-only to peek
            # at the oldest packet.
            return self.__array

        def add(self,p):
            self.__array.append(p)

        def remove(self):
            # Pop and return the oldest packet, or None when empty.
            if len(self.__array)==0:
                return None
            else:
                p = self.__array[0]
                del self.__array[0]
                return p

        def hasPackets(self):
            return (len(self.__array)>0)

    @staticmethod
    def packFromBuffer(outgoingCellCapacity, buffer):
        # Drain up to outgoingCellCapacity payload bytes from the buffer for
        # one outgoing cell.  A packet too large for the remaining capacity
        # is split: the unsent remainder is re-queued (note: at the TAIL of
        # the FIFO, not the head).
        # NOTE(review): the `elif` tests p.getLength() <= capacity while the
        # `if` tests p.getLength() - HEADER_LENGTH > capacity, so a packet
        # whose length falls between `capacity` and `capacity + HEADER_LENGTH`
        # hits the final `else` after having been removed -- it appears to be
        # dropped.  Left byte-identical on purpose; confirm against the paper.
        while outgoingCellCapacity>0:
            p = buffer.remove()
            if p and (p.getLength()-Packet.HEADER_LENGTH) > outgoingCellCapacity:
                newP = Packet( p.getDirection(), p.getTime(), (p.getLength()-Packet.HEADER_LENGTH)-outgoingCellCapacity )
                buffer.add(newP)
                break
            elif p and p.getLength() <= outgoingCellCapacity:
                outgoingCellCapacity -= (p.getLength()-Packet.HEADER_LENGTH)
            else:
                break

    @staticmethod
    def applyCountermeasure( trace ):
        # Public entry point: return only the transformed trace.
        return Folklore.doCountermeasure(trace)[0]

    @staticmethod
    def maxLatency( trace ):
        # Worst-case queueing delay (ms) any buffered packet experienced.
        latencyList = Folklore.doCountermeasure(trace)[1]
        maxLatency = 0
        if len(latencyList)>0:
            maxLatency = max(latencyList)
        return maxLatency

    @staticmethod
    def doCountermeasure( trace ):
        # Returns [newTrace, latencySamples].
        # Median trace length in the herrmann dataset is 3500ms
        # Median throughput is 62000 bytes/second
        # 40*1500 = 60000 bytes/second

        newTrace = Trace(trace.getId())

        latency = []                      # per-tick head-of-line delays observed
        timer = 0                         # simulated clock, in ms
        bufferUP = Folklore.Buffer()
        bufferDOWN = Folklore.Buffer()
        packetCursor = 0                  # next unread packet in the source trace

        # Terminate only if (1) our clock is up, (2) we have no more packets from the source
        # and (3) our buffers are empty
        while timer <= Folklore.MILLISECONDS_TO_RUN \
            or packetCursor < trace.getPacketCount() \
            or bufferUP.hasPackets() \
            or bufferDOWN.hasPackets():

            # calculate max latency: sample how long the oldest queued packet
            # in each direction has been waiting
            if bufferUP.hasPackets():
                earliestPacket = bufferUP.queue()[0]
                latency.append( timer - earliestPacket.getTime() )
            if bufferDOWN.hasPackets():
                earliestPacket = bufferDOWN.queue()[0]
                latency.append( timer - earliestPacket.getTime() )

            # add to buffer: all packets that appeared since last clock
            while packetCursor < trace.getPacketCount()\
                and trace.getPackets()[packetCursor].getTime()<=timer:
                packet = trace.getPackets()[packetCursor]

                if packet.getDirection() == Packet.UP:
                    bufferUP.add( packet )
                elif packet.getDirection() == Packet.DOWN:
                    bufferDOWN.add( packet )

                # increment position in source buffer
                packetCursor += 1

            # check buffer UP: purge at most Packet.MTU bytes
            Folklore.packFromBuffer(Folklore.FIXED_PACKET_LEN-Packet.HEADER_LENGTH, bufferUP)

            # check buffer DOWN: purge at most Packet.MTU bytes
            Folklore.packFromBuffer(Folklore.FIXED_PACKET_LEN-Packet.HEADER_LENGTH, bufferDOWN)

            # send a byte in both directions: one fixed-length cover packet
            # is emitted per direction every tick, regardless of payload
            newTrace.addPacket( Packet(Packet.DOWN, timer, Folklore.FIXED_PACKET_LEN ) )
            newTrace.addPacket( Packet(Packet.UP , timer, Folklore.FIXED_PACKET_LEN ) )

            # go to the next clock cycle
            timer += Folklore.TIMER_CLOCK_SPEED

        return [newTrace,latency]
class Datastore:
    # Data-access layer: loads Webpage/Trace objects either from the
    # Liberatore-Levine (LL) pcap logs on disk or from the Herrmann MySQL
    # database, with an optional memcached layer in front (gated by the
    # module-level ENABLE_CACHE flag).

    @staticmethod
    def getWebpagesLL( webpageIds, traceIndexStart, traceIndexEnd ):
        # Load one Webpage per id from the LL pcap dataset, each populated
        # with traces traceIndexStart .. traceIndexEnd-1.
        webpages = []
        for webpageId in webpageIds:
            webpage = Webpage(webpageId)
            for traceIndex in range(traceIndexStart, traceIndexEnd):
                trace = Datastore.getTraceLL( webpageId, traceIndex )
                webpage.addTrace(trace)
            webpages.append(webpage)

        return webpages

    @staticmethod
    def getTraceLL( webpageId, traceIndex ):
        # Fetch a single LL trace, trying memcached first; traceIndex selects
        # a capture session from config.DATA_SET.
        # NOTE: with ENABLE_CACHE False the cached value is ignored and the
        # pcap is always re-parsed, but the cache is still (re)written.
        key = '.'.join(['Webpage',
                        'LL',
                        str(webpageId),
                        str(traceIndex)])

        trace = mc.get(key)
        if ENABLE_CACHE and trace:
            trace = cPickle.loads(trace)
        else:
            dateTime = config.DATA_SET[traceIndex]
            trace = pcapparser.readfile(dateTime['month'],
                                        dateTime['day'],
                                        dateTime['hour'],
                                        webpageId)

            mc.set(key,cPickle.dumps(trace,protocol=cPickle.HIGHEST_PROTOCOL))

        return trace

    @staticmethod
    def getWebpagesHerrmann( webpageIds, traceIndexStart, traceIndexEnd ):
        # Same shape as getWebpagesLL, but backed by the Herrmann MySQL DB.
        webpages = []
        for webpageId in webpageIds:
            webpage = Webpage(webpageId)
            for traceIndex in range(traceIndexStart, traceIndexEnd):
                trace = Datastore.getTraceHerrmann( webpageId, traceIndex )
                webpage.addTrace(trace)
            webpages.append(webpage)

        return webpages

    @staticmethod
    def getTraceHerrmann( webpageId, traceIndex ):
        # Fetch one trace from the Herrmann database (dataset 4 or 5
        # depending on config.DATA_SOURCE), memcached in front.
        # In the DB, negative packet sizes denote uplink, positive downlink;
        # abstime is converted to integer milliseconds.
        if config.DATA_SOURCE == 1:
            datasourceId = 4
        elif config.DATA_SOURCE == 2:
            datasourceId = 5

        key = '.'.join(['Webpage',
                        'H',
                        str(datasourceId),
                        str(webpageId),
                        str(traceIndex)])

        trace = mc.get(key)
        if ENABLE_CACHE and trace:
            trace = cPickle.loads(trace)
        else:
            connection = MySQLdb.connect(host=config.MYSQL_HOST,
                                         user=config.MYSQL_USER,
                                         passwd=config.MYSQL_PASSWD,
                                         db=config.MYSQL_DB )

            cursor = connection.cursor()
            # NOTE(review): SQL is built by string concatenation.  The
            # interpolated values are internal integers (dataset id, webpage
            # id, trace index), not user input, but parameterized queries
            # (cursor.execute(sql, params)) would still be safer.
            command = """SELECT packets.trace_id,
                                packets.size,
                                ROUND(packets.abstime*1000)
                         FROM (SELECT id
                               FROM traces
                               WHERE site_id = (SELECT id
                                                FROM sites
                                                WHERE dataset_id = """+str(datasourceId)+"""
                                                ORDER BY id
                                                LIMIT """+str(webpageId)+""",1)
                               ORDER BY id
                               LIMIT """+str(traceIndex)+""",1) traces,
                              packets
                         WHERE traces.id = packets.trace_id
                         ORDER BY packets.trace_id, packets.abstime"""
            cursor.execute( command )

            data = cursor.fetchall()
            trace = Trace(webpageId)
            for item in data:
                direction = Packet.UP
                if int(item[1])>0:
                    direction = Packet.DOWN
                time = item[2]
                length = int(math.fabs(item[1]))

                trace.addPacket( Packet( direction, time, length ) )
            connection.close()

            mc.set(key,cPickle.dumps(trace,protocol=cPickle.HIGHEST_PROTOCOL))

        return trace
class Trace:
    """An ordered sequence of Packets for one page load, with per-direction
    'direction-length' histograms maintained incrementally by addPacket."""

    def __init__(self,id):
        self.__packetArray = []     # packets in arrival order
        self.__id = id              # webpage id this trace belongs to
        self.__histogramUp = {}     # 'dir-len' -> count, uplink packets
        self.__histogramDown = {}   # 'dir-len' -> count, downlink packets
        self.__packetsUp = 0
        self.__packetsDown = 0
        self.__filePath = None
        self.__year = 0
        self.__month = 0
        self.__day = 0
        self.__hour = 0

    def getId(self): return self.__id

    def getPacketCount( self, direction = None ):
        """Number of packets, optionally restricted to one direction."""
        return len(self.getPackets(direction))

    def getPackets( self, direction = None ):
        """Return the packets in order, optionally filtered by direction."""
        retArray = []
        for packet in self.__packetArray:
            if direction == None or packet.getDirection() == direction:
                retArray.append( packet )
        return retArray

    def addPacket( self, packet ):
        """Append a packet, updating counters and histograms.

        Header-only packets (ACKs) are silently dropped when
        config.IGNORE_ACK is set.
        """
        # Completely ignore ACK packets
        if config.IGNORE_ACK and packet.getLength() == Packet.HEADER_LENGTH:
            return self.__packetArray

        key = str(packet.getDirection())+'-'+str(packet.getLength())

        if packet.getDirection()==Packet.UP:
            self.__packetsUp += 1
            if not self.__histogramUp.get( key ):
                self.__histogramUp[key] = 0
            self.__histogramUp[key] += 1
        elif packet.getDirection()==Packet.DOWN:
            self.__packetsDown += 1
            if not self.__histogramDown.get( key ):
                self.__histogramDown[key] = 0
            self.__histogramDown[key] += 1

        return self.__packetArray.append( packet )

    def getBandwidth( self, direction = None ):
        """Total bytes transferred, excluding header-only (ACK) packets."""
        totalBandwidth = 0
        for packet in self.getPackets():
            if (direction == None or direction == packet.getDirection()) and packet.getLength() != Packet.HEADER_LENGTH:
                totalBandwidth += packet.getLength()

        return totalBandwidth

    def getTime( self, direction = None ):
        """Timestamp of the last matching packet; 0 for an empty trace."""
        timeCursor = 0
        for packet in self.getPackets():
            if direction == None or direction == packet.getDirection():
                timeCursor = packet.getTime()

        return timeCursor

    def getHistogram( self, direction = None, normalize = False ):
        """Packet-length histogram keyed 'direction-length'.

        direction selects up/down/both; normalize=True turns counts into a
        probability distribution.
        """
        if direction == Packet.UP:
            histogram = dict(self.__histogramUp)
            totalPackets = self.__packetsUp
        elif direction == Packet.DOWN:
            histogram = dict(self.__histogramDown)
            totalPackets = self.__packetsDown
        else:
            # Merge both directions; keys never collide because they embed
            # the direction.
            histogram = dict(self.__histogramUp)
            for key in self.__histogramDown:
                histogram[key] = self.__histogramDown[key]
            totalPackets = self.__packetsDown + self.__packetsUp

        if normalize==True:
            for key in histogram:
                histogram[key] = (histogram[key] * 1.0) / totalPackets

        return histogram

    def calcL1Distance( self, targetDistribution, filterDirection=None ):
        """L1 (Manhattan) distance between this trace's normalized histogram
        and targetDistribution, over the union of their keys."""
        localDistribution = self.getHistogram( filterDirection, True )

        # BUGFIX: take a real list copy of the keys.  On Python 3,
        # dict.keys() returns a view object with no .append(), so the
        # original code raised AttributeError here (Python 2 returned a
        # list, masking the problem).
        keys = list(localDistribution.keys())
        for key in targetDistribution:
            if key not in keys:
                keys.append( key )

        distance = 0
        for key in keys:
            l = localDistribution.get(key)
            r = targetDistribution.get(key)

            if l == None and r == None: continue
            if l == None: l = 0
            if r == None: r = 0

            distance += math.fabs( l - r )

        return distance

    def getMostSkewedDimension( self, targetDistribution ):
        """Return [direction, length] for the histogram bin where the target
        distribution exceeds this trace by the largest margin."""
        localDistribution = self.getHistogram( None, True )

        keys = targetDistribution.keys()

        worstKey = None
        worstKeyDistance = 0

        for key in keys:
            l = localDistribution.get(key)
            r = targetDistribution.get(key)

            if l == None: l = 0
            if r == None: r = 0

            if worstKey==None or (r - l) > worstKeyDistance:
                worstKeyDistance = r - l
                worstKey = key

        bits = worstKey.split('-')

        return [int(bits[0]),int(bits[1])]
class ESORICSClassifier:
    # ESORICS-style classifier: compares the ordered sequences of "large"
    # packet lengths (uplink > 300 bytes; downlink 300-1450 bytes) using
    # normalized Levenshtein edit distance, weighting uplink 0.6 and
    # downlink 0.4.  Lower accumulated distance = better match.
    # NOTE: Python 2 only (uses unicode()/unichr()); requires the
    # third-party python-Levenshtein package.

    @staticmethod
    def traceToInstance( trace ):
        # This classifier consumes raw traces; no feature extraction here.
        return trace

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        # Build, per class ('Webpage<id>') and per direction, one filtered
        # length sequence per training trace.
        candidateSequences = {}
        for trace in trainingSet:
            for d in [Packet.UP, Packet.DOWN]:
                if not candidateSequences.get('Webpage'+str(trace.getId())):
                    candidateSequences['Webpage'+str(trace.getId())] = {}
                    candidateSequences['Webpage'+str(trace.getId())][Packet.UP] = []
                    candidateSequences['Webpage'+str(trace.getId())][Packet.DOWN] = []

                # Start a fresh sequence for this (trace, direction) pair.
                candidateSequences['Webpage'+str(trace.getId())][d].append([])
                for p in trace.getPackets():
                    if p.getDirection()==d:
                        if d == Packet.UP and p.getLength() > 300:
                            candidateSequences['Webpage'+str(trace.getId())][d][-1].append(p.getLength())
                        elif d == Packet.DOWN and p.getLength() > 300 and p.getLength() < 1450:
                            candidateSequences['Webpage'+str(trace.getId())][d][-1].append(p.getLength())

        # Score every test trace; returns [accuracy%, [[actual, guess], ...]].
        correctlyClassified = 0
        debugInfo = []
        for instance in testingSet:
            actual = 'Webpage'+str(instance.getId())
            guess = ESORICSClassifier.doClassify(candidateSequences, instance)
            if guess == actual:
                correctlyClassified += 1
            debugInfo.append([actual, guess])

        accuracy = 100.0 * correctlyClassified / len(testingSet)

        return [accuracy, debugInfo]

    @staticmethod
    def doClassify(candidateSequences, instance):
        # Return the class name whose training sequences have the smallest
        # accumulated weighted edit distance to `instance`.
        guess = None

        # Extract the test trace's filtered length sequences (same filters
        # as in classify()).
        targetSequenceUp = []
        targetSequenceDown = []
        for p in instance.getPackets():
            if p.getDirection()==Packet.UP and p.getLength() > 300:
                targetSequenceUp.append(p.getLength())
            elif p.getDirection()==Packet.DOWN and p.getLength() > 300 and p.getLength() < 1450:
                targetSequenceDown.append(p.getLength())

        # Accumulate, per class, the sum of normalized distances (0..1 each)
        # over all training sequences; empty sequences count as distance 1.
        similarity = {}
        for className in candidateSequences:
            if not similarity.get(className):
                similarity[className] = 0
            for direction in [Packet.UP, Packet.DOWN]:
                for i in range(len(candidateSequences[className][direction])):
                    if direction == Packet.UP:
                        distance = ESORICSClassifier.levenshtein(targetSequenceUp, candidateSequences[className][direction][i])
                        maxLen = max(len(targetSequenceUp), len(candidateSequences[className][direction][i]))
                        if len(targetSequenceUp) == 0 or len(candidateSequences[className][direction][i]) == 0:
                            distance = 1.0
                        else:
                            distance /= 1.0 * maxLen

                        similarity[className] += 0.6 * distance
                    elif direction == Packet.DOWN:
                        distance = ESORICSClassifier.levenshtein(targetSequenceDown, candidateSequences[className][direction][i])
                        maxLen = max(len(targetSequenceDown), len(candidateSequences[className][direction][i]))
                        if len(targetSequenceDown) == 0 or len(candidateSequences[className][direction][i]) == 0:
                            distance = 1.0
                        else:
                            distance /= 1.0 * maxLen
                        similarity[className] += 0.4 * distance

        # Pick the class with the smallest accumulated distance.
        # NOTE(review): `<=` means later-iterated classes win ties, and the
        # initial bestSimilarity assumes per-class totals never exceed
        # config.NUM_TRAINING_TRACES (each trace contributes at most
        # 0.6 + 0.4 = 1.0, so the bound holds).
        bestSimilarity = config.NUM_TRAINING_TRACES
        for className in similarity:
            if guess == None or similarity[className] <= bestSimilarity:
                bestSimilarity = similarity[className]
                guess = className

        return guess

    @staticmethod
    # from http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance
    def levenshtein(s1, s2):
        # Edit distance between two length sequences, computed by encoding
        # each length as a single unicode character (Python 2 only).
        s1 = ESORICSClassifier.encode(s1)
        s2 = ESORICSClassifier.encode(s2)
        return Levenshtein.distance(unicode(s1), unicode(s2))

    @staticmethod
    def encode(list):
        # Map each packet length (an int) to the unicode code point of the
        # same value, producing a string Levenshtein.distance can consume.
        strList = []
        for val in list:
            #appVal = config.PACKET_RANGE2.index(val)
            appVal = unichr(val)
            strList.append(appVal)

        return ''.join(strList)
class PanchenkoClassifier:
    # Panchenko et al. classifier: packet-length histogram plus hand-crafted
    # "marker" features (burst size/count markers, HTML marker, unique
    # sizes, percentages, counts, total bandwidth), classified with a
    # LibSVM RBF kernel in Weka.

    @staticmethod
    def roundArbitrary(x, base):
        # Round x to the nearest multiple of base.
        return int(base * round(float(x)/base))

    @staticmethod
    def roundNumberMarker(n):
        # Coarsen a burst packet count into the buckets used by the paper
        # (3, 6, 9, or the exact value outside those ranges).
        if n==4 or n==5: return 3
        elif n==7 or n==8: return 6
        elif n==10 or n==11 or n==12 or n==13: return 9
        else: return n

    @staticmethod
    def traceToInstance( trace ):
        # Convert a Trace into a feature dictionary for ARFF export.
        # An empty trace yields only the class label.
        if trace.getPacketCount()==0:
            instance = {}
            instance['class'] = 'webpage'+str(trace.getId())
            return instance

        # Start from the raw 'direction-length' histogram.
        instance = trace.getHistogram()

        # Size/Number Markers: walk the stream accumulating contiguous
        # same-direction bursts; on each direction flip record one size
        # marker ('S<dir>-<bytes rounded to 600>') and one count marker
        # ('N<dir>-<coarsened packet count>').
        directionCursor = None
        dataCursor = 0
        numberCursor = 0
        for packet in trace.getPackets():
            if directionCursor == None:
                directionCursor = packet.getDirection()

            if packet.getDirection()!=directionCursor:
                dataKey = 'S'+str(directionCursor)+'-'+str( PanchenkoClassifier.roundArbitrary(dataCursor, 600) )
                if not instance.get( dataKey ):
                    instance[dataKey] = 0
                instance[dataKey] += 1

                numberKey = 'N'+str(directionCursor)+'-'+str( PanchenkoClassifier.roundNumberMarker(numberCursor) )
                if not instance.get( numberKey ):
                    instance[numberKey] = 0
                instance[numberKey] += 1

                directionCursor = packet.getDirection()
                dataCursor = 0
                numberCursor = 0

            dataCursor += packet.getLength()
            numberCursor += 1

        # Flush the trailing burst's markers, if any.
        if dataCursor>0:
            key = 'S'+str(directionCursor)+'-'+str( PanchenkoClassifier.roundArbitrary(dataCursor, 600) )
            if not instance.get( key ):
                instance[key] = 0
            instance[key] += 1

        if numberCursor>0:
            numberKey = 'N'+str(directionCursor)+'-'+str( PanchenkoClassifier.roundNumberMarker(numberCursor) )
            if not instance.get( numberKey ):
                instance[numberKey] = 0
            instance[numberKey] += 1

        # HTML Markers: sum downlink bytes until the second uplink packet
        # after some downlink traffic -- a proxy for the size of the initial
        # HTML document.
        counterUP = 0
        counterDOWN = 0
        htmlMarker = 0
        for packet in trace.getPackets():
            if packet.getDirection() == Packet.UP:
                counterUP += 1
                if counterUP>1 and counterDOWN>0: break
            elif packet.getDirection() == Packet.DOWN:
                counterDOWN += 1
                htmlMarker += packet.getLength()

        htmlMarker = PanchenkoClassifier.roundArbitrary( htmlMarker, 600 )
        instance['H'+str(htmlMarker)] = 1

        # Occurring Packet Sizes: number of distinct lengths per direction,
        # rounded to a multiple of 2.
        packetsUp = []
        packetsDown = []
        for packet in trace.getPackets():
            if packet.getDirection()==Packet.UP and packet.getLength() not in packetsUp:
                packetsUp.append( packet.getLength() )
            if packet.getDirection()==Packet.DOWN and packet.getLength() not in packetsDown:
                packetsDown.append( packet.getLength() )
        instance['uniquePacketSizesUp'] = PanchenkoClassifier.roundArbitrary( len( packetsUp ), 2)
        instance['uniquePacketSizesDown'] = PanchenkoClassifier.roundArbitrary( len( packetsDown ), 2)

        # Percentage Incoming Packets (rounded to a multiple of 5).
        instance['percentageUp'] = PanchenkoClassifier.roundArbitrary( 100.0 * trace.getPacketCount( Packet.UP ) / trace.getPacketCount(), 5)
        instance['percentageDown'] = PanchenkoClassifier.roundArbitrary( 100.0 * trace.getPacketCount( Packet.DOWN ) / trace.getPacketCount(), 5)

        # Number of Packets (rounded to a multiple of 15).
        instance['numberUp'] = PanchenkoClassifier.roundArbitrary( trace.getPacketCount( Packet.UP ), 15)
        instance['numberDown'] = PanchenkoClassifier.roundArbitrary( trace.getPacketCount( Packet.DOWN ), 15)

        # Total Bytes Transmitted, rounded to a multiple of 10000 and
        # encoded as one-hot keys per direction.
        bandwidthUp = PanchenkoClassifier.roundArbitrary( trace.getBandwidth( Packet.UP ), 10000)
        bandwidthDown = PanchenkoClassifier.roundArbitrary( trace.getBandwidth( Packet.DOWN ), 10000)
        instance['0-B'+str(bandwidthUp)] = 1
        instance['1-B'+str(bandwidthDown)] = 1

        # Label
        instance['class'] = 'webpage'+str(trace.getId())

        return instance

    @staticmethod
    def classify( runID, trainingSet, testingSet ):
        # Write ARFF train/test files and run Weka LibSVM with the RBF
        # hyperparameters from the paper.
        [trainingFile,testingFile] = arffWriter.writeArffFiles( runID, trainingSet, testingSet )
        return wekaAPI.execute( trainingFile,
                                testingFile,
                                "weka.Run weka.classifiers.functions.LibSVM",
                                ['-K','2', # RBF kernel
                                 '-G','0.0000019073486328125', # Gamma
                                 '-C','131072'] ) # Cost
4 | 5 | import os 6 | import sys 7 | from Packet import Packet 8 | 9 | # Set the following to a directory that contains 10 | # * weka-X-Y-Z (see WEKA_ROOT to change the weka version) 11 | # * pcap-logs (a diretory that contains all of the LL pcap files) 12 | # * [optional] (a directory that contains custom/local python modules) 13 | BASE_DIR = '' 14 | 15 | # Enviromental settings 16 | JVM_MEMORY_SIZE = '4192m' 17 | 18 | WEKA_ROOT = os.path.join(BASE_DIR ,'weka-3-7-5') 19 | WEKA_JAR = os.path.join(WEKA_ROOT ,'weka.jar') 20 | PCAP_ROOT = os.path.join(BASE_DIR ,'pcap-logs') 21 | PYTHON_ROOT = os.path.join(BASE_DIR ,'python2.4') 22 | PYTHONPATH = os.path.join(PYTHON_ROOT,'lib/python') 23 | CACHE_DIR = './cache' 24 | COUNTERMEASURE_DIR = './countermeasures' 25 | CLASSIFIERS_DIR = './classifiers' 26 | OUTPUT_DIR = './output' 27 | 28 | #Specify options for Herrmann MySQL database 29 | MYSQL_HOST = 'localhost' 30 | MYSQL_DB = 'fingerprints' 31 | MYSQL_USER = 'fingerprints' 32 | MYSQL_PASSWD = 'fingerprints' 33 | 34 | sys.path.append(PYTHONPATH) 35 | sys.path.append(COUNTERMEASURE_DIR) 36 | sys.path.append(CLASSIFIERS_DIR) 37 | 38 | COUNTERMEASURE = 0 39 | CLASSIFIER = 0 40 | BUCKET_SIZE = 2 41 | DATA_SOURCE = 1 42 | NUM_TRAINING_TRACES = 16 43 | NUM_TESTING_TRACES = 4 44 | NUM_TRIALS = 1 45 | TOP_N = 775 46 | PACKET_PENALTY = 68 47 | IGNORE_ACK = True 48 | 49 | # Liberatore and Levine Training and Testing configuration 50 | DATA_SET = [ 51 | #{'month':2,'day':10,'hour':13}, 52 | #{'month':2,'day':11,'hour':11}, 53 | #{'month':2,'day':13,'hour':8}, 54 | #{'month':2,'day':13,'hour':19}, 55 | #{'month':2,'day':14,'hour':9}, 56 | #{'month':2,'day':14,'hour':23}, 57 | #{'month':2,'day':15,'hour':8}, 58 | #{'month':2,'day':16,'hour':12}, 59 | #{'month':2,'day':20,'hour':10}, 60 | #{'month':2,'day':20,'hour':16}, 61 | #{'month':2,'day':20,'hour':22}, 62 | #{'month':2,'day':21,'hour':4}, 63 | #{'month':2,'day':21,'hour':10}, 64 | #{'month':2,'day':21,'hour':16}, 65 | 
            # --- tail of the crawl-schedule list (opened earlier in this file) ---
            # Each entry names one crawl snapshot of the Herrmann dataset by
            # month/day/hour.  Commented entries are snapshots deliberately
            # excluded from the experiments.
            #{'month':2,'day':21,'hour':22},
            #{'month':2,'day':22,'hour':4},
            #{'month':2,'day':22,'hour':10},
            #{'month':2,'day':22,'hour':16},
            #{'month':2,'day':22,'hour':22},
            #{'month':2,'day':23,'hour':4},
            #{'month':2,'day':23,'hour':10},
            #{'month':2,'day':20,'hour':10},
            #{'month':2,'day':20,'hour':16},
            #{'month':2,'day':20,'hour':22},
            #{'month':2,'day':21,'hour':4},
            #{'month':2,'day':21,'hour':10},
            #{'month':2,'day':21,'hour':16},
            #{'month':2,'day':21,'hour':22},
            #{'month':2,'day':22,'hour':4},
            #{'month':2,'day':22,'hour':10},
            #{'month':2,'day':22,'hour':16},
            #{'month':2,'day':22,'hour':22},
            #{'month':2,'day':23,'hour':4},
            #{'month':2,'day':23,'hour':10},
            {'month':3,'day':6,'hour':16},
            {'month':3,'day':6,'hour':22},
            {'month':3,'day':7,'hour':4},
            {'month':3,'day':7,'hour':10},
            {'month':3,'day':7,'hour':16},
            {'month':3,'day':7,'hour':22},
            {'month':3,'day':8,'hour':4},
            {'month':3,'day':8,'hour':10},
            {'month':3,'day':8,'hour':16},
            {'month':3,'day':8,'hour':22},
            {'month':3,'day':9,'hour':4},
            {'month':3,'day':9,'hour':16},
            {'month':3,'day':9,'hour':22},
            {'month':3,'day':10,'hour':4},
            {'month':3,'day':10,'hour':10},
            {'month':3,'day':10,'hour':16},
            {'month':3,'day':10,'hour':22},
            {'month':3,'day':11,'hour':4},
            {'month':3,'day':11,'hour':10},
            {'month':3,'day':11,'hour':16},
            {'month':3,'day':11,'hour':22},
            {'month':3,'day':12,'hour':4},
            {'month':3,'day':12,'hour':10},
            {'month':3,'day':12,'hour':16},
            {'month':3,'day':12,'hour':22},
            {'month':3,'day':13,'hour':16},
            {'month':3,'day':13,'hour':22},
            {'month':3,'day':14,'hour':4},
            {'month':3,'day':14,'hour':10},
            {'month':3,'day':14,'hour':16},
            {'month':3,'day':14,'hour':22},
            {'month':3,'day':15,'hour':4},
            {'month':3,'day':15,'hour':10},
            {'month':3,'day':15,'hour':16},
            {'month':3,'day':15,'hour':22},
            {'month':3,'day':16,'hour':4},
            {'month':3,'day':16,'hour':10},
            {'month':3,'day':16,'hour':16},
            {'month':3,'day':16,'hour':22},
            {'month':3,'day':17,'hour':4},
            {'month':3,'day':17,'hour':10},
            {'month':3,'day':17,'hour':16},
            {'month':3,'day':17,'hour':22},
            {'month':3,'day':20,'hour':10},
            {'month':3,'day':20,'hour':16},
            {'month':3,'day':20,'hour':22},
            {'month':3,'day':21,'hour':4},
            {'month':3,'day':21,'hour':10},
            {'month':3,'day':21,'hour':16},
            {'month':3,'day':21,'hour':22},
            {'month':3,'day':22,'hour':4},
            {'month':3,'day':22,'hour':10},
            {'month':3,'day':22,'hour':16},
            {'month':3,'day':22,'hour':22},
            {'month':3,'day':23,'hour':4},
            {'month':3,'day':23,'hour':10},
            {'month':3,'day':23,'hour':16},
            {'month':3,'day':23,'hour':22},
            {'month':3,'day':24,'hour':10},
            {'month':3,'day':24,'hour':16},
            {'month':3,'day':24,'hour':22},
            {'month':3,'day':25,'hour':4},
            {'month':3,'day':25,'hour':10},
            {'month':3,'day':25,'hour':16},
            {'month':3,'day':25,'hour':22},
            {'month':3,'day':26,'hour':4},
            {'month':3,'day':26,'hour':10},
            {'month':3,'day':26,'hour':16},
            {'month':3,'day':26,'hour':22},
            {'month':3,'day':27,'hour':4},
            {'month':3,'day':27,'hour':10},
            {'month':3,'day':27,'hour':16},
            {'month':3,'day':28,'hour':16},
            {'month':3,'day':28,'hour':22},
            {'month':3,'day':29,'hour':4},
            {'month':3,'day':29,'hour':10},
            {'month':3,'day':29,'hour':16},
            {'month':3,'day':29,'hour':22},
            {'month':3,'day':30,'hour':4},
            {'month':3,'day':30,'hour':10},
            {'month':3,'day':30,'hour':16},
            {'month':3,'day':30,'hour':22},
            {'month':3,'day':31,'hour':4},
            {'month':3,'day':31,'hour':10},
            {'month':3,'day':31,'hour':16},
            {'month':3,'day':31,'hour':22},
            {'month':4,'day':1,'hour':4},
            {'month':4,'day':1,'hour':10},
            {'month':4,'day':1,'hour':16},
            {'month':4,'day':1,'hour':22},
            {'month':4,'day':2,'hour':4},
            {'month':4,'day':2,'hour':10},
            {'month':4,'day':2,'hour':16},
            {'month':4,'day':2,'hour':22},
            {'month':4,'day':3,'hour':4},
            {'month':4,'day':3,'hour':10},
            {'month':4,'day':3,'hour':16},
            {'month':4,'day':3,'hour':22},
            {'month':4,'day':4,'hour':4},
            {'month':4,'day':4,'hour':10},
            {'month':4,'day':4,'hour':16},
            {'month':4,'day':4,'hour':22},
            {'month':4,'day':5,'hour':4},
            {'month':4,'day':5,'hour':10},
            {'month':4,'day':5,'hour':16},
            {'month':4,'day':5,'hour':22},
            {'month':4,'day':6,'hour':4},
            {'month':4,'day':6,'hour':10},
            {'month':4,'day':6,'hour':16},
            {'month':4,'day':6,'hour':22},
            {'month':4,'day':7,'hour':4},
            {'month':4,'day':7,'hour':10},
            {'month':4,'day':7,'hour':16},
            {'month':4,'day':7,'hour':22},
            {'month':4,'day':8,'hour':4},
            {'month':4,'day':8,'hour':10},
            {'month':4,'day':8,'hour':16},
            #{'month':4,'day':13,'hour':22},
            #{'month':4,'day':14,'hour':4},
            #{'month':4,'day':14,'hour':10},
            #{'month':4,'day':14,'hour':16},
            #{'month':4,'day':14,'hour':22},
            #{'month':4,'day':15,'hour':4},
            #{'month':4,'day':15,'hour':10},
            #{'month':4,'day':15,'hour':16},
            #{'month':4,'day':15,'hour':22},
            #{'month':4,'day':16,'hour':4},
            #{'month':4,'day':16,'hour':16},
            #{'month':4,'day':16,'hour':22},
            #{'month':4,'day':18,'hour':16},
            #{'month':4,'day':18,'hour':22},
            #{'month':4,'day':19,'hour':4},
            #{'month':4,'day':19,'hour':10},
            #{'month':4,'day':19,'hour':16},
            #{'month':4,'day':19,'hour':22},
            #{'month':4,'day':20,'hour':4},
            #{'month':4,'day':20,'hour':10},
            #{'month':4,'day':20,'hour':16},
            #{'month':4,'day':20,'hour':22},
            #{'month':4,'day':21,'hour':4},
            #{'month':4,'day':21,'hour':10},
            #{'month':4,'day':21,'hour':16},
            #{'month':4,'day':21,'hour':22},
            #{'month':4,'day':22,'hour':4},
            #{'month':4,'day':22,'hour':16},
            #{'month':4,'day':22,'hour':22},
            #{'month':4,'day':23,'hour':4},
            #{'month':4,'day':23,'hour':10},
            #{'month':4,'day':23,'hour':16},
            #{'month':4,'day':23,'hour':22},
            #{'month':4,'day':24,'hour':4},
            #{'month':4,'day':24,'hour':16},
            #{'month':4,'day':24,'hour':22},
           ]

# packet range (LL)
# Candidate packet lengths, stepping from the TCP/IP header size up to the MTU.
# PACKET_RANGE (8-byte buckets) is the morphing/histogram alphabet used by the
# countermeasures; PACKET_RANGE2 uses finer 4-byte buckets.
PACKET_RANGE = range(Packet.HEADER_LENGTH,Packet.MTU+1,8)
PACKET_RANGE2 = range(Packet.HEADER_LENGTH,Packet.MTU+1,4)

# packet range (H)

# Security Strategy Enum
# Integer codes selecting the padding/morphing countermeasure (see
# main.intToCountermeasure for the mapping to classes; values > 10 select
# Folklore variants there).
NONE = 0
PAD_TO_MTU = 1
RFC_COMPLIANT_FIXED_PAD = 2
RFC_COMPLIANT_RANDOM_PAD = 3
RANDOM_PAD = 4
PAD_ROUND_EXPONENTIAL = 5
PAD_ROUND_LINEAR = 6
MICE_ELEPHANTS = 7
DIRECT_TARGET_SAMPLING = 8
WRIGHT_STYLE_MORPHING = 9
FIXED_PAD = 10

# Classifier enum
# Integer codes selecting the attack classifier (see main.intToClassifier).
# Note the gaps: 7-9 and 11-13 are intentionally unused.
LIBERATORE_CLASSIFIER = 0
WRIGHT_CLASSIFIER = 1
JACCARD_CLASSIFIER = 2
PANCHENKO_CLASSIFIER = 3
BANDWIDTH_CLASSIFIER = 4
ESORICS_CLASSIFIER = 5
HERRMANN_CLASSIFIER = 6
TIME_CLASSIFIER = 10
VNG_CLASSIFIER = 14
VNG_PLUS_PLUS_CLASSIFIER = 15

### Sanity
def sanity():
    # Best-effort environment check: warn (but do not abort -- the sys.exit
    # calls are deliberately commented out) when Weka or BASE_DIR is missing.
    if not os.path.exists(WEKA_JAR):
        print 'Weka does not exist in path: '+str(WEKA_JAR)
        print 'Please install Weka properly.'
        #sys.exit()

    if BASE_DIR == '':
        print "!!!!"
        print "Please open config.py and set your BASE_DIR."
        #sys.exit()

# Run the environment check at import time so misconfiguration is reported
# as soon as config is imported.
sanity()
###
-------------------------------------------------------------------------------- /countermeasures/WrightStyleMorphing.py: --------------------------------------------------------------------------------
# This is a Python framework to compliment "Peek-a-Boo, I Still See You: Why Efficient Traffic Analysis Countermeasures Fail".
# Copyright (C) 2012 Kevin P. Dyer (kpdyer.com)
# See LICENSE for more details.

import random
import math
import config

from Webpage import Webpage
from Trace import Trace
from Packet import Packet

from cvxopt.base import matrix, sparse, spdiag

from cvxopt import solvers

# Silence the solver and relax tolerances: we solve one LP per
# (source page, target page, direction) and only need a coarse answer.
solvers.options['show_progress'] = False
solvers.options['LPX_K_MSGLEV'] = 0

solvers.options['abstol'] = 1e-4
solvers.options['reltol'] = 1e-4

solvers.options['maxiters'] = 200

# cost_matrix is the vector of cost coeffs
# cost_matrix[i,j] = |len_i - len_j|: the byte overhead of morphing a packet
# from the i-th to the j-th bucket of config.PACKET_RANGE.  It is then
# reshaped to an N x 1 vector (N = n^2, column-major) to match the
# vectorized morphing matrix used in the LP below.
n = len(config.PACKET_RANGE)
N = n**2
cost_matrix = matrix(0.0, (n,n), 'd')
for i in range(n):
    for j in range(n):
        cost_matrix[i,j] = math.fabs(config.PACKET_RANGE[i] - config.PACKET_RANGE[j])

cost_matrix = matrix(cost_matrix, (N,1), 'd')

class WrightStyleMorphing:
    """Traffic-morphing countermeasure (Wright et al.): rewrite each trace's
    packet-length distribution so it resembles a chosen target webpage,
    using a cost-minimizing morphing matrix computed by linear programming.
    """

    # L1 distance below which the morphed trace is considered "close enough"
    # to the target's bidirectional length distribution.
    # NOTE(review): name is misspelled ("THRESHHOLD"); kept for compatibility.
    L1_THRESHHOLD = 0.3

    @staticmethod
    def buildMetadata( srcWebpage, targetWebpage ):
        """Precompute, for one (source, target) webpage pair, the target
        length histograms and the per-direction morphing matrices.
        Returns a 7-element list consumed by applyCountermeasure.
        The matrices may be None when the LP has no solution; the caller
        then falls back to direct sampling from the target histogram.
        """
        targetDistributionBi = targetWebpage.getHistogram( None, True )
        targetDistributionUp = targetWebpage.getHistogram( Packet.UP, True )
        targetDistributionDown = targetWebpage.getHistogram( Packet.DOWN, True )
        srcDistributionUp = srcWebpage.getHistogram( Packet.UP, True )
        srcDistributionDown = srcWebpage.getHistogram( Packet.DOWN, True )
        morphingMatrixUp = WrightStyleMorphing.buildMorphingMatrix(srcWebpage.getId(), targetWebpage.getId(), Packet.UP, srcDistributionUp, targetDistributionUp)
        morphingMatrixDown = WrightStyleMorphing.buildMorphingMatrix(srcWebpage.getId(), targetWebpage.getId(), Packet.DOWN, srcDistributionDown, targetDistributionDown)

        return [targetDistributionBi, targetDistributionUp, targetDistributionDown, srcDistributionUp, srcDistributionDown, morphingMatrixUp, morphingMatrixDown]

    @staticmethod
    def applyCountermeasure( trace, metadata ):
        """Morph one trace toward the target distribution.

        Phase 1 (primary sampling): each original packet is replaced by one
        or more packets whose lengths are drawn from the morphing-matrix
        column for the original length (or directly from the target
        histogram when no matrix is available).
        Phase 2 (secondary sampling): extra packets are appended until the
        trace's L1 distance to the target's bidirectional histogram drops
        below L1_THRESHHOLD.
        Returns a new Trace; the input trace is not modified.
        """
        [targetDistributionBi,
        targetDistributionUp,
        targetDistributionDown,
        srcDistributionUp,
        srcDistributionDown,
        morphingMatrixUp,
        morphingMatrixDown] = metadata

        newTrace = Trace(trace.getId())

        # primary sampling
        timeCursor = 0
        for packet in trace.getPackets():
            # NOTE(review): this assigns the packet *length* to the time
            # cursor -- packet.getTime() looks like the intended call.
            # Confirm against Trace/Packet semantics before changing.
            timeCursor = packet.getLength()
            # Map the length to its bucket index in config.PACKET_RANGE
            # (8-byte buckets starting at the header length).
            index = (packet.getLength()-Packet.HEADER_LENGTH)/8

            targetDistribution = None
            morphingColumn = None
            if packet.getDirection()==Packet.UP:
                if morphingMatrixUp:
                    morphingColumn = morphingMatrixUp[:,index]
                else:
                    targetDistribution = targetDistributionUp
                targetDistributionSecondary = targetDistributionUp
            else:
                if morphingMatrixDown:
                    morphingColumn = morphingMatrixDown[:,index]
                else:
                    targetDistribution = targetDistributionDown
                targetDistributionSecondary = targetDistributionDown

            # NOTE(review): truth-testing a cvxopt matrix slice relies on
            # cvxopt's __nonzero__ semantics; `is not None` would be the
            # explicit form -- verify before changing.
            if morphingColumn:
                # Turn the matrix column into the same {"<dir>-<len>": prob}
                # dict shape the histogram-based path uses.
                targetDistribution = {}
                for i in range(len(morphingColumn)):
                    key = str(packet.getDirection())+'-'+str( i*8 + Packet.HEADER_LENGTH )
                    targetDistribution[key] = morphingColumn[i]

            packets = WrightStyleMorphing.morphPacket( packet, targetDistribution, targetDistributionSecondary )
            for newPacket in packets:
                newTrace.addPacket( newPacket )

        # secondary sampling
        while True:
            l1Distance = newTrace.calcL1Distance( targetDistributionBi )
            if l1Distance <= WrightStyleMorphing.L1_THRESHHOLD:
                break

            # Space the padding packets 10 time units apart and always pad
            # the dimension currently most under-represented.
            timeCursor += 10
            newDirection, newLen = newTrace.getMostSkewedDimension( targetDistributionBi )
            packet = Packet( newDirection, timeCursor, newLen )
            newTrace.addPacket( packet )

        return newTrace

    @staticmethod
    def morphPacket( packet, targetDistributionPrimary, targetDistributionSecondary ):
        """Replace one packet by a list of morphed packets.

        The first replacement is drawn from the primary distribution; if its
        payload (length minus the per-packet header penalty) does not cover
        the original payload, further packets are drawn from the secondary
        distribution until the residual payload is exhausted.
        """
        packetPenalty = config.PACKET_PENALTY

        packetList = []
        newPacket = WrightStyleMorphing.generatePacket( targetDistributionPrimary, packet )
        packetList.append( newPacket )

        dataSent = newPacket.getLength() - packetPenalty
        dataSent = max( dataSent, 0 ) # Can have 'negative' dataSent if newPacket is ACK
                                      # and packet is not ACK
        residual = (packet.getLength() - packetPenalty) - dataSent

        # Now sample from secondary
        # NOTE(review): if the secondary distribution keeps yielding
        # header-only packets (dataSent == 0) this loop does not terminate;
        # presumably the distributions always contain payload lengths.
        while residual > 0:
            newPacket = WrightStyleMorphing.generatePacket( targetDistributionSecondary, packet )
            packetList.append( newPacket )

            dataSent = (newPacket.getLength() - packetPenalty)
            dataSent = max( dataSent, 0 )
            residual -= dataSent

        return packetList

    @staticmethod
    def generatePacket( targetDistribution, packet ):
        """Sample a length from the distribution and build a packet with the
        original packet's direction and timestamp.  Keys are
        "<direction>-<length>" strings; on an empty distribution the length
        falls back to 1500 (the Ethernet MTU).
        """
        sample = WrightStyleMorphing.sampleFromDistribution( targetDistribution )
        if sample == None:
            newLen = 1500
        else:
            bits = sample.split('-')
            newLen = int(bits[1])
        packet = Packet( packet.getDirection(), packet.getTime(), newLen )

        return packet

    @staticmethod
    def sampleFromDistribution( distribution ):
        """Draw one key from {key: weight} proportionally to its weight.

        Weights need not sum to 1.  Returns None for an empty distribution;
        if floating-point rounding exhausts the loop, the last key iterated
        is returned.
        """
        total = 0
        for key in distribution:
            total += distribution[key]
        n = random.uniform(0,total)

        key = None
        for key in distribution:
            if n < distribution[key]:
                return key
            n -= distribution[key]

        return key

    @staticmethod
    def buildMorphingMatrix(srcID, targetID, direction, srcDist, targetDist):
        """Vectorize the source/target histograms over config.PACKET_RANGE
        (missing buckets become 0) and solve for the morphing matrix.
        srcID/targetID are currently unused (presumably kept for logging
        or caching by the original author).
        Returns the n x n matrix, or None when the LP is infeasible.
        """
        srcVec = matrix(0, ( len(config.PACKET_RANGE) , 1), 'd' )
        targetVec = matrix(0, ( len(config.PACKET_RANGE) , 1), 'd' )

        for i in range(len(config.PACKET_RANGE)):
            key = str(direction)+'-'+str(config.PACKET_RANGE[i])

            if not srcDist.get( key ):
                srcVec[i] = 0
            else:
                srcVec[i] = srcDist[key]

            if not targetDist.get( key ):
                targetVec[i] = 0
            else:
                targetVec[i] = targetDist[key]

        A = WrightStyleMorphing.what_is_the_matrix( srcVec, targetVec )

        return A

    @staticmethod
    def what_is_the_matrix(X, Z):
        """find the optimal morphing matrix A such that A * src_dist = target_dist

        Solves, over the vectorized n x n matrix a (N = n^2 variables):
            minimize    cost_matrix' * a        (expected byte overhead)
            subject to  each column of A sums to 1   (columns are PDFs)
                        A * X = Z                    (morphs src to target)
                        a >= 0
        via cvxopt's LP interface with the GLPK backend.  Returns A as an
        n x n matrix, or None when the solver finds no feasible point.
        """
        n = len(Z)
        N = n**2

        # Equality Constraints
        A_list = []
        b_list = []
        # -- the columns of the matrix must be valid PDF's
        A_pdf = matrix(0.0, (n,N), 'd')
        for i in range(n):
            A_pdf[i,n*i:n*i+n] = 1.0
        b_pdf = matrix(1.0, (n,1), 'd')
        A_list.append(A_pdf)
        b_list.append(b_pdf)

        # -- the matrix must morph X to Z
        # Row i of A_morph is the vectorization of a matrix whose i-th row
        # is X', so (A_morph * a)_i = (A * X)_i.
        A_morph = matrix(0.0, (n,N), 'd')
        for i in range(n):
            matrix_vers = matrix(0.0, (n,n), 'd')
            matrix_vers[i,:] = X.T
            row = matrix(matrix_vers, (1,N), 'd')
            A_morph[i,:] = row
        b_morph = matrix(Z, (n,1), 'd')
        A_list.append(A_morph)
        b_list.append(b_morph)

        # concatenate all our equality constraints into one coeff matrix and one b vector
        A = sparse(A_list)
        b = matrix(b_list)

        # Inequality Constraints -- in order to be a valid PDF, each cell a_ij must be 0 <= a_ij <= 1
        G_list = []
        h_list = []
        G_lt = spdiag(matrix(1.0, (N,1), 'd'))
        h_lt = matrix(1.0, (N,1), 'd')
        # cvw: as mentioned in the comment above in find_the_one(), the "less than" constraints are
        #      in fact redundant given that we already require the columns to sum to 1.0 and require
        #      (below) that each prob is >= 0.  yay for smaller KKT matrices.
        #G_list.append(G_lt)
        #h_list.append(h_lt)

        G_gt = spdiag(matrix(-1.0, (N,1), 'd'))
        h_gt = matrix(0.0, (N,1), 'd')
        G_list.append(G_gt)
        h_list.append(h_gt)

        # cvw: I guess we could add some more constraints if we really wanted to..
        #      i.e. only downgrade the bit rate 10% of the time or less
        #      but for now these'll do

        G = sparse(G_list)
        h = matrix(h_list)

        # now run the cvxopt solver to get our answer
        ans = solvers.lp(cost_matrix, G=G, h=h, A=A, b=b, solver='glpk')
        A = None
        if ans['x']:
            # cost is the achieved objective value (unused beyond debugging).
            cost = cost_matrix.T * ans['x']

            # A is the morphing matrix
            A = matrix(ans['x'], (n,n), 'd')

        return A
-------------------------------------------------------------------------------- /main.py: --------------------------------------------------------------------------------
# This is a Python framework to compliment "Peek-a-Boo, I Still See You: Why Efficient Traffic Analysis Countermeasures Fail".
# Copyright (C) 2012 Kevin P. Dyer (kpdyer.com)
# See LICENSE for more details.

import sys
import config
import time
import os
import random
import getopt
import string
import itertools

# custom
from Datastore import Datastore
from Webpage import Webpage

# countermeasures
from PadToMTU import PadToMTU
from PadRFCFixed import PadRFCFixed
from PadRFCRand import PadRFCRand
from PadRand import PadRand
from PadRoundExponential import PadRoundExponential
from PadRoundLinear import PadRoundLinear
from MiceElephants import MiceElephants
from DirectTargetSampling import DirectTargetSampling
from Folklore import Folklore
from WrightStyleMorphing import WrightStyleMorphing

# classifiers
from LiberatoreClassifier import LiberatoreClassifier
from WrightClassifier import WrightClassifier
from BandwidthClassifier import BandwidthClassifier
from HerrmannClassifier import HerrmannClassifier
from TimeClassifier import TimeClassifier
from PanchenkoClassifier import PanchenkoClassifier
from VNGPlusPlusClassifier import VNGPlusPlusClassifier
from VNGClassifier import VNGClassifier
from JaccardClassifier import JaccardClassifier
from ESORICSClassifier import ESORICSClassifier

def intToCountermeasure(n):
    """Map a config countermeasure code to its class.

    Codes 1-9 select a specific countermeasure class; any code > 10 selects
    Folklore, whose FIXED_PACKET_LEN / TIMER_CLOCK_SPEED /
    MILLISECONDS_TO_RUN class attributes are then configured (as a side
    effect) by the code value.  Returns None for code 0 (no countermeasure).
    """
    countermeasure = None
    if n == config.PAD_TO_MTU:
        countermeasure = PadToMTU
    elif n == config.RFC_COMPLIANT_FIXED_PAD:
        countermeasure = PadRFCFixed
    elif n == config.RFC_COMPLIANT_RANDOM_PAD:
        countermeasure = PadRFCRand
    elif n == config.RANDOM_PAD:
        countermeasure = PadRand
    elif n == config.PAD_ROUND_EXPONENTIAL:
        countermeasure = PadRoundExponential
    elif n == config.PAD_ROUND_LINEAR:
        countermeasure = PadRoundLinear
    elif n == config.MICE_ELEPHANTS:
        countermeasure = MiceElephants
    elif n == config.DIRECT_TARGET_SAMPLING:
        countermeasure = DirectTargetSampling
    elif n == config.WRIGHT_STYLE_MORPHING:
        countermeasure = WrightStyleMorphing
    elif n > 10:
        countermeasure = Folklore

    # FIXED_PACKET_LEN: 1000,1250,1500
    # Codes 11-22 enumerate the cross product of packet length, timer speed
    # and run duration; 23-28 are hand-picked extra combinations.
    if n in [11,12,13,14]:
        Folklore.FIXED_PACKET_LEN = 1000
    elif n in [15,16,17,18]:
        Folklore.FIXED_PACKET_LEN = 1250
    elif n in [19,20,21,22]:
        Folklore.FIXED_PACKET_LEN = 1500

    if n in [11,12,13,17,18,19]:
        Folklore.TIMER_CLOCK_SPEED = 20
    elif n in [14,15,16,20,21,22]:
        Folklore.TIMER_CLOCK_SPEED = 40

    if n in [11,14,17,20]:
        Folklore.MILLISECONDS_TO_RUN = 0
    elif n in [12,15,18,21]:
        Folklore.MILLISECONDS_TO_RUN = 5000
    elif n in [13,16,19,22]:
        Folklore.MILLISECONDS_TO_RUN = 10000

    if n==23:
        Folklore.MILLISECONDS_TO_RUN = 0
        Folklore.FIXED_PACKET_LEN = 1250
        Folklore.TIMER_CLOCK_SPEED = 40
    elif n==24:
        Folklore.MILLISECONDS_TO_RUN = 0
        Folklore.FIXED_PACKET_LEN = 1500
        Folklore.TIMER_CLOCK_SPEED = 20
    elif n==25:
        Folklore.MILLISECONDS_TO_RUN = 5000
        Folklore.FIXED_PACKET_LEN = 1000
        Folklore.TIMER_CLOCK_SPEED = 40
    elif n==26:
        Folklore.MILLISECONDS_TO_RUN = 5000
        Folklore.FIXED_PACKET_LEN = 1500
        Folklore.TIMER_CLOCK_SPEED = 20
    elif n==27:
        Folklore.MILLISECONDS_TO_RUN = 10000
        Folklore.FIXED_PACKET_LEN = 1000
        Folklore.TIMER_CLOCK_SPEED = 40
    elif n==28:
        Folklore.MILLISECONDS_TO_RUN = 10000
        Folklore.FIXED_PACKET_LEN = 1250
        Folklore.TIMER_CLOCK_SPEED = 20


    return countermeasure

def intToClassifier(n):
    """Map a config classifier code to its class; None for unknown codes."""
    classifier = None
    if n == config.LIBERATORE_CLASSIFIER:
        classifier = LiberatoreClassifier
    elif n == config.WRIGHT_CLASSIFIER:
        classifier = WrightClassifier
    elif n == config.BANDWIDTH_CLASSIFIER:
        classifier = BandwidthClassifier
    elif n == config.HERRMANN_CLASSIFIER:
        classifier = HerrmannClassifier
    elif n == config.TIME_CLASSIFIER:
        classifier = TimeClassifier
    elif n == config.PANCHENKO_CLASSIFIER:
        classifier = PanchenkoClassifier
    elif n == config.VNG_PLUS_PLUS_CLASSIFIER:
        classifier = VNGPlusPlusClassifier
    elif n == config.VNG_CLASSIFIER:
        classifier = VNGClassifier
    elif n == config.JACCARD_CLASSIFIER:
        classifier = JaccardClassifier
    elif n == config.ESORICS_CLASSIFIER:
        classifier = ESORICSClassifier

    return classifier

def usage():
    # Print the command-line help text.
    # NOTE(review): the help text below lists "-t" twice; per the option
    # handling in run(), the "number of trials" option is actually "-n"
    # and "-t" is the number of training traces.  The string is left
    # unchanged here; fix the first "-t" entry to read "-n".
    print """
    -N [int] : use [int] websites from the dataset
               from which we will use to sample a privacy
               set k in each experiment (default 775)

    -k [int] : the size of the privacy set (default 2)

    -d [int]: dataset to use
        0: Liberatore and Levine Dataset (OpenSSH)
        1: Herrmann et al. Dataset (OpenSSH)
        2: Herrmann et al. Dataset (Tor)
        (default 1)

    -C [int] : classifier to run
        0: Liberatore Classifer
        1: Wright et al. Classifier
        2: Jaccard Classifier
        3: Panchenko et al. Classifier
        5: Lu et al. Edit Distance Classifier
        6: Herrmann et al. Classifier
        4: Dyer et al. Bandwidth (BW) Classifier
        10: Dyer et al. Time Classifier
        14: Dyer et al. Variable n-gram (VNG) Classifier
        15: Dyer et al. VNG++ Classifier
        (default 0)

    -c [int]: countermeasure to use
        0: None
        1: Pad to MTU
        2: Session Random 255
        3: Packet Random 255
        4: Pad Random MTU
        5: Exponential Pad
        6: Linear Pad
        7: Mice-Elephants Pad
        8: Direct Target Sampling
        9: Traffic Morphing
        (default 0)

    -t [int]: number of trials to run per experiment (default 1)

    -t [int]: number of training traces to use per experiment (default 16)

    -T [int]: number of testing traces to use per experiment (default 4)
    """

def run():
    """Command-line entry point: parse options into config, then run
    NUM_TRIALS experiments.  Each trial samples BUCKET_SIZE webpage ids,
    applies the selected countermeasure to training/testing traces, runs
    the selected classifier, and appends accuracy/overhead/timing rows to
    the <outputFilename>.output CSV (debug rows to .debug).
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "t:T:N:k:c:C:d:n:r:h")
    except getopt.GetoptError, err:
        print str(err) # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    # Random 8-char run id (can be overridden with -r) used to namespace
    # the intermediate ARFF/weka files.
    char_set = string.ascii_lowercase + string.digits
    runID = ''.join(random.sample(char_set,8))

    for o, a in opts:
        if o in ("-k"):
            config.BUCKET_SIZE = int(a)
        elif o in ("-C"):
            config.CLASSIFIER = int(a)
        elif o in ("-d"):
            config.DATA_SOURCE = int(a)
        elif o in ("-c"):
            config.COUNTERMEASURE = int(a)
        elif o in ("-N"):
            config.TOP_N = int(a)
        elif o in ("-t"):
            config.NUM_TRAINING_TRACES = int(a)
        elif o in ("-T"):
            config.NUM_TESTING_TRACES = int(a)
        elif o in ("-n"):
            config.NUM_TRIALS = int(a)
        elif o in ("-r"):
            runID = str(a)
        else:
            # unrecognized option (including -h): show help and abort
            usage()
            sys.exit(2)

    # Encode the whole parameter combination in the results file name so
    # different configurations never clobber each other.
    outputFilenameArray = ['results',
                           'k'+str(config.BUCKET_SIZE),
                           'c'+str(config.COUNTERMEASURE),
                           'd'+str(config.DATA_SOURCE),
                           'C'+str(config.CLASSIFIER),
                           'N'+str(config.TOP_N),
                           't'+str(config.NUM_TRAINING_TRACES),
                           'T'+str(config.NUM_TESTING_TRACES),
                          ]
    outputFilename = os.path.join(config.OUTPUT_DIR,'.'.join(outputFilenameArray))

    if not os.path.exists(config.CACHE_DIR):
        os.mkdir(config.CACHE_DIR)

    # Create the results files with a CSV banner on first use; subsequent
    # runs append.
    if not os.path.exists(outputFilename+'.output'):
        banner = ['accuracy','overhead','timeElapsedTotal','timeElapsedClassifier']
        f = open( outputFilename+'.output', 'w' )
        f.write(','.join(banner))
        f.close()
    if not os.path.exists(outputFilename+'.debug'):
        f = open( outputFilename+'.debug', 'w' )
        f.close()

    # Valid range for the trace index at which the train/test split is
    # seeded, per dataset.
    # NOTE(review): any other DATA_SOURCE value leaves startIndex/endIndex
    # unbound and raises NameError below -- consider validating earlier.
    if config.DATA_SOURCE == 0:
        startIndex = config.NUM_TRAINING_TRACES
        endIndex = len(config.DATA_SET)-config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 1:
        maxTracesPerWebsiteH = 160
        startIndex = config.NUM_TRAINING_TRACES
        endIndex = maxTracesPerWebsiteH-config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 2:
        maxTracesPerWebsiteH = 18
        startIndex = config.NUM_TRAINING_TRACES
        endIndex = maxTracesPerWebsiteH-config.NUM_TESTING_TRACES

    for i in range(config.NUM_TRIALS):
        startStart = time.time()

        # Sample a privacy set of BUCKET_SIZE pages from the top N.
        # NOTE(review): range(0, TOP_N - 1) yields only TOP_N - 1 ids,
        # excluding id TOP_N - 1 -- looks like an off-by-one; confirm
        # against the datastore's id space.
        webpageIds = range(0, config.TOP_N - 1)
        random.shuffle( webpageIds )
        webpageIds = webpageIds[0:config.BUCKET_SIZE]

        seed = random.randint( startIndex, endIndex )

        preCountermeasureOverhead = 0
        postCountermeasureOverhead = 0

        classifier = intToClassifier(config.CLASSIFIER)
        countermeasure = intToCountermeasure(config.COUNTERMEASURE)

        trainingSet = []
        testingSet = []

        # First sampled page becomes the morphing target for the
        # distribution-matching countermeasures (DTS / morphing).
        targetWebpage = None

        for webpageId in webpageIds:
            if config.DATA_SOURCE == 0:
                webpageTrain = Datastore.getWebpagesLL( [webpageId], seed-config.NUM_TRAINING_TRACES, seed )
                webpageTest = Datastore.getWebpagesLL( [webpageId], seed, seed+config.NUM_TESTING_TRACES )
            elif config.DATA_SOURCE == 1 or config.DATA_SOURCE == 2:
                webpageTrain = Datastore.getWebpagesHerrmann( [webpageId], seed-config.NUM_TRAINING_TRACES, seed )
                webpageTest = Datastore.getWebpagesHerrmann( [webpageId], seed, seed+config.NUM_TESTING_TRACES )

            webpageTrain = webpageTrain[0]
            webpageTest = webpageTest[0]

            if targetWebpage == None:
                targetWebpage = webpageTrain

            preCountermeasureOverhead += webpageTrain.getBandwidth()
            preCountermeasureOverhead += webpageTest.getBandwidth()

            metadata = None
            if config.COUNTERMEASURE in [config.DIRECT_TARGET_SAMPLING, config.WRIGHT_STYLE_MORPHING]:
                metadata = countermeasure.buildMetadata( webpageTrain, targetWebpage )

            # i==0 -> training webpage, i==1 -> testing webpage.
            # NOTE(review): this rebinds the outer trial counter `i`;
            # harmless for a for-range loop but worth renaming.
            i = 0
            for w in [webpageTrain, webpageTest]:
                for trace in w.getTraces():
                    if countermeasure:
                        if config.COUNTERMEASURE in [config.DIRECT_TARGET_SAMPLING, config.WRIGHT_STYLE_MORPHING]:
                            # The target page itself is left unmorphed.
                            if w.getId()!=targetWebpage.getId():
                                traceWithCountermeasure = countermeasure.applyCountermeasure( trace, metadata )
                            else:
                                traceWithCountermeasure = trace
                        else:
                            traceWithCountermeasure = countermeasure.applyCountermeasure( trace )
                    else:
                        traceWithCountermeasure = trace

                    postCountermeasureOverhead += traceWithCountermeasure.getBandwidth()
                    instance = classifier.traceToInstance( traceWithCountermeasure )

                    if instance:
                        if i==0:
                            trainingSet.append( instance )
                        elif i==1:
                            testingSet.append( instance )
                i+=1

        ###################

        startClass = time.time()

        [accuracy,debugInfo] = classifier.classify( runID, trainingSet, testingSet )

        end = time.time()

        # Overhead is reported as "post/pre" total bytes.
        overhead = str(postCountermeasureOverhead)+'/'+str(preCountermeasureOverhead)

        output = [accuracy,overhead]

        output.append( '%.2f' % (end-startStart) )
        output.append( '%.2f' % (end-startClass) )

        summary = ', '.join(itertools.imap(str, output))

        f = open( outputFilename+'.output', 'a' )
        f.write( "\n"+summary )
        f.close()

        f = open( outputFilename+'.debug', 'a' )
        for entry in debugInfo:
            f.write( entry[0]+','+entry[1]+"\n" )
        f.close()

if __name__ == '__main__':
    run()
--------------------------------------------------------------------------------