├── lib ├── __init__.py ├── streamifiers │ ├── __init__.py │ ├── all_one.py │ ├── host.py │ └── public_suffix.py ├── harfile.py └── stream.py ├── .gitignore ├── compressor ├── bohe │ ├── README.md │ ├── common_utils.py │ ├── __init__.py │ └── header_freq_tables.py ├── delta_bohe │ ├── README.md │ ├── header_freq_tables.h │ ├── Makefile │ ├── utils.h │ ├── harfile_translator.py │ ├── spdy4_headers_codec.h │ ├── huffman_test.cc │ ├── word_freak.py │ ├── utils.cc │ ├── common_utils.py │ ├── bit_bucket_test.py │ ├── huffman_test.py │ ├── __init__.py │ ├── trivial_http_parse.h │ ├── pretty_print_tree.h │ ├── header_freq_tables.py │ └── bit_bucket_test.cc ├── http1 │ └── __init__.py ├── delta │ ├── header_freq_tables.h │ ├── Makefile │ ├── utils.h │ ├── harfile_translator.py │ ├── spdy4_headers_codec.h │ ├── huffman_test.cc │ ├── word_freak.py │ ├── utils.cc │ ├── common_utils.py │ ├── __init__.py │ ├── bit_bucket_test.py │ ├── huffman_test.py │ ├── trivial_http_parse.h │ ├── pretty_print_tree.h │ ├── header_freq_tables.py │ └── bit_bucket_test.cc ├── hyper_hpack │ └── __init__.py ├── http2 │ ├── README.md │ └── __init__.py ├── http1_gzip │ └── __init__.py ├── http1_huffman │ └── __init__.py ├── simple │ └── seven.py ├── delta2 │ ├── harfile_translator.py │ ├── README.md │ ├── word_freak.py │ ├── common_utils.py │ ├── __init__.py │ ├── huffman_test.py │ ├── lrustorage.py │ └── lrustorage_test.py ├── delta2_bohe │ ├── harfile_translator.py │ ├── word_freak.py │ ├── common_utils.py │ ├── bit_bucket_test.py │ ├── huffman_test.py │ ├── __init__.py │ ├── lrustorage.py │ ├── lrustorage_test.py │ └── header_freq_tables.py ├── fork │ └── __init__.py ├── spdy3 │ └── __init__.py ├── headerdiff │ ├── README.md │ └── __init__.py └── __init__.py ├── sample_exec_codec.py ├── LICENSE ├── README.md ├── display_tsv.html └── compare_compressors.py /lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.tsv 3 | -------------------------------------------------------------------------------- /compressor/bohe/README.md: -------------------------------------------------------------------------------- 1 | Raw BOHE impl... based on the I-D, without any delta encoding.. included for comparison. -------------------------------------------------------------------------------- /compressor/delta_bohe/README.md: -------------------------------------------------------------------------------- 1 | Requires Werkzeug HTTP Library: http://werkzeug.pocoo.org/ 2 | 3 | Other required imports: 4 | 5 | from datetime import datetime 6 | from Cookie import BaseCookie 7 | from werkzeug import http 8 | import sys 9 | import struct 10 | import md5 11 | import re 12 | 13 | 14 | -------------------------------------------------------------------------------- /lib/streamifiers/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..stream import Stream 4 | 5 | class BaseStreamifier(object): 6 | """ 7 | Base class for a streamifier. 8 | """ 9 | def __init__(self, procs): 10 | self.procs = procs 11 | 12 | def streamify(self, messages): 13 | """ 14 | Given a list of messages (each a req, res tuple), return a list of 15 | Stream objects. 16 | """ 17 | raise NotImplementedError -------------------------------------------------------------------------------- /compressor/http1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | from .. import BaseProcessor, spdy_dictionary, format_http1, parse_http1 6 | 7 | class Processor(BaseProcessor): 8 | def compress(self, in_headers, host): 9 | return format_http1(in_headers) 10 | 11 | def decompress(self, compressed): 12 | return parse_http1(compressed, self.is_request) -------------------------------------------------------------------------------- /lib/streamifiers/all_one.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from . import BaseStreamifier, Stream 4 | 5 | class Streamifier(BaseStreamifier): 6 | """ 7 | Put all messages into two streams, requests and responses. 8 | """ 9 | def streamify(self, messages): 10 | reqs, ress = [], [] 11 | for req, res in messages: 12 | host = req[':host'] 13 | reqs.append((req, host)) 14 | ress.append((res, host)) 15 | req_stream = Stream('all', reqs, 'req', self.procs) 16 | res_stream = Stream('all', ress, 'res', self.procs) 17 | return [req_stream, res_stream] 18 | -------------------------------------------------------------------------------- /sample_exec_codec.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | 6 | def main(): 7 | while True: 8 | headers = [] 9 | name = "" 10 | if len(sys.argv) >= 2: 11 | name = sys.argv[1] 12 | else: 13 | name = "%d" % os.getpid() 14 | while True: 15 | line = sys.stdin.readline() 16 | if line.strip() == "": 17 | break 18 | headers.append(line) 19 | 20 | sys.stdout.write(''.join(headers)) 21 | sys.stdout.write("\n") 22 | try: 23 | sys.stdout.flush() 24 | except IOError: # done 25 | break 26 | 27 | main() 28 | -------------------------------------------------------------------------------- /compressor/delta/header_freq_tables.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #ifndef HEADER_FREQ_TABLES 5 | #define HEADER_FREQ_TABLES 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | typedef std::pair FreqEntry; 13 | typedef std::array FreqTable; 14 | 15 | struct FreqTables { 16 | public: 17 | static FreqTable request_freq_table; 18 | static FreqTable response_freq_table; 19 | }; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /compressor/delta_bohe/header_freq_tables.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #ifndef HEADER_FREQ_TABLES 5 | #define HEADER_FREQ_TABLES 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | typedef std::pair FreqEntry; 13 | typedef std::array FreqTable; 14 | 15 | struct FreqTables { 16 | public: 17 | static FreqTable request_freq_table; 18 | static FreqTable response_freq_table; 19 | }; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /compressor/hyper_hpack/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | import hpack 3 | from .. import BaseProcessor, spdy_dictionary, format_http1 4 | 5 | class Processor(BaseProcessor): 6 | def __init__(self, options, is_request, params): 7 | BaseProcessor.__init__(self, options, is_request, params) 8 | self.compressor = hpack.Encoder() 9 | self.decompressor = hpack.Decoder() 10 | self.sensitive = [] 11 | 12 | def compress(self, in_headers, host): 13 | headers = [(n,v,n.lower() in self.sensitive) for (n,v) in in_headers.items()] 14 | return self.compressor.encode(headers) 15 | 16 | def decompress(self, compressed): 17 | return self.decompressor.decode(compressed) -------------------------------------------------------------------------------- /compressor/http2/README.md: -------------------------------------------------------------------------------- 1 | HPACK Codec 2 | =========== 3 | 4 | Specification 5 | ------------- 6 | This codec is an implementation of HPACK (Header Compression for HTTP/2.0) which specification can be found at: 7 | https://datatracker.ietf.org/doc/draft-ietf-httpbis-header-compression/. More specifically, this codec implements the 3rd draft of this specification. 8 | 9 | Contents 10 | -------- 11 | The codec comprises both an encoder and a decoder. It has a few options. 12 | 13 | ### Maximum buffer size 14 | 15 | The buffer_size option allows to specify the maximum size of the header table. 16 | The default value is 4096 bytes. 17 | 18 | Example usage: 19 | 20 | ./compare_compressors.py -c "http2=buffer_size=8192" file.har 21 | -------------------------------------------------------------------------------- /compressor/http1_gzip/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | import zlib 6 | from .. import BaseProcessor, spdy_dictionary, format_http1 7 | 8 | class Processor(BaseProcessor): 9 | def __init__(self, options, is_request, params): 10 | BaseProcessor.__init__(self, options, is_request, params) 11 | self.compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, 12 | zlib.DEFLATED, 15) 13 | self.compressor.compress(spdy_dictionary.spdy_dict); 14 | self.compressor.flush(zlib.Z_SYNC_FLUSH) 15 | 16 | def compress(self, in_headers, host): 17 | http1_msg = format_http1(in_headers) 18 | return ''.join([ 19 | self.compressor.compress(http1_msg), 20 | self.compressor.flush(zlib.Z_SYNC_FLUSH) 21 | ]) 22 | -------------------------------------------------------------------------------- /compressor/http1_huffman/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | import zlib 6 | from .. import BaseProcessor, spdy_dictionary, format_http1 7 | 8 | class Processor(BaseProcessor): 9 | def __init__(self, options, is_request, params): 10 | BaseProcessor.__init__(self, options, is_request, params) 11 | self.compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, 12 | zlib.DEFLATED, 15, 8, zlib.Z_HUFFMAN_ONLY) 13 | self.compressor.compress(spdy_dictionary.spdy_dict); 14 | self.compressor.flush(zlib.Z_SYNC_FLUSH) 15 | 16 | def compress(self, in_headers, host): 17 | http1_msg = format_http1(in_headers) 18 | return ''.join([ 19 | self.compressor.compress(http1_msg), 20 | self.compressor.flush(zlib.Z_SYNC_FLUSH) 21 | ]) 22 | -------------------------------------------------------------------------------- /compressor/simple/seven.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Serialise ASCII as seven bits. 5 | 6 | Yes, I threw up a bit too. 7 | """ 8 | 9 | from bitarray import bitarray 10 | 11 | def encode(text): 12 | ba = bitarray() 13 | out = bitarray() 14 | ba.fromstring(text) 15 | s = 0 16 | while s < len(ba): 17 | byte = ba[s:s+8] 18 | out.extend(byte[1:8]) 19 | s += 8 20 | # print out 21 | return out.tobytes() 22 | 23 | def decode(bits): 24 | ba = bitarray() 25 | out = bitarray() 26 | ba.frombytes(bits) 27 | s = 0 28 | while s < len(ba): 29 | seven = ba[s:s+7] 30 | out.append(0) 31 | out.extend(seven) 32 | s += 7 33 | return out.tostring()[:-1].encode('ascii') 34 | 35 | 36 | if __name__ == "__main__": 37 | import sys 38 | instr = sys.argv[1].strip().encode('ascii') 39 | print "before: %s" % len(instr) 40 | f = encode(instr) 41 | print "after: %s" % len(f) 42 | g = decode(f) 43 | assert instr == g, "\n%s\n%s" % (repr(instr), repr(g)) 44 | -------------------------------------------------------------------------------- /compressor/delta/Makefile: -------------------------------------------------------------------------------- 1 | CXX = g++ 2 | #CXX = clang++ 3 | DEBUGFLAGS = -DDEBUG -g -D_GLIBCXX_DEBUG 4 | OPTFLAGS = -O3 -DNDEBUG 5 | WARN = -Wall -Werror 6 | STD = -std=c++0x 7 | CXXFLAGS = $(DEBUG) $(OPT) $(WARN) $(STD) 8 | 9 | TARGETS = bit_bucket_test huffman_test spdy4_headers_sample 10 | 11 | SRCS = *.cc 12 | OBJS = $(SRCS:%.cc=%.o) 13 | 14 | 15 | ifndef DEBUG 16 | OPT = $(OPTFLAGS) 17 | DEBUG = -g 18 | else 19 | OPT = 20 | DEBUG = $(DEBUGFLAGS) 21 | endif 22 | 23 | 24 | .PHONEY: clean all 25 | 26 | all: $(TARGETS) 27 | 28 | clean: 29 | rm -f $(TARGETS) $(OBJS) $(DEPS) *.pyc 30 | 31 | .cc.o: 32 | $(CXX) $(CXXFLAGS) $< -c -o $@ 33 | 34 | spdy4_headers_sample: spdy4_headers_sample.o spdy4_headers_codec.o header_freq_tables.o 35 | $(CXX) $(CXXFLAGS) $^ -o $@ -lrt -lz 36 | 37 | bit_bucket_test: bit_bucket_test.o 38 | $(CXX) $(CXXFLAGS) $^ -o $@ 39 | 40 | huffman_test: huffman_test.o header_freq_tables.o utils.o 41 | $(CXX) $(CXXFLAGS) $^ -o $@ 42 | 43 | -------------------------------------------------------------------------------- /compressor/delta_bohe/Makefile: -------------------------------------------------------------------------------- 1 | CXX = g++ 2 | #CXX = clang++ 3 | DEBUGFLAGS = -DDEBUG -g -D_GLIBCXX_DEBUG 4 | OPTFLAGS = -O3 -DNDEBUG 5 | WARN = -Wall -Werror 6 | STD = -std=c++0x 7 | CXXFLAGS = $(DEBUG) $(OPT) $(WARN) $(STD) 8 | 9 | TARGETS = bit_bucket_test huffman_test spdy4_headers_sample 10 | 11 | SRCS = *.cc 12 | OBJS = $(SRCS:%.cc=%.o) 13 | 14 | 15 | ifndef DEBUG 16 | OPT = $(OPTFLAGS) 17 | DEBUG = -g 18 | else 19 | OPT = 20 | DEBUG = $(DEBUGFLAGS) 21 | endif 22 | 23 | 24 | .PHONEY: clean all 25 | 26 | all: $(TARGETS) 27 | 28 | clean: 29 | rm -f $(TARGETS) $(OBJS) $(DEPS) *.pyc 30 | 31 | .cc.o: 32 | $(CXX) $(CXXFLAGS) $< -c -o $@ 33 | 34 | spdy4_headers_sample: spdy4_headers_sample.o spdy4_headers_codec.o header_freq_tables.o 35 | $(CXX) $(CXXFLAGS) $^ -o $@ -lrt -lz 36 | 37 | bit_bucket_test: bit_bucket_test.o 38 | $(CXX) $(CXXFLAGS) $^ -o $@ 39 | 40 | huffman_test: huffman_test.o header_freq_tables.o utils.o 41 | $(CXX) $(CXXFLAGS) $^ -o $@ 42 | 43 | -------------------------------------------------------------------------------- /lib/streamifiers/host.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from collections import defaultdict 4 | 5 | from . import BaseStreamifier, Stream 6 | 7 | class Streamifier(BaseStreamifier): 8 | """ 9 | Split the messages into streams, one per direction per hostname. 10 | """ 11 | def __init__(self, procs): 12 | BaseStreamifier.__init__(self, procs) 13 | 14 | def streamify(self, messages): 15 | """ 16 | Given a list of messages (each a req, res tuple), return a list of 17 | Stream objects. 18 | """ 19 | reqs = defaultdict(list) 20 | ress = defaultdict(list) 21 | hosts = [] 22 | for req, res in messages: 23 | host = req[':host'].lower().strip() 24 | if host not in hosts: 25 | hosts.append(host) 26 | reqs[host].append((req, host)) 27 | ress[host].append((res, host)) 28 | 29 | streams = [] 30 | for host in hosts: 31 | streams.append(Stream(host, reqs[host], 'req', self.procs)) 32 | streams.append(Stream(host, ress[host], 'res', self.procs)) 33 | return streams -------------------------------------------------------------------------------- /compressor/delta/utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #ifndef UTILS_H 5 | #define UTILS_H 6 | 7 | #include 8 | #include 9 | 10 | using std::string; 11 | using std::stringstream; 12 | 13 | string ReadableUShort(uint16_t c); 14 | 15 | template 16 | string FormatAsBits(const T& v, int num_bits, int offset = 0) { 17 | stringstream retval; 18 | for (int i = 0; i < num_bits; ++i) { 19 | int byte_idx = i / 8; 20 | unsigned int c = v[byte_idx]; 21 | if ((i + offset) % 8 == 0) 22 | retval << "|"; 23 | retval << ((c & (0x80U >> (i % 8))) > 0); 24 | } 25 | return retval.str(); 26 | } 27 | 28 | template <> 29 | string FormatAsBits(const uint32_t& v, int num_bits, int offset); 30 | 31 | template <> 32 | string FormatAsBits(const uint16_t& v, int num_bits, int offset); 33 | 34 | template <> 35 | string FormatAsBits(const uint8_t& v, int num_bits, int offset); 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /compressor/delta_bohe/utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #ifndef UTILS_H 5 | #define UTILS_H 6 | 7 | #include 8 | #include 9 | 10 | using std::string; 11 | using std::stringstream; 12 | 13 | string ReadableUShort(uint16_t c); 14 | 15 | template 16 | string FormatAsBits(const T& v, int num_bits, int offset = 0) { 17 | stringstream retval; 18 | for (int i = 0; i < num_bits; ++i) { 19 | int byte_idx = i / 8; 20 | unsigned int c = v[byte_idx]; 21 | if ((i + offset) % 8 == 0) 22 | retval << "|"; 23 | retval << ((c & (0x80U >> (i % 8))) > 0); 24 | } 25 | return retval.str(); 26 | } 27 | 28 | template <> 29 | string FormatAsBits(const uint32_t& v, int num_bits, int offset); 30 | 31 | template <> 32 | string FormatAsBits(const uint16_t& v, int num_bits, int offset); 33 | 34 | template <> 35 | string FormatAsBits(const uint8_t& v, int num_bits, int offset); 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /compressor/delta/harfile_translator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | 7 | from harfile import ReadHarFile 8 | from optparse import OptionParser 9 | import sys 10 | import os 11 | 12 | def FormatIt(frame): 13 | for (k, v) in frame.iteritems(): 14 | print "%s: %s" %(k, v) 15 | 16 | def main(): 17 | parser = OptionParser() 18 | (options, args) = parser.parse_args() 19 | if not args: 20 | return 21 | if args >= 1: 22 | requests = [] 23 | responses = [] 24 | for filename in args: 25 | sys.stderr.write(filename) 26 | (har_requests, har_responses) = ReadHarFile(filename) 27 | requests.extend(har_requests) 28 | responses.extend(har_responses) 29 | for i in xrange(len(requests)): 30 | FormatIt(requests[i]) 31 | print 32 | FormatIt(responses[i]) 33 | print 34 | sys.stdin.close() 35 | sys.stdout.close() 36 | sys.stderr.close() 37 | os.close(0) 38 | os.close(1) 39 | os.close(2) 40 | 41 | main() 42 | -------------------------------------------------------------------------------- /compressor/delta2/harfile_translator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | 7 | from harfile import ReadHarFile 8 | from optparse import OptionParser 9 | import sys 10 | import os 11 | 12 | def FormatIt(frame): 13 | for (k, v) in frame.iteritems(): 14 | print "%s: %s" %(k, v) 15 | 16 | def main(): 17 | parser = OptionParser() 18 | (options, args) = parser.parse_args() 19 | if not args: 20 | return 21 | if args >= 1: 22 | requests = [] 23 | responses = [] 24 | for filename in args: 25 | sys.stderr.write(filename) 26 | (har_requests, har_responses) = ReadHarFile(filename) 27 | requests.extend(har_requests) 28 | responses.extend(har_responses) 29 | for i in xrange(len(requests)): 30 | FormatIt(requests[i]) 31 | print 32 | FormatIt(responses[i]) 33 | print 34 | sys.stdin.close() 35 | sys.stdout.close() 36 | sys.stderr.close() 37 | os.close(0) 38 | os.close(1) 39 | os.close(2) 40 | 41 | main() 42 | -------------------------------------------------------------------------------- /compressor/delta2_bohe/harfile_translator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | 7 | from harfile import ReadHarFile 8 | from optparse import OptionParser 9 | import sys 10 | import os 11 | 12 | def FormatIt(frame): 13 | for (k, v) in frame.iteritems(): 14 | print "%s: %s" %(k, v) 15 | 16 | def main(): 17 | parser = OptionParser() 18 | (options, args) = parser.parse_args() 19 | if not args: 20 | return 21 | if args >= 1: 22 | requests = [] 23 | responses = [] 24 | for filename in args: 25 | sys.stderr.write(filename) 26 | (har_requests, har_responses) = ReadHarFile(filename) 27 | requests.extend(har_requests) 28 | responses.extend(har_responses) 29 | for i in xrange(len(requests)): 30 | FormatIt(requests[i]) 31 | print 32 | FormatIt(responses[i]) 33 | print 34 | sys.stdin.close() 35 | sys.stdout.close() 36 | sys.stderr.close() 37 | os.close(0) 38 | os.close(1) 39 | os.close(2) 40 | 41 | main() 42 | -------------------------------------------------------------------------------- /compressor/delta_bohe/harfile_translator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | 7 | from harfile import ReadHarFile 8 | from optparse import OptionParser 9 | import sys 10 | import os 11 | 12 | def FormatIt(frame): 13 | for (k, v) in frame.iteritems(): 14 | print "%s: %s" %(k, v) 15 | 16 | def main(): 17 | parser = OptionParser() 18 | (options, args) = parser.parse_args() 19 | if not args: 20 | return 21 | if args >= 1: 22 | requests = [] 23 | responses = [] 24 | for filename in args: 25 | sys.stderr.write(filename) 26 | (har_requests, har_responses) = ReadHarFile(filename) 27 | requests.extend(har_requests) 28 | responses.extend(har_responses) 29 | for i in xrange(len(requests)): 30 | FormatIt(requests[i]) 31 | print 32 | FormatIt(responses[i]) 33 | print 34 | sys.stdin.close() 35 | sys.stdout.close() 36 | sys.stderr.close() 37 | os.close(0) 38 | os.close(1) 39 | os.close(2) 40 | 41 | main() 42 | -------------------------------------------------------------------------------- /compressor/delta/spdy4_headers_codec.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #ifndef SPDY4_HEADERS_CODEC_H__ 5 | #define SPDY4_HEADERS_CODEC_H__ 6 | 7 | #include 8 | 9 | #include "header_freq_tables.h" 10 | #include "trivial_http_parse.h" 11 | #include "bit_bucket.h" 12 | 13 | typedef uint16_t LRUIdx; 14 | 15 | typedef BitBucket OutputStream; 16 | 17 | class SPDY4HeadersCodecImpl; 18 | 19 | class SPDY4HeadersCodec { 20 | private: 21 | SPDY4HeadersCodecImpl* impl; 22 | public: 23 | SPDY4HeadersCodec(const FreqTable& sft); 24 | ~SPDY4HeadersCodec(); 25 | 26 | size_t CurrentStateSize() const; 27 | 28 | void ProcessFrame(OutputStream* os, 29 | uint32_t stream_id, 30 | uint32_t group_id, 31 | const HeaderFrame& headers, 32 | bool this_ends_the_frame); 33 | 34 | void SetMaxStateSize(size_t max_size); 35 | 36 | void SetMaxVals(size_t max_size); 37 | }; 38 | 39 | 40 | #endif //SPDY4_HEADERS_CODEC_H__ 41 | 42 | 43 | -------------------------------------------------------------------------------- /compressor/delta_bohe/spdy4_headers_codec.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #ifndef SPDY4_HEADERS_CODEC_H__ 5 | #define SPDY4_HEADERS_CODEC_H__ 6 | 7 | #include 8 | 9 | #include "header_freq_tables.h" 10 | #include "trivial_http_parse.h" 11 | #include "bit_bucket.h" 12 | 13 | typedef uint16_t LRUIdx; 14 | 15 | typedef BitBucket OutputStream; 16 | 17 | class SPDY4HeadersCodecImpl; 18 | 19 | class SPDY4HeadersCodec { 20 | private: 21 | SPDY4HeadersCodecImpl* impl; 22 | public: 23 | SPDY4HeadersCodec(const FreqTable& sft); 24 | ~SPDY4HeadersCodec(); 25 | 26 | size_t CurrentStateSize() const; 27 | 28 | void ProcessFrame(OutputStream* os, 29 | uint32_t stream_id, 30 | uint32_t group_id, 31 | const HeaderFrame& headers, 32 | bool this_ends_the_frame); 33 | 34 | void SetMaxStateSize(size_t max_size); 35 | 36 | void SetMaxVals(size_t max_size); 37 | }; 38 | 39 | 40 | #endif //SPDY4_HEADERS_CODEC_H__ 41 | 42 | 43 | -------------------------------------------------------------------------------- /lib/streamifiers/public_suffix.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from collections import defaultdict 4 | 5 | from . import BaseStreamifier, Stream 6 | 7 | from publicsuffix import PublicSuffixList 8 | 9 | class Streamifier(BaseStreamifier): 10 | """ 11 | Use the Public Suffix List to split the messages 12 | into streams, one per direction per suffix. 13 | """ 14 | def __init__(self, procs): 15 | BaseStreamifier.__init__(self, procs) 16 | self.psl = PublicSuffixList() 17 | 18 | def streamify(self, messages): 19 | """ 20 | Given a list of messages (each a req, res tuple), return a list of 21 | Stream objects. 22 | """ 23 | reqs = defaultdict(list) 24 | ress = defaultdict(list) 25 | suffixes = [] 26 | for req, res in messages: 27 | host = req[':host'] 28 | suffix = self.psl.get_public_suffix(host.split(":", 1)[0]) 29 | if suffix not in suffixes: 30 | suffixes.append(suffix) 31 | reqs[suffix].append((req, host)) 32 | ress[suffix].append((res, host)) 33 | 34 | streams = [] 35 | for suffix in suffixes: 36 | streams.append(Stream(suffix, reqs[suffix], 'req', self.procs)) 37 | streams.append(Stream(suffix, ress[suffix], 'res', self.procs)) 38 | return streams -------------------------------------------------------------------------------- /compressor/fork/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | import os 6 | import subprocess 7 | import struct 8 | import sys 9 | 10 | from .. import BaseProcessor, format_http1, strip_conn_headers 11 | 12 | class Processor(BaseProcessor): 13 | def __init__(self, options, is_request, params): 14 | BaseProcessor.__init__(self, options, is_request, params) 15 | if "bin" in params[1:]: 16 | self.delimit_binary = True 17 | else: 18 | self.delimit_binary = False 19 | path = os.path.join(os.getcwd(), params[0]) 20 | self.process = subprocess.Popen(path, 21 | #bufsize=-1, 22 | shell=False, 23 | stdout=subprocess.PIPE, 24 | stdin=subprocess.PIPE) 25 | 26 | def compress(self, in_headers, host): 27 | http1_msg = format_http1(strip_conn_headers(in_headers)) 28 | self.process.stdin.write(http1_msg) 29 | if self.delimit_binary: 30 | output = self.process.stdout.read(8) 31 | size = struct.unpack("q", output)[0] 32 | output = self.process.stdout.read(int(size)) 33 | else: 34 | output = "" 35 | while True: 36 | line = self.process.stdout.readline() 37 | if line.strip() == "": 38 | break 39 | output += line 40 | return output 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /compressor/delta2/README.md: -------------------------------------------------------------------------------- 1 | 2 | Delta2 Compressor 3 | ================= 4 | 5 | 6 | Parameters 7 | ---------- 8 | 9 | * max_byte_size: the maximum number of key and value characters that the compressor is allowed to buffer. 10 | * max_entries: the maximum number of slots in the LRU. 11 | * hg_adjust: when set, entries of the current header-group are reinserted into the LRU. This is intended to ensure that these values stay alive in the LRU for longer and also intended to casue the indices which refer to these values to be bunched together. 12 | * implict_hg_add: when set, items being added via sclone or skvsto have their indices automatically inserted into the current header-group. 13 | * small_index: when set, causes the index size (on the wire) to become one byte, down from two bytes. This indirectly causes the maximum number of items in the LRU to drop to ~200 elements (though smaller values are still honored if set via max_entries, above). 14 | * refcnt_vals: when set, value strings are refcounted. This only matters when/if hg_adjust is enabled. 15 | * only_etoggles: when set, the compressor is forced to make explicit backreferences to everything, and thus acts similarly to the headerdiff encoder. 16 | * varint_encoding: when set, indices are encoded as variable-length integers. For values <= 15, 4 bits will be used. For values >15 and <= 255, 12 bits will be used. For values >255 and <= 16535, 28 bits will be used, and for values >16535, 60 bits will be used. For this to be effective, obviously, the expectation is that most integer values are quite small. 17 | # idx_from_end: when set, indices are encoded as distance-from-the-newest element. In conjunection with varint_encoding, this should yield a space savings on the wire. 18 | 19 | 20 | -------------------------------------------------------------------------------- /compressor/delta/huffman_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #include 5 | #include 6 | 7 | #include "header_freq_tables.h" 8 | #include "huffman.h" 9 | 10 | using std::cerr; 11 | using std::string; 12 | 13 | struct Testcase { 14 | string input; 15 | }; 16 | 17 | template 18 | void Test(const T& expected, const T& actual) { 19 | if (expected != actual) { 20 | cerr << "\n"; 21 | cerr << " --- FAILED ---\n"; 22 | cerr << " Expected: \"" << expected << "\"\n"; 23 | cerr << " Got: \"" << actual << "\"\n"; 24 | abort(); 25 | } 26 | } 27 | 28 | void TestEncodeDecode(const Huffman& huff, 29 | const string& input, 30 | bool use_eof, 31 | bool use_length, 32 | int length_delta) { 33 | string decoded; 34 | 35 | 36 | BitBucket bb; 37 | huff.Encode(&bb, input, use_eof); 38 | 39 | int num_bits = 0; 40 | if (use_length) 41 | num_bits = bb.NumBits() + length_delta; 42 | huff.Decode(&decoded, &bb, use_eof, num_bits); 43 | Test(input, decoded); 44 | } 45 | 46 | int main(int argc, char**argv) { 47 | Huffman huff; 48 | huff.Init(FreqTables::request_freq_table); 49 | array tests = {{ 50 | "dabbcccddddeeeee", 51 | "foobarbaz", 52 | "0-2rklnsvkl;-23kDFSi01k0=", 53 | "-9083480-12hjkadsgf8912345kl;hjajkl; `123890", 54 | "-3;jsdf", 55 | "\xFF\xE0\t\ne\x81\x82", 56 | }}; 57 | for (unsigned int i = 0; i < tests.size(); ++i) { 58 | const string& test = tests[i]; 59 | cerr << "TEST: " << test << "..."; 60 | cerr << "\n"; 61 | TestEncodeDecode(huff, test, true, false, 0); 62 | TestEncodeDecode(huff, test, false, true, 0); 63 | TestEncodeDecode(huff, test, true, true, 8); 64 | cerr << "PASSED!\n"; 65 | } 66 | //cout << huff; 67 | return EXIT_SUCCESS; 68 | } 69 | -------------------------------------------------------------------------------- /compressor/spdy3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | import zlib 6 | import struct 7 | from .. import spdy_dictionary, BaseProcessor 8 | 9 | class Processor(BaseProcessor): 10 | def __init__(self, options, is_request, params): 11 | BaseProcessor.__init__(self, options, is_request, params) 12 | self.compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, 13 | zlib.DEFLATED, 15) 14 | if 'dict' in params: 15 | self.compressor.compress(spdy_dictionary.spdy_dict); 16 | self.compressor.flush(zlib.Z_SYNC_FLUSH) 17 | 18 | def compress(self, in_headers, host): 19 | raw_spdy3_frame = self.Spdy3HeadersFormat(in_headers) 20 | compress_me_payload = raw_spdy3_frame[12:] 21 | final_frame = raw_spdy3_frame[:12] 22 | final_frame += self.compressor.compress(compress_me_payload) 23 | final_frame += self.compressor.flush(zlib.Z_SYNC_FLUSH) 24 | return final_frame 25 | 26 | def Spdy3HeadersFormat(self, request): 27 | """ 28 | Formats the provided headers in SPDY3 format, uncompressed 29 | """ 30 | out_frame = [] 31 | frame_len = 0 32 | for (key, val) in request.items(): 33 | frame_len += 4 34 | frame_len += len(key) 35 | frame_len += 4 36 | frame_len += len(val) 37 | stream_id = 1 38 | num_kv_pairs = len(list(request.keys())) 39 | # out_frame.append(struct.pack('!L', 0x1 << 31 | 0x11 << 15 | 0x8)) 40 | # out_frame.append(struct.pack('!L', frame_len)) 41 | # out_frame.append(struct.pack('!L', stream_id)) 42 | # out_frame.append(struct.pack('!L', num_kv_pairs)) 43 | for (key, val) in request.items(): 44 | out_frame.append(struct.pack('!L', len(key))) 45 | out_frame.append(key.encode('ascii')) 46 | out_frame.append(struct.pack('!L', len(val))) 47 | out_frame.append(val.encode('ascii')) 48 | return b''.join(out_frame) 49 | 50 | -------------------------------------------------------------------------------- /compressor/delta_bohe/huffman_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #include 5 | #include 6 | 7 | #include "header_freq_tables.h" 8 | #include "huffman.h" 9 | 10 | using std::cerr; 11 | using std::string; 12 | 13 | struct Testcase { 14 | string input; 15 | }; 16 | 17 | template 18 | void Test(const T& expected, const T& actual) { 19 | if (expected != actual) { 20 | cerr << "\n"; 21 | cerr << " --- FAILED ---\n"; 22 | cerr << " Expected: \"" << expected << "\"\n"; 23 | cerr << " Got: \"" << actual << "\"\n"; 24 | abort(); 25 | } 26 | } 27 | 28 | void TestEncodeDecode(const Huffman& huff, 29 | const string& input, 30 | bool use_eof, 31 | bool use_length, 32 | int length_delta) { 33 | string decoded; 34 | 35 | 36 | BitBucket bb; 37 | huff.Encode(&bb, input, use_eof); 38 | 39 | int num_bits = 0; 40 | if (use_length) 41 | num_bits = bb.NumBits() + length_delta; 42 | huff.Decode(&decoded, &bb, use_eof, num_bits); 43 | Test(input, decoded); 44 | } 45 | 46 | int main(int argc, char**argv) { 47 | Huffman huff; 48 | huff.Init(FreqTables::request_freq_table); 49 | array tests = {{ 50 | "dabbcccddddeeeee", 51 | "foobarbaz", 52 | "0-2rklnsvkl;-23kDFSi01k0=", 53 | "-9083480-12hjkadsgf8912345kl;hjajkl; `123890", 54 | "-3;jsdf", 55 | "\xFF\xE0\t\ne\x81\x82", 56 | }}; 57 | for (unsigned int i = 0; i < tests.size(); ++i) { 58 | const string& test = tests[i]; 59 | cerr << "TEST: " << test << "..."; 60 | cerr << "\n"; 61 | TestEncodeDecode(huff, test, true, false, 0); 62 | TestEncodeDecode(huff, test, false, true, 0); 63 | TestEncodeDecode(huff, test, true, true, 8); 64 | cerr << "PASSED!\n"; 65 | } 66 | //cout << huff; 67 | return EXIT_SUCCESS; 68 | } 69 | -------------------------------------------------------------------------------- /compressor/delta/word_freak.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | class WordFreak: 6 | """ 7 | Observes and accumulates letter frequencies. 8 | Though it is called 'Word'Freq, it is better thought of 9 | as letter freq. 10 | """ 11 | def __init__(self): 12 | self.code = [] 13 | self.character_freaks = [] 14 | self.length_freaks = {} 15 | self.eofs = 0 16 | for i in xrange(256 + 1): 17 | self.character_freaks.append(0) 18 | 19 | def LookAt(self, ops): 20 | for op in ops: 21 | for key in ['key', 'val']: 22 | if key in op: 23 | self.length_freaks[len(op[key])] = \ 24 | self.length_freaks.get(len(op[key]),0) + 1 25 | self.character_freaks[256] += 1 26 | for c in op[key]: 27 | self.character_freaks[ord(c)] += 1 28 | 29 | def SortedByFreq(self): 30 | x = [ (chr(i), self.character_freaks[i]) \ 31 | for i in xrange(len(self.character_freaks))] 32 | return sorted(x, key=lambda x: x[1], reverse=True) 33 | 34 | def GetFrequencies(self): 35 | return self.character_freaks 36 | 37 | def __repr__(self): 38 | retval = ["["] 39 | cur_pair = "" 40 | cur_line = " " 41 | for i in xrange(len(self.character_freaks)): 42 | if (i < 256): 43 | cur_pair = "(%s, %d)," % (repr(chr(i)), self.character_freaks[i]) 44 | #retval.append( (chr(i), self.character_freaks[i])) 45 | else: 46 | cur_pair = "(%d, %d)," % (i, self.character_freaks[i]) 47 | #retval.append( (i, self.character_freaks[i])) 48 | 49 | if len(cur_pair) + len(cur_line) > 80: 50 | retval.append(cur_line) 51 | cur_line = " " 52 | cur_line = cur_line + cur_pair 53 | if cur_line != " ": 54 | retval.append(cur_line) 55 | retval.append(']') 56 | 57 | return '\n'.join(retval) 58 | 59 | def __str__(self): 60 | return self.__repr__() 61 | 62 | -------------------------------------------------------------------------------- /compressor/delta2_bohe/word_freak.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | class WordFreak: 6 | """ 7 | Observes and accumulates letter frequencies. 8 | Though it is called 'Word'Freq, it is better thought of 9 | as letter freq. 10 | """ 11 | def __init__(self): 12 | self.code = [] 13 | self.character_freaks = [] 14 | self.length_freaks = {} 15 | self.eofs = 0 16 | for i in xrange(256 + 1): 17 | self.character_freaks.append(0) 18 | 19 | def LookAt(self, ops): 20 | for op in ops: 21 | for key in ['key', 'val']: 22 | if key in op: 23 | self.length_freaks[len(op[key])] = \ 24 | self.length_freaks.get(len(op[key]),0) + 1 25 | self.character_freaks[256] += 1 26 | for c in op[key]: 27 | self.character_freaks[ord(c)] += 1 28 | 29 | def SortedByFreq(self): 30 | x = [ (chr(i), self.character_freaks[i]) \ 31 | for i in xrange(len(self.character_freaks))] 32 | return sorted(x, key=lambda x: x[1], reverse=True) 33 | 34 | def GetFrequencies(self): 35 | return self.character_freaks 36 | 37 | def __repr__(self): 38 | retval = ["["] 39 | cur_pair = "" 40 | cur_line = " " 41 | for i in xrange(len(self.character_freaks)): 42 | if (i < 256): 43 | cur_pair = "(%s, %d)," % (repr(chr(i)), self.character_freaks[i]) 44 | #retval.append( (chr(i), self.character_freaks[i])) 45 | else: 46 | cur_pair = "(%d, %d)," % (i, self.character_freaks[i]) 47 | #retval.append( (i, self.character_freaks[i])) 48 | 49 | if len(cur_pair) + len(cur_line) > 80: 50 | retval.append(cur_line) 51 | cur_line = " " 52 | cur_line = cur_line + cur_pair 53 | if cur_line != " ": 54 | retval.append(cur_line) 55 | retval.append(']') 56 | 57 | return '\n'.join(retval) 58 | 59 | def __str__(self): 60 | return self.__repr__() 61 | 62 | -------------------------------------------------------------------------------- /compressor/delta_bohe/word_freak.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | class WordFreak: 6 | """ 7 | Observes and accumulates letter frequencies. 8 | Though it is called 'Word'Freq, it is better thought of 9 | as letter freq. 10 | """ 11 | def __init__(self): 12 | self.code = [] 13 | self.character_freaks = [] 14 | self.length_freaks = {} 15 | self.eofs = 0 16 | for i in xrange(256 + 1): 17 | self.character_freaks.append(0) 18 | 19 | def LookAt(self, ops): 20 | for op in ops: 21 | for key in ['key', 'val']: 22 | if key in op: 23 | self.length_freaks[len(op[key])] = \ 24 | self.length_freaks.get(len(op[key]),0) + 1 25 | self.character_freaks[256] += 1 26 | for c in op[key]: 27 | self.character_freaks[ord(c)] += 1 28 | 29 | def SortedByFreq(self): 30 | x = [ (chr(i), self.character_freaks[i]) \ 31 | for i in xrange(len(self.character_freaks))] 32 | return sorted(x, key=lambda x: x[1], reverse=True) 33 | 34 | def GetFrequencies(self): 35 | return self.character_freaks 36 | 37 | def __repr__(self): 38 | retval = ["["] 39 | cur_pair = "" 40 | cur_line = " " 41 | for i in xrange(len(self.character_freaks)): 42 | if (i < 256): 43 | cur_pair = "(%s, %d)," % (repr(chr(i)), self.character_freaks[i]) 44 | #retval.append( (chr(i), self.character_freaks[i])) 45 | else: 46 | cur_pair = "(%d, %d)," % (i, self.character_freaks[i]) 47 | #retval.append( (i, self.character_freaks[i])) 48 | 49 | if len(cur_pair) + len(cur_line) > 80: 50 | retval.append(cur_line) 51 | cur_line = " " 52 | cur_line = cur_line + cur_pair 53 | if cur_line != " ": 54 | retval.append(cur_line) 55 | retval.append(']') 56 | 57 | return '\n'.join(retval) 58 | 59 | def __str__(self): 60 | return self.__repr__() 61 | 62 | -------------------------------------------------------------------------------- /compressor/delta2/word_freak.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | import string 6 | 7 | class WordFreak: 8 | """ 9 | Observes and accumulates letter frequencies. 10 | Though it is called 'Word'Freq, it is better thought of 11 | as letter freq. 12 | """ 13 | def __init__(self): 14 | self.code = [] 15 | self.character_freaks = [] 16 | self.length_freaks = {} 17 | self.eofs = 0 18 | for i in xrange(256 + 1): 19 | self.character_freaks.append(0) 20 | 21 | def LookAt(self, ops): 22 | for op in ops: 23 | for key in ['key', 'val']: 24 | if key in op: 25 | self.length_freaks[len(op[key])] = \ 26 | self.length_freaks.get(len(op[key]),0) + 1 27 | self.character_freaks[256] += 1 28 | for c in op[key]: 29 | self.character_freaks[ord(c)] += 1 30 | 31 | def SortedByFreq(self): 32 | x = [ (chr(i), self.character_freaks[i]) \ 33 | for i in xrange(len(self.character_freaks))] 34 | return sorted(x, key=lambda x: x[1], reverse=True) 35 | 36 | def GetFrequencies(self): 37 | return self.character_freaks 38 | 39 | def __repr__(self): 40 | printable = string.digits + string.letters + string.punctuation + ' ' 41 | max_freq = 0 42 | for i in self.character_freaks: 43 | if i > max_freq: 44 | max_freq = i 45 | freq_len = len("%d" % max_freq) 46 | retval = ["["] 47 | cur_pair = "" 48 | cur_line = " " 49 | format_string = "(%%s, %%%dd)," % freq_len 50 | for i in xrange(len(self.character_freaks)): 51 | if i < 128 and chr(i) in printable: 52 | sym = '{:>4}'.format(repr(chr(i))) 53 | else: 54 | sym = '{:>4}'.format(repr(i)) 55 | cur_pair = format_string % (sym, self.character_freaks[i]) 56 | if len(cur_pair) + len(cur_line) > 60: 57 | retval.append(cur_line) 58 | cur_line = " " 59 | cur_line = cur_line + cur_pair 60 | if cur_line != " ": 61 | retval.append(cur_line) 62 | retval.append(']') 63 | 64 | return '\n'.join(retval) 65 | 66 | def __str__(self): 67 | return self.__repr__() 68 | 69 | -------------------------------------------------------------------------------- /compressor/delta/utils.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #include 5 | 6 | #include "utils.h" 7 | 8 | using std::ostream; 9 | using std::hex; 10 | using std::dec; 11 | 12 | template <> 13 | string FormatAsBits(const uint32_t& v, int num_bits, int offset) { 14 | stringstream retval; 15 | for (int i = 0; i < num_bits; ++i) { 16 | if ((i + offset) % 8 == 0) 17 | retval << "|"; 18 | retval << (((v >> (31 - i)) & 0x1U) > 0); 19 | } 20 | return retval.str(); 21 | } 22 | 23 | template <> 24 | string FormatAsBits(const uint16_t& v, int num_bits, int offset) { 25 | stringstream retval; 26 | for (int i = 0; i < num_bits; ++i) { 27 | if ((i + offset) % 8 == 0) 28 | retval << "|"; 29 | retval << (((v >> (15 - i)) & 0x1U) > 0); 30 | } 31 | return retval.str(); 32 | } 33 | 34 | template <> 35 | string FormatAsBits(const uint8_t& v, int num_bits, int offset) { 36 | stringstream retval; 37 | for (int i = 0; i < num_bits; ++i) { 38 | if ((i + offset) % 8 == 0) 39 | retval << "|"; 40 | retval << (((v >> (7 - i)) & 0x1U) > 0); 41 | } 42 | return retval.str(); 43 | } 44 | 45 | void OutputCharToOstream(ostream& os, unsigned short c) { 46 | if (c > 256 + 1) 47 | abort(); 48 | if (c >= 256) { 49 | os << c; 50 | } else { 51 | os << " '"; 52 | if (c < 128 && (isgraph(c) || c == ' ')) { 53 | os << (char)c; 54 | } else { 55 | switch (c) { 56 | case '\t': 57 | os << "\\t"; 58 | break; 59 | case '\n': 60 | os << "\\n"; 61 | break; 62 | case '\r': 63 | os << "\\r"; 64 | break; 65 | case '\0': 66 | os << "\\0"; 67 | break; 68 | default: 69 | if (c >= 16) { 70 | os << "\\x" << hex << c << dec; 71 | } else { 72 | os << "\\x0" << hex << c << dec; 73 | } 74 | break; 75 | } 76 | } 77 | os << "'"; 78 | } 79 | } 80 | 81 | string ReadableUShort(uint16_t c) { 82 | stringstream s; 83 | OutputCharToOstream(s, c); 84 | return s.str(); 85 | } 86 | -------------------------------------------------------------------------------- /compressor/delta_bohe/utils.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #include 5 | 6 | #include "utils.h" 7 | 8 | using std::ostream; 9 | using std::hex; 10 | using std::dec; 11 | 12 | template <> 13 | string FormatAsBits(const uint32_t& v, int num_bits, int offset) { 14 | stringstream retval; 15 | for (int i = 0; i < num_bits; ++i) { 16 | if ((i + offset) % 8 == 0) 17 | retval << "|"; 18 | retval << (((v >> (31 - i)) & 0x1U) > 0); 19 | } 20 | return retval.str(); 21 | } 22 | 23 | template <> 24 | string FormatAsBits(const uint16_t& v, int num_bits, int offset) { 25 | stringstream retval; 26 | for (int i = 0; i < num_bits; ++i) { 27 | if ((i + offset) % 8 == 0) 28 | retval << "|"; 29 | retval << (((v >> (15 - i)) & 0x1U) > 0); 30 | } 31 | return retval.str(); 32 | } 33 | 34 | template <> 35 | string FormatAsBits(const uint8_t& v, int num_bits, int offset) { 36 | stringstream retval; 37 | for (int i = 0; i < num_bits; ++i) { 38 | if ((i + offset) % 8 == 0) 39 | retval << "|"; 40 | retval << (((v >> (7 - i)) & 0x1U) > 0); 41 | } 42 | return retval.str(); 43 | } 44 | 45 | void OutputCharToOstream(ostream& os, unsigned short c) { 46 | if (c > 256 + 1) 47 | abort(); 48 | if (c >= 256) { 49 | os << c; 50 | } else { 51 | os << " '"; 52 | if (c < 128 && (isgraph(c) || c == ' ')) { 53 | os << (char)c; 54 | } else { 55 | switch (c) { 56 | case '\t': 57 | os << "\\t"; 58 | break; 59 | case '\n': 60 | os << "\\n"; 61 | break; 62 | case '\r': 63 | os << "\\r"; 64 | break; 65 | case '\0': 66 | os << "\\0"; 67 | break; 68 | default: 69 | if (c >= 16) { 70 | os << "\\x" << hex << c << dec; 71 | } else { 72 | os << "\\x0" << hex << c << dec; 73 | } 74 | break; 75 | } 76 | } 77 | os << "'"; 78 | } 79 | } 80 | 81 | string ReadableUShort(uint16_t c) { 82 | stringstream s; 83 | OutputCharToOstream(s, c); 84 | return s.str(); 85 | } 86 | -------------------------------------------------------------------------------- /compressor/delta_bohe/common_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | 6 | 7 | def ListToStr(val): 8 | """ Takes a list of ints and makes it into a string """ 9 | return ''.join(['%c' % c for c in val]) 10 | 11 | def StrToList(val): 12 | """ Takes a string and makes it into a list of ints (<= 8 bits each)""" 13 | return [ord(c) for c in val] 14 | 15 | 16 | def MakeReadableString(val): 17 | """ Takes a string and returns a normalized version which allows 18 | interpretation of nonprinting characters, but easier to read than just 19 | hex.""" 20 | printable = string.digits + string.letters + string.punctuation + ' ' + '\t' 21 | out = [] 22 | for c in val: 23 | if c in printable: 24 | out.append(' %c ' % c) 25 | else: 26 | out.append('0x%02x ' % ord(c)) 27 | return ''.join(out) 28 | 29 | def FormatAsBits(output_and_bits): 30 | """ Takes as input a tuple representing (array_of_bytes, number_of_bits), 31 | and formats it as binary, with byte-boundaries marked""" 32 | (output, bits) = output_and_bits 33 | retval = [] 34 | if not bits: 35 | total_bits = len(output) * 8 36 | elif bits % 8: 37 | total_bits = (len(output) - 1) * 8 + (bits % 8) 38 | else: 39 | total_bits = len(output) * 8 40 | idx = 0 41 | while total_bits >= 8: 42 | c = output[idx] 43 | idx += 1 44 | retval.append('|') 45 | retval.append('{0:08b}'.format(c)) 46 | total_bits -= 8 47 | 48 | if (bits % 8) != 0: 49 | retval.append('|') 50 | retval.append('{0:08b}'.format(output[idx])[0:(bits % 8)]) 51 | retval.extend([' [%d]' % bits]) 52 | return ''.join(retval) 53 | 54 | 55 | class IDStore(object): 56 | """ Manages a store of IDs""" 57 | def __init__(self): 58 | self.ids = set() 59 | self.next_idx = 0 60 | 61 | def GetNext(self): 62 | """ Gets the next available ID. If an ID was returned, it will use that, 63 | else it will create a new unique id""" 64 | if self.ids: 65 | return self.ids.pop() 66 | self.next_idx += 1 67 | return self.next_idx 68 | 69 | def DoneWithId(self, id): 70 | """ Allows an ID to be reused later""" 71 | self.ids.add(id) 72 | 73 | -------------------------------------------------------------------------------- /compressor/bohe/common_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | 6 | 7 | def ListToStr(val): 8 | """ Takes a list of ints and makes it into a string """ 9 | return ''.join(['%c' % c for c in val]) 10 | 11 | def StrToList(val): 12 | """ Takes a string and makes it into a list of ints (<= 8 bits each)""" 13 | return [ord(c) for c in val] 14 | 15 | 16 | def MakeReadableString(val): 17 | """ Takes a string and returns a normalized version which allows 18 | interpretation of nonprinting characters, but easier to read than just 19 | hex.""" 20 | printable = string.digits + string.letters + string.punctuation + ' ' + '\t' 21 | out = [] 22 | for c in val: 23 | if c in printable: 24 | out.append(' %c ' % c) 25 | else: 26 | out.append('0x%02x ' % ord(c)) 27 | return ''.join(out) 28 | 29 | def FormatAsBits(output_and_bits): 30 | """ Takes as input a tuple representing (array_of_bytes, number_of_bits), 31 | and formats it as binary, with byte-boundaries marked""" 32 | (output, bits) = output_and_bits 33 | retval = [] 34 | if not bits: 35 | total_bits = len(output) * 8 36 | elif bits % 8: 37 | total_bits = (len(output) - 1) * 8 + (bits % 8) 38 | else: 39 | total_bits = len(output) * 8 40 | idx = 0 41 | while total_bits >= 8: 42 | c = output[idx] 43 | idx += 1 44 | retval.append('|') 45 | retval.append('{0:08b}'.format(c)) 46 | total_bits -= 8 47 | 48 | if (bits % 8) != 0: 49 | retval.append('|') 50 | retval.append('{0:08b}'.format(output[idx])[0:(bits % 8)]) 51 | retval.extend([' [%d]' % bits]) 52 | return ''.join(retval) 53 | 54 | 55 | class IDStore(object): 56 | """ Manages a store of IDs""" 57 | def __init__(self, max_id): 58 | self.ids = set() 59 | self.next_idx = 0 60 | self.mininum_id = 0 61 | self.maximum_id = max_id 62 | if self.maximum_id is None: 63 | print "need a max id" 64 | raise StandardError() 65 | 66 | def GetNext(self): 67 | """ Gets the next available ID. If an ID was returned, it will use that, 68 | else it will create a new unique id""" 69 | if self.ids: 70 | return self.ids.pop() 71 | self.next_idx += 1 72 | if self.maximum_id and self.next_idx >= self.maximum_id: 73 | self.next_idx = self.minimum_id 74 | return self.next_idx 75 | 76 | def DoneWithId(self, id): 77 | """ Allows an ID to be reused later""" 78 | self.ids.add(id) 79 | 80 | -------------------------------------------------------------------------------- /compressor/delta/common_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | 6 | 7 | def ListToStr(val): 8 | """ Takes a list of ints and makes it into a string """ 9 | return ''.join(['%c' % c for c in val]) 10 | 11 | def StrToList(val): 12 | """ Takes a string and makes it into a list of ints (<= 8 bits each)""" 13 | return [ord(c) for c in val] 14 | 15 | 16 | def MakeReadableString(val): 17 | """ Takes a string and returns a normalized version which allows 18 | interpretation of nonprinting characters, but easier to read than just 19 | hex.""" 20 | printable = string.digits + string.letters + string.punctuation + ' ' + '\t' 21 | out = [] 22 | for c in val: 23 | if c in printable: 24 | out.append(' %c ' % c) 25 | else: 26 | out.append('0x%02x ' % ord(c)) 27 | return ''.join(out) 28 | 29 | def FormatAsBits(output_and_bits): 30 | """ Takes as input a tuple representing (array_of_bytes, number_of_bits), 31 | and formats it as binary, with byte-boundaries marked""" 32 | (output, bits) = output_and_bits 33 | retval = [] 34 | if not bits: 35 | total_bits = len(output) * 8 36 | elif bits % 8: 37 | total_bits = (len(output) - 1) * 8 + (bits % 8) 38 | else: 39 | total_bits = len(output) * 8 40 | idx = 0 41 | while total_bits >= 8: 42 | c = output[idx] 43 | idx += 1 44 | retval.append('|') 45 | retval.append('{0:08b}'.format(c)) 46 | total_bits -= 8 47 | 48 | if (bits % 8) != 0: 49 | retval.append('|') 50 | retval.append('{0:08b}'.format(output[idx])[0:(bits % 8)]) 51 | retval.extend([' [%d]' % bits]) 52 | return ''.join(retval) 53 | 54 | 55 | class IDStore(object): 56 | """ Manages a store of IDs""" 57 | def __init__(self, max_id): 58 | self.ids = set() 59 | self.next_idx = 0 60 | self.minimum_id = 0 61 | self.maximum_id = max_id 62 | if self.maximum_id is None: 63 | print "need a max id" 64 | raise StandardError() 65 | 66 | def GetNext(self): 67 | """ Gets the next available ID. If an ID was returned, it will use that, 68 | else it will create a new unique id""" 69 | if self.ids: 70 | return self.ids.pop() 71 | self.next_idx += 1 72 | if self.maximum_id and self.next_idx >= self.maximum_id: 73 | self.next_idx = self.minimum_id 74 | return self.next_idx 75 | 76 | def DoneWithId(self, id): 77 | """ Allows an ID to be reused later""" 78 | self.ids.add(id) 79 | 80 | -------------------------------------------------------------------------------- /compressor/delta2/common_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | import string 5 | 6 | def ListToStr(val): 7 | """ Takes a list of ints and makes it into a string """ 8 | return ''.join(['%c' % c for c in val]) 9 | 10 | def StrToList(val): 11 | """ Takes a string and makes it into a list of ints (<= 8 bits each)""" 12 | return [ord(c) for c in val] 13 | 14 | 15 | def MakeReadableString(val): 16 | """ Takes a string and returns a normalized version which allows 17 | interpretation of nonprinting characters, but easier to read than just 18 | hex.""" 19 | printable = string.digits + string.letters + string.punctuation + ' ' + '\t' 20 | out = [] 21 | for c in val: 22 | if c in printable: 23 | out.append(' %c ' % c) 24 | else: 25 | out.append('0x%02x ' % ord(c)) 26 | return ''.join(out) 27 | 28 | def FormatAsBits(output_and_bits): 29 | """ Takes as input a tuple representing (array_of_bytes, number_of_bits), 30 | and formats it as binary, with byte-boundaries marked""" 31 | (output, bits) = output_and_bits 32 | retval = [] 33 | if not bits: 34 | total_bits = len(output) * 8 35 | elif bits % 8: 36 | total_bits = (len(output) - 1) * 8 + (bits % 8) 37 | else: 38 | total_bits = len(output) * 8 39 | idx = 0 40 | while total_bits >= 8: 41 | c = output[idx] 42 | idx += 1 43 | retval.append('|') 44 | retval.append('{0:08b}'.format(c)) 45 | total_bits -= 8 46 | if (bits % 8) != 0: 47 | retval.append('|') 48 | retval.append('{0:08b}'.format(output[idx])[0:(bits % 8)]) 49 | retval.extend([' [%d]' % bits]) 50 | return ''.join(retval) 51 | 52 | 53 | class IDStore(object): 54 | """ Manages a store of IDs""" 55 | def __init__(self, max_id): 56 | self.ids = set() 57 | self.next_idx = 0 58 | self.minimum_id = 0 59 | self.maximum_id = max_id 60 | if self.maximum_id is None: 61 | print "need a max id" 62 | raise StandardError() 63 | 64 | def GetNext(self): 65 | """ Gets the next available ID. If an ID was returned, it will use that, 66 | else it will create a new unique id""" 67 | if self.ids: 68 | return self.ids.pop() 69 | self.next_idx += 1 70 | if self.maximum_id and self.next_idx >= self.maximum_id: 71 | self.next_idx = self.minimum_id 72 | return self.next_idx 73 | 74 | def DoneWithId(self, id): 75 | """ Allows an ID to be reused later""" 76 | self.ids.add(id) 77 | 78 | -------------------------------------------------------------------------------- /compressor/delta2_bohe/common_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | 6 | 7 | def ListToStr(val): 8 | """ Takes a list of ints and makes it into a string """ 9 | return ''.join(['%c' % c for c in val]) 10 | 11 | def StrToList(val): 12 | """ Takes a string and makes it into a list of ints (<= 8 bits each)""" 13 | return [ord(c) for c in val] 14 | 15 | 16 | def MakeReadableString(val): 17 | """ Takes a string and returns a normalized version which allows 18 | interpretation of nonprinting characters, but easier to read than just 19 | hex.""" 20 | printable = string.digits + string.letters + string.punctuation + ' ' + '\t' 21 | out = [] 22 | for c in val: 23 | if c in printable: 24 | out.append(' %c ' % c) 25 | else: 26 | out.append('0x%02x ' % ord(c)) 27 | return ''.join(out) 28 | 29 | def FormatAsBits(output_and_bits): 30 | """ Takes as input a tuple representing (array_of_bytes, number_of_bits), 31 | and formats it as binary, with byte-boundaries marked""" 32 | (output, bits) = output_and_bits 33 | retval = [] 34 | if not bits: 35 | total_bits = len(output) * 8 36 | elif bits % 8: 37 | total_bits = (len(output) - 1) * 8 + (bits % 8) 38 | else: 39 | total_bits = len(output) * 8 40 | idx = 0 41 | while total_bits >= 8: 42 | c = output[idx] 43 | idx += 1 44 | retval.append('|') 45 | retval.append('{0:08b}'.format(c)) 46 | total_bits -= 8 47 | 48 | if (bits % 8) != 0: 49 | retval.append('|') 50 | retval.append('{0:08b}'.format(output[idx])[0:(bits % 8)]) 51 | retval.extend([' [%d]' % bits]) 52 | return ''.join(retval) 53 | 54 | 55 | class IDStore(object): 56 | """ Manages a store of IDs""" 57 | def __init__(self, max_id): 58 | self.ids = set() 59 | self.next_idx = 0 60 | self.mininum_id = 0 61 | self.maximum_id = max_id 62 | if self.maximum_id is None: 63 | print "need a max id" 64 | raise StandardError() 65 | 66 | def GetNext(self): 67 | """ Gets the next available ID. If an ID was returned, it will use that, 68 | else it will create a new unique id""" 69 | if self.ids: 70 | return self.ids.pop() 71 | self.next_idx += 1 72 | if self.maximum_id and self.next_idx >= self.maximum_id: 73 | self.next_idx = self.minimum_id 74 | return self.next_idx 75 | 76 | def DoneWithId(self, id): 77 | """ Allows an ID to be reused later""" 78 | self.ids.add(id) 79 | 80 | -------------------------------------------------------------------------------- /compressor/delta/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | import zlib 6 | import re 7 | 8 | import header_freq_tables 9 | import spdy4_codec_impl 10 | import huffman 11 | import common_utils 12 | from .. import BaseProcessor 13 | 14 | # There are a number of TODOS in the spdy4 15 | # have near indices. Possibly renumber whever something is referenced) 16 | 17 | class Processor(BaseProcessor): 18 | """ 19 | This class formats header frames in SPDY4 wire format, and then reads the 20 | resulting wire-formatted data and restores the data. Thus, it compresses and 21 | decompresses header data. 22 | 23 | It also keeps track of letter frequencies so that better frequency tables 24 | can eventually be constructed for use with the Huffman encoder. 25 | """ 26 | def __init__(self, options, is_request, params): 27 | BaseProcessor.__init__(self, options, is_request, params) 28 | self.compressor = spdy4_codec_impl.Spdy4CoDe(params) 29 | self.decompressor = spdy4_codec_impl.Spdy4CoDe(params) 30 | self.hosts = {} 31 | self.group_ids = common_utils.IDStore(255) 32 | self.wf = self.compressor.wf 33 | if is_request: 34 | request_freq_table = header_freq_tables.request_freq_table 35 | self.compressor.huffman_table = huffman.Huffman(request_freq_table) 36 | self.decompressor.huffman_table = huffman.Huffman(request_freq_table) 37 | else: 38 | response_freq_table = header_freq_tables.response_freq_table 39 | self.compressor.huffman_table = huffman.Huffman(response_freq_table) 40 | self.decompressor.huffman_table = huffman.Huffman(response_freq_table) 41 | 42 | def PrintOps(self, ops): 43 | for op in ops: 44 | print "\t", spdy4_codec_impl.FormatOp(op) 45 | 46 | def compress(self, inp_headers, host): 47 | normalized_host = re.sub('[0-1a-zA-Z-\.]*\.([^.]*\.[^.]*)', '\\1', 48 | host) 49 | if normalized_host in self.hosts: 50 | group_id = self.hosts[normalized_host] 51 | else: 52 | group_id = self.group_ids.GetNext() 53 | self.hosts[normalized_host] = group_id 54 | inp_ops = self.compressor.MakeOperations(inp_headers, group_id) 55 | inp_real_ops = self.compressor.OpsToRealOps(inp_ops, group_id) 56 | compressed_blob = self.compressor.Compress(inp_real_ops) 57 | return compressed_blob 58 | 59 | def decompress(self, compressed_blob): 60 | out_real_ops = self.decompressor.Decompress(compressed_blob) 61 | (group_id, out_ops, out_headers) = \ 62 | self.decompressor.RealOpsToOpAndExecute(out_real_ops) 63 | return out_headers 64 | -------------------------------------------------------------------------------- /compressor/delta2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | import zlib 6 | import re 7 | 8 | import header_freq_tables 9 | import spdy4_codec_impl 10 | import huffman 11 | import common_utils 12 | from .. import BaseProcessor 13 | 14 | # There are a number of TODOS in the spdy4 15 | # have near indices. Possibly renumber whever something is referenced) 16 | 17 | request_huffman = huffman.Huffman(header_freq_tables.request_freq_table) 18 | response_huffman = huffman.Huffman(header_freq_tables.response_freq_table) 19 | 20 | class Processor(BaseProcessor): 21 | """ 22 | This class formats header frames in SPDY4 wire format, and then reads the 23 | resulting wire-formatted data and restores the data. Thus, it compresses and 24 | decompresses header data. 25 | 26 | It also keeps track of letter frequencies so that better frequency tables 27 | can eventually be constructed for use with the Huffman encoder. 28 | """ 29 | def __init__(self, options, is_request, params): 30 | BaseProcessor.__init__(self, options, is_request, params) 31 | description = "request" 32 | if not is_request: 33 | description = "response" 34 | self.compressor = spdy4_codec_impl.Spdy4CoDe(params, description, options) 35 | self.decompressor = spdy4_codec_impl.Spdy4CoDe(params, description, options) 36 | self.hosts = {} 37 | self.group_ids = common_utils.IDStore(255) 38 | if is_request: 39 | self.compressor.huffman = request_huffman 40 | self.decompressor.huffman = request_huffman 41 | else: 42 | self.compressor.huffman = response_huffman 43 | self.decompressor.huffman = response_huffman 44 | 45 | def PrintOps(self, ops): 46 | for op in ops: 47 | print "\t", spdy4_codec_impl.FormatOp(op) 48 | 49 | def compress(self, inp_headers, host): 50 | normalized_host = re.sub('[0-1a-zA-Z-\.]*\.([^.]*\.[^.]*)', '\\1', 51 | host) 52 | if normalized_host in self.hosts: 53 | group_id = self.hosts[normalized_host] 54 | else: 55 | group_id = self.group_ids.GetNext() 56 | self.hosts[normalized_host] = group_id 57 | inp_ops = self.compressor.MakeOperations(inp_headers, group_id) 58 | inp_real_ops = self.compressor.OpsToRealOps(inp_ops, group_id) 59 | compressed_blob = self.compressor.Compress(inp_real_ops) 60 | return compressed_blob 61 | 62 | def decompress(self, compressed_blob): 63 | out_real_ops = self.decompressor.Decompress(compressed_blob) 64 | (group_id, out_ops, out_headers) = \ 65 | self.decompressor.RealOpsToOpAndExecute(out_real_ops) 66 | return out_headers 67 | 68 | def done(self): 69 | self.compressor.Done() 70 | -------------------------------------------------------------------------------- /compressor/headerdiff/README.md: -------------------------------------------------------------------------------- 1 | HeaderDiff Codec 2 | ================ 3 | 4 | Specification 5 | ------------- 6 | The specification for the HeaderDiff codec is at: 7 | https://datatracker.ietf.org/doc/draft-ruellan-headerdiff/ 8 | 9 | Delta-encoding modifications 10 | ---------------------------- 11 | The implementation support three modes for delta-encoding header values. In 12 | addition, delta-encoding can be disabled. 13 | 14 | The *full* mode searches for the largest shared prefix between the reference value and the value to encode. 15 | 16 | The *bounded* mode limits the shared prefix, forcing it to end with a character contained in a list of limit characters. For example: `/?= ,`. 17 | 18 | The *limit* mode limits the number of times an indexed value can be used as a reference for delta-encoding another value. 19 | 20 | Huffman 21 | ------- 22 | The implementation now optionally supports a static Huffman encoding for string 23 | values. 24 | 25 | Usage 26 | ----- 27 | 28 | The HeaderDiff codec supports the following options: 29 | 30 | - `buffer` for defining the maximum buffer size (default is 32768 bytes). 31 | - `deflate` for specifying the windowSize for Deflate. It is an integer between 32 | 8 and 15. By default, Deflate is not enabled. 33 | - `delta` for enabling or disabling delta-encoding (shared prefix). Enabled by 34 | default. 35 | - `delta_type` for specifying which type of delta-encoding to use. 36 | - *Full* mode: an empty value means that full prefix search is enabled. 37 | - *Bounded* mode: a string (possibly quoted) containing the characters 38 | defining the possible boundaries for the shared prefix. 39 | - *Limit* mode: an integer defining the maximum number of usage of an indexed 40 | value as a reference for delta-encoding another value. 41 | - `huffman` for enabling Huffman encoding of string values. Disabled by default. 42 | 43 | Examples 44 | -------- 45 | 46 | Using the default HeaderDiff codec: 47 | 48 | ./compare_compressors.py -c headerdiff file.har 49 | 50 | Using HeaderDiff with a small buffer: 51 | 52 | ./compare_compressors.py -c "headerdiff=buffer=4096" file.har 53 | 54 | Using HeaderDiff with Deflate: 55 | 56 | ./compare_compressors.py -c "headerdiff=deflate=12" file.har 57 | 58 | Using both a small buffer and Deflate: 59 | 60 | ./compare_compressors.py -c "headerdiff=buffer=4096,deflate=12" file.har 61 | 62 | Using HeaderDiff without delta-encoding: 63 | 64 | ./compare_compressors.py -c "headerdiff=delta=false" file.har 65 | 66 | Using HeaderDiff with *bounded* delta-encoding: 67 | 68 | ./compare_compressors.py -c "headerdiff=delta_type='/?= \coma'" file.har 69 | 70 | Using HeaderDiff with *limited* delta-encoding: 71 | 72 | ./compare_compressors.py -c "headerdiff=delta_type=2" file.har 73 | 74 | Using HeaderDiff with a static Huffman encoding of strings: 75 | 76 | ./compare_compressors.py -c "headerdiff=huffman" file.har 77 | 78 | -------------------------------------------------------------------------------- /compressor/bohe/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | import zlib 6 | import re 7 | import bohe 8 | import struct 9 | import header_freq_tables 10 | import common_utils 11 | 12 | from huffman import Huffman 13 | from .. import BaseProcessor 14 | from bit_bucket import BitBucket 15 | 16 | 17 | # There are a number of TODOS in the spdy4 18 | # have near indices. Possibly renumber whever something is referenced) 19 | 20 | class Processor(BaseProcessor): 21 | 22 | headers = [] 23 | 24 | def __init__(self, options, is_request, params): 25 | BaseProcessor.__init__(self, options, is_request, params) 26 | if is_request: 27 | self.huff = huffman.Huffman(header_freq_tables.request_freq_table) 28 | else: 29 | self.huff = huffman.Huffman(header_freq_tables.response_freq_table) 30 | 31 | def compress(self, inp_headers, host): 32 | data = BitBucket() 33 | res = '' 34 | for k,v in inp_headers.items(): 35 | if k in bohe.ID_TABLE: 36 | zz = data.NumBits() 37 | # encode as registered header 38 | data.StoreBits8(bohe.ID_TABLE.index(k) + 1) 39 | l = 0 40 | dohuff = True 41 | # Set the binary flag 42 | if k in bohe.ENCODERS: 43 | data.StoreBit(1) 44 | dohuff = False 45 | # Set the multiple values flag... 46 | if '\u00' in v: 47 | data.StoreBit(1) 48 | else: 49 | data.StoreBit(0) 50 | val = bohe.encode(k,v) 51 | if dohuff: 52 | val_as_list, len_in_bits = self.do_huff(self.huff, val) 53 | else: 54 | val_as_list = common_utils.StrToList(val) 55 | len_in_bits = len(val_as_list) *8 56 | data.StoreBits22(len(val_as_list)) 57 | data.StoreBits( (val_as_list, len_in_bits) ) 58 | else: 59 | data.StoreBits8(128 | len(k)) 60 | data.StoreBits((common_utils.StrToList(k), len(k)*8)) 61 | data.StoreBit(0) # assume not binary value for now 62 | if '\u00' in v: 63 | data.StoreBit(1) 64 | else: 65 | data.StoreBit(0) 66 | val_as_list, len_in_bits = self.do_huff(self.huff, v) 67 | data.StoreBits22(len(val_as_list)) 68 | data.StoreBits((val_as_list, len_in_bits)) 69 | return ''.join(common_utils.ListToStr(data.GetAllBits()[0])) 70 | 71 | def do_huff(self, huff, val): 72 | val_as_list = common_utils.StrToList(val) 73 | (val_as_list, len_in_bits) = huff.Encode(val_as_list, True) 74 | #len_in_bits = len(val_as_list) *8 75 | return val_as_list, len_in_bits 76 | 77 | 78 | # NO DECOMPRESSION YET! 79 | # def decompress(self, compressed): 80 | # header_group = 0 81 | # out_real_ops = self.decompressor.Decompress(compressed) 82 | # out_ops = self.decompressor.RealOpsToOpAndExecute( 83 | # out_real_ops, header_group) 84 | # return self.decompressor.GenerateAllHeaders(header_group) 85 | -------------------------------------------------------------------------------- /compressor/delta/bit_bucket_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | 7 | from bit_bucket import BitBucket 8 | 9 | def RunTestCase(bb, testcase): 10 | pre_bb = str(bb) 11 | for instruction in testcase: 12 | (store_this, expected_output) = instruction 13 | bb.StoreBits(store_this) 14 | str_bb = str(bb) 15 | if str_bb != expected_output: 16 | print 17 | print "Failure!: \"%s\" != \"%s\"" % (str_bb, expected_output) 18 | print "op: ", store_this 19 | print "Pre bb: \"%s\"" % pre_bb 20 | print "expected: \"%s\"" % expected_output 21 | print "post bb: \"%s\"" % str_bb 22 | raise StandardError() 23 | pre_bb = str_bb 24 | 25 | 26 | def main(): 27 | bb = BitBucket() 28 | testcase_a = [ 29 | (([0xFF,0],6+8), "|11111111|000000 [6]"), 30 | (([0xFF], 3), "|11111111|00000011|1 [1]"), 31 | (([0x00], 3), "|11111111|00000011|1000 [4]"), 32 | (([0xFF,0], 8+6), "|11111111|00000011|10001111|11110000|00 [2]"), 33 | (([0xFF], 4), "|11111111|00000011|10001111|11110000|001111 [6]"), 34 | (([0x0], 4), "|11111111|00000011|10001111|11110000|00111100|00 [2]"), 35 | ] 36 | RunTestCase(bb, testcase_a) 37 | 38 | 39 | testcase_b = [ 40 | (([0xF0], 5), "|11110 [5]"), 41 | (([0x0F], 5), "|11110000|01 [2]"), 42 | (([0xF0], 5), "|11110000|0111110 [7]"), 43 | (([0x0F], 5), "|11110000|01111100|0001 [4]"), 44 | (([0xF0], 5), "|11110000|01111100|00011111|0 [1]"), 45 | (([0x0F], 5), "|11110000|01111100|00011111|000001 [6]"), 46 | (([0xF0], 5), "|11110000|01111100|00011111|00000111|110 [3]"), 47 | (([0x0F], 5), "|11110000|01111100|00011111|00000111|11000001 [0]"), 48 | (([0xF0], 5), "|11110000|01111100|00011111|00000111|11000001|11110 [5]"), 49 | ] 50 | bb.Clear() 51 | RunTestCase(bb, testcase_b) 52 | 53 | 54 | testcase_c = [ 55 | (([0xF0], 1), "|1 [1]"), 56 | (([0x0F], 1), "|10 [2]"), 57 | (([0xF0], 1), "|101 [3]"), 58 | (([0x0F], 1), "|1010 [4]"), 59 | (([0xF0], 1), "|10101 [5]"), 60 | (([0x0F], 1), "|101010 [6]"), 61 | (([0xF0], 1), "|1010101 [7]"), 62 | (([0x0F], 1), "|10101010 [0]"), 63 | (([0xF0], 1), "|10101010|1 [1]"), 64 | (([0x00,0xFF], 8+7), "|10101010|10000000|01111111 [0]"), 65 | ] 66 | bb.Clear() 67 | RunTestCase(bb, testcase_c) 68 | 69 | 70 | testcase_d = [ 71 | (([0xF0], 8), "|11110000 [0]"), 72 | (([0xF0], 8), "|11110000|11110000 [0]"), 73 | (([0xF0], 1), "|11110000|11110000|1 [1]"), 74 | (([0x0F], 8), "|11110000|11110000|10000111|1 [1]"), 75 | ] 76 | bb.Clear() 77 | RunTestCase(bb, testcase_d) 78 | 79 | testcase_e = [ 80 | (([0,52], 8+6), "|00000000|001101 [6]"), 81 | (([185], 8), "|00000000|00110110|111001 [6]"), 82 | ] 83 | bb.Clear() 84 | RunTestCase(bb, testcase_e) 85 | print "Success!" 86 | 87 | 88 | main() 89 | -------------------------------------------------------------------------------- /compressor/delta2_bohe/bit_bucket_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | 7 | from bit_bucket import BitBucket 8 | 9 | def RunTestCase(bb, testcase): 10 | pre_bb = str(bb) 11 | for instruction in testcase: 12 | (store_this, expected_output) = instruction 13 | bb.StoreBits(store_this) 14 | str_bb = str(bb) 15 | if str_bb != expected_output: 16 | print 17 | print "Failure!: \"%s\" != \"%s\"" % (str_bb, expected_output) 18 | print "op: ", store_this 19 | print "Pre bb: \"%s\"" % pre_bb 20 | print "expected: \"%s\"" % expected_output 21 | print "post bb: \"%s\"" % str_bb 22 | raise StandardError() 23 | pre_bb = str_bb 24 | 25 | 26 | def main(): 27 | bb = BitBucket() 28 | testcase_a = [ 29 | (([0xFF,0],6+8), "|11111111|000000 [6]"), 30 | (([0xFF], 3), "|11111111|00000011|1 [1]"), 31 | (([0x00], 3), "|11111111|00000011|1000 [4]"), 32 | (([0xFF,0], 8+6), "|11111111|00000011|10001111|11110000|00 [2]"), 33 | (([0xFF], 4), "|11111111|00000011|10001111|11110000|001111 [6]"), 34 | (([0x0], 4), "|11111111|00000011|10001111|11110000|00111100|00 [2]"), 35 | ] 36 | RunTestCase(bb, testcase_a) 37 | 38 | 39 | testcase_b = [ 40 | (([0xF0], 5), "|11110 [5]"), 41 | (([0x0F], 5), "|11110000|01 [2]"), 42 | (([0xF0], 5), "|11110000|0111110 [7]"), 43 | (([0x0F], 5), "|11110000|01111100|0001 [4]"), 44 | (([0xF0], 5), "|11110000|01111100|00011111|0 [1]"), 45 | (([0x0F], 5), "|11110000|01111100|00011111|000001 [6]"), 46 | (([0xF0], 5), "|11110000|01111100|00011111|00000111|110 [3]"), 47 | (([0x0F], 5), "|11110000|01111100|00011111|00000111|11000001 [0]"), 48 | (([0xF0], 5), "|11110000|01111100|00011111|00000111|11000001|11110 [5]"), 49 | ] 50 | bb.Clear() 51 | RunTestCase(bb, testcase_b) 52 | 53 | 54 | testcase_c = [ 55 | (([0xF0], 1), "|1 [1]"), 56 | (([0x0F], 1), "|10 [2]"), 57 | (([0xF0], 1), "|101 [3]"), 58 | (([0x0F], 1), "|1010 [4]"), 59 | (([0xF0], 1), "|10101 [5]"), 60 | (([0x0F], 1), "|101010 [6]"), 61 | (([0xF0], 1), "|1010101 [7]"), 62 | (([0x0F], 1), "|10101010 [0]"), 63 | (([0xF0], 1), "|10101010|1 [1]"), 64 | (([0x00,0xFF], 8+7), "|10101010|10000000|01111111 [0]"), 65 | ] 66 | bb.Clear() 67 | RunTestCase(bb, testcase_c) 68 | 69 | 70 | testcase_d = [ 71 | (([0xF0], 8), "|11110000 [0]"), 72 | (([0xF0], 8), "|11110000|11110000 [0]"), 73 | (([0xF0], 1), "|11110000|11110000|1 [1]"), 74 | (([0x0F], 8), "|11110000|11110000|10000111|1 [1]"), 75 | ] 76 | bb.Clear() 77 | RunTestCase(bb, testcase_d) 78 | 79 | testcase_e = [ 80 | (([0,52], 8+6), "|00000000|001101 [6]"), 81 | (([185], 8), "|00000000|00110110|111001 [6]"), 82 | ] 83 | bb.Clear() 84 | RunTestCase(bb, testcase_e) 85 | print "Success!" 86 | 87 | 88 | main() 89 | -------------------------------------------------------------------------------- /compressor/delta_bohe/bit_bucket_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | 7 | from bit_bucket import BitBucket 8 | 9 | def RunTestCase(bb, testcase): 10 | pre_bb = str(bb) 11 | for instruction in testcase: 12 | (store_this, expected_output) = instruction 13 | bb.StoreBits(store_this) 14 | str_bb = str(bb) 15 | if str_bb != expected_output: 16 | print 17 | print "Failure!: \"%s\" != \"%s\"" % (str_bb, expected_output) 18 | print "op: ", store_this 19 | print "Pre bb: \"%s\"" % pre_bb 20 | print "expected: \"%s\"" % expected_output 21 | print "post bb: \"%s\"" % str_bb 22 | raise StandardError() 23 | pre_bb = str_bb 24 | 25 | 26 | def main(): 27 | bb = BitBucket() 28 | testcase_a = [ 29 | (([0xFF,0],6+8), "|11111111|000000 [6]"), 30 | (([0xFF], 3), "|11111111|00000011|1 [1]"), 31 | (([0x00], 3), "|11111111|00000011|1000 [4]"), 32 | (([0xFF,0], 8+6), "|11111111|00000011|10001111|11110000|00 [2]"), 33 | (([0xFF], 4), "|11111111|00000011|10001111|11110000|001111 [6]"), 34 | (([0x0], 4), "|11111111|00000011|10001111|11110000|00111100|00 [2]"), 35 | ] 36 | RunTestCase(bb, testcase_a) 37 | 38 | 39 | testcase_b = [ 40 | (([0xF0], 5), "|11110 [5]"), 41 | (([0x0F], 5), "|11110000|01 [2]"), 42 | (([0xF0], 5), "|11110000|0111110 [7]"), 43 | (([0x0F], 5), "|11110000|01111100|0001 [4]"), 44 | (([0xF0], 5), "|11110000|01111100|00011111|0 [1]"), 45 | (([0x0F], 5), "|11110000|01111100|00011111|000001 [6]"), 46 | (([0xF0], 5), "|11110000|01111100|00011111|00000111|110 [3]"), 47 | (([0x0F], 5), "|11110000|01111100|00011111|00000111|11000001 [0]"), 48 | (([0xF0], 5), "|11110000|01111100|00011111|00000111|11000001|11110 [5]"), 49 | ] 50 | bb.Clear() 51 | RunTestCase(bb, testcase_b) 52 | 53 | 54 | testcase_c = [ 55 | (([0xF0], 1), "|1 [1]"), 56 | (([0x0F], 1), "|10 [2]"), 57 | (([0xF0], 1), "|101 [3]"), 58 | (([0x0F], 1), "|1010 [4]"), 59 | (([0xF0], 1), "|10101 [5]"), 60 | (([0x0F], 1), "|101010 [6]"), 61 | (([0xF0], 1), "|1010101 [7]"), 62 | (([0x0F], 1), "|10101010 [0]"), 63 | (([0xF0], 1), "|10101010|1 [1]"), 64 | (([0x00,0xFF], 8+7), "|10101010|10000000|01111111 [0]"), 65 | ] 66 | bb.Clear() 67 | RunTestCase(bb, testcase_c) 68 | 69 | 70 | testcase_d = [ 71 | (([0xF0], 8), "|11110000 [0]"), 72 | (([0xF0], 8), "|11110000|11110000 [0]"), 73 | (([0xF0], 1), "|11110000|11110000|1 [1]"), 74 | (([0x0F], 8), "|11110000|11110000|10000111|1 [1]"), 75 | ] 76 | bb.Clear() 77 | RunTestCase(bb, testcase_d) 78 | 79 | testcase_e = [ 80 | (([0,52], 8+6), "|00000000|001101 [6]"), 81 | (([185], 8), "|00000000|00110110|111001 [6]"), 82 | ] 83 | bb.Clear() 84 | RunTestCase(bb, testcase_e) 85 | print "Success!" 86 | 87 | 88 | main() 89 | -------------------------------------------------------------------------------- /compressor/delta/huffman_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | 7 | from huffman import Huffman 8 | from bit_bucket import BitBucket 9 | from common_utils import FormatAsBits 10 | from common_utils import ListToStr 11 | 12 | request_freq_table = [ 13 | ('\x00', 0), ('\x01', 0), ('\x02', 0), ('\x03', 0), ('\x04', 0), ('\x05', 0), 14 | ('\x06', 0), ('\x07', 0), ('\x08', 0), ('\t', 0), ('\n', 0), ('\x0b', 0), 15 | ('\x0c', 0), ('\r', 0), ('\x0e', 0), ('\x0f', 0), ('\x10', 0), ('\x11', 0), 16 | ('\x12', 0), ('\x13', 0), ('\x14', 0), ('\x15', 0), ('\x16', 0), ('\x17', 0), 17 | ('\x18', 0), ('\x19', 0), ('\x1a', 0), ('\x1b', 0), ('\x1c', 0), ('\x1d', 0), 18 | ('\x1e', 0), ('\x1f', 0), (' ', 28), ('!', 27), ('"', 0), ('#', 0), 19 | ('$', 2), ('%', 1273), ('&', 1293), ("'", 2), ('(', 23), (')', 23), 20 | ('*', 18), ('+', 15), (',', 875), ('-', 1222), ('.', 2212), ('/', 3366), 21 | ('0', 2419), ('1', 2743), ('2', 3072), ('3', 2358), ('4', 1845), ('5', 1528), 22 | ('6', 1796), ('7', 1448), ('8', 1585), ('9', 1453), (':', 147), (';', 122), 23 | ('<', 0), ('=', 1692), ('>', 0), ('?', 222), ('@', 0), ('A', 771), 24 | ('B', 354), ('C', 399), ('D', 651), ('E', 314), ('F', 709), ('G', 462), 25 | ('H', 246), ('I', 489), ('J', 212), ('K', 207), ('L', 329), ('M', 269), 26 | ('N', 306), ('O', 210), ('P', 313), ('Q', 244), ('R', 315), ('S', 396), 27 | ('T', 339), ('U', 351), ('V', 355), ('W', 243), ('X', 250), ('Y', 251), 28 | ('Z', 257), ('[', 2), ('\\', 0), (']', 2), ('^', 0), ('_', 1442), 29 | ('`', 0), ('a', 3281), ('b', 1184), ('c', 2353), ('d', 1564), ('e', 3447), 30 | ('f', 802), ('g', 1917), ('h', 988), ('i', 2488), ('j', 792), ('k', 529), 31 | ('l', 1571), ('m', 1980), ('n', 2526), ('o', 2349), ('p', 2016), ('q', 312), 32 | ('r', 2003), ('s', 3133), ('t', 2752), ('u', 974), ('v', 763), ('w', 764), 33 | ('x', 564), ('y', 586), ('z', 358), ('{', 11), ('|', 0), ('}', 11), 34 | ('~', 4), ('\x7f', 0), ('\x80', 1029)] 35 | 36 | 37 | test_data = [ 38 | "abbcccddddeeeee", 39 | "foobarbaz", 40 | "0-2rklnsvkl;-23kDFSi01k0=", 41 | "-9083480-12hjkadsgf8912345kl;hjajkl; `123890", 42 | "\0\0-3;jsdf" 43 | ] 44 | 45 | def MakeReadableString(val): 46 | printable = string.digits + string.letters + string.punctuation + ' ' + "\t" 47 | out = [] 48 | for c in val: 49 | if c in printable: 50 | out.append(" %c " % c) 51 | else: 52 | out.append("0x%02x " % ord(c)) 53 | return ''.join(out) 54 | 55 | def main(): 56 | h = Huffman(request_freq_table) 57 | for s in test_data: 58 | print " encoding: ", s 59 | sp = [ord(c) for c in s] 60 | e_result = h.Encode(sp, False) 61 | print " e_result: ", FormatAsBits(e_result) 62 | d_result = ''.join(ListToStr(h.Decode(e_result[0], False, e_result[1]))) 63 | if d_result != s: 64 | print "difference found: ", d_result, " ", s 65 | else: 66 | print "It worked: ", s 67 | print 68 | #bb = BitBucket() 69 | #bb.StoreBits(([0xff],7)) 70 | #bb.StoreBits(([0x00],5)) 71 | #bb.StoreBits(([0xff],5)) 72 | #bb.StoreBits(([0x00],6)) 73 | #bb.StoreBits(([0xff],5)) 74 | #bb.StoreBits(([0x00],5)) 75 | #bb.StoreBits(([0xff],6)) 76 | #bb.StoreBits(([0x00],5)) 77 | #bb.StoreBits(([0xff],8)) 78 | #print FormatAsBits(bb.GetAllBits()) 79 | 80 | main() 81 | 82 | -------------------------------------------------------------------------------- /compressor/delta2_bohe/huffman_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | 7 | from huffman import Huffman 8 | from bit_bucket import BitBucket 9 | from common_utils import FormatAsBits 10 | from common_utils import ListToStr 11 | 12 | request_freq_table = [ 13 | ('\x00', 0), ('\x01', 0), ('\x02', 0), ('\x03', 0), ('\x04', 0), ('\x05', 0), 14 | ('\x06', 0), ('\x07', 0), ('\x08', 0), ('\t', 0), ('\n', 0), ('\x0b', 0), 15 | ('\x0c', 0), ('\r', 0), ('\x0e', 0), ('\x0f', 0), ('\x10', 0), ('\x11', 0), 16 | ('\x12', 0), ('\x13', 0), ('\x14', 0), ('\x15', 0), ('\x16', 0), ('\x17', 0), 17 | ('\x18', 0), ('\x19', 0), ('\x1a', 0), ('\x1b', 0), ('\x1c', 0), ('\x1d', 0), 18 | ('\x1e', 0), ('\x1f', 0), (' ', 28), ('!', 27), ('"', 0), ('#', 0), 19 | ('$', 2), ('%', 1273), ('&', 1293), ("'", 2), ('(', 23), (')', 23), 20 | ('*', 18), ('+', 15), (',', 875), ('-', 1222), ('.', 2212), ('/', 3366), 21 | ('0', 2419), ('1', 2743), ('2', 3072), ('3', 2358), ('4', 1845), ('5', 1528), 22 | ('6', 1796), ('7', 1448), ('8', 1585), ('9', 1453), (':', 147), (';', 122), 23 | ('<', 0), ('=', 1692), ('>', 0), ('?', 222), ('@', 0), ('A', 771), 24 | ('B', 354), ('C', 399), ('D', 651), ('E', 314), ('F', 709), ('G', 462), 25 | ('H', 246), ('I', 489), ('J', 212), ('K', 207), ('L', 329), ('M', 269), 26 | ('N', 306), ('O', 210), ('P', 313), ('Q', 244), ('R', 315), ('S', 396), 27 | ('T', 339), ('U', 351), ('V', 355), ('W', 243), ('X', 250), ('Y', 251), 28 | ('Z', 257), ('[', 2), ('\\', 0), (']', 2), ('^', 0), ('_', 1442), 29 | ('`', 0), ('a', 3281), ('b', 1184), ('c', 2353), ('d', 1564), ('e', 3447), 30 | ('f', 802), ('g', 1917), ('h', 988), ('i', 2488), ('j', 792), ('k', 529), 31 | ('l', 1571), ('m', 1980), ('n', 2526), ('o', 2349), ('p', 2016), ('q', 312), 32 | ('r', 2003), ('s', 3133), ('t', 2752), ('u', 974), ('v', 763), ('w', 764), 33 | ('x', 564), ('y', 586), ('z', 358), ('{', 11), ('|', 0), ('}', 11), 34 | ('~', 4), ('\x7f', 0), ('\x80', 1029)] 35 | 36 | 37 | test_data = [ 38 | "abbcccddddeeeee", 39 | "foobarbaz", 40 | "0-2rklnsvkl;-23kDFSi01k0=", 41 | "-9083480-12hjkadsgf8912345kl;hjajkl; `123890", 42 | "\0\0-3;jsdf" 43 | ] 44 | 45 | def MakeReadableString(val): 46 | printable = string.digits + string.letters + string.punctuation + ' ' + "\t" 47 | out = [] 48 | for c in val: 49 | if c in printable: 50 | out.append(" %c " % c) 51 | else: 52 | out.append("0x%02x " % ord(c)) 53 | return ''.join(out) 54 | 55 | def main(): 56 | h = Huffman(request_freq_table) 57 | for s in test_data: 58 | print " encoding: ", s 59 | sp = [ord(c) for c in s] 60 | e_result = h.Encode(sp, False) 61 | print " e_result: ", FormatAsBits(e_result) 62 | d_result = ''.join(ListToStr(h.Decode(e_result[0], False, e_result[1]))) 63 | if d_result != s: 64 | print "difference found: ", d_result, " ", s 65 | else: 66 | print "It worked: ", s 67 | print 68 | #bb = BitBucket() 69 | #bb.StoreBits(([0xff],7)) 70 | #bb.StoreBits(([0x00],5)) 71 | #bb.StoreBits(([0xff],5)) 72 | #bb.StoreBits(([0x00],6)) 73 | #bb.StoreBits(([0xff],5)) 74 | #bb.StoreBits(([0x00],5)) 75 | #bb.StoreBits(([0xff],6)) 76 | #bb.StoreBits(([0x00],5)) 77 | #bb.StoreBits(([0xff],8)) 78 | #print FormatAsBits(bb.GetAllBits()) 79 | 80 | main() 81 | 82 | -------------------------------------------------------------------------------- /compressor/delta_bohe/huffman_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | 7 | from huffman import Huffman 8 | from bit_bucket import BitBucket 9 | from common_utils import FormatAsBits 10 | from common_utils import ListToStr 11 | 12 | request_freq_table = [ 13 | ('\x00', 0), ('\x01', 0), ('\x02', 0), ('\x03', 0), ('\x04', 0), ('\x05', 0), 14 | ('\x06', 0), ('\x07', 0), ('\x08', 0), ('\t', 0), ('\n', 0), ('\x0b', 0), 15 | ('\x0c', 0), ('\r', 0), ('\x0e', 0), ('\x0f', 0), ('\x10', 0), ('\x11', 0), 16 | ('\x12', 0), ('\x13', 0), ('\x14', 0), ('\x15', 0), ('\x16', 0), ('\x17', 0), 17 | ('\x18', 0), ('\x19', 0), ('\x1a', 0), ('\x1b', 0), ('\x1c', 0), ('\x1d', 0), 18 | ('\x1e', 0), ('\x1f', 0), (' ', 28), ('!', 27), ('"', 0), ('#', 0), 19 | ('$', 2), ('%', 1273), ('&', 1293), ("'", 2), ('(', 23), (')', 23), 20 | ('*', 18), ('+', 15), (',', 875), ('-', 1222), ('.', 2212), ('/', 3366), 21 | ('0', 2419), ('1', 2743), ('2', 3072), ('3', 2358), ('4', 1845), ('5', 1528), 22 | ('6', 1796), ('7', 1448), ('8', 1585), ('9', 1453), (':', 147), (';', 122), 23 | ('<', 0), ('=', 1692), ('>', 0), ('?', 222), ('@', 0), ('A', 771), 24 | ('B', 354), ('C', 399), ('D', 651), ('E', 314), ('F', 709), ('G', 462), 25 | ('H', 246), ('I', 489), ('J', 212), ('K', 207), ('L', 329), ('M', 269), 26 | ('N', 306), ('O', 210), ('P', 313), ('Q', 244), ('R', 315), ('S', 396), 27 | ('T', 339), ('U', 351), ('V', 355), ('W', 243), ('X', 250), ('Y', 251), 28 | ('Z', 257), ('[', 2), ('\\', 0), (']', 2), ('^', 0), ('_', 1442), 29 | ('`', 0), ('a', 3281), ('b', 1184), ('c', 2353), ('d', 1564), ('e', 3447), 30 | ('f', 802), ('g', 1917), ('h', 988), ('i', 2488), ('j', 792), ('k', 529), 31 | ('l', 1571), ('m', 1980), ('n', 2526), ('o', 2349), ('p', 2016), ('q', 312), 32 | ('r', 2003), ('s', 3133), ('t', 2752), ('u', 974), ('v', 763), ('w', 764), 33 | ('x', 564), ('y', 586), ('z', 358), ('{', 11), ('|', 0), ('}', 11), 34 | ('~', 4), ('\x7f', 0), ('\x80', 1029)] 35 | 36 | 37 | test_data = [ 38 | "abbcccddddeeeee", 39 | "foobarbaz", 40 | "0-2rklnsvkl;-23kDFSi01k0=", 41 | "-9083480-12hjkadsgf8912345kl;hjajkl; `123890", 42 | "\0\0-3;jsdf" 43 | ] 44 | 45 | def MakeReadableString(val): 46 | printable = string.digits + string.letters + string.punctuation + ' ' + "\t" 47 | out = [] 48 | for c in val: 49 | if c in printable: 50 | out.append(" %c " % c) 51 | else: 52 | out.append("0x%02x " % ord(c)) 53 | return ''.join(out) 54 | 55 | def main(): 56 | h = Huffman(request_freq_table) 57 | for s in test_data: 58 | print " encoding: ", s 59 | sp = [ord(c) for c in s] 60 | e_result = h.Encode(sp, False) 61 | print " e_result: ", FormatAsBits(e_result) 62 | d_result = ''.join(ListToStr(h.Decode(e_result[0], False, e_result[1]))) 63 | if d_result != s: 64 | print "difference found: ", d_result, " ", s 65 | else: 66 | print "It worked: ", s 67 | print 68 | #bb = BitBucket() 69 | #bb.StoreBits(([0xff],7)) 70 | #bb.StoreBits(([0x00],5)) 71 | #bb.StoreBits(([0xff],5)) 72 | #bb.StoreBits(([0x00],6)) 73 | #bb.StoreBits(([0xff],5)) 74 | #bb.StoreBits(([0x00],5)) 75 | #bb.StoreBits(([0xff],6)) 76 | #bb.StoreBits(([0x00],5)) 77 | #bb.StoreBits(([0xff],8)) 78 | #print FormatAsBits(bb.GetAllBits()) 79 | 80 | main() 81 | 82 | -------------------------------------------------------------------------------- /compressor/delta2_bohe/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | import zlib 6 | import re 7 | 8 | import header_freq_tables 9 | import spdy4_codec_impl 10 | import huffman 11 | import common_utils 12 | from .. import BaseProcessor, strip_conn_headers 13 | 14 | # There are a number of TODOS in the spdy4 15 | # have near indices. Possibly renumber whever something is referenced) 16 | 17 | class Processor(BaseProcessor): 18 | """ 19 | This class formats header frames in SPDY4 wire format, and then reads the 20 | resulting wire-formatted data and restores the data. Thus, it compresses and 21 | decompresses header data. 22 | 23 | It also keeps track of letter frequencies so that better frequency tables 24 | can eventually be constructed for use with the Huffman encoder. 25 | """ 26 | def __init__(self, options, is_request, params): 27 | # 'params' is ignored 28 | self.compressor = spdy4_codec_impl.Spdy4CoDe() 29 | self.decompressor = spdy4_codec_impl.Spdy4CoDe() 30 | self.options = options 31 | self.hosts = {} 32 | self.group_ids = common_utils.IDStore(2**31) 33 | self.wf = self.compressor.wf 34 | self.name = "delta2_bohe" 35 | if is_request: 36 | request_freq_table = header_freq_tables.request_freq_table 37 | self.compressor.huffman_table = huffman.Huffman(request_freq_table) 38 | self.decompressor.huffman_table = huffman.Huffman(request_freq_table) 39 | else: 40 | response_freq_table = header_freq_tables.response_freq_table 41 | self.compressor.huffman_table = huffman.Huffman(response_freq_table) 42 | self.decompressor.huffman_table = huffman.Huffman(response_freq_table) 43 | 44 | def PrintOps(self, ops): 45 | for op in ops: 46 | print "\t", spdy4_codec_impl.FormatOp(op) 47 | 48 | def compress(self, inp_headers, host): 49 | """ 50 | 'inp_headers' are the headers that will be processed 51 | 'request_headers' are the request headers associated with this frame 52 | the host is extracted from this data. For a response, this would be 53 | the request that engendered the response. For a request, it is just 54 | the request again. 55 | 56 | It returns: 57 | (compressed_frame, 58 | wire_formatted_operations_before_compression, 59 | wire_formatted_operations_after_decompression, 60 | input_headers, 61 | outputted_headers_after_encode_decode, 62 | operations_as_computed_by_encoder, 63 | operations_as_recovered_after_decode) 64 | 65 | Note that compressing with an unmodified stream-compressor like gzip is 66 | effective, however it is insecure. 67 | """ 68 | normalized_host = re.sub('[0-1a-zA-Z-\.]*\.([^.]*\.[^.]*)', '\\1', 69 | host) 70 | if normalized_host in self.hosts: 71 | group_id = self.hosts[normalized_host] 72 | else: 73 | group_id = self.group_ids.GetNext() 74 | self.hosts[normalized_host] = group_id 75 | inp_ops = self.compressor.MakeOperations(strip_conn_headers(inp_headers), group_id) 76 | inp_real_ops = self.compressor.OpsToRealOps(inp_ops, group_id) 77 | compressed_blob = self.compressor.Compress(inp_real_ops) 78 | return compressed_blob 79 | 80 | # def decompress(self, compressed_blob): 81 | # out_real_ops = self.decompressor.Decompress(compressed_blob) 82 | # (group_id, out_ops) = self.decompressor.RealOpsToOpAndExecute(out_real_ops) 83 | # out_headers = self.decompressor.GenerateAllHeaders(group_id) 84 | # return out_headers 85 | -------------------------------------------------------------------------------- /compressor/delta2_bohe/lrustorage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from collections import deque 3 | 4 | class KV: 5 | def __init__(self, key=None, val=None, seq_num=None): 6 | self.key = key 7 | self.val = val 8 | self.seq_num = seq_num 9 | 10 | def ByteSize(self): 11 | return len(self.val) + len(self.key) 12 | 13 | def __repr__(self): 14 | return "{(%r, %r) %r}" % \ 15 | (self.key, self.val, self.seq_num) 16 | 17 | class LruStorage: 18 | def __init__(self, max_bytes=None, max_items=None, max_seq_num=None, 19 | offset=None): 20 | self.ring = deque() 21 | self.byte_size = 0 22 | self.max_items = max_items 23 | self.max_bytes = max_bytes 24 | self.max_seq_num = max_seq_num 25 | 26 | self.pop_cb = None 27 | self.offset = offset 28 | if offset is None: 29 | self.offset = 0 30 | self.seq_num = self.offset 31 | 32 | def __repr__(self): 33 | return "{%s %r}" % (self.seq_num, self.ring) 34 | 35 | def Reserve(self, byte_size, item_count): 36 | if self.max_items: 37 | while len(self.ring) + item_count > self.max_items: 38 | if not self.PopOne(): 39 | return 0 # can't pop one, nothing more to do. 40 | if self.max_bytes: 41 | while self.byte_size + byte_size > self.max_bytes: 42 | if not self.PopOne(): 43 | return 0 # can't pop one, nothing more to do. 44 | return 1 45 | 46 | def PopOne(self): 47 | item = self.ring.popleft() 48 | self.byte_size -= item.ByteSize() 49 | #print "POPPING: ", item.seq_num 50 | if self.pop_cb is not None: 51 | self.pop_cb(item) 52 | return 1 53 | 54 | def Store(self, item): 55 | item_byte_size = item.ByteSize() 56 | if self.max_bytes and self.byte_size + item_byte_size > self.max_bytes: 57 | raise MemoryError("max_bytes exceeded") 58 | if self.max_items and (self.max_items < (len(self.ring) + 1)): 59 | raise MemoryError("max_items exceeded") 60 | item.seq_num = self.seq_num 61 | self.seq_num += 1 62 | if self.max_seq_num and self.seq_num > self.max_seq_num: 63 | self.seq_num = self.offset 64 | self.byte_size += item_byte_size 65 | self.ring.append(item) 66 | 67 | def Lookup(self, seq_num): 68 | first_seq_num = self.ring[0].seq_num 69 | if seq_num < self.offset: 70 | raise IndexError("Negative indices unsupported: ", seq_num) 71 | if first_seq_num > seq_num: 72 | #print "fsn: %d, sn: %d" % (first_seq_num, seq_num) 73 | if self.max_seq_num: 74 | #print "a ",; 75 | lru_idx = (self.max_seq_num - first_seq_num) + seq_num 76 | else: 77 | raise IndexError("MaxSeqNum not defined and " 78 | "seq_num(%d) < first_seq_num(%d)" % 79 | (seq_num, first_seq_num)) 80 | else: 81 | #print "b ",; 82 | lru_idx = seq_num - first_seq_num 83 | #print "Looking up: ", lru_idx 84 | entry = self.ring[lru_idx] 85 | return KV(entry.key, entry.val, entry.seq_num) 86 | 87 | def FindKeyValEntries(self, key, val): 88 | # Looks for key/vals starting from the last entry 89 | ke = None 90 | ve = None 91 | for i in xrange(len(self.ring)-1, 0, -1): 92 | entry = self.ring[i] 93 | if entry.key == key: 94 | ke = entry 95 | for j in xrange(i, 0, -1): 96 | entry = self.ring[i] 97 | if entry.val == val: 98 | ve = entry 99 | break 100 | break 101 | return (ke, ve) 102 | 103 | def __len__(self): 104 | return len(self.ring) 105 | -------------------------------------------------------------------------------- /compressor/delta_bohe/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | import zlib 6 | import re 7 | 8 | import header_freq_tables 9 | import spdy4_codec_impl 10 | import huffman 11 | import common_utils 12 | from .. import BaseProcessor 13 | 14 | # There are a number of TODOS in the spdy4 15 | # have near indices. Possibly renumber whever something is referenced) 16 | 17 | class Processor(BaseProcessor): 18 | """ 19 | This class formats header frames in SPDY4 wire format, and then reads the 20 | resulting wire-formatted data and restores the data. Thus, it compresses and 21 | decompresses header data. 22 | 23 | It also keeps track of letter frequencies so that better frequency tables 24 | can eventually be constructed for use with the Huffman encoder. 25 | """ 26 | def __init__(self, options, is_request, params): 27 | BaseProcessor.__init__(self, options, is_request, params) 28 | # 'params' is ignored 29 | self.compressor = spdy4_codec_impl.Spdy4CoDe() 30 | self.decompressor = spdy4_codec_impl.Spdy4CoDe() 31 | self.hosts = {} 32 | self.group_ids = common_utils.IDStore() 33 | self.wf = self.compressor.wf 34 | self.name="delta-bohe" 35 | if is_request: 36 | request_freq_table = header_freq_tables.request_freq_table 37 | self.compressor.huffman_table = huffman.Huffman(request_freq_table) 38 | self.decompressor.huffman_table = huffman.Huffman(request_freq_table) 39 | else: 40 | response_freq_table = header_freq_tables.response_freq_table 41 | self.compressor.huffman_table = huffman.Huffman(response_freq_table) 42 | self.decompressor.huffman_table = huffman.Huffman(response_freq_table) 43 | 44 | def PrintOps(self, ops): 45 | for op in ops: 46 | print "\t", spdy4_codec_impl.FormatOp(op) 47 | 48 | def compress(self, inp_headers, host): 49 | """ 50 | 'inp_headers' are the headers that will be processed 51 | 'request_headers' are the request headers associated with this frame 52 | the host is extracted from this data. For a response, this would be 53 | the request that engendered the response. For a request, it is just 54 | the request again. 55 | 56 | It returns: 57 | (compressed_frame, 58 | wire_formatted_operations_before_compression, 59 | wire_formatted_operations_after_decompression, 60 | input_headers, 61 | outputted_headers_after_encode_decode, 62 | operations_as_computed_by_encoder, 63 | operations_as_recovered_after_decode) 64 | 65 | Note that compressing with an unmodified stream-compressor like gzip is 66 | effective, however it is insecure. 67 | """ 68 | header_group = 0 69 | inp_ops = self.compressor.MakeOperations(inp_headers, header_group) 70 | 71 | inp_real_ops = self.compressor.OpsToRealOps(inp_ops) 72 | compressed_blob = self.compressor.Compress(inp_real_ops) 73 | retval = { 74 | 'compressed': compressed_blob, 75 | 'serialized_ops': inp_real_ops, # should be equal \ 76 | 'input_headers': inp_headers, # should be equal \ 77 | 'interpretable_ops': inp_ops, # should be equal \ 78 | 'header_group': header_group 79 | } 80 | return compressed_blob 81 | 82 | # NO DECOMPRESSION YET! 83 | # def decompress(self, compressed): 84 | # header_group = 0 85 | # out_real_ops = self.decompressor.Decompress(compressed) 86 | # out_ops = self.decompressor.RealOpsToOpAndExecute( 87 | # out_real_ops, header_group) 88 | # return self.decompressor.GenerateAllHeaders(header_group) 89 | -------------------------------------------------------------------------------- /compressor/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Base class and common functions for compressor implementations. 5 | """ 6 | 7 | # pylint: disable=W0311 8 | 9 | class BaseProcessor(object): 10 | "Base class for compression processors." 11 | def __init__(self, options, is_request, params): 12 | self.options = options 13 | self.is_request = is_request 14 | name = self.__module__.split(".")[-1] 15 | if params: 16 | self.name = name + " (" + ", ".join(params) + ")" 17 | else: 18 | self.name = name 19 | self.params = params 20 | 21 | def compress(self, in_headers, host): 22 | """ 23 | 'in_headers' are the headers that will be processed. They are expected 24 | to be a dictionary whose keys are header names (all lowercase), and 25 | whose values are strings. Multiple instances of a header field will 26 | be delimited by \0 (null) characters. 27 | 28 | There are a number of special header names, indicated by ':' as the 29 | first character in the name. 30 | 31 | 'host' is the host header value for the request (or associated request, 32 | if it is a response). 33 | 34 | Return value is the resulting compressed headers. 35 | """ 36 | raise NotImplementedError 37 | 38 | def decompress(self, compressed): 39 | """ 40 | 'compressed' is the compressed headers. 41 | 42 | Return value is a header dictionary, as described above. 43 | """ 44 | raise NotImplementedError 45 | 46 | 47 | def format_http1(frame, 48 | delimiter="\r\n", 49 | valsep=": ", 50 | host='host', 51 | version="HTTP/1.1"): 52 | """Take the frame and format it as HTTP/1-ish""" 53 | out_frame = [] 54 | top_line = '' 55 | avoid_list = [] 56 | if ':method' in frame: 57 | top_line = '%s %s %s%s' % ( 58 | frame.get(':method',""), frame.get(':path', ""), 59 | frame.get(':version', version), delimiter) 60 | avoid_list = [':method', ':path', ':version'] 61 | else: 62 | top_line = '%s %s %s%s' % ( 63 | frame.get(':version', version), frame.get(':status',""), 64 | frame.get(':status-text', '?'), delimiter) 65 | avoid_list = [':version', ':status', ':status-text'] 66 | out_frame.append(top_line) 67 | 68 | for (key, val) in frame.items(): 69 | if key in avoid_list: 70 | continue 71 | if key == ':host': 72 | key = host 73 | for individual_val in val.split('\x00'): 74 | out_frame.append(key) 75 | out_frame.append(valsep) 76 | out_frame.append(individual_val) 77 | out_frame.append(delimiter) 78 | out_frame.append(delimiter) 79 | return ''.join(out_frame) 80 | 81 | 82 | def parse_http1(message, is_request, host='host'): 83 | """Take a HTTP1 message and return the header structure for it.""" 84 | out = {} 85 | lines = message.strip().split("\n") 86 | top_line = lines.pop(0).split(None, 2) 87 | for line in lines: 88 | if not line: 89 | break 90 | if line[0] == ':': 91 | name, value = line[1:].split(":", 1) 92 | name = ":" + name 93 | else: 94 | name, value = line.split(":", 1) 95 | name = name.lower() 96 | if name in out: 97 | out[name] += "\0" + value.strip() 98 | else: 99 | out[name] = value.strip() 100 | if is_request: 101 | out[':method'] = top_line[0] 102 | out[':path'] = top_line[1] 103 | out[':version'] = top_line[2].strip() 104 | if host in out: 105 | out[':host'] = out[host] 106 | del out[host] 107 | else: 108 | out[':version'] = top_line[0] 109 | out[':status'] = top_line[1] 110 | out[':status-text'] = top_line[2].strip() 111 | return out 112 | -------------------------------------------------------------------------------- /lib/harfile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ 4 | Utilities to get harfiles into the shape we want them to be. 5 | """ 6 | 7 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 8 | # Use of this source code is governed by a BSD-style license that can be 9 | # found in the LICENSE file. 10 | 11 | # pylint: disable=W0311 12 | 13 | import re 14 | import json 15 | import sys 16 | from urllib.parse import urlsplit 17 | 18 | def read_har_file(filename): 19 | "Read filename and return the header dictionaries for it." 20 | fhandle = open(filename) 21 | try: 22 | har = json.loads(fhandle.read()) 23 | except Exception as oops: 24 | sys.stderr.write("Unable to parse %s\n\n" % filename) 25 | sys.stderr.write(oops) 26 | sys.exit(1) 27 | finally: 28 | fhandle.close() 29 | return har2hdrs(har) 30 | 31 | 32 | def har2hdrs(har): 33 | """ 34 | Convert a har dictionary to two lists of header dictionaries for requests 35 | and responses. 36 | 37 | Headers derived from other information are preceded by a ":" character. 38 | """ 39 | request_headers = [] 40 | response_headers = [] 41 | for entry in har["log"]["entries"]: 42 | request = entry["request"] 43 | url = urlsplit(request["url"]) 44 | if not url.scheme.lower() in ["http", "https"]: 45 | continue 46 | headers = process_headers(request["headers"]) 47 | headers[":method"] = request["method"].lower() 48 | headers[":path"] = url.path 49 | if url.query: 50 | headers[":path"] += "?%s" % url.query 51 | headers[":scheme"] = url.scheme.lower() 52 | headers[":version"] = request["httpVersion"] 53 | if not ":host" in request_headers: 54 | headers[":host"] = re.sub("^[^:]*://([^/]*)/.*$", "\\1", request["url"]) 55 | request_headers.append(headers) 56 | 57 | response = entry["response"] 58 | headers = process_headers(response["headers"]) 59 | headers[":status"] = re.sub("^([0-9]*).*", "\\1", str(response["status"])) 60 | headers[":status-text"] = response["statusText"].strip() or \ 61 | STATUS_PHRASES.get(headers[':status'], 'unknown') 62 | headers[":version"] = response["httpVersion"] 63 | response_headers.append(headers) 64 | 65 | return (request_headers, response_headers) 66 | 67 | 68 | def process_headers(hdrdicts): 69 | "Take a har header datastructure and return a normalised dictionary." 70 | out = {} 71 | for hdrdict in hdrdicts: 72 | name = hdrdict["name"].lower() 73 | val = hdrdict["value"] 74 | if not name: 75 | raise Exception() 76 | if name == "host": 77 | name = ":host" 78 | if name in out: 79 | out[name] = out[name] + '\0' + val 80 | else: 81 | out[name] = val 82 | return out 83 | 84 | def encode_strings(inobj, encoding="latin-1"): 85 | "Encode strings in objects. Latin-1 is the default encoding for HTTP/1.x." 86 | retval = {} 87 | for key, val in inobj.items(): 88 | if key in ['text', 'content']: 89 | continue 90 | else: 91 | n_k = key 92 | if isinstance(key, str): 93 | n_k = key.encode(encoding) 94 | n_v = val 95 | if isinstance(val, str): 96 | n_v = val.encode(encoding) 97 | retval[n_k] = n_v 98 | return retval 99 | 100 | 101 | STATUS_PHRASES = { 102 | '200': 'OK', 103 | '201': 'Created', 104 | '202': 'Accepted', 105 | '203': 'Non-Authoritative Information', 106 | '204': 'No Content', 107 | '205': 'Reset Content', 108 | '206': 'Partial Content', 109 | '207': 'Multi-Status', 110 | '300': 'Multiple Choices', 111 | '301': 'Moved Permanently', 112 | '302': 'Found', 113 | '303': 'See Other', 114 | '304': 'Not Modified', 115 | '307': 'Temporary Redirect', 116 | '308': 'Permanent Redirect', 117 | '400': 'Bad Request', 118 | '401': 'Unauthorized', 119 | '403': 'Forbidden', 120 | '404': 'Not Found', 121 | '405': 'Method Not Allowed', 122 | '406': 'Not Acceptable', 123 | '408': 'Request Timeout', 124 | '409': 'Conflict', 125 | '410': 'Gone', 126 | '500': 'Internal Server Error', 127 | } 128 | 129 | -------------------------------------------------------------------------------- /compressor/http2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012-2013, Canon Inc. 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted only for the purpose of developing standards 6 | # within the HTTPbis WG and for testing and promoting such standards within the 7 | # IETF Standards Process. The following conditions are required to be met: 8 | # - Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # - Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # - Neither the name of Canon Inc. nor the names of its contributors may be 14 | # used to endorse or promote products derived from this software without 15 | # specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY CANON INC. AND ITS CONTRIBUTORS "AS IS" AND ANY 18 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | # DISCLAIMED. IN NO EVENT SHALL CANON INC. AND ITS CONTRIBUTORS BE LIABLE FOR 21 | # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | from .. import BaseProcessor 29 | 30 | from http2Codec import HTTP2Codec 31 | 32 | #=============================================================================== 33 | # Parameter definition 34 | #=============================================================================== 35 | def parse_bool(value): 36 | if value is None: 37 | return True 38 | if value.lower() == "false": 39 | return False 40 | else: 41 | return True 42 | 43 | IS_REQUEST = "is_request" 44 | BUFFER_SIZE = "buffer_size" 45 | 46 | param_functions = { 47 | BUFFER_SIZE: int, 48 | } 49 | 50 | #=============================================================================== 51 | # Processor class 52 | #=============================================================================== 53 | def split_headers(d): 54 | lst = [] 55 | for k, v in d.items(): 56 | if k == "cookie": 57 | ch = ";" 58 | else: 59 | ch = "\0" 60 | hdrs = ((k, vs.strip()) for vs in v.split(ch)) 61 | lst.extend(h for h in hdrs if h not in lst) 62 | return lst 63 | 64 | def join_headers(lst): 65 | d = {} 66 | for k, v in lst: 67 | if k in d: 68 | if k == "cookie": 69 | d[k] += ";" + v 70 | else: 71 | d[k] += "\0" + v 72 | else: 73 | d[k] = v 74 | return d 75 | 76 | class Processor(BaseProcessor): 77 | def __init__(self, options, is_request, params): 78 | BaseProcessor.__init__(self, options, is_request, params) 79 | 80 | param_dict = { 81 | IS_REQUEST: is_request, 82 | BUFFER_SIZE: 4096, 83 | } 84 | 85 | for param in params: 86 | if "=" in param: 87 | name, value = param.split("=", 1) 88 | else: 89 | name = param 90 | value = None 91 | if name in param_functions: 92 | param_dict[name] = param_functions[name](value) 93 | else: 94 | param_dict[name] = value 95 | 96 | codecClass = HTTP2Codec 97 | self.codec = codecClass(**param_dict) 98 | 99 | def compress(self, in_headers, host): 100 | headers = split_headers(in_headers) 101 | frame = self.codec.encode_headers(headers) 102 | 103 | return frame 104 | 105 | def decompress(self, compressed): 106 | headers = self.codec.decode_headers(compressed) 107 | return join_headers(headers) 108 | 109 | -------------------------------------------------------------------------------- /lib/stream.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from collections import defaultdict 4 | import locale 5 | 6 | # pylint: disable=W0311 7 | 8 | 9 | class Stream(object): 10 | """ 11 | A one-way stream of sets of HTTP headers. 12 | 13 | For our purposes, a stream is the unit that gets compressed; i.e., the 14 | headers in it have a shared context. 15 | """ 16 | def __init__(self, name, messages, msg_type, procs): 17 | self.name = name # identifier for the stream; e.g., "example.com reqs" 18 | self.messages = messages 19 | self.msg_type = msg_type # "req" or "res" 20 | self.procs = procs # order of processors 21 | self.lname = max([len(p) for p in procs]) # longest processor name 22 | self.sizes = defaultdict(list) 23 | self.ratios = defaultdict(list) 24 | self.times = defaultdict(list) 25 | 26 | def record_result(self, proc_name, size, ratio, time): 27 | "Record the results of processing, by proc_name." 28 | self.sizes[proc_name].append(size) 29 | self.ratios[proc_name].append(ratio) 30 | self.times[proc_name].append(time) 31 | 32 | def print_header(self, output): 33 | "Print a header for the summary to output." 34 | output("* %s: %i %s messages\n" % 35 | (self.name, len(self.messages), self.msg_type)) 36 | 37 | def print_summary(self, output, baseline): 38 | "Print a summary of the stream to output, compared to baseline." 39 | lines = [] 40 | baseline_size = sum(self.sizes[baseline]) 41 | for proc in self.procs: 42 | ttl_size = sum(self.sizes[proc]) 43 | ttl_time = sum(self.times[proc]) 44 | pretty_size = locale.format("%13d", ttl_size, grouping=True) 45 | ratio = 1.0 * ttl_size / baseline_size 46 | try: 47 | std = meanstdv(self.ratios[proc])[1] 48 | except ZeroDivisionError: 49 | std = 0 50 | min_ratio = min(self.ratios[proc]) 51 | max_ratio = max(self.ratios[proc]) 52 | lines.append((proc, pretty_size, ttl_time, ratio, min_ratio, max_ratio, std)) 53 | output(' %%%ds size time | ratio min max std\n' % (self.lname + 9) % '') 54 | fmt = ' %%%ds %%s %%5.2f | %%2.2f %%2.2f %%2.2f %%2.2f\n' % self.lname 55 | for line in lines: 56 | output(fmt % line) 57 | output("\n") 58 | 59 | def print_tsv_header(self, output): 60 | "Print a TSV header to output." 61 | header = "\t".join(["num", "name"] + self.procs) 62 | output("%s\n" % header) 63 | 64 | def print_tsv(self, output, count=0): 65 | "Print the stream as TSV to output, using count as a counter." 66 | lines = list(zip(*[self.sizes[proc] for proc in self.procs])) 67 | for line in lines: 68 | count += 1 69 | output("\t".join([str(count), self.name] + [str(j) for j in line])) 70 | output("\n") 71 | return count 72 | 73 | def __add__(self, other): 74 | assert self.msg_type == other.msg_type 75 | new = Stream('', self.messages, self.msg_type, self.procs) 76 | new.messages.extend(other.messages) # NB: not great for memory 77 | new.sizes = merge_dols(self.sizes, other.sizes) 78 | new.ratios = merge_dols(self.ratios, other.ratios) 79 | new.times = merge_dols(self.times, other.times) 80 | new.procs = self.procs 81 | new.lname = self.lname 82 | return new 83 | 84 | def __radd__(self, other): 85 | new = Stream('', self.messages, self.msg_type, self.procs) 86 | new.sizes = self.sizes 87 | new.ratios = self.ratios 88 | new.times = self.times 89 | new.procs = self.procs 90 | new.lname = self.lname 91 | return new 92 | 93 | 94 | def merge_dols(dol1, dol2): 95 | """ 96 | Merge two dictionaries of lists. 97 | """ 98 | result = dict(dol1, **dol2) 99 | result.update((k, dol1[k] + dol2[k]) 100 | for k in set(dol1).intersection(dol2)) 101 | return result 102 | 103 | def meanstdv(members): 104 | """ 105 | Calculate mean and standard deviation of data x[]: 106 | mean = {\sum_i x_i \over n} 107 | std = sqrt(\sum_i (x_i - mean)^2 \over n-1) 108 | """ 109 | from math import sqrt 110 | num, mean, std = len(members), 0, 0 111 | for item in members: 112 | mean = mean + item 113 | mean = mean / float(num) 114 | for item in members: 115 | std = std + (item - mean)**2 116 | std = sqrt(std / float(num - 1)) 117 | return mean, std -------------------------------------------------------------------------------- /compressor/delta/trivial_http_parse.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #ifndef TRIVIAL_HTTP_PARSE_H 5 | #define TRIVIAL_HTTP_PARSE_H 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | using std::cerr; 15 | //using std::cout; 16 | using std::getline; 17 | using std::ifstream; 18 | using std::string; 19 | using std::vector; 20 | using std::istream; 21 | using std::ostream; 22 | 23 | struct KVPair { 24 | string key; 25 | string val; 26 | KVPair() {} 27 | KVPair(string key, string val) : key(key), val(val) {} 28 | friend ostream& operator<<(ostream& os, const KVPair& kv) { 29 | os << "\"" << kv.key << "\" \"" << kv.val << "\""; 30 | return os; 31 | } 32 | size_t size() const { return key.size() + val.size(); } 33 | }; 34 | 35 | typedef vector Lines; 36 | 37 | typedef Lines HeaderFrame; 38 | 39 | class TrivialHTTPParse { 40 | private: 41 | static HeaderFrame* GetHeaderFramePtr(vector* frames, 42 | unsigned int expected_previous_len) { 43 | if (frames->size() <= expected_previous_len) { 44 | frames->push_back(HeaderFrame()); 45 | } 46 | return &(frames->back()); 47 | } 48 | 49 | public: 50 | static int ParseFile(const string& fn, 51 | vector* requests, 52 | vector* responses) { 53 | ifstream ifs(fn.c_str()); 54 | ParseStream(ifs, requests, responses); 55 | return 1; 56 | } 57 | static int ParseStream(istream& istrm, 58 | vector* requests, 59 | vector* responses) { 60 | int frames_len = 0; 61 | int frames_idx = 0; 62 | vector* frames[2] = {requests, responses}; 63 | if (!(requests->empty() && responses->empty())) { 64 | return -1; 65 | } 66 | HeaderFrame* cur_frame = GetHeaderFramePtr(frames[frames_idx], frames_len); 67 | 68 | while (istrm.good()) { 69 | string line; 70 | getline(istrm, line); 71 | size_t colon_pos = line.find_first_of(":", 1); 72 | if (line.size() == 0) { 73 | // finished with this frame. 74 | if (frames_idx == 1) ++frames_len; 75 | frames_idx = ! frames_idx; 76 | cur_frame = GetHeaderFramePtr(frames[frames_idx], frames_len); 77 | continue; 78 | } else if (colon_pos == string::npos || 79 | colon_pos + 1 > line.size() || 80 | line[colon_pos + 1] != ' ') { 81 | cerr << "Misformatted line. Was expecting to see a ': ' in there.\n"; 82 | cerr << "Line:\n"; 83 | cerr << line << "\n"; 84 | cerr << "colon_pos: " << colon_pos<< "\n"; 85 | return 0; 86 | } 87 | size_t val_start = colon_pos + 2; 88 | size_t val_size = line.size() - val_start; 89 | cur_frame->push_back(KVPair(line.substr(0, colon_pos), 90 | line.substr(val_start, val_size))); 91 | } 92 | if (requests->back().empty()) { 93 | requests->pop_back(); 94 | } 95 | return 1; 96 | } 97 | }; 98 | 99 | /* 100 | int main(int argc, char** argv) { 101 | vector requests; 102 | vector responses; 103 | if (!ParseFile(argv[1], &requests, &responses)) { 104 | cerr << "Failed to parse correctly. Exiting\n"; 105 | return EXIT_FAILURE; 106 | } 107 | for (int i = 0; i < requests.size(); ++i) { 108 | for (HeaderFrame::Lines::const_iterator l_it = requests[i].lines.begin(); 109 | l_it != requests[i].lines.end(); 110 | ++l_it) { 111 | auto line = *l_it; 112 | const string& k = line.first; 113 | const string& v = line.second; 114 | cout << k << ": " << v << "\n"; 115 | } 116 | cout << "\n"; 117 | for (HeaderFrame::Lines::const_iterator l_it = responses[i].lines.begin(); 118 | l_it != responses[i].lines.end(); 119 | ++l_it) { 120 | auto line = *l_it; 121 | const string& k = line.first; 122 | const string& v = line.second; 123 | cout << k << ": " << v << "\n"; 124 | } 125 | cout << "\n"; 126 | } 127 | } 128 | */ 129 | 130 | 131 | 132 | 133 | #endif // TRIVIAL_HTTP_PARSE_H 134 | -------------------------------------------------------------------------------- /compressor/delta_bohe/trivial_http_parse.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #ifndef TRIVIAL_HTTP_PARSE_H 5 | #define TRIVIAL_HTTP_PARSE_H 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | using std::cerr; 15 | //using std::cout; 16 | using std::getline; 17 | using std::ifstream; 18 | using std::string; 19 | using std::vector; 20 | using std::istream; 21 | using std::ostream; 22 | 23 | struct KVPair { 24 | string key; 25 | string val; 26 | KVPair() {} 27 | KVPair(string key, string val) : key(key), val(val) {} 28 | friend ostream& operator<<(ostream& os, const KVPair& kv) { 29 | os << "\"" << kv.key << "\" \"" << kv.val << "\""; 30 | return os; 31 | } 32 | size_t size() const { return key.size() + val.size(); } 33 | }; 34 | 35 | typedef vector Lines; 36 | 37 | typedef Lines HeaderFrame; 38 | 39 | class TrivialHTTPParse { 40 | private: 41 | static HeaderFrame* GetHeaderFramePtr(vector* frames, 42 | unsigned int expected_previous_len) { 43 | if (frames->size() <= expected_previous_len) { 44 | frames->push_back(HeaderFrame()); 45 | } 46 | return &(frames->back()); 47 | } 48 | 49 | public: 50 | static int ParseFile(const string& fn, 51 | vector* requests, 52 | vector* responses) { 53 | ifstream ifs(fn.c_str()); 54 | ParseStream(ifs, requests, responses); 55 | return 1; 56 | } 57 | static int ParseStream(istream& istrm, 58 | vector* requests, 59 | vector* responses) { 60 | int frames_len = 0; 61 | int frames_idx = 0; 62 | vector* frames[2] = {requests, responses}; 63 | if (!(requests->empty() && responses->empty())) { 64 | return -1; 65 | } 66 | HeaderFrame* cur_frame = GetHeaderFramePtr(frames[frames_idx], frames_len); 67 | 68 | while (istrm.good()) { 69 | string line; 70 | getline(istrm, line); 71 | size_t colon_pos = line.find_first_of(":", 1); 72 | if (line.size() == 0) { 73 | // finished with this frame. 74 | if (frames_idx == 1) ++frames_len; 75 | frames_idx = ! frames_idx; 76 | cur_frame = GetHeaderFramePtr(frames[frames_idx], frames_len); 77 | continue; 78 | } else if (colon_pos == string::npos || 79 | colon_pos + 1 > line.size() || 80 | line[colon_pos + 1] != ' ') { 81 | cerr << "Misformatted line. Was expecting to see a ': ' in there.\n"; 82 | cerr << "Line:\n"; 83 | cerr << line << "\n"; 84 | cerr << "colon_pos: " << colon_pos<< "\n"; 85 | return 0; 86 | } 87 | size_t val_start = colon_pos + 2; 88 | size_t val_size = line.size() - val_start; 89 | cur_frame->push_back(KVPair(line.substr(0, colon_pos), 90 | line.substr(val_start, val_size))); 91 | } 92 | if (requests->back().empty()) { 93 | requests->pop_back(); 94 | } 95 | return 1; 96 | } 97 | }; 98 | 99 | /* 100 | int main(int argc, char** argv) { 101 | vector requests; 102 | vector responses; 103 | if (!ParseFile(argv[1], &requests, &responses)) { 104 | cerr << "Failed to parse correctly. Exiting\n"; 105 | return EXIT_FAILURE; 106 | } 107 | for (int i = 0; i < requests.size(); ++i) { 108 | for (HeaderFrame::Lines::const_iterator l_it = requests[i].lines.begin(); 109 | l_it != requests[i].lines.end(); 110 | ++l_it) { 111 | auto line = *l_it; 112 | const string& k = line.first; 113 | const string& v = line.second; 114 | cout << k << ": " << v << "\n"; 115 | } 116 | cout << "\n"; 117 | for (HeaderFrame::Lines::const_iterator l_it = responses[i].lines.begin(); 118 | l_it != responses[i].lines.end(); 119 | ++l_it) { 120 | auto line = *l_it; 121 | const string& k = line.first; 122 | const string& v = line.second; 123 | cout << k << ": " << v << "\n"; 124 | } 125 | cout << "\n"; 126 | } 127 | } 128 | */ 129 | 130 | 131 | 132 | 133 | #endif // TRIVIAL_HTTP_PARSE_H 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | HTTP Header Compression Tests 3 | ============================= 4 | 5 | Usage 6 | ----- 7 | 8 | The test can be run like this: 9 | 10 | ./compare_compressors.py [options] list-of-har-files 11 | 12 | See [the HAR specification](http://www.softwareishard.com/blog/har-12-spec/), 13 | and our [collected sample HAR files](https://github.com/http2/http_samples). 14 | 15 | The most important option is -c, which specifies what compressors to run. 16 | Current codecs include: 17 | 18 | * http1_gzip - gzip compression of HTTP1.x headers 19 | * spdy3 - SPDY 3's gzip-based compression 20 | * delta - draft-rpeon-httpbis-header-compression implementation 21 | * fork - fork a process; see below 22 | 23 | Interpreting Text Results 24 | ------------------------- 25 | 26 | Results will look something like: 27 | 28 | * TOTAL: 1012 req messages 29 | size time | ratio min max std 30 | http1 830,970 0.05 | 1.00 1.00 1.00 0.00 31 | simple 320,883 0.05 | 0.39 0.07 0.92 0.24 32 | spdy3 85,492 0.06 | 0.10 0.03 0.66 0.08 33 | 34 | * TOTAL: 1012 res messages 35 | size time | ratio min max std 36 | http1 424,075 0.04 | 1.00 1.00 1.00 0.00 37 | simple 176,216 0.12 | 0.42 0.11 0.95 0.12 38 | spdy3 80,706 0.07 | 0.19 0.04 0.68 0.09 39 | 40 | The 'size' column shows how many bytes the compression algorithm outputs; 41 | 'time' shows how much CPU time it roughly took; 'ratio' shows the ratio to the 42 | baseline (http1, by default), and the 'min', 'max' and 'std; columns show the 43 | minimum, maximum and standard deviations of the ratios, respectively. 44 | 45 | 46 | Showing Message Graphs 47 | ---------------------- 48 | 49 | When the "-t" option is used, TSV output is created. E.g., 50 | 51 | ./compare_compressors.py -t my.har 52 | 53 | This will create two TSV files, req.tsv and res.tsv, that can then be 54 | displayed by the display_tsv.html file. See [an 55 | example](http://http2.github.com/compression-test/). 56 | 57 | 58 | Adding New Compression Algorithms 59 | --------------------------------- 60 | 61 | If you wish to implement a new codec, there are two easy approaches. 62 | 63 | 1) Develop it in Python. New modules should be subdirectories of 64 | 'compressor', and should inherit from BaseProcessor there. 65 | 66 | 2) Develop it in another language, and use the 'fork' module to execute 67 | it in a separate process. See 'sample_exec_codec.py' for an example of this; 68 | it can be run like this: 69 | 70 | ./compare_compressors.py -c fork="sample_exec_codec.py" file.har 71 | 72 | 73 | 74 | NOTE WELL 75 | ========= 76 | 77 | Any submission to the [IETF](http://www.ietf.org/) intended by the Contributor 78 | for publication as all or part of an IETF Internet-Draft or RFC and any 79 | statement made within the context of an IETF activity is considered an "IETF 80 | Contribution". Such statements include oral statements in IETF sessions, as 81 | well as written and electronic communications made at any time or place, which 82 | are addressed to: 83 | 84 | * The IETF plenary session 85 | * The IESG, or any member thereof on behalf of the IESG 86 | * Any IETF mailing list, including the IETF list itself, any working group 87 | or design team list, or any other list functioning under IETF auspices 88 | * Any IETF working group or portion thereof 89 | * Any Birds of a Feather (BOF) session 90 | * The IAB or any member thereof on behalf of the IAB 91 | * The RFC Editor or the Internet-Drafts function 92 | * All IETF Contributions are subject to the rules of 93 | [RFC 5378](http://tools.ietf.org/html/rfc5378) and 94 | [RFC 3979](http://tools.ietf.org/html/rfc3979) 95 | (updated by [RFC 4879](http://tools.ietf.org/html/rfc4879)). 96 | 97 | Statements made outside of an IETF session, mailing list or other function, 98 | that are clearly not intended to be input to an IETF activity, group or 99 | function, are not IETF Contributions in the context of this notice. 100 | 101 | Please consult [RFC 5378](http://tools.ietf.org/html/rfc5378) and [RFC 102 | 3979](http://tools.ietf.org/html/rfc3979) for details. 103 | 104 | A participant in any IETF activity is deemed to accept all IETF rules of 105 | process, as documented in Best Current Practices RFCs and IESG Statements. 106 | 107 | A participant in any IETF activity acknowledges that written, audio and video 108 | records of meetings may be made and may be available to the public. 109 | -------------------------------------------------------------------------------- /compressor/delta/pretty_print_tree.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef PRETTY_PRINT_TREE_H 6 | #define PRETTY_PRINT_TREE_H 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using std::ostream; 14 | using std::string; 15 | using std::stringstream; 16 | using std::vector; 17 | 18 | class CharMatrix { 19 | public: 20 | 21 | void WriteVisible(unsigned int* x_pos, unsigned int y_pos, const string& string_to_write) { 22 | Write(x_pos, y_pos, string_to_write, true); 23 | } 24 | 25 | void WriteInvisible(unsigned int* x_pos, unsigned int y_pos, const string& string_to_write) { 26 | Write(x_pos, y_pos, string_to_write, false); 27 | } 28 | 29 | void Write(unsigned int* x_pos, 30 | unsigned int y_pos, 31 | const string& string_to_write, 32 | bool visible) { 33 | if (char_matrix_.size() <= y_pos) { 34 | unsigned int old_size = char_matrix_.size(); 35 | char_matrix_.resize(y_pos + 1); 36 | offset_.resize(y_pos + 1); 37 | for (unsigned int i = old_size; i < y_pos + 1; ++i) { 38 | offset_[i] = 0; 39 | } 40 | } 41 | unsigned int end_x_pos = *x_pos + string_to_write.size() + 1 + offset_[y_pos]; 42 | if (char_matrix_[y_pos].size() < end_x_pos) { 43 | unsigned int old_size = char_matrix_[y_pos].size(); 44 | char_matrix_[y_pos].resize(end_x_pos); 45 | for (unsigned int x = old_size; x < (end_x_pos); ++x) { 46 | char_matrix_[y_pos][x] = ' '; 47 | } 48 | } 49 | for (unsigned int i = 0; i < string_to_write.size(); ++i, ++(*x_pos)) { 50 | char_matrix_[y_pos][*x_pos + offset_[y_pos]] = string_to_write[i]; 51 | if (!visible || !isprint(string_to_write[i])) { 52 | --(*x_pos); 53 | ++offset_[y_pos]; 54 | } 55 | } 56 | } 57 | 58 | friend ostream& operator<<(ostream& os, const CharMatrix& cm) { 59 | for (unsigned int y = 0; y < cm.char_matrix_.size(); ++y) { 60 | for (unsigned int x = 0; x < cm.char_matrix_[y].size(); ++x) { 61 | os << cm.char_matrix_[y][x]; 62 | } 63 | os << "\n"; 64 | } 65 | return os; 66 | } 67 | private: 68 | vector > char_matrix_; 69 | vector offset_; 70 | }; 71 | 72 | template 73 | int PrettyPrintHelper(const Node* node, 74 | int dist_from_root, 75 | unsigned int* x_pos, 76 | CharMatrix* char_matrix, 77 | int parent_pos, 78 | int direction) { 79 | unsigned int tmp_x_pos; 80 | unsigned int y_pos = dist_from_root * 3; 81 | if (node->children[0]) { 82 | int child_end = PrettyPrintHelper(node->children[0], 83 | dist_from_root + 1, 84 | x_pos, 85 | char_matrix, 86 | *x_pos, 87 | 0); 88 | tmp_x_pos = *x_pos - 1; 89 | char_matrix->WriteVisible(&tmp_x_pos, y_pos + 1, "/"); 90 | --tmp_x_pos; 91 | for (unsigned int x = child_end + 1; x < tmp_x_pos;) { 92 | char_matrix->WriteVisible(&x, y_pos + 1, "_"); 93 | } 94 | } 95 | 96 | unsigned int original_x_pos = *x_pos; 97 | { 98 | stringstream s; 99 | s << "[" << *node << "]"; 100 | char_matrix->WriteVisible(x_pos, y_pos, s.str()); 101 | } 102 | 103 | unsigned int new_x_pos = *x_pos; 104 | 105 | if (dist_from_root != 0) { 106 | if (direction == 0) { 107 | tmp_x_pos = *x_pos; 108 | char_matrix->WriteVisible(&tmp_x_pos, y_pos - 1, "/"); 109 | } else { 110 | tmp_x_pos = original_x_pos - 1; 111 | char_matrix->WriteVisible(&tmp_x_pos, y_pos - 1, "\\"); 112 | --tmp_x_pos; 113 | for (unsigned int x = parent_pos + 1; x < tmp_x_pos;) { 114 | char_matrix->WriteVisible(&x, y_pos - 2, "_"); 115 | } 116 | } 117 | } 118 | *x_pos += 2; 119 | 120 | if (node->children[1]) { 121 | tmp_x_pos = *x_pos - 2; 122 | char_matrix->WriteVisible(&tmp_x_pos, y_pos + 1, "\\"); 123 | PrettyPrintHelper(node->children[1], 124 | dist_from_root + 1, 125 | x_pos, 126 | char_matrix, 127 | *x_pos - 2, 128 | 1); 129 | } 130 | return new_x_pos; 131 | } 132 | 133 | template 134 | void PrettyPrintTreeToStream(Node* root, ostream& os) { 135 | CharMatrix char_matrix; 136 | if (root == NULL) { 137 | return; 138 | } 139 | 140 | int x_pos = 0; 141 | PrettyPrintHelper(root, 0, &x_pos, &char_matrix, 0, 0); 142 | os << char_matrix; 143 | }; 144 | 145 | 146 | #endif 147 | -------------------------------------------------------------------------------- /compressor/delta_bohe/pretty_print_tree.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef PRETTY_PRINT_TREE_H 6 | #define PRETTY_PRINT_TREE_H 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using std::ostream; 14 | using std::string; 15 | using std::stringstream; 16 | using std::vector; 17 | 18 | class CharMatrix { 19 | public: 20 | 21 | void WriteVisible(unsigned int* x_pos, unsigned int y_pos, const string& string_to_write) { 22 | Write(x_pos, y_pos, string_to_write, true); 23 | } 24 | 25 | void WriteInvisible(unsigned int* x_pos, unsigned int y_pos, const string& string_to_write) { 26 | Write(x_pos, y_pos, string_to_write, false); 27 | } 28 | 29 | void Write(unsigned int* x_pos, 30 | unsigned int y_pos, 31 | const string& string_to_write, 32 | bool visible) { 33 | if (char_matrix_.size() <= y_pos) { 34 | unsigned int old_size = char_matrix_.size(); 35 | char_matrix_.resize(y_pos + 1); 36 | offset_.resize(y_pos + 1); 37 | for (unsigned int i = old_size; i < y_pos + 1; ++i) { 38 | offset_[i] = 0; 39 | } 40 | } 41 | unsigned int end_x_pos = *x_pos + string_to_write.size() + 1 + offset_[y_pos]; 42 | if (char_matrix_[y_pos].size() < end_x_pos) { 43 | unsigned int old_size = char_matrix_[y_pos].size(); 44 | char_matrix_[y_pos].resize(end_x_pos); 45 | for (unsigned int x = old_size; x < (end_x_pos); ++x) { 46 | char_matrix_[y_pos][x] = ' '; 47 | } 48 | } 49 | for (unsigned int i = 0; i < string_to_write.size(); ++i, ++(*x_pos)) { 50 | char_matrix_[y_pos][*x_pos + offset_[y_pos]] = string_to_write[i]; 51 | if (!visible || !isprint(string_to_write[i])) { 52 | --(*x_pos); 53 | ++offset_[y_pos]; 54 | } 55 | } 56 | } 57 | 58 | friend ostream& operator<<(ostream& os, const CharMatrix& cm) { 59 | for (unsigned int y = 0; y < cm.char_matrix_.size(); ++y) { 60 | for (unsigned int x = 0; x < cm.char_matrix_[y].size(); ++x) { 61 | os << cm.char_matrix_[y][x]; 62 | } 63 | os << "\n"; 64 | } 65 | return os; 66 | } 67 | private: 68 | vector > char_matrix_; 69 | vector offset_; 70 | }; 71 | 72 | template 73 | int PrettyPrintHelper(const Node* node, 74 | int dist_from_root, 75 | unsigned int* x_pos, 76 | CharMatrix* char_matrix, 77 | int parent_pos, 78 | int direction) { 79 | unsigned int tmp_x_pos; 80 | unsigned int y_pos = dist_from_root * 3; 81 | if (node->children[0]) { 82 | int child_end = PrettyPrintHelper(node->children[0], 83 | dist_from_root + 1, 84 | x_pos, 85 | char_matrix, 86 | *x_pos, 87 | 0); 88 | tmp_x_pos = *x_pos - 1; 89 | char_matrix->WriteVisible(&tmp_x_pos, y_pos + 1, "/"); 90 | --tmp_x_pos; 91 | for (unsigned int x = child_end + 1; x < tmp_x_pos;) { 92 | char_matrix->WriteVisible(&x, y_pos + 1, "_"); 93 | } 94 | } 95 | 96 | unsigned int original_x_pos = *x_pos; 97 | { 98 | stringstream s; 99 | s << "[" << *node << "]"; 100 | char_matrix->WriteVisible(x_pos, y_pos, s.str()); 101 | } 102 | 103 | unsigned int new_x_pos = *x_pos; 104 | 105 | if (dist_from_root != 0) { 106 | if (direction == 0) { 107 | tmp_x_pos = *x_pos; 108 | char_matrix->WriteVisible(&tmp_x_pos, y_pos - 1, "/"); 109 | } else { 110 | tmp_x_pos = original_x_pos - 1; 111 | char_matrix->WriteVisible(&tmp_x_pos, y_pos - 1, "\\"); 112 | --tmp_x_pos; 113 | for (unsigned int x = parent_pos + 1; x < tmp_x_pos;) { 114 | char_matrix->WriteVisible(&x, y_pos - 2, "_"); 115 | } 116 | } 117 | } 118 | *x_pos += 2; 119 | 120 | if (node->children[1]) { 121 | tmp_x_pos = *x_pos - 2; 122 | char_matrix->WriteVisible(&tmp_x_pos, y_pos + 1, "\\"); 123 | PrettyPrintHelper(node->children[1], 124 | dist_from_root + 1, 125 | x_pos, 126 | char_matrix, 127 | *x_pos - 2, 128 | 1); 129 | } 130 | return new_x_pos; 131 | } 132 | 133 | template 134 | void PrettyPrintTreeToStream(Node* root, ostream& os) { 135 | CharMatrix char_matrix; 136 | if (root == NULL) { 137 | return; 138 | } 139 | 140 | int x_pos = 0; 141 | PrettyPrintHelper(root, 0, &x_pos, &char_matrix, 0, 0); 142 | os << char_matrix; 143 | }; 144 | 145 | 146 | #endif 147 | -------------------------------------------------------------------------------- /display_tsv.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 42 | 43 | 44 | 187 | -------------------------------------------------------------------------------- /compare_compressors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | compression_test.py 5 | 6 | Tests various HTTP header compression algorithms, to compare them. 7 | """ 8 | 9 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 10 | # Use of this source code is governed by a BSD-style license that can be 11 | # found in the LICENSE file. 12 | 13 | # pylint: disable=W0311 14 | 15 | from collections import defaultdict 16 | from importlib import import_module 17 | import locale 18 | import optparse 19 | import operator 20 | from functools import reduce 21 | 22 | from lib.harfile import read_har_file 23 | from lib.processors import Processors 24 | 25 | 26 | class CompressionTester(object): 27 | """ 28 | This is the thing. 29 | """ 30 | msg_types = ['req', 'res'] 31 | streamifier_dir = "lib.streamifiers" 32 | 33 | def __init__(self, output): 34 | self.options, self.args = self.parse_options() 35 | if self.options.baseline is None: 36 | self.options.baseline = "http1" 37 | if not self.options.baseline in self.options.processor_names: 38 | new_processor_names = [self.options.baseline] 39 | new_processor_names.extend(self.options.processor_names) 40 | self.options.processor_names = new_processor_names 41 | self.output = output 42 | self.tsv_out = defaultdict(list) # accumulator for TSV output 43 | self.processors = Processors(self.options, self.msg_types, output) 44 | self.streamify = self.load_streamifier(self.options.streamifier) 45 | self.run() 46 | 47 | def run(self): 48 | "Let's do this thing." 49 | sessions = [] 50 | for filename in self.args: 51 | har_requests, har_responses = read_har_file(filename) 52 | messages = list(zip(har_requests, har_responses)) 53 | sessions.extend(self.streamify(messages)) 54 | for session in sessions: 55 | if self.options.verbose > 0: 56 | session.print_header(self.output) 57 | self.processors.process_session(session) 58 | if self.options.verbose > 0: 59 | session.print_summary(self.output, self.options.baseline) 60 | self.processors.done() 61 | for msg_type in self.msg_types: 62 | ttl_stream = reduce(operator.add, [s for s in sessions if s.msg_type == msg_type]) 63 | ttl_stream.name = "TOTAL" 64 | ttl_stream.print_header(self.output) 65 | ttl_stream.print_summary(self.output, self.options.baseline) 66 | if self.options.tsv: 67 | out = {} 68 | for msg_type in self.msg_types: 69 | out[msg_type] = [ 70 | open("%s%s" % (self.options.prefix, "%s.tsv" % msg_type), 'w'), 71 | 0 72 | ] 73 | sessions[0].print_tsv_header(out[msg_type][0].write) 74 | for session in sessions: 75 | tsvfh, tsv_count = out[session.msg_type] 76 | out[session.msg_type][1] = session.print_tsv(tsvfh.write, tsv_count) 77 | for fh, count in list(out.values()): 78 | fh.close() 79 | 80 | def load_streamifier(self, name): 81 | "Load the streamifier specified in the options." 82 | return import_module("%s.%s" % (self.streamifier_dir, name)) \ 83 | .Streamifier([p.name for p in self.processors.processors['req']]) \ 84 | .streamify 85 | 86 | def parse_options(self): 87 | "Parse command-line options and return (options, args)." 88 | optp = optparse.OptionParser() 89 | optp.add_option('-v', '--verbose', 90 | type='int', 91 | dest='verbose', 92 | help='set verbosity, 1-5 (default: %default)', 93 | default=0, 94 | metavar='VERBOSITY') 95 | optp.add_option('-d', '--debug', 96 | action='store_true', 97 | dest="debug", 98 | help="debug mode. Stops on first header mismatch.", 99 | default=False) 100 | optp.add_option('-c', '--codec', 101 | action='append', 102 | dest='processor_names', 103 | help='compression modules to test, potentially with ' 104 | 'parameters. ' 105 | 'e.g. -c spdy3 -c fork="abc" ' 106 | '(default: %default)', 107 | default=[]) 108 | optp.add_option('-b', '--baseline', 109 | dest='baseline', 110 | help='baseline codec to base comparisons upon. ' 111 | '(default: %default)', 112 | default=None) 113 | optp.add_option('-t', '--tsv', 114 | action="store_true", 115 | dest="tsv", 116 | help="output TSV.", 117 | default=False) 118 | optp.add_option('-s', '--streamifier', 119 | dest="streamifier", 120 | help="streamifier module to use (default: %default).", 121 | default="public_suffix") 122 | optp.add_option('--prefix', 123 | action="store", 124 | dest="prefix", 125 | help="Prefix for TSV file output.", 126 | default="") 127 | return optp.parse_args() 128 | 129 | 130 | if __name__ == "__main__": 131 | import os 132 | import sys 133 | if os.name == "nt": 134 | locale.setlocale(locale.LC_ALL, 'english-us') 135 | else: 136 | locale.setlocale(locale.LC_ALL, 'en_US') 137 | CompressionTester(sys.stdout.write) 138 | -------------------------------------------------------------------------------- /compressor/delta2_bohe/lrustorage_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from lrustorage import LruStorage 4 | from lrustorage import KV 5 | 6 | def TestBasicFunctionality(): 7 | print "TestBasicFunctionality...", 8 | max_items = 10 9 | max_byte_size = 7*2*max_items 10 | s = LruStorage(max_byte_size, max_items) 11 | key_fmt = "key_%03d" 12 | val_fmt = "val_%03d" 13 | for i in xrange(max_items): 14 | s.Store(KV(key_fmt % i, val_fmt % i)) 15 | assert len(s.ring) == max_items 16 | assert s.byte_size == max_byte_size 17 | for i in xrange(max_items): 18 | entry = s.Lookup(i) 19 | assert entry.key == key_fmt % i 20 | assert entry.val == val_fmt % i 21 | assert entry.seq_num == i 22 | print "Success!" 23 | 24 | def TestMaxItemSize(): 25 | print "TestMaxItemSize...", 26 | caught_error = 0 27 | 28 | max_items = 10 29 | max_byte_size = 10000000 30 | s = LruStorage(max_byte_size, max_items) 31 | key_fmt = "key_%03d" 32 | val_fmt = "val_%03d" 33 | try: 34 | for i in xrange(max_items+10): 35 | s.Store(KV(key_fmt % i, val_fmt % i)) 36 | except MemoryError as me: 37 | caught_error = 1 38 | if not caught_error: 39 | print "Failure: Attempted to store too many ITEMS, but no exception" 40 | else: 41 | print "Success!" 42 | 43 | def TestMaxByteSize(): 44 | print "TestMaxByteSize...", 45 | caught_error = 0 46 | 47 | max_items = 10 48 | max_byte_size = 7*2*(max_items - 1) 49 | s = LruStorage(max_byte_size, max_items) 50 | key_fmt = "key_%03d" 51 | val_fmt = "val_%03d" 52 | try: 53 | for i in xrange(max_items + 1): 54 | s.Store(KV(key_fmt % i, val_fmt % i)) 55 | except MemoryError as me: 56 | if i == max_items - 1: 57 | caught_error = 1 58 | if not caught_error: 59 | print "Failure: Attempted to store too many BYTES, but no exception" 60 | else: 61 | print "Success!" 62 | 63 | def TestFindKeyValEntries(): 64 | print "TestFindKeyValEntries...", 65 | caught_error = 0 66 | 67 | max_items = 10 68 | max_byte_size = 7*2*max_items 69 | s = LruStorage(max_byte_size, max_items) 70 | key_fmt = "key_%03d" 71 | val_fmt = "val_%03d" 72 | for i in xrange(max_items): 73 | s.Store(KV(key_fmt % i, val_fmt % i)) 74 | 75 | (ke, ve) = s.FindKeyValEntries("key_009", "") 76 | assert ke.key == "key_009" 77 | assert ve is None 78 | (ke, ve) = s.FindKeyValEntries("key_001", "val_001") 79 | assert ke.key == "key_001" 80 | assert ve.key == "key_001" 81 | assert ve.val == "val_001" 82 | print "Success!" 83 | 84 | def TestPopOne(): 85 | print "TestPopOne...", 86 | caught_error = 0 87 | 88 | max_items = 10 89 | max_byte_size = 7*2*max_items 90 | s = LruStorage(max_byte_size, max_items) 91 | key_fmt = "key_%03d" 92 | val_fmt = "val_%03d" 93 | for i in xrange(max_items): 94 | s.Store(KV(key_fmt % i, val_fmt % i)) 95 | 96 | assert s.Lookup(0).key == key_fmt % 0 97 | 98 | for i in xrange(0, max_items): 99 | entry = s.Lookup(i) # this should work, of course. 100 | s.PopOne() 101 | try: 102 | s.Lookup(i) 103 | except IndexError as ie: 104 | caught_error = 1 105 | if not caught_error: 106 | print s.ring 107 | print s.Lookup(i) 108 | print "Failure: PopOne() didn't pop the first element" 109 | return 110 | 111 | assert s.byte_size == 0 112 | assert len(s.ring) == 0 113 | 114 | caught_error = 0 115 | try: 116 | s.PopOne() 117 | except: 118 | caught_error = 1 119 | pass 120 | if not caught_error: 121 | print "Did PopOne() with empty LruStorage, and got no error!?" 122 | return 123 | 124 | print "Success!" 125 | 126 | def TestReserve(): 127 | print "TestReserve...", 128 | max_items = 10 129 | max_byte_size = 1000 130 | s = LruStorage(max_byte_size, max_items) 131 | key_fmt = "key_%06d" 132 | val_fmt = "val_%06d" 133 | for i in xrange(max_items + 10): 134 | if i < max_items: 135 | kv = KV(key_fmt % i, val_fmt % i) 136 | s.Store(kv) 137 | else: 138 | try: 139 | kv = KV(key_fmt % i, val_fmt % i) 140 | s.Store(kv) 141 | print "This shouldn't have worked. Error." 142 | return 143 | except MemoryError as me: 144 | s.Reserve(kv.ByteSize(), 1) 145 | kv = KV(key_fmt % i, val_fmt % i) 146 | s.Store(kv) 147 | 148 | s = LruStorage(20, max_items) 149 | s.Store(KV("12345", "67890")) 150 | s.Store(KV("12345", "67890")) 151 | try: 152 | s.Store(KV("12345", "678901")) 153 | print "This shouldn't have worked. Error." 154 | return 155 | except MemoryError as me: 156 | s.Reserve(11,1) 157 | s.Store(KV("12345", "678901")) 158 | assert len(s.ring) == 1 159 | print "Success!" 160 | 161 | 162 | def TestRollOver(): 163 | print "TestRollOver...", 164 | max_items = 64 165 | max_seq_num = 64 166 | max_byte_size = (6+4)*2*max_items 167 | s = LruStorage(max_byte_size, max_items, max_seq_num) 168 | key_fmt = "key_%06d" 169 | val_fmt = "val_%06d" 170 | for i in xrange(max_items + max_items/2): 171 | kv = KV(key_fmt % i, val_fmt % i) 172 | s.Reserve(kv.ByteSize(), 1) 173 | s.Store(kv) 174 | for i in xrange(max_items/2, max_items + max_items/2): 175 | key_str = key_fmt % i 176 | s.Lookup(i % max_seq_num).key == key_str 177 | print "Success!" 178 | 179 | def main(): 180 | TestBasicFunctionality() 181 | TestMaxItemSize() 182 | TestMaxByteSize() 183 | TestFindKeyValEntries() 184 | TestPopOne() 185 | TestReserve() 186 | TestRollOver() 187 | 188 | main() 189 | 190 | -------------------------------------------------------------------------------- /compressor/headerdiff/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012-2013, Canon Inc. 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted only for the purpose of developing standards 6 | # within the HTTPbis WG and for testing and promoting such standards within the 7 | # IETF Standards Process. The following conditions are required to be met: 8 | # - Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # - Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # - Neither the name of Canon Inc. nor the names of its contributors may be 14 | # used to endorse or promote products derived from this software without 15 | # specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY CANON INC. AND ITS CONTRIBUTORS "AS IS" AND ANY 18 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | # DISCLAIMED. IN NO EVENT SHALL CANON INC. AND ITS CONTRIBUTORS BE LIABLE FOR 21 | # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | import collections 29 | import zlib 30 | 31 | from headerDiffCodec import HeaderDiffCodec, IndexedHeader 32 | from headerDiffCodec import DELTA_FULL, DELTA_BOUND, DELTA_MAX 33 | 34 | from .. import BaseProcessor, spdy_dictionary 35 | 36 | ##################################################### 37 | ## Class for representing a Header: (name, value) ## 38 | ##################################################### 39 | class HeaderTuple(object): 40 | def __init__(self, name, value): 41 | self.name = name 42 | self.value = value 43 | 44 | @classmethod 45 | def from_dict(cls, d): 46 | """Convert a dict of headers to a list of HeaderTuple.""" 47 | return [HeaderTuple(k, v) for k, v in d.items()] 48 | 49 | @classmethod 50 | def split_from_dict(cls, d): 51 | """Convert a dict of headers to a list of HeaderTuple, splitting 52 | the cookies.""" 53 | lst = [] 54 | for k, v in d.items(): 55 | if k == "cookie": 56 | lst.extend(HeaderTuple(k, vs.strip()) for vs in v.split(";")) 57 | else: 58 | lst.extend(HeaderTuple(k, vs.strip()) for vs in v.split("\0")) 59 | return lst 60 | 61 | def __str__(self): 62 | return self.name + ":" + self.value 63 | 64 | def __repr__(self): 65 | return self.name + ":" + self.value 66 | 67 | BUFFER_SIZE = "buffer" 68 | DEFLATE_SIZE = "deflate" 69 | DELTA_USAGE = "delta" 70 | DELTA_TYPE = "delta_type" 71 | HUFFMAN = "huffman" 72 | 73 | def parse_bool(value): 74 | if value is None: 75 | return True 76 | if value.lower() == "false": 77 | return False 78 | else: 79 | return True 80 | 81 | def parse_delta(value): 82 | if value is None: 83 | return DELTA_FULL, "" 84 | 85 | value = value.strip() 86 | try: 87 | vint = int(value) 88 | return DELTA_MAX, vint 89 | except ValueError: 90 | pass 91 | 92 | if value: 93 | return DELTA_BOUND, value.strip("\"'") 94 | else: 95 | return DELTA_FULL, "" 96 | 97 | param_functions = { 98 | BUFFER_SIZE: int, 99 | DEFLATE_SIZE: int, 100 | DELTA_USAGE: parse_bool, 101 | DELTA_TYPE: parse_delta, 102 | HUFFMAN: parse_bool, 103 | } 104 | 105 | ##################################################### 106 | ## Interface for the HeaderDiff codec ## 107 | ##################################################### 108 | class Processor(BaseProcessor): 109 | def __init__(self, options, is_request, params): 110 | BaseProcessor.__init__(self, options, is_request, params) 111 | 112 | param_dict = { 113 | BUFFER_SIZE: 32768, 114 | DEFLATE_SIZE: None, 115 | DELTA_USAGE: True, 116 | DELTA_TYPE: (DELTA_FULL, ""), 117 | HUFFMAN: False, 118 | } 119 | for param in params: 120 | if "=" in param: 121 | name, value = param.split("=", 1) 122 | else: 123 | name = param 124 | value = None 125 | if name in param_functions: 126 | param_dict[name] = param_functions[name](value) 127 | else: 128 | param_dict[name] = value 129 | 130 | self.codec = HeaderDiffCodec( 131 | param_dict[BUFFER_SIZE], 132 | windowSize=param_dict[DEFLATE_SIZE], 133 | dict=spdy_dictionary.spdy_dict, 134 | delta_usage=param_dict[DELTA_USAGE], 135 | delta_type=param_dict[DELTA_TYPE], 136 | huffman=param_dict[HUFFMAN], 137 | isRequest=is_request, 138 | ) 139 | 140 | def compress(self, in_headers, host): 141 | hdrs = dict(in_headers) 142 | hdrs = HeaderTuple.split_from_dict(hdrs) 143 | 144 | frame = self.codec.encodeHeaders(hdrs, self.is_request) 145 | return frame 146 | 147 | def decompress(self, compressed): 148 | headers = self.codec.decodeHeaders(compressed, self.is_request) 149 | hdrs = {} 150 | for k, v in headers: 151 | if k in hdrs: 152 | if k == "cookie": 153 | hdrs[k] += ";" + v 154 | else: 155 | hdrs[k] += "\0" + v 156 | else: 157 | hdrs[k] = v 158 | 159 | return hdrs 160 | -------------------------------------------------------------------------------- /compressor/delta2/huffman_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 4 | # Use of this source code is governed by a BSD-style license that can be 5 | # found in the LICENSE file. 6 | 7 | from huffman import Huffman 8 | from bit_bucket import BitBucket 9 | from common_utils import FormatAsBits 10 | from common_utils import ListToStr 11 | 12 | request_freq_table = [ 13 | ( 0, 0),( 1, 0),( 2, 0), 14 | ( 3, 0),( 4, 0),( 5, 0), 15 | ( 6, 0),( 7, 0),( 8, 0), 16 | ( 9, 0),( 10, 0),( 11, 0), 17 | ( 12, 0),( 13, 0),( 14, 0), 18 | ( 15, 0),( 16, 0),( 17, 0), 19 | ( 18, 0),( 19, 0),( 20, 0), 20 | ( 21, 0),( 22, 0),( 23, 0), 21 | ( 24, 0),( 25, 0),( 26, 0), 22 | ( 27, 0),( 28, 0),( 29, 0), 23 | ( 30, 0),( 31, 0),( ' ', 1015), 24 | ( '!', 1070),( '"', 248),( '#', 134), 25 | ( '$', 133),( '%', 51699),( '&', 35390), 26 | ( "'", 103),( '(', 897),( ')', 958), 27 | ( '*', 1090),( '+', 1047),( ',', 4819), 28 | ( '-', 65428),( '.', 107763),( '/', 198044), 29 | ( '0', 96411),( '1', 97568),( '2', 100401), 30 | ( '3', 65167),( '4', 44510),( '5', 48391), 31 | ( '6', 43457),( '7', 47569),( '8', 43049), 32 | ( '9', 48704),( ':', 50383),( ';', 6713), 33 | ( '<', 16),( '=', 55594),( '>', 50), 34 | ( '?', 10299),( '@', 557),( 'A', 20358), 35 | ( 'B', 11689),( 'C', 14173),( 'D', 15341), 36 | ( 'E', 8779),( 'F', 22416),( 'G', 5665), 37 | ( 'H', 6092),( 'I', 7661),( 'J', 4912), 38 | ( 'K', 3239),( 'L', 6283),( 'M', 7952), 39 | ( 'N', 5858),( 'O', 6065),( 'P', 6940), 40 | ( 'Q', 4768),( 'R', 7305),( 'S', 8037), 41 | ( 'T', 10439),( 'U', 5850),( 'V', 5839), 42 | ( 'W', 6682),( 'X', 6246),( 'Y', 5340), 43 | ( 'Z', 3844),( '[', 286),('\\', 0), 44 | ( ']', 295),( '^', 144),( '_', 48992), 45 | ( '`', 9),( 'a', 155039),( 'b', 47481), 46 | ( 'c', 101510),( 'd', 71484),( 'e', 266868), 47 | ( 'f', 80484),( 'g', 60734),( 'h', 87393), 48 | ( 'i', 128602),( 'j', 23163),( 'k', 19215), 49 | ( 'l', 88869),( 'm', 85724),( 'n', 99945), 50 | ( 'o', 121614),( 'p', 127713),( 'q', 10028), 51 | ( 'r', 170852),( 's', 125075),( 't', 192825), 52 | ( 'u', 52443),( 'v', 21283),( 'w', 72409), 53 | ( 'x', 16587),( 'y', 19104),( 'z', 9528), 54 | ( '{', 30),( '|', 986),( '}', 30), 55 | ( '~', 805),( 127, 0),( 128, 0), 56 | ( 129, 0),( 130, 0),( 131, 0), 57 | ( 132, 0),( 133, 0),( 134, 0), 58 | ( 135, 0),( 136, 0),( 137, 0), 59 | ( 138, 0),( 139, 0),( 140, 0), 60 | ( 141, 0),( 142, 0),( 143, 0), 61 | ( 144, 0),( 145, 0),( 146, 0), 62 | ( 147, 0),( 148, 0),( 149, 0), 63 | ( 150, 0),( 151, 0),( 152, 0), 64 | ( 153, 0),( 154, 0),( 155, 0), 65 | ( 156, 0),( 157, 0),( 158, 0), 66 | ( 159, 0),( 160, 0),( 161, 0), 67 | ( 162, 0),( 163, 0),( 164, 0), 68 | ( 165, 0),( 166, 0),( 167, 0), 69 | ( 168, 0),( 169, 0),( 170, 0), 70 | ( 171, 0),( 172, 0),( 173, 0), 71 | ( 174, 0),( 175, 0),( 176, 0), 72 | ( 177, 0),( 178, 0),( 179, 0), 73 | ( 180, 0),( 181, 0),( 182, 0), 74 | ( 183, 0),( 184, 0),( 185, 0), 75 | ( 186, 0),( 187, 0),( 188, 0), 76 | ( 189, 0),( 190, 0),( 191, 0), 77 | ( 192, 0),( 193, 0),( 194, 0), 78 | ( 195, 0),( 196, 0),( 197, 0), 79 | ( 198, 0),( 199, 0),( 200, 0), 80 | ( 201, 0),( 202, 0),( 203, 0), 81 | ( 204, 0),( 205, 0),( 206, 0), 82 | ( 207, 0),( 208, 0),( 209, 0), 83 | ( 210, 0),( 211, 0),( 212, 0), 84 | ( 213, 0),( 214, 0),( 215, 0), 85 | ( 216, 0),( 217, 0),( 218, 0), 86 | ( 219, 0),( 220, 0),( 221, 0), 87 | ( 222, 0),( 223, 0),( 224, 0), 88 | ( 225, 0),( 226, 0),( 227, 0), 89 | ( 228, 0),( 229, 0),( 230, 0), 90 | ( 231, 0),( 232, 0),( 233, 0), 91 | ( 234, 0),( 235, 0),( 236, 0), 92 | ( 237, 0),( 238, 0),( 239, 0), 93 | ( 240, 0),( 241, 0),( 242, 0), 94 | ( 243, 0),( 244, 0),( 245, 0), 95 | ( 246, 0),( 247, 0),( 248, 0), 96 | ( 249, 0),( 250, 0),( 251, 0), 97 | ( 252, 0),( 253, 0),( 254, 0), 98 | ( 255, 0),( 256, 98008), 99 | ]# 100 | 101 | 102 | test_data = [ 103 | "abbcccddddeeeee", 104 | "foobarbaz", 105 | "0-2rklnsvkl;-23kDFSi01k0=", 106 | "-9083480-12hjkadsgf8912345kl;hjajkl; `123890", 107 | "\0\0-3;jsdf" 108 | ] 109 | 110 | def MakeReadableString(val): 111 | printable = string.digits + string.letters + string.punctuation + ' ' + "\t" 112 | out = [] 113 | for c in val: 114 | if c in printable: 115 | out.append(" %c " % c) 116 | else: 117 | out.append("0x%02x " % ord(c)) 118 | return ''.join(out) 119 | 120 | def main(): 121 | h = Huffman(request_freq_table) 122 | for s in test_data: 123 | print " encoding: ", s 124 | sp = [ord(c) for c in s] 125 | e_result = BitBucket() 126 | h.EncodeToBB(e_result, sp, True) 127 | print " e_result: ", FormatAsBits(e_result.GetAllBits()) 128 | 129 | d_result = ListToStr(h.DecodeFromBB(e_result, True, -1)) 130 | if d_result != s: 131 | print "difference found: d_result(%s) vs orig(%s)" % (repr(d_result), 132 | repr(s)) 133 | else: 134 | print "It worked: ", s 135 | print 136 | 137 | main() 138 | 139 | -------------------------------------------------------------------------------- /compressor/delta2/lrustorage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from collections import deque 3 | 4 | class RefCntString: 5 | def __init__(self, x): 6 | self.decr = 1 7 | if type(x) is str: 8 | self.data = [x, 1] 9 | else: 10 | self.data = x.data 11 | self.data[1] += 1 12 | 13 | def refcnt(self): 14 | return self.data[1] 15 | 16 | def done(self): 17 | self.data[1] -= self.decr 18 | self.decr = 0 19 | 20 | def __str__(self): 21 | return self.data[0] 22 | 23 | def __repr__(self): 24 | return '"%s":%d' % (self.data[0], self.data[1]) 25 | 26 | def __len__(self): 27 | if self.data[1] > 1: 28 | return 0 29 | return len(self.data[0]) 30 | 31 | def ComputeKVHash(key, val): 32 | khash = hash(key) 33 | kvhash = khash + hash(val) 34 | return (khash, kvhash) 35 | 36 | class KV: 37 | def __init__(self, key=None, val=None, seq_num=None): 38 | self.key_ = RefCntString(key) 39 | self.val_ = RefCntString(val) 40 | (self.khash, self.kvhash) = ComputeKVHash(str(key), str(val)) 41 | self.seq_num = seq_num 42 | 43 | def done(self): 44 | self.key_.done() 45 | self.val_.done() 46 | 47 | def key(self): 48 | return str(self.key_) 49 | 50 | def val(self): 51 | return str(self.val_) 52 | 53 | def ByteSize(self): 54 | return len(self.val_) + len(self.key_) 55 | 56 | def __repr__(self): 57 | return "{(%r, %s) %r %r %r}" % \ 58 | (repr(self.key_), repr(self.val_), self.seq_num, self.khash, self.kvhash) 59 | 60 | class LruStorage: 61 | def __init__(self, max_bytes=None, max_items=None, max_seq_num=None, 62 | offset=None): 63 | self.ring = deque() 64 | self.byte_size = 0 65 | self.max_items = max_items 66 | self.max_bytes = max_bytes 67 | self.max_seq_num = max_seq_num 68 | 69 | self.pop_cb = None 70 | self.offset = offset 71 | if offset is None: 72 | self.offset = 0 73 | self.seq_num = self.offset 74 | 75 | def __repr__(self): 76 | return "{%s %r}" % (self.seq_num, self.ring) 77 | 78 | def Reserve(self, entry, item_count): 79 | if self.max_items == 0 or self.max_bytes == 0: 80 | return 0 81 | if self.max_items is not None: 82 | while len(self.ring) + item_count > self.max_items: 83 | if not self.PopOne(): 84 | return 0 # can't pop one, nothing more to do. 85 | if self.max_bytes is not None: 86 | while self.byte_size + entry.ByteSize() > self.max_bytes: 87 | if not self.PopOne(): 88 | return 0 # can't pop one, nothing more to do. 89 | return 1 90 | 91 | def PopOne(self): 92 | if not self.ring: 93 | return 0 94 | item = self.ring.popleft() 95 | self.byte_size -= item.ByteSize() 96 | item.done() 97 | #print "POPPING: ", item.seq_num 98 | if self.pop_cb is not None: 99 | self.pop_cb(item) 100 | return 1 101 | 102 | def Store(self, item): 103 | item_byte_size = item.ByteSize() 104 | if self.max_bytes is not None and self.byte_size + item_byte_size > self.max_bytes: 105 | error_string =' '.join([ 106 | "Max bytes exceeded", 107 | "max bytes: %d" % self.max_bytes, 108 | "self.byte_size: %d" % self.byte_size, 109 | "item.ByteSize: %d" % item.ByteSize()]) 110 | raise MemoryError(error_string) 111 | if self.max_items and (self.max_items < (len(self.ring) + 1)): 112 | raise MemoryError("max_items exceeded") 113 | item.seq_num = self.seq_num 114 | self.seq_num += 1 115 | if self.max_seq_num is not None and self.seq_num >= self.max_seq_num: 116 | self.seq_num = self.offset 117 | self.byte_size += item_byte_size 118 | self.ring.append(item) 119 | 120 | def SeqNumToIdxFromLeft(self, seq_num): 121 | #print "\tlen(ring): ", len(self.ring), 122 | first_seq_num = self.ring[0].seq_num 123 | if seq_num < self.offset: 124 | raise IndexError("Negative indices unsupported: ", seq_num) 125 | if first_seq_num > seq_num: 126 | #print " fsn: %d, sn: %d" % (first_seq_num, seq_num) 127 | if self.max_seq_num: 128 | #print " A ", 129 | lru_idx = (self.max_seq_num - first_seq_num) + (seq_num - self.offset) 130 | else: 131 | raise IndexError("MaxSeqNum not defined and " 132 | "seq_num(%d) < first_seq_num(%d)" % 133 | (seq_num, first_seq_num)) 134 | else: 135 | #print " B ", 136 | lru_idx = seq_num - first_seq_num 137 | #print "idx_from_left: ", lru_idx 138 | return lru_idx 139 | 140 | def Lookup(self, seq_num): 141 | lru_idx = self.SeqNumToIdxFromLeft(seq_num) 142 | #print "Looking up: ", lru_idx 143 | try: 144 | entry = self.ring[lru_idx] 145 | except IndexError: 146 | print self.ring 147 | print "len(ring): ", len(self.ring) 148 | print "lru_idx: ", lru_idx 149 | print "seq_num requested:", seq_num 150 | print "first_seq_num:", self.ring[0].seq_num 151 | raise 152 | if entry.seq_num != seq_num: 153 | print "Something strange has happened" 154 | print "entry: ", entry 155 | print self.ring 156 | print "lru_idx: ", lru_idx 157 | print "seq_num requested:", seq_num 158 | print "first_seq_num:", self.ring[0].seq_num 159 | raise StandardError() 160 | return entry 161 | 162 | def FindKeyValEntries(self, key, val): 163 | # Looks for key/vals starting from the last entry 164 | (khash, kvhash) = ComputeKVHash(key, val) 165 | ke = None 166 | for i in xrange(len(self.ring) - 1, -1, -1): 167 | item = self.ring[i] 168 | if khash == item.khash and item.key() == key: 169 | ke = item 170 | if kvhash == item.kvhash and item.val() == val: 171 | return (item.seq_num, item.seq_num) 172 | for j in xrange(i - 1, -1, -1): 173 | item = self.ring[j] 174 | if kvhash == item.kvhash and item.key() == key and item.val() == val: 175 | return (item.seq_num, item.seq_num) 176 | return (ke.seq_num, None) 177 | return (None, None) 178 | 179 | def __len__(self): 180 | return len(self.ring) 181 | 182 | def __repr__(self): 183 | return repr(self.ring) 184 | -------------------------------------------------------------------------------- /compressor/bohe/header_freq_tables.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | request_freq_table = [ 6 | ('\x00', 0),('\x01', 0),('\x02', 0),('\x03', 0),('\x04', 0),('\x05', 0), 7 | ('\x06', 0),('\x07', 0),('\x08', 0),('\t', 0),('\n', 0),('\x0b', 0), 8 | ('\x0c', 0),('\r', 0),('\x0e', 0),('\x0f', 0),('\x10', 0),('\x11', 0), 9 | ('\x12', 0),('\x13', 0),('\x14', 0),('\x15', 0),('\x16', 0),('\x17', 0), 10 | ('\x18', 0),('\x19', 0),('\x1a', 0),('\x1b', 0),('\x1c', 0),('\x1d', 0), 11 | ('\x1e', 0),('\x1f', 0),(' ', 61),('!', 9),('"', 0),('#', 0),('$', 2), 12 | ('%', 1433),('&', 1662),("'", 2),('(', 34),(')', 34),('*', 25),('+', 4), 13 | (',', 967),('-', 1379),('.', 2886),('/', 4511),('0', 3198),('1', 3331), 14 | ('2', 3597),('3', 2691),('4', 2251),('5', 1880),('6', 2155),('7', 1639), 15 | ('8', 1916),('9', 1728),(':', 171),(';', 214),('<', 0),('=', 2120),('>', 0), 16 | ('?', 251),('@', 0),('A', 931),('B', 481),('C', 566),('D', 696),('E', 362), 17 | ('F', 545),('G', 513),('H', 328),('I', 524),('J', 210),('K', 260),('L', 373), 18 | ('M', 287),('N', 311),('O', 288),('P', 381),('Q', 291),('R', 328),('S', 543), 19 | ('T', 434),('U', 386),('V', 372),('W', 295),('X', 216),('Y', 205),('Z', 199), 20 | ('[', 2),('\\', 0),(']', 2),('^', 0),('_', 1702),('`', 0),('a', 4237), 21 | ('b', 1601),('c', 3203),('d', 2392),('e', 4941),('f', 932),('g', 2297), 22 | ('h', 1352),('i', 3233),('j', 913),('k', 630),('l', 2082),('m', 2429), 23 | ('n', 3116),('o', 3286),('p', 2510),('q', 314),('r', 2646),('s', 3825), 24 | ('t', 3486),('u', 1298),('v', 839),('w', 1172),('x', 760),('y', 705), 25 | ('z', 352),('{', 12),('|', 12),('}', 12),('~', 4),('\x7f', 0),('\x80', 0), 26 | ('\x81', 0),('\x82', 0),('\x83', 0),('\x84', 0),('\x85', 0),('\x86', 0), 27 | ('\x87', 0),('\x88', 0),('\x89', 0),('\x8a', 0),('\x8b', 0),('\x8c', 0), 28 | ('\x8d', 0),('\x8e', 0),('\x8f', 0),('\x90', 0),('\x91', 0),('\x92', 0), 29 | ('\x93', 0),('\x94', 0),('\x95', 0),('\x96', 0),('\x97', 0),('\x98', 0), 30 | ('\x99', 0),('\x9a', 0),('\x9b', 0),('\x9c', 0),('\x9d', 0),('\x9e', 0), 31 | ('\x9f', 0),('\xa0', 0),('\xa1', 0),('\xa2', 0),('\xa3', 0),('\xa4', 0), 32 | ('\xa5', 0),('\xa6', 0),('\xa7', 0),('\xa8', 0),('\xa9', 0),('\xaa', 0), 33 | ('\xab', 0),('\xac', 0),('\xad', 0),('\xae', 0),('\xaf', 0),('\xb0', 0), 34 | ('\xb1', 0),('\xb2', 0),('\xb3', 0),('\xb4', 0),('\xb5', 0),('\xb6', 0), 35 | ('\xb7', 0),('\xb8', 0),('\xb9', 0),('\xba', 0),('\xbb', 0),('\xbc', 0), 36 | ('\xbd', 0),('\xbe', 0),('\xbf', 0),('\xc0', 0),('\xc1', 0),('\xc2', 0), 37 | ('\xc3', 0),('\xc4', 0),('\xc5', 0),('\xc6', 0),('\xc7', 0),('\xc8', 0), 38 | ('\xc9', 0),('\xca', 0),('\xcb', 0),('\xcc', 0),('\xcd', 0),('\xce', 0), 39 | ('\xcf', 0),('\xd0', 0),('\xd1', 0),('\xd2', 0),('\xd3', 0),('\xd4', 0), 40 | ('\xd5', 0),('\xd6', 0),('\xd7', 0),('\xd8', 0),('\xd9', 0),('\xda', 0), 41 | ('\xdb', 0),('\xdc', 0),('\xdd', 0),('\xde', 0),('\xdf', 0),('\xe0', 0), 42 | ('\xe1', 0),('\xe2', 0),('\xe3', 0),('\xe4', 0),('\xe5', 0),('\xe6', 0), 43 | ('\xe7', 0),('\xe8', 0),('\xe9', 0),('\xea', 0),('\xeb', 0),('\xec', 0), 44 | ('\xed', 0),('\xee', 0),('\xef', 0),('\xf0', 0),('\xf1', 0),('\xf2', 0), 45 | ('\xf3', 0),('\xf4', 0),('\xf5', 0),('\xf6', 0),('\xf7', 0),('\xf8', 0), 46 | ('\xf9', 0),('\xfa', 0),('\xfb', 0),('\xfc', 0),('\xfd', 0),('\xfe', 0), 47 | ('\xff', 0),(256, 1304), 48 | ] 49 | 50 | response_freq_table = [ 51 | ('\x00', 87),('\x01', 0),('\x02', 0),('\x03', 0),('\x04', 0),('\x05', 0), 52 | ('\x06', 0),('\x07', 0),('\x08', 0),('\t', 0),('\n', 0),('\x0b', 0), 53 | ('\x0c', 0),('\r', 0),('\x0e', 0),('\x0f', 0),('\x10', 0),('\x11', 0), 54 | ('\x12', 0),('\x13', 0),('\x14', 0),('\x15', 0),('\x16', 0),('\x17', 0), 55 | ('\x18', 0),('\x19', 0),('\x1a', 0),('\x1b', 0),('\x1c', 0),('\x1d', 0), 56 | ('\x1e', 0),('\x1f', 0),(' ', 8277),('!', 0),('"', 948),('#', 9),('$', 0), 57 | ('%', 191),('&', 203),("'", 2),('(', 191),(')', 191),('*', 6),('+', 300), 58 | (',', 2522),('-', 2374),('.', 1325),('/', 3266),('0', 7630),('1', 7965), 59 | ('2', 7636),('3', 4415),('4', 4337),('5', 3594),('6', 3253),('7', 3223), 60 | ('8', 3920),('9', 3306),(':', 3545),(';', 421),('<', 0),('=', 1626),('>', 0), 61 | ('?', 24),('@', 0),('A', 1644),('B', 820),('C', 1187),('D', 1116),('E', 954), 62 | ('F', 1260),('G', 1955),('H', 493),('I', 674),('J', 875),('K', 560), 63 | ('L', 544),('M', 2305),('N', 844),('O', 781),('P', 640),('Q', 537),('R', 555), 64 | ('S', 965),('T', 2550),('U', 691),('V', 504),('W', 776),('X', 459),('Y', 507), 65 | ('Z', 476),('[', 11),('\\', 0),(']', 11),('^', 0),('_', 436),('`', 0), 66 | ('a', 5171),('b', 3355),('c', 4201),('d', 3265),('e', 5511),('f', 2185), 67 | ('g', 2455),('h', 1166),('i', 3075),('j', 768),('k', 768),('l', 1980), 68 | ('m', 1582),('n', 3613),('o', 3418),('p', 1864),('q', 532),('r', 2488), 69 | ('s', 2906),('t', 3324),('u', 2433),('v', 1097),('w', 927),('x', 1169), 70 | ('y', 749),('z', 506),('{', 9),('|', 13),('}', 9),('~', 0),('\x7f', 0), 71 | ('\x80', 0),('\x81', 0),('\x82', 0),('\x83', 0),('\x84', 0),('\x85', 0), 72 | ('\x86', 0),('\x87', 0),('\x88', 0),('\x89', 0),('\x8a', 0),('\x8b', 0), 73 | ('\x8c', 0),('\x8d', 0),('\x8e', 0),('\x8f', 0),('\x90', 0),('\x91', 0), 74 | ('\x92', 0),('\x93', 0),('\x94', 0),('\x95', 0),('\x96', 0),('\x97', 0), 75 | ('\x98', 0),('\x99', 0),('\x9a', 0),('\x9b', 0),('\x9c', 0),('\x9d', 0), 76 | ('\x9e', 0),('\x9f', 0),('\xa0', 0),('\xa1', 0),('\xa2', 0),('\xa3', 0), 77 | ('\xa4', 0),('\xa5', 0),('\xa6', 0),('\xa7', 0),('\xa8', 0),('\xa9', 0), 78 | ('\xaa', 0),('\xab', 0),('\xac', 0),('\xad', 0),('\xae', 0),('\xaf', 0), 79 | ('\xb0', 0),('\xb1', 0),('\xb2', 0),('\xb3', 0),('\xb4', 0),('\xb5', 0), 80 | ('\xb6', 0),('\xb7', 0),('\xb8', 0),('\xb9', 0),('\xba', 0),('\xbb', 0), 81 | ('\xbc', 0),('\xbd', 0),('\xbe', 0),('\xbf', 0),('\xc0', 0),('\xc1', 0), 82 | ('\xc2', 0),('\xc3', 0),('\xc4', 0),('\xc5', 0),('\xc6', 0),('\xc7', 0), 83 | ('\xc8', 0),('\xc9', 0),('\xca', 0),('\xcb', 0),('\xcc', 0),('\xcd', 0), 84 | ('\xce', 0),('\xcf', 0),('\xd0', 0),('\xd1', 0),('\xd2', 0),('\xd3', 0), 85 | ('\xd4', 0),('\xd5', 0),('\xd6', 0),('\xd7', 0),('\xd8', 0),('\xd9', 0), 86 | ('\xda', 0),('\xdb', 0),('\xdc', 0),('\xdd', 0),('\xde', 0),('\xdf', 0), 87 | ('\xe0', 0),('\xe1', 0),('\xe2', 0),('\xe3', 0),('\xe4', 0),('\xe5', 0), 88 | ('\xe6', 0),('\xe7', 0),('\xe8', 0),('\xe9', 0),('\xea', 0),('\xeb', 0), 89 | ('\xec', 0),('\xed', 0),('\xee', 0),('\xef', 0),('\xf0', 0),('\xf1', 0), 90 | ('\xf2', 0),('\xf3', 0),('\xf4', 0),('\xf5', 0),('\xf6', 0),('\xf7', 0), 91 | ('\xf8', 0),('\xf9', 0),('\xfa', 0),('\xfb', 0),('\xfc', 0),('\xfd', 0), 92 | ('\xfe', 0),('\xff', 0),(256, 5072), 93 | ] 94 | 95 | -------------------------------------------------------------------------------- /compressor/delta/header_freq_tables.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | request_freq_table = [ 6 | ('\x00', 0),('\x01', 0),('\x02', 0),('\x03', 0),('\x04', 0),('\x05', 0), 7 | ('\x06', 0),('\x07', 0),('\x08', 0),('\t', 0),('\n', 0),('\x0b', 0), 8 | ('\x0c', 0),('\r', 0),('\x0e', 0),('\x0f', 0),('\x10', 0),('\x11', 0), 9 | ('\x12', 0),('\x13', 0),('\x14', 0),('\x15', 0),('\x16', 0),('\x17', 0), 10 | ('\x18', 0),('\x19', 0),('\x1a', 0),('\x1b', 0),('\x1c', 0),('\x1d', 0), 11 | ('\x1e', 0),('\x1f', 0),(' ', 61),('!', 9),('"', 0),('#', 0),('$', 2), 12 | ('%', 1433),('&', 1662),("'", 2),('(', 34),(')', 34),('*', 25),('+', 4), 13 | (',', 967),('-', 1379),('.', 2886),('/', 4511),('0', 3198),('1', 3331), 14 | ('2', 3597),('3', 2691),('4', 2251),('5', 1880),('6', 2155),('7', 1639), 15 | ('8', 1916),('9', 1728),(':', 171),(';', 214),('<', 0),('=', 2120),('>', 0), 16 | ('?', 251),('@', 0),('A', 931),('B', 481),('C', 566),('D', 696),('E', 362), 17 | ('F', 545),('G', 513),('H', 328),('I', 524),('J', 210),('K', 260),('L', 373), 18 | ('M', 287),('N', 311),('O', 288),('P', 381),('Q', 291),('R', 328),('S', 543), 19 | ('T', 434),('U', 386),('V', 372),('W', 295),('X', 216),('Y', 205),('Z', 199), 20 | ('[', 2),('\\', 0),(']', 2),('^', 0),('_', 1702),('`', 0),('a', 4237), 21 | ('b', 1601),('c', 3203),('d', 2392),('e', 4941),('f', 932),('g', 2297), 22 | ('h', 1352),('i', 3233),('j', 913),('k', 630),('l', 2082),('m', 2429), 23 | ('n', 3116),('o', 3286),('p', 2510),('q', 314),('r', 2646),('s', 3825), 24 | ('t', 3486),('u', 1298),('v', 839),('w', 1172),('x', 760),('y', 705), 25 | ('z', 352),('{', 12),('|', 12),('}', 12),('~', 4),('\x7f', 0),('\x80', 0), 26 | ('\x81', 0),('\x82', 0),('\x83', 0),('\x84', 0),('\x85', 0),('\x86', 0), 27 | ('\x87', 0),('\x88', 0),('\x89', 0),('\x8a', 0),('\x8b', 0),('\x8c', 0), 28 | ('\x8d', 0),('\x8e', 0),('\x8f', 0),('\x90', 0),('\x91', 0),('\x92', 0), 29 | ('\x93', 0),('\x94', 0),('\x95', 0),('\x96', 0),('\x97', 0),('\x98', 0), 30 | ('\x99', 0),('\x9a', 0),('\x9b', 0),('\x9c', 0),('\x9d', 0),('\x9e', 0), 31 | ('\x9f', 0),('\xa0', 0),('\xa1', 0),('\xa2', 0),('\xa3', 0),('\xa4', 0), 32 | ('\xa5', 0),('\xa6', 0),('\xa7', 0),('\xa8', 0),('\xa9', 0),('\xaa', 0), 33 | ('\xab', 0),('\xac', 0),('\xad', 0),('\xae', 0),('\xaf', 0),('\xb0', 0), 34 | ('\xb1', 0),('\xb2', 0),('\xb3', 0),('\xb4', 0),('\xb5', 0),('\xb6', 0), 35 | ('\xb7', 0),('\xb8', 0),('\xb9', 0),('\xba', 0),('\xbb', 0),('\xbc', 0), 36 | ('\xbd', 0),('\xbe', 0),('\xbf', 0),('\xc0', 0),('\xc1', 0),('\xc2', 0), 37 | ('\xc3', 0),('\xc4', 0),('\xc5', 0),('\xc6', 0),('\xc7', 0),('\xc8', 0), 38 | ('\xc9', 0),('\xca', 0),('\xcb', 0),('\xcc', 0),('\xcd', 0),('\xce', 0), 39 | ('\xcf', 0),('\xd0', 0),('\xd1', 0),('\xd2', 0),('\xd3', 0),('\xd4', 0), 40 | ('\xd5', 0),('\xd6', 0),('\xd7', 0),('\xd8', 0),('\xd9', 0),('\xda', 0), 41 | ('\xdb', 0),('\xdc', 0),('\xdd', 0),('\xde', 0),('\xdf', 0),('\xe0', 0), 42 | ('\xe1', 0),('\xe2', 0),('\xe3', 0),('\xe4', 0),('\xe5', 0),('\xe6', 0), 43 | ('\xe7', 0),('\xe8', 0),('\xe9', 0),('\xea', 0),('\xeb', 0),('\xec', 0), 44 | ('\xed', 0),('\xee', 0),('\xef', 0),('\xf0', 0),('\xf1', 0),('\xf2', 0), 45 | ('\xf3', 0),('\xf4', 0),('\xf5', 0),('\xf6', 0),('\xf7', 0),('\xf8', 0), 46 | ('\xf9', 0),('\xfa', 0),('\xfb', 0),('\xfc', 0),('\xfd', 0),('\xfe', 0), 47 | ('\xff', 0),(256, 1304), 48 | ] 49 | 50 | response_freq_table = [ 51 | ('\x00', 87),('\x01', 0),('\x02', 0),('\x03', 0),('\x04', 0),('\x05', 0), 52 | ('\x06', 0),('\x07', 0),('\x08', 0),('\t', 0),('\n', 0),('\x0b', 0), 53 | ('\x0c', 0),('\r', 0),('\x0e', 0),('\x0f', 0),('\x10', 0),('\x11', 0), 54 | ('\x12', 0),('\x13', 0),('\x14', 0),('\x15', 0),('\x16', 0),('\x17', 0), 55 | ('\x18', 0),('\x19', 0),('\x1a', 0),('\x1b', 0),('\x1c', 0),('\x1d', 0), 56 | ('\x1e', 0),('\x1f', 0),(' ', 8277),('!', 0),('"', 948),('#', 9),('$', 0), 57 | ('%', 191),('&', 203),("'", 2),('(', 191),(')', 191),('*', 6),('+', 300), 58 | (',', 2522),('-', 2374),('.', 1325),('/', 3266),('0', 7630),('1', 7965), 59 | ('2', 7636),('3', 4415),('4', 4337),('5', 3594),('6', 3253),('7', 3223), 60 | ('8', 3920),('9', 3306),(':', 3545),(';', 421),('<', 0),('=', 1626),('>', 0), 61 | ('?', 24),('@', 0),('A', 1644),('B', 820),('C', 1187),('D', 1116),('E', 954), 62 | ('F', 1260),('G', 1955),('H', 493),('I', 674),('J', 875),('K', 560), 63 | ('L', 544),('M', 2305),('N', 844),('O', 781),('P', 640),('Q', 537),('R', 555), 64 | ('S', 965),('T', 2550),('U', 691),('V', 504),('W', 776),('X', 459),('Y', 507), 65 | ('Z', 476),('[', 11),('\\', 0),(']', 11),('^', 0),('_', 436),('`', 0), 66 | ('a', 5171),('b', 3355),('c', 4201),('d', 3265),('e', 5511),('f', 2185), 67 | ('g', 2455),('h', 1166),('i', 3075),('j', 768),('k', 768),('l', 1980), 68 | ('m', 1582),('n', 3613),('o', 3418),('p', 1864),('q', 532),('r', 2488), 69 | ('s', 2906),('t', 3324),('u', 2433),('v', 1097),('w', 927),('x', 1169), 70 | ('y', 749),('z', 506),('{', 9),('|', 13),('}', 9),('~', 0),('\x7f', 0), 71 | ('\x80', 0),('\x81', 0),('\x82', 0),('\x83', 0),('\x84', 0),('\x85', 0), 72 | ('\x86', 0),('\x87', 0),('\x88', 0),('\x89', 0),('\x8a', 0),('\x8b', 0), 73 | ('\x8c', 0),('\x8d', 0),('\x8e', 0),('\x8f', 0),('\x90', 0),('\x91', 0), 74 | ('\x92', 0),('\x93', 0),('\x94', 0),('\x95', 0),('\x96', 0),('\x97', 0), 75 | ('\x98', 0),('\x99', 0),('\x9a', 0),('\x9b', 0),('\x9c', 0),('\x9d', 0), 76 | ('\x9e', 0),('\x9f', 0),('\xa0', 0),('\xa1', 0),('\xa2', 0),('\xa3', 0), 77 | ('\xa4', 0),('\xa5', 0),('\xa6', 0),('\xa7', 0),('\xa8', 0),('\xa9', 0), 78 | ('\xaa', 0),('\xab', 0),('\xac', 0),('\xad', 0),('\xae', 0),('\xaf', 0), 79 | ('\xb0', 0),('\xb1', 0),('\xb2', 0),('\xb3', 0),('\xb4', 0),('\xb5', 0), 80 | ('\xb6', 0),('\xb7', 0),('\xb8', 0),('\xb9', 0),('\xba', 0),('\xbb', 0), 81 | ('\xbc', 0),('\xbd', 0),('\xbe', 0),('\xbf', 0),('\xc0', 0),('\xc1', 0), 82 | ('\xc2', 0),('\xc3', 0),('\xc4', 0),('\xc5', 0),('\xc6', 0),('\xc7', 0), 83 | ('\xc8', 0),('\xc9', 0),('\xca', 0),('\xcb', 0),('\xcc', 0),('\xcd', 0), 84 | ('\xce', 0),('\xcf', 0),('\xd0', 0),('\xd1', 0),('\xd2', 0),('\xd3', 0), 85 | ('\xd4', 0),('\xd5', 0),('\xd6', 0),('\xd7', 0),('\xd8', 0),('\xd9', 0), 86 | ('\xda', 0),('\xdb', 0),('\xdc', 0),('\xdd', 0),('\xde', 0),('\xdf', 0), 87 | ('\xe0', 0),('\xe1', 0),('\xe2', 0),('\xe3', 0),('\xe4', 0),('\xe5', 0), 88 | ('\xe6', 0),('\xe7', 0),('\xe8', 0),('\xe9', 0),('\xea', 0),('\xeb', 0), 89 | ('\xec', 0),('\xed', 0),('\xee', 0),('\xef', 0),('\xf0', 0),('\xf1', 0), 90 | ('\xf2', 0),('\xf3', 0),('\xf4', 0),('\xf5', 0),('\xf6', 0),('\xf7', 0), 91 | ('\xf8', 0),('\xf9', 0),('\xfa', 0),('\xfb', 0),('\xfc', 0),('\xfd', 0), 92 | ('\xfe', 0),('\xff', 0),(256, 5072), 93 | ] 94 | 95 | -------------------------------------------------------------------------------- /compressor/delta2_bohe/header_freq_tables.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | request_freq_table = [ 6 | ('\x00', 0),('\x01', 0),('\x02', 0),('\x03', 0),('\x04', 0),('\x05', 0), 7 | ('\x06', 0),('\x07', 0),('\x08', 0),('\t', 0),('\n', 0),('\x0b', 0), 8 | ('\x0c', 0),('\r', 0),('\x0e', 0),('\x0f', 0),('\x10', 0),('\x11', 0), 9 | ('\x12', 0),('\x13', 0),('\x14', 0),('\x15', 0),('\x16', 0),('\x17', 0), 10 | ('\x18', 0),('\x19', 0),('\x1a', 0),('\x1b', 0),('\x1c', 0),('\x1d', 0), 11 | ('\x1e', 0),('\x1f', 0),(' ', 61),('!', 9),('"', 0),('#', 0),('$', 2), 12 | ('%', 1433),('&', 1662),("'", 2),('(', 34),(')', 34),('*', 25),('+', 4), 13 | (',', 967),('-', 1379),('.', 2886),('/', 4511),('0', 3198),('1', 3331), 14 | ('2', 3597),('3', 2691),('4', 2251),('5', 1880),('6', 2155),('7', 1639), 15 | ('8', 1916),('9', 1728),(':', 171),(';', 214),('<', 0),('=', 2120),('>', 0), 16 | ('?', 251),('@', 0),('A', 931),('B', 481),('C', 566),('D', 696),('E', 362), 17 | ('F', 545),('G', 513),('H', 328),('I', 524),('J', 210),('K', 260),('L', 373), 18 | ('M', 287),('N', 311),('O', 288),('P', 381),('Q', 291),('R', 328),('S', 543), 19 | ('T', 434),('U', 386),('V', 372),('W', 295),('X', 216),('Y', 205),('Z', 199), 20 | ('[', 2),('\\', 0),(']', 2),('^', 0),('_', 1702),('`', 0),('a', 4237), 21 | ('b', 1601),('c', 3203),('d', 2392),('e', 4941),('f', 932),('g', 2297), 22 | ('h', 1352),('i', 3233),('j', 913),('k', 630),('l', 2082),('m', 2429), 23 | ('n', 3116),('o', 3286),('p', 2510),('q', 314),('r', 2646),('s', 3825), 24 | ('t', 3486),('u', 1298),('v', 839),('w', 1172),('x', 760),('y', 705), 25 | ('z', 352),('{', 12),('|', 12),('}', 12),('~', 4),('\x7f', 0),('\x80', 0), 26 | ('\x81', 0),('\x82', 0),('\x83', 0),('\x84', 0),('\x85', 0),('\x86', 0), 27 | ('\x87', 0),('\x88', 0),('\x89', 0),('\x8a', 0),('\x8b', 0),('\x8c', 0), 28 | ('\x8d', 0),('\x8e', 0),('\x8f', 0),('\x90', 0),('\x91', 0),('\x92', 0), 29 | ('\x93', 0),('\x94', 0),('\x95', 0),('\x96', 0),('\x97', 0),('\x98', 0), 30 | ('\x99', 0),('\x9a', 0),('\x9b', 0),('\x9c', 0),('\x9d', 0),('\x9e', 0), 31 | ('\x9f', 0),('\xa0', 0),('\xa1', 0),('\xa2', 0),('\xa3', 0),('\xa4', 0), 32 | ('\xa5', 0),('\xa6', 0),('\xa7', 0),('\xa8', 0),('\xa9', 0),('\xaa', 0), 33 | ('\xab', 0),('\xac', 0),('\xad', 0),('\xae', 0),('\xaf', 0),('\xb0', 0), 34 | ('\xb1', 0),('\xb2', 0),('\xb3', 0),('\xb4', 0),('\xb5', 0),('\xb6', 0), 35 | ('\xb7', 0),('\xb8', 0),('\xb9', 0),('\xba', 0),('\xbb', 0),('\xbc', 0), 36 | ('\xbd', 0),('\xbe', 0),('\xbf', 0),('\xc0', 0),('\xc1', 0),('\xc2', 0), 37 | ('\xc3', 0),('\xc4', 0),('\xc5', 0),('\xc6', 0),('\xc7', 0),('\xc8', 0), 38 | ('\xc9', 0),('\xca', 0),('\xcb', 0),('\xcc', 0),('\xcd', 0),('\xce', 0), 39 | ('\xcf', 0),('\xd0', 0),('\xd1', 0),('\xd2', 0),('\xd3', 0),('\xd4', 0), 40 | ('\xd5', 0),('\xd6', 0),('\xd7', 0),('\xd8', 0),('\xd9', 0),('\xda', 0), 41 | ('\xdb', 0),('\xdc', 0),('\xdd', 0),('\xde', 0),('\xdf', 0),('\xe0', 0), 42 | ('\xe1', 0),('\xe2', 0),('\xe3', 0),('\xe4', 0),('\xe5', 0),('\xe6', 0), 43 | ('\xe7', 0),('\xe8', 0),('\xe9', 0),('\xea', 0),('\xeb', 0),('\xec', 0), 44 | ('\xed', 0),('\xee', 0),('\xef', 0),('\xf0', 0),('\xf1', 0),('\xf2', 0), 45 | ('\xf3', 0),('\xf4', 0),('\xf5', 0),('\xf6', 0),('\xf7', 0),('\xf8', 0), 46 | ('\xf9', 0),('\xfa', 0),('\xfb', 0),('\xfc', 0),('\xfd', 0),('\xfe', 0), 47 | ('\xff', 0),(256, 1304), 48 | ] 49 | 50 | response_freq_table = [ 51 | ('\x00', 87),('\x01', 0),('\x02', 0),('\x03', 0),('\x04', 0),('\x05', 0), 52 | ('\x06', 0),('\x07', 0),('\x08', 0),('\t', 0),('\n', 0),('\x0b', 0), 53 | ('\x0c', 0),('\r', 0),('\x0e', 0),('\x0f', 0),('\x10', 0),('\x11', 0), 54 | ('\x12', 0),('\x13', 0),('\x14', 0),('\x15', 0),('\x16', 0),('\x17', 0), 55 | ('\x18', 0),('\x19', 0),('\x1a', 0),('\x1b', 0),('\x1c', 0),('\x1d', 0), 56 | ('\x1e', 0),('\x1f', 0),(' ', 8277),('!', 0),('"', 948),('#', 9),('$', 0), 57 | ('%', 191),('&', 203),("'", 2),('(', 191),(')', 191),('*', 6),('+', 300), 58 | (',', 2522),('-', 2374),('.', 1325),('/', 3266),('0', 7630),('1', 7965), 59 | ('2', 7636),('3', 4415),('4', 4337),('5', 3594),('6', 3253),('7', 3223), 60 | ('8', 3920),('9', 3306),(':', 3545),(';', 421),('<', 0),('=', 1626),('>', 0), 61 | ('?', 24),('@', 0),('A', 1644),('B', 820),('C', 1187),('D', 1116),('E', 954), 62 | ('F', 1260),('G', 1955),('H', 493),('I', 674),('J', 875),('K', 560), 63 | ('L', 544),('M', 2305),('N', 844),('O', 781),('P', 640),('Q', 537),('R', 555), 64 | ('S', 965),('T', 2550),('U', 691),('V', 504),('W', 776),('X', 459),('Y', 507), 65 | ('Z', 476),('[', 11),('\\', 0),(']', 11),('^', 0),('_', 436),('`', 0), 66 | ('a', 5171),('b', 3355),('c', 4201),('d', 3265),('e', 5511),('f', 2185), 67 | ('g', 2455),('h', 1166),('i', 3075),('j', 768),('k', 768),('l', 1980), 68 | ('m', 1582),('n', 3613),('o', 3418),('p', 1864),('q', 532),('r', 2488), 69 | ('s', 2906),('t', 3324),('u', 2433),('v', 1097),('w', 927),('x', 1169), 70 | ('y', 749),('z', 506),('{', 9),('|', 13),('}', 9),('~', 0),('\x7f', 0), 71 | ('\x80', 0),('\x81', 0),('\x82', 0),('\x83', 0),('\x84', 0),('\x85', 0), 72 | ('\x86', 0),('\x87', 0),('\x88', 0),('\x89', 0),('\x8a', 0),('\x8b', 0), 73 | ('\x8c', 0),('\x8d', 0),('\x8e', 0),('\x8f', 0),('\x90', 0),('\x91', 0), 74 | ('\x92', 0),('\x93', 0),('\x94', 0),('\x95', 0),('\x96', 0),('\x97', 0), 75 | ('\x98', 0),('\x99', 0),('\x9a', 0),('\x9b', 0),('\x9c', 0),('\x9d', 0), 76 | ('\x9e', 0),('\x9f', 0),('\xa0', 0),('\xa1', 0),('\xa2', 0),('\xa3', 0), 77 | ('\xa4', 0),('\xa5', 0),('\xa6', 0),('\xa7', 0),('\xa8', 0),('\xa9', 0), 78 | ('\xaa', 0),('\xab', 0),('\xac', 0),('\xad', 0),('\xae', 0),('\xaf', 0), 79 | ('\xb0', 0),('\xb1', 0),('\xb2', 0),('\xb3', 0),('\xb4', 0),('\xb5', 0), 80 | ('\xb6', 0),('\xb7', 0),('\xb8', 0),('\xb9', 0),('\xba', 0),('\xbb', 0), 81 | ('\xbc', 0),('\xbd', 0),('\xbe', 0),('\xbf', 0),('\xc0', 0),('\xc1', 0), 82 | ('\xc2', 0),('\xc3', 0),('\xc4', 0),('\xc5', 0),('\xc6', 0),('\xc7', 0), 83 | ('\xc8', 0),('\xc9', 0),('\xca', 0),('\xcb', 0),('\xcc', 0),('\xcd', 0), 84 | ('\xce', 0),('\xcf', 0),('\xd0', 0),('\xd1', 0),('\xd2', 0),('\xd3', 0), 85 | ('\xd4', 0),('\xd5', 0),('\xd6', 0),('\xd7', 0),('\xd8', 0),('\xd9', 0), 86 | ('\xda', 0),('\xdb', 0),('\xdc', 0),('\xdd', 0),('\xde', 0),('\xdf', 0), 87 | ('\xe0', 0),('\xe1', 0),('\xe2', 0),('\xe3', 0),('\xe4', 0),('\xe5', 0), 88 | ('\xe6', 0),('\xe7', 0),('\xe8', 0),('\xe9', 0),('\xea', 0),('\xeb', 0), 89 | ('\xec', 0),('\xed', 0),('\xee', 0),('\xef', 0),('\xf0', 0),('\xf1', 0), 90 | ('\xf2', 0),('\xf3', 0),('\xf4', 0),('\xf5', 0),('\xf6', 0),('\xf7', 0), 91 | ('\xf8', 0),('\xf9', 0),('\xfa', 0),('\xfb', 0),('\xfc', 0),('\xfd', 0), 92 | ('\xfe', 0),('\xff', 0),(256, 5072), 93 | ] 94 | 95 | -------------------------------------------------------------------------------- /compressor/delta_bohe/header_freq_tables.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style license that can be 3 | # found in the LICENSE file. 4 | 5 | request_freq_table = [ 6 | ('\x00', 0),('\x01', 0),('\x02', 0),('\x03', 0),('\x04', 0),('\x05', 0), 7 | ('\x06', 0),('\x07', 0),('\x08', 0),('\t', 0),('\n', 0),('\x0b', 0), 8 | ('\x0c', 0),('\r', 0),('\x0e', 0),('\x0f', 0),('\x10', 0),('\x11', 0), 9 | ('\x12', 0),('\x13', 0),('\x14', 0),('\x15', 0),('\x16', 0),('\x17', 0), 10 | ('\x18', 0),('\x19', 0),('\x1a', 0),('\x1b', 0),('\x1c', 0),('\x1d', 0), 11 | ('\x1e', 0),('\x1f', 0),(' ', 61),('!', 9),('"', 0),('#', 0),('$', 2), 12 | ('%', 1433),('&', 1662),("'", 2),('(', 34),(')', 34),('*', 25),('+', 4), 13 | (',', 967),('-', 1379),('.', 2886),('/', 4511),('0', 3198),('1', 3331), 14 | ('2', 3597),('3', 2691),('4', 2251),('5', 1880),('6', 2155),('7', 1639), 15 | ('8', 1916),('9', 1728),(':', 171),(';', 214),('<', 0),('=', 2120),('>', 0), 16 | ('?', 251),('@', 0),('A', 931),('B', 481),('C', 566),('D', 696),('E', 362), 17 | ('F', 545),('G', 513),('H', 328),('I', 524),('J', 210),('K', 260),('L', 373), 18 | ('M', 287),('N', 311),('O', 288),('P', 381),('Q', 291),('R', 328),('S', 543), 19 | ('T', 434),('U', 386),('V', 372),('W', 295),('X', 216),('Y', 205),('Z', 199), 20 | ('[', 2),('\\', 0),(']', 2),('^', 0),('_', 1702),('`', 0),('a', 4237), 21 | ('b', 1601),('c', 3203),('d', 2392),('e', 4941),('f', 932),('g', 2297), 22 | ('h', 1352),('i', 3233),('j', 913),('k', 630),('l', 2082),('m', 2429), 23 | ('n', 3116),('o', 3286),('p', 2510),('q', 314),('r', 2646),('s', 3825), 24 | ('t', 3486),('u', 1298),('v', 839),('w', 1172),('x', 760),('y', 705), 25 | ('z', 352),('{', 12),('|', 12),('}', 12),('~', 4),('\x7f', 0),('\x80', 0), 26 | ('\x81', 0),('\x82', 0),('\x83', 0),('\x84', 0),('\x85', 0),('\x86', 0), 27 | ('\x87', 0),('\x88', 0),('\x89', 0),('\x8a', 0),('\x8b', 0),('\x8c', 0), 28 | ('\x8d', 0),('\x8e', 0),('\x8f', 0),('\x90', 0),('\x91', 0),('\x92', 0), 29 | ('\x93', 0),('\x94', 0),('\x95', 0),('\x96', 0),('\x97', 0),('\x98', 0), 30 | ('\x99', 0),('\x9a', 0),('\x9b', 0),('\x9c', 0),('\x9d', 0),('\x9e', 0), 31 | ('\x9f', 0),('\xa0', 0),('\xa1', 0),('\xa2', 0),('\xa3', 0),('\xa4', 0), 32 | ('\xa5', 0),('\xa6', 0),('\xa7', 0),('\xa8', 0),('\xa9', 0),('\xaa', 0), 33 | ('\xab', 0),('\xac', 0),('\xad', 0),('\xae', 0),('\xaf', 0),('\xb0', 0), 34 | ('\xb1', 0),('\xb2', 0),('\xb3', 0),('\xb4', 0),('\xb5', 0),('\xb6', 0), 35 | ('\xb7', 0),('\xb8', 0),('\xb9', 0),('\xba', 0),('\xbb', 0),('\xbc', 0), 36 | ('\xbd', 0),('\xbe', 0),('\xbf', 0),('\xc0', 0),('\xc1', 0),('\xc2', 0), 37 | ('\xc3', 0),('\xc4', 0),('\xc5', 0),('\xc6', 0),('\xc7', 0),('\xc8', 0), 38 | ('\xc9', 0),('\xca', 0),('\xcb', 0),('\xcc', 0),('\xcd', 0),('\xce', 0), 39 | ('\xcf', 0),('\xd0', 0),('\xd1', 0),('\xd2', 0),('\xd3', 0),('\xd4', 0), 40 | ('\xd5', 0),('\xd6', 0),('\xd7', 0),('\xd8', 0),('\xd9', 0),('\xda', 0), 41 | ('\xdb', 0),('\xdc', 0),('\xdd', 0),('\xde', 0),('\xdf', 0),('\xe0', 0), 42 | ('\xe1', 0),('\xe2', 0),('\xe3', 0),('\xe4', 0),('\xe5', 0),('\xe6', 0), 43 | ('\xe7', 0),('\xe8', 0),('\xe9', 0),('\xea', 0),('\xeb', 0),('\xec', 0), 44 | ('\xed', 0),('\xee', 0),('\xef', 0),('\xf0', 0),('\xf1', 0),('\xf2', 0), 45 | ('\xf3', 0),('\xf4', 0),('\xf5', 0),('\xf6', 0),('\xf7', 0),('\xf8', 0), 46 | ('\xf9', 0),('\xfa', 0),('\xfb', 0),('\xfc', 0),('\xfd', 0),('\xfe', 0), 47 | ('\xff', 0),(256, 1304), 48 | ] 49 | 50 | response_freq_table = [ 51 | ('\x00', 87),('\x01', 0),('\x02', 0),('\x03', 0),('\x04', 0),('\x05', 0), 52 | ('\x06', 0),('\x07', 0),('\x08', 0),('\t', 0),('\n', 0),('\x0b', 0), 53 | ('\x0c', 0),('\r', 0),('\x0e', 0),('\x0f', 0),('\x10', 0),('\x11', 0), 54 | ('\x12', 0),('\x13', 0),('\x14', 0),('\x15', 0),('\x16', 0),('\x17', 0), 55 | ('\x18', 0),('\x19', 0),('\x1a', 0),('\x1b', 0),('\x1c', 0),('\x1d', 0), 56 | ('\x1e', 0),('\x1f', 0),(' ', 8277),('!', 0),('"', 948),('#', 9),('$', 0), 57 | ('%', 191),('&', 203),("'", 2),('(', 191),(')', 191),('*', 6),('+', 300), 58 | (',', 2522),('-', 2374),('.', 1325),('/', 3266),('0', 7630),('1', 7965), 59 | ('2', 7636),('3', 4415),('4', 4337),('5', 3594),('6', 3253),('7', 3223), 60 | ('8', 3920),('9', 3306),(':', 3545),(';', 421),('<', 0),('=', 1626),('>', 0), 61 | ('?', 24),('@', 0),('A', 1644),('B', 820),('C', 1187),('D', 1116),('E', 954), 62 | ('F', 1260),('G', 1955),('H', 493),('I', 674),('J', 875),('K', 560), 63 | ('L', 544),('M', 2305),('N', 844),('O', 781),('P', 640),('Q', 537),('R', 555), 64 | ('S', 965),('T', 2550),('U', 691),('V', 504),('W', 776),('X', 459),('Y', 507), 65 | ('Z', 476),('[', 11),('\\', 0),(']', 11),('^', 0),('_', 436),('`', 0), 66 | ('a', 5171),('b', 3355),('c', 4201),('d', 3265),('e', 5511),('f', 2185), 67 | ('g', 2455),('h', 1166),('i', 3075),('j', 768),('k', 768),('l', 1980), 68 | ('m', 1582),('n', 3613),('o', 3418),('p', 1864),('q', 532),('r', 2488), 69 | ('s', 2906),('t', 3324),('u', 2433),('v', 1097),('w', 927),('x', 1169), 70 | ('y', 749),('z', 506),('{', 9),('|', 13),('}', 9),('~', 0),('\x7f', 0), 71 | ('\x80', 0),('\x81', 0),('\x82', 0),('\x83', 0),('\x84', 0),('\x85', 0), 72 | ('\x86', 0),('\x87', 0),('\x88', 0),('\x89', 0),('\x8a', 0),('\x8b', 0), 73 | ('\x8c', 0),('\x8d', 0),('\x8e', 0),('\x8f', 0),('\x90', 0),('\x91', 0), 74 | ('\x92', 0),('\x93', 0),('\x94', 0),('\x95', 0),('\x96', 0),('\x97', 0), 75 | ('\x98', 0),('\x99', 0),('\x9a', 0),('\x9b', 0),('\x9c', 0),('\x9d', 0), 76 | ('\x9e', 0),('\x9f', 0),('\xa0', 0),('\xa1', 0),('\xa2', 0),('\xa3', 0), 77 | ('\xa4', 0),('\xa5', 0),('\xa6', 0),('\xa7', 0),('\xa8', 0),('\xa9', 0), 78 | ('\xaa', 0),('\xab', 0),('\xac', 0),('\xad', 0),('\xae', 0),('\xaf', 0), 79 | ('\xb0', 0),('\xb1', 0),('\xb2', 0),('\xb3', 0),('\xb4', 0),('\xb5', 0), 80 | ('\xb6', 0),('\xb7', 0),('\xb8', 0),('\xb9', 0),('\xba', 0),('\xbb', 0), 81 | ('\xbc', 0),('\xbd', 0),('\xbe', 0),('\xbf', 0),('\xc0', 0),('\xc1', 0), 82 | ('\xc2', 0),('\xc3', 0),('\xc4', 0),('\xc5', 0),('\xc6', 0),('\xc7', 0), 83 | ('\xc8', 0),('\xc9', 0),('\xca', 0),('\xcb', 0),('\xcc', 0),('\xcd', 0), 84 | ('\xce', 0),('\xcf', 0),('\xd0', 0),('\xd1', 0),('\xd2', 0),('\xd3', 0), 85 | ('\xd4', 0),('\xd5', 0),('\xd6', 0),('\xd7', 0),('\xd8', 0),('\xd9', 0), 86 | ('\xda', 0),('\xdb', 0),('\xdc', 0),('\xdd', 0),('\xde', 0),('\xdf', 0), 87 | ('\xe0', 0),('\xe1', 0),('\xe2', 0),('\xe3', 0),('\xe4', 0),('\xe5', 0), 88 | ('\xe6', 0),('\xe7', 0),('\xe8', 0),('\xe9', 0),('\xea', 0),('\xeb', 0), 89 | ('\xec', 0),('\xed', 0),('\xee', 0),('\xef', 0),('\xf0', 0),('\xf1', 0), 90 | ('\xf2', 0),('\xf3', 0),('\xf4', 0),('\xf5', 0),('\xf6', 0),('\xf7', 0), 91 | ('\xf8', 0),('\xf9', 0),('\xfa', 0),('\xfb', 0),('\xfc', 0),('\xfd', 0), 92 | ('\xfe', 0),('\xff', 0),(256, 5072), 93 | ] 94 | 95 | -------------------------------------------------------------------------------- /compressor/delta/bit_bucket_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #include "bit_bucket.h" 5 | #include "utils.h" 6 | 7 | struct Testcase { 8 | size_t num_bytes; 9 | vector v; 10 | int num_bits; 11 | string expected_state; 12 | 13 | friend ostream& operator<<(ostream& os, const Testcase& tc) { 14 | os << "Testcase: \"" << tc.expected_state << "\"," 15 | << "\"" << FormatAsBits(tc.v, tc.num_bits) << "\"" 16 | << " (" << tc.num_bytes << ")"; 17 | return os; 18 | } 19 | }; 20 | 21 | void TestStoreBits(BitBucket* bb, const Testcase& test) { 22 | cout << test << " ..." << flush; 23 | bb->StoreBits(test.v, test.num_bits); 24 | if (test.expected_state != bb->AsString()) { 25 | cerr << "\n"; 26 | cerr << " --- FAILED ---\n"; 27 | cerr << " Expected: \"" << test.expected_state << "\"\n"; 28 | cerr << " Got: \"" << bb << "\"\n"; 29 | cerr << " DEBUG: " << bb->DebugStr() << "\n"; 30 | abort(); 31 | } 32 | } 33 | 34 | void TestGetBits(BitBucket*bb, const Testcase& test) { 35 | bb->Seek(0); 36 | vector storage; 37 | int num_bits = bb->NumBits(); 38 | bb->GetBits(&storage, num_bits); 39 | stringstream formatted_bits; 40 | formatted_bits << FormatAsBits(storage, num_bits); 41 | formatted_bits << " [" << num_bits << "," << num_bits % 8 << "]"; 42 | if (formatted_bits.str() != test.expected_state) { 43 | cerr << "\n"; 44 | cerr << " --- FAILED ---\n"; 45 | cerr << " Expected: \"" << test.expected_state << "\"\n"; 46 | cerr << " Got: \"" << formatted_bits.str() << "\"\n"; 47 | cerr << " DEBUG: " << bb->DebugStr() << "\n"; 48 | abort(); 49 | } 50 | // Now, do it again, starting from bit offsets other than 0 51 | for (int i = 1; i < min(8, num_bits); ++i) { 52 | bb->Seek(0); 53 | formatted_bits.str(""); 54 | for (int j = 0; j < i; ++j) { 55 | if (j % 8 == 0) { 56 | formatted_bits << "|"; 57 | } 58 | formatted_bits << bb->GetBit(); 59 | } 60 | storage.clear(); 61 | bb->GetBits(&storage, num_bits - i); 62 | string storage_str = FormatAsBits(storage, num_bits - i, i); 63 | formatted_bits << FormatAsBits(storage, num_bits - i, i); 64 | formatted_bits << " [" << num_bits << "," << num_bits % 8 << "]"; 65 | if (formatted_bits.str() != test.expected_state) { 66 | cerr << "\n"; 67 | cerr << " --- FAILED ---\n"; 68 | cerr << " Offset: " << i << "\n"; 69 | cerr << " Expected: \"" << test.expected_state << "\"\n"; 70 | cerr << " Got: \"" << formatted_bits.str() << "\"\n"; 71 | cerr << " DEBUG: " << bb->DebugStr() << "\n"; 72 | //abort(); 73 | } 74 | } 75 | } 76 | 77 | void TestBytesRequired(BitBucket*bb, const Testcase& test) { 78 | if (bb->BytesRequired() != test.num_bytes) { 79 | cerr << "\n"; 80 | cerr << " --- FAILED ---\n"; 81 | cerr << " Expected: \"" << test.num_bytes << "\"\n"; 82 | cerr << " Got: \"" << bb->BytesRequired() << "\"\n"; 83 | } 84 | } 85 | 86 | void RunTests(const vector& tests) { 87 | BitBucket bb; 88 | cout << "\n\nNew test\n"; 89 | for (unsigned int i = 0; i < tests.size(); ++i) { 90 | const Testcase& test = tests[i]; 91 | TestStoreBits(&bb, test); 92 | TestGetBits(&bb, test); 93 | TestBytesRequired(&bb, test); 94 | cout << " Passed\n" << flush; 95 | } 96 | } 97 | 98 | int main(int argc, char** argv) { 99 | { 100 | vector tests = { 101 | {2, {'\xff', '\x00' }, 8+6, "|11111111|000000 [14,6]"}, 102 | {4, {'\xff', '\x00' }, 8+6, "|11111111|00000011|11111100|0000 [28,4]"}, 103 | {6, {'\xff', '\x00' }, 8+6, "|11111111|00000011|11111100|00001111" 104 | "|11110000|00 [42,2]"}, 105 | {7, {'\xff', '\x00' }, 8+6, "|11111111|00000011|11111100|00001111" 106 | "|11110000|00111111|11000000 [56,0]"}, 107 | {9, {'\xff', '\x00' }, 8+6, "|11111111|00000011|11111100|00001111" 108 | "|11110000|00111111|11000000|11111111" 109 | "|000000 [70,6]"}, 110 | }; 111 | RunTests(tests); 112 | } 113 | { 114 | vector tests = { 115 | {2, {'\xff', '\x00' }, 8+6, "|11111111|000000 [14,6]"}, 116 | {3, {'\xff' }, 3 , "|11111111|00000011|1 [17,1]"}, 117 | {3, {'\x00' }, 3 , "|11111111|00000011|1000 [20,4]"}, 118 | {5, {'\xff', '\x00' }, 8+6, "|11111111|00000011|10001111|11110000" 119 | "|00 [34,2]"}, 120 | {5, {'\xff' }, 4 , "|11111111|00000011|10001111|11110000" 121 | "|001111 [38,6]"}, 122 | {6, {'\x00' }, 4 , "|11111111|00000011|10001111|11110000" 123 | "|00111100|00 [42,2]"}, 124 | }; 125 | RunTests(tests); 126 | } 127 | { 128 | vector tests = { 129 | {1, {'\xF0'}, 5, "|11110 [5,5]"}, 130 | {2, {'\x0F'}, 5, "|11110000|01 [10,2]"}, 131 | {2, {'\xF0'}, 5, "|11110000|0111110 [15,7]"}, 132 | {3, {'\x0F'}, 5, "|11110000|01111100|0001 [20,4]"}, 133 | {4, {'\xF0'}, 5, "|11110000|01111100|00011111|0 [25,1]"}, 134 | {4, {'\x0F'}, 5, "|11110000|01111100|00011111|000001 [30,6]"}, 135 | {5, {'\xF0'}, 5, "|11110000|01111100|00011111|00000111|110 [35,3]"}, 136 | {5, {'\x0F'}, 5, "|11110000|01111100|00011111|00000111|11000001 [40,0]"}, 137 | {6, {'\xF0'}, 5, "|11110000|01111100|00011111|00000111|11000001|11110 [45,5]"}, 138 | }; 139 | RunTests(tests); 140 | } 141 | { 142 | vector tests = { 143 | {1, {'\xF0'}, 1, "|1 [1,1]"}, 144 | {1, {'\x0F'}, 1, "|10 [2,2]"}, 145 | {1, {'\xF0'}, 1, "|101 [3,3]"}, 146 | {1, {'\x0F'}, 1, "|1010 [4,4]"}, 147 | {1, {'\xF0'}, 1, "|10101 [5,5]"}, 148 | {1, {'\x0F'}, 1, "|101010 [6,6]"}, 149 | {1, {'\xF0'}, 1, "|1010101 [7,7]"}, 150 | {1, {'\x0F'}, 1, "|10101010 [8,0]"}, 151 | {2, {'\xF0'}, 1, "|10101010|1 [9,1]"}, 152 | {3, {'\x00','\xFF'}, 8+7, "|10101010|10000000|01111111 [24,0]"}, 153 | }; 154 | RunTests(tests); 155 | } 156 | { 157 | vector tests = { 158 | {1, {'\xF0'}, 8, "|11110000 [8,0]"}, 159 | {2, {'\xF0'}, 8, "|11110000|11110000 [16,0]"}, 160 | {3, {'\xF0'}, 1, "|11110000|11110000|1 [17,1]"}, 161 | {4, {'\x0F'}, 8, "|11110000|11110000|10000111|1 [25,1]"}, 162 | }; 163 | RunTests(tests); 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /compressor/delta_bohe/bit_bucket_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | #include "bit_bucket.h" 5 | #include "utils.h" 6 | 7 | struct Testcase { 8 | size_t num_bytes; 9 | vector v; 10 | int num_bits; 11 | string expected_state; 12 | 13 | friend ostream& operator<<(ostream& os, const Testcase& tc) { 14 | os << "Testcase: \"" << tc.expected_state << "\"," 15 | << "\"" << FormatAsBits(tc.v, tc.num_bits) << "\"" 16 | << " (" << tc.num_bytes << ")"; 17 | return os; 18 | } 19 | }; 20 | 21 | void TestStoreBits(BitBucket* bb, const Testcase& test) { 22 | cout << test << " ..." << flush; 23 | bb->StoreBits(test.v, test.num_bits); 24 | if (test.expected_state != bb->AsString()) { 25 | cerr << "\n"; 26 | cerr << " --- FAILED ---\n"; 27 | cerr << " Expected: \"" << test.expected_state << "\"\n"; 28 | cerr << " Got: \"" << bb << "\"\n"; 29 | cerr << " DEBUG: " << bb->DebugStr() << "\n"; 30 | abort(); 31 | } 32 | } 33 | 34 | void TestGetBits(BitBucket*bb, const Testcase& test) { 35 | bb->Seek(0); 36 | vector storage; 37 | int num_bits = bb->NumBits(); 38 | bb->GetBits(&storage, num_bits); 39 | stringstream formatted_bits; 40 | formatted_bits << FormatAsBits(storage, num_bits); 41 | formatted_bits << " [" << num_bits << "," << num_bits % 8 << "]"; 42 | if (formatted_bits.str() != test.expected_state) { 43 | cerr << "\n"; 44 | cerr << " --- FAILED ---\n"; 45 | cerr << " Expected: \"" << test.expected_state << "\"\n"; 46 | cerr << " Got: \"" << formatted_bits.str() << "\"\n"; 47 | cerr << " DEBUG: " << bb->DebugStr() << "\n"; 48 | abort(); 49 | } 50 | // Now, do it again, starting from bit offsets other than 0 51 | for (int i = 1; i < min(8, num_bits); ++i) { 52 | bb->Seek(0); 53 | formatted_bits.str(""); 54 | for (int j = 0; j < i; ++j) { 55 | if (j % 8 == 0) { 56 | formatted_bits << "|"; 57 | } 58 | formatted_bits << bb->GetBit(); 59 | } 60 | storage.clear(); 61 | bb->GetBits(&storage, num_bits - i); 62 | string storage_str = FormatAsBits(storage, num_bits - i, i); 63 | formatted_bits << FormatAsBits(storage, num_bits - i, i); 64 | formatted_bits << " [" << num_bits << "," << num_bits % 8 << "]"; 65 | if (formatted_bits.str() != test.expected_state) { 66 | cerr << "\n"; 67 | cerr << " --- FAILED ---\n"; 68 | cerr << " Offset: " << i << "\n"; 69 | cerr << " Expected: \"" << test.expected_state << "\"\n"; 70 | cerr << " Got: \"" << formatted_bits.str() << "\"\n"; 71 | cerr << " DEBUG: " << bb->DebugStr() << "\n"; 72 | //abort(); 73 | } 74 | } 75 | } 76 | 77 | void TestBytesRequired(BitBucket*bb, const Testcase& test) { 78 | if (bb->BytesRequired() != test.num_bytes) { 79 | cerr << "\n"; 80 | cerr << " --- FAILED ---\n"; 81 | cerr << " Expected: \"" << test.num_bytes << "\"\n"; 82 | cerr << " Got: \"" << bb->BytesRequired() << "\"\n"; 83 | } 84 | } 85 | 86 | void RunTests(const vector& tests) { 87 | BitBucket bb; 88 | cout << "\n\nNew test\n"; 89 | for (unsigned int i = 0; i < tests.size(); ++i) { 90 | const Testcase& test = tests[i]; 91 | TestStoreBits(&bb, test); 92 | TestGetBits(&bb, test); 93 | TestBytesRequired(&bb, test); 94 | cout << " Passed\n" << flush; 95 | } 96 | } 97 | 98 | int main(int argc, char** argv) { 99 | { 100 | vector tests = { 101 | {2, {'\xff', '\x00' }, 8+6, "|11111111|000000 [14,6]"}, 102 | {4, {'\xff', '\x00' }, 8+6, "|11111111|00000011|11111100|0000 [28,4]"}, 103 | {6, {'\xff', '\x00' }, 8+6, "|11111111|00000011|11111100|00001111" 104 | "|11110000|00 [42,2]"}, 105 | {7, {'\xff', '\x00' }, 8+6, "|11111111|00000011|11111100|00001111" 106 | "|11110000|00111111|11000000 [56,0]"}, 107 | {9, {'\xff', '\x00' }, 8+6, "|11111111|00000011|11111100|00001111" 108 | "|11110000|00111111|11000000|11111111" 109 | "|000000 [70,6]"}, 110 | }; 111 | RunTests(tests); 112 | } 113 | { 114 | vector tests = { 115 | {2, {'\xff', '\x00' }, 8+6, "|11111111|000000 [14,6]"}, 116 | {3, {'\xff' }, 3 , "|11111111|00000011|1 [17,1]"}, 117 | {3, {'\x00' }, 3 , "|11111111|00000011|1000 [20,4]"}, 118 | {5, {'\xff', '\x00' }, 8+6, "|11111111|00000011|10001111|11110000" 119 | "|00 [34,2]"}, 120 | {5, {'\xff' }, 4 , "|11111111|00000011|10001111|11110000" 121 | "|001111 [38,6]"}, 122 | {6, {'\x00' }, 4 , "|11111111|00000011|10001111|11110000" 123 | "|00111100|00 [42,2]"}, 124 | }; 125 | RunTests(tests); 126 | } 127 | { 128 | vector tests = { 129 | {1, {'\xF0'}, 5, "|11110 [5,5]"}, 130 | {2, {'\x0F'}, 5, "|11110000|01 [10,2]"}, 131 | {2, {'\xF0'}, 5, "|11110000|0111110 [15,7]"}, 132 | {3, {'\x0F'}, 5, "|11110000|01111100|0001 [20,4]"}, 133 | {4, {'\xF0'}, 5, "|11110000|01111100|00011111|0 [25,1]"}, 134 | {4, {'\x0F'}, 5, "|11110000|01111100|00011111|000001 [30,6]"}, 135 | {5, {'\xF0'}, 5, "|11110000|01111100|00011111|00000111|110 [35,3]"}, 136 | {5, {'\x0F'}, 5, "|11110000|01111100|00011111|00000111|11000001 [40,0]"}, 137 | {6, {'\xF0'}, 5, "|11110000|01111100|00011111|00000111|11000001|11110 [45,5]"}, 138 | }; 139 | RunTests(tests); 140 | } 141 | { 142 | vector tests = { 143 | {1, {'\xF0'}, 1, "|1 [1,1]"}, 144 | {1, {'\x0F'}, 1, "|10 [2,2]"}, 145 | {1, {'\xF0'}, 1, "|101 [3,3]"}, 146 | {1, {'\x0F'}, 1, "|1010 [4,4]"}, 147 | {1, {'\xF0'}, 1, "|10101 [5,5]"}, 148 | {1, {'\x0F'}, 1, "|101010 [6,6]"}, 149 | {1, {'\xF0'}, 1, "|1010101 [7,7]"}, 150 | {1, {'\x0F'}, 1, "|10101010 [8,0]"}, 151 | {2, {'\xF0'}, 1, "|10101010|1 [9,1]"}, 152 | {3, {'\x00','\xFF'}, 8+7, "|10101010|10000000|01111111 [24,0]"}, 153 | }; 154 | RunTests(tests); 155 | } 156 | { 157 | vector tests = { 158 | {1, {'\xF0'}, 8, "|11110000 [8,0]"}, 159 | {2, {'\xF0'}, 8, "|11110000|11110000 [16,0]"}, 160 | {3, {'\xF0'}, 1, "|11110000|11110000|1 [17,1]"}, 161 | {4, {'\x0F'}, 8, "|11110000|11110000|10000111|1 [25,1]"}, 162 | }; 163 | RunTests(tests); 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /compressor/delta2/lrustorage_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from lrustorage import LruStorage 4 | from lrustorage import KV 5 | from lrustorage import RefCntString 6 | import unittest 7 | 8 | class TestLruStorage(unittest.TestCase): 9 | def test_BasicFunctionality(self): 10 | max_items = 10 11 | max_byte_size = 7*2*max_items 12 | s = LruStorage(max_byte_size, max_items) 13 | key_fmt = "key_%03d" 14 | val_fmt = "val_%03d" 15 | for i in xrange(max_items): 16 | s.Store(KV(key_fmt % i, val_fmt % i)) 17 | self.assertEqual(len(s.ring), max_items) 18 | self.assertEqual(s.byte_size, max_byte_size) 19 | for i in xrange(max_items): 20 | entry = s.Lookup(i) 21 | self.assertEqual(entry.key(), key_fmt % i) 22 | self.assertEqual(entry.val(), val_fmt % i) 23 | self.assertEqual(entry.seq_num, i) 24 | 25 | def test_MaxItemSize(self): 26 | caught_error = 0 27 | 28 | max_items = 10 29 | max_byte_size = 10000000 30 | s = LruStorage(max_byte_size, max_items) 31 | key_fmt = "key_%03d" 32 | val_fmt = "val_%03d" 33 | try: 34 | for i in xrange(max_items+10): 35 | s.Store(KV(key_fmt % i, val_fmt % i)) 36 | except MemoryError as me: 37 | caught_error = 1 38 | if not caught_error: 39 | self.fail("Failure: Attempted to store too many ITEMS, but no exception") 40 | return 41 | 42 | def test_MaxByteSize(self): 43 | caught_error = 0 44 | 45 | max_items = 10 46 | max_byte_size = 7*2*(max_items - 1) 47 | s = LruStorage(max_byte_size, max_items) 48 | key_fmt = "key_%03d" 49 | val_fmt = "val_%03d" 50 | try: 51 | for i in xrange(max_items + 1): 52 | s.Store(KV(key_fmt % i, val_fmt % i)) 53 | except MemoryError as me: 54 | if i == max_items - 1: 55 | caught_error = 1 56 | if not caught_error: 57 | self.fail("Failure: Attempted to store too many BYTES, but no exception") 58 | return 59 | 60 | def test_FindKeyValEntries(self): 61 | caught_error = 0 62 | 63 | max_items = 10 64 | max_byte_size = 7*2*max_items 65 | s = LruStorage(max_byte_size, max_items) 66 | key_fmt = "key_%03d" 67 | val_fmt = "val_%03d" 68 | for i in xrange(max_items): 69 | s.Store(KV(key_fmt % i, val_fmt % i)) 70 | 71 | (ke, ve) = s.FindKeyValEntries("key_009", "") 72 | ke = s.Lookup(ke) 73 | self.assertEqual(ke.key(), "key_009") 74 | self.assertIsNone(ve) 75 | (ke, ve) = s.FindKeyValEntries("key_001", "val_001") 76 | ke = s.Lookup(ke) 77 | ve = s.Lookup(ve) 78 | self.assertEqual(ke.key(), "key_001") 79 | self.assertEqual(ve.key(), "key_001") 80 | self.assertEqual(ve.val(), "val_001") 81 | 82 | def test_PopOne(self): 83 | caught_error = 0 84 | 85 | max_items = 10 86 | max_byte_size = 7*2*max_items 87 | s = LruStorage(max_byte_size, max_items) 88 | key_fmt = "key_%03d" 89 | val_fmt = "val_%03d" 90 | for i in xrange(max_items): 91 | s.Store(KV(key_fmt % i, val_fmt % i)) 92 | 93 | self.assertEqual(s.Lookup(0).key(), key_fmt % 0) 94 | 95 | for i in xrange(0, max_items): 96 | entry = s.Lookup(i) 97 | s.PopOne() 98 | try: 99 | s.Lookup(i) 100 | except IndexError as ie: 101 | caught_error = 1 102 | if not caught_error: 103 | print s.ring 104 | print s.Lookup(i) 105 | self.fail("Failure: PopOne() didn't pop the first element") 106 | return 107 | 108 | self.assertEqual(s.byte_size, 0) 109 | self.assertEqual(len(s.ring), 0) 110 | 111 | caught_error = 0 112 | retval = s.PopOne() 113 | if retval: 114 | self.fail("Failure: PopOne() didn't return false when no elements to pop!") 115 | 116 | def test_Reserve(self): 117 | max_items = 10 118 | max_byte_size = 1000 119 | s = LruStorage(max_byte_size, max_items) 120 | key_fmt = "key_%06d" 121 | val_fmt = "val_%06d" 122 | for i in xrange(max_items + 10): 123 | if i < max_items: 124 | kv = KV(key_fmt % i, val_fmt % i) 125 | s.Store(kv) 126 | else: 127 | try: 128 | kv = KV(key_fmt % i, val_fmt % i) 129 | s.Store(kv) 130 | self.fail("This shouldn't have worked. Error.") 131 | return 132 | except MemoryError as me: 133 | s.Reserve(kv, 1) 134 | kv = KV(key_fmt % i, val_fmt % i) 135 | s.Store(kv) 136 | s = LruStorage(20, max_items) 137 | s.Store(KV("12345", "67890")) 138 | s.Store(KV("12345", "67890")) 139 | try: 140 | s.Store(KV("12345", "678901")) 141 | self.fail("This shouldn't have worked. Error.") 142 | return 143 | except MemoryError as me: 144 | s.Reserve(KV("12345","123456"),1) 145 | s.Store(KV("12345", "678901")) 146 | self.assertEqual(len(s.ring), 1) 147 | 148 | def test_RollOver(self): 149 | max_items = 64 150 | max_seq_num = 64 151 | max_byte_size = (6+4)*2*max_items 152 | s = LruStorage(max_byte_size, max_items, max_seq_num) 153 | key_fmt = "key_%06d" 154 | val_fmt = "val_%06d" 155 | for i in xrange(max_items + max_items/2): 156 | kv = KV(key_fmt % i, val_fmt % i) 157 | s.Reserve(kv, 1) 158 | s.Store(kv) 159 | for i in xrange(max_items/2, max_items + max_items/2): 160 | key_str = key_fmt % i 161 | item = s.Lookup(i % max_seq_num) 162 | self.assertEqual(item.key(), key_str) 163 | 164 | def test_RollOverWithOffset(self): 165 | max_items = 64 166 | max_seq_num = 128 167 | offset = 60 168 | max_byte_size = (6+4)*2*max_items 169 | s = LruStorage(max_byte_size, max_items, max_seq_num, offset) 170 | key_fmt = "key_%06d" 171 | val_fmt = "val_%06d" 172 | idx = offset 173 | for i in xrange(offset, max_items*3 + offset): 174 | if idx >= max_seq_num: 175 | idx = offset 176 | key_str = key_fmt % idx 177 | kv = KV(key_fmt % idx, val_fmt % idx) 178 | s.Reserve(kv, 1) 179 | s.Store(kv) 180 | item = s.Lookup(idx) 181 | self.assertEqual(item.seq_num, idx) 182 | self.assertEqual(item.key(), key_str) 183 | idx += 1 184 | 185 | def test_RefCntString(self): 186 | orig = "foobarbaz" 187 | ref1 = RefCntString(orig) 188 | self.assertEqual(len(ref1), len(orig)) 189 | self.assertEqual(ref1.refcnt(), 1) 190 | 191 | ref2 = RefCntString(ref1) 192 | self.assertEqual(ref1.refcnt(), 2) 193 | self.assertEqual(len(ref1), 0) 194 | self.assertEqual(ref2.refcnt(), 2) 195 | self.assertEqual(len(ref2), 0) 196 | 197 | ref3 = RefCntString(ref2) 198 | self.assertEqual(ref1.refcnt(), 3) 199 | self.assertEqual(len(ref1), 0) 200 | self.assertEqual(ref2.refcnt(), 3) 201 | self.assertEqual(len(ref2), 0) 202 | self.assertEqual(ref3.refcnt(), 3) 203 | self.assertEqual(len(ref2), 0) 204 | 205 | ref1.done() 206 | self.assertEqual(ref2.refcnt(), 2) 207 | self.assertEqual(len(ref2), 0) 208 | self.assertEqual(ref3.refcnt(), 2) 209 | self.assertEqual(len(ref2), 0) 210 | 211 | ref2.done() 212 | self.assertEqual(ref3.refcnt(), 1) 213 | self.assertEqual(len(ref3), len(orig)) 214 | 215 | unittest.main() 216 | 217 | --------------------------------------------------------------------------------