├── .gitignore ├── Makefile ├── README ├── TODO ├── block_compressor.c ├── block_compressor_benchmark.txt ├── cl_tester.c ├── csnappy.h ├── csnappy_compat.h ├── csnappy_compress.c ├── csnappy_decompress.c ├── csnappy_internal.h ├── csnappy_internal_userspace.h ├── debugfs_input.txt ├── kernel_3_2_10.patch ├── python ├── OutputBuffer.py ├── pysnappy_compress.py └── pysnappy_decompress.py ├── snappy_tester.patch ├── testdata ├── baddata3.snappy ├── unaligned_uint64_test.bin.gz ├── unaligned_uint64_test.snappy.gz ├── urls.10K └── urls.10K.snappy ├── unaligned_arm.s ├── unaligned_test.c ├── userspace_benchmark.txt ├── zram_benchmark.txt └── zramtest2.sh /.gitignore: -------------------------------------------------------------------------------- 1 | cl_tester 2 | *.o 3 | *.so 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | OPT_FLAGS = -g -O2 -DNDEBUG -fomit-frame-pointer 3 | DBG_FLAGS = -ggdb -O0 -DDEBUG 4 | CFLAGS := -std=gnu89 -Wall -pedantic -DHAVE_BUILTIN_CTZ 5 | ifeq (${DEBUG},yes) 6 | CFLAGS += $(DBG_FLAGS) 7 | else 8 | CFLAGS += $(OPT_FLAGS) 9 | endif 10 | LDFLAGS = -Wl,-O1 -Wl,--no-undefined 11 | PREFIX := /usr 12 | LIBDIR := $(PREFIX)/lib 13 | 14 | all: test 15 | 16 | test: check_unaligned_uint64 cl_test check_leaks 17 | 18 | cl_tester: cl_tester.c csnappy.h libcsnappy.so 19 | $(CC) $(CFLAGS) $(LDFLAGS) -D_GNU_SOURCE -o $@ $< libcsnappy.so 20 | 21 | cl_test: cl_tester 22 | rm -f afifo 23 | mkfifo afifo 24 | LD_LIBRARY_PATH=. ./cl_tester -c afifo & 26 | diff -u testdata/urls.10K afifo && echo "compress-decompress restores original" 27 | rm -f afifo 28 | LD_LIBRARY_PATH=. ./cl_tester -S d && echo "decompression is safe" 29 | LD_LIBRARY_PATH=. ./cl_tester -S c 30 | 31 | check_leaks: cl_tester 32 | LD_LIBRARY_PATH=. valgrind --leak-check=full --show-reachable=yes ./cl_tester -d -c /dev/null 33 | LD_LIBRARY_PATH=. valgrind --leak-check=full --show-reachable=yes ./cl_tester -d -c /dev/null || true 34 | LD_LIBRARY_PATH=. valgrind --leak-check=full --show-reachable=yes ./cl_tester -c /dev/null 35 | LD_LIBRARY_PATH=. valgrind --leak-check=full --show-reachable=yes ./cl_tester -S d 36 | 37 | check_unaligned_uint64: 38 | gzip -dc testdata/unaligned_uint64_test.snappy 39 | gzip -dc testdata/unaligned_uint64_test.bin 40 | EXTRA_TEST_CFLAGS="-O0" make check_unaligned_uint64_extra_cflags 41 | EXTRA_TEST_CFLAGS="-O1" make check_unaligned_uint64_extra_cflags 42 | EXTRA_TEST_CFLAGS="-O2" make check_unaligned_uint64_extra_cflags 43 | EXTRA_TEST_CFLAGS="-O3" make check_unaligned_uint64_extra_cflags 44 | EXTRA_TEST_CFLAGS="-O2 -march=native" make check_unaligned_uint64_extra_cflags 45 | EXTRA_TEST_CFLAGS="-O3 -march=native" make check_unaligned_uint64_extra_cflags 46 | rm -f testdata/unaligned_uint64_test.snappy testdata/unaligned_uint64_test.bin 47 | 48 | check_unaligned_uint64_extra_cflags: 49 | make clean 50 | make cl_tester 51 | rm -f tmp 52 | LD_LIBRARY_PATH=. 
./cl_tester -d testdata/unaligned_uint64_test.snappy tmp 53 | diff testdata/unaligned_uint64_test.bin tmp >/dev/null && echo "${EXTRA_TEST_CFLAGS} ok" 54 | make clean 55 | rm -f tmp 56 | 57 | libcsnappy.so: csnappy_compress.c csnappy_decompress.c csnappy_internal.h csnappy_internal_userspace.h 58 | $(CC) $(CFLAGS) $(EXTRA_TEST_CFLAGS) -fPIC -DPIC -c -o csnappy_compress.o csnappy_compress.c 59 | $(CC) $(CFLAGS) $(EXTRA_TEST_CFLAGS) -fPIC -DPIC -c -o csnappy_decompress.o csnappy_decompress.c 60 | $(CC) $(CFLAGS) $(EXTRA_TEST_CFLAGS) $(LDFLAGS) -shared -o $@ csnappy_compress.o csnappy_decompress.o 61 | 62 | block_compressor: block_compressor.c libcsnappy.so 63 | $(CC) -std=gnu99 -Wall -O2 -g -o $@ $< libcsnappy.so -llzo2 -lz -lrt 64 | 65 | test_block_compressor: block_compressor 66 | for testfile in \ 67 | /usr/lib64/chromium-browser/chrome \ 68 | /usr/lib64/qt4/libQtWebKit.so.4.7.2 \ 69 | /usr/lib64/llvm/libLLVM-2.9.so \ 70 | /usr/lib64/xulrunner-2.0/libxul.so \ 71 | /usr/libexec/gcc/x86_64-pc-linux-gnu/4.6.1-pre9999/cc1 \ 72 | /usr/lib64/libnvidia-glcore.so.270.41.03 \ 73 | /usr/lib64/gcc/x86_64-pc-linux-gnu/4.6.1-pre9999/libgcj.so.12.0.0 \ 74 | /usr/lib64/libwireshark.so.0.0.1 \ 75 | /usr/share/icons/oxygen/icon-theme.cache \ 76 | ; do \ 77 | echo compressing: $$testfile ; \ 78 | for method in snappy lzo zlib ; do \ 79 | LD_LIBRARY_PATH=. ./block_compressor -c $$method $$testfile itmp ;\ 80 | LD_LIBRARY_PATH=. ./block_compressor -c $$method -d itmp otmp > /dev/null ;\ 81 | diff -u $$testfile otmp ;\ 82 | echo "ratio:" \ 83 | $$(stat --printf %s itmp) \* 100 / $$(stat --printf %s $$testfile) "=" \ 84 | $$(expr $$(stat --printf %s itmp) \* 100 / $$(stat --printf %s $$testfile)) "%" ;\ 85 | rm -f itmp otmp ;\ 86 | done ; \ 87 | done ; 88 | 89 | NDK = /mnt/backup/home/backup/android-ndk-r7b 90 | SYSROOT = $(NDK)/platforms/android-5/arch-arm 91 | TOOLCHAIN = $(NDK)/toolchains/arm-linux-androideabi-4.4.3/prebuilt/linux-x86/bin 92 | unaligned_test_android: unaligned_test.c unaligned_arm.s 93 | $(TOOLCHAIN)/arm-linux-androideabi-gcc \ 94 | -ffunction-sections -funwind-tables -fstack-protector \ 95 | -D__ARM_ARCH_5__ -D__ARM_ARCH_5T__ -D__ARM_ARCH_5E__ -D__ARM_ARCH_5TE__ \ 96 | -Wno-psabi -march=armv5te -mtune=xscale -msoft-float -mthumb \ 97 | -O2 -fomit-frame-pointer -fno-strict-aliasing -finline-limit=64 \ 98 | -DANDROID -Wa,--noexecstack -DNDEBUG -g \ 99 | -I$(SYSROOT)/usr/include -c -o unaligned_test.o unaligned_test.c 100 | $(TOOLCHAIN)/arm-linux-androideabi-gcc \ 101 | -ffunction-sections -funwind-tables -fstack-protector \ 102 | -D__ARM_ARCH_5__ -D__ARM_ARCH_5T__ -D__ARM_ARCH_5E__ -D__ARM_ARCH_5TE__ \ 103 | -Wno-psabi -march=armv5te -mtune=xscale -msoft-float -mthumb \ 104 | -O2 -fomit-frame-pointer -fno-strict-aliasing -finline-limit=64 \ 105 | -DANDROID -Wa,--noexecstack -DNDEBUG -g \ 106 | -I$(SYSROOT)/usr/include -c -o unaligned_arm.o unaligned_arm.s 107 | $(TOOLCHAIN)/arm-linux-androideabi-g++ \ 108 | --sysroot=$(SYSROOT) unaligned_test.o unaligned_arm.o \ 109 | $(TOOLCHAIN)/../lib/gcc/arm-linux-androideabi/4.4.3/libgcc.a \ 110 | -Wl,--no-undefined -Wl,-z,noexecstack -lc -lm -o unaligned_test_android 111 | 112 | install: csnappy.h libcsnappy.so 113 | install -d "$(DESTDIR)$(PREFIX)"/include 114 | install -m 0644 csnappy.h "$(DESTDIR)$(PREFIX)"/include/ 115 | install -d "$(DESTDIR)$(LIBDIR)" 116 | install libcsnappy.so "$(DESTDIR)$(LIBDIR)" 117 | 118 | uninstall: 119 | rm -f "$(DESTDIR)$(PREFIX)"/include/csnappy.h 120 | rm -f "$(DESTDIR)$(LIBDIR)"/libcsnappy.so 121 | 122 | clean: 123 | 
rm -f *.o *_debug libcsnappy.so cl_tester 124 | 125 | .PHONY: .REGEN clean all 126 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Google Snappy, available at https://google.github.io/snappy/ 2 | is a compression library designed for speed rather than compression ratios. 3 | 4 | It is not a new concept by far. The Linux kernel currently uses LZO as the 5 | default fast compressor. Snappy works faster, though achieves less compression. 6 | 7 | Google's code is written in C with a lot of C++. Some of the more interesting 8 | features that rely on C++ appear to have been elided from the open source 9 | version of Snappy. 10 | 11 | Goals: 12 | To get the codebase into a shape that can be accepted into the mainline 13 | Linux kernel and used with zram (http://code.google.com/p/compcache/). 14 | Being able to compress vmlinux and initrd with Snappy is a secondary goal. 15 | Same for support in Squashfs and other parts of the kernel that currently 16 | support LZO. 17 | 18 | Results: 19 | I cut out or ported to plain ANSI C the necessary code and headers. 20 | To cause less confusion, I call this project (and files) csnappy. 21 | The API looks right, but I welcome comments. 22 | The code *has* been tested in kernel-space using a patched zram and it works. 23 | 24 | The code has been tested in a qemu emulating a PowerPC Mac and ARMv5TE running 25 | Debian Wheezy. 26 | 27 | I also use an ARMv6 Android phone for testing. 28 | 29 | Testing on other hardware or platforms is welcome. 30 | 31 | Note: The userspace tester is a hack, as is the shared library. 32 | 33 | Someone wrote a perl wrapper for csnappy: 34 | http://search.cpan.org/dist/Compress-Snappy/ 35 | https://github.com/gray/compress-snappy 36 | 37 | Patch for upstream snappy tester is available: snappy_tester.patch 38 | Patch for linux kernel is available: kernel_3_2_10.patch 39 | 40 | Benchmark in userspace: userspace_benchmark.txt 41 | Benchmark in kernel space with zram: zram_benchmark.txt 42 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | * test with debug config option 2 | * rename CamelCase symbols to linux_style (?) 3 | * test on 32bit x86 qemu 4 | * test on non-x86 hardware 5 | * consider hash functions with better performance on other arches. 6 | * consider hash table with say, 8, possible matches. 7 | * check what compression ratio, speed, memory use looks like if instead of 8 | hash table with 16K entries for 32K values and no chaining we use a hash map 9 | that stores all values (establish upper limits). 10 | -------------------------------------------------------------------------------- /block_compressor.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011, Zeev Tarantov . 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following disclaimer 13 | in the documentation and/or other materials provided with the 14 | distribution. 
15 | * Neither the name of Zeev Tarantov nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include "csnappy.h" 41 | #include 42 | #ifndef MAP_ANONYMOUS 43 | #define MAP_ANONYMOUS MAP_ANON 44 | #endif 45 | 46 | static int PAGE_SIZE, PAGE_SHIFT; 47 | 48 | #define handle_error(msg) \ 49 | do { perror(msg); exit(EXIT_FAILURE); } while (0) 50 | 51 | #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) 52 | 53 | static void* noop(void) { return NULL; } 54 | static void noop_p(void *p) { } 55 | 56 | static void* lzo_compress_init(void) 57 | { 58 | char *workmem; 59 | if (!(workmem = malloc(LZO1X_1_MEM_COMPRESS))) 60 | handle_error("malloc"); 61 | return workmem; 62 | } 63 | 64 | static void lzo_compress_free(void *opaque) 65 | { 66 | free(opaque); 67 | } 68 | 69 | static void lzo_compress( 70 | const char *src, 71 | uint32_t ilen, 72 | char *dst, 73 | uint32_t *dst_len, 74 | void *opaque) 75 | { 76 | lzo_uint olen; 77 | lzo1x_1_compress((unsigned char *)src, ilen, 78 | (unsigned char *)dst, &olen, opaque); 79 | *dst_len = olen; 80 | } 81 | 82 | static int lzo_decompress( 83 | const char *src, 84 | uint32_t ilen, 85 | char *dst, 86 | uint32_t *dst_len, 87 | void *opaque) 88 | { 89 | lzo_uint olen = *dst_len; 90 | int ret; 91 | ret = lzo1x_decompress_safe( 92 | (unsigned char *)src, ilen, 93 | (unsigned char *)dst, &olen, NULL); 94 | *dst_len = olen; 95 | return ret; 96 | } 97 | 98 | 99 | #define WMSIZE_ORDER ((PAGE_SHIFT > 14) ? 
(15) : (PAGE_SHIFT+1)) 100 | static void* snappy_compress_init(void) 101 | { 102 | char *workmem; 103 | if (!(workmem = malloc(1 << WMSIZE_ORDER))) 104 | handle_error("malloc"); 105 | return workmem; 106 | } 107 | 108 | static void snappy_compress_free(void *opaque) 109 | { 110 | free(opaque); 111 | } 112 | 113 | static void snappy_compress( 114 | const char *src, 115 | uint32_t ilen, 116 | char *dst, 117 | uint32_t *dst_len, 118 | void *opaque) 119 | { 120 | char *end; 121 | end = csnappy_compress_fragment(src, ilen, dst, 122 | opaque, WMSIZE_ORDER); 123 | *dst_len = end - dst; 124 | } 125 | 126 | static int snappy_decompress( 127 | const char *src, 128 | uint32_t ilen, 129 | char *dst, 130 | uint32_t *dst_len, 131 | void *opaque) 132 | { 133 | return csnappy_decompress_noheader(src, ilen, dst, dst_len); 134 | } 135 | 136 | 137 | static void* zlib_compress_init(void) 138 | { 139 | z_stream *zs; 140 | if (!(zs = malloc(sizeof(z_stream)))) 141 | handle_error("malloc"); 142 | zs->zalloc = Z_NULL; 143 | zs->zfree = Z_NULL; 144 | zs->opaque = Z_NULL; 145 | if (deflateInit(zs, Z_DEFAULT_COMPRESSION) != Z_OK) 146 | handle_error("deflateInit"); 147 | return zs; 148 | } 149 | 150 | static void zlib_compress_free(void *opaque) 151 | { 152 | if (deflateEnd(opaque) != Z_OK) 153 | handle_error("deflateEnd"); 154 | free(opaque); 155 | } 156 | 157 | static void zlib_compress( 158 | const char *src, 159 | uint32_t ilen, 160 | char *dst, 161 | uint32_t *dst_len, 162 | void *opaque) 163 | { 164 | z_stream *zs = opaque; 165 | zs->avail_in = ilen; 166 | zs->next_in = (unsigned char *)src; 167 | zs->avail_out = *dst_len; 168 | zs->next_out = (unsigned char *)dst; 169 | if (deflate(zs, Z_FINISH) != Z_STREAM_END) 170 | handle_error("deflate"); 171 | if (deflateReset(zs) != Z_OK) 172 | handle_error("deflateReset"); 173 | *dst_len = *dst_len - zs->avail_out; 174 | } 175 | 176 | static void* zlib_decompress_init(void) 177 | { 178 | z_stream *zs; 179 | if (!(zs = malloc(sizeof(z_stream)))) 180 | handle_error("malloc"); 181 | zs->zalloc = Z_NULL; 182 | zs->zfree = Z_NULL; 183 | zs->opaque = Z_NULL; 184 | zs->next_in = Z_NULL; 185 | zs->avail_in = 0; 186 | if (inflateInit(zs) != Z_OK) 187 | handle_error("inflateInit"); 188 | return zs; 189 | } 190 | 191 | static void zlib_decompress_free(void *opaque) 192 | { 193 | if (inflateEnd(opaque) != Z_OK) 194 | handle_error("inflateEnd"); 195 | free(opaque); 196 | } 197 | 198 | static int zlib_decompress( 199 | const char *src, 200 | uint32_t ilen, 201 | char *dst, 202 | uint32_t *dst_len, 203 | void *opaque) 204 | { 205 | z_stream *zs = opaque; 206 | zs->avail_in = ilen; 207 | zs->next_in = (unsigned char *)src; 208 | zs->avail_out = *dst_len; 209 | zs->next_out = (unsigned char *)dst; 210 | if (inflate(zs, Z_FINISH) != Z_STREAM_END) 211 | handle_error("inflate"); 212 | if (inflateReset(zs) != Z_OK) 213 | handle_error("inflateReset"); 214 | *dst_len = *dst_len - zs->avail_out; 215 | return 0; 216 | } 217 | 218 | 219 | enum { 220 | LZO = 0, 221 | SNAPPY = 1, 222 | ZLIB = 2, 223 | }; 224 | 225 | static const char* const COMPRESSORS[] = { "LZO", "SNAPPY", "ZLIB" }; 226 | 227 | typedef void (*compress_fn)(const char *src, uint32_t ilen, char *dst, 228 | uint32_t *dst_len, void *opaque); 229 | 230 | typedef int (*decompress_fn)(const char *src, uint32_t ilen, char *dst, 231 | uint32_t *dst_len, void *opaque); 232 | 233 | struct compressor_funcs { 234 | void* (*compress_init)(void); 235 | void (*compress_free)(void *opaque); 236 | compress_fn compress; 237 | void* 
(*decompress_init)(void); 238 | void (*decompress_free)(void *opaque); 239 | decompress_fn decompress; 240 | }; 241 | 242 | static const struct compressor_funcs compressors[] = { 243 | {lzo_compress_init, lzo_compress_free, lzo_compress, 244 | noop, noop_p, lzo_decompress}, 245 | {snappy_compress_init, snappy_compress_free, snappy_compress, 246 | noop, noop_p, snappy_decompress}, 247 | {zlib_compress_init, zlib_compress_free, zlib_compress, 248 | zlib_decompress_init, zlib_decompress_free, zlib_decompress}, 249 | }; 250 | 251 | #define ONE_BILLION 1000000000 252 | static void add_time_diff(struct timespec *total, 253 | struct timespec *start, 254 | struct timespec *end) 255 | { 256 | end->tv_sec -= start->tv_sec; 257 | end->tv_nsec -= start->tv_nsec; 258 | if (end->tv_nsec < 0) { 259 | end->tv_sec--; 260 | end->tv_nsec += ONE_BILLION; 261 | } 262 | total->tv_sec += end->tv_sec; 263 | total->tv_nsec += end->tv_nsec; 264 | if (total->tv_nsec > ONE_BILLION) { 265 | total->tv_sec++; 266 | total->tv_nsec -= ONE_BILLION; 267 | } 268 | } 269 | 270 | union intbytes { 271 | uint32_t i; 272 | char c[4]; 273 | }; 274 | 275 | static int do_compress(int method, FILE *ifile, FILE *ofile) 276 | { 277 | union intbytes intbuf; 278 | char *ibuf, *obuf, *opaque; 279 | compress_fn compress = compressors[method].compress; 280 | uint32_t counts[3] = { 0 }; 281 | struct timespec t1, t2, elapsed; 282 | memset(&elapsed, 0, sizeof(elapsed)); 283 | if (!(ibuf = malloc(PAGE_SIZE))) 284 | handle_error("malloc"); 285 | if (!(obuf = malloc(2 * PAGE_SIZE))) 286 | handle_error("malloc"); 287 | opaque = compressors[method].compress_init(); 288 | if (fseek(ifile, 0, SEEK_END) == -1) 289 | handle_error("fseek"); 290 | long input_length = ftell(ifile); 291 | if (fseek(ifile, 0, SEEK_SET) == -1) 292 | handle_error("fseek"); 293 | long nr_pages = DIV_ROUND_UP(input_length, PAGE_SIZE); 294 | if (nr_pages > UINT32_MAX) 295 | handle_error("inut file too big"); 296 | printf("compressor: %s\n", COMPRESSORS[method]); 297 | printf("#pages: %u\n", (unsigned)nr_pages); 298 | intbuf.i = (uint32_t)nr_pages; 299 | if (fwrite(&intbuf.c, 1, 4, ofile) < 4) 300 | handle_error("fwrite"); 301 | /* expand ofile to place of first compressed block data */ 302 | fseek(ofile, nr_pages * sizeof(uint32_t), SEEK_SET); 303 | /* write something so the file will grow. 
end of file now points to 304 | * start of compressed data of first block */ 305 | if (fwrite(&intbuf, 1, 4, ofile) < 4) 306 | handle_error("fwrite"); 307 | for (uint32_t i = 0; i < nr_pages; i++) { 308 | uint32_t ilen = fread(ibuf, 1, PAGE_SIZE, ifile); 309 | if (ilen < PAGE_SIZE && !feof(ifile)) 310 | handle_error("fread"); 311 | uint32_t olen = 2 * PAGE_SIZE; 312 | clock_gettime(CLOCK_MONOTONIC, &t1); 313 | compress(ibuf, ilen, obuf, &olen, opaque); 314 | clock_gettime(CLOCK_MONOTONIC, &t2); 315 | char *wbuf = obuf; 316 | if (olen >= ilen) { 317 | olen = ilen; 318 | wbuf = ibuf; 319 | counts[2]++; 320 | } else if (olen > (PAGE_SIZE / 2)) { 321 | counts[1]++; 322 | } else { 323 | counts[0]++; 324 | } 325 | if (fseek(ofile, (i + 1) * sizeof(uint32_t), SEEK_SET) == -1) 326 | handle_error("fseek"); 327 | intbuf.i = olen; 328 | if (fwrite(&intbuf.c, 1, 4, ofile) < 4) 329 | handle_error("fwrite"); 330 | if (fseek(ofile, 0, SEEK_END) == -1) 331 | handle_error("fseek"); 332 | if (fwrite(wbuf, 1, olen, ofile) < olen) 333 | handle_error("fwrite"); 334 | add_time_diff(&elapsed, &t1, &t2); 335 | } 336 | fclose(ofile); 337 | fclose(ifile); 338 | free(obuf); 339 | free(ibuf); 340 | compressors[method].compress_free(opaque); 341 | printf("> 100%%\t:%u\n> 50%%\t:%u\n<= 50%%\t:%u\n" 342 | "%d.%09ld seconds\n", 343 | counts[2],counts[1],counts[0], 344 | (int)elapsed.tv_sec, elapsed.tv_nsec); 345 | return 0; 346 | } 347 | 348 | static int do_decompress(int method, FILE *ifile, FILE *ofile) 349 | { 350 | union intbytes intbuf; 351 | char *ibuf, *obuf, *opaque; 352 | decompress_fn decompress = compressors[method].decompress; 353 | uint64_t ipos; 354 | uint32_t nr_pages; 355 | if (!(ibuf = malloc(2 * PAGE_SIZE))) 356 | handle_error("malloc"); 357 | if (!(obuf = malloc(PAGE_SIZE))) 358 | handle_error("malloc"); 359 | opaque = compressors[method].decompress_init(); 360 | if (fread(&intbuf.c, 1, 4, ifile) < 4) 361 | handle_error("fread"); 362 | nr_pages = intbuf.i; 363 | printf("nr_pages: %u\n", nr_pages); 364 | ipos = (nr_pages + 1) * sizeof(uint32_t); 365 | for (uint32_t i = 0; i < nr_pages; i++) { 366 | if (fseek(ifile, (i + 1) * sizeof(uint32_t), SEEK_SET) == -1) 367 | handle_error("fseek"); 368 | if (fread(&intbuf.c, 1, 4, ifile) < 4) 369 | handle_error("fread"); 370 | uint32_t ilen = intbuf.i; 371 | if (fseek(ifile, ipos, SEEK_SET) == -1) 372 | handle_error("fseek"); 373 | if (fread(ibuf, 1, ilen, ifile) < ilen) 374 | handle_error("fread"); 375 | ipos += ilen; 376 | uint32_t olen = PAGE_SIZE; 377 | char *wbuf = obuf; 378 | if (ilen == PAGE_SIZE) { 379 | wbuf = ibuf; 380 | } else { 381 | if (decompress(ibuf, ilen, obuf, &olen, opaque)) 382 | handle_error("decompress"); 383 | } 384 | if (fwrite(wbuf, 1, olen, ofile) < olen) 385 | handle_error("fwrite"); 386 | printf("%d -> %d\n", ilen, olen); 387 | } 388 | fclose(ofile); 389 | fclose(ifile); 390 | free(obuf); 391 | free(ibuf); 392 | compressors[method].decompress_free(opaque); 393 | return 0; 394 | } 395 | 396 | int main(int argc, char * const argv[]) 397 | { 398 | int c, compressor = -1, decompress = 0; 399 | const char *ifile_name, *ofile_name; 400 | FILE *ifile, *ofile; 401 | 402 | while((c = getopt(argc, argv, "c:d")) != -1) { 403 | switch (c) { 404 | case 'c': 405 | if (strcasecmp(optarg, COMPRESSORS[LZO]) == 0) 406 | compressor = LZO; 407 | else if (strcasecmp(optarg, COMPRESSORS[SNAPPY]) == 0) 408 | compressor = SNAPPY; 409 | else if (strcasecmp(optarg, COMPRESSORS[ZLIB]) == 0) 410 | compressor = ZLIB; 411 | else 412 | goto usage; 413 | break; 414 | 
case 'd': 415 | decompress = 1; 416 | break; 417 | default: 418 | goto usage; 419 | } 420 | } 421 | if (optind > argc - 2) 422 | goto usage; 423 | ifile_name = argv[optind]; 424 | ofile_name = argv[optind + 1]; 425 | if (!(ifile = fopen(ifile_name, "rb"))) { 426 | perror("fopen of ifile_name"); 427 | return 2; 428 | } 429 | if (!(ofile = fopen(ofile_name, "wb"))) { 430 | perror("fopen of ofile_name"); 431 | return 3; 432 | } 433 | PAGE_SIZE = (int)sysconf(_SC_PAGE_SIZE); 434 | PAGE_SHIFT = ffs(PAGE_SIZE) - 1; 435 | if (!decompress) 436 | return do_compress(compressor, ifile, ofile); 437 | else 438 | return do_decompress(compressor, ifile, ofile); 439 | usage: 440 | fprintf(stderr, 441 | "usage: block_compressor -c lzo|snappy|zlib [-d] ifile ofile\n"); 442 | return 1; 443 | } 444 | -------------------------------------------------------------------------------- /block_compressor_benchmark.txt: -------------------------------------------------------------------------------- 1 | for testfile in \ 2 | /usr/lib64/chromium-browser/chrome \ 3 | /usr/lib64/qt4/libQtWebKit.so.4.7.2 \ 4 | /usr/lib64/llvm/libLLVM-2.9.so \ 5 | /usr/lib64/xulrunner-2.0/libxul.so \ 6 | /usr/libexec/gcc/x86_64-pc-linux-gnu/4.6.1-pre9999/cc1 \ 7 | /usr/lib64/libnvidia-glcore.so.270.41.03 \ 8 | /usr/lib64/gcc/x86_64-pc-linux-gnu/4.6.1-pre9999/libgcj.so.12.0.0 \ 9 | /usr/lib64/libwireshark.so.0.0.1 \ 10 | /usr/share/icons/oxygen/icon-theme.cache \ 11 | ; do \ 12 | echo compressing: $testfile ; \ 13 | for method in snappy lzo zlib ; do \ 14 | LD_LIBRARY_PATH=. ./block_compressor -c $method $testfile itmp ;\ 15 | LD_LIBRARY_PATH=. ./block_compressor -c $method -d itmp otmp > /dev/null ;\ 16 | diff -u $testfile otmp ;\ 17 | echo "ratio:" \ 18 | $(stat --printf %s itmp) \* 100 / $(stat --printf %s $testfile) "=" \ 19 | $(expr $(stat --printf %s itmp) \* 100 / $(stat --printf %s $testfile)) "%" ;\ 20 | rm -f itmp otmp ;\ 21 | done ; \ 22 | done ; 23 | compressing: /usr/lib64/chromium-browser/chrome 24 | compressor: SNAPPY 25 | #pages: 10392 26 | > 100% :341 27 | > 50% :8445 28 | <= 50% :1606 29 | 0.174652181 seconds 30 | ratio: 27932299 * 100 / 42562848 = 65 % 31 | compressor: LZO 32 | #pages: 10392 33 | > 100% :495 34 | > 50% :8080 35 | <= 50% :1817 36 | 0.220447504 seconds 37 | ratio: 27150908 * 100 / 42562848 = 63 % 38 | compressor: ZLIB 39 | #pages: 10392 40 | > 100% :0 41 | > 50% :5800 42 | <= 50% :4592 43 | 2.395360610 seconds 44 | ratio: 20904235 * 100 / 42562848 = 49 % 45 | compressing: /usr/lib64/qt4/libQtWebKit.so.4.7.2 46 | compressor: SNAPPY 47 | #pages: 5342 48 | > 100% :219 49 | > 50% :3405 50 | <= 50% :1718 51 | 0.080079531 seconds 52 | ratio: 13290800 * 100 / 21877760 = 60 % 53 | compressor: LZO 54 | #pages: 5342 55 | > 100% :272 56 | > 50% :3281 57 | <= 50% :1789 58 | 0.100200702 seconds 59 | ratio: 12737811 * 100 / 21877760 = 58 % 60 | compressor: ZLIB 61 | #pages: 5342 62 | > 100% :142 63 | > 50% :2464 64 | <= 50% :2736 65 | 1.147235809 seconds 66 | ratio: 9903402 * 100 / 21877760 = 45 % 67 | compressing: /usr/lib64/llvm/libLLVM-2.9.so 68 | compressor: SNAPPY 69 | #pages: 3472 70 | > 100% :44 71 | > 50% :2384 72 | <= 50% :1044 73 | 0.055121943 seconds 74 | ratio: 8493554 * 100 / 14219992 = 59 % 75 | compressor: LZO 76 | #pages: 3472 77 | > 100% :53 78 | > 50% :2355 79 | <= 50% :1064 80 | 0.068662186 seconds 81 | ratio: 8213334 * 100 / 14219992 = 57 % 82 | compressor: ZLIB 83 | #pages: 3472 84 | > 100% :12 85 | > 50% :1728 86 | <= 50% :1732 87 | 0.766150075 seconds 88 | ratio: 6221694 * 100 / 14219992 = 43 % 89 
| compressing: /usr/lib64/xulrunner-2.0/libxul.so 90 | compressor: SNAPPY 91 | #pages: 7187 92 | > 100% :229 93 | > 50% :4432 94 | <= 50% :2526 95 | 0.108149693 seconds 96 | ratio: 17455680 * 100 / 29433888 = 59 % 97 | compressor: LZO 98 | #pages: 7187 99 | > 100% :253 100 | > 50% :4287 101 | <= 50% :2647 102 | 0.135244136 seconds 103 | ratio: 16596460 * 100 / 29433888 = 56 % 104 | compressor: ZLIB 105 | #pages: 7187 106 | > 100% :1 107 | > 50% :3021 108 | <= 50% :4165 109 | 1.610910737 seconds 110 | ratio: 12248775 * 100 / 29433888 = 41 % 111 | compressing: /usr/libexec/gcc/x86_64-pc-linux-gnu/4.6.1-pre9999/cc1 112 | compressor: SNAPPY 113 | #pages: 3608 114 | > 100% :68 115 | > 50% :2168 116 | <= 50% :1372 117 | 0.056032193 seconds 118 | ratio: 7728033 * 100 / 14775120 = 52 % 119 | compressor: LZO 120 | #pages: 3608 121 | > 100% :72 122 | > 50% :1975 123 | <= 50% :1561 124 | 0.069680830 seconds 125 | ratio: 7384676 * 100 / 14775120 = 49 % 126 | compressor: ZLIB 127 | #pages: 3608 128 | > 100% :2 129 | > 50% :306 130 | <= 50% :3300 131 | 0.789069806 seconds 132 | ratio: 5493265 * 100 / 14775120 = 37 % 133 | compressing: /usr/lib64/libnvidia-glcore.so.270.41.03 134 | compressor: SNAPPY 135 | #pages: 6710 136 | > 100% :74 137 | > 50% :2614 138 | <= 50% :4022 139 | 0.084111724 seconds 140 | ratio: 12860385 * 100 / 27481328 = 46 % 141 | compressor: LZO 142 | #pages: 6710 143 | > 100% :89 144 | > 50% :2436 145 | <= 50% :4185 146 | 0.103006618 seconds 147 | ratio: 12051888 * 100 / 27481328 = 43 % 148 | compressor: ZLIB 149 | #pages: 6710 150 | > 100% :1 151 | > 50% :1633 152 | <= 50% :5076 153 | 1.216785009 seconds 154 | ratio: 8641291 * 100 / 27481328 = 31 % 155 | compressing: /usr/lib64/gcc/x86_64-pc-linux-gnu/4.6.1-pre9999/libgcj.so.12.0.0 156 | compressor: SNAPPY 157 | #pages: 15133 158 | > 100% :190 159 | > 50% :5105 160 | <= 50% :9838 161 | 0.193854352 seconds 162 | ratio: 27131163 * 100 / 61982968 = 43 % 163 | compressor: LZO 164 | #pages: 15133 165 | > 100% :201 166 | > 50% :4323 167 | <= 50% :10609 168 | 0.235593989 seconds 169 | ratio: 24944283 * 100 / 61982968 = 40 % 170 | compressor: ZLIB 171 | #pages: 15133 172 | > 100% :63 173 | > 50% :317 174 | <= 50% :14753 175 | 2.943011502 seconds 176 | ratio: 18266667 * 100 / 61982968 = 29 % 177 | compressing: /usr/lib64/libwireshark.so.0.0.1 178 | compressor: SNAPPY 179 | #pages: 11341 180 | > 100% :64 181 | > 50% :2982 182 | <= 50% :8295 183 | 0.130238274 seconds 184 | ratio: 19576418 * 100 / 46449592 = 42 % 185 | compressor: LZO 186 | #pages: 11341 187 | > 100% :86 188 | > 50% :2565 189 | <= 50% :8690 190 | 0.157854033 seconds 191 | ratio: 17765477 * 100 / 46449592 = 38 % 192 | compressor: ZLIB 193 | #pages: 11341 194 | > 100% :1 195 | > 50% :1219 196 | <= 50% :10121 197 | 2.020140289 seconds 198 | ratio: 12565102 * 100 / 46449592 = 27 % 199 | compressing: /usr/share/icons/oxygen/icon-theme.cache 200 | compressor: SNAPPY 201 | #pages: 43411 202 | > 100% :0 203 | > 50% :7777 204 | <= 50% :35634 205 | 0.441581102 seconds 206 | ratio: 60247441 * 100 / 177810480 = 33 % 207 | compressor: LZO 208 | #pages: 43411 209 | > 100% :31 210 | > 50% :7801 211 | <= 50% :35579 212 | 0.547072992 seconds 213 | ratio: 59064132 * 100 / 177810480 = 33 % 214 | compressor: ZLIB 215 | #pages: 43411 216 | > 100% :0 217 | > 50% :2464 218 | <= 50% :40947 219 | 6.256616084 seconds 220 | ratio: 42305375 * 100 / 177810480 = 23 % 221 | -------------------------------------------------------------------------------- /cl_tester.c: 
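
The container produced by block_compressor's do_compress above is a host-endian uint32_t page count, followed by one uint32_t compressed length per page, followed by the concatenated per-page payloads; a payload whose stored length equals the page size was kept uncompressed. A minimal index dumper along these lines (an illustrative sketch, not one of the repository's files):

    #include <stdio.h>
    #include <stdint.h>

    int main(int argc, char **argv)
    {
        FILE *f;
        uint32_t nr_pages, len, i;
        if (argc < 2 || !(f = fopen(argv[1], "rb")))
            return 1;
        if (fread(&nr_pages, sizeof(nr_pages), 1, f) != 1)
            return 1;
        printf("nr_pages: %u\n", nr_pages);
        for (i = 0; i < nr_pages; i++) {
            if (fread(&len, sizeof(len), 1, f) != 1)
                return 1;
            printf("page %u: %u bytes\n", i, len);
        }
        fclose(f);
        return 0;
    }
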
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "csnappy.h" 8 | #ifndef MAP_ANONYMOUS 9 | #define MAP_ANONYMOUS MAP_ANON 10 | #endif 11 | 12 | #define MAX_INPUT_SIZE 10 * 1024 * 1024 13 | 14 | static int do_decompress(FILE *ifile, FILE *ofile) 15 | { 16 | char *ibuf, *obuf; 17 | uint32_t ilen, olen; 18 | int status, retval = 0; 19 | 20 | if (!(ibuf = (char *)malloc(MAX_INPUT_SIZE))) { 21 | fprintf(stderr, "malloc failed to allocate %d.\n", MAX_INPUT_SIZE); 22 | fclose(ifile); 23 | retval = 4; 24 | goto out; 25 | } 26 | 27 | ilen = fread(ibuf, 1, MAX_INPUT_SIZE, ifile); 28 | if (!feof(ifile)) { 29 | fprintf(stderr, "input was longer than %d, aborting.\n", MAX_INPUT_SIZE); 30 | free(ibuf); 31 | fclose(ifile); 32 | retval = 5; 33 | goto out; 34 | } 35 | fclose(ifile); 36 | 37 | if ((status = csnappy_get_uncompressed_length(ibuf, ilen, &olen)) < 0) { 38 | fprintf(stderr, "snappy_get_uncompressed_length returned %d.\n", status); 39 | free(ibuf); 40 | retval = 6; 41 | goto out; 42 | } 43 | 44 | if (!(obuf = (char *)malloc(olen))) { 45 | fprintf(stderr, "malloc failed to allocate %d.\n", (int)olen); 46 | free(ibuf); 47 | retval = 4; 48 | goto out; 49 | } 50 | 51 | status = csnappy_decompress(ibuf, ilen, obuf, olen); 52 | free(ibuf); 53 | if (status != CSNAPPY_E_OK) { 54 | fprintf(stderr, "snappy_decompress returned %d.\n", status); 55 | free(obuf); 56 | retval = 7; 57 | goto out; 58 | } 59 | 60 | fwrite(obuf, 1, olen, ofile); 61 | free(obuf); 62 | out: 63 | fclose(ofile); 64 | return retval; 65 | } 66 | 67 | static int do_compress(FILE *ifile, FILE *ofile) 68 | { 69 | char *ibuf, *obuf; 70 | void *working_memory; 71 | uint32_t ilen, olen, max_compressed_len; 72 | 73 | if (!(ibuf = (char *)malloc(MAX_INPUT_SIZE))) { 74 | fprintf(stderr, "malloc failed to allocate %d.\n", MAX_INPUT_SIZE); 75 | fclose(ifile); 76 | fclose(ofile); 77 | return 4; 78 | } 79 | 80 | ilen = fread(ibuf, 1, MAX_INPUT_SIZE, ifile); 81 | if (!feof(ifile)) { 82 | fprintf(stderr, "input was longer than %d, aborting.\n", MAX_INPUT_SIZE); 83 | free(ibuf); 84 | fclose(ifile); 85 | fclose(ofile); 86 | return 5; 87 | } 88 | fclose(ifile); 89 | 90 | max_compressed_len = csnappy_max_compressed_length(ilen); 91 | if (!(obuf = (char*)malloc(max_compressed_len))) { 92 | fprintf(stderr, "malloc failed to allocate %d bytes.\n", (int)max_compressed_len); 93 | free(ibuf); 94 | fclose(ofile); 95 | return 4; 96 | } 97 | 98 | if (!(working_memory = malloc(CSNAPPY_WORKMEM_BYTES))) { 99 | fprintf(stderr, "malloc failed to allocate %d bytes.\n", CSNAPPY_WORKMEM_BYTES); 100 | free(ibuf); 101 | fclose(ofile); 102 | return 4; 103 | } 104 | 105 | csnappy_compress(ibuf, ilen, obuf, &olen, 106 | working_memory, CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO); 107 | free(ibuf); 108 | free(working_memory); 109 | 110 | fwrite(obuf, 1, olen, ofile); 111 | fclose(ofile); 112 | free(obuf); 113 | return 0; 114 | } 115 | 116 | #define handle_error(msg) \ 117 | do { perror(msg); exit(EXIT_FAILURE); } while (0) 118 | 119 | 120 | static void segfault_handler(int signum) { 121 | if (signum == SIGSEGV) { 122 | printf("compression overwrites out buffer\n"); 123 | exit(EXIT_SUCCESS); 124 | } 125 | } 126 | 127 | int do_selftest_compression(void) 128 | { 129 | struct sigaction sa; 130 | char *obuf, *ibuf, *workmem; 131 | FILE *ifile; 132 | long PAGE_SIZE = sysconf(_SC_PAGE_SIZE); 133 | uint32_t olen = 0; 134 | uint32_t ilen = PAGE_SIZE + 100; 135 | 136 | obuf = 
(char*)mmap(NULL, PAGE_SIZE * 2, 137 | PROT_READ | PROT_WRITE, 138 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 139 | if (obuf == MAP_FAILED) 140 | handle_error("mmap"); 141 | if (mprotect(obuf + PAGE_SIZE, PAGE_SIZE, PROT_NONE)) 142 | handle_error("mprotect"); 143 | if (!(ibuf = (char*)malloc(ilen))) 144 | handle_error("malloc"); 145 | if (!(ifile = fopen("/dev/urandom", "rb"))) 146 | handle_error("fopen"); 147 | if (fread(ibuf, 1, ilen, ifile) < ilen) 148 | handle_error("fread"); 149 | if (fclose(ifile)) 150 | handle_error("fclose"); 151 | if (!(workmem = (char*)malloc(CSNAPPY_WORKMEM_BYTES))) 152 | handle_error("malloc"); 153 | sa.sa_handler = segfault_handler; 154 | sigemptyset(&sa.sa_mask); 155 | sa.sa_flags = 0; 156 | sigaction(SIGSEGV, &sa, NULL); 157 | csnappy_compress(ibuf, ilen, obuf, &olen, 158 | workmem, CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO); 159 | fprintf(stderr, "ERROR: csnappy_compress did not segfault when should have!\n"); 160 | if (munmap(obuf, PAGE_SIZE * 2)) 161 | handle_error("munmap"); 162 | free(workmem); 163 | free(ibuf); 164 | return EXIT_FAILURE; 165 | } 166 | 167 | static const char fake[] = "\x32\xc4\x66\x6f\x6f\x6f\x6f\x6f\x6f"; 168 | int do_selftest_decompression(void) 169 | { 170 | char *obuf, *ibuf, *workmem; 171 | FILE *ifile; 172 | int ret; 173 | long PAGE_SIZE = sysconf(_SC_PAGE_SIZE); 174 | int hlen; 175 | uint32_t n; 176 | uint32_t ilen = PAGE_SIZE + 100; 177 | uint32_t olen = csnappy_max_compressed_length(ilen); 178 | if (!(obuf = (char*)malloc(olen))) 179 | handle_error("malloc"); 180 | if (!(ibuf = (char*)malloc(ilen))) 181 | handle_error("malloc"); 182 | if (!(ifile = fopen("/dev/urandom", "rb"))) 183 | handle_error("fopen"); 184 | if (fread(ibuf, 1, ilen, ifile) < ilen) 185 | handle_error("fread"); 186 | if (fclose(ifile)) 187 | handle_error("fclose"); 188 | if (!(workmem = (char*)malloc(CSNAPPY_WORKMEM_BYTES))) 189 | handle_error("malloc"); 190 | csnappy_compress(ibuf, ilen, obuf, &olen, 191 | workmem, CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO); 192 | free(workmem); 193 | free(ibuf); 194 | ibuf = obuf; 195 | ilen = olen; 196 | olen = PAGE_SIZE; 197 | obuf = (char*)mmap(NULL, PAGE_SIZE * 2, 198 | PROT_READ | PROT_WRITE, 199 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 200 | if (obuf == MAP_FAILED) 201 | handle_error("mmap"); 202 | if (mprotect(obuf + PAGE_SIZE, PAGE_SIZE, PROT_NONE)) 203 | handle_error("mprotect"); 204 | ret = csnappy_decompress(ibuf, ilen, obuf, olen); 205 | if (ret != CSNAPPY_E_OUTPUT_INSUF) { 206 | fprintf(stderr, "snappy_decompress returned %d.\n", ret); 207 | exit(EXIT_FAILURE); 208 | } 209 | hlen = csnappy_get_uncompressed_length(ibuf, ilen, &n); 210 | if (hlen == CSNAPPY_E_HEADER_BAD) { 211 | fprintf(stderr, "csnappy_get_uncompressed_length, could not obtain header length\n"); 212 | exit(EXIT_FAILURE); 213 | } 214 | ret = csnappy_decompress_noheader(ibuf + hlen, ilen - hlen, obuf, &olen); 215 | if (ret != CSNAPPY_E_OUTPUT_OVERRUN) { 216 | fprintf(stderr, "csnappy_decompress_noheader returned %d.\n", ret); 217 | exit(EXIT_FAILURE); 218 | } 219 | free(ibuf); 220 | if (munmap(obuf, PAGE_SIZE * 2)) 221 | handle_error("munmap"); 222 | 223 | olen = 50; 224 | if (!(obuf = (char*)malloc(olen))) 225 | handle_error("malloc"); 226 | ret = csnappy_decompress(fake, 9, obuf, olen); 227 | if (ret == CSNAPPY_E_OK) { 228 | fprintf(stderr, "csnappy_decompress, stream cut off mid literal: %d\n", ret); 229 | exit(EXIT_FAILURE); 230 | } 231 | ret = csnappy_decompress_noheader(fake + 1, 8, obuf, &olen); 232 | if (ret == CSNAPPY_E_OK) { 233 | fprintf(stderr, 
"csnappy_decompress_noheader, stream cut off mid literal: %d\n", ret); 234 | exit(EXIT_FAILURE); 235 | } 236 | free(obuf); 237 | return 0; 238 | } 239 | 240 | int main(int argc, char * const argv[]) 241 | { 242 | int c; 243 | int decompress = 0, files = 1; 244 | int selftest_compression = 0, selftest_decompression = 0; 245 | const char *ifile_name, *ofile_name; 246 | FILE *ifile, *ofile; 247 | 248 | while((c = getopt(argc, argv, "S:dc")) != -1) { 249 | switch (c) { 250 | case 'S': 251 | switch (optarg[0]) { 252 | case 'c': 253 | selftest_compression = 1; 254 | break; 255 | case 'd': 256 | selftest_decompression = 1; 257 | break; 258 | default: 259 | goto usage; 260 | } 261 | break; 262 | case 'd': 263 | decompress = 1; 264 | break; 265 | case 'c': 266 | files = 0; 267 | break; 268 | default: 269 | goto usage; 270 | } 271 | } 272 | if (selftest_compression) 273 | return do_selftest_compression(); 274 | if (selftest_decompression) 275 | return do_selftest_decompression(); 276 | ifile = stdin; 277 | ofile = stdout; 278 | if (files) { 279 | if (optind > argc - 2) 280 | goto usage; 281 | ifile_name = argv[optind]; 282 | ofile_name = argv[optind + 1]; 283 | if (!(ifile = fopen(ifile_name, "rb"))) { 284 | perror("fopen of ifile_name"); 285 | return 2; 286 | } 287 | if (!(ofile = fopen(ofile_name, "wb"))) { 288 | perror("fopen of ofile_name"); 289 | return 3; 290 | } 291 | } 292 | if (decompress) 293 | return do_decompress(ifile, ofile); 294 | else 295 | return do_compress(ifile, ofile); 296 | usage: 297 | fprintf(stderr, 298 | "Usage:\n" 299 | "cl_tester [-d] infile outfile\t-\t[de]compress infile to outfile.\n" 300 | "cl_tester [-d] -c\t\t-\t[de]compress stdin to stdout.\n" 301 | "cl_tester -S c\t\t\t-\tSelf-test compression.\n" 302 | "cl_tester -S d\t\t\t-\tSelf-test decompression.\n"); 303 | return 1; 304 | } 305 | -------------------------------------------------------------------------------- /csnappy.h: -------------------------------------------------------------------------------- 1 | #ifndef __CSNAPPY_H__ 2 | #define __CSNAPPY_H__ 3 | /* 4 | File modified for the Linux Kernel by 5 | Zeev Tarantov 6 | */ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #define CSNAPPY_VERSION 5 12 | 13 | #define CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO 16 14 | #define CSNAPPY_WORKMEM_BYTES (1 << CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO) 15 | 16 | #ifndef __GNUC__ 17 | #define __attribute__(x) /*NOTHING*/ 18 | #endif 19 | 20 | #if defined(__SUNPRO_C) || defined(_AIX) 21 | # include 22 | #else 23 | # include 24 | #endif 25 | 26 | /* 27 | * Returns the maximal size of the compressed representation of 28 | * input data that is "source_len" bytes in length; 29 | */ 30 | uint32_t 31 | csnappy_max_compressed_length(uint32_t source_len) __attribute__((const)); 32 | 33 | /* 34 | * Flat array compression that does not emit the "uncompressed length" 35 | * prefix. Compresses "input" array to the "output" array. 36 | * 37 | * REQUIRES: "input" is at most 32KiB long. 38 | * REQUIRES: "output" points to an array of memory that is at least 39 | * "csnappy_max_compressed_length(input_length)" in size. 40 | * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. 41 | * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. 42 | * 43 | * Returns an "end" pointer into "output" buffer. 44 | * "end - output" is the compressed size of "input". 
45 | */ 46 | char* 47 | csnappy_compress_fragment( 48 | const char *input, 49 | const uint32_t input_length, 50 | char *output, 51 | void *working_memory, 52 | const int workmem_bytes_power_of_two); 53 | 54 | /* 55 | * REQUIRES: "compressed" must point to an area of memory that is at 56 | * least "csnappy_max_compressed_length(input_length)" bytes in length. 57 | * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. 58 | * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. 59 | * 60 | * Takes the data stored in "input[0..input_length-1]" and stores 61 | * it in the array pointed to by "compressed". 62 | * 63 | * "*out_compressed_length" is set to the length of the compressed output. 64 | */ 65 | void 66 | csnappy_compress( 67 | const char *input, 68 | uint32_t input_length, 69 | char *compressed, 70 | uint32_t *out_compressed_length, 71 | void *working_memory, 72 | const int workmem_bytes_power_of_two); 73 | 74 | /* 75 | * Reads header of compressed data to get stored length of uncompressed data. 76 | * REQUIRES: start points to compressed data. 77 | * REQUIRES: n is length of available compressed data. 78 | * 79 | * Returns CSNAPPY_E_HEADER_BAD on error. 80 | * Returns number of bytes read from input on success. 81 | * Stores decoded length into *result. 82 | */ 83 | int 84 | csnappy_get_uncompressed_length( 85 | const char *start, 86 | uint32_t n, 87 | uint32_t *result); 88 | 89 | /* 90 | * Safely decompresses all data from array "src" of length "src_len" containing 91 | * entire compressed stream (with header) into array "dst" of size "dst_len". 92 | * REQUIRES: dst_len is at least csnappy_get_uncompressed_length(...). 93 | * 94 | * Iff successful, returns CSNAPPY_E_OK. 95 | * If recorded length in header is greater than dst_len, returns 96 | * CSNAPPY_E_OUTPUT_INSUF. 97 | * If compressed data is malformed, does not write more than dst_len into dst. 98 | */ 99 | int 100 | csnappy_decompress( 101 | const char *src, 102 | uint32_t src_len, 103 | char *dst, 104 | uint32_t dst_len); 105 | 106 | /* 107 | * Safely decompresses stream src_len bytes long read from src to dst. 108 | * Amount of available space at dst must be provided in *dst_len by caller. 109 | * If compressed stream needs more space, it will not overflow and return 110 | * CSNAPPY_E_OUTPUT_OVERRUN. 111 | * On success, sets *dst_len to actual number of bytes decompressed. 112 | * Iff successful, returns CSNAPPY_E_OK. 113 | */ 114 | int 115 | csnappy_decompress_noheader( 116 | const char *src, 117 | uint32_t src_len, 118 | char *dst, 119 | uint32_t *dst_len); 120 | 121 | /* 122 | * Return values (< 0 = Error) 123 | */ 124 | #define CSNAPPY_E_OK 0 125 | #define CSNAPPY_E_HEADER_BAD (-1) 126 | #define CSNAPPY_E_OUTPUT_INSUF (-2) 127 | #define CSNAPPY_E_OUTPUT_OVERRUN (-3) 128 | #define CSNAPPY_E_INPUT_NOT_CONSUMED (-4) 129 | #define CSNAPPY_E_DATA_MALFORMED (-5) 130 | 131 | #ifdef __cplusplus 132 | } 133 | #endif 134 | 135 | #endif 136 | -------------------------------------------------------------------------------- /csnappy_compat.h: -------------------------------------------------------------------------------- 1 | #ifndef CSNAPPY_COMPAT_H 2 | 3 | /* This file was added to Sereal to attempt some MSVC compatibility, 4 | * but is at best a band-aid. And done without a lot of experience 5 | * in whatever subset of C99 MSVC supports.
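
As a counterpart to the compression entry points above, a decompression sketch built on csnappy_get_uncompressed_length and csnappy_decompress (an illustrative helper for a userspace build; the helper name is not part of the library, and error handling is reduced to the checks the header documents):

    #include <stdlib.h>
    #include "csnappy.h"

    static char *decompress_copy(const char *comp, uint32_t comp_len,
                                 uint32_t *out_len)
    {
        char *out;
        if (csnappy_get_uncompressed_length(comp, comp_len, out_len) < 0)
            return NULL;
        if (!(out = malloc(*out_len)))
            return NULL;
        if (csnappy_decompress(comp, comp_len, out, *out_len) != CSNAPPY_E_OK) {
            free(out);
            return NULL;
        }
        return out;
    }
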
6 | */ 7 | 8 | #ifndef INLINE 9 | # if defined(_MSC_VER) 10 | # define INLINE __inline 11 | # else 12 | # define INLINE inline 13 | # endif 14 | #endif 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /csnappy_compress.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011, Google Inc. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following disclaimer 13 | in the documentation and/or other materials provided with the 14 | distribution. 15 | * Neither the name of Google Inc. nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | File modified for the Linux Kernel by 32 | Zeev Tarantov 33 | 34 | File modified for Sereal by 35 | Steffen Mueller 36 | */ 37 | 38 | #include "csnappy_internal.h" 39 | #ifdef __KERNEL__ 40 | #include 41 | #include 42 | #endif 43 | #include "csnappy.h" 44 | 45 | 46 | static INLINE char* 47 | encode_varint32(char *sptr, uint32_t v) 48 | { 49 | uint8_t* ptr = (uint8_t *)sptr; 50 | static const int B = 128; 51 | if (v < (1<<7)) { 52 | *(ptr++) = v; 53 | } else if (v < (1<<14)) { 54 | *(ptr++) = v | B; 55 | *(ptr++) = v>>7; 56 | } else if (v < (1<<21)) { 57 | *(ptr++) = v | B; 58 | *(ptr++) = (v>>7) | B; 59 | *(ptr++) = v>>14; 60 | } else if (v < (1<<28)) { 61 | *(ptr++) = v | B; 62 | *(ptr++) = (v>>7) | B; 63 | *(ptr++) = (v>>14) | B; 64 | *(ptr++) = v>>21; 65 | } else { 66 | *(ptr++) = v | B; 67 | *(ptr++) = (v>>7) | B; 68 | *(ptr++) = (v>>14) | B; 69 | *(ptr++) = (v>>21) | B; 70 | *(ptr++) = v>>28; 71 | } 72 | return (char *)ptr; 73 | } 74 | 75 | /* 76 | * *** DO NOT CHANGE THE VALUE OF kBlockSize *** 77 | 78 | * New Compression code chops up the input into blocks of at most 79 | * the following size. This ensures that back-references in the 80 | * output never cross kBlockSize block boundaries. This can be 81 | * helpful in implementing blocked decompression. However the 82 | * decompression code should not rely on this guarantee since older 83 | * compression code may not obey it. 
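
The length header emitted by encode_varint32 above packs 7 payload bits per byte, least-significant group first, with the high bit marking continuation; 300, for instance, becomes the two bytes 0xAC 0x02. A matching decoder sketch (illustrative only; the library's own parsing of this header is exposed as csnappy_get_uncompressed_length):

    #include <stddef.h>
    #include <stdint.h>

    static const char *decode_varint32(const char *p, const char *end,
                                       uint32_t *result)
    {
        uint32_t v = 0;
        int shift;
        for (shift = 0; shift <= 28 && p < end; shift += 7) {
            uint8_t b = (uint8_t)*p++;
            v |= (uint32_t)(b & 0x7f) << shift;
            if (!(b & 0x80)) {
                *result = v;
                return p;
            }
        }
        return NULL;
    }
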
84 | */ 85 | #define kBlockLog 15 86 | #define kBlockSize (1 << kBlockLog) 87 | 88 | 89 | #if defined(__arm__) && !defined(ARCH_ARM_HAVE_UNALIGNED) 90 | 91 | static uint8_t* emit_literal( 92 | uint8_t *op, 93 | const uint8_t *src, 94 | const uint8_t *end) 95 | { 96 | uint32_t length = end - src; 97 | uint32_t n = length - 1; 98 | if (!length) 99 | return op; 100 | if (n < 60) { 101 | /* Fits in tag byte */ 102 | *op++ = LITERAL | (n << 2); 103 | } else { 104 | /* Encode in upcoming bytes */ 105 | uint8_t *base = op; 106 | op++; 107 | do { 108 | *op++ = n & 0xff; 109 | n >>= 8; 110 | } while (n > 0); 111 | *base = LITERAL | ((59 + (op - base - 1)) << 2); 112 | } 113 | memcpy(op, src, length); 114 | return op + length; 115 | } 116 | 117 | static uint8_t* emit_copy( 118 | uint8_t *op, 119 | uint32_t offset, 120 | uint32_t len) 121 | { 122 | DCHECK_GT(offset, 0); 123 | 124 | /* Emit 64 byte copies but make sure to keep at least four bytes 125 | * reserved */ 126 | while (unlikely(len >= 68)) { 127 | *op++ = COPY_2_BYTE_OFFSET | ((64 - 1) << 2); 128 | *op++ = offset & 255; 129 | *op++ = offset >> 8; 130 | len -= 64; 131 | } 132 | 133 | /* Emit an extra 60 byte copy if have too much data to fit in one 134 | * copy */ 135 | if (unlikely(len > 64)) { 136 | *op++ = COPY_2_BYTE_OFFSET | ((60 - 1) << 2); 137 | *op++ = offset & 255; 138 | *op++ = offset >> 8; 139 | len -= 60; 140 | } 141 | 142 | /* Emit remainder */ 143 | DCHECK_GE(len, 4); 144 | if ((len < 12) && (offset < 2048)) { 145 | int len_minus_4 = len - 4; 146 | *op++ = COPY_1_BYTE_OFFSET | 147 | ((len_minus_4) << 2) | 148 | ((offset >> 8) << 5); 149 | *op++ = offset & 0xff; 150 | } else { 151 | *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2); 152 | *op++ = offset & 255; 153 | *op++ = offset >> 8; 154 | } 155 | return op; 156 | } 157 | 158 | static uint32_t find_match_length( 159 | const uint8_t *s1, 160 | const uint8_t *s2, 161 | const uint8_t *s2_end) 162 | { 163 | const uint8_t * const s2_start = s2; 164 | while (s2 < s2_end && *s1++ == *s2++) /*nothing*/; 165 | return s2 - s2_start - 1; 166 | } 167 | 168 | static uint32_t hash(uint32_t v) 169 | { 170 | return v * UINT32_C(0x1e35a7bd); 171 | } 172 | 173 | char* 174 | csnappy_compress_fragment( 175 | const char *input, 176 | const uint32_t input_size, 177 | char *dst, 178 | void *working_memory, 179 | const int workmem_bytes_power_of_two) 180 | { 181 | const uint8_t * const src_start = (const uint8_t *)input; 182 | const uint8_t * const src_end_minus4 = src_start + input_size - 4; 183 | const uint8_t *src = src_start, *done_upto = src_start, *match; 184 | uint8_t *op = (uint8_t *)dst; 185 | uint16_t *wm = (uint16_t *)working_memory; 186 | int shift = 33 - workmem_bytes_power_of_two; 187 | uint32_t curr_val, curr_hash, match_val, offset, length; 188 | if (unlikely(input_size < 4)) 189 | goto the_end; 190 | memset(wm, 0, 1 << workmem_bytes_power_of_two); 191 | for (;;) { 192 | curr_val = (src[1] << 8) | (src[2] << 16) | (src[3] << 24); 193 | do { 194 | src++; 195 | if (unlikely(src >= src_end_minus4)) 196 | goto the_end; 197 | curr_val = (curr_val >> 8) | (src[3] << 24); 198 | DCHECK_EQ(curr_val, get_unaligned_le32(src)); 199 | curr_hash = hash(curr_val) >> shift; 200 | match = src_start + wm[curr_hash]; 201 | DCHECK_LT(match, src); 202 | wm[curr_hash] = src - src_start; 203 | match_val = get_unaligned_le32(match); 204 | } while (likely(curr_val != match_val)); 205 | offset = src - match; 206 | length = 4 + find_match_length( 207 | match + 4, src + 4, src_end_minus4 + 4); 208 | 
DCHECK_EQ(memcmp(src, match, length), 0); 209 | op = emit_literal(op, done_upto, src); 210 | op = emit_copy(op, offset, length); 211 | done_upto = src + length; 212 | src = done_upto - 1; 213 | } 214 | the_end: 215 | op = emit_literal(op, done_upto, src_end_minus4 + 4); 216 | return (char *)op; 217 | } 218 | 219 | #else /* !simple */ 220 | 221 | /* 222 | * Any hash function will produce a valid compressed bitstream, but a good 223 | * hash function reduces the number of collisions and thus yields better 224 | * compression for compressible input, and more speed for incompressible 225 | * input. Of course, it doesn't hurt if the hash function is reasonably fast 226 | * either, as it gets called a lot. 227 | */ 228 | static INLINE uint32_t HashBytes(uint32_t bytes, int shift) 229 | { 230 | uint32_t kMul = 0x1e35a7bd; 231 | return (bytes * kMul) >> shift; 232 | } 233 | static INLINE uint32_t Hash(const char *p, int shift) 234 | { 235 | return HashBytes(UNALIGNED_LOAD32(p), shift); 236 | } 237 | 238 | 239 | /* 240 | * Return the largest n such that 241 | * 242 | * s1[0,n-1] == s2[0,n-1] 243 | * and n <= (s2_limit - s2). 244 | * 245 | * Does not read *s2_limit or beyond. 246 | * Does not read *(s1 + (s2_limit - s2)) or beyond. 247 | * Requires that s2_limit >= s2. 248 | * 249 | * Separate implementation for x86_64, for speed. Uses the fact that 250 | * x86_64 is little endian. 251 | */ 252 | #if defined(__x86_64__) || defined(__aarch64__) 253 | static INLINE int 254 | FindMatchLength(const char *s1, const char *s2, const char *s2_limit) 255 | { 256 | uint64_t x; 257 | int matched, matching_bits; 258 | DCHECK_GE(s2_limit, s2); 259 | matched = 0; 260 | /* 261 | * Find out how long the match is. We loop over the data 64 bits at a 262 | * time until we find a 64-bit block that doesn't match; then we find 263 | * the first non-matching bit and use that to calculate the total 264 | * length of the match. 265 | */ 266 | while (likely(s2 <= s2_limit - 8)) { 267 | if (unlikely(UNALIGNED_LOAD64(s1 + matched) == 268 | UNALIGNED_LOAD64(s2))) { 269 | s2 += 8; 270 | matched += 8; 271 | } else { 272 | /* 273 | * On current (mid-2008) Opteron models there is a 3% 274 | * more efficient code sequence to find the first 275 | * non-matching byte. However, what follows is ~10% 276 | * better on Intel Core 2 and newer, and we expect AMD's 277 | * bsf instruction to improve. 278 | */ 279 | x = UNALIGNED_LOAD64(s1 + matched) ^ 280 | UNALIGNED_LOAD64(s2); 281 | matching_bits = FindLSBSetNonZero64(x); 282 | matched += matching_bits >> 3; 283 | return matched; 284 | } 285 | } 286 | while (likely(s2 < s2_limit)) { 287 | if (likely(s1[matched] == *s2)) { 288 | ++s2; 289 | ++matched; 290 | } else { 291 | return matched; 292 | } 293 | } 294 | return matched; 295 | } 296 | #else /* !defined(__x86_64__) && !defined(__aarch64__) */ 297 | static INLINE int 298 | FindMatchLength(const char *s1, const char *s2, const char *s2_limit) 299 | { 300 | /* Implementation based on the x86-64 version, above. 
*/ 301 | int matched = 0; 302 | DCHECK_GE(s2_limit, s2); 303 | 304 | while (s2 <= s2_limit - 4 && 305 | UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { 306 | s2 += 4; 307 | matched += 4; 308 | } 309 | #if __BYTE_ORDER == __LITTLE_ENDIAN 310 | if (s2 <= s2_limit - 4) { 311 | uint32_t x = UNALIGNED_LOAD32(s1 + matched) ^ 312 | UNALIGNED_LOAD32(s2); 313 | int matching_bits = FindLSBSetNonZero(x); 314 | matched += matching_bits >> 3; 315 | } else { 316 | while ((s2 < s2_limit) && (s1[matched] == *s2)) { 317 | ++s2; 318 | ++matched; 319 | } 320 | } 321 | #else 322 | while ((s2 < s2_limit) && (s1[matched] == *s2)) { 323 | ++s2; 324 | ++matched; 325 | } 326 | #endif 327 | return matched; 328 | } 329 | #endif /* !defined(__x86_64__) && !defined(__aarch64__) */ 330 | 331 | 332 | static INLINE char* 333 | EmitLiteral(char *op, const char *literal, int len, int allow_fast_path) 334 | { 335 | int n = len - 1; /* Zero-length literals are disallowed */ 336 | if (n < 60) { 337 | /* Fits in tag byte */ 338 | *op++ = LITERAL | (n << 2); 339 | /* 340 | The vast majority of copies are below 16 bytes, for which a 341 | call to memcpy is overkill. This fast path can sometimes 342 | copy up to 15 bytes too much, but that is okay in the 343 | main loop, since we have a bit to go on for both sides: 344 | - The input will always have kInputMarginBytes = 15 extra 345 | available bytes, as long as we're in the main loop, and 346 | if not, allow_fast_path = false. 347 | - The output will always have 32 spare bytes (see 348 | snappy_max_compressed_length). 349 | */ 350 | if (allow_fast_path && len <= 16) { 351 | UnalignedCopy64(literal, op); 352 | UnalignedCopy64(literal + 8, op + 8); 353 | return op + len; 354 | } 355 | } else { 356 | /* Encode in upcoming bytes */ 357 | char *base = op; 358 | int count = 0; 359 | op++; 360 | while (n > 0) { 361 | *op++ = n & 0xff; 362 | n >>= 8; 363 | count++; 364 | } 365 | DCHECK_GE(count, 1); 366 | DCHECK_LE(count, 4); 367 | *base = LITERAL | ((59+count) << 2); 368 | } 369 | memcpy(op, literal, len); 370 | return op + len; 371 | } 372 | 373 | static INLINE char* 374 | EmitCopyLessThan64(char *op, int offset, int len) 375 | { 376 | DCHECK_LE(len, 64); 377 | DCHECK_GE(len, 4); 378 | DCHECK_LT(offset, 65536); 379 | 380 | if ((len < 12) && (offset < 2048)) { 381 | int len_minus_4 = len - 4; 382 | DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */ 383 | *op++ = COPY_1_BYTE_OFFSET + 384 | ((len_minus_4) << 2) + 385 | ((offset >> 8) << 5); 386 | *op++ = offset & 0xff; 387 | } else { 388 | *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2); 389 | put_unaligned_le16(offset, op); 390 | op += 2; 391 | } 392 | return op; 393 | } 394 | 395 | static INLINE char* 396 | EmitCopy(char *op, int offset, int len) 397 | { 398 | /* Emit 64 byte copies but make sure to keep at least four bytes 399 | * reserved */ 400 | while (len >= 68) { 401 | op = EmitCopyLessThan64(op, offset, 64); 402 | len -= 64; 403 | } 404 | 405 | /* Emit an extra 60 byte copy if have too much data to fit in one 406 | * copy */ 407 | if (len > 64) { 408 | op = EmitCopyLessThan64(op, offset, 60); 409 | len -= 60; 410 | } 411 | 412 | /* Emit remainder */ 413 | op = EmitCopyLessThan64(op, offset, len); 414 | return op; 415 | } 416 | 417 | 418 | /* 419 | For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will 420 | equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have 421 | empirically found that overlapping loads such as 422 | UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... 
UNALIGNED_LOAD32(p+2) 423 | are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. 424 | 425 | We have different versions for 64- and 32-bit; ideally we would avoid the 426 | two functions and just INLINE the UNALIGNED_LOAD64 call into 427 | GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever 428 | enough to avoid loading the value multiple times then. For 64-bit, the load 429 | is done when GetEightBytesAt() is called, whereas for 32-bit, the load is 430 | done at GetUint32AtOffset() time. 431 | */ 432 | 433 | #if defined(__x86_64__) || (__SIZEOF_SIZE_T__ == 8) 434 | 435 | typedef uint64_t EightBytesReference; 436 | 437 | static INLINE EightBytesReference GetEightBytesAt(const char* ptr) { 438 | return UNALIGNED_LOAD64(ptr); 439 | } 440 | 441 | static INLINE uint32_t GetUint32AtOffset(uint64_t v, int offset) { 442 | DCHECK_GE(offset, 0); 443 | DCHECK_LE(offset, 4); 444 | #if __BYTE_ORDER == __LITTLE_ENDIAN 445 | return v >> (8 * offset); 446 | #else 447 | return v >> (32 - 8 * offset); 448 | #endif 449 | } 450 | 451 | #else /* !ARCH_K8 */ 452 | 453 | typedef const char* EightBytesReference; 454 | 455 | static INLINE EightBytesReference GetEightBytesAt(const char* ptr) { 456 | return ptr; 457 | } 458 | 459 | static INLINE uint32_t GetUint32AtOffset(const char* v, int offset) { 460 | DCHECK_GE(offset, 0); 461 | DCHECK_LE(offset, 4); 462 | return UNALIGNED_LOAD32(v + offset); 463 | } 464 | 465 | #endif /* !ARCH_K8 */ 466 | 467 | 468 | #define kInputMarginBytes 15 469 | char* 470 | csnappy_compress_fragment( 471 | const char *input, 472 | const uint32_t input_size, 473 | char *op, 474 | void *working_memory, 475 | const int workmem_bytes_power_of_two) 476 | { 477 | const char *ip, *ip_end, *base_ip, *next_emit, *ip_limit, *next_ip, 478 | *candidate, *base; 479 | uint16_t *table = (uint16_t *)working_memory; 480 | EightBytesReference input_bytes; 481 | uint32_t hash, next_hash, prev_hash, cur_hash, skip, candidate_bytes; 482 | int shift, matched; 483 | 484 | DCHECK_GE(workmem_bytes_power_of_two, 9); 485 | DCHECK_LE(workmem_bytes_power_of_two, 15); 486 | /* Table of 2^X bytes, need (X-1) bits to address table of uint16_t. 487 | * How many bits of 32bit hash function result are discarded? */ 488 | shift = 33 - workmem_bytes_power_of_two; 489 | /* "ip" is the input pointer, and "op" is the output pointer. */ 490 | ip = input; 491 | DCHECK_LE(input_size, kBlockSize); 492 | ip_end = input + input_size; 493 | base_ip = ip; 494 | /* Bytes in [next_emit, ip) will be emitted as literal bytes. Or 495 | [next_emit, ip_end) after the main loop. */ 496 | next_emit = ip; 497 | 498 | if (unlikely(input_size < kInputMarginBytes)) 499 | goto emit_remainder; 500 | 501 | memset(working_memory, 0, 1 << workmem_bytes_power_of_two); 502 | 503 | ip_limit = input + input_size - kInputMarginBytes; 504 | next_hash = Hash(++ip, shift); 505 | 506 | main_loop: 507 | DCHECK_LT(next_emit, ip); 508 | /* 509 | * The body of this loop calls EmitLiteral once and then EmitCopy one or 510 | * more times. (The exception is that when we're close to exhausting 511 | * the input we goto emit_remainder.) 512 | * 513 | * In the first iteration of this loop we're just starting, so 514 | * there's nothing to copy, so calling EmitLiteral once is 515 | * necessary. And we only start a new iteration when the 516 | * current iteration has determined that a call to EmitLiteral will 517 | * precede the next call to EmitCopy (if any). 
518 | * 519 | * Step 1: Scan forward in the input looking for a 4-byte-long match. 520 | * If we get close to exhausting the input then goto emit_remainder. 521 | * 522 | * Heuristic match skipping: If 32 bytes are scanned with no matches 523 | * found, start looking only at every other byte. If 32 more bytes are 524 | * scanned, look at every third byte, etc.. When a match is found, 525 | * immediately go back to looking at every byte. This is a small loss 526 | * (~5% performance, ~0.1% density) for compressible data due to more 527 | * bookkeeping, but for non-compressible data (such as JPEG) it's a huge 528 | * win since the compressor quickly "realizes" the data is incompressible 529 | * and doesn't bother looking for matches everywhere. 530 | * 531 | * The "skip" variable keeps track of how many bytes there are since the 532 | * last match; dividing it by 32 (ie. right-shifting by five) gives the 533 | * number of bytes to move ahead for each iteration. 534 | */ 535 | skip = 32; 536 | 537 | next_ip = ip; 538 | do { 539 | ip = next_ip; 540 | hash = next_hash; 541 | DCHECK_EQ(hash, Hash(ip, shift)); 542 | next_ip = ip + (skip++ >> 5); 543 | if (unlikely(next_ip > ip_limit)) 544 | goto emit_remainder; 545 | next_hash = Hash(next_ip, shift); 546 | candidate = base_ip + table[hash]; 547 | DCHECK_GE(candidate, base_ip); 548 | DCHECK_LT(candidate, ip); 549 | 550 | table[hash] = ip - base_ip; 551 | } while (likely(UNALIGNED_LOAD32(ip) != 552 | UNALIGNED_LOAD32(candidate))); 553 | 554 | /* 555 | * Step 2: A 4-byte match has been found. We'll later see if more 556 | * than 4 bytes match. But, prior to the match, input 557 | * bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." 558 | */ 559 | DCHECK_LE(next_emit + 16, ip_end); 560 | op = EmitLiteral(op, next_emit, ip - next_emit, 1); 561 | 562 | /* 563 | * Step 3: Call EmitCopy, and then see if another EmitCopy could 564 | * be our next move. Repeat until we find no match for the 565 | * input immediately after what was consumed by the last EmitCopy call. 566 | * 567 | * If we exit this loop normally then we need to call EmitLiteral next, 568 | * though we don't yet know how big the literal will be. We handle that 569 | * by proceeding to the next iteration of the main loop. We also can exit 570 | * this loop via goto if we get close to exhausting the input. 571 | */ 572 | candidate_bytes = 0; 573 | 574 | do { 575 | /* We have a 4-byte match at ip, and no need to emit any 576 | "literal bytes" prior to ip. */ 577 | base = ip; 578 | matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end); 579 | ip += matched; 580 | DCHECK_EQ(0, memcmp(base, candidate, matched)); 581 | op = EmitCopy(op, base - candidate, matched); 582 | /* We could immediately start working at ip now, but to improve 583 | compression we first update table[Hash(ip - 1, ...)]. 
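 * Concretely, in the statements just below: GetEightBytesAt(ip - 1) loads
 * eight bytes once; the hash of the four bytes starting at ip - 1 is used only
 * to record position ip - 1 in the table, while the hash of the four bytes
 * starting at ip both fetches the next candidate and records ip. Seeding the
 * table with ip - 1 lets a later search find a match that begins one byte
 * before the point where this copy ended.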
*/ 584 | next_emit = ip; 585 | if (unlikely(ip >= ip_limit)) 586 | goto emit_remainder; 587 | input_bytes = GetEightBytesAt(ip - 1); 588 | prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); 589 | table[prev_hash] = ip - base_ip - 1; 590 | cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); 591 | candidate = base_ip + table[cur_hash]; 592 | candidate_bytes = UNALIGNED_LOAD32(candidate); 593 | table[cur_hash] = ip - base_ip; 594 | } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); 595 | 596 | next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); 597 | ++ip; 598 | goto main_loop; 599 | 600 | emit_remainder: 601 | /* Emit the remaining bytes as a literal */ 602 | if (next_emit < ip_end) 603 | op = EmitLiteral(op, next_emit, ip_end - next_emit, 0); 604 | 605 | return op; 606 | } 607 | #endif /* !simple */ 608 | #if defined(__KERNEL__) && !defined(STATIC) 609 | EXPORT_SYMBOL(csnappy_compress_fragment); 610 | #endif 611 | 612 | uint32_t __attribute__((const)) 613 | csnappy_max_compressed_length(uint32_t source_len) 614 | { 615 | return 32 + source_len + source_len/6; 616 | } 617 | #if defined(__KERNEL__) && !defined(STATIC) 618 | EXPORT_SYMBOL(csnappy_max_compressed_length); 619 | #endif 620 | 621 | void 622 | csnappy_compress( 623 | const char *input, 624 | uint32_t input_length, 625 | char *compressed, 626 | uint32_t *compressed_length, 627 | void *working_memory, 628 | const int workmem_bytes_power_of_two) 629 | { 630 | int workmem_size; 631 | int num_to_read; 632 | uint32_t written = 0; 633 | char *p = encode_varint32(compressed, input_length); 634 | written += (p - compressed); 635 | compressed = p; 636 | while (input_length > 0) { 637 | num_to_read = min(input_length, (uint32_t)kBlockSize); 638 | workmem_size = workmem_bytes_power_of_two; 639 | if (unlikely(num_to_read < kBlockSize)) { 640 | for (workmem_size = 9; 641 | workmem_size < workmem_bytes_power_of_two; 642 | ++workmem_size) { 643 | if ((1 << (workmem_size-1)) >= num_to_read) 644 | break; 645 | } 646 | } 647 | p = csnappy_compress_fragment( 648 | input, num_to_read, compressed, 649 | working_memory, workmem_size); 650 | written += (p - compressed); 651 | compressed = p; 652 | input_length -= num_to_read; 653 | input += num_to_read; 654 | } 655 | *compressed_length = written; 656 | } 657 | #if defined(__KERNEL__) && !defined(STATIC) 658 | EXPORT_SYMBOL(csnappy_compress); 659 | 660 | MODULE_LICENSE("BSD"); 661 | MODULE_DESCRIPTION("Snappy Compressor"); 662 | #endif 663 | -------------------------------------------------------------------------------- /csnappy_decompress.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011, Google Inc. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following disclaimer 13 | in the documentation and/or other materials provided with the 14 | distribution. 15 | * Neither the name of Google Inc. nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 
18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | File modified for the Linux Kernel by 32 | Zeev Tarantov 33 | 34 | File modified for Sereal by 35 | Steffen Mueller 36 | */ 37 | 38 | #include "csnappy_internal.h" 39 | #ifdef __KERNEL__ 40 | #include 41 | #include 42 | #endif 43 | #include "csnappy.h" 44 | 45 | int 46 | csnappy_get_uncompressed_length( 47 | const char *src, 48 | uint32_t src_len, 49 | uint32_t *result) 50 | { 51 | const char *src_base = src; 52 | uint32_t shift = 0; 53 | uint8_t c; 54 | /* Length is encoded in 1..5 bytes */ 55 | *result = 0; 56 | for (;;) { 57 | if (shift >= 32) 58 | goto err_out; 59 | if (src_len == 0) 60 | goto err_out; 61 | c = *(const uint8_t *)src++; 62 | src_len -= 1; 63 | *result |= (uint32_t)(c & 0x7f) << shift; 64 | if (c < 128) 65 | break; 66 | shift += 7; 67 | } 68 | return src - src_base; 69 | err_out: 70 | return CSNAPPY_E_HEADER_BAD; 71 | } 72 | #if defined(__KERNEL__) && !defined(STATIC) 73 | EXPORT_SYMBOL(csnappy_get_uncompressed_length); 74 | #endif 75 | 76 | #if defined(__arm__) && !defined(ARCH_ARM_HAVE_UNALIGNED) 77 | int csnappy_decompress_noheader( 78 | const char *src_, 79 | uint32_t src_remaining, 80 | char *dst, 81 | uint32_t *dst_len) 82 | { 83 | const uint8_t * src = (const uint8_t *)src_; 84 | const uint8_t * const src_end = src + src_remaining; 85 | char * const dst_base = dst; 86 | char * const dst_end = dst + *dst_len; 87 | while (src < src_end) { 88 | uint32_t opcode = *src++; 89 | uint32_t length = (opcode >> 2) + 1; 90 | const uint8_t *copy_src; 91 | if (likely((opcode & 3) == 0)) { 92 | if (unlikely(length > 60)) { 93 | uint32_t extra_bytes = length - 60; 94 | int shift, max_shift; 95 | if (unlikely(src + extra_bytes > src_end)) 96 | return CSNAPPY_E_DATA_MALFORMED; 97 | length = 0; 98 | for (shift = 0, max_shift = extra_bytes*8; 99 | shift < max_shift; 100 | shift += 8) 101 | length |= *src++ << shift; 102 | ++length; 103 | } 104 | if (unlikely(src + length > src_end)) 105 | return CSNAPPY_E_DATA_MALFORMED; 106 | copy_src = src; 107 | src += length; 108 | } else { 109 | uint32_t offset; 110 | if (likely((opcode & 3) == 1)) { 111 | if (unlikely(src + 1 > src_end)) 112 | return CSNAPPY_E_DATA_MALFORMED; 113 | length = ((length - 1) & 7) + 4; 114 | offset = ((opcode >> 5) << 8) + *src++; 115 | } else if (likely((opcode & 3) == 2)) { 116 | if (unlikely(src + 2 > src_end)) 117 | return CSNAPPY_E_DATA_MALFORMED; 118 | offset = src[0] | (src[1] << 8); 119 | src += 2; 120 | } else { 121 | if (unlikely(src + 4 > src_end)) 122 | return CSNAPPY_E_DATA_MALFORMED; 123 | offset = src[0] | (src[1] << 8) | 124 | (src[2] << 16) | (src[3] << 24); 125 | src += 4; 126 | } 127 | if (unlikely(!offset || (offset > dst - dst_base))) 128 | return CSNAPPY_E_DATA_MALFORMED; 129 | 
copy_src = (const uint8_t *)dst - offset; 130 | } 131 | if (unlikely(dst + length > dst_end)) 132 | return CSNAPPY_E_OUTPUT_OVERRUN; 133 | do *dst++ = *copy_src++; while (--length); 134 | } 135 | *dst_len = dst - dst_base; 136 | return CSNAPPY_E_OK; 137 | } 138 | #else /* !(arm with no unaligned access) */ 139 | /* 140 | * Data stored per entry in lookup table: 141 | * Range Bits-used Description 142 | * ------------------------------------ 143 | * 1..64 0..7 Literal/copy length encoded in opcode byte 144 | * 0..7 8..10 Copy offset encoded in opcode byte / 256 145 | * 0..4 11..13 Extra bytes after opcode 146 | * 147 | * We use eight bits for the length even though 7 would have sufficed 148 | * because of efficiency reasons: 149 | * (1) Extracting a byte is faster than a bit-field 150 | * (2) It properly aligns copy offset so we do not need a <<8 151 | */ 152 | static const uint16_t char_table[256] = { 153 | 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, 154 | 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, 155 | 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, 156 | 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, 157 | 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, 158 | 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, 159 | 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, 160 | 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, 161 | 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, 162 | 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, 163 | 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, 164 | 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, 165 | 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, 166 | 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, 167 | 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, 168 | 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, 169 | 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, 170 | 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, 171 | 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, 172 | 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, 173 | 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, 174 | 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, 175 | 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, 176 | 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, 177 | 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, 178 | 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, 179 | 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, 180 | 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, 181 | 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, 182 | 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, 183 | 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, 184 | 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 185 | }; 186 | 187 | /* 188 | * Copy "len" bytes from "src" to "op", one byte at a time. Used for 189 | * handling COPY operations where the input and output regions may 190 | * overlap. 
For example, suppose: 191 | * src == "ab" 192 | * op == src + 2 193 | * len == 20 194 | * After IncrementalCopy(src, op, len), the result will have 195 | * eleven copies of "ab" 196 | * ababababababababababab 197 | * Note that this does not match the semantics of either memcpy() 198 | * or memmove(). 199 | */ 200 | static INLINE void IncrementalCopy(const char *src, char *op, int len) 201 | { 202 | DCHECK_GT(len, 0); 203 | do { 204 | *op++ = *src++; 205 | } while (--len > 0); 206 | } 207 | 208 | /* 209 | * Equivalent to IncrementalCopy except that it can write up to ten extra 210 | * bytes after the end of the copy, and that it is faster. 211 | * 212 | * The main part of this loop is a simple copy of eight bytes at a time until 213 | * we've copied (at least) the requested amount of bytes. However, if op and 214 | * src are less than eight bytes apart (indicating a repeating pattern of 215 | * length < 8), we first need to expand the pattern in order to get the correct 216 | * results. For instance, if the buffer looks like this, with the eight-byte 217 | * <src> and <op> patterns marked as intervals: 218 | * 219 | * abxxxxxxxxxxxx 220 | * [------] src 221 | * [------] op 222 | * 223 | * a single eight-byte copy from <src> to <op> will repeat the pattern once, 224 | * after which we can move <op> two bytes without moving <src>: 225 | * 226 | * ababxxxxxxxxxx 227 | * [------] src 228 | * [------] op 229 | * 230 | * and repeat the exercise until the two no longer overlap. 231 | * 232 | * This allows us to do very well in the special case of one single byte 233 | * repeated many times, without taking a big hit for more general cases. 234 | * 235 | * The worst case of extra writing past the end of the match occurs when 236 | * op - src == 1 and len == 1; the last copy will read from byte positions 237 | * [0..7] and write to [4..11], whereas it was only supposed to write to 238 | * position 1. Thus, ten excess bytes. 239 | */ 240 | static const int kMaxIncrementCopyOverflow = 10; 241 | static INLINE void IncrementalCopyFastPath(const char *src, char *op, int len) 242 | { 243 | while (op - src < 8) { 244 | UnalignedCopy64(src, op); 245 | len -= op - src; 246 | op += op - src; 247 | } 248 | while (len > 0) { 249 | UnalignedCopy64(src, op); 250 | src += 8; 251 | op += 8; 252 | len -= 8; 253 | } 254 | } 255 | 256 | 257 | /* A type that writes to a flat array.
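 * (A worked example for IncrementalCopyFastPath above: with op - src == 2 and
 * len == 20, the first loop runs twice, growing the gap from 2 to 4 and then
 * to 8 while len drops to 14; the second loop then copies two more eight-byte
 * blocks. The few bytes written past the requested 20 are the scratch that
 * kMaxIncrementCopyOverflow accounts for.)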
*/ 258 | struct SnappyArrayWriter { 259 | char *base; 260 | char *op; 261 | char *op_limit; 262 | }; 263 | 264 | static INLINE int 265 | SAW__AppendFastPath(struct SnappyArrayWriter *this, 266 | const char *ip, uint32_t len) 267 | { 268 | char *op = this->op; 269 | const uint32_t space_left = this->op_limit - op; 270 | if (likely(space_left >= 16)) { 271 | UnalignedCopy64(ip, op); 272 | UnalignedCopy64(ip + 8, op + 8); 273 | } else { 274 | if (unlikely(space_left < len)) 275 | return CSNAPPY_E_OUTPUT_OVERRUN; 276 | memcpy(op, ip, len); 277 | } 278 | this->op = op + len; 279 | return CSNAPPY_E_OK; 280 | } 281 | 282 | static INLINE int 283 | SAW__Append(struct SnappyArrayWriter *this, 284 | const char *ip, uint32_t len) 285 | { 286 | char *op = this->op; 287 | const uint32_t space_left = this->op_limit - op; 288 | if (unlikely(space_left < len)) 289 | return CSNAPPY_E_OUTPUT_OVERRUN; 290 | memcpy(op, ip, len); 291 | this->op = op + len; 292 | return CSNAPPY_E_OK; 293 | } 294 | 295 | static INLINE int 296 | SAW__AppendFromSelf(struct SnappyArrayWriter *this, 297 | uint32_t offset, uint32_t len) 298 | { 299 | char *op = this->op; 300 | const uint32_t space_left = this->op_limit - op; 301 | /* -1u catches offset==0 */ 302 | if (op - this->base <= offset - 1u) 303 | return CSNAPPY_E_DATA_MALFORMED; 304 | /* Fast path, used for the majority (70-80%) of dynamic invocations. */ 305 | if (len <= 16 && offset >= 8 && space_left >= 16) { 306 | UnalignedCopy64(op - offset, op); 307 | UnalignedCopy64(op - offset + 8, op + 8); 308 | } else if (space_left >= (len + kMaxIncrementCopyOverflow)) { 309 | IncrementalCopyFastPath(op - offset, op, len); 310 | } else { 311 | if (space_left < len) 312 | return CSNAPPY_E_OUTPUT_OVERRUN; 313 | IncrementalCopy(op - offset, op, len); 314 | } 315 | this->op = op + len; 316 | return CSNAPPY_E_OK; 317 | } 318 | 319 | int 320 | csnappy_decompress_noheader( 321 | const char *src, 322 | uint32_t src_remaining, 323 | char *dst, 324 | uint32_t *dst_len) 325 | { 326 | struct SnappyArrayWriter writer; 327 | const char *end_minus5 = src + src_remaining - 5; 328 | uint32_t length, trailer, opword, extra_bytes; 329 | int ret, available; 330 | uint8_t opcode; 331 | char scratch[5]; 332 | writer.op = writer.base = dst; 333 | writer.op_limit = writer.op + *dst_len; 334 | #define LOOP_COND() \ 335 | if (unlikely(src >= end_minus5)) { \ 336 | available = end_minus5 + 5 - src; \ 337 | if (unlikely(available <= 0)) \ 338 | goto out; \ 339 | memmove(scratch, src, available); \ 340 | src = scratch; \ 341 | end_minus5 = scratch + available - 5; \ 342 | } 343 | 344 | LOOP_COND(); 345 | for (;;) { 346 | opcode = *(const uint8_t *)src++; 347 | if (opcode & 0x3) { 348 | opword = char_table[opcode]; 349 | extra_bytes = opword >> 11; 350 | trailer = get_unaligned_le(src, extra_bytes); 351 | length = opword & 0xff; 352 | src += extra_bytes; 353 | trailer += opword & 0x700; 354 | ret = SAW__AppendFromSelf(&writer, trailer, length); 355 | if (ret < 0) 356 | return ret; 357 | LOOP_COND(); 358 | } else { 359 | length = (opcode >> 2) + 1; 360 | available = end_minus5 + 5 - src; 361 | if (length <= 16 && available >= 16) { 362 | if ((ret = SAW__AppendFastPath(&writer, src, length)) < 0) 363 | return ret; 364 | src += length; 365 | LOOP_COND(); 366 | continue; 367 | } 368 | if (unlikely(length > 60)) { 369 | extra_bytes = length - 60; 370 | length = get_unaligned_le(src, extra_bytes) + 1; 371 | src += extra_bytes; 372 | available = end_minus5 + 5 - src; 373 | } 374 | if (unlikely(available < 
(int32_t)length)) 375 | return CSNAPPY_E_DATA_MALFORMED; 376 | ret = SAW__Append(&writer, src, length); 377 | if (ret < 0) 378 | return ret; 379 | src += length; 380 | LOOP_COND(); 381 | } 382 | } 383 | #undef LOOP_COND 384 | out: 385 | *dst_len = writer.op - writer.base; 386 | return CSNAPPY_E_OK; 387 | } 388 | #endif /* optimized for unaligned arch */ 389 | 390 | #if defined(__KERNEL__) && !defined(STATIC) 391 | EXPORT_SYMBOL(csnappy_decompress_noheader); 392 | #endif 393 | 394 | int 395 | csnappy_decompress( 396 | const char *src, 397 | uint32_t src_len, 398 | char *dst, 399 | uint32_t dst_len) 400 | { 401 | int n; 402 | uint32_t olen = 0; 403 | /* Read uncompressed length from the front of the compressed input */ 404 | n = csnappy_get_uncompressed_length(src, src_len, &olen); 405 | if (unlikely(n < CSNAPPY_E_OK)) 406 | return n; 407 | /* Protect against possible DoS attack */ 408 | if (unlikely(olen > dst_len)) 409 | return CSNAPPY_E_OUTPUT_INSUF; 410 | return csnappy_decompress_noheader(src + n, src_len - n, dst, &olen); 411 | } 412 | #if defined(__KERNEL__) && !defined(STATIC) 413 | EXPORT_SYMBOL(csnappy_decompress); 414 | 415 | MODULE_LICENSE("BSD"); 416 | MODULE_DESCRIPTION("Snappy Decompressor"); 417 | #endif 418 | -------------------------------------------------------------------------------- /csnappy_internal.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following disclaimer 12 | in the documentation and/or other materials provided with the 13 | distribution. 14 | * Neither the name of Google Inc. nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | Various stubs for the open-source version of Snappy. 
31 | 32 | File modified for the Linux Kernel by 33 | Zeev Tarantov 34 | 35 | File modified for Sereal by 36 | Steffen Mueller 37 | */ 38 | 39 | #ifndef CSNAPPY_INTERNAL_H_ 40 | #define CSNAPPY_INTERNAL_H_ 41 | 42 | #include "csnappy_compat.h" 43 | 44 | #ifndef __KERNEL__ 45 | #include "csnappy_internal_userspace.h" 46 | #include 47 | #else 48 | 49 | #include 50 | #include 51 | #include 52 | #include 53 | #include 54 | 55 | #if (defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)) || \ 56 | (!defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)) 57 | #error either __LITTLE_ENDIAN or __BIG_ENDIAN must be defined 58 | #endif 59 | #if defined(__LITTLE_ENDIAN) 60 | #define __BYTE_ORDER __LITTLE_ENDIAN 61 | #else 62 | #define __BYTE_ORDER __BIG_ENDIAN 63 | #endif 64 | 65 | #ifdef DEBUG 66 | #define DCHECK(cond) if (!(cond)) \ 67 | printk(KERN_DEBUG "assert failed @ %s:%i\n", \ 68 | __FILE__, __LINE__) 69 | #else 70 | #define DCHECK(cond) 71 | #endif 72 | 73 | #define UNALIGNED_LOAD16(_p) get_unaligned((const uint16_t *)(_p)) 74 | #define UNALIGNED_LOAD32(_p) get_unaligned((const uint32_t *)(_p)) 75 | #define UNALIGNED_LOAD64(_p) get_unaligned((const uint64_t *)(_p)) 76 | #define UNALIGNED_STORE16(_p, _val) put_unaligned((_val), (uint16_t *)(_p)) 77 | #define UNALIGNED_STORE32(_p, _val) put_unaligned((_val), (uint32_t *)(_p)) 78 | #define UNALIGNED_STORE64(_p, _val) put_unaligned((_val), (uint64_t *)(_p)) 79 | 80 | #define FindLSBSetNonZero(n) __builtin_ctz(n) 81 | #define FindLSBSetNonZero64(n) __builtin_ctzll(n) 82 | 83 | #endif /* __KERNEL__ */ 84 | 85 | #if (!defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)) || ! defined(__BYTE_ORDER) 86 | # error either __LITTLE_ENDIAN or __BIG_ENDIAN, plus __BYTE_ORDER must be defined 87 | #endif 88 | 89 | #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || defined(__ARMV6__) || \ 90 | defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) 91 | # define ARCH_ARM_HAVE_UNALIGNED 92 | #endif 93 | 94 | 95 | static INLINE void UnalignedCopy64(const void *src, void *dst) { 96 | #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(ARCH_ARM_HAVE_UNALIGNED) || defined(__aarch64__) 97 | if ((sizeof(void *) == 8) || (sizeof(long) == 8)) { 98 | UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src)); 99 | } else { 100 | /* This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64 101 | on some platforms, in particular ARM. 
*/ 102 | const uint8_t *src_bytep = (const uint8_t *)src; 103 | uint8_t *dst_bytep = (uint8_t *)dst; 104 | 105 | UNALIGNED_STORE32(dst_bytep, UNALIGNED_LOAD32(src_bytep)); 106 | UNALIGNED_STORE32(dst_bytep + 4, UNALIGNED_LOAD32(src_bytep + 4)); 107 | } 108 | #else 109 | const uint8_t *src_bytep = (const uint8_t *)src; 110 | uint8_t *dst_bytep = (uint8_t *)dst; 111 | dst_bytep[0] = src_bytep[0]; 112 | dst_bytep[1] = src_bytep[1]; 113 | dst_bytep[2] = src_bytep[2]; 114 | dst_bytep[3] = src_bytep[3]; 115 | dst_bytep[4] = src_bytep[4]; 116 | dst_bytep[5] = src_bytep[5]; 117 | dst_bytep[6] = src_bytep[6]; 118 | dst_bytep[7] = src_bytep[7]; 119 | #endif 120 | } 121 | 122 | #if defined(__arm__) 123 | #if defined(ARCH_ARM_HAVE_UNALIGNED) 124 | static INLINE uint32_t get_unaligned_le(const void *p, uint32_t n) 125 | { 126 | uint32_t wordmask = (1U << (8 * n)) - 1; 127 | return get_unaligned_le32(p) & wordmask; 128 | } 129 | #else 130 | extern uint32_t get_unaligned_le_armv5(const void *p, uint32_t n); 131 | #define get_unaligned_le get_unaligned_le_armv5 132 | #endif 133 | #else 134 | static INLINE uint32_t get_unaligned_le(const void *p, uint32_t n) 135 | { 136 | /* Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits */ 137 | static const uint32_t wordmask[] = { 138 | 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu 139 | }; 140 | return get_unaligned_le32(p) & wordmask[n]; 141 | } 142 | #endif 143 | 144 | #define DCHECK_EQ(a, b) DCHECK(((a) == (b))) 145 | #define DCHECK_NE(a, b) DCHECK(((a) != (b))) 146 | #define DCHECK_GT(a, b) DCHECK(((a) > (b))) 147 | #define DCHECK_GE(a, b) DCHECK(((a) >= (b))) 148 | #define DCHECK_LT(a, b) DCHECK(((a) < (b))) 149 | #define DCHECK_LE(a, b) DCHECK(((a) <= (b))) 150 | 151 | enum { 152 | LITERAL = 0, 153 | COPY_1_BYTE_OFFSET = 1, /* 3 bit length + 3 bits of offset in opcode */ 154 | COPY_2_BYTE_OFFSET = 2, 155 | COPY_4_BYTE_OFFSET = 3 156 | }; 157 | 158 | #endif /* CSNAPPY_INTERNAL_H_ */ 159 | -------------------------------------------------------------------------------- /csnappy_internal_userspace.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following disclaimer 12 | in the documentation and/or other materials provided with the 13 | distribution. 14 | * Neither the name of Google Inc. nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | Various stubs for the open-source version of Snappy. 31 | 32 | File modified by 33 | Zeev Tarantov 34 | 35 | File modified for Sereal by 36 | Steffen Mueller 37 | Yves Orton 38 | 39 | */ 40 | 41 | #ifndef CSNAPPY_INTERNAL_USERSPACE_H_ 42 | #define CSNAPPY_INTERNAL_USERSPACE_H_ 43 | 44 | /*note the original version of this file checked for MS version, but MS will *never* support 45 | * anything but C89, so the version check is bogus. */ 46 | #if defined(_MSC_VER) 47 | typedef unsigned __int8 uint8_t; 48 | typedef unsigned __int16 uint16_t; 49 | typedef unsigned __int32 uint32_t; 50 | typedef unsigned __int64 uint64_t; 51 | typedef __int32 int32_t; /* Sereal specific change, see csnappy_decompress.c(271) : error C2065: 'int32_t' : undeclared identifier */ 52 | /* the following define is Sereal specific, as MS C89 compilers do not know about "inline" */ 53 | #define inline __inline 54 | #ifdef _M_X64 55 | # define __x86_64__ 56 | # define __x86_64 57 | # define __amd64__ 58 | # define __amd64 59 | #endif 60 | #ifdef _M_IX86 61 | # define __i386__ 62 | # define __i386 63 | # define i386 64 | # define _X86_ 65 | #endif 66 | #ifdef _M_IA64 67 | # define __ia64__ 68 | # define __ia64 69 | # define __IA64__ 70 | # define __itanium__ 71 | #endif 72 | 73 | #else 74 | 75 | #if defined(__SUNPRO_C) || defined(_AIX) 76 | # include 77 | #else 78 | # include 79 | #endif 80 | 81 | #endif 82 | 83 | #ifdef _GNU_SOURCE 84 | #define min(x, y) (__extension__ ({ \ 85 | typeof(x) _min1 = (x); \ 86 | typeof(y) _min2 = (y); \ 87 | (void) (&_min1 == &_min2); \ 88 | _min1 < _min2 ? _min1 : _min2; })) 89 | #else 90 | #define min(x, y) (((x) < (y)) ? (x) : (y)) 91 | #endif 92 | 93 | /* Static prediction hints. 
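 * likely()/unlikely() below expand to __builtin_expect() under GCC and to the
 * bare expression elsewhere. For example, the writer helpers use
 *   if (unlikely(space_left < len)) return CSNAPPY_E_OUTPUT_OVERRUN;
 * so that the overrun check is laid out as the cold path.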
*/ 94 | #ifndef __GNUC__ 95 | #define __builtin_expect(a,b) a 96 | #endif 97 | #define likely(x) __builtin_expect(!!(x), 1) 98 | #define unlikely(x) __builtin_expect(!!(x), 0) 99 | 100 | 101 | #ifdef DEBUG 102 | #include 103 | #define DCHECK(cond) assert(cond) 104 | #else 105 | #define DCHECK(cond) 106 | #endif 107 | 108 | #include "csnappy_compat.h" 109 | 110 | /* 111 | Uses code from http://code.google.com/p/exfat/source/browse/trunk/libexfat/byteorder.h 112 | with 3-clause BSD license instead of GPL, with permission from: 113 | Andrew Nayenko 114 | Albert Lee 115 | */ 116 | #if defined(_MSC_VER) 117 | 118 | #include 119 | #define bswap_16(x) _byteswap_ushort(x) 120 | #define bswap_32(x) _byteswap_ulong(x) 121 | #define bswap_64(x) _byteswap_uint64(x) 122 | #define __BIG_ENDIAN 4321 123 | #define __LITTLE_ENDIAN 1234 124 | #define __BYTE_ORDER LITTLE_ENDIAN 125 | 126 | #elif defined(_AIX) 127 | 128 | #include 129 | #define __LITTLE_ENDIAN LITTLE_ENDIAN 130 | #define __BIG_ENDIAN BIG_ENDIAN 131 | #define __BYTE_ORDER __BIG_ENDIAN 132 | 133 | #elif defined(__APPLE__) 134 | 135 | #include 136 | #include 137 | #define bswap_16(x) OSSwapInt16(x) 138 | #define bswap_32(x) OSSwapInt32(x) 139 | #define bswap_64(x) OSSwapInt64(x) 140 | #define __BYTE_ORDER BYTE_ORDER 141 | #define __LITTLE_ENDIAN LITTLE_ENDIAN 142 | #define __BIG_ENDIAN BIG_ENDIAN 143 | 144 | #elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) 145 | 146 | #include 147 | #define bswap_16(x) bswap16(x) 148 | #define bswap_32(x) bswap32(x) 149 | #define bswap_64(x) bswap64(x) 150 | #define __BYTE_ORDER _BYTE_ORDER 151 | #define __LITTLE_ENDIAN _LITTLE_ENDIAN 152 | #define __BIG_ENDIAN _BIG_ENDIAN 153 | 154 | #elif defined(__OpenBSD__) 155 | 156 | #include 157 | #define bswap_16(x) swap16(x) 158 | #define bswap_32(x) swap32(x) 159 | #define bswap_64(x) swap64(x) 160 | #define __BYTE_ORDER _BYTE_ORDER 161 | #define __LITTLE_ENDIAN _LITTLE_ENDIAN 162 | #define __BIG_ENDIAN _BIG_ENDIAN 163 | 164 | #elif defined(__MINGW32__) 165 | #include 166 | #define __BYTE_ORDER BYTE_ORDER 167 | #define __LITTLE_ENDIAN LITTLE_ENDIAN 168 | #define __BIG_ENDIAN BIG_ENDIAN 169 | 170 | 171 | #elif defined(__sun) 172 | 173 | #include 174 | #define bswap_16(x) BSWAP_16(x) 175 | #define bswap_32(x) BSWAP_32(x) 176 | #define bswap_64(x) BSWAP_64(x) 177 | #define __LITTLE_ENDIAN 1234 178 | #define __BIG_ENDIAN 4321 179 | #ifdef _LITTLE_ENDIAN 180 | #define __BYTE_ORDER __LITTLE_ENDIAN 181 | #else 182 | #define __BYTE_ORDER __BIG_ENDIAN 183 | #endif 184 | 185 | #elif defined(__hpux) 186 | 187 | #ifdef __LP64__ 188 | #define __LITTLE_ENDIAN 12345678 189 | #define __BIG_ENDIAN 87654321 190 | #define int64_t long 191 | #else 192 | #define __LITTLE_ENDIAN 1234 193 | #define __BIG_ENDIAN 4321 194 | #define int64_t long long 195 | #endif 196 | 197 | #define __BYTE_ORDER __BIG_ENDIAN /* HP-UX always */ 198 | #define int32_t int 199 | #define int16_t short 200 | 201 | #define __SNAPPY_STRICT_ALIGN 202 | 203 | #elif defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) 204 | 205 | #ifndef __BIG_ENDIAN 206 | #define __BIG_ENDIAN 87654321 207 | #endif 208 | #ifndef __LITTLE_ENDIAN 209 | #define __LITTLE_ENDIAN 12345678 210 | #endif 211 | #ifndef __BYTE_ORDER 212 | #define __BYTE_ORDER __BIG_ENDIAN 213 | #endif 214 | 215 | #define __SNAPPY_STRICT_ALIGN 216 | 217 | #elif defined(__GNUC__) || defined(__ANDROID__) || defined(__CYGWIN__) 218 | 219 | #include 220 | #include 221 | 222 | #endif 223 | 224 | #ifndef bswap_16 225 | #define 
bswap_16(x) \ 226 | (((uint16_t)(x) & 0xFF00) >> 8 | \ 227 | ((uint16_t)(x) & 0x00FF) << 8) 228 | #endif 229 | 230 | #ifndef bswap_32 231 | #define bswap_32(x) \ 232 | (((uint32_t)(x) & 0xFF000000) >> 24 | \ 233 | ((uint32_t)(x) & 0x00FF0000) >> 8 | \ 234 | ((uint32_t)(x) & 0x0000FF00) << 8 | \ 235 | ((uint32_t)(x) & 0x000000FF) << 24) 236 | #endif 237 | 238 | #ifndef bswap_64 239 | #define bswap_64(x) \ 240 | (((uint64_t)(x) & 0xFF00000000000000) >> 56 | \ 241 | ((uint64_t)(x) & 0x00FF000000000000) >> 40 | \ 242 | ((uint64_t)(x) & 0x0000FF0000000000) >> 24 | \ 243 | ((uint64_t)(x) & 0x000000FF00000000) >> 8 | \ 244 | ((uint64_t)(x) & 0x00000000FF000000) << 8 | \ 245 | ((uint64_t)(x) & 0x0000000000FF0000) << 24 | \ 246 | ((uint64_t)(x) & 0x000000000000FF00) << 40 | \ 247 | ((uint64_t)(x) & 0x00000000000000FF) << 56) 248 | #endif 249 | 250 | 251 | /* Potentially unaligned loads and stores. */ 252 | 253 | #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) 254 | #if defined(__GNUC__) 255 | typedef uint16_t my_uint16_t __attribute__((aligned(1))); 256 | typedef uint32_t my_uint32_t __attribute__((aligned(1))); 257 | typedef uint64_t my_uint64_t __attribute__((aligned(1))); 258 | #else 259 | typedef uint16_t my_uint16_t; 260 | typedef uint32_t my_uint32_t; 261 | typedef uint64_t my_uint64_t; 262 | #endif 263 | 264 | #define UNALIGNED_LOAD16(_p) (*(const my_uint16_t*)(_p)) 265 | #define UNALIGNED_LOAD32(_p) (*(const my_uint32_t*)(_p)) 266 | #define UNALIGNED_LOAD64(_p) (*(const my_uint64_t*)(_p)) 267 | 268 | #define UNALIGNED_STORE16(_p, _val) (*(my_uint16_t*)(_p) = (_val)) 269 | #define UNALIGNED_STORE32(_p, _val) (*(my_uint32_t*)(_p) = (_val)) 270 | #define UNALIGNED_STORE64(_p, _val) (*(my_uint64_t*)(_p) = (_val)) 271 | 272 | #elif defined(__arm__) && \ 273 | !defined(__ARM_ARCH_4__) && \ 274 | !defined(__ARM_ARCH_4T__) && /* http://wiki.debian.org/ArmEabiPort#Choice_of_minimum_CPU */ \ 275 | !defined(__MARM_ARMV4__) && \ 276 | !defined(_ARMV4I_) && \ 277 | !defined(__ARM_ARCH_5__) && \ 278 | !defined(__ARM_ARCH_5T__) && \ 279 | !defined(__ARM_ARCH_5E__) && \ 280 | !defined(__ARM_ARCH_5TE__) && \ 281 | !defined(__ARM_ARCH_5TEJ__) && \ 282 | !defined(__MARM_ARMV5__) 283 | 284 | #define UNALIGNED_LOAD16(_p) (*(const uint16_t*)(_p)) 285 | #define UNALIGNED_LOAD32(_p) (*(const uint32_t*)(_p)) 286 | #define UNALIGNED_STORE16(_p, _val) (*(uint16_t*)(_p) = (_val)) 287 | #define UNALIGNED_STORE32(_p, _val) (*(uint32_t*)(_p) = (_val)) 288 | 289 | #pragma pack(1) 290 | struct una_u64 { uint64_t x; }; 291 | #pragma pack() 292 | 293 | static INLINE uint64_t UNALIGNED_LOAD64(const void *p) 294 | { 295 | const struct una_u64 *ptr = (const struct una_u64 *)p; 296 | return ptr->x; 297 | } 298 | 299 | static INLINE void UNALIGNED_STORE64(void *p, uint64_t v) 300 | { 301 | struct una_u64 *ptr = (struct una_u64 *)p; 302 | ptr->x = v; 303 | } 304 | 305 | #elif defined(__SNAPPY_STRICT_ALIGN) || defined(__sparc) || defined(__sparc__) /* strict architectures */ 306 | 307 | /* For these platforms, there really are no unaligned loads/stores. 308 | * Read/write everything as uint8_t. Smart compilers might recognize 309 | * these patterns and generate something smart. */ 310 | 311 | /* Possible future enhancement: see if the ptr is evenly divisible 312 | * (as uintNN_t) by 2/4/8, and if so, do the cast-as-uintNN_t-ptr- 313 | * and-deref-as-uintNN_t. Balancing act: adding the branch 314 | * will slow things down, while reading/writing aligned might speed 315 | * things up. 
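 * A sketch of that idea for the 32-bit load, hypothetical and untested, and
 * assuming a uintptr_t type is available:
 *   if (((uintptr_t)p & 3) == 0)
 *       return *(const uint32_t *)p;
 * with everything else falling through to the byte-by-byte code below; the
 * same test with & 1 and & 7 would cover the 16- and 64-bit cases.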
*/ 316 | 317 | #if __BYTE_ORDER == __BIG_ENDIAN 318 | 319 | static INLINE uint16_t UNALIGNED_LOAD16(const void *p) 320 | { 321 | return 322 | (uint16_t)(((uint8_t*)p)[0]) << 8 | 323 | (uint16_t)(((uint8_t*)p)[1]); 324 | } 325 | 326 | static INLINE uint32_t UNALIGNED_LOAD32(const void *p) 327 | { 328 | return 329 | (uint32_t)(((uint8_t*)p)[0]) << 24 | 330 | (uint32_t)(((uint8_t*)p)[1]) << 16 | 331 | (uint32_t)(((uint8_t*)p)[2]) << 8 | 332 | (uint32_t)(((uint8_t*)p)[3]); 333 | } 334 | 335 | static INLINE uint64_t UNALIGNED_LOAD64(const void *p) 336 | { 337 | return 338 | (uint64_t)((uint8_t*)p)[0] << 56 | 339 | (uint64_t)((uint8_t*)p)[1] << 48 | 340 | (uint64_t)((uint8_t*)p)[2] << 40 | 341 | (uint64_t)((uint8_t*)p)[3] << 32 | 342 | (uint64_t)((uint8_t*)p)[4] << 24 | 343 | (uint64_t)((uint8_t*)p)[5] << 16 | 344 | (uint64_t)((uint8_t*)p)[6] << 8 | 345 | (uint64_t)((uint8_t*)p)[7]; 346 | } 347 | 348 | static INLINE void UNALIGNED_STORE16(void *p, uint16_t v) 349 | { 350 | uint8_t* s = (uint8_t*)p; 351 | s[0] = (v & 0xFF00) >> 8; 352 | s[1] = (v & 0x00FF); 353 | } 354 | 355 | static INLINE void UNALIGNED_STORE32(void *p, uint32_t v) 356 | { 357 | uint8_t* s = (uint8_t*)p; 358 | s[0] = (v & 0xFF000000) >> 24; 359 | s[1] = (v & 0x00FF0000) >> 16; 360 | s[2] = (v & 0x0000FF00) >> 8; 361 | s[3] = (v & 0x000000FF); 362 | } 363 | 364 | static INLINE void UNALIGNED_STORE64(void *p, uint64_t v) 365 | { 366 | uint8_t* s = (uint8_t*)p; 367 | s[0] = (v & 0xFF00000000000000) >> 56; 368 | s[1] = (v & 0x00FF000000000000) >> 48; 369 | s[2] = (v & 0x0000FF0000000000) >> 40; 370 | s[3] = (v & 0x000000FF00000000) >> 32; 371 | s[4] = (v & 0x00000000FF000000) >> 24; 372 | s[5] = (v & 0x0000000000FF0000) >> 16; 373 | s[6] = (v & 0x000000000000FF00) >> 8; 374 | s[7] = (v & 0x00000000000000FF); 375 | } 376 | 377 | #endif /* #if __BYTE_ORDER == __BIG_ENDIAN */ 378 | 379 | #if __BYTE_ORDER == __LITTLE_ENDIAN 380 | 381 | static INLINE uint16_t UNALIGNED_LOAD16(const void *p) 382 | { 383 | return 384 | (uint16_t)(((uint8_t*)p)[1]) << 8 | 385 | (uint16_t)(((uint8_t*)p)[0]); 386 | } 387 | 388 | static INLINE uint32_t UNALIGNED_LOAD32(const void *p) 389 | { 390 | return 391 | (uint32_t)(((uint8_t*)p)[3]) << 24 | 392 | (uint32_t)(((uint8_t*)p)[2]) << 16 | 393 | (uint32_t)(((uint8_t*)p)[1]) << 8 | 394 | (uint32_t)(((uint8_t*)p)[0]); 395 | } 396 | 397 | static INLINE uint64_t UNALIGNED_LOAD64(const void *p) 398 | { 399 | return 400 | (uint64_t)(((uint8_t*)p)[7]) << 56 | 401 | (uint64_t)(((uint8_t*)p)[6]) << 48 | 402 | (uint64_t)(((uint8_t*)p)[5]) << 40 | 403 | (uint64_t)(((uint8_t*)p)[4]) << 32 | 404 | (uint64_t)(((uint8_t*)p)[3]) << 24 | 405 | (uint64_t)(((uint8_t*)p)[2]) << 16 | 406 | (uint64_t)(((uint8_t*)p)[1]) << 8 | 407 | (uint64_t)(((uint8_t*)p)[0]); 408 | } 409 | 410 | static INLINE void UNALIGNED_STORE16(void *p, uint16_t v) 411 | { 412 | uint8_t* s = (uint8_t*)p; 413 | s[1] = (v & 0xFF00) >> 8; 414 | s[0] = (v & 0x00FF); 415 | } 416 | 417 | static INLINE void UNALIGNED_STORE32(void *p, uint32_t v) 418 | { 419 | uint8_t* s = (uint8_t*)p; 420 | s[3] = (v & 0xFF000000) >> 24; 421 | s[2] = (v & 0x00FF0000) >> 16; 422 | s[1] = (v & 0x0000FF00) >> 8; 423 | s[0] = (v & 0x000000FF); 424 | } 425 | 426 | static INLINE void UNALIGNED_STORE64(void *p, uint64_t v) 427 | { 428 | uint8_t* s = (uint8_t*)p; 429 | s[7] = (v & 0xFF00000000000000) >> 56; 430 | s[6] = (v & 0x00FF000000000000) >> 48; 431 | s[5] = (v & 0x0000FF0000000000) >> 40; 432 | s[4] = (v & 0x000000FF00000000) >> 32; 433 | s[3] = (v & 0x00000000FF000000) >> 24; 434 |
s[2] = (v & 0x0000000000FF0000) >> 16; 435 | s[1] = (v & 0x000000000000FF00) >> 8; 436 | s[0] = (v & 0x00000000000000FF); 437 | } 438 | 439 | #endif /* #if __BYTE_ORDER == __LITTLE_ENDIAN */ 440 | 441 | #else /* !(x86 || powerpc) && !(arm && !(old arm architectures)) */ 442 | 443 | /* pragma pack is available in gcc (though originally apparently by 444 | * Microsoft) and in some other compilers (probably inspired by either 445 | * the two big ones), but there is no good portable way to detect 446 | * whether it's supported. The bad news: on platforms where it's not 447 | * supported (unsupported pragmas are ignored) but which do require 448 | * strict alignment, the below pragma pack trickery will fail. 449 | * Therefore this option is the last and the default, and the platforms 450 | * requiring strict alignment are detected earlier. */ 451 | 452 | #pragma pack(1) 453 | struct una_u16 { uint16_t x; }; 454 | struct una_u32 { uint32_t x; }; 455 | struct una_u64 { uint64_t x; }; 456 | #pragma pack() 457 | 458 | static INLINE uint16_t UNALIGNED_LOAD16(const void *p) 459 | { 460 | const struct una_u16 *ptr = (const struct una_u16 *)p; 461 | return ptr->x; 462 | } 463 | 464 | static INLINE uint32_t UNALIGNED_LOAD32(const void *p) 465 | { 466 | const struct una_u32 *ptr = (const struct una_u32 *)p; 467 | return ptr->x; 468 | } 469 | 470 | static INLINE uint64_t UNALIGNED_LOAD64(const void *p) 471 | { 472 | const struct una_u64 *ptr = (const struct una_u64 *)p; 473 | return ptr->x; 474 | } 475 | 476 | static INLINE void UNALIGNED_STORE16(void *p, uint16_t v) 477 | { 478 | struct una_u16 *ptr = (struct una_u16 *)p; 479 | ptr->x = v; 480 | } 481 | 482 | static INLINE void UNALIGNED_STORE32(void *p, uint32_t v) 483 | { 484 | struct una_u32 *ptr = (struct una_u32 *)p; 485 | ptr->x = v; 486 | } 487 | 488 | static INLINE void UNALIGNED_STORE64(void *p, uint64_t v) 489 | { 490 | struct una_u64 *ptr = (struct una_u64 *)p; 491 | ptr->x = v; 492 | } 493 | 494 | #endif /* defining UNALIGNED_LOADNN and UNALIGNED_STORENN */ 495 | 496 | 497 | #if __BYTE_ORDER == __LITTLE_ENDIAN 498 | #define get_unaligned_le32(p) UNALIGNED_LOAD32(p) 499 | #define put_unaligned_le16(v, p) UNALIGNED_STORE16(p, v) 500 | #elif __BYTE_ORDER == __BIG_ENDIAN 501 | static INLINE uint32_t get_unaligned_le32(const void *p) 502 | { 503 | return bswap_32(UNALIGNED_LOAD32(p)); 504 | } 505 | static INLINE void put_unaligned_le16(uint16_t val, void *p) 506 | { 507 | UNALIGNED_STORE16(p, bswap_16(val)); 508 | } 509 | #else 510 | static INLINE uint32_t get_unaligned_le32(const void *p) 511 | { 512 | const uint8_t *b = (const uint8_t *)p; 513 | return b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); 514 | } 515 | static INLINE void put_unaligned_le16(uint16_t val, void *p) 516 | { 517 | uint8_t *b = (uint8_t *)p; 518 | b[0] = val & 255; 519 | b[1] = val >> 8; 520 | } 521 | #endif 522 | 523 | 524 | #if defined(HAVE_BUILTIN_CTZ) 525 | 526 | static INLINE int FindLSBSetNonZero(uint32_t n) 527 | { 528 | return __builtin_ctz(n); 529 | } 530 | 531 | static INLINE int FindLSBSetNonZero64(uint64_t n) 532 | { 533 | return __builtin_ctzll(n); 534 | } 535 | 536 | #else /* Portable versions. 
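 * (A worked example for the loop below: for n = 0x8 the shifts by 16, 8 and 4
 * each leave a non-zero value, so rc falls from 31 to 15, 7 and then 3, while
 * the shifts by 2 and 1 push the remaining bit out of the 32-bit value and are
 * skipped; the function returns 3, the index of the lowest set bit.)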
*/ 537 | 538 | static INLINE int FindLSBSetNonZero(uint32_t n) 539 | { 540 | int rc = 31, i, shift; 541 | uint32_t x; 542 | for (i = 4, shift = 1 << 4; i >= 0; --i) { 543 | x = n << shift; 544 | if (x != 0) { 545 | n = x; 546 | rc -= shift; 547 | } 548 | shift >>= 1; 549 | } 550 | return rc; 551 | } 552 | 553 | /* FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). */ 554 | static INLINE int FindLSBSetNonZero64(uint64_t n) 555 | { 556 | const uint32_t bottombits = (uint32_t)n; 557 | if (bottombits == 0) { 558 | /* Bottom bits are zero, so scan in top bits */ 559 | return 32 + FindLSBSetNonZero((uint32_t)(n >> 32)); 560 | } else { 561 | return FindLSBSetNonZero(bottombits); 562 | } 563 | } 564 | 565 | #endif /* End portable versions. */ 566 | 567 | #endif /* CSNAPPY_INTERNAL_USERSPACE_H_ */ 568 | -------------------------------------------------------------------------------- /debugfs_input.txt: -------------------------------------------------------------------------------- 1 | set_super_value raid_stride 0 2 | set_super_value raid_stripe_width 0 3 | -------------------------------------------------------------------------------- /kernel_3_2_10.patch: -------------------------------------------------------------------------------- 1 | diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig 2 | index 25cdff3..ace8395 100644 3 | --- a/drivers/staging/Kconfig 4 | +++ b/drivers/staging/Kconfig 5 | @@ -88,6 +88,8 @@ source "drivers/staging/sep/Kconfig" 6 | 7 | source "drivers/staging/iio/Kconfig" 8 | 9 | +source "drivers/staging/snappy/Kconfig" 10 | + 11 | source "drivers/staging/zram/Kconfig" 12 | 13 | source "drivers/staging/zcache/Kconfig" 14 | diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile 15 | index a25f3f2..2c0862b 100644 16 | --- a/drivers/staging/Makefile 17 | +++ b/drivers/staging/Makefile 18 | @@ -57,3 +57,5 @@ obj-$(CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4) += ste_rmi4/ 19 | obj-$(CONFIG_DRM_PSB) += gma500/ 20 | obj-$(CONFIG_INTEL_MEI) += mei/ 21 | obj-$(CONFIG_MFD_NVEC) += nvec/ 22 | +obj-$(CONFIG_SNAPPY_COMPRESS) += snappy/ 23 | +obj-$(CONFIG_SNAPPY_DECOMPRESS) += snappy/ 24 | diff --git a/drivers/staging/snappy/Kconfig b/drivers/staging/snappy/Kconfig 25 | new file mode 100644 26 | index 0000000..24f6908 27 | --- /dev/null 28 | +++ b/drivers/staging/snappy/Kconfig 29 | @@ -0,0 +1,5 @@ 30 | +config SNAPPY_COMPRESS 31 | + tristate "Google Snappy Compression" 32 | + 33 | +config SNAPPY_DECOMPRESS 34 | + tristate "Google Snappy Decompression" 35 | diff --git a/drivers/staging/snappy/Makefile b/drivers/staging/snappy/Makefile 36 | new file mode 100644 37 | index 0000000..f5be21d 38 | --- /dev/null 39 | +++ b/drivers/staging/snappy/Makefile 40 | @@ -0,0 +1,7 @@ 41 | +ccflags-y := -std=gnu99 42 | + 43 | +snappy_compress-objs := csnappy_compress.o 44 | +snappy_decompress-objs := csnappy_decompress.o 45 | + 46 | +obj-$(CONFIG_SNAPPY_COMPRESS) += csnappy_compress.o 47 | +obj-$(CONFIG_SNAPPY_DECOMPRESS) += csnappy_decompress.o 48 | diff --git a/drivers/staging/snappy/csnappy.h b/drivers/staging/snappy/csnappy.h 49 | new file mode 100644 50 | index 0000000..97a3e17 51 | --- /dev/null 52 | +++ b/drivers/staging/snappy/csnappy.h 53 | @@ -0,0 +1,129 @@ 54 | +#ifndef __CSNAPPY_H__ 55 | +#define __CSNAPPY_H__ 56 | +/* 57 | +File modified for the Linux Kernel by 58 | +Zeev Tarantov 59 | +*/ 60 | +#ifdef __cplusplus 61 | +extern "C" { 62 | +#endif 63 | + 64 | +#define CSNAPPY_VERSION 4 65 | + 66 | +#define CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO 15 67 | +#define 
CSNAPPY_WORKMEM_BYTES (1 << CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO) 68 | + 69 | +#ifndef __GNUC__ 70 | +#define __attribute__(x) /*NOTHING*/ 71 | +#endif 72 | + 73 | +/* 74 | + * Returns the maximal size of the compressed representation of 75 | + * input data that is "source_len" bytes in length; 76 | + */ 77 | +uint32_t 78 | +csnappy_max_compressed_length(uint32_t source_len) __attribute__((const)); 79 | + 80 | +/* 81 | + * Flat array compression that does not emit the "uncompressed length" 82 | + * prefix. Compresses "input" array to the "output" array. 83 | + * 84 | + * REQUIRES: "input" is at most 32KiB long. 85 | + * REQUIRES: "output" points to an array of memory that is at least 86 | + * "csnappy_max_compressed_length(input_length)" in size. 87 | + * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. 88 | + * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. 89 | + * 90 | + * Returns an "end" pointer into "output" buffer. 91 | + * "end - output" is the compressed size of "input". 92 | + */ 93 | +char* 94 | +csnappy_compress_fragment( 95 | + const char *input, 96 | + const uint32_t input_length, 97 | + char *output, 98 | + void *working_memory, 99 | + const int workmem_bytes_power_of_two); 100 | + 101 | +/* 102 | + * REQUIRES: "compressed" must point to an area of memory that is at 103 | + * least "csnappy_max_compressed_length(input_length)" bytes in length. 104 | + * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. 105 | + * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. 106 | + * 107 | + * Takes the data stored in "input[0..input_length]" and stores 108 | + * it in the array pointed to by "compressed". 109 | + * 110 | + * "*out_compressed_length" is set to the length of the compressed output. 111 | + */ 112 | +void 113 | +csnappy_compress( 114 | + const char *input, 115 | + uint32_t input_length, 116 | + char *compressed, 117 | + uint32_t *out_compressed_length, 118 | + void *working_memory, 119 | + const int workmem_bytes_power_of_two); 120 | + 121 | +/* 122 | + * Reads header of compressed data to get stored length of uncompressed data. 123 | + * REQUIRES: start points to compressed data. 124 | + * REQUIRES: n is length of available compressed data. 125 | + * 126 | + * Returns SNAPPY_E_HEADER_BAD on error. 127 | + * Returns number of bytes read from input on success. 128 | + * Stores decoded length into *result. 129 | + */ 130 | +int 131 | +csnappy_get_uncompressed_length( 132 | + const char *start, 133 | + uint32_t n, 134 | + uint32_t *result); 135 | + 136 | +/* 137 | + * Safely decompresses all data from array "src" of length "src_len" containing 138 | + * entire compressed stream (with header) into array "dst" of size "dst_len". 139 | + * REQUIRES: dst_len is at least csnappy_get_uncompressed_length(...). 140 | + * 141 | + * Iff successful, returns CSNAPPY_E_OK. 142 | + * If recorded length in header is greater than dst_len, returns 143 | + * CSNAPPY_E_OUTPUT_INSUF. 144 | + * If compressed data is malformed, does not write more than dst_len into dst. 145 | + */ 146 | +int 147 | +csnappy_decompress( 148 | + const char *src, 149 | + uint32_t src_len, 150 | + char *dst, 151 | + uint32_t dst_len); 152 | + 153 | +/* 154 | + * Safely decompresses stream src_len bytes long read from src to dst. 155 | + * Amount of available space at dst must be provided in *dst_len by caller. 156 | + * If compressed stream needs more space, it will not overflow and return 157 | + * CSNAPPY_E_OUTPUT_OVERRUN. 
158 | + * On success, sets *dst_len to actal number of bytes decompressed. 159 | + * Iff successful, returns CSNAPPY_E_OK. 160 | + */ 161 | +int 162 | +csnappy_decompress_noheader( 163 | + const char *src, 164 | + uint32_t src_len, 165 | + char *dst, 166 | + uint32_t *dst_len); 167 | + 168 | +/* 169 | + * Return values (< 0 = Error) 170 | + */ 171 | +#define CSNAPPY_E_OK 0 172 | +#define CSNAPPY_E_HEADER_BAD (-1) 173 | +#define CSNAPPY_E_OUTPUT_INSUF (-2) 174 | +#define CSNAPPY_E_OUTPUT_OVERRUN (-3) 175 | +#define CSNAPPY_E_INPUT_NOT_CONSUMED (-4) 176 | +#define CSNAPPY_E_DATA_MALFORMED (-5) 177 | + 178 | +#ifdef __cplusplus 179 | +} 180 | +#endif 181 | + 182 | +#endif 183 | diff --git a/drivers/staging/snappy/csnappy_compress.c b/drivers/staging/snappy/csnappy_compress.c 184 | new file mode 100644 185 | index 0000000..b093727 186 | --- /dev/null 187 | +++ b/drivers/staging/snappy/csnappy_compress.c 188 | @@ -0,0 +1,527 @@ 189 | +/* 190 | +Copyright 2011, Google Inc. 191 | +All rights reserved. 192 | + 193 | +Redistribution and use in source and binary forms, with or without 194 | +modification, are permitted provided that the following conditions are 195 | +met: 196 | + 197 | + * Redistributions of source code must retain the above copyright 198 | +notice, this list of conditions and the following disclaimer. 199 | + * Redistributions in binary form must reproduce the above 200 | +copyright notice, this list of conditions and the following disclaimer 201 | +in the documentation and/or other materials provided with the 202 | +distribution. 203 | + * Neither the name of Google Inc. nor the names of its 204 | +contributors may be used to endorse or promote products derived from 205 | +this software without specific prior written permission. 206 | + 207 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 208 | +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 209 | +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 210 | +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 211 | +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 212 | +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 213 | +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 214 | +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 215 | +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 216 | +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 217 | +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
218 | + 219 | +File modified for the Linux Kernel by 220 | +Zeev Tarantov 221 | +*/ 222 | + 223 | +#include "csnappy_internal.h" 224 | +#ifdef __KERNEL__ 225 | +#include 226 | +#include 227 | +#endif 228 | +#include "csnappy.h" 229 | + 230 | + 231 | +static inline char* 232 | +encode_varint32(char *sptr, uint32_t v) 233 | +{ 234 | + uint8_t* ptr = (uint8_t *)sptr; 235 | + static const int B = 128; 236 | + if (v < (1<<7)) { 237 | + *(ptr++) = v; 238 | + } else if (v < (1<<14)) { 239 | + *(ptr++) = v | B; 240 | + *(ptr++) = v>>7; 241 | + } else if (v < (1<<21)) { 242 | + *(ptr++) = v | B; 243 | + *(ptr++) = (v>>7) | B; 244 | + *(ptr++) = v>>14; 245 | + } else if (v < (1<<28)) { 246 | + *(ptr++) = v | B; 247 | + *(ptr++) = (v>>7) | B; 248 | + *(ptr++) = (v>>14) | B; 249 | + *(ptr++) = v>>21; 250 | + } else { 251 | + *(ptr++) = v | B; 252 | + *(ptr++) = (v>>7) | B; 253 | + *(ptr++) = (v>>14) | B; 254 | + *(ptr++) = (v>>21) | B; 255 | + *(ptr++) = v>>28; 256 | + } 257 | + return (char *)ptr; 258 | +} 259 | + 260 | + 261 | +/* 262 | + * Any hash function will produce a valid compressed bitstream, but a good 263 | + * hash function reduces the number of collisions and thus yields better 264 | + * compression for compressible input, and more speed for incompressible 265 | + * input. Of course, it doesn't hurt if the hash function is reasonably fast 266 | + * either, as it gets called a lot. 267 | + */ 268 | +static inline uint32_t HashBytes(uint32_t bytes, int shift) 269 | +{ 270 | + uint32_t kMul = 0x1e35a7bd; 271 | + return (bytes * kMul) >> shift; 272 | +} 273 | +static inline uint32_t Hash(const char *p, int shift) 274 | +{ 275 | + return HashBytes(UNALIGNED_LOAD32(p), shift); 276 | +} 277 | + 278 | + 279 | +/* 280 | + * *** DO NOT CHANGE THE VALUE OF kBlockSize *** 281 | + 282 | + * New Compression code chops up the input into blocks of at most 283 | + * the following size. This ensures that back-references in the 284 | + * output never cross kBlockSize block boundaries. This can be 285 | + * helpful in implementing blocked decompression. However the 286 | + * decompression code should not rely on this guarantee since older 287 | + * compression code may not obey it. 288 | + */ 289 | +#define kBlockLog 15 290 | +#define kBlockSize (1 << kBlockLog) 291 | + 292 | + 293 | +/* 294 | + * Return the largest n such that 295 | + * 296 | + * s1[0,n-1] == s2[0,n-1] 297 | + * and n <= (s2_limit - s2). 298 | + * 299 | + * Does not read *s2_limit or beyond. 300 | + * Does not read *(s1 + (s2_limit - s2)) or beyond. 301 | + * Requires that s2_limit >= s2. 302 | + * 303 | + * Separate implementation for x86_64, for speed. Uses the fact that 304 | + * x86_64 is little endian. 305 | + */ 306 | +#if defined(__x86_64__) 307 | +static inline int 308 | +FindMatchLength(const char *s1, const char *s2, const char *s2_limit) 309 | +{ 310 | + uint64_t x; 311 | + int matched, matching_bits; 312 | + DCHECK_GE(s2_limit, s2); 313 | + matched = 0; 314 | + /* 315 | + * Find out how long the match is. We loop over the data 64 bits at a 316 | + * time until we find a 64-bit block that doesn't match; then we find 317 | + * the first non-matching bit and use that to calculate the total 318 | + * length of the match. 
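 * (For example, if the two regions agree in their first five bytes and differ
 * in the sixth, the low 40 bits of x are zero on this little-endian machine,
 * FindLSBSetNonZero64(x) is between 40 and 47, and matching_bits >> 3 adds
 * exactly 5 to matched.)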
319 | + */ 320 | + while (likely(s2 <= s2_limit - 8)) { 321 | + if (unlikely(UNALIGNED_LOAD64(s1 + matched) == 322 | + UNALIGNED_LOAD64(s2))) { 323 | + s2 += 8; 324 | + matched += 8; 325 | + } else { 326 | + /* 327 | + * On current (mid-2008) Opteron models there is a 3% 328 | + * more efficient code sequence to find the first 329 | + * non-matching byte. However, what follows is ~10% 330 | + * better on Intel Core 2 and newer, and we expect AMD's 331 | + * bsf instruction to improve. 332 | + */ 333 | + x = UNALIGNED_LOAD64(s1 + matched) ^ 334 | + UNALIGNED_LOAD64(s2); 335 | + matching_bits = FindLSBSetNonZero64(x); 336 | + matched += matching_bits >> 3; 337 | + return matched; 338 | + } 339 | + } 340 | + while (likely(s2 < s2_limit)) { 341 | + if (likely(s1[matched] == *s2)) { 342 | + ++s2; 343 | + ++matched; 344 | + } else { 345 | + return matched; 346 | + } 347 | + } 348 | + return matched; 349 | +} 350 | +#else /* !defined(__x86_64__) */ 351 | +static inline int 352 | +FindMatchLength(const char *s1, const char *s2, const char *s2_limit) 353 | +{ 354 | + /* Implementation based on the x86-64 version, above. */ 355 | + int matched = 0; 356 | + DCHECK_GE(s2_limit, s2); 357 | + 358 | + while (s2 <= s2_limit - 4 && 359 | + UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { 360 | + s2 += 4; 361 | + matched += 4; 362 | + } 363 | +#if __BYTE_ORDER == __LITTLE_ENDIAN 364 | + if (s2 <= s2_limit - 4) { 365 | + uint32_t x = UNALIGNED_LOAD32(s1 + matched) ^ 366 | + UNALIGNED_LOAD32(s2); 367 | + int matching_bits = FindLSBSetNonZero(x); 368 | + matched += matching_bits >> 3; 369 | + } else { 370 | + while ((s2 < s2_limit) && (s1[matched] == *s2)) { 371 | + ++s2; 372 | + ++matched; 373 | + } 374 | + } 375 | +#else 376 | + while ((s2 < s2_limit) && (s1[matched] == *s2)) { 377 | + ++s2; 378 | + ++matched; 379 | + } 380 | +#endif 381 | + return matched; 382 | +} 383 | +#endif /* !defined(__x86_64__) */ 384 | + 385 | + 386 | +static inline char* 387 | +EmitLiteral(char *op, const char *literal, int len, int allow_fast_path) 388 | +{ 389 | + int n = len - 1; /* Zero-length literals are disallowed */ 390 | + if (n < 60) { 391 | + /* Fits in tag byte */ 392 | + *op++ = LITERAL | (n << 2); 393 | + /* 394 | + The vast majority of copies are below 16 bytes, for which a 395 | + call to memcpy is overkill. This fast path can sometimes 396 | + copy up to 15 bytes too much, but that is okay in the 397 | + main loop, since we have a bit to go on for both sides: 398 | + - The input will always have kInputMarginBytes = 15 extra 399 | + available bytes, as long as we're in the main loop, and 400 | + if not, allow_fast_path = false. 401 | + - The output will always have 32 spare bytes (see 402 | + snappy_max_compressed_length). 
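(Editor's note, not part of the original file: EmitLiteral above stores the length, minus one, inside the tag byte when it is below 60; longer literals store length-1 in one to four trailing little-endian bytes and put 59+count in the tag. A small host-side sketch of just the tag encoding -- emit_literal_tag is a hypothetical name, not a csnappy function:)

#include <stdint.h>
#include <stdio.h>

/* editor's sketch of the snappy literal tag; LITERAL == 0 in the low two bits */
static int emit_literal_tag(uint8_t *out, uint32_t len)
{
	uint32_t n = len - 1;			/* lengths are encoded minus one */
	int count = 0;
	if (n < 60) {
		out[0] = n << 2;		/* fits in the tag byte itself */
		return 1;
	}
	while (n > 0) {				/* little-endian extra length bytes */
		out[1 + count] = n & 0xff;
		n >>= 8;
		count++;
	}
	out[0] = (59 + count) << 2;
	return 1 + count;
}

int main(void)
{
	uint8_t tag[5];
	int k = emit_literal_tag(tag, 100);	/* a 100-byte literal */
	printf("%d header bytes: %02x %02x\n", k, tag[0], tag[1]);
	/* prints "2 header bytes: f0 63": tag 60<<2, then length byte 99 */
	return 0;
}

A 5-byte literal, by contrast, needs only the single tag byte 0x10.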
403 | + */ 404 | + if (allow_fast_path && len <= 16) { 405 | + UnalignedCopy64(literal, op); 406 | + UnalignedCopy64(literal + 8, op + 8); 407 | + return op + len; 408 | + } 409 | + } else { 410 | + /* Encode in upcoming bytes */ 411 | + char *base = op; 412 | + int count = 0; 413 | + op++; 414 | + while (n > 0) { 415 | + *op++ = n & 0xff; 416 | + n >>= 8; 417 | + count++; 418 | + } 419 | + DCHECK_GE(count, 1); 420 | + DCHECK_LE(count, 4); 421 | + *base = LITERAL | ((59+count) << 2); 422 | + } 423 | + memcpy(op, literal, len); 424 | + return op + len; 425 | +} 426 | + 427 | +static inline char* 428 | +EmitCopyLessThan64(char *op, int offset, int len) 429 | +{ 430 | + DCHECK_LE(len, 64); 431 | + DCHECK_GE(len, 4); 432 | + DCHECK_LT(offset, 65536); 433 | + 434 | + if ((len < 12) && (offset < 2048)) { 435 | + int len_minus_4 = len - 4; 436 | + DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */ 437 | + *op++ = COPY_1_BYTE_OFFSET | 438 | + ((len_minus_4) << 2) | 439 | + ((offset >> 8) << 5); 440 | + *op++ = offset & 0xff; 441 | + } else { 442 | + *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2); 443 | + put_unaligned_le16(offset, op); 444 | + op += 2; 445 | + } 446 | + return op; 447 | +} 448 | + 449 | +static inline char* 450 | +EmitCopy(char *op, int offset, int len) 451 | +{ 452 | + /* Emit 64 byte copies but make sure to keep at least four bytes 453 | + * reserved */ 454 | + while (len >= 68) { 455 | + op = EmitCopyLessThan64(op, offset, 64); 456 | + len -= 64; 457 | + } 458 | + 459 | + /* Emit an extra 60 byte copy if have too much data to fit in one 460 | + * copy */ 461 | + if (len > 64) { 462 | + op = EmitCopyLessThan64(op, offset, 60); 463 | + len -= 60; 464 | + } 465 | + 466 | + /* Emit remainder */ 467 | + op = EmitCopyLessThan64(op, offset, len); 468 | + return op; 469 | +} 470 | + 471 | + 472 | +/* 473 | +For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will 474 | +equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have 475 | +empirically found that overlapping loads such as 476 | + UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) 477 | +are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. 478 | + 479 | +We have different versions for 64- and 32-bit; ideally we would avoid the 480 | +two functions and just inline the UNALIGNED_LOAD64 call into 481 | +GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever 482 | +enough to avoid loading the value multiple times then. For 64-bit, the load 483 | +is done when GetEightBytesAt() is called, whereas for 32-bit, the load is 484 | +done at GetUint32AtOffset() time. 
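(Editor's aside, not part of the original file: on a little-endian target the shift in GetUint32AtOffset, defined just below, discards `offset` low-order bytes of the one 64-bit load, so it yields exactly what a direct 32-bit load at p + offset would. A quick host-side check, using memcpy in place of the UNALIGNED_LOAD macros:)

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char p[] = "ABCDEFGH";
	uint64_t eight;
	uint32_t direct, shifted;
	int off;

	memcpy(&eight, p, 8);			/* one 8-byte load...          */
	for (off = 0; off <= 4; off++) {
		memcpy(&direct, p + off, 4);	/* ...versus five 4-byte loads */
		shifted = (uint32_t)(eight >> (8 * off));
		printf("offset %d: %s\n", off,
		       direct == shifted ? "equal" : "DIFFER");
	}
	return 0;
}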
485 | +*/ 486 | + 487 | +#if defined(__x86_64__) || (__SIZEOF_SIZE_T__ == 8) 488 | + 489 | +typedef uint64_t EightBytesReference; 490 | + 491 | +static inline EightBytesReference GetEightBytesAt(const char* ptr) { 492 | + return UNALIGNED_LOAD64(ptr); 493 | +} 494 | + 495 | +static inline uint32_t GetUint32AtOffset(uint64_t v, int offset) { 496 | + DCHECK_GE(offset, 0); 497 | + DCHECK_LE(offset, 4); 498 | +#ifdef __LITTLE_ENDIAN 499 | + return v >> (8 * offset); 500 | +#else 501 | + return v >> (32 - 8 * offset); 502 | +#endif 503 | +} 504 | + 505 | +#else /* !ARCH_K8 */ 506 | + 507 | +typedef const char* EightBytesReference; 508 | + 509 | +static inline EightBytesReference GetEightBytesAt(const char* ptr) { 510 | + return ptr; 511 | +} 512 | + 513 | +static inline uint32_t GetUint32AtOffset(const char* v, int offset) { 514 | + DCHECK_GE(offset, 0); 515 | + DCHECK_LE(offset, 4); 516 | + return UNALIGNED_LOAD32(v + offset); 517 | +} 518 | + 519 | +#endif /* !ARCH_K8 */ 520 | + 521 | + 522 | +#define kInputMarginBytes 15 523 | +char* 524 | +csnappy_compress_fragment( 525 | + const char *input, 526 | + const uint32_t input_size, 527 | + char *op, 528 | + void *working_memory, 529 | + const int workmem_bytes_power_of_two) 530 | +{ 531 | + const char *ip, *ip_end, *base_ip, *next_emit, *ip_limit, *next_ip, 532 | + *candidate, *base; 533 | + uint16_t *table = (uint16_t *)working_memory; 534 | + EightBytesReference input_bytes; 535 | + uint32_t hash, next_hash, prev_hash, cur_hash, skip, candidate_bytes; 536 | + int shift, matched; 537 | + 538 | + DCHECK_GE(workmem_bytes_power_of_two, 9); 539 | + DCHECK_LE(workmem_bytes_power_of_two, 15); 540 | + /* Table of 2^X bytes, need (X-1) bits to address table of uint16_t. 541 | + * How many bits of 32bit hash function result are discarded? */ 542 | + shift = 33 - workmem_bytes_power_of_two; 543 | + /* "ip" is the input pointer, and "op" is the output pointer. */ 544 | + ip = input; 545 | + DCHECK_LE(input_size, kBlockSize); 546 | + ip_end = input + input_size; 547 | + base_ip = ip; 548 | + /* Bytes in [next_emit, ip) will be emitted as literal bytes. Or 549 | + [next_emit, ip_end) after the main loop. */ 550 | + next_emit = ip; 551 | + 552 | + if (unlikely(input_size < kInputMarginBytes)) 553 | + goto emit_remainder; 554 | + 555 | + memset(working_memory, 0, 1 << workmem_bytes_power_of_two); 556 | + 557 | + ip_limit = input + input_size - kInputMarginBytes; 558 | + next_hash = Hash(++ip, shift); 559 | + 560 | +main_loop: 561 | + DCHECK_LT(next_emit, ip); 562 | + /* 563 | + * The body of this loop calls EmitLiteral once and then EmitCopy one or 564 | + * more times. (The exception is that when we're close to exhausting 565 | + * the input we goto emit_remainder.) 566 | + * 567 | + * In the first iteration of this loop we're just starting, so 568 | + * there's nothing to copy, so calling EmitLiteral once is 569 | + * necessary. And we only start a new iteration when the 570 | + * current iteration has determined that a call to EmitLiteral will 571 | + * precede the next call to EmitCopy (if any). 572 | + * 573 | + * Step 1: Scan forward in the input looking for a 4-byte-long match. 574 | + * If we get close to exhausting the input then goto emit_remainder. 575 | + * 576 | + * Heuristic match skipping: If 32 bytes are scanned with no matches 577 | + * found, start looking only at every other byte. If 32 more bytes are 578 | + * scanned, look at every third byte, etc.. When a match is found, 579 | + * immediately go back to looking at every byte. 
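(Editor's illustration, not part of the original comment: with skip starting at 32 and the step computed as skip++ >> 5, the scan advances one byte per probe for the first 32 probes, two bytes for the next 32, then three, and so on. A tiny sketch of that schedule:)

#include <stdio.h>

int main(void)
{
	unsigned int skip = 32, pos = 0, probe;
	for (probe = 1; probe <= 96; probe++) {
		pos += skip++ >> 5;	/* same update as next_ip = ip + (skip++ >> 5) */
		if (probe % 32 == 0)
			printf("after %u probes: %u input bytes covered\n",
			       probe, pos);
	}
	/* prints 32, 96 and 192 bytes: the stride keeps growing until a match
	 * is found, which is what makes incompressible input cheap to skip */
	return 0;
}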
This is a small loss 580 | + * (~5% performance, ~0.1% density) for compressible data due to more 581 | + * bookkeeping, but for non-compressible data (such as JPEG) it's a huge 582 | + * win since the compressor quickly "realizes" the data is incompressible 583 | + * and doesn't bother looking for matches everywhere. 584 | + * 585 | + * The "skip" variable keeps track of how many bytes there are since the 586 | + * last match; dividing it by 32 (ie. right-shifting by five) gives the 587 | + * number of bytes to move ahead for each iteration. 588 | + */ 589 | + skip = 32; 590 | + 591 | + next_ip = ip; 592 | + do { 593 | + ip = next_ip; 594 | + hash = next_hash; 595 | + DCHECK_EQ(hash, Hash(ip, shift)); 596 | + next_ip = ip + (skip++ >> 5); 597 | + if (unlikely(next_ip > ip_limit)) 598 | + goto emit_remainder; 599 | + next_hash = Hash(next_ip, shift); 600 | + candidate = base_ip + table[hash]; 601 | + DCHECK_GE(candidate, base_ip); 602 | + DCHECK_LT(candidate, ip); 603 | + 604 | + table[hash] = ip - base_ip; 605 | + } while (likely(UNALIGNED_LOAD32(ip) != 606 | + UNALIGNED_LOAD32(candidate))); 607 | + 608 | + /* 609 | + * Step 2: A 4-byte match has been found. We'll later see if more 610 | + * than 4 bytes match. But, prior to the match, input 611 | + * bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." 612 | + */ 613 | + DCHECK_LE(next_emit + 16, ip_end); 614 | + op = EmitLiteral(op, next_emit, ip - next_emit, 1); 615 | + 616 | + /* 617 | + * Step 3: Call EmitCopy, and then see if another EmitCopy could 618 | + * be our next move. Repeat until we find no match for the 619 | + * input immediately after what was consumed by the last EmitCopy call. 620 | + * 621 | + * If we exit this loop normally then we need to call EmitLiteral next, 622 | + * though we don't yet know how big the literal will be. We handle that 623 | + * by proceeding to the next iteration of the main loop. We also can exit 624 | + * this loop via goto if we get close to exhausting the input. 625 | + */ 626 | + candidate_bytes = 0; 627 | + 628 | + do { 629 | + /* We have a 4-byte match at ip, and no need to emit any 630 | + "literal bytes" prior to ip. */ 631 | + base = ip; 632 | + matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end); 633 | + ip += matched; 634 | + DCHECK_EQ(0, memcmp(base, candidate, matched)); 635 | + op = EmitCopy(op, base - candidate, matched); 636 | + /* We could immediately start working at ip now, but to improve 637 | + compression we first update table[Hash(ip - 1, ...)]. 
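(Editor's note, not part of the original file: the EmitCopy call above splits a long match so that every chunk handed to EmitCopyLessThan64 is at most 64 bytes and the final chunk is never shorter than the 4-byte minimum a copy tag can express -- that is what the 68/60 thresholds are for. A standalone sketch of the splitting, with split_copy a hypothetical name and offsets left out:)

#include <stdio.h>

static void split_copy(int len)
{
	while (len >= 68) {		/* whole 64-byte pieces...         */
		printf("copy 64\n");
		len -= 64;
	}
	if (len > 64) {			/* ...then a 60-byte piece, so the */
		printf("copy 60\n");	/* remainder below stays >= 4      */
		len -= 60;
	}
	printf("copy %d\n", len);	/* final piece, always 4..64 bytes */
}

int main(void)
{
	split_copy(150);	/* 64 + 64 + 22 */
	split_copy(67);		/* 60 + 7: emitting 64 first would leave an
				 * un-encodable 3-byte tail */
	return 0;
}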
*/ 638 | + next_emit = ip; 639 | + if (unlikely(ip >= ip_limit)) 640 | + goto emit_remainder; 641 | + input_bytes = GetEightBytesAt(ip - 1); 642 | + prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); 643 | + table[prev_hash] = ip - base_ip - 1; 644 | + cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); 645 | + candidate = base_ip + table[cur_hash]; 646 | + candidate_bytes = UNALIGNED_LOAD32(candidate); 647 | + table[cur_hash] = ip - base_ip; 648 | + } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); 649 | + 650 | + next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); 651 | + ++ip; 652 | + goto main_loop; 653 | + 654 | +emit_remainder: 655 | + /* Emit the remaining bytes as a literal */ 656 | + if (next_emit < ip_end) 657 | + op = EmitLiteral(op, next_emit, ip_end - next_emit, 0); 658 | + 659 | + return op; 660 | +} 661 | +#if defined(__KERNEL__) && !defined(STATIC) 662 | +EXPORT_SYMBOL(csnappy_compress_fragment); 663 | +#endif 664 | + 665 | +uint32_t __attribute__((const)) 666 | +csnappy_max_compressed_length(uint32_t source_len) 667 | +{ 668 | + return 32 + source_len + source_len/6; 669 | +} 670 | +#if defined(__KERNEL__) && !defined(STATIC) 671 | +EXPORT_SYMBOL(csnappy_max_compressed_length); 672 | +#endif 673 | + 674 | +void 675 | +csnappy_compress( 676 | + const char *input, 677 | + uint32_t input_length, 678 | + char *compressed, 679 | + uint32_t *compressed_length, 680 | + void *working_memory, 681 | + const int workmem_bytes_power_of_two) 682 | +{ 683 | + int workmem_size; 684 | + int num_to_read; 685 | + uint32_t written = 0; 686 | + char *p = encode_varint32(compressed, input_length); 687 | + written += (p - compressed); 688 | + compressed = p; 689 | + while (input_length > 0) { 690 | + num_to_read = min(input_length, (uint32_t)kBlockSize); 691 | + workmem_size = workmem_bytes_power_of_two; 692 | + if (num_to_read < kBlockSize) { 693 | + for (workmem_size = 9; 694 | + workmem_size < workmem_bytes_power_of_two; 695 | + ++workmem_size) { 696 | + if ((1 << (workmem_size-1)) >= num_to_read) 697 | + break; 698 | + } 699 | + } 700 | + p = csnappy_compress_fragment( 701 | + input, num_to_read, compressed, 702 | + working_memory, workmem_size); 703 | + written += (p - compressed); 704 | + compressed = p; 705 | + input_length -= num_to_read; 706 | + input += num_to_read; 707 | + } 708 | + *compressed_length = written; 709 | +} 710 | +#if defined(__KERNEL__) && !defined(STATIC) 711 | +EXPORT_SYMBOL(csnappy_compress); 712 | + 713 | +MODULE_LICENSE("BSD"); 714 | +MODULE_DESCRIPTION("Snappy Compressor"); 715 | +#endif 716 | diff --git a/drivers/staging/snappy/csnappy_decompress.c b/drivers/staging/snappy/csnappy_decompress.c 717 | new file mode 100644 718 | index 0000000..572ce43 719 | --- /dev/null 720 | +++ b/drivers/staging/snappy/csnappy_decompress.c 721 | @@ -0,0 +1,413 @@ 722 | +/* 723 | +Copyright 2011, Google Inc. 724 | +All rights reserved. 725 | + 726 | +Redistribution and use in source and binary forms, with or without 727 | +modification, are permitted provided that the following conditions are 728 | +met: 729 | + 730 | + * Redistributions of source code must retain the above copyright 731 | +notice, this list of conditions and the following disclaimer. 732 | + * Redistributions in binary form must reproduce the above 733 | +copyright notice, this list of conditions and the following disclaimer 734 | +in the documentation and/or other materials provided with the 735 | +distribution. 736 | + * Neither the name of Google Inc. 
nor the names of its 737 | +contributors may be used to endorse or promote products derived from 738 | +this software without specific prior written permission. 739 | + 740 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 741 | +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 742 | +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 743 | +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 744 | +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 745 | +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 746 | +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 747 | +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 748 | +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 749 | +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 750 | +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 751 | + 752 | +File modified for the Linux Kernel by 753 | +Zeev Tarantov 754 | +*/ 755 | + 756 | +#include "csnappy_internal.h" 757 | +#ifdef __KERNEL__ 758 | +#include 759 | +#include 760 | +#endif 761 | +#include "csnappy.h" 762 | + 763 | +int 764 | +csnappy_get_uncompressed_length( 765 | + const char *src, 766 | + uint32_t src_len, 767 | + uint32_t *result) 768 | +{ 769 | + const char *src_base = src; 770 | + uint32_t shift = 0; 771 | + uint8_t c; 772 | + /* Length is encoded in 1..5 bytes */ 773 | + *result = 0; 774 | + for (;;) { 775 | + if (shift >= 32) 776 | + goto err_out; 777 | + if (src_len == 0) 778 | + goto err_out; 779 | + c = *(const uint8_t *)src++; 780 | + src_len -= 1; 781 | + *result |= (uint32_t)(c & 0x7f) << shift; 782 | + if (c < 128) 783 | + break; 784 | + shift += 7; 785 | + } 786 | + return src - src_base; 787 | +err_out: 788 | + return CSNAPPY_E_HEADER_BAD; 789 | +} 790 | +#if defined(__KERNEL__) && !defined(STATIC) 791 | +EXPORT_SYMBOL(csnappy_get_uncompressed_length); 792 | +#endif 793 | + 794 | +#if defined(__arm__) && !(ARCH_ARM_HAVE_UNALIGNED) 795 | +int csnappy_decompress_noheader( 796 | + const char *src_, 797 | + uint32_t src_remaining, 798 | + char *dst, 799 | + uint32_t *dst_len) 800 | +{ 801 | + const uint8_t * src = (const uint8_t *)src_; 802 | + const uint8_t * const src_end = src + src_remaining; 803 | + char * const dst_base = dst; 804 | + char * const dst_end = dst + *dst_len; 805 | + while (src < src_end) { 806 | + uint32_t opcode = *src++; 807 | + uint32_t length = (opcode >> 2) + 1; 808 | + const uint8_t *copy_src; 809 | + if (likely((opcode & 3) == 0)) { 810 | + if (unlikely(length > 60)) { 811 | + uint32_t extra_bytes = length - 60; 812 | + if (unlikely(src + extra_bytes > src_end)) 813 | + return CSNAPPY_E_DATA_MALFORMED; 814 | + length = 0; 815 | + for (int shift = 0, max_shift = extra_bytes*8; 816 | + shift < max_shift; 817 | + shift += 8) 818 | + length |= *src++ << shift; 819 | + ++length; 820 | + } 821 | + if (unlikely(src + length > src_end)) 822 | + return CSNAPPY_E_DATA_MALFORMED; 823 | + copy_src = src; 824 | + src += length; 825 | + } else { 826 | + uint32_t offset; 827 | + if (likely((opcode & 3) == 1)) { 828 | + if (unlikely(src + 1 > src_end)) 829 | + return CSNAPPY_E_DATA_MALFORMED; 830 | + length = ((length - 1) & 7) + 4; 831 | + offset = ((opcode >> 5) << 8) + *src++; 832 | + } else if (likely((opcode & 3) == 2)) { 833 | + if (unlikely(src + 2 > src_end)) 834 | + return CSNAPPY_E_DATA_MALFORMED; 835 | + offset = src[0] | (src[1] << 8); 836 | + 
src += 2; 837 | + } else { 838 | + if (unlikely(src + 4 > src_end)) 839 | + return CSNAPPY_E_DATA_MALFORMED; 840 | + offset = src[0] | (src[1] << 8) | 841 | + (src[2] << 16) | (src[3] << 24); 842 | + src += 4; 843 | + } 844 | + if (unlikely(!offset || (offset > dst - dst_base))) 845 | + return CSNAPPY_E_DATA_MALFORMED; 846 | + copy_src = (const uint8_t *)dst - offset; 847 | + } 848 | + if (unlikely(dst + length > dst_end)) 849 | + return CSNAPPY_E_OUTPUT_OVERRUN; 850 | + do *dst++ = *copy_src++; while (--length); 851 | + } 852 | + *dst_len = dst - dst_base; 853 | + return CSNAPPY_E_OK; 854 | +} 855 | +#else /* !(arm with no unaligned access) */ 856 | +/* 857 | + * Data stored per entry in lookup table: 858 | + * Range Bits-used Description 859 | + * ------------------------------------ 860 | + * 1..64 0..7 Literal/copy length encoded in opcode byte 861 | + * 0..7 8..10 Copy offset encoded in opcode byte / 256 862 | + * 0..4 11..13 Extra bytes after opcode 863 | + * 864 | + * We use eight bits for the length even though 7 would have sufficed 865 | + * because of efficiency reasons: 866 | + * (1) Extracting a byte is faster than a bit-field 867 | + * (2) It properly aligns copy offset so we do not need a <<8 868 | + */ 869 | +static const uint16_t char_table[256] = { 870 | + 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, 871 | + 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, 872 | + 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, 873 | + 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, 874 | + 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, 875 | + 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, 876 | + 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, 877 | + 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, 878 | + 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, 879 | + 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, 880 | + 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, 881 | + 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, 882 | + 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, 883 | + 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, 884 | + 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, 885 | + 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, 886 | + 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, 887 | + 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, 888 | + 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, 889 | + 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, 890 | + 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, 891 | + 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, 892 | + 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, 893 | + 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, 894 | + 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, 895 | + 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, 896 | + 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, 897 | + 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, 898 | + 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, 899 | + 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, 900 | + 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, 901 | + 0x1801, 0x0f0a, 0x103f, 
0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 902 | +}; 903 | + 904 | +/* 905 | + * Copy "len" bytes from "src" to "op", one byte at a time. Used for 906 | + * handling COPY operations where the input and output regions may 907 | + * overlap. For example, suppose: 908 | + * src == "ab" 909 | + * op == src + 2 910 | + * len == 20 911 | + * After IncrementalCopy(src, op, len), the result will have 912 | + * eleven copies of "ab" 913 | + * ababababababababababab 914 | + * Note that this does not match the semantics of either memcpy() 915 | + * or memmove(). 916 | + */ 917 | +static inline void IncrementalCopy(const char *src, char *op, int len) 918 | +{ 919 | + DCHECK_GT(len, 0); 920 | + do { 921 | + *op++ = *src++; 922 | + } while (--len > 0); 923 | +} 924 | + 925 | +/* 926 | + * Equivalent to IncrementalCopy except that it can write up to ten extra 927 | + * bytes after the end of the copy, and that it is faster. 928 | + * 929 | + * The main part of this loop is a simple copy of eight bytes at a time until 930 | + * we've copied (at least) the requested amount of bytes. However, if op and 931 | + * src are less than eight bytes apart (indicating a repeating pattern of 932 | + * length < 8), we first need to expand the pattern in order to get the correct 933 | + * results. For instance, if the buffer looks like this, with the eight-byte 934 | + * <src> and <op> patterns marked as intervals: 935 | + * 936 | + * abxxxxxxxxxxxx 937 | + * [------] src 938 | + * [------] op 939 | + * 940 | + * a single eight-byte copy from <src> to <op> will repeat the pattern once, 941 | + * after which we can move <op> two bytes without moving <src>: 942 | + * 943 | + * ababxxxxxxxxxx 944 | + * [------] src 945 | + * [------] op 946 | + * 947 | + * and repeat the exercise until the two no longer overlap. 948 | + * 949 | + * This allows us to do very well in the special case of one single byte 950 | + * repeated many times, without taking a big hit for more general cases. 951 | + * 952 | + * The worst case of extra writing past the end of the match occurs when 953 | + * op - src == 1 and len == 1; the last copy will read from byte positions 954 | + * [0..7] and write to [4..11], whereas it was only supposed to write to 955 | + * position 1. Thus, ten excess bytes. 956 | + */ 957 | +static const int kMaxIncrementCopyOverflow = 10; 958 | +static inline void IncrementalCopyFastPath(const char *src, char *op, int len) 959 | +{ 960 | + while (op - src < 8) { 961 | + UnalignedCopy64(src, op); 962 | + len -= op - src; 963 | + op += op - src; 964 | + } 965 | + while (len > 0) { 966 | + UnalignedCopy64(src, op); 967 | + src += 8; 968 | + op += 8; 969 | + len -= 8; 970 | + } 971 | +} 972 | + 973 | + 974 | +/* A type that writes to a flat array.
*/ 975 | +struct SnappyArrayWriter { 976 | + char *base; 977 | + char *op; 978 | + char *op_limit; 979 | +}; 980 | + 981 | +static inline int 982 | +SAW__AppendFastPath(struct SnappyArrayWriter *this, 983 | + const char *ip, uint32_t len) 984 | +{ 985 | + char *op = this->op; 986 | + const int space_left = this->op_limit - op; 987 | + if (likely(space_left >= 16)) { 988 | + UnalignedCopy64(ip, op); 989 | + UnalignedCopy64(ip + 8, op + 8); 990 | + } else { 991 | + if (unlikely(space_left < len)) 992 | + return CSNAPPY_E_OUTPUT_OVERRUN; 993 | + memcpy(op, ip, len); 994 | + } 995 | + this->op = op + len; 996 | + return CSNAPPY_E_OK; 997 | +} 998 | + 999 | +static inline int 1000 | +SAW__Append(struct SnappyArrayWriter *this, 1001 | + const char *ip, uint32_t len) 1002 | +{ 1003 | + char *op = this->op; 1004 | + const int space_left = this->op_limit - op; 1005 | + if (unlikely(space_left < len)) 1006 | + return CSNAPPY_E_OUTPUT_OVERRUN; 1007 | + memcpy(op, ip, len); 1008 | + this->op = op + len; 1009 | + return CSNAPPY_E_OK; 1010 | +} 1011 | + 1012 | +static inline int 1013 | +SAW__AppendFromSelf(struct SnappyArrayWriter *this, 1014 | + uint32_t offset, uint32_t len) 1015 | +{ 1016 | + char *op = this->op; 1017 | + const int space_left = this->op_limit - op; 1018 | + /* -1u catches offset==0 */ 1019 | + if (op - this->base <= offset - 1u) 1020 | + return CSNAPPY_E_DATA_MALFORMED; 1021 | + /* Fast path, used for the majority (70-80%) of dynamic invocations. */ 1022 | + if (len <= 16 && offset >= 8 && space_left >= 16) { 1023 | + UnalignedCopy64(op - offset, op); 1024 | + UnalignedCopy64(op - offset + 8, op + 8); 1025 | + } else if (space_left >= len + kMaxIncrementCopyOverflow) { 1026 | + IncrementalCopyFastPath(op - offset, op, len); 1027 | + } else { 1028 | + if (space_left < len) 1029 | + return CSNAPPY_E_OUTPUT_OVERRUN; 1030 | + IncrementalCopy(op - offset, op, len); 1031 | + } 1032 | + this->op = op + len; 1033 | + return CSNAPPY_E_OK; 1034 | +} 1035 | + 1036 | +int 1037 | +csnappy_decompress_noheader( 1038 | + const char *src, 1039 | + uint32_t src_remaining, 1040 | + char *dst, 1041 | + uint32_t *dst_len) 1042 | +{ 1043 | + struct SnappyArrayWriter writer; 1044 | + const char *end_minus5 = src + src_remaining - 5; 1045 | + uint32_t length, trailer, opword, extra_bytes; 1046 | + int ret, available; 1047 | + uint8_t opcode; 1048 | + char scratch[5]; 1049 | + writer.op = writer.base = dst; 1050 | + writer.op_limit = writer.op + *dst_len; 1051 | + #define LOOP_COND() \ 1052 | + if (unlikely(src >= end_minus5)) { \ 1053 | + available = end_minus5 + 5 - src; \ 1054 | + if (unlikely(available <= 0)) \ 1055 | + goto out; \ 1056 | + memmove(scratch, src, available); \ 1057 | + src = scratch; \ 1058 | + end_minus5 = scratch + available - 5; \ 1059 | + } 1060 | + 1061 | + LOOP_COND(); 1062 | + for (;;) { 1063 | + opcode = *(const uint8_t *)src++; 1064 | + if (opcode & 0x3) { 1065 | + opword = char_table[opcode]; 1066 | + extra_bytes = opword >> 11; 1067 | + trailer = get_unaligned_le(src, extra_bytes); 1068 | + length = opword & 0xff; 1069 | + src += extra_bytes; 1070 | + trailer += opword & 0x700; 1071 | + ret = SAW__AppendFromSelf(&writer, trailer, length); 1072 | + if (ret < 0) 1073 | + return ret; 1074 | + LOOP_COND(); 1075 | + } else { 1076 | + length = (opcode >> 2) + 1; 1077 | + available = end_minus5 + 5 - src; 1078 | + if (length <= 16 && available >= 16) { 1079 | + if ((ret = SAW__AppendFastPath(&writer, src, length)) < 0) 1080 | + return ret; 1081 | + src += length; 1082 | + LOOP_COND(); 
1083 | + continue; 1084 | + } 1085 | + if (unlikely(length > 60)) { 1086 | + extra_bytes = length - 60; 1087 | + length = get_unaligned_le(src, extra_bytes) + 1; 1088 | + src += extra_bytes; 1089 | + available = end_minus5 + 5 - src; 1090 | + } 1091 | + if (unlikely(available < length)) 1092 | + return CSNAPPY_E_DATA_MALFORMED; 1093 | + ret = SAW__Append(&writer, src, length); 1094 | + if (ret < 0) 1095 | + return ret; 1096 | + src += length; 1097 | + LOOP_COND(); 1098 | + } 1099 | + } 1100 | +#undef LOOP_COND 1101 | +out: 1102 | + *dst_len = writer.op - writer.base; 1103 | + return CSNAPPY_E_OK; 1104 | +} 1105 | +#endif /* optimized for unaligned arch */ 1106 | + 1107 | +#if defined(__KERNEL__) && !defined(STATIC) 1108 | +EXPORT_SYMBOL(csnappy_decompress_noheader); 1109 | +#endif 1110 | + 1111 | +int 1112 | +csnappy_decompress( 1113 | + const char *src, 1114 | + uint32_t src_len, 1115 | + char *dst, 1116 | + uint32_t dst_len) 1117 | +{ 1118 | + int n; 1119 | + uint32_t olen = 0; 1120 | + /* Read uncompressed length from the front of the compressed input */ 1121 | + n = csnappy_get_uncompressed_length(src, src_len, &olen); 1122 | + if (unlikely(n < CSNAPPY_E_OK)) 1123 | + return n; 1124 | + /* Protect against possible DoS attack */ 1125 | + if (unlikely(olen > dst_len)) 1126 | + return CSNAPPY_E_OUTPUT_INSUF; 1127 | + return csnappy_decompress_noheader(src + n, src_len - n, dst, &olen); 1128 | +} 1129 | +#if defined(__KERNEL__) && !defined(STATIC) 1130 | +EXPORT_SYMBOL(csnappy_decompress); 1131 | + 1132 | +MODULE_LICENSE("BSD"); 1133 | +MODULE_DESCRIPTION("Snappy Decompressor"); 1134 | +#endif 1135 | diff --git a/drivers/staging/snappy/csnappy_internal.h b/drivers/staging/snappy/csnappy_internal.h 1136 | new file mode 100644 1137 | index 0000000..5fc7ba1 1138 | --- /dev/null 1139 | +++ b/drivers/staging/snappy/csnappy_internal.h 1140 | @@ -0,0 +1,147 @@ 1141 | +/* 1142 | +Copyright 2011 Google Inc. All Rights Reserved. 1143 | + 1144 | +Redistribution and use in source and binary forms, with or without 1145 | +modification, are permitted provided that the following conditions are 1146 | +met: 1147 | + 1148 | + * Redistributions of source code must retain the above copyright 1149 | +notice, this list of conditions and the following disclaimer. 1150 | + * Redistributions in binary form must reproduce the above 1151 | +copyright notice, this list of conditions and the following disclaimer 1152 | +in the documentation and/or other materials provided with the 1153 | +distribution. 1154 | + * Neither the name of Google Inc. nor the names of its 1155 | +contributors may be used to endorse or promote products derived from 1156 | +this software without specific prior written permission. 1157 | + 1158 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1159 | +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1160 | +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1161 | +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 1162 | +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1163 | +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1164 | +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1165 | +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1166 | +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1167 | +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1168 | +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1169 | + 1170 | +Various stubs for the open-source version of Snappy. 1171 | + 1172 | +File modified for the Linux Kernel by 1173 | +Zeev Tarantov 1174 | +*/ 1175 | + 1176 | +#ifndef CSNAPPY_INTERNAL_H_ 1177 | +#define CSNAPPY_INTERNAL_H_ 1178 | + 1179 | +#ifndef __KERNEL__ 1180 | +#include "csnappy_internal_userspace.h" 1181 | +#include 1182 | +#else 1183 | + 1184 | +#include 1185 | +#include 1186 | +#include 1187 | +#include 1188 | +#include 1189 | + 1190 | +#if (defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)) || \ 1191 | + (!defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)) 1192 | +#error either __LITTLE_ENDIAN or __BIG_ENDIAN must be defined 1193 | +#endif 1194 | +#if defined(__LITTLE_ENDIAN) 1195 | +#define __BYTE_ORDER __LITTLE_ENDIAN 1196 | +#else 1197 | +#define __BYTE_ORDER __BIG_ENDIAN 1198 | +#endif 1199 | + 1200 | +#ifdef DEBUG 1201 | +#define DCHECK(cond) if (!(cond)) \ 1202 | + printk(KERN_DEBUG "assert failed @ %s:%i\n", \ 1203 | + __FILE__, __LINE__) 1204 | +#else 1205 | +#define DCHECK(cond) 1206 | +#endif 1207 | + 1208 | +#define UNALIGNED_LOAD16(_p) get_unaligned((const uint16_t *)(_p)) 1209 | +#define UNALIGNED_LOAD32(_p) get_unaligned((const uint32_t *)(_p)) 1210 | +#define UNALIGNED_LOAD64(_p) get_unaligned((const uint64_t *)(_p)) 1211 | +#define UNALIGNED_STORE16(_p, _val) put_unaligned((_val), (uint16_t *)(_p)) 1212 | +#define UNALIGNED_STORE32(_p, _val) put_unaligned((_val), (uint32_t *)(_p)) 1213 | +#define UNALIGNED_STORE64(_p, _val) put_unaligned((_val), (uint64_t *)(_p)) 1214 | + 1215 | +#define FindLSBSetNonZero(n) __builtin_ctz(n) 1216 | +#define FindLSBSetNonZero64(n) __builtin_ctzll(n) 1217 | + 1218 | +#endif /* __KERNEL__ */ 1219 | + 1220 | +#define ARCH_ARM_HAVE_UNALIGNED \ 1221 | + defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || defined(__ARMV6__) || \ 1222 | + defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) 1223 | + 1224 | +static inline void UnalignedCopy64(const void *src, void *dst) { 1225 | +#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || ARCH_ARM_HAVE_UNALIGNED 1226 | + if ((sizeof(void *) == 8) || (sizeof(long) == 8)) { 1227 | + UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src)); 1228 | + } else { 1229 | + /* This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64 1230 | + on some platforms, in particular ARM. 
*/ 1231 | + const uint8_t *src_bytep = (const uint8_t *)src; 1232 | + uint8_t *dst_bytep = (uint8_t *)dst; 1233 | + 1234 | + UNALIGNED_STORE32(dst_bytep, UNALIGNED_LOAD32(src_bytep)); 1235 | + UNALIGNED_STORE32(dst_bytep + 4, UNALIGNED_LOAD32(src_bytep + 4)); 1236 | + } 1237 | +#else 1238 | + const uint8_t *src_bytep = (const uint8_t *)src; 1239 | + uint8_t *dst_bytep = (uint8_t *)dst; 1240 | + dst_bytep[0] = src_bytep[0]; 1241 | + dst_bytep[1] = src_bytep[1]; 1242 | + dst_bytep[2] = src_bytep[2]; 1243 | + dst_bytep[3] = src_bytep[3]; 1244 | + dst_bytep[4] = src_bytep[4]; 1245 | + dst_bytep[5] = src_bytep[5]; 1246 | + dst_bytep[6] = src_bytep[6]; 1247 | + dst_bytep[7] = src_bytep[7]; 1248 | +#endif 1249 | +} 1250 | + 1251 | +#if defined(__arm__) 1252 | + #if ARCH_ARM_HAVE_UNALIGNED 1253 | + static inline uint32_t get_unaligned_le(const void *p, uint32_t n) 1254 | + { 1255 | + uint32_t wordmask = (1U << (8 * n)) - 1; 1256 | + return get_unaligned_le32(p) & wordmask; 1257 | + } 1258 | + #else 1259 | + extern uint32_t get_unaligned_le_armv5(const void *p, uint32_t n); 1260 | + #define get_unaligned_le get_unaligned_le_armv5 1261 | + #endif 1262 | +#else 1263 | + static inline uint32_t get_unaligned_le(const void *p, uint32_t n) 1264 | + { 1265 | + /* Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits */ 1266 | + static const uint32_t wordmask[] = { 1267 | + 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu 1268 | + }; 1269 | + return get_unaligned_le32(p) & wordmask[n]; 1270 | + } 1271 | +#endif 1272 | + 1273 | +#define DCHECK_EQ(a, b) DCHECK(((a) == (b))) 1274 | +#define DCHECK_NE(a, b) DCHECK(((a) != (b))) 1275 | +#define DCHECK_GT(a, b) DCHECK(((a) > (b))) 1276 | +#define DCHECK_GE(a, b) DCHECK(((a) >= (b))) 1277 | +#define DCHECK_LT(a, b) DCHECK(((a) < (b))) 1278 | +#define DCHECK_LE(a, b) DCHECK(((a) <= (b))) 1279 | + 1280 | +enum { 1281 | + LITERAL = 0, 1282 | + COPY_1_BYTE_OFFSET = 1, /* 3 bit length + 3 bits of offset in opcode */ 1283 | + COPY_2_BYTE_OFFSET = 2, 1284 | + COPY_4_BYTE_OFFSET = 3 1285 | +}; 1286 | + 1287 | +#endif /* CSNAPPY_INTERNAL_H_ */ 1288 | diff --git a/drivers/staging/zram/Kconfig b/drivers/staging/zram/Kconfig 1289 | index 3bec4db..1132f4b 100644 1290 | --- a/drivers/staging/zram/Kconfig 1291 | +++ b/drivers/staging/zram/Kconfig 1292 | @@ -6,8 +6,6 @@ config ZRAM 1293 | tristate "Compressed RAM block device support" 1294 | depends on BLOCK && SYSFS 1295 | select XVMALLOC 1296 | - select LZO_COMPRESS 1297 | - select LZO_DECOMPRESS 1298 | default n 1299 | help 1300 | Creates virtual block devices called /dev/zramX (X = 0, 1, ...). 1301 | @@ -28,3 +26,21 @@ config ZRAM_DEBUG 1302 | help 1303 | This option adds additional debugging code to the compressed 1304 | RAM block device driver. 1305 | + 1306 | +choice ZRAM_COMPRESS 1307 | + prompt "compression method" 1308 | + depends on ZRAM 1309 | + default ZRAM_LZO 1310 | + help 1311 | + Select the compression method used by zram. 1312 | + LZO is the default. Snappy compresses a bit worse (around ~2%) but 1313 | + much (~2x) faster, at least on x86-64. 
1314 | +config ZRAM_LZO 1315 | + bool "LZO compression" 1316 | + select LZO_COMPRESS 1317 | + select LZO_DECOMPRESS 1318 | +config ZRAM_SNAPPY 1319 | + bool "Snappy compression" 1320 | + depends on SNAPPY_COMPRESS 1321 | + depends on SNAPPY_DECOMPRESS 1322 | +endchoice 1323 | diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c 1324 | index 09de99f..33cf32c 100644 1325 | --- a/drivers/staging/zram/zram_drv.c 1326 | +++ b/drivers/staging/zram/zram_drv.c 1327 | @@ -29,12 +29,56 @@ 1328 | #include 1329 | #include 1330 | #include 1331 | -#include 1332 | #include 1333 | #include 1334 | 1335 | #include "zram_drv.h" 1336 | 1337 | +#if defined(CONFIG_ZRAM_LZO) 1338 | +#include 1339 | +#define WMSIZE LZO1X_MEM_COMPRESS 1340 | +#define COMPRESS(s, sl, d, dl, wm) \ 1341 | + lzo1x_1_compress(s, sl, d, dl, wm) 1342 | +#define DECOMPRESS(s, sl, d, dl) \ 1343 | + lzo1x_decompress_safe(s, sl, d, dl) 1344 | +#elif defined(CONFIG_ZRAM_SNAPPY) 1345 | +#include "../snappy/csnappy.h" /* if built in drivers/staging */ 1346 | +#define WMSIZE_ORDER ((PAGE_SHIFT > 14) ? (15) : (PAGE_SHIFT+1)) 1347 | +#define WMSIZE (1 << WMSIZE_ORDER) 1348 | +static int 1349 | +snappy_compress_( 1350 | + const unsigned char *src, 1351 | + size_t src_len, 1352 | + unsigned char *dst, 1353 | + size_t *dst_len, 1354 | + void *workmem) 1355 | +{ 1356 | + const unsigned char *end = csnappy_compress_fragment( 1357 | + src, (uint32_t)src_len, dst, workmem, WMSIZE_ORDER); 1358 | + *dst_len = end - dst; 1359 | + return 0; 1360 | +} 1361 | +static int 1362 | +snappy_decompress_( 1363 | + const unsigned char *src, 1364 | + size_t src_len, 1365 | + unsigned char *dst, 1366 | + size_t *dst_len) 1367 | +{ 1368 | + uint32_t dst_len_ = (uint32_t)*dst_len; 1369 | + int ret = csnappy_decompress_noheader(src, src_len, dst, &dst_len_); 1370 | + *dst_len = (size_t)dst_len_; 1371 | + return ret; 1372 | +} 1373 | +#define COMPRESS(s, sl, d, dl, wm) \ 1374 | + snappy_compress_(s, sl, d, dl, wm) 1375 | +#define DECOMPRESS(s, sl, d, dl) \ 1376 | + snappy_decompress_(s, sl, d, dl) 1377 | +#else 1378 | +#error either CONFIG_ZRAM_LZO or CONFIG_ZRAM_SNAPPY must be defined 1379 | +#endif 1380 | + 1381 | + 1382 | /* Globals */ 1383 | static int zram_major; 1384 | struct zram *zram_devices; 1385 | @@ -257,9 +301,9 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 1386 | cmem = kmap_atomic(zram->table[index].page, KM_USER1) + 1387 | zram->table[index].offset; 1388 | 1389 | - ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), 1390 | - xv_get_object_size(cmem) - sizeof(*zheader), 1391 | - uncmem, &clen); 1392 | + ret = DECOMPRESS(cmem + sizeof(*zheader), 1393 | + xv_get_object_size(cmem) - sizeof(*zheader), 1394 | + uncmem, &clen); 1395 | 1396 | if (is_partial_io(bvec)) { 1397 | memcpy(user_mem + bvec->bv_offset, uncmem + offset, 1398 | @@ -271,7 +315,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 1399 | kunmap_atomic(user_mem, KM_USER0); 1400 | 1401 | /* Should NEVER happen. Return bio error if it does. */ 1402 | - if (unlikely(ret != LZO_E_OK)) { 1403 | + if (unlikely(ret)) { 1404 | pr_err("Decompression failed! 
err=%d, page=%u\n", ret, index); 1405 | zram_stat64_inc(zram, &zram->stats.failed_reads); 1406 | return ret; 1407 | @@ -305,13 +349,13 @@ static int zram_read_before_write(struct zram *zram, char *mem, u32 index) 1408 | return 0; 1409 | } 1410 | 1411 | - ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), 1412 | - xv_get_object_size(cmem) - sizeof(*zheader), 1413 | - mem, &clen); 1414 | + ret = DECOMPRESS(cmem + sizeof(*zheader), 1415 | + xv_get_object_size(cmem) - sizeof(*zheader), 1416 | + mem, &clen); 1417 | kunmap_atomic(cmem, KM_USER0); 1418 | 1419 | /* Should NEVER happen. Return bio error if it does. */ 1420 | - if (unlikely(ret != LZO_E_OK)) { 1421 | + if (unlikely(ret)) { 1422 | pr_err("Decompression failed! err=%d, page=%u\n", ret, index); 1423 | zram_stat64_inc(zram, &zram->stats.failed_reads); 1424 | return ret; 1425 | @@ -377,18 +421,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, 1426 | goto out; 1427 | } 1428 | 1429 | - ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, 1430 | - zram->compress_workmem); 1431 | + COMPRESS(uncmem, PAGE_SIZE, src, &clen, zram->compress_workmem); 1432 | 1433 | kunmap_atomic(user_mem, KM_USER0); 1434 | if (is_partial_io(bvec)) 1435 | kfree(uncmem); 1436 | 1437 | - if (unlikely(ret != LZO_E_OK)) { 1438 | - pr_err("Compression failed! err=%d\n", ret); 1439 | - goto out; 1440 | - } 1441 | - 1442 | /* 1443 | * Page is incompressible. Store it as-is (uncompressed) 1444 | * since we do not want to return too many disk write 1445 | @@ -646,7 +684,7 @@ int zram_init_device(struct zram *zram) 1446 | 1447 | zram_set_disksize(zram, totalram_pages << PAGE_SHIFT); 1448 | 1449 | - zram->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); 1450 | + zram->compress_workmem = kzalloc(WMSIZE, GFP_KERNEL); 1451 | if (!zram->compress_workmem) { 1452 | pr_err("Error allocating compressor working memory!\n"); 1453 | ret = -ENOMEM; 1454 | -------------------------------------------------------------------------------- /python/OutputBuffer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from array import array 3 | 4 | class OutputBuffer(object): 5 | """A ring buffer that holds 32K of the last output, 6 | flushing a page at a time until flush() is called.""" 7 | def __init__(self, f, checksummer): 8 | self.f = f 9 | self.checksummer = checksummer 10 | a_list = [0] * 4096 11 | self.pages = [array('B', a_list) for i in xrange(9)] 12 | self.top_page_index, self.top_offset, self.isize, self.checksum = 0, 0, 0, 0 13 | def _flush_page(self): 14 | self.checksum = self.checksummer(self.pages[0], self.checksum) 15 | self.isize += 4096 16 | self.pages[0].tofile(self.f) 17 | self.pages.append(self.pages.pop(0)) 18 | #for i in xrange(4096): self.pages[-1][i] = '\x00' # DEBUG 19 | def _maybe_flush_page(self): 20 | if self.top_offset == 4096: 21 | self.top_offset = 0 22 | if self.top_page_index == len(self.pages) - 1: 23 | self._flush_page() 24 | else: 25 | self.top_page_index += 1 26 | def put_byte(self, b): 27 | self.pages[self.top_page_index][self.top_offset] = b 28 | self.top_offset += 1 29 | self._maybe_flush_page() 30 | def flush(self): 31 | self.top_offset -= 1 32 | if self.top_offset < 0: 33 | self.top_offset = 4095 34 | self.top_page_index -= 1 35 | while self.top_page_index > 0: 36 | self._flush_page() 37 | self.top_page_index -= 1 38 | data = self.pages[self.top_page_index][:self.top_offset+1] 39 | self.checksum = self.checksummer(data, self.checksum) 40 | 
self.isize += self.top_offset + 1 41 | self.f.write(data.tostring()) 42 | return (self.checksum & 0xffffffff, self.isize) 43 | def num_bufferred_bytes(self): 44 | return (self.top_page_index << 12) + self.top_offset 45 | def get_remaining_space(self): 46 | return 4096 - self.top_offset 47 | def put_bytes(self, bytes): 48 | curr_i = 0 49 | len_i = len(bytes) 50 | while curr_i < len_i: 51 | length = min(4096 - self.top_offset, len_i - curr_i) 52 | self.pages[self.top_page_index][self.top_offset : self.top_offset+length] = \ 53 | array('B', bytes[curr_i : curr_i+length]) 54 | curr_i += length 55 | self.top_offset += length 56 | self._maybe_flush_page() 57 | def repeat_chunk(self, length, distance_back): 58 | begin = (self.top_page_index << 12) + self.top_offset - distance_back 59 | if begin < 0: 60 | raise ValueError("distance back is too big. pos [%d %d], distance: %d" % \ 61 | (self.top_page_index, self.top_offset, distance_back)) 62 | input_page_index, input_offset = begin >> 12, begin & 4095 63 | while True: 64 | input_page, output_page = self.pages[input_page_index], self.pages[self.top_page_index] 65 | remaining_space, input_to_end_of_page = 4096 - self.top_offset, 4096 - input_offset 66 | bytes_to_copy = min(length, remaining_space, input_to_end_of_page) 67 | if distance_back == 1: 68 | output_page[self.top_offset : self.top_offset + bytes_to_copy] = \ 69 | array('B', (input_page[input_offset],)) * bytes_to_copy 70 | else: 71 | for i in xrange(bytes_to_copy): 72 | output_page[self.top_offset + i] = input_page[input_offset + i] 73 | input_offset += bytes_to_copy 74 | if input_offset == 4096: 75 | input_offset = 0 76 | input_page_index += 1 77 | self.top_offset += bytes_to_copy 78 | if self.top_offset == 4096: 79 | self.top_offset = 0 80 | if self.top_page_index == len(self.pages) - 1: 81 | self._flush_page() 82 | input_page_index -= 1 83 | else: 84 | self.top_page_index += 1 85 | length -= bytes_to_copy 86 | if length == 0: break 87 | -------------------------------------------------------------------------------- /python/pysnappy_compress.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from struct import pack as struct_pack 3 | from array import array 4 | from collections import defaultdict 5 | 6 | def encode_varint32(f, n): 7 | while n: 8 | b = n & 127 9 | n >>= 7 10 | if n: 11 | b |= 128 12 | f.write(chr(b)) 13 | 14 | def snappy_emit_literal(f, s, begin, end): 15 | l = end - begin - 1 # end is one past the literal, length is encoded -1 16 | if l < 60: 17 | f.write(chr(l << 2)) 18 | elif l < 256: 19 | f.write(chr(240)) # 60 << 2 20 | f.write(chr(l)) 21 | else: # a length > 65536 is not supported by this encoder! 22 | f.write(chr(244)) # 61 << 2 23 | f.write(chr(l & 255)) 24 | f.write(chr(l >> 8)) 25 | f.write(s[begin : end]) 26 | 27 | def snappy_emit_backref(f, offset, length): 28 | if 4 <= length <= 11 and offset < 2048: 29 | f.write(chr(1 | ((length - 4) << 2) | ((offset >> 8) << 5))) 30 | f.write(chr(offset & 255)) 31 | else: # a back offset with offset > 65536 is not supported by this encoder! 
32 | encoded_offset = struct_pack(" 0: 34 | curr_len_chunk = min(length, 64) 35 | f.write(chr(2 | ((curr_len_chunk - 1) << 2))) 36 | f.write(encoded_offset) 37 | length -= curr_len_chunk 38 | 39 | N = 4096 # up to 64K is allowed by this encoder 40 | MIN_LENGTH = 4 # snappy back references with length < 4 encodes to 3 bytes 41 | 42 | def snappy_compress_block_dict(ofile, s, ilen, wm = defaultdict(list)): 43 | """A compressor that does not miss matches and uses unlimited memory""" 44 | wm.clear() 45 | literal_start = 0 46 | i = 1 47 | while i < ilen - MIN_LENGTH: 48 | longest_match_length = MIN_LENGTH - 1 49 | longest_match_start = 0 50 | length_limit = ilen - i 51 | hash_chain = wm[s[i : i + MIN_LENGTH]] 52 | for j in hash_chain: 53 | length = MIN_LENGTH 54 | while length < length_limit and s[i + length] == s[j + length]: 55 | length += 1 56 | if length > longest_match_length: 57 | longest_match_length = length 58 | longest_match_start = j 59 | hash_chain.insert(0, i) 60 | if longest_match_length >= MIN_LENGTH: 61 | if i - 1 >= literal_start: 62 | snappy_emit_literal(ofile, s, literal_start, i) 63 | snappy_emit_backref(ofile, i - longest_match_start, longest_match_length) 64 | i += longest_match_length 65 | literal_start = i 66 | else: 67 | i += 1 68 | if i < ilen: 69 | snappy_emit_literal(ofile, s, literal_start, ilen) 70 | 71 | TABLE_ITEMS_ORDER = 12 72 | MASK = ((1 << TABLE_ITEMS_ORDER) - 1) 73 | def snappy_compress_block_table(ofile, s, ilen, \ 74 | wm = array('H', [0]*(1 << TABLE_ITEMS_ORDER))): 75 | """A compressor that uses limited memory, but misses matches""" 76 | for i in xrange(len(wm)): wm[i] = 0 77 | literal_start = 0 78 | i = 1 79 | while i < ilen - MIN_LENGTH: 80 | hash_key = hash(s[i : i + MIN_LENGTH]) & MASK 81 | match_start = wm[hash_key] 82 | wm[hash_key] = i 83 | length = 0 84 | length_limit = ilen - i 85 | while length < length_limit and s[i + length] == s[match_start + length]: 86 | length += 1 87 | if length >= MIN_LENGTH: 88 | if i - 1 >= literal_start: 89 | snappy_emit_literal(ofile, s, literal_start, i) 90 | snappy_emit_backref(ofile, i - match_start, length) 91 | i += length 92 | literal_start = i 93 | else: 94 | i += 1 95 | if i < ilen: 96 | snappy_emit_literal(ofile, s, literal_start, ilen) 97 | 98 | with open(sys.argv[1], "rb") as ifile: 99 | with open(sys.argv[2], "wb") as ofile: 100 | ifile.seek(0, os.SEEK_END) 101 | encode_varint32(ofile, ifile.tell()) 102 | ifile.seek(0, os.SEEK_SET) 103 | while True: 104 | s = ifile.read(N) 105 | if not s: 106 | break 107 | snappy_compress_block_table(ofile, s, len(s)) 108 | -------------------------------------------------------------------------------- /python/pysnappy_decompress.py: -------------------------------------------------------------------------------- 1 | from OutputBuffer import OutputBuffer 2 | from struct import unpack as struct_unpack 3 | 4 | def read_varint32(f): 5 | v = 0 6 | offset = 0 7 | while True: 8 | c = f.read(1) 9 | if not c: 10 | raise ValueError("header is malformed") 11 | b = ord(c) 12 | v |= (b & 127) << offset 13 | offset += 7 14 | if b < 128: 15 | break 16 | if offset >= 32: 17 | raise ValueError("header is malformed") 18 | return v 19 | 20 | def read_le_bytes(f, n): 21 | s = f.read(n) 22 | if len(s) < n: 23 | raise ValueError("data malformed") 24 | if n == 1: 25 | return ord(s) 26 | elif n == 2: 27 | return struct_unpack("> 2) + 1 44 | if cmd_type == 0: 45 | if length > 60: 46 | length = read_le_bytes(ifile, length - 60) + 1 47 | ob.put_bytes(ifile.read(length)) 48 | else: 49 | if 
cmd_type == 1: 50 | length = ((length - 1) & 7) + 4 51 | offset = ((c >> 5) << 8) + read_le_bytes(ifile, 1) 52 | elif cmd_type == 2: 53 | offset = read_le_bytes(ifile, 2) 54 | else: 55 | offset = read_le_bytes(ifile, 4) 56 | ob.repeat_chunk(length, offset) 57 | if ob.isize + ob.num_bufferred_bytes() == expected_olen: 58 | ob.flush() 59 | else: 60 | raise ValueError("input not consumed") 61 | 62 | if __name__ == "__main__": 63 | import sys 64 | with file(sys.argv[1], "rb") as ifile: 65 | with file(sys.argv[2], "wb") as ofile: 66 | ob = OutputBuffer(ofile, lambda x,y: y) 67 | snappy_decompress(ifile, ob) 68 | -------------------------------------------------------------------------------- /snappy_tester.patch: -------------------------------------------------------------------------------- 1 | diff --git a/configure.ac b/configure.ac 2 | index d193b0b..35f0775 100644 3 | --- a/configure.ac 4 | +++ b/configure.ac 5 | @@ -97,6 +97,7 @@ CHECK_EXT_COMPRESSION_LIB([lzo2], [lzo1x_1_15_compress]) 6 | CHECK_EXT_COMPRESSION_LIB([lzf], [lzf_compress]) 7 | CHECK_EXT_COMPRESSION_LIB([fastlz], [fastlz_compress]) 8 | CHECK_EXT_COMPRESSION_LIB([quicklz], [qlz_compress]) 9 | +CHECK_EXT_COMPRESSION_LIB([csnappy], [csnappy_compress]) 10 | AC_SUBST([UNITTEST_LIBS]) 11 | 12 | # These are used by snappy-stubs-public.h.in. 13 | diff --git a/snappy-test.h b/snappy-test.h 14 | index ef6a955..10fd24c 100644 15 | --- a/snappy-test.h 16 | +++ b/snappy-test.h 17 | @@ -120,6 +120,10 @@ extern "C" { 18 | #include "quicklz.h" 19 | #endif 20 | 21 | +#ifdef HAVE_LIBCSNAPPY 22 | +#include "csnappy.h" 23 | +#endif 24 | + 25 | namespace { 26 | namespace File { 27 | void Init() { } 28 | diff --git a/snappy_unittest.cc b/snappy_unittest.cc 29 | index f3b9c83..52a8ab3 100644 30 | --- a/snappy_unittest.cc 31 | +++ b/snappy_unittest.cc 32 | @@ -57,6 +57,8 @@ DEFINE_bool(liblzf, false, 33 | "(http://www.goof.com/pcg/marc/liblzf.html)"); 34 | DEFINE_bool(fastlz, false, 35 | "Run FastLZ compression (http://www.fastlz.org/"); 36 | +DEFINE_bool(csnappy, false, 37 | + "Run csnappy compression (https://github.com/zeevt/csnappy/)"); 38 | DEFINE_bool(snappy, true, "Run snappy compression"); 39 | 40 | 41 | @@ -121,11 +123,11 @@ typedef string DataEndingAtUnreadablePage; 42 | #endif 43 | 44 | enum CompressorType { 45 | - ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, SNAPPY 46 | + ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, CSNAPPY, SNAPPY, 47 | }; 48 | 49 | const char* names[] = { 50 | - "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "SNAPPY" 51 | + "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "CSNAPPY", "SNAPPY", 52 | }; 53 | 54 | static size_t MinimumRequiredOutputSpace(size_t input_size, 55 | @@ -156,6 +158,12 @@ static size_t MinimumRequiredOutputSpace(size_t input_size, 56 | return max(static_cast(ceil(input_size * 1.05)), 66); 57 | #endif // FASTLZ_VERSION 58 | 59 | +#ifdef CSNAPPY_VERSION 60 | + case CSNAPPY: 61 | + return static_cast(csnappy_max_compressed_length( 62 | + static_cast(input_size))); 63 | +#endif // CSNAPPY_VERSION 64 | + 65 | case SNAPPY: 66 | return snappy::MaxCompressedLength(input_size); 67 | 68 | @@ -266,6 +274,24 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp, 69 | } 70 | #endif // FASTLZ_VERSION 71 | 72 | +#ifdef CSNAPPY_VERSION 73 | + case CSNAPPY: { 74 | + uint32_t destlen; 75 | + char* mem = new char[CSNAPPY_WORKMEM_BYTES]; 76 | + csnappy_compress(input, input_size, 77 | + string_as_array(compressed), 78 | + &destlen, 79 | + mem, 80 | + CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO); 81 | + delete[] mem; 
82 | + CHECK_LE(destlen, csnappy_max_compressed_length(input_size)); 83 | + if (!compressed_is_preallocated) { 84 | + compressed->resize(destlen); 85 | + } 86 | + break; 87 | + } 88 | +#endif // CSNAPPY_VERSION 89 | + 90 | case SNAPPY: { 91 | size_t destlen; 92 | snappy::RawCompress(input, input_size, 93 | @@ -364,9 +390,23 @@ static bool Uncompress(const string& compressed, CompressorType comp, 94 | } 95 | #endif // FASTLZ_VERSION 96 | 97 | +#ifdef CSNAPPY_VERSION 98 | + case CSNAPPY: { 99 | + int ret = csnappy_decompress( 100 | + compressed.data(), 101 | + (uint32_t)compressed.size(), 102 | + string_as_array(output), 103 | + (uint32_t)size); 104 | + CHECK_EQ(ret, CSNAPPY_E_OK); 105 | + break; 106 | + } 107 | +#endif // CSNAPPY_VERSION 108 | + 109 | case SNAPPY: { 110 | - snappy::RawUncompress(compressed.data(), compressed.size(), 111 | - string_as_array(output)); 112 | + bool ret = snappy::RawUncompress(compressed.data(), 113 | + compressed.size(), 114 | + string_as_array(output)); 115 | + CHECK_EQ(ret, true); 116 | break; 117 | } 118 | 119 | @@ -464,7 +504,7 @@ static void Measure(const char* data, 120 | string urate = (uncomp_rate >= 0) 121 | ? StringPrintf("%.1f", uncomp_rate) 122 | : string("?"); 123 | - printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% " 124 | + printf("%-8s [b %dM] bytes %6d -> %6d %4.1f%% " 125 | "comp %5.1f MB/s uncomp %5s MB/s\n", 126 | x.c_str(), 127 | block_size/(1<<20), 128 | @@ -1013,6 +1053,7 @@ static void MeasureFile(const char* fname) { 129 | if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10); 130 | if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10); 131 | if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10); 132 | + if (FLAGS_csnappy) Measure(input, len, CSNAPPY, repeats, 1024<<10); 133 | if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10); 134 | 135 | // For block-size based measurements 136 | -------------------------------------------------------------------------------- /testdata/baddata3.snappy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeevt/csnappy/6c10c305e8dde193546e6b33cf8a785d5dc123e2/testdata/baddata3.snappy -------------------------------------------------------------------------------- /testdata/unaligned_uint64_test.bin.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeevt/csnappy/6c10c305e8dde193546e6b33cf8a785d5dc123e2/testdata/unaligned_uint64_test.bin.gz -------------------------------------------------------------------------------- /testdata/unaligned_uint64_test.snappy.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeevt/csnappy/6c10c305e8dde193546e6b33cf8a785d5dc123e2/testdata/unaligned_uint64_test.snappy.gz -------------------------------------------------------------------------------- /testdata/urls.10K.snappy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeevt/csnappy/6c10c305e8dde193546e6b33cf8a785d5dc123e2/testdata/urls.10K.snappy -------------------------------------------------------------------------------- /unaligned_arm.s: -------------------------------------------------------------------------------- 1 | # Written by http://stackoverflow.com/users/104109/bitbank 2 | # Licensed under 3-clause BSD with permission. 
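(Editor's note, not part of the original file: ARMv5 cores cannot load an unaligned word directly, so the routine below assembles the result one byte at a time, reading only the n bytes it was asked for. A plain C reference of what it computes -- read n bytes, 1 <= n <= 4, starting at p and combine them little-endian -- essentially the same logic as get_unaligned_le_v1 in unaligned_test.c further down:)

#include <stdint.h>

/* editor's reference model of get_unaligned_le_armv5; never touches
 * bytes past p + n - 1 */
uint32_t get_unaligned_le_ref(const void *p, uint32_t n)
{
	const uint8_t *b = (const uint8_t *)p;
	uint32_t v = b[0];		/* byte 0 is always read */
	if (n >= 2)
		v |= (uint32_t)b[1] << 8;
	if (n >= 3)
		v |= (uint32_t)b[2] << 16;
	if (n == 4)
		v |= (uint32_t)b[3] << 24;
	return v;
}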
3 | .syntax unified 4 | .arch armv5te 5 | .text 6 | .align 2 7 | .global get_unaligned_le_armv5 8 | .arm 9 | .type get_unaligned_le_armv5, %function 10 | # When called from C, r0 = first parameter, r1 = second parameter 11 | # r0-r3 and r12 can get trashed by C functions 12 | get_unaligned_le_armv5: 13 | .fnstart 14 | ldrb %r2, [%r0],#1 @ byte 0 is always read (n=1..4) 15 | cmp %r1, #2 16 | ldrbge %r3, [%r0],#1 @ byte 1, n == 2 17 | ldrbgt %r12,[%r0],#1 @ byte 2, n > 2 18 | orrge %r2, %r2, %r3, LSL #8 19 | orrgt %r2, %r2, %r12,LSL #16 20 | cmp %r1, #4 21 | ldrbeq %r3, [%r0],#1 @ byte 3, n == 4 22 | movne %r0, %r2 @ recoup wasted cycle 23 | orreq %r0, %r2, %r3, LSL #24 24 | bx lr 25 | .fnend 26 | -------------------------------------------------------------------------------- /unaligned_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct __attribute__((__packed__)) una_u32 { uint32_t x; }; 4 | 5 | static inline uint32_t read_aligned_uint32(const void *p) { 6 | const uint32_t *ptr = (const uint32_t *)p; 7 | return *ptr; 8 | } 9 | 10 | static inline uint32_t read_unaligned_ps_uint32(const void *p) { 11 | const struct una_u32 *ptr = (const struct una_u32 *)p; 12 | return ptr->x; 13 | } 14 | 15 | static inline uint32_t read_unaligned_bsle_uint32(const void *p) { 16 | const uint8_t *b = (const uint8_t *)p; 17 | return b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); 18 | } 19 | 20 | static inline void write_aligned_uint32(void *p, uint32_t v) { 21 | uint32_t *ptr = (uint32_t *)p; 22 | *ptr = v; 23 | } 24 | 25 | static inline void write_unaligned_ps_uint32(void *p, uint32_t v) { 26 | struct una_u32 *ptr = (struct una_u32 *)p; 27 | ptr->x = v; 28 | } 29 | 30 | static inline void write_unaligned_bsle_uint32(void *p, uint32_t v) { 31 | uint8_t *b = (uint8_t *)p; 32 | b[0] = v & 0xff; 33 | b[1] = (v >> 8) & 0xff; 34 | b[2] = (v >> 16) & 0xff; 35 | b[3] = (v >> 24) & 0xff; 36 | } 37 | 38 | #include 39 | #include 40 | 41 | static const uint32_t wordmask[] = { 42 | 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu 43 | }; 44 | 45 | uint32_t get_unaligned_le_x86(const void *p, uint32_t n) { 46 | uint32_t ret = *(const uint32_t *)p & wordmask[n]; 47 | return ret; 48 | } 49 | 50 | uint32_t get_unaligned_le_v1(const void *p, uint32_t n) { 51 | const uint8_t *b = (const uint8_t *)p; 52 | uint32_t ret; 53 | ret = b[0]; 54 | if (n > 1) { 55 | ret |= b[1] << 8; 56 | if (n > 2) { 57 | ret |= b[2] << 16; 58 | if (n > 3) { 59 | ret |= b[3] << 24; 60 | } 61 | } 62 | } 63 | return ret; 64 | } 65 | 66 | uint32_t get_unaligned_le_v2(const void *p, uint32_t n) { 67 | const uint8_t *b = (const uint8_t *)p; 68 | uint32_t ret = b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); 69 | ret &= wordmask[n]; 70 | return ret; 71 | } 72 | 73 | uint32_t get_unaligned_le_v4(const void *p, uint32_t n) { 74 | const uint8_t *b = (const uint8_t *)p; 75 | uint32_t ret; 76 | int mask1 = -(n>1); /* 0 if n<=1, 0xFF otherwise */ 77 | int mask2 = -(n>2); 78 | int mask3 = -(n>3); 79 | ret = b[0]; 80 | ret |= (b[1] << 8) & mask1; 81 | ret |= (b[2] << 16) & mask2; 82 | ret |= (b[3] << 24) & mask3; 83 | return ret; 84 | } 85 | 86 | uint32_t get_unaligned_le_v5(const void *p, uint32_t n) { 87 | uint32_t mask = (1U << (8 * n)) - 1; 88 | uint32_t ret = *(const uint32_t *)p & mask; 89 | return ret; 90 | } 91 | 92 | extern uint32_t get_unaligned_le_armv5(const void *p, uint32_t n); 93 | 94 | #include 95 | #include 96 | #include 97 | 98 | int main(int argc, char *argv[]) { 99 | if (argc < 2) 
100 | return 2; 101 | const unsigned int data_size = 1000000; 102 | uint8_t *data = malloc(data_size); 103 | if (!data) 104 | return 0; 105 | uint32_t x, i; 106 | for (i = 0; i < data_size; i++) 107 | data[i] = i & 255; 108 | const uint8_t *p = data, * const end = data + data_size - 10; 109 | 110 | #define TEST_LOOP(func) \ 111 | for (i = 0; i < 100; i++) { \ 112 | x = 0xffffffff; \ 113 | p = data; \ 114 | while (p < end) { \ 115 | x ^= func(p, 1); p += 1; \ 116 | x ^= func(p, 2); p += 2; \ 117 | x ^= func(p, 3); p += 3; \ 118 | x ^= func(p, 4); p += 4; \ 119 | } \ 120 | } 121 | 122 | switch (argv[1][0] - '0') { 123 | case 0: 124 | TEST_LOOP(get_unaligned_le_v1); 125 | break; 126 | case 1: 127 | TEST_LOOP(get_unaligned_le_v2); 128 | break; 129 | case 2: 130 | TEST_LOOP(get_unaligned_le_v4); 131 | break; 132 | case 3: 133 | TEST_LOOP(get_unaligned_le_armv5); 134 | break; 135 | case 4: 136 | TEST_LOOP(get_unaligned_le_x86); 137 | break; 138 | case 5: 139 | TEST_LOOP(get_unaligned_le_v5); 140 | break; 141 | default: 142 | goto out; 143 | } 144 | out: 145 | free(data); 146 | return 0; 147 | } 148 | -------------------------------------------------------------------------------- /userspace_benchmark.txt: -------------------------------------------------------------------------------- 1 | testdata/alice29.txt : 2 | ZLIB: [b 1M] bytes 152089 -> 54404 35.8% comp 9.8 MB/s uncomp 138.0 MB/s 3 | LZO204: [b 1M] bytes 152089 -> 82691 54.4% comp 64.6 MB/s uncomp 206.3 MB/s 4 | LZO205: [b 1M] bytes 152089 -> 87825 57.7% comp 175.4 MB/s uncomp 240.0 MB/s 5 | CSNAPPY: [b 1M] bytes 152089 -> 90965 59.8% comp 173.7 MB/s uncomp 409.6 MB/s 6 | SNAPPY: [b 4M] bytes 152089 -> 90965 59.8% comp 174.9 MB/s uncomp 401.6 MB/s 7 | testdata/asyoulik.txt : 8 | ZLIB: [b 1M] bytes 125179 -> 48897 39.1% comp 9.0 MB/s uncomp 131.0 MB/s 9 | LZO204: [b 1M] bytes 125179 -> 73217 58.5% comp 59.6 MB/s uncomp 202.1 MB/s 10 | LZO205: [b 1M] bytes 125179 -> 77041 61.5% comp 164.4 MB/s uncomp 237.4 MB/s 11 | CSNAPPY: [b 1M] bytes 125179 -> 80207 64.1% comp 163.6 MB/s uncomp 387.7 MB/s 12 | SNAPPY: [b 4M] bytes 125179 -> 80207 64.1% comp 164.6 MB/s uncomp 378.9 MB/s 13 | testdata/cp.html : 14 | ZLIB: [b 1M] bytes 24603 -> 7961 32.4% comp 23.0 MB/s uncomp 142.0 MB/s 15 | LZO204: [b 1M] bytes 24603 -> 11621 47.2% comp 66.8 MB/s uncomp 300.0 MB/s 16 | LZO205: [b 1M] bytes 24603 -> 11909 48.4% comp 218.1 MB/s uncomp 336.9 MB/s 17 | CSNAPPY: [b 1M] bytes 24603 -> 11838 48.1% comp 228.9 MB/s uncomp 548.1 MB/s 18 | SNAPPY: [b 4M] bytes 24603 -> 11838 48.1% comp 227.6 MB/s uncomp 523.3 MB/s 19 | testdata/fields.c : 20 | ZLIB: [b 1M] bytes 11150 -> 3122 28.0% comp 25.2 MB/s uncomp 147.5 MB/s 21 | LZO204: [b 1M] bytes 11150 -> 4663 41.8% comp 86.2 MB/s uncomp 304.5 MB/s 22 | LZO205: [b 1M] bytes 11150 -> 4711 42.3% comp 253.3 MB/s uncomp 346.1 MB/s 23 | CSNAPPY: [b 1M] bytes 11150 -> 4728 42.4% comp 251.7 MB/s uncomp 536.5 MB/s 24 | SNAPPY: [b 4M] bytes 11150 -> 4728 42.4% comp 249.6 MB/s uncomp 515.2 MB/s 25 | testdata/geo.protodata : 26 | ZLIB: [b 1M] bytes 118588 -> 15131 12.8% comp 43.2 MB/s uncomp 310.1 MB/s 27 | LZO204: [b 1M] bytes 118588 -> 20026 16.9% comp 150.2 MB/s uncomp 639.7 MB/s 28 | LZO205: [b 1M] bytes 118588 -> 23965 20.2% comp 487.6 MB/s uncomp 705.7 MB/s 29 | CSNAPPY: [b 1M] bytes 118588 -> 27459 23.2% comp 469.0 MB/s uncomp 985.8 MB/s 30 | SNAPPY: [b 4M] bytes 118588 -> 27459 23.2% comp 466.1 MB/s uncomp 954.6 MB/s 31 | testdata/grammar.lsp : 32 | ZLIB: [b 1M] bytes 3721 -> 1222 32.8% comp 24.0 MB/s uncomp 109.3 MB/s 33 | 
LZO204: [b 1M] bytes 3721 -> 1781 47.9% comp 79.2 MB/s uncomp 360.8 MB/s 34 | LZO205: [b 1M] bytes 3721 -> 1811 48.7% comp 232.3 MB/s uncomp 442.2 MB/s 35 | CSNAPPY: [b 1M] bytes 3721 -> 1800 48.4% comp 257.6 MB/s uncomp 612.8 MB/s 36 | SNAPPY: [b 4M] bytes 3721 -> 1800 48.4% comp 250.1 MB/s uncomp 570.9 MB/s 37 | testdata/house.jpg : 38 | ZLIB: [b 1M] bytes 126958 -> 126513 99.6% comp 19.0 MB/s uncomp 231.8 MB/s 39 | LZO204: [b 1M] bytes 126958 -> 127173 100.2% comp 23.5 MB/s uncomp 1635.4 MB/s 40 | LZO205: [b 1M] bytes 126958 -> 127303 100.3% comp 1051.1 MB/s uncomp 3762.4 MB/s 41 | CSNAPPY: [b 1M] bytes 126958 -> 126803 99.9% comp 2365.1 MB/s uncomp 8190.2 MB/s 42 | SNAPPY: [b 4M] bytes 126958 -> 126803 99.9% comp 2326.8 MB/s uncomp 8402.5 MB/s 43 | testdata/html : 44 | ZLIB: [b 1M] bytes 102400 -> 13699 13.4% comp 35.6 MB/s uncomp 273.4 MB/s 45 | LZO204: [b 1M] bytes 102400 -> 21027 20.5% comp 135.7 MB/s uncomp 494.3 MB/s 46 | LZO205: [b 1M] bytes 102400 -> 22547 22.0% comp 421.6 MB/s uncomp 557.5 MB/s 47 | CSNAPPY: [b 1M] bytes 102400 -> 24140 23.6% comp 425.8 MB/s uncomp 873.0 MB/s 48 | SNAPPY: [b 4M] bytes 102400 -> 24140 23.6% comp 422.9 MB/s uncomp 845.4 MB/s 49 | testdata/html_x_4 : 50 | ZLIB: [b 1M] bytes 409600 -> 53367 13.0% comp 32.1 MB/s uncomp 277.7 MB/s 51 | LZO204: [b 1M] bytes 409600 -> 82980 20.3% comp 143.3 MB/s uncomp 487.0 MB/s 52 | LZO205: [b 1M] bytes 409600 -> 89475 21.8% comp 428.2 MB/s uncomp 556.1 MB/s 53 | CSNAPPY: [b 1M] bytes 409600 -> 96472 23.6% comp 423.4 MB/s uncomp 870.8 MB/s 54 | SNAPPY: [b 4M] bytes 409600 -> 96472 23.6% comp 418.3 MB/s uncomp 830.5 MB/s 55 | testdata/kennedy.xls : 56 | ZLIB: [b 1M] bytes 1029744 -> 203992 19.8% comp 15.8 MB/s uncomp 230.0 MB/s 57 | LZO204: [b 1M] bytes 1029744 -> 357315 34.7% comp 159.1 MB/s uncomp 624.6 MB/s 58 | LZO205: [b 1M] bytes 1029744 -> 362984 35.2% comp 413.2 MB/s uncomp 736.1 MB/s 59 | CSNAPPY: [b 1M] bytes 1029744 -> 425735 41.3% comp 354.9 MB/s uncomp 564.4 MB/s 60 | SNAPPY: [b 4M] bytes 1029744 -> 425735 41.3% comp 350.0 MB/s uncomp 513.0 MB/s 61 | testdata/kppkn.gtb : 62 | ZLIB: [b 1M] bytes 184320 -> 38751 21.0% comp 7.2 MB/s uncomp 180.9 MB/s 63 | LZO204: [b 1M] bytes 184320 -> 71671 38.9% comp 98.6 MB/s uncomp 274.8 MB/s 64 | LZO205: [b 1M] bytes 184320 -> 71445 38.8% comp 295.0 MB/s uncomp 321.9 MB/s 65 | CSNAPPY: [b 1M] bytes 184320 -> 70535 38.3% comp 271.8 MB/s uncomp 483.8 MB/s 66 | SNAPPY: [b 4M] bytes 184320 -> 70535 38.3% comp 273.9 MB/s uncomp 464.5 MB/s 67 | testdata/lcet10.txt : 68 | ZLIB: [b 1M] bytes 426754 -> 144904 34.0% comp 10.0 MB/s uncomp 142.8 MB/s 69 | LZO204: [b 1M] bytes 426754 -> 221290 51.9% comp 67.3 MB/s uncomp 212.3 MB/s 70 | LZO205: [b 1M] bytes 426754 -> 236699 55.5% comp 182.2 MB/s uncomp 248.3 MB/s 71 | CSNAPPY: [b 1M] bytes 426754 -> 243710 57.1% comp 181.7 MB/s uncomp 437.4 MB/s 72 | SNAPPY: [b 4M] bytes 426754 -> 243710 57.1% comp 183.0 MB/s uncomp 428.3 MB/s 73 | testdata/mapreduce-osdi-1.pdf : 74 | ZLIB: [b 1M] bytes 94330 -> 74928 79.4% comp 22.4 MB/s uncomp 177.9 MB/s 75 | LZO204: [b 1M] bytes 94330 -> 76999 81.6% comp 29.0 MB/s uncomp 938.7 MB/s 76 | LZO205: [b 1M] bytes 94330 -> 94704 100.4% comp 1057.4 MB/s uncomp 3974.6 MB/s 77 | CSNAPPY: [b 1M] bytes 94330 -> 77477 82.1% comp 833.6 MB/s uncomp 2115.4 MB/s 78 | SNAPPY: [b 4M] bytes 94330 -> 77477 82.1% comp 832.2 MB/s uncomp 1997.5 MB/s 79 | testdata/plrabn12.txt : 80 | ZLIB: [b 1M] bytes 481861 -> 195261 40.5% comp 7.5 MB/s uncomp 130.1 MB/s 81 | LZO204: [b 1M] bytes 481861 -> 294610 61.1% comp 59.1 MB/s 
uncomp 192.3 MB/s 82 | LZO205: [b 1M] bytes 481861 -> 314012 65.2% comp 155.7 MB/s uncomp 229.7 MB/s 83 | CSNAPPY: [b 1M] bytes 481861 -> 329339 68.3% comp 153.4 MB/s uncomp 363.5 MB/s 84 | SNAPPY: [b 4M] bytes 481861 -> 329339 68.3% comp 154.5 MB/s uncomp 354.9 MB/s 85 | testdata/ptt5 : 86 | ZLIB: [b 1M] bytes 513216 -> 56465 11.0% comp 25.8 MB/s uncomp 269.0 MB/s 87 | LZO204: [b 1M] bytes 513216 -> 86232 16.8% comp 139.7 MB/s uncomp 590.6 MB/s 88 | LZO205: [b 1M] bytes 513216 -> 87278 17.0% comp 551.6 MB/s uncomp 667.6 MB/s 89 | CSNAPPY: [b 1M] bytes 513216 -> 93455 18.2% comp 555.0 MB/s uncomp 845.6 MB/s 90 | SNAPPY: [b 4M] bytes 513216 -> 93455 18.2% comp 553.1 MB/s uncomp 795.0 MB/s 91 | testdata/sum : 92 | ZLIB: [b 1M] bytes 38240 -> 12990 34.0% comp 13.9 MB/s uncomp 144.6 MB/s 93 | LZO204: [b 1M] bytes 38240 -> 17686 46.2% comp 67.1 MB/s uncomp 311.0 MB/s 94 | LZO205: [b 1M] bytes 38240 -> 18086 47.3% comp 230.6 MB/s uncomp 373.5 MB/s 95 | CSNAPPY: [b 1M] bytes 38240 -> 19837 51.9% comp 228.7 MB/s uncomp 513.1 MB/s 96 | SNAPPY: [b 4M] bytes 38240 -> 19837 51.9% comp 226.7 MB/s uncomp 479.2 MB/s 97 | testdata/urls.10K : 98 | ZLIB: [b 1M] bytes 702087 -> 222613 31.7% comp 18.2 MB/s uncomp 160.0 MB/s 99 | LZO204: [b 1M] bytes 702087 -> 309320 44.1% comp 64.5 MB/s uncomp 309.2 MB/s 100 | LZO205: [b 1M] bytes 702087 -> 345814 49.3% comp 226.3 MB/s uncomp 376.5 MB/s 101 | CSNAPPY: [b 1M] bytes 702087 -> 357267 50.9% comp 240.1 MB/s uncomp 645.5 MB/s 102 | SNAPPY: [b 4M] bytes 702087 -> 357267 50.9% comp 239.3 MB/s uncomp 598.7 MB/s 103 | testdata/xargs.1 : 104 | ZLIB: [b 1M] bytes 4227 -> 1736 41.1% comp 23.2 MB/s uncomp 104.0 MB/s 105 | LZO204: [b 1M] bytes 4227 -> 2450 58.0% comp 65.2 MB/s uncomp 333.1 MB/s 106 | LZO205: [b 1M] bytes 4227 -> 2468 58.4% comp 192.3 MB/s uncomp 392.1 MB/s 107 | CSNAPPY: [b 1M] bytes 4227 -> 2509 59.4% comp 215.9 MB/s uncomp 499.1 MB/s 108 | SNAPPY: [b 4M] bytes 4227 -> 2509 59.4% comp 208.7 MB/s uncomp 477.0 MB/s 109 | -------------------------------------------------------------------------------- /zram_benchmark.txt: -------------------------------------------------------------------------------- 1 | Benchmark (zramtest2.sh) creates a zram block device, an ext4 filesystem on it, 2 | fixes the filesystem not to think it's on a striped RAID device with a single 3 | stripe, and then untars a gzip'd tarball to that filesystem. 4 | Then it syncs the filesystem, drops caches and verifies md5 checksums of 5 | extracted files to cause them to be read back from zram. 6 | Results are from running on a 2.33Ghz Core 2 Duo with Linux 2.6.39-rc3. 7 | 8 | Results for LZO zram: 9 | real 0m5.333s 10 | user 0m4.244s 11 | sys 0m1.471s 12 | orig_data_size 645918720 13 | compr_data_size 320624925 14 | mem_used_total 326627328 15 | 16 | Result for Snappy zram: 17 | real 0m5.012s 18 | user 0m4.213s 19 | sys 0m1.477s 20 | orig_data_size 645914624 21 | compr_data_size 326040602 22 | mem_used_total 332374016 23 | 24 | orig_data_size is not constant from run to run. I don't know why. 25 | 26 | perf for the LZO zram: 27 | [ perf record: Woken up 13 times to write data ] 28 | [ perf record: Captured and wrote 3.195 MB perf.data (~139599 samples) ] 29 | # Events: 15K cycles 30 | # 31 | # Overhead Command Shared Object Symbol 32 | # ........ .............. ................. ...................................... 33 | # 34 | 25.96% kthreadd [lzo_compress] [k] _lzo1x_1_do_compress 35 | 12.38% gzip gzip [.] zip 36 | 11.46% gzip gzip [.] 
treat_file.part.4.2264 37 | 7.60% md5sum [lzo_decompress] [k] lzo1x_decompress_safe 38 | 4.80% md5sum md5sum [.] digest_file.isra.2.2089 39 | 3.95% md5sum md5sum [.] 0x3324 40 | 1.96% gzip libc-2.13.so [.] __memcpy_ssse3 41 | 1.11% tar [kernel.kallsyms] [k] copy_user_generic_string 42 | 0.86% gzip [kernel.kallsyms] [k] copy_user_generic_string 43 | 0.70% kthreadd [kernel.kallsyms] [k] __memcpy 44 | 0.60% md5sum [zram] [k] zram_make_request 45 | 0.54% kthreadd [lzo_compress] [k] lzo1x_1_compress 46 | 0.51% md5sum md5sum [.] __libc_csu_init 47 | 0.50% md5sum [kernel.kallsyms] [k] copy_user_generic_string 48 | 0.48% tar [kernel.kallsyms] [k] ext4_mark_iloc_dirty 49 | 0.45% gzip gzip [.] treat_stdin.2262 50 | 0.38% tar [kernel.kallsyms] [k] __memset 51 | 0.38% gzip gzip [.] treat_file.2267 52 | 0.36% dd [kernel.kallsyms] [k] system_call 53 | 0.31% bash [kernel.kallsyms] [k] _raw_spin_trylock 54 | 0.29% bash [kernel.kallsyms] [k] _raw_spin_lock 55 | 0.24% kthreadd [kernel.kallsyms] [k] mb_find_order_for_block 56 | 0.22% tar [kernel.kallsyms] [k] _raw_spin_lock 57 | 0.21% tar [kernel.kallsyms] [k] system_call 58 | 0.21% gzip gzip [.] compress_block.2644.2190 59 | 0.21% swapper [kernel.kallsyms] [k] mwait_idle 60 | 61 | perf for Snappy zram: 62 | [ perf record: Woken up 13 times to write data ] 63 | [ perf record: Captured and wrote 3.088 MB perf.data (~134926 samples) ] 64 | # Events: 13K cycles 65 | # 66 | # Overhead Command Shared Object Symbol 67 | # ........ .............. ....................... ...................................... 68 | # 69 | 15.27% gzip gzip [.] zip 70 | 14.60% gzip gzip [.] treat_file.part.4.2264 71 | 11.37% flush-253:0 [csnappy_compress] [k] csnappy_compress_fragment 72 | 6.15% md5sum md5sum [.] digest_file.isra.2.2089 73 | 5.28% md5sum [csnappy_decompress] [k] csnappy_decompress_noheader 74 | 5.20% md5sum md5sum [.] 0x336d 75 | 2.21% gzip libc-2.13.so [.] __memcpy_ssse3 76 | 2.07% flush-253:0 [kernel.kallsyms] [k] __memcpy 77 | 1.53% tar [kernel.kallsyms] [k] copy_user_generic_string 78 | 1.18% gzip [kernel.kallsyms] [k] copy_user_generic_string 79 | 0.68% flush-253:0 [kernel.kallsyms] [k] __memset 80 | 0.66% md5sum md5sum [.] __libc_csu_init 81 | 0.62% md5sum [kernel.kallsyms] [k] copy_user_generic_string 82 | 0.61% dd [kernel.kallsyms] [k] system_call 83 | 0.56% md5sum [zram] [k] zram_make_request 84 | 0.52% gzip gzip [.] treat_stdin.2262 85 | 0.49% tar [kernel.kallsyms] [k] __memset 86 | 0.48% gzip gzip [.] treat_file.2267 87 | 0.45% tar [kernel.kallsyms] [k] ext4_mark_iloc_dirty 88 | 0.38% md5sum [kernel.kallsyms] [k] __memcpy 89 | 0.34% swapper [kernel.kallsyms] [k] mwait_idle 90 | 0.33% tar [kernel.kallsyms] [k] _raw_spin_lock 91 | 0.30% tar [kernel.kallsyms] [k] system_call 92 | 0.28% umount [kernel.kallsyms] [k] _raw_spin_lock 93 | 0.28% dd [kernel.kallsyms] [k] copy_user_generic_string 94 | 0.26% tar [kernel.kallsyms] [k] __ext4_get_inode_loc 95 | 0.24% gzip gzip [.] 
compress_block.2644.2190 96 | 0.24% md5sum [kernel.kallsyms] [k] system_call 97 | 0.23% flush-253:0 [kernel.kallsyms] [k] _raw_spin_lock 98 | 0.22% bash [kernel.kallsyms] [k] _raw_spin_lock 99 | 0.22% tar [kernel.kallsyms] [k] __find_get_block 100 | 0.22% flush-253:0 [kernel.kallsyms] [k] ext4_bio_write_page 101 | 0.21% flush-253:0 [kernel.kallsyms] [k] mb_find_order_for_block 102 | -------------------------------------------------------------------------------- /zramtest2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | TESTFILE="/usr/portage/distfiles/qt-everywhere-opensource-src-4.7.2.tar.gz" 4 | 5 | if [ ! -f MD5SUMS.gz ]; then 6 | rm -rf temp 7 | mkdir temp 8 | cd temp 9 | time tar xzf ${TESTFILE} 10 | find ./ -type f | sort | xargs -L 1 md5sum | gzip -9 -c > ../MD5SUMS.gz 11 | cd .. 12 | rm -rf temp 13 | fi 14 | 15 | grep -q " $PWD/zram0mnt " /proc/mounts && umount zram0mnt 16 | mkdir -p zram0mnt 17 | echo 1 >/sys/block/zram0/reset || exit 18 | sleep 2 19 | echo $((1024*1024*1024)) > /sys/block/zram0/disksize 20 | mke2fs -t ext4 -m 0 -I 128 -O ^has_journal,^ext_attr /dev/zram0 >/dev/null || exit 21 | tune2fs -l /dev/zram0 | grep 'RAID' 22 | debugfs -w -f debugfs_input.txt /dev/zram0 23 | tune2fs -l /dev/zram0 | grep 'RAID' 24 | mount -o noatime,barrier=0,data=writeback,nobh,discard /dev/zram0 zram0mnt 25 | dd if=${TESTFILE} of=/dev/null >/dev/null 2>&1 26 | cd zram0mnt 27 | time tar xzf ${TESTFILE} 28 | sync 29 | sleep 5 30 | echo 3 > /proc/sys/vm/drop_caches 31 | sleep 5 32 | echo -ne "orig_data_size\t" | cat - /sys/block/zram0/orig_data_size 33 | echo -ne "compr_data_size\t" | cat - /sys/block/zram0/compr_data_size 34 | echo -ne "mem_used_total\t" | cat - /sys/block/zram0/mem_used_total 35 | gunzip -c <../MD5SUMS.gz | md5sum -c - | egrep -v ': OK$' 36 | cd .. 37 | umount zram0mnt 38 | echo 1 >/sys/block/zram0/reset 39 | rmdir zram0mnt 40 | --------------------------------------------------------------------------------
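
The tail of python/pysnappy_decompress.py above dispatches on the two low bits of each tag byte: type 1 is a copy with a 1-byte offset, type 2 a copy with a 2-byte little-endian offset, and anything else a copy with a 4-byte offset, after which ob.repeat_chunk(length, offset) replays bytes from earlier output. Below is a minimal C sketch that only restates that tag arithmetic; it is not part of the repository, and the long-literal encodings are deliberately left out.

#include <stdint.h>

/* Decode one snappy element tag at p, restating the arithmetic used by
 * pysnappy_decompress.py above. Returns the number of tag bytes consumed,
 * or 0 for the long-literal encodings this sketch does not handle. */
static unsigned decode_tag(const uint8_t *p, uint32_t *len, uint32_t *offset)
{
    uint8_t c = p[0];
    switch (c & 3) {
    case 0: /* literal: (length - 1) in the upper six bits; 60..63 mean the
               length follows in 1..4 extra bytes and is not handled here */
        if ((c >> 2) >= 60)
            return 0;
        *len = (c >> 2) + 1;
        *offset = 0;
        return 1;
    case 1: /* copy, 1-byte offset: 3 length bits, 11 offset bits */
        *len = ((c >> 2) & 7) + 4;
        *offset = ((uint32_t)(c >> 5) << 8) | p[1];
        return 2;
    case 2: /* copy, 2-byte little-endian offset */
        *len = (c >> 2) + 1;
        *offset = (uint32_t)p[1] | ((uint32_t)p[2] << 8);
        return 3;
    default: /* copy, 4-byte little-endian offset */
        *len = (c >> 2) + 1;
        *offset = (uint32_t)p[1] | ((uint32_t)p[2] << 8) |
                  ((uint32_t)p[3] << 16) | ((uint32_t)p[4] << 24);
        return 5;
    }
}

/* What a copy then asks of the output buffer: replay len bytes starting
 * offset bytes back. offset may be smaller than len (overlapping regions,
 * which is how runs are encoded), so the copy must go byte by byte. */
static void replay_copy(uint8_t *out, uint32_t pos, uint32_t len, uint32_t offset)
{
    while (len--) {
        out[pos] = out[pos - offset];
        pos++;
    }
}

For example, a tag byte 0x05 followed by 0x2a decodes as a copy of length 5 from offset 42; the overlapping byte-by-byte replay is the behaviour ob.repeat_chunk() is expected to provide, and csnappy_decompress.c implements the same logic in C.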
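
snappy_tester.patch above only wires csnappy into Google's snappy_unittest benchmark; the same three entry points it calls (csnappy_max_compressed_length, csnappy_compress, csnappy_decompress) can be used directly from C. The following round-trip sketch is not part of the repository: it assumes the calling convention shown in the patch, and the buffer names and sample string are made up for illustration.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "csnappy.h"

/* Round-trip a short string through csnappy, mirroring the calls made in
 * snappy_tester.patch above. Error handling is kept minimal on purpose. */
int main(void)
{
    const char *text = "hello hello hello hello hello";
    uint32_t ilen = (uint32_t)strlen(text);
    uint32_t clen = 0;
    char *compressed = malloc(csnappy_max_compressed_length(ilen));
    char *workmem = malloc(CSNAPPY_WORKMEM_BYTES);
    char *restored = malloc(ilen);
    int ret;

    if (!compressed || !workmem || !restored)
        return 1;
    /* compression needs a caller-supplied scratch buffer of fixed size */
    csnappy_compress(text, ilen, compressed, &clen,
                     workmem, CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO);
    /* decompress into a buffer of the known original length */
    ret = csnappy_decompress(compressed, clen, restored, ilen);
    printf("%u -> %u bytes, %s\n", ilen, clen,
           (ret == CSNAPPY_E_OK && memcmp(text, restored, ilen) == 0)
               ? "round-trip ok" : "round-trip FAILED");
    free(compressed);
    free(workmem);
    free(restored);
    return 0;
}

Built against the shared library, with something like cc -I. roundtrip.c -L. -lcsnappy, it should print the compressed size and report whether decompression restored the input.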
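
unaligned_test.c above times several ways of loading the first 1..4 little-endian bytes from a possibly unaligned address: byte-by-byte assembly of the value (v1, v2), branch-free masking (v4), direct uint32_t loads masked by wordmask[] (the x86 and v5 variants), and the ARMv5 assembly routine. One common variant it does not include is a memcpy-based load, which stays within the C aliasing and alignment rules and which current compilers typically turn into the same single load as the x86 variant. A sketch for comparison, not part of the test file:

#include <stdint.h>
#include <string.h>

/* memcpy-based little-endian load of the first n (1..4) bytes at p.
 * Well-defined with respect to alignment and aliasing; like
 * get_unaligned_le_x86 it always reads 4 bytes, so the caller must
 * guarantee they are addressable, and it assumes a little-endian host. */
static inline uint32_t get_unaligned_le_memcpy(const void *p, uint32_t n)
{
    static const uint32_t wordmask[] = {
        0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
    };
    uint32_t v;
    memcpy(&v, p, sizeof(v));  /* compilers emit a plain 32-bit load here */
    return v & wordmask[n];
}

On a big-endian target the byte-by-byte variants remain correct as written, while this one and the other whole-word loads would additionally need a byte swap.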
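
For reference, the zram counters reported above reduce to simple ratios. A throwaway sketch that just prints them; the constants are copied from the two runs in zram_benchmark.txt:

#include <stdio.h>

/* Ratios implied by the zram counters above: compressed size and total
 * memory used, each relative to the original data size. */
int main(void)
{
    static const struct { const char *name; double orig, compr, mem; } run[] = {
        { "LZO",    645918720.0, 320624925.0, 326627328.0 },
        { "Snappy", 645914624.0, 326040602.0, 332374016.0 },
    };
    unsigned i;
    for (i = 0; i < sizeof(run) / sizeof(run[0]); i++)
        printf("%-6s  compr_data %.1f%%  mem_used %.1f%%\n", run[i].name,
               100.0 * run[i].compr / run[i].orig,
               100.0 * run[i].mem / run[i].orig);
    return 0;
}

That works out to roughly 49.6% compressed size (50.6% memory used) for LZO against 50.5% (51.5%) for Snappy, i.e. in this run the Snappy-backed zram stores about one percentage point more data while the extract-and-verify phase finishes slightly sooner (5.012 s against 5.333 s real).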