├── .gitignore ├── Makefile ├── README ├── TODO ├── block_compressor.c ├── block_compressor_benchmark.txt ├── cl_tester.c ├── csnappy.h ├── csnappy_compat.h ├── csnappy_compress.c ├── csnappy_decompress.c ├── csnappy_internal.h ├── csnappy_internal_userspace.h ├── debugfs_input.txt ├── kernel_3_2_10.patch ├── python ├── OutputBuffer.py ├── pysnappy_compress.py └── pysnappy_decompress.py ├── snappy_tester.patch ├── testdata ├── baddata3.snappy ├── unaligned_uint64_test.bin.gz ├── unaligned_uint64_test.snappy.gz ├── urls.10K └── urls.10K.snappy ├── unaligned_arm.s ├── unaligned_test.c ├── userspace_benchmark.txt ├── zram_benchmark.txt └── zramtest2.sh /.gitignore: -------------------------------------------------------------------------------- 1 | cl_tester 2 | *.o 3 | *.so 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | OPT_FLAGS = -g -O2 -DNDEBUG -fomit-frame-pointer 3 | DBG_FLAGS = -ggdb -O0 -DDEBUG 4 | CFLAGS := -std=gnu89 -Wall -pedantic -DHAVE_BUILTIN_CTZ 5 | ifeq (${DEBUG},yes) 6 | CFLAGS += $(DBG_FLAGS) 7 | else 8 | CFLAGS += $(OPT_FLAGS) 9 | endif 10 | LDFLAGS = -Wl,-O1 -Wl,--no-undefined 11 | PREFIX := /usr 12 | LIBDIR := $(PREFIX)/lib 13 | 14 | all: test 15 | 16 | test: check_unaligned_uint64 cl_test check_leaks 17 | 18 | cl_tester: cl_tester.c csnappy.h libcsnappy.so 19 | $(CC) $(CFLAGS) $(LDFLAGS) -D_GNU_SOURCE -o $@ $< libcsnappy.so 20 | 21 | cl_test: cl_tester 22 | rm -f afifo 23 | mkfifo afifo 24 | LD_LIBRARY_PATH=. ./cl_tester -c afifo & 26 | diff -u testdata/urls.10K afifo && echo "compress-decompress restores original" 27 | rm -f afifo 28 | LD_LIBRARY_PATH=. ./cl_tester -S d && echo "decompression is safe" 29 | LD_LIBRARY_PATH=. ./cl_tester -S c 30 | 31 | check_leaks: cl_tester 32 | LD_LIBRARY_PATH=. valgrind --leak-check=full --show-reachable=yes ./cl_tester -d -c /dev/null 33 | LD_LIBRARY_PATH=. valgrind --leak-check=full --show-reachable=yes ./cl_tester -d -c /dev/null || true 34 | LD_LIBRARY_PATH=. valgrind --leak-check=full --show-reachable=yes ./cl_tester -c /dev/null 35 | LD_LIBRARY_PATH=. valgrind --leak-check=full --show-reachable=yes ./cl_tester -S d 36 | 37 | check_unaligned_uint64: 38 | gzip -dc testdata/unaligned_uint64_test.snappy 39 | gzip -dc testdata/unaligned_uint64_test.bin 40 | EXTRA_TEST_CFLAGS="-O0" make check_unaligned_uint64_extra_cflags 41 | EXTRA_TEST_CFLAGS="-O1" make check_unaligned_uint64_extra_cflags 42 | EXTRA_TEST_CFLAGS="-O2" make check_unaligned_uint64_extra_cflags 43 | EXTRA_TEST_CFLAGS="-O3" make check_unaligned_uint64_extra_cflags 44 | EXTRA_TEST_CFLAGS="-O2 -march=native" make check_unaligned_uint64_extra_cflags 45 | EXTRA_TEST_CFLAGS="-O3 -march=native" make check_unaligned_uint64_extra_cflags 46 | rm -f testdata/unaligned_uint64_test.snappy testdata/unaligned_uint64_test.bin 47 | 48 | check_unaligned_uint64_extra_cflags: 49 | make clean 50 | make cl_tester 51 | rm -f tmp 52 | LD_LIBRARY_PATH=. 
./cl_tester -d testdata/unaligned_uint64_test.snappy tmp 53 | diff testdata/unaligned_uint64_test.bin tmp >/dev/null && echo "${EXTRA_TEST_CFLAGS} ok" 54 | make clean 55 | rm -f tmp 56 | 57 | libcsnappy.so: csnappy_compress.c csnappy_decompress.c csnappy_internal.h csnappy_internal_userspace.h 58 | $(CC) $(CFLAGS) $(EXTRA_TEST_CFLAGS) -fPIC -DPIC -c -o csnappy_compress.o csnappy_compress.c 59 | $(CC) $(CFLAGS) $(EXTRA_TEST_CFLAGS) -fPIC -DPIC -c -o csnappy_decompress.o csnappy_decompress.c 60 | $(CC) $(CFLAGS) $(EXTRA_TEST_CFLAGS) $(LDFLAGS) -shared -o $@ csnappy_compress.o csnappy_decompress.o 61 | 62 | block_compressor: block_compressor.c libcsnappy.so 63 | $(CC) -std=gnu99 -Wall -O2 -g -o $@ $< libcsnappy.so -llzo2 -lz -lrt 64 | 65 | test_block_compressor: block_compressor 66 | for testfile in \ 67 | /usr/lib64/chromium-browser/chrome \ 68 | /usr/lib64/qt4/libQtWebKit.so.4.7.2 \ 69 | /usr/lib64/llvm/libLLVM-2.9.so \ 70 | /usr/lib64/xulrunner-2.0/libxul.so \ 71 | /usr/libexec/gcc/x86_64-pc-linux-gnu/4.6.1-pre9999/cc1 \ 72 | /usr/lib64/libnvidia-glcore.so.270.41.03 \ 73 | /usr/lib64/gcc/x86_64-pc-linux-gnu/4.6.1-pre9999/libgcj.so.12.0.0 \ 74 | /usr/lib64/libwireshark.so.0.0.1 \ 75 | /usr/share/icons/oxygen/icon-theme.cache \ 76 | ; do \ 77 | echo compressing: $$testfile ; \ 78 | for method in snappy lzo zlib ; do \ 79 | LD_LIBRARY_PATH=. ./block_compressor -c $$method $$testfile itmp ;\ 80 | LD_LIBRARY_PATH=. ./block_compressor -c $$method -d itmp otmp > /dev/null ;\ 81 | diff -u $$testfile otmp ;\ 82 | echo "ratio:" \ 83 | $$(stat --printf %s itmp) \* 100 / $$(stat --printf %s $$testfile) "=" \ 84 | $$(expr $$(stat --printf %s itmp) \* 100 / $$(stat --printf %s $$testfile)) "%" ;\ 85 | rm -f itmp otmp ;\ 86 | done ; \ 87 | done ; 88 | 89 | NDK = /mnt/backup/home/backup/android-ndk-r7b 90 | SYSROOT = $(NDK)/platforms/android-5/arch-arm 91 | TOOLCHAIN = $(NDK)/toolchains/arm-linux-androideabi-4.4.3/prebuilt/linux-x86/bin 92 | unaligned_test_android: unaligned_test.c unaligned_arm.s 93 | $(TOOLCHAIN)/arm-linux-androideabi-gcc \ 94 | -ffunction-sections -funwind-tables -fstack-protector \ 95 | -D__ARM_ARCH_5__ -D__ARM_ARCH_5T__ -D__ARM_ARCH_5E__ -D__ARM_ARCH_5TE__ \ 96 | -Wno-psabi -march=armv5te -mtune=xscale -msoft-float -mthumb \ 97 | -O2 -fomit-frame-pointer -fno-strict-aliasing -finline-limit=64 \ 98 | -DANDROID -Wa,--noexecstack -DNDEBUG -g \ 99 | -I$(SYSROOT)/usr/include -c -o unaligned_test.o unaligned_test.c 100 | $(TOOLCHAIN)/arm-linux-androideabi-gcc \ 101 | -ffunction-sections -funwind-tables -fstack-protector \ 102 | -D__ARM_ARCH_5__ -D__ARM_ARCH_5T__ -D__ARM_ARCH_5E__ -D__ARM_ARCH_5TE__ \ 103 | -Wno-psabi -march=armv5te -mtune=xscale -msoft-float -mthumb \ 104 | -O2 -fomit-frame-pointer -fno-strict-aliasing -finline-limit=64 \ 105 | -DANDROID -Wa,--noexecstack -DNDEBUG -g \ 106 | -I$(SYSROOT)/usr/include -c -o unaligned_arm.o unaligned_arm.s 107 | $(TOOLCHAIN)/arm-linux-androideabi-g++ \ 108 | --sysroot=$(SYSROOT) unaligned_test.o unaligned_arm.o \ 109 | $(TOOLCHAIN)/../lib/gcc/arm-linux-androideabi/4.4.3/libgcc.a \ 110 | -Wl,--no-undefined -Wl,-z,noexecstack -lc -lm -o unaligned_test_android 111 | 112 | install: csnappy.h libcsnappy.so 113 | install -d "$(DESTDIR)$(PREFIX)"/include 114 | install -m 0644 csnappy.h "$(DESTDIR)$(PREFIX)"/include/ 115 | install -d "$(DESTDIR)$(LIBDIR)" 116 | install libcsnappy.so "$(DESTDIR)$(LIBDIR)" 117 | 118 | uninstall: 119 | rm -f "$(DESTDIR)$(PREFIX)"/include/csnappy.h 120 | rm -f "$(DESTDIR)$(LIBDIR)"/libcsnappy.so 121 | 122 | clean: 123 | 
rm -f *.o *_debug libcsnappy.so cl_tester 124 | 125 | .PHONY: .REGEN clean all 126 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Google Snappy, available at https://google.github.io/snappy/ 2 | is a compression library designed for speed rather than compression ratios. 3 | 4 | It is not a new concept by far. The Linux kernel currently uses LZO as the 5 | default fast compressor. Snappy works faster, though achieves less compression. 6 | 7 | Google's code is written in C with a lot of C++. Some of the more interesting 8 | features that rely on C++ appear to have been elided from the open source 9 | version of Snappy. 10 | 11 | Goals: 12 | To get the codebase into a shape that can be accepted into the mainline 13 | Linux kernel and used with zram (http://code.google.com/p/compcache/). 14 | Being able to compress vmlinux and initrd with Snappy is a secondary goal. 15 | Same for support in Squashfs and other parts of the kernel that currently 16 | support LZO. 17 | 18 | Results: 19 | I cut out or ported to plain ANSI C the necessary code and headers. 20 | To cause less confusion, I call this project (and files) csnappy. 21 | The API looks right, but I welcome comments. 22 | The code *has* been tested in kernel-space using a patched zram and it works. 23 | 24 | The code has been tested in a qemu emulating a PowerPC Mac and ARMv5TE running 25 | Debian Wheezy. 26 | 27 | I also use an ARMv6 Android phone for testing. 28 | 29 | Testing on other hardware or platforms is welcome. 30 | 31 | Note: The userspace tester is a hack, as is the shared library. 32 | 33 | Someone wrote a perl wrapper for csnappy: 34 | http://search.cpan.org/dist/Compress-Snappy/ 35 | https://github.com/gray/compress-snappy 36 | 37 | Patch for upstream snappy tester is available: snappy_tester.patch 38 | Patch for linux kernel is available: kernel_3_2_10.patch 39 | 40 | Benchmark in userspace: userspace_benchmark.txt 41 | Benchmark in kernel space with zram: zram_benchmark.txt 42 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | * test with debug config option 2 | * rename CamelCase symbols to linux_style (?) 3 | * test on 32bit x86 qemu 4 | * test on non-x86 hardware 5 | * consider hash functions with better performance on other arches. 6 | * consider hash table with say, 8, possible matches. 7 | * check what compression ratio, speed, memory use looks like if instead of 8 | hash table with 16K entries for 32K values and no chaining we use a hash map 9 | that stores all values (establish upper limits). 10 | -------------------------------------------------------------------------------- /block_compressor.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011, Zeev Tarantov . 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following disclaimer 13 | in the documentation and/or other materials provided with the 14 | distribution. 
15 | * Neither the name of Zeev Tarantov nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include "csnappy.h" 41 | #include 42 | #ifndef MAP_ANONYMOUS 43 | #define MAP_ANONYMOUS MAP_ANON 44 | #endif 45 | 46 | static int PAGE_SIZE, PAGE_SHIFT; 47 | 48 | #define handle_error(msg) \ 49 | do { perror(msg); exit(EXIT_FAILURE); } while (0) 50 | 51 | #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) 52 | 53 | static void* noop(void) { return NULL; } 54 | static void noop_p(void *p) { } 55 | 56 | static void* lzo_compress_init(void) 57 | { 58 | char *workmem; 59 | if (!(workmem = malloc(LZO1X_1_MEM_COMPRESS))) 60 | handle_error("malloc"); 61 | return workmem; 62 | } 63 | 64 | static void lzo_compress_free(void *opaque) 65 | { 66 | free(opaque); 67 | } 68 | 69 | static void lzo_compress( 70 | const char *src, 71 | uint32_t ilen, 72 | char *dst, 73 | uint32_t *dst_len, 74 | void *opaque) 75 | { 76 | lzo_uint olen; 77 | lzo1x_1_compress((unsigned char *)src, ilen, 78 | (unsigned char *)dst, &olen, opaque); 79 | *dst_len = olen; 80 | } 81 | 82 | static int lzo_decompress( 83 | const char *src, 84 | uint32_t ilen, 85 | char *dst, 86 | uint32_t *dst_len, 87 | void *opaque) 88 | { 89 | lzo_uint olen = *dst_len; 90 | int ret; 91 | ret = lzo1x_decompress_safe( 92 | (unsigned char *)src, ilen, 93 | (unsigned char *)dst, &olen, NULL); 94 | *dst_len = olen; 95 | return ret; 96 | } 97 | 98 | 99 | #define WMSIZE_ORDER ((PAGE_SHIFT > 14) ? 
(15) : (PAGE_SHIFT+1)) 100 | static void* snappy_compress_init(void) 101 | { 102 | char *workmem; 103 | if (!(workmem = malloc(1 << WMSIZE_ORDER))) 104 | handle_error("malloc"); 105 | return workmem; 106 | } 107 | 108 | static void snappy_compress_free(void *opaque) 109 | { 110 | free(opaque); 111 | } 112 | 113 | static void snappy_compress( 114 | const char *src, 115 | uint32_t ilen, 116 | char *dst, 117 | uint32_t *dst_len, 118 | void *opaque) 119 | { 120 | char *end; 121 | end = csnappy_compress_fragment(src, ilen, dst, 122 | opaque, WMSIZE_ORDER); 123 | *dst_len = end - dst; 124 | } 125 | 126 | static int snappy_decompress( 127 | const char *src, 128 | uint32_t ilen, 129 | char *dst, 130 | uint32_t *dst_len, 131 | void *opaque) 132 | { 133 | return csnappy_decompress_noheader(src, ilen, dst, dst_len); 134 | } 135 | 136 | 137 | static void* zlib_compress_init(void) 138 | { 139 | z_stream *zs; 140 | if (!(zs = malloc(sizeof(z_stream)))) 141 | handle_error("malloc"); 142 | zs->zalloc = Z_NULL; 143 | zs->zfree = Z_NULL; 144 | zs->opaque = Z_NULL; 145 | if (deflateInit(zs, Z_DEFAULT_COMPRESSION) != Z_OK) 146 | handle_error("deflateInit"); 147 | return zs; 148 | } 149 | 150 | static void zlib_compress_free(void *opaque) 151 | { 152 | if (deflateEnd(opaque) != Z_OK) 153 | handle_error("deflateEnd"); 154 | free(opaque); 155 | } 156 | 157 | static void zlib_compress( 158 | const char *src, 159 | uint32_t ilen, 160 | char *dst, 161 | uint32_t *dst_len, 162 | void *opaque) 163 | { 164 | z_stream *zs = opaque; 165 | zs->avail_in = ilen; 166 | zs->next_in = (unsigned char *)src; 167 | zs->avail_out = *dst_len; 168 | zs->next_out = (unsigned char *)dst; 169 | if (deflate(zs, Z_FINISH) != Z_STREAM_END) 170 | handle_error("deflate"); 171 | if (deflateReset(zs) != Z_OK) 172 | handle_error("deflateReset"); 173 | *dst_len = *dst_len - zs->avail_out; 174 | } 175 | 176 | static void* zlib_decompress_init(void) 177 | { 178 | z_stream *zs; 179 | if (!(zs = malloc(sizeof(z_stream)))) 180 | handle_error("malloc"); 181 | zs->zalloc = Z_NULL; 182 | zs->zfree = Z_NULL; 183 | zs->opaque = Z_NULL; 184 | zs->next_in = Z_NULL; 185 | zs->avail_in = 0; 186 | if (inflateInit(zs) != Z_OK) 187 | handle_error("inflateInit"); 188 | return zs; 189 | } 190 | 191 | static void zlib_decompress_free(void *opaque) 192 | { 193 | if (inflateEnd(opaque) != Z_OK) 194 | handle_error("inflateEnd"); 195 | free(opaque); 196 | } 197 | 198 | static int zlib_decompress( 199 | const char *src, 200 | uint32_t ilen, 201 | char *dst, 202 | uint32_t *dst_len, 203 | void *opaque) 204 | { 205 | z_stream *zs = opaque; 206 | zs->avail_in = ilen; 207 | zs->next_in = (unsigned char *)src; 208 | zs->avail_out = *dst_len; 209 | zs->next_out = (unsigned char *)dst; 210 | if (inflate(zs, Z_FINISH) != Z_STREAM_END) 211 | handle_error("inflate"); 212 | if (inflateReset(zs) != Z_OK) 213 | handle_error("inflateReset"); 214 | *dst_len = *dst_len - zs->avail_out; 215 | return 0; 216 | } 217 | 218 | 219 | enum { 220 | LZO = 0, 221 | SNAPPY = 1, 222 | ZLIB = 2, 223 | }; 224 | 225 | static const char* const COMPRESSORS[] = { "LZO", "SNAPPY", "ZLIB" }; 226 | 227 | typedef void (*compress_fn)(const char *src, uint32_t ilen, char *dst, 228 | uint32_t *dst_len, void *opaque); 229 | 230 | typedef int (*decompress_fn)(const char *src, uint32_t ilen, char *dst, 231 | uint32_t *dst_len, void *opaque); 232 | 233 | struct compressor_funcs { 234 | void* (*compress_init)(void); 235 | void (*compress_free)(void *opaque); 236 | compress_fn compress; 237 | void* 
(*decompress_init)(void); 238 | void (*decompress_free)(void *opaque); 239 | decompress_fn decompress; 240 | }; 241 | 242 | static const struct compressor_funcs compressors[] = { 243 | {lzo_compress_init, lzo_compress_free, lzo_compress, 244 | noop, noop_p, lzo_decompress}, 245 | {snappy_compress_init, snappy_compress_free, snappy_compress, 246 | noop, noop_p, snappy_decompress}, 247 | {zlib_compress_init, zlib_compress_free, zlib_compress, 248 | zlib_decompress_init, zlib_decompress_free, zlib_decompress}, 249 | }; 250 | 251 | #define ONE_BILLION 1000000000 252 | static void add_time_diff(struct timespec *total, 253 | struct timespec *start, 254 | struct timespec *end) 255 | { 256 | end->tv_sec -= start->tv_sec; 257 | end->tv_nsec -= start->tv_nsec; 258 | if (end->tv_nsec < 0) { 259 | end->tv_sec--; 260 | end->tv_nsec += ONE_BILLION; 261 | } 262 | total->tv_sec += end->tv_sec; 263 | total->tv_nsec += end->tv_nsec; 264 | if (total->tv_nsec > ONE_BILLION) { 265 | total->tv_sec++; 266 | total->tv_nsec -= ONE_BILLION; 267 | } 268 | } 269 | 270 | union intbytes { 271 | uint32_t i; 272 | char c[4]; 273 | }; 274 | 275 | static int do_compress(int method, FILE *ifile, FILE *ofile) 276 | { 277 | union intbytes intbuf; 278 | char *ibuf, *obuf, *opaque; 279 | compress_fn compress = compressors[method].compress; 280 | uint32_t counts[3] = { 0 }; 281 | struct timespec t1, t2, elapsed; 282 | memset(&elapsed, 0, sizeof(elapsed)); 283 | if (!(ibuf = malloc(PAGE_SIZE))) 284 | handle_error("malloc"); 285 | if (!(obuf = malloc(2 * PAGE_SIZE))) 286 | handle_error("malloc"); 287 | opaque = compressors[method].compress_init(); 288 | if (fseek(ifile, 0, SEEK_END) == -1) 289 | handle_error("fseek"); 290 | long input_length = ftell(ifile); 291 | if (fseek(ifile, 0, SEEK_SET) == -1) 292 | handle_error("fseek"); 293 | long nr_pages = DIV_ROUND_UP(input_length, PAGE_SIZE); 294 | if (nr_pages > UINT32_MAX) 295 | handle_error("inut file too big"); 296 | printf("compressor: %s\n", COMPRESSORS[method]); 297 | printf("#pages: %u\n", (unsigned)nr_pages); 298 | intbuf.i = (uint32_t)nr_pages; 299 | if (fwrite(&intbuf.c, 1, 4, ofile) < 4) 300 | handle_error("fwrite"); 301 | /* expand ofile to place of first compressed block data */ 302 | fseek(ofile, nr_pages * sizeof(uint32_t), SEEK_SET); 303 | /* write something so the file will grow. 
end of file now points to 304 | * start of compressed data of first block */ 305 | if (fwrite(&intbuf, 1, 4, ofile) < 4) 306 | handle_error("fwrite"); 307 | for (uint32_t i = 0; i < nr_pages; i++) { 308 | uint32_t ilen = fread(ibuf, 1, PAGE_SIZE, ifile); 309 | if (ilen < PAGE_SIZE && !feof(ifile)) 310 | handle_error("fread"); 311 | uint32_t olen = 2 * PAGE_SIZE; 312 | clock_gettime(CLOCK_MONOTONIC, &t1); 313 | compress(ibuf, ilen, obuf, &olen, opaque); 314 | clock_gettime(CLOCK_MONOTONIC, &t2); 315 | char *wbuf = obuf; 316 | if (olen >= ilen) { 317 | olen = ilen; 318 | wbuf = ibuf; 319 | counts[2]++; 320 | } else if (olen > (PAGE_SIZE / 2)) { 321 | counts[1]++; 322 | } else { 323 | counts[0]++; 324 | } 325 | if (fseek(ofile, (i + 1) * sizeof(uint32_t), SEEK_SET) == -1) 326 | handle_error("fseek"); 327 | intbuf.i = olen; 328 | if (fwrite(&intbuf.c, 1, 4, ofile) < 4) 329 | handle_error("fwrite"); 330 | if (fseek(ofile, 0, SEEK_END) == -1) 331 | handle_error("fseek"); 332 | if (fwrite(wbuf, 1, olen, ofile) < olen) 333 | handle_error("fwrite"); 334 | add_time_diff(&elapsed, &t1, &t2); 335 | } 336 | fclose(ofile); 337 | fclose(ifile); 338 | free(obuf); 339 | free(ibuf); 340 | compressors[method].compress_free(opaque); 341 | printf("> 100%%\t:%u\n> 50%%\t:%u\n<= 50%%\t:%u\n" 342 | "%d.%09ld seconds\n", 343 | counts[2],counts[1],counts[0], 344 | (int)elapsed.tv_sec, elapsed.tv_nsec); 345 | return 0; 346 | } 347 | 348 | static int do_decompress(int method, FILE *ifile, FILE *ofile) 349 | { 350 | union intbytes intbuf; 351 | char *ibuf, *obuf, *opaque; 352 | decompress_fn decompress = compressors[method].decompress; 353 | uint64_t ipos; 354 | uint32_t nr_pages; 355 | if (!(ibuf = malloc(2 * PAGE_SIZE))) 356 | handle_error("malloc"); 357 | if (!(obuf = malloc(PAGE_SIZE))) 358 | handle_error("malloc"); 359 | opaque = compressors[method].decompress_init(); 360 | if (fread(&intbuf.c, 1, 4, ifile) < 4) 361 | handle_error("fread"); 362 | nr_pages = intbuf.i; 363 | printf("nr_pages: %u\n", nr_pages); 364 | ipos = (nr_pages + 1) * sizeof(uint32_t); 365 | for (uint32_t i = 0; i < nr_pages; i++) { 366 | if (fseek(ifile, (i + 1) * sizeof(uint32_t), SEEK_SET) == -1) 367 | handle_error("fseek"); 368 | if (fread(&intbuf.c, 1, 4, ifile) < 4) 369 | handle_error("fread"); 370 | uint32_t ilen = intbuf.i; 371 | if (fseek(ifile, ipos, SEEK_SET) == -1) 372 | handle_error("fseek"); 373 | if (fread(ibuf, 1, ilen, ifile) < ilen) 374 | handle_error("fread"); 375 | ipos += ilen; 376 | uint32_t olen = PAGE_SIZE; 377 | char *wbuf = obuf; 378 | if (ilen == PAGE_SIZE) { 379 | wbuf = ibuf; 380 | } else { 381 | if (decompress(ibuf, ilen, obuf, &olen, opaque)) 382 | handle_error("decompress"); 383 | } 384 | if (fwrite(wbuf, 1, olen, ofile) < olen) 385 | handle_error("fwrite"); 386 | printf("%d -> %d\n", ilen, olen); 387 | } 388 | fclose(ofile); 389 | fclose(ifile); 390 | free(obuf); 391 | free(ibuf); 392 | compressors[method].decompress_free(opaque); 393 | return 0; 394 | } 395 | 396 | int main(int argc, char * const argv[]) 397 | { 398 | int c, compressor = -1, decompress = 0; 399 | const char *ifile_name, *ofile_name; 400 | FILE *ifile, *ofile; 401 | 402 | while((c = getopt(argc, argv, "c:d")) != -1) { 403 | switch (c) { 404 | case 'c': 405 | if (strcasecmp(optarg, COMPRESSORS[LZO]) == 0) 406 | compressor = LZO; 407 | else if (strcasecmp(optarg, COMPRESSORS[SNAPPY]) == 0) 408 | compressor = SNAPPY; 409 | else if (strcasecmp(optarg, COMPRESSORS[ZLIB]) == 0) 410 | compressor = ZLIB; 411 | else 412 | goto usage; 413 | break; 414 | 
case 'd': 415 | decompress = 1; 416 | break; 417 | default: 418 | goto usage; 419 | } 420 | } 421 | if (optind > argc - 2) 422 | goto usage; 423 | ifile_name = argv[optind]; 424 | ofile_name = argv[optind + 1]; 425 | if (!(ifile = fopen(ifile_name, "rb"))) { 426 | perror("fopen of ifile_name"); 427 | return 2; 428 | } 429 | if (!(ofile = fopen(ofile_name, "wb"))) { 430 | perror("fopen of ofile_name"); 431 | return 3; 432 | } 433 | PAGE_SIZE = (int)sysconf(_SC_PAGE_SIZE); 434 | PAGE_SHIFT = ffs(PAGE_SIZE) - 1; 435 | if (!decompress) 436 | return do_compress(compressor, ifile, ofile); 437 | else 438 | return do_decompress(compressor, ifile, ofile); 439 | usage: 440 | fprintf(stderr, 441 | "usage: block_compressor -c lzo|snappy|zlib [-d] ifile ofile\n"); 442 | return 1; 443 | } 444 | -------------------------------------------------------------------------------- /block_compressor_benchmark.txt: -------------------------------------------------------------------------------- 1 | for testfile in \ 2 | /usr/lib64/chromium-browser/chrome \ 3 | /usr/lib64/qt4/libQtWebKit.so.4.7.2 \ 4 | /usr/lib64/llvm/libLLVM-2.9.so \ 5 | /usr/lib64/xulrunner-2.0/libxul.so \ 6 | /usr/libexec/gcc/x86_64-pc-linux-gnu/4.6.1-pre9999/cc1 \ 7 | /usr/lib64/libnvidia-glcore.so.270.41.03 \ 8 | /usr/lib64/gcc/x86_64-pc-linux-gnu/4.6.1-pre9999/libgcj.so.12.0.0 \ 9 | /usr/lib64/libwireshark.so.0.0.1 \ 10 | /usr/share/icons/oxygen/icon-theme.cache \ 11 | ; do \ 12 | echo compressing: $testfile ; \ 13 | for method in snappy lzo zlib ; do \ 14 | LD_LIBRARY_PATH=. ./block_compressor -c $method $testfile itmp ;\ 15 | LD_LIBRARY_PATH=. ./block_compressor -c $method -d itmp otmp > /dev/null ;\ 16 | diff -u $testfile otmp ;\ 17 | echo "ratio:" \ 18 | $(stat --printf %s itmp) \* 100 / $(stat --printf %s $testfile) "=" \ 19 | $(expr $(stat --printf %s itmp) \* 100 / $(stat --printf %s $testfile)) "%" ;\ 20 | rm -f itmp otmp ;\ 21 | done ; \ 22 | done ; 23 | compressing: /usr/lib64/chromium-browser/chrome 24 | compressor: SNAPPY 25 | #pages: 10392 26 | > 100% :341 27 | > 50% :8445 28 | <= 50% :1606 29 | 0.174652181 seconds 30 | ratio: 27932299 * 100 / 42562848 = 65 % 31 | compressor: LZO 32 | #pages: 10392 33 | > 100% :495 34 | > 50% :8080 35 | <= 50% :1817 36 | 0.220447504 seconds 37 | ratio: 27150908 * 100 / 42562848 = 63 % 38 | compressor: ZLIB 39 | #pages: 10392 40 | > 100% :0 41 | > 50% :5800 42 | <= 50% :4592 43 | 2.395360610 seconds 44 | ratio: 20904235 * 100 / 42562848 = 49 % 45 | compressing: /usr/lib64/qt4/libQtWebKit.so.4.7.2 46 | compressor: SNAPPY 47 | #pages: 5342 48 | > 100% :219 49 | > 50% :3405 50 | <= 50% :1718 51 | 0.080079531 seconds 52 | ratio: 13290800 * 100 / 21877760 = 60 % 53 | compressor: LZO 54 | #pages: 5342 55 | > 100% :272 56 | > 50% :3281 57 | <= 50% :1789 58 | 0.100200702 seconds 59 | ratio: 12737811 * 100 / 21877760 = 58 % 60 | compressor: ZLIB 61 | #pages: 5342 62 | > 100% :142 63 | > 50% :2464 64 | <= 50% :2736 65 | 1.147235809 seconds 66 | ratio: 9903402 * 100 / 21877760 = 45 % 67 | compressing: /usr/lib64/llvm/libLLVM-2.9.so 68 | compressor: SNAPPY 69 | #pages: 3472 70 | > 100% :44 71 | > 50% :2384 72 | <= 50% :1044 73 | 0.055121943 seconds 74 | ratio: 8493554 * 100 / 14219992 = 59 % 75 | compressor: LZO 76 | #pages: 3472 77 | > 100% :53 78 | > 50% :2355 79 | <= 50% :1064 80 | 0.068662186 seconds 81 | ratio: 8213334 * 100 / 14219992 = 57 % 82 | compressor: ZLIB 83 | #pages: 3472 84 | > 100% :12 85 | > 50% :1728 86 | <= 50% :1732 87 | 0.766150075 seconds 88 | ratio: 6221694 * 100 / 14219992 = 43 % 89 
| compressing: /usr/lib64/xulrunner-2.0/libxul.so 90 | compressor: SNAPPY 91 | #pages: 7187 92 | > 100% :229 93 | > 50% :4432 94 | <= 50% :2526 95 | 0.108149693 seconds 96 | ratio: 17455680 * 100 / 29433888 = 59 % 97 | compressor: LZO 98 | #pages: 7187 99 | > 100% :253 100 | > 50% :4287 101 | <= 50% :2647 102 | 0.135244136 seconds 103 | ratio: 16596460 * 100 / 29433888 = 56 % 104 | compressor: ZLIB 105 | #pages: 7187 106 | > 100% :1 107 | > 50% :3021 108 | <= 50% :4165 109 | 1.610910737 seconds 110 | ratio: 12248775 * 100 / 29433888 = 41 % 111 | compressing: /usr/libexec/gcc/x86_64-pc-linux-gnu/4.6.1-pre9999/cc1 112 | compressor: SNAPPY 113 | #pages: 3608 114 | > 100% :68 115 | > 50% :2168 116 | <= 50% :1372 117 | 0.056032193 seconds 118 | ratio: 7728033 * 100 / 14775120 = 52 % 119 | compressor: LZO 120 | #pages: 3608 121 | > 100% :72 122 | > 50% :1975 123 | <= 50% :1561 124 | 0.069680830 seconds 125 | ratio: 7384676 * 100 / 14775120 = 49 % 126 | compressor: ZLIB 127 | #pages: 3608 128 | > 100% :2 129 | > 50% :306 130 | <= 50% :3300 131 | 0.789069806 seconds 132 | ratio: 5493265 * 100 / 14775120 = 37 % 133 | compressing: /usr/lib64/libnvidia-glcore.so.270.41.03 134 | compressor: SNAPPY 135 | #pages: 6710 136 | > 100% :74 137 | > 50% :2614 138 | <= 50% :4022 139 | 0.084111724 seconds 140 | ratio: 12860385 * 100 / 27481328 = 46 % 141 | compressor: LZO 142 | #pages: 6710 143 | > 100% :89 144 | > 50% :2436 145 | <= 50% :4185 146 | 0.103006618 seconds 147 | ratio: 12051888 * 100 / 27481328 = 43 % 148 | compressor: ZLIB 149 | #pages: 6710 150 | > 100% :1 151 | > 50% :1633 152 | <= 50% :5076 153 | 1.216785009 seconds 154 | ratio: 8641291 * 100 / 27481328 = 31 % 155 | compressing: /usr/lib64/gcc/x86_64-pc-linux-gnu/4.6.1-pre9999/libgcj.so.12.0.0 156 | compressor: SNAPPY 157 | #pages: 15133 158 | > 100% :190 159 | > 50% :5105 160 | <= 50% :9838 161 | 0.193854352 seconds 162 | ratio: 27131163 * 100 / 61982968 = 43 % 163 | compressor: LZO 164 | #pages: 15133 165 | > 100% :201 166 | > 50% :4323 167 | <= 50% :10609 168 | 0.235593989 seconds 169 | ratio: 24944283 * 100 / 61982968 = 40 % 170 | compressor: ZLIB 171 | #pages: 15133 172 | > 100% :63 173 | > 50% :317 174 | <= 50% :14753 175 | 2.943011502 seconds 176 | ratio: 18266667 * 100 / 61982968 = 29 % 177 | compressing: /usr/lib64/libwireshark.so.0.0.1 178 | compressor: SNAPPY 179 | #pages: 11341 180 | > 100% :64 181 | > 50% :2982 182 | <= 50% :8295 183 | 0.130238274 seconds 184 | ratio: 19576418 * 100 / 46449592 = 42 % 185 | compressor: LZO 186 | #pages: 11341 187 | > 100% :86 188 | > 50% :2565 189 | <= 50% :8690 190 | 0.157854033 seconds 191 | ratio: 17765477 * 100 / 46449592 = 38 % 192 | compressor: ZLIB 193 | #pages: 11341 194 | > 100% :1 195 | > 50% :1219 196 | <= 50% :10121 197 | 2.020140289 seconds 198 | ratio: 12565102 * 100 / 46449592 = 27 % 199 | compressing: /usr/share/icons/oxygen/icon-theme.cache 200 | compressor: SNAPPY 201 | #pages: 43411 202 | > 100% :0 203 | > 50% :7777 204 | <= 50% :35634 205 | 0.441581102 seconds 206 | ratio: 60247441 * 100 / 177810480 = 33 % 207 | compressor: LZO 208 | #pages: 43411 209 | > 100% :31 210 | > 50% :7801 211 | <= 50% :35579 212 | 0.547072992 seconds 213 | ratio: 59064132 * 100 / 177810480 = 33 % 214 | compressor: ZLIB 215 | #pages: 43411 216 | > 100% :0 217 | > 50% :2464 218 | <= 50% :40947 219 | 6.256616084 seconds 220 | ratio: 42305375 * 100 / 177810480 = 23 % 221 | -------------------------------------------------------------------------------- /cl_tester.c: 
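
The container produced by block_compressor's do_compress above is a host-endian uint32_t page count, followed by one uint32_t compressed length per page, followed by the concatenated per-page payloads; a payload whose stored length equals the page size was kept uncompressed. A minimal index dumper along these lines (an illustrative sketch, not one of the repository's files):

    #include <stdio.h>
    #include <stdint.h>

    int main(int argc, char **argv)
    {
        FILE *f;
        uint32_t nr_pages, len, i;
        if (argc < 2 || !(f = fopen(argv[1], "rb")))
            return 1;
        if (fread(&nr_pages, sizeof(nr_pages), 1, f) != 1)
            return 1;
        printf("nr_pages: %u\n", nr_pages);
        for (i = 0; i < nr_pages; i++) {
            if (fread(&len, sizeof(len), 1, f) != 1)
                return 1;
            printf("page %u: %u bytes\n", i, len);
        }
        fclose(f);
        return 0;
    }
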
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "csnappy.h" 8 | #ifndef MAP_ANONYMOUS 9 | #define MAP_ANONYMOUS MAP_ANON 10 | #endif 11 | 12 | #define MAX_INPUT_SIZE 10 * 1024 * 1024 13 | 14 | static int do_decompress(FILE *ifile, FILE *ofile) 15 | { 16 | char *ibuf, *obuf; 17 | uint32_t ilen, olen; 18 | int status, retval = 0; 19 | 20 | if (!(ibuf = (char *)malloc(MAX_INPUT_SIZE))) { 21 | fprintf(stderr, "malloc failed to allocate %d.\n", MAX_INPUT_SIZE); 22 | fclose(ifile); 23 | retval = 4; 24 | goto out; 25 | } 26 | 27 | ilen = fread(ibuf, 1, MAX_INPUT_SIZE, ifile); 28 | if (!feof(ifile)) { 29 | fprintf(stderr, "input was longer than %d, aborting.\n", MAX_INPUT_SIZE); 30 | free(ibuf); 31 | fclose(ifile); 32 | retval = 5; 33 | goto out; 34 | } 35 | fclose(ifile); 36 | 37 | if ((status = csnappy_get_uncompressed_length(ibuf, ilen, &olen)) < 0) { 38 | fprintf(stderr, "snappy_get_uncompressed_length returned %d.\n", status); 39 | free(ibuf); 40 | retval = 6; 41 | goto out; 42 | } 43 | 44 | if (!(obuf = (char *)malloc(olen))) { 45 | fprintf(stderr, "malloc failed to allocate %d.\n", (int)olen); 46 | free(ibuf); 47 | retval = 4; 48 | goto out; 49 | } 50 | 51 | status = csnappy_decompress(ibuf, ilen, obuf, olen); 52 | free(ibuf); 53 | if (status != CSNAPPY_E_OK) { 54 | fprintf(stderr, "snappy_decompress returned %d.\n", status); 55 | free(obuf); 56 | retval = 7; 57 | goto out; 58 | } 59 | 60 | fwrite(obuf, 1, olen, ofile); 61 | free(obuf); 62 | out: 63 | fclose(ofile); 64 | return retval; 65 | } 66 | 67 | static int do_compress(FILE *ifile, FILE *ofile) 68 | { 69 | char *ibuf, *obuf; 70 | void *working_memory; 71 | uint32_t ilen, olen, max_compressed_len; 72 | 73 | if (!(ibuf = (char *)malloc(MAX_INPUT_SIZE))) { 74 | fprintf(stderr, "malloc failed to allocate %d.\n", MAX_INPUT_SIZE); 75 | fclose(ifile); 76 | fclose(ofile); 77 | return 4; 78 | } 79 | 80 | ilen = fread(ibuf, 1, MAX_INPUT_SIZE, ifile); 81 | if (!feof(ifile)) { 82 | fprintf(stderr, "input was longer than %d, aborting.\n", MAX_INPUT_SIZE); 83 | free(ibuf); 84 | fclose(ifile); 85 | fclose(ofile); 86 | return 5; 87 | } 88 | fclose(ifile); 89 | 90 | max_compressed_len = csnappy_max_compressed_length(ilen); 91 | if (!(obuf = (char*)malloc(max_compressed_len))) { 92 | fprintf(stderr, "malloc failed to allocate %d bytes.\n", (int)max_compressed_len); 93 | free(ibuf); 94 | fclose(ofile); 95 | return 4; 96 | } 97 | 98 | if (!(working_memory = malloc(CSNAPPY_WORKMEM_BYTES))) { 99 | fprintf(stderr, "malloc failed to allocate %d bytes.\n", CSNAPPY_WORKMEM_BYTES); 100 | free(ibuf); 101 | fclose(ofile); 102 | return 4; 103 | } 104 | 105 | csnappy_compress(ibuf, ilen, obuf, &olen, 106 | working_memory, CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO); 107 | free(ibuf); 108 | free(working_memory); 109 | 110 | fwrite(obuf, 1, olen, ofile); 111 | fclose(ofile); 112 | free(obuf); 113 | return 0; 114 | } 115 | 116 | #define handle_error(msg) \ 117 | do { perror(msg); exit(EXIT_FAILURE); } while (0) 118 | 119 | 120 | static void segfault_handler(int signum) { 121 | if (signum == SIGSEGV) { 122 | printf("compression overwrites out buffer\n"); 123 | exit(EXIT_SUCCESS); 124 | } 125 | } 126 | 127 | int do_selftest_compression(void) 128 | { 129 | struct sigaction sa; 130 | char *obuf, *ibuf, *workmem; 131 | FILE *ifile; 132 | long PAGE_SIZE = sysconf(_SC_PAGE_SIZE); 133 | uint32_t olen = 0; 134 | uint32_t ilen = PAGE_SIZE + 100; 135 | 136 | obuf = 
(char*)mmap(NULL, PAGE_SIZE * 2, 137 | PROT_READ | PROT_WRITE, 138 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 139 | if (obuf == MAP_FAILED) 140 | handle_error("mmap"); 141 | if (mprotect(obuf + PAGE_SIZE, PAGE_SIZE, PROT_NONE)) 142 | handle_error("mprotect"); 143 | if (!(ibuf = (char*)malloc(ilen))) 144 | handle_error("malloc"); 145 | if (!(ifile = fopen("/dev/urandom", "rb"))) 146 | handle_error("fopen"); 147 | if (fread(ibuf, 1, ilen, ifile) < ilen) 148 | handle_error("fread"); 149 | if (fclose(ifile)) 150 | handle_error("fclose"); 151 | if (!(workmem = (char*)malloc(CSNAPPY_WORKMEM_BYTES))) 152 | handle_error("malloc"); 153 | sa.sa_handler = segfault_handler; 154 | sigemptyset(&sa.sa_mask); 155 | sa.sa_flags = 0; 156 | sigaction(SIGSEGV, &sa, NULL); 157 | csnappy_compress(ibuf, ilen, obuf, &olen, 158 | workmem, CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO); 159 | fprintf(stderr, "ERROR: csnappy_compress did not segfault when should have!\n"); 160 | if (munmap(obuf, PAGE_SIZE * 2)) 161 | handle_error("munmap"); 162 | free(workmem); 163 | free(ibuf); 164 | return EXIT_FAILURE; 165 | } 166 | 167 | static const char fake[] = "\x32\xc4\x66\x6f\x6f\x6f\x6f\x6f\x6f"; 168 | int do_selftest_decompression(void) 169 | { 170 | char *obuf, *ibuf, *workmem; 171 | FILE *ifile; 172 | int ret; 173 | long PAGE_SIZE = sysconf(_SC_PAGE_SIZE); 174 | int hlen; 175 | uint32_t n; 176 | uint32_t ilen = PAGE_SIZE + 100; 177 | uint32_t olen = csnappy_max_compressed_length(ilen); 178 | if (!(obuf = (char*)malloc(olen))) 179 | handle_error("malloc"); 180 | if (!(ibuf = (char*)malloc(ilen))) 181 | handle_error("malloc"); 182 | if (!(ifile = fopen("/dev/urandom", "rb"))) 183 | handle_error("fopen"); 184 | if (fread(ibuf, 1, ilen, ifile) < ilen) 185 | handle_error("fread"); 186 | if (fclose(ifile)) 187 | handle_error("fclose"); 188 | if (!(workmem = (char*)malloc(CSNAPPY_WORKMEM_BYTES))) 189 | handle_error("malloc"); 190 | csnappy_compress(ibuf, ilen, obuf, &olen, 191 | workmem, CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO); 192 | free(workmem); 193 | free(ibuf); 194 | ibuf = obuf; 195 | ilen = olen; 196 | olen = PAGE_SIZE; 197 | obuf = (char*)mmap(NULL, PAGE_SIZE * 2, 198 | PROT_READ | PROT_WRITE, 199 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 200 | if (obuf == MAP_FAILED) 201 | handle_error("mmap"); 202 | if (mprotect(obuf + PAGE_SIZE, PAGE_SIZE, PROT_NONE)) 203 | handle_error("mprotect"); 204 | ret = csnappy_decompress(ibuf, ilen, obuf, olen); 205 | if (ret != CSNAPPY_E_OUTPUT_INSUF) { 206 | fprintf(stderr, "snappy_decompress returned %d.\n", ret); 207 | exit(EXIT_FAILURE); 208 | } 209 | hlen = csnappy_get_uncompressed_length(ibuf, ilen, &n); 210 | if (hlen == CSNAPPY_E_HEADER_BAD) { 211 | fprintf(stderr, "csnappy_get_uncompressed_length, could not obtain header length\n"); 212 | exit(EXIT_FAILURE); 213 | } 214 | ret = csnappy_decompress_noheader(ibuf + hlen, ilen - hlen, obuf, &olen); 215 | if (ret != CSNAPPY_E_OUTPUT_OVERRUN) { 216 | fprintf(stderr, "csnappy_decompress_noheader returned %d.\n", ret); 217 | exit(EXIT_FAILURE); 218 | } 219 | free(ibuf); 220 | if (munmap(obuf, PAGE_SIZE * 2)) 221 | handle_error("munmap"); 222 | 223 | olen = 50; 224 | if (!(obuf = (char*)malloc(olen))) 225 | handle_error("malloc"); 226 | ret = csnappy_decompress(fake, 9, obuf, olen); 227 | if (ret == CSNAPPY_E_OK) { 228 | fprintf(stderr, "csnappy_decompress, stream cut off mid literal: %d\n", ret); 229 | exit(EXIT_FAILURE); 230 | } 231 | ret = csnappy_decompress_noheader(fake + 1, 8, obuf, &olen); 232 | if (ret == CSNAPPY_E_OK) { 233 | fprintf(stderr, 
"csnappy_decompress_noheader, stream cut off mid literal: %d\n", ret); 234 | exit(EXIT_FAILURE); 235 | } 236 | free(obuf); 237 | return 0; 238 | } 239 | 240 | int main(int argc, char * const argv[]) 241 | { 242 | int c; 243 | int decompress = 0, files = 1; 244 | int selftest_compression = 0, selftest_decompression = 0; 245 | const char *ifile_name, *ofile_name; 246 | FILE *ifile, *ofile; 247 | 248 | while((c = getopt(argc, argv, "S:dc")) != -1) { 249 | switch (c) { 250 | case 'S': 251 | switch (optarg[0]) { 252 | case 'c': 253 | selftest_compression = 1; 254 | break; 255 | case 'd': 256 | selftest_decompression = 1; 257 | break; 258 | default: 259 | goto usage; 260 | } 261 | break; 262 | case 'd': 263 | decompress = 1; 264 | break; 265 | case 'c': 266 | files = 0; 267 | break; 268 | default: 269 | goto usage; 270 | } 271 | } 272 | if (selftest_compression) 273 | return do_selftest_compression(); 274 | if (selftest_decompression) 275 | return do_selftest_decompression(); 276 | ifile = stdin; 277 | ofile = stdout; 278 | if (files) { 279 | if (optind > argc - 2) 280 | goto usage; 281 | ifile_name = argv[optind]; 282 | ofile_name = argv[optind + 1]; 283 | if (!(ifile = fopen(ifile_name, "rb"))) { 284 | perror("fopen of ifile_name"); 285 | return 2; 286 | } 287 | if (!(ofile = fopen(ofile_name, "wb"))) { 288 | perror("fopen of ofile_name"); 289 | return 3; 290 | } 291 | } 292 | if (decompress) 293 | return do_decompress(ifile, ofile); 294 | else 295 | return do_compress(ifile, ofile); 296 | usage: 297 | fprintf(stderr, 298 | "Usage:\n" 299 | "cl_tester [-d] infile outfile\t-\t[de]compress infile to outfile.\n" 300 | "cl_tester [-d] -c\t\t-\t[de]compress stdin to stdout.\n" 301 | "cl_tester -S c\t\t\t-\tSelf-test compression.\n" 302 | "cl_tester -S d\t\t\t-\tSelf-test decompression.\n"); 303 | return 1; 304 | } 305 | -------------------------------------------------------------------------------- /csnappy.h: -------------------------------------------------------------------------------- 1 | #ifndef __CSNAPPY_H__ 2 | #define __CSNAPPY_H__ 3 | /* 4 | File modified for the Linux Kernel by 5 | Zeev Tarantov 6 | */ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #define CSNAPPY_VERSION 5 12 | 13 | #define CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO 16 14 | #define CSNAPPY_WORKMEM_BYTES (1 << CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO) 15 | 16 | #ifndef __GNUC__ 17 | #define __attribute__(x) /*NOTHING*/ 18 | #endif 19 | 20 | #if defined(__SUNPRO_C) || defined(_AIX) 21 | # include 22 | #else 23 | # include 24 | #endif 25 | 26 | /* 27 | * Returns the maximal size of the compressed representation of 28 | * input data that is "source_len" bytes in length; 29 | */ 30 | uint32_t 31 | csnappy_max_compressed_length(uint32_t source_len) __attribute__((const)); 32 | 33 | /* 34 | * Flat array compression that does not emit the "uncompressed length" 35 | * prefix. Compresses "input" array to the "output" array. 36 | * 37 | * REQUIRES: "input" is at most 32KiB long. 38 | * REQUIRES: "output" points to an array of memory that is at least 39 | * "csnappy_max_compressed_length(input_length)" in size. 40 | * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. 41 | * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. 42 | * 43 | * Returns an "end" pointer into "output" buffer. 44 | * "end - output" is the compressed size of "input". 
45 | */ 46 | char* 47 | csnappy_compress_fragment( 48 | const char *input, 49 | const uint32_t input_length, 50 | char *output, 51 | void *working_memory, 52 | const int workmem_bytes_power_of_two); 53 | 54 | /* 55 | * REQUIRES: "compressed" must point to an area of memory that is at 56 | * least "csnappy_max_compressed_length(input_length)" bytes in length. 57 | * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. 58 | * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. 59 | * 60 | * Takes the data stored in "input[0..input_length-1]" and stores 61 | * it in the array pointed to by "compressed". 62 | * 63 | * "*out_compressed_length" is set to the length of the compressed output. 64 | */ 65 | void 66 | csnappy_compress( 67 | const char *input, 68 | uint32_t input_length, 69 | char *compressed, 70 | uint32_t *out_compressed_length, 71 | void *working_memory, 72 | const int workmem_bytes_power_of_two); 73 | 74 | /* 75 | * Reads header of compressed data to get stored length of uncompressed data. 76 | * REQUIRES: start points to compressed data. 77 | * REQUIRES: n is length of available compressed data. 78 | * 79 | * Returns CSNAPPY_E_HEADER_BAD on error. 80 | * Returns number of bytes read from input on success. 81 | * Stores decoded length into *result. 82 | */ 83 | int 84 | csnappy_get_uncompressed_length( 85 | const char *start, 86 | uint32_t n, 87 | uint32_t *result); 88 | 89 | /* 90 | * Safely decompresses all data from array "src" of length "src_len" containing 91 | * entire compressed stream (with header) into array "dst" of size "dst_len". 92 | * REQUIRES: dst_len is at least csnappy_get_uncompressed_length(...). 93 | * 94 | * Iff successful, returns CSNAPPY_E_OK. 95 | * If recorded length in header is greater than dst_len, returns 96 | * CSNAPPY_E_OUTPUT_INSUF. 97 | * If compressed data is malformed, does not write more than dst_len into dst. 98 | */ 99 | int 100 | csnappy_decompress( 101 | const char *src, 102 | uint32_t src_len, 103 | char *dst, 104 | uint32_t dst_len); 105 | 106 | /* 107 | * Safely decompresses stream src_len bytes long read from src to dst. 108 | * Amount of available space at dst must be provided in *dst_len by caller. 109 | * If compressed stream needs more space, it will not overflow and return 110 | * CSNAPPY_E_OUTPUT_OVERRUN. 111 | * On success, sets *dst_len to actual number of bytes decompressed. 112 | * Iff successful, returns CSNAPPY_E_OK. 113 | */ 114 | int 115 | csnappy_decompress_noheader( 116 | const char *src, 117 | uint32_t src_len, 118 | char *dst, 119 | uint32_t *dst_len); 120 | 121 | /* 122 | * Return values (< 0 = Error) 123 | */ 124 | #define CSNAPPY_E_OK 0 125 | #define CSNAPPY_E_HEADER_BAD (-1) 126 | #define CSNAPPY_E_OUTPUT_INSUF (-2) 127 | #define CSNAPPY_E_OUTPUT_OVERRUN (-3) 128 | #define CSNAPPY_E_INPUT_NOT_CONSUMED (-4) 129 | #define CSNAPPY_E_DATA_MALFORMED (-5) 130 | 131 | #ifdef __cplusplus 132 | } 133 | #endif 134 | 135 | #endif 136 | -------------------------------------------------------------------------------- /csnappy_compat.h: -------------------------------------------------------------------------------- 1 | #ifndef CSNAPPY_COMPAT_H 2 | 3 | /* This file was added to Sereal to attempt some MSVC compatibility, 4 | * but is at best a band-aid. And done without a lot of experience 5 | * in whatever subset of C99 MSVC supports.
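
As a counterpart to the compression entry points above, a decompression sketch built on csnappy_get_uncompressed_length and csnappy_decompress (an illustrative helper for a userspace build; the helper name is not part of the library, and error handling is reduced to the checks the header documents):

    #include <stdlib.h>
    #include "csnappy.h"

    static char *decompress_copy(const char *comp, uint32_t comp_len,
                                 uint32_t *out_len)
    {
        char *out;
        if (csnappy_get_uncompressed_length(comp, comp_len, out_len) < 0)
            return NULL;
        if (!(out = malloc(*out_len)))
            return NULL;
        if (csnappy_decompress(comp, comp_len, out, *out_len) != CSNAPPY_E_OK) {
            free(out);
            return NULL;
        }
        return out;
    }
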
6 | */ 7 | 8 | #ifndef INLINE 9 | # if defined(_MSC_VER) 10 | # define INLINE __inline 11 | # else 12 | # define INLINE inline 13 | # endif 14 | #endif 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /csnappy_compress.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011, Google Inc. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following disclaimer 13 | in the documentation and/or other materials provided with the 14 | distribution. 15 | * Neither the name of Google Inc. nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | File modified for the Linux Kernel by 32 | Zeev Tarantov 33 | 34 | File modified for Sereal by 35 | Steffen Mueller 36 | */ 37 | 38 | #include "csnappy_internal.h" 39 | #ifdef __KERNEL__ 40 | #include 41 | #include 42 | #endif 43 | #include "csnappy.h" 44 | 45 | 46 | static INLINE char* 47 | encode_varint32(char *sptr, uint32_t v) 48 | { 49 | uint8_t* ptr = (uint8_t *)sptr; 50 | static const int B = 128; 51 | if (v < (1<<7)) { 52 | *(ptr++) = v; 53 | } else if (v < (1<<14)) { 54 | *(ptr++) = v | B; 55 | *(ptr++) = v>>7; 56 | } else if (v < (1<<21)) { 57 | *(ptr++) = v | B; 58 | *(ptr++) = (v>>7) | B; 59 | *(ptr++) = v>>14; 60 | } else if (v < (1<<28)) { 61 | *(ptr++) = v | B; 62 | *(ptr++) = (v>>7) | B; 63 | *(ptr++) = (v>>14) | B; 64 | *(ptr++) = v>>21; 65 | } else { 66 | *(ptr++) = v | B; 67 | *(ptr++) = (v>>7) | B; 68 | *(ptr++) = (v>>14) | B; 69 | *(ptr++) = (v>>21) | B; 70 | *(ptr++) = v>>28; 71 | } 72 | return (char *)ptr; 73 | } 74 | 75 | /* 76 | * *** DO NOT CHANGE THE VALUE OF kBlockSize *** 77 | 78 | * New Compression code chops up the input into blocks of at most 79 | * the following size. This ensures that back-references in the 80 | * output never cross kBlockSize block boundaries. This can be 81 | * helpful in implementing blocked decompression. However the 82 | * decompression code should not rely on this guarantee since older 83 | * compression code may not obey it. 
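
The length header emitted by encode_varint32 above packs 7 payload bits per byte, least-significant group first, with the high bit marking continuation; 300, for instance, becomes the two bytes 0xAC 0x02. A matching decoder sketch (illustrative only; the library's own parsing of this header is exposed as csnappy_get_uncompressed_length):

    #include <stddef.h>
    #include <stdint.h>

    static const char *decode_varint32(const char *p, const char *end,
                                       uint32_t *result)
    {
        uint32_t v = 0;
        int shift;
        for (shift = 0; shift <= 28 && p < end; shift += 7) {
            uint8_t b = (uint8_t)*p++;
            v |= (uint32_t)(b & 0x7f) << shift;
            if (!(b & 0x80)) {
                *result = v;
                return p;
            }
        }
        return NULL;
    }
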
84 | */ 85 | #define kBlockLog 15 86 | #define kBlockSize (1 << kBlockLog) 87 | 88 | 89 | #if defined(__arm__) && !defined(ARCH_ARM_HAVE_UNALIGNED) 90 | 91 | static uint8_t* emit_literal( 92 | uint8_t *op, 93 | const uint8_t *src, 94 | const uint8_t *end) 95 | { 96 | uint32_t length = end - src; 97 | uint32_t n = length - 1; 98 | if (!length) 99 | return op; 100 | if (n < 60) { 101 | /* Fits in tag byte */ 102 | *op++ = LITERAL | (n << 2); 103 | } else { 104 | /* Encode in upcoming bytes */ 105 | uint8_t *base = op; 106 | op++; 107 | do { 108 | *op++ = n & 0xff; 109 | n >>= 8; 110 | } while (n > 0); 111 | *base = LITERAL | ((59 + (op - base - 1)) << 2); 112 | } 113 | memcpy(op, src, length); 114 | return op + length; 115 | } 116 | 117 | static uint8_t* emit_copy( 118 | uint8_t *op, 119 | uint32_t offset, 120 | uint32_t len) 121 | { 122 | DCHECK_GT(offset, 0); 123 | 124 | /* Emit 64 byte copies but make sure to keep at least four bytes 125 | * reserved */ 126 | while (unlikely(len >= 68)) { 127 | *op++ = COPY_2_BYTE_OFFSET | ((64 - 1) << 2); 128 | *op++ = offset & 255; 129 | *op++ = offset >> 8; 130 | len -= 64; 131 | } 132 | 133 | /* Emit an extra 60 byte copy if have too much data to fit in one 134 | * copy */ 135 | if (unlikely(len > 64)) { 136 | *op++ = COPY_2_BYTE_OFFSET | ((60 - 1) << 2); 137 | *op++ = offset & 255; 138 | *op++ = offset >> 8; 139 | len -= 60; 140 | } 141 | 142 | /* Emit remainder */ 143 | DCHECK_GE(len, 4); 144 | if ((len < 12) && (offset < 2048)) { 145 | int len_minus_4 = len - 4; 146 | *op++ = COPY_1_BYTE_OFFSET | 147 | ((len_minus_4) << 2) | 148 | ((offset >> 8) << 5); 149 | *op++ = offset & 0xff; 150 | } else { 151 | *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2); 152 | *op++ = offset & 255; 153 | *op++ = offset >> 8; 154 | } 155 | return op; 156 | } 157 | 158 | static uint32_t find_match_length( 159 | const uint8_t *s1, 160 | const uint8_t *s2, 161 | const uint8_t *s2_end) 162 | { 163 | const uint8_t * const s2_start = s2; 164 | while (s2 < s2_end && *s1++ == *s2++) /*nothing*/; 165 | return s2 - s2_start - 1; 166 | } 167 | 168 | static uint32_t hash(uint32_t v) 169 | { 170 | return v * UINT32_C(0x1e35a7bd); 171 | } 172 | 173 | char* 174 | csnappy_compress_fragment( 175 | const char *input, 176 | const uint32_t input_size, 177 | char *dst, 178 | void *working_memory, 179 | const int workmem_bytes_power_of_two) 180 | { 181 | const uint8_t * const src_start = (const uint8_t *)input; 182 | const uint8_t * const src_end_minus4 = src_start + input_size - 4; 183 | const uint8_t *src = src_start, *done_upto = src_start, *match; 184 | uint8_t *op = (uint8_t *)dst; 185 | uint16_t *wm = (uint16_t *)working_memory; 186 | int shift = 33 - workmem_bytes_power_of_two; 187 | uint32_t curr_val, curr_hash, match_val, offset, length; 188 | if (unlikely(input_size < 4)) 189 | goto the_end; 190 | memset(wm, 0, 1 << workmem_bytes_power_of_two); 191 | for (;;) { 192 | curr_val = (src[1] << 8) | (src[2] << 16) | (src[3] << 24); 193 | do { 194 | src++; 195 | if (unlikely(src >= src_end_minus4)) 196 | goto the_end; 197 | curr_val = (curr_val >> 8) | (src[3] << 24); 198 | DCHECK_EQ(curr_val, get_unaligned_le32(src)); 199 | curr_hash = hash(curr_val) >> shift; 200 | match = src_start + wm[curr_hash]; 201 | DCHECK_LT(match, src); 202 | wm[curr_hash] = src - src_start; 203 | match_val = get_unaligned_le32(match); 204 | } while (likely(curr_val != match_val)); 205 | offset = src - match; 206 | length = 4 + find_match_length( 207 | match + 4, src + 4, src_end_minus4 + 4); 208 | 
DCHECK_EQ(memcmp(src, match, length), 0); 209 | op = emit_literal(op, done_upto, src); 210 | op = emit_copy(op, offset, length); 211 | done_upto = src + length; 212 | src = done_upto - 1; 213 | } 214 | the_end: 215 | op = emit_literal(op, done_upto, src_end_minus4 + 4); 216 | return (char *)op; 217 | } 218 | 219 | #else /* !simple */ 220 | 221 | /* 222 | * Any hash function will produce a valid compressed bitstream, but a good 223 | * hash function reduces the number of collisions and thus yields better 224 | * compression for compressible input, and more speed for incompressible 225 | * input. Of course, it doesn't hurt if the hash function is reasonably fast 226 | * either, as it gets called a lot. 227 | */ 228 | static INLINE uint32_t HashBytes(uint32_t bytes, int shift) 229 | { 230 | uint32_t kMul = 0x1e35a7bd; 231 | return (bytes * kMul) >> shift; 232 | } 233 | static INLINE uint32_t Hash(const char *p, int shift) 234 | { 235 | return HashBytes(UNALIGNED_LOAD32(p), shift); 236 | } 237 | 238 | 239 | /* 240 | * Return the largest n such that 241 | * 242 | * s1[0,n-1] == s2[0,n-1] 243 | * and n <= (s2_limit - s2). 244 | * 245 | * Does not read *s2_limit or beyond. 246 | * Does not read *(s1 + (s2_limit - s2)) or beyond. 247 | * Requires that s2_limit >= s2. 248 | * 249 | * Separate implementation for x86_64, for speed. Uses the fact that 250 | * x86_64 is little endian. 251 | */ 252 | #if defined(__x86_64__) || defined(__aarch64__) 253 | static INLINE int 254 | FindMatchLength(const char *s1, const char *s2, const char *s2_limit) 255 | { 256 | uint64_t x; 257 | int matched, matching_bits; 258 | DCHECK_GE(s2_limit, s2); 259 | matched = 0; 260 | /* 261 | * Find out how long the match is. We loop over the data 64 bits at a 262 | * time until we find a 64-bit block that doesn't match; then we find 263 | * the first non-matching bit and use that to calculate the total 264 | * length of the match. 265 | */ 266 | while (likely(s2 <= s2_limit - 8)) { 267 | if (unlikely(UNALIGNED_LOAD64(s1 + matched) == 268 | UNALIGNED_LOAD64(s2))) { 269 | s2 += 8; 270 | matched += 8; 271 | } else { 272 | /* 273 | * On current (mid-2008) Opteron models there is a 3% 274 | * more efficient code sequence to find the first 275 | * non-matching byte. However, what follows is ~10% 276 | * better on Intel Core 2 and newer, and we expect AMD's 277 | * bsf instruction to improve. 278 | */ 279 | x = UNALIGNED_LOAD64(s1 + matched) ^ 280 | UNALIGNED_LOAD64(s2); 281 | matching_bits = FindLSBSetNonZero64(x); 282 | matched += matching_bits >> 3; 283 | return matched; 284 | } 285 | } 286 | while (likely(s2 < s2_limit)) { 287 | if (likely(s1[matched] == *s2)) { 288 | ++s2; 289 | ++matched; 290 | } else { 291 | return matched; 292 | } 293 | } 294 | return matched; 295 | } 296 | #else /* !defined(__x86_64__) && !defined(__aarch64__) */ 297 | static INLINE int 298 | FindMatchLength(const char *s1, const char *s2, const char *s2_limit) 299 | { 300 | /* Implementation based on the x86-64 version, above. 
*/ 301 | int matched = 0; 302 | DCHECK_GE(s2_limit, s2); 303 | 304 | while (s2 <= s2_limit - 4 && 305 | UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { 306 | s2 += 4; 307 | matched += 4; 308 | } 309 | #if __BYTE_ORDER == __LITTLE_ENDIAN 310 | if (s2 <= s2_limit - 4) { 311 | uint32_t x = UNALIGNED_LOAD32(s1 + matched) ^ 312 | UNALIGNED_LOAD32(s2); 313 | int matching_bits = FindLSBSetNonZero(x); 314 | matched += matching_bits >> 3; 315 | } else { 316 | while ((s2 < s2_limit) && (s1[matched] == *s2)) { 317 | ++s2; 318 | ++matched; 319 | } 320 | } 321 | #else 322 | while ((s2 < s2_limit) && (s1[matched] == *s2)) { 323 | ++s2; 324 | ++matched; 325 | } 326 | #endif 327 | return matched; 328 | } 329 | #endif /* !defined(__x86_64__) && !defined(__aarch64__) */ 330 | 331 | 332 | static INLINE char* 333 | EmitLiteral(char *op, const char *literal, int len, int allow_fast_path) 334 | { 335 | int n = len - 1; /* Zero-length literals are disallowed */ 336 | if (n < 60) { 337 | /* Fits in tag byte */ 338 | *op++ = LITERAL | (n << 2); 339 | /* 340 | The vast majority of copies are below 16 bytes, for which a 341 | call to memcpy is overkill. This fast path can sometimes 342 | copy up to 15 bytes too much, but that is okay in the 343 | main loop, since we have a bit to go on for both sides: 344 | - The input will always have kInputMarginBytes = 15 extra 345 | available bytes, as long as we're in the main loop, and 346 | if not, allow_fast_path = false. 347 | - The output will always have 32 spare bytes (see 348 | snappy_max_compressed_length). 349 | */ 350 | if (allow_fast_path && len <= 16) { 351 | UnalignedCopy64(literal, op); 352 | UnalignedCopy64(literal + 8, op + 8); 353 | return op + len; 354 | } 355 | } else { 356 | /* Encode in upcoming bytes */ 357 | char *base = op; 358 | int count = 0; 359 | op++; 360 | while (n > 0) { 361 | *op++ = n & 0xff; 362 | n >>= 8; 363 | count++; 364 | } 365 | DCHECK_GE(count, 1); 366 | DCHECK_LE(count, 4); 367 | *base = LITERAL | ((59+count) << 2); 368 | } 369 | memcpy(op, literal, len); 370 | return op + len; 371 | } 372 | 373 | static INLINE char* 374 | EmitCopyLessThan64(char *op, int offset, int len) 375 | { 376 | DCHECK_LE(len, 64); 377 | DCHECK_GE(len, 4); 378 | DCHECK_LT(offset, 65536); 379 | 380 | if ((len < 12) && (offset < 2048)) { 381 | int len_minus_4 = len - 4; 382 | DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */ 383 | *op++ = COPY_1_BYTE_OFFSET + 384 | ((len_minus_4) << 2) + 385 | ((offset >> 8) << 5); 386 | *op++ = offset & 0xff; 387 | } else { 388 | *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2); 389 | put_unaligned_le16(offset, op); 390 | op += 2; 391 | } 392 | return op; 393 | } 394 | 395 | static INLINE char* 396 | EmitCopy(char *op, int offset, int len) 397 | { 398 | /* Emit 64 byte copies but make sure to keep at least four bytes 399 | * reserved */ 400 | while (len >= 68) { 401 | op = EmitCopyLessThan64(op, offset, 64); 402 | len -= 64; 403 | } 404 | 405 | /* Emit an extra 60 byte copy if have too much data to fit in one 406 | * copy */ 407 | if (len > 64) { 408 | op = EmitCopyLessThan64(op, offset, 60); 409 | len -= 60; 410 | } 411 | 412 | /* Emit remainder */ 413 | op = EmitCopyLessThan64(op, offset, len); 414 | return op; 415 | } 416 | 417 | 418 | /* 419 | For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will 420 | equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have 421 | empirically found that overlapping loads such as 422 | UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... 
UNALIGNED_LOAD32(p+2) 423 | are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. 424 | 425 | We have different versions for 64- and 32-bit; ideally we would avoid the 426 | two functions and just INLINE the UNALIGNED_LOAD64 call into 427 | GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever 428 | enough to avoid loading the value multiple times then. For 64-bit, the load 429 | is done when GetEightBytesAt() is called, whereas for 32-bit, the load is 430 | done at GetUint32AtOffset() time. 431 | */ 432 | 433 | #if defined(__x86_64__) || (__SIZEOF_SIZE_T__ == 8) 434 | 435 | typedef uint64_t EightBytesReference; 436 | 437 | static INLINE EightBytesReference GetEightBytesAt(const char* ptr) { 438 | return UNALIGNED_LOAD64(ptr); 439 | } 440 | 441 | static INLINE uint32_t GetUint32AtOffset(uint64_t v, int offset) { 442 | DCHECK_GE(offset, 0); 443 | DCHECK_LE(offset, 4); 444 | #if __BYTE_ORDER == __LITTLE_ENDIAN 445 | return v >> (8 * offset); 446 | #else 447 | return v >> (32 - 8 * offset); 448 | #endif 449 | } 450 | 451 | #else /* !ARCH_K8 */ 452 | 453 | typedef const char* EightBytesReference; 454 | 455 | static INLINE EightBytesReference GetEightBytesAt(const char* ptr) { 456 | return ptr; 457 | } 458 | 459 | static INLINE uint32_t GetUint32AtOffset(const char* v, int offset) { 460 | DCHECK_GE(offset, 0); 461 | DCHECK_LE(offset, 4); 462 | return UNALIGNED_LOAD32(v + offset); 463 | } 464 | 465 | #endif /* !ARCH_K8 */ 466 | 467 | 468 | #define kInputMarginBytes 15 469 | char* 470 | csnappy_compress_fragment( 471 | const char *input, 472 | const uint32_t input_size, 473 | char *op, 474 | void *working_memory, 475 | const int workmem_bytes_power_of_two) 476 | { 477 | const char *ip, *ip_end, *base_ip, *next_emit, *ip_limit, *next_ip, 478 | *candidate, *base; 479 | uint16_t *table = (uint16_t *)working_memory; 480 | EightBytesReference input_bytes; 481 | uint32_t hash, next_hash, prev_hash, cur_hash, skip, candidate_bytes; 482 | int shift, matched; 483 | 484 | DCHECK_GE(workmem_bytes_power_of_two, 9); 485 | DCHECK_LE(workmem_bytes_power_of_two, 15); 486 | /* Table of 2^X bytes, need (X-1) bits to address table of uint16_t. 487 | * How many bits of 32bit hash function result are discarded? */ 488 | shift = 33 - workmem_bytes_power_of_two; 489 | /* "ip" is the input pointer, and "op" is the output pointer. */ 490 | ip = input; 491 | DCHECK_LE(input_size, kBlockSize); 492 | ip_end = input + input_size; 493 | base_ip = ip; 494 | /* Bytes in [next_emit, ip) will be emitted as literal bytes. Or 495 | [next_emit, ip_end) after the main loop. */ 496 | next_emit = ip; 497 | 498 | if (unlikely(input_size < kInputMarginBytes)) 499 | goto emit_remainder; 500 | 501 | memset(working_memory, 0, 1 << workmem_bytes_power_of_two); 502 | 503 | ip_limit = input + input_size - kInputMarginBytes; 504 | next_hash = Hash(++ip, shift); 505 | 506 | main_loop: 507 | DCHECK_LT(next_emit, ip); 508 | /* 509 | * The body of this loop calls EmitLiteral once and then EmitCopy one or 510 | * more times. (The exception is that when we're close to exhausting 511 | * the input we goto emit_remainder.) 512 | * 513 | * In the first iteration of this loop we're just starting, so 514 | * there's nothing to copy, so calling EmitLiteral once is 515 | * necessary. And we only start a new iteration when the 516 | * current iteration has determined that a call to EmitLiteral will 517 | * precede the next call to EmitCopy (if any). 
518 | * 519 | * Step 1: Scan forward in the input looking for a 4-byte-long match. 520 | * If we get close to exhausting the input then goto emit_remainder. 521 | * 522 | * Heuristic match skipping: If 32 bytes are scanned with no matches 523 | * found, start looking only at every other byte. If 32 more bytes are 524 | * scanned, look at every third byte, etc.. When a match is found, 525 | * immediately go back to looking at every byte. This is a small loss 526 | * (~5% performance, ~0.1% density) for compressible data due to more 527 | * bookkeeping, but for non-compressible data (such as JPEG) it's a huge 528 | * win since the compressor quickly "realizes" the data is incompressible 529 | * and doesn't bother looking for matches everywhere. 530 | * 531 | * The "skip" variable keeps track of how many bytes there are since the 532 | * last match; dividing it by 32 (ie. right-shifting by five) gives the 533 | * number of bytes to move ahead for each iteration. 534 | */ 535 | skip = 32; 536 | 537 | next_ip = ip; 538 | do { 539 | ip = next_ip; 540 | hash = next_hash; 541 | DCHECK_EQ(hash, Hash(ip, shift)); 542 | next_ip = ip + (skip++ >> 5); 543 | if (unlikely(next_ip > ip_limit)) 544 | goto emit_remainder; 545 | next_hash = Hash(next_ip, shift); 546 | candidate = base_ip + table[hash]; 547 | DCHECK_GE(candidate, base_ip); 548 | DCHECK_LT(candidate, ip); 549 | 550 | table[hash] = ip - base_ip; 551 | } while (likely(UNALIGNED_LOAD32(ip) != 552 | UNALIGNED_LOAD32(candidate))); 553 | 554 | /* 555 | * Step 2: A 4-byte match has been found. We'll later see if more 556 | * than 4 bytes match. But, prior to the match, input 557 | * bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." 558 | */ 559 | DCHECK_LE(next_emit + 16, ip_end); 560 | op = EmitLiteral(op, next_emit, ip - next_emit, 1); 561 | 562 | /* 563 | * Step 3: Call EmitCopy, and then see if another EmitCopy could 564 | * be our next move. Repeat until we find no match for the 565 | * input immediately after what was consumed by the last EmitCopy call. 566 | * 567 | * If we exit this loop normally then we need to call EmitLiteral next, 568 | * though we don't yet know how big the literal will be. We handle that 569 | * by proceeding to the next iteration of the main loop. We also can exit 570 | * this loop via goto if we get close to exhausting the input. 571 | */ 572 | candidate_bytes = 0; 573 | 574 | do { 575 | /* We have a 4-byte match at ip, and no need to emit any 576 | "literal bytes" prior to ip. */ 577 | base = ip; 578 | matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end); 579 | ip += matched; 580 | DCHECK_EQ(0, memcmp(base, candidate, matched)); 581 | op = EmitCopy(op, base - candidate, matched); 582 | /* We could immediately start working at ip now, but to improve 583 | compression we first update table[Hash(ip - 1, ...)]. 
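 * Concretely, in the statements just below: GetEightBytesAt(ip - 1) loads
 * eight bytes once; the hash of the four bytes starting at ip - 1 is used only
 * to record position ip - 1 in the table, while the hash of the four bytes
 * starting at ip both fetches the next candidate and records ip. Seeding the
 * table with ip - 1 lets a later search find a match that begins one byte
 * before the point where this copy ended.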
*/ 584 | next_emit = ip; 585 | if (unlikely(ip >= ip_limit)) 586 | goto emit_remainder; 587 | input_bytes = GetEightBytesAt(ip - 1); 588 | prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); 589 | table[prev_hash] = ip - base_ip - 1; 590 | cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); 591 | candidate = base_ip + table[cur_hash]; 592 | candidate_bytes = UNALIGNED_LOAD32(candidate); 593 | table[cur_hash] = ip - base_ip; 594 | } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); 595 | 596 | next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); 597 | ++ip; 598 | goto main_loop; 599 | 600 | emit_remainder: 601 | /* Emit the remaining bytes as a literal */ 602 | if (next_emit < ip_end) 603 | op = EmitLiteral(op, next_emit, ip_end - next_emit, 0); 604 | 605 | return op; 606 | } 607 | #endif /* !simple */ 608 | #if defined(__KERNEL__) && !defined(STATIC) 609 | EXPORT_SYMBOL(csnappy_compress_fragment); 610 | #endif 611 | 612 | uint32_t __attribute__((const)) 613 | csnappy_max_compressed_length(uint32_t source_len) 614 | { 615 | return 32 + source_len + source_len/6; 616 | } 617 | #if defined(__KERNEL__) && !defined(STATIC) 618 | EXPORT_SYMBOL(csnappy_max_compressed_length); 619 | #endif 620 | 621 | void 622 | csnappy_compress( 623 | const char *input, 624 | uint32_t input_length, 625 | char *compressed, 626 | uint32_t *compressed_length, 627 | void *working_memory, 628 | const int workmem_bytes_power_of_two) 629 | { 630 | int workmem_size; 631 | int num_to_read; 632 | uint32_t written = 0; 633 | char *p = encode_varint32(compressed, input_length); 634 | written += (p - compressed); 635 | compressed = p; 636 | while (input_length > 0) { 637 | num_to_read = min(input_length, (uint32_t)kBlockSize); 638 | workmem_size = workmem_bytes_power_of_two; 639 | if (unlikely(num_to_read < kBlockSize)) { 640 | for (workmem_size = 9; 641 | workmem_size < workmem_bytes_power_of_two; 642 | ++workmem_size) { 643 | if ((1 << (workmem_size-1)) >= num_to_read) 644 | break; 645 | } 646 | } 647 | p = csnappy_compress_fragment( 648 | input, num_to_read, compressed, 649 | working_memory, workmem_size); 650 | written += (p - compressed); 651 | compressed = p; 652 | input_length -= num_to_read; 653 | input += num_to_read; 654 | } 655 | *compressed_length = written; 656 | } 657 | #if defined(__KERNEL__) && !defined(STATIC) 658 | EXPORT_SYMBOL(csnappy_compress); 659 | 660 | MODULE_LICENSE("BSD"); 661 | MODULE_DESCRIPTION("Snappy Compressor"); 662 | #endif 663 | -------------------------------------------------------------------------------- /csnappy_decompress.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011, Google Inc. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following disclaimer 13 | in the documentation and/or other materials provided with the 14 | distribution. 15 | * Neither the name of Google Inc. nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 
18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | File modified for the Linux Kernel by 32 | Zeev Tarantov 33 | 34 | File modified for Sereal by 35 | Steffen Mueller 36 | */ 37 | 38 | #include "csnappy_internal.h" 39 | #ifdef __KERNEL__ 40 | #include 41 | #include 42 | #endif 43 | #include "csnappy.h" 44 | 45 | int 46 | csnappy_get_uncompressed_length( 47 | const char *src, 48 | uint32_t src_len, 49 | uint32_t *result) 50 | { 51 | const char *src_base = src; 52 | uint32_t shift = 0; 53 | uint8_t c; 54 | /* Length is encoded in 1..5 bytes */ 55 | *result = 0; 56 | for (;;) { 57 | if (shift >= 32) 58 | goto err_out; 59 | if (src_len == 0) 60 | goto err_out; 61 | c = *(const uint8_t *)src++; 62 | src_len -= 1; 63 | *result |= (uint32_t)(c & 0x7f) << shift; 64 | if (c < 128) 65 | break; 66 | shift += 7; 67 | } 68 | return src - src_base; 69 | err_out: 70 | return CSNAPPY_E_HEADER_BAD; 71 | } 72 | #if defined(__KERNEL__) && !defined(STATIC) 73 | EXPORT_SYMBOL(csnappy_get_uncompressed_length); 74 | #endif 75 | 76 | #if defined(__arm__) && !defined(ARCH_ARM_HAVE_UNALIGNED) 77 | int csnappy_decompress_noheader( 78 | const char *src_, 79 | uint32_t src_remaining, 80 | char *dst, 81 | uint32_t *dst_len) 82 | { 83 | const uint8_t * src = (const uint8_t *)src_; 84 | const uint8_t * const src_end = src + src_remaining; 85 | char * const dst_base = dst; 86 | char * const dst_end = dst + *dst_len; 87 | while (src < src_end) { 88 | uint32_t opcode = *src++; 89 | uint32_t length = (opcode >> 2) + 1; 90 | const uint8_t *copy_src; 91 | if (likely((opcode & 3) == 0)) { 92 | if (unlikely(length > 60)) { 93 | uint32_t extra_bytes = length - 60; 94 | int shift, max_shift; 95 | if (unlikely(src + extra_bytes > src_end)) 96 | return CSNAPPY_E_DATA_MALFORMED; 97 | length = 0; 98 | for (shift = 0, max_shift = extra_bytes*8; 99 | shift < max_shift; 100 | shift += 8) 101 | length |= *src++ << shift; 102 | ++length; 103 | } 104 | if (unlikely(src + length > src_end)) 105 | return CSNAPPY_E_DATA_MALFORMED; 106 | copy_src = src; 107 | src += length; 108 | } else { 109 | uint32_t offset; 110 | if (likely((opcode & 3) == 1)) { 111 | if (unlikely(src + 1 > src_end)) 112 | return CSNAPPY_E_DATA_MALFORMED; 113 | length = ((length - 1) & 7) + 4; 114 | offset = ((opcode >> 5) << 8) + *src++; 115 | } else if (likely((opcode & 3) == 2)) { 116 | if (unlikely(src + 2 > src_end)) 117 | return CSNAPPY_E_DATA_MALFORMED; 118 | offset = src[0] | (src[1] << 8); 119 | src += 2; 120 | } else { 121 | if (unlikely(src + 4 > src_end)) 122 | return CSNAPPY_E_DATA_MALFORMED; 123 | offset = src[0] | (src[1] << 8) | 124 | (src[2] << 16) | (src[3] << 24); 125 | src += 4; 126 | } 127 | if (unlikely(!offset || (offset > dst - dst_base))) 128 | return CSNAPPY_E_DATA_MALFORMED; 129 | 
copy_src = (const uint8_t *)dst - offset; 130 | } 131 | if (unlikely(dst + length > dst_end)) 132 | return CSNAPPY_E_OUTPUT_OVERRUN; 133 | do *dst++ = *copy_src++; while (--length); 134 | } 135 | *dst_len = dst - dst_base; 136 | return CSNAPPY_E_OK; 137 | } 138 | #else /* !(arm with no unaligned access) */ 139 | /* 140 | * Data stored per entry in lookup table: 141 | * Range Bits-used Description 142 | * ------------------------------------ 143 | * 1..64 0..7 Literal/copy length encoded in opcode byte 144 | * 0..7 8..10 Copy offset encoded in opcode byte / 256 145 | * 0..4 11..13 Extra bytes after opcode 146 | * 147 | * We use eight bits for the length even though 7 would have sufficed 148 | * because of efficiency reasons: 149 | * (1) Extracting a byte is faster than a bit-field 150 | * (2) It properly aligns copy offset so we do not need a <<8 151 | */ 152 | static const uint16_t char_table[256] = { 153 | 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, 154 | 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, 155 | 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, 156 | 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, 157 | 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, 158 | 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, 159 | 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, 160 | 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, 161 | 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, 162 | 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, 163 | 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, 164 | 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, 165 | 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, 166 | 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, 167 | 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, 168 | 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, 169 | 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, 170 | 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, 171 | 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, 172 | 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, 173 | 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, 174 | 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, 175 | 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, 176 | 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, 177 | 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, 178 | 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, 179 | 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, 180 | 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, 181 | 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, 182 | 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, 183 | 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, 184 | 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 185 | }; 186 | 187 | /* 188 | * Copy "len" bytes from "src" to "op", one byte at a time. Used for 189 | * handling COPY operations where the input and output regions may 190 | * overlap. 
For example, suppose: 191 | * src == "ab" 192 | * op == src + 2 193 | * len == 20 194 | * After IncrementalCopy(src, op, len), the result will have 195 | * eleven copies of "ab" 196 | * ababababababababababab 197 | * Note that this does not match the semantics of either memcpy() 198 | * or memmove(). 199 | */ 200 | static INLINE void IncrementalCopy(const char *src, char *op, int len) 201 | { 202 | DCHECK_GT(len, 0); 203 | do { 204 | *op++ = *src++; 205 | } while (--len > 0); 206 | } 207 | 208 | /* 209 | * Equivalent to IncrementalCopy except that it can write up to ten extra 210 | * bytes after the end of the copy, and that it is faster. 211 | * 212 | * The main part of this loop is a simple copy of eight bytes at a time until 213 | * we've copied (at least) the requested amount of bytes. However, if op and 214 | * src are less than eight bytes apart (indicating a repeating pattern of 215 | * length < 8), we first need to expand the pattern in order to get the correct 216 | * results. For instance, if the buffer looks like this, with the eight-byte 217 | * <src> and <op> patterns marked as intervals: 218 | * 219 | * abxxxxxxxxxxxx 220 | * [------] src 221 | * [------] op 222 | * 223 | * a single eight-byte copy from <src> to <op> will repeat the pattern once, 224 | * after which we can move <op> two bytes without moving <src>: 225 | * 226 | * ababxxxxxxxxxx 227 | * [------] src 228 | * [------] op 229 | * 230 | * and repeat the exercise until the two no longer overlap. 231 | * 232 | * This allows us to do very well in the special case of one single byte 233 | * repeated many times, without taking a big hit for more general cases. 234 | * 235 | * The worst case of extra writing past the end of the match occurs when 236 | * op - src == 1 and len == 1; the last copy will read from byte positions 237 | * [0..7] and write to [4..11], whereas it was only supposed to write to 238 | * position 1. Thus, ten excess bytes. 239 | */ 240 | static const int kMaxIncrementCopyOverflow = 10; 241 | static INLINE void IncrementalCopyFastPath(const char *src, char *op, int len) 242 | { 243 | while (op - src < 8) { 244 | UnalignedCopy64(src, op); 245 | len -= op - src; 246 | op += op - src; 247 | } 248 | while (len > 0) { 249 | UnalignedCopy64(src, op); 250 | src += 8; 251 | op += 8; 252 | len -= 8; 253 | } 254 | } 255 | 256 | 257 | /* A type that writes to a flat array.
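 * (A worked example for IncrementalCopyFastPath above: with op - src == 2 and
 * len == 20, the first loop runs twice, growing the gap from 2 to 4 and then
 * to 8 while len drops to 14; the second loop then copies two more eight-byte
 * blocks. The few bytes written past the requested 20 are the scratch that
 * kMaxIncrementCopyOverflow accounts for.)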
*/ 258 | struct SnappyArrayWriter { 259 | char *base; 260 | char *op; 261 | char *op_limit; 262 | }; 263 | 264 | static INLINE int 265 | SAW__AppendFastPath(struct SnappyArrayWriter *this, 266 | const char *ip, uint32_t len) 267 | { 268 | char *op = this->op; 269 | const uint32_t space_left = this->op_limit - op; 270 | if (likely(space_left >= 16)) { 271 | UnalignedCopy64(ip, op); 272 | UnalignedCopy64(ip + 8, op + 8); 273 | } else { 274 | if (unlikely(space_left < len)) 275 | return CSNAPPY_E_OUTPUT_OVERRUN; 276 | memcpy(op, ip, len); 277 | } 278 | this->op = op + len; 279 | return CSNAPPY_E_OK; 280 | } 281 | 282 | static INLINE int 283 | SAW__Append(struct SnappyArrayWriter *this, 284 | const char *ip, uint32_t len) 285 | { 286 | char *op = this->op; 287 | const uint32_t space_left = this->op_limit - op; 288 | if (unlikely(space_left < len)) 289 | return CSNAPPY_E_OUTPUT_OVERRUN; 290 | memcpy(op, ip, len); 291 | this->op = op + len; 292 | return CSNAPPY_E_OK; 293 | } 294 | 295 | static INLINE int 296 | SAW__AppendFromSelf(struct SnappyArrayWriter *this, 297 | uint32_t offset, uint32_t len) 298 | { 299 | char *op = this->op; 300 | const uint32_t space_left = this->op_limit - op; 301 | /* -1u catches offset==0 */ 302 | if (op - this->base <= offset - 1u) 303 | return CSNAPPY_E_DATA_MALFORMED; 304 | /* Fast path, used for the majority (70-80%) of dynamic invocations. */ 305 | if (len <= 16 && offset >= 8 && space_left >= 16) { 306 | UnalignedCopy64(op - offset, op); 307 | UnalignedCopy64(op - offset + 8, op + 8); 308 | } else if (space_left >= (len + kMaxIncrementCopyOverflow)) { 309 | IncrementalCopyFastPath(op - offset, op, len); 310 | } else { 311 | if (space_left < len) 312 | return CSNAPPY_E_OUTPUT_OVERRUN; 313 | IncrementalCopy(op - offset, op, len); 314 | } 315 | this->op = op + len; 316 | return CSNAPPY_E_OK; 317 | } 318 | 319 | int 320 | csnappy_decompress_noheader( 321 | const char *src, 322 | uint32_t src_remaining, 323 | char *dst, 324 | uint32_t *dst_len) 325 | { 326 | struct SnappyArrayWriter writer; 327 | const char *end_minus5 = src + src_remaining - 5; 328 | uint32_t length, trailer, opword, extra_bytes; 329 | int ret, available; 330 | uint8_t opcode; 331 | char scratch[5]; 332 | writer.op = writer.base = dst; 333 | writer.op_limit = writer.op + *dst_len; 334 | #define LOOP_COND() \ 335 | if (unlikely(src >= end_minus5)) { \ 336 | available = end_minus5 + 5 - src; \ 337 | if (unlikely(available <= 0)) \ 338 | goto out; \ 339 | memmove(scratch, src, available); \ 340 | src = scratch; \ 341 | end_minus5 = scratch + available - 5; \ 342 | } 343 | 344 | LOOP_COND(); 345 | for (;;) { 346 | opcode = *(const uint8_t *)src++; 347 | if (opcode & 0x3) { 348 | opword = char_table[opcode]; 349 | extra_bytes = opword >> 11; 350 | trailer = get_unaligned_le(src, extra_bytes); 351 | length = opword & 0xff; 352 | src += extra_bytes; 353 | trailer += opword & 0x700; 354 | ret = SAW__AppendFromSelf(&writer, trailer, length); 355 | if (ret < 0) 356 | return ret; 357 | LOOP_COND(); 358 | } else { 359 | length = (opcode >> 2) + 1; 360 | available = end_minus5 + 5 - src; 361 | if (length <= 16 && available >= 16) { 362 | if ((ret = SAW__AppendFastPath(&writer, src, length)) < 0) 363 | return ret; 364 | src += length; 365 | LOOP_COND(); 366 | continue; 367 | } 368 | if (unlikely(length > 60)) { 369 | extra_bytes = length - 60; 370 | length = get_unaligned_le(src, extra_bytes) + 1; 371 | src += extra_bytes; 372 | available = end_minus5 + 5 - src; 373 | } 374 | if (unlikely(available < 
(int32_t)length)) 375 | return CSNAPPY_E_DATA_MALFORMED; 376 | ret = SAW__Append(&writer, src, length); 377 | if (ret < 0) 378 | return ret; 379 | src += length; 380 | LOOP_COND(); 381 | } 382 | } 383 | #undef LOOP_COND 384 | out: 385 | *dst_len = writer.op - writer.base; 386 | return CSNAPPY_E_OK; 387 | } 388 | #endif /* optimized for unaligned arch */ 389 | 390 | #if defined(__KERNEL__) && !defined(STATIC) 391 | EXPORT_SYMBOL(csnappy_decompress_noheader); 392 | #endif 393 | 394 | int 395 | csnappy_decompress( 396 | const char *src, 397 | uint32_t src_len, 398 | char *dst, 399 | uint32_t dst_len) 400 | { 401 | int n; 402 | uint32_t olen = 0; 403 | /* Read uncompressed length from the front of the compressed input */ 404 | n = csnappy_get_uncompressed_length(src, src_len, &olen); 405 | if (unlikely(n < CSNAPPY_E_OK)) 406 | return n; 407 | /* Protect against possible DoS attack */ 408 | if (unlikely(olen > dst_len)) 409 | return CSNAPPY_E_OUTPUT_INSUF; 410 | return csnappy_decompress_noheader(src + n, src_len - n, dst, &olen); 411 | } 412 | #if defined(__KERNEL__) && !defined(STATIC) 413 | EXPORT_SYMBOL(csnappy_decompress); 414 | 415 | MODULE_LICENSE("BSD"); 416 | MODULE_DESCRIPTION("Snappy Decompressor"); 417 | #endif 418 | -------------------------------------------------------------------------------- /csnappy_internal.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following disclaimer 12 | in the documentation and/or other materials provided with the 13 | distribution. 14 | * Neither the name of Google Inc. nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | Various stubs for the open-source version of Snappy. 
31 | 32 | File modified for the Linux Kernel by 33 | Zeev Tarantov 34 | 35 | File modified for Sereal by 36 | Steffen Mueller 37 | */ 38 | 39 | #ifndef CSNAPPY_INTERNAL_H_ 40 | #define CSNAPPY_INTERNAL_H_ 41 | 42 | #include "csnappy_compat.h" 43 | 44 | #ifndef __KERNEL__ 45 | #include "csnappy_internal_userspace.h" 46 | #include 47 | #else 48 | 49 | #include 50 | #include 51 | #include 52 | #include 53 | #include 54 | 55 | #if (defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)) || \ 56 | (!defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)) 57 | #error either __LITTLE_ENDIAN or __BIG_ENDIAN must be defined 58 | #endif 59 | #if defined(__LITTLE_ENDIAN) 60 | #define __BYTE_ORDER __LITTLE_ENDIAN 61 | #else 62 | #define __BYTE_ORDER __BIG_ENDIAN 63 | #endif 64 | 65 | #ifdef DEBUG 66 | #define DCHECK(cond) if (!(cond)) \ 67 | printk(KERN_DEBUG "assert failed @ %s:%i\n", \ 68 | __FILE__, __LINE__) 69 | #else 70 | #define DCHECK(cond) 71 | #endif 72 | 73 | #define UNALIGNED_LOAD16(_p) get_unaligned((const uint16_t *)(_p)) 74 | #define UNALIGNED_LOAD32(_p) get_unaligned((const uint32_t *)(_p)) 75 | #define UNALIGNED_LOAD64(_p) get_unaligned((const uint64_t *)(_p)) 76 | #define UNALIGNED_STORE16(_p, _val) put_unaligned((_val), (uint16_t *)(_p)) 77 | #define UNALIGNED_STORE32(_p, _val) put_unaligned((_val), (uint32_t *)(_p)) 78 | #define UNALIGNED_STORE64(_p, _val) put_unaligned((_val), (uint64_t *)(_p)) 79 | 80 | #define FindLSBSetNonZero(n) __builtin_ctz(n) 81 | #define FindLSBSetNonZero64(n) __builtin_ctzll(n) 82 | 83 | #endif /* __KERNEL__ */ 84 | 85 | #if (!defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)) || ! defined(__BYTE_ORDER) 86 | # error either __LITTLE_ENDIAN or __BIG_ENDIAN, plus __BYTE_ORDER must be defined 87 | #endif 88 | 89 | #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || defined(__ARMV6__) || \ 90 | defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) 91 | # define ARCH_ARM_HAVE_UNALIGNED 92 | #endif 93 | 94 | 95 | static INLINE void UnalignedCopy64(const void *src, void *dst) { 96 | #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(ARCH_ARM_HAVE_UNALIGNED) || defined(__aarch64__) 97 | if ((sizeof(void *) == 8) || (sizeof(long) == 8)) { 98 | UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src)); 99 | } else { 100 | /* This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64 101 | on some platforms, in particular ARM. 
*/ 102 | const uint8_t *src_bytep = (const uint8_t *)src; 103 | uint8_t *dst_bytep = (uint8_t *)dst; 104 | 105 | UNALIGNED_STORE32(dst_bytep, UNALIGNED_LOAD32(src_bytep)); 106 | UNALIGNED_STORE32(dst_bytep + 4, UNALIGNED_LOAD32(src_bytep + 4)); 107 | } 108 | #else 109 | const uint8_t *src_bytep = (const uint8_t *)src; 110 | uint8_t *dst_bytep = (uint8_t *)dst; 111 | dst_bytep[0] = src_bytep[0]; 112 | dst_bytep[1] = src_bytep[1]; 113 | dst_bytep[2] = src_bytep[2]; 114 | dst_bytep[3] = src_bytep[3]; 115 | dst_bytep[4] = src_bytep[4]; 116 | dst_bytep[5] = src_bytep[5]; 117 | dst_bytep[6] = src_bytep[6]; 118 | dst_bytep[7] = src_bytep[7]; 119 | #endif 120 | } 121 | 122 | #if defined(__arm__) 123 | #if defined(ARCH_ARM_HAVE_UNALIGNED) 124 | static INLINE uint32_t get_unaligned_le(const void *p, uint32_t n) 125 | { 126 | uint32_t wordmask = (1U << (8 * n)) - 1; 127 | return get_unaligned_le32(p) & wordmask; 128 | } 129 | #else 130 | extern uint32_t get_unaligned_le_armv5(const void *p, uint32_t n); 131 | #define get_unaligned_le get_unaligned_le_armv5 132 | #endif 133 | #else 134 | static INLINE uint32_t get_unaligned_le(const void *p, uint32_t n) 135 | { 136 | /* Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits */ 137 | static const uint32_t wordmask[] = { 138 | 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu 139 | }; 140 | return get_unaligned_le32(p) & wordmask[n]; 141 | } 142 | #endif 143 | 144 | #define DCHECK_EQ(a, b) DCHECK(((a) == (b))) 145 | #define DCHECK_NE(a, b) DCHECK(((a) != (b))) 146 | #define DCHECK_GT(a, b) DCHECK(((a) > (b))) 147 | #define DCHECK_GE(a, b) DCHECK(((a) >= (b))) 148 | #define DCHECK_LT(a, b) DCHECK(((a) < (b))) 149 | #define DCHECK_LE(a, b) DCHECK(((a) <= (b))) 150 | 151 | enum { 152 | LITERAL = 0, 153 | COPY_1_BYTE_OFFSET = 1, /* 3 bit length + 3 bits of offset in opcode */ 154 | COPY_2_BYTE_OFFSET = 2, 155 | COPY_4_BYTE_OFFSET = 3 156 | }; 157 | 158 | #endif /* CSNAPPY_INTERNAL_H_ */ 159 | -------------------------------------------------------------------------------- /csnappy_internal_userspace.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following disclaimer 12 | in the documentation and/or other materials provided with the 13 | distribution. 14 | * Neither the name of Google Inc. nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | Various stubs for the open-source version of Snappy. 31 | 32 | File modified by 33 | Zeev Tarantov 34 | 35 | File modified for Sereal by 36 | Steffen Mueller 37 | Yves Orton 38 | 39 | */ 40 | 41 | #ifndef CSNAPPY_INTERNAL_USERSPACE_H_ 42 | #define CSNAPPY_INTERNAL_USERSPACE_H_ 43 | 44 | /*note the original version of this file checked for MS version, but MS will *never* support 45 | * anything but C89, so the version check is bogus. */ 46 | #if defined(_MSC_VER) 47 | typedef unsigned __int8 uint8_t; 48 | typedef unsigned __int16 uint16_t; 49 | typedef unsigned __int32 uint32_t; 50 | typedef unsigned __int64 uint64_t; 51 | typedef __int32 int32_t; /* Sereal specific change, see csnappy_decompress.c(271) : error C2065: 'int32_t' : undeclared identifier */ 52 | /* the following define is Sereal specific, as MS C89 compilers do not know about "inline" */ 53 | #define inline __inline 54 | #ifdef _M_X64 55 | # define __x86_64__ 56 | # define __x86_64 57 | # define __amd64__ 58 | # define __amd64 59 | #endif 60 | #ifdef _M_IX86 61 | # define __i386__ 62 | # define __i386 63 | # define i386 64 | # define _X86_ 65 | #endif 66 | #ifdef _M_IA64 67 | # define __ia64__ 68 | # define __ia64 69 | # define __IA64__ 70 | # define __itanium__ 71 | #endif 72 | 73 | #else 74 | 75 | #if defined(__SUNPRO_C) || defined(_AIX) 76 | # include 77 | #else 78 | # include 79 | #endif 80 | 81 | #endif 82 | 83 | #ifdef _GNU_SOURCE 84 | #define min(x, y) (__extension__ ({ \ 85 | typeof(x) _min1 = (x); \ 86 | typeof(y) _min2 = (y); \ 87 | (void) (&_min1 == &_min2); \ 88 | _min1 < _min2 ? _min1 : _min2; })) 89 | #else 90 | #define min(x, y) (((x) < (y)) ? (x) : (y)) 91 | #endif 92 | 93 | /* Static prediction hints. 
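 * likely()/unlikely() below expand to __builtin_expect() under GCC and to the
 * bare expression elsewhere. For example, the writer helpers use
 *   if (unlikely(space_left < len)) return CSNAPPY_E_OUTPUT_OVERRUN;
 * so that the overrun check is laid out as the cold path.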
*/ 94 | #ifndef __GNUC__ 95 | #define __builtin_expect(a,b) a 96 | #endif 97 | #define likely(x) __builtin_expect(!!(x), 1) 98 | #define unlikely(x) __builtin_expect(!!(x), 0) 99 | 100 | 101 | #ifdef DEBUG 102 | #include 103 | #define DCHECK(cond) assert(cond) 104 | #else 105 | #define DCHECK(cond) 106 | #endif 107 | 108 | #include "csnappy_compat.h" 109 | 110 | /* 111 | Uses code from http://code.google.com/p/exfat/source/browse/trunk/libexfat/byteorder.h 112 | with 3-clause BSD license instead of GPL, with permission from: 113 | Andrew Nayenko 114 | Albert Lee 115 | */ 116 | #if defined(_MSC_VER) 117 | 118 | #include 119 | #define bswap_16(x) _byteswap_ushort(x) 120 | #define bswap_32(x) _byteswap_ulong(x) 121 | #define bswap_64(x) _byteswap_uint64(x) 122 | #define __BIG_ENDIAN 4321 123 | #define __LITTLE_ENDIAN 1234 124 | #define __BYTE_ORDER LITTLE_ENDIAN 125 | 126 | #elif defined(_AIX) 127 | 128 | #include 129 | #define __LITTLE_ENDIAN LITTLE_ENDIAN 130 | #define __BIG_ENDIAN BIG_ENDIAN 131 | #define __BYTE_ORDER __BIG_ENDIAN 132 | 133 | #elif defined(__APPLE__) 134 | 135 | #include 136 | #include 137 | #define bswap_16(x) OSSwapInt16(x) 138 | #define bswap_32(x) OSSwapInt32(x) 139 | #define bswap_64(x) OSSwapInt64(x) 140 | #define __BYTE_ORDER BYTE_ORDER 141 | #define __LITTLE_ENDIAN LITTLE_ENDIAN 142 | #define __BIG_ENDIAN BIG_ENDIAN 143 | 144 | #elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) 145 | 146 | #include 147 | #define bswap_16(x) bswap16(x) 148 | #define bswap_32(x) bswap32(x) 149 | #define bswap_64(x) bswap64(x) 150 | #define __BYTE_ORDER _BYTE_ORDER 151 | #define __LITTLE_ENDIAN _LITTLE_ENDIAN 152 | #define __BIG_ENDIAN _BIG_ENDIAN 153 | 154 | #elif defined(__OpenBSD__) 155 | 156 | #include 157 | #define bswap_16(x) swap16(x) 158 | #define bswap_32(x) swap32(x) 159 | #define bswap_64(x) swap64(x) 160 | #define __BYTE_ORDER _BYTE_ORDER 161 | #define __LITTLE_ENDIAN _LITTLE_ENDIAN 162 | #define __BIG_ENDIAN _BIG_ENDIAN 163 | 164 | #elif defined(__MINGW32__) 165 | #include 166 | #define __BYTE_ORDER BYTE_ORDER 167 | #define __LITTLE_ENDIAN LITTLE_ENDIAN 168 | #define __BIG_ENDIAN BIG_ENDIAN 169 | 170 | 171 | #elif defined(__sun) 172 | 173 | #include 174 | #define bswap_16(x) BSWAP_16(x) 175 | #define bswap_32(x) BSWAP_32(x) 176 | #define bswap_64(x) BSWAP_64(x) 177 | #define __LITTLE_ENDIAN 1234 178 | #define __BIG_ENDIAN 4321 179 | #ifdef _LITTLE_ENDIAN 180 | #define __BYTE_ORDER __LITTLE_ENDIAN 181 | #else 182 | #define __BYTE_ORDER __BIG_ENDIAN 183 | #endif 184 | 185 | #elif defined(__hpux) 186 | 187 | #ifdef __LP64__ 188 | #define __LITTLE_ENDIAN 12345678 189 | #define __BIG_ENDIAN 87654321 190 | #define int64_t long 191 | #else 192 | #define __LITTLE_ENDIAN 1234 193 | #define __BIG_ENDIAN 4321 194 | #define int64_t long long 195 | #endif 196 | 197 | #define __BYTE_ORDER __BIG_ENDIAN /* HP-UX always */ 198 | #define int32_t int 199 | #define int16_t short 200 | 201 | #define __SNAPPY_STRICT_ALIGN 202 | 203 | #elif defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) 204 | 205 | #ifndef __BIG_ENDIAN 206 | #define __BIG_ENDIAN 87654321 207 | #endif 208 | #ifndef __LITTLE_ENDIAN 209 | #define __LITTLE_ENDIAN 12345678 210 | #endif 211 | #ifndef __BYTE_ORDER 212 | #define __BYTE_ORDER __BIG_ENDIAN 213 | #endif 214 | 215 | #define __SNAPPY_STRICT_ALIGN 216 | 217 | #elif defined(__GNUC__) || defined(__ANDROID__) || defined(__CYGWIN__) 218 | 219 | #include 220 | #include 221 | 222 | #endif 223 | 224 | #ifndef bswap_16 225 | #define 
bswap_16(x) \ 226 | (((uint16_t)(x) & 0xFF00) >> 8 | \ 227 | ((uint16_t)(x) & 0x00FF) << 8) 228 | #endif 229 | 230 | #ifndef bswap_32 231 | #define bswap_32(x) \ 232 | (((uint32_t)(x) & 0xFF000000) >> 24 | \ 233 | ((uint32_t)(x) & 0x00FF0000) >> 8 | \ 234 | ((uint32_t)(x) & 0x0000FF00) << 8 | \ 235 | ((uint32_t)(x) & 0x000000FF) << 24) 236 | #endif 237 | 238 | #ifndef bswap_64 239 | #define bswap_64(x) \ 240 | (((uint64_t)(x) & 0xFF00000000000000) >> 56 | \ 241 | ((uint64_t)(x) & 0x00FF000000000000) >> 40 | \ 242 | ((uint64_t)(x) & 0x0000FF0000000000) >> 24 | \ 243 | ((uint64_t)(x) & 0x000000FF00000000) >> 8 | \ 244 | ((uint64_t)(x) & 0x00000000FF000000) << 8 | \ 245 | ((uint64_t)(x) & 0x0000000000FF0000) << 24 | \ 246 | ((uint64_t)(x) & 0x000000000000FF00) << 40 | \ 247 | ((uint64_t)(x) & 0x00000000000000FF) << 56) 248 | #endif 249 | 250 | 251 | /* Potentially unaligned loads and stores. */ 252 | 253 | #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) 254 | #if defined(__GNUC__) 255 | typedef uint16_t my_uint16_t __attribute__((aligned(1))); 256 | typedef uint32_t my_uint32_t __attribute__((aligned(1))); 257 | typedef uint64_t my_uint64_t __attribute__((aligned(1))); 258 | #else 259 | typedef uint16_t my_uint16_t; 260 | typedef uint32_t my_uint32_t; 261 | typedef uint64_t my_uint64_t; 262 | #endif 263 | 264 | #define UNALIGNED_LOAD16(_p) (*(const my_uint16_t*)(_p)) 265 | #define UNALIGNED_LOAD32(_p) (*(const my_uint32_t*)(_p)) 266 | #define UNALIGNED_LOAD64(_p) (*(const my_uint64_t*)(_p)) 267 | 268 | #define UNALIGNED_STORE16(_p, _val) (*(my_uint16_t*)(_p) = (_val)) 269 | #define UNALIGNED_STORE32(_p, _val) (*(my_uint32_t*)(_p) = (_val)) 270 | #define UNALIGNED_STORE64(_p, _val) (*(my_uint64_t*)(_p) = (_val)) 271 | 272 | #elif defined(__arm__) && \ 273 | !defined(__ARM_ARCH_4__) && \ 274 | !defined(__ARM_ARCH_4T__) && /* http://wiki.debian.org/ArmEabiPort#Choice_of_minimum_CPU */ \ 275 | !defined(__MARM_ARMV4__) && \ 276 | !defined(_ARMV4I_) && \ 277 | !defined(__ARM_ARCH_5__) && \ 278 | !defined(__ARM_ARCH_5T__) && \ 279 | !defined(__ARM_ARCH_5E__) && \ 280 | !defined(__ARM_ARCH_5TE__) && \ 281 | !defined(__ARM_ARCH_5TEJ__) && \ 282 | !defined(__MARM_ARMV5__) 283 | 284 | #define UNALIGNED_LOAD16(_p) (*(const uint16_t*)(_p)) 285 | #define UNALIGNED_LOAD32(_p) (*(const uint32_t*)(_p)) 286 | #define UNALIGNED_STORE16(_p, _val) (*(uint16_t*)(_p) = (_val)) 287 | #define UNALIGNED_STORE32(_p, _val) (*(uint32_t*)(_p) = (_val)) 288 | 289 | #pragma pack(1) 290 | struct una_u64 { uint64_t x; }; 291 | #pragma pack() 292 | 293 | static INLINE uint64_t UNALIGNED_LOAD64(const void *p) 294 | { 295 | const struct una_u64 *ptr = (const struct una_u64 *)p; 296 | return ptr->x; 297 | } 298 | 299 | static INLINE void UNALIGNED_STORE64(void *p, uint64_t v) 300 | { 301 | struct una_u64 *ptr = (struct una_u64 *)p; 302 | ptr->x = v; 303 | } 304 | 305 | #elif defined(__SNAPPY_STRICT_ALIGN) || defined(__sparc) || defined(__sparc__) /* strict architectures */ 306 | 307 | /* For these platforms, there really are no unaligned loads/stores. 308 | * Read/write everything as uint8_t. Smart compilers might recognize 309 | * these patterns and generate something smart. */ 310 | 311 | /* Possible future enhancement: see if the ptr is evenly divisible 312 | * (as uintNN_t) by 2/4/8, and if so, do the cast-as-uintNN_t-ptr- 313 | * and-deref-as-uintNN_t. Balancing act: adding the branch 314 | * will slow things down, while reading/writing aligned might speed 315 | * things up. 
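 * A sketch of that idea for the 32-bit load, hypothetical and untested, and
 * assuming a uintptr_t type is available:
 *   if (((uintptr_t)p & 3) == 0)
 *       return *(const uint32_t *)p;
 * with everything else falling through to the byte-by-byte code below; the
 * same test with & 1 and & 7 would cover the 16- and 64-bit cases.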
*/ 316 | 317 | #if __BYTE_ORDER == __BIG_ENDIAN 318 | 319 | static INLINE uint16_t UNALIGNED_LOAD16(const void *p) 320 | { 321 | return 322 | (uint16_t)(((uint8_t*)p)[0]) << 8 | 323 | (uint16_t)(((uint8_t*)p)[1]); 324 | } 325 | 326 | static INLINE uint32_t UNALIGNED_LOAD32(const void *p) 327 | { 328 | return 329 | (uint32_t)(((uint8_t*)p)[0]) << 24 | 330 | (uint32_t)(((uint8_t*)p)[1]) << 16 | 331 | (uint32_t)(((uint8_t*)p)[2]) << 8 | 332 | (uint32_t)(((uint8_t*)p)[3]); 333 | } 334 | 335 | static INLINE uint64_t UNALIGNED_LOAD64(const void *p) 336 | { 337 | return 338 | (uint64_t)((uint8_t*)p)[0] << 56 | 339 | (uint64_t)((uint8_t*)p)[1] << 48 | 340 | (uint64_t)((uint8_t*)p)[2] << 40 | 341 | (uint64_t)((uint8_t*)p)[3] << 32 | 342 | (uint64_t)((uint8_t*)p)[4] << 24 | 343 | (uint64_t)((uint8_t*)p)[5] << 16 | 344 | (uint64_t)((uint8_t*)p)[6] << 8 | 345 | (uint64_t)((uint8_t*)p)[7]; 346 | } 347 | 348 | static INLINE void UNALIGNED_STORE16(void *p, uint16_t v) 349 | { 350 | uint8_t* s = (uint8_t*)p; 351 | s[0] = (v & 0xFF00) >> 8; 352 | s[1] = (v & 0x00FF); 353 | } 354 | 355 | static INLINE void UNALIGNED_STORE32(void *p, uint32_t v) 356 | { 357 | uint8_t* s = (uint8_t*)p; 358 | s[0] = (v & 0xFF000000) >> 24; 359 | s[1] = (v & 0x00FF0000) >> 16; 360 | s[2] = (v & 0x0000FF00) >> 8; 361 | s[3] = (v & 0x000000FF); 362 | } 363 | 364 | static INLINE void UNALIGNED_STORE64(void *p, uint64_t v) 365 | { 366 | uint8_t* s = (uint8_t*)p; 367 | s[0] = (v & 0xFF00000000000000) >> 56; 368 | s[1] = (v & 0x00FF000000000000) >> 48; 369 | s[2] = (v & 0x0000FF0000000000) >> 40; 370 | s[3] = (v & 0x000000FF00000000) >> 32; 371 | s[4] = (v & 0x00000000FF000000) >> 24; 372 | s[5] = (v & 0x0000000000FF0000) >> 16; 373 | s[6] = (v & 0x000000000000FF00) >> 8; 374 | s[7] = (v & 0x00000000000000FF); 375 | } 376 | 377 | #endif /* #if __BYTE_ORDER == __BIG_ENDIAN */ 378 | 379 | #if __BYTE_ORDER == __LITTLE_ENDIAN 380 | 381 | static INLINE uint16_t UNALIGNED_LOAD16(const void *p) 382 | { 383 | return 384 | (uint16_t)(((uint8_t*)p)[1]) << 8 | 385 | (uint16_t)(((uint8_t*)p)[0]); 386 | } 387 | 388 | static INLINE uint32_t UNALIGNED_LOAD32(const void *p) 389 | { 390 | return 391 | (uint32_t)(((uint8_t*)p)[3]) << 24 | 392 | (uint32_t)(((uint8_t*)p)[2]) << 16 | 393 | (uint32_t)(((uint8_t*)p)[1]) << 8 | 394 | (uint32_t)(((uint8_t*)p)[0]); 395 | } 396 | 397 | static INLINE uint64_t UNALIGNED_LOAD64(const void *p) 398 | { 399 | return 400 | (uint64_t)(((uint8_t*)p)[7]) << 56 | 401 | (uint64_t)(((uint8_t*)p)[6]) << 48 | 402 | (uint64_t)(((uint8_t*)p)[5]) << 40 | 403 | (uint64_t)(((uint8_t*)p)[4]) << 32 | 404 | (uint64_t)(((uint8_t*)p)[3]) << 24 | 405 | (uint64_t)(((uint8_t*)p)[2]) << 16 | 406 | (uint64_t)(((uint8_t*)p)[1]) << 8 | 407 | (uint64_t)(((uint8_t*)p)[0]); 408 | } 409 | 410 | static INLINE void UNALIGNED_STORE16(void *p, uint16_t v) 411 | { 412 | uint8_t* s = (uint8_t*)p; 413 | s[1] = (v & 0xFF00) >> 8; 414 | s[0] = (v & 0x00FF); 415 | } 416 | 417 | static INLINE void UNALIGNED_STORE32(void *p, uint32_t v) 418 | { 419 | uint8_t* s = (uint8_t*)p; 420 | s[3] = (v & 0xFF000000) >> 24; 421 | s[2] = (v & 0x00FF0000) >> 16; 422 | s[1] = (v & 0x0000FF00) >> 8; 423 | s[0] = (v & 0x000000FF); 424 | } 425 | 426 | static INLINE void UNALIGNED_STORE64(void *p, uint64_t v) 427 | { 428 | uint8_t* s = (uint8_t*)p; 429 | s[7] = (v & 0xFF00000000000000) >> 56; 430 | s[6] = (v & 0x00FF000000000000) >> 48; 431 | s[5] = (v & 0x0000FF0000000000) >> 40; 432 | s[4] = (v & 0x000000FF00000000) >> 32; 433 | s[3] = (v & 0x00000000FF000000) >> 24; 434 |
s[2] = (v & 0x0000000000FF0000) >> 16; 435 | s[1] = (v & 0x000000000000FF00) >> 8; 436 | s[0] = (v & 0x00000000000000FF); 437 | } 438 | 439 | #endif /* #if __BYTE_ORDER == __LITTLE_ENDIAN */ 440 | 441 | #else /* !(x86 || powerpc) && !(arm && !(old arm architectures)) */ 442 | 443 | /* pragma pack is available in gcc (though originally apparently by 444 | * Microsoft) and in some other compilers (probably inspired by either 445 | * the two big ones), but there is no good portable way to detect 446 | * whether it's supported. The bad news: on platforms where it's not 447 | * supported (unsupported pragmas are ignored) but which do require 448 | * strict alignment, the below pragma pack trickery will fail. 449 | * Therefore this option is the last and the default, and the platforms 450 | * requiring strict alignment are detected earlier. */ 451 | 452 | #pragma pack(1) 453 | struct una_u16 { uint16_t x; }; 454 | struct una_u32 { uint32_t x; }; 455 | struct una_u64 { uint64_t x; }; 456 | #pragma pack() 457 | 458 | static INLINE uint16_t UNALIGNED_LOAD16(const void *p) 459 | { 460 | const struct una_u16 *ptr = (const struct una_u16 *)p; 461 | return ptr->x; 462 | } 463 | 464 | static INLINE uint32_t UNALIGNED_LOAD32(const void *p) 465 | { 466 | const struct una_u32 *ptr = (const struct una_u32 *)p; 467 | return ptr->x; 468 | } 469 | 470 | static INLINE uint64_t UNALIGNED_LOAD64(const void *p) 471 | { 472 | const struct una_u64 *ptr = (const struct una_u64 *)p; 473 | return ptr->x; 474 | } 475 | 476 | static INLINE void UNALIGNED_STORE16(void *p, uint16_t v) 477 | { 478 | struct una_u16 *ptr = (struct una_u16 *)p; 479 | ptr->x = v; 480 | } 481 | 482 | static INLINE void UNALIGNED_STORE32(void *p, uint32_t v) 483 | { 484 | struct una_u32 *ptr = (struct una_u32 *)p; 485 | ptr->x = v; 486 | } 487 | 488 | static INLINE void UNALIGNED_STORE64(void *p, uint64_t v) 489 | { 490 | struct una_u64 *ptr = (struct una_u64 *)p; 491 | ptr->x = v; 492 | } 493 | 494 | #endif /* defining UNALIGNED_LOADNN and UNALIGNED_STORENN */ 495 | 496 | 497 | #if __BYTE_ORDER == __LITTLE_ENDIAN 498 | #define get_unaligned_le32(p) UNALIGNED_LOAD32(p) 499 | #define put_unaligned_le16(v, p) UNALIGNED_STORE16(p, v) 500 | #elif __BYTE_ORDER == __BIG_ENDIAN 501 | static INLINE uint32_t get_unaligned_le32(const void *p) 502 | { 503 | return bswap_32(UNALIGNED_LOAD32(p)); 504 | } 505 | static INLINE void put_unaligned_le16(uint16_t val, void *p) 506 | { 507 | UNALIGNED_STORE16(p, bswap_16(val)); 508 | } 509 | #else 510 | static INLINE uint32_t get_unaligned_le32(const void *p) 511 | { 512 | const uint8_t *b = (const uint8_t *)p; 513 | return b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); 514 | } 515 | static INLINE void put_unaligned_le16(uint16_t val, void *p) 516 | { 517 | uint8_t *b = (uint8_t *)p; 518 | b[0] = val & 255; 519 | b[1] = val >> 8; 520 | } 521 | #endif 522 | 523 | 524 | #if defined(HAVE_BUILTIN_CTZ) 525 | 526 | static INLINE int FindLSBSetNonZero(uint32_t n) 527 | { 528 | return __builtin_ctz(n); 529 | } 530 | 531 | static INLINE int FindLSBSetNonZero64(uint64_t n) 532 | { 533 | return __builtin_ctzll(n); 534 | } 535 | 536 | #else /* Portable versions. 
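 * (A worked example for the loop below: for n = 0x8 the shifts by 16, 8 and 4
 * each leave a non-zero value, so rc falls from 31 to 15, 7 and then 3, while
 * the shifts by 2 and 1 push the remaining bit out of the 32-bit value and are
 * skipped; the function returns 3, the index of the lowest set bit.)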
*/ 537 | 538 | static INLINE int FindLSBSetNonZero(uint32_t n) 539 | { 540 | int rc = 31, i, shift; 541 | uint32_t x; 542 | for (i = 4, shift = 1 << 4; i >= 0; --i) { 543 | x = n << shift; 544 | if (x != 0) { 545 | n = x; 546 | rc -= shift; 547 | } 548 | shift >>= 1; 549 | } 550 | return rc; 551 | } 552 | 553 | /* FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). */ 554 | static INLINE int FindLSBSetNonZero64(uint64_t n) 555 | { 556 | const uint32_t bottombits = (uint32_t)n; 557 | if (bottombits == 0) { 558 | /* Bottom bits are zero, so scan in top bits */ 559 | return 32 + FindLSBSetNonZero((uint32_t)(n >> 32)); 560 | } else { 561 | return FindLSBSetNonZero(bottombits); 562 | } 563 | } 564 | 565 | #endif /* End portable versions. */ 566 | 567 | #endif /* CSNAPPY_INTERNAL_USERSPACE_H_ */ 568 | -------------------------------------------------------------------------------- /debugfs_input.txt: -------------------------------------------------------------------------------- 1 | set_super_value raid_stride 0 2 | set_super_value raid_stripe_width 0 3 | -------------------------------------------------------------------------------- /kernel_3_2_10.patch: -------------------------------------------------------------------------------- 1 | diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig 2 | index 25cdff3..ace8395 100644 3 | --- a/drivers/staging/Kconfig 4 | +++ b/drivers/staging/Kconfig 5 | @@ -88,6 +88,8 @@ source "drivers/staging/sep/Kconfig" 6 | 7 | source "drivers/staging/iio/Kconfig" 8 | 9 | +source "drivers/staging/snappy/Kconfig" 10 | + 11 | source "drivers/staging/zram/Kconfig" 12 | 13 | source "drivers/staging/zcache/Kconfig" 14 | diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile 15 | index a25f3f2..2c0862b 100644 16 | --- a/drivers/staging/Makefile 17 | +++ b/drivers/staging/Makefile 18 | @@ -57,3 +57,5 @@ obj-$(CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4) += ste_rmi4/ 19 | obj-$(CONFIG_DRM_PSB) += gma500/ 20 | obj-$(CONFIG_INTEL_MEI) += mei/ 21 | obj-$(CONFIG_MFD_NVEC) += nvec/ 22 | +obj-$(CONFIG_SNAPPY_COMPRESS) += snappy/ 23 | +obj-$(CONFIG_SNAPPY_DECOMPRESS) += snappy/ 24 | diff --git a/drivers/staging/snappy/Kconfig b/drivers/staging/snappy/Kconfig 25 | new file mode 100644 26 | index 0000000..24f6908 27 | --- /dev/null 28 | +++ b/drivers/staging/snappy/Kconfig 29 | @@ -0,0 +1,5 @@ 30 | +config SNAPPY_COMPRESS 31 | + tristate "Google Snappy Compression" 32 | + 33 | +config SNAPPY_DECOMPRESS 34 | + tristate "Google Snappy Decompression" 35 | diff --git a/drivers/staging/snappy/Makefile b/drivers/staging/snappy/Makefile 36 | new file mode 100644 37 | index 0000000..f5be21d 38 | --- /dev/null 39 | +++ b/drivers/staging/snappy/Makefile 40 | @@ -0,0 +1,7 @@ 41 | +ccflags-y := -std=gnu99 42 | + 43 | +snappy_compress-objs := csnappy_compress.o 44 | +snappy_decompress-objs := csnappy_decompress.o 45 | + 46 | +obj-$(CONFIG_SNAPPY_COMPRESS) += csnappy_compress.o 47 | +obj-$(CONFIG_SNAPPY_DECOMPRESS) += csnappy_decompress.o 48 | diff --git a/drivers/staging/snappy/csnappy.h b/drivers/staging/snappy/csnappy.h 49 | new file mode 100644 50 | index 0000000..97a3e17 51 | --- /dev/null 52 | +++ b/drivers/staging/snappy/csnappy.h 53 | @@ -0,0 +1,129 @@ 54 | +#ifndef __CSNAPPY_H__ 55 | +#define __CSNAPPY_H__ 56 | +/* 57 | +File modified for the Linux Kernel by 58 | +Zeev Tarantov 59 | +*/ 60 | +#ifdef __cplusplus 61 | +extern "C" { 62 | +#endif 63 | + 64 | +#define CSNAPPY_VERSION 4 65 | + 66 | +#define CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO 15 67 | +#define 
CSNAPPY_WORKMEM_BYTES (1 << CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO) 68 | + 69 | +#ifndef __GNUC__ 70 | +#define __attribute__(x) /*NOTHING*/ 71 | +#endif 72 | + 73 | +/* 74 | + * Returns the maximal size of the compressed representation of 75 | + * input data that is "source_len" bytes in length; 76 | + */ 77 | +uint32_t 78 | +csnappy_max_compressed_length(uint32_t source_len) __attribute__((const)); 79 | + 80 | +/* 81 | + * Flat array compression that does not emit the "uncompressed length" 82 | + * prefix. Compresses "input" array to the "output" array. 83 | + * 84 | + * REQUIRES: "input" is at most 32KiB long. 85 | + * REQUIRES: "output" points to an array of memory that is at least 86 | + * "csnappy_max_compressed_length(input_length)" in size. 87 | + * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. 88 | + * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. 89 | + * 90 | + * Returns an "end" pointer into "output" buffer. 91 | + * "end - output" is the compressed size of "input". 92 | + */ 93 | +char* 94 | +csnappy_compress_fragment( 95 | + const char *input, 96 | + const uint32_t input_length, 97 | + char *output, 98 | + void *working_memory, 99 | + const int workmem_bytes_power_of_two); 100 | + 101 | +/* 102 | + * REQUIRES: "compressed" must point to an area of memory that is at 103 | + * least "csnappy_max_compressed_length(input_length)" bytes in length. 104 | + * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. 105 | + * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. 106 | + * 107 | + * Takes the data stored in "input[0..input_length]" and stores 108 | + * it in the array pointed to by "compressed". 109 | + * 110 | + * "*out_compressed_length" is set to the length of the compressed output. 111 | + */ 112 | +void 113 | +csnappy_compress( 114 | + const char *input, 115 | + uint32_t input_length, 116 | + char *compressed, 117 | + uint32_t *out_compressed_length, 118 | + void *working_memory, 119 | + const int workmem_bytes_power_of_two); 120 | + 121 | +/* 122 | + * Reads header of compressed data to get stored length of uncompressed data. 123 | + * REQUIRES: start points to compressed data. 124 | + * REQUIRES: n is length of available compressed data. 125 | + * 126 | + * Returns SNAPPY_E_HEADER_BAD on error. 127 | + * Returns number of bytes read from input on success. 128 | + * Stores decoded length into *result. 129 | + */ 130 | +int 131 | +csnappy_get_uncompressed_length( 132 | + const char *start, 133 | + uint32_t n, 134 | + uint32_t *result); 135 | + 136 | +/* 137 | + * Safely decompresses all data from array "src" of length "src_len" containing 138 | + * entire compressed stream (with header) into array "dst" of size "dst_len". 139 | + * REQUIRES: dst_len is at least csnappy_get_uncompressed_length(...). 140 | + * 141 | + * Iff successful, returns CSNAPPY_E_OK. 142 | + * If recorded length in header is greater than dst_len, returns 143 | + * CSNAPPY_E_OUTPUT_INSUF. 144 | + * If compressed data is malformed, does not write more than dst_len into dst. 145 | + */ 146 | +int 147 | +csnappy_decompress( 148 | + const char *src, 149 | + uint32_t src_len, 150 | + char *dst, 151 | + uint32_t dst_len); 152 | + 153 | +/* 154 | + * Safely decompresses stream src_len bytes long read from src to dst. 155 | + * Amount of available space at dst must be provided in *dst_len by caller. 156 | + * If compressed stream needs more space, it will not overflow and return 157 | + * CSNAPPY_E_OUTPUT_OVERRUN. 
158 | + * On success, sets *dst_len to actal number of bytes decompressed. 159 | + * Iff successful, returns CSNAPPY_E_OK. 160 | + */ 161 | +int 162 | +csnappy_decompress_noheader( 163 | + const char *src, 164 | + uint32_t src_len, 165 | + char *dst, 166 | + uint32_t *dst_len); 167 | + 168 | +/* 169 | + * Return values (< 0 = Error) 170 | + */ 171 | +#define CSNAPPY_E_OK 0 172 | +#define CSNAPPY_E_HEADER_BAD (-1) 173 | +#define CSNAPPY_E_OUTPUT_INSUF (-2) 174 | +#define CSNAPPY_E_OUTPUT_OVERRUN (-3) 175 | +#define CSNAPPY_E_INPUT_NOT_CONSUMED (-4) 176 | +#define CSNAPPY_E_DATA_MALFORMED (-5) 177 | + 178 | +#ifdef __cplusplus 179 | +} 180 | +#endif 181 | + 182 | +#endif 183 | diff --git a/drivers/staging/snappy/csnappy_compress.c b/drivers/staging/snappy/csnappy_compress.c 184 | new file mode 100644 185 | index 0000000..b093727 186 | --- /dev/null 187 | +++ b/drivers/staging/snappy/csnappy_compress.c 188 | @@ -0,0 +1,527 @@ 189 | +/* 190 | +Copyright 2011, Google Inc. 191 | +All rights reserved. 192 | + 193 | +Redistribution and use in source and binary forms, with or without 194 | +modification, are permitted provided that the following conditions are 195 | +met: 196 | + 197 | + * Redistributions of source code must retain the above copyright 198 | +notice, this list of conditions and the following disclaimer. 199 | + * Redistributions in binary form must reproduce the above 200 | +copyright notice, this list of conditions and the following disclaimer 201 | +in the documentation and/or other materials provided with the 202 | +distribution. 203 | + * Neither the name of Google Inc. nor the names of its 204 | +contributors may be used to endorse or promote products derived from 205 | +this software without specific prior written permission. 206 | + 207 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 208 | +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 209 | +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 210 | +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 211 | +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 212 | +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 213 | +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 214 | +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 215 | +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 216 | +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 217 | +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
218 | + 219 | +File modified for the Linux Kernel by 220 | +Zeev Tarantov 221 | +*/ 222 | + 223 | +#include "csnappy_internal.h" 224 | +#ifdef __KERNEL__ 225 | +#include 226 | +#include 227 | +#endif 228 | +#include "csnappy.h" 229 | + 230 | + 231 | +static inline char* 232 | +encode_varint32(char *sptr, uint32_t v) 233 | +{ 234 | + uint8_t* ptr = (uint8_t *)sptr; 235 | + static const int B = 128; 236 | + if (v < (1<<7)) { 237 | + *(ptr++) = v; 238 | + } else if (v < (1<<14)) { 239 | + *(ptr++) = v | B; 240 | + *(ptr++) = v>>7; 241 | + } else if (v < (1<<21)) { 242 | + *(ptr++) = v | B; 243 | + *(ptr++) = (v>>7) | B; 244 | + *(ptr++) = v>>14; 245 | + } else if (v < (1<<28)) { 246 | + *(ptr++) = v | B; 247 | + *(ptr++) = (v>>7) | B; 248 | + *(ptr++) = (v>>14) | B; 249 | + *(ptr++) = v>>21; 250 | + } else { 251 | + *(ptr++) = v | B; 252 | + *(ptr++) = (v>>7) | B; 253 | + *(ptr++) = (v>>14) | B; 254 | + *(ptr++) = (v>>21) | B; 255 | + *(ptr++) = v>>28; 256 | + } 257 | + return (char *)ptr; 258 | +} 259 | + 260 | + 261 | +/* 262 | + * Any hash function will produce a valid compressed bitstream, but a good 263 | + * hash function reduces the number of collisions and thus yields better 264 | + * compression for compressible input, and more speed for incompressible 265 | + * input. Of course, it doesn't hurt if the hash function is reasonably fast 266 | + * either, as it gets called a lot. 267 | + */ 268 | +static inline uint32_t HashBytes(uint32_t bytes, int shift) 269 | +{ 270 | + uint32_t kMul = 0x1e35a7bd; 271 | + return (bytes * kMul) >> shift; 272 | +} 273 | +static inline uint32_t Hash(const char *p, int shift) 274 | +{ 275 | + return HashBytes(UNALIGNED_LOAD32(p), shift); 276 | +} 277 | + 278 | + 279 | +/* 280 | + * *** DO NOT CHANGE THE VALUE OF kBlockSize *** 281 | + 282 | + * New Compression code chops up the input into blocks of at most 283 | + * the following size. This ensures that back-references in the 284 | + * output never cross kBlockSize block boundaries. This can be 285 | + * helpful in implementing blocked decompression. However the 286 | + * decompression code should not rely on this guarantee since older 287 | + * compression code may not obey it. 288 | + */ 289 | +#define kBlockLog 15 290 | +#define kBlockSize (1 << kBlockLog) 291 | + 292 | + 293 | +/* 294 | + * Return the largest n such that 295 | + * 296 | + * s1[0,n-1] == s2[0,n-1] 297 | + * and n <= (s2_limit - s2). 298 | + * 299 | + * Does not read *s2_limit or beyond. 300 | + * Does not read *(s1 + (s2_limit - s2)) or beyond. 301 | + * Requires that s2_limit >= s2. 302 | + * 303 | + * Separate implementation for x86_64, for speed. Uses the fact that 304 | + * x86_64 is little endian. 305 | + */ 306 | +#if defined(__x86_64__) 307 | +static inline int 308 | +FindMatchLength(const char *s1, const char *s2, const char *s2_limit) 309 | +{ 310 | + uint64_t x; 311 | + int matched, matching_bits; 312 | + DCHECK_GE(s2_limit, s2); 313 | + matched = 0; 314 | + /* 315 | + * Find out how long the match is. We loop over the data 64 bits at a 316 | + * time until we find a 64-bit block that doesn't match; then we find 317 | + * the first non-matching bit and use that to calculate the total 318 | + * length of the match. 
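 * (For example, if the two regions agree in their first five bytes and differ
 * in the sixth, the low 40 bits of x are zero on this little-endian machine,
 * FindLSBSetNonZero64(x) is between 40 and 47, and matching_bits >> 3 adds
 * exactly 5 to matched.)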
319 | + */ 320 | + while (likely(s2 <= s2_limit - 8)) { 321 | + if (unlikely(UNALIGNED_LOAD64(s1 + matched) == 322 | + UNALIGNED_LOAD64(s2))) { 323 | + s2 += 8; 324 | + matched += 8; 325 | + } else { 326 | + /* 327 | + * On current (mid-2008) Opteron models there is a 3% 328 | + * more efficient code sequence to find the first 329 | + * non-matching byte. However, what follows is ~10% 330 | + * better on Intel Core 2 and newer, and we expect AMD's 331 | + * bsf instruction to improve. 332 | + */ 333 | + x = UNALIGNED_LOAD64(s1 + matched) ^ 334 | + UNALIGNED_LOAD64(s2); 335 | + matching_bits = FindLSBSetNonZero64(x); 336 | + matched += matching_bits >> 3; 337 | + return matched; 338 | + } 339 | + } 340 | + while (likely(s2 < s2_limit)) { 341 | + if (likely(s1[matched] == *s2)) { 342 | + ++s2; 343 | + ++matched; 344 | + } else { 345 | + return matched; 346 | + } 347 | + } 348 | + return matched; 349 | +} 350 | +#else /* !defined(__x86_64__) */ 351 | +static inline int 352 | +FindMatchLength(const char *s1, const char *s2, const char *s2_limit) 353 | +{ 354 | + /* Implementation based on the x86-64 version, above. */ 355 | + int matched = 0; 356 | + DCHECK_GE(s2_limit, s2); 357 | + 358 | + while (s2 <= s2_limit - 4 && 359 | + UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { 360 | + s2 += 4; 361 | + matched += 4; 362 | + } 363 | +#if __BYTE_ORDER == __LITTLE_ENDIAN 364 | + if (s2 <= s2_limit - 4) { 365 | + uint32_t x = UNALIGNED_LOAD32(s1 + matched) ^ 366 | + UNALIGNED_LOAD32(s2); 367 | + int matching_bits = FindLSBSetNonZero(x); 368 | + matched += matching_bits >> 3; 369 | + } else { 370 | + while ((s2 < s2_limit) && (s1[matched] == *s2)) { 371 | + ++s2; 372 | + ++matched; 373 | + } 374 | + } 375 | +#else 376 | + while ((s2 < s2_limit) && (s1[matched] == *s2)) { 377 | + ++s2; 378 | + ++matched; 379 | + } 380 | +#endif 381 | + return matched; 382 | +} 383 | +#endif /* !defined(__x86_64__) */ 384 | + 385 | + 386 | +static inline char* 387 | +EmitLiteral(char *op, const char *literal, int len, int allow_fast_path) 388 | +{ 389 | + int n = len - 1; /* Zero-length literals are disallowed */ 390 | + if (n < 60) { 391 | + /* Fits in tag byte */ 392 | + *op++ = LITERAL | (n << 2); 393 | + /* 394 | + The vast majority of copies are below 16 bytes, for which a 395 | + call to memcpy is overkill. This fast path can sometimes 396 | + copy up to 15 bytes too much, but that is okay in the 397 | + main loop, since we have a bit to go on for both sides: 398 | + - The input will always have kInputMarginBytes = 15 extra 399 | + available bytes, as long as we're in the main loop, and 400 | + if not, allow_fast_path = false. 401 | + - The output will always have 32 spare bytes (see 402 | + snappy_max_compressed_length). 
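(Editor's note, not part of the original file: EmitLiteral above stores the length, minus one, inside the tag byte when it is below 60; longer literals store length-1 in one to four trailing little-endian bytes and put 59+count in the tag. A small host-side sketch of just the tag encoding -- emit_literal_tag is a hypothetical name, not a csnappy function:)

#include <stdint.h>
#include <stdio.h>

/* editor's sketch of the snappy literal tag; LITERAL == 0 in the low two bits */
static int emit_literal_tag(uint8_t *out, uint32_t len)
{
	uint32_t n = len - 1;			/* lengths are encoded minus one */
	int count = 0;
	if (n < 60) {
		out[0] = n << 2;		/* fits in the tag byte itself */
		return 1;
	}
	while (n > 0) {				/* little-endian extra length bytes */
		out[1 + count] = n & 0xff;
		n >>= 8;
		count++;
	}
	out[0] = (59 + count) << 2;
	return 1 + count;
}

int main(void)
{
	uint8_t tag[5];
	int k = emit_literal_tag(tag, 100);	/* a 100-byte literal */
	printf("%d header bytes: %02x %02x\n", k, tag[0], tag[1]);
	/* prints "2 header bytes: f0 63": tag 60<<2, then length byte 99 */
	return 0;
}

A 5-byte literal, by contrast, needs only the single tag byte 0x10.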
403 | + */ 404 | + if (allow_fast_path && len <= 16) { 405 | + UnalignedCopy64(literal, op); 406 | + UnalignedCopy64(literal + 8, op + 8); 407 | + return op + len; 408 | + } 409 | + } else { 410 | + /* Encode in upcoming bytes */ 411 | + char *base = op; 412 | + int count = 0; 413 | + op++; 414 | + while (n > 0) { 415 | + *op++ = n & 0xff; 416 | + n >>= 8; 417 | + count++; 418 | + } 419 | + DCHECK_GE(count, 1); 420 | + DCHECK_LE(count, 4); 421 | + *base = LITERAL | ((59+count) << 2); 422 | + } 423 | + memcpy(op, literal, len); 424 | + return op + len; 425 | +} 426 | + 427 | +static inline char* 428 | +EmitCopyLessThan64(char *op, int offset, int len) 429 | +{ 430 | + DCHECK_LE(len, 64); 431 | + DCHECK_GE(len, 4); 432 | + DCHECK_LT(offset, 65536); 433 | + 434 | + if ((len < 12) && (offset < 2048)) { 435 | + int len_minus_4 = len - 4; 436 | + DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */ 437 | + *op++ = COPY_1_BYTE_OFFSET | 438 | + ((len_minus_4) << 2) | 439 | + ((offset >> 8) << 5); 440 | + *op++ = offset & 0xff; 441 | + } else { 442 | + *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2); 443 | + put_unaligned_le16(offset, op); 444 | + op += 2; 445 | + } 446 | + return op; 447 | +} 448 | + 449 | +static inline char* 450 | +EmitCopy(char *op, int offset, int len) 451 | +{ 452 | + /* Emit 64 byte copies but make sure to keep at least four bytes 453 | + * reserved */ 454 | + while (len >= 68) { 455 | + op = EmitCopyLessThan64(op, offset, 64); 456 | + len -= 64; 457 | + } 458 | + 459 | + /* Emit an extra 60 byte copy if have too much data to fit in one 460 | + * copy */ 461 | + if (len > 64) { 462 | + op = EmitCopyLessThan64(op, offset, 60); 463 | + len -= 60; 464 | + } 465 | + 466 | + /* Emit remainder */ 467 | + op = EmitCopyLessThan64(op, offset, len); 468 | + return op; 469 | +} 470 | + 471 | + 472 | +/* 473 | +For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will 474 | +equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have 475 | +empirically found that overlapping loads such as 476 | + UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) 477 | +are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. 478 | + 479 | +We have different versions for 64- and 32-bit; ideally we would avoid the 480 | +two functions and just inline the UNALIGNED_LOAD64 call into 481 | +GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever 482 | +enough to avoid loading the value multiple times then. For 64-bit, the load 483 | +is done when GetEightBytesAt() is called, whereas for 32-bit, the load is 484 | +done at GetUint32AtOffset() time. 
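(Editor's aside, not part of the original file: on a little-endian target the shift in GetUint32AtOffset, defined just below, discards `offset` low-order bytes of the one 64-bit load, so it yields exactly what a direct 32-bit load at p + offset would. A quick host-side check, using memcpy in place of the UNALIGNED_LOAD macros:)

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char p[] = "ABCDEFGH";
	uint64_t eight;
	uint32_t direct, shifted;
	int off;

	memcpy(&eight, p, 8);			/* one 8-byte load...          */
	for (off = 0; off <= 4; off++) {
		memcpy(&direct, p + off, 4);	/* ...versus five 4-byte loads */
		shifted = (uint32_t)(eight >> (8 * off));
		printf("offset %d: %s\n", off,
		       direct == shifted ? "equal" : "DIFFER");
	}
	return 0;
}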
485 | +*/ 486 | + 487 | +#if defined(__x86_64__) || (__SIZEOF_SIZE_T__ == 8) 488 | + 489 | +typedef uint64_t EightBytesReference; 490 | + 491 | +static inline EightBytesReference GetEightBytesAt(const char* ptr) { 492 | + return UNALIGNED_LOAD64(ptr); 493 | +} 494 | + 495 | +static inline uint32_t GetUint32AtOffset(uint64_t v, int offset) { 496 | + DCHECK_GE(offset, 0); 497 | + DCHECK_LE(offset, 4); 498 | +#ifdef __LITTLE_ENDIAN 499 | + return v >> (8 * offset); 500 | +#else 501 | + return v >> (32 - 8 * offset); 502 | +#endif 503 | +} 504 | + 505 | +#else /* !ARCH_K8 */ 506 | + 507 | +typedef const char* EightBytesReference; 508 | + 509 | +static inline EightBytesReference GetEightBytesAt(const char* ptr) { 510 | + return ptr; 511 | +} 512 | + 513 | +static inline uint32_t GetUint32AtOffset(const char* v, int offset) { 514 | + DCHECK_GE(offset, 0); 515 | + DCHECK_LE(offset, 4); 516 | + return UNALIGNED_LOAD32(v + offset); 517 | +} 518 | + 519 | +#endif /* !ARCH_K8 */ 520 | + 521 | + 522 | +#define kInputMarginBytes 15 523 | +char* 524 | +csnappy_compress_fragment( 525 | + const char *input, 526 | + const uint32_t input_size, 527 | + char *op, 528 | + void *working_memory, 529 | + const int workmem_bytes_power_of_two) 530 | +{ 531 | + const char *ip, *ip_end, *base_ip, *next_emit, *ip_limit, *next_ip, 532 | + *candidate, *base; 533 | + uint16_t *table = (uint16_t *)working_memory; 534 | + EightBytesReference input_bytes; 535 | + uint32_t hash, next_hash, prev_hash, cur_hash, skip, candidate_bytes; 536 | + int shift, matched; 537 | + 538 | + DCHECK_GE(workmem_bytes_power_of_two, 9); 539 | + DCHECK_LE(workmem_bytes_power_of_two, 15); 540 | + /* Table of 2^X bytes, need (X-1) bits to address table of uint16_t. 541 | + * How many bits of 32bit hash function result are discarded? */ 542 | + shift = 33 - workmem_bytes_power_of_two; 543 | + /* "ip" is the input pointer, and "op" is the output pointer. */ 544 | + ip = input; 545 | + DCHECK_LE(input_size, kBlockSize); 546 | + ip_end = input + input_size; 547 | + base_ip = ip; 548 | + /* Bytes in [next_emit, ip) will be emitted as literal bytes. Or 549 | + [next_emit, ip_end) after the main loop. */ 550 | + next_emit = ip; 551 | + 552 | + if (unlikely(input_size < kInputMarginBytes)) 553 | + goto emit_remainder; 554 | + 555 | + memset(working_memory, 0, 1 << workmem_bytes_power_of_two); 556 | + 557 | + ip_limit = input + input_size - kInputMarginBytes; 558 | + next_hash = Hash(++ip, shift); 559 | + 560 | +main_loop: 561 | + DCHECK_LT(next_emit, ip); 562 | + /* 563 | + * The body of this loop calls EmitLiteral once and then EmitCopy one or 564 | + * more times. (The exception is that when we're close to exhausting 565 | + * the input we goto emit_remainder.) 566 | + * 567 | + * In the first iteration of this loop we're just starting, so 568 | + * there's nothing to copy, so calling EmitLiteral once is 569 | + * necessary. And we only start a new iteration when the 570 | + * current iteration has determined that a call to EmitLiteral will 571 | + * precede the next call to EmitCopy (if any). 572 | + * 573 | + * Step 1: Scan forward in the input looking for a 4-byte-long match. 574 | + * If we get close to exhausting the input then goto emit_remainder. 575 | + * 576 | + * Heuristic match skipping: If 32 bytes are scanned with no matches 577 | + * found, start looking only at every other byte. If 32 more bytes are 578 | + * scanned, look at every third byte, etc.. When a match is found, 579 | + * immediately go back to looking at every byte. 
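(Editor's illustration, not part of the original comment: with skip starting at 32 and the step computed as skip++ >> 5, the scan advances one byte per probe for the first 32 probes, two bytes for the next 32, then three, and so on. A tiny sketch of that schedule:)

#include <stdio.h>

int main(void)
{
	unsigned int skip = 32, pos = 0, probe;
	for (probe = 1; probe <= 96; probe++) {
		pos += skip++ >> 5;	/* same update as next_ip = ip + (skip++ >> 5) */
		if (probe % 32 == 0)
			printf("after %u probes: %u input bytes covered\n",
			       probe, pos);
	}
	/* prints 32, 96 and 192 bytes: the stride keeps growing until a match
	 * is found, which is what makes incompressible input cheap to skip */
	return 0;
}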
This is a small loss 580 | + * (~5% performance, ~0.1% density) for compressible data due to more 581 | + * bookkeeping, but for non-compressible data (such as JPEG) it's a huge 582 | + * win since the compressor quickly "realizes" the data is incompressible 583 | + * and doesn't bother looking for matches everywhere. 584 | + * 585 | + * The "skip" variable keeps track of how many bytes there are since the 586 | + * last match; dividing it by 32 (ie. right-shifting by five) gives the 587 | + * number of bytes to move ahead for each iteration. 588 | + */ 589 | + skip = 32; 590 | + 591 | + next_ip = ip; 592 | + do { 593 | + ip = next_ip; 594 | + hash = next_hash; 595 | + DCHECK_EQ(hash, Hash(ip, shift)); 596 | + next_ip = ip + (skip++ >> 5); 597 | + if (unlikely(next_ip > ip_limit)) 598 | + goto emit_remainder; 599 | + next_hash = Hash(next_ip, shift); 600 | + candidate = base_ip + table[hash]; 601 | + DCHECK_GE(candidate, base_ip); 602 | + DCHECK_LT(candidate, ip); 603 | + 604 | + table[hash] = ip - base_ip; 605 | + } while (likely(UNALIGNED_LOAD32(ip) != 606 | + UNALIGNED_LOAD32(candidate))); 607 | + 608 | + /* 609 | + * Step 2: A 4-byte match has been found. We'll later see if more 610 | + * than 4 bytes match. But, prior to the match, input 611 | + * bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." 612 | + */ 613 | + DCHECK_LE(next_emit + 16, ip_end); 614 | + op = EmitLiteral(op, next_emit, ip - next_emit, 1); 615 | + 616 | + /* 617 | + * Step 3: Call EmitCopy, and then see if another EmitCopy could 618 | + * be our next move. Repeat until we find no match for the 619 | + * input immediately after what was consumed by the last EmitCopy call. 620 | + * 621 | + * If we exit this loop normally then we need to call EmitLiteral next, 622 | + * though we don't yet know how big the literal will be. We handle that 623 | + * by proceeding to the next iteration of the main loop. We also can exit 624 | + * this loop via goto if we get close to exhausting the input. 625 | + */ 626 | + candidate_bytes = 0; 627 | + 628 | + do { 629 | + /* We have a 4-byte match at ip, and no need to emit any 630 | + "literal bytes" prior to ip. */ 631 | + base = ip; 632 | + matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end); 633 | + ip += matched; 634 | + DCHECK_EQ(0, memcmp(base, candidate, matched)); 635 | + op = EmitCopy(op, base - candidate, matched); 636 | + /* We could immediately start working at ip now, but to improve 637 | + compression we first update table[Hash(ip - 1, ...)]. 
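(Editor's note, not part of the original file: the EmitCopy call above splits a long match so that every chunk handed to EmitCopyLessThan64 is at most 64 bytes and the final chunk is never shorter than the 4-byte minimum a copy tag can express -- that is what the 68/60 thresholds are for. A standalone sketch of the splitting, with split_copy a hypothetical name and offsets left out:)

#include <stdio.h>

static void split_copy(int len)
{
	while (len >= 68) {		/* whole 64-byte pieces...         */
		printf("copy 64\n");
		len -= 64;
	}
	if (len > 64) {			/* ...then a 60-byte piece, so the */
		printf("copy 60\n");	/* remainder below stays >= 4      */
		len -= 60;
	}
	printf("copy %d\n", len);	/* final piece, always 4..64 bytes */
}

int main(void)
{
	split_copy(150);	/* 64 + 64 + 22 */
	split_copy(67);		/* 60 + 7: emitting 64 first would leave an
				 * un-encodable 3-byte tail */
	return 0;
}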
*/ 638 | + next_emit = ip; 639 | + if (unlikely(ip >= ip_limit)) 640 | + goto emit_remainder; 641 | + input_bytes = GetEightBytesAt(ip - 1); 642 | + prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); 643 | + table[prev_hash] = ip - base_ip - 1; 644 | + cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); 645 | + candidate = base_ip + table[cur_hash]; 646 | + candidate_bytes = UNALIGNED_LOAD32(candidate); 647 | + table[cur_hash] = ip - base_ip; 648 | + } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); 649 | + 650 | + next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); 651 | + ++ip; 652 | + goto main_loop; 653 | + 654 | +emit_remainder: 655 | + /* Emit the remaining bytes as a literal */ 656 | + if (next_emit < ip_end) 657 | + op = EmitLiteral(op, next_emit, ip_end - next_emit, 0); 658 | + 659 | + return op; 660 | +} 661 | +#if defined(__KERNEL__) && !defined(STATIC) 662 | +EXPORT_SYMBOL(csnappy_compress_fragment); 663 | +#endif 664 | + 665 | +uint32_t __attribute__((const)) 666 | +csnappy_max_compressed_length(uint32_t source_len) 667 | +{ 668 | + return 32 + source_len + source_len/6; 669 | +} 670 | +#if defined(__KERNEL__) && !defined(STATIC) 671 | +EXPORT_SYMBOL(csnappy_max_compressed_length); 672 | +#endif 673 | + 674 | +void 675 | +csnappy_compress( 676 | + const char *input, 677 | + uint32_t input_length, 678 | + char *compressed, 679 | + uint32_t *compressed_length, 680 | + void *working_memory, 681 | + const int workmem_bytes_power_of_two) 682 | +{ 683 | + int workmem_size; 684 | + int num_to_read; 685 | + uint32_t written = 0; 686 | + char *p = encode_varint32(compressed, input_length); 687 | + written += (p - compressed); 688 | + compressed = p; 689 | + while (input_length > 0) { 690 | + num_to_read = min(input_length, (uint32_t)kBlockSize); 691 | + workmem_size = workmem_bytes_power_of_two; 692 | + if (num_to_read < kBlockSize) { 693 | + for (workmem_size = 9; 694 | + workmem_size < workmem_bytes_power_of_two; 695 | + ++workmem_size) { 696 | + if ((1 << (workmem_size-1)) >= num_to_read) 697 | + break; 698 | + } 699 | + } 700 | + p = csnappy_compress_fragment( 701 | + input, num_to_read, compressed, 702 | + working_memory, workmem_size); 703 | + written += (p - compressed); 704 | + compressed = p; 705 | + input_length -= num_to_read; 706 | + input += num_to_read; 707 | + } 708 | + *compressed_length = written; 709 | +} 710 | +#if defined(__KERNEL__) && !defined(STATIC) 711 | +EXPORT_SYMBOL(csnappy_compress); 712 | + 713 | +MODULE_LICENSE("BSD"); 714 | +MODULE_DESCRIPTION("Snappy Compressor"); 715 | +#endif 716 | diff --git a/drivers/staging/snappy/csnappy_decompress.c b/drivers/staging/snappy/csnappy_decompress.c 717 | new file mode 100644 718 | index 0000000..572ce43 719 | --- /dev/null 720 | +++ b/drivers/staging/snappy/csnappy_decompress.c 721 | @@ -0,0 +1,413 @@ 722 | +/* 723 | +Copyright 2011, Google Inc. 724 | +All rights reserved. 725 | + 726 | +Redistribution and use in source and binary forms, with or without 727 | +modification, are permitted provided that the following conditions are 728 | +met: 729 | + 730 | + * Redistributions of source code must retain the above copyright 731 | +notice, this list of conditions and the following disclaimer. 732 | + * Redistributions in binary form must reproduce the above 733 | +copyright notice, this list of conditions and the following disclaimer 734 | +in the documentation and/or other materials provided with the 735 | +distribution. 736 | + * Neither the name of Google Inc. 
nor the names of its 737 | +contributors may be used to endorse or promote products derived from 738 | +this software without specific prior written permission. 739 | + 740 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 741 | +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 742 | +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 743 | +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 744 | +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 745 | +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 746 | +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 747 | +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 748 | +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 749 | +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 750 | +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 751 | + 752 | +File modified for the Linux Kernel by 753 | +Zeev Tarantov 754 | +*/ 755 | + 756 | +#include "csnappy_internal.h" 757 | +#ifdef __KERNEL__ 758 | +#include 759 | +#include 760 | +#endif 761 | +#include "csnappy.h" 762 | + 763 | +int 764 | +csnappy_get_uncompressed_length( 765 | + const char *src, 766 | + uint32_t src_len, 767 | + uint32_t *result) 768 | +{ 769 | + const char *src_base = src; 770 | + uint32_t shift = 0; 771 | + uint8_t c; 772 | + /* Length is encoded in 1..5 bytes */ 773 | + *result = 0; 774 | + for (;;) { 775 | + if (shift >= 32) 776 | + goto err_out; 777 | + if (src_len == 0) 778 | + goto err_out; 779 | + c = *(const uint8_t *)src++; 780 | + src_len -= 1; 781 | + *result |= (uint32_t)(c & 0x7f) << shift; 782 | + if (c < 128) 783 | + break; 784 | + shift += 7; 785 | + } 786 | + return src - src_base; 787 | +err_out: 788 | + return CSNAPPY_E_HEADER_BAD; 789 | +} 790 | +#if defined(__KERNEL__) && !defined(STATIC) 791 | +EXPORT_SYMBOL(csnappy_get_uncompressed_length); 792 | +#endif 793 | + 794 | +#if defined(__arm__) && !(ARCH_ARM_HAVE_UNALIGNED) 795 | +int csnappy_decompress_noheader( 796 | + const char *src_, 797 | + uint32_t src_remaining, 798 | + char *dst, 799 | + uint32_t *dst_len) 800 | +{ 801 | + const uint8_t * src = (const uint8_t *)src_; 802 | + const uint8_t * const src_end = src + src_remaining; 803 | + char * const dst_base = dst; 804 | + char * const dst_end = dst + *dst_len; 805 | + while (src < src_end) { 806 | + uint32_t opcode = *src++; 807 | + uint32_t length = (opcode >> 2) + 1; 808 | + const uint8_t *copy_src; 809 | + if (likely((opcode & 3) == 0)) { 810 | + if (unlikely(length > 60)) { 811 | + uint32_t extra_bytes = length - 60; 812 | + if (unlikely(src + extra_bytes > src_end)) 813 | + return CSNAPPY_E_DATA_MALFORMED; 814 | + length = 0; 815 | + for (int shift = 0, max_shift = extra_bytes*8; 816 | + shift < max_shift; 817 | + shift += 8) 818 | + length |= *src++ << shift; 819 | + ++length; 820 | + } 821 | + if (unlikely(src + length > src_end)) 822 | + return CSNAPPY_E_DATA_MALFORMED; 823 | + copy_src = src; 824 | + src += length; 825 | + } else { 826 | + uint32_t offset; 827 | + if (likely((opcode & 3) == 1)) { 828 | + if (unlikely(src + 1 > src_end)) 829 | + return CSNAPPY_E_DATA_MALFORMED; 830 | + length = ((length - 1) & 7) + 4; 831 | + offset = ((opcode >> 5) << 8) + *src++; 832 | + } else if (likely((opcode & 3) == 2)) { 833 | + if (unlikely(src + 2 > src_end)) 834 | + return CSNAPPY_E_DATA_MALFORMED; 835 | + offset = src[0] | (src[1] << 8); 836 | + 
src += 2; 837 | + } else { 838 | + if (unlikely(src + 4 > src_end)) 839 | + return CSNAPPY_E_DATA_MALFORMED; 840 | + offset = src[0] | (src[1] << 8) | 841 | + (src[2] << 16) | (src[3] << 24); 842 | + src += 4; 843 | + } 844 | + if (unlikely(!offset || (offset > dst - dst_base))) 845 | + return CSNAPPY_E_DATA_MALFORMED; 846 | + copy_src = (const uint8_t *)dst - offset; 847 | + } 848 | + if (unlikely(dst + length > dst_end)) 849 | + return CSNAPPY_E_OUTPUT_OVERRUN; 850 | + do *dst++ = *copy_src++; while (--length); 851 | + } 852 | + *dst_len = dst - dst_base; 853 | + return CSNAPPY_E_OK; 854 | +} 855 | +#else /* !(arm with no unaligned access) */ 856 | +/* 857 | + * Data stored per entry in lookup table: 858 | + * Range Bits-used Description 859 | + * ------------------------------------ 860 | + * 1..64 0..7 Literal/copy length encoded in opcode byte 861 | + * 0..7 8..10 Copy offset encoded in opcode byte / 256 862 | + * 0..4 11..13 Extra bytes after opcode 863 | + * 864 | + * We use eight bits for the length even though 7 would have sufficed 865 | + * because of efficiency reasons: 866 | + * (1) Extracting a byte is faster than a bit-field 867 | + * (2) It properly aligns copy offset so we do not need a <<8 868 | + */ 869 | +static const uint16_t char_table[256] = { 870 | + 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, 871 | + 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, 872 | + 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, 873 | + 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, 874 | + 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, 875 | + 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, 876 | + 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, 877 | + 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, 878 | + 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, 879 | + 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, 880 | + 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, 881 | + 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, 882 | + 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, 883 | + 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, 884 | + 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, 885 | + 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, 886 | + 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, 887 | + 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, 888 | + 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, 889 | + 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, 890 | + 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, 891 | + 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, 892 | + 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, 893 | + 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, 894 | + 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, 895 | + 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, 896 | + 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, 897 | + 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, 898 | + 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, 899 | + 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, 900 | + 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, 901 | + 0x1801, 0x0f0a, 0x103f, 
0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 902 | +}; 903 | + 904 | +/* 905 | + * Copy "len" bytes from "src" to "op", one byte at a time. Used for 906 | + * handling COPY operations where the input and output regions may 907 | + * overlap. For example, suppose: 908 | + * src == "ab" 909 | + * op == src + 2 910 | + * len == 20 911 | + * After IncrementalCopy(src, op, len), the result will have 912 | + * eleven copies of "ab" 913 | + * ababababababababababab 914 | + * Note that this does not match the semantics of either memcpy() 915 | + * or memmove(). 916 | + */ 917 | +static inline void IncrementalCopy(const char *src, char *op, int len) 918 | +{ 919 | + DCHECK_GT(len, 0); 920 | + do { 921 | + *op++ = *src++; 922 | + } while (--len > 0); 923 | +} 924 | + 925 | +/* 926 | + * Equivalent to IncrementalCopy except that it can write up to ten extra 927 | + * bytes after the end of the copy, and that it is faster. 928 | + * 929 | + * The main part of this loop is a simple copy of eight bytes at a time until 930 | + * we've copied (at least) the requested amount of bytes. However, if op and 931 | + * src are less than eight bytes apart (indicating a repeating pattern of 932 | + * length < 8), we first need to expand the pattern in order to get the correct 933 | + * results. For instance, if the buffer looks like this, with the eight-byte 934 | + * <src> and <op> patterns marked as intervals: 935 | + * 936 | + * abxxxxxxxxxxxx 937 | + * [------] src 938 | + * [------] op 939 | + * 940 | + * a single eight-byte copy from <src> to <op> will repeat the pattern once, 941 | + * after which we can move <op> two bytes without moving <src>: 942 | + * 943 | + * ababxxxxxxxxxx 944 | + * [------] src 945 | + * [------] op 946 | + * 947 | + * and repeat the exercise until the two no longer overlap. 948 | + * 949 | + * This allows us to do very well in the special case of one single byte 950 | + * repeated many times, without taking a big hit for more general cases. 951 | + * 952 | + * The worst case of extra writing past the end of the match occurs when 953 | + * op - src == 1 and len == 1; the last copy will read from byte positions 954 | + * [0..7] and write to [4..11], whereas it was only supposed to write to 955 | + * position 1. Thus, ten excess bytes. 956 | + */ 957 | +static const int kMaxIncrementCopyOverflow = 10; 958 | +static inline void IncrementalCopyFastPath(const char *src, char *op, int len) 959 | +{ 960 | + while (op - src < 8) { 961 | + UnalignedCopy64(src, op); 962 | + len -= op - src; 963 | + op += op - src; 964 | + } 965 | + while (len > 0) { 966 | + UnalignedCopy64(src, op); 967 | + src += 8; 968 | + op += 8; 969 | + len -= 8; 970 | + } 971 | +} 972 | + 973 | + 974 | +/* A type that writes to a flat array.
*/ 975 | +struct SnappyArrayWriter { 976 | + char *base; 977 | + char *op; 978 | + char *op_limit; 979 | +}; 980 | + 981 | +static inline int 982 | +SAW__AppendFastPath(struct SnappyArrayWriter *this, 983 | + const char *ip, uint32_t len) 984 | +{ 985 | + char *op = this->op; 986 | + const int space_left = this->op_limit - op; 987 | + if (likely(space_left >= 16)) { 988 | + UnalignedCopy64(ip, op); 989 | + UnalignedCopy64(ip + 8, op + 8); 990 | + } else { 991 | + if (unlikely(space_left < len)) 992 | + return CSNAPPY_E_OUTPUT_OVERRUN; 993 | + memcpy(op, ip, len); 994 | + } 995 | + this->op = op + len; 996 | + return CSNAPPY_E_OK; 997 | +} 998 | + 999 | +static inline int 1000 | +SAW__Append(struct SnappyArrayWriter *this, 1001 | + const char *ip, uint32_t len) 1002 | +{ 1003 | + char *op = this->op; 1004 | + const int space_left = this->op_limit - op; 1005 | + if (unlikely(space_left < len)) 1006 | + return CSNAPPY_E_OUTPUT_OVERRUN; 1007 | + memcpy(op, ip, len); 1008 | + this->op = op + len; 1009 | + return CSNAPPY_E_OK; 1010 | +} 1011 | + 1012 | +static inline int 1013 | +SAW__AppendFromSelf(struct SnappyArrayWriter *this, 1014 | + uint32_t offset, uint32_t len) 1015 | +{ 1016 | + char *op = this->op; 1017 | + const int space_left = this->op_limit - op; 1018 | + /* -1u catches offset==0 */ 1019 | + if (op - this->base <= offset - 1u) 1020 | + return CSNAPPY_E_DATA_MALFORMED; 1021 | + /* Fast path, used for the majority (70-80%) of dynamic invocations. */ 1022 | + if (len <= 16 && offset >= 8 && space_left >= 16) { 1023 | + UnalignedCopy64(op - offset, op); 1024 | + UnalignedCopy64(op - offset + 8, op + 8); 1025 | + } else if (space_left >= len + kMaxIncrementCopyOverflow) { 1026 | + IncrementalCopyFastPath(op - offset, op, len); 1027 | + } else { 1028 | + if (space_left < len) 1029 | + return CSNAPPY_E_OUTPUT_OVERRUN; 1030 | + IncrementalCopy(op - offset, op, len); 1031 | + } 1032 | + this->op = op + len; 1033 | + return CSNAPPY_E_OK; 1034 | +} 1035 | + 1036 | +int 1037 | +csnappy_decompress_noheader( 1038 | + const char *src, 1039 | + uint32_t src_remaining, 1040 | + char *dst, 1041 | + uint32_t *dst_len) 1042 | +{ 1043 | + struct SnappyArrayWriter writer; 1044 | + const char *end_minus5 = src + src_remaining - 5; 1045 | + uint32_t length, trailer, opword, extra_bytes; 1046 | + int ret, available; 1047 | + uint8_t opcode; 1048 | + char scratch[5]; 1049 | + writer.op = writer.base = dst; 1050 | + writer.op_limit = writer.op + *dst_len; 1051 | + #define LOOP_COND() \ 1052 | + if (unlikely(src >= end_minus5)) { \ 1053 | + available = end_minus5 + 5 - src; \ 1054 | + if (unlikely(available <= 0)) \ 1055 | + goto out; \ 1056 | + memmove(scratch, src, available); \ 1057 | + src = scratch; \ 1058 | + end_minus5 = scratch + available - 5; \ 1059 | + } 1060 | + 1061 | + LOOP_COND(); 1062 | + for (;;) { 1063 | + opcode = *(const uint8_t *)src++; 1064 | + if (opcode & 0x3) { 1065 | + opword = char_table[opcode]; 1066 | + extra_bytes = opword >> 11; 1067 | + trailer = get_unaligned_le(src, extra_bytes); 1068 | + length = opword & 0xff; 1069 | + src += extra_bytes; 1070 | + trailer += opword & 0x700; 1071 | + ret = SAW__AppendFromSelf(&writer, trailer, length); 1072 | + if (ret < 0) 1073 | + return ret; 1074 | + LOOP_COND(); 1075 | + } else { 1076 | + length = (opcode >> 2) + 1; 1077 | + available = end_minus5 + 5 - src; 1078 | + if (length <= 16 && available >= 16) { 1079 | + if ((ret = SAW__AppendFastPath(&writer, src, length)) < 0) 1080 | + return ret; 1081 | + src += length; 1082 | + LOOP_COND(); 
1083 | + continue; 1084 | + } 1085 | + if (unlikely(length > 60)) { 1086 | + extra_bytes = length - 60; 1087 | + length = get_unaligned_le(src, extra_bytes) + 1; 1088 | + src += extra_bytes; 1089 | + available = end_minus5 + 5 - src; 1090 | + } 1091 | + if (unlikely(available < length)) 1092 | + return CSNAPPY_E_DATA_MALFORMED; 1093 | + ret = SAW__Append(&writer, src, length); 1094 | + if (ret < 0) 1095 | + return ret; 1096 | + src += length; 1097 | + LOOP_COND(); 1098 | + } 1099 | + } 1100 | +#undef LOOP_COND 1101 | +out: 1102 | + *dst_len = writer.op - writer.base; 1103 | + return CSNAPPY_E_OK; 1104 | +} 1105 | +#endif /* optimized for unaligned arch */ 1106 | + 1107 | +#if defined(__KERNEL__) && !defined(STATIC) 1108 | +EXPORT_SYMBOL(csnappy_decompress_noheader); 1109 | +#endif 1110 | + 1111 | +int 1112 | +csnappy_decompress( 1113 | + const char *src, 1114 | + uint32_t src_len, 1115 | + char *dst, 1116 | + uint32_t dst_len) 1117 | +{ 1118 | + int n; 1119 | + uint32_t olen = 0; 1120 | + /* Read uncompressed length from the front of the compressed input */ 1121 | + n = csnappy_get_uncompressed_length(src, src_len, &olen); 1122 | + if (unlikely(n < CSNAPPY_E_OK)) 1123 | + return n; 1124 | + /* Protect against possible DoS attack */ 1125 | + if (unlikely(olen > dst_len)) 1126 | + return CSNAPPY_E_OUTPUT_INSUF; 1127 | + return csnappy_decompress_noheader(src + n, src_len - n, dst, &olen); 1128 | +} 1129 | +#if defined(__KERNEL__) && !defined(STATIC) 1130 | +EXPORT_SYMBOL(csnappy_decompress); 1131 | + 1132 | +MODULE_LICENSE("BSD"); 1133 | +MODULE_DESCRIPTION("Snappy Decompressor"); 1134 | +#endif 1135 | diff --git a/drivers/staging/snappy/csnappy_internal.h b/drivers/staging/snappy/csnappy_internal.h 1136 | new file mode 100644 1137 | index 0000000..5fc7ba1 1138 | --- /dev/null 1139 | +++ b/drivers/staging/snappy/csnappy_internal.h 1140 | @@ -0,0 +1,147 @@ 1141 | +/* 1142 | +Copyright 2011 Google Inc. All Rights Reserved. 1143 | + 1144 | +Redistribution and use in source and binary forms, with or without 1145 | +modification, are permitted provided that the following conditions are 1146 | +met: 1147 | + 1148 | + * Redistributions of source code must retain the above copyright 1149 | +notice, this list of conditions and the following disclaimer. 1150 | + * Redistributions in binary form must reproduce the above 1151 | +copyright notice, this list of conditions and the following disclaimer 1152 | +in the documentation and/or other materials provided with the 1153 | +distribution. 1154 | + * Neither the name of Google Inc. nor the names of its 1155 | +contributors may be used to endorse or promote products derived from 1156 | +this software without specific prior written permission. 1157 | + 1158 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1159 | +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1160 | +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1161 | +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 1162 | +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1163 | +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1164 | +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1165 | +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1166 | +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1167 | +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1168 | +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1169 | + 1170 | +Various stubs for the open-source version of Snappy. 1171 | + 1172 | +File modified for the Linux Kernel by 1173 | +Zeev Tarantov 1174 | +*/ 1175 | + 1176 | +#ifndef CSNAPPY_INTERNAL_H_ 1177 | +#define CSNAPPY_INTERNAL_H_ 1178 | + 1179 | +#ifndef __KERNEL__ 1180 | +#include "csnappy_internal_userspace.h" 1181 | +#include 1182 | +#else 1183 | + 1184 | +#include 1185 | +#include 1186 | +#include 1187 | +#include 1188 | +#include 1189 | + 1190 | +#if (defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)) || \ 1191 | + (!defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)) 1192 | +#error either __LITTLE_ENDIAN or __BIG_ENDIAN must be defined 1193 | +#endif 1194 | +#if defined(__LITTLE_ENDIAN) 1195 | +#define __BYTE_ORDER __LITTLE_ENDIAN 1196 | +#else 1197 | +#define __BYTE_ORDER __BIG_ENDIAN 1198 | +#endif 1199 | + 1200 | +#ifdef DEBUG 1201 | +#define DCHECK(cond) if (!(cond)) \ 1202 | + printk(KERN_DEBUG "assert failed @ %s:%i\n", \ 1203 | + __FILE__, __LINE__) 1204 | +#else 1205 | +#define DCHECK(cond) 1206 | +#endif 1207 | + 1208 | +#define UNALIGNED_LOAD16(_p) get_unaligned((const uint16_t *)(_p)) 1209 | +#define UNALIGNED_LOAD32(_p) get_unaligned((const uint32_t *)(_p)) 1210 | +#define UNALIGNED_LOAD64(_p) get_unaligned((const uint64_t *)(_p)) 1211 | +#define UNALIGNED_STORE16(_p, _val) put_unaligned((_val), (uint16_t *)(_p)) 1212 | +#define UNALIGNED_STORE32(_p, _val) put_unaligned((_val), (uint32_t *)(_p)) 1213 | +#define UNALIGNED_STORE64(_p, _val) put_unaligned((_val), (uint64_t *)(_p)) 1214 | + 1215 | +#define FindLSBSetNonZero(n) __builtin_ctz(n) 1216 | +#define FindLSBSetNonZero64(n) __builtin_ctzll(n) 1217 | + 1218 | +#endif /* __KERNEL__ */ 1219 | + 1220 | +#define ARCH_ARM_HAVE_UNALIGNED \ 1221 | + defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || defined(__ARMV6__) || \ 1222 | + defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) 1223 | + 1224 | +static inline void UnalignedCopy64(const void *src, void *dst) { 1225 | +#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || ARCH_ARM_HAVE_UNALIGNED 1226 | + if ((sizeof(void *) == 8) || (sizeof(long) == 8)) { 1227 | + UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src)); 1228 | + } else { 1229 | + /* This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64 1230 | + on some platforms, in particular ARM. 
*/ 1231 | + const uint8_t *src_bytep = (const uint8_t *)src; 1232 | + uint8_t *dst_bytep = (uint8_t *)dst; 1233 | + 1234 | + UNALIGNED_STORE32(dst_bytep, UNALIGNED_LOAD32(src_bytep)); 1235 | + UNALIGNED_STORE32(dst_bytep + 4, UNALIGNED_LOAD32(src_bytep + 4)); 1236 | + } 1237 | +#else 1238 | + const uint8_t *src_bytep = (const uint8_t *)src; 1239 | + uint8_t *dst_bytep = (uint8_t *)dst; 1240 | + dst_bytep[0] = src_bytep[0]; 1241 | + dst_bytep[1] = src_bytep[1]; 1242 | + dst_bytep[2] = src_bytep[2]; 1243 | + dst_bytep[3] = src_bytep[3]; 1244 | + dst_bytep[4] = src_bytep[4]; 1245 | + dst_bytep[5] = src_bytep[5]; 1246 | + dst_bytep[6] = src_bytep[6]; 1247 | + dst_bytep[7] = src_bytep[7]; 1248 | +#endif 1249 | +} 1250 | + 1251 | +#if defined(__arm__) 1252 | + #if ARCH_ARM_HAVE_UNALIGNED 1253 | + static inline uint32_t get_unaligned_le(const void *p, uint32_t n) 1254 | + { 1255 | + uint32_t wordmask = (1U << (8 * n)) - 1; 1256 | + return get_unaligned_le32(p) & wordmask; 1257 | + } 1258 | + #else 1259 | + extern uint32_t get_unaligned_le_armv5(const void *p, uint32_t n); 1260 | + #define get_unaligned_le get_unaligned_le_armv5 1261 | + #endif 1262 | +#else 1263 | + static inline uint32_t get_unaligned_le(const void *p, uint32_t n) 1264 | + { 1265 | + /* Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits */ 1266 | + static const uint32_t wordmask[] = { 1267 | + 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu 1268 | + }; 1269 | + return get_unaligned_le32(p) & wordmask[n]; 1270 | + } 1271 | +#endif 1272 | + 1273 | +#define DCHECK_EQ(a, b) DCHECK(((a) == (b))) 1274 | +#define DCHECK_NE(a, b) DCHECK(((a) != (b))) 1275 | +#define DCHECK_GT(a, b) DCHECK(((a) > (b))) 1276 | +#define DCHECK_GE(a, b) DCHECK(((a) >= (b))) 1277 | +#define DCHECK_LT(a, b) DCHECK(((a) < (b))) 1278 | +#define DCHECK_LE(a, b) DCHECK(((a) <= (b))) 1279 | + 1280 | +enum { 1281 | + LITERAL = 0, 1282 | + COPY_1_BYTE_OFFSET = 1, /* 3 bit length + 3 bits of offset in opcode */ 1283 | + COPY_2_BYTE_OFFSET = 2, 1284 | + COPY_4_BYTE_OFFSET = 3 1285 | +}; 1286 | + 1287 | +#endif /* CSNAPPY_INTERNAL_H_ */ 1288 | diff --git a/drivers/staging/zram/Kconfig b/drivers/staging/zram/Kconfig 1289 | index 3bec4db..1132f4b 100644 1290 | --- a/drivers/staging/zram/Kconfig 1291 | +++ b/drivers/staging/zram/Kconfig 1292 | @@ -6,8 +6,6 @@ config ZRAM 1293 | tristate "Compressed RAM block device support" 1294 | depends on BLOCK && SYSFS 1295 | select XVMALLOC 1296 | - select LZO_COMPRESS 1297 | - select LZO_DECOMPRESS 1298 | default n 1299 | help 1300 | Creates virtual block devices called /dev/zramX (X = 0, 1, ...). 1301 | @@ -28,3 +26,21 @@ config ZRAM_DEBUG 1302 | help 1303 | This option adds additional debugging code to the compressed 1304 | RAM block device driver. 1305 | + 1306 | +choice ZRAM_COMPRESS 1307 | + prompt "compression method" 1308 | + depends on ZRAM 1309 | + default ZRAM_LZO 1310 | + help 1311 | + Select the compression method used by zram. 1312 | + LZO is the default. Snappy compresses a bit worse (around ~2%) but 1313 | + much (~2x) faster, at least on x86-64. 
1314 | +config ZRAM_LZO 1315 | + bool "LZO compression" 1316 | + select LZO_COMPRESS 1317 | + select LZO_DECOMPRESS 1318 | +config ZRAM_SNAPPY 1319 | + bool "Snappy compression" 1320 | + depends on SNAPPY_COMPRESS 1321 | + depends on SNAPPY_DECOMPRESS 1322 | +endchoice 1323 | diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c 1324 | index 09de99f..33cf32c 100644 1325 | --- a/drivers/staging/zram/zram_drv.c 1326 | +++ b/drivers/staging/zram/zram_drv.c 1327 | @@ -29,12 +29,56 @@ 1328 | #include 1329 | #include 1330 | #include 1331 | -#include 1332 | #include 1333 | #include 1334 | 1335 | #include "zram_drv.h" 1336 | 1337 | +#if defined(CONFIG_ZRAM_LZO) 1338 | +#include 1339 | +#define WMSIZE LZO1X_MEM_COMPRESS 1340 | +#define COMPRESS(s, sl, d, dl, wm) \ 1341 | + lzo1x_1_compress(s, sl, d, dl, wm) 1342 | +#define DECOMPRESS(s, sl, d, dl) \ 1343 | + lzo1x_decompress_safe(s, sl, d, dl) 1344 | +#elif defined(CONFIG_ZRAM_SNAPPY) 1345 | +#include "../snappy/csnappy.h" /* if built in drivers/staging */ 1346 | +#define WMSIZE_ORDER ((PAGE_SHIFT > 14) ? (15) : (PAGE_SHIFT+1)) 1347 | +#define WMSIZE (1 << WMSIZE_ORDER) 1348 | +static int 1349 | +snappy_compress_( 1350 | + const unsigned char *src, 1351 | + size_t src_len, 1352 | + unsigned char *dst, 1353 | + size_t *dst_len, 1354 | + void *workmem) 1355 | +{ 1356 | + const unsigned char *end = csnappy_compress_fragment( 1357 | + src, (uint32_t)src_len, dst, workmem, WMSIZE_ORDER); 1358 | + *dst_len = end - dst; 1359 | + return 0; 1360 | +} 1361 | +static int 1362 | +snappy_decompress_( 1363 | + const unsigned char *src, 1364 | + size_t src_len, 1365 | + unsigned char *dst, 1366 | + size_t *dst_len) 1367 | +{ 1368 | + uint32_t dst_len_ = (uint32_t)*dst_len; 1369 | + int ret = csnappy_decompress_noheader(src, src_len, dst, &dst_len_); 1370 | + *dst_len = (size_t)dst_len_; 1371 | + return ret; 1372 | +} 1373 | +#define COMPRESS(s, sl, d, dl, wm) \ 1374 | + snappy_compress_(s, sl, d, dl, wm) 1375 | +#define DECOMPRESS(s, sl, d, dl) \ 1376 | + snappy_decompress_(s, sl, d, dl) 1377 | +#else 1378 | +#error either CONFIG_ZRAM_LZO or CONFIG_ZRAM_SNAPPY must be defined 1379 | +#endif 1380 | + 1381 | + 1382 | /* Globals */ 1383 | static int zram_major; 1384 | struct zram *zram_devices; 1385 | @@ -257,9 +301,9 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 1386 | cmem = kmap_atomic(zram->table[index].page, KM_USER1) + 1387 | zram->table[index].offset; 1388 | 1389 | - ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), 1390 | - xv_get_object_size(cmem) - sizeof(*zheader), 1391 | - uncmem, &clen); 1392 | + ret = DECOMPRESS(cmem + sizeof(*zheader), 1393 | + xv_get_object_size(cmem) - sizeof(*zheader), 1394 | + uncmem, &clen); 1395 | 1396 | if (is_partial_io(bvec)) { 1397 | memcpy(user_mem + bvec->bv_offset, uncmem + offset, 1398 | @@ -271,7 +315,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 1399 | kunmap_atomic(user_mem, KM_USER0); 1400 | 1401 | /* Should NEVER happen. Return bio error if it does. */ 1402 | - if (unlikely(ret != LZO_E_OK)) { 1403 | + if (unlikely(ret)) { 1404 | pr_err("Decompression failed! 
err=%d, page=%u\n", ret, index); 1405 | zram_stat64_inc(zram, &zram->stats.failed_reads); 1406 | return ret; 1407 | @@ -305,13 +349,13 @@ static int zram_read_before_write(struct zram *zram, char *mem, u32 index) 1408 | return 0; 1409 | } 1410 | 1411 | - ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), 1412 | - xv_get_object_size(cmem) - sizeof(*zheader), 1413 | - mem, &clen); 1414 | + ret = DECOMPRESS(cmem + sizeof(*zheader), 1415 | + xv_get_object_size(cmem) - sizeof(*zheader), 1416 | + mem, &clen); 1417 | kunmap_atomic(cmem, KM_USER0); 1418 | 1419 | /* Should NEVER happen. Return bio error if it does. */ 1420 | - if (unlikely(ret != LZO_E_OK)) { 1421 | + if (unlikely(ret)) { 1422 | pr_err("Decompression failed! err=%d, page=%u\n", ret, index); 1423 | zram_stat64_inc(zram, &zram->stats.failed_reads); 1424 | return ret; 1425 | @@ -377,18 +421,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, 1426 | goto out; 1427 | } 1428 | 1429 | - ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, 1430 | - zram->compress_workmem); 1431 | + COMPRESS(uncmem, PAGE_SIZE, src, &clen, zram->compress_workmem); 1432 | 1433 | kunmap_atomic(user_mem, KM_USER0); 1434 | if (is_partial_io(bvec)) 1435 | kfree(uncmem); 1436 | 1437 | - if (unlikely(ret != LZO_E_OK)) { 1438 | - pr_err("Compression failed! err=%d\n", ret); 1439 | - goto out; 1440 | - } 1441 | - 1442 | /* 1443 | * Page is incompressible. Store it as-is (uncompressed) 1444 | * since we do not want to return too many disk write 1445 | @@ -646,7 +684,7 @@ int zram_init_device(struct zram *zram) 1446 | 1447 | zram_set_disksize(zram, totalram_pages << PAGE_SHIFT); 1448 | 1449 | - zram->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); 1450 | + zram->compress_workmem = kzalloc(WMSIZE, GFP_KERNEL); 1451 | if (!zram->compress_workmem) { 1452 | pr_err("Error allocating compressor working memory!\n"); 1453 | ret = -ENOMEM; 1454 | -------------------------------------------------------------------------------- /python/OutputBuffer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from array import array 3 | 4 | class OutputBuffer(object): 5 | """A ring buffer that holds 32K of the last output, 6 | flushing a page at a time until flush() is called.""" 7 | def __init__(self, f, checksummer): 8 | self.f = f 9 | self.checksummer = checksummer 10 | a_list = [0] * 4096 11 | self.pages = [array('B', a_list) for i in xrange(9)] 12 | self.top_page_index, self.top_offset, self.isize, self.checksum = 0, 0, 0, 0 13 | def _flush_page(self): 14 | self.checksum = self.checksummer(self.pages[0], self.checksum) 15 | self.isize += 4096 16 | self.pages[0].tofile(self.f) 17 | self.pages.append(self.pages.pop(0)) 18 | #for i in xrange(4096): self.pages[-1][i] = '\x00' # DEBUG 19 | def _maybe_flush_page(self): 20 | if self.top_offset == 4096: 21 | self.top_offset = 0 22 | if self.top_page_index == len(self.pages) - 1: 23 | self._flush_page() 24 | else: 25 | self.top_page_index += 1 26 | def put_byte(self, b): 27 | self.pages[self.top_page_index][self.top_offset] = b 28 | self.top_offset += 1 29 | self._maybe_flush_page() 30 | def flush(self): 31 | self.top_offset -= 1 32 | if self.top_offset < 0: 33 | self.top_offset = 4095 34 | self.top_page_index -= 1 35 | while self.top_page_index > 0: 36 | self._flush_page() 37 | self.top_page_index -= 1 38 | data = self.pages[self.top_page_index][:self.top_offset+1] 39 | self.checksum = self.checksummer(data, self.checksum) 40 | 
self.isize += self.top_offset + 1 41 | self.f.write(data.tostring()) 42 | return (self.checksum & 0xffffffff, self.isize) 43 | def num_bufferred_bytes(self): 44 | return (self.top_page_index << 12) + self.top_offset 45 | def get_remaining_space(self): 46 | return 4096 - self.top_offset 47 | def put_bytes(self, bytes): 48 | curr_i = 0 49 | len_i = len(bytes) 50 | while curr_i < len_i: 51 | length = min(4096 - self.top_offset, len_i - curr_i) 52 | self.pages[self.top_page_index][self.top_offset : self.top_offset+length] = \ 53 | array('B', bytes[curr_i : curr_i+length]) 54 | curr_i += length 55 | self.top_offset += length 56 | self._maybe_flush_page() 57 | def repeat_chunk(self, length, distance_back): 58 | begin = (self.top_page_index << 12) + self.top_offset - distance_back 59 | if begin < 0: 60 | raise ValueError("distance back is too big. pos [%d %d], distance: %d" % \ 61 | (self.top_page_index, self.top_offset, distance_back)) 62 | input_page_index, input_offset = begin >> 12, begin & 4095 63 | while True: 64 | input_page, output_page = self.pages[input_page_index], self.pages[self.top_page_index] 65 | remaining_space, input_to_end_of_page = 4096 - self.top_offset, 4096 - input_offset 66 | bytes_to_copy = min(length, remaining_space, input_to_end_of_page) 67 | if distance_back == 1: 68 | output_page[self.top_offset : self.top_offset + bytes_to_copy] = \ 69 | array('B', (input_page[input_offset],)) * bytes_to_copy 70 | else: 71 | for i in xrange(bytes_to_copy): 72 | output_page[self.top_offset + i] = input_page[input_offset + i] 73 | input_offset += bytes_to_copy 74 | if input_offset == 4096: 75 | input_offset = 0 76 | input_page_index += 1 77 | self.top_offset += bytes_to_copy 78 | if self.top_offset == 4096: 79 | self.top_offset = 0 80 | if self.top_page_index == len(self.pages) - 1: 81 | self._flush_page() 82 | input_page_index -= 1 83 | else: 84 | self.top_page_index += 1 85 | length -= bytes_to_copy 86 | if length == 0: break 87 | -------------------------------------------------------------------------------- /python/pysnappy_compress.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from struct import pack as struct_pack 3 | from array import array 4 | from collections import defaultdict 5 | 6 | def encode_varint32(f, n): 7 | while n: 8 | b = n & 127 9 | n >>= 7 10 | if n: 11 | b |= 128 12 | f.write(chr(b)) 13 | 14 | def snappy_emit_literal(f, s, begin, end): 15 | l = end - begin - 1 # end is one past the literal, length is encoded -1 16 | if l < 60: 17 | f.write(chr(l << 2)) 18 | elif l < 256: 19 | f.write(chr(240)) # 60 << 2 20 | f.write(chr(l)) 21 | else: # a length > 65536 is not supported by this encoder! 22 | f.write(chr(244)) # 61 << 2 23 | f.write(chr(l & 255)) 24 | f.write(chr(l >> 8)) 25 | f.write(s[begin : end]) 26 | 27 | def snappy_emit_backref(f, offset, length): 28 | if 4 <= length <= 11 and offset < 2048: 29 | f.write(chr(1 | ((length - 4) << 2) | ((offset >> 8) << 5))) 30 | f.write(chr(offset & 255)) 31 | else: # a back offset with offset > 65536 is not supported by this encoder! 
32 | encoded_offset = struct_pack(" 0: 34 | curr_len_chunk = min(length, 64) 35 | f.write(chr(2 | ((curr_len_chunk - 1) << 2))) 36 | f.write(encoded_offset) 37 | length -= curr_len_chunk 38 | 39 | N = 4096 # up to 64K is allowed by this encoder 40 | MIN_LENGTH = 4 # snappy back references with length < 4 encodes to 3 bytes 41 | 42 | def snappy_compress_block_dict(ofile, s, ilen, wm = defaultdict(list)): 43 | """A compressor that does not miss matches and uses unlimited memory""" 44 | wm.clear() 45 | literal_start = 0 46 | i = 1 47 | while i < ilen - MIN_LENGTH: 48 | longest_match_length = MIN_LENGTH - 1 49 | longest_match_start = 0 50 | length_limit = ilen - i 51 | hash_chain = wm[s[i : i + MIN_LENGTH]] 52 | for j in hash_chain: 53 | length = MIN_LENGTH 54 | while length < length_limit and s[i + length] == s[j + length]: 55 | length += 1 56 | if length > longest_match_length: 57 | longest_match_length = length 58 | longest_match_start = j 59 | hash_chain.insert(0, i) 60 | if longest_match_length >= MIN_LENGTH: 61 | if i - 1 >= literal_start: 62 | snappy_emit_literal(ofile, s, literal_start, i) 63 | snappy_emit_backref(ofile, i - longest_match_start, longest_match_length) 64 | i += longest_match_length 65 | literal_start = i 66 | else: 67 | i += 1 68 | if i < ilen: 69 | snappy_emit_literal(ofile, s, literal_start, ilen) 70 | 71 | TABLE_ITEMS_ORDER = 12 72 | MASK = ((1 << TABLE_ITEMS_ORDER) - 1) 73 | def snappy_compress_block_table(ofile, s, ilen, \ 74 | wm = array('H', [0]*(1 << TABLE_ITEMS_ORDER))): 75 | """A compressor that uses limited memory, but misses matches""" 76 | for i in xrange(len(wm)): wm[i] = 0 77 | literal_start = 0 78 | i = 1 79 | while i < ilen - MIN_LENGTH: 80 | hash_key = hash(s[i : i + MIN_LENGTH]) & MASK 81 | match_start = wm[hash_key] 82 | wm[hash_key] = i 83 | length = 0 84 | length_limit = ilen - i 85 | while length < length_limit and s[i + length] == s[match_start + length]: 86 | length += 1 87 | if length >= MIN_LENGTH: 88 | if i - 1 >= literal_start: 89 | snappy_emit_literal(ofile, s, literal_start, i) 90 | snappy_emit_backref(ofile, i - match_start, length) 91 | i += length 92 | literal_start = i 93 | else: 94 | i += 1 95 | if i < ilen: 96 | snappy_emit_literal(ofile, s, literal_start, ilen) 97 | 98 | with open(sys.argv[1], "rb") as ifile: 99 | with open(sys.argv[2], "wb") as ofile: 100 | ifile.seek(0, os.SEEK_END) 101 | encode_varint32(ofile, ifile.tell()) 102 | ifile.seek(0, os.SEEK_SET) 103 | while True: 104 | s = ifile.read(N) 105 | if not s: 106 | break 107 | snappy_compress_block_table(ofile, s, len(s)) 108 | -------------------------------------------------------------------------------- /python/pysnappy_decompress.py: -------------------------------------------------------------------------------- 1 | from OutputBuffer import OutputBuffer 2 | from struct import unpack as struct_unpack 3 | 4 | def read_varint32(f): 5 | v = 0 6 | offset = 0 7 | while True: 8 | c = f.read(1) 9 | if not c: 10 | raise ValueError("header is malformed") 11 | b = ord(c) 12 | v |= (b & 127) << offset 13 | offset += 7 14 | if b < 128: 15 | break 16 | if offset >= 32: 17 | raise ValueError("header is malformed") 18 | return v 19 | 20 | def read_le_bytes(f, n): 21 | s = f.read(n) 22 | if len(s) < n: 23 | raise ValueError("data malformed") 24 | if n == 1: 25 | return ord(s) 26 | elif n == 2: 27 | return struct_unpack("> 2) + 1 44 | if cmd_type == 0: 45 | if length > 60: 46 | length = read_le_bytes(ifile, length - 60) + 1 47 | ob.put_bytes(ifile.read(length)) 48 | else: 49 | if 
cmd_type == 1: 50 | length = ((length - 1) & 7) + 4 51 | offset = ((c >> 5) << 8) + read_le_bytes(ifile, 1) 52 | elif cmd_type == 2: 53 | offset = read_le_bytes(ifile, 2) 54 | else: 55 | offset = read_le_bytes(ifile, 4) 56 | ob.repeat_chunk(length, offset) 57 | if ob.isize + ob.num_bufferred_bytes() == expected_olen: 58 | ob.flush() 59 | else: 60 | raise ValueError("input not consumed") 61 | 62 | if __name__ == "__main__": 63 | import sys 64 | with file(sys.argv[1], "rb") as ifile: 65 | with file(sys.argv[2], "wb") as ofile: 66 | ob = OutputBuffer(ofile, lambda x,y: y) 67 | snappy_decompress(ifile, ob) 68 | -------------------------------------------------------------------------------- /snappy_tester.patch: -------------------------------------------------------------------------------- 1 | diff --git a/configure.ac b/configure.ac 2 | index d193b0b..35f0775 100644 3 | --- a/configure.ac 4 | +++ b/configure.ac 5 | @@ -97,6 +97,7 @@ CHECK_EXT_COMPRESSION_LIB([lzo2], [lzo1x_1_15_compress]) 6 | CHECK_EXT_COMPRESSION_LIB([lzf], [lzf_compress]) 7 | CHECK_EXT_COMPRESSION_LIB([fastlz], [fastlz_compress]) 8 | CHECK_EXT_COMPRESSION_LIB([quicklz], [qlz_compress]) 9 | +CHECK_EXT_COMPRESSION_LIB([csnappy], [csnappy_compress]) 10 | AC_SUBST([UNITTEST_LIBS]) 11 | 12 | # These are used by snappy-stubs-public.h.in. 13 | diff --git a/snappy-test.h b/snappy-test.h 14 | index ef6a955..10fd24c 100644 15 | --- a/snappy-test.h 16 | +++ b/snappy-test.h 17 | @@ -120,6 +120,10 @@ extern "C" { 18 | #include "quicklz.h" 19 | #endif 20 | 21 | +#ifdef HAVE_LIBCSNAPPY 22 | +#include "csnappy.h" 23 | +#endif 24 | + 25 | namespace { 26 | namespace File { 27 | void Init() { } 28 | diff --git a/snappy_unittest.cc b/snappy_unittest.cc 29 | index f3b9c83..52a8ab3 100644 30 | --- a/snappy_unittest.cc 31 | +++ b/snappy_unittest.cc 32 | @@ -57,6 +57,8 @@ DEFINE_bool(liblzf, false, 33 | "(http://www.goof.com/pcg/marc/liblzf.html)"); 34 | DEFINE_bool(fastlz, false, 35 | "Run FastLZ compression (http://www.fastlz.org/"); 36 | +DEFINE_bool(csnappy, false, 37 | + "Run csnappy compression (https://github.com/zeevt/csnappy/)"); 38 | DEFINE_bool(snappy, true, "Run snappy compression"); 39 | 40 | 41 | @@ -121,11 +123,11 @@ typedef string DataEndingAtUnreadablePage; 42 | #endif 43 | 44 | enum CompressorType { 45 | - ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, SNAPPY 46 | + ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, CSNAPPY, SNAPPY, 47 | }; 48 | 49 | const char* names[] = { 50 | - "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "SNAPPY" 51 | + "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "CSNAPPY", "SNAPPY", 52 | }; 53 | 54 | static size_t MinimumRequiredOutputSpace(size_t input_size, 55 | @@ -156,6 +158,12 @@ static size_t MinimumRequiredOutputSpace(size_t input_size, 56 | return max(static_cast(ceil(input_size * 1.05)), 66); 57 | #endif // FASTLZ_VERSION 58 | 59 | +#ifdef CSNAPPY_VERSION 60 | + case CSNAPPY: 61 | + return static_cast(csnappy_max_compressed_length( 62 | + static_cast(input_size))); 63 | +#endif // CSNAPPY_VERSION 64 | + 65 | case SNAPPY: 66 | return snappy::MaxCompressedLength(input_size); 67 | 68 | @@ -266,6 +274,24 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp, 69 | } 70 | #endif // FASTLZ_VERSION 71 | 72 | +#ifdef CSNAPPY_VERSION 73 | + case CSNAPPY: { 74 | + uint32_t destlen; 75 | + char* mem = new char[CSNAPPY_WORKMEM_BYTES]; 76 | + csnappy_compress(input, input_size, 77 | + string_as_array(compressed), 78 | + &destlen, 79 | + mem, 80 | + CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO); 81 | + delete[] mem; 
82 | + CHECK_LE(destlen, csnappy_max_compressed_length(input_size)); 83 | + if (!compressed_is_preallocated) { 84 | + compressed->resize(destlen); 85 | + } 86 | + break; 87 | + } 88 | +#endif // CSNAPPY_VERSION 89 | + 90 | case SNAPPY: { 91 | size_t destlen; 92 | snappy::RawCompress(input, input_size, 93 | @@ -364,9 +390,23 @@ static bool Uncompress(const string& compressed, CompressorType comp, 94 | } 95 | #endif // FASTLZ_VERSION 96 | 97 | +#ifdef CSNAPPY_VERSION 98 | + case CSNAPPY: { 99 | + int ret = csnappy_decompress( 100 | + compressed.data(), 101 | + (uint32_t)compressed.size(), 102 | + string_as_array(output), 103 | + (uint32_t)size); 104 | + CHECK_EQ(ret, CSNAPPY_E_OK); 105 | + break; 106 | + } 107 | +#endif // CSNAPPY_VERSION 108 | + 109 | case SNAPPY: { 110 | - snappy::RawUncompress(compressed.data(), compressed.size(), 111 | - string_as_array(output)); 112 | + bool ret = snappy::RawUncompress(compressed.data(), 113 | + compressed.size(), 114 | + string_as_array(output)); 115 | + CHECK_EQ(ret, true); 116 | break; 117 | } 118 | 119 | @@ -464,7 +504,7 @@ static void Measure(const char* data, 120 | string urate = (uncomp_rate >= 0) 121 | ? StringPrintf("%.1f", uncomp_rate) 122 | : string("?"); 123 | - printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% " 124 | + printf("%-8s [b %dM] bytes %6d -> %6d %4.1f%% " 125 | "comp %5.1f MB/s uncomp %5s MB/s\n", 126 | x.c_str(), 127 | block_size/(1<<20), 128 | @@ -1013,6 +1053,7 @@ static void MeasureFile(const char* fname) { 129 | if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10); 130 | if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10); 131 | if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10); 132 | + if (FLAGS_csnappy) Measure(input, len, CSNAPPY, repeats, 1024<<10); 133 | if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10); 134 | 135 | // For block-size based measurements 136 | -------------------------------------------------------------------------------- /testdata/baddata3.snappy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeevt/csnappy/6c10c305e8dde193546e6b33cf8a785d5dc123e2/testdata/baddata3.snappy -------------------------------------------------------------------------------- /testdata/unaligned_uint64_test.bin.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeevt/csnappy/6c10c305e8dde193546e6b33cf8a785d5dc123e2/testdata/unaligned_uint64_test.bin.gz -------------------------------------------------------------------------------- /testdata/unaligned_uint64_test.snappy.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeevt/csnappy/6c10c305e8dde193546e6b33cf8a785d5dc123e2/testdata/unaligned_uint64_test.snappy.gz -------------------------------------------------------------------------------- /testdata/urls.10K.snappy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeevt/csnappy/6c10c305e8dde193546e6b33cf8a785d5dc123e2/testdata/urls.10K.snappy -------------------------------------------------------------------------------- /unaligned_arm.s: -------------------------------------------------------------------------------- 1 | # Written by http://stackoverflow.com/users/104109/bitbank 2 | # Licensed under 3-clause BSD with permission. 
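(Editor's note, not part of the original file: ARMv5 cores cannot load an unaligned word directly, so the routine below assembles the result one byte at a time, reading only the n bytes it was asked for. A plain C reference of what it computes -- read n bytes, 1 <= n <= 4, starting at p and combine them little-endian -- essentially the same logic as get_unaligned_le_v1 in unaligned_test.c further down:)

#include <stdint.h>

/* editor's reference model of get_unaligned_le_armv5; never touches
 * bytes past p + n - 1 */
uint32_t get_unaligned_le_ref(const void *p, uint32_t n)
{
	const uint8_t *b = (const uint8_t *)p;
	uint32_t v = b[0];		/* byte 0 is always read */
	if (n >= 2)
		v |= (uint32_t)b[1] << 8;
	if (n >= 3)
		v |= (uint32_t)b[2] << 16;
	if (n == 4)
		v |= (uint32_t)b[3] << 24;
	return v;
}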
3 | .syntax unified 4 | .arch armv5te 5 | .text 6 | .align 2 7 | .global get_unaligned_le_armv5 8 | .arm 9 | .type get_unaligned_le_armv5, %function 10 | # When called from C, r0 = first parameter, r1 = second parameter 11 | # r0-r3 and r12 can get trashed by C functions 12 | get_unaligned_le_armv5: 13 | .fnstart 14 | ldrb %r2, [%r0],#1 @ byte 0 is always read (n=1..4) 15 | cmp %r1, #2 16 | ldrbge %r3, [%r0],#1 @ byte 1, n == 2 17 | ldrbgt %r12,[%r0],#1 @ byte 2, n > 2 18 | orrge %r2, %r2, %r3, LSL #8 19 | orrgt %r2, %r2, %r12,LSL #16 20 | cmp %r1, #4 21 | ldrbeq %r3, [%r0],#1 @ byte 3, n == 4 22 | movne %r0, %r2 @ recoup wasted cycle 23 | orreq %r0, %r2, %r3, LSL #24 24 | bx lr 25 | .fnend 26 | -------------------------------------------------------------------------------- /unaligned_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct __attribute__((__packed__)) una_u32 { uint32_t x; }; 4 | 5 | static inline uint32_t read_aligned_uint32(const void *p) { 6 | const uint32_t *ptr = (const uint32_t *)p; 7 | return *ptr; 8 | } 9 | 10 | static inline uint32_t read_unaligned_ps_uint32(const void *p) { 11 | const struct una_u32 *ptr = (const struct una_u32 *)p; 12 | return ptr->x; 13 | } 14 | 15 | static inline uint32_t read_unaligned_bsle_uint32(const void *p) { 16 | const uint8_t *b = (const uint8_t *)p; 17 | return b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); 18 | } 19 | 20 | static inline void write_aligned_uint32(void *p, uint32_t v) { 21 | uint32_t *ptr = (uint32_t *)p; 22 | *ptr = v; 23 | } 24 | 25 | static inline void write_unaligned_ps_uint32(void *p, uint32_t v) { 26 | struct una_u32 *ptr = (struct una_u32 *)p; 27 | ptr->x = v; 28 | } 29 | 30 | static inline void write_unaligned_bsle_uint32(void *p, uint32_t v) { 31 | uint8_t *b = (uint8_t *)p; 32 | b[0] = v & 0xff; 33 | b[1] = (v >> 8) & 0xff; 34 | b[2] = (v >> 16) & 0xff; 35 | b[3] = (v >> 24) & 0xff; 36 | } 37 | 38 | #include 39 | #include 40 | 41 | static const uint32_t wordmask[] = { 42 | 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu 43 | }; 44 | 45 | uint32_t get_unaligned_le_x86(const void *p, uint32_t n) { 46 | uint32_t ret = *(const uint32_t *)p & wordmask[n]; 47 | return ret; 48 | } 49 | 50 | uint32_t get_unaligned_le_v1(const void *p, uint32_t n) { 51 | const uint8_t *b = (const uint8_t *)p; 52 | uint32_t ret; 53 | ret = b[0]; 54 | if (n > 1) { 55 | ret |= b[1] << 8; 56 | if (n > 2) { 57 | ret |= b[2] << 16; 58 | if (n > 3) { 59 | ret |= b[3] << 24; 60 | } 61 | } 62 | } 63 | return ret; 64 | } 65 | 66 | uint32_t get_unaligned_le_v2(const void *p, uint32_t n) { 67 | const uint8_t *b = (const uint8_t *)p; 68 | uint32_t ret = b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); 69 | ret &= wordmask[n]; 70 | return ret; 71 | } 72 | 73 | uint32_t get_unaligned_le_v4(const void *p, uint32_t n) { 74 | const uint8_t *b = (const uint8_t *)p; 75 | uint32_t ret; 76 | int mask1 = -(n>1); /* 0 if n<=1, 0xFF otherwise */ 77 | int mask2 = -(n>2); 78 | int mask3 = -(n>3); 79 | ret = b[0]; 80 | ret |= (b[1] << 8) & mask1; 81 | ret |= (b[2] << 16) & mask2; 82 | ret |= (b[3] << 24) & mask3; 83 | return ret; 84 | } 85 | 86 | uint32_t get_unaligned_le_v5(const void *p, uint32_t n) { 87 | uint32_t mask = (1U << (8 * n)) - 1; 88 | uint32_t ret = *(const uint32_t *)p & mask; 89 | return ret; 90 | } 91 | 92 | extern uint32_t get_unaligned_le_armv5(const void *p, uint32_t n); 93 | 94 | #include 95 | #include 96 | #include 97 | 98 | int main(int argc, char *argv[]) { 99 | if (argc < 2) 
100 | return 2; 101 | const unsigned int data_size = 1000000; 102 | uint8_t *data = malloc(data_size); 103 | if (!data) 104 | return 0; 105 | uint32_t x, i; 106 | for (i = 0; i < data_size; i++) 107 | data[i] = i & 255; 108 | const uint8_t *p = data, * const end = data + data_size - 10; 109 | 110 | #define TEST_LOOP(func) \ 111 | for (i = 0; i < 100; i++) { \ 112 | x = 0xffffffff; \ 113 | p = data; \ 114 | while (p < end) { \ 115 | x ^= func(p, 1); p += 1; \ 116 | x ^= func(p, 2); p += 2; \ 117 | x ^= func(p, 3); p += 3; \ 118 | x ^= func(p, 4); p += 4; \ 119 | } \ 120 | } 121 | 122 | switch (argv[1][0] - '0') { 123 | case 0: 124 | TEST_LOOP(get_unaligned_le_v1); 125 | break; 126 | case 1: 127 | TEST_LOOP(get_unaligned_le_v2); 128 | break; 129 | case 2: 130 | TEST_LOOP(get_unaligned_le_v4); 131 | break; 132 | case 3: 133 | TEST_LOOP(get_unaligned_le_armv5); 134 | break; 135 | case 4: 136 | TEST_LOOP(get_unaligned_le_x86); 137 | break; 138 | case 5: 139 | TEST_LOOP(get_unaligned_le_v5); 140 | break; 141 | default: 142 | goto out; 143 | } 144 | out: 145 | free(data); 146 | return 0; 147 | } 148 | -------------------------------------------------------------------------------- /userspace_benchmark.txt: -------------------------------------------------------------------------------- 1 | testdata/alice29.txt : 2 | ZLIB: [b 1M] bytes 152089 -> 54404 35.8% comp 9.8 MB/s uncomp 138.0 MB/s 3 | LZO204: [b 1M] bytes 152089 -> 82691 54.4% comp 64.6 MB/s uncomp 206.3 MB/s 4 | LZO205: [b 1M] bytes 152089 -> 87825 57.7% comp 175.4 MB/s uncomp 240.0 MB/s 5 | CSNAPPY: [b 1M] bytes 152089 -> 90965 59.8% comp 173.7 MB/s uncomp 409.6 MB/s 6 | SNAPPY: [b 4M] bytes 152089 -> 90965 59.8% comp 174.9 MB/s uncomp 401.6 MB/s 7 | testdata/asyoulik.txt : 8 | ZLIB: [b 1M] bytes 125179 -> 48897 39.1% comp 9.0 MB/s uncomp 131.0 MB/s 9 | LZO204: [b 1M] bytes 125179 -> 73217 58.5% comp 59.6 MB/s uncomp 202.1 MB/s 10 | LZO205: [b 1M] bytes 125179 -> 77041 61.5% comp 164.4 MB/s uncomp 237.4 MB/s 11 | CSNAPPY: [b 1M] bytes 125179 -> 80207 64.1% comp 163.6 MB/s uncomp 387.7 MB/s 12 | SNAPPY: [b 4M] bytes 125179 -> 80207 64.1% comp 164.6 MB/s uncomp 378.9 MB/s 13 | testdata/cp.html : 14 | ZLIB: [b 1M] bytes 24603 -> 7961 32.4% comp 23.0 MB/s uncomp 142.0 MB/s 15 | LZO204: [b 1M] bytes 24603 -> 11621 47.2% comp 66.8 MB/s uncomp 300.0 MB/s 16 | LZO205: [b 1M] bytes 24603 -> 11909 48.4% comp 218.1 MB/s uncomp 336.9 MB/s 17 | CSNAPPY: [b 1M] bytes 24603 -> 11838 48.1% comp 228.9 MB/s uncomp 548.1 MB/s 18 | SNAPPY: [b 4M] bytes 24603 -> 11838 48.1% comp 227.6 MB/s uncomp 523.3 MB/s 19 | testdata/fields.c : 20 | ZLIB: [b 1M] bytes 11150 -> 3122 28.0% comp 25.2 MB/s uncomp 147.5 MB/s 21 | LZO204: [b 1M] bytes 11150 -> 4663 41.8% comp 86.2 MB/s uncomp 304.5 MB/s 22 | LZO205: [b 1M] bytes 11150 -> 4711 42.3% comp 253.3 MB/s uncomp 346.1 MB/s 23 | CSNAPPY: [b 1M] bytes 11150 -> 4728 42.4% comp 251.7 MB/s uncomp 536.5 MB/s 24 | SNAPPY: [b 4M] bytes 11150 -> 4728 42.4% comp 249.6 MB/s uncomp 515.2 MB/s 25 | testdata/geo.protodata : 26 | ZLIB: [b 1M] bytes 118588 -> 15131 12.8% comp 43.2 MB/s uncomp 310.1 MB/s 27 | LZO204: [b 1M] bytes 118588 -> 20026 16.9% comp 150.2 MB/s uncomp 639.7 MB/s 28 | LZO205: [b 1M] bytes 118588 -> 23965 20.2% comp 487.6 MB/s uncomp 705.7 MB/s 29 | CSNAPPY: [b 1M] bytes 118588 -> 27459 23.2% comp 469.0 MB/s uncomp 985.8 MB/s 30 | SNAPPY: [b 4M] bytes 118588 -> 27459 23.2% comp 466.1 MB/s uncomp 954.6 MB/s 31 | testdata/grammar.lsp : 32 | ZLIB: [b 1M] bytes 3721 -> 1222 32.8% comp 24.0 MB/s uncomp 109.3 MB/s 33 | 
LZO204: [b 1M] bytes 3721 -> 1781 47.9% comp 79.2 MB/s uncomp 360.8 MB/s 34 | LZO205: [b 1M] bytes 3721 -> 1811 48.7% comp 232.3 MB/s uncomp 442.2 MB/s 35 | CSNAPPY: [b 1M] bytes 3721 -> 1800 48.4% comp 257.6 MB/s uncomp 612.8 MB/s 36 | SNAPPY: [b 4M] bytes 3721 -> 1800 48.4% comp 250.1 MB/s uncomp 570.9 MB/s 37 | testdata/house.jpg : 38 | ZLIB: [b 1M] bytes 126958 -> 126513 99.6% comp 19.0 MB/s uncomp 231.8 MB/s 39 | LZO204: [b 1M] bytes 126958 -> 127173 100.2% comp 23.5 MB/s uncomp 1635.4 MB/s 40 | LZO205: [b 1M] bytes 126958 -> 127303 100.3% comp 1051.1 MB/s uncomp 3762.4 MB/s 41 | CSNAPPY: [b 1M] bytes 126958 -> 126803 99.9% comp 2365.1 MB/s uncomp 8190.2 MB/s 42 | SNAPPY: [b 4M] bytes 126958 -> 126803 99.9% comp 2326.8 MB/s uncomp 8402.5 MB/s 43 | testdata/html : 44 | ZLIB: [b 1M] bytes 102400 -> 13699 13.4% comp 35.6 MB/s uncomp 273.4 MB/s 45 | LZO204: [b 1M] bytes 102400 -> 21027 20.5% comp 135.7 MB/s uncomp 494.3 MB/s 46 | LZO205: [b 1M] bytes 102400 -> 22547 22.0% comp 421.6 MB/s uncomp 557.5 MB/s 47 | CSNAPPY: [b 1M] bytes 102400 -> 24140 23.6% comp 425.8 MB/s uncomp 873.0 MB/s 48 | SNAPPY: [b 4M] bytes 102400 -> 24140 23.6% comp 422.9 MB/s uncomp 845.4 MB/s 49 | testdata/html_x_4 : 50 | ZLIB: [b 1M] bytes 409600 -> 53367 13.0% comp 32.1 MB/s uncomp 277.7 MB/s 51 | LZO204: [b 1M] bytes 409600 -> 82980 20.3% comp 143.3 MB/s uncomp 487.0 MB/s 52 | LZO205: [b 1M] bytes 409600 -> 89475 21.8% comp 428.2 MB/s uncomp 556.1 MB/s 53 | CSNAPPY: [b 1M] bytes 409600 -> 96472 23.6% comp 423.4 MB/s uncomp 870.8 MB/s 54 | SNAPPY: [b 4M] bytes 409600 -> 96472 23.6% comp 418.3 MB/s uncomp 830.5 MB/s 55 | testdata/kennedy.xls : 56 | ZLIB: [b 1M] bytes 1029744 -> 203992 19.8% comp 15.8 MB/s uncomp 230.0 MB/s 57 | LZO204: [b 1M] bytes 1029744 -> 357315 34.7% comp 159.1 MB/s uncomp 624.6 MB/s 58 | LZO205: [b 1M] bytes 1029744 -> 362984 35.2% comp 413.2 MB/s uncomp 736.1 MB/s 59 | CSNAPPY: [b 1M] bytes 1029744 -> 425735 41.3% comp 354.9 MB/s uncomp 564.4 MB/s 60 | SNAPPY: [b 4M] bytes 1029744 -> 425735 41.3% comp 350.0 MB/s uncomp 513.0 MB/s 61 | testdata/kppkn.gtb : 62 | ZLIB: [b 1M] bytes 184320 -> 38751 21.0% comp 7.2 MB/s uncomp 180.9 MB/s 63 | LZO204: [b 1M] bytes 184320 -> 71671 38.9% comp 98.6 MB/s uncomp 274.8 MB/s 64 | LZO205: [b 1M] bytes 184320 -> 71445 38.8% comp 295.0 MB/s uncomp 321.9 MB/s 65 | CSNAPPY: [b 1M] bytes 184320 -> 70535 38.3% comp 271.8 MB/s uncomp 483.8 MB/s 66 | SNAPPY: [b 4M] bytes 184320 -> 70535 38.3% comp 273.9 MB/s uncomp 464.5 MB/s 67 | testdata/lcet10.txt : 68 | ZLIB: [b 1M] bytes 426754 -> 144904 34.0% comp 10.0 MB/s uncomp 142.8 MB/s 69 | LZO204: [b 1M] bytes 426754 -> 221290 51.9% comp 67.3 MB/s uncomp 212.3 MB/s 70 | LZO205: [b 1M] bytes 426754 -> 236699 55.5% comp 182.2 MB/s uncomp 248.3 MB/s 71 | CSNAPPY: [b 1M] bytes 426754 -> 243710 57.1% comp 181.7 MB/s uncomp 437.4 MB/s 72 | SNAPPY: [b 4M] bytes 426754 -> 243710 57.1% comp 183.0 MB/s uncomp 428.3 MB/s 73 | testdata/mapreduce-osdi-1.pdf : 74 | ZLIB: [b 1M] bytes 94330 -> 74928 79.4% comp 22.4 MB/s uncomp 177.9 MB/s 75 | LZO204: [b 1M] bytes 94330 -> 76999 81.6% comp 29.0 MB/s uncomp 938.7 MB/s 76 | LZO205: [b 1M] bytes 94330 -> 94704 100.4% comp 1057.4 MB/s uncomp 3974.6 MB/s 77 | CSNAPPY: [b 1M] bytes 94330 -> 77477 82.1% comp 833.6 MB/s uncomp 2115.4 MB/s 78 | SNAPPY: [b 4M] bytes 94330 -> 77477 82.1% comp 832.2 MB/s uncomp 1997.5 MB/s 79 | testdata/plrabn12.txt : 80 | ZLIB: [b 1M] bytes 481861 -> 195261 40.5% comp 7.5 MB/s uncomp 130.1 MB/s 81 | LZO204: [b 1M] bytes 481861 -> 294610 61.1% comp 59.1 MB/s 
uncomp 192.3 MB/s 82 | LZO205: [b 1M] bytes 481861 -> 314012 65.2% comp 155.7 MB/s uncomp 229.7 MB/s 83 | CSNAPPY: [b 1M] bytes 481861 -> 329339 68.3% comp 153.4 MB/s uncomp 363.5 MB/s 84 | SNAPPY: [b 4M] bytes 481861 -> 329339 68.3% comp 154.5 MB/s uncomp 354.9 MB/s 85 | testdata/ptt5 : 86 | ZLIB: [b 1M] bytes 513216 -> 56465 11.0% comp 25.8 MB/s uncomp 269.0 MB/s 87 | LZO204: [b 1M] bytes 513216 -> 86232 16.8% comp 139.7 MB/s uncomp 590.6 MB/s 88 | LZO205: [b 1M] bytes 513216 -> 87278 17.0% comp 551.6 MB/s uncomp 667.6 MB/s 89 | CSNAPPY: [b 1M] bytes 513216 -> 93455 18.2% comp 555.0 MB/s uncomp 845.6 MB/s 90 | SNAPPY: [b 4M] bytes 513216 -> 93455 18.2% comp 553.1 MB/s uncomp 795.0 MB/s 91 | testdata/sum : 92 | ZLIB: [b 1M] bytes 38240 -> 12990 34.0% comp 13.9 MB/s uncomp 144.6 MB/s 93 | LZO204: [b 1M] bytes 38240 -> 17686 46.2% comp 67.1 MB/s uncomp 311.0 MB/s 94 | LZO205: [b 1M] bytes 38240 -> 18086 47.3% comp 230.6 MB/s uncomp 373.5 MB/s 95 | CSNAPPY: [b 1M] bytes 38240 -> 19837 51.9% comp 228.7 MB/s uncomp 513.1 MB/s 96 | SNAPPY: [b 4M] bytes 38240 -> 19837 51.9% comp 226.7 MB/s uncomp 479.2 MB/s 97 | testdata/urls.10K : 98 | ZLIB: [b 1M] bytes 702087 -> 222613 31.7% comp 18.2 MB/s uncomp 160.0 MB/s 99 | LZO204: [b 1M] bytes 702087 -> 309320 44.1% comp 64.5 MB/s uncomp 309.2 MB/s 100 | LZO205: [b 1M] bytes 702087 -> 345814 49.3% comp 226.3 MB/s uncomp 376.5 MB/s 101 | CSNAPPY: [b 1M] bytes 702087 -> 357267 50.9% comp 240.1 MB/s uncomp 645.5 MB/s 102 | SNAPPY: [b 4M] bytes 702087 -> 357267 50.9% comp 239.3 MB/s uncomp 598.7 MB/s 103 | testdata/xargs.1 : 104 | ZLIB: [b 1M] bytes 4227 -> 1736 41.1% comp 23.2 MB/s uncomp 104.0 MB/s 105 | LZO204: [b 1M] bytes 4227 -> 2450 58.0% comp 65.2 MB/s uncomp 333.1 MB/s 106 | LZO205: [b 1M] bytes 4227 -> 2468 58.4% comp 192.3 MB/s uncomp 392.1 MB/s 107 | CSNAPPY: [b 1M] bytes 4227 -> 2509 59.4% comp 215.9 MB/s uncomp 499.1 MB/s 108 | SNAPPY: [b 4M] bytes 4227 -> 2509 59.4% comp 208.7 MB/s uncomp 477.0 MB/s 109 | -------------------------------------------------------------------------------- /zram_benchmark.txt: -------------------------------------------------------------------------------- 1 | Benchmark (zramtest2.sh) creates a zram block device, an ext4 filesystem on it, 2 | fixes the filesystem not to think it's on a striped RAID device with a single 3 | stripe, and then untars a gzip'd tarball to that filesystem. 4 | Then it syncs the filesystem, drops caches and verifies md5 checksums of 5 | extracted files to cause them to be read back from zram. 6 | Results are from running on a 2.33Ghz Core 2 Duo with Linux 2.6.39-rc3. 7 | 8 | Results for LZO zram: 9 | real 0m5.333s 10 | user 0m4.244s 11 | sys 0m1.471s 12 | orig_data_size 645918720 13 | compr_data_size 320624925 14 | mem_used_total 326627328 15 | 16 | Result for Snappy zram: 17 | real 0m5.012s 18 | user 0m4.213s 19 | sys 0m1.477s 20 | orig_data_size 645914624 21 | compr_data_size 326040602 22 | mem_used_total 332374016 23 | 24 | orig_data_size is not constant from run to run. I don't know why. 25 | 26 | perf for the LZO zram: 27 | [ perf record: Woken up 13 times to write data ] 28 | [ perf record: Captured and wrote 3.195 MB perf.data (~139599 samples) ] 29 | # Events: 15K cycles 30 | # 31 | # Overhead Command Shared Object Symbol 32 | # ........ .............. ................. ...................................... 33 | # 34 | 25.96% kthreadd [lzo_compress] [k] _lzo1x_1_do_compress 35 | 12.38% gzip gzip [.] zip 36 | 11.46% gzip gzip [.] 
treat_file.part.4.2264 37 | 7.60% md5sum [lzo_decompress] [k] lzo1x_decompress_safe 38 | 4.80% md5sum md5sum [.] digest_file.isra.2.2089 39 | 3.95% md5sum md5sum [.] 0x3324 40 | 1.96% gzip libc-2.13.so [.] __memcpy_ssse3 41 | 1.11% tar [kernel.kallsyms] [k] copy_user_generic_string 42 | 0.86% gzip [kernel.kallsyms] [k] copy_user_generic_string 43 | 0.70% kthreadd [kernel.kallsyms] [k] __memcpy 44 | 0.60% md5sum [zram] [k] zram_make_request 45 | 0.54% kthreadd [lzo_compress] [k] lzo1x_1_compress 46 | 0.51% md5sum md5sum [.] __libc_csu_init 47 | 0.50% md5sum [kernel.kallsyms] [k] copy_user_generic_string 48 | 0.48% tar [kernel.kallsyms] [k] ext4_mark_iloc_dirty 49 | 0.45% gzip gzip [.] treat_stdin.2262 50 | 0.38% tar [kernel.kallsyms] [k] __memset 51 | 0.38% gzip gzip [.] treat_file.2267 52 | 0.36% dd [kernel.kallsyms] [k] system_call 53 | 0.31% bash [kernel.kallsyms] [k] _raw_spin_trylock 54 | 0.29% bash [kernel.kallsyms] [k] _raw_spin_lock 55 | 0.24% kthreadd [kernel.kallsyms] [k] mb_find_order_for_block 56 | 0.22% tar [kernel.kallsyms] [k] _raw_spin_lock 57 | 0.21% tar [kernel.kallsyms] [k] system_call 58 | 0.21% gzip gzip [.] compress_block.2644.2190 59 | 0.21% swapper [kernel.kallsyms] [k] mwait_idle 60 | 61 | perf for Snappy zram: 62 | [ perf record: Woken up 13 times to write data ] 63 | [ perf record: Captured and wrote 3.088 MB perf.data (~134926 samples) ] 64 | # Events: 13K cycles 65 | # 66 | # Overhead Command Shared Object Symbol 67 | # ........ .............. ....................... ...................................... 68 | # 69 | 15.27% gzip gzip [.] zip 70 | 14.60% gzip gzip [.] treat_file.part.4.2264 71 | 11.37% flush-253:0 [csnappy_compress] [k] csnappy_compress_fragment 72 | 6.15% md5sum md5sum [.] digest_file.isra.2.2089 73 | 5.28% md5sum [csnappy_decompress] [k] csnappy_decompress_noheader 74 | 5.20% md5sum md5sum [.] 0x336d 75 | 2.21% gzip libc-2.13.so [.] __memcpy_ssse3 76 | 2.07% flush-253:0 [kernel.kallsyms] [k] __memcpy 77 | 1.53% tar [kernel.kallsyms] [k] copy_user_generic_string 78 | 1.18% gzip [kernel.kallsyms] [k] copy_user_generic_string 79 | 0.68% flush-253:0 [kernel.kallsyms] [k] __memset 80 | 0.66% md5sum md5sum [.] __libc_csu_init 81 | 0.62% md5sum [kernel.kallsyms] [k] copy_user_generic_string 82 | 0.61% dd [kernel.kallsyms] [k] system_call 83 | 0.56% md5sum [zram] [k] zram_make_request 84 | 0.52% gzip gzip [.] treat_stdin.2262 85 | 0.49% tar [kernel.kallsyms] [k] __memset 86 | 0.48% gzip gzip [.] treat_file.2267 87 | 0.45% tar [kernel.kallsyms] [k] ext4_mark_iloc_dirty 88 | 0.38% md5sum [kernel.kallsyms] [k] __memcpy 89 | 0.34% swapper [kernel.kallsyms] [k] mwait_idle 90 | 0.33% tar [kernel.kallsyms] [k] _raw_spin_lock 91 | 0.30% tar [kernel.kallsyms] [k] system_call 92 | 0.28% umount [kernel.kallsyms] [k] _raw_spin_lock 93 | 0.28% dd [kernel.kallsyms] [k] copy_user_generic_string 94 | 0.26% tar [kernel.kallsyms] [k] __ext4_get_inode_loc 95 | 0.24% gzip gzip [.] 
compress_block.2644.2190 96 | 0.24% md5sum [kernel.kallsyms] [k] system_call 97 | 0.23% flush-253:0 [kernel.kallsyms] [k] _raw_spin_lock 98 | 0.22% bash [kernel.kallsyms] [k] _raw_spin_lock 99 | 0.22% tar [kernel.kallsyms] [k] __find_get_block 100 | 0.22% flush-253:0 [kernel.kallsyms] [k] ext4_bio_write_page 101 | 0.21% flush-253:0 [kernel.kallsyms] [k] mb_find_order_for_block 102 | -------------------------------------------------------------------------------- /zramtest2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | TESTFILE="/usr/portage/distfiles/qt-everywhere-opensource-src-4.7.2.tar.gz" 4 | 5 | if [ ! -f MD5SUMS.gz ]; then 6 | rm -rf temp 7 | mkdir temp 8 | cd temp 9 | time tar xzf ${TESTFILE} 10 | find ./ -type f | sort | xargs -L 1 md5sum | gzip -9 -c > ../MD5SUMS.gz 11 | cd .. 12 | rm -rf temp 13 | fi 14 | 15 | grep -q " $PWD/zram0mnt " /proc/mounts && umount zram0mnt 16 | mkdir -p zram0mnt 17 | echo 1 >/sys/block/zram0/reset || exit 18 | sleep 2 19 | echo $((1024*1024*1024)) > /sys/block/zram0/disksize 20 | mke2fs -t ext4 -m 0 -I 128 -O ^has_journal,^ext_attr /dev/zram0 >/dev/null || exit 21 | tune2fs -l /dev/zram0 | grep 'RAID' 22 | debugfs -w -f debugfs_input.txt /dev/zram0 23 | tune2fs -l /dev/zram0 | grep 'RAID' 24 | mount -o noatime,barrier=0,data=writeback,nobh,discard /dev/zram0 zram0mnt 25 | dd if=${TESTFILE} of=/dev/null >/dev/null 2>&1 26 | cd zram0mnt 27 | time tar xzf ${TESTFILE} 28 | sync 29 | sleep 5 30 | echo 3 > /proc/sys/vm/drop_caches 31 | sleep 5 32 | echo -ne "orig_data_size\t" | cat - /sys/block/zram0/orig_data_size 33 | echo -ne "compr_data_size\t" | cat - /sys/block/zram0/compr_data_size 34 | echo -ne "mem_used_total\t" | cat - /sys/block/zram0/mem_used_total 35 | gunzip -c <../MD5SUMS.gz | md5sum -c - | egrep -v ': OK$' 36 | cd .. 37 | umount zram0mnt 38 | echo 1 >/sys/block/zram0/reset 39 | rmdir zram0mnt 40 | --------------------------------------------------------------------------------
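
The tail of python/pysnappy_decompress.py above dispatches on the two low bits of each tag byte: type 1 is a copy with a 1-byte offset, type 2 a copy with a 2-byte little-endian offset, and anything else a copy with a 4-byte offset, after which ob.repeat_chunk(length, offset) replays bytes from earlier output. Below is a minimal C sketch that only restates that tag arithmetic; it is not part of the repository, and the long-literal encodings are deliberately left out.

#include <stdint.h>

/* Decode one snappy element tag at p, restating the arithmetic used by
 * pysnappy_decompress.py above. Returns the number of tag bytes consumed,
 * or 0 for the long-literal encodings this sketch does not handle. */
static unsigned decode_tag(const uint8_t *p, uint32_t *len, uint32_t *offset)
{
    uint8_t c = p[0];
    switch (c & 3) {
    case 0: /* literal: (length - 1) in the upper six bits; 60..63 mean the
               length follows in 1..4 extra bytes and is not handled here */
        if ((c >> 2) >= 60)
            return 0;
        *len = (c >> 2) + 1;
        *offset = 0;
        return 1;
    case 1: /* copy, 1-byte offset: 3 length bits, 11 offset bits */
        *len = ((c >> 2) & 7) + 4;
        *offset = ((uint32_t)(c >> 5) << 8) | p[1];
        return 2;
    case 2: /* copy, 2-byte little-endian offset */
        *len = (c >> 2) + 1;
        *offset = (uint32_t)p[1] | ((uint32_t)p[2] << 8);
        return 3;
    default: /* copy, 4-byte little-endian offset */
        *len = (c >> 2) + 1;
        *offset = (uint32_t)p[1] | ((uint32_t)p[2] << 8) |
                  ((uint32_t)p[3] << 16) | ((uint32_t)p[4] << 24);
        return 5;
    }
}

/* What a copy then asks of the output buffer: replay len bytes starting
 * offset bytes back. offset may be smaller than len (overlapping regions,
 * which is how runs are encoded), so the copy must go byte by byte. */
static void replay_copy(uint8_t *out, uint32_t pos, uint32_t len, uint32_t offset)
{
    while (len--) {
        out[pos] = out[pos - offset];
        pos++;
    }
}

For example, a tag byte 0x05 followed by 0x2a decodes as a copy of length 5 from offset 42; the overlapping byte-by-byte replay is the behaviour ob.repeat_chunk() is expected to provide, and csnappy_decompress.c implements the same logic in C.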
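
snappy_tester.patch above only wires csnappy into Google's snappy_unittest benchmark; the same three entry points it calls (csnappy_max_compressed_length, csnappy_compress, csnappy_decompress) can be used directly from C. The following round-trip sketch is not part of the repository: it assumes the calling convention shown in the patch, and the buffer names and sample string are made up for illustration.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "csnappy.h"

/* Round-trip a short string through csnappy, mirroring the calls made in
 * snappy_tester.patch above. Error handling is kept minimal on purpose. */
int main(void)
{
    const char *text = "hello hello hello hello hello";
    uint32_t ilen = (uint32_t)strlen(text);
    uint32_t clen = 0;
    char *compressed = malloc(csnappy_max_compressed_length(ilen));
    char *workmem = malloc(CSNAPPY_WORKMEM_BYTES);
    char *restored = malloc(ilen);
    int ret;

    if (!compressed || !workmem || !restored)
        return 1;
    /* compression needs a caller-supplied scratch buffer of fixed size */
    csnappy_compress(text, ilen, compressed, &clen,
                     workmem, CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO);
    /* decompress into a buffer of the known original length */
    ret = csnappy_decompress(compressed, clen, restored, ilen);
    printf("%u -> %u bytes, %s\n", ilen, clen,
           (ret == CSNAPPY_E_OK && memcmp(text, restored, ilen) == 0)
               ? "round-trip ok" : "round-trip FAILED");
    free(compressed);
    free(workmem);
    free(restored);
    return 0;
}

Built against the shared library, with something like cc -I. roundtrip.c -L. -lcsnappy, it should print the compressed size and report whether decompression restored the input.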
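
unaligned_test.c above times several ways of loading the first 1..4 little-endian bytes from a possibly unaligned address: byte-by-byte assembly of the value (v1, v2), branch-free masking (v4), direct uint32_t loads masked by wordmask[] (the x86 and v5 variants), and the ARMv5 assembly routine. One common variant it does not include is a memcpy-based load, which stays within the C aliasing and alignment rules and which current compilers typically turn into the same single load as the x86 variant. A sketch for comparison, not part of the test file:

#include <stdint.h>
#include <string.h>

/* memcpy-based little-endian load of the first n (1..4) bytes at p.
 * Well-defined with respect to alignment and aliasing; like
 * get_unaligned_le_x86 it always reads 4 bytes, so the caller must
 * guarantee they are addressable, and it assumes a little-endian host. */
static inline uint32_t get_unaligned_le_memcpy(const void *p, uint32_t n)
{
    static const uint32_t wordmask[] = {
        0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
    };
    uint32_t v;
    memcpy(&v, p, sizeof(v));  /* compilers emit a plain 32-bit load here */
    return v & wordmask[n];
}

On a big-endian target the byte-by-byte variants remain correct as written, while this one and the other whole-word loads would additionally need a byte swap.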
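
For reference, the zram counters reported above reduce to simple ratios. A throwaway sketch that just prints them; the constants are copied from the two runs in zram_benchmark.txt:

#include <stdio.h>

/* Ratios implied by the zram counters above: compressed size and total
 * memory used, each relative to the original data size. */
int main(void)
{
    static const struct { const char *name; double orig, compr, mem; } run[] = {
        { "LZO",    645918720.0, 320624925.0, 326627328.0 },
        { "Snappy", 645914624.0, 326040602.0, 332374016.0 },
    };
    unsigned i;
    for (i = 0; i < sizeof(run) / sizeof(run[0]); i++)
        printf("%-6s  compr_data %.1f%%  mem_used %.1f%%\n", run[i].name,
               100.0 * run[i].compr / run[i].orig,
               100.0 * run[i].mem / run[i].orig);
    return 0;
}

That works out to roughly 49.6% compressed size (50.6% memory used) for LZO against 50.5% (51.5%) for Snappy, i.e. in this run the Snappy-backed zram stores about one percentage point more data while the extract-and-verify phase finishes slightly sooner (5.012 s against 5.333 s real).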