├── .cirrus.yml
├── .github
    └── workflows
    │   └── ci.yml
├── .gitignore
├── CMakeLists.txt
├── COPYING
├── NEWS.md
├── README.md
├── common_defs.h
├── lib
    ├── adler32.c
    ├── arm
    │   ├── adler32_impl.h
    │   ├── cpu_features.c
    │   ├── cpu_features.h
    │   ├── crc32_impl.h
    │   ├── crc32_pmull_helpers.h
    │   ├── crc32_pmull_wide.h
    │   └── matchfinder_impl.h
    ├── bt_matchfinder.h
    ├── cpu_features_common.h
    ├── crc32.c
    ├── crc32_multipliers.h
    ├── crc32_tables.h
    ├── decompress_template.h
    ├── deflate_compress.c
    ├── deflate_compress.h
    ├── deflate_constants.h
    ├── deflate_decompress.c
    ├── gzip_compress.c
    ├── gzip_constants.h
    ├── gzip_decompress.c
    ├── hc_matchfinder.h
    ├── ht_matchfinder.h
    ├── lib_common.h
    ├── matchfinder_common.h
    ├── riscv
    │   └── matchfinder_impl.h
    ├── utils.c
    ├── x86
    │   ├── adler32_impl.h
    │   ├── adler32_template.h
    │   ├── cpu_features.c
    │   ├── cpu_features.h
    │   ├── crc32_impl.h
    │   ├── crc32_pclmul_template.h
    │   ├── decompress_impl.h
    │   └── matchfinder_impl.h
    ├── zlib_compress.c
    ├── zlib_constants.h
    └── zlib_decompress.c
├── libdeflate-config.cmake.in
├── libdeflate.h
├── libdeflate.pc.in
├── programs
    ├── CMakeLists.txt
    ├── benchmark.c
    ├── checksum.c
    ├── config.h.in
    ├── gzip.c
    ├── prog_util.c
    ├── prog_util.h
    ├── test_checksums.c
    ├── test_custom_malloc.c
    ├── test_incomplete_codes.c
    ├── test_invalid_streams.c
    ├── test_litrunlen_overflow.c
    ├── test_overread.c
    ├── test_slow_decompression.c
    ├── test_trailing_bytes.c
    ├── test_util.c
    ├── test_util.h
    └── tgetopt.c
└── scripts
    ├── android_build.sh
    ├── android_tests.sh
    ├── benchmark.sh
    ├── checksum.sh
    ├── checksum_benchmarks.sh
    ├── cmake-helper.sh
    ├── deflate_benchmarks.sh
    ├── exec_tests.sh
    ├── gen-crc32-consts.py
    ├── gen-release-archives.sh
    ├── gen_bitreverse_tab.py
    ├── gen_default_litlen_costs.py
    ├── gen_offset_slot_map.py
    ├── gzip_tests.sh
    ├── libFuzzer
        ├── .gitignore
        ├── deflate_compress
        │   ├── corpus
        │   │   └── 0
        │   └── fuzz.c
        ├── deflate_decompress
        │   ├── corpus
        │   │   └── 0
        │   └── fuzz.c
        ├── fuzz.sh
        ├── gzip_decompress
        │   ├── corpus
        │   │   └── 0
        │   └── fuzz.c
        └── zlib_decompress
        │   ├── corpus
        │       └── 0
        │   └── fuzz.c
    ├── run_tests.sh
    ├── toolchain-i686-w64-mingw32.cmake
    └── toolchain-x86_64-w64-mingw32.cmake


/.cirrus.yml:
--------------------------------------------------------------------------------
# Cirrus CI task: build and test the project on a FreeBSD image.
task:
  freebsd_instance:
    matrix:
      - image_family: freebsd-14-2
  # cmake is the only package installed before the build steps run.
  install_script: pkg install -y cmake
  script:
    # Configure with the test programs enabled, build, then run the tests.
    - cmake -B build -DLIBDEFLATE_BUILD_TESTS=1
    - cmake --build build
    - ctest --test-dir build
    # Direct compilation without official build system
    - cc -O2 -Wall -Werror lib/*.c lib/*/*.c programs/gzip.c programs/prog_util.c programs/tgetopt.c -o libdeflate-gzip
12 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /build*
2 | /libdeflate-*-windows-*
3 | /libdeflate-*.tar.gz
4 | cscope*
5 | tags
6 | 


--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
 1 | Copyright 2016 Eric Biggers
 2 | Copyright 2024 Google LLC
 3 | 
 4 | Permission is hereby granted, free of charge, to any person
 5 | obtaining a copy of this software and associated documentation files
 6 | (the "Software"), to deal in the Software without restriction,
 7 | including without limitation the rights to use, copy, modify, merge,
 8 | publish, distribute, sublicense, and/or sell copies of the Software,
 9 | and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be
13 | included in all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/lib/adler32.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * adler32.c - Adler-32 checksum algorithm
  3 |  *
  4 |  * Copyright 2016 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #include "lib_common.h"
 29 | 
 30 | /* The Adler-32 divisor, or "base", value */
 31 | #define DIVISOR 65521
 32 | 
 33 | /*
 34 |  * MAX_CHUNK_LEN is the most bytes that can be processed without the possibility
 35 |  * of s2 overflowing when it is represented as an unsigned 32-bit integer.  This
 36 |  * value was computed using the following Python script:
 37 |  *
 38 |  *	divisor = 65521
 39 |  *	count = 0
 40 |  *	s1 = divisor - 1
 41 |  *	s2 = divisor - 1
 42 |  *	while True:
 43 |  *		s1 += 0xFF
 44 |  *		s2 += s1
 45 |  *		if s2 > 0xFFFFFFFF:
 46 |  *			break
 47 |  *		count += 1
 48 |  *	print(count)
 49 |  *
 50 |  * Note that to get the correct worst-case value, we must assume that every byte
 51 |  * has value 0xFF and that s1 and s2 started with the highest possible values
 52 |  * modulo the divisor.
 53 |  */
 54 | #define MAX_CHUNK_LEN	5552
 55 | 
 56 | /*
 57 |  * Update the Adler-32 values s1 and s2 using n bytes from p, update p to p + n,
 58 |  * update n to 0, and reduce s1 and s2 mod DIVISOR.  It is assumed that neither
 59 |  * s1 nor s2 can overflow before the reduction at the end, i.e. n plus any bytes
 60 |  * already processed after the last reduction must not exceed MAX_CHUNK_LEN.
 61 |  *
 62 |  * This uses only portable C code.  This is used as a fallback when a vectorized
 63 |  * implementation of Adler-32 (e.g. AVX2) is unavailable on the platform.
 64 |  *
 65 |  * Some of the vectorized implementations also use this to handle the end of the
 66 |  * data when the data isn't evenly divisible by the length the vectorized code
 67 |  * works on.  To avoid compiler errors about target-specific option mismatches
 68 |  * when this is used in that way, this is a macro rather than a function.
 69 |  *
 70 |  * Although this is unvectorized, this does include an optimization where the
 71 |  * main loop processes four bytes at a time using a strategy similar to that
 72 |  * used by vectorized implementations.  This provides increased instruction-
 73 |  * level parallelism compared to the traditional 's1 += *p++; s2 += s1;'.
 74 |  */
 75 | #define ADLER32_CHUNK(s1, s2, p, n)					\
 76 | do {									\
 77 | 	if (n >= 4) {							\
 78 | 		u32 s1_sum = 0;						\
 79 | 		u32 byte_0_sum = 0;					\
 80 | 		u32 byte_1_sum = 0;					\
 81 | 		u32 byte_2_sum = 0;					\
 82 | 		u32 byte_3_sum = 0;					\
 83 | 									\
 84 | 		do {							\
 85 | 			s1_sum += s1;					\
 86 | 			s1 += p[0] + p[1] + p[2] + p[3];		\
 87 | 			byte_0_sum += p[0];				\
 88 | 			byte_1_sum += p[1];				\
 89 | 			byte_2_sum += p[2];				\
 90 | 			byte_3_sum += p[3];				\
 91 | 			p += 4;						\
 92 | 			n -= 4;						\
 93 | 		} while (n >= 4);					\
 94 | 		s2 += (4 * (s1_sum + byte_0_sum)) + (3 * byte_1_sum) +	\
 95 | 		      (2 * byte_2_sum) + byte_3_sum;			\
 96 | 	}								\
 97 | 	for (; n; n--, p++) {						\
 98 | 		s1 += *p;						\
 99 | 		s2 += s1;						\
100 | 	}								\
101 | 	s1 %= DIVISOR;							\
102 | 	s2 %= DIVISOR;							\
103 | } while (0)
104 | 
105 | static u32 MAYBE_UNUSED
106 | adler32_generic(u32 adler, const u8 *p, size_t len)
107 | {
108 | 	u32 s1 = adler & 0xFFFF;
109 | 	u32 s2 = adler >> 16;
110 | 
111 | 	while (len) {
112 | 		size_t n = MIN(len, MAX_CHUNK_LEN & ~3);
113 | 
114 | 		len -= n;
115 | 		ADLER32_CHUNK(s1, s2, p, n);
116 | 	}
117 | 
118 | 	return (s2 << 16) | s1;
119 | }
120 | 
121 | /* Include architecture-specific implementation(s) if available. */
122 | #undef DEFAULT_IMPL
123 | #undef arch_select_adler32_func
124 | typedef u32 (*adler32_func_t)(u32 adler, const u8 *p, size_t len);
125 | #if defined(ARCH_ARM32) || defined(ARCH_ARM64)
126 | #  include "arm/adler32_impl.h"
127 | #elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
128 | #  include "x86/adler32_impl.h"
129 | #endif
130 | 
131 | #ifndef DEFAULT_IMPL
132 | #  define DEFAULT_IMPL adler32_generic
133 | #endif
134 | 
#ifdef arch_select_adler32_func
static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len);

/* Starts out pointing at the dispatcher, which replaces it on first call. */
static volatile adler32_func_t adler32_impl = dispatch_adler32;

/*
 * First-call trampoline: ask the architecture-specific code for an
 * implementation, fall back to DEFAULT_IMPL if it offers none, cache the
 * choice in adler32_impl, and then service this call with it.
 */
static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len)
{
	adler32_func_t chosen = arch_select_adler32_func();

	if (!chosen)
		chosen = DEFAULT_IMPL;

	adler32_impl = chosen;
	return chosen(adler, p, len);
}
#else
/* Only one candidate exists at compile time; bind to it with no indirection. */
#define adler32_impl DEFAULT_IMPL
#endif
155 | 
156 | LIBDEFLATEAPI u32
157 | libdeflate_adler32(u32 adler, const void *buffer, size_t len)
158 | {
159 | 	if (buffer == NULL) /* Return initial value. */
160 | 		return 1;
161 | 	return adler32_impl(adler, buffer, len);
162 | }
163 | 


--------------------------------------------------------------------------------
/lib/arm/cpu_features.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * arm/cpu_features.c - feature detection for ARM CPUs
  3 |  *
  4 |  * Copyright 2018 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | /*
 29 |  * ARM CPUs don't have a standard way for unprivileged programs to detect CPU
 30 |  * features.  But an OS-specific way can be used when available.
 31 |  */
 32 | 
 33 | #ifdef __APPLE__
 34 | #  undef _ANSI_SOURCE
 35 | #  undef _DARWIN_C_SOURCE
 36 | #  define _DARWIN_C_SOURCE /* for sysctlbyname() */
 37 | #endif
 38 | 
 39 | #include "../cpu_features_common.h" /* must be included first */
 40 | #include "cpu_features.h"
 41 | 
 42 | #ifdef ARM_CPU_FEATURES_KNOWN
 43 | /* Runtime ARM CPU feature detection is supported. */
 44 | 
 45 | #ifdef __linux__
 46 | /*
 47 |  * On Linux, arm32 and arm64 CPU features can be detected by reading the
 48 |  * AT_HWCAP and AT_HWCAP2 values from /proc/self/auxv.
 49 |  *
 50 |  * Ideally we'd use the C library function getauxval(), but it's not guaranteed
 51 |  * to be available: it was only added to glibc in 2.16, and in Android it was
 52 |  * added to API level 18 for arm32 and level 21 for arm64.
 53 |  */
 54 | 
 55 | #include <errno.h>
 56 | #include <fcntl.h>
 57 | #include <string.h>
 58 | #include <unistd.h>
 59 | 
 60 | #define AT_HWCAP	16
 61 | #define AT_HWCAP2	26
 62 | 
 63 | static void scan_auxv(unsigned long *hwcap, unsigned long *hwcap2)
 64 | {
 65 | 	int fd;
 66 | 	unsigned long auxbuf[32];
 67 | 	int filled = 0;
 68 | 	int i;
 69 | 
 70 | 	fd = open("/proc/self/auxv", O_RDONLY);
 71 | 	if (fd < 0)
 72 | 		return;
 73 | 
 74 | 	for (;;) {
 75 | 		do {
 76 | 			int ret = read(fd, &((char *)auxbuf)[filled],
 77 | 				       sizeof(auxbuf) - filled);
 78 | 			if (ret <= 0) {
 79 | 				if (ret < 0 && errno == EINTR)
 80 | 					continue;
 81 | 				goto out;
 82 | 			}
 83 | 			filled += ret;
 84 | 		} while (filled < 2 * sizeof(long));
 85 | 
 86 | 		i = 0;
 87 | 		do {
 88 | 			unsigned long type = auxbuf[i];
 89 | 			unsigned long value = auxbuf[i + 1];
 90 | 
 91 | 			if (type == AT_HWCAP)
 92 | 				*hwcap = value;
 93 | 			else if (type == AT_HWCAP2)
 94 | 				*hwcap2 = value;
 95 | 			i += 2;
 96 | 			filled -= 2 * sizeof(long);
 97 | 		} while (filled >= 2 * sizeof(long));
 98 | 
 99 | 		memmove(auxbuf, &auxbuf[i], filled);
100 | 	}
101 | out:
102 | 	close(fd);
103 | }
104 | 
105 | static u32 query_arm_cpu_features(void)
106 | {
107 | 	u32 features = 0;
108 | 	unsigned long hwcap = 0;
109 | 	unsigned long hwcap2 = 0;
110 | 
111 | 	scan_auxv(&hwcap, &hwcap2);
112 | 
113 | #ifdef ARCH_ARM32
114 | 	STATIC_ASSERT(sizeof(long) == 4);
115 | 	if (hwcap & (1 << 12))	/* HWCAP_NEON */
116 | 		features |= ARM_CPU_FEATURE_NEON;
117 | #else
118 | 	STATIC_ASSERT(sizeof(long) == 8);
119 | 	if (hwcap & (1 << 1))	/* HWCAP_ASIMD */
120 | 		features |= ARM_CPU_FEATURE_NEON;
121 | 	if (hwcap & (1 << 4))	/* HWCAP_PMULL */
122 | 		features |= ARM_CPU_FEATURE_PMULL;
123 | 	if (hwcap & (1 << 7))	/* HWCAP_CRC32 */
124 | 		features |= ARM_CPU_FEATURE_CRC32;
125 | 	if (hwcap & (1 << 17))	/* HWCAP_SHA3 */
126 | 		features |= ARM_CPU_FEATURE_SHA3;
127 | 	if (hwcap & (1 << 20))	/* HWCAP_ASIMDDP */
128 | 		features |= ARM_CPU_FEATURE_DOTPROD;
129 | #endif
130 | 	return features;
131 | }
132 | 
133 | #elif defined(__APPLE__)
134 | /* On Apple platforms, arm64 CPU features can be detected via sysctlbyname(). */
135 | 
136 | #include <sys/types.h>
137 | #include <sys/sysctl.h>
138 | #include <TargetConditionals.h>
139 | 
140 | static const struct {
141 | 	const char *name;
142 | 	u32 feature;
143 | } feature_sysctls[] = {
144 | 	{ "hw.optional.neon",		  ARM_CPU_FEATURE_NEON },
145 | 	{ "hw.optional.AdvSIMD",	  ARM_CPU_FEATURE_NEON },
146 | 	{ "hw.optional.arm.FEAT_PMULL",	  ARM_CPU_FEATURE_PMULL },
147 | 	{ "hw.optional.armv8_crc32",	  ARM_CPU_FEATURE_CRC32 },
148 | 	{ "hw.optional.armv8_2_sha3",	  ARM_CPU_FEATURE_SHA3 },
149 | 	{ "hw.optional.arm.FEAT_SHA3",	  ARM_CPU_FEATURE_SHA3 },
150 | 	{ "hw.optional.arm.FEAT_DotProd", ARM_CPU_FEATURE_DOTPROD },
151 | };
152 | 
153 | static u32 query_arm_cpu_features(void)
154 | {
155 | 	u32 features = 0;
156 | 	size_t i;
157 | 
158 | 	for (i = 0; i < ARRAY_LEN(feature_sysctls); i++) {
159 | 		const char *name = feature_sysctls[i].name;
160 | 		u32 val = 0;
161 | 		size_t valsize = sizeof(val);
162 | 
163 | 		if (sysctlbyname(name, &val, &valsize, NULL, 0) == 0 &&
164 | 		    valsize == sizeof(val) && val == 1)
165 | 			features |= feature_sysctls[i].feature;
166 | 	}
167 | 	return features;
168 | }
169 | #elif defined(_WIN32)
170 | 
171 | #include <windows.h>
172 | 
173 | #ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE /* added in Windows SDK 20348 */
174 | #  define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
175 | #endif
176 | 
177 | static u32 query_arm_cpu_features(void)
178 | {
179 | 	u32 features = ARM_CPU_FEATURE_NEON;
180 | 
181 | 	if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
182 | 		features |= ARM_CPU_FEATURE_PMULL;
183 | 	if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
184 | 		features |= ARM_CPU_FEATURE_CRC32;
185 | 	if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
186 | 		features |= ARM_CPU_FEATURE_DOTPROD;
187 | 
188 | 	/* FIXME: detect SHA3 support too. */
189 | 
190 | 	return features;
191 | }
192 | #else
193 | #error "unhandled case"
194 | #endif
195 | 
196 | static const struct cpu_feature arm_cpu_feature_table[] = {
197 | 	{ARM_CPU_FEATURE_NEON,		"neon"},
198 | 	{ARM_CPU_FEATURE_PMULL,		"pmull"},
199 | 	{ARM_CPU_FEATURE_PREFER_PMULL,  "prefer_pmull"},
200 | 	{ARM_CPU_FEATURE_CRC32,		"crc32"},
201 | 	{ARM_CPU_FEATURE_SHA3,		"sha3"},
202 | 	{ARM_CPU_FEATURE_DOTPROD,	"dotprod"},
203 | };
204 | 
205 | volatile u32 libdeflate_arm_cpu_features = 0;
206 | 
207 | void libdeflate_init_arm_cpu_features(void)
208 | {
209 | 	u32 features = query_arm_cpu_features();
210 | 
211 | 	/*
212 | 	 * On the Apple M1 processor, crc32 instructions max out at about 25.5
213 | 	 * GB/s in the best case of using a 3-way or greater interleaved chunked
214 | 	 * implementation, whereas a pmull-based implementation achieves 68 GB/s
215 | 	 * provided that the stride length is large enough (about 10+ vectors
216 | 	 * with eor3, or 12+ without).
217 | 	 *
218 | 	 * Assume that crc32 instructions are preferable in other cases.
219 | 	 */
220 | #if (defined(__APPLE__) && TARGET_OS_OSX) || defined(TEST_SUPPORT__DO_NOT_USE)
221 | 	features |= ARM_CPU_FEATURE_PREFER_PMULL;
222 | #endif
223 | 
224 | 	disable_cpu_features_for_testing(&features, arm_cpu_feature_table,
225 | 					 ARRAY_LEN(arm_cpu_feature_table));
226 | 
227 | 	libdeflate_arm_cpu_features = features | ARM_CPU_FEATURES_KNOWN;
228 | }
229 | 
230 | #endif /* ARM_CPU_FEATURES_KNOWN */
231 | 


--------------------------------------------------------------------------------
/lib/arm/cpu_features.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * arm/cpu_features.h - feature detection for ARM CPUs
  3 |  *
  4 |  * Copyright 2018 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #ifndef LIB_ARM_CPU_FEATURES_H
 29 | #define LIB_ARM_CPU_FEATURES_H
 30 | 
 31 | #include "../lib_common.h"
 32 | 
 33 | #if defined(ARCH_ARM32) || defined(ARCH_ARM64)
 34 | 
/*
 * Bit flags identifying individual ARM CPU features.  They are OR'ed together
 * into a single u32 feature mask, which the HAVE_*(features) macros test.
 */
#define ARM_CPU_FEATURE_NEON		(1 << 0)
#define ARM_CPU_FEATURE_PMULL		(1 << 1)
/*
 * PREFER_PMULL indicates that the CPU has very high pmull throughput, and so
 * the 12x wide pmull-based CRC-32 implementation is likely to be faster than an
 * implementation based on the crc32 instructions.
 */
#define ARM_CPU_FEATURE_PREFER_PMULL	(1 << 2)
#define ARM_CPU_FEATURE_CRC32		(1 << 3)
#define ARM_CPU_FEATURE_SHA3		(1 << 4)
#define ARM_CPU_FEATURE_DOTPROD		(1 << 5)
 46 | 
/*
 * Runtime detection is compiled in only for non-FREESTANDING builds with gcc,
 * clang, or MSVC, and only on Linux, arm64 Apple platforms, or arm64 Windows.
 */
#if !defined(FREESTANDING) && \
    (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) && \
    (defined(__linux__) || \
     (defined(__APPLE__) && defined(ARCH_ARM64)) || \
     (defined(_WIN32) && defined(ARCH_ARM64)))
/* Runtime ARM CPU feature detection is supported. */
#  define ARM_CPU_FEATURES_KNOWN	(1U << 31)
extern volatile u32 libdeflate_arm_cpu_features;

void libdeflate_init_arm_cpu_features(void);

/*
 * Return the CPU feature mask, running the detection on first use.  A stored
 * mask of 0 is treated as "not yet initialized".
 */
static inline u32 get_arm_cpu_features(void)
{
	if (libdeflate_arm_cpu_features == 0)
		libdeflate_init_arm_cpu_features();
	return libdeflate_arm_cpu_features;
}
#else
/* No runtime detection in this configuration: report no features. */
static inline u32 get_arm_cpu_features(void) { return 0; }
#endif
 67 | 
/* NEON */
/*
 * HAVE_NEON(features) is the constant 1 when NEON is already enabled for the
 * whole build (__ARM_NEON, or MSVC targeting arm64); otherwise it tests the
 * runtime feature mask.  HAVE_NEON_NATIVE records which of the two applies.
 */
#if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(ARCH_ARM64))
#  define HAVE_NEON(features)	1
#  define HAVE_NEON_NATIVE	1
#else
#  define HAVE_NEON(features)	((features) & ARM_CPU_FEATURE_NEON)
#  define HAVE_NEON_NATIVE	0
#endif
/*
 * With both gcc and clang, NEON intrinsics require that the main target has
 * NEON enabled already.  Exception: with gcc 6.1 and later (r230411 for arm32,
 * r226563 for arm64), hardware floating point support is sufficient.
 */
/* HAVE_NEON_INTRIN: whether <arm_neon.h> is included and usable here. */
#if (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) && \
	(HAVE_NEON_NATIVE || (GCC_PREREQ(6, 1) && defined(__ARM_FP)))
#  define HAVE_NEON_INTRIN	1
#  include <arm_neon.h>
#else
#  define HAVE_NEON_INTRIN	0
#endif
 88 | 
/* PMULL */
/*
 * HAVE_PMULL(features) is the constant 1 when the build target already has
 * the crypto extension enabled (__ARM_FEATURE_CRYPTO); otherwise it tests the
 * runtime feature mask.
 */
#ifdef __ARM_FEATURE_CRYPTO
#  define HAVE_PMULL(features)	1
#else
#  define HAVE_PMULL(features)	((features) & ARM_CPU_FEATURE_PMULL)
#endif
/*
 * HAVE_PMULL_INTRIN: pmull intrinsics are used only on little-endian arm64
 * with NEON intrinsics available and gcc 7.1+, clang, or MSVC.
 */
#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
	(GCC_PREREQ(7, 1) || defined(__clang__) || defined(_MSC_VER)) && \
	CPU_IS_LITTLE_ENDIAN() /* untested on big endian */
#  define HAVE_PMULL_INTRIN	1
   /* Work around MSVC's vmull_p64() taking poly64x1_t instead of poly64_t */
#  ifdef _MSC_VER
#    define compat_vmull_p64(a, b)  vmull_p64(vcreate_p64(a), vcreate_p64(b))
#  else
#    define compat_vmull_p64(a, b)  vmull_p64((a), (b))
#  endif
#else
#  define HAVE_PMULL_INTRIN	0
#endif
108 | 
/* CRC32 */
/*
 * HAVE_CRC32(features) is the constant 1 when the build target already has
 * the crc32 extension enabled (__ARM_FEATURE_CRC32); otherwise it tests the
 * runtime feature mask.
 */
#ifdef __ARM_FEATURE_CRC32
#  define HAVE_CRC32(features)	1
#else
#  define HAVE_CRC32(features)	((features) & ARM_CPU_FEATURE_CRC32)
#endif
/* HAVE_CRC32_INTRIN: crc32 intrinsics are used on arm64 with gcc/clang/MSVC. */
#if defined(ARCH_ARM64) && \
	(defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
#  define HAVE_CRC32_INTRIN	1
#  if defined(__GNUC__) || defined(__clang__)
#    include <arm_acle.h>
#  endif
   /*
    * Use an inline assembly fallback for clang 15 and earlier, which only
    * defined the crc32 intrinsics when crc32 is enabled in the main target.
    */
#  if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
	!defined(__ARM_FEATURE_CRC32)
     /* Each fallback is a statement expression wrapping one instruction. */
#    undef __crc32b
#    define __crc32b(a, b)					\
	({ uint32_t res;					\
	   __asm__("crc32b %w0, %w1, %w2"			\
		   : "=r" (res) : "r" (a), "r" (b));		\
	   res; })
#    undef __crc32h
#    define __crc32h(a, b)					\
	({ uint32_t res;					\
	   __asm__("crc32h %w0, %w1, %w2"			\
		   : "=r" (res) : "r" (a), "r" (b));		\
	   res; })
#    undef __crc32w
#    define __crc32w(a, b)					\
	({ uint32_t res;					\
	   __asm__("crc32w %w0, %w1, %w2"			\
		   : "=r" (res) : "r" (a), "r" (b));		\
	   res; })
     /* Unlike the others, the data operand here is %2 rather than %w2. */
#    undef __crc32d
#    define __crc32d(a, b)					\
	({ uint32_t res;					\
	   __asm__("crc32x %w0, %w1, %2"			\
		   : "=r" (res) : "r" (a), "r" (b));		\
	   res; })
#    pragma clang diagnostic ignored "-Wgnu-statement-expression"
#  endif
#else
#  define HAVE_CRC32_INTRIN	0
#endif
156 | 
/* SHA3 (needed for the eor3 instruction) */
/*
 * HAVE_SHA3(features) is the constant 1 when the build target already has the
 * sha3 extension enabled (__ARM_FEATURE_SHA3); otherwise it tests the runtime
 * feature mask.
 */
#ifdef __ARM_FEATURE_SHA3
#  define HAVE_SHA3(features)	1
#else
#  define HAVE_SHA3(features)	((features) & ARM_CPU_FEATURE_SHA3)
#endif
/*
 * HAVE_SHA3_INTRIN: sha3 intrinsics are used only on arm64 with NEON
 * intrinsics available and a new enough gcc or clang (MSVC is not listed).
 */
#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
	(GCC_PREREQ(9, 1) /* r268049 */ || \
	 CLANG_PREREQ(7, 0, 10010463) /* r338010 */)
#  define HAVE_SHA3_INTRIN	1
   /*
    * Use an inline assembly fallback for clang 15 and earlier, which only
    * defined the sha3 intrinsics when sha3 is enabled in the main target.
    */
#  if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
	!defined(__ARM_FEATURE_SHA3)
#    undef veor3q_u8
#    define veor3q_u8(a, b, c)					\
	({ uint8x16_t res;					\
	   __asm__("eor3 %0.16b, %1.16b, %2.16b, %3.16b"	\
		   : "=w" (res) : "w" (a), "w" (b), "w" (c));	\
	   res; })
#    pragma clang diagnostic ignored "-Wgnu-statement-expression"
#  endif
#else
#  define HAVE_SHA3_INTRIN	0
#endif
184 | 
/* dotprod */
/*
 * HAVE_DOTPROD(features) is the constant 1 when the build target already has
 * the dotprod extension enabled (__ARM_FEATURE_DOTPROD); otherwise it tests
 * the runtime feature mask.
 */
#ifdef __ARM_FEATURE_DOTPROD
#  define HAVE_DOTPROD(features)	1
#else
#  define HAVE_DOTPROD(features)	((features) & ARM_CPU_FEATURE_DOTPROD)
#endif
/*
 * HAVE_DOTPROD_INTRIN: dotprod intrinsics are used only on arm64 with NEON
 * intrinsics available and gcc 8.1+, a new enough clang, or MSVC.
 */
#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
	(GCC_PREREQ(8, 1) || CLANG_PREREQ(7, 0, 10010000) || defined(_MSC_VER))
#  define HAVE_DOTPROD_INTRIN	1
   /*
    * Use an inline assembly fallback for clang 15 and earlier, which only
    * defined the dotprod intrinsics when dotprod is enabled in the main target.
    */
#  if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
	!defined(__ARM_FEATURE_DOTPROD)
     /* The accumulator 'a' is both input and output, hence the "+w". */
#    undef vdotq_u32
#    define vdotq_u32(a, b, c)					\
	({ uint32x4_t res = (a);				\
	   __asm__("udot %0.4s, %1.16b, %2.16b"			\
		   : "+w" (res) : "w" (b), "w" (c));		\
	   res; })
#    pragma clang diagnostic ignored "-Wgnu-statement-expression"
#  endif
#else
#  define HAVE_DOTPROD_INTRIN	0
#endif
211 | 
212 | #endif /* ARCH_ARM32 || ARCH_ARM64 */
213 | 
214 | #endif /* LIB_ARM_CPU_FEATURES_H */
215 | 


--------------------------------------------------------------------------------
/lib/arm/crc32_pmull_helpers.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * arm/crc32_pmull_helpers.h - helper functions for CRC-32 folding with PMULL
  3 |  *
  4 |  * Copyright 2022 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | /*
 29 |  * This file is a "template" for instantiating helper functions for CRC folding
 30 |  * with pmull instructions.  It accepts the following parameters:
 31 |  *
 32 |  * SUFFIX:
 33 |  *	Name suffix to append to all instantiated functions.
 34 |  * ATTRIBUTES:
 35 |  *	Target function attributes to use.
 36 |  * ENABLE_EOR3:
 37 |  *	Use the eor3 instruction (from the sha3 extension).
 38 |  */
 39 | 
 40 | /* Create a vector with 'a' in the first 4 bytes, and the rest zeroed out. */
 41 | #undef u32_to_bytevec
 42 | static forceinline ATTRIBUTES uint8x16_t
 43 | ADD_SUFFIX(u32_to_bytevec)(u32 a)
 44 | {
 45 | 	return vreinterpretq_u8_u32(vsetq_lane_u32(a, vdupq_n_u32(0), 0));
 46 | }
 47 | #define u32_to_bytevec	ADD_SUFFIX(u32_to_bytevec)
 48 | 
 49 | /* Load two 64-bit values into a vector. */
 50 | #undef load_multipliers
 51 | static forceinline ATTRIBUTES poly64x2_t
 52 | ADD_SUFFIX(load_multipliers)(const u64 p[2])
 53 | {
 54 | 	return vreinterpretq_p64_u64(vld1q_u64(p));
 55 | }
 56 | #define load_multipliers	ADD_SUFFIX(load_multipliers)
 57 | 
 58 | /* Do carryless multiplication of the low halves of two vectors. */
 59 | #undef clmul_low
 60 | static forceinline ATTRIBUTES uint8x16_t
 61 | ADD_SUFFIX(clmul_low)(uint8x16_t a, poly64x2_t b)
 62 | {
 63 | 	return vreinterpretq_u8_p128(
 64 | 		     compat_vmull_p64(vgetq_lane_p64(vreinterpretq_p64_u8(a), 0),
 65 | 				      vgetq_lane_p64(b, 0)));
 66 | }
 67 | #define clmul_low	ADD_SUFFIX(clmul_low)
 68 | 
 69 | /* Do carryless multiplication of the high halves of two vectors. */
 70 | #undef clmul_high
 71 | static forceinline ATTRIBUTES uint8x16_t
 72 | ADD_SUFFIX(clmul_high)(uint8x16_t a, poly64x2_t b)
 73 | {
 74 | #ifdef __clang__
 75 | 	/*
 76 | 	 * Use inline asm to ensure that pmull2 is really used.  This works
 77 | 	 * around clang bug https://github.com/llvm/llvm-project/issues/52868.
 78 | 	 */
 79 | 	uint8x16_t res;
 80 | 
 81 | 	__asm__("pmull2 %0.1q, %1.2d, %2.2d" : "=w" (res) : "w" (a), "w" (b));
 82 | 	return res;
 83 | #else
 84 | 	return vreinterpretq_u8_p128(vmull_high_p64(vreinterpretq_p64_u8(a), b));
 85 | #endif
 86 | }
 87 | #define clmul_high	ADD_SUFFIX(clmul_high)
 88 | 
 89 | #undef eor3
 90 | static forceinline ATTRIBUTES uint8x16_t
 91 | ADD_SUFFIX(eor3)(uint8x16_t a, uint8x16_t b, uint8x16_t c)
 92 | {
 93 | #if ENABLE_EOR3
 94 | 	return veor3q_u8(a, b, c);
 95 | #else
 96 | 	return veorq_u8(veorq_u8(a, b), c);
 97 | #endif
 98 | }
 99 | #define eor3	ADD_SUFFIX(eor3)
100 | 
101 | #undef fold_vec
102 | static forceinline ATTRIBUTES uint8x16_t
103 | ADD_SUFFIX(fold_vec)(uint8x16_t src, uint8x16_t dst, poly64x2_t multipliers)
104 | {
105 | 	/* Result: clmul of the low halves ^ clmul of the high halves ^ dst. */
106 | 	const uint8x16_t lo = clmul_low(src, multipliers);
107 | 	const uint8x16_t hi = clmul_high(src, multipliers);
108 | 	return eor3(lo, hi, dst);
109 | }
110 | #define fold_vec	ADD_SUFFIX(fold_vec)
111 | 
112 | /*
113 |  * Given v containing a 16-byte polynomial, and a pointer 'p' that points to the
114 |  * next '1 <= len <= 15' data bytes, rearrange the concatenation of v and the
115 |  * data into vectors x0 and x1 that contain 'len' bytes and 16 bytes,
116 |  * respectively.  Then fold x0 into x1 and return the result.  Assumes that
117 |  * 'p + len - 16' is in-bounds.
118 |  */
119 | #undef fold_partial_vec
120 | static forceinline ATTRIBUTES MAYBE_UNUSED uint8x16_t
121 | ADD_SUFFIX(fold_partial_vec)(uint8x16_t v, const u8 *p, size_t len,
122 | 			     poly64x2_t multipliers_1)
123 | {
124 | 	/*
125 | 	 * vqtbl1q_u8(v, shift_tab[len..len+15]) left shifts v by 16-len bytes.
126 | 	 * vqtbl1q_u8(v, shift_tab[len+16..len+31]) right shifts v by len bytes.
127 | 	 */
128 | 	static const u8 shift_tab[48] = {
129 | 		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* zero lane */
130 | 		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* zero lane */
131 | 		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* v[0..7] */
132 | 		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* v[8..15] */
133 | 		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* zero lane */
134 | 		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* zero lane */
135 | 	};
136 | 	const uint8x16_t lshift = vld1q_u8(&shift_tab[len]);
137 | 	const uint8x16_t rshift = vld1q_u8(&shift_tab[len + 16]);
138 | 	uint8x16_t x0, x1, bsl_mask;
139 | 
140 | 	/* x0 = v left-shifted by '16 - len' bytes */
141 | 	x0 = vqtbl1q_u8(v, lshift);
142 | 
143 | 	/* Create a vector of '16 - len' 0x00 bytes, then 'len' 0xff bytes. */
144 | 	bsl_mask = vreinterpretq_u8_s8(
145 | 			vshrq_n_s8(vreinterpretq_s8_u8(rshift), 7));
146 | 
147 | 	/*
148 | 	 * x1 = the last '16 - len' bytes from v (i.e. v right-shifted by 'len'
149 | 	 * bytes) followed by the remaining data.
150 | 	 */
151 | 	x1 = vbslq_u8(bsl_mask /* 0 bits select from arg3, 1 bits from arg2 */,
152 | 		      vld1q_u8(p + len - 16), vqtbl1q_u8(v, rshift));
153 | 
154 | 	return fold_vec(x0, x1, multipliers_1);
155 | }
156 | #define fold_partial_vec	ADD_SUFFIX(fold_partial_vec)
157 | 


--------------------------------------------------------------------------------
/lib/arm/crc32_pmull_wide.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * arm/crc32_pmull_wide.h - gzip CRC-32 with PMULL (extra-wide version)
  3 |  *
  4 |  * Copyright 2022 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | /*
 29 |  * This file is a "template" for instantiating PMULL-based crc32_arm functions.
 30 |  * The "parameters" are:
 31 |  *
 32 |  * SUFFIX:
 33 |  *	Name suffix to append to all instantiated functions.
 34 |  * ATTRIBUTES:
 35 |  *	Target function attributes to use.
 36 |  * ENABLE_EOR3:
 37 |  *	Use the eor3 instruction (from the sha3 extension).
 38 |  *
 39 |  * This is the extra-wide version; it uses an unusually large stride length of
 40 |  * 12, and it assumes that crc32 instructions are available too.  It's intended
 41 |  * for powerful CPUs that support both pmull and crc32 instructions, but where
 42 |  * throughput of pmull and xor (given enough instructions issued in parallel) is
 43 |  * significantly higher than that of crc32, thus making the crc32 instructions
 44 |  * (counterintuitively) not actually the fastest way to compute the CRC-32.  The
 45 |  * Apple M1 processor is an example of such a CPU.
 46 |  */
 47 | 
 48 | #include "crc32_pmull_helpers.h"
 49 | 
 50 | static ATTRIBUTES u32
 51 | ADD_SUFFIX(crc32_arm)(u32 crc, const u8 *p, size_t len)
 52 | {
 53 | 	uint8x16_t v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11;
 54 | 	/* Inputs shorter than 3 * 192 bytes take the 4-vector path below. */
 55 | 	if (len < 3 * 192) {
 56 | 		static const u64 _aligned_attribute(16) mults[3][2] = {
 57 | 			{ CRC32_X543_MODG, CRC32_X479_MODG }, /* 4 vecs */
 58 | 			{ CRC32_X287_MODG, CRC32_X223_MODG }, /* 2 vecs */
 59 | 			{ CRC32_X159_MODG, CRC32_X95_MODG },  /* 1 vec */
 60 | 		};
 61 | 		poly64x2_t multipliers_4, multipliers_2, multipliers_1;
 62 | 
 63 | 		if (len < 64)
 64 | 			goto tail; /* nothing to fold; crc32 insns only */
 65 | 		multipliers_4 = load_multipliers(mults[0]);
 66 | 		multipliers_2 = load_multipliers(mults[1]);
 67 | 		multipliers_1 = load_multipliers(mults[2]);
 68 | 		/*
 69 | 		 * Short length; don't bother aligning the pointer, and fold
 70 | 		 * 64 bytes (4 vectors) at a time, at most.
 71 | 		 */
 72 | 		v0 = veorq_u8(vld1q_u8(p + 0), u32_to_bytevec(crc));
 73 | 		v1 = vld1q_u8(p + 16);
 74 | 		v2 = vld1q_u8(p + 32);
 75 | 		v3 = vld1q_u8(p + 48);
 76 | 		p += 64;
 77 | 		len -= 64;
 78 | 		while (len >= 64) {
 79 | 			v0 = fold_vec(v0, vld1q_u8(p + 0), multipliers_4);
 80 | 			v1 = fold_vec(v1, vld1q_u8(p + 16), multipliers_4);
 81 | 			v2 = fold_vec(v2, vld1q_u8(p + 32), multipliers_4);
 82 | 			v3 = fold_vec(v3, vld1q_u8(p + 48), multipliers_4);
 83 | 			p += 64;
 84 | 			len -= 64;
 85 | 		}
 86 | 		v0 = fold_vec(v0, v2, multipliers_2);
 87 | 		v1 = fold_vec(v1, v3, multipliers_2);
 88 | 		if (len >= 32) {
 89 | 			v0 = fold_vec(v0, vld1q_u8(p + 0), multipliers_2);
 90 | 			v1 = fold_vec(v1, vld1q_u8(p + 16), multipliers_2);
 91 | 			p += 32;
 92 | 			len -= 32;
 93 | 		}
 94 | 		v0 = fold_vec(v0, v1, multipliers_1);
 95 | 	} else {
 96 | 		static const u64 _aligned_attribute(16) mults[4][2] = {
 97 | 			{ CRC32_X1567_MODG, CRC32_X1503_MODG }, /* 12 vecs */
 98 | 			{ CRC32_X799_MODG, CRC32_X735_MODG },   /* 6 vecs */
 99 | 			{ CRC32_X415_MODG, CRC32_X351_MODG },   /* 3 vecs */
100 | 			{ CRC32_X159_MODG, CRC32_X95_MODG },    /* 1 vec */
101 | 		};
102 | 		const poly64x2_t multipliers_12 = load_multipliers(mults[0]);
103 | 		const poly64x2_t multipliers_6 = load_multipliers(mults[1]);
104 | 		const poly64x2_t multipliers_3 = load_multipliers(mults[2]);
105 | 		const poly64x2_t multipliers_1 = load_multipliers(mults[3]);
106 | 		const size_t align = -(uintptr_t)p & 15;
107 | 		const uint8x16_t *vp;
108 | 
109 | 		/* Use crc32 insns to advance p to a 16-byte boundary. */
110 | 		if (align) {
111 | 			if (align & 1)
112 | 				crc = __crc32b(crc, *p++);
113 | 			if (align & 2) {
114 | 				crc = __crc32h(crc, le16_bswap(*(u16 *)p));
115 | 				p += 2;
116 | 			}
117 | 			if (align & 4) {
118 | 				crc = __crc32w(crc, le32_bswap(*(u32 *)p));
119 | 				p += 4;
120 | 			}
121 | 			if (align & 8) {
122 | 				crc = __crc32d(crc, le64_bswap(*(u64 *)p));
123 | 				p += 8;
124 | 			}
125 | 			len -= align;
126 | 		}
127 | 		vp = (const uint8x16_t *)p; /* p is 16-byte aligned here */
128 | 		v0 = veorq_u8(*vp++, u32_to_bytevec(crc));
129 | 		v1 = *vp++;
130 | 		v2 = *vp++;
131 | 		v3 = *vp++;
132 | 		v4 = *vp++;
133 | 		v5 = *vp++;
134 | 		v6 = *vp++;
135 | 		v7 = *vp++;
136 | 		v8 = *vp++;
137 | 		v9 = *vp++;
138 | 		v10 = *vp++;
139 | 		v11 = *vp++;
140 | 		len -= 192; /* the 12 vectors just loaded into v0-v11 */
141 | 		/* Fold 192 bytes (12 vectors) at a time. */
142 | 		do {
143 | 			v0 = fold_vec(v0, *vp++, multipliers_12);
144 | 			v1 = fold_vec(v1, *vp++, multipliers_12);
145 | 			v2 = fold_vec(v2, *vp++, multipliers_12);
146 | 			v3 = fold_vec(v3, *vp++, multipliers_12);
147 | 			v4 = fold_vec(v4, *vp++, multipliers_12);
148 | 			v5 = fold_vec(v5, *vp++, multipliers_12);
149 | 			v6 = fold_vec(v6, *vp++, multipliers_12);
150 | 			v7 = fold_vec(v7, *vp++, multipliers_12);
151 | 			v8 = fold_vec(v8, *vp++, multipliers_12);
152 | 			v9 = fold_vec(v9, *vp++, multipliers_12);
153 | 			v10 = fold_vec(v10, *vp++, multipliers_12);
154 | 			v11 = fold_vec(v11, *vp++, multipliers_12);
155 | 			len -= 192;
156 | 		} while (len >= 192);
157 | 
158 | 		/*
159 | 		 * Fewer than 192 bytes left.  Fold v0-v11 down to just v0,
160 | 		 * while processing up to 144 more bytes.
161 | 		 */
162 | 		v0 = fold_vec(v0, v6, multipliers_6);
163 | 		v1 = fold_vec(v1, v7, multipliers_6);
164 | 		v2 = fold_vec(v2, v8, multipliers_6);
165 | 		v3 = fold_vec(v3, v9, multipliers_6);
166 | 		v4 = fold_vec(v4, v10, multipliers_6);
167 | 		v5 = fold_vec(v5, v11, multipliers_6);
168 | 		if (len >= 96) {
169 | 			v0 = fold_vec(v0, *vp++, multipliers_6);
170 | 			v1 = fold_vec(v1, *vp++, multipliers_6);
171 | 			v2 = fold_vec(v2, *vp++, multipliers_6);
172 | 			v3 = fold_vec(v3, *vp++, multipliers_6);
173 | 			v4 = fold_vec(v4, *vp++, multipliers_6);
174 | 			v5 = fold_vec(v5, *vp++, multipliers_6);
175 | 			len -= 96;
176 | 		}
177 | 		v0 = fold_vec(v0, v3, multipliers_3);
178 | 		v1 = fold_vec(v1, v4, multipliers_3);
179 | 		v2 = fold_vec(v2, v5, multipliers_3);
180 | 		if (len >= 48) {
181 | 			v0 = fold_vec(v0, *vp++, multipliers_3);
182 | 			v1 = fold_vec(v1, *vp++, multipliers_3);
183 | 			v2 = fold_vec(v2, *vp++, multipliers_3);
184 | 			len -= 48;
185 | 		}
186 | 		v0 = fold_vec(v0, v1, multipliers_1);
187 | 		v0 = fold_vec(v0, v2, multipliers_1);
188 | 		p = (const u8 *)vp;
189 | 	}
190 | 	/* Reduce 128 to 32 bits using crc32 instructions. */
191 | 	crc = __crc32d(0, vgetq_lane_u64(vreinterpretq_u64_u8(v0), 0));
192 | 	crc = __crc32d(crc, vgetq_lane_u64(vreinterpretq_u64_u8(v0), 1));
193 | tail:
194 | 	/* Finish the remaining < 64 bytes using crc32 instructions. */
195 | 	if (len & 32) {
196 | 		crc = __crc32d(crc, get_unaligned_le64(p + 0));
197 | 		crc = __crc32d(crc, get_unaligned_le64(p + 8));
198 | 		crc = __crc32d(crc, get_unaligned_le64(p + 16));
199 | 		crc = __crc32d(crc, get_unaligned_le64(p + 24));
200 | 		p += 32;
201 | 	}
202 | 	if (len & 16) {
203 | 		crc = __crc32d(crc, get_unaligned_le64(p + 0));
204 | 		crc = __crc32d(crc, get_unaligned_le64(p + 8));
205 | 		p += 16;
206 | 	}
207 | 	if (len & 8) {
208 | 		crc = __crc32d(crc, get_unaligned_le64(p));
209 | 		p += 8;
210 | 	}
211 | 	if (len & 4) {
212 | 		crc = __crc32w(crc, get_unaligned_le32(p));
213 | 		p += 4;
214 | 	}
215 | 	if (len & 2) {
216 | 		crc = __crc32h(crc, get_unaligned_le16(p));
217 | 		p += 2;
218 | 	}
219 | 	if (len & 1)
220 | 		crc = __crc32b(crc, *p);
221 | 	return crc;
222 | }
223 | 
224 | #undef SUFFIX
225 | #undef ATTRIBUTES
226 | #undef ENABLE_EOR3
227 | 


--------------------------------------------------------------------------------
/lib/arm/matchfinder_impl.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * arm/matchfinder_impl.h - ARM implementations of matchfinder functions
 3 |  *
 4 |  * Copyright 2016 Eric Biggers
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person
 7 |  * obtaining a copy of this software and associated documentation
 8 |  * files (the "Software"), to deal in the Software without
 9 |  * restriction, including without limitation the rights to use,
10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
11 |  * copies of the Software, and to permit persons to whom the
12 |  * Software is furnished to do so, subject to the following
13 |  * conditions:
14 |  *
15 |  * The above copyright notice and this permission notice shall be
16 |  * included in all copies or substantial portions of the Software.
17 |  *
18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 |  * OTHER DEALINGS IN THE SOFTWARE.
26 |  */
27 | 
28 | #ifndef LIB_ARM_MATCHFINDER_IMPL_H
29 | #define LIB_ARM_MATCHFINDER_IMPL_H
30 | 
31 | #include "cpu_features.h"
32 | 
33 | #if HAVE_NEON_NATIVE
34 | static forceinline void
35 | matchfinder_init_neon(mf_pos_t *data, size_t size)
36 | {
37 | 	const int16x8_t fill = vdupq_n_s16(MATCHFINDER_INITVAL);
38 | 	int16x8_t *dst = (int16x8_t *)data;
39 | 
40 | 	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
41 | 	STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*dst) == 0);
42 | 	STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*dst)) == 0);
43 | 
44 | 	/* Fill 4 vectors per pass; size must be a nonzero multiple of that. */
45 | 	do {
46 | 		dst[0] = fill;
47 | 		dst[1] = fill;
48 | 		dst[2] = fill;
49 | 		dst[3] = fill;
50 | 		dst += 4;
51 | 	} while ((size -= 4 * sizeof(*dst)) != 0);
52 | }
53 | #define matchfinder_init matchfinder_init_neon
54 | 
55 | static forceinline void
56 | matchfinder_rebase_neon(mf_pos_t *data, size_t size)
57 | {
58 | 	const int16x8_t delta = vdupq_n_s16((u16)-MATCHFINDER_WINDOW_SIZE);
59 | 	int16x8_t *cur = (int16x8_t *)data;
60 | 
61 | 	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
62 | 	STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*cur) == 0);
63 | 	STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*cur)) == 0);
64 | 
65 | 	/* Saturating-add 'delta' to every entry, 4 vectors per pass. */
66 | 	do {
67 | 		cur[0] = vqaddq_s16(cur[0], delta);
68 | 		cur[1] = vqaddq_s16(cur[1], delta);
69 | 		cur[2] = vqaddq_s16(cur[2], delta);
70 | 		cur[3] = vqaddq_s16(cur[3], delta);
71 | 		cur += 4;
72 | 	} while ((size -= 4 * sizeof(*cur)) != 0);
73 | }
74 | #define matchfinder_rebase matchfinder_rebase_neon
75 | 
76 | #endif /* HAVE_NEON_NATIVE */
77 | 
78 | #endif /* LIB_ARM_MATCHFINDER_IMPL_H */
79 | 


--------------------------------------------------------------------------------
/lib/cpu_features_common.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * cpu_features_common.h - code shared by all lib/$arch/cpu_features.c
 3 |  *
 4 |  * Copyright 2020 Eric Biggers
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person
 7 |  * obtaining a copy of this software and associated documentation
 8 |  * files (the "Software"), to deal in the Software without
 9 |  * restriction, including without limitation the rights to use,
10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
11 |  * copies of the Software, and to permit persons to whom the
12 |  * Software is furnished to do so, subject to the following
13 |  * conditions:
14 |  *
15 |  * The above copyright notice and this permission notice shall be
16 |  * included in all copies or substantial portions of the Software.
17 |  *
18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 |  * OTHER DEALINGS IN THE SOFTWARE.
26 |  */
27 | 
28 | #ifndef LIB_CPU_FEATURES_COMMON_H
29 | #define LIB_CPU_FEATURES_COMMON_H
30 | 
31 | #if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING)
32 |    /* for strdup() and strtok_r() */
33 | #  undef _ANSI_SOURCE
34 | #  ifndef __APPLE__
35 | #    undef _GNU_SOURCE
36 | #    define _GNU_SOURCE
37 | #  endif
38 | #  include <stdio.h>
39 | #  include <stdlib.h>
40 | #  include <string.h>
41 | #endif
42 | 
43 | #include "lib_common.h"
44 | 
45 | struct cpu_feature {
46 | 	u32 bit;		/* mask cleared from the feature set */
47 | 	const char *name;	/* name that selects this feature */
48 | };
49 | 
50 | #if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING)
51 | /* Clear each feature named in $LIBDEFLATE_DISABLE_CPU_FEATURES (test-only). */
52 | static inline void
53 | disable_cpu_features_for_testing(u32 *features,
54 | 				 const struct cpu_feature *feature_table,
55 | 				 size_t feature_table_length)
56 | {
57 | 	const char *env = getenv("LIBDEFLATE_DISABLE_CPU_FEATURES");
58 | 	char *list, *tok, *state = NULL;
59 | 	size_t idx;
60 | 
61 | 	if (env == NULL)
62 | 		return;
63 | 	list = strdup(env);	/* private copy for strtok_r() to cut up */
64 | 	if (list == NULL)
65 | 		abort();
66 | 
67 | 	/* Walk the comma-separated names; an unknown name is fatal. */
68 | 	for (tok = strtok_r(list, ",", &state); tok != NULL;
69 | 	     tok = strtok_r(NULL, ",", &state)) {
70 | 		for (idx = 0; idx < feature_table_length; idx++) {
71 | 			if (strcmp(tok, feature_table[idx].name) == 0)
72 | 				break;
73 | 		}
74 | 		if (idx == feature_table_length) {
75 | 			fprintf(stderr,
76 | 				"unrecognized feature in LIBDEFLATE_DISABLE_CPU_FEATURES: \"%s\"\n",
77 | 				tok);
78 | 			abort();
79 | 		}
80 | 		*features &= ~feature_table[idx].bit;
81 | 	}
82 | 	free(list);
83 | }
84 | #else /* TEST_SUPPORT__DO_NOT_USE */
85 | static inline void
86 | disable_cpu_features_for_testing(u32 *features,
87 | 				 const struct cpu_feature *feature_table,
88 | 				 size_t feature_table_length)
89 | {	/* no-op unless built with test support and not FREESTANDING */
90 | }
91 | #endif /* !TEST_SUPPORT__DO_NOT_USE */
92 | 
93 | #endif /* LIB_CPU_FEATURES_COMMON_H */
94 | 


--------------------------------------------------------------------------------
/lib/deflate_compress.h:
--------------------------------------------------------------------------------
 1 | #ifndef LIB_DEFLATE_COMPRESS_H
 2 | #define LIB_DEFLATE_COMPRESS_H
 3 | 
 4 | #include "lib_common.h"
 5 | 
 6 | /*
 7 |  * DEFLATE compression is private to deflate_compress.c, but we do need to be
 8 |  * able to query the compression level for zlib and gzip header generation;
 9 |  * e.g. libdeflate_gzip_compress() derives the gzip XFL byte from it.
10 |  */
11 | struct libdeflate_compressor;
12 | 
13 | unsigned int libdeflate_get_compression_level(struct libdeflate_compressor *c);
14 | 
15 | #endif /* LIB_DEFLATE_COMPRESS_H */
16 | 


--------------------------------------------------------------------------------
/lib/deflate_constants.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * deflate_constants.h - constants for the DEFLATE compression format
 3 |  */
 4 | 
 5 | #ifndef LIB_DEFLATE_CONSTANTS_H
 6 | #define LIB_DEFLATE_CONSTANTS_H
 7 | 
 8 | /* Valid block types  */
 9 | #define DEFLATE_BLOCKTYPE_UNCOMPRESSED		0
10 | #define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN	1
11 | #define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN	2
12 | 
13 | /* Minimum and maximum supported match lengths (in bytes)  */
14 | #define DEFLATE_MIN_MATCH_LEN			3
15 | #define DEFLATE_MAX_MATCH_LEN			258
16 | 
17 | /* Maximum supported match offset (in bytes) */
18 | #define DEFLATE_MAX_MATCH_OFFSET		32768	/* 1 << WINDOW_ORDER */
19 | 
20 | /* log2 of DEFLATE_MAX_MATCH_OFFSET */
21 | #define DEFLATE_WINDOW_ORDER			15
22 | 
23 | /* Number of symbols in each Huffman code.  Note: for the literal/length
24 |  * and offset codes, these are actually the maximum values; a given block
25 |  * might use fewer symbols.  */
26 | #define DEFLATE_NUM_PRECODE_SYMS		19
27 | #define DEFLATE_NUM_LITLEN_SYMS			288
28 | #define DEFLATE_NUM_OFFSET_SYMS			32
29 | 
30 | /* The maximum number of symbols across all codes (the litlen code's)  */
31 | #define DEFLATE_MAX_NUM_SYMS			288
32 | 
33 | /* Division of symbols in the literal/length code  */
34 | #define DEFLATE_NUM_LITERALS			256	/* symbols 0-255 */
35 | #define DEFLATE_END_OF_BLOCK			256	/* symbol 256 */
36 | #define DEFLATE_FIRST_LEN_SYM			257	/* lengths start here */
37 | 
38 | /* Maximum codeword length, in bits, within each Huffman code  */
39 | #define DEFLATE_MAX_PRE_CODEWORD_LEN		7
40 | #define DEFLATE_MAX_LITLEN_CODEWORD_LEN		15
41 | #define DEFLATE_MAX_OFFSET_CODEWORD_LEN		15
42 | 
43 | /* The maximum codeword length across all codes  */
44 | #define DEFLATE_MAX_CODEWORD_LEN		15
45 | 
46 | /* Maximum possible overrun when decoding codeword lengths  */
47 | #define DEFLATE_MAX_LENS_OVERRUN		137
48 | 
49 | /*
50 |  * Maximum number of extra bits that may be required to represent a match
51 |  * length or offset.
52 |  */
53 | #define DEFLATE_MAX_EXTRA_LENGTH_BITS		5
54 | #define DEFLATE_MAX_EXTRA_OFFSET_BITS		13
55 | 
56 | #endif /* LIB_DEFLATE_CONSTANTS_H */
57 | 


--------------------------------------------------------------------------------
/lib/gzip_compress.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * gzip_compress.c - compress with a gzip wrapper
 3 |  *
 4 |  * Copyright 2016 Eric Biggers
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person
 7 |  * obtaining a copy of this software and associated documentation
 8 |  * files (the "Software"), to deal in the Software without
 9 |  * restriction, including without limitation the rights to use,
10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
11 |  * copies of the Software, and to permit persons to whom the
12 |  * Software is furnished to do so, subject to the following
13 |  * conditions:
14 |  *
15 |  * The above copyright notice and this permission notice shall be
16 |  * included in all copies or substantial portions of the Software.
17 |  *
18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 |  * OTHER DEALINGS IN THE SOFTWARE.
26 |  */
27 | 
28 | #include "deflate_compress.h"
29 | #include "gzip_constants.h"
30 | 
31 | LIBDEFLATEAPI size_t
32 | libdeflate_gzip_compress(struct libdeflate_compressor *c,
33 | 			 const void *in, size_t in_nbytes,
34 | 			 void *out, size_t out_nbytes_avail)
35 | {
36 | 	u8 * const out_begin = out;
37 | 	u8 *dst = out_begin;
38 | 	unsigned level;
39 | 	size_t deflate_size;
40 | 
41 | 	/*
42 | 	 * The output must have room for the fixed header and footer plus at
43 | 	 * least one byte of compressed data.
44 | 	 */
45 | 	if (out_nbytes_avail <= GZIP_MIN_OVERHEAD)
46 | 		return 0;
47 | 
48 | 	/* Header: ID1, ID2, CM, FLG, MTIME, XFL, OS */
49 | 	dst[0] = GZIP_ID1;
50 | 	dst[1] = GZIP_ID2;
51 | 	dst[2] = GZIP_CM_DEFLATE;
52 | 	dst[3] = 0;	/* FLG: no flag bits set */
53 | 	put_unaligned_le32(GZIP_MTIME_UNAVAILABLE, &dst[4]);
54 | 	/* XFL is set only for the lowest and highest compression levels. */
55 | 	level = libdeflate_get_compression_level(c);
56 | 	if (level < 2)
57 | 		dst[8] = GZIP_XFL_FASTEST_COMPRESSION;
58 | 	else if (level >= 8)
59 | 		dst[8] = GZIP_XFL_SLOWEST_COMPRESSION;
60 | 	else
61 | 		dst[8] = 0;
62 | 	dst[9] = GZIP_OS_UNKNOWN;
63 | 	dst += GZIP_MIN_HEADER_SIZE;
64 | 
65 | 	/* Compressed data; a return value of 0 is passed on as failure. */
66 | 	deflate_size = libdeflate_deflate_compress(c, in, in_nbytes, dst,
67 | 					out_nbytes_avail - GZIP_MIN_OVERHEAD);
68 | 	if (deflate_size == 0)
69 | 		return 0;
70 | 	dst += deflate_size;
71 | 
72 | 	/* Footer, part 1: CRC32 of the uncompressed data */
73 | 	put_unaligned_le32(libdeflate_crc32(0, in, in_nbytes), dst);
74 | 	dst += 4;
75 | 
76 | 	/* Footer, part 2: ISIZE, the input size truncated to 32 bits */
77 | 	put_unaligned_le32((u32)in_nbytes, dst);
78 | 	dst += 4;
79 | 
80 | 	/* Total bytes written: header + compressed data + footer */
81 | 	return dst - out_begin;
82 | }
83 | 
84 | LIBDEFLATEAPI size_t
85 | libdeflate_gzip_compress_bound(struct libdeflate_compressor *c,
86 | 			       size_t in_nbytes)
87 | {
88 | 	size_t bound = libdeflate_deflate_compress_bound(c, in_nbytes);
89 | 	return bound + GZIP_MIN_OVERHEAD;	/* plus gzip header and footer */
90 | }
91 | 


--------------------------------------------------------------------------------
/lib/gzip_constants.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * gzip_constants.h - constants for the gzip wrapper format
 3 |  */
 4 | 
 5 | #ifndef LIB_GZIP_CONSTANTS_H
 6 | #define LIB_GZIP_CONSTANTS_H
 7 | 
 8 | #define GZIP_MIN_HEADER_SIZE	10	/* fixed fields only */
 9 | #define GZIP_FOOTER_SIZE	8	/* CRC32 + ISIZE */
10 | #define GZIP_MIN_OVERHEAD	(GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
11 | 
12 | #define GZIP_ID1		0x1F
13 | #define GZIP_ID2		0x8B
14 | 
15 | #define GZIP_CM_DEFLATE		8
16 | 
17 | #define GZIP_FTEXT		0x01
18 | #define GZIP_FHCRC		0x02	/* header CRC16 follows */
19 | #define GZIP_FEXTRA		0x04	/* extra field follows */
20 | #define GZIP_FNAME		0x08	/* file name follows */
21 | #define GZIP_FCOMMENT		0x10	/* comment follows */
22 | #define GZIP_FRESERVED		0xE0	/* must be clear */
23 | 
24 | #define GZIP_MTIME_UNAVAILABLE	0
25 | 
26 | #define GZIP_XFL_SLOWEST_COMPRESSION	0x02
27 | #define GZIP_XFL_FASTEST_COMPRESSION	0x04
28 | 
29 | #define GZIP_OS_FAT		0
30 | #define GZIP_OS_AMIGA		1
31 | #define GZIP_OS_VMS		2
32 | #define GZIP_OS_UNIX		3
33 | #define GZIP_OS_VM_CMS		4
34 | #define GZIP_OS_ATARI_TOS	5
35 | #define GZIP_OS_HPFS		6
36 | #define GZIP_OS_MACINTOSH	7
37 | #define GZIP_OS_Z_SYSTEM	8
38 | #define GZIP_OS_CP_M		9
39 | #define GZIP_OS_TOPS_20		10
40 | #define GZIP_OS_NTFS		11
41 | #define GZIP_OS_QDOS		12
42 | #define GZIP_OS_RISCOS		13
43 | #define GZIP_OS_UNKNOWN		255
44 | 
45 | #endif /* LIB_GZIP_CONSTANTS_H */
46 | 


--------------------------------------------------------------------------------
/lib/gzip_decompress.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * gzip_decompress.c - decompress with a gzip wrapper
  3 |  *
  4 |  * Copyright 2016 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #include "lib_common.h"
 29 | #include "gzip_constants.h"
 30 | 
 31 | LIBDEFLATEAPI enum libdeflate_result
 32 | libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *d,
 33 | 			      const void *in, size_t in_nbytes,
 34 | 			      void *out, size_t out_nbytes_avail,
 35 | 			      size_t *actual_in_nbytes_ret,
 36 | 			      size_t *actual_out_nbytes_ret)
 37 | {
 38 | 	const u8 *in_next = in;
 39 | 	const u8 * const in_end = in_next + in_nbytes;
 40 | 	u8 flg;
 41 | 	size_t actual_in_nbytes;
 42 | 	size_t actual_out_nbytes;
 43 | 	enum libdeflate_result result;
 44 | 	/* Parse one gzip member: header, DEFLATE stream, then 8-byte footer. */
 45 | 	if (in_nbytes < GZIP_MIN_OVERHEAD)
 46 | 		return LIBDEFLATE_BAD_DATA;
 47 | 
 48 | 	/* ID1 */
 49 | 	if (*in_next++ != GZIP_ID1)
 50 | 		return LIBDEFLATE_BAD_DATA;
 51 | 	/* ID2 */
 52 | 	if (*in_next++ != GZIP_ID2)
 53 | 		return LIBDEFLATE_BAD_DATA;
 54 | 	/* CM */
 55 | 	if (*in_next++ != GZIP_CM_DEFLATE)
 56 | 		return LIBDEFLATE_BAD_DATA;
 57 | 	flg = *in_next++;	/* FLG */
 58 | 	/* MTIME (skipped) */
 59 | 	in_next += 4;
 60 | 	/* XFL (skipped) */
 61 | 	in_next += 1;
 62 | 	/* OS (skipped) */
 63 | 	in_next += 1;
 64 | 
 65 | 	if (flg & GZIP_FRESERVED)	/* reserved flag bits set */
 66 | 		return LIBDEFLATE_BAD_DATA;
 67 | 
 68 | 	/* Extra field: 16-bit length, then that many bytes (skipped) */
 69 | 	if (flg & GZIP_FEXTRA) {
 70 | 		u16 xlen = get_unaligned_le16(in_next);
 71 | 		in_next += 2;
 72 | 		/* The field and the footer must both fit in what is left. */
 73 | 		if (in_end - in_next < (u32)xlen + GZIP_FOOTER_SIZE)
 74 | 			return LIBDEFLATE_BAD_DATA;
 75 | 
 76 | 		in_next += xlen;
 77 | 	}
 78 | 
 79 | 	/* Original file name (zero terminated); skipped, not returned */
 80 | 	if (flg & GZIP_FNAME) {
 81 | 		while (*in_next++ != 0 && in_next != in_end)
 82 | 			;
 83 | 		if (in_end - in_next < GZIP_FOOTER_SIZE)
 84 | 			return LIBDEFLATE_BAD_DATA;
 85 | 	}
 86 | 
 87 | 	/* File comment (zero terminated); skipped, not returned */
 88 | 	if (flg & GZIP_FCOMMENT) {
 89 | 		while (*in_next++ != 0 && in_next != in_end)
 90 | 			;
 91 | 		if (in_end - in_next < GZIP_FOOTER_SIZE)
 92 | 			return LIBDEFLATE_BAD_DATA;
 93 | 	}
 94 | 
 95 | 	/* CRC16 for gzip header (skipped, not verified) */
 96 | 	if (flg & GZIP_FHCRC) {
 97 | 		in_next += 2;
 98 | 		if (in_end - in_next < GZIP_FOOTER_SIZE)
 99 | 			return LIBDEFLATE_BAD_DATA;
100 | 	}
101 | 
102 | 	/* Compressed data: everything before the last 8 bytes is offered */
103 | 	result = libdeflate_deflate_decompress_ex(d, in_next,
104 | 					in_end - GZIP_FOOTER_SIZE - in_next,
105 | 					out, out_nbytes_avail,
106 | 					&actual_in_nbytes,
107 | 					actual_out_nbytes_ret);
108 | 	if (result != LIBDEFLATE_SUCCESS)
109 | 		return result;
110 | 	/* With no size pointer, the full output buffer is checksummed. */
111 | 	if (actual_out_nbytes_ret)
112 | 		actual_out_nbytes = *actual_out_nbytes_ret;
113 | 	else
114 | 		actual_out_nbytes = out_nbytes_avail;
115 | 
116 | 	in_next += actual_in_nbytes;
117 | 
118 | 	/* CRC32 of the decompressed data */
119 | 	if (libdeflate_crc32(0, out, actual_out_nbytes) !=
120 | 	    get_unaligned_le32(in_next))
121 | 		return LIBDEFLATE_BAD_DATA;
122 | 	in_next += 4;
123 | 
124 | 	/* ISIZE: decompressed size truncated to 32 bits */
125 | 	if ((u32)actual_out_nbytes != get_unaligned_le32(in_next))
126 | 		return LIBDEFLATE_BAD_DATA;
127 | 	in_next += 4;
128 | 
129 | 	if (actual_in_nbytes_ret)
130 | 		*actual_in_nbytes_ret = in_next - (u8 *)in;
131 | 
132 | 	return LIBDEFLATE_SUCCESS;
133 | }
134 | 
135 | LIBDEFLATEAPI enum libdeflate_result
136 | libdeflate_gzip_decompress(struct libdeflate_decompressor *d,
137 | 			   const void *in, size_t in_nbytes,
138 | 			   void *out, size_t out_nbytes_avail,
139 | 			   size_t *actual_out_nbytes_ret)
140 | {
141 | 	/* Same as the _ex variant; the input byte count is not reported. */
142 | 	return libdeflate_gzip_decompress_ex(d, in, in_nbytes, out,
143 | 			out_nbytes_avail, NULL, actual_out_nbytes_ret);
144 | }
145 | 


--------------------------------------------------------------------------------
/lib/ht_matchfinder.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * ht_matchfinder.h - Lempel-Ziv matchfinding with a hash table
  3 |  *
  4 |  * Copyright 2022 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  *
 27 |  * ---------------------------------------------------------------------------
 28 |  *
 29 |  * This is a Hash Table (ht) matchfinder.
 30 |  *
 31 |  * This is a variant of the Hash Chains (hc) matchfinder that is optimized for
 32 |  * very fast compression.  The ht_matchfinder stores the hash chains inline in
 33 |  * the hash table, whereas the hc_matchfinder stores them in a separate array.
 34 |  * Storing the hash chains inline is the faster method when max_search_depth
 35 |  * (the maximum chain length) is very small.  It is not appropriate when
 36 |  * max_search_depth is larger, as then it uses too much memory.
 37 |  *
 38 |  * Due to its focus on speed, the ht_matchfinder doesn't support length 3
 39 |  * matches.  It also doesn't allow max_search_depth to vary at runtime; it is
 40 |  * fixed at build time as HT_MATCHFINDER_BUCKET_SIZE.
 41 |  *
 42 |  * See hc_matchfinder.h for more information.
 43 |  */
 44 | 
 45 | #ifndef LIB_HT_MATCHFINDER_H
 46 | #define LIB_HT_MATCHFINDER_H
 47 | 
 48 | #include "matchfinder_common.h"
 49 | 
/*
 * log2 of the number of buckets in the hash table.  This is also the number of
 * hash bits requested from lz_hash().
 */
#define HT_MATCHFINDER_HASH_ORDER	15
/*
 * Number of positions stored in each bucket.  As described in the file header,
 * this is the fixed max_search_depth.
 */
#define HT_MATCHFINDER_BUCKET_SIZE	2

/* Shortest match reported; the code statically asserts that this is 4. */
#define HT_MATCHFINDER_MIN_MATCH_LEN	4
/*
 * Minimum value of max_len for ht_matchfinder_longest_match().  That function
 * hashes the 4 bytes starting at in_next + 1, hence 5 bytes.
 */
#define HT_MATCHFINDER_REQUIRED_NBYTES	5
 56 | 
/*
 * The matchfinder state: a hash table with (1 << HT_MATCHFINDER_HASH_ORDER)
 * buckets of HT_MATCHFINDER_BUCKET_SIZE positions each.  Within a bucket,
 * entry [0] is the most recently inserted position; older positions are
 * shifted towards the higher indices.
 */
struct MATCHFINDER_ALIGNED ht_matchfinder {
	mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER]
			 [HT_MATCHFINDER_BUCKET_SIZE];
};
 61 | 
 62 | static forceinline void
 63 | ht_matchfinder_init(struct ht_matchfinder *mf)
 64 | {
 65 | 	STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
 66 | 
 67 | 	matchfinder_init((mf_pos_t *)mf, sizeof(*mf));
 68 | }
 69 | 
 70 | static forceinline void
 71 | ht_matchfinder_slide_window(struct ht_matchfinder *mf)
 72 | {
 73 | 	matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
 74 | }
 75 | 
/*
 * Insert the current position into the hash table and search the bucket it
 * hashes to for the longest match of length >= 4.
 *
 * @mf:		the matchfinder state
 * @in_base_p:	pointer to the base pointer that stored positions are relative
 *		to.  It is advanced by MATCHFINDER_WINDOW_SIZE whenever the
 *		window is slid.
 * @in_next:	the current position in the input
 * @max_len:	maximum match length to return.
 *		Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES
 * @nice_len:	stop searching once a match of at least this length is found
 * @next_hash:	on entry, the hash to use for the current position; on return,
 *		the hash of the 4 bytes starting at in_next + 1
 * @offset_ret:	set to the distance from in_next back to the best match
 *		(0 when no match was found)
 *
 * Returns the length of the best match found, or 0 if there was none.
 */
static forceinline u32
ht_matchfinder_longest_match(struct ht_matchfinder * const mf,
			     const u8 ** const in_base_p,
			     const u8 * const in_next,
			     const u32 max_len,
			     const u32 nice_len,
			     u32 * const next_hash,
			     u32 * const offset_ret)
{
	u32 best_len = 0;
	/* Starting at in_next makes the returned offset 0 if nothing matches. */
	const u8 *best_matchptr = in_next;
	u32 cur_pos = in_next - *in_base_p;
	const u8 *in_base;
	mf_pos_t cutoff;
	u32 hash;
	u32 seq;
	mf_pos_t cur_node;
	const u8 *matchptr;
#if HT_MATCHFINDER_BUCKET_SIZE > 1
	mf_pos_t to_insert;
	u32 len;
#endif
#if HT_MATCHFINDER_BUCKET_SIZE > 2
	int i;
#endif

	/* This is assumed throughout this function. */
	STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4);

	/* A full window has been processed: rebase the table and the base. */
	if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
		ht_matchfinder_slide_window(mf);
		*in_base_p += MATCHFINDER_WINDOW_SIZE;
		cur_pos = 0;
	}
	in_base = *in_base_p;
	/* Entries <= cutoff are not followed; the search stops at them. */
	cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;

	hash = *next_hash;
	STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5);
	/* Precompute the next position's hash and prefetch its bucket. */
	*next_hash = lz_hash(get_unaligned_le32(in_next + 1),
			     HT_MATCHFINDER_HASH_ORDER);
	/* The 4 bytes at the current position, for quick candidate checks. */
	seq = load_u32_unaligned(in_next);
	prefetchw(&mf->hash_tab[*next_hash]);
#if HT_MATCHFINDER_BUCKET_SIZE == 1
	/* Hand-unrolled version for BUCKET_SIZE == 1 */
	cur_node = mf->hash_tab[hash][0];
	mf->hash_tab[hash][0] = cur_pos;
	if (cur_node <= cutoff)
		goto out;
	matchptr = &in_base[cur_node];
	if (load_u32_unaligned(matchptr) == seq) {
		best_len = lz_extend(in_next, matchptr, 4, max_len);
		best_matchptr = matchptr;
	}
#elif HT_MATCHFINDER_BUCKET_SIZE == 2
	/*
	 * Hand-unrolled version for BUCKET_SIZE == 2.  The logic here also
	 * differs slightly in that it copies the first entry to the second even
	 * if nice_len is reached on the first, as this can be slightly faster.
	 */
	cur_node = mf->hash_tab[hash][0];
	mf->hash_tab[hash][0] = cur_pos;
	if (cur_node <= cutoff)
		goto out;
	matchptr = &in_base[cur_node];

	/* Shift the old first entry into the second slot. */
	to_insert = cur_node;
	cur_node = mf->hash_tab[hash][1];
	mf->hash_tab[hash][1] = to_insert;

	if (load_u32_unaligned(matchptr) == seq) {
		best_len = lz_extend(in_next, matchptr, 4, max_len);
		best_matchptr = matchptr;
		if (cur_node <= cutoff || best_len >= nice_len)
			goto out;
		matchptr = &in_base[cur_node];
		/*
		 * Besides the first 4 bytes, also compare the 4 bytes ending at
		 * index best_len: a candidate that differs there cannot be
		 * longer than the current best, so extending it is skipped.
		 */
		if (load_u32_unaligned(matchptr) == seq &&
		    load_u32_unaligned(matchptr + best_len - 3) ==
		    load_u32_unaligned(in_next + best_len - 3)) {
			len = lz_extend(in_next, matchptr, 4, max_len);
			if (len > best_len) {
				best_len = len;
				best_matchptr = matchptr;
			}
		}
	} else {
		/* The first entry didn't match; try the second one alone. */
		if (cur_node <= cutoff)
			goto out;
		matchptr = &in_base[cur_node];
		if (load_u32_unaligned(matchptr) == seq) {
			best_len = lz_extend(in_next, matchptr, 4, max_len);
			best_matchptr = matchptr;
		}
	}
#else
	/* Generic version for HT_MATCHFINDER_BUCKET_SIZE > 2 */
	/* Each slot receives the value that was previously one slot earlier. */
	to_insert = cur_pos;
	for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) {
		cur_node = mf->hash_tab[hash][i];
		mf->hash_tab[hash][i] = to_insert;
		if (cur_node <= cutoff)
			goto out;
		matchptr = &in_base[cur_node];
		if (load_u32_unaligned(matchptr) == seq) {
			len = lz_extend(in_next, matchptr, 4, max_len);
			if (len > best_len) {
				best_len = len;
				best_matchptr = matchptr;
				if (best_len >= nice_len)
					goto out;
			}
		}
		to_insert = cur_node;
	}
#endif
out:
	*offset_ret = in_next - best_matchptr;
	return best_len;
}
196 | 
197 | static forceinline void
198 | ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf,
199 | 			  const u8 ** const in_base_p,
200 | 			  const u8 *in_next,
201 | 			  const u8 * const in_end,
202 | 			  const u32 count,
203 | 			  u32 * const next_hash)
204 | {
205 | 	s32 cur_pos = in_next - *in_base_p;
206 | 	u32 hash;
207 | 	u32 remaining = count;
208 | 	int i;
209 | 
210 | 	if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next))
211 | 		return;
212 | 
213 | 	if (cur_pos + count - 1 >= MATCHFINDER_WINDOW_SIZE) {
214 | 		ht_matchfinder_slide_window(mf);
215 | 		*in_base_p += MATCHFINDER_WINDOW_SIZE;
216 | 		cur_pos -= MATCHFINDER_WINDOW_SIZE;
217 | 	}
218 | 
219 | 	hash = *next_hash;
220 | 	do {
221 | 		for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--)
222 | 			mf->hash_tab[hash][i] = mf->hash_tab[hash][i - 1];
223 | 		mf->hash_tab[hash][0] = cur_pos;
224 | 
225 | 		hash = lz_hash(get_unaligned_le32(++in_next),
226 | 			       HT_MATCHFINDER_HASH_ORDER);
227 | 		cur_pos++;
228 | 	} while (--remaining);
229 | 
230 | 	prefetchw(&mf->hash_tab[hash]);
231 | 	*next_hash = hash;
232 | }
233 | 
234 | #endif /* LIB_HT_MATCHFINDER_H */
235 | 


--------------------------------------------------------------------------------
/lib/lib_common.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * lib_common.h - internal header included by all library code
  3 |  */
  4 | 
  5 | #ifndef LIB_LIB_COMMON_H
  6 | #define LIB_LIB_COMMON_H
  7 | 
  8 | #ifdef LIBDEFLATE_H
  9 |  /*
 10 |   * When building the library, LIBDEFLATEAPI needs to be defined properly before
 11 |   * including libdeflate.h.
 12 |   */
 13 | #  error "lib_common.h must always be included before libdeflate.h"
 14 | #endif
 15 | 
/*
 * LIBDEFLATE_EXPORT_SYM: attribute that exports a symbol from the library.
 * It is dllexport when building a DLL on Windows/Cygwin, default ELF-style
 * visibility on GNU-compatible compilers, and empty otherwise.
 */
#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#  define LIBDEFLATE_EXPORT_SYM  __declspec(dllexport)
#elif defined(__GNUC__)
#  define LIBDEFLATE_EXPORT_SYM  __attribute__((visibility("default")))
#else
#  define LIBDEFLATE_EXPORT_SYM
#endif

/*
 * On i386, gcc assumes that the stack is 16-byte aligned at function entry.
 * However, some compilers (e.g. MSVC) and programming languages (e.g. Delphi)
 * only guarantee 4-byte alignment when calling functions.  This is mainly an
 * issue on Windows, but it has been seen on Linux too.  Work around this ABI
 * incompatibility by realigning the stack pointer when entering libdeflate.
 * This prevents crashes in SSE/AVX code.
 */
#if defined(__GNUC__) && defined(__i386__)
#  define LIBDEFLATE_ALIGN_STACK  __attribute__((force_align_arg_pointer))
#else
#  define LIBDEFLATE_ALIGN_STACK
#endif

/*
 * Prefix for the library's public API functions: exports the symbol and, where
 * needed, realigns the stack on entry.
 */
#define LIBDEFLATEAPI	LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
 39 | 
 40 | #include "../common_defs.h"
 41 | 
/* Signatures of the allocation and deallocation callbacks. */
typedef void *(*malloc_func_t)(size_t);
typedef void (*free_func_t)(void *);

/*
 * The allocator callbacks used by default.  They are defined in utils.c and
 * can be replaced with libdeflate_set_memory_allocator().
 */
extern malloc_func_t libdeflate_default_malloc_func;
extern free_func_t libdeflate_default_free_func;

/*
 * Allocate 'size' bytes aligned to an 'alignment'-byte boundary using
 * 'malloc_func'.  The result must be released with libdeflate_aligned_free(),
 * not passed directly to the underlying free function, because the pointer
 * returned by 'malloc_func' is stashed just before the aligned address.
 */
void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
				size_t alignment, size_t size);
void libdeflate_aligned_free(free_func_t free_func, void *ptr);
 51 | 
#ifdef FREESTANDING
/*
 * With -ffreestanding, <string.h> may be missing, and we must provide
 * implementations of memset(), memcpy(), memmove(), and memcmp().
 * See https://gcc.gnu.org/onlinedocs/gcc/Standards.html
 *
 * Also, -ffreestanding disables interpreting calls to these functions as
 * built-ins.  E.g., calling memcpy(&v, p, WORDBYTES) will make a function call,
 * not be optimized to a single load instruction.  For performance reasons we
 * don't want that.  So, declare these functions as macros that expand to the
 * corresponding built-ins.  This approach is recommended in the gcc man page.
 * We still need the actual function definitions in case gcc calls them.
 */
void *memset(void *s, int c, size_t n);
#define memset(s, c, n)		__builtin_memset((s), (c), (n))

void *memcpy(void *dest, const void *src, size_t n);
#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))

void *memmove(void *dest, const void *src, size_t n);
#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))

int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))

/*
 * Runtime assertions are never enabled in freestanding builds.
 * NOTE(review): presumably because the assertion failure handler needs
 * <stdio.h> and <stdlib.h> -- confirm.
 */
#undef LIBDEFLATE_ENABLE_ASSERTIONS
#else
#  include <string.h>
   /*
    * To prevent false positive static analyzer warnings, ensure that assertions
    * are visible to the static analyzer.
    */
#  ifdef __clang_analyzer__
#    define LIBDEFLATE_ENABLE_ASSERTIONS
#  endif
#endif
 88 | 
 89 | /*
 90 |  * Runtime assertion support.  Don't enable this in production builds; it may
 91 |  * hurt performance significantly.
 92 |  */
 93 | #ifdef LIBDEFLATE_ENABLE_ASSERTIONS
 94 | NORETURN void
 95 | libdeflate_assertion_failed(const char *expr, const char *file, int line);
 96 | #define ASSERT(expr) { if (unlikely(!(expr))) \
 97 | 	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
 98 | #else
 99 | #define ASSERT(expr) (void)(expr)
100 | #endif
101 | 
/*
 * Token-pasting helpers.  CONCAT() goes through CONCAT_IMPL() so that its
 * arguments are macro-expanded before being pasted together.  ADD_SUFFIX(name)
 * appends the current definition of the SUFFIX macro to 'name'.
 */
#define CONCAT_IMPL(a, b)	a##b
#define CONCAT(a, b)		CONCAT_IMPL(a, b)
#define ADD_SUFFIX(name)	CONCAT(name, SUFFIX)
105 | 
106 | #endif /* LIB_LIB_COMMON_H */
107 | 


--------------------------------------------------------------------------------
/lib/matchfinder_common.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * matchfinder_common.h - common code for Lempel-Ziv matchfinding
  3 |  */
  4 | 
  5 | #ifndef LIB_MATCHFINDER_COMMON_H
  6 | #define LIB_MATCHFINDER_COMMON_H
  7 | 
  8 | #include "lib_common.h"
  9 | 
 10 | #ifndef MATCHFINDER_WINDOW_ORDER
 11 | #  error "MATCHFINDER_WINDOW_ORDER must be defined!"
 12 | #endif
 13 | 
 14 | /*
 15 |  * Given a 32-bit value that was loaded with the platform's native endianness,
 16 |  * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
 17 |  * bits contain the first 3 bytes, arranged in octets in a platform-dependent
 18 |  * order, at the memory location from which the input 32-bit value was loaded.
 19 |  */
 20 | static forceinline u32
 21 | loaded_u32_to_u24(u32 v)
 22 | {
 23 | 	if (CPU_IS_LITTLE_ENDIAN())
 24 | 		return v & 0xFFFFFF;
 25 | 	else
 26 | 		return v >> 8;
 27 | }
 28 | 
 29 | /*
 30 |  * Load the next 3 bytes from @p into the 24 low-order bits of a 32-bit value.
 31 |  * The order in which the 3 bytes will be arranged as octets in the 24 bits is
 32 |  * platform-dependent.  At least 4 bytes (not 3) must be available at @p.
 33 |  */
 34 | static forceinline u32
 35 | load_u24_unaligned(const u8 *p)
 36 | {
 37 | #if UNALIGNED_ACCESS_IS_FAST
 38 | 	return loaded_u32_to_u24(load_u32_unaligned(p));
 39 | #else
 40 | 	if (CPU_IS_LITTLE_ENDIAN())
 41 | 		return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
 42 | 	else
 43 | 		return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
 44 | #endif
 45 | }
 46 | 
/* Size of the matchfinder window, in bytes: 2^MATCHFINDER_WINDOW_ORDER. */
#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)

/* Type of the position entries stored in the matchfinder buffers. */
typedef s16 mf_pos_t;

/*
 * Value that matchfinder_init() fills the buffers with:
 * -MATCHFINDER_WINDOW_SIZE converted to mf_pos_t.
 */
#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
 52 | 
 53 | /*
 54 |  * This is the memory address alignment, in bytes, required for the matchfinder
 55 |  * buffers by the architecture-specific implementations of matchfinder_init()
 56 |  * and matchfinder_rebase().  "Matchfinder buffer" means an entire struct
 57 |  * hc_matchfinder, bt_matchfinder, or ht_matchfinder; the next_tab field of
 58 |  * struct hc_matchfinder; or the child_tab field of struct bt_matchfinder.
 59 |  *
 60 |  * This affects how the entire 'struct deflate_compressor' is allocated, since
 61 |  * the matchfinder structures are embedded inside it.
 62 |  *
 63 |  * Currently the maximum memory address alignment required is 32 bytes, needed
 64 |  * by the AVX-2 matchfinder functions.
 65 |  */
 66 | #define MATCHFINDER_MEM_ALIGNMENT	32
 67 | 
 68 | /*
 69 |  * This declares a size, in bytes, that is guaranteed to divide the sizes of the
 70 |  * matchfinder buffers (where "matchfinder buffers" is as defined for
 71 |  * MATCHFINDER_MEM_ALIGNMENT).  The architecture-specific implementations of
 72 |  * matchfinder_init() and matchfinder_rebase() take advantage of this value.
 73 |  *
 74 |  * Currently the maximum size alignment required is 128 bytes, needed by
 75 |  * the AVX-2 matchfinder functions.  However, the RISC-V Vector Extension
 76 |  * matchfinder functions can, in principle, take advantage of a larger size
 77 |  * alignment.  Therefore, we set this to 1024, which still easily divides the
 78 |  * actual sizes that result from the current matchfinder struct definitions.
 79 |  * This value can safely be changed to any power of two that is >= 128.
 80 |  */
 81 | #define MATCHFINDER_SIZE_ALIGNMENT	1024
 82 | 
/*
 * Select architecture-specific implementations of matchfinder_init() and
 * matchfinder_rebase().  An arch header that provides one defines a macro of
 * the same name; the generic versions further down are compiled only when the
 * corresponding macro is still undefined.  The arch headers are included only
 * if the compiler supports an alignment attribute; otherwise
 * MATCHFINDER_ALIGNED expands to nothing.
 */
#undef matchfinder_init
#undef matchfinder_rebase
#ifdef _aligned_attribute
#  define MATCHFINDER_ALIGNED _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
#  if defined(ARCH_ARM32) || defined(ARCH_ARM64)
#    include "arm/matchfinder_impl.h"
#  elif defined(ARCH_RISCV)
#    include "riscv/matchfinder_impl.h"
#  elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    include "x86/matchfinder_impl.h"
#  endif
#else
#  define MATCHFINDER_ALIGNED
#endif
 97 | 
 98 | /*
 99 |  * Initialize the hash table portion of the matchfinder.
100 |  *
101 |  * Essentially, this is an optimized memset().
102 |  *
103 |  * 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
104 |  * 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
105 |  */
106 | #ifndef matchfinder_init
107 | static forceinline void
108 | matchfinder_init(mf_pos_t *data, size_t size)
109 | {
110 | 	size_t num_entries = size / sizeof(*data);
111 | 	size_t i;
112 | 
113 | 	for (i = 0; i < num_entries; i++)
114 | 		data[i] = MATCHFINDER_INITVAL;
115 | }
116 | #endif
117 | 
118 | /*
119 |  * Slide the matchfinder by MATCHFINDER_WINDOW_SIZE bytes.
120 |  *
121 |  * This must be called just after each MATCHFINDER_WINDOW_SIZE bytes have been
122 |  * run through the matchfinder.
123 |  *
124 |  * This subtracts MATCHFINDER_WINDOW_SIZE bytes from each entry in the given
125 |  * array, making the entries be relative to the current position rather than the
126 |  * position MATCHFINDER_WINDOW_SIZE bytes prior.  To avoid integer underflows,
127 |  * entries that would become less than -MATCHFINDER_WINDOW_SIZE stay at
128 |  * -MATCHFINDER_WINDOW_SIZE, keeping them permanently out of bounds.
129 |  *
130 |  * The given array must contain all matchfinder data that is position-relative:
131 |  * the hash table(s) as well as any hash chain or binary tree links.  Its
132 |  * address must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and its size
133 |  * must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
134 |  */
135 | #ifndef matchfinder_rebase
136 | static forceinline void
137 | matchfinder_rebase(mf_pos_t *data, size_t size)
138 | {
139 | 	size_t num_entries = size / sizeof(*data);
140 | 	size_t i;
141 | 
142 | 	if (MATCHFINDER_WINDOW_SIZE == 32768) {
143 | 		/*
144 | 		 * Branchless version for 32768-byte windows.  Clear all bits if
145 | 		 * the value was already negative, then set the sign bit.  This
146 | 		 * is equivalent to subtracting 32768 with signed saturation.
147 | 		 */
148 | 		for (i = 0; i < num_entries; i++)
149 | 			data[i] = 0x8000 | (data[i] & ~(data[i] >> 15));
150 | 	} else {
151 | 		for (i = 0; i < num_entries; i++) {
152 | 			if (data[i] >= 0)
153 | 				data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
154 | 			else
155 | 				data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
156 | 		}
157 | 	}
158 | }
159 | #endif
160 | 
161 | /*
162 |  * The hash function: given a sequence prefix held in the low-order bits of a
163 |  * 32-bit value, multiply by a carefully-chosen large constant.  Discard any
164 |  * bits of the product that don't fit in a 32-bit value, but take the
165 |  * next-highest @num_bits bits of the product as the hash value, as those have
166 |  * the most randomness.
167 |  */
168 | static forceinline u32
169 | lz_hash(u32 seq, unsigned num_bits)
170 | {
171 | 	return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
172 | }
173 | 
/*
 * Return the number of bytes at @matchptr that match the bytes at @strptr, up
 * to a maximum of @max_len.  Initially, @start_len bytes are matched.
 *
 * When unaligned access is fast, the comparison proceeds a machine word at a
 * time and falls back to a byte loop only for the tail; otherwise only the
 * byte loop is used.
 */
static forceinline u32
lz_extend(const u8 * const strptr, const u8 * const matchptr,
	  const u32 start_len, const u32 max_len)
{
	u32 len = start_len;
	machine_word_t v_word;

	if (UNALIGNED_ACCESS_IS_FAST) {

		/*
		 * Common case: at least 4 words remain, so compare 4 words
		 * with no per-word bounds check.
		 */
		if (likely(max_len - len >= 4 * WORDBYTES)) {

		#define COMPARE_WORD_STEP				\
			v_word = load_word_unaligned(&matchptr[len]) ^	\
				 load_word_unaligned(&strptr[len]);	\
			if (v_word != 0)				\
				goto word_differs;			\
			len += WORDBYTES;				\

			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
		#undef COMPARE_WORD_STEP
		}

		/* Keep comparing whole words while one still fits. */
		while (len + WORDBYTES <= max_len) {
			v_word = load_word_unaligned(&matchptr[len]) ^
				 load_word_unaligned(&strptr[len]);
			if (v_word != 0)
				goto word_differs;
			len += WORDBYTES;
		}
	}

	/* Byte-at-a-time comparison for whatever remains. */
	while (len < max_len && matchptr[len] == strptr[len])
		len++;
	return len;

word_differs:
	/*
	 * v_word is the XOR of the two words, so its nonzero bits mark the
	 * differing bytes.  Convert the bit index of the first difference in
	 * memory order to a byte count (>> 3).  bsfw()/bsrw() presumably return
	 * the index of the lowest/highest set bit -- confirm against their
	 * definitions.
	 */
	if (CPU_IS_LITTLE_ENDIAN())
		len += (bsfw(v_word) >> 3);
	else
		len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
	return len;
}
223 | 
224 | #endif /* LIB_MATCHFINDER_COMMON_H */
225 | 


--------------------------------------------------------------------------------
/lib/riscv/matchfinder_impl.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * riscv/matchfinder_impl.h - RISC-V implementations of matchfinder functions
 3 |  *
 4 |  * Copyright 2024 Eric Biggers
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person
 7 |  * obtaining a copy of this software and associated documentation
 8 |  * files (the "Software"), to deal in the Software without
 9 |  * restriction, including without limitation the rights to use,
10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
11 |  * copies of the Software, and to permit persons to whom the
12 |  * Software is furnished to do so, subject to the following
13 |  * conditions:
14 |  *
15 |  * The above copyright notice and this permission notice shall be
16 |  * included in all copies or substantial portions of the Software.
17 |  *
18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 |  * OTHER DEALINGS IN THE SOFTWARE.
26 |  */
27 | 
28 | #ifndef LIB_RISCV_MATCHFINDER_IMPL_H
29 | #define LIB_RISCV_MATCHFINDER_IMPL_H
30 | 
31 | #if defined(ARCH_RISCV) && defined(__riscv_vector)
32 | #include <riscv_vector.h>
33 | 
34 | /*
35 |  * Return the maximum number of 16-bit (mf_pos_t) elements that fit in 8 RISC-V
36 |  * vector registers and also evenly divide the sizes of the matchfinder buffers.
37 |  */
38 | static forceinline size_t
39 | riscv_matchfinder_vl(void)
40 | {
41 | 	const size_t vl = __riscv_vsetvlmax_e16m8();
42 | 
43 | 	STATIC_ASSERT(sizeof(mf_pos_t) == sizeof(s16));
44 | 	/*
45 | 	 * MATCHFINDER_SIZE_ALIGNMENT is a power of 2, as is 'vl' because the
46 | 	 * RISC-V Vector Extension requires that the vector register length
47 | 	 * (VLEN) be a power of 2.  Thus, a simple MIN() gives the correct
48 | 	 * answer here; rounding to a power of 2 is not required.
49 | 	 */
50 | 	STATIC_ASSERT((MATCHFINDER_SIZE_ALIGNMENT &
51 | 		       (MATCHFINDER_SIZE_ALIGNMENT - 1)) == 0);
52 | 	ASSERT((vl & (vl - 1)) == 0);
53 | 	return MIN(vl, MATCHFINDER_SIZE_ALIGNMENT / sizeof(mf_pos_t));
54 | }
55 | 
56 | /* matchfinder_init() optimized using the RISC-V Vector Extension */
57 | static forceinline void
58 | matchfinder_init_rvv(mf_pos_t *p, size_t size)
59 | {
60 | 	const size_t vl = riscv_matchfinder_vl();
61 | 	const vint16m8_t v = __riscv_vmv_v_x_i16m8(MATCHFINDER_INITVAL, vl);
62 | 
63 | 	ASSERT(size > 0 && size % (vl * sizeof(p[0])) == 0);
64 | 	do {
65 | 		__riscv_vse16_v_i16m8(p, v, vl);
66 | 		p += vl;
67 | 		size -= vl * sizeof(p[0]);
68 | 	} while (size != 0);
69 | }
70 | #define matchfinder_init matchfinder_init_rvv
71 | 
72 | /* matchfinder_rebase() optimized using the RISC-V Vector Extension */
73 | static forceinline void
74 | matchfinder_rebase_rvv(mf_pos_t *p, size_t size)
75 | {
76 | 	const size_t vl = riscv_matchfinder_vl();
77 | 
78 | 	ASSERT(size > 0 && size % (vl * sizeof(p[0])) == 0);
79 | 	do {
80 | 		vint16m8_t v = __riscv_vle16_v_i16m8(p, vl);
81 | 
82 | 		/*
83 | 		 * This should generate the vsadd.vx instruction
84 | 		 * (Vector Saturating Add, integer vector-scalar)
85 | 		 */
86 | 		v = __riscv_vsadd_vx_i16m8(v, (s16)-MATCHFINDER_WINDOW_SIZE,
87 | 					   vl);
88 | 		__riscv_vse16_v_i16m8(p, v, vl);
89 | 		p += vl;
90 | 		size -= vl * sizeof(p[0]);
91 | 	} while (size != 0);
92 | }
93 | #define matchfinder_rebase matchfinder_rebase_rvv
94 | 
95 | #endif /* ARCH_RISCV && __riscv_vector */
96 | 
97 | #endif /* LIB_RISCV_MATCHFINDER_IMPL_H */
98 | 


--------------------------------------------------------------------------------
/lib/utils.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * utils.c - utility functions for libdeflate
  3 |  *
  4 |  * Copyright 2016 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #include "lib_common.h"
 29 | 
/*
 * In freestanding builds <stdlib.h> is not included, so 'malloc' and 'free'
 * are defined as NULL.  The default allocator pointers below then start out
 * as NULL.
 */
#ifdef FREESTANDING
#  define malloc NULL
#  define free NULL
#else
#  include <stdlib.h>
#endif

/*
 * The default allocation functions.  They can be replaced with
 * libdeflate_set_memory_allocator().
 */
malloc_func_t libdeflate_default_malloc_func = malloc;
free_func_t libdeflate_default_free_func = free;
 39 | 
 40 | void *
 41 | libdeflate_aligned_malloc(malloc_func_t malloc_func,
 42 | 			  size_t alignment, size_t size)
 43 | {
 44 | 	void *ptr = (*malloc_func)(sizeof(void *) + alignment - 1 + size);
 45 | 
 46 | 	if (ptr) {
 47 | 		void *orig_ptr = ptr;
 48 | 
 49 | 		ptr = (void *)ALIGN((uintptr_t)ptr + sizeof(void *), alignment);
 50 | 		((void **)ptr)[-1] = orig_ptr;
 51 | 	}
 52 | 	return ptr;
 53 | }
 54 | 
 55 | void
 56 | libdeflate_aligned_free(free_func_t free_func, void *ptr)
 57 | {
 58 | 	(*free_func)(((void **)ptr)[-1]);
 59 | }
 60 | 
 61 | LIBDEFLATEAPI void
 62 | libdeflate_set_memory_allocator(malloc_func_t malloc_func,
 63 | 				free_func_t free_func)
 64 | {
 65 | 	libdeflate_default_malloc_func = malloc_func;
 66 | 	libdeflate_default_free_func = free_func;
 67 | }
 68 | 
 69 | /*
 70 |  * Implementations of libc functions for freestanding library builds.
 71 |  * Normal library builds don't use these.  Not optimized yet; usually the
 72 |  * compiler expands these functions and doesn't actually call them anyway.
 73 |  */
 74 | #ifdef FREESTANDING
 75 | #undef memset
 76 | void * __attribute__((weak))
 77 | memset(void *s, int c, size_t n)
 78 | {
 79 | 	u8 *p = s;
 80 | 	size_t i;
 81 | 
 82 | 	for (i = 0; i < n; i++)
 83 | 		p[i] = c;
 84 | 	return s;
 85 | }
 86 | 
 87 | #undef memcpy
 88 | void * __attribute__((weak))
 89 | memcpy(void *dest, const void *src, size_t n)
 90 | {
 91 | 	u8 *d = dest;
 92 | 	const u8 *s = src;
 93 | 	size_t i;
 94 | 
 95 | 	for (i = 0; i < n; i++)
 96 | 		d[i] = s[i];
 97 | 	return dest;
 98 | }
 99 | 
100 | #undef memmove
101 | void * __attribute__((weak))
102 | memmove(void *dest, const void *src, size_t n)
103 | {
104 | 	u8 *d = dest;
105 | 	const u8 *s = src;
106 | 	size_t i;
107 | 
108 | 	if (d <= s)
109 | 		return memcpy(d, s, n);
110 | 
111 | 	for (i = n; i > 0; i--)
112 | 		d[i - 1] = s[i - 1];
113 | 	return dest;
114 | }
115 | 
116 | #undef memcmp
117 | int __attribute__((weak))
118 | memcmp(const void *s1, const void *s2, size_t n)
119 | {
120 | 	const u8 *p1 = s1;
121 | 	const u8 *p2 = s2;
122 | 	size_t i;
123 | 
124 | 	for (i = 0; i < n; i++) {
125 | 		if (p1[i] != p2[i])
126 | 			return (int)p1[i] - (int)p2[i];
127 | 	}
128 | 	return 0;
129 | }
130 | #endif /* FREESTANDING */
131 | 
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
#include <stdio.h>
#include <stdlib.h>
/*
 * Report a failed assertion and terminate the program.
 *
 * @expr: text of the expression that evaluated to false
 * @file: source file containing the assertion
 * @line: line number of the assertion
 *
 * Prints a message to stderr and then calls abort(); this function does not
 * return.
 */
NORETURN void
libdeflate_assertion_failed(const char *expr, const char *file, int line)
{
	fprintf(stderr, "Assertion failed: %s at %s:%d\n", expr, file, line);
	abort();
}
#endif /* LIBDEFLATE_ENABLE_ASSERTIONS */
142 | 


--------------------------------------------------------------------------------
/lib/x86/adler32_impl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * x86/adler32_impl.h - x86 implementations of Adler-32 checksum algorithm
  3 |  *
  4 |  * Copyright 2016 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #ifndef LIB_X86_ADLER32_IMPL_H
 29 | #define LIB_X86_ADLER32_IMPL_H
 30 | 
 31 | #include "cpu_features.h"
 32 | 
/*
 * SSE2 and AVX2 implementations.  Used on older CPUs.
 *
 * Each implementation is produced by defining a set of parameter macros
 * (SUFFIX, ATTRIBUTES, VL, USE_VNNI, USE_AVX512) and then including
 * adler32_template.h.  Each function name is also defined as a macro expanding
 * to itself, so that later code can test for its availability with #ifdef.
 * NOTE(review): the parameter macros are redefined before each inclusion
 * without an #undef here, so the template presumably undefines them itself --
 * confirm in adler32_template.h.
 */
#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
#  define adler32_x86_sse2	adler32_x86_sse2
#  define SUFFIX			   _sse2
#  define ATTRIBUTES		_target_attribute("sse2")
#  define VL			16
#  define USE_VNNI		0
#  define USE_AVX512		0
#  include "adler32_template.h"

#  define adler32_x86_avx2	adler32_x86_avx2
#  define SUFFIX			   _avx2
#  define ATTRIBUTES		_target_attribute("avx2")
#  define VL			32
#  define USE_VNNI		0
#  define USE_AVX512		0
#  include "adler32_template.h"
#endif
 51 | 
 52 | /*
 53 |  * AVX-VNNI implementation.  This is used on CPUs that have AVX2 and AVX-VNNI
 54 |  * but don't have AVX-512, for example Intel Alder Lake.
 55 |  *
 56 |  * Unusually for a new CPU feature, gcc added support for the AVX-VNNI
 57 |  * intrinsics (in gcc 11.1) slightly before binutils added support for
 58 |  * assembling AVX-VNNI instructions (in binutils 2.36).  Distros can reasonably
 59 |  * have gcc 11 with binutils 2.35.  Because of this issue, we check for gcc 12
 60 |  * instead of gcc 11.  (libdeflate supports direct compilation without a
 61 |  * configure step, so checking the binutils version is not always an option.)
 62 |  */
 63 | #if (GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)) && \
 64 | 	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI)
 65 | #  define adler32_x86_avx2_vnni	adler32_x86_avx2_vnni /* self-define: lets arch_select_adler32_func() test for it with #ifdef */
 66 | #  define SUFFIX			   _avx2_vnni
 67 | #  define ATTRIBUTES		_target_attribute("avx2,avxvnni")
 68 | #  define VL			32 /* vector length in bytes */
 69 | #  define USE_VNNI		1 /* NOTE(review): presumably selects the VNNI code path in the template -- confirm */
 70 | #  define USE_AVX512		0
 71 | #  include "adler32_template.h"
 72 | #endif
 73 | 
 74 | #if (GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
 75 | 	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI)
 76 | /*
 77 |  * AVX512VNNI implementation using 256-bit vectors.  This is very similar to the
 78 |  * AVX-VNNI implementation but takes advantage of masking and more registers.
 79 |  * This is used on certain older Intel CPUs, specifically Ice Lake and Tiger
 80 |  * Lake, which support AVX512VNNI but downclock a bit too eagerly when ZMM
 81 |  * registers are used.
 82 |  */
 83 | #  define adler32_x86_avx512_vl256_vnni	adler32_x86_avx512_vl256_vnni /* self-define for later #ifdef tests */
 84 | #  define SUFFIX				   _avx512_vl256_vnni
 85 | #  define ATTRIBUTES		_target_attribute("avx512bw,avx512vl,avx512vnni")
 86 | #  define VL			32 /* vector length in bytes (256-bit vectors) */
 87 | #  define USE_VNNI		1
 88 | #  define USE_AVX512		1
 89 | #  include "adler32_template.h"
 90 | 
 91 | /*
 92 |  * AVX512VNNI implementation using 512-bit vectors.  This is used on CPUs that
 93 |  * have a good AVX-512 implementation including AVX512VNNI.
 94 |  */
 95 | #  define adler32_x86_avx512_vl512_vnni	adler32_x86_avx512_vl512_vnni /* self-define for later #ifdef tests */
 96 | #  define SUFFIX				   _avx512_vl512_vnni
 97 | #  define ATTRIBUTES		_target_attribute("avx512bw,avx512vnni")
 98 | #  define VL			64 /* vector length in bytes (512-bit vectors) */
 99 | #  define USE_VNNI		1
100 | #  define USE_AVX512		1
101 | #  include "adler32_template.h"
102 | #endif
103 | 
104 | /* Choose the best Adler-32 implementation that this CPU can run, if any. */
105 | static inline adler32_func_t arch_select_adler32_func(void)
106 | {
107 | 	const u32 cpu MAYBE_UNUSED = get_x86_cpu_features();
108 | 	/* Candidates are tried in a fixed order; the first match wins. */
109 | #if defined(adler32_x86_avx512_vl512_vnni)
110 | 	if (HAVE_AVX512VNNI(cpu) && HAVE_AVX512BW(cpu) &&
111 | 	    (cpu & X86_CPU_FEATURE_ZMM) != 0)
112 | 		return adler32_x86_avx512_vl512_vnni;
113 | #endif
114 | #if defined(adler32_x86_avx512_vl256_vnni)
115 | 	if (HAVE_AVX512VNNI(cpu) && HAVE_AVX512VL(cpu) &&
116 | 	    HAVE_AVX512BW(cpu))
117 | 		return adler32_x86_avx512_vl256_vnni;
118 | #endif
119 | #if defined(adler32_x86_avx2_vnni)
120 | 	if (HAVE_AVXVNNI(cpu) && HAVE_AVX2(cpu))
121 | 		return adler32_x86_avx2_vnni;
122 | #endif
123 | #if defined(adler32_x86_avx2)
124 | 	if (HAVE_AVX2(cpu))
125 | 		return adler32_x86_avx2;
126 | #endif
127 | #if defined(adler32_x86_sse2)
128 | 	if (HAVE_SSE2(cpu))
129 | 		return adler32_x86_sse2;
130 | #endif
131 | 	return NULL; /* none of the above applies */
132 | }
133 | #define arch_select_adler32_func	arch_select_adler32_func
134 | 
135 | #endif /* LIB_X86_ADLER32_IMPL_H */
136 | 


--------------------------------------------------------------------------------
/lib/x86/cpu_features.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * x86/cpu_features.c - feature detection for x86 CPUs
  3 |  *
  4 |  * Copyright 2016 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #include "../cpu_features_common.h" /* must be included first */
 29 | #include "cpu_features.h"
 30 | 
 31 | #ifdef X86_CPU_FEATURES_KNOWN
 32 | /* Runtime x86 CPU feature detection is supported. */
 33 | 
 34 | /* Execute the CPUID instruction for (leaf, subleaf); EAX, EBX, ECX, EDX are stored to *a, *b, *c, *d. */
 35 | static inline void
 36 | cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
 37 | {
 38 | #ifdef _MSC_VER
 39 | 	int result[4];
 40 | 	/* __cpuidex() fills result[] in EAX, EBX, ECX, EDX order. */
 41 | 	__cpuidex(result, leaf, subleaf);
 42 | 	*a = result[0];
 43 | 	*b = result[1];
 44 | 	*c = result[2];
 45 | 	*d = result[3];
 46 | #else
 47 | 	__asm__ volatile("cpuid" : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
 48 | 			 : "a" (leaf), "c" (subleaf)); /* inputs: leaf in EAX, subleaf in ECX */
 49 | #endif
 50 | }
 51 | 
 52 | /* Read the extended control register selected by 'index' and return its 64-bit value. */
 53 | static inline u64
 54 | read_xcr(u32 index)
 55 | {
 56 | #ifdef _MSC_VER
 57 | 	return _xgetbv(index);
 58 | #else
 59 | 	u32 d, a; /* high (EDX) and low (EAX) halves of the result */
 60 | 
 61 | 	/*
 62 | 	 * Execute the "xgetbv" instruction.  Old versions of binutils do not
 63 | 	 * recognize this instruction, so list the raw bytes instead.
 64 | 	 *
 65 | 	 * This must be 'volatile' to prevent this code from being moved out
 66 | 	 * from under the check for OSXSAVE.
 67 | 	 */
 68 | 	__asm__ volatile(".byte 0x0f, 0x01, 0xd0" :
 69 | 			 "=d" (d), "=a" (a) : "c" (index));
 70 | 	/* Reassemble the 64-bit register value from its two 32-bit halves. */
 71 | 	return ((u64)d << 32) | a;
 72 | #endif
 73 | }
 74 | 
 75 | static const struct cpu_feature x86_cpu_feature_table[] = { /* feature bit -> name; passed to disable_cpu_features_for_testing() */
 76 | 	{X86_CPU_FEATURE_SSE2,		"sse2"},
 77 | 	{X86_CPU_FEATURE_PCLMULQDQ,	"pclmulqdq"},
 78 | 	{X86_CPU_FEATURE_AVX,		"avx"},
 79 | 	{X86_CPU_FEATURE_AVX2,		"avx2"},
 80 | 	{X86_CPU_FEATURE_BMI2,		"bmi2"},
 81 | 	{X86_CPU_FEATURE_ZMM,		"zmm"},
 82 | 	{X86_CPU_FEATURE_AVX512BW,	"avx512bw"},
 83 | 	{X86_CPU_FEATURE_AVX512VL,	"avx512vl"},
 84 | 	{X86_CPU_FEATURE_VPCLMULQDQ,	"vpclmulqdq"},
 85 | 	{X86_CPU_FEATURE_AVX512VNNI,	"avx512_vnni"},
 86 | 	{X86_CPU_FEATURE_AVXVNNI,	"avx_vnni"},
 87 | };
 88 | /* Cached feature mask.  0 means "not initialized yet"; the initializer always ORs in X86_CPU_FEATURES_KNOWN. */
 89 | volatile u32 libdeflate_x86_cpu_features = 0;
 90 | 
 91 | /* Report whether the OS lets us use AVX-512, judging by the XCR0 value. */
 92 | static inline bool os_supports_avx512(u64 xcr0)
 93 | {
 94 | #ifndef __APPLE__
 95 | 	const u64 required_state = 0xe6; /* XCR0 bits 1, 2, 5, 6 and 7 */
 96 | 
 97 | 	return (xcr0 & required_state) == required_state;
 98 | #else
 99 | 	/*
100 | 	 * AVX512 is never used on macOS.  The Darwin kernel had a bug where it
101 | 	 * could corrupt the opmask registers; see
102 | 	 * https://community.intel.com/t5/Software-Tuning-Performance/MacOS-Darwin-kernel-bug-clobbers-AVX-512-opmask-register-state/m-p/1327259
103 | 	 * Darwin also leaves the AVX512 bits of XCR0 clear until a thread uses
104 | 	 * AVX512 anyway.  Using AVX512 safely and consistently there would need
105 | 	 * a kernel version check plus a macOS-specific detection method, which
106 | 	 * isn't worth it given Apple's transition to arm64.
107 | 	 */
108 | 	return false;
109 | #endif
110 | }
111 | 
112 | /*
113 |  * Decide whether 512-bit vectors (ZMM registers) should be used on this CPU.
114 |  * Intel CPUs before Rocket Lake and Sapphire Rapids downclock too eagerly,
115 |  * which can slow workloads that touch ZMM registers only occasionally.
116 |  */
117 | static inline bool
118 | allow_512bit_vectors(const u32 manufacturer[3], u32 family, u32 model)
119 | {
120 | #ifdef TEST_SUPPORT__DO_NOT_USE
121 | 	return true;
122 | #endif
123 | 	/* The exclusion list below covers Intel family 6 models only. */
124 | 	if (family != 6 || memcmp(manufacturer, "GenuineIntel", 12) != 0)
125 | 		return true;
126 | 	switch (model) {
127 | 	case 85: /* Skylake (Server), Cascade Lake, Cooper Lake */
128 | 	case 106: /* Ice Lake (Server) */
129 | 	case 108: /* Ice Lake (Server) */
130 | 	case 126: /* Ice Lake (Client) */
131 | 	case 140: /* Tiger Lake */
132 | 	case 141: /* Tiger Lake */
133 | 		return false;
134 | 	default:
135 | 		return true;
136 | 	}
137 | }
138 | 
139 | /* Initialize libdeflate_x86_cpu_features from CPUID (and from XCR0 when OSXSAVE is reported). */
140 | void libdeflate_init_x86_cpu_features(void)
141 | {
142 | 	u32 max_leaf;
143 | 	u32 manufacturer[3];
144 | 	u32 family, model;
145 | 	u32 a, b, c, d;
146 | 	u64 xcr0 = 0; /* stays 0 unless the OSXSAVE bit is set below */
147 | 	u32 features = 0;
148 | 	/* EBX, EDX, ECX are stored in that order so manufacturer[] reads as the 12-byte vendor string. */
149 | 	/* EAX=0: Highest Function Parameter and Manufacturer ID */
150 | 	cpuid(0, 0, &max_leaf, &manufacturer[0], &manufacturer[2],
151 | 	      &manufacturer[1]);
152 | 	if (max_leaf < 1)
153 | 		goto out;
154 | 
155 | 	/* EAX=1: Processor Info and Feature Bits */
156 | 	cpuid(1, 0, &a, &b, &c, &d);
157 | 	family = (a >> 8) & 0xf; /* base family, EAX bits 11:8 */
158 | 	model = (a >> 4) & 0xf; /* base model, EAX bits 7:4 */
159 | 	if (family == 6 || family == 0xf)
160 | 		model += (a >> 12) & 0xf0; /* EAX bits 19:16 become the high nibble of the model */
161 | 	if (family == 0xf)
162 | 		family += (a >> 20) & 0xff; /* add EAX bits 27:20 to the family */
163 | 	if (d & (1 << 26))
164 | 		features |= X86_CPU_FEATURE_SSE2;
165 | 	/*
166 | 	 * No known CPUs have pclmulqdq without sse4.1, so in practice code
167 | 	 * targeting pclmulqdq can use sse4.1 instructions.  But to be safe,
168 | 	 * explicitly check for both the pclmulqdq and sse4.1 bits.
169 | 	 */
170 | 	if ((c & (1 << 1)) && (c & (1 << 19)))
171 | 		features |= X86_CPU_FEATURE_PCLMULQDQ;
172 | 	if (c & (1 << 27)) /* OSXSAVE: only then may XCR0 be read */
173 | 		xcr0 = read_xcr(0);
174 | 	if ((c & (1 << 28)) && ((xcr0 & 0x6) == 0x6)) /* AVX bit, plus XCR0 bits 1 and 2 set by the OS */
175 | 		features |= X86_CPU_FEATURE_AVX;
176 | 	/* The remaining features are reported by leaf 7; stop here if it isn't available. */
177 | 	if (max_leaf < 7)
178 | 		goto out;
179 | 
180 | 	/* EAX=7, ECX=0: Extended Features */
181 | 	cpuid(7, 0, &a, &b, &c, &d);
182 | 	if (b & (1 << 8))
183 | 		features |= X86_CPU_FEATURE_BMI2;
184 | 	if ((xcr0 & 0x6) == 0x6) { /* same XCR0 requirement as for AVX above */
185 | 		if (b & (1 << 5))
186 | 			features |= X86_CPU_FEATURE_AVX2;
187 | 		if (c & (1 << 10))
188 | 			features |= X86_CPU_FEATURE_VPCLMULQDQ;
189 | 	}
190 | 	if (os_supports_avx512(xcr0)) {
191 | 		if (allow_512bit_vectors(manufacturer, family, model))
192 | 			features |= X86_CPU_FEATURE_ZMM; /* a policy bit, not a CPUID bit */
193 | 		if (b & (1 << 30))
194 | 			features |= X86_CPU_FEATURE_AVX512BW;
195 | 		if (b & (1U << 31)) /* 1U: shifting a signed 1 into bit 31 would overflow int */
196 | 			features |= X86_CPU_FEATURE_AVX512VL;
197 | 		if (c & (1 << 11))
198 | 			features |= X86_CPU_FEATURE_AVX512VNNI;
199 | 	}
200 | 
201 | 	/* EAX=7, ECX=1: Extended Features */
202 | 	cpuid(7, 1, &a, &b, &c, &d);
203 | 	if ((a & (1 << 4)) && ((xcr0 & 0x6) == 0x6))
204 | 		features |= X86_CPU_FEATURE_AVXVNNI;
205 | 
206 | out:
207 | 	disable_cpu_features_for_testing(&features, x86_cpu_feature_table,
208 | 					 ARRAY_LEN(x86_cpu_feature_table));
209 | 	/* The KNOWN bit keeps the stored value nonzero, so get_x86_cpu_features() does not rerun detection. */
210 | 	libdeflate_x86_cpu_features = features | X86_CPU_FEATURES_KNOWN;
211 | }
212 | 
213 | #endif /* X86_CPU_FEATURES_KNOWN */
214 | 


--------------------------------------------------------------------------------
/lib/x86/cpu_features.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * x86/cpu_features.h - feature detection for x86 CPUs
  3 |  *
  4 |  * Copyright 2016 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #ifndef LIB_X86_CPU_FEATURES_H
 29 | #define LIB_X86_CPU_FEATURES_H
 30 | 
 31 | #include "../lib_common.h"
 32 | 
 33 | #if defined(ARCH_X86_32) || defined(ARCH_X86_64)
 34 | 
 35 | #define X86_CPU_FEATURE_SSE2		(1 << 0) /* each X86_CPU_FEATURE_* is one bit of the u32 feature mask */
 36 | #define X86_CPU_FEATURE_PCLMULQDQ	(1 << 1) /* detection sets this only when SSE4.1 is present too */
 37 | #define X86_CPU_FEATURE_AVX		(1 << 2)
 38 | #define X86_CPU_FEATURE_AVX2		(1 << 3)
 39 | #define X86_CPU_FEATURE_BMI2		(1 << 4)
 40 | /*
 41 |  * ZMM indicates whether 512-bit vectors (zmm registers) should be used.  On
 42 |  * some CPUs, to avoid downclocking issues we don't set ZMM even if the CPU and
 43 |  * operating system support AVX-512.  On these CPUs, we may still use AVX-512
 44 |  * instructions, but only with xmm and ymm registers.
 45 |  */
 46 | #define X86_CPU_FEATURE_ZMM		(1 << 5)
 47 | #define X86_CPU_FEATURE_AVX512BW	(1 << 6)
 48 | #define X86_CPU_FEATURE_AVX512VL	(1 << 7)
 49 | #define X86_CPU_FEATURE_VPCLMULQDQ	(1 << 8)
 50 | #define X86_CPU_FEATURE_AVX512VNNI	(1 << 9)
 51 | #define X86_CPU_FEATURE_AVXVNNI		(1 << 10)
 52 | 
 53 | #if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
 54 | /* Runtime x86 CPU feature detection is supported. */
 55 | #  define X86_CPU_FEATURES_KNOWN	(1U << 31) /* marker bit: set once detection has run */
 56 | extern volatile u32 libdeflate_x86_cpu_features;
 57 | /* Runs detection and stores the result in libdeflate_x86_cpu_features. */
 58 | void libdeflate_init_x86_cpu_features(void);
 59 | /* Return the CPU feature mask, running detection first while the cached value is still 0. */
 60 | static inline u32 get_x86_cpu_features(void)
 61 | {
 62 | 	if (libdeflate_x86_cpu_features == 0)
 63 | 		libdeflate_init_x86_cpu_features();
 64 | 	return libdeflate_x86_cpu_features;
 65 | }
 66 | /*
 67 |  * x86 intrinsics are also supported.  Include the headers needed to use them.
 68 |  * Normally just immintrin.h suffices.  With clang in MSVC compatibility mode,
 69 |  * immintrin.h incorrectly skips including sub-headers, so include those too.
 70 |  */
 71 | #  include <immintrin.h>
 72 | #  if defined(_MSC_VER) && defined(__clang__)
 73 | #    include <tmmintrin.h>
 74 | #    include <smmintrin.h>
 75 | #    include <wmmintrin.h>
 76 | #    include <avxintrin.h>
 77 | #    include <avx2intrin.h>
 78 | #    include <avx512fintrin.h>
 79 | #    include <avx512bwintrin.h>
 80 | #    include <avx512vlintrin.h>
 81 | #    if __has_include(<avx512vlbwintrin.h>) /* the headers below are included only if present */
 82 | #      include <avx512vlbwintrin.h>
 83 | #    endif
 84 | #    if __has_include(<vpclmulqdqintrin.h>)
 85 | #      include <vpclmulqdqintrin.h>
 86 | #    endif
 87 | #    if __has_include(<avx512vnniintrin.h>)
 88 | #      include <avx512vnniintrin.h>
 89 | #    endif
 90 | #    if __has_include(<avx512vlvnniintrin.h>)
 91 | #      include <avx512vlvnniintrin.h>
 92 | #    endif
 93 | #    if __has_include(<avxvnniintrin.h>)
 94 | #      include <avxvnniintrin.h>
 95 | #    endif
 96 | #  endif
 97 | #else
 98 | static inline u32 get_x86_cpu_features(void) { return 0; } /* no runtime detection: report no features */
 99 | #endif
100 | 
101 | #if defined(__SSE2__) || \
102 | 	(defined(_MSC_VER) && \
103 | 	 (defined(ARCH_X86_64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)))
104 | #  define HAVE_SSE2(features)		1 /* enabled at compile time: no runtime test needed */
105 | #  define HAVE_SSE2_NATIVE		1
106 | #else
107 | #  define HAVE_SSE2(features)		((features) & X86_CPU_FEATURE_SSE2) /* otherwise test the runtime feature bit */
108 | #  define HAVE_SSE2_NATIVE		0
109 | #endif
110 | /* Every HAVE_*() macro below follows the same pattern: constant 1 if enabled at compile time, else a runtime bit test. */
111 | #if (defined(__PCLMUL__) && defined(__SSE4_1__)) || \
112 | 	(defined(_MSC_VER) && defined(__AVX2__))
113 | #  define HAVE_PCLMULQDQ(features)	1
114 | #else
115 | #  define HAVE_PCLMULQDQ(features)	((features) & X86_CPU_FEATURE_PCLMULQDQ)
116 | #endif
117 | 
118 | #ifdef __AVX__
119 | #  define HAVE_AVX(features)		1
120 | #else
121 | #  define HAVE_AVX(features)		((features) & X86_CPU_FEATURE_AVX)
122 | #endif
123 | 
124 | #ifdef __AVX2__
125 | #  define HAVE_AVX2(features)		1
126 | #else
127 | #  define HAVE_AVX2(features)		((features) & X86_CPU_FEATURE_AVX2)
128 | #endif
129 | 
130 | #if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
131 | #  define HAVE_BMI2(features)		1
132 | #  define HAVE_BMI2_NATIVE		1 /* 1 only when BMI2 is enabled at compile time */
133 | #else
134 | #  define HAVE_BMI2(features)		((features) & X86_CPU_FEATURE_BMI2)
135 | #  define HAVE_BMI2_NATIVE		0
136 | #endif
137 | 
138 | #ifdef __AVX512BW__
139 | #  define HAVE_AVX512BW(features)	1
140 | #else
141 | #  define HAVE_AVX512BW(features)	((features) & X86_CPU_FEATURE_AVX512BW)
142 | #endif
143 | 
144 | #ifdef __AVX512VL__
145 | #  define HAVE_AVX512VL(features)	1
146 | #else
147 | #  define HAVE_AVX512VL(features)	((features) & X86_CPU_FEATURE_AVX512VL)
148 | #endif
149 | 
150 | #ifdef __VPCLMULQDQ__
151 | #  define HAVE_VPCLMULQDQ(features)	1
152 | #else
153 | #  define HAVE_VPCLMULQDQ(features)	((features) & X86_CPU_FEATURE_VPCLMULQDQ)
154 | #endif
155 | 
156 | #ifdef __AVX512VNNI__
157 | #  define HAVE_AVX512VNNI(features)	1
158 | #else
159 | #  define HAVE_AVX512VNNI(features)	((features) & X86_CPU_FEATURE_AVX512VNNI)
160 | #endif
161 | 
162 | #ifdef __AVXVNNI__
163 | #  define HAVE_AVXVNNI(features)	1
164 | #else
165 | #  define HAVE_AVXVNNI(features)	((features) & X86_CPU_FEATURE_AVXVNNI)
166 | #endif
167 | 
168 | #endif /* ARCH_X86_32 || ARCH_X86_64 */
169 | 
170 | #endif /* LIB_X86_CPU_FEATURES_H */
171 | 


--------------------------------------------------------------------------------
/lib/x86/crc32_impl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * x86/crc32_impl.h - x86 implementations of the gzip CRC-32 algorithm
  3 |  *
  4 |  * Copyright 2016 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #ifndef LIB_X86_CRC32_IMPL_H
 29 | #define LIB_X86_CRC32_IMPL_H
 30 | 
 31 | #include "cpu_features.h"
 32 | 
 33 | /*
 34 |  * pshufb(x, shift_tab[len..len+15]) left shifts x by 16-len bytes.
 35 |  * pshufb(x, shift_tab[len+16..len+31]) right shifts x by len bytes.
 36 |  * An 0xff index makes pshufb write a zero byte; 0x00..0x0f select source bytes. */
 37 | static const u8 MAYBE_UNUSED shift_tab[48] = {
 38 | 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* entries 0-15: zeroing indices */
 39 | 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 40 | 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* entries 16-31: identity indices */
 41 | 	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
 42 | 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* entries 32-47: zeroing indices */
 43 | 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 44 | };
 45 | 
 46 | #if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
 47 | /*
 48 |  * PCLMULQDQ implementation.  This targets PCLMULQDQ+SSE4.1, since in practice
 49 |  * all CPUs that support PCLMULQDQ also support SSE4.1.
 50 |  */
 51 | #  define crc32_x86_pclmulqdq	crc32_x86_pclmulqdq /* self-define: lets arch_select_crc32_func() test for it with #ifdef */
 52 | #  define SUFFIX			 _pclmulqdq
 53 | #  define ATTRIBUTES		_target_attribute("pclmul,sse4.1")
 54 | #  define VL			16 /* vector length in bytes (128-bit vectors) */
 55 | #  define USE_AVX512		0
 56 | #  include "crc32_pclmul_template.h"
 57 | 
 58 | /*
 59 |  * PCLMULQDQ/AVX implementation.  Same as above, but this is compiled with AVX
 60 |  * enabled so that the compiler can generate VEX-coded instructions which can be
 61 |  * slightly more efficient.  It still uses 128-bit vectors.
 62 |  */
 63 | #  define crc32_x86_pclmulqdq_avx	crc32_x86_pclmulqdq_avx /* self-define, as above */
 64 | #  define SUFFIX				 _pclmulqdq_avx
 65 | #  define ATTRIBUTES		_target_attribute("pclmul,avx")
 66 | #  define VL			16 /* vector length in bytes (128-bit vectors) */
 67 | #  define USE_AVX512		0
 68 | #  include "crc32_pclmul_template.h"
 69 | #endif
 70 | 
 71 | /*
 72 |  * VPCLMULQDQ/AVX2 implementation.  This is used on CPUs that have AVX2 and
 73 |  * VPCLMULQDQ but don't have AVX-512, for example Intel Alder Lake.
 74 |  *
 75 |  * Currently this can't be enabled with MSVC because MSVC has a bug where it
 76 |  * incorrectly assumes that VPCLMULQDQ implies AVX-512:
 77 |  * https://developercommunity.visualstudio.com/t/Compiler-incorrectly-assumes-VAES-and-VP/10578785
 78 |  *
 79 |  * gcc 8.1 and 8.2 had a similar bug where they assumed that
 80 |  * _mm256_clmulepi64_epi128() always needed AVX512.  It's fixed in gcc 8.3.
 81 |  *
 82 |  * _mm256_zextsi128_si256() requires gcc 10.
 83 |  */
 84 | #if (GCC_PREREQ(10, 1) || CLANG_PREREQ(6, 0, 10000000)) && \
 85 | 	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_VPCLMULQDQ)
 86 | #  define crc32_x86_vpclmulqdq_avx2	crc32_x86_vpclmulqdq_avx2 /* self-define: lets arch_select_crc32_func() test for it with #ifdef */
 87 | #  define SUFFIX				 _vpclmulqdq_avx2
 88 | #  define ATTRIBUTES		_target_attribute("vpclmulqdq,pclmul,avx2")
 89 | #  define VL			32 /* vector length in bytes */
 90 | #  define USE_AVX512		0
 91 | #  include "crc32_pclmul_template.h"
 92 | #endif
 93 | 
 94 | #if (GCC_PREREQ(10, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
 95 | 	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_VPCLMULQDQ)
 96 | /*
 97 |  * VPCLMULQDQ/AVX512 implementation using 256-bit vectors.  This is very similar
 98 |  * to the VPCLMULQDQ/AVX2 implementation but takes advantage of the vpternlog
 99 |  * instruction and more registers.  This is used on certain older Intel CPUs,
100 |  * specifically Ice Lake and Tiger Lake, which support VPCLMULQDQ and AVX512 but
101 |  * downclock a bit too eagerly when ZMM registers are used.
102 |  *
103 |  * _mm256_zextsi128_si256() requires gcc 10.
104 |  */
105 | #  define crc32_x86_vpclmulqdq_avx512_vl256  crc32_x86_vpclmulqdq_avx512_vl256 /* self-define for later #ifdef tests */
106 | #  define SUFFIX				      _vpclmulqdq_avx512_vl256
107 | #  define ATTRIBUTES		_target_attribute("vpclmulqdq,pclmul,avx512bw,avx512vl")
108 | #  define VL			32 /* vector length in bytes (256-bit vectors) */
109 | #  define USE_AVX512		1
110 | #  include "crc32_pclmul_template.h"
111 | 
112 | /*
113 |  * VPCLMULQDQ/AVX512 implementation using 512-bit vectors.  This is used on CPUs
114 |  * that have a good AVX-512 implementation including VPCLMULQDQ.
115 |  *
116 |  * _mm512_zextsi128_si512() requires gcc 10.
117 |  */
118 | #  define crc32_x86_vpclmulqdq_avx512_vl512  crc32_x86_vpclmulqdq_avx512_vl512 /* self-define for later #ifdef tests */
119 | #  define SUFFIX				      _vpclmulqdq_avx512_vl512
120 | #  define ATTRIBUTES		_target_attribute("vpclmulqdq,pclmul,avx512bw,avx512vl")
121 | #  define VL			64 /* vector length in bytes (512-bit vectors) */
122 | #  define USE_AVX512		1
123 | #  include "crc32_pclmul_template.h"
124 | #endif
125 | 
126 | /* Choose the best CRC-32 implementation that this CPU can run, if any. */
127 | static inline crc32_func_t arch_select_crc32_func(void)
128 | {
129 | 	const u32 cpu MAYBE_UNUSED = get_x86_cpu_features();
130 | 	/* Candidates are tried in a fixed order; the first match wins. */
131 | #if defined(crc32_x86_vpclmulqdq_avx512_vl512)
132 | 	if (HAVE_AVX512BW(cpu) && HAVE_AVX512VL(cpu) &&
133 | 	    HAVE_VPCLMULQDQ(cpu) && HAVE_PCLMULQDQ(cpu) &&
134 | 	    (cpu & X86_CPU_FEATURE_ZMM) != 0)
135 | 		return crc32_x86_vpclmulqdq_avx512_vl512;
136 | #endif
137 | #if defined(crc32_x86_vpclmulqdq_avx512_vl256)
138 | 	if (HAVE_AVX512BW(cpu) && HAVE_AVX512VL(cpu) &&
139 | 	    HAVE_VPCLMULQDQ(cpu) && HAVE_PCLMULQDQ(cpu))
140 | 		return crc32_x86_vpclmulqdq_avx512_vl256;
141 | #endif
142 | #if defined(crc32_x86_vpclmulqdq_avx2)
143 | 	if (HAVE_AVX2(cpu) && HAVE_VPCLMULQDQ(cpu) &&
144 | 	    HAVE_PCLMULQDQ(cpu))
145 | 		return crc32_x86_vpclmulqdq_avx2;
146 | #endif
147 | #if defined(crc32_x86_pclmulqdq_avx)
148 | 	if (HAVE_AVX(cpu) && HAVE_PCLMULQDQ(cpu))
149 | 		return crc32_x86_pclmulqdq_avx;
150 | #endif
151 | #if defined(crc32_x86_pclmulqdq)
152 | 	if (HAVE_PCLMULQDQ(cpu))
153 | 		return crc32_x86_pclmulqdq;
154 | #endif
155 | 	return NULL; /* none of the above applies */
156 | }
157 | #define arch_select_crc32_func	arch_select_crc32_func
158 | 
159 | #endif /* LIB_X86_CRC32_IMPL_H */
160 | 


--------------------------------------------------------------------------------
/lib/x86/decompress_impl.h:
--------------------------------------------------------------------------------
 1 | #ifndef LIB_X86_DECOMPRESS_IMPL_H
 2 | #define LIB_X86_DECOMPRESS_IMPL_H
 3 | 
 4 | #include "cpu_features.h"
 5 | 
 6 | /*
 7 |  * BMI2 optimized decompression function.
 8 |  *
 9 |  * With gcc and clang we just compile the whole function with
10 |  * __attribute__((target("bmi2"))), and the compiler uses bmi2 automatically.
11 |  *
12 |  * With MSVC, there is no target function attribute, but it's still possible to
13 |  * use bmi2 intrinsics explicitly.  Currently we mostly don't, but there's a
14 |  * case in which we do (see below), so we at least take advantage of that.
15 |  * However, MSVC from VS2017 (toolset v141) apparently miscompiles the _bzhi_*()
16 |  * intrinsics.  It seems to be fixed in VS2022.  Hence, use MSVC_PREREQ(1930).
17 |  */
18 | #if defined(__GNUC__) || defined(__clang__) || MSVC_PREREQ(1930)
19 | #  define deflate_decompress_bmi2	deflate_decompress_bmi2 /* self-define so the code below can test for it */
20 | #  define FUNCNAME			deflate_decompress_bmi2 /* parameter for decompress_template.h */
21 | #  define ATTRIBUTES			_target_attribute("bmi2")
22 |    /*
23 |     * Even with __attribute__((target("bmi2"))), gcc doesn't reliably use the
24 |     * bzhi instruction for 'word & BITMASK(count)'.  So use the bzhi intrinsic
25 |     * explicitly.  EXTRACT_VARBITS() is equivalent to 'word & BITMASK(count)';
26 |     * EXTRACT_VARBITS8() is equivalent to 'word & BITMASK((u8)count)'.
27 |     * Nevertheless, their implementation using the bzhi intrinsic is identical,
28 |     * as the bzhi instruction truncates the count to 8 bits implicitly.
29 |     */
30 | #  ifndef __clang__ /* NOTE(review): clang is excluded; presumably it emits bzhi on its own -- confirm */
31 | #    ifdef ARCH_X86_64
32 | #      define EXTRACT_VARBITS(word, count)  _bzhi_u64((word), (count))
33 | #      define EXTRACT_VARBITS8(word, count) _bzhi_u64((word), (count))
34 | #    else
35 | #      define EXTRACT_VARBITS(word, count)  _bzhi_u32((word), (count))
36 | #      define EXTRACT_VARBITS8(word, count) _bzhi_u32((word), (count))
37 | #    endif
38 | #  endif
39 | #  include "../decompress_template.h"
40 | #endif
41 | 
42 | #if defined(deflate_decompress_bmi2) && HAVE_BMI2_NATIVE
43 | #define DEFAULT_IMPL	deflate_decompress_bmi2
44 | #else
45 | /* Return the BMI2 decompressor if it was built and the CPU has BMI2, else NULL. */
46 | static inline decompress_func_t arch_select_decompress_func(void)
47 | {
48 | #if defined(deflate_decompress_bmi2)
49 | 	return HAVE_BMI2(get_x86_cpu_features()) ? deflate_decompress_bmi2 : NULL;
50 | #else
51 | 	return NULL;
52 | #endif
53 | }
54 | #define arch_select_decompress_func	arch_select_decompress_func
55 | #endif
56 | 
57 | #endif /* LIB_X86_DECOMPRESS_IMPL_H */
58 | 


--------------------------------------------------------------------------------
/lib/x86/matchfinder_impl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * x86/matchfinder_impl.h - x86 implementations of matchfinder functions
  3 |  *
  4 |  * Copyright 2016 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #ifndef LIB_X86_MATCHFINDER_IMPL_H
 29 | #define LIB_X86_MATCHFINDER_IMPL_H
 30 | 
 31 | #include "cpu_features.h"
 32 | 
 33 | #ifdef __AVX2__
 34 | /* Set every mf_pos_t in the 'size'-byte buffer at 'data' to MATCHFINDER_INITVAL. */
 35 | static forceinline void matchfinder_init_avx2(mf_pos_t *data, size_t size)
 36 | {
 37 | 	const __m256i fill = _mm256_set1_epi16(MATCHFINDER_INITVAL);
 38 | 	__m256i *dst = (__m256i *)data;
 39 | 	size_t remaining = size;
 40 | 
 41 | 	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
 42 | 	STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(__m256i) == 0);
 43 | 	STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(__m256i)) == 0);
 44 | 	do { /* tested after each pass: size must be a nonzero multiple of 4 vectors */
 45 | 		dst[0] = fill;
 46 | 		dst[1] = fill;
 47 | 		dst[2] = fill;
 48 | 		dst[3] = fill;
 49 | 		dst += 4;
 50 | 		remaining -= 4 * sizeof(__m256i);
 51 | 	} while (remaining != 0);
 52 | }
 53 | #define matchfinder_init matchfinder_init_avx2
 54 | 
 55 | /* Add -MATCHFINDER_WINDOW_SIZE to every 16-bit entry, with signed saturation. */
 56 | static forceinline void matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
 57 | {
 58 | 	const __m256i delta = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
 59 | 	__m256i *vec = (__m256i *)data;
 60 | 	size_t remaining = size;
 61 | 
 62 | 	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
 63 | 	STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(__m256i) == 0);
 64 | 	STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(__m256i)) == 0);
 65 | 	do { /* tested after each pass: size must be a nonzero multiple of 4 vectors */
 66 | 		/* Saturating signed 16-bit add (PADDSW), four vectors per pass. */
 67 | 		vec[0] = _mm256_adds_epi16(vec[0], delta);
 68 | 		vec[1] = _mm256_adds_epi16(vec[1], delta);
 69 | 		vec[2] = _mm256_adds_epi16(vec[2], delta);
 70 | 		vec[3] = _mm256_adds_epi16(vec[3], delta);
 71 | 		vec += 4;
 72 | 		remaining -= 4 * sizeof(__m256i);
 73 | 	} while (remaining != 0);
 74 | }
 75 | #define matchfinder_rebase matchfinder_rebase_avx2
 76 | 
 77 | #elif HAVE_SSE2_NATIVE
 78 | /* Set every mf_pos_t in the 'size'-byte buffer at 'data' to MATCHFINDER_INITVAL. */
 79 | static forceinline void matchfinder_init_sse2(mf_pos_t *data, size_t size)
 80 | {
 81 | 	const __m128i fill = _mm_set1_epi16(MATCHFINDER_INITVAL);
 82 | 	__m128i *dst = (__m128i *)data;
 83 | 	size_t remaining = size;
 84 | 
 85 | 	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
 86 | 	STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(__m128i) == 0);
 87 | 	STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(__m128i)) == 0);
 88 | 	do { /* tested after each pass: size must be a nonzero multiple of 4 vectors */
 89 | 		dst[0] = fill;
 90 | 		dst[1] = fill;
 91 | 		dst[2] = fill;
 92 | 		dst[3] = fill;
 93 | 		dst += 4;
 94 | 		remaining -= 4 * sizeof(__m128i);
 95 | 	} while (remaining != 0);
 96 | }
 97 | #define matchfinder_init matchfinder_init_sse2
 98 | 
 99 | /* Add -MATCHFINDER_WINDOW_SIZE to every 16-bit entry, with signed saturation. */
100 | static forceinline void matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
101 | {
102 | 	const __m128i delta = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
103 | 	__m128i *vec = (__m128i *)data;
104 | 	size_t remaining = size;
105 | 
106 | 	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
107 | 	STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(__m128i) == 0);
108 | 	STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(__m128i)) == 0);
109 | 	do { /* tested after each pass: size must be a nonzero multiple of 4 vectors */
110 | 		/* Saturating signed 16-bit add (PADDSW), four vectors per pass. */
111 | 		vec[0] = _mm_adds_epi16(vec[0], delta);
112 | 		vec[1] = _mm_adds_epi16(vec[1], delta);
113 | 		vec[2] = _mm_adds_epi16(vec[2], delta);
114 | 		vec[3] = _mm_adds_epi16(vec[3], delta);
115 | 		vec += 4;
116 | 		remaining -= 4 * sizeof(__m128i);
117 | 	} while (remaining != 0);
118 | }
119 | #define matchfinder_rebase matchfinder_rebase_sse2
120 | #endif /* HAVE_SSE2_NATIVE */
121 | 
122 | #endif /* LIB_X86_MATCHFINDER_IMPL_H */
123 | 


--------------------------------------------------------------------------------
/lib/zlib_compress.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * zlib_compress.c - compress with a zlib wrapper
 3 |  *
 4 |  * Copyright 2016 Eric Biggers
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person
 7 |  * obtaining a copy of this software and associated documentation
 8 |  * files (the "Software"), to deal in the Software without
 9 |  * restriction, including without limitation the rights to use,
10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
11 |  * copies of the Software, and to permit persons to whom the
12 |  * Software is furnished to do so, subject to the following
13 |  * conditions:
14 |  *
15 |  * The above copyright notice and this permission notice shall be
16 |  * included in all copies or substantial portions of the Software.
17 |  *
18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 |  * OTHER DEALINGS IN THE SOFTWARE.
26 |  */
27 | 
28 | #include "deflate_compress.h"
29 | #include "zlib_constants.h"
30 | 
31 | /*
32 |  * Compress 'in' into 'out' in the zlib wrapper format: a 2-byte header
33 |  * (CMF and FLG), the DEFLATE-compressed data, then a 4-byte big-endian
34 |  * Adler-32 checksum of the uncompressed input.
35 |  *
36 |  * Returns the number of bytes written to 'out'.  Returns 0 if
37 |  * out_nbytes_avail does not exceed ZLIB_MIN_OVERHEAD or if
38 |  * libdeflate_deflate_compress() returns 0.
39 |  */
40 | LIBDEFLATEAPI size_t
41 | libdeflate_zlib_compress(struct libdeflate_compressor *c,
42 | 			 const void *in, size_t in_nbytes,
43 | 			 void *out, size_t out_nbytes_avail)
44 | {
45 | 	u8 * const out_start = out;
46 | 	u8 *dst = out_start;
47 | 	unsigned level;
48 | 	unsigned hint;
49 | 	size_t payload_size;
50 | 	u16 hdr;
51 | 
52 | 	if (out_nbytes_avail <= ZLIB_MIN_OVERHEAD)
53 | 		return 0;
54 | 
55 | 	/* Map the compressor's level onto the header's level hint. */
56 | 	level = libdeflate_get_compression_level(c);
57 | 	if (level >= 8)
58 | 		hint = ZLIB_SLOWEST_COMPRESSION;
59 | 	else if (level >= 6)
60 | 		hint = ZLIB_DEFAULT_COMPRESSION;
61 | 	else if (level >= 2)
62 | 		hint = ZLIB_FAST_COMPRESSION;
63 | 	else
64 | 		hint = ZLIB_FASTEST_COMPRESSION;
65 | 
66 | 	/*
67 | 	 * 2 byte header: CMF and FLG.  The low 5 bits are zero after the
68 | 	 * first assignment; the second one fills them in so that the 16-bit
69 | 	 * header is a multiple of 31.
70 | 	 */
71 | 	hdr = (ZLIB_CINFO_32K_WINDOW << 12) | (ZLIB_CM_DEFLATE << 8) |
72 | 	      (hint << 6);
73 | 	hdr |= 31 - (hdr % 31);
74 | 	put_unaligned_be16(hdr, dst);
75 | 	dst += ZLIB_MIN_HEADER_SIZE;
76 | 
77 | 	/* Compressed data; the footer's space is held back from the limit. */
78 | 	payload_size = libdeflate_deflate_compress(c, in, in_nbytes, dst,
79 | 					out_nbytes_avail - ZLIB_MIN_OVERHEAD);
80 | 	if (payload_size == 0)
81 | 		return 0;
82 | 	dst += payload_size;
83 | 
84 | 	/* ADLER32  */
85 | 	put_unaligned_be32(libdeflate_adler32(1, in, in_nbytes), dst);
86 | 	dst += ZLIB_FOOTER_SIZE;
87 | 
88 | 	return dst - out_start;
89 | }
75 | 
76 | /*
77 |  * Return an upper bound on the zlib-wrapped size of 'in_nbytes' bytes of
78 |  * input: the raw DEFLATE bound plus the header and footer overhead.
79 |  */
80 | LIBDEFLATEAPI size_t
81 | libdeflate_zlib_compress_bound(struct libdeflate_compressor *c,
82 | 			       size_t in_nbytes)
83 | {
84 | 	size_t bound;
85 | 
86 | 	bound = libdeflate_deflate_compress_bound(c, in_nbytes);
87 | 	return bound + ZLIB_MIN_OVERHEAD;
88 | }
83 | 


--------------------------------------------------------------------------------
/lib/zlib_constants.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * zlib_constants.h - constants for the zlib wrapper format
 3 |  */
 4 | 
 5 | #ifndef LIB_ZLIB_CONSTANTS_H
 6 | #define LIB_ZLIB_CONSTANTS_H
 7 | 
 8 | /* Sizes in bytes: the 2-byte CMF+FLG header and the 4-byte Adler-32 footer. */
 9 | #define ZLIB_MIN_HEADER_SIZE	2
10 | #define ZLIB_FOOTER_SIZE	4
11 | #define ZLIB_MIN_OVERHEAD	(ZLIB_MIN_HEADER_SIZE + ZLIB_FOOTER_SIZE)
12 | 
13 | /* Compression method; occupies bits 8-11 of the big-endian 16-bit header. */
14 | #define ZLIB_CM_DEFLATE		8
15 | 
16 | /* Window size code for a 32K window; occupies bits 12-15 of the header. */
17 | #define ZLIB_CINFO_32K_WINDOW	7
18 | 
19 | /* Compression level hints; the compressor stores one at bits 6-7. */
20 | #define ZLIB_FASTEST_COMPRESSION	0
21 | #define ZLIB_FAST_COMPRESSION		1
22 | #define ZLIB_DEFAULT_COMPRESSION	2
23 | #define ZLIB_SLOWEST_COMPRESSION	3
24 | 
25 | #endif /* LIB_ZLIB_CONSTANTS_H */
22 | 


--------------------------------------------------------------------------------
/lib/zlib_decompress.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * zlib_decompress.c - decompress with a zlib wrapper
  3 |  *
  4 |  * Copyright 2016 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #include "lib_common.h"
 29 | #include "zlib_constants.h"
 30 | 
 31 | /*
 32 |  * Decompress a zlib-wrapped stream from 'in' into 'out'.
 33 |  *
 34 |  * The 2-byte header is validated, the DEFLATE data is handed to
 35 |  * libdeflate_deflate_decompress_ex(), and the big-endian Adler-32 that
 36 |  * follows the compressed data is compared with the checksum of the output.
 37 |  * If actual_out_nbytes_ret is NULL, the checksum covers out_nbytes_avail
 38 |  * bytes of 'out'.
 39 |  *
 40 |  * Returns LIBDEFLATE_BAD_DATA for a too-short input, an invalid header, or a
 41 |  * checksum mismatch; any non-success result of the DEFLATE decompressor is
 42 |  * passed through unchanged.  On success, *actual_in_nbytes_ret (if non-NULL)
 43 |  * receives the number of input bytes consumed, including header and footer.
 44 |  */
 45 | LIBDEFLATEAPI enum libdeflate_result
 46 | libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *d,
 47 | 			      const void *in, size_t in_nbytes,
 48 | 			      void *out, size_t out_nbytes_avail,
 49 | 			      size_t *actual_in_nbytes_ret,
 50 | 			      size_t *actual_out_nbytes_ret)
 51 | {
 52 | 	const u8 * const in_start = in;
 53 | 	const u8 *pos = in_start;
 54 | 	size_t deflate_nbytes;
 55 | 	size_t out_nbytes;
 56 | 	enum libdeflate_result res;
 57 | 	u16 hdr;
 58 | 
 59 | 	if (in_nbytes < ZLIB_MIN_OVERHEAD)
 60 | 		return LIBDEFLATE_BAD_DATA;
 61 | 
 62 | 	/* 2 byte header: CMF and FLG  */
 63 | 	hdr = get_unaligned_be16(pos);
 64 | 	pos += ZLIB_MIN_HEADER_SIZE;
 65 | 
 66 | 	if ((hdr % 31) != 0 ||				/* FCHECK */
 67 | 	    ((hdr >> 8) & 0xF) != ZLIB_CM_DEFLATE ||	/* CM */
 68 | 	    (hdr >> 12) > ZLIB_CINFO_32K_WINDOW ||	/* CINFO */
 69 | 	    ((hdr >> 5) & 1) != 0)			/* FDICT */
 70 | 		return LIBDEFLATE_BAD_DATA;
 71 | 
 72 | 	/* Compressed data: everything between the header and the footer.  */
 73 | 	res = libdeflate_deflate_decompress_ex(d, pos,
 74 | 			in_nbytes - ZLIB_MIN_HEADER_SIZE - ZLIB_FOOTER_SIZE,
 75 | 			out, out_nbytes_avail,
 76 | 			&deflate_nbytes, actual_out_nbytes_ret);
 77 | 	if (res != LIBDEFLATE_SUCCESS)
 78 | 		return res;
 79 | 	pos += deflate_nbytes;
 80 | 
 81 | 	out_nbytes = actual_out_nbytes_ret ? *actual_out_nbytes_ret :
 82 | 					     out_nbytes_avail;
 83 | 
 84 | 	/* ADLER32  */
 85 | 	if (get_unaligned_be32(pos) != libdeflate_adler32(1, out, out_nbytes))
 86 | 		return LIBDEFLATE_BAD_DATA;
 87 | 	pos += ZLIB_FOOTER_SIZE;
 88 | 
 89 | 	if (actual_in_nbytes_ret)
 90 | 		*actual_in_nbytes_ret = pos - in_start;
 91 | 
 92 | 	return LIBDEFLATE_SUCCESS;
 93 | }
 94 | 
 95 | /*
 96 |  * Convenience wrapper around libdeflate_zlib_decompress_ex() for callers that
 97 |  * do not need the number of input bytes consumed: it passes NULL for
 98 |  * actual_in_nbytes_ret and forwards every other argument unchanged.
 99 |  */
100 | LIBDEFLATEAPI enum libdeflate_result
101 | libdeflate_zlib_decompress(struct libdeflate_decompressor *d,
102 | 			   const void *in, size_t in_nbytes,
103 | 			   void *out, size_t out_nbytes_avail,
104 | 			   size_t *actual_out_nbytes_ret)
105 | {
106 | 	return libdeflate_zlib_decompress_ex(d, in, in_nbytes,
107 | 					     out, out_nbytes_avail,
108 | 					     NULL, actual_out_nbytes_ret);
109 | }
105 | 


--------------------------------------------------------------------------------
/libdeflate-config.cmake.in:
--------------------------------------------------------------------------------
1 | # CMake package configuration template for libdeflate.
2 | # NOTE(review): @PACKAGE_INIT@ is the placeholder that CMake's
3 | # configure_package_config_file() expands; presumably the top-level
4 | # CMakeLists.txt (not shown here) makes that call -- confirm.
5 | @PACKAGE_INIT@
6 | 
7 | # Load the exported targets file installed next to this config file.
8 | include("${CMAKE_CURRENT_LIST_DIR}/libdeflate-targets.cmake")
4 | 


--------------------------------------------------------------------------------
/libdeflate.pc.in:
--------------------------------------------------------------------------------
 1 | # pkg-config template for libdeflate.  The @VAR@ placeholders are filled in
 2 | # when the file is generated (presumably by the top-level CMakeLists.txt,
 3 | # which is not shown here -- confirm).
 4 | prefix=@CMAKE_INSTALL_PREFIX@
 5 | exec_prefix=${prefix}
 6 | includedir=@CMAKE_PKGCONFIG_INCLUDEDIR@
 7 | libdir=@CMAKE_PKGCONFIG_LIBDIR@
 8 | 
 9 | Name: libdeflate
10 | Description: Fast implementation of DEFLATE, zlib, and gzip
11 | Version: @PROJECT_VERSION@
12 | Libs: -L${libdir} -ldeflate
13 | Cflags: -I${includedir}
11 | 
12 | # Note: this library's public header allows LIBDEFLATE_DLL to be defined when
13 | # linking to the DLL on Windows, to make __declspec(dllimport) be used.
14 | # However, the only way to define a shared-library-only flag in a pkgconfig file
15 | # is to use the weird workaround of unconditionally defining it in Cflags, then
16 | # undefining it in Cflags.private.  Just don't bother with this, since
17 | # __declspec(dllimport) is optional anyway.  It is a very minor performance
18 | # optimization that is irrelevant for most use cases of libdeflate.
19 | 


--------------------------------------------------------------------------------
/programs/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | # check_c_source_compiles() and check_symbol_exists() are both used below, so
  2 | # include both modules here instead of relying on another file to have done so.
  3 | include(CheckCSourceCompiles)
  4 | include(CheckSymbolExists)
  5 | 
  6 | # Check for the availability of OS functionality and generate the config.h file.
  7 | #
  8 | # Keep CMAKE_REQUIRED_DEFINITIONS in sync with what prog_util.h does.
  9 | #
 10 | # CMake only defines the LINUX variable since version 3.25, so also compare
 11 | # CMAKE_SYSTEM_NAME to select the Linux definitions with older CMake versions.
 12 | if(LINUX OR CMAKE_SYSTEM_NAME STREQUAL "Linux")
 13 |     set(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE -D_POSIX_C_SOURCE=200809L)
 14 | elseif(APPLE)
 15 |     set(CMAKE_REQUIRED_DEFINITIONS -D_DARWIN_C_SOURCE -U_POSIX_C_SOURCE)
 16 | elseif(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
 17 |     # Mirrors the __sun branch of prog_util.h, which defines __EXTENSIONS__
 18 |     # because it is needed for futimens().
 19 |     set(CMAKE_REQUIRED_DEFINITIONS -D__EXTENSIONS__ -U_POSIX_C_SOURCE)
 20 | else()
 21 |     set(CMAKE_REQUIRED_DEFINITIONS -U_POSIX_C_SOURCE)
 22 | endif()
 23 | check_symbol_exists(clock_gettime "time.h" HAVE_CLOCK_GETTIME)
 24 | check_symbol_exists(futimens "fcntl.h;sys/stat.h" HAVE_FUTIMENS)
 25 | check_symbol_exists(posix_fadvise "fcntl.h" HAVE_POSIX_FADVISE)
 26 | check_symbol_exists(posix_madvise "sys/mman.h" HAVE_POSIX_MADVISE)
 27 | # struct stat has an st_atim member only where nanosecond timestamps exist.
 28 | check_c_source_compiles("#include <sys/types.h>
 29 |                          #include <sys/stat.h>
 30 |                          int main() { struct stat st; (void)st.st_atim; }"
 31 |                          HAVE_STAT_NANOSECOND_PRECISION)
 32 | # Turn the HAVE_* results into config.h in the current binary directory.
 33 | configure_file(config.h.in config.h)
 22 | 
 23 | # Utility library shared by all the programs.  It is built but never installed.
 24 | # Its usage requirements are PUBLIC so that every program linking to it
 25 | # inherits the libdeflate library, the config.h include path, and the
 26 | # Windows Unicode settings.
 27 | add_library(libdeflate_prog_utils STATIC
 28 |     prog_util.c
 29 |     tgetopt.c
 30 |     ../common_defs.h
 31 | )
 32 | set_target_properties(libdeflate_prog_utils
 33 |     PROPERTIES OUTPUT_NAME deflate_prog_utils
 34 | )
 35 | if(NOT LIBDEFLATE_USE_SHARED_LIB)
 36 |     target_link_libraries(libdeflate_prog_utils PUBLIC libdeflate_static)
 37 | else()
 38 |     target_link_libraries(libdeflate_prog_utils PUBLIC libdeflate_shared)
 39 | endif()
 40 | target_include_directories(libdeflate_prog_utils
 41 |     PUBLIC ${CMAKE_CURRENT_BINARY_DIR}
 42 | )
 43 | target_compile_definitions(libdeflate_prog_utils PUBLIC HAVE_CONFIG_H)
 44 | if(WIN32 AND MINGW)
 45 |     # MinGW: -municode is passed both when compiling and when linking.
 46 |     target_compile_options(libdeflate_prog_utils PUBLIC -municode)
 47 |     target_link_libraries(libdeflate_prog_utils PUBLIC -municode)
 48 | elseif(WIN32)
 49 |     target_compile_definitions(libdeflate_prog_utils PUBLIC UNICODE _UNICODE)
 50 | endif()
 42 | 
 43 | # Build and install libdeflate-gzip, plus libdeflate-gunzip as its alias.
 44 | if(LIBDEFLATE_BUILD_GZIP)
 45 |     add_executable(libdeflate-gzip gzip.c)
 46 |     target_link_libraries(libdeflate-gzip PRIVATE libdeflate_prog_utils)
 47 |     install(TARGETS libdeflate-gzip DESTINATION ${CMAKE_INSTALL_BINDIR})
 48 |     if(CMAKE_VERSION VERSION_LESS "3.14")
 49 |         # This cmake version is too old to support file(CREATE_LINK), so
 50 |         # libdeflate-gunzip is built by simply compiling gzip.c a second time.
 51 |         add_executable(libdeflate-gunzip gzip.c)
 52 |         target_link_libraries(libdeflate-gunzip PRIVATE libdeflate_prog_utils)
 53 |         install(TARGETS libdeflate-gunzip DESTINATION ${CMAKE_INSTALL_BINDIR})
 54 |     else()
 55 |         # At install time, create libdeflate-gunzip as a hard link to the
 56 |         # installed libdeflate-gzip; COPY_ON_ERROR falls back to a copy when
 57 |         # hard links are unsupported.
 58 |         #
 59 |         # Note: on Windows, prepending DESTDIR like this doesn't work correctly
 60 |         # when ${CMAKE_INSTALL_FULL_BINDIR} includes a drive letter.  But that
 61 |         # is fine since DESTDIR is unsupported on Windows anyway, according to
 62 |         # the CMake documentation.
 63 |         set(gzip_installed "${CMAKE_INSTALL_FULL_BINDIR}/libdeflate-gzip${CMAKE_EXECUTABLE_SUFFIX}")
 64 |         set(gunzip_installed "${CMAKE_INSTALL_FULL_BINDIR}/libdeflate-gunzip${CMAKE_EXECUTABLE_SUFFIX}")
 65 |         install(CODE "message(\"-- Installing: \$ENV{DESTDIR}${gunzip_installed}\")")
 66 |         install(CODE "file(CREATE_LINK \"\$ENV{DESTDIR}${gzip_installed}\"
 67 |                            \"\$ENV{DESTDIR}${gunzip_installed}\" COPY_ON_ERROR)")
 68 |     endif()
 69 | endif()
 69 | 
 70 | # Test programs; only built when LIBDEFLATE_BUILD_TESTS is enabled.
 71 | if(LIBDEFLATE_BUILD_TESTS)
 72 | 
 73 |     # zlib is required because the tests compare libdeflate against it.
 74 |     find_package(ZLIB REQUIRED)
 75 | 
 76 |     # Utility library for the test programs.  Linking it PUBLIC to the
 77 |     # program utility library and zlib passes both on to every test program.
 78 |     add_library(libdeflate_test_utils STATIC test_util.c)
 79 |     set_target_properties(libdeflate_test_utils
 80 |         PROPERTIES OUTPUT_NAME deflate_test_utils
 81 |     )
 82 |     target_link_libraries(libdeflate_test_utils
 83 |         PUBLIC libdeflate_prog_utils ZLIB::ZLIB
 84 |     )
 85 | 
 86 |     # The benchmark and checksum programs are built but not run by CTest.
 87 |     foreach(tool IN ITEMS benchmark checksum)
 88 |         add_executable(${tool} ${tool}.c)
 89 |         target_link_libraries(${tool} PRIVATE libdeflate_test_utils)
 90 |     endforeach()
 91 | 
 92 |     # Unit test programs: each one is built from <name>.c and registered
 93 |     # with CTest under the same name.
 94 |     set(UNIT_TEST_PROGS
 95 |         test_checksums
 96 |         test_custom_malloc
 97 |         test_incomplete_codes
 98 |         test_invalid_streams
 99 |         test_litrunlen_overflow
100 |         test_overread
101 |         test_slow_decompression
102 |         test_trailing_bytes
103 |     )
104 |     foreach(unit_test IN LISTS UNIT_TEST_PROGS)
105 |         add_executable(${unit_test} ${unit_test}.c)
106 |         target_link_libraries(${unit_test} PRIVATE libdeflate_test_utils)
107 |         add_test(NAME ${unit_test} COMMAND ${unit_test})
108 |     endforeach()
109 | endif()
106 | 


--------------------------------------------------------------------------------
/programs/checksum.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * checksum.c - Adler-32 and CRC-32 checksumming program
  3 |  *
  4 |  * Copyright 2016 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #include "test_util.h"
 29 | 
 30 | /*
 31 |  * Option string passed to tgetopt() in tmain().  It lists the option
 32 |  * letters handled there; 'm' and 's' are the two that read toptarg.
 33 |  */
 34 | static const tchar *const optstring = T("Ahm:s:tZ");
 35 | 
 36 | /*
 37 |  * Print the usage message, including the program's invocation name, to 'fp'.
 38 |  * tmain() passes stdout for -h and stderr for an unrecognized option.
 39 |  */
 40 | static void
 41 | show_usage(FILE *fp)
 42 | {
 43 | 	fprintf(fp,
 44 | "Usage: %"TS" [-A] [-h] [-m ALIGN] [-s SIZE] [-t] [-Z] [FILE]...\n"
 45 | "Calculate Adler-32 or CRC-32 checksums of the specified FILEs.\n"
 46 | "\n"
 47 | "Options:\n"
 48 | "  -A        use Adler-32 (default is CRC-32)\n"
 49 | "  -h        print this help\n"
 50 | "  -m ALIGN  misalign the buffer by ALIGN bytes\n"
 51 | "  -s SIZE   chunk size in bytes\n"
 52 | "  -t        show checksum speed, excluding I/O\n"
 53 | "  -Z        use zlib implementation instead of libdeflate\n",
 54 | 	prog_invocation_name);
 55 | }
 48 | 
 49 | /*
 50 |  * Common signature for the checksum functions selectable at runtime:
 51 |  * (current checksum value, buffer, buffer length) -> updated checksum value.
 52 |  */
 53 | typedef u32 (*cksum_fn_t)(u32, const void *, size_t);
 54 | 
 55 | /*
 56 |  * The four functions below are thin adapters that give libdeflate's and
 57 |  * zlib's Adler-32 and CRC-32 functions the cksum_fn_t signature, so that
 58 |  * tmain() can select one of them through a single function pointer.
 59 |  */
 60 | 
 61 | /* Adler-32 via libdeflate_adler32(). */
 62 | static u32
 63 | adler32_libdeflate(u32 adler, const void *buf, size_t len)
 64 | {
 65 | 	return libdeflate_adler32(adler, buf, len);
 66 | }
 67 | 
 68 | /* CRC-32 via libdeflate_crc32(). */
 69 | static u32
 70 | crc32_libdeflate(u32 crc, const void *buf, size_t len)
 71 | {
 72 | 	return libdeflate_crc32(crc, buf, len);
 73 | }
 74 | 
 75 | /* Adler-32 via adler32(), used when -Z selects the zlib implementation. */
 76 | static u32
 77 | adler32_zlib(u32 adler, const void *buf, size_t len)
 78 | {
 79 | 	return adler32(adler, buf, len);
 80 | }
 81 | 
 82 | /* CRC-32 via crc32(), used when -Z selects the zlib implementation. */
 83 | static u32
 84 | crc32_zlib(u32 crc, const void *buf, size_t len)
 85 | {
 86 | 	return crc32(crc, buf, len);
 87 | }
 74 | 
 75 | /*
 76 |  * Read the stream 'in' to its end in chunks of up to 'bufsize' bytes,
 77 |  * folding each chunk into *sum with 'cksum'.
 78 |  *
 79 |  * Only the cksum() calls are timed, so I/O is excluded.  On success, returns
 80 |  * 0 and stores the total byte count in *size_ret and the accumulated timer
 81 |  * ticks in *elapsed_ret; a tick count of 0 is reported as 1.  If xread()
 82 |  * returns a negative value, that value is returned and the two output
 83 |  * parameters are left unwritten.
 84 |  */
 85 | static int
 86 | checksum_stream(struct file_stream *in, cksum_fn_t cksum, u32 *sum,
 87 | 		void *buf, size_t bufsize, u64 *size_ret, u64 *elapsed_ret)
 88 | {
 89 | 	u64 total_bytes = 0;
 90 | 	u64 total_ticks = 0;
 91 | 	ssize_t nread;
 92 | 
 93 | 	while ((nread = xread(in, buf, bufsize)) != 0) {
 94 | 		u64 t0;
 95 | 
 96 | 		if (nread < 0)
 97 | 			return nread;
 98 | 
 99 | 		total_bytes += nread;
100 | 		t0 = timer_ticks();
101 | 		*sum = cksum(*sum, buf, nread);
102 | 		total_ticks += timer_ticks() - t0;
103 | 	}
104 | 
105 | 	*size_ret = total_bytes;
106 | 	*elapsed_ret = (total_ticks != 0) ? total_ticks : 1;
107 | 	return 0;
108 | }
104 | 
105 | /*
106 |  * Program entry point.  Parses the options, selects the checksum function,
107 |  * allocates the read buffer, then checksums each FILE operand in order and
108 |  * prints one result line per file.  With no operands, a single NULL path is
109 |  * processed instead.  Processing stops at the first file that fails.
110 |  */
111 | int
112 | tmain(int argc, tchar *argv[])
113 | {
114 | 	bool use_adler32 = false;
115 | 	bool use_zlib_impl = false;
116 | 	bool do_timing = false;
117 | 	void *orig_buf = NULL;
118 | 	void *buf;
119 | 	size_t misalignment = 0;
120 | 	size_t bufsize = 131072;
121 | 	/*
122 | 	 * Used when no FILE is given.  NOTE(review): a NULL path presumably
123 | 	 * makes xopen_for_read() use standard input -- confirm in prog_util.c.
124 | 	 */
125 | 	tchar *default_file_list[] = { NULL };
126 | 	cksum_fn_t cksum;
127 | 	int opt_char;
128 | 	int i;
129 | 	int ret;
130 | 
131 | 	begin_program(argv);
132 | 
133 | 	while ((opt_char = tgetopt(argc, argv, optstring)) != -1) {
134 | 		switch (opt_char) {
135 | 		case 'A':
136 | 			use_adler32 = true;
137 | 			break;
138 | 		case 'h':
139 | 			show_usage(stdout);
140 | 			return 0;
141 | 		case 'm':
142 | 			misalignment = tstrtoul(toptarg, NULL, 10);
143 | 			/* The buffer below is only padded for offsets < 4096 */
144 | 			if (misalignment >= 4096) {
145 | 				msg("invalid misalignment: \"%"TS"\"", toptarg);
146 | 				return 1;
147 | 			}
148 | 			break;
149 | 		case 's':
150 | 			bufsize = tstrtoul(toptarg, NULL, 10);
151 | 			if (bufsize == 0 || bufsize > SIZE_MAX / 2) {
152 | 				msg("invalid chunk size: \"%"TS"\"", toptarg);
153 | 				return 1;
154 | 			}
155 | 			break;
156 | 		case 't':
157 | 			do_timing = true;
158 | 			break;
159 | 		case 'Z':
160 | 			use_zlib_impl = true;
161 | 			break;
162 | 		default:
163 | 			show_usage(stderr);
164 | 			return 1;
165 | 		}
166 | 	}
167 | 
168 | 	/* Skip past the options so that argv holds only the FILE operands */
169 | 	argc -= toptind;
170 | 	argv += toptind;
171 | 
172 | 	if (use_adler32) {
173 | 		if (use_zlib_impl)
174 | 			cksum = adler32_zlib;
175 | 		else
176 | 			cksum = adler32_libdeflate;
177 | 	} else {
178 | 		if (use_zlib_impl)
179 | 			cksum = crc32_zlib;
180 | 		else
181 | 			cksum = crc32_libdeflate;
182 | 	}
183 | 
184 | 	/*
185 | 	 * Over-allocate by 4096 + misalignment bytes so that 'buf' can be
186 | 	 * rounded up to the next multiple of 4096 and then offset by
187 | 	 * 'misalignment' while still leaving 'bufsize' usable bytes.
188 | 	 */
189 | 	orig_buf = xmalloc(bufsize + 4096 + misalignment);
190 | 	if (orig_buf == NULL)
191 | 		return 1;
192 | 	buf = (u8 *)orig_buf + (-(uintptr_t)orig_buf % 4096) + misalignment;
193 | 
194 | 	if (argc == 0) {
195 | 		argv = default_file_list;
196 | 		argc = ARRAY_LEN(default_file_list);
197 | 	} else {
198 | 		/* An operand of exactly "-" is replaced by a NULL path */
199 | 		for (i = 0; i < argc; i++)
200 | 			if (argv[i][0] == '-' && argv[i][1] == '\0')
201 | 				argv[i] = NULL;
202 | 	}
203 | 
204 | 	for (i = 0; i < argc; i++) {
205 | 		struct file_stream in;
206 | 		/* Calling with a NULL buffer yields the starting value */
207 | 		u32 sum = cksum(0, NULL, 0);
208 | 		u64 size = 0;
209 | 		u64 elapsed = 0;
210 | 
211 | 		ret = xopen_for_read(argv[i], true, &in);
212 | 		if (ret != 0)
213 | 			goto out;
214 | 
215 | 		ret = checksum_stream(&in, cksum, &sum, buf, bufsize,
216 | 				      &size, &elapsed);
217 | 		if (ret == 0) {
218 | 			if (do_timing) {
219 | 				printf("%08"PRIx32"\t%"TS"\t"
220 | 				       "%"PRIu64" ms\t%"PRIu64" MB/s\n",
221 | 				       sum, in.name, timer_ticks_to_ms(elapsed),
222 | 				       timer_MB_per_s(size, elapsed));
223 | 			} else {
224 | 				printf("%08"PRIx32"\t%"TS"\t\n", sum, in.name);
225 | 			}
226 | 		}
227 | 
228 | 		/* Close the stream before acting on a checksumming failure */
229 | 		xclose(&in);
230 | 
231 | 		if (ret != 0)
232 | 			goto out;
233 | 	}
234 | 	ret = 0;
235 | out:
236 | 	free(orig_buf);
237 | 	/*
238 | 	 * NOTE(review): the helpers presumably return negative values on
239 | 	 * failure, which this negates into a positive exit status -- confirm.
240 | 	 */
241 | 	return -ret;
242 | }
219 | 


--------------------------------------------------------------------------------
/programs/config.h.in:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Template for config.h.  programs/CMakeLists.txt turns it into config.h with
 3 |  * configure_file(), which defines or leaves undefined each macro below
 4 |  * according to the result of the matching feature check.  prog_util.h
 5 |  * includes the generated file when HAVE_CONFIG_H is defined.
 6 |  */
 7 | #ifndef CONFIG_H
 8 | #define CONFIG_H
 9 | 
10 | /* Is the clock_gettime() function available? */
11 | #cmakedefine HAVE_CLOCK_GETTIME
12 | 
13 | /* Is the futimens() function available? */
14 | #cmakedefine HAVE_FUTIMENS
15 | 
16 | /* Is the posix_fadvise() function available? */
17 | #cmakedefine HAVE_POSIX_FADVISE
18 | 
19 | /* Is the posix_madvise() function available? */
20 | #cmakedefine HAVE_POSIX_MADVISE
21 | 
22 | /* Does stat() provide nanosecond-precision timestamps? */
23 | #cmakedefine HAVE_STAT_NANOSECOND_PRECISION
24 | 
25 | #endif /* CONFIG_H */
20 | 


--------------------------------------------------------------------------------
/programs/prog_util.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * prog_util.h - common header for the programs; must be included first
  3 |  *
  4 |  * Copyright 2016 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #ifndef PROGRAMS_PROG_UTIL_H
 29 | #define PROGRAMS_PROG_UTIL_H
 30 | 
 31 | /*
 32 |  * This header provides some utility functions and macros for the programs.  It
 33 |  * also defines some macros that control the behavior of system headers, and for
 34 |  * that reason it must be included before any system header.
 35 |  *
 36 |  * The latter part could be handled in this directory's CMakeLists.txt instead.
 37 |  * We put as much as possible here, directly in the source, to make it easier to
 38 |  * build the programs using other build systems (or "no build system").
 39 |  *
 40 |  * Note: CMakeLists.txt does do some dynamic feature detection, which can't be
 41 |  * done in the source code.  For that reason, it duplicates some of the logic
 42 |  * that defines macros like _GNU_SOURCE.  Keep this logic in sync.
 43 |  */
 44 | 
 45 | #ifdef _WIN32
 46 | 
 47 |   /*
 48 |    * To keep the code similar on all platforms, sometimes we intentionally use
 49 |    * the "deprecated" non-underscore-prefixed variants of functions in msvcrt.
 50 |    */
 51 | #  undef _CRT_NONSTDC_NO_DEPRECATE
 52 | #  define _CRT_NONSTDC_NO_DEPRECATE	1
 53 | 
 54 |   /*
 55 |    * Similarly, to match other platforms we intentionally use the "non-secure"
 56 |    * variants, which aren't actually any less secure when used properly.
 57 |    */
 58 | #  undef _CRT_SECURE_NO_WARNINGS
 59 | #  define _CRT_SECURE_NO_WARNINGS	1
 60 | 
 61 | #else
 62 | 
 63 |    /* Needed to work with files >= 2 GiB on 32-bit systems */
 64 | #  undef _FILE_OFFSET_BITS
 65 | #  define _FILE_OFFSET_BITS	64
 66 | 
 67 |    /* Note: when making changes here, update programs/CMakeLists.txt too. */
 68 | #  if defined(__linux__)
 69 |      /*
 70 |       * May be needed for clock_gettime(), posix_fadvise(), posix_madvise(),
 71 |       * futimens(), and MAP_ANONYMOUS, depending on the C library version.
 72 |       */
 73 | #    undef _GNU_SOURCE
 74 | #    define _GNU_SOURCE
 75 | #    undef _POSIX_C_SOURCE
 76 | #    define _POSIX_C_SOURCE	200809L
 77 | #  elif defined(__APPLE__)
 78 |      /* Needed for O_NOFOLLOW and MAP_ANON */
 79 | #    undef _DARWIN_C_SOURCE
 80 | #    define _DARWIN_C_SOURCE
 81 | #    undef _POSIX_C_SOURCE
 82 | #  elif defined(__sun)
 83 |      /* Needed for futimens() */
 84 | #    undef __EXTENSIONS__
 85 | #    define __EXTENSIONS__
 86 | #    undef _POSIX_C_SOURCE
 87 | #  else
 88 |      /*
 89 |       * Else assume that nothing else is needed.  Don't use _POSIX_C_SOURCE on
 90 |       * BSD, since it causes anything non-POSIX, such as MAP_ANON, to be hidden.
 91 |       */
 92 | #    undef _POSIX_C_SOURCE
 93 | #  endif
 94 | #endif
 95 | 
 96 | #ifdef HAVE_CONFIG_H
 97 | #  include "config.h"
 98 | #endif
 99 | 
100 | #include "../common_defs.h"
101 | 
102 | #include <inttypes.h>
103 | #include <limits.h>
104 | #include <stdio.h>
105 | #include <stdlib.h>
106 | #include <string.h>
107 | #ifndef _WIN32
108 | #  include <sys/types.h>
109 | #endif
110 | 
111 | /*
112 |  * _printf(str_idx, args_idx) marks a function as taking a printf-style format
113 |  * string at parameter 'str_idx' with its variadic arguments starting at
114 |  * parameter 'args_idx'.  It expands to nothing when the compiler is neither
115 |  * GNU-compatible nor reports support for the 'format' attribute.
116 |  */
117 | #if defined(__GNUC__) || __has_attribute(format)
118 | # define _printf(str_idx, args_idx)	\
119 | 		__attribute__((format(printf, str_idx, args_idx)))
120 | #else
121 | # define _printf(str_idx, args_idx)
122 | #endif
117 | 
118 | #ifdef _WIN32
119 | 
120 | /*
121 |  * Definitions for Windows builds.  Mainly, 'tchar' is defined to be the 2-byte
122 |  * 'wchar_t' type instead of 'char'.  This is the only "easy" way I know of to
123 |  * get full Unicode support on Windows...
124 |  *
125 |  * Each t-prefixed name below maps to the wide-character variant of a function
126 |  * or type; the non-Windows branch maps the same names to the plain 'char'
127 |  * variants, so the programs can be written once in terms of the t* names.
128 |  */
129 | 
130 | #include <io.h>
131 | #include <wchar.h>
132 | int wmain(int argc, wchar_t **argv);
133 | #  define	tmain		wmain
134 | #  define	tchar		wchar_t
135 | #  define	_T(text)	L##text
136 | #  define	T(text)		_T(text)
137 |    /* printf conversions for a tchar string / character, used as "%"TS */
138 | #  define	TS		"ls"
139 | #  define	TC		"lc"
140 | #  define	tmemcpy		wmemcpy
141 | #  define	topen		_wopen
142 | #  define	tstrchr		wcschr
143 | #  define	tstrcmp		wcscmp
144 | #  define	tstrlen		wcslen
145 | #  define	tstrrchr	wcsrchr
146 | #  define	tstrtoul	wcstoul
147 |    /* Unlike the strcmp mapping used elsewhere, this one ignores case */
148 | #  define	tstrxcmp	wcsicmp
149 | #  define	tunlink		_wunlink
150 | #  define	tutimbuf	__utimbuf64
151 | #  define	tutime		_wutime64
152 | #  define	tstat		_wstat64
153 | #  define	tfstat		_fstat64
154 | #  define	stat_t		struct _stat64
155 | #  ifdef _MSC_VER
156 |      /* POSIX-style names that this header supplies itself under MSVC */
157 | #    define	STDIN_FILENO	0
158 | #    define	STDOUT_FILENO	1
159 | #    define	STDERR_FILENO	2
160 | #    define	S_ISREG(m)      (((m) & S_IFMT) == S_IFREG)
161 | #    define	S_ISDIR(m)      (((m) & S_IFMT) == S_IFDIR)
162 | #  endif
163 | 
164 | #else /* _WIN32 */
165 | 
166 | /* Standard definitions for everyone else */
167 | 
168 | #  define	tmain		main
169 | #  define	tchar		char
170 | #  define	T(text)		text
171 |    /* printf conversions for a tchar string / character, used as "%"TS */
172 | #  define	TS		"s"
173 | #  define	TC		"c"
174 | #  define	tmemcpy		memcpy
175 | #  define	topen		open
176 | #  define	tstrchr		strchr
177 | #  define	tstrcmp		strcmp
178 | #  define	tstrlen		strlen
179 | #  define	tstrrchr	strrchr
180 | #  define	tstrtoul	strtoul
181 |    /* Case-sensitive here; the Windows mapping (wcsicmp) ignores case */
182 | #  define	tstrxcmp	strcmp
183 | #  define	tunlink		unlink
184 | #  define	tutimbuf	utimbuf
185 | #  define	tutime		utime
186 | #  define	tstat		stat
187 | #  define	tfstat		fstat
188 | #  define	stat_t		struct stat
189 | 
190 | #endif /* !_WIN32 */
182 | 
183 | /*
184 |  * Name the program was invoked as; checksum.c prints it in its usage message.
185 |  * NOTE(review): presumably set by begin_program() -- confirm in prog_util.c.
186 |  */
187 | extern const tchar *prog_invocation_name;
188 | /* NOTE(review): presumably silences warn() when true -- confirm. */
189 | extern bool suppress_warnings;
190 | 
191 | /*
192 |  * printf-style message helpers.  _printf(1, 2) lets supporting compilers
193 |  * check the format string (parameter 1) against the variadic arguments.
194 |  */
195 | void _printf(1, 2) msg(const char *fmt, ...);
196 | void _printf(1, 2) msg_errno(const char *fmt, ...);
197 | void _printf(1, 2) warn(const char *fmt, ...);
198 | 
199 | /*
200 |  * Allocate 'size' bytes.  checksum.c tests the result against NULL, so a
201 |  * failure is reported by returning NULL.
202 |  */
203 | void *xmalloc(size_t size);
204 | 
205 | /* Called by tmain() with its argv before any option parsing. */
206 | void begin_program(tchar *argv[]);
207 | 
208 | /*
209 |  * State of one open input or output file.  checksum.c prints 'name' as the
210 |  * file's name; the other fields are only used by code not shown here.
211 |  */
212 | struct file_stream {
213 | 	int fd;
214 | 	tchar *name;
215 | 	bool is_standard_stream;
216 | 	void *mmap_token;
217 | 	void *mmap_mem;
218 | 	size_t mmap_size;
219 | };
220 | 
221 | /*
222 |  * Open 'path' for reading or writing, filling in *strm.  checksum.c treats a
223 |  * nonzero return value from xopen_for_read() as failure.
224 |  * NOTE(review): a NULL 'path' presumably selects a standard stream -- confirm.
225 |  */
226 | int xopen_for_read(const tchar *path, bool symlink_ok,
227 | 		   struct file_stream *strm);
228 | int xopen_for_write(const tchar *path, bool force, struct file_stream *strm);
229 | int map_file_contents(struct file_stream *strm, u64 size);
230 | 
231 | /*
232 |  * Read up to 'count' bytes into 'buf'.  As interpreted by checksum.c: a
233 |  * negative return value is an error, 0 is end of input, and a positive value
234 |  * is the number of bytes read.
235 |  */
236 | ssize_t xread(struct file_stream *strm, void *buf, size_t count);
237 | int full_write(struct file_stream *strm, const void *buf, size_t count);
238 | 
239 | int xclose(struct file_stream *strm);
240 | 
241 | int parse_compression_level(tchar opt_char, const tchar *arg);
242 | 
243 | struct libdeflate_compressor *alloc_compressor(int level);
244 | struct libdeflate_decompressor *alloc_decompressor(void);
245 | 
246 | /* tgetopt.c */
247 | 
248 | /*
249 |  * Option parser for tchar argument vectors.  As used by checksum.c:
250 |  * tgetopt() returns the next option character, or -1 once the options are
251 |  * exhausted; 'toptarg' holds the current option's argument and 'toptind'
252 |  * the index of the first non-option argument.
253 |  */
254 | extern tchar *toptarg;
255 | extern int toptind, topterr, toptopt;
256 | 
257 | int tgetopt(int argc, tchar *argv[], const tchar *optstring);
224 | 
225 | #endif /* PROGRAMS_PROG_UTIL_H */
226 | 


--------------------------------------------------------------------------------
/programs/test_checksums.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * test_checksums.c
  3 |  *
  4 |  * Verify that libdeflate's Adler-32 and CRC-32 functions produce the same
  5 |  * results as their zlib equivalents.
  6 |  */
  7 | 
  8 | #include "test_util.h"
  9 | 
 10 | #include <stdlib.h>
 11 | #include <time.h>
 12 | 
 13 | static unsigned int rng_seed;
 14 | 
 15 | /*
 16 |  * Common signature shared by the checksum functions under test:
 17 |  * (current checksum value, buffer, buffer length) -> updated checksum value.
 18 |  */
 19 | typedef u32 (*cksum_fn_t)(u32, const void *, size_t);
 20 | 
 21 | /*
 22 |  * The four functions below adapt libdeflate's and zlib's Adler-32 and CRC-32
 23 |  * functions to the cksum_fn_t signature so that the tests can treat the two
 24 |  * implementations interchangeably.
 25 |  */
 26 | 
 27 | /* Adler-32 via libdeflate_adler32(). */
 28 | static u32
 29 | adler32_libdeflate(u32 adler, const void *buf, size_t len)
 30 | {
 31 | 	return libdeflate_adler32(adler, buf, len);
 32 | }
 33 | 
 34 | /* CRC-32 via libdeflate_crc32(). */
 35 | static u32
 36 | crc32_libdeflate(u32 crc, const void *buf, size_t len)
 37 | {
 38 | 	return libdeflate_crc32(crc, buf, len);
 39 | }
 40 | 
 41 | /* Adler-32 via zlib's adler32(), the reference implementation here. */
 42 | static u32
 43 | adler32_zlib(u32 adler, const void *buf, size_t len)
 44 | {
 45 | 	return adler32(adler, buf, len);
 46 | }
 47 | 
 48 | /* CRC-32 via zlib's crc32(), the reference implementation here. */
 49 | static u32
 50 | crc32_zlib(u32 crc, const void *buf, size_t len)
 51 | {
 52 | 	return crc32(crc, buf, len);
 53 | }
 40 | 
 41 | /*
 42 |  * Choose a random initial CRC-32 value: 0 when the first rand() result is
 43 |  * odd, otherwise a value assembled from two further rand() results.
 44 |  */
 45 | static u32
 46 | select_initial_crc(void)
 47 | {
 48 | 	u32 hi, lo;
 49 | 
 50 | 	if (rand() & 1)
 51 | 		return 0;
 52 | 	/*
 53 | 	 * Draw the two halves in separate statements.  When both rand() calls
 54 | 	 * are operands of a single '|', C leaves their evaluation order
 55 | 	 * unspecified, so the value produced for a given seed could differ
 56 | 	 * between compilers.
 57 | 	 */
 58 | 	hi = rand();
 59 | 	lo = rand();
 60 | 	return (hi << 16) | lo;
 61 | }
 48 | 
 49 | /*
 50 |  * Pick one 16-bit half of an initial Adler-32 value: 65520 when rand() is a
 51 |  * multiple of 4, otherwise a further rand() result reduced modulo 65521.
 52 |  */
 53 | static u32
 54 | random_adler_half(void)
 55 | {
 56 | 	if (rand() % 4 == 0)
 57 | 		return 65520;
 58 | 	return rand() % 65521;
 59 | }
 60 | 
 61 | /*
 62 |  * Choose a random initial Adler-32 value: 1 when the first rand() result is
 63 |  * odd, otherwise two independently chosen halves, low half drawn first.
 64 |  */
 65 | static u32
 66 | select_initial_adler(void)
 67 | {
 68 | 	u32 low_half, high_half;
 69 | 
 70 | 	if (rand() & 1)
 71 | 		return 1;
 72 | 
 73 | 	low_half = random_adler_half();
 74 | 	high_half = random_adler_half();
 75 | 	return (high_half << 16) | low_half;
 76 | }
 61 | 
 62 | /*
 63 |  * Check that 'cksum' returns 'expected' whenever it is given a NULL buffer,
 64 |  * whatever checksum value and length are passed alongside it.  The
 65 |  * (0, NULL, 1) case is skipped for adler32_zlib, which the original comment
 66 |  * marks as broken.
 67 |  */
 68 | static void
 69 | test_initial_values(cksum_fn_t cksum, u32 expected)
 70 | {
 71 | 	ASSERT(cksum(0, NULL, 0) == expected);
 72 | 	if (cksum != adler32_zlib) /* broken */
 73 | 		ASSERT(cksum(0, NULL, 1) == expected);
 74 | 	ASSERT(cksum(0, NULL, 1234) == expected);
 75 | 	ASSERT(cksum(1234, NULL, 0) == expected);
 76 | 	ASSERT(cksum(1234, NULL, 1234) == expected);
 77 | }
 72 | 
 73 | /*
 74 |  * Checksum 'buffer' in two pieces split at a random offset in [0, size],
 75 |  * starting from 'v', and fail the test if the result differs from
 76 |  * 'expected'.  'name' identifies the checksum in the failure message.
 77 |  */
 78 | static void
 79 | test_multipart(const u8 *buffer, size_t size, const char *name,
 80 | 	       cksum_fn_t cksum, u32 v, u32 expected)
 81 | {
 82 | 	const size_t head_len = rand() % (size + 1);
 83 | 	const size_t tail_len = size - head_len;
 84 | 	u32 combined;
 85 | 
 86 | 	combined = cksum(v, buffer, head_len);
 87 | 	combined = cksum(combined, buffer + head_len, tail_len);
 88 | 	if (combined == expected)
 89 | 		return;
 90 | 
 91 | 	fprintf(stderr, "%s checksum failed multipart test\n", name);
 92 | 	ASSERT(0);
 93 | }
 85 | 
 86 | static void
 87 | test_checksums(const void *buffer, size_t size, const char *name,
 88 | 	       cksum_fn_t cksum1, cksum_fn_t cksum2, u32 initial_value)
 89 | {
 90 | 	u32 v1 = cksum1(initial_value, buffer, size);
 91 | 	u32 v2 = cksum2(initial_value, buffer, size);
 92 | 
 93 | 	if (v1 != v2) {
 94 | 		fprintf(stderr, "%s checksum mismatch\n", name);
 95 | 		fprintf(stderr, "initial_value=0x%08"PRIx32", buffer=%p, "
 96 | 			"size=%zu, buffer=", initial_value, buffer, size);
 97 | 		for (size_t i = 0; i < MIN(size, 256); i++)
 98 | 			fprintf(stderr, "%02x", ((const u8 *)buffer)[i]);
 99 | 		if (size > 256)
100 | 			fprintf(stderr, "...");
101 | 		fprintf(stderr, "\n");
102 | 		ASSERT(0);
103 | 	}
104 | 
105 | 	if ((rand() & 15) == 0) {
106 | 		test_multipart(buffer, size, name, cksum1, initial_value, v1);
107 | 		test_multipart(buffer, size, name, cksum2, initial_value, v1);
108 | 	}
109 | }
110 | 
111 | static void
112 | test_crc32(const void *buffer, size_t size, u32 initial_value)
113 | {
114 | 	test_checksums(buffer, size, "CRC-32",
115 | 		       crc32_libdeflate, crc32_zlib, initial_value);
116 | }
117 | 
118 | static void
119 | test_adler32(const void *buffer, size_t size, u32 initial_value)
120 | {
121 | 	test_checksums(buffer, size, "Adler-32",
122 | 		       adler32_libdeflate, adler32_zlib, initial_value);
123 | }
124 | 
125 | static void test_random_buffers(u8 *buf_start, u8 *buf_end, size_t limit,
126 | 				u32 num_iter)
127 | {
128 | 	for (u32 i = 0; i < num_iter; i++) {
129 | 		size_t start = rand() % limit;
130 | 		size_t len = rand() % (limit - start);
131 | 		u32 a0 = select_initial_adler();
132 | 		u32 c0 = select_initial_crc();
133 | 
134 | 		for (size_t j = start; j < start + len; j++)
135 | 			buf_start[j] = rand();
136 | 
137 | 		/* Test with chosen size and alignment */
138 | 		test_adler32(&buf_start[start], len, a0);
139 | 		test_crc32(&buf_start[start], len, c0);
140 | 
141 | 		/* Test with chosen size, with guard page before input buffer */
142 | 		memmove(buf_start, &buf_start[start], len);
143 | 		test_adler32(buf_start, len, a0);
144 | 		test_crc32(buf_start, len, c0);
145 | 
146 | 		/* Test with chosen size, with guard page after input buffer */
147 | 		memmove(buf_end - len, buf_start, len);
148 | 		test_adler32(buf_end - len, len, a0);
149 | 		test_crc32(buf_end - len, len, c0);
150 | 	}
151 | }
152 | 
153 | int
154 | tmain(int argc, tchar *argv[])
155 | {
156 | 	u8 *buf_start, *buf_end;
157 | 
158 | 	begin_program(argv);
159 | 
160 | 	alloc_guarded_buffer(262144, &buf_start, &buf_end);
161 | 
162 | 	rng_seed = time(NULL);
163 | 	srand(rng_seed);
164 | 
165 | 	test_initial_values(adler32_libdeflate, 1);
166 | 	test_initial_values(adler32_zlib, 1);
167 | 	test_initial_values(crc32_libdeflate, 0);
168 | 	test_initial_values(crc32_zlib, 0);
169 | 
170 | 	/* Test different buffer sizes and alignments */
171 | 	test_random_buffers(buf_start, buf_end, 256,  5000);
172 | 	test_random_buffers(buf_start, buf_end, 1024,  500);
173 | 	test_random_buffers(buf_start, buf_end, 32768,  50);
174 | 	test_random_buffers(buf_start, buf_end, 262144, 50);
175 | 
176 | 	/*
177 | 	 * Test Adler-32 overflow cases.  For example, given all 0xFF bytes and
178 | 	 * the highest possible initial (s1, s2) of (65520, 65520), then s2 if
179 | 	 * stored as a 32-bit unsigned integer will overflow if > 5552 bytes are
180 | 	 * processed.  Implementations must make sure to reduce s2 modulo 65521
181 | 	 * before that point.  Also, some implementations make use of 16-bit
182 | 	 * counters which can overflow earlier.
183 | 	 */
184 | 	memset(buf_start, 0xFF, 32768);
185 | 	for (u32 i = 0; i < 20; i++) {
186 | 		u32 initial_value;
187 | 
188 | 		if (i == 0)
189 | 			initial_value = ((u32)65520 << 16) | 65520;
190 | 		else
191 | 			initial_value = select_initial_adler();
192 | 
193 | 		test_adler32(buf_start, 5553, initial_value);
194 | 		test_adler32(buf_start, rand() % 32769, initial_value);
195 | 		buf_start[rand() % 32768] = 0xFE;
196 | 	}
197 | 
198 | 	free_guarded_buffer(buf_start, buf_end);
199 | 	return 0;
200 | }
201 | 


--------------------------------------------------------------------------------
/programs/test_custom_malloc.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * test_custom_malloc.c
  3 |  *
  4 |  * Test the support for custom memory allocators.
  5 |  * Also test injecting allocation failures.
  6 |  */
  7 | 
  8 | #include "test_util.h"
  9 | 
/* Number of calls made to do_malloc() / do_fail_malloc() since the last reset. */
static int malloc_count = 0;
/* Number of calls made to do_free() since the last reset. */
static int free_count = 0;
 12 | 
 13 | static void *do_malloc(size_t size)
 14 | {
 15 | 	malloc_count++;
 16 | 	return malloc(size);
 17 | }
 18 | 
 19 | static void *do_fail_malloc(size_t size)
 20 | {
 21 | 	malloc_count++;
 22 | 	return NULL;
 23 | }
 24 | 
 25 | static void do_free(void *ptr)
 26 | {
 27 | 	free_count++;
 28 | 	free(ptr);
 29 | }
 30 | 
 31 | static void reset_state(void)
 32 | {
 33 | 	libdeflate_set_memory_allocator(malloc, free);
 34 | 	malloc_count = 0;
 35 | 	free_count = 0;
 36 | }
 37 | 
 38 | /* Test that the custom allocator is actually used when requested. */
 39 | static void do_custom_memalloc_test(bool global)
 40 | {
 41 | 	static const struct libdeflate_options options = {
 42 | 		.sizeof_options = sizeof(options),
 43 | 		.malloc_func = do_malloc,
 44 | 		.free_func = do_free,
 45 | 	};
 46 | 	int level;
 47 | 	struct libdeflate_compressor *c;
 48 | 	struct libdeflate_decompressor *d;
 49 | 
 50 | 	if (global)
 51 | 		libdeflate_set_memory_allocator(do_malloc, do_free);
 52 | 
 53 | 	for (level = 0; level <= 12; level++) {
 54 | 		malloc_count = free_count = 0;
 55 | 		if (global)
 56 | 			c = libdeflate_alloc_compressor(level);
 57 | 		else
 58 | 			c = libdeflate_alloc_compressor_ex(level, &options);
 59 | 		ASSERT(c != NULL);
 60 | 		ASSERT(malloc_count == 1);
 61 | 		ASSERT(free_count == 0);
 62 | 		libdeflate_free_compressor(c);
 63 | 		ASSERT(malloc_count == 1);
 64 | 		ASSERT(free_count == 1);
 65 | 	}
 66 | 
 67 | 	malloc_count = free_count = 0;
 68 | 	if (global)
 69 | 		d = libdeflate_alloc_decompressor();
 70 | 	else
 71 | 		d = libdeflate_alloc_decompressor_ex(&options);
 72 | 	ASSERT(d != NULL);
 73 | 	ASSERT(malloc_count == 1);
 74 | 	ASSERT(free_count == 0);
 75 | 	libdeflate_free_decompressor(d);
 76 | 	ASSERT(malloc_count == 1);
 77 | 	ASSERT(free_count == 1);
 78 | 
 79 | 	reset_state();
 80 | }
 81 | 
/*
 * Byte offset of the first byte past 'field' within 'type', i.e. the offset
 * of the field plus the field's size.
 */
#define offsetofend(type, field) \
	(offsetof(type, field) + sizeof(((type *)NULL)->field))
 84 | 
 85 | /* Test some edge cases involving libdeflate_options. */
 86 | static void do_options_test(void)
 87 | {
 88 | 	struct libdeflate_options options = { 0 };
 89 | 	struct libdeflate_compressor *c;
 90 | 	struct libdeflate_decompressor *d;
 91 | 	/* Size in libdeflate v1.19 */
 92 | 	size_t min_size = offsetofend(struct libdeflate_options, free_func);
 93 | 
 94 | 	/* sizeof_options must be at least the minimum size. */
 95 | 	for (; options.sizeof_options < min_size;
 96 | 	     options.sizeof_options++) {
 97 | 		c = libdeflate_alloc_compressor_ex(6, &options);
 98 | 		ASSERT(c == NULL);
 99 | 		d = libdeflate_alloc_decompressor_ex(&options);
100 | 		ASSERT(d == NULL);
101 | 	}
102 | 
103 | 	/* NULL malloc_func and free_func means "use the global allocator". */
104 | 	options.sizeof_options = min_size;
105 | 	malloc_count = free_count = 0;
106 | 	libdeflate_set_memory_allocator(do_malloc, do_free);
107 | 	c = libdeflate_alloc_compressor_ex(6, &options);
108 | 	libdeflate_free_compressor(c);
109 | 	ASSERT(malloc_count == 1);
110 | 	ASSERT(free_count == 1);
111 | 	d = libdeflate_alloc_decompressor_ex(&options);
112 | 	libdeflate_free_decompressor(d);
113 | 	ASSERT(malloc_count == 2);
114 | 	ASSERT(free_count == 2);
115 | 
116 | 	reset_state();
117 | }
118 | 
119 | /* Test injecting memory allocation failures. */
120 | static void do_fault_injection_test(void)
121 | {
122 | 	int level;
123 | 	struct libdeflate_compressor *c;
124 | 	struct libdeflate_decompressor *d;
125 | 
126 | 	libdeflate_set_memory_allocator(do_fail_malloc, do_free);
127 | 
128 | 	for (level = 0; level <= 12; level++) {
129 | 		malloc_count = free_count = 0;
130 | 		c = libdeflate_alloc_compressor(level);
131 | 		ASSERT(c == NULL);
132 | 		ASSERT(malloc_count == 1);
133 | 		ASSERT(free_count == 0);
134 | 	}
135 | 
136 | 	malloc_count = free_count = 0;
137 | 	d = libdeflate_alloc_decompressor();
138 | 	ASSERT(d == NULL);
139 | 	ASSERT(malloc_count == 1);
140 | 	ASSERT(free_count == 0);
141 | 
142 | 	reset_state();
143 | }
144 | 
145 | int
146 | tmain(int argc, tchar *argv[])
147 | {
148 | 	begin_program(argv);
149 | 
150 | 	do_custom_memalloc_test(true);
151 | 	do_custom_memalloc_test(false);
152 | 	do_options_test();
153 | 	do_fault_injection_test();
154 | 	return 0;
155 | }
156 | 


--------------------------------------------------------------------------------
/programs/test_invalid_streams.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * test_invalid_streams.c
  3 |  *
  4 |  * Test that invalid DEFLATE streams are rejected with LIBDEFLATE_BAD_DATA.
  5 |  *
  6 |  * This isn't actually very important, since DEFLATE doesn't have built-in error
  7 |  * detection, so corruption of a DEFLATE stream can only be reliably detected
  8 |  * using a separate checksum anyway.  As long as the DEFLATE decompressor
  9 |  * handles all streams safely (no crashes, etc.), in practice it is fine for it
 10 |  * to automatically remap invalid streams to valid streams, instead of returning
 11 |  * an error.  Corruption detection is the responsibility of the zlib or gzip
 12 |  * layer, or the application when an external checksum is used.
 13 |  *
 14 |  * Nevertheless, to reduce surprises when people intentionally compare zlib's
 15 |  * and libdeflate's handling of invalid DEFLATE streams, libdeflate implements
 16 |  * zlib's strict behavior when decoding DEFLATE, except when it would have a
 17 |  * significant performance cost.
 18 |  */
 19 | 
 20 | #include "test_util.h"
 21 | 
 22 | static void
 23 | assert_decompression_error(const u8 *in, size_t in_nbytes)
 24 | {
 25 | 	struct libdeflate_decompressor *d;
 26 | 	z_stream z;
 27 | 	u8 out[128];
 28 | 	const size_t out_nbytes_avail = sizeof(out);
 29 | 	size_t actual_out_nbytes;
 30 | 	enum libdeflate_result res;
 31 | 
 32 | 	/* libdeflate */
 33 | 	d = libdeflate_alloc_decompressor();
 34 | 	ASSERT(d != NULL);
 35 | 	res = libdeflate_deflate_decompress(d, in, in_nbytes,
 36 | 					    out, out_nbytes_avail,
 37 | 					    &actual_out_nbytes);
 38 | 	ASSERT(res == LIBDEFLATE_BAD_DATA);
 39 | 	libdeflate_free_decompressor(d);
 40 | 
 41 | 	/* zlib, as a control */
 42 | 	memset(&z, 0, sizeof(z));
 43 | 	res = inflateInit2(&z, -15);
 44 | 	ASSERT(res == Z_OK);
 45 | 	z.next_in = (void *)in;
 46 | 	z.avail_in = in_nbytes;
 47 | 	z.next_out = (void *)out;
 48 | 	z.avail_out = out_nbytes_avail;
 49 | 	res = inflate(&z, Z_FINISH);
 50 | 	ASSERT(res == Z_DATA_ERROR);
 51 | 	inflateEnd(&z);
 52 | }
 53 | 
 54 | /*
 55 |  * Test that DEFLATE decompression returns an error if a block header contains
 56 |  * too many encoded litlen and offset codeword lengths.
 57 |  */
 58 | static void
 59 | test_too_many_codeword_lengths(void)
 60 | {
 61 | 	u8 in[128];
 62 | 	struct output_bitstream os = { .next = in, .end = in + sizeof(in) };
 63 | 	int i;
 64 | 
 65 | 	ASSERT(put_bits(&os, 1, 1));	/* BFINAL: 1 */
 66 | 	ASSERT(put_bits(&os, 2, 2));	/* BTYPE: DYNAMIC_HUFFMAN */
 67 | 
 68 | 	/*
 69 | 	 * Litlen code:
 70 | 	 *	litlensym_255			len=1 codeword=0
 71 | 	 *	litlensym_256 (end-of-block)	len=1 codeword=1
 72 | 	 * Offset code:
 73 | 	 *	(empty)
 74 | 	 *
 75 | 	 * Litlen and offset codeword lengths:
 76 | 	 *	[0..254] = 0	presym_{18,18}
 77 | 	 *	[255]	 = 1	presym_1
 78 | 	 *	[256]	 = 1	presym_1
 79 | 	 *	[257...] = 0	presym_18 [TOO MANY]
 80 | 	 *
 81 | 	 * Precode:
 82 | 	 *	presym_1	len=1 codeword=0
 83 | 	 *	presym_18	len=1 codeword=1
 84 | 	 */
 85 | 
 86 | 	ASSERT(put_bits(&os, 0, 5));	/* num_litlen_syms: 0 + 257 */
 87 | 	ASSERT(put_bits(&os, 0, 5));	/* num_offset_syms: 0 + 1 */
 88 | 	ASSERT(put_bits(&os, 14, 4));	/* num_explicit_precode_lens: 14 + 4 */
 89 | 
 90 | 	/*
 91 | 	 * Precode codeword lengths: order is
 92 | 	 * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
 93 | 	 */
 94 | 	for (i = 0; i < 2; i++)		/* presym_{16,17}: len=0 */
 95 | 		ASSERT(put_bits(&os, 0, 3));
 96 | 	ASSERT(put_bits(&os, 1, 3));	/* presym_18: len=1 */
 97 | 	ASSERT(put_bits(&os, 0, 3));	/* presym_0: len=0 */
 98 | 	for (i = 0; i < 13; i++)	/* presym_{8,...,14}: len=0 */
 99 | 		ASSERT(put_bits(&os, 0, 3));
100 | 	ASSERT(put_bits(&os, 1, 3));	/* presym_1: len=1 */
101 | 
102 | 	/* Litlen and offset codeword lengths */
103 | 	ASSERT(put_bits(&os, 0x1, 1) &&	/* presym_18, 128 zeroes */
104 | 	       put_bits(&os, 117, 7));
105 | 	ASSERT(put_bits(&os, 0x1, 1) &&	/* presym_18, 127 zeroes */
106 | 	       put_bits(&os, 116, 7));
107 | 	ASSERT(put_bits(&os, 0x0, 1));	/* presym_1 */
108 | 	ASSERT(put_bits(&os, 0x0, 1));	/* presym_1 */
109 | 	ASSERT(put_bits(&os, 0x1, 1) &&	/* presym_18, 128 zeroes [TOO MANY] */
110 | 	       put_bits(&os, 117, 7));
111 | 
112 | 	/* Literal */
113 | 	ASSERT(put_bits(&os, 0x0, 0));	/* litlensym_255 */
114 | 
115 | 	/* End of block */
116 | 	ASSERT(put_bits(&os, 0x1, 1));	/* litlensym_256 */
117 | 
118 | 	ASSERT(flush_bits(&os));
119 | 
120 | 	assert_decompression_error(in, os.next - in);
121 | }
122 | 
123 | int
124 | tmain(int argc, tchar *argv[])
125 | {
126 | 	begin_program(argv);
127 | 
128 | 	test_too_many_codeword_lengths();
129 | 	return 0;
130 | }
131 | 


--------------------------------------------------------------------------------
/programs/test_litrunlen_overflow.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * test_litrunlen_overflow.c
 3 |  *
 4 |  * Regression test for commit f2f0df727444 ("deflate_compress: fix corruption
 5 |  * with long literal run").  Try to compress a file longer than 65535 bytes
 6 |  * where no 2-byte sequence (3 would be sufficient) is repeated <= 32768 bytes
 7 |  * apart, and the distribution of bytes remains constant throughout, and yet not
 8 |  * all bytes are used so the data is still slightly compressible.  There will be
 9 |  * no matches in this data, but the compressor should still output a compressed
10 |  * block, and this block should contain more than 65535 consecutive literals,
11 |  * which triggered the bug.
12 |  *
13 |  * Note: on random data, this situation is extremely unlikely if the compressor
14 |  * uses all matches it finds, since random data will on average have a 3-byte
15 |  * match every (256**3)/32768 = 512 bytes.
16 |  */
17 | 
18 | #include "test_util.h"
19 | 
20 | int
21 | tmain(int argc, tchar *argv[])
22 | {
23 | 	const int data_size = 2 * 250 * 251;
24 | 	u8 *orig_data, *compressed_data, *decompressed_data;
25 | 	int i, stride, multiple, j = 0;
26 | 	struct libdeflate_decompressor *d;
27 | 	static const int levels[] = { 3, 6, 12 };
28 | 
29 | 	begin_program(argv);
30 | 
31 | 	orig_data = xmalloc(data_size);
32 | 	compressed_data = xmalloc(data_size);
33 | 	decompressed_data = xmalloc(data_size);
34 | 
35 | 	for (i = 0; i < 2; i++) {
36 | 		for (stride = 1; stride < 251; stride++) {
37 | 			for (multiple = 0; multiple < 251; multiple++)
38 | 				orig_data[j++] = (stride * multiple) % 251;
39 | 		}
40 | 	}
41 | 	ASSERT(j == data_size);
42 | 
43 | 	d = libdeflate_alloc_decompressor();
44 | 	ASSERT(d != NULL);
45 | 
46 | 	for (i = 0; i < ARRAY_LEN(levels); i++) {
47 | 		struct libdeflate_compressor *c;
48 | 		size_t csize;
49 | 		enum libdeflate_result res;
50 | 
51 | 		c = libdeflate_alloc_compressor(levels[i]);
52 | 		ASSERT(c != NULL);
53 | 
54 | 		csize = libdeflate_deflate_compress(c, orig_data, data_size,
55 | 						    compressed_data, data_size);
56 | 		ASSERT(csize > 0 && csize < data_size);
57 | 
58 | 		res = libdeflate_deflate_decompress(d, compressed_data, csize,
59 | 						    decompressed_data,
60 | 						    data_size, NULL);
61 | 		ASSERT(res == LIBDEFLATE_SUCCESS);
62 | 		ASSERT(memcmp(orig_data, decompressed_data, data_size) == 0);
63 | 
64 | 		libdeflate_free_compressor(c);
65 | 	}
66 | 
67 | 	libdeflate_free_decompressor(d);
68 | 	free(orig_data);
69 | 	free(compressed_data);
70 | 	free(decompressed_data);
71 | 	return 0;
72 | }
73 | 


--------------------------------------------------------------------------------
/programs/test_overread.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * test_overread.c
 3 |  *
 4 |  * Test that the decompressor doesn't produce an unbounded amount of output if
 5 |  * it runs out of input, even when implicit zeroes appended to the input would
 6 |  * continue producing output (as is the case when the input ends during a
 7 |  * DYNAMIC_HUFFMAN block where a literal has an all-zeroes codeword).
 8 |  *
 9 |  * This is a regression test for commit 3f21ec9d6121 ("deflate_decompress: error
10 |  * out if overread count gets too large").
11 |  */
12 | 
13 | #include "test_util.h"
14 | 
/*
 * Write the test stream into 'os': a non-final DYNAMIC_HUFFMAN block header
 * whose litlen code gives literal 0 an all-zeroes codeword, with no block
 * data following.  Every write is asserted, so running out of space in the
 * output buffer fails immediately instead of being silently ignored.
 */
static void
generate_test_input(struct output_bitstream *os)
{
	int i;

	ASSERT(put_bits(os, 0, 1));	/* BFINAL: 0 */
	ASSERT(put_bits(os, 2, 2));	/* BTYPE: DYNAMIC_HUFFMAN */

	/*
	 * Write the Huffman codes.
	 *
	 * Litlen code:
	 *	litlensym_0   (0)		len=1 codeword=0
	 *	litlensym_256 (end-of-block)	len=1 codeword=1
	 * Offset code:
	 *	offsetsym_0 (unused)		len=1 codeword=0
	 *
	 * Litlen and offset codeword lengths:
	 *	[0]	 = 1	presym_1
	 *	[1..255] = 0	presym_{18,18}
	 *	[256]	 = 1	presym_1
	 *	[257]	 = 1	presym_1
	 *
	 * Precode:
	 *	presym_1	len=1 codeword=0
	 *	presym_18	len=1 codeword=1
	 */
	ASSERT(put_bits(os, 0, 5));	/* num_litlen_syms: 0 + 257 */
	ASSERT(put_bits(os, 0, 5));	/* num_offset_syms: 0 + 1 */
	ASSERT(put_bits(os, 14, 4));	/* num_explicit_precode_lens: 14 + 4 */
	/*
	 * Precode codeword lengths: order is
	 * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
	 */
	ASSERT(put_bits(os, 0, 3));	/* presym_16: len=0 */
	ASSERT(put_bits(os, 0, 3));	/* presym_17: len=0 */
	ASSERT(put_bits(os, 1, 3));	/* presym_18: len=1 */
	for (i = 0; i < 14; i++)	/* presym_{0,...,14}: len=0 */
		ASSERT(put_bits(os, 0, 3));
	ASSERT(put_bits(os, 1, 3));	/* presym_1: len=1 */

	/* Litlen and offset codeword lengths */
	ASSERT(put_bits(os, 0, 1));	/* presym_1 */
	ASSERT(put_bits(os, 1, 1));	/* presym_18 ... */
	ASSERT(put_bits(os, 117, 7));	/* ... 11 + 117 zeroes */
	ASSERT(put_bits(os, 1, 1));	/* presym_18 ... */
	ASSERT(put_bits(os, 116, 7));	/* ... 11 + 116 zeroes */
	ASSERT(put_bits(os, 0, 1));	/* presym_1 */
	ASSERT(put_bits(os, 0, 1));	/* presym_1 */

	/* Implicit zeroes would generate endless literals from here. */

	ASSERT(flush_bits(os));
}
69 | 
70 | int
71 | tmain(int argc, tchar *argv[])
72 | {
73 | 	u8 cdata[16];
74 | 	u8 udata[256];
75 | 	struct output_bitstream os =
76 | 		{ .next = cdata, .end = cdata + sizeof(cdata) };
77 | 	struct libdeflate_decompressor *d;
78 | 	enum libdeflate_result res;
79 | 	size_t actual_out_nbytes;
80 | 
81 | 	begin_program(argv);
82 | 
83 | 	generate_test_input(&os);
84 | 	d = libdeflate_alloc_decompressor();
85 | 	ASSERT(d != NULL);
86 | 
87 | 	res = libdeflate_deflate_decompress(d, cdata, os.next - cdata,
88 | 					    udata, sizeof(udata),
89 | 					    &actual_out_nbytes);
90 | 	/* Before the fix, the result was LIBDEFLATE_INSUFFICIENT_SPACE here. */
91 | 	ASSERT(res == LIBDEFLATE_BAD_DATA);
92 | 
93 | 	libdeflate_free_decompressor(d);
94 | 	return 0;
95 | }
96 | 


--------------------------------------------------------------------------------
/programs/test_trailing_bytes.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * test_trailing_bytes.c
  3 |  *
  4 |  * Test that decompression correctly stops at the end of the first DEFLATE,
  5 |  * zlib, or gzip stream, and doesn't process any additional trailing bytes.
  6 |  */
  7 | 
  8 | #include "test_util.h"
  9 | 
 10 | static const struct {
 11 | 	size_t (*compress)(struct libdeflate_compressor *compressor,
 12 | 			   const void *in, size_t in_nbytes,
 13 | 			   void *out, size_t out_nbytes_avail);
 14 | 	enum libdeflate_result (*decompress)(
 15 | 			struct libdeflate_decompressor *decompressor,
 16 | 			const void *in, size_t in_nbytes,
 17 | 			void *out, size_t out_nbytes_avail,
 18 | 			size_t *actual_out_nbytes_ret);
 19 | 	enum libdeflate_result (*decompress_ex)(
 20 | 			struct libdeflate_decompressor *decompressor,
 21 | 			const void *in, size_t in_nbytes,
 22 | 			void *out, size_t out_nbytes_avail,
 23 | 			size_t *actual_in_nbytes_ret,
 24 | 			size_t *actual_out_nbytes_ret);
 25 | } codecs[] = {
 26 | 	{
 27 | 		.compress = libdeflate_deflate_compress,
 28 | 		.decompress = libdeflate_deflate_decompress,
 29 | 		.decompress_ex = libdeflate_deflate_decompress_ex,
 30 | 	}, {
 31 | 		.compress = libdeflate_zlib_compress,
 32 | 		.decompress = libdeflate_zlib_decompress,
 33 | 		.decompress_ex = libdeflate_zlib_decompress_ex,
 34 | 	}, {
 35 | 		.compress = libdeflate_gzip_compress,
 36 | 		.decompress = libdeflate_gzip_decompress,
 37 | 		.decompress_ex = libdeflate_gzip_decompress_ex,
 38 | 	}
 39 | };
 40 | 
 41 | int
 42 | tmain(int argc, tchar *argv[])
 43 | {
 44 | 	const size_t original_nbytes = 32768;
 45 | 	const size_t compressed_nbytes_total = 32768;
 46 | 	/*
 47 | 	 * Don't use the full buffer for compressed data, because we want to
 48 | 	 * test whether decompression can deal with additional trailing bytes.
 49 | 	 *
 50 | 	 * Note: we can't use a guarded buffer (i.e. a buffer where the byte
 51 | 	 * after compressed_nbytes is unmapped) because the decompressor may
 52 | 	 * read a few bytes beyond the end of the stream (but ultimately not
 53 | 	 * actually use those bytes) as long as they are within the buffer.
 54 | 	 */
 55 | 	const size_t compressed_nbytes_avail = 30000;
 56 | 	size_t i;
 57 | 	u8 *original;
 58 | 	u8 *compressed;
 59 | 	u8 *decompressed;
 60 | 	struct libdeflate_compressor *c;
 61 | 	struct libdeflate_decompressor *d;
 62 | 	size_t compressed_nbytes;
 63 | 	enum libdeflate_result res;
 64 | 	size_t actual_compressed_nbytes;
 65 | 	size_t actual_decompressed_nbytes;
 66 | 
 67 | 	begin_program(argv);
 68 | 
 69 | 	ASSERT(compressed_nbytes_avail < compressed_nbytes_total);
 70 | 
 71 | 	/* Prepare some dummy data to compress */
 72 | 	original = xmalloc(original_nbytes);
 73 | 	ASSERT(original != NULL);
 74 | 	for (i = 0; i < original_nbytes; i++)
 75 | 		original[i] = (i % 123) + (i % 1023);
 76 | 
 77 | 	compressed = xmalloc(compressed_nbytes_total);
 78 | 	ASSERT(compressed != NULL);
 79 | 	memset(compressed, 0, compressed_nbytes_total);
 80 | 
 81 | 	decompressed = xmalloc(original_nbytes);
 82 | 	ASSERT(decompressed != NULL);
 83 | 
 84 | 	c = libdeflate_alloc_compressor(6);
 85 | 	ASSERT(c != NULL);
 86 | 
 87 | 	d = libdeflate_alloc_decompressor();
 88 | 	ASSERT(d != NULL);
 89 | 
 90 | 	for (i = 0; i < ARRAY_LEN(codecs); i++) {
 91 | 		compressed_nbytes = codecs[i].compress(c, original,
 92 | 						       original_nbytes,
 93 | 						       compressed,
 94 | 						       compressed_nbytes_avail);
 95 | 		ASSERT(compressed_nbytes > 0);
 96 | 		ASSERT(compressed_nbytes <= compressed_nbytes_avail);
 97 | 
 98 | 		/* Test decompress() of stream that fills the whole buffer */
 99 | 		actual_decompressed_nbytes = 0;
100 | 		memset(decompressed, 0, original_nbytes);
101 | 		res = codecs[i].decompress(d, compressed, compressed_nbytes,
102 | 					   decompressed, original_nbytes,
103 | 					   &actual_decompressed_nbytes);
104 | 		ASSERT(res == LIBDEFLATE_SUCCESS);
105 | 		ASSERT(actual_decompressed_nbytes == original_nbytes);
106 | 		ASSERT(memcmp(decompressed, original, original_nbytes) == 0);
107 | 
108 | 		/* Test decompress_ex() of stream that fills the whole buffer */
109 | 		actual_compressed_nbytes = actual_decompressed_nbytes = 0;
110 | 		memset(decompressed, 0, original_nbytes);
111 | 		res = codecs[i].decompress_ex(d, compressed, compressed_nbytes,
112 | 					      decompressed, original_nbytes,
113 | 					      &actual_compressed_nbytes,
114 | 					      &actual_decompressed_nbytes);
115 | 		ASSERT(res == LIBDEFLATE_SUCCESS);
116 | 		ASSERT(actual_compressed_nbytes == compressed_nbytes);
117 | 		ASSERT(actual_decompressed_nbytes == original_nbytes);
118 | 		ASSERT(memcmp(decompressed, original, original_nbytes) == 0);
119 | 
120 | 		/* Test decompress() of stream with trailing bytes */
121 | 		actual_decompressed_nbytes = 0;
122 | 		memset(decompressed, 0, original_nbytes);
123 | 		res = codecs[i].decompress(d, compressed,
124 | 					   compressed_nbytes_total,
125 | 					   decompressed, original_nbytes,
126 | 					   &actual_decompressed_nbytes);
127 | 		ASSERT(res == LIBDEFLATE_SUCCESS);
128 | 		ASSERT(actual_decompressed_nbytes == original_nbytes);
129 | 		ASSERT(memcmp(decompressed, original, original_nbytes) == 0);
130 | 
131 | 		/* Test decompress_ex() of stream with trailing bytes */
132 | 		actual_compressed_nbytes = actual_decompressed_nbytes = 0;
133 | 		memset(decompressed, 0, original_nbytes);
134 | 		res = codecs[i].decompress_ex(d, compressed,
135 | 					      compressed_nbytes_total,
136 | 					      decompressed, original_nbytes,
137 | 					      &actual_compressed_nbytes,
138 | 					      &actual_decompressed_nbytes);
139 | 		ASSERT(res == LIBDEFLATE_SUCCESS);
140 | 		ASSERT(actual_compressed_nbytes == compressed_nbytes);
141 | 		ASSERT(actual_decompressed_nbytes == original_nbytes);
142 | 		ASSERT(memcmp(decompressed, original, original_nbytes) == 0);
143 | 	}
144 | 
145 | 	free(original);
146 | 	free(compressed);
147 | 	free(decompressed);
148 | 	libdeflate_free_compressor(c);
149 | 	libdeflate_free_decompressor(d);
150 | 	return 0;
151 | }
152 | 


--------------------------------------------------------------------------------
/programs/test_util.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * test_util.c - utility functions for test programs
  3 |  *
  4 |  * Copyright 2016 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #include "test_util.h"
 29 | 
 30 | #include <fcntl.h>
 31 | #include <time.h>
 32 | #ifdef _WIN32
 33 | #  include <windows.h>
 34 | #else
 35 | #  include <unistd.h>
 36 | #  include <sys/mman.h>
 37 | #  include <sys/time.h>
 38 | #endif
 39 | 
/*
 * Fall back to MAP_ANON when the platform headers do not define
 * MAP_ANONYMOUS.
 */
#ifndef MAP_ANONYMOUS
#  define MAP_ANONYMOUS MAP_ANON
#endif
 43 | 
/*
 * Report a failed assertion and abort the program.
 *
 * @expr: text of the expression that failed
 * @file: source file in which the assertion is located
 * @line: line number of the assertion
 *
 * Prints the message through msg() and then calls abort(); never returns.
 */
NORETURN void
assertion_failed(const char *expr, const char *file, int line)
{
	msg("Assertion failed: %s at %s:%d", expr, file, line);
	abort();
}
 51 | 
 52 | void
 53 | begin_performance_test(void)
 54 | {
 55 | 	/* Skip performance tests by default, since they can be flaky. */
 56 | 	if (getenv("INCLUDE_PERF_TESTS") == NULL)
 57 | 		exit(0);
 58 | }
 59 | 
 60 | static size_t
 61 | get_page_size(void)
 62 | {
 63 | #ifdef _WIN32
 64 | 	SYSTEM_INFO info;
 65 | 
 66 | 	GetSystemInfo(&info);
 67 | 	return info.dwPageSize;
 68 | #else
 69 | 	return sysconf(_SC_PAGESIZE);
 70 | #endif
 71 | }
 72 | 
 73 | /* Allocate a buffer with guard pages */
 74 | void
 75 | alloc_guarded_buffer(size_t size, u8 **start_ret, u8 **end_ret)
 76 | {
 77 | 	const size_t pagesize = get_page_size();
 78 | 	const size_t nr_pages = (size + pagesize - 1) / pagesize;
 79 | 	u8 *base_addr;
 80 | 	u8 *start, *end;
 81 | #ifdef _WIN32
 82 | 	DWORD oldProtect;
 83 | #endif
 84 | 
 85 | 	*start_ret = NULL;
 86 | 	*end_ret = NULL;
 87 | 
 88 | #ifdef _WIN32
 89 | 	/* Allocate buffer and guard pages with no access. */
 90 | 	base_addr = VirtualAlloc(NULL, (nr_pages + 2) * pagesize,
 91 | 				 MEM_COMMIT | MEM_RESERVE, PAGE_NOACCESS);
 92 | 	if (!base_addr) {
 93 | 		msg("Unable to allocate memory (VirtualAlloc): Windows error %u",
 94 | 		    (unsigned int)GetLastError());
 95 | 		ASSERT(0);
 96 | 	}
 97 | 	start = base_addr + pagesize;
 98 | 	end = start + (nr_pages * pagesize);
 99 | 
100 | 	/* Grant read+write access to just the buffer. */
101 | 	if (!VirtualProtect(start, end - start, PAGE_READWRITE, &oldProtect)) {
102 | 		msg("Unable to protect memory (VirtualProtect): Windows error %u",
103 | 		    (unsigned int)GetLastError());
104 | 		VirtualFree(base_addr, 0, MEM_RELEASE);
105 | 		ASSERT(0);
106 | 	}
107 | #else
108 | 	/* Allocate buffer and guard pages. */
109 | 	base_addr = mmap(NULL, (nr_pages + 2) * pagesize, PROT_READ|PROT_WRITE,
110 | 			 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
111 | 	if (base_addr == (u8 *)MAP_FAILED) {
112 | 		msg_errno("Unable to allocate memory (anonymous mmap)");
113 | 		ASSERT(0);
114 | 	}
115 | 	start = base_addr + pagesize;
116 | 	end = start + (nr_pages * pagesize);
117 | 
118 | 	/* Unmap the guard pages. */
119 | 	munmap(base_addr, pagesize);
120 | 	munmap(end, pagesize);
121 | #endif
122 | 	*start_ret = start;
123 | 	*end_ret = end;
124 | }
125 | 
126 | /* Free a buffer that was allocated by alloc_guarded_buffer() */
127 | void
128 | free_guarded_buffer(u8 *start, u8 *end)
129 | {
130 | 	if (!start)
131 | 		return;
132 | #ifdef _WIN32
133 | 	VirtualFree(start - get_page_size(), 0, MEM_RELEASE);
134 | #else
135 | 	munmap(start, end - start);
136 | #endif
137 | }
138 | 
139 | /*
140 |  * Return the number of timer ticks that have elapsed since some unspecified
141 |  * point fixed at the start of program execution
142 |  */
143 | u64
144 | timer_ticks(void)
145 | {
146 | #ifdef _WIN32
147 | 	LARGE_INTEGER count;
148 | 
149 | 	QueryPerformanceCounter(&count);
150 | 	return count.QuadPart;
151 | #elif defined(HAVE_CLOCK_GETTIME) || \
152 | 	/* fallback detection method for direct compilation */ \
153 | 	(!defined(HAVE_CONFIG_H) && defined(CLOCK_MONOTONIC))
154 | 	struct timespec ts;
155 | 
156 | 	clock_gettime(CLOCK_MONOTONIC, &ts);
157 | 	return (1000000000 * (u64)ts.tv_sec) + ts.tv_nsec;
158 | #else
159 | 	struct timeval tv;
160 | 
161 | 	gettimeofday(&tv, NULL);
162 | 	return (1000000 * (u64)tv.tv_sec) + tv.tv_usec;
163 | #endif
164 | }
165 | 
166 | /*
167 |  * Return the number of timer ticks per second
168 |  */
169 | static u64
170 | timer_frequency(void)
171 | {
172 | #ifdef _WIN32
173 | 	LARGE_INTEGER freq;
174 | 
175 | 	QueryPerformanceFrequency(&freq);
176 | 	return freq.QuadPart;
177 | #elif defined(HAVE_CLOCK_GETTIME) || \
178 | 	/* fallback detection method for direct compilation */ \
179 | 	(!defined(HAVE_CONFIG_H) && defined(CLOCK_MONOTONIC))
180 | 	return 1000000000;
181 | #else
182 | 	return 1000000;
183 | #endif
184 | }
185 | 
186 | /*
187 |  * Convert a number of elapsed timer ticks to milliseconds
188 |  */
189 | u64 timer_ticks_to_ms(u64 ticks)
190 | {
191 | 	return ticks * 1000 / timer_frequency();
192 | }
193 | 
194 | /*
195 |  * Convert a byte count and a number of elapsed timer ticks to MB/s
196 |  */
197 | u64 timer_MB_per_s(u64 bytes, u64 ticks)
198 | {
199 | 	return bytes * timer_frequency() / ticks / 1000000;
200 | }
201 | 
202 | /*
203 |  * Convert a byte count and a number of elapsed timer ticks to KB/s
204 |  */
205 | u64 timer_KB_per_s(u64 bytes, u64 ticks)
206 | {
207 | 	return bytes * timer_frequency() / ticks / 1000;
208 | }
209 | 
210 | bool
211 | put_bits(struct output_bitstream *os, machine_word_t bits, int num_bits)
212 | {
213 | 	os->bitbuf |= bits << os->bitcount;
214 | 	os->bitcount += num_bits;
215 | 	while (os->bitcount >= 8) {
216 | 		if (os->next == os->end)
217 | 			return false;
218 | 		*os->next++ = os->bitbuf;
219 | 		os->bitcount -= 8;
220 | 		os->bitbuf >>= 8;
221 | 	}
222 | 	return true;
223 | }
224 | 
225 | bool
226 | flush_bits(struct output_bitstream *os)
227 | {
228 | 	while (os->bitcount > 0) {
229 | 		if (os->next == os->end)
230 | 			return false;
231 | 		*os->next++ = os->bitbuf;
232 | 		os->bitcount -= 8;
233 | 		os->bitbuf >>= 8;
234 | 	}
235 | 	os->bitcount = 0;
236 | 	return true;
237 | }
238 | 


--------------------------------------------------------------------------------
/programs/test_util.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * test_util.h - utility functions for test programs
 3 |  *
 4 |  * Copyright 2016 Eric Biggers
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person
 7 |  * obtaining a copy of this software and associated documentation
 8 |  * files (the "Software"), to deal in the Software without
 9 |  * restriction, including without limitation the rights to use,
10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
11 |  * copies of the Software, and to permit persons to whom the
12 |  * Software is furnished to do so, subject to the following
13 |  * conditions:
14 |  *
15 |  * The above copyright notice and this permission notice shall be
16 |  * included in all copies or substantial portions of the Software.
17 |  *
18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 |  * OTHER DEALINGS IN THE SOFTWARE.
26 |  */
27 | 
28 | #ifndef PROGRAMS_TEST_UTIL_H
29 | #define PROGRAMS_TEST_UTIL_H
30 | 
31 | #include "prog_util.h" /* must be included first */
32 | 
33 | #include <zlib.h> /* for comparison purposes */
34 | 
35 | NORETURN void
36 | assertion_failed(const char *expr, const char *file, int line);
37 | 
38 | #define ASSERT(expr) { if (unlikely(!(expr))) \
39 | 	assertion_failed(#expr, __FILE__, __LINE__); }
40 | 
41 | void begin_performance_test(void);
42 | 
43 | void alloc_guarded_buffer(size_t size, u8 **start_ret, u8 **end_ret);
44 | void free_guarded_buffer(u8 *start, u8 *end);
45 | 
46 | u64 timer_ticks(void);
47 | u64 timer_ticks_to_ms(u64 ticks);
48 | u64 timer_MB_per_s(u64 bytes, u64 ticks);
49 | u64 timer_KB_per_s(u64 bytes, u64 ticks);
50 | 
51 | struct output_bitstream {
52 | 	machine_word_t bitbuf;
53 | 	int bitcount;
54 | 	u8 *next;
55 | 	u8 *end;
56 | };
57 | 
58 | bool put_bits(struct output_bitstream *os, machine_word_t bits, int num_bits);
59 | bool flush_bits(struct output_bitstream *os);
60 | 
61 | #endif /* PROGRAMS_TEST_UTIL_H */
62 | 


--------------------------------------------------------------------------------
/programs/tgetopt.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * tgetopt.c - portable replacement for GNU getopt()
  3 |  *
  4 |  * Copyright 2016 Eric Biggers
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person
  7 |  * obtaining a copy of this software and associated documentation
  8 |  * files (the "Software"), to deal in the Software without
  9 |  * restriction, including without limitation the rights to use,
 10 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following
 13 |  * conditions:
 14 |  *
 15 |  * The above copyright notice and this permission notice shall be
 16 |  * included in all copies or substantial portions of the Software.
 17 |  *
 18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 20 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 23 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 24 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 25 |  * OTHER DEALINGS IN THE SOFTWARE.
 26 |  */
 27 | 
 28 | #include "prog_util.h"
 29 | 
 30 | tchar *toptarg;
 31 | int toptind = 1, topterr = 1, toptopt;
 32 | 
 33 | /*
 34 |  * This is a simple implementation of getopt().  It can be compiled with either
 35 |  * 'char' or 'wchar_t' as the character type.
 36 |  *
 37 |  * Do *not* use this implementation if you need any of the following features,
 38 |  * as they are not supported:
 39 |  *	- Long options
 40 |  *	- Option-related arguments retained in argv, not nulled out
 41 |  *	- '+' and '-' characters in optstring
 42 |  */
 43 | int
 44 | tgetopt(int argc, tchar *argv[], const tchar *optstring)
 45 | {
 46 | 	static tchar empty[1];
 47 | 	static tchar *nextchar;
 48 | 	static bool done;
 49 | 
 50 | 	if (toptind == 1) {
 51 | 		/* Starting to scan a new argument vector */
 52 | 		nextchar = NULL;
 53 | 		done = false;
 54 | 	}
 55 | 
 56 | 	while (!done && (nextchar != NULL || toptind < argc)) {
 57 | 		if (nextchar == NULL) {
 58 | 			/* Scanning a new argument */
 59 | 			tchar *arg = argv[toptind++];
 60 | 			if (arg[0] == '-' && arg[1] != '\0') {
 61 | 				if (arg[1] == '-' && arg[2] == '\0') {
 62 | 					/* All args after "--" are nonoptions */
 63 | 					argv[toptind - 1] = NULL;
 64 | 					done = true;
 65 | 				} else {
 66 | 					/* Start of short option characters */
 67 | 					nextchar = &arg[1];
 68 | 				}
 69 | 			}
 70 | 		} else {
 71 | 			/* More short options in previous arg */
 72 | 			tchar opt = *nextchar;
 73 | 			tchar *p = tstrchr(optstring, opt);
 74 | 			if (p == NULL) {
 75 | 				if (topterr)
 76 | 					msg("invalid option -- '%"TC"'", opt);
 77 | 				toptopt = opt;
 78 | 				return '?';
 79 | 			}
 80 | 			/* 'opt' is a valid short option character */
 81 | 			nextchar++;
 82 | 			toptarg = NULL;
 83 | 			if (*(p + 1) == ':') {
 84 | 				/* 'opt' can take an argument */
 85 | 				if (*nextchar != '\0') {
 86 | 					/* Optarg is in same argv argument */
 87 | 					toptarg = nextchar;
 88 | 					nextchar = empty;
 89 | 				} else if (toptind < argc && *(p + 2) != ':') {
 90 | 					/* Optarg is next argv argument */
 91 | 					argv[toptind - 1] = NULL;
 92 | 					toptarg = argv[toptind++];
 93 | 				} else if (*(p + 2) != ':') {
 94 | 					if (topterr && *optstring != ':') {
 95 | 						msg("option requires an "
 96 | 						    "argument -- '%"TC"'", opt);
 97 | 					}
 98 | 					toptopt = opt;
 99 | 					opt = (*optstring == ':') ? ':' : '?';
100 | 				}
101 | 			}
102 | 			if (*nextchar == '\0') {
103 | 				argv[toptind - 1] = NULL;
104 | 				nextchar = NULL;
105 | 			}
106 | 			return opt;
107 | 		}
108 | 	}
109 | 
110 | 	/* Done scanning.  Move all nonoptions to the end, set optind to the
111 | 	 * index of the first nonoption, and return -1. */
112 | 	toptind = argc;
113 | 	while (--argc > 0)
114 | 		if (argv[argc] != NULL)
115 | 			argv[--toptind] = argv[argc];
116 | 	done = true;
117 | 	return -1;
118 | }
119 | 


--------------------------------------------------------------------------------
/scripts/android_build.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | set -eu -o pipefail
  4 | 
  5 | SCRIPTDIR="$(dirname "$0")"
  6 | BUILDDIR="$SCRIPTDIR/../build"
  7 | API_LEVEL=28
  8 | ARCH=arm64
  9 | CFLAGS=${CFLAGS:-}
 10 | ENABLE_CRC=false
 11 | ENABLE_CRYPTO=false
 12 | NDKDIR=$HOME/android-ndk-r25b
 13 | 
 14 | usage() {
 15 | 	cat << EOF
 16 | Usage: $0 [OPTION]...
 17 | Build libdeflate for Android.
 18 | 
 19 |   --api-level=LEVEL    Android API level to target (default: $API_LEVEL)
 20 |   --arch=ARCH          Architecture: arm32|arm64|x86|x86_64 (default: $ARCH)
 21 |   --enable-crc         Enable crc instructions
 22 |   --enable-crypto      Enable crypto instructions
 23 |   --ndkdir=NDKDIR      Android NDK directory (default: $NDKDIR)
 24 | EOF
 25 | }
 26 | if ! options=$(getopt -o '' \
 27 | 	-l 'api-level:,arch:,enable-crc,enable-crypto,help,ndkdir:' -- "$@"); then
 28 | 	usage 1>&2
 29 | 	exit 1
 30 | fi
 31 | 
 32 | eval set -- "$options"
 33 | 
 34 | while [ $# -gt 0 ]; do
 35 | 	case "$1" in
 36 | 	--api-level)
 37 | 		API_LEVEL="$2"
 38 | 		shift
 39 | 		;;
 40 | 	--arch)
 41 | 		ARCH="$2"
 42 | 		shift
 43 | 		;;
 44 | 	--enable-crc)
 45 | 		ENABLE_CRC=true
 46 | 		;;
 47 | 	--enable-crypto)
 48 | 		ENABLE_CRYPTO=true
 49 | 		;;
 50 | 	--help)
 51 | 		usage
 52 | 		exit 0
 53 | 		;;
 54 | 	--ndkdir)
 55 | 		NDKDIR="$2"
 56 | 		shift
 57 | 		;;
 58 | 	--)
 59 | 		shift
 60 | 		break
 61 | 		;;
 62 | 	*)
 63 | 		echo 1>&2 "Unknown option \"$1\""
 64 | 		usage 1>&2
 65 | 		exit 1
 66 | 	esac
 67 | 	shift
 68 | done
 69 | 
 70 | case "$ARCH" in
 71 | arm|arm32|aarch32|armeabi-v7a)
 72 | 	ANDROID_ABI=armeabi-v7a
 73 | 	if $ENABLE_CRC || $ENABLE_CRYPTO; then
 74 | 		CFLAGS+=" -march=armv8-a"
 75 | 		if $ENABLE_CRC; then
 76 | 			CFLAGS+=" -mcrc"
 77 | 		else
 78 | 			CFLAGS+=" -mnocrc"
 79 | 		fi
 80 | 		if $ENABLE_CRYPTO; then
 81 | 			CFLAGS+=" -mfpu=crypto-neon-fp-armv8"
 82 | 		else
 83 | 			CFLAGS+=" -mfpu=neon"
 84 | 		fi
 85 | 	fi
 86 | 	;;
 87 | arm64|aarch64|arm64-v8a)
 88 | 	ANDROID_ABI=arm64-v8a
 89 | 	features=""
 90 | 	if $ENABLE_CRC; then
 91 | 		features+="+crc"
 92 | 	fi
 93 | 	if $ENABLE_CRYPTO; then
 94 | 		features+="+crypto"
 95 | 	fi
 96 | 	if [ -n "$features" ]; then
 97 | 		CFLAGS+=" -march=armv8-a$features"
 98 | 	fi
 99 | 	;;
100 | x86)
101 | 	ANDROID_ABI=x86
102 | 	;;
103 | x86_64)
104 | 	ANDROID_ABI=x86_64
105 | 	;;
106 | *)
107 | 	echo 1>&2 "Unknown architecture: \"$ARCH\""
108 | 	usage 1>&2
109 | 	exit 1
110 | esac
111 | 
# Configure through cmake-helper.sh using the NDK toolchain file, then build.
cmake_args=(
	-G Ninja
	-DCMAKE_TOOLCHAIN_FILE="$NDKDIR"/build/cmake/android.toolchain.cmake
	-DCMAKE_C_FLAGS="$CFLAGS"
	-DANDROID_ABI="$ANDROID_ABI"
	-DANDROID_PLATFORM="$API_LEVEL"
	-DLIBDEFLATE_BUILD_TESTS=1
)
"$SCRIPTDIR"/cmake-helper.sh "${cmake_args[@]}"
cmake --build "$BUILDDIR"
119 | 


--------------------------------------------------------------------------------
/scripts/android_tests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Test libdeflate on a connected arm64 Android device.
 4 | # Requires the Android NDK (release 19 or later) and adb.
 5 | 
 6 | set -eu -o pipefail
 7 | cd "$(dirname "$0")/.."
 8 | 
 9 | if [ $# -ne 0 ]; then
10 | 	echo 1>&2 "Usage: $0"
11 | 	exit 2
12 | fi
13 | 
14 | # Use NDKDIR if specified in environment, else use default value.
15 | : "${NDKDIR:=$HOME/android-ndk-r25b}"
16 | if [ ! -e "$NDKDIR" ]; then
17 | 	cat 1>&2 << EOF
18 | Android NDK was not found in NDKDIR=$NDKDIR!  Set the
19 | environmental variable NDKDIR to the location of your Android NDK installation.
20 | EOF
21 | 	exit 1
22 | fi
23 | 
# Commands to run when the script exits.  Each entry is a string that is
# passed to 'eval'.
CLEANUP_CMDS=()

# EXIT handler: run the registered cleanup commands in registration order.
# The array is walked by index because expanding "${CLEANUP_CMDS[@]}" while it
# is still empty is an "unbound variable" error under 'set -u' in bash
# versions older than 4.4.
cleanup() {
	local i

	for (( i = 0; i < ${#CLEANUP_CMDS[@]}; i++ )); do
		eval "${CLEANUP_CMDS[i]}"
	done
}
trap cleanup EXIT
31 | 
# A TESTDATA file supplied through the environment is used as-is.  Otherwise a
# temporary one is built from the first 1000000 bytes of the tree's C sources,
# headers and shell scripts, and registered for removal at exit.
if [[ -z ${TESTDATA:-} ]]; then
	TESTDATA=$(mktemp -t libdeflate_testdata.XXXXXXXXXX)
	export TESTDATA
	CLEANUP_CMDS+=("rm -f '$TESTDATA'")
	find . '(' -name '*.c' -o -name '*.h' -o -name '*.sh' ')' \
		-exec cat '{}' ';' \
		| head -c 1000000 > "$TESTDATA"
fi

# Scratch directory for captured 'adb shell' output; removed at exit.
TMPDIR=$(mktemp -d -t libdeflate_test.XXXXXXXXX)
CLEANUP_CMDS+=("rm -r '$TMPDIR'")
44 | 
# Build libdeflate with the given android_build.sh options, push the test
# programs and test data to the device, and run exec_tests.sh there.  Exits
# the script with status 1 if the run did not report success.
android_build_and_test() {
	local device_dir=/data/local/tmp
	local log="$TMPDIR/adb.out"
	local remote_cmd

	echo "Running Android tests with $*"

	./scripts/android_build.sh --ndkdir="$NDKDIR" "$@" > /dev/null
	adb push "$TESTDATA" ./scripts/exec_tests.sh \
		./build/programs/{benchmark,test_*} "$device_dir/" > /dev/null

	# Note: adb shell always returns 0, even if the shell command fails,
	# so success is detected from the final line that exec_tests.sh prints.
	remote_cmd="cd $device_dir && WRAPPER= TESTDATA=$(basename "$TESTDATA") sh exec_tests.sh"
	adb shell "$remote_cmd" > "$log"
	if grep -q "exec_tests finished successfully" "$log"; then
		return
	fi
	echo 1>&2 "Android test failure!  adb shell output:"
	cat "$log"
	exit 1
}
61 | 
# Build and test each configuration in turn.
for config in \
	"--arch=arm32" \
	"--arch=arm32 --enable-crc" \
	"--arch=arm64" \
	"--arch=arm64 --enable-crc" \
	"--arch=arm64 --enable-crypto" \
	"--arch=arm64 --enable-crc --enable-crypto"; do
	# Word splitting is intended: each entry holds one or more options.
	# shellcheck disable=SC2086
	android_build_and_test $config
done

echo "Android tests passed"
70 | 


--------------------------------------------------------------------------------
/scripts/benchmark.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | SCRIPTDIR="$(dirname "$(realpath "$0")")"
 6 | BUILDDIR="$SCRIPTDIR/../build"
 7 | 
 8 | "$SCRIPTDIR"/cmake-helper.sh -DLIBDEFLATE_BUILD_TESTS=1 -G Ninja > /dev/null
 9 | ninja -C "$BUILDDIR" --quiet benchmark
10 | "$BUILDDIR"/programs/benchmark "$@"
11 | 


--------------------------------------------------------------------------------
/scripts/checksum.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | SCRIPTDIR="$(dirname "$(realpath "$0")")"
 6 | BUILDDIR="$SCRIPTDIR/../build"
 7 | 
 8 | "$SCRIPTDIR"/cmake-helper.sh -DLIBDEFLATE_BUILD_TESTS=1 -G Ninja > /dev/null
 9 | ninja -C "$BUILDDIR" --quiet checksum
10 | "$BUILDDIR"/programs/checksum "$@"
11 | 


--------------------------------------------------------------------------------
/scripts/checksum_benchmarks.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | set -eu -o pipefail
  4 | 
  5 | __have_cpu_feature() {
  6 | 	local feature="$1"
  7 | 	local tag
  8 | 	case $ARCH in
  9 | 	arm*|aarch*)
 10 | 		tag="Features"
 11 | 		;;
 12 | 	*)
 13 | 		tag="flags"
 14 | 		;;
 15 | 	esac
 16 | 	grep -q "^$tag"$'[ \t]'"*:.*\<$feature\>" /proc/cpuinfo
 17 | }
 18 | 
 19 | have_cpu_features() {
 20 | 	local feature
 21 | 	for feature; do
 22 | 		__have_cpu_feature "$feature" || return 1
 23 | 	done
 24 | }
 25 | 
 26 | make_and_test() {
 27 | 	# Build the checksum program and tests.  Set the special test support
 28 | 	# flag to get support for LIBDEFLATE_DISABLE_CPU_FEATURES.
 29 | 	rm -rf build
 30 | 	CFLAGS="$CFLAGS -DTEST_SUPPORT__DO_NOT_USE=1" \
 31 | 		cmake -B build -G Ninja -DLIBDEFLATE_BUILD_TESTS=1 \
 32 | 		"${EXTRA_CMAKE_FLAGS[@]}" > /dev/null
 33 | 	cmake --build build > /dev/null
 34 | 
 35 | 	# Run the checksum tests, for good measure.  (This isn't actually part
 36 | 	# of the benchmarking.)
 37 | 	./build/programs/test_checksums > /dev/null
 38 | }
 39 | 
 40 | __do_benchmark() {
 41 | 	local impl="$1" speed
 42 | 	shift
 43 | 	local flags=("$@")
 44 | 
 45 | 	speed=$(./build/programs/checksum "${CKSUM_FLAGS[@]}" \
 46 | 		"${flags[@]}" -t "$FILE" | \
 47 | 		grep -o '[0-9]\+ MB/s' | grep -o '[0-9]\+')
 48 | 	printf "%-60s%-10s\n" "$CKSUM_NAME ($impl)" "$speed"
 49 | }
 50 | 
 51 | do_benchmark() {
 52 | 	local impl="$1"
 53 | 
 54 | 	CFLAGS="${EXTRA_CFLAGS[*]}" make_and_test
 55 | 	if [ "$impl" = zlib ]; then
 56 | 		__do_benchmark "$impl" "-Z"
 57 | 	else
 58 | 		__do_benchmark "libdeflate, $impl"
 59 | 		if $ENABLE_32BIT; then
 60 | 			CFLAGS="-m32 ${EXTRA_CFLAGS[*]}" make_and_test
 61 | 			__do_benchmark "libdeflate, $impl, 32-bit"
 62 | 		fi
 63 | 	fi
 64 | }
 65 | 
 66 | sort_by_speed() {
 67 | 	awk '{print $NF, $0}' | sort -nr | cut -f2- -d' '
 68 | }
 69 | 
 70 | disable_cpu_feature() {
 71 | 	LIBDEFLATE_DISABLE_CPU_FEATURES+=",$1"
 72 | 	shift
 73 | 	if (( $# > 0 )); then
 74 | 		EXTRA_CFLAGS+=("$@")
 75 | 	fi
 76 | }
 77 | 
 78 | cleanup() {
 79 | 	if $USING_TMPFILE; then
 80 | 		rm "$FILE"
 81 | 	fi
 82 | }
 83 | 
 84 | ARCH="$(uname -m)"
 85 | USING_TMPFILE=false
 86 | EXTRA_CMAKE_FLAGS=()
 87 | ENABLE_32BIT=false
 88 | 
 89 | trap cleanup EXIT
 90 | 
 91 | longopts="help"
 92 | longopts+=",cmake-flag:"
 93 | longopts+=",enable-32bit"
 94 | 
 95 | usage() {
 96 | 	echo "Usage: $0 [--cmake-flag=FLAG]... [--enable-32bit] [FILE]"
 97 | }
 98 | 
 99 | if ! options=$(getopt -o "" -l "$longopts" -- "$@"); then
100 | 	usage 1>&2
101 | 	exit 1
102 | fi
103 | eval set -- "$options"
104 | while (( $# >= 1 )); do
105 | 	case "$1" in
106 | 	--cmake-flag)
107 | 		EXTRA_CMAKE_FLAGS+=("$2")
108 | 		shift
109 | 		;;
110 | 	--enable-32bit)
111 | 		ENABLE_32BIT=true
112 | 		;;
113 | 	--help)
114 | 		usage
115 | 		exit 0
116 | 		;;
117 | 	--)
118 | 		shift
119 | 		break
120 | 		;;
121 | 	*)
122 | 		echo 1>&2 "Invalid option: '$1'"
123 | 		usage 1>&2
124 | 		exit 1
125 | 		;;
126 | 	esac
127 | 	shift
128 | done
129 | 
# Choose the input file: the single FILE argument if given, otherwise a
# freshly generated file of random data that cleanup() removes at exit.
case $# in
0)
	# Generate default test data file.
	FILE=$(mktemp -t checksum_testdata.XXXXXXXXXX)
	USING_TMPFILE=true
	echo "Generating 250 MB test file: $FILE"
	head -c 250000000 /dev/urandom > "$FILE"
	;;
1)
	FILE="$1"
	;;
*)
	usage 1>&2
	exit 1
	;;
esac

# Column headings for the result tables printed below.
cat << EOF
Method                                                      Speed (MB/s)
------                                                      ------------
EOF
147 | 
# CRC-32
#
# Implementations are benchmarked from the most to the least specialized.
# After each benchmark, the feature it relied on is added to
# LIBDEFLATE_DISABLE_CPU_FEATURES (and any matching compiler flag to
# EXTRA_CFLAGS) before the next benchmark runs, so the order of the statements
# below matters.  The whole group runs on the left side of a pipe, and its
# rows are sorted by speed before being printed.
CKSUM_NAME="CRC-32"
CKSUM_FLAGS=()
EXTRA_CFLAGS=()
export LIBDEFLATE_DISABLE_CPU_FEATURES=""
{
case $ARCH in
i386|x86_64)
	if have_cpu_features vpclmulqdq pclmulqdq avx512bw avx512vl; then
		do_benchmark "VPCLMULQDQ/AVX512/VL512"
		disable_cpu_feature zmm
		do_benchmark "VPCLMULQDQ/AVX512/VL256"
		disable_cpu_feature avx512vl "-mno-avx512vl"
		disable_cpu_feature avx512bw "-mno-avx512bw"
	fi
	if have_cpu_features vpclmulqdq pclmulqdq avx2; then
		do_benchmark "VPCLMULQDQ/AVX2"
		disable_cpu_feature vpclmulqdq "-mno-vpclmulqdq"
	fi
	if have_cpu_features pclmulqdq avx; then
		do_benchmark "PCLMULQDQ/AVX"
		disable_cpu_feature avx "-mno-avx"
	fi
	if have_cpu_features pclmulqdq; then
		do_benchmark "PCLMULQDQ"
		disable_cpu_feature pclmulqdq "-mno-pclmul"
	fi
	;;
aarch*)
	EXTRA_CFLAGS=("-march=armv8-a")
	if have_cpu_features pmull crc32 sha3; then
		do_benchmark "pmullx12_crc_eor3"
		disable_cpu_feature sha3
	fi
	if have_cpu_features pmull crc32; then
		do_benchmark "pmullx12_crc"
		disable_cpu_feature prefer_pmull
		do_benchmark "crc_pmullcombine"
	fi
	if have_cpu_features crc32; then
		do_benchmark "crc"
		disable_cpu_feature crc32
	fi
	if have_cpu_features pmull; then
		do_benchmark "pmull4x"
		disable_cpu_feature pmull
	fi
	;;
esac
# Benchmarked on every architecture, after all features above were disabled.
do_benchmark "generic"
do_benchmark "zlib"
} | sort_by_speed
200 | 
# Adler-32
#
# Same scheme as for CRC-32: benchmark the most specialized implementation
# first, then disable the feature it relied on before benchmarking the next
# one.  The checksum program is run with -A for this table.
CKSUM_NAME="Adler-32"
CKSUM_FLAGS=(-A)
EXTRA_CFLAGS=()
export LIBDEFLATE_DISABLE_CPU_FEATURES=""
# Blank line separating this table from the previous one.
echo
{
case $ARCH in
i386|x86_64)
	if have_cpu_features avx512bw avx512_vnni; then
		do_benchmark "AVX512VNNI/VL512"
		disable_cpu_feature zmm
		if have_cpu_features avx512vl; then
			do_benchmark "AVX512VNNI/VL256"
		fi
		disable_cpu_feature avx512_vnni "-mno-avx512vnni"
		disable_cpu_feature avx512bw "-mno-avx512bw"
	fi
	if have_cpu_features avx2 avx_vnni; then
		do_benchmark "AVX-VNNI"
		disable_cpu_feature avx_vnni "-mno-avxvnni"
	fi
	if have_cpu_features avx2; then
		do_benchmark "AVX2"
		disable_cpu_feature avx2 "-mno-avx2"
	fi
	if have_cpu_features sse2; then
		do_benchmark "SSE2"
		disable_cpu_feature sse2 "-mno-sse2"
	fi
	;;
arm*)
	if have_cpu_features neon; then
		do_benchmark "NEON"
		disable_cpu_feature neon "-mfpu=vfpv3"
	fi
	;;
aarch*)
	EXTRA_CFLAGS=("-march=armv8-a")
	if have_cpu_features asimd asimddp; then
		do_benchmark "DOTPROD"
		disable_cpu_feature dotprod
	fi
	if have_cpu_features asimd; then
		do_benchmark "NEON"
		disable_cpu_feature neon
		# Replaces the earlier -march value rather than appending to it.
		EXTRA_CFLAGS=("-march=armv8-a+nosimd")
	fi
	;;
esac
# Benchmarked on every architecture, after all features above were disabled.
do_benchmark "generic"
do_benchmark "zlib"
} | sort_by_speed
254 | 


--------------------------------------------------------------------------------
/scripts/cmake-helper.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # This script ensures that the 'build' directory has been created and configured
 4 | # with the given CMake options and environment.
 5 | 
 6 | set -e
 7 | 
 8 | TOPDIR="$(dirname "$0")"/..
 9 | BUILDDIR="$TOPDIR"/build
10 | 
11 | flags=$(env; echo "@CMAKEOPTS@=$*")
12 | if [ "$flags" != "$(cat "$BUILDDIR"/.flags 2>/dev/null || true)" ]; then
13 | 	rm -rf "$BUILDDIR"/CMakeCache.txt "$BUILDDIR"/CMakeFiles
14 | 	mkdir -p "$BUILDDIR"
15 | 	cmake -S "$TOPDIR" -B "$BUILDDIR" "$@"
16 | 	echo "$flags" > "$BUILDDIR"/.flags
17 | fi
18 | 


--------------------------------------------------------------------------------
/scripts/deflate_benchmarks.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | set -eu -o pipefail
  4 | topdir="$(dirname "$0")/.."
  5 | tmpfile=$(mktemp)
  6 | trap 'rm -f $tmpfile' EXIT
  7 | 
  8 | run_benchmark()
  9 | {
 10 | 	local best_ctime=1000000000
 11 | 	local i
 12 | 
 13 | 	for i in $(seq "$NUM_ITERATIONS"); do
 14 | 		"$@" > "$tmpfile"
 15 | 		csize=$(awk '/Compressed/{print $4}' "$tmpfile")
 16 | 		ctime=$(awk '/Compression time/{print $3}' "$tmpfile")
 17 | 		if (( ctime <  best_ctime )); then
 18 | 			best_ctime=$ctime
 19 | 		fi
 20 | 		: "$i" # make shellcheck happy
 21 | 	done
 22 | 	CSIZE=$csize
 23 | 	CTIME=$best_ctime
 24 | }
 25 | 
 26 | multifile()
 27 | {
 28 | 	local file results cmd best em
 29 | 
 30 | 	NUM_ITERATIONS=1
 31 | 
 32 | 	echo "File | zlib -6 | zlib -9 | libdeflate -6 | libdeflate -9 | libdeflate -12"
 33 | 	echo "-----|---------|---------|---------------|---------------|---------------"
 34 | 
 35 | 	for file in "$@"; do
 36 | 		echo -n "$(basename "$file")"
 37 | 		results=()
 38 | 		cmd=("$topdir/build/programs/benchmark"
 39 | 		     -s"$(stat -c "%s" "$file")" "$file")
 40 | 		run_benchmark "${cmd[@]}" -Y -6
 41 | 		results+=("$CSIZE")
 42 | 		run_benchmark "${cmd[@]}" -Y -6
 43 | 		results+=("$CSIZE")
 44 | 		run_benchmark "${cmd[@]}" -6
 45 | 		results+=("$CSIZE")
 46 | 		run_benchmark "${cmd[@]}" -9
 47 | 		results+=("$CSIZE")
 48 | 		run_benchmark "${cmd[@]}" -12
 49 | 		results+=("$CSIZE")
 50 | 		best=2000000000
 51 | 		for result in "${results[@]}"; do
 52 | 			if (( result < best)); then
 53 | 				best=$result
 54 | 			fi
 55 | 		done
 56 | 		for result in "${results[@]}"; do
 57 | 			if (( result == best )); then
 58 | 				em="**"
 59 | 			else
 60 | 				em=""
 61 | 			fi
 62 | 			echo -n " | ${em}${result}${em}"
 63 | 		done
 64 | 		echo
 65 | 	done
 66 | }
 67 | 
 68 | single_file()
 69 | {
 70 | 	local file=$1
 71 | 	local usize args
 72 | 	local include_old=false
 73 | 
 74 | 	usize=$(stat -c "%s" "$file")
 75 | 	: "${NUM_ITERATIONS:=3}"
 76 | 
 77 | 	if [ -e "$topdir/benchmark-old" ]; then
 78 | 		include_old=true
 79 | 	fi
 80 | 	echo -n "Level | libdeflate (new) "
 81 | 	if $include_old; then
 82 | 		echo -n "| libdeflate (old) "
 83 | 	fi
 84 | 	echo "| zlib"
 85 | 	echo -n "------|------------------"
 86 | 	if $include_old; then
 87 | 		echo -n "|------------------"
 88 | 	fi
 89 | 	echo "|-----"
 90 | 	for level in {1..12}; do
 91 | 		echo -n "$level"
 92 | 		args=("$file" -s "$usize" "-$level")
 93 | 
 94 | 		run_benchmark "$topdir/build/programs/benchmark" "${args[@]}"
 95 | 		echo -n " | $CSIZE / $CTIME"
 96 | 
 97 | 		if $include_old; then
 98 | 			run_benchmark "$topdir/benchmark-old" "${args[@]}"
 99 | 			echo -n " | $CSIZE / $CTIME"
100 | 		fi
101 | 
102 | 		if (( level > 9 )); then
103 | 			echo -n " | N/A"
104 | 		else
105 | 			run_benchmark "$topdir/build/programs/benchmark" \
106 | 				      "${args[@]}" -Y
107 | 			echo -n " | $CSIZE / $CTIME"
108 | 		fi
109 | 		echo
110 | 	done
111 | }
112 | 
# Several files: size comparison table.  One file: per-level table.
# No file: report the usage error on stderr and fail.
if (( $# > 1 )); then
	multifile "$@"
elif (( $# == 1 )); then
	single_file "$@"
else
	echo 1>&2 "Usage: $0 FILE..."
	exit 1
fi
120 | 


--------------------------------------------------------------------------------
/scripts/exec_tests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | #
 3 | # Helper script used by run_tests.sh and android_tests.sh,
 4 | # not intended to be run directly
 5 | #
 6 | 
 7 | set -eu
 8 | 
 9 | DIR=${1:-.}
10 | 
11 | cd "$DIR"
12 | 
13 | run_cmd() {
14 | 	echo "$WRAPPER $*"
15 | 	$WRAPPER "$@" > /dev/null
16 | }
17 | 
# Run every test program in this directory.
for test_prog in ./test_*; do
	run_cmd "$test_prog"
done

# Run the benchmark program with each combination of format flag and
# reference-implementation flag.  The loop variables are deliberately left
# unquoted so that an empty value adds no argument.
for fmt in '' '-g' '-z'; do
	for ref in '' '-Y' '-Z'; do
		run_cmd ./benchmark $fmt $ref "$TESTDATA"
	done
done

# Run a selection of compression levels, alone and with -Y.
for lvl in 0 1 3 7 9; do
	for ref in '' '-Y'; do
		run_cmd ./benchmark -$lvl $ref "$TESTDATA"
	done
done

# Same again including level 12, alone and with -Z.
for lvl in 0 1 3 7 9 12; do
	for ref in '' '-Z'; do
		run_cmd ./benchmark -$lvl $ref "$TESTDATA"
	done
done

echo "exec_tests finished successfully" # Needed for 'adb shell'
39 | 


--------------------------------------------------------------------------------
/scripts/gen-crc32-consts.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | #
  3 | # This script generates constants for efficient computation of the gzip CRC-32.
  4 | 
  5 | import sys
  6 | 
  7 | # This is the generator polynomial G(x) of the gzip CRC-32, represented as an
  8 | # int using the natural mapping between bits and polynomial coefficients.
  9 | G = 0x104c11db7
 10 | 
 11 | # XOR (add) an iterable of polynomials.
 12 | def xor(iterable):
 13 |     res = 0
 14 |     for val in iterable:
 15 |         res ^= val
 16 |     return res
 17 | 
 18 | # Multiply two polynomials.
 19 | def clmul(a, b):
 20 |     return xor(a << i for i in range(b.bit_length()) if (b & (1 << i)) != 0)
 21 | 
 22 | # Polynomial division floor(a / b).
 23 | def div(a, b):
 24 |     q = 0
 25 |     while a.bit_length() >= b.bit_length():
 26 |         q ^= 1 << (a.bit_length() - b.bit_length())
 27 |         a ^= b << (a.bit_length() - b.bit_length())
 28 |     return q
 29 | 
 30 | # Reduce the polynomial 'a' modulo the polynomial 'b'.
 31 | def reduce(a, b):
 32 |     return a ^ clmul(div(a, b), b)
 33 | 
 34 | # Reverse the bits of a polynomial.
 35 | def bitreverse(poly, num_bits):
 36 |     return xor(1 << (num_bits - 1 - i) for i in range(num_bits)
 37 |                if (poly & (1 << i)) != 0)
 38 | 
 39 | # Compute x^d mod G.
 40 | def x_to_the_d(d):
 41 |     if d < G.bit_length() - 1:
 42 |         return 1 << d
 43 |     t = x_to_the_d(d//2)
 44 |     t = clmul(t, t)
 45 |     if d % 2 != 0:
 46 |         t <<= 1
 47 |     return reduce(t, G)
 48 | 
 49 | def gen_tables():
 50 |     print('/*')
 51 |     print(' * crc32_tables.h - data tables for CRC-32 computation')
 52 |     print(' *')
 53 |     print(' * THIS FILE WAS GENERATED BY gen-crc32-consts.py.  DO NOT EDIT.')
 54 |     print(' */')
 55 |     for n in [1, 8]:
 56 |         print('')
 57 |         print(f'static const u32 crc32_slice{n}_table[] MAYBE_UNUSED = {{')
 58 |         # The i'th table entry is the CRC-32 of the message consisting of byte
 59 |         # i % 256 followed by i // 256 zero bytes.
 60 |         polys = [bitreverse(i % 256, 8) << (32 + 8*(i//256)) for i in range(256 * n)]
 61 |         polys = [bitreverse(reduce(poly, G), 32) for poly in polys]
 62 |         for i in range(0, len(polys), 4):
 63 |             print(f'\t0x{polys[i+0]:08x}, 0x{polys[i+1]:08x}, 0x{polys[i+2]:08x}, 0x{polys[i+3]:08x},')
 64 |         print('};')
 65 | 
 66 | # Compute the constant multipliers needed for "folding" over various distances
 67 | # with the gzip CRC-32.  Each such multiplier is x^d mod G(x) for some distance
 68 | # d, in bits, over which the folding is occurring.
 69 | #
 70 | # Folding works as follows: let A(x) be a polynomial (possibly reduced partially
 71 | # or fully mod G(x)) for part of the message, and let B(x) be a polynomial
 72 | # (possibly reduced partially or fully mod G(x)) for a later part of the
 73 | # message.  The unreduced combined polynomial is A(x)*x^d + B(x), where d is the
 74 | # number of bits separating the two parts of the message plus len(B(x)).  Since
 75 | # mod G(x) can be applied at any point, x^d mod G(x) can be precomputed and used
 76 | # instead of x^d unreduced.  That allows the combined polynomial to be computed
 77 | # relatively easily in a partially-reduced form A(x)*(x^d mod G(x)) + B(x), with
 78 | # length max(len(A(x)) + 31, len(B(x))).  This does require doing a polynomial
 79 | # multiplication (carryless multiplication).
 80 | #
 81 | # "Folding" in this way can be used for the entire CRC computation except the
 82 | # final reduction to 32 bits; this works well when CPU support for carryless
 83 | # multiplication is available.  It can also be used to combine CRCs of different
 84 | # parts of the message that were computed using a different method.
 85 | #
 86 | # Note that the gzip CRC-32 uses bit-reversed polynomials.  I.e., the low order
 87 | # bits are really the high order polynomial coefficients.
 88 | def gen_multipliers():  # Prints the text of crc32_multipliers.h to stdout.
 89 |     print('/*')
 90 |     print(' * crc32_multipliers.h - constants for CRC-32 folding')
 91 |     print(' *')
 92 |     print(' * THIS FILE WAS GENERATED BY gen-crc32-consts.py.  DO NOT EDIT.')
 93 |     print(' */')
 94 |     print('')
 95 | 
 96 |     # Compute the multipliers needed for CRC-32 folding with carryless
 97 |     # multiplication instructions that operate on the 64-bit halves of 128-bit
 98 |     # segments.  Using the terminology from earlier, for each 64-bit fold
 99 |     # len(A(x)) = 64, and len(B(x)) = 95 since a 64-bit polynomial multiplied by
100 |     # a 32-bit one produces a 95-bit one.  When A(x) is the low order polynomial
101 |     # half of a 128-bit segment (high order physical half), the separation
102 |     # between the message parts is the total length of the 128-bit segments
103 |     # separating the values.  When A(x) is the high order polynomial half, the
104 |     # separation is 64 bits greater.
105 |     for i in range(1, 33):
106 |         sep_lo = 128 * (i - 1)  # i - 1 whole 128-bit segments lie in between
107 |         sep_hi = sep_lo + 64
108 |         len_B = 95
109 |         for d in [sep_hi + len_B, # A(x) = high 64 polynomial bits (low 64 physical bits)
110 |                   sep_lo + len_B # A(x) = low 64 polynomial bits (high 64 physical bits)
111 |                   ]:
112 |             poly = bitreverse(x_to_the_d(d), 32)
113 |             print(f'#define CRC32_X{d}_MODG 0x{poly:08x} /* x^{d} mod G(x) */')
114 |         print('')  # blank line after each pair of multipliers
115 | 
116 |     # Compute constants for the final 128 => 32 bit reduction.
117 |     poly = bitreverse(div(1 << 95, G), 64)
118 |     print(f'#define CRC32_BARRETT_CONSTANT_1 0x{poly:016x}ULL /* floor(x^95 / G(x)) */')
119 |     poly = bitreverse(G, 33)
120 |     print(f'#define CRC32_BARRETT_CONSTANT_2 0x{poly:016x}ULL /* G(x) */')
121 | 
122 |     # Compute multipliers for combining the CRCs of separate chunks.
123 |     print('')
124 |     num_chunks = 4
125 |     table_len = 129
126 |     min_chunk_len = 128
127 |     print(f'#define CRC32_NUM_CHUNKS {num_chunks}')
128 |     print(f'#define CRC32_MIN_VARIABLE_CHUNK_LEN {min_chunk_len}UL')
129 |     print(f'#define CRC32_MAX_VARIABLE_CHUNK_LEN {(table_len-1) * min_chunk_len}UL')
130 |     print('')
131 |     print('/* Multipliers for implementations that use a variable chunk length */')
132 |     print('static const u32 crc32_mults_for_chunklen[][CRC32_NUM_CHUNKS - 1] MAYBE_UNUSED = {')
133 |     print('\t{ 0 /* unused row */ },')  # row 0 is a placeholder: real rows start at i = 1
134 |     for i in range(1, table_len):  # row i holds the multipliers for chunk_len = i * min_chunk_len
135 |         chunk_len = i * min_chunk_len
136 |         print(f'\t/* chunk_len={chunk_len} */')
137 |         print('\t{ ', end='')
138 |         for j in range(num_chunks - 1, 0, -1):  # largest distance first
139 |             d = (j * 8 * chunk_len) - 33  # NOTE(review): rationale for "- 33" is not given here -- confirm
140 |             poly = bitreverse(x_to_the_d(d), 32)
141 |             print(f'0x{poly:08x} /* x^{d} mod G(x) */, ', end='')
142 |         print('},')
143 |     print('};')
144 |     fixed_chunk_len = 32768
145 |     print('')
146 |     print('/* Multipliers for implementations that use a large fixed chunk length */')
147 |     print(f'#define CRC32_FIXED_CHUNK_LEN {fixed_chunk_len}UL')
148 |     for j in range(1, num_chunks):  # ascending j here, unlike the table rows above
149 |         d = (j * 8 * fixed_chunk_len) - 33
150 |         poly = bitreverse(x_to_the_d(d), 32)
151 |         print(f'#define CRC32_FIXED_CHUNK_MULT_{j} 0x{poly:08x} /* x^{d} mod G(x) */')
152 | 
153 | with open('lib/crc32_tables.h', 'w') as f:
154 |     sys.stdout = f
155 |     gen_tables()
156 | with open('lib/crc32_multipliers.h', 'w') as f:
157 |     sys.stdout = f
158 |     gen_multipliers()
159 | 


--------------------------------------------------------------------------------
/scripts/gen-release-archives.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -eu -o pipefail
 4 | 
 5 | # This script generates source and binary archives that should be posted for
 6 | # each new release of libdeflate.
 7 | 
 8 | # Name everything after the current git description, minus any leading "v".
 9 | prefix="libdeflate-$(git describe HEAD | sed 's/^v//')"
10 | 
11 | # Generate source code archive libdeflate-*.tar.gz
12 | tarball="${prefix}.tar.gz"
13 | echo "Generating $tarball"
14 | git archive --format=tar --prefix="${prefix}/" HEAD \
15 | 	| libdeflate-gzip -12 > "$tarball"
16 | 
17 | # Generate Windows binary releases libdeflate-*-windows-*-bin.zip
18 | for arch in 'i686' 'x86_64'; do
19 | 	dir="${prefix}-windows-${arch}-bin"
20 | 	zipfile="${dir}.zip"
21 | 	echo "Generating $zipfile"
22 | 	rm -rf build "$dir" "$zipfile"
23 | 
24 | 	# Configure and build from scratch with the MinGW-w64 cross toolchain.
25 | 	CFLAGS="-Werror" "${arch}-w64-mingw32-cmake" -B build -G Ninja \
26 | 		-DLIBDEFLATE_BUILD_TESTS=1 > /dev/null
27 | 	cmake --build build > /dev/null
28 | 
29 | 	# Collect the header, libraries, and programs, then strip the binaries.
30 | 	mkdir "$dir"
31 | 	cp libdeflate.h build/libdeflate.{dll,dll.a,a} \
32 | 		build/programs/{benchmark,checksum}.exe "$dir"
33 | 	cp build/programs/libdeflate-gzip.exe "$dir"/gzip.exe
34 | 	cp build/programs/libdeflate-gzip.exe "$dir"/gunzip.exe
35 | 	"${arch}-w64-mingw32-strip" "$dir"/libdeflate.dll "$dir"/*.exe
36 | 
37 | 	# Ship the text files as .txt with a carriage return appended to each line.
38 | 	for file in COPYING NEWS.md README.md; do
39 | 		sed -e 's/$/\r/g' < "$file" > "$dir/${file}.txt"
40 | 	done
41 | 	(cd "$dir" && zip -q -r "../${zipfile}" .)
42 | done
43 | 
44 | echo "Successfully generated release archives"
45 | 


--------------------------------------------------------------------------------
/scripts/gen_bitreverse_tab.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | #
 3 | # This script computes a table that maps each byte to its bitwise reverse.
 4 | 
 5 | def reverse_byte(v):
 6 |     return sum(1 << (7 - bit) for bit in range(8) if (v & (1 << bit)) != 0)
 7 | 
 8 | tab = [reverse_byte(v) for v in range(256)]
 9 | 
10 | print('static const u8 bitreverse_tab[256] = {')
11 | for i in range(0, len(tab), 8):
12 |     print('\t', end='')
13 |     for j, v in enumerate(tab[i:i+8]):
14 |         print(f'0x{v:02x},', end='')
15 |         if j == 7:
16 |             print('')
17 |         else:
18 |             print(' ', end='')
19 | print('};')
20 | 


--------------------------------------------------------------------------------
/scripts/gen_default_litlen_costs.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | #
 3 | # This script computes the default litlen symbol costs for the near-optimal
 4 | # parser.
 5 | 
 6 | from math import log2
 7 | 
 8 | BIT_COST = 16 # Must match BIT_COST in deflate_compress.c
 9 | NUM_LEN_SLOTS = 29
10 | 
11 | print("""static const struct {
12 | 	u8 used_lits_to_lit_cost[257];
13 | 	u8 len_sym_cost;
14 | } default_litlen_costs[] = {""")
15 | MATCH_PROBS = [0.25, 0.50, 0.75]
16 | for i, match_prob in enumerate(MATCH_PROBS):
17 |     len_prob = match_prob / NUM_LEN_SLOTS
18 |     len_sym_cost = int(-log2(len_prob) * BIT_COST)
19 |     if i == 0:
20 |         print('\t{', end='')
21 |     print(f' /* match_prob = {match_prob} */')
22 |     print('\t\t.used_lits_to_lit_cost = {')
23 | 
24 |     j = 0
25 |     for num_used_literals in range(0, 257):
26 |         if num_used_literals == 0:
27 |             num_used_literals = 1
28 |         lit_prob = (1 - match_prob) / num_used_literals
29 |         lit_cost = int(-log2(lit_prob) * BIT_COST)
30 |         if j == 0:
31 |             print('\t\t\t', end='')
32 |         if j == 7 or num_used_literals == 256:
33 |             print(f'{lit_cost},')
34 |             j = 0
35 |         else:
36 |             print(f'{lit_cost}, ', end='')
37 |             j += 1
38 |     print('\t\t},')
39 |     print(f'\t\t.len_sym_cost = {len_sym_cost},')
40 |     if i < len(MATCH_PROBS) - 1:
41 |         print('\t}, {', end='')
42 |     else:
43 |         print('\t},')
44 | print('};')
45 | 


--------------------------------------------------------------------------------
/scripts/gen_offset_slot_map.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | #
 3 | # This script generates the deflate_offset_slot[] array, which maps
 4 | # 'offset - 1 => offset_slot' for offset <= 256.
 5 | 
 6 | DEFLATE_OFFSET_SLOT_BASE = [
 7 | 	1    , 2    , 3    , 4     , 5     , 7     , 9     , 13    ,
 8 | 	17   , 25   , 33   , 49    , 65    , 97    , 129   , 193   ,
 9 | 	257  , 385  , 513  , 769   , 1025  , 1537  , 2049  , 3073  ,
10 | 	4097 , 6145 , 8193 , 12289 , 16385 , 24577 ,
11 | ]
12 | 
13 | offset_slot_map = [0] * 256
14 | offset_slot = -1
15 | for offset in range(1, len(offset_slot_map) + 1):
16 |     if offset >= DEFLATE_OFFSET_SLOT_BASE[offset_slot + 1]:
17 |         offset_slot += 1
18 |     offset_slot_map[offset - 1] = offset_slot
19 | 
20 | print(f'static const u8 deflate_offset_slot[{len(offset_slot_map)}] = {{')
21 | for i in range(0, len(offset_slot_map), 16):
22 |     print('\t', end='')
23 |     for j, v in enumerate(offset_slot_map[i:i+16]):
24 |         print(f'{v},', end='')
25 |         if j == 15:
26 |             print('')
27 |         else:
28 |             print(' ', end='')
29 | print('};')
30 | 


--------------------------------------------------------------------------------
/scripts/libFuzzer/.gitignore:
--------------------------------------------------------------------------------
1 | */fuzz
2 | 


--------------------------------------------------------------------------------
/scripts/libFuzzer/deflate_compress/corpus/0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebiggers/libdeflate/6bb493615b0ef35c98fc4aa4ec04f448788db6a5/scripts/libFuzzer/deflate_compress/corpus/0


--------------------------------------------------------------------------------
/scripts/libFuzzer/deflate_compress/fuzz.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <libdeflate.h>
 3 | #include <stdbool.h>
 4 | #include <stdint.h>
 5 | #include <stdlib.h>
 6 | #include <string.h>
 7 | #include <sys/mman.h>
 8 | #include <unistd.h>
 9 | 
10 | static void
11 | alloc_guarded_buffer(size_t size, uint8_t **start_ret, uint8_t **end_ret)
12 | {
13 | 	const size_t page = sysconf(_SC_PAGESIZE);
14 | 	const size_t buf_len = ((size + page - 1) / page) * page; /* whole pages */
15 | 	uint8_t *mapping, *buf;
16 | 
17 | 	/* Map the buffer with one extra page before it and one after it. */
18 | 	mapping = mmap(NULL, buf_len + 2 * page, PROT_READ|PROT_WRITE,
19 | 		       MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
20 | 	assert(mapping != (uint8_t *)MAP_FAILED);
21 | 	buf = mapping + page;
22 | 
23 | 	/* Drop both extra pages, leaving unmapped guards around the buffer. */
24 | 	munmap(mapping, page);
25 | 	munmap(buf + buf_len, page);
26 | 
27 | 	/* Hand the usable range [buf, buf + buf_len) back to the caller. */
28 | 	*start_ret = buf;
29 | 	*end_ret = buf + buf_len;
30 | }
31 | 
32 | static void
33 | free_guarded_buffer(uint8_t *start, uint8_t *end)
34 | {
35 | 	munmap(start, end - start); /* unmap the whole [start, end) range */
36 | }
37 | 
38 | /*
39 |  * Fuzz the DEFLATE compression and decompression round trip.  in[0] selects
40 |  * the compression level and in[1] the output buffer sizing; the rest is data.
41 |  */
42 | int LLVMFuzzerTestOneInput(const uint8_t *in, size_t insize)
43 | {
44 | 	int level;
45 | 	bool use_bound;
46 | 	struct libdeflate_compressor *c;
47 | 	struct libdeflate_decompressor *d;
48 | 	size_t csize_avail;
49 | 	uint8_t *ubuf_start, *ubuf_end, *ubuf;
50 | 	uint8_t *cbuf_start, *cbuf_end, *cbuf;
51 | 	uint8_t *dbuf_start, *dbuf_end, *dbuf;
52 | 	size_t csize;
53 | 	enum libdeflate_result res;
54 | 
55 | 	if (insize < 2) /* the two control bytes are required */
56 | 		return 0;
57 | 
58 | 	level = in[0] % 13; /* 0..12 */
59 | 	use_bound = in[1] % 2;
60 | 	in += 2;
61 | 	insize -= 2;
62 | 
63 | 	c = libdeflate_alloc_compressor(level);
64 | 	d = libdeflate_alloc_decompressor();
65 | 
66 | 	/* Use guard pages to make all input/output buffer overflows segfault */
67 | 
68 | 	alloc_guarded_buffer(insize, &ubuf_start, &ubuf_end);
69 | 	ubuf = ubuf_end - insize; /* the data ends exactly at the buffer's end */
70 | 	memcpy(ubuf, in, insize);
71 | 
72 | 	csize_avail = use_bound ? libdeflate_deflate_compress_bound(c, insize) :
73 | 				  insize;
74 | 	alloc_guarded_buffer(csize_avail, &cbuf_start, &cbuf_end);
75 | 	cbuf = cbuf_end - csize_avail;
76 | 
77 | 	alloc_guarded_buffer(insize, &dbuf_start, &dbuf_end);
78 | 	dbuf = dbuf_end - insize;
79 | 
80 | 	csize = libdeflate_deflate_compress(c, ubuf, insize, cbuf, csize_avail);
81 | 	if (csize != 0) {
82 | 		assert(csize <= csize_avail);
83 | 		memmove(cbuf_end - csize, cbuf, csize); /* make it end at cbuf_end */
84 | 		res = libdeflate_deflate_decompress(d, cbuf_end - csize, csize,
85 | 						    dbuf, insize, NULL);
86 | 		assert(res == LIBDEFLATE_SUCCESS);
87 | 		assert(memcmp(in, dbuf, insize) == 0); /* round trip must be lossless */
88 | 	} else {
89 | 		assert(!use_bound); /* 0 is only accepted for the non-bound size */
90 | 	}
91 | 
92 | 	libdeflate_free_compressor(c);
93 | 	libdeflate_free_decompressor(d);
94 | 	free_guarded_buffer(ubuf_start, ubuf_end);
95 | 	free_guarded_buffer(cbuf_start, cbuf_end);
96 | 	free_guarded_buffer(dbuf_start, dbuf_end);
97 | 	return 0;
98 | }
99 | 


--------------------------------------------------------------------------------
/scripts/libFuzzer/deflate_decompress/corpus/0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebiggers/libdeflate/6bb493615b0ef35c98fc4aa4ec04f448788db6a5/scripts/libFuzzer/deflate_decompress/corpus/0


--------------------------------------------------------------------------------
/scripts/libFuzzer/deflate_decompress/fuzz.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <libdeflate.h>
 3 | #include <stdbool.h>
 4 | #include <stdint.h>
 5 | #include <stdlib.h>
 6 | #include <string.h>
 7 | #include <sys/mman.h>
 8 | #include <unistd.h>
 9 | 
10 | static void
11 | alloc_guarded_buffer(size_t size, uint8_t **start_ret, uint8_t **end_ret)
12 | {
13 | 	const size_t page = sysconf(_SC_PAGESIZE);
14 | 	const size_t buf_len = ((size + page - 1) / page) * page; /* whole pages */
15 | 	uint8_t *mapping, *buf;
16 | 
17 | 	/* Map the buffer with one extra page before it and one after it. */
18 | 	mapping = mmap(NULL, buf_len + 2 * page, PROT_READ|PROT_WRITE,
19 | 		       MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
20 | 	assert(mapping != (uint8_t *)MAP_FAILED);
21 | 	buf = mapping + page;
22 | 
23 | 	/* Drop both extra pages, leaving unmapped guards around the buffer. */
24 | 	munmap(mapping, page);
25 | 	munmap(buf + buf_len, page);
26 | 
27 | 	/* Hand the usable range [buf, buf + buf_len) back to the caller. */
28 | 	*start_ret = buf;
29 | 	*end_ret = buf + buf_len;
30 | }
31 | 
32 | static void
33 | free_guarded_buffer(uint8_t *start, uint8_t *end)
34 | {
35 | 	munmap(start, end - start); /* unmap the whole [start, end) range */
36 | }
37 | 
38 | /*
39 |  * Fuzz DEFLATE decompression.  The whole input is passed to
40 |  * libdeflate_deflate_decompress() with an output buffer of three times the
41 |  * input size.  The decompression result is not checked.
42 |  */
43 | int LLVMFuzzerTestOneInput(const uint8_t *in, size_t insize)
44 | {
45 | 	size_t outsize_avail = 3 * insize;
46 | 	uint8_t *cbuf_start, *cbuf_end, *cbuf;
47 | 	uint8_t *dbuf_start, *dbuf_end, *dbuf;
48 | 	struct libdeflate_decompressor *d;
49 | 
50 | 	/* Use guard pages to make all input/output buffer overflows segfault */
51 | 
52 | 	alloc_guarded_buffer(insize, &cbuf_start, &cbuf_end);
53 | 	cbuf = cbuf_end - insize; /* the input ends exactly at the buffer's end */
54 | 	memcpy(cbuf, in, insize);
55 | 
56 | 	alloc_guarded_buffer(outsize_avail, &dbuf_start, &dbuf_end);
57 | 	dbuf = dbuf_end - outsize_avail; /* likewise for the output space */
58 | 
59 | 	d = libdeflate_alloc_decompressor();
60 | 	libdeflate_deflate_decompress(d, cbuf, insize, dbuf, outsize_avail,
61 | 				      NULL);
62 | 	libdeflate_free_decompressor(d);
63 | 	free_guarded_buffer(cbuf_start, cbuf_end);
64 | 	free_guarded_buffer(dbuf_start, dbuf_end);
65 | 	return 0;
66 | }
67 | 


--------------------------------------------------------------------------------
/scripts/libFuzzer/fuzz.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | #
  3 | # Build one of the libFuzzer targets in this directory with clang, then run it
  4 | # on that target's corpus directory.
  5 | 
  6 | set -e -u -o pipefail
  7 | 
  8 | cd "$(dirname "$0")"
  9 | 
 10 | # Every subdirectory that contains a fuzz.c is a fuzz target.
 11 | read -r -a AVAILABLE_TARGETS < <(echo */fuzz.c | sed 's@/fuzz.c@@g')
 12 | 
 13 | # Print the help text.  It expands $MAX_LEN, so MAX_LEN must already be set.
 14 | usage()
 15 | {
 16 | 	cat << EOF
 17 | Usage: $0 [OPTION]... FUZZ_TARGET
 18 | 
 19 | Fuzz libdeflate with LLVM's libFuzzer.
 20 | 
 21 | Options:
 22 |    --asan          Enable AddressSanitizer
 23 |    --help          Show this help text and exit
 24 |    --max-len=LEN   Maximum length of generated inputs (default: $MAX_LEN)
 25 |    --msan          Enable MemorySanitizer
 26 |    --time=SECONDS  Stop after the given time has passed
 27 |    --ubsan         Enable UndefinedBehaviorSanitizer
 28 | 
 29 | Available fuzz targets: ${AVAILABLE_TARGETS[*]}
 30 | EOF
 31 | }
 32 | 
 33 | # Print a message to stderr and exit with failure.
 34 | die()
 35 | {
 36 | 	echo "$*" 1>&2
 37 | 	exit 1
 38 | }
 39 | 
 40 | # Echo a command line, then execute it.
 41 | run_cmd()
 42 | {
 43 | 	echo "$*"
 44 | 	"$@"
 45 | }
 46 | 
 47 | EXTRA_SANITIZERS=
 48 | EXTRA_FUZZER_ARGS=()
 49 | MAX_LEN=65536
 50 | 
 51 | # Long options understood by getopt; a trailing ':' means "takes an argument".
 52 | longopts_array=(
 53 | asan
 54 | help
 55 | max-len:
 56 | msan
 57 | time:
 58 | ubsan
 59 | )
 60 | longopts=$(echo "${longopts_array[@]}" | tr ' ' ',')
 61 | 
 62 | if ! options=$(getopt -o "" -l "$longopts" -- "$@"); then
 63 | 	usage 1>&2
 64 | 	exit 1
 65 | fi
 66 | eval set -- "$options"
 67 | while true; do
 68 | 	case "$1" in
 69 | 	--asan)
 70 | 		EXTRA_SANITIZERS+=",address"
 71 | 		;;
 72 | 	--help)
 73 | 		usage
 74 | 		exit 0
 75 | 		;;
 76 | 	--max-len)
 77 | 		MAX_LEN=$2
 78 | 		shift
 79 | 		;;
 80 | 	--msan)
 81 | 		EXTRA_SANITIZERS+=",memory"
 82 | 		;;
 83 | 	--time)
 84 | 		EXTRA_FUZZER_ARGS+=("-max_total_time=$2")
 85 | 		shift
 86 | 		;;
 87 | 	--ubsan)
 88 | 		EXTRA_SANITIZERS+=",undefined"
 89 | 		;;
 90 | 	--)
 91 | 		shift
 92 | 		break
 93 | 		;;
 94 | 	*)
 95 | 		echo 1>&2 "Invalid option '$1'"
 96 | 		usage 1>&2
 97 | 		exit 1
 98 | 		;;
 99 | 	esac
100 | 	shift
101 | done
102 | EXTRA_FUZZER_ARGS+=("-max_len=$MAX_LEN")
103 | 
104 | # Exactly one positional argument is expected: the fuzz target's directory.
105 | if (( $# < 1 )); then
106 | 	echo 1>&2 "No fuzz target specified!"
107 | 	usage 1>&2
108 | 	exit 1
109 | fi
110 | if (( $# > 1 )); then
111 | 	echo 1>&2 "Only one fuzz target may be specified!"
112 | 	usage 1>&2
113 | 	exit 1
114 | fi
115 | TARGET=$1
116 | if [ ! -e "$TARGET/fuzz.c" ]; then
117 | 	echo 1>&2 "'$TARGET' is not a valid fuzz target!"
118 | 	usage 1>&2
119 | 	exit 1
120 | fi
121 | 
122 | # Compile the library sources together with the chosen target, then run the
123 | # resulting fuzzer binary on the target's corpus.
124 | run_cmd clang -g -O1 -fsanitize=fuzzer$EXTRA_SANITIZERS \
125 | 	-Wall -Werror -DLIBDEFLATE_ENABLE_ASSERTIONS=1 -I ../../ \
126 | 	../../lib/*{,/*}.c "$TARGET/fuzz.c" -o "$TARGET/fuzz"
127 | run_cmd "$TARGET/fuzz" "${EXTRA_FUZZER_ARGS[@]}" "$TARGET/corpus"
128 | 


--------------------------------------------------------------------------------
/scripts/libFuzzer/gzip_decompress/corpus/0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebiggers/libdeflate/6bb493615b0ef35c98fc4aa4ec04f448788db6a5/scripts/libFuzzer/gzip_decompress/corpus/0


--------------------------------------------------------------------------------
/scripts/libFuzzer/gzip_decompress/fuzz.c:
--------------------------------------------------------------------------------
 1 | #include <libdeflate.h>
 2 | #include <stdint.h>
 3 | #include <stdlib.h>
 4 | 
 5 | /*
 6 |  * Fuzz gzip decompression.
 7 |  *
 8 |  * The output buffer holds up to three times the input size.  The
 9 |  * decompression result is not checked.
10 |  */
11 | int LLVMFuzzerTestOneInput(const uint8_t *in, size_t insize)
12 | {
13 | 	size_t outsize_avail = 3 * insize;
14 | 	uint8_t *out;
15 | 	struct libdeflate_decompressor *d;
16 | 
17 | 	out = malloc(outsize_avail);
18 | 	d = libdeflate_alloc_decompressor();
19 | 
20 | 	/*
21 | 	 * If an allocation failed, skip this input instead of handing a NULL
22 | 	 * pointer to libdeflate, which would crash in a way that looks like a
23 | 	 * library bug.  A NULL 'out' is still acceptable when no output space
24 | 	 * was requested, since malloc(0) is allowed to return NULL.
25 | 	 */
26 | 	if (d != NULL && (out != NULL || outsize_avail == 0))
27 | 		libdeflate_gzip_decompress(d, in, insize, out, outsize_avail,
28 | 					   NULL);
29 | 	if (d != NULL)
30 | 		libdeflate_free_decompressor(d);
31 | 	free(out);
32 | 	return 0;
33 | }
34 | 


--------------------------------------------------------------------------------
/scripts/libFuzzer/zlib_decompress/corpus/0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebiggers/libdeflate/6bb493615b0ef35c98fc4aa4ec04f448788db6a5/scripts/libFuzzer/zlib_decompress/corpus/0


--------------------------------------------------------------------------------
/scripts/libFuzzer/zlib_decompress/fuzz.c:
--------------------------------------------------------------------------------
 1 | #include <libdeflate.h>
 2 | #include <stdint.h>
 3 | #include <stdlib.h>
 4 | 
 5 | /*
 6 |  * Fuzz zlib decompression.
 7 |  *
 8 |  * The output buffer holds up to three times the input size.  The
 9 |  * decompression result is not checked.
10 |  */
11 | int LLVMFuzzerTestOneInput(const uint8_t *in, size_t insize)
12 | {
13 | 	size_t outsize_avail = 3 * insize;
14 | 	uint8_t *out;
15 | 	struct libdeflate_decompressor *d;
16 | 
17 | 	out = malloc(outsize_avail);
18 | 	d = libdeflate_alloc_decompressor();
19 | 
20 | 	/*
21 | 	 * If an allocation failed, skip this input instead of handing a NULL
22 | 	 * pointer to libdeflate, which would crash in a way that looks like a
23 | 	 * library bug.  A NULL 'out' is still acceptable when no output space
24 | 	 * was requested, since malloc(0) is allowed to return NULL.
25 | 	 */
26 | 	if (d != NULL && (out != NULL || outsize_avail == 0))
27 | 		libdeflate_zlib_decompress(d, in, insize, out, outsize_avail,
28 | 					   NULL);
29 | 	if (d != NULL)
30 | 		libdeflate_free_decompressor(d);
31 | 	free(out);
32 | 	return 0;
33 | }
34 | 


--------------------------------------------------------------------------------
/scripts/toolchain-i686-w64-mingw32.cmake:
--------------------------------------------------------------------------------
 1 | # CMake toolchain file for cross-compiling to 32-bit x86 Windows with the
 2 | # i686-w64-mingw32 GCC toolchain.
 3 | 
 4 | # Describe the target system.
 5 | set(CMAKE_SYSTEM_NAME Windows)
 6 | set(CMAKE_SYSTEM_PROCESSOR i686)
 7 | 
 8 | # Cross compiler used for C sources.
 9 | set(CMAKE_C_COMPILER i686-w64-mingw32-gcc)
10 | 
11 | # Root of the target environment.  Headers, libraries, and packages are searched
12 | # for only beneath this root; programs are searched for only on the build host.
13 | set(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32)
14 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
15 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
16 | set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
17 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
18 | 


--------------------------------------------------------------------------------
/scripts/toolchain-x86_64-w64-mingw32.cmake:
--------------------------------------------------------------------------------
 1 | # CMake toolchain file for cross-compiling to 64-bit x86 Windows with the
 2 | # x86_64-w64-mingw32 GCC toolchain.
 3 | 
 4 | # Describe the target system.
 5 | set(CMAKE_SYSTEM_NAME Windows)
 6 | set(CMAKE_SYSTEM_PROCESSOR x86_64)
 7 | 
 8 | # Cross compiler used for C sources.
 9 | set(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc)
10 | 
11 | # Root of the target environment.  Headers, libraries, and packages are searched
12 | # for only beneath this root; programs are searched for only on the build host.
13 | set(CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32)
14 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
15 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
16 | set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
17 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
18 | 


--------------------------------------------------------------------------------