├── .github
    └── workflows
    │   └── meson-ci-workflow.yaml
├── LICENSE
├── README.md
├── blz4.c
├── lz4.c
├── lz4.h
├── lz4_btparse.h
├── lz4_depack.c
├── lz4_leparse.h
├── lz4_ssparse.h
├── meson.build
├── parg.c
└── parg.h


/.github/workflows/meson-ci-workflow.yaml:
--------------------------------------------------------------------------------
 1 | name: Meson CI
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   windows:
 7 |     name: Windows ${{ matrix.config.name }}
 8 |     runs-on: windows-latest
 9 | 
10 |     strategy:
11 |       matrix:
12 |         config:
13 |           - name: MSVC x86
14 |             arch: x86
15 | 
16 |           - name: MSVC x64
17 |             arch: amd64
18 | 
19 |     steps:
20 |       - uses: actions/checkout@v4
21 | 
22 |       - uses: actions/setup-python@v4
23 |         with:
24 |           python-version: '3.x'
25 | 
26 |       - name: Install Meson
27 |         run: pip install meson ninja
28 | 
29 |       - name: Configure
30 |         run: meson setup --vsenv build
31 | 
32 |       - name: Build
33 |         run: meson compile -C build -v
34 | 
35 |   linux:
36 |     name: Linux ${{ matrix.config.name }}
37 |     runs-on: ubuntu-latest
38 |     env:
39 |       CC: ${{ matrix.config.cc }}
40 |       CXX: ${{ matrix.config.cxx }}
41 | 
42 |     strategy:
43 |       matrix:
44 |         config:
45 |           - name: GCC
46 |             cc: gcc
47 |             cxx: g++
48 | 
49 |           - name: Clang
50 |             cc: clang
51 |             cxx: clang++
52 | 
53 |     steps:
54 |       - uses: actions/checkout@v4
55 | 
56 |       - uses: actions/setup-python@v4
57 |         with:
58 |           python-version: '3.x'
59 | 
60 |       - name: Install Meson
61 |         run: pip install meson ninja
62 | 
63 |       - name: Configure
64 |         run: meson setup build
65 | 
66 |       - name: Build
67 |         run: meson compile -C build -v
68 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The zlib License (Zlib)
 2 | 
 3 | Copyright (c) 2018-2020 Joergen Ibsen
 4 | 
 5 | This software is provided 'as-is', without any express or implied
 6 | warranty. In no event will the authors be held liable for any damages
 7 | arising from the use of this software.
 8 | 
 9 | Permission is granted to anyone to use this software for any purpose,
10 | including commercial applications, and to alter it and redistribute it
11 | freely, subject to the following restrictions:
12 | 
13 |   1. The origin of this software must not be misrepresented; you must
14 |      not claim that you wrote the original software. If you use this
15 |      software in a product, an acknowledgment in the product
16 |      documentation would be appreciated but is not required.
17 | 
18 |   2. Altered source versions must be plainly marked as such, and must
19 |      not be misrepresented as being the original software.
20 | 
21 |   3. This notice may not be removed or altered from any source
22 |      distribution.
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | blz4
 3 | ====
 4 | 
 5 | [![Meson CI](https://github.com/jibsen/blz4/workflows/Meson%20CI/badge.svg)](https://github.com/jibsen/blz4/actions)
 6 | 
 7 | About
 8 | -----
 9 | 
10 | This is an example using some of the compression algorithms from [BriefLZ][]
11 | to produce output in the format of [LZ4][].
12 | 
 13 | **Please note:** this is just a quick experiment to see how it would work; it
 14 | is not production quality and has not been properly tested.
15 | 
16 | [BriefLZ]: https://github.com/jibsen/brieflz
17 | [LZ4]: https://github.com/lz4/lz4
18 | 
19 | 
20 | Benchmark
21 | ---------
22 | 
23 | Here are some results on the [Silesia compression corpus][silesia]:
24 | 
25 | | File    |   Original | `blz4 --optimal` | `lz4 -12 -l` |  `lz4x -9` |
26 | | :------ | ---------: | ---------------: | -----------: | ---------: |
27 | | dickens | 10.192.446 |        4.380.430 |    4.380.430 |  4.380.430 |
28 | | mozilla | 51.220.480 |       22.025.940 |   22.025.988 | 22.025.940 |
29 | | mr      |  9.970.564 |        4.190.675 |    4.190.774 |  4.190.675 |
30 | | nci     | 33.553.445 |        3.621.482 |    3.621.567 |  3.621.482 |
31 | | ooffice |  6.152.192 |        3.535.258 |    3.535.258 |  3.535.258 |
32 | | osdb    | 10.085.684 |        3.951.474 |    3.951.474 |  3.951.474 |
33 | | reymont |  6.627.202 |        2.063.060 |    2.063.060 |  2.063.060 |
34 | | samba   | 21.606.400 |        6.100.521 |    6.100.539 |  6.100.521 |
35 | | sao     |  7.251.944 |        5.668.742 |    5.668.742 |  5.668.742 |
36 | | webster | 41.458.703 |       13.835.336 |   13.835.336 | 13.835.336 |
37 | | xml     |  5.345.280 |          759.868 |      759.901 |    759.868 |
38 | | x-ray   |  8.474.240 |        7.177.203 |    7.177.203 |  7.177.203 |
39 | 
40 | I did not include smallz4 because it does not create output in the legacy
41 | format, so the results are not directly comparable on files larger than
42 | 4MiB.
43 | 
44 | [silesia]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
45 | 
46 | 
47 | Usage
48 | -----
49 | 
50 | blz4 uses [Meson][] to generate build systems. To create one for the tools on
51 | your platform, and build blz4, use something along the lines of:
52 | 
53 | ~~~sh
54 | mkdir build
55 | cd build
56 | meson ..
57 | ninja
58 | ~~~
59 | 
60 | You can also simply compile and link the source files.
61 | 
62 | blz4 includes the leparse and btparse algorithms from BriefLZ, which gives
63 | compression levels `-5` to `-9` and the **very** slow `--optimal`.
64 | 
65 | [Meson]: https://mesonbuild.com/
66 | 
67 | 
68 | Notes
69 | -----
70 | 
71 |   - LZ4 appears to do flexible parsing, is very close to optimal, and much
72 |     faster.
73 | 
74 | 
75 | Related Projects
76 | ----------------
77 | 
78 |   - [LZ4X](https://github.com/encode84/lz4x)
79 |   - [smallz4](https://create.stephan-brumme.com/smallz4/)
80 | 
81 | 
82 | License
83 | -------
84 | 
 85 | This project is licensed under the [zlib License](LICENSE) (Zlib).
86 | 


--------------------------------------------------------------------------------
/blz4.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * blz4 - Example of LZ4 compression with BriefLZ algorithms
  3 |  *
  4 |  * Copyright (c) 2018-2020 Joergen Ibsen
  5 |  *
  6 |  * This software is provided 'as-is', without any express or implied
  7 |  * warranty. In no event will the authors be held liable for any damages
  8 |  * arising from the use of this software.
  9 |  *
 10 |  * Permission is granted to anyone to use this software for any purpose,
 11 |  * including commercial applications, and to alter it and redistribute it
 12 |  * freely, subject to the following restrictions:
 13 |  *
 14 |  *   1. The origin of this software must not be misrepresented; you must
 15 |  *      not claim that you wrote the original software. If you use this
 16 |  *      software in a product, an acknowledgment in the product
 17 |  *      documentation would be appreciated but is not required.
 18 |  *
 19 |  *   2. Altered source versions must be plainly marked as such, and must
 20 |  *      not be misrepresented as being the original software.
 21 |  *
 22 |  *   3. This notice may not be removed or altered from any source
 23 |  *      distribution.
 24 |  */
 25 | 
 26 | #ifdef _MSC_VER
 27 | #  define _CRT_SECURE_NO_WARNINGS
 28 | #  define _CRT_DISABLE_PERFCRIT_LOCKS
 29 | #else
 30 | #  define _FILE_OFFSET_BITS 64
 31 | #  define _ftelli64 ftello64
 32 | #endif
 33 | 
 34 | #ifdef __MINGW32__
 35 | #  define __USE_MINGW_ANSI_STDIO 1
 36 | #endif
 37 | 
 38 | #include <errno.h>
 39 | #include <limits.h>
 40 | #include <stdarg.h>
 41 | #include <stddef.h>
 42 | #include <stdio.h>
 43 | #include <stdlib.h>
 44 | #include <time.h>
 45 | 
 46 | #include "lz4.h"
 47 | #include "parg.h"
 48 | 
 49 | #define LZ4_LEGACY_MAGIC (0x184C2102UL)
 50 | 
 51 | /*
 52 |  * The default block size used to process data.
 53 |  */
 54 | #ifndef BLOCK_SIZE
 55 | #  define BLOCK_SIZE (8 * 1024 * 1024UL)
 56 | #endif
 57 | 
 58 | /*
 59 |  * Unsigned char type.
 60 |  */
 61 | typedef unsigned char byte;
 62 | 
 63 | /*
 64 |  * Get the low-order 8 bits of a value.
 65 |  */
 66 | #if CHAR_BIT == 8
 67 | #  define octet(v) ((byte) (v))
 68 | #else
 69 | #  define octet(v) ((v) & 0x00FF)
 70 | #endif
 71 | 
 72 | /*
 73 |  * Store a 32-bit unsigned value in little-endian order.
 74 |  */
 75 | static void
 76 | write_le32(byte *p, unsigned long val)
 77 | {
 78 | 	p[0] = octet(val);
 79 | 	p[1] = octet(val >> 8);
 80 | 	p[2] = octet(val >> 16);
 81 | 	p[3] = octet(val >> 24);
 82 | }
 83 | 
 84 | /*
 85 |  * Read a 32-bit unsigned value in little-endian order.
 86 |  */
 87 | static unsigned long
 88 | read_le32(const byte *p)
 89 | {
 90 | 	return ((unsigned long) octet(p[0]))
 91 | 	     | ((unsigned long) octet(p[1]) << 8)
 92 | 	     | ((unsigned long) octet(p[2]) << 16)
 93 | 	     | ((unsigned long) octet(p[3]) << 24);
 94 | }
 95 | 
 96 | static unsigned int
 97 | ratio(long long x, long long y)
 98 | {
 99 | 	if (x <= LLONG_MAX / 100) {
100 | 		x *= 100;
101 | 	}
102 | 	else {
103 | 		y /= 100;
104 | 	}
105 | 
106 | 	if (y == 0) {
107 | 		y = 1;
108 | 	}
109 | 
110 | 	return (unsigned int) (x / y);
111 | }
112 | 
/*
 * Print a printf-style formatted message to stderr, prefixed with "blz4: "
 * and followed by a newline.
 */
static void
printf_error(const char *fmt, ...)
{
	va_list ap;

	fputs("blz4: ", stderr);

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	fputc('\n', stderr);
}
126 | 
/*
 * Print a printf-style formatted message to stderr, prefixed with "blz4: ",
 * followed by a short usage summary.
 */
static void
printf_usage(const char *fmt, ...)
{
	static const char usage_text[] =
		"\n"
		"usage: blz4 [-56789 | --optimal] [-v] INFILE OUTFILE\n"
		"       blz4 -d [-v] INFILE OUTFILE\n"
		"       blz4 -V | --version\n"
		"       blz4 -h | --help\n";
	va_list ap;

	fputs("blz4: ", stderr);

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	fputs(usage_text, stderr);
}
144 | 
145 | static int
146 | compress_file(const char *oldname, const char *packedname, int be_verbose,
147 |               int level)
148 | {
149 | 	const byte lz4_magic[4] = { 0x02, 0x21, 0x4C, 0x18 };
150 | 	byte header[4];
151 | 	FILE *oldfile = NULL;
152 | 	FILE *packedfile = NULL;
153 | 	byte *data = NULL;
154 | 	byte *packed = NULL;
155 | 	byte *workmem = NULL;
156 | 	long long insize = 0, outsize = 0;
157 | 	static const char rotator[] = "-\\|/";
158 | 	unsigned int counter = 0;
159 | 	size_t n_read;
160 | 	clock_t clocks;
161 | 	int res = 1;
162 | 
163 | 	/* Allocate memory */
164 | 	if ((data = (byte *) malloc(BLOCK_SIZE)) == NULL
165 | 	 || (packed = (byte *) malloc(lz4_max_packed_size(BLOCK_SIZE))) == NULL
166 | 	 || (workmem = (byte *) malloc(lz4_workmem_size_level(BLOCK_SIZE, level))) == NULL) {
167 | 		printf_error("not enough memory");
168 | 		goto out;
169 | 	}
170 | 
171 | 	/* Open input file */
172 | 	if ((oldfile = fopen(oldname, "rb")) == NULL) {
173 | 		printf_usage("unable to open input file '%s'", oldname);
174 | 		goto out;
175 | 	}
176 | 
177 | 	/* Create output file */
178 | 	if ((packedfile = fopen(packedname, "wb")) == NULL) {
179 | 		printf_usage("unable to open output file '%s'", packedname);
180 | 		goto out;
181 | 	}
182 | 
183 | 	clocks = clock();
184 | 
185 | 	/* Write LZ4 header magic */
186 | 	fwrite(lz4_magic, 1, sizeof(lz4_magic), packedfile);
187 | 	outsize += sizeof(lz4_magic);
188 | 
189 | 	/* While we are able to read data from input file .. */
190 | 	while ((n_read = fread(data, 1, BLOCK_SIZE, oldfile)) > 0) {
191 | 		size_t packedsize;
192 | 
193 | 		/* Show a little progress indicator */
194 | 		if (be_verbose) {
195 | 			fprintf(stderr, "%c\r", rotator[counter]);
196 | 			counter = (counter + 1) & 0x03;
197 | 		}
198 | 
199 | 		/* Compress data block */
200 | 		packedsize = lz4_pack_level(data, packed, (unsigned long) n_read,
201 | 		                            workmem, level);
202 | 
203 | 		/* Check for compression error */
204 | 		if (packedsize == 0) {
205 | 			printf_error("an error occured while compressing");
206 | 			goto out;
207 | 		}
208 | 
209 | 		/* Put block-specific values into header */
210 | 		write_le32(header, (unsigned long) packedsize);
211 | 
212 | 		/* Write header and compressed data */
213 | 		fwrite(header, 1, sizeof(header), packedfile);
214 | 		fwrite(packed, 1, packedsize, packedfile);
215 | 
216 | 		/* Sum input and output size */
217 | 		insize += n_read;
218 | 		outsize += packedsize + sizeof(header);
219 | 	}
220 | 
221 | 	clocks = clock() - clocks;
222 | 
223 | 	/* Show result */
224 | 	if (be_verbose) {
225 | 		fprintf(stderr, "in %lld out %lld ratio %u%% time %.2f\n",
226 | 		        insize, outsize, ratio(outsize, insize),
227 | 		        (double) clocks / (double) CLOCKS_PER_SEC);
228 | 	}
229 | 
230 | 	res = 0;
231 | 
232 | out:
233 | 	/* Close files */
234 | 	if (packedfile != NULL) {
235 | 		fclose(packedfile);
236 | 	}
237 | 	if (oldfile != NULL) {
238 | 		fclose(oldfile);
239 | 	}
240 | 
241 | 	/* Free memory */
242 | 	if (workmem != NULL) {
243 | 		free(workmem);
244 | 	}
245 | 	if (packed != NULL) {
246 | 		free(packed);
247 | 	}
248 | 	if (data != NULL) {
249 | 		free(data);
250 | 	}
251 | 
252 | 	return res;
253 | }
254 | 
255 | static int
256 | decompress_file(const char *packedname, const char *newname, int be_verbose)
257 | {
258 | 	byte header[4];
259 | 	FILE *newfile = NULL;
260 | 	FILE *packedfile = NULL;
261 | 	byte *data = NULL;
262 | 	byte *packed = NULL;
263 | 	long long insize = 0, outsize = 0;
264 | 	static const char rotator[] = "-\\|/";
265 | 	unsigned int counter = 0;
266 | 	clock_t clocks;
267 | 	size_t max_packed_size;
268 | 	int res = 1;
269 | 
270 | 	max_packed_size = lz4_max_packed_size(BLOCK_SIZE);
271 | 
272 | 	/* Allocate memory */
273 | 	if ((data = (byte *) malloc(BLOCK_SIZE)) == NULL
274 | 	 || (packed = (byte *) malloc(max_packed_size)) == NULL) {
275 | 		printf_error("not enough memory");
276 | 		goto out;
277 | 	}
278 | 
279 | 	/* Open input file */
280 | 	if ((packedfile = fopen(packedname, "rb")) == NULL) {
281 | 		printf_usage("unable to open input file '%s'", packedname);
282 | 		goto out;
283 | 	}
284 | 
285 | 	/* Create output file */
286 | 	if ((newfile = fopen(newname, "wb")) == NULL) {
287 | 		printf_usage("unable to open output file '%s'", newname);
288 | 		goto out;
289 | 	}
290 | 
291 | 	clocks = clock();
292 | 
293 | 	/* Read LZ4 header magic */
294 | 	if (fread(header, 1, sizeof(header), packedfile) != sizeof(header)) {
295 | 		printf_error("unable to read LZ4 header magic");
296 | 		goto out;
297 | 	}
298 | 
299 | 	/* Check header is LZ4 legacy magic */
300 | 	if (read_le32(header) != LZ4_LEGACY_MAGIC) {
301 | 		printf_error("LZ4 header magic mismatch");
302 | 		goto out;
303 | 	}
304 | 
305 | 	/* While we are able to read a header from input file .. */
306 | 	while (fread(header, 1, sizeof(header), packedfile) == sizeof(header)) {
307 | 		size_t hdr_packedsize, depackedsize;
308 | 
309 | 		/* Show a little progress indicator */
310 | 		if (be_verbose) {
311 | 			fprintf(stderr, "%c\r", rotator[counter]);
312 | 			counter = (counter + 1) & 0x03;
313 | 		}
314 | 
315 | 		/* Get compressed size from header */
316 | 		hdr_packedsize = (size_t) read_le32(header);
317 | 
318 | 		/* If header is LZ4 magic value, assume new frame */
319 | 		if (hdr_packedsize == LZ4_LEGACY_MAGIC) {
320 | 			insize += sizeof(header);
321 | 			continue;
322 | 		}
323 | 
324 | 		/* Check buffer is sufficient */
325 | 		if (hdr_packedsize > max_packed_size) {
326 | 			printf_error("compressed size in header too large");
327 | 			goto out;
328 | 		}
329 | 
330 | 		/* Read compressed data */
331 | 		if (fread(packed, 1, hdr_packedsize, packedfile) != hdr_packedsize) {
332 | 			printf_error("error reading block from compressed file");
333 | 			goto out;
334 | 		}
335 | 
336 | 		/* Decompress data */
337 | 		depackedsize = lz4_depack(packed, data,
338 | 		                          (unsigned long) hdr_packedsize);
339 | 
340 | 		/* Check for decompression error */
341 | 		if (depackedsize == LZ4_ERROR) {
342 | 			printf_error("an error occured while decompressing");
343 | 			goto out;
344 | 		}
345 | 
346 | 		/* Write decompressed data */
347 | 		fwrite(data, 1, depackedsize, newfile);
348 | 
349 | 		/* Sum input and output size */
350 | 		insize += hdr_packedsize + sizeof(header);
351 | 		outsize += depackedsize;
352 | 	}
353 | 
354 | 	clocks = clock() - clocks;
355 | 
356 | 	/* Show result */
357 | 	if (be_verbose) {
358 | 		fprintf(stderr, "in %lld out %lld ratio %u%% time %.2f\n",
359 | 		        insize, outsize, ratio(insize, outsize),
360 | 		        (double) clocks / (double) CLOCKS_PER_SEC);
361 | 	}
362 | 
363 | 	res = 0;
364 | 
365 | out:
366 | 	/* Close files */
367 | 	if (packedfile != NULL) {
368 | 		fclose(packedfile);
369 | 	}
370 | 	if (newfile != NULL) {
371 | 		fclose(newfile);
372 | 	}
373 | 
374 | 	/* Free memory */
375 | 	if (packed != NULL) {
376 | 		free(packed);
377 | 	}
378 | 	if (data != NULL) {
379 | 		free(data);
380 | 	}
381 | 
382 | 	return res;
383 | }
384 | 
/*
 * Print the full help text (usage line and option list) to stdout.
 */
static void
print_syntax(void)
{
	static const char help_text[] =
		"usage: blz4 [options] INFILE OUTFILE\n"
		"\n"
		"options:\n"
		"  -5                     compress faster (default)\n"
		"  -9                     compress better\n"
		"      --optimal          optimal but very slow compression\n"
		"  -d, --decompress       decompress\n"
		"  -h, --help             print this help and exit\n"
		"  -v, --verbose          verbose mode\n"
		"  -V, --version          print version and exit\n"
		"\n"
		"PLEASE NOTE: This is an experiment, use at your own risk.\n";

	fputs(help_text, stdout);
}
401 | 
402 | static void
403 | print_version(void)
404 | {
405 | 	fputs("blz4 " LZ4_VER_STRING "\n"
406 | 	      "\n"
407 | 	      "Copyright (c) 2018-2020 Joergen Ibsen\n"
408 | 	      "\n"
409 | 	      "Licensed under the zlib license (Zlib).\n"
410 | 	      "There is NO WARRANTY, to the extent permitted by law.\n", stdout);
411 | }
412 | 
413 | int
414 | main(int argc, char *argv[])
415 | {
416 | 	struct parg_state ps;
417 | 	const char *infile = NULL;
418 | 	const char *outfile = NULL;
419 | 	int flag_decompress = 0;
420 | 	int flag_verbose = 0;
421 | 	int level = 5;
422 | 	int c;
423 | 
424 | 	const struct parg_option long_options[] = {
425 | 		{ "decompress", PARG_NOARG, NULL, 'd' },
426 | 		{ "help", PARG_NOARG, NULL, 'h' },
427 | 		{ "optimal", PARG_NOARG, NULL, 'x' },
428 | 		{ "verbose", PARG_NOARG, NULL, 'v' },
429 | 		{ "version", PARG_NOARG, NULL, 'V' },
430 | 		{ 0, 0, 0, 0 }
431 | 	};
432 | 
433 | 	parg_init(&ps);
434 | 
435 | 	while ((c = parg_getopt_long(&ps, argc, argv, "56789dhvVx", long_options, NULL)) != -1) {
436 | 		switch (c) {
437 | 		case 1:
438 | 			if (infile == NULL) {
439 | 				infile = ps.optarg;
440 | 			}
441 | 			else if (outfile == NULL) {
442 | 				outfile = ps.optarg;
443 | 			}
444 | 			else {
445 | 				printf_usage("too many arguments");
446 | 				return EXIT_FAILURE;
447 | 			}
448 | 			break;
449 | 		case '5':
450 | 		case '6':
451 | 		case '7':
452 | 		case '8':
453 | 		case '9':
454 | 			level = c - '0';
455 | 			break;
456 | 		case 'x':
457 | 			level = 10;
458 | 			break;
459 | 		case 'd':
460 | 			flag_decompress = 1;
461 | 			break;
462 | 		case 'h':
463 | 			print_syntax();
464 | 			return EXIT_SUCCESS;
465 | 			break;
466 | 		case 'v':
467 | 			flag_verbose = 1;
468 | 			break;
469 | 		case 'V':
470 | 			print_version();
471 | 			return EXIT_SUCCESS;
472 | 			break;
473 | 		default:
474 | 			printf_usage("unknown option '%s'", argv[ps.optind - 1]);
475 | 			return EXIT_FAILURE;
476 | 			break;
477 | 		}
478 | 	}
479 | 
480 | 	if (outfile == NULL) {
481 | 		printf_usage("too few arguments");
482 | 		return EXIT_FAILURE;
483 | 	}
484 | 
485 | 	if (flag_decompress) {
486 | 		return decompress_file(infile, outfile, flag_verbose);
487 | 	}
488 | 	else {
489 | 		return compress_file(infile, outfile, flag_verbose, level);
490 | 	}
491 | 
492 | 	return EXIT_SUCCESS;
493 | }
494 | 


--------------------------------------------------------------------------------
/lz4.c:
--------------------------------------------------------------------------------
  1 | //
  2 | // blz4 - Example of LZ4 compression with BriefLZ algorithms
  3 | //
  4 | // C packer
  5 | //
  6 | // Copyright (c) 2018-2020 Joergen Ibsen
  7 | //
  8 | // This software is provided 'as-is', without any express or implied
  9 | // warranty. In no event will the authors be held liable for any damages
 10 | // arising from the use of this software.
 11 | //
 12 | // Permission is granted to anyone to use this software for any purpose,
 13 | // including commercial applications, and to alter it and redistribute it
 14 | // freely, subject to the following restrictions:
 15 | //
 16 | //   1. The origin of this software must not be misrepresented; you must
 17 | //      not claim that you wrote the original software. If you use this
 18 | //      software in a product, an acknowledgment in the product
 19 | //      documentation would be appreciated but is not required.
 20 | //
 21 | //   2. Altered source versions must be plainly marked as such, and must
 22 | //      not be misrepresented as being the original software.
 23 | //
 24 | //   3. This notice may not be removed or altered from any source
 25 | //      distribution.
 26 | //
 27 | 
 28 | #include "lz4.h"
 29 | 
 30 | #include <assert.h>
 31 | #include <limits.h>
 32 | #include <stdint.h>
 33 | 
 34 | #if _MSC_VER >= 1400
 35 | #  include <intrin.h>
 36 | #  define LZ4_BUILTIN_MSVC
 37 | #elif defined(__clang__) && defined(__has_builtin)
 38 | #  if __has_builtin(__builtin_clz)
 39 | #    define LZ4_BUILTIN_GCC
 40 | #  endif
 41 | #elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
 42 | #  define LZ4_BUILTIN_GCC
 43 | #endif
 44 | 
 45 | // Number of bits of hash to use for lookup.
 46 | //
 47 | // The size of the lookup table (and thus workmem) depends on this.
 48 | //
 49 | // Values between 10 and 18 work well. Lower values generally make compression
 50 | // speed faster but ratio worse. The default value 17 (128k entries) is a
 51 | // compromise.
 52 | //
 53 | #ifndef LZ4_HASH_BITS
 54 | #  define LZ4_HASH_BITS 17
 55 | #endif
 56 | 
 57 | #define LOOKUP_SIZE (1UL << LZ4_HASH_BITS)
 58 | 
 59 | #define WORKMEM_SIZE (LOOKUP_SIZE * sizeof(uint32_t))
 60 | 
 61 | #define NO_MATCH_POS ((uint32_t) -1)
 62 | 
 63 | static int
 64 | lz4_log2(unsigned long n)
 65 | {
 66 | 	assert(n > 0);
 67 | 
 68 | #if defined(LZ4_BUILTIN_MSVC)
 69 | 	unsigned long msb_pos;
 70 | 	_BitScanReverse(&msb_pos, n);
 71 | 	return (int) msb_pos;
 72 | #elif defined(LZ4_BUILTIN_GCC)
 73 | 	return (int) sizeof(n) * CHAR_BIT - 1 - __builtin_clzl(n);
 74 | #else
 75 | 	int bits = 0;
 76 | 
 77 | 	while (n >>= 1) {
 78 | 		++bits;
 79 | 	}
 80 | 
 81 | 	return bits;
 82 | #endif
 83 | }
 84 | 
 85 | // Hash four bytes starting a p.
 86 | //
 87 | // This is Fibonacci hashing, also known as Knuth's multiplicative hash. The
 88 | // constant is a prime close to 2^32/phi.
 89 | //
 90 | static unsigned long
 91 | lz4_hash4_bits(const unsigned char *p, int bits)
 92 | {
 93 | 	assert(bits > 0 && bits <= 32);
 94 | 
 95 | 	uint32_t val = (uint32_t) p[0]
 96 | 	             | ((uint32_t) p[1] << 8)
 97 | 	             | ((uint32_t) p[2] << 16)
 98 | 	             | ((uint32_t) p[3] << 24);
 99 | 
100 | 	return (val * UINT32_C(2654435761)) >> (32 - bits);
101 | }
102 | 
// Cost estimate for a run of nlit literals: 0 for runs shorter than 15,
// rising by one for every further 255 literals.
static unsigned long
lz4_literal_cost(unsigned long nlit)
{
	// 255 - 15 folded into a single rounding bias
	const unsigned long biased = nlit + 240;

	return biased / 255;
}
108 | 
// Cost estimate for a match of length len: a fixed base of 3, plus one
// once len reaches 19 and one more for every further 255.
static unsigned long
lz4_match_cost(unsigned long len)
{
	const unsigned long base_cost = 1 + 2;
	// 255 - 19 folded into a single rounding bias
	const unsigned long extra_cost = (len + 236) / 255;

	return base_cost + extra_cost;
}
114 | 
// Upper bound on the compressed size of src_size bytes: the input size,
// plus one byte per 255 input bytes, plus 16.
unsigned long
lz4_max_packed_size(unsigned long src_size)
{
	const unsigned long per_255_overhead = src_size / 255;

	return src_size + per_255_overhead + 16;
}
120 | 
121 | // Include compression algorithms used by lz4_pack_level
122 | #include "lz4_btparse.h"
123 | #include "lz4_leparse.h"
124 | 
// Return the workmem size needed to compress src_size bytes at `level`.
//
// Levels 5-7 use the leparse sizing and levels 8-10 the btparse sizing;
// any other level yields (size_t) -1.
size_t
lz4_workmem_size_level(size_t src_size, int level)
{
	if (level >= 5 && level <= 7) {
		return lz4_leparse_workmem_size(src_size);
	}

	if (level >= 8 && level <= 10) {
		return lz4_btparse_workmem_size(src_size);
	}

	return (size_t) -1;
}
141 | 
142 | unsigned long
143 | lz4_pack_level(const void *src, void *dst, unsigned long src_size,
144 |                void *workmem, int level)
145 | {
146 | 	switch (level) {
147 | 	case 5:
148 | 		return lz4_pack_leparse(src, dst, src_size, workmem, 1, 18);
149 | 	case 6:
150 | 		return lz4_pack_leparse(src, dst, src_size, workmem, 8, 32);
151 | 	case 7:
152 | 		return lz4_pack_leparse(src, dst, src_size, workmem, 64, 64);
153 | 	case 8:
154 | 		return lz4_pack_btparse(src, dst, src_size, workmem, 16, 96);
155 | 	case 9:
156 | 		return lz4_pack_btparse(src, dst, src_size, workmem, 32, 224);
157 | 	case 10:
158 | 		return lz4_pack_btparse(src, dst, src_size, workmem, ULONG_MAX, ULONG_MAX);
159 | 	default:
160 | 		return LZ4_ERROR;
161 | 	}
162 | }
163 | 
// Round-trip fuzzing harness for libFuzzer.
//
// clang -g -O1 -fsanitize=fuzzer,address -DLZ4_FUZZING lz4.c lz4_depack.c
#if defined(LZ4_FUZZING)
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#ifndef LZ4_FUZZ_LEVEL
#  define LZ4_FUZZ_LEVEL 5
#endif

// Compress the fuzzer input at LZ4_FUZZ_LEVEL, decompress the result, and
// abort if the round trip does not reproduce the input exactly.
//
// Inputs larger than 8 MiB are skipped. Always returns 0.
extern int
LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
{
	if (size > 8 * 1024 * 1024UL) { return 0; }

	void *workmem = malloc(lz4_workmem_size_level(size, LZ4_FUZZ_LEVEL));
	void *packed = malloc(lz4_max_packed_size(size));
	// malloc(0) may return NULL, which would be mistaken for an allocation
	// failure on empty input, so always request at least one byte
	void *depacked = malloc(size > 0 ? size : 1);

	if (!workmem || !packed || !depacked) { abort(); }

	unsigned long packed_size = lz4_pack_level(data, packed, size, workmem, LZ4_FUZZ_LEVEL);

	// The packer must succeed and stay within its documented bound
	if (packed_size == LZ4_ERROR || packed_size > lz4_max_packed_size(size)) { abort(); }

	unsigned long depacked_size = lz4_depack(packed, depacked, packed_size);

	// The round trip must reproduce both the length and the content
	if (depacked_size != size) { abort(); }
	if (memcmp(data, depacked, size)) { abort(); }

	free(depacked);
	free(packed);
	free(workmem);

	return 0;
}
#endif
193 | 


--------------------------------------------------------------------------------
/lz4.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * blz4 - Example of LZ4 compression with BriefLZ algorithms
  3 |  *
  4 |  * C/C++ header file
  5 |  *
  6 |  * Copyright (c) 2018-2020 Joergen Ibsen
  7 |  *
  8 |  * This software is provided 'as-is', without any express or implied
  9 |  * warranty. In no event will the authors be held liable for any damages
 10 |  * arising from the use of this software.
 11 |  *
 12 |  * Permission is granted to anyone to use this software for any purpose,
 13 |  * including commercial applications, and to alter it and redistribute it
 14 |  * freely, subject to the following restrictions:
 15 |  *
 16 |  *   1. The origin of this software must not be misrepresented; you must
 17 |  *      not claim that you wrote the original software. If you use this
 18 |  *      software in a product, an acknowledgment in the product
 19 |  *      documentation would be appreciated but is not required.
 20 |  *
 21 |  *   2. Altered source versions must be plainly marked as such, and must
 22 |  *      not be misrepresented as being the original software.
 23 |  *
 24 |  *   3. This notice may not be removed or altered from any source
 25 |  *      distribution.
 26 |  */
 27 | 
 28 | #ifndef LZ4_H_INCLUDED
 29 | #define LZ4_H_INCLUDED
 30 | 
 31 | #include <stddef.h>
 32 | #include <stdio.h>
 33 | 
 34 | #ifdef __cplusplus
 35 | extern "C" {
 36 | #endif
 37 | 
/*
 * NOTE(review): LZ4_VER_MINOR is 1 but LZ4_VER_STRING says "0.2.0"; one of
 * the two looks stale -- confirm the intended version and align them.
 */
#define LZ4_VER_MAJOR 0        /**< Major version number */
#define LZ4_VER_MINOR 1        /**< Minor version number */
#define LZ4_VER_PATCH 0        /**< Patch version number */
#define LZ4_VER_STRING "0.2.0" /**< Version number as a string */
 42 | 
 43 | #ifdef LZ4_DLL
 44 | #  if defined(_WIN32) || defined(__CYGWIN__)
 45 | #    ifdef LZ4_DLL_EXPORTS
 46 | #      define LZ4_API __declspec(dllexport)
 47 | #    else
 48 | #      define LZ4_API __declspec(dllimport)
 49 | #    endif
 50 | #    define LZ4_LOCAL
 51 | #  else
 52 | #    if __GNUC__ >= 4
 53 | #      define LZ4_API __attribute__ ((visibility ("default")))
 54 | #      define LZ4_LOCAL __attribute__ ((visibility ("hidden")))
 55 | #    else
 56 | #      define LZ4_API
 57 | #      define LZ4_LOCAL
 58 | #    endif
 59 | #  endif
 60 | #else
 61 | #  define LZ4_API
 62 | #  define LZ4_LOCAL
 63 | #endif
 64 | 
 65 | /**
 66 |  * Return value on error.
 67 |  */
 68 | #ifndef LZ4_ERROR
 69 | #  define LZ4_ERROR ((unsigned long) (-1))
 70 | #endif
 71 | 
 72 | /**
 73 |  * Get bound on compressed data size.
 74 |  *
 75 |  * @see lz4_pack_level
 76 |  *
 77 |  * @param src_size number of bytes to compress
 78 |  * @return maximum size of compressed data
 79 |  */
 80 | LZ4_API unsigned long
 81 | lz4_max_packed_size(unsigned long src_size);
 82 | 
/**
 * Get required size of `workmem` buffer.
 *
 * Supported levels are 5 to 10. For any other level the function returns
 * `(size_t) -1`, so passing the result straight to an allocator makes the
 * allocation fail.
 *
 * @see lz4_pack_level
 *
 * @param src_size number of bytes to compress
 * @param level compression level
 * @return required size in bytes of `workmem` buffer, or `(size_t) -1` if
 *         `level` is not supported
 */
LZ4_API size_t
lz4_workmem_size_level(size_t src_size, int level);
 94 | 
/**
 * Compress `src_size` bytes of data from `src` to `dst`.
 *
 * Compression levels between 5 and 9 offer a trade-off between
 * time/space and ratio. Level 10 is optimal but very slow.
 *
 * @see lz4_max_packed_size
 * @see lz4_workmem_size_level
 *
 * @param src pointer to data
 * @param dst pointer to where to place compressed data
 * @param src_size number of bytes to compress
 * @param workmem pointer to memory for temporary use
 * @param level compression level
 * @return size of compressed data, or `LZ4_ERROR` if `level` is not
 *         supported
 */
LZ4_API unsigned long
lz4_pack_level(const void *src, void *dst, unsigned long src_size,
               void *workmem, int level);
111 | 
/**
 * Decompress data from `src` to `dst`.
 *
 * The size of the `dst` buffer is not passed in, so the caller must
 * provide a buffer large enough for the decompressed data.
 *
 * NOTE(review): blz4.c treats a return value of `LZ4_ERROR` as failure;
 * the implementation is in lz4_depack.c -- confirm the error contract and
 * how malformed input is handled.
 *
 * @param src pointer to compressed data
 * @param dst pointer to where to place decompressed data
 * @param packed_size size of compressed data
 * @return size of decompressed data
 */
LZ4_API unsigned long
lz4_depack(const void *src, void *dst, unsigned long packed_size);
122 | 
123 | #ifdef __cplusplus
124 | } /* extern "C" */
125 | #endif
126 | 
127 | #endif /* LZ4_H_INCLUDED */
128 | 


--------------------------------------------------------------------------------
/lz4_btparse.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // blz4 - Example of LZ4 compression with BriefLZ algorithms
  3 | //
  4 | // Forwards dynamic programming parse using binary trees
  5 | //
  6 | // Copyright (c) 2020 Joergen Ibsen
  7 | //
  8 | // This software is provided 'as-is', without any express or implied
  9 | // warranty. In no event will the authors be held liable for any damages
 10 | // arising from the use of this software.
 11 | //
 12 | // Permission is granted to anyone to use this software for any purpose,
 13 | // including commercial applications, and to alter it and redistribute it
 14 | // freely, subject to the following restrictions:
 15 | //
 16 | //   1. The origin of this software must not be misrepresented; you must
 17 | //      not claim that you wrote the original software. If you use this
 18 | //      software in a product, an acknowledgment in the product
 19 | //      documentation would be appreciated but is not required.
 20 | //
 21 | //   2. Altered source versions must be plainly marked as such, and must
 22 | //      not be misrepresented as being the original software.
 23 | //
 24 | //   3. This notice may not be removed or altered from any source
 25 | //      distribution.
 26 | //
 27 | 
 28 | #ifndef LZ4_BTPARSE_H_INCLUDED
 29 | #define LZ4_BTPARSE_H_INCLUDED
 30 | 
 31 | static size_t
 32 | lz4_btparse_workmem_size(size_t src_size)
 33 | {
 34 | 	return (5 * src_size + 3 + LOOKUP_SIZE) * sizeof(uint32_t);
 35 | }
 36 | 
// Forwards dynamic programming parse using binary trees, checking all
// possible matches.
//
// The match search uses a binary tree for each hash entry, which is updated
// dynamically as it is searched by re-rooting the tree at the search string.
//
// This does not result in balanced trees on all inputs, but often works well
// in practice, and has the advantage that we get the matches in order from
// closest and back.
//
// A drawback is the memory requirement of 5 * src_size words, since we cannot
// overlap the arrays in a forwards parse.
//
// This match search method is found in LZMA by Igor Pavlov, libdeflate
// by Eric Biggers, and other libraries.
//
// Parameters:
//
//   src         data to compress
//   dst         where to write the compressed data (its capacity is not
//               passed in, so it is not checked here)
//   src_size    number of bytes at src
//   workmem     temporary memory, used as 5 * src_size + 3 + LOOKUP_SIZE
//               words of type uint32_t, which is the amount computed by
//               lz4_btparse_workmem_size()
//   max_depth   maximum number of tree nodes visited for each position
//   accept_len  a match of at least this length ends the search at that
//               position, and no matches are looked for at the positions
//               it covers (the trees are still updated there)
//
// Returns the number of bytes written to dst.
//
static unsigned long
lz4_pack_btparse(const void *src, void *dst, unsigned long src_size, void *workmem,
                 const unsigned long max_depth, const unsigned long accept_len)
{
	const unsigned char *const in = (const unsigned char *) src;
	// Matches are only searched for at positions up to 12 bytes before the
	// end of the input
	const unsigned long last_match_pos = src_size > 12 ? src_size - 12 : 0;

	// Check for empty input
	if (src_size == 0) {
		unsigned char *out = (unsigned char *) dst;
		*out++ = 0;
		return 1;
	}

	// Check for input without room for match
	//
	// The output is a single token with the literal count in the high
	// nibble, followed by the literals.
	//
	if (src_size < 13) {
		unsigned char *out = (unsigned char *) dst;
		*out++ = src_size << 4;
		for (unsigned long i = 0; i < src_size; ++i) {
			*out++ = in[i];
		}
		return 1 + src_size;
	}

	// Partition workmem: cost, mpos, and mlen have one entry for each
	// position including the end (src_size + 1), nodes has two entries
	// (left and right child) for each position, and lookup has one entry
	// for each hash value.
	uint32_t *const cost = (uint32_t *) workmem;
	uint32_t *const mpos = cost + src_size + 1;
	uint32_t *const mlen = mpos + src_size + 1;
	uint32_t *const nodes = mlen + src_size + 1;
	uint32_t *const lookup = nodes + 2 * src_size;

	// Initialize lookup
	for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) {
		lookup[i] = NO_MATCH_POS;
	}

	// Initialize to all literals with infinite cost
	for (unsigned long i = 0; i <= src_size; ++i) {
		cost[i] = UINT32_MAX;
		mlen[i] = 1;
		mpos[i] = 0;
	}

	cost[0] = 0;

	// Next position where we are going to check matches
	//
	// This is used to skip matching while still updating the trees when
	// we find a match that is accept_len or longer.
	//
	unsigned long next_match_cur = 0;

	// Phase 1: Find lowest cost path arriving at each position
	for (unsigned long cur = 0; cur <= last_match_pos; ++cur) {
		// Check literal
		//
		// For literals, we store the number of literals up to the
		// current position in mpos. This is used to update the cost
		// from the current position with the additional cost of
		// encoding the length of this run of literals in the next
		// match.
		//
		if (mlen[cur] == 1) {
			unsigned long literals_cost = 1 + lz4_literal_cost(mpos[cur] + 1) - lz4_literal_cost(mpos[cur]);

			if (cost[cur + 1] > cost[cur] + literals_cost) {
				cost[cur + 1] = cost[cur] + literals_cost;
				mlen[cur + 1] = 1;
				mpos[cur + 1] = mpos[cur] + 1;
			}
		}
		else {
			// We arrived here with a match, so this would be the
			// first literal of a new run
			if (cost[cur + 1] > cost[cur] + 1) {
				cost[cur + 1] = cost[cur] + 1;
				mlen[cur + 1] = 1;
				mpos[cur + 1] = 1;
			}
		}

		if (cur > next_match_cur) {
			next_match_cur = cur;
		}

		// A match is only recorded if it is longer than 3, so the
		// shortest match used is 4 bytes
		unsigned long max_len = 3;
		unsigned long max_len_pos = NO_MATCH_POS;

		// Look up first match for current position
		//
		// pos is the current root of the tree of strings with this
		// hash. We are going to re-root the tree so cur becomes the
		// new root.
		//
		const unsigned long hash = lz4_hash4_bits(&in[cur], LZ4_HASH_BITS);
		unsigned long pos = lookup[hash];
		lookup[hash] = cur;

		// nodes[2 * p] is the left child (strings less than the one
		// at p) and nodes[2 * p + 1] the right child of position p
		uint32_t *lt_node = &nodes[2 * cur];
		uint32_t *gt_node = &nodes[2 * cur + 1];
		unsigned long lt_len = 0;
		unsigned long gt_len = 0;

		assert(pos == NO_MATCH_POS || pos < cur);

		// If we are checking matches, allow lengths up to end of
		// input, otherwise compare only up to accept_len
		//
		// In both cases the limit keeps a match from covering the
		// last 5 bytes of the input.
		//
		const unsigned long len_limit = cur == next_match_cur ? src_size - cur - 5
		                              : accept_len < src_size - cur - 5 ? accept_len
		                              : src_size - cur - 5;
		unsigned long num_chain = max_depth;

		// Check matches
		for (;;) {
			// If at bottom of tree, mark leaf nodes
			//
			// In case we reached max_depth, this also prunes the
			// subtree we have not searched yet and do not know
			// where belongs.
			//
			// Offsets are written as two bytes in phase 3, so
			// positions more than 65535 back cannot be used.
			//
			if (pos == NO_MATCH_POS || cur - pos > 65535 || num_chain-- == 0) {
				*lt_node = NO_MATCH_POS;
				*gt_node = NO_MATCH_POS;

				break;
			}

			// The string at pos is lexicographically greater than
			// a string that matched in the first lt_len positions,
			// and less than a string that matched in the first
			// gt_len positions, so it must match up to at least
			// the minimum of these.
			unsigned long len = lt_len < gt_len ? lt_len : gt_len;

			// Find match len
			while (len < len_limit && in[pos + len] == in[cur + len]) {
				++len;
			}

			// Update longest match found
			if (cur == next_match_cur && len > max_len) {
				max_len = len;
				max_len_pos = pos;

				if (len >= accept_len) {
					next_match_cur = cur + len;
				}
			}

			// If we reach maximum match length, the string at pos
			// is equal to cur, so we can assign the left and right
			// subtrees.
			//
			// This removes pos from the tree, but we added cur
			// which is equal and closer for future matches.
			//
			if (len >= accept_len || len == len_limit) {
				*lt_node = nodes[2 * pos];
				*gt_node = nodes[2 * pos + 1];

				break;
			}

			// Go to previous match and restructure tree
			//
			// lt_node points to a node that is going to contain
			// elements lexicographically less than cur (the search
			// string).
			//
			// If the string at pos is less than cur, we set that
			// lt_node to pos. We know that all elements in the
			// left subtree are less than pos, and thus less than
			// cur, so we point lt_node at the right subtree of
			// pos and continue our search there.
			//
			// The equivalent applies to gt_node when the string at
			// pos is greater than cur.
			//
			if (in[pos + len] < in[cur + len]) {
				*lt_node = pos;
				lt_node = &nodes[2 * pos + 1];
				assert(*lt_node == NO_MATCH_POS || *lt_node < pos);
				pos = *lt_node;
				lt_len = len;
			}
			else {
				*gt_node = pos;
				gt_node = &nodes[2 * pos];
				assert(*gt_node == NO_MATCH_POS || *gt_node < pos);
				pos = *gt_node;
				gt_len = len;
			}
		}

		// Update costs for longest match found
		//
		// If the match is longer than 18, decreasing the match length
		// by up to 255 will result in saving 1 byte on the match
		// length encoding.
		//
		// On the other hand, the best case is that the following
		// sequence is a match that can be extended to the left to
		// cover the bytes we no longer match, which increases the
		// match length of that match. We can do this at most 254
		// times before its match length encoding goes up 1 byte.
		//
		// So we only have to check the last 255 possible match
		// lengths.
		//
		// This optimization is from lz4x by Ilya Muravyov.
		//
		if (max_len_pos != NO_MATCH_POS) {
			unsigned long min_len = max_len > (254 + 4) ? max_len - 254 : 4;

			for (unsigned long i = min_len; i <= max_len; ++i) {
				unsigned long match_cost = lz4_match_cost(i);

				assert(match_cost < UINT32_MAX - cost[cur]);

				unsigned long cost_there = cost[cur] + match_cost;

				// If the choice is between a literal and a
				// match with the same cost, choose the match.
				// This is because the match is able to encode
				// any literals preceding it.
				if (cost_there < cost[cur + i]
				 || (mlen[cur + i] == 1 && cost_there == cost[cur + i])) {
					cost[cur + i] = cost_there;
					mpos[cur + i] = max_len_pos;
					mlen[cur + i] = i;
				}
			}
		}
	}

	// No matches start after last_match_pos, so for the remaining
	// positions only the literal step is considered
	for (unsigned long cur = last_match_pos + 1; cur < src_size; ++cur) {
		// Check literal
		if (mlen[cur] == 1) {
			unsigned long literals_cost = 1 + lz4_literal_cost(mpos[cur] + 1) - lz4_literal_cost(mpos[cur]);

			if (cost[cur + 1] > cost[cur] + literals_cost) {
				cost[cur + 1] = cost[cur] + literals_cost;
				mlen[cur + 1] = 1;
				mpos[cur + 1] = mpos[cur] + 1;
			}
		}
		else {
			if (cost[cur + 1] > cost[cur] + 1) {
				cost[cur + 1] = cost[cur] + 1;
				mlen[cur + 1] = 1;
				mpos[cur + 1] = 1;
			}
		}
	}

	// Phase 2: Follow lowest cost path backwards gathering tokens
	//
	// The steps on the path are packed into the top of mlen and mpos,
	// ending at index src_size, so phase 3 can read them in forward
	// order starting at next_token + 1. Since next_token never drops
	// below cur, entries still to be visited are not overwritten.
	//
	unsigned long next_token = src_size;

	for (unsigned long cur = src_size; cur > 0; cur -= mlen[cur], --next_token) {
		mlen[next_token] = mlen[cur];
		mpos[next_token] = mpos[cur];
	}

	// Phase 3: Output tokens
	unsigned char *out = (unsigned char *) dst;

	// Position in the input of the step being processed
	unsigned long cur = 0;

	for (unsigned long i = next_token + 1; i <= src_size; cur += mlen[i++]) {
		unsigned long next_lit = cur;
		unsigned long nlit = 0;

		// Move over literals, counting them
		while (i <= src_size && mlen[i] == 1) {
			++nlit;
			++i;
			++cur;
		}

		// Make room for token
		unsigned char *token_out = out++;

		// Output extra literal length bytes
		//
		// A count of 15 or more is stored as 15 in the token, with the
		// remainder in the following bytes, where each byte of 255
		// means another byte follows.
		//
		while (nlit >= 15 + 255) {
			*out++ = 255;
			nlit -= 255;
		}
		if (nlit >= 15) {
			*out++ = nlit - 15;
			nlit = 15;
		}

		// Output literals
		while (next_lit < cur) {
			*out++ = in[next_lit++];
		}

		// Handle last incomplete sequence
		if (i > src_size) {
			// Write token
			*token_out = nlit << 4;
			break;
		}

		// Output offset
		//
		// Note that the loop over literals above only stops at a step
		// with mlen[i] != 1 when i <= src_size, so the offset used
		// here is cur - mpos[i].
		//
		unsigned long offs = mlen[i] == 1 ? 1 : cur - mpos[i];

		// The offset is stored as two bytes, low byte first
		*out++ = offs & 0xFF;
		*out++ = (offs >> 8) & 0xFF;

		// Output extra length bytes
		//
		// The token stores the match length minus 4 in its low nibble,
		// so lengths of 19 or more continue in the following bytes.
		//
		unsigned long len = mlen[i];

		while (len >= 19 + 255) {
			*out++ = 255;
			len -= 255;
		}
		if (len >= 19) {
			*out++ = len - 19;
			len = 19;
		}

		// Write token
		*token_out = (nlit << 4) | (len - 4);
	}

	// Return compressed size
	return (unsigned long) (out - (unsigned char *) dst);
}
379 | 
380 | #endif /* LZ4_BTPARSE_H_INCLUDED */
381 | 


--------------------------------------------------------------------------------
/lz4_depack.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * blz4 - Example of LZ4 compression with BriefLZ algorithms
  3 |  *
  4 |  * C depacker
  5 |  *
  6 |  * Copyright (c) 2018 Joergen Ibsen
  7 |  *
  8 |  * This software is provided 'as-is', without any express or implied
  9 |  * warranty. In no event will the authors be held liable for any damages
 10 |  * arising from the use of this software.
 11 |  *
 12 |  * Permission is granted to anyone to use this software for any purpose,
 13 |  * including commercial applications, and to alter it and redistribute it
 14 |  * freely, subject to the following restrictions:
 15 |  *
 16 |  *   1. The origin of this software must not be misrepresented; you must
 17 |  *      not claim that you wrote the original software. If you use this
 18 |  *      software in a product, an acknowledgment in the product
 19 |  *      documentation would be appreciated but is not required.
 20 |  *
 21 |  *   2. Altered source versions must be plainly marked as such, and must
 22 |  *      not be misrepresented as being the original software.
 23 |  *
 24 |  *   3. This notice may not be removed or altered from any source
 25 |  *      distribution.
 26 |  */
 27 | 
 28 | #include "lz4.h"
 29 | 
 30 | #include <assert.h>
 31 | 
 32 | unsigned long
 33 | lz4_depack(const void *src, void *dst, unsigned long packed_size)
 34 | {
 35 | 	const unsigned char *in = (unsigned char *) src;
 36 | 	unsigned char *out = (unsigned char *) dst;
 37 | 	unsigned long dst_size = 0;
 38 | 	unsigned long cur = 0;
 39 | 	unsigned long prev_match_start = 0;
 40 | 
 41 | 	if (in[0] == 0) {
 42 | 		return 0;
 43 | 	}
 44 | 
 45 | 	/* Main decompression loop */
 46 | 	while (cur < packed_size) {
 47 | 		unsigned long token = in[cur++];
 48 | 		unsigned long lit_len = token >> 4;
 49 | 		unsigned long len = (token & 0x0F) + 4;
 50 | 		unsigned long offs;
 51 | 		unsigned long i;
 52 | 
 53 | 		/* Read extra literal length bytes */
 54 | 		if (lit_len == 15) {
 55 | 			while (in[cur] == 255) {
 56 | 				lit_len += 255;
 57 | 				++cur;
 58 | 			}
 59 | 			lit_len += in[cur++];
 60 | 		}
 61 | 
 62 | 		/* Copy literals */
 63 | 		for (i = 0; i < lit_len; ++i) {
 64 | 			out[dst_size++] = in[cur++];
 65 | 		}
 66 | 
 67 | 		/* Check for last incomplete sequence */
 68 | 		if (cur == packed_size) {
 69 | 			/* Check parsing restrictions */
 70 | 			if (dst_size >= 5 && lit_len < 5) {
 71 | 				return LZ4_ERROR;
 72 | 			}
 73 | 
 74 | 			if (dst_size > 12 && dst_size - prev_match_start < 12) {
 75 | 				return LZ4_ERROR;
 76 | 			}
 77 | 
 78 | 			break;
 79 | 		}
 80 | 
 81 | 		/* Read offset */
 82 | 		offs = (unsigned long) in[cur] | ((unsigned long) in[cur + 1] << 8);
 83 | 		cur += 2;
 84 | 
 85 | 		/* Read extra length bytes */
 86 | 		if (len == 19) {
 87 | 			while (in[cur] == 255) {
 88 | 				len += 255;
 89 | 				++cur;
 90 | 			}
 91 | 			len += in[cur++];
 92 | 		}
 93 | 
 94 | 		prev_match_start = dst_size;
 95 | 
 96 | 		/* Copy match */
 97 | 		for (i = 0; i < len; ++i) {
 98 | 			out[dst_size] = out[dst_size - offs];
 99 | 			++dst_size;
100 | 		}
101 | 	}
102 | 
103 | 	/* Return decompressed size */
104 | 	return dst_size;
105 | }
106 | 


--------------------------------------------------------------------------------
/lz4_leparse.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // blz4 - Example of LZ4 compression with BriefLZ algorithms
  3 | //
  4 | // Backwards dynamic programming parse with left-extension of matches
  5 | //
  6 | // Copyright (c) 2018-2020 Joergen Ibsen
  7 | //
  8 | // This software is provided 'as-is', without any express or implied
  9 | // warranty. In no event will the authors be held liable for any damages
 10 | // arising from the use of this software.
 11 | //
 12 | // Permission is granted to anyone to use this software for any purpose,
 13 | // including commercial applications, and to alter it and redistribute it
 14 | // freely, subject to the following restrictions:
 15 | //
 16 | //   1. The origin of this software must not be misrepresented; you must
 17 | //      not claim that you wrote the original software. If you use this
 18 | //      software in a product, an acknowledgment in the product
 19 | //      documentation would be appreciated but is not required.
 20 | //
 21 | //   2. Altered source versions must be plainly marked as such, and must
 22 | //      not be misrepresented as being the original software.
 23 | //
 24 | //   3. This notice may not be removed or altered from any source
 25 | //      distribution.
 26 | //
 27 | 
 28 | #ifndef LZ4_LEPARSE_H_INCLUDED
 29 | #define LZ4_LEPARSE_H_INCLUDED
 30 | 
 31 | static size_t
 32 | lz4_leparse_workmem_size(size_t src_size)
 33 | {
 34 | 	return (LOOKUP_SIZE < 2 * src_size ? 3 * src_size : src_size + LOOKUP_SIZE)
 35 | 	     * sizeof(uint32_t);
 36 | }
 37 | 
// Backwards dynamic programming parse with left-extension of matches.
//
// Hash chains over the input are built first. Then the input is processed
// from the end towards the start, computing for each position the lowest
// cost found for encoding from there to the end. When a match lowers the
// cost at a position, it is extended to the left for as long as the
// preceding bytes are equal, and the search resumes below the positions
// covered by the extension.
//
// Parameters:
//
//   src         data to compress
//   dst         where to write the compressed data (its capacity is not
//               passed in, so it is not checked here)
//   src_size    number of bytes at src
//   workmem     temporary memory, used as an array of uint32_t with the
//               layout described below
//   max_depth   maximum number of hash chain entries visited for each
//               position
//   accept_len  a match of at least this length ends the search at that
//               position
//
// Returns the number of bytes written to dst.
//
static unsigned long
lz4_pack_leparse(const void *src, void *dst, unsigned long src_size, void *workmem,
                 const unsigned long max_depth, const unsigned long accept_len)
{
	const unsigned char *const in = (const unsigned char *) src;
	// Matches are only searched for at positions up to 12 bytes before the
	// end of the input
	const unsigned long last_match_pos = src_size > 12 ? src_size - 12 : 0;

	// Check for empty input
	if (src_size == 0) {
		unsigned char *out = (unsigned char *) dst;
		*out++ = 0;
		return 1;
	}

	// Check for input without room for match
	//
	// The output is a single token with the literal count in the high
	// nibble, followed by the literals.
	//
	if (src_size < 13) {
		unsigned char *out = (unsigned char *) dst;
		*out++ = src_size << 4;
		for (unsigned long i = 0; i < src_size; ++i) {
			*out++ = in[i];
		}
		return 1 + src_size;
	}

	// With a bit of careful ordering we can fit in 3 * src_size words.
	//
	// The idea is that the lookup is only used in the first phase to
	// build the hash chains, so we overlap it with mpos and mlen.
	// Also, since we are using prev from right to left in phase two,
	// and that is the order we fill in cost, we can overlap these.
	//
	// One detail is that we actually use src_size + 1 elements of cost,
	// but we put mpos after it, where we do not need the first element.
	//
	uint32_t *const prev = (uint32_t *) workmem;
	uint32_t *const mpos = prev + src_size;
	uint32_t *const mlen = mpos + src_size;
	uint32_t *const cost = prev;
	uint32_t *const lookup = mpos;

	// Phase 1: Build hash chains
	//
	// For larger inputs the number of hash bits is taken from src_size
	// rather than LZ4_HASH_BITS.
	//
	const int bits = 2 * src_size < LOOKUP_SIZE ? LZ4_HASH_BITS : lz4_log2(src_size);

	// Initialize lookup
	for (unsigned long i = 0; i < (1UL << bits); ++i) {
		lookup[i] = NO_MATCH_POS;
	}

	// Build hash chains in prev
	//
	// prev[i] becomes the closest earlier position with the same hash as
	// position i, or NO_MATCH_POS if there is none.
	//
	if (last_match_pos > 0) {
		for (unsigned long i = 0; i <= last_match_pos; ++i) {
			const unsigned long hash = lz4_hash4_bits(&in[i], bits);
			prev[i] = lookup[hash];
			lookup[hash] = i;
		}
	}

	// Initialize last eleven positions as literals
	//
	// This overwrites the part of lookup that overlaps these entries,
	// which is no longer needed now that the chains are built.
	//
	for (unsigned long i = 1; i < 12; ++i) {
		mlen[src_size - i] = 1;
		mpos[src_size - i] = i;
		cost[src_size - i] = i;
	}
	cost[src_size] = 0;

	// Phase 2: Find lowest cost path from each position to end
	for (unsigned long cur = last_match_pos; cur > 0; --cur) {
		// Since we updated prev to the end in the first phase, we
		// do not need to hash, but can simply look up the previous
		// position directly.
		//
		// This must be read before cost[cur] is written below, since
		// cost and prev share the same memory.
		//
		unsigned long pos = prev[cur];

		assert(pos == NO_MATCH_POS || pos < cur);

		// Start with a literal
		//
		// We store the number of literals from the current position
		// up to the next match in mpos. This is used to update the
		// cost from the current position with the additional cost of
		// encoding the length of this run of literals in the next
		// match.
		//
		if (mlen[cur + 1] == 1) {
			cost[cur] = 1 + cost[cur + 1] - lz4_literal_cost(mpos[cur + 1]) + lz4_literal_cost(mpos[cur + 1] + 1);
			mlen[cur] = 1;
			mpos[cur] = mpos[cur + 1] + 1;
		}
		else {
			// The next position starts a match, so this is a run of
			// a single literal
			cost[cur] = 1 + cost[cur + 1];
			mlen[cur] = 1;
			mpos[cur] = 1;
		}

		// A match is only considered if it is longer than 3, so the
		// shortest match used is 4 bytes
		unsigned long max_len = 3;

		// Keep matches from covering the last 5 bytes of the input
		const unsigned long len_limit = src_size - cur - 5;
		unsigned long num_chain = max_depth;

		// Go through the chain of prev matches
		for (; pos != NO_MATCH_POS && num_chain--; pos = prev[pos]) {
			// Offsets are written as two bytes in phase 3, so
			// positions more than 65535 back cannot be used, and
			// the rest of the chain is even further away
			if (cur - pos > 65535) {
				break;
			}

			unsigned long len = 0;

			// If next byte matches, so this has a chance to be a longer match
			if (max_len < len_limit && in[pos + max_len] == in[cur + max_len]) {
				// Find match len
				while (len < len_limit && in[pos + len] == in[cur + len]) {
					++len;
				}
			}

			// Extend current match if possible
			//
			// Note that we are checking matches in order from the
			// closest and back. This means for a match further
			// away, the encoding of all lengths up to the current
			// max length will always be longer or equal, so we need
			// only consider the extension.
			if (len > max_len) {
				unsigned long min_cost = UINT32_MAX;
				unsigned long min_cost_len = 3;

				// Find lowest cost match length
				for (unsigned long i = max_len + 1; i <= len; ++i) {
					unsigned long match_cost = lz4_match_cost(i);
					assert(match_cost < UINT32_MAX - cost[cur + i]);
					unsigned long cost_here = match_cost + cost[cur + i];

					if (cost_here < min_cost) {
						min_cost = cost_here;
						min_cost_len = i;
					}
				}

				max_len = len;

				// Update cost if cheaper
				if (min_cost < cost[cur]) {
					cost[cur] = min_cost;
					mpos[cur] = pos;
					mlen[cur] = min_cost_len;

					// Left-extend current match if possible
					//
					// While the bytes before cur and pos are
					// equal, step both back and record the
					// one byte longer match at the new cur
					// without searching there. Since this
					// changes cur, the outer loop continues
					// below the positions covered.
					//
					if (pos > 0 && in[pos - 1] == in[cur - 1]) {
						do {
							--cur;
							--pos;
							++min_cost_len;
							unsigned long match_cost = lz4_match_cost(min_cost_len);
							assert(match_cost < UINT32_MAX - cost[cur + min_cost_len]);
							unsigned long cost_here = match_cost + cost[cur + min_cost_len];
							cost[cur] = cost_here;
							mpos[cur] = pos;
							mlen[cur] = min_cost_len;
						} while (pos > 0 && in[pos - 1] == in[cur - 1]);
						break;
					}
				}
			}

			if (len >= accept_len || len == len_limit) {
				break;
			}
		}
	}

	// Position 0 is not visited by the loop above, and is always a literal
	mpos[0] = 0;
	mlen[0] = 1;

	unsigned char *out = (unsigned char *) dst;

	// Phase 3: Output compressed data, following lowest cost path
	for (unsigned long i = 0; i < src_size; i += mlen[i]) {
		unsigned long next_lit = i;
		unsigned long nlit = 0;

		// Move over literals, counting them
		while (i < src_size && mlen[i] == 1) {
			++nlit;
			++i;
		}

		// Make room for token
		unsigned char *token_out = out++;

		// Output extra literal length bytes
		//
		// A count of 15 or more is stored as 15 in the token, with the
		// remainder in the following bytes, where each byte of 255
		// means another byte follows.
		//
		while (nlit >= 15 + 255) {
			*out++ = 255;
			nlit -= 255;
		}
		if (nlit >= 15) {
			*out++ = nlit - 15;
			nlit = 15;
		}

		// Output literals
		while (next_lit < i) {
			*out++ = in[next_lit++];
		}

		// Handle last incomplete sequence
		if (i == src_size) {
			// Write token
			*token_out = nlit << 4;
			break;
		}

		// Output offset
		//
		// Note that the loop over literals above only stops at a
		// position with mlen[i] != 1 when i < src_size, so the offset
		// used here is i - mpos[i].
		//
		unsigned long offs = mlen[i] == 1 ? 1 : i - mpos[i];

		// The offset is stored as two bytes, low byte first
		*out++ = offs & 0xFF;
		*out++ = (offs >> 8) & 0xFF;

		// Output extra length bytes
		//
		// The token stores the match length minus 4 in its low nibble,
		// so lengths of 19 or more continue in the following bytes.
		//
		unsigned long len = mlen[i];

		while (len >= 19 + 255) {
			*out++ = 255;
			len -= 255;
		}
		if (len >= 19) {
			*out++ = len - 19;
			len = 19;
		}

		// Write token
		*token_out = (nlit << 4) | (len - 4);
	}

	// Return compressed size
	return (unsigned long) (out - (unsigned char *) dst);
}
273 | 
274 | #endif /* LZ4_LEPARSE_H_INCLUDED */
275 | 


--------------------------------------------------------------------------------
/lz4_ssparse.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // blz4 - Example of LZ4 compression with BriefLZ algorithms
  3 | //
  4 | // Backwards dynamic programming parse
  5 | //
  6 | // Copyright (c) 2018-2020 Joergen Ibsen
  7 | //
  8 | // This software is provided 'as-is', without any express or implied
  9 | // warranty. In no event will the authors be held liable for any damages
 10 | // arising from the use of this software.
 11 | //
 12 | // Permission is granted to anyone to use this software for any purpose,
 13 | // including commercial applications, and to alter it and redistribute it
 14 | // freely, subject to the following restrictions:
 15 | //
 16 | //   1. The origin of this software must not be misrepresented; you must
 17 | //      not claim that you wrote the original software. If you use this
 18 | //      software in a product, an acknowledgment in the product
 19 | //      documentation would be appreciated but is not required.
 20 | //
 21 | //   2. Altered source versions must be plainly marked as such, and must
 22 | //      not be misrepresented as being the original software.
 23 | //
 24 | //   3. This notice may not be removed or altered from any source
 25 | //      distribution.
 26 | //
 27 | 
 28 | #ifndef LZ4_SSPARSE_H_INCLUDED
 29 | #define LZ4_SSPARSE_H_INCLUDED
 30 | 
 31 | static size_t
 32 | lz4_ssparse_workmem_size(size_t src_size)
 33 | {
 34 | 	return (LOOKUP_SIZE < 2 * src_size ? 3 * src_size : src_size + LOOKUP_SIZE)
 35 | 	     * sizeof(uint32_t);
 36 | }
 37 | 
 38 | static unsigned long
 39 | lz4_pack_ssparse(const void *src, void *dst, unsigned long src_size, void *workmem,
 40 |                  const unsigned long max_depth, const unsigned long accept_len)
 41 | {
 42 | 	const unsigned char *const in = (const unsigned char *) src;
 43 | 	const unsigned long last_match_pos = src_size > 12 ? src_size - 12 : 0;
 44 | 
 45 | 	// Check for empty input
 46 | 	if (src_size == 0) {
 47 | 		unsigned char *out = (unsigned char *) dst;
 48 | 		*out++ = 0;
 49 | 		return 1;
 50 | 	}
 51 | 
 52 | 	// Check for input without room for match
 53 | 	if (src_size < 13) {
 54 | 		unsigned char *out = (unsigned char *) dst;
 55 | 		*out++ = src_size << 4;
 56 | 		for (unsigned long i = 0; i < src_size; ++i) {
 57 | 			*out++ = in[i];
 58 | 		}
 59 | 		return 1 + src_size;
 60 | 	}
 61 | 
 62 | 	// With a bit of careful ordering we can fit in 3 * src_size words.
 63 | 	//
 64 | 	// The idea is that the lookup is only used in the first phase to
 65 | 	// build the hash chains, so we overlap it with mpos and mlen.
 66 | 	// Also, since we are using prev from right to left in phase two,
 67 | 	// and that is the order we fill in cost, we can overlap these.
 68 | 	//
 69 | 	// One detail is that we actually use src_size + 1 elements of cost,
 70 | 	// but we put mpos after it, where we do not need the first element.
 71 | 	//
 72 | 	uint32_t *const prev = (uint32_t *) workmem;
 73 | 	uint32_t *const mpos = prev + src_size;
 74 | 	uint32_t *const mlen = mpos + src_size;
 75 | 	uint32_t *const cost = prev;
 76 | 	uint32_t *const lookup = mpos;
 77 | 
 78 | 	// Phase 1: Build hash chains
 79 | 	const int bits = 2 * src_size < LOOKUP_SIZE ? LZ4_HASH_BITS : lz4_log2(src_size);
 80 | 
 81 | 	// Initialize lookup
 82 | 	for (unsigned long i = 0; i < (1UL << bits); ++i) {
 83 | 		lookup[i] = NO_MATCH_POS;
 84 | 	}
 85 | 
 86 | 	// Build hash chains in prev
 87 | 	if (last_match_pos > 0) {
 88 | 		for (unsigned long i = 0; i <= last_match_pos; ++i) {
 89 | 			const unsigned long hash = lz4_hash4_bits(&in[i], bits);
 90 | 			prev[i] = lookup[hash];
 91 | 			lookup[hash] = i;
 92 | 		}
 93 | 	}
 94 | 
 95 | 	// Initialize last eleven positions as literals
 96 | 	for (unsigned long i = 1; i < 12; ++i) {
 97 | 		mlen[src_size - i] = 1;
 98 | 		mpos[src_size - i] = i;
 99 | 		cost[src_size - i] = i;
100 | 	}
101 | 	cost[src_size] = 0;
102 | 
103 | 	// Phase 2: Find lowest cost path from each position to end
104 | 	for (unsigned long cur = last_match_pos; cur > 0; --cur) {
105 | 		// Since we updated prev to the end in the first phase, we
106 | 		// do not need to hash, but can simply look up the previous
107 | 		// position directly.
108 | 		unsigned long pos = prev[cur];
109 | 
110 | 		assert(pos == NO_MATCH_POS || pos < cur);
111 | 
112 | 		// Start with a literal
113 | 		//
114 | 		// We store the number of literals from the current position
115 | 		// up to the next match in mpos. This is used to update the
116 | 		// cost from the current position with the additional cost of
117 | 		// encoding the length of this run of literals in the next
118 | 		// match.
119 | 		//
120 | 		if (mlen[cur + 1] == 1) {
121 | 			cost[cur] = 1 + cost[cur + 1] - lz4_literal_cost(mpos[cur + 1]) + lz4_literal_cost(mpos[cur + 1] + 1);
122 | 			mlen[cur] = 1;
123 | 			mpos[cur] = mpos[cur + 1] + 1;
124 | 		}
125 | 		else {
126 | 			cost[cur] = 1 + cost[cur + 1];
127 | 			mlen[cur] = 1;
128 | 			mpos[cur] = 1;
129 | 		}
130 | 
131 | 		unsigned long max_len = 3;
132 | 
133 | 		const unsigned long len_limit = src_size - cur - 5;
134 | 		unsigned long num_chain = max_depth;
135 | 
136 | 		// Go through the chain of prev matches
137 | 		for (; pos != NO_MATCH_POS && num_chain--; pos = prev[pos]) {
138 | 			if (cur - pos > 65535) {
139 | 				break;
140 | 			}
141 | 
142 | 			unsigned long len = 0;
143 | 
144 | 			// If next byte matches, so this has a chance to be a longer match
145 | 			if (max_len < len_limit && in[pos + max_len] == in[cur + max_len]) {
146 | 				// Find match len
147 | 				while (len < len_limit && in[pos + len] == in[cur + len]) {
148 | 					++len;
149 | 				}
150 | 			}
151 | 
152 | 			// Extend current match if possible
153 | 			//
154 | 			// Note that we are checking matches in order from the
155 | 			// closest and back. This means for a match further
156 | 			// away, the encoding of all lengths up to the current
157 | 			// max length will always be longer or equal, so we need
158 | 			// only consider the extension.
159 | 			if (len > max_len) {
160 | 				unsigned long min_cost = UINT32_MAX;
161 | 				unsigned long min_cost_len = 3;
162 | 
163 | 				// Find lowest cost match length
164 | 				for (unsigned long i = max_len + 1; i <= len; ++i) {
165 | 					unsigned long match_cost = lz4_match_cost(i);
166 | 					assert(match_cost < UINT32_MAX - cost[cur + i]);
167 | 					unsigned long cost_here = match_cost + cost[cur + i];
168 | 
169 | 					if (cost_here < min_cost) {
170 | 						min_cost = cost_here;
171 | 						min_cost_len = i;
172 | 					}
173 | 				}
174 | 
175 | 				max_len = len;
176 | 
177 | 				// Update cost if cheaper
178 | 				//
179 | 				// If the choice is between a literal and a match
180 | 				// with the same cost, choose the match. This is
181 | 				// because the match is able to encode any literals
182 | 				// preceding it.
183 | 				//
184 | 				if (min_cost < cost[cur]
185 | 				 || (mlen[cur] == 1 && min_cost == cost[cur])) {
186 | 					cost[cur] = min_cost;
187 | 					mpos[cur] = pos;
188 | 					mlen[cur] = min_cost_len;
189 | 				}
190 | 			}
191 | 
192 | 			if (len >= accept_len || len == len_limit) {
193 | 				break;
194 | 			}
195 | 		}
196 | 	}
197 | 
198 | 	mpos[0] = 0;
199 | 	mlen[0] = 1;
200 | 
201 | 	unsigned char *out = (unsigned char *) dst;
202 | 
203 | 	// Phase 3: Output compressed data, following lowest cost path
204 | 	for (unsigned long i = 0; i < src_size; i += mlen[i]) {
205 | 		unsigned long next_lit = i;
206 | 		unsigned long nlit = 0;
207 | 
208 | 		// Move over literals, counting them
209 | 		while (i < src_size && mlen[i] == 1) {
210 | 			++nlit;
211 | 			++i;
212 | 		}
213 | 
214 | 		// Make room for token
215 | 		unsigned char *token_out = out++;
216 | 
217 | 		// Output extra literal length bytes
218 | 		while (nlit >= 15 + 255) {
219 | 			*out++ = 255;
220 | 			nlit -= 255;
221 | 		}
222 | 		if (nlit >= 15) {
223 | 			*out++ = nlit - 15;
224 | 			nlit = 15;
225 | 		}
226 | 
227 | 		// Output literals
228 | 		while (next_lit < i) {
229 | 			*out++ = in[next_lit++];
230 | 		}
231 | 
232 | 		// Handle last incomplete sequence
233 | 		if (i == src_size) {
234 | 			// Write token
235 | 			*token_out = nlit << 4;
236 | 			break;
237 | 		}
238 | 
239 | 		// Output offset
240 | 		unsigned long offs = mlen[i] == 1 ? 1 : i - mpos[i];
241 | 
242 | 		*out++ = offs & 0xFF;
243 | 		*out++ = (offs >> 8) & 0xFF;
244 | 
245 | 		// Output extra length bytes
246 | 		unsigned long len = mlen[i];
247 | 
248 | 		while (len >= 19 + 255) {
249 | 			*out++ = 255;
250 | 			len -= 255;
251 | 		}
252 | 		if (len >= 19) {
253 | 			*out++ = len - 19;
254 | 			len = 19;
255 | 		}
256 | 
257 | 		// Write token
258 | 		*token_out = (nlit << 4) | (len - 4);
259 | 	}
260 | 
261 | 	// Return compressed size
262 | 	return (unsigned long) (out - (unsigned char *) dst);
263 | }
264 | 
265 | #endif /* LZ4_SSPARSE_H_INCLUDED */
266 | 


--------------------------------------------------------------------------------
/meson.build:
--------------------------------------------------------------------------------
 1 | project('blz4', 'c',
 2 |   meson_version : '>=0.60.0',
 3 |   default_options : [
 4 |     'b_ndebug=if-release',  # define NDEBUG (no assert) for release-type builds
 5 |     'buildtype=release',
 6 |     'c_std=c99',
 7 |     'default_library=static',  # library() below is built static unless overridden
 8 |     'warning_level=3',
 9 |   ],
10 |   version : '0.2.0',
11 |   license : 'Zlib'
12 | )
13 | 
14 | lib = library('lz4', 'lz4.c', 'lz4_depack.c')  # kind follows default_library
15 | 
16 | lz4_dep = declare_dependency(  # include path + library for targets that use it
17 |   include_directories : include_directories('.'),
18 |   link_with : lib,
19 |   version : meson.project_version()
20 | )
21 | 
22 | executable('blz4', 'blz4.c', 'parg.c', dependencies : lz4_dep)  # links against lib
23 | 


--------------------------------------------------------------------------------
/parg.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * parg - parse argv
  3 |  *
  4 |  * Written in 2015-2016 by Joergen Ibsen
  5 |  *
  6 |  * To the extent possible under law, the author(s) have dedicated all
  7 |  * copyright and related and neighboring rights to this software to the
  8 |  * public domain worldwide. This software is distributed without any
  9 |  * warranty. <http://creativecommons.org/publicdomain/zero/1.0/>
 10 |  */
 11 | 
 12 | #include "parg.h"
 13 | 
 14 | #include <assert.h>
 15 | #include <stdlib.h>
 16 | #include <string.h>
 17 | 
 18 | /*
 19 |  * Non-zero when optind is at or beyond argc, or argv[optind] is NULL.
 20 |  */
 21 | static int
 22 | is_argv_end(const struct parg_state *ps, int argc, char *const argv[])
 23 | {
 24 | 	return !(ps->optind < argc && argv[ps->optind] != NULL);
 25 | }
 26 | 
 27 | /*
 28 |  * Match short option at nextchar against optstring; returns it, '?' or ':'.
 29 |  */
 30 | static int
 31 | match_short(struct parg_state *ps, int argc, char *const argv[],
 32 |             const char *optstring)
 33 | {
 34 | 	const char *p = strchr(optstring, *ps->nextchar);
 35 | 	/* ':' is optstring syntax, never an option character, so reject it */
 36 | 	if (p == NULL || *p == ':') {
 37 | 		ps->optopt = *ps->nextchar++;
 38 | 		return '?';
 39 | 	}
 40 | 
 41 | 	/* No ':' follows it in optstring: option takes no argument */
 42 | 	if (p[1] != ':') {
 43 | 		return *ps->nextchar++;
 44 | 	}
 45 | 
 46 | 	/* Rest of this element (as in `-ofile`) is the option argument */
 47 | 	if (ps->nextchar[1] != '\0') {
 48 | 		ps->optarg = &ps->nextchar[1];
 49 | 		ps->nextchar = NULL;
 50 | 		return *p;
 51 | 	}
 52 | 
 53 | 	/* '::' in optstring: argument is optional and none was attached */
 54 | 	if (p[2] == ':') {
 55 | 		return *ps->nextchar++;
 56 | 	}
 57 | 
 58 | 	/* Argument required: fail if argv is used up, else take next element */
 59 | 	if (is_argv_end(ps, argc, argv)) {
 60 | 		ps->optopt = *ps->nextchar++;
 61 | 		return optstring[0] == ':' ? ':' : '?';
 62 | 	}
 63 | 
 64 | 	ps->optarg = argv[ps->optind++];
 65 | 	ps->nextchar = NULL;
 66 | 	return *p;
 67 | }
 68 | 
 69 | /*
 70 |  * Match long option name at nextchar (text following `--`) against longopts.
 71 |  */
 72 | static int
 73 | match_long(struct parg_state *ps, int argc, char *const argv[],
 74 |            const char *optstring,
 75 |            const struct parg_option *longopts, int *longindex)
 76 | {
 77 | 	size_t len;
 78 | 	int num_match = 0;
 79 | 	int match = -1;
 80 | 	int i;
 81 | 	/* Option name runs up to '=' or the end of the string */
 82 | 	len = strcspn(ps->nextchar, "=");
 83 | 
 84 | 	for (i = 0; longopts[i].name; ++i) {
 85 | 		if (strncmp(ps->nextchar, longopts[i].name, len) == 0) {
 86 | 			match = i;
 87 | 			num_match++;
 88 | 			/* Exact match wins over any prefix matches */
 89 | 			if (longopts[i].name[len] == '\0') {
 90 | 				num_match = 1;
 91 | 				break;
 92 | 			}
 93 | 		}
 94 | 	}
 95 | 
 96 | 	/* Return '?' on no or ambiguous match */
 97 | 	if (num_match != 1) {
 98 | 		ps->optopt = 0;
 99 | 		ps->nextchar = NULL;
100 | 		return '?';
101 | 	}
102 | 
103 | 	assert(match != -1);
104 | 
105 | 	if (longindex) {
106 | 		*longindex = match;
107 | 	}
108 | 
109 | 	if (ps->nextchar[len] == '=') {
110 | 		/* `=arg` present: an error if the option takes no argument */
111 | 		if (longopts[match].has_arg == PARG_NOARG) {
112 | 			ps->optopt = longopts[match].flag ? 0 : longopts[match].val;
113 | 			ps->nextchar = NULL;
114 | 			return optstring[0] == ':' ? ':' : '?';
115 | 		}
116 | 		else {
117 | 			ps->optarg = &ps->nextchar[len + 1];
118 | 		}
119 | 	}
120 | 	else if (longopts[match].has_arg == PARG_REQARG) {
121 | 		/* Option argument required, so return next argv element */
122 | 		if (is_argv_end(ps, argc, argv)) {
123 | 			ps->optopt = longopts[match].flag ? 0 : longopts[match].val;
124 | 			ps->nextchar = NULL;
125 | 			return optstring[0] == ':' ? ':' : '?';
126 | 		}
127 | 
128 | 		ps->optarg = argv[ps->optind++];
129 | 	}
130 | 
131 | 	ps->nextchar = NULL;
132 | 	/* With a flag pointer, store val there and return 0 instead of val */
133 | 	if (longopts[match].flag != NULL) {
134 | 		*longopts[match].flag = longopts[match].val;
135 | 		return 0;
136 | 	}
137 | 
138 | 	return longopts[match].val;
139 | }
140 | 
141 | void
142 | parg_init(struct parg_state *ps)
143 | {
144 | 	ps->nextchar = NULL; /* no element is partially parsed */
145 | 	ps->optopt = '?';
146 | 	ps->optind = 1;      /* parsing starts at argv[1] */
147 | 	ps->optarg = NULL;
148 | }
149 | /* Short options only: forwards to parg_getopt_long() with no long options. */
150 | int
151 | parg_getopt(struct parg_state *ps, int argc, char *const argv[],
152 |             const char *optstring)
153 | {
154 | 	return parg_getopt_long(ps, argc, argv, optstring, NULL, NULL);
155 | }
156 | 
157 | int
158 | parg_getopt_long(struct parg_state *ps, int argc, char *const argv[],
159 |                  const char *optstring,
160 |                  const struct parg_option *longopts, int *longindex)
161 | {
162 | 	assert(ps != NULL);
163 | 	assert(argv != NULL);
164 | 	assert(optstring != NULL);
165 | 
166 | 	ps->optarg = NULL;
167 | 
168 | 	if (argc < 2) {
169 | 		return -1;
170 | 	}
171 | 
172 | 	/* Current element used up (or none yet): move on to the next one */
173 | 	if (ps->nextchar == NULL || *ps->nextchar == '\0') {
174 | 		if (is_argv_end(ps, argc, argv)) {
175 | 			return -1;
176 | 		}
177 | 
178 | 		ps->nextchar = argv[ps->optind++];
179 | 
180 | 		/* Nonoption element (a lone '-' counts): hand back via optarg */
181 | 		if (ps->nextchar[0] != '-' || ps->nextchar[1] == '\0') {
182 | 			ps->optarg = ps->nextchar;
183 | 			ps->nextchar = NULL;
184 | 			return 1;
185 | 		}
186 | 
187 | 		/* '--' alone ends parsing; '--name' is tried as a long option */
188 | 		if (ps->nextchar[1] == '-') {
189 | 			if (ps->nextchar[2] == '\0') {
190 | 				ps->nextchar = NULL;
191 | 				return -1;
192 | 			}
193 | 
194 | 			if (longopts != NULL) {
195 | 				ps->nextchar += 2;
196 | 
197 | 				return match_long(ps, argc, argv, optstring,
198 | 				                  longopts, longindex);
199 | 			}
200 | 		}
201 | 		/* Step past the leading '-' to the first short option character */
202 | 		ps->nextchar++;
203 | 	}
204 | 
205 | 	/* Match one short option character at nextchar */
206 | 	return match_short(ps, argc, argv, optstring);
207 | }
208 | 
209 | /*
210 |  * Reverse, in place, the elements `v[i]` through `v[j - 1]`.
211 |  */
212 | static void
213 | reverse(char *v[], int i, int j)
214 | {
215 | 	int lo, hi;
216 | 
217 | 	for (lo = i, hi = j - 1; lo < hi; ++lo, --hi) {
218 | 		char *held = v[lo];
219 | 		v[lo] = v[hi];
220 | 		v[hi] = held;
221 | 	}
222 | }
223 | 
224 | /*
225 |  * Move options in `argv` ahead of nonoptions, with no special cases.
226 |  *
227 |  * This function assumes there is no `--` element, and the last element
228 |  * is not an option missing a required argument.
229 |  *
230 |  * The algorithm is described here:
231 |  * http://hardtoc.com/2016/11/07/reordering-arguments.html
232 |  */
233 | static int
234 | parg_reorder_simple(int argc, char *argv[],
235 |                     const char *optstring,
236 |                     const struct parg_option *longopts)
237 | {
238 | 	struct parg_state ps;
239 | 	int change;
240 | 	int l = 0;
241 | 	int m = 0;
242 | 	int r = 0;
243 | 
244 | 	if (argc < 2) {
245 | 		return argc;
246 | 	}
247 | 
248 | 	do {
249 | 		int nextind;
250 | 		int c;
251 | 		/* Every pass restarts parsing from the first element */
252 | 		parg_init(&ps);
253 | 
254 | 		nextind = ps.optind;
255 | 
256 | 		/* Parse all of the first element (it may hold several options) */
257 | 		do {
258 | 			c = parg_getopt_long(&ps, argc, argv, optstring, longopts, NULL);
259 | 		} while (ps.nextchar != NULL && *ps.nextchar != '\0');
260 | 
261 | 		change = 0;
262 | 
263 | 		do {
264 | 			/* Skip options: `l` = first nonoption of the next run */
265 | 			for (l = nextind; c != 1 && c != -1;) {
266 | 				l = ps.optind;
267 | 
268 | 				do {
269 | 					c = parg_getopt_long(&ps, argc, argv, optstring, longopts, NULL);
270 | 				} while (ps.nextchar != NULL && *ps.nextchar != '\0');
271 | 			}
272 | 
273 | 			/* Skip nonoptions: `m` = first option after that run */
274 | 			for (m = l; c == 1;) {
275 | 				m = ps.optind;
276 | 
277 | 				do {
278 | 					c = parg_getopt_long(&ps, argc, argv, optstring, longopts, NULL);
279 | 				} while (ps.nextchar != NULL && *ps.nextchar != '\0');
280 | 			}
281 | 
282 | 			/* Skip options: `r` = first nonoption after them */
283 | 			for (r = m; c != 1 && c != -1;) {
284 | 				r = ps.optind;
285 | 
286 | 				do {
287 | 					c = parg_getopt_long(&ps, argc, argv, optstring, longopts, NULL);
288 | 				} while (ps.nextchar != NULL && *ps.nextchar != '\0');
289 | 			}
290 | 
291 | 			/* Skip nonoptions: `nextind` = where the next round starts */
292 | 			for (nextind = r; c == 1;) {
293 | 				nextind = ps.optind;
294 | 
295 | 				do {
296 | 					c = parg_getopt_long(&ps, argc, argv, optstring, longopts, NULL);
297 | 				} while (ps.nextchar != NULL && *ps.nextchar != '\0');
298 | 			}
299 | 			/* Rotate [l, r) so elements [m, r) come before elements [l, m) */
300 | 			if (m < r) {
301 | 				change = 1;
302 | 				reverse(argv, l, m);
303 | 				reverse(argv, m, r);
304 | 				reverse(argv, l, r);
305 | 			}
306 | 		} while (c != -1);
307 | 	} while (change != 0);
308 | 
309 | 	return l + (r - m);
310 | }
311 | 
312 | int
313 | parg_reorder(int argc, char *argv[],
314 |              const char *optstring,
315 |              const struct parg_option *longopts)
316 | {
317 | 	struct parg_state ps;
318 | 	int lastind;
319 | 	int optend;
320 | 	int c;
321 | 
322 | 	assert(argv != NULL);
323 | 	assert(optstring != NULL);
324 | 
325 | 	if (argc < 2) {
326 | 		return argc;
327 | 	}
328 | 
329 | 	parg_init(&ps);
330 | 
331 | 	/* Find end of normal arguments: a `--` element, or the end of argv */
332 | 	do {
333 | 		lastind = ps.optind;
334 | 
335 | 		c = parg_getopt_long(&ps, argc, argv, optstring, longopts, NULL);
336 | 
337 | 		/* An option error with nothing left in argv also ends them */
338 | 		if ((c == '?' || c == ':') && is_argv_end(&ps, argc, argv)) {
339 | 			lastind = ps.optind - 1;
340 | 			break;
341 | 		}
342 | 	} while (c != -1);
343 | 	/* Reorder only the normal arguments: `lastind` is passed as argc */
344 | 	optend = parg_reorder_simple(lastind, argv, optstring, longopts);
345 | 
346 | 	/* Rotate `--` or the failing option to just before the nonoptions */
347 | 	if (lastind < argc) {
348 | 		reverse(argv, optend, lastind);
349 | 		reverse(argv, optend, lastind + 1);
350 | 		++optend;
351 | 	}
352 | 
353 | 	return optend;
354 | }
355 | 


--------------------------------------------------------------------------------
/parg.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * parg - parse argv
  3 |  *
  4 |  * Written in 2015-2016 by Joergen Ibsen
  5 |  *
  6 |  * To the extent possible under law, the author(s) have dedicated all
  7 |  * copyright and related and neighboring rights to this software to the
  8 |  * public domain worldwide. This software is distributed without any
  9 |  * warranty. <http://creativecommons.org/publicdomain/zero/1.0/>
 10 |  */
 11 | 
 12 | #ifndef PARG_H_INCLUDED
 13 | #define PARG_H_INCLUDED
 14 | 
 15 | #ifdef __cplusplus
 16 | extern "C" {
 17 | #endif
 18 | 
 19 | #define PARG_VER_MAJOR 1        /**< Major version number */
 20 | #define PARG_VER_MINOR 0        /**< Minor version number */
 21 | #define PARG_VER_PATCH 2        /**< Patch version number */
 22 | #define PARG_VER_STRING "1.0.2" /**< Version number as a string */
 23 | 
 24 | /**
 25 |  * Parser state carried between calls to the `parg_getopt` functions.
 26 |  *
 27 |  * @see parg_init
 28 |  */
 29 | struct parg_state {
 30 | 	const char *optarg;   /**< Option argument or nonoption element, if any */
 31 | 	int optind;           /**< Next index in argv to process */
 32 | 	int optopt;           /**< Option value resulting in error, if any */
 33 | 	const char *nextchar; /**< Next character to process; may be `NULL` */
 34 | };
 35 | 
 36 | /**
 37 |  * Structure for supplying long options to `parg_getopt_long()`.
 38 |  * The array of options ends at an entry whose `name` is `NULL`.
 39 |  * @see parg_getopt_long
 40 |  */
 41 | struct parg_option {
 42 | 	const char *name; /**< Name of option, without the leading `--` */
 43 | 	int has_arg;      /**< Option argument status, a `parg_arg_num` value */
 44 | 	int *flag;        /**< If not `NULL`, set to `val` when option matches */
 45 | 	int val;          /**< Value returned, or stored in `*flag` */
 46 | };
 47 | 
 48 | /**
 49 |  * Values for `has_arg` flag in `parg_option`.
 50 |  *
 51 |  * @see parg_option
 52 |  */
 53 | typedef enum {
 54 | 	PARG_NOARG,  /**< No argument; `--name=arg` is an error */
 55 | 	PARG_REQARG, /**< Required argument: `--name=arg` or `--name arg` */
 56 | 	PARG_OPTARG  /**< Optional argument, only as `--name=arg` */
 57 | } parg_arg_num;
 58 | 
 59 | /**
 60 |  * Initialize `ps`.
 61 |  *
 62 |  * Must be called before using state with a parser.
 63 |  *
 64 |  * @see parg_state
 65 |  *
 66 |  * @param ps pointer to state
 67 |  */
 68 | void
 69 | parg_init(struct parg_state *ps);
 70 | 
 71 | /**
 72 |  * Parse next short option in `argv`.
 73 |  *
 74 |  * Elements in `argv` that contain short options start with a single dash
 75 |  * followed by one or more option characters, and optionally an option
 76 |  * argument for the last option character. Examples are '`-d`', '`-ofile`',
 77 |  * and '`-dofile`'.
 78 |  *
 79 |  * Consecutive calls to this function match the command-line arguments in
 80 |  * `argv` against the short option characters in `optstring`.
 81 |  *
 82 |  * If an option character in `optstring` is followed by a colon, '`:`', the
 83 |  * option requires an argument. If it is followed by two colons, the option
 84 |  * may take an optional argument.
 85 |  *
 86 |  * If a match is found, `optarg` points to the option argument, if any, and
 87 |  * the value of the option character is returned.
 88 |  *
 89 |  * If a match is found, but is missing a required option argument, `optopt`
 90 |  * is set to the option character. If the first character in `optstring` is
 91 |  * '`:`', then '`:`' is returned, otherwise '`?`' is returned.
 92 |  *
 93 |  * If no option character in `optstring` matches a short option, `optopt`
 94 |  * is set to the option character, and '`?`' is returned.
 95 |  *
 96 |  * If an element of argv does not contain options (a nonoption element),
 97 |  * `optarg` points to the element, and `1` is returned.
 98 |  *
 99 |  * An element consisting of a single dash, '`-`', is returned as a nonoption.
100 |  *
101 |  * Parsing stops and `-1` is returned when the end of `argv` is reached, or
102 |  * if an element is exactly '`--`'.
103 |  *
104 |  * Works similarly to `getopt`, if `optstring` were prefixed by '`-`'.
105 |  *
106 |  * @param ps pointer to state
107 |  * @param argc number of elements in `argv`
108 |  * @param argv array of pointers to command-line arguments
109 |  * @param optstring string containing option characters
110 |  * @return option value on match, `1` on nonoption element, `-1` on end of
111 |  * arguments, '`?`' on unmatched option, '`?`' or '`:`' on option argument
112 |  * error
113 |  */
114 | int
115 | parg_getopt(struct parg_state *ps, int argc, char *const argv[],
116 |             const char *optstring);
117 | 
118 | /**
119 |  * Parse next long or short option in `argv`.
120 |  *
121 |  * Elements in `argv` that contain a long option start with two dashes
122 |  * followed by a string, and optionally an equal sign and an option argument.
123 |  * Examples are '`--help`' and '`--size=5`'.
124 |  *
125 |  * If no exact match is found, an unambiguous prefix of a long option will
126 |  * match. For example, if '`foo`' and '`foobar`' are valid long options, then
127 |  * '`--fo`' is ambiguous and will not match, '`--foo`' matches exactly, and
128 |  * '`--foob`' is an unambiguous prefix and will match.
129 |  *
130 |  * If a long option match is found, and `flag` is `NULL`, `val` is returned.
131 |  *
132 |  * If a long option match is found, and `flag` is not `NULL`, `val` is stored
133 |  * in the variable `flag` points to, and `0` is returned.
134 |  *
135 |  * If a long option match is found, but is missing a required option argument,
136 |  * or has an option argument even though it takes none, `optopt` is set to
137 |  * `val` if `flag` is `NULL`, and `0` otherwise. If the first character in
138 |  * `optstring` is '`:`', then '`:`' is returned, otherwise '`?`' is returned.
139 |  *
140 |  * If `longindex` is not `NULL`, the index of the entry in `longopts` that
141 |  * matched is stored there.
142 |  *
143 |  * If no long option in `longopts` matches a long option, '`?`' is returned.
144 |  *
145 |  * Handling of nonoptions and short options is like `parg_getopt()`.
146 |  *
147 |  * If no short options are required, an empty string, `""`, should be passed
148 |  * as `optstring`.
149 |  *
150 |  * Works similarly to `getopt_long`, if `optstring` were prefixed by '`-`'.
151 |  *
152 |  * @see parg_getopt
153 |  *
154 |  * @param ps pointer to state
155 |  * @param argc number of elements in `argv`
156 |  * @param argv array of pointers to command-line arguments
157 |  * @param optstring string containing option characters
158 |  * @param longopts array of `parg_option` structures
159 |  * @param longindex pointer to variable to store index of matching option in
160 |  * @return option value on match, `0` for flag option, `1` on nonoption
161 |  * element, `-1` on end of arguments, '`?`' on unmatched or ambiguous option,
162 |  * '`?`' or '`:`' on option argument error
163 |  */
164 | int
165 | parg_getopt_long(struct parg_state *ps, int argc, char *const argv[],
166 |                  const char *optstring,
167 |                  const struct parg_option *longopts, int *longindex);
168 | 
169 | /**
170 |  * Reorder elements of `argv` so options appear first.
171 |  *
172 |  * If there are no long options, `longopts` may be `NULL`.
173 |  *
174 |  * The return value can be used as `argc` parameter for `parg_getopt()` and
175 |  * `parg_getopt_long()`.
176 |  *
177 |  * @param argc number of elements in `argv`
178 |  * @param argv array of pointers to command-line arguments
179 |  * @param optstring string containing option characters
180 |  * @param longopts array of `parg_option` structures
181 |  * @return index of first nonoption in `argv` on success, `-1` on error
182 |  */
183 | int
184 | parg_reorder(int argc, char *argv[],
185 |              const char *optstring,
186 |              const struct parg_option *longopts);
187 | 
188 | #ifdef __cplusplus
189 | } /* extern "C" */
190 | #endif
191 | 
192 | #endif /* PARG_H_INCLUDED */
193 | 


--------------------------------------------------------------------------------