├── .github
    └── workflows
    │   └── meson-ci-workflow.yaml
├── LICENSE
├── README.md
├── bcrush.c
├── crush.c
├── crush.h
├── crush_btparse.h
├── crush_depack.c
├── crush_depack_file.c
├── crush_internal.h
├── crush_leparse.h
├── crush_ssparse.h
├── meson.build
├── parg.c
└── parg.h


/.github/workflows/meson-ci-workflow.yaml:
--------------------------------------------------------------------------------
 1 | name: Meson CI
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   windows:
 7 |     name: Windows ${{ matrix.config.name }}
 8 |     runs-on: windows-latest
 9 | 
10 |     strategy:
11 |       matrix:
12 |         config:
13 |           - name: MSVC x86
14 |             arch: x86
15 | 
16 |           - name: MSVC x64
17 |             arch: amd64
18 | 
19 |     steps:
20 |       - uses: actions/checkout@v4
21 | 
22 |       - uses: actions/setup-python@v4
23 |         with:
24 |           python-version: '3.x'
25 | 
26 |       - name: Install Meson
27 |         run: pip install meson ninja
28 | 
29 |       - name: Configure
30 |         run: meson setup --vsenv build
31 | 
32 |       - name: Build
33 |         run: meson compile -C build -v
34 | 
35 |   linux:
36 |     name: Linux ${{ matrix.config.name }}
37 |     runs-on: ubuntu-latest
38 |     env:
39 |       CC: ${{ matrix.config.cc }}
40 |       CXX: ${{ matrix.config.cxx }}
41 | 
42 |     strategy:
43 |       matrix:
44 |         config:
45 |           - name: GCC
46 |             cc: gcc
47 |             cxx: g++
48 | 
49 |           - name: Clang
50 |             cc: clang
51 |             cxx: clang++
52 | 
53 |     steps:
54 |       - uses: actions/checkout@v4
55 | 
56 |       - uses: actions/setup-python@v4
57 |         with:
58 |           python-version: '3.x'
59 | 
60 |       - name: Install Meson
61 |         run: pip install meson ninja
62 | 
63 |       - name: Configure
64 |         run: meson setup build
65 | 
66 |       - name: Build
67 |         run: meson compile -C build -v
68 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The zlib License (Zlib)
 2 | 
 3 | Copyright (c) 2018-2020 Joergen Ibsen
 4 | 
 5 | This software is provided 'as-is', without any express or implied
 6 | warranty. In no event will the authors be held liable for any damages
 7 | arising from the use of this software.
 8 | 
 9 | Permission is granted to anyone to use this software for any purpose,
10 | including commercial applications, and to alter it and redistribute it
11 | freely, subject to the following restrictions:
12 | 
13 |   1. The origin of this software must not be misrepresented; you must
14 |      not claim that you wrote the original software. If you use this
15 |      software in a product, an acknowledgment in the product
16 |      documentation would be appreciated but is not required.
17 | 
18 |   2. Altered source versions must be plainly marked as such, and must
19 |      not be misrepresented as being the original software.
20 | 
21 |   3. This notice may not be removed or altered from any source
22 |      distribution.
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | bcrush
 3 | ======
 4 | 
 5 | [![Meson CI](https://github.com/jibsen/bcrush/workflows/Meson%20CI/badge.svg)](https://github.com/jibsen/bcrush/actions)
 6 | 
 7 | About
 8 | -----
 9 | 
10 | This is an example using some of the compression algorithms from [BriefLZ][]
11 | to produce output in the format of [CRUSH][] by Ilya Muravyov.
12 | 
 13 | **Please note:** this is just a quick experiment to see how it would work; it
 14 | is not production quality and has not been properly tested.
15 | 
16 | [BriefLZ]: https://github.com/jibsen/brieflz
17 | [CRUSH]: https://sourceforge.net/projects/crush/
18 | 
19 | 
20 | Benchmark
21 | ---------
22 | 
23 | Here are some results on the [Silesia compression corpus][silesia]:
24 | 
25 | | File    |   Original | `bcrush --optimal` | `crush cx` | `crushx -9` |
26 | | :------ | ---------: | -----------------: | ---------: | ----------: |
27 | | dickens | 10.192.446 |          3.148.963 |  3.350.093 |   3.343.930 |
28 | | mozilla | 51.220.480 |         18.037.611 | 18.760.573 |  18.281.301 |
29 | | mr      |  9.970.564 |          3.367.533 |  3.532.785 |   3.428.968 |
30 | | nci     | 33.553.445 |          2.407.286 |  2.624.037 |   2.750.658 |
31 | | ooffice |  6.152.192 |          2.832.224 |  2.958.518 |   2.871.884 |
32 | | osdb    | 10.085.684 |          3.424.687 |  3.545.632 |   3.457.335 |
33 | | reymont |  6.627.202 |          1.523.547 |  1.644.701 |   1.610.306 |
34 | | samba   | 21.606.400 |          4.720.964 |  4.912.141 |   4.911.613 |
35 | | sao     |  7.251.944 |          5.344.713 |  5.472.035 |   5.368.466 |
36 | | webster | 41.458.703 |          9.766.251 | 10.430.228 |  10.322.130 |
37 | | xml     |  5.345.280 |            535.316 |    563.744 |     561.118 |
38 | | x-ray   |  8.474.240 |          5.717.405 |  5.958.603 |   5.747.141 |
39 | 
40 | Where crush is the original CRUSH v1.00, and crushx is an implementation of
41 | crush with optimal parsing [posted][crushx] on Encode's Forum.
42 | 
43 | [silesia]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
44 | [crushx]: https://encode.su/threads/2578-crush-v1-1
45 | 
46 | 
47 | Usage
48 | -----
49 | 
50 | bcrush uses [Meson][] to generate build systems. To create one for the tools on
51 | your platform, and build bcrush, use something along the lines of:
52 | 
53 | ~~~sh
54 | mkdir build
55 | cd build
56 | meson ..
57 | ninja
58 | ~~~
59 | 
60 | You can also simply compile and link the source files.
61 | 
62 | bcrush includes the leparse and btparse algorithms from BriefLZ, which gives
63 | compression levels `-5` to `-9` and the **very** slow `--optimal`.
64 | 
65 | [Meson]: https://mesonbuild.com/
66 | 
67 | 
68 | Notes
69 | -----
70 | 
71 |   - The CRUSH format does not store the size of the compressed block, so I
72 |     copied the way the CRUSH depacker reads one byte at a time from the file
73 |     to avoid issues with reading the next block into memory.
74 |   - bcrush only hashes 3 bytes to find matches, which makes it slow on files
75 |     with many small matches. It might benefit from using two hash tables like
76 |     CRUSH.
77 | 
78 | 
79 | License
80 | -------
81 | 
 82 | This project is licensed under the [zlib License](LICENSE) (Zlib).
83 | 


--------------------------------------------------------------------------------
/bcrush.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * bcrush - Example of CRUSH compression with BriefLZ algorithms
  3 |  *
  4 |  * Copyright (c) 2018-2020 Joergen Ibsen
  5 |  *
  6 |  * This software is provided 'as-is', without any express or implied
  7 |  * warranty. In no event will the authors be held liable for any damages
  8 |  * arising from the use of this software.
  9 |  *
 10 |  * Permission is granted to anyone to use this software for any purpose,
 11 |  * including commercial applications, and to alter it and redistribute it
 12 |  * freely, subject to the following restrictions:
 13 |  *
 14 |  *   1. The origin of this software must not be misrepresented; you must
 15 |  *      not claim that you wrote the original software. If you use this
 16 |  *      software in a product, an acknowledgment in the product
 17 |  *      documentation would be appreciated but is not required.
 18 |  *
 19 |  *   2. Altered source versions must be plainly marked as such, and must
 20 |  *      not be misrepresented as being the original software.
 21 |  *
 22 |  *   3. This notice may not be removed or altered from any source
 23 |  *      distribution.
 24 |  */
 25 | 
 26 | #ifdef _MSC_VER
 27 | #  define _CRT_SECURE_NO_WARNINGS
 28 | #  define _CRT_DISABLE_PERFCRIT_LOCKS
 29 | #  define ftello _ftelli64
 30 | #else
 31 | #  define _FILE_OFFSET_BITS 64
 32 | #endif
 33 | 
 34 | #ifdef __MINGW32__
 35 | #  define __USE_MINGW_ANSI_STDIO 1
 36 | #endif
 37 | 
 38 | #include <errno.h>
 39 | #include <limits.h>
 40 | #include <stdarg.h>
 41 | #include <stddef.h>
 42 | #include <stdio.h>
 43 | #include <stdlib.h>
 44 | #include <time.h>
 45 | 
 46 | #include "crush.h"
 47 | #include "parg.h"
 48 | 
 49 | /*
 50 |  * The default block size used to process data.
 51 |  */
 52 | #ifndef BLOCK_SIZE
 53 | #  define BLOCK_SIZE (64 * 1024 * 1024UL)
 54 | #endif
 55 | 
 56 | /*
 57 |  * Unsigned char type.
 58 |  */
 59 | typedef unsigned char byte;
 60 | 
 61 | /*
 62 |  * Get the low-order 8 bits of a value.
 63 |  */
 64 | #if CHAR_BIT == 8
 65 | #  define octet(v) ((byte) (v))
 66 | #else
 67 | #  define octet(v) ((v) & 0x00FF)
 68 | #endif
 69 | 
 70 | /*
 71 |  * Store a 32-bit unsigned value in little-endian order.
 72 |  */
 73 | static void
 74 | write_le32(byte *p, unsigned long val)
 75 | {
 76 | 	p[0] = octet(val);
 77 | 	p[1] = octet(val >> 8);
 78 | 	p[2] = octet(val >> 16);
 79 | 	p[3] = octet(val >> 24);
 80 | }
 81 | 
 82 | /*
 83 |  * Read a 32-bit unsigned value in little-endian order.
 84 |  */
 85 | static unsigned long
 86 | read_le32(const byte *p)
 87 | {
 88 | 	return ((unsigned long) octet(p[0]))
 89 | 	     | ((unsigned long) octet(p[1]) << 8)
 90 | 	     | ((unsigned long) octet(p[2]) << 16)
 91 | 	     | ((unsigned long) octet(p[3]) << 24);
 92 | }
 93 | 
 94 | static unsigned int
 95 | ratio(long long x, long long y)
 96 | {
 97 | 	if (x <= LLONG_MAX / 100) {
 98 | 		x *= 100;
 99 | 	}
100 | 	else {
101 | 		y /= 100;
102 | 	}
103 | 
104 | 	if (y == 0) {
105 | 		y = 1;
106 | 	}
107 | 
108 | 	return (unsigned int) (x / y);
109 | }
110 | 
/*
 * Print a printf-style formatted error message to stderr, prefixed with the
 * program name and terminated by a newline.
 */
static void
printf_error(const char *fmt, ...)
{
	va_list ap;

	fprintf(stderr, "%s", "bcrush: ");

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	fputc('\n', stderr);
}
124 | 
/*
 * Print a printf-style formatted error message to stderr, prefixed with the
 * program name, followed by a short summary of the command-line usage.
 */
static void
printf_usage(const char *fmt, ...)
{
	/* Starts with the newline that terminates the formatted message */
	static const char usage[] =
		"\n"
		"usage: bcrush [-56789 | --optimal] [-v] INFILE OUTFILE\n"
		"       bcrush -d [-v] INFILE OUTFILE\n"
		"       bcrush -V | --version\n"
		"       bcrush -h | --help\n";
	va_list ap;

	fprintf(stderr, "%s", "bcrush: ");

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	fputs(usage, stderr);
}
142 | 
143 | static int
144 | compress_file(const char *oldname, const char *packedname, int be_verbose,
145 |               int level)
146 | {
147 | 	byte header[4];
148 | 	FILE *oldfile = NULL;
149 | 	FILE *packedfile = NULL;
150 | 	byte *data = NULL;
151 | 	byte *packed = NULL;
152 | 	byte *workmem = NULL;
153 | 	long long insize = 0, outsize = 0;
154 | 	static const char rotator[] = "-\\|/";
155 | 	unsigned int counter = 0;
156 | 	size_t n_read;
157 | 	clock_t clocks;
158 | 	int res = 1;
159 | 
160 | 	/* Allocate memory */
161 | 	if ((data = (byte *) malloc(BLOCK_SIZE)) == NULL
162 | 	 || (packed = (byte *) malloc(crush_max_packed_size(BLOCK_SIZE))) == NULL
163 | 	 || (workmem = (byte *) malloc(crush_workmem_size_level(BLOCK_SIZE, level))) == NULL) {
164 | 		printf_error("not enough memory");
165 | 		goto out;
166 | 	}
167 | 
168 | 	/* Open input file */
169 | 	if ((oldfile = fopen(oldname, "rb")) == NULL) {
170 | 		printf_usage("unable to open input file '%s'", oldname);
171 | 		goto out;
172 | 	}
173 | 
174 | 	/* Create output file */
175 | 	if ((packedfile = fopen(packedname, "wb")) == NULL) {
176 | 		printf_usage("unable to open output file '%s'", packedname);
177 | 		goto out;
178 | 	}
179 | 
180 | 	clocks = clock();
181 | 
182 | 	/* While we are able to read data from input file .. */
183 | 	while ((n_read = fread(data, 1, BLOCK_SIZE, oldfile)) > 0) {
184 | 		size_t packedsize;
185 | 
186 | 		/* Show a little progress indicator */
187 | 		if (be_verbose) {
188 | 			fprintf(stderr, "%c\r", rotator[counter]);
189 | 			counter = (counter + 1) & 0x03;
190 | 		}
191 | 
192 | 		/* Compress data block */
193 | 		packedsize = crush_pack_level(data, packed, (unsigned long) n_read,
194 | 		                              workmem, level);
195 | 
196 | 		/* Check for compression error */
197 | 		if (packedsize == 0) {
198 | 			printf_error("an error occured while compressing");
199 | 			goto out;
200 | 		}
201 | 
202 | 		/* Put block-specific values into header */
203 | 		write_le32(header, (unsigned long) n_read);
204 | 
205 | 		/* Write header and compressed data */
206 | 		fwrite(header, 1, sizeof(header), packedfile);
207 | 		fwrite(packed, 1, packedsize, packedfile);
208 | 
209 | 		/* Sum input and output size */
210 | 		insize += n_read;
211 | 		outsize += packedsize + sizeof(header);
212 | 	}
213 | 
214 | 	clocks = clock() - clocks;
215 | 
216 | 	/* Show result */
217 | 	if (be_verbose) {
218 | 		fprintf(stderr, "in %lld out %lld ratio %u%% time %.2f\n",
219 | 		        insize, outsize, ratio(outsize, insize),
220 | 		        (double) clocks / (double) CLOCKS_PER_SEC);
221 | 	}
222 | 
223 | 	res = 0;
224 | 
225 | out:
226 | 	/* Close files */
227 | 	if (packedfile != NULL) {
228 | 		fclose(packedfile);
229 | 	}
230 | 	if (oldfile != NULL) {
231 | 		fclose(oldfile);
232 | 	}
233 | 
234 | 	/* Free memory */
235 | 	if (workmem != NULL) {
236 | 		free(workmem);
237 | 	}
238 | 	if (packed != NULL) {
239 | 		free(packed);
240 | 	}
241 | 	if (data != NULL) {
242 | 		free(data);
243 | 	}
244 | 
245 | 	return res;
246 | }
247 | 
248 | static int
249 | decompress_file(const char *packedname, const char *newname, int be_verbose)
250 | {
251 | 	byte header[4];
252 | 	FILE *newfile = NULL;
253 | 	FILE *packedfile = NULL;
254 | 	byte *data = NULL;
255 | 	long long insize = 0, outsize = 0;
256 | 	static const char rotator[] = "-\\|/";
257 | 	unsigned int counter = 0;
258 | 	clock_t clocks;
259 | 	int res = 1;
260 | 
261 | 	/* Allocate memory */
262 | 	if ((data = (byte *) malloc(BLOCK_SIZE)) == NULL) {
263 | 		printf_error("not enough memory");
264 | 		goto out;
265 | 	}
266 | 
267 | 	/* Open input file */
268 | 	if ((packedfile = fopen(packedname, "rb")) == NULL) {
269 | 		printf_usage("unable to open input file '%s'", packedname);
270 | 		goto out;
271 | 	}
272 | 
273 | 	/* Create output file */
274 | 	if ((newfile = fopen(newname, "wb")) == NULL) {
275 | 		printf_usage("unable to open output file '%s'", newname);
276 | 		goto out;
277 | 	}
278 | 
279 | 	clocks = clock();
280 | 
281 | 	/* While we are able to read a header from input file .. */
282 | 	while (fread(header, 1, sizeof(header), packedfile) == sizeof(header)) {
283 | 		size_t hdr_depackedsize, depackedsize;
284 | 
285 | 		/* Show a little progress indicator */
286 | 		if (be_verbose) {
287 | 			fprintf(stderr, "%c\r", rotator[counter]);
288 | 			counter = (counter + 1) & 0x03;
289 | 		}
290 | 
291 | 		/* Get original size from header */
292 | 		hdr_depackedsize = (size_t) read_le32(header);
293 | 
294 | 		/* Check blocksize is sufficient */
295 | 		if (hdr_depackedsize > BLOCK_SIZE) {
296 | 			printf_usage("compressed file requires block size"
297 | 				     " >= %lu bytes", hdr_depackedsize);
298 | 			goto out;
299 | 		}
300 | 
301 | 		/* Decompress data */
302 | 		depackedsize = crush_depack_file(packedfile, data,
303 | 		                                 (unsigned long) hdr_depackedsize);
304 | 
305 | 		/* Check for decompression error */
306 | 		if (depackedsize != hdr_depackedsize) {
307 | 			printf_error("an error occured while decompressing");
308 | 			goto out;
309 | 		}
310 | 
311 | 		/* Write decompressed data */
312 | 		fwrite(data, 1, depackedsize, newfile);
313 | 	}
314 | 
315 | 	clocks = clock() - clocks;
316 | 
317 | 	insize = ftello(packedfile);
318 | 	outsize = ftello(newfile);
319 | 
320 | 	/* Show result */
321 | 	if (be_verbose) {
322 | 		fprintf(stderr, "in %lld out %lld ratio %u%% time %.2f\n",
323 | 		        insize, outsize, ratio(insize, outsize),
324 | 		        (double) clocks / (double) CLOCKS_PER_SEC);
325 | 	}
326 | 
327 | 	res = 0;
328 | 
329 | out:
330 | 	/* Close files */
331 | 	if (packedfile != NULL) {
332 | 		fclose(packedfile);
333 | 	}
334 | 	if (newfile != NULL) {
335 | 		fclose(newfile);
336 | 	}
337 | 
338 | 	/* Free memory */
339 | 	if (data != NULL) {
340 | 		free(data);
341 | 	}
342 | 
343 | 	return res;
344 | }
345 | 
/*
 * Print the help text describing the command-line options to stdout.
 */
static void
print_syntax(void)
{
	static const char help_text[] =
		"usage: bcrush [options] INFILE OUTFILE\n"
		"\n"
		"options:\n"
		"  -5                     compress faster (default)\n"
		"  -9                     compress better\n"
		"      --optimal          optimal but very slow compression\n"
		"  -d, --decompress       decompress\n"
		"  -h, --help             print this help and exit\n"
		"  -v, --verbose          verbose mode\n"
		"  -V, --version          print version and exit\n"
		"\n"
		"PLEASE NOTE: This is an experiment, use at your own risk.\n";

	fputs(help_text, stdout);
}
362 | 
363 | static void
364 | print_version(void)
365 | {
366 | 	fputs("bcrush " CRUSH_VER_STRING "\n"
367 | 	      "\n"
368 | 	      "Copyright (c) 2018-2020 Joergen Ibsen\n"
369 | 	      "\n"
370 | 	      "Licensed under the zlib license (Zlib).\n"
371 | 	      "There is NO WARRANTY, to the extent permitted by law.\n", stdout);
372 | }
373 | 
374 | int
375 | main(int argc, char *argv[])
376 | {
377 | 	struct parg_state ps;
378 | 	const char *infile = NULL;
379 | 	const char *outfile = NULL;
380 | 	int flag_decompress = 0;
381 | 	int flag_verbose = 0;
382 | 	int level = 5;
383 | 	int c;
384 | 
385 | 	const struct parg_option long_options[] = {
386 | 		{ "decompress", PARG_NOARG, NULL, 'd' },
387 | 		{ "help", PARG_NOARG, NULL, 'h' },
388 | 		{ "optimal", PARG_NOARG, NULL, 'x' },
389 | 		{ "verbose", PARG_NOARG, NULL, 'v' },
390 | 		{ "version", PARG_NOARG, NULL, 'V' },
391 | 		{ 0, 0, 0, 0 }
392 | 	};
393 | 
394 | 	parg_init(&ps);
395 | 
396 | 	while ((c = parg_getopt_long(&ps, argc, argv, "56789dhvVx", long_options, NULL)) != -1) {
397 | 		switch (c) {
398 | 		case 1:
399 | 			if (infile == NULL) {
400 | 				infile = ps.optarg;
401 | 			}
402 | 			else if (outfile == NULL) {
403 | 				outfile = ps.optarg;
404 | 			}
405 | 			else {
406 | 				printf_usage("too many arguments");
407 | 				return EXIT_FAILURE;
408 | 			}
409 | 			break;
410 | 		case '5':
411 | 		case '6':
412 | 		case '7':
413 | 		case '8':
414 | 		case '9':
415 | 			level = c - '0';
416 | 			break;
417 | 		case 'x':
418 | 			level = 10;
419 | 			break;
420 | 		case 'd':
421 | 			flag_decompress = 1;
422 | 			break;
423 | 		case 'h':
424 | 			print_syntax();
425 | 			return EXIT_SUCCESS;
426 | 			break;
427 | 		case 'v':
428 | 			flag_verbose = 1;
429 | 			break;
430 | 		case 'V':
431 | 			print_version();
432 | 			return EXIT_SUCCESS;
433 | 			break;
434 | 		default:
435 | 			printf_usage("unknown option '%s'", argv[ps.optind - 1]);
436 | 			return EXIT_FAILURE;
437 | 			break;
438 | 		}
439 | 	}
440 | 
441 | 	if (outfile == NULL) {
442 | 		printf_usage("too few arguments");
443 | 		return EXIT_FAILURE;
444 | 	}
445 | 
446 | 	if (flag_decompress) {
447 | 		return decompress_file(infile, outfile, flag_verbose);
448 | 	}
449 | 	else {
450 | 		return compress_file(infile, outfile, flag_verbose, level);
451 | 	}
452 | 
453 | 	return EXIT_SUCCESS;
454 | }
455 | 


--------------------------------------------------------------------------------
/crush.c:
--------------------------------------------------------------------------------
  1 | //
  2 | // bcrush - Example of CRUSH compression with BriefLZ algorithms
  3 | //
  4 | // C packer
  5 | //
  6 | // Copyright (c) 2018-2020 Joergen Ibsen
  7 | //
  8 | // This software is provided 'as-is', without any express or implied
  9 | // warranty. In no event will the authors be held liable for any damages
 10 | // arising from the use of this software.
 11 | //
 12 | // Permission is granted to anyone to use this software for any purpose,
 13 | // including commercial applications, and to alter it and redistribute it
 14 | // freely, subject to the following restrictions:
 15 | //
 16 | //   1. The origin of this software must not be misrepresented; you must
 17 | //      not claim that you wrote the original software. If you use this
 18 | //      software in a product, an acknowledgment in the product
 19 | //      documentation would be appreciated but is not required.
 20 | //
 21 | //   2. Altered source versions must be plainly marked as such, and must
 22 | //      not be misrepresented as being the original software.
 23 | //
 24 | //   3. This notice may not be removed or altered from any source
 25 | //      distribution.
 26 | //
 27 | 
 28 | #include "crush.h"
 29 | #include "crush_internal.h"
 30 | 
 31 | #include <assert.h>
 32 | #include <limits.h>
 33 | #include <stdint.h>
 34 | 
 35 | #if _MSC_VER >= 1400
 36 | #  include <intrin.h>
 37 | #  define CRUSH_BUILTIN_MSVC
 38 | #elif defined(__clang__) && defined(__has_builtin)
 39 | #  if __has_builtin(__builtin_clz)
 40 | #    define CRUSH_BUILTIN_GCC
 41 | #  endif
 42 | #elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
 43 | #  define CRUSH_BUILTIN_GCC
 44 | #endif
 45 | 
 46 | // Number of bits of hash to use for lookup.
 47 | //
 48 | // The size of the lookup table (and thus workmem) depends on this.
 49 | //
 50 | // Values between 10 and 18 work well. Lower values generally make compression
 51 | // speed faster but ratio worse. The default value 17 (128k entries) is a
 52 | // compromise.
 53 | //
 54 | #ifndef CRUSH_HASH_BITS
 55 | #  define CRUSH_HASH_BITS 17
 56 | #endif
 57 | 
 58 | #define LOOKUP_SIZE (1UL << CRUSH_HASH_BITS)
 59 | 
 60 | #define WORKMEM_SIZE (LOOKUP_SIZE * sizeof(uint32_t))
 61 | 
 62 | #define NO_MATCH_POS ((uint32_t) -1)
 63 | 
// State for writing a stream of bits, least significant bit first.
struct lsb_bitwriter {
	// Position in the output buffer where the next byte is stored
	unsigned char *next_out;
	// Bits collected so far that have not been written out yet
	uint32_t tag;
	// Number of bits currently held in tag
	int msb;
};
 69 | 
 70 | static void
 71 | lbw_init(struct lsb_bitwriter *lbw, unsigned char *dst)
 72 | {
 73 | 	lbw->next_out = dst;
 74 | 	lbw->tag = 0;
 75 | 	lbw->msb = 0;
 76 | }
 77 | 
 78 | static unsigned char*
 79 | lbw_finalize(struct lsb_bitwriter *lbw)
 80 | {
 81 | 	// Write bytes until no bits left in tag
 82 | 	while (lbw->msb > 0) {
 83 | 		*lbw->next_out++ = lbw->tag;
 84 | 		lbw->tag >>= 8;
 85 | 		lbw->msb -= 8;
 86 | 	}
 87 | 
 88 | 	return lbw->next_out;
 89 | }
 90 | 
 91 | static void
 92 | lbw_flush(struct lsb_bitwriter *lbw, int num) {
 93 | 	assert(num >= 0 && num <= 32);
 94 | 
 95 | 	// Write bytes until at least num bits free
 96 | 	while (lbw->msb > 32 - num) {
 97 | 		*lbw->next_out++ = lbw->tag;
 98 | 		lbw->tag >>= 8;
 99 | 		lbw->msb -= 8;
100 | 	}
101 | 
102 | 	assert(lbw->msb >= 0);
103 | }
104 | 
105 | static void
106 | lbw_putbits_no_flush(struct lsb_bitwriter *lbw, uint32_t bits, int num) {
107 | 	assert(num >= 0 && num <= 32 - lbw->msb);
108 | 	assert((bits & (~0ULL << num)) == 0);
109 | 
110 | 	// Add bits to tag
111 | 	lbw->tag |= bits << lbw->msb;
112 | 	lbw->msb += num;
113 | }
114 | 
// Append the low num bits of bits to the output, first writing out pending
// bytes as needed to make room for them in the tag.
static void
lbw_putbits(struct lsb_bitwriter *lbw, uint32_t bits, int num) {
	lbw_flush(lbw, num);
	lbw_putbits_no_flush(lbw, bits, num);
}
120 | 
// Return the position of the most significant set bit of n, which is the
// base 2 logarithm of n rounded down. n must be non-zero.
static int
crush_log2(unsigned long n)
{
	assert(n > 0);

#if defined(CRUSH_BUILTIN_MSVC)
	unsigned long index;
	_BitScanReverse(&index, n);
	return (int) index;
#elif defined(CRUSH_BUILTIN_GCC)
	// Index of the top bit of unsigned long, minus the leading zeros
	const int top_bit = (int) sizeof(n) * CHAR_BIT - 1;
	return top_bit - __builtin_clzl(n);
#else
	int log = 0;

	// Count how many times n can be halved before reaching one
	for (; n > 1; n >>= 1) {
		++log;
	}

	return log;
#endif
}
142 | 
// Hash three bytes starting at p, returning a value of the given number
// of bits.
//
// This is Fibonacci hashing, also known as Knuth's multiplicative hash. The
// constant is a prime close to 2^32/phi.
//
static unsigned long
crush_hash3_bits(const unsigned char *p, int bits)
{
	assert(bits > 0 && bits <= 32);

	// Combine the three bytes into a little-endian 24-bit value
	uint32_t val = 0;

	for (int i = 2; i >= 0; --i) {
		val = (val << 8) | (uint32_t) p[i];
	}

	// The multiplication wraps modulo 2^32; keep the top bits of the result
	const uint32_t hash = val * UINT32_C(2654435761);

	return hash >> (32 - bits);
}
159 | 
160 | static unsigned long
161 | crush_match_cost(unsigned long pos, unsigned long len)
162 | {
163 | 	unsigned long cost = 1;
164 | 
165 | 	const unsigned long l = len - MIN_MATCH;
166 | 
167 | 	if (l < A) {
168 | 		cost += 1 + A_BITS;
169 | 	}
170 | 	else if (l < B) {
171 | 		cost += 2 + B_BITS;
172 | 	}
173 | 	else if (l < C) {
174 | 		cost += 3 + C_BITS;
175 | 	}
176 | 	else if (l < D) {
177 | 		cost += 4 + D_BITS;
178 | 	}
179 | 	else if (l < E) {
180 | 		cost += 5 + E_BITS;
181 | 	}
182 | 	else {
183 | 		cost += 5 + F_BITS;
184 | 	}
185 | 
186 | 	cost += SLOT_BITS;
187 | 
188 | 	if (pos >= (2UL << (W_BITS - NUM_SLOTS))) {
189 | 		cost += crush_log2(pos);
190 | 	}
191 | 	else {
192 | 		cost += W_BITS - (NUM_SLOTS - 1);
193 | 	}
194 | 
195 | 	return cost;
196 | }
197 | 
// Get bound on the size of the compressed data for src_size bytes of input,
// which is the input size plus one eighth of it plus 64 bytes.
unsigned long
crush_max_packed_size(unsigned long src_size)
{
	const unsigned long overhead = src_size / 8 + 64;

	return src_size + overhead;
}
203 | 
204 | // Include compression algorithms used by crush_pack_level
205 | #include "crush_btparse.h"
206 | #include "crush_leparse.h"
207 | 
// Get the size of the workmem buffer needed to compress src_size bytes at
// the given level.
//
// Levels 5-7 use leparse and levels 8-10 use btparse. Any other level gives
// (size_t) -1.
size_t
crush_workmem_size_level(size_t src_size, int level)
{
	if (level >= 5 && level <= 7) {
		return crush_leparse_workmem_size(src_size);
	}

	if (level >= 8 && level <= 10) {
		return crush_btparse_workmem_size(src_size);
	}

	return (size_t) -1;
}
224 | 
225 | unsigned long
226 | crush_pack_level(const void *src, void *dst, unsigned long src_size,
227 |                  void *workmem, int level)
228 | {
229 | 	switch (level) {
230 | 	case 5:
231 | 		return crush_pack_leparse(src, dst, src_size, workmem, 1, 16);
232 | 	case 6:
233 | 		return crush_pack_leparse(src, dst, src_size, workmem, 8, 32);
234 | 	case 7:
235 | 		return crush_pack_leparse(src, dst, src_size, workmem, 64, 64);
236 | 	case 8:
237 | 		return crush_pack_btparse(src, dst, src_size, workmem, 16, 96);
238 | 	case 9:
239 | 		return crush_pack_btparse(src, dst, src_size, workmem, 32, 224);
240 | 	case 10:
241 | 		return crush_pack_btparse(src, dst, src_size, workmem, ULONG_MAX, ULONG_MAX);
242 | 	default:
243 | 		return CRUSH_ERROR;
244 | 	}
245 | }
246 | 
// clang -g -O1 -fsanitize=fuzzer,address -DCRUSH_FUZZING crush.c crush_depack.c
#if defined(CRUSH_FUZZING)
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#ifndef CRUSH_FUZZ_LEVEL
#  define CRUSH_FUZZ_LEVEL 5
#endif

// libFuzzer entry point.
//
// Compresses data at level CRUSH_FUZZ_LEVEL, decompresses the result, and
// aborts if compression fails or the round trip does not reproduce the
// input. Inputs larger than 64 MiB are skipped.
extern int
LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
{
	if (size > 64 * 1024 * 1024UL) { return 0; }

	void *workmem = malloc(crush_workmem_size_level(size, CRUSH_FUZZ_LEVEL));
	void *packed = malloc(crush_max_packed_size(size));
	// malloc(0) is allowed to return NULL, which must not be mistaken for
	// running out of memory on an empty input
	void *depacked = malloc(size ? size : 1);
	if (!workmem || !packed || !depacked) { abort(); }

	unsigned long packed_size = crush_pack_level(data, packed, size, workmem, CRUSH_FUZZ_LEVEL);

	// Compression must succeed and stay within the documented bound
	if (packed_size == CRUSH_ERROR || packed_size > crush_max_packed_size(size)) { abort(); }

	// crush.h documents the return value as the size of the decompressed data
	if (crush_depack(packed, depacked, size) != size) { abort(); }

	// data may be NULL for an empty input, so only compare when non-empty
	if (size > 0 && memcmp(data, depacked, size)) { abort(); }

	free(depacked);
	free(packed);
	free(workmem);
	return 0;
}
#endif
276 | 


--------------------------------------------------------------------------------
/crush.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * bcrush - Example of CRUSH compression with BriefLZ algorithms
  3 |  *
  4 |  * C/C++ header file
  5 |  *
  6 |  * Copyright (c) 2018-2020 Joergen Ibsen
  7 |  *
  8 |  * This software is provided 'as-is', without any express or implied
  9 |  * warranty. In no event will the authors be held liable for any damages
 10 |  * arising from the use of this software.
 11 |  *
 12 |  * Permission is granted to anyone to use this software for any purpose,
 13 |  * including commercial applications, and to alter it and redistribute it
 14 |  * freely, subject to the following restrictions:
 15 |  *
 16 |  *   1. The origin of this software must not be misrepresented; you must
 17 |  *      not claim that you wrote the original software. If you use this
 18 |  *      software in a product, an acknowledgment in the product
 19 |  *      documentation would be appreciated but is not required.
 20 |  *
 21 |  *   2. Altered source versions must be plainly marked as such, and must
 22 |  *      not be misrepresented as being the original software.
 23 |  *
 24 |  *   3. This notice may not be removed or altered from any source
 25 |  *      distribution.
 26 |  */
 27 | 
 28 | #ifndef CRUSH_H_INCLUDED
 29 | #define CRUSH_H_INCLUDED
 30 | 
 31 | #include <stddef.h>
 32 | #include <stdio.h>
 33 | 
 34 | #ifdef __cplusplus
 35 | extern "C" {
 36 | #endif
 37 | 
 38 | #define CRUSH_VER_MAJOR 0        /**< Major version number */
 39 | #define CRUSH_VER_MINOR 2        /**< Minor version number */
 40 | #define CRUSH_VER_PATCH 1        /**< Patch version number */
 41 | #define CRUSH_VER_STRING "0.2.1" /**< Version number as a string */
 42 | 
 43 | #ifdef CRUSH_DLL
 44 | #  if defined(_WIN32) || defined(__CYGWIN__)
 45 | #    ifdef CRUSH_DLL_EXPORTS
 46 | #      define CRUSH_API __declspec(dllexport)
 47 | #    else
 48 | #      define CRUSH_API __declspec(dllimport)
 49 | #    endif
 50 | #    define CRUSH_LOCAL
 51 | #  else
 52 | #    if __GNUC__ >= 4
 53 | #      define CRUSH_API __attribute__ ((visibility ("default")))
 54 | #      define CRUSH_LOCAL __attribute__ ((visibility ("hidden")))
 55 | #    else
 56 | #      define CRUSH_API
 57 | #      define CRUSH_LOCAL
 58 | #    endif
 59 | #  endif
 60 | #else
 61 | #  define CRUSH_API
 62 | #  define CRUSH_LOCAL
 63 | #endif
 64 | 
 65 | /**
 66 |  * Return value on error.
 67 |  */
 68 | #ifndef CRUSH_ERROR
 69 | #  define CRUSH_ERROR ((unsigned long) (-1))
 70 | #endif
 71 | 
 72 | /**
 73 |  * Get bound on compressed data size.
 74 |  *
 75 |  * @see crush_pack_level
 76 |  *
 77 |  * @param src_size number of bytes to compress
 78 |  * @return maximum size of compressed data
 79 |  */
 80 | CRUSH_API unsigned long
 81 | crush_max_packed_size(unsigned long src_size);
 82 | 
 83 | /**
 84 |  * Get required size of `workmem` buffer.
 85 |  *
 86 |  * @see crush_pack_level
 87 |  *
 88 |  * @param src_size number of bytes to compress
 89 |  * @param level compression level
 90 |  * @return required size in bytes of `workmem` buffer, `(size_t) -1` if `level` is unsupported
 91 |  */
 92 | CRUSH_API size_t
 93 | crush_workmem_size_level(size_t src_size, int level);
 94 | 
 95 | /**
 96 |  * Compress `src_size` bytes of data from `src` to `dst`.
 97 |  *
 98 |  * Compression levels between 5 and 9 offer a trade-off between
 99 |  * time/space and ratio. Level 10 is optimal but very slow.
100 |  *
101 |  * @param src pointer to data
102 |  * @param dst pointer to where to place compressed data
103 |  * @param src_size number of bytes to compress
104 |  * @param workmem pointer to memory for temporary use
105 |  * @param level compression level
106 |  * @return size of compressed data, `CRUSH_ERROR` on error (e.g. unsupported `level`)
107 |  */
108 | CRUSH_API unsigned long
109 | crush_pack_level(const void *src, void *dst, unsigned long src_size,
110 |                  void *workmem, int level);
111 | 
/**
 * Decompress `depacked_size` bytes of data from `src` to `dst`.
 *
 * Decoding continues until at least `depacked_size` bytes have been
 * produced. The size of the compressed data is not passed in, so `src`
 * must contain a complete compressed stream.
 *
 * @param src pointer to compressed data
 * @param dst pointer to where to place decompressed data
 * @param depacked_size size of decompressed data
 * @return size of decompressed data, or `CRUSH_ERROR` if a match refers
 * to data before the start of `dst`
 */
CRUSH_API unsigned long
crush_depack(const void *src, void *dst, unsigned long depacked_size);
122 | 
/**
 * Decompress `depacked_size` bytes of data from `src_file` to `dst`.
 *
 * Compressed bytes are read one at a time from the current position of
 * `src_file`, and only as many as are needed to produce the output.
 *
 * @param src_file file containing compressed data
 * @param dst pointer to where to place decompressed data
 * @param depacked_size size of decompressed data
 * @return size of decompressed data, or `CRUSH_ERROR` if a match refers
 * to data before the start of `dst`
 */
CRUSH_API unsigned long
crush_depack_file(FILE *src_file, void *dst, unsigned long depacked_size);
133 | 
134 | #ifdef __cplusplus
135 | } /* extern "C" */
136 | #endif
137 | 
138 | #endif /* CRUSH_H_INCLUDED */
139 | 


--------------------------------------------------------------------------------
/crush_btparse.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // bcrush - Example of CRUSH compression with BriefLZ algorithms
  3 | //
  4 | // Forwards dynamic programming parse using binary trees
  5 | //
  6 | // Copyright (c) 2020 Joergen Ibsen
  7 | //
  8 | // This software is provided 'as-is', without any express or implied
  9 | // warranty. In no event will the authors be held liable for any damages
 10 | // arising from the use of this software.
 11 | //
 12 | // Permission is granted to anyone to use this software for any purpose,
 13 | // including commercial applications, and to alter it and redistribute it
 14 | // freely, subject to the following restrictions:
 15 | //
 16 | //   1. The origin of this software must not be misrepresented; you must
 17 | //      not claim that you wrote the original software. If you use this
 18 | //      software in a product, an acknowledgment in the product
 19 | //      documentation would be appreciated but is not required.
 20 | //
 21 | //   2. Altered source versions must be plainly marked as such, and must
 22 | //      not be misrepresented as being the original software.
 23 | //
 24 | //   3. This notice may not be removed or altered from any source
 25 | //      distribution.
 26 | //
 27 | 
 28 | #ifndef CRUSH_BTPARSE_H_INCLUDED
 29 | #define CRUSH_BTPARSE_H_INCLUDED
 30 | 
 31 | static size_t
 32 | crush_btparse_workmem_size(size_t src_size)
 33 | {
 34 | 	return (5 * src_size + 3 + LOOKUP_SIZE) * sizeof(uint32_t);
 35 | }
 36 | 
// Forwards dynamic programming parse using binary trees, checking all
// possible matches.
//
// The match search uses a binary tree for each hash entry, which is updated
// dynamically as it is searched by re-rooting the tree at the search string.
//
// This does not result in balanced trees on all inputs, but often works well
// in practice, and has the advantage that we get the matches in order from
// closest and back.
//
// A drawback is the memory requirement of 5 * src_size words, since we cannot
// overlap the arrays in a forwards parse.
//
// This match search method is found in LZMA by Igor Pavlov, libdeflate
// by Eric Biggers, and other libraries.
//
// Parameters:
//   src        - data to compress
//   dst        - where the compressed bit stream is written
//   src_size   - number of bytes at src
//   workmem    - scratch memory, carved into five uint32_t arrays below
//                (crush_btparse_workmem_size gives the required size)
//   max_depth  - maximum number of tree nodes visited per position
//   accept_len - match length at which the search at a position stops;
//                positions covered by such a match still update the
//                trees, but are not searched for matches themselves
//
// Returns the number of bytes written to dst (0 for empty input).
//
static unsigned long
crush_pack_btparse(const void *src, void *dst, unsigned long src_size, void *workmem,
                   const unsigned long max_depth, const unsigned long accept_len)
{
	struct lsb_bitwriter lbw;
	const unsigned char *const in = (const unsigned char *) src;
	// Last position with MIN_MATCH (3) bytes available for hashing
	const unsigned long last_match_pos = src_size > 3 ? src_size - 3 : 0;

	// Check for empty input
	if (src_size == 0) {
		return 0;
	}

	lbw_init(&lbw, (unsigned char *) dst);

	// Inputs shorter than 4 bytes are stored as literals only. A literal
	// is 9 bits: a zero flag bit followed by the 8 bits of the byte.
	if (src_size < 4) {
		for (unsigned long i = 0; i < src_size; ++i) {
			lbw_putbits(&lbw, (uint32_t) in[i] << 1, 9);
		}

		return (unsigned long) (lbw_finalize(&lbw) - (unsigned char *) dst);
	}

	// Work memory layout (in uint32_t words):
	//
	//   cost   - src_size + 1, lowest cost in bits of arriving at a position
	//   mpos   - src_size + 1, offset minus one of the match arriving there
	//   mlen   - src_size + 1, length of the token arriving there (1 = literal)
	//   nodes  - 2 * src_size, tree links for each position; the encoder
	//            stores the subtree of lesser strings at 2 * pos and the
	//            subtree of greater strings at 2 * pos + 1
	//   lookup - LOOKUP_SIZE, root of the tree for each hash value
	//
	uint32_t *const cost = (uint32_t *) workmem;
	uint32_t *const mpos = cost + src_size + 1;
	uint32_t *const mlen = mpos + src_size + 1;
	uint32_t *const nodes = mlen + src_size + 1;
	uint32_t *const lookup = nodes + 2 * src_size;

	// Initialize lookup
	for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) {
		lookup[i] = NO_MATCH_POS;
	}

	// Initialize to all literals with infinite cost
	//
	// Note that mpos is not initialized; it is only meaningful for
	// entries where mlen is greater than 1.
	for (unsigned long i = 0; i <= src_size; ++i) {
		cost[i] = UINT32_MAX;
		mlen[i] = 1;
	}

	cost[0] = 0;

	// Next position where we are going to check matches
	//
	// This is used to skip matching while still updating the trees when
	// we find a match that is accept_len or longer.
	//
	unsigned long next_match_cur = 0;

	// Phase 1: Find lowest cost path arriving at each position
	for (unsigned long cur = 0; cur <= last_match_pos; ++cur) {
		// Check literal
		if (cost[cur + 1] > cost[cur] + 9) {
			cost[cur + 1] = cost[cur] + 9;
			mlen[cur + 1] = 1;
		}

		if (cur > next_match_cur) {
			next_match_cur = cur;
		}

		// Longest match length already costed at this position
		unsigned long max_len = MIN_MATCH - 1;

		// Look up first match for current position
		//
		// pos is the current root of the tree of strings with this
		// hash. We are going to re-root the tree so cur becomes the
		// new root.
		//
		const unsigned long hash = crush_hash3_bits(&in[cur], CRUSH_HASH_BITS);
		unsigned long pos = lookup[hash];
		lookup[hash] = cur;

		uint32_t *lt_node = &nodes[2 * cur];
		uint32_t *gt_node = &nodes[2 * cur + 1];
		unsigned long lt_len = 0;
		unsigned long gt_len = 0;

		assert(pos == NO_MATCH_POS || pos < cur);

		// If we are checking matches, allow lengths up to MAX_MATCH,
		// otherwise compare only up to accept_len
		const unsigned long len_left = src_size - cur > MAX_MATCH ? MAX_MATCH : src_size - cur;
		const unsigned long len_limit = cur == next_match_cur ? len_left
		                              : accept_len < len_left ? accept_len
		                              : len_left;
		unsigned long num_chain = max_depth;

		// Check matches
		for (;;) {
			// If at bottom of tree, mark leaf nodes
			//
			// In case we reached max_depth, this also prunes the
			// subtree we have not searched yet and do not know
			// where belongs.
			//
			if (pos == NO_MATCH_POS || cur - pos > W_SIZE || num_chain-- == 0) {
				*lt_node = NO_MATCH_POS;
				*gt_node = NO_MATCH_POS;

				break;
			}

			// The string at pos is lexicographically greater than
			// a string that matched in the first lt_len positions,
			// and less than a string that matched in the first
			// gt_len positions, so it must match up to at least
			// the minimum of these.
			unsigned long len = lt_len < gt_len ? lt_len : gt_len;

			// Find match len
			while (len < len_limit && in[pos + len] == in[cur + len]) {
				++len;
			}

			// Extend current match if possible
			//
			// Note that we are checking matches in order from the
			// closest and back. This means for a match further
			// away, the encoding of all lengths up to the current
			// max length will always be longer or equal, so we need
			// only consider the extension.
			//
			if (cur == next_match_cur && len > max_len) {
				for (unsigned long i = max_len + 1; i <= len; ++i) {
					unsigned long match_cost = crush_match_cost(cur - pos - 1, i);

					assert(match_cost < UINT32_MAX - cost[cur]);

					unsigned long cost_there = cost[cur] + match_cost;

					if (cost_there < cost[cur + i]) {
						cost[cur + i] = cost_there;
						mpos[cur + i] = cur - pos - 1;
						mlen[cur + i] = i;
					}
				}

				max_len = len;

				// Skip match checking for the positions covered
				// by a match that is long enough
				if (len >= accept_len) {
					next_match_cur = cur + len;
				}
			}

			// If we reach maximum match length, the string at pos
			// is equal to cur, so we can assign the left and right
			// subtrees.
			//
			// This removes pos from the tree, but we added cur
			// which is equal and closer for future matches.
			//
			if (len >= accept_len || len == len_limit) {
				*lt_node = nodes[2 * pos];
				*gt_node = nodes[2 * pos + 1];

				break;
			}

			// Go to previous match and restructure tree
			//
			// lt_node points to a node that is going to contain
			// elements lexicographically less than cur (the search
			// string).
			//
			// If the string at pos is less than cur, we set that
			// lt_node to pos. We know that all elements in the
			// left subtree are less than pos, and thus less than
			// cur, so we point lt_node at the right subtree of
			// pos and continue our search there.
			//
			// The equivalent applies to gt_node when the string at
			// pos is greater than cur.
			//
			if (in[pos + len] < in[cur + len]) {
				*lt_node = pos;
				lt_node = &nodes[2 * pos + 1];
				assert(*lt_node == NO_MATCH_POS || *lt_node < pos);
				pos = *lt_node;
				lt_len = len;
			}
			else {
				*gt_node = pos;
				gt_node = &nodes[2 * pos];
				assert(*gt_node == NO_MATCH_POS || *gt_node < pos);
				pos = *gt_node;
				gt_len = len;
			}
		}
	}

	// The last positions are too close to the end to start a match, so
	// only the literal step is considered for them
	for (unsigned long cur = last_match_pos + 1; cur < src_size; ++cur) {
		// Check literal
		if (cost[cur + 1] > cost[cur] + 9) {
			cost[cur + 1] = cost[cur] + 9;
			mlen[cur + 1] = 1;
		}
	}

	// Phase 2: Follow lowest cost path backwards gathering tokens
	//
	// The tokens on the path are packed at the end of mlen and mpos.
	// next_token never drops below cur, so no entry is overwritten before
	// it has been read. Afterwards the tokens are in order at indices
	// next_token + 1 up to and including src_size.
	//
	unsigned long next_token = src_size;

	for (unsigned long cur = src_size; cur > 0; cur -= mlen[cur], --next_token) {
		mlen[next_token] = mlen[cur];
		mpos[next_token] = mpos[cur];
	}

	// Phase 3: Output tokens
	//
	// cur is the input position of the token being written.
	unsigned long cur = 0;
	for (unsigned long i = next_token + 1; i <= src_size; cur += mlen[i++]) {
		if (mlen[i] == 1) {
			// Literal: zero flag bit followed by the byte
			lbw_putbits(&lbw, (uint32_t) in[cur] << 1, 9);
		}
		else {
			const unsigned long offs = mpos[i];

			// Match flag
			lbw_putbits(&lbw, 1, 1);

			// Length minus MIN_MATCH, stored as a length class
			// prefix followed by the position within the class
			const unsigned long l = mlen[i] - MIN_MATCH;

			if (l < A) {
				lbw_putbits(&lbw, 1UL, 1);
				lbw_putbits(&lbw, l, A_BITS);
			}
			else if (l < B) {
				lbw_putbits(&lbw, 1UL << 1, 2);
				lbw_putbits(&lbw, l - A, B_BITS);
			}
			else if (l < C) {
				lbw_putbits(&lbw, 1UL << 2, 3);
				lbw_putbits(&lbw, l - B, C_BITS);
			}
			else if (l < D) {
				lbw_putbits(&lbw, 1UL << 3, 4);
				lbw_putbits(&lbw, l - C, D_BITS);
			}
			else if (l < E) {
				lbw_putbits(&lbw, 1UL << 4, 5);
				lbw_putbits(&lbw, l - D, E_BITS);
			}
			else {
				lbw_putbits(&lbw, 0, 5);
				lbw_putbits(&lbw, l - E, F_BITS);
			}

			// Offset minus one, stored as a slot number followed by
			// the offset bits. Slot 0 holds the small offsets with a
			// fixed number of bits; higher slots store the offset
			// without its top set bit.
			if (offs >= (2UL << (W_BITS - NUM_SLOTS))) {
				unsigned long mlog = crush_log2(offs);

				lbw_putbits(&lbw, mlog - (W_BITS - NUM_SLOTS), SLOT_BITS);
				lbw_putbits(&lbw, offs - (1UL << mlog), mlog);
			}
			else {
				lbw_putbits(&lbw, 0, SLOT_BITS);
				lbw_putbits(&lbw, offs, W_BITS - (NUM_SLOTS - 1));
			}
		}
	}

	// Return compressed size
	return (unsigned long) (lbw_finalize(&lbw) - (unsigned char *) dst);
}
314 | 
315 | #endif /* CRUSH_BTPARSE_H_INCLUDED */
316 | 


--------------------------------------------------------------------------------
/crush_depack.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * bcrush - Example of CRUSH compression with BriefLZ algorithms
  3 |  *
  4 |  * C depacker
  5 |  *
  6 |  * Copyright (c) 2018 Joergen Ibsen
  7 |  *
  8 |  * This software is provided 'as-is', without any express or implied
  9 |  * warranty. In no event will the authors be held liable for any damages
 10 |  * arising from the use of this software.
 11 |  *
 12 |  * Permission is granted to anyone to use this software for any purpose,
 13 |  * including commercial applications, and to alter it and redistribute it
 14 |  * freely, subject to the following restrictions:
 15 |  *
 16 |  *   1. The origin of this software must not be misrepresented; you must
 17 |  *      not claim that you wrote the original software. If you use this
 18 |  *      software in a product, an acknowledgment in the product
 19 |  *      documentation would be appreciated but is not required.
 20 |  *
 21 |  *   2. Altered source versions must be plainly marked as such, and must
 22 |  *      not be misrepresented as being the original software.
 23 |  *
 24 |  *   3. This notice may not be removed or altered from any source
 25 |  *      distribution.
 26 |  */
 27 | 
 28 | #include "crush.h"
 29 | #include "crush_internal.h"
 30 | 
 31 | #include <assert.h>
 32 | #include <stdint.h>
 33 | 
 34 | struct lsb_bitreader {
 35 | 	const unsigned char *src;
 36 | 	uint32_t tag;
 37 | 	int msb;
 38 | };
 39 | 
 40 | static void
 41 | lbr_init(struct lsb_bitreader *lbr, const unsigned char *src)
 42 | {
 43 | 	lbr->src = src;
 44 | 	lbr->tag = 0;
 45 | 	lbr->msb = 0;
 46 | }
 47 | 
 48 | static void
 49 | lbr_refill(struct lsb_bitreader *lbr, int num)
 50 | {
 51 | 	assert(num >= 0 && num <= 32);
 52 | 
 53 | 	// Read bytes until at least num bits available
 54 | 	while (lbr->msb < num) {
 55 | 		lbr->tag |= (uint32_t) *lbr->src++ << lbr->msb;
 56 | 		lbr->msb += 8;
 57 | 	}
 58 | 
 59 | 	assert(lbr->msb <= 32);
 60 | }
 61 | 
 62 | static uint32_t
 63 | lbr_getbits_no_refill(struct lsb_bitreader *lbr, int num)
 64 | {
 65 | 	assert(num >= 0 && num <= lbr->msb);
 66 | 
 67 | 	// Get bits from tag
 68 | 	uint32_t bits = lbr->tag & ((1ULL << num) - 1);
 69 | 
 70 | 	// Remove bits from tag
 71 | 	lbr->tag >>= num;
 72 | 	lbr->msb -= num;
 73 | 
 74 | 	return bits;
 75 | }
 76 | 
 77 | static uint32_t
 78 | lbr_getbits(struct lsb_bitreader *lbr, int num)
 79 | {
 80 | 	lbr_refill(lbr, num);
 81 | 	return lbr_getbits_no_refill(lbr, num);
 82 | }
 83 | 
 84 | unsigned long
 85 | crush_depack(const void *src, void *dst, unsigned long depacked_size)
 86 | {
 87 | 	struct lsb_bitreader lbr;
 88 | 	unsigned char *out = (unsigned char *) dst;
 89 | 	unsigned long dst_size = 0;
 90 | 
 91 | 	lbr_init(&lbr, (const unsigned char *) src);
 92 | 
 93 | 	/* Main decompression loop */
 94 | 	while (dst_size < depacked_size) {
 95 | 		if (lbr_getbits(&lbr, 1)) {
 96 | 			unsigned long len;
 97 | 			unsigned long mlog;
 98 | 			unsigned long mpos;
 99 | 			unsigned long offs;
100 | 
101 | 			/* Decode match length */
102 | 			if (lbr_getbits(&lbr, 1)) {
103 | 				len = lbr_getbits(&lbr, A_BITS);
104 | 			}
105 | 			else if (lbr_getbits(&lbr, 1)) {
106 | 				len = lbr_getbits(&lbr, B_BITS) + A;
107 | 			}
108 | 			else if (lbr_getbits(&lbr, 1)) {
109 | 				len = lbr_getbits(&lbr, C_BITS) + B;
110 | 			}
111 | 			else if (lbr_getbits(&lbr, 1)) {
112 | 				len = lbr_getbits(&lbr, D_BITS) + C;
113 | 			}
114 | 			else if (lbr_getbits(&lbr, 1)) {
115 | 				len = lbr_getbits(&lbr, E_BITS) + D;
116 | 			}
117 | 			else {
118 | 				len = lbr_getbits(&lbr, F_BITS) + E;
119 | 			}
120 | 
121 | 			/* Decode match offset */
122 | 			mlog = lbr_getbits(&lbr, SLOT_BITS) + (W_BITS - NUM_SLOTS);
123 | 			offs = mlog > (W_BITS - NUM_SLOTS)
124 | 			     ? lbr_getbits(&lbr, mlog) + (1 << mlog)
125 | 			     : lbr_getbits(&lbr, W_BITS - (NUM_SLOTS - 1));
126 | 
127 | 			if (++offs > dst_size) {
128 | 				return CRUSH_ERROR;
129 | 			}
130 | 
131 | 			mpos = dst_size - offs;
132 | 
133 | 			/* Copy match */
134 | 			out[dst_size++] = out[mpos++];
135 | 			out[dst_size++] = out[mpos++];
136 | 			out[dst_size++] = out[mpos++];
137 | 			while (len-- != 0) {
138 | 				out[dst_size++] = out[mpos++];
139 | 			}
140 | 		}
141 | 		else {
142 | 			/* Copy literal */
143 | 			out[dst_size++] = lbr_getbits(&lbr, 8);
144 | 		}
145 | 	}
146 | 
147 | 	/* Return decompressed size */
148 | 	return dst_size;
149 | }
150 | 


--------------------------------------------------------------------------------
/crush_depack_file.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * bcrush - Example of CRUSH compression with BriefLZ algorithms
  3 |  *
  4 |  * C depacker
  5 |  *
  6 |  * Copyright (c) 2018 Joergen Ibsen
  7 |  *
  8 |  * This software is provided 'as-is', without any express or implied
  9 |  * warranty. In no event will the authors be held liable for any damages
 10 |  * arising from the use of this software.
 11 |  *
 12 |  * Permission is granted to anyone to use this software for any purpose,
 13 |  * including commercial applications, and to alter it and redistribute it
 14 |  * freely, subject to the following restrictions:
 15 |  *
 16 |  *   1. The origin of this software must not be misrepresented; you must
 17 |  *      not claim that you wrote the original software. If you use this
 18 |  *      software in a product, an acknowledgment in the product
 19 |  *      documentation would be appreciated but is not required.
 20 |  *
 21 |  *   2. Altered source versions must be plainly marked as such, and must
 22 |  *      not be misrepresented as being the original software.
 23 |  *
 24 |  *   3. This notice may not be removed or altered from any source
 25 |  *      distribution.
 26 |  */
 27 | 
 28 | #include "crush.h"
 29 | #include "crush_internal.h"
 30 | 
 31 | #include <assert.h>
 32 | #include <stdint.h>
 33 | 
 34 | struct lsb_bitreader {
 35 | 	FILE *src;
 36 | 	uint32_t tag;
 37 | 	int msb;
 38 | };
 39 | 
 40 | static void
 41 | lbr_init(struct lsb_bitreader *lbr, FILE *src)
 42 | {
 43 | 	lbr->src = src;
 44 | 	lbr->tag = 0;
 45 | 	lbr->msb = 0;
 46 | }
 47 | 
 48 | static void
 49 | lbr_refill(struct lsb_bitreader *lbr, int num)
 50 | {
 51 | 	assert(num >= 0 && num <= 32);
 52 | 
 53 | 	// Read bytes until at least num bits available
 54 | 	while (lbr->msb < num) {
 55 | 		lbr->tag |= (uint32_t) getc(lbr->src) << lbr->msb;
 56 | 		lbr->msb += 8;
 57 | 	}
 58 | 
 59 | 	assert(lbr->msb <= 32);
 60 | }
 61 | 
 62 | static uint32_t
 63 | lbr_getbits_no_refill(struct lsb_bitreader *lbr, int num)
 64 | {
 65 | 	assert(num >= 0 && num <= lbr->msb);
 66 | 
 67 | 	// Get bits from tag
 68 | 	uint32_t bits = lbr->tag & ((1ULL << num) - 1);
 69 | 
 70 | 	// Remove bits from tag
 71 | 	lbr->tag >>= num;
 72 | 	lbr->msb -= num;
 73 | 
 74 | 	return bits;
 75 | }
 76 | 
 77 | static uint32_t
 78 | lbr_getbits(struct lsb_bitreader *lbr, int num)
 79 | {
 80 | 	lbr_refill(lbr, num);
 81 | 	return lbr_getbits_no_refill(lbr, num);
 82 | }
 83 | 
 84 | unsigned long
 85 | crush_depack_file(FILE *src, void *dst, unsigned long depacked_size)
 86 | {
 87 | 	struct lsb_bitreader lbr;
 88 | 	unsigned char *out = (unsigned char *) dst;
 89 | 	unsigned long dst_size = 0;
 90 | 
 91 | 	lbr_init(&lbr, src);
 92 | 
 93 | 	/* Main decompression loop */
 94 | 	while (dst_size < depacked_size) {
 95 | 		if (lbr_getbits(&lbr, 1)) {
 96 | 			unsigned long len;
 97 | 			unsigned long mlog;
 98 | 			unsigned long mpos;
 99 | 			unsigned long offs;
100 | 
101 | 			/* Decode match length */
102 | 			if (lbr_getbits(&lbr, 1)) {
103 | 				len = lbr_getbits(&lbr, A_BITS);
104 | 			}
105 | 			else if (lbr_getbits(&lbr, 1)) {
106 | 				len = lbr_getbits(&lbr, B_BITS) + A;
107 | 			}
108 | 			else if (lbr_getbits(&lbr, 1)) {
109 | 				len = lbr_getbits(&lbr, C_BITS) + B;
110 | 			}
111 | 			else if (lbr_getbits(&lbr, 1)) {
112 | 				len = lbr_getbits(&lbr, D_BITS) + C;
113 | 			}
114 | 			else if (lbr_getbits(&lbr, 1)) {
115 | 				len = lbr_getbits(&lbr, E_BITS) + D;
116 | 			}
117 | 			else {
118 | 				len = lbr_getbits(&lbr, F_BITS) + E;
119 | 			}
120 | 
121 | 			/* Decode match offset */
122 | 			mlog = lbr_getbits(&lbr, SLOT_BITS) + (W_BITS - NUM_SLOTS);
123 | 			offs = mlog > (W_BITS - NUM_SLOTS)
124 | 			     ? lbr_getbits(&lbr, mlog) + (1 << mlog)
125 | 			     : lbr_getbits(&lbr, W_BITS - (NUM_SLOTS - 1));
126 | 
127 | 			if (++offs > dst_size) {
128 | 				return CRUSH_ERROR;
129 | 			}
130 | 
131 | 			mpos = dst_size - offs;
132 | 
133 | 			/* Copy match */
134 | 			out[dst_size++] = out[mpos++];
135 | 			out[dst_size++] = out[mpos++];
136 | 			out[dst_size++] = out[mpos++];
137 | 			while (len-- != 0) {
138 | 				out[dst_size++] = out[mpos++];
139 | 			}
140 | 		}
141 | 		else {
142 | 			/* Copy literal */
143 | 			out[dst_size++] = lbr_getbits(&lbr, 8);
144 | 		}
145 | 	}
146 | 
147 | 	/* Return decompressed size */
148 | 	return dst_size;
149 | }
150 | 


--------------------------------------------------------------------------------
/crush_internal.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // bcrush - Example of CRUSH compression with BriefLZ algorithms
 3 | //
 4 | // Internal C/C++ header file
 5 | //
 6 | // Copyright (c) 2018 Joergen Ibsen
 7 | //
 8 | // This software is provided 'as-is', without any express or implied
 9 | // warranty. In no event will the authors be held liable for any damages
10 | // arising from the use of this software.
11 | //
12 | // Permission is granted to anyone to use this software for any purpose,
13 | // including commercial applications, and to alter it and redistribute it
14 | // freely, subject to the following restrictions:
15 | //
16 | //   1. The origin of this software must not be misrepresented; you must
17 | //      not claim that you wrote the original software. If you use this
18 | //      software in a product, an acknowledgment in the product
19 | //      documentation would be appreciated but is not required.
20 | //
21 | //   2. Altered source versions must be plainly marked as such, and must
22 | //      not be misrepresented as being the original software.
23 | //
24 | //   3. This notice may not be removed or altered from any source
25 | //      distribution.
26 | //
27 | 
28 | #ifndef CRUSH_INTERNAL_H_INCLUDED
29 | #define CRUSH_INTERNAL_H_INCLUDED
30 | 
31 | #ifdef __cplusplus
32 | extern "C" {
33 | #endif
34 | 
// Window
//
// W_SIZE is the largest match distance the packers accept.
#define W_BITS 21 // Window size (17..23)
#define W_SIZE (1UL << W_BITS)
#define W_MASK (W_SIZE - 1)

// Match offsets are stored as a slot number of SLOT_BITS bits followed by
// a slot-dependent number of offset bits.
#define SLOT_BITS 4
#define NUM_SLOTS (1UL << SLOT_BITS)

// Match lengths are stored minus MIN_MATCH, as one of six length classes.
//
// Each class has a prefix (shown below, in the order the bits are read)
// followed by this number of bits giving the position within the class.
#define A_BITS 2 // 1 xx
#define B_BITS 2 // 01 xx
#define C_BITS 2 // 001 xx
#define D_BITS 3 // 0001 xxx
#define E_BITS 5 // 00001 xxxxx
#define F_BITS 9 // 00000 xxxxxxxxx

// First stored length value past the end of each class, so class A holds
// the values 0 .. A - 1, class B holds A .. B - 1, and so on.
//
// With the bit counts above these are 4, 8, 12, 20, 52, and 564.
#define A (1UL << A_BITS)
#define B ((1UL << B_BITS) + A)
#define C ((1UL << C_BITS) + B)
#define D ((1UL << D_BITS) + C)
#define E ((1UL << E_BITS) + D)
#define F ((1UL << F_BITS) + E)

// Shortest and longest match length that can be encoded (3 and 566)
#define MIN_MATCH 3
#define MAX_MATCH ((F - 1) + MIN_MATCH)

// Distance (64k) beyond which the original CRUSH packer drops matches of
// length 3, see the note in crush_leparse.h
#define TOO_FAR (1UL << 16)
57 | 
58 | #ifdef __cplusplus
59 | } /* extern "C" */
60 | #endif
61 | 
62 | #endif /* CRUSH_INTERNAL_H_INCLUDED */
63 | 


--------------------------------------------------------------------------------
/crush_leparse.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // bcrush - Example of CRUSH compression with BriefLZ algorithms
  3 | //
  4 | // Backwards dynamic programming parse with left-extension of matches
  5 | //
  6 | // Copyright (c) 2018-2020 Joergen Ibsen
  7 | //
  8 | // This software is provided 'as-is', without any express or implied
  9 | // warranty. In no event will the authors be held liable for any damages
 10 | // arising from the use of this software.
 11 | //
 12 | // Permission is granted to anyone to use this software for any purpose,
 13 | // including commercial applications, and to alter it and redistribute it
 14 | // freely, subject to the following restrictions:
 15 | //
 16 | //   1. The origin of this software must not be misrepresented; you must
 17 | //      not claim that you wrote the original software. If you use this
 18 | //      software in a product, an acknowledgment in the product
 19 | //      documentation would be appreciated but is not required.
 20 | //
 21 | //   2. Altered source versions must be plainly marked as such, and must
 22 | //      not be misrepresented as being the original software.
 23 | //
 24 | //   3. This notice may not be removed or altered from any source
 25 | //      distribution.
 26 | //
 27 | 
 28 | #ifndef CRUSH_LEPARSE_H_INCLUDED
 29 | #define CRUSH_LEPARSE_H_INCLUDED
 30 | 
 31 | static size_t
 32 | crush_leparse_workmem_size(size_t src_size)
 33 | {
 34 | 	return (LOOKUP_SIZE < 2 * src_size ? 3 * src_size : src_size + LOOKUP_SIZE)
 35 | 	     * sizeof(uint32_t);
 36 | }
 37 | 
// Backwards dynamic programming parse with left-extension of matches.
//
// Hash chains for the entire input are built first. The input is then
// processed from the end towards the start, computing for each position
// the lowest cost found of encoding the rest of the input from there.
//
// When a match improves the cost at a position, and the bytes before the
// match and before the position are equal, the match is extended to the
// left and the positions it covers this way are not searched.
//
// Parameters:
//   src        - data to compress
//   dst        - where the compressed bit stream is written
//   src_size   - number of bytes at src
//   workmem    - scratch memory, carved into overlapping uint32_t arrays
//                below (crush_leparse_workmem_size gives the required size)
//   max_depth  - maximum number of hash chain entries checked per position
//   accept_len - match length at which the search at a position stops
//
// Returns the number of bytes written to dst (0 for empty input).
//
static unsigned long
crush_pack_leparse(const void *src, void *dst, unsigned long src_size, void *workmem,
                   const unsigned long max_depth, const unsigned long accept_len)
{
	struct lsb_bitwriter lbw;
	const unsigned char *const in = (const unsigned char *) src;
	// Last position with MIN_MATCH (3) bytes available for hashing
	const unsigned long last_match_pos = src_size > 3 ? src_size - 3 : 0;

	// Check for empty input
	if (src_size == 0) {
		return 0;
	}

	lbw_init(&lbw, (unsigned char *) dst);

	// Inputs shorter than 4 bytes are stored as literals only. A literal
	// is 9 bits: a zero flag bit followed by the 8 bits of the byte.
	if (src_size < 4) {
		for (unsigned long i = 0; i < src_size; ++i) {
			lbw_putbits(&lbw, (uint32_t) in[i] << 1, 9);
		}

		return (unsigned long) (lbw_finalize(&lbw) - (unsigned char *) dst);
	}

	// With a bit of careful ordering we can fit in 3 * src_size words.
	//
	// The idea is that the lookup is only used in the first phase to
	// build the hash chains, so we overlap it with mpos and mlen.
	// Also, since we are using prev from right to left in phase two,
	// and that is the order we fill in cost, we can overlap these.
	//
	// One detail is that we actually use src_size + 1 elements of cost,
	// but we put mpos after it, where we do not need the first element.
	//
	uint32_t *const prev = (uint32_t *) workmem;
	uint32_t *const mpos = prev + src_size;
	uint32_t *const mlen = mpos + src_size;
	uint32_t *const cost = prev;
	uint32_t *const lookup = mpos;

	// Phase 1: Build hash chains
	//
	// For larger inputs the number of hash bits is reduced so the lookup
	// table has at most src_size entries.
	const int bits = 2 * src_size < LOOKUP_SIZE ? CRUSH_HASH_BITS : crush_log2(src_size);

	// Initialize lookup
	for (unsigned long i = 0; i < (1UL << bits); ++i) {
		lookup[i] = NO_MATCH_POS;
	}

	// Build hash chains in prev
	if (last_match_pos > 0) {
		for (unsigned long i = 0; i <= last_match_pos; ++i) {
			const unsigned long hash = crush_hash3_bits(&in[i], bits);
			prev[i] = lookup[hash];
			lookup[hash] = i;
		}
	}

	// Initialize last two positions as literals
	mlen[src_size - 2] = 1;
	mlen[src_size - 1] = 1;

	cost[src_size - 2] = 18;
	cost[src_size - 1] = 9;
	cost[src_size] = 0;

	// Phase 2: Find lowest cost path from each position to end
	//
	// Position 0 cannot have a match and is set to a literal after the
	// loop. Note that cur is also decremented inside the loop when a
	// match is left-extended.
	for (unsigned long cur = last_match_pos; cur > 0; --cur) {
		// Since we updated prev to the end in the first phase, we
		// do not need to hash, but can simply look up the previous
		// position directly.
		unsigned long pos = prev[cur];

		assert(pos == NO_MATCH_POS || pos < cur);

		// Start with a literal
		//
		// This overwrites prev[cur], which we have already read.
		cost[cur] = cost[cur + 1] + 9;
		mlen[cur] = 1;

		// Longest match length already costed at this position
		unsigned long max_len = MIN_MATCH - 1;

		const unsigned long len_limit = src_size - cur > MAX_MATCH ? MAX_MATCH : src_size - cur;
		unsigned long num_chain = max_depth;

		// Go through the chain of prev matches
		for (; pos != NO_MATCH_POS && num_chain--; pos = prev[pos]) {
			// Limit offset to W_SIZE
			if (cur - pos > W_SIZE) {
				break;
			}

			// The CRUSH packer drops length 3 matches further
			// away than TOO_FAR (64k). The actual point at which
			// a match is longer than 3 literals is 1M, so this
			// might be a heuristic to find better matches at the
			// next position. At any rate, it is not used in the
			// depacker, so I left it out here because that
			// improves ratio by a tiny bit.
/*
			// Minimum match length 4 for offset > 64k
			if (max_len == MIN_MATCH - 1 && cur - pos > TOO_FAR) {
				max_len = MIN_MATCH;
			}
*/
			unsigned long len = 0;

			// If the byte at max_len matches, this has a chance to
			// be a longer match, otherwise there is no need to
			// compare the strings
			if (max_len < len_limit && in[pos + max_len] == in[cur + max_len]) {
				// Find match len
				while (len < len_limit && in[pos + len] == in[cur + len]) {
					++len;
				}
			}

			// Extend current match if possible
			//
			// Note that we are checking matches in order from the
			// closest and back. This means for a match further
			// away, the encoding of all lengths up to the current
			// max length will always be longer or equal, so we need
			// only consider the extension.
			if (len > max_len) {
				unsigned long min_cost = UINT32_MAX;
				unsigned long min_cost_len = MIN_MATCH - 1;

				// Find lowest cost match length
				for (unsigned long i = max_len + 1; i <= len; ++i) {
					unsigned long match_cost = crush_match_cost(cur - pos - 1, i);
					assert(match_cost < UINT32_MAX - cost[cur + i]);
					unsigned long cost_here = match_cost + cost[cur + i];

					if (cost_here < min_cost) {
						min_cost = cost_here;
						min_cost_len = i;
					}
				}

				max_len = len;

				// Update cost if cheaper
				//
				// mpos holds the position of the match here, not
				// the offset
				if (min_cost < cost[cur]) {
					cost[cur] = min_cost;
					mpos[cur] = pos;
					mlen[cur] = min_cost_len;

					// Left-extend current match if possible
					//
					// Each step moves cur and pos one back and
					// makes the match one longer. The extended
					// match is stored without comparing it to
					// other choices at the new cur, and the
					// search at the original position ends.
					if (pos > 0 && in[pos - 1] == in[cur - 1] && min_cost_len < MAX_MATCH) {
						do {
							--cur;
							--pos;
							++min_cost_len;
							unsigned long match_cost = crush_match_cost(cur - pos - 1, min_cost_len);
							assert(match_cost < UINT32_MAX - cost[cur + min_cost_len]);
							unsigned long cost_here = match_cost + cost[cur + min_cost_len];
							cost[cur] = cost_here;
							mpos[cur] = pos;
							mlen[cur] = min_cost_len;
						} while (pos > 0 && in[pos - 1] == in[cur - 1] && min_cost_len < MAX_MATCH);
						break;
					}
				}
			}

			// Stop at a match that is long enough, or that reaches
			// the longest length possible at this position
			if (len >= accept_len || len == len_limit) {
				break;
			}
		}
	}

	// The first byte is always a literal
	mpos[0] = 0;
	mlen[0] = 1;

	// Phase 3: Output compressed data, following lowest cost path
	for (unsigned long i = 0; i < src_size; i += mlen[i]) {
		if (mlen[i] == 1) {
			// Literal: zero flag bit followed by the byte
			lbw_putbits(&lbw, (uint32_t) in[i] << 1, 9);
		}
		else {
			// Offset is stored minus one
			const unsigned long offs = i - mpos[i] - 1;

			// Match flag
			lbw_putbits(&lbw, 1, 1);

			// Length minus MIN_MATCH, stored as a length class
			// prefix followed by the position within the class
			const unsigned long l = mlen[i] - MIN_MATCH;

			if (l < A) {
				lbw_putbits(&lbw, 1UL, 1);
				lbw_putbits(&lbw, l, A_BITS);
			}
			else if (l < B) {
				lbw_putbits(&lbw, 1UL << 1, 2);
				lbw_putbits(&lbw, l - A, B_BITS);
			}
			else if (l < C) {
				lbw_putbits(&lbw, 1UL << 2, 3);
				lbw_putbits(&lbw, l - B, C_BITS);
			}
			else if (l < D) {
				lbw_putbits(&lbw, 1UL << 3, 4);
				lbw_putbits(&lbw, l - C, D_BITS);
			}
			else if (l < E) {
				lbw_putbits(&lbw, 1UL << 4, 5);
				lbw_putbits(&lbw, l - D, E_BITS);
			}
			else {
				lbw_putbits(&lbw, 0, 5);
				lbw_putbits(&lbw, l - E, F_BITS);
			}

			// Slot 0 holds the small offsets with a fixed number of
			// bits; higher slots store the offset without its top
			// set bit
			if (offs >= (2UL << (W_BITS - NUM_SLOTS))) {
				unsigned long mlog = crush_log2(offs);

				lbw_putbits(&lbw, mlog - (W_BITS - NUM_SLOTS), SLOT_BITS);
				lbw_putbits(&lbw, offs - (1UL << mlog), mlog);
			}
			else {
				lbw_putbits(&lbw, 0, SLOT_BITS);
				lbw_putbits(&lbw, offs, W_BITS - (NUM_SLOTS - 1));
			}
		}
	}

	// Return compressed size
	return (unsigned long) (lbw_finalize(&lbw) - (unsigned char *) dst);
}
261 | 
262 | #endif /* CRUSH_LEPARSE_H_INCLUDED */
263 | 


--------------------------------------------------------------------------------
/crush_ssparse.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // bcrush - Example of CRUSH compression with BriefLZ algorithms
  3 | //
  4 | // Backwards dynamic programming parse
  5 | //
  6 | // Copyright (c) 2018-2020 Joergen Ibsen
  7 | //
  8 | // This software is provided 'as-is', without any express or implied
  9 | // warranty. In no event will the authors be held liable for any damages
 10 | // arising from the use of this software.
 11 | //
 12 | // Permission is granted to anyone to use this software for any purpose,
 13 | // including commercial applications, and to alter it and redistribute it
 14 | // freely, subject to the following restrictions:
 15 | //
 16 | //   1. The origin of this software must not be misrepresented; you must
 17 | //      not claim that you wrote the original software. If you use this
 18 | //      software in a product, an acknowledgment in the product
 19 | //      documentation would be appreciated but is not required.
 20 | //
 21 | //   2. Altered source versions must be plainly marked as such, and must
 22 | //      not be misrepresented as being the original software.
 23 | //
 24 | //   3. This notice may not be removed or altered from any source
 25 | //      distribution.
 26 | //
 27 | 
 28 | #ifndef CRUSH_SSPARSE_H_INCLUDED
 29 | #define CRUSH_SSPARSE_H_INCLUDED
 30 | 
 31 | static size_t
 32 | crush_ssparse_workmem_size(size_t src_size)
 33 | {
 34 | 	// Room for prev, plus the larger of mpos + mlen and the lookup table
 35 | 	return sizeof(uint32_t) * (2 * src_size > LOOKUP_SIZE ? 3 * src_size : src_size + LOOKUP_SIZE);
 36 | }
 37 | 
 38 | static unsigned long
 39 | crush_pack_ssparse(const void *src, void *dst, unsigned long src_size, void *workmem,
 40 |                    const unsigned long max_depth, const unsigned long accept_len)
 41 | { // Packs src_size bytes from src into dst; returns the compressed size in bytes
 42 | 	struct lsb_bitwriter lbw;
 43 | 	const unsigned char *const in = (const unsigned char *) src;
 44 | 	const unsigned long last_match_pos = src_size > 3 ? src_size - 3 : 0; // last position that gets hashed
 45 | 
 46 | 	// Empty input produces no output
 47 | 	if (src_size == 0) {
 48 | 		return 0;
 49 | 	}
 50 | 
 51 | 	lbw_init(&lbw, (unsigned char *) dst);
 52 | 
 53 | 	if (src_size < 4) { // inputs shorter than 4 bytes are stored as literals only
 54 | 		for (unsigned long i = 0; i < src_size; ++i) {
 55 | 			lbw_putbits(&lbw, (uint32_t) in[i] << 1, 9);
 56 | 		}
 57 | 
 58 | 		return (unsigned long) (lbw_finalize(&lbw) - (unsigned char *) dst);
 59 | 	}
 60 | 
 61 | 	// With a bit of careful ordering we can fit in 3 * src_size words.
 62 | 	//
 63 | 	// The idea is that the lookup is only used in the first phase to
 64 | 	// build the hash chains, so we overlap it with mpos and mlen.
 65 | 	// Also, since we are using prev from right to left in phase two,
 66 | 	// and that is the order we fill in cost, we can overlap these.
 67 | 	//
 68 | 	// One detail is that we actually use src_size + 1 elements of cost,
 69 | 	// but we put mpos after it, where we do not need the first element.
 70 | 	//
 71 | 	uint32_t *const prev = (uint32_t *) workmem;
 72 | 	uint32_t *const mpos = prev + src_size;
 73 | 	uint32_t *const mlen = mpos + src_size;
 74 | 	uint32_t *const cost = prev;
 75 | 	uint32_t *const lookup = mpos;
 76 | 
 77 | 	// Phase 1: Build hash chains
 78 | 	const int bits = 2 * src_size < LOOKUP_SIZE ? CRUSH_HASH_BITS : crush_log2(src_size);
 79 | 
 80 | 	// Initialize lookup
 81 | 	for (unsigned long i = 0; i < (1UL << bits); ++i) {
 82 | 		lookup[i] = NO_MATCH_POS;
 83 | 	}
 84 | 
 85 | 	// Build hash chains in prev
 86 | 	if (last_match_pos > 0) {
 87 | 		for (unsigned long i = 0; i <= last_match_pos; ++i) {
 88 | 			const unsigned long hash = crush_hash3_bits(&in[i], bits);
 89 | 			prev[i] = lookup[hash]; // previous position with the same hash, or NO_MATCH_POS
 90 | 			lookup[hash] = i;
 91 | 		}
 92 | 	}
 93 | 
 94 | 	// Initialize last two positions as literals
 95 | 	mlen[src_size - 2] = 1;
 96 | 	mlen[src_size - 1] = 1;
 97 | 
 98 | 	cost[src_size - 2] = 18;
 99 | 	cost[src_size - 1] = 9;
100 | 	cost[src_size] = 0; // overlaps mpos[0], which is only written after phase 2
101 | 
102 | 	// Phase 2: Find lowest cost path from each position to end
103 | 	for (unsigned long cur = last_match_pos; cur > 0; --cur) {
104 | 		// Since we updated prev to the end in the first phase, we
105 | 		// do not need to hash, but can simply look up the previous
106 | 		// position directly.
107 | 		unsigned long pos = prev[cur];
108 | 
109 | 		assert(pos == NO_MATCH_POS || pos < cur);
110 | 
111 | 		// Start with a literal
112 | 		cost[cur] = cost[cur + 1] + 9;
113 | 		mlen[cur] = 1;
114 | 
115 | 		unsigned long max_len = MIN_MATCH - 1;
116 | 
117 | 		const unsigned long len_limit = src_size - cur > MAX_MATCH ? MAX_MATCH : src_size - cur;
118 | 		unsigned long num_chain = max_depth; // chain entries left to examine at this position
119 | 
120 | 		// Go through the chain of prev matches
121 | 		for (; pos != NO_MATCH_POS && num_chain--; pos = prev[pos]) {
122 | 			// Limit offset to W_SIZE
123 | 			if (cur - pos > W_SIZE) {
124 | 				break;
125 | 			}
126 | 
127 | 			// The CRUSH packer drops length 3 matches further
128 | 			// away than TOO_FAR (64k). The actual point at which
129 | 			// a match is longer than 3 literals is 1M, so this
130 | 			// might be a heuristic to find better matches at the
131 | 			// next position. At any rate, it is not used in the
132 | 			// depacker, so I left it out here because that
133 | 			// improves ratio by a tiny bit.
134 | /*
135 | 			// Minimum match length 4 for offset > 64k
136 | 			if (max_len == MIN_MATCH - 1 && cur - pos > TOO_FAR) {
137 | 				max_len = MIN_MATCH;
138 | 			}
139 | */
140 | 			unsigned long len = 0;
141 | 
142 | 			// Only if the byte at max_len matches can this be a longer match
143 | 			if (max_len < len_limit && in[pos + max_len] == in[cur + max_len]) {
144 | 				// Find match len
145 | 				while (len < len_limit && in[pos + len] == in[cur + len]) {
146 | 					++len;
147 | 				}
148 | 			}
149 | 
150 | 			// Extend current match if possible
151 | 			//
152 | 			// Note that we are checking matches in order from the
153 | 			// closest and back. This means for a match further
154 | 			// away, the encoding of all lengths up to the current
155 | 			// max length will always be longer or equal, so we need
156 | 			// only consider the extension.
157 | 			if (len > max_len) {
158 | 				unsigned long min_cost = UINT32_MAX;
159 | 				unsigned long min_cost_len = MIN_MATCH - 1;
160 | 
161 | 				// Find lowest cost match length
162 | 				for (unsigned long i = max_len + 1; i <= len; ++i) {
163 | 					unsigned long match_cost = crush_match_cost(cur - pos - 1, i);
164 | 					assert(match_cost < UINT32_MAX - cost[cur + i]);
165 | 					unsigned long cost_here = match_cost + cost[cur + i];
166 | 
167 | 					if (cost_here < min_cost) {
168 | 						min_cost = cost_here;
169 | 						min_cost_len = i;
170 | 					}
171 | 				}
172 | 
173 | 				max_len = len;
174 | 
175 | 				// Update cost if cheaper
176 | 				if (min_cost < cost[cur]) {
177 | 					cost[cur] = min_cost;
178 | 					mpos[cur] = pos;
179 | 					mlen[cur] = min_cost_len;
180 | 				}
181 | 			}
182 | 
183 | 			if (len >= accept_len || len == len_limit) { // long enough to accept, or as long as possible
184 | 				break;
185 | 			}
186 | 		}
187 | 	}
188 | 
189 | 	mpos[0] = 0;
190 | 	mlen[0] = 1; // phase 2 stops before position 0, which is always emitted as a literal
191 | 
192 | 	// Phase 3: Output compressed data, following lowest cost path
193 | 	for (unsigned long i = 0; i < src_size; i += mlen[i]) {
194 | 		if (mlen[i] == 1) {
195 | 			lbw_putbits(&lbw, (uint32_t) in[i] << 1, 9); // literal: 9-bit value with lowest bit 0
196 | 		}
197 | 		else {
198 | 			const unsigned long offs = i - mpos[i] - 1;
199 | 
200 | 			lbw_putbits(&lbw, 1, 1); // match: single bit with value 1
201 | 
202 | 			const unsigned long l = mlen[i] - MIN_MATCH; // length is encoded relative to MIN_MATCH
203 | 
204 | 			if (l < A) {
205 | 				lbw_putbits(&lbw, 1UL, 1);
206 | 				lbw_putbits(&lbw, l, A_BITS);
207 | 			}
208 | 			else if (l < B) {
209 | 				lbw_putbits(&lbw, 1UL << 1, 2);
210 | 				lbw_putbits(&lbw, l - A, B_BITS);
211 | 			}
212 | 			else if (l < C) {
213 | 				lbw_putbits(&lbw, 1UL << 2, 3);
214 | 				lbw_putbits(&lbw, l - B, C_BITS);
215 | 			}
216 | 			else if (l < D) {
217 | 				lbw_putbits(&lbw, 1UL << 3, 4);
218 | 				lbw_putbits(&lbw, l - C, D_BITS);
219 | 			}
220 | 			else if (l < E) {
221 | 				lbw_putbits(&lbw, 1UL << 4, 5);
222 | 				lbw_putbits(&lbw, l - D, E_BITS);
223 | 			}
224 | 			else {
225 | 				lbw_putbits(&lbw, 0, 5);
226 | 				lbw_putbits(&lbw, l - E, F_BITS);
227 | 			}
228 | 
229 | 			if (offs >= (2UL << (W_BITS - NUM_SLOTS))) { // larger offsets: slot number, then the bits below the top bit
230 | 				unsigned long mlog = crush_log2(offs);
231 | 
232 | 				lbw_putbits(&lbw, mlog - (W_BITS - NUM_SLOTS), SLOT_BITS);
233 | 				lbw_putbits(&lbw, offs - (1UL << mlog), mlog);
234 | 			}
235 | 			else {
236 | 				lbw_putbits(&lbw, 0, SLOT_BITS);
237 | 				lbw_putbits(&lbw, offs, W_BITS - (NUM_SLOTS - 1));
238 | 			}
239 | 		}
240 | 	}
241 | 
242 | 	// Return compressed size
243 | 	return (unsigned long) (lbw_finalize(&lbw) - (unsigned char *) dst);
244 | }
245 | 
246 | #endif /* CRUSH_SSPARSE_H_INCLUDED */
247 | 


--------------------------------------------------------------------------------
/meson.build:
--------------------------------------------------------------------------------
 1 | project('bcrush', 'c',
 2 |   version : '0.2.1',
 3 |   license : 'Zlib',
 4 |   meson_version : '>=0.60.0',
 5 |   default_options : [
 6 |     'b_ndebug=if-release',
 7 |     'buildtype=release',
 8 |     'c_std=c99',
 9 |     'default_library=static',
10 |     'warning_level=3',
11 |   ]
12 | )
13 | 
14 | # Library built from the crush sources; the bcrush tool below links against it
15 | lib = library('crush', files('crush.c', 'crush_depack.c', 'crush_depack_file.c'))
16 | crush_dep = declare_dependency(
17 |   version : meson.project_version(),
18 |   include_directories : include_directories('.'),
19 |   link_with : lib
20 | )
21 | 
22 | executable('bcrush', files('bcrush.c', 'parg.c'), dependencies : crush_dep)
23 | 


--------------------------------------------------------------------------------
/parg.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * parg - parse argv
  3 |  *
  4 |  * Written in 2015-2016 by Joergen Ibsen
  5 |  *
  6 |  * To the extent possible under law, the author(s) have dedicated all
  7 |  * copyright and related and neighboring rights to this software to the
  8 |  * public domain worldwide. This software is distributed without any
  9 |  * warranty. <http://creativecommons.org/publicdomain/zero/1.0/>
 10 |  */
 11 | 
 12 | #include "parg.h"
 13 | 
 14 | #include <assert.h>
 15 | #include <stdlib.h>
 16 | #include <string.h>
 17 | 
 18 | /*
 19 |  * Return non-zero if optind is outside argv or refers to a NULL element.
 20 |  */
 21 | static int
 22 | is_argv_end(const struct parg_state *ps, int argc, char *const argv[])
 23 | {
 24 | 	return !(ps->optind < argc && argv[ps->optind] != NULL);
 25 | }
 26 | 
 27 | /*
 28 |  * Match nextchar against optstring: returns the option, or '?'/':' with optopt set on error.
 29 |  */
 30 | static int
 31 | match_short(struct parg_state *ps, int argc, char *const argv[],
 32 |             const char *optstring)
 33 | {
 34 | 	const char *p = strchr(optstring, *ps->nextchar);
 35 | 	/* ':' is optstring syntax, never an option, so `-:` must not match it */
 36 | 	if (p == NULL || *p == ':') {
 37 | 		ps->optopt = *ps->nextchar++;
 38 | 		return '?';
 39 | 	}
 40 | 
 41 | 	/* If no option argument, return option */
 42 | 	if (p[1] != ':') {
 43 | 		return *ps->nextchar++;
 44 | 	}
 45 | 
 46 | 	/* If more characters, return as option argument */
 47 | 	if (ps->nextchar[1] != '\0') {
 48 | 		ps->optarg = &ps->nextchar[1];
 49 | 		ps->nextchar = NULL;
 50 | 		return *p;
 51 | 	}
 52 | 
 53 | 	/* If option argument is optional, return option */
 54 | 	if (p[2] == ':') {
 55 | 		return *ps->nextchar++;
 56 | 	}
 57 | 
 58 | 	/* Option argument required, so return next argv element */
 59 | 	if (is_argv_end(ps, argc, argv)) {
 60 | 		ps->optopt = *ps->nextchar++;
 61 | 		return optstring[0] == ':' ? ':' : '?';
 62 | 	}
 63 | 
 64 | 	ps->optarg = argv[ps->optind++];
 65 | 	ps->nextchar = NULL;
 66 | 	return *p;
 67 | }
 68 | 
 69 | /*
 70 |  * Match the long option name at nextchar (up to any '=') against longopts.
 71 |  */
 72 | static int
 73 | match_long(struct parg_state *ps, int argc, char *const argv[],
 74 |            const char *optstring,
 75 |            const struct parg_option *longopts, int *longindex)
 76 | {
 77 | 	const char *const arg = ps->nextchar;
 78 | 	const size_t name_len = strcspn(arg, "=");
 79 | 	const struct parg_option *opt;
 80 | 	int candidates = 0;
 81 | 	int found = -1;
 82 | 	int bad_arg = 0;
 83 | 	int idx;
 84 | 
 85 | 	/* A long option always uses up the rest of its argv element */
 86 | 	ps->nextchar = NULL;
 87 | 
 88 | 	/* Count the options that have the given name as a prefix */
 89 | 	for (idx = 0; longopts[idx].name != NULL; ++idx) {
 90 | 		if (strncmp(arg, longopts[idx].name, name_len) != 0) {
 91 | 			continue;
 92 | 		}
 93 | 		found = idx;
 94 | 		++candidates;
 95 | 		/* An exact match beats any number of prefix matches */
 96 | 		if (longopts[idx].name[name_len] == '\0') {
 97 | 			candidates = 1;
 98 | 			break;
 99 | 		}
100 | 	}
101 | 
102 | 	/* Return '?' on no or ambiguous match */
103 | 	if (candidates != 1) {
104 | 		ps->optopt = 0;
105 | 		return '?';
106 | 	}
107 | 
108 | 	assert(found != -1);
109 | 	opt = &longopts[found];
110 | 	if (longindex != NULL) {
111 | 		*longindex = found;
112 | 	}
113 | 
114 | 	if (arg[name_len] == '=') {
115 | 		/* Inline argument given, which is an error if the option takes none */
116 | 		bad_arg = opt->has_arg == PARG_NOARG;
117 | 		if (!bad_arg) {
118 | 			ps->optarg = &arg[name_len + 1];
119 | 		}
120 | 	}
121 | 	else if (opt->has_arg == PARG_REQARG) {
122 | 		/* Required argument is the next argv element, if there is one */
123 | 		bad_arg = is_argv_end(ps, argc, argv);
124 | 		if (!bad_arg) {
125 | 			ps->optarg = argv[ps->optind++];
126 | 		}
127 | 	}
128 | 
129 | 	if (bad_arg) {
130 | 		ps->optopt = opt->flag ? 0 : opt->val;
131 | 		return optstring[0] == ':' ? ':' : '?';
132 | 	}
133 | 	if (opt->flag != NULL) {
134 | 		*opt->flag = opt->val;
135 | 		return 0;
136 | 	}
137 | 
138 | 	return opt->val;
139 | }
140 | 
141 | void
142 | parg_init(struct parg_state *ps)
143 | {
144 | 	ps->optind = 1;      /* parsing starts at argv[1] */
145 | 	ps->optopt = '?';
146 | 	ps->optarg = NULL;
147 | 	ps->nextchar = NULL; /* no element is being parsed yet */
148 | }
149 | 
150 | int
151 | parg_getopt(struct parg_state *ps, int argc, char *const argv[],
152 |             const char *optstring)
153 | { /* Short options only: same parser, called with no longopts and no longindex */
154 | 	return parg_getopt_long(ps, argc, argv, optstring, NULL, NULL);
155 | }
156 | 
157 | int
158 | parg_getopt_long(struct parg_state *ps, int argc, char *const argv[],
159 |                  const char *optstring,
160 |                  const struct parg_option *longopts, int *longindex)
161 | {
162 | 	assert(ps != NULL);
163 | 	assert(argv != NULL);
164 | 	assert(optstring != NULL);
165 | 
166 | 	ps->optarg = NULL; /* cleared on every call, set again only if there is an argument */
167 | 
168 | 	if (argc < 2) { /* nothing to parse besides argv[0] */
169 | 		return -1;
170 | 	}
171 | 
172 | 	/* Advance to next element if the current one is used up */
173 | 	if (ps->nextchar == NULL || *ps->nextchar == '\0') {
174 | 		if (is_argv_end(ps, argc, argv)) {
175 | 			return -1;
176 | 		}
177 | 
178 | 		ps->nextchar = argv[ps->optind++];
179 | 
180 | 		/* Check for nonoption element (including '-') */
181 | 		if (ps->nextchar[0] != '-' || ps->nextchar[1] == '\0') {
182 | 			ps->optarg = ps->nextchar;
183 | 			ps->nextchar = NULL;
184 | 			return 1; /* nonoption, passed back through optarg */
185 | 		}
186 | 
187 | 		/* Check for '--' */
188 | 		if (ps->nextchar[1] == '-') {
189 | 			if (ps->nextchar[2] == '\0') {
190 | 				ps->nextchar = NULL;
191 | 				return -1; /* element is exactly '--', which ends parsing */
192 | 			}
193 | 
194 | 			if (longopts != NULL) { /* without longopts, '--name' falls through to short matching */
195 | 				ps->nextchar += 2;
196 | 
197 | 				return match_long(ps, argc, argv, optstring,
198 | 				                  longopts, longindex);
199 | 			}
200 | 		}
201 | 
202 | 		ps->nextchar++; /* skip the leading '-' */
203 | 	}
204 | 
205 | 	/* Match short option at nextchar */
206 | 	return match_short(ps, argc, argv, optstring);
207 | }
208 | 
209 | /*
210 |  * Reverse the order of the elements `v[i]` up to, but not including, `v[j]`.
211 |  */
212 | static void
213 | reverse(char *v[], int i, int j)
214 | {
215 | 	for (; j - i > 1; ++i, --j) {
216 | 		char **const front = &v[i];
217 | 		char **const back = &v[j - 1];
218 | 		char *const held = *front;
219 | 		*front = *back;
220 | 		*back = held;
221 | 	}
222 | }
223 | 
224 | /*
225 |  * Reorder elements of `argv` with no special cases, moving options in front
226 |  * of nonoptions. Returns `argc` if `argc < 2`, otherwise `l + (r - m)`.
227 |  * This function assumes there is no `--` element, and the last element
228 |  * is not an option missing a required argument.
229 |  *
230 |  * The algorithm is described here:
231 |  * http://hardtoc.com/2016/11/07/reordering-arguments.html
232 |  */
233 | static int
234 | parg_reorder_simple(int argc, char *argv[],
235 |                     const char *optstring,
236 |                     const struct parg_option *longopts)
237 | {
238 | 	struct parg_state ps;
239 | 	int change;
240 | 	int l = 0;
241 | 	int m = 0;
242 | 	int r = 0;
243 | 
244 | 	if (argc < 2) {
245 | 		return argc;
246 | 	}
247 | 
248 | 	do { /* one pass over argv; repeated until a pass moves nothing */
249 | 		int nextind;
250 | 		int c;
251 | 
252 | 		parg_init(&ps); /* every pass parses again from argv[1] */
253 | 
254 | 		nextind = ps.optind;
255 | 
256 | 		/* Parse until end of argument (nextchar is NULL or at the terminator) */
257 | 		do {
258 | 			c = parg_getopt_long(&ps, argc, argv, optstring, longopts, NULL);
259 | 		} while (ps.nextchar != NULL && *ps.nextchar != '\0');
260 | 
261 | 		change = 0;
262 | 
263 | 		do {
264 | 			/* Find next non-option; l becomes its index */
265 | 			for (l = nextind; c != 1 && c != -1;) {
266 | 				l = ps.optind;
267 | 
268 | 				do {
269 | 					c = parg_getopt_long(&ps, argc, argv, optstring, longopts, NULL);
270 | 				} while (ps.nextchar != NULL && *ps.nextchar != '\0');
271 | 			}
272 | 
273 | 			/* Find next option; m becomes its index */
274 | 			for (m = l; c == 1;) {
275 | 				m = ps.optind;
276 | 
277 | 				do {
278 | 					c = parg_getopt_long(&ps, argc, argv, optstring, longopts, NULL);
279 | 				} while (ps.nextchar != NULL && *ps.nextchar != '\0');
280 | 			}
281 | 
282 | 			/* Find next non-option; r becomes its index */
283 | 			for (r = m; c != 1 && c != -1;) {
284 | 				r = ps.optind;
285 | 
286 | 				do {
287 | 					c = parg_getopt_long(&ps, argc, argv, optstring, longopts, NULL);
288 | 				} while (ps.nextchar != NULL && *ps.nextchar != '\0');
289 | 			}
290 | 
291 | 			/* Find next option; the next round starts from its index */
292 | 			for (nextind = r; c == 1;) {
293 | 				nextind = ps.optind;
294 | 
295 | 				do {
296 | 					c = parg_getopt_long(&ps, argc, argv, optstring, longopts, NULL);
297 | 				} while (ps.nextchar != NULL && *ps.nextchar != '\0');
298 | 			}
299 | 
300 | 			if (m < r) { /* three reversals rotate [m, r) in front of [l, m), keeping order within each */
301 | 				change = 1;
302 | 				reverse(argv, l, m);
303 | 				reverse(argv, m, r);
304 | 				reverse(argv, l, r);
305 | 			}
306 | 		} while (c != -1);
307 | 	} while (change != 0);
308 | 
309 | 	return l + (r - m); /* index just past the elements last rotated to position l */
310 | }
311 | 
312 | int
313 | parg_reorder(int argc, char *argv[],
314 |              const char *optstring,
315 |              const struct parg_option *longopts)
316 | {
317 | 	struct parg_state state;
318 | 	int normal_end;
319 | 	int first_nonopt;
320 | 
321 | 	assert(argv != NULL);
322 | 	assert(optstring != NULL);
323 | 
324 | 	if (argc < 2) {
325 | 		return argc;
326 | 	}
327 | 
328 | 	parg_init(&state);
329 | 
330 | 	/* Scan for `--`, the end of argv, or a trailing option with an error */
331 | 	for (;;) {
332 | 		int opt;
333 | 		normal_end = state.optind;
334 | 		opt = parg_getopt_long(&state, argc, argv, optstring, longopts, NULL);
335 | 		if (opt == -1) {
336 | 			break;
337 | 		}
338 | 		if ((opt == '?' || opt == ':') && is_argv_end(&state, argc, argv)) {
339 | 			normal_end = state.optind - 1;
340 | 			break;
341 | 		}
342 | 	}
343 | 
344 | 	first_nonopt = parg_reorder_simple(normal_end, argv, optstring, longopts);
345 | 
346 | 	/* Move the element at normal_end (if any) in front of the nonoptions */
347 | 	if (normal_end < argc) {
348 | 		reverse(argv, first_nonopt, normal_end);
349 | 		reverse(argv, first_nonopt, normal_end + 1);
350 | 		++first_nonopt;
351 | 	}
352 | 
353 | 	return first_nonopt;
354 | }
355 | 


--------------------------------------------------------------------------------
/parg.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * parg - parse argv
  3 |  *
  4 |  * Written in 2015-2016 by Joergen Ibsen
  5 |  *
  6 |  * To the extent possible under law, the author(s) have dedicated all
  7 |  * copyright and related and neighboring rights to this software to the
  8 |  * public domain worldwide. This software is distributed without any
  9 |  * warranty. <http://creativecommons.org/publicdomain/zero/1.0/>
 10 |  */
 11 | 
 12 | #ifndef PARG_H_INCLUDED
 13 | #define PARG_H_INCLUDED
 14 | 
 15 | #ifdef __cplusplus
 16 | extern "C" {
 17 | #endif
 18 | 
 19 | #define PARG_VER_MAJOR 1        /**< Major version number */
 20 | #define PARG_VER_MINOR 0        /**< Minor version number */
 21 | #define PARG_VER_PATCH 2        /**< Patch version number */
 22 | #define PARG_VER_STRING "1.0.2" /**< Version number as a string */
 23 | 
 24 | /**
 25 |  * Structure containing state between calls to parser.
 26 |  *
 27 |  * @see parg_init
 28 |  */
 29 | struct parg_state {
 30 | 	const char *optarg;   /**< Pointer to option argument or nonoption element, if any */
 31 | 	int optind;           /**< Next index in argv to process */
 32 | 	int optopt;           /**< Option value resulting in error, if any */
 33 | 	const char *nextchar; /**< Next character to process in current element, or `NULL` */
 34 | };
 35 | 
 36 | /**
 37 |  * Structure for supplying long options to `parg_getopt_long()`.
 38 |  *
 39 |  * @see parg_getopt_long
 40 |  */
 41 | struct parg_option {
 42 | 	const char *name; /**< Name of option (an entry with a `NULL` name ends the array) */
 43 | 	int has_arg;      /**< Option argument status, a `parg_arg_num` value */
 44 | 	int *flag;        /**< Pointer to flag variable, or `NULL` to have `val` returned */
 45 | 	int val;          /**< Value of option */
 46 | };
 47 | 
 48 | /**
 49 |  * Values for `has_arg` flag in `parg_option`.
 50 |  *
 51 |  * @see parg_option
 52 |  */
 53 | typedef enum {
 54 | 	PARG_NOARG,  /**< Option takes no argument */
 55 | 	PARG_REQARG, /**< Option requires an argument */
 56 | 	PARG_OPTARG  /**< Option argument is optional */
 57 | } parg_arg_num;
 58 | 
 59 | /**
 60 |  * Initialize `ps`.
 61 |  *
 62 |  * Must be called before using state with a parser.
 63 |  *
 64 |  * @see parg_state
 65 |  *
 66 |  * @param ps pointer to state
 67 |  */
 68 | void
 69 | parg_init(struct parg_state *ps);
 70 | 
 71 | /**
 72 |  * Parse next short option in `argv`.
 73 |  *
 74 |  * Elements in `argv` that contain short options start with a single dash
 75 |  * followed by one or more option characters, and optionally an option
 76 |  * argument for the last option character. Examples are '`-d`', '`-ofile`',
 77 |  * and '`-dofile`'.
 78 |  *
 79 |  * Consecutive calls to this function match the command-line arguments in
 80 |  * `argv` against the short option characters in `optstring`.
 81 |  *
 82 |  * If an option character in `optstring` is followed by a colon, '`:`', the
 83 |  * option requires an argument. If it is followed by two colons, the option
 84 |  * may take an optional argument.
 85 |  *
 86 |  * If a match is found, `optarg` points to the option argument, if any, and
 87 |  * the value of the option character is returned.
 88 |  *
 89 |  * If a match is found, but is missing a required option argument, `optopt`
 90 |  * is set to the option character. If the first character in `optstring` is
 91 |  * '`:`', then '`:`' is returned, otherwise '`?`' is returned.
 92 |  *
 93 |  * If no option character in `optstring` matches a short option, `optopt`
 94 |  * is set to the option character, and '`?`' is returned.
 95 |  *
 96 |  * If an element of argv does not contain options (a nonoption element),
 97 |  * `optarg` points to the element, and `1` is returned.
 98 |  *
 99 |  * An element consisting of a single dash, '`-`', is returned as a nonoption.
100 |  *
101 |  * Parsing stops and `-1` is returned, when the end of `argv` is reached, or
102 |  * if an element is exactly '`--`'.
103 |  *
104 |  * Works similarly to `getopt`, if `optstring` were prefixed by '`-`'.
105 |  *
106 |  * @param ps pointer to state
107 |  * @param argc number of elements in `argv`
108 |  * @param argv array of pointers to command-line arguments
109 |  * @param optstring string containing option characters
110 |  * @return option value on match, `1` on nonoption element, `-1` on end of
111 |  * arguments, '`?`' on unmatched option, '`?`' or '`:`' on option argument
112 |  * error
113 |  */
114 | int
115 | parg_getopt(struct parg_state *ps, int argc, char *const argv[],
116 |             const char *optstring);
117 | 
118 | /**
119 |  * Parse next long or short option in `argv`.
120 |  *
121 |  * Elements in `argv` that contain a long option start with two dashes
122 |  * followed by a string, and optionally an equal sign and an option argument.
123 |  * Examples are '`--help`' and '`--size=5`'.
124 |  *
125 |  * If no exact match is found, an unambiguous prefix of a long option will
126 |  * match. For example, if '`foo`' and '`foobar`' are valid long options, then
127 |  * '`--fo`' is ambiguous and will not match, '`--foo`' matches exactly, and
128 |  * '`--foob`' is an unambiguous prefix and will match.
129 |  *
130 |  * If a long option match is found, and `flag` is `NULL`, `val` is returned.
131 |  *
132 |  * If a long option match is found, and `flag` is not `NULL`, `val` is stored
133 |  * in the variable `flag` points to, and `0` is returned.
134 |  *
135 |  * If a long option match is found, but is missing a required option argument,
136 |  * or has an option argument even though it takes none, `optopt` is set to
137 |  * `val` if `flag` is `NULL`, and `0` otherwise. If the first character in
138 |  * `optstring` is '`:`', then '`:`' is returned, otherwise '`?`' is returned.
139 |  *
140 |  * If `longindex` is not `NULL`, the index of the entry in `longopts` that
141 |  * matched is stored there.
142 |  *
143 |  * If no long option in `longopts` matches a long option, '`?`' is returned.
144 |  *
145 |  * Handling of nonoptions and short options is like `parg_getopt()`.
146 |  *
147 |  * If no short options are required, an empty string, `""`, should be passed
148 |  * as `optstring`.
149 |  *
150 |  * Works similarly to `getopt_long`, if `optstring` were prefixed by '`-`'.
151 |  *
152 |  * @see parg_getopt
153 |  *
154 |  * @param ps pointer to state
155 |  * @param argc number of elements in `argv`
156 |  * @param argv array of pointers to command-line arguments
157 |  * @param optstring string containing option characters
158 |  * @param longopts array of `parg_option` structures
159 |  * @param longindex pointer to variable to store index of matching option in
160 |  * @return option value on match, `0` for flag option, `1` on nonoption
161 |  * element, `-1` on end of arguments, '`?`' on unmatched or ambiguous option,
162 |  * '`?`' or '`:`' on option argument error
163 |  */
164 | int
165 | parg_getopt_long(struct parg_state *ps, int argc, char *const argv[],
166 |                  const char *optstring,
167 |                  const struct parg_option *longopts, int *longindex);
168 | 
169 | /**
170 |  * Reorder elements of `argv` so options appear first.
171 |  *
172 |  * If there are no long options, `longopts` may be `NULL`.
173 |  *
174 |  * The return value can be used as `argc` parameter for `parg_getopt()` and
175 |  * `parg_getopt_long()`.
176 |  *
177 |  * @param argc number of elements in `argv`
178 |  * @param argv array of pointers to command-line arguments
179 |  * @param optstring string containing option characters
180 |  * @param longopts array of `parg_option` structures
181 |  * @return index of first nonoption in `argv` on success, `-1` on error
182 |  */
183 | int
184 | parg_reorder(int argc, char *argv[],
185 |              const char *optstring,
186 |              const struct parg_option *longopts);
187 | 
188 | #ifdef __cplusplus
189 | } /* extern "C" */
190 | #endif
191 | 
192 | #endif /* PARG_H_INCLUDED */
193 | 


--------------------------------------------------------------------------------