├── tinyZZZ.exe ├── example.txt.zst ├── third-party └── lpaq8_official.exe ├── src ├── lz4C.h ├── lz4D.h ├── gzipC.h ├── lzmaC.h ├── lzmaD.h ├── zstdD.h ├── zipC.h ├── lpaq8CD.h ├── FileIO.h ├── FileIO.c ├── lz4D.c ├── lz4C.c ├── tinyZZZ_main.c ├── zipC.c ├── lzmaD.c ├── gzipC.c ├── zstdD.c └── lzmaC.c ├── LICENSE └── README.md /tinyZZZ.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXuan95/TinyZZZ/HEAD/tinyZZZ.exe -------------------------------------------------------------------------------- /example.txt.zst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXuan95/TinyZZZ/HEAD/example.txt.zst -------------------------------------------------------------------------------- /third-party/lpaq8_official.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXuan95/TinyZZZ/HEAD/third-party/lpaq8_official.exe -------------------------------------------------------------------------------- /src/lz4C.h: -------------------------------------------------------------------------------- 1 | #ifndef __LZ4_C_H__ 2 | #define __LZ4_C_H__ 3 | 4 | #include 5 | #include 6 | 7 | int lz4C (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len); 8 | 9 | #endif // __LZ4_C_H__ 10 | -------------------------------------------------------------------------------- /src/lz4D.h: -------------------------------------------------------------------------------- 1 | #ifndef __LZ4_D_H__ 2 | #define __LZ4_D_H__ 3 | 4 | #include 5 | #include 6 | 7 | int lz4D (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len); 8 | 9 | #endif // __LZ4_D_H__ 10 | -------------------------------------------------------------------------------- /src/gzipC.h: -------------------------------------------------------------------------------- 1 | #ifndef __GZIP_C_H__ 2 | #define 
__GZIP_C_H__ 3 | 4 | #include 5 | #include 6 | 7 | int gzipC (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len); 8 | 9 | #endif // __GZIP_C_H__ 10 | -------------------------------------------------------------------------------- /src/lzmaC.h: -------------------------------------------------------------------------------- 1 | #ifndef __LZMA_C_H__ 2 | #define __LZMA_C_H__ 3 | 4 | #include 5 | #include 6 | 7 | int lzmaC (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len); 8 | 9 | #endif // __LZMA_C_H__ 10 | -------------------------------------------------------------------------------- /src/lzmaD.h: -------------------------------------------------------------------------------- 1 | #ifndef __LZMA_D_H__ 2 | #define __LZMA_D_H__ 3 | 4 | #include 5 | #include 6 | 7 | int lzmaD (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len); 8 | 9 | #endif // __LZMA_D_H__ 10 | -------------------------------------------------------------------------------- /src/zstdD.h: -------------------------------------------------------------------------------- 1 | #ifndef __ZSTD_D_H__ 2 | #define __ZSTD_D_H__ 3 | 4 | #include 5 | #include 6 | 7 | void zstdD (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len); 8 | 9 | #endif // __ZSTD_D_H__ 10 | -------------------------------------------------------------------------------- /src/zipC.h: -------------------------------------------------------------------------------- 1 | #ifndef __ZIP_C_H__ 2 | #define __ZIP_C_H__ 3 | 4 | #include 5 | #include 6 | 7 | int zipClzma (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len, const char *file_name_in_zip); 8 | int zipCdeflate (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len, const char *file_name_in_zip); 9 | 10 | #endif // __ZIP_C_H__ 11 | -------------------------------------------------------------------------------- /src/lpaq8CD.h: 
-------------------------------------------------------------------------------- 1 | #ifndef __LPAQ8_CD_H__ 2 | #define __LPAQ8_CD_H__ 3 | 4 | #include 5 | #include 6 | 7 | int lpaq8D (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len, uint8_t *p_level, size_t *p_mem_usage); 8 | int lpaq8C (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len, uint8_t level, size_t *p_mem_usage); 9 | 10 | #endif // __LPAQ8_CD_H__ 11 | -------------------------------------------------------------------------------- /src/FileIO.h: -------------------------------------------------------------------------------- 1 | #ifndef __FILE_IO_H__ 2 | #define __FILE_IO_H__ 3 | 4 | #include 5 | #include 6 | 7 | 8 | // Function : read all data from file to a buffer. 9 | // Note : The buffer is malloc in this function and need to be free outside by user ! 10 | // Parameter : 11 | // size_t *p_len : getting the data length, i.e. the file length. 12 | // const char *filename : file name 13 | // Return : 14 | // non-NULL pointer : success. Return the data buffer pointer. The data length will be on *p_len 15 | // NULL : failed 16 | uint8_t *loadFromFile (size_t *p_len, const char *filename); 17 | 18 | 19 | // Function : write data from buffer to a file. 
20 | // Parameter : 21 | // const uint8_t *p_buf : data buffer pointer 22 | // size_t len : data length 23 | // const char *filename : file name 24 | // Return : 25 | // 1 : failed 26 | // 0 : success 27 | int saveToFile (const uint8_t *p_buf, size_t len, const char *filename); 28 | 29 | 30 | #endif // __FILE_IO_H__ 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 https://github.com/WangXuan95 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/FileIO.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | 8 | // Function : read all data from file to a buffer. 
// Note      : The buffer is allocated with malloc() in this function and must be freed by the caller!
//             For a non-empty file the buffer is allocated 65536 bytes larger than the file length.
// Parameter :
//    size_t     *p_len    : receives the data length, i.e. the file length. It is 0 whenever NULL is returned.
//    const char *filename : file name
// Return    :
//    non-NULL pointer : success. Return the data buffer pointer. The data length will be on *p_len
//                       (for an empty file a 1-byte unfilled buffer is returned and *p_len is 0)
//    NULL             : failed (cannot open the file, cannot seek/tell, out of memory, or short read)
uint8_t *loadFromFile (size_t *p_len, const char *filename) {
    const size_t extra = 65536;      // spare bytes allocated behind the file data (same amount as before)
    FILE    *fp    = NULL;
    uint8_t *p_buf = NULL;
    long     flen  = 0;
    size_t   len   = 0;
    size_t   rlen  = 0;

    *p_len = 0;                      // stays 0 on every failure path

    fp = fopen(filename, "rb");
    if (fp == NULL)
        return NULL;

    if (0 != fseek(fp, 0, SEEK_END)) {
        fclose(fp);
        return NULL;
    }

    flen = ftell(fp);                // get file data length
    if (flen < 0) {                  // ftell() reports failure as -1L; it must not be converted to a huge size_t
        fclose(fp);
        return NULL;
    }
    len = (size_t)flen;

    if (0 != fseek(fp, 0, SEEK_SET)) {
        fclose(fp);
        return NULL;
    }

    if (len == 0) {                  // special case : file length = 0 (empty file)
        fclose(fp);
        return (uint8_t*)malloc(1);  // return a 1-byte buffer without filling any data
    }

    if (len > (size_t)-1 - extra) {  // len + extra would wrap around
        fclose(fp);
        return NULL;
    }

    p_buf = (uint8_t*)malloc(len + extra);
    if (p_buf == NULL) {
        fclose(fp);
        return NULL;
    }

    rlen = fread(p_buf, sizeof(uint8_t), len, fp);

    fclose(fp);

    if (rlen != len) {               // actual read length differs from the expected length
        free(p_buf);
        return NULL;
    }

    *p_len = len;                    // publish the length only once the data is really in the buffer
    return p_buf;
}



// Function  : write data from buffer to a file.
68 | // Parameter : 69 | // const uint8_t *p_buf : data buffer pointer 70 | // size_t len : data length 71 | // const char *filename : file name 72 | // Return : 73 | // 1 : failed 74 | // 0 : success 75 | int saveToFile (const uint8_t *p_buf, size_t len, const char *filename) { 76 | size_t wlen = 0; 77 | FILE *fp; 78 | 79 | fp = fopen(filename, "wb"); 80 | 81 | if (fp == NULL) 82 | return 1; 83 | 84 | if (len > 0) 85 | wlen = fwrite(p_buf, sizeof(uint8_t), len, fp); 86 | 87 | fclose(fp); 88 | 89 | if (wlen != len) 90 | return 1; 91 | 92 | return 0; 93 | } 94 | 95 | -------------------------------------------------------------------------------- /src/lz4D.c: -------------------------------------------------------------------------------- 1 | #include // size_t 2 | #include // uint8_t, uint64_t 3 | 4 | #define R_OK 0 5 | #define R_DST_OVERFLOW 1 6 | #define R_SRC_OVERFLOW 2 7 | #define R_CORRUPT 3 8 | #define R_VERSION 4 9 | #define R_NOT_LZ4 5 10 | #define R_NOT_YET_SUPPORT 101 11 | 12 | #define RET_WHEN_ERR(err_code) { int ec = (err_code); if (ec) return ec; } 13 | #define RET_ERR_IF(err_code,condition) { if (condition) return err_code; } 14 | 15 | #define MAGIC_LZ4LEGACY 0x184C2102U 16 | #define MAGIC_LZ4FRAME 0x184D2204U 17 | #define MAGIC_SKIPFRAME_MIN 0x184D2A50U 18 | #define MAGIC_SKIPFRAME_MAX 0x184D2A5FU 19 | 20 | #define MIN_ML 4 21 | 22 | 23 | static int LZ4_skip (uint8_t **pp_src, uint8_t *p_src_limit, uint64_t n_bytes) { 24 | RET_ERR_IF(R_SRC_OVERFLOW, (n_bytes > p_src_limit - *pp_src)); 25 | (*pp_src) += n_bytes; 26 | return R_OK; 27 | } 28 | 29 | 30 | static int LZ4_read (uint8_t **pp_src, uint8_t *p_src_limit, uint64_t n_bytes, uint64_t *p_value) { 31 | uint64_t i; 32 | RET_ERR_IF(R_SRC_OVERFLOW, (n_bytes > p_src_limit - *pp_src)); 33 | (*p_value) = 0; 34 | for (i=0; i p_src_limit - *pp_src)); 44 | RET_ERR_IF(R_DST_OVERFLOW, (n_bytes > p_dst_limit - *pp_dst)); 45 | for (; n_bytes>0; n_bytes--) { 46 | **pp_dst = **pp_src; 47 | (*pp_src) ++; 48 | 
(*pp_dst) ++; 49 | } 50 | return R_OK; 51 | } 52 | 53 | 54 | static int LZ4_decompress_block (uint8_t **pp_src, uint8_t *p_src_limit, uint8_t **pp_dst, uint8_t *p_dst_limit, uint64_t block_csize) { 55 | RET_ERR_IF(R_SRC_OVERFLOW, (block_csize > p_src_limit - *pp_src)); 56 | p_src_limit = (*pp_src) + block_csize; 57 | for (;;) { 58 | uint8_t *p_match; 59 | uint64_t byte, ll, ml, of; 60 | RET_WHEN_ERR(LZ4_read(pp_src, p_src_limit, 1, &byte)); 61 | ml = byte & 15; 62 | ll = byte >> 4; 63 | if (ll == 15) { 64 | do { 65 | RET_WHEN_ERR(LZ4_read(pp_src, p_src_limit, 1, &byte)); 66 | ll += byte; 67 | } while (byte == 255); 68 | } 69 | RET_WHEN_ERR(LZ4_copy(pp_src, p_src_limit, pp_dst, p_dst_limit, ll)); // copy literals from src to dst 70 | if (*pp_src == p_src_limit) { 71 | break; 72 | } 73 | RET_WHEN_ERR(LZ4_read(pp_src, p_src_limit, 2, &of)); 74 | RET_ERR_IF(R_CORRUPT, of==0); 75 | if (ml == 15) { 76 | do { 77 | RET_WHEN_ERR(LZ4_read(pp_src, p_src_limit, 1, &byte)); 78 | ml += byte; 79 | } while (byte == 255); 80 | } 81 | p_match = (*pp_dst) - of; 82 | RET_WHEN_ERR(LZ4_copy(&p_match, p_dst_limit, pp_dst, p_dst_limit, (ml+MIN_ML))); // copy match 83 | } 84 | return R_OK; 85 | } 86 | 87 | 88 | static int LZ4_decompress_blocks_until_endmark (uint8_t **pp_src, uint8_t *p_src_limit, uint8_t **pp_dst, uint8_t *p_dst_limit, uint8_t block_checksum_flag) { 89 | uint64_t block_csize; 90 | RET_WHEN_ERR(LZ4_read(pp_src, p_src_limit, 4, &block_csize)); 91 | while (block_csize != 0x00000000U) { 92 | if (block_csize < 0x80000000U) { 93 | RET_WHEN_ERR(LZ4_decompress_block(pp_src, p_src_limit, pp_dst, p_dst_limit, block_csize)); 94 | } else { 95 | block_csize -= 0x80000000U; 96 | RET_WHEN_ERR(LZ4_copy(pp_src, p_src_limit, pp_dst, p_dst_limit, block_csize)); 97 | } 98 | if (block_checksum_flag) { 99 | RET_WHEN_ERR(LZ4_skip(pp_src, p_src_limit, 4)); // block checksum, TODO: check it 100 | } 101 | RET_WHEN_ERR(LZ4_read(pp_src, p_src_limit, 4, &block_csize)); 102 | } 103 | return R_OK; 104 
| } 105 | 106 | 107 | static int LZ4_decompress_blocks_legacy (uint8_t **pp_src, uint8_t *p_src_limit, uint8_t **pp_dst, uint8_t *p_dst_limit) { 108 | while (*pp_src != p_src_limit) { 109 | uint64_t block_csize; 110 | RET_WHEN_ERR(LZ4_read(pp_src, p_src_limit, 4, &block_csize)); 111 | if (block_csize == MAGIC_LZ4LEGACY || block_csize == MAGIC_LZ4FRAME || (MAGIC_SKIPFRAME_MIN <= block_csize && block_csize <= MAGIC_SKIPFRAME_MAX)) { // meeting known magic 112 | (*pp_src) -= 4; // give back 4 bytes to input stream 113 | break; 114 | } else { 115 | RET_WHEN_ERR(LZ4_decompress_block(pp_src, p_src_limit, pp_dst, p_dst_limit, block_csize)); 116 | } 117 | } 118 | return R_OK; 119 | } 120 | 121 | 122 | static int LZ4_parse_frame_descriptor (uint8_t **pp_src, uint8_t *p_src_limit, uint8_t *p_block_checksum_flag, uint8_t *p_content_checksum_flag, uint8_t *p_content_size_flag, uint64_t *p_content_size) { 123 | uint64_t bd_flg; 124 | RET_WHEN_ERR(LZ4_read(pp_src, p_src_limit, 2, &bd_flg)); 125 | RET_ERR_IF(R_NOT_YET_SUPPORT, ((bd_flg & 1) != 0)); // currently do not support dictionary 126 | RET_ERR_IF(R_VERSION, (((bd_flg >> 1) & 1) != 0)); // reserved must be 0 127 | *p_content_checksum_flag = ((bd_flg >> 2) & 1); 128 | *p_content_size_flag = ((bd_flg >> 3) & 1); 129 | *p_block_checksum_flag = ((bd_flg >> 4) & 1); 130 | RET_ERR_IF(R_VERSION, (((bd_flg >> 6) & 3) != 1)); // version must be 1 131 | RET_ERR_IF(R_VERSION, (((bd_flg >> 8)&0xF) != 0)); // reserved must be 0 132 | RET_ERR_IF(R_VERSION, (((bd_flg >>12) & 7) < 4)); // Block MaxSize must be 4, 5, 6, 7 133 | RET_ERR_IF(R_VERSION, (((bd_flg >>15) & 1) != 0)); // reserved must be 0 134 | if (*p_content_size_flag) { 135 | RET_WHEN_ERR(LZ4_read(pp_src, p_src_limit, 8, p_content_size)); 136 | } else { 137 | *p_content_size = 0; 138 | } 139 | RET_WHEN_ERR(LZ4_skip(pp_src, p_src_limit, 1)); // skip header checksum (HC) byte, TODO: check it 140 | return R_OK; 141 | } 142 | 143 | 144 | static int LZ4_decompress_frame (uint8_t 
**pp_src, uint8_t *p_src_limit, uint8_t **pp_dst, uint8_t *p_dst_limit) { 145 | uint64_t magic; 146 | RET_WHEN_ERR(LZ4_read(pp_src, p_src_limit, 4, &magic)); 147 | if (magic == MAGIC_LZ4LEGACY) { 148 | RET_WHEN_ERR(LZ4_decompress_blocks_legacy(pp_src, p_src_limit, pp_dst, p_dst_limit)); 149 | } else if (magic == MAGIC_LZ4FRAME) { 150 | uint8_t block_checksum_flag, content_checksum_flag, content_size_flag; 151 | uint64_t content_size; 152 | uint8_t *p_dst_base = *pp_dst; 153 | RET_WHEN_ERR(LZ4_parse_frame_descriptor(pp_src, p_src_limit, &block_checksum_flag, &content_checksum_flag, &content_size_flag, &content_size)); 154 | RET_WHEN_ERR(LZ4_decompress_blocks_until_endmark(pp_src, p_src_limit, pp_dst, p_dst_limit, block_checksum_flag)); 155 | if (content_checksum_flag) { 156 | RET_WHEN_ERR(LZ4_skip(pp_src, p_src_limit, 4)); // content checksum, TODO: check it 157 | } 158 | if (content_size_flag) { 159 | RET_ERR_IF(R_CORRUPT, ((*pp_dst - p_dst_base) != content_size)); 160 | } 161 | } else if (MAGIC_SKIPFRAME_MIN <= magic && magic <= MAGIC_SKIPFRAME_MAX) { 162 | uint64_t skip_frame_len; 163 | RET_WHEN_ERR(LZ4_read(pp_src, p_src_limit, 4, &skip_frame_len)); 164 | RET_WHEN_ERR(LZ4_skip(pp_src, p_src_limit, skip_frame_len)); 165 | } else { 166 | RET_ERR_IF(R_NOT_LZ4, 1); 167 | } 168 | return R_OK; 169 | } 170 | 171 | 172 | int lz4D (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len) { 173 | uint8_t *p_src_curr = p_src; 174 | uint8_t *p_src_limit = p_src + src_len; 175 | uint8_t *p_dst_curr = p_dst; 176 | uint8_t *p_dst_limit = p_dst + (*p_dst_len); 177 | RET_ERR_IF(R_SRC_OVERFLOW, p_src_curr > p_src_limit); 178 | RET_ERR_IF(R_DST_OVERFLOW, p_dst_curr > p_dst_limit); 179 | while (p_src_curr < p_src_limit) { 180 | RET_WHEN_ERR(LZ4_decompress_frame(&p_src_curr, p_src_limit, &p_dst_curr, p_dst_limit)); 181 | } 182 | *p_dst_len = p_dst_curr - p_dst; 183 | return R_OK; 184 | } 185 | 186 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![language](https://img.shields.io/badge/language-C-green.svg) ![build](https://img.shields.io/badge/build-Windows-blue.svg) ![build](https://img.shields.io/badge/build-linux-FF1010.svg) 2 | 3 | TinyZZZ 4 | =========================== 5 | 6 | TinyZZZ is a simple, standalone data compressor/decompressor which supports several popular data compression algorithms, including [GZIP](https://www.rfc-editor.org/rfc/rfc1952), [LZ4](https://github.com/lz4/lz4), [ZSTD](https://github.com/facebook/zstd), [LZMA](https://www.7-zip.org/sdk.html) and [LPAQ8](https://mattmahoney.net/dc/#lpaq). These algorithms are written in C language, unlike the official code implementation, this code mainly focuses on simplicity and easy to understand. 7 | 8 | TinyZZZ currently supports following compression algorithms: 9 | 10 | | format | file suffix | compress | decompress | 11 | | :------------------------------------------------: | :---------: | :-------------------------------: | :-------------------------------: | 12 | | **[GZIP](https://www.rfc-editor.org/rfc/rfc1952)** | .gz | [510 lines of C](./src/gzipC.c) | :x: not yet supported | 13 | | **[LZ4](https://github.com/lz4/lz4)** | .lz4 | [170 lines of C](./src/lz4C.c) | [190 lines of C](./src/lz4D.c) | 14 | | **[ZSTD](https://github.com/facebook/zstd)** | .zst | :x: not yet supported | [760 lines of C](./src/zstdD.c) | 15 | | **[LZMA](https://www.7-zip.org/sdk.html)** | .lzma | [780 lines of C](./src/lzmaC.c) | [480 lines of C](./src/lzmaD.c) | 16 | | **[LPAQ8](https://mattmahoney.net/dc/#lpaq)** | .lpaq8 | [860 lines of C](./src/lpaq8CD.c) | [860 lines of C](./src/lpaq8CD.c) | 17 | 18 | Explanation: 19 | 20 | | format | year | Explanation | 21 | | :-----------------------------------------------------: | :--: | 
:----------------------------------------------------------- | 22 | **[GZIP](https://www.rfc-editor.org/rfc/rfc1952)** | 1989 | GZIP is an old, famous lossless data compression algorithm which has excellent compatibility. The core compression algorithm of GZIP is [Deflate](https://www.rfc-editor.org/rfc/rfc1951). The file name suffix of compressed GZIP file is ".gz" | 23 | **[LZ4](https://github.com/lz4/lz4)** | 2014 | LZ4 is a new, lightweight lossless data compression algorithm with very high decompression speed. The file name suffix of compressed LZ4 file is ".lz4" | 24 | **[ZSTD](https://github.com/facebook/zstd)** | 2016 | ZSTD (Zstandard) is a new lossless data compression algorithm with high compression ratio and high decompression speed. The file name suffix of compressed ZSTD file is ".zst" | 25 | **[LZMA](https://www.7-zip.org/sdk.html)** | 2000 | LZMA is a lossless data compression algorithm with higher compression ratio than LZ4, GZIP, BZIP, and ZSTD. Several archive container formats support LZMA: (1) ".lzma" is a very simple format to contain LZMA, which is legacy and gradually replaced by ".xz" format. (2) ".7z" and ".xz" format, whose default compression method is LZMA. | 26 | **[LPAQ8](https://mattmahoney.net/dc/#lpaq)** | 2008 | LPAQ8 is a slow, high-compression-ratio lossless data compression algorithm by Alexander Rhatushnyak and Matt Mahoney. The basic principle of LPAQ8 is context-mixing instead of LZ77. You can download the official implementation of LPAQ8 from https://mattmahoney.net/dc/lpaq8.zip . I've put [the official executable file lpaq8.exe](./third-party/lpaq8_official.exe) in this repo for comparison. | 27 | **[ZIP](https://docs.fileformat.com/compression/zip/)** | 1989 | ZIP is not actually a data compression algorithm, but a container format that supports file packaging and compressing by many compression algorithms. This code supports compressing a file into a ZIP container using the Deflate algorithm or the LZMA algorithm.
| 28 | 29 |   30 | 31 |   32 | 33 | ## Linux Build 34 | 35 | On Linux, run following command to compile. The output Linux binary file is [tinyZZZ](./tinyZZZ) 36 | 37 | Note: The code complies with the C99 standard. 38 | 39 | ```bash 40 | gcc src/*.c -O2 -std=c99 -Wall -o tinyZZZ 41 | ``` 42 | 43 |   44 | 45 |   46 | 47 | ## Windows Build (MinGW) 48 | 49 | If you installed MinGW in Windows, run following command to compile. The output executable file is [tinyZZZ.exe](./tinyZZZ.exe) 50 | 51 | ```powershell 52 | gcc src\*.c -O2 -std=c99 -Wall -o tinyZZZ.exe 53 | ``` 54 | 55 |   56 | 57 |   58 | 59 | ## Windows Build (MSVC) 60 | 61 | If you added MSVC compiler (cl.exe) to environment, run following command to compile. The output executable file is [tinyZZZ.exe](./tinyZZZ.exe) 62 | 63 | ```powershell 64 | cl src\*.c /Ox /FetinyZZZ.exe 65 | ``` 66 | 67 |   68 | 69 |   70 | 71 | ## Usage 72 | 73 | Run TinyZZZ to show usage: 74 | 75 | ``` 76 | └─$ ./tinyZZZ 77 | |-------------------------------------------------------------------------------------------| 78 | | Usage : | 79 | | - decompress a GZIP file : *** not yet supported! *** | 80 | | - compress a file to GZIP file : tinyZZZ -c --gzip | 81 | | - decompress a LZ4 file : tinyZZZ -d --lz4 | 82 | | - compress a file to LZ4 file : tinyZZZ -c --lz4 | 83 | | - decompress a ZSTD file : tinyZZZ -d --zstd | 84 | | - compress a file to ZSTD file : *** not yet supported! 
*** | 85 | | - decompress a LZMA file : tinyZZZ -d --lzma | 86 | | - compress a file to LZMA file : tinyZZZ -c --lzma | 87 | | - decompress a LPAQ8 file : tinyZZZ -d --lpaq8 | 88 | | - compress a file to LPAQ8 file: tinyZZZ -c --lpaq8 | 89 | |-------------------------------------------------------------------------------------------| 90 | | Usage (compress to ZIP container) : | 91 | | - use Deflate method : tinyZZZ -c --gzip --zip | 92 | | - use LZMA method : tinyZZZ -c --lzma --zip | 93 | |-------------------------------------------------------------------------------------------| 94 | ``` 95 | 96 |   97 | 98 | ### Example Usage 99 | 100 | **Example1**: decompress the file `example.txt.zst` to `example.txt` use following command. 101 | 102 | ```bash 103 | ./tinyZZZ -d --zstd example.txt.zst example.txt 104 | ``` 105 | 106 | **Example2**: compress `example.txt` to `example.txt.gz` use following command. The outputting ".gz" file can be extracted by many other software, such as [7ZIP](https://www.7-zip.org), [WinRAR](https://www.rarlab.com/), etc. 107 | 108 | ```bash 109 | ./tinyZZZ -c --gzip example.txt example.txt.gz 110 | ``` 111 | 112 | **Example3**: compress `example.txt` to `example.txt.lzma` use following command. 113 | 114 | ```bash 115 | ./tinyZZZ -c --lzma example.txt example.txt.lzma 116 | ``` 117 | 118 | **Example4**: decompress `example.txt.lzma` to `example.txt` use following command. 119 | 120 | ```bash 121 | ./tinyZZZ -d --lzma example.txt.lzma example.txt 122 | ``` 123 | 124 | **Example5**: compress `example.txt` to `example.txt.lz4` use following command. 125 | 126 | ```bash 127 | ./tinyZZZ -c --lz4 example.txt example.txt.lz4 128 | ``` 129 | 130 | **Example6**: decompress `example.txt.lz4` to `example.txt` use following command. 131 | 132 | ```bash 133 | ./tinyZZZ -d --lz4 example.txt.lz4 example.txt 134 | ``` 135 | 136 | **Example7**: compress `example.txt` to `example.zip` use following command (method=deflate). 
The outputting ".zip" file can be extracted by many other software, such as [7ZIP](https://www.7-zip.org), [WinRAR](https://www.rarlab.com/), etc. 137 | 138 | ```bash 139 | ./tinyZZZ -c --gzip --zip example.txt example.zip 140 | ``` 141 | 142 | **Example8**: compress `example.txt` to `example.zip` use following command (method=LZMA). The outputting ".zip" file can be extracted by many other software, such as [7ZIP](https://www.7-zip.org), [WinRAR](https://www.rarlab.com/), etc. 143 | 144 | ```bash 145 | ./tinyZZZ -c --lzma --zip example.txt example.zip 146 | ``` 147 | 148 |   149 | 150 |   151 | 152 | ## Appendix: How to decompress ".lzma" file 153 | 154 | #### on Windows 155 | 156 | On Windows, you can use the [official 7ZIP/LZMA software](https://www.7-zip.org/sdk.html) to decompress the generated ".lzma" file. To get it, download the "LZMA SDK", extract it. In the "bin" directory, you can see "lzma.exe". To decompress a ".lzma" file, run command as format: 157 | 158 | ```powershell 159 | .\lzma.exe d [input_lzma_file] [output_file] 160 | ``` 161 | 162 | #### on Linux 163 | 164 | On Linux, you can decompress ".lzma" file using the official "p7zip" software. First install it: 165 | 166 | ```bash 167 | apt-get install p7zip 168 | ``` 169 | 170 | Then use following command to decompress the ".lzma" file. It may report an error : *"ERROR: There are some data after the end of the payload data"* . Just ignore it, because there may be an extra "0x00" at the end of ".lzma" file. It won't affect the normal data decompression.
171 | 172 | ```bash 173 | 7z x [input_lzma_file] 174 | ``` 175 | 176 |   177 | 178 |   179 | 180 | ## Related Links 181 | 182 | - GZIP specification: https://www.rfc-editor.org/rfc/rfc1952 183 | - Deflate algorithm specification: https://www.rfc-editor.org/rfc/rfc1951 184 | 185 | - LZ4 official code: https://github.com/lz4/lz4 186 | 187 | - LZ4 specification: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md , https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md 188 | 189 | - ZSTD specification: https://www.rfc-editor.org/rfc/rfc8878 190 | 191 | - ZSTD official code: https://github.com/facebook/zstd 192 | 193 | - ZSTD official lightweight decompressor: https://github.com/facebook/zstd/tree/dev/doc/educational_decoder 194 | 195 | - LZMA official code and the 7ZIP software: https://www.7-zip.org/sdk.html 196 | - another LZMA official code and the XZ software: https://tukaani.org/xz/ 197 | 198 | - An introduction to LZMA algorithm: https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Markov_chain_algorithm 199 | 200 | - An FPGA-based hardware GZIP data compressor: https://github.com/WangXuan95/FPGA-Gzip-compressor 201 | 202 | - An FPGA-based hardware LZMA data compressor: https://github.com/WangXuan95/FPGA-LZMA-compressor 203 | 204 | - LPAQ8 official code : https://mattmahoney.net/dc/#lpaq 205 | 206 | - principle of context-mixing and PAQ : https://mattmahoney.net/dc/dce.html#Section_43 207 | 208 | -------------------------------------------------------------------------------- /src/lz4C.c: -------------------------------------------------------------------------------- 1 | #include // size_t 2 | #include // uint8_t, uint64_t 3 | #include // malloc, free 4 | 5 | 6 | #define R_OK 0 7 | #define R_DST_OVERFLOW 1 8 | #define R_SRC_OVERFLOW 2 9 | #define R_MEM_RANOUT 3 10 | 11 | #define RET_WHEN_ERR(err_code) { int ec = (err_code); if (ec) return ec; } 12 | #define RET_ERR_IF(err_code,condition) { if (condition) return err_code; } 13 | 14 | #define
MIN_COMPRESSED_BLOCK_SIZE 13 15 | #define MAX_COMPRESSED_BLOCK_SIZE 4194304 16 | 17 | #define ML_MIN 4 18 | #define ML_MAX 65535 19 | #define ML_SKIP_THRESH 36 20 | #define OF_MIN 1 21 | #define OF_MAX 65535 22 | #define N_HASH 6543 23 | #define N_HASH1_ROW 16 24 | #define N_HASH2_ROW 16 25 | #define N_HASH3_ROW 4 26 | #define COST_LIT 1 27 | 28 | static size_t LZ4_cost_match (/*size_t ll,*/ uint16_t ml) { 29 | size_t cost = 3; 30 | // if (ll >= 15) { 31 | // ll -= 15; 32 | // cost += 1; 33 | // } 34 | // for (; ll>=255; ll-=255) cost ++; 35 | ml -= ML_MIN; 36 | if (ml >= 15) { 37 | ml -= 15; 38 | cost += 1; 39 | } 40 | for (; ml>=255; ml-=255) cost ++; 41 | return cost; 42 | } 43 | 44 | 45 | static int LZ4_write (uint8_t **pp_dst, uint8_t *p_dst_limit, uint8_t byte) { 46 | RET_ERR_IF(R_DST_OVERFLOW, (*pp_dst >= p_dst_limit)); 47 | *((*pp_dst)++) = byte; 48 | return R_OK; 49 | } 50 | 51 | 52 | static int LZ4_write_vlc (uint8_t **pp_dst, uint8_t *p_dst_limit, uint64_t value) { 53 | for (;;) { 54 | if (value < 255) { 55 | RET_WHEN_ERR(LZ4_write(pp_dst, p_dst_limit, value)); 56 | break; 57 | } else { 58 | RET_WHEN_ERR(LZ4_write(pp_dst, p_dst_limit, 255)); 59 | value -= 255; 60 | } 61 | } 62 | return R_OK; 63 | } 64 | 65 | 66 | static int LZ4_copy (uint8_t *p_src, uint8_t *p_src_end, uint8_t **pp_dst, uint8_t *p_dst_limit) { 67 | RET_ERR_IF(R_DST_OVERFLOW, (p_src_end - p_src > p_dst_limit - *pp_dst)); 68 | for (; p_src>8)); 89 | ml -= ML_MIN; 90 | if (ml < 15) { 91 | (*p_token) |= ml; 92 | } else { 93 | (*p_token) |= 15; 94 | RET_WHEN_ERR(LZ4_write_vlc(pp_dst, p_dst_limit, ml-15)); 95 | } 96 | } 97 | return R_OK; 98 | } 99 | 100 | 101 | typedef struct { 102 | uint16_t ml [MAX_COMPRESSED_BLOCK_SIZE]; 103 | uint16_t of [MAX_COMPRESSED_BLOCK_SIZE]; 104 | union { 105 | struct { 106 | const uint8_t* hash1_tab [N_HASH] [N_HASH1_ROW]; 107 | const uint8_t* hash2_tab [N_HASH] [N_HASH2_ROW]; 108 | const uint8_t* hash3_tab [N_HASH] [N_HASH3_ROW]; 109 | }; 110 | size_t cost 
[MAX_COMPRESSED_BLOCK_SIZE+1]; 111 | }; 112 | } LZ4_compress_workspace_t; 113 | 114 | 115 | static int LZ4_compress_block (uint8_t *p_src, uint8_t *p_src_end, uint8_t **pp_dst, uint8_t *p_dst_limit, LZ4_compress_workspace_t *wksp) { 116 | size_t src_len = p_src_end - p_src; 117 | size_t i, skip=0; 118 | int k; 119 | 120 | // memset(wksp->hash1_tab, 0, sizeof(wksp->hash1_tab)); 121 | // memset(wksp->hash2_tab, 0, sizeof(wksp->hash2_tab)); 122 | 123 | for (i=0; i+MIN_COMPRESSED_BLOCK_SIZE+1ml[i] = 0; 132 | 133 | if (skip > 0) { 134 | skip --; 135 | } else { 136 | size_t len_max = src_len - (MIN_COMPRESSED_BLOCK_SIZE+1) - i; 137 | if (len_max > ML_MAX) { 138 | len_max = ML_MAX; 139 | } 140 | 141 | for (k=0; k<(N_HASH1_ROW+N_HASH2_ROW+N_HASH3_ROW); k++) { 142 | const uint8_t *p_match = (k < N_HASH1_ROW) ? wksp->hash1_tab[hash1][k] : 143 | (k < N_HASH1_ROW+N_HASH2_ROW) ? wksp->hash2_tab[hash2][k-N_HASH1_ROW] : 144 | wksp->hash3_tab[hash3][k-N_HASH1_ROW-N_HASH2_ROW] ; 145 | if (p_src <= p_match && p_match < p) { 146 | size_t of = p - p_match; 147 | if (OF_MIN <= of && of <= OF_MAX) { 148 | size_t ml; 149 | for (ml=0; ml= ML_MIN && ml > wksp->ml[i]) { 151 | wksp->ml[i] = ml; 152 | wksp->of[i] = of; 153 | if (ml >= ML_SKIP_THRESH) { 154 | skip = ml - 1; 155 | } 156 | } 157 | } 158 | } 159 | } 160 | } 161 | 162 | for (k=0; khash1_tab[hash1][k] = wksp->hash1_tab[hash1][k+1]; 164 | } 165 | wksp->hash1_tab[hash1][N_HASH1_ROW-1] = p; 166 | 167 | for (k=0; khash2_tab[hash2][k] = wksp->hash2_tab[hash2][k+1]; 169 | } 170 | wksp->hash2_tab[hash2][N_HASH2_ROW-1] = p; 171 | 172 | for (k=0; khash3_tab[hash3][k] = wksp->hash3_tab[hash3][k+1]; 174 | } 175 | wksp->hash3_tab[hash3][N_HASH3_ROW-1] = p; 176 | } 177 | 178 | for (; iml[i] = 0; 180 | } 181 | 182 | wksp->cost[src_len] = 0; 183 | 184 | for (i=src_len; i>0;) { 185 | i --; 186 | wksp->cost[i] = wksp->cost[i+1] + COST_LIT; 187 | { 188 | uint16_t ml = wksp->ml[i]; 189 | if (ml > 0) { 190 | size_t match_cost = wksp->cost[i+ml] + 
LZ4_cost_match(ml); 191 | if (wksp->cost[i] > match_cost) { 192 | wksp->cost[i] = match_cost; 193 | } else { 194 | wksp->ml[i] = 0; 195 | wksp->of[i] = 0; 196 | } 197 | } 198 | } 199 | } 200 | 201 | { 202 | uint8_t *p_last_start = p_src; 203 | for (i=0; iml[i] > 0) { 205 | RET_WHEN_ERR(LZ4_compress_seqence(p_last_start, &p_src[i], wksp->ml[i], wksp->of[i], pp_dst, p_dst_limit)); 206 | i += wksp->ml[i]; 207 | p_last_start = p_src + i; 208 | i --; 209 | } 210 | } 211 | RET_WHEN_ERR(LZ4_compress_seqence(p_last_start, &p_src[src_len], 0, 0, pp_dst, p_dst_limit)); 212 | } 213 | 214 | return R_OK; 215 | } 216 | 217 | 218 | static int LZ4_compress_or_copy_block_with_csize (uint8_t *p_src, uint8_t *p_src_end, uint8_t **pp_dst, uint8_t *p_dst_limit, LZ4_compress_workspace_t *wksp) { 219 | uint64_t csize = p_src_end - p_src; 220 | uint8_t *p_dst_base = (*pp_dst) + 4; 221 | RET_ERR_IF(R_DST_OVERFLOW, (p_dst_limit-(*pp_dst) < 4)); 222 | (*pp_dst) += 4; 223 | if (csize <= MIN_COMPRESSED_BLOCK_SIZE) { 224 | RET_WHEN_ERR(LZ4_copy(p_src, p_src_end, pp_dst, p_dst_limit)); 225 | csize |= 0x80000000U; 226 | } else { 227 | RET_WHEN_ERR(LZ4_compress_block(p_src, p_src_end, pp_dst, p_dst_limit, wksp)); 228 | if (csize > (*pp_dst) - p_dst_base) { 229 | csize = (*pp_dst) - p_dst_base; 230 | } else { 231 | *pp_dst = p_dst_base; 232 | RET_WHEN_ERR(LZ4_copy(p_src, p_src_end, pp_dst, p_dst_limit)); 233 | csize |= 0x80000000U; 234 | } 235 | } 236 | p_dst_base[-4] = 0xFF & (csize ); 237 | p_dst_base[-3] = 0xFF & (csize >> 8); 238 | p_dst_base[-2] = 0xFF & (csize >> 16); 239 | p_dst_base[-1] = 0xFF & (csize >> 24); 240 | return R_OK; 241 | } 242 | 243 | 244 | int lz4C (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len) { 245 | uint8_t *p_src_limit = p_src + src_len; 246 | uint8_t *p_dst_tmp = p_dst; 247 | uint8_t **pp_dst = &p_dst_tmp; 248 | uint8_t *p_dst_limit = p_dst + (*p_dst_len); 249 | LZ4_compress_workspace_t *wksp = 
(LZ4_compress_workspace_t*)malloc(sizeof(LZ4_compress_workspace_t)); 250 | RET_ERR_IF(R_MEM_RANOUT, (wksp==NULL)); 251 | RET_ERR_IF(R_SRC_OVERFLOW, p_src > p_src_limit); 252 | RET_ERR_IF(R_DST_OVERFLOW, p_dst > p_dst_limit); 253 | RET_WHEN_ERR(LZ4_write(pp_dst, p_dst_limit, 0x04)); 254 | RET_WHEN_ERR(LZ4_write(pp_dst, p_dst_limit, 0x22)); 255 | RET_WHEN_ERR(LZ4_write(pp_dst, p_dst_limit, 0x4D)); 256 | RET_WHEN_ERR(LZ4_write(pp_dst, p_dst_limit, 0x18)); 257 | RET_WHEN_ERR(LZ4_write(pp_dst, p_dst_limit, 0x60)); 258 | RET_WHEN_ERR(LZ4_write(pp_dst, p_dst_limit, 0x70)); // block size, 0x40=64kB, 0x50=256kB, 0x60=1MB, 0x70=4MB 259 | RET_WHEN_ERR(LZ4_write(pp_dst, p_dst_limit, 0x73)); 260 | while (p_src < p_src_limit) { 261 | uint8_t *p_src_end = p_src_limit; // block end 262 | if (p_src_end - p_src > MAX_COMPRESSED_BLOCK_SIZE) { 263 | p_src_end = p_src + MAX_COMPRESSED_BLOCK_SIZE; 264 | } 265 | RET_WHEN_ERR(LZ4_compress_or_copy_block_with_csize(p_src, p_src_end, pp_dst, p_dst_limit, wksp)); 266 | p_src = p_src_end; 267 | } 268 | free(wksp); 269 | RET_WHEN_ERR(LZ4_write(pp_dst, p_dst_limit, 0x00)); 270 | RET_WHEN_ERR(LZ4_write(pp_dst, p_dst_limit, 0x00)); 271 | RET_WHEN_ERR(LZ4_write(pp_dst, p_dst_limit, 0x00)); 272 | RET_WHEN_ERR(LZ4_write(pp_dst, p_dst_limit, 0x00)); 273 | *p_dst_len = (*pp_dst) - p_dst; 274 | return R_OK; 275 | } 276 | 277 | -------------------------------------------------------------------------------- /src/tinyZZZ_main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "FileIO.h" 7 | 8 | #include "gzipC.h" 9 | #include "lz4D.h" 10 | #include "lz4C.h" 11 | #include "zstdD.h" 12 | #include "lzmaD.h" 13 | #include "lzmaC.h" 14 | #include "lpaq8CD.h" 15 | #include "zipC.h" 16 | 17 | 18 | 19 | const char *USAGE = 20 | "|-------------------------------------------------------------------------------------------|\n" 21 | "| TinyZZZ v0.5 
https://github.com/WangXuan95/TinyZZZ |\n" 22 | "| TinyZZZ is a simple, standalone data compressor/decompressor with several popular data |\n" 23 | "| compression algorithms, which are written in C language (C99). Unlike the official |\n" 24 | "| implementations, this code mainly focuses on simplicity and easy to understand. |\n" 25 | "|-------------------------------------------------------------------------------------------|\n" 26 | "| currently support: |\n" 27 | "| - GZIP compress |\n" 28 | "| - LZ4 decompress and compress |\n" 29 | "| - ZSTD decompress |\n" 30 | "| - LZMA decompress and compress |\n" 31 | "| - LPAQ8 decompress and compress |\n" 32 | "| - compress a file to ZIP container file using deflate (GZIP) method or LZMA method |\n" 33 | "|-------------------------------------------------------------------------------------------|\n" 34 | "| Usage : |\n" 35 | "| - decompress a GZIP file : *** not yet supported! *** |\n" 36 | "| - compress a file to GZIP file : tinyZZZ -c --gzip |\n" 37 | "| - decompress a LZ4 file : tinyZZZ -d --lz4 |\n" 38 | "| - compress a file to LZ4 file : tinyZZZ -c --lz4 |\n" 39 | "| - decompress a ZSTD file : tinyZZZ -d --zstd |\n" 40 | "| - compress a file to ZSTD file : *** not yet supported! *** |\n" 41 | "| - decompress a LZMA file : tinyZZZ -d --lzma |\n" 42 | "| - compress a file to LZMA file : tinyZZZ -c --lzma |\n" 43 | "| - decompress a LPAQ8 file : tinyZZZ -d --lpaq8 |\n" 44 | "| - compress a file to LPAQ8 file: tinyZZZ -c --lpaq8 |\n" 45 | "|-------------------------------------------------------------------------------------------|\n" 46 | "| Usage (compress to ZIP container) : |\n" 47 | "| - use Deflate method : tinyZZZ -c --gzip --zip |\n" 48 | "| - use LZMA method : tinyZZZ -c --lzma --zip |\n" 49 | "|-------------------------------------------------------------------------------------------|\n"; 50 | 51 | 52 | 53 | 54 | /// remove a filename's path prefix. 
e.g., if fname is "a/b/c.txt", we will get "c.txt" 55 | static void removeDirectoryPathFromFileName (char *fname) { 56 | char *p = fname; 57 | char *q = fname; 58 | for (; *p; p++) { 59 | *q = *p; 60 | if (*p == '/' || *p == '\\') // '/' is file sep of linux, '\' is file sep of windows 61 | q = fname; // back to base 62 | else 63 | q ++; 64 | }; 65 | *q = '\0'; 66 | } 67 | 68 | 69 | 70 | #define IS_64b_SYSTEM (sizeof(size_t) == 8) 71 | 72 | 73 | 74 | int main (int argc, char **argv) { 75 | 76 | enum {ACTION_NONE, COMPRESS, DECOMPRESS} type_action = ACTION_NONE; 77 | enum {FORMAT_NONE, GZIP, LZ4, ZSTD, LZMA, LPAQ8} type_format = FORMAT_NONE; 78 | enum {NATIVE, ZIP} type_container = NATIVE; 79 | 80 | char *fname_src=NULL, *fname_dst=NULL; 81 | uint8_t *p_src , *p_dst; 82 | size_t src_len , dst_len , MAX_DST_LEN = IS_64b_SYSTEM ? 0x80000000 : 0x20000000; 83 | int ret_code = 0; 84 | uint8_t compress_level = 2; 85 | 86 | 87 | // parse command line -------------------------------------------------------------------------------------------------- 88 | for (argc--; argc>=1; argc--) { // for all argv (inversely) 89 | char *arg = argv[argc]; 90 | if (arg[0] == '-') { 91 | if (strcmp(arg, "-c" ) == 0) { 92 | type_action = COMPRESS; 93 | } else if (strcmp(arg, "-d" ) == 0) { 94 | type_action = DECOMPRESS; 95 | } else if (strcmp(arg, "--gzip") == 0) { 96 | type_format = GZIP; 97 | } else if (strcmp(arg, "--lz4" ) == 0) { 98 | type_format = LZ4; 99 | } else if (strcmp(arg, "--zstd") == 0) { 100 | type_format = ZSTD; 101 | } else if (strcmp(arg, "--lzma") == 0) { 102 | type_format = LZMA; 103 | } else if (strcmp(arg, "--lpaq8") == 0) { 104 | type_format = LPAQ8; 105 | } else if (strcmp(arg, "--zip" ) == 0) { 106 | type_container = ZIP; 107 | } else if ('0' <= arg[1] && arg[1] <= '9') { 108 | compress_level = arg[1] - '0'; 109 | } else { 110 | printf(USAGE); // unknown switch 111 | return -1; 112 | } 113 | } else if (fname_dst == NULL) { 114 | fname_dst = arg; // get destination 
file name 115 | } else if (fname_src == NULL) { 116 | fname_src = arg; // get source file name 117 | } else { 118 | printf(USAGE); // too many file name 119 | return -1; 120 | } 121 | } 122 | 123 | if (fname_dst == NULL || fname_src == NULL || type_action == ACTION_NONE || type_format == FORMAT_NONE) { 124 | printf(USAGE); // insufficient file name 125 | return -1; 126 | } 127 | 128 | printf("input file name = %s\n", fname_src); 129 | printf("output file name = %s\n", fname_dst); 130 | 131 | 132 | // read source file -------------------------------------------------------------------------------------------------- 133 | p_src = loadFromFile(&src_len, fname_src); 134 | if (p_src == NULL) { 135 | printf("*** error : load file %s failed\n", fname_src); 136 | return -1; 137 | } 138 | printf("input length = %lu\n", src_len); 139 | 140 | 141 | // estimate destination size, and allocate destination buffer -------------------------------------------------------- 142 | switch (type_action) { 143 | case COMPRESS : 144 | dst_len = src_len + (src_len>>3) + 1048576; // estimate maximum compressed size based on original size 145 | break; 146 | case DECOMPRESS : 147 | dst_len = MAX_DST_LEN; 148 | break; 149 | case ACTION_NONE : 150 | printf(USAGE); 151 | return -1; 152 | } 153 | 154 | if (dst_len > MAX_DST_LEN) { 155 | dst_len = MAX_DST_LEN; 156 | } 157 | 158 | p_dst = (uint8_t*)malloc(dst_len); 159 | 160 | if (p_dst == NULL) { 161 | printf("*** error : allocate destination buffer failed\n"); 162 | return -1; 163 | } 164 | 165 | 166 | // do compress / decompress -------------------------------------------------------------------------------------------- 167 | switch (type_format) { 168 | case GZIP : { 169 | if (type_action == DECOMPRESS) { 170 | printf("*** error : GZIP decompress is not yet supported\n"); 171 | return -1; 172 | } else if (type_container != ZIP) { 173 | ret_code = gzipC(p_src, src_len, p_dst, &dst_len); 174 | } else { 175 | 
removeDirectoryPathFromFileName(fname_src); 176 | ret_code = zipCdeflate(p_src, src_len, p_dst, &dst_len, fname_src); 177 | } 178 | break; 179 | } 180 | case LZMA : { 181 | if (type_action == DECOMPRESS) { 182 | ret_code = lzmaD(p_src, src_len, p_dst, &dst_len); 183 | } else if (type_container != ZIP) { 184 | ret_code = lzmaC(p_src, src_len, p_dst, &dst_len); 185 | } else { 186 | removeDirectoryPathFromFileName(fname_src); 187 | ret_code = zipClzma(p_src, src_len, p_dst, &dst_len, fname_src); 188 | } 189 | break; 190 | } 191 | case LZ4 : { 192 | if (type_action == DECOMPRESS) { 193 | ret_code = lz4D(p_src, src_len, p_dst, &dst_len); 194 | } else if (type_container != ZIP) { 195 | ret_code = lz4C(p_src, src_len, p_dst, &dst_len); 196 | } else { 197 | printf("*** error : LZ4 compress to ZIP is not supported\n"); 198 | return -1; 199 | } 200 | break; 201 | } 202 | case ZSTD : { 203 | if (type_action == DECOMPRESS) { 204 | zstdD(p_src, src_len, p_dst, &dst_len); 205 | } else { 206 | printf("*** error : ZSTD compress is not yet supported\n"); 207 | return -1; 208 | } 209 | break; 210 | } 211 | case LPAQ8 : { 212 | size_t mem_usage = 0; 213 | if (type_action == DECOMPRESS) { 214 | ret_code = lpaq8D(p_src, src_len, p_dst, &dst_len, &compress_level, &mem_usage); 215 | } else if (type_container != ZIP) { 216 | ret_code = lpaq8C(p_src, src_len, p_dst, &dst_len, compress_level, &mem_usage); 217 | } else { 218 | printf("*** error : LPAQ8 compress to ZIP is not supported\n"); 219 | return -1; 220 | } 221 | printf("compress level = %d\n", (int)compress_level); 222 | printf("memory usage = %lu\n", mem_usage); 223 | break; 224 | } 225 | case FORMAT_NONE : { 226 | printf(USAGE); 227 | return -1; 228 | } 229 | } 230 | 231 | if (ret_code) { 232 | printf("*** error : failed (return_code = %d)\n", ret_code); 233 | return ret_code; 234 | } 235 | 236 | 237 | free(p_src); 238 | 239 | printf("output length = %lu\n", dst_len); 240 | 241 | { size_t decomp_size = (type_action==COMPRESS) ? 
src_len : dst_len; 242 | double time = (double)clock() / CLOCKS_PER_SEC; 243 | double speed = (0.001*decomp_size) / (time + 0.00000001); 244 | printf("time consumed = %.3f sec (%.0f kB/s)\n", time, speed); 245 | } 246 | 247 | if (saveToFile(p_dst, dst_len, fname_dst)) { 248 | printf("*** error : save file %s failed\n", fname_dst); 249 | return -1; 250 | } 251 | 252 | free(p_dst); 253 | 254 | return 0; 255 | } 256 | 257 | -------------------------------------------------------------------------------- /src/zipC.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | int writeZipLzmaProperty (uint8_t *p_dst, size_t *p_dst_len); // lzmaC.c 6 | int lzmaEncode (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len, uint8_t with_end_mark); // lzmaC.c 7 | int deflateEncode (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len); // gzipC.c 8 | 9 | 10 | #define R_OK 0 11 | #define R_ERR_UNSUPPORTED 2 12 | #define R_ERR_OUTPUT_OVERFLOW 3 13 | 14 | #define RET_WHEN_ERR(err_code) { int ec = (err_code); if (ec) return ec; } 15 | 16 | 17 | static size_t getStringLength (const char *string) { 18 | size_t i; 19 | for (i=0; *string; string++, i++); 20 | return i; 21 | } 22 | 23 | 24 | #define ZIP_HEADER_LEN_EXCLUDE_FILENAME 30 25 | #define ZIP_FOOTER_LEN_EXCLUDE_FILENAME (46 + 22) 26 | 27 | #define FILE_NAME_IN_ZIP_MAX_LEN ((size_t)0xFF00) 28 | #define ZIP_UNCOMPRESSED_MAX_LEN ((size_t)0xFFFF0000) 29 | #define ZIP_COMPRESSED_MAX_LEN ((size_t)0xFFFF0000) 30 | 31 | #define COMP_METHOD_LZMA 0x0E 32 | #define COMP_METHOD_DEFLATE 0x08 33 | 34 | 35 | static int writeZipHeader (uint8_t *p_dst, size_t *p_dst_len, uint32_t crc, size_t compressed_len, size_t uncompressed_len, const char *file_name, uint8_t comp_method) { 36 | size_t i; 37 | const size_t file_name_len = getStringLength(file_name); 38 | 39 | if (file_name_len > FILE_NAME_IN_ZIP_MAX_LEN) 40 | return R_ERR_UNSUPPORTED; 41 | 42 | if 
(uncompressed_len > ZIP_UNCOMPRESSED_MAX_LEN) // ".zip" format don't support uncompressed size > 32-bit integer 43 | return R_ERR_UNSUPPORTED; 44 | 45 | if (compressed_len > ZIP_COMPRESSED_MAX_LEN) // ".zip" format don't support compressed size > 32-bit integer 46 | return R_ERR_UNSUPPORTED; 47 | 48 | if (*p_dst_len < ZIP_HEADER_LEN_EXCLUDE_FILENAME + file_name_len) // no enough space for writing ZIP header 49 | return R_ERR_OUTPUT_OVERFLOW; 50 | 51 | *p_dst_len = ZIP_HEADER_LEN_EXCLUDE_FILENAME + file_name_len; 52 | 53 | // Local File Header ---------------------------------------------------- 54 | *(p_dst++) = 0x50; // 0~3 Local file header signature # 0x04034b50 (read as a little-endian number) 55 | *(p_dst++) = 0x4B; 56 | *(p_dst++) = 0x03; 57 | *(p_dst++) = 0x04; 58 | *(p_dst++) = 0x3F; // 4~5 Version needed to extract (minimum) 59 | *(p_dst++) = 0x00; 60 | *(p_dst++) = 0x00; // 6~7 General purpose bit flag 61 | *(p_dst++) = 0x00; 62 | *(p_dst++) = comp_method; // 8~9 Compression method 63 | *(p_dst++) = 0x00; 64 | *(p_dst++) = 0x00; // 10~11 File last modification time 65 | *(p_dst++) = 0x00; 66 | *(p_dst++) = 0x00; // 12~13 File last modification date 67 | *(p_dst++) = 0x00; 68 | *(p_dst++) = (uint8_t)(crc >> 0); // 14~17 CRC-32 69 | *(p_dst++) = (uint8_t)(crc >> 8); 70 | *(p_dst++) = (uint8_t)(crc >>16); 71 | *(p_dst++) = (uint8_t)(crc >>24); 72 | *(p_dst++) = (uint8_t)(compressed_len >> 0); // 18~21 Compressed size 73 | *(p_dst++) = (uint8_t)(compressed_len >> 8); 74 | *(p_dst++) = (uint8_t)(compressed_len >>16); 75 | *(p_dst++) = (uint8_t)(compressed_len >>24); 76 | *(p_dst++) = (uint8_t)(uncompressed_len >> 0); // 22~25 Uncompressed size 77 | *(p_dst++) = (uint8_t)(uncompressed_len >> 8); 78 | *(p_dst++) = (uint8_t)(uncompressed_len >>16); 79 | *(p_dst++) = (uint8_t)(uncompressed_len >>24); 80 | *(p_dst++) = (uint8_t)(file_name_len >> 0); // 26~27 File name length (n) 81 | *(p_dst++) = (uint8_t)(file_name_len >> 8); 82 | *(p_dst++) = 0x00; // 28~29 Extra 
field length (m) 83 | *(p_dst++) = 0x00; 84 | 85 | for (i=0; i> 0); // 16~19 CRC-32 119 | *(p_dst++) = (uint8_t)(crc >> 8); 120 | *(p_dst++) = (uint8_t)(crc >>16); 121 | *(p_dst++) = (uint8_t)(crc >>24); 122 | *(p_dst++) = (uint8_t)(compressed_len >> 0); // 20~23 Compressed size 123 | *(p_dst++) = (uint8_t)(compressed_len >> 8); 124 | *(p_dst++) = (uint8_t)(compressed_len >>16); 125 | *(p_dst++) = (uint8_t)(compressed_len >>24); 126 | *(p_dst++) = (uint8_t)(uncompressed_len >> 0); // 24~27 Uncompressed size 127 | *(p_dst++) = (uint8_t)(uncompressed_len >> 8); 128 | *(p_dst++) = (uint8_t)(uncompressed_len >>16); 129 | *(p_dst++) = (uint8_t)(uncompressed_len >>24); 130 | *(p_dst++) = (uint8_t)(file_name_len >> 0); // 28~29 File name length (n) 131 | *(p_dst++) = (uint8_t)(file_name_len >> 8); 132 | *(p_dst++) = 0x00; // 30~31 Extra field length (m) 133 | *(p_dst++) = 0x00; 134 | *(p_dst++) = 0x00; // 32~33 File comment length (k) 135 | *(p_dst++) = 0x00; 136 | *(p_dst++) = 0x00; // 34~35 Disk number where file starts 137 | *(p_dst++) = 0x00; 138 | *(p_dst++) = 0x00; // 36~37 Internal file attributes 139 | *(p_dst++) = 0x00; 140 | *(p_dst++) = 0x00; // 38~41 External file attributes 141 | *(p_dst++) = 0x00; 142 | *(p_dst++) = 0x00; 143 | *(p_dst++) = 0x00; 144 | *(p_dst++) = 0x00; // 42~45 Relative offset of local file header. 
145 | *(p_dst++) = 0x00; 146 | *(p_dst++) = 0x00; 147 | *(p_dst++) = 0x00; 148 | 149 | for (i=0; i> 0); // 12~15 Size of central directory (bytes) 166 | *(p_dst++) = (uint8_t)((46+file_name_len) >> 8); 167 | *(p_dst++) = (uint8_t)((46+file_name_len) >>16); 168 | *(p_dst++) = (uint8_t)((46+file_name_len) >>24); 169 | *(p_dst++) = (uint8_t)(offset >> 0); // 16~19 Offset of start of central directory, relative to start of archive (pos of p_dst) 170 | *(p_dst++) = (uint8_t)(offset >> 8); 171 | *(p_dst++) = (uint8_t)(offset >>16); 172 | *(p_dst++) = (uint8_t)(offset >>24); 173 | *(p_dst++) = 0x00; // 20~21 Comment length (n) 174 | *(p_dst++) = 0x00; 175 | 176 | return R_OK; 177 | } 178 | 179 | 180 | static uint32_t calcCrc32 (const uint8_t *p_src, size_t src_len) { 181 | static const uint32_t TABLE_CRC32 [] = { 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; 182 | 183 | uint32_t crc = 0xFFFFFFFF; 184 | const uint8_t *p_end = p_src + src_len; 185 | 186 | for (; p_src> 4); 189 | crc = TABLE_CRC32[crc & 0x0f] ^ (crc >> 4); 190 | } 191 | 192 | return ~crc; 193 | } 194 | 195 | 196 | static int zipC (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len, const char *file_name_in_zip, uint8_t comp_method) { 197 | size_t zip_hdr_len, lzma_prop_len, cmprs_len, zip_ftr_len; // there are 4 parts of the final output data : ZIP header, ZIP LZMA property, LZMA compressed data, and ZIP footer 198 | uint32_t crc; 199 | 200 | zip_hdr_len = *p_dst_len; // set available space for ZIP header 201 | 202 | RET_WHEN_ERR( writeZipHeader(p_dst, &zip_hdr_len, 0, 0, src_len, file_name_in_zip, comp_method) ); // note that some fields are unknown and filled using "0", we should rewrite it later 203 | 204 | if (comp_method == COMP_METHOD_LZMA) { 205 | lzma_prop_len = *p_dst_len - zip_hdr_len; // set available space for ZIP LZMA property 206 | 
RET_WHEN_ERR( writeZipLzmaProperty(p_dst+zip_hdr_len, &lzma_prop_len) ); 207 | } else { 208 | lzma_prop_len = 0; 209 | } 210 | 211 | cmprs_len = *p_dst_len - zip_hdr_len - lzma_prop_len; // set available space for LZMA compressed data 212 | 213 | if (comp_method == COMP_METHOD_LZMA) { 214 | RET_WHEN_ERR( lzmaEncode(p_src, src_len, p_dst+zip_hdr_len+lzma_prop_len, &cmprs_len, 1)); 215 | } else { 216 | RET_WHEN_ERR(deflateEncode(p_src, src_len, p_dst+zip_hdr_len+lzma_prop_len, &cmprs_len)); 217 | } 218 | 219 | if (cmprs_len > ZIP_COMPRESSED_MAX_LEN) { 220 | return R_ERR_UNSUPPORTED; 221 | } 222 | 223 | cmprs_len += lzma_prop_len; // ZIP's LZMA property is actually a part of compressed data 224 | 225 | crc = calcCrc32(p_src, src_len); 226 | 227 | zip_ftr_len = *p_dst_len - zip_hdr_len - cmprs_len; // set available space for ZIP footer 228 | 229 | RET_WHEN_ERR( writeZipFooter(p_dst+zip_hdr_len+cmprs_len, &zip_ftr_len, crc, cmprs_len, src_len, file_name_in_zip, zip_hdr_len+cmprs_len, comp_method) ); 230 | 231 | RET_WHEN_ERR( writeZipHeader(p_dst, &zip_hdr_len, crc, cmprs_len, src_len, file_name_in_zip, comp_method) ); // rewrite ZIP header, since some fields are not writed previously. 
232 | 233 | *p_dst_len = zip_hdr_len + cmprs_len + zip_ftr_len; // the total output length = ZIP header length + compressed data length (include ZIP LZMA property) + ZIP footer length 234 | 235 | return R_OK; 236 | } 237 | 238 | 239 | int zipClzma (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len, const char *file_name_in_zip) { 240 | return zipC(p_src, src_len, p_dst, p_dst_len, file_name_in_zip, COMP_METHOD_LZMA); 241 | } 242 | 243 | 244 | int zipCdeflate (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len, const char *file_name_in_zip) { 245 | return zipC(p_src, src_len, p_dst, p_dst_len, file_name_in_zip, COMP_METHOD_DEFLATE); 246 | } 247 | 248 | -------------------------------------------------------------------------------- /src/lzmaD.c: -------------------------------------------------------------------------------- 1 | #include // size_t 2 | #include // uint8_t, uint16_t, uint32_t 3 | #include // malloc, free 4 | 5 | #define R_OK 0 6 | #define R_ERR_MEMORY_RUNOUT 1 7 | #define R_ERR_UNSUPPORTED 2 8 | #define R_ERR_OUTPUT_OVERFLOW 3 9 | #define R_ERR_INPUT_OVERFLOW 4 10 | #define R_ERR_DATA 5 11 | #define R_ERR_OUTPUT_LEN_MISMATCH 6 12 | 13 | #define RET_WHEN_ERR(err_code) { int ec = (err_code); if (ec) return ec; } 14 | 15 | 16 | 17 | // the code only use these basic types : 18 | // int : as return code 19 | // uint8_t : as compressed and uncompressed data, as LZMA state 20 | // uint16_t : as probabilities of range coder 21 | // uint32_t : as generic integers 22 | // size_t : as data length 23 | 24 | 25 | 26 | 27 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 28 | // common useful functions 29 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 30 | 31 | static uint32_t bitsReverse (uint32_t bits, uint32_t 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Range Decoder
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

#define   RANGE_CODE_NORMALIZE_THRESHOLD           (1 << 24)
#define   RANGE_CODE_MOVE_BITS                     5
#define   RANGE_CODE_N_BIT_MODEL_TOTAL_BITS        11
#define   RANGE_CODE_BIT_MODEL_TOTAL               (1 << RANGE_CODE_N_BIT_MODEL_TOTAL_BITS)
#define   RANGE_CODE_HALF_PROBABILITY              (RANGE_CODE_BIT_MODEL_TOTAL >> 1)


// state of the range decoder
typedef struct {
    uint32_t       code;           // current code value, refilled from the input one byte at a time
    uint32_t       range;          // current range width
    const uint8_t *p_src;          // next input byte to consume
    const uint8_t *p_src_limit;    // one past the last input byte
    uint8_t        overflow;       // set to 1 when a refill was needed but the input was exhausted
} RangeDecoder_t;


// Refill : when range has dropped below the threshold, shift one input byte into code and widen range by 8 bits.
// If no input byte is left, only the overflow flag is raised and code/range are left untouched.
static void rangeDecodeNormalize (RangeDecoder_t *d) {
    if (d->range >= RANGE_CODE_NORMALIZE_THRESHOLD)
        return;                                    // nothing to do

    if (d->p_src == d->p_src_limit) {
        d->overflow = 1;
        return;
    }

    d->range <<= 8;
    d->code    = (d->code << 8) | (uint32_t)(*(d->p_src));
    d->p_src ++;
}


// Create a decoder over src_len bytes at p_src and preload it with the first 5 input bytes.
static RangeDecoder_t newRangeDecoder (const uint8_t *p_src, size_t src_len) {
    RangeDecoder_t coder;
    uint32_t       n;

    coder.code        = 0;
    coder.range       = 0;                         // range stays 0 during preloading, so each normalize call consumes one byte
    coder.p_src       = p_src;
    coder.p_src_limit = p_src + src_len;
    coder.overflow    = 0;

    for (n=0; n<5; n++)
        rangeDecodeNormalize(&coder);

    coder.range = 0xFFFFFFFF;
    return coder;
}


// Decode bit_count bits, each with a fixed probability of one half, and return them as an integer (first decoded bit is the MSB).
static uint32_t rangeDecodeIntByFixedProb (RangeDecoder_t *d, uint32_t bit_count) {
    uint32_t result = 0;
    uint32_t n;

    for (n=0; n<bit_count; n++) {
        uint32_t diff;
        rangeDecodeNormalize(d);
        d->range >>= 1;
        diff = d->code - d->range;
        result <<= 1;
        if ((diff >> 31) == 0) {                   // top bit of (code - range) clear : the bit is 1 and the subtraction is kept
            d->code = diff;
            result |= 1;
        }                                          // otherwise the bit is 0 and code is left as it was
    }

    return result;
}


// Decode one bit using the adaptive probability *p_prob, then move *p_prob towards the decoded value.
static uint32_t rangeDecodeBit (RangeDecoder_t *d, uint16_t *p_prob) {
    const uint32_t prob = *p_prob;
    uint32_t       bound;

    rangeDecodeNormalize(d);
    bound = (d->range >> RANGE_CODE_N_BIT_MODEL_TOTAL_BITS) * prob;

    if (d->code >= bound) {                        // bit = 1
        d->range -= bound;
        d->code  -= bound;
        *p_prob   = (uint16_t)(prob - (prob >> RANGE_CODE_MOVE_BITS));
        return 1;
    }

    d->range = bound;                              // bit = 0
    *p_prob  = (uint16_t)(prob + ((RANGE_CODE_BIT_MODEL_TOTAL - prob) >> RANGE_CODE_MOVE_BITS));
    return 0;
}
PKT_REP3 : PKT_REP2; 306 | } 307 | 308 | if (type == PKT_LIT) { 309 | if (state < N_LIT_STATES) { 310 | prev_byte = rangeDecodeInt(&coder, probs_literal[literal_pos_state][prev_byte_lc_msbs], 8); 311 | } else { 312 | uint8_t match_byte = 0; 313 | if (pos >= (size_t)rep0) 314 | match_byte = p_dst[pos-rep0]; 315 | prev_byte = rangeDecodeMB (&coder, probs_literal[literal_pos_state][prev_byte_lc_msbs], match_byte); 316 | } 317 | } 318 | 319 | state = stateTransition(state, type); 320 | 321 | switch (type) { 322 | case PKT_SHORTREP : 323 | case PKT_REP0 : dist = rep0; break; 324 | case PKT_REP1 : dist = rep1; break; 325 | case PKT_REP2 : dist = rep2; break; 326 | case PKT_REP3 : dist = rep3; break; 327 | default : break; 328 | } 329 | 330 | switch (type) { 331 | case PKT_LIT : 332 | case PKT_SHORTREP : len = 1; break; 333 | case PKT_MATCH : 334 | case PKT_REP3 : rep3 = rep2; 335 | case PKT_REP2 : rep2 = rep1; 336 | case PKT_REP1 : rep1 = rep0; break; 337 | default : break; 338 | } 339 | 340 | if (len == 0) { // unknown length, need to decode 341 | const uint32_t is_rep = (type != PKT_MATCH); 342 | if ( !rangeDecodeBit(&coder, &probs_len_choice [is_rep]) ) 343 | len = 2 + rangeDecodeInt(&coder, probs_len_low[is_rep][pos_state], 3); // len = 2~9 344 | else if ( !rangeDecodeBit(&coder, &probs_len_choice2[is_rep]) ) 345 | len = 10 + rangeDecodeInt(&coder, probs_len_mid[is_rep][pos_state], 3); // len = 10~17 346 | else 347 | len = 18 + rangeDecodeInt(&coder,probs_len_high[is_rep], 8); // len = 18~273 348 | } 349 | 350 | if (type == PKT_MATCH) { // unknown distance, need to decode 351 | const uint32_t len_min5_minus2 = (len>5) ? 
3 : (len-2); 352 | uint32_t dist_slot, bcnt; 353 | 354 | dist_slot = rangeDecodeInt(&coder, probs_dist_slot[len_min5_minus2], 6); // decode distance slot (0~63) 355 | bcnt = (dist_slot >> 1) - 1; 356 | dist = (2 | (dist_slot & 1)); // high 2 bits of dist 357 | dist<<= bcnt; 358 | 359 | if (dist_slot >=14) { // dist slot = 14~63 360 | dist |= rangeDecodeIntByFixedProb (&coder, bcnt-4) << 4; 361 | dist |= bitsReverse(rangeDecodeInt(&coder, probs_dist_align, 4), 4); 362 | } else if (dist_slot >=4 ) { // dist slot = 4~13 363 | dist |= bitsReverse(rangeDecodeInt(&coder, probs_dist_special[dist_slot-4], bcnt), bcnt); 364 | } else { // dist slot = 0~3 365 | dist = dist_slot; 366 | } 367 | 368 | if (dist == 0xFFFFFFFF) // meeting end marker 369 | break; 370 | 371 | dist ++; 372 | } 373 | 374 | if ((size_t)dist > pos) 375 | return R_ERR_DATA; 376 | 377 | if ((pos+len) > *p_dst_len) 378 | return R_ERR_OUTPUT_OVERFLOW; 379 | 380 | if (type == PKT_LIT) 381 | p_dst[pos] = prev_byte; 382 | else 383 | rep0 = dist; 384 | 385 | for (; len>0; len--) { 386 | p_dst[pos] = prev_byte = p_dst[pos-dist]; 387 | pos ++; 388 | } 389 | } 390 | 391 | free(probs_literal); 392 | 393 | *p_dst_len = pos; 394 | 395 | return R_OK; 396 | } 397 | 398 | 399 | 400 | 401 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 402 | // LZMA decompress (include parsing ".lzma" format's header) 403 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 404 | 405 | #define LZMA_HEADER_LEN 13 406 | #define LZMA_DIC_MIN (1 << 12) 407 | 408 | 409 | static int parseLzmaHeader (uint8_t *p_src, uint8_t *p_lc, uint8_t *p_lp, uint8_t *p_pb, uint32_t *p_dict_len, size_t *p_uncompressed_len, uint32_t *p_uncompressed_len_known) { 410 | uint8_t byte0 = p_src[0]; 411 | 412 | *p_dict_len = ((uint32_t)p_src[1] ) | 
((uint32_t)p_src[2] <<8) | ((uint32_t)p_src[3] <<16) | ((uint32_t)p_src[4] <<24) ; 413 | 414 | if (*p_dict_len < LZMA_DIC_MIN) 415 | *p_dict_len = LZMA_DIC_MIN; 416 | 417 | if (p_src[5] == 0xFF && p_src[6] == 0xFF && p_src[7] == 0xFF && p_src[8] == 0xFF && p_src[9] == 0xFF && p_src[10] == 0xFF && p_src[11] == 0xFF && p_src[12] == 0xFF) { 418 | *p_uncompressed_len_known = 0; 419 | } else { 420 | uint32_t i; 421 | *p_uncompressed_len_known = 1; 422 | *p_uncompressed_len = 0; 423 | for (i=0; i<8; i++) { 424 | if (i < sizeof(size_t)) { 425 | *p_uncompressed_len |= (((size_t)p_src[5+i]) << (i<<3)); // get (sizeof(size_t)) bytes from p_src, and put it to (*p_uncompressed_len) 426 | } else if (p_src[5+i] > 0) { 427 | return R_ERR_OUTPUT_OVERFLOW; // uncompressed length overflow from the machine's memory address limit 428 | } 429 | } 430 | } 431 | 432 | *p_lc = (uint8_t)(byte0 % 9); 433 | byte0 /= 9; 434 | *p_lp = (uint8_t)(byte0 % 5); 435 | *p_pb = (uint8_t)(byte0 / 5); 436 | 437 | if (*p_lc > MAX_LC || *p_lp > MAX_LP || *p_pb > MAX_PB) 438 | return R_ERR_UNSUPPORTED; 439 | 440 | return R_OK; 441 | } 442 | 443 | 444 | int lzmaD (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len) { 445 | uint8_t lc, lp, pb; // lc=0~8 lp=0~4 pb=0~4 446 | uint32_t dict_len, uncompressed_len_known; 447 | size_t uncompressed_len = 0; 448 | 449 | if (src_len < LZMA_HEADER_LEN) 450 | return R_ERR_INPUT_OVERFLOW; 451 | 452 | RET_WHEN_ERR( parseLzmaHeader(p_src, &lc, &lp, &pb, &dict_len, &uncompressed_len, &uncompressed_len_known) ) 453 | 454 | //printf("[LZMAd] lc=%d lp=%d pb=%d dict_len=%u\n", lc, lp, pb, dict_len); 455 | 456 | if (uncompressed_len_known) { 457 | if (uncompressed_len > *p_dst_len) 458 | return R_ERR_OUTPUT_OVERFLOW; 459 | *p_dst_len = uncompressed_len; 460 | //printf("[LZMAd] uncompressed length = %lu (parsed from header)\n" , *p_dst_len); 461 | } else { 462 | //printf("[LZMAd] uncompressed length is not in header, decoding using output buffer length = %lu\n" , 
*p_dst_len); 463 | } 464 | 465 | RET_WHEN_ERR( lzmaDecode(p_src+LZMA_HEADER_LEN, src_len-LZMA_HEADER_LEN, p_dst, p_dst_len, lc, lp, pb) ); 466 | 467 | if (uncompressed_len_known && uncompressed_len != *p_dst_len) 468 | return R_ERR_OUTPUT_LEN_MISMATCH; 469 | 470 | return R_OK; 471 | } 472 | 473 | -------------------------------------------------------------------------------- /src/gzipC.c: -------------------------------------------------------------------------------- 1 | #include // size_t 2 | #include // uint8_t, uint32_t 3 | #include // malloc, free 4 | 5 | 6 | #define R_OK 0 7 | #define R_DST_OVERFLOW 1 8 | #define R_SRC_OVERFLOW 2 9 | #define R_MEM_RANOUT 3 10 | 11 | #define RET_WHEN_ERR(err_code) { int ec = (err_code); if (ec) return ec; } 12 | #define RET_ERR_IF(err_code,condition) { if (condition) return err_code; } 13 | 14 | 15 | #define N_LITERAL 256 // literal (symbol = 0-255 ) 16 | #define SYMBOL_END N_LITERAL // end_of_block (symbol = 256 ) 17 | #define N_LZ77_ML 29 // LZ77_len (symbol = 257-285) 18 | #define N_SYMBOL ( (N_LITERAL) + 1 + (N_LZ77_ML) ) 19 | #define N_LZ77_OF 30 20 | 21 | #define MIN_LZ77_ML 3 22 | #define MAX_LZ77_ML 258 23 | #define SKIP_LZ77_ML 64 24 | #define MIN_LZ77_OF 1 25 | #define MAX_LZ77_OF 32767 26 | 27 | #define MAX_HUFFMAN_BITS_LEN 15 28 | 29 | #define SYMBOL_TREE_MERGE_INC 20 30 | #define OFFSET_TREE_MERGE_INC 7 31 | 32 | #define MAX_BLOCK_LEN 32768 33 | 34 | #define N_HASH 16381 35 | #define N_HASH1_ROW 4 36 | #define N_HASH2_ROW 2 37 | #define N_HASH3_ROW 2 38 | #define N_HASH_ROW (N_HASH1_ROW + N_HASH2_ROW + N_HASH3_ROW) 39 | 40 | static const uint32_t static_symbol_huffman_bits [N_SYMBOL] = {0x00c, 0x08c, 0x04c, 0x0cc, 0x02c, 0x0ac, 0x06c, 0x0ec, 0x01c, 0x09c, 0x05c, 0x0dc, 0x03c, 0x0bc, 0x07c, 0x0fc, 0x002, 0x082, 0x042, 0x0c2, 0x022, 0x0a2, 0x062, 0x0e2, 0x012, 0x092, 0x052, 0x0d2, 0x032, 0x0b2, 0x072, 0x0f2, 0x00a, 0x08a, 0x04a, 0x0ca, 0x02a, 0x0aa, 0x06a, 0x0ea, 0x01a, 0x09a, 0x05a, 0x0da, 0x03a, 0x0ba, 0x07a, 
0x0fa, 0x006, 0x086, 0x046, 0x0c6, 0x026, 0x0a6, 0x066, 0x0e6, 0x016, 0x096, 0x056, 0x0d6, 0x036, 0x0b6, 0x076, 0x0f6, 0x00e, 0x08e, 0x04e, 0x0ce, 0x02e, 0x0ae, 0x06e, 0x0ee, 0x01e, 0x09e, 0x05e, 0x0de, 0x03e, 0x0be, 0x07e, 0x0fe, 0x001, 0x081, 0x041, 0x0c1, 0x021, 0x0a1, 0x061, 0x0e1, 0x011, 0x091, 0x051, 0x0d1, 0x031, 0x0b1, 0x071, 0x0f1, 0x009, 0x089, 0x049, 0x0c9, 0x029, 0x0a9, 0x069, 0x0e9, 0x019, 0x099, 0x059, 0x0d9, 0x039, 0x0b9, 0x079, 0x0f9, 0x005, 0x085, 0x045, 0x0c5, 0x025, 0x0a5, 0x065, 0x0e5, 0x015, 0x095, 0x055, 0x0d5, 0x035, 0x0b5, 0x075, 0x0f5, 0x00d, 0x08d, 0x04d, 0x0cd, 0x02d, 0x0ad, 0x06d, 0x0ed, 0x01d, 0x09d, 0x05d, 0x0dd, 0x03d, 0x0bd, 0x07d, 0x0fd, 0x013, 0x113, 0x093, 0x193, 0x053, 0x153, 0x0d3, 0x1d3, 0x033, 0x133, 0x0b3, 0x1b3, 0x073, 0x173, 0x0f3, 0x1f3, 0x00b, 0x10b, 0x08b, 0x18b, 0x04b, 0x14b, 0x0cb, 0x1cb, 0x02b, 0x12b, 0x0ab, 0x1ab, 0x06b, 0x16b, 0x0eb, 0x1eb, 0x01b, 0x11b, 0x09b, 0x19b, 0x05b, 0x15b, 0x0db, 0x1db, 0x03b, 0x13b, 0x0bb, 0x1bb, 0x07b, 0x17b, 0x0fb, 0x1fb, 0x007, 0x107, 0x087, 0x187, 0x047, 0x147, 0x0c7, 0x1c7, 0x027, 0x127, 0x0a7, 0x1a7, 0x067, 0x167, 0x0e7, 0x1e7, 0x017, 0x117, 0x097, 0x197, 0x057, 0x157, 0x0d7, 0x1d7, 0x037, 0x137, 0x0b7, 0x1b7, 0x077, 0x177, 0x0f7, 0x1f7, 0x00f, 0x10f, 0x08f, 0x18f, 0x04f, 0x14f, 0x0cf, 0x1cf, 0x02f, 0x12f, 0x0af, 0x1af, 0x06f, 0x16f, 0x0ef, 0x1ef, 0x01f, 0x11f, 0x09f, 0x19f, 0x05f, 0x15f, 0x0df, 0x1df, 0x03f, 0x13f, 0x0bf, 0x1bf, 0x07f, 0x17f, 0x0ff, 0x1ff, 0x000, 0x040, 0x020, 0x060, 0x010, 0x050, 0x030, 0x070, 0x008, 0x048, 0x028, 0x068, 0x018, 0x058, 0x038, 0x078, 0x004, 0x044, 0x024, 0x064, 0x014, 0x054, 0x034, 0x074, 0x003, 0x083, 0x043, 0x0c3, 0x023, 0x0a3}; 41 | static const uint32_t static_symbol_huffman_len [N_SYMBOL] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8}; 42 | //static const uint32_t static_dist_huffman_bits[N_LZ77_OF] = {0x00, 0x10, 0x08, 0x18, 0x04, 0x14, 0x0c, 0x1c, 0x02, 0x12, 0x0a, 0x1a, 0x06, 0x16, 0x0e, 0x1e, 0x01, 0x11, 0x09, 0x19, 0x05, 0x15, 0x0d, 0x1d, 0x03, 0x13, 0x0b, 0x1b, 0x07, 0x17}; 43 | 44 | 45 | 46 | struct StreamWriter_t { 47 | uint8_t *p_buf; 48 | uint8_t *p_limit; 49 | uint8_t byte; 50 | uint8_t mask; 51 | }; 52 | 53 | 54 | static struct StreamWriter_t newStreamWriter (uint8_t *p_buf, uint32_t len) { 55 | struct StreamWriter_t bs = {p_buf, (p_buf+len), 0x00, 0x01}; 56 | return bs; 57 | } 58 | 59 | 60 | static int appendBits (struct StreamWriter_t *p_bs, uint32_t bits, uint32_t cnt) { 61 | //assert(cnt <= 32); 62 | for (; cnt>0; cnt--) { 63 | if (bits & 1) 64 | p_bs->byte |= p_bs->mask; 65 | bits >>= 1; 66 | p_bs->mask <<= 1; 67 | if (p_bs->mask == 0x00) { 68 | RET_ERR_IF(R_DST_OVERFLOW, (p_bs->p_buf >= p_bs->p_limit)); 69 | *(p_bs->p_buf) = p_bs->byte; 70 | p_bs->p_buf ++; 71 | p_bs->byte = 0x00; 72 | p_bs->mask = 0x01; 73 | } 74 | } 75 | return R_OK; 76 | } 77 | 78 | 79 | static int alignBitsToBytes (struct StreamWriter_t *p_bs) { 80 | if (p_bs->mask > 0x01) { 81 | RET_ERR_IF(R_DST_OVERFLOW, (p_bs->p_buf >= p_bs->p_limit)); 82 | *(p_bs->p_buf) = p_bs->byte; 83 | p_bs->p_buf ++; 84 | p_bs->byte = 0x00; 85 | p_bs->mask = 0x01; 86 | } 87 | return R_OK; 88 | } 89 | 90 | 91 | static int writeValue (uint8_t 
// Reverse the order of the lowest `len` bits of `bits` (bit 0 becomes bit len-1 and so on).
// Bits above `len` are discarded; len == 0 yields 0.
static uint32_t bitsReverse (uint32_t bits, uint32_t len) {
    uint32_t revbits = 0;
    //assert(len <= 32);
    for (; len>0; len--) {
        revbits <<= 1;
        revbits |= (bits & 1);
        bits >>= 1;
    }
    return revbits;
}


// CRC-32 (reflected polynomial 0xEDB88320, initial value 0xFFFFFFFF, final inversion) of src_len bytes at p_src.
// Uses a 16-entry table and therefore processes every byte as two 4-bit steps.
static uint32_t calcCrc32 (uint8_t *p_src, uint32_t src_len) {
    static const uint32_t TABLE_CRC32 [] = { 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c };
    uint32_t crc = 0xFFFFFFFF;
    uint8_t *p_end = p_src + src_len;
    for (; p_src<p_end; p_src++) {
        crc ^= *p_src;
        crc = TABLE_CRC32[crc & 0x0f] ^ (crc >> 4);      // low nibble
        crc = TABLE_CRC32[crc & 0x0f] ^ (crc >> 4);      // high nibble
    }
    return ~crc;
}
// Map an LZ77 (match length, offset) pair to its DEFLATE codes.
//   ml            : match length, expected 3..258
//   of            : match offset, expected 1..32767
//   p_ml_ebits/p_ml_elen : receive the extra-bits value / extra-bits count of the length code
//   p_of_symbol   : receives the offset symbol (0..29)
//   p_of_ebits/p_of_elen : receive the extra-bits value / extra-bits count of the offset code
// Returns the length symbol (257..285).
// Values below the expected range are clamped to the first table entry with zero extra bits instead of scanning past the start of the tables.
static uint32_t getLZ77SymbolAndExtraBits (uint32_t ml, uint32_t of, uint32_t *p_ml_ebits, uint32_t *p_ml_elen, uint32_t *p_of_symbol, uint32_t *p_of_ebits, uint32_t *p_of_elen) {
    static const uint32_t TABLE_OF_EXTRA [] = {0,0,0,0,1,1,2, 2, 3, 3, 4, 4, 5, 5,  6,  6,  7,  7,  8,  8,   9,   9,  10,  10,  11,  11,  12,   12,   13,   13};
    static const uint32_t TABLE_OF_START [] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
    
    static const uint32_t TABLE_ML_EXTRA [] = {0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,  4,  5,  5,  5,  5,  0};
    static const uint32_t TABLE_ML_START [] = {3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258};
    
    const uint32_t n_of = (uint32_t)(sizeof(TABLE_OF_START) / sizeof(TABLE_OF_START[0]));    // 30 offset symbols
    const uint32_t n_ml = (uint32_t)(sizeof(TABLE_ML_START) / sizeof(TABLE_ML_START[0]));    // 29 length symbols
    
    uint32_t i;
    
    //assert(1 <= of && of <= MAX_LZ77_OF);
    //assert(MIN_LZ77_ML <= ml && ml <= MAX_LZ77_ML);
    
    // last offset bucket whose start value is <= of ; the scan stops at index 0 so that i can never wrap around
    for (i=n_of-1; i>0; i--)
        if (TABLE_OF_START[i] <= of)
            break;
    
    *p_of_symbol = i;
    *p_of_ebits  = (of >= TABLE_OF_START[i]) ? (of - TABLE_OF_START[i]) : 0;
    *p_of_elen   = TABLE_OF_EXTRA[i];
    
    // last length bucket whose start value is <= ml
    for (i=n_ml-1; i>0; i--)
        if (TABLE_ML_START[i] <= ml)
            break;
    
    *p_ml_ebits = (ml >= TABLE_ML_START[i]) ? (ml - TABLE_ML_START[i]) : 0;
    *p_ml_elen  = TABLE_ML_EXTRA[i];
    
    return i + 257;    // return ml symbol (257-285)
}
// Scratch memory shared by the LZ77 search and the cost-based re-selection of matches for one block.
typedef struct {
    uint16_t       ml         [MAX_BLOCK_LEN];                   // selected match length at each position (0 = emit a literal)
    uint16_t       of         [MAX_BLOCK_LEN];                   // selected match offset at each position
    uint16_t       ml_cand    [MAX_BLOCK_LEN] [N_HASH_ROW];      // candidate match lengths, one per hash row (0 = no candidate)
    uint16_t       of_cand    [MAX_BLOCK_LEN] [N_HASH_ROW];      // candidate match offsets, paired with ml_cand
    size_t         cost       [MAX_BLOCK_LEN+1];                 // cost[i] = estimated bits needed to encode the block from position i to its end
    const uint8_t* hash1_tab  [N_HASH] [N_HASH1_ROW];            // source positions remembered per hash value (three differently keyed tables)
    const uint8_t* hash2_tab  [N_HASH] [N_HASH2_ROW];
    const uint8_t* hash3_tab  [N_HASH] [N_HASH3_ROW];
} searchLZ77_workspace_t;


// Symbol statistics and the huffman codes derived from them, for literal/length symbols and for offset symbols.
typedef struct {
    uint32_t symbol_cnt [N_SYMBOL]  , symbol_huffman_len [N_SYMBOL]  , symbol_huffman_bits [N_SYMBOL];
    uint32_t of_cnt     [N_LZ77_OF] , of_huffman_len     [N_LZ77_OF] , of_huffman_bits     [N_LZ77_OF];
} EntropyCtx_t;


// Cost in bits of emitting `lit` as a literal under the current huffman code lengths.
static size_t costLit (EntropyCtx_t *ectx, uint8_t lit) {
    return ectx->symbol_huffman_len[lit];
}


// Cost in bits of emitting a match (ml, of) : length code + length extra bits + offset code + offset extra bits.
static size_t costMatch (EntropyCtx_t *ectx, uint16_t ml, uint16_t of) {
    uint32_t symbol, ml_ebits=0, ml_elen=0, of_symbol=0, of_ebits=0, of_elen=0;
    symbol = getLZ77SymbolAndExtraBits(ml, of, &ml_ebits, &ml_elen, &of_symbol, &of_ebits, &of_elen);
    return ectx->symbol_huffman_len[symbol] +
           ml_elen +
           ectx->of_huffman_len[of_symbol] +
           of_elen;
}
wksp->hash2_tab[hash2][k-N_HASH1_ROW] : 307 | wksp->hash3_tab[hash3][k-N_HASH1_ROW-N_HASH2_ROW] ; 308 | if (p_src <= p_match && p_match < p) { 309 | size_t of = p - p_match; 310 | if (MIN_LZ77_OF <= of && of <= MAX_LZ77_OF) { 311 | size_t ml; 312 | for (ml=0; ml MIN_LZ77_LEN || (lz77_len >= MIN_LZ77_LEN && (p_curr-p_match) < 256)) { 314 | if (ml > MIN_LZ77_ML || (ml >= MIN_LZ77_ML && of < 256)) { 315 | wksp->ml_cand[i][k] = ml; 316 | wksp->of_cand[i][k] = of; 317 | if (wksp->ml[i] < ml) { 318 | wksp->ml[i] = ml; 319 | wksp->of[i] = of; 320 | } 321 | } 322 | } 323 | } 324 | } 325 | 326 | if (wksp->ml[i] >= SKIP_LZ77_ML) { 327 | skip = wksp->ml[i] - 1; 328 | } 329 | } 330 | 331 | for (k=0; khash1_tab[hash1][k] = wksp->hash1_tab[hash1][k+1]; 333 | } 334 | wksp->hash1_tab[hash1][N_HASH1_ROW-1] = p; 335 | 336 | for (k=0; khash2_tab[hash2][k] = wksp->hash2_tab[hash2][k+1]; 338 | } 339 | wksp->hash2_tab[hash2][N_HASH2_ROW-1] = p; 340 | 341 | for (k=0; khash3_tab[hash3][k] = wksp->hash3_tab[hash3][k+1]; 343 | } 344 | wksp->hash3_tab[hash3][N_HASH3_ROW-1] = p; 345 | } 346 | 347 | for (; iml_cand[i][k] = 0; 350 | } 351 | } 352 | } 353 | 354 | 355 | static void optimizeLZ77 (uint8_t *p_src, size_t src_len, searchLZ77_workspace_t *wksp, EntropyCtx_t *ectx) { 356 | size_t i; 357 | int k; 358 | wksp->cost[src_len] = 0; 359 | for (i=src_len; i>0;) { 360 | i --; 361 | wksp->ml[i] = wksp->of[i] = 0; 362 | wksp->cost[i] = wksp->cost[i+1] + costLit(ectx, p_src[i]); 363 | for (k=0; kml_cand[i][k]; 365 | uint16_t of = wksp->of_cand[i][k]; 366 | if (ml > 0) { 367 | size_t match_cost = wksp->cost[i+ml] + costMatch(ectx, ml, of); 368 | if (wksp->cost[i] > match_cost) { 369 | wksp->cost[i] = match_cost; 370 | wksp->ml[i] = ml; 371 | wksp->of[i] = of; 372 | } 373 | } 374 | } 375 | } 376 | } 377 | 378 | 379 | static int deflateBlockDynamicHuffman (struct StreamWriter_t *p_bs, uint8_t *p_src, uint32_t src_len, uint32_t is_final_block, searchLZ77_workspace_t *wksp) { 380 | uint32_t i; 381 | 
382 | EntropyCtx_t ectx; 383 | 384 | // LZ77 search 385 | searchLZ77(p_src, src_len, wksp); 386 | 387 | // clear count 388 | for (i=0; iml[i] > 0) { 395 | uint32_t symbol=0, ml_ebits=0, ml_elen=0, of_symbol=0, of_ebits=0, of_elen=0; 396 | symbol = getLZ77SymbolAndExtraBits(wksp->ml[i], wksp->of[i], &ml_ebits, &ml_elen, &of_symbol, &of_ebits, &of_elen); 397 | ectx.symbol_cnt[symbol] ++; 398 | ectx.of_cnt[of_symbol] ++; 399 | } 400 | } 401 | ectx.symbol_cnt[SYMBOL_END] ++; 402 | 403 | // build huffman tree first time ------------------------------------------------------ 404 | buildHuffmanLen (N_LZ77_OF, ectx.of_cnt, ectx.of_huffman_len, OFFSET_TREE_MERGE_INC); 405 | buildHuffmanBits(N_LZ77_OF, ectx.of_huffman_len, ectx.of_huffman_bits); 406 | buildHuffmanLen (N_SYMBOL, ectx.symbol_cnt, ectx.symbol_huffman_len, SYMBOL_TREE_MERGE_INC); 407 | buildHuffmanBits(N_SYMBOL, ectx.symbol_huffman_len, ectx.symbol_huffman_bits); 408 | 409 | // second-time LZ77 search 410 | optimizeLZ77(p_src, src_len, wksp, &ectx); 411 | 412 | // clear count 413 | for (i=0; iml[i] > 0) { 420 | symbol = getLZ77SymbolAndExtraBits(wksp->ml[i], wksp->of[i], &ml_ebits, &ml_elen, &of_symbol, &of_ebits, &of_elen); 421 | ectx.symbol_cnt[symbol] ++; 422 | ectx.of_cnt[of_symbol] ++; 423 | i += (wksp->ml[i] - 1); 424 | } else { 425 | symbol = p_src[i]; 426 | ectx.symbol_cnt[symbol] ++; 427 | } 428 | } 429 | ectx.symbol_cnt[SYMBOL_END] ++; 430 | 431 | // build huffman tree second time ------------------------------------------------------ 432 | buildHuffmanLen (N_LZ77_OF, ectx.of_cnt, ectx.of_huffman_len, OFFSET_TREE_MERGE_INC); 433 | buildHuffmanBits(N_LZ77_OF, ectx.of_huffman_len, ectx.of_huffman_bits); 434 | buildHuffmanLen (N_SYMBOL, ectx.symbol_cnt, ectx.symbol_huffman_len, SYMBOL_TREE_MERGE_INC); 435 | buildHuffmanBits(N_SYMBOL, ectx.symbol_huffman_len, ectx.symbol_huffman_bits); 436 | 437 | // write block header ------------------------------------------------------ 438 | 
RET_WHEN_ERR(appendBits(p_bs, (!!is_final_block), 1)); // final block ? 439 | RET_WHEN_ERR(appendBits(p_bs, 2, 2)); // dynamic huffman tree 440 | 441 | { // write huf tree 442 | uint32_t hlit, hof; 443 | 444 | for (hlit=N_LZ77_ML; hlit>0; hlit--) 445 | if (ectx.symbol_huffman_len[N_LITERAL+1+hlit-1] != 0) 446 | break; 447 | 448 | for (hof=N_LZ77_OF-1; hof>0; hof--) 449 | if (ectx.of_huffman_len[hof] != 0) 450 | break; 451 | 452 | RET_WHEN_ERR(appendBits(p_bs, hlit, 5)); // hlit 453 | RET_WHEN_ERR(appendBits(p_bs, hof , 5)); // hof 454 | RET_WHEN_ERR(appendBits(p_bs, 19-4, 4)); // hclen 455 | 456 | for (i=0; i<3; i++) 457 | RET_WHEN_ERR(appendBits(p_bs, 0, 3)); 458 | for (i=0; i<16; i++) 459 | RET_WHEN_ERR(appendBits(p_bs, 4, 3)); 460 | 461 | for (i=0; iml[i] > 0) { 472 | symbol = getLZ77SymbolAndExtraBits(wksp->ml[i], wksp->of[i], &ml_ebits, &ml_elen, &of_symbol, &of_ebits, &of_elen); 473 | //assert(symbol_huffman_len[symbol] > 0); 474 | i += (wksp->ml[i] - 1); 475 | RET_WHEN_ERR(appendBits(p_bs, ectx.symbol_huffman_bits[symbol], ectx.symbol_huffman_len[symbol])); // write LZ77_ml_symbol 476 | RET_WHEN_ERR(appendBits(p_bs, ml_ebits, ml_elen)); // write extra bits of LZ77_len 477 | RET_WHEN_ERR(appendBits(p_bs, ectx.of_huffman_bits[of_symbol], ectx.of_huffman_len[of_symbol])); // write symbol of LZ77_offset 478 | RET_WHEN_ERR(appendBits(p_bs, of_ebits, of_elen)); // write extra bits of LZ77_offset 479 | } else { 480 | symbol = p_src[i]; 481 | RET_WHEN_ERR(appendBits(p_bs, ectx.symbol_huffman_bits[symbol], ectx.symbol_huffman_len[symbol])); // write literal 482 | } 483 | } 484 | RET_WHEN_ERR(appendBits(p_bs, ectx.symbol_huffman_bits[SYMBOL_END], ectx.symbol_huffman_len[SYMBOL_END])); // write SYMBOL_END 485 | 486 | return R_OK; 487 | } 488 | 489 | 490 | static int deflateNullBlock (struct StreamWriter_t *p_bs, uint32_t is_final_block) { 491 | RET_WHEN_ERR(appendBits(p_bs, (!!is_final_block), 1)); // final block ? 
492 | RET_WHEN_ERR(appendBits(p_bs, 1, 2)); // fixed huffman tree 493 | RET_WHEN_ERR(appendBits(p_bs, static_symbol_huffman_bits[SYMBOL_END], static_symbol_huffman_len[SYMBOL_END])); // write SYMBOL_END 494 | return R_OK; 495 | } 496 | 497 | 498 | int deflateEncode (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len) { 499 | struct StreamWriter_t bs = newStreamWriter(p_dst, (*p_dst_len)); 500 | 501 | RET_ERR_IF(R_SRC_OVERFLOW, src_len > 0xFFFF0000U); 502 | RET_ERR_IF(R_DST_OVERFLOW, (*p_dst_len) > 0xFFFF0000U); 503 | 504 | if (src_len == 0) { 505 | RET_WHEN_ERR(deflateNullBlock(&bs, 1)); // special case : data length = 0, fill a empty block 506 | } else { 507 | uint32_t i; 508 | searchLZ77_workspace_t *wksp = (searchLZ77_workspace_t*)malloc(sizeof(searchLZ77_workspace_t)); 509 | RET_ERR_IF(R_MEM_RANOUT, (wksp==NULL)); 510 | 511 | for (i=0; i= src_len); 513 | uint32_t block_len = is_final_block ? src_len-i : MAX_BLOCK_LEN; 514 | RET_WHEN_ERR(deflateBlockDynamicHuffman(&bs, p_src+i, block_len, is_final_block, wksp)); // try dynamic huffman 515 | } 516 | 517 | free(wksp); 518 | } 519 | 520 | RET_WHEN_ERR(alignBitsToBytes(&bs)); 521 | 522 | *p_dst_len = (bs.p_buf - p_dst); 523 | 524 | return R_OK; 525 | } 526 | 527 | 528 | 529 | int gzipC (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len) { 530 | uint8_t *p_dst_tmp = p_dst; 531 | uint8_t **pp_dst = &p_dst_tmp; 532 | uint8_t *p_dst_limit = p_dst + *p_dst_len; 533 | size_t deflate_len; 534 | 535 | RET_ERR_IF(R_SRC_OVERFLOW, p_src > (p_src + src_len)); 536 | RET_ERR_IF(R_DST_OVERFLOW, p_dst > p_dst_limit); 537 | 538 | RET_WHEN_ERR(writeValue(pp_dst, p_dst_limit, 0x00088B1FU, 4)); 539 | RET_WHEN_ERR(writeValue(pp_dst, p_dst_limit, 0x00000000U, 4)); 540 | RET_WHEN_ERR(writeValue(pp_dst, p_dst_limit, 0x0304U, 2)); 541 | 542 | deflate_len = p_dst_limit - (*pp_dst); 543 | RET_WHEN_ERR(deflateEncode(p_src, src_len, *pp_dst, &deflate_len)); 544 | (*pp_dst) += deflate_len; 545 | 546 | 
RET_WHEN_ERR(writeValue(pp_dst, p_dst_limit, calcCrc32(p_src, src_len), 4)); 547 | RET_WHEN_ERR(writeValue(pp_dst, p_dst_limit, src_len , 4)); 548 | 549 | *p_dst_len = (*pp_dst) - p_dst; 550 | 551 | return R_OK; 552 | } 553 | -------------------------------------------------------------------------------- /src/zstdD.c: -------------------------------------------------------------------------------- 1 | #include // size_t 2 | #include // uint8_t, uint16_t, int32_t, uint64_t 3 | #include // memset, memcpy 4 | #include // malloc, free, exit 5 | #include // printf 6 | 7 | 8 | typedef uint8_t u8; 9 | typedef uint16_t u16; 10 | typedef int32_t i32; 11 | typedef uint64_t u64; 12 | 13 | #define SKIP_MAGIC_NUMBER_MIN (0x184D2A50U) // min magic number of skip frame 14 | #define SKIP_MAGIC_NUMBER_MAX (0x184D2A5FU) // max magic number of skip frame 15 | #define ZSTD_MAGIC_NUMBER (0xFD2FB528U) // magic number of zstd frame 16 | #define ZSTD_BLOCK_SIZE_MAX (128 * 1024) 17 | #define MAX_SEQ_SIZE (0x18000) 18 | 19 | /// This decoder calls exit(1) when it encounters an error, however a production library should propagate error codes 20 | #define ERROR(msg) { printf("Error: %s\n", (msg)); exit(1); } 21 | #define ERROR_IF(cond, msg) { if((cond)) ERROR(msg); } 22 | #define ERROR_I_SIZE_IF(cond) { ERROR_IF((cond), ("Input buffer smaller than it should be or input is corrupted")); } 23 | #define ERROR_O_SIZE_IF(cond) { ERROR_IF((cond), ("Output buffer overflow")); } 24 | #define ERROR_CORRUPT_IF(cond) { ERROR_IF((cond), ("Corruption detected while decompressing")); } 25 | #define ERROR_NOT_ZSTD_IF(cond) { ERROR_IF((cond), ("This data is not valid ZSTD frame")); } 26 | #define ERROR_MALLOC_IF(cond) { ERROR_IF((cond), ("Memory allocation error")); } 27 | 28 | #define HUF_MAX_BITS (13) 29 | #define HUF_MAX_SYMBS (256) 30 | #define HUF_TABLE_LENGTH (1< -1 68 | /// 001 -> 0 69 | /// 01x -> 1 70 | /// 1xx -> 2 71 | /// ... 
72 | static i32 highest_set_bit (u64 value) { 73 | i32 i = -1; 74 | while (value) { 75 | value >>= 1; 76 | i++; 77 | } 78 | return i; 79 | } 80 | 81 | 82 | 83 | ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 84 | /// 正向输入流类型,用于除了 huffman 和 fse 以外的数据读取 (meta-data) 85 | ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 86 | 87 | typedef struct { 88 | u8 *p; 89 | u8 *plimit; 90 | u8 c; 91 | } istream_t; 92 | 93 | static istream_t istream_new (u8 *ptr, size_t len) { 94 | istream_t st; 95 | st.p = ptr; 96 | st.plimit = ptr + len; 97 | st.c = 0; 98 | return st; 99 | } 100 | 101 | static u8 istream_get_curr_byte (istream_t *p_st) { 102 | ERROR_I_SIZE_IF(p_st->p >= p_st->plimit); 103 | return p_st->p[0]; 104 | } 105 | 106 | static u64 istream_readbytes (istream_t *p_st, u8 n_bytes) { 107 | u64 value = 0; 108 | u8 smt = 0; 109 | ERROR_CORRUPT_IF(p_st->c != 0); 110 | for (; n_bytes>0; n_bytes--) { 111 | value |= ((u64)istream_get_curr_byte(p_st)) << smt; 112 | p_st->p ++; 113 | smt += 8; 114 | } 115 | return value; 116 | } 117 | 118 | static u64 istream_readbits (istream_t *p_st, u8 n_bits) { 119 | u8 bitpos_start = p_st->c; 120 | u8 bitpos_end = p_st->c + n_bits; 121 | u8 bytepos_end = bitpos_end / 8; 122 | u64 valueh, valuel=0; 123 | ERROR_IF(n_bits==0, "why???"); 124 | p_st->c = 0; 125 | valueh = istream_readbytes(p_st, bytepos_end); 126 | valueh >>= bitpos_start; 127 | p_st->c = bitpos_end % 8; 128 | if (p_st->c) { 129 | valuel = istream_get_curr_byte(p_st) & ((1 << p_st->c) - 1); 130 | if (bytepos_end) { 131 | valuel <<= (bytepos_end*8 - bitpos_start); 132 | } else { 133 | valuel >>= bitpos_start; 134 | } 135 | } 136 | return valueh | valuel; 137 | } 138 | 139 | static void istream_align (istream_t *p_st) { 140 | if (p_st->c != 0) { 141 | 
p_st->p ++; 142 | p_st->c = 0; 143 | } 144 | } 145 | 146 | static size_t istream_get_remain_len (istream_t *p_st) { 147 | ERROR_CORRUPT_IF(p_st->c != 0); 148 | return (p_st->plimit - p_st->p); 149 | } 150 | 151 | static u8 *istream_skip (istream_t* p_st, size_t len) { 152 | u8 *ptr = p_st->p; 153 | ERROR_CORRUPT_IF(p_st->c != 0); 154 | ERROR_I_SIZE_IF(len > (p_st->plimit - p_st->p)); 155 | p_st->p += len; 156 | return ptr; 157 | } 158 | 159 | static istream_t istream_fork_substream (istream_t *p_st, size_t len) { 160 | return istream_new(istream_skip(p_st, len), len); 161 | } 162 | 163 | 164 | 165 | ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 166 | /// 反向输入流类型,用于 huffman 或 fse 解码 167 | ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 168 | 169 | typedef struct { 170 | u8 smt; 171 | u8 c; 172 | u8 *pbase; 173 | u8 *p; 174 | u64 data; 175 | } backward_stream_t; 176 | 177 | /// load 一次就会在 data 中缓存至少 57 bit 178 | /// 之后可以 read/move 多次,注意多次累计读取的 bit 数量不能超过57,否则 57 bit 会被消耗完,需要重新 load 再进行 read/move 179 | static void backward_stream_load (backward_stream_t *p_bst) { 180 | p_bst->p -= (p_bst->c >> 3); 181 | p_bst->c &= 0x7; 182 | p_bst->data = *((u64*)p_bst->p); 183 | p_bst->data <<= p_bst->c; 184 | } 185 | 186 | static u64 backward_stream_read (backward_stream_t *p_bst) { 187 | return p_bst->data >> p_bst->smt; 188 | } 189 | 190 | static void backward_stream_move (backward_stream_t *p_bst, u8 n_bits) { 191 | p_bst->data <<= n_bits; 192 | p_bst->c += n_bits; 193 | } 194 | 195 | static u64 backward_stream_readmove (backward_stream_t *p_bst, u8 n_bits) { 196 | u64 res = n_bits ? 
(p_bst->data >> (64 - n_bits)) : 0; 197 | p_bst->data <<= n_bits; 198 | p_bst->c += n_bits; 199 | return res; 200 | } 201 | 202 | static u8 backward_stream_load_and_judge_ended (backward_stream_t *p_bst) { 203 | backward_stream_load(p_bst); 204 | if ((p_bst->p + 8) < p_bst->pbase) { 205 | return 1; 206 | } else if ((p_bst->p + 8) == p_bst->pbase) { 207 | return (p_bst->c > 0); 208 | } else { 209 | return 0; 210 | } 211 | } 212 | 213 | static void backward_stream_check_ended (backward_stream_t *p_bst) { 214 | backward_stream_load(p_bst); 215 | ERROR_CORRUPT_IF((p_bst->p + 8) != p_bst->pbase); 216 | ERROR_CORRUPT_IF(p_bst->c != 0); 217 | } 218 | 219 | /// 用 istream_t 对象初始化一个 backward_stream_t 对象 ,用于解码FSE流和huffman流 220 | static backward_stream_t backward_stream_new (istream_t st, u8 n_bits_for_huf_read) { 221 | backward_stream_t bst; 222 | ERROR_CORRUPT_IF(st.c != 0); 223 | ERROR_CORRUPT_IF(st.p >= st.plimit); 224 | bst.smt = sizeof(bst.data)*8 - n_bits_for_huf_read; 225 | bst.pbase = st.p; 226 | bst.p = st.plimit - 8; 227 | bst.c = 8 - highest_set_bit(bst.p[7]); 228 | backward_stream_load(&bst); 229 | return bst; 230 | } 231 | 232 | 233 | 234 | ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 235 | /// ZSTD 解码相关函数(内部) 236 | ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 237 | 238 | static i32 decode_fse_freqs (istream_t *p_st_src, i32 *p_freq, i32 m_bits) { 239 | i32 remaining, n_symb=0; 240 | remaining = 1 + (1 << m_bits); 241 | while (remaining > 1 && n_symb < FSE_MAX_SYMBS) { 242 | i32 bits = highest_set_bit(remaining); 243 | i32 val = istream_readbits(p_st_src, bits); 244 | i32 thresh = (1 << (bits+1)) - 1 - remaining; 245 | if (val >= thresh) { 246 | if (istream_readbits(p_st_src, 1)) { 247 | val |= (1 << bits); 248 | val -= thresh; 249 
| } 250 | } 251 | val --; 252 | remaining -= val<0 ? -val : val; 253 | p_freq[n_symb++] = val; 254 | if (val == 0) { 255 | u8 i, repeat; 256 | do { 257 | repeat = istream_readbits(p_st_src, 2); 258 | for (i=0; (i= FSE_MAX_SYMBS); 265 | istream_align(p_st_src); 266 | 267 | return n_symb; 268 | } 269 | 270 | 271 | static void build_fse_table (FSE_table *p_ftab, i32 *p_freq, i32 n_symb) { 272 | i32 state_desc[FSE_MAX_SYMBS]; 273 | i32 pos_limit = 1 << p_ftab->m_bits; 274 | i32 pos_high = pos_limit; 275 | i32 pos = 0; 276 | i32 step = (pos_limit >> 1) + (pos_limit >> 3) + 3; 277 | i32 s, i; 278 | 279 | ERROR_CORRUPT_IF(p_ftab->m_bits > FSE_MAX_BITS); 280 | ERROR_CORRUPT_IF(n_symb > FSE_MAX_SYMBS); 281 | 282 | for (s=0; stable[pos_high] = s; 286 | state_desc[s] = 1; 287 | } 288 | } 289 | 290 | for (s=0; s 0) { 292 | state_desc[s] = p_freq[s]; 293 | for (i=0; itable[pos] = s; // Give `p_freq[s]` states to symbol s 295 | do { // "A position is skipped if already occupied, typically by a "less than 1" probability symbol." 
296 | pos = (pos + step) & (pos_limit - 1); 297 | } while (pos >= pos_high); // Note: no other collision checking is necessary as `step` is coprime to `size`, so the cycle will visit each position exactly once 298 | } 299 | } 300 | } 301 | 302 | ERROR_CORRUPT_IF(pos != 0); 303 | 304 | for (i=0; itable[i]; 306 | i32 next_state_desc = state_desc[symbol]++; 307 | p_ftab->n_bits[i] = (u8)(p_ftab->m_bits - highest_set_bit(next_state_desc)); // Fills in the table appropriately, next_state_desc increases by symbol over time, decreasing number of bits 308 | p_ftab->state_base[i] = ((i32)next_state_desc << p_ftab->n_bits[i]) - pos_limit; // Baseline increases until the bit threshold is passed, at which point it resets to 0 309 | } 310 | } 311 | 312 | 313 | static void decode_and_build_fse_table (FSE_table *p_ftab, istream_t *p_st_src, i32 max_m_bits) { 314 | i32 n_fse_symb; 315 | i32 p_fse_freq [FSE_MAX_SYMBS] = {0}; 316 | p_ftab->m_bits = 5 + istream_readbits(p_st_src, 4); 317 | ERROR_CORRUPT_IF(p_ftab->m_bits > max_m_bits); 318 | n_fse_symb = decode_fse_freqs(p_st_src, p_fse_freq, p_ftab->m_bits); 319 | build_fse_table(p_ftab, p_fse_freq, n_fse_symb); 320 | } 321 | 322 | 323 | static size_t decode_huf_weights_by_fse (FSE_table *p_ftab, istream_t *p_st_src, u8 *p_huf_weights) { 324 | backward_stream_t bst = backward_stream_new(*p_st_src, 0); 325 | i32 state1 = backward_stream_readmove(&bst, p_ftab->m_bits); 326 | i32 state2 = backward_stream_readmove(&bst, p_ftab->m_bits); 327 | size_t i = 0; 328 | for (;;) { 329 | p_huf_weights[i++] = p_ftab->table[state1]; 330 | if (backward_stream_load_and_judge_ended(&bst)) return i; 331 | state1 = p_ftab->state_base[state1] + backward_stream_readmove(&bst, p_ftab->n_bits[state1]); 332 | p_huf_weights[i++] = p_ftab->table[state2]; 333 | if (backward_stream_load_and_judge_ended(&bst)) return i; 334 | state2 = p_ftab->state_base[state2] + backward_stream_readmove(&bst, p_ftab->n_bits[state2]); 335 | } 336 | } 337 | 338 | 339 | static 
size_t decode_huf_weights (istream_t *p_st_src, u8 *p_huf_weights) { 340 | size_t hbyte = istream_readbytes(p_st_src, 1); 341 | if (hbyte >= 128) { 342 | u8 i, tmp=0; 343 | hbyte -= 127; 344 | for (i=0; i> 4); 348 | tmp &= 0xF; 349 | } else { 350 | p_huf_weights[i] = tmp; 351 | } 352 | } 353 | return hbyte; 354 | } else { 355 | istream_t st_hufweight = istream_fork_substream(p_st_src, hbyte); 356 | FSE_table ftab; 357 | decode_and_build_fse_table(&ftab, &st_hufweight, 7); 358 | hbyte = decode_huf_weights_by_fse(&ftab, &st_hufweight, p_huf_weights); 359 | return hbyte; 360 | } 361 | } 362 | 363 | 364 | static void convert_huf_weights_to_bits (u8 *p, size_t n_symb) { 365 | i32 sum=0, left; 366 | u8 max_bits; 367 | size_t i; 368 | for (i=0; i HUF_MAX_BITS); 370 | sum += p[i] ? ((u64)1<<(p[i]-1)) : 0; 371 | } 372 | max_bits = 1 + highest_set_bit(sum); 373 | left = (1 << max_bits) - sum; 374 | ERROR_CORRUPT_IF(left & (left - 1)); // left 必须是2的指数 375 | p[n_symb-1] = highest_set_bit(left) + 1; 376 | for (i=0; ihuf_m_bits = 0; 389 | for (i=0; i HUF_MAX_BITS); 391 | rank_count[bits[i]]++; 392 | if (p_ctx->huf_m_bits < bits[i]) { 393 | p_ctx->huf_m_bits = bits[i]; 394 | } 395 | } 396 | for (i=0; ihuf_table [i] = 0; 398 | p_ctx->huf_n_bits[i] = 0; 399 | } 400 | rank_idx[p_ctx->huf_m_bits] = 0; // Initialize the starting codes for each rank (number of bits) 401 | for (i=p_ctx->huf_m_bits; i>=1; i--) { 402 | rank_idx[i - 1] = rank_idx[i] + rank_count[i] * (1 << ((i32)p_ctx->huf_m_bits - i)); 403 | memset(&p_ctx->huf_n_bits[rank_idx[i]], i, rank_idx[i - 1] - rank_idx[i]); // The entire range takes the same number of bits so we can memset it 404 | } 405 | ERROR_CORRUPT_IF(rank_idx[0] != (1 << p_ctx->huf_m_bits)); 406 | for (i=0; ihuf_m_bits - bits[i]); // Since the code doesn't care about the bottom `m_bits - bits[i]` bits of state, it gets a range that spans all possible values of the lower bits 410 | memset(&p_ctx->huf_table[code], i, len); 411 | rank_idx[bits[i]] += len; 412 | 
/// Read a huffman tree description from p_st_src and build the decoding table of p_ctx from it.
static void decode_and_build_huf_table (frame_context_t *p_ctx, istream_t *p_st_src) {
    u8 p_weights_or_bits [HUF_MAX_SYMBS] = {0};
    size_t n_symb = decode_huf_weights(p_st_src, p_weights_or_bits) + 1;     // the last weight is not encoded but computed, hence the +1
    ERROR_CORRUPT_IF(n_symb > HUF_MAX_SYMBS);
    convert_huf_weights_to_bits(p_weights_or_bits, n_symb);
    build_huf_table(p_ctx, p_weights_or_bits, n_symb);
}


/// Decode n_lit literals from a single huffman stream (all of p_st_src) into p_dst.
/// The stream must be consumed exactly, otherwise a corruption error is raised.
static void huf_decode_1x1 (frame_context_t *p_ctx, istream_t *p_st_src, size_t n_lit, u8 *p_dst) {
    u8 i;
    backward_stream_t bst = backward_stream_new(*p_st_src, p_ctx->huf_m_bits);
    size_t n_lit_div = n_lit / 5;
    size_t n_lit_rem = n_lit - n_lit_div*5;
    // symbols are decoded in groups of 5 between two reloads of the bit cache
    for (; n_lit_div>0; n_lit_div--) {
        backward_stream_load(&bst);
        for (i=0; i<5; i++) {
            u64 entry = backward_stream_read(&bst);                 // peek huf_m_bits bits : index into the decoding table
            *(p_dst++) = p_ctx->huf_table[entry];
            backward_stream_move(&bst, p_ctx->huf_n_bits[entry]);   // consume only the bits of the decoded code
        }
    }
    // remaining (less than 5) symbols
    backward_stream_load(&bst);
    for (; n_lit_rem>0; n_lit_rem--) {
        u64 entry = backward_stream_read(&bst);
        *(p_dst++) = p_ctx->huf_table[entry];
        backward_stream_move(&bst, p_ctx->huf_n_bits[entry]);
    }
    backward_stream_check_ended(&bst);
}


/// Decode n_lit literals that are split over 4 huffman streams into p_dst.
/// p_st_src starts with three 2-byte sizes (of streams 1..3), followed by the four streams; stream 4 takes the rest.
/// Streams 1..3 each hold (n_lit+3)/4 literals, stream 4 holds the remainder.
static void huf_decode_4x1 (frame_context_t *p_ctx, istream_t *p_st_src, size_t n_lit, u8 *p_dst) {
    size_t csize1 = istream_readbytes(p_st_src, 2);
    size_t csize2 = istream_readbytes(p_st_src, 2);
    size_t csize3 = istream_readbytes(p_st_src, 2);
    istream_t st1 = istream_fork_substream(p_st_src, csize1);
    istream_t st2 = istream_fork_substream(p_st_src, csize2);
    istream_t st3 = istream_fork_substream(p_st_src, csize3);
    istream_t st4 = *p_st_src;
    size_t n_lit123 = ((n_lit+3) / 4);
    size_t n_lit4   = n_lit - n_lit123 * 3;
    ERROR_CORRUPT_IF(n_lit < 6);
    ERROR_CORRUPT_IF(n_lit123 < n_lit4);     // also catches the unsigned wrap-around of n_lit4
    huf_decode_1x1(p_ctx, &st1, n_lit123, p_dst);
    huf_decode_1x1(p_ctx, &st2, n_lit123, p_dst+n_lit123);
    huf_decode_1x1(p_ctx, &st3, n_lit123, p_dst+n_lit123*2);
    huf_decode_1x1(p_ctx, &st4, n_lit4  , p_dst+n_lit123*3);
}


/// Decode the literals section of a block from p_st_src into p_ctx->buf_lit and return the number of literals.
/// lit_type : 0 = literals stored as-is, 1 = one byte repeated, 2 = huffman coded with a new tree, 3 = huffman coded with the previous tree.
static size_t decode_literals (frame_context_t *p_ctx, istream_t *p_st_src) {
    u8 lit_type   = istream_readbits(p_st_src, 2);
    u8 n_lit_type = istream_readbits(p_st_src, 2);
    size_t n_lit, huf_size;
    u8 huf_x1 = 0;
    if (lit_type < 2) {
        // uncompressed literals : only the literal count is stored, its width depends on n_lit_type
        switch (n_lit_type) {
            case 0:  n_lit = (istream_readbits(p_st_src, 4) << 1);     break;
            case 2:  n_lit = (istream_readbits(p_st_src, 4) << 1) + 1; break;
            case 1:  n_lit =  istream_readbits(p_st_src, 12);          break;
            default: n_lit =  istream_readbits(p_st_src, 20);          break;
        }
        ERROR_CORRUPT_IF(n_lit > ZSTD_BLOCK_SIZE_MAX);
        if (lit_type == 0) {
            memcpy(p_ctx->buf_lit, istream_skip(p_st_src, n_lit)  , n_lit);
        } else {
            memset(p_ctx->buf_lit, istream_readbytes(p_st_src, 1), n_lit);
        }
    } else {
        // huffman coded literals : literal count and compressed size are stored with the same width
        istream_t st_huf;
        switch (n_lit_type) {
            case 0 : huf_x1 = 1;                                   // single stream; intentionally falls through to share the 10-bit fields
            case 1 : n_lit    = istream_readbits(p_st_src, 10);
                     huf_size = istream_readbits(p_st_src, 10);   break;
            case 2 : n_lit    = istream_readbits(p_st_src, 14);
                     huf_size = istream_readbits(p_st_src, 14);   break;
            default: n_lit    = istream_readbits(p_st_src, 18);
                     huf_size = istream_readbits(p_st_src, 18);   break;
        }
        ERROR_CORRUPT_IF(n_lit > ZSTD_BLOCK_SIZE_MAX);
        st_huf = istream_fork_substream(p_st_src, huf_size);
        if (lit_type == 3) {                                       // reuse the huffman table of the previous block
            ERROR_CORRUPT_IF(!p_ctx->huf_table_exist);             // the huffman table must already exist
        } else {                                                   // the huffman table has to be decoded first
            decode_and_build_huf_table(p_ctx, &st_huf);
            p_ctx->huf_table_exist = 1;
        }
        if (huf_x1) {
            huf_decode_1x1(p_ctx, &st_huf, n_lit, p_ctx->buf_lit);
        } else {
            huf_decode_4x1(p_ctx, &st_huf, n_lit, p_ctx->buf_lit);
        }
    }
    return n_lit;
}


/// Set up the FSE table for one sequence field.
/// type : 0 = literal length, 1 = offset, anything else = match length (selects the default distribution and the maximum accuracy log).
/// mode : 0 = predefined distribution, 1 = RLE (a single symbol read from the stream), 2 = table described in the stream, other = keep the existing table.
static void decode_and_build_ll_or_of_or_ml_fse_table (FSE_table *p_ftab, istream_t *p_st_src, i32 type, i32 mode) {
    switch (mode) {
        case 0: { // Predefined_Mode
            static i32 LL_FREQ_DEFAULT[] = {4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1};
            static i32 OF_FREQ_DEFAULT[] = {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1};
            static i32 ML_FREQ_DEFAULT[] = {1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1};
            switch (type) {
                case 0 : p_ftab->m_bits=6; build_fse_table(p_ftab, LL_FREQ_DEFAULT, sizeof(LL_FREQ_DEFAULT)/sizeof(LL_FREQ_DEFAULT[0])); break;
                case 1 : p_ftab->m_bits=5; build_fse_table(p_ftab, OF_FREQ_DEFAULT, sizeof(OF_FREQ_DEFAULT)/sizeof(OF_FREQ_DEFAULT[0])); break;
                default: p_ftab->m_bits=6; build_fse_table(p_ftab, ML_FREQ_DEFAULT, sizeof(ML_FREQ_DEFAULT)/sizeof(ML_FREQ_DEFAULT[0])); break;
            }
            break;
        }
        case 1: { // RLE_Mode : a one-entry table that always yields the same symbol and consumes no bits
            p_ftab->table[0] = istream_readbytes(p_st_src, 1);
            p_ftab->n_bits[0] = 0;
            p_ftab->state_base[0] = 0;
            p_ftab->m_bits = 0;
            break;
        }
        case 2: { // FSE_Compressed_Mode
            const static u8 lut_max_m_bits [] = {9, 8, 9};     // maximum accuracy log, indexed by type
            decode_and_build_fse_table(p_ftab, p_st_src, lut_max_m_bits[type]);
            break;
        }
        default:{ // Repeat_Mode : the table left by an earlier call is used again, so it must exist
            ERROR_CORRUPT_IF(!p_ftab->exist);
            break;
        }
    }
    p_ftab->exist = 1;
}
<<= 8; 556 | n_seq += istream_readbytes(p_st_src, 1); 557 | } 558 | if (n_seq) { 559 | u8 mode_ml, mode_of, mode_ll; 560 | istream_readbits(p_st_src, 2); // 1-0 : Reserved 561 | mode_ml = istream_readbits(p_st_src, 2); // 3-2 : Match_Lengths_Mode 562 | mode_of = istream_readbits(p_st_src, 2); // 5-4 : Offsets_Mode 563 | mode_ll = istream_readbits(p_st_src, 2); // 7-6 : Literals_Lengths_Mode 564 | decode_and_build_ll_or_of_or_ml_fse_table(&p_ctx->table_ll, p_st_src, 0, mode_ll); 565 | decode_and_build_ll_or_of_or_ml_fse_table(&p_ctx->table_of, p_st_src, 1, mode_of); 566 | decode_and_build_ll_or_of_or_ml_fse_table(&p_ctx->table_ml, p_st_src, 2, mode_ml); 567 | } 568 | return n_seq; 569 | } 570 | 571 | 572 | static u64 parse_offset (u64 *prev_of, u64 of, u64 ll) { 573 | u64 real_of = of - 3; 574 | if (of <= 3) { 575 | of -= ((ll == 0) ? 0 : 1); 576 | real_of = (of < 3) ? prev_of[of] : prev_of[0]-1; 577 | } 578 | switch (of) { 579 | default : 580 | prev_of[2] = prev_of[1]; 581 | case 1 : 582 | prev_of[1] = prev_of[0]; 583 | prev_of[0] = real_of; 584 | case 0 : 585 | break; 586 | } 587 | return real_of; 588 | } 589 | 590 | 591 | static void decode_sequences_by_fse_and_execute (frame_context_t *p_ctx, istream_t *p_st_src, size_t n_seq, size_t n_lit, u8 **pp_dst, u8 *p_dst_limit) { 592 | u8 *p_lit = p_ctx->buf_lit; 593 | 594 | if (n_seq) { 595 | backward_stream_t bst = backward_stream_new(*p_st_src, 0); 596 | i32 ll_state = backward_stream_readmove(&bst, p_ctx->table_ll.m_bits); 597 | i32 of_state = backward_stream_readmove(&bst, p_ctx->table_of.m_bits); 598 | i32 ml_state = backward_stream_readmove(&bst, p_ctx->table_ml.m_bits); 599 | size_t i = 0; 600 | 601 | for (;;) { 602 | const static u64 LL_BASELINES[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40,48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536}; 603 | const static u64 ML_BASELINES[] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,17, 18, 19, 20, 21, 
22, 23, 24, 25, 26, 27, 28, 29, 30,31, 32, 33, 34, 35, 37, 39, 41, 43, 47, 51, 59, 67, 83,99, 131, 259, 515, 1027, 2051, 4099, 8195, 16387, 32771, 65539}; 604 | const static u8 LL_EXTRA_BITS[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; 605 | const static u8 ML_EXTRA_BITS[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; 606 | 607 | u8 ll_code = p_ctx->table_ll.table[ll_state]; 608 | u8 of_code = p_ctx->table_of.table[of_state]; 609 | u8 ml_code = p_ctx->table_ml.table[ml_state]; 610 | 611 | u64 of, ml, ll; 612 | 613 | ERROR_CORRUPT_IF(ll_code > MAX_LL_CODE || ml_code > MAX_ML_CODE); 614 | 615 | backward_stream_load(&bst); 616 | of = ((u64)1 << of_code) + backward_stream_readmove(&bst, of_code); // "Decoding starts by reading the Number_of_Bits required to decode Offset. It then does the same for Match_Length, and then for Literals_Length." 
617 | ml = ML_BASELINES[ml_code] + backward_stream_readmove(&bst, ML_EXTRA_BITS[ml_code]); 618 | ll = LL_BASELINES[ll_code] + backward_stream_readmove(&bst, LL_EXTRA_BITS[ll_code]); 619 | 620 | memcpy(*pp_dst, p_lit, ll); 621 | (*pp_dst) += ll; 622 | p_lit += ll; 623 | n_lit -= ll; 624 | of = parse_offset(p_ctx->prev_of, of, ll); 625 | for (; ml>0; ml--) { 626 | **pp_dst = *(*pp_dst - of); 627 | (*pp_dst) ++; 628 | } 629 | 630 | if (++i >= n_seq) break; 631 | 632 | backward_stream_load(&bst); 633 | ll_state = p_ctx->table_ll.state_base[ll_state] + backward_stream_readmove(&bst, p_ctx->table_ll.n_bits[ll_state]); 634 | ml_state = p_ctx->table_ml.state_base[ml_state] + backward_stream_readmove(&bst, p_ctx->table_ml.n_bits[ml_state]); 635 | of_state = p_ctx->table_of.state_base[of_state] + backward_stream_readmove(&bst, p_ctx->table_of.n_bits[of_state]); 636 | } 637 | 638 | backward_stream_check_ended(&bst); 639 | } 640 | 641 | memcpy(*pp_dst, p_lit, n_lit); 642 | (*pp_dst) += n_lit; 643 | } 644 | 645 | 646 | static void decode_blocks_in_a_frame (frame_context_t *p_ctx, istream_t *p_st_src, u8 **pp_dst, u8 *p_dst_limit) { 647 | u8 block_last, block_type; 648 | size_t block_len; 649 | do { 650 | block_last = istream_readbits(p_st_src, 1); 651 | block_type = istream_readbits(p_st_src, 2); 652 | block_len = istream_readbits(p_st_src, 21); // the compressed length of this block 653 | switch (block_type) { 654 | case 0: // Raw_Block 655 | case 1: // RLE_Block 656 | ERROR_O_SIZE_IF(block_len > (p_dst_limit - *pp_dst)); 657 | if (block_type == 0) { 658 | memcpy(*pp_dst, istream_skip(p_st_src, block_len), block_len); 659 | } else { 660 | memset(*pp_dst, istream_readbytes(p_st_src, 1) , block_len); 661 | } 662 | (*pp_dst) += block_len; 663 | break; 664 | case 2: { // Compressed_Block 665 | istream_t st_blk = istream_fork_substream(p_st_src, block_len); 666 | size_t n_lit = decode_literals(p_ctx, &st_blk); 667 | size_t n_seq = decode_and_build_seq_fse_table(p_ctx, &st_blk); 668 
| decode_sequences_by_fse_and_execute(p_ctx, &st_blk, n_seq, n_lit, pp_dst, p_dst_limit); 669 | break; 670 | } 671 | default: ERROR_CORRUPT_IF(1); 672 | } 673 | } while (!block_last); 674 | if (p_ctx->checksum_flag) { 675 | istream_skip(p_st_src, 4); // This program does not support checking the checksum, so skip it if it's present 676 | } 677 | } 678 | 679 | 680 | static void parse_frame_header (istream_t *p_st_src, u8 *p_checksum_flag, size_t *p_window_size, size_t *p_decoded_len) { 681 | u8 dictionary_id_flag, single_segment_flag, frame_content_size_flag; 682 | 683 | dictionary_id_flag = istream_readbits(p_st_src, 2); // 1-0 Dictionary_ID_flag" 684 | *p_checksum_flag = istream_readbits(p_st_src, 1); // 2 checksum_flag 685 | ERROR_CORRUPT_IF(istream_readbits(p_st_src, 1) != 0); // 3 Reserved_bit 686 | istream_readbits(p_st_src, 1); // 4 Unused_bit 687 | single_segment_flag = istream_readbits(p_st_src, 1); // 5 Single_Segment_flag 688 | frame_content_size_flag = istream_readbits(p_st_src, 2); // 7-6 Frame_Content_Size_flag 689 | 690 | ERROR_IF(dictionary_id_flag, "This zstd data is compressed using a dictionary, but this decoder do not support dictionary"); 691 | 692 | if (!single_segment_flag) { // decode window_size if it exists 693 | u8 mantissa = istream_readbits(p_st_src, 3); // mantissa: low 3-bit 694 | u8 exponent = istream_readbits(p_st_src, 5); // exponent: high 5-bit 695 | size_t window_base = ((size_t)1) << (10 + exponent); 696 | size_t window_add = (window_base / 8) * mantissa; 697 | *p_window_size = window_base + window_add; 698 | } 699 | 700 | if (single_segment_flag || frame_content_size_flag) { // decode frame content size (decoded_size) if it exists 701 | const static i32 bytes_choices[] = {1, 2, 4, 8}; 702 | i32 bytes = bytes_choices[frame_content_size_flag]; 703 | *p_decoded_len = istream_readbytes(p_st_src, bytes); 704 | *p_decoded_len += (bytes == 2) ? 256 : 0; // "When Field_Size is 2, the offset of 256 is added." 
705 | } else { 706 | *p_decoded_len = 0; 707 | } 708 | 709 | if (single_segment_flag) { // when Single_Segment_flag=1 710 | *p_window_size = *p_decoded_len; // the maximum back-reference distance is the content size itself, which can be any value from 1 to 2^64-1 bytes (16 EB)." 711 | } 712 | } 713 | 714 | 715 | static void decode_frame (frame_context_t *p_ctx, istream_t *p_st_src, u8 **pp_dst, u8 *p_dst_limit) { 716 | u64 magic = istream_readbytes(p_st_src, 4); 717 | if (magic == ZSTD_MAGIC_NUMBER) { 718 | size_t decoded_len = 0; 719 | u8 *p_dst_base = *pp_dst; 720 | memset(p_ctx, 0, sizeof(*p_ctx)); 721 | p_ctx->prev_of[0] = 1; 722 | p_ctx->prev_of[1] = 4; 723 | p_ctx->prev_of[2] = 8; 724 | parse_frame_header(p_st_src, &p_ctx->checksum_flag, &p_ctx->window_size, &decoded_len); 725 | if (decoded_len) { 726 | ERROR_O_SIZE_IF(decoded_len > (p_dst_limit - p_dst_base)); 727 | } 728 | decode_blocks_in_a_frame(p_ctx, p_st_src, pp_dst, p_dst_limit); 729 | if (decoded_len) { 730 | ERROR_CORRUPT_IF(decoded_len != (*pp_dst - p_dst_base)); 731 | } 732 | } else if (SKIP_MAGIC_NUMBER_MIN <= magic && magic <= SKIP_MAGIC_NUMBER_MAX) { 733 | size_t skip_frame_len = istream_readbytes(p_st_src, 4); 734 | istream_skip(p_st_src, skip_frame_len); 735 | // printf(" skip frame length = %lu\n", skip_frame_len); 736 | } else { 737 | ERROR_NOT_ZSTD_IF(1); 738 | } 739 | } 740 | 741 | 742 | 743 | ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 744 | /// ZSTD 解码函数(外部可调用) 745 | ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 746 | 747 | void zstdD (u8 *p_src, size_t src_len, u8 *p_dst, size_t *p_dst_len) { 748 | u8 *p_dst_base = p_dst; 749 | u8 *p_dst_limit = p_dst + (*p_dst_len); 750 | istream_t st_src = istream_new(p_src, src_len); 751 | frame_context_t *p_ctx = 
// Reverse the order of the lowest bit_count bits of `bits`.
// Bits above bit_count are ignored; bit_count == 0 yields 0.
static uint32_t bitsReverse (uint32_t bits, uint32_t bit_count) {
    uint32_t out = 0;
    uint32_t k;
    for (k = 0; k < bit_count; k++) {
        out = (out << 1) | (bits & 1);     // the lowest remaining input bit becomes the next output bit
        bits >>= 1;
    }
    return out;
}


// Return the position of the highest '1' bit of val, counted from 1 (i.e. its bit length); 0 for val == 0.
static uint32_t countBit (uint32_t val) {
    uint32_t n = 0;
    while (val != 0) {
        n++;
        val >>= 1;
    }
    return n;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Range Encoder
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

#define   RANGE_CODE_NORMALIZE_THRESHOLD           (1 << 24)
#define   RANGE_CODE_MOVE_BITS                     5
#define   RANGE_CODE_N_BIT_MODEL_TOTAL_BITS        11
#define   RANGE_CODE_BIT_MODEL_TOTAL               (1 << RANGE_CODE_N_BIT_MODEL_TOTAL_BITS)
#define   RANGE_CODE_HALF_PROBABILITY              (RANGE_CODE_BIT_MODEL_TOTAL >> 1)

#define   RANGE_CODE_CACHE_SIZE_MAX                (~((size_t)0))


// State of the range encoder. "low" is a 33-bit value kept as low_msb:low_lsb so that no 64-bit type is needed.
typedef struct {
    uint8_t  overflow;       // set to 1 once a byte could not be written because the output buffer was full
    uint8_t  cache;          // the most recent byte that has not been written yet (a carry may still change it)
    uint8_t  low_msb;        // bit 32 of "low" (the carry)
    uint32_t low_lsb;        // bits 31..0 of "low"
    uint32_t range;
    size_t   cache_size;     // number of pending bytes: `cache` plus the bytes that follow it
    uint8_t *p_dst;          // next output position
    uint8_t *p_dst_limit;    // one past the end of the output buffer
} RangeEncoder_t;


// Create an encoder that writes at most dst_len bytes to p_dst.
static RangeEncoder_t newRangeEncoder (uint8_t *p_dst, size_t dst_len) {
    RangeEncoder_t enc;
    enc.overflow    = 0;
    enc.cache       = 0;
    enc.low_msb     = 0;
    enc.low_lsb     = 0;
    enc.range       = 0xFFFFFFFF;
    enc.cache_size  = 1;
    enc.p_dst       = p_dst;
    enc.p_dst_limit = p_dst + dst_len;
    return enc;
}


// Append one byte to the output, or raise the overflow flag when the buffer is full.
static void rangeEncodeOutByte (RangeEncoder_t *e, uint8_t byte) {
    if (e->p_dst == e->p_dst_limit) {
        e->overflow = 1;
        return;
    }
    *(e->p_dst) = byte;
    e->p_dst++;
}


// If range has dropped below the threshold, shift one byte out of "low" and scale range by 256.
static void rangeEncodeNormalize (RangeEncoder_t *e) {
    if (e->range >= RANGE_CODE_NORMALIZE_THRESHOLD)
        return;

    // Pending bytes can be flushed when a carry happened ("low" >= 1<<32) or when no later carry can reach them ("low" < (1<<32)-(1<<24)).
    if (e->low_msb || e->low_lsb < 0xFF000000) {
        uint8_t carry = e->low_msb ? 1 : 0;
        uint8_t fill  = carry ? 0x00 : 0xFF;          // the pending 0xFF bytes become 0x00 when the carry ripples through them
        rangeEncodeOutByte(e, (uint8_t)(e->cache + carry));
        while (e->cache_size > 1) {
            rangeEncodeOutByte(e, fill);
            e->cache_size--;
        }
        e->cache      = (uint8_t)((e->low_lsb) >> 24);
        e->cache_size = 0;
    }

    if (e->cache_size < RANGE_CODE_CACHE_SIZE_MAX)
        e->cache_size++;

    e->low_msb   = 0;
    e->low_lsb <<= 8;
    e->range   <<= 8;
}


// Flush the encoder: with range forced to 0, each of the 6 normalize steps shifts one byte of "low" out.
static void rangeEncodeTerminate (RangeEncoder_t *e) {
    uint32_t step;
    e->range = 0;
    for (step = 0; step < 6; step++)
        rangeEncodeNormalize(e);
}


// Encode the lowest bit_count bits of val, MSB first, each with a fixed probability of 1/2.
static void rangeEncodeIntByFixedProb (RangeEncoder_t *e, uint32_t val, uint32_t bit_count) {
    while (bit_count > 0) {
        uint8_t bit;
        bit_count--;
        bit = (uint8_t)((val >> bit_count) & 1);
        rangeEncodeNormalize(e);
        e->range >>= 1;
        if (bit) {
            uint32_t sum = e->low_lsb + e->range;
            if (sum < e->low_lsb)                      // the 32-bit addition wrapped: record the carry
                e->low_msb = 1;
            e->low_lsb = sum;
        }
    }
}


// Encode one bit with the adaptive probability *p_prob (probability of a 0 bit, scaled to RANGE_CODE_BIT_MODEL_TOTAL), then adapt it.
static void rangeEncodeBit (RangeEncoder_t *e, uint16_t *p_prob, uint8_t bit) {
    uint32_t prob = *p_prob;
    uint32_t bound;

    rangeEncodeNormalize(e);

    bound = (e->range >> RANGE_CODE_N_BIT_MODEL_TOTAL_BITS) * prob;

    if (bit) {                                         // bit 1: take the upper part of the range, lower the probability of 0
        uint32_t sum = e->low_lsb + bound;
        e->range -= bound;
        if (sum < e->low_lsb)                          // the 32-bit addition wrapped: record the carry
            e->low_msb = 1;
        e->low_lsb = sum;
        *p_prob = (uint16_t)(prob - (prob >> RANGE_CODE_MOVE_BITS));
    } else {                                           // bit 0: take the lower part of the range, raise the probability of 0
        e->range = bound;
        *p_prob = (uint16_t)(prob + ((RANGE_CODE_BIT_MODEL_TOTAL - prob) >> RANGE_CODE_MOVE_BITS));
    }
}


// Encode the lowest bit_count bits of val, MSB first, using a binary tree of probabilities rooted at p_prob[0].
static void rangeEncodeInt (RangeEncoder_t *e, uint16_t *p_prob, uint32_t val, uint32_t bit_count) {
    uint32_t node = 1;
    while (bit_count > 0) {
        uint8_t bit;
        bit_count--;
        bit = (uint8_t)((val >> bit_count) & 1);
        rangeEncodeBit(e, p_prob + (node - 1), bit);
        node = (node << 1) | bit;
    }
}


// Encode the 8 bits of `byte`, MSB first; the probability used for each bit also depends on the corresponding bit
// of match_byte for as long as the bits encoded so far have been equal to those of match_byte.
static void rangeEncodeMB (RangeEncoder_t *e, uint16_t *p_prob, uint32_t byte, uint32_t match_byte) {
    uint32_t node   = 1;
    uint32_t offset = 0x100;
    uint32_t n;
    for (n = 8; n > 0; n--) {
        uint32_t prev_offset = offset;
        uint8_t  bit = (uint8_t)((byte >> 7) & 1);
        byte       <<= 1;
        match_byte <<= 1;
        offset      &= match_byte;
        rangeEncodeBit(e, p_prob + (offset + prev_offset + node - 1), bit);
        node = (node << 1) | bit;
        if (!bit)
            offset ^= prev_offset;
    }
}
// Heuristic score of an LZ candidate {len, dist}; a higher score means a more attractive candidate.
//   len        : match length (values below 2 mean "no usable match")
//   dist       : match distance
//   rep0..rep3 : the four most recently used distances; reusing one of them is rated best
// The distance contributes 0..5 points (5 for a rep distance, then 4 for dist<=12, 3 for <=288, 2 for <=5184,
// 1 for <=82944, otherwise 0). The result is 13 when len<2, 8+points+1 when len==2, and 8+points+len otherwise.
static uint32_t lenDistScore (uint32_t len, uint32_t dist, uint32_t rep0, uint32_t rep1, uint32_t rep2, uint32_t rep3) {
    // A scoped constant instead of a one-letter macro, so that nothing leaks into the rest of the file.
    enum { DIST_STEP = 12 };
    static const uint32_t TABLE_THRESHOLDS [] = {
        DIST_STEP*DIST_STEP*DIST_STEP*DIST_STEP*DIST_STEP*5,     // [0] kept for layout only; the search below never reaches index 0
        DIST_STEP*DIST_STEP*DIST_STEP*DIST_STEP*4,
        DIST_STEP*DIST_STEP*DIST_STEP*3,
        DIST_STEP*DIST_STEP*2,
        DIST_STEP
    };
    uint32_t score;

    if (dist == rep0 || dist == rep1 || dist == rep2 || dist == rep3) {
        score = 5;
    } else {
        for (score=4; score>0; score--)            // stop at the tightest threshold that still contains dist
            if (dist <= TABLE_THRESHOLDS[score])
                break;
    }

    if (len < 2)
        return 8 + 5;
    else if (len == 2)
        return 8 + score + 1;
    else
        return 8 + score + len;
}
// Return 1 when the byte at pos equals the byte rep0 positions before it (a SHORTREP packet is possible), otherwise 0.
// src_len is accepted for symmetry with the other search functions but is not used.
static uint8_t isShortRep (uint8_t *p_src, size_t src_len, size_t pos, uint32_t rep0) {
    if (pos < rep0)
        return 0;
    if (p_src[pos] != p_src[pos-rep0])
        return 0;
    return 1;
}




/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// LZMA Encoder
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

typedef enum {          // packet_type
    PKT_LIT,
    PKT_MATCH,
    PKT_SHORTREP,
    PKT_REP0,           // LONGREP0
    PKT_REP1,           // LONGREP1
    PKT_REP2,           // LONGREP2
    PKT_REP3            // LONGREP3
} PACKET_t;


// Return the LZMA state that follows `state` after a packet of the given type has been encoded.
// Valid states are 0..11; any other state yields 0xFF, an invalid state that never appears.
static uint8_t stateTransition (uint8_t state, PACKET_t type) {
    enum { STATE_COUNT = 12, FIRST_NON_LIT_STATE = 7 };
    static const uint8_t AFTER_LIT [STATE_COUNT] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};

    if (state >= STATE_COUNT)
        return 0xFF;

    if (type == PKT_LIT)
        return AFTER_LIT[state];

    if (state < FIRST_NON_LIT_STATE) {         // the previous packet was a literal
        if (type == PKT_MATCH)
            return 7;
        if (type == PKT_SHORTREP)
            return 9;
        return 8;                              // any LONGREP
    }

    // the previous packet was not a literal
    if (type == PKT_MATCH)
        return 10;
    return 11;                                 // SHORTREP or any LONGREP
}
we will finish at the next loop 496 | type = PKT_MATCH; // the end marker is regarded as a MATCH packet 497 | len = 2; // this MATCH packet's len = 2 498 | dist = 0; // this MATCH packet's dist = 0, in next steps, we will encode dist-1 (0xFFFFFFFF), aka end marker 499 | 500 | } else { // there are still data need to be encoded 501 | if (n_bypass > 0) { 502 | len = 0; 503 | dist = 0; 504 | n_bypass --; 505 | } else if (len_bypass > 0) { 506 | len = len_bypass; 507 | dist = dist_bypass; 508 | len_bypass = 0; 509 | dist_bypass = 0; 510 | } else { 511 | lzSearch(p_src, src_len, pos, rep0, rep1, rep2, rep3, hash_table, &len, &dist); 512 | 513 | if ((src_len-pos)>8 && len>=2) { 514 | uint32_t score0 = lenDistScore(len, dist, rep0, rep1, rep2, rep3); 515 | uint32_t len1=0, dist1=0, score1=0; 516 | uint32_t len2=0, dist2=0, score2=0; 517 | 518 | lzSearch(p_src, src_len, pos+1, rep0, rep1, rep2, rep3, hash_table, &len1, &dist1); 519 | score1 = lenDistScore(len1, dist1, rep0, rep1, rep2, rep3); 520 | 521 | if (len >= 3) { 522 | lzSearch(p_src, src_len, pos+2, rep0, rep1, rep2, rep3, hash_table, &len2, &dist2); 523 | score2 = lenDistScore(len2, dist2, rep0, rep1, rep2, rep3) - 1; 524 | } 525 | 526 | if (score2 > score0 && score2 > score1) { 527 | len = 0; 528 | dist = 0; 529 | lzSearchRep(p_src, src_len, pos, rep0, rep1, rep2, rep3, 2, &len, &dist); 530 | len_bypass = len2; 531 | dist_bypass = dist2; 532 | n_bypass = (len<2) ? 1 : 0; 533 | } else if (score1 > score0) { 534 | len = 0; 535 | dist = 0; 536 | len_bypass = len1; 537 | dist_bypass = dist1; 538 | n_bypass = 0; 539 | } 540 | } 541 | } 542 | 543 | if (len < 2) { 544 | type = isShortRep(p_src, src_len, pos, rep0) ? 
PKT_SHORTREP : PKT_LIT; 545 | } else if (dist == rep0) { 546 | type = PKT_REP0; 547 | } else if (dist == rep1) { 548 | type = PKT_REP1; 549 | rep1 = rep0; 550 | rep0 = dist; 551 | } else if (dist == rep2) { 552 | type = PKT_REP2; 553 | rep2 = rep1; 554 | rep1 = rep0; 555 | rep0 = dist; 556 | } else if (dist == rep3) { 557 | type = PKT_REP3; 558 | rep3 = rep2; 559 | rep2 = rep1; 560 | rep1 = rep0; 561 | rep0 = dist; 562 | } else { 563 | type = PKT_MATCH; 564 | rep3 = rep2; 565 | rep2 = rep1; 566 | rep1 = rep0; 567 | rep0 = dist; 568 | } 569 | 570 | { 571 | size_t pos2 = pos + ((type==PKT_LIT || type==PKT_SHORTREP) ? 1 : len); 572 | for (; pos5) ? 3 : (len-2); 644 | uint32_t dist_slot, bcnt, bits; 645 | 646 | dist --; 647 | 648 | if (dist < 4) { 649 | dist_slot = dist; 650 | } else { 651 | dist_slot = countBit(dist) - 1; 652 | dist_slot = (dist_slot<<1) | ((dist>>(dist_slot-1)) & 1); 653 | } 654 | 655 | rangeEncodeInt(&coder, probs_dist_slot[len_min5_minus2], dist_slot, 6); 656 | 657 | bcnt = (dist_slot >> 1) - 1; 658 | 659 | if (dist_slot >= 14) { // dist slot = 14~63 660 | bcnt-= 4; 661 | bits = (dist>>4) & ((1<= 4) { // dist slot = 4~13 668 | bits = dist & ((1<LZMA_DIC_MIN) ? 
LZ_DIST_MAX_PLUS1 : LZMA_DIC_MIN) 698 | 699 | #define LZMA_HEADER_LEN 13 700 | 701 | static int writeLzmaHeader (uint8_t *p_dst, size_t *p_dst_len, size_t uncompressed_len, uint8_t uncompressed_len_known) { 702 | uint32_t i; 703 | 704 | if (*p_dst_len < LZMA_HEADER_LEN) 705 | return R_ERR_OUTPUT_OVERFLOW; 706 | 707 | *p_dst_len = LZMA_HEADER_LEN; 708 | 709 | *(p_dst++) = LCLPPB_BYTE; 710 | 711 | for (i=0; i<4; i++) 712 | *(p_dst++) = (uint8_t)(LZMA_DIC_LEN >> (i*8)); 713 | 714 | for (i=0; i<8; i++) { 715 | if (uncompressed_len_known) { 716 | *(p_dst++) = (uint8_t)uncompressed_len; 717 | uncompressed_len >>= 8; 718 | } else { 719 | *(p_dst++) = 0xFF; 720 | } 721 | } 722 | 723 | return R_OK; 724 | } 725 | 726 | 727 | int lzmaC (uint8_t *p_src, size_t src_len, uint8_t *p_dst, size_t *p_dst_len) { 728 | size_t hdr_len, cmprs_len; 729 | 730 | hdr_len = *p_dst_len; // set available space for header length 731 | 732 | RET_WHEN_ERR( writeLzmaHeader(p_dst, &hdr_len, src_len, 1) ); // 733 | 734 | cmprs_len = *p_dst_len - hdr_len; // set available space for compressed data length 735 | 736 | RET_WHEN_ERR( lzmaEncode(p_src, src_len, p_dst+hdr_len, &cmprs_len, 1) ); // do compression 737 | 738 | *p_dst_len = hdr_len + cmprs_len; // the final output data length = LZMA file header len + compressed data len 739 | 740 | return R_OK; 741 | } 742 | 743 | 744 | 745 | 746 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 747 | // for zip container 748 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 749 | 750 | #define ZIP_LZMA_PROPERTY_LEN 9 751 | 752 | int writeZipLzmaProperty (uint8_t *p_dst, size_t *p_dst_len) { 753 | if (*p_dst_len < ZIP_LZMA_PROPERTY_LEN) // no enough space for writing ZIP's LZMA property 754 | return R_ERR_OUTPUT_OVERFLOW; 755 | 756 | *p_dst_len 
= ZIP_LZMA_PROPERTY_LEN; 757 | 758 | *(p_dst++) = 0x10; 759 | *(p_dst++) = 0x02; 760 | *(p_dst++) = 0x05; 761 | *(p_dst++) = 0x00; 762 | *(p_dst++) = LCLPPB_BYTE; 763 | *(p_dst++) = (uint8_t)(LZMA_DIC_LEN >> 0); 764 | *(p_dst++) = (uint8_t)(LZMA_DIC_LEN >> 8); 765 | *(p_dst++) = (uint8_t)(LZMA_DIC_LEN >>16); 766 | *(p_dst++) = (uint8_t)(LZMA_DIC_LEN >>24); 767 | 768 | return R_OK; 769 | } 770 | --------------------------------------------------------------------------------