├── .gitignore ├── Cargo.toml ├── LICENSE-MIT ├── README.md ├── .github └── workflows │ └── ci.yml ├── LICENSE-APACHE └── src ├── lib.rs ├── decode.rs └── encode.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "yazi" 3 | version = "0.2.1" 4 | authors = ["Chad Brokaw "] 5 | edition = "2021" 6 | description = "DEFLATE/zlib compression and decompression." 7 | license = "Apache-2.0 OR MIT" 8 | keywords = ["deflate", "zlib", "compression", "decompression"] 9 | categories = ["compression"] 10 | repository = "https://github.com/dfrg/yazi" 11 | homepage = "https://github.com/dfrg/yazi" 12 | readme = "README.md" 13 | 14 | [features] 15 | default = ["std"] 16 | std = [] 17 | 18 | [target.'cfg(target_family = "wasm")'.dev-dependencies] 19 | wasm-bindgen-test = "0.3.37" 20 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020 Chad Brokaw 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Yazi - Yet another zlib implementation 2 | 3 | Yazi is a pure Rust implementation of the RFC 1950 DEFLATE specification with support for 4 | the zlib wrapper. It provides streaming compression and decompression. 5 | 6 | [![Crates.io][crates-badge]][crates-url] 7 | [![Docs.rs][docs-badge]][docs-url] 8 | [![Apache 2.0 or MIT license.][license-badge]][license-url] 9 | 10 | [crates-badge]: https://img.shields.io/crates/v/yazi.svg 11 | [crates-url]: https://crates.io/crates/yazi 12 | [docs-badge]: https://docs.rs/yazi/badge.svg 13 | [docs-url]: https://docs.rs/yazi 14 | [license-badge]: https://img.shields.io/badge/license-Apache--2.0_OR_MIT-blue.svg 15 | [license-url]: #license 16 | 17 | ## Usage 18 | 19 | The following demonstrates simple usage for compressing and decompressing in-memory buffers: 20 | 21 | ```rust 22 | use yazi::*; 23 | // Your source data. 24 | let data = &(0..=255).cycle().take(8192).collect::<Vec<u8>>()[..]; 25 | // Compress it into a Vec<u8> with a zlib wrapper using the default compression level. 26 | let compressed = compress(data, Format::Zlib, CompressionLevel::Default).unwrap(); 27 | // Decompress it into a Vec<u8>. 28 | let (decompressed, checksum) = decompress(&compressed, Format::Zlib).unwrap(); 29 | // Verify the checksum.
30 | assert_eq!(Adler32::from_buf(&decompressed).finish(), checksum.unwrap()); 31 | // Verify that the decompressed data matches the original. 32 | assert_eq!(data, &decompressed[..]); 33 | ``` 34 | 35 | For detail on more advanced usage, see the full API [documentation](https://docs.rs/yazi). 36 | 37 | ## License 38 | 39 | Licensed under either of 40 | 41 | - Apache License, Version 2.0 42 | ([LICENSE-APACHE](LICENSE-APACHE) or <http://www.apache.org/licenses/LICENSE-2.0>) 43 | - MIT license 44 | ([LICENSE-MIT](LICENSE-MIT) or <http://opensource.org/licenses/MIT>) 45 | 46 | at your option. 47 | 48 | ## Contribution 49 | 50 | Contributions are welcome by pull request. The [Rust code of conduct] applies. 51 | 52 | Unless you explicitly state otherwise, any contribution intentionally submitted 53 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall be 54 | licensed as above, without any additional terms or conditions. 55 | 56 | [Rust Code of Conduct]: https://www.rust-lang.org/policies/code-of-conduct 57 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | permissions: 4 | contents: read 5 | 6 | on: 7 | pull_request: 8 | push: 9 | branches: 10 | - main 11 | schedule: 12 | - cron: '0 2 * * 0' 13 | 14 | env: 15 | CARGO_INCREMENTAL: 0 16 | CARGO_NET_GIT_FETCH_WITH_CLI: true 17 | CARGO_NET_RETRY: 10 18 | CARGO_TERM_COLOR: always 19 | RUST_BACKTRACE: 1 20 | RUSTFLAGS: -D warnings 21 | RUSTDOCFLAGS: -D warnings 22 | RUSTUP_MAX_RETRIES: 10 23 | 24 | defaults: 25 | run: 26 | shell: bash 27 | 28 | jobs: 29 | test: 30 | runs-on: ubuntu-latest 31 | strategy: 32 | fail-fast: false 33 | matrix: 34 | include: 35 | - rust: stable 36 | - rust: beta 37 | - rust: nightly 38 | steps: 39 | - uses: actions/checkout@v4 40 | - name: Install Rust 41 | run: rustup update ${{ matrix.rust }} && rustup default ${{ matrix.rust }} 42 | - run: cargo build --all --all-features --all-targets 43
| - run: cargo test --all 44 | - run: cargo test --no-default-features --tests 45 | - name: Install cargo-hack 46 | uses: taiki-e/install-action@cargo-hack 47 | - run: rustup target add thumbv7m-none-eabi 48 | - name: Run cargo check (without dev-dependencies to catch missing feature flags) 49 | run: cargo hack build --all --no-dev-deps 50 | - run: cargo hack build --all --target thumbv7m-none-eabi --no-default-features --no-dev-deps 51 | - name: Install wasm-pack 52 | uses: taiki-e/install-action@wasm-pack 53 | - run: wasm-pack test --node 54 | - run: wasm-pack test --node --no-default-features 55 | 56 | clippy: 57 | runs-on: ubuntu-latest 58 | steps: 59 | - uses: actions/checkout@v4 60 | - name: Install Rust 61 | run: rustup update stable 62 | - run: cargo clippy --all --all-features --all-targets 63 | 64 | fmt: 65 | runs-on: ubuntu-latest 66 | steps: 67 | - uses: actions/checkout@v4 68 | - name: Install Rust 69 | run: rustup update stable 70 | - run: cargo fmt --all --check 71 | 72 | miri: 73 | runs-on: ubuntu-latest 74 | steps: 75 | - uses: actions/checkout@v4 76 | - name: Install Rust 77 | run: rustup toolchain install nightly --component miri && rustup default nightly 78 | - run: | 79 | echo "MIRIFLAGS=-Zmiri-strict-provenance -Zmiri-symbolic-alignment-check -Zmiri-disable-isolation" >>"${GITHUB_ENV}" 80 | echo "RUSTFLAGS=${RUSTFLAGS} -Z randomize-layout" >>"${GITHUB_ENV}" 81 | - run: cargo miri test --all 82 | - run: cargo miri test --no-default-features --tests 83 | 84 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Yet another zlib implementation. 2 | //! 3 | //! This crate is an implementation of the RFC 1950 DEFLATE specification with 4 | //! support for the zlib wrapper. There are many fine options for such in the 5 | //! Rust ecosystem, but I was looking for one that was small and relatively 6 | //! simple with reasonable performance/compression ratio and support for heap-free 7 | //! compression/decompression scenarios. This crate aims to tick those boxes 8 | //! while also providing composable streaming support based on the standard I/O 9 | //! mechanisms. 10 | //! 11 | //! See the quick start guide below for basic usage or jump to the [compression](#compression) 12 | //! or [decompression](#decompression) section for more detail. 13 | //! 14 | //! # Quick Start 15 | //! 16 | //! So you've got some bytes, they all fit in memory, you don't need to reuse allocations, 17 | //! and you just want to compress or decompress them. This section is for you. 
18 | 19 | //! Cargo.toml: 20 | //! ```toml 21 | //! [dependencies] 22 | //! yazi = "0.2.1" 23 | //! ``` 24 | //! 25 | //! The [`compress`] and [`decompress`] functions are provided for the most common use cases: 26 | //! ``` 27 | //! use yazi::*; 28 | //! // Your source data. 29 | //! let data = &(0..=255).cycle().take(8192).collect::<Vec<u8>>()[..]; 30 | //! // Compress it into a Vec<u8> with a zlib wrapper using the default compression level. 31 | //! let compressed = compress(data, Format::Zlib, CompressionLevel::Default).unwrap(); 32 | //! // Decompress it into a Vec<u8>. 33 | //! let (decompressed, checksum) = decompress(&compressed, Format::Zlib).unwrap(); 34 | //! // Verify the checksum. 35 | //! assert_eq!(Adler32::from_buf(&decompressed).finish(), checksum.unwrap()); 36 | //! // Verify that the decompressed data matches the original. 37 | //! assert_eq!(&decompressed[..], data); 38 | //! ``` 39 | //! 40 | //! Read on for more detailed usage. 41 | //! 42 | //! # Compression 43 | //! 44 | //! To compress data, you'll need to create an instance of the [`Encoder`] struct. 45 | //! The [`new`](Encoder::new) method can be used to construct an encoder on the 46 | //! stack, but the internal buffers are large (~300k) and may cause a stack overflow 47 | //! so it is advisable to use the [`boxed`](Encoder::boxed) method to allocate 48 | //! the encoder on the heap. 49 | //! 50 | //! Newly constructed encoders are configured to output a raw DEFLATE bitstream using a 51 | //! medium compression level and a default strategy. Call [`set_format`](Encoder::set_format) 52 | //! to change the output [`Format`]. Raw DEFLATE and zlib are supported. The 53 | //! [`set_level`](Encoder::set_level) method allows you to choose the preferred 54 | //! [`CompressionLevel`] from a set of basic options or a specific level between 1 and 10. 55 | //! The [`CompressionStrategy`] can be changed with the [`set_strategy`](Encoder::set_strategy) 56 | //! method.
This allows you to, for example, force the encoder to output only static blocks. 57 | //! 58 | //! To create an encoder that outputs a zlib bitstream and spends some extra time to potentially 59 | //! produce a result with a higher compression ratio: 60 | //! ``` 61 | //! use yazi::{CompressionLevel, Encoder, Format}; 62 | //! let mut encoder = Encoder::boxed(); 63 | //! encoder.set_format(Format::Zlib); 64 | //! encoder.set_level(CompressionLevel::BestSize); 65 | //! ``` 66 | //! 67 | //! The encoder itself does not provide any functionality. It simply stores state and 68 | //! configuration. To actually compress data, you'll need an [`EncoderStream`]. A stream 69 | //! is a binding between an encoder and some specific output that will receive the 70 | //! compressed data. This design allows an encoder to be reused with different types 71 | //! of outputs without paying the allocation and initialization cost each time. 72 | //! 73 | //! Streaming supports outputs of the following forms: 74 | //! - Fixed buffers, created with the [`stream_into_buf`](Encoder::stream_into_buf) method. 75 | //! - Vectors, created with the [`stream_into_vec`](Encoder::stream_into_vec) method. 76 | //! - Any type that implements [`std::io::Write`], created with the generic 77 | //! [`stream`](Encoder::stream) method. 78 | //! 79 | //! Once you have an [`EncoderStream`], simply call [`write`](EncoderStream::write) one 80 | //! or more times, feeding your raw data into the stream. If available, you can submit 81 | //! the entire input buffer at once, or in arbitrarily sized chunks down to a single 82 | //! byte. After all data has been written, call [`finish`](EncoderStream::finish) on 83 | //! the stream which will consume it, flush all remaining input and output, and 84 | //! finalize the operation. The finish method returns a [`Result`] containing the 85 | //! total number of compressed bytes written to the output on success, or an 86 | //! [`Error`] describing the problem on failure. 
87 | 88 | //! Let's write a function that compresses some arbitrary bytes into a vector: 89 | //! ``` 90 | //! fn compress_bytes(buf: &[u8]) -> Result<Vec<u8>, yazi::Error> { 91 | //! use yazi::Encoder; 92 | //! let mut encoder = Encoder::boxed(); 93 | //! let mut vec = Vec::new(); 94 | //! let mut stream = encoder.stream_into_vec(&mut vec); 95 | //! stream.write(buf)?; 96 | //! stream.finish()?; 97 | //! Ok(vec) 98 | //! } 99 | //! ``` 100 | //! 101 | //! Now let's do something a bit more interesting, and given two paths, compress 102 | //! one file into another: 103 | //! ``` 104 | //! fn compress_file(source: &str, dest: &str) -> Result<u64, yazi::Error> { 105 | //! use yazi::Encoder; 106 | //! use std::fs::File; 107 | //! use std::io::{copy, BufWriter}; 108 | //! let mut encoder = Encoder::boxed(); 109 | //! // yazi does not perform any internal buffering beyond what is necessary 110 | //! // for correctness. 111 | //! let mut target = BufWriter::new(File::create(dest)?); 112 | //! let mut stream = encoder.stream(&mut target); 113 | //! copy(&mut File::open(source)?, &mut stream)?; 114 | //! stream.finish() 115 | //! } 116 | //! ``` 117 | //! 118 | //! Here, we can see that [`EncoderStream`] also implements [`std::io::Write`], so we 119 | //! can pass it directly to [`std::io::copy`]. This allows streams to be composable 120 | //! with the standard I/O facilities and other libraries that support those interfaces. 121 | //! 122 | //! # Decompression 123 | //! 124 | //! If you've already read the section on compression, the API for decompression 125 | //! is essentially identical with the types replaced by [`Decoder`] and [`DecoderStream`]. 126 | //! The documentation is copied here almost verbatim for the sake of completeness and for 127 | //! those who might have skipped directly to this section. 128 | //! 129 | //! To decompress data, you'll need to create an instance of the [`Decoder`] struct. 130 | //!
The [`new`](Decoder::new) method can be used to construct a decoder on the stack, 131 | //! and unlike encoders, the decoder struct is relatively small (~10k) and generally 132 | //! safe to stack allocate. You can create a decoder on the heap with the 133 | //! [`boxed`](Decoder::boxed) method if you prefer. 134 | //! 135 | //! Newly constructed decoders are configured to decompress a raw DEFLATE bitstream. Call 136 | //! [`set_format`](Decoder::set_format) to change the input [`Format`]. Raw DEFLATE and 137 | //! zlib are supported. No other configuration is necessary for decompression. 138 | //! 139 | //! To create a decoder that decompresses a zlib bitstream: 140 | //! ``` 141 | //! use yazi::{Decoder, Format}; 142 | //! let mut decoder = Decoder::new(); 143 | //! decoder.set_format(Format::Zlib); 144 | //! ``` 145 | //! 146 | //! The decoder itself does not provide any functionality. It simply stores state and 147 | //! configuration. To actually decompress data, you'll need a 148 | //! [`DecoderStream`]. A stream is a binding between a 149 | //! decoder and some specific output that will receive the decompressed data. This 150 | //! design allows a decoder to be reused with different types of outputs without paying the 151 | //! allocation and initialization cost each time. 152 | //! 153 | //! Streaming supports outputs of the following forms: 154 | //! - Fixed buffers, created with the [`stream_into_buf`](Decoder::stream_into_buf) method. 155 | //! - Vectors, created with the [`stream_into_vec`](Decoder::stream_into_vec) method. 156 | //! - Any type that implements [`std::io::Write`], created with the generic 157 | //! [`stream`](Decoder::stream) method. 158 | //! 159 | //! Once you have a [`DecoderStream`], simply call [`write`](DecoderStream::write) one or 160 | //! more times, feeding your compressed data into the stream. If available, you can submit 161 | //! the entire input buffer at once, or in arbitrarily sized chunks down to a single byte. 162 | //! 
After all data has been written, call [`finish`](DecoderStream::finish) on the stream 163 | //! which will consume it, flush all remaining input and output, and finalize the operation. 164 | //! The finish method returns a [`Result`] containing the total number of decompressed bytes 165 | //! written to the output along with an optional Adler-32 checksum (if the stream was 166 | //! zlib-encoded) on success, or an [`Error`] describing the problem on failure. 167 | //! 168 | //! Let's write a function that decompresses a zlib bitstream into a vector and verifies 169 | //! the checksum: 170 | //! ``` 171 | //! fn decompress_zlib(buf: &[u8]) -> Result<Vec<u8>, yazi::Error> { 172 | //! use yazi::{Adler32, Decoder, Error, Format}; 173 | //! let mut decoder = Decoder::new(); 174 | //! decoder.set_format(Format::Zlib); 175 | //! let mut vec = Vec::new(); 176 | //! let mut stream = decoder.stream_into_vec(&mut vec); 177 | //! stream.write(buf)?; 178 | //! // checksum is an Option<u32> 179 | //! let (_, checksum) = stream.finish()?; 180 | //! if Adler32::from_buf(&vec).finish() != checksum.unwrap() { 181 | //! return Err(Error::InvalidBitstream); 182 | //! } 183 | //! Ok(vec) 184 | //! } 185 | //! ``` 186 | //! 187 | //! Now let's do something a bit more interesting, and given two paths, decompress 188 | //! one file into another: 189 | //! ``` 190 | //! fn decompress_file(source: &str, dest: &str) -> Result<(u64, Option<u32>), yazi::Error> { 191 | //! use yazi::Decoder; 192 | //! use std::fs::File; 193 | //! use std::io::{copy, BufWriter}; 194 | //! let mut decoder = Decoder::new(); 195 | //! // yazi does not perform any internal buffering beyond what is necessary 196 | //! // for correctness. 197 | //! let mut target = BufWriter::new(File::create(dest)?); 198 | //! let mut stream = decoder.stream(&mut target); 199 | //! copy(&mut File::open(source)?, &mut stream)?; 200 | //! stream.finish() 201 | //! } 202 | //! ``` 203 | //! 204 | //!
Here, we can see that [`DecoderStream`] also implements [`std::io::Write`], so we can 205 | //! pass it directly to [`std::io::copy`]. This allows streams to be composable with the 206 | //! standard I/O facilities and other libraries that support those interfaces. 207 | //! 208 | //! # Implementation Notes 209 | //! 210 | //! The compressor is based heavily on both [miniz](https://github.com/richgel999/miniz) 211 | //! by Rich Geldreich and [miniz_oxide](https://github.com/Frommi/miniz_oxide) 212 | //! by Frommi. The available compression levels and strategies are the same and 213 | //! it should produce an identical bitstream for a given set of options. The 214 | //! decompressor is based on the techniques in [libdeflate](https://github.com/ebiggers/libdeflate) 215 | //! by Eric Biggers. 216 | 217 | #![cfg_attr(not(feature = "std"), no_std)] 218 | 219 | extern crate alloc; 220 | 221 | mod decode; 222 | mod encode; 223 | 224 | #[cfg(feature = "std")] 225 | use std::io; 226 | 227 | pub use decode::{decompress, Decoder, DecoderStream}; 228 | pub use encode::{compress, CompressionLevel, CompressionStrategy, Encoder, EncoderStream}; 229 | 230 | /// Defines the format for a compressed bitstream. 231 | #[derive(Copy, Clone, PartialEq, Debug)] 232 | pub enum Format { 233 | /// Raw DEFLATE data. 234 | Raw, 235 | /// Zlib header with an Adler-32 footer. 236 | Zlib, 237 | } 238 | 239 | /// Errors that may occur during compression or decompression. 240 | #[derive(Debug)] 241 | pub enum Error { 242 | /// Not enough input was provided. 243 | Underflow, 244 | /// The bitstream was corrupt. 245 | InvalidBitstream, 246 | /// Output buffer was too small. 247 | Overflow, 248 | /// Attempt to write into a finished stream. 249 | Finished, 250 | /// A system I/O error. 251 | /// 252 | /// Only available with the `std` feature enabled. 
253 | #[cfg(feature = "std")] 254 | Io(io::Error), 255 | } 256 | 257 | #[cfg(feature = "std")] 258 | impl From<io::Error> for Error { 259 | fn from(error: io::Error) -> Self { 260 | Self::Io(error) 261 | } 262 | } 263 | 264 | /// Rolling Adler-32 checksum. 265 | #[derive(Copy, Clone)] 266 | pub struct Adler32(u32); 267 | 268 | impl Adler32 { 269 | /// Creates a new checksum initialized to the default value. 270 | pub fn new() -> Self { 271 | Self(1) 272 | } 273 | 274 | /// Creates a checksum from a buffer. 275 | pub fn from_buf(buf: &[u8]) -> Self { 276 | let mut checksum = Self::new(); 277 | checksum.update(buf); 278 | checksum 279 | } 280 | 281 | /// Updates the checksum with bytes provided by the specified buffer. 282 | pub fn update(&mut self, buf: &[u8]) { 283 | let mut s1 = self.0 & 0xFFFF; 284 | let mut s2 = (self.0 >> 16) & 0xFFFF; 285 | for chunk in buf.chunks(5550) { 286 | for b in chunk { 287 | s1 += *b as u32; 288 | s2 += s1; 289 | } 290 | s1 %= 65521; 291 | s2 %= 65521; 292 | } 293 | self.0 = (s2 << 16) | s1; 294 | } 295 | 296 | /// Returns the checksum.
297 | pub fn finish(self) -> u32 { 298 | self.0 299 | } 300 | } 301 | 302 | impl Default for Adler32 { 303 | fn default() -> Self { 304 | Self::new() 305 | } 306 | } 307 | 308 | #[cfg(test)] 309 | mod tests { 310 | use super::*; 311 | use alloc::vec::Vec; 312 | 313 | #[cfg(target_family = "wasm")] 314 | use wasm_bindgen_test::wasm_bindgen_test as test; 315 | 316 | fn generate_bytes() -> Vec { 317 | const BYTES: &[u8; 26] = b"abcdefghijklmnopqrstuvwxyz"; 318 | let mut buf = Vec::new(); 319 | for i in 0..4096 { 320 | if i % 3 == 0 { 321 | buf.extend_from_slice(&BYTES[13..]); 322 | } else if i & 1 != 0 { 323 | buf.extend_from_slice(BYTES); 324 | } else { 325 | buf.extend(BYTES.iter().rev()); 326 | } 327 | } 328 | buf 329 | } 330 | 331 | #[test] 332 | fn compress_decompress() { 333 | let buf = generate_bytes(); 334 | let mut compressed = Vec::new(); 335 | let mut encoder = Encoder::boxed(); 336 | let mut stream = encoder.stream_into_vec(&mut compressed); 337 | stream.write(&buf).unwrap(); 338 | stream.finish().unwrap(); 339 | let mut decompressed = Vec::new(); 340 | let mut decoder = Decoder::new(); 341 | let mut stream = decoder.stream_into_vec(&mut decompressed); 342 | stream.write(&compressed).unwrap(); 343 | stream.finish().unwrap(); 344 | assert_eq!(buf, decompressed); 345 | } 346 | 347 | #[test] 348 | fn compress_decompress_zlib() { 349 | let buf = generate_bytes(); 350 | let mut compressed = Vec::new(); 351 | let mut encoder = Encoder::boxed(); 352 | encoder.set_format(Format::Zlib); 353 | let mut stream = encoder.stream_into_vec(&mut compressed); 354 | stream.write(&buf).unwrap(); 355 | stream.finish().unwrap(); 356 | let mut decompressed = Vec::new(); 357 | let mut decoder = Decoder::new(); 358 | decoder.set_format(Format::Zlib); 359 | let mut stream = decoder.stream_into_vec(&mut decompressed); 360 | stream.write(&compressed).unwrap(); 361 | let (_, checksum) = stream.finish().unwrap(); 362 | assert_eq!(buf, decompressed); 363 | let mut adler = 
Adler32::new(); 364 | adler.update(&decompressed); 365 | assert_eq!(adler.finish(), checksum.unwrap()); 366 | } 367 | 368 | #[test] 369 | fn compress_decompress_static() { 370 | let buf = generate_bytes(); 371 | let mut compressed = Vec::new(); 372 | let mut encoder = Encoder::boxed(); 373 | encoder.set_strategy(CompressionStrategy::Static); 374 | let mut stream = encoder.stream_into_vec(&mut compressed); 375 | stream.write(&buf).unwrap(); 376 | stream.finish().unwrap(); 377 | let mut decompressed = Vec::new(); 378 | let mut decoder = Decoder::new(); 379 | let mut stream = decoder.stream_into_vec(&mut decompressed); 380 | stream.write(&compressed).unwrap(); 381 | stream.finish().unwrap(); 382 | assert_eq!(buf, decompressed); 383 | } 384 | 385 | #[test] 386 | fn compress_decompress_raw() { 387 | let buf = generate_bytes(); 388 | let mut compressed = Vec::new(); 389 | let mut encoder = Encoder::boxed(); 390 | encoder.set_level(CompressionLevel::None); 391 | let mut stream = encoder.stream_into_vec(&mut compressed); 392 | stream.write(&buf).unwrap(); 393 | stream.finish().unwrap(); 394 | let mut decompressed = Vec::new(); 395 | let mut decoder = Decoder::new(); 396 | let mut stream = decoder.stream_into_vec(&mut decompressed); 397 | stream.write(&compressed).unwrap(); 398 | stream.finish().unwrap(); 399 | assert_eq!(buf, decompressed); 400 | } 401 | 402 | #[test] 403 | fn compress_decompress_streaming_1byte() { 404 | let buf = generate_bytes(); 405 | let mut compressed = Vec::new(); 406 | let mut encoder = Encoder::boxed(); 407 | let mut stream = encoder.stream_into_vec(&mut compressed); 408 | for &b in &buf { 409 | stream.write(&[b]).unwrap(); 410 | } 411 | stream.finish().unwrap(); 412 | let mut decompressed = Vec::new(); 413 | let mut decoder = Decoder::new(); 414 | let mut stream = decoder.stream_into_vec(&mut decompressed); 415 | for &b in &compressed { 416 | stream.write(&[b]).unwrap(); 417 | } 418 | stream.finish().unwrap(); 419 | assert_eq!(buf, decompressed); 
420 | } 421 | #[test] 422 | fn compress_decompress_streaming_64bytes() { 423 | let buf = generate_bytes(); 424 | let mut compressed = Vec::new(); 425 | let mut encoder = Encoder::boxed(); 426 | let mut stream = encoder.stream_into_vec(&mut compressed); 427 | for chunk in buf.chunks(64) { 428 | stream.write(chunk).unwrap(); 429 | } 430 | stream.finish().unwrap(); 431 | let mut decompressed = Vec::new(); 432 | let mut decoder = Decoder::new(); 433 | let mut stream = decoder.stream_into_vec(&mut decompressed); 434 | for chunk in compressed.chunks(64) { 435 | stream.write(chunk).unwrap(); 436 | } 437 | stream.finish().unwrap(); 438 | assert_eq!(buf, decompressed); 439 | } 440 | } 441 | -------------------------------------------------------------------------------- /src/decode.rs: -------------------------------------------------------------------------------- 1 | //! RFC 1590 decompression implementation. 2 | 3 | #![allow( 4 | clippy::needless_range_loop, 5 | clippy::new_without_default, 6 | clippy::too_many_arguments 7 | )] 8 | 9 | use super::{Error, Format}; 10 | use alloc::boxed::Box; 11 | use alloc::vec::Vec; 12 | 13 | #[cfg(feature = "std")] 14 | use std::io::{self, Write}; 15 | 16 | /// Stateful context for decompression. 17 | /// 18 | /// See the crate level [decompression](index.html#decompression) section 19 | /// for detailed usage. 20 | pub struct Decoder(InflateContext); 21 | 22 | impl Decoder { 23 | /// Creates a new deflate decoder. 24 | pub fn new() -> Self { 25 | Self(InflateContext::new()) 26 | } 27 | 28 | /// Creates a new deflate decoder on the heap. 29 | pub fn boxed() -> Box { 30 | Box::new(Self(InflateContext::new())) 31 | } 32 | 33 | /// Sets the expected format of the input data for the next usage of the 34 | /// decoder. 35 | pub fn set_format(&mut self, format: Format) { 36 | self.0.reset(format == Format::Zlib) 37 | } 38 | 39 | /// Creates a decoder stream that will write into the specified writer. 
40 | #[cfg(feature = "std")] 41 | pub fn stream<'a, W: Write>( 42 | &'a mut self, 43 | writer: &'a mut W, 44 | ) -> DecoderStream<'a, impl Sink + 'a> { 45 | self.0.reset(self.0.zlib); 46 | DecoderStream { 47 | ctx: &mut self.0, 48 | sink: WriterSink { 49 | writer, 50 | ring: RingBuffer::new(), 51 | written: 0, 52 | }, 53 | finished: false, 54 | } 55 | } 56 | 57 | /// Creates a decoder stream that will write into the specified vector. 58 | /// The resulting stream will not clear the vector but will instead append 59 | /// the decompressed data. 60 | pub fn stream_into_vec<'a>( 61 | &'a mut self, 62 | vec: &'a mut Vec, 63 | ) -> DecoderStream<'a, impl Sink + 'a> { 64 | self.0.reset(self.0.zlib); 65 | DecoderStream { 66 | ctx: &mut self.0, 67 | sink: VecSink::new(vec), 68 | finished: false, 69 | } 70 | } 71 | 72 | /// Creates a decoder stream that will write into the specified buffer. The 73 | /// stream will generate an overflow error if the buffer is not large enough 74 | /// to contain the decompressed data. 75 | pub fn stream_into_buf<'a>( 76 | &'a mut self, 77 | buf: &'a mut [u8], 78 | ) -> DecoderStream<'a, impl Sink + 'a> { 79 | self.0.reset(self.0.zlib); 80 | DecoderStream { 81 | ctx: &mut self.0, 82 | sink: BufSink { 83 | buffer: buf, 84 | pos: 0, 85 | }, 86 | finished: false, 87 | } 88 | } 89 | } 90 | 91 | /// Decompression stream combining a decoder context with an output. 92 | /// 93 | /// See the crate level [decompression](index.html#decompression) section 94 | /// for detailed usage. 95 | pub struct DecoderStream<'a, S: Sink> { 96 | ctx: &'a mut InflateContext, 97 | sink: S, 98 | finished: bool, 99 | } 100 | 101 | impl DecoderStream<'_, S> { 102 | /// Writes the specified buffer to the stream, producing decompressed data 103 | /// in the output. 
104 | pub fn write(&mut self, buf: &[u8]) -> Result<(), Error> { 105 | if self.finished { 106 | return Err(Error::Finished); 107 | } 108 | self.ctx.inflate(buf, &mut self.sink, false) 109 | } 110 | 111 | /// Returns the number of decompressed bytes that have been written to the 112 | /// output. 113 | pub fn decompressed_size(&self) -> u64 { 114 | self.sink.written() 115 | } 116 | 117 | /// Consumes the stream, flushing any input that may be buffered. Returns 118 | /// the total number of decompressed bytes written to the output and an 119 | /// optional checksum if the stream was zlib encoded. 120 | pub fn finish(mut self) -> Result<(u64, Option), Error> { 121 | if self.finished { 122 | return Err(Error::Finished); 123 | } 124 | self.finished = true; 125 | self.ctx.inflate(&[], &mut self.sink, true)?; 126 | Ok((self.sink.written(), self.ctx.checksum)) 127 | } 128 | } 129 | 130 | impl Drop for DecoderStream<'_, S> { 131 | fn drop(&mut self) { 132 | if !self.finished { 133 | let _ = self.ctx.inflate(&[], &mut self.sink, true); 134 | self.finished = true; 135 | } 136 | } 137 | } 138 | 139 | #[cfg(feature = "std")] 140 | impl Write for DecoderStream<'_, S> { 141 | fn write(&mut self, buf: &[u8]) -> io::Result { 142 | match self.ctx.inflate(buf, &mut self.sink, false) { 143 | Ok(_) => Ok(buf.len()), 144 | Err(err) => match err { 145 | Error::Io(err) => Err(err), 146 | Error::Underflow | Error::Overflow => { 147 | Err(io::Error::from(io::ErrorKind::InvalidInput)) 148 | } 149 | _ => Err(io::Error::from(io::ErrorKind::InvalidData)), 150 | }, 151 | } 152 | } 153 | 154 | fn flush(&mut self) -> io::Result<()> { 155 | Ok(()) 156 | } 157 | } 158 | 159 | /// Decompresses a buffer of the specified format into a vector. 160 | /// 161 | /// On success, returns a vector containing the decompressed data and 162 | /// optionally an Adler-32 checksum if the source data was zlib 163 | /// encoded. 
164 | pub fn decompress(buf: &[u8], format: Format) -> Result<(Vec, Option), Error> { 165 | let mut decoder = Decoder::new(); 166 | decoder.set_format(format); 167 | let mut vec = Vec::with_capacity(buf.len() * 2); 168 | let mut stream = decoder.stream_into_vec(&mut vec); 169 | stream.write(buf)?; 170 | let (_, checksum) = stream.finish()?; 171 | Ok((vec, checksum)) 172 | } 173 | 174 | struct InflateContext { 175 | zlib: bool, 176 | state: State, 177 | remainder: Remainder, 178 | pos: usize, 179 | bit_buffer: u64, 180 | bits_in: u32, 181 | trees: Trees, 182 | checksum: Option, 183 | last_block: bool, 184 | done: bool, 185 | } 186 | 187 | impl InflateContext { 188 | #[inline(always)] 189 | fn new() -> Self { 190 | Self { 191 | zlib: false, 192 | state: State::Block, 193 | remainder: Remainder::new(), 194 | bit_buffer: 0, 195 | bits_in: 0, 196 | pos: 0, 197 | trees: Trees::new(), 198 | checksum: None, 199 | last_block: false, 200 | done: false, 201 | } 202 | } 203 | 204 | fn reset(&mut self, zlib: bool) { 205 | self.zlib = zlib; 206 | self.state = if zlib { State::Header } else { State::Block }; 207 | self.remainder.pos = 0; 208 | self.remainder.avail = 0; 209 | self.pos = 0; 210 | self.bit_buffer = 0; 211 | self.bits_in = 0; 212 | self.checksum = None; 213 | self.last_block = false; 214 | self.done = false; 215 | } 216 | 217 | fn inflate( 218 | &mut self, 219 | mut buf: &[u8], 220 | sink: &mut S, 221 | is_last: bool, 222 | ) -> Result<(), Error> { 223 | while !self.done && (is_last || !buf.is_empty()) { 224 | let mut bits = Bits::new(self.bit_buffer, self.bits_in); 225 | let (res, used_remainder) = if self.remainder.avail != 0 { 226 | let used = self.remainder.push(buf); 227 | buf = &buf[used..]; 228 | let mut source = Source::from_remainder(&self.remainder); 229 | let res = inflate( 230 | self.zlib, 231 | &mut self.state, 232 | &mut self.last_block, 233 | &mut self.done, 234 | &mut source, 235 | &mut bits, 236 | &mut self.trees, 237 | sink, 238 | &mut 
self.checksum, 239 | is_last, 240 | ); 241 | let source_pos = source.pos; 242 | self.remainder.pos = source_pos; 243 | self.remainder.avail -= source_pos; 244 | (res, true) 245 | } else { 246 | let mut source = Source::new(buf); 247 | let res = inflate( 248 | self.zlib, 249 | &mut self.state, 250 | &mut self.last_block, 251 | &mut self.done, 252 | &mut source, 253 | &mut bits, 254 | &mut self.trees, 255 | sink, 256 | &mut self.checksum, 257 | is_last, 258 | ); 259 | buf = &buf[source.pos..]; 260 | (res, false) 261 | }; 262 | self.bit_buffer = bits.bit_buffer; 263 | self.bits_in = bits.bits_in; 264 | let more_input = !buf.is_empty(); 265 | match res { 266 | Err(Error::Underflow) => { 267 | if is_last && !more_input { 268 | return res; 269 | } else if !more_input { 270 | return Ok(()); 271 | } else if self.remainder.avail != 0 || !used_remainder { 272 | let used = self.remainder.push(buf); 273 | buf = &buf[used..]; 274 | } 275 | } 276 | Err(_) => { 277 | return res; 278 | } 279 | _ => { 280 | if is_last { 281 | return Ok(()); 282 | } 283 | } 284 | } 285 | } 286 | Ok(()) 287 | } 288 | } 289 | 290 | #[derive(Copy, Clone, PartialEq, Debug)] 291 | enum State { 292 | Header, 293 | Block, 294 | Copy(usize), 295 | Inflate, 296 | Match(u32), 297 | } 298 | 299 | fn inflate( 300 | zlib: bool, 301 | state: &mut State, 302 | last_block: &mut bool, 303 | done: &mut bool, 304 | source: &mut Source, 305 | bits: &mut Bits, 306 | trees: &mut Trees, 307 | sink: &mut S, 308 | checksum: &mut Option, 309 | is_last: bool, 310 | ) -> Result<(), Error> { 311 | loop { 312 | match *state { 313 | State::Header => { 314 | if bits.bytes_available(source) < 2 { 315 | return Err(Error::Underflow); 316 | } 317 | verify_zlib_header(source, bits)?; 318 | *state = State::Block; 319 | continue; 320 | } 321 | State::Block => { 322 | if *last_block { 323 | if zlib && checksum.is_none() { 324 | bits.skip(bits.bits_in & 7); 325 | if bits.bytes_available(source) < 4 { 326 | return Err(Error::Underflow); 327 
| } 328 | *checksum = Some(read_zlib_checksum(source, bits)?); 329 | } 330 | *done = true; 331 | return Ok(()); 332 | } 333 | if bits.bytes_available(source) < 286 && !is_last { 334 | return Err(Error::Underflow); 335 | } 336 | let header = bits.try_pop_source(source, 3)?; 337 | *last_block = header & 1 != 0; 338 | match header >> 1 { 339 | 0 => { 340 | bits.try_skip(bits.bits_in & 7)?; 341 | let mut parts = [0u32; 4]; 342 | for part in &mut parts { 343 | if bits.bits_in >= 8 { 344 | *part = bits.pop(8); 345 | } else { 346 | *part = *source 347 | .buffer 348 | .get(source.pos) 349 | .ok_or(Error::InvalidBitstream)? 350 | as u32; 351 | source.pos += 1; 352 | source.avail -= 1; 353 | } 354 | } 355 | let length = parts[0] | (parts[1] << 8); 356 | let inv_length = parts[2] | (parts[3] << 8); 357 | if length != (!inv_length & 0xFFFF) { 358 | return Err(Error::InvalidBitstream); 359 | } 360 | let mut remaining = length as usize; 361 | while bits.bits_in >= 8 && remaining > 0 { 362 | sink.push(bits.pop(8) as u8)?; 363 | remaining -= 1; 364 | } 365 | if bits.bits_in == 0 { 366 | bits.bit_buffer = 0; 367 | } 368 | *state = State::Copy(remaining); 369 | while remaining > 0 { 370 | let bytes = source.try_get(remaining)?; 371 | sink.write(bytes)?; 372 | remaining -= bytes.len(); 373 | *state = State::Copy(remaining); 374 | } 375 | *state = State::Block; 376 | continue; 377 | } 378 | 1 => { 379 | const DISTANCE_LENGTHS: [u8; 32] = [5; 32]; 380 | let mut lengths: [u8; 288] = [0; 288]; 381 | lengths[0..144].iter_mut().for_each(|p| *p = 8); 382 | lengths[144..256].iter_mut().for_each(|p| *p = 9); 383 | lengths[256..280].iter_mut().for_each(|p| *p = 7); 384 | lengths[280..288].iter_mut().for_each(|p| *p = 8); 385 | trees.lt.build(&lengths[..288]); 386 | trees.dt.build(&DISTANCE_LENGTHS); 387 | *state = State::Inflate; 388 | continue; 389 | } 390 | 2 => { 391 | decode_trees(source, bits, &mut trees.lt, &mut trees.dt, is_last)?; 392 | *state = State::Inflate; 393 | continue; 394 | } 
395 | _ => { 396 | return Err(Error::InvalidBitstream); 397 | } 398 | } 399 | } 400 | State::Copy(mut remaining) => { 401 | while remaining > 0 { 402 | let bytes = source.try_get(remaining)?; 403 | sink.write(bytes)?; 404 | remaining -= bytes.len(); 405 | *state = State::Copy(remaining); 406 | } 407 | *state = State::Block; 408 | continue; 409 | } 410 | State::Inflate => { 411 | let mut lbits = *bits; 412 | let mut entry = 0; 413 | if !is_last { 414 | loop { 415 | let mut handle_match = false; 416 | while lbits.bits_in >= 15 { 417 | entry = trees.lt.table[lbits.peek(LITERAL_LENGTH_TABLE_BITS) as usize]; 418 | if entry & ENTRY_SUBTABLE != 0 { 419 | lbits.skip(LITERAL_LENGTH_TABLE_BITS); 420 | entry = trees.lt.table[(((entry >> ENTRY_SHIFT) & 0xFFFF) 421 | + lbits.peek(entry & ENTRY_LENGTH_MASK)) 422 | as usize]; 423 | } 424 | lbits.skip(entry & ENTRY_LENGTH_MASK); 425 | if entry & ENTRY_LITERAL == 0 { 426 | handle_match = true; 427 | break; 428 | } 429 | sink.push((entry >> ENTRY_SHIFT) as u8)?; 430 | } 431 | if !handle_match { 432 | if lbits.fill(source) >= 15 { 433 | entry = 434 | trees.lt.table[lbits.peek(LITERAL_LENGTH_TABLE_BITS) as usize]; 435 | if entry & ENTRY_SUBTABLE != 0 { 436 | lbits.skip(LITERAL_LENGTH_TABLE_BITS); 437 | entry = trees.lt.table[(((entry >> ENTRY_SHIFT) & 0xFFFF) 438 | + lbits.peek(entry & ENTRY_LENGTH_MASK)) 439 | as usize]; 440 | } 441 | lbits.skip(entry & ENTRY_LENGTH_MASK); 442 | if entry & ENTRY_LITERAL != 0 { 443 | sink.push((entry >> ENTRY_SHIFT) as u8)?; 444 | continue; 445 | } 446 | } else { 447 | *bits = lbits; 448 | return Err(Error::Underflow); 449 | } 450 | } 451 | entry >>= ENTRY_SHIFT; 452 | if lbits.fill(source) >= 33 { 453 | let length = ((entry >> LENGTH_BASE_SHIFT) 454 | + lbits.pop(entry & EXTRA_LENGTH_BITS_MASK)) 455 | as usize; 456 | if length == 0 { 457 | *bits = lbits; 458 | *state = State::Block; 459 | break; 460 | } 461 | entry = trees.dt.table[lbits.peek(DISTANCE_TABLE_BITS) as usize]; 462 | if entry & 
ENTRY_SUBTABLE != 0 { 463 | lbits.skip(DISTANCE_TABLE_BITS); 464 | entry = trees.dt.table[(((entry >> ENTRY_SHIFT) & 0xFFFF) 465 | + lbits.peek(entry & ENTRY_LENGTH_MASK)) 466 | as usize]; 467 | } 468 | lbits.skip(entry & ENTRY_LENGTH_MASK); 469 | entry >>= ENTRY_SHIFT; 470 | let distance = ((entry & DISTANCE_BASE_MASK) 471 | + lbits.pop(entry >> EXTRA_DISTANCE_BITS_SHIFT)) 472 | as usize; 473 | sink.apply_match(distance, length)?; 474 | } else { 475 | *bits = lbits; 476 | *state = State::Match(entry); 477 | return Err(Error::Underflow); 478 | } 479 | } 480 | } else { 481 | loop { 482 | if lbits.bits_in < 15 { 483 | lbits.fill(source); 484 | } 485 | let mut entry = 486 | trees.lt.table[lbits.peek(LITERAL_LENGTH_TABLE_BITS) as usize]; 487 | if entry & ENTRY_SUBTABLE != 0 { 488 | lbits.try_skip(LITERAL_LENGTH_TABLE_BITS)?; 489 | entry = trees.lt.table[(((entry >> ENTRY_SHIFT) & 0xFFFF) 490 | + lbits.peek(entry & ENTRY_LENGTH_MASK)) 491 | as usize]; 492 | } 493 | lbits.try_skip(entry & ENTRY_LENGTH_MASK)?; 494 | if entry & ENTRY_LITERAL != 0 { 495 | sink.push((entry >> ENTRY_SHIFT) as u8)?; 496 | continue; 497 | } 498 | entry >>= ENTRY_SHIFT; 499 | lbits.fill(source); 500 | let length = ((entry >> LENGTH_BASE_SHIFT) 501 | + lbits.try_pop(entry & EXTRA_LENGTH_BITS_MASK)?) 502 | as usize; 503 | if length == 0 { 504 | *bits = lbits; 505 | *state = State::Block; 506 | break; 507 | } 508 | entry = trees.dt.table[lbits.peek(DISTANCE_TABLE_BITS) as usize]; 509 | if entry & ENTRY_SUBTABLE != 0 { 510 | lbits.try_skip(DISTANCE_TABLE_BITS)?; 511 | entry = trees.dt.table[(((entry >> ENTRY_SHIFT) & 0xFFFF) 512 | + lbits.peek(entry & ENTRY_LENGTH_MASK)) 513 | as usize]; 514 | } 515 | lbits.try_skip(entry & ENTRY_LENGTH_MASK)?; 516 | entry >>= ENTRY_SHIFT; 517 | let distance = ((entry & DISTANCE_BASE_MASK) 518 | + lbits.try_pop(entry >> EXTRA_DISTANCE_BITS_SHIFT)?) 
519 | as usize; 520 | sink.apply_match(distance, length)?; 521 | } 522 | } 523 | } 524 | State::Match(mut entry) => { 525 | let mut lbits = *bits; 526 | if !is_last { 527 | if lbits.fill(source) < 33 { 528 | *bits = lbits; 529 | return Err(Error::Underflow); 530 | } 531 | let length = ((entry >> LENGTH_BASE_SHIFT) 532 | + lbits.pop(entry & EXTRA_LENGTH_BITS_MASK)) 533 | as usize; 534 | if length == 0 { 535 | *bits = lbits; 536 | *state = State::Block; 537 | continue; 538 | } 539 | entry = trees.dt.table[lbits.peek(DISTANCE_TABLE_BITS) as usize]; 540 | if entry & ENTRY_SUBTABLE != 0 { 541 | lbits.skip(DISTANCE_TABLE_BITS); 542 | entry = trees.dt.table[(((entry >> ENTRY_SHIFT) & 0xFFFF) 543 | + lbits.peek(entry & ENTRY_LENGTH_MASK)) 544 | as usize]; 545 | } 546 | lbits.skip(entry & ENTRY_LENGTH_MASK); 547 | entry >>= ENTRY_SHIFT; 548 | let distance = ((entry & DISTANCE_BASE_MASK) 549 | + lbits.pop(entry >> EXTRA_DISTANCE_BITS_SHIFT)) 550 | as usize; 551 | *bits = lbits; 552 | *state = State::Inflate; 553 | sink.apply_match(distance, length)?; 554 | } else { 555 | let length = ((entry >> LENGTH_BASE_SHIFT) 556 | + lbits.try_pop(entry & EXTRA_LENGTH_BITS_MASK)?) 557 | as usize; 558 | if length == 0 { 559 | *bits = lbits; 560 | *state = State::Block; 561 | continue; 562 | } 563 | entry = trees.dt.table[lbits.peek(DISTANCE_TABLE_BITS) as usize]; 564 | if entry & ENTRY_SUBTABLE != 0 { 565 | lbits.try_skip(DISTANCE_TABLE_BITS)?; 566 | entry = trees.dt.table[(((entry >> ENTRY_SHIFT) & 0xFFFF) 567 | + lbits.peek(entry & ENTRY_LENGTH_MASK)) 568 | as usize]; 569 | } 570 | lbits.try_skip(entry & ENTRY_LENGTH_MASK)?; 571 | entry >>= ENTRY_SHIFT; 572 | let distance = ((entry & DISTANCE_BASE_MASK) 573 | + lbits.try_pop(entry >> EXTRA_DISTANCE_BITS_SHIFT)?) 
574 | as usize; 575 | *bits = lbits; 576 | *state = State::Inflate; 577 | sink.apply_match(distance, length)?; 578 | } 579 | } 580 | } 581 | } 582 | } 583 | 584 | fn decode_trees( 585 | source: &mut Source, 586 | bits: &mut Bits, 587 | lt: &mut LiteralLengthTree, 588 | dt: &mut DistanceTree, 589 | is_last: bool, 590 | ) -> Result<(), Error> { 591 | let mut lengths: [u8; MAX_LENGTHS] = [0; MAX_LENGTHS]; 592 | bits.fill(source); 593 | let ltlen; 594 | let dtlen; 595 | if !is_last { 596 | ltlen = bits.pop(5) as usize + 257; 597 | dtlen = bits.pop(5) as usize + 1; 598 | let ptlen = bits.pop(4) as usize + 4; 599 | if ltlen > 286 || dtlen > 30 { 600 | return Err(Error::InvalidBitstream); 601 | } 602 | for length in &mut lengths[0..19] { 603 | *length = 0; 604 | } 605 | bits.fill(source); 606 | for code in &PRECODE_SWIZZLE[..ptlen] { 607 | let clen = bits.try_pop_source(source, 3)?; 608 | lengths[*code as usize] = clen as u8; 609 | } 610 | if !lt.build_precode(&lengths[..19]) { 611 | return Err(Error::InvalidBitstream); 612 | } 613 | let mut i = 0; 614 | while i < (ltlen + dtlen) { 615 | if bits.bits_in < 7 { 616 | bits.fill(source); 617 | } 618 | let entry = lt.table[bits.peek(7) as usize]; 619 | bits.skip(entry & ENTRY_LENGTH_MASK); 620 | let presym = entry >> ENTRY_SHIFT; 621 | if presym < 16 { 622 | lengths[i] = presym as u8; 623 | i += 1; 624 | continue; 625 | } 626 | if bits.bits_in < 7 { 627 | bits.fill(source); 628 | } 629 | if presym > 18 || (presym == 16 && i == 0) { 630 | return Err(Error::InvalidBitstream); 631 | } 632 | let (extra_bits, extra) = 633 | [(2, 3), (3, 3), (7, 11), (0, 0)][(presym as usize - 16) & 0x3]; 634 | let count = bits.pop(extra_bits) as usize + extra; 635 | let l = if presym == 16 { lengths[i - 1] } else { 0 }; 636 | let p = lengths 637 | .get_mut(i..i + count) 638 | .ok_or(Error::InvalidBitstream)?; 639 | p.iter_mut().for_each(|p| *p = l); 640 | i += count; 641 | } 642 | } else { 643 | ltlen = bits.try_pop(5)? 
as usize + 257; 644 | dtlen = bits.try_pop(5)? as usize + 1; 645 | let ptlen = bits.try_pop(4)? as usize + 4; 646 | if ltlen > 286 || dtlen > 30 { 647 | return Err(Error::InvalidBitstream); 648 | } 649 | for length in &mut lengths[0..19] { 650 | *length = 0; 651 | } 652 | bits.fill(source); 653 | for code in &PRECODE_SWIZZLE[..ptlen] { 654 | let clen = bits.try_pop_source(source, 3)?; 655 | lengths[*code as usize] = clen as u8; 656 | } 657 | if !lt.build_precode(&lengths[..19]) { 658 | return Err(Error::InvalidBitstream); 659 | } 660 | let mut i = 0; 661 | while i < (ltlen + dtlen) { 662 | if bits.bits_in < 7 { 663 | bits.fill(source); 664 | } 665 | let entry = lt.table[bits.peek(7) as usize]; 666 | bits.try_skip(entry & ENTRY_LENGTH_MASK)?; 667 | let presym = entry >> ENTRY_SHIFT; 668 | if presym < 16 { 669 | lengths[i] = presym as u8; 670 | i += 1; 671 | continue; 672 | } 673 | if bits.bits_in < 7 { 674 | bits.fill(source); 675 | } 676 | if presym > 18 || (presym == 16 && i == 0) { 677 | return Err(Error::InvalidBitstream); 678 | } 679 | let (extra_bits, extra) = 680 | [(2, 3), (3, 3), (7, 11), (0, 0)][(presym as usize - 16) & 0x3]; 681 | let count = bits.try_pop(extra_bits)? 
as usize + extra; 682 | let l = if presym == 16 { lengths[i - 1] } else { 0 }; 683 | let p = lengths 684 | .get_mut(i..i + count) 685 | .ok_or(Error::InvalidBitstream)?; 686 | p.iter_mut().for_each(|p| *p = l); 687 | i += count; 688 | } 689 | } 690 | if lengths[256] == 0 691 | || !lt.build(&lengths[..ltlen]) 692 | || !dt.build(&lengths[ltlen..ltlen + dtlen]) 693 | { 694 | return Err(Error::InvalidBitstream); 695 | } 696 | Ok(()) 697 | } 698 | 699 | fn build_tree( 700 | table: &mut [u32], 701 | lengths: &[u8], 702 | entries: &[u32], 703 | table_bits: usize, 704 | max_codeword_len: usize, 705 | ) -> bool { 706 | let mut len_counts = [0usize; MAX_CODE_SIZE + 1]; 707 | let mut offsets = [0usize; MAX_CODE_SIZE + 1]; 708 | let mut sorted_entries: [u32; 288] = [0; 288]; 709 | for &len in lengths { 710 | len_counts[len as usize] += 1; 711 | } 712 | offsets[1] = len_counts[0]; 713 | let mut codespace_used = 0; 714 | for len in 1..max_codeword_len { 715 | offsets[len + 1] = offsets[len] + len_counts[len]; 716 | codespace_used = (codespace_used << 1) + len_counts[len]; 717 | } 718 | codespace_used = (codespace_used << 1) + len_counts[max_codeword_len]; 719 | for sym in 0..lengths.len() { 720 | let len = lengths[sym]; 721 | let idx = &mut offsets[len as usize]; 722 | sorted_entries[*idx] = entries[sym]; 723 | *idx += 1; 724 | } 725 | let sorted_entries = &mut sorted_entries[offsets[0]..]; 726 | if codespace_used > (1 << max_codeword_len) { 727 | return false; 728 | } 729 | if codespace_used < (1 << max_codeword_len) { 730 | let entry = if codespace_used == 0 { 731 | entries[0] | 1 732 | } else { 733 | if codespace_used != (1 << (max_codeword_len - 1)) || len_counts[1] != 1 { 734 | return false; 735 | } 736 | sorted_entries[0] | 1 737 | }; 738 | for i in 0..(1 << table_bits) { 739 | table[i] = entry; 740 | } 741 | return true; 742 | } 743 | let mut len = 1; 744 | let mut count; 745 | loop { 746 | count = len_counts[len & 15]; 747 | if count != 0 { 748 | break; 749 | } 750 | len 
+= 1; 751 | } 752 | let mut codeword = 0; 753 | let mut cur_table_end = 1 << len; 754 | let mut s = 0; 755 | while len <= table_bits { 756 | loop { 757 | table[codeword] = sorted_entries[s] | len as u32; 758 | s += 1; 759 | if codeword == cur_table_end - 1 { 760 | while len < table_bits { 761 | table.copy_within(0..cur_table_end, cur_table_end); 762 | cur_table_end <<= 1; 763 | len += 1; 764 | } 765 | return true; 766 | } 767 | let bit = 1 << (31 - ((codeword ^ (cur_table_end - 1)) as u32).leading_zeros()); 768 | codeword &= bit - 1; 769 | codeword |= bit; 770 | count -= 1; 771 | if count == 0 { 772 | break; 773 | } 774 | } 775 | loop { 776 | len += 1; 777 | if len <= table_bits { 778 | table.copy_within(0..cur_table_end, cur_table_end); 779 | cur_table_end <<= 1; 780 | } 781 | count = len_counts[len & 15]; 782 | if count != 0 { 783 | break; 784 | } 785 | } 786 | } 787 | cur_table_end = 1 << table_bits; 788 | let mut subtable_prefix = !0; 789 | let mut subtable_start = 0; 790 | loop { 791 | if (codeword & ((1 << table_bits) - 1)) != subtable_prefix { 792 | subtable_prefix = codeword & ((1 << table_bits) - 1); 793 | subtable_start = cur_table_end; 794 | let mut subtable_bits = len - table_bits; 795 | codespace_used = count; 796 | while codespace_used < (1 << subtable_bits) { 797 | subtable_bits += 1; 798 | codespace_used = (codespace_used << 1) + len_counts[table_bits + subtable_bits]; 799 | } 800 | cur_table_end = subtable_start + (1 << subtable_bits); 801 | table[subtable_prefix] = 802 | ENTRY_SUBTABLE | (subtable_start << 8) as u32 | subtable_bits as u32; 803 | } 804 | let entry = sorted_entries[s] | (len - table_bits) as u32; 805 | s += 1; 806 | let mut i = subtable_start + (codeword >> table_bits); 807 | let stride = 1 << (len - table_bits); 808 | loop { 809 | table[i] = entry; 810 | i += stride; 811 | if i >= cur_table_end { 812 | break; 813 | } 814 | } 815 | if codeword == (1 << len) - 1 { 816 | return true; 817 | } 818 | let bit = 1 << (31 - ((codeword ^ ((1 
<< len) - 1)) as u32).leading_zeros());
        codeword &= bit - 1;
        codeword |= bit;
        count -= 1;
        while count == 0 {
            len += 1;
            count = len_counts[len & 15];
        }
    }
}

/// Validates a zlib (RFC 1950) stream header.
///
/// The two header bytes must satisfy the FCHECK constraint
/// ((256 * CMF + FLG) divisible by 31), declare the deflate method
/// (CM == 8) with a window no larger than 32 KiB (CINFO <= 7), and must
/// not request a preset dictionary (FDICT == 0).
fn verify_zlib_header(source: &mut Source, bits: &mut Bits) -> Result<(), Error> {
    let cmf = bits.try_pop_source(source, 8)?;
    let flg = bits.try_pop_source(source, 8)?;
    let fcheck_ok = (256 * cmf + flg) % 31 == 0;
    let method_ok = cmf & 0x0F == 8 && (cmf >> 4) <= 7;
    let no_dictionary = flg & 0x20 == 0;
    if fcheck_ok && method_ok && no_dictionary {
        Ok(())
    } else {
        Err(Error::InvalidBitstream)
    }
}

/// Reads the big-endian Adler-32 value stored in the zlib trailer.
fn read_zlib_checksum(source: &mut Source, bits: &mut Bits) -> Result<u32, Error> {
    let mut value = 0u32;
    for _ in 0..4 {
        value = (value << 8) | bits.try_pop_source(source, 8)?;
    }
    Ok(value)
}

/// The pair of Huffman tables needed to decode a DEFLATE block.
struct Trees {
    /// Literal/length table.
    lt: LiteralLengthTree,
    /// Distance table.
    dt: DistanceTree,
}

impl Trees {
    #[inline(always)]
    fn new() -> Self {
        Self {
            lt: LiteralLengthTree::new(),
            dt: DistanceTree::new(),
        }
    }
}

/// Carry-over buffer for input bytes that could not be consumed in one
/// call. Sized at 286 bytes to match the decoder's worst-case
/// `bytes_available(source) < 286` requirement before a block header.
struct Remainder {
    buffer: [u8; 286],
    /// Read cursor into `buffer`.
    pos: usize,
    /// Number of unread bytes starting at `pos`.
    avail: usize,
}

impl Remainder {
    fn new() -> Self {
        Self {
            buffer: [0; 286],
            pos: 0,
            avail: 0,
        }
    }

    /// Appends as much of `buf` as fits, first compacting any pending
    /// bytes to the front. Returns the number of bytes consumed.
    fn push(&mut self, buf: &[u8]) -> usize {
        if self.pos != 0 {
            self.buffer.copy_within(self.pos..self.pos + self.avail, 0);
            self.pos = 0;
        }
        let room = self.buffer.len() - self.avail;
        let take = room.min(buf.len());
        self.buffer[self.avail..self.avail + take].copy_from_slice(&buf[..take]);
        self.avail += take;
        take
    }
}

/// A cursor over a borrowed slice of compressed input.
struct Source<'a> {
    buffer: &'a [u8],
    /// Read position within `buffer`.
    pos: usize,
    /// Bytes remaining after `pos`.
    avail: usize,
}

impl<'a> Source<'a> {
    fn new(buffer: &'a [u8]) -> Self {
        Self {
            buffer,
            pos: 0,
| avail: buffer.len(), 901 | } 902 | } 903 | 904 | fn from_remainder(remainder: &'a Remainder) -> Self { 905 | Self { 906 | buffer: &remainder.buffer[remainder.pos..remainder.pos + remainder.avail], 907 | pos: 0, 908 | avail: remainder.avail, 909 | } 910 | } 911 | 912 | fn try_get(&mut self, len: usize) -> Result<&[u8], Error> { 913 | let bytes = self.get(len); 914 | if bytes.is_empty() { 915 | return Err(Error::Underflow); 916 | } 917 | Ok(bytes) 918 | } 919 | 920 | #[inline(always)] 921 | fn get(&mut self, len: usize) -> &[u8] { 922 | let len = len.min(self.avail); 923 | let pos = self.pos; 924 | let bytes = &self.buffer[pos..pos + len]; 925 | self.pos += len; 926 | self.avail -= len; 927 | bytes 928 | } 929 | } 930 | 931 | #[derive(Copy, Clone)] 932 | struct Bits { 933 | bit_buffer: u64, 934 | bits_in: u32, 935 | } 936 | 937 | impl Bits { 938 | fn new(bit_buffer: u64, bits_in: u32) -> Self { 939 | Self { 940 | bit_buffer, 941 | bits_in, 942 | } 943 | } 944 | 945 | fn bytes_available(&self, source: &Source) -> usize { 946 | source.avail + (self.bits_in as usize / 8) 947 | } 948 | 949 | #[inline(always)] 950 | fn fill(&mut self, source: &mut Source) -> u32 { 951 | let count = (64 - self.bits_in as usize) >> 3; 952 | let bytes = source.get(count); 953 | let len = bytes.len(); 954 | let mut i = 0; 955 | while (i + 4) <= len { 956 | use core::convert::TryInto; 957 | let v = u32::from_le_bytes((&bytes[i..i + 4]).try_into().unwrap()) as u64; 958 | self.bit_buffer |= v << self.bits_in; 959 | self.bits_in += 32; 960 | i += 4; 961 | } 962 | while i < len { 963 | self.bit_buffer |= (bytes[i] as u64) << self.bits_in; 964 | self.bits_in += 8; 965 | i += 1; 966 | } 967 | self.bits_in 968 | } 969 | 970 | #[inline(always)] 971 | fn try_pop_source(&mut self, source: &mut Source, len: u32) -> Result { 972 | if self.bits_in < len && self.fill(source) < len { 973 | return Err(Error::Underflow); 974 | } 975 | let bits = self.bit_buffer & ((1 << len) - 1); 976 | self.bit_buffer >>= 
len; 977 | self.bits_in -= len; 978 | Ok(bits as u32) 979 | } 980 | 981 | #[inline(always)] 982 | fn try_pop(&mut self, len: u32) -> Result { 983 | if self.bits_in < len { 984 | return Err(Error::Underflow); 985 | } 986 | let bits = self.bit_buffer & ((1 << len) - 1); 987 | self.bit_buffer >>= len; 988 | self.bits_in -= len; 989 | Ok(bits as u32) 990 | } 991 | 992 | #[inline(always)] 993 | fn try_skip(&mut self, len: u32) -> Result<(), Error> { 994 | if self.bits_in < len { 995 | return Err(Error::Underflow); 996 | } 997 | self.bit_buffer >>= len; 998 | self.bits_in -= len; 999 | Ok(()) 1000 | } 1001 | 1002 | #[inline(always)] 1003 | fn peek(&mut self, len: u32) -> u32 { 1004 | (self.bit_buffer & ((1 << len) - 1)) as u32 1005 | } 1006 | 1007 | #[inline(always)] 1008 | fn pop(&mut self, len: u32) -> u32 { 1009 | let bits = self.bit_buffer & ((1 << len) - 1); 1010 | self.bit_buffer >>= len; 1011 | self.bits_in -= len; 1012 | bits as u32 1013 | } 1014 | 1015 | #[inline(always)] 1016 | fn skip(&mut self, len: u32) { 1017 | self.bit_buffer >>= len; 1018 | self.bits_in -= len; 1019 | } 1020 | } 1021 | 1022 | #[inline(always)] 1023 | fn copy_match(buf: &mut [u8], pos: usize, len: usize, buf_end: usize) { 1024 | let dist = buf_end - pos; 1025 | if dist > len { 1026 | buf.copy_within(pos..pos + len, buf_end); 1027 | } else { 1028 | for i in 0..len { 1029 | buf[buf_end + i] = buf[pos + i]; 1030 | } 1031 | } 1032 | } 1033 | 1034 | #[doc(hidden)] 1035 | pub trait Sink { 1036 | fn written(&self) -> u64; 1037 | fn push(&mut self, byte: u8) -> Result<(), Error>; 1038 | fn write(&mut self, bytes: &[u8]) -> Result<(), Error>; 1039 | fn apply_match(&mut self, dist: usize, len: usize) -> Result<(), Error>; 1040 | } 1041 | 1042 | struct VecSink<'a> { 1043 | buffer: &'a mut Vec, 1044 | start_pos: usize, 1045 | pos: usize, 1046 | } 1047 | 1048 | impl<'a> VecSink<'a> { 1049 | fn new(buffer: &'a mut Vec) -> Self { 1050 | let start_pos = buffer.len(); 1051 | Self { 1052 | buffer, 1053 | 
start_pos, 1054 | pos: start_pos, 1055 | } 1056 | } 1057 | } 1058 | 1059 | impl Drop for VecSink<'_> { 1060 | fn drop(&mut self) { 1061 | self.buffer.truncate(self.pos); 1062 | } 1063 | } 1064 | 1065 | impl Sink for VecSink<'_> { 1066 | fn written(&self) -> u64 { 1067 | (self.pos - self.start_pos) as u64 1068 | } 1069 | 1070 | #[inline(always)] 1071 | fn push(&mut self, byte: u8) -> Result<(), Error> { 1072 | self.buffer.push(byte); 1073 | self.pos += 1; 1074 | Ok(()) 1075 | } 1076 | 1077 | fn write(&mut self, bytes: &[u8]) -> Result<(), Error> { 1078 | let len = bytes.len(); 1079 | self.buffer.extend_from_slice(bytes); 1080 | self.pos += len; 1081 | Ok(()) 1082 | } 1083 | 1084 | #[inline(always)] 1085 | fn apply_match(&mut self, dist: usize, len: usize) -> Result<(), Error> { 1086 | let buf_len = self.pos - self.start_pos; 1087 | if dist > buf_len { 1088 | return Err(Error::InvalidBitstream); 1089 | } 1090 | let pos = self.pos - dist; 1091 | self.buffer.resize(self.pos + len, 0); 1092 | copy_match(self.buffer, pos, len, self.pos); 1093 | self.pos += len; 1094 | Ok(()) 1095 | } 1096 | } 1097 | 1098 | struct BufSink<'a> { 1099 | buffer: &'a mut [u8], 1100 | pos: usize, 1101 | } 1102 | 1103 | impl Sink for BufSink<'_> { 1104 | fn written(&self) -> u64 { 1105 | self.pos as u64 1106 | } 1107 | 1108 | #[inline(always)] 1109 | fn push(&mut self, byte: u8) -> Result<(), Error> { 1110 | if self.pos < self.buffer.len() { 1111 | self.buffer[self.pos] = byte; 1112 | self.pos += 1; 1113 | Ok(()) 1114 | } else { 1115 | Err(Error::Overflow) 1116 | } 1117 | } 1118 | 1119 | fn write(&mut self, bytes: &[u8]) -> Result<(), Error> { 1120 | let len = bytes.len(); 1121 | if self.pos + len <= self.buffer.len() { 1122 | self.buffer[self.pos..self.pos + len].copy_from_slice(bytes); 1123 | self.pos += len; 1124 | Ok(()) 1125 | } else { 1126 | Err(Error::Overflow) 1127 | } 1128 | } 1129 | 1130 | #[inline(always)] 1131 | fn apply_match(&mut self, dist: usize, len: usize) -> Result<(), Error> 
{ 1132 | if dist > self.pos { 1133 | return Err(Error::InvalidBitstream); 1134 | } 1135 | if self.pos + len > self.buffer.len() { 1136 | return Err(Error::Overflow); 1137 | } 1138 | let pos = self.pos - dist; 1139 | copy_match(self.buffer, pos, len, self.pos); 1140 | self.pos += len; 1141 | Ok(()) 1142 | } 1143 | } 1144 | 1145 | #[cfg(feature = "std")] 1146 | struct WriterSink { 1147 | writer: W, 1148 | ring: RingBuffer, 1149 | written: u64, 1150 | } 1151 | 1152 | #[cfg(feature = "std")] 1153 | impl Sink for WriterSink { 1154 | fn written(&self) -> u64 { 1155 | self.written 1156 | } 1157 | 1158 | #[inline] 1159 | fn push(&mut self, byte: u8) -> Result<(), Error> { 1160 | self.ring.push(byte); 1161 | self.written += 1; 1162 | match self.writer.write_all(&[byte]) { 1163 | Err(err) => Err(Error::Io(err)), 1164 | Ok(_) => Ok(()), 1165 | } 1166 | } 1167 | 1168 | fn write(&mut self, bytes: &[u8]) -> Result<(), Error> { 1169 | for &b in bytes { 1170 | self.ring.push(b); 1171 | } 1172 | self.written += bytes.len() as u64; 1173 | match self.writer.write_all(bytes) { 1174 | Err(err) => Err(Error::Io(err)), 1175 | Ok(_) => Ok(()), 1176 | } 1177 | } 1178 | 1179 | #[inline] 1180 | fn apply_match(&mut self, dist: usize, len: usize) -> Result<(), Error> { 1181 | if dist > self.ring.len { 1182 | return Err(Error::InvalidBitstream); 1183 | } 1184 | let pos = self.ring.len - dist; 1185 | for i in 0..len { 1186 | self.push(self.ring.get(pos + i))?; 1187 | } 1188 | Ok(()) 1189 | } 1190 | } 1191 | 1192 | #[cfg(feature = "std")] 1193 | struct RingBuffer { 1194 | buffer: [u8; RING_BUFFER_SIZE], 1195 | len: usize, 1196 | } 1197 | 1198 | #[cfg(feature = "std")] 1199 | impl RingBuffer { 1200 | #[inline(always)] 1201 | fn new() -> Self { 1202 | Self { 1203 | buffer: [0; RING_BUFFER_SIZE], 1204 | len: 0, 1205 | } 1206 | } 1207 | 1208 | #[inline(always)] 1209 | fn push(&mut self, value: u8) { 1210 | self.buffer[self.len & (RING_BUFFER_SIZE - 1)] = value; 1211 | self.len += 1; 1212 | } 1213 | 
1214 | #[inline(always)] 1215 | fn get(&self, index: usize) -> u8 { 1216 | self.buffer[index & (RING_BUFFER_SIZE - 1)] 1217 | } 1218 | } 1219 | 1220 | struct LiteralLengthTree { 1221 | table: [u32; LITERAL_LENGTH_TREE_SIZE], 1222 | } 1223 | 1224 | impl LiteralLengthTree { 1225 | #[inline(always)] 1226 | fn new() -> Self { 1227 | Self { 1228 | table: [0; LITERAL_LENGTH_TREE_SIZE], 1229 | } 1230 | } 1231 | 1232 | fn build(&mut self, lengths: &[u8]) -> bool { 1233 | build_tree(&mut self.table, lengths, &LITERAL_LENGTH_ENTRIES, 10, 15) 1234 | } 1235 | 1236 | fn build_precode(&mut self, lengths: &[u8]) -> bool { 1237 | build_tree(&mut self.table, &lengths[..19], &PRECODE_ENTRIES, 7, 7) 1238 | } 1239 | } 1240 | 1241 | struct DistanceTree { 1242 | table: [u32; DISTANCE_TREE_SIZE], 1243 | } 1244 | 1245 | impl DistanceTree { 1246 | #[inline(always)] 1247 | fn new() -> Self { 1248 | Self { 1249 | table: [0; DISTANCE_TREE_SIZE], 1250 | } 1251 | } 1252 | 1253 | fn build(&mut self, lengths: &[u8]) -> bool { 1254 | build_tree(&mut self.table, lengths, &DISTANCE_ENTRIES, 8, 15) 1255 | } 1256 | } 1257 | 1258 | #[cfg(feature = "std")] 1259 | const RING_BUFFER_SIZE: usize = 32768; 1260 | const LITERAL_LENGTH_TREE_SIZE: usize = 1334; 1261 | const DISTANCE_TREE_SIZE: usize = 402; 1262 | const MAX_CODE_SIZE: usize = 15; 1263 | const MAX_LENGTHS: usize = 288 + 32; 1264 | const ENTRY_LITERAL: u32 = 0x40000000; 1265 | const ENTRY_SUBTABLE: u32 = 0x80000000; 1266 | const ENTRY_LENGTH_MASK: u32 = 0xFF; 1267 | const ENTRY_SHIFT: u32 = 8; 1268 | const LITERAL_LENGTH_TABLE_BITS: u32 = 10; 1269 | const DISTANCE_TABLE_BITS: u32 = 8; 1270 | const EXTRA_LENGTH_BITS_MASK: u32 = 0xFF; 1271 | const LENGTH_BASE_SHIFT: u32 = 8; 1272 | const EXTRA_DISTANCE_BITS_SHIFT: u32 = 16; 1273 | const DISTANCE_BASE_MASK: u32 = (1 << EXTRA_DISTANCE_BITS_SHIFT) - 1; 1274 | 1275 | const PRECODE_SWIZZLE: [u8; 19] = [ 1276 | 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, 1277 | ]; 1278 | 1279 | const 
PRECODE_ENTRIES: [u32; 19] = [ 1280 | 0x00000000, 0x00000100, 0x00000200, 0x00000300, 0x00000400, 0x00000500, 0x00000600, 0x00000700, 1281 | 0x00000800, 0x00000900, 0x00000A00, 0x00000B00, 0x00000C00, 0x00000D00, 0x00000E00, 0x00000F00, 1282 | 0x00001000, 0x00001100, 0x00001200, 1283 | ]; 1284 | 1285 | const LITERAL_LENGTH_ENTRIES: [u32; 288] = [ 1286 | 0x40000000, 0x40000100, 0x40000200, 0x40000300, 0x40000400, 0x40000500, 0x40000600, 0x40000700, 1287 | 0x40000800, 0x40000900, 0x40000A00, 0x40000B00, 0x40000C00, 0x40000D00, 0x40000E00, 0x40000F00, 1288 | 0x40001000, 0x40001100, 0x40001200, 0x40001300, 0x40001400, 0x40001500, 0x40001600, 0x40001700, 1289 | 0x40001800, 0x40001900, 0x40001A00, 0x40001B00, 0x40001C00, 0x40001D00, 0x40001E00, 0x40001F00, 1290 | 0x40002000, 0x40002100, 0x40002200, 0x40002300, 0x40002400, 0x40002500, 0x40002600, 0x40002700, 1291 | 0x40002800, 0x40002900, 0x40002A00, 0x40002B00, 0x40002C00, 0x40002D00, 0x40002E00, 0x40002F00, 1292 | 0x40003000, 0x40003100, 0x40003200, 0x40003300, 0x40003400, 0x40003500, 0x40003600, 0x40003700, 1293 | 0x40003800, 0x40003900, 0x40003A00, 0x40003B00, 0x40003C00, 0x40003D00, 0x40003E00, 0x40003F00, 1294 | 0x40004000, 0x40004100, 0x40004200, 0x40004300, 0x40004400, 0x40004500, 0x40004600, 0x40004700, 1295 | 0x40004800, 0x40004900, 0x40004A00, 0x40004B00, 0x40004C00, 0x40004D00, 0x40004E00, 0x40004F00, 1296 | 0x40005000, 0x40005100, 0x40005200, 0x40005300, 0x40005400, 0x40005500, 0x40005600, 0x40005700, 1297 | 0x40005800, 0x40005900, 0x40005A00, 0x40005B00, 0x40005C00, 0x40005D00, 0x40005E00, 0x40005F00, 1298 | 0x40006000, 0x40006100, 0x40006200, 0x40006300, 0x40006400, 0x40006500, 0x40006600, 0x40006700, 1299 | 0x40006800, 0x40006900, 0x40006A00, 0x40006B00, 0x40006C00, 0x40006D00, 0x40006E00, 0x40006F00, 1300 | 0x40007000, 0x40007100, 0x40007200, 0x40007300, 0x40007400, 0x40007500, 0x40007600, 0x40007700, 1301 | 0x40007800, 0x40007900, 0x40007A00, 0x40007B00, 0x40007C00, 0x40007D00, 0x40007E00, 0x40007F00, 
1302 | 0x40008000, 0x40008100, 0x40008200, 0x40008300, 0x40008400, 0x40008500, 0x40008600, 0x40008700, 1303 | 0x40008800, 0x40008900, 0x40008A00, 0x40008B00, 0x40008C00, 0x40008D00, 0x40008E00, 0x40008F00, 1304 | 0x40009000, 0x40009100, 0x40009200, 0x40009300, 0x40009400, 0x40009500, 0x40009600, 0x40009700, 1305 | 0x40009800, 0x40009900, 0x40009A00, 0x40009B00, 0x40009C00, 0x40009D00, 0x40009E00, 0x40009F00, 1306 | 0x4000A000, 0x4000A100, 0x4000A200, 0x4000A300, 0x4000A400, 0x4000A500, 0x4000A600, 0x4000A700, 1307 | 0x4000A800, 0x4000A900, 0x4000AA00, 0x4000AB00, 0x4000AC00, 0x4000AD00, 0x4000AE00, 0x4000AF00, 1308 | 0x4000B000, 0x4000B100, 0x4000B200, 0x4000B300, 0x4000B400, 0x4000B500, 0x4000B600, 0x4000B700, 1309 | 0x4000B800, 0x4000B900, 0x4000BA00, 0x4000BB00, 0x4000BC00, 0x4000BD00, 0x4000BE00, 0x4000BF00, 1310 | 0x4000C000, 0x4000C100, 0x4000C200, 0x4000C300, 0x4000C400, 0x4000C500, 0x4000C600, 0x4000C700, 1311 | 0x4000C800, 0x4000C900, 0x4000CA00, 0x4000CB00, 0x4000CC00, 0x4000CD00, 0x4000CE00, 0x4000CF00, 1312 | 0x4000D000, 0x4000D100, 0x4000D200, 0x4000D300, 0x4000D400, 0x4000D500, 0x4000D600, 0x4000D700, 1313 | 0x4000D800, 0x4000D900, 0x4000DA00, 0x4000DB00, 0x4000DC00, 0x4000DD00, 0x4000DE00, 0x4000DF00, 1314 | 0x4000E000, 0x4000E100, 0x4000E200, 0x4000E300, 0x4000E400, 0x4000E500, 0x4000E600, 0x4000E700, 1315 | 0x4000E800, 0x4000E900, 0x4000EA00, 0x4000EB00, 0x4000EC00, 0x4000ED00, 0x4000EE00, 0x4000EF00, 1316 | 0x4000F000, 0x4000F100, 0x4000F200, 0x4000F300, 0x4000F400, 0x4000F500, 0x4000F600, 0x4000F700, 1317 | 0x4000F800, 0x4000F900, 0x4000FA00, 0x4000FB00, 0x4000FC00, 0x4000FD00, 0x4000FE00, 0x4000FF00, 1318 | 0x00000000, 0x00030000, 0x00040000, 0x00050000, 0x00060000, 0x00070000, 0x00080000, 0x00090000, 1319 | 0x000A0000, 0x000B0100, 0x000D0100, 0x000F0100, 0x00110100, 0x00130200, 0x00170200, 0x001B0200, 1320 | 0x001F0200, 0x00230300, 0x002B0300, 0x00330300, 0x003B0300, 0x00430400, 0x00530400, 0x00630400, 1321 | 0x00730400, 0x00830500, 0x00A30500, 
0x00C30500, 0x00E30500, 0x01020000, 0x01020000, 0x01020000, 1322 | ]; 1323 | 1324 | const DISTANCE_ENTRIES: [u32; 32] = [ 1325 | 0x00000100, 0x00000200, 0x00000300, 0x00000400, 0x01000500, 0x01000700, 0x02000900, 0x02000D00, 1326 | 0x03001100, 0x03001900, 0x04002100, 0x04003100, 0x05004100, 0x05006100, 0x06008100, 0x0600C100, 1327 | 0x07010100, 0x07018100, 0x08020100, 0x08030100, 0x09040100, 0x09060100, 0x0A080100, 0x0A0C0100, 1328 | 0x0B100100, 0x0B180100, 0x0C200100, 0x0C300100, 0x0D400100, 0x0D600100, 0x0E800100, 0x0EC00100, 1329 | ]; 1330 | -------------------------------------------------------------------------------- /src/encode.rs: -------------------------------------------------------------------------------- 1 | //! RFC 1590 compression implementation. 2 | 3 | #![allow(clippy::needless_range_loop, clippy::new_without_default)] 4 | 5 | use super::{Adler32, Error, Format}; 6 | use alloc::boxed::Box; 7 | use alloc::vec::Vec; 8 | use core::convert::TryInto; 9 | 10 | #[cfg(feature = "std")] 11 | use std::io::{self, Write}; 12 | 13 | /// The level of compression-- a compromise between speed and size. 14 | #[derive(Copy, Clone, PartialEq, Debug)] 15 | pub enum CompressionLevel { 16 | /// No compression. Outputs raw blocks. 17 | None, 18 | /// Fast compression. 19 | BestSpeed, 20 | /// Compromise between speed and size. 21 | Default, 22 | /// Slower compression for smaller size. 23 | BestSize, 24 | /// A specific compression level from 1-10. 25 | Specific(u8), 26 | } 27 | 28 | impl CompressionLevel { 29 | fn to_raw(self) -> usize { 30 | use CompressionLevel::*; 31 | match self { 32 | None => 0, 33 | BestSpeed => 1, 34 | Default => 6, 35 | BestSize => 9, 36 | Specific(level) => 10.min(level as usize), 37 | } 38 | } 39 | } 40 | 41 | /// Selects between various specialized compressor modes. 42 | #[derive(Copy, Clone)] 43 | pub enum CompressionStrategy { 44 | /// Let it do its thing. 45 | Default, 46 | /// Run-length encoding only. 
47 | RLE, 48 | /// Ignore matches fewer than 5 bytes. 49 | Filtered, 50 | /// Static blocks only. 51 | Static, 52 | /// Huffman encoding only. 53 | Huffman, 54 | } 55 | 56 | /// Stateful context for compression. 57 | /// 58 | /// See the crate level [compression](index.html#compression) section 59 | /// for detailed usage. 60 | pub struct Encoder(DeflateContext); 61 | 62 | impl Encoder { 63 | /// Creates a new deflate encoder. Note that creating an encoder with this 64 | /// method allocates a large (200-300k) chunk of data on the stack and is 65 | /// likely to cause an overflow if not carefully managed. See the [`boxed()`] 66 | /// constructor for a safer method that allocates on the heap. 67 | /// 68 | /// [`boxed()`]: Self::boxed 69 | pub fn new() -> Self { 70 | let flags = make_flags( 71 | false, 72 | CompressionLevel::Default, 73 | CompressionStrategy::Default, 74 | ); 75 | Self(DeflateContext { 76 | flags, 77 | ready: true, 78 | zlib: false, 79 | level: CompressionLevel::Default, 80 | strategy: CompressionStrategy::Default, 81 | greedy_parsing: flags & GREEDY_PARSING != 0, 82 | block_index: 0, 83 | saved_match_dist: 0, 84 | saved_match_len: 0, 85 | saved_lit: 0, 86 | saved_bit_buffer: 0, 87 | saved_bits_in: 0, 88 | adler32: Adler32::new(), 89 | lt: LiteralLengthTree::new(), 90 | dt: DistanceTree::new(), 91 | pt: PrecodeTree::new(), 92 | cb: CodeBuffer::new(), 93 | dict: Dictionary::new(flags), 94 | }) 95 | } 96 | 97 | /// Creates a new deflate encoder on the heap. 
98 | pub fn boxed() -> Box { 99 | let flags = make_flags( 100 | false, 101 | CompressionLevel::Default, 102 | CompressionStrategy::Default, 103 | ); 104 | Box::new(Self(DeflateContext { 105 | flags, 106 | ready: true, 107 | zlib: false, 108 | level: CompressionLevel::Default, 109 | strategy: CompressionStrategy::Default, 110 | greedy_parsing: flags & GREEDY_PARSING != 0, 111 | block_index: 0, 112 | saved_match_dist: 0, 113 | saved_match_len: 0, 114 | saved_lit: 0, 115 | saved_bit_buffer: 0, 116 | saved_bits_in: 0, 117 | adler32: Adler32::new(), 118 | lt: LiteralLengthTree::new(), 119 | dt: DistanceTree::new(), 120 | pt: PrecodeTree::new(), 121 | cb: CodeBuffer::new(), 122 | dict: Dictionary::new(flags), 123 | })) 124 | } 125 | 126 | /// Sets the format of the output bitstream for the next usage of the 127 | /// encoder. 128 | pub fn set_format(&mut self, format: Format) { 129 | self.0.reset(format == Format::Zlib); 130 | } 131 | 132 | /// Sets the compression level for the next usage of the encoder. 133 | pub fn set_level(&mut self, level: CompressionLevel) { 134 | let flags = make_flags(self.0.zlib, level, self.0.strategy); 135 | self.0.flags = flags; 136 | self.0.level = level; 137 | self.0.greedy_parsing = flags & GREEDY_PARSING != 0; 138 | self.0.dict.max_probes = Dictionary::probes_from_flags(flags); 139 | } 140 | 141 | /// Sets the compression strategy for the next usage of the encoder. 142 | pub fn set_strategy(&mut self, strategy: CompressionStrategy) { 143 | let flags = make_flags(self.0.zlib, self.0.level, strategy); 144 | self.0.flags = flags; 145 | self.0.strategy = strategy; 146 | self.0.greedy_parsing = flags & GREEDY_PARSING != 0; 147 | self.0.dict.max_probes = Dictionary::probes_from_flags(flags); 148 | } 149 | 150 | /// Creates an encoder stream that will write into the specified writer. 
151 | #[cfg(feature = "std")] 152 | pub fn stream<'a, W: Write>( 153 | &'a mut self, 154 | writer: &'a mut W, 155 | ) -> EncoderStream<'a, impl Sink + 'a> { 156 | self.0.reset(self.0.zlib); 157 | EncoderStream { 158 | ctx: &mut self.0, 159 | sink: WriterSink::new(writer), 160 | finished: false, 161 | } 162 | } 163 | 164 | /// Creates an encoder stream that will write into the specified vector. 165 | /// The resulting stream will not clear the vector but will instead append 166 | /// the compressed data. 167 | pub fn stream_into_vec<'a>( 168 | &'a mut self, 169 | vec: &'a mut Vec, 170 | ) -> EncoderStream<'a, impl Sink + 'a> { 171 | self.0.reset(self.0.zlib); 172 | EncoderStream { 173 | ctx: &mut self.0, 174 | sink: VecSink::new(vec), 175 | finished: false, 176 | } 177 | } 178 | 179 | /// Creates an encoder stream that will write into the specified buffer. 180 | /// The stream will generate an overflow error if the buffer is not large 181 | /// enough to contain the compressed data. 182 | pub fn stream_into_buf<'a>( 183 | &'a mut self, 184 | buf: &'a mut [u8], 185 | ) -> EncoderStream<'a, impl Sink + 'a> { 186 | self.0.reset(self.0.zlib); 187 | EncoderStream { 188 | ctx: &mut self.0, 189 | sink: BufSink::new(buf), 190 | finished: false, 191 | } 192 | } 193 | } 194 | 195 | /// Compression stream combining an encoder context with an output. 196 | /// 197 | /// See the crate level [compression](index.html#compression) section 198 | /// for detailed usage. 199 | pub struct EncoderStream<'a, S: Sink> { 200 | ctx: &'a mut DeflateContext, 201 | sink: S, 202 | finished: bool, 203 | } 204 | 205 | impl EncoderStream<'_, S> { 206 | /// Writes the specified buffer to the stream, producing compressed data 207 | /// in the output. 
208 | pub fn write(&mut self, buf: &[u8]) -> Result<(), Error> { 209 | if self.finished { 210 | return Err(Error::Finished); 211 | } 212 | self.ctx.deflate(buf, &mut self.sink, false) 213 | } 214 | 215 | /// Returns the number of compressed bytes that have been written to the 216 | /// output. 217 | pub fn compressed_size(&self) -> u64 { 218 | self.sink.written() 219 | } 220 | 221 | /// Consumes the stream, flushing any input that may be buffered and any 222 | /// remaining output. Returns the total number of compressed bytes written 223 | /// to the output. 224 | pub fn finish(mut self) -> Result { 225 | if self.finished { 226 | return Err(Error::Finished); 227 | } 228 | self.finished = true; 229 | self.ctx.deflate(&[], &mut self.sink, true)?; 230 | self.ctx.flush_block(&mut self.sink, true)?; 231 | Ok(self.sink.written()) 232 | } 233 | } 234 | 235 | impl Drop for EncoderStream<'_, S> { 236 | fn drop(&mut self) { 237 | if !self.finished { 238 | self.finished = true; 239 | let _ = self.ctx.deflate(&[], &mut self.sink, true); 240 | let _ = self.ctx.flush_block(&mut self.sink, true); 241 | } 242 | } 243 | } 244 | 245 | #[cfg(feature = "std")] 246 | impl Write for EncoderStream<'_, S> { 247 | fn write(&mut self, buf: &[u8]) -> io::Result { 248 | match self.ctx.deflate(buf, &mut self.sink, false) { 249 | Ok(_) => Ok(buf.len()), 250 | Err(err) => match err { 251 | Error::Io(err) => Err(err), 252 | Error::Underflow | Error::Overflow => { 253 | Err(io::Error::from(io::ErrorKind::InvalidInput)) 254 | } 255 | _ => Err(io::Error::from(io::ErrorKind::InvalidData)), 256 | }, 257 | } 258 | } 259 | 260 | fn flush(&mut self) -> io::Result<()> { 261 | Ok(()) 262 | } 263 | } 264 | 265 | /// Compresses a buffer into a vector with the specified format and 266 | /// compression level. 
267 | pub fn compress(buf: &[u8], format: Format, level: CompressionLevel) -> Result, Error> { 268 | let mut encoder = Encoder::boxed(); 269 | encoder.set_format(format); 270 | encoder.set_level(level); 271 | let mut vec = Vec::new(); 272 | let mut stream = encoder.stream_into_vec(&mut vec); 273 | stream.write(buf)?; 274 | stream.finish()?; 275 | Ok(vec) 276 | } 277 | 278 | struct DeflateContext { 279 | flags: u32, 280 | ready: bool, 281 | zlib: bool, 282 | level: CompressionLevel, 283 | strategy: CompressionStrategy, 284 | greedy_parsing: bool, 285 | block_index: u32, 286 | saved_match_dist: usize, 287 | saved_match_len: usize, 288 | saved_lit: u8, 289 | saved_bit_buffer: u32, 290 | saved_bits_in: u32, 291 | adler32: Adler32, 292 | lt: LiteralLengthTree, 293 | dt: DistanceTree, 294 | pt: PrecodeTree, 295 | cb: CodeBuffer, 296 | dict: Dictionary, 297 | } 298 | 299 | impl DeflateContext { 300 | fn deflate(&mut self, buf: &[u8], sink: &mut S, is_last: bool) -> Result<(), Error> { 301 | if !is_last && buf.is_empty() { 302 | return Ok(()); 303 | } 304 | self.deflate_inner(buf, sink, is_last)?; 305 | if self.flags & WRITE_ZLIB_HEADER != 0 { 306 | self.adler32.update(buf); 307 | } 308 | Ok(()) 309 | } 310 | 311 | fn deflate_inner( 312 | &mut self, 313 | data: &[u8], 314 | sink: &mut S, 315 | is_last: bool, 316 | ) -> Result<(), Error> { 317 | const DICT_MASK: usize = DICTIONARY_SIZE - 1; 318 | self.ready = false; 319 | let mut src_pos = 0; 320 | let mut lookahead_size = self.dict.lookahead_size; 321 | let mut lookahead_pos = self.dict.lookahead_pos; 322 | let mut saved_lit = self.saved_lit; 323 | let mut saved_match_dist = self.saved_match_dist; 324 | let mut saved_match_len = self.saved_match_len; 325 | while src_pos < data.len() || (is_last && lookahead_size != 0) { 326 | let src_buf_left = data.len() - src_pos; 327 | let num_bytes_to_process = src_buf_left.min(MAX_MATCH_LEN - lookahead_size); 328 | if lookahead_size + self.dict.len >= MIN_MATCH_LEN - 1 && 
num_bytes_to_process > 0 { 329 | let dict = &mut self.dict; 330 | let mut dst_pos = (lookahead_pos + lookahead_size) & DICT_MASK; 331 | let mut ins_pos = lookahead_pos + lookahead_size - 2; 332 | let mut hash = (u32::from(dict.dict[ins_pos & DICT_MASK]) << HASH_SHIFT) 333 | ^ u32::from(dict.dict[(ins_pos + 1) & DICT_MASK]); 334 | lookahead_size += num_bytes_to_process; 335 | for &c in &data[src_pos..src_pos + num_bytes_to_process] { 336 | dict.dict[dst_pos] = c; 337 | if dst_pos < MAX_MATCH_LEN - 1 { 338 | dict.dict[DICTIONARY_SIZE + dst_pos] = c; 339 | } 340 | hash = ((hash << HASH_SHIFT) ^ u32::from(c)) & (HASH_SIZE as u32 - 1); 341 | dict.next[ins_pos & DICT_MASK] = dict.hash[hash as usize]; 342 | dict.hash[hash as usize] = ins_pos as u16; 343 | dst_pos = (dst_pos + 1) & DICT_MASK; 344 | ins_pos += 1; 345 | } 346 | src_pos += num_bytes_to_process; 347 | } else { 348 | let dict = &mut self.dict; 349 | for &c in &data[src_pos..src_pos + num_bytes_to_process] { 350 | let dst_pos = (lookahead_pos + lookahead_size) & DICT_MASK; 351 | dict.dict[dst_pos] = c; 352 | if dst_pos < MAX_MATCH_LEN - 1 { 353 | dict.dict[DICTIONARY_SIZE + dst_pos] = c; 354 | } 355 | lookahead_size += 1; 356 | if lookahead_size + dict.len >= MIN_MATCH_LEN { 357 | let ins_pos = lookahead_pos + lookahead_size - 3; 358 | let hash = ((u32::from(dict.dict[ins_pos & DICT_MASK]) 359 | << (HASH_SHIFT * 2)) 360 | ^ ((u32::from(dict.dict[(ins_pos + 1) & DICT_MASK]) << HASH_SHIFT) 361 | ^ u32::from(c))) 362 | & (HASH_SIZE as u32 - 1); 363 | dict.next[ins_pos & DICT_MASK] = dict.hash[hash as usize]; 364 | dict.hash[hash as usize] = ins_pos as u16; 365 | } 366 | } 367 | src_pos += num_bytes_to_process; 368 | } 369 | self.dict.len = self.dict.len.min(DICTIONARY_SIZE - lookahead_size); 370 | if lookahead_size < MAX_MATCH_LEN && !is_last { 371 | break; 372 | } 373 | let mut len_to_move = 1; 374 | let mut cur_match_dist = 0; 375 | let mut cur_match_len = if saved_match_len != 0 { 376 | saved_match_len 377 | } 
else { 378 | MIN_MATCH_LEN - 1 379 | }; 380 | let cur_pos = lookahead_pos & DICT_MASK; 381 | if self.flags & (RLE_MATCHES | FORCE_RAW) != 0 { 382 | if self.dict.len != 0 && self.flags & FORCE_RAW == 0 { 383 | let c = self.dict.dict[cur_pos.wrapping_sub(1) & DICT_MASK]; 384 | cur_match_len = self.dict.dict[cur_pos..(cur_pos + lookahead_size)] 385 | .iter() 386 | .take_while(|&x| *x == c) 387 | .count(); 388 | if cur_match_len < MIN_MATCH_LEN { 389 | cur_match_len = 0 390 | } else { 391 | cur_match_dist = 1 392 | } 393 | } 394 | } else { 395 | let dist_len = self.dict.find_match( 396 | lookahead_pos, 397 | self.dict.len, 398 | lookahead_size, 399 | cur_match_dist, 400 | cur_match_len, 401 | ); 402 | cur_match_dist = dist_len.0; 403 | cur_match_len = dist_len.1; 404 | } 405 | let far_and_small = cur_match_len == MIN_MATCH_LEN && cur_match_dist >= 8 * 1024; 406 | let filter_small = self.flags & FILTER_MATCHES != 0 && cur_match_len <= 5; 407 | if far_and_small || filter_small || cur_pos == cur_match_dist { 408 | cur_match_dist = 0; 409 | cur_match_len = 0; 410 | } 411 | if saved_match_len != 0 { 412 | if cur_match_len > saved_match_len { 413 | self.cb.push_literal(saved_lit, &mut self.lt); 414 | if cur_match_len >= 128 { 415 | self.cb.push_match( 416 | cur_match_len, 417 | cur_match_dist, 418 | &mut self.lt, 419 | &mut self.dt, 420 | ); 421 | saved_match_len = 0; 422 | len_to_move = cur_match_len; 423 | } else { 424 | saved_lit = self.dict.get(cur_pos); 425 | saved_match_dist = cur_match_dist; 426 | saved_match_len = cur_match_len; 427 | } 428 | } else { 429 | self.cb.push_match( 430 | saved_match_len, 431 | saved_match_dist, 432 | &mut self.lt, 433 | &mut self.dt, 434 | ); 435 | len_to_move = saved_match_len - 1; 436 | saved_match_len = 0; 437 | } 438 | } else if cur_match_dist == 0 { 439 | self.cb.push_literal(self.dict.get(cur_pos), &mut self.lt); 440 | } else if self.greedy_parsing || (self.flags & RLE_MATCHES != 0) || cur_match_len >= 128 441 | { 442 | self.cb 443 
| .push_match(cur_match_len, cur_match_dist, &mut self.lt, &mut self.dt); 444 | len_to_move = cur_match_len; 445 | } else { 446 | saved_lit = self.dict.get(cur_pos); 447 | saved_match_dist = cur_match_dist; 448 | saved_match_len = cur_match_len; 449 | } 450 | lookahead_pos += len_to_move; 451 | assert!(lookahead_size >= len_to_move); 452 | lookahead_size -= len_to_move; 453 | self.dict.len = (self.dict.len + len_to_move).min(DICTIONARY_SIZE); 454 | let lz_buf_tight = self.cb.pos > CODE_BUFFER_SIZE - 8; 455 | let raw = self.flags & FORCE_RAW != 0; 456 | let fat = ((self.cb.pos * 115) >> 7) >= self.cb.total_bytes; 457 | let fat_or_raw = (self.cb.total_bytes > 31 * 1024) && (fat || raw); 458 | if lz_buf_tight || fat_or_raw { 459 | self.dict.lookahead_size = lookahead_size; 460 | self.dict.lookahead_pos = lookahead_pos; 461 | self.flush_block(sink, false)?; 462 | } 463 | } 464 | self.dict.lookahead_size = lookahead_size; 465 | self.dict.lookahead_pos = lookahead_pos; 466 | self.saved_lit = saved_lit; 467 | self.saved_match_dist = saved_match_dist; 468 | self.saved_match_len = saved_match_len; 469 | Ok(()) 470 | } 471 | 472 | fn flush_block(&mut self, sink: &mut S, finish: bool) -> Result<(), Error> { 473 | sink.set_bit_buffer(self.saved_bit_buffer, self.saved_bits_in); 474 | let mut snapshot; 475 | let use_raw_block = (self.flags & FORCE_RAW != 0) 476 | && (self.dict.lookahead_pos - self.dict.code_buffer_offset) <= self.dict.len; 477 | self.cb.init_flag(); 478 | if self.flags & WRITE_ZLIB_HEADER != 0 && self.block_index == 0 { 479 | let header = make_zlib_header(self.flags); 480 | sink.put_bits(header[0].into(), 8)?; 481 | sink.put_bits(header[1].into(), 8)?; 482 | } 483 | sink.put_bits(finish as u32, 1)?; 484 | snapshot = sink.snapshot(); 485 | let comp_success = if !use_raw_block { 486 | let use_static = (self.flags & FORCE_STATIC != 0) || (self.cb.total_bytes < 48); 487 | self.emit_block(sink, use_static)?; 488 | true 489 | } else { 490 | false 491 | }; 492 | let 
end_pos = sink.snapshot().pos; 493 | let expanded = (self.cb.total_bytes > 32) 494 | && (end_pos - snapshot.pos + 1 >= self.cb.total_bytes) 495 | && (self.dict.lookahead_pos - self.dict.code_buffer_offset <= self.dict.len); 496 | if use_raw_block || expanded { 497 | sink.restore(&snapshot); 498 | sink.put_bits(0, 2)?; 499 | sink.pad()?; 500 | sink.put_bits(self.cb.total_bytes as u32 & 0xFFFF, 16)?; 501 | sink.put_bits(!self.cb.total_bytes as u32 & 0xFFFF, 16)?; 502 | for i in 0..self.cb.total_bytes { 503 | let pos = (self.dict.code_buffer_offset + i) & DICTIONARY_SIZE_MASK; 504 | sink.put_bits(u32::from(self.dict.dict[pos]), 8)?; 505 | } 506 | } else if !comp_success { 507 | sink.restore(&snapshot); 508 | self.emit_block(sink, true)?; 509 | } 510 | if finish { 511 | sink.pad()?; 512 | if self.flags & WRITE_ZLIB_HEADER != 0 { 513 | let mut adler = self.adler32.finish(); 514 | for _ in 0..4 { 515 | sink.put_bits((adler >> 24) & 0xFF, 8)?; 516 | adler <<= 8; 517 | } 518 | } 519 | } 520 | self.lt.reset(); 521 | self.dt.reset(); 522 | self.cb.pos = 1; 523 | self.cb.flags_offset = 0; 524 | self.cb.flags_left = 8; 525 | self.dict.code_buffer_offset += self.cb.total_bytes; 526 | self.cb.total_bytes = 0; 527 | self.block_index += 1; 528 | snapshot = sink.snapshot(); 529 | self.saved_bit_buffer = snapshot.bit_buffer; 530 | self.saved_bits_in = snapshot.bits_in; 531 | sink.flush() 532 | } 533 | 534 | fn reset(&mut self, zlib: bool) { 535 | if self.ready && zlib == self.zlib { 536 | return; 537 | } 538 | let flags = make_flags(zlib, self.level, self.strategy); 539 | self.zlib = zlib; 540 | self.flags = flags; 541 | self.greedy_parsing = flags & GREEDY_PARSING != 0; 542 | self.block_index = 0; 543 | self.saved_lit = 0; 544 | self.saved_match_dist = 0; 545 | self.saved_match_len = 0; 546 | self.saved_bit_buffer = 0; 547 | self.saved_bits_in = 0; 548 | self.dict.code_buffer_offset = 0; 549 | self.dict.len = 0; 550 | self.dict.lookahead_pos = 0; 551 | self.dict.lookahead_size = 0; 
552 | self.dict.max_probes = Dictionary::probes_from_flags(flags); 553 | self.cb.reset(); 554 | if !self.ready { 555 | self.lt.reset(); 556 | self.dt.reset(); 557 | self.pt.reset(); 558 | } 559 | self.ready = true; 560 | self.adler32 = Adler32::new(); 561 | } 562 | } 563 | 564 | impl DeflateContext { 565 | fn start_dynamic_block(&mut self, sink: &mut S) -> Result<(), Error> { 566 | const CODE_SIZES_LEN: usize = LITERAL_LENGTH_TREE_SIZE + DISTANCE_TREE_SIZE; 567 | let mut code_sizes_to_pack = [0u8; CODE_SIZES_LEN]; 568 | let mut packed = [0u8; CODE_SIZES_LEN]; 569 | self.lt.counts[256] = 1; 570 | self.lt.optimize(false); 571 | self.dt.optimize(false); 572 | let mut num_lit_codes = 286; 573 | while num_lit_codes > 257 { 574 | if self.lt.code_sizes[num_lit_codes - 1] != 0 { 575 | break; 576 | } 577 | num_lit_codes -= 1; 578 | } 579 | let mut num_dist_codes = 30; 580 | while num_dist_codes > 1 { 581 | if self.dt.code_sizes[num_dist_codes - 1] != 0 { 582 | break; 583 | } 584 | num_dist_codes -= 1; 585 | } 586 | code_sizes_to_pack[0..num_lit_codes].copy_from_slice(&self.lt.code_sizes[0..num_lit_codes]); 587 | code_sizes_to_pack[num_lit_codes..num_lit_codes + num_dist_codes] 588 | .copy_from_slice(&self.dt.code_sizes[0..num_dist_codes]); 589 | let total_code_sizes_to_pack = num_lit_codes + num_dist_codes; 590 | let mut num_packed = 0; 591 | for i in 0..PRECODE_TREE_SIZE { 592 | self.pt.counts[i] = 0; 593 | } 594 | let mut rle = Rle::new(); 595 | for i in 0..total_code_sizes_to_pack { 596 | let code_size = code_sizes_to_pack[i] as usize; 597 | if code_size == 0 { 598 | rle.prev(&mut packed, &mut num_packed, &mut self.pt); 599 | rle.z_count += 1; 600 | if rle.z_count == 138 { 601 | rle.zero(&mut packed, &mut num_packed, &mut self.pt); 602 | } 603 | } else { 604 | rle.zero(&mut packed, &mut num_packed, &mut self.pt); 605 | if code_size != rle.prev { 606 | rle.prev(&mut packed, &mut num_packed, &mut self.pt); 607 | self.pt.counts[code_size] += 1; 608 | packed[num_packed] = 
code_size as u8; 609 | num_packed += 1; 610 | } else { 611 | rle.repeat_count += 1; 612 | if rle.repeat_count == 6 { 613 | rle.prev(&mut packed, &mut num_packed, &mut self.pt); 614 | } 615 | } 616 | } 617 | rle.prev = code_size; 618 | } 619 | if rle.repeat_count != 0 { 620 | rle.prev(&mut packed, &mut num_packed, &mut self.pt); 621 | } else { 622 | rle.zero(&mut packed, &mut num_packed, &mut self.pt); 623 | } 624 | self.pt.optimize(); 625 | sink.put_bits(2, 2)?; 626 | sink.put_bits(num_lit_codes as u32 - 257, 5)?; 627 | sink.put_bits(num_dist_codes as u32 - 1, 5)?; 628 | let mut num_bit_lengths = 0; 629 | for i in (0..=18).rev() { 630 | if self.pt.code_sizes[PRECODE_SWIZZLE[i] as usize] != 0 { 631 | num_bit_lengths = i; 632 | break; 633 | } 634 | } 635 | num_bit_lengths = 4.max(num_bit_lengths + 1); 636 | sink.put_bits(num_bit_lengths as u32 - 4, 4)?; 637 | for swizzle in &PRECODE_SWIZZLE[..num_bit_lengths] { 638 | sink.put_bits(self.pt.code_sizes[*swizzle as usize] as u32, 3)?; 639 | } 640 | let mut i = 0; 641 | while i < num_packed { 642 | let code = packed[i] as usize; 643 | i += 1; 644 | sink.put_bits(self.pt.codes[code] as u32, self.pt.code_sizes[code] as u32)?; 645 | if code >= 16 { 646 | sink.put_bits(packed[i] as u32, [2, 3, 7][code - 16])?; 647 | i += 1; 648 | } 649 | } 650 | Ok(()) 651 | } 652 | 653 | fn start_static_block(&mut self, sink: &mut S) -> Result<(), Error> { 654 | let lengths = &mut self.lt.code_sizes; 655 | lengths[0..144].iter_mut().for_each(|p| *p = 8); 656 | lengths[144..256].iter_mut().for_each(|p| *p = 9); 657 | lengths[256..280].iter_mut().for_each(|p| *p = 7); 658 | lengths[280..288].iter_mut().for_each(|p| *p = 8); 659 | self.dt.code_sizes = [5; 32]; 660 | self.lt.optimize(true); 661 | self.dt.optimize(true); 662 | sink.put_bits(1, 2) 663 | } 664 | 665 | fn emit_block(&mut self, sink: &mut S, is_static: bool) -> Result<(), Error> { 666 | if is_static { 667 | self.start_static_block(sink)?; 668 | } else { 669 | 
self.start_dynamic_block(sink)?; 670 | } 671 | self.cb.emit(sink, &self.lt, &self.dt) 672 | } 673 | } 674 | 675 | struct Rle { 676 | prev: usize, 677 | repeat_count: usize, 678 | z_count: usize, 679 | } 680 | 681 | impl Rle { 682 | fn new() -> Self { 683 | Self { 684 | prev: 0xFF, 685 | repeat_count: 0, 686 | z_count: 0, 687 | } 688 | } 689 | 690 | #[inline(always)] 691 | fn prev(&mut self, code_sizes: &mut [u8], count: &mut usize, pt: &mut PrecodeTree) { 692 | if self.repeat_count == 0 { 693 | return; 694 | } 695 | if self.repeat_count < 3 { 696 | pt.counts[self.prev] += self.repeat_count as u16; 697 | while self.repeat_count != 0 { 698 | code_sizes[*count] = self.prev as u8; 699 | *count += 1; 700 | self.repeat_count -= 1; 701 | } 702 | } else { 703 | pt.counts[16] += 1; 704 | code_sizes[*count] = 16; 705 | *count += 1; 706 | code_sizes[*count] = (self.repeat_count - 3) as u8; 707 | *count += 1; 708 | } 709 | self.repeat_count = 0; 710 | } 711 | 712 | #[inline(always)] 713 | fn zero(&mut self, code_sizes: &mut [u8], count: &mut usize, pt: &mut PrecodeTree) { 714 | if self.z_count == 0 { 715 | return; 716 | } 717 | if self.z_count < 3 { 718 | pt.counts[0] += self.z_count as u16; 719 | while self.z_count != 0 { 720 | code_sizes[*count] = 0; 721 | *count += 1; 722 | self.z_count -= 1; 723 | } 724 | } else if self.z_count <= 10 { 725 | pt.counts[17] += 1; 726 | code_sizes[*count] = 17; 727 | *count += 1; 728 | code_sizes[*count] = (self.z_count - 3) as u8; 729 | *count += 1; 730 | } else { 731 | pt.counts[18] += 1; 732 | code_sizes[*count] = 18; 733 | *count += 1; 734 | code_sizes[*count] = (self.z_count - 11) as u8; 735 | *count += 1; 736 | } 737 | self.z_count = 0; 738 | } 739 | } 740 | 741 | struct LiteralLengthTree { 742 | pub counts: [u16; LITERAL_LENGTH_TREE_SIZE], 743 | pub codes: [u16; LITERAL_LENGTH_TREE_SIZE], 744 | pub code_sizes: [u8; LITERAL_LENGTH_TREE_SIZE], 745 | } 746 | 747 | impl LiteralLengthTree { 748 | #[inline(always)] 749 | fn new() -> Self { 
750 | Self { 751 | counts: [0; LITERAL_LENGTH_TREE_SIZE], 752 | codes: [0; LITERAL_LENGTH_TREE_SIZE], 753 | code_sizes: [0; LITERAL_LENGTH_TREE_SIZE], 754 | } 755 | } 756 | 757 | fn reset(&mut self) { 758 | self.counts.iter_mut().for_each(|p| *p = 0); 759 | } 760 | 761 | fn optimize(&mut self, is_static: bool) { 762 | huffman::optimize( 763 | &mut self.counts, 764 | &mut self.codes, 765 | &mut self.code_sizes, 766 | 15, 767 | is_static, 768 | ); 769 | } 770 | } 771 | 772 | struct DistanceTree { 773 | pub counts: [u16; DISTANCE_TREE_SIZE], 774 | pub codes: [u16; DISTANCE_TREE_SIZE], 775 | pub code_sizes: [u8; DISTANCE_TREE_SIZE], 776 | } 777 | 778 | impl DistanceTree { 779 | #[inline(always)] 780 | fn new() -> Self { 781 | Self { 782 | counts: [0; DISTANCE_TREE_SIZE], 783 | codes: [0; DISTANCE_TREE_SIZE], 784 | code_sizes: [0; DISTANCE_TREE_SIZE], 785 | } 786 | } 787 | 788 | fn reset(&mut self) { 789 | self.counts.iter_mut().for_each(|p| *p = 0); 790 | } 791 | 792 | fn optimize(&mut self, is_static: bool) { 793 | huffman::optimize( 794 | &mut self.counts, 795 | &mut self.codes, 796 | &mut self.code_sizes, 797 | 15, 798 | is_static, 799 | ); 800 | } 801 | } 802 | 803 | struct PrecodeTree { 804 | pub counts: [u16; PRECODE_TREE_SIZE], 805 | pub codes: [u16; PRECODE_TREE_SIZE], 806 | pub code_sizes: [u8; PRECODE_TREE_SIZE], 807 | } 808 | 809 | impl PrecodeTree { 810 | fn new() -> Self { 811 | Self { 812 | counts: [0; PRECODE_TREE_SIZE], 813 | codes: [0; PRECODE_TREE_SIZE], 814 | code_sizes: [0; PRECODE_TREE_SIZE], 815 | } 816 | } 817 | 818 | fn reset(&mut self) { 819 | self.counts.iter_mut().for_each(|p| *p = 0); 820 | } 821 | 822 | fn optimize(&mut self) { 823 | huffman::optimize( 824 | &mut self.counts, 825 | &mut self.codes, 826 | &mut self.code_sizes, 827 | 7, 828 | false, 829 | ); 830 | } 831 | } 832 | 833 | mod huffman { 834 | const MAX_HUFF_SYMBOLS: usize = 288; 835 | const MAX_SUPPORTED_HUFF_CODE_SIZE: usize = 32; 836 | 837 | #[derive(Copy, Clone, Default)] 838 
| struct SymbolFrequency { 839 | key: u16, 840 | index: u16, 841 | } 842 | 843 | pub fn optimize( 844 | counts: &mut [u16], 845 | codes: &mut [u16], 846 | code_sizes: &mut [u8], 847 | size_limit: usize, 848 | is_static: bool, 849 | ) { 850 | let mut num_codes = [0i32; 1 + MAX_SUPPORTED_HUFF_CODE_SIZE]; 851 | let mut next_code = [0u32; 1 + MAX_SUPPORTED_HUFF_CODE_SIZE]; 852 | let len = counts.len(); 853 | if is_static { 854 | for i in 0..len { 855 | num_codes[code_sizes[i] as usize] += 1; 856 | } 857 | } else { 858 | let mut syms0 = [SymbolFrequency::default(); MAX_HUFF_SYMBOLS]; 859 | let mut syms1 = [SymbolFrequency::default(); MAX_HUFF_SYMBOLS]; 860 | let mut used = 0; 861 | for i in 0..len { 862 | let count = counts[i]; 863 | if count != 0 { 864 | let sym = &mut syms0[used]; 865 | used += 1; 866 | sym.key = count; 867 | sym.index = i as u16; 868 | } 869 | } 870 | let syms = sort_symbols(&mut syms0[..used], &mut syms1[..used]); 871 | minimum_redundancy(syms); 872 | for s in syms.iter() { 873 | num_codes[s.key as usize] += 1; 874 | } 875 | enforce_size_limit(&mut num_codes, used, size_limit); 876 | codes.iter_mut().for_each(|p| *p = 0); 877 | code_sizes.iter_mut().for_each(|p| *p = 0); 878 | let mut last = used; 879 | for i in 1..=size_limit { 880 | let first = last - num_codes[i] as usize; 881 | for sym in &syms[first..last] { 882 | code_sizes[sym.index as usize] = i as u8; 883 | } 884 | last = first; 885 | } 886 | } 887 | next_code[1] = 0; 888 | let mut j = 0; 889 | for i in 2..=size_limit { 890 | j = (j + num_codes[i - 1]) << 1; 891 | next_code[i] = j as u32; 892 | } 893 | for i in 0..len { 894 | let code_size = code_sizes[i] as usize; 895 | if code_size == 0 { 896 | continue; 897 | } 898 | let mut code = next_code[code_size]; 899 | let mut rev_code = 0; 900 | next_code[code_size] += 1; 901 | for _ in 0..code_size { 902 | rev_code = (rev_code << 1) | (code & 1); 903 | code >>= 1; 904 | } 905 | codes[i] = rev_code as u16; 906 | } 907 | } 908 | 909 | fn 
sort_symbols<'a>( 910 | syms0: &'a mut [SymbolFrequency], 911 | syms1: &'a mut [SymbolFrequency], 912 | ) -> &'a mut [SymbolFrequency] { 913 | let mut hist = [[0u32; 256]; 2]; 914 | for freq in syms0.iter() { 915 | let key = freq.key as usize; 916 | hist[0][key & 0xFF] += 1; 917 | hist[1][(key >> 8) & 0xFF] += 1; 918 | } 919 | let mut passes = 2; 920 | if syms0.len() == hist[1][0] as usize { 921 | passes -= 1; 922 | } 923 | let mut offsets = [0u32; 256]; 924 | let mut cur_syms = syms0; 925 | let mut new_syms = syms1; 926 | for pass in 0..passes { 927 | let mut offset = 0; 928 | for i in 0..256 { 929 | offsets[i] = offset; 930 | offset += hist[pass][i]; 931 | } 932 | for sym in cur_syms.iter() { 933 | let j = ((sym.key >> (pass * 8)) & 0xFF) as usize; 934 | new_syms[offsets[j] as usize] = *sym; 935 | offsets[j] += 1; 936 | } 937 | core::mem::swap(&mut cur_syms, &mut new_syms); 938 | } 939 | cur_syms 940 | } 941 | 942 | fn minimum_redundancy(a: &mut [SymbolFrequency]) { 943 | let n = a.len(); 944 | if n == 0 { 945 | return; 946 | } else if n == 1 { 947 | a[0].key = 1; 948 | return; 949 | } 950 | a[0].key += a[1].key; 951 | let mut root = 0; 952 | let mut leaf = 2; 953 | for next in 1..n - 1 { 954 | if leaf >= n || a[root].key < a[leaf].key { 955 | a[next].key = a[root].key; 956 | a[root].key = next as u16; 957 | root += 1; 958 | } else { 959 | a[next].key = a[leaf].key; 960 | leaf += 1; 961 | } 962 | if leaf >= n || (root < next && a[root].key < a[leaf].key) { 963 | a[next].key += a[root].key; 964 | a[root].key = next as u16; 965 | root += 1; 966 | } else { 967 | a[next].key += a[leaf].key; 968 | leaf += 1; 969 | } 970 | } 971 | a[n - 2].key = 0; 972 | for next in (0..n - 2).rev() { 973 | a[next].key = a[a[next].key as usize].key + 1; 974 | } 975 | let mut avail = 1isize; 976 | let mut used = 0isize; 977 | let mut depth = 0; 978 | let mut root = n as isize - 2; 979 | let mut next = n as isize - 1; 980 | while avail > 0 { 981 | while root >= 0 && a[root as usize].key 
== depth { 982 | used += 1; 983 | root -= 1; 984 | } 985 | while avail > used { 986 | a[next as usize].key = depth; 987 | next -= 1; 988 | avail -= 1; 989 | } 990 | avail = 2 * used; 991 | depth += 1; 992 | used = 0; 993 | } 994 | } 995 | 996 | fn enforce_size_limit(num_codes: &mut [i32], len: usize, size_limit: usize) { 997 | if len <= 1 { 998 | return; 999 | } 1000 | for i in size_limit + 1..=MAX_SUPPORTED_HUFF_CODE_SIZE { 1001 | num_codes[size_limit] += num_codes[i]; 1002 | } 1003 | let mut total = 0; 1004 | for i in (1..=size_limit).rev() { 1005 | total += (num_codes[i] as u32) << (size_limit - i); 1006 | } 1007 | while total != (1 << size_limit) { 1008 | num_codes[size_limit] -= 1; 1009 | for i in (1..size_limit).rev() { 1010 | if num_codes[i] != 0 { 1011 | num_codes[i] -= 1; 1012 | num_codes[i + 1] += 2; 1013 | break; 1014 | } 1015 | } 1016 | total -= 1; 1017 | } 1018 | } 1019 | } 1020 | 1021 | struct CodeBuffer { 1022 | pub buffer: [u8; CODE_BUFFER_SIZE], 1023 | pub pos: usize, 1024 | pub flags_offset: usize, 1025 | pub flags_left: usize, 1026 | pub total_bytes: usize, 1027 | } 1028 | 1029 | impl CodeBuffer { 1030 | #[inline(always)] 1031 | fn new() -> Self { 1032 | Self { 1033 | buffer: [0u8; CODE_BUFFER_SIZE], 1034 | pos: 1, 1035 | flags_offset: 0, 1036 | flags_left: 8, 1037 | total_bytes: 0, 1038 | } 1039 | } 1040 | 1041 | fn reset(&mut self) { 1042 | self.pos = 1; 1043 | self.flags_offset = 0; 1044 | self.flags_left = 8; 1045 | self.total_bytes = 0; 1046 | } 1047 | 1048 | fn init_flag(&mut self) { 1049 | if self.flags_left == 8 { 1050 | self.buffer[self.flags_offset] = 0; 1051 | self.pos -= 1; 1052 | } else { 1053 | self.buffer[self.flags_offset] >>= self.flags_left; 1054 | } 1055 | } 1056 | 1057 | #[inline(always)] 1058 | fn push_literal(&mut self, lit: u8, lt: &mut LiteralLengthTree) { 1059 | self.buffer[self.pos] = lit; 1060 | self.pos += 1; 1061 | self.total_bytes += 1; 1062 | self.buffer[self.flags_offset] >>= 1; 1063 | self.flags_left -= 1; 1064 | 
if self.flags_left == 0 { 1065 | self.flags_left = 8; 1066 | self.flags_offset = self.pos; 1067 | self.pos += 1; 1068 | } 1069 | lt.counts[lit as usize] += 1; 1070 | } 1071 | 1072 | #[inline(always)] 1073 | fn push_match( 1074 | &mut self, 1075 | len: usize, 1076 | mut dist: usize, 1077 | lt: &mut LiteralLengthTree, 1078 | dt: &mut DistanceTree, 1079 | ) { 1080 | self.total_bytes += len; 1081 | self.buffer[self.pos] = (len - MIN_MATCH_LEN) as u8; 1082 | dist -= 1; 1083 | self.buffer[self.pos + 1] = (dist & 0xFF) as u8; 1084 | self.buffer[self.pos + 2] = (dist >> 8) as u8; 1085 | self.pos += 3; 1086 | self.buffer[self.flags_offset] = (self.buffer[self.flags_offset] >> 1) | 0x80; 1087 | self.flags_left -= 1; 1088 | if self.flags_left == 0 { 1089 | self.flags_left = 8; 1090 | self.flags_offset = self.pos; 1091 | self.pos += 1; 1092 | } 1093 | let s = if dist < 512 { 1094 | SMALL_DIST_SYM[dist & 511] as usize 1095 | } else { 1096 | LARGE_DIST_SYM[(dist >> 8) & 127] as usize 1097 | }; 1098 | dt.counts[s] += 1; 1099 | if len >= MIN_MATCH_LEN { 1100 | lt.counts[LEN_SYM[len - MIN_MATCH_LEN] as usize] += 1; 1101 | } 1102 | } 1103 | 1104 | fn emit( 1105 | &self, 1106 | sink: &mut S, 1107 | lt: &LiteralLengthTree, 1108 | dt: &DistanceTree, 1109 | ) -> Result<(), Error> { 1110 | let mut flags = 1; 1111 | let snap = sink.snapshot(); 1112 | let mut bits = FastBits::new(snap.bit_buffer, snap.bits_in); 1113 | let mut i = 0; 1114 | while i < self.pos { 1115 | if flags == 1 { 1116 | flags = self.buffer[i] as u32 | 0x100; 1117 | i += 1; 1118 | } 1119 | if flags & 1 != 0 { 1120 | if bits.bits_in > 16 { 1121 | bits.flush(sink)?; 1122 | } 1123 | let match_len = self.buffer[i] as usize; 1124 | let match_dist = self.buffer[i + 1] as usize | ((self.buffer[i + 2] as usize) << 8); 1125 | i += 3; 1126 | let i0 = LEN_SYM[match_len & 0xFF] as usize; 1127 | bits.put(lt.codes[i0] as u32, lt.code_sizes[i0] as u32); 1128 | let extra = LEN_EXTRA[match_len & 0xFF] as usize; 1129 | bits.put(match_len 
as u32 & BITMASKS[extra], extra as u32); 1130 | let (sym, extra_bits) = if match_dist < 512 { 1131 | ( 1132 | SMALL_DIST_SYM[match_dist & 511] as usize, 1133 | SMALL_DIST_EXTRA[match_dist & 511] as usize, 1134 | ) 1135 | } else { 1136 | ( 1137 | LARGE_DIST_SYM[(match_dist >> 8) & 127] as usize, 1138 | LARGE_DIST_EXTRA[(match_dist >> 8) & 127] as usize, 1139 | ) 1140 | }; 1141 | bits.put(dt.codes[sym] as u32, dt.code_sizes[sym] as u32); 1142 | bits.put(match_dist as u32 & BITMASKS[extra_bits], extra_bits as u32); 1143 | } else { 1144 | let lit = self.buffer[i] as usize; 1145 | i += 1; 1146 | if bits.bits_in > 48 { 1147 | bits.flush(sink)?; 1148 | } 1149 | bits.put( 1150 | lt.codes[lit & 0xFF] as u32, 1151 | lt.code_sizes[lit & 0xFF] as u32, 1152 | ); 1153 | } 1154 | flags >>= 1; 1155 | } 1156 | bits.flush(sink)?; 1157 | sink.set_bit_buffer(bits.bit_buffer as u32, bits.bits_in); 1158 | sink.put_bits(lt.codes[256] as u32, lt.code_sizes[256] as u32) 1159 | } 1160 | } 1161 | 1162 | struct FastBits { 1163 | bit_buffer: u64, 1164 | bits_in: u32, 1165 | buf: [u8; 8], 1166 | } 1167 | 1168 | impl FastBits { 1169 | pub fn new(bit_buffer: u32, bits_in: u32) -> Self { 1170 | Self { 1171 | bit_buffer: bit_buffer as u64, 1172 | bits_in, 1173 | buf: [0; 8], 1174 | } 1175 | } 1176 | 1177 | #[inline(always)] 1178 | pub fn put(&mut self, bits: u32, len: u32) { 1179 | self.bit_buffer |= (bits as u64) << self.bits_in; 1180 | self.bits_in += len; 1181 | } 1182 | 1183 | #[inline(always)] 1184 | pub fn flush(&mut self, sink: &mut S) -> Result<(), Error> { 1185 | let mut i = 0; 1186 | while self.bits_in >= 8 { 1187 | self.buf[i] = self.bit_buffer as u8; 1188 | self.bit_buffer >>= 8; 1189 | self.bits_in -= 8; 1190 | i += 1; 1191 | } 1192 | sink.write(&self.buf[0..i]) 1193 | } 1194 | } 1195 | 1196 | struct Dictionary { 1197 | pub dict: [u8; DICTIONARY_FULL_SIZE], 1198 | pub next: [u16; DICTIONARY_SIZE], 1199 | pub hash: [u16; HASH_SIZE], 1200 | pub code_buffer_offset: usize, 1201 | pub 
max_probes: [u32; 2], 1202 | pub lookahead_size: usize, 1203 | pub lookahead_pos: usize, 1204 | pub len: usize, 1205 | } 1206 | 1207 | impl Dictionary { 1208 | #[inline(always)] 1209 | fn new(flags: u32) -> Self { 1210 | Self { 1211 | dict: [0; DICTIONARY_FULL_SIZE], 1212 | next: [0; DICTIONARY_SIZE], 1213 | hash: [0; HASH_SIZE], 1214 | code_buffer_offset: 0, 1215 | max_probes: Self::probes_from_flags(flags), 1216 | lookahead_size: 0, 1217 | lookahead_pos: 0, 1218 | len: 0, 1219 | } 1220 | } 1221 | 1222 | fn probes_from_flags(flags: u32) -> [u32; 2] { 1223 | [ 1224 | 1 + ((flags & 0xFFF) + 2) / 3, 1225 | 1 + (((flags & 0xFFF) >> 2) + 2) / 3, 1226 | ] 1227 | } 1228 | 1229 | fn read_u64(&self, pos: usize) -> u64 { 1230 | let bytes: [u8; 8] = self.dict[pos..pos + 8].try_into().unwrap(); 1231 | u64::from_le_bytes(bytes) 1232 | } 1233 | 1234 | fn read_u16(&self, pos: usize) -> u16 { 1235 | self.dict[pos] as u16 | ((self.dict[pos + 1] as u16) << 8) 1236 | } 1237 | 1238 | fn get(&self, pos: usize) -> u8 { 1239 | self.dict[pos.min(self.dict.len() - 1)] 1240 | } 1241 | 1242 | fn find_match( 1243 | &self, 1244 | lookahead_pos: usize, 1245 | max_dist: usize, 1246 | max_match_len: usize, 1247 | mut match_dist: usize, 1248 | mut match_len: usize, 1249 | ) -> (usize, usize) { 1250 | let max_match_len = max_match_len.min(MAX_MATCH_LEN); 1251 | match_len = match_len.max(1); 1252 | let pos = lookahead_pos & DICTIONARY_SIZE_MASK; 1253 | let mut probe_pos = pos; 1254 | let mut num_probes_left = self.max_probes[(match_len >= 32) as usize]; 1255 | if max_match_len <= match_len { 1256 | return (match_dist, match_len); 1257 | } 1258 | let mut c01 = self.read_u16(pos + match_len - 1); 1259 | let s01 = self.read_u16(pos); 1260 | 'outer: loop { 1261 | let mut dist; 1262 | 'found: loop { 1263 | num_probes_left -= 1; 1264 | if num_probes_left == 0 { 1265 | return (match_dist, match_len); 1266 | } 1267 | for _ in 0..3 { 1268 | let next_probe_pos = self.next[probe_pos] as usize; 1269 | dist = 
(lookahead_pos - next_probe_pos) & 0xFFFF; 1270 | if next_probe_pos == 0 || dist > max_dist { 1271 | return (match_dist, match_len); 1272 | } 1273 | probe_pos = next_probe_pos & DICTIONARY_SIZE_MASK; 1274 | if self.read_u16(probe_pos + match_len - 1) == c01 { 1275 | break 'found; 1276 | } 1277 | } 1278 | } 1279 | if dist == 0 { 1280 | return (match_dist, match_len); 1281 | } 1282 | if self.read_u16(probe_pos) != s01 { 1283 | continue; 1284 | } 1285 | let mut p = pos + 2; 1286 | let mut q = probe_pos + 2; 1287 | for _ in 0..32 { 1288 | let p_data: u64 = self.read_u64(p); 1289 | let q_data: u64 = self.read_u64(q); 1290 | let xor_data = p_data ^ q_data; 1291 | if xor_data == 0 { 1292 | p += 8; 1293 | q += 8; 1294 | } else { 1295 | let trailing = xor_data.trailing_zeros() as usize; 1296 | let probe_len = p - pos + (trailing >> 3); 1297 | if probe_len > match_len { 1298 | match_dist = dist; 1299 | match_len = max_match_len.min(probe_len); 1300 | if match_len == max_match_len { 1301 | return (match_dist, match_len); 1302 | } 1303 | c01 = self.read_u16(pos + match_len - 1) 1304 | } 1305 | continue 'outer; 1306 | } 1307 | } 1308 | return (dist, max_match_len.min(MAX_MATCH_LEN)); 1309 | } 1310 | } 1311 | } 1312 | 1313 | #[doc(hidden)] 1314 | pub struct Snapshot { 1315 | pos: usize, 1316 | bit_buffer: u32, 1317 | bits_in: u32, 1318 | } 1319 | 1320 | #[doc(hidden)] 1321 | pub trait Sink { 1322 | fn put_bits(&mut self, bits: u32, len: u32) -> Result<(), Error>; 1323 | fn write(&mut self, buf: &[u8]) -> Result<(), Error>; 1324 | fn pad(&mut self) -> Result<(), Error>; 1325 | fn snapshot(&self) -> Snapshot; 1326 | fn restore(&mut self, snapshot: &Snapshot); 1327 | fn set_bit_buffer(&mut self, bit_buffer: u32, bits_in: u32); 1328 | fn flush(&mut self) -> Result<(), Error> { 1329 | Ok(()) 1330 | } 1331 | fn written(&self) -> u64; 1332 | } 1333 | 1334 | struct BufSink<'a> { 1335 | buffer: &'a mut [u8], 1336 | pos: usize, 1337 | bit_buffer: u32, 1338 | bits_in: u32, 1339 | } 1340 | 
1341 | impl<'a> BufSink<'a> { 1342 | pub fn new(buffer: &'a mut [u8]) -> Self { 1343 | Self { 1344 | buffer, 1345 | pos: 0, 1346 | bit_buffer: 0, 1347 | bits_in: 0, 1348 | } 1349 | } 1350 | } 1351 | 1352 | impl Sink for BufSink<'_> { 1353 | #[inline(always)] 1354 | fn put_bits(&mut self, bits: u32, len: u32) -> Result<(), Error> { 1355 | self.bit_buffer |= bits << self.bits_in; 1356 | self.bits_in += len; 1357 | let limit = self.buffer.len(); 1358 | while self.bits_in >= 8 { 1359 | if self.pos == limit { 1360 | return Err(Error::Overflow); 1361 | } 1362 | self.buffer[self.pos] = self.bit_buffer as u8; 1363 | self.pos += 1; 1364 | self.bit_buffer >>= 8; 1365 | self.bits_in -= 8; 1366 | } 1367 | Ok(()) 1368 | } 1369 | 1370 | #[inline(always)] 1371 | fn write(&mut self, buf: &[u8]) -> Result<(), Error> { 1372 | let len = buf.len(); 1373 | if self.pos + len > self.buffer.len() { 1374 | return Err(Error::Overflow); 1375 | } 1376 | self.buffer[self.pos..self.pos + len].copy_from_slice(buf); 1377 | self.pos += len; 1378 | Ok(()) 1379 | } 1380 | 1381 | fn pad(&mut self) -> Result<(), Error> { 1382 | if self.bits_in != 0 { 1383 | let len = 8 - self.bits_in; 1384 | self.put_bits(0, len) 1385 | } else { 1386 | Ok(()) 1387 | } 1388 | } 1389 | 1390 | fn snapshot(&self) -> Snapshot { 1391 | Snapshot { 1392 | pos: self.pos, 1393 | bit_buffer: self.bit_buffer, 1394 | bits_in: self.bits_in, 1395 | } 1396 | } 1397 | 1398 | fn restore(&mut self, snapshot: &Snapshot) { 1399 | self.pos = snapshot.pos; 1400 | self.bit_buffer = snapshot.bit_buffer; 1401 | self.bits_in = snapshot.bits_in; 1402 | } 1403 | 1404 | fn set_bit_buffer(&mut self, bit_buffer: u32, bits_in: u32) { 1405 | self.bit_buffer = bit_buffer; 1406 | self.bits_in = bits_in; 1407 | } 1408 | 1409 | fn written(&self) -> u64 { 1410 | self.pos as u64 1411 | } 1412 | } 1413 | 1414 | struct VecSink<'a> { 1415 | buffer: &'a mut Vec, 1416 | start_pos: usize, 1417 | bit_buffer: u32, 1418 | bits_in: u32, 1419 | } 1420 | 1421 | 
impl<'a> VecSink<'a> {
    /// Creates a sink that appends to `buffer`, recording the starting
    /// length so `written()` reports only bytes produced by this sink.
    pub fn new(buffer: &'a mut Vec<u8>) -> Self {
        let start_pos = buffer.len();
        Self {
            buffer,
            start_pos,
            bit_buffer: 0,
            bits_in: 0,
        }
    }
}

impl Sink for VecSink<'_> {
    // Accumulates bits LSB-first and pushes completed bytes onto the vector.
    // The vector grows on demand, so this variant never overflows.
    #[inline(always)]
    fn put_bits(&mut self, bits: u32, len: u32) -> Result<(), Error> {
        self.bit_buffer |= bits << self.bits_in;
        self.bits_in += len;
        while self.bits_in >= 8 {
            self.buffer.push(self.bit_buffer as u8);
            self.bit_buffer >>= 8;
            self.bits_in -= 8;
        }
        Ok(())
    }

    // Appends raw bytes; callers byte-align the bit buffer (via `pad`) first.
    #[inline(always)]
    fn write(&mut self, buf: &[u8]) -> Result<(), Error> {
        self.buffer.extend_from_slice(buf);
        Ok(())
    }

    // Zero-fills any partial byte up to the next byte boundary.
    fn pad(&mut self) -> Result<(), Error> {
        if self.bits_in != 0 {
            let len = 8 - self.bits_in;
            self.put_bits(0, len)
        } else {
            Ok(())
        }
    }

    // Captures output length and pending bit state so the encoder can roll
    // back a block (e.g. when a stored/static block turns out smaller).
    fn snapshot(&self) -> Snapshot {
        Snapshot {
            pos: self.buffer.len(),
            bit_buffer: self.bit_buffer,
            bits_in: self.bits_in,
        }
    }

    fn restore(&mut self, snapshot: &Snapshot) {
        self.buffer.truncate(snapshot.pos);
        self.bit_buffer = snapshot.bit_buffer;
        self.bits_in = snapshot.bits_in;
    }

    // Overwrites the pending bit state; used after `FastBits` has written
    // whole bytes directly and hands back the leftover partial byte.
    fn set_bit_buffer(&mut self, bit_buffer: u32, bits_in: u32) {
        self.bit_buffer = bit_buffer;
        self.bits_in = bits_in;
    }

    fn written(&self) -> u64 {
        (self.buffer.len() - self.start_pos) as u64
    }
}

/// `Sink` that stages output in a local buffer and flushes it to an
/// underlying writer on `flush()`.
#[cfg(feature = "std")]
struct WriterSink<W: Write> {
    writer: W,
    buffer: [u8; OUT_BUFFER_SIZE],
    pos: usize,
    bit_buffer: u32,
    bits_in: u32,
    // Total bytes handed to `writer` so far (updated on each flush).
    written: u64,
}

#[cfg(feature = "std")]
impl<W: Write> WriterSink<W> {
    fn new(writer: W) -> Self {
        Self {
            writer,
            buffer: [0; OUT_BUFFER_SIZE],
            pos: 0,
            bit_buffer: 0,
            bits_in: 0,
            written: 0,
        }
    }
}

#[cfg(feature = "std")]
impl<W: Write> Sink for WriterSink<W> {
    // Same LSB-first bit accumulation as `BufSink`; overflow means the
    // staging buffer filled up before the caller flushed.
    #[inline(always)]
    fn put_bits(&mut self, bits: u32, len: u32) -> Result<(), Error> {
        self.bit_buffer |= bits << self.bits_in;
        self.bits_in += len;
        let limit = self.buffer.len();
        while self.bits_in >= 8 {
            if self.pos == limit {
                return Err(Error::Overflow);
            }
            self.buffer[self.pos] = self.bit_buffer as u8;
            self.pos += 1;
            self.bit_buffer >>= 8;
            self.bits_in -= 8;
        }
        Ok(())
    }

    // Appends raw bytes to the staging buffer; assumes byte alignment.
    #[inline(always)]
    fn write(&mut self, buf: &[u8]) -> Result<(), Error> {
        let len = buf.len();
        if self.pos + len > self.buffer.len() {
            return Err(Error::Overflow);
        }
        self.buffer[self.pos..self.pos + len].copy_from_slice(buf);
        self.pos += len;
        Ok(())
    }

    // Zero-fills any partial byte up to the next byte boundary.
    fn pad(&mut self) -> Result<(), Error> {
        if self.bits_in != 0 {
            let len = 8 - self.bits_in;
            self.put_bits(0, len)
        } else {
            Ok(())
        }
    }

    // NOTE(review): snapshots only cover the staging buffer; a rollback
    // cannot cross a flush boundary — presumably callers snapshot and
    // restore within a single flush window. TODO confirm against the
    // compressor's flush logic.
    fn snapshot(&self) -> Snapshot {
        Snapshot {
            pos: self.pos,
            bit_buffer: self.bit_buffer,
            bits_in: self.bits_in,
        }
    }

    fn restore(&mut self, snapshot: &Snapshot) {
        self.pos = snapshot.pos;
        self.bit_buffer = snapshot.bit_buffer;
        self.bits_in = snapshot.bits_in;
    }

    // Overwrites the pending bit state; used after `FastBits` fast-path
    // emission leaves a partial byte behind.
    fn set_bit_buffer(&mut self, bit_buffer: u32, bits_in: u32) {
        self.bit_buffer = bit_buffer;
        self.bits_in = bits_in;
    }

    // Drains the staging buffer to the writer. `written`/`pos` are updated
    // even on error so the sink stays internally consistent.
    fn flush(&mut self) -> Result<(), Error> {
        let res = match self.writer.write_all(&self.buffer[0..self.pos]) {
            Ok(_) => Ok(()),
            Err(err) => Err(Error::Io(err)),
        };
        self.written += self.pos as u64;
        self.pos = 0;
        res
    }

    fn written(&self) -> u64 {
        self.written
    }
}

/// Packs the user-facing settings into the compressor's flag word: the low
/// 12 bits hold the dictionary probe count for the level; higher bits select
/// zlib framing, greedy parsing, and strategy overrides.
fn make_flags(zlib: bool, level: CompressionLevel, strategy: CompressionStrategy) -> u32 {
    let level = level.to_raw();
    // Levels 0-3 use the cheaper greedy parser instead of lazy matching.
    let greedy = if level <= 3 { GREEDY_PARSING } else { 0 };
    let mut flags = NUM_PROBES[level] | greedy;
    if zlib {
        flags |= WRITE_ZLIB_HEADER;
    }
    if level == 0 {
        // Level 0 emits stored (uncompressed) blocks only.
        flags |= FORCE_RAW;
    } else {
        use CompressionStrategy::*;
        match strategy {
            Filtered => flags |= FILTER_MATCHES,
            // Huffman-only: clearing the probe bits disables match searching.
            Huffman => flags &= !MAX_PROBES_MASK as u32,
            Static => flags |= FORCE_STATIC,
            RLE => flags |= RLE_MATCHES,
            _ => {}
        }
    }
    flags
}

/// Builds the 2-byte zlib (RFC 1950) header: CMF declaring deflate with a
/// 32 KiB window, then FLG carrying the approximate level (FLEVEL) plus an
/// FCHECK chosen so the 16-bit header value is a multiple of 31.
fn make_zlib_header(flags: u32) -> [u8; 2] {
    const FCHECK_DIVISOR: u32 = 31;
    let num_probes = flags & (MAX_PROBES_MASK as u32);
    // Recover an approximate FLEVEL (0-3) from the flag word.
    let level = if flags & GREEDY_PARSING != 0 {
        if num_probes <= 1 {
            0
        } else {
            1
        }
    } else if num_probes >= NUM_PROBES[9] {
        3
    } else {
        2
    };
    let cmf = 8 | (7 << 4); // CM = 8 (deflate), CINFO = 7 (32 KiB window)
    let flg = (level as u8) << 6;
    let rem = ((cmf as u32 * 256) + flg as u32) % FCHECK_DIVISOR;
    // FCHECK occupies the low 5 bits of FLG; rem == 0 yields 31, which is
    // still a valid 5-bit value and keeps the total divisible by 31.
    let check = (flg & 0b11100000) + (FCHECK_DIVISOR - rem) as u8;
    [cmf, check]
}

// Tree sizes: 288 = 256 literals + end-of-block + 29 length codes + padding;
// 32 covers the 30 distance codes; 19 is the precode (code-length) alphabet.
const LITERAL_LENGTH_TREE_SIZE: usize = 288;
const DISTANCE_TREE_SIZE: usize = 32;
const PRECODE_TREE_SIZE: usize = 19;
// Smaller-footprint tuning kept for reference:
// const CODE_BUFFER_SIZE: usize = 24 * 1024;
// const HASH_BITS: usize = 12;
const CODE_BUFFER_SIZE: usize = 64 * 1024;
const HASH_BITS: usize = 15;
const HASH_SHIFT: usize = (HASH_BITS + 2) / 3;
const HASH_SIZE: usize = 1 << HASH_BITS;
// Staging buffer for WriterSink, sized with ~30% headroom over the code
// buffer to absorb worst-case expansion.
#[cfg(feature = "std")]
const OUT_BUFFER_SIZE: usize = (CODE_BUFFER_SIZE * 13) / 10;
const MIN_MATCH_LEN: usize = 3;
const MAX_MATCH_LEN: usize = 258;
// 32 KiB LZ77 window, padded by MAX_MATCH_LEN so matches can read past the
// wrap point without masking every byte.
const DICTIONARY_SIZE: usize = 32768;
const DICTIONARY_SIZE_MASK: usize = DICTIONARY_SIZE - 1;
const DICTIONARY_FULL_SIZE: usize = DICTIONARY_SIZE + MAX_MATCH_LEN;

// Flag bits layered above the 12-bit probe count in the compressor flags.
const WRITE_ZLIB_HEADER: u32 = 0x0000_1000;
const GREEDY_PARSING: u32 = 0x0000_4000;
const RLE_MATCHES: u32 = 0x0001_0000;
const FILTER_MATCHES: u32 = 0x0002_0000;
const FORCE_STATIC: u32 = 0x0004_0000;
const FORCE_RAW: u32 = 0x0008_0000;

const MAX_PROBES_MASK: i32 = 0xFFF;
// Dictionary probe counts per compression level (index 0..=10).
const NUM_PROBES: [u32; 11] = [0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500];

// Maps (match length - MIN_MATCH_LEN), i.e. 0..=255, to the RFC 1951 length
// symbol 257..=285.
const LEN_SYM: [u16; 256] = [
    257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268, 269, 269, 269,
    269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272, 273, 273, 273, 273, 273, 273,
    273, 273, 274, 274, 274, 274, 274, 274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276,
    276, 276, 276, 276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277,
    277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
    278, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 280, 280,
    280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281,
    281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281,
    281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
    282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
    282, 282, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283,
    283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284, 284, 284, 284,
    284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
    284, 284, 284, 284, 284, 284, 284, 284, 285,
];

// Number of extra bits carried by each length symbol; parallels LEN_SYM
// (entry 255, length 258, is symbol 285 which has no extra bits).
const LEN_EXTRA: [u8; 256] = [
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0,
];

// DEFLATE distance symbols 0..=17 for distances 1..=512; presumably indexed
// by `distance - 1`. Run lengths follow the RFC 1951 distance-code table.
const SMALL_DIST_SYM: [u8; 512] = [
    0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
    8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
];

// Extra bits for each small-distance symbol; parallels SMALL_DIST_SYM.
const SMALL_DIST_EXTRA: [u8; 512] = [
    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
];

// DEFLATE distance symbols 18..=29 for distances beyond the small table;
// presumably indexed by `(distance - 1) >> 8` — TODO confirm against the
// encoder's distance lookup.
const LARGE_DIST_SYM: [u8; 128] = [
    0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
    24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25,
    26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
    27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
    29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
];

// Extra bits for each large-distance symbol; parallels LARGE_DIST_SYM.
const LARGE_DIST_EXTRA: [u8; 128] = [
    0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
];

// Order in which precode (code-length code) lengths are transmitted,
// as fixed by RFC 1951 section 3.2.7.
const PRECODE_SWIZZLE: [u8; 19] = [
    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
];

// BITMASKS[n] has the low n bits set, for n = 0..=16.
const BITMASKS: [u32; 17] = [
    0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF,
    0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
];