├── .github ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ ├── benches.yml │ ├── codecov.yml │ ├── format.yml │ ├── fuzzing.yml │ ├── lints.yml │ ├── rustdoc.yml │ └── tests.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── lzma.rs ├── fuzz ├── .gitignore ├── Cargo.toml ├── README.md └── fuzz_targets │ ├── compare_xz.rs │ ├── decompress_lzma.rs │ ├── decompress_lzma2.rs │ ├── decompress_lzma_stream.rs │ ├── decompress_xz.rs │ ├── interop_xz_decode.rs │ ├── interop_xz_encode.rs │ ├── roundtrip_lzma.rs │ ├── roundtrip_lzma2.rs │ └── roundtrip_xz.rs ├── rustfmt.toml ├── src ├── decode │ ├── lzbuffer.rs │ ├── lzma.rs │ ├── lzma2.rs │ ├── mod.rs │ ├── options.rs │ ├── rangecoder.rs │ ├── stream.rs │ ├── util.rs │ └── xz.rs ├── encode │ ├── dumbencoder.rs │ ├── lzma2.rs │ ├── mod.rs │ ├── options.rs │ ├── rangecoder.rs │ ├── util.rs │ └── xz.rs ├── error.rs ├── lib.rs ├── macros.rs ├── util │ ├── mod.rs │ └── vec2d.rs └── xz │ ├── crc.rs │ ├── footer.rs │ ├── header.rs │ └── mod.rs └── tests ├── files ├── README.md ├── block-check-crc32.txt ├── block-check-crc32.txt.xz ├── empty.txt ├── empty.txt.lzma ├── empty.txt.xz ├── foo.txt ├── foo.txt.lzma ├── foo.txt.xz ├── good-1-lzma2-1 ├── good-1-lzma2-1.xz ├── good-1-lzma2-2 ├── good-1-lzma2-2.xz ├── good-1-lzma2-3 ├── good-1-lzma2-3.xz ├── good-1-lzma2-4 ├── good-1-lzma2-4.xz ├── hello.txt ├── hello.txt.lzma ├── hello.txt.xz ├── hugedict.txt.lzma ├── range-coder-edge-case ├── range-coder-edge-case.lzma └── small.txt ├── lzma.rs ├── lzma2.rs └── xz.rs /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Pull Request Overview 2 | 3 | This pull request adds/changes/fixes... 4 | 5 | 6 | ### Testing Strategy 7 | 8 | This pull request was tested by... 9 | 10 | - [ ] Added relevant unit tests. 11 | - [ ] Added relevant end-to-end tests (such as `.lzma`, `.lzma2`, `.xz` files). 
12 | 13 | 14 | ### Supporting Documentation and References 15 | 16 | *If supporting an edge case, such as files created by a legacy SDK, please document here where this edge case comes from. 17 | Whenever possible, please include links to artifacts such as example files, existing code handling this edge case, etc.* 18 | 19 | 20 | ### TODO or Help Wanted 21 | 22 | This pull request still needs... 23 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Please see the documentation for all configuration options: 2 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 3 | version: 2 4 | updates: 5 | - package-ecosystem: "cargo" 6 | directory: "/" # Location of package manifests 7 | schedule: 8 | interval: "weekly" 9 | -------------------------------------------------------------------------------- /.github/workflows/benches.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Build benches on nightly toolchain 3 | jobs: 4 | build_benchmarks: 5 | runs-on: ubuntu-latest 6 | env: 7 | RUSTFLAGS: "-D warnings" 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: dtolnay/rust-toolchain@nightly 11 | - name: Build benches 12 | run: cargo build --benches --verbose 13 | -------------------------------------------------------------------------------- /.github/workflows/codecov.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Code coverage 3 | jobs: 4 | unit_tests: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v4 8 | - uses: dtolnay/rust-toolchain@nightly 9 | - name: Install cargo-llvm-cov 10 | uses: taiki-e/install-action@cargo-llvm-cov 11 | - name: Generate code coverage 12 | run: cargo llvm-cov --all-features 
--workspace --lib --lcov --output-path lcov.info 13 | - name: Upload to Codecov 14 | uses: codecov/codecov-action@v3.1.0 15 | with: 16 | files: lcov.info 17 | flags: unit 18 | verbose: true 19 | fail_ci_if_error: true 20 | integration_tests: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | - uses: dtolnay/rust-toolchain@nightly 25 | - name: Install cargo-llvm-cov 26 | uses: taiki-e/install-action@cargo-llvm-cov 27 | - name: Generate code coverage 28 | run: cargo llvm-cov --all-features --workspace --test '*' --lcov --output-path lcov.info 29 | - name: Upload to Codecov 30 | uses: codecov/codecov-action@v3.1.0 31 | with: 32 | files: lcov.info 33 | flags: integration 34 | verbose: true 35 | fail_ci_if_error: true 36 | -------------------------------------------------------------------------------- /.github/workflows/format.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Formatting on nightly toolchain 3 | jobs: 4 | format: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v4 8 | - uses: dtolnay/rust-toolchain@nightly 9 | with: 10 | components: rustfmt 11 | 12 | - name: Check formatting 13 | run: cargo fmt --verbose -- --check --verbose 14 | - name: Check formatting on fuzzing 15 | run: cargo fmt --verbose --manifest-path fuzz/Cargo.toml -- --check --verbose 16 | -------------------------------------------------------------------------------- /.github/workflows/fuzzing.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Build fuzz targets on nightly toolchain 3 | jobs: 4 | build_fuzzing: 5 | runs-on: ubuntu-latest 6 | env: 7 | RUSTFLAGS: "-D warnings" 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: dtolnay/rust-toolchain@nightly 11 | - name: Install cargo fuzz 12 | run: cargo install cargo-fuzz --verbose 13 | - name: Build fuzz targets 14 | run: cargo fuzz build 
--verbose 15 | -------------------------------------------------------------------------------- /.github/workflows/lints.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Lints on stable toolchain 3 | jobs: 4 | clippy: 5 | runs-on: ubuntu-latest 6 | env: 7 | RUSTFLAGS: "-D warnings" 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: dtolnay/rust-toolchain@stable 11 | with: 12 | components: clippy 13 | 14 | - name: Check Clippy lints 15 | run: cargo clippy --verbose --all-features -- -W clippy::match-same-arms 16 | - name: Check Clippy lints on tests 17 | run: cargo clippy --verbose --all-features --tests -- -W clippy::match-same-arms 18 | - name: Check Clippy lints on fuzzing 19 | run: cargo clippy --verbose --all-features --manifest-path fuzz/Cargo.toml -- -W clippy::match-same-arms 20 | -------------------------------------------------------------------------------- /.github/workflows/rustdoc.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Rustdoc on nightly toolchain 3 | jobs: 4 | rustdoc: 5 | runs-on: ubuntu-latest 6 | env: 7 | RUSTDOCFLAGS: "-D warnings" 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: dtolnay/rust-toolchain@nightly 11 | 12 | - name: Check Rust documentation 13 | run: cargo +nightly doc --document-private-items 14 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Build and run tests 3 | jobs: 4 | build_and_test: 5 | strategy: 6 | matrix: 7 | os: 8 | - ubuntu-latest 9 | - macos-latest 10 | rust: 11 | - stable 12 | - beta 13 | - nightly 14 | - 1.71.0 # MSRV 15 | fail-fast: false 16 | runs-on: ${{ matrix.os }} 17 | env: 18 | RUSTFLAGS: "-D warnings" 19 | steps: 20 | - uses: actions/checkout@v4 21 | - uses: 
dtolnay/rust-toolchain@master 22 | with: 23 | toolchain: ${{ matrix.rust }} 24 | 25 | - name: Build with default features 26 | run: cargo build --verbose 27 | - name: Tests with default features 28 | run: cargo test --verbose 29 | - name: Build with all features 30 | run: cargo build --all-features --verbose 31 | - name: Tests with all features 32 | run: cargo test --all-features --verbose 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | **/*.rs.bk 3 | Cargo.lock 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.3.0 - 2023-01-04 2 | 3 | - Update minimum supported Rust version: 1.40.0 -> 1.50.0. 4 | - Update dependencies (https://github.com/gendx/lzma-rs/pull/78): 5 | - `byteorder`: ^1.0.0 -> 1.4.3 6 | - `crc`: ^1.0.0 -> 3.0.0 7 | - `log`: ^0.4.14 -> 0.4.17 8 | - `env_logger`: ^0.8.3 -> 0.9.0 9 | - Expose a new `raw_decoder` API (https://github.com/gendx/lzma-rs/pull/74). 10 | - Reduce the number of allocations (https://github.com/gendx/lzma-rs/pull/77). 11 | - Display features on rustdoc (https://github.com/gendx/lzma-rs/pull/70). 12 | - Configure formatting style to `imports_granularity = "Module"` 13 | (https://github.com/gendx/lzma-rs/pull/82). 14 | - Add code coverage reporting (https://github.com/gendx/lzma-rs/pull/86). 15 | 16 | ## 0.2.0 - 2021-05-02 17 | 18 | - Update minimum supported Rust version: 1.32.0 -> 1.40.0. 19 | - Update dependencies: 20 | - `log`: ^0.4.8 -> ^0.4.14 21 | - `env_logger`: 0.7.1 -> ^0.8.3 22 | - [Breaking change] Rename acronyms to be lowercase, following 23 | clippy::upper-case-acronyms. 24 | - [Breaking change] Add a memory limit option 25 | (https://github.com/gendx/lzma-rs/pull/50). 
26 | - Fix bug in LZMA2 decompression (https://github.com/gendx/lzma-rs/pull/61). 27 | - Fix bug in CRC32 validation (https://github.com/gendx/lzma-rs/pull/56). 28 | - Add a streaming mode for LZMA decompression, gated by the `stream` feature. 29 | - Add more fuzzing targets, including comparison with the `xz2` crate. 30 | - Various improvements: benchmarks, fix lint warnings. 31 | - Migrate from Travis-CI to GitHub Actions. 32 | 33 | ## 0.1.4 - 2021-05-02 34 | 35 | - Backports from 0.2.0: 36 | - Fix bug in LZMA2 decompression (https://github.com/gendx/lzma-rs/pull/61). 37 | - Fix bug in CRC32 validation (https://github.com/gendx/lzma-rs/pull/56). 38 | 39 | ## 0.1.3 - 2020-05-05 40 | 41 | - Minimum supported Rust version: 1.32.0. 42 | - Update dependencies: 43 | - `log`: ^0.4.0 -> ^0.4.8 44 | - `env_logger`: 0.6.0 -> ^0.7.1 45 | - Gate logging behind an opt-in feature. This improves decoding performance by 46 | ~25% (https://github.com/gendx/lzma-rs/pull/31). 47 | - Lazily allocate the circular buffer (https://github.com/gendx/lzma-rs/pull/22). 48 | This improves memory usage (especially for WebAssembly targets) at the expense 49 | of a ~5% performance regression (https://github.com/gendx/lzma-rs/issues/27). 50 | - Return an error instead of panicking on unsupported SHA-256 checksum for XZ 51 | decoding (https://github.com/gendx/lzma-rs/pull/40). 52 | - Add Clippy to CI. 53 | - Document public APIs. 54 | - Deny missing docs, missing Debug implementations and build warnings. 55 | - Forbid unsafe code. 56 | - Remove extern statements that are unnecessary on the 2018 edition. 57 | 58 | ## 0.1.2 - 2019-12-17 59 | 60 | - Fix bug in the range coder (https://github.com/gendx/lzma-rs/issues/15). 61 | - Add support for specifying the unpacked size outside of the header 62 | (https://github.com/gendx/lzma-rs/pull/17). 63 | - Migrate to Rust 2018 edition. 64 | - Add benchmarks. 65 | - Fix some Clippy warnings. 
66 | 67 | ## 0.1.1 - 2019-02-24 68 | 69 | - Upgrade `env_logger` dependency. 70 | - Refactoring to use `std::io::Take`, operator `?`. 71 | 72 | ## 0.1.0 - 2018-01-07 73 | 74 | - Initial release. 75 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "lzma-rs" 3 | description = "A codec for LZMA, LZMA2 and XZ written in pure Rust" 4 | version = "0.3.0" 5 | license = "MIT" 6 | authors = ["Guillaume Endignoux "] 7 | repository = "https://github.com/gendx/lzma-rs" 8 | readme = "README.md" 9 | categories = ["compression"] 10 | keywords = ["lzma", "compression", "decompression"] 11 | exclude = ["tests/*", "benches/*", "fuzz/*", ".github/*", "Cargo.lock"] 12 | edition = "2018" 13 | rust-version = "1.71.0" 14 | 15 | [dependencies] 16 | byteorder = "1.4.3" 17 | crc = "3.0.0" 18 | log = { version = "0.4.17", optional = true } 19 | env_logger = { version = "0.11.3", optional = true } 20 | 21 | [dev-dependencies] 22 | rust-lzma = "0.6" 23 | seq-macro = "0.3" 24 | 25 | [features] 26 | enable_logging = ["env_logger", "log"] 27 | stream = [] 28 | raw_decoder = [] 29 | 30 | [package.metadata.docs.rs] 31 | features = ["stream", "raw_decoder"] 32 | rustdoc-args = ["--cfg", "docsrs"] 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 - 2018 Guillaume Endignoux 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to 
do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lzma-rs 2 | 3 | [![Crate](https://img.shields.io/crates/v/lzma-rs.svg?logo=rust)](https://crates.io/crates/lzma-rs) 4 | [![Documentation](https://img.shields.io/docsrs/lzma-rs?logo=rust)](https://docs.rs/lzma-rs) 5 | [![Minimum Rust 1.71](https://img.shields.io/badge/rust-1.71%2B-orange.svg?logo=rust)](https://releases.rs/docs/1.71.0/) 6 | [![Dependencies](https://deps.rs/repo/github/gendx/lzma-rs/status.svg)](https://deps.rs/repo/github/gendx/lzma-rs) 7 | [![Safety Dance](https://img.shields.io/badge/unsafe-forbidden-success.svg?logo=rust)](https://github.com/rust-secure-code/safety-dance/) 8 | ![Build Status](https://github.com/gendx/lzma-rs/workflows/Build%20and%20run%20tests/badge.svg) 9 | [![Codecov](https://codecov.io/gh/gendx/lzma-rs/branch/master/graph/badge.svg?token=HVo74E0wzh)](https://codecov.io/gh/gendx/lzma-rs) 10 | [![Lines of Code](https://www.aschey.tech/tokei/github/gendx/lzma-rs?category=code)](https://github.com/aschey/vercel-tokei) 11 | [![Downloads (crates.io)](https://img.shields.io/crates/d/lzma-rs?label=downloads&logo=rust)](https://crates.io/crates/lzma-rs) 12 | 13 | This project is a 
decoder for LZMA and its variants written in pure Rust, with focus on clarity. 14 | It already supports LZMA, LZMA2 and a subset of the `.xz` file format. 15 | 16 | ## Usage 17 | 18 | Decompress a `.xz` file. 19 | 20 | ```rust 21 | let filename = "foo.xz"; 22 | let mut f = std::io::BufReader::new(std::fs::File::open(filename).unwrap()); 23 | // "decomp" can be anything that implements "std::io::Write" 24 | let mut decomp: Vec = Vec::new(); 25 | lzma_rs::xz_decompress(&mut f, &mut decomp).unwrap(); 26 | // Decompressed content is now in "decomp" 27 | ``` 28 | 29 | ## Encoder 30 | 31 | For now, there is also a dumb encoder that only uses byte literals, with many hard-coded constants for code simplicity. 32 | Better encoders are welcome! 33 | 34 | ## Contributing 35 | 36 | Pull-requests are welcome, to improve the decoder, add better encoders, or more tests. 37 | Ultimately, this project should also implement .xz and .7z files. 38 | 39 | ## License 40 | 41 | MIT 42 | 43 | -------------------------------------------------------------------------------- /benches/lzma.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use std::io::Read; 6 | use test::Bencher; 7 | 8 | fn compress_bench(x: &[u8], b: &mut Bencher) { 9 | b.iter(|| { 10 | let mut compressed: Vec = Vec::new(); 11 | lzma_rs::lzma_compress(&mut std::io::BufReader::new(x), &mut compressed).unwrap(); 12 | compressed 13 | }); 14 | } 15 | 16 | fn decompress_after_compress_bench(x: &[u8], b: &mut Bencher) { 17 | let mut compressed: Vec = Vec::new(); 18 | lzma_rs::lzma_compress(&mut std::io::BufReader::new(x), &mut compressed).unwrap(); 19 | 20 | b.iter(|| { 21 | let mut bf = std::io::BufReader::new(compressed.as_slice()); 22 | let mut decomp: Vec = Vec::new(); 23 | lzma_rs::lzma_decompress(&mut bf, &mut decomp).unwrap(); 24 | decomp 25 | }); 26 | } 27 | 28 | fn decompress_bench(compressed: &[u8], b: &mut Bencher) { 29 | 
b.iter(|| { 30 | let mut bf = std::io::BufReader::new(compressed); 31 | let mut decomp: Vec = Vec::new(); 32 | lzma_rs::lzma_decompress(&mut bf, &mut decomp).unwrap(); 33 | decomp 34 | }); 35 | } 36 | 37 | #[cfg(feature = "stream")] 38 | fn decompress_stream_bench(compressed: &[u8], b: &mut Bencher) { 39 | use std::io::Write; 40 | b.iter(|| { 41 | let mut stream = lzma_rs::decompress::Stream::new(Vec::new()); 42 | stream.write_all(compressed).unwrap(); 43 | stream.finish().unwrap() 44 | }); 45 | } 46 | 47 | fn decompress_bench_file(compfile: &str, b: &mut Bencher) { 48 | let mut f = std::fs::File::open(compfile).unwrap(); 49 | let mut compressed = Vec::new(); 50 | f.read_to_end(&mut compressed).unwrap(); 51 | decompress_bench(&compressed, b); 52 | } 53 | 54 | #[cfg(feature = "stream")] 55 | fn decompress_stream_bench_file(compfile: &str, b: &mut Bencher) { 56 | let mut f = std::fs::File::open(compfile).unwrap(); 57 | let mut compressed = Vec::new(); 58 | f.read_to_end(&mut compressed).unwrap(); 59 | decompress_stream_bench(&compressed, b); 60 | } 61 | 62 | #[bench] 63 | fn compress_empty(b: &mut Bencher) { 64 | #[cfg(feature = "enable_logging")] 65 | let _ = env_logger::try_init(); 66 | compress_bench(b"", b); 67 | } 68 | 69 | #[bench] 70 | fn decompress_after_compress_empty(b: &mut Bencher) { 71 | #[cfg(feature = "enable_logging")] 72 | let _ = env_logger::try_init(); 73 | decompress_after_compress_bench(b"", b); 74 | } 75 | 76 | #[bench] 77 | fn compress_hello(b: &mut Bencher) { 78 | #[cfg(feature = "enable_logging")] 79 | let _ = env_logger::try_init(); 80 | compress_bench(b"Hello world", b); 81 | } 82 | 83 | #[bench] 84 | fn decompress_after_compress_hello(b: &mut Bencher) { 85 | #[cfg(feature = "enable_logging")] 86 | let _ = env_logger::try_init(); 87 | decompress_after_compress_bench(b"Hello world", b); 88 | } 89 | 90 | #[bench] 91 | fn compress_65536(b: &mut Bencher) { 92 | #[cfg(feature = "enable_logging")] 93 | let _ = env_logger::try_init(); 94 | 
compress_bench(&[0; 0x10000], b); 95 | } 96 | 97 | #[bench] 98 | fn decompress_after_compress_65536(b: &mut Bencher) { 99 | #[cfg(feature = "enable_logging")] 100 | let _ = env_logger::try_init(); 101 | decompress_after_compress_bench(&[0; 0x10000], b); 102 | } 103 | 104 | #[bench] 105 | fn decompress_big_file(b: &mut Bencher) { 106 | #[cfg(feature = "enable_logging")] 107 | let _ = env_logger::try_init(); 108 | decompress_bench_file("tests/files/foo.txt.lzma", b); 109 | } 110 | 111 | #[cfg(feature = "stream")] 112 | #[bench] 113 | fn decompress_stream_big_file(b: &mut Bencher) { 114 | #[cfg(feature = "enable_logging")] 115 | let _ = env_logger::try_init(); 116 | decompress_stream_bench_file("tests/files/foo.txt.lzma", b); 117 | } 118 | 119 | #[bench] 120 | fn decompress_huge_dict(b: &mut Bencher) { 121 | #[cfg(feature = "enable_logging")] 122 | let _ = env_logger::try_init(); 123 | let compressed: &[u8] = b"\x5d\x7f\x7f\x7f\x7f\xff\xff\xff\ 124 | \xff\xff\xff\xff\xff\x00\x24\x19\ 125 | \x49\x98\x6f\x10\x19\xc6\xd7\x31\ 126 | \xeb\x36\x50\xb2\x98\x48\xff\xfe\ 127 | \xa5\xb0\x00"; 128 | decompress_bench(&compressed, b); 129 | } 130 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target 3 | corpus 4 | artifacts 5 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | name = "lzma-fuzz" 4 | version = "0.0.1" 5 | authors = ["Automatically generated"] 6 | publish = false 7 | edition = "2018" 8 | 9 | [package.metadata] 10 | cargo-fuzz = true 11 | 12 | [dependencies] 13 | xz2 = "0.1.6" 14 | 15 | [dependencies.lzma-rs] 16 | path = ".." 
17 | features = ["stream"] 18 | [dependencies.libfuzzer-sys] 19 | git = "https://github.com/rust-fuzz/libfuzzer-sys.git" 20 | 21 | # Prevent this from interfering with workspaces 22 | [workspace] 23 | members = ["."] 24 | 25 | [[bin]] 26 | name = "roundtrip_lzma" 27 | path = "fuzz_targets/roundtrip_lzma.rs" 28 | 29 | [[bin]] 30 | name = "roundtrip_lzma2" 31 | path = "fuzz_targets/roundtrip_lzma2.rs" 32 | 33 | [[bin]] 34 | name = "roundtrip_xz" 35 | path = "fuzz_targets/roundtrip_xz.rs" 36 | 37 | [[bin]] 38 | name = "decompress_lzma" 39 | path = "fuzz_targets/decompress_lzma.rs" 40 | 41 | [[bin]] 42 | name = "decompress_lzma2" 43 | path = "fuzz_targets/decompress_lzma2.rs" 44 | 45 | [[bin]] 46 | name = "decompress_xz" 47 | path = "fuzz_targets/decompress_xz.rs" 48 | 49 | [[bin]] 50 | name = "compare_xz" 51 | path = "fuzz_targets/compare_xz.rs" 52 | 53 | [[bin]] 54 | name = "interop_xz_decode" 55 | path = "fuzz_targets/interop_xz_decode.rs" 56 | 57 | [[bin]] 58 | name = "interop_xz_encode" 59 | path = "fuzz_targets/interop_xz_encode.rs" 60 | 61 | [[bin]] 62 | name = "decompress_lzma_stream" 63 | path = "fuzz_targets/decompress_lzma_stream.rs" 64 | -------------------------------------------------------------------------------- /fuzz/README.md: -------------------------------------------------------------------------------- 1 | This directory contains fuzzing targets to verify implementation correctness: 2 | 3 | - `roundtrip_*` targets check that we can successfully decode what we've encoded. 4 | - `decompress_*` targets check that we don't panic or abort on decoding a crafted file. 5 | - `compare_*` targets check that we produce identical output to liblzma on decompression. 
6 | 7 | The command to run fuzzer is: 8 | 9 | `cargo +nightly fuzz run --release -s none ` 10 | 11 | For example, 12 | 13 | `cargo +nightly fuzz run --release -s none compare_xz` 14 | 15 | We use `-s none` because this crate does not contain unsafe code, so we don't 16 | need sanitizers to detect memory or concurrency errors for us. 17 | 18 | For more info see `cargo +nightly fuzz help` 19 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/compare_xz.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | use std::io::Read; 7 | use xz2::stream; 8 | 9 | fn decode_xz_lzmars(compressed: &[u8]) -> Result> { 10 | let mut bf = std::io::Cursor::new(compressed); 11 | let mut decomp: Vec = Vec::new(); 12 | lzma_rs::xz_decompress(&mut bf, &mut decomp)?; 13 | Ok(decomp) 14 | } 15 | 16 | fn decode_xz_xz2(compressed: &[u8]) -> Result> { 17 | let bf = std::io::Cursor::new(compressed); 18 | let mut decomp: Vec = Vec::new(); 19 | // create new XZ decompression stream with 8Gb memory limit and checksum 20 | // verification disabled 21 | let xz_stream = 22 | stream::Stream::new_stream_decoder(8 * 1024 * 1024 * 1024, stream::IGNORE_CHECK) 23 | .expect("Failed to create stream"); 24 | xz2::bufread::XzDecoder::new_stream(bf, xz_stream).read_to_end(&mut decomp)?; 25 | Ok(decomp) 26 | } 27 | 28 | fuzz_target!(|data: &[u8]| { 29 | let result_lzmars = decode_xz_lzmars(data); 30 | let result_xz2 = decode_xz_xz2(data); 31 | match (result_lzmars, result_xz2) { 32 | (Err(_), Err(_)) => (), // both failed, so behavior matches 33 | (Ok(_), Err(_)) => panic!("lzma-rs succeeded but xz2 failed"), 34 | (Err(_), Ok(_)) => panic!("xz2 succeeded but lzma-rs failed"), 35 | (Ok(a), Ok(b)) => assert!(a == b), 36 | } 37 | }); 38 | -------------------------------------------------------------------------------- 
/fuzz/fuzz_targets/decompress_lzma.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | 7 | fn decode_lzma(compressed: &[u8]) -> Result> { 8 | let mut bf = std::io::Cursor::new(compressed); 9 | 10 | let mut decomp: Vec = Vec::new(); 11 | lzma_rs::lzma_decompress(&mut bf, &mut decomp)?; 12 | Ok(decomp) 13 | } 14 | 15 | fuzz_target!(|data: &[u8]| { 16 | let _decomp = decode_lzma(data); 17 | }); 18 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/decompress_lzma2.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | 7 | fn decode_lzma2(compressed: &[u8]) -> Result> { 8 | let mut bf = std::io::Cursor::new(compressed); 9 | 10 | let mut decomp: Vec = Vec::new(); 11 | lzma_rs::lzma2_decompress(&mut bf, &mut decomp)?; 12 | Ok(decomp) 13 | } 14 | 15 | fuzz_target!(|data: &[u8]| { 16 | let _decomp = decode_lzma2(data); 17 | }); 18 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/decompress_lzma_stream.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | use std::io::Write; 7 | 8 | fn decode_lzma(compressed: &[u8]) -> Result> { 9 | let mut decomp: Vec = Vec::new(); 10 | lzma_rs::lzma_decompress(&mut std::io::Cursor::new(compressed), &mut decomp)?; 11 | Ok(decomp) 12 | } 13 | 14 | fn decode_lzma_stream(compressed: &[u8], chunk_size: usize) -> Vec { 15 | let mut stream = lzma_rs::decompress::Stream::new(Vec::new()); 16 | for chunk in compressed.chunks(chunk_size) { 17 | stream.write_all(chunk).unwrap(); 18 | } 19 | stream.finish().unwrap() 20 | } 21 | 22 | 
fuzz_target!(|input: &[u8]| { 23 | if !input.is_empty() { 24 | let (chunk_size, input) = input.split_at(1); 25 | // use input length if chunk_size is zero because std::slice::chunks 26 | // will otherwise panic 27 | let chunk_size = if chunk_size[0] == 0 { 28 | input.len() 29 | } else { 30 | chunk_size[0] as usize 31 | }; 32 | let mut compressed = Vec::new(); 33 | lzma_rs::lzma_compress(&mut std::io::Cursor::new(input), &mut compressed).unwrap(); 34 | let decompressed = decode_lzma(&compressed).unwrap(); 35 | let decompressed_stream = decode_lzma_stream(&compressed, chunk_size); 36 | if decompressed_stream.len() != decompressed.len() { 37 | panic!( 38 | "chunk size: {}, ref len: {}, act len: {}", 39 | chunk_size, 40 | decompressed.len(), 41 | decompressed_stream.len() 42 | ); 43 | } 44 | assert_eq!(decompressed_stream, decompressed); 45 | assert_eq!(decompressed_stream, input); 46 | } 47 | }); 48 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/decompress_xz.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | 7 | fn decode_xz(compressed: &[u8]) -> Result> { 8 | let mut bf = std::io::Cursor::new(compressed); 9 | 10 | let mut decomp: Vec = Vec::new(); 11 | lzma_rs::xz_decompress(&mut bf, &mut decomp)?; 12 | Ok(decomp) 13 | } 14 | 15 | fuzz_target!(|data: &[u8]| { 16 | let _decomp = decode_xz(data); 17 | }); 18 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/interop_xz_decode.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | use std::io::Read; 7 | 8 | fn decode_xz_lzmars(compressed: &[u8]) -> Result> { 9 | let mut bf = std::io::Cursor::new(compressed); 10 | let mut decomp: Vec = Vec::new(); 
11 | lzma_rs::xz_decompress(&mut bf, &mut decomp)?; 12 | Ok(decomp) 13 | } 14 | 15 | fn encode_xz_xz2(data: &[u8]) -> Result> { 16 | let bf = std::io::Cursor::new(data); 17 | let mut compressed: Vec = Vec::new(); 18 | xz2::bufread::XzEncoder::new(bf, 6).read_to_end(&mut compressed)?; 19 | Ok(compressed) 20 | } 21 | 22 | fuzz_target!(|data: &[u8]| { 23 | let compressed = encode_xz_xz2(data).expect("liblzma failed to compress data"); 24 | let decoded = 25 | decode_xz_lzmars(&compressed).expect("We've failed to decompress what liblzma compressed"); 26 | assert!( 27 | data == decoded.as_slice(), 28 | "Decompressed data is different from the original" 29 | ); 30 | }); 31 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/interop_xz_encode.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | use std::io::Read; 7 | use xz2::stream; 8 | 9 | fn encode_xz_lzmars(x: &[u8]) -> Result> { 10 | let mut compressed: Vec = Vec::new(); 11 | lzma_rs::xz_compress(&mut std::io::BufReader::new(x), &mut compressed)?; 12 | Ok(compressed) 13 | } 14 | 15 | fn decode_xz_xz2(compressed: &[u8]) -> Result> { 16 | let bf = std::io::Cursor::new(compressed); 17 | let mut decomp: Vec = Vec::new(); 18 | // create new XZ decompression stream with 8Gb memory limit and checksum 19 | // verification disabled 20 | let xz_stream = 21 | stream::Stream::new_stream_decoder(8 * 1024 * 1024 * 1024, stream::IGNORE_CHECK) 22 | .expect("Failed to create stream"); 23 | xz2::bufread::XzDecoder::new_stream(bf, xz_stream).read_to_end(&mut decomp)?; 24 | Ok(decomp) 25 | } 26 | 27 | fuzz_target!(|data: &[u8]| { 28 | let compressed = encode_xz_lzmars(data).expect("Compression failed"); 29 | let decoded = 30 | decode_xz_xz2(&compressed).expect("liblzma failed to decompress what we've compressed"); 31 | assert!( 32 | data == 
decoded.as_slice(), 33 | "Decompressed data is different from the original" 34 | ); 35 | }); 36 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/roundtrip_lzma.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | 7 | fn round_trip_lzma(x: &[u8]) -> Result> { 8 | let mut compressed: Vec = Vec::new(); 9 | lzma_rs::lzma_compress(&mut std::io::BufReader::new(x), &mut compressed)?; 10 | let mut bf = std::io::BufReader::new(compressed.as_slice()); 11 | 12 | let mut decomp: Vec = Vec::new(); 13 | lzma_rs::lzma_decompress(&mut bf, &mut decomp)?; 14 | Ok(decomp) 15 | } 16 | 17 | fuzz_target!(|data: &[u8]| { 18 | let decomp = round_trip_lzma(data).expect("Can't decompress what we just compressed"); 19 | assert_eq!(decomp, data); 20 | }); 21 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/roundtrip_lzma2.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | 7 | fn round_trip_lzma2(x: &[u8]) -> Result> { 8 | let mut compressed: Vec = Vec::new(); 9 | lzma_rs::lzma2_compress(&mut std::io::BufReader::new(x), &mut compressed)?; 10 | let mut bf = std::io::BufReader::new(compressed.as_slice()); 11 | 12 | let mut decomp: Vec = Vec::new(); 13 | lzma_rs::lzma2_decompress(&mut bf, &mut decomp)?; 14 | Ok(decomp) 15 | } 16 | 17 | fuzz_target!(|data: &[u8]| { 18 | let decomp = round_trip_lzma2(data).expect("Can't decompress what we just compressed"); 19 | assert_eq!(decomp, data); 20 | }); 21 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/roundtrip_xz.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 
#[macro_use]
extern crate libfuzzer_sys;

use lzma_rs::error::Result;

// Round-trip harness: compress with the crate's XZ encoder, then decode the
// result with the crate's XZ decoder.
fn round_trip_xz(x: &[u8]) -> Result<Vec<u8>> {
    let mut compressed: Vec<u8> = Vec::new();
    lzma_rs::xz_compress(&mut std::io::BufReader::new(x), &mut compressed)?;
    let mut bf = std::io::BufReader::new(compressed.as_slice());

    let mut decomp: Vec<u8> = Vec::new();
    lzma_rs::xz_decompress(&mut bf, &mut decomp)?;
    Ok(decomp)
}

fuzz_target!(|data: &[u8]| {
    let decomp = round_trip_xz(data).expect("Can't decompress what we just compressed");
    assert_eq!(decomp, data);
});

// ---------------------------------------------------------------------------
// rustfmt.toml
// ---------------------------------------------------------------------------

imports_granularity = "Module"
wrap_comments = true

// ---------------------------------------------------------------------------
// src/decode/lzbuffer.rs
// ---------------------------------------------------------------------------

use crate::error;
use std::io;

/// Common interface of the LZ output buffers used by the decoders, generic
/// over the output sink `W` that decompressed bytes are ultimately written to.
pub trait LzBuffer<W>
where
    W: io::Write,
{
    /// Total number of bytes sent through the buffer so far.
    fn len(&self) -> usize;

    /// Retrieve the last byte or return a default.
    fn last_or(&self, lit: u8) -> u8;

    /// Retrieve the n-th last byte.
    fn last_n(&self, dist: usize) -> error::Result<u8>;

    /// Append a literal.
    fn append_literal(&mut self, lit: u8) -> error::Result<()>;

    /// Fetch an LZ sequence (length, distance) from inside the buffer.
    fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()>;

    /// Get a reference to the output sink.
    #[cfg(feature = "stream")]
    fn get_output(&self) -> &W;

    /// Get a mutable reference to the output sink.
    #[cfg(feature = "stream")]
    fn get_output_mut(&mut self) -> &mut W;

    /// Consumes this buffer and flushes any data.
31 | fn finish(self) -> io::Result; 32 | 33 | /// Consumes this buffer without flushing any data. 34 | #[cfg(feature = "stream")] 35 | fn into_output(self) -> W; 36 | } 37 | 38 | /// An accumulating buffer for LZ sequences. 39 | pub struct LzAccumBuffer 40 | where 41 | W: io::Write, 42 | { 43 | /// Output sink 44 | stream: W, 45 | /// Buffer 46 | buf: Vec, 47 | /// Buffer memory limit 48 | memlimit: usize, 49 | /// Total number of bytes sent through the buffer 50 | len: usize, 51 | } 52 | 53 | impl LzAccumBuffer 54 | where 55 | W: io::Write, 56 | { 57 | pub fn from_stream(stream: W, memlimit: usize) -> Self { 58 | Self { 59 | stream, 60 | buf: Vec::new(), 61 | memlimit, 62 | len: 0, 63 | } 64 | } 65 | 66 | /// Append bytes. 67 | pub fn append_bytes(&mut self, buf: &[u8]) { 68 | self.buf.extend_from_slice(buf); 69 | self.len += buf.len(); 70 | } 71 | 72 | /// Reset the internal dictionary. 73 | pub fn reset(&mut self) -> io::Result<()> { 74 | self.stream.write_all(self.buf.as_slice())?; 75 | self.buf.clear(); 76 | self.len = 0; 77 | Ok(()) 78 | } 79 | } 80 | 81 | impl LzBuffer for LzAccumBuffer 82 | where 83 | W: io::Write, 84 | { 85 | fn len(&self) -> usize { 86 | self.len 87 | } 88 | 89 | fn last_or(&self, lit: u8) -> u8 { 90 | let buf_len = self.buf.len(); 91 | if buf_len == 0 { 92 | lit 93 | } else { 94 | self.buf[buf_len - 1] 95 | } 96 | } 97 | 98 | fn last_n(&self, dist: usize) -> error::Result { 99 | let buf_len = self.buf.len(); 100 | if dist > buf_len { 101 | return Err(error::Error::LzmaError(format!( 102 | "Match distance {} is beyond output size {}", 103 | dist, buf_len 104 | ))); 105 | } 106 | 107 | Ok(self.buf[buf_len - dist]) 108 | } 109 | 110 | fn append_literal(&mut self, lit: u8) -> error::Result<()> { 111 | let new_len = self.len + 1; 112 | 113 | if new_len > self.memlimit { 114 | Err(error::Error::LzmaError(format!( 115 | "exceeded memory limit of {}", 116 | self.memlimit 117 | ))) 118 | } else { 119 | self.buf.push(lit); 120 | self.len = 
new_len; 121 | Ok(()) 122 | } 123 | } 124 | 125 | fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()> { 126 | lzma_debug!("LZ {{ len: {}, dist: {} }}", len, dist); 127 | let buf_len = self.buf.len(); 128 | if dist > buf_len { 129 | return Err(error::Error::LzmaError(format!( 130 | "LZ distance {} is beyond output size {}", 131 | dist, buf_len 132 | ))); 133 | } 134 | 135 | let mut offset = buf_len - dist; 136 | for _ in 0..len { 137 | let x = self.buf[offset]; 138 | self.buf.push(x); 139 | offset += 1; 140 | } 141 | self.len += len; 142 | Ok(()) 143 | } 144 | 145 | #[cfg(feature = "stream")] 146 | fn get_output(&self) -> &W { 147 | &self.stream 148 | } 149 | 150 | #[cfg(feature = "stream")] 151 | fn get_output_mut(&mut self) -> &mut W { 152 | &mut self.stream 153 | } 154 | 155 | fn finish(mut self) -> io::Result { 156 | self.stream.write_all(self.buf.as_slice())?; 157 | self.stream.flush()?; 158 | Ok(self.stream) 159 | } 160 | 161 | #[cfg(feature = "stream")] 162 | fn into_output(self) -> W { 163 | self.stream 164 | } 165 | } 166 | 167 | /// A circular buffer for LZ sequences 168 | pub struct LzCircularBuffer 169 | where 170 | W: io::Write, 171 | { 172 | /// Output sink 173 | stream: W, 174 | /// Circular buffer 175 | buf: Vec, 176 | /// Length of the buffer 177 | dict_size: usize, 178 | /// Buffer memory limit 179 | memlimit: usize, 180 | /// Current position 181 | cursor: usize, 182 | /// Total number of bytes sent through the buffer 183 | len: usize, 184 | } 185 | 186 | impl LzCircularBuffer 187 | where 188 | W: io::Write, 189 | { 190 | pub fn from_stream(stream: W, dict_size: usize, memlimit: usize) -> Self { 191 | lzma_info!("Dict size in LZ buffer: {}", dict_size); 192 | Self { 193 | stream, 194 | buf: Vec::new(), 195 | dict_size, 196 | memlimit, 197 | cursor: 0, 198 | len: 0, 199 | } 200 | } 201 | 202 | fn get(&self, index: usize) -> u8 { 203 | *self.buf.get(index).unwrap_or(&0) 204 | } 205 | 206 | fn set(&mut self, index: usize, value: u8) 
-> error::Result<()> { 207 | let new_len = index + 1; 208 | 209 | if self.buf.len() < new_len { 210 | if new_len <= self.memlimit { 211 | self.buf.resize(new_len, 0); 212 | } else { 213 | return Err(error::Error::LzmaError(format!( 214 | "exceeded memory limit of {}", 215 | self.memlimit 216 | ))); 217 | } 218 | } 219 | self.buf[index] = value; 220 | Ok(()) 221 | } 222 | } 223 | 224 | impl LzBuffer for LzCircularBuffer 225 | where 226 | W: io::Write, 227 | { 228 | fn len(&self) -> usize { 229 | self.len 230 | } 231 | 232 | fn last_or(&self, lit: u8) -> u8 { 233 | if self.len == 0 { 234 | lit 235 | } else { 236 | self.get((self.dict_size + self.cursor - 1) % self.dict_size) 237 | } 238 | } 239 | 240 | fn last_n(&self, dist: usize) -> error::Result { 241 | if dist > self.dict_size { 242 | return Err(error::Error::LzmaError(format!( 243 | "Match distance {} is beyond dictionary size {}", 244 | dist, self.dict_size 245 | ))); 246 | } 247 | if dist > self.len { 248 | return Err(error::Error::LzmaError(format!( 249 | "Match distance {} is beyond output size {}", 250 | dist, self.len 251 | ))); 252 | } 253 | 254 | let offset = (self.dict_size + self.cursor - dist) % self.dict_size; 255 | Ok(self.get(offset)) 256 | } 257 | 258 | fn append_literal(&mut self, lit: u8) -> error::Result<()> { 259 | self.set(self.cursor, lit)?; 260 | self.cursor += 1; 261 | self.len += 1; 262 | 263 | // Flush the circular buffer to the output 264 | if self.cursor == self.dict_size { 265 | self.stream.write_all(self.buf.as_slice())?; 266 | self.cursor = 0; 267 | } 268 | 269 | Ok(()) 270 | } 271 | 272 | fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()> { 273 | lzma_debug!("LZ {{ len: {}, dist: {} }}", len, dist); 274 | if dist > self.dict_size { 275 | return Err(error::Error::LzmaError(format!( 276 | "LZ distance {} is beyond dictionary size {}", 277 | dist, self.dict_size 278 | ))); 279 | } 280 | if dist > self.len { 281 | return Err(error::Error::LzmaError(format!( 282 | "LZ 
distance {} is beyond output size {}", 283 | dist, self.len 284 | ))); 285 | } 286 | 287 | let mut offset = (self.dict_size + self.cursor - dist) % self.dict_size; 288 | for _ in 0..len { 289 | let x = self.get(offset); 290 | self.append_literal(x)?; 291 | offset += 1; 292 | if offset == self.dict_size { 293 | offset = 0 294 | } 295 | } 296 | Ok(()) 297 | } 298 | 299 | #[cfg(feature = "stream")] 300 | fn get_output(&self) -> &W { 301 | &self.stream 302 | } 303 | 304 | #[cfg(feature = "stream")] 305 | fn get_output_mut(&mut self) -> &mut W { 306 | &mut self.stream 307 | } 308 | 309 | fn finish(mut self) -> io::Result { 310 | if self.cursor > 0 { 311 | self.stream.write_all(&self.buf[0..self.cursor])?; 312 | } 313 | self.stream.flush()?; 314 | Ok(self.stream) 315 | } 316 | 317 | #[cfg(feature = "stream")] 318 | fn into_output(self) -> W { 319 | self.stream 320 | } 321 | } 322 | 323 | #[cfg(test)] 324 | mod test { 325 | use super::*; 326 | 327 | #[derive(Default)] 328 | struct ManuallyFlushedWriter { 329 | unflushed: Vec, 330 | flushed: Vec, 331 | } 332 | 333 | impl io::Write for ManuallyFlushedWriter { 334 | fn write(&mut self, buf: &[u8]) -> Result { 335 | let len = buf.len(); 336 | self.unflushed.extend_from_slice(buf); 337 | Ok(len) 338 | } 339 | 340 | fn flush(&mut self) -> Result<(), io::Error> { 341 | self.flushed.append(&mut self.unflushed); 342 | Ok(()) 343 | } 344 | } 345 | 346 | #[test] 347 | fn finish_flushes_everything() { 348 | const MEM_LIMIT: usize = 8; 349 | const DICT_SIZE: usize = MEM_LIMIT; 350 | let stream = ManuallyFlushedWriter::default(); 351 | let mut b = LzCircularBuffer::from_stream(stream, DICT_SIZE, MEM_LIMIT); 352 | for _ in 0..(DICT_SIZE * 4) { 353 | b.append_literal(5).unwrap(); 354 | } 355 | let stream = b.finish().unwrap(); 356 | assert!(stream.unflushed.is_empty()); 357 | } 358 | } 359 | -------------------------------------------------------------------------------- /src/decode/lzma.rs: 
// ---------------------------------------------------------------------------
// src/decode/lzma.rs
// ---------------------------------------------------------------------------

use crate::decode::lzbuffer::{LzBuffer, LzCircularBuffer};
use crate::decode::rangecoder::{BitTree, LenDecoder, RangeDecoder};
use crate::decompress::{Options, UnpackedSize};
use crate::error;
use crate::util::vec2d::Vec2D;
use byteorder::{LittleEndian, ReadBytesExt};
use std::io;

/// Maximum input data that can be processed in one iteration.
/// Libhtp uses the following equation to define the maximum number of bits
/// for the worst case scenario:
///   log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160
// 160 bits rounded up is 20 bytes; streaming mode buffers at most this much
// input before attempting to decode one symbol.
const MAX_REQUIRED_INPUT: usize = 20;

/// Processing mode for decompression.
///
/// Tells the decompressor if we should expect more data after parsing the
/// current input.
#[derive(Debug, PartialEq)]
enum ProcessingMode {
    /// Streaming mode. Process the input bytes but assume there will be more
    /// chunks of input data to receive in future calls to
    /// [`DecoderState::process_mode()`].
    Partial,
    /// Synchronous mode. Process the input bytes and confirm end of stream has
    /// been reached. Use this mode if you are processing a fixed buffer of
    /// compressed data, or after using [`ProcessingMode::Partial`] to check for
    /// the end of stream.
    Finish,
}

/// Result of the next iteration of processing.
///
/// Indicates whether processing should continue or is finished.
#[derive(Debug, PartialEq)]
enum ProcessingStatus {
    Continue,
    Finished,
}

#[derive(Debug, Copy, Clone)]
/// LZMA "lclppb" decompression properties.
pub struct LzmaProperties {
    /// The number of literal context bits.
    ///
    /// The most `lc` significant bits of the previous byte are part of the
    /// literal context. `lc` must not be greater than 8.
48 | pub lc: u32, // 0..=8 49 | /// The number of literal position bits. 50 | /// 51 | /// `lp` must not be greater than 4. 52 | pub lp: u32, // 0..=4 53 | /// The number of position bits. 54 | /// 55 | /// The context for literal/match is plaintext offset modulo `2^pb`. 56 | /// `pb` must not be greater than 4. 57 | pub pb: u32, // 0..=4 58 | } 59 | 60 | impl LzmaProperties { 61 | /// Assert the validity of the LZMA properties. 62 | pub(crate) fn validate(&self) { 63 | assert!(self.lc <= 8); 64 | assert!(self.lp <= 4); 65 | assert!(self.pb <= 4); 66 | } 67 | } 68 | 69 | #[derive(Debug, Copy, Clone)] 70 | /// LZMA decompression parameters. 71 | pub struct LzmaParams { 72 | /// The LZMA "lclppb" decompression properties. 73 | pub(crate) properties: LzmaProperties, 74 | /// The dictionary size to use when decompressing. 75 | pub(crate) dict_size: u32, 76 | /// The size of the unpacked data. 77 | pub(crate) unpacked_size: Option, 78 | } 79 | 80 | impl LzmaParams { 81 | /// Create an new instance of LZMA parameters. 82 | #[cfg(feature = "raw_decoder")] 83 | pub fn new( 84 | properties: LzmaProperties, 85 | dict_size: u32, 86 | unpacked_size: Option, 87 | ) -> LzmaParams { 88 | Self { 89 | properties, 90 | dict_size, 91 | unpacked_size, 92 | } 93 | } 94 | 95 | /// Read LZMA parameters from the LZMA stream header. 
96 | pub fn read_header(input: &mut R, options: &Options) -> error::Result 97 | where 98 | R: io::BufRead, 99 | { 100 | // Properties 101 | let props = input.read_u8().map_err(error::Error::HeaderTooShort)?; 102 | 103 | let mut pb = props as u32; 104 | if pb >= 225 { 105 | return Err(error::Error::LzmaError(format!( 106 | "LZMA header invalid properties: {} must be < 225", 107 | pb 108 | ))); 109 | } 110 | 111 | let lc: u32 = pb % 9; 112 | pb /= 9; 113 | let lp: u32 = pb % 5; 114 | pb /= 5; 115 | 116 | lzma_info!("Properties {{ lc: {}, lp: {}, pb: {} }}", lc, lp, pb); 117 | 118 | // Dictionary 119 | let dict_size_provided = input 120 | .read_u32::() 121 | .map_err(error::Error::HeaderTooShort)?; 122 | let dict_size = if dict_size_provided < 0x1000 { 123 | 0x1000 124 | } else { 125 | dict_size_provided 126 | }; 127 | 128 | lzma_info!("Dict size: {}", dict_size); 129 | 130 | // Unpacked size 131 | let unpacked_size: Option = match options.unpacked_size { 132 | UnpackedSize::ReadFromHeader => { 133 | let unpacked_size_provided = input 134 | .read_u64::() 135 | .map_err(error::Error::HeaderTooShort)?; 136 | let marker_mandatory: bool = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF; 137 | if marker_mandatory { 138 | None 139 | } else { 140 | Some(unpacked_size_provided) 141 | } 142 | } 143 | UnpackedSize::ReadHeaderButUseProvided(x) => { 144 | input 145 | .read_u64::() 146 | .map_err(error::Error::HeaderTooShort)?; 147 | x 148 | } 149 | UnpackedSize::UseProvided(x) => x, 150 | }; 151 | 152 | lzma_info!("Unpacked size: {:?}", unpacked_size); 153 | 154 | let params = LzmaParams { 155 | properties: LzmaProperties { lc, lp, pb }, 156 | dict_size, 157 | unpacked_size, 158 | }; 159 | 160 | Ok(params) 161 | } 162 | } 163 | 164 | #[derive(Debug)] 165 | pub(crate) struct DecoderState { 166 | // Buffer input data here if we need more for decompression. Up to 167 | // MAX_REQUIRED_INPUT bytes can be consumed during one iteration. 
168 | partial_input_buf: std::io::Cursor<[u8; MAX_REQUIRED_INPUT]>, 169 | pub(crate) lzma_props: LzmaProperties, 170 | unpacked_size: Option, 171 | literal_probs: Vec2D, 172 | pos_slot_decoder: [BitTree<{ 1 << 6 }>; 4], 173 | align_decoder: BitTree<{ 1 << 4 }>, 174 | pos_decoders: [u16; 115], 175 | is_match: [u16; 192], // true = LZ, false = literal 176 | is_rep: [u16; 12], 177 | is_rep_g0: [u16; 12], 178 | is_rep_g1: [u16; 12], 179 | is_rep_g2: [u16; 12], 180 | is_rep_0long: [u16; 192], 181 | state: usize, 182 | rep: [usize; 4], 183 | len_decoder: LenDecoder, 184 | rep_len_decoder: LenDecoder, 185 | } 186 | 187 | impl DecoderState { 188 | pub fn new(lzma_props: LzmaProperties, unpacked_size: Option) -> Self { 189 | lzma_props.validate(); 190 | DecoderState { 191 | partial_input_buf: std::io::Cursor::new([0; MAX_REQUIRED_INPUT]), 192 | lzma_props, 193 | unpacked_size, 194 | literal_probs: Vec2D::init(0x400, (1 << (lzma_props.lc + lzma_props.lp), 0x300)), 195 | pos_slot_decoder: [ 196 | BitTree::new(), 197 | BitTree::new(), 198 | BitTree::new(), 199 | BitTree::new(), 200 | ], 201 | align_decoder: BitTree::new(), 202 | pos_decoders: [0x400; 115], 203 | is_match: [0x400; 192], 204 | is_rep: [0x400; 12], 205 | is_rep_g0: [0x400; 12], 206 | is_rep_g1: [0x400; 12], 207 | is_rep_g2: [0x400; 12], 208 | is_rep_0long: [0x400; 192], 209 | state: 0, 210 | rep: [0; 4], 211 | len_decoder: LenDecoder::new(), 212 | rep_len_decoder: LenDecoder::new(), 213 | } 214 | } 215 | 216 | pub fn reset_state(&mut self, new_props: LzmaProperties) { 217 | new_props.validate(); 218 | if self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp { 219 | // We can reset here by filling the existing buffer with 0x400. 220 | self.literal_probs.fill(0x400); 221 | } else { 222 | // We need to reallocate because of the new size of `lc+lp`. 
223 | self.literal_probs = Vec2D::init(0x400, (1 << (new_props.lc + new_props.lp), 0x300)); 224 | } 225 | 226 | self.lzma_props = new_props; 227 | // For stack-allocated arrays, it was found to be faster to re-create new arrays 228 | // dropping the existing one, rather than using `fill` to reset the contents to 229 | // zero. Heap-based arrays use fill to keep their allocation rather than 230 | // reallocate. 231 | self.pos_slot_decoder = [ 232 | BitTree::new(), 233 | BitTree::new(), 234 | BitTree::new(), 235 | BitTree::new(), 236 | ]; 237 | self.align_decoder = BitTree::new(); 238 | self.pos_decoders = [0x400; 115]; 239 | self.is_match = [0x400; 192]; 240 | self.is_rep = [0x400; 12]; 241 | self.is_rep_g0 = [0x400; 12]; 242 | self.is_rep_g1 = [0x400; 12]; 243 | self.is_rep_g2 = [0x400; 12]; 244 | self.is_rep_0long = [0x400; 192]; 245 | self.state = 0; 246 | self.rep = [0; 4]; 247 | self.len_decoder = LenDecoder::new(); 248 | self.rep_len_decoder = LenDecoder::new(); 249 | } 250 | 251 | pub fn set_unpacked_size(&mut self, unpacked_size: Option) { 252 | self.unpacked_size = unpacked_size; 253 | } 254 | 255 | pub fn process, R: io::BufRead>( 256 | &mut self, 257 | output: &mut LZB, 258 | rangecoder: &mut RangeDecoder<'_, R>, 259 | ) -> error::Result<()> { 260 | self.process_mode(output, rangecoder, ProcessingMode::Finish) 261 | } 262 | 263 | #[cfg(feature = "stream")] 264 | pub fn process_stream, R: io::BufRead>( 265 | &mut self, 266 | output: &mut LZB, 267 | rangecoder: &mut RangeDecoder<'_, R>, 268 | ) -> error::Result<()> { 269 | self.process_mode(output, rangecoder, ProcessingMode::Partial) 270 | } 271 | 272 | /// Process the next iteration of the loop. 273 | /// 274 | /// If the update flag is true, the decoder's state will be updated. 275 | /// 276 | /// Returns [`ProcessingStatus`] to determine whether one should continue 277 | /// processing the loop. 
278 | fn process_next_inner, R: io::BufRead>( 279 | &mut self, 280 | output: &mut LZB, 281 | rangecoder: &mut RangeDecoder<'_, R>, 282 | update: bool, 283 | ) -> error::Result { 284 | let pos_state = output.len() & ((1 << self.lzma_props.pb) - 1); 285 | 286 | // Literal 287 | if !rangecoder.decode_bit( 288 | // TODO: assumes pb = 2 ?? 289 | &mut self.is_match[(self.state << 4) + pos_state], 290 | update, 291 | )? { 292 | let byte: u8 = self.decode_literal(output, rangecoder, update)?; 293 | 294 | if update { 295 | lzma_debug!("Literal: {}", byte); 296 | output.append_literal(byte)?; 297 | 298 | self.state = if self.state < 4 { 299 | 0 300 | } else if self.state < 10 { 301 | self.state - 3 302 | } else { 303 | self.state - 6 304 | }; 305 | } 306 | return Ok(ProcessingStatus::Continue); 307 | } 308 | 309 | // LZ 310 | let mut len: usize; 311 | // Distance is repeated from LRU 312 | if rangecoder.decode_bit(&mut self.is_rep[self.state], update)? { 313 | // dist = rep[0] 314 | if !rangecoder.decode_bit(&mut self.is_rep_g0[self.state], update)? { 315 | // len = 1 316 | if !rangecoder.decode_bit( 317 | &mut self.is_rep_0long[(self.state << 4) + pos_state], 318 | update, 319 | )? { 320 | // update state (short rep) 321 | if update { 322 | self.state = if self.state < 7 { 9 } else { 11 }; 323 | let dist = self.rep[0] + 1; 324 | output.append_lz(1, dist)?; 325 | } 326 | return Ok(ProcessingStatus::Continue); 327 | } 328 | // dist = rep[i] 329 | } else { 330 | let idx: usize; 331 | if !rangecoder.decode_bit(&mut self.is_rep_g1[self.state], update)? { 332 | idx = 1; 333 | } else if !rangecoder.decode_bit(&mut self.is_rep_g2[self.state], update)? 
{ 334 | idx = 2; 335 | } else { 336 | idx = 3; 337 | } 338 | if update { 339 | // Update LRU 340 | let dist = self.rep[idx]; 341 | for i in (0..idx).rev() { 342 | self.rep[i + 1] = self.rep[i]; 343 | } 344 | self.rep[0] = dist 345 | } 346 | } 347 | 348 | len = self.rep_len_decoder.decode(rangecoder, pos_state, update)?; 349 | 350 | if update { 351 | // update state (rep) 352 | self.state = if self.state < 7 { 8 } else { 11 }; 353 | } 354 | // New distance 355 | } else { 356 | if update { 357 | // Update LRU 358 | self.rep[3] = self.rep[2]; 359 | self.rep[2] = self.rep[1]; 360 | self.rep[1] = self.rep[0]; 361 | } 362 | 363 | len = self.len_decoder.decode(rangecoder, pos_state, update)?; 364 | 365 | if update { 366 | // update state (match) 367 | self.state = if self.state < 7 { 7 } else { 10 }; 368 | } 369 | 370 | let rep_0 = self.decode_distance(rangecoder, len, update)?; 371 | 372 | if update { 373 | self.rep[0] = rep_0; 374 | if self.rep[0] == 0xFFFF_FFFF { 375 | if rangecoder.is_finished_ok()? { 376 | return Ok(ProcessingStatus::Finished); 377 | } 378 | return Err(error::Error::LzmaError(String::from( 379 | "Found end-of-stream marker but more bytes are available", 380 | ))); 381 | } 382 | } 383 | } 384 | 385 | if update { 386 | len += 2; 387 | 388 | let dist = self.rep[0] + 1; 389 | output.append_lz(len, dist)?; 390 | } 391 | 392 | Ok(ProcessingStatus::Continue) 393 | } 394 | 395 | fn process_next, R: io::BufRead>( 396 | &mut self, 397 | output: &mut LZB, 398 | rangecoder: &mut RangeDecoder<'_, R>, 399 | ) -> error::Result { 400 | self.process_next_inner(output, rangecoder, true) 401 | } 402 | 403 | /// Try to process the next iteration of the loop. 404 | /// 405 | /// This will check to see if there is enough data to consume and advance 406 | /// the decompressor. Needed in streaming mode to avoid corrupting the 407 | /// state while processing incomplete chunks of data. 
408 | fn try_process_next>( 409 | &mut self, 410 | output: &mut LZB, 411 | buf: &[u8], 412 | range: u32, 413 | code: u32, 414 | ) -> error::Result<()> { 415 | let mut temp = std::io::Cursor::new(buf); 416 | let mut rangecoder = RangeDecoder::from_parts(&mut temp, range, code); 417 | let _ = self.process_next_inner(output, &mut rangecoder, false)?; 418 | Ok(()) 419 | } 420 | 421 | /// Utility function to read data into the partial input buffer. 422 | fn read_partial_input_buf( 423 | &mut self, 424 | rangecoder: &mut RangeDecoder<'_, R>, 425 | ) -> error::Result<()> { 426 | // Fill as much of the tmp buffer as possible 427 | let start = self.partial_input_buf.position() as usize; 428 | let bytes_read = 429 | rangecoder.read_into(&mut self.partial_input_buf.get_mut()[start..])? as u64; 430 | self.partial_input_buf 431 | .set_position(self.partial_input_buf.position() + bytes_read); 432 | Ok(()) 433 | } 434 | 435 | fn process_mode, R: io::BufRead>( 436 | &mut self, 437 | output: &mut LZB, 438 | rangecoder: &mut RangeDecoder<'_, R>, 439 | mode: ProcessingMode, 440 | ) -> error::Result<()> { 441 | loop { 442 | if let Some(unpacked_size) = self.unpacked_size { 443 | if output.len() as u64 >= unpacked_size { 444 | break; 445 | } 446 | } else if match mode { 447 | ProcessingMode::Partial => { 448 | rangecoder.is_eof()? && self.partial_input_buf.position() as usize == 0 449 | } 450 | ProcessingMode::Finish => { 451 | rangecoder.is_finished_ok()? 
&& self.partial_input_buf.position() as usize == 0 452 | } 453 | } { 454 | break; 455 | } 456 | 457 | if self.partial_input_buf.position() as usize > 0 { 458 | self.read_partial_input_buf(rangecoder)?; 459 | let tmp = *self.partial_input_buf.get_ref(); 460 | 461 | // Check if we need more data to advance the decompressor 462 | if mode == ProcessingMode::Partial 463 | && (self.partial_input_buf.position() as usize) < MAX_REQUIRED_INPUT 464 | && self 465 | .try_process_next( 466 | output, 467 | &tmp[..self.partial_input_buf.position() as usize], 468 | rangecoder.range, 469 | rangecoder.code, 470 | ) 471 | .is_err() 472 | { 473 | return Ok(()); 474 | } 475 | 476 | // Run the decompressor on the tmp buffer 477 | let mut tmp_reader = 478 | io::Cursor::new(&tmp[..self.partial_input_buf.position() as usize]); 479 | let mut tmp_rangecoder = 480 | RangeDecoder::from_parts(&mut tmp_reader, rangecoder.range, rangecoder.code); 481 | let res = self.process_next(output, &mut tmp_rangecoder)?; 482 | 483 | // Update the actual rangecoder 484 | rangecoder.set(tmp_rangecoder.range, tmp_rangecoder.code); 485 | 486 | // Update tmp buffer 487 | let end = self.partial_input_buf.position(); 488 | let new_len = end - tmp_reader.position(); 489 | self.partial_input_buf.get_mut()[..new_len as usize] 490 | .copy_from_slice(&tmp[tmp_reader.position() as usize..end as usize]); 491 | self.partial_input_buf.set_position(new_len); 492 | 493 | if res == ProcessingStatus::Finished { 494 | break; 495 | }; 496 | } else { 497 | let buf: &[u8] = rangecoder.stream.fill_buf()?; 498 | if mode == ProcessingMode::Partial 499 | && buf.len() < MAX_REQUIRED_INPUT 500 | && self 501 | .try_process_next(output, buf, rangecoder.range, rangecoder.code) 502 | .is_err() 503 | { 504 | return self.read_partial_input_buf(rangecoder); 505 | } 506 | 507 | if self.process_next(output, rangecoder)? 
== ProcessingStatus::Finished { 508 | break; 509 | }; 510 | } 511 | } 512 | 513 | if let Some(len) = self.unpacked_size { 514 | if mode == ProcessingMode::Finish && len != output.len() as u64 { 515 | return Err(error::Error::LzmaError(format!( 516 | "Expected unpacked size of {} but decompressed to {}", 517 | len, 518 | output.len() 519 | ))); 520 | } 521 | } 522 | 523 | Ok(()) 524 | } 525 | 526 | fn decode_literal, R: io::BufRead>( 527 | &mut self, 528 | output: &mut LZB, 529 | rangecoder: &mut RangeDecoder<'_, R>, 530 | update: bool, 531 | ) -> error::Result { 532 | let def_prev_byte = 0u8; 533 | let prev_byte = output.last_or(def_prev_byte) as usize; 534 | 535 | let mut result: usize = 1; 536 | let lit_state = ((output.len() & ((1 << self.lzma_props.lp) - 1)) << self.lzma_props.lc) 537 | + (prev_byte >> (8 - self.lzma_props.lc)); 538 | let probs = &mut self.literal_probs[lit_state]; 539 | 540 | if self.state >= 7 { 541 | let mut match_byte = output.last_n(self.rep[0] + 1)? as usize; 542 | 543 | while result < 0x100 { 544 | let match_bit = (match_byte >> 7) & 1; 545 | match_byte <<= 1; 546 | let bit = rangecoder 547 | .decode_bit(&mut probs[((1 + match_bit) << 8) + result], update)? 548 | as usize; 549 | result = (result << 1) ^ bit; 550 | if match_bit != bit { 551 | break; 552 | } 553 | } 554 | } 555 | 556 | while result < 0x100 { 557 | result = (result << 1) ^ (rangecoder.decode_bit(&mut probs[result], update)? as usize); 558 | } 559 | 560 | Ok((result - 0x100) as u8) 561 | } 562 | 563 | fn decode_distance( 564 | &mut self, 565 | rangecoder: &mut RangeDecoder<'_, R>, 566 | length: usize, 567 | update: bool, 568 | ) -> error::Result { 569 | let len_state = if length > 3 { 3 } else { length }; 570 | 571 | let pos_slot = self.pos_slot_decoder[len_state].parse(rangecoder, update)? 
as usize; 572 | if pos_slot < 4 { 573 | return Ok(pos_slot); 574 | } 575 | 576 | let num_direct_bits = (pos_slot >> 1) - 1; 577 | let mut result = (2 ^ (pos_slot & 1)) << num_direct_bits; 578 | 579 | if pos_slot < 14 { 580 | result += rangecoder.parse_reverse_bit_tree( 581 | num_direct_bits, 582 | &mut self.pos_decoders, 583 | result - pos_slot, 584 | update, 585 | )? as usize; 586 | } else { 587 | result += (rangecoder.get(num_direct_bits - 4)? as usize) << 4; 588 | result += self.align_decoder.parse_reverse(rangecoder, update)? as usize; 589 | } 590 | 591 | Ok(result) 592 | } 593 | } 594 | 595 | #[derive(Debug)] 596 | /// Raw decoder for LZMA. 597 | pub struct LzmaDecoder { 598 | params: LzmaParams, 599 | memlimit: usize, 600 | state: DecoderState, 601 | } 602 | 603 | impl LzmaDecoder { 604 | /// Creates a new object ready for decompressing data that it's given for 605 | /// the input dict size, expected unpacked data size, and memory limit 606 | /// for the internal buffer. 607 | pub fn new(params: LzmaParams, memlimit: Option) -> error::Result { 608 | Ok(Self { 609 | params, 610 | memlimit: memlimit.unwrap_or(usize::MAX), 611 | state: DecoderState::new(params.properties, params.unpacked_size), 612 | }) 613 | } 614 | 615 | /// Performs the equivalent of replacing this decompression state with a 616 | /// freshly allocated copy. 617 | /// 618 | /// Because the decoder state is reset, the unpacked size may optionally be 619 | /// re-specified. If [`None`] is given, the previous unpacked size that 620 | /// the decoder was initialized with remains unchanged. 621 | /// 622 | /// This function may not allocate memory and will attempt to reuse any 623 | /// previously allocated resources. 
624 | #[cfg(feature = "raw_decoder")] 625 | pub fn reset(&mut self, unpacked_size: Option>) { 626 | self.state.reset_state(self.params.properties); 627 | 628 | if let Some(unpacked_size) = unpacked_size { 629 | self.state.set_unpacked_size(unpacked_size); 630 | } 631 | } 632 | 633 | /// Decompresses the input data into the output, consuming only as much 634 | /// input as needed and writing as much output as possible. 635 | pub fn decompress( 636 | &mut self, 637 | input: &mut R, 638 | output: &mut W, 639 | ) -> error::Result<()> { 640 | let mut output = 641 | LzCircularBuffer::from_stream(output, self.params.dict_size as usize, self.memlimit); 642 | 643 | let mut rangecoder = RangeDecoder::new(input) 644 | .map_err(|e| error::Error::LzmaError(format!("LZMA stream too short: {}", e)))?; 645 | self.state.process(&mut output, &mut rangecoder)?; 646 | output.finish()?; 647 | Ok(()) 648 | } 649 | } 650 | -------------------------------------------------------------------------------- /src/decode/lzma2.rs: -------------------------------------------------------------------------------- 1 | use crate::decode::lzbuffer::LzBuffer; 2 | use crate::decode::lzma::{DecoderState, LzmaProperties}; 3 | use crate::decode::{lzbuffer, rangecoder}; 4 | use crate::error; 5 | use byteorder::{BigEndian, ReadBytesExt}; 6 | use std::io; 7 | use std::io::Read; 8 | 9 | #[derive(Debug)] 10 | /// Raw decoder for LZMA2. 11 | pub struct Lzma2Decoder { 12 | lzma_state: DecoderState, 13 | } 14 | 15 | impl Default for Lzma2Decoder { 16 | fn default() -> Self { 17 | Self::new() 18 | } 19 | } 20 | 21 | impl Lzma2Decoder { 22 | /// Creates a new object ready for decompressing data that it's given. 23 | pub fn new() -> Lzma2Decoder { 24 | Lzma2Decoder { 25 | lzma_state: DecoderState::new( 26 | LzmaProperties { 27 | lc: 0, 28 | lp: 0, 29 | pb: 0, 30 | }, 31 | None, 32 | ), 33 | } 34 | } 35 | 36 | /// Performs the equivalent of replacing this decompression state with a 37 | /// freshly allocated copy. 
38 | /// 39 | /// This function may not allocate memory and will attempt to reuse any 40 | /// previously allocated resources. 41 | #[cfg(feature = "raw_decoder")] 42 | pub fn reset(&mut self) { 43 | self.lzma_state.reset_state(LzmaProperties { 44 | lc: 0, 45 | lp: 0, 46 | pb: 0, 47 | }); 48 | } 49 | 50 | /// Decompresses the input data into the output, consuming only as much 51 | /// input as needed and writing as much output as possible. 52 | pub fn decompress( 53 | &mut self, 54 | input: &mut R, 55 | output: &mut W, 56 | ) -> error::Result<()> { 57 | let mut accum = lzbuffer::LzAccumBuffer::from_stream(output, usize::MAX); 58 | 59 | loop { 60 | let status = input.read_u8().map_err(|e| { 61 | error::Error::LzmaError(format!("LZMA2 expected new status: {}", e)) 62 | })?; 63 | 64 | lzma_info!("LZMA2 status: {}", status); 65 | 66 | if status == 0 { 67 | lzma_info!("LZMA2 end of input"); 68 | break; 69 | } else if status == 1 { 70 | // uncompressed reset dict 71 | Self::parse_uncompressed(&mut accum, input, true)?; 72 | } else if status == 2 { 73 | // uncompressed no reset 74 | Self::parse_uncompressed(&mut accum, input, false)?; 75 | } else { 76 | self.parse_lzma(&mut accum, input, status)?; 77 | } 78 | } 79 | 80 | accum.finish()?; 81 | Ok(()) 82 | } 83 | 84 | fn parse_lzma( 85 | &mut self, 86 | accum: &mut lzbuffer::LzAccumBuffer, 87 | input: &mut R, 88 | status: u8, 89 | ) -> error::Result<()> 90 | where 91 | R: io::BufRead, 92 | W: io::Write, 93 | { 94 | if status & 0x80 == 0 { 95 | return Err(error::Error::LzmaError(format!( 96 | "LZMA2 invalid status {}, must be 0, 1, 2 or >= 128", 97 | status 98 | ))); 99 | } 100 | 101 | let reset_dict: bool; 102 | let reset_state: bool; 103 | let reset_props: bool; 104 | match (status >> 5) & 0x3 { 105 | 0 => { 106 | reset_dict = false; 107 | reset_state = false; 108 | reset_props = false; 109 | } 110 | 1 => { 111 | reset_dict = false; 112 | reset_state = true; 113 | reset_props = false; 114 | } 115 | 2 => { 116 | reset_dict 
= false; 117 | reset_state = true; 118 | reset_props = true; 119 | } 120 | 3 => { 121 | reset_dict = true; 122 | reset_state = true; 123 | reset_props = true; 124 | } 125 | _ => unreachable!(), 126 | } 127 | 128 | let unpacked_size = input 129 | .read_u16::() 130 | .map_err(|e| error::Error::LzmaError(format!("LZMA2 expected unpacked size: {}", e)))?; 131 | let unpacked_size = ((((status & 0x1F) as u64) << 16) | (unpacked_size as u64)) + 1; 132 | 133 | let packed_size = input 134 | .read_u16::() 135 | .map_err(|e| error::Error::LzmaError(format!("LZMA2 expected packed size: {}", e)))?; 136 | let packed_size = (packed_size as u64) + 1; 137 | 138 | lzma_info!( 139 | "LZMA2 compressed block {{ unpacked_size: {}, packed_size: {}, reset_dict: {}, reset_state: {}, reset_props: {} }}", 140 | unpacked_size, 141 | packed_size, 142 | reset_dict, 143 | reset_state, 144 | reset_props 145 | ); 146 | 147 | if reset_dict { 148 | accum.reset()?; 149 | } 150 | 151 | if reset_state { 152 | let new_props = if reset_props { 153 | let props = input.read_u8().map_err(|e| { 154 | error::Error::LzmaError(format!("LZMA2 expected new properties: {}", e)) 155 | })?; 156 | 157 | let mut pb = props as u32; 158 | if pb >= 225 { 159 | return Err(error::Error::LzmaError(format!( 160 | "LZMA2 invalid properties: {} must be < 225", 161 | pb 162 | ))); 163 | } 164 | 165 | let lc = pb % 9; 166 | pb /= 9; 167 | let lp = pb % 5; 168 | pb /= 5; 169 | 170 | if lc + lp > 4 { 171 | return Err(error::Error::LzmaError(format!( 172 | "LZMA2 invalid properties: lc + lp ({} + {}) must be <= 4", 173 | lc, lp 174 | ))); 175 | } 176 | 177 | lzma_info!("Properties {{ lc: {}, lp: {}, pb: {} }}", lc, lp, pb); 178 | LzmaProperties { lc, lp, pb } 179 | } else { 180 | self.lzma_state.lzma_props 181 | }; 182 | 183 | self.lzma_state.reset_state(new_props); 184 | } 185 | 186 | self.lzma_state 187 | .set_unpacked_size(Some(unpacked_size + accum.len() as u64)); 188 | 189 | let mut taken = input.take(packed_size); 190 | let 
mut rangecoder = rangecoder::RangeDecoder::new(&mut taken) 191 | .map_err(|e| error::Error::LzmaError(format!("LZMA input too short: {}", e)))?; 192 | self.lzma_state.process(accum, &mut rangecoder) 193 | } 194 | 195 | fn parse_uncompressed( 196 | accum: &mut lzbuffer::LzAccumBuffer, 197 | input: &mut R, 198 | reset_dict: bool, 199 | ) -> error::Result<()> 200 | where 201 | R: io::BufRead, 202 | W: io::Write, 203 | { 204 | let unpacked_size = input 205 | .read_u16::() 206 | .map_err(|e| error::Error::LzmaError(format!("LZMA2 expected unpacked size: {}", e)))?; 207 | let unpacked_size = (unpacked_size as usize) + 1; 208 | 209 | lzma_info!( 210 | "LZMA2 uncompressed block {{ unpacked_size: {}, reset_dict: {} }}", 211 | unpacked_size, 212 | reset_dict 213 | ); 214 | 215 | if reset_dict { 216 | accum.reset()?; 217 | } 218 | 219 | let mut buf = vec![0; unpacked_size]; 220 | input.read_exact(buf.as_mut_slice()).map_err(|e| { 221 | error::Error::LzmaError(format!( 222 | "LZMA2 expected {} uncompressed bytes: {}", 223 | unpacked_size, e 224 | )) 225 | })?; 226 | accum.append_bytes(buf.as_slice()); 227 | 228 | Ok(()) 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /src/decode/mod.rs: -------------------------------------------------------------------------------- 1 | //! Decoding logic. 2 | 3 | pub mod lzbuffer; 4 | pub mod lzma; 5 | pub mod lzma2; 6 | pub mod options; 7 | pub mod rangecoder; 8 | pub mod util; 9 | pub mod xz; 10 | 11 | #[cfg(feature = "stream")] 12 | pub mod stream; 13 | -------------------------------------------------------------------------------- /src/decode/options.rs: -------------------------------------------------------------------------------- 1 | /// Options to tweak decompression behavior. 2 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] 3 | pub struct Options { 4 | /// Defines whether the unpacked size should be read from the header or 5 | /// provided. 
6 | /// 7 | /// The default is [`UnpackedSize::ReadFromHeader`]. 8 | pub unpacked_size: UnpackedSize, 9 | /// Defines whether the dictionary's dynamic size should be limited during 10 | /// decompression. 11 | /// 12 | /// The default is unlimited. 13 | pub memlimit: Option, 14 | /// Determines whether to bypass end of stream validation. 15 | /// 16 | /// This option only applies to the `stream` API. 17 | /// 18 | /// The default is false (always do completion check). 19 | pub allow_incomplete: bool, 20 | } 21 | 22 | /// Alternatives for defining the unpacked size of the decoded data. 23 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] 24 | pub enum UnpackedSize { 25 | /// Assume that the 8 bytes used to specify the unpacked size are present in 26 | /// the header. If the bytes are `0xFFFF_FFFF_FFFF_FFFF`, assume that 27 | /// there is an end-of-payload marker in the file. 28 | /// If not, read the 8 bytes as a little-endian encoded u64. 29 | #[default] 30 | ReadFromHeader, 31 | /// Assume that there are 8 bytes representing the unpacked size present in 32 | /// the header. Read it, but ignore it and use the provided value 33 | /// instead. If the provided value is [`None`], assume that there is an 34 | /// end-of-payload marker in the file. Note that this is a non-standard 35 | /// way of reading LZMA data, but is used by certain libraries such as 36 | /// [OpenCTM](http://openctm.sourceforge.net/). 37 | ReadHeaderButUseProvided(Option), 38 | /// Assume that the 8 bytes typically used to represent the unpacked size 39 | /// are *not* present in the header. Use the provided value. 40 | /// If the provided value is [`None`], assume that there is an 41 | /// end-of-payload marker in the file. 
42 | UseProvided(Option), 43 | } 44 | 45 | #[cfg(test)] 46 | mod test { 47 | use super::*; 48 | 49 | #[test] 50 | fn test_options() { 51 | assert_eq!( 52 | Options { 53 | unpacked_size: UnpackedSize::ReadFromHeader, 54 | memlimit: None, 55 | allow_incomplete: false, 56 | }, 57 | Options::default() 58 | ); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/decode/rangecoder.rs: -------------------------------------------------------------------------------- 1 | use crate::decode::util; 2 | use crate::error; 3 | use crate::util::const_assert; 4 | use byteorder::{BigEndian, ReadBytesExt}; 5 | use std::io; 6 | 7 | pub struct RangeDecoder<'a, R> 8 | where 9 | R: 'a + io::BufRead, 10 | { 11 | pub stream: &'a mut R, 12 | pub range: u32, 13 | pub code: u32, 14 | } 15 | 16 | impl<'a, R> RangeDecoder<'a, R> 17 | where 18 | R: io::BufRead, 19 | { 20 | pub fn new(stream: &'a mut R) -> io::Result { 21 | let mut dec = Self { 22 | stream, 23 | range: 0xFFFF_FFFF, 24 | code: 0, 25 | }; 26 | let _ = dec.stream.read_u8()?; 27 | dec.code = dec.stream.read_u32::()?; 28 | lzma_debug!("0 {{ range: {:08x}, code: {:08x} }}", dec.range, dec.code); 29 | Ok(dec) 30 | } 31 | 32 | pub fn from_parts(stream: &'a mut R, range: u32, code: u32) -> Self { 33 | Self { 34 | stream, 35 | range, 36 | code, 37 | } 38 | } 39 | 40 | pub fn set(&mut self, range: u32, code: u32) { 41 | self.range = range; 42 | self.code = code; 43 | } 44 | 45 | pub fn read_into(&mut self, dst: &mut [u8]) -> io::Result { 46 | self.stream.read(dst) 47 | } 48 | 49 | #[inline] 50 | pub fn is_finished_ok(&mut self) -> io::Result { 51 | Ok(self.code == 0 && self.is_eof()?) 
52 | } 53 | 54 | #[inline] 55 | pub fn is_eof(&mut self) -> io::Result { 56 | util::is_eof(self.stream) 57 | } 58 | 59 | #[inline] 60 | fn normalize(&mut self) -> io::Result<()> { 61 | lzma_trace!(" {{ range: {:08x}, code: {:08x} }}", self.range, self.code); 62 | if self.range < 0x0100_0000 { 63 | self.range <<= 8; 64 | self.code = (self.code << 8) ^ (self.stream.read_u8()? as u32); 65 | 66 | lzma_debug!("+ {{ range: {:08x}, code: {:08x} }}", self.range, self.code); 67 | } 68 | Ok(()) 69 | } 70 | 71 | #[inline] 72 | fn get_bit(&mut self) -> error::Result { 73 | self.range >>= 1; 74 | 75 | let bit = self.code >= self.range; 76 | if bit { 77 | self.code -= self.range 78 | } 79 | 80 | self.normalize()?; 81 | Ok(bit) 82 | } 83 | 84 | pub fn get(&mut self, count: usize) -> error::Result { 85 | let mut result = 0u32; 86 | for _ in 0..count { 87 | result = (result << 1) ^ (self.get_bit()? as u32) 88 | } 89 | Ok(result) 90 | } 91 | 92 | #[inline] 93 | pub fn decode_bit(&mut self, prob: &mut u16, update: bool) -> io::Result { 94 | let bound: u32 = (self.range >> 11) * (*prob as u32); 95 | 96 | lzma_trace!( 97 | " bound: {:08x}, prob: {:04x}, bit: {}", 98 | bound, 99 | prob, 100 | (self.code > bound) as u8 101 | ); 102 | if self.code < bound { 103 | if update { 104 | *prob += (0x800_u16 - *prob) >> 5; 105 | } 106 | self.range = bound; 107 | 108 | self.normalize()?; 109 | Ok(false) 110 | } else { 111 | if update { 112 | *prob -= *prob >> 5; 113 | } 114 | self.code -= bound; 115 | self.range -= bound; 116 | 117 | self.normalize()?; 118 | Ok(true) 119 | } 120 | } 121 | 122 | fn parse_bit_tree( 123 | &mut self, 124 | num_bits: usize, 125 | probs: &mut [u16], 126 | update: bool, 127 | ) -> io::Result { 128 | let mut tmp: u32 = 1; 129 | for _ in 0..num_bits { 130 | let bit = self.decode_bit(&mut probs[tmp as usize], update)?; 131 | tmp = (tmp << 1) ^ (bit as u32); 132 | } 133 | Ok(tmp - (1 << num_bits)) 134 | } 135 | 136 | pub fn parse_reverse_bit_tree( 137 | &mut self, 138 | 
num_bits: usize, 139 | probs: &mut [u16], 140 | offset: usize, 141 | update: bool, 142 | ) -> io::Result { 143 | let mut result = 0u32; 144 | let mut tmp: usize = 1; 145 | for i in 0..num_bits { 146 | let bit = self.decode_bit(&mut probs[offset + tmp], update)?; 147 | tmp = (tmp << 1) ^ (bit as usize); 148 | result ^= (bit as u32) << i; 149 | } 150 | Ok(result) 151 | } 152 | } 153 | 154 | #[derive(Debug, Clone)] 155 | pub struct BitTree { 156 | probs: [u16; PROBS_ARRAY_LEN], 157 | } 158 | 159 | impl BitTree { 160 | pub fn new() -> Self { 161 | // The validity of PROBS_ARRAY_LEN is checked at compile-time with a macro 162 | // that confirms that the argument P passed is indeed 1 << N for 163 | // some N using usize::trailing_zeros to calculate floor(log_2(P)). 164 | // 165 | // Thus, BitTree is only valid for any P such that 166 | // P = 2 ** floor(log_2(P)), where P is the length of the probability array 167 | // of the BitTree. This maintains the invariant that P = 1 << N. 168 | // 169 | // This precondition must be checked for any way to construct a new, valid 170 | // instance of BitTree. Here it is checked for BitTree::new(), but if 171 | // another function is added that returns a new instance of BitTree, 172 | // this assertion must be checked there as well. 173 | const_assert!("BitTree's PROBS_ARRAY_LEN parameter must be a power of 2", 174 | PROBS_ARRAY_LEN: usize => (1 << (PROBS_ARRAY_LEN.trailing_zeros() as usize)) == PROBS_ARRAY_LEN); 175 | BitTree { 176 | probs: [0x400; PROBS_ARRAY_LEN], 177 | } 178 | } 179 | 180 | // NUM_BITS is derived from PROBS_ARRAY_LEN because of the lack of 181 | // generic const expressions. Where PROBS_ARRAY_LEN is a power of 2, 182 | // NUM_BITS can be derived by the number of trailing zeroes. 
183 | const NUM_BITS: usize = PROBS_ARRAY_LEN.trailing_zeros() as usize; 184 | 185 | pub fn parse( 186 | &mut self, 187 | rangecoder: &mut RangeDecoder, 188 | update: bool, 189 | ) -> io::Result { 190 | rangecoder.parse_bit_tree(Self::NUM_BITS, &mut self.probs, update) 191 | } 192 | 193 | pub fn parse_reverse( 194 | &mut self, 195 | rangecoder: &mut RangeDecoder, 196 | update: bool, 197 | ) -> io::Result { 198 | rangecoder.parse_reverse_bit_tree(Self::NUM_BITS, &mut self.probs, 0, update) 199 | } 200 | } 201 | 202 | #[derive(Debug)] 203 | pub struct LenDecoder { 204 | choice: u16, 205 | choice2: u16, 206 | low_coder: [BitTree<{ 1 << 3 }>; 16], 207 | mid_coder: [BitTree<{ 1 << 3 }>; 16], 208 | high_coder: BitTree<{ 1 << 8 }>, 209 | } 210 | 211 | impl LenDecoder { 212 | pub fn new() -> Self { 213 | LenDecoder { 214 | choice: 0x400, 215 | choice2: 0x400, 216 | low_coder: [ 217 | BitTree::new(), 218 | BitTree::new(), 219 | BitTree::new(), 220 | BitTree::new(), 221 | BitTree::new(), 222 | BitTree::new(), 223 | BitTree::new(), 224 | BitTree::new(), 225 | BitTree::new(), 226 | BitTree::new(), 227 | BitTree::new(), 228 | BitTree::new(), 229 | BitTree::new(), 230 | BitTree::new(), 231 | BitTree::new(), 232 | BitTree::new(), 233 | ], 234 | mid_coder: [ 235 | BitTree::new(), 236 | BitTree::new(), 237 | BitTree::new(), 238 | BitTree::new(), 239 | BitTree::new(), 240 | BitTree::new(), 241 | BitTree::new(), 242 | BitTree::new(), 243 | BitTree::new(), 244 | BitTree::new(), 245 | BitTree::new(), 246 | BitTree::new(), 247 | BitTree::new(), 248 | BitTree::new(), 249 | BitTree::new(), 250 | BitTree::new(), 251 | ], 252 | high_coder: BitTree::new(), 253 | } 254 | } 255 | 256 | pub fn decode( 257 | &mut self, 258 | rangecoder: &mut RangeDecoder, 259 | pos_state: usize, 260 | update: bool, 261 | ) -> io::Result { 262 | if !rangecoder.decode_bit(&mut self.choice, update)? { 263 | Ok(self.low_coder[pos_state].parse(rangecoder, update)? 
as usize) 264 | } else if !rangecoder.decode_bit(&mut self.choice2, update)? { 265 | Ok(self.mid_coder[pos_state].parse(rangecoder, update)? as usize + 8) 266 | } else { 267 | Ok(self.high_coder.parse(rangecoder, update)? as usize + 16) 268 | } 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /src/decode/stream.rs: -------------------------------------------------------------------------------- 1 | use crate::decode::lzbuffer::{LzBuffer, LzCircularBuffer}; 2 | use crate::decode::lzma::{DecoderState, LzmaParams}; 3 | use crate::decode::rangecoder::RangeDecoder; 4 | use crate::decompress::Options; 5 | use crate::error::Error; 6 | use std::fmt::Debug; 7 | use std::io::{self, BufRead, Cursor, Read, Write}; 8 | 9 | /// Minimum header length to be read. 10 | /// - props: u8 (1 byte) 11 | /// - dict_size: u32 (4 bytes) 12 | const MIN_HEADER_LEN: usize = 5; 13 | 14 | /// Max header length to be read. 15 | /// - unpacked_size: u64 (8 bytes) 16 | const MAX_HEADER_LEN: usize = MIN_HEADER_LEN + 8; 17 | 18 | /// Required bytes after the header. 19 | /// - ignore: u8 (1 byte) 20 | /// - code: u32 (4 bytes) 21 | const START_BYTES: usize = 5; 22 | 23 | /// Maximum number of bytes to buffer while reading the header. 24 | const MAX_TMP_LEN: usize = MAX_HEADER_LEN + START_BYTES; 25 | 26 | /// Internal state of this streaming decoder. This is needed because we have to 27 | /// initialize the stream before processing any data. 28 | #[derive(Debug)] 29 | enum State 30 | where 31 | W: Write, 32 | { 33 | /// Stream is initialized but header values have not yet been read. 34 | Header(W), 35 | /// Header values have been read and the stream is ready to process more 36 | /// data. 37 | Data(Box>), 38 | } 39 | 40 | /// Structures needed while decoding data. 
41 | struct RunState 42 | where 43 | W: Write, 44 | { 45 | decoder: DecoderState, 46 | range: u32, 47 | code: u32, 48 | output: LzCircularBuffer, 49 | } 50 | 51 | impl Debug for RunState 52 | where 53 | W: Write, 54 | { 55 | fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { 56 | fmt.debug_struct("RunState") 57 | .field("range", &self.range) 58 | .field("code", &self.code) 59 | .finish() 60 | } 61 | } 62 | 63 | /// Lzma decompressor that can process multiple chunks of data using the 64 | /// [`io::Write`] interface. 65 | #[cfg_attr(docsrs, doc(cfg(stream)))] 66 | pub struct Stream 67 | where 68 | W: Write, 69 | { 70 | /// Temporary buffer to hold data while the header is being read. 71 | tmp: Cursor<[u8; MAX_TMP_LEN]>, 72 | /// Whether the stream is initialized and ready to process data. 73 | /// An [`Option`] is used to avoid interior mutability when updating the 74 | /// state. 75 | state: Option>, 76 | /// Options given when a stream is created. 77 | options: Options, 78 | } 79 | 80 | impl Stream 81 | where 82 | W: Write, 83 | { 84 | /// Initialize the stream. This will consume the `output` which is the sink 85 | /// implementing [`io::Write`] that will receive decompressed bytes. 86 | pub fn new(output: W) -> Self { 87 | Self::new_with_options(&Options::default(), output) 88 | } 89 | 90 | /// Initialize the stream with the given `options`. This will consume the 91 | /// `output` which is the sink implementing [`io::Write`] that will 92 | /// receive decompressed bytes. 93 | pub fn new_with_options(options: &Options, output: W) -> Self { 94 | Self { 95 | tmp: Cursor::new([0; MAX_TMP_LEN]), 96 | state: Some(State::Header(output)), 97 | options: *options, 98 | } 99 | } 100 | 101 | /// Get a reference to the output sink. 
102 | pub fn get_output(&self) -> Option<&W> { 103 | self.state.as_ref().map(|state| match state { 104 | State::Header(output) => output, 105 | State::Data(state) => state.output.get_output(), 106 | }) 107 | } 108 | 109 | /// Get a mutable reference to the output sink; 110 | pub fn get_output_mut(&mut self) -> Option<&mut W> { 111 | self.state.as_mut().map(|state| match state { 112 | State::Header(output) => output, 113 | State::Data(state) => state.output.get_output_mut(), 114 | }) 115 | } 116 | 117 | /// Consumes the stream and returns the output sink. This also makes sure 118 | /// we have properly reached the end of the stream. 119 | pub fn finish(mut self) -> crate::error::Result { 120 | if let Some(state) = self.state.take() { 121 | match state { 122 | State::Header(output) => { 123 | if self.tmp.position() > 0 { 124 | Err(Error::LzmaError("failed to read header".to_string())) 125 | } else { 126 | Ok(output) 127 | } 128 | } 129 | State::Data(mut state) => { 130 | if !self.options.allow_incomplete { 131 | // Process one last time with empty input to force end of 132 | // stream checks 133 | let mut stream = 134 | Cursor::new(&self.tmp.get_ref()[0..self.tmp.position() as usize]); 135 | let mut range_decoder = 136 | RangeDecoder::from_parts(&mut stream, state.range, state.code); 137 | state 138 | .decoder 139 | .process(&mut state.output, &mut range_decoder)?; 140 | } 141 | let output = state.output.finish()?; 142 | Ok(output) 143 | } 144 | } 145 | } else { 146 | // this will occur if a call to `write()` fails 147 | Err(Error::LzmaError( 148 | "can't finish stream because of previous write error".to_string(), 149 | )) 150 | } 151 | } 152 | 153 | /// Attempts to read the header and transition into a running state. 154 | /// 155 | /// This function will consume the state, returning the next state on both 156 | /// error and success. 
157 | fn read_header( 158 | output: W, 159 | mut input: &mut R, 160 | options: &Options, 161 | ) -> crate::error::Result> { 162 | match LzmaParams::read_header(&mut input, options) { 163 | Ok(params) => { 164 | let decoder = DecoderState::new(params.properties, params.unpacked_size); 165 | let output = LzCircularBuffer::from_stream( 166 | output, 167 | params.dict_size as usize, 168 | options.memlimit.unwrap_or(usize::MAX), 169 | ); 170 | // The RangeDecoder is only kept temporarily as we are processing 171 | // chunks of data. 172 | if let Ok(rangecoder) = RangeDecoder::new(&mut input) { 173 | Ok(State::Data(Box::new(RunState { 174 | decoder, 175 | output, 176 | range: rangecoder.range, 177 | code: rangecoder.code, 178 | }))) 179 | } else { 180 | // Failed to create a RangeDecoder because we need more data, 181 | // try again later. 182 | Ok(State::Header(output.into_output())) 183 | } 184 | } 185 | // Failed to read_header() because we need more data, try again later. 186 | Err(Error::HeaderTooShort(_)) => Ok(State::Header(output)), 187 | // Fatal error. Don't retry. 188 | Err(e) => Err(e), 189 | } 190 | } 191 | 192 | /// Process compressed data. 193 | fn read_data(state: &mut RunState, mut input: &mut R) -> io::Result<()> { 194 | // Construct our RangeDecoder from the previous range and code 195 | // values. 196 | let mut rangecoder = RangeDecoder::from_parts(&mut input, state.range, state.code); 197 | 198 | // Try to process all bytes of data. 
199 | state 200 | .decoder 201 | .process_stream(&mut state.output, &mut rangecoder) 202 | .map_err(|e| -> io::Error { e.into() })?; 203 | 204 | state.range = rangecoder.range; 205 | state.code = rangecoder.code; 206 | Ok(()) 207 | } 208 | } 209 | 210 | impl Debug for Stream 211 | where 212 | W: Write + Debug, 213 | { 214 | fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { 215 | fmt.debug_struct("Stream") 216 | .field("tmp", &self.tmp.position()) 217 | .field("state", &self.state) 218 | .field("options", &self.options) 219 | .finish() 220 | } 221 | } 222 | 223 | impl Write for Stream 224 | where 225 | W: Write, 226 | { 227 | fn write(&mut self, data: &[u8]) -> io::Result { 228 | let mut input = Cursor::new(data); 229 | 230 | if let Some(state) = self.state.take() { 231 | let state = match state { 232 | // Read the header values and transition into a running state. 233 | State::Header(state) => { 234 | let res = if self.tmp.position() > 0 { 235 | // attempt to fill the tmp buffer 236 | let position = self.tmp.position(); 237 | let bytes_read = 238 | input.read(&mut self.tmp.get_mut()[position as usize..])?; 239 | let bytes_read = if bytes_read < std::u64::MAX as usize { 240 | bytes_read as u64 241 | } else { 242 | return Err(io::Error::new( 243 | io::ErrorKind::Other, 244 | "Failed to convert integer to u64.", 245 | )); 246 | }; 247 | self.tmp.set_position(position + bytes_read); 248 | 249 | // attempt to read the header from our tmp buffer 250 | let (position, res) = { 251 | let mut tmp_input = 252 | Cursor::new(&self.tmp.get_ref()[0..self.tmp.position() as usize]); 253 | let res = Stream::read_header(state, &mut tmp_input, &self.options); 254 | (tmp_input.position(), res) 255 | }; 256 | 257 | // discard all bytes up to position if reading the header 258 | // was successful 259 | if let Ok(State::Data(_)) = &res { 260 | let tmp = *self.tmp.get_ref(); 261 | let end = self.tmp.position(); 262 | let new_len = end - position; 263 | 
self.tmp.get_mut()[0..new_len as usize] 264 | .copy_from_slice(&tmp[position as usize..end as usize]); 265 | self.tmp.set_position(new_len); 266 | } 267 | res 268 | } else { 269 | Stream::read_header(state, &mut input, &self.options) 270 | }; 271 | 272 | match res { 273 | // occurs when not enough input bytes were provided to 274 | // read the entire header 275 | Ok(State::Header(val)) => { 276 | if self.tmp.position() == 0 { 277 | // reset the cursor because we may have partial reads 278 | input.set_position(0); 279 | let bytes_read = input.read(&mut self.tmp.get_mut()[..])?; 280 | let bytes_read = if bytes_read < std::u64::MAX as usize { 281 | bytes_read as u64 282 | } else { 283 | return Err(io::Error::new( 284 | io::ErrorKind::Other, 285 | "Failed to convert integer to u64.", 286 | )); 287 | }; 288 | self.tmp.set_position(bytes_read); 289 | } 290 | State::Header(val) 291 | } 292 | 293 | // occurs when the header was successfully read and we 294 | // move on to the next state 295 | Ok(State::Data(val)) => State::Data(val), 296 | 297 | // occurs when the output was consumed due to a 298 | // non-recoverable error 299 | Err(e) => { 300 | return Err(match e { 301 | Error::IoError(e) | Error::HeaderTooShort(e) => e, 302 | Error::LzmaError(e) | Error::XzError(e) => { 303 | io::Error::new(io::ErrorKind::Other, e) 304 | } 305 | }); 306 | } 307 | } 308 | } 309 | 310 | // Process another chunk of data. 311 | State::Data(mut state) => { 312 | if self.tmp.position() > 0 { 313 | let mut tmp_input = 314 | Cursor::new(&self.tmp.get_ref()[0..self.tmp.position() as usize]); 315 | Stream::read_data(&mut state, &mut tmp_input)?; 316 | self.tmp.set_position(0); 317 | }; 318 | Stream::read_data(&mut state, &mut input)?; 319 | State::Data(state) 320 | } 321 | }; 322 | self.state.replace(state); 323 | } 324 | Ok(input.position() as usize) 325 | } 326 | 327 | /// Flushes the output sink. The internal buffer isn't flushed to avoid 328 | /// corrupting the internal state. 
Instead, call [`Self::finish()`] to 329 | /// finalize the stream and flush all remaining internal data. 330 | fn flush(&mut self) -> io::Result<()> { 331 | if let Some(ref mut state) = self.state { 332 | match state { 333 | State::Header(_) => Ok(()), 334 | State::Data(state) => state.output.get_output_mut().flush(), 335 | } 336 | } else { 337 | Ok(()) 338 | } 339 | } 340 | } 341 | 342 | impl From for io::Error { 343 | fn from(error: Error) -> io::Error { 344 | io::Error::new(io::ErrorKind::Other, format!("{:?}", error)) 345 | } 346 | } 347 | 348 | #[cfg(test)] 349 | mod test { 350 | use super::*; 351 | 352 | /// Test an empty stream 353 | #[test] 354 | fn test_stream_noop() { 355 | let stream = Stream::new(Vec::new()); 356 | assert!(stream.get_output().unwrap().is_empty()); 357 | 358 | let output = stream.finish().unwrap(); 359 | assert!(output.is_empty()); 360 | } 361 | 362 | /// Test writing an empty slice 363 | #[test] 364 | fn test_stream_zero() { 365 | let mut stream = Stream::new(Vec::new()); 366 | 367 | stream.write_all(&[]).unwrap(); 368 | stream.write_all(&[]).unwrap(); 369 | 370 | let output = stream.finish().unwrap(); 371 | 372 | assert!(output.is_empty()); 373 | } 374 | 375 | /// Test a bad header value 376 | #[test] 377 | #[should_panic(expected = "LZMA header invalid properties: 255 must be < 225")] 378 | fn test_bad_header() { 379 | let input = [255u8; 32]; 380 | 381 | let mut stream = Stream::new(Vec::new()); 382 | 383 | stream.write_all(&input[..]).unwrap(); 384 | 385 | let output = stream.finish().unwrap(); 386 | 387 | assert!(output.is_empty()); 388 | } 389 | 390 | /// Test processing only partial data 391 | #[test] 392 | fn test_stream_incomplete() { 393 | let input = b"\x5d\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x83\xff\ 394 | \xfb\xff\xff\xc0\x00\x00\x00"; 395 | // Process until this index is reached. 
396 | let mut end = 1u64; 397 | 398 | // Test when we fail to provide the minimum number of bytes required to 399 | // read the header. Header size is 13 bytes but we also read the first 5 400 | // bytes of data. 401 | while end < (MAX_HEADER_LEN + START_BYTES) as u64 { 402 | let mut stream = Stream::new(Vec::new()); 403 | stream.write_all(&input[..end as usize]).unwrap(); 404 | assert_eq!(stream.tmp.position(), end); 405 | 406 | let err = stream.finish().unwrap_err(); 407 | assert!( 408 | err.to_string().contains("failed to read header"), 409 | "error was: {}", 410 | err 411 | ); 412 | 413 | end += 1; 414 | } 415 | 416 | // Test when we fail to provide enough bytes to terminate the stream. A 417 | // properly terminated stream will have a code value of 0. 418 | while end < input.len() as u64 { 419 | let mut stream = Stream::new(Vec::new()); 420 | stream.write_all(&input[..end as usize]).unwrap(); 421 | 422 | // Header bytes will be buffered until there are enough to read 423 | if end < (MAX_HEADER_LEN + START_BYTES) as u64 { 424 | assert_eq!(stream.tmp.position(), end); 425 | } 426 | 427 | let err = stream.finish().unwrap_err(); 428 | assert!(err.to_string().contains("failed to fill whole buffer")); 429 | 430 | end += 1; 431 | } 432 | } 433 | 434 | /// Test processing all chunk sizes 435 | #[test] 436 | fn test_stream_chunked() { 437 | let small_input = include_bytes!("../../tests/files/small.txt"); 438 | 439 | let mut reader = io::Cursor::new(&small_input[..]); 440 | let mut small_input_compressed = Vec::new(); 441 | crate::lzma_compress(&mut reader, &mut small_input_compressed).unwrap(); 442 | 443 | let input : Vec<(&[u8], &[u8])> = vec![ 444 | (b"\x5d\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x83\xff\xfb\xff\xff\xc0\x00\x00\x00", b""), 445 | (&small_input_compressed[..], small_input)]; 446 | for (input, expected) in input { 447 | for chunk in 1..input.len() { 448 | let mut consumed = 0; 449 | let mut stream = Stream::new(Vec::new()); 450 | while 
consumed < input.len() { 451 | let end = std::cmp::min(consumed + chunk, input.len()); 452 | stream.write_all(&input[consumed..end]).unwrap(); 453 | consumed = end; 454 | } 455 | let output = stream.finish().unwrap(); 456 | assert_eq!(expected, &output[..]); 457 | } 458 | } 459 | } 460 | 461 | #[test] 462 | fn test_stream_corrupted() { 463 | let mut stream = Stream::new(Vec::new()); 464 | let err = stream 465 | .write_all(b"corrupted bytes here corrupted bytes here") 466 | .unwrap_err(); 467 | assert!(err.to_string().contains("beyond output size")); 468 | let err = stream.finish().unwrap_err(); 469 | assert!(err 470 | .to_string() 471 | .contains("can\'t finish stream because of previous write error")); 472 | } 473 | 474 | #[test] 475 | fn test_allow_incomplete() { 476 | let input = include_bytes!("../../tests/files/small.txt"); 477 | 478 | let mut reader = io::Cursor::new(&input[..]); 479 | let mut compressed = Vec::new(); 480 | crate::lzma_compress(&mut reader, &mut compressed).unwrap(); 481 | let compressed = &compressed[..compressed.len() / 2]; 482 | 483 | // Should fail to finish() without the allow_incomplete option. 484 | let mut stream = Stream::new(Vec::new()); 485 | stream.write_all(compressed).unwrap(); 486 | stream.finish().unwrap_err(); 487 | 488 | // Should succeed with the allow_incomplete option. 
489 | let mut stream = Stream::new_with_options( 490 | &Options { 491 | allow_incomplete: true, 492 | ..Default::default() 493 | }, 494 | Vec::new(), 495 | ); 496 | stream.write_all(compressed).unwrap(); 497 | let output = stream.finish().unwrap(); 498 | assert_eq!(output, &input[..26]); 499 | } 500 | } 501 | -------------------------------------------------------------------------------- /src/decode/util.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | pub fn read_tag(input: &mut R, tag: &[u8]) -> io::Result { 4 | let mut buf = vec![0; tag.len()]; 5 | input.read_exact(buf.as_mut_slice())?; 6 | Ok(buf.as_slice() == tag) 7 | } 8 | 9 | pub fn is_eof(input: &mut R) -> io::Result { 10 | let buf = input.fill_buf()?; 11 | Ok(buf.is_empty()) 12 | } 13 | 14 | pub fn flush_zero_padding(input: &mut R) -> io::Result { 15 | loop { 16 | let len = { 17 | let buf = input.fill_buf()?; 18 | let len = buf.len(); 19 | 20 | if len == 0 { 21 | return Ok(true); 22 | } 23 | 24 | for x in buf { 25 | if *x != 0u8 { 26 | return Ok(false); 27 | } 28 | } 29 | len 30 | }; 31 | 32 | input.consume(len); 33 | } 34 | } 35 | 36 | /// An [`io::Read`] computing a digest on the bytes read. 
37 | pub struct CrcDigestRead<'a, 'b, R, S> 38 | where 39 | R: 'a + io::Read, 40 | S: crc::Width, 41 | { 42 | /// Underlying reader 43 | read: &'a mut R, 44 | /// Hasher 45 | digest: &'a mut crc::Digest<'b, S>, 46 | } 47 | 48 | impl<'a, 'b, R, S> CrcDigestRead<'a, 'b, R, S> 49 | where 50 | R: io::Read, 51 | S: crc::Width, 52 | { 53 | pub fn new(read: &'a mut R, digest: &'a mut crc::Digest<'b, S>) -> Self { 54 | Self { read, digest } 55 | } 56 | } 57 | 58 | impl<'a, 'b, R> io::Read for CrcDigestRead<'a, 'b, R, u32> 59 | where 60 | R: io::Read, 61 | { 62 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 63 | let result = self.read.read(buf)?; 64 | self.digest.update(&buf[..result]); 65 | Ok(result) 66 | } 67 | } 68 | 69 | /// An [`io::BufRead`] counting the bytes read. 70 | pub struct CountBufRead<'a, R> 71 | where 72 | R: 'a + io::BufRead, 73 | { 74 | /// Underlying reader 75 | read: &'a mut R, 76 | /// Number of bytes read 77 | count: usize, 78 | } 79 | 80 | impl<'a, R> CountBufRead<'a, R> 81 | where 82 | R: io::BufRead, 83 | { 84 | pub fn new(read: &'a mut R) -> Self { 85 | Self { read, count: 0 } 86 | } 87 | 88 | pub fn count(&self) -> usize { 89 | self.count 90 | } 91 | } 92 | 93 | impl<'a, R> io::Read for CountBufRead<'a, R> 94 | where 95 | R: io::BufRead, 96 | { 97 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 98 | let result = self.read.read(buf)?; 99 | self.count += result; 100 | Ok(result) 101 | } 102 | } 103 | 104 | impl<'a, R> io::BufRead for CountBufRead<'a, R> 105 | where 106 | R: io::BufRead, 107 | { 108 | fn fill_buf(&mut self) -> io::Result<&[u8]> { 109 | self.read.fill_buf() 110 | } 111 | 112 | fn consume(&mut self, amt: usize) { 113 | self.read.consume(amt); 114 | self.count += amt; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/decode/xz.rs: -------------------------------------------------------------------------------- 1 | //! Decoder for the `.xz` file format. 
2 | 3 | use crate::decode::lzma2::Lzma2Decoder; 4 | use crate::decode::util; 5 | use crate::error; 6 | use crate::xz::crc::{CRC32, CRC64}; 7 | use crate::xz::{footer, header, CheckMethod, StreamFlags}; 8 | use byteorder::{BigEndian, LittleEndian, ReadBytesExt}; 9 | use std::io; 10 | use std::io::Read; 11 | 12 | #[derive(Debug)] 13 | struct Record { 14 | unpadded_size: u64, 15 | unpacked_size: u64, 16 | } 17 | 18 | pub fn decode_stream(input: &mut R, output: &mut W) -> error::Result<()> 19 | where 20 | R: io::BufRead, 21 | W: io::Write, 22 | { 23 | let header = header::StreamHeader::parse(input)?; 24 | 25 | let mut records: Vec = vec![]; 26 | let index_size = loop { 27 | let mut count_input = util::CountBufRead::new(input); 28 | let header_size = count_input.read_u8()?; 29 | lzma_info!("XZ block header_size byte: 0x{:02x}", header_size); 30 | 31 | if header_size == 0 { 32 | lzma_info!("XZ records: {:?}", records); 33 | check_index(&mut count_input, &records)?; 34 | let index_size = count_input.count(); 35 | break index_size; 36 | } 37 | 38 | read_block( 39 | &mut count_input, 40 | output, 41 | header.stream_flags.check_method, 42 | &mut records, 43 | header_size, 44 | )?; 45 | }; 46 | 47 | let crc32 = input.read_u32::()?; 48 | let mut digest = CRC32.digest(); 49 | { 50 | let mut digested = util::CrcDigestRead::new(input, &mut digest); 51 | let backward_size = digested.read_u32::()?; 52 | if index_size as u32 != (backward_size + 1) << 2 { 53 | return Err(error::Error::XzError(format!( 54 | "Invalid index size: expected {} but got {}", 55 | (backward_size + 1) << 2, 56 | index_size 57 | ))); 58 | } 59 | 60 | let stream_flags = { 61 | let field = digested.read_u16::()?; 62 | StreamFlags::parse(field)? 
63 | }; 64 | 65 | if header.stream_flags != stream_flags { 66 | return Err(error::Error::XzError(format!( 67 | "Flags in header ({:?}) does not match footer ({:?})", 68 | header.stream_flags, stream_flags 69 | ))); 70 | } 71 | } 72 | 73 | let digest_crc32 = digest.finalize(); 74 | if crc32 != digest_crc32 { 75 | return Err(error::Error::XzError(format!( 76 | "Invalid footer CRC32: expected 0x{:08x} but got 0x{:08x}", 77 | crc32, digest_crc32 78 | ))); 79 | } 80 | 81 | if !util::read_tag(input, footer::XZ_MAGIC_FOOTER)? { 82 | return Err(error::Error::XzError(format!( 83 | "Invalid footer magic, expected {:?}", 84 | footer::XZ_MAGIC_FOOTER 85 | ))); 86 | } 87 | 88 | if !util::is_eof(input)? { 89 | return Err(error::Error::XzError( 90 | "Unexpected data after last XZ block".to_string(), 91 | )); 92 | } 93 | Ok(()) 94 | } 95 | 96 | fn check_index( 97 | count_input: &mut util::CountBufRead<'_, R>, 98 | records: &[Record], 99 | ) -> error::Result<()> 100 | where 101 | R: io::BufRead, 102 | { 103 | let mut digest = CRC32.digest(); 104 | let index_tag = 0u8; 105 | digest.update(&[index_tag]); 106 | { 107 | let mut digested = util::CrcDigestRead::new(count_input, &mut digest); 108 | 109 | let num_records = get_multibyte(&mut digested)?; 110 | if num_records != records.len() as u64 { 111 | return Err(error::Error::XzError(format!( 112 | "Expected {} records but got {} records", 113 | num_records, 114 | records.len() 115 | ))); 116 | } 117 | 118 | for (i, record) in records.iter().enumerate() { 119 | lzma_info!("XZ index checking record {}: {:?}", i, record); 120 | 121 | let unpadded_size = get_multibyte(&mut digested)?; 122 | if unpadded_size != record.unpadded_size { 123 | return Err(error::Error::XzError(format!( 124 | "Invalid index for record {}: unpadded size ({}) does not match index ({})", 125 | i, record.unpadded_size, unpadded_size 126 | ))); 127 | } 128 | 129 | let unpacked_size = get_multibyte(&mut digested)?; 130 | if unpacked_size != record.unpacked_size { 131 
| return Err(error::Error::XzError(format!( 132 | "Invalid index for record {}: unpacked size ({}) does not match index ({})", 133 | i, record.unpacked_size, unpacked_size 134 | ))); 135 | } 136 | } 137 | }; 138 | // TODO: create padding parser function 139 | let count = count_input.count(); 140 | let padding_size = ((count ^ 0x03) + 1) & 0x03; 141 | lzma_info!( 142 | "XZ index: {} byte(s) read, {} byte(s) of padding", 143 | count, 144 | padding_size 145 | ); 146 | 147 | { 148 | let mut digested = util::CrcDigestRead::new(count_input, &mut digest); 149 | for _ in 0..padding_size { 150 | let byte = digested.read_u8()?; 151 | if byte != 0 { 152 | return Err(error::Error::XzError( 153 | "Invalid index padding, must be null bytes".to_string(), 154 | )); 155 | } 156 | } 157 | }; 158 | 159 | let digest_crc32 = digest.finalize(); 160 | lzma_info!("XZ index checking digest 0x{:08x}", digest_crc32); 161 | 162 | let crc32 = count_input.read_u32::()?; 163 | if crc32 != digest_crc32 { 164 | return Err(error::Error::XzError(format!( 165 | "Invalid index CRC32: expected 0x{:08x} but got 0x{:08x}", 166 | crc32, digest_crc32 167 | ))); 168 | } 169 | 170 | Ok(()) 171 | } 172 | 173 | #[derive(Debug)] 174 | enum FilterId { 175 | Lzma2, 176 | } 177 | 178 | fn get_filter_id(id: u64) -> error::Result { 179 | match id { 180 | 0x21 => Ok(FilterId::Lzma2), 181 | _ => Err(error::Error::XzError(format!("Unknown filter id {}", id))), 182 | } 183 | } 184 | 185 | struct Filter { 186 | filter_id: FilterId, 187 | props: Vec, 188 | } 189 | 190 | struct BlockHeader { 191 | filters: Vec, 192 | packed_size: Option, 193 | unpacked_size: Option, 194 | } 195 | 196 | fn read_block( 197 | count_input: &mut util::CountBufRead<'_, R>, 198 | output: &mut W, 199 | check_method: CheckMethod, 200 | records: &mut Vec, 201 | header_size: u8, 202 | ) -> error::Result 203 | where 204 | R: io::BufRead, 205 | W: io::Write, 206 | { 207 | let mut digest = CRC32.digest(); 208 | digest.update(&[header_size]); 209 | let 
header_size = ((header_size as u64) << 2) - 1; 210 | 211 | let block_header = { 212 | let mut taken = count_input.take(header_size); 213 | let mut digested = io::BufReader::new(util::CrcDigestRead::new(&mut taken, &mut digest)); 214 | read_block_header(&mut digested, header_size)? 215 | }; 216 | 217 | let crc32 = count_input.read_u32::()?; 218 | let digest_crc32 = digest.finalize(); 219 | if crc32 != digest_crc32 { 220 | return Err(error::Error::XzError(format!( 221 | "Invalid header CRC32: expected 0x{:08x} but got 0x{:08x}", 222 | crc32, digest_crc32 223 | ))); 224 | } 225 | 226 | let mut tmpbuf: Vec = Vec::new(); 227 | let filters = block_header.filters; 228 | for (i, filter) in filters.iter().enumerate() { 229 | if i == 0 { 230 | // TODO: use SubBufRead on input if packed_size is known? 231 | let packed_size = decode_filter(count_input, &mut tmpbuf, filter)?; 232 | if let Some(expected_packed_size) = block_header.packed_size { 233 | if (packed_size as u64) != expected_packed_size { 234 | return Err(error::Error::XzError(format!( 235 | "Invalid compressed size: expected {} but got {}", 236 | expected_packed_size, packed_size 237 | ))); 238 | } 239 | } 240 | } else { 241 | let mut newbuf: Vec = Vec::new(); 242 | decode_filter( 243 | &mut io::BufReader::new(tmpbuf.as_slice()), 244 | &mut newbuf, 245 | filter, 246 | )?; 247 | // TODO: does this move or copy? 
248 | tmpbuf = newbuf; 249 | } 250 | } 251 | 252 | let unpacked_size = tmpbuf.len(); 253 | lzma_info!("XZ block decompressed to {} byte(s)", tmpbuf.len()); 254 | 255 | if let Some(expected_unpacked_size) = block_header.unpacked_size { 256 | if (unpacked_size as u64) != expected_unpacked_size { 257 | return Err(error::Error::XzError(format!( 258 | "Invalid decompressed size: expected {} but got {}", 259 | expected_unpacked_size, unpacked_size 260 | ))); 261 | } 262 | } 263 | 264 | let count = count_input.count(); 265 | let padding_size = ((count ^ 0x03) + 1) & 0x03; 266 | lzma_info!( 267 | "XZ block: {} byte(s) read, {} byte(s) of padding, check method {:?}", 268 | count, 269 | padding_size, 270 | check_method 271 | ); 272 | for _ in 0..padding_size { 273 | let byte = count_input.read_u8()?; 274 | if byte != 0 { 275 | return Err(error::Error::XzError( 276 | "Invalid block padding, must be null bytes".to_string(), 277 | )); 278 | } 279 | } 280 | validate_block_check(count_input, tmpbuf.as_slice(), check_method)?; 281 | 282 | output.write_all(tmpbuf.as_slice())?; 283 | records.push(Record { 284 | unpadded_size: (count_input.count() - padding_size) as u64, 285 | unpacked_size: unpacked_size as u64, 286 | }); 287 | 288 | let finished = false; 289 | Ok(finished) 290 | } 291 | 292 | /// Verify block checksum against the "Block Check" field. 293 | /// 294 | /// See spec section 3.4 for details. 
295 | fn validate_block_check( 296 | input: &mut R, 297 | buf: &[u8], 298 | check_method: CheckMethod, 299 | ) -> error::Result<()> 300 | where 301 | R: io::BufRead, 302 | { 303 | match check_method { 304 | CheckMethod::None => (), 305 | CheckMethod::Crc32 => { 306 | let crc32 = input.read_u32::()?; 307 | let digest_crc32 = CRC32.checksum(buf); 308 | if crc32 != digest_crc32 { 309 | return Err(error::Error::XzError(format!( 310 | "Invalid block CRC32, expected 0x{:08x} but got 0x{:08x}", 311 | crc32, digest_crc32 312 | ))); 313 | } 314 | } 315 | CheckMethod::Crc64 => { 316 | let crc64 = input.read_u64::()?; 317 | let digest_crc64 = CRC64.checksum(buf); 318 | if crc64 != digest_crc64 { 319 | return Err(error::Error::XzError(format!( 320 | "Invalid block CRC64, expected 0x{:016x} but got 0x{:016x}", 321 | crc64, digest_crc64 322 | ))); 323 | } 324 | } 325 | // TODO 326 | CheckMethod::Sha256 => { 327 | return Err(error::Error::XzError( 328 | "Unsupported SHA-256 checksum (not yet implemented)".to_string(), 329 | )); 330 | } 331 | } 332 | Ok(()) 333 | } 334 | 335 | fn decode_filter(input: &mut R, output: &mut W, filter: &Filter) -> error::Result 336 | where 337 | R: io::BufRead, 338 | W: io::Write, 339 | { 340 | let mut count_input = util::CountBufRead::new(input); 341 | match filter.filter_id { 342 | FilterId::Lzma2 => { 343 | if filter.props.len() != 1 { 344 | return Err(error::Error::XzError(format!( 345 | "Invalid properties for filter {:?}", 346 | filter.filter_id 347 | ))); 348 | } 349 | // TODO: properties?? 
350 | Lzma2Decoder::new().decompress(&mut count_input, output)?; 351 | Ok(count_input.count()) 352 | } 353 | } 354 | } 355 | 356 | fn read_block_header(input: &mut R, header_size: u64) -> error::Result 357 | where 358 | R: io::BufRead, 359 | { 360 | let flags = input.read_u8()?; 361 | let num_filters = (flags & 0x03) + 1; 362 | let reserved = flags & 0x3C; 363 | let has_packed_size = flags & 0x40 != 0; 364 | let has_unpacked_size = flags & 0x80 != 0; 365 | 366 | lzma_info!( 367 | "XZ block header: {{ header_size: {}, flags: {}, num_filters: {}, has_packed_size: {}, has_unpacked_size: {} }}", 368 | header_size, 369 | flags, 370 | num_filters, 371 | has_packed_size, 372 | has_unpacked_size 373 | ); 374 | 375 | if reserved != 0 { 376 | return Err(error::Error::XzError(format!( 377 | "Invalid block flags {}, reserved bits (mask 0x3C) must be zero", 378 | flags 379 | ))); 380 | } 381 | 382 | let packed_size = if has_packed_size { 383 | Some(get_multibyte(input)?) 384 | } else { 385 | None 386 | }; 387 | 388 | let unpacked_size = if has_unpacked_size { 389 | Some(get_multibyte(input)?) 
390 | } else { 391 | None 392 | }; 393 | 394 | lzma_info!( 395 | "XZ block header: {{ packed_size: {:?}, unpacked_size: {:?} }}", 396 | packed_size, 397 | unpacked_size 398 | ); 399 | 400 | let mut filters: Vec = vec![]; 401 | for _ in 0..num_filters { 402 | let filter_id = get_filter_id(get_multibyte(input)?)?; 403 | let size_of_properties = get_multibyte(input)?; 404 | 405 | lzma_info!( 406 | "XZ filter: {{ filter_id: {:?}, size_of_properties: {} }}", 407 | filter_id, 408 | size_of_properties 409 | ); 410 | 411 | // Early abort to avoid allocating a large vector 412 | if size_of_properties > header_size { 413 | return Err(error::Error::XzError(format!( 414 | "Size of filter properties exceeds block header size ({} > {})", 415 | size_of_properties, header_size 416 | ))); 417 | } 418 | 419 | let mut buf = vec![0; size_of_properties as usize]; 420 | input.read_exact(buf.as_mut_slice()).map_err(|e| { 421 | error::Error::XzError(format!( 422 | "Could not read filter properties of size {}: {}", 423 | size_of_properties, e 424 | )) 425 | })?; 426 | 427 | lzma_info!("XZ filter properties: {:?}", buf); 428 | 429 | filters.push(Filter { 430 | filter_id, 431 | props: buf, 432 | }) 433 | } 434 | 435 | if !util::flush_zero_padding(input)? 
{ 436 | return Err(error::Error::XzError( 437 | "Invalid block header padding, must be null bytes".to_string(), 438 | )); 439 | } 440 | 441 | Ok(BlockHeader { 442 | filters, 443 | packed_size, 444 | unpacked_size, 445 | }) 446 | } 447 | 448 | pub fn get_multibyte(input: &mut R) -> error::Result 449 | where 450 | R: io::Read, 451 | { 452 | let mut result = 0; 453 | for i in 0..9 { 454 | let byte = input.read_u8()?; 455 | result ^= ((byte & 0x7F) as u64) << (i * 7); 456 | if (byte & 0x80) == 0 { 457 | return Ok(result); 458 | } 459 | } 460 | 461 | Err(error::Error::XzError( 462 | "Invalid multi-byte encoding".to_string(), 463 | )) 464 | } 465 | -------------------------------------------------------------------------------- /src/encode/dumbencoder.rs: -------------------------------------------------------------------------------- 1 | use crate::compress::{Options, UnpackedSize}; 2 | use crate::encode::rangecoder; 3 | use byteorder::{LittleEndian, WriteBytesExt}; 4 | use std::io; 5 | 6 | pub struct Encoder<'a, W> 7 | where 8 | W: 'a + io::Write, 9 | { 10 | rangecoder: rangecoder::RangeEncoder<'a, W>, 11 | literal_probs: [[u16; 0x300]; 8], 12 | is_match: [u16; 4], // true = LZ, false = literal 13 | unpacked_size: UnpackedSize, 14 | } 15 | 16 | const LC: u32 = 3; 17 | const LP: u32 = 0; 18 | const PB: u32 = 2; 19 | 20 | impl<'a, W> Encoder<'a, W> 21 | where 22 | W: io::Write, 23 | { 24 | pub fn from_stream(stream: &'a mut W, options: &Options) -> io::Result { 25 | let dict_size = 0x0080_0000; 26 | 27 | // Properties 28 | let props = (LC + 9 * (LP + 5 * PB)) as u8; 29 | lzma_info!("Properties {{ lc: {}, lp: {}, pb: {} }}", LC, LP, PB); 30 | stream.write_u8(props)?; 31 | 32 | // Dictionary 33 | lzma_info!("Dict size: {}", dict_size); 34 | stream.write_u32::(dict_size)?; 35 | 36 | // Unpacked size 37 | match &options.unpacked_size { 38 | UnpackedSize::WriteToHeader(unpacked_size) => { 39 | let value: u64 = match unpacked_size { 40 | None => { 41 | lzma_info!("Unpacked 
size: unknown"); 42 | 0xFFFF_FFFF_FFFF_FFFF 43 | } 44 | Some(x) => { 45 | lzma_info!("Unpacked size: {}", x); 46 | *x 47 | } 48 | }; 49 | stream.write_u64::(value)?; 50 | } 51 | UnpackedSize::SkipWritingToHeader => {} 52 | }; 53 | 54 | let encoder = Encoder { 55 | rangecoder: rangecoder::RangeEncoder::new(stream), 56 | literal_probs: [[0x400; 0x300]; 8], 57 | is_match: [0x400; 4], 58 | unpacked_size: options.unpacked_size, 59 | }; 60 | 61 | Ok(encoder) 62 | } 63 | 64 | pub fn process(mut self, input: R) -> io::Result<()> 65 | where 66 | R: io::Read, 67 | { 68 | let mut prev_byte = 0u8; 69 | let mut input_len = 0; 70 | 71 | for (out_len, byte_result) in input.bytes().enumerate() { 72 | let byte = byte_result?; 73 | let pos_state = out_len & 3; 74 | input_len = out_len; 75 | 76 | // Literal 77 | self.rangecoder 78 | .encode_bit(&mut self.is_match[pos_state], false)?; 79 | 80 | self.encode_literal(byte, prev_byte)?; 81 | prev_byte = byte; 82 | } 83 | 84 | self.finish(input_len + 1) 85 | } 86 | 87 | fn finish(&mut self, input_len: usize) -> io::Result<()> { 88 | match self.unpacked_size { 89 | UnpackedSize::SkipWritingToHeader | UnpackedSize::WriteToHeader(Some(_)) => {} 90 | UnpackedSize::WriteToHeader(None) => { 91 | // Write end-of-stream marker 92 | let pos_state = input_len & 3; 93 | 94 | // Match 95 | self.rangecoder 96 | .encode_bit(&mut self.is_match[pos_state], true)?; 97 | // New distance 98 | self.rangecoder.encode_bit(&mut 0x400, false)?; 99 | 100 | // Dummy len, as small as possible (len = 0) 101 | for _ in 0..4 { 102 | self.rangecoder.encode_bit(&mut 0x400, false)?; 103 | } 104 | 105 | // Distance marker = 0xFFFFFFFF 106 | // pos_slot = 63 107 | for _ in 0..6 { 108 | self.rangecoder.encode_bit(&mut 0x400, true)?; 109 | } 110 | // num_direct_bits = 30 111 | // result = 3 << 30 = C000_0000 112 | // + 3FFF_FFF0 (26 bits) 113 | // + F ( 4 bits) 114 | for _ in 0..30 { 115 | self.rangecoder.encode_bit(&mut 0x400, true)?; 116 | } 117 | // = FFFF_FFFF 118 | } 119 
| } 120 | 121 | // Flush range coder 122 | self.rangecoder.finish() 123 | } 124 | 125 | fn encode_literal(&mut self, byte: u8, prev_byte: u8) -> io::Result<()> { 126 | let prev_byte = prev_byte as usize; 127 | 128 | let mut result: usize = 1; 129 | let lit_state = prev_byte >> 5; 130 | let probs = &mut self.literal_probs[lit_state]; 131 | 132 | for i in 0..8 { 133 | let bit = ((byte >> (7 - i)) & 1) != 0; 134 | self.rangecoder.encode_bit(&mut probs[result], bit)?; 135 | result = (result << 1) ^ (bit as usize); 136 | } 137 | 138 | Ok(()) 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/encode/lzma2.rs: -------------------------------------------------------------------------------- 1 | use byteorder::{BigEndian, WriteBytesExt}; 2 | use std::io; 3 | 4 | pub fn encode_stream(input: &mut R, output: &mut W) -> io::Result<()> 5 | where 6 | R: io::BufRead, 7 | W: io::Write, 8 | { 9 | let mut buf = vec![0u8; 0x10000]; 10 | loop { 11 | let n = input.read(&mut buf)?; 12 | if n == 0 { 13 | // status = EOF 14 | output.write_u8(0)?; 15 | break; 16 | } 17 | 18 | // status = uncompressed reset dict 19 | output.write_u8(1)?; 20 | // unpacked size 21 | output.write_u16::((n - 1) as u16)?; 22 | // contents 23 | output.write_all(&buf[..n])?; 24 | } 25 | Ok(()) 26 | } 27 | -------------------------------------------------------------------------------- /src/encode/mod.rs: -------------------------------------------------------------------------------- 1 | //! Encoding logic. 2 | 3 | pub mod dumbencoder; 4 | pub mod lzma2; 5 | pub mod options; 6 | mod rangecoder; 7 | mod util; 8 | pub mod xz; 9 | -------------------------------------------------------------------------------- /src/encode/options.rs: -------------------------------------------------------------------------------- 1 | /// Options for the `lzma_compress` function. 
2 | #[derive(Clone, Copy, Debug, Default)] 3 | pub struct Options { 4 | /// Defines whether the unpacked size should be written to the header. 5 | /// The default is [`UnpackedSize::WriteToHeader(None)`]. 6 | pub unpacked_size: UnpackedSize, 7 | } 8 | 9 | /// Alternatives for handling unpacked size. 10 | #[derive(Clone, Copy, Debug)] 11 | pub enum UnpackedSize { 12 | /// If the value is `Some(u64)`, write the provided u64 value to the header. 13 | /// There is currently no check in place that verifies that this is the 14 | /// actual number of bytes provided by the input stream. 15 | /// If the value is [`None`], write the special `0xFFFF_FFFF_FFFF_FFFF` code 16 | /// to the header, indicating that the unpacked size is unknown. 17 | WriteToHeader(Option), 18 | /// Do not write anything to the header. The unpacked size needs to be 19 | /// stored elsewhere and provided when reading the file. Note that this 20 | /// is a non-standard way of writing LZMA data, but is used by certain 21 | /// libraries such as [OpenCTM](http://openctm.sourceforge.net/). 
22 | SkipWritingToHeader, 23 | } 24 | 25 | impl Default for UnpackedSize { 26 | fn default() -> UnpackedSize { 27 | UnpackedSize::WriteToHeader(None) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/encode/rangecoder.rs: -------------------------------------------------------------------------------- 1 | use byteorder::WriteBytesExt; 2 | use std::io; 3 | 4 | #[cfg(test)] 5 | use crate::util::const_assert; 6 | 7 | pub struct RangeEncoder<'a, W> 8 | where 9 | W: 'a + io::Write, 10 | { 11 | stream: &'a mut W, 12 | range: u32, 13 | low: u64, 14 | cache: u8, 15 | cachesz: u32, 16 | } 17 | 18 | impl<'a, W> RangeEncoder<'a, W> 19 | where 20 | W: io::Write, 21 | { 22 | #[allow(clippy::let_and_return)] 23 | pub fn new(stream: &'a mut W) -> Self { 24 | let enc = Self { 25 | stream, 26 | range: 0xFFFF_FFFF, 27 | low: 0, 28 | cache: 0, 29 | cachesz: 1, 30 | }; 31 | lzma_debug!("0 {{ range: {:08x}, low: {:010x} }}", enc.range, enc.low); 32 | enc 33 | } 34 | 35 | fn write_low(&mut self) -> io::Result<()> { 36 | if self.low < 0xFF00_0000 || self.low > 0xFFFF_FFFF { 37 | let mut tmp = self.cache; 38 | loop { 39 | let byte = tmp.wrapping_add((self.low >> 32) as u8); 40 | self.stream.write_u8(byte)?; 41 | lzma_debug!("> byte: {:02x}", byte); 42 | tmp = 0xFF; 43 | self.cachesz -= 1; 44 | if self.cachesz == 0 { 45 | break; 46 | } 47 | } 48 | self.cache = (self.low >> 24) as u8; 49 | } 50 | 51 | self.cachesz += 1; 52 | self.low = (self.low << 8) & 0xFFFF_FFFF; 53 | Ok(()) 54 | } 55 | 56 | pub fn finish(&mut self) -> io::Result<()> { 57 | for _ in 0..5 { 58 | self.write_low()?; 59 | 60 | lzma_debug!("$ {{ range: {:08x}, low: {:010x} }}", self.range, self.low); 61 | } 62 | Ok(()) 63 | } 64 | 65 | fn normalize(&mut self) -> io::Result<()> { 66 | while self.range < 0x0100_0000 { 67 | lzma_debug!( 68 | "+ {{ range: {:08x}, low: {:010x}, cache: {:02x}, {} }}", 69 | self.range, 70 | self.low, 71 | self.cache, 72 | self.cachesz 73 | ); 74 
| self.range <<= 8; 75 | self.write_low()?; 76 | lzma_debug!( 77 | "* {{ range: {:08x}, low: {:010x}, cache: {:02x}, {} }}", 78 | self.range, 79 | self.low, 80 | self.cache, 81 | self.cachesz 82 | ); 83 | } 84 | lzma_trace!(" {{ range: {:08x}, low: {:010x} }}", self.range, self.low); 85 | Ok(()) 86 | } 87 | 88 | pub fn encode_bit(&mut self, prob: &mut u16, bit: bool) -> io::Result<()> { 89 | let bound: u32 = (self.range >> 11) * (*prob as u32); 90 | lzma_trace!( 91 | " bound: {:08x}, prob: {:04x}, bit: {}", 92 | bound, 93 | prob, 94 | bit as u8 95 | ); 96 | 97 | if bit { 98 | *prob -= *prob >> 5; 99 | self.low += bound as u64; 100 | self.range -= bound; 101 | } else { 102 | *prob += (0x800_u16 - *prob) >> 5; 103 | self.range = bound; 104 | } 105 | 106 | self.normalize() 107 | } 108 | 109 | #[cfg(test)] 110 | fn encode_bit_tree( 111 | &mut self, 112 | num_bits: usize, 113 | probs: &mut [u16], 114 | value: u32, 115 | ) -> io::Result<()> { 116 | debug_assert!(value.leading_zeros() as usize + num_bits >= 32); 117 | let mut tmp: usize = 1; 118 | for i in 0..num_bits { 119 | let bit = ((value >> (num_bits - i - 1)) & 1) != 0; 120 | self.encode_bit(&mut probs[tmp], bit)?; 121 | tmp = (tmp << 1) ^ (bit as usize); 122 | } 123 | Ok(()) 124 | } 125 | 126 | #[cfg(test)] 127 | pub fn encode_reverse_bit_tree( 128 | &mut self, 129 | num_bits: usize, 130 | probs: &mut [u16], 131 | offset: usize, 132 | mut value: u32, 133 | ) -> io::Result<()> { 134 | debug_assert!(value.leading_zeros() as usize + num_bits >= 32); 135 | let mut tmp: usize = 1; 136 | for _ in 0..num_bits { 137 | let bit = (value & 1) != 0; 138 | value >>= 1; 139 | self.encode_bit(&mut probs[offset + tmp], bit)?; 140 | tmp = (tmp << 1) ^ (bit as usize); 141 | } 142 | Ok(()) 143 | } 144 | } 145 | 146 | #[cfg(test)] 147 | #[derive(Debug, Clone)] 148 | pub struct BitTree { 149 | probs: [u16; PROBS_ARRAY_LEN], 150 | } 151 | 152 | #[cfg(test)] 153 | impl BitTree { 154 | pub fn new() -> Self { 155 | // The validity of 
PROBS_ARRAY_LEN is checked at compile-time with a macro 156 | // that confirms that the argument P passed is indeed 1 << N for 157 | // some N using usize::trailing_zeros to calculate floor(log_2(P)). 158 | // 159 | // Thus, BitTree is only valid for any P such that 160 | // P = 2 ** floor(log_2(P)), where P is the length of the probability array 161 | // of the BitTree. This maintains the invariant that P = 1 << N. 162 | // 163 | // This precondition must be checked for any way to construct a new, valid 164 | // instance of BitTree. Here it is checked for BitTree::new(), but if 165 | // another function is added that returns a new instance of BitTree, 166 | // this assertion must be checked there as well. 167 | const_assert!( 168 | "BitTree's PROBS_ARRAY_LEN parameter must be a power of 2", 169 | PROBS_ARRAY_LEN: usize => 170 | (1 << (PROBS_ARRAY_LEN.trailing_zeros() as usize)) == PROBS_ARRAY_LEN 171 | ); 172 | BitTree { 173 | probs: [0x400; PROBS_ARRAY_LEN], 174 | } 175 | } 176 | 177 | // NUM_BITS is derived from PROBS_ARRAY_LEN because of the lack of 178 | // generic const expressions. Where PROBS_ARRAY_LEN is a power of 2, 179 | // NUM_BITS can be derived by the number of trailing zeroes. 
180 | const NUM_BITS: usize = PROBS_ARRAY_LEN.trailing_zeros() as usize; 181 | 182 | pub fn encode( 183 | &mut self, 184 | rangecoder: &mut RangeEncoder, 185 | value: u32, 186 | ) -> io::Result<()> { 187 | rangecoder.encode_bit_tree(Self::NUM_BITS, &mut self.probs, value) 188 | } 189 | 190 | pub fn encode_reverse( 191 | &mut self, 192 | rangecoder: &mut RangeEncoder, 193 | value: u32, 194 | ) -> io::Result<()> { 195 | rangecoder.encode_reverse_bit_tree(Self::NUM_BITS, &mut self.probs, 0, value) 196 | } 197 | } 198 | 199 | #[cfg(test)] 200 | pub struct LenEncoder { 201 | choice: u16, 202 | choice2: u16, 203 | low_coder: [BitTree<{ 1 << 3 }>; 16], 204 | mid_coder: [BitTree<{ 1 << 3 }>; 16], 205 | high_coder: BitTree<{ 1 << 8 }>, 206 | } 207 | 208 | #[cfg(test)] 209 | impl LenEncoder { 210 | pub fn new() -> Self { 211 | LenEncoder { 212 | choice: 0x400, 213 | choice2: 0x400, 214 | low_coder: [ 215 | BitTree::new(), 216 | BitTree::new(), 217 | BitTree::new(), 218 | BitTree::new(), 219 | BitTree::new(), 220 | BitTree::new(), 221 | BitTree::new(), 222 | BitTree::new(), 223 | BitTree::new(), 224 | BitTree::new(), 225 | BitTree::new(), 226 | BitTree::new(), 227 | BitTree::new(), 228 | BitTree::new(), 229 | BitTree::new(), 230 | BitTree::new(), 231 | ], 232 | mid_coder: [ 233 | BitTree::new(), 234 | BitTree::new(), 235 | BitTree::new(), 236 | BitTree::new(), 237 | BitTree::new(), 238 | BitTree::new(), 239 | BitTree::new(), 240 | BitTree::new(), 241 | BitTree::new(), 242 | BitTree::new(), 243 | BitTree::new(), 244 | BitTree::new(), 245 | BitTree::new(), 246 | BitTree::new(), 247 | BitTree::new(), 248 | BitTree::new(), 249 | ], 250 | high_coder: BitTree::new(), 251 | } 252 | } 253 | 254 | pub fn encode( 255 | &mut self, 256 | rangecoder: &mut RangeEncoder, 257 | pos_state: usize, 258 | value: u32, 259 | ) -> io::Result<()> { 260 | let is_low: bool = value < 8; 261 | rangecoder.encode_bit(&mut self.choice, !is_low)?; 262 | if is_low { 263 | return 
self.low_coder[pos_state].encode(rangecoder, value); 264 | } 265 | 266 | let is_middle: bool = value < 16; 267 | rangecoder.encode_bit(&mut self.choice2, !is_middle)?; 268 | if is_middle { 269 | return self.mid_coder[pos_state].encode(rangecoder, value - 8); 270 | } 271 | 272 | self.high_coder.encode(rangecoder, value - 16) 273 | } 274 | } 275 | 276 | #[cfg(test)] 277 | mod test { 278 | use super::*; 279 | use crate::decode::rangecoder::{LenDecoder, RangeDecoder}; 280 | use crate::{decode, encode}; 281 | use seq_macro::seq; 282 | use std::io::BufReader; 283 | 284 | fn encode_decode(prob_init: u16, bits: &[bool]) { 285 | let mut buf: Vec = Vec::new(); 286 | 287 | let mut encoder = RangeEncoder::new(&mut buf); 288 | let mut prob = prob_init; 289 | for &b in bits { 290 | encoder.encode_bit(&mut prob, b).unwrap(); 291 | } 292 | encoder.finish().unwrap(); 293 | 294 | let mut bufread = BufReader::new(buf.as_slice()); 295 | let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); 296 | let mut prob = prob_init; 297 | for &b in bits { 298 | assert_eq!(decoder.decode_bit(&mut prob, true).unwrap(), b); 299 | } 300 | assert!(decoder.is_finished_ok().unwrap()); 301 | } 302 | 303 | #[test] 304 | fn test_encode_decode_zeros() { 305 | encode_decode(0x400, &[false; 10000]); 306 | } 307 | 308 | #[test] 309 | fn test_encode_decode_ones() { 310 | encode_decode(0x400, &[true; 10000]); 311 | } 312 | 313 | fn encode_decode_bittree(values: &[u32]) { 314 | let mut buf: Vec = Vec::new(); 315 | 316 | let mut encoder = RangeEncoder::new(&mut buf); 317 | let mut tree = encode::rangecoder::BitTree::::new(); 318 | for &v in values { 319 | tree.encode(&mut encoder, v).unwrap(); 320 | } 321 | encoder.finish().unwrap(); 322 | 323 | let mut bufread = BufReader::new(buf.as_slice()); 324 | let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); 325 | let mut tree = decode::rangecoder::BitTree::::new(); 326 | for &v in values { 327 | assert_eq!(tree.parse(&mut decoder, true).unwrap(), v); 328 | 
} 329 | assert!(decoder.is_finished_ok().unwrap()); 330 | } 331 | 332 | #[test] 333 | fn test_encode_decode_bittree_zeros() { 334 | seq!(NUM_BITS in 0..16 { 335 | encode_decode_bittree::<{1 << NUM_BITS}>(&[0; 10000]); 336 | }); 337 | } 338 | 339 | #[test] 340 | fn test_encode_decode_bittree_ones() { 341 | seq!(NUM_BITS in 0..16 { 342 | encode_decode_bittree::<{1 << NUM_BITS}>(&[(1 << NUM_BITS) - 1; 10000]); 343 | }); 344 | } 345 | 346 | #[test] 347 | fn test_encode_decode_bittree_all() { 348 | seq!(NUM_BITS in 0..16 { 349 | let max = 1 << NUM_BITS; 350 | let values: Vec = (0..max).collect(); 351 | encode_decode_bittree::<{1 << NUM_BITS}>(&values); 352 | }); 353 | } 354 | 355 | fn encode_decode_reverse_bittree(values: &[u32]) { 356 | let mut buf: Vec = Vec::new(); 357 | 358 | let mut encoder = RangeEncoder::new(&mut buf); 359 | let mut tree = encode::rangecoder::BitTree::::new(); 360 | for &v in values { 361 | tree.encode_reverse(&mut encoder, v).unwrap(); 362 | } 363 | encoder.finish().unwrap(); 364 | 365 | let mut bufread = BufReader::new(buf.as_slice()); 366 | let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); 367 | let mut tree = decode::rangecoder::BitTree::::new(); 368 | for &v in values { 369 | assert_eq!(tree.parse_reverse(&mut decoder, true).unwrap(), v); 370 | } 371 | assert!(decoder.is_finished_ok().unwrap()); 372 | } 373 | 374 | #[test] 375 | fn test_encode_decode_reverse_bittree_zeros() { 376 | seq!(NUM_BITS in 0..16 { 377 | encode_decode_reverse_bittree::<{1 << NUM_BITS}>(&[0; 10000]); 378 | }); 379 | } 380 | 381 | #[test] 382 | fn test_encode_decode_reverse_bittree_ones() { 383 | seq!(NUM_BITS in 0..16 { 384 | encode_decode_reverse_bittree::<{1 << NUM_BITS}>( 385 | &[(1 << NUM_BITS) - 1; 10000], 386 | ); 387 | }); 388 | } 389 | 390 | #[test] 391 | fn test_encode_decode_reverse_bittree_all() { 392 | seq!(NUM_BITS in 0..16 { 393 | let max = 1 << NUM_BITS; 394 | let values: Vec = (0..max).collect(); 395 | encode_decode_reverse_bittree::<{1 << 
NUM_BITS}>(&values); 396 | }); 397 | } 398 | 399 | fn encode_decode_length(pos_state: usize, values: &[u32]) { 400 | let mut buf: Vec = Vec::new(); 401 | 402 | let mut encoder = RangeEncoder::new(&mut buf); 403 | let mut len_encoder = LenEncoder::new(); 404 | for &v in values { 405 | len_encoder.encode(&mut encoder, pos_state, v).unwrap(); 406 | } 407 | encoder.finish().unwrap(); 408 | 409 | let mut bufread = BufReader::new(buf.as_slice()); 410 | let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); 411 | let mut len_decoder = LenDecoder::new(); 412 | for &v in values { 413 | assert_eq!( 414 | len_decoder.decode(&mut decoder, pos_state, true).unwrap(), 415 | v as usize 416 | ); 417 | } 418 | assert!(decoder.is_finished_ok().unwrap()); 419 | } 420 | 421 | #[test] 422 | fn test_encode_decode_length_zeros() { 423 | for pos_state in 0..16 { 424 | encode_decode_length(pos_state, &[0; 10000]); 425 | } 426 | } 427 | 428 | #[test] 429 | fn test_encode_decode_length_all() { 430 | for pos_state in 0..16 { 431 | let max = (1 << 8) + 16; 432 | let values: Vec = (0..max).collect(); 433 | encode_decode_length(pos_state, &values); 434 | } 435 | } 436 | } 437 | -------------------------------------------------------------------------------- /src/encode/util.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | /// An [`io::Write`] computing a digest on the bytes written. 
4 | pub struct CrcDigestWrite<'a, 'b, W, S> 5 | where 6 | W: 'a + io::Write, 7 | S: crc::Width, 8 | { 9 | /// Underlying writer 10 | write: &'a mut W, 11 | /// Hasher 12 | digest: &'a mut crc::Digest<'b, S>, 13 | } 14 | 15 | impl<'a, 'b, W, S> CrcDigestWrite<'a, 'b, W, S> 16 | where 17 | W: io::Write, 18 | S: crc::Width, 19 | { 20 | pub fn new(write: &'a mut W, digest: &'a mut crc::Digest<'b, S>) -> Self { 21 | Self { write, digest } 22 | } 23 | } 24 | 25 | impl<'a, 'b, W> io::Write for CrcDigestWrite<'a, 'b, W, u32> 26 | where 27 | W: io::Write, 28 | { 29 | fn write(&mut self, buf: &[u8]) -> io::Result { 30 | let result = self.write.write(buf)?; 31 | self.digest.update(&buf[..result]); 32 | Ok(result) 33 | } 34 | fn flush(&mut self) -> io::Result<()> { 35 | self.write.flush() 36 | } 37 | } 38 | 39 | /// An [`io::Write`] counting the bytes written. 40 | pub struct CountWrite<'a, W> 41 | where 42 | W: 'a + io::Write, 43 | { 44 | /// Underlying writer 45 | write: &'a mut W, 46 | /// Number of bytes written 47 | count: usize, 48 | } 49 | 50 | impl<'a, W> CountWrite<'a, W> 51 | where 52 | W: io::Write, 53 | { 54 | pub fn new(write: &'a mut W) -> Self { 55 | Self { write, count: 0 } 56 | } 57 | 58 | pub fn count(&self) -> usize { 59 | self.count 60 | } 61 | } 62 | 63 | impl<'a, W> io::Write for CountWrite<'a, W> 64 | where 65 | W: io::Write, 66 | { 67 | fn write(&mut self, buf: &[u8]) -> io::Result { 68 | let result = self.write.write(buf)?; 69 | self.count += result; 70 | Ok(result) 71 | } 72 | 73 | fn flush(&mut self) -> io::Result<()> { 74 | self.write.flush() 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/encode/xz.rs: -------------------------------------------------------------------------------- 1 | use crate::decode; 2 | use crate::encode::{lzma2, util}; 3 | use crate::xz::crc::CRC32; 4 | use crate::xz::{footer, header, CheckMethod, StreamFlags}; 5 | use byteorder::{LittleEndian, WriteBytesExt}; 6 | use 
std::io; 7 | use std::io::Write; 8 | 9 | pub fn encode_stream(input: &mut R, output: &mut W) -> io::Result<()> 10 | where 11 | R: io::BufRead, 12 | W: io::Write, 13 | { 14 | let stream_flags = StreamFlags { 15 | check_method: CheckMethod::None, 16 | }; 17 | 18 | // Header 19 | write_header(output, stream_flags)?; 20 | 21 | // Block 22 | let (unpadded_size, unpacked_size) = write_block(input, output)?; 23 | 24 | // Index 25 | let index_size = write_index(output, unpadded_size, unpacked_size)?; 26 | 27 | // Footer 28 | write_footer(output, stream_flags, index_size) 29 | } 30 | 31 | fn write_header(output: &mut W, stream_flags: StreamFlags) -> io::Result<()> 32 | where 33 | W: io::Write, 34 | { 35 | output.write_all(header::XZ_MAGIC)?; 36 | let mut digest = CRC32.digest(); 37 | { 38 | let mut digested = util::CrcDigestWrite::new(output, &mut digest); 39 | stream_flags.serialize(&mut digested)?; 40 | } 41 | let crc32 = digest.finalize(); 42 | output.write_u32::(crc32)?; 43 | Ok(()) 44 | } 45 | 46 | fn write_footer(output: &mut W, stream_flags: StreamFlags, index_size: usize) -> io::Result<()> 47 | where 48 | W: io::Write, 49 | { 50 | let mut digest = CRC32.digest(); 51 | let mut footer_buf: Vec = Vec::new(); 52 | { 53 | let mut digested = util::CrcDigestWrite::new(&mut footer_buf, &mut digest); 54 | 55 | let backward_size = (index_size >> 2) - 1; 56 | digested.write_u32::(backward_size as u32)?; 57 | stream_flags.serialize(&mut digested)?; 58 | } 59 | let crc32 = digest.finalize(); 60 | output.write_u32::(crc32)?; 61 | output.write_all(footer_buf.as_slice())?; 62 | 63 | output.write_all(footer::XZ_MAGIC_FOOTER)?; 64 | Ok(()) 65 | } 66 | 67 | fn write_block(input: &mut R, output: &mut W) -> io::Result<(usize, usize)> 68 | where 69 | R: io::BufRead, 70 | W: io::Write, 71 | { 72 | let (unpadded_size, unpacked_size) = { 73 | let mut count_output = util::CountWrite::new(output); 74 | 75 | // Block header 76 | let mut digest = CRC32.digest(); 77 | { 78 | let mut digested = 
util::CrcDigestWrite::new(&mut count_output, &mut digest); 79 | let header_size = 8; 80 | digested.write_u8((header_size >> 2) as u8)?; 81 | let flags = 0x00; // 1 filter, no (un)packed size provided 82 | digested.write_u8(flags)?; 83 | let filter_id = 0x21; // LZMA2 84 | digested.write_u8(filter_id)?; 85 | let size_of_properties = 1; 86 | digested.write_u8(size_of_properties)?; 87 | let properties = 22; // TODO 88 | digested.write_u8(properties)?; 89 | let padding = [0, 0, 0]; 90 | digested.write_all(&padding)?; 91 | } 92 | let crc32 = digest.finalize(); 93 | count_output.write_u32::(crc32)?; 94 | 95 | // Block 96 | let mut count_input = decode::util::CountBufRead::new(input); 97 | lzma2::encode_stream(&mut count_input, &mut count_output)?; 98 | (count_output.count(), count_input.count()) 99 | }; 100 | lzma_info!( 101 | "Unpadded size = {}, unpacked_size = {}", 102 | unpadded_size, 103 | unpacked_size 104 | ); 105 | 106 | let padding_size = ((unpadded_size ^ 0x03) + 1) & 0x03; 107 | let padding = vec![0; padding_size]; 108 | output.write_all(padding.as_slice())?; 109 | // Checksum = None (cf. 
above) 110 | 111 | Ok((unpadded_size, unpacked_size)) 112 | } 113 | 114 | fn write_index(output: &mut W, unpadded_size: usize, unpacked_size: usize) -> io::Result 115 | where 116 | W: io::Write, 117 | { 118 | let mut count_output = util::CountWrite::new(output); 119 | 120 | let mut digest = CRC32.digest(); 121 | { 122 | let mut digested = util::CrcDigestWrite::new(&mut count_output, &mut digest); 123 | digested.write_u8(0)?; // No more block 124 | let num_records = 1; 125 | write_multibyte(&mut digested, num_records)?; 126 | 127 | write_multibyte(&mut digested, unpadded_size as u64)?; 128 | write_multibyte(&mut digested, unpacked_size as u64)?; 129 | } 130 | 131 | // Padding 132 | let count = count_output.count(); 133 | let padding_size = ((count ^ 0x03) + 1) & 0x03; 134 | { 135 | let mut digested = util::CrcDigestWrite::new(&mut count_output, &mut digest); 136 | let padding = vec![0; padding_size]; 137 | digested.write_all(padding.as_slice())?; 138 | } 139 | 140 | let crc32 = digest.finalize(); 141 | count_output.write_u32::(crc32)?; 142 | 143 | Ok(count_output.count()) 144 | } 145 | 146 | fn write_multibyte(output: &mut W, mut value: u64) -> io::Result<()> 147 | where 148 | W: io::Write, 149 | { 150 | loop { 151 | let byte = (value & 0x7F) as u8; 152 | value >>= 7; 153 | if value == 0 { 154 | output.write_u8(byte)?; 155 | break; 156 | } else { 157 | output.write_u8(0x80 | byte)?; 158 | } 159 | } 160 | 161 | Ok(()) 162 | } 163 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | //! Error handling. 2 | 3 | use std::fmt::Display; 4 | use std::{io, result}; 5 | 6 | /// Library errors. 7 | #[derive(Debug)] 8 | pub enum Error { 9 | /// I/O error. 10 | IoError(io::Error), 11 | /// Not enough bytes to complete header 12 | HeaderTooShort(io::Error), 13 | /// LZMA error. 14 | LzmaError(String), 15 | /// XZ error. 
/// Library errors.
#[derive(Debug)]
pub enum Error {
    /// I/O error.
    IoError(io::Error),
    /// Not enough bytes to complete header
    HeaderTooShort(io::Error),
    /// LZMA error.
    LzmaError(String),
    /// XZ error.
    XzError(String),
}

/// Library result alias.
pub type Result<T> = result::Result<T, Error>;

impl From<io::Error> for Error {
    fn from(e: io::Error) -> Error {
        Error::IoError(e)
    }
}

impl Display for Error {
    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Each message is prefixed with its error family for easy triage.
        match self {
            Error::IoError(e) => write!(fmt, "io error: {}", e),
            Error::HeaderTooShort(e) => write!(fmt, "header too short: {}", e),
            Error::LzmaError(e) => write!(fmt, "lzma error: {}", e),
            Error::XzError(e) => write!(fmt, "xz error: {}", e),
        }
    }
}

impl std::error::Error for Error {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            // Only the I/O-backed variants carry an underlying cause.
            Error::IoError(e) | Error::HeaderTooShort(e) => Some(e),
            Error::LzmaError(_) | Error::XzError(_) => None,
        }
    }
}

#[cfg(test)]
mod test {
    use super::Error;

    #[test]
    fn test_display() {
        let io_err = std::io::Error::new(std::io::ErrorKind::Other, "this is an error");
        assert_eq!(Error::IoError(io_err).to_string(), "io error: this is an error");
        assert_eq!(
            Error::LzmaError("this is an error".to_string()).to_string(),
            "lzma error: this is an error"
        );
        assert_eq!(
            Error::XzError("this is an error".to_string()).to_string(),
            "xz error: this is an error"
        );
    }
}

// ---- src/lib.rs ----
// (crate docs: Pure-Rust codecs for LZMA, LZMA2, and XZ.)
2 | #![cfg_attr(docsrs, feature(doc_cfg, doc_cfg_hide))] 3 | #![deny(missing_docs)] 4 | #![deny(missing_debug_implementations)] 5 | #![forbid(unsafe_code)] 6 | 7 | #[macro_use] 8 | mod macros; 9 | 10 | mod decode; 11 | mod encode; 12 | 13 | pub mod error; 14 | 15 | mod util; 16 | mod xz; 17 | 18 | use std::io; 19 | 20 | /// Compression helpers. 21 | pub mod compress { 22 | pub use crate::encode::options::*; 23 | } 24 | 25 | /// Decompression helpers. 26 | pub mod decompress { 27 | pub use crate::decode::options::*; 28 | 29 | #[cfg(feature = "raw_decoder")] 30 | #[cfg_attr(docsrs, doc(cfg(raw_decoder)))] 31 | pub mod raw { 32 | //! Raw decoding primitives for LZMA/LZMA2 streams. 33 | pub use crate::decode::lzma::{LzmaDecoder, LzmaParams, LzmaProperties}; 34 | pub use crate::decode::lzma2::Lzma2Decoder; 35 | } 36 | 37 | #[cfg(feature = "stream")] 38 | #[cfg_attr(docsrs, doc(cfg(stream)))] 39 | pub use crate::decode::stream::Stream; 40 | } 41 | 42 | /// Decompress LZMA data with default 43 | /// [`Options`](decompress/struct.Options.html). 44 | pub fn lzma_decompress( 45 | input: &mut R, 46 | output: &mut W, 47 | ) -> error::Result<()> { 48 | lzma_decompress_with_options(input, output, &decompress::Options::default()) 49 | } 50 | 51 | /// Decompress LZMA data with the provided options. 52 | pub fn lzma_decompress_with_options( 53 | input: &mut R, 54 | output: &mut W, 55 | options: &decompress::Options, 56 | ) -> error::Result<()> { 57 | let params = decode::lzma::LzmaParams::read_header(input, options)?; 58 | let mut decoder = decode::lzma::LzmaDecoder::new(params, options.memlimit)?; 59 | decoder.decompress(input, output) 60 | } 61 | 62 | /// Compresses data with LZMA and default 63 | /// [`Options`](compress/struct.Options.html). 64 | pub fn lzma_compress( 65 | input: &mut R, 66 | output: &mut W, 67 | ) -> io::Result<()> { 68 | lzma_compress_with_options(input, output, &compress::Options::default()) 69 | } 70 | 71 | /// Compress LZMA data with the provided options. 
72 | pub fn lzma_compress_with_options( 73 | input: &mut R, 74 | output: &mut W, 75 | options: &compress::Options, 76 | ) -> io::Result<()> { 77 | let encoder = encode::dumbencoder::Encoder::from_stream(output, options)?; 78 | encoder.process(input) 79 | } 80 | 81 | /// Decompress LZMA2 data with default 82 | /// [`Options`](decompress/struct.Options.html). 83 | pub fn lzma2_decompress( 84 | input: &mut R, 85 | output: &mut W, 86 | ) -> error::Result<()> { 87 | decode::lzma2::Lzma2Decoder::new().decompress(input, output) 88 | } 89 | 90 | /// Compress data with LZMA2 and default 91 | /// [`Options`](compress/struct.Options.html). 92 | pub fn lzma2_compress( 93 | input: &mut R, 94 | output: &mut W, 95 | ) -> io::Result<()> { 96 | encode::lzma2::encode_stream(input, output) 97 | } 98 | 99 | /// Decompress XZ data with default [`Options`](decompress/struct.Options.html). 100 | pub fn xz_decompress( 101 | input: &mut R, 102 | output: &mut W, 103 | ) -> error::Result<()> { 104 | decode::xz::decode_stream(input, output) 105 | } 106 | 107 | /// Compress data with XZ and default [`Options`](compress/struct.Options.html). 108 | pub fn xz_compress(input: &mut R, output: &mut W) -> io::Result<()> { 109 | encode::xz::encode_stream(input, output) 110 | } 111 | -------------------------------------------------------------------------------- /src/macros.rs: -------------------------------------------------------------------------------- 1 | /// Log trace message (feature: enabled). 2 | #[cfg(feature = "enable_logging")] 3 | macro_rules! lzma_trace { 4 | ($($arg:tt)+) => { 5 | log::trace!($($arg)+); 6 | } 7 | } 8 | 9 | /// Log debug message (feature: enabled). 10 | #[cfg(feature = "enable_logging")] 11 | macro_rules! lzma_debug { 12 | ($($arg:tt)+) => { 13 | log::debug!($($arg)+); 14 | } 15 | } 16 | 17 | /// Log info message (feature: enabled). 18 | #[cfg(feature = "enable_logging")] 19 | macro_rules! 
lzma_info { 20 | ($($arg:tt)+) => { 21 | log::info!($($arg)+); 22 | } 23 | } 24 | 25 | /// Log trace message (feature: disabled). 26 | #[cfg(not(feature = "enable_logging"))] 27 | macro_rules! lzma_trace { 28 | ($($arg:tt)+) => {}; 29 | } 30 | 31 | /// Log debug message (feature: disabled). 32 | #[cfg(not(feature = "enable_logging"))] 33 | macro_rules! lzma_debug { 34 | ($($arg:tt)+) => {}; 35 | } 36 | 37 | /// Log info message (feature: disabled). 38 | #[cfg(not(feature = "enable_logging"))] 39 | macro_rules! lzma_info { 40 | ($($arg:tt)+) => {}; 41 | } 42 | -------------------------------------------------------------------------------- /src/util/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod vec2d; 2 | 3 | /// macro for compile-time const assertions 4 | macro_rules! const_assert { 5 | ($message:expr, $($list:ident : $ty:ty),* => $expr:expr) => {{ 6 | struct Assert<$(const $list: $ty,)*>; 7 | impl<$(const $list: $ty,)*> Assert<$($list,)*> { 8 | const OK: () = { 9 | if !($expr) { 10 | ::std::panic!(::std::concat!("assertion failed: ", $message)); 11 | } 12 | }; 13 | } 14 | Assert::<$($list,)*>::OK 15 | }}; 16 | } 17 | 18 | pub(crate) use const_assert; 19 | -------------------------------------------------------------------------------- /src/util/vec2d.rs: -------------------------------------------------------------------------------- 1 | use std::ops::{Index, IndexMut}; 2 | 3 | /// A 2 dimensional matrix in row-major order backed by a contiguous slice. 4 | #[derive(Debug)] 5 | pub struct Vec2D { 6 | data: Box<[T]>, 7 | cols: usize, 8 | } 9 | 10 | impl Vec2D { 11 | /// Initialize a grid of size (`rows`, `cols`) with the given data element. 
12 | pub fn init(data: T, size: (usize, usize)) -> Vec2D 13 | where 14 | T: Clone, 15 | { 16 | let (rows, cols) = size; 17 | let len = rows 18 | .checked_mul(cols) 19 | .unwrap_or_else(|| panic!("{} rows by {} cols exceeds usize::MAX", rows, cols)); 20 | Vec2D { 21 | data: vec![data; len].into_boxed_slice(), 22 | cols, 23 | } 24 | } 25 | 26 | /// Fills the grid with elements by cloning `value`. 27 | pub fn fill(&mut self, value: T) 28 | where 29 | T: Clone, 30 | { 31 | self.data.fill(value) 32 | } 33 | } 34 | 35 | impl Index for Vec2D { 36 | type Output = [T]; 37 | 38 | #[inline] 39 | fn index(&self, row: usize) -> &Self::Output { 40 | let start_row = row 41 | .checked_mul(self.cols) 42 | .unwrap_or_else(|| panic!("{} row by {} cols exceeds usize::MAX", row, self.cols)); 43 | &self.data[start_row..start_row + self.cols] 44 | } 45 | } 46 | 47 | impl IndexMut for Vec2D { 48 | #[inline] 49 | fn index_mut(&mut self, row: usize) -> &mut Self::Output { 50 | let start_row = row 51 | .checked_mul(self.cols) 52 | .unwrap_or_else(|| panic!("{} row by {} cols exceeds usize::MAX", row, self.cols)); 53 | &mut self.data[start_row..start_row + self.cols] 54 | } 55 | } 56 | 57 | #[cfg(test)] 58 | mod test { 59 | use super::*; 60 | 61 | #[test] 62 | fn init() { 63 | let vec2d = Vec2D::init(1, (2, 3)); 64 | assert_eq!(vec2d[0], [1, 1, 1]); 65 | assert_eq!(vec2d[1], [1, 1, 1]); 66 | } 67 | 68 | #[test] 69 | #[should_panic] 70 | fn init_overflow() { 71 | Vec2D::init(1, (usize::MAX, usize::MAX)); 72 | } 73 | 74 | #[test] 75 | fn fill() { 76 | let mut vec2d = Vec2D::init(0, (2, 3)); 77 | vec2d.fill(7); 78 | assert_eq!(vec2d[0], [7, 7, 7]); 79 | assert_eq!(vec2d[1], [7, 7, 7]); 80 | } 81 | 82 | #[test] 83 | fn index() { 84 | let vec2d = Vec2D { 85 | data: vec![0, 1, 2, 3, 4, 5, 6, 7].into_boxed_slice(), 86 | cols: 2, 87 | }; 88 | assert_eq!(vec2d[0], [0, 1]); 89 | assert_eq!(vec2d[1], [2, 3]); 90 | assert_eq!(vec2d[2], [4, 5]); 91 | assert_eq!(vec2d[3], [6, 7]); 92 | } 93 | 94 | #[test] 
// NOTE(review): these items live inside the enclosing #[cfg(test)] mod test
// of src/util/vec2d.rs (module header outside this view).

#[test]
fn indexmut() {
    let mut grid = Vec2D {
        data: vec![0, 1, 2, 3, 4, 5, 6, 7].into_boxed_slice(),
        cols: 2,
    };

    grid[1][1] = 9;
    assert_eq!(grid[0], [0, 1]);
    // Only cell (1, 1) changes; every other row must be untouched.
    assert_eq!(grid[1], [2, 9]);
    assert_eq!(grid[2], [4, 5]);
    assert_eq!(grid[3], [6, 7]);
}

#[test]
#[should_panic]
fn index_out_of_bounds() {
    let grid = Vec2D::init(1, (2, 3));
    let _x = grid[2][3];
}

#[test]
#[should_panic]
fn index_out_of_bounds_vec_edge() {
    let grid = Vec2D::init(1, (2, 3));
    let _x = grid[1][3];
}

#[test]
#[should_panic]
fn index_column_out_of_bounds() {
    let grid = Vec2D::init(1, (2, 3));
    let _x = grid[0][3];
}

#[test]
#[should_panic]
fn index_row_out_of_bounds() {
    let grid = Vec2D::init(1, (2, 3));
    let _x = grid[2][0];
}

#[test]
#[should_panic]
fn index_mul_overflow() {
    // Matrix with 4 columns.
    let grid = Vec2D::init(0, (3, 4));
    // 2^{usize.numbits() - 2}: `row * cols` wraps around to zero.
    let row = (usize::MAX / 4) + 1;
    // Would silently alias matrix[0] if the overflow were not caught.
    let _ = grid[row];
}

#[test]
#[should_panic]
fn index_add_overflow() {
    // Matrix with 5 columns.
    let grid = Vec2D::init(0, (3, 5));
    // When numbits(usize) is a multiple of 4, 5 divides usize::MAX exactly:
    // usize::MAX is 0xFFF...F and usize::MAX / 5 is 0x333...3.
    let row = usize::MAX / 5;
    // This would therefore try to index data[usize::MAX..4].
    let _ = grid[row];
}

#[test]
#[should_panic]
fn indexmut_out_of_bounds() {
    let mut grid = Vec2D::init(1, (2, 3));
    grid[2][3] = 0;
}

#[test]
#[should_panic]
fn indexmut_out_of_bounds_vec_edge() {
    let mut grid = Vec2D::init(1, (2, 3));
    grid[1][3] = 0;
}

#[test]
#[should_panic]
fn indexmut_column_out_of_bounds() {
    let mut grid = Vec2D::init(1, (2, 3));
    grid[0][3] = 0;
}

#[test]
#[should_panic]
fn indexmut_row_out_of_bounds() {
    let mut grid = Vec2D::init(1, (2, 3));
    grid[2][0] = 0;
}

#[test]
#[should_panic]
fn indexmut_mul_overflow() {
    // Matrix with 4 columns.
    let mut grid = Vec2D::init(0, (3, 4));
    // 2^{usize.numbits() - 2}: `row * cols` wraps around to zero.
    let row = (usize::MAX / 4) + 1;
    // Would silently alias matrix[0] if the overflow were not caught.
    grid[row][0] = 9;
}

#[test]
#[should_panic]
fn indexmut_add_overflow() {
    // Matrix with 5 columns.
    let mut grid = Vec2D::init(0, (3, 5));
    // When numbits(usize) is a multiple of 4, 5 divides usize::MAX exactly:
    // usize::MAX is 0xFFF...F and usize::MAX / 5 is 0x333...3.
    let row = usize::MAX / 5;
    // This would therefore try to index data[usize::MAX..4].
    grid[row][0] = 9;
}
210 | matrix[row][0] = 9; 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /src/xz/crc.rs: -------------------------------------------------------------------------------- 1 | use crc::{Crc, CRC_32_ISO_HDLC, CRC_64_XZ}; 2 | 3 | pub const CRC32: Crc = Crc::::new(&CRC_32_ISO_HDLC); 4 | pub const CRC64: Crc = Crc::::new(&CRC_64_XZ); 5 | -------------------------------------------------------------------------------- /src/xz/footer.rs: -------------------------------------------------------------------------------- 1 | //! XZ footer. 2 | 3 | /// File format trailing terminator, see sect. 2.1.2.4. 4 | pub(crate) const XZ_MAGIC_FOOTER: &[u8] = &[0x59, 0x5A]; 5 | -------------------------------------------------------------------------------- /src/xz/header.rs: -------------------------------------------------------------------------------- 1 | //! XZ header. 2 | 3 | use crate::decode::util; 4 | use crate::error; 5 | use crate::xz::crc::CRC32; 6 | use crate::xz::StreamFlags; 7 | use byteorder::{BigEndian, LittleEndian, ReadBytesExt}; 8 | 9 | /// File format magic header signature, see sect. 2.1.1.1. 10 | pub(crate) const XZ_MAGIC: &[u8] = &[0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00]; 11 | 12 | /// Stream Header, see sect. 2.1.1. 13 | #[derive(Clone, Copy, Debug)] 14 | pub(crate) struct StreamHeader { 15 | pub(crate) stream_flags: StreamFlags, 16 | } 17 | 18 | impl StreamHeader { 19 | /// Parse a Stream Header from a buffered reader. 20 | pub(crate) fn parse
(input: &mut BR) -> error::Result 21 | where 22 | BR: std::io::BufRead, 23 | { 24 | if !util::read_tag(input, XZ_MAGIC)? { 25 | return Err(error::Error::XzError(format!( 26 | "Invalid XZ magic, expected {:?}", 27 | XZ_MAGIC 28 | ))); 29 | } 30 | 31 | let (flags, digested) = { 32 | let mut digest = CRC32.digest(); 33 | let mut digest_rd = util::CrcDigestRead::new(input, &mut digest); 34 | let flags = digest_rd.read_u16::()?; 35 | (flags, digest.finalize()) 36 | }; 37 | 38 | let crc32 = input.read_u32::()?; 39 | if crc32 != digested { 40 | return Err(error::Error::XzError(format!( 41 | "Invalid header CRC32: expected 0x{:08x} but got 0x{:08x}", 42 | crc32, digested 43 | ))); 44 | } 45 | 46 | let stream_flags = StreamFlags::parse(flags)?; 47 | let header = Self { stream_flags }; 48 | 49 | lzma_info!("XZ check method: {:?}", header.stream_flags.check_method); 50 | Ok(header) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/xz/mod.rs: -------------------------------------------------------------------------------- 1 | //! Logic for handling `.xz` file format. 2 | //! 3 | //! Format specifications are at [https://tukaani.org/xz/xz-file-format.txt][spec]. 4 | //! 5 | //! [spec]: https://tukaani.org/xz/xz-file-format.txt 6 | 7 | use crate::error; 8 | use std::io; 9 | 10 | pub(crate) mod crc; 11 | pub(crate) mod footer; 12 | pub(crate) mod header; 13 | 14 | /// Stream flags, see sect. 2.1.1.2. 15 | /// 16 | /// This does not store the leading null byte, which is currently unused. 17 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 18 | pub(crate) struct StreamFlags { 19 | pub(crate) check_method: CheckMethod, 20 | } 21 | 22 | impl StreamFlags { 23 | /// Parse Stream Flags from a 16bits value. 
24 | pub(crate) fn parse(input: u16) -> error::Result { 25 | let flags_bytes = input.to_be_bytes(); 26 | 27 | if flags_bytes[0] != 0x00 { 28 | return Err(error::Error::XzError(format!( 29 | "Invalid null byte in Stream Flags: {:x}", 30 | flags_bytes[0] 31 | ))); 32 | } 33 | 34 | let flags = Self { 35 | check_method: CheckMethod::try_from(flags_bytes[1])?, 36 | }; 37 | Ok(flags) 38 | } 39 | 40 | /// Serialize Stream Flags into a writer. 41 | pub(crate) fn serialize(self, writer: &mut W) -> io::Result 42 | where 43 | W: io::Write, 44 | { 45 | // First byte is currently unused and hard-coded to null. 46 | writer 47 | .write(&[0x00, self.check_method as u8]) 48 | .map_err(Into::into) 49 | } 50 | } 51 | 52 | /// Stream check type, see sect. 2.1.1.2. 53 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 54 | #[repr(u8)] 55 | pub enum CheckMethod { 56 | None = 0x00, 57 | Crc32 = 0x01, 58 | Crc64 = 0x04, 59 | Sha256 = 0x0A, 60 | } 61 | 62 | impl CheckMethod { 63 | /// Parse Check ID (second byte in Stream Flags). 
64 | pub fn try_from(id: u8) -> error::Result { 65 | match id { 66 | 0x00 => Ok(CheckMethod::None), 67 | 0x01 => Ok(CheckMethod::Crc32), 68 | 0x04 => Ok(CheckMethod::Crc64), 69 | 0x0A => Ok(CheckMethod::Sha256), 70 | _ => Err(error::Error::XzError(format!( 71 | "Invalid check method {:x}, expected one of [0x00, 0x01, 0x04, 0x0A]", 72 | id 73 | ))), 74 | } 75 | } 76 | } 77 | 78 | impl From for u8 { 79 | fn from(method: CheckMethod) -> u8 { 80 | method as u8 81 | } 82 | } 83 | 84 | #[cfg(test)] 85 | mod test { 86 | use super::*; 87 | use byteorder::{BigEndian, ReadBytesExt}; 88 | use std::io::{Seek, SeekFrom}; 89 | 90 | #[test] 91 | fn test_checkmethod_roundtrip() { 92 | let mut count_valid = 0; 93 | for input in 0..std::u8::MAX { 94 | if let Ok(check) = CheckMethod::try_from(input) { 95 | let output: u8 = check.into(); 96 | assert_eq!(input, output); 97 | count_valid += 1; 98 | } 99 | } 100 | assert_eq!(count_valid, 4); 101 | } 102 | 103 | #[test] 104 | fn test_streamflags_roundtrip() { 105 | let input = StreamFlags { 106 | check_method: CheckMethod::Crc32, 107 | }; 108 | 109 | let mut cursor = std::io::Cursor::new(vec![0u8; 2]); 110 | let len = input.serialize(&mut cursor).unwrap(); 111 | assert_eq!(len, 2); 112 | 113 | cursor.seek(SeekFrom::Start(0)).unwrap(); 114 | let field = cursor.read_u16::().unwrap(); 115 | let output = StreamFlags::parse(field).unwrap(); 116 | assert_eq!(input, output); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /tests/files/README.md: -------------------------------------------------------------------------------- 1 | # Test files 2 | 3 | This folder contains a collection of test files to cover different use cases of the lzma-rs library. 4 | 5 | This README describes files that are not self-explanatory in this folder. 6 | 7 | ## range-coder-edge-case 8 | 9 | This is a file that causes the code and range to be equal at some point during decoding LZMA data. 
10 | Previously, this file would raise an `LZMAError("Corrupted range coding")`, although the file is a valid LZMA file. 11 | 12 | The file was created by generating random geometry in [Blender](1) using the Array and Build 13 | modifier on a cube. 14 | The geometry was then exported as an FBX file that was converted into OpenCTM using the 3D service 15 | in [Cognite Data Fusion](2). 16 | The vertices in the resulting OpenCTM file are LZMA-compressed. 17 | This LZMA-compressed section of the file was manually extracted and the header modified to include 18 | the unpacked size. 19 | The unpacked size is four times the vertex count found in the OpenCTM data. 20 | 21 | [1]: https://blender.org 22 | [2]: https://docs.cognite.com 23 | -------------------------------------------------------------------------------- /tests/files/block-check-crc32.txt: -------------------------------------------------------------------------------- 1 | abcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabc
deabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabc
deabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcde -------------------------------------------------------------------------------- /tests/files/block-check-crc32.txt.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/block-check-crc32.txt.xz -------------------------------------------------------------------------------- /tests/files/empty.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/empty.txt -------------------------------------------------------------------------------- /tests/files/empty.txt.lzma: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/empty.txt.lzma -------------------------------------------------------------------------------- /tests/files/empty.txt.xz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/empty.txt.xz -------------------------------------------------------------------------------- /tests/files/foo.txt.lzma: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/foo.txt.lzma -------------------------------------------------------------------------------- /tests/files/foo.txt.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/foo.txt.xz -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-1: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetur adipisicing 2 | elit, sed do eiusmod tempor incididunt ut 3 | labore et dolore magna aliqua. Ut enim 4 | ad minim veniam, quis nostrud exercitation ullamco 5 | laboris nisi ut aliquip ex ea commodo 6 | consequat. Duis aute irure dolor in reprehenderit 7 | in voluptate velit esse cillum dolore eu 8 | fugiat nulla pariatur. Excepteur sint occaecat cupidatat 9 | non proident, sunt in culpa qui officia 10 | deserunt mollit anim id est laborum. 
11 | -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-1.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/good-1-lzma2-1.xz -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-2: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetur adipisicing 2 | elit, sed do eiusmod tempor incididunt ut 3 | labore et dolore magna aliqua. Ut enim 4 | ad minim veniam, quis nostrud exercitation ullamco 5 | laboris nisi ut aliquip ex ea commodo 6 | consequat. Duis aute irure dolor in reprehenderit 7 | in voluptate velit esse cillum dolore eu 8 | fugiat nulla pariatur. Excepteur sint occaecat cupidatat 9 | non proident, sunt in culpa qui officia 10 | deserunt mollit anim id est laborum. 11 | -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-2.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/good-1-lzma2-2.xz -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-3: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetur adipisicing 2 | elit, sed do eiusmod tempor incididunt ut 3 | labore et dolore magna aliqua. Ut enim 4 | ad minim veniam, quis nostrud exercitation ullamco 5 | laboris nisi ut aliquip ex ea commodo 6 | consequat. Duis aute irure dolor in reprehenderit 7 | in voluptate velit esse cillum dolore eu 8 | fugiat nulla pariatur. Excepteur sint occaecat cupidatat 9 | non proident, sunt in culpa qui officia 10 | deserunt mollit anim id est laborum. 
11 | -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-3.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/good-1-lzma2-3.xz -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-4: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetur adipisicing 2 | elit, sed do eiusmod tempor incididunt ut 3 | labore et dolore magna aliqua. Ut enim 4 | ad minim veniam, quis nostrud exercitation ullamco 5 | laboris nisi ut aliquip ex ea commodo 6 | consequat. Duis aute irure dolor in reprehenderit 7 | in voluptate velit esse cillum dolore eu 8 | fugiat nulla pariatur. Excepteur sint occaecat cupidatat 9 | non proident, sunt in culpa qui officia 10 | deserunt mollit anim id est laborum. 11 | -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-4.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/good-1-lzma2-4.xz -------------------------------------------------------------------------------- /tests/files/hello.txt: -------------------------------------------------------------------------------- 1 | Hello world 2 | -------------------------------------------------------------------------------- /tests/files/hello.txt.lzma: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/hello.txt.lzma -------------------------------------------------------------------------------- /tests/files/hello.txt.xz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/hello.txt.xz -------------------------------------------------------------------------------- /tests/files/hugedict.txt.lzma: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/hugedict.txt.lzma -------------------------------------------------------------------------------- /tests/files/range-coder-edge-case: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/range-coder-edge-case -------------------------------------------------------------------------------- /tests/files/range-coder-edge-case.lzma: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/range-coder-edge-case.lzma -------------------------------------------------------------------------------- /tests/files/small.txt: -------------------------------------------------------------------------------- 1 | Project Gutenberg's Alice's Adventures in Wonderland, by Lewis Carroll 2 | -------------------------------------------------------------------------------- /tests/lzma.rs: -------------------------------------------------------------------------------- 1 | extern crate lzma; 2 | 3 | #[cfg(feature = "enable_logging")] 4 | use log::{debug, info}; 5 | use std::io::Read; 6 | #[cfg(feature = "stream")] 7 | use std::io::Write; 8 | 9 | /// Utility function to read a file into memory 10 | fn read_all_file(filename: &str) -> std::io::Result> { 11 | let mut data = Vec::new(); 12 | std::fs::File::open(filename).and_then(|mut file| file.read_to_end(&mut 
data))?; 13 | Ok(data) 14 | } 15 | 16 | fn round_trip(x: &[u8]) { 17 | round_trip_no_options(x); 18 | 19 | // Do another round trip, but this time also write it to the header 20 | let encode_options = lzma_rs::compress::Options { 21 | unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(Some(x.len() as u64)), 22 | }; 23 | let decode_options = lzma_rs::decompress::Options { 24 | unpacked_size: lzma_rs::decompress::UnpackedSize::ReadFromHeader, 25 | ..Default::default() 26 | }; 27 | assert_round_trip_with_options(x, &encode_options, &decode_options); 28 | } 29 | 30 | fn round_trip_no_options(x: &[u8]) { 31 | let mut compressed: Vec = Vec::new(); 32 | lzma_rs::lzma_compress(&mut std::io::BufReader::new(x), &mut compressed).unwrap(); 33 | #[cfg(feature = "enable_logging")] 34 | info!("Compressed {} -> {} bytes", x.len(), compressed.len()); 35 | #[cfg(feature = "enable_logging")] 36 | debug!("Compressed content: {:?}", compressed); 37 | 38 | assert_decomp_eq(&compressed, x, /* compare_to_liblzma */ true); 39 | } 40 | 41 | fn assert_round_trip_with_options( 42 | x: &[u8], 43 | encode_options: &lzma_rs::compress::Options, 44 | decode_options: &lzma_rs::decompress::Options, 45 | ) { 46 | let mut compressed: Vec = Vec::new(); 47 | lzma_rs::lzma_compress_with_options( 48 | &mut std::io::BufReader::new(x), 49 | &mut compressed, 50 | encode_options, 51 | ) 52 | .unwrap(); 53 | #[cfg(feature = "enable_logging")] 54 | info!("Compressed {} -> {} bytes", x.len(), compressed.len()); 55 | #[cfg(feature = "enable_logging")] 56 | debug!("Compressed content: {:?}", compressed); 57 | 58 | // test non-streaming decompression 59 | { 60 | let mut bf = std::io::BufReader::new(compressed.as_slice()); 61 | let mut decomp: Vec = Vec::new(); 62 | lzma_rs::lzma_decompress_with_options(&mut bf, &mut decomp, decode_options).unwrap(); 63 | assert_eq!(decomp, x); 64 | } 65 | 66 | #[cfg(feature = "stream")] 67 | // test streaming decompression 68 | { 69 | let mut stream = 
lzma_rs::decompress::Stream::new_with_options(decode_options, Vec::new()); 70 | 71 | if let Err(error) = stream.write_all(&compressed) { 72 | // A WriteZero error may occur if decompression is finished but there 73 | // are remaining `compressed` bytes to write. 74 | // This is the case when the unpacked size is encoded as unknown but 75 | // provided when decoding. I.e. the 5 or 6 byte end-of-stream marker 76 | // is not read. 77 | if error.kind() == std::io::ErrorKind::WriteZero { 78 | match (encode_options.unpacked_size, decode_options.unpacked_size) { 79 | ( 80 | lzma_rs::compress::UnpackedSize::WriteToHeader(None), 81 | lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(Some(_)), 82 | ) => {} 83 | _ => panic!("{}", error), 84 | } 85 | } else { 86 | panic!("{}", error); 87 | } 88 | } 89 | 90 | let decomp = stream.finish().unwrap(); 91 | assert_eq!(decomp, x); 92 | } 93 | } 94 | 95 | fn round_trip_file(filename: &str) { 96 | let x = read_all_file(filename).unwrap(); 97 | round_trip(x.as_slice()); 98 | } 99 | 100 | fn assert_decomp_eq(compressed: &[u8], expected: &[u8], compare_to_liblzma: bool) { 101 | // Test regular decompression. 102 | { 103 | let mut input = std::io::BufReader::new(compressed); 104 | let mut decomp: Vec = Vec::new(); 105 | lzma_rs::lzma_decompress(&mut input, &mut decomp).unwrap(); 106 | assert_eq!(decomp, expected); 107 | } 108 | 109 | // Test consistency with lzma crate. Sometimes that crate fails (e.g. huge 110 | // dictionary), so we have a flag to skip that. 
111 | if compare_to_liblzma { 112 | let decomp = lzma::decompress(compressed).unwrap(); 113 | assert_eq!(decomp, expected); 114 | } 115 | 116 | #[cfg(feature = "stream")] 117 | { 118 | let mut stream = lzma_rs::decompress::Stream::new(Vec::new()); 119 | stream.write_all(compressed).unwrap(); 120 | let decomp = stream.finish().unwrap(); 121 | assert_eq!(decomp, expected); 122 | 123 | const CHUNK_SIZES: &[usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256, 512, 1024]; 124 | for &chunk_size in CHUNK_SIZES { 125 | let mut stream = lzma_rs::decompress::Stream::new(Vec::new()); 126 | for chunk in compressed.chunks(chunk_size) { 127 | stream.write_all(chunk).unwrap(); 128 | } 129 | let decomp = stream.finish().unwrap(); 130 | assert_eq!(decomp, expected); 131 | } 132 | } 133 | } 134 | 135 | #[test] 136 | #[should_panic(expected = "HeaderTooShort")] 137 | fn decompress_short_header() { 138 | #[cfg(feature = "enable_logging")] 139 | let _ = env_logger::try_init(); 140 | let mut decomp: Vec = Vec::new(); 141 | // TODO: compare io::Errors? 142 | lzma_rs::lzma_decompress(&mut (b"" as &[u8]), &mut decomp).unwrap(); 143 | } 144 | 145 | #[test] 146 | fn round_trip_basics() { 147 | #[cfg(feature = "enable_logging")] 148 | let _ = env_logger::try_init(); 149 | round_trip(b""); 150 | // Note: we use vec! 
to avoid storing the slice in the binary 151 | round_trip(vec![0x00; 1_000_000].as_slice()); 152 | round_trip(vec![0xFF; 1_000_000].as_slice()); 153 | } 154 | 155 | #[test] 156 | fn round_trip_hello() { 157 | #[cfg(feature = "enable_logging")] 158 | let _ = env_logger::try_init(); 159 | round_trip(b"Hello world"); 160 | } 161 | 162 | #[test] 163 | fn round_trip_files() { 164 | #[cfg(feature = "enable_logging")] 165 | let _ = env_logger::try_init(); 166 | round_trip_file("tests/files/foo.txt"); 167 | round_trip_file("tests/files/range-coder-edge-case"); 168 | } 169 | 170 | #[test] 171 | fn decompress_big_file() { 172 | #[cfg(feature = "enable_logging")] 173 | let _ = env_logger::try_init(); 174 | let compressed = read_all_file("tests/files/foo.txt.lzma").unwrap(); 175 | let expected = read_all_file("tests/files/foo.txt").unwrap(); 176 | assert_decomp_eq(&compressed, &expected, /* compare_to_liblzma */ true); 177 | } 178 | 179 | #[test] 180 | fn decompress_big_file_with_huge_dict() { 181 | #[cfg(feature = "enable_logging")] 182 | let _ = env_logger::try_init(); 183 | let compressed = read_all_file("tests/files/hugedict.txt.lzma").unwrap(); 184 | let expected = read_all_file("tests/files/foo.txt").unwrap(); 185 | assert_decomp_eq(&compressed, &expected, /* compare_to_liblzma */ false); 186 | } 187 | 188 | #[test] 189 | fn decompress_range_coder_edge_case() { 190 | #[cfg(feature = "enable_logging")] 191 | let _ = env_logger::try_init(); 192 | let compressed = read_all_file("tests/files/range-coder-edge-case.lzma").unwrap(); 193 | let expected = read_all_file("tests/files/range-coder-edge-case").unwrap(); 194 | assert_decomp_eq(&compressed, &expected, /* compare_to_liblzma */ true); 195 | } 196 | 197 | #[test] 198 | fn decompress_empty_world() { 199 | #[cfg(feature = "enable_logging")] 200 | let _ = env_logger::try_init(); 201 | assert_decomp_eq( 202 | b"\x5d\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x83\xff\ 203 | \xfb\xff\xff\xc0\x00\x00\x00", 204 | b"", 205 
| /* compare_to_liblzma */ true, 206 | ); 207 | } 208 | 209 | #[test] 210 | fn decompress_hello_world() { 211 | #[cfg(feature = "enable_logging")] 212 | let _ = env_logger::try_init(); 213 | assert_decomp_eq( 214 | b"\x5d\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x24\x19\ 215 | \x49\x98\x6f\x10\x19\xc6\xd7\x31\xeb\x36\x50\xb2\x98\x48\xff\xfe\ 216 | \xa5\xb0\x00", 217 | b"Hello world\x0a", 218 | /* compare_to_liblzma */ true, 219 | ); 220 | } 221 | 222 | #[test] 223 | fn decompress_huge_dict() { 224 | // Hello world with a dictionary of size 0x7F7F7F7F 225 | #[cfg(feature = "enable_logging")] 226 | let _ = env_logger::try_init(); 227 | assert_decomp_eq( 228 | b"\x5d\x7f\x7f\x7f\x7f\xff\xff\xff\xff\xff\xff\xff\xff\x00\x24\x19\ 229 | \x49\x98\x6f\x10\x19\xc6\xd7\x31\xeb\x36\x50\xb2\x98\x48\xff\xfe\ 230 | \xa5\xb0\x00", 231 | b"Hello world\x0a", 232 | /* compare_to_liblzma */ false, 233 | ); 234 | } 235 | 236 | #[test] 237 | fn unpacked_size_write_to_header() { 238 | let data = b"Some data"; 239 | let encode_options = lzma_rs::compress::Options { 240 | unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(Some(data.len() as u64)), 241 | }; 242 | let decode_options = lzma_rs::decompress::Options { 243 | unpacked_size: lzma_rs::decompress::UnpackedSize::ReadFromHeader, 244 | ..Default::default() 245 | }; 246 | assert_round_trip_with_options(&data[..], &encode_options, &decode_options); 247 | } 248 | 249 | #[test] 250 | fn unpacked_size_provided_outside() { 251 | let data = b"Some data"; 252 | let encode_options = lzma_rs::compress::Options { 253 | unpacked_size: lzma_rs::compress::UnpackedSize::SkipWritingToHeader, 254 | }; 255 | let decode_options = lzma_rs::decompress::Options { 256 | unpacked_size: lzma_rs::decompress::UnpackedSize::UseProvided(Some(data.len() as u64)), 257 | ..Default::default() 258 | }; 259 | assert_round_trip_with_options(&data[..], &encode_options, &decode_options); 260 | } 261 | 262 | #[test] 263 | fn 
unpacked_size_write_some_to_header_but_use_provided_on_read() { 264 | let data = b"Some data"; 265 | let encode_options = lzma_rs::compress::Options { 266 | unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(Some(data.len() as u64)), 267 | }; 268 | let decode_options = lzma_rs::decompress::Options { 269 | unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(Some( 270 | data.len() as u64, 271 | )), 272 | ..Default::default() 273 | }; 274 | assert_round_trip_with_options(&data[..], &encode_options, &decode_options); 275 | } 276 | 277 | #[test] 278 | fn unpacked_size_write_none_to_header_and_use_provided_on_read() { 279 | let data = b"Some data"; 280 | let encode_options = lzma_rs::compress::Options { 281 | unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(None), 282 | }; 283 | let decode_options = lzma_rs::decompress::Options { 284 | unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(Some( 285 | data.len() as u64, 286 | )), 287 | ..Default::default() 288 | }; 289 | assert_round_trip_with_options(&data[..], &encode_options, &decode_options); 290 | } 291 | 292 | #[test] 293 | fn unpacked_size_write_none_to_header_and_use_provided_none_on_read() { 294 | let data = b"Some data"; 295 | let encode_options = lzma_rs::compress::Options { 296 | unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(None), 297 | }; 298 | let decode_options = lzma_rs::decompress::Options { 299 | unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(None), 300 | ..Default::default() 301 | }; 302 | assert_round_trip_with_options(&data[..], &encode_options, &decode_options); 303 | } 304 | 305 | #[test] 306 | fn memlimit() { 307 | let data = b"Some data"; 308 | let encode_options = lzma_rs::compress::Options { 309 | unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(None), 310 | }; 311 | let decode_options = lzma_rs::decompress::Options { 312 | unpacked_size: 
lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(None), 313 | memlimit: Some(0), 314 | ..Default::default() 315 | }; 316 | 317 | let mut compressed: Vec = Vec::new(); 318 | lzma_rs::lzma_compress_with_options( 319 | &mut std::io::BufReader::new(&data[..]), 320 | &mut compressed, 321 | &encode_options, 322 | ) 323 | .unwrap(); 324 | 325 | // test non-streaming decompression 326 | { 327 | let mut bf = std::io::BufReader::new(compressed.as_slice()); 328 | let mut decomp: Vec = Vec::new(); 329 | let error = lzma_rs::lzma_decompress_with_options(&mut bf, &mut decomp, &decode_options) 330 | .unwrap_err(); 331 | assert!( 332 | error.to_string().contains("exceeded memory limit of 0"), 333 | "{}", 334 | error.to_string() 335 | ); 336 | } 337 | 338 | #[cfg(feature = "stream")] 339 | // test streaming decompression 340 | { 341 | let mut stream = lzma_rs::decompress::Stream::new_with_options(&decode_options, Vec::new()); 342 | 343 | let error = stream.write_all(&compressed).unwrap_err(); 344 | assert!( 345 | error.to_string().contains("exceeded memory limit of 0"), 346 | "{}", 347 | error.to_string() 348 | ); 349 | let error = stream.finish().unwrap_err(); 350 | assert!( 351 | error.to_string().contains("previous write error"), 352 | "{}", 353 | error.to_string() 354 | ); 355 | } 356 | } 357 | -------------------------------------------------------------------------------- /tests/lzma2.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "enable_logging")] 2 | use log::{debug, info}; 3 | use std::io::Read; 4 | 5 | /// Utility function to read a file into memory 6 | fn read_all_file(filename: &str) -> std::io::Result> { 7 | let mut data = Vec::new(); 8 | std::fs::File::open(filename).and_then(|mut file| file.read_to_end(&mut data))?; 9 | Ok(data) 10 | } 11 | 12 | fn round_trip(x: &[u8]) { 13 | let mut compressed: Vec = Vec::new(); 14 | lzma_rs::lzma2_compress(&mut std::io::BufReader::new(x), &mut 
compressed).unwrap(); 15 | #[cfg(feature = "enable_logging")] 16 | info!("Compressed {} -> {} bytes", x.len(), compressed.len()); 17 | #[cfg(feature = "enable_logging")] 18 | debug!("Compressed content: {:?}", compressed); 19 | let mut bf = std::io::BufReader::new(compressed.as_slice()); 20 | let mut decomp: Vec = Vec::new(); 21 | lzma_rs::lzma2_decompress(&mut bf, &mut decomp).unwrap(); 22 | assert_eq!(decomp, x) 23 | } 24 | 25 | fn round_trip_file(filename: &str) { 26 | let x = read_all_file(filename).unwrap(); 27 | round_trip(x.as_slice()); 28 | } 29 | 30 | #[test] 31 | fn round_trip_basics() { 32 | #[cfg(feature = "enable_logging")] 33 | let _ = env_logger::try_init(); 34 | round_trip(b""); 35 | // Note: we use vec! to avoid storing the slice in the binary 36 | round_trip(vec![0x00; 1_000_000].as_slice()); 37 | round_trip(vec![0xFF; 1_000_000].as_slice()); 38 | } 39 | 40 | #[test] 41 | fn round_trip_hello() { 42 | #[cfg(feature = "enable_logging")] 43 | let _ = env_logger::try_init(); 44 | round_trip(b"Hello world"); 45 | } 46 | 47 | #[test] 48 | fn round_trip_files() { 49 | #[cfg(feature = "enable_logging")] 50 | let _ = env_logger::try_init(); 51 | round_trip_file("tests/files/foo.txt"); 52 | } 53 | -------------------------------------------------------------------------------- /tests/xz.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "enable_logging")] 2 | use log::{debug, info}; 3 | use std::io::{BufReader, Cursor, Read}; 4 | 5 | /// Utility function to read a file into memory 6 | fn read_all_file(filename: &str) -> std::io::Result> { 7 | let mut data = Vec::new(); 8 | std::fs::File::open(filename).and_then(|mut file| file.read_to_end(&mut data))?; 9 | Ok(data) 10 | } 11 | 12 | fn round_trip(x: &[u8]) { 13 | let mut compressed: Vec = Vec::new(); 14 | lzma_rs::xz_compress(&mut std::io::BufReader::new(x), &mut compressed).unwrap(); 15 | #[cfg(feature = "enable_logging")] 16 | info!("Compressed {} -> {} 
bytes", x.len(), compressed.len()); 17 | #[cfg(feature = "enable_logging")] 18 | debug!("Compressed content: {:?}", compressed); 19 | let mut bf = BufReader::new(compressed.as_slice()); 20 | let mut decomp: Vec = Vec::new(); 21 | lzma_rs::xz_decompress(&mut bf, &mut decomp).unwrap(); 22 | assert_eq!(decomp, x) 23 | } 24 | 25 | fn round_trip_file(filename: &str) { 26 | let x = read_all_file(filename).unwrap(); 27 | round_trip(x.as_slice()); 28 | } 29 | 30 | #[test] 31 | fn round_trip_basics() { 32 | #[cfg(feature = "enable_logging")] 33 | let _ = env_logger::try_init(); 34 | round_trip(b""); 35 | // Note: we use vec! to avoid storing the slice in the binary 36 | round_trip(vec![0x00; 1_000_000].as_slice()); 37 | round_trip(vec![0xFF; 1_000_000].as_slice()); 38 | } 39 | 40 | #[test] 41 | fn round_trip_hello() { 42 | #[cfg(feature = "enable_logging")] 43 | let _ = env_logger::try_init(); 44 | round_trip(b"Hello world"); 45 | } 46 | 47 | #[test] 48 | fn round_trip_files() { 49 | #[cfg(feature = "enable_logging")] 50 | let _ = env_logger::try_init(); 51 | round_trip_file("tests/files/foo.txt"); 52 | } 53 | 54 | fn decomp_big_file(compfile: &str, plainfile: &str) { 55 | let expected = read_all_file(plainfile).unwrap(); 56 | let mut f = BufReader::new(std::fs::File::open(compfile).unwrap()); 57 | let mut decomp: Vec = Vec::new(); 58 | lzma_rs::xz_decompress(&mut f, &mut decomp).unwrap(); 59 | assert!(decomp == expected) 60 | } 61 | 62 | #[test] 63 | fn big_file() { 64 | #[cfg(feature = "enable_logging")] 65 | let _ = env_logger::try_init(); 66 | decomp_big_file("tests/files/foo.txt.xz", "tests/files/foo.txt"); 67 | decomp_big_file( 68 | "tests/files/good-1-lzma2-1.xz", 69 | "tests/files/good-1-lzma2-1", 70 | ); 71 | decomp_big_file( 72 | "tests/files/good-1-lzma2-2.xz", 73 | "tests/files/good-1-lzma2-2", 74 | ); 75 | decomp_big_file( 76 | "tests/files/good-1-lzma2-3.xz", 77 | "tests/files/good-1-lzma2-3", 78 | ); 79 | decomp_big_file( 80 | "tests/files/good-1-lzma2-4.xz", 
81 | "tests/files/good-1-lzma2-4", 82 | ); 83 | } 84 | 85 | #[test] 86 | fn decompress_empty_world() { 87 | #[cfg(feature = "enable_logging")] 88 | let _ = env_logger::try_init(); 89 | let mut x: &[u8] = b"\xfd\x37\x7a\x58\x5a\x00\x00\x04\xe6\xd6\xb4\x46\x00\x00\x00\x00\ 90 | \x1c\xdf\x44\x21\x1f\xb6\xf3\x7d\x01\x00\x00\x00\x00\x04\x59\x5a\ 91 | "; 92 | let mut decomp: Vec = Vec::new(); 93 | lzma_rs::xz_decompress(&mut x, &mut decomp).unwrap(); 94 | assert_eq!(decomp, b"") 95 | } 96 | 97 | #[test] 98 | fn decompress_hello_world() { 99 | #[cfg(feature = "enable_logging")] 100 | let _ = env_logger::try_init(); 101 | let mut x: &[u8] = b"\xfd\x37\x7a\x58\x5a\x00\x00\x04\xe6\xd6\xb4\x46\x02\x00\x21\x01\ 102 | \x16\x00\x00\x00\x74\x2f\xe5\xa3\x01\x00\x0b\x48\x65\x6c\x6c\x6f\ 103 | \x20\x77\x6f\x72\x6c\x64\x0a\x00\xca\xec\x49\x05\x66\x3f\x67\x98\ 104 | \x00\x01\x24\x0c\xa6\x18\xd8\xd8\x1f\xb6\xf3\x7d\x01\x00\x00\x00\ 105 | \x00\x04\x59\x5a"; 106 | let mut decomp: Vec = Vec::new(); 107 | lzma_rs::xz_decompress(&mut x, &mut decomp).unwrap(); 108 | assert_eq!(decomp, b"Hello world\x0a") 109 | } 110 | 111 | #[test] 112 | fn test_xz_block_check_crc32() { 113 | #[cfg(feature = "enable_logging")] 114 | let _ = env_logger::try_init(); 115 | 116 | decomp_big_file( 117 | "tests/files/block-check-crc32.txt.xz", 118 | "tests/files/block-check-crc32.txt", 119 | ); 120 | } 121 | 122 | #[test] 123 | fn test_xz_block_check_crc32_invalid() { 124 | #[cfg(feature = "enable_logging")] 125 | let _ = env_logger::try_init(); 126 | 127 | let testcase = "tests/files/block-check-crc32.txt.xz"; 128 | let mut corrupted = { 129 | let mut buf = read_all_file(testcase).unwrap(); 130 | // Mangle the "Block Check" field. 
131 | buf[0x54] = 0x67; 132 | buf[0x55] = 0x45; 133 | buf[0x56] = 0x23; 134 | buf[0x57] = 0x01; 135 | BufReader::new(Cursor::new(buf)) 136 | }; 137 | let mut decomp = Vec::new(); 138 | 139 | let err_msg = lzma_rs::xz_decompress(&mut corrupted, &mut decomp) 140 | .unwrap_err() 141 | .to_string(); 142 | assert_eq!( 143 | err_msg, 144 | "xz error: Invalid footer CRC32: expected 0x01234567 but got 0x8b0d303e" 145 | ) 146 | } 147 | --------------------------------------------------------------------------------