├── .github ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ ├── benches.yml │ ├── codecov.yml │ ├── format.yml │ ├── fuzzing.yml │ ├── lints.yml │ ├── rustdoc.yml │ └── tests.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── lzma.rs ├── fuzz ├── .gitignore ├── Cargo.toml ├── README.md └── fuzz_targets │ ├── compare_xz.rs │ ├── decompress_lzma.rs │ ├── decompress_lzma2.rs │ ├── decompress_lzma_stream.rs │ ├── decompress_xz.rs │ ├── interop_xz_decode.rs │ ├── interop_xz_encode.rs │ ├── roundtrip_lzma.rs │ ├── roundtrip_lzma2.rs │ └── roundtrip_xz.rs ├── rustfmt.toml ├── src ├── decode │ ├── lzbuffer.rs │ ├── lzma.rs │ ├── lzma2.rs │ ├── mod.rs │ ├── options.rs │ ├── rangecoder.rs │ ├── stream.rs │ ├── util.rs │ └── xz.rs ├── encode │ ├── dumbencoder.rs │ ├── lzma2.rs │ ├── mod.rs │ ├── options.rs │ ├── rangecoder.rs │ ├── util.rs │ └── xz.rs ├── error.rs ├── lib.rs ├── macros.rs ├── util │ ├── mod.rs │ └── vec2d.rs └── xz │ ├── crc.rs │ ├── footer.rs │ ├── header.rs │ └── mod.rs └── tests ├── files ├── README.md ├── block-check-crc32.txt ├── block-check-crc32.txt.xz ├── empty.txt ├── empty.txt.lzma ├── empty.txt.xz ├── foo.txt ├── foo.txt.lzma ├── foo.txt.xz ├── good-1-lzma2-1 ├── good-1-lzma2-1.xz ├── good-1-lzma2-2 ├── good-1-lzma2-2.xz ├── good-1-lzma2-3 ├── good-1-lzma2-3.xz ├── good-1-lzma2-4 ├── good-1-lzma2-4.xz ├── hello.txt ├── hello.txt.lzma ├── hello.txt.xz ├── hugedict.txt.lzma ├── range-coder-edge-case ├── range-coder-edge-case.lzma └── small.txt ├── lzma.rs ├── lzma2.rs └── xz.rs /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Pull Request Overview 2 | 3 | This pull request adds/changes/fixes... 4 | 5 | 6 | ### Testing Strategy 7 | 8 | This pull request was tested by... 9 | 10 | - [ ] Added relevant unit tests. 11 | - [ ] Added relevant end-to-end tests (such as `.lzma`, `.lzma2`, `.xz` files). 
12 | 13 | 14 | ### Supporting Documentation and References 15 | 16 | *If supporting an edge case, such as files created by a legacy SDK, please document here where this edge case comes from. 17 | Whenever possible, please include links to artifacts such as example files, existing code handling this edge case, etc.* 18 | 19 | 20 | ### TODO or Help Wanted 21 | 22 | This pull request still needs... 23 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Please see the documentation for all configuration options: 2 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 3 | version: 2 4 | updates: 5 | - package-ecosystem: "cargo" 6 | directory: "/" # Location of package manifests 7 | schedule: 8 | interval: "weekly" 9 | -------------------------------------------------------------------------------- /.github/workflows/benches.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Build benches on nightly toolchain 3 | jobs: 4 | build_benchmarks: 5 | runs-on: ubuntu-latest 6 | env: 7 | RUSTFLAGS: "-D warnings" 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: dtolnay/rust-toolchain@nightly 11 | - name: Build benches 12 | run: cargo build --benches --verbose 13 | -------------------------------------------------------------------------------- /.github/workflows/codecov.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Code coverage 3 | jobs: 4 | unit_tests: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v4 8 | - uses: dtolnay/rust-toolchain@nightly 9 | - name: Install cargo-llvm-cov 10 | uses: taiki-e/install-action@cargo-llvm-cov 11 | - name: Generate code coverage 12 | run: cargo llvm-cov --all-features 
--workspace --lib --lcov --output-path lcov.info 13 | - name: Upload to Codecov 14 | uses: codecov/codecov-action@v3.1.0 15 | with: 16 | files: lcov.info 17 | flags: unit 18 | verbose: true 19 | fail_ci_if_error: true 20 | integration_tests: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | - uses: dtolnay/rust-toolchain@nightly 25 | - name: Install cargo-llvm-cov 26 | uses: taiki-e/install-action@cargo-llvm-cov 27 | - name: Generate code coverage 28 | run: cargo llvm-cov --all-features --workspace --test '*' --lcov --output-path lcov.info 29 | - name: Upload to Codecov 30 | uses: codecov/codecov-action@v3.1.0 31 | with: 32 | files: lcov.info 33 | flags: integration 34 | verbose: true 35 | fail_ci_if_error: true 36 | -------------------------------------------------------------------------------- /.github/workflows/format.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Formatting on nightly toolchain 3 | jobs: 4 | format: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v4 8 | - uses: dtolnay/rust-toolchain@nightly 9 | with: 10 | components: rustfmt 11 | 12 | - name: Check formatting 13 | run: cargo fmt --verbose -- --check --verbose 14 | - name: Check formatting on fuzzing 15 | run: cargo fmt --verbose --manifest-path fuzz/Cargo.toml -- --check --verbose 16 | -------------------------------------------------------------------------------- /.github/workflows/fuzzing.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Build fuzz targets on nightly toolchain 3 | jobs: 4 | build_fuzzing: 5 | runs-on: ubuntu-latest 6 | env: 7 | RUSTFLAGS: "-D warnings" 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: dtolnay/rust-toolchain@nightly 11 | - name: Install cargo fuzz 12 | run: cargo install cargo-fuzz --verbose 13 | - name: Build fuzz targets 14 | run: cargo fuzz build 
--verbose 15 | -------------------------------------------------------------------------------- /.github/workflows/lints.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Lints on stable toolchain 3 | jobs: 4 | clippy: 5 | runs-on: ubuntu-latest 6 | env: 7 | RUSTFLAGS: "-D warnings" 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: dtolnay/rust-toolchain@stable 11 | with: 12 | components: clippy 13 | 14 | - name: Check Clippy lints 15 | run: cargo clippy --verbose --all-features -- -W clippy::match-same-arms 16 | - name: Check Clippy lints on tests 17 | run: cargo clippy --verbose --all-features --tests -- -W clippy::match-same-arms 18 | - name: Check Clippy lints on fuzzing 19 | run: cargo clippy --verbose --all-features --manifest-path fuzz/Cargo.toml -- -W clippy::match-same-arms 20 | -------------------------------------------------------------------------------- /.github/workflows/rustdoc.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Rustdoc on nightly toolchain 3 | jobs: 4 | rustdoc: 5 | runs-on: ubuntu-latest 6 | env: 7 | RUSTDOCFLAGS: "-D warnings" 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: dtolnay/rust-toolchain@nightly 11 | 12 | - name: Check Rust documentation 13 | run: cargo +nightly doc --document-private-items 14 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Build and run tests 3 | jobs: 4 | build_and_test: 5 | strategy: 6 | matrix: 7 | os: 8 | - ubuntu-latest 9 | - macos-latest 10 | rust: 11 | - stable 12 | - beta 13 | - nightly 14 | - 1.71.0 # MSRV 15 | fail-fast: false 16 | runs-on: ${{ matrix.os }} 17 | env: 18 | RUSTFLAGS: "-D warnings" 19 | steps: 20 | - uses: actions/checkout@v4 21 | - uses: 
dtolnay/rust-toolchain@master 22 | with: 23 | toolchain: ${{ matrix.rust }} 24 | 25 | - name: Build with default features 26 | run: cargo build --verbose 27 | - name: Tests with default features 28 | run: cargo test --verbose 29 | - name: Build with all features 30 | run: cargo build --all-features --verbose 31 | - name: Tests with all features 32 | run: cargo test --all-features --verbose 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | **/*.rs.bk 3 | Cargo.lock 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.3.0 - 2023-01-04 2 | 3 | - Update minimum supported Rust version: 1.40.0 -> 1.50.0. 4 | - Update dependencies (https://github.com/gendx/lzma-rs/pull/78): 5 | - `byteorder`: ^1.0.0 -> 1.4.3 6 | - `crc`: ^1.0.0 -> 3.0.0 7 | - `log`: ^0.4.14 -> 0.4.17 8 | - `env_logger`: ^0.8.3 -> 0.9.0 9 | - Expose a new `raw_decoder` API (https://github.com/gendx/lzma-rs/pull/74). 10 | - Reduce the number of allocations (https://github.com/gendx/lzma-rs/pull/77). 11 | - Display features on rustdoc (https://github.com/gendx/lzma-rs/pull/70). 12 | - Configure formatting style to `imports_granularity = "Module"` 13 | (https://github.com/gendx/lzma-rs/pull/82). 14 | - Add code coverage reporting (https://github.com/gendx/lzma-rs/pull/86). 15 | 16 | ## 0.2.0 - 2021-05-02 17 | 18 | - Update minimum supported Rust version: 1.32.0 -> 1.40.0. 19 | - Update dependencies: 20 | - `log`: ^0.4.8 -> ^0.4.14 21 | - `env_logger`: 0.7.1 -> ^0.8.3 22 | - [Breaking change] Rename acronyms to be lowercase, following 23 | clippy::upper-case-acronyms. 24 | - [Breaking change] Add a memory limit option 25 | (https://github.com/gendx/lzma-rs/pull/50). 
26 | - Fix bug in LZMA2 decompression (https://github.com/gendx/lzma-rs/pull/61). 27 | - Fix bug in CRC32 validation (https://github.com/gendx/lzma-rs/pull/56). 28 | - Add a streaming mode for LZMA decompression, gated by the `stream` feature. 29 | - Add more fuzzing targets, including comparison with the `xz2` crate. 30 | - Various improvements: benchmarks, fix lint warnings. 31 | - Migrate from Travis-CI to GitHub Actions. 32 | 33 | ## 0.1.4 - 2021-05-02 34 | 35 | - Backports from 0.2.0: 36 | - Fix bug in LZMA2 decompression (https://github.com/gendx/lzma-rs/pull/61). 37 | - Fix bug in CRC32 validation (https://github.com/gendx/lzma-rs/pull/56). 38 | 39 | ## 0.1.3 - 2020-05-05 40 | 41 | - Minimum supported Rust version: 1.32.0. 42 | - Update dependencies: 43 | - `log`: ^0.4.0 -> ^0.4.8 44 | - `env_logger`: 0.6.0 -> ^0.7.1 45 | - Gate logging behind an opt-in feature. This improves decoding performance by 46 | ~25% (https://github.com/gendx/lzma-rs/pull/31). 47 | - Lazily allocate the circular buffer (https://github.com/gendx/lzma-rs/pull/22). 48 | This improves memory usage (especially for WebAssembly targets) at the expense 49 | of a ~5% performance regression (https://github.com/gendx/lzma-rs/issues/27). 50 | - Return an error instead of panicking on unsupported SHA-256 checksum for XZ 51 | decoding (https://github.com/gendx/lzma-rs/pull/40). 52 | - Add Clippy to CI. 53 | - Document public APIs. 54 | - Deny missing docs, missing Debug implementations and build warnings. 55 | - Forbid unsafe code. 56 | - Remove extern statements that are unnecessary on the 2018 edition. 57 | 58 | ## 0.1.2 - 2019-12-17 59 | 60 | - Fix bug in the range coder (https://github.com/gendx/lzma-rs/issues/15). 61 | - Add support for specifying the unpacked size outside of the header 62 | (https://github.com/gendx/lzma-rs/pull/17). 63 | - Migrate to Rust 2018 edition. 64 | - Add benchmarks. 65 | - Fix some Clippy warnings. 
66 | 67 | ## 0.1.1 - 2019-02-24 68 | 69 | - Upgrade `env_logger` dependency. 70 | - Refactoring to use `std::io::Take`, operator `?`. 71 | 72 | ## 0.1.0 - 2018-01-07 73 | 74 | - Initial release. 75 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "lzma-rs" 3 | description = "A codec for LZMA, LZMA2 and XZ written in pure Rust" 4 | version = "0.3.0" 5 | license = "MIT" 6 | authors = ["Guillaume Endignoux "] 7 | repository = "https://github.com/gendx/lzma-rs" 8 | readme = "README.md" 9 | categories = ["compression"] 10 | keywords = ["lzma", "compression", "decompression"] 11 | exclude = ["tests/*", "benches/*", "fuzz/*", ".github/*", "Cargo.lock"] 12 | edition = "2018" 13 | rust-version = "1.71.0" 14 | 15 | [dependencies] 16 | byteorder = "1.4.3" 17 | crc = "3.0.0" 18 | log = { version = "0.4.17", optional = true } 19 | env_logger = { version = "0.11.3", optional = true } 20 | 21 | [dev-dependencies] 22 | rust-lzma = "0.6" 23 | seq-macro = "0.3" 24 | 25 | [features] 26 | enable_logging = ["env_logger", "log"] 27 | stream = [] 28 | raw_decoder = [] 29 | 30 | [package.metadata.docs.rs] 31 | features = ["stream", "raw_decoder"] 32 | rustdoc-args = ["--cfg", "docsrs"] 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 - 2018 Guillaume Endignoux 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to 
do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lzma-rs 2 | 3 | [![Crate](https://img.shields.io/crates/v/lzma-rs.svg?logo=rust)](https://crates.io/crates/lzma-rs) 4 | [![Documentation](https://img.shields.io/docsrs/lzma-rs?logo=rust)](https://docs.rs/lzma-rs) 5 | [![Minimum Rust 1.71](https://img.shields.io/badge/rust-1.71%2B-orange.svg?logo=rust)](https://releases.rs/docs/1.71.0/) 6 | [![Dependencies](https://deps.rs/repo/github/gendx/lzma-rs/status.svg)](https://deps.rs/repo/github/gendx/lzma-rs) 7 | [![Safety Dance](https://img.shields.io/badge/unsafe-forbidden-success.svg?logo=rust)](https://github.com/rust-secure-code/safety-dance/) 8 | ![Build Status](https://github.com/gendx/lzma-rs/workflows/Build%20and%20run%20tests/badge.svg) 9 | [![Codecov](https://codecov.io/gh/gendx/lzma-rs/branch/master/graph/badge.svg?token=HVo74E0wzh)](https://codecov.io/gh/gendx/lzma-rs) 10 | [![Lines of Code](https://www.aschey.tech/tokei/github/gendx/lzma-rs?category=code)](https://github.com/aschey/vercel-tokei) 11 | [![Downloads (crates.io)](https://img.shields.io/crates/d/lzma-rs?label=downloads&logo=rust)](https://crates.io/crates/lzma-rs) 12 | 13 | This project is a 
decoder for LZMA and its variants written in pure Rust, with focus on clarity. 14 | It already supports LZMA, LZMA2 and a subset of the `.xz` file format. 15 | 16 | ## Usage 17 | 18 | Decompress a `.xz` file. 19 | 20 | ```rust 21 | let filename = "foo.xz"; 22 | let mut f = std::io::BufReader::new(std::fs::File::open(filename).unwrap()); 23 | // "decomp" can be anything that implements "std::io::Write" 24 | let mut decomp: Vec = Vec::new(); 25 | lzma_rs::xz_decompress(&mut f, &mut decomp).unwrap(); 26 | // Decompressed content is now in "decomp" 27 | ``` 28 | 29 | ## Encoder 30 | 31 | For now, there is also a dumb encoder that only uses byte literals, with many hard-coded constants for code simplicity. 32 | Better encoders are welcome! 33 | 34 | ## Contributing 35 | 36 | Pull-requests are welcome, to improve the decoder, add better encoders, or more tests. 37 | Ultimately, this project should also implement .xz and .7z files. 38 | 39 | ## License 40 | 41 | MIT 42 | 43 | -------------------------------------------------------------------------------- /benches/lzma.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use std::io::Read; 6 | use test::Bencher; 7 | 8 | fn compress_bench(x: &[u8], b: &mut Bencher) { 9 | b.iter(|| { 10 | let mut compressed: Vec = Vec::new(); 11 | lzma_rs::lzma_compress(&mut std::io::BufReader::new(x), &mut compressed).unwrap(); 12 | compressed 13 | }); 14 | } 15 | 16 | fn decompress_after_compress_bench(x: &[u8], b: &mut Bencher) { 17 | let mut compressed: Vec = Vec::new(); 18 | lzma_rs::lzma_compress(&mut std::io::BufReader::new(x), &mut compressed).unwrap(); 19 | 20 | b.iter(|| { 21 | let mut bf = std::io::BufReader::new(compressed.as_slice()); 22 | let mut decomp: Vec = Vec::new(); 23 | lzma_rs::lzma_decompress(&mut bf, &mut decomp).unwrap(); 24 | decomp 25 | }); 26 | } 27 | 28 | fn decompress_bench(compressed: &[u8], b: &mut Bencher) { 29 | 
b.iter(|| { 30 | let mut bf = std::io::BufReader::new(compressed); 31 | let mut decomp: Vec = Vec::new(); 32 | lzma_rs::lzma_decompress(&mut bf, &mut decomp).unwrap(); 33 | decomp 34 | }); 35 | } 36 | 37 | #[cfg(feature = "stream")] 38 | fn decompress_stream_bench(compressed: &[u8], b: &mut Bencher) { 39 | use std::io::Write; 40 | b.iter(|| { 41 | let mut stream = lzma_rs::decompress::Stream::new(Vec::new()); 42 | stream.write_all(compressed).unwrap(); 43 | stream.finish().unwrap() 44 | }); 45 | } 46 | 47 | fn decompress_bench_file(compfile: &str, b: &mut Bencher) { 48 | let mut f = std::fs::File::open(compfile).unwrap(); 49 | let mut compressed = Vec::new(); 50 | f.read_to_end(&mut compressed).unwrap(); 51 | decompress_bench(&compressed, b); 52 | } 53 | 54 | #[cfg(feature = "stream")] 55 | fn decompress_stream_bench_file(compfile: &str, b: &mut Bencher) { 56 | let mut f = std::fs::File::open(compfile).unwrap(); 57 | let mut compressed = Vec::new(); 58 | f.read_to_end(&mut compressed).unwrap(); 59 | decompress_stream_bench(&compressed, b); 60 | } 61 | 62 | #[bench] 63 | fn compress_empty(b: &mut Bencher) { 64 | #[cfg(feature = "enable_logging")] 65 | let _ = env_logger::try_init(); 66 | compress_bench(b"", b); 67 | } 68 | 69 | #[bench] 70 | fn decompress_after_compress_empty(b: &mut Bencher) { 71 | #[cfg(feature = "enable_logging")] 72 | let _ = env_logger::try_init(); 73 | decompress_after_compress_bench(b"", b); 74 | } 75 | 76 | #[bench] 77 | fn compress_hello(b: &mut Bencher) { 78 | #[cfg(feature = "enable_logging")] 79 | let _ = env_logger::try_init(); 80 | compress_bench(b"Hello world", b); 81 | } 82 | 83 | #[bench] 84 | fn decompress_after_compress_hello(b: &mut Bencher) { 85 | #[cfg(feature = "enable_logging")] 86 | let _ = env_logger::try_init(); 87 | decompress_after_compress_bench(b"Hello world", b); 88 | } 89 | 90 | #[bench] 91 | fn compress_65536(b: &mut Bencher) { 92 | #[cfg(feature = "enable_logging")] 93 | let _ = env_logger::try_init(); 94 | 
compress_bench(&[0; 0x10000], b); 95 | } 96 | 97 | #[bench] 98 | fn decompress_after_compress_65536(b: &mut Bencher) { 99 | #[cfg(feature = "enable_logging")] 100 | let _ = env_logger::try_init(); 101 | decompress_after_compress_bench(&[0; 0x10000], b); 102 | } 103 | 104 | #[bench] 105 | fn decompress_big_file(b: &mut Bencher) { 106 | #[cfg(feature = "enable_logging")] 107 | let _ = env_logger::try_init(); 108 | decompress_bench_file("tests/files/foo.txt.lzma", b); 109 | } 110 | 111 | #[cfg(feature = "stream")] 112 | #[bench] 113 | fn decompress_stream_big_file(b: &mut Bencher) { 114 | #[cfg(feature = "enable_logging")] 115 | let _ = env_logger::try_init(); 116 | decompress_stream_bench_file("tests/files/foo.txt.lzma", b); 117 | } 118 | 119 | #[bench] 120 | fn decompress_huge_dict(b: &mut Bencher) { 121 | #[cfg(feature = "enable_logging")] 122 | let _ = env_logger::try_init(); 123 | let compressed: &[u8] = b"\x5d\x7f\x7f\x7f\x7f\xff\xff\xff\ 124 | \xff\xff\xff\xff\xff\x00\x24\x19\ 125 | \x49\x98\x6f\x10\x19\xc6\xd7\x31\ 126 | \xeb\x36\x50\xb2\x98\x48\xff\xfe\ 127 | \xa5\xb0\x00"; 128 | decompress_bench(&compressed, b); 129 | } 130 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target 3 | corpus 4 | artifacts 5 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | name = "lzma-fuzz" 4 | version = "0.0.1" 5 | authors = ["Automatically generated"] 6 | publish = false 7 | edition = "2018" 8 | 9 | [package.metadata] 10 | cargo-fuzz = true 11 | 12 | [dependencies] 13 | xz2 = "0.1.6" 14 | 15 | [dependencies.lzma-rs] 16 | path = ".." 
17 | features = ["stream"] 18 | [dependencies.libfuzzer-sys] 19 | git = "https://github.com/rust-fuzz/libfuzzer-sys.git" 20 | 21 | # Prevent this from interfering with workspaces 22 | [workspace] 23 | members = ["."] 24 | 25 | [[bin]] 26 | name = "roundtrip_lzma" 27 | path = "fuzz_targets/roundtrip_lzma.rs" 28 | 29 | [[bin]] 30 | name = "roundtrip_lzma2" 31 | path = "fuzz_targets/roundtrip_lzma2.rs" 32 | 33 | [[bin]] 34 | name = "roundtrip_xz" 35 | path = "fuzz_targets/roundtrip_xz.rs" 36 | 37 | [[bin]] 38 | name = "decompress_lzma" 39 | path = "fuzz_targets/decompress_lzma.rs" 40 | 41 | [[bin]] 42 | name = "decompress_lzma2" 43 | path = "fuzz_targets/decompress_lzma2.rs" 44 | 45 | [[bin]] 46 | name = "decompress_xz" 47 | path = "fuzz_targets/decompress_xz.rs" 48 | 49 | [[bin]] 50 | name = "compare_xz" 51 | path = "fuzz_targets/compare_xz.rs" 52 | 53 | [[bin]] 54 | name = "interop_xz_decode" 55 | path = "fuzz_targets/interop_xz_decode.rs" 56 | 57 | [[bin]] 58 | name = "interop_xz_encode" 59 | path = "fuzz_targets/interop_xz_encode.rs" 60 | 61 | [[bin]] 62 | name = "decompress_lzma_stream" 63 | path = "fuzz_targets/decompress_lzma_stream.rs" 64 | -------------------------------------------------------------------------------- /fuzz/README.md: -------------------------------------------------------------------------------- 1 | This directory contains fuzzing targets to verify implementation correctness: 2 | 3 | - `roundtrip_*` targets check that we can successfully decode what we've encoded. 4 | - `decompress_*` targets check that we don't panic or abort on decoding a crafted file. 5 | - `compare_*` targets check that we produce identical output to liblzma on decompression. 
6 | 7 | The command to run fuzzer is: 8 | 9 | `cargo +nightly fuzz run --release -s none ` 10 | 11 | For example, 12 | 13 | `cargo +nightly fuzz run --release -s none compare_xz` 14 | 15 | We use `-s none` because this crate does not contain unsafe code, so we don't 16 | need sanitizers to detect memory or concurrency errors for us. 17 | 18 | For more info see `cargo +nightly fuzz help` 19 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/compare_xz.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | use std::io::Read; 7 | use xz2::stream; 8 | 9 | fn decode_xz_lzmars(compressed: &[u8]) -> Result> { 10 | let mut bf = std::io::Cursor::new(compressed); 11 | let mut decomp: Vec = Vec::new(); 12 | lzma_rs::xz_decompress(&mut bf, &mut decomp)?; 13 | Ok(decomp) 14 | } 15 | 16 | fn decode_xz_xz2(compressed: &[u8]) -> Result> { 17 | let bf = std::io::Cursor::new(compressed); 18 | let mut decomp: Vec = Vec::new(); 19 | // create new XZ decompression stream with 8Gb memory limit and checksum 20 | // verification disabled 21 | let xz_stream = 22 | stream::Stream::new_stream_decoder(8 * 1024 * 1024 * 1024, stream::IGNORE_CHECK) 23 | .expect("Failed to create stream"); 24 | xz2::bufread::XzDecoder::new_stream(bf, xz_stream).read_to_end(&mut decomp)?; 25 | Ok(decomp) 26 | } 27 | 28 | fuzz_target!(|data: &[u8]| { 29 | let result_lzmars = decode_xz_lzmars(data); 30 | let result_xz2 = decode_xz_xz2(data); 31 | match (result_lzmars, result_xz2) { 32 | (Err(_), Err(_)) => (), // both failed, so behavior matches 33 | (Ok(_), Err(_)) => panic!("lzma-rs succeeded but xz2 failed"), 34 | (Err(_), Ok(_)) => panic!("xz2 succeeded but lzma-rs failed"), 35 | (Ok(a), Ok(b)) => assert!(a == b), 36 | } 37 | }); 38 | -------------------------------------------------------------------------------- 
/fuzz/fuzz_targets/decompress_lzma.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | 7 | fn decode_lzma(compressed: &[u8]) -> Result> { 8 | let mut bf = std::io::Cursor::new(compressed); 9 | 10 | let mut decomp: Vec = Vec::new(); 11 | lzma_rs::lzma_decompress(&mut bf, &mut decomp)?; 12 | Ok(decomp) 13 | } 14 | 15 | fuzz_target!(|data: &[u8]| { 16 | let _decomp = decode_lzma(data); 17 | }); 18 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/decompress_lzma2.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | 7 | fn decode_lzma2(compressed: &[u8]) -> Result> { 8 | let mut bf = std::io::Cursor::new(compressed); 9 | 10 | let mut decomp: Vec = Vec::new(); 11 | lzma_rs::lzma2_decompress(&mut bf, &mut decomp)?; 12 | Ok(decomp) 13 | } 14 | 15 | fuzz_target!(|data: &[u8]| { 16 | let _decomp = decode_lzma2(data); 17 | }); 18 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/decompress_lzma_stream.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | use std::io::Write; 7 | 8 | fn decode_lzma(compressed: &[u8]) -> Result> { 9 | let mut decomp: Vec = Vec::new(); 10 | lzma_rs::lzma_decompress(&mut std::io::Cursor::new(compressed), &mut decomp)?; 11 | Ok(decomp) 12 | } 13 | 14 | fn decode_lzma_stream(compressed: &[u8], chunk_size: usize) -> Vec { 15 | let mut stream = lzma_rs::decompress::Stream::new(Vec::new()); 16 | for chunk in compressed.chunks(chunk_size) { 17 | stream.write_all(chunk).unwrap(); 18 | } 19 | stream.finish().unwrap() 20 | } 21 | 22 | 
fuzz_target!(|input: &[u8]| { 23 | if !input.is_empty() { 24 | let (chunk_size, input) = input.split_at(1); 25 | // use input length if chunk_size is zero because std::slice::chunks 26 | // will otherwise panic 27 | let chunk_size = if chunk_size[0] == 0 { 28 | input.len() 29 | } else { 30 | chunk_size[0] as usize 31 | }; 32 | let mut compressed = Vec::new(); 33 | lzma_rs::lzma_compress(&mut std::io::Cursor::new(input), &mut compressed).unwrap(); 34 | let decompressed = decode_lzma(&compressed).unwrap(); 35 | let decompressed_stream = decode_lzma_stream(&compressed, chunk_size); 36 | if decompressed_stream.len() != decompressed.len() { 37 | panic!( 38 | "chunk size: {}, ref len: {}, act len: {}", 39 | chunk_size, 40 | decompressed.len(), 41 | decompressed_stream.len() 42 | ); 43 | } 44 | assert_eq!(decompressed_stream, decompressed); 45 | assert_eq!(decompressed_stream, input); 46 | } 47 | }); 48 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/decompress_xz.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | 7 | fn decode_xz(compressed: &[u8]) -> Result> { 8 | let mut bf = std::io::Cursor::new(compressed); 9 | 10 | let mut decomp: Vec = Vec::new(); 11 | lzma_rs::xz_decompress(&mut bf, &mut decomp)?; 12 | Ok(decomp) 13 | } 14 | 15 | fuzz_target!(|data: &[u8]| { 16 | let _decomp = decode_xz(data); 17 | }); 18 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/interop_xz_decode.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | use std::io::Read; 7 | 8 | fn decode_xz_lzmars(compressed: &[u8]) -> Result> { 9 | let mut bf = std::io::Cursor::new(compressed); 10 | let mut decomp: Vec = Vec::new(); 
11 | lzma_rs::xz_decompress(&mut bf, &mut decomp)?; 12 | Ok(decomp) 13 | } 14 | 15 | fn encode_xz_xz2(data: &[u8]) -> Result> { 16 | let bf = std::io::Cursor::new(data); 17 | let mut compressed: Vec = Vec::new(); 18 | xz2::bufread::XzEncoder::new(bf, 6).read_to_end(&mut compressed)?; 19 | Ok(compressed) 20 | } 21 | 22 | fuzz_target!(|data: &[u8]| { 23 | let compressed = encode_xz_xz2(data).expect("liblzma failed to compress data"); 24 | let decoded = 25 | decode_xz_lzmars(&compressed).expect("We've failed to decompress what liblzma compressed"); 26 | assert!( 27 | data == decoded.as_slice(), 28 | "Decompressed data is different from the original" 29 | ); 30 | }); 31 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/interop_xz_encode.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | use std::io::Read; 7 | use xz2::stream; 8 | 9 | fn encode_xz_lzmars(x: &[u8]) -> Result> { 10 | let mut compressed: Vec = Vec::new(); 11 | lzma_rs::xz_compress(&mut std::io::BufReader::new(x), &mut compressed)?; 12 | Ok(compressed) 13 | } 14 | 15 | fn decode_xz_xz2(compressed: &[u8]) -> Result> { 16 | let bf = std::io::Cursor::new(compressed); 17 | let mut decomp: Vec = Vec::new(); 18 | // create new XZ decompression stream with 8Gb memory limit and checksum 19 | // verification disabled 20 | let xz_stream = 21 | stream::Stream::new_stream_decoder(8 * 1024 * 1024 * 1024, stream::IGNORE_CHECK) 22 | .expect("Failed to create stream"); 23 | xz2::bufread::XzDecoder::new_stream(bf, xz_stream).read_to_end(&mut decomp)?; 24 | Ok(decomp) 25 | } 26 | 27 | fuzz_target!(|data: &[u8]| { 28 | let compressed = encode_xz_lzmars(data).expect("Compression failed"); 29 | let decoded = 30 | decode_xz_xz2(&compressed).expect("liblzma failed to decompress what we've compressed"); 31 | assert!( 32 | data == 
decoded.as_slice(), 33 | "Decompressed data is different from the original" 34 | ); 35 | }); 36 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/roundtrip_lzma.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | 7 | fn round_trip_lzma(x: &[u8]) -> Result> { 8 | let mut compressed: Vec = Vec::new(); 9 | lzma_rs::lzma_compress(&mut std::io::BufReader::new(x), &mut compressed)?; 10 | let mut bf = std::io::BufReader::new(compressed.as_slice()); 11 | 12 | let mut decomp: Vec = Vec::new(); 13 | lzma_rs::lzma_decompress(&mut bf, &mut decomp)?; 14 | Ok(decomp) 15 | } 16 | 17 | fuzz_target!(|data: &[u8]| { 18 | let decomp = round_trip_lzma(data).expect("Can't decompress what we just compressed"); 19 | assert_eq!(decomp, data); 20 | }); 21 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/roundtrip_lzma2.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] 3 | extern crate libfuzzer_sys; 4 | 5 | use lzma_rs::error::Result; 6 | 7 | fn round_trip_lzma2(x: &[u8]) -> Result> { 8 | let mut compressed: Vec = Vec::new(); 9 | lzma_rs::lzma2_compress(&mut std::io::BufReader::new(x), &mut compressed)?; 10 | let mut bf = std::io::BufReader::new(compressed.as_slice()); 11 | 12 | let mut decomp: Vec = Vec::new(); 13 | lzma_rs::lzma2_decompress(&mut bf, &mut decomp)?; 14 | Ok(decomp) 15 | } 16 | 17 | fuzz_target!(|data: &[u8]| { 18 | let decomp = round_trip_lzma2(data).expect("Can't decompress what we just compressed"); 19 | assert_eq!(decomp, data); 20 | }); 21 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/roundtrip_xz.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 
#[macro_use]
extern crate libfuzzer_sys;

use lzma_rs::error::Result;

// Round-trip harness: compress with the crate's XZ encoder, then decode the
// result with the crate's XZ decoder.
fn round_trip_xz(x: &[u8]) -> Result<Vec<u8>> {
    let mut compressed: Vec<u8> = Vec::new();
    lzma_rs::xz_compress(&mut std::io::BufReader::new(x), &mut compressed)?;
    let mut bf = std::io::BufReader::new(compressed.as_slice());

    let mut decomp: Vec<u8> = Vec::new();
    lzma_rs::xz_decompress(&mut bf, &mut decomp)?;
    Ok(decomp)
}

fuzz_target!(|data: &[u8]| {
    let decomp = round_trip_xz(data).expect("Can't decompress what we just compressed");
    assert_eq!(decomp, data);
});

// ---------------------------------------------------------------------------
// rustfmt.toml
// ---------------------------------------------------------------------------

imports_granularity = "Module"
wrap_comments = true

// ---------------------------------------------------------------------------
// src/decode/lzbuffer.rs
// ---------------------------------------------------------------------------

use crate::error;
use std::io;

/// Common interface of the LZ output buffers used by the decoders, generic
/// over the output sink `W` that decompressed bytes are ultimately written to.
pub trait LzBuffer<W>
where
    W: io::Write,
{
    /// Total number of bytes sent through the buffer so far.
    fn len(&self) -> usize;

    /// Retrieve the last byte or return a default.
    fn last_or(&self, lit: u8) -> u8;

    /// Retrieve the n-th last byte.
    fn last_n(&self, dist: usize) -> error::Result<u8>;

    /// Append a literal.
    fn append_literal(&mut self, lit: u8) -> error::Result<()>;

    /// Fetch an LZ sequence (length, distance) from inside the buffer.
    fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()>;

    /// Get a reference to the output sink.
    #[cfg(feature = "stream")]
    fn get_output(&self) -> &W;

    /// Get a mutable reference to the output sink.
    #[cfg(feature = "stream")]
    fn get_output_mut(&mut self) -> &mut W;

    /// Consumes this buffer and flushes any data.
31 | fn finish(self) -> io::Result; 32 | 33 | /// Consumes this buffer without flushing any data. 34 | #[cfg(feature = "stream")] 35 | fn into_output(self) -> W; 36 | } 37 | 38 | /// An accumulating buffer for LZ sequences. 39 | pub struct LzAccumBuffer 40 | where 41 | W: io::Write, 42 | { 43 | /// Output sink 44 | stream: W, 45 | /// Buffer 46 | buf: Vec, 47 | /// Buffer memory limit 48 | memlimit: usize, 49 | /// Total number of bytes sent through the buffer 50 | len: usize, 51 | } 52 | 53 | impl LzAccumBuffer 54 | where 55 | W: io::Write, 56 | { 57 | pub fn from_stream(stream: W, memlimit: usize) -> Self { 58 | Self { 59 | stream, 60 | buf: Vec::new(), 61 | memlimit, 62 | len: 0, 63 | } 64 | } 65 | 66 | /// Append bytes. 67 | pub fn append_bytes(&mut self, buf: &[u8]) { 68 | self.buf.extend_from_slice(buf); 69 | self.len += buf.len(); 70 | } 71 | 72 | /// Reset the internal dictionary. 73 | pub fn reset(&mut self) -> io::Result<()> { 74 | self.stream.write_all(self.buf.as_slice())?; 75 | self.buf.clear(); 76 | self.len = 0; 77 | Ok(()) 78 | } 79 | } 80 | 81 | impl LzBuffer for LzAccumBuffer 82 | where 83 | W: io::Write, 84 | { 85 | fn len(&self) -> usize { 86 | self.len 87 | } 88 | 89 | fn last_or(&self, lit: u8) -> u8 { 90 | let buf_len = self.buf.len(); 91 | if buf_len == 0 { 92 | lit 93 | } else { 94 | self.buf[buf_len - 1] 95 | } 96 | } 97 | 98 | fn last_n(&self, dist: usize) -> error::Result { 99 | let buf_len = self.buf.len(); 100 | if dist > buf_len { 101 | return Err(error::Error::LzmaError(format!( 102 | "Match distance {} is beyond output size {}", 103 | dist, buf_len 104 | ))); 105 | } 106 | 107 | Ok(self.buf[buf_len - dist]) 108 | } 109 | 110 | fn append_literal(&mut self, lit: u8) -> error::Result<()> { 111 | let new_len = self.len + 1; 112 | 113 | if new_len > self.memlimit { 114 | Err(error::Error::LzmaError(format!( 115 | "exceeded memory limit of {}", 116 | self.memlimit 117 | ))) 118 | } else { 119 | self.buf.push(lit); 120 | self.len = 
new_len; 121 | Ok(()) 122 | } 123 | } 124 | 125 | fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()> { 126 | lzma_debug!("LZ {{ len: {}, dist: {} }}", len, dist); 127 | let buf_len = self.buf.len(); 128 | if dist > buf_len { 129 | return Err(error::Error::LzmaError(format!( 130 | "LZ distance {} is beyond output size {}", 131 | dist, buf_len 132 | ))); 133 | } 134 | 135 | let mut offset = buf_len - dist; 136 | for _ in 0..len { 137 | let x = self.buf[offset]; 138 | self.buf.push(x); 139 | offset += 1; 140 | } 141 | self.len += len; 142 | Ok(()) 143 | } 144 | 145 | #[cfg(feature = "stream")] 146 | fn get_output(&self) -> &W { 147 | &self.stream 148 | } 149 | 150 | #[cfg(feature = "stream")] 151 | fn get_output_mut(&mut self) -> &mut W { 152 | &mut self.stream 153 | } 154 | 155 | fn finish(mut self) -> io::Result { 156 | self.stream.write_all(self.buf.as_slice())?; 157 | self.stream.flush()?; 158 | Ok(self.stream) 159 | } 160 | 161 | #[cfg(feature = "stream")] 162 | fn into_output(self) -> W { 163 | self.stream 164 | } 165 | } 166 | 167 | /// A circular buffer for LZ sequences 168 | pub struct LzCircularBuffer 169 | where 170 | W: io::Write, 171 | { 172 | /// Output sink 173 | stream: W, 174 | /// Circular buffer 175 | buf: Vec, 176 | /// Length of the buffer 177 | dict_size: usize, 178 | /// Buffer memory limit 179 | memlimit: usize, 180 | /// Current position 181 | cursor: usize, 182 | /// Total number of bytes sent through the buffer 183 | len: usize, 184 | } 185 | 186 | impl LzCircularBuffer 187 | where 188 | W: io::Write, 189 | { 190 | pub fn from_stream(stream: W, dict_size: usize, memlimit: usize) -> Self { 191 | lzma_info!("Dict size in LZ buffer: {}", dict_size); 192 | Self { 193 | stream, 194 | buf: Vec::new(), 195 | dict_size, 196 | memlimit, 197 | cursor: 0, 198 | len: 0, 199 | } 200 | } 201 | 202 | fn get(&self, index: usize) -> u8 { 203 | *self.buf.get(index).unwrap_or(&0) 204 | } 205 | 206 | fn set(&mut self, index: usize, value: u8) 
-> error::Result<()> { 207 | let new_len = index + 1; 208 | 209 | if self.buf.len() < new_len { 210 | if new_len <= self.memlimit { 211 | self.buf.resize(new_len, 0); 212 | } else { 213 | return Err(error::Error::LzmaError(format!( 214 | "exceeded memory limit of {}", 215 | self.memlimit 216 | ))); 217 | } 218 | } 219 | self.buf[index] = value; 220 | Ok(()) 221 | } 222 | } 223 | 224 | impl LzBuffer for LzCircularBuffer 225 | where 226 | W: io::Write, 227 | { 228 | fn len(&self) -> usize { 229 | self.len 230 | } 231 | 232 | fn last_or(&self, lit: u8) -> u8 { 233 | if self.len == 0 { 234 | lit 235 | } else { 236 | self.get((self.dict_size + self.cursor - 1) % self.dict_size) 237 | } 238 | } 239 | 240 | fn last_n(&self, dist: usize) -> error::Result { 241 | if dist > self.dict_size { 242 | return Err(error::Error::LzmaError(format!( 243 | "Match distance {} is beyond dictionary size {}", 244 | dist, self.dict_size 245 | ))); 246 | } 247 | if dist > self.len { 248 | return Err(error::Error::LzmaError(format!( 249 | "Match distance {} is beyond output size {}", 250 | dist, self.len 251 | ))); 252 | } 253 | 254 | let offset = (self.dict_size + self.cursor - dist) % self.dict_size; 255 | Ok(self.get(offset)) 256 | } 257 | 258 | fn append_literal(&mut self, lit: u8) -> error::Result<()> { 259 | self.set(self.cursor, lit)?; 260 | self.cursor += 1; 261 | self.len += 1; 262 | 263 | // Flush the circular buffer to the output 264 | if self.cursor == self.dict_size { 265 | self.stream.write_all(self.buf.as_slice())?; 266 | self.cursor = 0; 267 | } 268 | 269 | Ok(()) 270 | } 271 | 272 | fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()> { 273 | lzma_debug!("LZ {{ len: {}, dist: {} }}", len, dist); 274 | if dist > self.dict_size { 275 | return Err(error::Error::LzmaError(format!( 276 | "LZ distance {} is beyond dictionary size {}", 277 | dist, self.dict_size 278 | ))); 279 | } 280 | if dist > self.len { 281 | return Err(error::Error::LzmaError(format!( 282 | "LZ 
distance {} is beyond output size {}", 283 | dist, self.len 284 | ))); 285 | } 286 | 287 | let mut offset = (self.dict_size + self.cursor - dist) % self.dict_size; 288 | for _ in 0..len { 289 | let x = self.get(offset); 290 | self.append_literal(x)?; 291 | offset += 1; 292 | if offset == self.dict_size { 293 | offset = 0 294 | } 295 | } 296 | Ok(()) 297 | } 298 | 299 | #[cfg(feature = "stream")] 300 | fn get_output(&self) -> &W { 301 | &self.stream 302 | } 303 | 304 | #[cfg(feature = "stream")] 305 | fn get_output_mut(&mut self) -> &mut W { 306 | &mut self.stream 307 | } 308 | 309 | fn finish(mut self) -> io::Result { 310 | if self.cursor > 0 { 311 | self.stream.write_all(&self.buf[0..self.cursor])?; 312 | } 313 | self.stream.flush()?; 314 | Ok(self.stream) 315 | } 316 | 317 | #[cfg(feature = "stream")] 318 | fn into_output(self) -> W { 319 | self.stream 320 | } 321 | } 322 | 323 | #[cfg(test)] 324 | mod test { 325 | use super::*; 326 | 327 | #[derive(Default)] 328 | struct ManuallyFlushedWriter { 329 | unflushed: Vec, 330 | flushed: Vec, 331 | } 332 | 333 | impl io::Write for ManuallyFlushedWriter { 334 | fn write(&mut self, buf: &[u8]) -> Result { 335 | let len = buf.len(); 336 | self.unflushed.extend_from_slice(buf); 337 | Ok(len) 338 | } 339 | 340 | fn flush(&mut self) -> Result<(), io::Error> { 341 | self.flushed.append(&mut self.unflushed); 342 | Ok(()) 343 | } 344 | } 345 | 346 | #[test] 347 | fn finish_flushes_everything() { 348 | const MEM_LIMIT: usize = 8; 349 | const DICT_SIZE: usize = MEM_LIMIT; 350 | let stream = ManuallyFlushedWriter::default(); 351 | let mut b = LzCircularBuffer::from_stream(stream, DICT_SIZE, MEM_LIMIT); 352 | for _ in 0..(DICT_SIZE * 4) { 353 | b.append_literal(5).unwrap(); 354 | } 355 | let stream = b.finish().unwrap(); 356 | assert!(stream.unflushed.is_empty()); 357 | } 358 | } 359 | -------------------------------------------------------------------------------- /src/decode/lzma.rs: 
// ---------------------------------------------------------------------------
// src/decode/lzma.rs
// ---------------------------------------------------------------------------

use crate::decode::lzbuffer::{LzBuffer, LzCircularBuffer};
use crate::decode::rangecoder::{BitTree, LenDecoder, RangeDecoder};
use crate::decompress::{Options, UnpackedSize};
use crate::error;
use crate::util::vec2d::Vec2D;
use byteorder::{LittleEndian, ReadBytesExt};
use std::io;

/// Maximum input data that can be processed in one iteration.
/// Libhtp uses the following equation to define the maximum number of bits
/// for the worst case scenario:
///   log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160
// 160 bits rounded up is 20 bytes; streaming mode buffers at most this much
// input before attempting to decode one symbol.
const MAX_REQUIRED_INPUT: usize = 20;

/// Processing mode for decompression.
///
/// Tells the decompressor if we should expect more data after parsing the
/// current input.
#[derive(Debug, PartialEq)]
enum ProcessingMode {
    /// Streaming mode. Process the input bytes but assume there will be more
    /// chunks of input data to receive in future calls to
    /// [`DecoderState::process_mode()`].
    Partial,
    /// Synchronous mode. Process the input bytes and confirm end of stream has
    /// been reached. Use this mode if you are processing a fixed buffer of
    /// compressed data, or after using [`ProcessingMode::Partial`] to check for
    /// the end of stream.
    Finish,
}

/// Result of the next iteration of processing.
///
/// Indicates whether processing should continue or is finished.
#[derive(Debug, PartialEq)]
enum ProcessingStatus {
    Continue,
    Finished,
}

#[derive(Debug, Copy, Clone)]
/// LZMA "lclppb" decompression properties.
pub struct LzmaProperties {
    /// The number of literal context bits.
    ///
    /// The most `lc` significant bits of the previous byte are part of the
    /// literal context. `lc` must not be greater than 8.
48 | pub lc: u32, // 0..=8 49 | /// The number of literal position bits. 50 | /// 51 | /// `lp` must not be greater than 4. 52 | pub lp: u32, // 0..=4 53 | /// The number of position bits. 54 | /// 55 | /// The context for literal/match is plaintext offset modulo `2^pb`. 56 | /// `pb` must not be greater than 4. 57 | pub pb: u32, // 0..=4 58 | } 59 | 60 | impl LzmaProperties { 61 | /// Assert the validity of the LZMA properties. 62 | pub(crate) fn validate(&self) { 63 | assert!(self.lc <= 8); 64 | assert!(self.lp <= 4); 65 | assert!(self.pb <= 4); 66 | } 67 | } 68 | 69 | #[derive(Debug, Copy, Clone)] 70 | /// LZMA decompression parameters. 71 | pub struct LzmaParams { 72 | /// The LZMA "lclppb" decompression properties. 73 | pub(crate) properties: LzmaProperties, 74 | /// The dictionary size to use when decompressing. 75 | pub(crate) dict_size: u32, 76 | /// The size of the unpacked data. 77 | pub(crate) unpacked_size: Option, 78 | } 79 | 80 | impl LzmaParams { 81 | /// Create an new instance of LZMA parameters. 82 | #[cfg(feature = "raw_decoder")] 83 | pub fn new( 84 | properties: LzmaProperties, 85 | dict_size: u32, 86 | unpacked_size: Option, 87 | ) -> LzmaParams { 88 | Self { 89 | properties, 90 | dict_size, 91 | unpacked_size, 92 | } 93 | } 94 | 95 | /// Read LZMA parameters from the LZMA stream header. 
96 | pub fn read_header(input: &mut R, options: &Options) -> error::Result 97 | where 98 | R: io::BufRead, 99 | { 100 | // Properties 101 | let props = input.read_u8().map_err(error::Error::HeaderTooShort)?; 102 | 103 | let mut pb = props as u32; 104 | if pb >= 225 { 105 | return Err(error::Error::LzmaError(format!( 106 | "LZMA header invalid properties: {} must be < 225", 107 | pb 108 | ))); 109 | } 110 | 111 | let lc: u32 = pb % 9; 112 | pb /= 9; 113 | let lp: u32 = pb % 5; 114 | pb /= 5; 115 | 116 | lzma_info!("Properties {{ lc: {}, lp: {}, pb: {} }}", lc, lp, pb); 117 | 118 | // Dictionary 119 | let dict_size_provided = input 120 | .read_u32::() 121 | .map_err(error::Error::HeaderTooShort)?; 122 | let dict_size = if dict_size_provided < 0x1000 { 123 | 0x1000 124 | } else { 125 | dict_size_provided 126 | }; 127 | 128 | lzma_info!("Dict size: {}", dict_size); 129 | 130 | // Unpacked size 131 | let unpacked_size: Option = match options.unpacked_size { 132 | UnpackedSize::ReadFromHeader => { 133 | let unpacked_size_provided = input 134 | .read_u64::() 135 | .map_err(error::Error::HeaderTooShort)?; 136 | let marker_mandatory: bool = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF; 137 | if marker_mandatory { 138 | None 139 | } else { 140 | Some(unpacked_size_provided) 141 | } 142 | } 143 | UnpackedSize::ReadHeaderButUseProvided(x) => { 144 | input 145 | .read_u64::() 146 | .map_err(error::Error::HeaderTooShort)?; 147 | x 148 | } 149 | UnpackedSize::UseProvided(x) => x, 150 | }; 151 | 152 | lzma_info!("Unpacked size: {:?}", unpacked_size); 153 | 154 | let params = LzmaParams { 155 | properties: LzmaProperties { lc, lp, pb }, 156 | dict_size, 157 | unpacked_size, 158 | }; 159 | 160 | Ok(params) 161 | } 162 | } 163 | 164 | #[derive(Debug)] 165 | pub(crate) struct DecoderState { 166 | // Buffer input data here if we need more for decompression. Up to 167 | // MAX_REQUIRED_INPUT bytes can be consumed during one iteration. 
168 | partial_input_buf: std::io::Cursor<[u8; MAX_REQUIRED_INPUT]>, 169 | pub(crate) lzma_props: LzmaProperties, 170 | unpacked_size: Option, 171 | literal_probs: Vec2D, 172 | pos_slot_decoder: [BitTree<{ 1 << 6 }>; 4], 173 | align_decoder: BitTree<{ 1 << 4 }>, 174 | pos_decoders: [u16; 115], 175 | is_match: [u16; 192], // true = LZ, false = literal 176 | is_rep: [u16; 12], 177 | is_rep_g0: [u16; 12], 178 | is_rep_g1: [u16; 12], 179 | is_rep_g2: [u16; 12], 180 | is_rep_0long: [u16; 192], 181 | state: usize, 182 | rep: [usize; 4], 183 | len_decoder: LenDecoder, 184 | rep_len_decoder: LenDecoder, 185 | } 186 | 187 | impl DecoderState { 188 | pub fn new(lzma_props: LzmaProperties, unpacked_size: Option) -> Self { 189 | lzma_props.validate(); 190 | DecoderState { 191 | partial_input_buf: std::io::Cursor::new([0; MAX_REQUIRED_INPUT]), 192 | lzma_props, 193 | unpacked_size, 194 | literal_probs: Vec2D::init(0x400, (1 << (lzma_props.lc + lzma_props.lp), 0x300)), 195 | pos_slot_decoder: [ 196 | BitTree::new(), 197 | BitTree::new(), 198 | BitTree::new(), 199 | BitTree::new(), 200 | ], 201 | align_decoder: BitTree::new(), 202 | pos_decoders: [0x400; 115], 203 | is_match: [0x400; 192], 204 | is_rep: [0x400; 12], 205 | is_rep_g0: [0x400; 12], 206 | is_rep_g1: [0x400; 12], 207 | is_rep_g2: [0x400; 12], 208 | is_rep_0long: [0x400; 192], 209 | state: 0, 210 | rep: [0; 4], 211 | len_decoder: LenDecoder::new(), 212 | rep_len_decoder: LenDecoder::new(), 213 | } 214 | } 215 | 216 | pub fn reset_state(&mut self, new_props: LzmaProperties) { 217 | new_props.validate(); 218 | if self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp { 219 | // We can reset here by filling the existing buffer with 0x400. 220 | self.literal_probs.fill(0x400); 221 | } else { 222 | // We need to reallocate because of the new size of `lc+lp`. 
223 | self.literal_probs = Vec2D::init(0x400, (1 << (new_props.lc + new_props.lp), 0x300)); 224 | } 225 | 226 | self.lzma_props = new_props; 227 | // For stack-allocated arrays, it was found to be faster to re-create new arrays 228 | // dropping the existing one, rather than using `fill` to reset the contents to 229 | // zero. Heap-based arrays use fill to keep their allocation rather than 230 | // reallocate. 231 | self.pos_slot_decoder = [ 232 | BitTree::new(), 233 | BitTree::new(), 234 | BitTree::new(), 235 | BitTree::new(), 236 | ]; 237 | self.align_decoder = BitTree::new(); 238 | self.pos_decoders = [0x400; 115]; 239 | self.is_match = [0x400; 192]; 240 | self.is_rep = [0x400; 12]; 241 | self.is_rep_g0 = [0x400; 12]; 242 | self.is_rep_g1 = [0x400; 12]; 243 | self.is_rep_g2 = [0x400; 12]; 244 | self.is_rep_0long = [0x400; 192]; 245 | self.state = 0; 246 | self.rep = [0; 4]; 247 | self.len_decoder = LenDecoder::new(); 248 | self.rep_len_decoder = LenDecoder::new(); 249 | } 250 | 251 | pub fn set_unpacked_size(&mut self, unpacked_size: Option) { 252 | self.unpacked_size = unpacked_size; 253 | } 254 | 255 | pub fn process, R: io::BufRead>( 256 | &mut self, 257 | output: &mut LZB, 258 | rangecoder: &mut RangeDecoder<'_, R>, 259 | ) -> error::Result<()> { 260 | self.process_mode(output, rangecoder, ProcessingMode::Finish) 261 | } 262 | 263 | #[cfg(feature = "stream")] 264 | pub fn process_stream, R: io::BufRead>( 265 | &mut self, 266 | output: &mut LZB, 267 | rangecoder: &mut RangeDecoder<'_, R>, 268 | ) -> error::Result<()> { 269 | self.process_mode(output, rangecoder, ProcessingMode::Partial) 270 | } 271 | 272 | /// Process the next iteration of the loop. 273 | /// 274 | /// If the update flag is true, the decoder's state will be updated. 275 | /// 276 | /// Returns [`ProcessingStatus`] to determine whether one should continue 277 | /// processing the loop. 
278 | fn process_next_inner, R: io::BufRead>( 279 | &mut self, 280 | output: &mut LZB, 281 | rangecoder: &mut RangeDecoder<'_, R>, 282 | update: bool, 283 | ) -> error::Result { 284 | let pos_state = output.len() & ((1 << self.lzma_props.pb) - 1); 285 | 286 | // Literal 287 | if !rangecoder.decode_bit( 288 | // TODO: assumes pb = 2 ?? 289 | &mut self.is_match[(self.state << 4) + pos_state], 290 | update, 291 | )? { 292 | let byte: u8 = self.decode_literal(output, rangecoder, update)?; 293 | 294 | if update { 295 | lzma_debug!("Literal: {}", byte); 296 | output.append_literal(byte)?; 297 | 298 | self.state = if self.state < 4 { 299 | 0 300 | } else if self.state < 10 { 301 | self.state - 3 302 | } else { 303 | self.state - 6 304 | }; 305 | } 306 | return Ok(ProcessingStatus::Continue); 307 | } 308 | 309 | // LZ 310 | let mut len: usize; 311 | // Distance is repeated from LRU 312 | if rangecoder.decode_bit(&mut self.is_rep[self.state], update)? { 313 | // dist = rep[0] 314 | if !rangecoder.decode_bit(&mut self.is_rep_g0[self.state], update)? { 315 | // len = 1 316 | if !rangecoder.decode_bit( 317 | &mut self.is_rep_0long[(self.state << 4) + pos_state], 318 | update, 319 | )? { 320 | // update state (short rep) 321 | if update { 322 | self.state = if self.state < 7 { 9 } else { 11 }; 323 | let dist = self.rep[0] + 1; 324 | output.append_lz(1, dist)?; 325 | } 326 | return Ok(ProcessingStatus::Continue); 327 | } 328 | // dist = rep[i] 329 | } else { 330 | let idx: usize; 331 | if !rangecoder.decode_bit(&mut self.is_rep_g1[self.state], update)? { 332 | idx = 1; 333 | } else if !rangecoder.decode_bit(&mut self.is_rep_g2[self.state], update)? 
{ 334 | idx = 2; 335 | } else { 336 | idx = 3; 337 | } 338 | if update { 339 | // Update LRU 340 | let dist = self.rep[idx]; 341 | for i in (0..idx).rev() { 342 | self.rep[i + 1] = self.rep[i]; 343 | } 344 | self.rep[0] = dist 345 | } 346 | } 347 | 348 | len = self.rep_len_decoder.decode(rangecoder, pos_state, update)?; 349 | 350 | if update { 351 | // update state (rep) 352 | self.state = if self.state < 7 { 8 } else { 11 }; 353 | } 354 | // New distance 355 | } else { 356 | if update { 357 | // Update LRU 358 | self.rep[3] = self.rep[2]; 359 | self.rep[2] = self.rep[1]; 360 | self.rep[1] = self.rep[0]; 361 | } 362 | 363 | len = self.len_decoder.decode(rangecoder, pos_state, update)?; 364 | 365 | if update { 366 | // update state (match) 367 | self.state = if self.state < 7 { 7 } else { 10 }; 368 | } 369 | 370 | let rep_0 = self.decode_distance(rangecoder, len, update)?; 371 | 372 | if update { 373 | self.rep[0] = rep_0; 374 | if self.rep[0] == 0xFFFF_FFFF { 375 | if rangecoder.is_finished_ok()? { 376 | return Ok(ProcessingStatus::Finished); 377 | } 378 | return Err(error::Error::LzmaError(String::from( 379 | "Found end-of-stream marker but more bytes are available", 380 | ))); 381 | } 382 | } 383 | } 384 | 385 | if update { 386 | len += 2; 387 | 388 | let dist = self.rep[0] + 1; 389 | output.append_lz(len, dist)?; 390 | } 391 | 392 | Ok(ProcessingStatus::Continue) 393 | } 394 | 395 | fn process_next, R: io::BufRead>( 396 | &mut self, 397 | output: &mut LZB, 398 | rangecoder: &mut RangeDecoder<'_, R>, 399 | ) -> error::Result { 400 | self.process_next_inner(output, rangecoder, true) 401 | } 402 | 403 | /// Try to process the next iteration of the loop. 404 | /// 405 | /// This will check to see if there is enough data to consume and advance 406 | /// the decompressor. Needed in streaming mode to avoid corrupting the 407 | /// state while processing incomplete chunks of data. 
408 | fn try_process_next>( 409 | &mut self, 410 | output: &mut LZB, 411 | buf: &[u8], 412 | range: u32, 413 | code: u32, 414 | ) -> error::Result<()> { 415 | let mut temp = std::io::Cursor::new(buf); 416 | let mut rangecoder = RangeDecoder::from_parts(&mut temp, range, code); 417 | let _ = self.process_next_inner(output, &mut rangecoder, false)?; 418 | Ok(()) 419 | } 420 | 421 | /// Utility function to read data into the partial input buffer. 422 | fn read_partial_input_buf( 423 | &mut self, 424 | rangecoder: &mut RangeDecoder<'_, R>, 425 | ) -> error::Result<()> { 426 | // Fill as much of the tmp buffer as possible 427 | let start = self.partial_input_buf.position() as usize; 428 | let bytes_read = 429 | rangecoder.read_into(&mut self.partial_input_buf.get_mut()[start..])? as u64; 430 | self.partial_input_buf 431 | .set_position(self.partial_input_buf.position() + bytes_read); 432 | Ok(()) 433 | } 434 | 435 | fn process_mode, R: io::BufRead>( 436 | &mut self, 437 | output: &mut LZB, 438 | rangecoder: &mut RangeDecoder<'_, R>, 439 | mode: ProcessingMode, 440 | ) -> error::Result<()> { 441 | loop { 442 | if let Some(unpacked_size) = self.unpacked_size { 443 | if output.len() as u64 >= unpacked_size { 444 | break; 445 | } 446 | } else if match mode { 447 | ProcessingMode::Partial => { 448 | rangecoder.is_eof()? && self.partial_input_buf.position() as usize == 0 449 | } 450 | ProcessingMode::Finish => { 451 | rangecoder.is_finished_ok()? 
&& self.partial_input_buf.position() as usize == 0 452 | } 453 | } { 454 | break; 455 | } 456 | 457 | if self.partial_input_buf.position() as usize > 0 { 458 | self.read_partial_input_buf(rangecoder)?; 459 | let tmp = *self.partial_input_buf.get_ref(); 460 | 461 | // Check if we need more data to advance the decompressor 462 | if mode == ProcessingMode::Partial 463 | && (self.partial_input_buf.position() as usize) < MAX_REQUIRED_INPUT 464 | && self 465 | .try_process_next( 466 | output, 467 | &tmp[..self.partial_input_buf.position() as usize], 468 | rangecoder.range, 469 | rangecoder.code, 470 | ) 471 | .is_err() 472 | { 473 | return Ok(()); 474 | } 475 | 476 | // Run the decompressor on the tmp buffer 477 | let mut tmp_reader = 478 | io::Cursor::new(&tmp[..self.partial_input_buf.position() as usize]); 479 | let mut tmp_rangecoder = 480 | RangeDecoder::from_parts(&mut tmp_reader, rangecoder.range, rangecoder.code); 481 | let res = self.process_next(output, &mut tmp_rangecoder)?; 482 | 483 | // Update the actual rangecoder 484 | rangecoder.set(tmp_rangecoder.range, tmp_rangecoder.code); 485 | 486 | // Update tmp buffer 487 | let end = self.partial_input_buf.position(); 488 | let new_len = end - tmp_reader.position(); 489 | self.partial_input_buf.get_mut()[..new_len as usize] 490 | .copy_from_slice(&tmp[tmp_reader.position() as usize..end as usize]); 491 | self.partial_input_buf.set_position(new_len); 492 | 493 | if res == ProcessingStatus::Finished { 494 | break; 495 | }; 496 | } else { 497 | let buf: &[u8] = rangecoder.stream.fill_buf()?; 498 | if mode == ProcessingMode::Partial 499 | && buf.len() < MAX_REQUIRED_INPUT 500 | && self 501 | .try_process_next(output, buf, rangecoder.range, rangecoder.code) 502 | .is_err() 503 | { 504 | return self.read_partial_input_buf(rangecoder); 505 | } 506 | 507 | if self.process_next(output, rangecoder)? 
== ProcessingStatus::Finished { 508 | break; 509 | }; 510 | } 511 | } 512 | 513 | if let Some(len) = self.unpacked_size { 514 | if mode == ProcessingMode::Finish && len != output.len() as u64 { 515 | return Err(error::Error::LzmaError(format!( 516 | "Expected unpacked size of {} but decompressed to {}", 517 | len, 518 | output.len() 519 | ))); 520 | } 521 | } 522 | 523 | Ok(()) 524 | } 525 | 526 | fn decode_literal, R: io::BufRead>( 527 | &mut self, 528 | output: &mut LZB, 529 | rangecoder: &mut RangeDecoder<'_, R>, 530 | update: bool, 531 | ) -> error::Result { 532 | let def_prev_byte = 0u8; 533 | let prev_byte = output.last_or(def_prev_byte) as usize; 534 | 535 | let mut result: usize = 1; 536 | let lit_state = ((output.len() & ((1 << self.lzma_props.lp) - 1)) << self.lzma_props.lc) 537 | + (prev_byte >> (8 - self.lzma_props.lc)); 538 | let probs = &mut self.literal_probs[lit_state]; 539 | 540 | if self.state >= 7 { 541 | let mut match_byte = output.last_n(self.rep[0] + 1)? as usize; 542 | 543 | while result < 0x100 { 544 | let match_bit = (match_byte >> 7) & 1; 545 | match_byte <<= 1; 546 | let bit = rangecoder 547 | .decode_bit(&mut probs[((1 + match_bit) << 8) + result], update)? 548 | as usize; 549 | result = (result << 1) ^ bit; 550 | if match_bit != bit { 551 | break; 552 | } 553 | } 554 | } 555 | 556 | while result < 0x100 { 557 | result = (result << 1) ^ (rangecoder.decode_bit(&mut probs[result], update)? as usize); 558 | } 559 | 560 | Ok((result - 0x100) as u8) 561 | } 562 | 563 | fn decode_distance( 564 | &mut self, 565 | rangecoder: &mut RangeDecoder<'_, R>, 566 | length: usize, 567 | update: bool, 568 | ) -> error::Result { 569 | let len_state = if length > 3 { 3 } else { length }; 570 | 571 | let pos_slot = self.pos_slot_decoder[len_state].parse(rangecoder, update)? 
as usize; 572 | if pos_slot < 4 { 573 | return Ok(pos_slot); 574 | } 575 | 576 | let num_direct_bits = (pos_slot >> 1) - 1; 577 | let mut result = (2 ^ (pos_slot & 1)) << num_direct_bits; 578 | 579 | if pos_slot < 14 { 580 | result += rangecoder.parse_reverse_bit_tree( 581 | num_direct_bits, 582 | &mut self.pos_decoders, 583 | result - pos_slot, 584 | update, 585 | )? as usize; 586 | } else { 587 | result += (rangecoder.get(num_direct_bits - 4)? as usize) << 4; 588 | result += self.align_decoder.parse_reverse(rangecoder, update)? as usize; 589 | } 590 | 591 | Ok(result) 592 | } 593 | } 594 | 595 | #[derive(Debug)] 596 | /// Raw decoder for LZMA. 597 | pub struct LzmaDecoder { 598 | params: LzmaParams, 599 | memlimit: usize, 600 | state: DecoderState, 601 | } 602 | 603 | impl LzmaDecoder { 604 | /// Creates a new object ready for decompressing data that it's given for 605 | /// the input dict size, expected unpacked data size, and memory limit 606 | /// for the internal buffer. 607 | pub fn new(params: LzmaParams, memlimit: Option) -> error::Result { 608 | Ok(Self { 609 | params, 610 | memlimit: memlimit.unwrap_or(usize::MAX), 611 | state: DecoderState::new(params.properties, params.unpacked_size), 612 | }) 613 | } 614 | 615 | /// Performs the equivalent of replacing this decompression state with a 616 | /// freshly allocated copy. 617 | /// 618 | /// Because the decoder state is reset, the unpacked size may optionally be 619 | /// re-specified. If [`None`] is given, the previous unpacked size that 620 | /// the decoder was initialized with remains unchanged. 621 | /// 622 | /// This function may not allocate memory and will attempt to reuse any 623 | /// previously allocated resources. 
624 | #[cfg(feature = "raw_decoder")] 625 | pub fn reset(&mut self, unpacked_size: Option>) { 626 | self.state.reset_state(self.params.properties); 627 | 628 | if let Some(unpacked_size) = unpacked_size { 629 | self.state.set_unpacked_size(unpacked_size); 630 | } 631 | } 632 | 633 | /// Decompresses the input data into the output, consuming only as much 634 | /// input as needed and writing as much output as possible. 635 | pub fn decompress( 636 | &mut self, 637 | input: &mut R, 638 | output: &mut W, 639 | ) -> error::Result<()> { 640 | let mut output = 641 | LzCircularBuffer::from_stream(output, self.params.dict_size as usize, self.memlimit); 642 | 643 | let mut rangecoder = RangeDecoder::new(input) 644 | .map_err(|e| error::Error::LzmaError(format!("LZMA stream too short: {}", e)))?; 645 | self.state.process(&mut output, &mut rangecoder)?; 646 | output.finish()?; 647 | Ok(()) 648 | } 649 | } 650 | -------------------------------------------------------------------------------- /src/decode/lzma2.rs: -------------------------------------------------------------------------------- 1 | use crate::decode::lzbuffer::LzBuffer; 2 | use crate::decode::lzma::{DecoderState, LzmaProperties}; 3 | use crate::decode::{lzbuffer, rangecoder}; 4 | use crate::error; 5 | use byteorder::{BigEndian, ReadBytesExt}; 6 | use std::io; 7 | use std::io::Read; 8 | 9 | #[derive(Debug)] 10 | /// Raw decoder for LZMA2. 11 | pub struct Lzma2Decoder { 12 | lzma_state: DecoderState, 13 | } 14 | 15 | impl Default for Lzma2Decoder { 16 | fn default() -> Self { 17 | Self::new() 18 | } 19 | } 20 | 21 | impl Lzma2Decoder { 22 | /// Creates a new object ready for decompressing data that it's given. 23 | pub fn new() -> Lzma2Decoder { 24 | Lzma2Decoder { 25 | lzma_state: DecoderState::new( 26 | LzmaProperties { 27 | lc: 0, 28 | lp: 0, 29 | pb: 0, 30 | }, 31 | None, 32 | ), 33 | } 34 | } 35 | 36 | /// Performs the equivalent of replacing this decompression state with a 37 | /// freshly allocated copy. 
38 | /// 39 | /// This function may not allocate memory and will attempt to reuse any 40 | /// previously allocated resources. 41 | #[cfg(feature = "raw_decoder")] 42 | pub fn reset(&mut self) { 43 | self.lzma_state.reset_state(LzmaProperties { 44 | lc: 0, 45 | lp: 0, 46 | pb: 0, 47 | }); 48 | } 49 | 50 | /// Decompresses the input data into the output, consuming only as much 51 | /// input as needed and writing as much output as possible. 52 | pub fn decompress( 53 | &mut self, 54 | input: &mut R, 55 | output: &mut W, 56 | ) -> error::Result<()> { 57 | let mut accum = lzbuffer::LzAccumBuffer::from_stream(output, usize::MAX); 58 | 59 | loop { 60 | let status = input.read_u8().map_err(|e| { 61 | error::Error::LzmaError(format!("LZMA2 expected new status: {}", e)) 62 | })?; 63 | 64 | lzma_info!("LZMA2 status: {}", status); 65 | 66 | if status == 0 { 67 | lzma_info!("LZMA2 end of input"); 68 | break; 69 | } else if status == 1 { 70 | // uncompressed reset dict 71 | Self::parse_uncompressed(&mut accum, input, true)?; 72 | } else if status == 2 { 73 | // uncompressed no reset 74 | Self::parse_uncompressed(&mut accum, input, false)?; 75 | } else { 76 | self.parse_lzma(&mut accum, input, status)?; 77 | } 78 | } 79 | 80 | accum.finish()?; 81 | Ok(()) 82 | } 83 | 84 | fn parse_lzma( 85 | &mut self, 86 | accum: &mut lzbuffer::LzAccumBuffer, 87 | input: &mut R, 88 | status: u8, 89 | ) -> error::Result<()> 90 | where 91 | R: io::BufRead, 92 | W: io::Write, 93 | { 94 | if status & 0x80 == 0 { 95 | return Err(error::Error::LzmaError(format!( 96 | "LZMA2 invalid status {}, must be 0, 1, 2 or >= 128", 97 | status 98 | ))); 99 | } 100 | 101 | let reset_dict: bool; 102 | let reset_state: bool; 103 | let reset_props: bool; 104 | match (status >> 5) & 0x3 { 105 | 0 => { 106 | reset_dict = false; 107 | reset_state = false; 108 | reset_props = false; 109 | } 110 | 1 => { 111 | reset_dict = false; 112 | reset_state = true; 113 | reset_props = false; 114 | } 115 | 2 => { 116 | reset_dict 
= false; 117 | reset_state = true; 118 | reset_props = true; 119 | } 120 | 3 => { 121 | reset_dict = true; 122 | reset_state = true; 123 | reset_props = true; 124 | } 125 | _ => unreachable!(), 126 | } 127 | 128 | let unpacked_size = input 129 | .read_u16::() 130 | .map_err(|e| error::Error::LzmaError(format!("LZMA2 expected unpacked size: {}", e)))?; 131 | let unpacked_size = ((((status & 0x1F) as u64) << 16) | (unpacked_size as u64)) + 1; 132 | 133 | let packed_size = input 134 | .read_u16::() 135 | .map_err(|e| error::Error::LzmaError(format!("LZMA2 expected packed size: {}", e)))?; 136 | let packed_size = (packed_size as u64) + 1; 137 | 138 | lzma_info!( 139 | "LZMA2 compressed block {{ unpacked_size: {}, packed_size: {}, reset_dict: {}, reset_state: {}, reset_props: {} }}", 140 | unpacked_size, 141 | packed_size, 142 | reset_dict, 143 | reset_state, 144 | reset_props 145 | ); 146 | 147 | if reset_dict { 148 | accum.reset()?; 149 | } 150 | 151 | if reset_state { 152 | let new_props = if reset_props { 153 | let props = input.read_u8().map_err(|e| { 154 | error::Error::LzmaError(format!("LZMA2 expected new properties: {}", e)) 155 | })?; 156 | 157 | let mut pb = props as u32; 158 | if pb >= 225 { 159 | return Err(error::Error::LzmaError(format!( 160 | "LZMA2 invalid properties: {} must be < 225", 161 | pb 162 | ))); 163 | } 164 | 165 | let lc = pb % 9; 166 | pb /= 9; 167 | let lp = pb % 5; 168 | pb /= 5; 169 | 170 | if lc + lp > 4 { 171 | return Err(error::Error::LzmaError(format!( 172 | "LZMA2 invalid properties: lc + lp ({} + {}) must be <= 4", 173 | lc, lp 174 | ))); 175 | } 176 | 177 | lzma_info!("Properties {{ lc: {}, lp: {}, pb: {} }}", lc, lp, pb); 178 | LzmaProperties { lc, lp, pb } 179 | } else { 180 | self.lzma_state.lzma_props 181 | }; 182 | 183 | self.lzma_state.reset_state(new_props); 184 | } 185 | 186 | self.lzma_state 187 | .set_unpacked_size(Some(unpacked_size + accum.len() as u64)); 188 | 189 | let mut taken = input.take(packed_size); 190 | let 
mut rangecoder = rangecoder::RangeDecoder::new(&mut taken) 191 | .map_err(|e| error::Error::LzmaError(format!("LZMA input too short: {}", e)))?; 192 | self.lzma_state.process(accum, &mut rangecoder) 193 | } 194 | 195 | fn parse_uncompressed( 196 | accum: &mut lzbuffer::LzAccumBuffer, 197 | input: &mut R, 198 | reset_dict: bool, 199 | ) -> error::Result<()> 200 | where 201 | R: io::BufRead, 202 | W: io::Write, 203 | { 204 | let unpacked_size = input 205 | .read_u16::() 206 | .map_err(|e| error::Error::LzmaError(format!("LZMA2 expected unpacked size: {}", e)))?; 207 | let unpacked_size = (unpacked_size as usize) + 1; 208 | 209 | lzma_info!( 210 | "LZMA2 uncompressed block {{ unpacked_size: {}, reset_dict: {} }}", 211 | unpacked_size, 212 | reset_dict 213 | ); 214 | 215 | if reset_dict { 216 | accum.reset()?; 217 | } 218 | 219 | let mut buf = vec![0; unpacked_size]; 220 | input.read_exact(buf.as_mut_slice()).map_err(|e| { 221 | error::Error::LzmaError(format!( 222 | "LZMA2 expected {} uncompressed bytes: {}", 223 | unpacked_size, e 224 | )) 225 | })?; 226 | accum.append_bytes(buf.as_slice()); 227 | 228 | Ok(()) 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /src/decode/mod.rs: -------------------------------------------------------------------------------- 1 | //! Decoding logic. 2 | 3 | pub mod lzbuffer; 4 | pub mod lzma; 5 | pub mod lzma2; 6 | pub mod options; 7 | pub mod rangecoder; 8 | pub mod util; 9 | pub mod xz; 10 | 11 | #[cfg(feature = "stream")] 12 | pub mod stream; 13 | -------------------------------------------------------------------------------- /src/decode/options.rs: -------------------------------------------------------------------------------- 1 | /// Options to tweak decompression behavior. 2 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] 3 | pub struct Options { 4 | /// Defines whether the unpacked size should be read from the header or 5 | /// provided. 
6 | /// 7 | /// The default is [`UnpackedSize::ReadFromHeader`]. 8 | pub unpacked_size: UnpackedSize, 9 | /// Defines whether the dictionary's dynamic size should be limited during 10 | /// decompression. 11 | /// 12 | /// The default is unlimited. 13 | pub memlimit: Option, 14 | /// Determines whether to bypass end of stream validation. 15 | /// 16 | /// This option only applies to the `stream` API. 17 | /// 18 | /// The default is false (always do completion check). 19 | pub allow_incomplete: bool, 20 | } 21 | 22 | /// Alternatives for defining the unpacked size of the decoded data. 23 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] 24 | pub enum UnpackedSize { 25 | /// Assume that the 8 bytes used to specify the unpacked size are present in 26 | /// the header. If the bytes are `0xFFFF_FFFF_FFFF_FFFF`, assume that 27 | /// there is an end-of-payload marker in the file. 28 | /// If not, read the 8 bytes as a little-endian encoded u64. 29 | #[default] 30 | ReadFromHeader, 31 | /// Assume that there are 8 bytes representing the unpacked size present in 32 | /// the header. Read it, but ignore it and use the provided value 33 | /// instead. If the provided value is [`None`], assume that there is an 34 | /// end-of-payload marker in the file. Note that this is a non-standard 35 | /// way of reading LZMA data, but is used by certain libraries such as 36 | /// [OpenCTM](http://openctm.sourceforge.net/). 37 | ReadHeaderButUseProvided(Option), 38 | /// Assume that the 8 bytes typically used to represent the unpacked size 39 | /// are *not* present in the header. Use the provided value. 40 | /// If the provided value is [`None`], assume that there is an 41 | /// end-of-payload marker in the file. 
42 | UseProvided(Option), 43 | } 44 | 45 | #[cfg(test)] 46 | mod test { 47 | use super::*; 48 | 49 | #[test] 50 | fn test_options() { 51 | assert_eq!( 52 | Options { 53 | unpacked_size: UnpackedSize::ReadFromHeader, 54 | memlimit: None, 55 | allow_incomplete: false, 56 | }, 57 | Options::default() 58 | ); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/decode/rangecoder.rs: -------------------------------------------------------------------------------- 1 | use crate::decode::util; 2 | use crate::error; 3 | use crate::util::const_assert; 4 | use byteorder::{BigEndian, ReadBytesExt}; 5 | use std::io; 6 | 7 | pub struct RangeDecoder<'a, R> 8 | where 9 | R: 'a + io::BufRead, 10 | { 11 | pub stream: &'a mut R, 12 | pub range: u32, 13 | pub code: u32, 14 | } 15 | 16 | impl<'a, R> RangeDecoder<'a, R> 17 | where 18 | R: io::BufRead, 19 | { 20 | pub fn new(stream: &'a mut R) -> io::Result { 21 | let mut dec = Self { 22 | stream, 23 | range: 0xFFFF_FFFF, 24 | code: 0, 25 | }; 26 | let _ = dec.stream.read_u8()?; 27 | dec.code = dec.stream.read_u32::()?; 28 | lzma_debug!("0 {{ range: {:08x}, code: {:08x} }}", dec.range, dec.code); 29 | Ok(dec) 30 | } 31 | 32 | pub fn from_parts(stream: &'a mut R, range: u32, code: u32) -> Self { 33 | Self { 34 | stream, 35 | range, 36 | code, 37 | } 38 | } 39 | 40 | pub fn set(&mut self, range: u32, code: u32) { 41 | self.range = range; 42 | self.code = code; 43 | } 44 | 45 | pub fn read_into(&mut self, dst: &mut [u8]) -> io::Result { 46 | self.stream.read(dst) 47 | } 48 | 49 | #[inline] 50 | pub fn is_finished_ok(&mut self) -> io::Result { 51 | Ok(self.code == 0 && self.is_eof()?) 
52 | } 53 | 54 | #[inline] 55 | pub fn is_eof(&mut self) -> io::Result { 56 | util::is_eof(self.stream) 57 | } 58 | 59 | #[inline] 60 | fn normalize(&mut self) -> io::Result<()> { 61 | lzma_trace!(" {{ range: {:08x}, code: {:08x} }}", self.range, self.code); 62 | if self.range < 0x0100_0000 { 63 | self.range <<= 8; 64 | self.code = (self.code << 8) ^ (self.stream.read_u8()? as u32); 65 | 66 | lzma_debug!("+ {{ range: {:08x}, code: {:08x} }}", self.range, self.code); 67 | } 68 | Ok(()) 69 | } 70 | 71 | #[inline] 72 | fn get_bit(&mut self) -> error::Result { 73 | self.range >>= 1; 74 | 75 | let bit = self.code >= self.range; 76 | if bit { 77 | self.code -= self.range 78 | } 79 | 80 | self.normalize()?; 81 | Ok(bit) 82 | } 83 | 84 | pub fn get(&mut self, count: usize) -> error::Result { 85 | let mut result = 0u32; 86 | for _ in 0..count { 87 | result = (result << 1) ^ (self.get_bit()? as u32) 88 | } 89 | Ok(result) 90 | } 91 | 92 | #[inline] 93 | pub fn decode_bit(&mut self, prob: &mut u16, update: bool) -> io::Result { 94 | let bound: u32 = (self.range >> 11) * (*prob as u32); 95 | 96 | lzma_trace!( 97 | " bound: {:08x}, prob: {:04x}, bit: {}", 98 | bound, 99 | prob, 100 | (self.code > bound) as u8 101 | ); 102 | if self.code < bound { 103 | if update { 104 | *prob += (0x800_u16 - *prob) >> 5; 105 | } 106 | self.range = bound; 107 | 108 | self.normalize()?; 109 | Ok(false) 110 | } else { 111 | if update { 112 | *prob -= *prob >> 5; 113 | } 114 | self.code -= bound; 115 | self.range -= bound; 116 | 117 | self.normalize()?; 118 | Ok(true) 119 | } 120 | } 121 | 122 | fn parse_bit_tree( 123 | &mut self, 124 | num_bits: usize, 125 | probs: &mut [u16], 126 | update: bool, 127 | ) -> io::Result { 128 | let mut tmp: u32 = 1; 129 | for _ in 0..num_bits { 130 | let bit = self.decode_bit(&mut probs[tmp as usize], update)?; 131 | tmp = (tmp << 1) ^ (bit as u32); 132 | } 133 | Ok(tmp - (1 << num_bits)) 134 | } 135 | 136 | pub fn parse_reverse_bit_tree( 137 | &mut self, 138 | 
num_bits: usize, 139 | probs: &mut [u16], 140 | offset: usize, 141 | update: bool, 142 | ) -> io::Result { 143 | let mut result = 0u32; 144 | let mut tmp: usize = 1; 145 | for i in 0..num_bits { 146 | let bit = self.decode_bit(&mut probs[offset + tmp], update)?; 147 | tmp = (tmp << 1) ^ (bit as usize); 148 | result ^= (bit as u32) << i; 149 | } 150 | Ok(result) 151 | } 152 | } 153 | 154 | #[derive(Debug, Clone)] 155 | pub struct BitTree { 156 | probs: [u16; PROBS_ARRAY_LEN], 157 | } 158 | 159 | impl BitTree { 160 | pub fn new() -> Self { 161 | // The validity of PROBS_ARRAY_LEN is checked at compile-time with a macro 162 | // that confirms that the argument P passed is indeed 1 << N for 163 | // some N using usize::trailing_zeros to calculate floor(log_2(P)). 164 | // 165 | // Thus, BitTree is only valid for any P such that 166 | // P = 2 ** floor(log_2(P)), where P is the length of the probability array 167 | // of the BitTree. This maintains the invariant that P = 1 << N. 168 | // 169 | // This precondition must be checked for any way to construct a new, valid 170 | // instance of BitTree. Here it is checked for BitTree::new(), but if 171 | // another function is added that returns a new instance of BitTree, 172 | // this assertion must be checked there as well. 173 | const_assert!("BitTree's PROBS_ARRAY_LEN parameter must be a power of 2", 174 | PROBS_ARRAY_LEN: usize => (1 << (PROBS_ARRAY_LEN.trailing_zeros() as usize)) == PROBS_ARRAY_LEN); 175 | BitTree { 176 | probs: [0x400; PROBS_ARRAY_LEN], 177 | } 178 | } 179 | 180 | // NUM_BITS is derived from PROBS_ARRAY_LEN because of the lack of 181 | // generic const expressions. Where PROBS_ARRAY_LEN is a power of 2, 182 | // NUM_BITS can be derived by the number of trailing zeroes. 
183 | const NUM_BITS: usize = PROBS_ARRAY_LEN.trailing_zeros() as usize; 184 | 185 | pub fn parse( 186 | &mut self, 187 | rangecoder: &mut RangeDecoder, 188 | update: bool, 189 | ) -> io::Result { 190 | rangecoder.parse_bit_tree(Self::NUM_BITS, &mut self.probs, update) 191 | } 192 | 193 | pub fn parse_reverse( 194 | &mut self, 195 | rangecoder: &mut RangeDecoder, 196 | update: bool, 197 | ) -> io::Result { 198 | rangecoder.parse_reverse_bit_tree(Self::NUM_BITS, &mut self.probs, 0, update) 199 | } 200 | } 201 | 202 | #[derive(Debug)] 203 | pub struct LenDecoder { 204 | choice: u16, 205 | choice2: u16, 206 | low_coder: [BitTree<{ 1 << 3 }>; 16], 207 | mid_coder: [BitTree<{ 1 << 3 }>; 16], 208 | high_coder: BitTree<{ 1 << 8 }>, 209 | } 210 | 211 | impl LenDecoder { 212 | pub fn new() -> Self { 213 | LenDecoder { 214 | choice: 0x400, 215 | choice2: 0x400, 216 | low_coder: [ 217 | BitTree::new(), 218 | BitTree::new(), 219 | BitTree::new(), 220 | BitTree::new(), 221 | BitTree::new(), 222 | BitTree::new(), 223 | BitTree::new(), 224 | BitTree::new(), 225 | BitTree::new(), 226 | BitTree::new(), 227 | BitTree::new(), 228 | BitTree::new(), 229 | BitTree::new(), 230 | BitTree::new(), 231 | BitTree::new(), 232 | BitTree::new(), 233 | ], 234 | mid_coder: [ 235 | BitTree::new(), 236 | BitTree::new(), 237 | BitTree::new(), 238 | BitTree::new(), 239 | BitTree::new(), 240 | BitTree::new(), 241 | BitTree::new(), 242 | BitTree::new(), 243 | BitTree::new(), 244 | BitTree::new(), 245 | BitTree::new(), 246 | BitTree::new(), 247 | BitTree::new(), 248 | BitTree::new(), 249 | BitTree::new(), 250 | BitTree::new(), 251 | ], 252 | high_coder: BitTree::new(), 253 | } 254 | } 255 | 256 | pub fn decode( 257 | &mut self, 258 | rangecoder: &mut RangeDecoder, 259 | pos_state: usize, 260 | update: bool, 261 | ) -> io::Result { 262 | if !rangecoder.decode_bit(&mut self.choice, update)? { 263 | Ok(self.low_coder[pos_state].parse(rangecoder, update)? 
as usize) 264 | } else if !rangecoder.decode_bit(&mut self.choice2, update)? { 265 | Ok(self.mid_coder[pos_state].parse(rangecoder, update)? as usize + 8) 266 | } else { 267 | Ok(self.high_coder.parse(rangecoder, update)? as usize + 16) 268 | } 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /src/decode/stream.rs: -------------------------------------------------------------------------------- 1 | use crate::decode::lzbuffer::{LzBuffer, LzCircularBuffer}; 2 | use crate::decode::lzma::{DecoderState, LzmaParams}; 3 | use crate::decode::rangecoder::RangeDecoder; 4 | use crate::decompress::Options; 5 | use crate::error::Error; 6 | use std::fmt::Debug; 7 | use std::io::{self, BufRead, Cursor, Read, Write}; 8 | 9 | /// Minimum header length to be read. 10 | /// - props: u8 (1 byte) 11 | /// - dict_size: u32 (4 bytes) 12 | const MIN_HEADER_LEN: usize = 5; 13 | 14 | /// Max header length to be read. 15 | /// - unpacked_size: u64 (8 bytes) 16 | const MAX_HEADER_LEN: usize = MIN_HEADER_LEN + 8; 17 | 18 | /// Required bytes after the header. 19 | /// - ignore: u8 (1 byte) 20 | /// - code: u32 (4 bytes) 21 | const START_BYTES: usize = 5; 22 | 23 | /// Maximum number of bytes to buffer while reading the header. 24 | const MAX_TMP_LEN: usize = MAX_HEADER_LEN + START_BYTES; 25 | 26 | /// Internal state of this streaming decoder. This is needed because we have to 27 | /// initialize the stream before processing any data. 28 | #[derive(Debug)] 29 | enum State 30 | where 31 | W: Write, 32 | { 33 | /// Stream is initialized but header values have not yet been read. 34 | Header(W), 35 | /// Header values have been read and the stream is ready to process more 36 | /// data. 37 | Data(Box>), 38 | } 39 | 40 | /// Structures needed while decoding data. 
41 | struct RunState 42 | where 43 | W: Write, 44 | { 45 | decoder: DecoderState, 46 | range: u32, 47 | code: u32, 48 | output: LzCircularBuffer, 49 | } 50 | 51 | impl Debug for RunState 52 | where 53 | W: Write, 54 | { 55 | fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { 56 | fmt.debug_struct("RunState") 57 | .field("range", &self.range) 58 | .field("code", &self.code) 59 | .finish() 60 | } 61 | } 62 | 63 | /// Lzma decompressor that can process multiple chunks of data using the 64 | /// [`io::Write`] interface. 65 | #[cfg_attr(docsrs, doc(cfg(stream)))] 66 | pub struct Stream 67 | where 68 | W: Write, 69 | { 70 | /// Temporary buffer to hold data while the header is being read. 71 | tmp: Cursor<[u8; MAX_TMP_LEN]>, 72 | /// Whether the stream is initialized and ready to process data. 73 | /// An [`Option`] is used to avoid interior mutability when updating the 74 | /// state. 75 | state: Option>, 76 | /// Options given when a stream is created. 77 | options: Options, 78 | } 79 | 80 | impl Stream 81 | where 82 | W: Write, 83 | { 84 | /// Initialize the stream. This will consume the `output` which is the sink 85 | /// implementing [`io::Write`] that will receive decompressed bytes. 86 | pub fn new(output: W) -> Self { 87 | Self::new_with_options(&Options::default(), output) 88 | } 89 | 90 | /// Initialize the stream with the given `options`. This will consume the 91 | /// `output` which is the sink implementing [`io::Write`] that will 92 | /// receive decompressed bytes. 93 | pub fn new_with_options(options: &Options, output: W) -> Self { 94 | Self { 95 | tmp: Cursor::new([0; MAX_TMP_LEN]), 96 | state: Some(State::Header(output)), 97 | options: *options, 98 | } 99 | } 100 | 101 | /// Get a reference to the output sink. 
102 | pub fn get_output(&self) -> Option<&W> { 103 | self.state.as_ref().map(|state| match state { 104 | State::Header(output) => output, 105 | State::Data(state) => state.output.get_output(), 106 | }) 107 | } 108 | 109 | /// Get a mutable reference to the output sink; 110 | pub fn get_output_mut(&mut self) -> Option<&mut W> { 111 | self.state.as_mut().map(|state| match state { 112 | State::Header(output) => output, 113 | State::Data(state) => state.output.get_output_mut(), 114 | }) 115 | } 116 | 117 | /// Consumes the stream and returns the output sink. This also makes sure 118 | /// we have properly reached the end of the stream. 119 | pub fn finish(mut self) -> crate::error::Result { 120 | if let Some(state) = self.state.take() { 121 | match state { 122 | State::Header(output) => { 123 | if self.tmp.position() > 0 { 124 | Err(Error::LzmaError("failed to read header".to_string())) 125 | } else { 126 | Ok(output) 127 | } 128 | } 129 | State::Data(mut state) => { 130 | if !self.options.allow_incomplete { 131 | // Process one last time with empty input to force end of 132 | // stream checks 133 | let mut stream = 134 | Cursor::new(&self.tmp.get_ref()[0..self.tmp.position() as usize]); 135 | let mut range_decoder = 136 | RangeDecoder::from_parts(&mut stream, state.range, state.code); 137 | state 138 | .decoder 139 | .process(&mut state.output, &mut range_decoder)?; 140 | } 141 | let output = state.output.finish()?; 142 | Ok(output) 143 | } 144 | } 145 | } else { 146 | // this will occur if a call to `write()` fails 147 | Err(Error::LzmaError( 148 | "can't finish stream because of previous write error".to_string(), 149 | )) 150 | } 151 | } 152 | 153 | /// Attempts to read the header and transition into a running state. 154 | /// 155 | /// This function will consume the state, returning the next state on both 156 | /// error and success. 
157 | fn read_header( 158 | output: W, 159 | mut input: &mut R, 160 | options: &Options, 161 | ) -> crate::error::Result> { 162 | match LzmaParams::read_header(&mut input, options) { 163 | Ok(params) => { 164 | let decoder = DecoderState::new(params.properties, params.unpacked_size); 165 | let output = LzCircularBuffer::from_stream( 166 | output, 167 | params.dict_size as usize, 168 | options.memlimit.unwrap_or(usize::MAX), 169 | ); 170 | // The RangeDecoder is only kept temporarily as we are processing 171 | // chunks of data. 172 | if let Ok(rangecoder) = RangeDecoder::new(&mut input) { 173 | Ok(State::Data(Box::new(RunState { 174 | decoder, 175 | output, 176 | range: rangecoder.range, 177 | code: rangecoder.code, 178 | }))) 179 | } else { 180 | // Failed to create a RangeDecoder because we need more data, 181 | // try again later. 182 | Ok(State::Header(output.into_output())) 183 | } 184 | } 185 | // Failed to read_header() because we need more data, try again later. 186 | Err(Error::HeaderTooShort(_)) => Ok(State::Header(output)), 187 | // Fatal error. Don't retry. 188 | Err(e) => Err(e), 189 | } 190 | } 191 | 192 | /// Process compressed data. 193 | fn read_data(state: &mut RunState, mut input: &mut R) -> io::Result<()> { 194 | // Construct our RangeDecoder from the previous range and code 195 | // values. 196 | let mut rangecoder = RangeDecoder::from_parts(&mut input, state.range, state.code); 197 | 198 | // Try to process all bytes of data. 
199 | state 200 | .decoder 201 | .process_stream(&mut state.output, &mut rangecoder) 202 | .map_err(|e| -> io::Error { e.into() })?; 203 | 204 | state.range = rangecoder.range; 205 | state.code = rangecoder.code; 206 | Ok(()) 207 | } 208 | } 209 | 210 | impl Debug for Stream 211 | where 212 | W: Write + Debug, 213 | { 214 | fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { 215 | fmt.debug_struct("Stream") 216 | .field("tmp", &self.tmp.position()) 217 | .field("state", &self.state) 218 | .field("options", &self.options) 219 | .finish() 220 | } 221 | } 222 | 223 | impl Write for Stream 224 | where 225 | W: Write, 226 | { 227 | fn write(&mut self, data: &[u8]) -> io::Result { 228 | let mut input = Cursor::new(data); 229 | 230 | if let Some(state) = self.state.take() { 231 | let state = match state { 232 | // Read the header values and transition into a running state. 233 | State::Header(state) => { 234 | let res = if self.tmp.position() > 0 { 235 | // attempt to fill the tmp buffer 236 | let position = self.tmp.position(); 237 | let bytes_read = 238 | input.read(&mut self.tmp.get_mut()[position as usize..])?; 239 | let bytes_read = if bytes_read < std::u64::MAX as usize { 240 | bytes_read as u64 241 | } else { 242 | return Err(io::Error::new( 243 | io::ErrorKind::Other, 244 | "Failed to convert integer to u64.", 245 | )); 246 | }; 247 | self.tmp.set_position(position + bytes_read); 248 | 249 | // attempt to read the header from our tmp buffer 250 | let (position, res) = { 251 | let mut tmp_input = 252 | Cursor::new(&self.tmp.get_ref()[0..self.tmp.position() as usize]); 253 | let res = Stream::read_header(state, &mut tmp_input, &self.options); 254 | (tmp_input.position(), res) 255 | }; 256 | 257 | // discard all bytes up to position if reading the header 258 | // was successful 259 | if let Ok(State::Data(_)) = &res { 260 | let tmp = *self.tmp.get_ref(); 261 | let end = self.tmp.position(); 262 | let new_len = end - position; 263 | 
self.tmp.get_mut()[0..new_len as usize] 264 | .copy_from_slice(&tmp[position as usize..end as usize]); 265 | self.tmp.set_position(new_len); 266 | } 267 | res 268 | } else { 269 | Stream::read_header(state, &mut input, &self.options) 270 | }; 271 | 272 | match res { 273 | // occurs when not enough input bytes were provided to 274 | // read the entire header 275 | Ok(State::Header(val)) => { 276 | if self.tmp.position() == 0 { 277 | // reset the cursor because we may have partial reads 278 | input.set_position(0); 279 | let bytes_read = input.read(&mut self.tmp.get_mut()[..])?; 280 | let bytes_read = if bytes_read < std::u64::MAX as usize { 281 | bytes_read as u64 282 | } else { 283 | return Err(io::Error::new( 284 | io::ErrorKind::Other, 285 | "Failed to convert integer to u64.", 286 | )); 287 | }; 288 | self.tmp.set_position(bytes_read); 289 | } 290 | State::Header(val) 291 | } 292 | 293 | // occurs when the header was successfully read and we 294 | // move on to the next state 295 | Ok(State::Data(val)) => State::Data(val), 296 | 297 | // occurs when the output was consumed due to a 298 | // non-recoverable error 299 | Err(e) => { 300 | return Err(match e { 301 | Error::IoError(e) | Error::HeaderTooShort(e) => e, 302 | Error::LzmaError(e) | Error::XzError(e) => { 303 | io::Error::new(io::ErrorKind::Other, e) 304 | } 305 | }); 306 | } 307 | } 308 | } 309 | 310 | // Process another chunk of data. 311 | State::Data(mut state) => { 312 | if self.tmp.position() > 0 { 313 | let mut tmp_input = 314 | Cursor::new(&self.tmp.get_ref()[0..self.tmp.position() as usize]); 315 | Stream::read_data(&mut state, &mut tmp_input)?; 316 | self.tmp.set_position(0); 317 | }; 318 | Stream::read_data(&mut state, &mut input)?; 319 | State::Data(state) 320 | } 321 | }; 322 | self.state.replace(state); 323 | } 324 | Ok(input.position() as usize) 325 | } 326 | 327 | /// Flushes the output sink. The internal buffer isn't flushed to avoid 328 | /// corrupting the internal state. 
Instead, call [`Self::finish()`] to 329 | /// finalize the stream and flush all remaining internal data. 330 | fn flush(&mut self) -> io::Result<()> { 331 | if let Some(ref mut state) = self.state { 332 | match state { 333 | State::Header(_) => Ok(()), 334 | State::Data(state) => state.output.get_output_mut().flush(), 335 | } 336 | } else { 337 | Ok(()) 338 | } 339 | } 340 | } 341 | 342 | impl From for io::Error { 343 | fn from(error: Error) -> io::Error { 344 | io::Error::new(io::ErrorKind::Other, format!("{:?}", error)) 345 | } 346 | } 347 | 348 | #[cfg(test)] 349 | mod test { 350 | use super::*; 351 | 352 | /// Test an empty stream 353 | #[test] 354 | fn test_stream_noop() { 355 | let stream = Stream::new(Vec::new()); 356 | assert!(stream.get_output().unwrap().is_empty()); 357 | 358 | let output = stream.finish().unwrap(); 359 | assert!(output.is_empty()); 360 | } 361 | 362 | /// Test writing an empty slice 363 | #[test] 364 | fn test_stream_zero() { 365 | let mut stream = Stream::new(Vec::new()); 366 | 367 | stream.write_all(&[]).unwrap(); 368 | stream.write_all(&[]).unwrap(); 369 | 370 | let output = stream.finish().unwrap(); 371 | 372 | assert!(output.is_empty()); 373 | } 374 | 375 | /// Test a bad header value 376 | #[test] 377 | #[should_panic(expected = "LZMA header invalid properties: 255 must be < 225")] 378 | fn test_bad_header() { 379 | let input = [255u8; 32]; 380 | 381 | let mut stream = Stream::new(Vec::new()); 382 | 383 | stream.write_all(&input[..]).unwrap(); 384 | 385 | let output = stream.finish().unwrap(); 386 | 387 | assert!(output.is_empty()); 388 | } 389 | 390 | /// Test processing only partial data 391 | #[test] 392 | fn test_stream_incomplete() { 393 | let input = b"\x5d\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x83\xff\ 394 | \xfb\xff\xff\xc0\x00\x00\x00"; 395 | // Process until this index is reached. 
396 | let mut end = 1u64; 397 | 398 | // Test when we fail to provide the minimum number of bytes required to 399 | // read the header. Header size is 13 bytes but we also read the first 5 400 | // bytes of data. 401 | while end < (MAX_HEADER_LEN + START_BYTES) as u64 { 402 | let mut stream = Stream::new(Vec::new()); 403 | stream.write_all(&input[..end as usize]).unwrap(); 404 | assert_eq!(stream.tmp.position(), end); 405 | 406 | let err = stream.finish().unwrap_err(); 407 | assert!( 408 | err.to_string().contains("failed to read header"), 409 | "error was: {}", 410 | err 411 | ); 412 | 413 | end += 1; 414 | } 415 | 416 | // Test when we fail to provide enough bytes to terminate the stream. A 417 | // properly terminated stream will have a code value of 0. 418 | while end < input.len() as u64 { 419 | let mut stream = Stream::new(Vec::new()); 420 | stream.write_all(&input[..end as usize]).unwrap(); 421 | 422 | // Header bytes will be buffered until there are enough to read 423 | if end < (MAX_HEADER_LEN + START_BYTES) as u64 { 424 | assert_eq!(stream.tmp.position(), end); 425 | } 426 | 427 | let err = stream.finish().unwrap_err(); 428 | assert!(err.to_string().contains("failed to fill whole buffer")); 429 | 430 | end += 1; 431 | } 432 | } 433 | 434 | /// Test processing all chunk sizes 435 | #[test] 436 | fn test_stream_chunked() { 437 | let small_input = include_bytes!("../../tests/files/small.txt"); 438 | 439 | let mut reader = io::Cursor::new(&small_input[..]); 440 | let mut small_input_compressed = Vec::new(); 441 | crate::lzma_compress(&mut reader, &mut small_input_compressed).unwrap(); 442 | 443 | let input : Vec<(&[u8], &[u8])> = vec![ 444 | (b"\x5d\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x83\xff\xfb\xff\xff\xc0\x00\x00\x00", b""), 445 | (&small_input_compressed[..], small_input)]; 446 | for (input, expected) in input { 447 | for chunk in 1..input.len() { 448 | let mut consumed = 0; 449 | let mut stream = Stream::new(Vec::new()); 450 | while 
consumed < input.len() { 451 | let end = std::cmp::min(consumed + chunk, input.len()); 452 | stream.write_all(&input[consumed..end]).unwrap(); 453 | consumed = end; 454 | } 455 | let output = stream.finish().unwrap(); 456 | assert_eq!(expected, &output[..]); 457 | } 458 | } 459 | } 460 | 461 | #[test] 462 | fn test_stream_corrupted() { 463 | let mut stream = Stream::new(Vec::new()); 464 | let err = stream 465 | .write_all(b"corrupted bytes here corrupted bytes here") 466 | .unwrap_err(); 467 | assert!(err.to_string().contains("beyond output size")); 468 | let err = stream.finish().unwrap_err(); 469 | assert!(err 470 | .to_string() 471 | .contains("can\'t finish stream because of previous write error")); 472 | } 473 | 474 | #[test] 475 | fn test_allow_incomplete() { 476 | let input = include_bytes!("../../tests/files/small.txt"); 477 | 478 | let mut reader = io::Cursor::new(&input[..]); 479 | let mut compressed = Vec::new(); 480 | crate::lzma_compress(&mut reader, &mut compressed).unwrap(); 481 | let compressed = &compressed[..compressed.len() / 2]; 482 | 483 | // Should fail to finish() without the allow_incomplete option. 484 | let mut stream = Stream::new(Vec::new()); 485 | stream.write_all(compressed).unwrap(); 486 | stream.finish().unwrap_err(); 487 | 488 | // Should succeed with the allow_incomplete option. 
489 | let mut stream = Stream::new_with_options( 490 | &Options { 491 | allow_incomplete: true, 492 | ..Default::default() 493 | }, 494 | Vec::new(), 495 | ); 496 | stream.write_all(compressed).unwrap(); 497 | let output = stream.finish().unwrap(); 498 | assert_eq!(output, &input[..26]); 499 | } 500 | } 501 | -------------------------------------------------------------------------------- /src/decode/util.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | pub fn read_tag(input: &mut R, tag: &[u8]) -> io::Result { 4 | let mut buf = vec![0; tag.len()]; 5 | input.read_exact(buf.as_mut_slice())?; 6 | Ok(buf.as_slice() == tag) 7 | } 8 | 9 | pub fn is_eof(input: &mut R) -> io::Result { 10 | let buf = input.fill_buf()?; 11 | Ok(buf.is_empty()) 12 | } 13 | 14 | pub fn flush_zero_padding(input: &mut R) -> io::Result { 15 | loop { 16 | let len = { 17 | let buf = input.fill_buf()?; 18 | let len = buf.len(); 19 | 20 | if len == 0 { 21 | return Ok(true); 22 | } 23 | 24 | for x in buf { 25 | if *x != 0u8 { 26 | return Ok(false); 27 | } 28 | } 29 | len 30 | }; 31 | 32 | input.consume(len); 33 | } 34 | } 35 | 36 | /// An [`io::Read`] computing a digest on the bytes read. 
37 | pub struct CrcDigestRead<'a, 'b, R, S> 38 | where 39 | R: 'a + io::Read, 40 | S: crc::Width, 41 | { 42 | /// Underlying reader 43 | read: &'a mut R, 44 | /// Hasher 45 | digest: &'a mut crc::Digest<'b, S>, 46 | } 47 | 48 | impl<'a, 'b, R, S> CrcDigestRead<'a, 'b, R, S> 49 | where 50 | R: io::Read, 51 | S: crc::Width, 52 | { 53 | pub fn new(read: &'a mut R, digest: &'a mut crc::Digest<'b, S>) -> Self { 54 | Self { read, digest } 55 | } 56 | } 57 | 58 | impl<'a, 'b, R> io::Read for CrcDigestRead<'a, 'b, R, u32> 59 | where 60 | R: io::Read, 61 | { 62 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 63 | let result = self.read.read(buf)?; 64 | self.digest.update(&buf[..result]); 65 | Ok(result) 66 | } 67 | } 68 | 69 | /// An [`io::BufRead`] counting the bytes read. 70 | pub struct CountBufRead<'a, R> 71 | where 72 | R: 'a + io::BufRead, 73 | { 74 | /// Underlying reader 75 | read: &'a mut R, 76 | /// Number of bytes read 77 | count: usize, 78 | } 79 | 80 | impl<'a, R> CountBufRead<'a, R> 81 | where 82 | R: io::BufRead, 83 | { 84 | pub fn new(read: &'a mut R) -> Self { 85 | Self { read, count: 0 } 86 | } 87 | 88 | pub fn count(&self) -> usize { 89 | self.count 90 | } 91 | } 92 | 93 | impl<'a, R> io::Read for CountBufRead<'a, R> 94 | where 95 | R: io::BufRead, 96 | { 97 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 98 | let result = self.read.read(buf)?; 99 | self.count += result; 100 | Ok(result) 101 | } 102 | } 103 | 104 | impl<'a, R> io::BufRead for CountBufRead<'a, R> 105 | where 106 | R: io::BufRead, 107 | { 108 | fn fill_buf(&mut self) -> io::Result<&[u8]> { 109 | self.read.fill_buf() 110 | } 111 | 112 | fn consume(&mut self, amt: usize) { 113 | self.read.consume(amt); 114 | self.count += amt; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/decode/xz.rs: -------------------------------------------------------------------------------- 1 | //! Decoder for the `.xz` file format. 
2 | 3 | use crate::decode::lzma2::Lzma2Decoder; 4 | use crate::decode::util; 5 | use crate::error; 6 | use crate::xz::crc::{CRC32, CRC64}; 7 | use crate::xz::{footer, header, CheckMethod, StreamFlags}; 8 | use byteorder::{BigEndian, LittleEndian, ReadBytesExt}; 9 | use std::io; 10 | use std::io::Read; 11 | 12 | #[derive(Debug)] 13 | struct Record { 14 | unpadded_size: u64, 15 | unpacked_size: u64, 16 | } 17 | 18 | pub fn decode_stream(input: &mut R, output: &mut W) -> error::Result<()> 19 | where 20 | R: io::BufRead, 21 | W: io::Write, 22 | { 23 | let header = header::StreamHeader::parse(input)?; 24 | 25 | let mut records: Vec = vec![]; 26 | let index_size = loop { 27 | let mut count_input = util::CountBufRead::new(input); 28 | let header_size = count_input.read_u8()?; 29 | lzma_info!("XZ block header_size byte: 0x{:02x}", header_size); 30 | 31 | if header_size == 0 { 32 | lzma_info!("XZ records: {:?}", records); 33 | check_index(&mut count_input, &records)?; 34 | let index_size = count_input.count(); 35 | break index_size; 36 | } 37 | 38 | read_block( 39 | &mut count_input, 40 | output, 41 | header.stream_flags.check_method, 42 | &mut records, 43 | header_size, 44 | )?; 45 | }; 46 | 47 | let crc32 = input.read_u32::()?; 48 | let mut digest = CRC32.digest(); 49 | { 50 | let mut digested = util::CrcDigestRead::new(input, &mut digest); 51 | let backward_size = digested.read_u32::()?; 52 | if index_size as u32 != (backward_size + 1) << 2 { 53 | return Err(error::Error::XzError(format!( 54 | "Invalid index size: expected {} but got {}", 55 | (backward_size + 1) << 2, 56 | index_size 57 | ))); 58 | } 59 | 60 | let stream_flags = { 61 | let field = digested.read_u16::()?; 62 | StreamFlags::parse(field)? 
63 | }; 64 | 65 | if header.stream_flags != stream_flags { 66 | return Err(error::Error::XzError(format!( 67 | "Flags in header ({:?}) does not match footer ({:?})", 68 | header.stream_flags, stream_flags 69 | ))); 70 | } 71 | } 72 | 73 | let digest_crc32 = digest.finalize(); 74 | if crc32 != digest_crc32 { 75 | return Err(error::Error::XzError(format!( 76 | "Invalid footer CRC32: expected 0x{:08x} but got 0x{:08x}", 77 | crc32, digest_crc32 78 | ))); 79 | } 80 | 81 | if !util::read_tag(input, footer::XZ_MAGIC_FOOTER)? { 82 | return Err(error::Error::XzError(format!( 83 | "Invalid footer magic, expected {:?}", 84 | footer::XZ_MAGIC_FOOTER 85 | ))); 86 | } 87 | 88 | if !util::is_eof(input)? { 89 | return Err(error::Error::XzError( 90 | "Unexpected data after last XZ block".to_string(), 91 | )); 92 | } 93 | Ok(()) 94 | } 95 | 96 | fn check_index( 97 | count_input: &mut util::CountBufRead<'_, R>, 98 | records: &[Record], 99 | ) -> error::Result<()> 100 | where 101 | R: io::BufRead, 102 | { 103 | let mut digest = CRC32.digest(); 104 | let index_tag = 0u8; 105 | digest.update(&[index_tag]); 106 | { 107 | let mut digested = util::CrcDigestRead::new(count_input, &mut digest); 108 | 109 | let num_records = get_multibyte(&mut digested)?; 110 | if num_records != records.len() as u64 { 111 | return Err(error::Error::XzError(format!( 112 | "Expected {} records but got {} records", 113 | num_records, 114 | records.len() 115 | ))); 116 | } 117 | 118 | for (i, record) in records.iter().enumerate() { 119 | lzma_info!("XZ index checking record {}: {:?}", i, record); 120 | 121 | let unpadded_size = get_multibyte(&mut digested)?; 122 | if unpadded_size != record.unpadded_size { 123 | return Err(error::Error::XzError(format!( 124 | "Invalid index for record {}: unpadded size ({}) does not match index ({})", 125 | i, record.unpadded_size, unpadded_size 126 | ))); 127 | } 128 | 129 | let unpacked_size = get_multibyte(&mut digested)?; 130 | if unpacked_size != record.unpacked_size { 131 
| return Err(error::Error::XzError(format!( 132 | "Invalid index for record {}: unpacked size ({}) does not match index ({})", 133 | i, record.unpacked_size, unpacked_size 134 | ))); 135 | } 136 | } 137 | }; 138 | // TODO: create padding parser function 139 | let count = count_input.count(); 140 | let padding_size = ((count ^ 0x03) + 1) & 0x03; 141 | lzma_info!( 142 | "XZ index: {} byte(s) read, {} byte(s) of padding", 143 | count, 144 | padding_size 145 | ); 146 | 147 | { 148 | let mut digested = util::CrcDigestRead::new(count_input, &mut digest); 149 | for _ in 0..padding_size { 150 | let byte = digested.read_u8()?; 151 | if byte != 0 { 152 | return Err(error::Error::XzError( 153 | "Invalid index padding, must be null bytes".to_string(), 154 | )); 155 | } 156 | } 157 | }; 158 | 159 | let digest_crc32 = digest.finalize(); 160 | lzma_info!("XZ index checking digest 0x{:08x}", digest_crc32); 161 | 162 | let crc32 = count_input.read_u32::()?; 163 | if crc32 != digest_crc32 { 164 | return Err(error::Error::XzError(format!( 165 | "Invalid index CRC32: expected 0x{:08x} but got 0x{:08x}", 166 | crc32, digest_crc32 167 | ))); 168 | } 169 | 170 | Ok(()) 171 | } 172 | 173 | #[derive(Debug)] 174 | enum FilterId { 175 | Lzma2, 176 | } 177 | 178 | fn get_filter_id(id: u64) -> error::Result { 179 | match id { 180 | 0x21 => Ok(FilterId::Lzma2), 181 | _ => Err(error::Error::XzError(format!("Unknown filter id {}", id))), 182 | } 183 | } 184 | 185 | struct Filter { 186 | filter_id: FilterId, 187 | props: Vec, 188 | } 189 | 190 | struct BlockHeader { 191 | filters: Vec, 192 | packed_size: Option, 193 | unpacked_size: Option, 194 | } 195 | 196 | fn read_block( 197 | count_input: &mut util::CountBufRead<'_, R>, 198 | output: &mut W, 199 | check_method: CheckMethod, 200 | records: &mut Vec, 201 | header_size: u8, 202 | ) -> error::Result 203 | where 204 | R: io::BufRead, 205 | W: io::Write, 206 | { 207 | let mut digest = CRC32.digest(); 208 | digest.update(&[header_size]); 209 | let 
header_size = ((header_size as u64) << 2) - 1; 210 | 211 | let block_header = { 212 | let mut taken = count_input.take(header_size); 213 | let mut digested = io::BufReader::new(util::CrcDigestRead::new(&mut taken, &mut digest)); 214 | read_block_header(&mut digested, header_size)? 215 | }; 216 | 217 | let crc32 = count_input.read_u32::()?; 218 | let digest_crc32 = digest.finalize(); 219 | if crc32 != digest_crc32 { 220 | return Err(error::Error::XzError(format!( 221 | "Invalid header CRC32: expected 0x{:08x} but got 0x{:08x}", 222 | crc32, digest_crc32 223 | ))); 224 | } 225 | 226 | let mut tmpbuf: Vec = Vec::new(); 227 | let filters = block_header.filters; 228 | for (i, filter) in filters.iter().enumerate() { 229 | if i == 0 { 230 | // TODO: use SubBufRead on input if packed_size is known? 231 | let packed_size = decode_filter(count_input, &mut tmpbuf, filter)?; 232 | if let Some(expected_packed_size) = block_header.packed_size { 233 | if (packed_size as u64) != expected_packed_size { 234 | return Err(error::Error::XzError(format!( 235 | "Invalid compressed size: expected {} but got {}", 236 | expected_packed_size, packed_size 237 | ))); 238 | } 239 | } 240 | } else { 241 | let mut newbuf: Vec = Vec::new(); 242 | decode_filter( 243 | &mut io::BufReader::new(tmpbuf.as_slice()), 244 | &mut newbuf, 245 | filter, 246 | )?; 247 | // TODO: does this move or copy? 
248 | tmpbuf = newbuf; 249 | } 250 | } 251 | 252 | let unpacked_size = tmpbuf.len(); 253 | lzma_info!("XZ block decompressed to {} byte(s)", tmpbuf.len()); 254 | 255 | if let Some(expected_unpacked_size) = block_header.unpacked_size { 256 | if (unpacked_size as u64) != expected_unpacked_size { 257 | return Err(error::Error::XzError(format!( 258 | "Invalid decompressed size: expected {} but got {}", 259 | expected_unpacked_size, unpacked_size 260 | ))); 261 | } 262 | } 263 | 264 | let count = count_input.count(); 265 | let padding_size = ((count ^ 0x03) + 1) & 0x03; 266 | lzma_info!( 267 | "XZ block: {} byte(s) read, {} byte(s) of padding, check method {:?}", 268 | count, 269 | padding_size, 270 | check_method 271 | ); 272 | for _ in 0..padding_size { 273 | let byte = count_input.read_u8()?; 274 | if byte != 0 { 275 | return Err(error::Error::XzError( 276 | "Invalid block padding, must be null bytes".to_string(), 277 | )); 278 | } 279 | } 280 | validate_block_check(count_input, tmpbuf.as_slice(), check_method)?; 281 | 282 | output.write_all(tmpbuf.as_slice())?; 283 | records.push(Record { 284 | unpadded_size: (count_input.count() - padding_size) as u64, 285 | unpacked_size: unpacked_size as u64, 286 | }); 287 | 288 | let finished = false; 289 | Ok(finished) 290 | } 291 | 292 | /// Verify block checksum against the "Block Check" field. 293 | /// 294 | /// See spec section 3.4 for details. 
295 | fn validate_block_check( 296 | input: &mut R, 297 | buf: &[u8], 298 | check_method: CheckMethod, 299 | ) -> error::Result<()> 300 | where 301 | R: io::BufRead, 302 | { 303 | match check_method { 304 | CheckMethod::None => (), 305 | CheckMethod::Crc32 => { 306 | let crc32 = input.read_u32::()?; 307 | let digest_crc32 = CRC32.checksum(buf); 308 | if crc32 != digest_crc32 { 309 | return Err(error::Error::XzError(format!( 310 | "Invalid block CRC32, expected 0x{:08x} but got 0x{:08x}", 311 | crc32, digest_crc32 312 | ))); 313 | } 314 | } 315 | CheckMethod::Crc64 => { 316 | let crc64 = input.read_u64::()?; 317 | let digest_crc64 = CRC64.checksum(buf); 318 | if crc64 != digest_crc64 { 319 | return Err(error::Error::XzError(format!( 320 | "Invalid block CRC64, expected 0x{:016x} but got 0x{:016x}", 321 | crc64, digest_crc64 322 | ))); 323 | } 324 | } 325 | // TODO 326 | CheckMethod::Sha256 => { 327 | return Err(error::Error::XzError( 328 | "Unsupported SHA-256 checksum (not yet implemented)".to_string(), 329 | )); 330 | } 331 | } 332 | Ok(()) 333 | } 334 | 335 | fn decode_filter(input: &mut R, output: &mut W, filter: &Filter) -> error::Result 336 | where 337 | R: io::BufRead, 338 | W: io::Write, 339 | { 340 | let mut count_input = util::CountBufRead::new(input); 341 | match filter.filter_id { 342 | FilterId::Lzma2 => { 343 | if filter.props.len() != 1 { 344 | return Err(error::Error::XzError(format!( 345 | "Invalid properties for filter {:?}", 346 | filter.filter_id 347 | ))); 348 | } 349 | // TODO: properties?? 
350 | Lzma2Decoder::new().decompress(&mut count_input, output)?; 351 | Ok(count_input.count()) 352 | } 353 | } 354 | } 355 | 356 | fn read_block_header(input: &mut R, header_size: u64) -> error::Result 357 | where 358 | R: io::BufRead, 359 | { 360 | let flags = input.read_u8()?; 361 | let num_filters = (flags & 0x03) + 1; 362 | let reserved = flags & 0x3C; 363 | let has_packed_size = flags & 0x40 != 0; 364 | let has_unpacked_size = flags & 0x80 != 0; 365 | 366 | lzma_info!( 367 | "XZ block header: {{ header_size: {}, flags: {}, num_filters: {}, has_packed_size: {}, has_unpacked_size: {} }}", 368 | header_size, 369 | flags, 370 | num_filters, 371 | has_packed_size, 372 | has_unpacked_size 373 | ); 374 | 375 | if reserved != 0 { 376 | return Err(error::Error::XzError(format!( 377 | "Invalid block flags {}, reserved bits (mask 0x3C) must be zero", 378 | flags 379 | ))); 380 | } 381 | 382 | let packed_size = if has_packed_size { 383 | Some(get_multibyte(input)?) 384 | } else { 385 | None 386 | }; 387 | 388 | let unpacked_size = if has_unpacked_size { 389 | Some(get_multibyte(input)?) 
390 | } else { 391 | None 392 | }; 393 | 394 | lzma_info!( 395 | "XZ block header: {{ packed_size: {:?}, unpacked_size: {:?} }}", 396 | packed_size, 397 | unpacked_size 398 | ); 399 | 400 | let mut filters: Vec = vec![]; 401 | for _ in 0..num_filters { 402 | let filter_id = get_filter_id(get_multibyte(input)?)?; 403 | let size_of_properties = get_multibyte(input)?; 404 | 405 | lzma_info!( 406 | "XZ filter: {{ filter_id: {:?}, size_of_properties: {} }}", 407 | filter_id, 408 | size_of_properties 409 | ); 410 | 411 | // Early abort to avoid allocating a large vector 412 | if size_of_properties > header_size { 413 | return Err(error::Error::XzError(format!( 414 | "Size of filter properties exceeds block header size ({} > {})", 415 | size_of_properties, header_size 416 | ))); 417 | } 418 | 419 | let mut buf = vec![0; size_of_properties as usize]; 420 | input.read_exact(buf.as_mut_slice()).map_err(|e| { 421 | error::Error::XzError(format!( 422 | "Could not read filter properties of size {}: {}", 423 | size_of_properties, e 424 | )) 425 | })?; 426 | 427 | lzma_info!("XZ filter properties: {:?}", buf); 428 | 429 | filters.push(Filter { 430 | filter_id, 431 | props: buf, 432 | }) 433 | } 434 | 435 | if !util::flush_zero_padding(input)? 
{ 436 | return Err(error::Error::XzError( 437 | "Invalid block header padding, must be null bytes".to_string(), 438 | )); 439 | } 440 | 441 | Ok(BlockHeader { 442 | filters, 443 | packed_size, 444 | unpacked_size, 445 | }) 446 | } 447 | 448 | pub fn get_multibyte(input: &mut R) -> error::Result 449 | where 450 | R: io::Read, 451 | { 452 | let mut result = 0; 453 | for i in 0..9 { 454 | let byte = input.read_u8()?; 455 | result ^= ((byte & 0x7F) as u64) << (i * 7); 456 | if (byte & 0x80) == 0 { 457 | return Ok(result); 458 | } 459 | } 460 | 461 | Err(error::Error::XzError( 462 | "Invalid multi-byte encoding".to_string(), 463 | )) 464 | } 465 | -------------------------------------------------------------------------------- /src/encode/dumbencoder.rs: -------------------------------------------------------------------------------- 1 | use crate::compress::{Options, UnpackedSize}; 2 | use crate::encode::rangecoder; 3 | use byteorder::{LittleEndian, WriteBytesExt}; 4 | use std::io; 5 | 6 | pub struct Encoder<'a, W> 7 | where 8 | W: 'a + io::Write, 9 | { 10 | rangecoder: rangecoder::RangeEncoder<'a, W>, 11 | literal_probs: [[u16; 0x300]; 8], 12 | is_match: [u16; 4], // true = LZ, false = literal 13 | unpacked_size: UnpackedSize, 14 | } 15 | 16 | const LC: u32 = 3; 17 | const LP: u32 = 0; 18 | const PB: u32 = 2; 19 | 20 | impl<'a, W> Encoder<'a, W> 21 | where 22 | W: io::Write, 23 | { 24 | pub fn from_stream(stream: &'a mut W, options: &Options) -> io::Result { 25 | let dict_size = 0x0080_0000; 26 | 27 | // Properties 28 | let props = (LC + 9 * (LP + 5 * PB)) as u8; 29 | lzma_info!("Properties {{ lc: {}, lp: {}, pb: {} }}", LC, LP, PB); 30 | stream.write_u8(props)?; 31 | 32 | // Dictionary 33 | lzma_info!("Dict size: {}", dict_size); 34 | stream.write_u32::(dict_size)?; 35 | 36 | // Unpacked size 37 | match &options.unpacked_size { 38 | UnpackedSize::WriteToHeader(unpacked_size) => { 39 | let value: u64 = match unpacked_size { 40 | None => { 41 | lzma_info!("Unpacked 
size: unknown"); 42 | 0xFFFF_FFFF_FFFF_FFFF 43 | } 44 | Some(x) => { 45 | lzma_info!("Unpacked size: {}", x); 46 | *x 47 | } 48 | }; 49 | stream.write_u64::(value)?; 50 | } 51 | UnpackedSize::SkipWritingToHeader => {} 52 | }; 53 | 54 | let encoder = Encoder { 55 | rangecoder: rangecoder::RangeEncoder::new(stream), 56 | literal_probs: [[0x400; 0x300]; 8], 57 | is_match: [0x400; 4], 58 | unpacked_size: options.unpacked_size, 59 | }; 60 | 61 | Ok(encoder) 62 | } 63 | 64 | pub fn process(mut self, input: R) -> io::Result<()> 65 | where 66 | R: io::Read, 67 | { 68 | let mut prev_byte = 0u8; 69 | let mut input_len = 0; 70 | 71 | for (out_len, byte_result) in input.bytes().enumerate() { 72 | let byte = byte_result?; 73 | let pos_state = out_len & 3; 74 | input_len = out_len; 75 | 76 | // Literal 77 | self.rangecoder 78 | .encode_bit(&mut self.is_match[pos_state], false)?; 79 | 80 | self.encode_literal(byte, prev_byte)?; 81 | prev_byte = byte; 82 | } 83 | 84 | self.finish(input_len + 1) 85 | } 86 | 87 | fn finish(&mut self, input_len: usize) -> io::Result<()> { 88 | match self.unpacked_size { 89 | UnpackedSize::SkipWritingToHeader | UnpackedSize::WriteToHeader(Some(_)) => {} 90 | UnpackedSize::WriteToHeader(None) => { 91 | // Write end-of-stream marker 92 | let pos_state = input_len & 3; 93 | 94 | // Match 95 | self.rangecoder 96 | .encode_bit(&mut self.is_match[pos_state], true)?; 97 | // New distance 98 | self.rangecoder.encode_bit(&mut 0x400, false)?; 99 | 100 | // Dummy len, as small as possible (len = 0) 101 | for _ in 0..4 { 102 | self.rangecoder.encode_bit(&mut 0x400, false)?; 103 | } 104 | 105 | // Distance marker = 0xFFFFFFFF 106 | // pos_slot = 63 107 | for _ in 0..6 { 108 | self.rangecoder.encode_bit(&mut 0x400, true)?; 109 | } 110 | // num_direct_bits = 30 111 | // result = 3 << 30 = C000_0000 112 | // + 3FFF_FFF0 (26 bits) 113 | // + F ( 4 bits) 114 | for _ in 0..30 { 115 | self.rangecoder.encode_bit(&mut 0x400, true)?; 116 | } 117 | // = FFFF_FFFF 118 | } 119 
| } 120 | 121 | // Flush range coder 122 | self.rangecoder.finish() 123 | } 124 | 125 | fn encode_literal(&mut self, byte: u8, prev_byte: u8) -> io::Result<()> { 126 | let prev_byte = prev_byte as usize; 127 | 128 | let mut result: usize = 1; 129 | let lit_state = prev_byte >> 5; 130 | let probs = &mut self.literal_probs[lit_state]; 131 | 132 | for i in 0..8 { 133 | let bit = ((byte >> (7 - i)) & 1) != 0; 134 | self.rangecoder.encode_bit(&mut probs[result], bit)?; 135 | result = (result << 1) ^ (bit as usize); 136 | } 137 | 138 | Ok(()) 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/encode/lzma2.rs: -------------------------------------------------------------------------------- 1 | use byteorder::{BigEndian, WriteBytesExt}; 2 | use std::io; 3 | 4 | pub fn encode_stream(input: &mut R, output: &mut W) -> io::Result<()> 5 | where 6 | R: io::BufRead, 7 | W: io::Write, 8 | { 9 | let mut buf = vec![0u8; 0x10000]; 10 | loop { 11 | let n = input.read(&mut buf)?; 12 | if n == 0 { 13 | // status = EOF 14 | output.write_u8(0)?; 15 | break; 16 | } 17 | 18 | // status = uncompressed reset dict 19 | output.write_u8(1)?; 20 | // unpacked size 21 | output.write_u16::((n - 1) as u16)?; 22 | // contents 23 | output.write_all(&buf[..n])?; 24 | } 25 | Ok(()) 26 | } 27 | -------------------------------------------------------------------------------- /src/encode/mod.rs: -------------------------------------------------------------------------------- 1 | //! Encoding logic. 2 | 3 | pub mod dumbencoder; 4 | pub mod lzma2; 5 | pub mod options; 6 | mod rangecoder; 7 | mod util; 8 | pub mod xz; 9 | -------------------------------------------------------------------------------- /src/encode/options.rs: -------------------------------------------------------------------------------- 1 | /// Options for the `lzma_compress` function. 
2 | #[derive(Clone, Copy, Debug, Default)] 3 | pub struct Options { 4 | /// Defines whether the unpacked size should be written to the header. 5 | /// The default is [`UnpackedSize::WriteToHeader(None)`]. 6 | pub unpacked_size: UnpackedSize, 7 | } 8 | 9 | /// Alternatives for handling unpacked size. 10 | #[derive(Clone, Copy, Debug)] 11 | pub enum UnpackedSize { 12 | /// If the value is `Some(u64)`, write the provided u64 value to the header. 13 | /// There is currently no check in place that verifies that this is the 14 | /// actual number of bytes provided by the input stream. 15 | /// If the value is [`None`], write the special `0xFFFF_FFFF_FFFF_FFFF` code 16 | /// to the header, indicating that the unpacked size is unknown. 17 | WriteToHeader(Option), 18 | /// Do not write anything to the header. The unpacked size needs to be 19 | /// stored elsewhere and provided when reading the file. Note that this 20 | /// is a non-standard way of writing LZMA data, but is used by certain 21 | /// libraries such as [OpenCTM](http://openctm.sourceforge.net/). 
22 | SkipWritingToHeader, 23 | } 24 | 25 | impl Default for UnpackedSize { 26 | fn default() -> UnpackedSize { 27 | UnpackedSize::WriteToHeader(None) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/encode/rangecoder.rs: -------------------------------------------------------------------------------- 1 | use byteorder::WriteBytesExt; 2 | use std::io; 3 | 4 | #[cfg(test)] 5 | use crate::util::const_assert; 6 | 7 | pub struct RangeEncoder<'a, W> 8 | where 9 | W: 'a + io::Write, 10 | { 11 | stream: &'a mut W, 12 | range: u32, 13 | low: u64, 14 | cache: u8, 15 | cachesz: u32, 16 | } 17 | 18 | impl<'a, W> RangeEncoder<'a, W> 19 | where 20 | W: io::Write, 21 | { 22 | #[allow(clippy::let_and_return)] 23 | pub fn new(stream: &'a mut W) -> Self { 24 | let enc = Self { 25 | stream, 26 | range: 0xFFFF_FFFF, 27 | low: 0, 28 | cache: 0, 29 | cachesz: 1, 30 | }; 31 | lzma_debug!("0 {{ range: {:08x}, low: {:010x} }}", enc.range, enc.low); 32 | enc 33 | } 34 | 35 | fn write_low(&mut self) -> io::Result<()> { 36 | if self.low < 0xFF00_0000 || self.low > 0xFFFF_FFFF { 37 | let mut tmp = self.cache; 38 | loop { 39 | let byte = tmp.wrapping_add((self.low >> 32) as u8); 40 | self.stream.write_u8(byte)?; 41 | lzma_debug!("> byte: {:02x}", byte); 42 | tmp = 0xFF; 43 | self.cachesz -= 1; 44 | if self.cachesz == 0 { 45 | break; 46 | } 47 | } 48 | self.cache = (self.low >> 24) as u8; 49 | } 50 | 51 | self.cachesz += 1; 52 | self.low = (self.low << 8) & 0xFFFF_FFFF; 53 | Ok(()) 54 | } 55 | 56 | pub fn finish(&mut self) -> io::Result<()> { 57 | for _ in 0..5 { 58 | self.write_low()?; 59 | 60 | lzma_debug!("$ {{ range: {:08x}, low: {:010x} }}", self.range, self.low); 61 | } 62 | Ok(()) 63 | } 64 | 65 | fn normalize(&mut self) -> io::Result<()> { 66 | while self.range < 0x0100_0000 { 67 | lzma_debug!( 68 | "+ {{ range: {:08x}, low: {:010x}, cache: {:02x}, {} }}", 69 | self.range, 70 | self.low, 71 | self.cache, 72 | self.cachesz 73 | ); 74 
| self.range <<= 8; 75 | self.write_low()?; 76 | lzma_debug!( 77 | "* {{ range: {:08x}, low: {:010x}, cache: {:02x}, {} }}", 78 | self.range, 79 | self.low, 80 | self.cache, 81 | self.cachesz 82 | ); 83 | } 84 | lzma_trace!(" {{ range: {:08x}, low: {:010x} }}", self.range, self.low); 85 | Ok(()) 86 | } 87 | 88 | pub fn encode_bit(&mut self, prob: &mut u16, bit: bool) -> io::Result<()> { 89 | let bound: u32 = (self.range >> 11) * (*prob as u32); 90 | lzma_trace!( 91 | " bound: {:08x}, prob: {:04x}, bit: {}", 92 | bound, 93 | prob, 94 | bit as u8 95 | ); 96 | 97 | if bit { 98 | *prob -= *prob >> 5; 99 | self.low += bound as u64; 100 | self.range -= bound; 101 | } else { 102 | *prob += (0x800_u16 - *prob) >> 5; 103 | self.range = bound; 104 | } 105 | 106 | self.normalize() 107 | } 108 | 109 | #[cfg(test)] 110 | fn encode_bit_tree( 111 | &mut self, 112 | num_bits: usize, 113 | probs: &mut [u16], 114 | value: u32, 115 | ) -> io::Result<()> { 116 | debug_assert!(value.leading_zeros() as usize + num_bits >= 32); 117 | let mut tmp: usize = 1; 118 | for i in 0..num_bits { 119 | let bit = ((value >> (num_bits - i - 1)) & 1) != 0; 120 | self.encode_bit(&mut probs[tmp], bit)?; 121 | tmp = (tmp << 1) ^ (bit as usize); 122 | } 123 | Ok(()) 124 | } 125 | 126 | #[cfg(test)] 127 | pub fn encode_reverse_bit_tree( 128 | &mut self, 129 | num_bits: usize, 130 | probs: &mut [u16], 131 | offset: usize, 132 | mut value: u32, 133 | ) -> io::Result<()> { 134 | debug_assert!(value.leading_zeros() as usize + num_bits >= 32); 135 | let mut tmp: usize = 1; 136 | for _ in 0..num_bits { 137 | let bit = (value & 1) != 0; 138 | value >>= 1; 139 | self.encode_bit(&mut probs[offset + tmp], bit)?; 140 | tmp = (tmp << 1) ^ (bit as usize); 141 | } 142 | Ok(()) 143 | } 144 | } 145 | 146 | #[cfg(test)] 147 | #[derive(Debug, Clone)] 148 | pub struct BitTree { 149 | probs: [u16; PROBS_ARRAY_LEN], 150 | } 151 | 152 | #[cfg(test)] 153 | impl BitTree { 154 | pub fn new() -> Self { 155 | // The validity of 
PROBS_ARRAY_LEN is checked at compile-time with a macro 156 | // that confirms that the argument P passed is indeed 1 << N for 157 | // some N using usize::trailing_zeros to calculate floor(log_2(P)). 158 | // 159 | // Thus, BitTree is only valid for any P such that 160 | // P = 2 ** floor(log_2(P)), where P is the length of the probability array 161 | // of the BitTree. This maintains the invariant that P = 1 << N. 162 | // 163 | // This precondition must be checked for any way to construct a new, valid 164 | // instance of BitTree. Here it is checked for BitTree::new(), but if 165 | // another function is added that returns a new instance of BitTree, 166 | // this assertion must be checked there as well. 167 | const_assert!( 168 | "BitTree's PROBS_ARRAY_LEN parameter must be a power of 2", 169 | PROBS_ARRAY_LEN: usize => 170 | (1 << (PROBS_ARRAY_LEN.trailing_zeros() as usize)) == PROBS_ARRAY_LEN 171 | ); 172 | BitTree { 173 | probs: [0x400; PROBS_ARRAY_LEN], 174 | } 175 | } 176 | 177 | // NUM_BITS is derived from PROBS_ARRAY_LEN because of the lack of 178 | // generic const expressions. Where PROBS_ARRAY_LEN is a power of 2, 179 | // NUM_BITS can be derived by the number of trailing zeroes. 
180 | const NUM_BITS: usize = PROBS_ARRAY_LEN.trailing_zeros() as usize; 181 | 182 | pub fn encode( 183 | &mut self, 184 | rangecoder: &mut RangeEncoder, 185 | value: u32, 186 | ) -> io::Result<()> { 187 | rangecoder.encode_bit_tree(Self::NUM_BITS, &mut self.probs, value) 188 | } 189 | 190 | pub fn encode_reverse( 191 | &mut self, 192 | rangecoder: &mut RangeEncoder, 193 | value: u32, 194 | ) -> io::Result<()> { 195 | rangecoder.encode_reverse_bit_tree(Self::NUM_BITS, &mut self.probs, 0, value) 196 | } 197 | } 198 | 199 | #[cfg(test)] 200 | pub struct LenEncoder { 201 | choice: u16, 202 | choice2: u16, 203 | low_coder: [BitTree<{ 1 << 3 }>; 16], 204 | mid_coder: [BitTree<{ 1 << 3 }>; 16], 205 | high_coder: BitTree<{ 1 << 8 }>, 206 | } 207 | 208 | #[cfg(test)] 209 | impl LenEncoder { 210 | pub fn new() -> Self { 211 | LenEncoder { 212 | choice: 0x400, 213 | choice2: 0x400, 214 | low_coder: [ 215 | BitTree::new(), 216 | BitTree::new(), 217 | BitTree::new(), 218 | BitTree::new(), 219 | BitTree::new(), 220 | BitTree::new(), 221 | BitTree::new(), 222 | BitTree::new(), 223 | BitTree::new(), 224 | BitTree::new(), 225 | BitTree::new(), 226 | BitTree::new(), 227 | BitTree::new(), 228 | BitTree::new(), 229 | BitTree::new(), 230 | BitTree::new(), 231 | ], 232 | mid_coder: [ 233 | BitTree::new(), 234 | BitTree::new(), 235 | BitTree::new(), 236 | BitTree::new(), 237 | BitTree::new(), 238 | BitTree::new(), 239 | BitTree::new(), 240 | BitTree::new(), 241 | BitTree::new(), 242 | BitTree::new(), 243 | BitTree::new(), 244 | BitTree::new(), 245 | BitTree::new(), 246 | BitTree::new(), 247 | BitTree::new(), 248 | BitTree::new(), 249 | ], 250 | high_coder: BitTree::new(), 251 | } 252 | } 253 | 254 | pub fn encode( 255 | &mut self, 256 | rangecoder: &mut RangeEncoder, 257 | pos_state: usize, 258 | value: u32, 259 | ) -> io::Result<()> { 260 | let is_low: bool = value < 8; 261 | rangecoder.encode_bit(&mut self.choice, !is_low)?; 262 | if is_low { 263 | return 
self.low_coder[pos_state].encode(rangecoder, value); 264 | } 265 | 266 | let is_middle: bool = value < 16; 267 | rangecoder.encode_bit(&mut self.choice2, !is_middle)?; 268 | if is_middle { 269 | return self.mid_coder[pos_state].encode(rangecoder, value - 8); 270 | } 271 | 272 | self.high_coder.encode(rangecoder, value - 16) 273 | } 274 | } 275 | 276 | #[cfg(test)] 277 | mod test { 278 | use super::*; 279 | use crate::decode::rangecoder::{LenDecoder, RangeDecoder}; 280 | use crate::{decode, encode}; 281 | use seq_macro::seq; 282 | use std::io::BufReader; 283 | 284 | fn encode_decode(prob_init: u16, bits: &[bool]) { 285 | let mut buf: Vec = Vec::new(); 286 | 287 | let mut encoder = RangeEncoder::new(&mut buf); 288 | let mut prob = prob_init; 289 | for &b in bits { 290 | encoder.encode_bit(&mut prob, b).unwrap(); 291 | } 292 | encoder.finish().unwrap(); 293 | 294 | let mut bufread = BufReader::new(buf.as_slice()); 295 | let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); 296 | let mut prob = prob_init; 297 | for &b in bits { 298 | assert_eq!(decoder.decode_bit(&mut prob, true).unwrap(), b); 299 | } 300 | assert!(decoder.is_finished_ok().unwrap()); 301 | } 302 | 303 | #[test] 304 | fn test_encode_decode_zeros() { 305 | encode_decode(0x400, &[false; 10000]); 306 | } 307 | 308 | #[test] 309 | fn test_encode_decode_ones() { 310 | encode_decode(0x400, &[true; 10000]); 311 | } 312 | 313 | fn encode_decode_bittree(values: &[u32]) { 314 | let mut buf: Vec = Vec::new(); 315 | 316 | let mut encoder = RangeEncoder::new(&mut buf); 317 | let mut tree = encode::rangecoder::BitTree::::new(); 318 | for &v in values { 319 | tree.encode(&mut encoder, v).unwrap(); 320 | } 321 | encoder.finish().unwrap(); 322 | 323 | let mut bufread = BufReader::new(buf.as_slice()); 324 | let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); 325 | let mut tree = decode::rangecoder::BitTree::::new(); 326 | for &v in values { 327 | assert_eq!(tree.parse(&mut decoder, true).unwrap(), v); 328 | 
} 329 | assert!(decoder.is_finished_ok().unwrap()); 330 | } 331 | 332 | #[test] 333 | fn test_encode_decode_bittree_zeros() { 334 | seq!(NUM_BITS in 0..16 { 335 | encode_decode_bittree::<{1 << NUM_BITS}>(&[0; 10000]); 336 | }); 337 | } 338 | 339 | #[test] 340 | fn test_encode_decode_bittree_ones() { 341 | seq!(NUM_BITS in 0..16 { 342 | encode_decode_bittree::<{1 << NUM_BITS}>(&[(1 << NUM_BITS) - 1; 10000]); 343 | }); 344 | } 345 | 346 | #[test] 347 | fn test_encode_decode_bittree_all() { 348 | seq!(NUM_BITS in 0..16 { 349 | let max = 1 << NUM_BITS; 350 | let values: Vec = (0..max).collect(); 351 | encode_decode_bittree::<{1 << NUM_BITS}>(&values); 352 | }); 353 | } 354 | 355 | fn encode_decode_reverse_bittree(values: &[u32]) { 356 | let mut buf: Vec = Vec::new(); 357 | 358 | let mut encoder = RangeEncoder::new(&mut buf); 359 | let mut tree = encode::rangecoder::BitTree::::new(); 360 | for &v in values { 361 | tree.encode_reverse(&mut encoder, v).unwrap(); 362 | } 363 | encoder.finish().unwrap(); 364 | 365 | let mut bufread = BufReader::new(buf.as_slice()); 366 | let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); 367 | let mut tree = decode::rangecoder::BitTree::::new(); 368 | for &v in values { 369 | assert_eq!(tree.parse_reverse(&mut decoder, true).unwrap(), v); 370 | } 371 | assert!(decoder.is_finished_ok().unwrap()); 372 | } 373 | 374 | #[test] 375 | fn test_encode_decode_reverse_bittree_zeros() { 376 | seq!(NUM_BITS in 0..16 { 377 | encode_decode_reverse_bittree::<{1 << NUM_BITS}>(&[0; 10000]); 378 | }); 379 | } 380 | 381 | #[test] 382 | fn test_encode_decode_reverse_bittree_ones() { 383 | seq!(NUM_BITS in 0..16 { 384 | encode_decode_reverse_bittree::<{1 << NUM_BITS}>( 385 | &[(1 << NUM_BITS) - 1; 10000], 386 | ); 387 | }); 388 | } 389 | 390 | #[test] 391 | fn test_encode_decode_reverse_bittree_all() { 392 | seq!(NUM_BITS in 0..16 { 393 | let max = 1 << NUM_BITS; 394 | let values: Vec = (0..max).collect(); 395 | encode_decode_reverse_bittree::<{1 << 
NUM_BITS}>(&values); 396 | }); 397 | } 398 | 399 | fn encode_decode_length(pos_state: usize, values: &[u32]) { 400 | let mut buf: Vec = Vec::new(); 401 | 402 | let mut encoder = RangeEncoder::new(&mut buf); 403 | let mut len_encoder = LenEncoder::new(); 404 | for &v in values { 405 | len_encoder.encode(&mut encoder, pos_state, v).unwrap(); 406 | } 407 | encoder.finish().unwrap(); 408 | 409 | let mut bufread = BufReader::new(buf.as_slice()); 410 | let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); 411 | let mut len_decoder = LenDecoder::new(); 412 | for &v in values { 413 | assert_eq!( 414 | len_decoder.decode(&mut decoder, pos_state, true).unwrap(), 415 | v as usize 416 | ); 417 | } 418 | assert!(decoder.is_finished_ok().unwrap()); 419 | } 420 | 421 | #[test] 422 | fn test_encode_decode_length_zeros() { 423 | for pos_state in 0..16 { 424 | encode_decode_length(pos_state, &[0; 10000]); 425 | } 426 | } 427 | 428 | #[test] 429 | fn test_encode_decode_length_all() { 430 | for pos_state in 0..16 { 431 | let max = (1 << 8) + 16; 432 | let values: Vec = (0..max).collect(); 433 | encode_decode_length(pos_state, &values); 434 | } 435 | } 436 | } 437 | -------------------------------------------------------------------------------- /src/encode/util.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | /// An [`io::Write`] computing a digest on the bytes written. 
4 | pub struct CrcDigestWrite<'a, 'b, W, S> 5 | where 6 | W: 'a + io::Write, 7 | S: crc::Width, 8 | { 9 | /// Underlying writer 10 | write: &'a mut W, 11 | /// Hasher 12 | digest: &'a mut crc::Digest<'b, S>, 13 | } 14 | 15 | impl<'a, 'b, W, S> CrcDigestWrite<'a, 'b, W, S> 16 | where 17 | W: io::Write, 18 | S: crc::Width, 19 | { 20 | pub fn new(write: &'a mut W, digest: &'a mut crc::Digest<'b, S>) -> Self { 21 | Self { write, digest } 22 | } 23 | } 24 | 25 | impl<'a, 'b, W> io::Write for CrcDigestWrite<'a, 'b, W, u32> 26 | where 27 | W: io::Write, 28 | { 29 | fn write(&mut self, buf: &[u8]) -> io::Result { 30 | let result = self.write.write(buf)?; 31 | self.digest.update(&buf[..result]); 32 | Ok(result) 33 | } 34 | fn flush(&mut self) -> io::Result<()> { 35 | self.write.flush() 36 | } 37 | } 38 | 39 | /// An [`io::Write`] counting the bytes written. 40 | pub struct CountWrite<'a, W> 41 | where 42 | W: 'a + io::Write, 43 | { 44 | /// Underlying writer 45 | write: &'a mut W, 46 | /// Number of bytes written 47 | count: usize, 48 | } 49 | 50 | impl<'a, W> CountWrite<'a, W> 51 | where 52 | W: io::Write, 53 | { 54 | pub fn new(write: &'a mut W) -> Self { 55 | Self { write, count: 0 } 56 | } 57 | 58 | pub fn count(&self) -> usize { 59 | self.count 60 | } 61 | } 62 | 63 | impl<'a, W> io::Write for CountWrite<'a, W> 64 | where 65 | W: io::Write, 66 | { 67 | fn write(&mut self, buf: &[u8]) -> io::Result { 68 | let result = self.write.write(buf)?; 69 | self.count += result; 70 | Ok(result) 71 | } 72 | 73 | fn flush(&mut self) -> io::Result<()> { 74 | self.write.flush() 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/encode/xz.rs: -------------------------------------------------------------------------------- 1 | use crate::decode; 2 | use crate::encode::{lzma2, util}; 3 | use crate::xz::crc::CRC32; 4 | use crate::xz::{footer, header, CheckMethod, StreamFlags}; 5 | use byteorder::{LittleEndian, WriteBytesExt}; 6 | use 
std::io; 7 | use std::io::Write; 8 | 9 | pub fn encode_stream(input: &mut R, output: &mut W) -> io::Result<()> 10 | where 11 | R: io::BufRead, 12 | W: io::Write, 13 | { 14 | let stream_flags = StreamFlags { 15 | check_method: CheckMethod::None, 16 | }; 17 | 18 | // Header 19 | write_header(output, stream_flags)?; 20 | 21 | // Block 22 | let (unpadded_size, unpacked_size) = write_block(input, output)?; 23 | 24 | // Index 25 | let index_size = write_index(output, unpadded_size, unpacked_size)?; 26 | 27 | // Footer 28 | write_footer(output, stream_flags, index_size) 29 | } 30 | 31 | fn write_header(output: &mut W, stream_flags: StreamFlags) -> io::Result<()> 32 | where 33 | W: io::Write, 34 | { 35 | output.write_all(header::XZ_MAGIC)?; 36 | let mut digest = CRC32.digest(); 37 | { 38 | let mut digested = util::CrcDigestWrite::new(output, &mut digest); 39 | stream_flags.serialize(&mut digested)?; 40 | } 41 | let crc32 = digest.finalize(); 42 | output.write_u32::(crc32)?; 43 | Ok(()) 44 | } 45 | 46 | fn write_footer(output: &mut W, stream_flags: StreamFlags, index_size: usize) -> io::Result<()> 47 | where 48 | W: io::Write, 49 | { 50 | let mut digest = CRC32.digest(); 51 | let mut footer_buf: Vec = Vec::new(); 52 | { 53 | let mut digested = util::CrcDigestWrite::new(&mut footer_buf, &mut digest); 54 | 55 | let backward_size = (index_size >> 2) - 1; 56 | digested.write_u32::(backward_size as u32)?; 57 | stream_flags.serialize(&mut digested)?; 58 | } 59 | let crc32 = digest.finalize(); 60 | output.write_u32::(crc32)?; 61 | output.write_all(footer_buf.as_slice())?; 62 | 63 | output.write_all(footer::XZ_MAGIC_FOOTER)?; 64 | Ok(()) 65 | } 66 | 67 | fn write_block(input: &mut R, output: &mut W) -> io::Result<(usize, usize)> 68 | where 69 | R: io::BufRead, 70 | W: io::Write, 71 | { 72 | let (unpadded_size, unpacked_size) = { 73 | let mut count_output = util::CountWrite::new(output); 74 | 75 | // Block header 76 | let mut digest = CRC32.digest(); 77 | { 78 | let mut digested = 
util::CrcDigestWrite::new(&mut count_output, &mut digest); 79 | let header_size = 8; 80 | digested.write_u8((header_size >> 2) as u8)?; 81 | let flags = 0x00; // 1 filter, no (un)packed size provided 82 | digested.write_u8(flags)?; 83 | let filter_id = 0x21; // LZMA2 84 | digested.write_u8(filter_id)?; 85 | let size_of_properties = 1; 86 | digested.write_u8(size_of_properties)?; 87 | let properties = 22; // TODO 88 | digested.write_u8(properties)?; 89 | let padding = [0, 0, 0]; 90 | digested.write_all(&padding)?; 91 | } 92 | let crc32 = digest.finalize(); 93 | count_output.write_u32::(crc32)?; 94 | 95 | // Block 96 | let mut count_input = decode::util::CountBufRead::new(input); 97 | lzma2::encode_stream(&mut count_input, &mut count_output)?; 98 | (count_output.count(), count_input.count()) 99 | }; 100 | lzma_info!( 101 | "Unpadded size = {}, unpacked_size = {}", 102 | unpadded_size, 103 | unpacked_size 104 | ); 105 | 106 | let padding_size = ((unpadded_size ^ 0x03) + 1) & 0x03; 107 | let padding = vec![0; padding_size]; 108 | output.write_all(padding.as_slice())?; 109 | // Checksum = None (cf. 
above) 110 | 111 | Ok((unpadded_size, unpacked_size)) 112 | } 113 | 114 | fn write_index(output: &mut W, unpadded_size: usize, unpacked_size: usize) -> io::Result 115 | where 116 | W: io::Write, 117 | { 118 | let mut count_output = util::CountWrite::new(output); 119 | 120 | let mut digest = CRC32.digest(); 121 | { 122 | let mut digested = util::CrcDigestWrite::new(&mut count_output, &mut digest); 123 | digested.write_u8(0)?; // No more block 124 | let num_records = 1; 125 | write_multibyte(&mut digested, num_records)?; 126 | 127 | write_multibyte(&mut digested, unpadded_size as u64)?; 128 | write_multibyte(&mut digested, unpacked_size as u64)?; 129 | } 130 | 131 | // Padding 132 | let count = count_output.count(); 133 | let padding_size = ((count ^ 0x03) + 1) & 0x03; 134 | { 135 | let mut digested = util::CrcDigestWrite::new(&mut count_output, &mut digest); 136 | let padding = vec![0; padding_size]; 137 | digested.write_all(padding.as_slice())?; 138 | } 139 | 140 | let crc32 = digest.finalize(); 141 | count_output.write_u32::(crc32)?; 142 | 143 | Ok(count_output.count()) 144 | } 145 | 146 | fn write_multibyte(output: &mut W, mut value: u64) -> io::Result<()> 147 | where 148 | W: io::Write, 149 | { 150 | loop { 151 | let byte = (value & 0x7F) as u8; 152 | value >>= 7; 153 | if value == 0 { 154 | output.write_u8(byte)?; 155 | break; 156 | } else { 157 | output.write_u8(0x80 | byte)?; 158 | } 159 | } 160 | 161 | Ok(()) 162 | } 163 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | //! Error handling. 2 | 3 | use std::fmt::Display; 4 | use std::{io, result}; 5 | 6 | /// Library errors. 7 | #[derive(Debug)] 8 | pub enum Error { 9 | /// I/O error. 10 | IoError(io::Error), 11 | /// Not enough bytes to complete header 12 | HeaderTooShort(io::Error), 13 | /// LZMA error. 14 | LzmaError(String), 15 | /// XZ error. 
/// Library errors.
#[derive(Debug)]
pub enum Error {
    /// I/O error.
    IoError(io::Error),
    /// Not enough bytes to complete header
    HeaderTooShort(io::Error),
    /// LZMA error.
    LzmaError(String),
    /// XZ error.
    XzError(String),
}

/// Library result alias.
pub type Result<T> = result::Result<T, Error>;

impl From<io::Error> for Error {
    fn from(e: io::Error) -> Error {
        Error::IoError(e)
    }
}

impl Display for Error {
    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Each message is prefixed with its error family for easy triage.
        match self {
            Error::IoError(e) => write!(fmt, "io error: {}", e),
            Error::HeaderTooShort(e) => write!(fmt, "header too short: {}", e),
            Error::LzmaError(e) => write!(fmt, "lzma error: {}", e),
            Error::XzError(e) => write!(fmt, "xz error: {}", e),
        }
    }
}

impl std::error::Error for Error {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            // Only the I/O-backed variants carry an underlying cause.
            Error::IoError(e) | Error::HeaderTooShort(e) => Some(e),
            Error::LzmaError(_) | Error::XzError(_) => None,
        }
    }
}

#[cfg(test)]
mod test {
    use super::Error;

    #[test]
    fn test_display() {
        let io_err = std::io::Error::new(std::io::ErrorKind::Other, "this is an error");
        assert_eq!(Error::IoError(io_err).to_string(), "io error: this is an error");
        assert_eq!(
            Error::LzmaError("this is an error".to_string()).to_string(),
            "lzma error: this is an error"
        );
        assert_eq!(
            Error::XzError("this is an error".to_string()).to_string(),
            "xz error: this is an error"
        );
    }
}

// ---- src/lib.rs ----
// (crate docs: Pure-Rust codecs for LZMA, LZMA2, and XZ.)
2 | #![cfg_attr(docsrs, feature(doc_cfg, doc_cfg_hide))] 3 | #![deny(missing_docs)] 4 | #![deny(missing_debug_implementations)] 5 | #![forbid(unsafe_code)] 6 | 7 | #[macro_use] 8 | mod macros; 9 | 10 | mod decode; 11 | mod encode; 12 | 13 | pub mod error; 14 | 15 | mod util; 16 | mod xz; 17 | 18 | use std::io; 19 | 20 | /// Compression helpers. 21 | pub mod compress { 22 | pub use crate::encode::options::*; 23 | } 24 | 25 | /// Decompression helpers. 26 | pub mod decompress { 27 | pub use crate::decode::options::*; 28 | 29 | #[cfg(feature = "raw_decoder")] 30 | #[cfg_attr(docsrs, doc(cfg(raw_decoder)))] 31 | pub mod raw { 32 | //! Raw decoding primitives for LZMA/LZMA2 streams. 33 | pub use crate::decode::lzma::{LzmaDecoder, LzmaParams, LzmaProperties}; 34 | pub use crate::decode::lzma2::Lzma2Decoder; 35 | } 36 | 37 | #[cfg(feature = "stream")] 38 | #[cfg_attr(docsrs, doc(cfg(stream)))] 39 | pub use crate::decode::stream::Stream; 40 | } 41 | 42 | /// Decompress LZMA data with default 43 | /// [`Options`](decompress/struct.Options.html). 44 | pub fn lzma_decompress( 45 | input: &mut R, 46 | output: &mut W, 47 | ) -> error::Result<()> { 48 | lzma_decompress_with_options(input, output, &decompress::Options::default()) 49 | } 50 | 51 | /// Decompress LZMA data with the provided options. 52 | pub fn lzma_decompress_with_options( 53 | input: &mut R, 54 | output: &mut W, 55 | options: &decompress::Options, 56 | ) -> error::Result<()> { 57 | let params = decode::lzma::LzmaParams::read_header(input, options)?; 58 | let mut decoder = decode::lzma::LzmaDecoder::new(params, options.memlimit)?; 59 | decoder.decompress(input, output) 60 | } 61 | 62 | /// Compresses data with LZMA and default 63 | /// [`Options`](compress/struct.Options.html). 64 | pub fn lzma_compress( 65 | input: &mut R, 66 | output: &mut W, 67 | ) -> io::Result<()> { 68 | lzma_compress_with_options(input, output, &compress::Options::default()) 69 | } 70 | 71 | /// Compress LZMA data with the provided options. 
72 | pub fn lzma_compress_with_options( 73 | input: &mut R, 74 | output: &mut W, 75 | options: &compress::Options, 76 | ) -> io::Result<()> { 77 | let encoder = encode::dumbencoder::Encoder::from_stream(output, options)?; 78 | encoder.process(input) 79 | } 80 | 81 | /// Decompress LZMA2 data with default 82 | /// [`Options`](decompress/struct.Options.html). 83 | pub fn lzma2_decompress( 84 | input: &mut R, 85 | output: &mut W, 86 | ) -> error::Result<()> { 87 | decode::lzma2::Lzma2Decoder::new().decompress(input, output) 88 | } 89 | 90 | /// Compress data with LZMA2 and default 91 | /// [`Options`](compress/struct.Options.html). 92 | pub fn lzma2_compress( 93 | input: &mut R, 94 | output: &mut W, 95 | ) -> io::Result<()> { 96 | encode::lzma2::encode_stream(input, output) 97 | } 98 | 99 | /// Decompress XZ data with default [`Options`](decompress/struct.Options.html). 100 | pub fn xz_decompress( 101 | input: &mut R, 102 | output: &mut W, 103 | ) -> error::Result<()> { 104 | decode::xz::decode_stream(input, output) 105 | } 106 | 107 | /// Compress data with XZ and default [`Options`](compress/struct.Options.html). 108 | pub fn xz_compress(input: &mut R, output: &mut W) -> io::Result<()> { 109 | encode::xz::encode_stream(input, output) 110 | } 111 | -------------------------------------------------------------------------------- /src/macros.rs: -------------------------------------------------------------------------------- 1 | /// Log trace message (feature: enabled). 2 | #[cfg(feature = "enable_logging")] 3 | macro_rules! lzma_trace { 4 | ($($arg:tt)+) => { 5 | log::trace!($($arg)+); 6 | } 7 | } 8 | 9 | /// Log debug message (feature: enabled). 10 | #[cfg(feature = "enable_logging")] 11 | macro_rules! lzma_debug { 12 | ($($arg:tt)+) => { 13 | log::debug!($($arg)+); 14 | } 15 | } 16 | 17 | /// Log info message (feature: enabled). 18 | #[cfg(feature = "enable_logging")] 19 | macro_rules! 
lzma_info { 20 | ($($arg:tt)+) => { 21 | log::info!($($arg)+); 22 | } 23 | } 24 | 25 | /// Log trace message (feature: disabled). 26 | #[cfg(not(feature = "enable_logging"))] 27 | macro_rules! lzma_trace { 28 | ($($arg:tt)+) => {}; 29 | } 30 | 31 | /// Log debug message (feature: disabled). 32 | #[cfg(not(feature = "enable_logging"))] 33 | macro_rules! lzma_debug { 34 | ($($arg:tt)+) => {}; 35 | } 36 | 37 | /// Log info message (feature: disabled). 38 | #[cfg(not(feature = "enable_logging"))] 39 | macro_rules! lzma_info { 40 | ($($arg:tt)+) => {}; 41 | } 42 | -------------------------------------------------------------------------------- /src/util/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod vec2d; 2 | 3 | /// macro for compile-time const assertions 4 | macro_rules! const_assert { 5 | ($message:expr, $($list:ident : $ty:ty),* => $expr:expr) => {{ 6 | struct Assert<$(const $list: $ty,)*>; 7 | impl<$(const $list: $ty,)*> Assert<$($list,)*> { 8 | const OK: () = { 9 | if !($expr) { 10 | ::std::panic!(::std::concat!("assertion failed: ", $message)); 11 | } 12 | }; 13 | } 14 | Assert::<$($list,)*>::OK 15 | }}; 16 | } 17 | 18 | pub(crate) use const_assert; 19 | -------------------------------------------------------------------------------- /src/util/vec2d.rs: -------------------------------------------------------------------------------- 1 | use std::ops::{Index, IndexMut}; 2 | 3 | /// A 2 dimensional matrix in row-major order backed by a contiguous slice. 4 | #[derive(Debug)] 5 | pub struct Vec2D { 6 | data: Box<[T]>, 7 | cols: usize, 8 | } 9 | 10 | impl Vec2D { 11 | /// Initialize a grid of size (`rows`, `cols`) with the given data element. 
12 | pub fn init(data: T, size: (usize, usize)) -> Vec2D 13 | where 14 | T: Clone, 15 | { 16 | let (rows, cols) = size; 17 | let len = rows 18 | .checked_mul(cols) 19 | .unwrap_or_else(|| panic!("{} rows by {} cols exceeds usize::MAX", rows, cols)); 20 | Vec2D { 21 | data: vec![data; len].into_boxed_slice(), 22 | cols, 23 | } 24 | } 25 | 26 | /// Fills the grid with elements by cloning `value`. 27 | pub fn fill(&mut self, value: T) 28 | where 29 | T: Clone, 30 | { 31 | self.data.fill(value) 32 | } 33 | } 34 | 35 | impl Index for Vec2D { 36 | type Output = [T]; 37 | 38 | #[inline] 39 | fn index(&self, row: usize) -> &Self::Output { 40 | let start_row = row 41 | .checked_mul(self.cols) 42 | .unwrap_or_else(|| panic!("{} row by {} cols exceeds usize::MAX", row, self.cols)); 43 | &self.data[start_row..start_row + self.cols] 44 | } 45 | } 46 | 47 | impl IndexMut for Vec2D { 48 | #[inline] 49 | fn index_mut(&mut self, row: usize) -> &mut Self::Output { 50 | let start_row = row 51 | .checked_mul(self.cols) 52 | .unwrap_or_else(|| panic!("{} row by {} cols exceeds usize::MAX", row, self.cols)); 53 | &mut self.data[start_row..start_row + self.cols] 54 | } 55 | } 56 | 57 | #[cfg(test)] 58 | mod test { 59 | use super::*; 60 | 61 | #[test] 62 | fn init() { 63 | let vec2d = Vec2D::init(1, (2, 3)); 64 | assert_eq!(vec2d[0], [1, 1, 1]); 65 | assert_eq!(vec2d[1], [1, 1, 1]); 66 | } 67 | 68 | #[test] 69 | #[should_panic] 70 | fn init_overflow() { 71 | Vec2D::init(1, (usize::MAX, usize::MAX)); 72 | } 73 | 74 | #[test] 75 | fn fill() { 76 | let mut vec2d = Vec2D::init(0, (2, 3)); 77 | vec2d.fill(7); 78 | assert_eq!(vec2d[0], [7, 7, 7]); 79 | assert_eq!(vec2d[1], [7, 7, 7]); 80 | } 81 | 82 | #[test] 83 | fn index() { 84 | let vec2d = Vec2D { 85 | data: vec![0, 1, 2, 3, 4, 5, 6, 7].into_boxed_slice(), 86 | cols: 2, 87 | }; 88 | assert_eq!(vec2d[0], [0, 1]); 89 | assert_eq!(vec2d[1], [2, 3]); 90 | assert_eq!(vec2d[2], [4, 5]); 91 | assert_eq!(vec2d[3], [6, 7]); 92 | } 93 | 94 | #[test] 
// NOTE(review): these items live inside the enclosing #[cfg(test)] mod test
// of src/util/vec2d.rs (module header outside this view).

#[test]
fn indexmut() {
    let mut grid = Vec2D {
        data: vec![0, 1, 2, 3, 4, 5, 6, 7].into_boxed_slice(),
        cols: 2,
    };

    grid[1][1] = 9;
    assert_eq!(grid[0], [0, 1]);
    // Only cell (1, 1) changes; every other row must be untouched.
    assert_eq!(grid[1], [2, 9]);
    assert_eq!(grid[2], [4, 5]);
    assert_eq!(grid[3], [6, 7]);
}

#[test]
#[should_panic]
fn index_out_of_bounds() {
    let grid = Vec2D::init(1, (2, 3));
    let _x = grid[2][3];
}

#[test]
#[should_panic]
fn index_out_of_bounds_vec_edge() {
    let grid = Vec2D::init(1, (2, 3));
    let _x = grid[1][3];
}

#[test]
#[should_panic]
fn index_column_out_of_bounds() {
    let grid = Vec2D::init(1, (2, 3));
    let _x = grid[0][3];
}

#[test]
#[should_panic]
fn index_row_out_of_bounds() {
    let grid = Vec2D::init(1, (2, 3));
    let _x = grid[2][0];
}

#[test]
#[should_panic]
fn index_mul_overflow() {
    // Matrix with 4 columns.
    let grid = Vec2D::init(0, (3, 4));
    // 2^{usize.numbits() - 2}: `row * cols` wraps around to zero.
    let row = (usize::MAX / 4) + 1;
    // Would silently alias matrix[0] if the overflow were not caught.
    let _ = grid[row];
}

#[test]
#[should_panic]
fn index_add_overflow() {
    // Matrix with 5 columns.
    let grid = Vec2D::init(0, (3, 5));
    // When numbits(usize) is a multiple of 4, 5 divides usize::MAX exactly:
    // usize::MAX is 0xFFF...F and usize::MAX / 5 is 0x333...3.
    let row = usize::MAX / 5;
    // This would therefore try to index data[usize::MAX..4].
    let _ = grid[row];
}

#[test]
#[should_panic]
fn indexmut_out_of_bounds() {
    let mut grid = Vec2D::init(1, (2, 3));
    grid[2][3] = 0;
}

#[test]
#[should_panic]
fn indexmut_out_of_bounds_vec_edge() {
    let mut grid = Vec2D::init(1, (2, 3));
    grid[1][3] = 0;
}

#[test]
#[should_panic]
fn indexmut_column_out_of_bounds() {
    let mut grid = Vec2D::init(1, (2, 3));
    grid[0][3] = 0;
}

#[test]
#[should_panic]
fn indexmut_row_out_of_bounds() {
    let mut grid = Vec2D::init(1, (2, 3));
    grid[2][0] = 0;
}

#[test]
#[should_panic]
fn indexmut_mul_overflow() {
    // Matrix with 4 columns.
    let mut grid = Vec2D::init(0, (3, 4));
    // 2^{usize.numbits() - 2}: `row * cols` wraps around to zero.
    let row = (usize::MAX / 4) + 1;
    // Would silently alias matrix[0] if the overflow were not caught.
    grid[row][0] = 9;
}

#[test]
#[should_panic]
fn indexmut_add_overflow() {
    // Matrix with 5 columns.
    let mut grid = Vec2D::init(0, (3, 5));
    // When numbits(usize) is a multiple of 4, 5 divides usize::MAX exactly:
    // usize::MAX is 0xFFF...F and usize::MAX / 5 is 0x333...3.
    let row = usize::MAX / 5;
    // This would therefore try to index data[usize::MAX..4].
    grid[row][0] = 9;
}
210 | matrix[row][0] = 9; 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /src/xz/crc.rs: -------------------------------------------------------------------------------- 1 | use crc::{Crc, CRC_32_ISO_HDLC, CRC_64_XZ}; 2 | 3 | pub const CRC32: Crc = Crc::::new(&CRC_32_ISO_HDLC); 4 | pub const CRC64: Crc = Crc::::new(&CRC_64_XZ); 5 | -------------------------------------------------------------------------------- /src/xz/footer.rs: -------------------------------------------------------------------------------- 1 | //! XZ footer. 2 | 3 | /// File format trailing terminator, see sect. 2.1.2.4. 4 | pub(crate) const XZ_MAGIC_FOOTER: &[u8] = &[0x59, 0x5A]; 5 | -------------------------------------------------------------------------------- /src/xz/header.rs: -------------------------------------------------------------------------------- 1 | //! XZ header. 2 | 3 | use crate::decode::util; 4 | use crate::error; 5 | use crate::xz::crc::CRC32; 6 | use crate::xz::StreamFlags; 7 | use byteorder::{BigEndian, LittleEndian, ReadBytesExt}; 8 | 9 | /// File format magic header signature, see sect. 2.1.1.1. 10 | pub(crate) const XZ_MAGIC: &[u8] = &[0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00]; 11 | 12 | /// Stream Header, see sect. 2.1.1. 13 | #[derive(Clone, Copy, Debug)] 14 | pub(crate) struct StreamHeader { 15 | pub(crate) stream_flags: StreamFlags, 16 | } 17 | 18 | impl StreamHeader { 19 | /// Parse a Stream Header from a buffered reader. 20 | pub(crate) fn parse
(input: &mut BR) -> error::Result 21 | where 22 | BR: std::io::BufRead, 23 | { 24 | if !util::read_tag(input, XZ_MAGIC)? { 25 | return Err(error::Error::XzError(format!( 26 | "Invalid XZ magic, expected {:?}", 27 | XZ_MAGIC 28 | ))); 29 | } 30 | 31 | let (flags, digested) = { 32 | let mut digest = CRC32.digest(); 33 | let mut digest_rd = util::CrcDigestRead::new(input, &mut digest); 34 | let flags = digest_rd.read_u16::()?; 35 | (flags, digest.finalize()) 36 | }; 37 | 38 | let crc32 = input.read_u32::()?; 39 | if crc32 != digested { 40 | return Err(error::Error::XzError(format!( 41 | "Invalid header CRC32: expected 0x{:08x} but got 0x{:08x}", 42 | crc32, digested 43 | ))); 44 | } 45 | 46 | let stream_flags = StreamFlags::parse(flags)?; 47 | let header = Self { stream_flags }; 48 | 49 | lzma_info!("XZ check method: {:?}", header.stream_flags.check_method); 50 | Ok(header) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/xz/mod.rs: -------------------------------------------------------------------------------- 1 | //! Logic for handling `.xz` file format. 2 | //! 3 | //! Format specifications are at [https://tukaani.org/xz/xz-file-format.txt][spec]. 4 | //! 5 | //! [spec]: https://tukaani.org/xz/xz-file-format.txt 6 | 7 | use crate::error; 8 | use std::io; 9 | 10 | pub(crate) mod crc; 11 | pub(crate) mod footer; 12 | pub(crate) mod header; 13 | 14 | /// Stream flags, see sect. 2.1.1.2. 15 | /// 16 | /// This does not store the leading null byte, which is currently unused. 17 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 18 | pub(crate) struct StreamFlags { 19 | pub(crate) check_method: CheckMethod, 20 | } 21 | 22 | impl StreamFlags { 23 | /// Parse Stream Flags from a 16bits value. 
24 | pub(crate) fn parse(input: u16) -> error::Result { 25 | let flags_bytes = input.to_be_bytes(); 26 | 27 | if flags_bytes[0] != 0x00 { 28 | return Err(error::Error::XzError(format!( 29 | "Invalid null byte in Stream Flags: {:x}", 30 | flags_bytes[0] 31 | ))); 32 | } 33 | 34 | let flags = Self { 35 | check_method: CheckMethod::try_from(flags_bytes[1])?, 36 | }; 37 | Ok(flags) 38 | } 39 | 40 | /// Serialize Stream Flags into a writer. 41 | pub(crate) fn serialize(self, writer: &mut W) -> io::Result 42 | where 43 | W: io::Write, 44 | { 45 | // First byte is currently unused and hard-coded to null. 46 | writer 47 | .write(&[0x00, self.check_method as u8]) 48 | .map_err(Into::into) 49 | } 50 | } 51 | 52 | /// Stream check type, see sect. 2.1.1.2. 53 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 54 | #[repr(u8)] 55 | pub enum CheckMethod { 56 | None = 0x00, 57 | Crc32 = 0x01, 58 | Crc64 = 0x04, 59 | Sha256 = 0x0A, 60 | } 61 | 62 | impl CheckMethod { 63 | /// Parse Check ID (second byte in Stream Flags). 
64 | pub fn try_from(id: u8) -> error::Result { 65 | match id { 66 | 0x00 => Ok(CheckMethod::None), 67 | 0x01 => Ok(CheckMethod::Crc32), 68 | 0x04 => Ok(CheckMethod::Crc64), 69 | 0x0A => Ok(CheckMethod::Sha256), 70 | _ => Err(error::Error::XzError(format!( 71 | "Invalid check method {:x}, expected one of [0x00, 0x01, 0x04, 0x0A]", 72 | id 73 | ))), 74 | } 75 | } 76 | } 77 | 78 | impl From for u8 { 79 | fn from(method: CheckMethod) -> u8 { 80 | method as u8 81 | } 82 | } 83 | 84 | #[cfg(test)] 85 | mod test { 86 | use super::*; 87 | use byteorder::{BigEndian, ReadBytesExt}; 88 | use std::io::{Seek, SeekFrom}; 89 | 90 | #[test] 91 | fn test_checkmethod_roundtrip() { 92 | let mut count_valid = 0; 93 | for input in 0..std::u8::MAX { 94 | if let Ok(check) = CheckMethod::try_from(input) { 95 | let output: u8 = check.into(); 96 | assert_eq!(input, output); 97 | count_valid += 1; 98 | } 99 | } 100 | assert_eq!(count_valid, 4); 101 | } 102 | 103 | #[test] 104 | fn test_streamflags_roundtrip() { 105 | let input = StreamFlags { 106 | check_method: CheckMethod::Crc32, 107 | }; 108 | 109 | let mut cursor = std::io::Cursor::new(vec![0u8; 2]); 110 | let len = input.serialize(&mut cursor).unwrap(); 111 | assert_eq!(len, 2); 112 | 113 | cursor.seek(SeekFrom::Start(0)).unwrap(); 114 | let field = cursor.read_u16::().unwrap(); 115 | let output = StreamFlags::parse(field).unwrap(); 116 | assert_eq!(input, output); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /tests/files/README.md: -------------------------------------------------------------------------------- 1 | # Test files 2 | 3 | This folder contains a collection of test files to cover different use cases of the lzma-rs library. 4 | 5 | This README describes files that are not self-explanatory in this folder. 6 | 7 | ## range-coder-edge-case 8 | 9 | This is a file that causes the code and range to be equal at some point during decoding LZMA data. 
10 | Previously, this file would raise an `LZMAError("Corrupted range coding")`, although the file is a valid LZMA file. 11 | 12 | The file was created by generating random geometry in [Blender](1) using the Array and Build 13 | modifier on a cube. 14 | The geometry was then exported as an FBX file that was converted into OpenCTM using the 3D service 15 | in [Cognite Data Fusion](2). 16 | The vertices in the resulting OpenCTM file are LZMA-compressed. 17 | This LZMA-compressed section of the file was manually extracted and the header modified to include 18 | the unpacked size. 19 | The unpacked size is four times the vertex count found in the OpenCTM data. 20 | 21 | [1]: https://blender.org 22 | [2]: https://docs.cognite.com 23 | -------------------------------------------------------------------------------- /tests/files/block-check-crc32.txt: -------------------------------------------------------------------------------- 1 | abcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabc
deabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabc
deabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcde -------------------------------------------------------------------------------- /tests/files/block-check-crc32.txt.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/block-check-crc32.txt.xz -------------------------------------------------------------------------------- /tests/files/empty.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/empty.txt -------------------------------------------------------------------------------- /tests/files/empty.txt.lzma: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/empty.txt.lzma -------------------------------------------------------------------------------- /tests/files/empty.txt.xz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/empty.txt.xz -------------------------------------------------------------------------------- /tests/files/foo.txt.lzma: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/foo.txt.lzma -------------------------------------------------------------------------------- /tests/files/foo.txt.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/foo.txt.xz -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-1: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetur adipisicing 2 | elit, sed do eiusmod tempor incididunt ut 3 | labore et dolore magna aliqua. Ut enim 4 | ad minim veniam, quis nostrud exercitation ullamco 5 | laboris nisi ut aliquip ex ea commodo 6 | consequat. Duis aute irure dolor in reprehenderit 7 | in voluptate velit esse cillum dolore eu 8 | fugiat nulla pariatur. Excepteur sint occaecat cupidatat 9 | non proident, sunt in culpa qui officia 10 | deserunt mollit anim id est laborum. 
11 | -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-1.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/good-1-lzma2-1.xz -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-2: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetur adipisicing 2 | elit, sed do eiusmod tempor incididunt ut 3 | labore et dolore magna aliqua. Ut enim 4 | ad minim veniam, quis nostrud exercitation ullamco 5 | laboris nisi ut aliquip ex ea commodo 6 | consequat. Duis aute irure dolor in reprehenderit 7 | in voluptate velit esse cillum dolore eu 8 | fugiat nulla pariatur. Excepteur sint occaecat cupidatat 9 | non proident, sunt in culpa qui officia 10 | deserunt mollit anim id est laborum. 11 | -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-2.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/good-1-lzma2-2.xz -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-3: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetur adipisicing 2 | elit, sed do eiusmod tempor incididunt ut 3 | labore et dolore magna aliqua. Ut enim 4 | ad minim veniam, quis nostrud exercitation ullamco 5 | laboris nisi ut aliquip ex ea commodo 6 | consequat. Duis aute irure dolor in reprehenderit 7 | in voluptate velit esse cillum dolore eu 8 | fugiat nulla pariatur. Excepteur sint occaecat cupidatat 9 | non proident, sunt in culpa qui officia 10 | deserunt mollit anim id est laborum. 
11 | -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-3.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/good-1-lzma2-3.xz -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-4: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetur adipisicing 2 | elit, sed do eiusmod tempor incididunt ut 3 | labore et dolore magna aliqua. Ut enim 4 | ad minim veniam, quis nostrud exercitation ullamco 5 | laboris nisi ut aliquip ex ea commodo 6 | consequat. Duis aute irure dolor in reprehenderit 7 | in voluptate velit esse cillum dolore eu 8 | fugiat nulla pariatur. Excepteur sint occaecat cupidatat 9 | non proident, sunt in culpa qui officia 10 | deserunt mollit anim id est laborum. 11 | -------------------------------------------------------------------------------- /tests/files/good-1-lzma2-4.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/good-1-lzma2-4.xz -------------------------------------------------------------------------------- /tests/files/hello.txt: -------------------------------------------------------------------------------- 1 | Hello world 2 | -------------------------------------------------------------------------------- /tests/files/hello.txt.lzma: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/hello.txt.lzma -------------------------------------------------------------------------------- /tests/files/hello.txt.xz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/hello.txt.xz -------------------------------------------------------------------------------- /tests/files/hugedict.txt.lzma: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/hugedict.txt.lzma -------------------------------------------------------------------------------- /tests/files/range-coder-edge-case: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/range-coder-edge-case -------------------------------------------------------------------------------- /tests/files/range-coder-edge-case.lzma: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gendx/lzma-rs/1f14478def43b8ea0d27a6b3db88bb307aafd18a/tests/files/range-coder-edge-case.lzma -------------------------------------------------------------------------------- /tests/files/small.txt: -------------------------------------------------------------------------------- 1 | Project Gutenberg's Alice's Adventures in Wonderland, by Lewis Carroll 2 | -------------------------------------------------------------------------------- /tests/lzma.rs: -------------------------------------------------------------------------------- 1 | extern crate lzma; 2 | 3 | #[cfg(feature = "enable_logging")] 4 | use log::{debug, info}; 5 | use std::io::Read; 6 | #[cfg(feature = "stream")] 7 | use std::io::Write; 8 | 9 | /// Utility function to read a file into memory 10 | fn read_all_file(filename: &str) -> std::io::Result> { 11 | let mut data = Vec::new(); 12 | std::fs::File::open(filename).and_then(|mut file| file.read_to_end(&mut 
data))?; 13 | Ok(data) 14 | } 15 | 16 | fn round_trip(x: &[u8]) { 17 | round_trip_no_options(x); 18 | 19 | // Do another round trip, but this time also write it to the header 20 | let encode_options = lzma_rs::compress::Options { 21 | unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(Some(x.len() as u64)), 22 | }; 23 | let decode_options = lzma_rs::decompress::Options { 24 | unpacked_size: lzma_rs::decompress::UnpackedSize::ReadFromHeader, 25 | ..Default::default() 26 | }; 27 | assert_round_trip_with_options(x, &encode_options, &decode_options); 28 | } 29 | 30 | fn round_trip_no_options(x: &[u8]) { 31 | let mut compressed: Vec = Vec::new(); 32 | lzma_rs::lzma_compress(&mut std::io::BufReader::new(x), &mut compressed).unwrap(); 33 | #[cfg(feature = "enable_logging")] 34 | info!("Compressed {} -> {} bytes", x.len(), compressed.len()); 35 | #[cfg(feature = "enable_logging")] 36 | debug!("Compressed content: {:?}", compressed); 37 | 38 | assert_decomp_eq(&compressed, x, /* compare_to_liblzma */ true); 39 | } 40 | 41 | fn assert_round_trip_with_options( 42 | x: &[u8], 43 | encode_options: &lzma_rs::compress::Options, 44 | decode_options: &lzma_rs::decompress::Options, 45 | ) { 46 | let mut compressed: Vec = Vec::new(); 47 | lzma_rs::lzma_compress_with_options( 48 | &mut std::io::BufReader::new(x), 49 | &mut compressed, 50 | encode_options, 51 | ) 52 | .unwrap(); 53 | #[cfg(feature = "enable_logging")] 54 | info!("Compressed {} -> {} bytes", x.len(), compressed.len()); 55 | #[cfg(feature = "enable_logging")] 56 | debug!("Compressed content: {:?}", compressed); 57 | 58 | // test non-streaming decompression 59 | { 60 | let mut bf = std::io::BufReader::new(compressed.as_slice()); 61 | let mut decomp: Vec = Vec::new(); 62 | lzma_rs::lzma_decompress_with_options(&mut bf, &mut decomp, decode_options).unwrap(); 63 | assert_eq!(decomp, x); 64 | } 65 | 66 | #[cfg(feature = "stream")] 67 | // test streaming decompression 68 | { 69 | let mut stream = 
lzma_rs::decompress::Stream::new_with_options(decode_options, Vec::new()); 70 | 71 | if let Err(error) = stream.write_all(&compressed) { 72 | // A WriteZero error may occur if decompression is finished but there 73 | // are remaining `compressed` bytes to write. 74 | // This is the case when the unpacked size is encoded as unknown but 75 | // provided when decoding. I.e. the 5 or 6 byte end-of-stream marker 76 | // is not read. 77 | if error.kind() == std::io::ErrorKind::WriteZero { 78 | match (encode_options.unpacked_size, decode_options.unpacked_size) { 79 | ( 80 | lzma_rs::compress::UnpackedSize::WriteToHeader(None), 81 | lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(Some(_)), 82 | ) => {} 83 | _ => panic!("{}", error), 84 | } 85 | } else { 86 | panic!("{}", error); 87 | } 88 | } 89 | 90 | let decomp = stream.finish().unwrap(); 91 | assert_eq!(decomp, x); 92 | } 93 | } 94 | 95 | fn round_trip_file(filename: &str) { 96 | let x = read_all_file(filename).unwrap(); 97 | round_trip(x.as_slice()); 98 | } 99 | 100 | fn assert_decomp_eq(compressed: &[u8], expected: &[u8], compare_to_liblzma: bool) { 101 | // Test regular decompression. 102 | { 103 | let mut input = std::io::BufReader::new(compressed); 104 | let mut decomp: Vec = Vec::new(); 105 | lzma_rs::lzma_decompress(&mut input, &mut decomp).unwrap(); 106 | assert_eq!(decomp, expected); 107 | } 108 | 109 | // Test consistency with lzma crate. Sometimes that crate fails (e.g. huge 110 | // dictionary), so we have a flag to skip that. 
111 | if compare_to_liblzma { 112 | let decomp = lzma::decompress(compressed).unwrap(); 113 | assert_eq!(decomp, expected); 114 | } 115 | 116 | #[cfg(feature = "stream")] 117 | { 118 | let mut stream = lzma_rs::decompress::Stream::new(Vec::new()); 119 | stream.write_all(compressed).unwrap(); 120 | let decomp = stream.finish().unwrap(); 121 | assert_eq!(decomp, expected); 122 | 123 | const CHUNK_SIZES: &[usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256, 512, 1024]; 124 | for &chunk_size in CHUNK_SIZES { 125 | let mut stream = lzma_rs::decompress::Stream::new(Vec::new()); 126 | for chunk in compressed.chunks(chunk_size) { 127 | stream.write_all(chunk).unwrap(); 128 | } 129 | let decomp = stream.finish().unwrap(); 130 | assert_eq!(decomp, expected); 131 | } 132 | } 133 | } 134 | 135 | #[test] 136 | #[should_panic(expected = "HeaderTooShort")] 137 | fn decompress_short_header() { 138 | #[cfg(feature = "enable_logging")] 139 | let _ = env_logger::try_init(); 140 | let mut decomp: Vec = Vec::new(); 141 | // TODO: compare io::Errors? 142 | lzma_rs::lzma_decompress(&mut (b"" as &[u8]), &mut decomp).unwrap(); 143 | } 144 | 145 | #[test] 146 | fn round_trip_basics() { 147 | #[cfg(feature = "enable_logging")] 148 | let _ = env_logger::try_init(); 149 | round_trip(b""); 150 | // Note: we use vec! 
to avoid storing the slice in the binary 151 | round_trip(vec![0x00; 1_000_000].as_slice()); 152 | round_trip(vec![0xFF; 1_000_000].as_slice()); 153 | } 154 | 155 | #[test] 156 | fn round_trip_hello() { 157 | #[cfg(feature = "enable_logging")] 158 | let _ = env_logger::try_init(); 159 | round_trip(b"Hello world"); 160 | } 161 | 162 | #[test] 163 | fn round_trip_files() { 164 | #[cfg(feature = "enable_logging")] 165 | let _ = env_logger::try_init(); 166 | round_trip_file("tests/files/foo.txt"); 167 | round_trip_file("tests/files/range-coder-edge-case"); 168 | } 169 | 170 | #[test] 171 | fn decompress_big_file() { 172 | #[cfg(feature = "enable_logging")] 173 | let _ = env_logger::try_init(); 174 | let compressed = read_all_file("tests/files/foo.txt.lzma").unwrap(); 175 | let expected = read_all_file("tests/files/foo.txt").unwrap(); 176 | assert_decomp_eq(&compressed, &expected, /* compare_to_liblzma */ true); 177 | } 178 | 179 | #[test] 180 | fn decompress_big_file_with_huge_dict() { 181 | #[cfg(feature = "enable_logging")] 182 | let _ = env_logger::try_init(); 183 | let compressed = read_all_file("tests/files/hugedict.txt.lzma").unwrap(); 184 | let expected = read_all_file("tests/files/foo.txt").unwrap(); 185 | assert_decomp_eq(&compressed, &expected, /* compare_to_liblzma */ false); 186 | } 187 | 188 | #[test] 189 | fn decompress_range_coder_edge_case() { 190 | #[cfg(feature = "enable_logging")] 191 | let _ = env_logger::try_init(); 192 | let compressed = read_all_file("tests/files/range-coder-edge-case.lzma").unwrap(); 193 | let expected = read_all_file("tests/files/range-coder-edge-case").unwrap(); 194 | assert_decomp_eq(&compressed, &expected, /* compare_to_liblzma */ true); 195 | } 196 | 197 | #[test] 198 | fn decompress_empty_world() { 199 | #[cfg(feature = "enable_logging")] 200 | let _ = env_logger::try_init(); 201 | assert_decomp_eq( 202 | b"\x5d\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x83\xff\ 203 | \xfb\xff\xff\xc0\x00\x00\x00", 204 | b"", 205 
| /* compare_to_liblzma */ true, 206 | ); 207 | } 208 | 209 | #[test] 210 | fn decompress_hello_world() { 211 | #[cfg(feature = "enable_logging")] 212 | let _ = env_logger::try_init(); 213 | assert_decomp_eq( 214 | b"\x5d\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x24\x19\ 215 | \x49\x98\x6f\x10\x19\xc6\xd7\x31\xeb\x36\x50\xb2\x98\x48\xff\xfe\ 216 | \xa5\xb0\x00", 217 | b"Hello world\x0a", 218 | /* compare_to_liblzma */ true, 219 | ); 220 | } 221 | 222 | #[test] 223 | fn decompress_huge_dict() { 224 | // Hello world with a dictionary of size 0x7F7F7F7F 225 | #[cfg(feature = "enable_logging")] 226 | let _ = env_logger::try_init(); 227 | assert_decomp_eq( 228 | b"\x5d\x7f\x7f\x7f\x7f\xff\xff\xff\xff\xff\xff\xff\xff\x00\x24\x19\ 229 | \x49\x98\x6f\x10\x19\xc6\xd7\x31\xeb\x36\x50\xb2\x98\x48\xff\xfe\ 230 | \xa5\xb0\x00", 231 | b"Hello world\x0a", 232 | /* compare_to_liblzma */ false, 233 | ); 234 | } 235 | 236 | #[test] 237 | fn unpacked_size_write_to_header() { 238 | let data = b"Some data"; 239 | let encode_options = lzma_rs::compress::Options { 240 | unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(Some(data.len() as u64)), 241 | }; 242 | let decode_options = lzma_rs::decompress::Options { 243 | unpacked_size: lzma_rs::decompress::UnpackedSize::ReadFromHeader, 244 | ..Default::default() 245 | }; 246 | assert_round_trip_with_options(&data[..], &encode_options, &decode_options); 247 | } 248 | 249 | #[test] 250 | fn unpacked_size_provided_outside() { 251 | let data = b"Some data"; 252 | let encode_options = lzma_rs::compress::Options { 253 | unpacked_size: lzma_rs::compress::UnpackedSize::SkipWritingToHeader, 254 | }; 255 | let decode_options = lzma_rs::decompress::Options { 256 | unpacked_size: lzma_rs::decompress::UnpackedSize::UseProvided(Some(data.len() as u64)), 257 | ..Default::default() 258 | }; 259 | assert_round_trip_with_options(&data[..], &encode_options, &decode_options); 260 | } 261 | 262 | #[test] 263 | fn 
unpacked_size_write_some_to_header_but_use_provided_on_read() { 264 | let data = b"Some data"; 265 | let encode_options = lzma_rs::compress::Options { 266 | unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(Some(data.len() as u64)), 267 | }; 268 | let decode_options = lzma_rs::decompress::Options { 269 | unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(Some( 270 | data.len() as u64, 271 | )), 272 | ..Default::default() 273 | }; 274 | assert_round_trip_with_options(&data[..], &encode_options, &decode_options); 275 | } 276 | 277 | #[test] 278 | fn unpacked_size_write_none_to_header_and_use_provided_on_read() { 279 | let data = b"Some data"; 280 | let encode_options = lzma_rs::compress::Options { 281 | unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(None), 282 | }; 283 | let decode_options = lzma_rs::decompress::Options { 284 | unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(Some( 285 | data.len() as u64, 286 | )), 287 | ..Default::default() 288 | }; 289 | assert_round_trip_with_options(&data[..], &encode_options, &decode_options); 290 | } 291 | 292 | #[test] 293 | fn unpacked_size_write_none_to_header_and_use_provided_none_on_read() { 294 | let data = b"Some data"; 295 | let encode_options = lzma_rs::compress::Options { 296 | unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(None), 297 | }; 298 | let decode_options = lzma_rs::decompress::Options { 299 | unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(None), 300 | ..Default::default() 301 | }; 302 | assert_round_trip_with_options(&data[..], &encode_options, &decode_options); 303 | } 304 | 305 | #[test] 306 | fn memlimit() { 307 | let data = b"Some data"; 308 | let encode_options = lzma_rs::compress::Options { 309 | unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(None), 310 | }; 311 | let decode_options = lzma_rs::decompress::Options { 312 | unpacked_size: 
lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(None), 313 | memlimit: Some(0), 314 | ..Default::default() 315 | }; 316 | 317 | let mut compressed: Vec = Vec::new(); 318 | lzma_rs::lzma_compress_with_options( 319 | &mut std::io::BufReader::new(&data[..]), 320 | &mut compressed, 321 | &encode_options, 322 | ) 323 | .unwrap(); 324 | 325 | // test non-streaming decompression 326 | { 327 | let mut bf = std::io::BufReader::new(compressed.as_slice()); 328 | let mut decomp: Vec = Vec::new(); 329 | let error = lzma_rs::lzma_decompress_with_options(&mut bf, &mut decomp, &decode_options) 330 | .unwrap_err(); 331 | assert!( 332 | error.to_string().contains("exceeded memory limit of 0"), 333 | "{}", 334 | error.to_string() 335 | ); 336 | } 337 | 338 | #[cfg(feature = "stream")] 339 | // test streaming decompression 340 | { 341 | let mut stream = lzma_rs::decompress::Stream::new_with_options(&decode_options, Vec::new()); 342 | 343 | let error = stream.write_all(&compressed).unwrap_err(); 344 | assert!( 345 | error.to_string().contains("exceeded memory limit of 0"), 346 | "{}", 347 | error.to_string() 348 | ); 349 | let error = stream.finish().unwrap_err(); 350 | assert!( 351 | error.to_string().contains("previous write error"), 352 | "{}", 353 | error.to_string() 354 | ); 355 | } 356 | } 357 | -------------------------------------------------------------------------------- /tests/lzma2.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "enable_logging")] 2 | use log::{debug, info}; 3 | use std::io::Read; 4 | 5 | /// Utility function to read a file into memory 6 | fn read_all_file(filename: &str) -> std::io::Result> { 7 | let mut data = Vec::new(); 8 | std::fs::File::open(filename).and_then(|mut file| file.read_to_end(&mut data))?; 9 | Ok(data) 10 | } 11 | 12 | fn round_trip(x: &[u8]) { 13 | let mut compressed: Vec = Vec::new(); 14 | lzma_rs::lzma2_compress(&mut std::io::BufReader::new(x), &mut 
compressed).unwrap(); 15 | #[cfg(feature = "enable_logging")] 16 | info!("Compressed {} -> {} bytes", x.len(), compressed.len()); 17 | #[cfg(feature = "enable_logging")] 18 | debug!("Compressed content: {:?}", compressed); 19 | let mut bf = std::io::BufReader::new(compressed.as_slice()); 20 | let mut decomp: Vec = Vec::new(); 21 | lzma_rs::lzma2_decompress(&mut bf, &mut decomp).unwrap(); 22 | assert_eq!(decomp, x) 23 | } 24 | 25 | fn round_trip_file(filename: &str) { 26 | let x = read_all_file(filename).unwrap(); 27 | round_trip(x.as_slice()); 28 | } 29 | 30 | #[test] 31 | fn round_trip_basics() { 32 | #[cfg(feature = "enable_logging")] 33 | let _ = env_logger::try_init(); 34 | round_trip(b""); 35 | // Note: we use vec! to avoid storing the slice in the binary 36 | round_trip(vec![0x00; 1_000_000].as_slice()); 37 | round_trip(vec![0xFF; 1_000_000].as_slice()); 38 | } 39 | 40 | #[test] 41 | fn round_trip_hello() { 42 | #[cfg(feature = "enable_logging")] 43 | let _ = env_logger::try_init(); 44 | round_trip(b"Hello world"); 45 | } 46 | 47 | #[test] 48 | fn round_trip_files() { 49 | #[cfg(feature = "enable_logging")] 50 | let _ = env_logger::try_init(); 51 | round_trip_file("tests/files/foo.txt"); 52 | } 53 | -------------------------------------------------------------------------------- /tests/xz.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "enable_logging")] 2 | use log::{debug, info}; 3 | use std::io::{BufReader, Cursor, Read}; 4 | 5 | /// Utility function to read a file into memory 6 | fn read_all_file(filename: &str) -> std::io::Result> { 7 | let mut data = Vec::new(); 8 | std::fs::File::open(filename).and_then(|mut file| file.read_to_end(&mut data))?; 9 | Ok(data) 10 | } 11 | 12 | fn round_trip(x: &[u8]) { 13 | let mut compressed: Vec = Vec::new(); 14 | lzma_rs::xz_compress(&mut std::io::BufReader::new(x), &mut compressed).unwrap(); 15 | #[cfg(feature = "enable_logging")] 16 | info!("Compressed {} -> {} 
bytes", x.len(), compressed.len()); 17 | #[cfg(feature = "enable_logging")] 18 | debug!("Compressed content: {:?}", compressed); 19 | let mut bf = BufReader::new(compressed.as_slice()); 20 | let mut decomp: Vec = Vec::new(); 21 | lzma_rs::xz_decompress(&mut bf, &mut decomp).unwrap(); 22 | assert_eq!(decomp, x) 23 | } 24 | 25 | fn round_trip_file(filename: &str) { 26 | let x = read_all_file(filename).unwrap(); 27 | round_trip(x.as_slice()); 28 | } 29 | 30 | #[test] 31 | fn round_trip_basics() { 32 | #[cfg(feature = "enable_logging")] 33 | let _ = env_logger::try_init(); 34 | round_trip(b""); 35 | // Note: we use vec! to avoid storing the slice in the binary 36 | round_trip(vec![0x00; 1_000_000].as_slice()); 37 | round_trip(vec![0xFF; 1_000_000].as_slice()); 38 | } 39 | 40 | #[test] 41 | fn round_trip_hello() { 42 | #[cfg(feature = "enable_logging")] 43 | let _ = env_logger::try_init(); 44 | round_trip(b"Hello world"); 45 | } 46 | 47 | #[test] 48 | fn round_trip_files() { 49 | #[cfg(feature = "enable_logging")] 50 | let _ = env_logger::try_init(); 51 | round_trip_file("tests/files/foo.txt"); 52 | } 53 | 54 | fn decomp_big_file(compfile: &str, plainfile: &str) { 55 | let expected = read_all_file(plainfile).unwrap(); 56 | let mut f = BufReader::new(std::fs::File::open(compfile).unwrap()); 57 | let mut decomp: Vec = Vec::new(); 58 | lzma_rs::xz_decompress(&mut f, &mut decomp).unwrap(); 59 | assert!(decomp == expected) 60 | } 61 | 62 | #[test] 63 | fn big_file() { 64 | #[cfg(feature = "enable_logging")] 65 | let _ = env_logger::try_init(); 66 | decomp_big_file("tests/files/foo.txt.xz", "tests/files/foo.txt"); 67 | decomp_big_file( 68 | "tests/files/good-1-lzma2-1.xz", 69 | "tests/files/good-1-lzma2-1", 70 | ); 71 | decomp_big_file( 72 | "tests/files/good-1-lzma2-2.xz", 73 | "tests/files/good-1-lzma2-2", 74 | ); 75 | decomp_big_file( 76 | "tests/files/good-1-lzma2-3.xz", 77 | "tests/files/good-1-lzma2-3", 78 | ); 79 | decomp_big_file( 80 | "tests/files/good-1-lzma2-4.xz", 
81 | "tests/files/good-1-lzma2-4", 82 | ); 83 | } 84 | 85 | #[test] 86 | fn decompress_empty_world() { 87 | #[cfg(feature = "enable_logging")] 88 | let _ = env_logger::try_init(); 89 | let mut x: &[u8] = b"\xfd\x37\x7a\x58\x5a\x00\x00\x04\xe6\xd6\xb4\x46\x00\x00\x00\x00\ 90 | \x1c\xdf\x44\x21\x1f\xb6\xf3\x7d\x01\x00\x00\x00\x00\x04\x59\x5a\ 91 | "; 92 | let mut decomp: Vec = Vec::new(); 93 | lzma_rs::xz_decompress(&mut x, &mut decomp).unwrap(); 94 | assert_eq!(decomp, b"") 95 | } 96 | 97 | #[test] 98 | fn decompress_hello_world() { 99 | #[cfg(feature = "enable_logging")] 100 | let _ = env_logger::try_init(); 101 | let mut x: &[u8] = b"\xfd\x37\x7a\x58\x5a\x00\x00\x04\xe6\xd6\xb4\x46\x02\x00\x21\x01\ 102 | \x16\x00\x00\x00\x74\x2f\xe5\xa3\x01\x00\x0b\x48\x65\x6c\x6c\x6f\ 103 | \x20\x77\x6f\x72\x6c\x64\x0a\x00\xca\xec\x49\x05\x66\x3f\x67\x98\ 104 | \x00\x01\x24\x0c\xa6\x18\xd8\xd8\x1f\xb6\xf3\x7d\x01\x00\x00\x00\ 105 | \x00\x04\x59\x5a"; 106 | let mut decomp: Vec = Vec::new(); 107 | lzma_rs::xz_decompress(&mut x, &mut decomp).unwrap(); 108 | assert_eq!(decomp, b"Hello world\x0a") 109 | } 110 | 111 | #[test] 112 | fn test_xz_block_check_crc32() { 113 | #[cfg(feature = "enable_logging")] 114 | let _ = env_logger::try_init(); 115 | 116 | decomp_big_file( 117 | "tests/files/block-check-crc32.txt.xz", 118 | "tests/files/block-check-crc32.txt", 119 | ); 120 | } 121 | 122 | #[test] 123 | fn test_xz_block_check_crc32_invalid() { 124 | #[cfg(feature = "enable_logging")] 125 | let _ = env_logger::try_init(); 126 | 127 | let testcase = "tests/files/block-check-crc32.txt.xz"; 128 | let mut corrupted = { 129 | let mut buf = read_all_file(testcase).unwrap(); 130 | // Mangle the "Block Check" field. 
131 | buf[0x54] = 0x67; 132 | buf[0x55] = 0x45; 133 | buf[0x56] = 0x23; 134 | buf[0x57] = 0x01; 135 | BufReader::new(Cursor::new(buf)) 136 | }; 137 | let mut decomp = Vec::new(); 138 | 139 | let err_msg = lzma_rs::xz_decompress(&mut corrupted, &mut decomp) 140 | .unwrap_err() 141 | .to_string(); 142 | assert_eq!( 143 | err_msg, 144 | "xz error: Invalid footer CRC32: expected 0x01234567 but got 0x8b0d303e" 145 | ) 146 | } 147 | --------------------------------------------------------------------------------