├── .gitignore ├── fuzz ├── .gitignore ├── fuzz_targets │ ├── decode0.rs │ ├── roundtrip.rs │ └── diff.rs └── Cargo.toml ├── benches ├── lib-8-msb.lzw ├── Cargo-8-msb.lzw ├── binary-8-msb.lzw └── msb8.rs ├── tests ├── end_of_buffer.rs ├── implicit_reset.rs ├── async.rs ├── roundtrip_vec.rs └── roundtrip.rs ├── examples ├── lzw-compress.rs └── lzw-decompress.rs ├── README.md ├── LICENSE-MIT ├── .github └── workflows │ └── rust.yml ├── Cargo.toml ├── src ├── error.rs ├── lib.rs ├── encode_into_async.rs ├── decode_into_async.rs ├── encode.rs └── decode.rs ├── Changes.md ├── bin └── lzw.rs ├── LICENSE-APACHE └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target 3 | corpus 4 | artifacts 5 | -------------------------------------------------------------------------------- /benches/lib-8-msb.lzw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/weezl/HEAD/benches/lib-8-msb.lzw -------------------------------------------------------------------------------- /benches/Cargo-8-msb.lzw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/weezl/HEAD/benches/Cargo-8-msb.lzw -------------------------------------------------------------------------------- /benches/binary-8-msb.lzw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/weezl/HEAD/benches/binary-8-msb.lzw -------------------------------------------------------------------------------- /fuzz/fuzz_targets/decode0.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | use 
libfuzzer_sys::fuzz_target; 3 | 4 | fuzz_target!(|raw_data: &[u8]| { 5 | let mut decoder = weezl::decode::Decoder::new(weezl::BitOrder::Lsb, 0); 6 | let _ = decoder.into_stream(std::io::sink()) 7 | .decode_all(raw_data); 8 | }); 9 | -------------------------------------------------------------------------------- /tests/end_of_buffer.rs: -------------------------------------------------------------------------------- 1 | use weezl::{decode, BitOrder}; 2 | 3 | #[test] 4 | fn stop_after_end_of_buffer() { 5 | let inp = vec![0x00u8, 0x01, 0x02, 0xff]; 6 | let mut decoder = decode::Configuration::new(BitOrder::Lsb, 7) 7 | .with_yield_on_full_buffer(true) 8 | .build(); 9 | let mut out = vec![0u8, 0u8, 0u8]; 10 | let status = decoder.decode_bytes(&inp, &mut out).status; 11 | assert!(status.is_ok(), "{:?} {:?}", status, out); 12 | } 13 | -------------------------------------------------------------------------------- /examples/lzw-compress.rs: -------------------------------------------------------------------------------- 1 | //! Compresses the input from stdin and writes the result to stdout. 2 | 3 | use std::io::{self, BufWriter}; 4 | 5 | fn main() { 6 | match { 7 | let mut encoder = weezl::encode::Encoder::new(weezl::BitOrder::Msb, 8); 8 | let stdin = io::stdin(); 9 | let stdin = stdin.lock(); 10 | let stdout = io::stdout(); 11 | let stdout = BufWriter::new(stdout.lock()); 12 | encoder.into_stream(stdout).encode_all(stdin).status 13 | } { 14 | Ok(()) => (), 15 | Err(err) => eprintln!("{}", err), 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /examples/lzw-decompress.rs: -------------------------------------------------------------------------------- 1 | //! Decompresses the input from stdin and writes the result to stdout. 
2 | 3 | use std::io::{self, BufWriter}; 4 | 5 | fn main() { 6 | match { 7 | let mut decoder = weezl::decode::Decoder::new(weezl::BitOrder::Msb, 8); 8 | let stdout = io::stdout(); 9 | let stdout = BufWriter::new(stdout.lock()); 10 | let stdin = io::stdin(); 11 | let stdin = stdin.lock(); 12 | decoder.into_stream(stdout).decode_all(stdin).status 13 | } { 14 | Ok(()) => (), 15 | Err(err) => eprintln!("{}", err), 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/roundtrip.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | use libfuzzer_sys::fuzz_target; 3 | use weezl::{BitOrder, encode, decode}; 4 | 5 | fuzz_target!(|data: &[u8]| { 6 | let mut encoder = encode::Encoder::with_tiff_size_switch(BitOrder::Msb, 8); 7 | let mut buffer = Vec::with_capacity(2*data.len() + 40); 8 | let _ = encoder.into_stream(&mut buffer).encode_all(data); 9 | 10 | let mut decoder = decode::Decoder::with_tiff_size_switch(BitOrder::Msb, 8); 11 | let mut compare = vec![]; 12 | let result = decoder.into_stream(&mut compare).decode_all(buffer.as_slice()); 13 | assert!(result.status.is_ok(), "{:?}", result.status); 14 | assert_eq!(data, &*compare); 15 | }); 16 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | name = "lzw-fuzz" 4 | version = "0.0.0" 5 | authors = ["Automatically generated"] 6 | publish = false 7 | edition = "2018" 8 | 9 | [package.metadata] 10 | cargo-fuzz = true 11 | 12 | [dependencies] 13 | libfuzzer-sys = "0.3" 14 | lzw = { package = "weezl", version = "0.0.1" } 15 | 16 | [dependencies.weezl] 17 | path = ".." 
18 | 19 | # Prevent this from interfering with workspaces 20 | [workspace] 21 | members = ["."] 22 | 23 | [[bin]] 24 | name = "diff" 25 | path = "fuzz_targets/diff.rs" 26 | 27 | [[bin]] 28 | name = "roundtrip" 29 | path = "fuzz_targets/roundtrip.rs" 30 | test = false 31 | doc = false 32 | 33 | [[bin]] 34 | name = "decode0" 35 | path = "fuzz_targets/decode0.rs" 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # weezl 2 | 3 | LZW en- and decoding that goes weeeee! 4 | 5 | ## Overview 6 | 7 | This library, written in purely safe and dependency-less Rust, provides 8 | encoding and decoding for lzw compression in the style as it occurs in `gif` 9 | and `tiff` image formats. It has a standalone binary that may be used to handle 10 | those data streams but it is _not_ compatible with Spencer's `compress` and 11 | `uncompress` binaries (though a drop-in may be developed at a later point). 12 | 13 | Using in a `no_std` environment is also possible though an allocator is 14 | required. This, too, may be relaxed in a later release. A feature flag already 15 | exists but currently turns off almost all interfaces. 16 | 17 | ## License 18 | 19 | All code is dual licensed MIT OR Apache-2.0. 20 | -------------------------------------------------------------------------------- /tests/implicit_reset.rs: -------------------------------------------------------------------------------- 1 | use std::{env, fs}; 2 | use weezl::{decode, encode, BitOrder}; 3 | 4 | #[test] 5 | fn read_from_mangled() { 6 | let file = env::args().next().unwrap(); 7 | let data = fs::read(file).unwrap(); 8 | 9 | // For simplicity, encode 7-bit data. 
10 | let data: Vec<_> = data.iter().copied().map(|b| b & 0x7f).collect(); 11 | 12 | let mut encoder = encode::Encoder::new(BitOrder::Lsb, 7); 13 | let mut buffer = Vec::with_capacity(2 * data.len() + 40); 14 | let _ = encoder.into_stream(&mut buffer).encode_all(&*data); 15 | 16 | let mut decoder = decode::Decoder::new(BitOrder::Lsb, 7); 17 | let mut compare = vec![]; 18 | let result = decoder.into_stream(&mut compare).decode_all(&buffer[1..]); 19 | assert!(result.status.is_ok(), "{:?}", result.status); 20 | assert!(data == &*compare, "{:?}\n{:?}", data, compare); 21 | } 22 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) HeroicKatora 2020 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust CI 2 | on: 3 | push: 4 | branches: [ master ] 5 | pull_request: 6 | branches: [ master ] 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | rust: [stable, beta, nightly] 13 | features: ["", "alloc", "std", "async"] 14 | steps: 15 | - uses: actions/checkout@v2 16 | - run: rustup default ${{ matrix.rust }} 17 | - name: build 18 | run: > 19 | cargo build --verbose --no-default-features --features "$FEATURES" 20 | env: 21 | FEATURES: ${{ matrix.features }} 22 | - name: test 23 | run: > 24 | cargo test --tests --benches --no-default-features --features "$FEATURES" --release 25 | if: ${{ matrix.rust != '1.34.2' }} 26 | env: 27 | FEATURES: ${{ matrix.features }} 28 | build_msrv: 29 | runs-on: ubuntu-latest 30 | strategy: 31 | matrix: 32 | features: ["", "std"] 33 | steps: 34 | - uses: actions/checkout@v2 35 | - run: rustup default "1.34.2" 36 | - name: build 37 | run: cargo build --verbose --no-default-features --features "$FEATURES" 38 | env: 39 | FEATURES: ${{ matrix.features }} 40 | rustfmt: 41 | runs-on: ubuntu-latest 42 | steps: 43 | - uses: actions/checkout@v2 44 | - uses: actions-rs/toolchain@v1 45 | with: 46 | toolchain: stable 47 | override: true 48 | components: rustfmt, clippy 49 | - name: Run rustfmt check 50 | uses: actions-rs/cargo@v1 51 | with: 52 | command: fmt 53 | args: -- --check 54 | -------------------------------------------------------------------------------- /tests/async.rs: -------------------------------------------------------------------------------- 1 | use std::{env, fs}; 2 | use tokio::io::BufReader; 3 | use tokio::net::{TcpListener, TcpStream}; 4 | use tokio_util::compat::TokioAsyncReadCompatExt as _; 5 | use weezl::{decode, encode, BitOrder}; 6 | 7 | async fn pair() -> (TcpStream, TcpStream) { 8 | 
let listener = TcpListener::bind("localhost:0") 9 | .await 10 | .expect("No loop tcp for testing"); 11 | let addr = listener.local_addr().expect("No address for listener"); 12 | 13 | let connect = TcpStream::connect(addr); 14 | let accept = listener.accept(); 15 | 16 | let (a, (b, _)) = tokio::try_join!(connect, accept).expect("Can connect"); 17 | (a, b) 18 | } 19 | 20 | async fn assert_send_through(data: &[u8], send: &mut TcpStream, recv: &mut TcpStream) { 21 | let mut send = send.compat(); 22 | let mut recv = BufReader::new(recv).compat(); 23 | 24 | let mut encoder = encode::Encoder::new(BitOrder::Lsb, 8); 25 | let encode = encoder.into_async(&mut send).encode_all(data); 26 | 27 | let mut recv_buffer = vec![]; 28 | let mut decoder = decode::Decoder::new(BitOrder::Lsb, 8); 29 | let decode = decoder.into_async(&mut recv_buffer).decode_all(&mut recv); 30 | 31 | let (encode, decode) = tokio::join!(encode, decode); 32 | encode.status.expect("Could send/encoded data"); 33 | decode.status.expect("Could recv/decode data"); 34 | 35 | assert_eq!(recv_buffer, data); 36 | } 37 | 38 | #[test] 39 | fn with_streams() { 40 | let file = env::args().next().unwrap(); 41 | let data = fs::read(file).unwrap(); 42 | 43 | let rt = tokio::runtime::Runtime::new().expect("runtime"); 44 | let _enter = rt.enter(); 45 | 46 | let (mut send, mut recv) = rt.block_on(pair()); 47 | rt.block_on(assert_send_through(&data, &mut send, &mut recv)); 48 | } 49 | -------------------------------------------------------------------------------- /benches/msb8.rs: -------------------------------------------------------------------------------- 1 | extern crate criterion; 2 | extern crate weezl; 3 | 4 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; 5 | use std::fs; 6 | use weezl::{decode::Decoder, BitOrder, LzwStatus}; 7 | 8 | pub fn criterion_benchmark(c: &mut Criterion, file: &str) { 9 | let data = fs::read(file).expect("Benchmark input not found"); 10 | let 
mut group = c.benchmark_group("msb-8"); 11 | let id = BenchmarkId::new(file, data.len()); 12 | let mut outbuf = vec![0; 1 << 26]; // 64MB, what wuff uses.. 13 | let mut decode_once = |data: &[u8]| { 14 | let mut decoder = Decoder::new(BitOrder::Msb, 8); 15 | let mut written = 0; 16 | let outbuf = outbuf.as_mut_slice(); 17 | let mut data = data; 18 | loop { 19 | let result = decoder.decode_bytes(data, &mut outbuf[..]); 20 | let done = result.status.expect("Error"); 21 | data = &data[result.consumed_in..]; 22 | written += result.consumed_out; 23 | black_box(&outbuf[..result.consumed_out]); 24 | if let LzwStatus::Done = done { 25 | break; 26 | } 27 | if let LzwStatus::NoProgress = done { 28 | panic!("Need to make progress"); 29 | } 30 | } 31 | written 32 | }; 33 | group.throughput(Throughput::Bytes(decode_once(&data) as u64)); 34 | group.bench_with_input(id, &data, |b, data| { 35 | b.iter(|| { 36 | decode_once(data); 37 | }) 38 | }); 39 | } 40 | 41 | pub fn bench_toml(c: &mut Criterion) { 42 | criterion_benchmark(c, "benches/Cargo-8-msb.lzw"); 43 | } 44 | 45 | pub fn bench_binary(c: &mut Criterion) { 46 | criterion_benchmark(c, "benches/binary-8-msb.lzw"); 47 | } 48 | 49 | pub fn bench_lib(c: &mut Criterion) { 50 | criterion_benchmark(c, "benches/lib-8-msb.lzw"); 51 | } 52 | 53 | criterion_group!(benches, bench_toml, bench_binary, bench_lib); 54 | criterion_main!(benches); 55 | -------------------------------------------------------------------------------- /tests/roundtrip_vec.rs: -------------------------------------------------------------------------------- 1 | use std::{env, fs}; 2 | use weezl::{decode, encode, BitOrder}; 3 | 4 | #[derive(Clone, Copy, Debug)] 5 | enum Flavor { 6 | Gif, 7 | Tiff, 8 | } 9 | 10 | #[test] 11 | fn roundtrip_all_lsb() { 12 | roundtrip_all(BitOrder::Lsb); 13 | } 14 | 15 | #[test] 16 | fn roundtrip_all_msb() { 17 | roundtrip_all(BitOrder::Msb); 18 | } 19 | 20 | fn roundtrip_all(bit_order: BitOrder) { 21 | let file = 
env::args().next().unwrap(); 22 | let data = fs::read(file).unwrap(); 23 | 24 | for &flavor in &[Flavor::Gif, Flavor::Tiff] { 25 | for bit_width in 2..8 { 26 | let data: Vec<_> = data 27 | .iter() 28 | .copied() 29 | .map(|b| b & ((1 << bit_width) - 1)) 30 | .collect(); 31 | 32 | println!("Roundtrip test {:?} {:?} {}", flavor, bit_order, bit_width); 33 | assert_roundtrips(&*data, flavor, bit_width, bit_order); 34 | } 35 | } 36 | } 37 | 38 | fn assert_roundtrips(data: &[u8], flavor: Flavor, bit_width: u8, bit_order: BitOrder) { 39 | let (c, d): ( 40 | fn(BitOrder, u8) -> encode::Encoder, 41 | fn(BitOrder, u8) -> decode::Decoder, 42 | ) = match flavor { 43 | Flavor::Gif => (encode::Encoder::new, decode::Decoder::new), 44 | Flavor::Tiff => ( 45 | encode::Encoder::with_tiff_size_switch, 46 | decode::Decoder::with_tiff_size_switch, 47 | ), 48 | }; 49 | let mut encoder = c(bit_order, bit_width); 50 | let mut buffer = Vec::with_capacity(2 * data.len() + 40); 51 | 52 | let _ = encoder.into_vec(&mut buffer).encode_all(data); 53 | 54 | let mut decoder = d(bit_order, bit_width); 55 | let mut compare = vec![]; 56 | let result = decoder.into_vec(&mut compare).decode_all(buffer.as_slice()); 57 | assert!( 58 | result.status.is_ok(), 59 | "{:?}, {}, {:?}", 60 | bit_order, 61 | bit_width, 62 | result.status 63 | ); 64 | assert!( 65 | data == &*compare, 66 | "{:?}, {}\n{:?}\n{:?}", 67 | bit_order, 68 | bit_width, 69 | data, 70 | compare 71 | ); 72 | } 73 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/diff.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | use libfuzzer_sys::fuzz_target; 3 | 4 | fuzz_target!(|raw_data: &[u8]| { 5 | // No implicit restart in `lzw` so make sure there is one. Otherwise we get an instant 6 | // detection that is not helpful at all. 
7 | let mut data = vec![0b1000_0000]; 8 | data.extend_from_slice(raw_data); 9 | let data = data.as_slice(); 10 | const CUT_OFF: usize = 1 << 20; 11 | 12 | let mut detailed_ref: Option<_> = None; 13 | let reference = (|| { 14 | let mut decoder = lzw::Decoder::new(lzw::LsbReader::new(), 7); 15 | let mut data = data; 16 | let mut output = vec![]; 17 | while !data.is_empty() && output.len() < CUT_OFF { 18 | match decoder.decode_bytes(data) { 19 | Ok((0, _)) => break, 20 | Ok((len, decoded)) => { 21 | // eprintln!("Ref {:?}", decoded); 22 | data = &data[len..]; 23 | output.extend_from_slice(decoded); 24 | } 25 | Err(err) => { 26 | detailed_ref = Some(err); 27 | return Err(()); 28 | } 29 | } 30 | 31 | if decoder.has_ended() { 32 | break; 33 | } 34 | } 35 | output.truncate(CUT_OFF); 36 | Ok(output) 37 | })(); 38 | 39 | let mut detailed_err = None; 40 | let new = (|| { 41 | let mut decoder = weezl::decode::Decoder::new(weezl::BitOrder::Lsb, 7); 42 | let mut output = Vec::with_capacity(CUT_OFF); 43 | let err = decoder.into_stream(&mut output).decode(data); 44 | if let Err(err) = err.status { 45 | detailed_err = Some(err); 46 | return Err(()); 47 | } 48 | output.truncate(CUT_OFF); 49 | Ok(output) 50 | })(); 51 | 52 | // Output my be omitted if the stream did not end properly in an end code. 
53 | let ref_len = reference.as_ref().map_or(usize::MAX, |x| x.len()); 54 | let new_len = new.as_ref().map_or(usize::MAX, |x| x.len()); 55 | 56 | let reference = reference.map(|mut vec| { 57 | vec.truncate(ref_len.min(new_len)); 58 | vec 59 | }); 60 | 61 | let new = new.map(|mut vec| { 62 | vec.truncate(ref_len.min(new_len)); 63 | vec 64 | }); 65 | 66 | assert_eq!(reference, new, "{:?} vs {:?}", detailed_ref, detailed_err); 67 | }); 68 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "weezl" 3 | version = "0.1.12" 4 | license = "MIT OR Apache-2.0" 5 | description = "Fast LZW compression and decompression." 6 | authors = ["The image-rs Developers"] 7 | readme = "README.md" 8 | repository = "https://github.com/image-rs/weezl" 9 | documentation = "https://docs.rs/weezl" 10 | edition = "2018" 11 | exclude = ["benches/*.lzw"] 12 | 13 | [lib] 14 | name = "weezl" 15 | bench = false 16 | 17 | [dependencies.futures] 18 | optional = true 19 | version = "0.3.12" 20 | default-features = false 21 | features = ["std"] 22 | 23 | [dev-dependencies] 24 | criterion = "0.3.1" 25 | [dev-dependencies.tokio] 26 | version = "1" 27 | default-features = false 28 | features = ["macros", "io-util", "net", "rt", "rt-multi-thread"] 29 | [dev-dependencies.tokio-util] 30 | version = "0.6.2" 31 | default-features = false 32 | features = ["compat"] 33 | 34 | [features] 35 | default = ["std"] 36 | # Enable usage of the standard library and in particular any interface that 37 | # requires the io interfaces `Read` and `Write`. 38 | std = ["alloc"] 39 | # Enable usage of the `alloc` crate. You should always have this feature 40 | # enabled. Without this feature neither the encoder nor the decoder exists. 41 | # This is purely a reservation for future possibilities. 42 | alloc = [] 43 | # Enable usage of `async` through `futures`. 
This is basically the blocker 44 | # for `1.0` as we may track its version. Note that this negates no_std right 45 | # now but implicitly through being required from futures. We also use the 46 | # `std::io::Error` interface. Note that this features is NOT supported on 47 | # 1.34.2 but only on stable. 48 | async = ["futures", "std"] 49 | 50 | [[bin]] 51 | name = "lzw" 52 | path = "bin/lzw.rs" 53 | bench = false 54 | required-features = ["std"] 55 | 56 | [[bench]] 57 | name = "msb8" 58 | harness = false 59 | required-features = ["std"] 60 | 61 | [[example]] 62 | name = "lzw-compress" 63 | required-features = ["std"] 64 | 65 | [[example]] 66 | name = "lzw-decompress" 67 | required-features = ["std"] 68 | 69 | [[test]] 70 | name = "async" 71 | required-features = ["async", "std"] 72 | 73 | [[test]] 74 | name = "roundtrip" 75 | required-features = ["std"] 76 | 77 | [[test]] 78 | name = "roundtrip_vec" 79 | required-features = ["alloc"] 80 | 81 | [[test]] 82 | name = "implicit_reset" 83 | required-features = ["std"] 84 | 85 | [[test]] 86 | name = "end_of_buffer" 87 | required-features = ["alloc"] 88 | 89 | [package.metadata.docs.rs] 90 | all-features = true 91 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | /// The result of a coding operation on a pair of buffer. 2 | #[must_use = "Contains a status with potential error information"] 3 | #[derive(Debug)] 4 | pub struct BufferResult { 5 | /// The number of bytes consumed from the input buffer. 6 | pub consumed_in: usize, 7 | /// The number of bytes written into the output buffer. 8 | pub consumed_out: usize, 9 | /// The status after returning from the write call. 10 | pub status: Result, 11 | } 12 | 13 | /// The result of a coding operation into a vector. 
14 | #[must_use = "Contains a status with potential error information"] 15 | #[derive(Debug)] 16 | pub struct VectorResult { 17 | /// The number of bytes consumed from the input buffer. 18 | pub consumed_in: usize, 19 | /// The number of bytes written into the output buffer. 20 | pub consumed_out: usize, 21 | /// The status after returning from the write call. 22 | pub status: Result, 23 | } 24 | 25 | /// The result of coding into an output stream. 26 | #[cfg(feature = "std")] 27 | #[must_use = "Contains a status with potential error information"] 28 | #[derive(Debug)] 29 | pub struct StreamResult { 30 | /// The total number of bytes consumed from the reader. 31 | pub bytes_read: usize, 32 | /// The total number of bytes written into the writer. 33 | pub bytes_written: usize, 34 | /// The possible error that occurred. 35 | /// 36 | /// Note that when writing into streams it is not in general possible to recover from an error. 37 | pub status: std::io::Result<()>, 38 | } 39 | 40 | /// The status after successful coding of an LZW stream. 41 | #[derive(Debug, Clone, Copy)] 42 | pub enum LzwStatus { 43 | /// Everything went well. 44 | Ok, 45 | /// No bytes were read or written and no internal state advanced. 46 | /// 47 | /// If this is returned but your application can not provide more input data then decoding is 48 | /// definitely stuck for good and it should stop trying and report some error of its own. In 49 | /// other situations this may be used as a signal to refill an internal buffer. 50 | NoProgress, 51 | /// No more data will be produced because an end marker was reached. 52 | Done, 53 | } 54 | 55 | /// The error kind after unsuccessful coding of an LZW stream. 56 | #[derive(Debug, Clone, Copy)] 57 | pub enum LzwError { 58 | /// The input contained an invalid code. 59 | /// 60 | /// For decompression this refers to a code larger than those currently known through the prior 61 | /// decoding stages. 
For compression this refers to a byte that has no code representation due 62 | /// to being larger than permitted by the `size` parameter given to the Encoder. 63 | InvalidCode, 64 | } 65 | 66 | impl core::fmt::Display for LzwError { 67 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { 68 | match self { 69 | LzwError::InvalidCode => f.write_str("invalid code in LZW stream"), 70 | } 71 | } 72 | } 73 | 74 | #[cfg(feature = "std")] 75 | impl std::error::Error for LzwError {} 76 | -------------------------------------------------------------------------------- /Changes.md: -------------------------------------------------------------------------------- 1 | ## Version 0.1.12 2 | 3 | - Further adjusted a debug assertion for TIFF compatibility. It still had one 4 | of its OR conditions formulated incorrectly, relying on equality of the 5 | code word mask and the size switch code. In TIFF we hit the limit of the 6 | code dictionary (all 12-bit words) one code later than we would detect a 7 | code switch that we need to ignore. 8 | 9 | ## Version 0.1.11 10 | 11 | - Adjusted a debug assertion that handled 1-bit code sizes incorrectly in the 12 | decoder (relevant for TIFF but non-standard for GIF). In that case the size 13 | switch semantics are a bit messy. 14 | 15 | ## Version 0.1.10 16 | 17 | - Reverted changes made in 0.1.9 to the behavior of the decoder under non 18 | libtiff-compatibility mode. Trying to read the decoder with an empty output 19 | buffer will at least inspect the next symbol and either error or indicate the 20 | end-of-stream accordingly. 21 | 22 | ## Version 0.1.9 (yanked) 23 | 24 | - Increased decoding throughput by 3—30% depending on CPU and data. 25 | - Added `{encode,decode}::Configuration` as builder types for their respective 26 | module. They can be cheaply cloned. 27 | - Added `decode::Configuration::with_yield_on_full_buffer` to explicitly opt-in 28 | to libtiff compatibility. 
The decoder will not read or interpret further 29 | symbols of the decoding stream when the output buffer is full. This enables a 30 | caller to stop fetching symbols and elide an end of stream marker based on 31 | out-of-band length information. The decoder might otherwise error, trying to 32 | interpret data that does not belong to the stream. 33 | 34 | ## Version 0.1.8 35 | 36 | - Fixed incorrect state after `Decoder::reset` 37 | - Added `Debug` to result types 38 | 39 | ## Version 0.1.7 40 | 41 | - Implicit reset is now supported for decoding. 42 | 43 | ## Version 0.1.6 44 | 45 | - Fixed an integer overflow and panic that could occur during decoding. 46 | Decoding performance may degrade after long sequences without a reset code. 47 | 48 | ## Version 0.1.5 49 | 50 | - Added `IntoVec` adapters that simplify in-memory de- and encoding. A further 51 | 'one-shot' interface is exposed in the `Decoder` and `Encoder` themselves 52 | which makes the process a one liner in the simplest cases. Contrary to 53 | `IntoStream`, these are available in all cases and do not require `std`. 54 | 55 | ## Version 0.1.4 56 | 57 | - Added `IntoAsync` adapters for asynchronous de- and encoding. The interface 58 | is implemented only in terms of `futures = 0.3` traits at the moment. 59 | - Code sizes smaller than 2 are now allowed for decoding. Since they do not 60 | roundtrip it is still an error to use them in the decoder but this avoids 61 | accidental panicking, i.e. denial of service, in parsers. 62 | 63 | ## Version 0.1.3 64 | 65 | - Fixes an issue in compression that caused some data to be lost around clear 66 | codes. This could corrupt the data stream. 67 | 68 | ## Version 0.1.2 69 | 70 | - Fixes incorrect compression after `Encoder::reset`. 71 | 72 | ## Version 0.1.1 73 | 74 | - The `IntoStream` types now reuse their internal buffers. 
75 | - Added the methods `set_buffer`, `set_buffer_size` to `IntoStream` for both 76 | the encoder and decoder, used to control the automatic allocation. 77 | - Deprecated `IntoStream` in configurations without the `std` feature where the 78 | type can't even be constructed. 79 | 80 | ## Version 0.1.0 – Aleph 81 | 82 | - Initial major release 83 | - Support gif and tiff code size changes 84 | - Rough performance numbers: 85 | On i5-4690, 8GiB DIMM DDR3 Synchronous 1600 MHz (0,6 ns) 86 | ~70MB/s encode, ~230MB/s decode 87 | -------------------------------------------------------------------------------- /tests/roundtrip.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::{env, fs}; 3 | use weezl::{decode, encode, BitOrder}; 4 | 5 | #[derive(Clone, Copy, Debug)] 6 | enum Flavor { 7 | Gif, 8 | Tiff, 9 | } 10 | 11 | #[test] 12 | fn roundtrip_all_lsb_tiny() { 13 | roundtrip_all(BitOrder::Lsb, 1); 14 | } 15 | 16 | #[test] 17 | fn roundtrip_all_msb_tiny() { 18 | roundtrip_all(BitOrder::Msb, 1); 19 | } 20 | 21 | #[test] 22 | fn roundtrip_all_lsb() { 23 | roundtrip_all(BitOrder::Lsb, 1 << 20); 24 | } 25 | 26 | #[test] 27 | fn roundtrip_all_msb() { 28 | roundtrip_all(BitOrder::Msb, 1 << 20); 29 | } 30 | 31 | fn roundtrip_all(bit_order: BitOrder, max_io_len: usize) { 32 | let file = env::args().next().unwrap(); 33 | let data = fs::read(file).unwrap(); 34 | 35 | for &flavor in &[Flavor::Gif, Flavor::Tiff] { 36 | for bit_width in 2..8 { 37 | let data: Vec<_> = data 38 | .iter() 39 | .copied() 40 | .map(|b| b & ((1 << bit_width) - 1)) 41 | .collect(); 42 | 43 | let enc = match flavor { 44 | Flavor::Gif => encode::Configuration::new, 45 | Flavor::Tiff => encode::Configuration::with_tiff_size_switch, 46 | }(bit_order, bit_width); 47 | 48 | let dec = match flavor { 49 | Flavor::Gif => decode::Configuration::new, 50 | Flavor::Tiff => decode::Configuration::with_tiff_size_switch, 51 | }(bit_order, bit_width); 52 | 53 | 
let yielding = dec.clone().with_yield_on_full_buffer(true); 54 | 55 | println!("Roundtrip test {:?} {:?} {}", flavor, bit_order, bit_width); 56 | assert_roundtrips(&*data, enc.clone(), dec, max_io_len); 57 | 58 | // Our encoder always passes an enclosed stream. So this must be the same. 59 | assert_roundtrips(&*data, enc, yielding, max_io_len); 60 | } 61 | } 62 | } 63 | 64 | fn assert_roundtrips( 65 | data: &[u8], 66 | enc: encode::Configuration, 67 | dec: decode::Configuration, 68 | max_io_len: usize, 69 | ) { 70 | let mut encoder = enc.clone().build(); 71 | let mut writer = TinyWrite { 72 | data: Vec::with_capacity(2 * data.len() + 40), 73 | max_write_len: max_io_len, 74 | }; 75 | let _ = encoder.into_stream(&mut writer).encode_all(data); 76 | 77 | let mut decoder = dec.clone().build(); 78 | let mut compare = vec![]; 79 | 80 | let buf_reader = TinyRead { 81 | data: &writer.data, 82 | max_read_len: max_io_len, 83 | }; 84 | let result = decoder.into_stream(&mut compare).decode_all(buf_reader); 85 | assert!(result.status.is_ok(), "{:?}, {:?}", dec, result.status); 86 | assert!(data == &*compare, "{:?}\n{:?}\n{:?}", dec, data, compare); 87 | } 88 | 89 | struct TinyRead<'a> { 90 | data: &'a [u8], 91 | max_read_len: usize, 92 | } 93 | 94 | impl io::BufRead for TinyRead<'_> { 95 | fn fill_buf(&mut self) -> io::Result<&[u8]> { 96 | Ok(&self.data[..self.data.len().min(self.max_read_len)]) 97 | } 98 | fn consume(&mut self, n: usize) { 99 | debug_assert!(n <= self.max_read_len); 100 | self.data = &self.data[n..]; 101 | } 102 | } 103 | 104 | impl io::Read for TinyRead<'_> { 105 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 106 | let n = self.data.len().min(buf.len()).min(self.max_read_len); 107 | buf[..n].copy_from_slice(&self.data[..n]); 108 | self.data = &self.data[n..]; 109 | Ok(n) 110 | } 111 | } 112 | 113 | struct TinyWrite { 114 | data: Vec, 115 | max_write_len: usize, 116 | } 117 | 118 | impl io::Write for TinyWrite { 119 | fn write(&mut self, buf: &[u8]) -> 
io::Result { 120 | let n = buf.len().min(self.max_write_len); 121 | self.data.extend_from_slice(&buf[..n]); 122 | Ok(n) 123 | } 124 | 125 | fn flush(&mut self) -> io::Result<()> { 126 | Ok(()) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # LZW decoder and encoder 2 | //! 3 | //! This crates provides an `Encoder` and a `Decoder` in their respective modules. The code words 4 | //! are written from and to bit byte slices (or streams) where it is possible to write either the 5 | //! most or least significant bits first. The maximum possible code size is 12 bits, the smallest 6 | //! available code size is 2 bits. 7 | //! 8 | //! ## Example 9 | //! 10 | //! These two code blocks show the compression and corresponding decompression. Note that you must 11 | //! use the same arguments to `Encoder` and `Decoder`, otherwise the decoding might fail or produce 12 | //! bad results. 13 | //! 14 | #![cfg_attr(feature = "std", doc = "```")] 15 | #![cfg_attr(not(feature = "std"), doc = "```ignore")] 16 | //! use weezl::{BitOrder, encode::Encoder}; 17 | //! 18 | //! let data = b"Hello, world"; 19 | //! let compressed = Encoder::new(BitOrder::Msb, 9) 20 | //! .encode(data) 21 | //! .unwrap(); 22 | //! ``` 23 | //! 24 | #![cfg_attr(feature = "std", doc = "```")] 25 | #![cfg_attr(not(feature = "std"), doc = "```ignore")] 26 | //! use weezl::{BitOrder, decode::Decoder}; 27 | //! # let compressed = b"\x80\x04\x81\x94l\x1b\x06\xf0\xb0 \x1d\xc6\xf1\xc8l\x19 \x10".to_vec(); 28 | //! # let data = b"Hello, world"; 29 | //! 30 | //! let decompressed = Decoder::new(BitOrder::Msb, 9) 31 | //! .decode(&compressed) 32 | //! .unwrap(); 33 | //! assert_eq!(decompressed, data); 34 | //! ``` 35 | //! 36 | //! ## LZW Details 37 | //! 38 | //! The de- and encoder expect the LZW stream to start with a clear code and end with an 39 | //! 
end code which are defined as follows: 40 | //! 41 | //! * `CLEAR_CODE == 1 << min_code_size` 42 | //! * `END_CODE == CLEAR_CODE + 1` 43 | //! 44 | //! For optimal performance, all buffers and input and output slices should be as large as possible 45 | //! and at least 2048 bytes long. This extends to input streams which should have similarly sized 46 | //! buffers. This library uses Rust's standard allocation interfaces (`Box` and `Vec` to be 47 | //! precise). Since there are no ways to handle allocation errors it is not recommended to operate 48 | //! it on 16-bit targets. 49 | //! 50 | //! ## Allocations and standard library 51 | //! 52 | //! The main algorithm can be used in `no_std` as well, although it requires an allocator. This 53 | //! restriction might be lifted at a later stage. For this you should deactivate the `std` feature. 54 | //! The main interfaces stay intact but the `into_stream` combinator is no available. 55 | #![cfg_attr(not(feature = "std"), no_std)] 56 | #![forbid(unsafe_code)] 57 | #![forbid(missing_docs)] 58 | 59 | #[cfg(all(feature = "alloc", not(feature = "std")))] 60 | extern crate alloc; 61 | #[cfg(all(feature = "alloc", feature = "std"))] 62 | use std as alloc; 63 | 64 | pub(crate) const MAX_CODESIZE: u8 = 12; 65 | pub(crate) const MAX_ENTRIES: usize = 1 << MAX_CODESIZE as usize; 66 | 67 | /// Alias for a LZW code point 68 | pub(crate) type Code = u16; 69 | 70 | /// A default buffer size for encoding/decoding buffer. 71 | /// 72 | /// Note that this is larger than the default size for buffers (usually 4K) since each code word 73 | /// can expand to multiple bytes. Expanding one buffer would yield multiple and require a costly 74 | /// break in the decoding loop. Note that the decoded size can be up to quadratic in code block. 75 | pub(crate) const STREAM_BUF_SIZE: usize = 1 << 24; 76 | 77 | /// The order of bits in bytes. 
78 | #[derive(Clone, Copy, Debug)] 79 | pub enum BitOrder { 80 | /// The most significant bit is processed first. 81 | Msb, 82 | /// The least significant bit is processed first. 83 | Lsb, 84 | } 85 | 86 | /// An owned or borrowed buffer for stream operations. 87 | #[cfg(feature = "alloc")] 88 | pub(crate) enum StreamBuf<'d> { 89 | Borrowed(&'d mut [u8]), 90 | Owned(crate::alloc::vec::Vec), 91 | } 92 | 93 | #[cold] 94 | fn assert_decode_size(size: u8) { 95 | assert!( 96 | size <= MAX_CODESIZE, 97 | "Maximum code size 12 required, got {}", 98 | size 99 | ); 100 | } 101 | 102 | #[cold] 103 | fn assert_encode_size(size: u8) { 104 | assert!(size >= 2, "Minimum code size 2 required, got {}", size); 105 | assert!( 106 | size <= MAX_CODESIZE, 107 | "Maximum code size 12 required, got {}", 108 | size 109 | ); 110 | } 111 | 112 | #[cfg(feature = "alloc")] 113 | pub mod decode; 114 | #[cfg(feature = "alloc")] 115 | pub mod encode; 116 | mod error; 117 | 118 | #[cfg(feature = "std")] 119 | pub use self::error::StreamResult; 120 | pub use self::error::{BufferResult, LzwError, LzwStatus}; 121 | 122 | #[cfg(all(test, feature = "alloc"))] 123 | mod tests { 124 | use crate::decode::Decoder; 125 | use crate::encode::Encoder; 126 | 127 | #[cfg(feature = "std")] 128 | use crate::{decode, encode}; 129 | 130 | #[test] 131 | fn stable_send() { 132 | fn must_be_send() {} 133 | must_be_send::(); 134 | must_be_send::(); 135 | 136 | #[cfg(feature = "std")] 137 | fn _send_and_lt<'lt, T: Send + 'lt>() {} 138 | 139 | // Check that the inference `W: Send + 'd` => `IntoStream: Send + 'd` works. 
140 | #[cfg(feature = "std")] 141 | fn _all_send_writer<'d, W: std::io::Write + Send + 'd>() { 142 | _send_and_lt::<'d, decode::IntoStream<'d, W>>(); 143 | _send_and_lt::<'d, encode::IntoStream<'d, W>>(); 144 | } 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /src/encode_into_async.rs: -------------------------------------------------------------------------------- 1 | use crate::encode::IntoAsync; 2 | use crate::error::LzwStatus; 3 | use crate::error::StreamResult; 4 | use crate::StreamBuf; 5 | use std::io; 6 | 7 | impl<'d, W: futures::io::AsyncWrite + core::marker::Unpin> IntoAsync<'d, W> { 8 | /// Encode data from a reader. 9 | /// 10 | /// This will drain the supplied reader. It will not encode an end marker after all data has 11 | /// been processed. 12 | pub async fn encode(&mut self, read: impl futures::io::AsyncBufRead) -> StreamResult { 13 | self.encode_part(read, false).await 14 | } 15 | 16 | /// Encode data from a reader and an end marker. 17 | pub async fn encode_all(mut self, read: impl futures::io::AsyncBufRead) -> StreamResult { 18 | self.encode_part(read, true).await 19 | } 20 | 21 | /// Set the size of the intermediate decode buffer. 22 | /// 23 | /// A buffer of this size is allocated to hold one part of the decoded stream when no buffer is 24 | /// available and any decoding method is called. No buffer is allocated if `set_buffer` has 25 | /// been called. The buffer is reused. 26 | /// 27 | /// # Panics 28 | /// This method panics if `size` is `0`. 29 | pub fn set_buffer_size(&mut self, size: usize) { 30 | assert_ne!(size, 0, "Attempted to set empty buffer"); 31 | self.default_size = size; 32 | } 33 | 34 | /// Use a particular buffer as an intermediate decode buffer. 35 | /// 36 | /// Calling this sets or replaces the buffer. When a buffer has been set then it is used 37 | /// instead of dynamically allocating a buffer. 
Note that the size of the buffer is critical 38 | /// for efficient decoding. Some optimization techniques require the buffer to hold one or more 39 | /// previous decoded words. There is also additional overhead from `write` calls each time the 40 | /// buffer has been filled. 41 | /// 42 | /// # Panics 43 | /// This method panics if the `buffer` is empty. 44 | pub fn set_buffer(&mut self, buffer: &'d mut [u8]) { 45 | assert_ne!(buffer.len(), 0, "Attempted to set empty buffer"); 46 | self.buffer = Some(StreamBuf::Borrowed(buffer)); 47 | } 48 | 49 | async fn encode_part( 50 | &mut self, 51 | read: impl futures::io::AsyncBufRead, 52 | finish: bool, 53 | ) -> StreamResult { 54 | use futures::io::AsyncBufReadExt; 55 | use futures::io::AsyncWriteExt; 56 | 57 | let IntoAsync { 58 | encoder, 59 | writer, 60 | buffer, 61 | default_size, 62 | } = self; 63 | 64 | futures::pin_mut!(read); 65 | let mut read: core::pin::Pin<_> = read; 66 | 67 | let mut bytes_read = 0; 68 | let mut bytes_written = 0; 69 | 70 | // Converting to mutable refs to move into the `once` closure. 71 | let read_bytes = &mut bytes_read; 72 | let write_bytes = &mut bytes_written; 73 | 74 | let outbuf: &mut [u8] = 75 | match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } { 76 | StreamBuf::Borrowed(slice) => &mut *slice, 77 | StreamBuf::Owned(vec) => &mut *vec, 78 | }; 79 | assert!(!outbuf.is_empty()); 80 | 81 | let status = loop { 82 | // Try to grab one buffer of input data. 83 | let mut filler = read.as_mut(); 84 | let data = match filler.fill_buf().await { 85 | Ok(buf) => buf, 86 | Err(err) => break Err(err), 87 | }; 88 | 89 | if data.is_empty() { 90 | if finish { 91 | encoder.finish(); 92 | } else { 93 | break Ok(()); 94 | } 95 | } 96 | 97 | // Decode as much of the buffer as fits. 98 | let result = encoder.encode_bytes(data, &mut outbuf[..]); 99 | // Do the bookkeeping and consume the buffer. 
100 | *read_bytes += result.consumed_in; 101 | *write_bytes += result.consumed_out; 102 | read.as_mut().consume(result.consumed_in); 103 | 104 | // Handle an error status in the result. 105 | let done = match result.status { 106 | Ok(ok) => ok, 107 | Err(err) => { 108 | break Err(io::Error::new( 109 | io::ErrorKind::InvalidData, 110 | &*format!("{:?}", err), 111 | )); 112 | } 113 | }; 114 | 115 | if let LzwStatus::Done = done { 116 | break writer.write_all(&outbuf[..result.consumed_out]).await; 117 | } 118 | 119 | if let LzwStatus::NoProgress = done { 120 | break Err(io::Error::new( 121 | io::ErrorKind::UnexpectedEof, 122 | "No more data but no end marker detected", 123 | )); 124 | } 125 | 126 | // And finish by writing our result. 127 | // TODO: we may lose data on error (also on status error above) which we might want to 128 | // deterministically handle so that we don't need to restart everything from scratch as 129 | // the only recovery strategy. Any changes welcome. 130 | match writer.write_all(&outbuf[..result.consumed_out]).await { 131 | Ok(_) => {} 132 | Err(err) => break Err(err), 133 | } 134 | }; 135 | 136 | StreamResult { 137 | bytes_read, 138 | bytes_written, 139 | status, 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /bin/lzw.rs: -------------------------------------------------------------------------------- 1 | #![forbid(unsafe_code)] 2 | use std::path::PathBuf; 3 | use std::{env, ffi, fs, io, process}; 4 | 5 | extern crate weezl; 6 | use weezl::{decode as delzw, encode as enlzw, BitOrder}; 7 | 8 | fn main() { 9 | let args = env::args_os().skip(1); 10 | let flags = Flags::from_args(args).unwrap_or_else(|ParamError| explain()); 11 | 12 | let out = io::stdout(); 13 | let out = out.lock(); 14 | 15 | let mut files = flags.files; 16 | let input = files.pop().unwrap_or_else(explain); 17 | if !files.is_empty() { 18 | return explain(); 19 | } 20 | let operation = 
flags.operation.unwrap_or_else(explain); 21 | let min_code = if flags.min_code < 2 || flags.min_code > 12 { 22 | return explain(); 23 | } else { 24 | flags.min_code 25 | }; 26 | let bit_order = flags.bit_order; 27 | 28 | let result = match (input, operation) { 29 | (Input::File(file), Operation::Encode) => (|| { 30 | let data = fs::File::open(file)?; 31 | let file = io::BufReader::with_capacity(1 << 26, data); 32 | 33 | let mut encoder = enlzw::Encoder::new(bit_order, min_code); 34 | encoder.into_stream(out).encode_all(file).status 35 | })(), 36 | (Input::Stdin, Operation::Encode) => { 37 | let input = io::BufReader::with_capacity(1 << 26, io::stdin()); 38 | let mut encoder = enlzw::Encoder::new(bit_order, min_code); 39 | encoder.into_stream(out).encode_all(input).status 40 | } 41 | (Input::File(file), Operation::Decode) => (|| { 42 | let data = fs::File::open(file)?; 43 | let file = io::BufReader::with_capacity(1 << 26, data); 44 | 45 | let mut decoder = delzw::Decoder::new(bit_order, min_code); 46 | decoder.into_stream(out).decode_all(file).status 47 | })(), 48 | (Input::Stdin, Operation::Decode) => { 49 | let input = io::BufReader::with_capacity(1 << 26, io::stdin()); 50 | let mut decoder = delzw::Decoder::new(bit_order, min_code); 51 | decoder.into_stream(out).decode_all(input).status 52 | } 53 | }; 54 | 55 | result.expect("Operation Failed: "); 56 | } 57 | 58 | struct Flags { 59 | files: Vec, 60 | operation: Option, 61 | min_code: u8, 62 | bit_order: BitOrder, 63 | } 64 | 65 | struct ParamError; 66 | 67 | enum Input { 68 | File(PathBuf), 69 | Stdin, 70 | } 71 | 72 | enum Operation { 73 | Encode, 74 | Decode, 75 | } 76 | 77 | fn explain() -> T { 78 | println!( 79 | "Usage: lzw [-e|-d] \n\ 80 | Arguments:\n\ 81 | -e\t operation encode (default)\n\ 82 | -d\t operation decode\n\ 83 | \tfilepath or '-' for stdin" 84 | ); 85 | process::exit(1); 86 | } 87 | 88 | impl Default for Flags { 89 | fn default() -> Flags { 90 | Flags { 91 | files: vec![], 92 | operation: 
None, 93 | min_code: 8, 94 | bit_order: BitOrder::Msb, 95 | } 96 | } 97 | } 98 | 99 | impl Flags { 100 | fn from_args(mut args: impl Iterator) -> Result { 101 | let mut flags = Flags::default(); 102 | let mut operation = None; 103 | loop { 104 | match args.next().as_ref().and_then(|s| s.to_str()) { 105 | Some("-d") | Some("--decode") => { 106 | if operation.is_some() { 107 | return Err(ParamError); 108 | } 109 | operation = Some(Operation::Decode); 110 | } 111 | Some("-e") | Some("--encode") => { 112 | if operation.is_some() { 113 | return Err(ParamError); 114 | } 115 | operation = Some(Operation::Encode); 116 | } 117 | Some("-w") | Some("--word-bits") => match args.next() { 118 | None => return Err(ParamError), 119 | Some(bits) => { 120 | let st = bits.to_str().ok_or(ParamError)?; 121 | flags.min_code = st.parse().ok().ok_or(ParamError)?; 122 | } 123 | }, 124 | Some("-le") | Some("--little-endian") => { 125 | flags.bit_order = BitOrder::Lsb; 126 | } 127 | Some("-be") | Some("--big-endian") | Some("-ne") | Some("--network-endian") => { 128 | flags.bit_order = BitOrder::Msb; 129 | } 130 | Some("-") => { 131 | flags.files.push(Input::Stdin); 132 | } 133 | Some(other) if other.starts_with('-') => { 134 | // Reserved for future use. 135 | // -a: self-describing archive format, similar to actual compress 136 | // -b: maximum bits 137 | // -v: verbosity 138 | // some compress compatibility mode? Probably through arg(0) though. 
139 | return Err(ParamError); 140 | } 141 | Some(file) => { 142 | flags.files.push(Input::File(file.into())); 143 | } 144 | None => break, 145 | }; 146 | } 147 | 148 | flags.files.extend(args.map(|file| { 149 | if let Some("-") = file.to_str() { 150 | Input::Stdin 151 | } else { 152 | Input::File(file.into()) 153 | } 154 | })); 155 | 156 | flags.operation = operation; 157 | Ok(flags) 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/decode_into_async.rs: -------------------------------------------------------------------------------- 1 | use crate::decode::IntoAsync; 2 | use crate::error::LzwStatus; 3 | use crate::error::StreamResult; 4 | use crate::StreamBuf; 5 | use std::io; 6 | 7 | impl<'d, W: futures::io::AsyncWrite + core::marker::Unpin> IntoAsync<'d, W> { 8 | /// Decode data from a reader. 9 | /// 10 | /// This will read data until the stream is empty or an end marker is reached. 11 | pub async fn decode(&mut self, read: impl futures::io::AsyncBufRead) -> StreamResult { 12 | self.decode_part(read, false).await 13 | } 14 | 15 | /// Decode data from a reader, requiring an end marker. 16 | pub async fn decode_all(mut self, read: impl futures::io::AsyncBufRead) -> StreamResult { 17 | self.decode_part(read, true).await 18 | } 19 | 20 | /// Set the size of the intermediate decode buffer. 21 | /// 22 | /// A buffer of this size is allocated to hold one part of the decoded stream when no buffer is 23 | /// available and any decoding method is called. No buffer is allocated if `set_buffer` has 24 | /// been called. The buffer is reused. 25 | /// 26 | /// # Panics 27 | /// This method panics if `size` is `0`. 28 | pub fn set_buffer_size(&mut self, size: usize) { 29 | assert_ne!(size, 0, "Attempted to set empty buffer"); 30 | self.default_size = size; 31 | } 32 | 33 | /// Use a particular buffer as an intermediate decode buffer. 34 | /// 35 | /// Calling this sets or replaces the buffer. 
When a buffer has been set then it is used 36 | /// instead of dynamically allocating a buffer. Note that the size of the buffer is critical 37 | /// for efficient decoding. Some optimization techniques require the buffer to hold one or more 38 | /// previous decoded words. There is also additional overhead from `write` calls each time the 39 | /// buffer has been filled. 40 | /// 41 | /// # Panics 42 | /// This method panics if the `buffer` is empty. 43 | pub fn set_buffer(&mut self, buffer: &'d mut [u8]) { 44 | assert_ne!(buffer.len(), 0, "Attempted to set empty buffer"); 45 | self.buffer = Some(StreamBuf::Borrowed(buffer)); 46 | } 47 | 48 | async fn decode_part( 49 | &mut self, 50 | read: impl futures::io::AsyncBufRead, 51 | must_finish: bool, 52 | ) -> StreamResult { 53 | use futures::io::AsyncBufReadExt; 54 | use futures::io::AsyncWriteExt; 55 | 56 | let IntoAsync { 57 | decoder, 58 | writer, 59 | buffer, 60 | default_size, 61 | } = self; 62 | 63 | futures::pin_mut!(read); 64 | let mut read: core::pin::Pin<_> = read; 65 | 66 | let mut bytes_read = 0; 67 | let mut bytes_written = 0; 68 | 69 | // Converting to mutable refs to move into the `once` closure. 70 | let read_bytes = &mut bytes_read; 71 | let write_bytes = &mut bytes_written; 72 | 73 | let outbuf: &mut [u8] = 74 | match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } { 75 | StreamBuf::Borrowed(slice) => &mut *slice, 76 | StreamBuf::Owned(vec) => &mut *vec, 77 | }; 78 | assert!(!outbuf.is_empty()); 79 | 80 | let status = loop { 81 | // Try to grab one buffer of input data. 82 | let mut filler = read.as_mut(); 83 | let data = match filler.fill_buf().await { 84 | Ok(buf) => buf, 85 | Err(err) => break Err(err), 86 | }; 87 | 88 | // Decode as much of the buffer as fits. 89 | let result = decoder.decode_bytes(data, &mut outbuf[..]); 90 | // Do the bookkeeping and consume the buffer. 
91 | *read_bytes += result.consumed_in; 92 | *write_bytes += result.consumed_out; 93 | read.as_mut().consume(result.consumed_in); 94 | 95 | // Handle an error status in the result. 96 | let status = match result.status { 97 | Ok(ok) => ok, 98 | Err(err) => { 99 | break Err(io::Error::new( 100 | io::ErrorKind::InvalidData, 101 | &*format!("{:?}", err), 102 | )); 103 | } 104 | }; 105 | 106 | // Check if we had any new data at all. 107 | if let LzwStatus::NoProgress = status { 108 | debug_assert_eq!( 109 | result.consumed_out, 0, 110 | "No progress means we have not decoded any data" 111 | ); 112 | // In particular we did not finish decoding. 113 | if must_finish { 114 | break Err(io::Error::new( 115 | io::ErrorKind::UnexpectedEof, 116 | "No more data but no end marker detected", 117 | )); 118 | } else { 119 | break Ok(()); 120 | } 121 | } 122 | 123 | // And finish by writing our result. 124 | // TODO: we may lose data on error (also on status error above) which we might want to 125 | // deterministically handle so that we don't need to restart everything from scratch as 126 | // the only recovery strategy. Any changes welcome. 127 | match writer.write_all(&outbuf[..result.consumed_out]).await { 128 | Ok(_) => {} 129 | Err(err) => break Err(err), 130 | } 131 | 132 | if let LzwStatus::Done = status { 133 | break Ok(()); 134 | } 135 | }; 136 | 137 | StreamResult { 138 | bytes_read, 139 | bytes_written, 140 | status, 141 | } 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | [[package]] 4 | name = "atty" 5 | version = "0.2.14" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | dependencies = [ 8 | "hermit-abi 0.1.19 (registry+https://github.com/rust-lang/crates.io-index)", 9 | "libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)", 10 | "winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", 11 | ] 12 | 13 | [[package]] 14 | name = "autocfg" 15 | version = "1.1.0" 16 | source = "registry+https://github.com/rust-lang/crates.io-index" 17 | 18 | [[package]] 19 | name = "bitflags" 20 | version = "1.3.2" 21 | source = "registry+https://github.com/rust-lang/crates.io-index" 22 | 23 | [[package]] 24 | name = "bstr" 25 | version = "0.2.17" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | dependencies = [ 28 | "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 29 | "memchr 2.5.0 (registry+https://github.com/rust-lang/crates.io-index)", 30 | "regex-automata 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", 31 | "serde 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)", 32 | ] 33 | 34 | [[package]] 35 | name = "bumpalo" 36 | version = "3.10.0" 37 | source = "registry+https://github.com/rust-lang/crates.io-index" 38 | 39 | [[package]] 40 | name = "bytes" 41 | version = "1.1.0" 42 | source = "registry+https://github.com/rust-lang/crates.io-index" 43 | 44 | [[package]] 45 | name = "cast" 46 | version = "0.2.7" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | dependencies = [ 49 | "rustc_version 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 50 | ] 51 | 52 | [[package]] 53 | name = "cast" 54 | version = "0.3.0" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | 57 | [[package]] 58 | name = "cfg-if" 59 | version = "1.0.0" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | 62 | [[package]] 63 | name = "clap" 64 | version = 
"2.34.0" 65 | source = "registry+https://github.com/rust-lang/crates.io-index" 66 | dependencies = [ 67 | "bitflags 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", 68 | "textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", 69 | "unicode-width 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", 70 | ] 71 | 72 | [[package]] 73 | name = "criterion" 74 | version = "0.3.6" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | dependencies = [ 77 | "atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", 78 | "cast 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", 79 | "clap 2.34.0 (registry+https://github.com/rust-lang/crates.io-index)", 80 | "criterion-plot 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", 81 | "csv 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)", 82 | "itertools 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)", 83 | "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 84 | "num-traits 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)", 85 | "oorandom 11.1.3 (registry+https://github.com/rust-lang/crates.io-index)", 86 | "plotters 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", 87 | "rayon 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)", 88 | "regex 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)", 89 | "serde 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)", 90 | "serde_cbor 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)", 91 | "serde_derive 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)", 92 | "serde_json 1.0.82 (registry+https://github.com/rust-lang/crates.io-index)", 93 | "tinytemplate 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 94 | "walkdir 2.3.2 (registry+https://github.com/rust-lang/crates.io-index)", 95 | ] 96 | 97 | [[package]] 98 | name = "criterion-plot" 
99 | version = "0.4.4" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | dependencies = [ 102 | "cast 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", 103 | "itertools 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)", 104 | ] 105 | 106 | [[package]] 107 | name = "crossbeam-channel" 108 | version = "0.5.5" 109 | source = "registry+https://github.com/rust-lang/crates.io-index" 110 | dependencies = [ 111 | "cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 112 | "crossbeam-utils 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", 113 | ] 114 | 115 | [[package]] 116 | name = "crossbeam-deque" 117 | version = "0.8.1" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | dependencies = [ 120 | "cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 121 | "crossbeam-epoch 0.9.9 (registry+https://github.com/rust-lang/crates.io-index)", 122 | "crossbeam-utils 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", 123 | ] 124 | 125 | [[package]] 126 | name = "crossbeam-epoch" 127 | version = "0.9.9" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | dependencies = [ 130 | "autocfg 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 131 | "cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 132 | "crossbeam-utils 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", 133 | "memoffset 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", 134 | "once_cell 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)", 135 | "scopeguard 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 136 | ] 137 | 138 | [[package]] 139 | name = "crossbeam-utils" 140 | version = "0.8.10" 141 | source = "registry+https://github.com/rust-lang/crates.io-index" 142 | dependencies = [ 143 | "cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 144 | 
"once_cell 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)", 145 | ] 146 | 147 | [[package]] 148 | name = "csv" 149 | version = "1.1.6" 150 | source = "registry+https://github.com/rust-lang/crates.io-index" 151 | dependencies = [ 152 | "bstr 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", 153 | "csv-core 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", 154 | "itoa 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", 155 | "ryu 1.0.10 (registry+https://github.com/rust-lang/crates.io-index)", 156 | "serde 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)", 157 | ] 158 | 159 | [[package]] 160 | name = "csv-core" 161 | version = "0.1.10" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | dependencies = [ 164 | "memchr 2.5.0 (registry+https://github.com/rust-lang/crates.io-index)", 165 | ] 166 | 167 | [[package]] 168 | name = "either" 169 | version = "1.7.0" 170 | source = "registry+https://github.com/rust-lang/crates.io-index" 171 | 172 | [[package]] 173 | name = "futures" 174 | version = "0.3.21" 175 | source = "registry+https://github.com/rust-lang/crates.io-index" 176 | dependencies = [ 177 | "futures-channel 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 178 | "futures-core 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 179 | "futures-io 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 180 | "futures-sink 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 181 | "futures-task 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 182 | "futures-util 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 183 | ] 184 | 185 | [[package]] 186 | name = "futures-channel" 187 | version = "0.3.21" 188 | source = "registry+https://github.com/rust-lang/crates.io-index" 189 | dependencies = [ 190 | "futures-core 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 191 | "futures-sink 
0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 192 | ] 193 | 194 | [[package]] 195 | name = "futures-core" 196 | version = "0.3.21" 197 | source = "registry+https://github.com/rust-lang/crates.io-index" 198 | 199 | [[package]] 200 | name = "futures-io" 201 | version = "0.3.21" 202 | source = "registry+https://github.com/rust-lang/crates.io-index" 203 | 204 | [[package]] 205 | name = "futures-sink" 206 | version = "0.3.21" 207 | source = "registry+https://github.com/rust-lang/crates.io-index" 208 | 209 | [[package]] 210 | name = "futures-task" 211 | version = "0.3.21" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | 214 | [[package]] 215 | name = "futures-util" 216 | version = "0.3.21" 217 | source = "registry+https://github.com/rust-lang/crates.io-index" 218 | dependencies = [ 219 | "futures-channel 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 220 | "futures-core 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 221 | "futures-io 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 222 | "futures-sink 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 223 | "futures-task 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 224 | "memchr 2.5.0 (registry+https://github.com/rust-lang/crates.io-index)", 225 | "pin-project-lite 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", 226 | "pin-utils 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 227 | "slab 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", 228 | ] 229 | 230 | [[package]] 231 | name = "half" 232 | version = "1.8.2" 233 | source = "registry+https://github.com/rust-lang/crates.io-index" 234 | 235 | [[package]] 236 | name = "hermit-abi" 237 | version = "0.1.19" 238 | source = "registry+https://github.com/rust-lang/crates.io-index" 239 | dependencies = [ 240 | "libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)", 241 | ] 242 | 243 | 
[[package]] 244 | name = "itertools" 245 | version = "0.10.3" 246 | source = "registry+https://github.com/rust-lang/crates.io-index" 247 | dependencies = [ 248 | "either 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)", 249 | ] 250 | 251 | [[package]] 252 | name = "itoa" 253 | version = "0.4.8" 254 | source = "registry+https://github.com/rust-lang/crates.io-index" 255 | 256 | [[package]] 257 | name = "itoa" 258 | version = "1.0.2" 259 | source = "registry+https://github.com/rust-lang/crates.io-index" 260 | 261 | [[package]] 262 | name = "js-sys" 263 | version = "0.3.58" 264 | source = "registry+https://github.com/rust-lang/crates.io-index" 265 | dependencies = [ 266 | "wasm-bindgen 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 267 | ] 268 | 269 | [[package]] 270 | name = "lazy_static" 271 | version = "1.4.0" 272 | source = "registry+https://github.com/rust-lang/crates.io-index" 273 | 274 | [[package]] 275 | name = "libc" 276 | version = "0.2.126" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | 279 | [[package]] 280 | name = "log" 281 | version = "0.4.17" 282 | source = "registry+https://github.com/rust-lang/crates.io-index" 283 | dependencies = [ 284 | "cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 285 | ] 286 | 287 | [[package]] 288 | name = "memchr" 289 | version = "2.5.0" 290 | source = "registry+https://github.com/rust-lang/crates.io-index" 291 | 292 | [[package]] 293 | name = "memoffset" 294 | version = "0.6.5" 295 | source = "registry+https://github.com/rust-lang/crates.io-index" 296 | dependencies = [ 297 | "autocfg 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 298 | ] 299 | 300 | [[package]] 301 | name = "mio" 302 | version = "0.8.4" 303 | source = "registry+https://github.com/rust-lang/crates.io-index" 304 | dependencies = [ 305 | "libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)", 306 | "log 0.4.17 
(registry+https://github.com/rust-lang/crates.io-index)", 307 | "wasi 0.11.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)", 308 | "windows-sys 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)", 309 | ] 310 | 311 | [[package]] 312 | name = "num-traits" 313 | version = "0.2.15" 314 | source = "registry+https://github.com/rust-lang/crates.io-index" 315 | dependencies = [ 316 | "autocfg 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 317 | ] 318 | 319 | [[package]] 320 | name = "num_cpus" 321 | version = "1.13.1" 322 | source = "registry+https://github.com/rust-lang/crates.io-index" 323 | dependencies = [ 324 | "hermit-abi 0.1.19 (registry+https://github.com/rust-lang/crates.io-index)", 325 | "libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)", 326 | ] 327 | 328 | [[package]] 329 | name = "once_cell" 330 | version = "1.13.0" 331 | source = "registry+https://github.com/rust-lang/crates.io-index" 332 | 333 | [[package]] 334 | name = "oorandom" 335 | version = "11.1.3" 336 | source = "registry+https://github.com/rust-lang/crates.io-index" 337 | 338 | [[package]] 339 | name = "pin-project-lite" 340 | version = "0.2.9" 341 | source = "registry+https://github.com/rust-lang/crates.io-index" 342 | 343 | [[package]] 344 | name = "pin-utils" 345 | version = "0.1.0" 346 | source = "registry+https://github.com/rust-lang/crates.io-index" 347 | 348 | [[package]] 349 | name = "plotters" 350 | version = "0.3.2" 351 | source = "registry+https://github.com/rust-lang/crates.io-index" 352 | dependencies = [ 353 | "num-traits 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)", 354 | "plotters-backend 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", 355 | "plotters-svg 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", 356 | "wasm-bindgen 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 357 | "web-sys 0.3.58 
(registry+https://github.com/rust-lang/crates.io-index)", 358 | ] 359 | 360 | [[package]] 361 | name = "plotters-backend" 362 | version = "0.3.4" 363 | source = "registry+https://github.com/rust-lang/crates.io-index" 364 | 365 | [[package]] 366 | name = "plotters-svg" 367 | version = "0.3.2" 368 | source = "registry+https://github.com/rust-lang/crates.io-index" 369 | dependencies = [ 370 | "plotters-backend 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", 371 | ] 372 | 373 | [[package]] 374 | name = "proc-macro2" 375 | version = "1.0.40" 376 | source = "registry+https://github.com/rust-lang/crates.io-index" 377 | dependencies = [ 378 | "unicode-ident 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", 379 | ] 380 | 381 | [[package]] 382 | name = "quote" 383 | version = "1.0.20" 384 | source = "registry+https://github.com/rust-lang/crates.io-index" 385 | dependencies = [ 386 | "proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", 387 | ] 388 | 389 | [[package]] 390 | name = "rayon" 391 | version = "1.5.3" 392 | source = "registry+https://github.com/rust-lang/crates.io-index" 393 | dependencies = [ 394 | "autocfg 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 395 | "crossbeam-deque 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", 396 | "either 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)", 397 | "rayon-core 1.9.3 (registry+https://github.com/rust-lang/crates.io-index)", 398 | ] 399 | 400 | [[package]] 401 | name = "rayon-core" 402 | version = "1.9.3" 403 | source = "registry+https://github.com/rust-lang/crates.io-index" 404 | dependencies = [ 405 | "crossbeam-channel 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", 406 | "crossbeam-deque 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", 407 | "crossbeam-utils 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", 408 | "num_cpus 1.13.1 
(registry+https://github.com/rust-lang/crates.io-index)", 409 | ] 410 | 411 | [[package]] 412 | name = "regex" 413 | version = "1.6.0" 414 | source = "registry+https://github.com/rust-lang/crates.io-index" 415 | dependencies = [ 416 | "regex-syntax 0.6.27 (registry+https://github.com/rust-lang/crates.io-index)", 417 | ] 418 | 419 | [[package]] 420 | name = "regex-automata" 421 | version = "0.1.10" 422 | source = "registry+https://github.com/rust-lang/crates.io-index" 423 | 424 | [[package]] 425 | name = "regex-syntax" 426 | version = "0.6.27" 427 | source = "registry+https://github.com/rust-lang/crates.io-index" 428 | 429 | [[package]] 430 | name = "rustc_version" 431 | version = "0.4.0" 432 | source = "registry+https://github.com/rust-lang/crates.io-index" 433 | dependencies = [ 434 | "semver 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)", 435 | ] 436 | 437 | [[package]] 438 | name = "ryu" 439 | version = "1.0.10" 440 | source = "registry+https://github.com/rust-lang/crates.io-index" 441 | 442 | [[package]] 443 | name = "same-file" 444 | version = "1.0.6" 445 | source = "registry+https://github.com/rust-lang/crates.io-index" 446 | dependencies = [ 447 | "winapi-util 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", 448 | ] 449 | 450 | [[package]] 451 | name = "scopeguard" 452 | version = "1.1.0" 453 | source = "registry+https://github.com/rust-lang/crates.io-index" 454 | 455 | [[package]] 456 | name = "semver" 457 | version = "1.0.12" 458 | source = "registry+https://github.com/rust-lang/crates.io-index" 459 | 460 | [[package]] 461 | name = "serde" 462 | version = "1.0.138" 463 | source = "registry+https://github.com/rust-lang/crates.io-index" 464 | 465 | [[package]] 466 | name = "serde_cbor" 467 | version = "0.11.2" 468 | source = "registry+https://github.com/rust-lang/crates.io-index" 469 | dependencies = [ 470 | "half 1.8.2 (registry+https://github.com/rust-lang/crates.io-index)", 471 | "serde 1.0.138 
(registry+https://github.com/rust-lang/crates.io-index)", 472 | ] 473 | 474 | [[package]] 475 | name = "serde_derive" 476 | version = "1.0.138" 477 | source = "registry+https://github.com/rust-lang/crates.io-index" 478 | dependencies = [ 479 | "proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", 480 | "quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", 481 | "syn 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)", 482 | ] 483 | 484 | [[package]] 485 | name = "serde_json" 486 | version = "1.0.82" 487 | source = "registry+https://github.com/rust-lang/crates.io-index" 488 | dependencies = [ 489 | "itoa 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", 490 | "ryu 1.0.10 (registry+https://github.com/rust-lang/crates.io-index)", 491 | "serde 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)", 492 | ] 493 | 494 | [[package]] 495 | name = "slab" 496 | version = "0.4.6" 497 | source = "registry+https://github.com/rust-lang/crates.io-index" 498 | 499 | [[package]] 500 | name = "socket2" 501 | version = "0.4.4" 502 | source = "registry+https://github.com/rust-lang/crates.io-index" 503 | dependencies = [ 504 | "libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)", 505 | "winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", 506 | ] 507 | 508 | [[package]] 509 | name = "syn" 510 | version = "1.0.98" 511 | source = "registry+https://github.com/rust-lang/crates.io-index" 512 | dependencies = [ 513 | "proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", 514 | "quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", 515 | "unicode-ident 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", 516 | ] 517 | 518 | [[package]] 519 | name = "textwrap" 520 | version = "0.11.0" 521 | source = "registry+https://github.com/rust-lang/crates.io-index" 522 | dependencies = [ 523 | "unicode-width 0.1.9 
(registry+https://github.com/rust-lang/crates.io-index)", 524 | ] 525 | 526 | [[package]] 527 | name = "tinytemplate" 528 | version = "1.2.1" 529 | source = "registry+https://github.com/rust-lang/crates.io-index" 530 | dependencies = [ 531 | "serde 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)", 532 | "serde_json 1.0.82 (registry+https://github.com/rust-lang/crates.io-index)", 533 | ] 534 | 535 | [[package]] 536 | name = "tokio" 537 | version = "1.19.2" 538 | source = "registry+https://github.com/rust-lang/crates.io-index" 539 | dependencies = [ 540 | "bytes 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 541 | "libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)", 542 | "memchr 2.5.0 (registry+https://github.com/rust-lang/crates.io-index)", 543 | "mio 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)", 544 | "num_cpus 1.13.1 (registry+https://github.com/rust-lang/crates.io-index)", 545 | "once_cell 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)", 546 | "pin-project-lite 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", 547 | "socket2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", 548 | "tokio-macros 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", 549 | "winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", 550 | ] 551 | 552 | [[package]] 553 | name = "tokio-macros" 554 | version = "1.8.0" 555 | source = "registry+https://github.com/rust-lang/crates.io-index" 556 | dependencies = [ 557 | "proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", 558 | "quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", 559 | "syn 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)", 560 | ] 561 | 562 | [[package]] 563 | name = "tokio-util" 564 | version = "0.6.10" 565 | source = "registry+https://github.com/rust-lang/crates.io-index" 566 | dependencies = [ 567 | "bytes 1.1.0 
(registry+https://github.com/rust-lang/crates.io-index)", 568 | "futures-core 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 569 | "futures-io 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 570 | "futures-sink 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 571 | "log 0.4.17 (registry+https://github.com/rust-lang/crates.io-index)", 572 | "pin-project-lite 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", 573 | "tokio 1.19.2 (registry+https://github.com/rust-lang/crates.io-index)", 574 | ] 575 | 576 | [[package]] 577 | name = "unicode-ident" 578 | version = "1.0.1" 579 | source = "registry+https://github.com/rust-lang/crates.io-index" 580 | 581 | [[package]] 582 | name = "unicode-width" 583 | version = "0.1.9" 584 | source = "registry+https://github.com/rust-lang/crates.io-index" 585 | 586 | [[package]] 587 | name = "walkdir" 588 | version = "2.3.2" 589 | source = "registry+https://github.com/rust-lang/crates.io-index" 590 | dependencies = [ 591 | "same-file 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", 592 | "winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", 593 | "winapi-util 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", 594 | ] 595 | 596 | [[package]] 597 | name = "wasi" 598 | version = "0.11.0+wasi-snapshot-preview1" 599 | source = "registry+https://github.com/rust-lang/crates.io-index" 600 | 601 | [[package]] 602 | name = "wasm-bindgen" 603 | version = "0.2.81" 604 | source = "registry+https://github.com/rust-lang/crates.io-index" 605 | dependencies = [ 606 | "cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 607 | "wasm-bindgen-macro 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 608 | ] 609 | 610 | [[package]] 611 | name = "wasm-bindgen-backend" 612 | version = "0.2.81" 613 | source = "registry+https://github.com/rust-lang/crates.io-index" 614 | dependencies = [ 615 | "bumpalo 3.10.0 
(registry+https://github.com/rust-lang/crates.io-index)", 616 | "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 617 | "log 0.4.17 (registry+https://github.com/rust-lang/crates.io-index)", 618 | "proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", 619 | "quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", 620 | "syn 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)", 621 | "wasm-bindgen-shared 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 622 | ] 623 | 624 | [[package]] 625 | name = "wasm-bindgen-macro" 626 | version = "0.2.81" 627 | source = "registry+https://github.com/rust-lang/crates.io-index" 628 | dependencies = [ 629 | "quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", 630 | "wasm-bindgen-macro-support 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 631 | ] 632 | 633 | [[package]] 634 | name = "wasm-bindgen-macro-support" 635 | version = "0.2.81" 636 | source = "registry+https://github.com/rust-lang/crates.io-index" 637 | dependencies = [ 638 | "proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", 639 | "quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", 640 | "syn 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)", 641 | "wasm-bindgen-backend 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 642 | "wasm-bindgen-shared 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 643 | ] 644 | 645 | [[package]] 646 | name = "wasm-bindgen-shared" 647 | version = "0.2.81" 648 | source = "registry+https://github.com/rust-lang/crates.io-index" 649 | 650 | [[package]] 651 | name = "web-sys" 652 | version = "0.3.58" 653 | source = "registry+https://github.com/rust-lang/crates.io-index" 654 | dependencies = [ 655 | "js-sys 0.3.58 (registry+https://github.com/rust-lang/crates.io-index)", 656 | "wasm-bindgen 0.2.81 
(registry+https://github.com/rust-lang/crates.io-index)", 657 | ] 658 | 659 | [[package]] 660 | name = "weezl" 661 | version = "0.1.10" 662 | dependencies = [ 663 | "criterion 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", 664 | "futures 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 665 | "tokio 1.19.2 (registry+https://github.com/rust-lang/crates.io-index)", 666 | "tokio-util 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", 667 | ] 668 | 669 | [[package]] 670 | name = "winapi" 671 | version = "0.3.9" 672 | source = "registry+https://github.com/rust-lang/crates.io-index" 673 | dependencies = [ 674 | "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 675 | "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 676 | ] 677 | 678 | [[package]] 679 | name = "winapi-i686-pc-windows-gnu" 680 | version = "0.4.0" 681 | source = "registry+https://github.com/rust-lang/crates.io-index" 682 | 683 | [[package]] 684 | name = "winapi-util" 685 | version = "0.1.5" 686 | source = "registry+https://github.com/rust-lang/crates.io-index" 687 | dependencies = [ 688 | "winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", 689 | ] 690 | 691 | [[package]] 692 | name = "winapi-x86_64-pc-windows-gnu" 693 | version = "0.4.0" 694 | source = "registry+https://github.com/rust-lang/crates.io-index" 695 | 696 | [[package]] 697 | name = "windows-sys" 698 | version = "0.36.1" 699 | source = "registry+https://github.com/rust-lang/crates.io-index" 700 | dependencies = [ 701 | "windows_aarch64_msvc 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)", 702 | "windows_i686_gnu 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)", 703 | "windows_i686_msvc 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)", 704 | "windows_x86_64_gnu 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)", 705 | "windows_x86_64_msvc 
0.36.1 (registry+https://github.com/rust-lang/crates.io-index)", 706 | ] 707 | 708 | [[package]] 709 | name = "windows_aarch64_msvc" 710 | version = "0.36.1" 711 | source = "registry+https://github.com/rust-lang/crates.io-index" 712 | 713 | [[package]] 714 | name = "windows_i686_gnu" 715 | version = "0.36.1" 716 | source = "registry+https://github.com/rust-lang/crates.io-index" 717 | 718 | [[package]] 719 | name = "windows_i686_msvc" 720 | version = "0.36.1" 721 | source = "registry+https://github.com/rust-lang/crates.io-index" 722 | 723 | [[package]] 724 | name = "windows_x86_64_gnu" 725 | version = "0.36.1" 726 | source = "registry+https://github.com/rust-lang/crates.io-index" 727 | 728 | [[package]] 729 | name = "windows_x86_64_msvc" 730 | version = "0.36.1" 731 | source = "registry+https://github.com/rust-lang/crates.io-index" 732 | 733 | [metadata] 734 | "checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 735 | "checksum autocfg 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 736 | "checksum bitflags 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 737 | "checksum bstr 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" 738 | "checksum bumpalo 3.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" 739 | "checksum bytes 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" 740 | "checksum cast 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a" 741 | "checksum cast 0.3.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 742 | "checksum cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 743 | "checksum clap 2.34.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" 744 | "checksum criterion 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" 745 | "checksum criterion-plot 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57" 746 | "checksum crossbeam-channel 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)" = "4c02a4d71819009c192cf4872265391563fd6a84c81ff2c0f2a7026ca4c1d85c" 747 | "checksum crossbeam-deque 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" 748 | "checksum crossbeam-epoch 0.9.9 (registry+https://github.com/rust-lang/crates.io-index)" = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d" 749 | "checksum crossbeam-utils 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)" = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83" 750 | "checksum csv 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" 751 | "checksum csv-core 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" 752 | "checksum either 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" 753 | "checksum futures 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = 
"f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" 754 | "checksum futures-channel 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" 755 | "checksum futures-core 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" 756 | "checksum futures-io 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" 757 | "checksum futures-sink 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" 758 | "checksum futures-task 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" 759 | "checksum futures-util 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" 760 | "checksum half 1.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" 761 | "checksum hermit-abi 0.1.19 (registry+https://github.com/rust-lang/crates.io-index)" = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 762 | "checksum itertools 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" 763 | "checksum itoa 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" 764 | "checksum itoa 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" 765 | "checksum js-sys 0.3.58 (registry+https://github.com/rust-lang/crates.io-index)" = "c3fac17f7123a73ca62df411b1bf727ccc805daa070338fda671c86dac1bdc27" 766 | "checksum 
lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 767 | "checksum libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)" = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" 768 | "checksum log 0.4.17 (registry+https://github.com/rust-lang/crates.io-index)" = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" 769 | "checksum memchr 2.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 770 | "checksum memoffset 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" 771 | "checksum mio 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)" = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" 772 | "checksum num-traits 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)" = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 773 | "checksum num_cpus 1.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" 774 | "checksum once_cell 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" 775 | "checksum oorandom 11.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" 776 | "checksum pin-project-lite 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" 777 | "checksum pin-utils 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 778 | "checksum plotters 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = 
"9428003b84df1496fb9d6eeee9c5f8145cb41ca375eb0dad204328888832811f" 779 | "checksum plotters-backend 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142" 780 | "checksum plotters-svg 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e0918736323d1baff32ee0eade54984f6f201ad7e97d5cfb5d6ab4a358529615" 781 | "checksum proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)" = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" 782 | "checksum quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)" = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" 783 | "checksum rayon 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" 784 | "checksum rayon-core 1.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" 785 | "checksum regex 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" 786 | "checksum regex-automata 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" 787 | "checksum regex-syntax 0.6.27 (registry+https://github.com/rust-lang/crates.io-index)" = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" 788 | "checksum rustc_version 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" 789 | "checksum ryu 1.0.10 (registry+https://github.com/rust-lang/crates.io-index)" = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" 790 | "checksum same-file 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 791 | "checksum 
scopeguard 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 792 | "checksum semver 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)" = "a2333e6df6d6598f2b1974829f853c2b4c5f4a6e503c10af918081aa6f8564e1" 793 | "checksum serde 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)" = "1578c6245786b9d168c5447eeacfb96856573ca56c9d68fdcf394be134882a47" 794 | "checksum serde_cbor 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" 795 | "checksum serde_derive 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)" = "023e9b1467aef8a10fb88f25611870ada9800ef7e22afce356bb0d2387b6f27c" 796 | "checksum serde_json 1.0.82 (registry+https://github.com/rust-lang/crates.io-index)" = "82c2c1fdcd807d1098552c5b9a36e425e42e9fbd7c6a37a8425f390f781f7fa7" 797 | "checksum slab 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" 798 | "checksum socket2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" 799 | "checksum syn 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)" = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" 800 | "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 801 | "checksum tinytemplate 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 802 | "checksum tokio 1.19.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c51a52ed6686dd62c320f9b89299e9dfb46f730c7a48e635c19f21d116cb1439" 803 | "checksum tokio-macros 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484" 804 | "checksum tokio-util 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "36943ee01a6d67977dd3f84a5a1d2efeb4ada3a1ae771cadfaa535d9d9fc6507" 805 | "checksum unicode-ident 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" 806 | "checksum unicode-width 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" 807 | "checksum walkdir 2.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" 808 | "checksum wasi 0.11.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)" = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 809 | "checksum wasm-bindgen 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)" = "7c53b543413a17a202f4be280a7e5c62a1c69345f5de525ee64f8cfdbc954994" 810 | "checksum wasm-bindgen-backend 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)" = "5491a68ab4500fa6b4d726bd67408630c3dbe9c4fe7bda16d5c82a1fd8c7340a" 811 | "checksum wasm-bindgen-macro 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)" = "c441e177922bc58f1e12c022624b6216378e5febc2f0533e41ba443d505b80aa" 812 | "checksum wasm-bindgen-macro-support 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)" = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048" 813 | "checksum wasm-bindgen-shared 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)" = "6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be" 814 | "checksum web-sys 0.3.58 (registry+https://github.com/rust-lang/crates.io-index)" = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90" 815 | "checksum winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = 
"5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 816 | "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 817 | "checksum winapi-util 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 818 | "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 819 | "checksum windows-sys 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" 820 | "checksum windows_aarch64_msvc 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" 821 | "checksum windows_i686_gnu 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)" = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" 822 | "checksum windows_i686_msvc 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" 823 | "checksum windows_x86_64_gnu 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" 824 | "checksum windows_x86_64_msvc 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" 825 | -------------------------------------------------------------------------------- /src/encode.rs: -------------------------------------------------------------------------------- 1 | //! A module for all encoding needs. 
2 | use crate::error::{BufferResult, LzwError, LzwStatus, VectorResult}; 3 | use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE}; 4 | 5 | use crate::alloc::{boxed::Box, vec::Vec}; 6 | #[cfg(feature = "std")] 7 | use crate::error::StreamResult; 8 | #[cfg(feature = "std")] 9 | use std::io::{self, BufRead, Write}; 10 | 11 | /// The state for encoding data with an LZW algorithm. 12 | /// 13 | /// The same structure can be utilized with streams as well as your own buffers and driver logic. 14 | /// It may even be possible to mix them if you are sufficiently careful not to lose any written 15 | /// data in the process. 16 | /// 17 | /// This is a sans-IO implementation, meaning that it only contains the state of the encoder and 18 | /// the caller will provide buffers for input and output data when calling the basic 19 | /// [`encode_bytes`] method. Nevertheless, a number of _adapters_ are provided in the `into_*` 20 | /// methods for enoding with a particular style of common IO. 21 | /// 22 | /// * [`encode`] for encoding once without any IO-loop. 23 | /// * [`into_async`] for encoding with the `futures` traits for asynchronous IO. 24 | /// * [`into_stream`] for encoding with the standard `io` traits. 25 | /// * [`into_vec`] for in-memory encoding. 26 | /// 27 | /// [`encode_bytes`]: #method.encode_bytes 28 | /// [`encode`]: #method.encode 29 | /// [`into_async`]: #method.into_async 30 | /// [`into_stream`]: #method.into_stream 31 | /// [`into_vec`]: #method.into_vec 32 | pub struct Encoder { 33 | /// Internally dispatch via a dynamic trait object. This did not have any significant 34 | /// performance impact as we batch data internally and this pointer does not change after 35 | /// creation! 36 | state: Box, 37 | } 38 | 39 | /// A encoding stream sink. 40 | /// 41 | /// See [`Encoder::into_stream`] on how to create this type. 
42 | /// 43 | /// [`Encoder::into_stream`]: struct.Encoder.html#method.into_stream 44 | #[cfg_attr( 45 | not(feature = "std"), 46 | deprecated = "This type is only useful with the `std` feature." 47 | )] 48 | #[cfg_attr(not(feature = "std"), allow(dead_code))] 49 | pub struct IntoStream<'d, W> { 50 | encoder: &'d mut Encoder, 51 | writer: W, 52 | buffer: Option>, 53 | default_size: usize, 54 | } 55 | 56 | /// An async decoding sink. 57 | /// 58 | /// See [`Encoder::into_async`] on how to create this type. 59 | /// 60 | /// [`Encoder::into_async`]: struct.Encoder.html#method.into_async 61 | #[cfg(feature = "async")] 62 | pub struct IntoAsync<'d, W> { 63 | encoder: &'d mut Encoder, 64 | writer: W, 65 | buffer: Option>, 66 | default_size: usize, 67 | } 68 | 69 | /// A encoding sink into a vector. 70 | /// 71 | /// See [`Encoder::into_vec`] on how to create this type. 72 | /// 73 | /// [`Encoder::into_vec`]: struct.Encoder.html#method.into_vec 74 | pub struct IntoVec<'d> { 75 | encoder: &'d mut Encoder, 76 | vector: &'d mut Vec, 77 | } 78 | 79 | trait Stateful { 80 | fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult; 81 | fn mark_ended(&mut self) -> bool; 82 | /// Reset the state tracking if end code has been written. 83 | fn restart(&mut self); 84 | /// Reset the encoder to the beginning, dropping all buffers etc. 85 | fn reset(&mut self); 86 | } 87 | 88 | struct EncodeState { 89 | /// The configured minimal code size. 90 | min_size: u8, 91 | /// The current encoding symbol tree. 92 | tree: Tree, 93 | /// If we have pushed the end code. 94 | has_ended: bool, 95 | /// If tiff then bumps are a single code sooner. 96 | is_tiff: bool, 97 | /// The code corresponding to the currently read characters. 98 | current_code: Code, 99 | /// The clear code for resetting the dictionary. 100 | clear_code: Code, 101 | /// The bit buffer for encoding. 102 | buffer: B, 103 | } 104 | 105 | struct MsbBuffer { 106 | /// The current code length. 
107 | code_size: u8, 108 | /// The buffer bits. 109 | buffer: u64, 110 | /// The number of valid buffer bits. 111 | bits_in_buffer: u8, 112 | } 113 | 114 | struct LsbBuffer { 115 | /// The current code length. 116 | code_size: u8, 117 | /// The buffer bits. 118 | buffer: u64, 119 | /// The number of valid buffer bits. 120 | bits_in_buffer: u8, 121 | } 122 | 123 | trait Buffer { 124 | fn new(size: u8) -> Self; 125 | /// Reset the code size in the buffer. 126 | fn reset(&mut self, min_size: u8); 127 | /// Apply effects of a Clear Code. 128 | fn clear(&mut self, min_size: u8); 129 | /// Insert a code into the buffer. 130 | fn buffer_code(&mut self, code: Code); 131 | /// Push bytes if the buffer space is getting small. 132 | fn push_out(&mut self, out: &mut &mut [u8]) -> bool; 133 | /// Flush all full bytes, returning if at least one more byte remains. 134 | fn flush_out(&mut self, out: &mut &mut [u8]) -> bool; 135 | /// Pad the buffer to a full byte. 136 | fn buffer_pad(&mut self); 137 | /// Increase the maximum code size. 138 | fn bump_code_size(&mut self); 139 | /// Return the maximum code with the current code size. 140 | fn max_code(&self) -> Code; 141 | /// Return the current code size in bits. 142 | fn code_size(&self) -> u8; 143 | } 144 | 145 | /// One tree node for at most each code. 146 | /// To avoid using too much memory we keep nodes with few successors in optimized form. This form 147 | /// doesn't offer lookup by indexing but instead does a linear search. 
148 | #[derive(Default)] 149 | struct Tree { 150 | simples: Vec, 151 | complex: Vec, 152 | keys: Vec, 153 | } 154 | 155 | #[derive(Clone, Copy)] 156 | enum FullKey { 157 | NoSuccessor, 158 | Simple(u16), 159 | Full(u16), 160 | } 161 | 162 | #[derive(Clone, Copy)] 163 | struct CompressedKey(u16); 164 | 165 | const SHORT: usize = 16; 166 | 167 | #[derive(Clone, Copy)] 168 | struct Simple { 169 | codes: [Code; SHORT], 170 | chars: [u8; SHORT], 171 | count: u8, 172 | } 173 | 174 | #[derive(Clone, Copy)] 175 | struct Full { 176 | char_continuation: [Code; 256], 177 | } 178 | 179 | /// Describes the static parameters for creating a decoder. 180 | #[derive(Clone, Debug)] 181 | pub struct Configuration { 182 | order: BitOrder, 183 | size: u8, 184 | tiff: bool, 185 | } 186 | 187 | impl Configuration { 188 | /// Create a configuration to decode with the specified bit order and symbol size. 189 | /// 190 | /// # Panics 191 | /// 192 | /// The `size` needs to be in the interval `2..=12`. 193 | pub fn new(order: BitOrder, size: u8) -> Self { 194 | super::assert_encode_size(size); 195 | Configuration { 196 | order, 197 | size, 198 | tiff: false, 199 | } 200 | } 201 | 202 | /// Create a configuration for a TIFF compatible decoder. 203 | /// 204 | /// # Panics 205 | /// 206 | /// The `size` needs to be in the interval `2..=12`. 207 | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { 208 | super::assert_encode_size(size); 209 | Configuration { 210 | order, 211 | size, 212 | tiff: true, 213 | } 214 | } 215 | 216 | /// Create a new decoder with the define configuration. 217 | pub fn build(self) -> Encoder { 218 | Encoder { 219 | state: Encoder::from_configuration(&self), 220 | } 221 | } 222 | } 223 | 224 | impl Encoder { 225 | /// Create a new encoder with the specified bit order and symbol size. 226 | /// 227 | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the 228 | /// original specification. 
In particular you will need to specify an `Lsb` bit oder to encode 229 | /// the data portion of a compressed `gif` image. 230 | /// 231 | /// # Panics 232 | /// 233 | /// The `size` needs to be in the interval `2..=12`. 234 | pub fn new(order: BitOrder, size: u8) -> Self { 235 | Configuration::new(order, size).build() 236 | } 237 | 238 | /// Create a TIFF compatible encoder with the specified bit order and symbol size. 239 | /// 240 | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the 241 | /// TIFF specification, which is a misinterpretation of the original algorithm for increasing 242 | /// the code size. It switches one symbol sooner. 243 | /// 244 | /// # Panics 245 | /// 246 | /// The `size` needs to be in the interval `2..=12`. 247 | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { 248 | Configuration::with_tiff_size_switch(order, size).build() 249 | } 250 | 251 | fn from_configuration(cfg: &Configuration) -> Box { 252 | match cfg.order { 253 | BitOrder::Lsb => { 254 | let mut state = EncodeState::::new(cfg.size); 255 | state.is_tiff = cfg.tiff; 256 | Box::new(state) 257 | } 258 | BitOrder::Msb => { 259 | let mut state = EncodeState::::new(cfg.size); 260 | state.is_tiff = cfg.tiff; 261 | Box::new(state) 262 | } 263 | } 264 | } 265 | 266 | /// Encode some bytes from `inp` into `out`. 267 | /// 268 | /// See [`into_stream`] for high-level functions (this interface is only available with the 269 | /// `std` feature) and [`finish`] for marking the input data as complete. 270 | /// 271 | /// When some input byte is invalid, i.e. is not smaller than `1 << size`, then that byte and 272 | /// all following ones will _not_ be consumed and the `status` of the result will signal an 273 | /// error. The result will also indicate that all bytes up to but not including the offending 274 | /// byte have been consumed. You may try again with a fixed byte. 
275 | /// 276 | /// [`into_stream`]: #method.into_stream 277 | /// [`finish`]: #method.finish 278 | pub fn encode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult { 279 | self.state.advance(inp, out) 280 | } 281 | 282 | /// Encode a single chunk of data. 283 | /// 284 | /// This method will add an end marker to the encoded chunk. 285 | /// 286 | /// This is a convenience wrapper around [`into_vec`]. Use the `into_vec` adapter to customize 287 | /// buffer size, to supply an existing vector, to control whether an end marker is required, or 288 | /// to preserve partial data in the case of a decoding error. 289 | /// 290 | /// [`into_vec`]: #into_vec 291 | /// 292 | /// # Example 293 | /// 294 | /// ``` 295 | /// use weezl::{BitOrder, encode::Encoder}; 296 | /// 297 | /// let data = b"Hello, world"; 298 | /// let encoded = Encoder::new(BitOrder::Msb, 9) 299 | /// .encode(data) 300 | /// .expect("All bytes valid for code size"); 301 | /// ``` 302 | pub fn encode(&mut self, data: &[u8]) -> Result, LzwError> { 303 | let mut output = Vec::new(); 304 | self.into_vec(&mut output).encode_all(data).status?; 305 | Ok(output) 306 | } 307 | 308 | /// Construct a encoder into a writer. 309 | #[cfg(feature = "std")] 310 | pub fn into_stream(&mut self, writer: W) -> IntoStream<'_, W> { 311 | IntoStream { 312 | encoder: self, 313 | writer, 314 | buffer: None, 315 | default_size: STREAM_BUF_SIZE, 316 | } 317 | } 318 | 319 | /// Construct a encoder into an async writer. 320 | #[cfg(feature = "async")] 321 | pub fn into_async(&mut self, writer: W) -> IntoAsync<'_, W> { 322 | IntoAsync { 323 | encoder: self, 324 | writer, 325 | buffer: None, 326 | default_size: STREAM_BUF_SIZE, 327 | } 328 | } 329 | 330 | /// Construct an encoder into a vector. 331 | /// 332 | /// All encoded data is appended and the vector is __not__ cleared. 333 | /// 334 | /// Compared to `into_stream` this interface allows a high-level access to encoding without 335 | /// requires the `std`-feature. 
Also, it can make full use of the extra buffer control that the 336 | /// special target exposes. 337 | pub fn into_vec<'lt>(&'lt mut self, vec: &'lt mut Vec) -> IntoVec<'lt> { 338 | IntoVec { 339 | encoder: self, 340 | vector: vec, 341 | } 342 | } 343 | 344 | /// Mark the encoding as in the process of finishing. 345 | /// 346 | /// The next following call to `encode_bytes` which is able to consume the complete input will 347 | /// also try to emit an end code. It's not recommended, but also not unsound, to use different 348 | /// byte slices in different calls from this point forward and thus to 'delay' the actual end 349 | /// of the data stream. The behaviour after the end marker has been written is unspecified but 350 | /// sound. 351 | pub fn finish(&mut self) { 352 | self.state.mark_ended(); 353 | } 354 | 355 | /// Undo marking this data stream as ending. 356 | /// FIXME: clarify how this interacts with padding introduced after end code. 357 | #[allow(dead_code)] 358 | pub(crate) fn restart(&mut self) { 359 | self.state.restart() 360 | } 361 | 362 | /// Reset all internal state. 363 | /// 364 | /// This produce an encoder as if just constructed with `new` but taking slightly less work. In 365 | /// particular it will not deallocate any internal allocations. It will also avoid some 366 | /// duplicate setup work. 367 | pub fn reset(&mut self) { 368 | self.state.reset() 369 | } 370 | } 371 | 372 | #[cfg(feature = "std")] 373 | impl<'d, W: Write> IntoStream<'d, W> { 374 | /// Encode data from a reader. 375 | /// 376 | /// This will drain the supplied reader. It will not encode an end marker after all data has 377 | /// been processed. 378 | pub fn encode(&mut self, read: impl BufRead) -> StreamResult { 379 | self.encode_part(read, false) 380 | } 381 | 382 | /// Encode data from a reader and an end marker. 
383 | pub fn encode_all(mut self, read: impl BufRead) -> StreamResult { 384 | self.encode_part(read, true) 385 | } 386 | 387 | /// Set the size of the intermediate encode buffer. 388 | /// 389 | /// A buffer of this size is allocated to hold one part of the encoded stream when no buffer is 390 | /// available and any encoding method is called. No buffer is allocated if `set_buffer` has 391 | /// been called. The buffer is reused. 392 | /// 393 | /// # Panics 394 | /// This method panics if `size` is `0`. 395 | pub fn set_buffer_size(&mut self, size: usize) { 396 | assert_ne!(size, 0, "Attempted to set empty buffer"); 397 | self.default_size = size; 398 | } 399 | 400 | /// Use a particular buffer as an intermediate encode buffer. 401 | /// 402 | /// Calling this sets or replaces the buffer. When a buffer has been set then it is used 403 | /// instead of a dynamically allocating a buffer. Note that the size of the buffer is relevant 404 | /// for efficient encoding as there is additional overhead from `write` calls each time the 405 | /// buffer has been filled. 406 | /// 407 | /// # Panics 408 | /// This method panics if the `buffer` is empty. 
409 | pub fn set_buffer(&mut self, buffer: &'d mut [u8]) { 410 | assert_ne!(buffer.len(), 0, "Attempted to set empty buffer"); 411 | self.buffer = Some(StreamBuf::Borrowed(buffer)); 412 | } 413 | 414 | fn encode_part(&mut self, mut read: impl BufRead, finish: bool) -> StreamResult { 415 | let IntoStream { 416 | encoder, 417 | writer, 418 | buffer, 419 | default_size, 420 | } = self; 421 | enum Progress { 422 | Ok, 423 | Done, 424 | } 425 | 426 | let mut bytes_read = 0; 427 | let mut bytes_written = 0; 428 | 429 | let read_bytes = &mut bytes_read; 430 | let write_bytes = &mut bytes_written; 431 | 432 | let outbuf: &mut [u8] = 433 | match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } { 434 | StreamBuf::Borrowed(slice) => &mut *slice, 435 | StreamBuf::Owned(vec) => &mut *vec, 436 | }; 437 | assert!(!outbuf.is_empty()); 438 | 439 | let once = move || { 440 | let data = read.fill_buf()?; 441 | 442 | if data.is_empty() { 443 | if finish { 444 | encoder.finish(); 445 | } else { 446 | return Ok(Progress::Done); 447 | } 448 | } 449 | 450 | let result = encoder.encode_bytes(data, &mut outbuf[..]); 451 | *read_bytes += result.consumed_in; 452 | *write_bytes += result.consumed_out; 453 | read.consume(result.consumed_in); 454 | 455 | let done = result.status.map_err(|err| { 456 | io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err)) 457 | })?; 458 | 459 | if let LzwStatus::Done = done { 460 | writer.write_all(&outbuf[..result.consumed_out])?; 461 | return Ok(Progress::Done); 462 | } 463 | 464 | if let LzwStatus::NoProgress = done { 465 | return Err(io::Error::new( 466 | io::ErrorKind::UnexpectedEof, 467 | "No more data but no end marker detected", 468 | )); 469 | } 470 | 471 | writer.write_all(&outbuf[..result.consumed_out])?; 472 | Ok(Progress::Ok) 473 | }; 474 | 475 | let status = core::iter::repeat_with(once) 476 | // scan+fuse can be replaced with map_while 477 | .scan((), |(), result| match result { 478 | Ok(Progress::Ok) => 
Some(Ok(())), 479 | Err(err) => Some(Err(err)), 480 | Ok(Progress::Done) => None, 481 | }) 482 | .fuse() 483 | .collect(); 484 | 485 | StreamResult { 486 | bytes_read, 487 | bytes_written, 488 | status, 489 | } 490 | } 491 | } 492 | 493 | impl IntoVec<'_> { 494 | /// Encode data from a slice. 495 | pub fn encode(&mut self, read: &[u8]) -> VectorResult { 496 | self.encode_part(read, false) 497 | } 498 | 499 | /// Decode data from a reader, adding an end marker. 500 | pub fn encode_all(mut self, read: &[u8]) -> VectorResult { 501 | self.encode_part(read, true) 502 | } 503 | 504 | fn grab_buffer(&mut self) -> (&mut [u8], &mut Encoder) { 505 | const CHUNK_SIZE: usize = 1 << 12; 506 | let decoder = &mut self.encoder; 507 | let length = self.vector.len(); 508 | 509 | // Use the vector to do overflow checks and w/e. 510 | self.vector.reserve(CHUNK_SIZE); 511 | // FIXME: encoding into uninit buffer? 512 | self.vector.resize(length + CHUNK_SIZE, 0u8); 513 | 514 | (&mut self.vector[length..], decoder) 515 | } 516 | 517 | fn encode_part(&mut self, part: &[u8], finish: bool) -> VectorResult { 518 | let mut result = VectorResult { 519 | consumed_in: 0, 520 | consumed_out: 0, 521 | status: Ok(LzwStatus::Ok), 522 | }; 523 | 524 | enum Progress { 525 | Ok, 526 | Done, 527 | } 528 | 529 | // Converting to mutable refs to move into the `once` closure. 530 | let read_bytes = &mut result.consumed_in; 531 | let write_bytes = &mut result.consumed_out; 532 | let mut data = part; 533 | 534 | // A 64 MB buffer is quite large but should get alloc_zeroed. 535 | // Note that the decoded size can be up to quadratic in code block. 536 | let once = move || { 537 | // Grab a new output buffer. 538 | let (outbuf, encoder) = self.grab_buffer(); 539 | 540 | if finish { 541 | encoder.finish(); 542 | } 543 | 544 | // Decode as much of the buffer as fits. 545 | let result = encoder.encode_bytes(data, &mut outbuf[..]); 546 | // Do the bookkeeping and consume the buffer. 
547 | *read_bytes += result.consumed_in; 548 | *write_bytes += result.consumed_out; 549 | data = &data[result.consumed_in..]; 550 | 551 | let unfilled = outbuf.len() - result.consumed_out; 552 | let filled = self.vector.len() - unfilled; 553 | self.vector.truncate(filled); 554 | 555 | // Handle the status in the result. 556 | let done = result.status?; 557 | if let LzwStatus::Done = done { 558 | Ok(Progress::Done) 559 | } else { 560 | Ok(Progress::Ok) 561 | } 562 | }; 563 | 564 | // Decode chunks of input data until we're done. 565 | let status: Result<(), _> = core::iter::repeat_with(once) 566 | // scan+fuse can be replaced with map_while 567 | .scan((), |(), result| match result { 568 | Ok(Progress::Ok) => Some(Ok(())), 569 | Err(err) => Some(Err(err)), 570 | Ok(Progress::Done) => None, 571 | }) 572 | .fuse() 573 | .collect(); 574 | 575 | if let Err(err) = status { 576 | result.status = Err(err); 577 | } 578 | 579 | result 580 | } 581 | } 582 | 583 | // This is implemented in a separate file, so that 1.34.2 does not parse it. Otherwise, it would 584 | // trip over the usage of await, which is a reserved keyword in that edition/version. It only 585 | // contains an impl block. 
586 | #[cfg(feature = "async")] 587 | #[path = "encode_into_async.rs"] 588 | mod impl_encode_into_async; 589 | 590 | impl EncodeState { 591 | fn new(min_size: u8) -> Self { 592 | let clear_code = 1 << min_size; 593 | let mut tree = Tree::default(); 594 | tree.init(min_size); 595 | let mut state = EncodeState { 596 | min_size, 597 | tree, 598 | has_ended: false, 599 | is_tiff: false, 600 | current_code: clear_code, 601 | clear_code, 602 | buffer: B::new(min_size), 603 | }; 604 | state.buffer_code(clear_code); 605 | state 606 | } 607 | } 608 | 609 | impl Stateful for EncodeState { 610 | fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult { 611 | let c_in = inp.len(); 612 | let c_out = out.len(); 613 | let mut status = Ok(LzwStatus::Ok); 614 | 615 | 'encoding: loop { 616 | if self.push_out(&mut out) { 617 | break; 618 | } 619 | 620 | if inp.is_empty() && self.has_ended { 621 | let end = self.end_code(); 622 | if self.current_code != end { 623 | if self.current_code != self.clear_code { 624 | self.buffer_code(self.current_code); 625 | 626 | // When reading this code, the decoder will add an extra entry to its table 627 | // before reading th end code. Thusly, it may increase its code size based 628 | // on this additional entry. 
629 | if self.tree.keys.len() + usize::from(self.is_tiff) 630 | > usize::from(self.buffer.max_code()) 631 | && self.buffer.code_size() < MAX_CODESIZE 632 | { 633 | self.buffer.bump_code_size(); 634 | } 635 | } 636 | self.buffer_code(end); 637 | self.current_code = end; 638 | self.buffer_pad(); 639 | } 640 | 641 | break; 642 | } 643 | 644 | let mut next_code = None; 645 | let mut bytes = inp.iter(); 646 | while let Some(&byte) = bytes.next() { 647 | if self.min_size < 8 && byte >= 1 << self.min_size { 648 | status = Err(LzwError::InvalidCode); 649 | break 'encoding; 650 | } 651 | 652 | inp = bytes.as_slice(); 653 | match self.tree.iterate(self.current_code, byte) { 654 | Ok(code) => self.current_code = code, 655 | Err(_) => { 656 | next_code = Some(self.current_code); 657 | 658 | self.current_code = u16::from(byte); 659 | break; 660 | } 661 | } 662 | } 663 | 664 | match next_code { 665 | // No more bytes, no code produced. 666 | None => break, 667 | Some(code) => { 668 | self.buffer_code(code); 669 | 670 | if self.tree.keys.len() + usize::from(self.is_tiff) 671 | > usize::from(self.buffer.max_code()) + 1 672 | && self.buffer.code_size() < MAX_CODESIZE 673 | { 674 | self.buffer.bump_code_size(); 675 | } 676 | 677 | if self.tree.keys.len() > MAX_ENTRIES { 678 | self.buffer_code(self.clear_code); 679 | self.tree.reset(self.min_size); 680 | self.buffer.clear(self.min_size); 681 | } 682 | } 683 | } 684 | } 685 | 686 | if inp.is_empty() && self.current_code == self.end_code() { 687 | if !self.flush_out(&mut out) { 688 | status = Ok(LzwStatus::Done); 689 | } 690 | } 691 | 692 | BufferResult { 693 | consumed_in: c_in - inp.len(), 694 | consumed_out: c_out - out.len(), 695 | status, 696 | } 697 | } 698 | 699 | fn mark_ended(&mut self) -> bool { 700 | core::mem::replace(&mut self.has_ended, true) 701 | } 702 | 703 | fn restart(&mut self) { 704 | self.has_ended = false; 705 | } 706 | 707 | fn reset(&mut self) { 708 | self.restart(); 709 | self.current_code = self.clear_code; 
710 | self.tree.reset(self.min_size); 711 | self.buffer.reset(self.min_size); 712 | self.buffer_code(self.clear_code); 713 | } 714 | } 715 | 716 | impl EncodeState { 717 | fn push_out(&mut self, out: &mut &mut [u8]) -> bool { 718 | self.buffer.push_out(out) 719 | } 720 | 721 | fn flush_out(&mut self, out: &mut &mut [u8]) -> bool { 722 | self.buffer.flush_out(out) 723 | } 724 | 725 | fn end_code(&self) -> Code { 726 | self.clear_code + 1 727 | } 728 | 729 | fn buffer_pad(&mut self) { 730 | self.buffer.buffer_pad(); 731 | } 732 | 733 | fn buffer_code(&mut self, code: Code) { 734 | self.buffer.buffer_code(code); 735 | } 736 | } 737 | 738 | impl Buffer for MsbBuffer { 739 | fn new(min_size: u8) -> Self { 740 | MsbBuffer { 741 | code_size: min_size + 1, 742 | buffer: 0, 743 | bits_in_buffer: 0, 744 | } 745 | } 746 | 747 | fn reset(&mut self, min_size: u8) { 748 | self.code_size = min_size + 1; 749 | self.buffer = 0; 750 | self.bits_in_buffer = 0; 751 | } 752 | 753 | fn clear(&mut self, min_size: u8) { 754 | self.code_size = min_size + 1; 755 | } 756 | 757 | fn buffer_code(&mut self, code: Code) { 758 | let shift = 64 - self.bits_in_buffer - self.code_size; 759 | self.buffer |= u64::from(code) << shift; 760 | self.bits_in_buffer += self.code_size; 761 | } 762 | 763 | fn push_out(&mut self, out: &mut &mut [u8]) -> bool { 764 | if self.bits_in_buffer + 2 * self.code_size < 64 { 765 | return false; 766 | } 767 | 768 | self.flush_out(out) 769 | } 770 | 771 | fn flush_out(&mut self, out: &mut &mut [u8]) -> bool { 772 | let want = usize::from(self.bits_in_buffer / 8); 773 | let count = want.min((*out).len()); 774 | let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count); 775 | *out = tail; 776 | 777 | for b in bytes { 778 | *b = ((self.buffer & 0xff00_0000_0000_0000) >> 56) as u8; 779 | self.buffer <<= 8; 780 | self.bits_in_buffer -= 8; 781 | } 782 | 783 | count < want 784 | } 785 | 786 | fn buffer_pad(&mut self) { 787 | let to_byte = 
self.bits_in_buffer.wrapping_neg() & 0x7; 788 | self.bits_in_buffer += to_byte; 789 | } 790 | 791 | fn bump_code_size(&mut self) { 792 | self.code_size += 1; 793 | } 794 | 795 | fn max_code(&self) -> Code { 796 | (1 << self.code_size) - 1 797 | } 798 | 799 | fn code_size(&self) -> u8 { 800 | self.code_size 801 | } 802 | } 803 | 804 | impl Buffer for LsbBuffer { 805 | fn new(min_size: u8) -> Self { 806 | LsbBuffer { 807 | code_size: min_size + 1, 808 | buffer: 0, 809 | bits_in_buffer: 0, 810 | } 811 | } 812 | 813 | fn reset(&mut self, min_size: u8) { 814 | self.code_size = min_size + 1; 815 | self.buffer = 0; 816 | self.bits_in_buffer = 0; 817 | } 818 | 819 | fn clear(&mut self, min_size: u8) { 820 | self.code_size = min_size + 1; 821 | } 822 | 823 | fn buffer_code(&mut self, code: Code) { 824 | self.buffer |= u64::from(code) << self.bits_in_buffer; 825 | self.bits_in_buffer += self.code_size; 826 | } 827 | 828 | fn push_out(&mut self, out: &mut &mut [u8]) -> bool { 829 | if self.bits_in_buffer + 2 * self.code_size < 64 { 830 | return false; 831 | } 832 | 833 | self.flush_out(out) 834 | } 835 | 836 | fn flush_out(&mut self, out: &mut &mut [u8]) -> bool { 837 | let want = usize::from(self.bits_in_buffer / 8); 838 | let count = want.min((*out).len()); 839 | let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count); 840 | *out = tail; 841 | 842 | for b in bytes { 843 | *b = (self.buffer & 0x0000_0000_0000_00ff) as u8; 844 | self.buffer >>= 8; 845 | self.bits_in_buffer -= 8; 846 | } 847 | 848 | count < want 849 | } 850 | 851 | fn buffer_pad(&mut self) { 852 | let to_byte = self.bits_in_buffer.wrapping_neg() & 0x7; 853 | self.bits_in_buffer += to_byte; 854 | } 855 | 856 | fn bump_code_size(&mut self) { 857 | self.code_size += 1; 858 | } 859 | 860 | fn max_code(&self) -> Code { 861 | (1 << self.code_size) - 1 862 | } 863 | 864 | fn code_size(&self) -> u8 { 865 | self.code_size 866 | } 867 | } 868 | 869 | impl Tree { 870 | fn init(&mut self, min_size: u8) { 
871 | // We need a way to represent the state of a currently empty buffer. We use the clear code 872 | // for this, thus create one complex mapping that leads to the one-char base codes. 873 | self.keys 874 | .resize((1 << min_size) + 2, FullKey::NoSuccessor.into()); 875 | self.complex.push(Full { 876 | char_continuation: [0; 256], 877 | }); 878 | let map_of_begin = self.complex.last_mut().unwrap(); 879 | for ch in 0u16..256 { 880 | map_of_begin.char_continuation[usize::from(ch)] = ch; 881 | } 882 | self.keys[1 << min_size] = FullKey::Full(0).into(); 883 | } 884 | 885 | fn reset(&mut self, min_size: u8) { 886 | self.simples.clear(); 887 | self.keys.truncate((1 << min_size) + 2); 888 | // Keep entry for clear code. 889 | self.complex.truncate(1); 890 | // The first complex is not changed.. 891 | for k in self.keys[..(1 << min_size) + 2].iter_mut() { 892 | *k = FullKey::NoSuccessor.into(); 893 | } 894 | self.keys[1 << min_size] = FullKey::Full(0).into(); 895 | } 896 | 897 | fn at_key(&self, code: Code, ch: u8) -> Option { 898 | let key = self.keys[usize::from(code)]; 899 | match FullKey::from(key) { 900 | FullKey::NoSuccessor => None, 901 | FullKey::Simple(idx) => { 902 | let nexts = &self.simples[usize::from(idx)]; 903 | let successors = nexts 904 | .codes 905 | .iter() 906 | .zip(nexts.chars.iter()) 907 | .take(usize::from(nexts.count)); 908 | for (&scode, &sch) in successors { 909 | if sch == ch { 910 | return Some(scode); 911 | } 912 | } 913 | 914 | None 915 | } 916 | FullKey::Full(idx) => { 917 | let full = &self.complex[usize::from(idx)]; 918 | let precode = full.char_continuation[usize::from(ch)]; 919 | if usize::from(precode) < MAX_ENTRIES { 920 | Some(precode) 921 | } else { 922 | None 923 | } 924 | } 925 | } 926 | } 927 | 928 | /// Iterate to the next char. 929 | /// Return Ok when it was already in the tree or creates a new entry for it and returns Err. 
930 | fn iterate(&mut self, code: Code, ch: u8) -> Result { 931 | if let Some(next) = self.at_key(code, ch) { 932 | Ok(next) 933 | } else { 934 | Err(self.append(code, ch)) 935 | } 936 | } 937 | 938 | fn append(&mut self, code: Code, ch: u8) -> Code { 939 | let next: Code = self.keys.len() as u16; 940 | let key = self.keys[usize::from(code)]; 941 | // TODO: with debug assertions, check for non-existence 942 | match FullKey::from(key) { 943 | FullKey::NoSuccessor => { 944 | let new_key = FullKey::Simple(self.simples.len() as u16); 945 | self.simples.push(Simple::default()); 946 | let simples = self.simples.last_mut().unwrap(); 947 | simples.codes[0] = next; 948 | simples.chars[0] = ch; 949 | simples.count = 1; 950 | self.keys[usize::from(code)] = new_key.into(); 951 | } 952 | FullKey::Simple(idx) if usize::from(self.simples[usize::from(idx)].count) < SHORT => { 953 | let nexts = &mut self.simples[usize::from(idx)]; 954 | let nidx = usize::from(nexts.count); 955 | nexts.chars[nidx] = ch; 956 | nexts.codes[nidx] = next; 957 | nexts.count += 1; 958 | } 959 | FullKey::Simple(idx) => { 960 | let new_key = FullKey::Full(self.complex.len() as u16); 961 | let simples = &self.simples[usize::from(idx)]; 962 | self.complex.push(Full { 963 | char_continuation: [Code::max_value(); 256], 964 | }); 965 | let full = self.complex.last_mut().unwrap(); 966 | for (&pch, &pcont) in simples.chars.iter().zip(simples.codes.iter()) { 967 | full.char_continuation[usize::from(pch)] = pcont; 968 | } 969 | self.keys[usize::from(code)] = new_key.into(); 970 | } 971 | FullKey::Full(idx) => { 972 | let full = &mut self.complex[usize::from(idx)]; 973 | full.char_continuation[usize::from(ch)] = next; 974 | } 975 | } 976 | self.keys.push(FullKey::NoSuccessor.into()); 977 | next 978 | } 979 | } 980 | 981 | impl Default for FullKey { 982 | fn default() -> Self { 983 | FullKey::NoSuccessor 984 | } 985 | } 986 | 987 | impl Default for Simple { 988 | fn default() -> Self { 989 | Simple { 990 | codes: [0; 
SHORT], 991 | chars: [0; SHORT], 992 | count: 0, 993 | } 994 | } 995 | } 996 | 997 | impl From for FullKey { 998 | fn from(CompressedKey(key): CompressedKey) -> Self { 999 | match (key >> MAX_CODESIZE) & 0xf { 1000 | 0 => FullKey::Full(key & 0xfff), 1001 | 1 => FullKey::Simple(key & 0xfff), 1002 | _ => FullKey::NoSuccessor, 1003 | } 1004 | } 1005 | } 1006 | 1007 | impl From for CompressedKey { 1008 | fn from(full: FullKey) -> Self { 1009 | CompressedKey(match full { 1010 | FullKey::NoSuccessor => 0x2000, 1011 | FullKey::Simple(code) => 0x1000 | code, 1012 | FullKey::Full(code) => code, 1013 | }) 1014 | } 1015 | } 1016 | 1017 | #[cfg(test)] 1018 | mod tests { 1019 | use super::{BitOrder, Encoder, LzwError, LzwStatus}; 1020 | use crate::alloc::vec::Vec; 1021 | use crate::decode::Decoder; 1022 | #[cfg(feature = "std")] 1023 | use crate::StreamBuf; 1024 | 1025 | #[test] 1026 | fn invalid_input_rejected() { 1027 | const BIT_LEN: u8 = 2; 1028 | let ref input = [0, 1 << BIT_LEN /* invalid */, 0]; 1029 | let ref mut target = [0u8; 128]; 1030 | let mut encoder = Encoder::new(BitOrder::Msb, BIT_LEN); 1031 | 1032 | encoder.finish(); 1033 | // We require simulation of normality, that is byte-for-byte compression. 1034 | let result = encoder.encode_bytes(input, target); 1035 | assert!(if let Err(LzwError::InvalidCode) = result.status { 1036 | true 1037 | } else { 1038 | false 1039 | }); 1040 | assert_eq!(result.consumed_in, 1); 1041 | 1042 | let fixed = encoder.encode_bytes(&[1, 0], &mut target[result.consumed_out..]); 1043 | assert!(if let Ok(LzwStatus::Done) = fixed.status { 1044 | true 1045 | } else { 1046 | false 1047 | }); 1048 | assert_eq!(fixed.consumed_in, 2); 1049 | 1050 | // Okay, now test we actually fixed it. 
1051 | let ref mut compare = [0u8; 4]; 1052 | let mut todo = &target[..result.consumed_out + fixed.consumed_out]; 1053 | let mut free = &mut compare[..]; 1054 | let mut decoder = Decoder::new(BitOrder::Msb, BIT_LEN); 1055 | 1056 | // Decode with up to 16 rounds, far too much but inconsequential. 1057 | for _ in 0..16 { 1058 | if decoder.has_ended() { 1059 | break; 1060 | } 1061 | 1062 | let result = decoder.decode_bytes(todo, free); 1063 | assert!(result.status.is_ok()); 1064 | todo = &todo[result.consumed_in..]; 1065 | free = &mut free[result.consumed_out..]; 1066 | } 1067 | 1068 | let remaining = { free }.len(); 1069 | let len = compare.len() - remaining; 1070 | assert_eq!(todo, &[]); 1071 | assert_eq!(compare[..len], [0, 1, 0]); 1072 | } 1073 | 1074 | #[test] 1075 | #[should_panic] 1076 | fn invalid_code_size_low() { 1077 | let _ = Encoder::new(BitOrder::Msb, 1); 1078 | } 1079 | 1080 | #[test] 1081 | #[should_panic] 1082 | fn invalid_code_size_high() { 1083 | let _ = Encoder::new(BitOrder::Msb, 14); 1084 | } 1085 | 1086 | fn make_decoded() -> Vec { 1087 | const FILE: &'static [u8] = 1088 | include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/Cargo.lock")); 1089 | return Vec::from(FILE); 1090 | } 1091 | 1092 | #[test] 1093 | #[cfg(feature = "std")] 1094 | fn into_stream_buffer_no_alloc() { 1095 | let encoded = make_decoded(); 1096 | let mut encoder = Encoder::new(BitOrder::Msb, 8); 1097 | 1098 | let mut output = vec![]; 1099 | let mut buffer = [0; 512]; 1100 | let mut istream = encoder.into_stream(&mut output); 1101 | istream.set_buffer(&mut buffer[..]); 1102 | istream.encode(&encoded[..]).status.unwrap(); 1103 | 1104 | match istream.buffer { 1105 | Some(StreamBuf::Borrowed(_)) => {} 1106 | None => panic!("Decoded without buffer??"), 1107 | Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"), 1108 | } 1109 | } 1110 | 1111 | #[test] 1112 | #[cfg(feature = "std")] 1113 | fn into_stream_buffer_small_alloc() { 1114 | struct WriteTap(W); 1115 | const 
BUF_SIZE: usize = 512; 1116 | 1117 | impl std::io::Write for WriteTap { 1118 | fn write(&mut self, buf: &[u8]) -> std::io::Result { 1119 | assert!(buf.len() <= BUF_SIZE); 1120 | self.0.write(buf) 1121 | } 1122 | fn flush(&mut self) -> std::io::Result<()> { 1123 | self.0.flush() 1124 | } 1125 | } 1126 | 1127 | let encoded = make_decoded(); 1128 | let mut encoder = Encoder::new(BitOrder::Msb, 8); 1129 | 1130 | let mut output = vec![]; 1131 | let mut istream = encoder.into_stream(WriteTap(&mut output)); 1132 | istream.set_buffer_size(512); 1133 | istream.encode(&encoded[..]).status.unwrap(); 1134 | 1135 | match istream.buffer { 1136 | Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE), 1137 | Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"), 1138 | None => panic!("Decoded without buffer??"), 1139 | } 1140 | } 1141 | 1142 | #[test] 1143 | #[cfg(feature = "std")] 1144 | fn reset() { 1145 | let encoded = make_decoded(); 1146 | let mut encoder = Encoder::new(BitOrder::Msb, 8); 1147 | let mut reference = None; 1148 | 1149 | for _ in 0..2 { 1150 | let mut output = vec![]; 1151 | let mut buffer = [0; 512]; 1152 | let mut istream = encoder.into_stream(&mut output); 1153 | istream.set_buffer(&mut buffer[..]); 1154 | istream.encode_all(&encoded[..]).status.unwrap(); 1155 | 1156 | encoder.reset(); 1157 | if let Some(reference) = &reference { 1158 | assert_eq!(output, *reference); 1159 | } else { 1160 | reference = Some(output); 1161 | } 1162 | } 1163 | } 1164 | } 1165 | -------------------------------------------------------------------------------- /src/decode.rs: -------------------------------------------------------------------------------- 1 | //! A module for all decoding needs. 
2 | #[cfg(feature = "std")] 3 | use crate::error::StreamResult; 4 | use crate::error::{BufferResult, LzwError, LzwStatus, VectorResult}; 5 | use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE}; 6 | 7 | use crate::alloc::{boxed::Box, vec, vec::Vec}; 8 | #[cfg(feature = "std")] 9 | use std::io::{self, BufRead, Write}; 10 | 11 | /// The state for decoding data with an LZW algorithm. 12 | /// 13 | /// The same structure can be utilized with streams as well as your own buffers and driver logic. 14 | /// It may even be possible to mix them if you are sufficiently careful not to lose or skip any 15 | /// already decode data in the process. 16 | /// 17 | /// This is a sans-IO implementation, meaning that it only contains the state of the decoder and 18 | /// the caller will provide buffers for input and output data when calling the basic 19 | /// [`decode_bytes`] method. Nevertheless, a number of _adapters_ are provided in the `into_*` 20 | /// methods for decoding with a particular style of common IO. 21 | /// 22 | /// * [`decode`] for decoding once without any IO-loop. 23 | /// * [`into_async`] for decoding with the `futures` traits for asynchronous IO. 24 | /// * [`into_stream`] for decoding with the standard `io` traits. 25 | /// * [`into_vec`] for in-memory decoding. 26 | /// 27 | /// [`decode_bytes`]: #method.decode_bytes 28 | /// [`decode`]: #method.decode 29 | /// [`into_async`]: #method.into_async 30 | /// [`into_stream`]: #method.into_stream 31 | /// [`into_vec`]: #method.into_vec 32 | pub struct Decoder { 33 | state: Box, 34 | } 35 | 36 | /// A decoding stream sink. 37 | /// 38 | /// See [`Decoder::into_stream`] on how to create this type. 39 | /// 40 | /// [`Decoder::into_stream`]: struct.Decoder.html#method.into_stream 41 | #[cfg_attr( 42 | not(feature = "std"), 43 | deprecated = "This type is only useful with the `std` feature." 
44 | )] 45 | #[cfg_attr(not(feature = "std"), allow(dead_code))] 46 | pub struct IntoStream<'d, W> { 47 | decoder: &'d mut Decoder, 48 | writer: W, 49 | buffer: Option>, 50 | default_size: usize, 51 | } 52 | 53 | /// An async decoding sink. 54 | /// 55 | /// See [`Decoder::into_async`] on how to create this type. 56 | /// 57 | /// [`Decoder::into_async`]: struct.Decoder.html#method.into_async 58 | #[cfg(feature = "async")] 59 | pub struct IntoAsync<'d, W> { 60 | decoder: &'d mut Decoder, 61 | writer: W, 62 | buffer: Option>, 63 | default_size: usize, 64 | } 65 | 66 | /// A decoding sink into a vector. 67 | /// 68 | /// See [`Decoder::into_vec`] on how to create this type. 69 | /// 70 | /// [`Decoder::into_vec`]: struct.Decoder.html#method.into_vec 71 | pub struct IntoVec<'d> { 72 | decoder: &'d mut Decoder, 73 | vector: &'d mut Vec, 74 | } 75 | 76 | trait Stateful { 77 | fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult; 78 | fn has_ended(&self) -> bool; 79 | /// Ignore an end code and continue decoding (no implied reset). 80 | fn restart(&mut self); 81 | /// Reset the decoder to the beginning, dropping all buffers etc. 82 | fn reset(&mut self); 83 | } 84 | 85 | #[derive(Clone)] 86 | struct Link { 87 | prev: Code, 88 | byte: u8, 89 | first: u8, 90 | } 91 | 92 | #[derive(Clone)] 93 | struct DerivationBase { 94 | code: Code, 95 | first: u8, 96 | } 97 | 98 | #[derive(Default)] 99 | struct MsbBuffer { 100 | /// A buffer of individual bits. The oldest code is kept in the high-order bits. 101 | bit_buffer: u64, 102 | /// A precomputed mask for this code. 103 | code_mask: u16, 104 | /// The current code size. 105 | code_size: u8, 106 | /// The number of bits in the buffer. 107 | bits: u8, 108 | } 109 | 110 | #[derive(Default)] 111 | struct LsbBuffer { 112 | /// A buffer of individual bits. The oldest code is kept in the high-order bits. 113 | bit_buffer: u64, 114 | /// A precomputed mask for this code. 115 | code_mask: u16, 116 | /// The current code size. 
117 | code_size: u8, 118 | /// The number of bits in the buffer. 119 | bits: u8, 120 | } 121 | 122 | trait CodeBuffer { 123 | fn new(min_size: u8) -> Self; 124 | fn reset(&mut self, min_size: u8); 125 | fn bump_code_size(&mut self); 126 | 127 | /// Retrieve the next symbol, refilling if necessary. 128 | fn next_symbol(&mut self, inp: &mut &[u8]) -> Option; 129 | /// Refill the internal buffer. 130 | fn refill_bits(&mut self, inp: &mut &[u8]); 131 | 132 | fn peek_bits(&self, code: &mut [Code; BURST]) -> usize; 133 | fn consume_bits(&mut self, code_cnt: u8); 134 | 135 | fn max_code(&self) -> Code; 136 | fn code_size(&self) -> u8; 137 | } 138 | 139 | trait CodegenConstants { 140 | const YIELD_ON_FULL: bool; 141 | } 142 | 143 | struct DecodeState { 144 | /// The original minimum code size. 145 | min_size: u8, 146 | /// The table of decoded codes. 147 | table: Table, 148 | /// The buffer of decoded data. 149 | buffer: Buffer, 150 | /// The link which we are still decoding and its original code. 151 | last: Option, 152 | /// The next code entry. 153 | next_code: Code, 154 | /// Code to reset all tables. 155 | clear_code: Code, 156 | /// Code to signal the end of the stream. 157 | end_code: Code, 158 | /// A stored flag if the end code has already appeared. 159 | has_ended: bool, 160 | /// If tiff then bumps are a single code sooner. 161 | is_tiff: bool, 162 | /// Do we allow stream to start without an explicit reset code? 163 | implicit_reset: bool, 164 | /// The buffer for decoded words. 165 | code_buffer: CodeBuffer, 166 | #[allow(dead_code)] 167 | constants: core::marker::PhantomData, 168 | } 169 | 170 | // We have a buffer of 64 bits. So at max size at most 5 units can be read at once without 171 | // refilling the buffer. At smaller code sizes there are more. We tune for 6 here, by slight 172 | // experimentation. This may be an architecture dependent constant. 
173 | const BURST: usize = 6; 174 | 175 | struct Buffer { 176 | bytes: Box<[u8]>, 177 | read_mark: usize, 178 | write_mark: usize, 179 | } 180 | 181 | struct Table { 182 | inner: Vec, 183 | depths: Vec, 184 | } 185 | 186 | /// Describes the static parameters for creating a decoder. 187 | #[derive(Clone, Debug)] 188 | pub struct Configuration { 189 | order: BitOrder, 190 | size: u8, 191 | tiff: bool, 192 | yield_on_full: bool, 193 | } 194 | 195 | impl Configuration { 196 | /// Create a configuration to decode with the specified bit order and symbol size. 197 | pub fn new(order: BitOrder, size: u8) -> Self { 198 | super::assert_decode_size(size); 199 | Configuration { 200 | order, 201 | size, 202 | tiff: false, 203 | yield_on_full: false, 204 | } 205 | } 206 | 207 | /// Create a configuration for a TIFF compatible decoder. 208 | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { 209 | super::assert_decode_size(size); 210 | Configuration { 211 | order, 212 | size, 213 | tiff: true, 214 | yield_on_full: false, 215 | } 216 | } 217 | 218 | /// Immediately yield to the caller when the decoder buffer is full. 219 | /// 220 | /// This can be used for `libtiff` compatibility. It will use a "relaxed" stream interpretation 221 | /// that need not contain an explicit EOF. Instead, the decoder is expected to stop fetching 222 | /// symbols when some out-of-band specified length of the decoded text has been reached. The 223 | /// caller indicates this maximum length through the available output buffer space. 224 | /// 225 | /// Symbols afterwards must not be expected to be valid. On filling the output buffer space 226 | /// completely, the decoder will return immediately to the caller instead of potentially 227 | /// interpreting the following bit-stream (and returning an error on doing so). 228 | /// 229 | /// Default: `false`. 
230 | pub fn with_yield_on_full_buffer(self, do_yield: bool) -> Self { 231 | Configuration { 232 | yield_on_full: do_yield, 233 | ..self 234 | } 235 | } 236 | 237 | /// Create a new decoder with the define configuration. 238 | pub fn build(self) -> Decoder { 239 | Decoder { 240 | state: Decoder::from_configuration(&self), 241 | } 242 | } 243 | } 244 | 245 | impl Decoder { 246 | /// Create a new decoder with the specified bit order and symbol size. 247 | /// 248 | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the 249 | /// original specification. In particular you will need to specify an `Lsb` bit oder to decode 250 | /// the data portion of a compressed `gif` image. 251 | /// 252 | /// # Panics 253 | /// 254 | /// The `size` needs to be in the interval `0..=12`. 255 | pub fn new(order: BitOrder, size: u8) -> Self { 256 | Configuration::new(order, size).build() 257 | } 258 | 259 | /// Create a TIFF compatible decoder with the specified bit order and symbol size. 260 | /// 261 | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the 262 | /// TIFF specification, which is a misinterpretation of the original algorithm for increasing 263 | /// the code size. It switches one symbol sooner. 264 | /// 265 | /// # Panics 266 | /// 267 | /// The `size` needs to be in the interval `0..=12`. 
268 | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { 269 | Configuration::with_tiff_size_switch(order, size).build() 270 | } 271 | 272 | fn from_configuration(configuration: &Configuration) -> Box { 273 | struct NoYield; 274 | struct YieldOnFull; 275 | 276 | impl CodegenConstants for NoYield { 277 | const YIELD_ON_FULL: bool = false; 278 | } 279 | 280 | impl CodegenConstants for YieldOnFull { 281 | const YIELD_ON_FULL: bool = true; 282 | } 283 | 284 | type Boxed = Box; 285 | match (configuration.order, configuration.yield_on_full) { 286 | (BitOrder::Lsb, false) => { 287 | let mut state = 288 | Box::new(DecodeState::::new(configuration.size)); 289 | state.is_tiff = configuration.tiff; 290 | state as Boxed 291 | } 292 | (BitOrder::Lsb, true) => { 293 | let mut state = Box::new(DecodeState::::new( 294 | configuration.size, 295 | )); 296 | state.is_tiff = configuration.tiff; 297 | state as Boxed 298 | } 299 | (BitOrder::Msb, false) => { 300 | let mut state = 301 | Box::new(DecodeState::::new(configuration.size)); 302 | state.is_tiff = configuration.tiff; 303 | state as Boxed 304 | } 305 | (BitOrder::Msb, true) => { 306 | let mut state = Box::new(DecodeState::::new( 307 | configuration.size, 308 | )); 309 | state.is_tiff = configuration.tiff; 310 | state as Boxed 311 | } 312 | } 313 | } 314 | 315 | /// Decode some bytes from `inp` and write result to `out`. 316 | /// 317 | /// This will consume a prefix of the input buffer and write decoded output into a prefix of 318 | /// the output buffer. See the respective fields of the return value for the count of consumed 319 | /// and written bytes. For the next call You should have adjusted the inputs accordingly. 320 | /// 321 | /// The call will try to decode and write as many bytes of output as available. It will be 322 | /// much more optimized (and avoid intermediate buffering) if it is allowed to write a large 323 | /// contiguous chunk at once. 
324 | /// 325 | /// See [`into_stream`] for high-level functions (that are only available with the `std` 326 | /// feature). 327 | /// 328 | /// [`into_stream`]: #method.into_stream 329 | pub fn decode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult { 330 | self.state.advance(inp, out) 331 | } 332 | 333 | /// Decode a single chunk of lzw encoded data. 334 | /// 335 | /// This method requires the data to contain an end marker, and returns an error otherwise. 336 | /// 337 | /// This is a convenience wrapper around [`into_vec`]. Use the `into_vec` adapter to customize 338 | /// buffer size, to supply an existing vector, to control whether an end marker is required, or 339 | /// to preserve partial data in the case of a decoding error. 340 | /// 341 | /// [`into_vec`]: #into_vec 342 | /// 343 | /// # Example 344 | /// 345 | /// ``` 346 | /// use weezl::{BitOrder, decode::Decoder}; 347 | /// 348 | /// // Encoded that was created with an encoder. 349 | /// let data = b"\x80\x04\x81\x94l\x1b\x06\xf0\xb0 \x1d\xc6\xf1\xc8l\x19 \x10"; 350 | /// let decoded = Decoder::new(BitOrder::Msb, 9) 351 | /// .decode(data) 352 | /// .unwrap(); 353 | /// assert_eq!(decoded, b"Hello, world"); 354 | /// ``` 355 | pub fn decode(&mut self, data: &[u8]) -> Result, LzwError> { 356 | let mut output = vec![]; 357 | self.into_vec(&mut output).decode_all(data).status?; 358 | Ok(output) 359 | } 360 | 361 | /// Construct a decoder into a writer. 362 | #[cfg(feature = "std")] 363 | pub fn into_stream(&mut self, writer: W) -> IntoStream<'_, W> { 364 | IntoStream { 365 | decoder: self, 366 | writer, 367 | buffer: None, 368 | default_size: STREAM_BUF_SIZE, 369 | } 370 | } 371 | 372 | /// Construct a decoder into an async writer. 
373 | #[cfg(feature = "async")] 374 | pub fn into_async(&mut self, writer: W) -> IntoAsync<'_, W> { 375 | IntoAsync { 376 | decoder: self, 377 | writer, 378 | buffer: None, 379 | default_size: STREAM_BUF_SIZE, 380 | } 381 | } 382 | 383 | /// Construct a decoder into a vector. 384 | /// 385 | /// All decoded data is appended and the vector is __not__ cleared. 386 | /// 387 | /// Compared to `into_stream` this interface allows a high-level access to decoding without 388 | /// requires the `std`-feature. Also, it can make full use of the extra buffer control that the 389 | /// special target exposes. 390 | pub fn into_vec<'lt>(&'lt mut self, vec: &'lt mut Vec) -> IntoVec<'lt> { 391 | IntoVec { 392 | decoder: self, 393 | vector: vec, 394 | } 395 | } 396 | 397 | /// Check if the decoding has finished. 398 | /// 399 | /// No more output is produced beyond the end code that marked the finish of the stream. The 400 | /// decoder may have read additional bytes, including padding bits beyond the last code word 401 | /// but also excess bytes provided. 402 | pub fn has_ended(&self) -> bool { 403 | self.state.has_ended() 404 | } 405 | 406 | /// Ignore an end code and continue. 407 | /// 408 | /// This will _not_ reset any of the inner code tables and not have the effect of a clear code. 409 | /// It will instead continue as if the end code had not been present. If no end code has 410 | /// occurred then this is a no-op. 411 | /// 412 | /// You can test if an end code has occurred with [`has_ended`](#method.has_ended). 413 | /// FIXME: clarify how this interacts with padding introduced after end code. 414 | #[allow(dead_code)] 415 | pub(crate) fn restart(&mut self) { 416 | self.state.restart(); 417 | } 418 | 419 | /// Reset all internal state. 420 | /// 421 | /// This produce a decoder as if just constructed with `new` but taking slightly less work. In 422 | /// particular it will not deallocate any internal allocations. It will also avoid some 423 | /// duplicate setup work. 
pub fn reset(&mut self) {
        self.state.reset();
    }
}

#[cfg(feature = "std")]
impl<'d, W: Write> IntoStream<'d, W> {
    /// Decode data from a reader.
    ///
    /// This will read data until the stream is empty or an end marker is reached.
    pub fn decode(&mut self, read: impl BufRead) -> StreamResult {
        self.decode_part(read, false)
    }

    /// Decode data from a reader, requiring an end marker.
    pub fn decode_all(mut self, read: impl BufRead) -> StreamResult {
        self.decode_part(read, true)
    }

    /// Set the size of the intermediate decode buffer.
    ///
    /// A buffer of this size is allocated to hold one part of the decoded stream when no buffer is
    /// available and any decoding method is called. No buffer is allocated if `set_buffer` has
    /// been called. The buffer is reused.
    ///
    /// # Panics
    /// This method panics if `size` is `0`.
    pub fn set_buffer_size(&mut self, size: usize) {
        assert_ne!(size, 0, "Attempted to set empty buffer");
        self.default_size = size;
    }

    /// Use a particular buffer as an intermediate decode buffer.
    ///
    /// Calling this sets or replaces the buffer. When a buffer has been set then it is used
    /// instead of dynamically allocating a buffer. Note that the size of the buffer is critical
    /// for efficient decoding. Some optimization techniques require the buffer to hold one or more
    /// previous decoded words. There is also additional overhead from `write` calls each time the
    /// buffer has been filled.
    ///
    /// # Panics
    /// This method panics if the `buffer` is empty.
    pub fn set_buffer(&mut self, buffer: &'d mut [u8]) {
        assert_ne!(buffer.len(), 0, "Attempted to set empty buffer");
        self.buffer = Some(StreamBuf::Borrowed(buffer));
    }

    /// Core loop shared by `decode` and `decode_all`.
    ///
    /// Repeatedly pulls one chunk of input from `read`, decodes it into the intermediate buffer,
    /// and forwards the decoded bytes to the inner writer. When `must_finish` is set, exhausting
    /// the input without an end marker is reported as `UnexpectedEof` instead of clean completion.
    fn decode_part(&mut self, mut read: impl BufRead, must_finish: bool) -> StreamResult {
        let IntoStream {
            decoder,
            writer,
            buffer,
            default_size,
        } = self;

        enum Progress {
            Ok,
            Done,
        }

        let mut bytes_read = 0;
        let mut bytes_written = 0;

        // Converting to mutable refs to move into the `once` closure.
        let read_bytes = &mut bytes_read;
        let write_bytes = &mut bytes_written;

        // Lazily allocate an intermediate buffer unless the caller provided one via `set_buffer`.
        let outbuf: &mut [u8] =
            match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } {
                StreamBuf::Borrowed(slice) => &mut *slice,
                StreamBuf::Owned(vec) => &mut *vec,
            };
        assert!(!outbuf.is_empty());

        // One decoding step: fetch input, decode one buffer's worth, write it out.
        let once = move || {
            // Try to grab one buffer of input data.
            let data = read.fill_buf()?;

            // Decode as much of the buffer as fits.
            let result = decoder.decode_bytes(data, &mut outbuf[..]);
            // Do the bookkeeping and consume the buffer.
            *read_bytes += result.consumed_in;
            *write_bytes += result.consumed_out;
            read.consume(result.consumed_in);

            // Handle the status in the result; decode errors are surfaced as `InvalidData`.
            let done = result.status.map_err(|err| {
                io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err))
            })?;

            // Check if we had any new data at all.
            if let LzwStatus::NoProgress = done {
                debug_assert_eq!(
                    result.consumed_out, 0,
                    "No progress means we have not decoded any data"
                );
                // In particular we did not finish decoding.
                if must_finish {
                    return Err(io::Error::new(
                        io::ErrorKind::UnexpectedEof,
                        "No more data but no end marker detected",
                    ));
                } else {
                    return Ok(Progress::Done);
                }
            }

            // And finish by writing our result.
            // TODO: we may lose data on error (also on status error above) which we might want to
            // deterministically handle so that we don't need to restart everything from scratch as
            // the only recovery strategy. Any changes welcome.
            writer.write_all(&outbuf[..result.consumed_out])?;

            Ok(if let LzwStatus::Done = done {
                Progress::Done
            } else {
                Progress::Ok
            })
        };

        // Decode chunks of input data until we're done.
        let status = core::iter::repeat_with(once)
            // scan+fuse can be replaced with map_while
            .scan((), |(), result| match result {
                Ok(Progress::Ok) => Some(Ok(())),
                Err(err) => Some(Err(err)),
                Ok(Progress::Done) => None,
            })
            .fuse()
            .collect();

        StreamResult {
            bytes_read,
            bytes_written,
            status,
        }
    }
}

impl IntoVec<'_> {
    /// Decode data from a slice.
    ///
    /// This will read data until the slice is empty or an end marker is reached.
    pub fn decode(&mut self, read: &[u8]) -> VectorResult {
        self.decode_part(read, false)
    }

    /// Decode data from a slice, requiring an end marker.
    pub fn decode_all(mut self, read: &[u8]) -> VectorResult {
        self.decode_part(read, true)
    }

    /// Grow the output vector by one chunk and hand out its (zero-filled) tail
    /// as the next decode target, together with the decoder itself.
    fn grab_buffer(&mut self) -> (&mut [u8], &mut Decoder) {
        const CHUNK_SIZE: usize = 1 << 12;
        let decoder = &mut self.decoder;
        let length = self.vector.len();

        // Use the vector to do overflow checks and w/e.
        self.vector.reserve(CHUNK_SIZE);
        // FIXME: decoding into uninit buffer?
self.vector.resize(length + CHUNK_SIZE, 0u8);

        (&mut self.vector[length..], decoder)
    }

    /// Core loop shared by `decode` and `decode_all`: decode `part` into the
    /// owned vector, growing it chunk by chunk and truncating the unused tail.
    fn decode_part(&mut self, part: &[u8], must_finish: bool) -> VectorResult {
        let mut result = VectorResult {
            consumed_in: 0,
            consumed_out: 0,
            status: Ok(LzwStatus::Ok),
        };

        enum Progress {
            Ok,
            Done,
        }

        // Converting to mutable refs to move into the `once` closure.
        let read_bytes = &mut result.consumed_in;
        let write_bytes = &mut result.consumed_out;
        let mut data = part;

        // A 64 MB buffer is quite large but should get alloc_zeroed.
        // Note that the decoded size can be up to quadratic in code block.
        let once = move || {
            // Grab a new output buffer.
            let (outbuf, decoder) = self.grab_buffer();

            // Decode as much of the buffer as fits.
            let result = decoder.decode_bytes(data, &mut outbuf[..]);
            // Do the bookkeeping and consume the buffer.
            *read_bytes += result.consumed_in;
            *write_bytes += result.consumed_out;
            data = &data[result.consumed_in..];

            // Drop the part of the freshly grown chunk that was not filled.
            let unfilled = outbuf.len() - result.consumed_out;
            let filled = self.vector.len() - unfilled;
            self.vector.truncate(filled);

            // Handle the status in the result. Running dry without an end marker is only an
            // error when the caller required one (`decode_all`).
            match result.status {
                Err(err) => Err(err),
                Ok(LzwStatus::NoProgress) if must_finish => Err(LzwError::InvalidCode),
                Ok(LzwStatus::NoProgress) | Ok(LzwStatus::Done) => Ok(Progress::Done),
                Ok(LzwStatus::Ok) => Ok(Progress::Ok),
            }
        };

        // Decode chunks of input data until we're done.
        let status: Result<(), _> = core::iter::repeat_with(once)
            // scan+fuse can be replaced with map_while
            .scan((), |(), result| match result {
                Ok(Progress::Ok) => Some(Ok(())),
                Err(err) => Some(Err(err)),
                Ok(Progress::Done) => None,
            })
            .fuse()
            .collect();

        if let Err(err) = status {
            result.status = Err(err);
        }

        result
    }
}

// This is implemented in a separate file, so that 1.34.2 does not parse it. Otherwise, it would
// trip over the usage of await, which is a reserved keyword in that edition/version. It only
// contains an impl block.
#[cfg(feature = "async")]
#[path = "decode_into_async.rs"]
mod impl_decode_into_async;

impl DecodeState {
    /// Construct a fresh decode state for symbols of `min_size` bits.
    ///
    /// The clear and end codes are the two entries directly after the initial
    /// alphabet (`1 << min_size` and `1 << min_size | 1`), so the first free
    /// dictionary code is `(1 << min_size) + 2`.
    fn new(min_size: u8) -> Self {
        DecodeState {
            min_size,
            table: Table::new(),
            buffer: Buffer::new(),
            last: None,
            clear_code: 1 << min_size,
            end_code: (1 << min_size) + 1,
            next_code: (1 << min_size) + 2,
            has_ended: false,
            is_tiff: false,
            implicit_reset: true,
            code_buffer: CodeBuffer::new(min_size),
            constants: core::marker::PhantomData,
        }
    }

    /// (Re-)initialize code size and table from scratch, filling the initial alphabet.
    fn init_tables(&mut self) {
        self.code_buffer.reset(self.min_size);
        self.next_code = (1 << self.min_size) + 2;
        self.table.init(self.min_size);
    }

    /// Reset after a clear code: like `init_tables` but truncates instead of refilling,
    /// keeping the already-present static entries.
    fn reset_tables(&mut self) {
        self.code_buffer.reset(self.min_size);
        self.next_code = (1 << self.min_size) + 2;
        self.table.clear(self.min_size);
    }
}

impl Stateful for DecodeState {
    fn has_ended(&self) -> bool {
        self.has_ended
    }

    /// Clear only the end-of-stream flag so decoding may continue on new input.
    fn restart(&mut self) {
        self.has_ended = false;
    }

    /// Fully reset the decoder to its post-construction state.
    fn reset(&mut self) {
        self.table.init(self.min_size);
        self.next_code = (1 << self.min_size) + 2;
        self.buffer.read_mark = 0;
        self.buffer.write_mark = 0;
        self.last = None;
self.restart();
        self.code_buffer = CodeBuffer::new(self.min_size);
    }

    /// Decode as many codes from `inp` into `out` as possible, returning how much of
    /// each was consumed together with the resulting stream status.
    fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
        // Skip everything if there is nothing to do.
        if self.has_ended {
            return BufferResult {
                consumed_in: 0,
                consumed_out: 0,
                status: Ok(LzwStatus::Done),
            };
        }

        // Rough description:
        // We will fill the output slice as much as possible until either there is no more symbols
        // to decode or an end code has been reached. This requires an internal buffer to hold a
        // potential tail of the word corresponding to the last symbol. This tail will then be
        // decoded first before continuing with the regular decoding. The same buffer is required
        // to persist some symbol state across calls.
        //
        // We store the words corresponding to code symbols in an index chain, bytewise, where we
        // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain
        // is traversed for each symbol when it is decoded and bytes are placed directly into the
        // output slice. In the special case (new_code == next_code) we use an existing decoded
        // version that is present in either the out bytes of this call or in buffer to copy the
        // repeated prefix slice.
        // TODO: I played with a 'decoding cache' to remember the position of long symbols and
        // avoid traversing the chain, doing a copy of memory instead. It did however not lead to
        // a serious improvement. It's just unlikely to both have a long symbol and have that
        // repeated twice in the same output buffer.
        //
        // You will also find the (to my knowledge novel) concept of a _decoding burst_ which
        // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order
        // execution as much as possible and for this reason have the least possible stress on
        // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but
        // only for re-used codes, not novel ones. This lookahead also makes the loop termination
        // when restoring each byte of the code word perfectly predictable! So a burst is a chunk
        // of code words which are all independent of each other, have known lengths _and_ are
        // guaranteed to fit into the out slice without requiring a buffer. One burst can be
        // decoded in an extremely tight loop.
        //
        // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid
        // that intermediate buffer at the expense of not always filling the output buffer
        // completely. Alternatively we might follow its chain of precursor states twice. This may
        // be even cheaper if we store more than one byte per link so it really should be
        // evaluated.
        // TODO: if the caller was required to provide the previous last word we could also avoid
        // the buffer for cases where we need it to restore the next code! This could be built
        // backwards compatible by only doing it after an opt-in call that enables the behaviour.

        // Record initial lengths for the result that is returned.
        let o_in = inp.len();
        let o_out = out.len();

        // The code_link is the previously decoded symbol.
        // It's used to link the new code back to its predecessor.
        let mut code_link = None;
        // The status, which is written to on an invalid code.
        let mut status = Ok(LzwStatus::Ok);

        match self.last.take() {
            // No last state? This is the first code after a reset?
            None => {
                match self.next_symbol(&mut inp) {
                    // Plainly invalid code.
                    Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode),
                    // next_code would require an actual predecessor.
                    Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode),
                    // No more symbols available and nothing decoded yet.
                    // Assume that we didn't make progress, this may get reset to Done if we read
                    // some bytes from the input.
                    None => status = Ok(LzwStatus::NoProgress),
                    // Handle a valid code.
                    Some(init_code) => {
                        if init_code == self.clear_code {
                            self.init_tables();
                        } else if init_code == self.end_code {
                            self.has_ended = true;
                            status = Ok(LzwStatus::Done);
                        } else if self.table.is_empty() {
                            if self.implicit_reset {
                                self.init_tables();

                                self.buffer.fill_reconstruct(&self.table, init_code);
                                let link = self.table.at(init_code).clone();
                                code_link = Some(DerivationBase {
                                    code: init_code,
                                    first: link.first,
                                });
                            } else {
                                // We require an explicit reset.
                                status = Err(LzwError::InvalidCode);
                            }
                        } else {
                            // Reconstruct the first code in the buffer.
                            self.buffer.fill_reconstruct(&self.table, init_code);
                            let link = self.table.at(init_code).clone();
                            code_link = Some(DerivationBase {
                                code: init_code,
                                first: link.first,
                            });
                        }
                    }
                }
            }
            // Move the tracking state to the stack.
            Some(tup) => code_link = Some(tup),
        };

        // Tracking an empty `burst` (see below) means we made no progress.
        let mut have_yet_to_decode_data = false;

        // Restore the previous state, if any: flush the buffered tail of the last word first.
        if code_link.is_some() {
            let remain = self.buffer.buffer();
            // Check if we can fully finish the buffer.
            if remain.len() > out.len() {
                if out.is_empty() {
                    // This also implies the buffer is _not_ empty and we will not enter any
                    // decoding loop.
                    status = Ok(LzwStatus::NoProgress);
                } else {
                    out.copy_from_slice(&remain[..out.len()]);
                    self.buffer.consume(out.len());
                    out = &mut [];
                }
            } else if remain.is_empty() {
                status = Ok(LzwStatus::NoProgress);
                have_yet_to_decode_data = true;
            } else {
                let consumed = remain.len();
                out[..consumed].copy_from_slice(remain);
                self.buffer.consume(consumed);
                out = &mut out[consumed..];
                have_yet_to_decode_data = false;
            }
        }

        // A special reference to out slice which holds the last decoded symbol.
        let mut last_decoded: Option<&[u8]> = None;

        if self.buffer.buffer().is_empty() {
            // Hot loop that writes data to the output as long as we can do so directly from the
            // input stream. As an invariant of this block we did not need to use the buffer to
            // store a decoded code word. Testing the condition ahead of time avoids a test in the
            // loop body since every code path where the buffer is filled already breaks.
            //
            // In a previous iteration of the code we trusted compiler optimization to work this
            // out but it seems that it does not. Another edit hidden behind some performance work
            // then edited out the check, inadvertently changing the behavior for callers that
            // relied on being able to provide an empty output buffer and still receiving a useful
            // signal about the state of the stream.

            // A burst is a sequence of code words that are independently decoded, i.e. they do not
            // change the state of the decoder in ways that would influence the interpretation of
            // each other. That is: they are not special symbols, they do not make us increase the
            // code size, they are each codes already in the tree before the burst.
            //
            // The tracking state for a burst. These are actually initialized later but compiler
            // wasn't smart enough to fully optimize out the init code so that appears outside the
            // loop.
            let mut burst = [0; BURST];
            let mut burst_byte_len = [0u16; BURST];
            let mut burst_byte = [0u8; BURST];
            let mut target: [&mut [u8]; BURST] = Default::default();

            loop {
                // In particular, we *also* break if the output buffer is still empty. Especially
                // when the output parameter was an empty slice, we must try to fetch at least one
                // code but with YIELD_ON_FULL we do not.
                if CgC::YIELD_ON_FULL && out.is_empty() {
                    break;
                }

                let mut deriv = match code_link.take() {
                    Some(link) => link,
                    None => {
                        // TODO: we do not need to break here. This does not indicate that the buffer
                        // has been filled, rather it indicates we have reset the state. The next code
                        // should be part of the initial alphabet. However the first code is special in
                        // the sense of not creating a new code itself. This is handled correctly in
                        // the initialization prior to the loop; and in particular that handling as
                        // written currently relies on putting it into the buffer; so handling it we
                        // would need to ensure that either the buffer is fully cleared after its use,
                        // or use another implementation of handling that first code.
                        break;
                    }
                };

                // Ensure the code buffer is full, we're about to request some codes.
                // Note that this also ensures at least one code is in the buffer if any input is left.
                self.refill_bits(&mut inp);
                let cnt = self.code_buffer.peek_bits(&mut burst);

                // No code left in the buffer, and no more bytes to refill the buffer.
                if cnt == 0 {
                    if have_yet_to_decode_data {
                        status = Ok(LzwStatus::NoProgress);
                    }

                    code_link = Some(deriv);
                    break;
                }

                debug_assert!(
                    // When the table is full, we have a max code above the size switch.
                    self.table.inner.len() >= MAX_ENTRIES - usize::from(self.is_tiff)
                    // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the
                    // computed next_code is 4 which already exceeds the bit width from the start.
                    // Then we will immediately switch code size after this code.
                    //
                    // TODO: this is the reason for some saturating and non-sharp comparisons in
                    // the code below. Maybe it makes sense to revisit turning this into a compile
                    // time choice?
                    || (self.code_buffer.code_size() == 1 && self.next_code < 4)
                    || (self.code_buffer.code_size() == 2 && self.next_code == 4)
                    || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code,
                    "Table: {}, code_size: {}, next_code: {}, table_condition: {}",
                    self.table.is_full(),
                    self.code_buffer.code_size(),
                    self.next_code,
                    self.code_buffer.max_code() - Code::from(self.is_tiff),
                );

                let mut burst_size = 0;
                let size_switch_at = self.code_buffer.max_code() - Code::from(self.is_tiff);
                // This is intended to wrap. As by the debug assert above, we keep the next
                // code bounded by the current size's max code where we switch code size.
                // Except in case the table is full then we actually want to allow decoding
                // of an arbitrary count of non-resetting symbols.
                let left_before_size_switch = size_switch_at.wrapping_sub(self.next_code);

                // A burst is a sequence of decodes that are completely independent of each other. This
                // is the case if neither is an end code, a clear code, or a next code, i.e. we have
                // all of them in the decoding table and thus known their depths, and additionally if
                // we can decode them directly into the output buffer.
                for b in &burst[..cnt] {
                    // We can commit the previous burst code, and will take a slice from the output
                    // buffer. This also avoids the bounds check in the tight loop later.
                    if burst_size > 0 {
                        let len = burst_byte_len[burst_size - 1];
                        let (into, tail) = out.split_at_mut(usize::from(len));
                        target[burst_size - 1] = into;
                        out = tail;
                    }

                    // Check that we don't overflow the code size with all codes we burst decode.
                    burst_size += 1;

                    if burst_size > usize::from(left_before_size_switch) {
                        break;
                    }

                    let read_code = *b;

                    // A burst code can't be special.
                    if read_code == self.clear_code
                        || read_code == self.end_code
                        || read_code >= self.next_code
                    {
                        break;
                    }

                    // Read the code length and check that we can decode directly into the out slice.
                    let len = self.table.depths[usize::from(read_code)];

                    if out.len() < usize::from(len) {
                        break;
                    }

                    // We do exactly one more code (the one being inspected in the current iteration)
                    // after the 'burst'. When we want to break decoding precisely on the supplied
                    // buffer, we check if this is the last code to be decoded into it.
                    if CgC::YIELD_ON_FULL {
                        if out.len() == usize::from(len) {
                            break;
                        }
                    }

                    burst_byte_len[burst_size - 1] = len;
                }

                self.code_buffer.consume_bits(burst_size as u8);
                have_yet_to_decode_data = false;

                // Note that the very last code in the burst buffer doesn't actually belong to the
                // burst itself. TODO: sometimes it could, we just don't differentiate between the
                // breaks and a loop end condition above. That may be a speed advantage?
                let (&new_code, burst) = burst[..burst_size].split_last().unwrap();

                // The very tight loop for restoring the actual burst. These can be reconstructed in
                // parallel since none of them depend on a prior constructed. Only the derivation of
                // new codes is not parallel. There are no size changes here either.
                let burst_targets = &mut target[..burst_size - 1];

                if !self.table.is_full() {
                    self.next_code += burst_targets.len() as u16;
                }

                for ((&burst, target), byte) in
                    burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte)
                {
                    *byte = self.table.reconstruct(burst, target);
                }

                self.table.derive_burst(&mut deriv, burst, &burst_byte[..]);

                // Now handle the special codes.
                if new_code == self.clear_code {
                    self.reset_tables();
                    last_decoded = None;
                    // Restarts in the next call to the entry point.
                    break;
                }

                if new_code == self.end_code {
                    self.has_ended = true;
                    status = Ok(LzwStatus::Done);
                    last_decoded = None;
                    break;
                }

                if new_code > self.next_code {
                    status = Err(LzwError::InvalidCode);
                    last_decoded = None;
                    break;
                }

                let required_len = if new_code == self.next_code {
                    self.table.depths[usize::from(deriv.code)] + 1
                } else {
                    self.table.depths[usize::from(new_code)]
                };

                // We need the decoded data of the new code if it is the `next_code`. This is the
                // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In
                // all other cases we only need the first character of the decoded data.
                let have_next_code = new_code == self.next_code;

                // Update the slice holding the last decoded word.
                if have_next_code {
                    // If we did not have any burst code, we still hold that slice in the buffer.
                    if let Some(new_last) = target[..burst_size - 1].last_mut() {
                        let slice = core::mem::replace(new_last, &mut []);
                        last_decoded = Some(&*slice);
                    }
                }

                let cha;
                let is_in_buffer = usize::from(required_len) > out.len();
                // Check if we will need to store our current state into the buffer.
                if is_in_buffer {
                    if have_next_code {
                        // last_decoded will be Some if we have restored any code into the out slice.
                        // Otherwise it will still be present in the buffer.
                        if let Some(last) = last_decoded.take() {
                            self.buffer.bytes[..last.len()].copy_from_slice(last);
                            self.buffer.write_mark = last.len();
                            self.buffer.read_mark = last.len();
                        }

                        cha = self.buffer.fill_cscsc();
                    } else {
                        // Restore the decoded word into the buffer.
                        last_decoded = None;
                        cha = self.buffer.fill_reconstruct(&self.table, new_code);
                    }
                } else {
                    let (target, tail) = out.split_at_mut(usize::from(required_len));
                    out = tail;

                    if have_next_code {
                        // Reconstruct high.
                        let source = match last_decoded.take() {
                            Some(last) => last,
                            None => &self.buffer.bytes[..self.buffer.write_mark],
                        };

                        // We don't *actually* expect the unwrap to happen. Each source is at least 1
                        // byte long. But llvm doesn't know this (too much indirect loads and cases).
                        cha = source.get(0).map(|x| *x).unwrap_or(0);
                        target[..source.len()].copy_from_slice(source);
                        target[source.len()..][0] = cha;
                    } else {
                        cha = self.table.reconstruct(new_code, target);
                    }

                    // A new decoded word.
                    last_decoded = Some(target);
                }

                // Each newly read code creates one new code/link based on the preceding code if we
                // have enough space to put it there.
                if !self.table.is_full() {
                    self.table.derive(&deriv, cha);

                    if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff)
                        && self.code_buffer.code_size() < MAX_CODESIZE
                    {
                        self.bump_code_size();
                    }

                    self.next_code += 1;
                }

                // store the information on the decoded word.
                code_link = Some(DerivationBase {
                    code: new_code,
                    first: cha,
                });

                // Can't make any more progress with decoding.
                //
                // We have more data buffered but not enough space to put it? We want fetch a next
                // symbol if possible as in the case of it being a new symbol we can refer to the
                // buffered output as the source for that symbol's meaning and do a memcpy.
                //
                // Since this test is after decoding at least one code, we can now check for an
                // empty buffer and still guarantee progress when one was passed as a parameter.
                if is_in_buffer || out.is_empty() {
                    break;
                }
            }
        }

        // We need to store the last word into the buffer in case the first code in the next
        // iteration is the next_code.
        if let Some(tail) = last_decoded {
            self.buffer.bytes[..tail.len()].copy_from_slice(tail);
            self.buffer.write_mark = tail.len();
            // Mark the full buffer as having been consumed.
            self.buffer.read_mark = tail.len();
        }

        // Ensure we don't indicate that no progress was made if we read some bytes from the input
        // (which is progress).
        if o_in > inp.len() {
            if let Ok(LzwStatus::NoProgress) = status {
                status = Ok(LzwStatus::Ok);
            }
        }

        // Store the code/link state.
self.last = code_link;

        BufferResult {
            // `wrapping_sub` only for codegen: the remaining lengths never exceed the originals.
            consumed_in: o_in.wrapping_sub(inp.len()),
            consumed_out: o_out.wrapping_sub(out.len()),
            status,
        }
    }
}

// NOTE(review): generic parameters on these impl headers appear to have been stripped by the
// rendering (the same artifact removed `<Code>` from the `next_symbol` signatures, restored
// below) — confirm the impl headers against the upstream source.
impl DecodeState {
    /// Pull the next full code symbol from the input, refilling the bit buffer as needed.
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
        self.code_buffer.next_symbol(inp)
    }

    /// Grow the code width by one bit (up to `MAX_CODESIZE`).
    fn bump_code_size(&mut self) {
        self.code_buffer.bump_code_size()
    }

    /// Top up the 64-bit code buffer from the input slice, advancing the slice.
    fn refill_bits(&mut self, inp: &mut &[u8]) {
        self.code_buffer.refill_bits(inp)
    }
}

/// Bit-buffer for streams where codes are packed most-significant-bit first (e.g. GIF's
/// counterpart ordering, TIFF). Codes are taken from the high end of `bit_buffer`.
impl CodeBuffer for MsbBuffer {
    fn new(min_size: u8) -> Self {
        MsbBuffer {
            // Codes start one bit wider than the symbol size to fit clear/end codes.
            code_size: min_size + 1,
            code_mask: (1u16 << (min_size + 1)) - 1,
            bit_buffer: 0,
            bits: 0,
        }
    }

    /// Reset the code width after a clear code; buffered bits are kept.
    fn reset(&mut self, min_size: u8) {
        self.code_size = min_size + 1;
        self.code_mask = (1 << self.code_size) - 1;
    }

    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
        if self.bits < self.code_size {
            self.refill_bits(inp);
        }

        if self.bits < self.code_size {
            return None;
        }

        // Rotate the top `code_size` bits to the bottom and mask them out as the code.
        let mask = u64::from(self.code_mask);
        let rotbuf = self.bit_buffer.rotate_left(self.code_size.into());
        self.bit_buffer = rotbuf & !mask;
        self.bits -= self.code_size;
        Some((rotbuf & mask) as u16)
    }

    fn bump_code_size(&mut self) {
        self.code_size += 1;
        self.code_mask = (self.code_mask << 1) | 1;
    }

    fn refill_bits(&mut self, inp: &mut &[u8]) {
        // Read whole bytes only; at most 8 fit into the (emptied) 64-bit buffer.
        let wish_count = (64 - self.bits) / 8;
        let mut buffer = [0u8; 8];
        let new_bits = match inp.get(..usize::from(wish_count)) {
            Some(bytes) => {
                buffer[..usize::from(wish_count)].copy_from_slice(bytes);
                *inp = &inp[usize::from(wish_count)..];
                wish_count * 8
            }
            // Fewer bytes than wished for remain: take all of them.
            None => {
                let new_bits = inp.len() * 8;
                buffer[..inp.len()].copy_from_slice(inp);
                *inp = &[];
                new_bits as u8
            }
        };
        // New bytes are appended below the `self.bits` already-buffered top bits.
        self.bit_buffer |= u64::from_be_bytes(buffer) >> self.bits;
        self.bits += new_bits;
    }

    /// Peek up to `BURST` codes without consuming them; returns how many were available.
    fn peek_bits(&self, code: &mut [Code; BURST]) -> usize {
        let mut bit_buffer = self.bit_buffer;
        let mask = u64::from(self.code_mask);
        let mut consumed = 0;
        let mut cnt = 0;

        for b in code {
            let consumed_after = consumed + self.code_size;
            if consumed_after > self.bits {
                break;
            }

            cnt += 1;
            consumed = consumed_after;

            let rotbuf = bit_buffer.rotate_left(self.code_size.into());
            *b = (rotbuf & mask) as u16;
            // The read bits are 'appended' but we never interpret those appended bits.
            bit_buffer = rotbuf;
        }

        cnt
    }

    /// Drop `code_cnt` previously peeked codes from the buffer.
    fn consume_bits(&mut self, code_cnt: u8) {
        let bits = self.code_size * code_cnt;
        debug_assert!(bits <= self.bits);

        if bits >= self.bits {
            self.bit_buffer = 0;
        } else {
            // bits < self.bits so this must be smaller than the number size.
            self.bit_buffer = self.bit_buffer << bits;
        }

        self.bits = self.bits.wrapping_sub(bits);
    }

    fn max_code(&self) -> Code {
        self.code_mask
    }

    fn code_size(&self) -> u8 {
        self.code_size
    }
}

/// Bit-buffer for streams where codes are packed least-significant-bit first (e.g. GIF).
/// Codes are taken from the low end of `bit_buffer`.
impl CodeBuffer for LsbBuffer {
    fn new(min_size: u8) -> Self {
        LsbBuffer {
            code_size: min_size + 1,
            code_mask: (1u16 << (min_size + 1)) - 1,
            bit_buffer: 0,
            bits: 0,
        }
    }

    fn reset(&mut self, min_size: u8) {
        self.code_size = min_size + 1;
        self.code_mask = (1 << self.code_size) - 1;
    }

    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
        if self.bits < self.code_size {
            self.refill_bits(inp);
        }

        if self.bits < self.code_size {
            return None;
        }

        let mask = u64::from(self.code_mask);
        let code = self.bit_buffer & mask;
        self.bit_buffer >>= self.code_size;
        self.bits -= self.code_size;
        Some(code as u16)
    }

    fn bump_code_size(&mut self) {
        self.code_size += 1;
        self.code_mask = (self.code_mask << 1) | 1;
    }

    fn refill_bits(&mut self, inp: &mut &[u8]) {
        let wish_count = (64 - self.bits) / 8;
        let mut buffer = [0u8; 8];
        let new_bits = match inp.get(..usize::from(wish_count)) {
            Some(bytes) => {
                buffer[..usize::from(wish_count)].copy_from_slice(bytes);
                *inp = &inp[usize::from(wish_count)..];
                wish_count * 8
            }
            None => {
                let new_bits = inp.len() * 8;
                buffer[..inp.len()].copy_from_slice(inp);
                *inp = &[];
                new_bits as u8
            }
        };
        // `from_be_bytes(..).swap_bytes()` reads the bytes in little-endian order; new bytes
        // land above the `self.bits` already-buffered low bits.
        self.bit_buffer |= u64::from_be_bytes(buffer).swap_bytes() << self.bits;
        self.bits += new_bits;
    }

    fn peek_bits(&self, code: &mut [Code; BURST]) -> usize {
        let mut bit_buffer = self.bit_buffer;
let mask = u64::from(self.code_mask);
        let mut consumed = 0;
        let mut cnt = 0;

        // Extract codes from the low end until the buffered bits run out or BURST is reached.
        for b in code {
            let consumed_after = consumed + self.code_size;
            if consumed_after > self.bits {
                break;
            }

            cnt += 1;
            consumed = consumed_after;

            *b = (bit_buffer & mask) as u16;
            bit_buffer = bit_buffer >> self.code_size;
        }

        cnt
    }

    // Drop `code_cnt` previously peeked codes from the buffer.
    fn consume_bits(&mut self, code_cnt: u8) {
        let bits = self.code_size * code_cnt;
        debug_assert!(bits <= self.bits);

        if bits >= self.bits {
            self.bit_buffer = 0;
        } else {
            // bits < self.bits so this must be smaller than the number size.
            self.bit_buffer = self.bit_buffer >> bits;
        }

        self.bits = self.bits.wrapping_sub(bits);
    }

    fn max_code(&self) -> Code {
        self.code_mask
    }

    fn code_size(&self) -> u8 {
        self.code_size
    }
}

impl Buffer {
    // `MAX_ENTRIES` bytes suffice since no decoded word can be longer than the table.
    fn new() -> Self {
        Buffer {
            bytes: vec![0; MAX_ENTRIES].into_boxed_slice(),
            read_mark: 0,
            write_mark: 0,
        }
    }

    /// When encoding a sequence `cScSc` where `c` is any character and `S` is any string
    /// this results in two codes `AB`, `A` encoding `cS` and `B` encoding `cSc`. Supposing
    /// the buffer is already filled with the reconstruction of `A`, we can easily fill it
    /// with the reconstruction of `B`.
    fn fill_cscsc(&mut self) -> u8 {
        self.bytes[self.write_mark] = self.bytes[0];
        self.write_mark += 1;
        self.read_mark = 0;
        self.bytes[0]
    }

    // Fill the buffer by decoding from the table; returns the word's first byte.
    fn fill_reconstruct(&mut self, table: &Table, code: Code) -> u8 {
        self.write_mark = 0;
        self.read_mark = 0;
        let depth = table.depths[usize::from(code)];
        // Temporarily move the storage out to avoid aliasing `self` while the table writes into it.
        let mut memory = core::mem::replace(&mut self.bytes, Box::default());

        let out = &mut memory[..usize::from(depth)];
        let last = table.reconstruct(code, out);

        self.bytes = memory;
        self.write_mark = usize::from(depth);
        last
    }

    // The not-yet-consumed part of the buffered word.
    fn buffer(&self) -> &[u8] {
        &self.bytes[self.read_mark..self.write_mark]
    }

    fn consume(&mut self, amt: usize) {
        self.read_mark += amt;
    }
}

impl Table {
    fn new() -> Self {
        Table {
            inner: Vec::with_capacity(MAX_ENTRIES),
            depths: Vec::with_capacity(MAX_ENTRIES),
        }
    }

    // Truncate back to the static entries (alphabet + clear code + end code).
    fn clear(&mut self, min_size: u8) {
        let static_count = usize::from(1u16 << u16::from(min_size)) + 2;
        self.inner.truncate(static_count);
        self.depths.truncate(static_count);
    }

    // Rebuild the static entries from scratch.
    fn init(&mut self, min_size: u8) {
        self.inner.clear();
        self.depths.clear();
        for i in 0..(1u16 << u16::from(min_size)) {
            self.inner.push(Link::base(i as u8));
            self.depths.push(1);
        }
        // Clear code.
        self.inner.push(Link::base(0));
        self.depths.push(0);
        // End code.
1445 | self.inner.push(Link::base(0)); 1446 | self.depths.push(0); 1447 | } 1448 | 1449 | fn at(&self, code: Code) -> &Link { 1450 | &self.inner[usize::from(code)] 1451 | } 1452 | 1453 | fn is_empty(&self) -> bool { 1454 | self.inner.is_empty() 1455 | } 1456 | 1457 | fn is_full(&self) -> bool { 1458 | self.inner.len() >= MAX_ENTRIES 1459 | } 1460 | 1461 | fn derive(&mut self, from: &DerivationBase, byte: u8) { 1462 | let link = from.derive(byte); 1463 | let depth = self.depths[usize::from(from.code)] + 1; 1464 | self.inner.push(link); 1465 | self.depths.push(depth); 1466 | } 1467 | 1468 | // Derive multiple codes in a row, where each base is guaranteed to already exist. 1469 | fn derive_burst(&mut self, from: &mut DerivationBase, burst: &[Code], first: &[u8]) { 1470 | let mut depth_of = from.code; 1471 | // Note that false data dependency we want to get rid of! 1472 | // TODO: this pushes into a Vec, maybe we can make this cleaner. 1473 | for &code in burst { 1474 | let depth = self.depths[usize::from(depth_of)] + 1; 1475 | self.depths.push(depth); 1476 | depth_of = code; 1477 | } 1478 | 1479 | // Llvm tends to be flaky with code layout for the case of requiring an allocation. It's 1480 | // not clear if that can occur in practice but it relies on iterator size hint.. 
1481 | let extensions = burst.iter().zip(first); 1482 | self.inner.extend(extensions.map(|(&code, &first)| { 1483 | let link = from.derive(first); 1484 | from.code = code; 1485 | from.first = first; 1486 | link 1487 | })); 1488 | } 1489 | 1490 | fn reconstruct(&self, code: Code, out: &mut [u8]) -> u8 { 1491 | let mut code_iter = code; 1492 | let table = &self.inner[..=usize::from(code)]; 1493 | let first = table[usize::from(code)].first; 1494 | 1495 | let len = code_iter; 1496 | for ch in out.iter_mut().rev() { 1497 | //(code, cha) = self.table[k as usize]; 1498 | // Note: This could possibly be replaced with an unchecked array access if 1499 | // - value is asserted to be < self.next_code() in push 1500 | // - min_size is asserted to be < MAX_CODESIZE 1501 | let entry = &table[usize::from(code_iter)]; 1502 | code_iter = core::cmp::min(len, entry.prev); 1503 | *ch = entry.byte; 1504 | } 1505 | 1506 | first 1507 | } 1508 | } 1509 | 1510 | impl Link { 1511 | fn base(byte: u8) -> Self { 1512 | Link { 1513 | prev: 0, 1514 | byte, 1515 | first: byte, 1516 | } 1517 | } 1518 | } 1519 | 1520 | impl DerivationBase { 1521 | // TODO: this has self type to make it clear we might depend on the old in a future 1522 | // optimization. However, that has no practical purpose right now. 
1523 | fn derive(&self, byte: u8) -> Link { 1524 | Link { 1525 | prev: self.code, 1526 | byte, 1527 | first: self.first, 1528 | } 1529 | } 1530 | } 1531 | 1532 | #[cfg(test)] 1533 | mod tests { 1534 | use crate::alloc::vec::Vec; 1535 | #[cfg(feature = "std")] 1536 | use crate::StreamBuf; 1537 | use crate::{decode::Decoder, BitOrder}; 1538 | 1539 | #[test] 1540 | fn invalid_code_size_low() { 1541 | let _ = Decoder::new(BitOrder::Msb, 0); 1542 | let _ = Decoder::new(BitOrder::Msb, 1); 1543 | } 1544 | 1545 | #[test] 1546 | #[should_panic] 1547 | fn invalid_code_size_high() { 1548 | let _ = Decoder::new(BitOrder::Msb, 14); 1549 | } 1550 | 1551 | fn make_encoded() -> Vec { 1552 | const FILE: &'static [u8] = include_bytes!(concat!( 1553 | env!("CARGO_MANIFEST_DIR"), 1554 | "/benches/binary-8-msb.lzw" 1555 | )); 1556 | return Vec::from(FILE); 1557 | } 1558 | 1559 | #[test] 1560 | #[cfg(feature = "std")] 1561 | fn into_stream_buffer_no_alloc() { 1562 | let encoded = make_encoded(); 1563 | let mut decoder = Decoder::new(BitOrder::Msb, 8); 1564 | 1565 | let mut output = vec![]; 1566 | let mut buffer = [0; 512]; 1567 | let mut istream = decoder.into_stream(&mut output); 1568 | istream.set_buffer(&mut buffer[..]); 1569 | istream.decode(&encoded[..]).status.unwrap(); 1570 | 1571 | match istream.buffer { 1572 | Some(StreamBuf::Borrowed(_)) => {} 1573 | None => panic!("Decoded without buffer??"), 1574 | Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"), 1575 | } 1576 | } 1577 | 1578 | #[test] 1579 | #[cfg(feature = "std")] 1580 | fn into_stream_buffer_small_alloc() { 1581 | struct WriteTap(W); 1582 | const BUF_SIZE: usize = 512; 1583 | 1584 | impl std::io::Write for WriteTap { 1585 | fn write(&mut self, buf: &[u8]) -> std::io::Result { 1586 | assert!(buf.len() <= BUF_SIZE); 1587 | self.0.write(buf) 1588 | } 1589 | fn flush(&mut self) -> std::io::Result<()> { 1590 | self.0.flush() 1591 | } 1592 | } 1593 | 1594 | let encoded = make_encoded(); 1595 | let mut 
decoder = Decoder::new(BitOrder::Msb, 8); 1596 | 1597 | let mut output = vec![]; 1598 | let mut istream = decoder.into_stream(WriteTap(&mut output)); 1599 | istream.set_buffer_size(512); 1600 | istream.decode(&encoded[..]).status.unwrap(); 1601 | 1602 | match istream.buffer { 1603 | Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE), 1604 | Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"), 1605 | None => panic!("Decoded without buffer??"), 1606 | } 1607 | } 1608 | 1609 | #[test] 1610 | #[cfg(feature = "std")] 1611 | fn reset() { 1612 | let encoded = make_encoded(); 1613 | let mut decoder = Decoder::new(BitOrder::Msb, 8); 1614 | let mut reference = None; 1615 | 1616 | for _ in 0..2 { 1617 | let mut output = vec![]; 1618 | let mut buffer = [0; 512]; 1619 | let mut istream = decoder.into_stream(&mut output); 1620 | istream.set_buffer(&mut buffer[..]); 1621 | istream.decode_all(&encoded[..]).status.unwrap(); 1622 | 1623 | decoder.reset(); 1624 | if let Some(reference) = &reference { 1625 | assert_eq!(output, *reference); 1626 | } else { 1627 | reference = Some(output); 1628 | } 1629 | } 1630 | } 1631 | } 1632 | --------------------------------------------------------------------------------