├── .gitignore ├── fuzz ├── .gitignore ├── fuzz_targets │ ├── decode0.rs │ ├── roundtrip.rs │ └── diff.rs └── Cargo.toml ├── benches ├── lib-8-msb.lzw ├── Cargo-8-msb.lzw ├── binary-8-msb.lzw └── msb8.rs ├── tests ├── end_of_buffer.rs ├── implicit_reset.rs ├── async.rs ├── roundtrip_vec.rs └── roundtrip.rs ├── examples ├── lzw-compress.rs └── lzw-decompress.rs ├── README.md ├── LICENSE-MIT ├── .github └── workflows │ └── rust.yml ├── Cargo.toml ├── src ├── error.rs ├── lib.rs ├── encode_into_async.rs ├── decode_into_async.rs ├── encode.rs └── decode.rs ├── Changes.md ├── bin └── lzw.rs ├── LICENSE-APACHE └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target 3 | corpus 4 | artifacts 5 | -------------------------------------------------------------------------------- /benches/lib-8-msb.lzw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/weezl/HEAD/benches/lib-8-msb.lzw -------------------------------------------------------------------------------- /benches/Cargo-8-msb.lzw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/weezl/HEAD/benches/Cargo-8-msb.lzw -------------------------------------------------------------------------------- /benches/binary-8-msb.lzw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/weezl/HEAD/benches/binary-8-msb.lzw -------------------------------------------------------------------------------- /fuzz/fuzz_targets/decode0.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | use 
libfuzzer_sys::fuzz_target; 3 | 4 | fuzz_target!(|raw_data: &[u8]| { 5 | let mut decoder = weezl::decode::Decoder::new(weezl::BitOrder::Lsb, 0); 6 | let _ = decoder.into_stream(std::io::sink()) 7 | .decode_all(raw_data); 8 | }); 9 | -------------------------------------------------------------------------------- /tests/end_of_buffer.rs: -------------------------------------------------------------------------------- 1 | use weezl::{decode, BitOrder}; 2 | 3 | #[test] 4 | fn stop_after_end_of_buffer() { 5 | let inp = vec![0x00u8, 0x01, 0x02, 0xff]; 6 | let mut decoder = decode::Configuration::new(BitOrder::Lsb, 7) 7 | .with_yield_on_full_buffer(true) 8 | .build(); 9 | let mut out = vec![0u8, 0u8, 0u8]; 10 | let status = decoder.decode_bytes(&inp, &mut out).status; 11 | assert!(status.is_ok(), "{:?} {:?}", status, out); 12 | } 13 | -------------------------------------------------------------------------------- /examples/lzw-compress.rs: -------------------------------------------------------------------------------- 1 | //! Compresses the input from stdin and writes the result to stdout. 2 | 3 | use std::io::{self, BufWriter}; 4 | 5 | fn main() { 6 | match { 7 | let mut encoder = weezl::encode::Encoder::new(weezl::BitOrder::Msb, 8); 8 | let stdin = io::stdin(); 9 | let stdin = stdin.lock(); 10 | let stdout = io::stdout(); 11 | let stdout = BufWriter::new(stdout.lock()); 12 | encoder.into_stream(stdout).encode_all(stdin).status 13 | } { 14 | Ok(()) => (), 15 | Err(err) => eprintln!("{}", err), 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /examples/lzw-decompress.rs: -------------------------------------------------------------------------------- 1 | //! Decompresses the input from stdin and writes the result to stdout. 
2 | 3 | use std::io::{self, BufWriter}; 4 | 5 | fn main() { 6 | match { 7 | let mut decoder = weezl::decode::Decoder::new(weezl::BitOrder::Msb, 8); 8 | let stdout = io::stdout(); 9 | let stdout = BufWriter::new(stdout.lock()); 10 | let stdin = io::stdin(); 11 | let stdin = stdin.lock(); 12 | decoder.into_stream(stdout).decode_all(stdin).status 13 | } { 14 | Ok(()) => (), 15 | Err(err) => eprintln!("{}", err), 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/roundtrip.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | use libfuzzer_sys::fuzz_target; 3 | use weezl::{BitOrder, encode, decode}; 4 | 5 | fuzz_target!(|data: &[u8]| { 6 | let mut encoder = encode::Encoder::with_tiff_size_switch(BitOrder::Msb, 8); 7 | let mut buffer = Vec::with_capacity(2*data.len() + 40); 8 | let _ = encoder.into_stream(&mut buffer).encode_all(data); 9 | 10 | let mut decoder = decode::Decoder::with_tiff_size_switch(BitOrder::Msb, 8); 11 | let mut compare = vec![]; 12 | let result = decoder.into_stream(&mut compare).decode_all(buffer.as_slice()); 13 | assert!(result.status.is_ok(), "{:?}", result.status); 14 | assert_eq!(data, &*compare); 15 | }); 16 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | name = "lzw-fuzz" 4 | version = "0.0.0" 5 | authors = ["Automatically generated"] 6 | publish = false 7 | edition = "2018" 8 | 9 | [package.metadata] 10 | cargo-fuzz = true 11 | 12 | [dependencies] 13 | libfuzzer-sys = "0.3" 14 | lzw = { package = "weezl", version = "0.0.1" } 15 | 16 | [dependencies.weezl] 17 | path = ".." 
18 | 19 | # Prevent this from interfering with workspaces 20 | [workspace] 21 | members = ["."] 22 | 23 | [[bin]] 24 | name = "diff" 25 | path = "fuzz_targets/diff.rs" 26 | 27 | [[bin]] 28 | name = "roundtrip" 29 | path = "fuzz_targets/roundtrip.rs" 30 | test = false 31 | doc = false 32 | 33 | [[bin]] 34 | name = "decode0" 35 | path = "fuzz_targets/decode0.rs" 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # weezl 2 | 3 | LZW en- and decoding that goes weeeee! 4 | 5 | ## Overview 6 | 7 | This library, written in purely safe and dependency-less Rust, provides 8 | encoding and decoding for lzw compression in the style as it occurs in `gif` 9 | and `tiff` image formats. It has a standalone binary that may be used to handle 10 | those data streams but it is _not_ compatible with Spencer's `compress` and 11 | `uncompress` binaries (though a drop-in may be developed at a later point). 12 | 13 | Using in a `no_std` environment is also possible though an allocator is 14 | required. This, too, may be relaxed in a later release. A feature flag already 15 | exists but currently turns off almost all interfaces. 16 | 17 | ## License 18 | 19 | All code is dual licensed MIT OR Apache-2.0. 20 | -------------------------------------------------------------------------------- /tests/implicit_reset.rs: -------------------------------------------------------------------------------- 1 | use std::{env, fs}; 2 | use weezl::{decode, encode, BitOrder}; 3 | 4 | #[test] 5 | fn read_from_mangled() { 6 | let file = env::args().next().unwrap(); 7 | let data = fs::read(file).unwrap(); 8 | 9 | // For simplicity, encode 7-bit data. 
10 | let data: Vec<_> = data.iter().copied().map(|b| b & 0x7f).collect(); 11 | 12 | let mut encoder = encode::Encoder::new(BitOrder::Lsb, 7); 13 | let mut buffer = Vec::with_capacity(2 * data.len() + 40); 14 | let _ = encoder.into_stream(&mut buffer).encode_all(&*data); 15 | 16 | let mut decoder = decode::Decoder::new(BitOrder::Lsb, 7); 17 | let mut compare = vec![]; 18 | let result = decoder.into_stream(&mut compare).decode_all(&buffer[1..]); 19 | assert!(result.status.is_ok(), "{:?}", result.status); 20 | assert!(data == &*compare, "{:?}\n{:?}", data, compare); 21 | } 22 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) HeroicKatora 2020 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust CI 2 | on: 3 | push: 4 | branches: [ master ] 5 | pull_request: 6 | branches: [ master ] 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | rust: [stable, beta, nightly] 13 | features: ["", "alloc", "std", "async"] 14 | steps: 15 | - uses: actions/checkout@v2 16 | - run: rustup default ${{ matrix.rust }} 17 | - name: build 18 | run: > 19 | cargo build --verbose --no-default-features --features "$FEATURES" 20 | env: 21 | FEATURES: ${{ matrix.features }} 22 | - name: test 23 | run: > 24 | cargo test --tests --benches --no-default-features --features "$FEATURES" --release 25 | if: ${{ matrix.rust != '1.34.2' }} 26 | env: 27 | FEATURES: ${{ matrix.features }} 28 | build_msrv: 29 | runs-on: ubuntu-latest 30 | strategy: 31 | matrix: 32 | features: ["", "std"] 33 | steps: 34 | - uses: actions/checkout@v2 35 | - run: rustup default "1.34.2" 36 | - name: build 37 | run: cargo build --verbose --no-default-features --features "$FEATURES" 38 | env: 39 | FEATURES: ${{ matrix.features }} 40 | rustfmt: 41 | runs-on: ubuntu-latest 42 | steps: 43 | - uses: actions/checkout@v2 44 | - uses: actions-rs/toolchain@v1 45 | with: 46 | toolchain: stable 47 | override: true 48 | components: rustfmt, clippy 49 | - name: Run rustfmt check 50 | uses: actions-rs/cargo@v1 51 | with: 52 | command: fmt 53 | args: -- --check 54 | -------------------------------------------------------------------------------- /tests/async.rs: -------------------------------------------------------------------------------- 1 | use std::{env, fs}; 2 | use tokio::io::BufReader; 3 | use tokio::net::{TcpListener, TcpStream}; 4 | use tokio_util::compat::TokioAsyncReadCompatExt as _; 5 | use weezl::{decode, encode, BitOrder}; 6 | 7 | async fn pair() -> (TcpStream, TcpStream) { 8 | 
let listener = TcpListener::bind("localhost:0") 9 | .await 10 | .expect("No loop tcp for testing"); 11 | let addr = listener.local_addr().expect("No address for listener"); 12 | 13 | let connect = TcpStream::connect(addr); 14 | let accept = listener.accept(); 15 | 16 | let (a, (b, _)) = tokio::try_join!(connect, accept).expect("Can connect"); 17 | (a, b) 18 | } 19 | 20 | async fn assert_send_through(data: &[u8], send: &mut TcpStream, recv: &mut TcpStream) { 21 | let mut send = send.compat(); 22 | let mut recv = BufReader::new(recv).compat(); 23 | 24 | let mut encoder = encode::Encoder::new(BitOrder::Lsb, 8); 25 | let encode = encoder.into_async(&mut send).encode_all(data); 26 | 27 | let mut recv_buffer = vec![]; 28 | let mut decoder = decode::Decoder::new(BitOrder::Lsb, 8); 29 | let decode = decoder.into_async(&mut recv_buffer).decode_all(&mut recv); 30 | 31 | let (encode, decode) = tokio::join!(encode, decode); 32 | encode.status.expect("Could send/encoded data"); 33 | decode.status.expect("Could recv/decode data"); 34 | 35 | assert_eq!(recv_buffer, data); 36 | } 37 | 38 | #[test] 39 | fn with_streams() { 40 | let file = env::args().next().unwrap(); 41 | let data = fs::read(file).unwrap(); 42 | 43 | let rt = tokio::runtime::Runtime::new().expect("runtime"); 44 | let _enter = rt.enter(); 45 | 46 | let (mut send, mut recv) = rt.block_on(pair()); 47 | rt.block_on(assert_send_through(&data, &mut send, &mut recv)); 48 | } 49 | -------------------------------------------------------------------------------- /benches/msb8.rs: -------------------------------------------------------------------------------- 1 | extern crate criterion; 2 | extern crate weezl; 3 | 4 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; 5 | use std::fs; 6 | use weezl::{decode::Decoder, BitOrder, LzwStatus}; 7 | 8 | pub fn criterion_benchmark(c: &mut Criterion, file: &str) { 9 | let data = fs::read(file).expect("Benchmark input not found"); 10 | let 
mut group = c.benchmark_group("msb-8"); 11 | let id = BenchmarkId::new(file, data.len()); 12 | let mut outbuf = vec![0; 1 << 26]; // 64MB, what wuff uses.. 13 | let mut decode_once = |data: &[u8]| { 14 | let mut decoder = Decoder::new(BitOrder::Msb, 8); 15 | let mut written = 0; 16 | let outbuf = outbuf.as_mut_slice(); 17 | let mut data = data; 18 | loop { 19 | let result = decoder.decode_bytes(data, &mut outbuf[..]); 20 | let done = result.status.expect("Error"); 21 | data = &data[result.consumed_in..]; 22 | written += result.consumed_out; 23 | black_box(&outbuf[..result.consumed_out]); 24 | if let LzwStatus::Done = done { 25 | break; 26 | } 27 | if let LzwStatus::NoProgress = done { 28 | panic!("Need to make progress"); 29 | } 30 | } 31 | written 32 | }; 33 | group.throughput(Throughput::Bytes(decode_once(&data) as u64)); 34 | group.bench_with_input(id, &data, |b, data| { 35 | b.iter(|| { 36 | decode_once(data); 37 | }) 38 | }); 39 | } 40 | 41 | pub fn bench_toml(c: &mut Criterion) { 42 | criterion_benchmark(c, "benches/Cargo-8-msb.lzw"); 43 | } 44 | 45 | pub fn bench_binary(c: &mut Criterion) { 46 | criterion_benchmark(c, "benches/binary-8-msb.lzw"); 47 | } 48 | 49 | pub fn bench_lib(c: &mut Criterion) { 50 | criterion_benchmark(c, "benches/lib-8-msb.lzw"); 51 | } 52 | 53 | criterion_group!(benches, bench_toml, bench_binary, bench_lib); 54 | criterion_main!(benches); 55 | -------------------------------------------------------------------------------- /tests/roundtrip_vec.rs: -------------------------------------------------------------------------------- 1 | use std::{env, fs}; 2 | use weezl::{decode, encode, BitOrder}; 3 | 4 | #[derive(Clone, Copy, Debug)] 5 | enum Flavor { 6 | Gif, 7 | Tiff, 8 | } 9 | 10 | #[test] 11 | fn roundtrip_all_lsb() { 12 | roundtrip_all(BitOrder::Lsb); 13 | } 14 | 15 | #[test] 16 | fn roundtrip_all_msb() { 17 | roundtrip_all(BitOrder::Msb); 18 | } 19 | 20 | fn roundtrip_all(bit_order: BitOrder) { 21 | let file = 
env::args().next().unwrap(); 22 | let data = fs::read(file).unwrap(); 23 | 24 | for &flavor in &[Flavor::Gif, Flavor::Tiff] { 25 | for bit_width in 2..8 { 26 | let data: Vec<_> = data 27 | .iter() 28 | .copied() 29 | .map(|b| b & ((1 << bit_width) - 1)) 30 | .collect(); 31 | 32 | println!("Roundtrip test {:?} {:?} {}", flavor, bit_order, bit_width); 33 | assert_roundtrips(&*data, flavor, bit_width, bit_order); 34 | } 35 | } 36 | } 37 | 38 | fn assert_roundtrips(data: &[u8], flavor: Flavor, bit_width: u8, bit_order: BitOrder) { 39 | let (c, d): ( 40 | fn(BitOrder, u8) -> encode::Encoder, 41 | fn(BitOrder, u8) -> decode::Decoder, 42 | ) = match flavor { 43 | Flavor::Gif => (encode::Encoder::new, decode::Decoder::new), 44 | Flavor::Tiff => ( 45 | encode::Encoder::with_tiff_size_switch, 46 | decode::Decoder::with_tiff_size_switch, 47 | ), 48 | }; 49 | let mut encoder = c(bit_order, bit_width); 50 | let mut buffer = Vec::with_capacity(2 * data.len() + 40); 51 | 52 | let _ = encoder.into_vec(&mut buffer).encode_all(data); 53 | 54 | let mut decoder = d(bit_order, bit_width); 55 | let mut compare = vec![]; 56 | let result = decoder.into_vec(&mut compare).decode_all(buffer.as_slice()); 57 | assert!( 58 | result.status.is_ok(), 59 | "{:?}, {}, {:?}", 60 | bit_order, 61 | bit_width, 62 | result.status 63 | ); 64 | assert!( 65 | data == &*compare, 66 | "{:?}, {}\n{:?}\n{:?}", 67 | bit_order, 68 | bit_width, 69 | data, 70 | compare 71 | ); 72 | } 73 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/diff.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | use libfuzzer_sys::fuzz_target; 3 | 4 | fuzz_target!(|raw_data: &[u8]| { 5 | // No implicit restart in `lzw` so make sure there is one. Otherwise we get an instant 6 | // detection that is not helpful at all. 
7 | let mut data = vec![0b1000_0000]; 8 | data.extend_from_slice(raw_data); 9 | let data = data.as_slice(); 10 | const CUT_OFF: usize = 1 << 20; 11 | 12 | let mut detailed_ref: Option<_> = None; 13 | let reference = (|| { 14 | let mut decoder = lzw::Decoder::new(lzw::LsbReader::new(), 7); 15 | let mut data = data; 16 | let mut output = vec![]; 17 | while !data.is_empty() && output.len() < CUT_OFF { 18 | match decoder.decode_bytes(data) { 19 | Ok((0, _)) => break, 20 | Ok((len, decoded)) => { 21 | // eprintln!("Ref {:?}", decoded); 22 | data = &data[len..]; 23 | output.extend_from_slice(decoded); 24 | } 25 | Err(err) => { 26 | detailed_ref = Some(err); 27 | return Err(()); 28 | } 29 | } 30 | 31 | if decoder.has_ended() { 32 | break; 33 | } 34 | } 35 | output.truncate(CUT_OFF); 36 | Ok(output) 37 | })(); 38 | 39 | let mut detailed_err = None; 40 | let new = (|| { 41 | let mut decoder = weezl::decode::Decoder::new(weezl::BitOrder::Lsb, 7); 42 | let mut output = Vec::with_capacity(CUT_OFF); 43 | let err = decoder.into_stream(&mut output).decode(data); 44 | if let Err(err) = err.status { 45 | detailed_err = Some(err); 46 | return Err(()); 47 | } 48 | output.truncate(CUT_OFF); 49 | Ok(output) 50 | })(); 51 | 52 | // Output my be omitted if the stream did not end properly in an end code. 
53 | let ref_len = reference.as_ref().map_or(usize::MAX, |x| x.len()); 54 | let new_len = new.as_ref().map_or(usize::MAX, |x| x.len()); 55 | 56 | let reference = reference.map(|mut vec| { 57 | vec.truncate(ref_len.min(new_len)); 58 | vec 59 | }); 60 | 61 | let new = new.map(|mut vec| { 62 | vec.truncate(ref_len.min(new_len)); 63 | vec 64 | }); 65 | 66 | assert_eq!(reference, new, "{:?} vs {:?}", detailed_ref, detailed_err); 67 | }); 68 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "weezl" 3 | version = "0.1.12" 4 | license = "MIT OR Apache-2.0" 5 | description = "Fast LZW compression and decompression." 6 | authors = ["The image-rs Developers"] 7 | readme = "README.md" 8 | repository = "https://github.com/image-rs/weezl" 9 | documentation = "https://docs.rs/weezl" 10 | edition = "2018" 11 | exclude = ["benches/*.lzw"] 12 | 13 | [lib] 14 | name = "weezl" 15 | bench = false 16 | 17 | [dependencies.futures] 18 | optional = true 19 | version = "0.3.12" 20 | default-features = false 21 | features = ["std"] 22 | 23 | [dev-dependencies] 24 | criterion = "0.3.1" 25 | [dev-dependencies.tokio] 26 | version = "1" 27 | default-features = false 28 | features = ["macros", "io-util", "net", "rt", "rt-multi-thread"] 29 | [dev-dependencies.tokio-util] 30 | version = "0.6.2" 31 | default-features = false 32 | features = ["compat"] 33 | 34 | [features] 35 | default = ["std"] 36 | # Enable usage of the standard library and in particular any interface that 37 | # requires the io interfaces `Read` and `Write`. 38 | std = ["alloc"] 39 | # Enable usage of the `alloc` crate. You should always have this feature 40 | # enabled. Without this feature neither the encoder nor the decoder exists. 41 | # This is purely a reservation for future possibilities. 42 | alloc = [] 43 | # Enable usage of `async` through `futures`. 
This is basically the blocker 44 | # for `1.0` as we may track its version. Note that this negates no_std right 45 | # now but implicitly through being required from futures. We also use the 46 | # `std::io::Error` interface. Note that this features is NOT supported on 47 | # 1.34.2 but only on stable. 48 | async = ["futures", "std"] 49 | 50 | [[bin]] 51 | name = "lzw" 52 | path = "bin/lzw.rs" 53 | bench = false 54 | required-features = ["std"] 55 | 56 | [[bench]] 57 | name = "msb8" 58 | harness = false 59 | required-features = ["std"] 60 | 61 | [[example]] 62 | name = "lzw-compress" 63 | required-features = ["std"] 64 | 65 | [[example]] 66 | name = "lzw-decompress" 67 | required-features = ["std"] 68 | 69 | [[test]] 70 | name = "async" 71 | required-features = ["async", "std"] 72 | 73 | [[test]] 74 | name = "roundtrip" 75 | required-features = ["std"] 76 | 77 | [[test]] 78 | name = "roundtrip_vec" 79 | required-features = ["alloc"] 80 | 81 | [[test]] 82 | name = "implicit_reset" 83 | required-features = ["std"] 84 | 85 | [[test]] 86 | name = "end_of_buffer" 87 | required-features = ["alloc"] 88 | 89 | [package.metadata.docs.rs] 90 | all-features = true 91 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | /// The result of a coding operation on a pair of buffer. 2 | #[must_use = "Contains a status with potential error information"] 3 | #[derive(Debug)] 4 | pub struct BufferResult { 5 | /// The number of bytes consumed from the input buffer. 6 | pub consumed_in: usize, 7 | /// The number of bytes written into the output buffer. 8 | pub consumed_out: usize, 9 | /// The status after returning from the write call. 10 | pub status: Result, 11 | } 12 | 13 | /// The result of a coding operation into a vector. 
14 | #[must_use = "Contains a status with potential error information"] 15 | #[derive(Debug)] 16 | pub struct VectorResult { 17 | /// The number of bytes consumed from the input buffer. 18 | pub consumed_in: usize, 19 | /// The number of bytes written into the output buffer. 20 | pub consumed_out: usize, 21 | /// The status after returning from the write call. 22 | pub status: Result, 23 | } 24 | 25 | /// The result of coding into an output stream. 26 | #[cfg(feature = "std")] 27 | #[must_use = "Contains a status with potential error information"] 28 | #[derive(Debug)] 29 | pub struct StreamResult { 30 | /// The total number of bytes consumed from the reader. 31 | pub bytes_read: usize, 32 | /// The total number of bytes written into the writer. 33 | pub bytes_written: usize, 34 | /// The possible error that occurred. 35 | /// 36 | /// Note that when writing into streams it is not in general possible to recover from an error. 37 | pub status: std::io::Result<()>, 38 | } 39 | 40 | /// The status after successful coding of an LZW stream. 41 | #[derive(Debug, Clone, Copy)] 42 | pub enum LzwStatus { 43 | /// Everything went well. 44 | Ok, 45 | /// No bytes were read or written and no internal state advanced. 46 | /// 47 | /// If this is returned but your application can not provide more input data then decoding is 48 | /// definitely stuck for good and it should stop trying and report some error of its own. In 49 | /// other situations this may be used as a signal to refill an internal buffer. 50 | NoProgress, 51 | /// No more data will be produced because an end marker was reached. 52 | Done, 53 | } 54 | 55 | /// The error kind after unsuccessful coding of an LZW stream. 56 | #[derive(Debug, Clone, Copy)] 57 | pub enum LzwError { 58 | /// The input contained an invalid code. 59 | /// 60 | /// For decompression this refers to a code larger than those currently known through the prior 61 | /// decoding stages. 
For compression this refers to a byte that has no code representation due 62 | /// to being larger than permitted by the `size` parameter given to the Encoder. 63 | InvalidCode, 64 | } 65 | 66 | impl core::fmt::Display for LzwError { 67 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { 68 | match self { 69 | LzwError::InvalidCode => f.write_str("invalid code in LZW stream"), 70 | } 71 | } 72 | } 73 | 74 | #[cfg(feature = "std")] 75 | impl std::error::Error for LzwError {} 76 | -------------------------------------------------------------------------------- /Changes.md: -------------------------------------------------------------------------------- 1 | ## Version 0.1.12 2 | 3 | - Further adjusted a debug assertion for TIFF compatibility. It still had one 4 | of its OR conditions formulated incorrectly, relying on equality of the 5 | code word mask and the size switch code. In TIFF we hit the limit of the 6 | code dictionary (all 12-bit words) one code later than we would detect a 7 | code switch that we need to ignore. 8 | 9 | ## Version 0.1.11 10 | 11 | - Adjusted a debug assertion that handled 1-bit code sizes incorrectly in the 12 | decoder (relevant for TIFF but non-standard for GIF). In that case the size 13 | switch semantics are a bit messy. 14 | 15 | ## Version 0.1.10 16 | 17 | - Reverted changes made in 0.1.9 to the behavior of the decoder under non 18 | libtiff-compatibility mode. Trying to read the decoder with an empty output 19 | buffer will at least inspect the next symbol and either error or indicate the 20 | end-of-stream accordingly. 21 | 22 | ## Version 0.1.9 (yanked) 23 | 24 | - Increased decoding throughput by 3—30% depending on CPU and data. 25 | - Added `{encode,decode}::Configuration` as builder types for their respective 26 | module. They can be cheaply cloned. 27 | - Added `decode::Configuration::with_yield_on_full_buffer` to explicitly opt-in 28 | to libtiff compatibility. 
The decoder will not read or interpret further 29 | symbols of the decoding stream when the output buffer is full. This enables a 30 | caller to stop fetching symbols and elide an end of stream marker based on 31 | out-of-band length information. The decoder might otherwise error, trying to 32 | interpret data that does not belong to the stream. 33 | 34 | ## Version 0.1.8 35 | 36 | - Fixed incorrect state after `Decoder::reset` 37 | - Added `Debug` to result types 38 | 39 | ## Version 0.1.7 40 | 41 | - Implicit reset is now supported for decoding. 42 | 43 | ## Version 0.1.6 44 | 45 | - Fixed an integer overflow and panic that could occur during decoding. 46 | Decoding performance may degrade after long sequences without a reset code. 47 | 48 | ## Version 0.1.5 49 | 50 | - Added `IntoVec` adapters that simplify in-memory de- and encoding. A further 51 | 'one-shot' interface is exposed in the `Decoder` and `Encoder` themselves 52 | which makes the process a one liner in the simplest cases. Contrary to 53 | `IntoStream`, these are available in all cases and do not require `std`. 54 | 55 | ## Version 0.1.4 56 | 57 | - Added `IntoAsync` adapters for asynchronous de- and encoding. The interface 58 | is implemented only in terms of `futures = 0.3` traits at the moment. 59 | - Code sizes smaller than 2 are now allowed for decoding. Since they do not 60 | roundtrip it is still an error to use them in the decoder but this avoids 61 | accidental panicking, i.e. denial of service, in parsers. 62 | 63 | ## Version 0.1.3 64 | 65 | - Fixes an issue in compression that caused some data to be lost around clear 66 | codes. This could corrupt the data stream. 67 | 68 | ## Version 0.1.2 69 | 70 | - Fixes incorrect compression after `Encoder::reset`. 71 | 72 | ## Version 0.1.1 73 | 74 | - The `IntoStream` types now reuse their internal buffers. 
75 | - Added the methods `set_buffer`, `set_buffer_size` to `IntoStream` for both 76 | the encoder and decoder, used to control the automatic allocation. 77 | - Deprecated `IntoStream` in configurations without the `std` feature where the 78 | type can't even be constructed. 79 | 80 | ## Version 0.1.0 – Aleph 81 | 82 | - Initial major release 83 | - Support gif and tiff code size changes 84 | - Rough performance numbers: 85 | On i5-4690, 8GiB DIMM DDR3 Synchronous 1600 MHz (0,6 ns) 86 | ~70MB/s encode, ~230MB/s decode 87 | -------------------------------------------------------------------------------- /tests/roundtrip.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::{env, fs}; 3 | use weezl::{decode, encode, BitOrder}; 4 | 5 | #[derive(Clone, Copy, Debug)] 6 | enum Flavor { 7 | Gif, 8 | Tiff, 9 | } 10 | 11 | #[test] 12 | fn roundtrip_all_lsb_tiny() { 13 | roundtrip_all(BitOrder::Lsb, 1); 14 | } 15 | 16 | #[test] 17 | fn roundtrip_all_msb_tiny() { 18 | roundtrip_all(BitOrder::Msb, 1); 19 | } 20 | 21 | #[test] 22 | fn roundtrip_all_lsb() { 23 | roundtrip_all(BitOrder::Lsb, 1 << 20); 24 | } 25 | 26 | #[test] 27 | fn roundtrip_all_msb() { 28 | roundtrip_all(BitOrder::Msb, 1 << 20); 29 | } 30 | 31 | fn roundtrip_all(bit_order: BitOrder, max_io_len: usize) { 32 | let file = env::args().next().unwrap(); 33 | let data = fs::read(file).unwrap(); 34 | 35 | for &flavor in &[Flavor::Gif, Flavor::Tiff] { 36 | for bit_width in 2..8 { 37 | let data: Vec<_> = data 38 | .iter() 39 | .copied() 40 | .map(|b| b & ((1 << bit_width) - 1)) 41 | .collect(); 42 | 43 | let enc = match flavor { 44 | Flavor::Gif => encode::Configuration::new, 45 | Flavor::Tiff => encode::Configuration::with_tiff_size_switch, 46 | }(bit_order, bit_width); 47 | 48 | let dec = match flavor { 49 | Flavor::Gif => decode::Configuration::new, 50 | Flavor::Tiff => decode::Configuration::with_tiff_size_switch, 51 | }(bit_order, bit_width); 52 | 53 | 
let yielding = dec.clone().with_yield_on_full_buffer(true); 54 | 55 | println!("Roundtrip test {:?} {:?} {}", flavor, bit_order, bit_width); 56 | assert_roundtrips(&*data, enc.clone(), dec, max_io_len); 57 | 58 | // Our encoder always passes an enclosed stream. So this must be the same. 59 | assert_roundtrips(&*data, enc, yielding, max_io_len); 60 | } 61 | } 62 | } 63 | 64 | fn assert_roundtrips( 65 | data: &[u8], 66 | enc: encode::Configuration, 67 | dec: decode::Configuration, 68 | max_io_len: usize, 69 | ) { 70 | let mut encoder = enc.clone().build(); 71 | let mut writer = TinyWrite { 72 | data: Vec::with_capacity(2 * data.len() + 40), 73 | max_write_len: max_io_len, 74 | }; 75 | let _ = encoder.into_stream(&mut writer).encode_all(data); 76 | 77 | let mut decoder = dec.clone().build(); 78 | let mut compare = vec![]; 79 | 80 | let buf_reader = TinyRead { 81 | data: &writer.data, 82 | max_read_len: max_io_len, 83 | }; 84 | let result = decoder.into_stream(&mut compare).decode_all(buf_reader); 85 | assert!(result.status.is_ok(), "{:?}, {:?}", dec, result.status); 86 | assert!(data == &*compare, "{:?}\n{:?}\n{:?}", dec, data, compare); 87 | } 88 | 89 | struct TinyRead<'a> { 90 | data: &'a [u8], 91 | max_read_len: usize, 92 | } 93 | 94 | impl io::BufRead for TinyRead<'_> { 95 | fn fill_buf(&mut self) -> io::Result<&[u8]> { 96 | Ok(&self.data[..self.data.len().min(self.max_read_len)]) 97 | } 98 | fn consume(&mut self, n: usize) { 99 | debug_assert!(n <= self.max_read_len); 100 | self.data = &self.data[n..]; 101 | } 102 | } 103 | 104 | impl io::Read for TinyRead<'_> { 105 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 106 | let n = self.data.len().min(buf.len()).min(self.max_read_len); 107 | buf[..n].copy_from_slice(&self.data[..n]); 108 | self.data = &self.data[n..]; 109 | Ok(n) 110 | } 111 | } 112 | 113 | struct TinyWrite { 114 | data: Vec, 115 | max_write_len: usize, 116 | } 117 | 118 | impl io::Write for TinyWrite { 119 | fn write(&mut self, buf: &[u8]) -> 
io::Result { 120 | let n = buf.len().min(self.max_write_len); 121 | self.data.extend_from_slice(&buf[..n]); 122 | Ok(n) 123 | } 124 | 125 | fn flush(&mut self) -> io::Result<()> { 126 | Ok(()) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # LZW decoder and encoder 2 | //! 3 | //! This crates provides an `Encoder` and a `Decoder` in their respective modules. The code words 4 | //! are written from and to bit byte slices (or streams) where it is possible to write either the 5 | //! most or least significant bits first. The maximum possible code size is 12 bits, the smallest 6 | //! available code size is 2 bits. 7 | //! 8 | //! ## Example 9 | //! 10 | //! These two code blocks show the compression and corresponding decompression. Note that you must 11 | //! use the same arguments to `Encoder` and `Decoder`, otherwise the decoding might fail or produce 12 | //! bad results. 13 | //! 14 | #![cfg_attr(feature = "std", doc = "```")] 15 | #![cfg_attr(not(feature = "std"), doc = "```ignore")] 16 | //! use weezl::{BitOrder, encode::Encoder}; 17 | //! 18 | //! let data = b"Hello, world"; 19 | //! let compressed = Encoder::new(BitOrder::Msb, 9) 20 | //! .encode(data) 21 | //! .unwrap(); 22 | //! ``` 23 | //! 24 | #![cfg_attr(feature = "std", doc = "```")] 25 | #![cfg_attr(not(feature = "std"), doc = "```ignore")] 26 | //! use weezl::{BitOrder, decode::Decoder}; 27 | //! # let compressed = b"\x80\x04\x81\x94l\x1b\x06\xf0\xb0 \x1d\xc6\xf1\xc8l\x19 \x10".to_vec(); 28 | //! # let data = b"Hello, world"; 29 | //! 30 | //! let decompressed = Decoder::new(BitOrder::Msb, 9) 31 | //! .decode(&compressed) 32 | //! .unwrap(); 33 | //! assert_eq!(decompressed, data); 34 | //! ``` 35 | //! 36 | //! ## LZW Details 37 | //! 38 | //! The de- and encoder expect the LZW stream to start with a clear code and end with an 39 | //! 
end code which are defined as follows: 40 | //! 41 | //! * `CLEAR_CODE == 1 << min_code_size` 42 | //! * `END_CODE == CLEAR_CODE + 1` 43 | //! 44 | //! For optimal performance, all buffers and input and output slices should be as large as possible 45 | //! and at least 2048 bytes long. This extends to input streams which should have similarly sized 46 | //! buffers. This library uses Rust's standard allocation interfaces (`Box` and `Vec` to be 47 | //! precise). Since there are no ways to handle allocation errors it is not recommended to operate 48 | //! it on 16-bit targets. 49 | //! 50 | //! ## Allocations and standard library 51 | //! 52 | //! The main algorithm can be used in `no_std` as well, although it requires an allocator. This 53 | //! restriction might be lifted at a later stage. For this you should deactivate the `std` feature. 54 | //! The main interfaces stay intact but the `into_stream` combinator is no available. 55 | #![cfg_attr(not(feature = "std"), no_std)] 56 | #![forbid(unsafe_code)] 57 | #![forbid(missing_docs)] 58 | 59 | #[cfg(all(feature = "alloc", not(feature = "std")))] 60 | extern crate alloc; 61 | #[cfg(all(feature = "alloc", feature = "std"))] 62 | use std as alloc; 63 | 64 | pub(crate) const MAX_CODESIZE: u8 = 12; 65 | pub(crate) const MAX_ENTRIES: usize = 1 << MAX_CODESIZE as usize; 66 | 67 | /// Alias for a LZW code point 68 | pub(crate) type Code = u16; 69 | 70 | /// A default buffer size for encoding/decoding buffer. 71 | /// 72 | /// Note that this is larger than the default size for buffers (usually 4K) since each code word 73 | /// can expand to multiple bytes. Expanding one buffer would yield multiple and require a costly 74 | /// break in the decoding loop. Note that the decoded size can be up to quadratic in code block. 75 | pub(crate) const STREAM_BUF_SIZE: usize = 1 << 24; 76 | 77 | /// The order of bits in bytes. 
78 | #[derive(Clone, Copy, Debug)] 79 | pub enum BitOrder { 80 | /// The most significant bit is processed first. 81 | Msb, 82 | /// The least significant bit is processed first. 83 | Lsb, 84 | } 85 | 86 | /// An owned or borrowed buffer for stream operations. 87 | #[cfg(feature = "alloc")] 88 | pub(crate) enum StreamBuf<'d> { 89 | Borrowed(&'d mut [u8]), 90 | Owned(crate::alloc::vec::Vec), 91 | } 92 | 93 | #[cold] 94 | fn assert_decode_size(size: u8) { 95 | assert!( 96 | size <= MAX_CODESIZE, 97 | "Maximum code size 12 required, got {}", 98 | size 99 | ); 100 | } 101 | 102 | #[cold] 103 | fn assert_encode_size(size: u8) { 104 | assert!(size >= 2, "Minimum code size 2 required, got {}", size); 105 | assert!( 106 | size <= MAX_CODESIZE, 107 | "Maximum code size 12 required, got {}", 108 | size 109 | ); 110 | } 111 | 112 | #[cfg(feature = "alloc")] 113 | pub mod decode; 114 | #[cfg(feature = "alloc")] 115 | pub mod encode; 116 | mod error; 117 | 118 | #[cfg(feature = "std")] 119 | pub use self::error::StreamResult; 120 | pub use self::error::{BufferResult, LzwError, LzwStatus}; 121 | 122 | #[cfg(all(test, feature = "alloc"))] 123 | mod tests { 124 | use crate::decode::Decoder; 125 | use crate::encode::Encoder; 126 | 127 | #[cfg(feature = "std")] 128 | use crate::{decode, encode}; 129 | 130 | #[test] 131 | fn stable_send() { 132 | fn must_be_send() {} 133 | must_be_send::(); 134 | must_be_send::(); 135 | 136 | #[cfg(feature = "std")] 137 | fn _send_and_lt<'lt, T: Send + 'lt>() {} 138 | 139 | // Check that the inference `W: Send + 'd` => `IntoStream: Send + 'd` works. 
140 | #[cfg(feature = "std")] 141 | fn _all_send_writer<'d, W: std::io::Write + Send + 'd>() { 142 | _send_and_lt::<'d, decode::IntoStream<'d, W>>(); 143 | _send_and_lt::<'d, encode::IntoStream<'d, W>>(); 144 | } 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /src/encode_into_async.rs: -------------------------------------------------------------------------------- 1 | use crate::encode::IntoAsync; 2 | use crate::error::LzwStatus; 3 | use crate::error::StreamResult; 4 | use crate::StreamBuf; 5 | use std::io; 6 | 7 | impl<'d, W: futures::io::AsyncWrite + core::marker::Unpin> IntoAsync<'d, W> { 8 | /// Encode data from a reader. 9 | /// 10 | /// This will drain the supplied reader. It will not encode an end marker after all data has 11 | /// been processed. 12 | pub async fn encode(&mut self, read: impl futures::io::AsyncBufRead) -> StreamResult { 13 | self.encode_part(read, false).await 14 | } 15 | 16 | /// Encode data from a reader and an end marker. 17 | pub async fn encode_all(mut self, read: impl futures::io::AsyncBufRead) -> StreamResult { 18 | self.encode_part(read, true).await 19 | } 20 | 21 | /// Set the size of the intermediate decode buffer. 22 | /// 23 | /// A buffer of this size is allocated to hold one part of the decoded stream when no buffer is 24 | /// available and any decoding method is called. No buffer is allocated if `set_buffer` has 25 | /// been called. The buffer is reused. 26 | /// 27 | /// # Panics 28 | /// This method panics if `size` is `0`. 29 | pub fn set_buffer_size(&mut self, size: usize) { 30 | assert_ne!(size, 0, "Attempted to set empty buffer"); 31 | self.default_size = size; 32 | } 33 | 34 | /// Use a particular buffer as an intermediate decode buffer. 35 | /// 36 | /// Calling this sets or replaces the buffer. When a buffer has been set then it is used 37 | /// instead of dynamically allocating a buffer. 
Note that the size of the buffer is critical 38 | /// for efficient decoding. Some optimization techniques require the buffer to hold one or more 39 | /// previous decoded words. There is also additional overhead from `write` calls each time the 40 | /// buffer has been filled. 41 | /// 42 | /// # Panics 43 | /// This method panics if the `buffer` is empty. 44 | pub fn set_buffer(&mut self, buffer: &'d mut [u8]) { 45 | assert_ne!(buffer.len(), 0, "Attempted to set empty buffer"); 46 | self.buffer = Some(StreamBuf::Borrowed(buffer)); 47 | } 48 | 49 | async fn encode_part( 50 | &mut self, 51 | read: impl futures::io::AsyncBufRead, 52 | finish: bool, 53 | ) -> StreamResult { 54 | use futures::io::AsyncBufReadExt; 55 | use futures::io::AsyncWriteExt; 56 | 57 | let IntoAsync { 58 | encoder, 59 | writer, 60 | buffer, 61 | default_size, 62 | } = self; 63 | 64 | futures::pin_mut!(read); 65 | let mut read: core::pin::Pin<_> = read; 66 | 67 | let mut bytes_read = 0; 68 | let mut bytes_written = 0; 69 | 70 | // Converting to mutable refs to move into the `once` closure. 71 | let read_bytes = &mut bytes_read; 72 | let write_bytes = &mut bytes_written; 73 | 74 | let outbuf: &mut [u8] = 75 | match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } { 76 | StreamBuf::Borrowed(slice) => &mut *slice, 77 | StreamBuf::Owned(vec) => &mut *vec, 78 | }; 79 | assert!(!outbuf.is_empty()); 80 | 81 | let status = loop { 82 | // Try to grab one buffer of input data. 83 | let mut filler = read.as_mut(); 84 | let data = match filler.fill_buf().await { 85 | Ok(buf) => buf, 86 | Err(err) => break Err(err), 87 | }; 88 | 89 | if data.is_empty() { 90 | if finish { 91 | encoder.finish(); 92 | } else { 93 | break Ok(()); 94 | } 95 | } 96 | 97 | // Decode as much of the buffer as fits. 98 | let result = encoder.encode_bytes(data, &mut outbuf[..]); 99 | // Do the bookkeeping and consume the buffer. 
100 | *read_bytes += result.consumed_in; 101 | *write_bytes += result.consumed_out; 102 | read.as_mut().consume(result.consumed_in); 103 | 104 | // Handle an error status in the result. 105 | let done = match result.status { 106 | Ok(ok) => ok, 107 | Err(err) => { 108 | break Err(io::Error::new( 109 | io::ErrorKind::InvalidData, 110 | &*format!("{:?}", err), 111 | )); 112 | } 113 | }; 114 | 115 | if let LzwStatus::Done = done { 116 | break writer.write_all(&outbuf[..result.consumed_out]).await; 117 | } 118 | 119 | if let LzwStatus::NoProgress = done { 120 | break Err(io::Error::new( 121 | io::ErrorKind::UnexpectedEof, 122 | "No more data but no end marker detected", 123 | )); 124 | } 125 | 126 | // And finish by writing our result. 127 | // TODO: we may lose data on error (also on status error above) which we might want to 128 | // deterministically handle so that we don't need to restart everything from scratch as 129 | // the only recovery strategy. Any changes welcome. 130 | match writer.write_all(&outbuf[..result.consumed_out]).await { 131 | Ok(_) => {} 132 | Err(err) => break Err(err), 133 | } 134 | }; 135 | 136 | StreamResult { 137 | bytes_read, 138 | bytes_written, 139 | status, 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /bin/lzw.rs: -------------------------------------------------------------------------------- 1 | #![forbid(unsafe_code)] 2 | use std::path::PathBuf; 3 | use std::{env, ffi, fs, io, process}; 4 | 5 | extern crate weezl; 6 | use weezl::{decode as delzw, encode as enlzw, BitOrder}; 7 | 8 | fn main() { 9 | let args = env::args_os().skip(1); 10 | let flags = Flags::from_args(args).unwrap_or_else(|ParamError| explain()); 11 | 12 | let out = io::stdout(); 13 | let out = out.lock(); 14 | 15 | let mut files = flags.files; 16 | let input = files.pop().unwrap_or_else(explain); 17 | if !files.is_empty() { 18 | return explain(); 19 | } 20 | let operation = 
flags.operation.unwrap_or_else(explain); 21 | let min_code = if flags.min_code < 2 || flags.min_code > 12 { 22 | return explain(); 23 | } else { 24 | flags.min_code 25 | }; 26 | let bit_order = flags.bit_order; 27 | 28 | let result = match (input, operation) { 29 | (Input::File(file), Operation::Encode) => (|| { 30 | let data = fs::File::open(file)?; 31 | let file = io::BufReader::with_capacity(1 << 26, data); 32 | 33 | let mut encoder = enlzw::Encoder::new(bit_order, min_code); 34 | encoder.into_stream(out).encode_all(file).status 35 | })(), 36 | (Input::Stdin, Operation::Encode) => { 37 | let input = io::BufReader::with_capacity(1 << 26, io::stdin()); 38 | let mut encoder = enlzw::Encoder::new(bit_order, min_code); 39 | encoder.into_stream(out).encode_all(input).status 40 | } 41 | (Input::File(file), Operation::Decode) => (|| { 42 | let data = fs::File::open(file)?; 43 | let file = io::BufReader::with_capacity(1 << 26, data); 44 | 45 | let mut decoder = delzw::Decoder::new(bit_order, min_code); 46 | decoder.into_stream(out).decode_all(file).status 47 | })(), 48 | (Input::Stdin, Operation::Decode) => { 49 | let input = io::BufReader::with_capacity(1 << 26, io::stdin()); 50 | let mut decoder = delzw::Decoder::new(bit_order, min_code); 51 | decoder.into_stream(out).decode_all(input).status 52 | } 53 | }; 54 | 55 | result.expect("Operation Failed: "); 56 | } 57 | 58 | struct Flags { 59 | files: Vec, 60 | operation: Option, 61 | min_code: u8, 62 | bit_order: BitOrder, 63 | } 64 | 65 | struct ParamError; 66 | 67 | enum Input { 68 | File(PathBuf), 69 | Stdin, 70 | } 71 | 72 | enum Operation { 73 | Encode, 74 | Decode, 75 | } 76 | 77 | fn explain() -> T { 78 | println!( 79 | "Usage: lzw [-e|-d] \n\ 80 | Arguments:\n\ 81 | -e\t operation encode (default)\n\ 82 | -d\t operation decode\n\ 83 | \tfilepath or '-' for stdin" 84 | ); 85 | process::exit(1); 86 | } 87 | 88 | impl Default for Flags { 89 | fn default() -> Flags { 90 | Flags { 91 | files: vec![], 92 | operation: 
None, 93 | min_code: 8, 94 | bit_order: BitOrder::Msb, 95 | } 96 | } 97 | } 98 | 99 | impl Flags { 100 | fn from_args(mut args: impl Iterator) -> Result { 101 | let mut flags = Flags::default(); 102 | let mut operation = None; 103 | loop { 104 | match args.next().as_ref().and_then(|s| s.to_str()) { 105 | Some("-d") | Some("--decode") => { 106 | if operation.is_some() { 107 | return Err(ParamError); 108 | } 109 | operation = Some(Operation::Decode); 110 | } 111 | Some("-e") | Some("--encode") => { 112 | if operation.is_some() { 113 | return Err(ParamError); 114 | } 115 | operation = Some(Operation::Encode); 116 | } 117 | Some("-w") | Some("--word-bits") => match args.next() { 118 | None => return Err(ParamError), 119 | Some(bits) => { 120 | let st = bits.to_str().ok_or(ParamError)?; 121 | flags.min_code = st.parse().ok().ok_or(ParamError)?; 122 | } 123 | }, 124 | Some("-le") | Some("--little-endian") => { 125 | flags.bit_order = BitOrder::Lsb; 126 | } 127 | Some("-be") | Some("--big-endian") | Some("-ne") | Some("--network-endian") => { 128 | flags.bit_order = BitOrder::Msb; 129 | } 130 | Some("-") => { 131 | flags.files.push(Input::Stdin); 132 | } 133 | Some(other) if other.starts_with('-') => { 134 | // Reserved for future use. 135 | // -a: self-describing archive format, similar to actual compress 136 | // -b: maximum bits 137 | // -v: verbosity 138 | // some compress compatibility mode? Probably through arg(0) though. 
139 | return Err(ParamError); 140 | } 141 | Some(file) => { 142 | flags.files.push(Input::File(file.into())); 143 | } 144 | None => break, 145 | }; 146 | } 147 | 148 | flags.files.extend(args.map(|file| { 149 | if let Some("-") = file.to_str() { 150 | Input::Stdin 151 | } else { 152 | Input::File(file.into()) 153 | } 154 | })); 155 | 156 | flags.operation = operation; 157 | Ok(flags) 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/decode_into_async.rs: -------------------------------------------------------------------------------- 1 | use crate::decode::IntoAsync; 2 | use crate::error::LzwStatus; 3 | use crate::error::StreamResult; 4 | use crate::StreamBuf; 5 | use std::io; 6 | 7 | impl<'d, W: futures::io::AsyncWrite + core::marker::Unpin> IntoAsync<'d, W> { 8 | /// Decode data from a reader. 9 | /// 10 | /// This will read data until the stream is empty or an end marker is reached. 11 | pub async fn decode(&mut self, read: impl futures::io::AsyncBufRead) -> StreamResult { 12 | self.decode_part(read, false).await 13 | } 14 | 15 | /// Decode data from a reader, requiring an end marker. 16 | pub async fn decode_all(mut self, read: impl futures::io::AsyncBufRead) -> StreamResult { 17 | self.decode_part(read, true).await 18 | } 19 | 20 | /// Set the size of the intermediate decode buffer. 21 | /// 22 | /// A buffer of this size is allocated to hold one part of the decoded stream when no buffer is 23 | /// available and any decoding method is called. No buffer is allocated if `set_buffer` has 24 | /// been called. The buffer is reused. 25 | /// 26 | /// # Panics 27 | /// This method panics if `size` is `0`. 28 | pub fn set_buffer_size(&mut self, size: usize) { 29 | assert_ne!(size, 0, "Attempted to set empty buffer"); 30 | self.default_size = size; 31 | } 32 | 33 | /// Use a particular buffer as an intermediate decode buffer. 34 | /// 35 | /// Calling this sets or replaces the buffer. 
When a buffer has been set then it is used 36 | /// instead of dynamically allocating a buffer. Note that the size of the buffer is critical 37 | /// for efficient decoding. Some optimization techniques require the buffer to hold one or more 38 | /// previous decoded words. There is also additional overhead from `write` calls each time the 39 | /// buffer has been filled. 40 | /// 41 | /// # Panics 42 | /// This method panics if the `buffer` is empty. 43 | pub fn set_buffer(&mut self, buffer: &'d mut [u8]) { 44 | assert_ne!(buffer.len(), 0, "Attempted to set empty buffer"); 45 | self.buffer = Some(StreamBuf::Borrowed(buffer)); 46 | } 47 | 48 | async fn decode_part( 49 | &mut self, 50 | read: impl futures::io::AsyncBufRead, 51 | must_finish: bool, 52 | ) -> StreamResult { 53 | use futures::io::AsyncBufReadExt; 54 | use futures::io::AsyncWriteExt; 55 | 56 | let IntoAsync { 57 | decoder, 58 | writer, 59 | buffer, 60 | default_size, 61 | } = self; 62 | 63 | futures::pin_mut!(read); 64 | let mut read: core::pin::Pin<_> = read; 65 | 66 | let mut bytes_read = 0; 67 | let mut bytes_written = 0; 68 | 69 | // Converting to mutable refs to move into the `once` closure. 70 | let read_bytes = &mut bytes_read; 71 | let write_bytes = &mut bytes_written; 72 | 73 | let outbuf: &mut [u8] = 74 | match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } { 75 | StreamBuf::Borrowed(slice) => &mut *slice, 76 | StreamBuf::Owned(vec) => &mut *vec, 77 | }; 78 | assert!(!outbuf.is_empty()); 79 | 80 | let status = loop { 81 | // Try to grab one buffer of input data. 82 | let mut filler = read.as_mut(); 83 | let data = match filler.fill_buf().await { 84 | Ok(buf) => buf, 85 | Err(err) => break Err(err), 86 | }; 87 | 88 | // Decode as much of the buffer as fits. 89 | let result = decoder.decode_bytes(data, &mut outbuf[..]); 90 | // Do the bookkeeping and consume the buffer. 
91 | *read_bytes += result.consumed_in; 92 | *write_bytes += result.consumed_out; 93 | read.as_mut().consume(result.consumed_in); 94 | 95 | // Handle an error status in the result. 96 | let status = match result.status { 97 | Ok(ok) => ok, 98 | Err(err) => { 99 | break Err(io::Error::new( 100 | io::ErrorKind::InvalidData, 101 | &*format!("{:?}", err), 102 | )); 103 | } 104 | }; 105 | 106 | // Check if we had any new data at all. 107 | if let LzwStatus::NoProgress = status { 108 | debug_assert_eq!( 109 | result.consumed_out, 0, 110 | "No progress means we have not decoded any data" 111 | ); 112 | // In particular we did not finish decoding. 113 | if must_finish { 114 | break Err(io::Error::new( 115 | io::ErrorKind::UnexpectedEof, 116 | "No more data but no end marker detected", 117 | )); 118 | } else { 119 | break Ok(()); 120 | } 121 | } 122 | 123 | // And finish by writing our result. 124 | // TODO: we may lose data on error (also on status error above) which we might want to 125 | // deterministically handle so that we don't need to restart everything from scratch as 126 | // the only recovery strategy. Any changes welcome. 127 | match writer.write_all(&outbuf[..result.consumed_out]).await { 128 | Ok(_) => {} 129 | Err(err) => break Err(err), 130 | } 131 | 132 | if let LzwStatus::Done = status { 133 | break Ok(()); 134 | } 135 | }; 136 | 137 | StreamResult { 138 | bytes_read, 139 | bytes_written, 140 | status, 141 | } 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | [[package]] 4 | name = "atty" 5 | version = "0.2.14" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | dependencies = [ 8 | "hermit-abi 0.1.19 (registry+https://github.com/rust-lang/crates.io-index)", 9 | "libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)", 10 | "winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", 11 | ] 12 | 13 | [[package]] 14 | name = "autocfg" 15 | version = "1.1.0" 16 | source = "registry+https://github.com/rust-lang/crates.io-index" 17 | 18 | [[package]] 19 | name = "bitflags" 20 | version = "1.3.2" 21 | source = "registry+https://github.com/rust-lang/crates.io-index" 22 | 23 | [[package]] 24 | name = "bstr" 25 | version = "0.2.17" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | dependencies = [ 28 | "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 29 | "memchr 2.5.0 (registry+https://github.com/rust-lang/crates.io-index)", 30 | "regex-automata 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", 31 | "serde 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)", 32 | ] 33 | 34 | [[package]] 35 | name = "bumpalo" 36 | version = "3.10.0" 37 | source = "registry+https://github.com/rust-lang/crates.io-index" 38 | 39 | [[package]] 40 | name = "bytes" 41 | version = "1.1.0" 42 | source = "registry+https://github.com/rust-lang/crates.io-index" 43 | 44 | [[package]] 45 | name = "cast" 46 | version = "0.2.7" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | dependencies = [ 49 | "rustc_version 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 50 | ] 51 | 52 | [[package]] 53 | name = "cast" 54 | version = "0.3.0" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | 57 | [[package]] 58 | name = "cfg-if" 59 | version = "1.0.0" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | 62 | [[package]] 63 | name = "clap" 64 | version = 
"2.34.0" 65 | source = "registry+https://github.com/rust-lang/crates.io-index" 66 | dependencies = [ 67 | "bitflags 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", 68 | "textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", 69 | "unicode-width 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", 70 | ] 71 | 72 | [[package]] 73 | name = "criterion" 74 | version = "0.3.6" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | dependencies = [ 77 | "atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", 78 | "cast 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", 79 | "clap 2.34.0 (registry+https://github.com/rust-lang/crates.io-index)", 80 | "criterion-plot 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", 81 | "csv 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)", 82 | "itertools 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)", 83 | "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 84 | "num-traits 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)", 85 | "oorandom 11.1.3 (registry+https://github.com/rust-lang/crates.io-index)", 86 | "plotters 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", 87 | "rayon 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)", 88 | "regex 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)", 89 | "serde 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)", 90 | "serde_cbor 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)", 91 | "serde_derive 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)", 92 | "serde_json 1.0.82 (registry+https://github.com/rust-lang/crates.io-index)", 93 | "tinytemplate 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 94 | "walkdir 2.3.2 (registry+https://github.com/rust-lang/crates.io-index)", 95 | ] 96 | 97 | [[package]] 98 | name = "criterion-plot" 
99 | version = "0.4.4" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | dependencies = [ 102 | "cast 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", 103 | "itertools 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)", 104 | ] 105 | 106 | [[package]] 107 | name = "crossbeam-channel" 108 | version = "0.5.5" 109 | source = "registry+https://github.com/rust-lang/crates.io-index" 110 | dependencies = [ 111 | "cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 112 | "crossbeam-utils 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", 113 | ] 114 | 115 | [[package]] 116 | name = "crossbeam-deque" 117 | version = "0.8.1" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | dependencies = [ 120 | "cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 121 | "crossbeam-epoch 0.9.9 (registry+https://github.com/rust-lang/crates.io-index)", 122 | "crossbeam-utils 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", 123 | ] 124 | 125 | [[package]] 126 | name = "crossbeam-epoch" 127 | version = "0.9.9" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | dependencies = [ 130 | "autocfg 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 131 | "cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 132 | "crossbeam-utils 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", 133 | "memoffset 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", 134 | "once_cell 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)", 135 | "scopeguard 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 136 | ] 137 | 138 | [[package]] 139 | name = "crossbeam-utils" 140 | version = "0.8.10" 141 | source = "registry+https://github.com/rust-lang/crates.io-index" 142 | dependencies = [ 143 | "cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 144 | 
"once_cell 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)", 145 | ] 146 | 147 | [[package]] 148 | name = "csv" 149 | version = "1.1.6" 150 | source = "registry+https://github.com/rust-lang/crates.io-index" 151 | dependencies = [ 152 | "bstr 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", 153 | "csv-core 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", 154 | "itoa 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", 155 | "ryu 1.0.10 (registry+https://github.com/rust-lang/crates.io-index)", 156 | "serde 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)", 157 | ] 158 | 159 | [[package]] 160 | name = "csv-core" 161 | version = "0.1.10" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | dependencies = [ 164 | "memchr 2.5.0 (registry+https://github.com/rust-lang/crates.io-index)", 165 | ] 166 | 167 | [[package]] 168 | name = "either" 169 | version = "1.7.0" 170 | source = "registry+https://github.com/rust-lang/crates.io-index" 171 | 172 | [[package]] 173 | name = "futures" 174 | version = "0.3.21" 175 | source = "registry+https://github.com/rust-lang/crates.io-index" 176 | dependencies = [ 177 | "futures-channel 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 178 | "futures-core 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 179 | "futures-io 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 180 | "futures-sink 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 181 | "futures-task 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 182 | "futures-util 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 183 | ] 184 | 185 | [[package]] 186 | name = "futures-channel" 187 | version = "0.3.21" 188 | source = "registry+https://github.com/rust-lang/crates.io-index" 189 | dependencies = [ 190 | "futures-core 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 191 | "futures-sink 
0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 192 | ] 193 | 194 | [[package]] 195 | name = "futures-core" 196 | version = "0.3.21" 197 | source = "registry+https://github.com/rust-lang/crates.io-index" 198 | 199 | [[package]] 200 | name = "futures-io" 201 | version = "0.3.21" 202 | source = "registry+https://github.com/rust-lang/crates.io-index" 203 | 204 | [[package]] 205 | name = "futures-sink" 206 | version = "0.3.21" 207 | source = "registry+https://github.com/rust-lang/crates.io-index" 208 | 209 | [[package]] 210 | name = "futures-task" 211 | version = "0.3.21" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | 214 | [[package]] 215 | name = "futures-util" 216 | version = "0.3.21" 217 | source = "registry+https://github.com/rust-lang/crates.io-index" 218 | dependencies = [ 219 | "futures-channel 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 220 | "futures-core 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 221 | "futures-io 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 222 | "futures-sink 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 223 | "futures-task 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 224 | "memchr 2.5.0 (registry+https://github.com/rust-lang/crates.io-index)", 225 | "pin-project-lite 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", 226 | "pin-utils 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 227 | "slab 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", 228 | ] 229 | 230 | [[package]] 231 | name = "half" 232 | version = "1.8.2" 233 | source = "registry+https://github.com/rust-lang/crates.io-index" 234 | 235 | [[package]] 236 | name = "hermit-abi" 237 | version = "0.1.19" 238 | source = "registry+https://github.com/rust-lang/crates.io-index" 239 | dependencies = [ 240 | "libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)", 241 | ] 242 | 243 | 
[[package]] 244 | name = "itertools" 245 | version = "0.10.3" 246 | source = "registry+https://github.com/rust-lang/crates.io-index" 247 | dependencies = [ 248 | "either 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)", 249 | ] 250 | 251 | [[package]] 252 | name = "itoa" 253 | version = "0.4.8" 254 | source = "registry+https://github.com/rust-lang/crates.io-index" 255 | 256 | [[package]] 257 | name = "itoa" 258 | version = "1.0.2" 259 | source = "registry+https://github.com/rust-lang/crates.io-index" 260 | 261 | [[package]] 262 | name = "js-sys" 263 | version = "0.3.58" 264 | source = "registry+https://github.com/rust-lang/crates.io-index" 265 | dependencies = [ 266 | "wasm-bindgen 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 267 | ] 268 | 269 | [[package]] 270 | name = "lazy_static" 271 | version = "1.4.0" 272 | source = "registry+https://github.com/rust-lang/crates.io-index" 273 | 274 | [[package]] 275 | name = "libc" 276 | version = "0.2.126" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | 279 | [[package]] 280 | name = "log" 281 | version = "0.4.17" 282 | source = "registry+https://github.com/rust-lang/crates.io-index" 283 | dependencies = [ 284 | "cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 285 | ] 286 | 287 | [[package]] 288 | name = "memchr" 289 | version = "2.5.0" 290 | source = "registry+https://github.com/rust-lang/crates.io-index" 291 | 292 | [[package]] 293 | name = "memoffset" 294 | version = "0.6.5" 295 | source = "registry+https://github.com/rust-lang/crates.io-index" 296 | dependencies = [ 297 | "autocfg 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 298 | ] 299 | 300 | [[package]] 301 | name = "mio" 302 | version = "0.8.4" 303 | source = "registry+https://github.com/rust-lang/crates.io-index" 304 | dependencies = [ 305 | "libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)", 306 | "log 0.4.17 
(registry+https://github.com/rust-lang/crates.io-index)", 307 | "wasi 0.11.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)", 308 | "windows-sys 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)", 309 | ] 310 | 311 | [[package]] 312 | name = "num-traits" 313 | version = "0.2.15" 314 | source = "registry+https://github.com/rust-lang/crates.io-index" 315 | dependencies = [ 316 | "autocfg 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 317 | ] 318 | 319 | [[package]] 320 | name = "num_cpus" 321 | version = "1.13.1" 322 | source = "registry+https://github.com/rust-lang/crates.io-index" 323 | dependencies = [ 324 | "hermit-abi 0.1.19 (registry+https://github.com/rust-lang/crates.io-index)", 325 | "libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)", 326 | ] 327 | 328 | [[package]] 329 | name = "once_cell" 330 | version = "1.13.0" 331 | source = "registry+https://github.com/rust-lang/crates.io-index" 332 | 333 | [[package]] 334 | name = "oorandom" 335 | version = "11.1.3" 336 | source = "registry+https://github.com/rust-lang/crates.io-index" 337 | 338 | [[package]] 339 | name = "pin-project-lite" 340 | version = "0.2.9" 341 | source = "registry+https://github.com/rust-lang/crates.io-index" 342 | 343 | [[package]] 344 | name = "pin-utils" 345 | version = "0.1.0" 346 | source = "registry+https://github.com/rust-lang/crates.io-index" 347 | 348 | [[package]] 349 | name = "plotters" 350 | version = "0.3.2" 351 | source = "registry+https://github.com/rust-lang/crates.io-index" 352 | dependencies = [ 353 | "num-traits 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)", 354 | "plotters-backend 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", 355 | "plotters-svg 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", 356 | "wasm-bindgen 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 357 | "web-sys 0.3.58 
(registry+https://github.com/rust-lang/crates.io-index)", 358 | ] 359 | 360 | [[package]] 361 | name = "plotters-backend" 362 | version = "0.3.4" 363 | source = "registry+https://github.com/rust-lang/crates.io-index" 364 | 365 | [[package]] 366 | name = "plotters-svg" 367 | version = "0.3.2" 368 | source = "registry+https://github.com/rust-lang/crates.io-index" 369 | dependencies = [ 370 | "plotters-backend 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", 371 | ] 372 | 373 | [[package]] 374 | name = "proc-macro2" 375 | version = "1.0.40" 376 | source = "registry+https://github.com/rust-lang/crates.io-index" 377 | dependencies = [ 378 | "unicode-ident 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", 379 | ] 380 | 381 | [[package]] 382 | name = "quote" 383 | version = "1.0.20" 384 | source = "registry+https://github.com/rust-lang/crates.io-index" 385 | dependencies = [ 386 | "proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", 387 | ] 388 | 389 | [[package]] 390 | name = "rayon" 391 | version = "1.5.3" 392 | source = "registry+https://github.com/rust-lang/crates.io-index" 393 | dependencies = [ 394 | "autocfg 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 395 | "crossbeam-deque 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", 396 | "either 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)", 397 | "rayon-core 1.9.3 (registry+https://github.com/rust-lang/crates.io-index)", 398 | ] 399 | 400 | [[package]] 401 | name = "rayon-core" 402 | version = "1.9.3" 403 | source = "registry+https://github.com/rust-lang/crates.io-index" 404 | dependencies = [ 405 | "crossbeam-channel 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", 406 | "crossbeam-deque 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", 407 | "crossbeam-utils 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", 408 | "num_cpus 1.13.1 
(registry+https://github.com/rust-lang/crates.io-index)", 409 | ] 410 | 411 | [[package]] 412 | name = "regex" 413 | version = "1.6.0" 414 | source = "registry+https://github.com/rust-lang/crates.io-index" 415 | dependencies = [ 416 | "regex-syntax 0.6.27 (registry+https://github.com/rust-lang/crates.io-index)", 417 | ] 418 | 419 | [[package]] 420 | name = "regex-automata" 421 | version = "0.1.10" 422 | source = "registry+https://github.com/rust-lang/crates.io-index" 423 | 424 | [[package]] 425 | name = "regex-syntax" 426 | version = "0.6.27" 427 | source = "registry+https://github.com/rust-lang/crates.io-index" 428 | 429 | [[package]] 430 | name = "rustc_version" 431 | version = "0.4.0" 432 | source = "registry+https://github.com/rust-lang/crates.io-index" 433 | dependencies = [ 434 | "semver 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)", 435 | ] 436 | 437 | [[package]] 438 | name = "ryu" 439 | version = "1.0.10" 440 | source = "registry+https://github.com/rust-lang/crates.io-index" 441 | 442 | [[package]] 443 | name = "same-file" 444 | version = "1.0.6" 445 | source = "registry+https://github.com/rust-lang/crates.io-index" 446 | dependencies = [ 447 | "winapi-util 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", 448 | ] 449 | 450 | [[package]] 451 | name = "scopeguard" 452 | version = "1.1.0" 453 | source = "registry+https://github.com/rust-lang/crates.io-index" 454 | 455 | [[package]] 456 | name = "semver" 457 | version = "1.0.12" 458 | source = "registry+https://github.com/rust-lang/crates.io-index" 459 | 460 | [[package]] 461 | name = "serde" 462 | version = "1.0.138" 463 | source = "registry+https://github.com/rust-lang/crates.io-index" 464 | 465 | [[package]] 466 | name = "serde_cbor" 467 | version = "0.11.2" 468 | source = "registry+https://github.com/rust-lang/crates.io-index" 469 | dependencies = [ 470 | "half 1.8.2 (registry+https://github.com/rust-lang/crates.io-index)", 471 | "serde 1.0.138 
(registry+https://github.com/rust-lang/crates.io-index)", 472 | ] 473 | 474 | [[package]] 475 | name = "serde_derive" 476 | version = "1.0.138" 477 | source = "registry+https://github.com/rust-lang/crates.io-index" 478 | dependencies = [ 479 | "proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", 480 | "quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", 481 | "syn 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)", 482 | ] 483 | 484 | [[package]] 485 | name = "serde_json" 486 | version = "1.0.82" 487 | source = "registry+https://github.com/rust-lang/crates.io-index" 488 | dependencies = [ 489 | "itoa 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", 490 | "ryu 1.0.10 (registry+https://github.com/rust-lang/crates.io-index)", 491 | "serde 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)", 492 | ] 493 | 494 | [[package]] 495 | name = "slab" 496 | version = "0.4.6" 497 | source = "registry+https://github.com/rust-lang/crates.io-index" 498 | 499 | [[package]] 500 | name = "socket2" 501 | version = "0.4.4" 502 | source = "registry+https://github.com/rust-lang/crates.io-index" 503 | dependencies = [ 504 | "libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)", 505 | "winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", 506 | ] 507 | 508 | [[package]] 509 | name = "syn" 510 | version = "1.0.98" 511 | source = "registry+https://github.com/rust-lang/crates.io-index" 512 | dependencies = [ 513 | "proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", 514 | "quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", 515 | "unicode-ident 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", 516 | ] 517 | 518 | [[package]] 519 | name = "textwrap" 520 | version = "0.11.0" 521 | source = "registry+https://github.com/rust-lang/crates.io-index" 522 | dependencies = [ 523 | "unicode-width 0.1.9 
(registry+https://github.com/rust-lang/crates.io-index)", 524 | ] 525 | 526 | [[package]] 527 | name = "tinytemplate" 528 | version = "1.2.1" 529 | source = "registry+https://github.com/rust-lang/crates.io-index" 530 | dependencies = [ 531 | "serde 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)", 532 | "serde_json 1.0.82 (registry+https://github.com/rust-lang/crates.io-index)", 533 | ] 534 | 535 | [[package]] 536 | name = "tokio" 537 | version = "1.19.2" 538 | source = "registry+https://github.com/rust-lang/crates.io-index" 539 | dependencies = [ 540 | "bytes 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 541 | "libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)", 542 | "memchr 2.5.0 (registry+https://github.com/rust-lang/crates.io-index)", 543 | "mio 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)", 544 | "num_cpus 1.13.1 (registry+https://github.com/rust-lang/crates.io-index)", 545 | "once_cell 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)", 546 | "pin-project-lite 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", 547 | "socket2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", 548 | "tokio-macros 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", 549 | "winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", 550 | ] 551 | 552 | [[package]] 553 | name = "tokio-macros" 554 | version = "1.8.0" 555 | source = "registry+https://github.com/rust-lang/crates.io-index" 556 | dependencies = [ 557 | "proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", 558 | "quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", 559 | "syn 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)", 560 | ] 561 | 562 | [[package]] 563 | name = "tokio-util" 564 | version = "0.6.10" 565 | source = "registry+https://github.com/rust-lang/crates.io-index" 566 | dependencies = [ 567 | "bytes 1.1.0 
(registry+https://github.com/rust-lang/crates.io-index)", 568 | "futures-core 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 569 | "futures-io 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 570 | "futures-sink 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 571 | "log 0.4.17 (registry+https://github.com/rust-lang/crates.io-index)", 572 | "pin-project-lite 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", 573 | "tokio 1.19.2 (registry+https://github.com/rust-lang/crates.io-index)", 574 | ] 575 | 576 | [[package]] 577 | name = "unicode-ident" 578 | version = "1.0.1" 579 | source = "registry+https://github.com/rust-lang/crates.io-index" 580 | 581 | [[package]] 582 | name = "unicode-width" 583 | version = "0.1.9" 584 | source = "registry+https://github.com/rust-lang/crates.io-index" 585 | 586 | [[package]] 587 | name = "walkdir" 588 | version = "2.3.2" 589 | source = "registry+https://github.com/rust-lang/crates.io-index" 590 | dependencies = [ 591 | "same-file 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", 592 | "winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", 593 | "winapi-util 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", 594 | ] 595 | 596 | [[package]] 597 | name = "wasi" 598 | version = "0.11.0+wasi-snapshot-preview1" 599 | source = "registry+https://github.com/rust-lang/crates.io-index" 600 | 601 | [[package]] 602 | name = "wasm-bindgen" 603 | version = "0.2.81" 604 | source = "registry+https://github.com/rust-lang/crates.io-index" 605 | dependencies = [ 606 | "cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 607 | "wasm-bindgen-macro 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 608 | ] 609 | 610 | [[package]] 611 | name = "wasm-bindgen-backend" 612 | version = "0.2.81" 613 | source = "registry+https://github.com/rust-lang/crates.io-index" 614 | dependencies = [ 615 | "bumpalo 3.10.0 
(registry+https://github.com/rust-lang/crates.io-index)", 616 | "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 617 | "log 0.4.17 (registry+https://github.com/rust-lang/crates.io-index)", 618 | "proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", 619 | "quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", 620 | "syn 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)", 621 | "wasm-bindgen-shared 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 622 | ] 623 | 624 | [[package]] 625 | name = "wasm-bindgen-macro" 626 | version = "0.2.81" 627 | source = "registry+https://github.com/rust-lang/crates.io-index" 628 | dependencies = [ 629 | "quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", 630 | "wasm-bindgen-macro-support 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 631 | ] 632 | 633 | [[package]] 634 | name = "wasm-bindgen-macro-support" 635 | version = "0.2.81" 636 | source = "registry+https://github.com/rust-lang/crates.io-index" 637 | dependencies = [ 638 | "proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", 639 | "quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", 640 | "syn 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)", 641 | "wasm-bindgen-backend 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 642 | "wasm-bindgen-shared 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)", 643 | ] 644 | 645 | [[package]] 646 | name = "wasm-bindgen-shared" 647 | version = "0.2.81" 648 | source = "registry+https://github.com/rust-lang/crates.io-index" 649 | 650 | [[package]] 651 | name = "web-sys" 652 | version = "0.3.58" 653 | source = "registry+https://github.com/rust-lang/crates.io-index" 654 | dependencies = [ 655 | "js-sys 0.3.58 (registry+https://github.com/rust-lang/crates.io-index)", 656 | "wasm-bindgen 0.2.81 
(registry+https://github.com/rust-lang/crates.io-index)", 657 | ] 658 | 659 | [[package]] 660 | name = "weezl" 661 | version = "0.1.10" 662 | dependencies = [ 663 | "criterion 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", 664 | "futures 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)", 665 | "tokio 1.19.2 (registry+https://github.com/rust-lang/crates.io-index)", 666 | "tokio-util 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", 667 | ] 668 | 669 | [[package]] 670 | name = "winapi" 671 | version = "0.3.9" 672 | source = "registry+https://github.com/rust-lang/crates.io-index" 673 | dependencies = [ 674 | "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 675 | "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 676 | ] 677 | 678 | [[package]] 679 | name = "winapi-i686-pc-windows-gnu" 680 | version = "0.4.0" 681 | source = "registry+https://github.com/rust-lang/crates.io-index" 682 | 683 | [[package]] 684 | name = "winapi-util" 685 | version = "0.1.5" 686 | source = "registry+https://github.com/rust-lang/crates.io-index" 687 | dependencies = [ 688 | "winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", 689 | ] 690 | 691 | [[package]] 692 | name = "winapi-x86_64-pc-windows-gnu" 693 | version = "0.4.0" 694 | source = "registry+https://github.com/rust-lang/crates.io-index" 695 | 696 | [[package]] 697 | name = "windows-sys" 698 | version = "0.36.1" 699 | source = "registry+https://github.com/rust-lang/crates.io-index" 700 | dependencies = [ 701 | "windows_aarch64_msvc 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)", 702 | "windows_i686_gnu 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)", 703 | "windows_i686_msvc 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)", 704 | "windows_x86_64_gnu 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)", 705 | "windows_x86_64_msvc 
0.36.1 (registry+https://github.com/rust-lang/crates.io-index)", 706 | ] 707 | 708 | [[package]] 709 | name = "windows_aarch64_msvc" 710 | version = "0.36.1" 711 | source = "registry+https://github.com/rust-lang/crates.io-index" 712 | 713 | [[package]] 714 | name = "windows_i686_gnu" 715 | version = "0.36.1" 716 | source = "registry+https://github.com/rust-lang/crates.io-index" 717 | 718 | [[package]] 719 | name = "windows_i686_msvc" 720 | version = "0.36.1" 721 | source = "registry+https://github.com/rust-lang/crates.io-index" 722 | 723 | [[package]] 724 | name = "windows_x86_64_gnu" 725 | version = "0.36.1" 726 | source = "registry+https://github.com/rust-lang/crates.io-index" 727 | 728 | [[package]] 729 | name = "windows_x86_64_msvc" 730 | version = "0.36.1" 731 | source = "registry+https://github.com/rust-lang/crates.io-index" 732 | 733 | [metadata] 734 | "checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 735 | "checksum autocfg 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 736 | "checksum bitflags 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 737 | "checksum bstr 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" 738 | "checksum bumpalo 3.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" 739 | "checksum bytes 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" 740 | "checksum cast 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a" 741 | "checksum cast 0.3.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 742 | "checksum cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 743 | "checksum clap 2.34.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" 744 | "checksum criterion 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" 745 | "checksum criterion-plot 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57" 746 | "checksum crossbeam-channel 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)" = "4c02a4d71819009c192cf4872265391563fd6a84c81ff2c0f2a7026ca4c1d85c" 747 | "checksum crossbeam-deque 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" 748 | "checksum crossbeam-epoch 0.9.9 (registry+https://github.com/rust-lang/crates.io-index)" = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d" 749 | "checksum crossbeam-utils 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)" = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83" 750 | "checksum csv 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" 751 | "checksum csv-core 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" 752 | "checksum either 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" 753 | "checksum futures 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = 
"f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" 754 | "checksum futures-channel 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" 755 | "checksum futures-core 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" 756 | "checksum futures-io 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" 757 | "checksum futures-sink 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" 758 | "checksum futures-task 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" 759 | "checksum futures-util 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" 760 | "checksum half 1.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" 761 | "checksum hermit-abi 0.1.19 (registry+https://github.com/rust-lang/crates.io-index)" = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 762 | "checksum itertools 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" 763 | "checksum itoa 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" 764 | "checksum itoa 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" 765 | "checksum js-sys 0.3.58 (registry+https://github.com/rust-lang/crates.io-index)" = "c3fac17f7123a73ca62df411b1bf727ccc805daa070338fda671c86dac1bdc27" 766 | "checksum 
lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 767 | "checksum libc 0.2.126 (registry+https://github.com/rust-lang/crates.io-index)" = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" 768 | "checksum log 0.4.17 (registry+https://github.com/rust-lang/crates.io-index)" = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" 769 | "checksum memchr 2.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 770 | "checksum memoffset 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" 771 | "checksum mio 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)" = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" 772 | "checksum num-traits 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)" = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 773 | "checksum num_cpus 1.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" 774 | "checksum once_cell 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" 775 | "checksum oorandom 11.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" 776 | "checksum pin-project-lite 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" 777 | "checksum pin-utils 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 778 | "checksum plotters 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = 
"9428003b84df1496fb9d6eeee9c5f8145cb41ca375eb0dad204328888832811f" 779 | "checksum plotters-backend 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142" 780 | "checksum plotters-svg 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e0918736323d1baff32ee0eade54984f6f201ad7e97d5cfb5d6ab4a358529615" 781 | "checksum proc-macro2 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)" = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" 782 | "checksum quote 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)" = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" 783 | "checksum rayon 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" 784 | "checksum rayon-core 1.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" 785 | "checksum regex 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" 786 | "checksum regex-automata 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" 787 | "checksum regex-syntax 0.6.27 (registry+https://github.com/rust-lang/crates.io-index)" = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" 788 | "checksum rustc_version 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" 789 | "checksum ryu 1.0.10 (registry+https://github.com/rust-lang/crates.io-index)" = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" 790 | "checksum same-file 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 791 | "checksum 
scopeguard 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 792 | "checksum semver 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)" = "a2333e6df6d6598f2b1974829f853c2b4c5f4a6e503c10af918081aa6f8564e1" 793 | "checksum serde 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)" = "1578c6245786b9d168c5447eeacfb96856573ca56c9d68fdcf394be134882a47" 794 | "checksum serde_cbor 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" 795 | "checksum serde_derive 1.0.138 (registry+https://github.com/rust-lang/crates.io-index)" = "023e9b1467aef8a10fb88f25611870ada9800ef7e22afce356bb0d2387b6f27c" 796 | "checksum serde_json 1.0.82 (registry+https://github.com/rust-lang/crates.io-index)" = "82c2c1fdcd807d1098552c5b9a36e425e42e9fbd7c6a37a8425f390f781f7fa7" 797 | "checksum slab 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" 798 | "checksum socket2 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" 799 | "checksum syn 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)" = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" 800 | "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 801 | "checksum tinytemplate 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 802 | "checksum tokio 1.19.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c51a52ed6686dd62c320f9b89299e9dfb46f730c7a48e635c19f21d116cb1439" 803 | "checksum tokio-macros 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484" 804 | "checksum tokio-util 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "36943ee01a6d67977dd3f84a5a1d2efeb4ada3a1ae771cadfaa535d9d9fc6507" 805 | "checksum unicode-ident 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" 806 | "checksum unicode-width 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" 807 | "checksum walkdir 2.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" 808 | "checksum wasi 0.11.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)" = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 809 | "checksum wasm-bindgen 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)" = "7c53b543413a17a202f4be280a7e5c62a1c69345f5de525ee64f8cfdbc954994" 810 | "checksum wasm-bindgen-backend 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)" = "5491a68ab4500fa6b4d726bd67408630c3dbe9c4fe7bda16d5c82a1fd8c7340a" 811 | "checksum wasm-bindgen-macro 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)" = "c441e177922bc58f1e12c022624b6216378e5febc2f0533e41ba443d505b80aa" 812 | "checksum wasm-bindgen-macro-support 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)" = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048" 813 | "checksum wasm-bindgen-shared 0.2.81 (registry+https://github.com/rust-lang/crates.io-index)" = "6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be" 814 | "checksum web-sys 0.3.58 (registry+https://github.com/rust-lang/crates.io-index)" = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90" 815 | "checksum winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = 
"5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 816 | "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 817 | "checksum winapi-util 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 818 | "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 819 | "checksum windows-sys 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" 820 | "checksum windows_aarch64_msvc 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" 821 | "checksum windows_i686_gnu 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)" = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" 822 | "checksum windows_i686_msvc 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" 823 | "checksum windows_x86_64_gnu 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" 824 | "checksum windows_x86_64_msvc 0.36.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" 825 | -------------------------------------------------------------------------------- /src/encode.rs: -------------------------------------------------------------------------------- 1 | //! A module for all encoding needs. 
2 | use crate::error::{BufferResult, LzwError, LzwStatus, VectorResult}; 3 | use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE}; 4 | 5 | use crate::alloc::{boxed::Box, vec::Vec}; 6 | #[cfg(feature = "std")] 7 | use crate::error::StreamResult; 8 | #[cfg(feature = "std")] 9 | use std::io::{self, BufRead, Write}; 10 | 11 | /// The state for encoding data with an LZW algorithm. 12 | /// 13 | /// The same structure can be utilized with streams as well as your own buffers and driver logic. 14 | /// It may even be possible to mix them if you are sufficiently careful not to lose any written 15 | /// data in the process. 16 | /// 17 | /// This is a sans-IO implementation, meaning that it only contains the state of the encoder and 18 | /// the caller will provide buffers for input and output data when calling the basic 19 | /// [`encode_bytes`] method. Nevertheless, a number of _adapters_ are provided in the `into_*` 20 | /// methods for enoding with a particular style of common IO. 21 | /// 22 | /// * [`encode`] for encoding once without any IO-loop. 23 | /// * [`into_async`] for encoding with the `futures` traits for asynchronous IO. 24 | /// * [`into_stream`] for encoding with the standard `io` traits. 25 | /// * [`into_vec`] for in-memory encoding. 26 | /// 27 | /// [`encode_bytes`]: #method.encode_bytes 28 | /// [`encode`]: #method.encode 29 | /// [`into_async`]: #method.into_async 30 | /// [`into_stream`]: #method.into_stream 31 | /// [`into_vec`]: #method.into_vec 32 | pub struct Encoder { 33 | /// Internally dispatch via a dynamic trait object. This did not have any significant 34 | /// performance impact as we batch data internally and this pointer does not change after 35 | /// creation! 36 | state: Box, 37 | } 38 | 39 | /// A encoding stream sink. 40 | /// 41 | /// See [`Encoder::into_stream`] on how to create this type. 
42 | /// 43 | /// [`Encoder::into_stream`]: struct.Encoder.html#method.into_stream 44 | #[cfg_attr( 45 | not(feature = "std"), 46 | deprecated = "This type is only useful with the `std` feature." 47 | )] 48 | #[cfg_attr(not(feature = "std"), allow(dead_code))] 49 | pub struct IntoStream<'d, W> { 50 | encoder: &'d mut Encoder, 51 | writer: W, 52 | buffer: Option>, 53 | default_size: usize, 54 | } 55 | 56 | /// An async decoding sink. 57 | /// 58 | /// See [`Encoder::into_async`] on how to create this type. 59 | /// 60 | /// [`Encoder::into_async`]: struct.Encoder.html#method.into_async 61 | #[cfg(feature = "async")] 62 | pub struct IntoAsync<'d, W> { 63 | encoder: &'d mut Encoder, 64 | writer: W, 65 | buffer: Option>, 66 | default_size: usize, 67 | } 68 | 69 | /// A encoding sink into a vector. 70 | /// 71 | /// See [`Encoder::into_vec`] on how to create this type. 72 | /// 73 | /// [`Encoder::into_vec`]: struct.Encoder.html#method.into_vec 74 | pub struct IntoVec<'d> { 75 | encoder: &'d mut Encoder, 76 | vector: &'d mut Vec, 77 | } 78 | 79 | trait Stateful { 80 | fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult; 81 | fn mark_ended(&mut self) -> bool; 82 | /// Reset the state tracking if end code has been written. 83 | fn restart(&mut self); 84 | /// Reset the encoder to the beginning, dropping all buffers etc. 85 | fn reset(&mut self); 86 | } 87 | 88 | struct EncodeState { 89 | /// The configured minimal code size. 90 | min_size: u8, 91 | /// The current encoding symbol tree. 92 | tree: Tree, 93 | /// If we have pushed the end code. 94 | has_ended: bool, 95 | /// If tiff then bumps are a single code sooner. 96 | is_tiff: bool, 97 | /// The code corresponding to the currently read characters. 98 | current_code: Code, 99 | /// The clear code for resetting the dictionary. 100 | clear_code: Code, 101 | /// The bit buffer for encoding. 102 | buffer: B, 103 | } 104 | 105 | struct MsbBuffer { 106 | /// The current code length. 
107 | code_size: u8, 108 | /// The buffer bits. 109 | buffer: u64, 110 | /// The number of valid buffer bits. 111 | bits_in_buffer: u8, 112 | } 113 | 114 | struct LsbBuffer { 115 | /// The current code length. 116 | code_size: u8, 117 | /// The buffer bits. 118 | buffer: u64, 119 | /// The number of valid buffer bits. 120 | bits_in_buffer: u8, 121 | } 122 | 123 | trait Buffer { 124 | fn new(size: u8) -> Self; 125 | /// Reset the code size in the buffer. 126 | fn reset(&mut self, min_size: u8); 127 | /// Apply effects of a Clear Code. 128 | fn clear(&mut self, min_size: u8); 129 | /// Insert a code into the buffer. 130 | fn buffer_code(&mut self, code: Code); 131 | /// Push bytes if the buffer space is getting small. 132 | fn push_out(&mut self, out: &mut &mut [u8]) -> bool; 133 | /// Flush all full bytes, returning if at least one more byte remains. 134 | fn flush_out(&mut self, out: &mut &mut [u8]) -> bool; 135 | /// Pad the buffer to a full byte. 136 | fn buffer_pad(&mut self); 137 | /// Increase the maximum code size. 138 | fn bump_code_size(&mut self); 139 | /// Return the maximum code with the current code size. 140 | fn max_code(&self) -> Code; 141 | /// Return the current code size in bits. 142 | fn code_size(&self) -> u8; 143 | } 144 | 145 | /// One tree node for at most each code. 146 | /// To avoid using too much memory we keep nodes with few successors in optimized form. This form 147 | /// doesn't offer lookup by indexing but instead does a linear search. 
148 | #[derive(Default)] 149 | struct Tree { 150 | simples: Vec, 151 | complex: Vec, 152 | keys: Vec, 153 | } 154 | 155 | #[derive(Clone, Copy)] 156 | enum FullKey { 157 | NoSuccessor, 158 | Simple(u16), 159 | Full(u16), 160 | } 161 | 162 | #[derive(Clone, Copy)] 163 | struct CompressedKey(u16); 164 | 165 | const SHORT: usize = 16; 166 | 167 | #[derive(Clone, Copy)] 168 | struct Simple { 169 | codes: [Code; SHORT], 170 | chars: [u8; SHORT], 171 | count: u8, 172 | } 173 | 174 | #[derive(Clone, Copy)] 175 | struct Full { 176 | char_continuation: [Code; 256], 177 | } 178 | 179 | /// Describes the static parameters for creating a decoder. 180 | #[derive(Clone, Debug)] 181 | pub struct Configuration { 182 | order: BitOrder, 183 | size: u8, 184 | tiff: bool, 185 | } 186 | 187 | impl Configuration { 188 | /// Create a configuration to decode with the specified bit order and symbol size. 189 | /// 190 | /// # Panics 191 | /// 192 | /// The `size` needs to be in the interval `2..=12`. 193 | pub fn new(order: BitOrder, size: u8) -> Self { 194 | super::assert_encode_size(size); 195 | Configuration { 196 | order, 197 | size, 198 | tiff: false, 199 | } 200 | } 201 | 202 | /// Create a configuration for a TIFF compatible decoder. 203 | /// 204 | /// # Panics 205 | /// 206 | /// The `size` needs to be in the interval `2..=12`. 207 | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { 208 | super::assert_encode_size(size); 209 | Configuration { 210 | order, 211 | size, 212 | tiff: true, 213 | } 214 | } 215 | 216 | /// Create a new decoder with the define configuration. 217 | pub fn build(self) -> Encoder { 218 | Encoder { 219 | state: Encoder::from_configuration(&self), 220 | } 221 | } 222 | } 223 | 224 | impl Encoder { 225 | /// Create a new encoder with the specified bit order and symbol size. 226 | /// 227 | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the 228 | /// original specification. 
In particular you will need to specify an `Lsb` bit oder to encode 229 | /// the data portion of a compressed `gif` image. 230 | /// 231 | /// # Panics 232 | /// 233 | /// The `size` needs to be in the interval `2..=12`. 234 | pub fn new(order: BitOrder, size: u8) -> Self { 235 | Configuration::new(order, size).build() 236 | } 237 | 238 | /// Create a TIFF compatible encoder with the specified bit order and symbol size. 239 | /// 240 | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the 241 | /// TIFF specification, which is a misinterpretation of the original algorithm for increasing 242 | /// the code size. It switches one symbol sooner. 243 | /// 244 | /// # Panics 245 | /// 246 | /// The `size` needs to be in the interval `2..=12`. 247 | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { 248 | Configuration::with_tiff_size_switch(order, size).build() 249 | } 250 | 251 | fn from_configuration(cfg: &Configuration) -> Box { 252 | match cfg.order { 253 | BitOrder::Lsb => { 254 | let mut state = EncodeState::::new(cfg.size); 255 | state.is_tiff = cfg.tiff; 256 | Box::new(state) 257 | } 258 | BitOrder::Msb => { 259 | let mut state = EncodeState::::new(cfg.size); 260 | state.is_tiff = cfg.tiff; 261 | Box::new(state) 262 | } 263 | } 264 | } 265 | 266 | /// Encode some bytes from `inp` into `out`. 267 | /// 268 | /// See [`into_stream`] for high-level functions (this interface is only available with the 269 | /// `std` feature) and [`finish`] for marking the input data as complete. 270 | /// 271 | /// When some input byte is invalid, i.e. is not smaller than `1 << size`, then that byte and 272 | /// all following ones will _not_ be consumed and the `status` of the result will signal an 273 | /// error. The result will also indicate that all bytes up to but not including the offending 274 | /// byte have been consumed. You may try again with a fixed byte. 
275 | /// 276 | /// [`into_stream`]: #method.into_stream 277 | /// [`finish`]: #method.finish 278 | pub fn encode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult { 279 | self.state.advance(inp, out) 280 | } 281 | 282 | /// Encode a single chunk of data. 283 | /// 284 | /// This method will add an end marker to the encoded chunk. 285 | /// 286 | /// This is a convenience wrapper around [`into_vec`]. Use the `into_vec` adapter to customize 287 | /// buffer size, to supply an existing vector, to control whether an end marker is required, or 288 | /// to preserve partial data in the case of a decoding error. 289 | /// 290 | /// [`into_vec`]: #into_vec 291 | /// 292 | /// # Example 293 | /// 294 | /// ``` 295 | /// use weezl::{BitOrder, encode::Encoder}; 296 | /// 297 | /// let data = b"Hello, world"; 298 | /// let encoded = Encoder::new(BitOrder::Msb, 9) 299 | /// .encode(data) 300 | /// .expect("All bytes valid for code size"); 301 | /// ``` 302 | pub fn encode(&mut self, data: &[u8]) -> Result, LzwError> { 303 | let mut output = Vec::new(); 304 | self.into_vec(&mut output).encode_all(data).status?; 305 | Ok(output) 306 | } 307 | 308 | /// Construct a encoder into a writer. 309 | #[cfg(feature = "std")] 310 | pub fn into_stream(&mut self, writer: W) -> IntoStream<'_, W> { 311 | IntoStream { 312 | encoder: self, 313 | writer, 314 | buffer: None, 315 | default_size: STREAM_BUF_SIZE, 316 | } 317 | } 318 | 319 | /// Construct a encoder into an async writer. 320 | #[cfg(feature = "async")] 321 | pub fn into_async(&mut self, writer: W) -> IntoAsync<'_, W> { 322 | IntoAsync { 323 | encoder: self, 324 | writer, 325 | buffer: None, 326 | default_size: STREAM_BUF_SIZE, 327 | } 328 | } 329 | 330 | /// Construct an encoder into a vector. 331 | /// 332 | /// All encoded data is appended and the vector is __not__ cleared. 333 | /// 334 | /// Compared to `into_stream` this interface allows a high-level access to encoding without 335 | /// requires the `std`-feature. 
Also, it can make full use of the extra buffer control that the 336 | /// special target exposes. 337 | pub fn into_vec<'lt>(&'lt mut self, vec: &'lt mut Vec) -> IntoVec<'lt> { 338 | IntoVec { 339 | encoder: self, 340 | vector: vec, 341 | } 342 | } 343 | 344 | /// Mark the encoding as in the process of finishing. 345 | /// 346 | /// The next following call to `encode_bytes` which is able to consume the complete input will 347 | /// also try to emit an end code. It's not recommended, but also not unsound, to use different 348 | /// byte slices in different calls from this point forward and thus to 'delay' the actual end 349 | /// of the data stream. The behaviour after the end marker has been written is unspecified but 350 | /// sound. 351 | pub fn finish(&mut self) { 352 | self.state.mark_ended(); 353 | } 354 | 355 | /// Undo marking this data stream as ending. 356 | /// FIXME: clarify how this interacts with padding introduced after end code. 357 | #[allow(dead_code)] 358 | pub(crate) fn restart(&mut self) { 359 | self.state.restart() 360 | } 361 | 362 | /// Reset all internal state. 363 | /// 364 | /// This produce an encoder as if just constructed with `new` but taking slightly less work. In 365 | /// particular it will not deallocate any internal allocations. It will also avoid some 366 | /// duplicate setup work. 367 | pub fn reset(&mut self) { 368 | self.state.reset() 369 | } 370 | } 371 | 372 | #[cfg(feature = "std")] 373 | impl<'d, W: Write> IntoStream<'d, W> { 374 | /// Encode data from a reader. 375 | /// 376 | /// This will drain the supplied reader. It will not encode an end marker after all data has 377 | /// been processed. 378 | pub fn encode(&mut self, read: impl BufRead) -> StreamResult { 379 | self.encode_part(read, false) 380 | } 381 | 382 | /// Encode data from a reader and an end marker. 
383 | pub fn encode_all(mut self, read: impl BufRead) -> StreamResult { 384 | self.encode_part(read, true) 385 | } 386 | 387 | /// Set the size of the intermediate encode buffer. 388 | /// 389 | /// A buffer of this size is allocated to hold one part of the encoded stream when no buffer is 390 | /// available and any encoding method is called. No buffer is allocated if `set_buffer` has 391 | /// been called. The buffer is reused. 392 | /// 393 | /// # Panics 394 | /// This method panics if `size` is `0`. 395 | pub fn set_buffer_size(&mut self, size: usize) { 396 | assert_ne!(size, 0, "Attempted to set empty buffer"); 397 | self.default_size = size; 398 | } 399 | 400 | /// Use a particular buffer as an intermediate encode buffer. 401 | /// 402 | /// Calling this sets or replaces the buffer. When a buffer has been set then it is used 403 | /// instead of a dynamically allocating a buffer. Note that the size of the buffer is relevant 404 | /// for efficient encoding as there is additional overhead from `write` calls each time the 405 | /// buffer has been filled. 406 | /// 407 | /// # Panics 408 | /// This method panics if the `buffer` is empty. 
409 | pub fn set_buffer(&mut self, buffer: &'d mut [u8]) { 410 | assert_ne!(buffer.len(), 0, "Attempted to set empty buffer"); 411 | self.buffer = Some(StreamBuf::Borrowed(buffer)); 412 | } 413 | 414 | fn encode_part(&mut self, mut read: impl BufRead, finish: bool) -> StreamResult { 415 | let IntoStream { 416 | encoder, 417 | writer, 418 | buffer, 419 | default_size, 420 | } = self; 421 | enum Progress { 422 | Ok, 423 | Done, 424 | } 425 | 426 | let mut bytes_read = 0; 427 | let mut bytes_written = 0; 428 | 429 | let read_bytes = &mut bytes_read; 430 | let write_bytes = &mut bytes_written; 431 | 432 | let outbuf: &mut [u8] = 433 | match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } { 434 | StreamBuf::Borrowed(slice) => &mut *slice, 435 | StreamBuf::Owned(vec) => &mut *vec, 436 | }; 437 | assert!(!outbuf.is_empty()); 438 | 439 | let once = move || { 440 | let data = read.fill_buf()?; 441 | 442 | if data.is_empty() { 443 | if finish { 444 | encoder.finish(); 445 | } else { 446 | return Ok(Progress::Done); 447 | } 448 | } 449 | 450 | let result = encoder.encode_bytes(data, &mut outbuf[..]); 451 | *read_bytes += result.consumed_in; 452 | *write_bytes += result.consumed_out; 453 | read.consume(result.consumed_in); 454 | 455 | let done = result.status.map_err(|err| { 456 | io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err)) 457 | })?; 458 | 459 | if let LzwStatus::Done = done { 460 | writer.write_all(&outbuf[..result.consumed_out])?; 461 | return Ok(Progress::Done); 462 | } 463 | 464 | if let LzwStatus::NoProgress = done { 465 | return Err(io::Error::new( 466 | io::ErrorKind::UnexpectedEof, 467 | "No more data but no end marker detected", 468 | )); 469 | } 470 | 471 | writer.write_all(&outbuf[..result.consumed_out])?; 472 | Ok(Progress::Ok) 473 | }; 474 | 475 | let status = core::iter::repeat_with(once) 476 | // scan+fuse can be replaced with map_while 477 | .scan((), |(), result| match result { 478 | Ok(Progress::Ok) => 
Some(Ok(())), 479 | Err(err) => Some(Err(err)), 480 | Ok(Progress::Done) => None, 481 | }) 482 | .fuse() 483 | .collect(); 484 | 485 | StreamResult { 486 | bytes_read, 487 | bytes_written, 488 | status, 489 | } 490 | } 491 | } 492 | 493 | impl IntoVec<'_> { 494 | /// Encode data from a slice. 495 | pub fn encode(&mut self, read: &[u8]) -> VectorResult { 496 | self.encode_part(read, false) 497 | } 498 | 499 | /// Decode data from a reader, adding an end marker. 500 | pub fn encode_all(mut self, read: &[u8]) -> VectorResult { 501 | self.encode_part(read, true) 502 | } 503 | 504 | fn grab_buffer(&mut self) -> (&mut [u8], &mut Encoder) { 505 | const CHUNK_SIZE: usize = 1 << 12; 506 | let decoder = &mut self.encoder; 507 | let length = self.vector.len(); 508 | 509 | // Use the vector to do overflow checks and w/e. 510 | self.vector.reserve(CHUNK_SIZE); 511 | // FIXME: encoding into uninit buffer? 512 | self.vector.resize(length + CHUNK_SIZE, 0u8); 513 | 514 | (&mut self.vector[length..], decoder) 515 | } 516 | 517 | fn encode_part(&mut self, part: &[u8], finish: bool) -> VectorResult { 518 | let mut result = VectorResult { 519 | consumed_in: 0, 520 | consumed_out: 0, 521 | status: Ok(LzwStatus::Ok), 522 | }; 523 | 524 | enum Progress { 525 | Ok, 526 | Done, 527 | } 528 | 529 | // Converting to mutable refs to move into the `once` closure. 530 | let read_bytes = &mut result.consumed_in; 531 | let write_bytes = &mut result.consumed_out; 532 | let mut data = part; 533 | 534 | // A 64 MB buffer is quite large but should get alloc_zeroed. 535 | // Note that the decoded size can be up to quadratic in code block. 536 | let once = move || { 537 | // Grab a new output buffer. 538 | let (outbuf, encoder) = self.grab_buffer(); 539 | 540 | if finish { 541 | encoder.finish(); 542 | } 543 | 544 | // Decode as much of the buffer as fits. 545 | let result = encoder.encode_bytes(data, &mut outbuf[..]); 546 | // Do the bookkeeping and consume the buffer. 
547 | *read_bytes += result.consumed_in; 548 | *write_bytes += result.consumed_out; 549 | data = &data[result.consumed_in..]; 550 | 551 | let unfilled = outbuf.len() - result.consumed_out; 552 | let filled = self.vector.len() - unfilled; 553 | self.vector.truncate(filled); 554 | 555 | // Handle the status in the result. 556 | let done = result.status?; 557 | if let LzwStatus::Done = done { 558 | Ok(Progress::Done) 559 | } else { 560 | Ok(Progress::Ok) 561 | } 562 | }; 563 | 564 | // Decode chunks of input data until we're done. 565 | let status: Result<(), _> = core::iter::repeat_with(once) 566 | // scan+fuse can be replaced with map_while 567 | .scan((), |(), result| match result { 568 | Ok(Progress::Ok) => Some(Ok(())), 569 | Err(err) => Some(Err(err)), 570 | Ok(Progress::Done) => None, 571 | }) 572 | .fuse() 573 | .collect(); 574 | 575 | if let Err(err) = status { 576 | result.status = Err(err); 577 | } 578 | 579 | result 580 | } 581 | } 582 | 583 | // This is implemented in a separate file, so that 1.34.2 does not parse it. Otherwise, it would 584 | // trip over the usage of await, which is a reserved keyword in that edition/version. It only 585 | // contains an impl block. 
586 | #[cfg(feature = "async")] 587 | #[path = "encode_into_async.rs"] 588 | mod impl_encode_into_async; 589 | 590 | impl EncodeState { 591 | fn new(min_size: u8) -> Self { 592 | let clear_code = 1 << min_size; 593 | let mut tree = Tree::default(); 594 | tree.init(min_size); 595 | let mut state = EncodeState { 596 | min_size, 597 | tree, 598 | has_ended: false, 599 | is_tiff: false, 600 | current_code: clear_code, 601 | clear_code, 602 | buffer: B::new(min_size), 603 | }; 604 | state.buffer_code(clear_code); 605 | state 606 | } 607 | } 608 | 609 | impl Stateful for EncodeState { 610 | fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult { 611 | let c_in = inp.len(); 612 | let c_out = out.len(); 613 | let mut status = Ok(LzwStatus::Ok); 614 | 615 | 'encoding: loop { 616 | if self.push_out(&mut out) { 617 | break; 618 | } 619 | 620 | if inp.is_empty() && self.has_ended { 621 | let end = self.end_code(); 622 | if self.current_code != end { 623 | if self.current_code != self.clear_code { 624 | self.buffer_code(self.current_code); 625 | 626 | // When reading this code, the decoder will add an extra entry to its table 627 | // before reading th end code. Thusly, it may increase its code size based 628 | // on this additional entry. 
629 | if self.tree.keys.len() + usize::from(self.is_tiff) 630 | > usize::from(self.buffer.max_code()) 631 | && self.buffer.code_size() < MAX_CODESIZE 632 | { 633 | self.buffer.bump_code_size(); 634 | } 635 | } 636 | self.buffer_code(end); 637 | self.current_code = end; 638 | self.buffer_pad(); 639 | } 640 | 641 | break; 642 | } 643 | 644 | let mut next_code = None; 645 | let mut bytes = inp.iter(); 646 | while let Some(&byte) = bytes.next() { 647 | if self.min_size < 8 && byte >= 1 << self.min_size { 648 | status = Err(LzwError::InvalidCode); 649 | break 'encoding; 650 | } 651 | 652 | inp = bytes.as_slice(); 653 | match self.tree.iterate(self.current_code, byte) { 654 | Ok(code) => self.current_code = code, 655 | Err(_) => { 656 | next_code = Some(self.current_code); 657 | 658 | self.current_code = u16::from(byte); 659 | break; 660 | } 661 | } 662 | } 663 | 664 | match next_code { 665 | // No more bytes, no code produced. 666 | None => break, 667 | Some(code) => { 668 | self.buffer_code(code); 669 | 670 | if self.tree.keys.len() + usize::from(self.is_tiff) 671 | > usize::from(self.buffer.max_code()) + 1 672 | && self.buffer.code_size() < MAX_CODESIZE 673 | { 674 | self.buffer.bump_code_size(); 675 | } 676 | 677 | if self.tree.keys.len() > MAX_ENTRIES { 678 | self.buffer_code(self.clear_code); 679 | self.tree.reset(self.min_size); 680 | self.buffer.clear(self.min_size); 681 | } 682 | } 683 | } 684 | } 685 | 686 | if inp.is_empty() && self.current_code == self.end_code() { 687 | if !self.flush_out(&mut out) { 688 | status = Ok(LzwStatus::Done); 689 | } 690 | } 691 | 692 | BufferResult { 693 | consumed_in: c_in - inp.len(), 694 | consumed_out: c_out - out.len(), 695 | status, 696 | } 697 | } 698 | 699 | fn mark_ended(&mut self) -> bool { 700 | core::mem::replace(&mut self.has_ended, true) 701 | } 702 | 703 | fn restart(&mut self) { 704 | self.has_ended = false; 705 | } 706 | 707 | fn reset(&mut self) { 708 | self.restart(); 709 | self.current_code = self.clear_code; 
710 | self.tree.reset(self.min_size); 711 | self.buffer.reset(self.min_size); 712 | self.buffer_code(self.clear_code); 713 | } 714 | } 715 | 716 | impl EncodeState { 717 | fn push_out(&mut self, out: &mut &mut [u8]) -> bool { 718 | self.buffer.push_out(out) 719 | } 720 | 721 | fn flush_out(&mut self, out: &mut &mut [u8]) -> bool { 722 | self.buffer.flush_out(out) 723 | } 724 | 725 | fn end_code(&self) -> Code { 726 | self.clear_code + 1 727 | } 728 | 729 | fn buffer_pad(&mut self) { 730 | self.buffer.buffer_pad(); 731 | } 732 | 733 | fn buffer_code(&mut self, code: Code) { 734 | self.buffer.buffer_code(code); 735 | } 736 | } 737 | 738 | impl Buffer for MsbBuffer { 739 | fn new(min_size: u8) -> Self { 740 | MsbBuffer { 741 | code_size: min_size + 1, 742 | buffer: 0, 743 | bits_in_buffer: 0, 744 | } 745 | } 746 | 747 | fn reset(&mut self, min_size: u8) { 748 | self.code_size = min_size + 1; 749 | self.buffer = 0; 750 | self.bits_in_buffer = 0; 751 | } 752 | 753 | fn clear(&mut self, min_size: u8) { 754 | self.code_size = min_size + 1; 755 | } 756 | 757 | fn buffer_code(&mut self, code: Code) { 758 | let shift = 64 - self.bits_in_buffer - self.code_size; 759 | self.buffer |= u64::from(code) << shift; 760 | self.bits_in_buffer += self.code_size; 761 | } 762 | 763 | fn push_out(&mut self, out: &mut &mut [u8]) -> bool { 764 | if self.bits_in_buffer + 2 * self.code_size < 64 { 765 | return false; 766 | } 767 | 768 | self.flush_out(out) 769 | } 770 | 771 | fn flush_out(&mut self, out: &mut &mut [u8]) -> bool { 772 | let want = usize::from(self.bits_in_buffer / 8); 773 | let count = want.min((*out).len()); 774 | let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count); 775 | *out = tail; 776 | 777 | for b in bytes { 778 | *b = ((self.buffer & 0xff00_0000_0000_0000) >> 56) as u8; 779 | self.buffer <<= 8; 780 | self.bits_in_buffer -= 8; 781 | } 782 | 783 | count < want 784 | } 785 | 786 | fn buffer_pad(&mut self) { 787 | let to_byte = 
self.bits_in_buffer.wrapping_neg() & 0x7; 788 | self.bits_in_buffer += to_byte; 789 | } 790 | 791 | fn bump_code_size(&mut self) { 792 | self.code_size += 1; 793 | } 794 | 795 | fn max_code(&self) -> Code { 796 | (1 << self.code_size) - 1 797 | } 798 | 799 | fn code_size(&self) -> u8 { 800 | self.code_size 801 | } 802 | } 803 | 804 | impl Buffer for LsbBuffer { 805 | fn new(min_size: u8) -> Self { 806 | LsbBuffer { 807 | code_size: min_size + 1, 808 | buffer: 0, 809 | bits_in_buffer: 0, 810 | } 811 | } 812 | 813 | fn reset(&mut self, min_size: u8) { 814 | self.code_size = min_size + 1; 815 | self.buffer = 0; 816 | self.bits_in_buffer = 0; 817 | } 818 | 819 | fn clear(&mut self, min_size: u8) { 820 | self.code_size = min_size + 1; 821 | } 822 | 823 | fn buffer_code(&mut self, code: Code) { 824 | self.buffer |= u64::from(code) << self.bits_in_buffer; 825 | self.bits_in_buffer += self.code_size; 826 | } 827 | 828 | fn push_out(&mut self, out: &mut &mut [u8]) -> bool { 829 | if self.bits_in_buffer + 2 * self.code_size < 64 { 830 | return false; 831 | } 832 | 833 | self.flush_out(out) 834 | } 835 | 836 | fn flush_out(&mut self, out: &mut &mut [u8]) -> bool { 837 | let want = usize::from(self.bits_in_buffer / 8); 838 | let count = want.min((*out).len()); 839 | let (bytes, tail) = core::mem::replace(out, &mut []).split_at_mut(count); 840 | *out = tail; 841 | 842 | for b in bytes { 843 | *b = (self.buffer & 0x0000_0000_0000_00ff) as u8; 844 | self.buffer >>= 8; 845 | self.bits_in_buffer -= 8; 846 | } 847 | 848 | count < want 849 | } 850 | 851 | fn buffer_pad(&mut self) { 852 | let to_byte = self.bits_in_buffer.wrapping_neg() & 0x7; 853 | self.bits_in_buffer += to_byte; 854 | } 855 | 856 | fn bump_code_size(&mut self) { 857 | self.code_size += 1; 858 | } 859 | 860 | fn max_code(&self) -> Code { 861 | (1 << self.code_size) - 1 862 | } 863 | 864 | fn code_size(&self) -> u8 { 865 | self.code_size 866 | } 867 | } 868 | 869 | impl Tree { 870 | fn init(&mut self, min_size: u8) { 
871 | // We need a way to represent the state of a currently empty buffer. We use the clear code 872 | // for this, thus create one complex mapping that leads to the one-char base codes. 873 | self.keys 874 | .resize((1 << min_size) + 2, FullKey::NoSuccessor.into()); 875 | self.complex.push(Full { 876 | char_continuation: [0; 256], 877 | }); 878 | let map_of_begin = self.complex.last_mut().unwrap(); 879 | for ch in 0u16..256 { 880 | map_of_begin.char_continuation[usize::from(ch)] = ch; 881 | } 882 | self.keys[1 << min_size] = FullKey::Full(0).into(); 883 | } 884 | 885 | fn reset(&mut self, min_size: u8) { 886 | self.simples.clear(); 887 | self.keys.truncate((1 << min_size) + 2); 888 | // Keep entry for clear code. 889 | self.complex.truncate(1); 890 | // The first complex is not changed.. 891 | for k in self.keys[..(1 << min_size) + 2].iter_mut() { 892 | *k = FullKey::NoSuccessor.into(); 893 | } 894 | self.keys[1 << min_size] = FullKey::Full(0).into(); 895 | } 896 | 897 | fn at_key(&self, code: Code, ch: u8) -> Option { 898 | let key = self.keys[usize::from(code)]; 899 | match FullKey::from(key) { 900 | FullKey::NoSuccessor => None, 901 | FullKey::Simple(idx) => { 902 | let nexts = &self.simples[usize::from(idx)]; 903 | let successors = nexts 904 | .codes 905 | .iter() 906 | .zip(nexts.chars.iter()) 907 | .take(usize::from(nexts.count)); 908 | for (&scode, &sch) in successors { 909 | if sch == ch { 910 | return Some(scode); 911 | } 912 | } 913 | 914 | None 915 | } 916 | FullKey::Full(idx) => { 917 | let full = &self.complex[usize::from(idx)]; 918 | let precode = full.char_continuation[usize::from(ch)]; 919 | if usize::from(precode) < MAX_ENTRIES { 920 | Some(precode) 921 | } else { 922 | None 923 | } 924 | } 925 | } 926 | } 927 | 928 | /// Iterate to the next char. 929 | /// Return Ok when it was already in the tree or creates a new entry for it and returns Err. 
930 | fn iterate(&mut self, code: Code, ch: u8) -> Result { 931 | if let Some(next) = self.at_key(code, ch) { 932 | Ok(next) 933 | } else { 934 | Err(self.append(code, ch)) 935 | } 936 | } 937 | 938 | fn append(&mut self, code: Code, ch: u8) -> Code { 939 | let next: Code = self.keys.len() as u16; 940 | let key = self.keys[usize::from(code)]; 941 | // TODO: with debug assertions, check for non-existence 942 | match FullKey::from(key) { 943 | FullKey::NoSuccessor => { 944 | let new_key = FullKey::Simple(self.simples.len() as u16); 945 | self.simples.push(Simple::default()); 946 | let simples = self.simples.last_mut().unwrap(); 947 | simples.codes[0] = next; 948 | simples.chars[0] = ch; 949 | simples.count = 1; 950 | self.keys[usize::from(code)] = new_key.into(); 951 | } 952 | FullKey::Simple(idx) if usize::from(self.simples[usize::from(idx)].count) < SHORT => { 953 | let nexts = &mut self.simples[usize::from(idx)]; 954 | let nidx = usize::from(nexts.count); 955 | nexts.chars[nidx] = ch; 956 | nexts.codes[nidx] = next; 957 | nexts.count += 1; 958 | } 959 | FullKey::Simple(idx) => { 960 | let new_key = FullKey::Full(self.complex.len() as u16); 961 | let simples = &self.simples[usize::from(idx)]; 962 | self.complex.push(Full { 963 | char_continuation: [Code::max_value(); 256], 964 | }); 965 | let full = self.complex.last_mut().unwrap(); 966 | for (&pch, &pcont) in simples.chars.iter().zip(simples.codes.iter()) { 967 | full.char_continuation[usize::from(pch)] = pcont; 968 | } 969 | self.keys[usize::from(code)] = new_key.into(); 970 | } 971 | FullKey::Full(idx) => { 972 | let full = &mut self.complex[usize::from(idx)]; 973 | full.char_continuation[usize::from(ch)] = next; 974 | } 975 | } 976 | self.keys.push(FullKey::NoSuccessor.into()); 977 | next 978 | } 979 | } 980 | 981 | impl Default for FullKey { 982 | fn default() -> Self { 983 | FullKey::NoSuccessor 984 | } 985 | } 986 | 987 | impl Default for Simple { 988 | fn default() -> Self { 989 | Simple { 990 | codes: [0; 
SHORT], 991 | chars: [0; SHORT], 992 | count: 0, 993 | } 994 | } 995 | } 996 | 997 | impl From for FullKey { 998 | fn from(CompressedKey(key): CompressedKey) -> Self { 999 | match (key >> MAX_CODESIZE) & 0xf { 1000 | 0 => FullKey::Full(key & 0xfff), 1001 | 1 => FullKey::Simple(key & 0xfff), 1002 | _ => FullKey::NoSuccessor, 1003 | } 1004 | } 1005 | } 1006 | 1007 | impl From for CompressedKey { 1008 | fn from(full: FullKey) -> Self { 1009 | CompressedKey(match full { 1010 | FullKey::NoSuccessor => 0x2000, 1011 | FullKey::Simple(code) => 0x1000 | code, 1012 | FullKey::Full(code) => code, 1013 | }) 1014 | } 1015 | } 1016 | 1017 | #[cfg(test)] 1018 | mod tests { 1019 | use super::{BitOrder, Encoder, LzwError, LzwStatus}; 1020 | use crate::alloc::vec::Vec; 1021 | use crate::decode::Decoder; 1022 | #[cfg(feature = "std")] 1023 | use crate::StreamBuf; 1024 | 1025 | #[test] 1026 | fn invalid_input_rejected() { 1027 | const BIT_LEN: u8 = 2; 1028 | let ref input = [0, 1 << BIT_LEN /* invalid */, 0]; 1029 | let ref mut target = [0u8; 128]; 1030 | let mut encoder = Encoder::new(BitOrder::Msb, BIT_LEN); 1031 | 1032 | encoder.finish(); 1033 | // We require simulation of normality, that is byte-for-byte compression. 1034 | let result = encoder.encode_bytes(input, target); 1035 | assert!(if let Err(LzwError::InvalidCode) = result.status { 1036 | true 1037 | } else { 1038 | false 1039 | }); 1040 | assert_eq!(result.consumed_in, 1); 1041 | 1042 | let fixed = encoder.encode_bytes(&[1, 0], &mut target[result.consumed_out..]); 1043 | assert!(if let Ok(LzwStatus::Done) = fixed.status { 1044 | true 1045 | } else { 1046 | false 1047 | }); 1048 | assert_eq!(fixed.consumed_in, 2); 1049 | 1050 | // Okay, now test we actually fixed it. 
1051 | let ref mut compare = [0u8; 4]; 1052 | let mut todo = &target[..result.consumed_out + fixed.consumed_out]; 1053 | let mut free = &mut compare[..]; 1054 | let mut decoder = Decoder::new(BitOrder::Msb, BIT_LEN); 1055 | 1056 | // Decode with up to 16 rounds, far too much but inconsequential. 1057 | for _ in 0..16 { 1058 | if decoder.has_ended() { 1059 | break; 1060 | } 1061 | 1062 | let result = decoder.decode_bytes(todo, free); 1063 | assert!(result.status.is_ok()); 1064 | todo = &todo[result.consumed_in..]; 1065 | free = &mut free[result.consumed_out..]; 1066 | } 1067 | 1068 | let remaining = { free }.len(); 1069 | let len = compare.len() - remaining; 1070 | assert_eq!(todo, &[]); 1071 | assert_eq!(compare[..len], [0, 1, 0]); 1072 | } 1073 | 1074 | #[test] 1075 | #[should_panic] 1076 | fn invalid_code_size_low() { 1077 | let _ = Encoder::new(BitOrder::Msb, 1); 1078 | } 1079 | 1080 | #[test] 1081 | #[should_panic] 1082 | fn invalid_code_size_high() { 1083 | let _ = Encoder::new(BitOrder::Msb, 14); 1084 | } 1085 | 1086 | fn make_decoded() -> Vec { 1087 | const FILE: &'static [u8] = 1088 | include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/Cargo.lock")); 1089 | return Vec::from(FILE); 1090 | } 1091 | 1092 | #[test] 1093 | #[cfg(feature = "std")] 1094 | fn into_stream_buffer_no_alloc() { 1095 | let encoded = make_decoded(); 1096 | let mut encoder = Encoder::new(BitOrder::Msb, 8); 1097 | 1098 | let mut output = vec![]; 1099 | let mut buffer = [0; 512]; 1100 | let mut istream = encoder.into_stream(&mut output); 1101 | istream.set_buffer(&mut buffer[..]); 1102 | istream.encode(&encoded[..]).status.unwrap(); 1103 | 1104 | match istream.buffer { 1105 | Some(StreamBuf::Borrowed(_)) => {} 1106 | None => panic!("Decoded without buffer??"), 1107 | Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"), 1108 | } 1109 | } 1110 | 1111 | #[test] 1112 | #[cfg(feature = "std")] 1113 | fn into_stream_buffer_small_alloc() { 1114 | struct WriteTap(W); 1115 | const 
BUF_SIZE: usize = 512; 1116 | 1117 | impl std::io::Write for WriteTap { 1118 | fn write(&mut self, buf: &[u8]) -> std::io::Result { 1119 | assert!(buf.len() <= BUF_SIZE); 1120 | self.0.write(buf) 1121 | } 1122 | fn flush(&mut self) -> std::io::Result<()> { 1123 | self.0.flush() 1124 | } 1125 | } 1126 | 1127 | let encoded = make_decoded(); 1128 | let mut encoder = Encoder::new(BitOrder::Msb, 8); 1129 | 1130 | let mut output = vec![]; 1131 | let mut istream = encoder.into_stream(WriteTap(&mut output)); 1132 | istream.set_buffer_size(512); 1133 | istream.encode(&encoded[..]).status.unwrap(); 1134 | 1135 | match istream.buffer { 1136 | Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE), 1137 | Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"), 1138 | None => panic!("Decoded without buffer??"), 1139 | } 1140 | } 1141 | 1142 | #[test] 1143 | #[cfg(feature = "std")] 1144 | fn reset() { 1145 | let encoded = make_decoded(); 1146 | let mut encoder = Encoder::new(BitOrder::Msb, 8); 1147 | let mut reference = None; 1148 | 1149 | for _ in 0..2 { 1150 | let mut output = vec![]; 1151 | let mut buffer = [0; 512]; 1152 | let mut istream = encoder.into_stream(&mut output); 1153 | istream.set_buffer(&mut buffer[..]); 1154 | istream.encode_all(&encoded[..]).status.unwrap(); 1155 | 1156 | encoder.reset(); 1157 | if let Some(reference) = &reference { 1158 | assert_eq!(output, *reference); 1159 | } else { 1160 | reference = Some(output); 1161 | } 1162 | } 1163 | } 1164 | } 1165 | -------------------------------------------------------------------------------- /src/decode.rs: -------------------------------------------------------------------------------- 1 | //! A module for all decoding needs. 
2 | #[cfg(feature = "std")] 3 | use crate::error::StreamResult; 4 | use crate::error::{BufferResult, LzwError, LzwStatus, VectorResult}; 5 | use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE}; 6 | 7 | use crate::alloc::{boxed::Box, vec, vec::Vec}; 8 | #[cfg(feature = "std")] 9 | use std::io::{self, BufRead, Write}; 10 | 11 | /// The state for decoding data with an LZW algorithm. 12 | /// 13 | /// The same structure can be utilized with streams as well as your own buffers and driver logic. 14 | /// It may even be possible to mix them if you are sufficiently careful not to lose or skip any 15 | /// already decode data in the process. 16 | /// 17 | /// This is a sans-IO implementation, meaning that it only contains the state of the decoder and 18 | /// the caller will provide buffers for input and output data when calling the basic 19 | /// [`decode_bytes`] method. Nevertheless, a number of _adapters_ are provided in the `into_*` 20 | /// methods for decoding with a particular style of common IO. 21 | /// 22 | /// * [`decode`] for decoding once without any IO-loop. 23 | /// * [`into_async`] for decoding with the `futures` traits for asynchronous IO. 24 | /// * [`into_stream`] for decoding with the standard `io` traits. 25 | /// * [`into_vec`] for in-memory decoding. 26 | /// 27 | /// [`decode_bytes`]: #method.decode_bytes 28 | /// [`decode`]: #method.decode 29 | /// [`into_async`]: #method.into_async 30 | /// [`into_stream`]: #method.into_stream 31 | /// [`into_vec`]: #method.into_vec 32 | pub struct Decoder { 33 | state: Box, 34 | } 35 | 36 | /// A decoding stream sink. 37 | /// 38 | /// See [`Decoder::into_stream`] on how to create this type. 39 | /// 40 | /// [`Decoder::into_stream`]: struct.Decoder.html#method.into_stream 41 | #[cfg_attr( 42 | not(feature = "std"), 43 | deprecated = "This type is only useful with the `std` feature." 
44 | )] 45 | #[cfg_attr(not(feature = "std"), allow(dead_code))] 46 | pub struct IntoStream<'d, W> { 47 | decoder: &'d mut Decoder, 48 | writer: W, 49 | buffer: Option>, 50 | default_size: usize, 51 | } 52 | 53 | /// An async decoding sink. 54 | /// 55 | /// See [`Decoder::into_async`] on how to create this type. 56 | /// 57 | /// [`Decoder::into_async`]: struct.Decoder.html#method.into_async 58 | #[cfg(feature = "async")] 59 | pub struct IntoAsync<'d, W> { 60 | decoder: &'d mut Decoder, 61 | writer: W, 62 | buffer: Option>, 63 | default_size: usize, 64 | } 65 | 66 | /// A decoding sink into a vector. 67 | /// 68 | /// See [`Decoder::into_vec`] on how to create this type. 69 | /// 70 | /// [`Decoder::into_vec`]: struct.Decoder.html#method.into_vec 71 | pub struct IntoVec<'d> { 72 | decoder: &'d mut Decoder, 73 | vector: &'d mut Vec, 74 | } 75 | 76 | trait Stateful { 77 | fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult; 78 | fn has_ended(&self) -> bool; 79 | /// Ignore an end code and continue decoding (no implied reset). 80 | fn restart(&mut self); 81 | /// Reset the decoder to the beginning, dropping all buffers etc. 82 | fn reset(&mut self); 83 | } 84 | 85 | #[derive(Clone)] 86 | struct Link { 87 | prev: Code, 88 | byte: u8, 89 | first: u8, 90 | } 91 | 92 | #[derive(Clone)] 93 | struct DerivationBase { 94 | code: Code, 95 | first: u8, 96 | } 97 | 98 | #[derive(Default)] 99 | struct MsbBuffer { 100 | /// A buffer of individual bits. The oldest code is kept in the high-order bits. 101 | bit_buffer: u64, 102 | /// A precomputed mask for this code. 103 | code_mask: u16, 104 | /// The current code size. 105 | code_size: u8, 106 | /// The number of bits in the buffer. 107 | bits: u8, 108 | } 109 | 110 | #[derive(Default)] 111 | struct LsbBuffer { 112 | /// A buffer of individual bits. The oldest code is kept in the high-order bits. 113 | bit_buffer: u64, 114 | /// A precomputed mask for this code. 115 | code_mask: u16, 116 | /// The current code size. 
117 | code_size: u8, 118 | /// The number of bits in the buffer. 119 | bits: u8, 120 | } 121 | 122 | trait CodeBuffer { 123 | fn new(min_size: u8) -> Self; 124 | fn reset(&mut self, min_size: u8); 125 | fn bump_code_size(&mut self); 126 | 127 | /// Retrieve the next symbol, refilling if necessary. 128 | fn next_symbol(&mut self, inp: &mut &[u8]) -> Option; 129 | /// Refill the internal buffer. 130 | fn refill_bits(&mut self, inp: &mut &[u8]); 131 | 132 | fn peek_bits(&self, code: &mut [Code; BURST]) -> usize; 133 | fn consume_bits(&mut self, code_cnt: u8); 134 | 135 | fn max_code(&self) -> Code; 136 | fn code_size(&self) -> u8; 137 | } 138 | 139 | trait CodegenConstants { 140 | const YIELD_ON_FULL: bool; 141 | } 142 | 143 | struct DecodeState { 144 | /// The original minimum code size. 145 | min_size: u8, 146 | /// The table of decoded codes. 147 | table: Table, 148 | /// The buffer of decoded data. 149 | buffer: Buffer, 150 | /// The link which we are still decoding and its original code. 151 | last: Option, 152 | /// The next code entry. 153 | next_code: Code, 154 | /// Code to reset all tables. 155 | clear_code: Code, 156 | /// Code to signal the end of the stream. 157 | end_code: Code, 158 | /// A stored flag if the end code has already appeared. 159 | has_ended: bool, 160 | /// If tiff then bumps are a single code sooner. 161 | is_tiff: bool, 162 | /// Do we allow stream to start without an explicit reset code? 163 | implicit_reset: bool, 164 | /// The buffer for decoded words. 165 | code_buffer: CodeBuffer, 166 | #[allow(dead_code)] 167 | constants: core::marker::PhantomData, 168 | } 169 | 170 | // We have a buffer of 64 bits. So at max size at most 5 units can be read at once without 171 | // refilling the buffer. At smaller code sizes there are more. We tune for 6 here, by slight 172 | // experimentation. This may be an architecture dependent constant. 
173 | const BURST: usize = 6; 174 | 175 | struct Buffer { 176 | bytes: Box<[u8]>, 177 | read_mark: usize, 178 | write_mark: usize, 179 | } 180 | 181 | struct Table { 182 | inner: Vec, 183 | depths: Vec, 184 | } 185 | 186 | /// Describes the static parameters for creating a decoder. 187 | #[derive(Clone, Debug)] 188 | pub struct Configuration { 189 | order: BitOrder, 190 | size: u8, 191 | tiff: bool, 192 | yield_on_full: bool, 193 | } 194 | 195 | impl Configuration { 196 | /// Create a configuration to decode with the specified bit order and symbol size. 197 | pub fn new(order: BitOrder, size: u8) -> Self { 198 | super::assert_decode_size(size); 199 | Configuration { 200 | order, 201 | size, 202 | tiff: false, 203 | yield_on_full: false, 204 | } 205 | } 206 | 207 | /// Create a configuration for a TIFF compatible decoder. 208 | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { 209 | super::assert_decode_size(size); 210 | Configuration { 211 | order, 212 | size, 213 | tiff: true, 214 | yield_on_full: false, 215 | } 216 | } 217 | 218 | /// Immediately yield to the caller when the decoder buffer is full. 219 | /// 220 | /// This can be used for `libtiff` compatibility. It will use a "relaxed" stream interpretation 221 | /// that need not contain an explicit EOF. Instead, the decoder is expected to stop fetching 222 | /// symbols when some out-of-band specified length of the decoded text has been reached. The 223 | /// caller indicates this maximum length through the available output buffer space. 224 | /// 225 | /// Symbols afterwards must not be expected to be valid. On filling the output buffer space 226 | /// completely, the decoder will return immediately to the caller instead of potentially 227 | /// interpreting the following bit-stream (and returning an error on doing so). 228 | /// 229 | /// Default: `false`. 
230 | pub fn with_yield_on_full_buffer(self, do_yield: bool) -> Self { 231 | Configuration { 232 | yield_on_full: do_yield, 233 | ..self 234 | } 235 | } 236 | 237 | /// Create a new decoder with the define configuration. 238 | pub fn build(self) -> Decoder { 239 | Decoder { 240 | state: Decoder::from_configuration(&self), 241 | } 242 | } 243 | } 244 | 245 | impl Decoder { 246 | /// Create a new decoder with the specified bit order and symbol size. 247 | /// 248 | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the 249 | /// original specification. In particular you will need to specify an `Lsb` bit oder to decode 250 | /// the data portion of a compressed `gif` image. 251 | /// 252 | /// # Panics 253 | /// 254 | /// The `size` needs to be in the interval `0..=12`. 255 | pub fn new(order: BitOrder, size: u8) -> Self { 256 | Configuration::new(order, size).build() 257 | } 258 | 259 | /// Create a TIFF compatible decoder with the specified bit order and symbol size. 260 | /// 261 | /// The algorithm for dynamically increasing the code symbol bit width is compatible with the 262 | /// TIFF specification, which is a misinterpretation of the original algorithm for increasing 263 | /// the code size. It switches one symbol sooner. 264 | /// 265 | /// # Panics 266 | /// 267 | /// The `size` needs to be in the interval `0..=12`. 
268 | pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self { 269 | Configuration::with_tiff_size_switch(order, size).build() 270 | } 271 | 272 | fn from_configuration(configuration: &Configuration) -> Box { 273 | struct NoYield; 274 | struct YieldOnFull; 275 | 276 | impl CodegenConstants for NoYield { 277 | const YIELD_ON_FULL: bool = false; 278 | } 279 | 280 | impl CodegenConstants for YieldOnFull { 281 | const YIELD_ON_FULL: bool = true; 282 | } 283 | 284 | type Boxed = Box; 285 | match (configuration.order, configuration.yield_on_full) { 286 | (BitOrder::Lsb, false) => { 287 | let mut state = 288 | Box::new(DecodeState::::new(configuration.size)); 289 | state.is_tiff = configuration.tiff; 290 | state as Boxed 291 | } 292 | (BitOrder::Lsb, true) => { 293 | let mut state = Box::new(DecodeState::::new( 294 | configuration.size, 295 | )); 296 | state.is_tiff = configuration.tiff; 297 | state as Boxed 298 | } 299 | (BitOrder::Msb, false) => { 300 | let mut state = 301 | Box::new(DecodeState::::new(configuration.size)); 302 | state.is_tiff = configuration.tiff; 303 | state as Boxed 304 | } 305 | (BitOrder::Msb, true) => { 306 | let mut state = Box::new(DecodeState::::new( 307 | configuration.size, 308 | )); 309 | state.is_tiff = configuration.tiff; 310 | state as Boxed 311 | } 312 | } 313 | } 314 | 315 | /// Decode some bytes from `inp` and write result to `out`. 316 | /// 317 | /// This will consume a prefix of the input buffer and write decoded output into a prefix of 318 | /// the output buffer. See the respective fields of the return value for the count of consumed 319 | /// and written bytes. For the next call You should have adjusted the inputs accordingly. 320 | /// 321 | /// The call will try to decode and write as many bytes of output as available. It will be 322 | /// much more optimized (and avoid intermediate buffering) if it is allowed to write a large 323 | /// contiguous chunk at once. 
324 | /// 325 | /// See [`into_stream`] for high-level functions (that are only available with the `std` 326 | /// feature). 327 | /// 328 | /// [`into_stream`]: #method.into_stream 329 | pub fn decode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult { 330 | self.state.advance(inp, out) 331 | } 332 | 333 | /// Decode a single chunk of lzw encoded data. 334 | /// 335 | /// This method requires the data to contain an end marker, and returns an error otherwise. 336 | /// 337 | /// This is a convenience wrapper around [`into_vec`]. Use the `into_vec` adapter to customize 338 | /// buffer size, to supply an existing vector, to control whether an end marker is required, or 339 | /// to preserve partial data in the case of a decoding error. 340 | /// 341 | /// [`into_vec`]: #into_vec 342 | /// 343 | /// # Example 344 | /// 345 | /// ``` 346 | /// use weezl::{BitOrder, decode::Decoder}; 347 | /// 348 | /// // Encoded that was created with an encoder. 349 | /// let data = b"\x80\x04\x81\x94l\x1b\x06\xf0\xb0 \x1d\xc6\xf1\xc8l\x19 \x10"; 350 | /// let decoded = Decoder::new(BitOrder::Msb, 9) 351 | /// .decode(data) 352 | /// .unwrap(); 353 | /// assert_eq!(decoded, b"Hello, world"); 354 | /// ``` 355 | pub fn decode(&mut self, data: &[u8]) -> Result, LzwError> { 356 | let mut output = vec![]; 357 | self.into_vec(&mut output).decode_all(data).status?; 358 | Ok(output) 359 | } 360 | 361 | /// Construct a decoder into a writer. 362 | #[cfg(feature = "std")] 363 | pub fn into_stream(&mut self, writer: W) -> IntoStream<'_, W> { 364 | IntoStream { 365 | decoder: self, 366 | writer, 367 | buffer: None, 368 | default_size: STREAM_BUF_SIZE, 369 | } 370 | } 371 | 372 | /// Construct a decoder into an async writer. 
373 | #[cfg(feature = "async")] 374 | pub fn into_async(&mut self, writer: W) -> IntoAsync<'_, W> { 375 | IntoAsync { 376 | decoder: self, 377 | writer, 378 | buffer: None, 379 | default_size: STREAM_BUF_SIZE, 380 | } 381 | } 382 | 383 | /// Construct a decoder into a vector. 384 | /// 385 | /// All decoded data is appended and the vector is __not__ cleared. 386 | /// 387 | /// Compared to `into_stream` this interface allows a high-level access to decoding without 388 | /// requires the `std`-feature. Also, it can make full use of the extra buffer control that the 389 | /// special target exposes. 390 | pub fn into_vec<'lt>(&'lt mut self, vec: &'lt mut Vec) -> IntoVec<'lt> { 391 | IntoVec { 392 | decoder: self, 393 | vector: vec, 394 | } 395 | } 396 | 397 | /// Check if the decoding has finished. 398 | /// 399 | /// No more output is produced beyond the end code that marked the finish of the stream. The 400 | /// decoder may have read additional bytes, including padding bits beyond the last code word 401 | /// but also excess bytes provided. 402 | pub fn has_ended(&self) -> bool { 403 | self.state.has_ended() 404 | } 405 | 406 | /// Ignore an end code and continue. 407 | /// 408 | /// This will _not_ reset any of the inner code tables and not have the effect of a clear code. 409 | /// It will instead continue as if the end code had not been present. If no end code has 410 | /// occurred then this is a no-op. 411 | /// 412 | /// You can test if an end code has occurred with [`has_ended`](#method.has_ended). 413 | /// FIXME: clarify how this interacts with padding introduced after end code. 414 | #[allow(dead_code)] 415 | pub(crate) fn restart(&mut self) { 416 | self.state.restart(); 417 | } 418 | 419 | /// Reset all internal state. 420 | /// 421 | /// This produce a decoder as if just constructed with `new` but taking slightly less work. In 422 | /// particular it will not deallocate any internal allocations. It will also avoid some 423 | /// duplicate setup work. 
pub fn reset(&mut self) {
        self.state.reset();
    }
}

#[cfg(feature = "std")]
impl<'d, W: Write> IntoStream<'d, W> {
    /// Decode data from a reader.
    ///
    /// This will read data until the stream is empty or an end marker is reached.
    pub fn decode(&mut self, read: impl BufRead) -> StreamResult {
        self.decode_part(read, false)
    }

    /// Decode data from a reader, requiring an end marker.
    pub fn decode_all(mut self, read: impl BufRead) -> StreamResult {
        self.decode_part(read, true)
    }

    /// Set the size of the intermediate decode buffer.
    ///
    /// A buffer of this size is allocated to hold one part of the decoded stream when no buffer is
    /// available and any decoding method is called. No buffer is allocated if `set_buffer` has
    /// been called. The buffer is reused.
    ///
    /// # Panics
    /// This method panics if `size` is `0`.
    pub fn set_buffer_size(&mut self, size: usize) {
        assert_ne!(size, 0, "Attempted to set empty buffer");
        self.default_size = size;
    }

    /// Use a particular buffer as an intermediate decode buffer.
    ///
    /// Calling this sets or replaces the buffer. When a buffer has been set then it is used
    /// instead of dynamically allocating a buffer. Note that the size of the buffer is critical
    /// for efficient decoding. Some optimization techniques require the buffer to hold one or more
    /// previous decoded words. There is also additional overhead from `write` calls each time the
    /// buffer has been filled.
    ///
    /// # Panics
    /// This method panics if the `buffer` is empty.
    pub fn set_buffer(&mut self, buffer: &'d mut [u8]) {
        assert_ne!(buffer.len(), 0, "Attempted to set empty buffer");
        self.buffer = Some(StreamBuf::Borrowed(buffer));
    }

    /// Core loop shared by `decode` and `decode_all`.
    ///
    /// Repeatedly pulls one chunk of input from `read`, decodes it into the intermediate buffer,
    /// and forwards the decoded bytes to the inner writer. When `must_finish` is set, exhausting
    /// the input without an end marker is reported as `UnexpectedEof` instead of clean completion.
    fn decode_part(&mut self, mut read: impl BufRead, must_finish: bool) -> StreamResult {
        let IntoStream {
            decoder,
            writer,
            buffer,
            default_size,
        } = self;

        enum Progress {
            Ok,
            Done,
        }

        let mut bytes_read = 0;
        let mut bytes_written = 0;

        // Converting to mutable refs to move into the `once` closure.
        let read_bytes = &mut bytes_read;
        let write_bytes = &mut bytes_written;

        // Lazily allocate an intermediate buffer unless the caller provided one via `set_buffer`.
        let outbuf: &mut [u8] =
            match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } {
                StreamBuf::Borrowed(slice) => &mut *slice,
                StreamBuf::Owned(vec) => &mut *vec,
            };
        assert!(!outbuf.is_empty());

        // One decoding step: fetch input, decode one buffer's worth, write it out.
        let once = move || {
            // Try to grab one buffer of input data.
            let data = read.fill_buf()?;

            // Decode as much of the buffer as fits.
            let result = decoder.decode_bytes(data, &mut outbuf[..]);
            // Do the bookkeeping and consume the buffer.
            *read_bytes += result.consumed_in;
            *write_bytes += result.consumed_out;
            read.consume(result.consumed_in);

            // Handle the status in the result; decode errors are surfaced as `InvalidData`.
            let done = result.status.map_err(|err| {
                io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err))
            })?;

            // Check if we had any new data at all.
            if let LzwStatus::NoProgress = done {
                debug_assert_eq!(
                    result.consumed_out, 0,
                    "No progress means we have not decoded any data"
                );
                // In particular we did not finish decoding.
                if must_finish {
                    return Err(io::Error::new(
                        io::ErrorKind::UnexpectedEof,
                        "No more data but no end marker detected",
                    ));
                } else {
                    return Ok(Progress::Done);
                }
            }

            // And finish by writing our result.
            // TODO: we may lose data on error (also on status error above) which we might want to
            // deterministically handle so that we don't need to restart everything from scratch as
            // the only recovery strategy. Any changes welcome.
            writer.write_all(&outbuf[..result.consumed_out])?;

            Ok(if let LzwStatus::Done = done {
                Progress::Done
            } else {
                Progress::Ok
            })
        };

        // Decode chunks of input data until we're done.
        let status = core::iter::repeat_with(once)
            // scan+fuse can be replaced with map_while
            .scan((), |(), result| match result {
                Ok(Progress::Ok) => Some(Ok(())),
                Err(err) => Some(Err(err)),
                Ok(Progress::Done) => None,
            })
            .fuse()
            .collect();

        StreamResult {
            bytes_read,
            bytes_written,
            status,
        }
    }
}

impl IntoVec<'_> {
    /// Decode data from a slice.
    ///
    /// This will read data until the slice is empty or an end marker is reached.
    pub fn decode(&mut self, read: &[u8]) -> VectorResult {
        self.decode_part(read, false)
    }

    /// Decode data from a slice, requiring an end marker.
    pub fn decode_all(mut self, read: &[u8]) -> VectorResult {
        self.decode_part(read, true)
    }

    /// Grow the output vector by one chunk and hand out its (zero-filled) tail
    /// as the next decode target, together with the decoder itself.
    fn grab_buffer(&mut self) -> (&mut [u8], &mut Decoder) {
        const CHUNK_SIZE: usize = 1 << 12;
        let decoder = &mut self.decoder;
        let length = self.vector.len();

        // Use the vector to do overflow checks and w/e.
        self.vector.reserve(CHUNK_SIZE);
        // FIXME: decoding into uninit buffer?
self.vector.resize(length + CHUNK_SIZE, 0u8);

        (&mut self.vector[length..], decoder)
    }

    /// Core loop shared by `decode` and `decode_all`: decode `part` into the
    /// owned vector, growing it chunk by chunk and truncating the unused tail.
    fn decode_part(&mut self, part: &[u8], must_finish: bool) -> VectorResult {
        let mut result = VectorResult {
            consumed_in: 0,
            consumed_out: 0,
            status: Ok(LzwStatus::Ok),
        };

        enum Progress {
            Ok,
            Done,
        }

        // Converting to mutable refs to move into the `once` closure.
        let read_bytes = &mut result.consumed_in;
        let write_bytes = &mut result.consumed_out;
        let mut data = part;

        // A 64 MB buffer is quite large but should get alloc_zeroed.
        // Note that the decoded size can be up to quadratic in code block.
        let once = move || {
            // Grab a new output buffer.
            let (outbuf, decoder) = self.grab_buffer();

            // Decode as much of the buffer as fits.
            let result = decoder.decode_bytes(data, &mut outbuf[..]);
            // Do the bookkeeping and consume the buffer.
            *read_bytes += result.consumed_in;
            *write_bytes += result.consumed_out;
            data = &data[result.consumed_in..];

            // Drop the part of the freshly grown chunk that was not filled.
            let unfilled = outbuf.len() - result.consumed_out;
            let filled = self.vector.len() - unfilled;
            self.vector.truncate(filled);

            // Handle the status in the result. Running dry without an end marker is only an
            // error when the caller required one (`decode_all`).
            match result.status {
                Err(err) => Err(err),
                Ok(LzwStatus::NoProgress) if must_finish => Err(LzwError::InvalidCode),
                Ok(LzwStatus::NoProgress) | Ok(LzwStatus::Done) => Ok(Progress::Done),
                Ok(LzwStatus::Ok) => Ok(Progress::Ok),
            }
        };

        // Decode chunks of input data until we're done.
        let status: Result<(), _> = core::iter::repeat_with(once)
            // scan+fuse can be replaced with map_while
            .scan((), |(), result| match result {
                Ok(Progress::Ok) => Some(Ok(())),
                Err(err) => Some(Err(err)),
                Ok(Progress::Done) => None,
            })
            .fuse()
            .collect();

        if let Err(err) = status {
            result.status = Err(err);
        }

        result
    }
}

// This is implemented in a separate file, so that 1.34.2 does not parse it. Otherwise, it would
// trip over the usage of await, which is a reserved keyword in that edition/version. It only
// contains an impl block.
#[cfg(feature = "async")]
#[path = "decode_into_async.rs"]
mod impl_decode_into_async;

impl DecodeState {
    /// Construct a fresh decode state for symbols of `min_size` bits.
    ///
    /// The clear and end codes are the two entries directly after the initial
    /// alphabet (`1 << min_size` and `1 << min_size | 1`), so the first free
    /// dictionary code is `(1 << min_size) + 2`.
    fn new(min_size: u8) -> Self {
        DecodeState {
            min_size,
            table: Table::new(),
            buffer: Buffer::new(),
            last: None,
            clear_code: 1 << min_size,
            end_code: (1 << min_size) + 1,
            next_code: (1 << min_size) + 2,
            has_ended: false,
            is_tiff: false,
            implicit_reset: true,
            code_buffer: CodeBuffer::new(min_size),
            constants: core::marker::PhantomData,
        }
    }

    /// (Re-)initialize code size and table from scratch, filling the initial alphabet.
    fn init_tables(&mut self) {
        self.code_buffer.reset(self.min_size);
        self.next_code = (1 << self.min_size) + 2;
        self.table.init(self.min_size);
    }

    /// Reset after a clear code: like `init_tables` but truncates instead of refilling,
    /// keeping the already-present static entries.
    fn reset_tables(&mut self) {
        self.code_buffer.reset(self.min_size);
        self.next_code = (1 << self.min_size) + 2;
        self.table.clear(self.min_size);
    }
}

impl Stateful for DecodeState {
    fn has_ended(&self) -> bool {
        self.has_ended
    }

    /// Clear only the end-of-stream flag so decoding may continue on new input.
    fn restart(&mut self) {
        self.has_ended = false;
    }

    /// Fully reset the decoder to its post-construction state.
    fn reset(&mut self) {
        self.table.init(self.min_size);
        self.next_code = (1 << self.min_size) + 2;
        self.buffer.read_mark = 0;
        self.buffer.write_mark = 0;
        self.last = None;
self.restart();
        self.code_buffer = CodeBuffer::new(self.min_size);
    }

    /// Decode as many codes from `inp` into `out` as possible, returning how much of
    /// each was consumed together with the resulting stream status.
    fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
        // Skip everything if there is nothing to do.
        if self.has_ended {
            return BufferResult {
                consumed_in: 0,
                consumed_out: 0,
                status: Ok(LzwStatus::Done),
            };
        }

        // Rough description:
        // We will fill the output slice as much as possible until either there is no more symbols
        // to decode or an end code has been reached. This requires an internal buffer to hold a
        // potential tail of the word corresponding to the last symbol. This tail will then be
        // decoded first before continuing with the regular decoding. The same buffer is required
        // to persist some symbol state across calls.
        //
        // We store the words corresponding to code symbols in an index chain, bytewise, where we
        // push each decoded symbol. (TODO: wuffs shows some success with 8-byte units). This chain
        // is traversed for each symbol when it is decoded and bytes are placed directly into the
        // output slice. In the special case (new_code == next_code) we use an existing decoded
        // version that is present in either the out bytes of this call or in buffer to copy the
        // repeated prefix slice.
        // TODO: I played with a 'decoding cache' to remember the position of long symbols and
        // avoid traversing the chain, doing a copy of memory instead. It did however not lead to
        // a serious improvement. It's just unlikely to both have a long symbol and have that
        // repeated twice in the same output buffer.
        //
        // You will also find the (to my knowledge novel) concept of a _decoding burst_ which
        // gained some >~10% speedup in tests. This is motivated by wanting to use out-of-order
        // execution as much as possible and for this reason have the least possible stress on
        // branch prediction. Our decoding table already gives us a lookahead on symbol lengths but
        // only for re-used codes, not novel ones. This lookahead also makes the loop termination
        // when restoring each byte of the code word perfectly predictable! So a burst is a chunk
        // of code words which are all independent of each other, have known lengths _and_ are
        // guaranteed to fit into the out slice without requiring a buffer. One burst can be
        // decoded in an extremely tight loop.
        //
        // TODO: since words can be at most (1 << MAX_CODESIZE) = 4096 bytes long we could avoid
        // that intermediate buffer at the expense of not always filling the output buffer
        // completely. Alternatively we might follow its chain of precursor states twice. This may
        // be even cheaper if we store more than one byte per link so it really should be
        // evaluated.
        // TODO: if the caller was required to provide the previous last word we could also avoid
        // the buffer for cases where we need it to restore the next code! This could be built
        // backwards compatible by only doing it after an opt-in call that enables the behaviour.

        // Record initial lengths for the result that is returned.
        let o_in = inp.len();
        let o_out = out.len();

        // The code_link is the previously decoded symbol.
        // It's used to link the new code back to its predecessor.
        let mut code_link = None;
        // The status, which is written to on an invalid code.
        let mut status = Ok(LzwStatus::Ok);

        match self.last.take() {
            // No last state? This is the first code after a reset?
            None => {
                match self.next_symbol(&mut inp) {
                    // Plainly invalid code.
                    Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode),
                    // next_code would require an actual predecessor.
                    Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode),
                    // No more symbols available and nothing decoded yet.
                    // Assume that we didn't make progress, this may get reset to Done if we read
                    // some bytes from the input.
                    None => status = Ok(LzwStatus::NoProgress),
                    // Handle a valid code.
                    Some(init_code) => {
                        if init_code == self.clear_code {
                            self.init_tables();
                        } else if init_code == self.end_code {
                            self.has_ended = true;
                            status = Ok(LzwStatus::Done);
                        } else if self.table.is_empty() {
                            if self.implicit_reset {
                                self.init_tables();

                                self.buffer.fill_reconstruct(&self.table, init_code);
                                let link = self.table.at(init_code).clone();
                                code_link = Some(DerivationBase {
                                    code: init_code,
                                    first: link.first,
                                });
                            } else {
                                // We require an explicit reset.
                                status = Err(LzwError::InvalidCode);
                            }
                        } else {
                            // Reconstruct the first code in the buffer.
                            self.buffer.fill_reconstruct(&self.table, init_code);
                            let link = self.table.at(init_code).clone();
                            code_link = Some(DerivationBase {
                                code: init_code,
                                first: link.first,
                            });
                        }
                    }
                }
            }
            // Move the tracking state to the stack.
            Some(tup) => code_link = Some(tup),
        };

        // Tracking an empty `burst` (see below) means we made no progress.
        let mut have_yet_to_decode_data = false;

        // Restore the previous state, if any: flush the buffered tail of the last word first.
        if code_link.is_some() {
            let remain = self.buffer.buffer();
            // Check if we can fully finish the buffer.
            if remain.len() > out.len() {
                if out.is_empty() {
                    // This also implies the buffer is _not_ empty and we will not enter any
                    // decoding loop.
                    status = Ok(LzwStatus::NoProgress);
                } else {
                    out.copy_from_slice(&remain[..out.len()]);
                    self.buffer.consume(out.len());
                    out = &mut [];
                }
            } else if remain.is_empty() {
                status = Ok(LzwStatus::NoProgress);
                have_yet_to_decode_data = true;
            } else {
                let consumed = remain.len();
                out[..consumed].copy_from_slice(remain);
                self.buffer.consume(consumed);
                out = &mut out[consumed..];
                have_yet_to_decode_data = false;
            }
        }

        // A special reference to out slice which holds the last decoded symbol.
        let mut last_decoded: Option<&[u8]> = None;

        if self.buffer.buffer().is_empty() {
            // Hot loop that writes data to the output as long as we can do so directly from the
            // input stream. As an invariant of this block we did not need to use the buffer to
            // store a decoded code word. Testing the condition ahead of time avoids a test in the
            // loop body since every code path where the buffer is filled already breaks.
            //
            // In a previous iteration of the code we trusted compiler optimization to work this
            // out but it seems that it does not. Another edit hidden behind some performance work
            // then edited out the check, inadvertently changing the behavior for callers that
            // relied on being able to provide an empty output buffer and still receiving a useful
            // signal about the state of the stream.

            // A burst is a sequence of code words that are independently decoded, i.e. they do not
            // change the state of the decoder in ways that would influence the interpretation of
            // each other. That is: they are not special symbols, they do not make us increase the
            // code size, they are each codes already in the tree before the burst.
            //
            // The tracking state for a burst. These are actually initialized later but compiler
            // wasn't smart enough to fully optimize out the init code so that appears outside the
            // loop.
            let mut burst = [0; BURST];
            let mut burst_byte_len = [0u16; BURST];
            let mut burst_byte = [0u8; BURST];
            let mut target: [&mut [u8]; BURST] = Default::default();

            loop {
                // In particular, we *also* break if the output buffer is still empty. Especially
                // when the output parameter was an empty slice, we must try to fetch at least one
                // code but with YIELD_ON_FULL we do not.
                if CgC::YIELD_ON_FULL && out.is_empty() {
                    break;
                }

                let mut deriv = match code_link.take() {
                    Some(link) => link,
                    None => {
                        // TODO: we do not need to break here. This does not indicate that the buffer
                        // has been filled, rather it indicates we have reset the state. The next code
                        // should be part of the initial alphabet. However the first code is special in
                        // the sense of not creating a new code itself. This is handled correctly in
                        // the initialization prior to the loop; and in particular that handling as
                        // written currently relies on putting it into the buffer; so handling it we
                        // would need to ensure that either the buffer is fully cleared after its use,
                        // or use another implementation of handling that first code.
                        break;
                    }
                };

                // Ensure the code buffer is full, we're about to request some codes.
                // Note that this also ensures at least one code is in the buffer if any input is left.
                self.refill_bits(&mut inp);
                let cnt = self.code_buffer.peek_bits(&mut burst);

                // No code left in the buffer, and no more bytes to refill the buffer.
                if cnt == 0 {
                    if have_yet_to_decode_data {
                        status = Ok(LzwStatus::NoProgress);
                    }

                    code_link = Some(deriv);
                    break;
                }

                debug_assert!(
                    // When the table is full, we have a max code above the size switch.
                    self.table.inner.len() >= MAX_ENTRIES - usize::from(self.is_tiff)
                    // When the code size is 2 we have a bit code: (0, 1, CLS, EOF). Then the
                    // computed next_code is 4 which already exceeds the bit width from the start.
                    // Then we will immediately switch code size after this code.
                    //
                    // TODO: this is the reason for some saturating and non-sharp comparisons in
                    // the code below. Maybe it makes sense to revisit turning this into a compile
                    // time choice?
                    || (self.code_buffer.code_size() == 1 && self.next_code < 4)
                    || (self.code_buffer.code_size() == 2 && self.next_code == 4)
                    || self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code,
                    "Table: {}, code_size: {}, next_code: {}, table_condition: {}",
                    self.table.is_full(),
                    self.code_buffer.code_size(),
                    self.next_code,
                    self.code_buffer.max_code() - Code::from(self.is_tiff),
                );

                let mut burst_size = 0;
                let size_switch_at = self.code_buffer.max_code() - Code::from(self.is_tiff);
                // This is intended to wrap. As by the debug assert above, we keep the next
                // code bounded by the current size's max code where we switch code size.
                // Except in case the table is full then we actually want to allow decoding
                // of an arbitrary count of non-resetting symbols.
                let left_before_size_switch = size_switch_at.wrapping_sub(self.next_code);

                // A burst is a sequence of decodes that are completely independent of each other. This
                // is the case if neither is an end code, a clear code, or a next code, i.e. we have
                // all of them in the decoding table and thus known their depths, and additionally if
                // we can decode them directly into the output buffer.
                for b in &burst[..cnt] {
                    // We can commit the previous burst code, and will take a slice from the output
                    // buffer. This also avoids the bounds check in the tight loop later.
                    if burst_size > 0 {
                        let len = burst_byte_len[burst_size - 1];
                        let (into, tail) = out.split_at_mut(usize::from(len));
                        target[burst_size - 1] = into;
                        out = tail;
                    }

                    // Check that we don't overflow the code size with all codes we burst decode.
                    burst_size += 1;

                    if burst_size > usize::from(left_before_size_switch) {
                        break;
                    }

                    let read_code = *b;

                    // A burst code can't be special.
                    if read_code == self.clear_code
                        || read_code == self.end_code
                        || read_code >= self.next_code
                    {
                        break;
                    }

                    // Read the code length and check that we can decode directly into the out slice.
                    let len = self.table.depths[usize::from(read_code)];

                    if out.len() < usize::from(len) {
                        break;
                    }

                    // We do exactly one more code (the one being inspected in the current iteration)
                    // after the 'burst'. When we want to break decoding precisely on the supplied
                    // buffer, we check if this is the last code to be decoded into it.
                    if CgC::YIELD_ON_FULL {
                        if out.len() == usize::from(len) {
                            break;
                        }
                    }

                    burst_byte_len[burst_size - 1] = len;
                }

                self.code_buffer.consume_bits(burst_size as u8);
                have_yet_to_decode_data = false;

                // Note that the very last code in the burst buffer doesn't actually belong to the
                // burst itself. TODO: sometimes it could, we just don't differentiate between the
                // breaks and a loop end condition above. That may be a speed advantage?
                let (&new_code, burst) = burst[..burst_size].split_last().unwrap();

                // The very tight loop for restoring the actual burst. These can be reconstructed in
                // parallel since none of them depend on a prior constructed. Only the derivation of
                // new codes is not parallel. There are no size changes here either.
                let burst_targets = &mut target[..burst_size - 1];

                if !self.table.is_full() {
                    self.next_code += burst_targets.len() as u16;
                }

                for ((&burst, target), byte) in
                    burst.iter().zip(&mut *burst_targets).zip(&mut burst_byte)
                {
                    *byte = self.table.reconstruct(burst, target);
                }

                self.table.derive_burst(&mut deriv, burst, &burst_byte[..]);

                // Now handle the special codes.
                if new_code == self.clear_code {
                    self.reset_tables();
                    last_decoded = None;
                    // Restarts in the next call to the entry point.
                    break;
                }

                if new_code == self.end_code {
                    self.has_ended = true;
                    status = Ok(LzwStatus::Done);
                    last_decoded = None;
                    break;
                }

                if new_code > self.next_code {
                    status = Err(LzwError::InvalidCode);
                    last_decoded = None;
                    break;
                }

                let required_len = if new_code == self.next_code {
                    self.table.depths[usize::from(deriv.code)] + 1
                } else {
                    self.table.depths[usize::from(new_code)]
                };

                // We need the decoded data of the new code if it is the `next_code`. This is the
                // special case of LZW decoding that is demonstrated by `banana` (or form cScSc). In
                // all other cases we only need the first character of the decoded data.
                let have_next_code = new_code == self.next_code;

                // Update the slice holding the last decoded word.
                if have_next_code {
                    // If we did not have any burst code, we still hold that slice in the buffer.
                    if let Some(new_last) = target[..burst_size - 1].last_mut() {
                        let slice = core::mem::replace(new_last, &mut []);
                        last_decoded = Some(&*slice);
                    }
                }

                let cha;
                let is_in_buffer = usize::from(required_len) > out.len();
                // Check if we will need to store our current state into the buffer.
                if is_in_buffer {
                    if have_next_code {
                        // last_decoded will be Some if we have restored any code into the out slice.
                        // Otherwise it will still be present in the buffer.
                        if let Some(last) = last_decoded.take() {
                            self.buffer.bytes[..last.len()].copy_from_slice(last);
                            self.buffer.write_mark = last.len();
                            self.buffer.read_mark = last.len();
                        }

                        cha = self.buffer.fill_cscsc();
                    } else {
                        // Restore the decoded word into the buffer.
                        last_decoded = None;
                        cha = self.buffer.fill_reconstruct(&self.table, new_code);
                    }
                } else {
                    let (target, tail) = out.split_at_mut(usize::from(required_len));
                    out = tail;

                    if have_next_code {
                        // Reconstruct high.
                        let source = match last_decoded.take() {
                            Some(last) => last,
                            None => &self.buffer.bytes[..self.buffer.write_mark],
                        };

                        // We don't *actually* expect the unwrap to happen. Each source is at least 1
                        // byte long. But llvm doesn't know this (too much indirect loads and cases).
                        cha = source.get(0).map(|x| *x).unwrap_or(0);
                        target[..source.len()].copy_from_slice(source);
                        target[source.len()..][0] = cha;
                    } else {
                        cha = self.table.reconstruct(new_code, target);
                    }

                    // A new decoded word.
                    last_decoded = Some(target);
                }

                // Each newly read code creates one new code/link based on the preceding code if we
                // have enough space to put it there.
                if !self.table.is_full() {
                    self.table.derive(&deriv, cha);

                    if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff)
                        && self.code_buffer.code_size() < MAX_CODESIZE
                    {
                        self.bump_code_size();
                    }

                    self.next_code += 1;
                }

                // store the information on the decoded word.
                code_link = Some(DerivationBase {
                    code: new_code,
                    first: cha,
                });

                // Can't make any more progress with decoding.
                //
                // We have more data buffered but not enough space to put it? We want fetch a next
                // symbol if possible as in the case of it being a new symbol we can refer to the
                // buffered output as the source for that symbol's meaning and do a memcpy.
                //
                // Since this test is after decoding at least one code, we can now check for an
                // empty buffer and still guarantee progress when one was passed as a parameter.
                if is_in_buffer || out.is_empty() {
                    break;
                }
            }
        }

        // We need to store the last word into the buffer in case the first code in the next
        // iteration is the next_code.
        if let Some(tail) = last_decoded {
            self.buffer.bytes[..tail.len()].copy_from_slice(tail);
            self.buffer.write_mark = tail.len();
            // Mark the full buffer as having been consumed.
            self.buffer.read_mark = tail.len();
        }

        // Ensure we don't indicate that no progress was made if we read some bytes from the input
        // (which is progress).
        if o_in > inp.len() {
            if let Ok(LzwStatus::NoProgress) = status {
                status = Ok(LzwStatus::Ok);
            }
        }

        // Store the code/link state.
self.last = code_link;

        BufferResult {
            // `wrapping_sub` only for codegen: the remaining lengths never exceed the originals.
            consumed_in: o_in.wrapping_sub(inp.len()),
            consumed_out: o_out.wrapping_sub(out.len()),
            status,
        }
    }
}

// NOTE(review): generic parameters on these impl headers appear to have been stripped by the
// rendering (the same artifact removed `<Code>` from the `next_symbol` signatures, restored
// below) — confirm the impl headers against the upstream source.
impl DecodeState {
    /// Pull the next full code symbol from the input, refilling the bit buffer as needed.
    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
        self.code_buffer.next_symbol(inp)
    }

    /// Grow the code width by one bit (up to `MAX_CODESIZE`).
    fn bump_code_size(&mut self) {
        self.code_buffer.bump_code_size()
    }

    /// Top up the 64-bit code buffer from the input slice, advancing the slice.
    fn refill_bits(&mut self, inp: &mut &[u8]) {
        self.code_buffer.refill_bits(inp)
    }
}

/// Bit-buffer for streams where codes are packed most-significant-bit first (e.g. GIF's
/// counterpart ordering, TIFF). Codes are taken from the high end of `bit_buffer`.
impl CodeBuffer for MsbBuffer {
    fn new(min_size: u8) -> Self {
        MsbBuffer {
            // Codes start one bit wider than the symbol size to fit clear/end codes.
            code_size: min_size + 1,
            code_mask: (1u16 << (min_size + 1)) - 1,
            bit_buffer: 0,
            bits: 0,
        }
    }

    /// Reset the code width after a clear code; buffered bits are kept.
    fn reset(&mut self, min_size: u8) {
        self.code_size = min_size + 1;
        self.code_mask = (1 << self.code_size) - 1;
    }

    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
        if self.bits < self.code_size {
            self.refill_bits(inp);
        }

        if self.bits < self.code_size {
            return None;
        }

        // Rotate the top `code_size` bits to the bottom and mask them out as the code.
        let mask = u64::from(self.code_mask);
        let rotbuf = self.bit_buffer.rotate_left(self.code_size.into());
        self.bit_buffer = rotbuf & !mask;
        self.bits -= self.code_size;
        Some((rotbuf & mask) as u16)
    }

    fn bump_code_size(&mut self) {
        self.code_size += 1;
        self.code_mask = (self.code_mask << 1) | 1;
    }

    fn refill_bits(&mut self, inp: &mut &[u8]) {
        // Read whole bytes only; at most 8 fit into the (emptied) 64-bit buffer.
        let wish_count = (64 - self.bits) / 8;
        let mut buffer = [0u8; 8];
        let new_bits = match inp.get(..usize::from(wish_count)) {
            Some(bytes) => {
                buffer[..usize::from(wish_count)].copy_from_slice(bytes);
                *inp = &inp[usize::from(wish_count)..];
                wish_count * 8
            }
            // Fewer bytes than wished for remain: take all of them.
            None => {
                let new_bits = inp.len() * 8;
                buffer[..inp.len()].copy_from_slice(inp);
                *inp = &[];
                new_bits as u8
            }
        };
        // New bytes are appended below the `self.bits` already-buffered top bits.
        self.bit_buffer |= u64::from_be_bytes(buffer) >> self.bits;
        self.bits += new_bits;
    }

    /// Peek up to `BURST` codes without consuming them; returns how many were available.
    fn peek_bits(&self, code: &mut [Code; BURST]) -> usize {
        let mut bit_buffer = self.bit_buffer;
        let mask = u64::from(self.code_mask);
        let mut consumed = 0;
        let mut cnt = 0;

        for b in code {
            let consumed_after = consumed + self.code_size;
            if consumed_after > self.bits {
                break;
            }

            cnt += 1;
            consumed = consumed_after;

            let rotbuf = bit_buffer.rotate_left(self.code_size.into());
            *b = (rotbuf & mask) as u16;
            // The read bits are 'appended' but we never interpret those appended bits.
            bit_buffer = rotbuf;
        }

        cnt
    }

    /// Drop `code_cnt` previously peeked codes from the buffer.
    fn consume_bits(&mut self, code_cnt: u8) {
        let bits = self.code_size * code_cnt;
        debug_assert!(bits <= self.bits);

        if bits >= self.bits {
            self.bit_buffer = 0;
        } else {
            // bits < self.bits so this must be smaller than the number size.
            self.bit_buffer = self.bit_buffer << bits;
        }

        self.bits = self.bits.wrapping_sub(bits);
    }

    fn max_code(&self) -> Code {
        self.code_mask
    }

    fn code_size(&self) -> u8 {
        self.code_size
    }
}

/// Bit-buffer for streams where codes are packed least-significant-bit first (e.g. GIF).
/// Codes are taken from the low end of `bit_buffer`.
impl CodeBuffer for LsbBuffer {
    fn new(min_size: u8) -> Self {
        LsbBuffer {
            code_size: min_size + 1,
            code_mask: (1u16 << (min_size + 1)) - 1,
            bit_buffer: 0,
            bits: 0,
        }
    }

    fn reset(&mut self, min_size: u8) {
        self.code_size = min_size + 1;
        self.code_mask = (1 << self.code_size) - 1;
    }

    fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
        if self.bits < self.code_size {
            self.refill_bits(inp);
        }

        if self.bits < self.code_size {
            return None;
        }

        let mask = u64::from(self.code_mask);
        let code = self.bit_buffer & mask;
        self.bit_buffer >>= self.code_size;
        self.bits -= self.code_size;
        Some(code as u16)
    }

    fn bump_code_size(&mut self) {
        self.code_size += 1;
        self.code_mask = (self.code_mask << 1) | 1;
    }

    fn refill_bits(&mut self, inp: &mut &[u8]) {
        let wish_count = (64 - self.bits) / 8;
        let mut buffer = [0u8; 8];
        let new_bits = match inp.get(..usize::from(wish_count)) {
            Some(bytes) => {
                buffer[..usize::from(wish_count)].copy_from_slice(bytes);
                *inp = &inp[usize::from(wish_count)..];
                wish_count * 8
            }
            None => {
                let new_bits = inp.len() * 8;
                buffer[..inp.len()].copy_from_slice(inp);
                *inp = &[];
                new_bits as u8
            }
        };
        // `from_be_bytes(..).swap_bytes()` reads the bytes in little-endian order; new bytes
        // land above the `self.bits` already-buffered low bits.
        self.bit_buffer |= u64::from_be_bytes(buffer).swap_bytes() << self.bits;
        self.bits += new_bits;
    }

    fn peek_bits(&self, code: &mut [Code; BURST]) -> usize {
        let mut bit_buffer = self.bit_buffer;
let mask = u64::from(self.code_mask);
        let mut consumed = 0;
        let mut cnt = 0;

        // Extract codes from the low end until the buffered bits run out or BURST is reached.
        for b in code {
            let consumed_after = consumed + self.code_size;
            if consumed_after > self.bits {
                break;
            }

            cnt += 1;
            consumed = consumed_after;

            *b = (bit_buffer & mask) as u16;
            bit_buffer = bit_buffer >> self.code_size;
        }

        cnt
    }

    // Drop `code_cnt` previously peeked codes from the buffer.
    fn consume_bits(&mut self, code_cnt: u8) {
        let bits = self.code_size * code_cnt;
        debug_assert!(bits <= self.bits);

        if bits >= self.bits {
            self.bit_buffer = 0;
        } else {
            // bits < self.bits so this must be smaller than the number size.
            self.bit_buffer = self.bit_buffer >> bits;
        }

        self.bits = self.bits.wrapping_sub(bits);
    }

    fn max_code(&self) -> Code {
        self.code_mask
    }

    fn code_size(&self) -> u8 {
        self.code_size
    }
}

impl Buffer {
    // `MAX_ENTRIES` bytes suffice since no decoded word can be longer than the table.
    fn new() -> Self {
        Buffer {
            bytes: vec![0; MAX_ENTRIES].into_boxed_slice(),
            read_mark: 0,
            write_mark: 0,
        }
    }

    /// When encoding a sequence `cScSc` where `c` is any character and `S` is any string
    /// this results in two codes `AB`, `A` encoding `cS` and `B` encoding `cSc`. Supposing
    /// the buffer is already filled with the reconstruction of `A`, we can easily fill it
    /// with the reconstruction of `B`.
    fn fill_cscsc(&mut self) -> u8 {
        self.bytes[self.write_mark] = self.bytes[0];
        self.write_mark += 1;
        self.read_mark = 0;
        self.bytes[0]
    }

    // Fill the buffer by decoding from the table; returns the word's first byte.
    fn fill_reconstruct(&mut self, table: &Table, code: Code) -> u8 {
        self.write_mark = 0;
        self.read_mark = 0;
        let depth = table.depths[usize::from(code)];
        // Temporarily move the storage out to avoid aliasing `self` while the table writes into it.
        let mut memory = core::mem::replace(&mut self.bytes, Box::default());

        let out = &mut memory[..usize::from(depth)];
        let last = table.reconstruct(code, out);

        self.bytes = memory;
        self.write_mark = usize::from(depth);
        last
    }

    // The not-yet-consumed part of the buffered word.
    fn buffer(&self) -> &[u8] {
        &self.bytes[self.read_mark..self.write_mark]
    }

    fn consume(&mut self, amt: usize) {
        self.read_mark += amt;
    }
}

impl Table {
    fn new() -> Self {
        Table {
            inner: Vec::with_capacity(MAX_ENTRIES),
            depths: Vec::with_capacity(MAX_ENTRIES),
        }
    }

    // Truncate back to the static entries (alphabet + clear code + end code).
    fn clear(&mut self, min_size: u8) {
        let static_count = usize::from(1u16 << u16::from(min_size)) + 2;
        self.inner.truncate(static_count);
        self.depths.truncate(static_count);
    }

    // Rebuild the static entries from scratch.
    fn init(&mut self, min_size: u8) {
        self.inner.clear();
        self.depths.clear();
        for i in 0..(1u16 << u16::from(min_size)) {
            self.inner.push(Link::base(i as u8));
            self.depths.push(1);
        }
        // Clear code.
        self.inner.push(Link::base(0));
        self.depths.push(0);
        // End code.
1445 | self.inner.push(Link::base(0)); 1446 | self.depths.push(0); 1447 | } 1448 | 1449 | fn at(&self, code: Code) -> &Link { 1450 | &self.inner[usize::from(code)] 1451 | } 1452 | 1453 | fn is_empty(&self) -> bool { 1454 | self.inner.is_empty() 1455 | } 1456 | 1457 | fn is_full(&self) -> bool { 1458 | self.inner.len() >= MAX_ENTRIES 1459 | } 1460 | 1461 | fn derive(&mut self, from: &DerivationBase, byte: u8) { 1462 | let link = from.derive(byte); 1463 | let depth = self.depths[usize::from(from.code)] + 1; 1464 | self.inner.push(link); 1465 | self.depths.push(depth); 1466 | } 1467 | 1468 | // Derive multiple codes in a row, where each base is guaranteed to already exist. 1469 | fn derive_burst(&mut self, from: &mut DerivationBase, burst: &[Code], first: &[u8]) { 1470 | let mut depth_of = from.code; 1471 | // Note that false data dependency we want to get rid of! 1472 | // TODO: this pushes into a Vec, maybe we can make this cleaner. 1473 | for &code in burst { 1474 | let depth = self.depths[usize::from(depth_of)] + 1; 1475 | self.depths.push(depth); 1476 | depth_of = code; 1477 | } 1478 | 1479 | // Llvm tends to be flaky with code layout for the case of requiring an allocation. It's 1480 | // not clear if that can occur in practice but it relies on iterator size hint.. 
1481 | let extensions = burst.iter().zip(first); 1482 | self.inner.extend(extensions.map(|(&code, &first)| { 1483 | let link = from.derive(first); 1484 | from.code = code; 1485 | from.first = first; 1486 | link 1487 | })); 1488 | } 1489 | 1490 | fn reconstruct(&self, code: Code, out: &mut [u8]) -> u8 { 1491 | let mut code_iter = code; 1492 | let table = &self.inner[..=usize::from(code)]; 1493 | let first = table[usize::from(code)].first; 1494 | 1495 | let len = code_iter; 1496 | for ch in out.iter_mut().rev() { 1497 | //(code, cha) = self.table[k as usize]; 1498 | // Note: This could possibly be replaced with an unchecked array access if 1499 | // - value is asserted to be < self.next_code() in push 1500 | // - min_size is asserted to be < MAX_CODESIZE 1501 | let entry = &table[usize::from(code_iter)]; 1502 | code_iter = core::cmp::min(len, entry.prev); 1503 | *ch = entry.byte; 1504 | } 1505 | 1506 | first 1507 | } 1508 | } 1509 | 1510 | impl Link { 1511 | fn base(byte: u8) -> Self { 1512 | Link { 1513 | prev: 0, 1514 | byte, 1515 | first: byte, 1516 | } 1517 | } 1518 | } 1519 | 1520 | impl DerivationBase { 1521 | // TODO: this has self type to make it clear we might depend on the old in a future 1522 | // optimization. However, that has no practical purpose right now. 
1523 | fn derive(&self, byte: u8) -> Link { 1524 | Link { 1525 | prev: self.code, 1526 | byte, 1527 | first: self.first, 1528 | } 1529 | } 1530 | } 1531 | 1532 | #[cfg(test)] 1533 | mod tests { 1534 | use crate::alloc::vec::Vec; 1535 | #[cfg(feature = "std")] 1536 | use crate::StreamBuf; 1537 | use crate::{decode::Decoder, BitOrder}; 1538 | 1539 | #[test] 1540 | fn invalid_code_size_low() { 1541 | let _ = Decoder::new(BitOrder::Msb, 0); 1542 | let _ = Decoder::new(BitOrder::Msb, 1); 1543 | } 1544 | 1545 | #[test] 1546 | #[should_panic] 1547 | fn invalid_code_size_high() { 1548 | let _ = Decoder::new(BitOrder::Msb, 14); 1549 | } 1550 | 1551 | fn make_encoded() -> Vec { 1552 | const FILE: &'static [u8] = include_bytes!(concat!( 1553 | env!("CARGO_MANIFEST_DIR"), 1554 | "/benches/binary-8-msb.lzw" 1555 | )); 1556 | return Vec::from(FILE); 1557 | } 1558 | 1559 | #[test] 1560 | #[cfg(feature = "std")] 1561 | fn into_stream_buffer_no_alloc() { 1562 | let encoded = make_encoded(); 1563 | let mut decoder = Decoder::new(BitOrder::Msb, 8); 1564 | 1565 | let mut output = vec![]; 1566 | let mut buffer = [0; 512]; 1567 | let mut istream = decoder.into_stream(&mut output); 1568 | istream.set_buffer(&mut buffer[..]); 1569 | istream.decode(&encoded[..]).status.unwrap(); 1570 | 1571 | match istream.buffer { 1572 | Some(StreamBuf::Borrowed(_)) => {} 1573 | None => panic!("Decoded without buffer??"), 1574 | Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"), 1575 | } 1576 | } 1577 | 1578 | #[test] 1579 | #[cfg(feature = "std")] 1580 | fn into_stream_buffer_small_alloc() { 1581 | struct WriteTap(W); 1582 | const BUF_SIZE: usize = 512; 1583 | 1584 | impl std::io::Write for WriteTap { 1585 | fn write(&mut self, buf: &[u8]) -> std::io::Result { 1586 | assert!(buf.len() <= BUF_SIZE); 1587 | self.0.write(buf) 1588 | } 1589 | fn flush(&mut self) -> std::io::Result<()> { 1590 | self.0.flush() 1591 | } 1592 | } 1593 | 1594 | let encoded = make_encoded(); 1595 | let mut 
decoder = Decoder::new(BitOrder::Msb, 8); 1596 | 1597 | let mut output = vec![]; 1598 | let mut istream = decoder.into_stream(WriteTap(&mut output)); 1599 | istream.set_buffer_size(512); 1600 | istream.decode(&encoded[..]).status.unwrap(); 1601 | 1602 | match istream.buffer { 1603 | Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE), 1604 | Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"), 1605 | None => panic!("Decoded without buffer??"), 1606 | } 1607 | } 1608 | 1609 | #[test] 1610 | #[cfg(feature = "std")] 1611 | fn reset() { 1612 | let encoded = make_encoded(); 1613 | let mut decoder = Decoder::new(BitOrder::Msb, 8); 1614 | let mut reference = None; 1615 | 1616 | for _ in 0..2 { 1617 | let mut output = vec![]; 1618 | let mut buffer = [0; 512]; 1619 | let mut istream = decoder.into_stream(&mut output); 1620 | istream.set_buffer(&mut buffer[..]); 1621 | istream.decode_all(&encoded[..]).status.unwrap(); 1622 | 1623 | decoder.reset(); 1624 | if let Some(reference) = &reference { 1625 | assert_eq!(output, *reference); 1626 | } else { 1627 | reference = Some(output); 1628 | } 1629 | } 1630 | } 1631 | } 1632 | --------------------------------------------------------------------------------