├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── linux.yml │ ├── macos.yml │ ├── wasm.yml │ └── windows.yml ├── .gitignore ├── .gitmodules ├── Cargo.toml ├── LICENSE ├── Readme.md ├── assets ├── example.txt ├── example.txt.v5.zst ├── example.txt.v6.zst ├── example.txt.v7.zst ├── example.txt.v8.zst └── example.txt.zst ├── examples ├── basic.rs ├── benchmark.rs ├── stream.rs ├── train.rs ├── zstd.rs └── zstdcat.rs ├── rustfmt.toml ├── src ├── bulk │ ├── compressor.rs │ ├── decompressor.rs │ ├── mod.rs │ └── tests.rs ├── dict.rs ├── lib.rs └── stream │ ├── functions.rs │ ├── mod.rs │ ├── raw.rs │ ├── read │ ├── mod.rs │ └── tests.rs │ ├── tests.rs │ ├── write │ ├── mod.rs │ └── tests.rs │ └── zio │ ├── mod.rs │ ├── reader.rs │ └── writer.rs ├── tests └── issue_182.rs └── zstd-safe ├── Cargo.toml ├── LICENSE ├── LICENSE.Apache-2.0 ├── LICENSE.Mit ├── Readme.md ├── build.rs ├── fuzz ├── .gitignore ├── Cargo.toml └── fuzz_targets │ └── zstd_fuzzer.rs ├── src ├── constants.rs ├── constants_experimental.rs ├── constants_seekable.rs ├── lib.rs ├── seekable.rs └── tests.rs ├── update_consts.sh └── zstd-sys ├── Cargo.toml ├── LICENSE ├── LICENSE.Apache-2.0 ├── LICENSE.BSD-3-Clause ├── LICENSE.Mit ├── Readme.md ├── build.rs ├── examples └── it_work.rs ├── src ├── bindings_zdict.rs ├── bindings_zdict_experimental.rs ├── bindings_zdict_std_experimental.rs ├── bindings_zstd.rs ├── bindings_zstd_experimental.rs ├── bindings_zstd_seekable.rs ├── bindings_zstd_std_experimental.rs ├── lib.rs └── wasm_shim.rs ├── test_it.sh ├── update_bindings.sh ├── update_zstd.sh ├── wasm-shim ├── assert.h ├── stdio.h ├── stdlib.h ├── string.h └── time.h ├── zdict.h ├── zstd.h └── zstd_seekable.h /.gitattributes: -------------------------------------------------------------------------------- 1 | /assets/* -text -crlf 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: 
-------------------------------------------------------------------------------- 1 | # Dependabot dependency version checks / updates 2 | 3 | version: 2 4 | updates: 5 | - package-ecosystem: "github-actions" 6 | # Workflow files stored in the 7 | # default location of `.github/workflows` 8 | directory: "/" 9 | schedule: 10 | interval: "daily" 11 | - package-ecosystem: "cargo" 12 | directory: "/" 13 | schedule: 14 | interval: "daily" 15 | - package-ecosystem: "cargo" 16 | directory: "/zstd-safe" 17 | schedule: 18 | interval: "daily" 19 | - package-ecosystem: "cargo" 20 | directory: "/zstd-safe/zstd-sys" 21 | schedule: 22 | interval: "daily" 23 | -------------------------------------------------------------------------------- /.github/workflows/linux.yml: -------------------------------------------------------------------------------- 1 | name: Linux 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: self-hosted 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | submodules: recursive 21 | - name: Build 22 | run: cargo build --verbose 23 | - name: Run tests 24 | run: cargo test --verbose 25 | 26 | - name: Build with feature thin 27 | run: cargo build --verbose --features thin 28 | - name: Run tests 29 | run: cargo test --verbose --features thin 30 | 31 | - name: Build zstd-safe with feature seekable 32 | run: cargo build --manifest-path zstd-safe/Cargo.toml --verbose --features seekable 33 | - name: Run zstd-safe tests with feature seekable 34 | run: cargo test --manifest-path zstd-safe/Cargo.toml --verbose --features seekable 35 | - name: Build zstd-safe with features std and seekable 36 | run: cargo build --manifest-path zstd-safe/Cargo.toml --verbose --features std,seekable 37 | - name: Run zstd-safe tests with features std and seekable 38 | run: cargo test --manifest-path zstd-safe/Cargo.toml --verbose --features std,seekable 
39 | -------------------------------------------------------------------------------- /.github/workflows/macos.yml: -------------------------------------------------------------------------------- 1 | name: macOS 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: macos-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | submodules: recursive 21 | - name: Build 22 | run: cargo build --verbose 23 | - name: Run tests 24 | run: cargo test --verbose 25 | 26 | -------------------------------------------------------------------------------- /.github/workflows/wasm.yml: -------------------------------------------------------------------------------- 1 | name: Wasm 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: self-hosted 16 | 17 | steps: 18 | - name: Wasm target 19 | run: rustup target add wasm32-unknown-unknown 20 | - uses: actions/checkout@v4 21 | with: 22 | submodules: recursive 23 | 24 | - name: Build 25 | run: cargo build --verbose --target wasm32-unknown-unknown 26 | - name: Build with feature thin 27 | run: cargo build --verbose --features thin --target wasm32-unknown-unknown 28 | -------------------------------------------------------------------------------- /.github/workflows/windows.yml: -------------------------------------------------------------------------------- 1 | name: Windows 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | strategy: 16 | matrix: 17 | target: 18 | #- i686-pc-windows-gnu 19 | - i686-pc-windows-msvc 20 | #- x86_64-pc-windows-gnu 21 | - x86_64-pc-windows-msvc 22 | channel: [ stable ] 23 | 24 | runs-on: windows-latest 25 | 26 | steps: 27 | - uses: 
actions/checkout@v4 28 | with: 29 | submodules: recursive 30 | - name: setup 31 | uses: dtolnay/rust-toolchain@master 32 | with: 33 | toolchain: ${{ matrix.channel }}-${{ matrix.target }} 34 | targets: ${{ matrix.target }} 35 | 36 | - name: Add mingw32 to path for i686-gnu 37 | run: | 38 | echo "C:\msys64\mingw32\bin" >> $GITHUB_PATH 39 | echo "C:\msys64\usr\bin" >> $GITHUB_PATH 40 | if: matrix.target == 'i686-pc-windows-gnu' 41 | shell: bash 42 | - name: Add mingw64 to path for x86_64-gnu 43 | run: | 44 | echo "C:\msys64\mingw64\bin" >> $GITHUB_PATH 45 | echo "C:\msys64\usr\bin" >> $GITHUB_PATH 46 | if: matrix.target == 'x86_64-pc-windows-gnu' 47 | shell: bash 48 | - name: Update gcc 49 | if: matrix.target == 'x86_64-pc-windows-gnu' 50 | run: pacman.exe -Sy --noconfirm mingw-w64-x86_64-toolchain 51 | - name: Update gcc 52 | if: matrix.target == 'i686-pc-windows-gnu' 53 | run: pacman.exe -Sy --noconfirm mingw-w64-i686-toolchain 54 | 55 | - name: Build 56 | run: cargo build --verbose --verbose 57 | - name: Run tests 58 | run: cargo test --verbose 59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | /silesia 4 | /silesia.zip 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "zstd-safe/zstd-sys/zstd"] 2 | path = zstd-safe/zstd-sys/zstd 3 | url = https://github.com/facebook/zstd 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Alexandre Bury "] 3 | description = "Binding for the zstd compression library." 
4 | documentation = "https://docs.rs/zstd" 5 | keywords = ["zstd", "zstandard", "compression"] 6 | categories = ["compression", "api-bindings"] 7 | license = "MIT" 8 | name = "zstd" 9 | repository = "https://github.com/gyscos/zstd-rs" 10 | version = "0.13.3" 11 | exclude = ["assets/*.zst", "/.github"] 12 | readme = "Readme.md" 13 | edition = "2018" 14 | rust-version = "1.64" 15 | 16 | [package.metadata.docs.rs] 17 | features = ["experimental", "zstdmt", "zdict_builder", "doc-cfg"] 18 | 19 | [badges] 20 | travis-ci = { repository = "gyscos/zstd-rs" } 21 | 22 | [dependencies] 23 | zstd-safe = { path = "zstd-safe", version = "7.1.0", default-features = false, features = ["std"] } 24 | 25 | [dev-dependencies] 26 | clap = {version = "4.0", features=["derive"]} 27 | humansize = "2.0" 28 | partial-io = "0.5" 29 | walkdir = "2.2" 30 | 31 | [features] 32 | default = ["legacy", "arrays", "zdict_builder"] 33 | 34 | bindgen = ["zstd-safe/bindgen"] 35 | debug = ["zstd-safe/debug"] 36 | legacy = ["zstd-safe/legacy"] 37 | pkg-config = ["zstd-safe/pkg-config"] 38 | wasm = [] 39 | zstdmt = ["zstd-safe/zstdmt"] 40 | experimental = ["zstd-safe/experimental"] 41 | thin = ["zstd-safe/thin"] 42 | arrays = ["zstd-safe/arrays"] 43 | no_asm = ["zstd-safe/no_asm"] 44 | doc-cfg = [] 45 | zdict_builder = ["zstd-safe/zdict_builder"] 46 | 47 | # These two are for cross-language LTO. 48 | # Will only work if `clang` is used to build the C library. 
49 | fat-lto = ["zstd-safe/fat-lto"] 50 | thin-lto = ["zstd-safe/thin-lto"] 51 | 52 | [[example]] 53 | name = "train" 54 | required-features = ["zdict_builder"] 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2016 Alexandre Bury 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
9 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # zstd 2 | 3 | [![crates.io](https://img.shields.io/crates/v/zstd.svg)](https://crates.io/crates/zstd) 4 | [![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE) 5 | 6 | [![Build on Linux](https://github.com/gyscos/zstd-rs/actions/workflows/linux.yml/badge.svg)](https://github.com/gyscos/zstd-rs/actions/workflows/linux.yml) 7 | [![Build on Windows](https://github.com/gyscos/zstd-rs/actions/workflows/windows.yml/badge.svg)](https://github.com/gyscos/zstd-rs/actions/workflows/windows.yml) 8 | [![Build on macOS](https://github.com/gyscos/zstd-rs/actions/workflows/macos.yml/badge.svg)](https://github.com/gyscos/zstd-rs/actions/workflows/macos.yml) 9 | [![Build on wasm](https://github.com/gyscos/zstd-rs/actions/workflows/wasm.yml/badge.svg)](https://github.com/gyscos/zstd-rs/actions/workflows/wasm.yml) 10 | 11 | 12 | This library is a Rust binding for the [zstd compression library][zstd]. 13 | 14 | # [Documentation][doc] 15 | 16 | ## 1 - Add to `Cargo.toml` 17 | 18 | ```bash 19 | $ cargo add zstd 20 | ``` 21 | 22 | ```toml 23 | # Cargo.toml 24 | 25 | [dependencies] 26 | zstd = "0.13" 27 | ``` 28 | 29 | ## 2 - Usage 30 | 31 | This library provides `Read` and `Write` wrappers to handle (de)compression, 32 | along with convenience functions to make common tasks easier. 33 | 34 | For instance, `stream::copy_encode` and `stream::copy_decode` are easy-to-use 35 | wrappers around `std::io::copy`. 
Check the [stream] example: 36 | 37 | ```rust 38 | use std::io; 39 | 40 | // This function uses the convenient `copy_encode` method 41 | fn compress(level: i32) { 42 | zstd::stream::copy_encode(io::stdin(), io::stdout(), level).unwrap(); 43 | } 44 | 45 | // This function does the same thing, directly using an `Encoder`: 46 | fn compress_manually(level: i32) { 47 | let mut encoder = zstd::stream::Encoder::new(io::stdout(), level).unwrap(); 48 | io::copy(&mut io::stdin(), &mut encoder).unwrap(); 49 | encoder.finish().unwrap(); 50 | } 51 | 52 | fn decompress() { 53 | zstd::stream::copy_decode(io::stdin(), io::stdout()).unwrap(); 54 | } 55 | ``` 56 | 57 | # Asynchronous support 58 | 59 | The [`async-compression`](https://github.com/Nemo157/async-compression/) crate 60 | provides an async-ready integration of various compression algorithms, 61 | including `zstd-rs`. 62 | 63 | # Compile it yourself 64 | 65 | `zstd` is included as a submodule. To get everything during your clone, use: 66 | 67 | ``` 68 | git clone https://github.com/gyscos/zstd-rs --recursive 69 | ``` 70 | 71 | Or, if you cloned it without the `--recursive` flag, 72 | call this from inside the repository: 73 | 74 | ``` 75 | git submodule update --init 76 | ``` 77 | 78 | Then, running `cargo build` should take care 79 | of building the C library and linking to it. 80 | 81 | # Build-time bindgen 82 | 83 | This library includes a pre-generated `bindings.rs` file. 84 | You can also generate new bindings at build-time, using the `bindgen` feature: 85 | 86 | ``` 87 | cargo build --features bindgen 88 | ``` 89 | 90 | # TODO 91 | 92 | * Benchmarks, optimizations, ... 93 | 94 | # Disclaimer 95 | 96 | This implementation is largely inspired by bozaro's [lz4-rs]. 97 | 98 | # License 99 | 100 | * The zstd C library is under a dual BSD/GPLv2 license. 101 | * This zstd-rs binding library is under an [MIT](LICENSE) license. 
102 | 103 | [zstd]: https://github.com/facebook/zstd 104 | [lz4-rs]: https://github.com/bozaro/lz4-rs 105 | [cargo-edit]: https://github.com/killercup/cargo-edit#cargo-add 106 | [doc]: https://docs.rs/zstd 107 | [stream]: examples/stream.rs 108 | [submodule]: https://git-scm.com/book/en/v2/Git-Tools-Submodules 109 | -------------------------------------------------------------------------------- /assets/example.txt: -------------------------------------------------------------------------------- 1 | ’Twas brillig, and the slithy toves 2 | Did gyre and gimble in the wade; 3 | All mimsy were the borogoves, 4 | And the mome raths outgrabe. 5 | 6 | 7 | "Beware the Jabberwock, my son! 8 | The jaws that bite, the claws that catch! 9 | Beware the Jubjub bird, and shun 10 | The frumious Bandersnatch!" 11 | 12 | 13 | He took his vorpal sword in hand: 14 | Long time the manxome foe he sought— 15 | So rested he by the Tumtum tree, 16 | And stood awhile in thought. 17 | 18 | 19 | And as in uffish thought he stood, 20 | The Jabberwock, with eyes of flame, 21 | Came whiffling through the tulgey wood, 22 | And burbled as it came! 23 | 24 | 25 | One, two! One, two! And through and through 26 | The vorpal blade went snicker-snack! 27 | He left it dead, and with its head 28 | He went galumphing back. 29 | 30 | 31 | "And hast thou slain the Jabberwock? 32 | Come to my arms, my beamish boy! 33 | O frabjous day! Callooh! Callay!" 34 | He chortled in his joy. 35 | 36 | 37 | ’Twas brillig, and the slithy toves 38 | Did gyre and gimble in the wabe; 39 | All mimsy were the borogoves, 40 | And the mome raths outgrabe. 
41 | -------------------------------------------------------------------------------- /assets/example.txt.v5.zst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyscos/zstd-rs/229054099aa73f7e861762f687d7e07cac1d9b3b/assets/example.txt.v5.zst -------------------------------------------------------------------------------- /assets/example.txt.v6.zst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyscos/zstd-rs/229054099aa73f7e861762f687d7e07cac1d9b3b/assets/example.txt.v6.zst -------------------------------------------------------------------------------- /assets/example.txt.v7.zst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyscos/zstd-rs/229054099aa73f7e861762f687d7e07cac1d9b3b/assets/example.txt.v7.zst -------------------------------------------------------------------------------- /assets/example.txt.v8.zst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyscos/zstd-rs/229054099aa73f7e861762f687d7e07cac1d9b3b/assets/example.txt.v8.zst -------------------------------------------------------------------------------- /assets/example.txt.zst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyscos/zstd-rs/229054099aa73f7e861762f687d7e07cac1d9b3b/assets/example.txt.zst -------------------------------------------------------------------------------- /examples/basic.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let some_content = "Something"; 3 | let compression_level = 3; 4 | 5 | // Compress some text 6 | let compressed = 7 | zstd::encode_all(some_content.as_bytes(), compression_level).unwrap(); 8 | 9 | // Now uncompress it 10 | let decoded: Vec = 
zstd::decode_all(compressed.as_slice()).unwrap(); 11 | 12 | // Convert it to text 13 | let decoded_text = std::str::from_utf8(&decoded).unwrap(); 14 | 15 | assert_eq!(some_content, decoded_text); 16 | } 17 | -------------------------------------------------------------------------------- /examples/benchmark.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use humansize::{format_size, DECIMAL}; 3 | use std::io::Read; 4 | use std::path::PathBuf; 5 | 6 | #[derive(Parser, Debug)] 7 | #[command(author, version, about, long_about=None)] 8 | struct Args { 9 | /// Directory containing the data to compress. 10 | /// To use the silesia corpus, run the following commands: 11 | /// 12 | /// ``` 13 | /// wget http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip 14 | /// unzip silesia.zip -d silesia/ 15 | /// cargo run --example benchmark -- silesia/", 16 | /// ``` 17 | dir: PathBuf, 18 | 19 | /// First compression level to test. 20 | #[arg(short, long)] 21 | begin: i32, 22 | 23 | /// Last compression level to test. 24 | #[arg(short, long)] 25 | end: i32, 26 | } 27 | 28 | fn main() { 29 | let args = Args::parse(); 30 | 31 | // Step 1: load data in memory 32 | let files: Vec> = std::fs::read_dir(args.dir) 33 | .unwrap() 34 | .map(|file| { 35 | let file = file.unwrap(); 36 | 37 | let mut content = Vec::new(); 38 | std::fs::File::open(file.path()) 39 | .unwrap() 40 | .read_to_end(&mut content) 41 | .unwrap(); 42 | content 43 | }) 44 | .collect(); 45 | 46 | let total_size: usize = files.iter().map(|data| data.len()).sum(); 47 | 48 | // Step 2: compress data 49 | 50 | // Print tsv headers 51 | println!( 52 | "{}\t{}\t{}\t{}", 53 | "Compression level", 54 | "Compression ratio", 55 | "Compression speed", 56 | "Decompression speed" 57 | ); 58 | 59 | for level in args.begin..args.end { 60 | // Compress each sample sequentially. 
61 | let start = std::time::Instant::now(); 62 | 63 | let compressed: Vec> = files 64 | .iter() 65 | .map(|data| zstd::encode_all(&data[..], level).unwrap()) 66 | .collect(); 67 | let mid = std::time::Instant::now(); 68 | 69 | let uncompressed: Vec> = compressed 70 | .iter() 71 | .map(|data| zstd::decode_all(&data[..]).unwrap()) 72 | .collect(); 73 | let end = std::time::Instant::now(); 74 | 75 | for (original, processed) in files.iter().zip(uncompressed.iter()) { 76 | assert_eq!(&original[..], &processed[..]); 77 | } 78 | 79 | let compress_time = mid - start; 80 | let decompress_time = end - mid; 81 | 82 | let compress_seconds = compress_time.as_secs() as f64 83 | + compress_time.subsec_nanos() as f64 * 1e-9; 84 | 85 | let decompress_seconds = decompress_time.as_secs() as f64 86 | + decompress_time.subsec_nanos() as f64 * 1e-9; 87 | 88 | let compressed_size: usize = compressed.iter().map(Vec::len).sum(); 89 | 90 | let speed = (total_size as f64 / compress_seconds) as usize; 91 | let speed = format_size(speed, DECIMAL); 92 | 93 | let d_speed = (total_size as f64 / decompress_seconds) as usize; 94 | let d_speed = format_size(d_speed, DECIMAL); 95 | 96 | let ratio = compressed_size as f64 / total_size as f64; 97 | println!("{}\t{:.3}\t{}/s\t{}/s", level, 1.0 / ratio, speed, d_speed); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /examples/stream.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | use std::io::{self, Write}; 3 | use std::str::FromStr; 4 | 5 | fn main() { 6 | match env::args().nth(1) { 7 | None => { 8 | writeln!( 9 | &mut io::stderr(), 10 | "Invalid option. 
Usage: `stream [-d|-1..-22]`" 11 | ) 12 | .unwrap(); 13 | } 14 | Some(ref option) if option == "-d" => decompress(), 15 | Some(ref option) => { 16 | if option.starts_with('-') { 17 | let level = match i32::from_str(&option[1..]) { 18 | Ok(level) => level, 19 | Err(e) => panic!("Error parsing compression level: {}", e), 20 | }; 21 | compress(level); 22 | } else { 23 | writeln!( 24 | &mut io::stderr(), 25 | "Invalid option. Usage: `stream [-d|-1..-22]`" 26 | ) 27 | .unwrap(); 28 | } 29 | } 30 | } 31 | } 32 | 33 | fn compress(level: i32) { 34 | zstd::stream::copy_encode(io::stdin(), io::stdout(), level).unwrap(); 35 | } 36 | 37 | fn decompress() { 38 | zstd::stream::copy_decode(io::stdin(), io::stdout()).unwrap(); 39 | } 40 | -------------------------------------------------------------------------------- /examples/train.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use std::io; 3 | use std::path::PathBuf; 4 | 5 | #[derive(Parser, Debug)] 6 | #[command(author, version, about, long_about=None)] 7 | /// This program trains a dictionary from one or more files, 8 | /// to make future compression of similar small files more efficient. 9 | /// 10 | /// The dictionary will need to be present during decompression, 11 | /// but if you need to compress many small files individually, 12 | /// it may be worth the trouble. 13 | struct Args { 14 | /// Maximum dictionary size in bytes. 15 | #[arg(short, long)] 16 | max_size: usize, 17 | 18 | /// Files to use as input. 
19 | files: Vec, 20 | } 21 | 22 | fn main() { 23 | let args = Args::parse(); 24 | 25 | let dict = zstd::dict::from_files(&args.files, args.max_size).unwrap(); 26 | 27 | let mut dict_reader: &[u8] = &dict; 28 | io::copy(&mut dict_reader, &mut io::stdout()).unwrap(); 29 | } 30 | -------------------------------------------------------------------------------- /examples/zstd.rs: -------------------------------------------------------------------------------- 1 | use zstd; 2 | 3 | use std::env; 4 | use std::fs; 5 | use std::io; 6 | 7 | const SUFFIX: &'static str = ".zst"; 8 | 9 | fn main() { 10 | for arg in env::args().skip(1) { 11 | if arg.ends_with(SUFFIX) { 12 | match decompress(&arg) { 13 | Ok(()) => println!("Decompressed {}", arg), 14 | Err(e) => println!("Error decompressing {}: {}", arg, e), 15 | } 16 | } else { 17 | match compress(&arg) { 18 | Ok(()) => println!("Compressed {}", arg), 19 | Err(e) => println!("Error compressing {}: {}", arg, e), 20 | } 21 | } 22 | } 23 | } 24 | 25 | fn compress(source: &str) -> io::Result<()> { 26 | let mut file = fs::File::open(source)?; 27 | let mut encoder = { 28 | let target = fs::File::create(source.to_string() + SUFFIX)?; 29 | zstd::Encoder::new(target, 1)? 30 | }; 31 | 32 | io::copy(&mut file, &mut encoder)?; 33 | encoder.finish()?; 34 | 35 | Ok(()) 36 | } 37 | 38 | fn decompress(source: &str) -> io::Result<()> { 39 | let mut decoder = { 40 | let file = fs::File::open(source)?; 41 | zstd::Decoder::new(file)? 
42 | }; 43 | 44 | let mut target = fs::File::create(source.trim_end_matches(SUFFIX))?; 45 | 46 | io::copy(&mut decoder, &mut target)?; 47 | 48 | Ok(()) 49 | } 50 | -------------------------------------------------------------------------------- /examples/zstdcat.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use std::fs; 3 | use std::io; 4 | 5 | #[derive(Parser, Debug)] 6 | #[command(author, version, about, long_about=None)] 7 | struct Args { 8 | /// Files to decompress. With no file, or when given -, read standard input. 9 | file: Vec, 10 | } 11 | 12 | fn main() { 13 | // This will be a simple application: 14 | // takes a single (repeatable and optional) argument. 15 | let args = Args::parse(); 16 | 17 | // If nothing was given, act as if `-` was there. 18 | if args.file.is_empty() { 19 | decompress_file("-").unwrap(); 20 | } else { 21 | for file in &args.file { 22 | decompress_file(file).unwrap(); 23 | } 24 | } 25 | } 26 | 27 | // Dispatch the source reader depending on the filename 28 | fn decompress_file(file: &str) -> io::Result<()> { 29 | match file { 30 | "-" => decompress_from(io::stdin()), 31 | other => decompress_from(io::BufReader::new(fs::File::open(other)?)), 32 | } 33 | } 34 | 35 | // Decompress from a `Reader` into stdout 36 | fn decompress_from(r: R) -> io::Result<()> { 37 | let mut decoder = zstd::Decoder::new(r)?; 38 | io::copy(&mut decoder, &mut io::stdout())?; 39 | Ok(()) 40 | } 41 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 79 2 | reorder_imports = true 3 | use_try_shorthand = true 4 | -------------------------------------------------------------------------------- /src/bulk/compressor.rs: -------------------------------------------------------------------------------- 1 | use crate::map_error_code; 2 | 3 | use std::io; 4 | use 
zstd_safe; 5 | 6 | /// Allows compressing multiple chunks of data independently. 7 | /// 8 | /// Each job will be processed entirely in-memory without streaming, so this 9 | /// is most fitting for many small jobs. To compress larger volumes that don't 10 | /// easily fit in memory, a streaming compression may be more appropriate. 11 | /// 12 | /// It is more efficient than a streaming compressor for 2 reasons: 13 | /// * It re-uses the zstd context between jobs to avoid re-allocations 14 | /// * It avoids copying data from a `Read` into a temporary buffer before compression. 15 | #[derive(Default)] 16 | pub struct Compressor<'a> { 17 | context: zstd_safe::CCtx<'a>, 18 | } 19 | 20 | impl Compressor<'static> { 21 | /// Creates a new zstd compressor 22 | pub fn new(level: i32) -> io::Result { 23 | Self::with_dictionary(level, &[]) 24 | } 25 | 26 | /// Creates a new zstd compressor, using the given dictionary. 27 | /// 28 | /// Note that using a dictionary means that decompression will need to use 29 | /// the same dictionary. 30 | pub fn with_dictionary(level: i32, dictionary: &[u8]) -> io::Result { 31 | let mut compressor = Self::default(); 32 | 33 | compressor.set_dictionary(level, dictionary)?; 34 | 35 | Ok(compressor) 36 | } 37 | } 38 | 39 | impl<'a> Compressor<'a> { 40 | /// Creates a new compressor using an existing `EncoderDictionary`. 41 | /// 42 | /// The compression level will be the one specified when creating the dictionary. 43 | /// 44 | /// Note that using a dictionary means that decompression will need to use 45 | /// the same dictionary. 46 | pub fn with_prepared_dictionary<'b>( 47 | dictionary: &'a crate::dict::EncoderDictionary<'b>, 48 | ) -> io::Result 49 | where 50 | 'b: 'a, 51 | { 52 | let mut compressor = Self::default(); 53 | 54 | compressor.set_prepared_dictionary(dictionary)?; 55 | 56 | Ok(compressor) 57 | } 58 | 59 | /// Changes the compression level used by this compressor. 
60 | /// 61 | /// *This will clear any dictionary previously registered.* 62 | /// 63 | /// If you want to keep the existing dictionary, you will need to pass it again to 64 | /// `Self::set_dictionary` instead of using this method. 65 | pub fn set_compression_level(&mut self, level: i32) -> io::Result<()> { 66 | self.set_dictionary(level, &[]) 67 | } 68 | 69 | /// Changes the dictionary and compression level used by this compressor. 70 | /// 71 | /// Will affect future compression jobs. 72 | /// 73 | /// Note that using a dictionary means that decompression will need to use 74 | /// the same dictionary. 75 | pub fn set_dictionary( 76 | &mut self, 77 | level: i32, 78 | dictionary: &[u8], 79 | ) -> io::Result<()> { 80 | self.context 81 | .set_parameter(zstd_safe::CParameter::CompressionLevel(level)) 82 | .map_err(map_error_code)?; 83 | 84 | self.context 85 | .load_dictionary(dictionary) 86 | .map_err(map_error_code)?; 87 | 88 | Ok(()) 89 | } 90 | 91 | /// Changes the dictionary used by this compressor. 92 | /// 93 | /// The compression level used when preparing the dictionary will be used. 94 | /// 95 | /// Note that using a dictionary means that decompression will need to use 96 | /// the same dictionary. 97 | pub fn set_prepared_dictionary<'b>( 98 | &mut self, 99 | dictionary: &'a crate::dict::EncoderDictionary<'b>, 100 | ) -> io::Result<()> 101 | where 102 | 'b: 'a, 103 | { 104 | self.context 105 | .ref_cdict(dictionary.as_cdict()) 106 | .map_err(map_error_code)?; 107 | 108 | Ok(()) 109 | } 110 | 111 | /// Compress a single block of data to the given destination buffer. 112 | /// 113 | /// Returns the number of bytes written, or an error if something happened 114 | /// (for instance if the destination buffer was too small). 115 | /// 116 | /// A level of `0` uses zstd's default (currently `3`). 
117 | pub fn compress_to_buffer( 118 | &mut self, 119 | source: &[u8], 120 | destination: &mut C, 121 | ) -> io::Result { 122 | self.context 123 | .compress2(destination, source) 124 | .map_err(map_error_code) 125 | } 126 | 127 | /// Compresses a block of data and returns the compressed result. 128 | /// 129 | /// A level of `0` uses zstd's default (currently `3`). 130 | pub fn compress(&mut self, data: &[u8]) -> io::Result> { 131 | // We allocate a big buffer, slightly larger than the input data. 132 | let buffer_len = zstd_safe::compress_bound(data.len()); 133 | let mut buffer = Vec::with_capacity(buffer_len); 134 | 135 | self.compress_to_buffer(data, &mut buffer)?; 136 | 137 | // Should we shrink the vec? Meh, let the user do it if he wants. 138 | Ok(buffer) 139 | } 140 | 141 | /// Gives mutable access to the internal context. 142 | pub fn context_mut(&mut self) -> &mut zstd_safe::CCtx<'a> { 143 | &mut self.context 144 | } 145 | 146 | /// Sets a compression parameter for this compressor. 147 | pub fn set_parameter( 148 | &mut self, 149 | parameter: zstd_safe::CParameter, 150 | ) -> io::Result<()> { 151 | self.context 152 | .set_parameter(parameter) 153 | .map_err(map_error_code)?; 154 | Ok(()) 155 | } 156 | 157 | /// Sets the expected size of the input. 158 | /// 159 | /// This affects the compression effectiveness. 160 | /// 161 | /// It is an error to give an incorrect size (an error will be returned when closing the 162 | /// stream if the size does not match what was pledged). 163 | /// 164 | /// Giving a `None` size means the size is unknown (this is the default). 
165 | pub fn set_pledged_src_size( 166 | &mut self, 167 | size: Option, 168 | ) -> io::Result<()> { 169 | self.context 170 | .set_pledged_src_size(size) 171 | .map_err(map_error_code)?; 172 | Ok(()) 173 | } 174 | 175 | crate::encoder_parameters!(); 176 | } 177 | 178 | fn _assert_traits() { 179 | fn _assert_send(_: T) {} 180 | 181 | _assert_send(Compressor::new(0)); 182 | } 183 | -------------------------------------------------------------------------------- /src/bulk/decompressor.rs: -------------------------------------------------------------------------------- 1 | use crate::map_error_code; 2 | 3 | #[cfg(feature = "experimental")] 4 | use std::convert::TryInto; 5 | use std::io; 6 | use zstd_safe; 7 | 8 | /// Allows decompressing multiple blocks of data independently. 9 | /// 10 | /// This reduces memory usage compared to calling `decompress` multiple times. 11 | #[derive(Default)] 12 | pub struct Decompressor<'a> { 13 | context: zstd_safe::DCtx<'a>, 14 | } 15 | 16 | impl Decompressor<'static> { 17 | /// Creates a new zstd decompressor. 18 | pub fn new() -> io::Result { 19 | Self::with_dictionary(&[]) 20 | } 21 | 22 | /// Creates a new zstd decompressor, using the given dictionary. 23 | pub fn with_dictionary(dictionary: &[u8]) -> io::Result { 24 | let mut decompressor = Self::default(); 25 | 26 | decompressor.set_dictionary(dictionary)?; 27 | 28 | Ok(decompressor) 29 | } 30 | } 31 | 32 | impl<'a> Decompressor<'a> { 33 | /// Creates a new decompressor using an existing `DecoderDictionary`. 34 | /// 35 | /// Note that using a dictionary means that compression will need to use 36 | /// the same dictionary. 37 | pub fn with_prepared_dictionary<'b>( 38 | dictionary: &'a crate::dict::DecoderDictionary<'b>, 39 | ) -> io::Result 40 | where 41 | 'b: 'a, 42 | { 43 | let mut decompressor = Self::default(); 44 | 45 | decompressor.set_prepared_dictionary(dictionary)?; 46 | 47 | Ok(decompressor) 48 | } 49 | 50 | /// Changes the dictionary used by this decompressor. 
51 | /// 52 | /// Will affect future decompression jobs. 53 | /// 54 | /// Note that using a dictionary means that compression will need to use 55 | /// the same dictionary. 56 | pub fn set_dictionary(&mut self, dictionary: &[u8]) -> io::Result<()> { 57 | self.context 58 | .load_dictionary(dictionary) 59 | .map_err(map_error_code)?; 60 | 61 | Ok(()) 62 | } 63 | 64 | /// Changes the dictionary used by this decompressor. 65 | /// 66 | /// Note that using a dictionary means that compression will need to use 67 | /// the same dictionary. 68 | pub fn set_prepared_dictionary<'b>( 69 | &mut self, 70 | dictionary: &'a crate::dict::DecoderDictionary<'b>, 71 | ) -> io::Result<()> 72 | where 73 | 'b: 'a, 74 | { 75 | self.context 76 | .ref_ddict(dictionary.as_ddict()) 77 | .map_err(map_error_code)?; 78 | 79 | Ok(()) 80 | } 81 | 82 | /// Decompress a single block of data to the given destination buffer. 83 | /// 84 | /// Returns the number of bytes written, or an error if something happened 85 | /// (for instance if the destination buffer was too small). 86 | pub fn decompress_to_buffer( 87 | &mut self, 88 | source: &[u8], 89 | destination: &mut C, 90 | ) -> io::Result { 91 | self.context 92 | .decompress(destination, source) 93 | .map_err(map_error_code) 94 | } 95 | 96 | /// Decompress a block of data, and return the result in a `Vec`. 97 | /// 98 | /// The decompressed data should be at most `capacity` bytes, 99 | /// or an error will be returned. 100 | pub fn decompress( 101 | &mut self, 102 | data: &[u8], 103 | capacity: usize, 104 | ) -> io::Result> { 105 | let capacity = 106 | Self::upper_bound(data).unwrap_or(capacity).min(capacity); 107 | let mut buffer = Vec::with_capacity(capacity); 108 | self.decompress_to_buffer(data, &mut buffer)?; 109 | Ok(buffer) 110 | } 111 | 112 | /// Sets a decompression parameter for this decompressor.
113 | pub fn set_parameter( 114 | &mut self, 115 | parameter: zstd_safe::DParameter, 116 | ) -> io::Result<()> { 117 | self.context 118 | .set_parameter(parameter) 119 | .map_err(map_error_code)?; 120 | Ok(()) 121 | } 122 | 123 | crate::decoder_parameters!(); 124 | 125 | /// Get an upper bound on the decompressed size of data, if available 126 | /// 127 | /// This can be used to pre-allocate enough capacity for `decompress_to_buffer` 128 | /// and is used by `decompress` to ensure that it does not over-allocate if 129 | /// you supply a large `capacity`. 130 | /// 131 | /// Will return `None` if the upper bound cannot be determined or is larger than `usize::MAX` 132 | /// 133 | /// Note that unless the `experimental` feature is enabled, this will always return `None`. 134 | pub fn upper_bound(_data: &[u8]) -> Option { 135 | #[cfg(feature = "experimental")] 136 | { 137 | let bound = zstd_safe::decompress_bound(_data).ok()?; 138 | bound.try_into().ok() 139 | } 140 | #[cfg(not(feature = "experimental"))] 141 | { 142 | None 143 | } 144 | } 145 | } 146 | 147 | fn _assert_traits() { 148 | fn _assert_send(_: T) {} 149 | 150 | _assert_send(Decompressor::new()); 151 | } 152 | -------------------------------------------------------------------------------- /src/bulk/mod.rs: -------------------------------------------------------------------------------- 1 | //! Compress and decompress data in bulk. 2 | //! 3 | //! These methods process all the input data at once. 4 | //! It is therefore best used with relatively small blocks 5 | //! (like small network packets). 6 | 7 | mod compressor; 8 | mod decompressor; 9 | 10 | #[cfg(test)] 11 | mod tests; 12 | 13 | pub use self::compressor::Compressor; 14 | pub use self::decompressor::Decompressor; 15 | 16 | use std::io; 17 | 18 | /// Compresses a single block of data to the given destination buffer. 
19 | /// 20 | /// Returns the number of bytes written, or an error if something happened 21 | /// (for instance if the destination buffer was too small). 22 | /// 23 | /// A level of `0` uses zstd's default (currently `3`). 24 | pub fn compress_to_buffer( 25 | source: &[u8], 26 | destination: &mut [u8], 27 | level: i32, 28 | ) -> io::Result { 29 | Compressor::new(level)?.compress_to_buffer(source, destination) 30 | } 31 | 32 | /// Compresses a block of data and returns the compressed result. 33 | /// 34 | /// A level of `0` uses zstd's default (currently `3`). 35 | pub fn compress(data: &[u8], level: i32) -> io::Result> { 36 | Compressor::new(level)?.compress(data) 37 | } 38 | 39 | /// Decompress a single block of data to the given destination buffer. 40 | /// 41 | /// Returns the number of bytes written, or an error if something happened 42 | /// (for instance if the destination buffer was too small). 43 | pub fn decompress_to_buffer( 44 | source: &[u8], 45 | destination: &mut [u8], 46 | ) -> io::Result { 47 | Decompressor::new()?.decompress_to_buffer(source, destination) 48 | } 49 | 50 | /// Decompresses a block of data and returns the decompressed result. 51 | /// 52 | /// The decompressed data should be at most `capacity` bytes, 53 | /// or an error will be returned. 54 | pub fn decompress(data: &[u8], capacity: usize) -> io::Result> { 55 | Decompressor::new()?.decompress(data, capacity) 56 | } 57 | -------------------------------------------------------------------------------- /src/bulk/tests.rs: -------------------------------------------------------------------------------- 1 | use super::{compress, decompress}; 2 | 3 | const TEXT: &str = include_str!("../../assets/example.txt"); 4 | 5 | #[test] 6 | fn test_direct() { 7 | // Can we include_str!("assets/example.txt")? 8 | // It's excluded from the packaging step, so maybe not.
9 | crate::test_cycle_unwrap( 10 | TEXT.as_bytes(), 11 | |data| compress(data, 1), 12 | |data| decompress(data, TEXT.len()), 13 | ); 14 | } 15 | 16 | #[test] 17 | fn test_stream_compat() { 18 | // We can bulk-compress and stream-decode 19 | crate::test_cycle_unwrap( 20 | TEXT.as_bytes(), 21 | |data| compress(data, 1), 22 | |data| crate::decode_all(data), 23 | ); 24 | 25 | // We can stream-encode and bulk-decompress 26 | crate::test_cycle_unwrap( 27 | TEXT.as_bytes(), 28 | |data| crate::encode_all(data, 1), 29 | |data| decompress(data, TEXT.len()), 30 | ); 31 | } 32 | 33 | #[test] 34 | fn has_content_size() { 35 | let compressed = compress(TEXT.as_bytes(), 1).unwrap(); 36 | 37 | // Bulk functions by default include the content size. 38 | assert_eq!( 39 | zstd_safe::get_frame_content_size(&compressed).unwrap(), 40 | Some(TEXT.len() as u64) 41 | ); 42 | } 43 | -------------------------------------------------------------------------------- /src/dict.rs: -------------------------------------------------------------------------------- 1 | //! Train a dictionary from various sources. 2 | //! 3 | //! A dictionary can help improve the compression of small files. 4 | //! The dictionary must be present during decompression, 5 | //! but can be shared across multiple "similar" files. 6 | //! 7 | //! Creating a dictionary using the `zstd` C library, 8 | //! using the `zstd` command-line interface, using this library, 9 | //! or using the `train` binary provided, should give the same result, 10 | //! and are therefore completely compatible. 11 | //! 12 | //! To use, see [`Encoder::with_dictionary`] or [`Decoder::with_dictionary`]. 13 | //! 14 | //! [`Encoder::with_dictionary`]: ../struct.Encoder.html#method.with_dictionary 15 | //! 
[`Decoder::with_dictionary`]: ../struct.Decoder.html#method.with_dictionary 16 | 17 | #[cfg(feature = "zdict_builder")] 18 | use std::io::{self, Read}; 19 | 20 | pub use zstd_safe::{CDict, DDict}; 21 | 22 | /// Prepared dictionary for compression 23 | /// 24 | /// A dictionary can include its own copy of the data (if it is `'static`), or it can merely point 25 | /// to a separate buffer (if it has another lifetime). 26 | pub struct EncoderDictionary<'a> { 27 | cdict: CDict<'a>, 28 | } 29 | 30 | impl EncoderDictionary<'static> { 31 | /// Creates a prepared dictionary for compression. 32 | /// 33 | /// This will copy the dictionary internally. 34 | pub fn copy(dictionary: &[u8], level: i32) -> Self { 35 | Self { 36 | cdict: zstd_safe::create_cdict(dictionary, level), 37 | } 38 | } 39 | } 40 | 41 | impl<'a> EncoderDictionary<'a> { 42 | #[cfg(feature = "experimental")] 43 | #[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "experimental")))] 44 | /// Create prepared dictionary for compression 45 | /// 46 | /// A level of `0` uses zstd's default (currently `3`). 47 | /// 48 | /// Only available with the `experimental` feature. Use `EncoderDictionary::copy` otherwise. 49 | pub fn new(dictionary: &'a [u8], level: i32) -> Self { 50 | Self { 51 | cdict: zstd_safe::CDict::create_by_reference(dictionary, level), 52 | } 53 | } 54 | 55 | /// Returns reference to `CDict` inner object 56 | pub fn as_cdict(&self) -> &CDict<'a> { 57 | &self.cdict 58 | } 59 | } 60 | 61 | /// Prepared dictionary for decompression 62 | pub struct DecoderDictionary<'a> { 63 | ddict: DDict<'a>, 64 | } 65 | 66 | impl DecoderDictionary<'static> { 67 | /// Create a prepared dictionary for decompression. 68 | /// 69 | /// This will copy the dictionary internally. 
70 | pub fn copy(dictionary: &[u8]) -> Self { 71 | Self { 72 | ddict: zstd_safe::DDict::create(dictionary), 73 | } 74 | } 75 | } 76 | 77 | impl<'a> DecoderDictionary<'a> { 78 | #[cfg(feature = "experimental")] 79 | #[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "experimental")))] 80 | /// Create prepared dictionary for decompression 81 | /// 82 | /// Only available with the `experimental` feature. Use `DecoderDictionary::copy` otherwise. 83 | pub fn new(dict: &'a [u8]) -> Self { 84 | Self { 85 | ddict: zstd_safe::DDict::create_by_reference(dict), 86 | } 87 | } 88 | 89 | /// Returns reference to `DDict` inner object 90 | pub fn as_ddict(&self) -> &DDict<'a> { 91 | &self.ddict 92 | } 93 | } 94 | 95 | /// Train a dictionary from a big continuous chunk of data, with all samples 96 | /// contiguous in memory. 97 | /// 98 | /// This is the most efficient way to train a dictionary, 99 | /// since this is directly fed into `zstd`. 100 | /// 101 | /// * `sample_data` is the concatenation of all sample data. 102 | /// * `sample_sizes` is the size of each sample in `sample_data`. 103 | /// The sum of all `sample_sizes` should equal the length of `sample_data`. 104 | /// * `max_size` is the maximum size of the dictionary to generate. 105 | /// 106 | /// The result is the dictionary data. You can, for example, feed it to [`CDict::create`]. 107 | #[cfg(feature = "zdict_builder")] 108 | #[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "zdict_builder")))] 109 | pub fn from_continuous( 110 | sample_data: &[u8], 111 | sample_sizes: &[usize], 112 | max_size: usize, 113 | ) -> io::Result> { 114 | use crate::map_error_code; 115 | 116 | // Complain if the lengths don't add up to the entire data. 
117 | if sample_sizes.iter().sum::() != sample_data.len() { 118 | return Err(io::Error::new( 119 | io::ErrorKind::Other, 120 | "sample sizes don't add up".to_string(), 121 | )); 122 | } 123 | 124 | let mut result = Vec::with_capacity(max_size); 125 | zstd_safe::train_from_buffer(&mut result, sample_data, sample_sizes) 126 | .map_err(map_error_code)?; 127 | Ok(result) 128 | } 129 | 130 | /// Train a dictionary from multiple samples. 131 | /// 132 | /// The samples will internally be copied to a single continuous buffer, 133 | /// so make sure you have enough memory available. 134 | /// 135 | /// If you need to stretch your system's limits, 136 | /// [`from_continuous`] directly uses the given slice. 137 | /// 138 | /// [`from_continuous`]: ./fn.from_continuous.html 139 | /// 140 | /// * `samples` is a list of individual samples to train on. 141 | /// * `max_size` is the maximum size of the dictionary to generate. 142 | /// 143 | /// The result is the dictionary data. You can, for example, feed it to [`CDict::create`]. 144 | #[cfg(feature = "zdict_builder")] 145 | #[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "zdict_builder")))] 146 | pub fn from_samples>( 147 | samples: &[S], 148 | max_size: usize, 149 | ) -> io::Result> { 150 | // Pre-allocate the entire required size. 151 | let total_length: usize = 152 | samples.iter().map(|sample| sample.as_ref().len()).sum(); 153 | 154 | let mut data = Vec::with_capacity(total_length); 155 | 156 | // Copy every sample to a big chunk of memory 157 | data.extend(samples.iter().flat_map(|s| s.as_ref()).cloned()); 158 | 159 | let sizes: Vec<_> = samples.iter().map(|s| s.as_ref().len()).collect(); 160 | 161 | from_continuous(&data, &sizes, max_size) 162 | } 163 | 164 | /// Train a dictionary from multiple samples. 165 | /// 166 | /// Unlike [`from_samples`], this does not require having a list of all samples. 167 | /// It also allows running into an error when iterating through the samples. 
168 | /// 169 | /// They will still be copied to a continuous array and fed to [`from_continuous`]. 170 | /// 171 | /// * `samples` is an iterator of individual samples to train on. 172 | /// * `max_size` is the maximum size of the dictionary to generate. 173 | /// 174 | /// The result is the dictionary data. You can, for example, feed it to [`CDict::create`]. 175 | /// 176 | /// # Examples 177 | /// 178 | /// ```rust,no_run 179 | /// // Train from a couple of json files. 180 | /// let dict_buffer = zstd::dict::from_sample_iterator( 181 | /// ["file_a.json", "file_b.json"] 182 | /// .into_iter() 183 | /// .map(|filename| std::fs::File::open(filename)), 184 | /// 10_000, // 10kB dictionary 185 | /// ).unwrap(); 186 | /// ``` 187 | /// 188 | /// ```rust,no_run 189 | /// use std::io::BufRead as _; 190 | /// // Treat each line from stdin as a separate sample. 191 | /// let dict_buffer = zstd::dict::from_sample_iterator( 192 | /// std::io::stdin().lock().lines().map(|line: std::io::Result| { 193 | /// // Transform each line into a `Cursor>` so they implement Read. 194 | /// line.map(String::into_bytes) 195 | /// .map(std::io::Cursor::new) 196 | /// }), 197 | /// 10_000, // 10kB dictionary 198 | /// ).unwrap(); 199 | /// ``` 200 | #[cfg(feature = "zdict_builder")] 201 | #[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "zdict_builder")))] 202 | pub fn from_sample_iterator( 203 | samples: I, 204 | max_size: usize, 205 | ) -> io::Result> 206 | where 207 | I: IntoIterator>, 208 | R: Read, 209 | { 210 | let mut data = Vec::new(); 211 | let mut sizes = Vec::new(); 212 | 213 | for sample in samples { 214 | let mut sample = sample?; 215 | let len = sample.read_to_end(&mut data)?; 216 | sizes.push(len); 217 | } 218 | 219 | from_continuous(&data, &sizes, max_size) 220 | } 221 | 222 | /// Train a dict from a list of files. 223 | /// 224 | /// * `filenames` is an iterator of files to load. Each file will be treated as an individual 225 | /// sample. 
226 | /// * `max_size` is the maximum size of the dictionary to generate. 227 | /// 228 | /// The result is the dictionary data. You can, for example, feed it to [`CDict::create`]. 229 | #[cfg(feature = "zdict_builder")] 230 | #[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "zdict_builder")))] 231 | pub fn from_files(filenames: I, max_size: usize) -> io::Result> 232 | where 233 | P: AsRef, 234 | I: IntoIterator, 235 | { 236 | from_sample_iterator( 237 | filenames 238 | .into_iter() 239 | .map(|filename| std::fs::File::open(filename)), 240 | max_size, 241 | ) 242 | } 243 | 244 | #[cfg(test)] 245 | #[cfg(feature = "zdict_builder")] 246 | mod tests { 247 | use std::fs; 248 | use std::io; 249 | use std::io::Read; 250 | 251 | use walkdir; 252 | 253 | #[test] 254 | fn test_dict_training() { 255 | // Train a dictionary 256 | let paths: Vec<_> = walkdir::WalkDir::new("src") 257 | .into_iter() 258 | .map(|entry| entry.unwrap()) 259 | .map(|entry| entry.into_path()) 260 | .filter(|path| path.to_str().unwrap().ends_with(".rs")) 261 | .collect(); 262 | 263 | let dict = super::from_files(&paths, 4000).unwrap(); 264 | 265 | for path in paths { 266 | let mut buffer = Vec::new(); 267 | let mut file = fs::File::open(path).unwrap(); 268 | let mut content = Vec::new(); 269 | file.read_to_end(&mut content).unwrap(); 270 | io::copy( 271 | &mut &content[..], 272 | &mut crate::stream::Encoder::with_dictionary( 273 | &mut buffer, 274 | 1, 275 | &dict, 276 | ) 277 | .unwrap() 278 | .auto_finish(), 279 | ) 280 | .unwrap(); 281 | 282 | let mut result = Vec::new(); 283 | io::copy( 284 | &mut crate::stream::Decoder::with_dictionary( 285 | &buffer[..], 286 | &dict[..], 287 | ) 288 | .unwrap(), 289 | &mut result, 290 | ) 291 | .unwrap(); 292 | 293 | assert_eq!(&content, &result); 294 | } 295 | } 296 | } 297 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! 
Rust binding to the [zstd library][zstd]. 2 | //! 3 | //! This crate provides: 4 | //! 5 | //! * An [encoder](stream/write/struct.Encoder.html) to compress data using zstd 6 | //! and send the output to another write. 7 | //! * A [decoder](stream/read/struct.Decoder.html) to read input data from a `Read` 8 | //! and decompress it. 9 | //! * Convenient functions for common tasks. 10 | //! 11 | //! # Example 12 | //! 13 | //! ```no_run 14 | //! use std::io; 15 | //! 16 | //! // Uncompress input and print the result. 17 | //! zstd::stream::copy_decode(io::stdin(), io::stdout()).unwrap(); 18 | //! ``` 19 | //! 20 | //! [zstd]: https://github.com/facebook/zstd 21 | #![deny(missing_docs)] 22 | #![cfg_attr(feature = "doc-cfg", feature(doc_cfg))] 23 | 24 | // Re-export the zstd-safe crate. 25 | pub use zstd_safe; 26 | 27 | pub mod bulk; 28 | pub mod dict; 29 | 30 | #[macro_use] 31 | pub mod stream; 32 | 33 | use std::io; 34 | 35 | /// Default compression level. 36 | pub use zstd_safe::CLEVEL_DEFAULT as DEFAULT_COMPRESSION_LEVEL; 37 | 38 | /// The accepted range of compression levels. 39 | pub fn compression_level_range( 40 | ) -> std::ops::RangeInclusive { 41 | zstd_safe::min_c_level()..=zstd_safe::max_c_level() 42 | } 43 | 44 | #[doc(no_inline)] 45 | pub use crate::stream::{decode_all, encode_all, Decoder, Encoder}; 46 | 47 | /// Returns the error message as io::Error based on error_code. 48 | fn map_error_code(code: usize) -> io::Error { 49 | let msg = zstd_safe::get_error_name(code); 50 | io::Error::new(io::ErrorKind::Other, msg.to_string()) 51 | } 52 | 53 | // Some helper functions to write full-cycle tests. 
54 | 55 | #[cfg(test)] 56 | fn test_cycle(data: &[u8], f: F, g: G) 57 | where 58 | F: Fn(&[u8]) -> Vec, 59 | G: Fn(&[u8]) -> Vec, 60 | { 61 | let mid = f(data); 62 | let end = g(&mid); 63 | assert_eq!(data, &end[..]); 64 | } 65 | 66 | #[cfg(test)] 67 | fn test_cycle_unwrap(data: &[u8], f: F, g: G) 68 | where 69 | F: Fn(&[u8]) -> io::Result>, 70 | G: Fn(&[u8]) -> io::Result>, 71 | { 72 | test_cycle(data, |data| f(data).unwrap(), |data| g(data).unwrap()) 73 | } 74 | 75 | #[test] 76 | fn default_compression_level_in_range() { 77 | assert!(compression_level_range().contains(&DEFAULT_COMPRESSION_LEVEL)); 78 | } 79 | -------------------------------------------------------------------------------- /src/stream/functions.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | use super::{Decoder, Encoder}; 4 | 5 | /// Decompress from the given source as if using a `Decoder`. 6 | /// 7 | /// The input data must be in the zstd frame format. 8 | pub fn decode_all(source: R) -> io::Result> { 9 | let mut result = Vec::new(); 10 | copy_decode(source, &mut result)?; 11 | Ok(result) 12 | } 13 | 14 | /// Decompress from the given source as if using a `Decoder`. 15 | /// 16 | /// Decompressed data will be appended to `destination`. 17 | pub fn copy_decode(source: R, mut destination: W) -> io::Result<()> 18 | where 19 | R: io::Read, 20 | W: io::Write, 21 | { 22 | let mut decoder = Decoder::new(source)?; 23 | io::copy(&mut decoder, &mut destination)?; 24 | Ok(()) 25 | } 26 | 27 | /// Compress all data from the given source as if using an `Encoder`. 28 | /// 29 | /// Result will be in the zstd frame format. 30 | /// 31 | /// A level of `0` uses zstd's default (currently `3`). 32 | pub fn encode_all(source: R, level: i32) -> io::Result> { 33 | let mut result = Vec::::new(); 34 | copy_encode(source, &mut result, level)?; 35 | Ok(result) 36 | } 37 | 38 | /// Compress all data from the given source as if using an `Encoder`. 
39 | /// 40 | /// Compressed data will be appended to `destination`. 41 | /// 42 | /// A level of `0` uses zstd's default (currently `3`). 43 | pub fn copy_encode( 44 | mut source: R, 45 | destination: W, 46 | level: i32, 47 | ) -> io::Result<()> 48 | where 49 | R: io::Read, 50 | W: io::Write, 51 | { 52 | let mut encoder = Encoder::new(destination, level)?; 53 | io::copy(&mut source, &mut encoder)?; 54 | encoder.finish()?; 55 | Ok(()) 56 | } 57 | 58 | #[cfg(test)] 59 | mod tests {} 60 | -------------------------------------------------------------------------------- /src/stream/mod.rs: -------------------------------------------------------------------------------- 1 | //! Compress and decompress Zstd streams. 2 | //! 3 | //! Zstd streams are the main way to compress and decompress data. 4 | //! They are compatible with the `zstd` command-line tool. 5 | //! 6 | //! This module provides both `Read` and `Write` interfaces to compressing and 7 | //! decompressing. 8 | 9 | pub mod read; 10 | pub mod write; 11 | 12 | mod functions; 13 | pub mod zio; 14 | 15 | #[cfg(test)] 16 | mod tests; 17 | 18 | pub mod raw; 19 | 20 | pub use self::functions::{copy_decode, copy_encode, decode_all, encode_all}; 21 | pub use self::read::Decoder; 22 | pub use self::write::{AutoFinishEncoder, Encoder}; 23 | 24 | #[doc(hidden)] 25 | #[macro_export] 26 | /// Common functions for the decoder, both in read and write mode. 27 | macro_rules! decoder_parameters { 28 | () => { 29 | /// Sets the maximum back-reference distance. 30 | /// 31 | /// The actual maximum distance is going to be `2^log_distance`. 32 | /// 33 | /// This will need to at least match the value set when compressing. 
34 | pub fn window_log_max(&mut self, log_distance: u32) -> io::Result<()> { 35 | self.set_parameter(zstd_safe::DParameter::WindowLogMax( 36 | log_distance, 37 | )) 38 | } 39 | 40 | #[cfg(feature = "experimental")] 41 | #[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "experimental")))] 42 | /// Enables or disables expecting the 4-byte magic header 43 | /// 44 | /// Only available with the `experimental` feature. 45 | /// 46 | /// This will need to match the settings used when compressing. 47 | pub fn include_magicbytes( 48 | &mut self, 49 | include_magicbytes: bool, 50 | ) -> io::Result<()> { 51 | self.set_parameter(zstd_safe::DParameter::Format( 52 | if include_magicbytes { 53 | zstd_safe::FrameFormat::One 54 | } else { 55 | zstd_safe::FrameFormat::Magicless 56 | }, 57 | )) 58 | } 59 | }; 60 | } 61 | 62 | #[doc(hidden)] 63 | #[macro_export] 64 | /// Common functions for the decoder, both in read and write mode. 65 | macro_rules! decoder_common { 66 | ($readwrite:ident) => { 67 | /// Sets a decompression parameter on the decompression stream. 68 | pub fn set_parameter( 69 | &mut self, 70 | parameter: zstd_safe::DParameter, 71 | ) -> io::Result<()> { 72 | self.$readwrite.operation_mut().set_parameter(parameter) 73 | } 74 | 75 | $crate::decoder_parameters!(); 76 | }; 77 | } 78 | 79 | #[doc(hidden)] 80 | #[macro_export] 81 | /// Parameter-setters for the encoder. Relies on a `set_parameter` method. 82 | macro_rules! encoder_parameters { 83 | () => { 84 | /// Controls whether zstd should include a content checksum at the end 85 | /// of each frame. 86 | pub fn include_checksum( 87 | &mut self, 88 | include_checksum: bool, 89 | ) -> io::Result<()> { 90 | self.set_parameter(zstd_safe::CParameter::ChecksumFlag( 91 | include_checksum, 92 | )) 93 | } 94 | 95 | /// Enables multithreaded compression 96 | /// 97 | /// * If `n_workers == 0` (default), then multithreading will be 98 | /// disabled.
99 | /// * If `n_workers >= 1`, then compression will be done in separate 100 | /// threads. 101 | /// 102 | /// So even `n_workers = 1` may increase performance by separating 103 | /// IO and compression. 104 | /// 105 | /// Note: This is only available if the `zstdmt` cargo feature is activated. 106 | #[cfg(feature = "zstdmt")] 107 | #[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "zstdmt")))] 108 | pub fn multithread(&mut self, n_workers: u32) -> io::Result<()> { 109 | self.set_parameter(zstd_safe::CParameter::NbWorkers(n_workers)) 110 | } 111 | 112 | /// Enables or disables storing of the dict id. 113 | /// 114 | /// Defaults to true. If false, the behaviour of decoding with a wrong 115 | /// dictionary is undefined. 116 | pub fn include_dictid( 117 | &mut self, 118 | include_dictid: bool, 119 | ) -> io::Result<()> { 120 | self.set_parameter(zstd_safe::CParameter::DictIdFlag( 121 | include_dictid, 122 | )) 123 | } 124 | 125 | /// Enables or disables storing of the content size. 126 | /// 127 | /// Note that this only has an effect if the size is given with `set_pledged_src_size`. 128 | pub fn include_contentsize( 129 | &mut self, 130 | include_contentsize: bool, 131 | ) -> io::Result<()> { 132 | self.set_parameter(zstd_safe::CParameter::ContentSizeFlag( 133 | include_contentsize, 134 | )) 135 | } 136 | /// Enables or disables long-distance matching 137 | pub fn long_distance_matching( 138 | &mut self, 139 | long_distance_matching: bool, 140 | ) -> io::Result<()> { 141 | self.set_parameter( 142 | zstd_safe::CParameter::EnableLongDistanceMatching( 143 | long_distance_matching, 144 | ), 145 | ) 146 | } 147 | 148 | /// Sets the target size for compressed blocks. 149 | /// 150 | /// A lower block size may result in slightly lower speed (~2%) and compression ratio 151 | /// (~0.1%), but may decrease end-to-end latency in low-bandwidth environments (time to 152 | /// first decompressed byte).
153 | /// 154 | /// No value, or a value of zero, results in no constraint on the block sizes. 155 | pub fn set_target_cblock_size( 156 | &mut self, 157 | target_size: Option, 158 | ) -> io::Result<()> { 159 | self.set_parameter(zstd_safe::CParameter::TargetCBlockSize( 160 | target_size.unwrap_or(0), 161 | )) 162 | } 163 | 164 | /// Sets the maximum back-reference distance. 165 | /// 166 | /// The actual maximum distance is going to be `2^log_distance`. 167 | /// 168 | /// Note that decompression will need to use at least the same setting. 169 | pub fn window_log(&mut self, log_distance: u32) -> io::Result<()> { 170 | self.set_parameter(zstd_safe::CParameter::WindowLog(log_distance)) 171 | } 172 | 173 | #[cfg(feature = "experimental")] 174 | #[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "experimental")))] 175 | /// Enables or disables the magic bytes at the beginning of each frame. 176 | /// 177 | /// If disabled, `include_magicbytes` must also be called on the decoder. 178 | /// 179 | /// Only available with the `experimental` feature. 180 | /// 181 | /// Note that decompression will need to use the same setting. 182 | pub fn include_magicbytes( 183 | &mut self, 184 | include_magicbytes: bool, 185 | ) -> io::Result<()> { 186 | self.set_parameter(zstd_safe::CParameter::Format( 187 | if include_magicbytes { 188 | zstd_safe::FrameFormat::One 189 | } else { 190 | zstd_safe::FrameFormat::Magicless 191 | }, 192 | )) 193 | } 194 | }; 195 | } 196 | 197 | #[doc(hidden)] 198 | #[macro_export] 199 | /// Common functions for the encoder, both in read and write mode. 200 | macro_rules! encoder_common { 201 | ($readwrite:ident) => { 202 | /// Sets the given zstd compression parameter. 203 | pub fn set_parameter( 204 | &mut self, 205 | parameter: zstd_safe::CParameter, 206 | ) -> io::Result<()> { 207 | self.$readwrite.operation_mut().set_parameter(parameter) 208 | } 209 | 210 | /// Sets the expected size of the input.
211 | /// 212 | /// This affects the compression effectiveness. 213 | /// 214 | /// It is an error to give an incorrect size (an error will be returned when closing the 215 | /// stream if the size does not match what was pledged). 216 | /// 217 | /// Giving a `None` size means the size is unknown (this is the default). 218 | pub fn set_pledged_src_size( 219 | &mut self, 220 | size: Option, 221 | ) -> io::Result<()> { 222 | self.$readwrite.operation_mut().set_pledged_src_size(size) 223 | } 224 | 225 | $crate::encoder_parameters!(); 226 | }; 227 | } 228 | -------------------------------------------------------------------------------- /src/stream/read/mod.rs: -------------------------------------------------------------------------------- 1 | //! Implement pull-based [`Read`] trait for both compressing and decompressing. 2 | use std::io::{self, BufRead, BufReader, Read}; 3 | 4 | use crate::dict::{DecoderDictionary, EncoderDictionary}; 5 | use crate::stream::{raw, zio}; 6 | use zstd_safe; 7 | 8 | #[cfg(test)] 9 | mod tests; 10 | 11 | /// A decoder that decompresses input data from another `Read`. 12 | /// 13 | /// This allows reading a stream of compressed data 14 | /// (good for files or heavy network streams). 15 | pub struct Decoder<'a, R> { 16 | reader: zio::Reader>, 17 | } 18 | 19 | /// An encoder that compresses input data from another `Read`. 20 | pub struct Encoder<'a, R> { 21 | reader: zio::Reader>, 22 | } 23 | 24 | impl Decoder<'static, BufReader> { 25 | /// Creates a new decoder. 26 | pub fn new(reader: R) -> io::Result { 27 | let buffer_size = zstd_safe::DCtx::in_size(); 28 | 29 | Self::with_buffer(BufReader::with_capacity(buffer_size, reader)) 30 | } 31 | } 32 | 33 | impl Decoder<'static, R> { 34 | /// Creates a new decoder around a `BufRead`. 35 | pub fn with_buffer(reader: R) -> io::Result { 36 | Self::with_dictionary(reader, &[]) 37 | } 38 | /// Creates a new decoder, using an existing dictionary.
39 | /// 40 | /// The dictionary must be the same as the one used during compression. 41 | pub fn with_dictionary(reader: R, dictionary: &[u8]) -> io::Result { 42 | let decoder = raw::Decoder::with_dictionary(dictionary)?; 43 | let reader = zio::Reader::new(reader, decoder); 44 | 45 | Ok(Decoder { reader }) 46 | } 47 | } 48 | impl<'a, R: BufRead> Decoder<'a, R> { 49 | /// Creates a new decoder which employs the provided context for deserialization. 50 | pub fn with_context( 51 | reader: R, 52 | context: &'a mut zstd_safe::DCtx<'static>, 53 | ) -> Self { 54 | Self { 55 | reader: zio::Reader::new( 56 | reader, 57 | raw::Decoder::with_context(context), 58 | ), 59 | } 60 | } 61 | 62 | /// Sets this `Decoder` to stop after the first frame. 63 | /// 64 | /// By default, it keeps concatenating frames until EOF is reached. 65 | #[must_use] 66 | pub fn single_frame(mut self) -> Self { 67 | self.reader.set_single_frame(); 68 | self 69 | } 70 | 71 | /// Creates a new decoder, using an existing `DecoderDictionary`. 72 | /// 73 | /// The dictionary must be the same as the one used during compression. 74 | pub fn with_prepared_dictionary<'b>( 75 | reader: R, 76 | dictionary: &DecoderDictionary<'b>, 77 | ) -> io::Result 78 | where 79 | 'b: 'a, 80 | { 81 | let decoder = raw::Decoder::with_prepared_dictionary(dictionary)?; 82 | let reader = zio::Reader::new(reader, decoder); 83 | 84 | Ok(Decoder { reader }) 85 | } 86 | 87 | /// Creates a new decoder, using a ref prefix. 88 | /// 89 | /// The prefix must be the same as the one used during compression. 90 | pub fn with_ref_prefix<'b>( 91 | reader: R, 92 | ref_prefix: &'b [u8], 93 | ) -> io::Result 94 | where 95 | 'b: 'a, 96 | { 97 | let decoder = raw::Decoder::with_ref_prefix(ref_prefix)?; 98 | let reader = zio::Reader::new(reader, decoder); 99 | 100 | Ok(Decoder { reader }) 101 | } 102 | 103 | /// Recommendation for the size of the output buffer. 
104 | pub fn recommended_output_size() -> usize { 105 | zstd_safe::DCtx::out_size() 106 | } 107 | 108 | /// Acquire a reference to the underlying reader. 109 | pub fn get_ref(&self) -> &R { 110 | self.reader.reader() 111 | } 112 | 113 | /// Acquire a mutable reference to the underlying reader. 114 | /// 115 | /// Note that mutation of the reader may result in surprising results if 116 | /// this decoder continues to be used. 117 | pub fn get_mut(&mut self) -> &mut R { 118 | self.reader.reader_mut() 119 | } 120 | 121 | /// Return the inner `Read`. 122 | /// 123 | /// Calling `finish()` is not *required* after reading a stream - 124 | /// just use it if you need to get the `Read` back. 125 | pub fn finish(self) -> R { 126 | self.reader.into_inner() 127 | } 128 | 129 | crate::decoder_common!(reader); 130 | } 131 | 132 | impl Read for Decoder<'_, R> { 133 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 134 | self.reader.read(buf) 135 | } 136 | } 137 | 138 | impl Encoder<'static, BufReader> { 139 | /// Creates a new encoder. 140 | pub fn new(reader: R, level: i32) -> io::Result { 141 | let buffer_size = zstd_safe::CCtx::in_size(); 142 | 143 | Self::with_buffer(BufReader::with_capacity(buffer_size, reader), level) 144 | } 145 | } 146 | 147 | impl Encoder<'static, R> { 148 | /// Creates a new encoder around a `BufRead`. 149 | pub fn with_buffer(reader: R, level: i32) -> io::Result { 150 | Self::with_dictionary(reader, level, &[]) 151 | } 152 | 153 | /// Creates a new encoder, using an existing dictionary. 154 | /// 155 | /// The same dictionary will be required during decompression.
156 | pub fn with_dictionary( 157 | reader: R, 158 | level: i32, 159 | dictionary: &[u8], 160 | ) -> io::Result { 161 | let encoder = raw::Encoder::with_dictionary(level, dictionary)?; 162 | let reader = zio::Reader::new(reader, encoder); 163 | 164 | Ok(Encoder { reader }) 165 | } 166 | } 167 | 168 | impl<'a, R: BufRead> Encoder<'a, R> { 169 | /// Creates a new encoder, using an existing `EncoderDictionary`. 170 | /// 171 | /// The same dictionary must be provided again during decompression. 172 | pub fn with_prepared_dictionary<'b>( 173 | reader: R, 174 | dictionary: &EncoderDictionary<'b>, 175 | ) -> io::Result 176 | where 177 | 'b: 'a, 178 | { 179 | let encoder = raw::Encoder::with_prepared_dictionary(dictionary)?; 180 | let reader = zio::Reader::new(reader, encoder); 181 | 182 | Ok(Encoder { reader }) 183 | } 184 | 185 | /// Recommendation for the size of the output buffer. 186 | pub fn recommended_output_size() -> usize { 187 | zstd_safe::CCtx::out_size() 188 | } 189 | 190 | /// Acquire a reference to the underlying reader. 191 | pub fn get_ref(&self) -> &R { 192 | self.reader.reader() 193 | } 194 | 195 | /// Acquire a mutable reference to the underlying reader. 196 | /// 197 | /// Note that mutation of the reader may result in surprising results if 198 | /// this encoder is continued to be used. 199 | pub fn get_mut(&mut self) -> &mut R { 200 | self.reader.reader_mut() 201 | } 202 | 203 | /// Flush any internal buffer. 204 | /// 205 | /// This ensures all input consumed so far is compressed. 206 | /// 207 | /// Since it prevents bundling currently buffered data with future input, 208 | /// it may affect compression ratio. 209 | /// 210 | /// * Returns the number of bytes written to `out`. 211 | /// * Returns `Ok(0)` when everything has been flushed. 212 | pub fn flush(&mut self, out: &mut [u8]) -> io::Result { 213 | self.reader.flush(out) 214 | } 215 | 216 | /// Return the inner `Read`.
217 | /// 218 | /// Calling `finish()` is not *required* after reading a stream - 219 | /// just use it if you need to get the `Read` back. 220 | pub fn finish(self) -> R { 221 | self.reader.into_inner() 222 | } 223 | 224 | crate::encoder_common!(reader); 225 | } 226 | 227 | impl Read for Encoder<'_, R> { 228 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 229 | self.reader.read(buf) 230 | } 231 | } 232 | 233 | fn _assert_traits() { 234 | use std::io::Cursor; 235 | 236 | fn _assert_send(_: T) {} 237 | 238 | _assert_send(Decoder::new(Cursor::new(Vec::new()))); 239 | _assert_send(Encoder::new(Cursor::new(Vec::new()), 1)); 240 | } 241 | -------------------------------------------------------------------------------- /src/stream/read/tests.rs: -------------------------------------------------------------------------------- 1 | use crate::stream::read::{Decoder, Encoder}; 2 | use std::io::Read; 3 | 4 | #[test] 5 | fn test_error_handling() { 6 | let invalid_input = b"Abcdefghabcdefgh"; 7 | 8 | let mut decoder = Decoder::new(&invalid_input[..]).unwrap(); 9 | let output = decoder.read_to_end(&mut Vec::new()); 10 | 11 | assert_eq!(output.is_err(), true); 12 | } 13 | 14 | #[test] 15 | fn test_cycle() { 16 | let input = b"Abcdefghabcdefgh"; 17 | 18 | let mut encoder = Encoder::new(&input[..], 1).unwrap(); 19 | let mut buffer = Vec::new(); 20 | encoder.read_to_end(&mut buffer).unwrap(); 21 | 22 | let mut decoder = Decoder::new(&buffer[..]).unwrap(); 23 | let mut buffer = Vec::new(); 24 | decoder.read_to_end(&mut buffer).unwrap(); 25 | 26 | assert_eq!(input, &buffer[..]); 27 | } 28 | -------------------------------------------------------------------------------- /src/stream/tests.rs: -------------------------------------------------------------------------------- 1 | use super::{copy_encode, decode_all, encode_all}; 2 | use super::{Decoder, Encoder}; 3 | 4 | use partial_io::{PartialOp, PartialWrite}; 5 | 6 | use std::io; 7 | use std::iter; 8 | 9 | #[test] 10 | fn 
test_end_of_frame() { 11 | use std::io::{Read, Write}; 12 | 13 | let mut enc = Encoder::new(Vec::new(), 1).unwrap(); 14 | enc.write_all(b"foo").unwrap(); 15 | let mut compressed = enc.finish().unwrap(); 16 | 17 | // Add footer/whatever to underlying storage. 18 | compressed.push(0); 19 | 20 | // Drain zstd stream until end-of-frame. 21 | let mut dec = Decoder::new(&compressed[..]).unwrap().single_frame(); 22 | let mut buf = Vec::new(); 23 | dec.read_to_end(&mut buf).unwrap(); 24 | assert_eq!(&buf, b"foo", "Error decoding a single frame."); 25 | } 26 | 27 | #[test] 28 | fn test_concatenated_frames() { 29 | let mut buffer = Vec::new(); 30 | copy_encode(&b"foo"[..], &mut buffer, 1).unwrap(); 31 | copy_encode(&b"bar"[..], &mut buffer, 2).unwrap(); 32 | copy_encode(&b"baz"[..], &mut buffer, 3).unwrap(); 33 | 34 | assert_eq!( 35 | &decode_all(&buffer[..]).unwrap(), 36 | b"foobarbaz", 37 | "Error decoding concatenated frames." 38 | ); 39 | } 40 | 41 | #[test] 42 | fn test_flush() { 43 | use std::io::Write; 44 | 45 | let buf = Vec::new(); 46 | let mut z = Encoder::new(buf, 19).unwrap(); 47 | 48 | z.write_all(b"hello").unwrap(); 49 | 50 | z.flush().unwrap(); // Might corrupt stream 51 | let buf = z.finish().unwrap(); 52 | 53 | let s = decode_all(&buf[..]).unwrap(); 54 | assert_eq!(s, b"hello", "Error decoding after flush."); 55 | } 56 | 57 | #[test] 58 | fn test_try_finish() { 59 | use std::io::Write; 60 | let mut z = setup_try_finish(); 61 | 62 | z.get_mut().set_ops(iter::repeat(PartialOp::Unlimited)); 63 | 64 | // flush() should continue to work even though write() doesn't. 65 | z.flush().unwrap(); 66 | 67 | let buf = match z.try_finish() { 68 | Ok(buf) => buf.into_inner(), 69 | Err((_z, e)) => panic!("try_finish failed with {:?}", e), 70 | }; 71 | 72 | // Make sure the multiple try_finish calls didn't screw up the internal 73 | // buffer and continued to produce valid compressed data. 
74 | assert_eq!(&decode_all(&buf[..]).unwrap(), b"hello", "Error decoding"); 75 | } 76 | 77 | #[test] 78 | #[should_panic] 79 | fn test_write_after_try_finish() { 80 | use std::io::Write; 81 | let mut z = setup_try_finish(); 82 | z.write_all(b"hello world").unwrap(); 83 | } 84 | 85 | fn setup_try_finish() -> Encoder<'static, PartialWrite>> { 86 | use std::io::Write; 87 | 88 | let buf = 89 | PartialWrite::new(Vec::new(), iter::repeat(PartialOp::Unlimited)); 90 | let mut z = Encoder::new(buf, 19).unwrap(); 91 | 92 | z.write_all(b"hello").unwrap(); 93 | 94 | z.get_mut() 95 | .set_ops(iter::repeat(PartialOp::Err(io::ErrorKind::WouldBlock))); 96 | 97 | let (z, err) = z.try_finish().unwrap_err(); 98 | assert_eq!( 99 | err.kind(), 100 | io::ErrorKind::WouldBlock, 101 | "expected WouldBlock error" 102 | ); 103 | 104 | z 105 | } 106 | 107 | #[test] 108 | fn test_failing_write() { 109 | use std::io::Write; 110 | 111 | let buf = PartialWrite::new( 112 | Vec::new(), 113 | iter::repeat(PartialOp::Err(io::ErrorKind::WouldBlock)), 114 | ); 115 | let mut z = Encoder::new(buf, 1).unwrap(); 116 | 117 | // Fill in enough data to make sure the buffer gets written out. 118 | let input = vec![b'b'; 128 * 1024]; 119 | // This should work even though the inner writer rejects writes. 120 | assert_eq!( 121 | z.write(&input).unwrap(), 122 | 128 * 1024, 123 | "did not write all input buffer" 124 | ); 125 | 126 | // The next write would fail (the buffer still has some data in it). 127 | assert_eq!( 128 | z.write(b"abc").unwrap_err().kind(), 129 | io::ErrorKind::WouldBlock, 130 | "expected WouldBlock error" 131 | ); 132 | 133 | z.get_mut().set_ops(iter::repeat(PartialOp::Unlimited)); 134 | 135 | // This shouldn't have led to any corruption. 
136 | let buf = z.finish().unwrap().into_inner(); 137 | assert_eq!( 138 | &decode_all(&buf[..]).unwrap(), 139 | &input, 140 | "WouldBlock errors should not corrupt stream" 141 | ); 142 | } 143 | 144 | #[test] 145 | fn test_invalid_frame() { 146 | use std::io::Read; 147 | 148 | // I really hope this data is invalid... 149 | let data = &[1u8, 2u8, 3u8, 4u8, 5u8]; 150 | let mut dec = Decoder::new(&data[..]).unwrap(); 151 | assert_eq!( 152 | dec.read_to_end(&mut Vec::new()).err().map(|e| e.kind()), 153 | Some(io::ErrorKind::Other), 154 | "did not encounter expected 'invalid frame' error" 155 | ); 156 | } 157 | 158 | #[test] 159 | fn test_incomplete_frame() { 160 | use std::io::{Read, Write}; 161 | 162 | let mut enc = Encoder::new(Vec::new(), 1).unwrap(); 163 | enc.write_all(b"This is a regular string").unwrap(); 164 | let mut compressed = enc.finish().unwrap(); 165 | 166 | let half_size = compressed.len() - 2; 167 | compressed.truncate(half_size); 168 | 169 | let mut dec = Decoder::new(&compressed[..]).unwrap(); 170 | assert_eq!( 171 | dec.read_to_end(&mut Vec::new()).err().map(|e| e.kind()), 172 | Some(io::ErrorKind::UnexpectedEof), 173 | "did not encounter expected EOF error" 174 | ); 175 | } 176 | 177 | #[test] 178 | fn test_cli_compatibility() { 179 | let input = include_bytes!("../../assets/example.txt.zst"); 180 | 181 | let output = decode_all(&input[..]).unwrap(); 182 | 183 | let expected = include_bytes!("../../assets/example.txt"); 184 | 185 | assert_eq!( 186 | &output[..], 187 | &expected[..], 188 | "error decoding cli-compressed data" 189 | ); 190 | } 191 | 192 | #[cfg(feature = "legacy")] 193 | #[test] 194 | fn test_legacy() { 195 | use std::fs; 196 | use std::io::Read; 197 | 198 | // Read the content from that file 199 | let expected = include_bytes!("../../assets/example.txt"); 200 | 201 | for version in &[5, 6, 7, 8] { 202 | let filename = format!("assets/example.txt.v{}.zst", version); 203 | let file = fs::File::open(filename).unwrap(); 204 | let mut 
decoder = Decoder::new(file).unwrap(); 205 | 206 | let mut buffer = Vec::new(); 207 | decoder.read_to_end(&mut buffer).unwrap(); 208 | 209 | assert_eq!( 210 | &expected[..], 211 | &buffer[..], 212 | "error decompressing legacy version {}", 213 | version 214 | ); 215 | } 216 | } 217 | 218 | // Check that compressing+decompressing some data gives back the original 219 | fn test_full_cycle(input: &[u8], level: i32) { 220 | crate::test_cycle_unwrap( 221 | input, 222 | |data| encode_all(data, level), 223 | |data| decode_all(data), 224 | ); 225 | } 226 | 227 | #[test] 228 | fn test_empty() { 229 | // Test compressing empty data 230 | for level in 1..19 { 231 | test_full_cycle(b"", level); 232 | } 233 | } 234 | 235 | #[test] 236 | fn test_ll_source() { 237 | // Where could I find some long text?... 238 | let data = include_bytes!("../../zstd-safe/zstd-sys/src/bindings_zstd.rs"); 239 | // Test a few compression levels. 240 | // TODO: check them all? 241 | for level in 1..5 { 242 | // Test compressing actual data 243 | test_full_cycle(data, level); 244 | } 245 | } 246 | 247 | #[test] 248 | fn reader_to_writer() { 249 | use std::io::{Read, Write}; 250 | 251 | let clear = include_bytes!("../../assets/example.txt"); 252 | // Compress using reader 253 | let mut encoder = super::read::Encoder::new(&clear[..], 1).unwrap(); 254 | 255 | let mut compressed_buffer = Vec::new(); 256 | encoder.read_to_end(&mut compressed_buffer).unwrap(); 257 | 258 | // eprintln!("Compressed Buffer: {:?}", compressed_buffer); 259 | 260 | // Decompress using writer 261 | let mut decompressed_buffer = Vec::new(); 262 | let mut decoder = 263 | super::write::Decoder::new(&mut decompressed_buffer).unwrap(); 264 | decoder.write_all(&compressed_buffer[..]).unwrap(); 265 | decoder.flush().unwrap(); 266 | // eprintln!("{:?}", decompressed_buffer); 267 | 268 | assert_eq!(clear, &decompressed_buffer[..]); 269 | } 270 | 271 | #[test] 272 | fn test_finish_empty_encoder() { 273 | use std::io::Write; 274 | let mut 
enc = Encoder::new(Vec::new(), 0).unwrap(); 275 | enc.do_finish().unwrap(); 276 | enc.write_all(b"this should not work").unwrap_err(); 277 | enc.finish().unwrap(); 278 | } 279 | -------------------------------------------------------------------------------- /src/stream/write/mod.rs: -------------------------------------------------------------------------------- 1 | //! Implement push-based [`Write`] trait for both compressing and decompressing. 2 | use std::io::{self, Write}; 3 | 4 | use zstd_safe; 5 | 6 | use crate::dict::{DecoderDictionary, EncoderDictionary}; 7 | use crate::stream::{raw, zio}; 8 | 9 | #[cfg(test)] 10 | mod tests; 11 | 12 | /// An encoder that compresses and forwards data to another writer. 13 | /// 14 | /// This allows compressing a stream of data 15 | /// (good for files or heavy network streams). 16 | /// 17 | /// Don't forget to call [`finish()`] before dropping it! 18 | /// 19 | /// Alternatively, you can call [`auto_finish()`] to use an 20 | /// [`AutoFinishEncoder`] that will finish on drop. 21 | /// 22 | /// Note: The zstd library has its own internal input buffer (~128kb). 23 | /// 24 | /// [`finish()`]: #method.finish 25 | /// [`auto_finish()`]: #method.auto_finish 26 | /// [`AutoFinishEncoder`]: AutoFinishEncoder 27 | pub struct Encoder<'a, W: Write> { 28 | // output writer (compressed data) 29 | writer: zio::Writer>, 30 | } 31 | 32 | /// A decoder that decompresses and forwards data to another writer. 33 | /// 34 | /// Note that you probably want to `flush()` after writing your stream content. 35 | /// You can use [`auto_flush()`] to automatically flush the writer on drop. 36 | /// 37 | /// [`auto_flush()`]: Decoder::auto_flush 38 | pub struct Decoder<'a, W: Write> { 39 | // output writer (decompressed data) 40 | writer: zio::Writer>, 41 | } 42 | 43 | /// A wrapper around an `Encoder` that finishes the stream on drop. 44 | /// 45 | /// This can be created by the [`auto_finish()`] method on the [`Encoder`].
46 | /// 47 | /// [`auto_finish()`]: Encoder::auto_finish 48 | /// [`Encoder`]: Encoder 49 | pub struct AutoFinishEncoder< 50 | 'a, 51 | W: Write, 52 | F: FnMut(io::Result) = Box)>, 53 | > { 54 | // We wrap this in an option to take it during drop. 55 | encoder: Option>, 56 | 57 | on_finish: Option, 58 | } 59 | 60 | /// A wrapper around a `Decoder` that flushes the stream on drop. 61 | /// 62 | /// This can be created by the [`auto_flush()`] method on the [`Decoder`]. 63 | /// 64 | /// [`auto_flush()`]: Decoder::auto_flush 65 | /// [`Decoder`]: Decoder 66 | pub struct AutoFlushDecoder< 67 | 'a, 68 | W: Write, 69 | F: FnMut(io::Result<()>) = Box)>, 70 | > { 71 | // We wrap this in an option to take it during drop. 72 | decoder: Option>, 73 | 74 | on_flush: Option, 75 | } 76 | 77 | impl<'a, W: Write, F: FnMut(io::Result<()>)> AutoFlushDecoder<'a, W, F> { 78 | fn new(decoder: Decoder<'a, W>, on_flush: F) -> Self { 79 | AutoFlushDecoder { 80 | decoder: Some(decoder), 81 | on_flush: Some(on_flush), 82 | } 83 | } 84 | 85 | /// Acquires a reference to the underlying writer. 86 | pub fn get_ref(&self) -> &W { 87 | self.decoder.as_ref().unwrap().get_ref() 88 | } 89 | 90 | /// Acquires a mutable reference to the underlying writer. 91 | /// 92 | /// Note that mutation of the writer may result in surprising results if 93 | /// this decoder is continued to be used. 94 | /// 95 | /// Mostly used for testing purposes. 
96 | pub fn get_mut(&mut self) -> &mut W { 97 | self.decoder.as_mut().unwrap().get_mut() 98 | } 99 | } 100 | 101 | impl Drop for AutoFlushDecoder<'_, W, F> 102 | where 103 | W: Write, 104 | F: FnMut(io::Result<()>), 105 | { 106 | fn drop(&mut self) { 107 | let mut decoder = self.decoder.take().unwrap(); 108 | let result = decoder.flush(); 109 | if let Some(mut on_finish) = self.on_flush.take() { 110 | on_finish(result); 111 | } 112 | } 113 | } 114 | 115 | impl)> Write for AutoFlushDecoder<'_, W, F> { 116 | fn write(&mut self, buf: &[u8]) -> io::Result { 117 | self.decoder.as_mut().unwrap().write(buf) 118 | } 119 | 120 | fn flush(&mut self) -> io::Result<()> { 121 | self.decoder.as_mut().unwrap().flush() 122 | } 123 | } 124 | 125 | impl<'a, W: Write, F: FnMut(io::Result)> AutoFinishEncoder<'a, W, F> { 126 | fn new(encoder: Encoder<'a, W>, on_finish: F) -> Self { 127 | AutoFinishEncoder { 128 | encoder: Some(encoder), 129 | on_finish: Some(on_finish), 130 | } 131 | } 132 | 133 | /// Acquires a reference to the underlying writer. 134 | pub fn get_ref(&self) -> &W { 135 | self.encoder.as_ref().unwrap().get_ref() 136 | } 137 | 138 | /// Acquires a mutable reference to the underlying writer. 139 | /// 140 | /// Note that mutation of the writer may result in surprising results if 141 | /// this encoder is continued to be used. 142 | /// 143 | /// Mostly used for testing purposes. 
144 | pub fn get_mut(&mut self) -> &mut W { 145 | self.encoder.as_mut().unwrap().get_mut() 146 | } 147 | } 148 | 149 | impl)> Drop for AutoFinishEncoder<'_, W, F> { 150 | fn drop(&mut self) { 151 | let result = self.encoder.take().unwrap().finish(); 152 | if let Some(mut on_finish) = self.on_finish.take() { 153 | on_finish(result); 154 | } 155 | } 156 | } 157 | 158 | impl)> Write for AutoFinishEncoder<'_, W, F> { 159 | fn write(&mut self, buf: &[u8]) -> io::Result { 160 | self.encoder.as_mut().unwrap().write(buf) 161 | } 162 | 163 | fn flush(&mut self) -> io::Result<()> { 164 | self.encoder.as_mut().unwrap().flush() 165 | } 166 | } 167 | 168 | impl Encoder<'static, W> { 169 | /// Creates a new encoder. 170 | /// 171 | /// `level`: compression level (1-22). 172 | /// 173 | /// A level of `0` uses zstd's default (currently `3`). 174 | pub fn new(writer: W, level: i32) -> io::Result { 175 | Self::with_dictionary(writer, level, &[]) 176 | } 177 | 178 | /// Creates a new encoder, using an existing dictionary. 179 | /// 180 | /// (Provides better compression ratio for small files, 181 | /// but requires the dictionary to be present during decompression.) 182 | /// 183 | /// A level of `0` uses zstd's default (currently `3`). 184 | pub fn with_dictionary( 185 | writer: W, 186 | level: i32, 187 | dictionary: &[u8], 188 | ) -> io::Result { 189 | let encoder = raw::Encoder::with_dictionary(level, dictionary)?; 190 | Ok(Self::with_encoder(writer, encoder)) 191 | } 192 | } 193 | 194 | impl<'a, W: Write> Encoder<'a, W> { 195 | /// Creates a new encoder from a prepared zio writer. 196 | pub fn with_writer(writer: zio::Writer>) -> Self { 197 | Self { writer } 198 | } 199 | 200 | /// Creates a new encoder from the given `Write` and raw encoder. 
201 | pub fn with_encoder(writer: W, encoder: raw::Encoder<'a>) -> Self { 202 | let writer = zio::Writer::new(writer, encoder); 203 | Self::with_writer(writer) 204 | } 205 | 206 | /// Creates an encoder that uses the provided context to compress a stream. 207 | pub fn with_context( 208 | writer: W, 209 | context: &'a mut zstd_safe::CCtx<'static>, 210 | ) -> Self { 211 | let encoder = raw::Encoder::with_context(context); 212 | Self::with_encoder(writer, encoder) 213 | } 214 | 215 | /// Creates a new encoder, using an existing prepared `EncoderDictionary`. 216 | /// 217 | /// (Provides better compression ratio for small files, 218 | /// but requires the dictionary to be present during decompression.) 219 | pub fn with_prepared_dictionary<'b>( 220 | writer: W, 221 | dictionary: &EncoderDictionary<'b>, 222 | ) -> io::Result 223 | where 224 | 'b: 'a, 225 | { 226 | let encoder = raw::Encoder::with_prepared_dictionary(dictionary)?; 227 | Ok(Self::with_encoder(writer, encoder)) 228 | } 229 | 230 | /// Creates a new encoder, using a ref prefix 231 | pub fn with_ref_prefix<'b>( 232 | writer: W, 233 | level: i32, 234 | ref_prefix: &'b [u8], 235 | ) -> io::Result 236 | where 237 | 'b: 'a, 238 | { 239 | let encoder = raw::Encoder::with_ref_prefix(level, ref_prefix)?; 240 | Ok(Self::with_encoder(writer, encoder)) 241 | } 242 | 243 | /// Returns a wrapper around `self` that will finish the stream on drop. 244 | pub fn auto_finish(self) -> AutoFinishEncoder<'a, W> { 245 | AutoFinishEncoder { 246 | encoder: Some(self), 247 | on_finish: None, 248 | } 249 | } 250 | 251 | /// Returns an encoder that will finish the stream on drop. 252 | /// 253 | /// Calls the given callback with the result from `finish()`. This runs during drop so it's 254 | /// important that the provided callback doesn't panic. 
255 | pub fn on_finish)>( 256 | self, 257 | f: F, 258 | ) -> AutoFinishEncoder<'a, W, F> { 259 | AutoFinishEncoder::new(self, f) 260 | } 261 | 262 | /// Acquires a reference to the underlying writer. 263 | pub fn get_ref(&self) -> &W { 264 | self.writer.writer() 265 | } 266 | 267 | /// Acquires a mutable reference to the underlying writer. 268 | /// 269 | /// Note that mutation of the writer may result in surprising results if 270 | /// this encoder is continued to be used. 271 | pub fn get_mut(&mut self) -> &mut W { 272 | self.writer.writer_mut() 273 | } 274 | 275 | /// **Required**: Finishes the stream. 276 | /// 277 | /// You *need* to finish the stream when you're done writing, either with 278 | /// this method or with [`try_finish(self)`](#method.try_finish). 279 | /// 280 | /// This returns the inner writer in case you need it. 281 | /// 282 | /// To get back `self` in case an error happened, use `try_finish`. 283 | /// 284 | /// **Note**: If you don't want (or can't) call `finish()` manually after 285 | /// writing your data, consider using `auto_finish()` to get an 286 | /// `AutoFinishEncoder`. 287 | pub fn finish(self) -> io::Result { 288 | self.try_finish().map_err(|(_, err)| err) 289 | } 290 | 291 | /// **Required**: Attempts to finish the stream. 292 | /// 293 | /// You *need* to finish the stream when you're done writing, either with 294 | /// this method or with [`finish(self)`](#method.finish). 295 | /// 296 | /// This returns the inner writer if the finish was successful, or the 297 | /// object plus an error if it wasn't. 298 | /// 299 | /// `write` on this object will panic after `try_finish` has been called, 300 | /// even if it fails. 301 | pub fn try_finish(mut self) -> Result { 302 | match self.writer.finish() { 303 | // Return the writer, because why not 304 | Ok(()) => Ok(self.writer.into_inner().0), 305 | Err(e) => Err((self, e)), 306 | } 307 | } 308 | 309 | /// Attempts to finish the stream. 
310 | /// 311 | /// You *need* to finish the stream when you're done writing, either with 312 | /// this method or with [`finish(self)`](#method.finish). 313 | pub fn do_finish(&mut self) -> io::Result<()> { 314 | self.writer.finish() 315 | } 316 | 317 | /// Return a recommendation for the size of data to write at once. 318 | pub fn recommended_input_size() -> usize { 319 | zstd_safe::CCtx::in_size() 320 | } 321 | 322 | crate::encoder_common!(writer); 323 | } 324 | 325 | impl<'a, W: Write> Write for Encoder<'a, W> { 326 | fn write(&mut self, buf: &[u8]) -> io::Result { 327 | self.writer.write(buf) 328 | } 329 | 330 | fn flush(&mut self) -> io::Result<()> { 331 | self.writer.flush() 332 | } 333 | } 334 | 335 | impl Decoder<'static, W> { 336 | /// Creates a new decoder. 337 | pub fn new(writer: W) -> io::Result { 338 | Self::with_dictionary(writer, &[]) 339 | } 340 | 341 | /// Creates a new decoder, using an existing dictionary. 342 | /// 343 | /// The dictionary must be the same as the one 344 | /// used during compression. 345 | pub fn with_dictionary(writer: W, dictionary: &[u8]) -> io::Result { 346 | let decoder = raw::Decoder::with_dictionary(dictionary)?; 347 | Ok(Self::with_decoder(writer, decoder)) 348 | } 349 | } 350 | 351 | impl<'a, W: Write> Decoder<'a, W> { 352 | /// Creates a new decoder around the given prepared zio writer. 353 | /// 354 | /// # Examples 355 | /// 356 | /// ```rust 357 | /// fn wrap(writer: W) -> zstd::stream::write::Decoder<'static, W> { 358 | /// let decoder = zstd::stream::raw::Decoder::new().unwrap(); 359 | /// let writer = zstd::stream::zio::Writer::new(writer, decoder); 360 | /// zstd::stream::write::Decoder::with_writer(writer) 361 | /// } 362 | /// ``` 363 | pub fn with_writer(writer: zio::Writer>) -> Self { 364 | Decoder { writer } 365 | } 366 | 367 | /// Creates a new decoder around the given `Write` and raw decoder.
368 | pub fn with_decoder(writer: W, decoder: raw::Decoder<'a>) -> Self { 369 | let writer = zio::Writer::new(writer, decoder); 370 | Decoder { writer } 371 | } 372 | 373 | /// Creates a new decoder, using an existing prepared `DecoderDictionary`. 374 | /// 375 | /// The dictionary must be the same as the one 376 | /// used during compression. 377 | pub fn with_prepared_dictionary<'b>( 378 | writer: W, 379 | dictionary: &DecoderDictionary<'b>, 380 | ) -> io::Result 381 | where 382 | 'b: 'a, 383 | { 384 | let decoder = raw::Decoder::with_prepared_dictionary(dictionary)?; 385 | Ok(Self::with_decoder(writer, decoder)) 386 | } 387 | 388 | /// Acquires a reference to the underlying writer. 389 | pub fn get_ref(&self) -> &W { 390 | self.writer.writer() 391 | } 392 | 393 | /// Acquires a mutable reference to the underlying writer. 394 | /// 395 | /// Note that mutation of the writer may result in surprising results if 396 | /// this decoder is continued to be used. 397 | pub fn get_mut(&mut self) -> &mut W { 398 | self.writer.writer_mut() 399 | } 400 | 401 | /// Returns the inner `Write`. 402 | pub fn into_inner(self) -> W { 403 | self.writer.into_inner().0 404 | } 405 | 406 | /// Return a recommendation for the size of data to write at once. 407 | pub fn recommended_input_size() -> usize { 408 | zstd_safe::DCtx::in_size() 409 | } 410 | 411 | /// Returns a wrapper around `self` that will flush the stream on drop. 412 | pub fn auto_flush(self) -> AutoFlushDecoder<'a, W> { 413 | AutoFlushDecoder { 414 | decoder: Some(self), 415 | on_flush: None, 416 | } 417 | } 418 | 419 | /// Returns a decoder that will flush the stream on drop. 420 | /// 421 | /// Calls the given callback with the result from `flush()`. This runs during drop so it's 422 | /// important that the provided callback doesn't panic.
423 | pub fn on_flush)>( 424 | self, 425 | f: F, 426 | ) -> AutoFlushDecoder<'a, W, F> { 427 | AutoFlushDecoder::new(self, f) 428 | } 429 | 430 | crate::decoder_common!(writer); 431 | } 432 | 433 | impl Write for Decoder<'_, W> { 434 | fn write(&mut self, buf: &[u8]) -> io::Result { 435 | self.writer.write(buf) 436 | } 437 | 438 | fn flush(&mut self) -> io::Result<()> { 439 | self.writer.flush() 440 | } 441 | } 442 | 443 | fn _assert_traits() { 444 | fn _assert_send(_: T) {} 445 | 446 | _assert_send(Decoder::new(Vec::new())); 447 | _assert_send(Encoder::new(Vec::new(), 1)); 448 | _assert_send(Decoder::new(Vec::new()).unwrap().auto_flush()); 449 | _assert_send(Encoder::new(Vec::new(), 1).unwrap().auto_finish()); 450 | } 451 | -------------------------------------------------------------------------------- /src/stream/write/tests.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Cursor, Write}; 2 | use std::iter; 3 | 4 | use partial_io::{PartialOp, PartialWrite}; 5 | 6 | use crate::stream::decode_all; 7 | use crate::stream::write::{Decoder, Encoder}; 8 | 9 | #[test] 10 | fn test_cycle() { 11 | let input = b"Abcdefghabcdefgh"; 12 | 13 | let buffer = Cursor::new(Vec::new()); 14 | let mut encoder = Encoder::new(buffer, 1).unwrap(); 15 | encoder.write_all(input).unwrap(); 16 | let encoded = encoder.finish().unwrap().into_inner(); 17 | 18 | // println!("Encoded: {:?}", encoded); 19 | 20 | let buffer = Cursor::new(Vec::new()); 21 | let mut decoder = Decoder::new(buffer).unwrap(); 22 | decoder.write_all(&encoded).unwrap(); 23 | decoder.flush().unwrap(); 24 | let decoded = decoder.into_inner().into_inner(); 25 | 26 | assert_eq!(input, &decoded[..]); 27 | } 28 | 29 | /// Test that flush after a partial write works successfully without 30 | /// corrupting the frame. This test is in this module because it checks 31 | /// internal implementation details. 
32 | #[test] 33 | fn test_partial_write_flush() { 34 | let input = vec![b'b'; 128 * 1024]; 35 | let mut z = setup_partial_write(&input); 36 | 37 | // flush shouldn't corrupt the stream 38 | z.flush().unwrap(); 39 | 40 | let buf = z.finish().unwrap().into_inner(); 41 | assert_eq!(&decode_all(&buf[..]).unwrap(), &input); 42 | } 43 | 44 | /// Test that finish after a partial write works successfully without 45 | /// corrupting the frame. This test is in this module because it checks 46 | /// internal implementation details. 47 | #[test] 48 | fn test_partial_write_finish() { 49 | let input = vec![b'b'; 128 * 1024]; 50 | let z = setup_partial_write(&input); 51 | 52 | // finish shouldn't corrupt the stream 53 | let buf = z.finish().unwrap().into_inner(); 54 | assert_eq!(&decode_all(&buf[..]).unwrap(), &input); 55 | } 56 | 57 | fn setup_partial_write(input_data: &[u8]) -> Encoder>> { 58 | let buf = 59 | PartialWrite::new(Vec::new(), iter::repeat(PartialOp::Limited(1))); 60 | let mut z = Encoder::new(buf, 1).unwrap(); 61 | 62 | // Fill in enough data to make sure the buffer gets written out. 63 | z.write(input_data).unwrap(); 64 | 65 | { 66 | let inner = &mut z.writer; 67 | // At this point, the internal buffer in z should have some data. 68 | assert_ne!(inner.offset(), inner.buffer().len()); 69 | } 70 | 71 | z 72 | } 73 | -------------------------------------------------------------------------------- /src/stream/zio/mod.rs: -------------------------------------------------------------------------------- 1 | //! Wrappers around raw operations implementing `std::io::{Read, Write}`. 
2 | 3 | mod reader; 4 | mod writer; 5 | 6 | pub use self::reader::Reader; 7 | pub use self::writer::Writer; 8 | -------------------------------------------------------------------------------- /src/stream/zio/reader.rs: -------------------------------------------------------------------------------- 1 | use std::io::{self, BufRead, Read}; 2 | 3 | use crate::stream::raw::{InBuffer, Operation, OutBuffer}; 4 | 5 | // [ reader -> zstd ] -> output 6 | /// Implements the [`Read`] API around an [`Operation`]. 7 | /// 8 | /// This can be used to wrap a raw in-memory operation in a read-focused API. 9 | /// 10 | /// It can wrap either a compression or decompression operation, and pulls 11 | /// input data from a wrapped `Read`. 12 | pub struct Reader { 13 | reader: R, 14 | operation: D, 15 | 16 | state: State, 17 | 18 | single_frame: bool, 19 | finished_frame: bool, 20 | } 21 | 22 | enum State { 23 | // Still actively reading from the inner `Read` 24 | Reading, 25 | // We reached EOF from the inner `Read`, now flushing. 26 | PastEof, 27 | // We are fully done, nothing can be read. 28 | Finished, 29 | } 30 | 31 | impl Reader { 32 | /// Creates a new `Reader`. 33 | /// 34 | /// `reader` will be used to pull input data for the given operation. 35 | pub fn new(reader: R, operation: D) -> Self { 36 | Reader { 37 | reader, 38 | operation, 39 | state: State::Reading, 40 | single_frame: false, 41 | finished_frame: false, 42 | } 43 | } 44 | 45 | /// Sets `self` to stop after the first decoded frame. 46 | pub fn set_single_frame(&mut self) { 47 | self.single_frame = true; 48 | } 49 | 50 | /// Returns a mutable reference to the underlying operation. 51 | pub fn operation_mut(&mut self) -> &mut D { 52 | &mut self.operation 53 | } 54 | 55 | /// Returns a mutable reference to the underlying reader. 56 | pub fn reader_mut(&mut self) -> &mut R { 57 | &mut self.reader 58 | } 59 | 60 | /// Returns a reference to the underlying reader. 
61 | pub fn reader(&self) -> &R { 62 | &self.reader 63 | } 64 | 65 | /// Returns the inner reader. 66 | pub fn into_inner(self) -> R { 67 | self.reader 68 | } 69 | 70 | /// Flush any internal buffer. 71 | /// 72 | /// For encoders, this ensures all input consumed so far is compressed. 73 | pub fn flush(&mut self, output: &mut [u8]) -> io::Result 74 | where 75 | D: Operation, 76 | { 77 | self.operation.flush(&mut OutBuffer::around(output)) 78 | } 79 | } 80 | // Fills the reader's buffer. Meant to retry on `Interrupted` errors, but currently does not (the retry loop below is commented out). 81 | fn fill_buf(reader: &mut R) -> io::Result<&[u8]> 82 | where 83 | R: BufRead, 84 | { 85 | // This doesn't work right now because of the borrow-checker. 86 | // When it can be made to compile, it would allow Reader to automatically 87 | // retry on `Interrupted` error. 88 | /* 89 | loop { 90 | match reader.fill_buf() { 91 | Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} 92 | otherwise => return otherwise, 93 | } 94 | } 95 | */ 96 | 97 | // Workaround for now 98 | let res = reader.fill_buf()?; 99 | 100 | // eprintln!("Filled buffer: {:?}", res); 101 | 102 | Ok(res) 103 | } 104 | 105 | impl Read for Reader 106 | where 107 | R: BufRead, 108 | D: Operation, 109 | { 110 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 111 | // Keep trying until _something_ has been written. 112 | let mut first = true; 113 | loop { 114 | match self.state { 115 | State::Reading => { 116 | let (bytes_read, bytes_written) = { 117 | // Start with a fresh pool of un-processed data. 118 | // This is the only line that can return an interruption error. 119 | let input = if first { 120 | // eprintln!("First run, no input coming."); 121 | b"" 122 | } else { 123 | fill_buf(&mut self.reader)? 124 | }; 125 | 126 | // eprintln!("Input = {:?}", input); 127 | 128 | // It's possible we don't have any new data to read. 129 | // (In this case we may still have zstd's own buffer to clear.)
130 | if !first && input.is_empty() { 131 | self.state = State::PastEof; 132 | continue; 133 | } 134 | first = false; 135 | 136 | let mut src = InBuffer::around(input); 137 | let mut dst = OutBuffer::around(buf); 138 | 139 | // We don't want empty input (from first=true) to cause a frame 140 | // re-initialization. 141 | if self.finished_frame && !input.is_empty() { 142 | // eprintln!("!! Reigniting !!"); 143 | self.operation.reinit()?; 144 | self.finished_frame = false; 145 | } 146 | 147 | // Phase 1: feed input to the operation 148 | let hint = self.operation.run(&mut src, &mut dst)?; 149 | // eprintln!( 150 | // "Hint={} Just run our operation:\n In={:?}\n Out={:?}", 151 | // hint, src, dst 152 | // ); 153 | 154 | if hint == 0 { 155 | // In practice this only happens when decoding, when we just finished 156 | // reading a frame. 157 | self.finished_frame = true; 158 | if self.single_frame { 159 | self.state = State::Finished; 160 | } 161 | } 162 | 163 | // eprintln!("Output: {:?}", dst); 164 | 165 | (src.pos(), dst.pos()) 166 | }; 167 | 168 | self.reader.consume(bytes_read); 169 | 170 | if bytes_written > 0 { 171 | return Ok(bytes_written); 172 | } 173 | 174 | // We need more data! Try again! 175 | } 176 | State::PastEof => { 177 | let mut dst = OutBuffer::around(buf); 178 | 179 | // We already sent all the input we could get to zstd. Time to flush out the 180 | // buffer and be done with it. 181 | 182 | // Phase 2: flush out the operation's buffer 183 | // Keep calling `finish()` until the buffer is empty. 184 | let hint = self 185 | .operation 186 | .finish(&mut dst, self.finished_frame)?; 187 | // eprintln!("Hint: {} ; Output: {:?}", hint, dst); 188 | if hint == 0 { 189 | // This indicates that the footer is complete. 190 | // This is the only way to terminate the stream cleanly. 
191 | self.state = State::Finished; 192 | } 193 | 194 | return Ok(dst.pos()); 195 | } 196 | State::Finished => { 197 | return Ok(0); 198 | } 199 | } 200 | } 201 | } 202 | } 203 | 204 | #[cfg(test)] 205 | mod tests { 206 | use super::Reader; 207 | use std::io::{Cursor, Read}; 208 | 209 | #[test] 210 | fn test_noop() { 211 | use crate::stream::raw::NoOp; 212 | 213 | let input = b"AbcdefghAbcdefgh."; 214 | 215 | // Test reader 216 | let mut output = Vec::new(); 217 | { 218 | let mut reader = Reader::new(Cursor::new(input), NoOp); 219 | reader.read_to_end(&mut output).unwrap(); 220 | } 221 | assert_eq!(&output, input); 222 | } 223 | 224 | #[test] 225 | fn test_compress() { 226 | use crate::stream::raw::Encoder; 227 | 228 | let input = b"AbcdefghAbcdefgh."; 229 | 230 | // Test reader 231 | let mut output = Vec::new(); 232 | { 233 | let mut reader = 234 | Reader::new(Cursor::new(input), Encoder::new(1).unwrap()); 235 | reader.read_to_end(&mut output).unwrap(); 236 | } 237 | // eprintln!("{:?}", output); 238 | let decoded = crate::decode_all(&output[..]).unwrap(); 239 | assert_eq!(&decoded, input); 240 | } 241 | } 242 | -------------------------------------------------------------------------------- /src/stream/zio/writer.rs: -------------------------------------------------------------------------------- 1 | use std::io::{self, Write}; 2 | 3 | use crate::stream::raw::{InBuffer, Operation, OutBuffer}; 4 | 5 | // input -> [ zstd -> buffer -> writer ] 6 | 7 | /// Implements the [`Write`] API around an [`Operation`]. 8 | /// 9 | /// This can be used to wrap a raw in-memory operation in a write-focused API. 10 | /// 11 | /// It can be used with either compression or decompression, and forwards the 12 | /// output to a wrapped `Write`. 13 | pub struct Writer<W, D> { 14 | /// Either an encoder or a decoder. 15 | operation: D, 16 | 17 | /// Where we send the output of the operation. 18 | writer: W, 19 | 20 | /// Offset into the buffer 21 | /// 22 | /// Only things after this matter. 
Things before have already been sent to the writer. 23 | offset: usize, 24 | 25 | /// Output buffer 26 | /// 27 | /// Where the operation writes, before it gets flushed to the writer 28 | buffer: Vec<u8>, 29 | 30 | // When `true`, indicates that nothing should be added to the buffer. 31 | // All that's left is to empty the buffer. 32 | finished: bool, 33 | 34 | /// When `true`, the operation just finished a frame. 35 | /// 36 | /// Only happens when decompressing. 37 | /// The context needs to be re-initialized to process the next frame. 38 | finished_frame: bool, 39 | } 40 | 41 | impl<W, D> Writer<W, D> 42 | where 43 | W: Write, 44 | D: Operation, 45 | { 46 | /// Creates a new `Writer` with a fixed buffer capacity of 32KB 47 | /// 48 | /// All output from the given operation will be forwarded to `writer`. 49 | pub fn new(writer: W, operation: D) -> Self { 50 | // 32KB buffer? That's what flate2 uses 51 | Self::new_with_capacity(writer, operation, 32 * 1024) 52 | } 53 | 54 | /// Creates a new `Writer` with user defined capacity. 55 | /// 56 | /// All output from the given operation will be forwarded to `writer`. 57 | pub fn new_with_capacity( 58 | writer: W, 59 | operation: D, 60 | capacity: usize, 61 | ) -> Self { 62 | Self::with_output_buffer( 63 | Vec::with_capacity(capacity), 64 | writer, 65 | operation, 66 | ) 67 | } 68 | 69 | /// Creates a new `Writer` using the given output buffer. 70 | /// 71 | /// The output buffer _must_ have pre-allocated capacity (its capacity will not be changed after). 72 | /// 73 | /// Usually you would use `Vec::with_capacity(desired_buffer_size)`. 74 | pub fn with_output_buffer( 75 | output_buffer: Vec<u8>, 76 | writer: W, 77 | operation: D, 78 | ) -> Self { 79 | Writer { 80 | writer, 81 | operation, 82 | 83 | offset: 0, 84 | // Caller-provided buffer; its capacity is never changed afterwards. 85 | buffer: output_buffer, 86 | 87 | finished: false, 88 | finished_frame: false, 89 | } 90 | } 91 | 92 | /// Ends the stream. 
93 | /// 94 | /// This *must* be called after all data has been written to finish the 95 | /// stream. 96 | /// 97 | /// If you forget to call this and just drop the `Writer`, you *will* have 98 | /// an incomplete output. 99 | /// 100 | /// Keep calling it until it returns `Ok(())`, then don't call it again. 101 | pub fn finish(&mut self) -> io::Result<()> { 102 | loop { 103 | // Keep trying until we're really done. 104 | self.write_from_offset()?; 105 | 106 | // At this point the buffer has been fully written out. 107 | 108 | if self.finished { 109 | return Ok(()); 110 | } 111 | 112 | // Let's fill this buffer again! 113 | 114 | let finished_frame = self.finished_frame; 115 | let hint = 116 | self.with_buffer(|dst, op| op.finish(dst, finished_frame)); 117 | self.offset = 0; 118 | // println!("Hint: {:?}\nOut:{:?}", hint, &self.buffer); 119 | 120 | // We return here if zstd had a problem. 121 | // Could happen with invalid data, ... 122 | let hint = hint?; 123 | 124 | if hint != 0 && self.buffer.is_empty() { 125 | // This happens if we are decoding an incomplete frame. 126 | return Err(io::Error::new( 127 | io::ErrorKind::UnexpectedEof, 128 | "incomplete frame", 129 | )); 130 | } 131 | 132 | // println!("Finishing {}, {}", bytes_written, hint); 133 | 134 | self.finished = hint == 0; 135 | } 136 | } 137 | 138 | /// Run the given closure on `self.buffer`. 139 | /// 140 | /// The buffer will be cleared, and made available wrapped in an `OutBuffer`. 141 | fn with_buffer<F, T>(&mut self, f: F) -> T 142 | where 143 | F: FnOnce(&mut OutBuffer<'_, Vec<u8>>, &mut D) -> T, 144 | { 145 | self.buffer.clear(); 146 | let mut output = OutBuffer::around(&mut self.buffer); 147 | // eprintln!("Output: {:?}", output); 148 | f(&mut output, &mut self.operation) 149 | } 150 | 151 | /// Attempt to write `self.buffer` to the wrapped writer. 152 | /// 153 | /// Returns `Ok(())` once all the buffer has been written. 
154 | fn write_from_offset(&mut self) -> io::Result<()> { 155 | // The code looks a lot like `write_all`, but keeps track of what has 156 | // been written in case we're interrupted. 157 | while self.offset < self.buffer.len() { 158 | match self.writer.write(&self.buffer[self.offset..]) { 159 | Ok(0) => { 160 | return Err(io::Error::new( 161 | io::ErrorKind::WriteZero, 162 | "writer will not accept any more data", 163 | )) 164 | } 165 | Ok(n) => self.offset += n, 166 | Err(ref e) if e.kind() == io::ErrorKind::Interrupted => (), 167 | Err(e) => return Err(e), 168 | } 169 | } 170 | Ok(()) 171 | } 172 | 173 | /// Return the wrapped `Writer` and `Operation`. 174 | /// 175 | /// Careful: if you call this before calling [`Writer::finish()`], the 176 | /// output may be incomplete. 177 | pub fn into_inner(self) -> (W, D) { 178 | (self.writer, self.operation) 179 | } 180 | 181 | /// Gives a reference to the inner writer. 182 | pub fn writer(&self) -> &W { 183 | &self.writer 184 | } 185 | 186 | /// Gives a mutable reference to the inner writer. 187 | pub fn writer_mut(&mut self) -> &mut W { 188 | &mut self.writer 189 | } 190 | 191 | /// Gives a reference to the inner operation. 192 | pub fn operation(&self) -> &D { 193 | &self.operation 194 | } 195 | 196 | /// Gives a mutable reference to the inner operation. 197 | pub fn operation_mut(&mut self) -> &mut D { 198 | &mut self.operation 199 | } 200 | 201 | /// Returns the offset in the current buffer. Only useful for debugging. 202 | #[cfg(test)] 203 | pub fn offset(&self) -> usize { 204 | self.offset 205 | } 206 | 207 | /// Returns the current buffer. Only useful for debugging. 
208 | #[cfg(test)] 209 | pub fn buffer(&self) -> &[u8] { 210 | &self.buffer 211 | } 212 | } 213 | 214 | impl<W, D> Write for Writer<W, D> 215 | where 216 | W: Write, 217 | D: Operation, 218 | { 219 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { 220 | if self.finished { 221 | return Err(io::Error::new( 222 | io::ErrorKind::Other, 223 | "encoder is finished", 224 | )); 225 | } 226 | // Keep trying until _something_ has been consumed. 227 | // As soon as some input has been taken, we cannot afford 228 | // to take any chance: if an error occurs, the user couldn't know 229 | // that some data _was_ successfully written. 230 | loop { 231 | // First, write any pending data from `self.buffer`. 232 | self.write_from_offset()?; 233 | // At this point `self.buffer` can safely be discarded. 234 | 235 | // Support writing concatenated frames by re-initializing the 236 | // context. 237 | if self.finished_frame { 238 | self.operation.reinit()?; 239 | self.finished_frame = false; 240 | } 241 | 242 | let mut src = InBuffer::around(buf); 243 | let hint = self.with_buffer(|dst, op| op.run(&mut src, dst)); 244 | let bytes_read = src.pos; 245 | 246 | // eprintln!( 247 | // "Write Hint: {:?}\n src: {:?}\n dst: {:?}", 248 | // hint, src, self.buffer 249 | // ); 250 | 251 | self.offset = 0; 252 | let hint = hint?; 253 | 254 | if hint == 0 { 255 | self.finished_frame = true; 256 | } 257 | 258 | // As we said, as soon as we've consumed something, return. 259 | if bytes_read > 0 || buf.is_empty() { 260 | // println!("Returning {}", bytes_read); 261 | return Ok(bytes_read); 262 | } 263 | } 264 | } 265 | 266 | fn flush(&mut self) -> io::Result<()> { 267 | let mut finished = self.finished; 268 | loop { 269 | // If the output is blocked or has an error, return now. 
270 | self.write_from_offset()?; 271 | 272 | if finished { 273 | break; 274 | } 275 | 276 | let hint = self.with_buffer(|dst, op| op.flush(dst)); 277 | 278 | self.offset = 0; 279 | let hint = hint?; 280 | 281 | finished = hint == 0; 282 | } 283 | 284 | self.writer.flush() 285 | } 286 | } 287 | 288 | #[cfg(test)] 289 | mod tests { 290 | use super::Writer; 291 | use std::io::Write; 292 | 293 | #[test] 294 | fn test_noop() { 295 | use crate::stream::raw::NoOp; 296 | 297 | let input = b"AbcdefghAbcdefgh."; 298 | 299 | // Test writer 300 | let mut output = Vec::new(); 301 | { 302 | let mut writer = Writer::new(&mut output, NoOp); 303 | writer.write_all(input).unwrap(); 304 | writer.finish().unwrap(); 305 | } 306 | assert_eq!(&output, input); 307 | } 308 | 309 | #[test] 310 | fn test_compress() { 311 | use crate::stream::raw::Encoder; 312 | 313 | let input = b"AbcdefghAbcdefgh."; 314 | 315 | // Test writer 316 | let mut output = Vec::new(); 317 | { 318 | let mut writer = 319 | Writer::new(&mut output, Encoder::new(1).unwrap()); 320 | writer.write_all(input).unwrap(); 321 | writer.finish().unwrap(); 322 | } 323 | // println!("Output: {:?}", output); 324 | let decoded = crate::decode_all(&output[..]).unwrap(); 325 | assert_eq!(&decoded, input); 326 | } 327 | 328 | #[test] 329 | fn test_compress_with_capacity() { 330 | use crate::stream::raw::Encoder; 331 | 332 | let input = b"AbcdefghAbcdefgh."; 333 | 334 | // Test writer 335 | let mut output = Vec::new(); 336 | { 337 | let mut writer = Writer::new_with_capacity( 338 | &mut output, 339 | Encoder::new(1).unwrap(), 340 | 64, 341 | ); 342 | assert_eq!(writer.buffer.capacity(), 64); 343 | writer.write_all(input).unwrap(); 344 | writer.finish().unwrap(); 345 | } 346 | let decoded = crate::decode_all(&output[..]).unwrap(); 347 | assert_eq!(&decoded, input); 348 | } 349 | 350 | #[test] 351 | fn test_decompress() { 352 | use crate::stream::raw::Decoder; 353 | 354 | let input = b"AbcdefghAbcdefgh."; 355 | let compressed = 
crate::encode_all(&input[..], 1).unwrap(); 356 | 357 | // Test writer 358 | let mut output = Vec::new(); 359 | { 360 | let mut writer = Writer::new(&mut output, Decoder::new().unwrap()); 361 | writer.write_all(&compressed).unwrap(); 362 | writer.finish().unwrap(); 363 | } 364 | // println!("Output: {:?}", output); 365 | assert_eq!(&output, input); 366 | } 367 | 368 | #[test] 369 | fn test_decompress_with_capacity() { 370 | use crate::stream::raw::Decoder; 371 | 372 | let input = b"AbcdefghAbcdefgh."; 373 | let compressed = crate::encode_all(&input[..], 1).unwrap(); 374 | 375 | // Test writer 376 | let mut output = Vec::new(); 377 | { 378 | let mut writer = Writer::new_with_capacity( 379 | &mut output, 380 | Decoder::new().unwrap(), 381 | 64, 382 | ); 383 | assert_eq!(writer.buffer.capacity(), 64); 384 | writer.write_all(&compressed).unwrap(); 385 | writer.finish().unwrap(); 386 | } 387 | assert_eq!(&output, input); 388 | } 389 | } 390 | -------------------------------------------------------------------------------- /tests/issue_182.rs: -------------------------------------------------------------------------------- 1 | const TEXT: &[u8] = include_bytes!("../assets/example.txt"); 2 | 3 | #[test] 4 | #[should_panic] 5 | fn test_issue_182() { 6 | use std::io::BufRead; 7 | 8 | let compressed = zstd::encode_all(TEXT, 3).unwrap(); 9 | let truncated = &compressed[..compressed.len() / 2]; 10 | 11 | let rdr = zstd::Decoder::new(truncated).unwrap(); 12 | let rdr = std::io::BufReader::new(rdr); 13 | for line in rdr.lines() { 14 | line.unwrap(); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /zstd-safe/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Alexandre Bury "] 3 | name = "zstd-safe" 4 | build = "build.rs" 5 | version = "7.2.4" 6 | description = "Safe low-level bindings for the zstd compression library." 
7 | keywords = ["zstd", "zstandard", "compression"] 8 | categories = ["api-bindings", "compression"] 9 | repository = "https://github.com/gyscos/zstd-rs" 10 | license = "MIT OR Apache-2.0" 11 | readme = "Readme.md" 12 | edition = "2018" 13 | rust-version = "1.64" 14 | exclude = ["update_consts.sh"] 15 | 16 | [package.metadata.docs.rs] 17 | features = ["experimental", "arrays", "std", "zdict_builder", "doc-cfg"] 18 | 19 | [dependencies] 20 | zstd-sys = { path = "zstd-sys", version = "2.0.15", default-features = false } 21 | 22 | [features] 23 | default = ["legacy", "arrays", "zdict_builder"] 24 | 25 | bindgen = ["zstd-sys/bindgen"] 26 | debug = ["zstd-sys/debug"] 27 | experimental = ["zstd-sys/experimental"] 28 | legacy = ["zstd-sys/legacy"] 29 | pkg-config = ["zstd-sys/pkg-config"] 30 | std = ["zstd-sys/std"] # Implements WriteBuf for std types like Cursor and Vec. 31 | zstdmt = ["zstd-sys/zstdmt"] 32 | thin = ["zstd-sys/thin"] 33 | arrays = [] 34 | no_asm = ["zstd-sys/no_asm"] 35 | doc-cfg = [] 36 | zdict_builder = ["zstd-sys/zdict_builder"] 37 | seekable = ["zstd-sys/seekable"] 38 | 39 | # These two are for cross-language LTO. 40 | # Will only work if `clang` is used to build the C library. 41 | fat-lto = ["zstd-sys/fat-lto"] 42 | thin-lto = ["zstd-sys/thin-lto"] 43 | 44 | [lints.rust] 45 | non_upper_case_globals = "allow" 46 | -------------------------------------------------------------------------------- /zstd-safe/LICENSE: -------------------------------------------------------------------------------- 1 | MIT or Apache-2.0 2 | -------------------------------------------------------------------------------- /zstd-safe/LICENSE.Apache-2.0: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | 177 | END OF TERMS AND CONDITIONS 178 | -------------------------------------------------------------------------------- /zstd-safe/LICENSE.Mit: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2016 Alexandre Bury 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 9 | -------------------------------------------------------------------------------- /zstd-safe/Readme.md: -------------------------------------------------------------------------------- 1 | # zstd-safe 2 | 3 | This is a thin, no-std, safe abstraction built on top of the bindings from [zstd-sys]. 4 | 5 | It is close to a 1-for-1 mapping to the C functions, but uses rust types like slices instead of pointers and lengths. 6 | 7 | For a more comfortable higher-level library (with `Read`/`Write` implementations), see [zstd-rs]. 
8 | 9 | [zstd-sys]: https://github.com/gyscos/zstd-rs/tree/main/zstd-safe/zstd-sys 10 | [zstd-rs]: https://github.com/gyscos/zstd-rs 11 | -------------------------------------------------------------------------------- /zstd-safe/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | // Force the `std` feature in some cases 3 | let target_arch = 4 | std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); 5 | let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); 6 | 7 | if target_arch == "wasm32" || target_os == "hermit" { 8 | println!("cargo:rustc-cfg=feature=\"std\""); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /zstd-safe/fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | corpus 3 | artifacts 4 | coverage 5 | -------------------------------------------------------------------------------- /zstd-safe/fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zstd-safe-fuzz" 3 | version = "0.0.0" 4 | publish = false 5 | edition = "2018" 6 | 7 | [package.metadata] 8 | cargo-fuzz = true 9 | 10 | [package.metadata.docs.rs] 11 | features = ["std"] 12 | 13 | [dependencies] 14 | libfuzzer-sys = "0.4" 15 | zstd-sys = { path = "../zstd-sys", version = "2.0.10", default-features = false } 16 | 17 | [features] 18 | std = ["zstd-sys/std"] 19 | 20 | [dependencies.zstd-safe] 21 | path = ".." 
22 | 23 | [[bin]] 24 | name = "zstd_fuzzer" 25 | path = "fuzz_targets/zstd_fuzzer.rs" 26 | test = false 27 | doc = false 28 | bench = false 29 | -------------------------------------------------------------------------------- /zstd-safe/fuzz/fuzz_targets/zstd_fuzzer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #![no_main] 16 | 17 | extern crate zstd_safe; 18 | use libfuzzer_sys::fuzz_target; 19 | 20 | fuzz_target!(|data: &[u8]| { 21 | // Size the scratch buffer from the input: twice its length, capped at 2048 bytes 22 | let buffer_size = std::cmp::min(data.len() * 2, 2048); 23 | let mut buffer = vec![0u8; buffer_size]; 24 | 25 | // Fuzz one-shot compression and decompression at every level from 0 to 20 26 | for level in 0..=20 { 27 | if let Ok(written) = zstd_safe::compress(&mut buffer[..], data, level) { 28 | let compressed = &buffer[..written]; 29 | let mut decompressed = vec![0u8; buffer_size]; 30 | let _ = zstd_safe::decompress(&mut decompressed[..], compressed); 31 | } 32 | } 33 | 34 | // Fuzz compression and decompression with CCtx 35 | let mut cctx = zstd_safe::CCtx::default(); 36 | if let Ok(written) = cctx.compress(&mut buffer[..], data, 3) { 37 | let compressed = &buffer[..written]; 38 | let mut dctx = zstd_safe::DCtx::default(); 39 | let mut decompressed = vec![0u8; buffer_size]; 40 | let _ = dctx.decompress(&mut decompressed[..],
compressed); 41 | } 42 | 43 | // Fuzz compression and decompression on dict 44 | let dict = b"sample dictionary for zstd fuzzing"; 45 | let mut cctx_dict = zstd_safe::CCtx::default(); 46 | if let Ok(written) = cctx_dict.compress_using_dict(&mut buffer[..], data, dict, 3) { 47 | let compressed = &buffer[..written]; 48 | 49 | let mut dctx_dict = zstd_safe::DCtx::default(); 50 | let mut decompressed = vec![0u8; buffer_size]; 51 | let _ = dctx_dict.decompress_using_dict(&mut decompressed[..], compressed, dict); 52 | } 53 | 54 | // Fuzz compression and decompression with streaming 55 | let mut cctx_stream = zstd_safe::CCtx::default(); 56 | let mut dctx_stream = zstd_safe::DCtx::default(); 57 | let mut in_buffer = zstd_safe::InBuffer::around(data); 58 | let mut out_buffer = zstd_safe::OutBuffer::around(&mut buffer[..]); 59 | // Also end the frame: compress_stream alone may leave data buffered inside the context, so the decoder would see little or nothing 60 | if cctx_stream.compress_stream(&mut out_buffer, &mut in_buffer).is_ok() && cctx_stream.end_stream(&mut out_buffer).is_ok() { 61 | let mut decompressed_stream = vec![0u8; buffer_size]; 62 | let mut out_buffer_stream = zstd_safe::OutBuffer::around(&mut decompressed_stream[..]); 63 | let mut in_buffer_stream = zstd_safe::InBuffer::around(out_buffer.as_slice()); 64 | let _ = dctx_stream.decompress_stream(&mut out_buffer_stream, &mut in_buffer_stream); 65 | } 66 | 67 | // Fuzz checksummed frames, then error handling on deliberately corrupted input 68 | let mut cctx_param = zstd_safe::CCtx::default(); 69 | if cctx_param.set_parameter(zstd_safe::CParameter::ChecksumFlag(true)).is_ok() { 70 | if let Ok(written) = cctx_param.compress2(&mut buffer[..], data) { 71 | let compressed = &buffer[..written]; 72 | let mut dctx_param = zstd_safe::DCtx::default(); 73 | let mut decompressed = vec![0u8; buffer_size]; 74 | let _ = dctx_param.decompress(&mut decompressed[..], compressed); 75 | } 76 | } 77 | if let Ok(written) = zstd_safe::compress(&mut buffer[..], data, 3) { 78 | let compressed = &mut buffer[..written]; 79 | for i in
(0..compressed.len()).step_by(5) { 80 | compressed[i] = compressed[i].wrapping_add(1); 81 | } 82 | 83 | let mut decompressed = vec![0u8; 2048]; 84 | let mut dctx = zstd_safe::DCtx::default(); 85 | let _ = dctx.decompress(&mut decompressed[..], compressed); 86 | } 87 | }); 88 | -------------------------------------------------------------------------------- /zstd-safe/src/constants.rs: -------------------------------------------------------------------------------- 1 | // This file has been generated by ./update_consts.sh 2 | pub const BLOCKSIZELOG_MAX: u32 = zstd_sys::ZSTD_BLOCKSIZELOG_MAX; 3 | pub const BLOCKSIZE_MAX: u32 = zstd_sys::ZSTD_BLOCKSIZE_MAX; 4 | pub const CLEVEL_DEFAULT: CompressionLevel = zstd_sys::ZSTD_CLEVEL_DEFAULT as CompressionLevel; 5 | pub const CONTENTSIZE_ERROR: u64 = zstd_sys::ZSTD_CONTENTSIZE_ERROR as u64; 6 | pub const CONTENTSIZE_UNKNOWN: u64 = zstd_sys::ZSTD_CONTENTSIZE_UNKNOWN as u64; 7 | pub const MAGIC_DICTIONARY: u32 = zstd_sys::ZSTD_MAGIC_DICTIONARY; 8 | pub const MAGICNUMBER: u32 = zstd_sys::ZSTD_MAGICNUMBER; 9 | pub const MAGIC_SKIPPABLE_MASK: u32 = zstd_sys::ZSTD_MAGIC_SKIPPABLE_MASK; 10 | pub const MAGIC_SKIPPABLE_START: u32 = zstd_sys::ZSTD_MAGIC_SKIPPABLE_START; 11 | pub const VERSION_MAJOR: u32 = zstd_sys::ZSTD_VERSION_MAJOR; 12 | pub const VERSION_MINOR: u32 = zstd_sys::ZSTD_VERSION_MINOR; 13 | pub const VERSION_NUMBER: u32 = zstd_sys::ZSTD_VERSION_NUMBER; 14 | pub const VERSION_RELEASE: u32 = zstd_sys::ZSTD_VERSION_RELEASE; 15 | -------------------------------------------------------------------------------- /zstd-safe/src/constants_experimental.rs: -------------------------------------------------------------------------------- 1 | // This file has been generated by ./update_consts.sh 2 | pub const BLOCKSIZE_MAX_MIN: u32 = zstd_sys::ZSTD_BLOCKSIZE_MAX_MIN; 3 | pub const BLOCKSPLITTER_LEVEL_MAX: u32 = zstd_sys::ZSTD_BLOCKSPLITTER_LEVEL_MAX; 4 | pub const CHAINLOG_MAX_32: u32 = 
zstd_sys::ZSTD_CHAINLOG_MAX_32; 5 | pub const CHAINLOG_MAX_64: u32 = zstd_sys::ZSTD_CHAINLOG_MAX_64; 6 | pub const CHAINLOG_MIN: u32 = zstd_sys::ZSTD_CHAINLOG_MIN; 7 | pub const FRAMEHEADERSIZE_MAX: u32 = zstd_sys::ZSTD_FRAMEHEADERSIZE_MAX; 8 | pub const HASHLOG_MIN: u32 = zstd_sys::ZSTD_HASHLOG_MIN; 9 | pub const LDM_BUCKETSIZELOG_MAX: u32 = zstd_sys::ZSTD_LDM_BUCKETSIZELOG_MAX; 10 | pub const LDM_BUCKETSIZELOG_MIN: u32 = zstd_sys::ZSTD_LDM_BUCKETSIZELOG_MIN; 11 | pub const LDM_HASHLOG_MIN: u32 = zstd_sys::ZSTD_LDM_HASHLOG_MIN; 12 | pub const LDM_HASHRATELOG_MIN: u32 = zstd_sys::ZSTD_LDM_HASHRATELOG_MIN; 13 | pub const LDM_MINMATCH_MAX: u32 = zstd_sys::ZSTD_LDM_MINMATCH_MAX; 14 | pub const LDM_MINMATCH_MIN: u32 = zstd_sys::ZSTD_LDM_MINMATCH_MIN; 15 | pub const MINMATCH_MAX: u32 = zstd_sys::ZSTD_MINMATCH_MAX; 16 | pub const MINMATCH_MIN: u32 = zstd_sys::ZSTD_MINMATCH_MIN; 17 | pub const OVERLAPLOG_MAX: u32 = zstd_sys::ZSTD_OVERLAPLOG_MAX; 18 | pub const OVERLAPLOG_MIN: u32 = zstd_sys::ZSTD_OVERLAPLOG_MIN; 19 | pub const SEARCHLOG_MIN: u32 = zstd_sys::ZSTD_SEARCHLOG_MIN; 20 | pub const SKIPPABLEHEADERSIZE: u32 = zstd_sys::ZSTD_SKIPPABLEHEADERSIZE; 21 | pub const SRCSIZEHINT_MIN: u32 = zstd_sys::ZSTD_SRCSIZEHINT_MIN; 22 | pub const TARGETCBLOCKSIZE_MAX: u32 = zstd_sys::ZSTD_TARGETCBLOCKSIZE_MAX; 23 | pub const TARGETCBLOCKSIZE_MIN: u32 = zstd_sys::ZSTD_TARGETCBLOCKSIZE_MIN; 24 | pub const TARGETLENGTH_MAX: u32 = zstd_sys::ZSTD_TARGETLENGTH_MAX; 25 | pub const TARGETLENGTH_MIN: u32 = zstd_sys::ZSTD_TARGETLENGTH_MIN; 26 | pub const WINDOWLOG_LIMIT_DEFAULT: u32 = zstd_sys::ZSTD_WINDOWLOG_LIMIT_DEFAULT; 27 | pub const WINDOWLOG_MAX_32: u32 = zstd_sys::ZSTD_WINDOWLOG_MAX_32; 28 | pub const WINDOWLOG_MAX_64: u32 = zstd_sys::ZSTD_WINDOWLOG_MAX_64; 29 | pub const WINDOWLOG_MIN: u32 = zstd_sys::ZSTD_WINDOWLOG_MIN; 30 | -------------------------------------------------------------------------------- /zstd-safe/src/constants_seekable.rs: 
-------------------------------------------------------------------------------- 1 | // This file has been generated by ./update_consts.sh 2 | pub const SEEKABLE_FRAMEINDEX_TOOLARGE: u64 = zstd_sys::ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE as u64; 3 | pub const SEEKABLE_MAGICNUMBER: u32 = zstd_sys::ZSTD_SEEKABLE_MAGICNUMBER; 4 | pub const SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE: u32 = zstd_sys::ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE; 5 | pub const SEEKABLE_MAXFRAMES: u32 = zstd_sys::ZSTD_SEEKABLE_MAXFRAMES; 6 | pub const seekTableFooterSize: u32 = zstd_sys::ZSTD_seekTableFooterSize; 7 | -------------------------------------------------------------------------------- /zstd-safe/src/tests.rs: -------------------------------------------------------------------------------- 1 | extern crate std; 2 | use crate as zstd_safe; 3 | 4 | use self::std::vec::Vec; 5 | 6 | const INPUT: &[u8] = b"Rust is a multi-paradigm system programming language focused on safety, especially safe concurrency. Rust is syntactically similar to C++, but is designed to provide better memory safety while maintaining high performance."; 7 | const LONG_CONTENT: &str = include_str!("lib.rs"); 8 | 9 | #[cfg(feature = "std")] 10 | #[test] 11 | fn test_writebuf() { 12 | use zstd_safe::WriteBuf; 13 | 14 | let mut data = Vec::with_capacity(10); 15 | unsafe { 16 | data.write_from(|ptr, n| { 17 | assert!(n >= 4); 18 | let ptr = ptr as *mut u8; 19 | ptr.write(0); 20 | ptr.add(1).write(1); 21 | ptr.add(2).write(2); 22 | ptr.add(3).write(3); 23 | Ok(4) 24 | }) 25 | } 26 | .unwrap(); 27 | assert_eq!(data.as_slice(), &[0, 1, 2, 3]); 28 | 29 | let mut cursor = std::io::Cursor::new(&mut data); 30 | // Here we use a position larger than the actual data. 31 | // So expect the data to be zero-filled. 
32 | cursor.set_position(6); 33 | unsafe { 34 | cursor.write_from(|ptr, n| { 35 | assert!(n >= 4); 36 | let ptr = ptr as *mut u8; 37 | ptr.write(4); 38 | ptr.add(1).write(5); 39 | ptr.add(2).write(6); 40 | ptr.add(3).write(7); 41 | Ok(4) 42 | }) 43 | } 44 | .unwrap(); 45 | 46 | assert_eq!(data.as_slice(), &[0, 1, 2, 3, 0, 0, 4, 5, 6, 7]); 47 | } 48 | 49 | #[cfg(feature = "std")] 50 | #[test] 51 | fn test_simple_cycle() { 52 | let mut buffer = std::vec![0u8; 256]; 53 | let written = zstd_safe::compress(&mut buffer, INPUT, 3).unwrap(); 54 | let compressed = &buffer[..written]; 55 | 56 | let mut buffer = std::vec![0u8; 256]; 57 | let written = zstd_safe::decompress(&mut buffer, compressed).unwrap(); 58 | let decompressed = &buffer[..written]; 59 | 60 | assert_eq!(INPUT, decompressed); 61 | } 62 | 63 | #[test] 64 | fn test_cctx_cycle() { 65 | let mut buffer = std::vec![0u8; 256]; 66 | let mut cctx = zstd_safe::CCtx::default(); 67 | let written = cctx.compress(&mut buffer[..], INPUT, 1).unwrap(); 68 | let compressed = &buffer[..written]; 69 | 70 | let mut dctx = zstd_safe::DCtx::default(); 71 | let mut buffer = std::vec![0u8; 256]; 72 | let written = dctx.decompress(&mut buffer[..], compressed).unwrap(); 73 | let decompressed = &buffer[..written]; 74 | 75 | assert_eq!(INPUT, decompressed); 76 | } 77 | 78 | #[test] 79 | fn test_dictionary() { 80 | // Prepare some content to train the dictionary. 
81 | let bytes = LONG_CONTENT.as_bytes(); 82 | let line_sizes: Vec<usize> = 83 | LONG_CONTENT.lines().map(|line| line.len() + 1).collect(); 84 | 85 | // Train the dictionary 86 | let mut dict_buffer = std::vec![0u8; 100_000]; 87 | let written = 88 | zstd_safe::train_from_buffer(&mut dict_buffer[..], bytes, &line_sizes) 89 | .unwrap(); 90 | let dict_buffer = &dict_buffer[..written]; 91 | 92 | // Create pre-hashed dictionaries for (de)compression 93 | let cdict = zstd_safe::create_cdict(dict_buffer, 3); 94 | let ddict = zstd_safe::create_ddict(dict_buffer); 95 | 96 | // Compress data 97 | let mut cctx = zstd_safe::CCtx::default(); 98 | cctx.ref_cdict(&cdict).unwrap(); 99 | 100 | let mut buffer = std::vec![0u8; 1024 * 1024]; 101 | // First, try to compress without a dict 102 | let big_written = zstd_safe::compress(&mut buffer[..], bytes, 3).unwrap(); 103 | 104 | let written = cctx 105 | .compress2(&mut buffer[..], bytes) 106 | .map_err(zstd_safe::get_error_name) 107 | .unwrap(); 108 | 109 | assert!(big_written > written); 110 | let compressed = &buffer[..written]; 111 | 112 | // Decompress data 113 | let mut dctx = zstd_safe::DCtx::default(); 114 | dctx.ref_ddict(&ddict).unwrap(); 115 | 116 | let mut buffer = std::vec![0u8; 1024 * 1024]; 117 | let written = dctx 118 | .decompress(&mut buffer[..], compressed) 119 | .map_err(zstd_safe::get_error_name) 120 | .unwrap(); 121 | let decompressed = &buffer[..written]; 122 | 123 | // Profit! 
124 | assert_eq!(bytes, decompressed); 125 | } 126 | 127 | #[test] 128 | fn test_checksum() { 129 | let mut buffer = std::vec![0u8; 256]; 130 | let mut cctx = zstd_safe::CCtx::default(); 131 | cctx.set_parameter(zstd_safe::CParameter::ChecksumFlag(true)) 132 | .unwrap(); 133 | let written = cctx.compress2(&mut buffer[..], INPUT).unwrap(); 134 | let compressed = &mut buffer[..written]; 135 | 136 | let mut dctx = zstd_safe::DCtx::default(); 137 | let mut buffer = std::vec![0u8; 1024*1024]; 138 | let written = dctx 139 | .decompress(&mut buffer[..], compressed) 140 | .map_err(zstd_safe::get_error_name) 141 | .unwrap(); 142 | let decompressed = &buffer[..written]; 143 | 144 | assert_eq!(INPUT, decompressed); 145 | 146 | // Now try again with some corruption 147 | // TODO: Find a mutation that _wouldn't_ be detected without checksums. 148 | // (Most naive changes already trigger a "corrupt block" error.) 149 | if let Some(last) = compressed.last_mut() { 150 | *last = last.saturating_sub(1); 151 | } 152 | let err = dctx 153 | .decompress(&mut buffer[..], compressed) 154 | .map_err(zstd_safe::get_error_name) 155 | .err() 156 | .unwrap(); 157 | // The error message will complain about the checksum. 
158 | assert!(err.contains("checksum")); 159 | } 160 | 161 | #[cfg(all(feature = "experimental", feature = "std"))] 162 | #[test] 163 | fn test_upper_bound() { 164 | let mut buffer = std::vec![0u8; 256]; 165 | 166 | assert!(zstd_safe::decompress_bound(&buffer).is_err()); 167 | 168 | let written = zstd_safe::compress(&mut buffer, INPUT, 3).unwrap(); 169 | let compressed = &buffer[..written]; 170 | 171 | assert_eq!( 172 | zstd_safe::decompress_bound(&compressed), 173 | Ok(INPUT.len() as u64) 174 | ); 175 | } 176 | 177 | #[cfg(feature = "seekable")] 178 | #[test] 179 | fn test_seekable_cycle() { 180 | let seekable_archive = new_seekable_archive(INPUT); 181 | let mut seekable = crate::seekable::Seekable::create(); 182 | seekable 183 | .init_buff(&seekable_archive) 184 | .map_err(zstd_safe::get_error_name) 185 | .unwrap(); 186 | 187 | decompress_seekable(&mut seekable); 188 | 189 | // Check that the archive can also be decompressed by a regular function 190 | let mut buffer = std::vec![0u8; 256]; 191 | let written = zstd_safe::decompress(&mut buffer[..], &seekable_archive) 192 | .map_err(zstd_safe::get_error_name) 193 | .unwrap(); 194 | let decompressed = &buffer[..written]; 195 | assert_eq!(INPUT, decompressed); 196 | 197 | // Trigger FrameIndexTooLargeError 198 | let frame_index = seekable.num_frames() + 1; 199 | assert_eq!( 200 | seekable.frame_compressed_offset(frame_index).unwrap_err(), 201 | crate::seekable::FrameIndexTooLargeError 202 | ); 203 | } 204 | 205 | #[cfg(feature = "seekable")] 206 | #[test] 207 | fn test_seekable_seek_table() { 208 | use crate::seekable::{FrameIndexTooLargeError, SeekTable, Seekable}; 209 | 210 | let seekable_archive = new_seekable_archive(INPUT); 211 | let mut seekable = Seekable::create(); 212 | 213 | // Assert that creating a SeekTable from an uninitialized seekable errors. 
214 | // This led to segfaults with zstd versions prior to v1.5.7 215 | assert!(SeekTable::try_from_seekable(&seekable).is_err()); 216 | 217 | seekable 218 | .init_buff(&seekable_archive) 219 | .map_err(zstd_safe::get_error_name) 220 | .unwrap(); 221 | 222 | // Try to create a seek table from the seekable 223 | let seek_table = 224 | { SeekTable::try_from_seekable(&seekable).unwrap() }; 225 | 226 | // Seekable and seek table should return the same results 227 | assert_eq!(seekable.num_frames(), seek_table.num_frames()); 228 | assert_eq!( 229 | seekable.frame_compressed_offset(2).unwrap(), 230 | seek_table.frame_compressed_offset(2).unwrap() 231 | ); 232 | assert_eq!( 233 | seekable.frame_decompressed_offset(2).unwrap(), 234 | seek_table.frame_decompressed_offset(2).unwrap() 235 | ); 236 | assert_eq!( 237 | seekable.frame_compressed_size(2).unwrap(), 238 | seek_table.frame_compressed_size(2).unwrap() 239 | ); 240 | assert_eq!( 241 | seekable.frame_decompressed_size(2).unwrap(), 242 | seek_table.frame_decompressed_size(2).unwrap() 243 | ); 244 | 245 | // Trigger FrameIndexTooLargeError 246 | let frame_index = seekable.num_frames() + 1; 247 | assert_eq!( 248 | seek_table.frame_compressed_offset(frame_index).unwrap_err(), 249 | FrameIndexTooLargeError 250 | ); 251 | } 252 | 253 | #[cfg(all(feature = "std", feature = "seekable"))] 254 | #[test] 255 | fn test_seekable_advanced_cycle() { 256 | use crate::seekable::Seekable; 257 | use std::{boxed::Box, io::Cursor}; 258 | 259 | // Wrap the archive in a cursor that implements Read and Seek, 260 | // a file would also work 261 | let seekable_archive = Cursor::new(new_seekable_archive(INPUT)); 262 | let mut seekable = Seekable::create() 263 | .init_advanced(Box::new(seekable_archive)) 264 | .map_err(zstd_safe::get_error_name) 265 | .unwrap(); 266 | 267 | decompress_seekable(&mut seekable); 268 | } 269 | 270 | #[cfg(feature = "seekable")] 271 | fn new_seekable_archive(input: &[u8]) -> Vec<u8> { 272 | use 
crate::{seekable::SeekableCStream, InBuffer, OutBuffer}; 273 | 274 | // Make sure the buffer is big enough 275 | // The buffer needs to be bigger than the uncompressed data here, as the seekable archive has 276 | // more metadata than actual compressed data because the input is really small and we use 277 | // a max_frame_size of 64, which is way too small for real-world usage. 278 | let mut buffer = std::vec![0u8; 512]; 279 | let mut cstream = SeekableCStream::create(); 280 | cstream 281 | .init(3, true, 64) 282 | .map_err(zstd_safe::get_error_name) 283 | .unwrap(); 284 | let mut in_buffer = InBuffer::around(input); 285 | let mut out_buffer = OutBuffer::around(&mut buffer[..]); 286 | 287 | // This could get stuck if the buffer is too small 288 | while in_buffer.pos() < in_buffer.src.len() { 289 | cstream 290 | .compress_stream(&mut out_buffer, &mut in_buffer) 291 | .map_err(zstd_safe::get_error_name) 292 | .unwrap(); 293 | } 294 | 295 | // Make sure everything is flushed to out_buffer 296 | loop { 297 | if cstream 298 | .end_stream(&mut out_buffer) 299 | .map_err(zstd_safe::get_error_name) 300 | .unwrap() 301 | == 0 302 | { 303 | break; 304 | } 305 | } 306 | 307 | Vec::from(out_buffer.as_slice()) 308 | } 309 | 310 | #[cfg(feature = "seekable")] 311 | fn decompress_seekable(seekable: &mut crate::seekable::Seekable<'_>) { 312 | // Make the buffer as big as max_frame_size so it can hold a complete frame 313 | let mut buffer = std::vec![0u8; 64]; 314 | // Decompress only the first frame 315 | let written = seekable 316 | .decompress(&mut buffer[..], 0) 317 | .map_err(zstd_safe::get_error_name) 318 | .unwrap(); 319 | let decompressed = &buffer[..written]; 320 | assert!(INPUT.starts_with(decompressed)); 321 | assert_eq!(decompressed.len(), 64); 322 | 323 | // Make the buffer big enough to hold the complete input 324 | let mut buffer = std::vec![0u8; 256]; 325 | // Decompress everything 326 | let written = seekable 327 | .decompress(&mut buffer[..], 0) 328 | 
.map_err(zstd_safe::get_error_name) 329 | .unwrap(); 330 | let decompressed = &buffer[..written]; 331 | assert_eq!(INPUT, decompressed); 332 | } 333 | -------------------------------------------------------------------------------- /zstd-safe/update_consts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | declare -A varTypes 3 | varTypes[CLEVEL_DEFAULT]=CompressionLevel 4 | varTypes[CONTENTSIZE_UNKNOWN]=u64 5 | varTypes[CONTENTSIZE_ERROR]=u64 6 | # This is originally (0ULL-2) and gets falsely translated to -2 by bindgen. 7 | # Casting it to u64 "fixes" this to the right number 8 | varTypes[SEEKABLE_FRAMEINDEX_TOOLARGE]=u64 9 | 10 | header() { 11 | echo "// This file has been generated by $0" 12 | } 13 | # Print a sorted `pub const` re-export for every ZSTD_ constant in file $1; names listed in varTypes get that type plus an `as` cast 14 | fetch_constants() { 15 | rg 'pub const ZSTD_' "$1" | while read -r pub const var vartype eq value; do 16 | vname=${var/:} 17 | newname=${vname/ZSTD_} 18 | vt=${varTypes[$newname]} 19 | if [ -z "$vt" ] 20 | then 21 | echo "pub const ${newname}: $vartype = zstd_sys::${vname};" 22 | else 23 | echo "pub const ${newname}: $vt = zstd_sys::${vname} as $vt;" 24 | fi 25 | done | sort 26 | } 27 | 28 | constants=$(fetch_constants zstd-sys/src/bindings_zstd.rs) 29 | header > src/constants.rs 30 | echo "$constants" >> src/constants.rs 31 | 32 | ( 33 | header 34 | comm -23 <(fetch_constants zstd-sys/src/bindings_zstd_experimental.rs) <(echo "$constants") 35 | ) > src/constants_experimental.rs 36 | 37 | ( 38 | header 39 | comm -23 <(fetch_constants zstd-sys/src/bindings_zstd_seekable.rs) <(echo "$constants") 40 | ) > src/constants_seekable.rs 41 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Alexandre Bury "] 3 | build = "build.rs" 4 | categories = [ 5 | "api-bindings", 6 | "compression", 7 | ] 8 | description = "Low-level bindings for the zstd 
compression library." 9 | keywords = [ 10 | "zstd", 11 | "zstandard", 12 | "compression", 13 | ] 14 | license = "MIT/Apache-2.0" 15 | links = "zstd" 16 | name = "zstd-sys" 17 | readme = "Readme.md" 18 | repository = "https://github.com/gyscos/zstd-rs" 19 | version = "2.0.15+zstd.1.5.7" 20 | edition = "2018" 21 | rust-version = "1.64" 22 | 23 | # Use include instead of exclude, as a (temporary) 24 | # workaround for https://github.com/rust-lang/cargo/issues/9555 25 | include = [ 26 | "/LICENSE*", 27 | "!/*.sh", 28 | "/build.rs", 29 | "/*.h", 30 | "/src/", 31 | "/wasm-shim/**/*.h", 32 | "/zstd/LICENSE", 33 | "/zstd/COPYING", 34 | "/zstd/lib/**/*.c", 35 | "/zstd/lib/**/*.h", 36 | "/zstd/lib/**/*.S", 37 | "/zstd/contrib/seekable_format/*.c", 38 | "/zstd/contrib/seekable_format/*.h", 39 | ] 40 | # exclude = [ 41 | # "zstd", 42 | # "!zstd/LICENSE", 43 | # "!zstd/COPYING", 44 | # "!zstd/lib/**/**.h", 45 | # "!zstd/lib/**/**.c", 46 | # ] 47 | 48 | [package.metadata.docs.rs] 49 | features = ["experimental"] 50 | 51 | [lib] 52 | doctest = false # Documentation is for C code, good luck testing that. 53 | 54 | [build-dependencies.bindgen] 55 | optional = true 56 | version = "0.71" 57 | default-features = false 58 | features = ["runtime", "which-rustfmt"] 59 | 60 | [build-dependencies.pkg-config] 61 | version = "0.3.28" 62 | 63 | [build-dependencies.cc] 64 | version = "1.0.45" 65 | features = ["parallel"] 66 | 67 | [features] 68 | default = ["legacy", "zdict_builder", "bindgen"] 69 | 70 | debug = [] # Enable zstd debug logs 71 | experimental = [] # Expose experimental ZSTD API 72 | legacy = [] # Enable legacy ZSTD support (for versions < zstd-0.8) 73 | non-cargo = [] # Silence cargo-specific build flags 74 | pkg-config = [] # Use pkg-config to build the zstd C library. 75 | std = [] # Deprecated: we never use types from std. 
76 | zstdmt = [] # Enable multi-thread support (with pthread) 77 | thin = [] # Optimize binary by size 78 | no_asm = [] # Disable ASM files (only on amd64 for decompression) 79 | zdict_builder = [] # Enable dictionary building (dictionary _using_ is always supported). 80 | no_wasm_shim = [] # Disable wasm shims (in case your wasm toolchain includes a C stdlib). 81 | seekable = [] # Enable support of the seekable format 82 | 83 | # These two are for cross-language LTO. 84 | # Will only work if `clang` is used to build the C library. 85 | fat-lto = [] # Enable fat-lto, will override thin-lto if specified 86 | thin-lto = [] # Enable thin-lto, will fallback to fat-lto if not supported 87 | 88 | [lints.rust] 89 | non_upper_case_globals = "allow" 90 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/LICENSE: -------------------------------------------------------------------------------- 1 | MIT or Apache-2.0 2 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/LICENSE.Apache-2.0: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/LICENSE.BSD-3-Clause: -------------------------------------------------------------------------------- 1 | The auto-generated bindings are under the 3-clause BSD license: 2 | 3 | BSD License 4 | 5 | For Zstandard software 6 | 7 | Copyright (c) 2016-present, Facebook, Inc. All rights reserved. 
8 | 9 | Redistribution and use in source and binary forms, with or without modification, 10 | are permitted provided that the following conditions are met: 11 | 12 | * Redistributions of source code must retain the above copyright notice, this 13 | list of conditions and the following disclaimer. 14 | 15 | * Redistributions in binary form must reproduce the above copyright notice, 16 | this list of conditions and the following disclaimer in the documentation 17 | and/or other materials provided with the distribution. 18 | 19 | * Neither the name Facebook nor the names of its contributors may be used to 20 | endorse or promote products derived from this software without specific 21 | prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 24 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 25 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 26 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 27 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 28 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 30 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/LICENSE.Mit: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2016 Alexandre Bury 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 9 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/Readme.md: -------------------------------------------------------------------------------- 1 | # zstd-sys 2 | 3 | This is the low-level auto-generated binding to the [zstd] library. 4 | You probably don't want to use this library directly; instead, look at [zstd-rs] or [zstd-safe]. 5 | 6 | # Compile it yourself 7 | 8 | `zstd` is included as a submodule. 
To get everything during your clone, use: 9 | 10 | ``` 11 | git clone https://github.com/gyscos/zstd-rs --recursive 12 | ``` 13 | 14 | Or, if you cloned it without the `--recursive` flag, 15 | call this from inside the repository: 16 | 17 | ``` 18 | git submodule update --init 19 | ``` 20 | 21 | Then, running `cargo build` in this directory should 22 | take care of building the C library and linking to it. 23 | 24 | # Build-time bindgen 25 | 26 | This library includes a pre-generated `bindings.rs` file. 27 | You can also generate new bindings at build-time, using the `bindgen` feature: 28 | 29 | ``` 30 | cargo build --features bindgen 31 | ``` 32 | 33 | [zstd]: https://github.com/facebook/zstd 34 | [zstd-rs]: https://github.com/gyscos/zstd-rs 35 | [zstd-safe]: https://github.com/gyscos/zstd-rs/tree/main/zstd-safe 36 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/build.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::OsStr; 2 | use std::path::{Path, PathBuf}; 3 | use std::{env, fmt, fs}; 4 | 5 | #[cfg(feature = "bindgen")] 6 | fn generate_bindings(defs: Vec<&str>, headerpaths: Vec<PathBuf>) { 7 | use bindgen::RustTarget; 8 | 9 | let bindings = bindgen::Builder::default().header("zstd.h"); 10 | 11 | #[cfg(feature = "zdict_builder")] 12 | let bindings = bindings.header("zdict.h"); 13 | 14 | #[cfg(feature = "seekable")] 15 | let bindings = bindings.header("zstd_seekable.h"); 16 | 17 | let bindings = bindings 18 | .layout_tests(false) 19 | .blocklist_type("max_align_t") 20 | .size_t_is_usize(true) 21 | .rust_target( 22 | RustTarget::stable(64, 0) 23 | .ok() 24 | .expect("Could not get 1.64.0 version"), 25 | ) 26 | .use_core() 27 | .rustified_enum(".*") 28 | .clang_args( 29 | headerpaths 30 | .into_iter() 31 | .map(|path| format!("-I{}", path.display())), 32 | ) 33 | .clang_args(defs.into_iter().map(|def| format!("-D{}", def))); 34 | 35 | #[cfg(feature = "experimental")]
36 | let bindings = bindings 37 | .clang_arg("-DZSTD_STATIC_LINKING_ONLY") 38 | .clang_arg("-DZDICT_STATIC_LINKING_ONLY") 39 | .clang_arg("-DZSTD_RUST_BINDINGS_EXPERIMENTAL"); 40 | 41 | #[cfg(feature = "seekable")] 42 | let bindings = bindings.blocklist_function("ZSTD_seekable_initFile"); 43 | 44 | let bindings = bindings.generate().expect("Unable to generate bindings"); 45 | 46 | let out_path = PathBuf::from(env::var_os("OUT_DIR").unwrap()); 47 | bindings 48 | .write_to_file(out_path.join("bindings.rs")) 49 | .expect("Could not write bindings"); 50 | } 51 | 52 | #[cfg(not(feature = "bindgen"))] 53 | fn generate_bindings(_: Vec<&str>, _: Vec<PathBuf>) {} 54 | 55 | fn pkg_config() -> (Vec<&'static str>, Vec<PathBuf>) { 56 | let library = pkg_config::Config::new() 57 | .statik(true) 58 | .cargo_metadata(!cfg!(feature = "non-cargo")) 59 | .probe("libzstd") 60 | .expect("Can't probe for zstd in pkg-config"); 61 | (vec!["PKG_CONFIG"], library.include_paths) 62 | } 63 | 64 | #[cfg(not(feature = "legacy"))] 65 | fn set_legacy(_config: &mut cc::Build) {} 66 | 67 | #[cfg(feature = "legacy")] 68 | fn set_legacy(config: &mut cc::Build) { 69 | config.define("ZSTD_LEGACY_SUPPORT", Some("1")); 70 | config.include("zstd/lib/legacy"); 71 | } 72 | 73 | #[cfg(feature = "zstdmt")] 74 | fn set_pthread(config: &mut cc::Build) { 75 | config.flag("-pthread"); 76 | } 77 | 78 | #[cfg(not(feature = "zstdmt"))] 79 | fn set_pthread(_config: &mut cc::Build) {} 80 | 81 | #[cfg(feature = "zstdmt")] 82 | fn enable_threading(config: &mut cc::Build) { 83 | config.define("ZSTD_MULTITHREAD", Some("")); 84 | } 85 | 86 | #[cfg(not(feature = "zstdmt"))] 87 | fn enable_threading(_config: &mut cc::Build) {} 88 | 89 | /// This function would find the first flag in `flags` that is supported 90 | /// and add that to `config`.
91 | #[allow(dead_code)] 92 | fn flag_if_supported_with_fallbacks(config: &mut cc::Build, flags: &[&str]) { 93 | let option = flags 94 | .iter() 95 | .find(|flag| config.is_flag_supported(flag).unwrap_or_default()); 96 | 97 | if let Some(flag) = option { 98 | config.flag(flag); 99 | } 100 | } 101 | 102 | fn compile_zstd() { 103 | let mut config = cc::Build::new(); 104 | 105 | // Search the following directories for C files to add to the compilation. 106 | for dir in &[ 107 | "zstd/lib/common", 108 | "zstd/lib/compress", 109 | "zstd/lib/decompress", 110 | #[cfg(feature = "seekable")] 111 | "zstd/contrib/seekable_format", 112 | #[cfg(feature = "zdict_builder")] 113 | "zstd/lib/dictBuilder", 114 | #[cfg(feature = "legacy")] 115 | "zstd/lib/legacy", 116 | ] { 117 | let mut entries: Vec<_> = fs::read_dir(dir) 118 | .unwrap() 119 | .map(Result::unwrap) 120 | .filter_map(|entry| { 121 | let filename = entry.file_name(); 122 | 123 | if Path::new(&filename).extension() == Some(OsStr::new("c")) 124 | // Skip xxhash*.c files: since we are using the "PRIVATE API" 125 | // mode, it will be inlined in the headers. 126 | && !filename.to_string_lossy().contains("xxhash") 127 | { 128 | Some(entry.path()) 129 | } else { 130 | None 131 | } 132 | }) 133 | .collect(); 134 | entries.sort(); 135 | 136 | config.files(entries); 137 | } 138 | 139 | // Either include ASM files, or disable ASM entirely. 140 | // Also disable it on windows, apparently it doesn't do well with these .S files at the moment. 141 | if cfg!(feature = "no_asm") || std::env::var("CARGO_CFG_WINDOWS").is_ok() { 142 | config.define("ZSTD_DISABLE_ASM", Some("")); 143 | } else { 144 | config.file("zstd/lib/decompress/huf_decompress_amd64.S"); 145 | } 146 | 147 | // List out the WASM targets that need wasm-shim. 148 | // Note that Emscripten already provides its own C standard library so 149 | // wasm32-unknown-emscripten should not be included here. 
150 | // See: https://github.com/gyscos/zstd-rs/pull/209 151 | let need_wasm_shim = !cfg!(feature = "no_wasm_shim") 152 | && env::var("TARGET").map_or(false, |target| { 153 | target == "wasm32-unknown-unknown" 154 | || target.starts_with("wasm32-wasi") 155 | }); 156 | 157 | if need_wasm_shim { 158 | cargo_print(&"rerun-if-changed=wasm-shim/stdlib.h"); 159 | cargo_print(&"rerun-if-changed=wasm-shim/string.h"); 160 | 161 | config.include("wasm-shim/"); 162 | } 163 | 164 | // Some extra parameters 165 | config.include("zstd/lib/"); 166 | config.include("zstd/lib/common"); 167 | config.warnings(false); 168 | 169 | config.define("ZSTD_LIB_DEPRECATED", Some("0")); 170 | 171 | config 172 | .flag_if_supported("-ffunction-sections") 173 | .flag_if_supported("-fdata-sections") 174 | .flag_if_supported("-fmerge-all-constants"); 175 | 176 | if cfg!(feature = "fat-lto") { 177 | config.flag_if_supported("-flto"); 178 | } else if cfg!(feature = "thin-lto") { 179 | flag_if_supported_with_fallbacks( 180 | &mut config, 181 | &["-flto=thin", "-flto"], 182 | ); 183 | } 184 | 185 | #[cfg(feature = "thin")] 186 | { 187 | // Here we try to build a lib as thin/small as possible. 188 | // We cannot use ZSTD_LIB_MINIFY since it is only 189 | // used in Makefile to define other options. 190 | 191 | config 192 | .define("HUF_FORCE_DECOMPRESS_X1", Some("1")) 193 | .define("ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT", Some("1")) 194 | .define("ZSTD_NO_INLINE", Some("1")) 195 | // removes the error messages that are 196 | // otherwise returned by ZSTD_getErrorName 197 | .define("ZSTD_STRIP_ERROR_STRINGS", Some("1")); 198 | 199 | // Disable use of BMI2 instructions since it involves runtime checking 200 | // of the feature and fallback if no BMI2 instruction is detected. 
201 | config.define("DYNAMIC_BMI2", Some("0")); 202 | 203 | // Disable support for all legacy formats 204 | #[cfg(not(feature = "legacy"))] 205 | config.define("ZSTD_LEGACY_SUPPORT", Some("0")); 206 | 207 | config.opt_level_str("z"); 208 | } 209 | 210 | // Hide symbols from resulting library, 211 | // so we can be used with another zstd-linking lib. 212 | // See https://github.com/gyscos/zstd-rs/issues/58 213 | config.flag("-fvisibility=hidden"); 214 | config.define("XXH_PRIVATE_API", Some("")); 215 | config.define("ZSTDLIB_VISIBILITY", Some("")); 216 | #[cfg(feature = "zdict_builder")] 217 | config.define("ZDICTLIB_VISIBILITY", Some("")); 218 | config.define("ZSTDERRORLIB_VISIBILITY", Some("")); 219 | 220 | // https://github.com/facebook/zstd/blob/d69d08ed6c83563b57d98132e1e3f2487880781e/lib/common/debug.h#L60 221 | /* recommended values for DEBUGLEVEL : 222 | * 0 : release mode, no debug, all run-time checks disabled 223 | * 1 : enables assert() only, no display 224 | * 2 : reserved, for currently active debug path 225 | * 3 : events once per object lifetime (CCtx, CDict, etc.) 226 | * 4 : events once per frame 227 | * 5 : events once per block 228 | * 6 : events once per sequence (verbose) 229 | * 7+: events at every position (*very* verbose) 230 | */ 231 | #[cfg(feature = "debug")] 232 | if !need_wasm_shim { 233 | config.define("DEBUGLEVEL", Some("5")); 234 | } 235 | 236 | set_pthread(&mut config); 237 | set_legacy(&mut config); 238 | enable_threading(&mut config); 239 | 240 | // Compile! 
241 | config.compile("libzstd.a"); 242 | 243 | let src = env::current_dir().unwrap().join("zstd").join("lib"); 244 | let dst = PathBuf::from(env::var_os("OUT_DIR").unwrap()); 245 | let include = dst.join("include"); 246 | fs::create_dir_all(&include).unwrap(); 247 | fs::copy(src.join("zstd.h"), include.join("zstd.h")).unwrap(); 248 | fs::copy(src.join("zstd_errors.h"), include.join("zstd_errors.h")) 249 | .unwrap(); 250 | #[cfg(feature = "zdict_builder")] 251 | fs::copy(src.join("zdict.h"), include.join("zdict.h")).unwrap(); 252 | cargo_print(&format_args!("root={}", dst.display())); 253 | } 254 | 255 | /// Print a line for cargo. 256 | /// 257 | /// If non-cargo is set, do not print anything. 258 | fn cargo_print(content: &dyn fmt::Display) { 259 | if cfg!(not(feature = "non-cargo")) { 260 | println!("cargo:{}", content); 261 | } 262 | } 263 | 264 | fn main() { 265 | cargo_print(&"rerun-if-env-changed=ZSTD_SYS_USE_PKG_CONFIG"); 266 | 267 | let target_arch = 268 | std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); 269 | let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); 270 | 271 | if target_arch == "wasm32" || target_os == "hermit" { 272 | cargo_print(&"rustc-cfg=feature=\"std\""); 273 | } 274 | 275 | // println!("cargo:rustc-link-lib=zstd"); 276 | let (defs, headerpaths) = if cfg!(feature = "pkg-config") 277 | || env::var_os("ZSTD_SYS_USE_PKG_CONFIG").is_some() 278 | { 279 | pkg_config() 280 | } else { 281 | if !Path::new("zstd/lib").exists() { 282 | panic!("Folder 'zstd/lib' does not exists. 
Maybe you forgot to clone the 'zstd' submodule?"); 283 | } 284 | 285 | let manifest_dir = PathBuf::from( 286 | env::var_os("CARGO_MANIFEST_DIR") 287 | .expect("Manifest dir is always set by cargo"), 288 | ); 289 | 290 | compile_zstd(); 291 | (vec![], vec![manifest_dir.join("zstd/lib")]) 292 | }; 293 | 294 | let includes: Vec<_> = headerpaths 295 | .iter() 296 | .map(|p| p.display().to_string()) 297 | .collect(); 298 | cargo_print(&format_args!("include={}", includes.join(";"))); 299 | 300 | generate_bindings(defs, headerpaths); 301 | } 302 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/examples/it_work.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryInto; 2 | 3 | #[no_mangle] 4 | pub extern "C" fn zstd_version() -> u32 { 5 | unsafe { zstd_sys::ZSTD_versionNumber() } 6 | } 7 | 8 | macro_rules! zstd_check { 9 | ( $ret:expr ) => {{ 10 | let ret = $ret; 11 | let error_code = unsafe { zstd_sys::ZSTD_isError(ret) }; 12 | assert_eq!(error_code, 0); 13 | }}; 14 | } 15 | 16 | #[no_mangle] 17 | pub extern "C" fn test_compress() -> bool { 18 | let fbuf = include_bytes!("../Cargo.toml"); 19 | 20 | let cbufsize = unsafe { zstd_sys::ZSTD_compressBound(fbuf.len()) }; 21 | let mut cbuf = vec![0; cbufsize]; 22 | 23 | let csize = unsafe { 24 | zstd_sys::ZSTD_compress( 25 | cbuf.as_mut_ptr().cast(), 26 | cbuf.len(), 27 | fbuf.as_ptr().cast(), 28 | fbuf.len(), 29 | 1, 30 | ) 31 | }; 32 | zstd_check!(csize); 33 | let cbuf = &cbuf[..csize]; 34 | 35 | let rsize = unsafe { 36 | zstd_sys::ZSTD_getFrameContentSize(cbuf.as_ptr().cast(), cbuf.len()) 37 | }; 38 | let rsize = rsize.try_into().unwrap(); 39 | let mut rbuf = vec![0; rsize]; 40 | 41 | let dsize = unsafe { 42 | zstd_sys::ZSTD_decompress( 43 | rbuf.as_mut_ptr().cast(), 44 | rbuf.len(), 45 | cbuf.as_ptr().cast(), 46 | cbuf.len(), 47 | ) 48 | }; 49 | zstd_check!(dsize); 50 | assert_eq!(dsize, rsize); 51 | 52 | &fbuf[..] 
== &rbuf[..] 53 | } 54 | 55 | fn main() {} 56 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/src/bindings_zdict.rs: -------------------------------------------------------------------------------- 1 | /* 2 | This file is auto-generated from the public API of the zstd library. 3 | It is released under the same BSD license. 4 | 5 | BSD License 6 | 7 | For Zstandard software 8 | 9 | Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. 10 | 11 | Redistribution and use in source and binary forms, with or without modification, 12 | are permitted provided that the following conditions are met: 13 | 14 | * Redistributions of source code must retain the above copyright notice, this 15 | list of conditions and the following disclaimer. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, 18 | this list of conditions and the following disclaimer in the documentation 19 | and/or other materials provided with the distribution. 20 | 21 | * Neither the name Facebook, nor Meta, nor the names of its contributors may 22 | be used to endorse or promote products derived from this software without 23 | specific prior written permission. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 26 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 29 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 30 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 32 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 33 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 34 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | */ 36 | /* automatically generated by rust-bindgen 0.71.1 */ 37 | 38 | extern "C" { 39 | #[doc = " ZDICT_trainFromBuffer():\n Train a dictionary from an array of samples.\n Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,\n f=20, and accel=1.\n Samples must be stored concatenated in a single flat buffer `samplesBuffer`,\n supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.\n The resulting dictionary will be saved into `dictBuffer`.\n @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)\n or an error code, which can be tested with ZDICT_isError().\n Note: Dictionary training will fail if there are not enough samples to construct a\n dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).\n If dictionary training fails, you should use zstd without a dictionary, as the dictionary\n would've been ineffective anyways. 
If you believe your samples would benefit from a dictionary\n please open an issue with details, and we can look into it.\n Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.\n Tips: In general, a reasonable dictionary has a size of ~ 100 KB.\n It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.\n In general, it's recommended to provide a few thousands samples, though this can vary a lot.\n It's recommended that total size of all samples be about ~x100 times the target size of dictionary."] 40 | pub fn ZDICT_trainFromBuffer( 41 | dictBuffer: *mut ::core::ffi::c_void, 42 | dictBufferCapacity: usize, 43 | samplesBuffer: *const ::core::ffi::c_void, 44 | samplesSizes: *const usize, 45 | nbSamples: ::core::ffi::c_uint, 46 | ) -> usize; 47 | } 48 | #[repr(C)] 49 | #[derive(Debug, Copy, Clone)] 50 | pub struct ZDICT_params_t { 51 | #[doc = "< optimize for a specific zstd compression level; 0 means default"] 52 | pub compressionLevel: ::core::ffi::c_int, 53 | #[doc = "< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug;"] 54 | pub notificationLevel: ::core::ffi::c_uint, 55 | #[doc = "< force dictID value; 0 means auto mode (32-bits random value)\n NOTE: The zstd format reserves some dictionary IDs for future use.\n You may use them in private settings, but be warned that they\n may be used by zstd in a public dictionary registry in the future.\n These dictionary IDs are:\n - low range : <= 32767\n - high range : >= (2^31)"] 56 | pub dictID: ::core::ffi::c_uint, 57 | } 58 | extern "C" { 59 | #[doc = " ZDICT_finalizeDictionary():\n Given a custom content as a basis for dictionary, and a set of samples,\n finalize dictionary by adding headers and statistics according to the zstd\n dictionary format.\n\n Samples must be stored concatenated in a flat buffer `samplesBuffer`,\n supplied with an array of sizes `samplesSizes`, providing the size of each\n sample in order. 
The samples are used to construct the statistics, so they\n should be representative of what you will compress with this dictionary.\n\n The compression level can be set in `parameters`. You should pass the\n compression level you expect to use in production. The statistics for each\n compression level differ, so tuning the dictionary for the compression level\n can help quite a bit.\n\n You can set an explicit dictionary ID in `parameters`, or allow us to pick\n a random dictionary ID for you, but we can't guarantee no collisions.\n\n The dstDictBuffer and the dictContent may overlap, and the content will be\n appended to the end of the header. If the header + the content doesn't fit in\n maxDictSize the beginning of the content is truncated to make room, since it\n is presumed that the most profitable content is at the end of the dictionary,\n since that is the cheapest to reference.\n\n `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN).\n\n @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),\n or an error code, which can be tested by ZDICT_isError().\n Note: ZDICT_finalizeDictionary() will push notifications into stderr if\n instructed to, using notificationLevel>0.\n NOTE: This function currently may fail in several edge cases including:\n * Not enough samples\n * Samples are uncompressible\n * Samples are all exactly the same"] 60 | pub fn ZDICT_finalizeDictionary( 61 | dstDictBuffer: *mut ::core::ffi::c_void, 62 | maxDictSize: usize, 63 | dictContent: *const ::core::ffi::c_void, 64 | dictContentSize: usize, 65 | samplesBuffer: *const ::core::ffi::c_void, 66 | samplesSizes: *const usize, 67 | nbSamples: ::core::ffi::c_uint, 68 | parameters: ZDICT_params_t, 69 | ) -> usize; 70 | } 71 | extern "C" { 72 | pub fn ZDICT_getDictID( 73 | dictBuffer: *const ::core::ffi::c_void, 74 | dictSize: usize, 75 | ) -> ::core::ffi::c_uint; 76 | } 77 | extern "C" { 78 | pub fn ZDICT_getDictHeaderSize( 79 | dictBuffer: *const 
::core::ffi::c_void, 80 | dictSize: usize, 81 | ) -> usize; 82 | } 83 | extern "C" { 84 | pub fn ZDICT_isError(errorCode: usize) -> ::core::ffi::c_uint; 85 | } 86 | extern "C" { 87 | pub fn ZDICT_getErrorName(errorCode: usize) -> *const ::core::ffi::c_char; 88 | } 89 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/src/bindings_zstd_seekable.rs: -------------------------------------------------------------------------------- 1 | /* 2 | This file is auto-generated from the public API of the zstd library. 3 | It is released under the same BSD license. 4 | 5 | BSD License 6 | 7 | For Zstandard software 8 | 9 | Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. 10 | 11 | Redistribution and use in source and binary forms, with or without modification, 12 | are permitted provided that the following conditions are met: 13 | 14 | * Redistributions of source code must retain the above copyright notice, this 15 | list of conditions and the following disclaimer. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, 18 | this list of conditions and the following disclaimer in the documentation 19 | and/or other materials provided with the distribution. 20 | 21 | * Neither the name Facebook, nor Meta, nor the names of its contributors may 22 | be used to endorse or promote products derived from this software without 23 | specific prior written permission. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 26 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 29 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 30 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 32 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 33 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 34 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | */ 36 | /* automatically generated by rust-bindgen 0.71.1 */ 37 | 38 | pub const ZSTD_seekTableFooterSize: u32 = 9; 39 | pub const ZSTD_SEEKABLE_MAGICNUMBER: u32 = 2408770225; 40 | pub const ZSTD_SEEKABLE_MAXFRAMES: u32 = 134217728; 41 | pub const ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE: u32 = 1073741824; 42 | pub const ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE: i32 = -2; 43 | #[repr(C)] 44 | #[derive(Debug, Copy, Clone)] 45 | pub struct ZSTD_seekable_CStream_s { 46 | _unused: [u8; 0], 47 | } 48 | pub type ZSTD_seekable_CStream = ZSTD_seekable_CStream_s; 49 | #[repr(C)] 50 | #[derive(Debug, Copy, Clone)] 51 | pub struct ZSTD_seekable_s { 52 | _unused: [u8; 0], 53 | } 54 | pub type ZSTD_seekable = ZSTD_seekable_s; 55 | #[repr(C)] 56 | #[derive(Debug, Copy, Clone)] 57 | pub struct ZSTD_seekTable_s { 58 | _unused: [u8; 0], 59 | } 60 | pub type ZSTD_seekTable = ZSTD_seekTable_s; 61 | extern "C" { 62 | pub fn ZSTD_seekable_createCStream() -> *mut ZSTD_seekable_CStream; 63 | } 64 | extern "C" { 65 | pub fn ZSTD_seekable_freeCStream(zcs: *mut ZSTD_seekable_CStream) 66 | -> usize; 67 | } 68 | extern "C" { 69 | pub fn ZSTD_seekable_initCStream( 70 | zcs: *mut ZSTD_seekable_CStream, 71 | compressionLevel: ::core::ffi::c_int, 72 | checksumFlag: ::core::ffi::c_int, 73 | maxFrameSize: ::core::ffi::c_uint, 74 | ) -> usize; 75 | } 76 | extern "C" { 77 | pub fn ZSTD_seekable_compressStream( 78 | zcs: *mut ZSTD_seekable_CStream, 79 | output: *mut 
ZSTD_outBuffer, 80 | input: *mut ZSTD_inBuffer, 81 | ) -> usize; 82 | } 83 | extern "C" { 84 | pub fn ZSTD_seekable_endFrame( 85 | zcs: *mut ZSTD_seekable_CStream, 86 | output: *mut ZSTD_outBuffer, 87 | ) -> usize; 88 | } 89 | extern "C" { 90 | pub fn ZSTD_seekable_endStream( 91 | zcs: *mut ZSTD_seekable_CStream, 92 | output: *mut ZSTD_outBuffer, 93 | ) -> usize; 94 | } 95 | #[repr(C)] 96 | #[derive(Debug, Copy, Clone)] 97 | pub struct ZSTD_frameLog_s { 98 | _unused: [u8; 0], 99 | } 100 | pub type ZSTD_frameLog = ZSTD_frameLog_s; 101 | extern "C" { 102 | pub fn ZSTD_seekable_createFrameLog( 103 | checksumFlag: ::core::ffi::c_int, 104 | ) -> *mut ZSTD_frameLog; 105 | } 106 | extern "C" { 107 | pub fn ZSTD_seekable_freeFrameLog(fl: *mut ZSTD_frameLog) -> usize; 108 | } 109 | extern "C" { 110 | pub fn ZSTD_seekable_logFrame( 111 | fl: *mut ZSTD_frameLog, 112 | compressedSize: ::core::ffi::c_uint, 113 | decompressedSize: ::core::ffi::c_uint, 114 | checksum: ::core::ffi::c_uint, 115 | ) -> usize; 116 | } 117 | extern "C" { 118 | pub fn ZSTD_seekable_writeSeekTable( 119 | fl: *mut ZSTD_frameLog, 120 | output: *mut ZSTD_outBuffer, 121 | ) -> usize; 122 | } 123 | extern "C" { 124 | pub fn ZSTD_seekable_create() -> *mut ZSTD_seekable; 125 | } 126 | extern "C" { 127 | pub fn ZSTD_seekable_free(zs: *mut ZSTD_seekable) -> usize; 128 | } 129 | extern "C" { 130 | pub fn ZSTD_seekable_initBuff( 131 | zs: *mut ZSTD_seekable, 132 | src: *const ::core::ffi::c_void, 133 | srcSize: usize, 134 | ) -> usize; 135 | } 136 | extern "C" { 137 | pub fn ZSTD_seekable_decompress( 138 | zs: *mut ZSTD_seekable, 139 | dst: *mut ::core::ffi::c_void, 140 | dstSize: usize, 141 | offset: ::core::ffi::c_ulonglong, 142 | ) -> usize; 143 | } 144 | extern "C" { 145 | pub fn ZSTD_seekable_decompressFrame( 146 | zs: *mut ZSTD_seekable, 147 | dst: *mut ::core::ffi::c_void, 148 | dstSize: usize, 149 | frameIndex: ::core::ffi::c_uint, 150 | ) -> usize; 151 | } 152 | extern "C" { 153 | pub fn 
ZSTD_seekable_getNumFrames( 154 | zs: *const ZSTD_seekable, 155 | ) -> ::core::ffi::c_uint; 156 | } 157 | extern "C" { 158 | pub fn ZSTD_seekable_getFrameCompressedOffset( 159 | zs: *const ZSTD_seekable, 160 | frameIndex: ::core::ffi::c_uint, 161 | ) -> ::core::ffi::c_ulonglong; 162 | } 163 | extern "C" { 164 | pub fn ZSTD_seekable_getFrameDecompressedOffset( 165 | zs: *const ZSTD_seekable, 166 | frameIndex: ::core::ffi::c_uint, 167 | ) -> ::core::ffi::c_ulonglong; 168 | } 169 | extern "C" { 170 | pub fn ZSTD_seekable_getFrameCompressedSize( 171 | zs: *const ZSTD_seekable, 172 | frameIndex: ::core::ffi::c_uint, 173 | ) -> usize; 174 | } 175 | extern "C" { 176 | pub fn ZSTD_seekable_getFrameDecompressedSize( 177 | zs: *const ZSTD_seekable, 178 | frameIndex: ::core::ffi::c_uint, 179 | ) -> usize; 180 | } 181 | extern "C" { 182 | pub fn ZSTD_seekable_offsetToFrameIndex( 183 | zs: *const ZSTD_seekable, 184 | offset: ::core::ffi::c_ulonglong, 185 | ) -> ::core::ffi::c_uint; 186 | } 187 | extern "C" { 188 | pub fn ZSTD_seekTable_create_fromSeekable( 189 | zs: *const ZSTD_seekable, 190 | ) -> *mut ZSTD_seekTable; 191 | } 192 | extern "C" { 193 | pub fn ZSTD_seekTable_free(st: *mut ZSTD_seekTable) -> usize; 194 | } 195 | extern "C" { 196 | pub fn ZSTD_seekTable_getNumFrames( 197 | st: *const ZSTD_seekTable, 198 | ) -> ::core::ffi::c_uint; 199 | } 200 | extern "C" { 201 | pub fn ZSTD_seekTable_getFrameCompressedOffset( 202 | st: *const ZSTD_seekTable, 203 | frameIndex: ::core::ffi::c_uint, 204 | ) -> ::core::ffi::c_ulonglong; 205 | } 206 | extern "C" { 207 | pub fn ZSTD_seekTable_getFrameDecompressedOffset( 208 | st: *const ZSTD_seekTable, 209 | frameIndex: ::core::ffi::c_uint, 210 | ) -> ::core::ffi::c_ulonglong; 211 | } 212 | extern "C" { 213 | pub fn ZSTD_seekTable_getFrameCompressedSize( 214 | st: *const ZSTD_seekTable, 215 | frameIndex: ::core::ffi::c_uint, 216 | ) -> usize; 217 | } 218 | extern "C" { 219 | pub fn ZSTD_seekTable_getFrameDecompressedSize( 220 | st: 
*const ZSTD_seekTable, 221 | frameIndex: ::core::ffi::c_uint, 222 | ) -> usize; 223 | } 224 | extern "C" { 225 | pub fn ZSTD_seekTable_offsetToFrameIndex( 226 | st: *const ZSTD_seekTable, 227 | offset: ::core::ffi::c_ulonglong, 228 | ) -> ::core::ffi::c_uint; 229 | } 230 | pub type ZSTD_seekable_read = ::core::option::Option< 231 | unsafe extern "C" fn( 232 | opaque: *mut ::core::ffi::c_void, 233 | buffer: *mut ::core::ffi::c_void, 234 | n: usize, 235 | ) -> ::core::ffi::c_int, 236 | >; 237 | pub type ZSTD_seekable_seek = ::core::option::Option< 238 | unsafe extern "C" fn( 239 | opaque: *mut ::core::ffi::c_void, 240 | offset: ::core::ffi::c_longlong, 241 | origin: ::core::ffi::c_int, 242 | ) -> ::core::ffi::c_int, 243 | >; 244 | #[repr(C)] 245 | #[derive(Debug, Copy, Clone)] 246 | pub struct ZSTD_seekable_customFile { 247 | pub opaque: *mut ::core::ffi::c_void, 248 | pub read: ZSTD_seekable_read, 249 | pub seek: ZSTD_seekable_seek, 250 | } 251 | extern "C" { 252 | pub fn ZSTD_seekable_initAdvanced( 253 | zs: *mut ZSTD_seekable, 254 | src: ZSTD_seekable_customFile, 255 | ) -> usize; 256 | } 257 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_upper_case_globals)] 2 | #![allow(non_camel_case_types)] 3 | #![allow(non_snake_case)] 4 | #![no_std] 5 | //! Low-level bindings to the [zstd] library. 6 | //! 7 | //! [zstd]: https://facebook.github.io/zstd/ 8 | 9 | #[cfg(target_arch = "wasm32")] 10 | extern crate alloc; 11 | 12 | #[cfg(target_arch = "wasm32")] 13 | mod wasm_shim; 14 | 15 | // If running bindgen, we'll end up with the correct bindings anyway. 16 | #[cfg(feature = "bindgen")] 17 | include!(concat!(env!("OUT_DIR"), "/bindings.rs")); 18 | 19 | // The bindings used depend on a few feature flags. 
20 | #[cfg(all(not(feature = "experimental"), not(feature = "bindgen")))] 21 | include!("bindings_zstd.rs"); 22 | 23 | #[cfg(all( 24 | not(feature = "experimental"), 25 | feature = "zdict_builder", 26 | not(feature = "bindgen") 27 | ))] 28 | include!("bindings_zdict.rs"); 29 | 30 | #[cfg(all(feature = "experimental", not(feature = "bindgen")))] 31 | include!("bindings_zstd_experimental.rs"); 32 | 33 | #[cfg(all( 34 | feature = "experimental", 35 | feature = "zdict_builder", 36 | not(feature = "bindgen") 37 | ))] 38 | include!("bindings_zdict_experimental.rs"); 39 | 40 | #[cfg(all(feature = "seekable", not(feature = "bindgen")))] 41 | include!("bindings_zstd_seekable.rs"); 42 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/src/wasm_shim.rs: -------------------------------------------------------------------------------- 1 | use alloc::alloc::{alloc, alloc_zeroed, dealloc, Layout}; 2 | use core::ffi::{c_int, c_void}; 3 | 4 | const USIZE_ALIGN: usize = core::mem::align_of::(); 5 | const USIZE_SIZE: usize = core::mem::size_of::(); 6 | 7 | #[no_mangle] 8 | pub extern "C" fn rust_zstd_wasm_shim_qsort( 9 | base: *mut c_void, 10 | n_items: usize, 11 | size: usize, 12 | compar: extern "C" fn(*const c_void, *const c_void) -> c_int, 13 | ) { 14 | unsafe { 15 | match size { 16 | 1 => qsort::<1>(base, n_items, compar), 17 | 2 => qsort::<2>(base, n_items, compar), 18 | 4 => qsort::<4>(base, n_items, compar), 19 | 8 => qsort::<8>(base, n_items, compar), 20 | 16 => qsort::<16>(base, n_items, compar), 21 | _ => panic!("Unsupported qsort item size"), 22 | } 23 | } 24 | } 25 | 26 | unsafe fn qsort( 27 | base: *mut c_void, 28 | n_items: usize, 29 | compar: extern "C" fn(*const c_void, *const c_void) -> c_int, 30 | ) { 31 | let base: &mut [[u8; N]] = 32 | core::slice::from_raw_parts_mut(base as *mut [u8; N], n_items); 33 | base.sort_unstable_by(|a, b| { 34 | match compar(a.as_ptr() as *const c_void, b.as_ptr() as *const c_void) 35 
// Minimal libc replacements used when zstd is compiled for `wasm32`.
//
// The C sources are built against the headers in `wasm-shim/`, which map
// `malloc`, `calloc`, `free`, `qsort`, `memcmp`, `memcpy`, `memmove` and
// `memset` onto the `rust_zstd_wasm_shim_*` symbols exported from this module.

use alloc::alloc::{alloc, alloc_zeroed, dealloc, Layout};
use core::ffi::{c_int, c_void};

const USIZE_ALIGN: usize = core::mem::align_of::<usize>();
const USIZE_SIZE: usize = core::mem::size_of::<usize>();

/// `qsort` replacement: sorts `n_items` elements of `size` bytes each, starting
/// at `base`, using the C comparator `compar`.
///
/// Only element sizes of 1, 2, 4, 8 and 16 bytes are supported; any other size
/// panics. An empty (or null) array is left untouched.
#[no_mangle]
pub extern "C" fn rust_zstd_wasm_shim_qsort(
    base: *mut c_void,
    n_items: usize,
    size: usize,
    compar: extern "C" fn(*const c_void, *const c_void) -> c_int,
) {
    // Nothing to sort. This also avoids building a slice from a null pointer,
    // which is undefined behaviour even for a length of zero.
    if n_items == 0 || base.is_null() {
        return;
    }

    unsafe {
        match size {
            1 => qsort::<1>(base, n_items, compar),
            2 => qsort::<2>(base, n_items, compar),
            4 => qsort::<4>(base, n_items, compar),
            8 => qsort::<8>(base, n_items, compar),
            16 => qsort::<16>(base, n_items, compar),
            _ => panic!("Unsupported qsort item size"),
        }
    }
}

/// Sorts `n_items` elements of exactly `N` bytes each.
///
/// Safety: `base` must point to `n_items` valid, writable `N`-byte elements.
unsafe fn qsort<const N: usize>(
    base: *mut c_void,
    n_items: usize,
    compar: extern "C" fn(*const c_void, *const c_void) -> c_int,
) {
    let items: &mut [[u8; N]] =
        core::slice::from_raw_parts_mut(base as *mut [u8; N], n_items);
    items.sort_unstable_by(|a, b| {
        // The C comparator returns <0, 0 or >0; comparing that value with
        // zero yields the matching `Ordering`.
        compar(a.as_ptr() as *const c_void, b.as_ptr() as *const c_void)
            .cmp(&0)
    });
}

/// `malloc` replacement. Returns null if the allocation fails.
#[no_mangle]
pub extern "C" fn rust_zstd_wasm_shim_malloc(size: usize) -> *mut c_void {
    wasm_shim_alloc::<false>(size)
}

/// `memcmp` replacement: lexicographically compares the first `n` bytes of
/// `str1` and `str2`, returning -1, 0 or 1.
#[no_mangle]
pub extern "C" fn rust_zstd_wasm_shim_memcmp(
    str1: *const c_void,
    str2: *const c_void,
    n: usize,
) -> i32 {
    // Empty ranges always compare equal; do not build slices from pointers
    // that may be null in that case.
    if n == 0 {
        return 0;
    }

    // Safety: the function contract requires `str1` and `str2` to be at least
    // `n` bytes long.
    unsafe {
        let lhs: &[u8] = core::slice::from_raw_parts(str1 as *const u8, n);
        let rhs: &[u8] = core::slice::from_raw_parts(str2 as *const u8, n);
        // `Ordering` is defined as Less = -1, Equal = 0, Greater = 1.
        lhs.cmp(rhs) as i32
    }
}

/// `calloc` replacement: allocates zeroed memory for `nmemb` elements of
/// `size` bytes. Returns null if `nmemb * size` overflows or the allocation
/// fails.
#[no_mangle]
pub extern "C" fn rust_zstd_wasm_shim_calloc(
    nmemb: usize,
    size: usize,
) -> *mut c_void {
    // note: calloc expects the allocation to be zeroed
    match nmemb.checked_mul(size) {
        Some(total) => wasm_shim_alloc::<true>(total),
        None => core::ptr::null_mut(),
    }
}

/// Shared implementation of `malloc` (`ZEROED = false`) and `calloc`
/// (`ZEROED = true`).
///
/// Returns null if the request is too large to represent or if the allocator
/// fails.
#[inline]
fn wasm_shim_alloc<const ZEROED: bool>(size: usize) -> *mut c_void {
    // in order to recover the size upon free, we store the size below the allocation
    // special alignment is never requested via the malloc API,
    // so it's not stored, and usize-alignment is used
    // memory layout: [size] [allocation]

    let full_alloc_size = match size.checked_add(USIZE_SIZE) {
        Some(total) => total,
        None => return core::ptr::null_mut(),
    };

    // The checked constructor also rejects sizes the allocator cannot handle.
    let layout = match Layout::from_size_align(full_alloc_size, USIZE_ALIGN) {
        Ok(layout) => layout,
        Err(_) => return core::ptr::null_mut(),
    };

    unsafe {
        let raw = if ZEROED {
            alloc_zeroed(layout)
        } else {
            alloc(layout)
        };

        // Allocation failure is reported as null, as C callers expect; the
        // size header must not be written through a null pointer.
        if raw.is_null() {
            return core::ptr::null_mut();
        }

        // SAFETY: raw is usize-aligned and we've allocated sufficient memory
        raw.cast::<usize>().write(full_alloc_size);

        raw.add(USIZE_SIZE).cast()
    }
}

/// `free` replacement for pointers returned by the shim's `malloc`/`calloc`.
/// Passing null is a no-op, as with C `free`.
#[no_mangle]
pub unsafe extern "C" fn rust_zstd_wasm_shim_free(ptr: *mut c_void) {
    if ptr.is_null() {
        return;
    }

    // the layout for the allocation needs to be recovered for dealloc
    // - the size must be recovered from directly below the allocation
    // - the alignment will always be USIZE_ALIGN

    let alloc_ptr = ptr.sub(USIZE_SIZE);
    // SAFETY: the allocation routines must uphold having a valid usize below the provided pointer
    let full_alloc_size = alloc_ptr.cast::<usize>().read();

    let layout =
        Layout::from_size_align_unchecked(full_alloc_size, USIZE_ALIGN);
    dealloc(alloc_ptr.cast(), layout);
}

/// `memcpy` replacement: copies `n` bytes from `src` to `dest`, which must not
/// overlap. Returns `dest`.
#[no_mangle]
pub unsafe extern "C" fn rust_zstd_wasm_shim_memcpy(
    dest: *mut c_void,
    src: *const c_void,
    n: usize,
) -> *mut c_void {
    core::ptr::copy_nonoverlapping(src as *const u8, dest as *mut u8, n);
    dest
}

/// `memmove` replacement: copies `n` bytes from `src` to `dest`; the ranges
/// may overlap. Returns `dest`.
#[no_mangle]
pub unsafe extern "C" fn rust_zstd_wasm_shim_memmove(
    dest: *mut c_void,
    src: *const c_void,
    n: usize,
) -> *mut c_void {
    core::ptr::copy(src as *const u8, dest as *mut u8, n);
    dest
}

/// `memset` replacement: fills `n` bytes at `dest` with the low byte of `c`.
/// Returns `dest`.
#[no_mangle]
pub unsafe extern "C" fn rust_zstd_wasm_shim_memset(
    dest: *mut c_void,
    c: c_int,
    n: usize,
) -> *mut c_void {
    core::ptr::write_bytes(dest as *mut u8, c as u8, n);
    dest
}
#!/bin/sh
# Run the test suite once for every combination of the `experimental` and
# `std` features (each one either enabled or disabled).
#
# Every combination is run even if an earlier one fails; the script exits with
# a non-zero status if any of them failed, so failures are not masked by a
# later successful run.

status=0
for EXP in "experimental" ""; do
    for STD in "std" ""; do
        cargo test --features "$EXP $STD" || status=1
    done
done

exit "$status"
#!/bin/sh
# Regenerate the checked-in bindgen output in src/ from the wrapper headers
# (zstd.h, zdict.h, zstd_seekable.h) in this directory.

# Stop at the first failing command, so a failed bindgen run is reported
# instead of the script carrying on and exiting successfully.
set -e

RUST_TARGET=1.64
bindgen="bindgen --no-layout-tests --blocklist-type=max_align_t --rustified-enum=.* --use-core --rust-target $RUST_TARGET"
experimental="-DZSTD_STATIC_LINKING_ONLY -DZDICT_STATIC_LINKING_ONLY -DZSTD_RUST_BINDINGS_EXPERIMENTAL"

# Print the license banner followed by the bindgen output for the given
# header and bindgen/clang arguments.
run_bindgen()
{
    echo "/*
This file is auto-generated from the public API of the zstd library.
It is released under the same BSD license.

$(cat zstd/LICENSE)
*/"

    # $bindgen is intentionally unquoted: it holds the command plus its
    # options and must be split into words. "$@" is quoted so that regex
    # arguments such as "ZSTD_.*" are passed through without re-splitting or
    # pathname expansion.
    $bindgen "$@"
}

for EXPERIMENTAL_ARG in "$experimental" ""; do
    if [ -z "$EXPERIMENTAL_ARG" ]; then EXPERIMENTAL=""; else EXPERIMENTAL="_experimental"; fi

    SUFFIX=${EXPERIMENTAL}

    # $EXPERIMENTAL_ARG is intentionally unquoted: it expands to several -D
    # flags, or to nothing at all for the non-experimental bindings.
    run_bindgen zstd.h \
        --allowlist-type "ZSTD_.*" \
        --allowlist-function "ZSTD_.*" \
        --allowlist-var "ZSTD_.*" \
        -- -Izstd/lib $EXPERIMENTAL_ARG > src/bindings_zstd${SUFFIX}.rs

    run_bindgen zdict.h \
        --allowlist-type "ZDICT_.*" \
        --allowlist-function "ZDICT_.*" \
        --allowlist-var "ZDICT_.*" \
        -- -Izstd/lib $EXPERIMENTAL_ARG > src/bindings_zdict${SUFFIX}.rs
done

# - ZSTD_seekable_initFile is blocked because it expects the C FILE type; Rust files can be passed directly to init_advanced()
run_bindgen zstd_seekable.h --allowlist-file ".*zstd_seekable.h$" --no-recursive-allowlist \
    --blocklist-function ZSTD_seekable_initFile \
    -- -Izstd/lib > src/bindings_zstd_seekable.rs
#!/bin/bash
# Update the vendored zstd submodule to the newest v1.x tag, regenerate the
# bindings and constants, bump the versions of zstd-sys, zstd-safe and zstd,
# commit the result, and optionally publish the three crates.
#
# The relative `cd` calls below expect the script to be started from the
# zstd-sys directory.
set -e
set -o pipefail

cd zstd
CURRENT=$(git describe --tags)
git fetch -q
# Version sort, so that e.g. v1.5.10 is ranked above v1.5.9 (a plain lexical
# sort would pick v1.5.9 as the newest tag).
TAG=$(git tag -l | grep '^v1' | sort -V | tail -n 1)

if [ "$CURRENT" = "$TAG" ]
then
    echo "Already using zstd $TAG"
    exit 0
fi

git checkout "$TAG"
cd ..
git add zstd
./update_bindings.sh
git add src/bindings*.rs
cd ..
./update_consts.sh
git add src/constants*.rs
cd zstd-sys

# Note: You'll need a forked version of cargo-bump that supports metadata
# For instance https://github.com/gyscos/cargo-bump
METADATA="zstd.${TAG/v/}"
cargo bump patch --build "$METADATA"
ZSTD_SYS_VERSION=$(cargo read-manifest | jq -r .version | cut -d+ -f1)
git add Cargo.toml
cd ..
cargo add zstd-sys --path ./zstd-sys --vers "=${ZSTD_SYS_VERSION}" --no-default-features
cargo bump patch --build "$METADATA"
ZSTD_SAFE_VERSION=$(cargo read-manifest | jq -r .version | cut -d+ -f1)
git add Cargo.toml
cd ..
cargo add zstd-safe --path ./zstd-safe --vers "=${ZSTD_SAFE_VERSION}" --no-default-features
cargo bump patch --build "$METADATA"
ZSTD_RS_VERSION=$(cargo read-manifest | jq -r .version | cut -d+ -f1)
git add Cargo.toml

cargo check

git commit -m "Update zstd to $TAG"

# Publish?
read -p "Publish to crates.io? " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]
then
    # Each publish is its own statement: in `cargo publish && sleep 5` a
    # failing publish would not trigger `set -e`, and the crates depending on
    # it would still be published.
    cd zstd-safe/zstd-sys
    cargo publish
    # Need to wait so that the index refreshes.
    sleep 5
    cd ..
    cargo publish
    sleep 5
    cd ..
    cargo publish
    git tag "$ZSTD_RS_VERSION"
else
    echo "Would have published $ZSTD_RS_VERSION"
fi
/* ===== zstd-safe/zstd-sys/wasm-shim/assert.h ===== */
#ifndef _ASSERT_H
#define _ASSERT_H

/* Assertions are compiled out. The macro still expands to a void expression
 * (as the standard NDEBUG form does), so it stays valid wherever an
 * expression is required. */
#define assert(expr) ((void)0)

#endif // _ASSERT_H

/* ===== zstd-safe/zstd-sys/wasm-shim/stdio.h ===== */
#include <stddef.h>

#ifndef _STDIO_H
#define _STDIO_H 1

/* Output is discarded; both calls expand to a no-op void expression. */
#define fprintf(expr, ...) ((void)0)
#define fflush(expr) ((void)0)

#endif // _STDIO_H

/* ===== zstd-safe/zstd-sys/wasm-shim/stdlib.h ===== */
#include <stddef.h>

#ifndef _STDLIB_H
#define _STDLIB_H 1

/* Allocation and sorting routines exported by the Rust side
 * (src/wasm_shim.rs). */
void* rust_zstd_wasm_shim_malloc(size_t size);
void* rust_zstd_wasm_shim_calloc(size_t nmemb, size_t size);
void rust_zstd_wasm_shim_free(void* ptr);
void rust_zstd_wasm_shim_qsort(void* base, size_t nitems, size_t size,
                               int (*compar)(const void*, const void*));

/* Redirect the libc names used by the C sources to the Rust shims. */
#define malloc(size) rust_zstd_wasm_shim_malloc(size)
#define calloc(nmemb, size) rust_zstd_wasm_shim_calloc(nmemb, size)
#define free(ptr) rust_zstd_wasm_shim_free(ptr)
#define qsort(base, nitems, size, compar) \
  rust_zstd_wasm_shim_qsort(base, nitems, size, compar)

#endif // _STDLIB_H
/* ===== zstd-safe/zstd-sys/wasm-shim/string.h ===== */
#include <stddef.h>

#ifndef _STRING_H
#define _STRING_H 1

/* Memory routines exported by the Rust side (src/wasm_shim.rs). */
int rust_zstd_wasm_shim_memcmp(const void *str1, const void *str2, size_t n);
void *rust_zstd_wasm_shim_memcpy(void *restrict dest, const void *restrict src, size_t n);
void *rust_zstd_wasm_shim_memmove(void *dest, const void *src, size_t n);
void *rust_zstd_wasm_shim_memset(void *dest, int c, size_t n);

/* The wrappers are `static inline`: a plain C99 `inline` definition provides
 * no external definition, so any call the compiler chooses not to inline
 * (e.g. in an unoptimized build) would refer to an external symbol instead of
 * the shim. */
static inline int memcmp(const void *str1, const void *str2, size_t n) {
  return rust_zstd_wasm_shim_memcmp(str1, str2, n);
}

static inline void *memcpy(void *restrict dest, const void *restrict src, size_t n) {
  return rust_zstd_wasm_shim_memcpy(dest, src, n);
}

static inline void *memmove(void *dest, const void *src, size_t n) {
  return rust_zstd_wasm_shim_memmove(dest, src, n);
}

static inline void *memset(void *dest, int c, size_t n) {
  return rust_zstd_wasm_shim_memset(dest, c, n);
}

#endif // _STRING_H

/* ===== zstd-safe/zstd-sys/wasm-shim/time.h ===== */
#ifndef _TIME_H
#define _TIME_H

#define CLOCKS_PER_SEC 1000

typedef unsigned long long clock_t;

// The clock is only used for progress reporting, which we disable anyway.
// `static inline` so that a call that is not inlined does not turn into a
// reference to an external `clock` symbol.
static inline clock_t clock(void) {
  return 0;
}

#endif // _TIME_H
16 | * Or use the `bindgen` feature, which will create the bindings automatically. */ 17 | 18 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/zstd.h: -------------------------------------------------------------------------------- 1 | #ifdef PKG_CONFIG 2 | 3 | /* Just use installed headers */ 4 | #include 5 | #ifdef ZSTD_RUST_BINDINGS_EXPERIMENTAL 6 | #include 7 | #endif // #ifdef ZSTD_RUST_BINDINGS_EXPERIMENTAL 8 | 9 | #else // #ifdef PKG_CONFIG 10 | 11 | #include "zstd/lib/zstd.h" 12 | #ifdef ZSTD_RUST_BINDINGS_EXPERIMENTAL 13 | #include "zstd/lib/zstd_errors.h" 14 | #endif // #ifdef ZSTD_RUST_BINDINGS_EXPERIMENTAL 15 | 16 | #endif // #ifdef PKG_CONFIG 17 | 18 | 19 | /* This file is used to generate bindings for both headers. 20 | * Check update_bindings.sh to see how to use it. 21 | * Or use the `bindgen` feature, which will create the bindings automatically. */ 22 | -------------------------------------------------------------------------------- /zstd-safe/zstd-sys/zstd_seekable.h: -------------------------------------------------------------------------------- 1 | #ifdef PKG_CONFIG 2 | 3 | /* Just use installed headers */ 4 | #include 5 | // Don't use experimental features like zstdmt 6 | 7 | #else // #ifdef PKG_CONFIG 8 | 9 | #include "zstd/contrib/seekable_format/zstd_seekable.h" 10 | 11 | #endif // #ifdef PKG_CONFIG 12 | 13 | 14 | /* This file is used to generate bindings for the Zstandard Seekable Format. 15 | * Check update_bindings.sh to see how to use it. 16 | * Or use the `bindgen` feature, which will create the bindings automatically. */ 17 | --------------------------------------------------------------------------------