├── .github ├── dependabot.yml └── workflows │ ├── ci-clippy.yml │ ├── ci-fmt.yml │ ├── ci-linux.yml │ ├── ci-typos.yml │ └── ci-wasm.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── examples ├── actix_multipart.rs ├── cli_compress.rs └── file_extraction.rs ├── rustfmt.toml ├── src ├── base │ ├── mod.rs │ ├── read │ │ ├── io │ │ │ ├── combined_record.rs │ │ │ ├── compressed.rs │ │ │ ├── entry.rs │ │ │ ├── hashed.rs │ │ │ ├── locator.rs │ │ │ ├── mod.rs │ │ │ └── owned.rs │ │ ├── mem.rs │ │ ├── mod.rs │ │ ├── seek.rs │ │ └── stream.rs │ └── write │ │ ├── compressed_writer.rs │ │ ├── entry_stream.rs │ │ ├── entry_whole.rs │ │ ├── io │ │ ├── mod.rs │ │ └── offset.rs │ │ └── mod.rs ├── date │ ├── builder.rs │ └── mod.rs ├── entry │ ├── builder.rs │ └── mod.rs ├── error.rs ├── file │ ├── builder.rs │ └── mod.rs ├── lib.rs ├── spec │ ├── attribute.rs │ ├── compression.rs │ ├── consts.rs │ ├── extra_field.rs │ ├── header.rs │ ├── mod.rs │ ├── parse.rs │ └── version.rs ├── string.rs ├── tests │ ├── combined │ │ └── mod.rs │ ├── mod.rs │ ├── read │ │ ├── compression │ │ │ ├── bzip2.data │ │ │ ├── deflate.data │ │ │ ├── lzma.data │ │ │ ├── mod.rs │ │ │ ├── xz.data │ │ │ └── zstd.data │ │ ├── locator │ │ │ ├── empty-buffer-boundary.zip │ │ │ ├── empty-with-max-comment.zip │ │ │ ├── empty.zip │ │ │ └── mod.rs │ │ ├── mod.rs │ │ └── zip64 │ │ │ ├── mod.rs │ │ │ └── zip64.zip │ ├── spec │ │ ├── date.rs │ │ └── mod.rs │ └── write │ │ ├── mod.rs │ │ ├── offset │ │ └── mod.rs │ │ └── zip64 │ │ └── mod.rs ├── tokio │ ├── mod.rs │ └── read │ │ ├── fs.rs │ │ └── mod.rs └── utils.rs └── tests ├── common └── mod.rs ├── compress_test.rs ├── decompress_test.rs └── test_inputs ├── sample_data.deflate.zip ├── sample_data.store.zip ├── sample_data.zstd.zip ├── sample_data ├── alpha │ ├── back_to_front.txt │ └── front_to_back.txt └── numeric │ ├── forward.txt │ └── reverse.txt └── sample_data_utf8_extra.zip /.github/dependabot.yml: 
-------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | # Workflow files stored in the 5 | # default location of `.github/workflows` 6 | directory: "/" 7 | schedule: 8 | interval: "daily" 9 | - package-ecosystem: "cargo" 10 | directory: "/" 11 | schedule: 12 | interval: "daily" 13 | -------------------------------------------------------------------------------- /.github/workflows/ci-clippy.yml: -------------------------------------------------------------------------------- 1 | name: clippy (Linux) 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Run clippy 20 | run: cargo clippy --all-features -- -D clippy::all -------------------------------------------------------------------------------- /.github/workflows/ci-fmt.yml: -------------------------------------------------------------------------------- 1 | name: rustfmt (Linux) 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Run rustfmt 20 | run: cargo fmt --check -------------------------------------------------------------------------------- /.github/workflows/ci-linux.yml: -------------------------------------------------------------------------------- 1 | name: Test (Linux) 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Test [no features] 21 | run: cargo test --verbose 22 | 23 | - name: Test ['chrono' 
feature] 24 | run: cargo test --verbose --features chrono 25 | 26 | - name: Test ['tokio' feature] 27 | run: cargo test --verbose --features tokio 28 | 29 | - name: Test ['tokio-fs' feature] 30 | run: cargo test --verbose --features tokio-fs 31 | 32 | - name: Test ['deflate' feature] 33 | run: cargo test --verbose --features deflate 34 | 35 | - name: Test ['bzip2' feature] 36 | run: cargo test --verbose --features bzip2 37 | 38 | - name: Test ['lzma' feature] 39 | run: cargo test --verbose --features lzma 40 | 41 | - name: Test ['zstd' feature] 42 | run: cargo test --verbose --features zstd 43 | 44 | - name: Test ['xz' feature] 45 | run: cargo test --verbose --features xz 46 | 47 | - name: Test ['deflate64' feature] 48 | run: cargo test --verbose --features deflate64 49 | 50 | - name: Test ['full' feature] 51 | run: cargo test --verbose --features full 52 | -------------------------------------------------------------------------------- /.github/workflows/ci-typos.yml: -------------------------------------------------------------------------------- 1 | name: typos (Linux) 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Install typos 21 | run: cargo install typos-cli 22 | 23 | - name: Run typos 24 | run: typos --format brief -------------------------------------------------------------------------------- /.github/workflows/ci-wasm.yml: -------------------------------------------------------------------------------- 1 | name: Build (WASM) 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | name: Build ['full-wasm' feature] on ${{ matrix.target }} 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | target: 19 | - wasm32-wasi 20 | - 
wasm32-unknown-unknown 21 | steps: 22 | - uses: actions/checkout@v4 23 | - run: rustup target add ${{ matrix.target }} 24 | - run: cargo build --verbose --target ${{ matrix.target }} --features full-wasm 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | /examples/**/target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | /Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | /examples/**/*.rs.bk 13 | 14 | # Ignore generated zip test file that is large 15 | /src/tests/read/zip64/zip64many.zip 16 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "async_zip" 3 | version = "0.0.17" 4 | edition = "2021" 5 | authors = ["Harry [hello@majored.pw]"] 6 | repository = "https://github.com/Majored/rs-async-zip" 7 | description = "An asynchronous ZIP archive reading/writing crate." 
8 | readme = "README.md" 9 | license = "MIT" 10 | documentation = "https://docs.rs/async_zip/" 11 | homepage = "https://github.com/Majored/rs-async-zip" 12 | keywords = ["async", "zip", "archive", "tokio"] 13 | categories = ["asynchronous", "compression"] 14 | 15 | [features] 16 | full = ["chrono", "tokio-fs", "deflate", "bzip2", "lzma", "zstd", "xz", "deflate64"] 17 | 18 | # All features that are compatible with WASM 19 | full-wasm = ["chrono", "deflate", "zstd"] 20 | 21 | tokio = ["dep:tokio", "tokio-util", "tokio/io-util"] 22 | tokio-fs = ["tokio/fs"] 23 | 24 | deflate = ["async-compression/deflate"] 25 | bzip2 = ["async-compression/bzip2"] 26 | lzma = ["async-compression/lzma"] 27 | zstd = ["async-compression/zstd"] 28 | xz = ["async-compression/xz"] 29 | deflate64 = ["async-compression/deflate64"] 30 | 31 | [package.metadata.docs.rs] 32 | all-features = true 33 | # defines the configuration attribute `docsrs` 34 | rustdoc-args = ["--cfg", "docsrs"] 35 | 36 | [dependencies] 37 | crc32fast = "1" 38 | futures-lite = { version = "2.1.0", default-features = false, features = ["std"] } 39 | pin-project = "1" 40 | thiserror = "1" 41 | 42 | async-compression = { version = "0.4.2", default-features = false, features = ["futures-io"], optional = true } 43 | chrono = { version = "0.4", default-features = false, features = ["clock"], optional = true } 44 | tokio = { version = "1", default-features = false, optional = true } 45 | tokio-util = { version = "0.7", features = ["compat"], optional = true } 46 | 47 | [dev-dependencies] 48 | # tests 49 | tokio = { version = "1", features = ["full"] } 50 | tokio-util = { version = "0.7", features = ["compat"] } 51 | env_logger = "0.11.2" 52 | zip = "2.1.5" 53 | 54 | # shared across multiple examples 55 | anyhow = "1" 56 | sanitize-filename = "0.5" 57 | 58 | # actix_multipart 59 | actix-web = "4" 60 | actix-multipart = "0.7" 61 | futures = "0.3" 62 | derive_more = { version = "1.0", features = ["display", "error"] } 63 | uuid = { 
version = "1", features = ["v4", "serde"] } 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Harry 4 | Copyright (c) 2023 Cognite AS 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # async_zip 2 | [![Crates.io](https://img.shields.io/crates/v/async_zip?style=flat-square)](https://crates.io/crates/async_zip) 3 | [![Crates.io](https://img.shields.io/crates/d/async_zip?style=flat-square)](https://crates.io/crates/async_zip) 4 | [![docs.rs](https://img.shields.io/docsrs/async_zip?style=flat-square)](https://docs.rs/async_zip/) 5 | [![GitHub Workflow Status (branch)](https://img.shields.io/github/actions/workflow/status/Majored/rs-async-zip/ci-linux.yml?branch=main&style=flat-square)](https://github.com/Majored/rs-async-zip/actions?query=branch%3Amain) 6 | [![GitHub](https://img.shields.io/github/license/Majored/rs-async-zip?style=flat-square)](https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 7 | 8 | An asynchronous ZIP archive reading/writing crate. 9 | 10 | ## Features 11 | - A base implementation atop `futures`'s IO traits. 12 | - An extended implementation atop `tokio`'s IO traits. 13 | - Support for Stored, Deflate, bzip2, LZMA, zstd, and xz compression methods. 14 | - Various different reading approaches (seek, stream, filesystem, in-memory buffer, etc). 15 | - Support for writing complete data (u8 slices) or streams using data descriptors. 16 | - Initial support for ZIP64 reading and writing. 17 | - Aims for reasonable [specification](https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md) compliance. 18 | 19 | ## Installation & Basic Usage 20 | 21 | ```toml 22 | [dependencies] 23 | async_zip = { version = "0.0.17", features = ["full"] } 24 | ``` 25 | 26 | A (soon to be) extensive list of [examples](https://github.com/Majored/rs-async-zip/tree/main/examples) can be found under the `/examples` directory. 27 | 28 | ### Feature Flags 29 | - `full` - Enables all below features. 
30 | - `full-wasm` - Enables all below features that are compatible with WASM. 31 | - `chrono` - Enables support for parsing dates via `chrono`. 32 | - `tokio` - Enables support for the `tokio` implementation module. 33 | - `tokio-fs` - Enables support for the `tokio::fs` reading module. 34 | - `deflate` - Enables support for the Deflate compression method. 35 | - `bzip2` - Enables support for the bzip2 compression method. 36 | - `lzma` - Enables support for the LZMA compression method. 37 | - `zstd` - Enables support for the zstd compression method. 38 | - `xz` - Enables support for the xz compression method. 39 | 40 | ### Reading 41 | ```rust 42 | use tokio::{io::BufReader, fs::File}; 43 | use async_zip::tokio::read::seek::ZipFileReader; 44 | ... 45 | 46 | let mut file = BufReader::new(File::open("./Archive.zip").await?); 47 | let mut zip = ZipFileReader::with_tokio(&mut file).await?; 48 | 49 | let mut string = String::new(); 50 | let mut reader = zip.reader_with_entry(0).await?; 51 | reader.read_to_string_checked(&mut string).await?; 52 | 53 | println!("{}", string); 54 | ``` 55 | 56 | ### Writing 57 | ```rust 58 | use async_zip::tokio::write::ZipFileWriter; 59 | use async_zip::{Compression, ZipEntryBuilder}; 60 | use tokio::fs::File; 61 | ... 62 | 63 | let mut file = File::create("foo.zip").await?; 64 | let mut writer = ZipFileWriter::with_tokio(&mut file); 65 | 66 | let data = b"This is an example file."; 67 | let builder = ZipEntryBuilder::new("bar.txt".into(), Compression::Deflate); 68 | 69 | writer.write_entry_whole(builder, data).await?; 70 | writer.close().await?; 71 | ``` 72 | 73 | ## Contributions 74 | Whilst I will be continuing to maintain this crate myself, reasonable specification compliance is a huge undertaking for a single individual. As such, contributions will always be encouraged and appreciated. 
75 | 76 | No contribution guidelines exist but additions should be developed with readability in mind, with appropriate comments, and make use of `rustfmt`. 77 | 78 | ## Issues & Support 79 | Whether you're wanting to report a bug you've come across during use of this crate or are seeking general help/assistance, please utilise the [issues tracker](https://github.com/Majored/rs-async-zip/issues) and provide as much detail as possible (eg. recreation steps). 80 | 81 | I try to respond to issues within a reasonable timeframe. 82 | -------------------------------------------------------------------------------- /examples/actix_multipart.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 FL33TW00D (https://github.com/FL33TW00D) 2 | // Copyright (c) 2021 Harry [Majored] [hello@majored.pw] 3 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE 4 | 5 | #[cfg(features = "deflate")] 6 | mod inner { 7 | use async_zip::write::ZipFileWriter; 8 | use async_zip::{Compression, ZipEntryBuilder}; 9 | 10 | use std::path::Path; 11 | 12 | use actix_multipart::Multipart; 13 | use actix_web::{web, App, HttpServer, Responder, ResponseError, Result}; 14 | use derive_more::{Display, Error}; 15 | use futures::StreamExt; 16 | use futures_lite::io::AsyncWriteExt; 17 | use tokio::fs::File; 18 | use uuid::Uuid; 19 | 20 | const TMP_DIR: &str = "./tmp/"; 21 | 22 | #[derive(Debug, Display, Error)] 23 | #[display("An error occurred during ZIP creation which was logged to stderr.")] 24 | struct CreationError; 25 | 26 | impl ResponseError for CreationError {} 27 | 28 | async fn do_main() -> std::io::Result<()> { 29 | let tmp_path = Path::new(TMP_DIR); 30 | 31 | if !tmp_path.exists() { 32 | tokio::fs::create_dir(tmp_path).await?; 33 | } 34 | 35 | let factory = || App::new().route("/", web::post().to(handler)); 36 | HttpServer::new(factory).bind(("127.0.0.1", 8080))?.run().await 37 | } 38 | 39 | async fn 
handler(multipart: Multipart) -> Result { 40 | match create_archive(multipart).await { 41 | Ok(name) => Ok(format!("Successfully created archive: {}", name)), 42 | Err(err) => { 43 | eprintln!("[ERROR] {:?}", err); 44 | Err(CreationError) 45 | } 46 | } 47 | } 48 | 49 | async fn create_archive(mut body: Multipart) -> Result { 50 | let archive_name = format!("tmp/{}", Uuid::new_v4()); 51 | let mut archive = File::create(archive_name.clone()).await?; 52 | let mut writer = ZipFileWriter::new(&mut archive); 53 | 54 | while let Some(item) = body.next().await { 55 | let mut field = item?; 56 | 57 | let filename = match field.content_disposition().get_filename() { 58 | Some(filename) => sanitize_filename::sanitize(filename), 59 | None => Uuid::new_v4().to_string(), 60 | }; 61 | 62 | let builder = ZipEntryBuilder::new(filename, Compression::Deflate); 63 | let mut entry_writer = writer.write_entry_stream(builder).await.unwrap(); 64 | 65 | while let Some(chunk) = field.next().await { 66 | entry_writer.write_all_buf(&mut chunk?).await?; 67 | } 68 | 69 | entry_writer.close().await.unwrap(); 70 | } 71 | 72 | writer.close().await.unwrap(); 73 | archive.shutdown().await.unwrap(); 74 | 75 | Ok(archive_name) 76 | } 77 | } 78 | 79 | #[actix_web::main] 80 | async fn main() -> std::io::Result<()> { 81 | #[cfg(features = "deflate")] 82 | { 83 | inner::do_main().await?; 84 | } 85 | 86 | Ok(()) 87 | } 88 | -------------------------------------------------------------------------------- /examples/cli_compress.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | #[tokio::main] 5 | async fn main() { 6 | #[cfg(features = "deflate")] 7 | if let Err(err) = inner::run().await { 8 | eprintln!("Error: {}", err); 9 | eprintln!("Usage: cli_compress "); 10 | std::process::exit(1); 11 | } 12 | } 13 | 14 | #[cfg(features = 
"deflate")] 15 | mod inner { 16 | 17 | use async_zip::base::write::ZipFileWriter; 18 | use async_zip::{Compression, ZipEntryBuilder}; 19 | 20 | use std::path::{Path, PathBuf}; 21 | 22 | use anyhow::{anyhow, bail, Result}; 23 | use futures_lite::io::AsyncReadExt; 24 | use tokio::fs::File; 25 | 26 | async fn run() -> Result<()> { 27 | let mut args = std::env::args().skip(1); 28 | 29 | let input_str = args.next().ok_or(anyhow!("No input file or directory specified."))?; 30 | let input_path = Path::new(&input_str); 31 | 32 | let output_str = args.next().ok_or(anyhow!("No output file specified."))?; 33 | let output_path = Path::new(&output_str); 34 | 35 | let input_pathbuf = input_path.canonicalize().map_err(|_| anyhow!("Unable to canonicalise input path."))?; 36 | let input_path = input_pathbuf.as_path(); 37 | 38 | if output_path.exists() { 39 | bail!("The output file specified already exists."); 40 | } 41 | if !input_path.exists() { 42 | bail!("The input file or directory specified doesn't exist."); 43 | } 44 | 45 | let mut output_writer = ZipFileWriter::new(File::create(output_path).await?); 46 | 47 | if input_path.is_dir() { 48 | handle_directory(input_path, &mut output_writer).await?; 49 | } else { 50 | handle_singular(input_path, &mut output_writer).await?; 51 | } 52 | 53 | output_writer.close().await?; 54 | println!("Successfully written ZIP file '{}'.", output_path.display()); 55 | 56 | Ok(()) 57 | } 58 | 59 | async fn handle_singular(input_path: &Path, writer: &mut ZipFileWriter) -> Result<()> { 60 | let filename = input_path.file_name().ok_or(anyhow!("Input path terminates in '...'."))?; 61 | let filename = filename.to_str().ok_or(anyhow!("Input path not valid UTF-8."))?; 62 | 63 | write_entry(filename, input_path, writer).await 64 | } 65 | 66 | async fn handle_directory(input_path: &Path, writer: &mut ZipFileWriter) -> Result<()> { 67 | let entries = walk_dir(input_path.into()).await?; 68 | let input_dir_str = 
input_path.as_os_str().to_str().ok_or(anyhow!("Input path not valid UTF-8."))?; 69 | 70 | for entry_path_buf in entries { 71 | let entry_path = entry_path_buf.as_path(); 72 | let entry_str = entry_path.as_os_str().to_str().ok_or(anyhow!("Directory file path not valid UTF-8."))?; 73 | 74 | if !entry_str.starts_with(input_dir_str) { 75 | bail!("Directory file path does not start with base input directory path."); 76 | } 77 | 78 | let entry_str = &entry_str[input_dir_str.len() + 1..]; 79 | write_entry(entry_str, entry_path, writer).await?; 80 | } 81 | 82 | Ok(()) 83 | } 84 | 85 | async fn write_entry(filename: &str, input_path: &Path, writer: &mut ZipFileWriter) -> Result<()> { 86 | let mut input_file = File::open(input_path).await?; 87 | let input_file_size = input_file.metadata().await?.len() as usize; 88 | 89 | let mut buffer = Vec::with_capacity(input_file_size); 90 | input_file.read_to_end(&mut buffer).await?; 91 | 92 | let builder = ZipEntryBuilder::new(filename.into(), Compression::Deflate); 93 | writer.write_entry_whole(builder, &buffer).await?; 94 | 95 | Ok(()) 96 | } 97 | 98 | async fn walk_dir(dir: PathBuf) -> Result> { 99 | let mut dirs = vec![dir]; 100 | let mut files = vec![]; 101 | 102 | while !dirs.is_empty() { 103 | let mut dir_iter = tokio::fs::read_dir(dirs.remove(0)).await?; 104 | 105 | while let Some(entry) = dir_iter.next_entry().await? { 106 | let entry_path_buf = entry.path(); 107 | 108 | if entry_path_buf.is_dir() { 109 | dirs.push(entry_path_buf); 110 | } else { 111 | files.push(entry_path_buf); 112 | } 113 | } 114 | } 115 | 116 | Ok(files) 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /examples/file_extraction.rs: -------------------------------------------------------------------------------- 1 | //! Demonstrates how to safely extract everything from a ZIP file. 2 | //! 3 | //! Extracting zip files from untrusted sources without proper sanitization 4 | //! 
could be exploited by directory traversal attacks. 5 | //! 6 | //! 7 | //! This example tries to minimize that risk by following the implementation from 8 | //! Python's Standard Library. 9 | //! 10 | //! 11 | //! 12 | 13 | use std::{ 14 | env::current_dir, 15 | path::{Path, PathBuf}, 16 | }; 17 | 18 | use async_zip::base::read::seek::ZipFileReader; 19 | use tokio::{ 20 | fs::{create_dir_all, File, OpenOptions}, 21 | io::BufReader, 22 | }; 23 | use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt}; 24 | 25 | #[tokio::main] 26 | async fn main() { 27 | let archive = File::open("example.zip").await.expect("Failed to open zip file"); 28 | let out_dir = current_dir().expect("Failed to get current working directory"); 29 | unzip_file(archive, &out_dir).await; 30 | } 31 | 32 | /// Returns a relative path without reserved names, redundant separators, ".", or "..". 33 | fn sanitize_file_path(path: &str) -> PathBuf { 34 | // Replaces backwards slashes 35 | path.replace('\\', "/") 36 | // Sanitizes each component 37 | .split('/') 38 | .map(sanitize_filename::sanitize) 39 | .collect() 40 | } 41 | 42 | /// Extracts everything from the ZIP archive to the output directory 43 | async fn unzip_file(archive: File, out_dir: &Path) { 44 | let archive = BufReader::new(archive).compat(); 45 | let mut reader = ZipFileReader::new(archive).await.expect("Failed to read zip file"); 46 | for index in 0..reader.file().entries().len() { 47 | let entry = reader.file().entries().get(index).unwrap(); 48 | let path = out_dir.join(sanitize_file_path(entry.filename().as_str().unwrap())); 49 | // If the filename of the entry ends with '/', it is treated as a directory. 50 | // This is implemented by previous versions of this crate and the Python Standard Library. 
51 | // https://docs.rs/async_zip/0.0.8/src/async_zip/read/mod.rs.html#63-65 52 | // https://github.com/python/cpython/blob/820ef62833bd2d84a141adedd9a05998595d6b6d/Lib/zipfile.py#L528 53 | let entry_is_dir = entry.dir().unwrap(); 54 | 55 | let mut entry_reader = reader.reader_without_entry(index).await.expect("Failed to read ZipEntry"); 56 | 57 | if entry_is_dir { 58 | // The directory may have been created if iteration is out of order. 59 | if !path.exists() { 60 | create_dir_all(&path).await.expect("Failed to create extracted directory"); 61 | } 62 | } else { 63 | // Creates parent directories. They may not exist if iteration is out of order 64 | // or the archive does not contain directory entries. 65 | let parent = path.parent().expect("A file entry should have parent directories"); 66 | if !parent.is_dir() { 67 | create_dir_all(parent).await.expect("Failed to create parent directories"); 68 | } 69 | let writer = OpenOptions::new() 70 | .write(true) 71 | .create_new(true) 72 | .open(&path) 73 | .await 74 | .expect("Failed to create extracted file"); 75 | futures_lite::io::copy(&mut entry_reader, &mut writer.compat_write()) 76 | .await 77 | .expect("Failed to copy to extracted file"); 78 | 79 | // Closes the file and manipulates its metadata here if you wish to preserve its metadata from the archive. 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 120 2 | use_small_heuristics = "Max" -------------------------------------------------------------------------------- /src/base/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | //! A base runtime-agnostic implementation using `futures`'s IO types. 
5 | 6 | pub mod read; 7 | pub mod write; 8 | -------------------------------------------------------------------------------- /src/base/read/io/combined_record.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Harry [Majored] [hello@majored.pw] 2 | // Copyright (c) 2023 Cognite AS 3 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 4 | 5 | use crate::spec::header::{EndOfCentralDirectoryHeader, Zip64EndOfCentralDirectoryRecord}; 6 | 7 | /// Combines all the fields in EOCDR and Zip64EOCDR into one struct. 8 | #[derive(Debug)] 9 | pub struct CombinedCentralDirectoryRecord { 10 | pub version_made_by: Option, 11 | pub version_needed_to_extract: Option, 12 | pub disk_number: u32, 13 | pub disk_number_start_of_cd: u32, 14 | pub num_entries_in_directory_on_disk: u64, 15 | pub num_entries_in_directory: u64, 16 | pub directory_size: u64, 17 | pub offset_of_start_of_directory: u64, 18 | pub file_comment_length: u16, 19 | } 20 | 21 | impl CombinedCentralDirectoryRecord { 22 | /// Combine an EOCDR with an optional Zip64EOCDR. 23 | /// 24 | /// Fields that are set to their max value in the EOCDR will be overwritten by the contents of 25 | /// the corresponding Zip64EOCDR field. 
26 | pub fn combine(eocdr: EndOfCentralDirectoryHeader, zip64eocdr: Zip64EndOfCentralDirectoryRecord) -> Self { 27 | let mut combined = Self::from(&eocdr); 28 | if eocdr.disk_num == u16::MAX { 29 | combined.disk_number = zip64eocdr.disk_number; 30 | } 31 | if eocdr.start_cent_dir_disk == u16::MAX { 32 | combined.disk_number_start_of_cd = zip64eocdr.disk_number_start_of_cd; 33 | } 34 | if eocdr.num_of_entries_disk == u16::MAX { 35 | combined.num_entries_in_directory_on_disk = zip64eocdr.num_entries_in_directory_on_disk; 36 | } 37 | if eocdr.num_of_entries == u16::MAX { 38 | combined.num_entries_in_directory = zip64eocdr.num_entries_in_directory; 39 | } 40 | if eocdr.size_cent_dir == u32::MAX { 41 | combined.directory_size = zip64eocdr.directory_size; 42 | } 43 | if eocdr.cent_dir_offset == u32::MAX { 44 | combined.offset_of_start_of_directory = zip64eocdr.offset_of_start_of_directory; 45 | } 46 | combined.version_made_by = Some(zip64eocdr.version_made_by); 47 | combined.version_needed_to_extract = Some(zip64eocdr.version_needed_to_extract); 48 | 49 | combined 50 | } 51 | } 52 | 53 | // An implementation for the case of no zip64EOCDR. 
54 | impl From<&EndOfCentralDirectoryHeader> for CombinedCentralDirectoryRecord { 55 | fn from(header: &EndOfCentralDirectoryHeader) -> Self { 56 | Self { 57 | version_made_by: None, 58 | version_needed_to_extract: None, 59 | disk_number: header.disk_num as u32, 60 | disk_number_start_of_cd: header.start_cent_dir_disk as u32, 61 | num_entries_in_directory_on_disk: header.num_of_entries_disk as u64, 62 | num_entries_in_directory: header.num_of_entries as u64, 63 | directory_size: header.size_cent_dir as u64, 64 | offset_of_start_of_directory: header.cent_dir_offset as u64, 65 | file_comment_length: header.file_comm_length, 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/base/read/io/compressed.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::spec::Compression; 5 | 6 | use std::pin::Pin; 7 | use std::task::{Context, Poll}; 8 | 9 | #[cfg(any( 10 | feature = "deflate", 11 | feature = "bzip2", 12 | feature = "zstd", 13 | feature = "lzma", 14 | feature = "xz", 15 | feature = "deflate64" 16 | ))] 17 | use async_compression::futures::bufread; 18 | use futures_lite::io::{AsyncBufRead, AsyncRead}; 19 | use pin_project::pin_project; 20 | 21 | /// A wrapping reader which holds concrete types for all respective compression method readers. 
22 | #[pin_project(project = CompressedReaderProj)] 23 | pub(crate) enum CompressedReader { 24 | Stored(#[pin] R), 25 | #[cfg(feature = "deflate")] 26 | Deflate(#[pin] bufread::DeflateDecoder), 27 | #[cfg(feature = "deflate64")] 28 | Deflate64(#[pin] bufread::Deflate64Decoder), 29 | #[cfg(feature = "bzip2")] 30 | Bz(#[pin] bufread::BzDecoder), 31 | #[cfg(feature = "lzma")] 32 | Lzma(#[pin] bufread::LzmaDecoder), 33 | #[cfg(feature = "zstd")] 34 | Zstd(#[pin] bufread::ZstdDecoder), 35 | #[cfg(feature = "xz")] 36 | Xz(#[pin] bufread::XzDecoder), 37 | } 38 | 39 | impl CompressedReader 40 | where 41 | R: AsyncBufRead + Unpin, 42 | { 43 | /// Constructs a new wrapping reader from a generic [`AsyncBufRead`] implementer. 44 | pub(crate) fn new(reader: R, compression: Compression) -> Self { 45 | match compression { 46 | Compression::Stored => CompressedReader::Stored(reader), 47 | #[cfg(feature = "deflate")] 48 | Compression::Deflate => CompressedReader::Deflate(bufread::DeflateDecoder::new(reader)), 49 | #[cfg(feature = "deflate64")] 50 | Compression::Deflate64 => CompressedReader::Deflate64(bufread::Deflate64Decoder::new(reader)), 51 | #[cfg(feature = "bzip2")] 52 | Compression::Bz => CompressedReader::Bz(bufread::BzDecoder::new(reader)), 53 | #[cfg(feature = "lzma")] 54 | Compression::Lzma => CompressedReader::Lzma(bufread::LzmaDecoder::new(reader)), 55 | #[cfg(feature = "zstd")] 56 | Compression::Zstd => CompressedReader::Zstd(bufread::ZstdDecoder::new(reader)), 57 | #[cfg(feature = "xz")] 58 | Compression::Xz => CompressedReader::Xz(bufread::XzDecoder::new(reader)), 59 | } 60 | } 61 | 62 | /// Consumes this reader and returns the inner value. 
63 | pub(crate) fn into_inner(self) -> R { 64 | match self { 65 | CompressedReader::Stored(inner) => inner, 66 | #[cfg(feature = "deflate")] 67 | CompressedReader::Deflate(inner) => inner.into_inner(), 68 | #[cfg(feature = "deflate64")] 69 | CompressedReader::Deflate64(inner) => inner.into_inner(), 70 | #[cfg(feature = "bzip2")] 71 | CompressedReader::Bz(inner) => inner.into_inner(), 72 | #[cfg(feature = "lzma")] 73 | CompressedReader::Lzma(inner) => inner.into_inner(), 74 | #[cfg(feature = "zstd")] 75 | CompressedReader::Zstd(inner) => inner.into_inner(), 76 | #[cfg(feature = "xz")] 77 | CompressedReader::Xz(inner) => inner.into_inner(), 78 | } 79 | } 80 | } 81 | 82 | impl AsyncRead for CompressedReader 83 | where 84 | R: AsyncBufRead + Unpin, 85 | { 86 | fn poll_read(self: Pin<&mut Self>, c: &mut Context<'_>, b: &mut [u8]) -> Poll> { 87 | match self.project() { 88 | CompressedReaderProj::Stored(inner) => inner.poll_read(c, b), 89 | #[cfg(feature = "deflate")] 90 | CompressedReaderProj::Deflate(inner) => inner.poll_read(c, b), 91 | #[cfg(feature = "deflate64")] 92 | CompressedReaderProj::Deflate64(inner) => inner.poll_read(c, b), 93 | #[cfg(feature = "bzip2")] 94 | CompressedReaderProj::Bz(inner) => inner.poll_read(c, b), 95 | #[cfg(feature = "lzma")] 96 | CompressedReaderProj::Lzma(inner) => inner.poll_read(c, b), 97 | #[cfg(feature = "zstd")] 98 | CompressedReaderProj::Zstd(inner) => inner.poll_read(c, b), 99 | #[cfg(feature = "xz")] 100 | CompressedReaderProj::Xz(inner) => inner.poll_read(c, b), 101 | } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/base/read/io/entry.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::base::read::io::{compressed::CompressedReader, hashed::HashedReader, owned::OwnedReader}; 5 | 
use crate::entry::ZipEntry; 6 | use crate::error::{Result, ZipError}; 7 | use crate::spec::Compression; 8 | 9 | use std::pin::Pin; 10 | use std::task::{Context, Poll}; 11 | 12 | use futures_lite::io::{AsyncBufRead, AsyncRead, AsyncReadExt, Take}; 13 | use pin_project::pin_project; 14 | 15 | /// A type which encodes that [`ZipEntryReader`] has associated entry data. 16 | pub struct WithEntry<'a>(OwnedEntry<'a>); 17 | 18 | /// A type which encodes that [`ZipEntryReader`] has no associated entry data. 19 | pub struct WithoutEntry; 20 | 21 | /// A ZIP entry reader which may implement decompression. 22 | #[pin_project] 23 | pub struct ZipEntryReader<'a, R, E> { 24 | #[pin] 25 | reader: HashedReader>>>, 26 | entry: E, 27 | } 28 | 29 | impl<'a, R> ZipEntryReader<'a, R, WithoutEntry> 30 | where 31 | R: AsyncBufRead + Unpin, 32 | { 33 | /// Constructs a new entry reader from its required parameters (incl. an owned R). 34 | pub(crate) fn new_with_owned(reader: R, compression: Compression, size: u64) -> Self { 35 | let reader = HashedReader::new(CompressedReader::new(OwnedReader::Owned(reader).take(size), compression)); 36 | Self { reader, entry: WithoutEntry } 37 | } 38 | 39 | /// Constructs a new entry reader from its required parameters (incl. a mutable borrow of an R). 
40 | pub(crate) fn new_with_borrow(reader: &'a mut R, compression: Compression, size: u64) -> Self { 41 | let reader = HashedReader::new(CompressedReader::new(OwnedReader::Borrow(reader).take(size), compression)); 42 | Self { reader, entry: WithoutEntry } 43 | } 44 | 45 | pub(crate) fn into_with_entry(self, entry: &'a ZipEntry) -> ZipEntryReader<'a, R, WithEntry<'a>> { 46 | ZipEntryReader { reader: self.reader, entry: WithEntry(OwnedEntry::Borrow(entry)) } 47 | } 48 | 49 | pub(crate) fn into_with_entry_owned(self, entry: ZipEntry) -> ZipEntryReader<'a, R, WithEntry<'a>> { 50 | ZipEntryReader { reader: self.reader, entry: WithEntry(OwnedEntry::Owned(entry)) } 51 | } 52 | } 53 | 54 | impl<'a, R, E> AsyncRead for ZipEntryReader<'a, R, E> 55 | where 56 | R: AsyncBufRead + Unpin, 57 | { 58 | fn poll_read(self: Pin<&mut Self>, c: &mut Context<'_>, b: &mut [u8]) -> Poll> { 59 | self.project().reader.poll_read(c, b) 60 | } 61 | } 62 | 63 | impl<'a, R, E> ZipEntryReader<'a, R, E> 64 | where 65 | R: AsyncBufRead + Unpin, 66 | { 67 | /// Computes and returns the CRC32 hash of bytes read by this reader so far. 68 | /// 69 | /// This hash should only be computed once EOF has been reached. 70 | pub fn compute_hash(&mut self) -> u32 { 71 | self.reader.swap_and_compute_hash() 72 | } 73 | 74 | /// Consumes this reader and returns the inner value. 75 | pub(crate) fn into_inner(self) -> R { 76 | self.reader.into_inner().into_inner().into_inner().owned_into_inner() 77 | } 78 | } 79 | 80 | impl ZipEntryReader<'_, R, WithEntry<'_>> 81 | where 82 | R: AsyncBufRead + Unpin, 83 | { 84 | /// Returns an immutable reference to the associated entry data. 85 | pub fn entry(&self) -> &'_ ZipEntry { 86 | self.entry.0.entry() 87 | } 88 | 89 | /// Reads all bytes until EOF has been reached, appending them to buf, and verifies the CRC32 values. 90 | /// 91 | /// This is a helper function synonymous to [`AsyncReadExt::read_to_end()`]. 
92 | pub async fn read_to_end_checked(&mut self, buf: &mut Vec) -> Result { 93 | let read = self.read_to_end(buf).await?; 94 | 95 | if self.compute_hash() == self.entry.0.entry().crc32() { 96 | Ok(read) 97 | } else { 98 | Err(ZipError::CRC32CheckError) 99 | } 100 | } 101 | 102 | /// Reads all bytes until EOF has been reached, placing them into buf, and verifies the CRC32 values. 103 | /// 104 | /// This is a helper function synonymous to [`AsyncReadExt::read_to_string()`]. 105 | pub async fn read_to_string_checked(&mut self, buf: &mut String) -> Result { 106 | let read = self.read_to_string(buf).await?; 107 | 108 | if self.compute_hash() == self.entry.0.entry().crc32() { 109 | Ok(read) 110 | } else { 111 | Err(ZipError::CRC32CheckError) 112 | } 113 | } 114 | } 115 | 116 | enum OwnedEntry<'a> { 117 | Owned(ZipEntry), 118 | Borrow(&'a ZipEntry), 119 | } 120 | 121 | impl<'a> OwnedEntry<'a> { 122 | pub fn entry(&self) -> &'_ ZipEntry { 123 | match self { 124 | OwnedEntry::Owned(entry) => entry, 125 | OwnedEntry::Borrow(entry) => entry, 126 | } 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/base/read/io/hashed.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::base::read::io::poll_result_ok; 5 | 6 | use std::pin::Pin; 7 | use std::task::{ready, Context, Poll}; 8 | 9 | use crc32fast::Hasher; 10 | use futures_lite::io::AsyncRead; 11 | use pin_project::pin_project; 12 | 13 | /// A wrapping reader which computes the CRC32 hash of data read via [`AsyncRead`]. 
14 | #[pin_project] 15 | pub(crate) struct HashedReader { 16 | #[pin] 17 | pub(crate) reader: R, 18 | pub(crate) hasher: Hasher, 19 | } 20 | 21 | impl HashedReader 22 | where 23 | R: AsyncRead + Unpin, 24 | { 25 | /// Constructs a new wrapping reader from a generic [`AsyncRead`] implementer. 26 | pub(crate) fn new(reader: R) -> Self { 27 | Self { reader, hasher: Hasher::default() } 28 | } 29 | 30 | /// Swaps the internal hasher and returns the computed CRC32 hash. 31 | /// 32 | /// The internal hasher is taken and replaced with a newly-constructed one. As a result, this method should only be 33 | /// called once EOF has been reached and it's known that no more data will be read, else the computed hash(s) won't 34 | /// accurately represent the data read in. 35 | pub(crate) fn swap_and_compute_hash(&mut self) -> u32 { 36 | std::mem::take(&mut self.hasher).finalize() 37 | } 38 | 39 | /// Consumes this reader and returns the inner value. 40 | pub(crate) fn into_inner(self) -> R { 41 | self.reader 42 | } 43 | } 44 | 45 | impl AsyncRead for HashedReader 46 | where 47 | R: AsyncRead + Unpin, 48 | { 49 | fn poll_read(self: Pin<&mut Self>, c: &mut Context<'_>, b: &mut [u8]) -> Poll> { 50 | let project = self.project(); 51 | let written = poll_result_ok!(ready!(project.reader.poll_read(c, b))); 52 | project.hasher.update(&b[..written]); 53 | 54 | Poll::Ready(Ok(written)) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/base/read/io/locator.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | //! 5 | //! 6 | //! As with other ZIP libraries, we face the predicament that the end of central directory record may contain a 7 | //! variable-length file comment. As a result, we cannot just make the assumption that the start of this record is 8 | //! 
18 bytes (the length of the EOCDR) offset from the end of the data - we must locate it ourselves. 9 | //! 10 | //! The `zip-rs` crate handles this by reading in reverse from the end of the data. This involves seeking backwards 11 | //! by a single byte each iteration and reading 4 bytes into a u32. Whether this is performant/acceptable within a 12 | //! a non-async context, I'm unsure, but it isn't desirable within an async context. Especially since we cannot just 13 | //! place a [`BufReader`] infront of the upstream reader (as its internal buffer is invalidated on each seek). 14 | //! 15 | //! Reading in reverse is still desirable as the use of file comments is limited and they're unlikely to be large. 16 | //! 17 | //! The below method is one that compromises on these two contention points. Please submit an issue or PR if you know 18 | //! of a better algorithm for this (and have tested/verified its performance). 19 | 20 | #[cfg(doc)] 21 | use futures_lite::io::BufReader; 22 | 23 | use crate::error::{Result as ZipResult, ZipError}; 24 | use crate::spec::consts::{EOCDR_LENGTH, EOCDR_SIGNATURE, SIGNATURE_LENGTH}; 25 | 26 | use futures_lite::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, SeekFrom}; 27 | 28 | /// The buffer size used when locating the EOCDR, equal to 2KiB. 29 | const BUFFER_SIZE: usize = 2048; 30 | 31 | /// The upper bound of where the EOCDR signature cannot be located. 32 | const EOCDR_UPPER_BOUND: u64 = EOCDR_LENGTH as u64; 33 | 34 | /// The lower bound of where the EOCDR signature cannot be located. 35 | const EOCDR_LOWER_BOUND: u64 = EOCDR_UPPER_BOUND + SIGNATURE_LENGTH as u64 + u16::MAX as u64; 36 | 37 | /// Locate the `end of central directory record` offset, if one exists. 38 | /// The returned offset excludes the signature (4 bytes) 39 | /// 40 | /// This method involves buffered reading in reverse and reverse linear searching along those buffers for the EOCDR 41 | /// signature. 
As a result of this buffered approach, we reduce seeks when compared to `zip-rs`'s method by a factor 42 | /// of the buffer size. We also then don't have to do individual u32 reads against the upstream reader. 43 | /// 44 | /// Whilst I haven't done any in-depth benchmarks, when reading a ZIP file with the maximum length comment, this method 45 | /// saw a reduction in location time by a factor of 500 when compared with the `zip-rs` method. 46 | pub async fn eocdr(mut reader: R) -> ZipResult 47 | where 48 | R: AsyncRead + AsyncSeek + Unpin, 49 | { 50 | let length = reader.seek(SeekFrom::End(0)).await?; 51 | let signature = &EOCDR_SIGNATURE.to_le_bytes(); 52 | let mut buffer: [u8; BUFFER_SIZE] = [0; BUFFER_SIZE]; 53 | 54 | let mut position = length.saturating_sub((EOCDR_LENGTH + BUFFER_SIZE) as u64); 55 | reader.seek(SeekFrom::Start(position)).await?; 56 | 57 | loop { 58 | let read = reader.read(&mut buffer).await?; 59 | 60 | if let Some(match_index) = reverse_search_buffer(&buffer[..read], signature) { 61 | return Ok(position + (match_index + 1) as u64); 62 | } 63 | 64 | // If we hit the start of the data or the lower bound, we're unable to locate the EOCDR. 65 | if position == 0 || position <= length.saturating_sub(EOCDR_LOWER_BOUND) { 66 | return Err(ZipError::UnableToLocateEOCDR); 67 | } 68 | 69 | // To handle the case where the EOCDR signature crosses buffer boundaries, we simply overlap reads by the 70 | // signature length. This significantly reduces the complexity of handling partial matches with very little 71 | // overhead. 72 | position = position.saturating_sub((BUFFER_SIZE - SIGNATURE_LENGTH) as u64); 73 | reader.seek(SeekFrom::Start(position)).await?; 74 | } 75 | } 76 | 77 | /// A naive reverse linear search along the buffer for the specified signature bytes. 78 | /// 79 | /// This is already surprisingly performant. 
For instance, using memchr::memchr() to match for the first byte of the 80 | /// signature, and then manual byte comparisons for the remaining signature bytes was actually slower by a factor of 81 | /// 2.25. This method was explored as tokio's `read_until()` implementation uses memchr::memchr(). 82 | pub(crate) fn reverse_search_buffer(buffer: &[u8], signature: &[u8]) -> Option { 83 | 'outer: for index in (0..buffer.len()).rev() { 84 | for (signature_index, signature_byte) in signature.iter().rev().enumerate() { 85 | if let Some(next_index) = index.checked_sub(signature_index) { 86 | if buffer[next_index] != *signature_byte { 87 | continue 'outer; 88 | } 89 | } else { 90 | break 'outer; 91 | } 92 | } 93 | return Some(index); 94 | } 95 | None 96 | } 97 | -------------------------------------------------------------------------------- /src/base/read/io/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | pub(crate) mod combined_record; 5 | pub(crate) mod compressed; 6 | pub(crate) mod entry; 7 | pub(crate) mod hashed; 8 | pub(crate) mod locator; 9 | pub(crate) mod owned; 10 | 11 | pub use combined_record::CombinedCentralDirectoryRecord; 12 | 13 | use crate::string::{StringEncoding, ZipString}; 14 | use futures_lite::io::{AsyncRead, AsyncReadExt}; 15 | 16 | /// Read and return a dynamic length string from a reader which impls AsyncRead. 17 | pub(crate) async fn read_string(reader: R, length: usize, encoding: StringEncoding) -> std::io::Result 18 | where 19 | R: AsyncRead + Unpin, 20 | { 21 | Ok(ZipString::new(read_bytes(reader, length).await?, encoding)) 22 | } 23 | 24 | /// Read and return a dynamic length vector of bytes from a reader which impls AsyncRead. 
25 | pub(crate) async fn read_bytes(reader: R, length: usize) -> std::io::Result> 26 | where 27 | R: AsyncRead + Unpin, 28 | { 29 | let mut buffer = Vec::with_capacity(length); 30 | reader.take(length as u64).read_to_end(&mut buffer).await?; 31 | 32 | Ok(buffer) 33 | } 34 | 35 | /// A macro that returns the inner value of an Ok or early-returns in the case of an Err. 36 | /// 37 | /// This is almost identical to the ? operator but handles the situation when a Result is used in combination with 38 | /// Poll (eg. tokio's IO traits such as AsyncRead). 39 | macro_rules! poll_result_ok { 40 | ($poll:expr) => { 41 | match $poll { 42 | Ok(inner) => inner, 43 | Err(err) => return Poll::Ready(Err(err)), 44 | } 45 | }; 46 | } 47 | 48 | use poll_result_ok; 49 | -------------------------------------------------------------------------------- /src/base/read/io/owned.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use std::pin::Pin; 5 | use std::task::{Context, Poll}; 6 | 7 | use futures_lite::io::{AsyncBufRead, AsyncRead}; 8 | use pin_project::pin_project; 9 | 10 | /// A wrapping reader which holds an owned R or a mutable borrow to R. 11 | /// 12 | /// This is used to represent whether the supplied reader can be acted on concurrently or not (with an owned value 13 | /// suggesting that R implements some method of synchronisation & cloning). 14 | #[pin_project(project = OwnedReaderProj)] 15 | pub(crate) enum OwnedReader<'a, R> { 16 | Owned(#[pin] R), 17 | Borrow(#[pin] &'a mut R), 18 | } 19 | 20 | impl<'a, R> OwnedReader<'a, R> 21 | where 22 | R: AsyncBufRead + Unpin, 23 | { 24 | /// Consumes an owned reader and returns the inner value. 
pub(crate) fn owned_into_inner(self) -> R {
    match self {
        OwnedReader::Owned(inner) => inner,
        // Callers must only invoke this on the `Owned` variant; a borrow cannot yield an owned R.
        OwnedReader::Borrow(_) => panic!("not OwnedReader::Owned value"),
    }
}
}

impl<'a, R> AsyncBufRead for OwnedReader<'a, R>
where
    R: AsyncBufRead + Unpin,
{
    // Both trait methods simply delegate to whichever form of reader is held.
    fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<&[u8]>> {
        match self.project() {
            OwnedReaderProj::Owned(inner) => inner.poll_fill_buf(cx),
            OwnedReaderProj::Borrow(inner) => inner.poll_fill_buf(cx),
        }
    }

    fn consume(self: Pin<&mut Self>, amt: usize) {
        match self.project() {
            OwnedReaderProj::Owned(inner) => inner.consume(amt),
            OwnedReaderProj::Borrow(inner) => inner.consume(amt),
        }
    }
}

impl<'a, R> AsyncRead for OwnedReader<'a, R>
where
    R: AsyncBufRead + Unpin,
{
    fn poll_read(self: Pin<&mut Self>, c: &mut Context<'_>, b: &mut [u8]) -> Poll<std::io::Result<usize>> {
        match self.project() {
            OwnedReaderProj::Owned(inner) => inner.poll_read(c, b),
            OwnedReaderProj::Borrow(inner) => inner.poll_read(c, b),
        }
    }
}
--------------------------------------------------------------------------------
/src/base/read/mem.rs:
--------------------------------------------------------------------------------
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)

//! A concurrent ZIP reader which acts over an owned vector of bytes.
//!
//! Concurrency is achieved as a result of:
//! - Wrapping the provided vector of bytes within an [`Arc`] to allow shared ownership.
//! - Wrapping this [`Arc`] around a [`Cursor`] when reading (as the [`Arc`] can deref and coerce into a `&[u8]`).
//!
//! ### Usage
//! Unlike the [`seek`] module, we no longer hold a mutable reference to any inner reader which in turn, allows the
construction of concurrent [`ZipEntryReader`]s. Though, note that each individual [`ZipEntryReader`] cannot be sent 13 | //! between thread boundaries due to the masked lifetime requirement. Therefore, the overarching [`ZipFileReader`] 14 | //! should be cloned and moved into those contexts when needed. 15 | //! 16 | //! ### Concurrent Example 17 | //! ```no_run 18 | //! # use async_zip::base::read::mem::ZipFileReader; 19 | //! # use async_zip::error::Result; 20 | //! # use futures_lite::io::AsyncReadExt; 21 | //! # 22 | //! async fn run() -> Result<()> { 23 | //! let reader = ZipFileReader::new(Vec::new()).await?; 24 | //! let result = tokio::join!(read(&reader, 0), read(&reader, 1)); 25 | //! 26 | //! let data_0 = result.0?; 27 | //! let data_1 = result.1?; 28 | //! 29 | //! // Use data within current scope. 30 | //! 31 | //! Ok(()) 32 | //! } 33 | //! 34 | //! async fn read(reader: &ZipFileReader, index: usize) -> Result> { 35 | //! let mut entry = reader.reader_without_entry(index).await?; 36 | //! let mut data = Vec::new(); 37 | //! entry.read_to_end(&mut data).await?; 38 | //! Ok(data) 39 | //! } 40 | //! ``` 41 | //! 42 | //! ### Parallel Example 43 | //! ```no_run 44 | //! # use async_zip::base::read::mem::ZipFileReader; 45 | //! # use async_zip::error::Result; 46 | //! # use futures_lite::io::AsyncReadExt; 47 | //! # 48 | //! async fn run() -> Result<()> { 49 | //! let reader = ZipFileReader::new(Vec::new()).await?; 50 | //! 51 | //! let handle_0 = tokio::spawn(read(reader.clone(), 0)); 52 | //! let handle_1 = tokio::spawn(read(reader.clone(), 1)); 53 | //! 54 | //! let data_0 = handle_0.await.expect("thread panicked")?; 55 | //! let data_1 = handle_1.await.expect("thread panicked")?; 56 | //! 57 | //! // Use data within current scope. 58 | //! 59 | //! Ok(()) 60 | //! } 61 | //! 62 | //! async fn read(reader: ZipFileReader, index: usize) -> Result> { 63 | //! let mut entry = reader.reader_without_entry(index).await?; 64 | //! 
let mut data = Vec::new(); 65 | //! entry.read_to_end(&mut data).await?; 66 | //! Ok(data) 67 | //! } 68 | //! ``` 69 | 70 | #[cfg(doc)] 71 | use crate::base::read::seek; 72 | 73 | use crate::base::read::io::entry::ZipEntryReader; 74 | use crate::error::{Result, ZipError}; 75 | use crate::file::ZipFile; 76 | 77 | use std::sync::Arc; 78 | 79 | use futures_lite::io::Cursor; 80 | 81 | use super::io::entry::{WithEntry, WithoutEntry}; 82 | 83 | struct Inner { 84 | data: Vec, 85 | file: ZipFile, 86 | } 87 | 88 | // A concurrent ZIP reader which acts over an owned vector of bytes. 89 | #[derive(Clone)] 90 | pub struct ZipFileReader { 91 | inner: Arc, 92 | } 93 | 94 | impl ZipFileReader { 95 | /// Constructs a new ZIP reader from an owned vector of bytes. 96 | pub async fn new(data: Vec) -> Result { 97 | let file = crate::base::read::file(Cursor::new(&data)).await?; 98 | Ok(ZipFileReader::from_raw_parts(data, file)) 99 | } 100 | 101 | /// Constructs a ZIP reader from an owned vector of bytes and ZIP file information derived from those bytes. 102 | /// 103 | /// Providing a [`ZipFile`] that wasn't derived from those bytes may lead to inaccurate parsing. 104 | pub fn from_raw_parts(data: Vec, file: ZipFile) -> ZipFileReader { 105 | ZipFileReader { inner: Arc::new(Inner { data, file }) } 106 | } 107 | 108 | /// Returns this ZIP file's information. 109 | pub fn file(&self) -> &ZipFile { 110 | &self.inner.file 111 | } 112 | 113 | /// Returns the raw bytes provided to the reader during construction. 114 | pub fn data(&self) -> &[u8] { 115 | &self.inner.data 116 | } 117 | 118 | /// Returns a new entry reader if the provided index is valid. 
119 | pub async fn reader_without_entry(&self, index: usize) -> Result, WithoutEntry>> { 120 | let stored_entry = self.inner.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; 121 | let mut cursor = Cursor::new(&self.inner.data[..]); 122 | 123 | stored_entry.seek_to_data_offset(&mut cursor).await?; 124 | 125 | Ok(ZipEntryReader::new_with_owned( 126 | cursor, 127 | stored_entry.entry.compression(), 128 | stored_entry.entry.compressed_size(), 129 | )) 130 | } 131 | 132 | /// Returns a new entry reader if the provided index is valid. 133 | pub async fn reader_with_entry(&self, index: usize) -> Result, WithEntry<'_>>> { 134 | let stored_entry = self.inner.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; 135 | let mut cursor = Cursor::new(&self.inner.data[..]); 136 | 137 | stored_entry.seek_to_data_offset(&mut cursor).await?; 138 | 139 | let reader = ZipEntryReader::new_with_owned( 140 | cursor, 141 | stored_entry.entry.compression(), 142 | stored_entry.entry.compressed_size(), 143 | ); 144 | 145 | Ok(reader.into_with_entry(stored_entry)) 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /src/base/read/seek.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | //! A ZIP reader which acts over a seekable source. 5 | //! 6 | //! ### Example 7 | //! ```no_run 8 | //! # use async_zip::base::read::seek::ZipFileReader; 9 | //! # use async_zip::error::Result; 10 | //! # use futures_lite::io::AsyncReadExt; 11 | //! # use tokio::fs::File; 12 | //! # use tokio_util::compat::TokioAsyncReadCompatExt; 13 | //! # use tokio::io::BufReader; 14 | //! # 15 | //! async fn run() -> Result<()> { 16 | //! let mut data = BufReader::new(File::open("./foo.zip").await?); 17 | //! 
let mut reader = ZipFileReader::new(data.compat()).await?; 18 | //! 19 | //! let mut data = Vec::new(); 20 | //! let mut entry = reader.reader_without_entry(0).await?; 21 | //! entry.read_to_end(&mut data).await?; 22 | //! 23 | //! // Use data within current scope. 24 | //! 25 | //! Ok(()) 26 | //! } 27 | //! ``` 28 | 29 | use crate::base::read::io::entry::ZipEntryReader; 30 | use crate::error::{Result, ZipError}; 31 | use crate::file::ZipFile; 32 | 33 | #[cfg(feature = "tokio")] 34 | use crate::tokio::read::seek::ZipFileReader as TokioZipFileReader; 35 | 36 | use futures_lite::io::{AsyncBufRead, AsyncSeek}; 37 | 38 | #[cfg(feature = "tokio")] 39 | use tokio_util::compat::{Compat, TokioAsyncReadCompatExt}; 40 | 41 | use super::io::entry::{WithEntry, WithoutEntry}; 42 | 43 | /// A ZIP reader which acts over a seekable source. 44 | #[derive(Clone)] 45 | pub struct ZipFileReader { 46 | reader: R, 47 | file: ZipFile, 48 | } 49 | 50 | impl ZipFileReader 51 | where 52 | R: AsyncBufRead + AsyncSeek + Unpin, 53 | { 54 | /// Constructs a new ZIP reader from a seekable source. 55 | pub async fn new(mut reader: R) -> Result> { 56 | let file = crate::base::read::file(&mut reader).await?; 57 | Ok(ZipFileReader::from_raw_parts(reader, file)) 58 | } 59 | 60 | /// Constructs a ZIP reader from a seekable source and ZIP file information derived from that source. 61 | /// 62 | /// Providing a [`ZipFile`] that wasn't derived from that source may lead to inaccurate parsing. 63 | pub fn from_raw_parts(reader: R, file: ZipFile) -> ZipFileReader { 64 | ZipFileReader { reader, file } 65 | } 66 | 67 | /// Returns this ZIP file's information. 68 | pub fn file(&self) -> &ZipFile { 69 | &self.file 70 | } 71 | 72 | /// Returns a mutable reference to the inner seekable source. 73 | /// 74 | /// Swapping the source (eg. via std::mem operations) may lead to inaccurate parsing. 
75 | pub fn inner_mut(&mut self) -> &mut R { 76 | &mut self.reader 77 | } 78 | 79 | /// Returns the inner seekable source by consuming self. 80 | pub fn into_inner(self) -> R { 81 | self.reader 82 | } 83 | 84 | /// Returns a new entry reader if the provided index is valid. 85 | pub async fn reader_without_entry(&mut self, index: usize) -> Result> { 86 | let stored_entry = self.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; 87 | stored_entry.seek_to_data_offset(&mut self.reader).await?; 88 | 89 | Ok(ZipEntryReader::new_with_borrow( 90 | &mut self.reader, 91 | stored_entry.entry.compression(), 92 | stored_entry.entry.compressed_size(), 93 | )) 94 | } 95 | 96 | /// Returns a new entry reader if the provided index is valid. 97 | pub async fn reader_with_entry(&mut self, index: usize) -> Result>> { 98 | let stored_entry = self.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; 99 | 100 | stored_entry.seek_to_data_offset(&mut self.reader).await?; 101 | 102 | let reader = ZipEntryReader::new_with_borrow( 103 | &mut self.reader, 104 | stored_entry.entry.compression(), 105 | stored_entry.entry.compressed_size(), 106 | ); 107 | 108 | Ok(reader.into_with_entry(stored_entry)) 109 | } 110 | 111 | /// Returns a new entry reader if the provided index is valid. 112 | /// Consumes self 113 | pub async fn into_entry<'a>(mut self, index: usize) -> Result> 114 | where 115 | R: 'a, 116 | { 117 | let stored_entry = self.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; 118 | 119 | stored_entry.seek_to_data_offset(&mut self.reader).await?; 120 | 121 | Ok(ZipEntryReader::new_with_owned( 122 | self.reader, 123 | stored_entry.entry.compression(), 124 | stored_entry.entry.compressed_size(), 125 | )) 126 | } 127 | } 128 | 129 | #[cfg(feature = "tokio")] 130 | impl ZipFileReader> 131 | where 132 | R: tokio::io::AsyncBufRead + tokio::io::AsyncSeek + Unpin, 133 | { 134 | /// Constructs a new tokio-specific ZIP reader from a seekable source. 
135 | pub async fn with_tokio(reader: R) -> Result> { 136 | let mut reader = reader.compat(); 137 | let file = crate::base::read::file(&mut reader).await?; 138 | Ok(ZipFileReader::from_raw_parts(reader, file)) 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/base/read/stream.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | //! A ZIP reader which acts over a non-seekable source. 5 | //! 6 | //! # API Design 7 | //! As opposed to other readers provided by this crate, it's important that the data of an entry is fully read before 8 | //! the proceeding entry is read. This is as a result of not being able to seek forwards or backwards, so we must end 9 | //! up at the start of the next entry. 10 | //! 11 | //! **We encode this invariant within Rust's type system so that it can be enforced at compile time.** 12 | //! 13 | //! This requires that any transition methods between these encoded types consume the reader and provide a new owned 14 | //! reader back. This is certainly something to keep in mind when working with this reader, but idiomatic code can 15 | //! still be produced nevertheless. 16 | //! 17 | //! # Considerations 18 | //! As the central directory of a ZIP archive is stored at the end of it, a non-seekable reader doesn't have access 19 | //! to it. We have to rely on information provided within the local file header which may not be accurate or complete. 20 | //! This results in: 21 | //! - The inability to read ZIP entries using the combination of a data descriptor and the Stored compression method. 22 | //! - No file comment being available (defaults to an empty string). 23 | //! - No internal or external file attributes being available (defaults to 0). 24 | //! 
- The extra field data potentially being inconsistent with what's stored in the central directory. 25 | //! - None of the following being available when the entry was written with a data descriptor (defaults to 0): 26 | //! - CRC 27 | //! - compressed size 28 | //! - uncompressed size 29 | //! 30 | //! # Example 31 | //! ```no_run 32 | //! # use futures_lite::io::Cursor; 33 | //! # use async_zip::error::Result; 34 | //! # use async_zip::base::read::stream::ZipFileReader; 35 | //! # 36 | //! # async fn run() -> Result<()> { 37 | //! let mut zip = ZipFileReader::new(Cursor::new([0; 0])); 38 | //! 39 | //! // Print the name of every file in a ZIP archive. 40 | //! while let Some(entry) = zip.next_with_entry().await? { 41 | //! println!("File: {}", entry.reader().entry().filename().as_str().unwrap()); 42 | //! zip = entry.skip().await?; 43 | //! } 44 | //! # 45 | //! # Ok(()) 46 | //! # } 47 | //! ``` 48 | 49 | use crate::base::read::io::entry::ZipEntryReader; 50 | use crate::error::Result; 51 | use crate::error::ZipError; 52 | 53 | use crate::spec::consts::DATA_DESCRIPTOR_LENGTH; 54 | use crate::spec::consts::DATA_DESCRIPTOR_SIGNATURE; 55 | use crate::spec::consts::SIGNATURE_LENGTH; 56 | #[cfg(feature = "tokio")] 57 | use crate::tokio::read::stream::Ready as TokioReady; 58 | 59 | use futures_lite::io::AsyncBufRead; 60 | use futures_lite::io::AsyncReadExt; 61 | 62 | #[cfg(feature = "tokio")] 63 | use tokio_util::compat::TokioAsyncReadCompatExt; 64 | 65 | use super::io::entry::WithEntry; 66 | use super::io::entry::WithoutEntry; 67 | 68 | /// A type which encodes that [`ZipFileReader`] is ready to open a new entry. 69 | pub struct Ready(R); 70 | 71 | /// A type which encodes that [`ZipFileReader`] is currently reading an entry. 72 | pub struct Reading<'a, R, E>(ZipEntryReader<'a, R, E>, bool); 73 | 74 | /// A ZIP reader which acts over a non-seekable source. 75 | /// 76 | /// See the [module-level docs](.) for more information. 
77 | #[derive(Clone)] 78 | pub struct ZipFileReader(S); 79 | 80 | impl<'a, R> ZipFileReader> 81 | where 82 | R: AsyncBufRead + Unpin + 'a, 83 | { 84 | /// Constructs a new ZIP reader from a non-seekable source. 85 | pub fn new(reader: R) -> Self { 86 | Self(Ready(reader)) 87 | } 88 | 89 | /// Opens the next entry for reading if the central directory hasn’t yet been reached. 90 | pub async fn next_without_entry(mut self) -> Result>>> { 91 | let entry = match crate::base::read::lfh(&mut self.0 .0).await? { 92 | Some(entry) => entry, 93 | None => return Ok(None), 94 | }; 95 | 96 | let length = if entry.data_descriptor { u64::MAX } else { entry.compressed_size }; 97 | let reader = ZipEntryReader::new_with_owned(self.0 .0, entry.compression, length); 98 | 99 | Ok(Some(ZipFileReader(Reading(reader, entry.data_descriptor)))) 100 | } 101 | 102 | /// Opens the next entry for reading if the central directory hasn’t yet been reached. 103 | pub async fn next_with_entry(mut self) -> Result>>>> { 104 | let entry = match crate::base::read::lfh(&mut self.0 .0).await? { 105 | Some(entry) => entry, 106 | None => return Ok(None), 107 | }; 108 | 109 | let length = if entry.data_descriptor { u64::MAX } else { entry.compressed_size }; 110 | let reader = ZipEntryReader::new_with_owned(self.0 .0, entry.compression, length); 111 | let data_descriptor = entry.data_descriptor; 112 | 113 | Ok(Some(ZipFileReader(Reading(reader.into_with_entry_owned(entry), data_descriptor)))) 114 | } 115 | 116 | /// Consumes the `ZipFileReader` returning the original `reader` 117 | pub async fn into_inner(self) -> R { 118 | self.0 .0 119 | } 120 | } 121 | 122 | #[cfg(feature = "tokio")] 123 | impl ZipFileReader> 124 | where 125 | R: tokio::io::AsyncBufRead + Unpin, 126 | { 127 | /// Constructs a new tokio-specific ZIP reader from a non-seekable source. 
128 | pub fn with_tokio(reader: R) -> ZipFileReader> { 129 | Self(Ready(reader.compat())) 130 | } 131 | } 132 | 133 | impl<'a, R, E> ZipFileReader> 134 | where 135 | R: AsyncBufRead + Unpin, 136 | { 137 | /// Returns an immutable reference to the inner entry reader. 138 | pub fn reader(&self) -> &ZipEntryReader<'a, R, E> { 139 | &self.0 .0 140 | } 141 | 142 | /// Returns a mutable reference to the inner entry reader. 143 | pub fn reader_mut(&mut self) -> &mut ZipEntryReader<'a, R, E> { 144 | &mut self.0 .0 145 | } 146 | 147 | /// Converts the reader back into the Ready state if EOF has been reached. 148 | pub async fn done(mut self) -> Result>> { 149 | if self.0 .0.read(&mut [0; 1]).await? != 0 { 150 | return Err(ZipError::EOFNotReached); 151 | } 152 | 153 | let mut inner = self.0 .0.into_inner(); 154 | 155 | // Has data descriptor. 156 | if self.0 .1 { 157 | consume_data_descriptor(&mut inner).await?; 158 | } 159 | 160 | Ok(ZipFileReader(Ready(inner))) 161 | } 162 | 163 | /// Reads until EOF and converts the reader back into the Ready state. 164 | pub async fn skip(mut self) -> Result>> { 165 | while self.0 .0.read(&mut [0; 2048]).await? != 0 {} 166 | let mut inner = self.0 .0.into_inner(); 167 | 168 | // Has data descriptor. 
169 | if self.0 .1 { 170 | consume_data_descriptor(&mut inner).await?; 171 | } 172 | 173 | Ok(ZipFileReader(Ready(inner))) 174 | } 175 | } 176 | 177 | async fn consume_data_descriptor(reader: &mut R) -> Result<()> { 178 | let mut descriptor: [u8; DATA_DESCRIPTOR_LENGTH] = [0; DATA_DESCRIPTOR_LENGTH]; 179 | reader.read_exact(&mut descriptor).await?; 180 | 181 | if descriptor[0..SIGNATURE_LENGTH] == DATA_DESCRIPTOR_SIGNATURE.to_le_bytes() { 182 | let mut tail: [u8; SIGNATURE_LENGTH] = [0; SIGNATURE_LENGTH]; 183 | reader.read_exact(&mut tail).await?; 184 | } 185 | 186 | Ok(()) 187 | } 188 | -------------------------------------------------------------------------------- /src/base/write/compressed_writer.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::base::write::io::offset::AsyncOffsetWriter; 5 | use crate::spec::Compression; 6 | 7 | use std::io::Error; 8 | use std::pin::Pin; 9 | use std::task::{Context, Poll}; 10 | 11 | #[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))] 12 | use async_compression::futures::write; 13 | use futures_lite::io::AsyncWrite; 14 | 15 | pub enum CompressedAsyncWriter<'b, W: AsyncWrite + Unpin> { 16 | Stored(ShutdownIgnoredWriter<&'b mut AsyncOffsetWriter>), 17 | #[cfg(feature = "deflate")] 18 | Deflate(write::DeflateEncoder>>), 19 | #[cfg(feature = "bzip2")] 20 | Bz(write::BzEncoder>>), 21 | #[cfg(feature = "lzma")] 22 | Lzma(write::LzmaEncoder>>), 23 | #[cfg(feature = "zstd")] 24 | Zstd(write::ZstdEncoder>>), 25 | #[cfg(feature = "xz")] 26 | Xz(write::XzEncoder>>), 27 | } 28 | 29 | impl<'b, W: AsyncWrite + Unpin> CompressedAsyncWriter<'b, W> { 30 | pub fn from_raw(writer: &'b mut AsyncOffsetWriter, compression: Compression) -> Self { 31 | match compression { 32 | Compression::Stored => 
CompressedAsyncWriter::Stored(ShutdownIgnoredWriter(writer)), 33 | #[cfg(feature = "deflate")] 34 | Compression::Deflate => { 35 | CompressedAsyncWriter::Deflate(write::DeflateEncoder::new(ShutdownIgnoredWriter(writer))) 36 | } 37 | #[cfg(feature = "deflate64")] 38 | Compression::Deflate64 => panic!("writing deflate64 is not supported"), 39 | #[cfg(feature = "bzip2")] 40 | Compression::Bz => CompressedAsyncWriter::Bz(write::BzEncoder::new(ShutdownIgnoredWriter(writer))), 41 | #[cfg(feature = "lzma")] 42 | Compression::Lzma => CompressedAsyncWriter::Lzma(write::LzmaEncoder::new(ShutdownIgnoredWriter(writer))), 43 | #[cfg(feature = "zstd")] 44 | Compression::Zstd => CompressedAsyncWriter::Zstd(write::ZstdEncoder::new(ShutdownIgnoredWriter(writer))), 45 | #[cfg(feature = "xz")] 46 | Compression::Xz => CompressedAsyncWriter::Xz(write::XzEncoder::new(ShutdownIgnoredWriter(writer))), 47 | } 48 | } 49 | 50 | pub fn into_inner(self) -> &'b mut AsyncOffsetWriter { 51 | match self { 52 | CompressedAsyncWriter::Stored(inner) => inner.into_inner(), 53 | #[cfg(feature = "deflate")] 54 | CompressedAsyncWriter::Deflate(inner) => inner.into_inner().into_inner(), 55 | #[cfg(feature = "bzip2")] 56 | CompressedAsyncWriter::Bz(inner) => inner.into_inner().into_inner(), 57 | #[cfg(feature = "lzma")] 58 | CompressedAsyncWriter::Lzma(inner) => inner.into_inner().into_inner(), 59 | #[cfg(feature = "zstd")] 60 | CompressedAsyncWriter::Zstd(inner) => inner.into_inner().into_inner(), 61 | #[cfg(feature = "xz")] 62 | CompressedAsyncWriter::Xz(inner) => inner.into_inner().into_inner(), 63 | } 64 | } 65 | } 66 | 67 | impl<'b, W: AsyncWrite + Unpin> AsyncWrite for CompressedAsyncWriter<'b, W> { 68 | fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll> { 69 | match *self { 70 | CompressedAsyncWriter::Stored(ref mut inner) => Pin::new(inner).poll_write(cx, buf), 71 | #[cfg(feature = "deflate")] 72 | CompressedAsyncWriter::Deflate(ref mut inner) => 
Pin::new(inner).poll_write(cx, buf), 73 | #[cfg(feature = "bzip2")] 74 | CompressedAsyncWriter::Bz(ref mut inner) => Pin::new(inner).poll_write(cx, buf), 75 | #[cfg(feature = "lzma")] 76 | CompressedAsyncWriter::Lzma(ref mut inner) => Pin::new(inner).poll_write(cx, buf), 77 | #[cfg(feature = "zstd")] 78 | CompressedAsyncWriter::Zstd(ref mut inner) => Pin::new(inner).poll_write(cx, buf), 79 | #[cfg(feature = "xz")] 80 | CompressedAsyncWriter::Xz(ref mut inner) => Pin::new(inner).poll_write(cx, buf), 81 | } 82 | } 83 | 84 | fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 85 | match *self { 86 | CompressedAsyncWriter::Stored(ref mut inner) => Pin::new(inner).poll_flush(cx), 87 | #[cfg(feature = "deflate")] 88 | CompressedAsyncWriter::Deflate(ref mut inner) => Pin::new(inner).poll_flush(cx), 89 | #[cfg(feature = "bzip2")] 90 | CompressedAsyncWriter::Bz(ref mut inner) => Pin::new(inner).poll_flush(cx), 91 | #[cfg(feature = "lzma")] 92 | CompressedAsyncWriter::Lzma(ref mut inner) => Pin::new(inner).poll_flush(cx), 93 | #[cfg(feature = "zstd")] 94 | CompressedAsyncWriter::Zstd(ref mut inner) => Pin::new(inner).poll_flush(cx), 95 | #[cfg(feature = "xz")] 96 | CompressedAsyncWriter::Xz(ref mut inner) => Pin::new(inner).poll_flush(cx), 97 | } 98 | } 99 | 100 | fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 101 | match *self { 102 | CompressedAsyncWriter::Stored(ref mut inner) => Pin::new(inner).poll_close(cx), 103 | #[cfg(feature = "deflate")] 104 | CompressedAsyncWriter::Deflate(ref mut inner) => Pin::new(inner).poll_close(cx), 105 | #[cfg(feature = "bzip2")] 106 | CompressedAsyncWriter::Bz(ref mut inner) => Pin::new(inner).poll_close(cx), 107 | #[cfg(feature = "lzma")] 108 | CompressedAsyncWriter::Lzma(ref mut inner) => Pin::new(inner).poll_close(cx), 109 | #[cfg(feature = "zstd")] 110 | CompressedAsyncWriter::Zstd(ref mut inner) => Pin::new(inner).poll_close(cx), 111 | #[cfg(feature = "xz")] 112 | CompressedAsyncWriter::Xz(ref 
mut inner) => Pin::new(inner).poll_close(cx), 113 | } 114 | } 115 | } 116 | 117 | pub struct ShutdownIgnoredWriter(W); 118 | 119 | impl ShutdownIgnoredWriter { 120 | pub fn into_inner(self) -> W { 121 | self.0 122 | } 123 | } 124 | 125 | impl AsyncWrite for ShutdownIgnoredWriter { 126 | fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll> { 127 | Pin::new(&mut self.0).poll_write(cx, buf) 128 | } 129 | 130 | fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 131 | Pin::new(&mut self.0).poll_flush(cx) 132 | } 133 | 134 | fn poll_close(self: Pin<&mut Self>, _: &mut Context) -> Poll> { 135 | Poll::Ready(Ok(())) 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /src/base/write/entry_stream.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::base::write::compressed_writer::CompressedAsyncWriter; 5 | use crate::base::write::get_or_put_info_zip_unicode_comment_extra_field_mut; 6 | use crate::base::write::get_or_put_info_zip_unicode_path_extra_field_mut; 7 | use crate::base::write::io::offset::AsyncOffsetWriter; 8 | use crate::base::write::CentralDirectoryEntry; 9 | use crate::base::write::ZipFileWriter; 10 | use crate::entry::ZipEntry; 11 | use crate::error::{Result, Zip64ErrorCase, ZipError}; 12 | use crate::spec::extra_field::ExtraFieldAsBytes; 13 | use crate::spec::header::InfoZipUnicodeCommentExtraField; 14 | use crate::spec::header::InfoZipUnicodePathExtraField; 15 | use crate::spec::header::{ 16 | CentralDirectoryRecord, ExtraField, GeneralPurposeFlag, HeaderId, LocalFileHeader, 17 | Zip64ExtendedInformationExtraField, 18 | }; 19 | use crate::string::StringEncoding; 20 | 21 | use std::io::Error; 22 | use std::pin::Pin; 23 | use std::task::{Context, Poll}; 24 | 25 | use 
crate::base::read::get_zip64_extra_field_mut; 26 | use crate::spec::consts::{NON_ZIP64_MAX_NUM_FILES, NON_ZIP64_MAX_SIZE}; 27 | use crc32fast::Hasher; 28 | use futures_lite::io::{AsyncWrite, AsyncWriteExt}; 29 | 30 | /// An entry writer which supports the streaming of data (ie. the writing of unknown size or data at runtime). 31 | /// 32 | /// # Note 33 | /// - This writer cannot be manually constructed; instead, use [`ZipFileWriter::write_entry_stream()`]. 34 | /// - [`EntryStreamWriter::close()`] must be called before a stream writer goes out of scope. 35 | /// - Utilities for working with [`AsyncWrite`] values are provided by [`AsyncWriteExt`]. 36 | pub struct EntryStreamWriter<'b, W: AsyncWrite + Unpin> { 37 | writer: AsyncOffsetWriter>, 38 | cd_entries: &'b mut Vec, 39 | entry: ZipEntry, 40 | hasher: Hasher, 41 | lfh: LocalFileHeader, 42 | lfh_offset: u64, 43 | data_offset: u64, 44 | force_no_zip64: bool, 45 | /// To write back to the original writer if zip64 is required. 46 | is_zip64: &'b mut bool, 47 | } 48 | 49 | impl<'b, W: AsyncWrite + Unpin> EntryStreamWriter<'b, W> { 50 | pub(crate) async fn from_raw( 51 | writer: &'b mut ZipFileWriter, 52 | mut entry: ZipEntry, 53 | ) -> Result> { 54 | let lfh_offset = writer.writer.offset(); 55 | let lfh = EntryStreamWriter::write_lfh(writer, &mut entry).await?; 56 | let data_offset = writer.writer.offset(); 57 | let force_no_zip64 = writer.force_no_zip64; 58 | 59 | let cd_entries = &mut writer.cd_entries; 60 | let is_zip64 = &mut writer.is_zip64; 61 | let writer = AsyncOffsetWriter::new(CompressedAsyncWriter::from_raw(&mut writer.writer, entry.compression())); 62 | 63 | Ok(EntryStreamWriter { 64 | writer, 65 | cd_entries, 66 | entry, 67 | lfh, 68 | lfh_offset, 69 | data_offset, 70 | hasher: Hasher::new(), 71 | force_no_zip64, 72 | is_zip64, 73 | }) 74 | } 75 | 76 | async fn write_lfh(writer: &'b mut ZipFileWriter, entry: &mut ZipEntry) -> Result { 77 | // Always emit a zip64 extended field, even if we don't need it, 
because we *might* need it. 78 | // If we are forcing no zip, we will have to error later if the file is too large. 79 | let (lfh_compressed, lfh_uncompressed) = if !writer.force_no_zip64 { 80 | if !writer.is_zip64 { 81 | writer.is_zip64 = true; 82 | } 83 | entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(Zip64ExtendedInformationExtraField { 84 | header_id: HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD, 85 | uncompressed_size: Some(entry.uncompressed_size), 86 | compressed_size: Some(entry.compressed_size), 87 | relative_header_offset: None, 88 | disk_start_number: None, 89 | })); 90 | 91 | (NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE) 92 | } else { 93 | if entry.compressed_size > NON_ZIP64_MAX_SIZE as u64 || entry.uncompressed_size > NON_ZIP64_MAX_SIZE as u64 94 | { 95 | return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile)); 96 | } 97 | 98 | (entry.compressed_size as u32, entry.uncompressed_size as u32) 99 | }; 100 | 101 | let utf8_without_alternative = 102 | entry.filename().is_utf8_without_alternative() && entry.comment().is_utf8_without_alternative(); 103 | if !utf8_without_alternative { 104 | if matches!(entry.filename().encoding(), StringEncoding::Utf8) { 105 | let u_file_name = entry.filename().as_bytes().to_vec(); 106 | if !u_file_name.is_empty() { 107 | let basic_crc32 = 108 | crc32fast::hash(entry.filename().alternative().unwrap_or_else(|| entry.filename().as_bytes())); 109 | let upath_field = get_or_put_info_zip_unicode_path_extra_field_mut(entry.extra_fields.as_mut()); 110 | if let InfoZipUnicodePathExtraField::V1 { crc32, unicode } = upath_field { 111 | *crc32 = basic_crc32; 112 | *unicode = u_file_name; 113 | } 114 | } 115 | } 116 | if matches!(entry.comment().encoding(), StringEncoding::Utf8) { 117 | let u_comment = entry.comment().as_bytes().to_vec(); 118 | if !u_comment.is_empty() { 119 | let basic_crc32 = 120 | crc32fast::hash(entry.comment().alternative().unwrap_or_else(|| entry.comment().as_bytes())); 121 | let ucom_field = 
get_or_put_info_zip_unicode_comment_extra_field_mut(entry.extra_fields.as_mut()); 122 | if let InfoZipUnicodeCommentExtraField::V1 { crc32, unicode } = ucom_field { 123 | *crc32 = basic_crc32; 124 | *unicode = u_comment; 125 | } 126 | } 127 | } 128 | } 129 | 130 | let filename_basic = entry.filename().alternative().unwrap_or_else(|| entry.filename().as_bytes()); 131 | 132 | let lfh = LocalFileHeader { 133 | compressed_size: lfh_compressed, 134 | uncompressed_size: lfh_uncompressed, 135 | compression: entry.compression().into(), 136 | crc: entry.crc32, 137 | extra_field_length: entry 138 | .extra_fields() 139 | .count_bytes() 140 | .try_into() 141 | .map_err(|_| ZipError::ExtraFieldTooLarge)?, 142 | file_name_length: filename_basic.len().try_into().map_err(|_| ZipError::FileNameTooLarge)?, 143 | mod_time: entry.last_modification_date().time, 144 | mod_date: entry.last_modification_date().date, 145 | version: crate::spec::version::as_needed_to_extract(entry), 146 | flags: GeneralPurposeFlag { 147 | data_descriptor: true, 148 | encrypted: false, 149 | filename_unicode: utf8_without_alternative, 150 | }, 151 | }; 152 | 153 | writer.writer.write_all(&crate::spec::consts::LFH_SIGNATURE.to_le_bytes()).await?; 154 | writer.writer.write_all(&lfh.as_slice()).await?; 155 | writer.writer.write_all(filename_basic).await?; 156 | writer.writer.write_all(&entry.extra_fields().as_bytes()).await?; 157 | 158 | Ok(lfh) 159 | } 160 | 161 | /// Consumes this entry writer and completes all closing tasks. 162 | /// 163 | /// This includes: 164 | /// - Finalising the CRC32 hash value for the written data. 165 | /// - Calculating the compressed and uncompressed byte sizes. 166 | /// - Constructing a central directory header. 167 | /// - Pushing that central directory header to the [`ZipFileWriter`]'s store. 168 | /// 169 | /// Failure to call this function before going out of scope would result in a corrupted ZIP file. 
170 | pub async fn close(mut self) -> Result<()> { 171 | self.writer.close().await?; 172 | 173 | let crc = self.hasher.finalize(); 174 | let uncompressed_size = self.writer.offset(); 175 | let inner_writer = self.writer.into_inner().into_inner(); 176 | let compressed_size = inner_writer.offset() - self.data_offset; 177 | 178 | let (cdr_compressed_size, cdr_uncompressed_size, lh_offset) = if self.force_no_zip64 { 179 | if uncompressed_size > NON_ZIP64_MAX_SIZE as u64 180 | || compressed_size > NON_ZIP64_MAX_SIZE as u64 181 | || self.lfh_offset > NON_ZIP64_MAX_SIZE as u64 182 | { 183 | return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile)); 184 | } 185 | (uncompressed_size as u32, compressed_size as u32, self.lfh_offset as u32) 186 | } else { 187 | // When streaming an entry, we are always using a zip64 field. 188 | match get_zip64_extra_field_mut(&mut self.entry.extra_fields) { 189 | // This case shouldn't be necessary but is included for completeness. 190 | None => { 191 | self.entry.extra_fields.push(ExtraField::Zip64ExtendedInformation( 192 | Zip64ExtendedInformationExtraField { 193 | header_id: HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD, 194 | uncompressed_size: Some(uncompressed_size), 195 | compressed_size: Some(compressed_size), 196 | relative_header_offset: Some(self.lfh_offset), 197 | disk_start_number: None, 198 | }, 199 | )); 200 | } 201 | Some(zip64) => { 202 | zip64.uncompressed_size = Some(uncompressed_size); 203 | zip64.compressed_size = Some(compressed_size); 204 | zip64.relative_header_offset = Some(self.lfh_offset); 205 | } 206 | } 207 | self.lfh.extra_field_length = 208 | self.entry.extra_fields().count_bytes().try_into().map_err(|_| ZipError::ExtraFieldTooLarge)?; 209 | 210 | (NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE) 211 | }; 212 | 213 | inner_writer.write_all(&crate::spec::consts::DATA_DESCRIPTOR_SIGNATURE.to_le_bytes()).await?; 214 | inner_writer.write_all(&crc.to_le_bytes()).await?; 215 | 
inner_writer.write_all(&cdr_compressed_size.to_le_bytes()).await?; 216 | inner_writer.write_all(&cdr_uncompressed_size.to_le_bytes()).await?; 217 | 218 | let comment_basic = self.entry.comment().alternative().unwrap_or_else(|| self.entry.comment().as_bytes()); 219 | 220 | let cdh = CentralDirectoryRecord { 221 | compressed_size: cdr_compressed_size, 222 | uncompressed_size: cdr_uncompressed_size, 223 | crc, 224 | v_made_by: crate::spec::version::as_made_by(), 225 | v_needed: self.lfh.version, 226 | compression: self.lfh.compression, 227 | extra_field_length: self.lfh.extra_field_length, 228 | file_name_length: self.lfh.file_name_length, 229 | file_comment_length: comment_basic.len().try_into().map_err(|_| ZipError::CommentTooLarge)?, 230 | mod_time: self.lfh.mod_time, 231 | mod_date: self.lfh.mod_date, 232 | flags: self.lfh.flags, 233 | disk_start: 0, 234 | inter_attr: self.entry.internal_file_attribute(), 235 | exter_attr: self.entry.external_file_attribute(), 236 | lh_offset, 237 | }; 238 | 239 | self.cd_entries.push(CentralDirectoryEntry { header: cdh, entry: self.entry }); 240 | // Ensure that we can fit this many files in this archive if forcing no zip64 241 | if self.cd_entries.len() > NON_ZIP64_MAX_NUM_FILES as usize { 242 | if self.force_no_zip64 { 243 | return Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles)); 244 | } 245 | if !*self.is_zip64 { 246 | *self.is_zip64 = true; 247 | } 248 | } 249 | 250 | Ok(()) 251 | } 252 | } 253 | 254 | impl<'a, W: AsyncWrite + Unpin> AsyncWrite for EntryStreamWriter<'a, W> { 255 | fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll> { 256 | let poll = Pin::new(&mut self.writer).poll_write(cx, buf); 257 | 258 | if let Poll::Ready(Ok(written)) = poll { 259 | self.hasher.update(&buf[0..written]); 260 | } 261 | 262 | poll 263 | } 264 | 265 | fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 266 | Pin::new(&mut self.writer).poll_flush(cx) 267 | } 268 | 269 | fn poll_close(mut 
self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 270 | Pin::new(&mut self.writer).poll_close(cx) 271 | } 272 | } 273 | -------------------------------------------------------------------------------- /src/base/write/entry_whole.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::base::write::get_or_put_info_zip_unicode_comment_extra_field_mut; 5 | use crate::base::write::get_or_put_info_zip_unicode_path_extra_field_mut; 6 | use crate::base::write::{CentralDirectoryEntry, ZipFileWriter}; 7 | use crate::entry::ZipEntry; 8 | use crate::error::{Result, Zip64ErrorCase, ZipError}; 9 | use crate::spec::extra_field::Zip64ExtendedInformationExtraFieldBuilder; 10 | use crate::spec::header::{InfoZipUnicodeCommentExtraField, InfoZipUnicodePathExtraField}; 11 | use crate::spec::{ 12 | extra_field::ExtraFieldAsBytes, 13 | header::{CentralDirectoryRecord, ExtraField, GeneralPurposeFlag, LocalFileHeader}, 14 | Compression, 15 | }; 16 | use crate::StringEncoding; 17 | #[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))] 18 | use futures_lite::io::Cursor; 19 | 20 | use crate::spec::consts::{NON_ZIP64_MAX_NUM_FILES, NON_ZIP64_MAX_SIZE}; 21 | #[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))] 22 | use async_compression::futures::write; 23 | use futures_lite::io::{AsyncWrite, AsyncWriteExt}; 24 | 25 | pub struct EntryWholeWriter<'b, 'c, W: AsyncWrite + Unpin> { 26 | writer: &'b mut ZipFileWriter, 27 | entry: ZipEntry, 28 | data: &'c [u8], 29 | } 30 | 31 | impl<'b, 'c, W: AsyncWrite + Unpin> EntryWholeWriter<'b, 'c, W> { 32 | pub fn from_raw(writer: &'b mut ZipFileWriter, entry: ZipEntry, data: &'c [u8]) -> Self { 33 | Self { writer, entry, data } 34 | } 35 | 36 | pub async fn write(mut self) 
-> Result<()> { 37 | let mut _compressed_data: Option> = None; 38 | let compressed_data = match self.entry.compression() { 39 | Compression::Stored => self.data, 40 | #[cfg(any( 41 | feature = "deflate", 42 | feature = "bzip2", 43 | feature = "zstd", 44 | feature = "lzma", 45 | feature = "xz", 46 | feature = "deflate64" 47 | ))] 48 | _ => { 49 | _compressed_data = 50 | Some(compress(self.entry.compression(), self.data, self.entry.compression_level).await); 51 | _compressed_data.as_ref().unwrap() 52 | } 53 | }; 54 | 55 | let mut zip64_extra_field_builder = None; 56 | 57 | let (lfh_uncompressed_size, lfh_compressed_size) = if self.data.len() as u64 > NON_ZIP64_MAX_SIZE as u64 58 | || compressed_data.len() as u64 > NON_ZIP64_MAX_SIZE as u64 59 | { 60 | if self.writer.force_no_zip64 { 61 | return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile)); 62 | } 63 | if !self.writer.is_zip64 { 64 | self.writer.is_zip64 = true; 65 | } 66 | zip64_extra_field_builder = Some( 67 | Zip64ExtendedInformationExtraFieldBuilder::new() 68 | .sizes(compressed_data.len() as u64, self.data.len() as u64), 69 | ); 70 | (NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE) 71 | } else { 72 | (self.data.len() as u32, compressed_data.len() as u32) 73 | }; 74 | 75 | let lh_offset = if self.writer.writer.offset() > NON_ZIP64_MAX_SIZE as u64 { 76 | if self.writer.force_no_zip64 { 77 | return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile)); 78 | } 79 | if !self.writer.is_zip64 { 80 | self.writer.is_zip64 = true; 81 | } 82 | 83 | if let Some(zip64_extra_field) = zip64_extra_field_builder { 84 | zip64_extra_field_builder = Some(zip64_extra_field.relative_header_offset(self.writer.writer.offset())); 85 | } else { 86 | zip64_extra_field_builder = Some( 87 | Zip64ExtendedInformationExtraFieldBuilder::new() 88 | .relative_header_offset(self.writer.writer.offset()), 89 | ); 90 | } 91 | NON_ZIP64_MAX_SIZE 92 | } else { 93 | self.writer.writer.offset() as u32 94 | }; 95 | 96 | if let Some(builder) = 
zip64_extra_field_builder { 97 | if !builder.eof_only() { 98 | self.entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(builder.build()?)); 99 | zip64_extra_field_builder = None; 100 | } else { 101 | zip64_extra_field_builder = Some(builder); 102 | } 103 | } 104 | 105 | let utf8_without_alternative = 106 | self.entry.filename().is_utf8_without_alternative() && self.entry.comment().is_utf8_without_alternative(); 107 | if !utf8_without_alternative { 108 | if matches!(self.entry.filename().encoding(), StringEncoding::Utf8) { 109 | let u_file_name = self.entry.filename().as_bytes().to_vec(); 110 | if !u_file_name.is_empty() { 111 | let basic_crc32 = crc32fast::hash( 112 | self.entry.filename().alternative().unwrap_or_else(|| self.entry.filename().as_bytes()), 113 | ); 114 | let upath_field = 115 | get_or_put_info_zip_unicode_path_extra_field_mut(self.entry.extra_fields.as_mut()); 116 | if let InfoZipUnicodePathExtraField::V1 { crc32, unicode } = upath_field { 117 | *crc32 = basic_crc32; 118 | *unicode = u_file_name; 119 | } 120 | } 121 | } 122 | if matches!(self.entry.comment().encoding(), StringEncoding::Utf8) { 123 | let u_comment = self.entry.comment().as_bytes().to_vec(); 124 | if !u_comment.is_empty() { 125 | let basic_crc32 = crc32fast::hash( 126 | self.entry.comment().alternative().unwrap_or_else(|| self.entry.comment().as_bytes()), 127 | ); 128 | let ucom_field = 129 | get_or_put_info_zip_unicode_comment_extra_field_mut(self.entry.extra_fields.as_mut()); 130 | if let InfoZipUnicodeCommentExtraField::V1 { crc32, unicode } = ucom_field { 131 | *crc32 = basic_crc32; 132 | *unicode = u_comment; 133 | } 134 | } 135 | } 136 | } 137 | 138 | let filename_basic = self.entry.filename().alternative().unwrap_or_else(|| self.entry.filename().as_bytes()); 139 | let comment_basic = self.entry.comment().alternative().unwrap_or_else(|| self.entry.comment().as_bytes()); 140 | 141 | let lf_header = LocalFileHeader { 142 | compressed_size: lfh_compressed_size, 143 | 
uncompressed_size: lfh_uncompressed_size, 144 | compression: self.entry.compression().into(), 145 | crc: crc32fast::hash(self.data), 146 | extra_field_length: self 147 | .entry 148 | .extra_fields() 149 | .count_bytes() 150 | .try_into() 151 | .map_err(|_| ZipError::ExtraFieldTooLarge)?, 152 | file_name_length: filename_basic.len().try_into().map_err(|_| ZipError::FileNameTooLarge)?, 153 | mod_time: self.entry.last_modification_date().time, 154 | mod_date: self.entry.last_modification_date().date, 155 | version: crate::spec::version::as_needed_to_extract(&self.entry), 156 | flags: GeneralPurposeFlag { 157 | data_descriptor: false, 158 | encrypted: false, 159 | filename_unicode: utf8_without_alternative, 160 | }, 161 | }; 162 | 163 | let mut header = CentralDirectoryRecord { 164 | v_made_by: crate::spec::version::as_made_by(), 165 | v_needed: lf_header.version, 166 | compressed_size: lf_header.compressed_size, 167 | uncompressed_size: lf_header.uncompressed_size, 168 | compression: lf_header.compression, 169 | crc: lf_header.crc, 170 | extra_field_length: lf_header.extra_field_length, 171 | file_name_length: lf_header.file_name_length, 172 | file_comment_length: comment_basic.len().try_into().map_err(|_| ZipError::CommentTooLarge)?, 173 | mod_time: lf_header.mod_time, 174 | mod_date: lf_header.mod_date, 175 | flags: lf_header.flags, 176 | disk_start: 0, 177 | inter_attr: self.entry.internal_file_attribute(), 178 | exter_attr: self.entry.external_file_attribute(), 179 | lh_offset, 180 | }; 181 | 182 | self.writer.writer.write_all(&crate::spec::consts::LFH_SIGNATURE.to_le_bytes()).await?; 183 | self.writer.writer.write_all(&lf_header.as_slice()).await?; 184 | self.writer.writer.write_all(filename_basic).await?; 185 | self.writer.writer.write_all(&self.entry.extra_fields().as_bytes()).await?; 186 | self.writer.writer.write_all(compressed_data).await?; 187 | 188 | if let Some(builder) = zip64_extra_field_builder { 189 | 
self.entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(builder.build()?)); 190 | header.extra_field_length = 191 | self.entry.extra_fields().count_bytes().try_into().map_err(|_| ZipError::ExtraFieldTooLarge)?; 192 | } 193 | 194 | self.writer.cd_entries.push(CentralDirectoryEntry { header, entry: self.entry }); 195 | // Ensure that we can fit this many files in this archive if forcing no zip64 196 | if self.writer.cd_entries.len() > NON_ZIP64_MAX_NUM_FILES as usize { 197 | if self.writer.force_no_zip64 { 198 | return Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles)); 199 | } 200 | if !self.writer.is_zip64 { 201 | self.writer.is_zip64 = true; 202 | } 203 | } 204 | Ok(()) 205 | } 206 | } 207 | 208 | #[cfg(any( 209 | feature = "deflate", 210 | feature = "bzip2", 211 | feature = "zstd", 212 | feature = "lzma", 213 | feature = "xz", 214 | feature = "deflate64" 215 | ))] 216 | async fn compress(compression: Compression, data: &[u8], level: async_compression::Level) -> Vec { 217 | // TODO: Reduce reallocations of Vec by making a lower-bound estimate of the length reduction and 218 | // pre-initialising the Vec to that length. Then truncate() to the actual number of bytes written. 
219 | match compression { 220 | #[cfg(feature = "deflate")] 221 | Compression::Deflate => { 222 | let mut writer = write::DeflateEncoder::with_quality(Cursor::new(Vec::new()), level); 223 | writer.write_all(data).await.unwrap(); 224 | writer.close().await.unwrap(); 225 | writer.into_inner().into_inner() 226 | } 227 | #[cfg(feature = "deflate64")] 228 | Compression::Deflate64 => panic!("compressing deflate64 is not supported"), 229 | #[cfg(feature = "bzip2")] 230 | Compression::Bz => { 231 | let mut writer = write::BzEncoder::with_quality(Cursor::new(Vec::new()), level); 232 | writer.write_all(data).await.unwrap(); 233 | writer.close().await.unwrap(); 234 | writer.into_inner().into_inner() 235 | } 236 | #[cfg(feature = "lzma")] 237 | Compression::Lzma => { 238 | let mut writer = write::LzmaEncoder::with_quality(Cursor::new(Vec::new()), level); 239 | writer.write_all(data).await.unwrap(); 240 | writer.close().await.unwrap(); 241 | writer.into_inner().into_inner() 242 | } 243 | #[cfg(feature = "xz")] 244 | Compression::Xz => { 245 | let mut writer = write::XzEncoder::with_quality(Cursor::new(Vec::new()), level); 246 | writer.write_all(data).await.unwrap(); 247 | writer.close().await.unwrap(); 248 | writer.into_inner().into_inner() 249 | } 250 | #[cfg(feature = "zstd")] 251 | Compression::Zstd => { 252 | let mut writer = write::ZstdEncoder::with_quality(Cursor::new(Vec::new()), level); 253 | writer.write_all(data).await.unwrap(); 254 | writer.close().await.unwrap(); 255 | writer.into_inner().into_inner() 256 | } 257 | _ => unreachable!(), 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /src/base/write/io/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | pub(crate) mod offset; 5 | 
-------------------------------------------------------------------------------- /src/base/write/io/offset.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use std::io::{Error, IoSlice}; 5 | use std::pin::Pin; 6 | use std::task::{Context, Poll}; 7 | 8 | use futures_lite::io::AsyncWrite; 9 | use pin_project::pin_project; 10 | 11 | /// A wrapper around an [`AsyncWrite`] implementation which tracks the current byte offset. 12 | #[pin_project(project = OffsetWriterProj)] 13 | pub struct AsyncOffsetWriter { 14 | #[pin] 15 | inner: W, 16 | offset: u64, 17 | } 18 | 19 | impl AsyncOffsetWriter 20 | where 21 | W: AsyncWrite + Unpin, 22 | { 23 | /// Constructs a new wrapper from an inner [`AsyncWrite`] writer. 24 | pub fn new(inner: W) -> Self { 25 | Self { inner, offset: 0 } 26 | } 27 | 28 | /// Returns the current byte offset. 29 | pub fn offset(&self) -> u64 { 30 | self.offset 31 | } 32 | 33 | /// Consumes this wrapper and returns the inner [`AsyncWrite`] writer. 
34 | pub fn into_inner(self) -> W { 35 | self.inner 36 | } 37 | 38 | pub fn inner_mut(&mut self) -> &mut W { 39 | &mut self.inner 40 | } 41 | } 42 | 43 | impl AsyncWrite for AsyncOffsetWriter 44 | where 45 | W: AsyncWrite + Unpin, 46 | { 47 | fn poll_write(self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll> { 48 | let this = self.project(); 49 | let poll = this.inner.poll_write(cx, buf); 50 | 51 | if let Poll::Ready(Ok(inner)) = &poll { 52 | *this.offset += *inner as u64; 53 | } 54 | 55 | poll 56 | } 57 | 58 | fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 59 | self.project().inner.poll_flush(cx) 60 | } 61 | 62 | fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 63 | self.project().inner.poll_close(cx) 64 | } 65 | 66 | fn poll_write_vectored( 67 | self: Pin<&mut Self>, 68 | cx: &mut Context<'_>, 69 | bufs: &[IoSlice<'_>], 70 | ) -> Poll> { 71 | self.project().inner.poll_write_vectored(cx, bufs) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/base/write/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021-2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | //! A module which supports writing ZIP files. 5 | //! 6 | //! # Example 7 | //! ### Whole data (u8 slice) 8 | //! ```no_run 9 | //! # #[cfg(feature = "deflate")] 10 | //! # { 11 | //! # use async_zip::{Compression, ZipEntryBuilder, base::write::ZipFileWriter}; 12 | //! # use async_zip::error::ZipError; 13 | //! # 14 | //! # async fn run() -> Result<(), ZipError> { 15 | //! let mut writer = ZipFileWriter::new(Vec::::new()); 16 | //! 17 | //! let data = b"This is an example file."; 18 | //! let opts = ZipEntryBuilder::new(String::from("foo.txt").into(), Compression::Deflate); 19 | //! 20 | //! writer.write_entry_whole(opts, data).await?; 21 | //! writer.close().await?; 22 | //! 
# Ok(()) 23 | //! # } 24 | //! # } 25 | //! ``` 26 | //! ### Stream data (unknown size & data) 27 | //! ```no_run 28 | //! # #[cfg(feature = "deflate")] 29 | //! # { 30 | //! # use async_zip::{Compression, ZipEntryBuilder, base::write::ZipFileWriter}; 31 | //! # use std::io::Cursor; 32 | //! # use async_zip::error::ZipError; 33 | //! # use futures_lite::io::AsyncWriteExt; 34 | //! # use tokio_util::compat::TokioAsyncWriteCompatExt; 35 | //! # 36 | //! # async fn run() -> Result<(), ZipError> { 37 | //! let mut writer = ZipFileWriter::new(Vec::::new()); 38 | //! 39 | //! let data = b"This is an example file."; 40 | //! let opts = ZipEntryBuilder::new(String::from("bar.txt").into(), Compression::Deflate); 41 | //! 42 | //! let mut entry_writer = writer.write_entry_stream(opts).await?; 43 | //! entry_writer.write_all(data).await.unwrap(); 44 | //! 45 | //! entry_writer.close().await?; 46 | //! writer.close().await?; 47 | //! # Ok(()) 48 | //! # } 49 | //! # } 50 | //! ``` 51 | 52 | pub(crate) mod compressed_writer; 53 | pub(crate) mod entry_stream; 54 | pub(crate) mod entry_whole; 55 | pub(crate) mod io; 56 | 57 | pub use entry_stream::EntryStreamWriter; 58 | 59 | #[cfg(feature = "tokio")] 60 | use tokio_util::compat::{Compat, TokioAsyncWriteCompatExt}; 61 | 62 | use crate::entry::ZipEntry; 63 | use crate::error::Result; 64 | use crate::spec::extra_field::ExtraFieldAsBytes; 65 | use crate::spec::header::{ 66 | CentralDirectoryRecord, EndOfCentralDirectoryHeader, ExtraField, InfoZipUnicodeCommentExtraField, 67 | InfoZipUnicodePathExtraField, Zip64EndOfCentralDirectoryLocator, Zip64EndOfCentralDirectoryRecord, 68 | }; 69 | 70 | #[cfg(feature = "tokio")] 71 | use crate::tokio::write::ZipFileWriter as TokioZipFileWriter; 72 | 73 | use entry_whole::EntryWholeWriter; 74 | use io::offset::AsyncOffsetWriter; 75 | 76 | use crate::spec::consts::{NON_ZIP64_MAX_NUM_FILES, NON_ZIP64_MAX_SIZE}; 77 | use futures_lite::io::{AsyncWrite, AsyncWriteExt}; 78 | 79 | pub(crate) struct 
CentralDirectoryEntry { 80 | pub header: CentralDirectoryRecord, 81 | pub entry: ZipEntry, 82 | } 83 | 84 | /// A ZIP file writer which acts over AsyncWrite implementers. 85 | /// 86 | /// # Note 87 | /// - [`ZipFileWriter::close()`] must be called before a stream writer goes out of scope. 88 | pub struct ZipFileWriter { 89 | pub(crate) writer: AsyncOffsetWriter, 90 | pub(crate) cd_entries: Vec, 91 | /// If true, will error if a Zip64 struct must be written. 92 | force_no_zip64: bool, 93 | /// Whether to write Zip64 end of directory structs. 94 | pub(crate) is_zip64: bool, 95 | comment_opt: Option, 96 | } 97 | 98 | impl ZipFileWriter { 99 | /// Construct a new ZIP file writer from a mutable reference to a writer. 100 | pub fn new(writer: W) -> Self { 101 | Self { 102 | writer: AsyncOffsetWriter::new(writer), 103 | cd_entries: Vec::new(), 104 | comment_opt: None, 105 | is_zip64: false, 106 | force_no_zip64: false, 107 | } 108 | } 109 | 110 | /// Force the ZIP writer to operate in non-ZIP64 mode. 111 | /// If any files would need ZIP64, an error will be raised. 112 | pub fn force_no_zip64(mut self) -> Self { 113 | self.force_no_zip64 = true; 114 | self 115 | } 116 | 117 | /// Force the ZIP writer to emit Zip64 structs at the end of the archive. 118 | /// Zip64 extended fields will only be written if needed. 119 | pub fn force_zip64(mut self) -> Self { 120 | self.is_zip64 = true; 121 | self 122 | } 123 | 124 | /// Write a new ZIP entry of known size and data. 125 | pub async fn write_entry_whole>(&mut self, entry: E, data: &[u8]) -> Result<()> { 126 | EntryWholeWriter::from_raw(self, entry.into(), data).write().await 127 | } 128 | 129 | /// Write an entry of unknown size and data via streaming (ie. using a data descriptor). 130 | /// The generated Local File Header will be invalid, with no compressed size, uncompressed size, 131 | /// and a null CRC. This might cause problems with the destination reader. 
132 | pub async fn write_entry_stream>(&mut self, entry: E) -> Result> { 133 | EntryStreamWriter::from_raw(self, entry.into()).await 134 | } 135 | 136 | /// Set the ZIP file comment. 137 | pub fn comment(&mut self, comment: String) { 138 | self.comment_opt = Some(comment); 139 | } 140 | 141 | /// Returns a mutable reference to the inner writer. 142 | /// 143 | /// Care should be taken when using this inner writer as doing so may invalidate internal state of this writer. 144 | pub fn inner_mut(&mut self) -> &mut W { 145 | self.writer.inner_mut() 146 | } 147 | 148 | /// Consumes this ZIP writer and completes all closing tasks. 149 | /// 150 | /// This includes: 151 | /// - Writing all central directory headers. 152 | /// - Writing the end of central directory header. 153 | /// - Writing the file comment. 154 | /// 155 | /// Failure to call this function before going out of scope would result in a corrupted ZIP file. 156 | pub async fn close(mut self) -> Result { 157 | let cd_offset = self.writer.offset(); 158 | 159 | for entry in &self.cd_entries { 160 | let filename_basic = 161 | entry.entry.filename().alternative().unwrap_or_else(|| entry.entry.filename().as_bytes()); 162 | let comment_basic = entry.entry.comment().alternative().unwrap_or_else(|| entry.entry.comment().as_bytes()); 163 | 164 | self.writer.write_all(&crate::spec::consts::CDH_SIGNATURE.to_le_bytes()).await?; 165 | self.writer.write_all(&entry.header.as_slice()).await?; 166 | self.writer.write_all(filename_basic).await?; 167 | self.writer.write_all(&entry.entry.extra_fields().as_bytes()).await?; 168 | self.writer.write_all(comment_basic).await?; 169 | } 170 | 171 | let central_directory_size = self.writer.offset() - cd_offset; 172 | let central_directory_size_u32 = if central_directory_size > NON_ZIP64_MAX_SIZE as u64 { 173 | NON_ZIP64_MAX_SIZE 174 | } else { 175 | central_directory_size as u32 176 | }; 177 | let num_entries_in_directory = self.cd_entries.len() as u64; 178 | let 
num_entries_in_directory_u16 = if num_entries_in_directory > NON_ZIP64_MAX_NUM_FILES as u64 { 179 | NON_ZIP64_MAX_NUM_FILES 180 | } else { 181 | num_entries_in_directory as u16 182 | }; 183 | let cd_offset_u32 = if cd_offset > NON_ZIP64_MAX_SIZE as u64 { 184 | if self.force_no_zip64 { 185 | return Err(crate::error::ZipError::Zip64Needed(crate::error::Zip64ErrorCase::LargeFile)); 186 | } else { 187 | self.is_zip64 = true; 188 | } 189 | NON_ZIP64_MAX_SIZE 190 | } else { 191 | cd_offset as u32 192 | }; 193 | 194 | // Add the zip64 EOCDR and EOCDL if we are in zip64 mode. 195 | if self.is_zip64 { 196 | let eocdr_offset = self.writer.offset(); 197 | 198 | let eocdr = Zip64EndOfCentralDirectoryRecord { 199 | size_of_zip64_end_of_cd_record: 44, 200 | version_made_by: crate::spec::version::as_made_by(), 201 | version_needed_to_extract: 46, 202 | disk_number: 0, 203 | disk_number_start_of_cd: 0, 204 | num_entries_in_directory_on_disk: num_entries_in_directory, 205 | num_entries_in_directory, 206 | directory_size: central_directory_size, 207 | offset_of_start_of_directory: cd_offset, 208 | }; 209 | self.writer.write_all(&crate::spec::consts::ZIP64_EOCDR_SIGNATURE.to_le_bytes()).await?; 210 | self.writer.write_all(&eocdr.as_bytes()).await?; 211 | 212 | let eocdl = Zip64EndOfCentralDirectoryLocator { 213 | number_of_disk_with_start_of_zip64_end_of_central_directory: 0, 214 | relative_offset: eocdr_offset, 215 | total_number_of_disks: 1, 216 | }; 217 | self.writer.write_all(&crate::spec::consts::ZIP64_EOCDL_SIGNATURE.to_le_bytes()).await?; 218 | self.writer.write_all(&eocdl.as_bytes()).await?; 219 | } 220 | 221 | let header = EndOfCentralDirectoryHeader { 222 | disk_num: 0, 223 | start_cent_dir_disk: 0, 224 | num_of_entries_disk: num_entries_in_directory_u16, 225 | num_of_entries: num_entries_in_directory_u16, 226 | size_cent_dir: central_directory_size_u32, 227 | cent_dir_offset: cd_offset_u32, 228 | file_comm_length: self.comment_opt.as_ref().map(|v| v.len() as 
u16).unwrap_or_default(), 229 | }; 230 | 231 | self.writer.write_all(&crate::spec::consts::EOCDR_SIGNATURE.to_le_bytes()).await?; 232 | self.writer.write_all(&header.as_slice()).await?; 233 | if let Some(comment) = self.comment_opt { 234 | self.writer.write_all(comment.as_bytes()).await?; 235 | } 236 | 237 | Ok(self.writer.into_inner()) 238 | } 239 | } 240 | 241 | #[cfg(feature = "tokio")] 242 | impl ZipFileWriter> 243 | where 244 | W: tokio::io::AsyncWrite + Unpin, 245 | { 246 | /// Construct a new ZIP file writer from a mutable reference to a writer. 247 | pub fn with_tokio(writer: W) -> TokioZipFileWriter { 248 | Self { 249 | writer: AsyncOffsetWriter::new(writer.compat_write()), 250 | cd_entries: Vec::new(), 251 | comment_opt: None, 252 | is_zip64: false, 253 | force_no_zip64: false, 254 | } 255 | } 256 | } 257 | 258 | pub(crate) fn get_or_put_info_zip_unicode_path_extra_field_mut( 259 | extra_fields: &mut Vec, 260 | ) -> &mut InfoZipUnicodePathExtraField { 261 | if !extra_fields.iter().any(|field| matches!(field, ExtraField::InfoZipUnicodePath(_))) { 262 | extra_fields 263 | .push(ExtraField::InfoZipUnicodePath(InfoZipUnicodePathExtraField::V1 { crc32: 0, unicode: vec![] })); 264 | } 265 | 266 | for field in extra_fields.iter_mut() { 267 | if let ExtraField::InfoZipUnicodePath(extra_field) = field { 268 | return extra_field; 269 | } 270 | } 271 | 272 | panic!("InfoZipUnicodePathExtraField not found after insertion") 273 | } 274 | 275 | pub(crate) fn get_or_put_info_zip_unicode_comment_extra_field_mut( 276 | extra_fields: &mut Vec, 277 | ) -> &mut InfoZipUnicodeCommentExtraField { 278 | if !extra_fields.iter().any(|field| matches!(field, ExtraField::InfoZipUnicodeComment(_))) { 279 | extra_fields 280 | .push(ExtraField::InfoZipUnicodeComment(InfoZipUnicodeCommentExtraField::V1 { crc32: 0, unicode: vec![] })); 281 | } 282 | 283 | for field in extra_fields.iter_mut() { 284 | if let ExtraField::InfoZipUnicodeComment(extra_field) = field { 285 | return extra_field; 
286 | } 287 | } 288 | 289 | panic!("InfoZipUnicodeCommentExtraField not found after insertion") 290 | } 291 | -------------------------------------------------------------------------------- /src/date/builder.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::ZipDateTime; 5 | 6 | /// A builder for [`ZipDateTime`]. 7 | pub struct ZipDateTimeBuilder(pub(crate) ZipDateTime); 8 | 9 | impl From for ZipDateTimeBuilder { 10 | fn from(date: ZipDateTime) -> Self { 11 | Self(date) 12 | } 13 | } 14 | 15 | impl Default for ZipDateTimeBuilder { 16 | fn default() -> Self { 17 | Self::new() 18 | } 19 | } 20 | 21 | impl ZipDateTimeBuilder { 22 | /// Constructs a new builder which defines the raw underlying data of a ZIP entry. 23 | pub fn new() -> Self { 24 | Self(ZipDateTime { date: 0, time: 0 }) 25 | } 26 | 27 | /// Sets the date and time's year. 28 | pub fn year(mut self, year: i32) -> Self { 29 | let year: u16 = (((year - 1980) << 9) & 0xFE00).try_into().unwrap(); 30 | self.0.date |= year; 31 | self 32 | } 33 | 34 | /// Sets the date and time's month. 35 | pub fn month(mut self, month: u32) -> Self { 36 | let month: u16 = ((month << 5) & 0x1E0).try_into().unwrap(); 37 | self.0.date |= month; 38 | self 39 | } 40 | 41 | /// Sets the date and time's day. 42 | pub fn day(mut self, day: u32) -> Self { 43 | let day: u16 = (day & 0x1F).try_into().unwrap(); 44 | self.0.date |= day; 45 | self 46 | } 47 | 48 | /// Sets the date and time's hour. 49 | pub fn hour(mut self, hour: u32) -> Self { 50 | let hour: u16 = ((hour << 11) & 0xF800).try_into().unwrap(); 51 | self.0.time |= hour; 52 | self 53 | } 54 | 55 | /// Sets the date and time's minute. 
56 | pub fn minute(mut self, minute: u32) -> Self { 57 | let minute: u16 = ((minute << 5) & 0x7E0).try_into().unwrap(); 58 | self.0.time |= minute; 59 | self 60 | } 61 | 62 | /// Sets the date and time's second. 63 | /// 64 | /// Note that MS-DOS has a maximum granularity of two seconds. 65 | pub fn second(mut self, second: u32) -> Self { 66 | let second: u16 = ((second >> 1) & 0x1F).try_into().unwrap(); 67 | self.0.time |= second; 68 | self 69 | } 70 | 71 | /// Consumes this builder and returns a final [`ZipDateTime`]. 72 | /// 73 | /// This is equivalent to: 74 | /// ``` 75 | /// # use async_zip::{ZipDateTime, ZipDateTimeBuilder, Compression}; 76 | /// # 77 | /// # let builder = ZipDateTimeBuilder::new().year(2024).month(3).day(2); 78 | /// let date: ZipDateTime = builder.into(); 79 | /// ``` 80 | pub fn build(self) -> ZipDateTime { 81 | self.into() 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/date/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021-2024 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | pub mod builder; 5 | 6 | #[cfg(feature = "chrono")] 7 | use chrono::{DateTime, Datelike, LocalResult, TimeZone, Timelike, Utc}; 8 | 9 | use self::builder::ZipDateTimeBuilder; 10 | 11 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#446 12 | // https://learn.microsoft.com/en-us/windows/win32/api/oleauto/nf-oleauto-dosdatetimetovarianttime 13 | 14 | /// A date and time stored as per the MS-DOS representation used by ZIP files. 15 | #[derive(Debug, Default, PartialEq, Eq, Clone, Copy, Hash)] 16 | pub struct ZipDateTime { 17 | pub(crate) date: u16, 18 | pub(crate) time: u16, 19 | } 20 | 21 | impl ZipDateTime { 22 | /// Returns the year of this date & time. 
23 | pub fn year(&self) -> i32 { 24 | (((self.date & 0xFE00) >> 9) + 1980).into() 25 | } 26 | 27 | /// Returns the month of this date & time. 28 | pub fn month(&self) -> u32 { 29 | ((self.date & 0x1E0) >> 5).into() 30 | } 31 | 32 | /// Returns the day of this date & time. 33 | pub fn day(&self) -> u32 { 34 | (self.date & 0x1F).into() 35 | } 36 | 37 | /// Returns the hour of this date & time. 38 | pub fn hour(&self) -> u32 { 39 | ((self.time & 0xF800) >> 11).into() 40 | } 41 | 42 | /// Returns the minute of this date & time. 43 | pub fn minute(&self) -> u32 { 44 | ((self.time & 0x7E0) >> 5).into() 45 | } 46 | 47 | /// Returns the second of this date & time. 48 | /// 49 | /// Note that MS-DOS has a maximum granularity of two seconds. 50 | pub fn second(&self) -> u32 { 51 | ((self.time & 0x1F) << 1).into() 52 | } 53 | 54 | /// Constructs chrono's [`DateTime`] representation of this date & time. 55 | /// 56 | /// Note that this requires the `chrono` feature. 57 | #[cfg(feature = "chrono")] 58 | pub fn as_chrono(&self) -> LocalResult> { 59 | self.into() 60 | } 61 | 62 | /// Constructs this date & time from chrono's [`DateTime`] representation. 63 | /// 64 | /// Note that this requires the `chrono` feature. 
65 | #[cfg(feature = "chrono")] 66 | pub fn from_chrono(dt: &DateTime) -> Self { 67 | dt.into() 68 | } 69 | } 70 | 71 | impl From for ZipDateTime { 72 | fn from(builder: ZipDateTimeBuilder) -> Self { 73 | builder.0 74 | } 75 | } 76 | 77 | #[cfg(feature = "chrono")] 78 | impl From<&DateTime> for ZipDateTime { 79 | fn from(value: &DateTime) -> Self { 80 | let mut builder = ZipDateTimeBuilder::new(); 81 | 82 | builder = builder.year(value.date_naive().year()); 83 | builder = builder.month(value.date_naive().month()); 84 | builder = builder.day(value.date_naive().day()); 85 | builder = builder.hour(value.time().hour()); 86 | builder = builder.minute(value.time().minute()); 87 | builder = builder.second(value.time().second()); 88 | 89 | builder.build() 90 | } 91 | } 92 | 93 | #[cfg(feature = "chrono")] 94 | impl From<&ZipDateTime> for LocalResult> { 95 | fn from(value: &ZipDateTime) -> Self { 96 | Utc.with_ymd_and_hms(value.year(), value.month(), value.day(), value.hour(), value.minute(), value.second()) 97 | } 98 | } 99 | 100 | #[cfg(feature = "chrono")] 101 | impl From> for ZipDateTime { 102 | fn from(value: DateTime) -> Self { 103 | (&value).into() 104 | } 105 | } 106 | 107 | #[cfg(feature = "chrono")] 108 | impl From for LocalResult> { 109 | fn from(value: ZipDateTime) -> Self { 110 | (&value).into() 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/entry/builder.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::entry::ZipEntry; 5 | use crate::spec::{attribute::AttributeCompatibility, header::ExtraField, Compression}; 6 | use crate::{date::ZipDateTime, string::ZipString}; 7 | 8 | /// A builder for [`ZipEntry`]. 
9 | pub struct ZipEntryBuilder(pub(crate) ZipEntry); 10 | 11 | impl From for ZipEntryBuilder { 12 | fn from(entry: ZipEntry) -> Self { 13 | Self(entry) 14 | } 15 | } 16 | 17 | impl ZipEntryBuilder { 18 | /// Constructs a new builder which defines the raw underlying data of a ZIP entry. 19 | /// 20 | /// A filename and compression method are needed to construct the builder as minimal parameters. 21 | pub fn new(filename: ZipString, compression: Compression) -> Self { 22 | Self(ZipEntry::new(filename, compression)) 23 | } 24 | 25 | /// Sets the entry's filename. 26 | pub fn filename(mut self, filename: ZipString) -> Self { 27 | self.0.filename = filename; 28 | self 29 | } 30 | 31 | /// Sets the entry's compression method. 32 | pub fn compression(mut self, compression: Compression) -> Self { 33 | self.0.compression = compression; 34 | self 35 | } 36 | 37 | /// Set a size hint for the file, to be written into the local file header. 38 | /// Unlikely to be useful except for the case of streaming files to be Store'd. 39 | /// This size hint does not affect the central directory, nor does it affect whole files. 40 | pub fn size, M: Into>(mut self, compressed_size: N, uncompressed_size: M) -> Self { 41 | self.0.compressed_size = compressed_size.into(); 42 | self.0.uncompressed_size = uncompressed_size.into(); 43 | self 44 | } 45 | 46 | /// Set the deflate compression option. 47 | /// 48 | /// If the compression type isn't deflate, this option has no effect. 49 | #[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))] 50 | pub fn deflate_option(mut self, option: crate::DeflateOption) -> Self { 51 | self.0.compression_level = option.into_level(); 52 | self 53 | } 54 | 55 | /// Sets the entry's attribute host compatibility. 
56 | pub fn attribute_compatibility(mut self, compatibility: AttributeCompatibility) -> Self { 57 | self.0.attribute_compatibility = compatibility; 58 | self 59 | } 60 | 61 | /// Sets the entry's last modification date. 62 | pub fn last_modification_date(mut self, date: ZipDateTime) -> Self { 63 | self.0.last_modification_date = date; 64 | self 65 | } 66 | 67 | /// Sets the entry's internal file attribute. 68 | pub fn internal_file_attribute(mut self, attribute: u16) -> Self { 69 | self.0.internal_file_attribute = attribute; 70 | self 71 | } 72 | 73 | /// Sets the entry's external file attribute. 74 | pub fn external_file_attribute(mut self, attribute: u32) -> Self { 75 | self.0.external_file_attribute = attribute; 76 | self 77 | } 78 | 79 | /// Sets the entry's extra field data. 80 | pub fn extra_fields(mut self, field: Vec) -> Self { 81 | self.0.extra_fields = field; 82 | self 83 | } 84 | 85 | /// Sets the entry's file comment. 86 | pub fn comment(mut self, comment: ZipString) -> Self { 87 | self.0.comment = comment; 88 | self 89 | } 90 | 91 | /// Sets the entry's Unix permissions mode. 92 | /// 93 | /// If the attribute host compatibility isn't set to Unix, this will have no effect. 94 | pub fn unix_permissions(mut self, mode: u16) -> Self { 95 | if matches!(self.0.attribute_compatibility, AttributeCompatibility::Unix) { 96 | self.0.external_file_attribute = (self.0.external_file_attribute & 0xFFFF) | (mode as u32) << 16; 97 | } 98 | self 99 | } 100 | 101 | /// Consumes this builder and returns a final [`ZipEntry`]. 
102 | /// 103 | /// This is equivalent to: 104 | /// ``` 105 | /// # use async_zip::{ZipEntry, ZipEntryBuilder, Compression}; 106 | /// # 107 | /// # let builder = ZipEntryBuilder::new(String::from("foo.bar").into(), Compression::Stored); 108 | /// let entry: ZipEntry = builder.into(); 109 | /// ``` 110 | pub fn build(self) -> ZipEntry { 111 | self.into() 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/entry/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | pub mod builder; 5 | 6 | use std::ops::Deref; 7 | 8 | use futures_lite::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, SeekFrom}; 9 | 10 | use crate::entry::builder::ZipEntryBuilder; 11 | use crate::error::{Result, ZipError}; 12 | use crate::spec::{ 13 | attribute::AttributeCompatibility, 14 | consts::LFH_SIGNATURE, 15 | header::{ExtraField, LocalFileHeader}, 16 | Compression, 17 | }; 18 | use crate::{string::ZipString, ZipDateTime}; 19 | 20 | /// An immutable store of data about a ZIP entry. 21 | /// 22 | /// This type cannot be directly constructed so instead, the [`ZipEntryBuilder`] must be used. Internally this builder 23 | /// stores a [`ZipEntry`] so conversions between these two types via the [`From`] implementations will be 24 | /// non-allocating. 
25 | #[derive(Clone, Debug)] 26 | pub struct ZipEntry { 27 | pub(crate) filename: ZipString, 28 | pub(crate) compression: Compression, 29 | #[cfg(any( 30 | feature = "deflate", 31 | feature = "bzip2", 32 | feature = "zstd", 33 | feature = "lzma", 34 | feature = "xz", 35 | feature = "deflate64" 36 | ))] 37 | pub(crate) compression_level: async_compression::Level, 38 | pub(crate) crc32: u32, 39 | pub(crate) uncompressed_size: u64, 40 | pub(crate) compressed_size: u64, 41 | pub(crate) attribute_compatibility: AttributeCompatibility, 42 | pub(crate) last_modification_date: ZipDateTime, 43 | pub(crate) internal_file_attribute: u16, 44 | pub(crate) external_file_attribute: u32, 45 | pub(crate) extra_fields: Vec, 46 | pub(crate) comment: ZipString, 47 | pub(crate) data_descriptor: bool, 48 | } 49 | 50 | impl From for ZipEntry { 51 | fn from(builder: ZipEntryBuilder) -> Self { 52 | builder.0 53 | } 54 | } 55 | 56 | impl ZipEntry { 57 | pub(crate) fn new(filename: ZipString, compression: Compression) -> Self { 58 | ZipEntry { 59 | filename, 60 | compression, 61 | #[cfg(any( 62 | feature = "deflate", 63 | feature = "bzip2", 64 | feature = "zstd", 65 | feature = "lzma", 66 | feature = "xz", 67 | feature = "deflate64" 68 | ))] 69 | compression_level: async_compression::Level::Default, 70 | crc32: 0, 71 | uncompressed_size: 0, 72 | compressed_size: 0, 73 | attribute_compatibility: AttributeCompatibility::Unix, 74 | last_modification_date: ZipDateTime::default(), 75 | internal_file_attribute: 0, 76 | external_file_attribute: 0, 77 | extra_fields: Vec::new(), 78 | comment: String::new().into(), 79 | data_descriptor: false, 80 | } 81 | } 82 | 83 | /// Returns the entry's filename. 84 | /// 85 | /// ## Note 86 | /// This will return the raw filename stored during ZIP creation. 
If calling this method on entries retrieved from 87 | /// untrusted ZIP files, the filename should be sanitised before being used as a path to prevent [directory 88 | /// traversal attacks](https://en.wikipedia.org/wiki/Directory_traversal_attack). 89 | pub fn filename(&self) -> &ZipString { 90 | &self.filename 91 | } 92 | 93 | /// Returns the entry's compression method. 94 | pub fn compression(&self) -> Compression { 95 | self.compression 96 | } 97 | 98 | /// Returns the entry's CRC32 value. 99 | pub fn crc32(&self) -> u32 { 100 | self.crc32 101 | } 102 | 103 | /// Returns the entry's uncompressed size. 104 | pub fn uncompressed_size(&self) -> u64 { 105 | self.uncompressed_size 106 | } 107 | 108 | /// Returns the entry's compressed size. 109 | pub fn compressed_size(&self) -> u64 { 110 | self.compressed_size 111 | } 112 | 113 | /// Returns the entry's attribute's host compatibility. 114 | pub fn attribute_compatibility(&self) -> AttributeCompatibility { 115 | self.attribute_compatibility 116 | } 117 | 118 | /// Returns the entry's last modification time & date. 119 | pub fn last_modification_date(&self) -> &ZipDateTime { 120 | &self.last_modification_date 121 | } 122 | 123 | /// Returns the entry's internal file attribute. 124 | pub fn internal_file_attribute(&self) -> u16 { 125 | self.internal_file_attribute 126 | } 127 | 128 | /// Returns the entry's external file attribute 129 | pub fn external_file_attribute(&self) -> u32 { 130 | self.external_file_attribute 131 | } 132 | 133 | /// Returns the entry's extra field data. 134 | pub fn extra_fields(&self) -> &[ExtraField] { 135 | &self.extra_fields 136 | } 137 | 138 | /// Returns the entry's file comment. 139 | pub fn comment(&self) -> &ZipString { 140 | &self.comment 141 | } 142 | 143 | /// Returns the entry's integer-based UNIX permissions. 144 | /// 145 | /// # Note 146 | /// This will return None if the attribute host compatibility is not listed as Unix. 
147 | pub fn unix_permissions(&self) -> Option { 148 | if !matches!(self.attribute_compatibility, AttributeCompatibility::Unix) { 149 | return None; 150 | } 151 | 152 | Some(((self.external_file_attribute) >> 16) as u16) 153 | } 154 | 155 | /// Returns whether or not the entry represents a directory. 156 | pub fn dir(&self) -> Result { 157 | Ok(self.filename.as_str()?.ends_with('/')) 158 | } 159 | } 160 | 161 | /// An immutable store of data about how a ZIP entry is stored within a specific archive. 162 | /// 163 | /// Besides storing archive independent information like the size and timestamp it can also be used to query 164 | /// information about how the entry is stored in an archive. 165 | #[derive(Clone)] 166 | pub struct StoredZipEntry { 167 | pub(crate) entry: ZipEntry, 168 | // pub(crate) general_purpose_flag: GeneralPurposeFlag, 169 | pub(crate) file_offset: u64, 170 | pub(crate) header_size: u64, 171 | } 172 | 173 | impl StoredZipEntry { 174 | /// Returns the offset in bytes to where the header of the entry starts. 175 | pub fn header_offset(&self) -> u64 { 176 | self.file_offset 177 | } 178 | 179 | /// Returns the combined size in bytes of the header, the filename, and any extra fields. 180 | /// 181 | /// Note: This uses the extra field length stored in the central directory, which may differ from that stored in 182 | /// the local file header. See specification: 183 | pub fn header_size(&self) -> u64 { 184 | self.header_size 185 | } 186 | 187 | /// Seek to the offset in bytes where the data of the entry starts. 
188 | pub(crate) async fn seek_to_data_offset(&self, mut reader: &mut R) -> Result<()> { 189 | // Seek to the header 190 | reader.seek(SeekFrom::Start(self.file_offset)).await?; 191 | 192 | // Check the signature 193 | let signature = { 194 | let mut buffer = [0; 4]; 195 | reader.read_exact(&mut buffer).await?; 196 | u32::from_le_bytes(buffer) 197 | }; 198 | 199 | match signature { 200 | LFH_SIGNATURE => (), 201 | actual => return Err(ZipError::UnexpectedHeaderError(actual, LFH_SIGNATURE)), 202 | }; 203 | 204 | // Skip the local file header and trailing data 205 | let header = LocalFileHeader::from_reader(&mut reader).await?; 206 | let trailing_size = (header.file_name_length as i64) + (header.extra_field_length as i64); 207 | reader.seek(SeekFrom::Current(trailing_size)).await?; 208 | 209 | Ok(()) 210 | } 211 | } 212 | 213 | impl Deref for StoredZipEntry { 214 | type Target = ZipEntry; 215 | 216 | fn deref(&self) -> &Self::Target { 217 | &self.entry 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | //! A module which holds relevant error reporting structures/types. 5 | 6 | use std::fmt::{Display, Formatter}; 7 | use thiserror::Error; 8 | 9 | /// A Result type alias over ZipError to minimise repetition. 
10 | pub type Result = std::result::Result; 11 | 12 | #[derive(Debug, PartialEq, Eq)] 13 | pub enum Zip64ErrorCase { 14 | TooManyFiles, 15 | LargeFile, 16 | } 17 | 18 | impl Display for Zip64ErrorCase { 19 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 20 | match self { 21 | Self::TooManyFiles => write!(f, "More than 65536 files in archive"), 22 | Self::LargeFile => write!(f, "File is larger than 4 GiB"), 23 | } 24 | } 25 | } 26 | 27 | /// An enum of possible errors and their descriptions. 28 | #[non_exhaustive] 29 | #[derive(Debug, Error)] 30 | pub enum ZipError { 31 | #[error("feature not supported: '{0}'")] 32 | FeatureNotSupported(&'static str), 33 | #[error("compression not supported: {0}")] 34 | CompressionNotSupported(u16), 35 | #[error("host attribute compatibility not supported: {0}")] 36 | AttributeCompatibilityNotSupported(u16), 37 | #[error("attempted to read a ZIP64 file whilst on a 32-bit target")] 38 | TargetZip64NotSupported, 39 | #[error("attempted to write a ZIP file with force_no_zip64 when ZIP64 is needed: {0}")] 40 | Zip64Needed(Zip64ErrorCase), 41 | #[error("end of file has not been reached")] 42 | EOFNotReached, 43 | #[error("extra fields exceeded maximum size")] 44 | ExtraFieldTooLarge, 45 | #[error("comment exceeded maximum size")] 46 | CommentTooLarge, 47 | #[error("filename exceeded maximum size")] 48 | FileNameTooLarge, 49 | #[error("attempted to convert non-UTF8 bytes to a string/str")] 50 | StringNotUtf8, 51 | 52 | #[error("unable to locate the end of central directory record")] 53 | UnableToLocateEOCDR, 54 | #[error("extra field size was indicated to be {0} but only {1} bytes remain")] 55 | InvalidExtraFieldHeader(u16, usize), 56 | #[error("zip64 extended information field was incomplete")] 57 | Zip64ExtendedFieldIncomplete, 58 | 59 | #[error("an upstream reader returned an error: {0}")] 60 | UpstreamReadError(#[from] std::io::Error), 61 | #[error("a computed CRC32 value did not match the expected value")] 62 | 
CRC32CheckError, 63 | #[error("entry index was out of bounds")] 64 | EntryIndexOutOfBounds, 65 | #[error("Encountered an unexpected header (actual: {0:#x}, expected: {1:#x}).")] 66 | UnexpectedHeaderError(u32, u32), 67 | 68 | #[error("Info-ZIP Unicode Comment Extra Field was incomplete")] 69 | InfoZipUnicodeCommentFieldIncomplete, 70 | #[error("Info-ZIP Unicode Path Extra Field was incomplete")] 71 | InfoZipUnicodePathFieldIncomplete, 72 | } 73 | -------------------------------------------------------------------------------- /src/file/builder.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::{file::ZipFile, string::ZipString}; 5 | 6 | /// A builder for [`ZipFile`]. 7 | pub struct ZipFileBuilder(pub(crate) ZipFile); 8 | 9 | impl From for ZipFileBuilder { 10 | fn from(file: ZipFile) -> Self { 11 | Self(file) 12 | } 13 | } 14 | 15 | impl Default for ZipFileBuilder { 16 | fn default() -> Self { 17 | ZipFileBuilder(ZipFile { entries: Vec::new(), zip64: false, comment: String::new().into() }) 18 | } 19 | } 20 | 21 | impl ZipFileBuilder { 22 | pub fn new() -> Self { 23 | Self::default() 24 | } 25 | 26 | /// Sets the file's comment. 27 | pub fn comment(mut self, comment: ZipString) -> Self { 28 | self.0.comment = comment; 29 | self 30 | } 31 | 32 | /// Consumes this builder and returns a final [`ZipFile`]. 
33 | /// 34 | /// This is equivalent to: 35 | /// ``` 36 | /// # use async_zip::{ZipFile, ZipFileBuilder}; 37 | /// # 38 | /// # let builder = ZipFileBuilder::new(); 39 | /// let file: ZipFile = builder.into(); 40 | /// ``` 41 | pub fn build(self) -> ZipFile { 42 | self.into() 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/file/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | pub(crate) mod builder; 5 | 6 | use crate::{entry::StoredZipEntry, string::ZipString}; 7 | use builder::ZipFileBuilder; 8 | 9 | /// An immutable store of data about a ZIP file. 10 | #[derive(Clone)] 11 | pub struct ZipFile { 12 | pub(crate) entries: Vec, 13 | pub(crate) zip64: bool, 14 | pub(crate) comment: ZipString, 15 | } 16 | 17 | impl From for ZipFile { 18 | fn from(builder: ZipFileBuilder) -> Self { 19 | builder.0 20 | } 21 | } 22 | 23 | impl ZipFile { 24 | /// Returns a list of this ZIP file's entries. 25 | pub fn entries(&self) -> &[StoredZipEntry] { 26 | &self.entries 27 | } 28 | 29 | /// Returns this ZIP file's trailing comment. 30 | pub fn comment(&self) -> &ZipString { 31 | &self.comment 32 | } 33 | 34 | /// Returns whether or not this ZIP file is zip64 35 | pub fn zip64(&self) -> bool { 36 | self.zip64 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021-2023 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | // Document all features on docs.rs 5 | #![cfg_attr(docsrs, feature(doc_cfg))] 6 | 7 | //! An asynchronous ZIP archive reading/writing crate. 8 | //! 9 | //! ## Features 10 | //! 
- A base implementation atop `futures`'s IO traits. 11 | //! - An extended implementation atop `tokio`'s IO traits. 12 | //! - Support for Stored, Deflate, bzip2, LZMA, zstd, and xz compression methods. 13 | //! - Various different reading approaches (seek, stream, filesystem, in-memory buffer). 14 | //! - Support for writing complete data (u8 slices) or stream writing using data descriptors. 15 | //! - Initial support for ZIP64 reading and writing. 16 | //! - Aims for reasonable [specification](https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md) compliance. 17 | //! 18 | //! ## Installation 19 | //! 20 | //! ```toml 21 | //! [dependencies] 22 | //! async_zip = { version = "0.0.17", features = ["full"] } 23 | //! ``` 24 | //! 25 | //! ### Feature Flags 26 | //! - `full` - Enables all below features. 27 | //! - `full-wasm` - Enables all below features that are compatible with WASM. 28 | //! - `chrono` - Enables support for parsing dates via `chrono`. 29 | //! - `tokio` - Enables support for the `tokio` implementation module. 30 | //! - `tokio-fs` - Enables support for the `tokio::fs` reading module. 31 | //! - `deflate` - Enables support for the Deflate compression method. 32 | //! - `bzip2` - Enables support for the bzip2 compression method. 33 | //! - `lzma` - Enables support for the LZMA compression method. 34 | //! - `zstd` - Enables support for the zstd compression method. 35 | //! - `xz` - Enables support for the xz compression method. 36 | //! 37 | //! 
[Read more.](https://github.com/Majored/rs-async-zip) 38 | 39 | pub mod base; 40 | pub mod error; 41 | 42 | #[cfg(feature = "tokio")] 43 | pub mod tokio; 44 | 45 | pub(crate) mod date; 46 | pub(crate) mod entry; 47 | pub(crate) mod file; 48 | pub(crate) mod spec; 49 | pub(crate) mod string; 50 | pub(crate) mod utils; 51 | 52 | #[cfg(test)] 53 | pub(crate) mod tests; 54 | 55 | pub use crate::spec::attribute::AttributeCompatibility; 56 | pub use crate::spec::compression::{Compression, DeflateOption}; 57 | 58 | pub use crate::date::{builder::ZipDateTimeBuilder, ZipDateTime}; 59 | pub use crate::entry::{builder::ZipEntryBuilder, StoredZipEntry, ZipEntry}; 60 | pub use crate::file::{builder::ZipFileBuilder, ZipFile}; 61 | 62 | pub use crate::string::{StringEncoding, ZipString}; 63 | -------------------------------------------------------------------------------- /src/spec/attribute.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::error::{Result, ZipError}; 5 | 6 | /// An attribute host compatibility supported by this crate. 7 | #[non_exhaustive] 8 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 9 | pub enum AttributeCompatibility { 10 | Unix, 11 | } 12 | 13 | impl TryFrom for AttributeCompatibility { 14 | type Error = ZipError; 15 | 16 | // Convert a u16 stored with little endianness into a supported attribute host compatibility. 17 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4422 18 | fn try_from(value: u16) -> Result { 19 | match value { 20 | 3 => Ok(AttributeCompatibility::Unix), 21 | _ => Err(ZipError::AttributeCompatibilityNotSupported(value)), 22 | } 23 | } 24 | } 25 | 26 | impl From<&AttributeCompatibility> for u16 { 27 | // Convert a supported attribute host compatibility into its relevant u16 stored with little endianness. 
28 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4422 29 | fn from(compatibility: &AttributeCompatibility) -> Self { 30 | match compatibility { 31 | AttributeCompatibility::Unix => 3, 32 | } 33 | } 34 | } 35 | 36 | impl From for u16 { 37 | // Convert a supported attribute host compatibility into its relevant u16 stored with little endianness. 38 | fn from(compatibility: AttributeCompatibility) -> Self { 39 | (&compatibility).into() 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/spec/compression.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::error::{Result, ZipError}; 5 | 6 | #[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))] 7 | use async_compression::Level; 8 | 9 | /// A compression method supported by this crate. 10 | #[non_exhaustive] 11 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 12 | pub enum Compression { 13 | Stored, 14 | #[cfg(feature = "deflate")] 15 | Deflate, 16 | #[cfg(feature = "deflate64")] 17 | Deflate64, 18 | #[cfg(feature = "bzip2")] 19 | Bz, 20 | #[cfg(feature = "lzma")] 21 | Lzma, 22 | #[cfg(feature = "zstd")] 23 | Zstd, 24 | #[cfg(feature = "xz")] 25 | Xz, 26 | } 27 | 28 | impl TryFrom for Compression { 29 | type Error = ZipError; 30 | 31 | // Convert a u16 stored with little endianness into a supported compression method. 
32 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#445 33 | fn try_from(value: u16) -> Result { 34 | match value { 35 | 0 => Ok(Compression::Stored), 36 | #[cfg(feature = "deflate")] 37 | 8 => Ok(Compression::Deflate), 38 | #[cfg(feature = "deflate64")] 39 | 9 => Ok(Compression::Deflate64), 40 | #[cfg(feature = "bzip2")] 41 | 12 => Ok(Compression::Bz), 42 | #[cfg(feature = "lzma")] 43 | 14 => Ok(Compression::Lzma), 44 | #[cfg(feature = "zstd")] 45 | 93 => Ok(Compression::Zstd), 46 | #[cfg(feature = "xz")] 47 | 95 => Ok(Compression::Xz), 48 | _ => Err(ZipError::CompressionNotSupported(value)), 49 | } 50 | } 51 | } 52 | 53 | impl From<&Compression> for u16 { 54 | // Convert a supported compression method into its relevant u16 stored with little endianness. 55 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#445 56 | fn from(compression: &Compression) -> u16 { 57 | match compression { 58 | Compression::Stored => 0, 59 | #[cfg(feature = "deflate")] 60 | Compression::Deflate => 8, 61 | #[cfg(feature = "deflate64")] 62 | Compression::Deflate64 => 9, 63 | #[cfg(feature = "bzip2")] 64 | Compression::Bz => 12, 65 | #[cfg(feature = "lzma")] 66 | Compression::Lzma => 14, 67 | #[cfg(feature = "zstd")] 68 | Compression::Zstd => 93, 69 | #[cfg(feature = "xz")] 70 | Compression::Xz => 95, 71 | } 72 | } 73 | } 74 | 75 | impl From for u16 { 76 | fn from(compression: Compression) -> u16 { 77 | (&compression).into() 78 | } 79 | } 80 | 81 | /// Level of compression data should be compressed with for deflate. 82 | #[derive(Debug, Clone, Copy)] 83 | pub enum DeflateOption { 84 | // Normal (-en) compression option was used. 85 | Normal, 86 | 87 | // Maximum (-exx/-ex) compression option was used. 88 | Maximum, 89 | 90 | // Fast (-ef) compression option was used. 91 | Fast, 92 | 93 | // Super Fast (-es) compression option was used. 94 | Super, 95 | 96 | /// Other implementation defined level. 
97 | Other(i32), 98 | } 99 | 100 | #[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))] 101 | impl DeflateOption { 102 | pub(crate) fn into_level(self) -> Level { 103 | // FIXME: There's no clear documentation on what these specific levels defined in the ZIP specification relate 104 | // to. We want to be compatible with any other library, and not specific to `async_compression`'s levels. 105 | if let Self::Other(l) = self { 106 | Level::Precise(l) 107 | } else { 108 | Level::Default 109 | } 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/spec/consts.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | pub const SIGNATURE_LENGTH: usize = 4; 5 | 6 | // Local file header constants 7 | // 8 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#437 9 | pub const LFH_SIGNATURE: u32 = 0x4034b50; 10 | #[allow(dead_code)] 11 | pub const LFH_LENGTH: usize = 26; 12 | 13 | // Central directory header constants 14 | // 15 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4312 16 | pub const CDH_SIGNATURE: u32 = 0x2014b50; 17 | #[allow(dead_code)] 18 | pub const CDH_LENGTH: usize = 42; 19 | 20 | // End of central directory record constants 21 | // 22 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4316 23 | pub const EOCDR_SIGNATURE: u32 = 0x6054b50; 24 | /// The minimum length of the EOCDR, excluding the signature. 25 | pub const EOCDR_LENGTH: usize = 18; 26 | 27 | /// The signature for the zip64 end of central directory record. 
28 | /// Ref: https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4314 29 | pub const ZIP64_EOCDR_SIGNATURE: u32 = 0x06064b50; 30 | /// The signature for the zip64 end of central directory locator. 31 | /// Ref: https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4315 32 | pub const ZIP64_EOCDL_SIGNATURE: u32 = 0x07064b50; 33 | /// The length of the ZIP64 EOCDL, including the signature. 34 | /// The EOCDL has a fixed size, thankfully. 35 | pub const ZIP64_EOCDL_LENGTH: u64 = 20; 36 | 37 | /// The contents of a header field when one must reference the zip64 version instead. 38 | pub const NON_ZIP64_MAX_SIZE: u32 = 0xFFFFFFFF; 39 | /// The maximum number of files or disks in a ZIP file before it requires ZIP64. 40 | pub const NON_ZIP64_MAX_NUM_FILES: u16 = 0xFFFF; 41 | 42 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#439 43 | pub const DATA_DESCRIPTOR_SIGNATURE: u32 = 0x8074b50; 44 | pub const DATA_DESCRIPTOR_LENGTH: usize = 12; 45 | -------------------------------------------------------------------------------- /src/spec/extra_field.rs: -------------------------------------------------------------------------------- 1 | // Copyright Cognite AS, 2023 2 | 3 | use crate::error::{Result as ZipResult, ZipError}; 4 | use crate::spec::header::{ 5 | ExtraField, HeaderId, InfoZipUnicodeCommentExtraField, InfoZipUnicodePathExtraField, UnknownExtraField, 6 | Zip64ExtendedInformationExtraField, 7 | }; 8 | 9 | use super::consts::NON_ZIP64_MAX_SIZE; 10 | 11 | pub(crate) trait ExtraFieldAsBytes { 12 | fn as_bytes(&self) -> Vec; 13 | 14 | fn count_bytes(&self) -> usize; 15 | } 16 | 17 | impl ExtraFieldAsBytes for &[ExtraField] { 18 | fn as_bytes(&self) -> Vec { 19 | let mut buffer = Vec::new(); 20 | for field in self.iter() { 21 | buffer.append(&mut field.as_bytes()); 22 | } 23 | buffer 24 | } 25 | 26 | fn count_bytes(&self) -> usize { 27 | self.iter().map(|field| field.count_bytes()).sum() 28 | } 29 | } 30 | 31 | impl 
ExtraFieldAsBytes for ExtraField { 32 | fn as_bytes(&self) -> Vec { 33 | match self { 34 | ExtraField::Zip64ExtendedInformation(field) => field.as_bytes(), 35 | ExtraField::InfoZipUnicodeComment(field) => field.as_bytes(), 36 | ExtraField::InfoZipUnicodePath(field) => field.as_bytes(), 37 | ExtraField::Unknown(field) => field.as_bytes(), 38 | } 39 | } 40 | 41 | fn count_bytes(&self) -> usize { 42 | match self { 43 | ExtraField::Zip64ExtendedInformation(field) => field.count_bytes(), 44 | ExtraField::InfoZipUnicodeComment(field) => field.count_bytes(), 45 | ExtraField::InfoZipUnicodePath(field) => field.count_bytes(), 46 | ExtraField::Unknown(field) => field.count_bytes(), 47 | } 48 | } 49 | } 50 | 51 | impl ExtraFieldAsBytes for UnknownExtraField { 52 | fn as_bytes(&self) -> Vec { 53 | let mut bytes = Vec::new(); 54 | let header_id: u16 = self.header_id.into(); 55 | bytes.append(&mut header_id.to_le_bytes().to_vec()); 56 | bytes.append(&mut self.data_size.to_le_bytes().to_vec()); 57 | bytes.append(&mut self.content.clone()); 58 | 59 | bytes 60 | } 61 | 62 | fn count_bytes(&self) -> usize { 63 | 4 + self.content.len() 64 | } 65 | } 66 | 67 | impl ExtraFieldAsBytes for Zip64ExtendedInformationExtraField { 68 | fn as_bytes(&self) -> Vec { 69 | let mut bytes = Vec::new(); 70 | let header_id: u16 = self.header_id.into(); 71 | bytes.append(&mut header_id.to_le_bytes().to_vec()); 72 | bytes.append(&mut (self.content_size() as u16).to_le_bytes().to_vec()); 73 | if let Some(uncompressed_size) = &self.uncompressed_size { 74 | bytes.append(&mut uncompressed_size.to_le_bytes().to_vec()); 75 | } 76 | if let Some(compressed_size) = &self.compressed_size { 77 | bytes.append(&mut compressed_size.to_le_bytes().to_vec()); 78 | } 79 | if let Some(relative_header_offset) = &self.relative_header_offset { 80 | bytes.append(&mut relative_header_offset.to_le_bytes().to_vec()); 81 | } 82 | if let Some(disk_start_number) = &self.disk_start_number { 83 | bytes.append(&mut 
disk_start_number.to_le_bytes().to_vec()); 84 | } 85 | 86 | bytes 87 | } 88 | 89 | fn count_bytes(&self) -> usize { 90 | 4 + self.content_size() 91 | } 92 | } 93 | 94 | impl ExtraFieldAsBytes for InfoZipUnicodeCommentExtraField { 95 | fn as_bytes(&self) -> Vec { 96 | let mut bytes = Vec::new(); 97 | let header_id: u16 = HeaderId::INFO_ZIP_UNICODE_COMMENT_EXTRA_FIELD.into(); 98 | bytes.append(&mut header_id.to_le_bytes().to_vec()); 99 | match self { 100 | InfoZipUnicodeCommentExtraField::V1 { crc32, unicode } => { 101 | let data_size: u16 = (5 + unicode.len()).try_into().unwrap(); 102 | bytes.append(&mut data_size.to_le_bytes().to_vec()); 103 | bytes.push(1); 104 | bytes.append(&mut crc32.to_le_bytes().to_vec()); 105 | bytes.append(&mut unicode.clone()); 106 | } 107 | InfoZipUnicodeCommentExtraField::Unknown { version, data } => { 108 | let data_size: u16 = (1 + data.len()).try_into().unwrap(); 109 | bytes.append(&mut data_size.to_le_bytes().to_vec()); 110 | bytes.push(*version); 111 | bytes.append(&mut data.clone()); 112 | } 113 | } 114 | bytes 115 | } 116 | 117 | fn count_bytes(&self) -> usize { 118 | match self { 119 | InfoZipUnicodeCommentExtraField::V1 { unicode, .. } => 9 + unicode.len(), 120 | InfoZipUnicodeCommentExtraField::Unknown { data, .. 
} => 5 + data.len(), 121 | } 122 | } 123 | } 124 | 125 | impl ExtraFieldAsBytes for InfoZipUnicodePathExtraField { 126 | fn as_bytes(&self) -> Vec { 127 | let mut bytes = Vec::new(); 128 | let header_id: u16 = HeaderId::INFO_ZIP_UNICODE_PATH_EXTRA_FIELD.into(); 129 | bytes.append(&mut header_id.to_le_bytes().to_vec()); 130 | match self { 131 | InfoZipUnicodePathExtraField::V1 { crc32, unicode } => { 132 | let data_size: u16 = (5 + unicode.len()).try_into().unwrap(); 133 | bytes.append(&mut data_size.to_le_bytes().to_vec()); 134 | bytes.push(1); 135 | bytes.append(&mut crc32.to_le_bytes().to_vec()); 136 | bytes.append(&mut unicode.clone()); 137 | } 138 | InfoZipUnicodePathExtraField::Unknown { version, data } => { 139 | let data_size: u16 = (1 + data.len()).try_into().unwrap(); 140 | bytes.append(&mut data_size.to_le_bytes().to_vec()); 141 | bytes.push(*version); 142 | bytes.append(&mut data.clone()); 143 | } 144 | } 145 | bytes 146 | } 147 | 148 | fn count_bytes(&self) -> usize { 149 | match self { 150 | InfoZipUnicodePathExtraField::V1 { unicode, .. } => 9 + unicode.len(), 151 | InfoZipUnicodePathExtraField::Unknown { data, .. } => 5 + data.len(), 152 | } 153 | } 154 | } 155 | 156 | /// Parse a zip64 extra field from bytes. 157 | /// The content of "data" should exclude the header. 
158 | fn zip64_extended_information_field_from_bytes( 159 | header_id: HeaderId, 160 | data: &[u8], 161 | uncompressed_size: u32, 162 | compressed_size: u32, 163 | ) -> ZipResult { 164 | // slice.take is nightly-only so we'll just use an index to track the current position 165 | let mut current_idx = 0; 166 | let uncompressed_size = if uncompressed_size == NON_ZIP64_MAX_SIZE && data.len() >= current_idx + 8 { 167 | let val = Some(u64::from_le_bytes(data[current_idx..current_idx + 8].try_into().unwrap())); 168 | current_idx += 8; 169 | val 170 | } else { 171 | None 172 | }; 173 | 174 | let compressed_size = if compressed_size == NON_ZIP64_MAX_SIZE && data.len() >= current_idx + 8 { 175 | let val = Some(u64::from_le_bytes(data[current_idx..current_idx + 8].try_into().unwrap())); 176 | current_idx += 8; 177 | val 178 | } else { 179 | None 180 | }; 181 | 182 | let relative_header_offset = if data.len() >= current_idx + 8 { 183 | let val = Some(u64::from_le_bytes(data[current_idx..current_idx + 8].try_into().unwrap())); 184 | current_idx += 8; 185 | val 186 | } else { 187 | None 188 | }; 189 | 190 | #[allow(unused_assignments)] 191 | let disk_start_number = if data.len() >= current_idx + 4 { 192 | let val = Some(u32::from_le_bytes(data[current_idx..current_idx + 4].try_into().unwrap())); 193 | current_idx += 4; 194 | val 195 | } else { 196 | None 197 | }; 198 | 199 | Ok(Zip64ExtendedInformationExtraField { 200 | header_id, 201 | uncompressed_size, 202 | compressed_size, 203 | relative_header_offset, 204 | disk_start_number, 205 | }) 206 | } 207 | 208 | fn info_zip_unicode_comment_extra_field_from_bytes( 209 | _header_id: HeaderId, 210 | data_size: u16, 211 | data: &[u8], 212 | ) -> ZipResult { 213 | if data.is_empty() { 214 | return Err(ZipError::InfoZipUnicodeCommentFieldIncomplete); 215 | } 216 | let version = data[0]; 217 | match version { 218 | 1 => { 219 | if data.len() < 5 { 220 | return Err(ZipError::InfoZipUnicodeCommentFieldIncomplete); 221 | } 222 | let crc32 
= u32::from_le_bytes(data[1..5].try_into().unwrap()); 223 | let unicode = data[5..(data_size as usize)].to_vec(); 224 | Ok(InfoZipUnicodeCommentExtraField::V1 { crc32, unicode }) 225 | } 226 | _ => Ok(InfoZipUnicodeCommentExtraField::Unknown { version, data: data[1..(data_size as usize)].to_vec() }), 227 | } 228 | } 229 | 230 | fn info_zip_unicode_path_extra_field_from_bytes( 231 | _header_id: HeaderId, 232 | data_size: u16, 233 | data: &[u8], 234 | ) -> ZipResult { 235 | if data.is_empty() { 236 | return Err(ZipError::InfoZipUnicodePathFieldIncomplete); 237 | } 238 | let version = data[0]; 239 | match version { 240 | 1 => { 241 | if data.len() < 5 { 242 | return Err(ZipError::InfoZipUnicodePathFieldIncomplete); 243 | } 244 | let crc32 = u32::from_le_bytes(data[1..5].try_into().unwrap()); 245 | let unicode = data[5..(data_size as usize)].to_vec(); 246 | Ok(InfoZipUnicodePathExtraField::V1 { crc32, unicode }) 247 | } 248 | _ => Ok(InfoZipUnicodePathExtraField::Unknown { version, data: data[1..(data_size as usize)].to_vec() }), 249 | } 250 | } 251 | 252 | pub(crate) fn extra_field_from_bytes( 253 | header_id: HeaderId, 254 | data_size: u16, 255 | data: &[u8], 256 | uncompressed_size: u32, 257 | compressed_size: u32, 258 | ) -> ZipResult { 259 | match header_id { 260 | HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD => Ok(ExtraField::Zip64ExtendedInformation( 261 | zip64_extended_information_field_from_bytes(header_id, data, uncompressed_size, compressed_size)?, 262 | )), 263 | HeaderId::INFO_ZIP_UNICODE_COMMENT_EXTRA_FIELD => Ok(ExtraField::InfoZipUnicodeComment( 264 | info_zip_unicode_comment_extra_field_from_bytes(header_id, data_size, data)?, 265 | )), 266 | HeaderId::INFO_ZIP_UNICODE_PATH_EXTRA_FIELD => Ok(ExtraField::InfoZipUnicodePath( 267 | info_zip_unicode_path_extra_field_from_bytes(header_id, data_size, data)?, 268 | )), 269 | _ => Ok(ExtraField::Unknown(UnknownExtraField { header_id, data_size, content: data.to_vec() })), 270 | } 271 | } 272 | 273 | pub 
struct Zip64ExtendedInformationExtraFieldBuilder { 274 | field: Zip64ExtendedInformationExtraField, 275 | } 276 | 277 | impl Zip64ExtendedInformationExtraFieldBuilder { 278 | pub fn new() -> Self { 279 | Self { 280 | field: Zip64ExtendedInformationExtraField { 281 | header_id: HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD, 282 | uncompressed_size: None, 283 | compressed_size: None, 284 | relative_header_offset: None, 285 | disk_start_number: None, 286 | }, 287 | } 288 | } 289 | 290 | pub fn sizes(mut self, compressed_size: u64, uncompressed_size: u64) -> Self { 291 | self.field.compressed_size = Some(compressed_size); 292 | self.field.uncompressed_size = Some(uncompressed_size); 293 | self 294 | } 295 | 296 | pub fn relative_header_offset(mut self, relative_header_offset: u64) -> Self { 297 | self.field.relative_header_offset = Some(relative_header_offset); 298 | self 299 | } 300 | 301 | #[allow(dead_code)] 302 | pub fn disk_start_number(mut self, disk_start_number: u32) -> Self { 303 | self.field.disk_start_number = Some(disk_start_number); 304 | self 305 | } 306 | 307 | pub fn eof_only(&self) -> bool { 308 | (self.field.uncompressed_size.is_none() && self.field.compressed_size.is_none()) 309 | && (self.field.relative_header_offset.is_some() || self.field.disk_start_number.is_some()) 310 | } 311 | 312 | pub fn build(self) -> ZipResult { 313 | let field = self.field; 314 | 315 | if field.content_size() == 0 { 316 | return Err(ZipError::Zip64ExtendedFieldIncomplete); 317 | } 318 | Ok(field) 319 | } 320 | } 321 | -------------------------------------------------------------------------------- /src/spec/header.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#437 5 | pub struct LocalFileHeader { 6 | pub version: u16, 
7 | pub flags: GeneralPurposeFlag, 8 | pub compression: u16, 9 | pub mod_time: u16, 10 | pub mod_date: u16, 11 | pub crc: u32, 12 | pub compressed_size: u32, 13 | pub uncompressed_size: u32, 14 | pub file_name_length: u16, 15 | pub extra_field_length: u16, 16 | } 17 | 18 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#444 19 | #[derive(Copy, Clone)] 20 | pub struct GeneralPurposeFlag { 21 | pub encrypted: bool, 22 | pub data_descriptor: bool, 23 | pub filename_unicode: bool, 24 | } 25 | 26 | /// 2 byte header ids 27 | /// Ref https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#452 28 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] 29 | pub struct HeaderId(pub u16); 30 | 31 | impl HeaderId { 32 | pub const ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD: HeaderId = HeaderId(0x0001); 33 | pub const INFO_ZIP_UNICODE_COMMENT_EXTRA_FIELD: HeaderId = HeaderId(0x6375); 34 | pub const INFO_ZIP_UNICODE_PATH_EXTRA_FIELD: HeaderId = HeaderId(0x7075); 35 | } 36 | 37 | impl From for HeaderId { 38 | fn from(value: u16) -> Self { 39 | HeaderId(value) 40 | } 41 | } 42 | 43 | impl From for u16 { 44 | fn from(value: HeaderId) -> Self { 45 | value.0 46 | } 47 | } 48 | 49 | /// Represents each extra field. 50 | /// Not strictly part of the spec, but is the most useful way to represent the data. 51 | #[derive(Clone, Debug)] 52 | #[non_exhaustive] 53 | pub enum ExtraField { 54 | Zip64ExtendedInformation(Zip64ExtendedInformationExtraField), 55 | InfoZipUnicodeComment(InfoZipUnicodeCommentExtraField), 56 | InfoZipUnicodePath(InfoZipUnicodePathExtraField), 57 | Unknown(UnknownExtraField), 58 | } 59 | 60 | /// An extended information header for Zip64. 61 | /// This field is used both for local file headers and central directory records. 
62 | /// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#453 63 | #[derive(Clone, Debug)] 64 | pub struct Zip64ExtendedInformationExtraField { 65 | pub header_id: HeaderId, 66 | pub uncompressed_size: Option, 67 | pub compressed_size: Option, 68 | // While not specified in the spec, these two fields are often left out in practice. 69 | pub relative_header_offset: Option, 70 | pub disk_start_number: Option, 71 | } 72 | 73 | impl Zip64ExtendedInformationExtraField { 74 | pub(crate) fn content_size(&self) -> usize { 75 | self.uncompressed_size.map(|_| 8).unwrap_or_default() 76 | + self.compressed_size.map(|_| 8).unwrap_or_default() 77 | + self.relative_header_offset.map(|_| 8).unwrap_or_default() 78 | + self.disk_start_number.map(|_| 8).unwrap_or_default() 79 | } 80 | } 81 | 82 | /// Stores the UTF-8 version of the file comment as stored in the central directory header. 83 | /// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#468 84 | #[derive(Clone, Debug)] 85 | pub enum InfoZipUnicodeCommentExtraField { 86 | V1 { crc32: u32, unicode: Vec }, 87 | Unknown { version: u8, data: Vec }, 88 | } 89 | 90 | /// Stores the UTF-8 version of the file name field as stored in the local header and central directory header. 91 | /// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#469 92 | #[derive(Clone, Debug)] 93 | pub enum InfoZipUnicodePathExtraField { 94 | V1 { crc32: u32, unicode: Vec }, 95 | Unknown { version: u8, data: Vec }, 96 | } 97 | 98 | /// Represents any unparsed extra field. 
99 | #[derive(Clone, Debug)] 100 | pub struct UnknownExtraField { 101 | pub header_id: HeaderId, 102 | pub data_size: u16, 103 | pub content: Vec, 104 | } 105 | 106 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4312 107 | pub struct CentralDirectoryRecord { 108 | pub v_made_by: u16, 109 | pub v_needed: u16, 110 | pub flags: GeneralPurposeFlag, 111 | pub compression: u16, 112 | pub mod_time: u16, 113 | pub mod_date: u16, 114 | pub crc: u32, 115 | pub compressed_size: u32, 116 | pub uncompressed_size: u32, 117 | pub file_name_length: u16, 118 | pub extra_field_length: u16, 119 | pub file_comment_length: u16, 120 | pub disk_start: u16, 121 | pub inter_attr: u16, 122 | pub exter_attr: u32, 123 | pub lh_offset: u32, 124 | } 125 | 126 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4316 127 | #[derive(Debug)] 128 | pub struct EndOfCentralDirectoryHeader { 129 | pub(crate) disk_num: u16, 130 | pub(crate) start_cent_dir_disk: u16, 131 | pub(crate) num_of_entries_disk: u16, 132 | pub(crate) num_of_entries: u16, 133 | pub(crate) size_cent_dir: u32, 134 | pub(crate) cent_dir_offset: u32, 135 | pub(crate) file_comm_length: u16, 136 | } 137 | 138 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4314 139 | #[derive(Debug, PartialEq)] 140 | pub struct Zip64EndOfCentralDirectoryRecord { 141 | /// The size of this Zip64EndOfCentralDirectoryRecord. 142 | /// This is specified because there is a variable-length extra zip64 information sector. 143 | /// However, we will gleefully ignore this sector because it is reserved for use by PKWare. 
144 | pub size_of_zip64_end_of_cd_record: u64, 145 | pub version_made_by: u16, 146 | pub version_needed_to_extract: u16, 147 | pub disk_number: u32, 148 | pub disk_number_start_of_cd: u32, 149 | pub num_entries_in_directory_on_disk: u64, 150 | pub num_entries_in_directory: u64, 151 | pub directory_size: u64, 152 | pub offset_of_start_of_directory: u64, 153 | } 154 | 155 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4315 156 | #[derive(Debug, PartialEq)] 157 | pub struct Zip64EndOfCentralDirectoryLocator { 158 | pub number_of_disk_with_start_of_zip64_end_of_central_directory: u32, 159 | pub relative_offset: u64, 160 | pub total_number_of_disks: u32, 161 | } 162 | -------------------------------------------------------------------------------- /src/spec/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | pub(crate) mod attribute; 5 | pub(crate) mod compression; 6 | pub(crate) mod consts; 7 | pub(crate) mod extra_field; 8 | pub(crate) mod header; 9 | pub(crate) mod parse; 10 | pub(crate) mod version; 11 | 12 | pub use compression::Compression; 13 | -------------------------------------------------------------------------------- /src/spec/version.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::entry::ZipEntry; 5 | #[cfg(any( 6 | feature = "deflate", 7 | feature = "bzip2", 8 | feature = "zstd", 9 | feature = "lzma", 10 | feature = "xz", 11 | feature = "deflate64" 12 | ))] 13 | use crate::spec::Compression; 14 | 15 | pub(crate) const SPEC_VERSION_MADE_BY: u16 = 63; 16 | 17 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#443 18 | pub fn 
as_needed_to_extract(entry: &ZipEntry) -> u16 { 19 | let mut version = match entry.compression() { 20 | #[cfg(feature = "deflate")] 21 | Compression::Deflate => 20, 22 | #[cfg(feature = "deflate64")] 23 | Compression::Deflate64 => 21, 24 | #[cfg(feature = "bzip2")] 25 | Compression::Bz => 46, 26 | #[cfg(feature = "lzma")] 27 | Compression::Lzma => 63, 28 | _ => 10, 29 | }; 30 | 31 | if let Ok(true) = entry.dir() { 32 | version = std::cmp::max(version, 20); 33 | } 34 | 35 | version 36 | } 37 | 38 | // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#442 39 | pub fn as_made_by() -> u16 { 40 | // Default to UNIX mapping for the moment. 41 | 3 << 8 | SPEC_VERSION_MADE_BY 42 | } 43 | -------------------------------------------------------------------------------- /src/string.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::error::{Result, ZipError}; 5 | 6 | /// A string encoding supported by this crate. 7 | #[derive(Debug, Clone, Copy)] 8 | pub enum StringEncoding { 9 | Utf8, 10 | Raw, 11 | } 12 | 13 | /// A string wrapper for handling different encodings. 14 | #[derive(Debug, Clone)] 15 | pub struct ZipString { 16 | encoding: StringEncoding, 17 | raw: Vec, 18 | alternative: Option>, 19 | } 20 | 21 | impl ZipString { 22 | /// Constructs a new encoded string from its raw bytes and its encoding type. 23 | /// 24 | /// # Note 25 | /// If the provided encoding is [`StringEncoding::Utf8`] but the raw bytes are not valid UTF-8 (ie. a call to 26 | /// `std::str::from_utf8()` fails), the encoding is defaulted back to [`StringEncoding::Raw`]. 
27 | pub fn new(raw: Vec, mut encoding: StringEncoding) -> Self { 28 | if let StringEncoding::Utf8 = encoding { 29 | if std::str::from_utf8(&raw).is_err() { 30 | encoding = StringEncoding::Raw; 31 | } 32 | } 33 | 34 | Self { encoding, raw, alternative: None } 35 | } 36 | 37 | /// Constructs a new encoded string from utf-8 data, with an alternative in native MBCS encoding. 38 | pub fn new_with_alternative(utf8: String, alternative: Vec) -> Self { 39 | Self { encoding: StringEncoding::Utf8, raw: utf8.into_bytes(), alternative: Some(alternative) } 40 | } 41 | 42 | /// Returns the raw bytes for this string. 43 | pub fn as_bytes(&self) -> &[u8] { 44 | &self.raw 45 | } 46 | 47 | /// Returns the encoding type for this string. 48 | pub fn encoding(&self) -> StringEncoding { 49 | self.encoding 50 | } 51 | 52 | /// Returns the alternative bytes (in native MBCS encoding) for this string. 53 | pub fn alternative(&self) -> Option<&[u8]> { 54 | self.alternative.as_deref() 55 | } 56 | 57 | /// Returns the raw bytes converted into a string slice. 58 | /// 59 | /// # Note 60 | /// A call to this method will only succeed if the encoding type is [`StringEncoding::Utf8`]. 61 | pub fn as_str(&self) -> Result<&str> { 62 | if !matches!(self.encoding, StringEncoding::Utf8) { 63 | return Err(ZipError::StringNotUtf8); 64 | } 65 | 66 | // SAFETY: 67 | // "The bytes passed in must be valid UTF-8.' 68 | // 69 | // This function will error if self.encoding is not StringEncoding::Utf8. 70 | // 71 | // self.encoding is only ever StringEncoding::Utf8 if this variant was provided to the constructor AND the 72 | // call to `std::str::from_utf8()` within the constructor succeeded. Mutable access to the inner vector is 73 | // never given and no method implemented on this type mutates the inner vector. 74 | 75 | Ok(unsafe { std::str::from_utf8_unchecked(&self.raw) }) 76 | } 77 | 78 | /// Returns the raw bytes converted to an owned string. 
79 | /// 80 | /// # Note 81 | /// A call to this method will only succeed if the encoding type is [`StringEncoding::Utf8`]. 82 | pub fn into_string(self) -> Result { 83 | if !matches!(self.encoding, StringEncoding::Utf8) { 84 | return Err(ZipError::StringNotUtf8); 85 | } 86 | 87 | // SAFETY: See above. 88 | Ok(unsafe { String::from_utf8_unchecked(self.raw) }) 89 | } 90 | 91 | /// Returns the alternative bytes (in native MBCS encoding) converted to the owned. 92 | pub fn into_alternative(self) -> Option> { 93 | self.alternative 94 | } 95 | 96 | /// Returns whether this string is encoded as utf-8 without an alternative. 97 | pub fn is_utf8_without_alternative(&self) -> bool { 98 | matches!(self.encoding, StringEncoding::Utf8) && self.alternative.is_none() 99 | } 100 | } 101 | 102 | impl From for ZipString { 103 | fn from(value: String) -> Self { 104 | Self { encoding: StringEncoding::Utf8, raw: value.into_bytes(), alternative: None } 105 | } 106 | } 107 | 108 | impl From<&str> for ZipString { 109 | fn from(value: &str) -> Self { 110 | Self { encoding: StringEncoding::Utf8, raw: value.as_bytes().to_vec(), alternative: None } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/tests/combined/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | -------------------------------------------------------------------------------- /src/tests/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | pub(crate) mod combined; 5 | pub(crate) mod read; 6 | pub(crate) mod spec; 7 | pub(crate) mod write; 8 | 9 | use std::sync::Once; 10 | static ENV_LOGGER: Once = Once::new(); 
11 | 12 | /// Initialize the env logger for any tests that require it. 13 | /// Safe to call multiple times. 14 | fn init_logger() { 15 | ENV_LOGGER.call_once(|| env_logger::Builder::from_default_env().format_module_path(true).init()); 16 | } 17 | -------------------------------------------------------------------------------- /src/tests/read/compression/bzip2.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Majored/rs-async-zip/527bda9d58c1ba1fa973a0faeb68dce91fa4ffe4/src/tests/read/compression/bzip2.data -------------------------------------------------------------------------------- /src/tests/read/compression/deflate.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Majored/rs-async-zip/527bda9d58c1ba1fa973a0faeb68dce91fa4ffe4/src/tests/read/compression/deflate.data -------------------------------------------------------------------------------- /src/tests/read/compression/lzma.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Majored/rs-async-zip/527bda9d58c1ba1fa973a0faeb68dce91fa4ffe4/src/tests/read/compression/lzma.data -------------------------------------------------------------------------------- /src/tests/read/compression/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::base::read::io::compressed::CompressedReader; 5 | use crate::spec::Compression; 6 | 7 | compressed_test_helper!(stored_test, Compression::Stored, "foo bar", "foo bar"); 8 | 9 | #[cfg(feature = "deflate")] 10 | compressed_test_helper!(deflate_test, Compression::Deflate, "foo bar", include_bytes!("deflate.data")); 11 | 12 | #[cfg(feature = "bzip2")] 13 | 
compressed_test_helper!(bz_test, Compression::Bz, "foo bar", include_bytes!("bzip2.data")); 14 | 15 | #[cfg(feature = "lzma")] 16 | compressed_test_helper!(lzma_test, Compression::Lzma, "foo bar", include_bytes!("lzma.data")); 17 | 18 | #[cfg(feature = "zstd")] 19 | compressed_test_helper!(zstd_test, Compression::Zstd, "foo bar", include_bytes!("zstd.data")); 20 | 21 | #[cfg(feature = "xz")] 22 | compressed_test_helper!(xz_test, Compression::Xz, "foo bar", include_bytes!("xz.data")); 23 | 24 | /// A helper macro for generating a CompressedReader test using a specific compression method. 25 | macro_rules! compressed_test_helper { 26 | ($name:ident, $typ:expr, $data_raw:expr, $data:expr) => { 27 | #[cfg(test)] 28 | #[tokio::test] 29 | async fn $name() { 30 | use futures_lite::io::{AsyncReadExt, Cursor}; 31 | 32 | let data = $data; 33 | let data_raw = $data_raw; 34 | 35 | let cursor = Cursor::new(data); 36 | let mut reader = CompressedReader::new(cursor, $typ); 37 | 38 | let mut read_data = String::new(); 39 | reader.read_to_string(&mut read_data).await.expect("read into CompressedReader failed"); 40 | 41 | assert_eq!(read_data, data_raw); 42 | } 43 | }; 44 | } 45 | 46 | use compressed_test_helper; 47 | -------------------------------------------------------------------------------- /src/tests/read/compression/xz.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Majored/rs-async-zip/527bda9d58c1ba1fa973a0faeb68dce91fa4ffe4/src/tests/read/compression/xz.data -------------------------------------------------------------------------------- /src/tests/read/compression/zstd.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Majored/rs-async-zip/527bda9d58c1ba1fa973a0faeb68dce91fa4ffe4/src/tests/read/compression/zstd.data -------------------------------------------------------------------------------- 
/src/tests/read/locator/empty-buffer-boundary.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Majored/rs-async-zip/527bda9d58c1ba1fa973a0faeb68dce91fa4ffe4/src/tests/read/locator/empty-buffer-boundary.zip -------------------------------------------------------------------------------- /src/tests/read/locator/empty-with-max-comment.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Majored/rs-async-zip/527bda9d58c1ba1fa973a0faeb68dce91fa4ffe4/src/tests/read/locator/empty-with-max-comment.zip -------------------------------------------------------------------------------- /src/tests/read/locator/empty.zip: -------------------------------------------------------------------------------- 1 | PK -------------------------------------------------------------------------------- /src/tests/read/locator/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | #[test] 5 | fn search_one_byte_test() { 6 | let buffer: &[u8] = &[0x0, 0x0, 0x0, 0x0, 0x0, 0x0]; 7 | let signature: &[u8] = &[0x1]; 8 | 9 | let matched = crate::base::read::io::locator::reverse_search_buffer(buffer, signature); 10 | assert!(matched.is_none()); 11 | 12 | let buffer: &[u8] = &[0x2, 0x1, 0x0, 0x0, 0x0, 0x0]; 13 | let signature: &[u8] = &[0x1]; 14 | 15 | let matched = crate::base::read::io::locator::reverse_search_buffer(buffer, signature); 16 | assert!(matched.is_some()); 17 | assert_eq!(1, matched.unwrap()); 18 | } 19 | 20 | #[test] 21 | fn search_two_byte_test() { 22 | let buffer: &[u8] = &[0x2, 0x1, 0x0, 0x0, 0x0, 0x0]; 23 | let signature: &[u8] = &[0x2, 0x1]; 24 | 25 | let matched = crate::base::read::io::locator::reverse_search_buffer(buffer, signature); 26 | assert!(matched.is_some()); 
27 | assert_eq!(1, matched.unwrap()); 28 | } 29 | 30 | #[tokio::test] 31 | async fn locator_empty_test() { 32 | use futures_lite::io::Cursor; 33 | 34 | let data = &include_bytes!("empty.zip"); 35 | let mut cursor = Cursor::new(data); 36 | let eocdr = crate::base::read::io::locator::eocdr(&mut cursor).await; 37 | 38 | assert!(eocdr.is_ok()); 39 | assert_eq!(eocdr.unwrap(), 4); 40 | } 41 | 42 | #[tokio::test] 43 | async fn locator_empty_max_comment_test() { 44 | use futures_lite::io::Cursor; 45 | 46 | let data = &include_bytes!("empty-with-max-comment.zip"); 47 | let mut cursor = Cursor::new(data); 48 | let eocdr = crate::base::read::io::locator::eocdr(&mut cursor).await; 49 | 50 | assert!(eocdr.is_ok()); 51 | assert_eq!(eocdr.unwrap(), 4); 52 | } 53 | 54 | #[tokio::test] 55 | async fn locator_buffer_boundary_test() { 56 | use futures_lite::io::Cursor; 57 | 58 | let data = &include_bytes!("empty-buffer-boundary.zip"); 59 | let mut cursor = Cursor::new(data); 60 | let eocdr = crate::base::read::io::locator::eocdr(&mut cursor).await; 61 | 62 | assert!(eocdr.is_ok()); 63 | assert_eq!(eocdr.unwrap(), 4); 64 | } 65 | -------------------------------------------------------------------------------- /src/tests/read/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | pub(crate) mod compression; 5 | pub(crate) mod locator; 6 | pub(crate) mod zip64; 7 | -------------------------------------------------------------------------------- /src/tests/read/zip64/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Harry [Majored] [hello@majored.pw] 2 | // Copyright (c) 2023 Cognite AS 3 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 4 | 5 | use futures_lite::io::AsyncReadExt; 6 | 7 | use crate::tests::init_logger; 
8 | 9 | const ZIP64_ZIP_CONTENTS: &str = "Hello World!\n"; 10 | 11 | /// Tests opening and reading a zip64 archive. 12 | /// It contains one file named "-" with a zip 64 extended field header. 13 | #[tokio::test] 14 | async fn test_read_zip64_archive_mem() { 15 | use crate::base::read::mem::ZipFileReader; 16 | init_logger(); 17 | 18 | let data = include_bytes!("zip64.zip").to_vec(); 19 | 20 | let reader = ZipFileReader::new(data).await.unwrap(); 21 | let mut entry_reader = reader.reader_without_entry(0).await.unwrap(); 22 | 23 | let mut read_data = String::new(); 24 | entry_reader.read_to_string(&mut read_data).await.expect("read failed"); 25 | 26 | assert_eq!( 27 | read_data.chars().count(), 28 | ZIP64_ZIP_CONTENTS.chars().count(), 29 | "{read_data:?} != {ZIP64_ZIP_CONTENTS:?}" 30 | ); 31 | assert_eq!(read_data, ZIP64_ZIP_CONTENTS); 32 | } 33 | 34 | /// Like test_read_zip64_archive_mem() but for the streaming version 35 | #[tokio::test] 36 | async fn test_read_zip64_archive_stream() { 37 | use crate::base::read::stream::ZipFileReader; 38 | init_logger(); 39 | 40 | let data = include_bytes!("zip64.zip").to_vec(); 41 | 42 | let reader = ZipFileReader::new(data.as_slice()); 43 | let mut entry_reader = reader.next_without_entry().await.unwrap().unwrap(); 44 | 45 | let mut read_data = String::new(); 46 | entry_reader.reader_mut().read_to_string(&mut read_data).await.expect("read failed"); 47 | 48 | assert_eq!( 49 | read_data.chars().count(), 50 | ZIP64_ZIP_CONTENTS.chars().count(), 51 | "{read_data:?} != {ZIP64_ZIP_CONTENTS:?}" 52 | ); 53 | assert_eq!(read_data, ZIP64_ZIP_CONTENTS); 54 | } 55 | 56 | /// Generate an example file only if it doesn't exist already. 57 | /// The file is placed adjacent to this rs file. 
58 | #[cfg(feature = "tokio")] 59 | fn generate_zip64many_zip() -> std::path::PathBuf { 60 | use std::io::Write; 61 | use zip::write::{ExtendedFileOptions, FileOptions}; 62 | 63 | let mut path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); 64 | path.push("src/tests/read/zip64/zip64many.zip"); 65 | 66 | // Only recreate the zip if it doesnt already exist. 67 | if path.exists() { 68 | return path; 69 | } 70 | 71 | let zip_file = std::fs::File::create(&path).unwrap(); 72 | let mut zip = zip::ZipWriter::new(zip_file); 73 | let options: FileOptions<'_, ExtendedFileOptions> = 74 | FileOptions::default().compression_method(zip::CompressionMethod::Stored); 75 | 76 | for i in 0..2_u32.pow(16) + 1 { 77 | zip.start_file(format!("{i}.txt"), options.clone()).unwrap(); 78 | zip.write_all(b"\n").unwrap(); 79 | } 80 | 81 | zip.finish().unwrap(); 82 | 83 | path 84 | } 85 | 86 | /// Test reading a generated zip64 archive that contains more than 2^16 entries. 87 | #[cfg(feature = "tokio-fs")] 88 | #[tokio::test] 89 | async fn test_read_zip64_archive_many_entries() { 90 | use crate::tokio::read::fs::ZipFileReader; 91 | 92 | init_logger(); 93 | 94 | let path = generate_zip64many_zip(); 95 | 96 | let reader = ZipFileReader::new(path).await.unwrap(); 97 | 98 | // Verify that each entry exists and is has the contents "\n" 99 | for i in 0..2_u32.pow(16) + 1 { 100 | let entry = reader.file().entries().get(i as usize).unwrap(); 101 | eprintln!("{:?}", entry.filename().as_bytes()); 102 | assert_eq!(entry.filename.as_str().unwrap(), format!("{i}.txt")); 103 | let mut entry = reader.reader_without_entry(i as usize).await.unwrap(); 104 | let mut contents = String::new(); 105 | entry.read_to_string(&mut contents).await.unwrap(); 106 | assert_eq!(contents, "\n"); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/tests/read/zip64/zip64.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Majored/rs-async-zip/527bda9d58c1ba1fa973a0faeb68dce91fa4ffe4/src/tests/read/zip64/zip64.zip -------------------------------------------------------------------------------- /src/tests/spec/date.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | #[cfg(feature = "chrono")] 5 | use chrono::{TimeZone, Utc}; 6 | 7 | use crate::ZipDateTimeBuilder; 8 | 9 | #[test] 10 | #[cfg(feature = "chrono")] 11 | fn date_conversion_test_chrono() { 12 | let original_dt = Utc.timestamp_opt(1666544102, 0).unwrap(); 13 | let zip_dt = crate::ZipDateTime::from_chrono(&original_dt); 14 | let result_dt = zip_dt.as_chrono().single().expect("expected single unique result"); 15 | assert_eq!(result_dt, original_dt); 16 | } 17 | 18 | #[test] 19 | fn date_conversion_test() { 20 | let year = 2000; 21 | let month = 9; 22 | let day = 8; 23 | let hour = 7; 24 | let minute = 5; 25 | let second = 4; 26 | 27 | let mut builder = ZipDateTimeBuilder::new(); 28 | 29 | builder = builder.year(year); 30 | builder = builder.month(month); 31 | builder = builder.day(day); 32 | builder = builder.hour(hour); 33 | builder = builder.minute(minute); 34 | builder = builder.second(second); 35 | 36 | let built = builder.build(); 37 | 38 | assert_eq!(year, built.year()); 39 | assert_eq!(month, built.month()); 40 | assert_eq!(day, built.day()); 41 | assert_eq!(hour, built.hour()); 42 | assert_eq!(minute, built.minute()); 43 | assert_eq!(second, built.second()); 44 | } 45 | -------------------------------------------------------------------------------- /src/tests/spec/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | pub(crate) 
mod date; 5 | -------------------------------------------------------------------------------- /src/tests/write/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use futures_lite::io::AsyncWrite; 5 | use std::io::Error; 6 | use std::pin::Pin; 7 | use std::task::{Context, Poll}; 8 | 9 | pub(crate) mod offset; 10 | mod zip64; 11 | 12 | /// /dev/null for AsyncWrite. 13 | /// Useful for tests that involve writing, but not reading, large amounts of data. 14 | pub(crate) struct AsyncSink; 15 | 16 | // AsyncSink is always ready to receive bytes and throw them away. 17 | impl AsyncWrite for AsyncSink { 18 | fn poll_write(self: Pin<&mut Self>, _: &mut Context<'_>, buf: &[u8]) -> Poll> { 19 | Poll::Ready(Ok(buf.len())) 20 | } 21 | 22 | fn poll_flush(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll> { 23 | Poll::Ready(Ok(())) 24 | } 25 | 26 | fn poll_close(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll> { 27 | Poll::Ready(Ok(())) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/tests/write/offset/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::base::write::io::offset::AsyncOffsetWriter; 5 | 6 | #[tokio::test] 7 | async fn basic() { 8 | use futures_lite::io::AsyncWriteExt; 9 | use futures_lite::io::Cursor; 10 | 11 | let mut writer = AsyncOffsetWriter::new(Cursor::new(Vec::new())); 12 | assert_eq!(writer.offset(), 0); 13 | 14 | writer.write_all(b"Foo. Bar. Foo. Bar.").await.expect("failed to write data"); 15 | assert_eq!(writer.offset(), 19); 16 | 17 | writer.write_all(b"Foo. 
Foo.").await.expect("failed to write data"); 18 | assert_eq!(writer.offset(), 28); 19 | 20 | writer.write_all(b"Bar. Bar.").await.expect("failed to write data"); 21 | assert_eq!(writer.offset(), 37); 22 | } 23 | -------------------------------------------------------------------------------- /src/tests/write/zip64/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright Cognite AS, 2023 2 | 3 | use crate::base::write::ZipFileWriter; 4 | use crate::error::{Zip64ErrorCase, ZipError}; 5 | use crate::spec::consts::NON_ZIP64_MAX_SIZE; 6 | use crate::tests::init_logger; 7 | use crate::tests::write::AsyncSink; 8 | use crate::{Compression, ZipEntryBuilder}; 9 | use std::io::Read; 10 | 11 | use crate::spec::header::ExtraField; 12 | use futures_lite::io::AsyncWriteExt; 13 | 14 | // Useful constants for writing a large file. 15 | const BATCH_SIZE: usize = 100_000; 16 | const NUM_BATCHES: usize = NON_ZIP64_MAX_SIZE as usize / BATCH_SIZE + 1; 17 | const BATCHED_FILE_SIZE: usize = NUM_BATCHES * BATCH_SIZE; 18 | 19 | /// Test writing a small zip64 file. 20 | /// No zip64 extra fields will be emitted for EntryWhole. 
21 | /// Z64 end of directory record & locator should be emitted 22 | #[tokio::test] 23 | async fn test_write_zip64_file() { 24 | init_logger(); 25 | 26 | let mut buffer = Vec::new(); 27 | let mut writer = ZipFileWriter::new(&mut buffer).force_zip64(); 28 | let entry = ZipEntryBuilder::new("file1".to_string().into(), Compression::Stored); 29 | writer.write_entry_whole(entry, &[0, 0, 0, 0]).await.unwrap(); 30 | let entry = ZipEntryBuilder::new("file2".to_string().into(), Compression::Stored); 31 | let mut entry_writer = writer.write_entry_stream(entry).await.unwrap(); 32 | entry_writer.write_all(&[0, 0, 0, 0]).await.unwrap(); 33 | entry_writer.close().await.unwrap(); 34 | writer.close().await.unwrap(); 35 | 36 | let cursor = std::io::Cursor::new(buffer); 37 | let mut zip = zip::read::ZipArchive::new(cursor).unwrap(); 38 | let mut file1 = zip.by_name("file1").unwrap(); 39 | assert_eq!(file1.extra_data(), Some(&[] as &[u8])); 40 | let mut buffer = Vec::new(); 41 | file1.read_to_end(&mut buffer).unwrap(); 42 | assert_eq!(buffer.as_slice(), &[0, 0, 0, 0]); 43 | drop(file1); 44 | 45 | let mut file2 = zip.by_name("file2").unwrap(); 46 | let mut buffer = Vec::new(); 47 | file2.read_to_end(&mut buffer).unwrap(); 48 | assert_eq!(buffer.as_slice(), &[0, 0, 0, 0]); 49 | } 50 | 51 | /// Test writing a large zip64 file. This test will use upwards of 4GB of memory. 52 | #[tokio::test] 53 | async fn test_write_large_zip64_file() { 54 | init_logger(); 55 | 56 | // Allocate space with some extra for metadata records 57 | let mut buffer = Vec::with_capacity(BATCHED_FILE_SIZE + 100_000); 58 | let mut writer = ZipFileWriter::new(&mut buffer); 59 | 60 | // Stream-written zip files are dubiously spec-conformant. We need to specify a valid file size 61 | // in order for rs-zip (and unzip) to correctly read these files. 
62 | let entry = ZipEntryBuilder::new("file".to_string().into(), Compression::Stored) 63 | .size(BATCHED_FILE_SIZE as u64, BATCHED_FILE_SIZE as u64); 64 | let mut entry_writer = writer.write_entry_stream(entry).await.unwrap(); 65 | for _ in 0..NUM_BATCHES { 66 | entry_writer.write_all(&[0; BATCH_SIZE]).await.unwrap(); 67 | } 68 | entry_writer.close().await.unwrap(); 69 | 70 | assert!(writer.is_zip64); 71 | let cd_entry = writer.cd_entries.last().unwrap(); 72 | match &cd_entry.entry.extra_fields.last().unwrap() { 73 | ExtraField::Zip64ExtendedInformation(zip64) => { 74 | assert_eq!(zip64.compressed_size.unwrap(), BATCHED_FILE_SIZE as u64); 75 | assert_eq!(zip64.uncompressed_size.unwrap(), BATCHED_FILE_SIZE as u64); 76 | } 77 | e => panic!("Expected a Zip64 extended field, got {:?}", e), 78 | } 79 | assert_eq!(cd_entry.header.uncompressed_size, NON_ZIP64_MAX_SIZE); 80 | assert_eq!(cd_entry.header.compressed_size, NON_ZIP64_MAX_SIZE); 81 | writer.close().await.unwrap(); 82 | 83 | let cursor = std::io::Cursor::new(buffer); 84 | let mut archive = zip::read::ZipArchive::new(cursor).unwrap(); 85 | let mut file = archive.by_name("file").unwrap(); 86 | assert_eq!(file.compression(), zip::CompressionMethod::Stored); 87 | assert_eq!(file.size(), BATCHED_FILE_SIZE as u64); 88 | let mut buffer = [0; 100_000]; 89 | let mut bytes_total = 0; 90 | loop { 91 | let read_bytes = file.read(&mut buffer).unwrap(); 92 | if read_bytes == 0 { 93 | break; 94 | } 95 | bytes_total += read_bytes; 96 | } 97 | assert_eq!(bytes_total, BATCHED_FILE_SIZE); 98 | } 99 | 100 | /// Test writing a file, and reading it with async-zip 101 | #[tokio::test] 102 | async fn test_write_large_zip64_file_self_read() { 103 | use futures_lite::io::AsyncReadExt; 104 | 105 | init_logger(); 106 | 107 | // Allocate space with some extra for metadata records 108 | let mut buffer = Vec::with_capacity(BATCHED_FILE_SIZE + 100_000); 109 | let mut writer = ZipFileWriter::new(&mut buffer); 110 | 111 | let entry = 
ZipEntryBuilder::new("file".into(), Compression::Stored); 112 | let mut entry_writer = writer.write_entry_stream(entry).await.unwrap(); 113 | for _ in 0..NUM_BATCHES { 114 | entry_writer.write_all(&[0; BATCH_SIZE]).await.unwrap(); 115 | } 116 | entry_writer.close().await.unwrap(); 117 | writer.close().await.unwrap(); 118 | 119 | let reader = crate::base::read::mem::ZipFileReader::new(buffer).await.unwrap(); 120 | assert!(reader.file().zip64); 121 | assert_eq!(reader.file().entries[0].entry.filename().as_str().unwrap(), "file"); 122 | assert_eq!(reader.file().entries[0].entry.compressed_size, BATCHED_FILE_SIZE as u64); 123 | let mut entry = reader.reader_without_entry(0).await.unwrap(); 124 | 125 | let mut buffer = [0; 100_000]; 126 | let mut bytes_total = 0; 127 | loop { 128 | let read_bytes = entry.read(&mut buffer).await.unwrap(); 129 | if read_bytes == 0 { 130 | break; 131 | } 132 | bytes_total += read_bytes; 133 | } 134 | assert_eq!(bytes_total, BATCHED_FILE_SIZE); 135 | } 136 | 137 | /// Test writing a zip64 file with more than u16::MAX files. 138 | #[tokio::test] 139 | async fn test_write_zip64_file_many_entries() { 140 | init_logger(); 141 | 142 | // The generated file will likely be ~3MB in size. 
143 | let mut buffer = Vec::with_capacity(3_500_000); 144 | 145 | let mut writer = ZipFileWriter::new(&mut buffer); 146 | for i in 0..=u16::MAX as u32 + 1 { 147 | let entry = ZipEntryBuilder::new(i.to_string().into(), Compression::Stored); 148 | writer.write_entry_whole(entry, &[]).await.unwrap(); 149 | } 150 | assert!(writer.is_zip64); 151 | writer.close().await.unwrap(); 152 | 153 | let cursor = std::io::Cursor::new(buffer); 154 | let mut zip = zip::read::ZipArchive::new(cursor).unwrap(); 155 | assert_eq!(zip.len(), u16::MAX as usize + 2); 156 | 157 | for i in 0..=u16::MAX as u32 + 1 { 158 | let mut file = zip.by_name(&i.to_string()).unwrap(); 159 | let mut buf = Vec::new(); 160 | file.read_to_end(&mut buf).unwrap(); 161 | } 162 | } 163 | 164 | /// Tests that EntryWholeWriter switches to Zip64 mode when writing too many files for a non-Zip64. 165 | #[tokio::test] 166 | async fn test_zip64_when_many_files_whole() { 167 | let mut sink = AsyncSink; 168 | let mut writer = ZipFileWriter::new(&mut sink); 169 | for i in 0..=u16::MAX as u32 + 1 { 170 | let entry = ZipEntryBuilder::new(format!("{i}").into(), Compression::Stored); 171 | writer.write_entry_whole(entry, &[]).await.unwrap() 172 | } 173 | assert!(writer.is_zip64); 174 | writer.close().await.unwrap(); 175 | } 176 | 177 | /// Tests that EntryStreamWriter switches to Zip64 mode when writing too many files for a non-Zip64. 
178 | #[tokio::test] 179 | async fn test_zip64_when_many_files_stream() { 180 | let mut sink = AsyncSink; 181 | let mut writer = ZipFileWriter::new(&mut sink); 182 | for i in 0..=u16::MAX as u32 + 1 { 183 | let entry = ZipEntryBuilder::new(format!("{i}").into(), Compression::Stored); 184 | let entrywriter = writer.write_entry_stream(entry).await.unwrap(); 185 | entrywriter.close().await.unwrap(); 186 | } 187 | 188 | assert!(writer.is_zip64); 189 | writer.close().await.unwrap(); 190 | } 191 | 192 | /// Tests that when force_no_zip64 is true, EntryWholeWriter errors when trying to write more than 193 | /// u16::MAX files to a single archive. 194 | #[tokio::test] 195 | async fn test_force_no_zip64_errors_with_too_many_files_whole() { 196 | let mut sink = AsyncSink; 197 | let mut writer = ZipFileWriter::new(&mut sink).force_no_zip64(); 198 | for i in 0..u16::MAX { 199 | let entry = ZipEntryBuilder::new(format!("{i}").into(), Compression::Stored); 200 | writer.write_entry_whole(entry, &[]).await.unwrap() 201 | } 202 | let entry = ZipEntryBuilder::new("65537".to_string().into(), Compression::Stored); 203 | let result = writer.write_entry_whole(entry, &[]).await; 204 | 205 | assert!(matches!(result, Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles)))); 206 | } 207 | 208 | /// Tests that when force_no_zip64 is true, EntryStreamWriter errors when trying to write more than 209 | /// u16::MAX files to a single archive. 
210 | #[tokio::test] 211 | async fn test_force_no_zip64_errors_with_too_many_files_stream() { 212 | let mut sink = AsyncSink; 213 | let mut writer = ZipFileWriter::new(&mut sink).force_no_zip64(); 214 | for i in 0..u16::MAX { 215 | let entry = ZipEntryBuilder::new(format!("{i}").into(), Compression::Stored); 216 | let entrywriter = writer.write_entry_stream(entry).await.unwrap(); 217 | entrywriter.close().await.unwrap(); 218 | } 219 | let entry = ZipEntryBuilder::new("65537".to_string().into(), Compression::Stored); 220 | let entrywriter = writer.write_entry_stream(entry).await.unwrap(); 221 | let result = entrywriter.close().await; 222 | 223 | assert!(matches!(result, Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles)))); 224 | } 225 | 226 | /// Tests that when force_no_zip64 is true, EntryStreamWriter errors when trying to write 227 | /// a file larger than ~4 GiB to an archive. 228 | #[tokio::test] 229 | async fn test_force_no_zip64_errors_with_too_large_file_stream() { 230 | let mut sink = AsyncSink; 231 | let mut writer = ZipFileWriter::new(&mut sink).force_no_zip64(); 232 | 233 | let entry = ZipEntryBuilder::new("-".to_string().into(), Compression::Stored); 234 | let mut entrywriter = writer.write_entry_stream(entry).await.unwrap(); 235 | 236 | // Writing 4GB, 1kb at a time 237 | for _ in 0..NUM_BATCHES { 238 | entrywriter.write_all(&[0; BATCH_SIZE]).await.unwrap(); 239 | } 240 | let result = entrywriter.close().await; 241 | 242 | assert!(matches!(result, Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile)))); 243 | } 244 | -------------------------------------------------------------------------------- /src/tokio/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | //! A set of [`tokio`]-specific type aliases and features. 5 | //! 6 | //! # Usage 7 | //! 
With the `tokio` feature enabled, types from the [`base`] implementation will implement additional constructors 8 | //! for use with [`tokio`]. These constructors internally implement conversion between the required async IO traits. 9 | //! They are defined as: 10 | //! - [`base::read::seek::ZipFileReader::with_tokio()`] 11 | //! - [`base::read::stream::ZipFileReader::with_tokio()`] 12 | //! - [`base::write::ZipFileWriter::with_tokio()`] 13 | //! 14 | //! As a result of Rust's type inference, we are able to reuse the [`base`] implementation's types with considerable 15 | //! ease. There only exists one caveat with their use; the types returned by these constructors contain a wrapping 16 | //! compatibility type provided by an external crate. These compatibility types cannot be named unless you also pull in 17 | //! the [`tokio_util`] dependency manually. This is why we've provided type aliases within this module so that they can 18 | //! be named without needing to pull in a separate dependency. 19 | 20 | #[cfg(doc)] 21 | use crate::base; 22 | #[cfg(doc)] 23 | use tokio; 24 | #[cfg(doc)] 25 | use tokio_util; 26 | 27 | pub mod read; 28 | 29 | pub mod write { 30 | //! A module which supports writing ZIP files. 31 | 32 | #[cfg(doc)] 33 | use crate::base; 34 | use tokio_util::compat::Compat; 35 | 36 | /// A [`tokio`]-specific type alias for [`base::write::ZipFileWriter`]; 37 | pub type ZipFileWriter = crate::base::write::ZipFileWriter>; 38 | 39 | /// A [`tokio`]-specific type alias for [`base::write::EntryStreamWriter`]; 40 | pub type EntryStreamWriter<'a, W> = crate::base::write::EntryStreamWriter<'a, Compat>; 41 | } 42 | -------------------------------------------------------------------------------- /src/tokio/read/fs.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | //! 
A concurrent ZIP reader which acts over a file system path. 5 | //! 6 | //! Concurrency is achieved as a result of: 7 | //! - Wrapping the provided path within an [`Arc`] to allow shared ownership. 8 | //! - Constructing a new [`File`] from the path when reading. 9 | //! 10 | //! ### Usage 11 | //! Unlike the [`seek`] module, we no longer hold a mutable reference to any inner reader which in turn, allows the 12 | //! construction of concurrent [`ZipEntryReader`]s. Though, note that each individual [`ZipEntryReader`] cannot be sent 13 | //! between thread boundaries due to the masked lifetime requirement. Therefore, the overarching [`ZipFileReader`] 14 | //! should be cloned and moved into those contexts when needed. 15 | //! 16 | //! ### Concurrent Example 17 | //! ```no_run 18 | //! # use async_zip::tokio::read::fs::ZipFileReader; 19 | //! # use async_zip::error::Result; 20 | //! # use futures_lite::io::AsyncReadExt; 21 | //! # 22 | //! async fn run() -> Result<()> { 23 | //! let reader = ZipFileReader::new("./foo.zip").await?; 24 | //! let result = tokio::join!(read(&reader, 0), read(&reader, 1)); 25 | //! 26 | //! let data_0 = result.0?; 27 | //! let data_1 = result.1?; 28 | //! 29 | //! // Use data within current scope. 30 | //! 31 | //! Ok(()) 32 | //! } 33 | //! 34 | //! async fn read(reader: &ZipFileReader, index: usize) -> Result> { 35 | //! let mut entry = reader.reader_without_entry(index).await?; 36 | //! let mut data = Vec::new(); 37 | //! entry.read_to_end(&mut data).await?; 38 | //! Ok(data) 39 | //! } 40 | //! ``` 41 | //! 42 | //! ### Parallel Example 43 | //! ```no_run 44 | //! # use async_zip::tokio::read::fs::ZipFileReader; 45 | //! # use async_zip::error::Result; 46 | //! # use futures_lite::io::AsyncReadExt; 47 | //! # 48 | //! async fn run() -> Result<()> { 49 | //! let reader = ZipFileReader::new("./foo.zip").await?; 50 | //! 51 | //! let handle_0 = tokio::spawn(read(reader.clone(), 0)); 52 | //! 
let handle_1 = tokio::spawn(read(reader.clone(), 1)); 53 | //! 54 | //! let data_0 = handle_0.await.expect("thread panicked")?; 55 | //! let data_1 = handle_1.await.expect("thread panicked")?; 56 | //! 57 | //! // Use data within current scope. 58 | //! 59 | //! Ok(()) 60 | //! } 61 | //! 62 | //! async fn read(reader: ZipFileReader, index: usize) -> Result> { 63 | //! let mut entry = reader.reader_without_entry(index).await?; 64 | //! let mut data = Vec::new(); 65 | //! entry.read_to_end(&mut data).await?; 66 | //! Ok(data) 67 | //! } 68 | //! ``` 69 | 70 | #[cfg(doc)] 71 | use crate::base::read::seek; 72 | 73 | use crate::base::read::io::entry::{WithEntry, WithoutEntry, ZipEntryReader}; 74 | use crate::error::{Result, ZipError}; 75 | use crate::file::ZipFile; 76 | 77 | use std::path::{Path, PathBuf}; 78 | use std::sync::Arc; 79 | 80 | use tokio::fs::File; 81 | use tokio::io::BufReader; 82 | use tokio_util::compat::{Compat, TokioAsyncReadCompatExt}; 83 | 84 | struct Inner { 85 | path: PathBuf, 86 | file: ZipFile, 87 | } 88 | 89 | /// A concurrent ZIP reader which acts over a file system path. 90 | #[derive(Clone)] 91 | pub struct ZipFileReader { 92 | inner: Arc, 93 | } 94 | 95 | impl ZipFileReader { 96 | /// Constructs a new ZIP reader from a file system path. 97 | pub async fn new

(path: P) -> Result 98 | where 99 | P: AsRef, 100 | { 101 | let file = crate::base::read::file(File::open(&path).await?.compat()).await?; 102 | Ok(ZipFileReader::from_raw_parts(path, file)) 103 | } 104 | 105 | /// Constructs a ZIP reader from a file system path and ZIP file information derived from that path. 106 | /// 107 | /// Providing a [`ZipFile`] that wasn't derived from that path may lead to inaccurate parsing. 108 | pub fn from_raw_parts

(path: P, file: ZipFile) -> ZipFileReader 109 | where 110 | P: AsRef, 111 | { 112 | ZipFileReader { inner: Arc::new(Inner { path: path.as_ref().to_owned(), file }) } 113 | } 114 | 115 | /// Returns this ZIP file's information. 116 | pub fn file(&self) -> &ZipFile { 117 | &self.inner.file 118 | } 119 | 120 | /// Returns the file system path provided to the reader during construction. 121 | pub fn path(&self) -> &Path { 122 | &self.inner.path 123 | } 124 | 125 | /// Returns a new entry reader if the provided index is valid. 126 | pub async fn reader_without_entry( 127 | &self, 128 | index: usize, 129 | ) -> Result>, WithoutEntry>> { 130 | let stored_entry = self.inner.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; 131 | let mut fs_file = BufReader::new(File::open(&self.inner.path).await?).compat(); 132 | 133 | stored_entry.seek_to_data_offset(&mut fs_file).await?; 134 | 135 | Ok(ZipEntryReader::new_with_owned( 136 | fs_file, 137 | stored_entry.entry.compression(), 138 | stored_entry.entry.compressed_size(), 139 | )) 140 | } 141 | 142 | /// Returns a new entry reader if the provided index is valid. 
143 | pub async fn reader_with_entry( 144 | &self, 145 | index: usize, 146 | ) -> Result>, WithEntry<'_>>> { 147 | let stored_entry = self.inner.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; 148 | let mut fs_file = BufReader::new(File::open(&self.inner.path).await?).compat(); 149 | 150 | stored_entry.seek_to_data_offset(&mut fs_file).await?; 151 | 152 | let reader = ZipEntryReader::new_with_owned( 153 | fs_file, 154 | stored_entry.entry.compression(), 155 | stored_entry.entry.compressed_size(), 156 | ); 157 | 158 | Ok(reader.into_with_entry(stored_entry)) 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/tokio/read/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | //! A module which supports reading ZIP files. 5 | 6 | use tokio_util::compat::Compat; 7 | 8 | #[cfg(feature = "tokio-fs")] 9 | pub mod fs; 10 | #[cfg(doc)] 11 | use crate::base; 12 | #[cfg(doc)] 13 | use tokio; 14 | 15 | /// A [`tokio`]-specific type alias for [`base::read::ZipEntryReader`]; 16 | pub type ZipEntryReader<'a, R, E> = crate::base::read::ZipEntryReader<'a, Compat, E>; 17 | 18 | pub mod seek { 19 | //! A ZIP reader which acts over a seekable source. 20 | use tokio_util::compat::Compat; 21 | 22 | #[cfg(doc)] 23 | use crate::base; 24 | #[cfg(doc)] 25 | use tokio; 26 | 27 | /// A [`tokio`]-specific type alias for [`base::read::seek::ZipFileReader`]; 28 | pub type ZipFileReader = crate::base::read::seek::ZipFileReader>; 29 | } 30 | 31 | pub mod stream { 32 | //! A ZIP reader which acts over a non-seekable source. 
33 | 34 | #[cfg(doc)] 35 | use crate::base; 36 | #[cfg(doc)] 37 | use tokio; 38 | use tokio_util::compat::Compat; 39 | 40 | /// A [`tokio`]-specific type alias for [`base::read::stream::Reading`]; 41 | pub type Reading<'a, R, E> = crate::base::read::stream::Reading<'a, Compat, E>; 42 | /// A [`tokio`]-specific type alias for [`base::read::stream::Ready`]; 43 | pub type Ready = crate::base::read::stream::Ready>; 44 | } 45 | -------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use crate::error::{Result, ZipError}; 5 | use futures_lite::io::{AsyncRead, AsyncReadExt}; 6 | 7 | // Assert that the next four-byte signature read by a reader which impls AsyncRead matches the expected signature. 8 | pub(crate) async fn assert_signature(reader: &mut R, expected: u32) -> Result<()> { 9 | let signature = { 10 | let mut buffer = [0; 4]; 11 | reader.read_exact(&mut buffer).await?; 12 | u32::from_le_bytes(buffer) 13 | }; 14 | match signature { 15 | actual if actual == expected => Ok(()), 16 | actual => Err(ZipError::UnexpectedHeaderError(actual, expected)), 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /tests/common/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use async_zip::base::read::mem; 5 | use async_zip::base::read::seek; 6 | use async_zip::base::write::ZipFileWriter; 7 | use async_zip::Compression; 8 | use async_zip::ZipEntryBuilder; 9 | use futures_lite::io::AsyncWriteExt; 10 | use tokio::fs::File; 11 | use tokio::io::BufReader; 12 | use 
tokio_util::compat::TokioAsyncReadCompatExt; 13 | 14 | const FOLDER_PREFIX: &str = "tests/test_inputs"; 15 | 16 | const FILE_LIST: &[&str] = &[ 17 | "sample_data/alpha/back_to_front.txt", 18 | "sample_data/alpha/front_to_back.txt", 19 | "sample_data/numeric/forward.txt", 20 | "sample_data/numeric/reverse.txt", 21 | ]; 22 | 23 | pub async fn compress_to_mem(compress: Compression) -> Vec { 24 | let mut bytes = Vec::with_capacity(10_000); 25 | let mut writer = ZipFileWriter::new(&mut bytes); 26 | 27 | for fname in FILE_LIST { 28 | let content = tokio::fs::read(format!("{FOLDER_PREFIX}/{fname}")).await.unwrap(); 29 | let opts = ZipEntryBuilder::new(fname.to_string().into(), compress); 30 | 31 | let mut entry_writer = writer.write_entry_stream(opts).await.unwrap(); 32 | entry_writer.write_all(&content).await.unwrap(); 33 | entry_writer.close().await.unwrap(); 34 | } 35 | writer.close().await.unwrap(); 36 | bytes 37 | } 38 | 39 | #[cfg(feature = "tokio-fs")] 40 | pub async fn check_decompress_fs(fname: &str) { 41 | use async_zip::tokio::read::fs; 42 | let zip = fs::ZipFileReader::new(fname).await.unwrap(); 43 | let zip_entries: Vec<_> = zip.file().entries().to_vec(); 44 | for (idx, entry) in zip_entries.into_iter().enumerate() { 45 | // TODO: resolve unwrap usage 46 | if entry.dir().unwrap() { 47 | continue; 48 | } 49 | // TODO: resolve unwrap usage 50 | let fname = entry.filename().as_str().unwrap(); 51 | let mut output = String::new(); 52 | let mut reader = zip.reader_with_entry(idx).await.unwrap(); 53 | let _ = reader.read_to_string_checked(&mut output).await.unwrap(); 54 | let fs_file = format!("{FOLDER_PREFIX}/{fname}"); 55 | let expected = tokio::fs::read_to_string(fs_file).await.unwrap(); 56 | assert_eq!(output, expected, "for {fname}, expect zip data to match file data"); 57 | } 58 | } 59 | 60 | pub async fn check_decompress_seek(fname: &str) { 61 | let file = BufReader::new(File::open(fname).await.unwrap()); 62 | let mut file_compat = file.compat(); 63 | let mut 
zip = seek::ZipFileReader::new(&mut file_compat).await.unwrap(); 64 | let zip_entries: Vec<_> = zip.file().entries().to_vec(); 65 | for (idx, entry) in zip_entries.into_iter().enumerate() { 66 | // TODO: resolve unwrap usage 67 | if entry.dir().unwrap() { 68 | continue; 69 | } 70 | // TODO: resolve unwrap usage 71 | let fname = entry.filename().as_str().unwrap(); 72 | let mut output = String::new(); 73 | let mut reader = zip.reader_with_entry(idx).await.unwrap(); 74 | let _ = reader.read_to_string_checked(&mut output).await.unwrap(); 75 | let fs_file = format!("tests/test_inputs/{fname}"); 76 | let expected = tokio::fs::read_to_string(fs_file).await.unwrap(); 77 | assert_eq!(output, expected, "for {fname}, expect zip data to match file data"); 78 | } 79 | } 80 | 81 | pub async fn check_decompress_mem(zip_data: Vec) { 82 | let zip = mem::ZipFileReader::new(zip_data).await.unwrap(); 83 | let zip_entries: Vec<_> = zip.file().entries().to_vec(); 84 | for (idx, entry) in zip_entries.into_iter().enumerate() { 85 | // TODO: resolve unwrap usage 86 | if entry.dir().unwrap() { 87 | continue; 88 | } 89 | // TODO: resolve unwrap usage 90 | let fname = entry.filename().as_str().unwrap(); 91 | let mut output = String::new(); 92 | let mut reader = zip.reader_with_entry(idx).await.unwrap(); 93 | let _ = reader.read_to_string_checked(&mut output).await.unwrap(); 94 | let fs_file = format!("{FOLDER_PREFIX}/{fname}"); 95 | let expected = tokio::fs::read_to_string(fs_file).await.unwrap(); 96 | assert_eq!(output, expected, "for {fname}, expect zip data to match file data"); 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /tests/compress_test.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use async_zip::{Compression, ZipEntryBuilder, ZipString}; 5 | 
use futures_lite::AsyncWriteExt; 6 | 7 | mod common; 8 | 9 | #[cfg(feature = "zstd")] 10 | #[tokio::test] 11 | async fn zip_zstd_in_out() { 12 | let zip_data = common::compress_to_mem(Compression::Zstd).await; 13 | common::check_decompress_mem(zip_data).await 14 | } 15 | 16 | #[cfg(feature = "deflate")] 17 | #[tokio::test] 18 | async fn zip_decompress_in_out() { 19 | let zip_data = common::compress_to_mem(Compression::Deflate).await; 20 | common::check_decompress_mem(zip_data).await 21 | } 22 | 23 | #[tokio::test] 24 | async fn zip_store_in_out() { 25 | let zip_data = common::compress_to_mem(Compression::Stored).await; 26 | common::check_decompress_mem(zip_data).await 27 | } 28 | 29 | #[tokio::test] 30 | async fn zip_utf8_extra_in_out_stream() { 31 | let mut zip_bytes = Vec::with_capacity(10_000); 32 | 33 | { 34 | // writing 35 | let content = "Test".as_bytes(); 36 | let mut writer = async_zip::base::write::ZipFileWriter::new(&mut zip_bytes); 37 | let filename = 38 | ZipString::new_with_alternative("\u{4E2D}\u{6587}.txt".to_string(), b"\xD6\xD0\xCe\xC4.txt".to_vec()); 39 | let opts = ZipEntryBuilder::new(filename, Compression::Stored); 40 | 41 | let mut entry_writer = writer.write_entry_stream(opts).await.unwrap(); 42 | entry_writer.write_all(content).await.unwrap(); 43 | entry_writer.close().await.unwrap(); 44 | 45 | writer.close().await.unwrap(); 46 | } 47 | 48 | { 49 | // reading 50 | let zip = async_zip::base::read::mem::ZipFileReader::new(zip_bytes).await.unwrap(); 51 | let zip_entries: Vec<_> = zip.file().entries().to_vec(); 52 | assert_eq!(zip_entries.len(), 1); 53 | assert_eq!(zip_entries[0].filename().as_str().unwrap(), "\u{4E2D}\u{6587}.txt"); 54 | assert_eq!(zip_entries[0].filename().alternative(), Some(b"\xD6\xD0\xCe\xC4.txt".as_ref())); 55 | } 56 | } 57 | 58 | #[tokio::test] 59 | async fn zip_utf8_extra_in_out_whole() { 60 | let mut zip_bytes = Vec::with_capacity(10_000); 61 | 62 | { 63 | // writing 64 | let content = "Test".as_bytes(); 65 | let mut 
writer = async_zip::base::write::ZipFileWriter::new(&mut zip_bytes); 66 | let filename = 67 | ZipString::new_with_alternative("\u{4E2D}\u{6587}.txt".to_string(), b"\xD6\xD0\xCe\xC4.txt".to_vec()); 68 | let opts = ZipEntryBuilder::new(filename, Compression::Stored); 69 | writer.write_entry_whole(opts, content).await.unwrap(); 70 | writer.close().await.unwrap(); 71 | } 72 | 73 | { 74 | // reading 75 | let zip = async_zip::base::read::mem::ZipFileReader::new(zip_bytes).await.unwrap(); 76 | let zip_entries: Vec<_> = zip.file().entries().to_vec(); 77 | assert_eq!(zip_entries.len(), 1); 78 | assert_eq!(zip_entries[0].filename().as_str().unwrap(), "\u{4E2D}\u{6587}.txt"); 79 | assert_eq!(zip_entries[0].filename().alternative(), Some(b"\xD6\xD0\xCe\xC4.txt".as_ref())); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /tests/decompress_test.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Harry [Majored] [hello@majored.pw] 2 | // MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) 3 | 4 | use tokio::io::BufReader; 5 | use tokio_util::compat::TokioAsyncReadCompatExt; 6 | 7 | mod common; 8 | 9 | const ZSTD_ZIP_FILE: &str = "tests/test_inputs/sample_data.zstd.zip"; 10 | const DEFLATE_ZIP_FILE: &str = "tests/test_inputs/sample_data.deflate.zip"; 11 | const STORE_ZIP_FILE: &str = "tests/test_inputs/sample_data.store.zip"; 12 | const UTF8_EXTRA_ZIP_FILE: &str = "tests/test_inputs/sample_data_utf8_extra.zip"; 13 | 14 | #[cfg(feature = "zstd")] 15 | #[tokio::test] 16 | async fn decompress_zstd_zip_seek() { 17 | common::check_decompress_seek(ZSTD_ZIP_FILE).await 18 | } 19 | 20 | #[cfg(feature = "deflate")] 21 | #[tokio::test] 22 | async fn decompress_deflate_zip_seek() { 23 | common::check_decompress_seek(DEFLATE_ZIP_FILE).await 24 | } 25 | 26 | #[tokio::test] 27 | async fn check_empty_zip_seek() { 28 | let mut data: Vec = Vec::new(); 29 | 
async_zip::base::write::ZipFileWriter::new(futures::io::Cursor::new(&mut data)).close().await.unwrap(); 30 | async_zip::base::read::seek::ZipFileReader::new(futures::io::Cursor::new(&data)).await.unwrap(); 31 | } 32 | 33 | #[tokio::test] 34 | async fn decompress_store_zip_seek() { 35 | common::check_decompress_seek(STORE_ZIP_FILE).await 36 | } 37 | 38 | #[cfg(feature = "zstd")] 39 | #[tokio::test] 40 | async fn decompress_zstd_zip_mem() { 41 | let content = tokio::fs::read(ZSTD_ZIP_FILE).await.unwrap(); 42 | common::check_decompress_mem(content).await 43 | } 44 | 45 | #[cfg(feature = "deflate")] 46 | #[tokio::test] 47 | async fn decompress_deflate_zip_mem() { 48 | let content = tokio::fs::read(DEFLATE_ZIP_FILE).await.unwrap(); 49 | common::check_decompress_mem(content).await 50 | } 51 | 52 | #[tokio::test] 53 | async fn decompress_store_zip_mem() { 54 | let content = tokio::fs::read(STORE_ZIP_FILE).await.unwrap(); 55 | common::check_decompress_mem(content).await 56 | } 57 | 58 | #[cfg(feature = "zstd")] 59 | #[cfg(feature = "tokio-fs")] 60 | #[tokio::test] 61 | async fn decompress_zstd_zip_fs() { 62 | common::check_decompress_fs(ZSTD_ZIP_FILE).await 63 | } 64 | 65 | #[cfg(feature = "deflate")] 66 | #[cfg(feature = "tokio-fs")] 67 | #[tokio::test] 68 | async fn decompress_deflate_zip_fs() { 69 | common::check_decompress_fs(DEFLATE_ZIP_FILE).await 70 | } 71 | 72 | #[cfg(feature = "tokio-fs")] 73 | #[tokio::test] 74 | async fn decompress_store_zip_fs() { 75 | common::check_decompress_fs(STORE_ZIP_FILE).await 76 | } 77 | 78 | #[tokio::test] 79 | async fn decompress_zip_with_utf8_extra() { 80 | let file = BufReader::new(tokio::fs::File::open(UTF8_EXTRA_ZIP_FILE).await.unwrap()); 81 | let mut file_compat = file.compat(); 82 | let zip = async_zip::base::read::seek::ZipFileReader::new(&mut file_compat).await.unwrap(); 83 | let zip_entries: Vec<_> = zip.file().entries().to_vec(); 84 | assert_eq!(zip_entries.len(), 1); 85 | assert_eq!(zip_entries[0].header_size(), 93); 86 | 
assert_eq!(zip_entries[0].filename().as_str().unwrap(), "\u{4E2D}\u{6587}.txt"); 87 | assert_eq!(zip_entries[0].filename().alternative(), Some(b"\xD6\xD0\xCe\xC4.txt".as_ref())); 88 | } 89 | -------------------------------------------------------------------------------- /tests/test_inputs/sample_data.deflate.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Majored/rs-async-zip/527bda9d58c1ba1fa973a0faeb68dce91fa4ffe4/tests/test_inputs/sample_data.deflate.zip -------------------------------------------------------------------------------- /tests/test_inputs/sample_data.store.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Majored/rs-async-zip/527bda9d58c1ba1fa973a0faeb68dce91fa4ffe4/tests/test_inputs/sample_data.store.zip -------------------------------------------------------------------------------- /tests/test_inputs/sample_data.zstd.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Majored/rs-async-zip/527bda9d58c1ba1fa973a0faeb68dce91fa4ffe4/tests/test_inputs/sample_data.zstd.zip -------------------------------------------------------------------------------- /tests/test_inputs/sample_data/alpha/back_to_front.txt: -------------------------------------------------------------------------------- 1 | Z,z,Y,y,X,x,W,w,V,v,U,u,T,t,S,s,R,r,Q,q,P,p,O,o,N,n,M,m,L,l,K,k,J,j,I,I,H,h,G,g,F,f,E,e,D,d,C,c,B,b,A,a 2 | Z,z,Y,y,X,x,W,w,V,v,U,u,T,t,S,s,R,r,Q,q,P,p,O,o,N,n,M,m,L,l,K,k,J,j,I,I,H,h,G,g,F,f,E,e,D,d,C,c,B,b,A,a 3 | Z,z,Y,y,X,x,W,w,V,v,U,u,T,t,S,s,R,r,Q,q,P,p,O,o,N,n,M,m,L,l,K,k,J,j,I,I,H,h,G,g,F,f,E,e,D,d,C,c,B,b,A,a 4 | Z,z,Y,y,X,x,W,w,V,v,U,u,T,t,S,s,R,r,Q,q,P,p,O,o,N,n,M,m,L,l,K,k,J,j,I,I,H,h,G,g,F,f,E,e,D,d,C,c,B,b,A,a 5 | -------------------------------------------------------------------------------- 
/tests/test_inputs/sample_data/alpha/front_to_back.txt: -------------------------------------------------------------------------------- 1 | A,a,B,b,C,c,D,d,E,e,F,f,G,g,H,h,I,I,J,j,K,k,L,l,M,m,N,n,O,o,P,p,Q,q,R,r,S,s,T,t,U,u,V,v,W,w,X,x,Y,y,Z,z 2 | A,a,B,b,C,c,D,d,E,e,F,f,G,g,H,h,I,I,J,j,K,k,L,l,M,m,N,n,O,o,P,p,Q,q,R,r,S,s,T,t,U,u,V,v,W,w,X,x,Y,y,Z,z 3 | A,a,B,b,C,c,D,d,E,e,F,f,G,g,H,h,I,I,J,j,K,k,L,l,M,m,N,n,O,o,P,p,Q,q,R,r,S,s,T,t,U,u,V,v,W,w,X,x,Y,y,Z,z 4 | A,a,B,b,C,c,D,d,E,e,F,f,G,g,H,h,I,I,J,j,K,k,L,l,M,m,N,n,O,o,P,p,Q,q,R,r,S,s,T,t,U,u,V,v,W,w,X,x,Y,y,Z,z 5 | -------------------------------------------------------------------------------- /tests/test_inputs/sample_data/numeric/forward.txt: -------------------------------------------------------------------------------- 1 | 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32 2 | -------------------------------------------------------------------------------- /tests/test_inputs/sample_data/numeric/reverse.txt: -------------------------------------------------------------------------------- 1 | 32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1 2 | -------------------------------------------------------------------------------- /tests/test_inputs/sample_data_utf8_extra.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Majored/rs-async-zip/527bda9d58c1ba1fa973a0faeb68dce91fa4ffe4/tests/test_inputs/sample_data_utf8_extra.zip --------------------------------------------------------------------------------