├── .editorconfig ├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ ├── release-plz.yml │ └── release.yml ├── .gitignore ├── .rustfmt.toml ├── .vscode └── settings.json ├── CHANGELOG.md ├── CITATION.cff ├── COPYRIGHT ├── Cargo.lock ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── SPEC.md ├── crates ├── ozarc │ ├── Cargo.toml │ └── src │ │ ├── framing.rs │ │ └── lib.rs ├── zarc-cli │ ├── Cargo.toml │ ├── build.rs │ ├── manifest.rc │ ├── src │ │ ├── args.rs │ │ ├── debug.rs │ │ ├── list_files.rs │ │ ├── logs.rs │ │ ├── main.rs │ │ ├── pack.rs │ │ └── unpack.rs │ ├── wix │ │ └── main.wxs │ └── zarc.exe.manifest └── zarc │ ├── Cargo.toml │ └── src │ ├── constants.rs │ ├── decode.rs │ ├── decode │ ├── directory.rs │ ├── error.rs │ ├── frame_iterator.rs │ ├── open.rs │ └── zstd_iterator.rs │ ├── directory.rs │ ├── directory │ ├── edition.rs │ ├── elements.rs │ ├── file.rs │ ├── frame.rs │ ├── posix_owner.rs │ ├── specials.rs │ ├── strings.rs │ └── timestamps.rs │ ├── encode.rs │ ├── encode │ ├── add_file.rs │ ├── content_frame.rs │ ├── directory.rs │ └── lowlevel_frames.rs │ ├── header.rs │ ├── integrity.rs │ ├── lib.rs │ ├── metadata.rs │ ├── metadata │ ├── decode.rs │ └── encode.rs │ ├── ondemand.rs │ ├── owner_cache.rs │ └── trailer.rs └── zarc.magic /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = tab 5 | indent_size = 4 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | 11 | [*.yml] 12 | indent_style = space 13 | indent_size = 2 14 | 15 | [*.md] 16 | indent_style = space 17 | indent_size = 2 18 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | - package-ecosystem: cargo 8 | directory: / 9 | schedule: 10 | interval: weekly 11 | - package-ecosystem: cargo 12 | directory: /crates/ozarc 13 | schedule: 14 | interval: weekly 15 | - package-ecosystem: cargo 16 | directory: /crates/zarc 17 | schedule: 18 | interval: weekly 19 | - package-ecosystem: cargo 20 | directory: /crates/zarc-cli 21 | schedule: 22 | interval: weekly 23 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | push: 7 | branches: 8 | - main 9 | tags-ignore: 10 | - "*" 11 | 12 | env: 13 | CARGO_TERM_COLOR: always 14 | CARGO_UNSTABLE_SPARSE_REGISTRY: "true" 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.ref || github.run_id }} 18 | cancel-in-progress: true 19 | 20 | jobs: 21 | test: 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | platform: 26 | - macos 27 | - ubuntu 28 | - windows 29 | command: 30 | - test 31 | - clippy 32 | 33 | name: ${{ matrix.platform }} / ${{ matrix.command }} 34 | runs-on: "${{ matrix.platform }}-latest" 35 | 36 | steps: 37 | - uses: actions/checkout@v4 38 | - name: Configure toolchain 39 | run: | 40 | rustup toolchain install --profile minimal --no-self-update stable 41 | rustup default stable 42 | 43 | # https://github.com/actions/cache/issues/752 44 | - if: ${{ runner.os == 'Windows' }} 45 | name: Use GNU tar 46 | shell: cmd 47 | 
run: | 48 | echo "Adding GNU tar to PATH" 49 | echo C:\Program Files\Git\usr\bin>>"%GITHUB_PATH%" 50 | 51 | - if: ${{ runner.os == 'Linux' }} 52 | run: | 53 | sudo apt-get update 54 | sudo apt-get install -y libext2fs-dev 55 | 56 | - name: Cargo caching 57 | uses: actions/cache@v4 58 | with: 59 | path: | 60 | ~/.cargo/registry/index/ 61 | ~/.cargo/registry/cache/ 62 | ~/.cargo/git/db/ 63 | key: ${{ runner.os }}-cargo-stable-${{ hashFiles('**/Cargo.lock') }} 64 | restore-keys: | 65 | ${{ runner.os }}-cargo-stable- 66 | ${{ runner.os }}-cargo- 67 | 68 | - name: Compilation caching 69 | uses: actions/cache@v4 70 | with: 71 | path: target/ 72 | key: ${{ runner.os }}-target-stable-${{ hashFiles('**/Cargo.lock') }} 73 | 74 | - run: cargo ${{ matrix.command }} 75 | 76 | tests-pass: 77 | if: always() 78 | name: Tests pass 79 | needs: [test] 80 | runs-on: ubuntu-latest 81 | steps: 82 | - uses: re-actors/alls-green@release/v1 83 | with: 84 | jobs: ${{ toJSON(needs) }} 85 | -------------------------------------------------------------------------------- /.github/workflows/release-plz.yml: -------------------------------------------------------------------------------- 1 | name: Release plz 2 | 3 | permissions: 4 | pull-requests: write 5 | contents: write 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | release-plz: 14 | if: false 15 | name: Release-plz 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 22 | - name: Configure toolchain 23 | run: | 24 | rustup toolchain install --profile minimal --no-self-update stable 25 | rustup default stable 26 | - name: Run release-plz 27 | uses: MarcoIeni/release-plz-action@v0.5 28 | env: 29 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 30 | CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} 31 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023, axodotdev 2 | # SPDX-License-Identifier: MIT or Apache-2.0 3 | # 4 | # CI that: 5 | # 6 | # * checks for a Git Tag that looks like a release 7 | # * builds artifacts with cargo-dist (archives, installers, hashes) 8 | # * uploads those artifacts to temporary workflow zip 9 | # * on success, uploads the artifacts to Axo Releases and makes an Announcement 10 | 11 | name: Release 12 | 13 | permissions: 14 | contents: write 15 | 16 | # This task will run whenever you push a git tag that looks like a version 17 | # like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc. 18 | # Various formats will be parsed into a VERSION and an optional PACKAGE_NAME, where 19 | # PACKAGE_NAME must be the name of a Cargo package in your workspace, and VERSION 20 | # must be a Cargo-style SemVer Version (must have at least major.minor.patch). 21 | # 22 | # If PACKAGE_NAME is specified, then the announcement will be for that 23 | # package (erroring out if it doesn't have the given version or isn't cargo-dist-able). 24 | # 25 | # If PACKAGE_NAME isn't specified, then the announcement will be for all 26 | # (cargo-dist-able) packages in the workspace with that version (this mode is 27 | # intended for workspaces with only one dist-able package, or with all dist-able 28 | # packages versioned/released in lockstep). 
29 | # 30 | # If you push multiple tags at once, separate instances of this workflow will 31 | # spin up, creating an independent announcement for each one. However Github 32 | # will hard limit this to 3 tags per commit, as it will assume more tags is a 33 | # mistake. 34 | # 35 | # If there's a prerelease-style suffix to the version, then the release(s) 36 | # will be marked as a prerelease. 37 | on: 38 | push: 39 | tags: 40 | - '**[0-9]+.[0-9]+.[0-9]+*' 41 | pull_request: 42 | 43 | jobs: 44 | # Run 'cargo dist plan' (or host) to determine what tasks we need to do 45 | plan: 46 | runs-on: ubuntu-latest 47 | outputs: 48 | val: ${{ steps.plan.outputs.manifest }} 49 | tag: ${{ !github.event.pull_request && github.ref_name || '' }} 50 | tag-flag: ${{ !github.event.pull_request && format('--tag={0}', github.ref_name) || '' }} 51 | publishing: ${{ !github.event.pull_request }} 52 | env: 53 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 54 | AXO_RELEASES_TOKEN: ${{ secrets.AXO_RELEASES_TOKEN }} 55 | steps: 56 | - uses: actions/checkout@v4 57 | with: 58 | submodules: recursive 59 | - name: Install cargo-dist 60 | # we specify bash to get pipefail; it guards against the `curl` command 61 | # failing. otherwise `sh` won't catch that `curl` returned non-0 62 | shell: bash 63 | run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.8.1/cargo-dist-installer.sh | sh" 64 | # sure would be cool if github gave us proper conditionals... 65 | # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible 66 | # functionality based on whether this is a pull_request, and whether it's from a fork. 67 | # (PRs run on the *source* but secrets are usually on the *target* -- that's *good* 68 | # but also really annoying to build CI around when it needs secrets to work right.) 69 | - id: plan 70 | run: | 71 | cargo dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || (env.AXO_RELEASES_TOKEN && 'host --steps=check') || 'plan' }} --output-format=json > dist-manifest.json 72 | echo "cargo dist ran successfully" 73 | cat dist-manifest.json 74 | echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT" 75 | - name: "Upload dist-manifest.json" 76 | uses: actions/upload-artifact@v3 77 | with: 78 | name: artifacts 79 | path: dist-manifest.json 80 | 81 | # Build and packages all the platform-specific things 82 | build-local-artifacts: 83 | name: build-local-artifacts (${{ join(matrix.targets, ', ') }}) 84 | # Let the initial task tell us to not run (currently very blunt) 85 | needs: 86 | - plan 87 | if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }} 88 | strategy: 89 | fail-fast: false 90 | # Target platforms/runners are computed by cargo-dist in create-release. 
91 | # Each member of the matrix has the following arguments: 92 | # 93 | # - runner: the github runner 94 | # - dist-args: cli flags to pass to cargo dist 95 | # - install-dist: expression to run to install cargo-dist on the runner 96 | # 97 | # Typically there will be: 98 | # - 1 "global" task that builds universal installers 99 | # - N "local" tasks that build each platform's binaries and platform-specific installers 100 | matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }} 101 | runs-on: ${{ matrix.runner }} 102 | env: 103 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 104 | BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json 105 | steps: 106 | - uses: actions/checkout@v4 107 | with: 108 | submodules: recursive 109 | - uses: swatinem/rust-cache@v2 110 | - name: Install cargo-dist 111 | run: ${{ matrix.install_dist }} 112 | # Get the dist-manifest 113 | - name: Fetch local artifacts 114 | uses: actions/download-artifact@v3 115 | with: 116 | name: artifacts 117 | path: target/distrib/ 118 | - name: Install dependencies 119 | run: | 120 | ${{ matrix.packages_install }} 121 | - name: Build artifacts 122 | run: | 123 | # Actually do builds and make zips and whatnot 124 | cargo dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json 125 | echo "cargo dist ran successfully" 126 | - id: cargo-dist 127 | name: Post-build 128 | # We force bash here just because github makes it really hard to get values up 129 | # to "real" actions without writing to env-vars, and writing to env-vars has 130 | # inconsistent syntax between shell and powershell. 131 | shell: bash 132 | run: | 133 | # Parse out what we just built and upload it to scratch storage 134 | echo "paths<<EOF" >> "$GITHUB_OUTPUT" 135 | jq --raw-output ".artifacts[]?.path | select( . != null )" dist-manifest.json >> "$GITHUB_OUTPUT" 136 | echo "EOF" >> "$GITHUB_OUTPUT" 137 | 138 | cp dist-manifest.json "$BUILD_MANIFEST_NAME" 139 | - name: "Upload artifacts" 140 | uses: actions/upload-artifact@v3 141 | with: 142 | name: artifacts 143 | path: | 144 | ${{ steps.cargo-dist.outputs.paths }} 145 | ${{ env.BUILD_MANIFEST_NAME }} 146 | 147 | # Build and package all the platform-agnostic(ish) things 148 | build-global-artifacts: 149 | needs: 150 | - plan 151 | - build-local-artifacts 152 | runs-on: "ubuntu-20.04" 153 | env: 154 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 155 | BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json 156 | steps: 157 | - uses: actions/checkout@v4 158 | with: 159 | submodules: recursive 160 | - name: Install cargo-dist 161 | run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.8.1/cargo-dist-installer.sh | sh" 162 | # Get all the local artifacts for the global tasks to use (for e.g. checksums) 163 | - name: Fetch local artifacts 164 | uses: actions/download-artifact@v3 165 | with: 166 | name: artifacts 167 | path: target/distrib/ 168 | - id: cargo-dist 169 | shell: bash 170 | run: | 171 | cargo dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json 172 | echo "cargo dist ran successfully" 173 | 174 | # Parse out what we just built and upload it to scratch storage 175 | echo "paths<<EOF" >> "$GITHUB_OUTPUT" 176 | jq --raw-output ".artifacts[]?.path | select( .
!= null )" dist-manifest.json >> "$GITHUB_OUTPUT" 177 | echo "EOF" >> "$GITHUB_OUTPUT" 178 | 179 | cp dist-manifest.json "$BUILD_MANIFEST_NAME" 180 | - name: "Upload artifacts" 181 | uses: actions/upload-artifact@v3 182 | with: 183 | name: artifacts 184 | path: | 185 | ${{ steps.cargo-dist.outputs.paths }} 186 | ${{ env.BUILD_MANIFEST_NAME }} 187 | # Uploads the artifacts to Axo Releases and tentatively creates Releases for them. 188 | # This makes perma URLs like /v1.0.0/ live for subsequent publish steps to use, but 189 | # leaves them "disconnected" from the release history (for the purposes of 190 | # "list the releases" or "give me the latest releases"). 191 | # 192 | # If all the subsequent "publish" steps succeed, the "announce" job will "connect" 193 | # the releases and concepts like "latest" will be updated. Otherwise you're hopefully 194 | # in a decent position to roll back the release without anyone noticing it! 195 | # This is imperfect with things like "publish to crates.io" being irreversible, but 196 | # at worst you're in a better position to yank the version with minimum disruption. 197 | host: 198 | needs: 199 | - plan 200 | - build-local-artifacts 201 | - build-global-artifacts 202 | # Only run if we're "publishing", and only if local and global didn't fail (skipped is fine) 203 | if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }} 204 | env: 205 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 206 | AXO_RELEASES_TOKEN: ${{ secrets.AXO_RELEASES_TOKEN }} 207 | runs-on: "ubuntu-20.04" 208 | outputs: 209 | val: ${{ steps.host.outputs.manifest }} 210 | steps: 211 | - uses: actions/checkout@v4 212 | with: 213 | submodules: recursive 214 | - name: Install cargo-dist 215 | run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.8.1/cargo-dist-installer.sh | sh" 216 | # Fetch artifacts from scratch-storage 217 | - name: Fetch artifacts 218 | uses: actions/download-artifact@v3 219 | with: 220 | name: artifacts 221 | path: target/distrib/ 222 | # Upload files to Axo Releases and create the Releases 223 | - id: host 224 | shell: bash 225 | run: | 226 | cargo dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json 227 | echo "artifacts uploaded and released successfully" 228 | cat dist-manifest.json 229 | echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT" 230 | - name: "Upload dist-manifest.json" 231 | uses: actions/upload-artifact@v3 232 | with: 233 | name: artifacts 234 | path: dist-manifest.json 235 | 236 | # Create an Announcement for all the Axo Releases, updating the "latest" release 237 | announce: 238 | needs: 239 | - plan 240 | - host 241 | # use "always() && ..." to allow us to wait for all publish jobs while 242 | # still allowing individual publish jobs to skip themselves (for prereleases). 243 | # "host" however must run to completion, no skipping allowed! 
244 | if: ${{ always() && needs.host.result == 'success' }} 245 | runs-on: "ubuntu-20.04" 246 | env: 247 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 248 | AXO_RELEASES_TOKEN: ${{ secrets.AXO_RELEASES_TOKEN }} 249 | steps: 250 | - uses: actions/checkout@v4 251 | with: 252 | submodules: recursive 253 | - name: Install cargo-dist 254 | run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.8.1/cargo-dist-installer.sh | sh" 255 | - name: Fetch Axo Artifacts 256 | uses: actions/download-artifact@v3 257 | with: 258 | name: artifacts 259 | path: target/distrib/ 260 | - name: Announce Axo Releases 261 | run: | 262 | cargo dist host --steps=announce ${{ needs.plan.outputs.tag-flag }} 263 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | *.zarc 3 | *.zip 4 | *.tar* 5 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | hard_tabs = true 2 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "rust-analyzer.cargo.features": [ 3 | "expose-internals" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.0.0 (2024-01-20) 4 | 5 | Initial release. 6 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: | 3 | If you use this software, please cite it using these metadata. 4 | title: "Zarc: a new archive format as a construction over the Z-standard file format" 5 | 6 | version: "0.1.0" 7 | date-released: 2023-12-27 8 | 9 | repository-code: https://github.com/passcod/zarc 10 | license: Apache-2.0 OR MIT 11 | 12 | authors: 13 | - family-names: Saparelli 14 | given-names: Félix 15 | orcid: https://orcid.org/0000-0002-2010-630X 16 | -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | Short version for non-lawyers: 2 | 3 | This project is dual-licensed under Apache 2.0 and MIT terms. 4 | 5 | 6 | Longer version: 7 | 8 | Copyrights in this project are retained by their contributors. No copyright 9 | assignment is required to contribute. 10 | 11 | Some files include explicit copyright notices and/or license notices. 12 | For full authorship information, see the version control history. 13 | 14 | Except as otherwise noted (below and/or in individual files), the project is 15 | licensed under the Apache License, Version 2.0 <LICENSE-APACHE> or 16 | <https://www.apache.org/licenses/LICENSE-2.0> or the MIT license 17 | <LICENSE-MIT> or <https://opensource.org/licenses/MIT>, at your option.
18 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | members = [ 4 | "crates/ozarc", 5 | "crates/zarc", 6 | "crates/zarc-cli", 7 | ] 8 | 9 | # Config for 'cargo dist' 10 | [workspace.metadata.dist] 11 | # The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax) 12 | cargo-dist-version = "0.8.1" 13 | # CI backends to support 14 | ci = ["github"] 15 | # The installers to generate for each app 16 | installers = ["shell", "powershell", "homebrew", "msi"] 17 | # Target platforms to build apps for (Rust target-triple syntax) 18 | targets = ["aarch64-apple-darwin", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"] 19 | # Publish jobs to run in CI 20 | pr-run-mode = "plan" 21 | # Where to host releases 22 | hosting = ["axodotdev"] 23 | # The archive format to use for non-windows builds (defaults .tar.xz) 24 | unix-archive = ".tar.zstd" 25 | 26 | [workspace.metadata.dist.dependencies.apt] 27 | libext2fs-dev = '*' 28 | 29 | [profile.release] 30 | lto = true 31 | debug = 1 # for stack traces 32 | codegen-units = 1 33 | 34 | [profile.dev.build-override] 35 | opt-level = 0 36 | codegen-units = 1024 37 | debug = false 38 | debug-assertions = false 39 | overflow-checks = false 40 | incremental = false 41 | 42 | # The profile that 'cargo dist' will build with 43 | [profile.dist] 44 | inherits = "release" 45 | strip = "symbols" 46 | 47 | [profile.release.build-override] 48 | opt-level = 0 49 | codegen-units = 1024 50 | debug = false 51 | debug-assertions = false 52 | overflow-checks = false 53 | incremental = false 54 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /SPEC.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Zarc is a file format specified on top of the Zstandard Compression Format aka RFC8878. 4 | 5 | Zarc is a toy file format: it has received no review, only has a single implementation, and is not considered mature enough for serious use. 
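Since the Header frame layout is fixed (see the Zarc Header section below), identifying a Zarc file takes only a twelve-byte comparison. Here is a minimal sketch in Rust, using only the standard library; `ZARC_PROLOGUE` and `looks_like_zarc` are illustrative names, not part of the zarc crates:

```rust
use std::io::{self, Read};

/// The first 12 bytes of every version-1 Zarc file, per the Zarc Header
/// section below: zstd skippable frame magic (nibble 0), frame size,
/// then the Zarc magic and format version.
const ZARC_PROLOGUE: [u8; 12] = [
    0x50, 0x2A, 0x4D, 0x18, // zstd skippable frame magic, nibble = 0
    0x04, 0x00, 0x00, 0x00, // frame size: 4 bytes of payload
    0x65, 0xAA, 0xDC, // Zarc magic
    0x01, // Zarc file version
];

/// Returns true if the reader starts with the Zarc version-1 prologue.
fn looks_like_zarc(mut reader: impl Read) -> io::Result<bool> {
    let mut prologue = [0u8; 12];
    reader.read_exact(&mut prologue)?;
    Ok(prologue == ZARC_PROLOGUE)
}
```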
6 | 7 | Zarc is intended to be fairly simple to parse given a zstd decoder, while providing some interesting features, like: 8 | 9 | - always-on strong hashing and integrity verification; 10 | - full support for extended attributes (xattrs); 11 | - high resolution timestamps; 12 | - user-provided metadata at both archive and file level; 13 | - basic deduplication via content-addressing; 14 | - minimal uncompressed overhead; 15 | - appending files is reasonably cheap; 16 | - capable of handling archives larger than memory, or even archives containing more file metadata than would fit in memory (allowed by spec but not yet implemented). 17 | 18 | ## Version 19 | 20 | The version of the Zarc format is 1. 21 | 22 | The version of this spec is 1.0.0. 23 | 24 | **CAUTION:** the format is currently unstable and changes without version bump or notice. 25 | 26 | ## Magic 27 | 28 | The Zarc magic number is 0xDCAA65 in little-endian. 29 | 30 | It is the string `Zarc` *de*coded as Base64: 31 | 32 | ```console 33 | $ echo -n 'Zarc' | base64 -d | hexyl -p 34 | 65 aa dc 35 | ``` 36 | 37 | ## Zstd Format 38 | 39 | Here's a quick recap of the zstd format: 40 | 41 | - The format is a sequence of frames 42 | - Frames can either be Zstandard frames or Skippable frames 43 | - A standard zstd decoder will skip Skippable frames 44 | - Numbers are little-endian 45 | - Zstandard frames: 46 | - `[magic][header][blocks...][checksum]` 47 | - Magic is 0xFD2FB528 48 | - Header is 2-14 bytes, described in spec 49 | - Checksum is optional, last 4 bytes of xxhash64 50 | - Blocks are: 51 | - `[last][type][size][data]` 52 | - Last is 1 bit (boolean) 53 | - Type is 2 bits (enum) 54 | - Size is 21 bits, unsigned 55 | - Type describes: 56 | 0. Raw block (`data` is uncompressed, verbatim) 57 | 1. RLE block (`data` is a single byte, `size` is how many times it's repeated verbatim) 58 | 2. Compressed block 59 | 3. Reserved 60 | - Skippable frames: 61 | - `[magic][size][data]` 62 | - Magic is 0x184D2A5? where the last nibble **?** is any value from 0x0 to 0xF 63 | - Size is unsigned 32-bit int 64 | 65 | Further reading: 66 | - Informational RFC8878: <https://datatracker.ietf.org/doc/html/rfc8878> 67 | - Most up-to-date spec: <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md> 68 | 69 | ## Zarc Format 70 | 71 | A Zarc is a defined sequence of zstd frames: 72 | 73 | - one **[Header](#zarc-header)**, a Skippable frame (0x0), used to identify a file as a Zarc 74 | - zero or more **Zstandard frames**, one for each file (modulo deduplication and special files) 75 | - one **[Directory](#zarc-directory)**, a Zstandard (compressed) frame, which contains file list and metadata 76 | - one **[Trailer](#zarc-trailer)**, a Skippable frame (0xF), used to find and check the Directory 77 | 78 | Zarcs are explicitly files: this is not a format suitable for streaming from a network location, 79 | unless random access / seek semantics are available (e.g. using the `Range` HTTP header). 80 | 81 | # Zarc Header 82 | 83 | This is a Skippable frame with magic nibble = 0.
84 | 85 | It contains: 86 | 87 | | **`Magic`** | **`Zarc Version`** | 88 | |:-----------:|:------------------:| 89 | | 3 bytes | 1 byte | 90 | | `65 aa dc` | `01` | 91 | 92 | This, combined with the Skippable frame header, means a Zarc file always starts with the same 12 bytes: 93 | 94 | | **`Zstd Magic`** | **`Frame Size`** | **`Zarc Magic`** | **`Zarc File Version`** | 95 | |:----------------:|:----------------:|:----------------:|:-----------------------:| 96 | | 4 bytes | 4 bytes | 3 bytes | 1 byte | 97 | | `50 2a 4d 18` | `04 00 00 00` | `65 aa dc` | `01` | 98 | 99 | # Zarc Directory 100 | 101 | This is a Zstandard frame. 102 | 103 | It contains a stream of [CBOR](https://cbor.io)-encoded Elements, which are framed with a Kind and a length. 104 | 105 | | **`Kind`** | **`Length of Payload`** | _reserved_ | **`Payload`** | 106 | |:----------:|:-----------------------:|:----------:|:-------------:| 107 | | LE U8 | LE U16 | 1 byte | CBOR | 108 | 109 | Element Kinds are described below, along with their integer and CBOR payload structure. 110 | Elements of the same Kind are NOT required to be next to each other. 111 | Order is insignificant unless stated. 112 | 113 | Implementations MUST ignore Element Kinds they do not recognise. 114 | 115 | > **Non-normative note:** the _reserved_ byte is there mainly for possible expansion of the payload length. 116 | > 64K per element looks pretty large from here, but who knows what the future brings. 117 | 118 | ## Kind `1`: Editions 119 | 120 | _Map: unsigned integer keys -> CBOR._ 121 | 122 | Editions record core metadata about an archive, and also provide a mechanism for retaining the metadata of _previous versions_ of the archive, if it gets appended or edited. 123 | At least one edition must be present. 124 | 125 | ### Key `0`: Number 126 | 127 | _Non-zero unsigned integer._ **Mandatory.** 128 | 129 | The number of editions in a file is technically unlimited, but as of this version MUST be less than 65536. 130 | For practical purposes implementations SHOULD warn when creating more than 1000 editions, and MAY set that limit lower. 131 | 132 | Creating an edition involves incrementing the edition number, so the latest edition of the file is `max(edition list)`. 133 | 134 | This is used in Frame and File types as the `Edition` field. 135 | 136 | ### Key `1`: Written At 137 | 138 | _Timestamp or DateTime._ **Mandatory.** 139 | 140 | When this edition was created. 141 | 142 | ### Key `2`: Digest Type 143 | 144 | _8-bit unsigned integer._ **Mandatory.** 145 | 146 | The same as the Trailer's `Digest Type` value: the digest type in use by this edition. 147 | 148 | ### Key `10`: User Metadata 149 | 150 | _Map: text string keys -> boolean or text or byte string._ **Optional.** 151 | 152 | User metadata of this edition. 153 | 154 | ## Kind `2`: Files 155 | 156 | _Map: unsigned integer keys -> CBOR._ 157 | 158 | ### Key `0`: Edition 159 | 160 | _Unsigned integer._ **Mandatory.** 161 | 162 | The edition in which this file entry was added to the archive. 163 | 164 | ### Key `1`: Name 165 | 166 | _Array of: text string or byte string._ **Mandatory.** 167 | 168 | If items are of the UTF-8 _Text string_ CBOR type, then they represent UTF-8-encoded Unicode pathname components. 169 | If items are of the _Byte string_ CBOR type instead, then they represent raw (non-Unicode) pathname components.
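As an illustration of the two component forms, here is a sketch of how an encoder might build this array, assuming the `ciborium` crate for CBOR values (the spec does not mandate any particular CBOR library, and `encode_name` is a hypothetical helper): components that are valid UTF-8 become text strings, everything else becomes byte strings, and `.` or `..` components are rejected as required below.

```rust
use ciborium::value::Value;

/// Build the CBOR array for a Files entry's Name key: UTF-8 components
/// become text strings, anything else byte strings. `.` and `..` are
/// rejected, as the spec requires (see below).
fn encode_name(components: &[Vec<u8>]) -> Result<Value, &'static str> {
    let mut items = Vec::with_capacity(components.len());
    for raw in components {
        if raw.as_slice() == b"." || raw.as_slice() == b".." {
            return Err("`.` and `..` pathname components are disallowed");
        }
        items.push(match std::str::from_utf8(raw) {
            Ok(text) => Value::Text(text.to_owned()),
            Err(_) => Value::Bytes(raw.clone()),
        });
    }
    Ok(Value::Array(items))
}
```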
170 | 171 | Windows implementations MUST convert raw UTF-16 to UTF-8 during encoding, and from raw bytes to UTF-8 during decoding, and replace invalid wide character sequences with the Unicode REPLACEMENT CHARACTER. 172 | 173 | Non-Unicode pathnames may not be supported on all filesystems / operating systems. 174 | Implementations SHOULD strongly prefer UTF-8, and SHOULD warn when paths do not convert cleanly. 175 | 176 | Zarc makes no effort to restrict valid pathnames. 177 | The exception is that the components `.` and `..` are disallowed. 178 | A Zarc decoder MUST reject such pathnames. 179 | 180 | Pathnames are encoded in components. 181 | That is, the Unix pathname `foo/bar/baz.qux` and the Windows pathname `foo\bar\baz.qux` are encoded the same way. 182 | Pathnames can mix UTF-8 and non-Unicode components. 183 | 184 | Pathnames do not include drive letters or fileshare prefixes. 185 | (It is not possible to construct a Zarc archive spanning multiple Windows drives.) 186 | 187 | Pathnames do not encode whether a path is absolute or relative: all paths inside a Zarc archive are relative to an arbitrary root provided by the user when packing or unpacking. 188 | 189 | It is possible to have several identical pathnames in a Zarc Directory. 190 | Implementations SHOULD provide an option to use the first or last or other selection criteria, but MUST default to preferring the last of a set of identical pathnames. 191 | 192 | ### Key `2`: Frame Digest 193 | 194 | _Byte string._ **Conditional.** 195 | 196 | The hash of a frame of content. 197 | This must be the same value as the **Frame Content Digest** field of a **Frames** element (Kind `3`, described below). 198 | 199 | Multiple files can reference the same content frame: this provides file-level deduplication. 200 | 201 | The algorithm of the hash is described by the edition's **Digest Type** field above. 202 | 203 | This may be absent for some special files (described later). 204 | 205 | ### Key `3`: POSIX File Mode 206 | 207 | _Unsigned integer._ **Optional.** 208 | 209 | Unix mode bits as an unsigned 32-bit integer. 210 | 211 | If this is not set, implementations SHOULD use a default mode as appropriate. 212 | 213 | ### Key `4`: POSIX File Owner 214 | 215 | _Tuple (encoded as an array)._ **Optional.** 216 | 217 | The user that owns this file. 218 | This is a structure with at least one of the following types of data: 219 | 220 | - _Unsigned integer._ the user ID 221 | - _Text string._ the user name as UTF-8 (or ASCII) 222 | 223 | There SHOULD NOT be more than one unsigned integer; if there are, the last value wins out. 224 | 225 | Implementations SHOULD prefer the name to the ID if there is an existing user named thus on the system with a different ID. 226 | Implementations SHOULD prefer to encode IDs as 32-bit unsigned integers, but MUST accept 8-bit, 16-bit, and 64-bit unsigned integers as well. 227 | 228 | ### Key `5`: POSIX File Group 229 | 230 | _Tuple (encoded as an array)._ **Optional.** 231 | 232 | The group that owns this file. 233 | This is a structure with at least one of the following types of data: 234 | 235 | - _Unsigned integer._ the group ID 236 | - _Text string._ the group name as UTF-8 (or ASCII) 237 | 238 | Implementations SHOULD prefer the name to the ID if there is an existing group named thus on the system with a different ID. 239 | 240 | ### Key `6`: File Timestamps 241 | 242 | _Map: unsigned integer keys -> timestamp._ **Optional.** 243 | 244 | Timestamps associated with this file.
Any of: 245 | 246 | - `1`: birth time or file creation time 247 | - `2`: mtime or file modification time 248 | - `3`: atime or file access time — this SHOULD be the access time prior to the Zarc tool reading the file 249 | 250 | Timestamps can be stored in either: 251 | - [RFC3339 in _text string_ with semantic tag `0`](https://www.rfc-editor.org/rfc/rfc8949.html#name-standard-date-time-string) 252 | - [seconds from epoch as unsigned or negative integer, or binary64 floating point, with semantic tag `1`](https://www.rfc-editor.org/rfc/rfc8949.html#name-epoch-based-date-time) 253 | 254 | > **Non-normative implementation note:** the Zarc reference implementation _accepts_ all formats for a timestamp, but always _writes_ RFC3339 text string datetimes. 255 | 256 | ### Key `7`: Special File Types 257 | 258 | _Pair: [unsigned integer, (pathname)?]._ **Optional.** 259 | 260 | This is a structure which encodes special file types. 261 | 262 | The mandatory first array item is the type of the special file. 263 | Implementations SHOULD ignore unknown or impractical special types. 264 | 265 | - `1` — **directory entry.** 266 | May be used to encode metadata or (x)attributes against a directory. 267 | 268 | - `10` — **unspecified symlink.** 269 | MUST be followed by the pathname of the link target. 270 | - `11` — **internal symlink.** 271 | MUST be followed by the pathname of another file contained in this Zarc. 272 | - `12` — **external absolute symlink.** 273 | MUST be followed by the absolute pathname of a file to symlink to. 274 | Implementations MAY reject this (e.g. for security reasons). 275 | - `13` — **external relative symlink.** 276 | MUST be followed by the relative pathname of a file to symlink to. 277 | Implementations MAY reject this (e.g. for security reasons). 278 | 279 | - `20` — **unspecified hardlink.** 280 | MUST be followed by the pathname of another file contained in this Zarc. 281 | - `21` — **internal hardlink.** 282 | MUST be followed by the pathname of another file contained in this Zarc. 283 | - `22` — **external hardlink.** 284 | MUST be followed by the absolute pathname of a file to hardlink to. 285 | Implementations MAY reject this (e.g. for security reasons). 286 | 287 | Pathnames (as the conditional second array item) are either: 288 | - _Byte string_ or _Text string_. An absolute or relative full pathname with platform-specific separators; 289 | - _Array(byte or text string)._ An array of components as for the Files element's **Name** key, except that `.` and `..` components are allowed. 290 | 291 | The second form is preferred, for portability. 292 | 293 | ### Key `10`: File User Metadata 294 | 295 | _Map: text string keys -> boolean or text or byte string._ **Optional.** 296 | 297 | Arbitrary user-provided metadata for this file entry. 298 | 299 | ### Key `11`: File Attributes 300 | 301 | _Map: text string keys -> boolean or text or byte string._ **Optional.** 302 | 303 | A map of values (typically boolean flags) whose keys SHOULD correspond to [file attributes](https://en.wikipedia.org/wiki/Chattr). 304 | 305 | Implementations MAY ignore attributes if obtaining or setting them is impossible or impractical. 306 | 307 | Attribute keys MUST either have a prefix signifying the system they apply to: 308 | 309 | - `win32.` for Windows 310 | - `linux.` for Linux 311 | - `bsd.` for BSDs, including macOS 312 | - `_` for implementation-defined prefixes (e.g.
`_ncc1701.`) 313 | 314 | OR be one of these defined unprefixed values: 315 | 316 | - `append-only` 317 | - `compressed` 318 | - `immutable` 319 | - `read-only` 320 | 321 | > **Note:** attributes are metadata only; they have no bearing on the Zarc file format semantics. 322 | 323 | ### Key `12`: Extended File Attributes 324 | 325 | _Map: text string keys -> boolean or text or byte string._ **Optional.** 326 | 327 | A map of extended attributes (`xattr`). 328 | 329 | Zarc imposes no restriction on the format of attribute names, nor on the content or length of attribute values. 330 | 331 | Implementations MAY ignore extended attributes if obtaining or setting them is impossible or impractical. 332 | On Linux, implementations MAY assume a `user` namespace for unprefixed keys. 333 | 334 | ## Kind `3`: Frames 335 | 336 | _Map: unsigned integer keys -> CBOR._ **Mandatory, collect-up.** 337 | 338 | Structures of this type SHOULD appear in offset order. 339 | 340 | ### Key `0`: Edition Added 341 | 342 | _Unsigned integer._ **Mandatory.** 343 | 344 | The edition in which this frame was added to the archive. 345 | 346 | ### Key `1`: Frame Offset 347 | 348 | _Integer._ **Mandatory.** 349 | 350 | The offset in bytes from the start of the Zarc file to the first byte of the Zstandard frame header this entry describes. 351 | 352 | There MUST NOT be duplicate Frame Offsets in the Frame list. 353 | 354 | ### Key `2`: Frame Content Digest 355 | 356 | _Byte string._ **Mandatory.** 357 | 358 | The digest of the frame contents. 359 | 360 | Implementations MUST check that frame contents match this digest (unless "insecure" mode is used). 361 | 362 | ### Key `3`: Framed Size 363 | 364 | _Integer._ **Mandatory.** 365 | 366 | The size of the entire frame in bytes. 367 | 368 | This may be used to request that range of bytes from a remote source without reading too far, or incrementally via block information. 369 | 370 | ### Key `4`: Uncompressed Content Length 371 | 372 | _Integer._ **Mandatory.** 373 | 374 | The length of the uncompressed content of the frame in bytes. 375 | 376 | This is a complement to the Frame Content Size field available on the Zstandard Frame directly, as that field can be absent depending on zstd parameters. 377 | 378 | This can be used to e.g.: 379 | - avoid unpacking frames which exceed available memory or storage; 380 | - preallocate storage before unpacking; 381 | - estimate the uncompressed total size of the archive. 382 | 383 | # Zarc Trailer 384 | 385 | This is a Skippable frame with magic nibble = F. 386 | 387 | It contains: 388 | 389 | | _reserved_ | **`Digest Type`** | **`Digest`** | **`Digest Type`** | 390 | |:----------:|:-----------------:|:-----------:|:-----------------:| 391 | | 1 byte | 1 byte | _n_ bytes | 1 byte | 392 | 393 | | **`Directory Offset`** | **`Uncompressed Length`** | 394 | |:-------------------------:|:-------------------------:| 395 | | 8 bytes | 8 bytes | 396 | 397 | | **`Check Byte`** | **`Zarc Version`** | **`Magic`** | 398 | |:----------------:|:------------------:|:-----------:| 399 | | 1 byte | 1 byte | 3 bytes | 400 | | | `01` | `65 aa dc` | 401 | 402 | > **Non-normative implementation note:** This looks upside down, because you can read it from the end. 403 | > The last three bytes of a Zarc file will always be `65 aa dc`, _preceded_ by the version, _preceded_ by the check byte, etc. 404 | > The critical fixed-width fields are all at the end, so they can be read by seeking to a fixed offset from the end.
405 | > The `Digest Type` is then used to derive the length of the `Digest` field. 406 | > It's also duplicated on the other side of the `Digest`, so that the trailer can be read from both sides. 407 | > Going 8 bytes further back from the 'start' of the trailer will yield the Zstd Skippable frame header, should you wish to check that. 408 | 409 | ### `Directory Offset` 410 | 411 | _Signed 64-bit integer._ 412 | 413 | This is EITHER: 414 | 415 | - a **positive** value, the offset from the **start** of the file to the first byte of the Zstandard frame containing the Zarc Directory. 416 | - a **negative** value, the offset from the **end** of the file to the first byte of the Zstandard frame containing the Zarc Directory. 417 | 418 | ### `Uncompressed Length` 419 | 420 | This is the uncompressed length of the Zarc Directory structure. 421 | 422 | This may be used to decide whether to decompress the directory in memory or stream it. 423 | 424 | ### `Digest Type` 425 | 426 | Defines the algorithm used for computing digests, as well as the length of the digest fields: 427 | 428 | - `0`: not used. This value must not appear. 429 | - `1`: [BLAKE3](https://github.com/BLAKE3-team/BLAKE3) hash function, 32-byte digests. 430 | 431 | ### `Check Byte` 432 | 433 | This is the result of XOR'ing together all the other bytes of the trailer (that is, every byte of the trailer except the check byte itself). 434 | 435 | It can be used as a quick check for corruption. 436 | -------------------------------------------------------------------------------- /crates/ozarc/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ozarc" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | authors = ["Félix Saparelli <felix@passcod.name>"] 7 | license = "Apache-2.0 OR MIT" 8 | description = "Zstandard implementation in pure Rust" 9 | keywords = ["archive", "file-format", "zstd"] 10 | categories = ["encoding", "compression"] 11 | 12 | documentation = "https://github.com/passcod/zarc" 13 | repository = "https://github.com/passcod/zarc" 14 | 15 | [dependencies] 16 | deku = { version = "0.16.0", features = ["logging"], git = "https://github.com/sharksforarms/deku" } 17 | log = "0.4.20" 18 | tracing = "0.1.40" 19 | 20 | -------------------------------------------------------------------------------- /crates/ozarc/src/framing.rs: -------------------------------------------------------------------------------- 1 | //! Zstd file format parsing types. 2 | //! 3 | //! [Spec (Informational RFC8878)](https://datatracker.ietf.org/doc/html/rfc8878) 4 | //! 5 | //! Here's a quick recap of the zstd format, full specification available at link above: 6 | //! 7 | //! - The format is a sequence of frames 8 | //! - Frames can either be [Zstandard frames](ZstandardFrame) or [Skippable frames](SkippableFrame) 9 | //! - A standard zstd decoder will skip Skippable frames 10 | //! - Numbers are little-endian 11 | //! - Zstandard frames: 12 | //! - `[magic][header][blocks...][checksum]` 13 | //! - Magic is 0xFD2FB528 14 | //! - [Header](ZstandardFrameDescriptor) is 2-14 bytes, described in spec above 15 | //! - Checksum is optional, last 4 bytes of xxhash64 16 | //! - [Blocks](ZstandardBlock) are: 17 | //! - `[last][type][size][data]` 18 | //! - Last is 1 bit (boolean) 19 | //! - Type is 2 bits (enum) 20 | //! - Size is 21 bits, unsigned 21 | //! - [Type](ZstandardBlockType) describes: 22 | //! 0. Raw block (`data` is uncompressed, verbatim) 23 | //! 1. RLE block (`data` is a single byte, `size` is how many times it's repeated verbatim) 24 | //! 2. Compressed block 25 | //!
3. Reserved 26 | //! - Skippable frames: 27 | //! - `[magic][size][data]` 28 | //! - Magic is 0x184D2A5? where the last nibble **?** is any value from 0 to F 29 | //! - Size is unsigned 32-bit int 30 | 31 | use deku::prelude::*; 32 | 33 | /// Magic number for a [Skippable Frame](SkippableFrame). 34 | /// 35 | /// This is only bytes 1-3 of the magic, and the first byte is any value from 0x50 to 0x5F. 36 | pub const SKIPPABLE_FRAME_MAGIC: &[u8] = b"\x2A\x4D\x18"; 37 | 38 | /// Magic number for a [Zstandard Frame](ZstandardFrame). 39 | pub const ZSTANDARD_FRAME_MAGIC: &[u8] = b"\x28\xB5\x2F\xFD"; 40 | 41 | /// The overhead of a [Skippable Frame](SkippableFrame) in bytes. 42 | /// 43 | /// This is the size of the magic and size fields. 44 | pub const SKIPPABLE_FRAME_OVERHEAD: usize = 8; 45 | 46 | /// A "Skippable" frame. 47 | /// 48 | /// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-skippable-frames) 49 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)] 50 | #[deku(endian = "little")] 51 | pub struct SkippableFrame { 52 | #[deku(bytes = "4", assert = "SkippableFrame::valid_magic(magic)")] 53 | magic: u32, 54 | 55 | #[deku(bytes = "4")] 56 | size: u32, 57 | 58 | /// The user data contained in the frame. 59 | #[deku(count = "size")] 60 | pub data: Vec<u8>, 61 | } 62 | 63 | impl SkippableFrame { 64 | fn valid_magic(magic: &u32) -> bool { 65 | let magic_bytes = magic.to_le_bytes(); 66 | magic_bytes[0] >= 0x50 67 | && magic_bytes[0] <= 0x5F 68 | && &magic_bytes[1..4] == SKIPPABLE_FRAME_MAGIC 69 | } 70 | 71 | /// Create a new skippable frame. 72 | /// 73 | /// Panics if the nibble is greater than 15. 74 | pub fn new(nibble: u8, data: Vec<u8>) -> Self { 75 | assert!( 76 | nibble < 16, 77 | "skippable frame nibble must be between 0 and 15" 78 | ); 79 | Self { 80 | magic: u32::from_le_bytes([0x50 + nibble, 0x2A, 0x4D, 0x18]), 81 | size: data 82 | .len() 83 | .try_into() 84 | .expect("skippable frame data is too long"), 85 | data, 86 | } 87 | } 88 | 89 | /// The magic nibble of this frame. 90 | pub fn nibble(&self) -> u8 { 91 | (self.magic.to_le_bytes()[0] - 0x50) & 0x0F 92 | } 93 | 94 | /// The length of the frame's content. 95 | pub fn size(&self) -> usize { 96 | self.size as usize 97 | } 98 | } 99 | 100 | /// A Zstandard frame. 101 | /// 102 | /// See [`ZstandardFrameHeader`] and [`ZstandardBlock`] to read manually (without loading it all in 103 | /// memory at once). 104 | /// 105 | /// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-zstandard-frames) 106 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)] 107 | #[deku(endian = "little")] 108 | pub struct ZstandardFrame { 109 | /// Header. 110 | pub header: ZstandardFrameHeader, 111 | 112 | /// Blocks. 113 | /// 114 | /// Those are the actual content of the frame. 115 | #[deku(until = "|b: &ZstandardBlock| b.header.last")] 116 | pub blocks: Vec<ZstandardBlock>, 117 | 118 | /// Optional 32-bit checksum. 119 | /// 120 | /// The lower 4 bytes of the [xxhash64](https://cyan4973.github.io/xxHash/) digested from the 121 | /// original content and a seed of zero. 122 | /// 123 | /// Only present if [`ZstandardFrameDescriptor::checksum`] is set. 124 | #[deku(bytes = 4, cond = "header.frame_descriptor.checksum")] 125 | pub checksum: Option<u32>, 126 | } 127 | 128 | /// A Zstandard Frame header. 129 | /// 130 | /// This doesn't include the blocks and checksum, so you need to do your own accounting and parse 131 | /// the blocks until the last, then read the checksum if it's present.
132 | /// an easier interface, at the cost of loading all the blocks in memory.
133 | ///
134 | /// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-zstandard-frames)
135 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)]
136 | #[deku(
137 | 	magic = b"\x28\xB5\x2F\xFD",
138 | 	endian = "endian",
139 | 	ctx = "endian: deku::ctx::Endian",
140 | 	ctx_default = "deku::ctx::Endian::Little"
141 | )]
142 | pub struct ZstandardFrameHeader {
143 | 	/// The frame descriptor.
144 | 	///
145 | 	/// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.1.1)
146 | 	///
147 | 	/// Describes what other fields are present in the frame header.
148 | 	pub frame_descriptor: ZstandardFrameDescriptor,
149 | 
150 | 	/// Minimum memory needed to decode the frame.
151 | 	///
152 | 	/// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-window-descriptor)
153 | 	#[deku(bytes = 1, cond = "!frame_descriptor.single_segment")]
154 | 	pub window_descriptor: Option<u8>,
155 | 
156 | 	/// Dictionary ID.
157 | 	///
158 | 	/// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.1.3)
159 | 	///
160 | 	/// See [`ZstandardFrameHeader::dictionary_id()`] for the value as an integer.
161 | 	#[deku(count = "frame_descriptor.did_length()")]
162 | 	pub did: Vec<u8>,
163 | 
164 | 	/// Original (uncompressed) size.
165 | 	///
166 | 	/// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-frame_content_size)
167 | 	///
168 | 	/// This field is optional.
169 | 	///
170 | 	/// This needs to be interpreted before it can be used. See [`ZstandardFrameHeader::uncompressed_size()`].
171 | 	#[deku(count = "frame_descriptor.fcs_length()")]
172 | 	pub frame_content_size: Vec<u8>,
173 | }
174 | 
175 | impl ZstandardFrameHeader {
176 | 	/// The uncompressed length of the frame's content in bytes.
177 | 	pub fn uncompressed_size(&self) -> u64 {
178 | 		match self.frame_descriptor.fcs_length() {
179 | 			0 => 0,
180 | 			1 => u64::from(self.frame_content_size[0]),
181 | 			2 => {
182 | 				u64::from(u16::from_le_bytes([
183 | 					self.frame_content_size[0],
184 | 					self.frame_content_size[1],
185 | 				])) + 256
186 | 			}
187 | 			4 => u64::from(u32::from_le_bytes([
188 | 				self.frame_content_size[0],
189 | 				self.frame_content_size[1],
190 | 				self.frame_content_size[2],
191 | 				self.frame_content_size[3],
192 | 			])),
193 | 			8 => u64::from_le_bytes([
194 | 				self.frame_content_size[0],
195 | 				self.frame_content_size[1],
196 | 				self.frame_content_size[2],
197 | 				self.frame_content_size[3],
198 | 				self.frame_content_size[4],
199 | 				self.frame_content_size[5],
200 | 				self.frame_content_size[6],
201 | 				self.frame_content_size[7],
202 | 			]),
203 | 			_ => unreachable!(),
204 | 		}
205 | 	}
206 | 
207 | 	/// The dictionary ID as an integer (the `did` field is stored little-endian).
208 | 	pub fn dictionary_id(&self) -> u32 {
209 | 		self.did.iter().rev().fold(0, |acc, &x| (acc << 8) | u32::from(x))
210 | 	}
211 | }
212 | 
213 | /// Frame descriptor for a [Zstandard Frame](ZstandardFrame).
214 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)]
215 | #[deku(endian = "endian", ctx = "endian: deku::ctx::Endian")]
216 | pub struct ZstandardFrameDescriptor {
217 | 	/// [Frame content size (FCS)](ZstandardFrameHeader::frame_content_size) field size flag.
218 | 	///
219 | 	/// This is _not_ the size of the FCS field itself, but a flag that needs to be interpreted in
220 | 	/// conjunction with [`single_segment`](ZstandardFrameDescriptor::single_segment) to determine
221 | 	/// the size of the FCS field.
222 | 	///
223 | 	/// The [`ZstandardFrameDescriptor::fcs_length()`] method performs this calculation.
224 | #[deku(bits = 2)] 225 | pub fcs_size: u8, 226 | 227 | /// If this flag is set, data must be regenerated within a single continuous memory segment. 228 | /// 229 | /// This is also used in the calculation for [`ZstandardFrame::frame_content_size`]'s length. 230 | #[deku(bits = 1)] 231 | pub single_segment: bool, 232 | 233 | /// Unused. Always false. 234 | #[deku(bits = 1)] 235 | pub unused_bit: bool, 236 | 237 | /// Reserved. Always false. 238 | #[deku(bits = 1)] 239 | pub reserved_bit: bool, 240 | 241 | /// Whether the frame has a [checksum](ZstandardFrame::checksum). 242 | #[deku(bits = 1)] 243 | pub checksum: bool, 244 | 245 | /// [Dictionary ID (DID)](ZstandardFrame::did) field size flag. 246 | /// 247 | /// This is _not_ the size of the DID field itself, but a flag that needs to be interpreted to 248 | /// determine the size of the DID field. 249 | /// 250 | /// The [`ZstandardFrameDescriptor::did_length()`] method performs this calculation. 251 | #[deku(bits = 2)] 252 | pub did_size: u8, 253 | } 254 | 255 | impl ZstandardFrameDescriptor { 256 | /// The length in bytes of the [DID](ZstandardFrame::did) field. 257 | pub fn did_length(&self) -> usize { 258 | match self.did_size { 259 | 0 => 0, 260 | 1 => 1, 261 | 2 => 2, 262 | 3 => 4, 263 | _ => unreachable!(), 264 | } 265 | } 266 | 267 | /// The length in bytes of the [FCS](ZstandardFrame::frame_content_size) field. 268 | pub fn fcs_length(&self) -> usize { 269 | match self.fcs_size { 270 | 0 if self.single_segment => 1, 271 | 0 => 0, 272 | 1 => 2, 273 | 2 => 4, 274 | 3 => 8, 275 | _ => unreachable!(), 276 | } 277 | } 278 | } 279 | 280 | /// A Zstandard block. 281 | /// 282 | /// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-blocks) 283 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)] 284 | #[deku( 285 | endian = "endian", 286 | ctx = "endian: deku::ctx::Endian", 287 | ctx_default = "deku::ctx::Endian::Little" 288 | )] 289 | pub struct ZstandardBlock { 290 | /// The block header. 291 | pub header: ZstandardBlockHeader, 292 | 293 | /// The block data. 294 | #[deku(count = "header.actual_size()")] 295 | pub data: Vec, 296 | } 297 | 298 | /// The header for a Zstandard block. 299 | /// 300 | /// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-blocks) 301 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)] 302 | #[deku( 303 | endian = "endian", 304 | ctx = "endian: deku::ctx::Endian", 305 | ctx_default = "deku::ctx::Endian::Little" 306 | )] 307 | pub struct ZstandardBlockHeader { 308 | #[deku(bits = "5")] 309 | size_low: u8, 310 | 311 | /// The block type. 312 | pub block_type: ZstandardBlockType, 313 | 314 | /// Whether this is the last block in the frame. 315 | #[deku(bits = "1")] 316 | pub last: bool, 317 | 318 | #[deku(bits = "16")] 319 | size_high: u16, 320 | } 321 | 322 | impl ZstandardBlockHeader { 323 | /// Create a new Zstandard block header. 
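	/// Note: the packed header holds only 21 bits of size (5 low + 16 high), so
	/// only values below 2^21 survive a round-trip; the assertion below
	/// (`size < 2^24`) is looser than that, and sizes in between are silently
	/// truncated by the bit shuffling.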
324 | pub fn new(block_type: ZstandardBlockType, last: bool, size: u32) -> Self { 325 | assert!(size < 2_u32.pow(24)); 326 | 327 | let [a, b, c, d] = u32::to_be_bytes(size << 3); 328 | let size_high = u16::from_be_bytes([b, c]); 329 | let size_low = d >> 3; 330 | tracing::trace!( 331 | field = %format!("{a:08b} {b:08b} {c:08b} {d:08b}"), 332 | high = %format!("{size_high:016b}"), 333 | low = %format!("{size_low:08b}"), 334 | "block header size bit wrangling (write)" 335 | ); 336 | 337 | Self { 338 | size_low, 339 | block_type, 340 | last, 341 | size_high, 342 | } 343 | } 344 | 345 | fn size(&self) -> u32 { 346 | let [a, b] = u16::to_be_bytes(self.size_high); 347 | let c = self.size_low << 3; 348 | let real_size = u32::from_be_bytes([0, a, b, c]) >> 3; 349 | tracing::trace!( 350 | high = %format!("{:016b}", self.size_high), 351 | low = %format!("{:08b}", self.size_low), 352 | real_dec = %real_size, 353 | real_hex = %format!("{real_size:02x?}"), 354 | "block header size bit wrangling (read)" 355 | ); 356 | 357 | real_size 358 | } 359 | 360 | /// If this is an RLE, how many times is the byte repeated? 361 | pub fn rle_count(&self) -> Option { 362 | if self.block_type == ZstandardBlockType::Rle { 363 | Some(self.size()) 364 | } else { 365 | None 366 | } 367 | } 368 | 369 | /// How many bytes of data are in this block. 370 | pub fn actual_size(&self) -> u32 { 371 | match self.block_type { 372 | ZstandardBlockType::Raw | ZstandardBlockType::Compressed => self.size(), 373 | ZstandardBlockType::Rle => 1, 374 | ZstandardBlockType::Reserved => panic!("corrupt zstd: reserved block type"), 375 | } 376 | } 377 | } 378 | 379 | /// The type of a Zstandard block. 380 | /// 381 | /// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-block_type) 382 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)] 383 | #[deku( 384 | endian = "endian", 385 | ctx = "endian: deku::ctx::Endian", 386 | type = "u8", 387 | bits = "2" 388 | )] 389 | pub enum ZstandardBlockType { 390 | /// An uncompressed block. 391 | #[deku(id = "0b00")] // = 0 392 | Raw, 393 | 394 | /// A block with a single byte repeated many times. 395 | #[deku(id = "0b01")] // = 1 396 | Rle, 397 | 398 | /// A compressed block. 399 | #[deku(id = "0b10")] // = 2 400 | Compressed, 401 | 402 | /// Reserved. 403 | #[deku(id = "0b11")] // = 3 404 | Reserved, 405 | } 406 | -------------------------------------------------------------------------------- /crates/ozarc/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Zstd file format parser. 2 | //! 3 | //! This crate has the ambition of becoming a Zstandard implementation in pure Rust. For now, it 4 | //! only implements types for encoding and decoding the framing of the file format. 
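//!
//! As a minimal sketch of what these types enable (the bytes below form a
//! hypothetical empty skippable frame, not taken from a real archive):
//!
//! ```
//! use deku::DekuContainerRead;
//! use ozarc::framing::SkippableFrame;
//!
//! // magic 0x184D2A50 (nibble 0), then a zero length, then no payload
//! let bytes: &[u8] = &[0x50, 0x2A, 0x4D, 0x18, 0x00, 0x00, 0x00, 0x00];
//! let ((_rest, _bit_offset), frame) = SkippableFrame::from_bytes((bytes, 0)).unwrap();
//! assert_eq!(frame.nibble(), 0);
//! assert!(frame.data.is_empty());
//! ```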
5 | 6 | pub mod framing; 7 | -------------------------------------------------------------------------------- /crates/zarc-cli/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zarc-cli" 3 | version = "0.0.1" 4 | edition = "2021" 5 | 6 | authors = ["Félix Saparelli "] 7 | license = "Apache-2.0 OR MIT" 8 | description = "Archive format based on Zstd: CLI tool" 9 | keywords = ["archive", "file-format", "zstd", "zarc"] 10 | categories = ["command-line-utilities"] 11 | 12 | documentation = "https://github.com/passcod/zarc" 13 | repository = "https://github.com/passcod/zarc" 14 | 15 | [package.metadata.wix] 16 | upgrade-guid = "96E0968E-C5AC-4BFF-81DE-948816F542C8" 17 | path-guid = "52718143-483B-4009-8A46-4270582DE95E" 18 | license = false 19 | eula = false 20 | 21 | [[bin]] 22 | name = "zarc" 23 | path = "src/main.rs" 24 | 25 | [dependencies] 26 | base64ct = { version = "1.6.0", features = ["std"] } 27 | blake3 = { version = "1.5.0", features = ["rayon"] } 28 | chrono = "0.4.31" 29 | clap = { version = "4.4.11", features = ["derive", "cargo", "wrap_help", "string"] } 30 | deku = { version = "0.16.0", features = ["logging"], git = "https://github.com/sharksforarms/deku" } 31 | miette = { version = "5.10.0", features = ["fancy"] } 32 | minicbor = { version = "0.20.0", features = ["std"] } 33 | ozarc = { version = "0.0.0", path = "../ozarc" } 34 | regex = "1.10.3" 35 | tracing = "0.1.40" 36 | tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json"] } 37 | umask = "2.1.0" 38 | walkdir = "2.4.0" 39 | zarc = { version = "0.0.0", path = "../zarc" } 40 | zstd-safe = { version = "7.0.0", features = ["experimental"] } 41 | 42 | [build-dependencies] 43 | bosion = "1.0.2" 44 | embed-resource = "2.4.0" 45 | -------------------------------------------------------------------------------- /crates/zarc-cli/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | embed_resource::compile("manifest.rc", embed_resource::NONE); 3 | bosion::gather(); 4 | 5 | if std::env::var("CARGO_FEATURE_EYRA").is_ok() { 6 | println!("cargo:rustc-link-arg=-nostartfiles"); 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /crates/zarc-cli/manifest.rc: -------------------------------------------------------------------------------- 1 | #define RT_MANIFEST 24 2 | 1 RT_MANIFEST "zarc.exe.manifest" 3 | -------------------------------------------------------------------------------- /crates/zarc-cli/src/args.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Debug, path::PathBuf}; 2 | 3 | use clap::{ArgAction, Parser, Subcommand, ValueHint}; 4 | 5 | use crate::{ 6 | // debug::DebugArgs, 7 | list_files::ListFilesArgs, 8 | pack::PackArgs, 9 | unpack::UnpackArgs, 10 | }; 11 | 12 | /// Zarc: a novel archive format and tool. 13 | /// 14 | /// Zarc is a file archive format that uses both Zstd compression and the Zstd file format. It is 15 | /// designed as a replacement for tar and zip rather than zstd, gzip, bzip2, or xz. This is the 16 | /// reference implementation. 17 | #[derive(Debug, Clone, Parser)] 18 | #[command( 19 | name = "zarc", 20 | bin_name = "zarc", 21 | author, 22 | version, 23 | infer_subcommands = true, 24 | after_help = "Want more detail? Try the long '--help' flag!", 25 | after_long_help = "Didn't expect this much output? Use the short '-h' flag to get short help." 
26 | )] 27 | #[cfg_attr(debug_assertions, command(before_help = "⚠ DEBUG BUILD ⚠"))] 28 | pub struct Args { 29 | /// Set diagnostic log level 30 | /// 31 | /// This enables diagnostic logging, which is useful for investigating bugs or gaining more 32 | /// insight into Zarc encoding and decoding. Use multiple times to increase verbosity. 33 | /// 34 | /// Goes up to '-vvvv'. When submitting bug reports, default to a '-vvv' log level. 35 | /// 36 | /// You may want to use with '--log-file' to avoid polluting your terminal. 37 | /// 38 | /// If $RUST_LOG is set, this flag is ignored. 39 | #[arg( 40 | long, 41 | short, 42 | action = ArgAction::Count, 43 | num_args = 0, 44 | )] 45 | pub verbose: Option, 46 | 47 | /// Write diagnostic logs to a file 48 | /// 49 | /// This writes diagnostic logs to a file, instead of the terminal, in JSON format. If a log 50 | /// level was not already specified, this will set it to '-vvv'. 51 | /// 52 | /// If a path is not provided, the default is the working directory. Note that with 53 | /// '--ignore-nothing', the write events to the log will likely get picked up by Watchexec, 54 | /// causing a loop; prefer setting a path outside of the watched directory. 55 | /// 56 | /// If the path provided is a directory, a file will be created in that directory. The file name 57 | /// will be the current date and time, in the format 'zarc.YYYY-MM-DDTHH-MM-SSZ.log'. 58 | #[arg( 59 | long, 60 | num_args = 0..=1, 61 | default_missing_value = ".", 62 | value_hint = ValueHint::AnyPath, 63 | value_name = "PATH", 64 | )] 65 | pub log_file: Option, 66 | 67 | /// What to do 68 | #[command(subcommand)] 69 | pub action: Action, 70 | } 71 | 72 | #[derive(Debug, Clone, Subcommand)] 73 | pub enum Action { 74 | /// Pack files into a Zarc archive. 75 | Pack(PackArgs), 76 | 77 | /// Unpack files from Zarc archive. 78 | Unpack(UnpackArgs), 79 | 80 | /// Walk a Zarc and print filenames. 81 | ListFiles(ListFilesArgs), 82 | // /// Walk a Zarc and print detailed information about its structure. 83 | // Debug(DebugArgs), 84 | } 85 | 86 | #[test] 87 | fn verify_cli() { 88 | use clap::CommandFactory; 89 | Args::command().debug_assert() 90 | } 91 | -------------------------------------------------------------------------------- /crates/zarc-cli/src/list_files.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | use clap::{Parser, ValueHint}; 4 | use regex::Regex; 5 | use tracing::info; 6 | use zarc::{decode::Decoder, directory::SpecialFileKind}; 7 | 8 | #[derive(Debug, Clone, Parser)] 9 | pub struct ListFilesArgs { 10 | /// Input file. 11 | #[arg( 12 | value_hint = ValueHint::AnyPath, 13 | value_name = "PATH", 14 | )] 15 | pub input: PathBuf, 16 | 17 | /// List only files. 18 | #[arg(long)] 19 | pub only_files: bool, 20 | 21 | /// Indicate filetypes with suffixes. 22 | /// 23 | /// Directories are marked with a '/' suffix, symlinks with `@`, hardlinks with `#`. 24 | #[arg(long)] 25 | pub decorate: bool, 26 | 27 | /// Filter files by name (with a regex). 28 | /// 29 | /// Can be given multiple times, and files will be matched if they match any of the regexes. 
30 | #[arg(long, value_name = "REGEX")] 31 | pub filter: Vec, 32 | } 33 | 34 | pub(crate) fn list_files(args: ListFilesArgs) -> miette::Result<()> { 35 | info!("initialise decoder"); 36 | let mut zarc = Decoder::open(args.input)?; 37 | zarc.read_directory()?; 38 | let zarc = zarc; 39 | 40 | info!("list files"); 41 | for entry in zarc.files() { 42 | if args.only_files && entry.special.is_some() { 43 | continue; 44 | } 45 | 46 | let name = entry.name.to_path().display().to_string(); 47 | if !args.filter.is_empty() && !args.filter.iter().any(|filter| filter.is_match(&name)) { 48 | continue; 49 | } 50 | 51 | print!("{name}"); 52 | match entry.special.as_ref().and_then(|sp| sp.kind) { 53 | Some(SpecialFileKind::Directory) => print!("/"), 54 | Some(kind) if kind.is_symlink() => print!("@"), 55 | Some(kind) if kind.is_hardlink() => print!("#"), 56 | _ => (), 57 | } 58 | 59 | println!(); 60 | } 61 | 62 | Ok(()) 63 | } 64 | -------------------------------------------------------------------------------- /crates/zarc-cli/src/logs.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | env::var, 3 | fs::{metadata, File}, 4 | io::{Error, Result}, 5 | sync::Mutex, 6 | }; 7 | 8 | use tracing::info; 9 | 10 | use crate::args::Args; 11 | 12 | pub fn from_env() -> Result { 13 | if var("RUST_LOG").is_ok() { 14 | tracing_subscriber::fmt::try_init().map_err(Error::other)?; 15 | Ok(true) 16 | } else { 17 | Ok(false) 18 | } 19 | } 20 | 21 | pub fn from_args(args: &Args) -> Result<()> { 22 | let verbosity = args.verbose.unwrap_or(0); 23 | if verbosity > 0 { 24 | let log_file = if let Some(file) = &args.log_file { 25 | let is_dir = metadata(file).map_or(false, |info| info.is_dir()); 26 | let path = if is_dir { 27 | let filename = format!( 28 | "zarc.{}.log", 29 | chrono::Utc::now().format("%Y-%m-%dT%H-%M-%SZ") 30 | ); 31 | file.join(filename) 32 | } else { 33 | file.to_owned() 34 | }; 35 | 36 | // TODO: use tracing-appender instead 37 | Some(File::create(path)?) 
38 | } else { 39 | None 40 | }; 41 | 42 | let mut builder = tracing_subscriber::fmt().with_env_filter(match verbosity { 43 | 0 => unreachable!("checked by if earlier"), 44 | 1 => "warn", 45 | 2 => "info", 46 | 3 => "debug", 47 | _ => "trace", 48 | }); 49 | 50 | if verbosity > 2 { 51 | use tracing_subscriber::fmt::format::FmtSpan; 52 | builder = builder.with_span_events(FmtSpan::NEW | FmtSpan::CLOSE); 53 | } 54 | 55 | match if let Some(writer) = log_file { 56 | builder.json().with_writer(Mutex::new(writer)).try_init() 57 | } else if verbosity > 3 { 58 | builder.pretty().try_init() 59 | } else { 60 | builder.try_init() 61 | } { 62 | Ok(_) => info!("logging initialised"), 63 | Err(e) => eprintln!("Failed to initialise logging, continuing with none\n{e}"), 64 | } 65 | } 66 | 67 | Ok(()) 68 | } 69 | -------------------------------------------------------------------------------- /crates/zarc-cli/src/main.rs: -------------------------------------------------------------------------------- 1 | #![warn(clippy::unwrap_used)] 2 | #![deny(rust_2018_idioms)] 3 | 4 | use clap::Parser; 5 | use miette::IntoDiagnostic; 6 | use tracing::{debug, warn}; 7 | 8 | use crate::args::Action; 9 | 10 | mod args; 11 | // mod debug; 12 | mod list_files; 13 | mod logs; 14 | mod pack; 15 | mod unpack; 16 | 17 | fn main() -> miette::Result<()> { 18 | let logs_on = logs::from_env().into_diagnostic()?; 19 | 20 | debug!("parsing arguments"); 21 | let args = args::Args::parse(); 22 | 23 | if logs_on { 24 | warn!("ignoring logging options from args"); 25 | } else { 26 | logs::from_args(&args).into_diagnostic()?; 27 | } 28 | 29 | debug!(?args, "got arguments"); 30 | 31 | match args.action { 32 | // Action::Debug(args) => debug::debug(args).into_diagnostic(), 33 | Action::ListFiles(args) => list_files::list_files(args), 34 | Action::Pack(args) => pack::pack(args).into_diagnostic(), 35 | Action::Unpack(args) => unpack::unpack(args), 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /crates/zarc-cli/src/pack.rs: -------------------------------------------------------------------------------- 1 | use std::{fs::File, path::PathBuf}; 2 | 3 | use base64ct::{Base64, Encoding}; 4 | use clap::{Parser, ValueHint}; 5 | use tracing::{debug, info}; 6 | use walkdir::WalkDir; 7 | use zarc::encode::{Encoder, ZstdParameter, ZstdStrategy}; 8 | 9 | #[derive(Debug, Clone, Parser)] 10 | pub struct PackArgs { 11 | /// Output file. 12 | #[arg(long, 13 | value_hint = ValueHint::AnyPath, 14 | value_name = "PATH", 15 | )] 16 | pub output: PathBuf, 17 | 18 | /// Paths to pack. 19 | #[arg( 20 | value_hint = ValueHint::AnyPath, 21 | value_name = "PATH", 22 | )] 23 | pub paths: Vec, 24 | 25 | /// Compression level. 26 | /// 27 | /// Can be negative (disables compression), or up to 20 (22 with an ultra strategy). 28 | #[arg( 29 | long, 30 | allow_negative_numbers = true, 31 | value_parser = clap::value_parser!(i32).range((-1<<17)..22), 32 | )] 33 | pub level: Option, 34 | 35 | /// Zstd parameter. 36 | /// 37 | /// Some values take a boolean, others take an unsigned integer, and the Strategy parameter 38 | /// takes a string. By default, ChecksumFlag is true, and all others are at zstd default. 39 | /// 40 | /// This is an advanced API and not all values will produce valid Zarcs, caution advised. 41 | #[arg( 42 | long, 43 | value_name = "PARAM=VALUE", 44 | value_parser = ParseZstdParam, 45 | )] 46 | pub zstd: Vec, 47 | 48 | /// Disable compression completely. 
49 | /// 50 | /// This will write all file content uncompressed, not even going through zstd at all. 51 | /// 52 | /// Use this if you want to compress the entire Zarc externally. 53 | #[arg(long)] 54 | pub store: bool, 55 | 56 | /// Follow symlinks. 57 | /// 58 | /// This destroys symlinks inside the Zarc: when unpacked, files will be duplicated. 59 | /// 60 | /// You may want '--follow-external-symlinks' instead. 61 | #[arg(long, short = 'L')] 62 | pub follow_symlinks: bool, 63 | 64 | /// Follow external symlinks. 65 | /// 66 | /// By default, zarc stores all symlinks as symlinks. If symlinks point to content external to 67 | /// the Zarc, the symlink when unpacked may point somewhere different or break. 68 | /// 69 | /// With this flag, zarc will evaluate symlinks and store them as symlinks if they are relative 70 | /// symlinks that point to other files in the Zarc, but will follow symlinks (and flatten them 71 | /// into stored files) if they are absolute or relative but pointing "outside" of the Zarc. 72 | /// 73 | /// See also the variant '--follow-and-store-external-symlinks'. 74 | #[arg(long, hide = true)] 75 | pub follow_external_symlinks: bool, 76 | 77 | /// Follow external symlinks, but also store the symlink target. 78 | /// 79 | /// Like '--follow-external-symlinks', but stores the symlink's original external target path 80 | /// alongside the stored file content. When unpacking, Zarc can decide to restore external symlinks 81 | /// or to unpack the stored content. 82 | #[arg(long, hide = true)] 83 | pub follow_and_store_external_symlinks: bool, 84 | } 85 | 86 | #[derive(Clone)] 87 | struct ParseZstdParam; 88 | 89 | const ZSTD_PARAM_LIST_BOOL: [&str; 4] = [ 90 | "EnableLongDistanceMatching", 91 | "ContentSizeFlag", 92 | "ChecksumFlag", 93 | "DictIdFlag", 94 | ]; 95 | 96 | const ZSTD_PARAM_LIST_U32: [&str; 13] = [ 97 | "WindowLog", 98 | "HashLog", 99 | "ChainLog", 100 | "SearchLog", 101 | "MinMatch", 102 | "TargetLength", 103 | "LdmHashLog", 104 | "LdmMinMatch", 105 | "LdmBucketSizeLog", 106 | "LdmHashRateLog", 107 | "NbWorkers", 108 | "JobSize", 109 | "OverlapSizeLog", 110 | ]; 111 | 112 | const ZSTD_STRATEGY_NAMES: [&str; 9] = [ 113 | "fast", "dfast", "greedy", "lazy", "lazy2", "btlazy2", "btopt", "btultra", "btultra2", 114 | ]; 115 | 116 | impl clap::builder::TypedValueParser for ParseZstdParam { 117 | type Value = ZstdParameter; 118 | 119 | fn parse_ref( 120 | &self, 121 | cmd: &clap::Command, 122 | arg: Option<&clap::Arg>, 123 | value: &std::ffi::OsStr, 124 | ) -> Result { 125 | use clap::{builder::*, error::*}; 126 | let val = StringValueParser::new().parse_ref(cmd, arg, value)?; 127 | 128 | let (left, right) = val.split_once('=').ok_or_else(|| { 129 | let mut err = 130 | Error::raw(ErrorKind::ValueValidation, "expected a key=value pair").with_cmd(cmd); 131 | if let Some(arg) = arg { 132 | err.insert( 133 | ContextKind::InvalidArg, 134 | ContextValue::String(arg.to_string()), 135 | ); 136 | } 137 | err 138 | })?; 139 | 140 | match left { 141 | "Strategy" => Ok(ZstdParameter::Strategy(match right { 142 | "fast" => ZstdStrategy::ZSTD_fast, 143 | "dfast" => ZstdStrategy::ZSTD_dfast, 144 | "greedy" => ZstdStrategy::ZSTD_greedy, 145 | "lazy" => ZstdStrategy::ZSTD_lazy, 146 | "lazy2" => ZstdStrategy::ZSTD_lazy2, 147 | "btlazy2" => ZstdStrategy::ZSTD_btlazy2, 148 | "btopt" => ZstdStrategy::ZSTD_btopt, 149 | "btultra" => ZstdStrategy::ZSTD_btultra, 150 | "btultra2" => ZstdStrategy::ZSTD_btultra2, 151 | _ => { 152 | return Err(Error::raw( 153 | ErrorKind::ValueValidation, 154 | 
"unknown Strategy value", 155 | )) 156 | } 157 | })), 158 | flag if ZSTD_PARAM_LIST_BOOL.contains(&flag) => { 159 | let val: bool = 160 | BoolishValueParser::new().parse_ref(cmd, arg, std::ffi::OsStr::new(right))?; 161 | Ok(match flag { 162 | "EnableLongDistanceMatching" => ZstdParameter::EnableLongDistanceMatching(val), 163 | "ContentSizeFlag" => ZstdParameter::ContentSizeFlag(val), 164 | "ChecksumFlag" => ZstdParameter::ChecksumFlag(val), 165 | "DictIdFlag" => ZstdParameter::DictIdFlag(val), 166 | _ => unreachable!(), 167 | }) 168 | } 169 | tune if ZSTD_PARAM_LIST_U32.contains(&tune) => { 170 | let val: u64 = RangedU64ValueParser::new() 171 | .range(0..(u32::MAX as _)) 172 | .parse_ref(cmd, arg, std::ffi::OsStr::new(right))?; 173 | 174 | #[allow(clippy::unwrap_used)] // UNWRAP: checked by range 175 | let val = u32::try_from(val).unwrap(); 176 | 177 | Ok(match tune { 178 | "WindowLog" => ZstdParameter::WindowLog(val), 179 | "HashLog" => ZstdParameter::HashLog(val), 180 | "ChainLog" => ZstdParameter::ChainLog(val), 181 | "SearchLog" => ZstdParameter::SearchLog(val), 182 | "MinMatch" => ZstdParameter::MinMatch(val), 183 | "TargetLength" => ZstdParameter::TargetLength(val), 184 | "LdmHashLog" => ZstdParameter::LdmHashLog(val), 185 | "LdmMinMatch" => ZstdParameter::LdmMinMatch(val), 186 | "LdmBucketSizeLog" => ZstdParameter::LdmBucketSizeLog(val), 187 | "LdmHashRateLog" => ZstdParameter::LdmHashRateLog(val), 188 | "NbWorkers" => ZstdParameter::NbWorkers(val), 189 | "JobSize" => ZstdParameter::JobSize(val), 190 | "OverlapSizeLog" => ZstdParameter::OverlapSizeLog(val), 191 | _ => unreachable!(), 192 | }) 193 | } 194 | _ => Err(Error::raw(ErrorKind::ValueValidation, "unknown parameter")), 195 | } 196 | } 197 | 198 | fn possible_values( 199 | &self, 200 | ) -> Option + '_>> { 201 | Some(Box::new( 202 | ZSTD_PARAM_LIST_BOOL 203 | .iter() 204 | .map(|name| clap::builder::PossibleValue::new(format!("{name}=true"))) 205 | .chain( 206 | ZSTD_PARAM_LIST_U32 207 | .iter() 208 | .map(|name| clap::builder::PossibleValue::new(format!("{name}=0"))), 209 | ) 210 | .chain( 211 | ZSTD_STRATEGY_NAMES.iter().map(|value| { 212 | clap::builder::PossibleValue::new(format!("Strategy={value}")) 213 | }), 214 | ), 215 | )) 216 | } 217 | } 218 | 219 | pub(crate) fn pack(args: PackArgs) -> std::io::Result<()> { 220 | info!(path=?args.output, "create output file"); 221 | let mut file = File::create(args.output)?; 222 | 223 | info!("initialise encoder"); 224 | let mut zarc = Encoder::new(&mut file)?; 225 | 226 | debug!("enable zstd checksums"); 227 | zarc.set_zstd_parameter(ZstdParameter::ChecksumFlag(true))?; 228 | 229 | if let Some(level) = args.level { 230 | debug!(%level, "set compression level"); 231 | zarc.set_zstd_parameter(ZstdParameter::CompressionLevel(level))?; 232 | } 233 | 234 | for param in args.zstd { 235 | debug!(?param, "set zstd parameter"); 236 | zarc.set_zstd_parameter(param)?; 237 | } 238 | 239 | if args.store { 240 | debug!("disable compression for content"); 241 | zarc.enable_compression(false); 242 | } 243 | 244 | for path in &args.paths { 245 | info!("walk {path:?}"); 246 | for entry in WalkDir::new(path).follow_links(args.follow_symlinks) { 247 | let entry = match entry { 248 | Ok(file) => file, 249 | Err(err) => { 250 | eprintln!("read error: {err}"); 251 | continue; 252 | } 253 | }; 254 | 255 | let filename = entry.path(); 256 | debug!("read {filename:?}"); 257 | 258 | let mut file = zarc.build_file_with_metadata(filename, args.follow_symlinks)?; 259 | if entry.file_type().is_file() { 260 | let 
content = std::fs::read(filename)?; 261 | file.digest(zarc.add_data_frame(&content)?); 262 | } 263 | zarc.add_file_entry(file)?; 264 | } 265 | } 266 | 267 | info!("finalising zarc"); 268 | let digest = zarc.finalise()?; 269 | 270 | println!("digest: {}", Base64::encode_string(&digest)); 271 | Ok(()) 272 | } 273 | -------------------------------------------------------------------------------- /crates/zarc-cli/src/unpack.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fs::{create_dir_all, DirBuilder, File}, 3 | io::Write, 4 | path::PathBuf, 5 | }; 6 | 7 | use base64ct::{Base64, Encoding}; 8 | use clap::{Parser, ValueHint}; 9 | use miette::{bail, IntoDiagnostic}; 10 | use regex::Regex; 11 | use tracing::{error, info, warn}; 12 | use zarc::{ 13 | decode::Decoder, 14 | integrity::Digest, 15 | metadata::decode::{set_ownership, set_permissions, set_timestamps}, 16 | }; 17 | 18 | #[derive(Debug, Clone, Parser)] 19 | pub struct UnpackArgs { 20 | /// Input file. 21 | #[arg( 22 | value_hint = ValueHint::AnyPath, 23 | value_name = "PATH", 24 | )] 25 | pub input: PathBuf, 26 | 27 | /// Filter files by name (with a regex). 28 | /// 29 | /// Can be given multiple times, and files will be matched if they match any of the regexes. 30 | #[arg(long, value_name = "REGEX")] 31 | pub filter: Vec, 32 | 33 | /// Verify that the Zarc directory matches the given digest. 34 | #[arg(long, value_name = "DIGEST")] 35 | pub verify: Option, 36 | } 37 | 38 | pub(crate) fn unpack(args: UnpackArgs) -> miette::Result<()> { 39 | info!("initialise decoder"); 40 | let mut zarc = Decoder::open(args.input)?; 41 | 42 | if let Some(string) = args.verify { 43 | let expected = Digest(Base64::decode_vec(&string).into_diagnostic()?); 44 | if expected != zarc.trailer().digest { 45 | bail!( 46 | "integrity failure: zarc file digest is {}", 47 | Base64::encode_string(&zarc.trailer().digest) 48 | ); 49 | } 50 | } else { 51 | eprintln!("digest: {}", Base64::encode_string(&zarc.trailer().digest)); 52 | } 53 | 54 | zarc.read_directory()?; 55 | let zarc = zarc; 56 | 57 | // zarc.frames().for_each(|frame| { 58 | // info!(offset=%frame.offset, digest=%Base64::encode_string(frame.digest.as_slice()), "frame"); 59 | // }); 60 | 61 | let mut unpacked = 0_u64; 62 | for entry in zarc.files() { 63 | let name = entry.name.to_path().display().to_string(); 64 | if !args.filter.is_empty() && !args.filter.iter().any(|filter| filter.is_match(&name)) { 65 | continue; 66 | } 67 | 68 | if entry.is_dir() { 69 | let path = entry.name.to_path(); 70 | info!(?path, "unpack dir"); 71 | let mut dir = DirBuilder::new(); 72 | dir.recursive(true); 73 | #[cfg(unix)] 74 | if let Some(mode) = entry.mode { 75 | use std::os::unix::fs::DirBuilderExt; 76 | dir.mode(mode); 77 | } 78 | dir.create(&path).into_diagnostic()?; 79 | 80 | let file = File::open(path).into_diagnostic()?; 81 | set_metadata(entry, &file)?; 82 | } else if entry.is_normal() { 83 | if let Some(digest) = &entry.digest { 84 | extract_file(entry, digest, &zarc)?; 85 | unpacked += 1; 86 | } 87 | } 88 | } 89 | 90 | eprintln!("unpacked {unpacked} files"); 91 | Ok(()) 92 | } 93 | 94 | fn extract_file( 95 | entry: &zarc::directory::File, 96 | digest: &zarc::integrity::Digest, 97 | zarc: &Decoder, 98 | ) -> miette::Result<()> { 99 | info!(path=?entry.name.to_path(), digest=%Base64::encode_string(digest.as_slice()), "unpack file"); 100 | let path = entry.name.to_path(); 101 | 102 | if let Some(dir) = path.parent() { 103 | // create parent dir just in case its 
entry wasn't in the zarc
104 | 		create_dir_all(dir).into_diagnostic()?;
105 | 	}
106 | 
107 | 	let Some(mut frame) = zarc.read_content_frame(digest).into_diagnostic()? else {
108 | 		warn!("frame not found");
109 | 		return Ok(());
110 | 	};
111 | 
112 | 	let mut file = File::create(path).into_diagnostic()?;
113 | 
114 | 	for bytes in &mut frame {
115 | 		file.write_all(&bytes.into_diagnostic()?)
116 | 			.into_diagnostic()?;
117 | 	}
118 | 	if !frame.verify().unwrap_or(false) {
119 | 		error!(path=?entry.name, "frame verification failed!");
120 | 	}
121 | 
122 | 	set_metadata(entry, &file)?;
123 | 	Ok(())
124 | }
125 | 
126 | fn set_metadata(entry: &zarc::directory::File, file: &File) -> miette::Result<()> {
127 | 	set_ownership(file, entry).into_diagnostic()?;
128 | 
129 | 	let mut perms = file.metadata().into_diagnostic()?.permissions();
130 | 	set_permissions(&mut perms, entry).into_diagnostic()?;
131 | 	file.set_permissions(perms).into_diagnostic()?;
132 | 
133 | 	if let Some(ts) = &entry.timestamps {
134 | 		set_timestamps(file, ts).into_diagnostic()?;
135 | 	}
136 | 
137 | 	Ok(())
138 | }
139 | 
-------------------------------------------------------------------------------- /crates/zarc-cli/wix/main.wxs: --------------------------------------------------------------------------------
(WiX installer definition; the XML markup was lost in extraction.)
-------------------------------------------------------------------------------- /crates/zarc-cli/zarc.exe.manifest: --------------------------------------------------------------------------------
(Windows application manifest; the XML markup was lost in extraction, leaving only the values `true`, `UTF-8`, and `SegmentHeap`.)
-------------------------------------------------------------------------------- /crates/zarc/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "zarc"
3 | version = "0.0.0"
4 | edition = "2021"
5 | 
6 | authors = ["Félix Saparelli "]
7 | license = "Apache-2.0 OR MIT"
8 | description = "Archive format based on Zstd"
9 | keywords = ["archive", "file-format", "zstd", "zarc"]
10 | categories = ["encoding", "compression"]
11 | 
12 | documentation = "https://github.com/passcod/zarc"
13 | repository = "https://github.com/passcod/zarc"
14 | 
15 | [dependencies]
16 | blake3 = "1.5.0"
17 | chrono = "0.4.31"
18 | deku = { version = "0.16.0", features = ["logging"], git = "https://github.com/sharksforarms/deku" }
19 | log = "0.4.20"
20 | miette = "5.10.0"
21 | minicbor = { version = "0.20.0", features = ["derive", "std"] }
22 | ozarc = { version = "0.0.0", path = "../ozarc" }
23 | subtle = "2.5.0"
24 | thiserror = "1.0.56"
25 | tracing = "0.1.40"
26 | visibility = "0.1.0"
27 | zstd = "0.13.0"
28 | zstd-safe = { version = "7.0.0", features = ["std"] }
29 | 
30 | [target.'cfg(unix)'.dependencies]
31 | nix = { version = "0.27.1", features = ["user"] }
32 | xattr = { version = "1.2.0", optional = true }
33 | 
34 | [target.'cfg(target_os="linux")'.dependencies]
35 | e2p-fileflags = { version = "0.1.0", optional = true }
36 | 
37 | 
[target.'cfg(windows)'.dependencies] 38 | windows = { version = "0.52.0", features = ["Win32_Storage_FileSystem"], optional = true } 39 | 40 | [features] 41 | default = ["metadata"] 42 | 43 | ## Enable the metadata gathering module, which pulls in platform-specific libraries 44 | metadata = ["dep:e2p-fileflags", "nix/fs", "dep:windows", "dep:xattr"] 45 | 46 | ## Expose internal encoding/decoding functions 47 | expose-internals = [] 48 | 49 | [package.metadata.docs.rs] 50 | all-features = true 51 | rustdoc-args = ["--cfg", "docsrs"] 52 | 53 | -------------------------------------------------------------------------------- /crates/zarc/src/constants.rs: -------------------------------------------------------------------------------- 1 | /// Magic bytes 2 | pub const ZARC_MAGIC: [u8; 3] = [0x65, 0xAA, 0xDC]; 3 | 4 | /// Zarc format version 5 | pub const ZARC_VERSION: u8 = 1; 6 | -------------------------------------------------------------------------------- /crates/zarc/src/decode.rs: -------------------------------------------------------------------------------- 1 | //! Decoder types and functions. 2 | 3 | use std::{ 4 | collections::{BTreeMap, HashMap}, 5 | num::NonZeroU16, 6 | }; 7 | 8 | use crate::{ 9 | directory::{Edition, File, Frame, Pathname}, 10 | integrity::Digest, 11 | ondemand::OnDemand, 12 | trailer::Trailer, 13 | }; 14 | 15 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 16 | #[doc(inline)] 17 | pub(crate) use self::zstd_iterator::ZstdFrameIterator; 18 | 19 | #[doc(inline)] 20 | pub use self::frame_iterator::FrameIterator; 21 | 22 | mod directory; 23 | pub mod error; 24 | mod frame_iterator; 25 | mod open; 26 | mod zstd_iterator; 27 | 28 | /// Decoder context. 29 | /// 30 | /// Reader needs to be Seek, as Zarc reads the file backwards from the end to find the trailer and directory. 31 | #[derive(Debug)] 32 | pub struct Decoder { 33 | // given by user 34 | reader: R, 35 | 36 | // obtained from trailer 37 | file_length: u64, 38 | trailer: Trailer, 39 | 40 | // obtained from directory 41 | editions: BTreeMap, 42 | files: Vec, 43 | frames: HashMap, 44 | files_by_name: BTreeMap>, 45 | files_by_digest: HashMap>, 46 | } 47 | 48 | impl Decoder { 49 | /// Length of the file in bytes. 50 | pub fn file_length(&self) -> u64 { 51 | self.file_length 52 | } 53 | 54 | /// The trailer metadata. 55 | pub fn trailer(&self) -> &Trailer { 56 | &self.trailer 57 | } 58 | 59 | /// Iterate through the editions. 60 | pub fn editions(&self) -> impl Iterator { 61 | self.editions.values() 62 | } 63 | 64 | /// Get edition metadata by number. 65 | pub fn edition(&self, number: impl TryInto) -> Option<&Edition> { 66 | number 67 | .try_into() 68 | .ok() 69 | .and_then(|number| self.editions.get(&number)) 70 | } 71 | 72 | /// Get the latest (current) edition. 73 | pub fn latest_edition(&self) -> Option<&Edition> { 74 | self.editions.values().last() 75 | } 76 | 77 | /// Iterate through the files. 78 | pub fn files(&self) -> impl Iterator { 79 | self.files.iter() 80 | } 81 | 82 | /// Get file entries that have a particular (path)name. 83 | pub fn files_by_name(&self, name: impl Into) -> Option> { 84 | self.files_by_name 85 | .get(&name.into()) 86 | .map(Vec::as_slice) 87 | .map(|v| v.iter().filter_map(|i| self.files.get(*i)).collect()) 88 | } 89 | 90 | /// Get files that reference a frame from its digest. 
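	///
	/// Several files can share one content frame (deduplicated content), so
	/// this returns a list. A minimal sketch, assuming `read_directory()` has
	/// already been called:
	///
	/// ```no_run
	/// # use zarc::{decode::Decoder, integrity::Digest, ondemand::OnDemand};
	/// # fn list_sharers<R: OnDemand>(zarc: &Decoder<R>, digest: &Digest) {
	/// for file in zarc.files_by_digest(digest).unwrap_or_default() {
	///     println!("{}", file.name.to_path().display());
	/// }
	/// # }
	/// ```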
91 | pub fn files_by_digest(&self, digest: &Digest) -> Option> { 92 | self.files_by_digest 93 | .get(digest) 94 | .map(Vec::as_slice) 95 | .map(|v| v.iter().filter_map(|i| self.files.get(*i)).collect()) 96 | } 97 | 98 | /// Iterate through the frames. 99 | pub fn frames(&self) -> impl Iterator { 100 | self.frames.values() 101 | } 102 | 103 | /// Get frame metadata by digest. 104 | pub fn frame(&self, digest: &Digest) -> Option<&Frame> { 105 | self.frames.get(digest) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /crates/zarc/src/decode/directory.rs: -------------------------------------------------------------------------------- 1 | use std::mem::take; 2 | 3 | use blake3::Hasher; 4 | use deku::DekuContainerRead; 5 | use ozarc::framing::{ZstandardBlockHeader, ZstandardFrameHeader}; 6 | use tracing::{debug, instrument, trace, warn}; 7 | 8 | use crate::{ 9 | directory::{Element, ElementFrame}, 10 | integrity::Digest, 11 | ondemand::OnDemand, 12 | }; 13 | 14 | use super::{ 15 | error::{ErrorKind, Result, SimpleError}, 16 | Decoder, 17 | }; 18 | 19 | impl Decoder { 20 | /// Read a Zstandard frame header. 21 | /// 22 | /// This reads the frame header, checks that it's a Zstandard frame, and leaves the reader at 23 | /// the start of the first block. The frame header is returned. 24 | /// 25 | /// This does not read the frame's payload: you need to do that yourself, reading blocks one at 26 | /// a time until the one marked `last`, and then reading the checksum 27 | /// [if present as per this header](ozarc::framing::ZstandardFrameDescriptor.checksum). 28 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 29 | #[instrument(level = "debug", skip(reader))] 30 | fn read_zstandard_frame_header(reader: &mut R::Reader) -> Result { 31 | let (bits_read, header) = 32 | ZstandardFrameHeader::from_reader((reader, 0)).map_err(SimpleError::from_deku)?; 33 | debug!(%bits_read, ?header, "read zstandard frame header"); 34 | Ok(header) 35 | } 36 | 37 | /// Read a Zstandard frame block header. 38 | /// 39 | /// This reads the block header, checks that it's a Zstandard block, and leaves the reader at 40 | /// the start of the block's payload. The block header is returned. 41 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 42 | #[instrument(level = "debug", skip(reader))] 43 | fn read_zstandard_block_header(reader: &mut R::Reader) -> Result { 44 | let (bits_read, header) = 45 | ZstandardBlockHeader::from_reader((reader, 0)).map_err(SimpleError::from_deku)?; 46 | debug!(%bits_read, ?header, "read zstandard block header"); 47 | Ok(header) 48 | } 49 | 50 | /// Read the Zarc Directory. 51 | /// 52 | /// After this returns, the Zarc file is ready for reading, using the files() iterator to sift 53 | /// through the available file records and extract them on demand. 
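	///
	/// A minimal usage sketch (the path is hypothetical; `PathBuf` is the
	/// [`OnDemand`] reader the CLI uses):
	///
	/// ```no_run
	/// # fn main() -> zarc::decode::error::Result<()> {
	/// let mut zarc = zarc::decode::Decoder::open(std::path::PathBuf::from("archive.zarc"))?;
	/// zarc.read_directory()?;
	/// for file in zarc.files() {
	///     println!("{}", file.name.to_path().display());
	/// }
	/// # Ok(())
	/// # }
	/// ```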
54 | #[instrument(level = "debug", skip(self))] 55 | pub fn read_directory(&mut self) -> Result<()> { 56 | let mut hasher = Hasher::new(); 57 | let mut editions = take(&mut self.editions); 58 | let mut frames = take(&mut self.frames); 59 | let mut files = take(&mut self.files); 60 | let mut files_by_name = take(&mut self.files_by_name); 61 | let mut files_by_digest = take(&mut self.files_by_digest); 62 | 63 | // start a new decompression session 64 | let frame = self.read_zstandard_frame(self.trailer.directory_offset as _)?; 65 | for data in frame { 66 | let data = data?; 67 | hasher.update(&data); 68 | 69 | let mut bytes = &data[..]; 70 | loop { 71 | let ((rest, _), element) = 72 | ElementFrame::from_bytes((&bytes, 0)).map_err(SimpleError::from_deku)?; 73 | bytes = rest; 74 | 75 | trace!(?element, "read element"); 76 | let Some(element) = element.element()? else { 77 | warn!(kind=?element.kind, "unknown element kind"); 78 | continue; 79 | }; 80 | 81 | match element { 82 | Element::Edition(edition) => { 83 | editions.insert(edition.number, *edition); 84 | } 85 | Element::Frame(frame) => { 86 | frames.insert(frame.digest.clone(), *frame); 87 | } 88 | Element::File(file) => { 89 | let name = file.name.clone(); 90 | let digest = file.digest.clone(); 91 | files.push(*file); 92 | let index = files.len() - 1; 93 | files_by_name.entry(name).or_default().push(index); 94 | if let Some(digest) = digest { 95 | files_by_digest.entry(digest).or_default().push(index); 96 | } 97 | } 98 | } 99 | 100 | if bytes.is_empty() { 101 | trace!("done with this chunk of data"); 102 | break; 103 | } 104 | } 105 | } 106 | 107 | self.editions = editions; 108 | self.frames = frames; 109 | self.files = files; 110 | self.files_by_name = files_by_name; 111 | self.files_by_digest = files_by_digest; 112 | 113 | trace!("finished reading directory, verify digest"); 114 | if self.trailer.digest != Digest(hasher.finalize().as_bytes().to_vec()) { 115 | return Err(ErrorKind::DirectoryIntegrity("digest").into()); 116 | } 117 | 118 | Ok(()) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /crates/zarc/src/decode/error.rs: -------------------------------------------------------------------------------- 1 | //! Error types for [`Decoder`](super::Decoder). 2 | use std::borrow::Cow; 3 | 4 | use deku::DekuError; 5 | use miette::{Diagnostic, SourceSpan}; 6 | use thiserror::Error; 7 | 8 | /// Convenience return type. 9 | pub type Result = std::result::Result; 10 | 11 | /// Combined return error type for [`Decoder`](super::Decoder) methods. 12 | #[derive(Error, Diagnostic, Debug)] 13 | pub enum Error { 14 | /// I/O error. 15 | #[error(transparent)] 16 | Io(#[from] std::io::Error), 17 | 18 | /// Zstd error. 19 | #[error("zstd decompression error: {0}")] 20 | Zstd(String), 21 | 22 | /// CBOR error. 23 | #[error(transparent)] 24 | Cbor(#[from] minicbor::decode::Error), 25 | 26 | /// Decoder error that's just a message. 27 | #[error(transparent)] 28 | Simple(#[from] SimpleError), 29 | 30 | /// Decoder error that includes source. 31 | #[error(transparent)] 32 | Source(#[from] SourceError), 33 | } 34 | 35 | pub(crate) fn zstd(code: usize) -> Error { 36 | let msg = zstd_safe::get_error_name(code); 37 | Error::Zstd(msg.into()) 38 | } 39 | 40 | /// Decoder error. 41 | #[derive(Error, Diagnostic, Debug)] 42 | #[error("zarc decode: {message}")] 43 | pub struct SimpleError { 44 | /// Error kind. 45 | pub kind: ErrorKind, 46 | 47 | /// Error message. 
48 | pub message: Cow<'static, str>, 49 | } 50 | 51 | /// Decoder error. 52 | #[derive(Error, Diagnostic, Debug)] 53 | #[error("zarc decode: {message}")] 54 | pub struct SourceError { 55 | /// Error kind. 56 | pub kind: ErrorKind, 57 | 58 | /// Error message. 59 | pub message: Cow<'static, str>, 60 | 61 | /// Error location in zarc file. 62 | #[label("here")] 63 | pub at: SourceSpan, 64 | 65 | /// Snippet of zarc file. 66 | #[source_code] 67 | pub snippet: String, 68 | } 69 | 70 | impl SimpleError { 71 | /// New error without source. 72 | pub fn new(kind: ErrorKind) -> Self { 73 | Self { 74 | kind, 75 | message: kind.default_message(), 76 | } 77 | } 78 | 79 | /// New simple error from deku. 80 | pub fn from_deku(orig: DekuError) -> Self { 81 | Self::new(ErrorKind::Parse).with_message(orig.to_string()) 82 | } 83 | 84 | /// Change the error message. 85 | pub fn with_message(mut self, message: impl Into>) -> Self { 86 | self.message = message.into(); 87 | self 88 | } 89 | } 90 | 91 | impl SourceError { 92 | /// New error with source snippet. 93 | pub fn new(kind: ErrorKind, snippet: &[u8], at_byte: usize) -> Self { 94 | Self { 95 | kind, 96 | message: kind.default_message(), 97 | snippet: format!("{snippet:02x?}"), 98 | at: SourceSpan::from(( 99 | (at_byte * 2) + 1, // to account for [ 100 | 2, // always 2 bytes for the hex value 101 | )), 102 | } 103 | } 104 | 105 | /// New error with source snippet, extracted from a larger source. 106 | pub fn from_source(kind: ErrorKind, source: &[u8], at_byte: usize, context: usize) -> Self { 107 | let start = at_byte.saturating_sub(context); 108 | let end = at_byte.saturating_add(context).min(source.len()); 109 | Self::new(kind, &source[start..end], at_byte.saturating_sub(start)) 110 | } 111 | 112 | /// New error from deku. 113 | pub fn from_deku(orig: DekuError, source: &[u8], at_byte: usize, context: usize) -> Self { 114 | Self::from_source(ErrorKind::Parse, source, at_byte, context).with_message(orig.to_string()) 115 | } 116 | 117 | /// Change the error message. 118 | pub fn with_message(mut self, message: impl Into>) -> Self { 119 | self.message = message.into(); 120 | self 121 | } 122 | } 123 | 124 | /// Decoder error kind. 125 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] 126 | pub enum ErrorKind { 127 | /// Zstd initialization error. 128 | ZstdInit, 129 | 130 | /// Invalid skippable frame magic nibble. 131 | InvalidNibble { 132 | /// Expected nibble value 133 | expected: u8, 134 | /// Value actually found 135 | actual: u8, 136 | }, 137 | 138 | /// Unsupported zarc format version. 139 | UnsupportedZarcVersion(u8), 140 | 141 | /// When using internal methods manually, you can read sections of a Zarc file out of order, 142 | /// before necessary details are available, which will cause this error. The public API 143 | /// guarantees this never occurs. 144 | ReadOrderViolation(&'static str), 145 | 146 | /// Unintended magic header was malformed. 147 | InvalidUnintendedMagic, 148 | 149 | /// The file version number is repeated several times in a Zarc file, and they must all match. 150 | MismatchedFileVersion, 151 | 152 | /// The directory's integrity is compromised. 153 | DirectoryIntegrity(&'static str), 154 | 155 | /// Parse error. 156 | Parse, 157 | } 158 | 159 | impl ErrorKind { 160 | /// Get the default error message for this error kind. 
161 | pub fn default_message(self) -> Cow<'static, str> { 162 | match self { 163 | ErrorKind::ZstdInit => Cow::Borrowed("zstd initialization error"), 164 | ErrorKind::InvalidNibble { expected, actual } => Cow::Owned(format!( 165 | "invalid skippable frame magic nibble: expected 0x{expected:X}, got 0x{actual:X}" 166 | )), 167 | ErrorKind::UnsupportedZarcVersion(version) => Cow::Owned(format!( 168 | "unsupported zarc version {version}, this zarc supports versions {:?}", 169 | [crate::constants::ZARC_VERSION] 170 | )), 171 | ErrorKind::ReadOrderViolation(what) => { 172 | Cow::Owned(format!("read order violation: {what}")) 173 | } 174 | ErrorKind::InvalidUnintendedMagic => Cow::Borrowed("malformed unintended magic header"), 175 | ErrorKind::MismatchedFileVersion => Cow::Borrowed("mismatched file version"), 176 | ErrorKind::DirectoryIntegrity(what) => { 177 | Cow::Owned(format!("directory integrity compromised: {what}")) 178 | } 179 | ErrorKind::Parse => Cow::Borrowed("parse error"), 180 | } 181 | } 182 | } 183 | 184 | impl From for SimpleError { 185 | fn from(ek: ErrorKind) -> Self { 186 | Self::new(ek) 187 | } 188 | } 189 | 190 | impl From for Error { 191 | fn from(ek: ErrorKind) -> Self { 192 | Self::Simple(ek.into()) 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /crates/zarc/src/decode/frame_iterator.rs: -------------------------------------------------------------------------------- 1 | //! Decoder types and functions. 2 | 3 | use std::io::{Read, Seek}; 4 | 5 | use crate::{integrity::Digest, ondemand::OnDemand}; 6 | 7 | use super::{error::Result, Decoder, ZstdFrameIterator}; 8 | 9 | impl Decoder { 10 | /// Decompress a content frame by digest. 11 | /// 12 | /// This returns an iterator of chunks of bytes. Each call to the iterator decompresses some 13 | /// data and returns it, until the frame is exhausted. 14 | pub fn read_content_frame( 15 | &self, 16 | digest: &Digest, 17 | ) -> Result>> { 18 | let Some(entry) = self.frames.get(digest) else { 19 | return Ok(None); 20 | }; 21 | 22 | Ok(Some(FrameIterator::new( 23 | self.read_zstandard_frame(entry.offset)?, 24 | digest.clone(), 25 | entry.uncompressed, 26 | ))) 27 | } 28 | } 29 | 30 | /// Iterator over a Zarc content frame's chunks. 31 | /// 32 | /// This is returned by [`Decoder::read_content_frame()`][super::Decoder::read_content_frame]. 33 | /// 34 | /// Each call to the iterator decompresses some data and returns it, until the frame is exhausted. 35 | /// It also computes the frame's digest as it goes, so you can check it against the one you used to 36 | /// request the frame. 37 | #[derive(Debug)] 38 | pub struct FrameIterator<'zstd, R> { 39 | framer: ZstdFrameIterator<'zstd, R>, 40 | hasher: blake3::Hasher, 41 | digest: Digest, 42 | uncompressed_size: u64, 43 | uncompressed_read: u64, 44 | } 45 | 46 | impl<'zstd, R> FrameIterator<'zstd, R> { 47 | pub(crate) fn new( 48 | framer: ZstdFrameIterator<'zstd, R>, 49 | digest: Digest, 50 | uncompressed_size: u64, 51 | ) -> Self { 52 | Self { 53 | framer, 54 | hasher: blake3::Hasher::new(), 55 | digest, 56 | uncompressed_size, 57 | uncompressed_read: 0, 58 | } 59 | } 60 | 61 | /// Return the uncompressed size of the frame. 62 | pub fn uncompressed_size(&self) -> u64 { 63 | self.uncompressed_size 64 | } 65 | 66 | /// How many (uncompressed) bytes are left to go. 67 | pub fn bytes_left(&self) -> u64 { 68 | self.uncompressed_size 69 | .saturating_sub(self.uncompressed_read) 70 | } 71 | 72 | /// Return the digest of the frame. 
73 | /// 74 | /// Returns None if the iterator isn't yet done. 75 | pub fn digest(&self) -> Option { 76 | if self.framer.is_done() { 77 | Some(Digest(self.hasher.finalize().as_bytes().to_vec())) 78 | } else { 79 | None 80 | } 81 | } 82 | 83 | /// Check the digest of the frame. 84 | /// 85 | /// Returns None if the iterator isn't yet done. 86 | pub fn verify(&self) -> Option { 87 | self.digest().map(|d| d == self.digest) 88 | } 89 | } 90 | 91 | impl<'zstd, R: Read + Seek> Iterator for FrameIterator<'zstd, R> { 92 | type Item = Result>; 93 | 94 | fn next(&mut self) -> Option { 95 | let data = self.framer.next()?; 96 | 97 | if let Ok(data) = &data { 98 | self.uncompressed_read += data.len() as u64; 99 | self.hasher.update(data); 100 | } 101 | 102 | Some(data) 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /crates/zarc/src/decode/open.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | io::{Cursor, Read, Seek, SeekFrom}, 3 | num::NonZeroU8, 4 | }; 5 | 6 | use deku::DekuContainerRead; 7 | use ozarc::framing::SkippableFrame; 8 | use tracing::{debug, instrument, trace, warn}; 9 | 10 | use crate::{ 11 | header::Header, 12 | ondemand::OnDemand, 13 | trailer::{Epilogue, Trailer, EPILOGUE_LENGTH}, 14 | }; 15 | 16 | use super::{ 17 | error::{ErrorKind, Result, SimpleError}, 18 | Decoder, 19 | }; 20 | 21 | impl Decoder { 22 | /// Read a Skippable frame, checking its nibble. 23 | /// 24 | /// Reads and returns the entire frame's payload, and thus seeks to the end of the frame. 25 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 26 | #[instrument(level = "debug", skip(reader))] 27 | fn read_skippable_frame(reader: &mut R::Reader, nibble: u8) -> Result { 28 | let (bits_read, frame) = 29 | SkippableFrame::from_reader((reader, 0)).map_err(SimpleError::from_deku)?; 30 | debug!(%bits_read, frame=format!("{frame:02x?}"), nibble=%format!("0x{:X}", frame.nibble()), "read skippable frame"); 31 | 32 | if frame.nibble() != nibble { 33 | return Err(ErrorKind::InvalidNibble { 34 | expected: nibble, 35 | actual: frame.nibble(), 36 | } 37 | .into()); 38 | } 39 | 40 | Ok(frame) 41 | } 42 | 43 | /// Read a Zarc header. 44 | /// 45 | /// Returns the file version in the header. 46 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 47 | #[instrument(level = "debug", skip(ondemand))] 48 | fn read_header(ondemand: &R) -> Result { 49 | let mut reader = ondemand.open()?; 50 | let frame = Self::read_skippable_frame(&mut reader, 0x0)?; 51 | 52 | let mut content = Cursor::new(frame.data); 53 | let (bits_read, header) = 54 | Header::from_reader((&mut content, 0)).map_err(SimpleError::from_deku)?; 55 | debug!(%bits_read, header=format!("{header:02x?}"), "read zarc header"); 56 | 57 | debug_assert_ne!(crate::constants::ZARC_VERSION, 0); 58 | debug_assert_ne!(header.version, 0); 59 | if header.version != crate::constants::ZARC_VERSION { 60 | return Err(ErrorKind::UnsupportedZarcVersion(header.version).into()); 61 | } 62 | 63 | Ok(unsafe { 64 | // SAFETY: the version is valid and zarc versions start at 1 65 | NonZeroU8::new_unchecked(header.version) 66 | }) 67 | } 68 | 69 | /// Read the Zarc Trailer. 70 | /// 71 | /// This opens a new reader, seeks to the end, and reads the [trailer][crate::trailer]. 72 | /// 73 | /// Returns the trailer and the length of the file. 
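	///
	/// Reading works backwards: seek to the end, read up to 1 KiB of the tail,
	/// parse the fixed-size epilogue from the very end of that, then complete
	/// the variable-length trailer and verify its check byte against the one
	/// stored in the epilogue.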
74 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 75 | #[instrument(level = "debug", skip(ondemand))] 76 | fn read_trailer(ondemand: &R) -> Result<(Trailer, u64)> { 77 | let mut reader = ondemand.open()?; 78 | 79 | // seek to the end to figure out how long this file is 80 | reader.seek(SeekFrom::End(0))?; 81 | let file_length = reader.stream_position()?; 82 | let ending_length = file_length.min(1024); 83 | trace!(%file_length, reading_bytes=%ending_length, "reading end of file"); 84 | 85 | // read up to 1KB from the end of the file 86 | reader.seek(SeekFrom::End(-(ending_length as i64)))?; 87 | let mut ending = Vec::with_capacity(ending_length as _); 88 | let bytes = reader.read_to_end(&mut ending)?; 89 | trace!(%bytes, data=%format!("{bytes:02x?}"), "read end of file"); 90 | debug_assert_eq!(bytes, ending_length as _); 91 | 92 | // read the epilogue out of the end of the ending 93 | let ((rest, remaining_bits), epilogue) = 94 | Epilogue::from_bytes((&ending[(bytes - EPILOGUE_LENGTH)..], 0)) 95 | .map_err(SimpleError::from_deku)?; 96 | debug!(?epilogue, "read zarc trailer epilogue"); 97 | 98 | if remaining_bits > 0 { 99 | trace!(%remaining_bits, ?rest, "some data remaining"); 100 | return Err(SimpleError::new(ErrorKind::Parse) 101 | .with_message(format!( 102 | "parse error: too much data ({remaining_bits} bits) {rest:02x?}" 103 | )) 104 | .into()); 105 | } 106 | 107 | // check we have enough data 108 | let trailer_length = epilogue.full_length(); 109 | if bytes < trailer_length { 110 | todo!("read more bytes"); 111 | } 112 | 113 | // complete reading the trailer 114 | // UNWRAP: we know we have enough data, we just checked 115 | let mut trailer = epilogue.complete(&ending).expect("not enough data"); 116 | debug!(bytes=%trailer.len(), trailer=format!("{trailer:02x?}"), "read zarc trailer"); 117 | 118 | // compare the check byte 119 | let check_byte = trailer.compute_check(); 120 | if check_byte != epilogue.check { 121 | return Err(SimpleError::new(ErrorKind::Parse) 122 | .with_message(format!( 123 | "parse error: trailer check byte doesn't match (expected 0x{:02X}, got 0x{check_byte:02X})", 124 | epilogue.check 125 | )) 126 | .into()); 127 | } 128 | 129 | trailer.make_offset_positive(file_length); 130 | debug!(offset=%trailer.directory_offset, "reified directory offset"); 131 | 132 | Ok((trailer, file_length)) 133 | } 134 | 135 | /// Open a Zarc for reading. 136 | /// 137 | /// This checks the [header][crate::header], reads the [trailer][crate::trailer], and verifies 138 | /// the integrity of the trailer. 139 | /// 140 | /// You'll then need to read the directory and extract some files! 
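	///
	/// A minimal sketch (the path is hypothetical; any [`OnDemand`] reader
	/// works, and the CLI passes a `PathBuf`):
	///
	/// ```no_run
	/// # fn main() -> zarc::decode::error::Result<()> {
	/// let zarc = zarc::decode::Decoder::open(std::path::PathBuf::from("archive.zarc"))?;
	/// println!("directory digest: {:02x?}", zarc.trailer().digest.as_slice());
	/// # Ok(())
	/// # }
	/// ```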
141 | pub fn open(reader: R) -> Result<Self> { 142 | let version = Self::read_header(&reader)?; 143 | let (trailer, file_length) = Self::read_trailer(&reader)?; 144 | if version.get() != trailer.version { 145 | warn!(header=%version, trailer=%trailer.version, "zarc version mismatch in header and trailer"); 146 | } 147 | 148 | Ok(Self { 149 | reader, 150 | file_length, 151 | trailer, 152 | editions: Default::default(), 153 | files: Default::default(), 154 | frames: Default::default(), 155 | files_by_name: Default::default(), 156 | files_by_digest: Default::default(), 157 | }) 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /crates/zarc/src/decode/zstd_iterator.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fmt, 3 | io::{Read, Seek, SeekFrom}, 4 | }; 5 | 6 | use tracing::{debug, instrument, trace}; 7 | use zstd_safe::{DCtx, InBuffer, OutBuffer}; 8 | 9 | use crate::ondemand::OnDemand; 10 | 11 | use super::{ 12 | error::{self, ErrorKind, Result}, 13 | Decoder, 14 | }; 15 | 16 | impl<R: OnDemand> Decoder<R> { 17 | /// Read a Zstandard frame, decompressing it on demand. 18 | /// 19 | /// This opens a new reader, seeks to the position given, and returns an iterator of chunks of 20 | /// bytes. Each call to the iterator decompresses some data and returns it, until the frame is 21 | /// exhausted. 22 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 23 | #[instrument(level = "debug", skip(self))] 24 | pub(crate) fn read_zstandard_frame( 25 | &self, 26 | offset: u64, 27 | ) -> Result<ZstdFrameIterator<'static, R::Reader>> { 28 | let mut reader = self.reader.open()?; 29 | let zstd = DCtx::try_create().ok_or(ErrorKind::ZstdInit)?; 30 | // TODO method to create zstd context with the parameters saved against Decoder 31 | 32 | debug!(%offset, "seek to frame"); 33 | reader.seek(SeekFrom::Start(offset))?; 34 | 35 | Ok(ZstdFrameIterator::new(reader, zstd, offset)) 36 | } 37 | } 38 | 39 | /// Iterator over a zstandard frame's chunks. 40 | /// 41 | /// This is returned by [`Decoder::read_zstandard_frame()`][super::Decoder::read_zstandard_frame]. 42 | /// 43 | /// Each call to the iterator decompresses some data and returns it, until the frame is exhausted. 44 | /// The wrapping `FrameIterator` computes the frame's digest as the data streams through, so you 45 | /// can check it against the one you used to request the frame. 46 | pub struct ZstdFrameIterator<'zstd, R> { 47 | reader: R, 48 | zstd: DCtx<'zstd>, 49 | start_offset: u64, 50 | done: bool, 51 | } 52 | 53 | impl<R: fmt::Debug> fmt::Debug for ZstdFrameIterator<'_, R> { 54 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 55 | f.debug_struct("ZstdFrameIterator") 56 | .field("reader", &self.reader) 57 | .field("zstd", &"zstd-safe decompression context") 58 | .field("start_offset", &self.start_offset) 59 | .field("done", &self.done) 60 | .finish() 61 | } 62 | } 63 | 64 | impl<'zstd, R> ZstdFrameIterator<'zstd, R> { 65 | /// Return `true` if the iterator is done, without advancing it. 66 | pub fn is_done(&self) -> bool { 67 | self.done 68 | } 69 | } 70 | 71 | impl<'zstd, R: Read + Seek> ZstdFrameIterator<'zstd, R> { 72 | pub(crate) fn new(reader: R, zstd: DCtx<'zstd>, start_offset: u64) -> Self { 73 | Self { 74 | reader, 75 | zstd, 76 | start_offset, 77 | done: false, 78 | } 79 | } 80 | 81 | /// Perform one step of a stream decompression. 82 | /// 83 | /// This cursor is left at wherever the decompression stopped, which may be in the middle of a 84 | /// block or frame; the next call to this method will continue from there.
85 | /// 86 | /// Returns the data that was decompressed and a boolean to indicate if the frame is done. 87 | #[instrument(level = "trace", skip(self))] 88 | fn decompress_step(&mut self) -> Result<(Vec<u8>, bool)> { 89 | let input_size = DCtx::in_size().max(1024); 90 | let mut input_buf = vec![0; input_size]; 91 | let bytes = self.reader.read(&mut input_buf)?; 92 | trace!(desired=%input_size, obtained=%bytes, "read from reader to give to zstd"); 93 | let mut input = InBuffer { 94 | src: &input_buf[..bytes], 95 | pos: 0, 96 | }; 97 | 98 | let output_size = DCtx::out_size().max(1024); 99 | let mut output_buf: Vec<u8> = Vec::with_capacity(output_size); 100 | trace!(bytes=%output_size, "allocated zstd output buffer"); 101 | let mut output = OutBuffer::around(&mut output_buf); 102 | 103 | trace!("decompressing"); 104 | let mut input_hint = self 105 | .zstd 106 | .decompress_stream(&mut output, &mut input) 107 | .map_err(error::zstd)?; 108 | trace!( 109 | %input_hint, 110 | frame_done=%input_hint == 0, 111 | input_pos=%input.pos, 112 | input_size=%input.src.len(), 113 | output_pos=%output.pos(), 114 | output_size=%output.capacity(), 115 | "decompressed" 116 | ); 117 | 118 | while output.pos() == output.capacity() { 119 | trace!("zstd wants more output space"); 120 | let new_output_size = DCtx::out_size().max(1024); 121 | output_buf.reserve(output_size + new_output_size); 122 | trace!(total=%output_buf.capacity(), "allocated larger zstd output buffer"); 123 | output = OutBuffer::around(&mut output_buf); 124 | 125 | trace!("decompressing again without changing input"); 126 | input_hint = self 127 | .zstd 128 | .decompress_stream(&mut output, &mut input) 129 | .map_err(error::zstd)?; 130 | trace!( 131 | %input_hint, 132 | frame_done=%input_hint == 0, 133 | input_pos=%input.pos, 134 | input_size=%input.src.len(), 135 | output_pos=%output.pos(), 136 | output_size=%output.capacity(), 137 | "decompressed" 138 | ); 139 | } 140 | 141 | let output_written = output.as_slice().len(); 142 | trace!(bytes = output_written, "zstd has finished with the input"); 143 | 144 | #[allow(clippy::drop_non_drop)] 145 | drop(output); // to release the mutable borrow on output_buf 146 | 147 | if output_written != output_buf.len() { 148 | trace!("shrink output buffer to actual written size"); 149 | output_buf.truncate(output_written); 150 | } 151 | 152 | Ok((output_buf, input_hint == 0)) 153 | } 154 | } 155 | 156 | impl<'zstd, R: Read + Seek> Iterator for ZstdFrameIterator<'zstd, R> { 157 | type Item = Result<Vec<u8>>; 158 | 159 | fn next(&mut self) -> Option<Self::Item> { 160 | if self.done { 161 | return None; 162 | } 163 | 164 | let (data, done) = match self.decompress_step() { 165 | Ok(ok) => ok, 166 | Err(err) => return Some(Err(err)), 167 | }; 168 | 169 | if done { 170 | self.done = true; 171 | } 172 | 173 | Some(Ok(data)) 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /crates/zarc/src/directory.rs: -------------------------------------------------------------------------------- 1 | //! Common types defining the binary format structures.
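//!
//! On disk, each directory element is a little-endian framed CBOR payload (see
//! [`ElementFrame`]), laid out as follows (a sketch derived from the struct definition
//! in `elements.rs`):
//!
//! ```text
//! [ kind: 1 byte ][ length: 2 bytes LE ][ pad: 1 byte ][ payload: `length` bytes of CBOR ]
//! ```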
2 | 3 | #[doc(inline)] 4 | pub use self::edition::*; 5 | #[doc(inline)] 6 | pub use self::elements::*; 7 | #[doc(inline)] 8 | pub use self::file::*; 9 | #[doc(inline)] 10 | pub use self::frame::*; 11 | #[doc(inline)] 12 | pub use self::posix_owner::*; 13 | #[doc(inline)] 14 | pub use self::specials::*; 15 | #[doc(inline)] 16 | pub use self::strings::*; 17 | #[doc(inline)] 18 | pub use self::timestamps::*; 19 | 20 | mod edition; 21 | mod elements; 22 | mod file; 23 | mod frame; 24 | mod posix_owner; 25 | mod specials; 26 | mod strings; 27 | mod timestamps; 28 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/edition.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, num::NonZeroU16}; 2 | 3 | use minicbor::{Decode, Encode}; 4 | 5 | use super::{strings::AttributeValue, timestamps::Timestamp}; 6 | use crate::integrity::DigestType; 7 | 8 | /// Metadata about a (previous) version of the Zarc Directory. 9 | /// 10 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#kind-1-editions) 11 | #[derive(Clone, Debug, PartialEq, Encode, Decode)] 12 | #[cbor(map)] 13 | pub struct Edition { 14 | /// Edition number. 15 | /// 16 | /// Used for referencing it in frames and files. 17 | #[n(0)] 18 | pub number: NonZeroU16, 19 | 20 | /// Version creation date. 21 | #[n(1)] 22 | pub written_at: Timestamp, 23 | 24 | /// Digest algorithm used by this edition. 25 | #[n(2)] 26 | pub digest_type: DigestType, 27 | 28 | /// User Metadata of that version. 29 | /// 30 | /// You can write a Some(empty HashMap), but you'll save two bytes if you write a None instead. 31 | /// This is pretty cheap here, but adds up for the similar fields in [`files`](crate::directory::File). 32 | #[n(10)] 33 | pub user_metadata: Option<HashMap<String, AttributeValue>>, 34 | } 35 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/elements.rs: -------------------------------------------------------------------------------- 1 | use std::num::TryFromIntError; 2 | 3 | use deku::prelude::*; 4 | 5 | use super::{edition::Edition, file::File, frame::Frame}; 6 | 7 | /// Zarc Directory Element framing 8 | /// 9 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#zarc-directory) 10 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)] 11 | #[deku(endian = "little")] 12 | pub struct ElementFrame { 13 | /// Element kind. 14 | pub kind: ElementKind, 15 | 16 | /// Length of CBOR data. 17 | #[deku(bytes = "2", update = "self.payload.len()", pad_bytes_after = "1")] 18 | pub length: u16, 19 | 20 | /// CBOR data. 21 | /// 22 | /// This is at most 65535 bytes. 23 | #[deku(count = "length")] 24 | pub payload: Vec<u8>, 25 | } 26 | 27 | impl ElementFrame { 28 | /// Encode an [Element] into a CBOR payload. 29 | /// 30 | /// CBOR encoding is infallible; this returns `Err` if the element is too large to fit (more than 65535 bytes). 31 | pub fn create(element: &Element) -> Result<Self, TryFromIntError> { 32 | let payload = element.to_vec(); 33 | u16::try_from(payload.len()).map(|length| Self { 34 | kind: element.kind(), 35 | length, 36 | payload, 37 | }) 38 | } 39 | 40 | /// Decode the CBOR payload into its [Element]. 41 | /// 42 | /// Returns `Ok(None)` if the element kind is unknown.
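///
/// # Example
///
/// A round-trip sketch (error handling elided), based on the `create`/`element` pair:
///
/// ```ignore
/// let frame = ElementFrame::create(&element)?;
/// assert_eq!(frame.element()?, Some(element));
/// ```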
43 | pub fn element(&self) -> Result<Option<Element>, minicbor::decode::Error> { 44 | match self.kind { 45 | ElementKind::Edition => { 46 | minicbor::decode(&self.payload).map(|e| Some(Element::Edition(e))) 47 | } 48 | ElementKind::File => minicbor::decode(&self.payload).map(|e| Some(Element::File(e))), 49 | ElementKind::Frame => minicbor::decode(&self.payload).map(|e| Some(Element::Frame(e))), 50 | ElementKind::Unknown(_) => Ok(None), 51 | } 52 | } 53 | } 54 | 55 | /// Kind of an element (including unknown variant). 56 | #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, DekuRead, DekuWrite)] 57 | #[deku(endian = "endian", type = "u8", ctx = "endian: deku::ctx::Endian")] 58 | pub enum ElementKind { 59 | /// [Edition] 60 | #[deku(id = "1")] 61 | Edition, 62 | 63 | /// [File] 64 | #[deku(id = "2")] 65 | File, 66 | 67 | /// [Frame] 68 | #[deku(id = "3")] 69 | Frame, 70 | 71 | /// Unknown element kind. 72 | #[deku(id_pat = "_")] 73 | Unknown(u8), 74 | } 75 | 76 | /// Elements supported by Zarc. 77 | #[derive(Clone, Debug, PartialEq)] 78 | pub enum Element { 79 | /// [Edition] 80 | Edition(Box<Edition>), 81 | /// [File] 82 | File(Box<File>), 83 | /// [Frame] 84 | Frame(Box<Frame>), 85 | } 86 | 87 | impl Element { 88 | /// Get the [ElementKind] of this element. 89 | pub fn kind(&self) -> ElementKind { 90 | match self { 91 | Element::Edition(_) => ElementKind::Edition, 92 | Element::File(_) => ElementKind::File, 93 | Element::Frame(_) => ElementKind::Frame, 94 | } 95 | } 96 | 97 | /// Write the [Element] into a CBOR payload. 98 | pub fn to_vec(&self) -> Vec<u8> { 99 | #[allow(clippy::unwrap_used)] // UNWRAP: minicbor encoding is infallible 100 | match self { 101 | Element::Edition(edition) => minicbor::to_vec(edition), 102 | Element::File(file) => minicbor::to_vec(file), 103 | Element::Frame(frame) => minicbor::to_vec(frame), 104 | } 105 | .unwrap() 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/file.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, num::NonZeroU16}; 2 | 3 | use minicbor::{Decode, Encode}; 4 | 5 | use super::{ 6 | posix_owner::PosixOwner, 7 | specials::SpecialFile, 8 | strings::{AttributeValue, Pathname}, 9 | timestamps::Timestamps, 10 | }; 11 | use crate::integrity::Digest; 12 | 13 | /// Zarc Directory File Entry 14 | /// 15 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#kind-2-files) 16 | #[derive(Clone, Debug, PartialEq, Encode, Decode)] 17 | #[cbor(map)] 18 | pub struct File { 19 | /// Edition that added this entry. 20 | #[n(0)] 21 | pub edition: NonZeroU16, 22 | 23 | /// Pathname. 24 | #[n(1)] 25 | pub name: Pathname, 26 | 27 | /// Hash of a frame of content. 28 | #[n(2)] 29 | pub digest: Option<Digest>, 30 | 31 | /// POSIX mode. 32 | #[n(3)] 33 | pub mode: Option<u32>, 34 | 35 | /// POSIX user. 36 | #[n(4)] 37 | pub user: Option<PosixOwner>, 38 | 39 | /// POSIX group. 40 | #[n(5)] 41 | pub group: Option<PosixOwner>, 42 | 43 | /// Timestamps. 44 | #[n(6)] 45 | pub timestamps: Option<Timestamps>, 46 | 47 | /// Special files. 48 | #[n(7)] 49 | pub special: Option<SpecialFile>, 50 | 51 | /// User metadata. 52 | #[n(10)] 53 | pub user_metadata: Option<HashMap<String, AttributeValue>>, 54 | 55 | /// File attributes. 56 | #[n(11)] 57 | pub attributes: Option<HashMap<String, AttributeValue>>, 58 | 59 | /// Extended attributes. 60 | #[n(12)] 61 | pub extended_attributes: Option<HashMap<String, AttributeValue>>, 62 | } 63 | 64 | impl File { 65 | /// Returns `true` if this is _not_ a special file _and_ it has a frame.
66 | pub fn is_normal(&self) -> bool { 67 | self.digest.is_some() && self.special.is_none() 68 | } 69 | 70 | /// Returns `true` if this is a directory. 71 | /// 72 | /// See also [`SpecialFile::is_dir`]. 73 | pub fn is_dir(&self) -> bool { 74 | self.special.as_ref().map_or(false, SpecialFile::is_dir) 75 | } 76 | 77 | /// Returns `true` if this is a link. 78 | /// 79 | /// See also [`SpecialFile::is_link`]. 80 | pub fn is_link(&self) -> bool { 81 | self.special.as_ref().map_or(false, SpecialFile::is_link) 82 | } 83 | 84 | /// Returns `true` if this is a symlink. 85 | /// 86 | /// See also [`SpecialFile::is_symlink`]. 87 | pub fn is_symlink(&self) -> bool { 88 | self.special.as_ref().map_or(false, SpecialFile::is_symlink) 89 | } 90 | 91 | /// Returns `true` if this is a hardlink. 92 | /// 93 | /// See also [`SpecialFile::is_hardlink`]. 94 | pub fn is_hardlink(&self) -> bool { 95 | self.special 96 | .as_ref() 97 | .map_or(false, SpecialFile::is_hardlink) 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/frame.rs: -------------------------------------------------------------------------------- 1 | use std::num::NonZeroU16; 2 | 3 | use minicbor::{Decode, Encode}; 4 | 5 | use crate::integrity::Digest; 6 | 7 | /// Zarc Directory Frame Entry 8 | /// 9 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#kind-3-frames) 10 | #[derive(Clone, Debug, PartialEq, Encode, Decode)] 11 | #[cbor(map)] 12 | pub struct Frame { 13 | /// Edition which added this frame. 14 | #[n(0)] 15 | pub edition: NonZeroU16, 16 | 17 | /// Frame offset. 18 | #[n(1)] 19 | pub offset: u64, 20 | 21 | /// Hash of the frame. 22 | #[n(2)] 23 | pub digest: Digest, 24 | 25 | /// Entire frame length in bytes. 26 | #[n(3)] 27 | pub length: u64, 28 | 29 | /// Uncompressed content size in bytes. 30 | #[n(4)] 31 | pub uncompressed: u64, 32 | } 33 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/posix_owner.rs: -------------------------------------------------------------------------------- 1 | #[cfg(unix)] 2 | use std::sync::Mutex; 3 | 4 | #[cfg(unix)] 5 | use crate::owner_cache::OwnerCache; 6 | use minicbor::{data::Type, Decode, Decoder, Encode, Encoder}; 7 | #[cfg(unix)] 8 | use nix::unistd::{Gid, Group, Uid, User}; 9 | 10 | #[cfg(unix)] 11 | thread_local! { 12 | static OWNER_CACHE: Mutex<OwnerCache> = Mutex::new(OwnerCache::default()); 13 | } 14 | 15 | /// POSIX owner information (user or group). 16 | #[derive(Clone, Debug, Default, PartialEq)] 17 | pub struct PosixOwner { 18 | /// Owner numeric ID. 19 | pub id: Option<u64>, 20 | 21 | /// Owner name. 22 | pub name: Option<String>, 23 | } 24 | 25 | impl PosixOwner { 26 | /// Create from a user ID. 27 | /// 28 | /// On non-Unix, this always succeeds and returns a `PosixOwner` with the ID set only. 29 | /// 30 | /// On Unix, this resolves the user from the system and returns a `PosixOwner` with both the 31 | /// ID and the username set, iff the user exists. 32 | pub fn from_uid(uid: u32) -> std::io::Result<Option<Self>> { 33 | #[cfg(unix)] 34 | { 35 | OWNER_CACHE 36 | .with(|oc| { 37 | oc.lock() 38 | .expect("owner cache poisoned") 39 | .user_from_uid(Uid::from_raw(uid)) 40 | }) 41 | .map(|u| u.map(Into::into)) 42 | } 43 | 44 | #[cfg(not(unix))] 45 | { 46 | Ok(Some(Self { 47 | id: Some(uid as _), 48 | name: None, 49 | })) 50 | } 51 | } 52 | 53 | /// Create from a group ID. 54 | /// 55 | /// On non-Unix, this always succeeds and returns a `PosixOwner` with the ID set only.
56 | /// 57 | /// On Unix, this resolves the group from the system and returns a `PosixOwner` with both the ID 58 | /// and the group name set, iff the group exists. 59 | pub fn from_gid(gid: u32) -> std::io::Result<Option<Self>> { 60 | #[cfg(unix)] 61 | { 62 | OWNER_CACHE 63 | .with(|oc| { 64 | oc.lock() 65 | .expect("owner cache poisoned") 66 | .group_from_gid(Gid::from_raw(gid)) 67 | }) 68 | .map(|u| u.map(Into::into)) 69 | } 70 | 71 | #[cfg(not(unix))] 72 | { 73 | Ok(Some(Self { 74 | id: Some(gid as _), 75 | name: None, 76 | })) 77 | } 78 | } 79 | 80 | /// Convert to a user ID valid on the current system. 81 | /// 82 | /// - If only `id` is present, this checks and returns it. 83 | /// - If only `name` is present, this resolves the user from the system and returns its ID if it exists. 84 | /// - If both are present, and: 85 | ///   - `id` matches the resolved ID from the name, this returns `id`. 86 | ///   - `id` does not match the resolved ID from the name, this returns the ID of the resolved user. 87 | ///   - `name` does not resolve to a user on the system, this returns `id`. 88 | /// 89 | /// Additionally if the `id` is larger than a u32, this returns an error. 90 | #[cfg(unix)] 91 | pub fn to_real_uid(&self) -> std::io::Result<Option<Uid>> { 92 | match self { 93 | Self { 94 | id: None, 95 | name: None, 96 | } => Ok(None), 97 | 98 | Self { 99 | id: Some(id), 100 | name: None, 101 | } => u32::try_from(*id) 102 | .map_err(std::io::Error::other) 103 | .and_then(|uid| { 104 | OWNER_CACHE.with(|oc| { 105 | oc.lock() 106 | .expect("owner cache poisoned") 107 | .user_from_uid(Uid::from_raw(uid)) 108 | }) 109 | }) 110 | .map(|u| u.map(|u| u.uid)), 111 | 112 | Self { 113 | id: None, 114 | name: Some(name), 115 | } => OWNER_CACHE 116 | .with(|oc| { 117 | oc.lock() 118 | .expect("owner cache poisoned") 119 | .user_from_name(name) 120 | }) 121 | .map(|u| u.map(|u| u.uid)), 122 | 123 | Self { 124 | id: Some(id), 125 | name: Some(name), 126 | } => { 127 | let id = u32::try_from(*id).map_err(std::io::Error::other)?; 128 | 129 | if let Some(user) = OWNER_CACHE.with(|oc| { 130 | oc.lock() 131 | .expect("owner cache poisoned") 132 | .user_from_name(name) 133 | })? { 134 | Ok(Some(user.uid)) 135 | } else { 136 | Ok(Some(Uid::from_raw(id))) 137 | } 138 | } 139 | } 140 | } 141 | 142 | /// Convert to a group ID valid on the current system. 143 | /// 144 | /// - If only `id` is present, this checks and returns it. 145 | /// - If only `name` is present, this resolves the group from the system and returns its ID if it exists. 146 | /// - If both are present, and: 147 | ///   - `id` matches the resolved ID from the name, this returns `id`. 148 | ///   - `id` does not match the resolved ID from the name, this returns the ID of the resolved group. 149 | ///   - `name` does not resolve to a group on the system, this returns `id`. 150 | /// 151 | /// Additionally if the `id` is larger than a u32, this returns an error.
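///
/// # Example
///
/// A sketch (Unix only; the values are illustrative):
///
/// ```ignore
/// let owner = PosixOwner { id: Some(0), name: Some("root".into()) };
/// // "root" resolves on the system, so its GID wins; otherwise GID 0 is used as-is.
/// let gid = owner.to_real_gid()?;
/// ```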
152 | #[cfg(unix)] 153 | pub fn to_real_gid(&self) -> std::io::Result<Option<Gid>> { 154 | match self { 155 | Self { 156 | id: None, 157 | name: None, 158 | } => Ok(None), 159 | 160 | Self { 161 | id: Some(id), 162 | name: None, 163 | } => u32::try_from(*id) 164 | .map_err(std::io::Error::other) 165 | .and_then(|gid| { 166 | OWNER_CACHE.with(|oc| { 167 | oc.lock() 168 | .expect("owner cache poisoned") 169 | .group_from_gid(Gid::from_raw(gid)) 170 | }) 171 | }) 172 | .map(|u| u.map(|u| u.gid)), 173 | 174 | Self { 175 | id: None, 176 | name: Some(name), 177 | } => OWNER_CACHE 178 | .with(|oc| { 179 | oc.lock() 180 | .expect("owner cache poisoned") 181 | .group_from_name(name) 182 | }) 183 | .map(|u| u.map(|u| u.gid)), 184 | 185 | Self { 186 | id: Some(id), 187 | name: Some(name), 188 | } => { 189 | let id = u32::try_from(*id).map_err(std::io::Error::other)?; 190 | 191 | if let Some(group) = OWNER_CACHE.with(|oc| { 192 | oc.lock() 193 | .expect("owner cache poisoned") 194 | .group_from_name(name) 195 | })? { 196 | Ok(Some(group.gid)) 197 | } else { 198 | Ok(Some(Gid::from_raw(id))) 199 | } 200 | } 201 | } 202 | } 203 | } 204 | 205 | #[cfg(unix)] 206 | impl From<User> for PosixOwner { 207 | fn from(user: User) -> Self { 208 | Self { 209 | id: Some(user.uid.as_raw() as _), 210 | name: Some(user.name), 211 | } 212 | } 213 | } 214 | 215 | #[cfg(unix)] 216 | impl From<Group> for PosixOwner { 217 | fn from(group: Group) -> Self { 218 | Self { 219 | id: Some(group.gid.as_raw() as _), 220 | name: Some(group.name), 221 | } 222 | } 223 | } 224 | 225 | impl<C> Encode<C> for PosixOwner { 226 | fn encode<W: minicbor::encode::Write>( 227 | &self, 228 | e: &mut Encoder<W>, 229 | _ctx: &mut C, 230 | ) -> Result<(), minicbor::encode::Error<W::Error>> { 231 | e.array(match (self.id.is_some(), self.name.is_some()) { 232 | (true, true) => 2, 233 | (true, false) | (false, true) => 1, 234 | (false, false) => 0, 235 | })?; 236 | 237 | if let Some(id) = &self.id { 238 | e.u64(*id)?; 239 | } 240 | 241 | if let Some(name) = &self.name { 242 | e.encode(name)?; 243 | } 244 | 245 | Ok(()) 246 | } 247 | } 248 | 249 | impl<'b, C> Decode<'b, C> for PosixOwner { 250 | fn decode(d: &mut Decoder<'b>, _ctx: &mut C) -> Result<Self, minicbor::decode::Error> { 251 | let mut id = None; 252 | let mut name = None; 253 | 254 | let max = d.array()?.unwrap_or(u64::MAX); 255 | for _ in 0..max { 256 | match d.datatype()? { 257 | Type::Break => break, 258 | Type::U8 => { 259 | id = Some(d.u8()? as _); 260 | } 261 | Type::U16 => { 262 | id = Some(d.u16()? as _); 263 | } 264 | Type::U32 => { 265 | id = Some(d.u32()? as _); 266 | } 267 | Type::U64 => { 268 | id = Some(d.u64()?); 269 | } 270 | Type::String | Type::StringIndef => { 271 | name = Some(d.decode()?); 272 | } 273 | ty => return Err(minicbor::decode::Error::type_mismatch(ty)), 274 | } 275 | } 276 | 277 | Ok(Self { id, name }) 278 | } 279 | } 280 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/specials.rs: -------------------------------------------------------------------------------- 1 | use std::path::{Component, Path}; 2 | 3 | use minicbor::{data::Type, Decode, Decoder, Encode, Encoder}; 4 | 5 | use super::strings::{CborString, Pathname}; 6 | 7 | /// Special File metadata. 8 | /// 9 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#30-special-file-types) 10 | #[derive(Clone, Debug, Default, PartialEq, Encode, Decode)] 11 | #[cbor(array)] 12 | pub struct SpecialFile { 13 | /// Kind of special file. 14 | /// 15 | /// Will be `None` for unknown kinds.
16 | #[n(0)] 17 | pub kind: Option<SpecialFileKind>, 18 | 19 | /// Link target. 20 | #[n(1)] 21 | pub link_target: Option<LinkTarget>, 22 | } 23 | 24 | impl SpecialFile { 25 | /// Returns `true` if this is a directory. 26 | /// 27 | /// See also [`SpecialFileKind::is_dir`]. 28 | pub fn is_dir(&self) -> bool { 29 | self.kind.map_or(false, SpecialFileKind::is_dir) 30 | } 31 | 32 | /// Returns `true` if this is a link. 33 | /// 34 | /// See also [`SpecialFileKind::is_link`]. 35 | pub fn is_link(&self) -> bool { 36 | self.kind.map_or(false, SpecialFileKind::is_link) 37 | } 38 | 39 | /// Returns `true` if this is a symlink. 40 | /// 41 | /// See also [`SpecialFileKind::is_symlink`]. 42 | pub fn is_symlink(&self) -> bool { 43 | self.kind.map_or(false, SpecialFileKind::is_symlink) 44 | } 45 | 46 | /// Returns `true` if this is a hardlink. 47 | /// 48 | /// See also [`SpecialFileKind::is_hardlink`]. 49 | pub fn is_hardlink(&self) -> bool { 50 | self.kind.map_or(false, SpecialFileKind::is_hardlink) 51 | } 52 | } 53 | 54 | /// Special File kinds. 55 | /// 56 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#30-special-file-types) 57 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Encode, Decode)] 58 | #[cbor(index_only)] 59 | pub enum SpecialFileKind { 60 | /// Directory. 61 | /// 62 | /// To encode metadata/attributes against a directory. 63 | #[n(1)] 64 | Directory = 1, 65 | 66 | /// A symlink. 67 | /// 68 | /// Some kind of symlink, but without specifying what exactly it is. 69 | #[n(10)] 70 | Symlink = 10, 71 | 72 | /// Internal symbolic link. 73 | /// 74 | /// Must point to a file that exists within this Zarc. 75 | #[n(11)] 76 | InternalSymlink = 11, 77 | 78 | /// External absolute symbolic link. 79 | #[n(12)] 80 | ExternalAbsoluteSymlink = 12, 81 | 82 | /// External relative symbolic link. 83 | #[n(13)] 84 | ExternalRelativeSymlink = 13, 85 | 86 | /// A hardlink. 87 | /// 88 | /// Some kind of hardlink, but without specifying what exactly it is. 89 | #[n(20)] 90 | Hardlink = 20, 91 | 92 | /// Internal hardlink. 93 | /// 94 | /// Must point to a file that exists within this Zarc. 95 | #[n(21)] 96 | InternalHardlink = 21, 97 | 98 | /// External hardlink. 99 | #[n(22)] 100 | ExternalHardlink = 22, 101 | } 102 | 103 | impl SpecialFileKind { 104 | /// Returns `true` if this is a directory. 105 | pub fn is_dir(self) -> bool { 106 | matches!(self, Self::Directory) 107 | } 108 | 109 | /// Returns `true` if this is a link. 110 | /// 111 | /// This covers all the symlink and hardlink variants. 112 | pub fn is_link(self) -> bool { 113 | self.is_symlink() || self.is_hardlink() 114 | } 115 | 116 | /// Returns `true` if this is a symlink. 117 | /// 118 | /// This covers all the symlink variants. 119 | pub fn is_symlink(self) -> bool { 120 | matches!( 121 | self, 122 | Self::Symlink 123 | | Self::InternalSymlink 124 | | Self::ExternalAbsoluteSymlink 125 | | Self::ExternalRelativeSymlink 126 | ) 127 | } 128 | 129 | /// Returns `true` if this is a hardlink. 130 | /// 131 | /// This covers all the hardlink variants. 132 | pub fn is_hardlink(self) -> bool { 133 | matches!( 134 | self, 135 | Self::Hardlink | Self::InternalHardlink | Self::ExternalHardlink 136 | ) 137 | } 138 | } 139 | 140 | /// Target of link (for [`SpecialFile`]) 141 | /// 142 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#30-special-file-types) 143 | #[derive(Clone, Debug, PartialEq)] 144 | pub enum LinkTarget { 145 | /// Target as full pathname. 146 | FullPath(CborString), 147 | 148 | /// Target as array of path components.
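/// 
/// Per the `From<&Path>` impl below, this form is used for relative targets made up
/// entirely of normal components; anything else falls back to `FullPath`.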
149 | Components(Vec<CborString>), 150 | } 151 | 152 | impl From<Pathname> for LinkTarget { 153 | fn from(pathname: Pathname) -> Self { 154 | Self::Components(pathname.0) 155 | } 156 | } 157 | 158 | impl From<&Path> for LinkTarget { 159 | fn from(path: &Path) -> Self { 160 | if path.is_absolute() 161 | || path 162 | .components() 163 | .any(|c| !matches!(c, Component::Normal(_))) 164 | { 165 | Self::FullPath(CborString::from(path.as_os_str())) 166 | } else { 167 | Self::from(Pathname::from_normal_components(path)) 168 | } 169 | } 170 | } 171 | 172 | impl<C> Encode<C> for LinkTarget { 173 | fn encode<W: minicbor::encode::Write>( 174 | &self, 175 | e: &mut Encoder<W>, 176 | ctx: &mut C, 177 | ) -> Result<(), minicbor::encode::Error<W::Error>> { 178 | match self { 179 | Self::FullPath(s) => s.encode(e, ctx), 180 | Self::Components(v) => { 181 | e.array(v.len().try_into().expect("path way too long"))?; 182 | for s in v { 183 | s.encode(e, ctx)?; 184 | } 185 | Ok(()) 186 | } 187 | } 188 | } 189 | } 190 | 191 | impl<'b, C> Decode<'b, C> for LinkTarget { 192 | fn decode(d: &mut Decoder<'b>, ctx: &mut C) -> Result<Self, minicbor::decode::Error> { 193 | match d.datatype()? { 194 | Type::Array => todo!(), 195 | Type::ArrayIndef => todo!(), 196 | _ => CborString::decode(d, ctx).map(Self::FullPath), 197 | } 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/strings.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | ffi::OsStr, 3 | path::{Component, Path, PathBuf}, 4 | }; 5 | 6 | use minicbor::{data::Type, Decode, Decoder, Encode, Encoder}; 7 | 8 | /// Pathname as components. 9 | #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Encode, Decode)] 10 | #[cbor(transparent)] 11 | pub struct Pathname( 12 | /// Components of the path. 13 | #[n(0)] // but unused because of transparent 14 | pub Vec<CborString>, 15 | // double space is from rustfmt: https://github.com/rust-lang/rustfmt/issues/5997 16 | ); 17 | 18 | impl Pathname { 19 | /// Converts a Path, ignoring all non-normal components. 20 | pub fn from_normal_components(path: &Path) -> Self { 21 | Self( 22 | path.components() 23 | .filter_map(|c| { 24 | if let Component::Normal(comp) = c { 25 | Some(CborString::from(comp)) 26 | } else { 27 | None 28 | } 29 | }) 30 | .collect(), 31 | ) 32 | } 33 | 34 | /// Converts to a (platform-specific) Path. 35 | pub fn to_path(&self) -> PathBuf { 36 | let mut path = PathBuf::new(); 37 | for comp in &self.0 { 38 | match comp { 39 | CborString::Text(text) => { 40 | path.push(text); 41 | } 42 | CborString::Binary(bytes) => { 43 | #[cfg(unix)] 44 | { 45 | use std::os::unix::ffi::OsStrExt; 46 | path.push(OsStr::from_bytes(bytes)); 47 | } 48 | #[cfg(not(unix))] 49 | { 50 | path.push(String::from_utf8_lossy(bytes).to_string()); 51 | } 52 | } 53 | } 54 | } 55 | 56 | path 57 | } 58 | } 59 | 60 | /// CBOR Text or Byte string. 61 | #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 62 | pub enum CborString { 63 | /// UTF-8 text string value. 64 | Text(String), 65 | 66 | /// Non-unicode byte string value. 67 | Binary(Vec<u8>), 68 | } 69 | 70 | impl CborString { 71 | /// Convert from bytes that might be UTF-8.
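///
/// # Example
///
/// A quick sketch of both outcomes:
///
/// ```ignore
/// assert_eq!(
///     CborString::from_maybe_utf8(b"hello".to_vec()),
///     CborString::Text("hello".into()),
/// );
/// assert_eq!(
///     CborString::from_maybe_utf8(vec![0xFF, 0xFE]),
///     CborString::Binary(vec![0xFF, 0xFE]),
/// );
/// ```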
72 | pub fn from_maybe_utf8(bytes: Vec<u8>) -> Self { 73 | match String::from_utf8(bytes) { 74 | Ok(string) => Self::Text(string), 75 | Err(err) => Self::Binary(err.into_bytes()), 76 | } 77 | } 78 | } 79 | 80 | impl From<&OsStr> for CborString { 81 | fn from(string: &OsStr) -> Self { 82 | if let Some(unicode) = string.to_str() { 83 | Self::Text(unicode.into()) 84 | } else { 85 | #[cfg(unix)] 86 | { 87 | use std::os::unix::ffi::OsStrExt; 88 | Self::Binary(string.as_bytes().into()) 89 | } 90 | #[cfg(windows)] 91 | { 92 | use std::os::windows::ffi::OsStrExt; 93 | Self::Text(String::from_utf16_lossy( 94 | &string.encode_wide().collect::<Vec<u16>>(), 95 | )) 96 | } 97 | } 98 | } 99 | } 100 | 101 | impl From<&str> for CborString { 102 | fn from(string: &str) -> Self { 103 | Self::Text(string.into()) 104 | } 105 | } 106 | 107 | impl From<String> for CborString { 108 | fn from(string: String) -> Self { 109 | Self::Text(string) 110 | } 111 | } 112 | 113 | impl<C> Encode<C> for CborString { 114 | fn encode<W: minicbor::encode::Write>( 115 | &self, 116 | e: &mut Encoder<W>, 117 | ctx: &mut C, 118 | ) -> Result<(), minicbor::encode::Error<W::Error>> { 119 | match self { 120 | Self::Text(s) => s.encode(e, ctx), 121 | Self::Binary(b) => <&minicbor::bytes::ByteSlice>::from(b.as_slice()).encode(e, ctx), 122 | } 123 | } 124 | } 125 | 126 | impl<'b, C> Decode<'b, C> for CborString { 127 | fn decode(d: &mut Decoder<'b>, _ctx: &mut C) -> Result<Self, minicbor::decode::Error> { 128 | match d.datatype()? { 129 | Type::String => d.str().map(|s| Self::Text(s.into())), 130 | Type::StringIndef => Ok(Self::Text(d.str_iter()?.try_fold( 131 | String::new(), 132 | |mut string, s| { 133 | s.map(|s| { 134 | string.push_str(s); 135 | string 136 | }) 137 | }, 138 | )?)), 139 | Type::Bytes => d.bytes().map(|b| Self::Binary(b.into())), 140 | Type::BytesIndef => Ok(Self::Binary(d.bytes_iter()?.try_fold( 141 | Vec::new(), 142 | |mut vec, b| { 143 | b.map(|b| { 144 | vec.extend(b); 145 | vec 146 | }) 147 | }, 148 | )?)), 149 | ty => Err(minicbor::decode::Error::type_mismatch(ty)), 150 | } 151 | } 152 | } 153 | 154 | /// Attributes can be booleans or text or byte strings. 155 | #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 156 | pub enum AttributeValue { 157 | /// A boolean. 158 | Boolean(bool), 159 | 160 | /// A string. 161 | String(CborString), 162 | } 163 | 164 | impl AttributeValue { 165 | /// Get the value as a bool if it is one. 166 | pub fn as_bool(&self) -> Option<bool> { 167 | match self { 168 | Self::Boolean(b) => Some(*b), 169 | _ => None, 170 | } 171 | } 172 | } 173 | 174 | impl From<bool> for AttributeValue { 175 | fn from(b: bool) -> Self { 176 | Self::Boolean(b) 177 | } 178 | } 179 | 180 | impl<T> From<T> for AttributeValue 181 | where 182 | T: Into<CborString>, 183 | { 184 | fn from(string: T) -> Self { 185 | Self::String(string.into()) 186 | } 187 | } 188 | 189 | impl<C> Encode<C> for AttributeValue { 190 | fn encode<W: minicbor::encode::Write>( 191 | &self, 192 | e: &mut Encoder<W>, 193 | ctx: &mut C, 194 | ) -> Result<(), minicbor::encode::Error<W::Error>> { 195 | match self { 196 | Self::Boolean(b) => b.encode(e, ctx), 197 | Self::String(s) => s.encode(e, ctx), 198 | } 199 | } 200 | } 201 | 202 | impl<'b, C> Decode<'b, C> for AttributeValue { 203 | fn decode(d: &mut Decoder<'b>, _ctx: &mut C) -> Result<Self, minicbor::decode::Error> { 204 | match d.datatype()?
{ 205 | Type::String | Type::StringIndef | Type::Bytes | Type::BytesIndef => { 206 | d.decode().map(Self::String) 207 | } 208 | Type::Bool => d.decode().map(Self::Boolean), 209 | ty => Err(minicbor::decode::Error::type_mismatch(ty)), 210 | } 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/timestamps.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt, time::SystemTime}; 2 | 3 | use chrono::{DateTime, Utc}; 4 | use minicbor::{ 5 | data::{Tag, Type}, 6 | Decode, Decoder, Encode, Encoder, 7 | }; 8 | 9 | /// Directory Filemap Entry Timestamps. 10 | #[derive(Clone, Debug, Default, PartialEq, Encode, Decode)] 11 | #[cbor(map)] 12 | pub struct Timestamps { 13 | /// Creation time (birth time). 14 | #[n(1)] 15 | pub created: Option<Timestamp>, 16 | 17 | /// Modification time (mtime). 18 | #[n(2)] 19 | pub modified: Option<Timestamp>, 20 | 21 | /// Access time (atime). 22 | #[n(3)] 23 | pub accessed: Option<Timestamp>, 24 | } 25 | 26 | /// A timestamp. 27 | /// 28 | /// Internally this is a [`chrono`] type, and always encodes to an RFC3339 tagged text string. 29 | /// However for flexibility it can decode from a CBOR epoch-based timestamp as well. 30 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] 31 | pub struct Timestamp(pub DateTime<Utc>); 32 | 33 | impl Timestamp { 34 | /// The current date and time. 35 | pub fn now() -> Self { 36 | Self(Utc::now()) 37 | } 38 | } 39 | 40 | impl From<SystemTime> for Timestamp { 41 | fn from(st: SystemTime) -> Self { 42 | Self(st.into()) 43 | } 44 | } 45 | 46 | impl From<Timestamp> for SystemTime { 47 | fn from(ts: Timestamp) -> Self { 48 | ts.0.into() 49 | } 50 | } 51 | 52 | impl From<DateTime<Utc>> for Timestamp { 53 | fn from(dt: DateTime<Utc>) -> Self { 54 | Self(dt) 55 | } 56 | } 57 | 58 | impl From<Timestamp> for DateTime<Utc> { 59 | fn from(ts: Timestamp) -> Self { 60 | ts.0 61 | } 62 | } 63 | 64 | impl fmt::Display for Timestamp { 65 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 66 | write!(f, "{}", self.0) 67 | } 68 | } 69 | 70 | impl<C> Encode<C> for Timestamp { 71 | fn encode<W: minicbor::encode::Write>( 72 | &self, 73 | e: &mut Encoder<W>, 74 | _ctx: &mut C, 75 | ) -> Result<(), minicbor::encode::Error<W::Error>> { 76 | e.tag(Tag::DateTime)?.str(&self.0.to_rfc3339()).map(drop) 77 | } 78 | } 79 | 80 | impl<'b, C> Decode<'b, C> for Timestamp { 81 | fn decode(d: &mut Decoder<'b>, _ctx: &mut C) -> Result<Self, minicbor::decode::Error> { 82 | let p = d.position(); 83 | match d.tag()? { 84 | Tag::DateTime => Ok(Self( 85 | DateTime::parse_from_rfc3339(d.str()?) 86 | .map_err(|err| minicbor::decode::Error::message(err.to_string()).at(p))? 87 | .into(), 88 | )), 89 | Tag::Timestamp => match d.datatype()?
{ 90 | Type::U32 => DateTime::<Utc>::from_timestamp(i64::from(d.u32()?), 0), 91 | Type::U64 => DateTime::<Utc>::from_timestamp( 92 | i64::try_from(d.u64()?).map_err(|err| { 93 | minicbor::decode::Error::message(format!("timestamp out of range: {err}")) 94 | .at(p) 95 | })?, 96 | 0, 97 | ), 98 | Type::I32 => DateTime::<Utc>::from_timestamp(i64::from(d.i32()?), 0), 99 | Type::I64 => DateTime::<Utc>::from_timestamp(d.i64()?, 0), 100 | Type::Int => DateTime::<Utc>::from_timestamp( 101 | i64::try_from(d.int()?).map_err(|err| { 102 | minicbor::decode::Error::message(format!("timestamp out of range: {err}")) 103 | .at(p) 104 | })?, 105 | 0, 106 | ), 107 | Type::F32 => { 108 | let f = d.f32()?; 109 | DateTime::<Utc>::from_timestamp(f.trunc() as _, (f.fract() * 1.0e9) as _) 110 | } 111 | Type::F64 => { 112 | let f = d.f64()?; 113 | DateTime::<Utc>::from_timestamp(f.trunc() as _, (f.fract() * 1.0e9) as _) 114 | } 115 | ty => return Err(minicbor::decode::Error::type_mismatch(ty)), 116 | } 117 | .ok_or_else(|| minicbor::decode::Error::message("timestamp out of range").at(p)) 118 | .map(Self), 119 | other => Err(minicbor::decode::Error::message(format!( 120 | "expected Timestamp or DateTime tag, got {other:?}" 121 | )) 122 | .at(p)), 123 | } 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /crates/zarc/src/encode.rs: -------------------------------------------------------------------------------- 1 | //! Encoder types and functions. 2 | 3 | use std::{ 4 | collections::{BTreeMap, HashMap}, 5 | fmt, 6 | io::{Error, Result, Write}, 7 | num::NonZeroU16, 8 | }; 9 | 10 | use tracing::{instrument, trace}; 11 | use zstd_safe::CCtx; 12 | pub use zstd_safe::{CParameter as ZstdParameter, Strategy as ZstdStrategy}; 13 | 14 | use crate::{ 15 | directory::{File, Frame, Pathname}, 16 | header::FILE_MAGIC, 17 | integrity::Digest, 18 | map_zstd_error, 19 | }; 20 | 21 | mod add_file; 22 | mod content_frame; 23 | mod directory; 24 | mod lowlevel_frames; 25 | 26 | /// Zarc encoder context. 27 | pub struct Encoder<'writer, W: Write> { 28 | writer: &'writer mut W, 29 | zstd: CCtx<'writer>, 30 | edition: NonZeroU16, 31 | files: Vec<Option<File>>, 32 | frames: HashMap<Digest, Frame>, 33 | files_by_name: BTreeMap<Pathname, Vec<usize>>, 34 | files_by_digest: HashMap<Digest, Vec<usize>>, 35 | offset: usize, 36 | compress: bool, 37 | } 38 | 39 | impl<W: Write + fmt::Debug> fmt::Debug for Encoder<'_, W> { 40 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 41 | f.debug_struct("Encoder") 42 | .field("writer", &self.writer) 43 | .field("zstd", &"zstd-safe compression context") 44 | .field("edition", &self.edition) 45 | .field("files", &self.files) 46 | .field("frames", &self.frames) 47 | .field("files_by_name", &self.files_by_name) 48 | .field("files_by_digest", &self.files_by_digest) 49 | .field("offset", &self.offset) 50 | .field("compress", &self.compress) 51 | .finish() 52 | } 53 | } 54 | 55 | impl<'writer, W: Write> Encoder<'writer, W> { 56 | /// Create a new encoder and write the header.
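///
/// # Example
///
/// A sketch of the whole encoding flow (error handling elided; `Pathname` and `Path`
/// imports assumed):
///
/// ```ignore
/// let mut out = Vec::new();
/// let mut encoder = Encoder::new(&mut out)?;
/// let digest = encoder.add_data_frame(b"hello, zarc")?;
/// let mut file = encoder.build_file(Pathname::from_normal_components(Path::new("hello.txt")));
/// file.digest(digest);
/// encoder.add_file_entry(file)?;
/// let directory_digest = encoder.finalise()?;
/// ```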
57 | #[instrument(level = "trace", skip(writer))] 58 | pub fn new(writer: &'writer mut W) -> Result<Self> { 59 | trace!("create zstd context"); 60 | let mut zstd = 61 | CCtx::try_create().ok_or_else(|| Error::other("failed allocating zstd context"))?; 62 | zstd.init(0).map_err(map_zstd_error)?; 63 | 64 | trace!("write zarc magic"); 65 | let offset = writer.write(&FILE_MAGIC)?; 66 | 67 | Ok(Self { 68 | writer, 69 | zstd, 70 | edition: unsafe { NonZeroU16::new_unchecked(1) }, 71 | files: Vec::new(), 72 | frames: HashMap::new(), 73 | files_by_name: BTreeMap::new(), 74 | files_by_digest: HashMap::new(), 75 | offset, 76 | compress: true, 77 | }) 78 | } 79 | 80 | /// Set a zstd parameter. 81 | /// 82 | /// This will apply to future data frames. 83 | #[instrument(level = "trace", skip(self))] 84 | pub fn set_zstd_parameter(&mut self, parameter: ZstdParameter) -> Result<()> { 85 | self.zstd 86 | .set_parameter(parameter) 87 | .map_err(map_zstd_error) 88 | .map(drop) 89 | } 90 | 91 | /// Enable or disable compression. 92 | /// 93 | /// This will apply to future data frames. 94 | #[instrument(level = "trace", skip(self))] 95 | pub fn enable_compression(&mut self, compress: bool) { 96 | self.compress = compress; 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /crates/zarc/src/encode/add_file.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | io::{Error, Result, Write}, 3 | path::Path, 4 | }; 5 | 6 | use tracing::{instrument, trace}; 7 | 8 | use crate::{ 9 | directory::{ 10 | AttributeValue, File, Pathname, PosixOwner, SpecialFile, SpecialFileKind, Timestamp, 11 | Timestamps, 12 | }, 13 | integrity::Digest, 14 | metadata::encode::build_filemap, 15 | }; 16 | 17 | use super::Encoder; 18 | 19 | impl<'writer, W: Write> Encoder<'writer, W> { 20 | /// Add a file entry. 21 | #[instrument(level = "trace", skip(self))] 22 | pub fn add_file_entry(&mut self, entry: impl Into<File> + std::fmt::Debug) -> Result<()> { 23 | let entry = entry.into(); 24 | 25 | if let Some(hash) = &entry.digest { 26 | if !self.frames.contains_key(hash) { 27 | return Err(Error::other( 28 | "cannot add file entry referencing unknown data frame", 29 | )); 30 | } 31 | } 32 | 33 | let name = entry.name.clone(); 34 | let digest = entry.digest.clone(); 35 | 36 | self.files.push(Some(entry)); 37 | let index = self.files.len() - 1; 38 | trace!(index, "added file entry"); 39 | 40 | self.files_by_name.entry(name).or_default().push(index); 41 | if let Some(digest) = digest { 42 | self.files_by_digest.entry(digest).or_default().push(index); 43 | } 44 | 45 | Ok(()) 46 | } 47 | 48 | /// Get a builder for a file entry. 49 | /// 50 | /// Don't forget to set the digest to the content frame! 51 | #[instrument(level = "trace", skip(self))] 52 | pub fn build_file(&self, name: impl Into<Pathname> + std::fmt::Debug) -> FileBuilder { 53 | FileBuilder(File { 54 | edition: self.edition, 55 | name: name.into(), 56 | digest: Default::default(), 57 | mode: Default::default(), 58 | user: Default::default(), 59 | group: Default::default(), 60 | timestamps: Default::default(), 61 | special: Default::default(), 62 | user_metadata: Default::default(), 63 | attributes: Default::default(), 64 | extended_attributes: Default::default(), 65 | }) 66 | } 67 | 68 | /// Start building a file from an existing file. 69 | /// 70 | /// This will read the metadata of a file on the filesystem and return a [`FileBuilder`] to add 71 | /// or change metadata before adding it to the encoder.
72 | /// 73 | /// Don't forget to set the digest to the content frame! 74 | #[instrument(level = "trace", skip(self))] 75 | pub fn build_file_with_metadata( 76 | &self, 77 | path: impl AsRef<Path> + std::fmt::Debug, 78 | follow_symlinks: bool, 79 | ) -> std::io::Result<FileBuilder> { 80 | let path = path.as_ref(); 81 | build_filemap(self.edition, path, follow_symlinks).map(FileBuilder) 82 | } 83 | } 84 | 85 | /// Builder for a file entry. 86 | /// 87 | /// Create with [`Encoder::build_file()`], then insert into the Encoder with 88 | /// [`Encoder::add_file_entry()`]. 89 | #[derive(Clone, Debug)] 90 | pub struct FileBuilder(pub File); 91 | 92 | // TODO: symlinks and hardlinks 93 | 94 | impl FileBuilder { 95 | /// Set the digest of a content frame. 96 | /// 97 | /// This doesn't check that the digest is valid or that the content frame exists, but that will 98 | /// be checked later when the file is added to the encoder. 99 | pub fn digest(&mut self, digest: impl Into<Digest>) -> &mut Self { 100 | self.0.digest = Some(digest.into()); 101 | self 102 | } 103 | 104 | /// Make this a directory. 105 | /// 106 | /// This will clear the digest if it was set. 107 | pub fn directory(&mut self) -> &mut Self { 108 | self.0.digest = None; 109 | self.0.special = Some(SpecialFile { 110 | kind: Some(SpecialFileKind::Directory), 111 | ..Default::default() 112 | }); 113 | self 114 | } 115 | 116 | /// Set the POSIX mode of the file. 117 | /// 118 | /// This does the same thing regardless of platform, so it can be used to set the mode of files 119 | /// even when running on Windows if the desired value is known. 120 | pub fn mode(&mut self, mode: u32) -> &mut Self { 121 | self.0.mode = Some(mode); 122 | self 123 | } 124 | 125 | /// Set the user that owns the file by name. 126 | pub fn user_name(&mut self, username: &str) -> &mut Self { 127 | let name = username.to_string(); 128 | if let Some(user) = self.0.user.as_mut() { 129 | user.name = Some(name); 130 | } else { 131 | self.0.user = Some(PosixOwner { 132 | name: Some(name), 133 | ..Default::default() 134 | }) 135 | } 136 | self 137 | } 138 | 139 | /// Set the user that owns the file by ID. 140 | pub fn user_id(&mut self, id: u64) -> &mut Self { 141 | if let Some(user) = self.0.user.as_mut() { 142 | user.id = Some(id); 143 | } else { 144 | self.0.user = Some(PosixOwner { 145 | id: Some(id), 146 | ..Default::default() 147 | }) 148 | } 149 | self 150 | } 151 | 152 | /// Set the group that owns the file by name. 153 | pub fn group_name(&mut self, groupname: &str) -> &mut Self { 154 | let name = groupname.to_string(); 155 | if let Some(group) = self.0.group.as_mut() { 156 | group.name = Some(name); 157 | } else { 158 | self.0.group = Some(PosixOwner { 159 | name: Some(name), 160 | ..Default::default() 161 | }) 162 | } 163 | self 164 | } 165 | 166 | /// Set the group that owns the file by ID. 167 | pub fn group_id(&mut self, id: u64) -> &mut Self { 168 | if let Some(group) = self.0.group.as_mut() { 169 | group.id = Some(id); 170 | } else { 171 | self.0.group = Some(PosixOwner { 172 | id: Some(id), 173 | ..Default::default() 174 | }) 175 | } 176 | self 177 | } 178 | 179 | /// Set the timestamps of the file. 180 | pub fn timestamps(&mut self, timestamps: impl Into<Timestamps>) -> &mut Self { 181 | self.0.timestamps = Some(timestamps.into()); 182 | self 183 | } 184 | 185 | /// Set the accessed timestamp of the file.
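///
/// Anything converting into [`Timestamp`] works here; for instance (a sketch):
///
/// ```ignore
/// builder.time_accessed(std::time::SystemTime::now());
/// ```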
186 | pub fn time_accessed(&mut self, timestamps: impl Into<Timestamp>) -> &mut Self { 187 | if let Some(ts) = self.0.timestamps.as_mut() { 188 | ts.accessed = Some(timestamps.into()); 189 | } else { 190 | self.0.timestamps = Some(Timestamps { 191 | accessed: Some(timestamps.into()), 192 | ..Default::default() 193 | }) 194 | } 195 | self 196 | } 197 | 198 | /// Set the modified timestamp of the file. 199 | pub fn time_modified(&mut self, timestamps: impl Into<Timestamp>) -> &mut Self { 200 | if let Some(ts) = self.0.timestamps.as_mut() { 201 | ts.modified = Some(timestamps.into()); 202 | } else { 203 | self.0.timestamps = Some(Timestamps { 204 | modified: Some(timestamps.into()), 205 | ..Default::default() 206 | }) 207 | } 208 | self 209 | } 210 | 211 | /// Set the created timestamp of the file. 212 | pub fn time_created(&mut self, timestamps: impl Into<Timestamp>) -> &mut Self { 213 | if let Some(ts) = self.0.timestamps.as_mut() { 214 | ts.created = Some(timestamps.into()); 215 | } else { 216 | self.0.timestamps = Some(Timestamps { 217 | created: Some(timestamps.into()), 218 | ..Default::default() 219 | }) 220 | } 221 | self 222 | } 223 | 224 | /// Add user metadata. 225 | pub fn user_metadata( 226 | &mut self, 227 | key: impl Into<String>, 228 | value: impl Into<AttributeValue>, 229 | ) -> &mut Self { 230 | self.0 231 | .user_metadata 232 | .get_or_insert_with(Default::default) 233 | .insert(key.into(), value.into()); 234 | self 235 | } 236 | 237 | /// Add an attribute. 238 | /// 239 | /// See [`file_attributes`](crate::metadata::encode::file_attributes) for a list of attributes. 240 | pub fn attribute( 241 | &mut self, 242 | key: impl Into<String>, 243 | value: impl Into<AttributeValue>, 244 | ) -> &mut Self { 245 | self.0 246 | .attributes 247 | .get_or_insert_with(Default::default) 248 | .insert(key.into(), value.into()); 249 | self 250 | } 251 | 252 | /// Add an extended attribute. 253 | pub fn extended_attribute( 254 | &mut self, 255 | key: impl Into<String>, 256 | value: impl Into<AttributeValue>, 257 | ) -> &mut Self { 258 | self.0 259 | .extended_attributes 260 | .get_or_insert_with(Default::default) 261 | .insert(key.into(), value.into()); 262 | self 263 | } 264 | } 265 | 266 | impl From<FileBuilder> for File { 267 | fn from(builder: FileBuilder) -> Self { 268 | builder.0 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /crates/zarc/src/encode/content_frame.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Error, Result, Write}; 2 | 3 | use tracing::{instrument, trace}; 4 | use zstd_safe::ResetDirective; 5 | 6 | use crate::{directory::Frame, integrity::Digest, map_zstd_error}; 7 | 8 | use super::Encoder; 9 | 10 | impl<'writer, W: Write> Encoder<'writer, W> { 11 | /// Add a frame of data. 12 | /// 13 | /// Processes the entire input in memory. 14 | /// 15 | /// Returns the hash of the data, so it can be referenced in a filemap entry. 16 | /// 17 | /// If the content hashes to a frame that already exists, returns the hash without storing 18 | /// a duplicate frame.
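///
/// # Example
///
/// A sketch of the deduplication behaviour described above:
///
/// ```ignore
/// let d1 = encoder.add_data_frame(b"same bytes")?;
/// let d2 = encoder.add_data_frame(b"same bytes")?;
/// assert_eq!(d1, d2); // the second call writes nothing new
/// ```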
19 | #[instrument(level = "trace", skip(self, content))] 20 | pub fn add_data_frame(&mut self, content: &[u8]) -> Result<Digest> { 21 | // collect pre-compression values 22 | let offset = self.offset.try_into().map_err(Error::other)?; 23 | let uncompressed_size = content.len(); 24 | 25 | // compute content hash 26 | let digest = blake3::hash(content); 27 | let digest = Digest(digest.as_bytes().to_vec()); 28 | trace!(%uncompressed_size, digest=%format!("{digest:02x?}"), "computed digest"); 29 | 30 | if self.frames.contains_key(&digest) { 31 | trace!("frame already exists, skipping"); 32 | return Ok(digest); 33 | } 34 | 35 | let bytes = if self.compress { 36 | // start new compression context 37 | self.zstd 38 | .reset(ResetDirective::SessionOnly) 39 | .map_err(map_zstd_error)?; 40 | 41 | self.write_compressed_frame(content) 42 | } else { 43 | self.write_uncompressed_frame(content) 44 | }?; 45 | self.offset += bytes; 46 | 47 | // push frame to list 48 | self.frames.insert( 49 | digest.clone(), 50 | Frame { 51 | edition: self.edition, 52 | offset, 53 | digest: digest.clone(), 54 | length: bytes as _, 55 | uncompressed: uncompressed_size as _, 56 | }, 57 | ); 58 | 59 | Ok(digest) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /crates/zarc/src/encode/directory.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | io::{Error, Result, Write}, 3 | mem::take, 4 | }; 5 | 6 | use blake3::Hasher; 7 | use deku::DekuContainerWrite; 8 | use ozarc::framing::SKIPPABLE_FRAME_OVERHEAD; 9 | use tracing::{debug, instrument, trace}; 10 | 11 | use crate::{ 12 | constants::ZARC_VERSION, 13 | directory::{Edition, Element, ElementFrame, Timestamp}, 14 | integrity::{Digest, DigestType}, 15 | trailer::Trailer, 16 | }; 17 | 18 | use super::Encoder; 19 | 20 | impl<'writer, W: Write> Encoder<'writer, W> { 21 | #[instrument(level = "trace", skip(buf, hasher))] 22 | fn write_element(buf: &mut Vec<u8>, hasher: &mut Hasher, element: &Element) -> Result<()> { 23 | let frame = ElementFrame::create(element).map_err(Error::other)?; 24 | let bytes = frame.to_bytes().map_err(Error::other)?; 25 | buf.write_all(&bytes)?; 26 | hasher.update(&bytes); 27 | trace!( 28 | kind = ?element.kind(), 29 | length = %bytes.len(), 30 | bytes = %format!("{bytes:02x?}"), 31 | "wrote element" 32 | ); 33 | Ok(()) 34 | } 35 | 36 | /// Write the directory and trailer. 37 | /// 38 | /// Flushes the writer and drops all state, returns the digest of the directory.
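///
/// Element write order, as implemented below: the edition element first; then, for each
/// file (grouped by name), its content frame element immediately before the file element,
/// each frame written only once; finally any frames not referenced by a file.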
39 | #[instrument(level = "debug", skip(self))] 40 | pub fn finalise(mut self) -> Result<Digest> { 41 | let mut directory = Vec::new(); 42 | let digest_type = DigestType::Blake3; 43 | let mut hasher = Hasher::new(); // TODO: get hasher from DigestType 44 | 45 | Self::write_element( 46 | &mut directory, 47 | &mut hasher, 48 | &Element::Edition(Box::new(Edition { 49 | number: self.edition, 50 | written_at: Timestamp::now(), 51 | digest_type, 52 | user_metadata: Default::default(), 53 | })), 54 | )?; 55 | 56 | for (name, indices) in take(&mut self.files_by_name) { 57 | debug!(?name, "write file and frame elements"); 58 | 59 | for index in indices { 60 | let Some(file) = self.files.get_mut(index).and_then(Option::take) else { 61 | // this shouldn't happen, but it's cheap to just skip instead of unwrapping 62 | continue; 63 | }; 64 | 65 | // we always want to insert a frame element before the linked file element 66 | if let Some(digest) = &file.digest { 67 | // if we've already written it, this will be None 68 | if let Some(frame) = self.frames.remove(digest) { 69 | Self::write_element( 70 | &mut directory, 71 | &mut hasher, 72 | &Element::Frame(Box::new(frame)), 73 | )?; 74 | } 75 | } 76 | 77 | Self::write_element(&mut directory, &mut hasher, &Element::File(Box::new(file)))?; 78 | } 79 | } 80 | 81 | // we should have written every frame, but just in case 82 | // (or if user inserted frames not linked to files) 83 | for frame in take(&mut self.frames).into_values() { 84 | Self::write_element( 85 | &mut directory, 86 | &mut hasher, 87 | &Element::Frame(Box::new(frame)), 88 | )?; 89 | } 90 | 91 | let digest = hasher.finalize(); 92 | trace!(?digest, "hashed directory"); 93 | let digest = Digest(digest.as_bytes().to_vec()); 94 | 95 | let bytes = self.write_compressed_frame(&directory)?; 96 | trace!(%bytes, "wrote directory"); 97 | 98 | let mut trailer = Trailer { 99 | version: ZARC_VERSION, 100 | digest_type, 101 | directory_offset: 0, 102 | directory_uncompressed_size: directory.len() as _, 103 | digest: digest.clone(), 104 | }; 105 | trailer.directory_offset = -((bytes + SKIPPABLE_FRAME_OVERHEAD + trailer.len()) as i64); 106 | trace!(?trailer, "built trailer"); 107 | 108 | let trailer_bytes = trailer.to_bytes(); 109 | trace!( 110 | bytes = %format!("{trailer_bytes:02x?}"), 111 | length = %trailer_bytes.len(), 112 | "serialised trailer" 113 | ); 114 | 115 | let bytes = self.write_skippable_frame(0xF, trailer_bytes)?; 116 | trace!(%bytes, "wrote trailer"); 117 | 118 | self.writer.flush()?; 119 | trace!("flushed writer"); 120 | 121 | Ok(digest) 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /crates/zarc/src/encode/lowlevel_frames.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Result, Write}; 2 | 3 | use deku::DekuContainerWrite; 4 | use tracing::{instrument, trace}; 5 | 6 | use crate::map_zstd_error; 7 | 8 | use super::Encoder; 9 | 10 | impl<'writer, W: Write> Encoder<'writer, W> { 11 | /// Write a compressed frame. 12 | /// 13 | /// Zstd-safe is bad at writing data, so we always write to a buffer in memory and then write 14 | /// that buffer to the writer. 15 | /// 16 | /// Returns the number of bytes written.
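///
/// The buffer is pre-sized to `data.len() + max(1024, data.len() / 10)`, which should sit
/// comfortably above zstd's worst-case compressed size (roughly `srcSize + srcSize/255`
/// plus a small constant margin), so `compress2` should not run out of room.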
17 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 18 | #[instrument(level = "trace", skip(self, data))] 19 | pub(crate) fn write_compressed_frame(&mut self, data: &[u8]) -> Result<usize> { 20 | // start with a buffer slightly larger than the input 21 | let mut buffer: Vec<u8> = Vec::with_capacity(data.len() + 1024.max(data.len() / 10)); 22 | 23 | trace!( 24 | bytes = %format!("{data:02x?}"), 25 | length = %data.len(), 26 | buffer_size = %buffer.capacity(), 27 | "compress data into buffer" 28 | ); 29 | self.zstd 30 | .compress2(&mut buffer, data) 31 | .map_err(map_zstd_error)?; 32 | 33 | trace!( 34 | bytes = %format!("{buffer:02x?}"), 35 | length = %buffer.len(), 36 | "write buffer to writer" 37 | ); 38 | self.writer.write(&buffer) 39 | } 40 | 41 | /// Write an uncompressed frame. 42 | /// 43 | /// Zstd can't write fully-uncompressed data, so we use [`ozarc`]'s types to write raw blocks 44 | /// and the frame directly. 45 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 46 | #[instrument(level = "trace", skip(self, data))] 47 | pub(crate) fn write_uncompressed_frame(&mut self, data: &[u8]) -> Result<usize> { 48 | use ozarc::framing::*; 49 | let mut frame = ZstandardFrame { 50 | header: ZstandardFrameHeader { 51 | frame_descriptor: ZstandardFrameDescriptor { 52 | fcs_size: 3, 53 | single_segment: false, 54 | unused_bit: false, 55 | reserved_bit: false, 56 | checksum: false, 57 | did_size: 0, 58 | }, 59 | window_descriptor: None, 60 | did: Vec::new(), 61 | #[allow(clippy::unwrap_used)] // UNWRAP: realistically we'll never have more than u64 bytes of content 62 | frame_content_size: u64::try_from(data.len()).unwrap().to_le_bytes().to_vec(), 63 | }, 64 | blocks: data 65 | .chunks(u16::MAX as _) 66 | .map(|data| ZstandardBlock { 67 | header: ZstandardBlockHeader::new( 68 | ZstandardBlockType::Raw, 69 | false, 70 | #[allow(clippy::unwrap_used)] // UNWRAP: chunks() limits to u16 71 | u32::try_from(data.len()).unwrap(), 72 | ), 73 | data: data.into(), 74 | }) 75 | .collect(), 76 | checksum: None, 77 | }; 78 | 79 | if let Some(last) = frame.blocks.last_mut() { 80 | last.header.last = true; 81 | } 82 | 83 | self.writer.write(&frame.to_bytes()?) 84 | } 85 | 86 | /// Write a skippable frame. 87 | /// 88 | /// Zstd-safe doesn't have an API for this, so we use [`ozarc`]. 89 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 90 | #[instrument(level = "trace", skip(self, magic, data))] 91 | pub(crate) fn write_skippable_frame(&mut self, magic: u8, data: Vec<u8>) -> Result<usize> { 92 | trace!( 93 | bytes = %format!("{data:02x?}"), 94 | length = %data.len(), 95 | magic, 96 | "compose data into frame" 97 | ); 98 | let frame = ozarc::framing::SkippableFrame::new(magic, data); 99 | let buffer = frame.to_bytes()?; 100 | 101 | trace!( 102 | bytes = %format!("{buffer:02x?}"), 103 | length = %buffer.len(), 104 | "write buffer to writer" 105 | ); 106 | self.writer.write(&buffer) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /crates/zarc/src/header.rs: -------------------------------------------------------------------------------- 1 | //! Zarc Header structure and byte array 2 | //! 3 | //! The purpose of the header is to identify the file as a Zarc file. It also has the file version 4 | //! number, but this can be considered part of the "file magic" rather than actual metadata. 5 | //! 6 | //! This module has two implementations of the header: [`Header`] which lets you decode the header 7 | //!
7 | //! from the skippable frame's payload, and [`FILE_MAGIC`] which is a constant byte array that
8 | //! includes the Zstd framing and can be matched byte-for-byte against the start of a Zarc file.
9 | 
10 | use deku::prelude::*;
11 | 
12 | use super::constants::{ZARC_MAGIC, ZARC_VERSION};
13 | 
14 | /// Zarc Header
15 | ///
16 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#zarc-header)
17 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)]
18 | #[deku(endian = "little")]
19 | pub struct Header {
20 | 	/// Magic number. Asserted to match [`ZARC_MAGIC`].
21 | 	#[deku(count = "3", assert = "*magic == ZARC_MAGIC")]
22 | 	pub magic: Vec<u8>,
23 | 
24 | 	/// Zarc format version number. Should match [`ZARC_VERSION`].
25 | 	#[deku(bytes = "1")]
26 | 	pub version: u8,
27 | }
28 | 
29 | /// Static file magic
30 | ///
31 | /// This is a zstd Skippable frame containing the Zarc Header, as a hardcoded constant.
32 | ///
33 | /// In a valid Zarc file, the first 12 bytes will match exactly.
34 | #[rustfmt::skip]
35 | pub const FILE_MAGIC: [u8; 12] = [
36 | 	0x50, 0x2A, 0x4D, 0x18, // zstd skippable frame
37 | 	0x04, 0x00, 0x00, 0x00, // payload size = 4 bytes
38 | 	0x65, 0xAA, 0xDC, // zarc magic
39 | 	ZARC_VERSION, // zarc version
40 | ];
41 | 
--------------------------------------------------------------------------------
/crates/zarc/src/integrity.rs:
--------------------------------------------------------------------------------
1 | //! Types supporting file integrity (checksums).
2 | 
3 | use deku::prelude::*;
4 | use minicbor::{data::Type, Decode, Decoder, Encode, Encoder};
5 | 
6 | /// Digest newtype.
7 | ///
8 | /// This is a wrapper around a byte vector, which is the actual digest.
9 | ///
10 | /// Currently only BLAKE3 is supported, but this type is designed to be generic over algorithms.
11 | ///
12 | /// The `PartialEq` and `Eq` implementations are constant-time.
13 | #[allow(clippy::derived_hash_with_manual_eq)]
14 | #[derive(Clone, Debug, Eq, Hash, DekuWrite)]
15 | pub struct Digest(pub Vec<u8>);
16 | 
17 | impl PartialEq for Digest {
18 | 	fn eq(&self, other: &Self) -> bool {
19 | 		use subtle::ConstantTimeEq;
20 | 		self.0.ct_eq(&other.0).into()
21 | 	}
22 | }
23 | 
24 | impl std::ops::Deref for Digest {
25 | 	type Target = Vec<u8>;
26 | 
27 | 	fn deref(&self) -> &Self::Target {
28 | 		&self.0
29 | 	}
30 | }
31 | 
32 | impl From<Vec<u8>> for Digest {
33 | 	fn from(bytes: Vec<u8>) -> Self {
34 | 		Self(bytes)
35 | 	}
36 | }
37 | 
38 | impl<'a, Ctx> DekuReader<'a, Ctx> for Digest
39 | where
40 | 	Vec<u8>: DekuReader<'a, Ctx>,
41 | 	Ctx: Copy,
42 | {
43 | 	fn from_reader_with_ctx<R: deku::no_std_io::Read>(
44 | 		reader: &mut deku::reader::Reader<'_, R>,
45 | 		ctx: Ctx,
46 | 	) -> Result<Self, DekuError>
47 | 	where
48 | 		Self: Sized,
49 | 	{
50 | 		Vec::<u8>::from_reader_with_ctx(reader, ctx).map(Self)
51 | 	}
52 | }
53 | 
54 | impl<C> Encode<C> for Digest {
55 | 	fn encode<W: minicbor::encode::Write>(
56 | 		&self,
57 | 		e: &mut Encoder<W>,
58 | 		_ctx: &mut C,
59 | 	) -> Result<(), minicbor::encode::Error<W::Error>> {
60 | 		e.bytes(&self.0).map(drop)
61 | 	}
62 | }
63 | 
64 | impl<'b, C> Decode<'b, C> for Digest {
65 | 	fn decode(d: &mut Decoder<'b>, _ctx: &mut C) -> Result<Self, minicbor::decode::Error> {
66 | 		match d.datatype()? {
67 | 			Type::Bytes => d.bytes().map(|b| Self(b.into())),
68 | 			Type::BytesIndef => Ok(Self(d.bytes_iter()?.try_fold(
69 | 				Vec::new(),
70 | 				|mut vec, b| {
71 | 					b.map(|b| {
72 | 						vec.extend(b);
73 | 						vec
74 | 					})
75 | 				},
76 | 			)?)),
77 | 			ty => Err(minicbor::decode::Error::type_mismatch(ty)),
78 | 		}
79 | 	}
80 | }
81 | 
82 | impl From<blake3::Hash> for Digest {
83 | 	fn from(value: blake3::Hash) -> Self {
84 | 		Self(value.as_bytes().to_vec())
85 | 	}
86 | }
87 | 
88 | /// Available digest algorithms.
89 | #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Encode, Decode, DekuRead, DekuWrite)]
90 | #[deku(endian = "endian", type = "u8", ctx = "endian: deku::ctx::Endian")]
91 | #[cbor(index_only)]
92 | pub enum DigestType {
93 | 	/// BLAKE3 hash function.
94 | 	#[n(1)]
95 | 	Blake3 = 1,
96 | }
97 | 
98 | impl DigestType {
99 | 	/// Length in bytes of a digest of this type.
100 | 	pub const fn digest_len(self) -> usize {
101 | 		match self {
102 | 			Self::Blake3 => blake3::OUT_LEN,
103 | 		}
104 | 	}
105 | 
106 | 	/// Verify that a block of data matches the given digest.
107 | 	pub fn verify_data(self, expected: &Digest, data: &[u8]) -> bool {
108 | 		match self {
109 | 			Self::Blake3 => {
110 | 				let actual = blake3::hash(data);
111 | 				let Ok(expected_bytes) = expected.as_slice().try_into() else {
112 | 					return false;
113 | 				};
114 | 				blake3::Hash::from_bytes(expected_bytes) == actual
115 | 			}
116 | 		}
117 | 	}
118 | }
119 | 
--------------------------------------------------------------------------------
/crates/zarc/src/lib.rs:
--------------------------------------------------------------------------------
1 | //! Zarc: Archive format based on Zstd.
2 | //!
3 | //! [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md)
4 | //!
5 | //! TBD
6 | 
7 | #![warn(clippy::unwrap_used, missing_docs)]
8 | #![deny(rust_2018_idioms)]
9 | #![cfg_attr(docsrs, feature(doc_auto_cfg))]
10 | 
11 | #[doc(inline)]
12 | pub use self::constants::*;
13 | mod constants;
14 | 
15 | pub mod decode;
16 | pub mod directory;
17 | pub mod encode;
18 | pub mod header;
19 | pub mod integrity;
20 | #[cfg(feature = "metadata")]
21 | pub mod metadata;
22 | pub mod ondemand;
23 | #[cfg(unix)]
24 | pub mod owner_cache;
25 | pub mod trailer;
26 | 
27 | pub(crate) fn map_zstd_error(code: usize) -> std::io::Error {
28 | 	let msg = zstd_safe::get_error_name(code);
29 | 	std::io::Error::other(msg)
30 | }
31 | 
--------------------------------------------------------------------------------
/crates/zarc/src/metadata.rs:
--------------------------------------------------------------------------------
1 | //! Helpers to read/write metadata for the Filemap.
2 | 
3 | pub mod decode;
4 | pub mod encode;
5 | 
--------------------------------------------------------------------------------
/crates/zarc/src/metadata/decode.rs:
--------------------------------------------------------------------------------
1 | //! Helpers to write file metadata when decoding [`File`](directory::File)s.
2 | 
3 | use std::fs::{File as FsFile, FileTimes, Permissions};
4 | 
5 | use tracing::instrument;
6 | 
7 | use crate::directory::{File, Timestamps};
8 | 
9 | /// Set the timestamps of the file.
10 | #[instrument(level = "trace")]
11 | pub fn set_timestamps(file: &FsFile, ts: &Timestamps) -> std::io::Result<()> {
12 | 	// On Windows, creation date is supported by std.
13 | 	// On Linux, birthtime can't be set.
14 | 	// On Apple/BSD, it should be possible to set birthtime:
15 | 	// https://github.com/ronomon/utimes/blob/master/binding.cc
16 | 	// but `nix` doesn't have setattrlist.
17 | 
18 | 	file.set_times(ts.into())
19 | }
20 | 
21 | impl From<&Timestamps> for FileTimes {
22 | 	fn from(ts: &Timestamps) -> Self {
23 | 		let mut ft = Self::new();
24 | 		if let Some(accessed) = ts.accessed {
25 | 			ft = ft.set_accessed(accessed.into());
26 | 		}
27 | 		if let Some(modified) = ts.modified {
28 | 			ft = ft.set_modified(modified.into());
29 | 		}
30 | 		#[cfg(windows)]
31 | 		if let Some(created) = ts.created {
32 | 			use std::os::windows::fs::FileTimesExt;
33 | 			ft = ft.set_created(created.into());
34 | 		}
35 | 
36 | 		ft
37 | 	}
38 | }
39 | 
40 | /// Set the permissions of a file.
41 | ///
42 | /// This uses `readonly` from attributes on Windows, `mode` if present on unix, and finally
43 | /// `readonly` on unix if `mode` wasn't there.
44 | #[instrument(level = "trace")]
45 | pub fn set_permissions(permissions: &mut Permissions, meta: &File) -> std::io::Result<()> {
46 | 	let readonly = meta.attributes.as_ref().and_then(|attrs| {
47 | 		attrs
48 | 			.get("read-only")
49 | 			.or_else(|| attrs.get("win32.read-only"))
50 | 			.and_then(|v| v.as_bool())
51 | 	});
52 | 
53 | 	#[cfg(windows)]
54 | 	{
55 | 		if let Some(readonly) = readonly {
56 | 			permissions.set_readonly(readonly)
57 | 		}
58 | 	}
59 | 
60 | 	#[cfg(unix)]
61 | 	{
62 | 		use std::os::unix::fs::PermissionsExt;
63 | 		if let Some(mode) = meta.mode {
64 | 			permissions.set_mode(mode);
65 | 		} else if let Some(readonly) = readonly {
66 | 			permissions.set_readonly(readonly);
67 | 		}
68 | 	}
69 | 
70 | 	Ok(())
71 | }
72 | 
73 | /// Set the ownership of a file.
74 | ///
75 | /// This uses `owner` and `group` if present, otherwise it does nothing.
76 | ///
77 | /// On non-Unix systems, this does nothing.
78 | #[instrument(level = "trace")]
79 | pub fn set_ownership(file: &FsFile, meta: &File) -> std::io::Result<()> {
80 | 	#[cfg(unix)]
81 | 	{
82 | 		use std::os::fd::AsRawFd;
83 | 
84 | 		let uid = meta
85 | 			.user
86 | 			.as_ref()
87 | 			.map(|user| user.to_real_uid())
88 | 			.transpose()?
89 | 			.flatten();
90 | 
91 | 		let gid = meta
92 | 			.group
93 | 			.as_ref()
94 | 			.map(|group| group.to_real_gid())
95 | 			.transpose()?
96 | 			.flatten();
97 | 
98 | 		let fd = file.as_raw_fd();
99 | 		tracing::trace!(%fd, ?uid, ?gid, "setting ownership");
100 | 		nix::unistd::fchown(fd, uid, gid)?;
101 | 	}
102 | 
103 | 	Ok(())
104 | }
105 | 
--------------------------------------------------------------------------------
/crates/zarc/src/metadata/encode.rs:
--------------------------------------------------------------------------------
1 | //! Helpers to read file metadata to encode [`File`]s.
2 | 
3 | use std::{
4 | 	collections::HashMap,
5 | 	fs::{self, Metadata},
6 | 	io::Result,
7 | 	num::NonZeroU16,
8 | 	path::Path,
9 | };
10 | 
11 | use tracing::{instrument, trace, warn};
12 | 
13 | use crate::directory::{
14 | 	AttributeValue, File, Pathname, PosixOwner, SpecialFile, SpecialFileKind, Timestamp, Timestamps,
15 | };
16 | 
17 | /// Build a [`File`] from a filename.
18 | ///
19 | /// Doesn't set the digest: you need to do that manually afterwards.
20 | ///
21 | /// Try using [`Decoder::build_file_with_metadata`] instead.
22 | ///
23 | /// This will perform syscalls; these are logged at trace level. Some errors are ignored. To get
24 | /// more control you can use the individual functions [in this module](self).
25 | ///
26 | /// [`readdir(3)`]: https://man.archlinux.org/man/readdir.3
27 | #[instrument(level = "trace")]
28 | pub fn build_filemap(edition: NonZeroU16, path: &Path, follow_links: bool) -> Result<File> {
29 | 	let name = Pathname::from_normal_components(path);
30 | 
31 | 	trace!("reading immediate metadata");
32 | 	let symeta = fs::symlink_metadata(path)?;
33 | 	let is_symlink = symeta.is_symlink();
34 | 
35 | 	let link_target = if is_symlink {
36 | 		trace!("reading link target");
37 | 		Some(fs::read_link(path)?)
38 | 	} else {
39 | 		None
40 | 	};
41 | 
42 | 	let meta = if follow_links && is_symlink {
43 | 		trace!("reading metadata");
44 | 		fs::metadata(path)?
45 | 	} else {
46 | 		symeta
47 | 	};
48 | 	trace!(?name, ?meta, "retrieved file metadata");
49 | 
50 | 	let file_type = meta.file_type();
51 | 
52 | 	Ok(File {
53 | 		edition,
54 | 		digest: None,
55 | 		name,
56 | 		user: owner_user(&meta)
57 | 			.map_err(|err| warn!(%err, "can't resolve user"))
58 | 			.unwrap_or_default(),
59 | 		group: owner_group(&meta)
60 | 			.map_err(|err| warn!(%err, "can't resolve group"))
61 | 			.unwrap_or_default(),
62 | 		mode: posix_mode(&meta),
63 | 		special: if file_type.is_dir() {
64 | 			Some(SpecialFile {
65 | 				kind: Some(SpecialFileKind::Directory),
66 | 				link_target: None,
67 | 			})
68 | 		} else if is_symlink {
69 | 			Some(SpecialFile {
70 | 				kind: Some(SpecialFileKind::Symlink),
71 | 				link_target: link_target.map(|path| path.as_path().into()),
72 | 			})
73 | 		} else {
74 | 			None
75 | 		},
76 | 		timestamps: Some(timestamps(&meta)),
77 | 		attributes: file_attributes(path, &meta)
78 | 			.map_err(|err| warn!(%err, "can't resolve attributes"))
79 | 			.unwrap_or_default(),
80 | 		extended_attributes: file_extended_attributes(path)
81 | 			.map_err(|err| warn!(%err, "can't resolve extended attributes"))
82 | 			.unwrap_or_default(),
83 | 		user_metadata: None,
84 | 	})
85 | }
86 | 
87 | /// Get the timestamps of the file.
88 | #[instrument(level = "trace")]
89 | pub fn timestamps(meta: &Metadata) -> Timestamps {
90 | 	Timestamps {
91 | 		created: meta.created().map(Timestamp::from).ok(),
92 | 		modified: meta.modified().map(Timestamp::from).ok(),
93 | 		accessed: meta.accessed().map(Timestamp::from).ok(),
94 | 	}
95 | }
96 | 
97 | /// Get the owning user of the file.
98 | ///
99 | /// On non-unix, always returns `Ok(None)`.
100 | #[instrument(level = "trace")]
101 | pub fn owner_user(meta: &Metadata) -> Result<Option<PosixOwner>> {
102 | 	#[cfg(unix)]
103 | 	{
104 | 		use std::os::unix::fs::MetadataExt;
105 | 		PosixOwner::from_uid(meta.uid())
106 | 	}
107 | 
108 | 	#[cfg(not(unix))]
109 | 	{
110 | 		Ok(None)
111 | 	}
112 | }
113 | 
114 | /// Get the owning group of the file.
115 | ///
116 | /// On non-unix, always returns `Ok(None)`.
117 | #[instrument(level = "trace")]
118 | pub fn owner_group(meta: &Metadata) -> Result<Option<PosixOwner>> {
119 | 	#[cfg(unix)]
120 | 	{
121 | 		use std::os::unix::fs::MetadataExt;
122 | 		PosixOwner::from_gid(meta.gid())
123 | 	}
124 | 
125 | 	#[cfg(not(unix))]
126 | 	{
127 | 		Ok(None)
128 | 	}
129 | }
130 | 
131 | /// Get the mode of the file.
132 | ///
133 | /// On non-unix, always returns `None`.
134 | #[instrument(level = "trace")]
135 | pub fn posix_mode(meta: &Metadata) -> Option<u32> {
136 | 	#[cfg(unix)]
137 | 	{
138 | 		use std::os::unix::fs::MetadataExt;
139 | 		Some(meta.mode())
140 | 	}
141 | 
142 | 	#[cfg(not(unix))]
143 | 	{
144 | 		None
145 | 	}
146 | }
147 | 
148 | /// Get attributes for a file, given its path and [`Metadata`].
149 | ///
150 | /// Returns `Ok(None)` on unsupported systems.
151 | ///
152 | /// ## Linux
153 | ///
154 | /// Translates present [`lsattr`/`chattr`][chattr] flags to boolean true at string keys,
155 | /// prefixed by `linux.`. Not every flag is translated; the list of those that are is exhaustive:
156 | ///
157 | /// - `append-only` for `APPEND` or [the `a` flag](https://man.archlinux.org/man/chattr.1#a)
158 | /// - `casefold` for `CASEFOLD` or [the `F` flag](https://man.archlinux.org/man/chattr.1#F)
159 | /// - `compressed` for `COMPR` or [the `c` flag](https://man.archlinux.org/man/chattr.1#c)
160 | /// - `delete-undo` for `UNRM` or [the `u` flag](https://man.archlinux.org/man/chattr.1#u)
161 | /// - `delete-zero` for `SECRM` or [the `s` flag](https://man.archlinux.org/man/chattr.1#s)
162 | /// - `dir-sync` for `DIRSYNC` or [the `D` flag](https://man.archlinux.org/man/chattr.1#D)
163 | /// - `encrypted` for `ENCRYPT` or [the `E` flag](https://man.archlinux.org/man/chattr.1#E)
164 | /// - `file-sync` for `SYNC` or [the `S` flag](https://man.archlinux.org/man/chattr.1#S)
165 | /// - `immutable` for `IMMUTABLE` or [the `i` flag](https://man.archlinux.org/man/chattr.1#i)
166 | /// - `no-atime` for `NOATIME` or [the `A` flag](https://man.archlinux.org/man/chattr.1#A)
167 | /// - `no-backup` for `NODUMP` or [the `d` flag](https://man.archlinux.org/man/chattr.1#d)
168 | /// - `no-cow` for `NOCOW` or [the `C` flag](https://man.archlinux.org/man/chattr.1#C)
169 | /// - `not-compressed` for `NOCOMPR` or [the `m` flag](https://man.archlinux.org/man/chattr.1#m)
170 | ///
171 | /// ## MacOS, iOS, FreeBSD, NetBSD
172 | ///
173 | /// Translates present [`chflags`][chflags] flags to boolean true at string keys,
174 | /// prefixed by `bsd.`. Not every flag is translated; the list of those that are is exhaustive:
175 | ///
176 | /// - `append-only` for `SF_APPEND` or `UF_APPEND`
177 | /// - `archived` for `SF_ARCHIVED`
178 | /// - `immutable` for `SF_IMMUTABLE` or `UF_IMMUTABLE`
179 | /// - `no-backup` for `UF_NODUMP`
180 | ///
181 | /// ## Windows
182 | ///
183 | /// Translates present [`FILE_ATTRIBUTE_*`][win32-file-attrs] flags to boolean true at string keys,
184 | /// prefixed by `win32.`. Not every flag is translated; the list of those that are is exhaustive:
185 | ///
186 | /// - `archived` for `FILE_ATTRIBUTE_ARCHIVE`
187 | /// - `compressed` for `FILE_ATTRIBUTE_COMPRESSED`
188 | /// - `encrypted` for `FILE_ATTRIBUTE_ENCRYPTED`
189 | /// - `hidden` for `FILE_ATTRIBUTE_HIDDEN`
190 | /// - `not-content-indexed` for `FILE_ATTRIBUTE_NOT_CONTENT_INDEXED` (opts the file out of content
191 | ///   indexing from Windows' crawlers, e.g. for the search functionality in Explorer and Start)
192 | /// - `system` for `FILE_ATTRIBUTE_SYSTEM`
193 | /// - `temporary` for `FILE_ATTRIBUTE_TEMPORARY`
194 | ///
195 | /// ## Common
196 | ///
197 | /// If any of these flags are present on a platform that supports them, they will also be present
198 | /// as unprefixed keys:
199 | ///
200 | /// - `append-only`
201 | /// - `compressed`
202 | /// - `immutable`
203 | ///
204 | /// If the file is read-only, this unprefixed flag will be present:
205 | ///
206 | /// - `read-only`
207 | ///
208 | /// [chattr]: https://man.archlinux.org/man/chattr.1
209 | /// [chflags]: https://man.freebsd.org/cgi/man.cgi?query=chflags&sektion=1&apropos=0&manpath=FreeBSD+14.0-RELEASE+and+Ports
210 | /// [win32-file-attrs]: https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants
211 | #[instrument(level = "trace")]
212 | pub fn file_attributes(
213 | 	path: &Path,
214 | 	meta: &Metadata,
215 | ) -> Result<Option<HashMap<String, AttributeValue>>> {
216 | 	let mut attrs = HashMap::new();
217 | 	#[cfg(target_os = "linux")]
218 | 	{
219 | 		use e2p_fileflags::{FileFlags, Flags};
220 | 		let flags = path.flags()?;
221 | 		attrs.extend(
222 | 			[
223 | 				("append-only", flags.contains(Flags::APPEND)),
224 | 				("casefold", flags.contains(Flags::CASEFOLD)),
225 | 				("compressed", flags.contains(Flags::COMPR)),
226 | 				("delete-undo", flags.contains(Flags::UNRM)),
227 | 				("delete-zero", flags.contains(Flags::SECRM)),
228 | 				("dir-sync", flags.contains(Flags::DIRSYNC)),
229 | 				("encrypted", flags.contains(Flags::ENCRYPT)),
230 | 				("file-sync", flags.contains(Flags::SYNC)),
231 | 				("immutable", flags.contains(Flags::IMMUTABLE)),
232 | 				("no-atime", flags.contains(Flags::NOATIME)),
233 | 				("no-backup", flags.contains(Flags::NODUMP)),
234 | 				("no-cow", flags.contains(Flags::NOCOW)),
235 | 				("not-compressed", flags.contains(Flags::NOCOMPR)),
236 | 			]
237 | 			.into_iter()
238 | 			.filter(|(_, v)| *v)
239 | 			.map(|(k, _)| (format!("linux.{k}"), AttributeValue::Boolean(true))),
240 | 		);
241 | 	}
242 | 
243 | 	#[cfg(any(
244 | 		target_os = "macos",
245 | 		target_os = "ios",
246 | 		target_os = "freebsd",
247 | 		target_os = "netbsd"
248 | 	))]
249 | 	{
250 | 		use nix::sys::stat::{stat, FileFlag};
251 | 
252 | 		let flags = FileFlag::from_bits_retain(stat(path)?.st_flags);
253 | 		attrs.extend(
254 | 			[
255 | 				(
256 | 					"append-only",
257 | 					flags.contains(FileFlag::SF_APPEND) || flags.contains(FileFlag::UF_APPEND),
258 | 				),
259 | 				("archived", flags.contains(FileFlag::SF_ARCHIVED)),
260 | 				(
261 | 					"immutable",
262 | 					flags.contains(FileFlag::SF_IMMUTABLE)
263 | 						|| flags.contains(FileFlag::UF_IMMUTABLE),
264 | 				),
265 | 				("no-backup", flags.contains(FileFlag::UF_NODUMP)),
266 | 			]
267 | 			.into_iter()
268 | 			.filter(|(_, v)| *v)
269 | 			.map(|(k, _)| (format!("bsd.{k}"), AttributeValue::Boolean(true))),
270 | 		);
271 | 	}
272 | 
273 | 	#[cfg(windows)]
274 | 	{
275 | 		use std::os::windows::fs::MetadataExt;
276 | 		use windows::Win32::Storage::FileSystem;
277 | 
278 | 		let flags = FileSystem::FILE_FLAGS_AND_ATTRIBUTES(meta.file_attributes());
279 | 
280 | 		attrs.extend(
281 | 			[
282 | 				(
283 | 					"archived",
284 | 					flags.contains(FileSystem::FILE_ATTRIBUTE_ARCHIVE),
285 | 				),
286 | 				(
287 | 					"compressed",
288 | 					flags.contains(FileSystem::FILE_ATTRIBUTE_COMPRESSED),
289 | 				),
290 | 				(
291 | 					"encrypted",
292 | 					flags.contains(FileSystem::FILE_ATTRIBUTE_ENCRYPTED),
293 | 				),
294 | 				("hidden", flags.contains(FileSystem::FILE_ATTRIBUTE_HIDDEN)),
295 | 				(
296 | 					"not-content-indexed",
297 | 					flags.contains(FileSystem::FILE_ATTRIBUTE_NOT_CONTENT_INDEXED),
298 | 				),
299 | 				("system", flags.contains(FileSystem::FILE_ATTRIBUTE_SYSTEM)),
300 | 				(
301 | 					"temporary",
302 | 					flags.contains(FileSystem::FILE_ATTRIBUTE_TEMPORARY),
303 | 				),
304 | 			]
305 | 			.into_iter()
306 | 			.filter(|(_, v)| *v)
307 | 			.map(|(k, _)| (format!("win32.{k}"), AttributeValue::Boolean(true))),
308 | 		);
309 | 	}
310 | 
311 | 	if attrs.is_empty() {
312 | 		Ok(None)
313 | 	} else {
314 | 		if attrs.contains_key("linux.append-only") || attrs.contains_key("bsd.append-only") {
315 | 			attrs.insert("append-only".to_string(), AttributeValue::Boolean(true));
316 | 		}
317 | 		if attrs.contains_key("linux.immutable") || attrs.contains_key("bsd.immutable") {
318 | 			attrs.insert("immutable".to_string(), AttributeValue::Boolean(true));
319 | 		}
320 | 		if attrs.contains_key("linux.compressed") || attrs.contains_key("win32.compressed") {
321 | 			attrs.insert("compressed".to_string(), AttributeValue::Boolean(true));
322 | 		}
323 | 		if meta.permissions().readonly() {
324 | 			attrs.insert("read-only".to_string(), AttributeValue::Boolean(true));
325 | 		}
326 | 
327 | 		Ok(Some(attrs))
328 | 	}
329 | }
330 | 
331 | /// Get extended attributes for a file, given its path.
332 | ///
333 | /// Returns `Ok(None)` on unsupported systems.
334 | ///
335 | /// Supported:
336 | /// - Android
337 | /// - FreeBSD
338 | /// - Linux
339 | /// - MacOS
340 | /// - NetBSD
341 | ///
342 | #[instrument(level = "trace")]
343 | pub fn file_extended_attributes(path: &Path) -> Result<Option<HashMap<String, AttributeValue>>> {
344 | 	#[cfg(unix)]
345 | 	{
346 | 		if xattr::SUPPORTED_PLATFORM {
347 | 			let list = xattr::list(path)?;
348 | 			let size_hint = list.size_hint();
349 | 			let mut map = HashMap::with_capacity(size_hint.1.unwrap_or(size_hint.0));
350 | 			for osname in list {
351 | 				match osname.to_str() {
352 | 					None => tracing::error!(?osname, ?path, "not storing non-Unicode xattr"),
353 | 					Some(name) => {
354 | 						if let Some(value) = xattr::get(path, &osname)? {
355 | 							map.insert(
356 | 								name.to_string(),
357 | 								crate::directory::CborString::from_maybe_utf8(value).into(),
358 | 							);
359 | 						}
360 | 					}
361 | 				}
362 | 			}
363 | 
364 | 			Ok(Some(map))
365 | 		} else {
366 | 			Ok(None)
367 | 		}
368 | 	}
369 | 
370 | 	#[cfg(not(unix))]
371 | 	Ok(None)
372 | }
373 | 
--------------------------------------------------------------------------------
/crates/zarc/src/ondemand.rs:
--------------------------------------------------------------------------------
1 | //! On-demand reader+seek trait and implementations.
2 | //!
3 | //! This is a trait that allows for obtaining multiple reader+seeker instances from a single byte
4 | //! source. Zarc uses it to allow for reading from multiple places in the source at the same time.
5 | //!
6 | //! This is implemented for files ([`Path`] and [`PathBuf`]) in this crate.
7 | 
8 | use std::{
9 | 	fs::File,
10 | 	io::{Read, Result, Seek},
11 | 	path::{Path, PathBuf},
12 | };
13 | 
14 | /// On-demand independent readers for a byte source.
15 | pub trait OnDemand {
16 | 	/// The output reader type.
17 | 	type Reader: Read + Seek;
18 | 
19 | 	/// Open an independent reader for this byte source.
20 | 	fn open(&self) -> Result<Self::Reader>;
21 | }
22 | 
23 | impl OnDemand for &Path {
24 | 	type Reader = File;
25 | 
26 | 	fn open(&self) -> Result<Self::Reader> {
27 | 		File::open(self)
28 | 	}
29 | }
30 | 
31 | impl OnDemand for PathBuf {
32 | 	type Reader = File;
33 | 
34 | 	fn open(&self) -> Result<Self::Reader> {
35 | 		File::open(self)
36 | 	}
37 | }
38 | 
--------------------------------------------------------------------------------
/crates/zarc/src/owner_cache.rs:
--------------------------------------------------------------------------------
1 | //! Caching lookup for user and group names.
2 | //!
3 | //! Looking up user and group names is very slow! In testing, this could often account for over 90%
4 | //! of the time spent in `zarc` when creating a new archive, and similarly when unpacking. To speed
5 | //! this up, we cache the results of these lookups at runtime, with the assumption that id/name
6 | //! mappings for users and groups won't change during an invocation of the program.
7 | 
8 | use std::collections::HashMap;
9 | 
10 | use nix::unistd::{Gid, Group, Uid, User};
11 | 
12 | /// A cache of user and group info.
13 | #[derive(Clone, Debug, Default)]
14 | pub struct OwnerCache {
15 | 	users: HashMap<Uid, User>,
16 | 	groups: HashMap<Gid, Group>,
17 | 	uid_by_name: HashMap<String, Uid>,
18 | 	gid_by_name: HashMap<String, Gid>,
19 | }
20 | 
21 | impl OwnerCache {
22 | 	/// Get a user from a UID, from cache or the system.
23 | 	pub fn user_from_uid(&mut self, uid: Uid) -> std::io::Result<Option<User>> {
24 | 		if let Some(user) = self.users.get(&uid) {
25 | 			return Ok(Some(user.clone()));
26 | 		}
27 | 
28 | 		let user = User::from_uid(uid)?;
29 | 		if let Some(user) = user.as_ref() {
30 | 			self.users.insert(uid, user.clone());
31 | 			self.uid_by_name.insert(user.name.to_owned(), user.uid);
32 | 		}
33 | 		Ok(user)
34 | 	}
35 | 
36 | 	/// Get a group from a GID, from cache or the system.
37 | 	pub fn group_from_gid(&mut self, gid: Gid) -> std::io::Result<Option<Group>> {
38 | 		if let Some(group) = self.groups.get(&gid) {
39 | 			return Ok(Some(group.clone()));
40 | 		}
41 | 
42 | 		let group = Group::from_gid(gid)?;
43 | 		if let Some(group) = group.as_ref() {
44 | 			self.groups.insert(gid, group.clone());
45 | 			self.gid_by_name.insert(group.name.to_owned(), group.gid);
46 | 		}
47 | 		Ok(group)
48 | 	}
49 | 
50 | 	/// Get a user from a name, from cache or the system.
51 | 	pub fn user_from_name(&mut self, name: &str) -> std::io::Result<Option<User>> {
52 | 		if let Some(uid) = self.uid_by_name.get(name) {
53 | 			return self.user_from_uid(*uid);
54 | 		}
55 | 
56 | 		let user = User::from_name(name)?;
57 | 		if let Some(user) = user.as_ref() {
58 | 			self.users.insert(user.uid, user.clone());
59 | 			self.uid_by_name.insert(name.to_owned(), user.uid);
60 | 		}
61 | 		Ok(user)
62 | 	}
63 | 
64 | 	/// Get a group from a name, from cache or the system.
65 | 	pub fn group_from_name(&mut self, name: &str) -> std::io::Result<Option<Group>> {
66 | 		if let Some(gid) = self.gid_by_name.get(name) {
67 | 			return self.group_from_gid(*gid);
68 | 		}
69 | 
70 | 		let group = Group::from_name(name)?;
71 | 		if let Some(group) = group.as_ref() {
72 | 			self.groups.insert(group.gid, group.clone());
73 | 			self.gid_by_name.insert(name.to_owned(), group.gid);
74 | 		}
75 | 		Ok(group)
76 | 	}
77 | }
78 | 
--------------------------------------------------------------------------------
/crates/zarc/src/trailer.rs:
--------------------------------------------------------------------------------
1 | //! Zarc Trailer structure
2 | //!
3 | //! This is the last part of a Zarc archive, and contains the critical metadata of the archive.
4 | //! Where [the header][super::header] is used to identify a Zarc file, this is used to actually
5 | //! decode it.
6 | //!
7 | //! The peculiarity of the trailer is that it's parsed backwards from the end. The digest field is
8 | //! potentially variable in length, and the only way to know its length is to read one of two bytes
9 | //! in the trailer, on either side of that variable field.
10 | //!
11 | //! However, reading a file backward is obnoxious and possibly slow, so the way this module works is
12 | //! with the [`Epilogue`], comprising the last six fields of the trailer, all fixed-size. You should
13 | //! use [`EPILOGUE_LENGTH`] to seek and read these bytes from the end, parse them, and then use
14 | //! [`Epilogue::full_length()`] to seek and read the remaining bytes, and finally pass them to
15 | //! [`Epilogue::complete()`] to obtain a [`Trailer`].
16 | //!
17 | //! Additionally, what you probably want to do for performance is to read, for example, a kilobyte
18 | //! from the end of the file at once, and then be reasonably sure that the whole trailer is in it.
19 | //!
20 | //! The trailer also has [`PROLOGUE_LENGTH`] bytes of "prologue", which this library ignores (but
21 | //! will write correctly). The prologue contains a duplicate of the digest type, and can be used to
22 | //! read the trailer "forward", if you really want to, though this library provides no support here.
23 | 
24 | use deku::prelude::*;
25 | 
26 | use super::integrity::{Digest, DigestType};
27 | 
28 | /// Zarc Trailer
29 | ///
30 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#zarc-trailer)
31 | #[derive(Clone, Debug, Eq, PartialEq)]
32 | pub struct Trailer {
33 | 	/// Digest of the directory.
34 | 	pub digest: Digest,
35 | 
36 | 	/// Digest (hash) algorithm.
37 | 	pub digest_type: DigestType,
38 | 
39 | 	/// Offset in bytes to the start of the [Directory][crate::directory]'s Zstandard frame.
40 | 	pub directory_offset: i64,
41 | 
42 | 	/// Uncompressed size in bytes of the directory.
43 | 	pub directory_uncompressed_size: u64,
44 | 
45 | 	/// Zarc format version number.
46 | 	///
47 | 	/// Should match [`ZARC_VERSION`][crate::ZARC_VERSION].
48 | 	pub version: u8,
49 | }
50 | 
51 | impl Trailer {
52 | 	/// Write the trailer to a writer.
53 | 	pub fn to_writer<W: std::io::Write>(&self, writer: &mut W) -> std::io::Result<()> {
54 | 		// reserved field and duplicated digest type
55 | 		writer.write_all(&[0, self.digest_type as u8])?;
56 | 
57 | 		writer.write_all(&self.digest)?;
58 | 
59 | 		let epilogue = Epilogue::from(self)
60 | 			.to_bytes()
61 | 			.map_err(std::io::Error::other)?;
62 | 		writer.write_all(&epilogue)
63 | 	}
64 | 
65 | 	/// Write the trailer to a vector.
66 | 	pub fn to_bytes(&self) -> Vec<u8> {
67 | 		let mut bytes = Vec::with_capacity(self.len());
68 | 		bytes.extend(self.digest.iter());
69 | 
70 | 		// UNWRAP: there's no way to construct an epilogue that doesn't serialise
71 | 		#[allow(clippy::unwrap_used)]
72 | 		bytes.extend(Epilogue::from(self).to_bytes().unwrap());
73 | 
74 | 		bytes
75 | 	}
76 | 
77 | 	/// The full length of the trailer in bytes.
78 | 	#[allow(clippy::len_without_is_empty)] // CLIPPY: this is not a collection
79 | 	pub fn len(&self) -> usize {
80 | 		self.digest.len() + EPILOGUE_LENGTH
81 | 	}
82 | 
83 | 	/// Make the offset positive.
84 | 	///
85 | 	/// Having the offset negative is very useful when _writing_ the trailer, but generally a pain
86 | 	/// when using it to decode the archive, so this method inverts it given the file length.
87 | 	///
88 | 	/// Does nothing if the offset is already positive.
89 | 	///
90 | 	/// See also [`Epilogue::make_offset_positive()`].
91 | 	pub fn make_offset_positive(&mut self, file_length: u64) {
92 | 		if self.directory_offset < 0 {
93 | 			self.directory_offset += file_length as i64;
94 | 		}
95 | 	}
96 | 
97 | 	/// Compute the check byte: the XOR of every byte of the prologue, digest, and epilogue, with the check byte itself taken as zero.
98 | 	pub fn compute_check(&self) -> u8 {
99 | 		let mut bytes = Vec::with_capacity(self.len());
100 | 		bytes.extend(&[0, self.digest_type as u8]);
101 | 		bytes.extend(self.digest.iter());
102 | 
103 | 		// UNWRAP: there's no way to construct an epilogue that doesn't serialise
104 | 		#[allow(clippy::unwrap_used)]
105 | 		bytes.extend(self.epilogue_without_check().to_bytes().unwrap());
106 | 
107 | 		bytes.iter().fold(0, |check, x| check ^ *x)
108 | 	}
109 | 
110 | 	/// Get the epilogue from this trailer, but set the check byte to 0.
111 | 	fn epilogue_without_check(&self) -> Epilogue {
112 | 		Epilogue {
113 | 			check: 0,
114 | 			digest_type: self.digest_type,
115 | 			directory_offset: self.directory_offset,
116 | 			directory_uncompressed_size: self.directory_uncompressed_size,
117 | 			version: self.version,
118 | 			magic: crate::ZARC_MAGIC.to_vec(),
119 | 		}
120 | 	}
121 | }
122 | 
123 | impl From<&Trailer> for Epilogue {
124 | 	fn from(trailer: &Trailer) -> Self {
125 | 		let mut epilogue = trailer.epilogue_without_check();
126 | 		epilogue.check = trailer.compute_check();
127 | 		epilogue
128 | 	}
129 | }
130 | 
131 | /// Length of the prologue in bytes.
132 | ///
133 | /// This is the wire length, not the size of the struct.
134 | pub const PROLOGUE_LENGTH: usize = 2;
135 | 
136 | /// Length of the epilogue in bytes.
137 | ///
138 | /// This is the wire length, not the size of the struct.
139 | pub const EPILOGUE_LENGTH: usize = 22;
140 | 
141 | /// The last six fields of the trailer, which are all fixed-size.
142 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)]
143 | #[deku(endian = "little")]
144 | pub struct Epilogue {
145 | 	/// Digest (hash) algorithm.
146 | 	pub digest_type: DigestType,
147 | 
148 | 	/// Offset in bytes to the start of the [Directory][crate::directory]'s Zstandard frame.
149 | 	///
150 | 	/// A positive value is from the start of the file, a negative value is from the end.
151 | 	#[deku(bytes = "8")]
152 | 	pub directory_offset: i64,
153 | 
154 | 	/// Uncompressed size in bytes of the directory.
155 | 	#[deku(bytes = "8")]
156 | 	pub directory_uncompressed_size: u64,
157 | 
158 | 	/// Check byte.
159 | 	#[deku(bytes = "1")]
160 | 	pub check: u8,
161 | 
162 | 	/// Zarc format version number.
163 | 	///
164 | 	/// Should match [`ZARC_VERSION`][crate::ZARC_VERSION].
165 | 	#[deku(bytes = "1")]
166 | 	pub version: u8,
167 | 
168 | 	/// Magic number.
169 | 	///
170 | 	/// Should match [`ZARC_MAGIC`][crate::ZARC_MAGIC].
171 | 	#[deku(count = "3")]
172 | 	pub magic: Vec<u8>,
173 | }
174 | 
175 | impl Epilogue {
176 | 	/// The full length of the trailer including the variable fields.
177 | 	pub const fn full_length(&self) -> usize {
178 | 		PROLOGUE_LENGTH + self.digest_type.digest_len() + EPILOGUE_LENGTH
179 | 	}
180 | 
181 | 	/// Reparse the trailer from the full bytes.
182 | 	///
183 | 	/// This copies the bytes it needs.
184 | 	///
185 | 	/// Returns `Err(bytes needed)` if there's not enough data to parse the trailer.
186 | 	/// Passing in too much data is fine, so long as the epilogue is at the end.
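187 | 	///
188 | 	/// # Example (sketch)
189 | 	///
190 | 	/// A minimal sketch of the backward-reading flow described in the module docs,
191 | 	/// assuming `file` is an open Zarc archive at least a kilobyte long, with the
192 | 	/// whole trailer contained in that final kilobyte:
193 | 	///
194 | 	/// ```no_run
195 | 	/// use deku::DekuContainerRead;
196 | 	/// use zarc::trailer::{Epilogue, EPILOGUE_LENGTH};
197 | 	/// # use std::io::{Read, Seek, SeekFrom};
198 | 	/// # fn example(mut file: std::fs::File) -> std::io::Result<()> {
199 | 	/// // read a generous chunk from the end of the file
200 | 	/// let mut tail = vec![0u8; 1024];
201 | 	/// file.seek(SeekFrom::End(-(tail.len() as i64)))?;
202 | 	/// file.read_exact(&mut tail)?;
203 | 	///
204 | 	/// // parse the fixed-size epilogue from the very end of the buffer...
205 | 	/// let (_, epilogue) = Epilogue::from_bytes((&tail[tail.len() - EPILOGUE_LENGTH..], 0))
206 | 	/// 	.map_err(std::io::Error::other)?;
207 | 	///
208 | 	/// // ...then hand back the whole tail so it can extract the digest too
209 | 	/// let trailer = epilogue.complete(&tail).expect("tail shorter than trailer");
210 | 	/// # Ok(()) }
211 | 	/// ```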
212 | 	pub fn complete(&self, all_bytes: &[u8]) -> Result<Trailer, usize> {
213 | 		if all_bytes.len() < self.full_length() {
214 | 			return Err(self.full_length() - all_bytes.len());
215 | 		}
216 | 
217 | 		let head = all_bytes.len() - (self.digest_type.digest_len() + EPILOGUE_LENGTH);
218 | 		let size = self.digest_type.digest_len();
219 | 		let digest = all_bytes[head..(head + size)].to_vec();
220 | 
221 | 		Ok(Trailer {
222 | 			digest: Digest(digest),
223 | 			digest_type: self.digest_type,
224 | 			directory_offset: self.directory_offset,
225 | 			directory_uncompressed_size: self.directory_uncompressed_size,
226 | 			version: self.version,
227 | 		})
228 | 	}
229 | 
230 | 	/// Make the offset positive.
231 | 	///
232 | 	/// Having the offset negative is very useful when _writing_ the trailer, but generally a pain
233 | 	/// when using it to decode the archive, so this method inverts it given the file length.
234 | 	///
235 | 	/// Does nothing if the offset is already positive.
236 | 	///
237 | 	/// See also [`Trailer::make_offset_positive()`].
238 | 	pub fn make_offset_positive(&mut self, file_length: u64) {
239 | 		if self.directory_offset < 0 {
240 | 			self.directory_offset += file_length as i64;
241 | 		}
242 | 	}
243 | }
244 | 
--------------------------------------------------------------------------------
/zarc.magic:
--------------------------------------------------------------------------------
1 | 0	string	\x50\x2A\x4D\x18\x04\x00\x00\x00\x65\xAA\xDC	Zarc archive file
2 | >11	byte	x	version %d
--------------------------------------------------------------------------------