├── .editorconfig ├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ ├── release-plz.yml │ └── release.yml ├── .gitignore ├── .rustfmt.toml ├── .vscode └── settings.json ├── CHANGELOG.md ├── CITATION.cff ├── COPYRIGHT ├── Cargo.lock ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── SPEC.md ├── crates ├── ozarc │ ├── Cargo.toml │ └── src │ │ ├── framing.rs │ │ └── lib.rs ├── zarc-cli │ ├── Cargo.toml │ ├── build.rs │ ├── manifest.rc │ ├── src │ │ ├── args.rs │ │ ├── debug.rs │ │ ├── list_files.rs │ │ ├── logs.rs │ │ ├── main.rs │ │ ├── pack.rs │ │ └── unpack.rs │ ├── wix │ │ └── main.wxs │ └── zarc.exe.manifest └── zarc │ ├── Cargo.toml │ └── src │ ├── constants.rs │ ├── decode.rs │ ├── decode │ ├── directory.rs │ ├── error.rs │ ├── frame_iterator.rs │ ├── open.rs │ └── zstd_iterator.rs │ ├── directory.rs │ ├── directory │ ├── edition.rs │ ├── elements.rs │ ├── file.rs │ ├── frame.rs │ ├── posix_owner.rs │ ├── specials.rs │ ├── strings.rs │ └── timestamps.rs │ ├── encode.rs │ ├── encode │ ├── add_file.rs │ ├── content_frame.rs │ ├── directory.rs │ └── lowlevel_frames.rs │ ├── header.rs │ ├── integrity.rs │ ├── lib.rs │ ├── metadata.rs │ ├── metadata │ ├── decode.rs │ └── encode.rs │ ├── ondemand.rs │ ├── owner_cache.rs │ └── trailer.rs └── zarc.magic /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = tab 5 | indent_size = 4 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | 11 | [*.yml] 12 | indent_style = space 13 | indent_size = 2 14 | 15 | [*.md] 16 | indent_style = space 17 | indent_size = 2 18 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | - package-ecosystem: cargo 8 | directory: / 9 | schedule: 10 | interval: weekly 11 | - package-ecosystem: cargo 12 | directory: /crates/ozarc 13 | schedule: 14 | interval: weekly 15 | - package-ecosystem: cargo 16 | directory: /crates/zarc 17 | schedule: 18 | interval: weekly 19 | - package-ecosystem: cargo 20 | directory: /crates/zarc-cli 21 | schedule: 22 | interval: weekly 23 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | push: 7 | branches: 8 | - main 9 | tags-ignore: 10 | - "*" 11 | 12 | env: 13 | CARGO_TERM_COLOR: always 14 | CARGO_UNSTABLE_SPARSE_REGISTRY: "true" 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.ref || github.run_id }} 18 | cancel-in-progress: true 19 | 20 | jobs: 21 | test: 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | platform: 26 | - macos 27 | - ubuntu 28 | - windows 29 | command: 30 | - test 31 | - clippy 32 | 33 | name: ${{ matrix.platform }} / ${{ matrix.command }} 34 | runs-on: "${{ matrix.platform }}-latest" 35 | 36 | steps: 37 | - uses: actions/checkout@v4 38 | - name: Configure toolchain 39 | run: | 40 | rustup toolchain install --profile minimal --no-self-update stable 41 | rustup default stable 42 | 43 | # https://github.com/actions/cache/issues/752 44 | - if: ${{ runner.os == 'Windows' }} 45 | name: Use GNU tar 46 | shell: cmd 47 | 
run: | 48 | echo "Adding GNU tar to PATH" 49 | echo C:\Program Files\Git\usr\bin>>"%GITHUB_PATH%" 50 | 51 | - if: ${{ runner.os == 'Linux' }} 52 | run: | 53 | sudo apt-get update 54 | sudo apt-get install -y libext2fs-dev 55 | 56 | - name: Cargo caching 57 | uses: actions/cache@v4 58 | with: 59 | path: | 60 | ~/.cargo/registry/index/ 61 | ~/.cargo/registry/cache/ 62 | ~/.cargo/git/db/ 63 | key: ${{ runner.os }}-cargo-stable-${{ hashFiles('**/Cargo.lock') }} 64 | restore-keys: | 65 | ${{ runner.os }}-cargo-stable- 66 | ${{ runner.os }}-cargo- 67 | 68 | - name: Compilation caching 69 | uses: actions/cache@v4 70 | with: 71 | path: target/ 72 | key: ${{ runner.os }}-target-stable-${{ hashFiles('**/Cargo.lock') }} 73 | 74 | - run: cargo ${{ matrix.command }} 75 | 76 | tests-pass: 77 | if: always() 78 | name: Tests pass 79 | needs: [test] 80 | runs-on: ubuntu-latest 81 | steps: 82 | - uses: re-actors/alls-green@release/v1 83 | with: 84 | jobs: ${{ toJSON(needs) }} 85 | -------------------------------------------------------------------------------- /.github/workflows/release-plz.yml: -------------------------------------------------------------------------------- 1 | name: Release plz 2 | 3 | permissions: 4 | pull-requests: write 5 | contents: write 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | release-plz: 14 | if: false 15 | name: Release-plz 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 22 | - name: Configure toolchain 23 | run: | 24 | rustup toolchain install --profile minimal --no-self-update stable 25 | rustup default stable 26 | - name: Run release-plz 27 | uses: MarcoIeni/release-plz-action@v0.5 28 | env: 29 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 30 | CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} 31 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023, axodotdev 2 | # SPDX-License-Identifier: MIT or Apache-2.0 3 | # 4 | # CI that: 5 | # 6 | # * checks for a Git Tag that looks like a release 7 | # * builds artifacts with cargo-dist (archives, installers, hashes) 8 | # * uploads those artifacts to temporary workflow zip 9 | # * on success, uploads the artifacts to Axo Releases and makes an Announcement 10 | 11 | name: Release 12 | 13 | permissions: 14 | contents: write 15 | 16 | # This task will run whenever you push a git tag that looks like a version 17 | # like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc. 18 | # Various formats will be parsed into a VERSION and an optional PACKAGE_NAME, where 19 | # PACKAGE_NAME must be the name of a Cargo package in your workspace, and VERSION 20 | # must be a Cargo-style SemVer Version (must have at least major.minor.patch). 21 | # 22 | # If PACKAGE_NAME is specified, then the announcement will be for that 23 | # package (erroring out if it doesn't have the given version or isn't cargo-dist-able). 24 | # 25 | # If PACKAGE_NAME isn't specified, then the announcement will be for all 26 | # (cargo-dist-able) packages in the workspace with that version (this mode is 27 | # intended for workspaces with only one dist-able package, or with all dist-able 28 | # packages versioned/released in lockstep). 
29 | # 30 | # If you push multiple tags at once, separate instances of this workflow will 31 | # spin up, creating an independent announcement for each one. However Github 32 | # will hard limit this to 3 tags per commit, as it will assume more tags is a 33 | # mistake. 34 | # 35 | # If there's a prerelease-style suffix to the version, then the release(s) 36 | # will be marked as a prerelease. 37 | on: 38 | push: 39 | tags: 40 | - '**[0-9]+.[0-9]+.[0-9]+*' 41 | pull_request: 42 | 43 | jobs: 44 | # Run 'cargo dist plan' (or host) to determine what tasks we need to do 45 | plan: 46 | runs-on: ubuntu-latest 47 | outputs: 48 | val: ${{ steps.plan.outputs.manifest }} 49 | tag: ${{ !github.event.pull_request && github.ref_name || '' }} 50 | tag-flag: ${{ !github.event.pull_request && format('--tag={0}', github.ref_name) || '' }} 51 | publishing: ${{ !github.event.pull_request }} 52 | env: 53 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 54 | AXO_RELEASES_TOKEN: ${{ secrets.AXO_RELEASES_TOKEN }} 55 | steps: 56 | - uses: actions/checkout@v4 57 | with: 58 | submodules: recursive 59 | - name: Install cargo-dist 60 | # we specify bash to get pipefail; it guards against the `curl` command 61 | # failing. otherwise `sh` won't catch that `curl` returned non-0 62 | shell: bash 63 | run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.8.1/cargo-dist-installer.sh | sh" 64 | # sure would be cool if github gave us proper conditionals... 65 | # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible 66 | # functionality based on whether this is a pull_request, and whether it's from a fork. 67 | # (PRs run on the *source* but secrets are usually on the *target* -- that's *good* 68 | # but also really annoying to build CI around when it needs secrets to work right.) 69 | - id: plan 70 | run: | 71 | cargo dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || (env.AXO_RELEASES_TOKEN && 'host --steps=check') || 'plan' }} --output-format=json > dist-manifest.json 72 | echo "cargo dist ran successfully" 73 | cat dist-manifest.json 74 | echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT" 75 | - name: "Upload dist-manifest.json" 76 | uses: actions/upload-artifact@v3 77 | with: 78 | name: artifacts 79 | path: dist-manifest.json 80 | 81 | # Build and packages all the platform-specific things 82 | build-local-artifacts: 83 | name: build-local-artifacts (${{ join(matrix.targets, ', ') }}) 84 | # Let the initial task tell us to not run (currently very blunt) 85 | needs: 86 | - plan 87 | if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }} 88 | strategy: 89 | fail-fast: false 90 | # Target platforms/runners are computed by cargo-dist in create-release. 
91 | # Each member of the matrix has the following arguments: 92 | # 93 | # - runner: the github runner 94 | # - dist-args: cli flags to pass to cargo dist 95 | # - install-dist: expression to run to install cargo-dist on the runner 96 | # 97 | # Typically there will be: 98 | # - 1 "global" task that builds universal installers 99 | # - N "local" tasks that build each platform's binaries and platform-specific installers 100 | matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }} 101 | runs-on: ${{ matrix.runner }} 102 | env: 103 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 104 | BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json 105 | steps: 106 | - uses: actions/checkout@v4 107 | with: 108 | submodules: recursive 109 | - uses: swatinem/rust-cache@v2 110 | - name: Install cargo-dist 111 | run: ${{ matrix.install_dist }} 112 | # Get the dist-manifest 113 | - name: Fetch local artifacts 114 | uses: actions/download-artifact@v3 115 | with: 116 | name: artifacts 117 | path: target/distrib/ 118 | - name: Install dependencies 119 | run: | 120 | ${{ matrix.packages_install }} 121 | - name: Build artifacts 122 | run: | 123 | # Actually do builds and make zips and whatnot 124 | cargo dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json 125 | echo "cargo dist ran successfully" 126 | - id: cargo-dist 127 | name: Post-build 128 | # We force bash here just because github makes it really hard to get values up 129 | # to "real" actions without writing to env-vars, and writing to env-vars has 130 | # inconsistent syntax between shell and powershell. 131 | shell: bash 132 | run: | 133 | # Parse out what we just built and upload it to scratch storage 134 | echo "paths<<EOF" >> "$GITHUB_OUTPUT" 135 | jq --raw-output ".artifacts[]?.path | select( . != null )" dist-manifest.json >> "$GITHUB_OUTPUT" 136 | echo "EOF" >> "$GITHUB_OUTPUT" 137 | 138 | cp dist-manifest.json "$BUILD_MANIFEST_NAME" 139 | - name: "Upload artifacts" 140 | uses: actions/upload-artifact@v3 141 | with: 142 | name: artifacts 143 | path: | 144 | ${{ steps.cargo-dist.outputs.paths }} 145 | ${{ env.BUILD_MANIFEST_NAME }} 146 | 147 | # Build and package all the platform-agnostic(ish) things 148 | build-global-artifacts: 149 | needs: 150 | - plan 151 | - build-local-artifacts 152 | runs-on: "ubuntu-20.04" 153 | env: 154 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 155 | BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json 156 | steps: 157 | - uses: actions/checkout@v4 158 | with: 159 | submodules: recursive 160 | - name: Install cargo-dist 161 | run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.8.1/cargo-dist-installer.sh | sh" 162 | # Get all the local artifacts for the global tasks to use (for e.g. checksums) 163 | - name: Fetch local artifacts 164 | uses: actions/download-artifact@v3 165 | with: 166 | name: artifacts 167 | path: target/distrib/ 168 | - id: cargo-dist 169 | shell: bash 170 | run: | 171 | cargo dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json 172 | echo "cargo dist ran successfully" 173 | 174 | # Parse out what we just built and upload it to scratch storage 175 | echo "paths<<EOF" >> "$GITHUB_OUTPUT" 176 | jq --raw-output ".artifacts[]?.path | select( .
!= null )" dist-manifest.json >> "$GITHUB_OUTPUT" 177 | echo "EOF" >> "$GITHUB_OUTPUT" 178 | 179 | cp dist-manifest.json "$BUILD_MANIFEST_NAME" 180 | - name: "Upload artifacts" 181 | uses: actions/upload-artifact@v3 182 | with: 183 | name: artifacts 184 | path: | 185 | ${{ steps.cargo-dist.outputs.paths }} 186 | ${{ env.BUILD_MANIFEST_NAME }} 187 | # Uploads the artifacts to Axo Releases and tentatively creates Releases for them. 188 | # This makes perma URLs like /v1.0.0/ live for subsequent publish steps to use, but 189 | # leaves them "disconnected" from the release history (for the purposes of 190 | # "list the releases" or "give me the latest releases"). 191 | # 192 | # If all the subsequent "publish" steps succeed, the "announce" job will "connect" 193 | # the releases and concepts like "latest" will be updated. Otherwise you're hopefully 194 | # in a decent position to roll back the release without anyone noticing it! 195 | # This is imperfect with things like "publish to crates.io" being irreversible, but 196 | # at worst you're in a better position to yank the version with minimum disruption. 197 | host: 198 | needs: 199 | - plan 200 | - build-local-artifacts 201 | - build-global-artifacts 202 | # Only run if we're "publishing", and only if local and global didn't fail (skipped is fine) 203 | if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }} 204 | env: 205 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 206 | AXO_RELEASES_TOKEN: ${{ secrets.AXO_RELEASES_TOKEN }} 207 | runs-on: "ubuntu-20.04" 208 | outputs: 209 | val: ${{ steps.host.outputs.manifest }} 210 | steps: 211 | - uses: actions/checkout@v4 212 | with: 213 | submodules: recursive 214 | - name: Install cargo-dist 215 | run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.8.1/cargo-dist-installer.sh | sh" 216 | # Fetch artifacts from scratch-storage 217 | - name: Fetch artifacts 218 | uses: actions/download-artifact@v3 219 | with: 220 | name: artifacts 221 | path: target/distrib/ 222 | # Upload files to Axo Releases and create the Releases 223 | - id: host 224 | shell: bash 225 | run: | 226 | cargo dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json 227 | echo "artifacts uploaded and released successfully" 228 | cat dist-manifest.json 229 | echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT" 230 | - name: "Upload dist-manifest.json" 231 | uses: actions/upload-artifact@v3 232 | with: 233 | name: artifacts 234 | path: dist-manifest.json 235 | 236 | # Create an Announcement for all the Axo Releases, updating the "latest" release 237 | announce: 238 | needs: 239 | - plan 240 | - host 241 | # use "always() && ..." to allow us to wait for all publish jobs while 242 | # still allowing individual publish jobs to skip themselves (for prereleases). 243 | # "host" however must run to completion, no skipping allowed! 
244 | if: ${{ always() && needs.host.result == 'success' }} 245 | runs-on: "ubuntu-20.04" 246 | env: 247 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 248 | AXO_RELEASES_TOKEN: ${{ secrets.AXO_RELEASES_TOKEN }} 249 | steps: 250 | - uses: actions/checkout@v4 251 | with: 252 | submodules: recursive 253 | - name: Install cargo-dist 254 | run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.8.1/cargo-dist-installer.sh | sh" 255 | - name: Fetch Axo Artifacts 256 | uses: actions/download-artifact@v3 257 | with: 258 | name: artifacts 259 | path: target/distrib/ 260 | - name: Announce Axo Releases 261 | run: | 262 | cargo dist host --steps=announce ${{ needs.plan.outputs.tag-flag }} 263 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | *.zarc 3 | *.zip 4 | *.tar* 5 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | hard_tabs = true 2 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "rust-analyzer.cargo.features": [ 3 | "expose-internals" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.0.0 (2024-01-20) 4 | 5 | Initial release. 6 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: | 3 | If you use this software, please cite it using these metadata. 4 | title: "Zarc: a new archive format as a construction over the Z-standard file format" 5 | 6 | version: "0.1.0" 7 | date-released: 2023-12-27 8 | 9 | repository-code: https://github.com/passcod/zarc 10 | license: Apache-2.0 OR MIT 11 | 12 | authors: 13 | - family-names: Saparelli 14 | given-names: Félix 15 | orcid: https://orcid.org/0000-0002-2010-630X 16 | -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | Short version for non-lawyers: 2 | 3 | This project is dual-licensed under Apache 2.0 and MIT terms. 4 | 5 | 6 | Longer version: 7 | 8 | Copyrights in this project are retained by their contributors. No copyright 9 | assignment is required to contribute. 10 | 11 | Some files include explicit copyright notices and/or license notices. 12 | For full authorship information, see the version control history. 13 | 14 | Except as otherwise noted (below and/or in individual files), the project is 15 | licensed under the Apache License, Version 2.0 <LICENSE-APACHE> or 16 | <https://www.apache.org/licenses/LICENSE-2.0> or the MIT license 17 | <LICENSE-MIT> or <https://opensource.org/licenses/MIT>, at your option.
18 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | members = [ 4 | "crates/ozarc", 5 | "crates/zarc", 6 | "crates/zarc-cli", 7 | ] 8 | 9 | # Config for 'cargo dist' 10 | [workspace.metadata.dist] 11 | # The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax) 12 | cargo-dist-version = "0.8.1" 13 | # CI backends to support 14 | ci = ["github"] 15 | # The installers to generate for each app 16 | installers = ["shell", "powershell", "homebrew", "msi"] 17 | # Target platforms to build apps for (Rust target-triple syntax) 18 | targets = ["aarch64-apple-darwin", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"] 19 | # Publish jobs to run in CI 20 | pr-run-mode = "plan" 21 | # Where to host releases 22 | hosting = ["axodotdev"] 23 | # The archive format to use for non-windows builds (defaults .tar.xz) 24 | unix-archive = ".tar.zstd" 25 | 26 | [workspace.metadata.dist.dependencies.apt] 27 | libext2fs-dev = '*' 28 | 29 | [profile.release] 30 | lto = true 31 | debug = 1 # for stack traces 32 | codegen-units = 1 33 | 34 | [profile.dev.build-override] 35 | opt-level = 0 36 | codegen-units = 1024 37 | debug = false 38 | debug-assertions = false 39 | overflow-checks = false 40 | incremental = false 41 | 42 | # The profile that 'cargo dist' will build with 43 | [profile.dist] 44 | inherits = "release" 45 | strip = "symbols" 46 | 47 | [profile.release.build-override] 48 | opt-level = 0 49 | codegen-units = 1024 50 | debug = false 51 | debug-assertions = false 52 | overflow-checks = false 53 | incremental = false 54 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /SPEC.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Zarc is a file format specified on top of the Zstandard Compression Format aka RFC8878. 4 | 5 | Zarc is a toy file format: it has received no review, only has a single implementation, and is not considered mature enough for serious use. 
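Since the Header frame layout is fixed (see the Zarc Header section below), identifying a Zarc file takes only a twelve-byte comparison. Here is a minimal sketch in Rust, using only the standard library; `ZARC_PROLOGUE` and `looks_like_zarc` are illustrative names, not part of the zarc crates:

```rust
use std::io::{self, Read};

/// The first 12 bytes of every version-1 Zarc file, per the Zarc Header
/// section below: zstd skippable frame magic (nibble 0), frame size,
/// then the Zarc magic and format version.
const ZARC_PROLOGUE: [u8; 12] = [
    0x50, 0x2A, 0x4D, 0x18, // zstd skippable frame magic, nibble = 0
    0x04, 0x00, 0x00, 0x00, // frame size: 4 bytes of payload
    0x65, 0xAA, 0xDC, // Zarc magic
    0x01, // Zarc file version
];

/// Returns true if the reader starts with the Zarc version-1 prologue.
fn looks_like_zarc(mut reader: impl Read) -> io::Result<bool> {
    let mut prologue = [0u8; 12];
    reader.read_exact(&mut prologue)?;
    Ok(prologue == ZARC_PROLOGUE)
}
```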
6 | 7 | Zarc is intended to be fairly simple to parse given a zstd decoder, while providing some interesting features, like: 8 | 9 | - always-on strong hashing and integrity verification; 10 | - full support for extended attributes (xattrs); 11 | - high resolution timestamps; 12 | - user-provided metadata at both archive and file level; 13 | - basic deduplication via content-addressing; 14 | - minimal uncompressed overhead; 15 | - appending files is reasonably cheap; 16 | - capable of handling archives larger than memory, or even archives containing more file metadata than would fit in memory (allowed by spec but not yet implemented). 17 | 18 | ## Version 19 | 20 | The version of the Zarc format is 1. 21 | 22 | The version of this spec is 1.0.0. 23 | 24 | **CAUTION:** the format is currently unstable and changes without version bump or notice. 25 | 26 | ## Magic 27 | 28 | The Zarc magic number is 0xDCAA65 in little-endian. 29 | 30 | It is the string `Zarc` *de*coded as Base64: 31 | 32 | ```console 33 | $ echo -n 'Zarc' | base64 -d | hexyl -p 34 | 65 aa dc 35 | ``` 36 | 37 | ## Zstd Format 38 | 39 | Here's a quick recap of the zstd format: 40 | 41 | - The format is a sequence of frames 42 | - Frames can either be Zstandard frames or Skippable frames 43 | - A standard zstd decoder will skip Skippable frames 44 | - Numbers are little-endian 45 | - Zstandard frames: 46 | - `[magic][header][blocks...][checksum]` 47 | - Magic is 0xFD2FB528 48 | - Header is 2-14 bytes, described in spec 49 | - Checksum is optional, last 4 bytes of xxhash64 50 | - Blocks are: 51 | - `[last][type][size][data]` 52 | - Last is 1 bit (boolean) 53 | - Type is 2 bits (enum) 54 | - Size is 21 bits, unsigned 55 | - Type describes: 56 | 0. Raw block (`data` is uncompressed, verbatim) 57 | 1. RLE block (`data` is a single byte, `size` is how many times it's repeated verbatim) 58 | 2. Compressed block 59 | 3. Reserved 60 | - Skippable frames: 61 | - `[magic][size][data]` 62 | - Magic is 0x184D2A5? where the last nibble **?** is any value from 0x0 to 0xF 63 | - Size is unsigned 32-bit int 64 | 65 | Further reading: 66 | - Informational RFC8878: <https://datatracker.ietf.org/doc/html/rfc8878> 67 | - Most up-to-date spec: <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md> 68 | 69 | ## Zarc Format 70 | 71 | A Zarc is a defined sequence of zstd frames: 72 | 73 | - one **[Header](#zarc-header)**, a Skippable frame (0x0), used to identify a file as a Zarc 74 | - zero or more **Zstandard frames**, one for each file (modulo deduplication and special files) 75 | - one **[Directory](#zarc-directory)**, a Zstandard (compressed) frame, which contains file list and metadata 76 | - one **[Trailer](#zarc-trailer)**, a Skippable frame (0xF), used to find and check the Directory 77 | 78 | Zarcs are explicitly files: this is not a format suitable for streaming from a network location, 79 | unless random access / seek semantics are available (e.g. using the `Range` HTTP header). 80 | 81 | # Zarc Header 82 | 83 | This is a Skippable frame with magic nibble = 0.
84 | 85 | It contains: 86 | 87 | | **`Magic`** | **`Zarc Version`** | 88 | |:-----------:|:------------------:| 89 | | 3 bytes | 1 byte | 90 | | `65 aa dc` | `01` | 91 | 92 | This, combined with the Skippable frame header, means a Zarc file always starts with the same 12 bytes: 93 | 94 | | **`Zstd Magic`** | **`Frame Size`** | **`Zarc Magic`** | **`Zarc File Version`** | 95 | |:----------------:|:----------------:|:----------------:|:-----------------------:| 96 | | 4 bytes | 4 bytes | 3 bytes | 1 byte | 97 | | `50 2a 4d 18` | `04 00 00 00` | `65 aa dc` | `01` | 98 | 99 | # Zarc Directory 100 | 101 | This is a Zstandard frame. 102 | 103 | It contains a stream of [CBOR](https://cbor.io)-encoded Elements, which are framed with a Kind and a length. 104 | 105 | | **`Kind`** | **`Length of Payload`** | _reserved_ | **`Payload`** | 106 | |:----------:|:-----------------------:|:----------:|:-------------:| 107 | | LE U8 | LE U16 | 1 byte | CBOR | 108 | 109 | Element Kinds are described below, along with their integer and CBOR payload structure. 110 | Elements of the same Kind are NOT required to be next to each other. 111 | Order is insignificant unless stated. 112 | 113 | Implementations MUST ignore Element Kinds they do not recognise. 114 | 115 | > **Non-normative note:** the _reserved_ byte is there mainly for possible expansion of the payload length. 116 | > 64K per element looks pretty large from here, but who knows what the future brings. 117 | 118 | ## Kind `1`: Editions 119 | 120 | _Map: unsigned integer keys -> CBOR._ 121 | 122 | Editions record core metadata about an archive, and also provide a mechanism for retaining the metadata of _previous versions_ of the archive, if it gets appended or edited. 123 | At least one edition must be present. 124 | 125 | ### Key `0`: Number 126 | 127 | _Non-zero unsigned integer._ **Mandatory.** 128 | 129 | The number of editions in a file is technically unlimited, but as of this version MUST be less than 65536. 130 | For practical purposes implementations SHOULD warn when creating more than 1000 editions, and MAY set that limit lower. 131 | 132 | Creating an edition involves incrementing the edition number, so the latest edition of the file is `max(edition list)`. 133 | 134 | This is used in Frame and File types as the `Edition` field. 135 | 136 | ### Key `1`: Written At 137 | 138 | _Timestamp or DateTime._ **Mandatory.** 139 | 140 | When this edition was created. 141 | 142 | ### Key `2`: Digest Type 143 | 144 | _8-bit unsigned integer._ **Mandatory.** 145 | 146 | The same as the Trailer's `Digest Type` value: the digest type in use by this edition. 147 | 148 | ### Key `10`: User Metadata 149 | 150 | _Map: text string keys -> boolean or text or byte string._ **Optional.** 151 | 152 | User metadata of this edition. 153 | 154 | ## Kind `2`: Files 155 | 156 | _Map: unsigned integer keys -> CBOR._ 157 | 158 | ### Key `0`: Edition 159 | 160 | _Unsigned integer._ **Mandatory.** 161 | 162 | The edition in which this file entry was added to the archive. 163 | 164 | ### Key `1`: Name 165 | 166 | _Array of: text string or byte string._ **Mandatory.** 167 | 168 | If items are of the UTF-8 _Text string_ CBOR type, then they represent UTF-8-encoded Unicode pathname components. 169 | If items are of the _Byte string_ CBOR type instead, then they represent raw (non-Unicode) pathname components.
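As an illustration of the two component forms, here is a sketch of how an encoder might build this array, assuming the `ciborium` crate for CBOR values (the spec does not mandate any particular CBOR library, and `encode_name` is a hypothetical helper): components that are valid UTF-8 become text strings, everything else becomes byte strings, and `.` or `..` components are rejected as required below.

```rust
use ciborium::value::Value;

/// Build the CBOR array for a Files entry's Name key: UTF-8 components
/// become text strings, anything else byte strings. `.` and `..` are
/// rejected, as the spec requires (see below).
fn encode_name(components: &[Vec<u8>]) -> Result<Value, &'static str> {
    let mut items = Vec::with_capacity(components.len());
    for raw in components {
        if raw.as_slice() == b"." || raw.as_slice() == b".." {
            return Err("`.` and `..` pathname components are disallowed");
        }
        items.push(match std::str::from_utf8(raw) {
            Ok(text) => Value::Text(text.to_owned()),
            Err(_) => Value::Bytes(raw.clone()),
        });
    }
    Ok(Value::Array(items))
}
```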
170 | 171 | Windows implementations MUST convert raw UTF-16 to UTF-8 during encoding, and from raw bytes to UTF-8 during decoding, and replace invalid wide character sequences with the Unicode REPLACEMENT CHARACTER. 172 | 173 | Non-Unicode pathnames may not be supported on all filesystems / operating systems. 174 | Implementations SHOULD strongly prefer UTF-8, and SHOULD warn when paths do not convert cleanly. 175 | 176 | Zarc makes no effort to restrict valid pathnames. 177 | The exception is that the components `.` and `..` are disallowed. 178 | A Zarc decoder MUST reject such pathnames. 179 | 180 | Pathnames are encoded in components. 181 | That is, the Unix pathname `foo/bar/baz.qux` and the Windows pathname `foo\bar\baz.qux` are encoded the same way. 182 | Pathnames can mix UTF-8 and non-Unicode components. 183 | 184 | Pathnames do not include drive letters or fileshare prefixes. 185 | (It is not possible to construct a Zarc archive spanning multiple Windows drives.) 186 | 187 | Pathnames do not encode whether a path is absolute or relative: all paths inside a Zarc archive are relative to an arbitrary root provided by the user when packing or unpacking. 188 | 189 | It is possible to have several identical pathnames in a Zarc Directory. 190 | Implementations SHOULD provide an option to use the first or last or other selection criteria, but MUST default to preferring the last of a set of identical pathnames. 191 | 192 | ### Key `2`: Frame Digest 193 | 194 | _Byte string._ **Conditional.** 195 | 196 | The hash of a frame of content. 197 | This must be the same value as the **Frame Content Digest** field of a **Frames** element (Kind `3`, described below). 198 | 199 | Multiple files can reference the same content frame: this provides file-level deduplication. 200 | 201 | The algorithm of the hash is described by the edition's **Digest Type** field above. 202 | 203 | This may be absent for some special files (described later). 204 | 205 | ### Key `3`: POSIX File Mode 206 | 207 | _Unsigned integer._ **Optional.** 208 | 209 | Unix mode bits as an unsigned 32-bit integer. 210 | 211 | If this is not set, implementations SHOULD use a default mode as appropriate. 212 | 213 | ### Key `4`: POSIX File Owner 214 | 215 | _Tuple (encoded as an array)._ **Optional.** 216 | 217 | The user that owns this file. 218 | This is a structure with at least one of the following types of data: 219 | 220 | - _Unsigned integer._ the user ID 221 | - _Text string._ the user name as UTF-8 (or ASCII) 222 | 223 | There SHOULD NOT be more than one unsigned integer; if there are, the last value wins out. 224 | 225 | Implementations SHOULD prefer the name to the ID if there is an existing user named thus on the system with a different ID. 226 | Implementations SHOULD prefer to encode IDs as 32-bit unsigned integers, but MUST accept 8-bit, 16-bit, and 64-bit unsigned integers as well. 227 | 228 | ### Key `5`: POSIX File Group 229 | 230 | _Tuple (encoded as an array)._ **Optional.** 231 | 232 | The group that owns this file. 233 | This is a structure with at least one of the following types of data: 234 | 235 | - _Unsigned integer._ the group ID 236 | - _Text string._ the group name as UTF-8 (or ASCII) 237 | 238 | Implementations SHOULD prefer the name to the ID if there is an existing group named thus on the system with a different ID. 239 | 240 | ### Key `6`: File Timestamps 241 | 242 | _Map: unsigned integer keys -> timestamp._ **Optional.** 243 | 244 | Timestamps associated with this file.
Any of: 245 | 246 | - `1`: birth time or file creation time 247 | - `2`: mtime or file modification time 248 | - `3`: atime or file access time — this SHOULD be the access time prior to the Zarc tool reading the file 249 | 250 | Timestamps can be stored in either: 251 | - [RFC3339 in _text string_ with semantic tag `0`](https://www.rfc-editor.org/rfc/rfc8949.html#name-standard-date-time-string) 252 | - [seconds from epoch as unsigned or negative integer, or binary64 floating point, with semantic tag `1`](https://www.rfc-editor.org/rfc/rfc8949.html#name-epoch-based-date-time) 253 | 254 | > **Non-normative implementation note:** the Zarc reference implementation _accepts_ all formats for a timestamp, but always _writes_ RFC3339 text string datetimes. 255 | 256 | ### Key `7`: Special File Types 257 | 258 | _Pair: [unsigned integer, (pathname)?]._ **Optional.** 259 | 260 | This is a structure which encodes special file types. 261 | 262 | The mandatory first array item is the type of the special file. 263 | Implementations SHOULD ignore unknown or impractical special types. 264 | 265 | - `1` — **directory entry.** 266 | May be used to encode metadata or (x)attributes against a directory. 267 | 268 | - `10` — **unspecified symlink.** 269 | MUST be followed by the pathname of the link target. 270 | - `11` — **internal symlink.** 271 | MUST be followed by the pathname of another file contained in this Zarc. 272 | - `12` — **external absolute symlink.** 273 | MUST be followed by the absolute pathname of a file to symlink to. 274 | Implementations MAY reject this (e.g. for security reasons). 275 | - `13` — **external relative symlink.** 276 | MUST be followed by the relative pathname of a file to symlink to. 277 | Implementations MAY reject this (e.g. for security reasons). 278 | 279 | - `20` — **unspecified hardlink.** 280 | MUST be followed by the pathname of another file contained in this Zarc. 281 | - `21` — **internal hardlink.** 282 | MUST be followed by the pathname of another file contained in this Zarc. 283 | - `22` — **external hardlink.** 284 | MUST be followed by the absolute pathname of a file to hardlink to. 285 | Implementations MAY reject this (e.g. for security reasons). 286 | 287 | Pathnames (as the conditional second array item) are either: 288 | - _Byte string_ or _Text string_. An absolute or relative full pathname with platform-specific separators; 289 | - _Array(byte or text string)._ An array of components as for the Files element's **Name** key, except that `.` and `..` components are allowed. 290 | 291 | The second form is preferred, for portability. 292 | 293 | ### Key `10`: File User Metadata 294 | 295 | _Map: text string keys -> boolean or text or byte string._ **Optional.** 296 | 297 | Arbitrary user-provided metadata for this file entry. 298 | 299 | ### Key `11`: File Attributes 300 | 301 | _Map: text string keys -> boolean or text or byte string._ **Optional.** 302 | 303 | A map of values (typically boolean flags) whose keys SHOULD correspond to [file attributes](https://en.wikipedia.org/wiki/Chattr). 304 | 305 | Implementations MAY ignore attributes if obtaining or setting them is impossible or impractical. 306 | 307 | Attribute keys MUST either have a prefix signifying the system they apply to: 308 | 309 | - `win32.` for Windows 310 | - `linux.` for Linux 311 | - `bsd.` for BSDs, including macOS 312 | - `_` for implementation-defined prefixes (e.g.
`_ncc1701.`) 313 | 314 | OR be one of these defined unprefixed values: 315 | 316 | - `append-only` 317 | - `compressed` 318 | - `immutable` 319 | - `read-only` 320 | 321 | > **Note:** attributes are metadata only; they have no bearing on the Zarc file format semantics. 322 | 323 | ### Key `12`: Extended File Attributes 324 | 325 | _Map: text string keys -> boolean or text or byte string._ **Optional.** 326 | 327 | A map of extended attributes (`xattr`). 328 | 329 | Zarc imposes no restriction on the format of attribute names, nor on the content or length of attribute values. 330 | 331 | Implementations MAY ignore extended attributes if obtaining or setting them is impossible or impractical. 332 | On Linux, implementations MAY assume a `user` namespace for unprefixed keys. 333 | 334 | ## Kind `3`: Frames 335 | 336 | _Map: unsigned integer keys -> CBOR._ **Mandatory, collect-up.** 337 | 338 | Structures of this type SHOULD appear in offset order. 339 | 340 | ### Key `0`: Edition Added 341 | 342 | _Unsigned integer._ **Mandatory.** 343 | 344 | The edition in which this frame was added to the archive. 345 | 346 | ### Key `1`: Frame Offset 347 | 348 | _Integer._ **Mandatory.** 349 | 350 | The offset in bytes from the start of the Zarc file to the first byte of the Zstandard frame header this entry describes. 351 | 352 | There MUST NOT be duplicate Frame Offsets in the Frame list. 353 | 354 | ### Key `2`: Frame Content Digest 355 | 356 | _Byte string._ **Mandatory.** 357 | 358 | The digest of the frame contents. 359 | 360 | Implementations MUST check that frame contents match this digest (unless "insecure" mode is used). 361 | 362 | ### Key `3`: Framed Size 363 | 364 | _Integer._ **Mandatory.** 365 | 366 | The size of the entire frame in bytes. 367 | 368 | This may be used to request that range of bytes from a remote source without reading too far, or incrementally via block information. 369 | 370 | ### Key `4`: Uncompressed Content Length 371 | 372 | _Integer._ **Mandatory.** 373 | 374 | The length of the uncompressed content of the frame in bytes. 375 | 376 | This is a complement to the Frame Content Size field available on the Zstandard Frame directly, as that field can be absent depending on zstd parameters. 377 | 378 | This can be used to e.g.: 379 | - avoid unpacking frames which exceed available memory or storage; 380 | - preallocate storage before unpacking; 381 | - estimate the uncompressed total size of the archive. 382 | 383 | # Zarc Trailer 384 | 385 | This is a Skippable frame with magic nibble = F. 386 | 387 | It contains: 388 | 389 | | _reserved_ | **`Digest Type`** | **`Digest`** | **`Digest Type`** | 390 | |:----------:|:-----------------:|:-----------:|:-----------------:| 391 | | 1 byte | 1 byte | _n_ bytes | 1 byte | 392 | 393 | | **`Directory Offset`** | **`Uncompressed Length`** | 394 | |:-------------------------:|:-------------------------:| 395 | | 8 bytes | 8 bytes | 396 | 397 | | **`Check Byte`** | **`Zarc Version`** | **`Magic`** | 398 | |:----------------:|:------------------:|:-----------:| 399 | | 1 byte | 1 byte | 3 bytes | 400 | | | `01` | `65 aa dc` | 401 | 402 | > **Non-normative implementation note:** This looks upside down, because you can read it from the end. 403 | > The last three bytes of a Zarc file will always be `65 aa dc`, _preceded_ by the version, _preceded_ by the check byte, etc. 404 | > The critical fixed-width fields are all at the end, so they can be read by seeking to a fixed offset from the end.
405 | > The `Digest Type` is then used to derive the length of the `Digest` field. 406 | > It's also duplicated on the other side of the `Digest`, so that the trailer can be read from both sides. 407 | > Going 8 bytes further back from the 'start' of the trailer will yield the Zstd Skippable frame header, should you wish to check that. 408 | 409 | ### `Directory Offset` 410 | 411 | _Signed 64-bit integer._ 412 | 413 | This is EITHER: 414 | 415 | - a **positive** value, the offset from the **start** of the file to the first byte of the Zstandard frame containing the Zarc Directory. 416 | - a **negative** value, the offset from the **end** of the file to the first byte of the Zstandard frame containing the Zarc Directory. 417 | 418 | ### `Uncompressed Length` 419 | 420 | This is the uncompressed length of the Zarc Directory structure. 421 | 422 | This may be used to decide whether to decompress the directory in memory or stream it. 423 | 424 | ### `Digest Type` 425 | 426 | Defines the algorithm used for computing digests, as well as the length of the digest fields: 427 | 428 | - `0`: not used. This value must not appear. 429 | - `1`: [BLAKE3](https://github.com/BLAKE3-team/BLAKE3) hash function, 32-byte digests. 430 | 431 | ### `Check Byte` 432 | 433 | This is the result of XOR'ing together all the other bytes of the trailer (that is, every byte of the trailer except the check byte itself). 434 | 435 | It can be used as a quick check for corruption. 436 | -------------------------------------------------------------------------------- /crates/ozarc/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ozarc" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | authors = ["Félix Saparelli <felix@passcod.name>"] 7 | license = "Apache-2.0 OR MIT" 8 | description = "Zstandard implementation in pure Rust" 9 | keywords = ["archive", "file-format", "zstd"] 10 | categories = ["encoding", "compression"] 11 | 12 | documentation = "https://github.com/passcod/zarc" 13 | repository = "https://github.com/passcod/zarc" 14 | 15 | [dependencies] 16 | deku = { version = "0.16.0", features = ["logging"], git = "https://github.com/sharksforarms/deku" } 17 | log = "0.4.20" 18 | tracing = "0.1.40" 19 | 20 | -------------------------------------------------------------------------------- /crates/ozarc/src/framing.rs: -------------------------------------------------------------------------------- 1 | //! Zstd file format parsing types. 2 | //! 3 | //! [Spec (Informational RFC8878)](https://datatracker.ietf.org/doc/html/rfc8878) 4 | //! 5 | //! Here's a quick recap of the zstd format, full specification available at link above: 6 | //! 7 | //! - The format is a sequence of frames 8 | //! - Frames can either be [Zstandard frames](ZstandardFrame) or [Skippable frames](SkippableFrame) 9 | //! - A standard zstd decoder will skip Skippable frames 10 | //! - Numbers are little-endian 11 | //! - Zstandard frames: 12 | //! - `[magic][header][blocks...][checksum]` 13 | //! - Magic is 0xFD2FB528 14 | //! - [Header](ZstandardFrameDescriptor) is 2-14 bytes, described in spec above 15 | //! - Checksum is optional, last 4 bytes of xxhash64 16 | //! - [Blocks](ZstandardBlock) are: 17 | //! - `[last][type][size][data]` 18 | //! - Last is 1 bit (boolean) 19 | //! - Type is 2 bits (enum) 20 | //! - Size is 21 bits, unsigned 21 | //! - [Type](ZstandardBlockType) describes: 22 | //! 0. Raw block (`data` is uncompressed, verbatim) 23 | //! 1. RLE block (`data` is a single byte, `size` is how many times it's repeated verbatim) 24 | //! 2. Compressed block 25 | //!
3. Reserved 26 | //! - Skippable frames: 27 | //! - `[magic][size][data]` 28 | //! - Magic is 0x184D2A5? where the last nibble **?** is any value from 0 to F 29 | //! - Size is unsigned 32-bit int 30 | 31 | use deku::prelude::*; 32 | 33 | /// Magic number for a [Skippable Frame](SkippableFrame). 34 | /// 35 | /// This is only bytes 1-3 of the magic, and the first byte is any value from 0x50 to 0x5F. 36 | pub const SKIPPABLE_FRAME_MAGIC: &[u8] = b"\x2A\x4D\x18"; 37 | 38 | /// Magic number for a [Zstandard Frame](ZstandardFrame). 39 | pub const ZSTANDARD_FRAME_MAGIC: &[u8] = b"\x28\xB5\x2F\xFD"; 40 | 41 | /// The overhead of a [Skippable Frame](SkippableFrame) in bytes. 42 | /// 43 | /// This is the size of the magic and size fields. 44 | pub const SKIPPABLE_FRAME_OVERHEAD: usize = 8; 45 | 46 | /// A "Skippable" frame. 47 | /// 48 | /// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-skippable-frames) 49 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)] 50 | #[deku(endian = "little")] 51 | pub struct SkippableFrame { 52 | #[deku(bytes = "4", assert = "SkippableFrame::valid_magic(magic)")] 53 | magic: u32, 54 | 55 | #[deku(bytes = "4")] 56 | size: u32, 57 | 58 | /// The user data contained in the frame. 59 | #[deku(count = "size")] 60 | pub data: Vec<u8>, 61 | } 62 | 63 | impl SkippableFrame { 64 | fn valid_magic(magic: &u32) -> bool { 65 | let magic_bytes = magic.to_le_bytes(); 66 | magic_bytes[0] >= 0x50 67 | && magic_bytes[0] <= 0x5F 68 | && &magic_bytes[1..4] == SKIPPABLE_FRAME_MAGIC 69 | } 70 | 71 | /// Create a new skippable frame. 72 | /// 73 | /// Panics if the nibble is greater than 15. 74 | pub fn new(nibble: u8, data: Vec<u8>) -> Self { 75 | assert!( 76 | nibble < 16, 77 | "skippable frame nibble must be between 0 and 15" 78 | ); 79 | Self { 80 | magic: u32::from_le_bytes([0x50 + nibble, 0x2A, 0x4D, 0x18]), 81 | size: data 82 | .len() 83 | .try_into() 84 | .expect("skippable frame data is too long"), 85 | data, 86 | } 87 | } 88 | 89 | /// The magic nibble of this frame. 90 | pub fn nibble(&self) -> u8 { 91 | (self.magic.to_le_bytes()[0] - 0x50) & 0x0F 92 | } 93 | 94 | /// The length of the frame's content. 95 | pub fn size(&self) -> usize { 96 | self.size as usize 97 | } 98 | } 99 | 100 | /// A Zstandard frame. 101 | /// 102 | /// See [`ZstandardFrameHeader`] and [`ZstandardBlock`] to read manually (without loading it all in 103 | /// memory at once). 104 | /// 105 | /// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-zstandard-frames) 106 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)] 107 | #[deku(endian = "little")] 108 | pub struct ZstandardFrame { 109 | /// Header. 110 | pub header: ZstandardFrameHeader, 111 | 112 | /// Blocks. 113 | /// 114 | /// Those are the actual content of the frame. 115 | #[deku(until = "|b: &ZstandardBlock| b.header.last")] 116 | pub blocks: Vec<ZstandardBlock>, 117 | 118 | /// Optional 32-bit checksum. 119 | /// 120 | /// The lower 4 bytes of the [xxhash64](https://cyan4973.github.io/xxHash/) digested from the 121 | /// original content and a seed of zero. 122 | /// 123 | /// Only present if [`ZstandardFrameDescriptor::checksum`] is set. 124 | #[deku(bytes = 4, cond = "header.frame_descriptor.checksum")] 125 | pub checksum: Option<u32>, 126 | } 127 | 128 | /// A Zstandard Frame header. 129 | /// 130 | /// This doesn't include the blocks and checksum, so you need to do your own accounting and parse 131 | /// the blocks until the last, then read the checksum if it's present.
132 | /// an easier interface, at the cost of loading all the blocks in memory.
133 | ///
134 | /// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-zstandard-frames)
135 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)]
136 | #[deku(
137 | 	magic = b"\x28\xB5\x2F\xFD",
138 | 	endian = "endian",
139 | 	ctx = "endian: deku::ctx::Endian",
140 | 	ctx_default = "deku::ctx::Endian::Little"
141 | )]
142 | pub struct ZstandardFrameHeader {
143 | 	/// The frame descriptor.
144 | 	///
145 | 	/// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.1.1)
146 | 	///
147 | 	/// Describes what other fields are present in the frame header.
148 | 	pub frame_descriptor: ZstandardFrameDescriptor,
149 | 
150 | 	/// Minimum memory needed to decode the frame.
151 | 	///
152 | 	/// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-window-descriptor)
153 | 	#[deku(bytes = 1, cond = "!frame_descriptor.single_segment")]
154 | 	pub window_descriptor: Option<u8>,
155 | 
156 | 	/// Dictionary ID.
157 | 	///
158 | 	/// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.1.3)
159 | 	///
160 | 	/// See [`ZstandardFrameHeader::dictionary_id()`] for the value as an integer.
161 | 	#[deku(count = "frame_descriptor.did_length()")]
162 | 	pub did: Vec<u8>,
163 | 
164 | 	/// Original (uncompressed) size.
165 | 	///
166 | 	/// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-frame_content_size)
167 | 	///
168 | 	/// This field is optional.
169 | 	///
170 | 	/// This needs to be interpreted before it can be used. See [`ZstandardFrameHeader::uncompressed_size()`].
171 | 	#[deku(count = "frame_descriptor.fcs_length()")]
172 | 	pub frame_content_size: Vec<u8>,
173 | }
174 | 
175 | impl ZstandardFrameHeader {
176 | 	/// The uncompressed length of the frame's content in bytes.
177 | 	pub fn uncompressed_size(&self) -> u64 {
178 | 		match self.frame_descriptor.fcs_length() {
179 | 			0 => 0,
180 | 			1 => u64::from(self.frame_content_size[0]),
181 | 			2 => {
182 | 				u64::from(u16::from_le_bytes([
183 | 					self.frame_content_size[0],
184 | 					self.frame_content_size[1],
185 | 				])) + 256
186 | 			}
187 | 			4 => u64::from(u32::from_le_bytes([
188 | 				self.frame_content_size[0],
189 | 				self.frame_content_size[1],
190 | 				self.frame_content_size[2],
191 | 				self.frame_content_size[3],
192 | 			])),
193 | 			8 => u64::from_le_bytes([
194 | 				self.frame_content_size[0],
195 | 				self.frame_content_size[1],
196 | 				self.frame_content_size[2],
197 | 				self.frame_content_size[3],
198 | 				self.frame_content_size[4],
199 | 				self.frame_content_size[5],
200 | 				self.frame_content_size[6],
201 | 				self.frame_content_size[7],
202 | 			]),
203 | 			_ => unreachable!(),
204 | 		}
205 | 	}
206 | 
207 | 	/// The dictionary ID as an integer (the `did` field is stored little-endian).
208 | 	pub fn dictionary_id(&self) -> u32 {
209 | 		self.did.iter().rev().fold(0, |acc, &x| (acc << 8) | u32::from(x))
210 | 	}
211 | }
212 | 
213 | /// Frame descriptor for a [Zstandard Frame](ZstandardFrame).
214 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)]
215 | #[deku(endian = "endian", ctx = "endian: deku::ctx::Endian")]
216 | pub struct ZstandardFrameDescriptor {
217 | 	/// [Frame content size (FCS)](ZstandardFrameHeader::frame_content_size) field size flag.
218 | 	///
219 | 	/// This is _not_ the size of the FCS field itself, but a flag that needs to be interpreted in
220 | 	/// conjunction with [`single_segment`](ZstandardFrameDescriptor::single_segment) to determine
221 | 	/// the size of the FCS field.
222 | 	///
223 | 	/// The [`ZstandardFrameDescriptor::fcs_length()`] method performs this calculation.
224 | #[deku(bits = 2)] 225 | pub fcs_size: u8, 226 | 227 | /// If this flag is set, data must be regenerated within a single continuous memory segment. 228 | /// 229 | /// This is also used in the calculation for [`ZstandardFrame::frame_content_size`]'s length. 230 | #[deku(bits = 1)] 231 | pub single_segment: bool, 232 | 233 | /// Unused. Always false. 234 | #[deku(bits = 1)] 235 | pub unused_bit: bool, 236 | 237 | /// Reserved. Always false. 238 | #[deku(bits = 1)] 239 | pub reserved_bit: bool, 240 | 241 | /// Whether the frame has a [checksum](ZstandardFrame::checksum). 242 | #[deku(bits = 1)] 243 | pub checksum: bool, 244 | 245 | /// [Dictionary ID (DID)](ZstandardFrame::did) field size flag. 246 | /// 247 | /// This is _not_ the size of the DID field itself, but a flag that needs to be interpreted to 248 | /// determine the size of the DID field. 249 | /// 250 | /// The [`ZstandardFrameDescriptor::did_length()`] method performs this calculation. 251 | #[deku(bits = 2)] 252 | pub did_size: u8, 253 | } 254 | 255 | impl ZstandardFrameDescriptor { 256 | /// The length in bytes of the [DID](ZstandardFrame::did) field. 257 | pub fn did_length(&self) -> usize { 258 | match self.did_size { 259 | 0 => 0, 260 | 1 => 1, 261 | 2 => 2, 262 | 3 => 4, 263 | _ => unreachable!(), 264 | } 265 | } 266 | 267 | /// The length in bytes of the [FCS](ZstandardFrame::frame_content_size) field. 268 | pub fn fcs_length(&self) -> usize { 269 | match self.fcs_size { 270 | 0 if self.single_segment => 1, 271 | 0 => 0, 272 | 1 => 2, 273 | 2 => 4, 274 | 3 => 8, 275 | _ => unreachable!(), 276 | } 277 | } 278 | } 279 | 280 | /// A Zstandard block. 281 | /// 282 | /// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-blocks) 283 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)] 284 | #[deku( 285 | endian = "endian", 286 | ctx = "endian: deku::ctx::Endian", 287 | ctx_default = "deku::ctx::Endian::Little" 288 | )] 289 | pub struct ZstandardBlock { 290 | /// The block header. 291 | pub header: ZstandardBlockHeader, 292 | 293 | /// The block data. 294 | #[deku(count = "header.actual_size()")] 295 | pub data: Vec, 296 | } 297 | 298 | /// The header for a Zstandard block. 299 | /// 300 | /// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-blocks) 301 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)] 302 | #[deku( 303 | endian = "endian", 304 | ctx = "endian: deku::ctx::Endian", 305 | ctx_default = "deku::ctx::Endian::Little" 306 | )] 307 | pub struct ZstandardBlockHeader { 308 | #[deku(bits = "5")] 309 | size_low: u8, 310 | 311 | /// The block type. 312 | pub block_type: ZstandardBlockType, 313 | 314 | /// Whether this is the last block in the frame. 315 | #[deku(bits = "1")] 316 | pub last: bool, 317 | 318 | #[deku(bits = "16")] 319 | size_high: u16, 320 | } 321 | 322 | impl ZstandardBlockHeader { 323 | /// Create a new Zstandard block header. 
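	/// Note: the packed header holds only 21 bits of size (5 low + 16 high), so
	/// only values below 2^21 survive a round-trip; the assertion below
	/// (`size < 2^24`) is looser than that, and sizes in between are silently
	/// truncated by the bit shuffling.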
324 | pub fn new(block_type: ZstandardBlockType, last: bool, size: u32) -> Self { 325 | assert!(size < 2_u32.pow(24)); 326 | 327 | let [a, b, c, d] = u32::to_be_bytes(size << 3); 328 | let size_high = u16::from_be_bytes([b, c]); 329 | let size_low = d >> 3; 330 | tracing::trace!( 331 | field = %format!("{a:08b} {b:08b} {c:08b} {d:08b}"), 332 | high = %format!("{size_high:016b}"), 333 | low = %format!("{size_low:08b}"), 334 | "block header size bit wrangling (write)" 335 | ); 336 | 337 | Self { 338 | size_low, 339 | block_type, 340 | last, 341 | size_high, 342 | } 343 | } 344 | 345 | fn size(&self) -> u32 { 346 | let [a, b] = u16::to_be_bytes(self.size_high); 347 | let c = self.size_low << 3; 348 | let real_size = u32::from_be_bytes([0, a, b, c]) >> 3; 349 | tracing::trace!( 350 | high = %format!("{:016b}", self.size_high), 351 | low = %format!("{:08b}", self.size_low), 352 | real_dec = %real_size, 353 | real_hex = %format!("{real_size:02x?}"), 354 | "block header size bit wrangling (read)" 355 | ); 356 | 357 | real_size 358 | } 359 | 360 | /// If this is an RLE, how many times is the byte repeated? 361 | pub fn rle_count(&self) -> Option { 362 | if self.block_type == ZstandardBlockType::Rle { 363 | Some(self.size()) 364 | } else { 365 | None 366 | } 367 | } 368 | 369 | /// How many bytes of data are in this block. 370 | pub fn actual_size(&self) -> u32 { 371 | match self.block_type { 372 | ZstandardBlockType::Raw | ZstandardBlockType::Compressed => self.size(), 373 | ZstandardBlockType::Rle => 1, 374 | ZstandardBlockType::Reserved => panic!("corrupt zstd: reserved block type"), 375 | } 376 | } 377 | } 378 | 379 | /// The type of a Zstandard block. 380 | /// 381 | /// [Spec](https://datatracker.ietf.org/doc/html/rfc8878#name-block_type) 382 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)] 383 | #[deku( 384 | endian = "endian", 385 | ctx = "endian: deku::ctx::Endian", 386 | type = "u8", 387 | bits = "2" 388 | )] 389 | pub enum ZstandardBlockType { 390 | /// An uncompressed block. 391 | #[deku(id = "0b00")] // = 0 392 | Raw, 393 | 394 | /// A block with a single byte repeated many times. 395 | #[deku(id = "0b01")] // = 1 396 | Rle, 397 | 398 | /// A compressed block. 399 | #[deku(id = "0b10")] // = 2 400 | Compressed, 401 | 402 | /// Reserved. 403 | #[deku(id = "0b11")] // = 3 404 | Reserved, 405 | } 406 | -------------------------------------------------------------------------------- /crates/ozarc/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Zstd file format parser. 2 | //! 3 | //! This crate has the ambition of becoming a Zstandard implementation in pure Rust. For now, it 4 | //! only implements types for encoding and decoding the framing of the file format. 
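//!
//! As a minimal sketch of what these types enable (the bytes below form a
//! hypothetical empty skippable frame, not taken from a real archive):
//!
//! ```
//! use deku::DekuContainerRead;
//! use ozarc::framing::SkippableFrame;
//!
//! // magic 0x184D2A50 (nibble 0), then a zero length, then no payload
//! let bytes: &[u8] = &[0x50, 0x2A, 0x4D, 0x18, 0x00, 0x00, 0x00, 0x00];
//! let ((_rest, _bit_offset), frame) = SkippableFrame::from_bytes((bytes, 0)).unwrap();
//! assert_eq!(frame.nibble(), 0);
//! assert!(frame.data.is_empty());
//! ```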
5 | 6 | pub mod framing; 7 | -------------------------------------------------------------------------------- /crates/zarc-cli/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zarc-cli" 3 | version = "0.0.1" 4 | edition = "2021" 5 | 6 | authors = ["Félix Saparelli "] 7 | license = "Apache-2.0 OR MIT" 8 | description = "Archive format based on Zstd: CLI tool" 9 | keywords = ["archive", "file-format", "zstd", "zarc"] 10 | categories = ["command-line-utilities"] 11 | 12 | documentation = "https://github.com/passcod/zarc" 13 | repository = "https://github.com/passcod/zarc" 14 | 15 | [package.metadata.wix] 16 | upgrade-guid = "96E0968E-C5AC-4BFF-81DE-948816F542C8" 17 | path-guid = "52718143-483B-4009-8A46-4270582DE95E" 18 | license = false 19 | eula = false 20 | 21 | [[bin]] 22 | name = "zarc" 23 | path = "src/main.rs" 24 | 25 | [dependencies] 26 | base64ct = { version = "1.6.0", features = ["std"] } 27 | blake3 = { version = "1.5.0", features = ["rayon"] } 28 | chrono = "0.4.31" 29 | clap = { version = "4.4.11", features = ["derive", "cargo", "wrap_help", "string"] } 30 | deku = { version = "0.16.0", features = ["logging"], git = "https://github.com/sharksforarms/deku" } 31 | miette = { version = "5.10.0", features = ["fancy"] } 32 | minicbor = { version = "0.20.0", features = ["std"] } 33 | ozarc = { version = "0.0.0", path = "../ozarc" } 34 | regex = "1.10.3" 35 | tracing = "0.1.40" 36 | tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json"] } 37 | umask = "2.1.0" 38 | walkdir = "2.4.0" 39 | zarc = { version = "0.0.0", path = "../zarc" } 40 | zstd-safe = { version = "7.0.0", features = ["experimental"] } 41 | 42 | [build-dependencies] 43 | bosion = "1.0.2" 44 | embed-resource = "2.4.0" 45 | -------------------------------------------------------------------------------- /crates/zarc-cli/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | embed_resource::compile("manifest.rc", embed_resource::NONE); 3 | bosion::gather(); 4 | 5 | if std::env::var("CARGO_FEATURE_EYRA").is_ok() { 6 | println!("cargo:rustc-link-arg=-nostartfiles"); 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /crates/zarc-cli/manifest.rc: -------------------------------------------------------------------------------- 1 | #define RT_MANIFEST 24 2 | 1 RT_MANIFEST "zarc.exe.manifest" 3 | -------------------------------------------------------------------------------- /crates/zarc-cli/src/args.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Debug, path::PathBuf}; 2 | 3 | use clap::{ArgAction, Parser, Subcommand, ValueHint}; 4 | 5 | use crate::{ 6 | // debug::DebugArgs, 7 | list_files::ListFilesArgs, 8 | pack::PackArgs, 9 | unpack::UnpackArgs, 10 | }; 11 | 12 | /// Zarc: a novel archive format and tool. 13 | /// 14 | /// Zarc is a file archive format that uses both Zstd compression and the Zstd file format. It is 15 | /// designed as a replacement for tar and zip rather than zstd, gzip, bzip2, or xz. This is the 16 | /// reference implementation. 17 | #[derive(Debug, Clone, Parser)] 18 | #[command( 19 | name = "zarc", 20 | bin_name = "zarc", 21 | author, 22 | version, 23 | infer_subcommands = true, 24 | after_help = "Want more detail? Try the long '--help' flag!", 25 | after_long_help = "Didn't expect this much output? Use the short '-h' flag to get short help." 
26 | )] 27 | #[cfg_attr(debug_assertions, command(before_help = "⚠ DEBUG BUILD ⚠"))] 28 | pub struct Args { 29 | /// Set diagnostic log level 30 | /// 31 | /// This enables diagnostic logging, which is useful for investigating bugs or gaining more 32 | /// insight into Zarc encoding and decoding. Use multiple times to increase verbosity. 33 | /// 34 | /// Goes up to '-vvvv'. When submitting bug reports, default to a '-vvv' log level. 35 | /// 36 | /// You may want to use with '--log-file' to avoid polluting your terminal. 37 | /// 38 | /// If $RUST_LOG is set, this flag is ignored. 39 | #[arg( 40 | long, 41 | short, 42 | action = ArgAction::Count, 43 | num_args = 0, 44 | )] 45 | pub verbose: Option, 46 | 47 | /// Write diagnostic logs to a file 48 | /// 49 | /// This writes diagnostic logs to a file, instead of the terminal, in JSON format. If a log 50 | /// level was not already specified, this will set it to '-vvv'. 51 | /// 52 | /// If a path is not provided, the default is the working directory. Note that with 53 | /// '--ignore-nothing', the write events to the log will likely get picked up by Watchexec, 54 | /// causing a loop; prefer setting a path outside of the watched directory. 55 | /// 56 | /// If the path provided is a directory, a file will be created in that directory. The file name 57 | /// will be the current date and time, in the format 'zarc.YYYY-MM-DDTHH-MM-SSZ.log'. 58 | #[arg( 59 | long, 60 | num_args = 0..=1, 61 | default_missing_value = ".", 62 | value_hint = ValueHint::AnyPath, 63 | value_name = "PATH", 64 | )] 65 | pub log_file: Option, 66 | 67 | /// What to do 68 | #[command(subcommand)] 69 | pub action: Action, 70 | } 71 | 72 | #[derive(Debug, Clone, Subcommand)] 73 | pub enum Action { 74 | /// Pack files into a Zarc archive. 75 | Pack(PackArgs), 76 | 77 | /// Unpack files from Zarc archive. 78 | Unpack(UnpackArgs), 79 | 80 | /// Walk a Zarc and print filenames. 81 | ListFiles(ListFilesArgs), 82 | // /// Walk a Zarc and print detailed information about its structure. 83 | // Debug(DebugArgs), 84 | } 85 | 86 | #[test] 87 | fn verify_cli() { 88 | use clap::CommandFactory; 89 | Args::command().debug_assert() 90 | } 91 | -------------------------------------------------------------------------------- /crates/zarc-cli/src/list_files.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | use clap::{Parser, ValueHint}; 4 | use regex::Regex; 5 | use tracing::info; 6 | use zarc::{decode::Decoder, directory::SpecialFileKind}; 7 | 8 | #[derive(Debug, Clone, Parser)] 9 | pub struct ListFilesArgs { 10 | /// Input file. 11 | #[arg( 12 | value_hint = ValueHint::AnyPath, 13 | value_name = "PATH", 14 | )] 15 | pub input: PathBuf, 16 | 17 | /// List only files. 18 | #[arg(long)] 19 | pub only_files: bool, 20 | 21 | /// Indicate filetypes with suffixes. 22 | /// 23 | /// Directories are marked with a '/' suffix, symlinks with `@`, hardlinks with `#`. 24 | #[arg(long)] 25 | pub decorate: bool, 26 | 27 | /// Filter files by name (with a regex). 28 | /// 29 | /// Can be given multiple times, and files will be matched if they match any of the regexes. 
30 | #[arg(long, value_name = "REGEX")] 31 | pub filter: Vec, 32 | } 33 | 34 | pub(crate) fn list_files(args: ListFilesArgs) -> miette::Result<()> { 35 | info!("initialise decoder"); 36 | let mut zarc = Decoder::open(args.input)?; 37 | zarc.read_directory()?; 38 | let zarc = zarc; 39 | 40 | info!("list files"); 41 | for entry in zarc.files() { 42 | if args.only_files && entry.special.is_some() { 43 | continue; 44 | } 45 | 46 | let name = entry.name.to_path().display().to_string(); 47 | if !args.filter.is_empty() && !args.filter.iter().any(|filter| filter.is_match(&name)) { 48 | continue; 49 | } 50 | 51 | print!("{name}"); 52 | match entry.special.as_ref().and_then(|sp| sp.kind) { 53 | Some(SpecialFileKind::Directory) => print!("/"), 54 | Some(kind) if kind.is_symlink() => print!("@"), 55 | Some(kind) if kind.is_hardlink() => print!("#"), 56 | _ => (), 57 | } 58 | 59 | println!(); 60 | } 61 | 62 | Ok(()) 63 | } 64 | -------------------------------------------------------------------------------- /crates/zarc-cli/src/logs.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | env::var, 3 | fs::{metadata, File}, 4 | io::{Error, Result}, 5 | sync::Mutex, 6 | }; 7 | 8 | use tracing::info; 9 | 10 | use crate::args::Args; 11 | 12 | pub fn from_env() -> Result { 13 | if var("RUST_LOG").is_ok() { 14 | tracing_subscriber::fmt::try_init().map_err(Error::other)?; 15 | Ok(true) 16 | } else { 17 | Ok(false) 18 | } 19 | } 20 | 21 | pub fn from_args(args: &Args) -> Result<()> { 22 | let verbosity = args.verbose.unwrap_or(0); 23 | if verbosity > 0 { 24 | let log_file = if let Some(file) = &args.log_file { 25 | let is_dir = metadata(file).map_or(false, |info| info.is_dir()); 26 | let path = if is_dir { 27 | let filename = format!( 28 | "zarc.{}.log", 29 | chrono::Utc::now().format("%Y-%m-%dT%H-%M-%SZ") 30 | ); 31 | file.join(filename) 32 | } else { 33 | file.to_owned() 34 | }; 35 | 36 | // TODO: use tracing-appender instead 37 | Some(File::create(path)?) 
38 | } else { 39 | None 40 | }; 41 | 42 | let mut builder = tracing_subscriber::fmt().with_env_filter(match verbosity { 43 | 0 => unreachable!("checked by if earlier"), 44 | 1 => "warn", 45 | 2 => "info", 46 | 3 => "debug", 47 | _ => "trace", 48 | }); 49 | 50 | if verbosity > 2 { 51 | use tracing_subscriber::fmt::format::FmtSpan; 52 | builder = builder.with_span_events(FmtSpan::NEW | FmtSpan::CLOSE); 53 | } 54 | 55 | match if let Some(writer) = log_file { 56 | builder.json().with_writer(Mutex::new(writer)).try_init() 57 | } else if verbosity > 3 { 58 | builder.pretty().try_init() 59 | } else { 60 | builder.try_init() 61 | } { 62 | Ok(_) => info!("logging initialised"), 63 | Err(e) => eprintln!("Failed to initialise logging, continuing with none\n{e}"), 64 | } 65 | } 66 | 67 | Ok(()) 68 | } 69 | -------------------------------------------------------------------------------- /crates/zarc-cli/src/main.rs: -------------------------------------------------------------------------------- 1 | #![warn(clippy::unwrap_used)] 2 | #![deny(rust_2018_idioms)] 3 | 4 | use clap::Parser; 5 | use miette::IntoDiagnostic; 6 | use tracing::{debug, warn}; 7 | 8 | use crate::args::Action; 9 | 10 | mod args; 11 | // mod debug; 12 | mod list_files; 13 | mod logs; 14 | mod pack; 15 | mod unpack; 16 | 17 | fn main() -> miette::Result<()> { 18 | let logs_on = logs::from_env().into_diagnostic()?; 19 | 20 | debug!("parsing arguments"); 21 | let args = args::Args::parse(); 22 | 23 | if logs_on { 24 | warn!("ignoring logging options from args"); 25 | } else { 26 | logs::from_args(&args).into_diagnostic()?; 27 | } 28 | 29 | debug!(?args, "got arguments"); 30 | 31 | match args.action { 32 | // Action::Debug(args) => debug::debug(args).into_diagnostic(), 33 | Action::ListFiles(args) => list_files::list_files(args), 34 | Action::Pack(args) => pack::pack(args).into_diagnostic(), 35 | Action::Unpack(args) => unpack::unpack(args), 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /crates/zarc-cli/src/pack.rs: -------------------------------------------------------------------------------- 1 | use std::{fs::File, path::PathBuf}; 2 | 3 | use base64ct::{Base64, Encoding}; 4 | use clap::{Parser, ValueHint}; 5 | use tracing::{debug, info}; 6 | use walkdir::WalkDir; 7 | use zarc::encode::{Encoder, ZstdParameter, ZstdStrategy}; 8 | 9 | #[derive(Debug, Clone, Parser)] 10 | pub struct PackArgs { 11 | /// Output file. 12 | #[arg(long, 13 | value_hint = ValueHint::AnyPath, 14 | value_name = "PATH", 15 | )] 16 | pub output: PathBuf, 17 | 18 | /// Paths to pack. 19 | #[arg( 20 | value_hint = ValueHint::AnyPath, 21 | value_name = "PATH", 22 | )] 23 | pub paths: Vec, 24 | 25 | /// Compression level. 26 | /// 27 | /// Can be negative (disables compression), or up to 20 (22 with an ultra strategy). 28 | #[arg( 29 | long, 30 | allow_negative_numbers = true, 31 | value_parser = clap::value_parser!(i32).range((-1<<17)..22), 32 | )] 33 | pub level: Option, 34 | 35 | /// Zstd parameter. 36 | /// 37 | /// Some values take a boolean, others take an unsigned integer, and the Strategy parameter 38 | /// takes a string. By default, ChecksumFlag is true, and all others are at zstd default. 39 | /// 40 | /// This is an advanced API and not all values will produce valid Zarcs, caution advised. 41 | #[arg( 42 | long, 43 | value_name = "PARAM=VALUE", 44 | value_parser = ParseZstdParam, 45 | )] 46 | pub zstd: Vec, 47 | 48 | /// Disable compression completely. 
49 | /// 50 | /// This will write all file content uncompressed, not even going through zstd at all. 51 | /// 52 | /// Use this if you want to compress the entire Zarc externally. 53 | #[arg(long)] 54 | pub store: bool, 55 | 56 | /// Follow symlinks. 57 | /// 58 | /// This destroys symlinks inside the Zarc: when unpacked, files will be duplicated. 59 | /// 60 | /// You may want '--follow-external-symlinks' instead. 61 | #[arg(long, short = 'L')] 62 | pub follow_symlinks: bool, 63 | 64 | /// Follow external symlinks. 65 | /// 66 | /// By default, zarc stores all symlinks as symlinks. If symlinks point to content external to 67 | /// the Zarc, the symlink when unpacked may point somewhere different or break. 68 | /// 69 | /// With this flag, zarc will evaluate symlinks and store them as symlinks if they are relative 70 | /// symlinks that point to other files in the Zarc, but will follow symlinks (and flatten them 71 | /// into stored files) if they are absolute or relative but pointing "outside" of the Zarc. 72 | /// 73 | /// See also the variant '--follow-and-store-external-symlinks'. 74 | #[arg(long, hide = true)] 75 | pub follow_external_symlinks: bool, 76 | 77 | /// Follow external symlinks, but also store the symlink target. 78 | /// 79 | /// Like '--follow-external-symlinks', but stores the symlink's original external target path 80 | /// alongside the stored file content. When unpacking, Zarc can decide to restore external symlinks 81 | /// or to unpack the stored content. 82 | #[arg(long, hide = true)] 83 | pub follow_and_store_external_symlinks: bool, 84 | } 85 | 86 | #[derive(Clone)] 87 | struct ParseZstdParam; 88 | 89 | const ZSTD_PARAM_LIST_BOOL: [&str; 4] = [ 90 | "EnableLongDistanceMatching", 91 | "ContentSizeFlag", 92 | "ChecksumFlag", 93 | "DictIdFlag", 94 | ]; 95 | 96 | const ZSTD_PARAM_LIST_U32: [&str; 13] = [ 97 | "WindowLog", 98 | "HashLog", 99 | "ChainLog", 100 | "SearchLog", 101 | "MinMatch", 102 | "TargetLength", 103 | "LdmHashLog", 104 | "LdmMinMatch", 105 | "LdmBucketSizeLog", 106 | "LdmHashRateLog", 107 | "NbWorkers", 108 | "JobSize", 109 | "OverlapSizeLog", 110 | ]; 111 | 112 | const ZSTD_STRATEGY_NAMES: [&str; 9] = [ 113 | "fast", "dfast", "greedy", "lazy", "lazy2", "btlazy2", "btopt", "btultra", "btultra2", 114 | ]; 115 | 116 | impl clap::builder::TypedValueParser for ParseZstdParam { 117 | type Value = ZstdParameter; 118 | 119 | fn parse_ref( 120 | &self, 121 | cmd: &clap::Command, 122 | arg: Option<&clap::Arg>, 123 | value: &std::ffi::OsStr, 124 | ) -> Result { 125 | use clap::{builder::*, error::*}; 126 | let val = StringValueParser::new().parse_ref(cmd, arg, value)?; 127 | 128 | let (left, right) = val.split_once('=').ok_or_else(|| { 129 | let mut err = 130 | Error::raw(ErrorKind::ValueValidation, "expected a key=value pair").with_cmd(cmd); 131 | if let Some(arg) = arg { 132 | err.insert( 133 | ContextKind::InvalidArg, 134 | ContextValue::String(arg.to_string()), 135 | ); 136 | } 137 | err 138 | })?; 139 | 140 | match left { 141 | "Strategy" => Ok(ZstdParameter::Strategy(match right { 142 | "fast" => ZstdStrategy::ZSTD_fast, 143 | "dfast" => ZstdStrategy::ZSTD_dfast, 144 | "greedy" => ZstdStrategy::ZSTD_greedy, 145 | "lazy" => ZstdStrategy::ZSTD_lazy, 146 | "lazy2" => ZstdStrategy::ZSTD_lazy2, 147 | "btlazy2" => ZstdStrategy::ZSTD_btlazy2, 148 | "btopt" => ZstdStrategy::ZSTD_btopt, 149 | "btultra" => ZstdStrategy::ZSTD_btultra, 150 | "btultra2" => ZstdStrategy::ZSTD_btultra2, 151 | _ => { 152 | return Err(Error::raw( 153 | ErrorKind::ValueValidation, 154 | 
"unknown Strategy value", 155 | )) 156 | } 157 | })), 158 | flag if ZSTD_PARAM_LIST_BOOL.contains(&flag) => { 159 | let val: bool = 160 | BoolishValueParser::new().parse_ref(cmd, arg, std::ffi::OsStr::new(right))?; 161 | Ok(match flag { 162 | "EnableLongDistanceMatching" => ZstdParameter::EnableLongDistanceMatching(val), 163 | "ContentSizeFlag" => ZstdParameter::ContentSizeFlag(val), 164 | "ChecksumFlag" => ZstdParameter::ChecksumFlag(val), 165 | "DictIdFlag" => ZstdParameter::DictIdFlag(val), 166 | _ => unreachable!(), 167 | }) 168 | } 169 | tune if ZSTD_PARAM_LIST_U32.contains(&tune) => { 170 | let val: u64 = RangedU64ValueParser::new() 171 | .range(0..(u32::MAX as _)) 172 | .parse_ref(cmd, arg, std::ffi::OsStr::new(right))?; 173 | 174 | #[allow(clippy::unwrap_used)] // UNWRAP: checked by range 175 | let val = u32::try_from(val).unwrap(); 176 | 177 | Ok(match tune { 178 | "WindowLog" => ZstdParameter::WindowLog(val), 179 | "HashLog" => ZstdParameter::HashLog(val), 180 | "ChainLog" => ZstdParameter::ChainLog(val), 181 | "SearchLog" => ZstdParameter::SearchLog(val), 182 | "MinMatch" => ZstdParameter::MinMatch(val), 183 | "TargetLength" => ZstdParameter::TargetLength(val), 184 | "LdmHashLog" => ZstdParameter::LdmHashLog(val), 185 | "LdmMinMatch" => ZstdParameter::LdmMinMatch(val), 186 | "LdmBucketSizeLog" => ZstdParameter::LdmBucketSizeLog(val), 187 | "LdmHashRateLog" => ZstdParameter::LdmHashRateLog(val), 188 | "NbWorkers" => ZstdParameter::NbWorkers(val), 189 | "JobSize" => ZstdParameter::JobSize(val), 190 | "OverlapSizeLog" => ZstdParameter::OverlapSizeLog(val), 191 | _ => unreachable!(), 192 | }) 193 | } 194 | _ => Err(Error::raw(ErrorKind::ValueValidation, "unknown parameter")), 195 | } 196 | } 197 | 198 | fn possible_values( 199 | &self, 200 | ) -> Option + '_>> { 201 | Some(Box::new( 202 | ZSTD_PARAM_LIST_BOOL 203 | .iter() 204 | .map(|name| clap::builder::PossibleValue::new(format!("{name}=true"))) 205 | .chain( 206 | ZSTD_PARAM_LIST_U32 207 | .iter() 208 | .map(|name| clap::builder::PossibleValue::new(format!("{name}=0"))), 209 | ) 210 | .chain( 211 | ZSTD_STRATEGY_NAMES.iter().map(|value| { 212 | clap::builder::PossibleValue::new(format!("Strategy={value}")) 213 | }), 214 | ), 215 | )) 216 | } 217 | } 218 | 219 | pub(crate) fn pack(args: PackArgs) -> std::io::Result<()> { 220 | info!(path=?args.output, "create output file"); 221 | let mut file = File::create(args.output)?; 222 | 223 | info!("initialise encoder"); 224 | let mut zarc = Encoder::new(&mut file)?; 225 | 226 | debug!("enable zstd checksums"); 227 | zarc.set_zstd_parameter(ZstdParameter::ChecksumFlag(true))?; 228 | 229 | if let Some(level) = args.level { 230 | debug!(%level, "set compression level"); 231 | zarc.set_zstd_parameter(ZstdParameter::CompressionLevel(level))?; 232 | } 233 | 234 | for param in args.zstd { 235 | debug!(?param, "set zstd parameter"); 236 | zarc.set_zstd_parameter(param)?; 237 | } 238 | 239 | if args.store { 240 | debug!("disable compression for content"); 241 | zarc.enable_compression(false); 242 | } 243 | 244 | for path in &args.paths { 245 | info!("walk {path:?}"); 246 | for entry in WalkDir::new(path).follow_links(args.follow_symlinks) { 247 | let entry = match entry { 248 | Ok(file) => file, 249 | Err(err) => { 250 | eprintln!("read error: {err}"); 251 | continue; 252 | } 253 | }; 254 | 255 | let filename = entry.path(); 256 | debug!("read {filename:?}"); 257 | 258 | let mut file = zarc.build_file_with_metadata(filename, args.follow_symlinks)?; 259 | if entry.file_type().is_file() { 260 | let 
content = std::fs::read(filename)?; 261 | file.digest(zarc.add_data_frame(&content)?); 262 | } 263 | zarc.add_file_entry(file)?; 264 | } 265 | } 266 | 267 | info!("finalising zarc"); 268 | let digest = zarc.finalise()?; 269 | 270 | println!("digest: {}", Base64::encode_string(&digest)); 271 | Ok(()) 272 | } 273 | -------------------------------------------------------------------------------- /crates/zarc-cli/src/unpack.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fs::{create_dir_all, DirBuilder, File}, 3 | io::Write, 4 | path::PathBuf, 5 | }; 6 | 7 | use base64ct::{Base64, Encoding}; 8 | use clap::{Parser, ValueHint}; 9 | use miette::{bail, IntoDiagnostic}; 10 | use regex::Regex; 11 | use tracing::{error, info, warn}; 12 | use zarc::{ 13 | decode::Decoder, 14 | integrity::Digest, 15 | metadata::decode::{set_ownership, set_permissions, set_timestamps}, 16 | }; 17 | 18 | #[derive(Debug, Clone, Parser)] 19 | pub struct UnpackArgs { 20 | /// Input file. 21 | #[arg( 22 | value_hint = ValueHint::AnyPath, 23 | value_name = "PATH", 24 | )] 25 | pub input: PathBuf, 26 | 27 | /// Filter files by name (with a regex). 28 | /// 29 | /// Can be given multiple times, and files will be matched if they match any of the regexes. 30 | #[arg(long, value_name = "REGEX")] 31 | pub filter: Vec, 32 | 33 | /// Verify that the Zarc directory matches the given digest. 34 | #[arg(long, value_name = "DIGEST")] 35 | pub verify: Option, 36 | } 37 | 38 | pub(crate) fn unpack(args: UnpackArgs) -> miette::Result<()> { 39 | info!("initialise decoder"); 40 | let mut zarc = Decoder::open(args.input)?; 41 | 42 | if let Some(string) = args.verify { 43 | let expected = Digest(Base64::decode_vec(&string).into_diagnostic()?); 44 | if expected != zarc.trailer().digest { 45 | bail!( 46 | "integrity failure: zarc file digest is {}", 47 | Base64::encode_string(&zarc.trailer().digest) 48 | ); 49 | } 50 | } else { 51 | eprintln!("digest: {}", Base64::encode_string(&zarc.trailer().digest)); 52 | } 53 | 54 | zarc.read_directory()?; 55 | let zarc = zarc; 56 | 57 | // zarc.frames().for_each(|frame| { 58 | // info!(offset=%frame.offset, digest=%Base64::encode_string(frame.digest.as_slice()), "frame"); 59 | // }); 60 | 61 | let mut unpacked = 0_u64; 62 | for entry in zarc.files() { 63 | let name = entry.name.to_path().display().to_string(); 64 | if !args.filter.is_empty() && !args.filter.iter().any(|filter| filter.is_match(&name)) { 65 | continue; 66 | } 67 | 68 | if entry.is_dir() { 69 | let path = entry.name.to_path(); 70 | info!(?path, "unpack dir"); 71 | let mut dir = DirBuilder::new(); 72 | dir.recursive(true); 73 | #[cfg(unix)] 74 | if let Some(mode) = entry.mode { 75 | use std::os::unix::fs::DirBuilderExt; 76 | dir.mode(mode); 77 | } 78 | dir.create(&path).into_diagnostic()?; 79 | 80 | let file = File::open(path).into_diagnostic()?; 81 | set_metadata(entry, &file)?; 82 | } else if entry.is_normal() { 83 | if let Some(digest) = &entry.digest { 84 | extract_file(entry, digest, &zarc)?; 85 | unpacked += 1; 86 | } 87 | } 88 | } 89 | 90 | eprintln!("unpacked {unpacked} files"); 91 | Ok(()) 92 | } 93 | 94 | fn extract_file( 95 | entry: &zarc::directory::File, 96 | digest: &zarc::integrity::Digest, 97 | zarc: &Decoder, 98 | ) -> miette::Result<()> { 99 | info!(path=?entry.name.to_path(), digest=%Base64::encode_string(digest.as_slice()), "unpack file"); 100 | let path = entry.name.to_path(); 101 | 102 | if let Some(dir) = path.parent() { 103 | // create parent dir just in case its 
entry wasn't in the zarc
104 | 		create_dir_all(dir).into_diagnostic()?;
105 | 	}
106 | 
107 | 	let Some(mut frame) = zarc.read_content_frame(digest).into_diagnostic()? else {
108 | 		warn!("frame not found");
109 | 		return Ok(());
110 | 	};
111 | 
112 | 	let mut file = File::create(path).into_diagnostic()?;
113 | 
114 | 	for bytes in &mut frame {
115 | 		file.write_all(&bytes.into_diagnostic()?)
116 | 			.into_diagnostic()?;
117 | 	}
118 | 	if !frame.verify().unwrap_or(false) {
119 | 		error!(path=?entry.name, "frame verification failed!");
120 | 	}
121 | 
122 | 	set_metadata(entry, &file)?;
123 | 	Ok(())
124 | }
125 | 
126 | fn set_metadata(entry: &zarc::directory::File, file: &File) -> miette::Result<()> {
127 | 	set_ownership(file, entry).into_diagnostic()?;
128 | 
129 | 	let mut perms = file.metadata().into_diagnostic()?.permissions();
130 | 	set_permissions(&mut perms, entry).into_diagnostic()?;
131 | 	file.set_permissions(perms).into_diagnostic()?;
132 | 
133 | 	if let Some(ts) = &entry.timestamps {
134 | 		set_timestamps(file, ts).into_diagnostic()?;
135 | 	}
136 | 
137 | 	Ok(())
138 | }
139 | 
-------------------------------------------------------------------------------- /crates/zarc-cli/wix/main.wxs: --------------------------------------------------------------------------------
(WiX installer definition; the XML markup was lost in extraction.)
-------------------------------------------------------------------------------- /crates/zarc-cli/zarc.exe.manifest: --------------------------------------------------------------------------------
(Windows application manifest; the XML markup was lost in extraction, leaving only the values `true`, `UTF-8`, and `SegmentHeap`.)
-------------------------------------------------------------------------------- /crates/zarc/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "zarc"
3 | version = "0.0.0"
4 | edition = "2021"
5 | 
6 | authors = ["Félix Saparelli "]
7 | license = "Apache-2.0 OR MIT"
8 | description = "Archive format based on Zstd"
9 | keywords = ["archive", "file-format", "zstd", "zarc"]
10 | categories = ["encoding", "compression"]
11 | 
12 | documentation = "https://github.com/passcod/zarc"
13 | repository = "https://github.com/passcod/zarc"
14 | 
15 | [dependencies]
16 | blake3 = "1.5.0"
17 | chrono = "0.4.31"
18 | deku = { version = "0.16.0", features = ["logging"], git = "https://github.com/sharksforarms/deku" }
19 | log = "0.4.20"
20 | miette = "5.10.0"
21 | minicbor = { version = "0.20.0", features = ["derive", "std"] }
22 | ozarc = { version = "0.0.0", path = "../ozarc" }
23 | subtle = "2.5.0"
24 | thiserror = "1.0.56"
25 | tracing = "0.1.40"
26 | visibility = "0.1.0"
27 | zstd = "0.13.0"
28 | zstd-safe = { version = "7.0.0", features = ["std"] }
29 | 
30 | [target.'cfg(unix)'.dependencies]
31 | nix = { version = "0.27.1", features = ["user"] }
32 | xattr = { version = "1.2.0", optional = true }
33 | 
34 | [target.'cfg(target_os="linux")'.dependencies]
35 | e2p-fileflags = { version = "0.1.0", optional = true }
36 | 
37 | 
[target.'cfg(windows)'.dependencies] 38 | windows = { version = "0.52.0", features = ["Win32_Storage_FileSystem"], optional = true } 39 | 40 | [features] 41 | default = ["metadata"] 42 | 43 | ## Enable the metadata gathering module, which pulls in platform-specific libraries 44 | metadata = ["dep:e2p-fileflags", "nix/fs", "dep:windows", "dep:xattr"] 45 | 46 | ## Expose internal encoding/decoding functions 47 | expose-internals = [] 48 | 49 | [package.metadata.docs.rs] 50 | all-features = true 51 | rustdoc-args = ["--cfg", "docsrs"] 52 | 53 | -------------------------------------------------------------------------------- /crates/zarc/src/constants.rs: -------------------------------------------------------------------------------- 1 | /// Magic bytes 2 | pub const ZARC_MAGIC: [u8; 3] = [0x65, 0xAA, 0xDC]; 3 | 4 | /// Zarc format version 5 | pub const ZARC_VERSION: u8 = 1; 6 | -------------------------------------------------------------------------------- /crates/zarc/src/decode.rs: -------------------------------------------------------------------------------- 1 | //! Decoder types and functions. 2 | 3 | use std::{ 4 | collections::{BTreeMap, HashMap}, 5 | num::NonZeroU16, 6 | }; 7 | 8 | use crate::{ 9 | directory::{Edition, File, Frame, Pathname}, 10 | integrity::Digest, 11 | ondemand::OnDemand, 12 | trailer::Trailer, 13 | }; 14 | 15 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 16 | #[doc(inline)] 17 | pub(crate) use self::zstd_iterator::ZstdFrameIterator; 18 | 19 | #[doc(inline)] 20 | pub use self::frame_iterator::FrameIterator; 21 | 22 | mod directory; 23 | pub mod error; 24 | mod frame_iterator; 25 | mod open; 26 | mod zstd_iterator; 27 | 28 | /// Decoder context. 29 | /// 30 | /// Reader needs to be Seek, as Zarc reads the file backwards from the end to find the trailer and directory. 31 | #[derive(Debug)] 32 | pub struct Decoder { 33 | // given by user 34 | reader: R, 35 | 36 | // obtained from trailer 37 | file_length: u64, 38 | trailer: Trailer, 39 | 40 | // obtained from directory 41 | editions: BTreeMap, 42 | files: Vec, 43 | frames: HashMap, 44 | files_by_name: BTreeMap>, 45 | files_by_digest: HashMap>, 46 | } 47 | 48 | impl Decoder { 49 | /// Length of the file in bytes. 50 | pub fn file_length(&self) -> u64 { 51 | self.file_length 52 | } 53 | 54 | /// The trailer metadata. 55 | pub fn trailer(&self) -> &Trailer { 56 | &self.trailer 57 | } 58 | 59 | /// Iterate through the editions. 60 | pub fn editions(&self) -> impl Iterator { 61 | self.editions.values() 62 | } 63 | 64 | /// Get edition metadata by number. 65 | pub fn edition(&self, number: impl TryInto) -> Option<&Edition> { 66 | number 67 | .try_into() 68 | .ok() 69 | .and_then(|number| self.editions.get(&number)) 70 | } 71 | 72 | /// Get the latest (current) edition. 73 | pub fn latest_edition(&self) -> Option<&Edition> { 74 | self.editions.values().last() 75 | } 76 | 77 | /// Iterate through the files. 78 | pub fn files(&self) -> impl Iterator { 79 | self.files.iter() 80 | } 81 | 82 | /// Get file entries that have a particular (path)name. 83 | pub fn files_by_name(&self, name: impl Into) -> Option> { 84 | self.files_by_name 85 | .get(&name.into()) 86 | .map(Vec::as_slice) 87 | .map(|v| v.iter().filter_map(|i| self.files.get(*i)).collect()) 88 | } 89 | 90 | /// Get files that reference a frame from its digest. 
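	///
	/// Several files can share one content frame (deduplicated content), so
	/// this returns a list. A minimal sketch, assuming `read_directory()` has
	/// already been called:
	///
	/// ```no_run
	/// # use zarc::{decode::Decoder, integrity::Digest, ondemand::OnDemand};
	/// # fn list_sharers<R: OnDemand>(zarc: &Decoder<R>, digest: &Digest) {
	/// for file in zarc.files_by_digest(digest).unwrap_or_default() {
	///     println!("{}", file.name.to_path().display());
	/// }
	/// # }
	/// ```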
91 | pub fn files_by_digest(&self, digest: &Digest) -> Option> { 92 | self.files_by_digest 93 | .get(digest) 94 | .map(Vec::as_slice) 95 | .map(|v| v.iter().filter_map(|i| self.files.get(*i)).collect()) 96 | } 97 | 98 | /// Iterate through the frames. 99 | pub fn frames(&self) -> impl Iterator { 100 | self.frames.values() 101 | } 102 | 103 | /// Get frame metadata by digest. 104 | pub fn frame(&self, digest: &Digest) -> Option<&Frame> { 105 | self.frames.get(digest) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /crates/zarc/src/decode/directory.rs: -------------------------------------------------------------------------------- 1 | use std::mem::take; 2 | 3 | use blake3::Hasher; 4 | use deku::DekuContainerRead; 5 | use ozarc::framing::{ZstandardBlockHeader, ZstandardFrameHeader}; 6 | use tracing::{debug, instrument, trace, warn}; 7 | 8 | use crate::{ 9 | directory::{Element, ElementFrame}, 10 | integrity::Digest, 11 | ondemand::OnDemand, 12 | }; 13 | 14 | use super::{ 15 | error::{ErrorKind, Result, SimpleError}, 16 | Decoder, 17 | }; 18 | 19 | impl Decoder { 20 | /// Read a Zstandard frame header. 21 | /// 22 | /// This reads the frame header, checks that it's a Zstandard frame, and leaves the reader at 23 | /// the start of the first block. The frame header is returned. 24 | /// 25 | /// This does not read the frame's payload: you need to do that yourself, reading blocks one at 26 | /// a time until the one marked `last`, and then reading the checksum 27 | /// [if present as per this header](ozarc::framing::ZstandardFrameDescriptor.checksum). 28 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 29 | #[instrument(level = "debug", skip(reader))] 30 | fn read_zstandard_frame_header(reader: &mut R::Reader) -> Result { 31 | let (bits_read, header) = 32 | ZstandardFrameHeader::from_reader((reader, 0)).map_err(SimpleError::from_deku)?; 33 | debug!(%bits_read, ?header, "read zstandard frame header"); 34 | Ok(header) 35 | } 36 | 37 | /// Read a Zstandard frame block header. 38 | /// 39 | /// This reads the block header, checks that it's a Zstandard block, and leaves the reader at 40 | /// the start of the block's payload. The block header is returned. 41 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 42 | #[instrument(level = "debug", skip(reader))] 43 | fn read_zstandard_block_header(reader: &mut R::Reader) -> Result { 44 | let (bits_read, header) = 45 | ZstandardBlockHeader::from_reader((reader, 0)).map_err(SimpleError::from_deku)?; 46 | debug!(%bits_read, ?header, "read zstandard block header"); 47 | Ok(header) 48 | } 49 | 50 | /// Read the Zarc Directory. 51 | /// 52 | /// After this returns, the Zarc file is ready for reading, using the files() iterator to sift 53 | /// through the available file records and extract them on demand. 
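	///
	/// A minimal usage sketch (the path is hypothetical; `PathBuf` is the
	/// [`OnDemand`] reader the CLI uses):
	///
	/// ```no_run
	/// # fn main() -> zarc::decode::error::Result<()> {
	/// let mut zarc = zarc::decode::Decoder::open(std::path::PathBuf::from("archive.zarc"))?;
	/// zarc.read_directory()?;
	/// for file in zarc.files() {
	///     println!("{}", file.name.to_path().display());
	/// }
	/// # Ok(())
	/// # }
	/// ```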
54 | #[instrument(level = "debug", skip(self))] 55 | pub fn read_directory(&mut self) -> Result<()> { 56 | let mut hasher = Hasher::new(); 57 | let mut editions = take(&mut self.editions); 58 | let mut frames = take(&mut self.frames); 59 | let mut files = take(&mut self.files); 60 | let mut files_by_name = take(&mut self.files_by_name); 61 | let mut files_by_digest = take(&mut self.files_by_digest); 62 | 63 | // start a new decompression session 64 | let frame = self.read_zstandard_frame(self.trailer.directory_offset as _)?; 65 | for data in frame { 66 | let data = data?; 67 | hasher.update(&data); 68 | 69 | let mut bytes = &data[..]; 70 | loop { 71 | let ((rest, _), element) = 72 | ElementFrame::from_bytes((&bytes, 0)).map_err(SimpleError::from_deku)?; 73 | bytes = rest; 74 | 75 | trace!(?element, "read element"); 76 | let Some(element) = element.element()? else { 77 | warn!(kind=?element.kind, "unknown element kind"); 78 | continue; 79 | }; 80 | 81 | match element { 82 | Element::Edition(edition) => { 83 | editions.insert(edition.number, *edition); 84 | } 85 | Element::Frame(frame) => { 86 | frames.insert(frame.digest.clone(), *frame); 87 | } 88 | Element::File(file) => { 89 | let name = file.name.clone(); 90 | let digest = file.digest.clone(); 91 | files.push(*file); 92 | let index = files.len() - 1; 93 | files_by_name.entry(name).or_default().push(index); 94 | if let Some(digest) = digest { 95 | files_by_digest.entry(digest).or_default().push(index); 96 | } 97 | } 98 | } 99 | 100 | if bytes.is_empty() { 101 | trace!("done with this chunk of data"); 102 | break; 103 | } 104 | } 105 | } 106 | 107 | self.editions = editions; 108 | self.frames = frames; 109 | self.files = files; 110 | self.files_by_name = files_by_name; 111 | self.files_by_digest = files_by_digest; 112 | 113 | trace!("finished reading directory, verify digest"); 114 | if self.trailer.digest != Digest(hasher.finalize().as_bytes().to_vec()) { 115 | return Err(ErrorKind::DirectoryIntegrity("digest").into()); 116 | } 117 | 118 | Ok(()) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /crates/zarc/src/decode/error.rs: -------------------------------------------------------------------------------- 1 | //! Error types for [`Decoder`](super::Decoder). 2 | use std::borrow::Cow; 3 | 4 | use deku::DekuError; 5 | use miette::{Diagnostic, SourceSpan}; 6 | use thiserror::Error; 7 | 8 | /// Convenience return type. 9 | pub type Result = std::result::Result; 10 | 11 | /// Combined return error type for [`Decoder`](super::Decoder) methods. 12 | #[derive(Error, Diagnostic, Debug)] 13 | pub enum Error { 14 | /// I/O error. 15 | #[error(transparent)] 16 | Io(#[from] std::io::Error), 17 | 18 | /// Zstd error. 19 | #[error("zstd decompression error: {0}")] 20 | Zstd(String), 21 | 22 | /// CBOR error. 23 | #[error(transparent)] 24 | Cbor(#[from] minicbor::decode::Error), 25 | 26 | /// Decoder error that's just a message. 27 | #[error(transparent)] 28 | Simple(#[from] SimpleError), 29 | 30 | /// Decoder error that includes source. 31 | #[error(transparent)] 32 | Source(#[from] SourceError), 33 | } 34 | 35 | pub(crate) fn zstd(code: usize) -> Error { 36 | let msg = zstd_safe::get_error_name(code); 37 | Error::Zstd(msg.into()) 38 | } 39 | 40 | /// Decoder error. 41 | #[derive(Error, Diagnostic, Debug)] 42 | #[error("zarc decode: {message}")] 43 | pub struct SimpleError { 44 | /// Error kind. 45 | pub kind: ErrorKind, 46 | 47 | /// Error message. 
48 | pub message: Cow<'static, str>, 49 | } 50 | 51 | /// Decoder error. 52 | #[derive(Error, Diagnostic, Debug)] 53 | #[error("zarc decode: {message}")] 54 | pub struct SourceError { 55 | /// Error kind. 56 | pub kind: ErrorKind, 57 | 58 | /// Error message. 59 | pub message: Cow<'static, str>, 60 | 61 | /// Error location in zarc file. 62 | #[label("here")] 63 | pub at: SourceSpan, 64 | 65 | /// Snippet of zarc file. 66 | #[source_code] 67 | pub snippet: String, 68 | } 69 | 70 | impl SimpleError { 71 | /// New error without source. 72 | pub fn new(kind: ErrorKind) -> Self { 73 | Self { 74 | kind, 75 | message: kind.default_message(), 76 | } 77 | } 78 | 79 | /// New simple error from deku. 80 | pub fn from_deku(orig: DekuError) -> Self { 81 | Self::new(ErrorKind::Parse).with_message(orig.to_string()) 82 | } 83 | 84 | /// Change the error message. 85 | pub fn with_message(mut self, message: impl Into>) -> Self { 86 | self.message = message.into(); 87 | self 88 | } 89 | } 90 | 91 | impl SourceError { 92 | /// New error with source snippet. 93 | pub fn new(kind: ErrorKind, snippet: &[u8], at_byte: usize) -> Self { 94 | Self { 95 | kind, 96 | message: kind.default_message(), 97 | snippet: format!("{snippet:02x?}"), 98 | at: SourceSpan::from(( 99 | (at_byte * 2) + 1, // to account for [ 100 | 2, // always 2 bytes for the hex value 101 | )), 102 | } 103 | } 104 | 105 | /// New error with source snippet, extracted from a larger source. 106 | pub fn from_source(kind: ErrorKind, source: &[u8], at_byte: usize, context: usize) -> Self { 107 | let start = at_byte.saturating_sub(context); 108 | let end = at_byte.saturating_add(context).min(source.len()); 109 | Self::new(kind, &source[start..end], at_byte.saturating_sub(start)) 110 | } 111 | 112 | /// New error from deku. 113 | pub fn from_deku(orig: DekuError, source: &[u8], at_byte: usize, context: usize) -> Self { 114 | Self::from_source(ErrorKind::Parse, source, at_byte, context).with_message(orig.to_string()) 115 | } 116 | 117 | /// Change the error message. 118 | pub fn with_message(mut self, message: impl Into>) -> Self { 119 | self.message = message.into(); 120 | self 121 | } 122 | } 123 | 124 | /// Decoder error kind. 125 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] 126 | pub enum ErrorKind { 127 | /// Zstd initialization error. 128 | ZstdInit, 129 | 130 | /// Invalid skippable frame magic nibble. 131 | InvalidNibble { 132 | /// Expected nibble value 133 | expected: u8, 134 | /// Value actually found 135 | actual: u8, 136 | }, 137 | 138 | /// Unsupported zarc format version. 139 | UnsupportedZarcVersion(u8), 140 | 141 | /// When using internal methods manually, you can read sections of a Zarc file out of order, 142 | /// before necessary details are available, which will cause this error. The public API 143 | /// guarantees this never occurs. 144 | ReadOrderViolation(&'static str), 145 | 146 | /// Unintended magic header was malformed. 147 | InvalidUnintendedMagic, 148 | 149 | /// The file version number is repeated several times in a Zarc file, and they must all match. 150 | MismatchedFileVersion, 151 | 152 | /// The directory's integrity is compromised. 153 | DirectoryIntegrity(&'static str), 154 | 155 | /// Parse error. 156 | Parse, 157 | } 158 | 159 | impl ErrorKind { 160 | /// Get the default error message for this error kind. 
161 | pub fn default_message(self) -> Cow<'static, str> { 162 | match self { 163 | ErrorKind::ZstdInit => Cow::Borrowed("zstd initialization error"), 164 | ErrorKind::InvalidNibble { expected, actual } => Cow::Owned(format!( 165 | "invalid skippable frame magic nibble: expected 0x{expected:X}, got 0x{actual:X}" 166 | )), 167 | ErrorKind::UnsupportedZarcVersion(version) => Cow::Owned(format!( 168 | "unsupported zarc version {version}, this zarc supports versions {:?}", 169 | [crate::constants::ZARC_VERSION] 170 | )), 171 | ErrorKind::ReadOrderViolation(what) => { 172 | Cow::Owned(format!("read order violation: {what}")) 173 | } 174 | ErrorKind::InvalidUnintendedMagic => Cow::Borrowed("malformed unintended magic header"), 175 | ErrorKind::MismatchedFileVersion => Cow::Borrowed("mismatched file version"), 176 | ErrorKind::DirectoryIntegrity(what) => { 177 | Cow::Owned(format!("directory integrity compromised: {what}")) 178 | } 179 | ErrorKind::Parse => Cow::Borrowed("parse error"), 180 | } 181 | } 182 | } 183 | 184 | impl From for SimpleError { 185 | fn from(ek: ErrorKind) -> Self { 186 | Self::new(ek) 187 | } 188 | } 189 | 190 | impl From for Error { 191 | fn from(ek: ErrorKind) -> Self { 192 | Self::Simple(ek.into()) 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /crates/zarc/src/decode/frame_iterator.rs: -------------------------------------------------------------------------------- 1 | //! Decoder types and functions. 2 | 3 | use std::io::{Read, Seek}; 4 | 5 | use crate::{integrity::Digest, ondemand::OnDemand}; 6 | 7 | use super::{error::Result, Decoder, ZstdFrameIterator}; 8 | 9 | impl Decoder { 10 | /// Decompress a content frame by digest. 11 | /// 12 | /// This returns an iterator of chunks of bytes. Each call to the iterator decompresses some 13 | /// data and returns it, until the frame is exhausted. 14 | pub fn read_content_frame( 15 | &self, 16 | digest: &Digest, 17 | ) -> Result>> { 18 | let Some(entry) = self.frames.get(digest) else { 19 | return Ok(None); 20 | }; 21 | 22 | Ok(Some(FrameIterator::new( 23 | self.read_zstandard_frame(entry.offset)?, 24 | digest.clone(), 25 | entry.uncompressed, 26 | ))) 27 | } 28 | } 29 | 30 | /// Iterator over a Zarc content frame's chunks. 31 | /// 32 | /// This is returned by [`Decoder::read_content_frame()`][super::Decoder::read_content_frame]. 33 | /// 34 | /// Each call to the iterator decompresses some data and returns it, until the frame is exhausted. 35 | /// It also computes the frame's digest as it goes, so you can check it against the one you used to 36 | /// request the frame. 37 | #[derive(Debug)] 38 | pub struct FrameIterator<'zstd, R> { 39 | framer: ZstdFrameIterator<'zstd, R>, 40 | hasher: blake3::Hasher, 41 | digest: Digest, 42 | uncompressed_size: u64, 43 | uncompressed_read: u64, 44 | } 45 | 46 | impl<'zstd, R> FrameIterator<'zstd, R> { 47 | pub(crate) fn new( 48 | framer: ZstdFrameIterator<'zstd, R>, 49 | digest: Digest, 50 | uncompressed_size: u64, 51 | ) -> Self { 52 | Self { 53 | framer, 54 | hasher: blake3::Hasher::new(), 55 | digest, 56 | uncompressed_size, 57 | uncompressed_read: 0, 58 | } 59 | } 60 | 61 | /// Return the uncompressed size of the frame. 62 | pub fn uncompressed_size(&self) -> u64 { 63 | self.uncompressed_size 64 | } 65 | 66 | /// How many (uncompressed) bytes are left to go. 67 | pub fn bytes_left(&self) -> u64 { 68 | self.uncompressed_size 69 | .saturating_sub(self.uncompressed_read) 70 | } 71 | 72 | /// Return the digest of the frame. 
73 | /// 74 | /// Returns None if the iterator isn't yet done. 75 | pub fn digest(&self) -> Option { 76 | if self.framer.is_done() { 77 | Some(Digest(self.hasher.finalize().as_bytes().to_vec())) 78 | } else { 79 | None 80 | } 81 | } 82 | 83 | /// Check the digest of the frame. 84 | /// 85 | /// Returns None if the iterator isn't yet done. 86 | pub fn verify(&self) -> Option { 87 | self.digest().map(|d| d == self.digest) 88 | } 89 | } 90 | 91 | impl<'zstd, R: Read + Seek> Iterator for FrameIterator<'zstd, R> { 92 | type Item = Result>; 93 | 94 | fn next(&mut self) -> Option { 95 | let data = self.framer.next()?; 96 | 97 | if let Ok(data) = &data { 98 | self.uncompressed_read += data.len() as u64; 99 | self.hasher.update(data); 100 | } 101 | 102 | Some(data) 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /crates/zarc/src/decode/open.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | io::{Cursor, Read, Seek, SeekFrom}, 3 | num::NonZeroU8, 4 | }; 5 | 6 | use deku::DekuContainerRead; 7 | use ozarc::framing::SkippableFrame; 8 | use tracing::{debug, instrument, trace, warn}; 9 | 10 | use crate::{ 11 | header::Header, 12 | ondemand::OnDemand, 13 | trailer::{Epilogue, Trailer, EPILOGUE_LENGTH}, 14 | }; 15 | 16 | use super::{ 17 | error::{ErrorKind, Result, SimpleError}, 18 | Decoder, 19 | }; 20 | 21 | impl Decoder { 22 | /// Read a Skippable frame, checking its nibble. 23 | /// 24 | /// Reads and returns the entire frame's payload, and thus seeks to the end of the frame. 25 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 26 | #[instrument(level = "debug", skip(reader))] 27 | fn read_skippable_frame(reader: &mut R::Reader, nibble: u8) -> Result { 28 | let (bits_read, frame) = 29 | SkippableFrame::from_reader((reader, 0)).map_err(SimpleError::from_deku)?; 30 | debug!(%bits_read, frame=format!("{frame:02x?}"), nibble=%format!("0x{:X}", frame.nibble()), "read skippable frame"); 31 | 32 | if frame.nibble() != nibble { 33 | return Err(ErrorKind::InvalidNibble { 34 | expected: nibble, 35 | actual: frame.nibble(), 36 | } 37 | .into()); 38 | } 39 | 40 | Ok(frame) 41 | } 42 | 43 | /// Read a Zarc header. 44 | /// 45 | /// Returns the file version in the header. 46 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 47 | #[instrument(level = "debug", skip(ondemand))] 48 | fn read_header(ondemand: &R) -> Result { 49 | let mut reader = ondemand.open()?; 50 | let frame = Self::read_skippable_frame(&mut reader, 0x0)?; 51 | 52 | let mut content = Cursor::new(frame.data); 53 | let (bits_read, header) = 54 | Header::from_reader((&mut content, 0)).map_err(SimpleError::from_deku)?; 55 | debug!(%bits_read, header=format!("{header:02x?}"), "read zarc header"); 56 | 57 | debug_assert_ne!(crate::constants::ZARC_VERSION, 0); 58 | debug_assert_ne!(header.version, 0); 59 | if header.version != crate::constants::ZARC_VERSION { 60 | return Err(ErrorKind::UnsupportedZarcVersion(header.version).into()); 61 | } 62 | 63 | Ok(unsafe { 64 | // SAFETY: the version is valid and zarc versions start at 1 65 | NonZeroU8::new_unchecked(header.version) 66 | }) 67 | } 68 | 69 | /// Read the Zarc Trailer. 70 | /// 71 | /// This opens a new reader, seeks to the end, and reads the [trailer][crate::trailer]. 72 | /// 73 | /// Returns the trailer and the length of the file. 
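	///
	/// Reading works backwards: seek to the end, read up to 1 KiB of the tail,
	/// parse the fixed-size epilogue from the very end of that, then complete
	/// the variable-length trailer and verify its check byte against the one
	/// stored in the epilogue.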
74 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 75 | #[instrument(level = "debug", skip(ondemand))] 76 | fn read_trailer(ondemand: &R) -> Result<(Trailer, u64)> { 77 | let mut reader = ondemand.open()?; 78 | 79 | // seek to the end to figure out how long this file is 80 | reader.seek(SeekFrom::End(0))?; 81 | let file_length = reader.stream_position()?; 82 | let ending_length = file_length.min(1024); 83 | trace!(%file_length, reading_bytes=%ending_length, "reading end of file"); 84 | 85 | // read up to 1KB from the end of the file 86 | reader.seek(SeekFrom::End(-(ending_length as i64)))?; 87 | let mut ending = Vec::with_capacity(ending_length as _); 88 | let bytes = reader.read_to_end(&mut ending)?; 89 | trace!(%bytes, data=%format!("{bytes:02x?}"), "read end of file"); 90 | debug_assert_eq!(bytes, ending_length as _); 91 | 92 | // read the epilogue out of the end of the ending 93 | let ((rest, remaining_bits), epilogue) = 94 | Epilogue::from_bytes((&ending[(bytes - EPILOGUE_LENGTH)..], 0)) 95 | .map_err(SimpleError::from_deku)?; 96 | debug!(?epilogue, "read zarc trailer epilogue"); 97 | 98 | if remaining_bits > 0 { 99 | trace!(%remaining_bits, ?rest, "some data remaining"); 100 | return Err(SimpleError::new(ErrorKind::Parse) 101 | .with_message(format!( 102 | "parse error: too much data ({remaining_bits} bits) {rest:02x?}" 103 | )) 104 | .into()); 105 | } 106 | 107 | // check we have enough data 108 | let trailer_length = epilogue.full_length(); 109 | if bytes < trailer_length { 110 | todo!("read more bytes"); 111 | } 112 | 113 | // complete reading the trailer 114 | // UNWRAP: we know we have enough data, we just checked 115 | let mut trailer = epilogue.complete(&ending).expect("not enough data"); 116 | debug!(bytes=%trailer.len(), trailer=format!("{trailer:02x?}"), "read zarc trailer"); 117 | 118 | // compare the check byte 119 | let check_byte = trailer.compute_check(); 120 | if check_byte != epilogue.check { 121 | return Err(SimpleError::new(ErrorKind::Parse) 122 | .with_message(format!( 123 | "parse error: trailer check byte doesn't match (expected 0x{:02X}, got 0x{check_byte:02X})", 124 | epilogue.check 125 | )) 126 | .into()); 127 | } 128 | 129 | trailer.make_offset_positive(file_length); 130 | debug!(offset=%trailer.directory_offset, "reified directory offset"); 131 | 132 | Ok((trailer, file_length)) 133 | } 134 | 135 | /// Open a Zarc for reading. 136 | /// 137 | /// This checks the [header][crate::header], reads the [trailer][crate::trailer], and verifies 138 | /// the integrity of the trailer. 139 | /// 140 | /// You'll then need to read the directory and extract some files! 
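	///
	/// A minimal sketch (the path is hypothetical; any [`OnDemand`] reader
	/// works, and the CLI passes a `PathBuf`):
	///
	/// ```no_run
	/// # fn main() -> zarc::decode::error::Result<()> {
	/// let zarc = zarc::decode::Decoder::open(std::path::PathBuf::from("archive.zarc"))?;
	/// println!("directory digest: {:02x?}", zarc.trailer().digest.as_slice());
	/// # Ok(())
	/// # }
	/// ```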
141 | pub fn open(reader: R) -> Result<Self> { 142 | let version = Self::read_header(&reader)?; 143 | let (trailer, file_length) = Self::read_trailer(&reader)?; 144 | if version.get() != trailer.version { 145 | warn!(header=%version, trailer=%trailer.version, "zarc version mismatch in header and trailer"); 146 | } 147 | 148 | Ok(Self { 149 | reader, 150 | file_length, 151 | trailer, 152 | editions: Default::default(), 153 | files: Default::default(), 154 | frames: Default::default(), 155 | files_by_name: Default::default(), 156 | files_by_digest: Default::default(), 157 | }) 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /crates/zarc/src/decode/zstd_iterator.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fmt, 3 | io::{Read, Seek, SeekFrom}, 4 | }; 5 | 6 | use tracing::{debug, instrument, trace}; 7 | use zstd_safe::{DCtx, InBuffer, OutBuffer}; 8 | 9 | use crate::ondemand::OnDemand; 10 | 11 | use super::{ 12 | error::{self, ErrorKind, Result}, 13 | Decoder, 14 | }; 15 | 16 | impl<R: OnDemand> Decoder<R> { 17 | /// Read a Zstandard frame, decompressing it on demand. 18 | /// 19 | /// This opens a new reader, seeks to the position given, and returns an iterator of chunks of 20 | /// bytes. Each call to the iterator decompresses some data and returns it, until the frame is 21 | /// exhausted. 22 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 23 | #[instrument(level = "debug", skip(self))] 24 | pub(crate) fn read_zstandard_frame( 25 | &self, 26 | offset: u64, 27 | ) -> Result<ZstdFrameIterator<'static, R::Reader>> { 28 | let mut reader = self.reader.open()?; 29 | let zstd = DCtx::try_create().ok_or(ErrorKind::ZstdInit)?; 30 | // TODO method to create zstd context with the parameters saved against Decoder 31 | 32 | debug!(%offset, "seek to frame"); 33 | reader.seek(SeekFrom::Start(offset))?; 34 | 35 | Ok(ZstdFrameIterator::new(reader, zstd, offset)) 36 | } 37 | } 38 | 39 | /// Iterator over a zstandard frame's chunks. 40 | /// 41 | /// This is returned by [`Decoder::read_zstandard_frame()`][super::Decoder::read_zstandard_frame]. 42 | /// 43 | /// Each call to the iterator decompresses some data and returns it, until the frame is exhausted. 44 | /// The wrapping `FrameIterator` computes the frame's digest as the data streams through, so you 45 | /// can check it against the one you used to request the frame. 46 | pub struct ZstdFrameIterator<'zstd, R> { 47 | reader: R, 48 | zstd: DCtx<'zstd>, 49 | start_offset: u64, 50 | done: bool, 51 | } 52 | 53 | impl<R: fmt::Debug> fmt::Debug for ZstdFrameIterator<'_, R> { 54 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 55 | f.debug_struct("ZstdFrameIterator") 56 | .field("reader", &self.reader) 57 | .field("zstd", &"zstd-safe decompression context") 58 | .field("start_offset", &self.start_offset) 59 | .field("done", &self.done) 60 | .finish() 61 | } 62 | } 63 | 64 | impl<'zstd, R> ZstdFrameIterator<'zstd, R> { 65 | /// Return `true` if the iterator is done, without advancing it. 66 | pub fn is_done(&self) -> bool { 67 | self.done 68 | } 69 | } 70 | 71 | impl<'zstd, R: Read + Seek> ZstdFrameIterator<'zstd, R> { 72 | pub(crate) fn new(reader: R, zstd: DCtx<'zstd>, start_offset: u64) -> Self { 73 | Self { 74 | reader, 75 | zstd, 76 | start_offset, 77 | done: false, 78 | } 79 | } 80 | 81 | /// Perform one step of a stream decompression. 82 | /// 83 | /// This cursor is left at wherever the decompression stopped, which may be in the middle of a 84 | /// block or frame; the next call to this method will continue from there.
85 | /// 86 | /// Returns the data that was decompressed and a boolean to indicate if the frame is done. 87 | #[instrument(level = "trace", skip(self))] 88 | fn decompress_step(&mut self) -> Result<(Vec<u8>, bool)> { 89 | let input_size = DCtx::in_size().max(1024); 90 | let mut input_buf = vec![0; input_size]; 91 | let bytes = self.reader.read(&mut input_buf)?; 92 | trace!(desired=%input_size, obtained=%bytes, "read from reader to give to zstd"); 93 | let mut input = InBuffer { 94 | src: &input_buf[..bytes], 95 | pos: 0, 96 | }; 97 | 98 | let output_size = DCtx::out_size().max(1024); 99 | let mut output_buf: Vec<u8> = Vec::with_capacity(output_size); 100 | trace!(bytes=%output_size, "allocated zstd output buffer"); 101 | let mut output = OutBuffer::around(&mut output_buf); 102 | 103 | trace!("decompressing"); 104 | let mut input_hint = self 105 | .zstd 106 | .decompress_stream(&mut output, &mut input) 107 | .map_err(error::zstd)?; 108 | trace!( 109 | %input_hint, 110 | frame_done=%input_hint == 0, 111 | input_pos=%input.pos, 112 | input_size=%input.src.len(), 113 | output_pos=%output.pos(), 114 | output_size=%output.capacity(), 115 | "decompressed" 116 | ); 117 | 118 | while output.pos() == output.capacity() { 119 | trace!("zstd wants more output space"); 120 | let new_output_size = DCtx::out_size().max(1024); 121 | output_buf.reserve(output_size + new_output_size); 122 | trace!(total=%output_buf.capacity(), "allocated larger zstd output buffer"); 123 | output = OutBuffer::around(&mut output_buf); 124 | 125 | trace!("decompressing again without changing input"); 126 | input_hint = self 127 | .zstd 128 | .decompress_stream(&mut output, &mut input) 129 | .map_err(error::zstd)?; 130 | trace!( 131 | %input_hint, 132 | frame_done=%input_hint == 0, 133 | input_pos=%input.pos, 134 | input_size=%input.src.len(), 135 | output_pos=%output.pos(), 136 | output_size=%output.capacity(), 137 | "decompressed" 138 | ); 139 | } 140 | 141 | let output_written = output.as_slice().len(); 142 | trace!(bytes = output_written, "zstd has finished with the input"); 143 | 144 | #[allow(clippy::drop_non_drop)] 145 | drop(output); // to release the mutable borrow on output_buf 146 | 147 | if output_written != output_buf.len() { 148 | trace!("shrink output buffer to actual written size"); 149 | output_buf.truncate(output_written); 150 | } 151 | 152 | Ok((output_buf, input_hint == 0)) 153 | } 154 | } 155 | 156 | impl<'zstd, R: Read + Seek> Iterator for ZstdFrameIterator<'zstd, R> { 157 | type Item = Result<Vec<u8>>; 158 | 159 | fn next(&mut self) -> Option<Self::Item> { 160 | if self.done { 161 | return None; 162 | } 163 | 164 | let (data, done) = match self.decompress_step() { 165 | Ok(ok) => ok, 166 | Err(err) => return Some(Err(err)), 167 | }; 168 | 169 | if done { 170 | self.done = true; 171 | } 172 | 173 | Some(Ok(data)) 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /crates/zarc/src/directory.rs: -------------------------------------------------------------------------------- 1 | //! Common types defining the binary format structures.
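//!
//! On disk, each directory element is a little-endian framed CBOR payload (see
//! [`ElementFrame`]), laid out as follows (a sketch derived from the struct definition
//! in `elements.rs`):
//!
//! ```text
//! [ kind: 1 byte ][ length: 2 bytes LE ][ pad: 1 byte ][ payload: `length` bytes of CBOR ]
//! ```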
2 | 3 | #[doc(inline)] 4 | pub use self::edition::*; 5 | #[doc(inline)] 6 | pub use self::elements::*; 7 | #[doc(inline)] 8 | pub use self::file::*; 9 | #[doc(inline)] 10 | pub use self::frame::*; 11 | #[doc(inline)] 12 | pub use self::posix_owner::*; 13 | #[doc(inline)] 14 | pub use self::specials::*; 15 | #[doc(inline)] 16 | pub use self::strings::*; 17 | #[doc(inline)] 18 | pub use self::timestamps::*; 19 | 20 | mod edition; 21 | mod elements; 22 | mod file; 23 | mod frame; 24 | mod posix_owner; 25 | mod specials; 26 | mod strings; 27 | mod timestamps; 28 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/edition.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, num::NonZeroU16}; 2 | 3 | use minicbor::{Decode, Encode}; 4 | 5 | use super::{strings::AttributeValue, timestamps::Timestamp}; 6 | use crate::integrity::DigestType; 7 | 8 | /// Metadata about a (previous) version of the Zarc Directory. 9 | /// 10 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#kind-1-editions) 11 | #[derive(Clone, Debug, PartialEq, Encode, Decode)] 12 | #[cbor(map)] 13 | pub struct Edition { 14 | /// Edition number. 15 | /// 16 | /// Used for referencing it in frames and files. 17 | #[n(0)] 18 | pub number: NonZeroU16, 19 | 20 | /// Version creation date. 21 | #[n(1)] 22 | pub written_at: Timestamp, 23 | 24 | /// Digest algorithm used by this edition. 25 | #[n(2)] 26 | pub digest_type: DigestType, 27 | 28 | /// User Metadata of that version. 29 | /// 30 | /// You can write a Some(empty HashMap), but you'll save two bytes if you write a None instead. 31 | /// This is pretty cheap here, but adds up for the similar fields in [`files`](crate::directory::File). 32 | #[n(10)] 33 | pub user_metadata: Option<HashMap<String, AttributeValue>>, 34 | } 35 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/elements.rs: -------------------------------------------------------------------------------- 1 | use std::num::TryFromIntError; 2 | 3 | use deku::prelude::*; 4 | 5 | use super::{edition::Edition, file::File, frame::Frame}; 6 | 7 | /// Zarc Directory Element framing 8 | /// 9 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#zarc-directory) 10 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)] 11 | #[deku(endian = "little")] 12 | pub struct ElementFrame { 13 | /// Element kind. 14 | pub kind: ElementKind, 15 | 16 | /// Length of CBOR data. 17 | #[deku(bytes = "2", update = "self.payload.len()", pad_bytes_after = "1")] 18 | pub length: u16, 19 | 20 | /// CBOR data. 21 | /// 22 | /// This is at most 65535 bytes. 23 | #[deku(count = "length")] 24 | pub payload: Vec<u8>, 25 | } 26 | 27 | impl ElementFrame { 28 | /// Encode an [Element] into a CBOR payload. 29 | /// 30 | /// CBOR encoding is infallible; this returns `Err` if the element is too large to fit (more than 65535 bytes). 31 | pub fn create(element: &Element) -> Result<Self, TryFromIntError> { 32 | let payload = element.to_vec(); 33 | u16::try_from(payload.len()).map(|length| Self { 34 | kind: element.kind(), 35 | length, 36 | payload, 37 | }) 38 | } 39 | 40 | /// Decode the CBOR payload into its [Element]. 41 | /// 42 | /// Returns `Ok(None)` if the element kind is unknown.
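///
/// # Example
///
/// A round-trip sketch (error handling elided), based on the `create`/`element` pair:
///
/// ```ignore
/// let frame = ElementFrame::create(&element)?;
/// assert_eq!(frame.element()?, Some(element));
/// ```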
43 | pub fn element(&self) -> Result<Option<Element>, minicbor::decode::Error> { 44 | match self.kind { 45 | ElementKind::Edition => { 46 | minicbor::decode(&self.payload).map(|e| Some(Element::Edition(e))) 47 | } 48 | ElementKind::File => minicbor::decode(&self.payload).map(|e| Some(Element::File(e))), 49 | ElementKind::Frame => minicbor::decode(&self.payload).map(|e| Some(Element::Frame(e))), 50 | ElementKind::Unknown(_) => Ok(None), 51 | } 52 | } 53 | } 54 | 55 | /// Kind of an element (including unknown variant). 56 | #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, DekuRead, DekuWrite)] 57 | #[deku(endian = "endian", type = "u8", ctx = "endian: deku::ctx::Endian")] 58 | pub enum ElementKind { 59 | /// [Edition] 60 | #[deku(id = "1")] 61 | Edition, 62 | 63 | /// [File] 64 | #[deku(id = "2")] 65 | File, 66 | 67 | /// [Frame] 68 | #[deku(id = "3")] 69 | Frame, 70 | 71 | /// Unknown element kind. 72 | #[deku(id_pat = "_")] 73 | Unknown(u8), 74 | } 75 | 76 | /// Elements supported by Zarc. 77 | #[derive(Clone, Debug, PartialEq)] 78 | pub enum Element { 79 | /// [Edition] 80 | Edition(Box<Edition>), 81 | /// [File] 82 | File(Box<File>), 83 | /// [Frame] 84 | Frame(Box<Frame>), 85 | } 86 | 87 | impl Element { 88 | /// Get the [ElementKind] of this element. 89 | pub fn kind(&self) -> ElementKind { 90 | match self { 91 | Element::Edition(_) => ElementKind::Edition, 92 | Element::File(_) => ElementKind::File, 93 | Element::Frame(_) => ElementKind::Frame, 94 | } 95 | } 96 | 97 | /// Write the [Element] into a CBOR payload. 98 | pub fn to_vec(&self) -> Vec<u8> { 99 | #[allow(clippy::unwrap_used)] // UNWRAP: minicbor encoding is infallible 100 | match self { 101 | Element::Edition(edition) => minicbor::to_vec(edition), 102 | Element::File(file) => minicbor::to_vec(file), 103 | Element::Frame(frame) => minicbor::to_vec(frame), 104 | } 105 | .unwrap() 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/file.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, num::NonZeroU16}; 2 | 3 | use minicbor::{Decode, Encode}; 4 | 5 | use super::{ 6 | posix_owner::PosixOwner, 7 | specials::SpecialFile, 8 | strings::{AttributeValue, Pathname}, 9 | timestamps::Timestamps, 10 | }; 11 | use crate::integrity::Digest; 12 | 13 | /// Zarc Directory File Entry 14 | /// 15 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#kind-2-files) 16 | #[derive(Clone, Debug, PartialEq, Encode, Decode)] 17 | #[cbor(map)] 18 | pub struct File { 19 | /// Edition that added this entry. 20 | #[n(0)] 21 | pub edition: NonZeroU16, 22 | 23 | /// Pathname. 24 | #[n(1)] 25 | pub name: Pathname, 26 | 27 | /// Hash of a frame of content. 28 | #[n(2)] 29 | pub digest: Option<Digest>, 30 | 31 | /// POSIX mode. 32 | #[n(3)] 33 | pub mode: Option<u32>, 34 | 35 | /// POSIX user. 36 | #[n(4)] 37 | pub user: Option<PosixOwner>, 38 | 39 | /// POSIX group. 40 | #[n(5)] 41 | pub group: Option<PosixOwner>, 42 | 43 | /// Timestamps. 44 | #[n(6)] 45 | pub timestamps: Option<Timestamps>, 46 | 47 | /// Special files. 48 | #[n(7)] 49 | pub special: Option<SpecialFile>, 50 | 51 | /// User metadata. 52 | #[n(10)] 53 | pub user_metadata: Option<HashMap<String, AttributeValue>>, 54 | 55 | /// File attributes. 56 | #[n(11)] 57 | pub attributes: Option<HashMap<String, AttributeValue>>, 58 | 59 | /// Extended attributes. 60 | #[n(12)] 61 | pub extended_attributes: Option<HashMap<String, AttributeValue>>, 62 | } 63 | 64 | impl File { 65 | /// Returns `true` if this is _not_ a special file _and_ it has a frame.
66 | pub fn is_normal(&self) -> bool { 67 | self.digest.is_some() && self.special.is_none() 68 | } 69 | 70 | /// Returns `true` if this is a directory. 71 | /// 72 | /// See also [`SpecialFile::is_dir`]. 73 | pub fn is_dir(&self) -> bool { 74 | self.special.as_ref().map_or(false, SpecialFile::is_dir) 75 | } 76 | 77 | /// Returns `true` if this is a link. 78 | /// 79 | /// See also [`SpecialFile::is_link`]. 80 | pub fn is_link(&self) -> bool { 81 | self.special.as_ref().map_or(false, SpecialFile::is_link) 82 | } 83 | 84 | /// Returns `true` if this is a symlink. 85 | /// 86 | /// See also [`SpecialFile::is_symlink`]. 87 | pub fn is_symlink(&self) -> bool { 88 | self.special.as_ref().map_or(false, SpecialFile::is_symlink) 89 | } 90 | 91 | /// Returns `true` if this is a hardlink. 92 | /// 93 | /// See also [`SpecialFile::is_hardlink`]. 94 | pub fn is_hardlink(&self) -> bool { 95 | self.special 96 | .as_ref() 97 | .map_or(false, SpecialFile::is_hardlink) 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/frame.rs: -------------------------------------------------------------------------------- 1 | use std::num::NonZeroU16; 2 | 3 | use minicbor::{Decode, Encode}; 4 | 5 | use crate::integrity::Digest; 6 | 7 | /// Zarc Directory Frame Entry 8 | /// 9 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#kind-3-frames) 10 | #[derive(Clone, Debug, PartialEq, Encode, Decode)] 11 | #[cbor(map)] 12 | pub struct Frame { 13 | /// Edition which added this frame. 14 | #[n(0)] 15 | pub edition: NonZeroU16, 16 | 17 | /// Frame offset. 18 | #[n(1)] 19 | pub offset: u64, 20 | 21 | /// Hash of the frame. 22 | #[n(2)] 23 | pub digest: Digest, 24 | 25 | /// Entire frame length in bytes. 26 | #[n(3)] 27 | pub length: u64, 28 | 29 | /// Uncompressed content size in bytes. 30 | #[n(4)] 31 | pub uncompressed: u64, 32 | } 33 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/posix_owner.rs: -------------------------------------------------------------------------------- 1 | #[cfg(unix)] 2 | use std::sync::Mutex; 3 | 4 | #[cfg(unix)] 5 | use crate::owner_cache::OwnerCache; 6 | use minicbor::{data::Type, Decode, Decoder, Encode, Encoder}; 7 | #[cfg(unix)] 8 | use nix::unistd::{Gid, Group, Uid, User}; 9 | 10 | #[cfg(unix)] 11 | thread_local! { 12 | static OWNER_CACHE: Mutex<OwnerCache> = Mutex::new(OwnerCache::default()); 13 | } 14 | 15 | /// POSIX owner information (user or group). 16 | #[derive(Clone, Debug, Default, PartialEq)] 17 | pub struct PosixOwner { 18 | /// Owner numeric ID. 19 | pub id: Option<u64>, 20 | 21 | /// Owner name. 22 | pub name: Option<String>, 23 | } 24 | 25 | impl PosixOwner { 26 | /// Create from a user ID. 27 | /// 28 | /// On non-Unix, this always succeeds and returns a `PosixOwner` with the ID set only. 29 | /// 30 | /// On Unix, this resolves the user from the system and returns a `PosixOwner` with both the 31 | /// ID and the username set, iff the user exists. 32 | pub fn from_uid(uid: u32) -> std::io::Result<Option<Self>> { 33 | #[cfg(unix)] 34 | { 35 | OWNER_CACHE 36 | .with(|oc| { 37 | oc.lock() 38 | .expect("owner cache poisoned") 39 | .user_from_uid(Uid::from_raw(uid)) 40 | }) 41 | .map(|u| u.map(Into::into)) 42 | } 43 | 44 | #[cfg(not(unix))] 45 | { 46 | Ok(Some(Self { 47 | id: Some(uid as _), 48 | name: None, 49 | })) 50 | } 51 | } 52 | 53 | /// Create from a group ID. 54 | /// 55 | /// On non-Unix, this always succeeds and returns a `PosixOwner` with the ID set only.
56 | /// 57 | /// On Unix, this resolves the group from the system and returns a `PosixOwner` with both the ID 58 | /// and the group name set, iff the group exists. 59 | pub fn from_gid(gid: u32) -> std::io::Result<Option<Self>> { 60 | #[cfg(unix)] 61 | { 62 | OWNER_CACHE 63 | .with(|oc| { 64 | oc.lock() 65 | .expect("owner cache poisoned") 66 | .group_from_gid(Gid::from_raw(gid)) 67 | }) 68 | .map(|u| u.map(Into::into)) 69 | } 70 | 71 | #[cfg(not(unix))] 72 | { 73 | Ok(Some(Self { 74 | id: Some(gid as _), 75 | name: None, 76 | })) 77 | } 78 | } 79 | 80 | /// Convert to a user ID valid on the current system. 81 | /// 82 | /// - If only `id` is present, this checks and returns it. 83 | /// - If only `name` is present, this resolves the user from the system and returns its ID if it exists. 84 | /// - If both are present, and: 85 | ///   - `id` matches the resolved ID from the name, this returns `id`. 86 | ///   - `id` does not match the resolved ID from the name, this returns the ID of the resolved user. 87 | ///   - `name` does not resolve to a user on the system, this returns `id`. 88 | /// 89 | /// Additionally if the `id` is larger than a u32, this returns an error. 90 | #[cfg(unix)] 91 | pub fn to_real_uid(&self) -> std::io::Result<Option<Uid>> { 92 | match self { 93 | Self { 94 | id: None, 95 | name: None, 96 | } => Ok(None), 97 | 98 | Self { 99 | id: Some(id), 100 | name: None, 101 | } => u32::try_from(*id) 102 | .map_err(std::io::Error::other) 103 | .and_then(|uid| { 104 | OWNER_CACHE.with(|oc| { 105 | oc.lock() 106 | .expect("owner cache poisoned") 107 | .user_from_uid(Uid::from_raw(uid)) 108 | }) 109 | }) 110 | .map(|u| u.map(|u| u.uid)), 111 | 112 | Self { 113 | id: None, 114 | name: Some(name), 115 | } => OWNER_CACHE 116 | .with(|oc| { 117 | oc.lock() 118 | .expect("owner cache poisoned") 119 | .user_from_name(name) 120 | }) 121 | .map(|u| u.map(|u| u.uid)), 122 | 123 | Self { 124 | id: Some(id), 125 | name: Some(name), 126 | } => { 127 | let id = u32::try_from(*id).map_err(std::io::Error::other)?; 128 | 129 | if let Some(user) = OWNER_CACHE.with(|oc| { 130 | oc.lock() 131 | .expect("owner cache poisoned") 132 | .user_from_name(name) 133 | })? { 134 | Ok(Some(user.uid)) 135 | } else { 136 | Ok(Some(Uid::from_raw(id))) 137 | } 138 | } 139 | } 140 | } 141 | 142 | /// Convert to a group ID valid on the current system. 143 | /// 144 | /// - If only `id` is present, this checks and returns it. 145 | /// - If only `name` is present, this resolves the group from the system and returns its ID if it exists. 146 | /// - If both are present, and: 147 | ///   - `id` matches the resolved ID from the name, this returns `id`. 148 | ///   - `id` does not match the resolved ID from the name, this returns the ID of the resolved group. 149 | ///   - `name` does not resolve to a group on the system, this returns `id`. 150 | /// 151 | /// Additionally if the `id` is larger than a u32, this returns an error.
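///
/// # Example
///
/// A sketch (Unix only; the values are illustrative):
///
/// ```ignore
/// let owner = PosixOwner { id: Some(0), name: Some("root".into()) };
/// // "root" resolves on the system, so its GID wins; otherwise GID 0 is used as-is.
/// let gid = owner.to_real_gid()?;
/// ```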
152 | #[cfg(unix)] 153 | pub fn to_real_gid(&self) -> std::io::Result<Option<Gid>> { 154 | match self { 155 | Self { 156 | id: None, 157 | name: None, 158 | } => Ok(None), 159 | 160 | Self { 161 | id: Some(id), 162 | name: None, 163 | } => u32::try_from(*id) 164 | .map_err(std::io::Error::other) 165 | .and_then(|gid| { 166 | OWNER_CACHE.with(|oc| { 167 | oc.lock() 168 | .expect("owner cache poisoned") 169 | .group_from_gid(Gid::from_raw(gid)) 170 | }) 171 | }) 172 | .map(|u| u.map(|u| u.gid)), 173 | 174 | Self { 175 | id: None, 176 | name: Some(name), 177 | } => OWNER_CACHE 178 | .with(|oc| { 179 | oc.lock() 180 | .expect("owner cache poisoned") 181 | .group_from_name(name) 182 | }) 183 | .map(|u| u.map(|u| u.gid)), 184 | 185 | Self { 186 | id: Some(id), 187 | name: Some(name), 188 | } => { 189 | let id = u32::try_from(*id).map_err(std::io::Error::other)?; 190 | 191 | if let Some(group) = OWNER_CACHE.with(|oc| { 192 | oc.lock() 193 | .expect("owner cache poisoned") 194 | .group_from_name(name) 195 | })? { 196 | Ok(Some(group.gid)) 197 | } else { 198 | Ok(Some(Gid::from_raw(id))) 199 | } 200 | } 201 | } 202 | } 203 | } 204 | 205 | #[cfg(unix)] 206 | impl From<User> for PosixOwner { 207 | fn from(user: User) -> Self { 208 | Self { 209 | id: Some(user.uid.as_raw() as _), 210 | name: Some(user.name), 211 | } 212 | } 213 | } 214 | 215 | #[cfg(unix)] 216 | impl From<Group> for PosixOwner { 217 | fn from(group: Group) -> Self { 218 | Self { 219 | id: Some(group.gid.as_raw() as _), 220 | name: Some(group.name), 221 | } 222 | } 223 | } 224 | 225 | impl<C> Encode<C> for PosixOwner { 226 | fn encode<W: minicbor::encode::Write>( 227 | &self, 228 | e: &mut Encoder<W>, 229 | _ctx: &mut C, 230 | ) -> Result<(), minicbor::encode::Error<W::Error>> { 231 | e.array(match (self.id.is_some(), self.name.is_some()) { 232 | (true, true) => 2, 233 | (true, false) | (false, true) => 1, 234 | (false, false) => 0, 235 | })?; 236 | 237 | if let Some(id) = &self.id { 238 | e.u64(*id)?; 239 | } 240 | 241 | if let Some(name) = &self.name { 242 | e.encode(name)?; 243 | } 244 | 245 | Ok(()) 246 | } 247 | } 248 | 249 | impl<'b, C> Decode<'b, C> for PosixOwner { 250 | fn decode(d: &mut Decoder<'b>, _ctx: &mut C) -> Result<Self, minicbor::decode::Error> { 251 | let mut id = None; 252 | let mut name = None; 253 | 254 | let max = d.array()?.unwrap_or(u64::MAX); 255 | for _ in 0..max { 256 | match d.datatype()? { 257 | Type::Break => break, 258 | Type::U8 => { 259 | id = Some(d.u8()? as _); 260 | } 261 | Type::U16 => { 262 | id = Some(d.u16()? as _); 263 | } 264 | Type::U32 => { 265 | id = Some(d.u32()? as _); 266 | } 267 | Type::U64 => { 268 | id = Some(d.u64()?); 269 | } 270 | Type::String | Type::StringIndef => { 271 | name = Some(d.decode()?); 272 | } 273 | ty => return Err(minicbor::decode::Error::type_mismatch(ty)), 274 | } 275 | } 276 | 277 | Ok(Self { id, name }) 278 | } 279 | } 280 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/specials.rs: -------------------------------------------------------------------------------- 1 | use std::path::{Component, Path}; 2 | 3 | use minicbor::{data::Type, Decode, Decoder, Encode, Encoder}; 4 | 5 | use super::strings::{CborString, Pathname}; 6 | 7 | /// Special File metadata. 8 | /// 9 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#30-special-file-types) 10 | #[derive(Clone, Debug, Default, PartialEq, Encode, Decode)] 11 | #[cbor(array)] 12 | pub struct SpecialFile { 13 | /// Kind of special file. 14 | /// 15 | /// Will be `None` for unknown kinds.
16 | #[n(0)] 17 | pub kind: Option<SpecialFileKind>, 18 | 19 | /// Link target. 20 | #[n(1)] 21 | pub link_target: Option<LinkTarget>, 22 | } 23 | 24 | impl SpecialFile { 25 | /// Returns `true` if this is a directory. 26 | /// 27 | /// See also [`SpecialFileKind::is_dir`]. 28 | pub fn is_dir(&self) -> bool { 29 | self.kind.map_or(false, SpecialFileKind::is_dir) 30 | } 31 | 32 | /// Returns `true` if this is a link. 33 | /// 34 | /// See also [`SpecialFileKind::is_link`]. 35 | pub fn is_link(&self) -> bool { 36 | self.kind.map_or(false, SpecialFileKind::is_link) 37 | } 38 | 39 | /// Returns `true` if this is a symlink. 40 | /// 41 | /// See also [`SpecialFileKind::is_symlink`]. 42 | pub fn is_symlink(&self) -> bool { 43 | self.kind.map_or(false, SpecialFileKind::is_symlink) 44 | } 45 | 46 | /// Returns `true` if this is a hardlink. 47 | /// 48 | /// See also [`SpecialFileKind::is_hardlink`]. 49 | pub fn is_hardlink(&self) -> bool { 50 | self.kind.map_or(false, SpecialFileKind::is_hardlink) 51 | } 52 | } 53 | 54 | /// Special File kinds. 55 | /// 56 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#30-special-file-types) 57 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Encode, Decode)] 58 | #[cbor(index_only)] 59 | pub enum SpecialFileKind { 60 | /// Directory. 61 | /// 62 | /// To encode metadata/attributes against a directory. 63 | #[n(1)] 64 | Directory = 1, 65 | 66 | /// A symlink. 67 | /// 68 | /// Some kind of symlink, but without specifying what exactly it is. 69 | #[n(10)] 70 | Symlink = 10, 71 | 72 | /// Internal symbolic link. 73 | /// 74 | /// Must point to a file that exists within this Zarc. 75 | #[n(11)] 76 | InternalSymlink = 11, 77 | 78 | /// External absolute symbolic link. 79 | #[n(12)] 80 | ExternalAbsoluteSymlink = 12, 81 | 82 | /// External relative symbolic link. 83 | #[n(13)] 84 | ExternalRelativeSymlink = 13, 85 | 86 | /// A hardlink. 87 | /// 88 | /// Some kind of hardlink, but without specifying what exactly it is. 89 | #[n(20)] 90 | Hardlink = 20, 91 | 92 | /// Internal hardlink. 93 | /// 94 | /// Must point to a file that exists within this Zarc. 95 | #[n(21)] 96 | InternalHardlink = 21, 97 | 98 | /// External hardlink. 99 | #[n(22)] 100 | ExternalHardlink = 22, 101 | } 102 | 103 | impl SpecialFileKind { 104 | /// Returns `true` if this is a directory. 105 | pub fn is_dir(self) -> bool { 106 | matches!(self, Self::Directory) 107 | } 108 | 109 | /// Returns `true` if this is a link. 110 | /// 111 | /// This covers all the symlink and hardlink variants. 112 | pub fn is_link(self) -> bool { 113 | self.is_symlink() || self.is_hardlink() 114 | } 115 | 116 | /// Returns `true` if this is a symlink. 117 | /// 118 | /// This covers all the symlink variants. 119 | pub fn is_symlink(self) -> bool { 120 | matches!( 121 | self, 122 | Self::Symlink 123 | | Self::InternalSymlink 124 | | Self::ExternalAbsoluteSymlink 125 | | Self::ExternalRelativeSymlink 126 | ) 127 | } 128 | 129 | /// Returns `true` if this is a hardlink. 130 | /// 131 | /// This covers all the hardlink variants. 132 | pub fn is_hardlink(self) -> bool { 133 | matches!( 134 | self, 135 | Self::Hardlink | Self::InternalHardlink | Self::ExternalHardlink 136 | ) 137 | } 138 | } 139 | 140 | /// Target of link (for [`SpecialFile`]) 141 | /// 142 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#30-special-file-types) 143 | #[derive(Clone, Debug, PartialEq)] 144 | pub enum LinkTarget { 145 | /// Target as full pathname. 146 | FullPath(CborString), 147 | 148 | /// Target as array of path components.
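/// 
/// Per the `From<&Path>` impl below, this form is used for relative targets made up
/// entirely of normal components; anything else falls back to `FullPath`.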
149 | Components(Vec<CborString>), 150 | } 151 | 152 | impl From<Pathname> for LinkTarget { 153 | fn from(pathname: Pathname) -> Self { 154 | Self::Components(pathname.0) 155 | } 156 | } 157 | 158 | impl From<&Path> for LinkTarget { 159 | fn from(path: &Path) -> Self { 160 | if path.is_absolute() 161 | || path 162 | .components() 163 | .any(|c| !matches!(c, Component::Normal(_))) 164 | { 165 | Self::FullPath(CborString::from(path.as_os_str())) 166 | } else { 167 | Self::from(Pathname::from_normal_components(path)) 168 | } 169 | } 170 | } 171 | 172 | impl<C> Encode<C> for LinkTarget { 173 | fn encode<W: minicbor::encode::Write>( 174 | &self, 175 | e: &mut Encoder<W>, 176 | ctx: &mut C, 177 | ) -> Result<(), minicbor::encode::Error<W::Error>> { 178 | match self { 179 | Self::FullPath(s) => s.encode(e, ctx), 180 | Self::Components(v) => { 181 | e.array(v.len().try_into().expect("path way too long"))?; 182 | for s in v { 183 | s.encode(e, ctx)?; 184 | } 185 | Ok(()) 186 | } 187 | } 188 | } 189 | } 190 | 191 | impl<'b, C> Decode<'b, C> for LinkTarget { 192 | fn decode(d: &mut Decoder<'b>, ctx: &mut C) -> Result<Self, minicbor::decode::Error> { 193 | match d.datatype()? { 194 | Type::Array => todo!(), 195 | Type::ArrayIndef => todo!(), 196 | _ => CborString::decode(d, ctx).map(Self::FullPath), 197 | } 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/strings.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | ffi::OsStr, 3 | path::{Component, Path, PathBuf}, 4 | }; 5 | 6 | use minicbor::{data::Type, Decode, Decoder, Encode, Encoder}; 7 | 8 | /// Pathname as components. 9 | #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Encode, Decode)] 10 | #[cbor(transparent)] 11 | pub struct Pathname( 12 | /// Components of the path. 13 | #[n(0)] // but unused because of transparent 14 | pub Vec<CborString>, 15 | // double space is from rustfmt: https://github.com/rust-lang/rustfmt/issues/5997 16 | ); 17 | 18 | impl Pathname { 19 | /// Converts a Path, ignoring all non-normal components. 20 | pub fn from_normal_components(path: &Path) -> Self { 21 | Self( 22 | path.components() 23 | .filter_map(|c| { 24 | if let Component::Normal(comp) = c { 25 | Some(CborString::from(comp)) 26 | } else { 27 | None 28 | } 29 | }) 30 | .collect(), 31 | ) 32 | } 33 | 34 | /// Converts to a (platform-specific) Path. 35 | pub fn to_path(&self) -> PathBuf { 36 | let mut path = PathBuf::new(); 37 | for comp in &self.0 { 38 | match comp { 39 | CborString::Text(text) => { 40 | path.push(text); 41 | } 42 | CborString::Binary(bytes) => { 43 | #[cfg(unix)] 44 | { 45 | use std::os::unix::ffi::OsStrExt; 46 | path.push(OsStr::from_bytes(bytes)); 47 | } 48 | #[cfg(not(unix))] 49 | { 50 | path.push(String::from_utf8_lossy(bytes).to_string()); 51 | } 52 | } 53 | } 54 | } 55 | 56 | path 57 | } 58 | } 59 | 60 | /// CBOR Text or Byte string. 61 | #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 62 | pub enum CborString { 63 | /// UTF-8 text string value. 64 | Text(String), 65 | 66 | /// Non-unicode byte string value. 67 | Binary(Vec<u8>), 68 | } 69 | 70 | impl CborString { 71 | /// Convert from bytes that might be UTF-8.
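///
/// # Example
///
/// A quick sketch of both outcomes:
///
/// ```ignore
/// assert_eq!(
///     CborString::from_maybe_utf8(b"hello".to_vec()),
///     CborString::Text("hello".into()),
/// );
/// assert_eq!(
///     CborString::from_maybe_utf8(vec![0xFF, 0xFE]),
///     CborString::Binary(vec![0xFF, 0xFE]),
/// );
/// ```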
72 | pub fn from_maybe_utf8(bytes: Vec<u8>) -> Self { 73 | match String::from_utf8(bytes) { 74 | Ok(string) => Self::Text(string), 75 | Err(err) => Self::Binary(err.into_bytes()), 76 | } 77 | } 78 | } 79 | 80 | impl From<&OsStr> for CborString { 81 | fn from(string: &OsStr) -> Self { 82 | if let Some(unicode) = string.to_str() { 83 | Self::Text(unicode.into()) 84 | } else { 85 | #[cfg(unix)] 86 | { 87 | use std::os::unix::ffi::OsStrExt; 88 | Self::Binary(string.as_bytes().into()) 89 | } 90 | #[cfg(windows)] 91 | { 92 | use std::os::windows::ffi::OsStrExt; 93 | Self::Text(String::from_utf16_lossy( 94 | &string.encode_wide().collect::<Vec<u16>>(), 95 | )) 96 | } 97 | } 98 | } 99 | } 100 | 101 | impl From<&str> for CborString { 102 | fn from(string: &str) -> Self { 103 | Self::Text(string.into()) 104 | } 105 | } 106 | 107 | impl From<String> for CborString { 108 | fn from(string: String) -> Self { 109 | Self::Text(string) 110 | } 111 | } 112 | 113 | impl<C> Encode<C> for CborString { 114 | fn encode<W: minicbor::encode::Write>( 115 | &self, 116 | e: &mut Encoder<W>, 117 | ctx: &mut C, 118 | ) -> Result<(), minicbor::encode::Error<W::Error>> { 119 | match self { 120 | Self::Text(s) => s.encode(e, ctx), 121 | Self::Binary(b) => <&minicbor::bytes::ByteSlice>::from(b.as_slice()).encode(e, ctx), 122 | } 123 | } 124 | } 125 | 126 | impl<'b, C> Decode<'b, C> for CborString { 127 | fn decode(d: &mut Decoder<'b>, _ctx: &mut C) -> Result<Self, minicbor::decode::Error> { 128 | match d.datatype()? { 129 | Type::String => d.str().map(|s| Self::Text(s.into())), 130 | Type::StringIndef => Ok(Self::Text(d.str_iter()?.try_fold( 131 | String::new(), 132 | |mut string, s| { 133 | s.map(|s| { 134 | string.push_str(s); 135 | string 136 | }) 137 | }, 138 | )?)), 139 | Type::Bytes => d.bytes().map(|b| Self::Binary(b.into())), 140 | Type::BytesIndef => Ok(Self::Binary(d.bytes_iter()?.try_fold( 141 | Vec::new(), 142 | |mut vec, b| { 143 | b.map(|b| { 144 | vec.extend(b); 145 | vec 146 | }) 147 | }, 148 | )?)), 149 | ty => Err(minicbor::decode::Error::type_mismatch(ty)), 150 | } 151 | } 152 | } 153 | 154 | /// Attributes can be booleans or text or byte strings. 155 | #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 156 | pub enum AttributeValue { 157 | /// A boolean. 158 | Boolean(bool), 159 | 160 | /// A string. 161 | String(CborString), 162 | } 163 | 164 | impl AttributeValue { 165 | /// Get the value as a bool if it is one. 166 | pub fn as_bool(&self) -> Option<bool> { 167 | match self { 168 | Self::Boolean(b) => Some(*b), 169 | _ => None, 170 | } 171 | } 172 | } 173 | 174 | impl From<bool> for AttributeValue { 175 | fn from(b: bool) -> Self { 176 | Self::Boolean(b) 177 | } 178 | } 179 | 180 | impl<T> From<T> for AttributeValue 181 | where 182 | T: Into<CborString>, 183 | { 184 | fn from(string: T) -> Self { 185 | Self::String(string.into()) 186 | } 187 | } 188 | 189 | impl<C> Encode<C> for AttributeValue { 190 | fn encode<W: minicbor::encode::Write>( 191 | &self, 192 | e: &mut Encoder<W>, 193 | ctx: &mut C, 194 | ) -> Result<(), minicbor::encode::Error<W::Error>> { 195 | match self { 196 | Self::Boolean(b) => b.encode(e, ctx), 197 | Self::String(s) => s.encode(e, ctx), 198 | } 199 | } 200 | } 201 | 202 | impl<'b, C> Decode<'b, C> for AttributeValue { 203 | fn decode(d: &mut Decoder<'b>, _ctx: &mut C) -> Result<Self, minicbor::decode::Error> { 204 | match d.datatype()?
{ 205 | Type::String | Type::StringIndef | Type::Bytes | Type::BytesIndef => { 206 | d.decode().map(Self::String) 207 | } 208 | Type::Bool => d.decode().map(Self::Boolean), 209 | ty => Err(minicbor::decode::Error::type_mismatch(ty)), 210 | } 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /crates/zarc/src/directory/timestamps.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt, time::SystemTime}; 2 | 3 | use chrono::{DateTime, Utc}; 4 | use minicbor::{ 5 | data::{Tag, Type}, 6 | Decode, Decoder, Encode, Encoder, 7 | }; 8 | 9 | /// Directory Filemap Entry Timestamps. 10 | #[derive(Clone, Debug, Default, PartialEq, Encode, Decode)] 11 | #[cbor(map)] 12 | pub struct Timestamps { 13 | /// Creation time (birth time). 14 | #[n(1)] 15 | pub created: Option<Timestamp>, 16 | 17 | /// Modification time (mtime). 18 | #[n(2)] 19 | pub modified: Option<Timestamp>, 20 | 21 | /// Access time (atime). 22 | #[n(3)] 23 | pub accessed: Option<Timestamp>, 24 | } 25 | 26 | /// A timestamp. 27 | /// 28 | /// Internally this is a [`chrono`] type, and always encodes to an RFC3339 tagged text string. 29 | /// However for flexibility it can decode from a CBOR epoch-based timestamp as well. 30 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] 31 | pub struct Timestamp(pub DateTime<Utc>); 32 | 33 | impl Timestamp { 34 | /// The current date and time. 35 | pub fn now() -> Self { 36 | Self(Utc::now()) 37 | } 38 | } 39 | 40 | impl From<SystemTime> for Timestamp { 41 | fn from(st: SystemTime) -> Self { 42 | Self(st.into()) 43 | } 44 | } 45 | 46 | impl From<Timestamp> for SystemTime { 47 | fn from(ts: Timestamp) -> Self { 48 | ts.0.into() 49 | } 50 | } 51 | 52 | impl From<DateTime<Utc>> for Timestamp { 53 | fn from(dt: DateTime<Utc>) -> Self { 54 | Self(dt) 55 | } 56 | } 57 | 58 | impl From<Timestamp> for DateTime<Utc> { 59 | fn from(ts: Timestamp) -> Self { 60 | ts.0 61 | } 62 | } 63 | 64 | impl fmt::Display for Timestamp { 65 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 66 | write!(f, "{}", self.0) 67 | } 68 | } 69 | 70 | impl<C> Encode<C> for Timestamp { 71 | fn encode<W: minicbor::encode::Write>( 72 | &self, 73 | e: &mut Encoder<W>, 74 | _ctx: &mut C, 75 | ) -> Result<(), minicbor::encode::Error<W::Error>> { 76 | e.tag(Tag::DateTime)?.str(&self.0.to_rfc3339()).map(drop) 77 | } 78 | } 79 | 80 | impl<'b, C> Decode<'b, C> for Timestamp { 81 | fn decode(d: &mut Decoder<'b>, _ctx: &mut C) -> Result<Self, minicbor::decode::Error> { 82 | let p = d.position(); 83 | match d.tag()? { 84 | Tag::DateTime => Ok(Self( 85 | DateTime::parse_from_rfc3339(d.str()?) 86 | .map_err(|err| minicbor::decode::Error::message(err.to_string()).at(p))? 87 | .into(), 88 | )), 89 | Tag::Timestamp => match d.datatype()?
{ 90 | Type::U32 => DateTime::<Utc>::from_timestamp(i64::from(d.u32()?), 0), 91 | Type::U64 => DateTime::<Utc>::from_timestamp( 92 | i64::try_from(d.u64()?).map_err(|err| { 93 | minicbor::decode::Error::message(format!("timestamp out of range: {err}")) 94 | .at(p) 95 | })?, 96 | 0, 97 | ), 98 | Type::I32 => DateTime::<Utc>::from_timestamp(i64::from(d.i32()?), 0), 99 | Type::I64 => DateTime::<Utc>::from_timestamp(d.i64()?, 0), 100 | Type::Int => DateTime::<Utc>::from_timestamp( 101 | i64::try_from(d.int()?).map_err(|err| { 102 | minicbor::decode::Error::message(format!("timestamp out of range: {err}")) 103 | .at(p) 104 | })?, 105 | 0, 106 | ), 107 | Type::F32 => { 108 | let f = d.f32()?; 109 | DateTime::<Utc>::from_timestamp(f.trunc() as _, (f.fract() * 1.0e9) as _) 110 | } 111 | Type::F64 => { 112 | let f = d.f64()?; 113 | DateTime::<Utc>::from_timestamp(f.trunc() as _, (f.fract() * 1.0e9) as _) 114 | } 115 | ty => return Err(minicbor::decode::Error::type_mismatch(ty)), 116 | } 117 | .ok_or_else(|| minicbor::decode::Error::message("timestamp out of range").at(p)) 118 | .map(Self), 119 | other => Err(minicbor::decode::Error::message(format!( 120 | "expected Timestamp or DateTime tag, got {other:?}" 121 | )) 122 | .at(p)), 123 | } 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /crates/zarc/src/encode.rs: -------------------------------------------------------------------------------- 1 | //! Encoder types and functions. 2 | 3 | use std::{ 4 | collections::{BTreeMap, HashMap}, 5 | fmt, 6 | io::{Error, Result, Write}, 7 | num::NonZeroU16, 8 | }; 9 | 10 | use tracing::{instrument, trace}; 11 | use zstd_safe::CCtx; 12 | pub use zstd_safe::{CParameter as ZstdParameter, Strategy as ZstdStrategy}; 13 | 14 | use crate::{ 15 | directory::{File, Frame, Pathname}, 16 | header::FILE_MAGIC, 17 | integrity::Digest, 18 | map_zstd_error, 19 | }; 20 | 21 | mod add_file; 22 | mod content_frame; 23 | mod directory; 24 | mod lowlevel_frames; 25 | 26 | /// Zarc encoder context. 27 | pub struct Encoder<'writer, W: Write> { 28 | writer: &'writer mut W, 29 | zstd: CCtx<'writer>, 30 | edition: NonZeroU16, 31 | files: Vec<Option<File>>, 32 | frames: HashMap<Digest, Frame>, 33 | files_by_name: BTreeMap<Pathname, Vec<usize>>, 34 | files_by_digest: HashMap<Digest, Vec<usize>>, 35 | offset: usize, 36 | compress: bool, 37 | } 38 | 39 | impl<W: Write + fmt::Debug> fmt::Debug for Encoder<'_, W> { 40 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 41 | f.debug_struct("Encoder") 42 | .field("writer", &self.writer) 43 | .field("zstd", &"zstd-safe compression context") 44 | .field("edition", &self.edition) 45 | .field("files", &self.files) 46 | .field("frames", &self.frames) 47 | .field("files_by_name", &self.files_by_name) 48 | .field("files_by_digest", &self.files_by_digest) 49 | .field("offset", &self.offset) 50 | .field("compress", &self.compress) 51 | .finish() 52 | } 53 | } 54 | 55 | impl<'writer, W: Write> Encoder<'writer, W> { 56 | /// Create a new encoder and write the header.
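///
/// # Example
///
/// A sketch of the whole encoding flow (error handling elided; `Pathname` and `Path`
/// imports assumed):
///
/// ```ignore
/// let mut out = Vec::new();
/// let mut encoder = Encoder::new(&mut out)?;
/// let digest = encoder.add_data_frame(b"hello, zarc")?;
/// let mut file = encoder.build_file(Pathname::from_normal_components(Path::new("hello.txt")));
/// file.digest(digest);
/// encoder.add_file_entry(file)?;
/// let directory_digest = encoder.finalise()?;
/// ```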
57 | #[instrument(level = "trace", skip(writer))] 58 | pub fn new(writer: &'writer mut W) -> Result<Self> { 59 | trace!("create zstd context"); 60 | let mut zstd = 61 | CCtx::try_create().ok_or_else(|| Error::other("failed allocating zstd context"))?; 62 | zstd.init(0).map_err(map_zstd_error)?; 63 | 64 | trace!("write zarc magic"); 65 | let offset = writer.write(&FILE_MAGIC)?; 66 | 67 | Ok(Self { 68 | writer, 69 | zstd, 70 | edition: unsafe { NonZeroU16::new_unchecked(1) }, 71 | files: Vec::new(), 72 | frames: HashMap::new(), 73 | files_by_name: BTreeMap::new(), 74 | files_by_digest: HashMap::new(), 75 | offset, 76 | compress: true, 77 | }) 78 | } 79 | 80 | /// Set a zstd parameter. 81 | /// 82 | /// This will apply to future data frames. 83 | #[instrument(level = "trace", skip(self))] 84 | pub fn set_zstd_parameter(&mut self, parameter: ZstdParameter) -> Result<()> { 85 | self.zstd 86 | .set_parameter(parameter) 87 | .map_err(map_zstd_error) 88 | .map(drop) 89 | } 90 | 91 | /// Enable or disable compression. 92 | /// 93 | /// This will apply to future data frames. 94 | #[instrument(level = "trace", skip(self))] 95 | pub fn enable_compression(&mut self, compress: bool) { 96 | self.compress = compress; 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /crates/zarc/src/encode/add_file.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | io::{Error, Result, Write}, 3 | path::Path, 4 | }; 5 | 6 | use tracing::{instrument, trace}; 7 | 8 | use crate::{ 9 | directory::{ 10 | AttributeValue, File, Pathname, PosixOwner, SpecialFile, SpecialFileKind, Timestamp, 11 | Timestamps, 12 | }, 13 | integrity::Digest, 14 | metadata::encode::build_filemap, 15 | }; 16 | 17 | use super::Encoder; 18 | 19 | impl<'writer, W: Write> Encoder<'writer, W> { 20 | /// Add a file entry. 21 | #[instrument(level = "trace", skip(self))] 22 | pub fn add_file_entry(&mut self, entry: impl Into<File> + std::fmt::Debug) -> Result<()> { 23 | let entry = entry.into(); 24 | 25 | if let Some(hash) = &entry.digest { 26 | if !self.frames.contains_key(hash) { 27 | return Err(Error::other( 28 | "cannot add file entry referencing unknown data frame", 29 | )); 30 | } 31 | } 32 | 33 | let name = entry.name.clone(); 34 | let digest = entry.digest.clone(); 35 | 36 | self.files.push(Some(entry)); 37 | let index = self.files.len() - 1; 38 | trace!(index, "added file entry"); 39 | 40 | self.files_by_name.entry(name).or_default().push(index); 41 | if let Some(digest) = digest { 42 | self.files_by_digest.entry(digest).or_default().push(index); 43 | } 44 | 45 | Ok(()) 46 | } 47 | 48 | /// Get a builder for a file entry. 49 | /// 50 | /// Don't forget to set the digest to the content frame! 51 | #[instrument(level = "trace", skip(self))] 52 | pub fn build_file(&self, name: impl Into<Pathname> + std::fmt::Debug) -> FileBuilder { 53 | FileBuilder(File { 54 | edition: self.edition, 55 | name: name.into(), 56 | digest: Default::default(), 57 | mode: Default::default(), 58 | user: Default::default(), 59 | group: Default::default(), 60 | timestamps: Default::default(), 61 | special: Default::default(), 62 | user_metadata: Default::default(), 63 | attributes: Default::default(), 64 | extended_attributes: Default::default(), 65 | }) 66 | } 67 | 68 | /// Start building a file from an existing file. 69 | /// 70 | /// This will read the metadata of a file on the filesystem and return a [`FileBuilder`] to add 71 | /// or change metadata before adding it to the encoder.
72 | /// 73 | /// Don't forget to set the digest to the content frame! 74 | #[instrument(level = "trace", skip(self))] 75 | pub fn build_file_with_metadata( 76 | &self, 77 | path: impl AsRef<Path> + std::fmt::Debug, 78 | follow_symlinks: bool, 79 | ) -> std::io::Result<FileBuilder> { 80 | let path = path.as_ref(); 81 | build_filemap(self.edition, path, follow_symlinks).map(FileBuilder) 82 | } 83 | } 84 | 85 | /// Builder for a file entry. 86 | /// 87 | /// Create with [`Encoder::build_file()`], then insert into the Encoder with 88 | /// [`Encoder::add_file_entry()`]. 89 | #[derive(Clone, Debug)] 90 | pub struct FileBuilder(pub File); 91 | 92 | // TODO: symlinks and hardlinks 93 | 94 | impl FileBuilder { 95 | /// Set the digest of a content frame. 96 | /// 97 | /// This doesn't check that the digest is valid or that the content frame exists, but that will 98 | /// be checked later when the file is added to the encoder. 99 | pub fn digest(&mut self, digest: impl Into<Digest>) -> &mut Self { 100 | self.0.digest = Some(digest.into()); 101 | self 102 | } 103 | 104 | /// Make this a directory. 105 | /// 106 | /// This will clear the digest if it was set. 107 | pub fn directory(&mut self) -> &mut Self { 108 | self.0.digest = None; 109 | self.0.special = Some(SpecialFile { 110 | kind: Some(SpecialFileKind::Directory), 111 | ..Default::default() 112 | }); 113 | self 114 | } 115 | 116 | /// Set the POSIX mode of the file. 117 | /// 118 | /// This does the same thing regardless of platform, so it can be used to set the mode of files 119 | /// even when running on Windows if the desired value is known. 120 | pub fn mode(&mut self, mode: u32) -> &mut Self { 121 | self.0.mode = Some(mode); 122 | self 123 | } 124 | 125 | /// Set the user that owns the file by name. 126 | pub fn user_name(&mut self, username: &str) -> &mut Self { 127 | let name = username.to_string(); 128 | if let Some(user) = self.0.user.as_mut() { 129 | user.name = Some(name); 130 | } else { 131 | self.0.user = Some(PosixOwner { 132 | name: Some(name), 133 | ..Default::default() 134 | }) 135 | } 136 | self 137 | } 138 | 139 | /// Set the user that owns the file by ID. 140 | pub fn user_id(&mut self, id: u64) -> &mut Self { 141 | if let Some(user) = self.0.user.as_mut() { 142 | user.id = Some(id); 143 | } else { 144 | self.0.user = Some(PosixOwner { 145 | id: Some(id), 146 | ..Default::default() 147 | }) 148 | } 149 | self 150 | } 151 | 152 | /// Set the group that owns the file by name. 153 | pub fn group_name(&mut self, groupname: &str) -> &mut Self { 154 | let name = groupname.to_string(); 155 | if let Some(group) = self.0.group.as_mut() { 156 | group.name = Some(name); 157 | } else { 158 | self.0.group = Some(PosixOwner { 159 | name: Some(name), 160 | ..Default::default() 161 | }) 162 | } 163 | self 164 | } 165 | 166 | /// Set the group that owns the file by ID. 167 | pub fn group_id(&mut self, id: u64) -> &mut Self { 168 | if let Some(group) = self.0.group.as_mut() { 169 | group.id = Some(id); 170 | } else { 171 | self.0.group = Some(PosixOwner { 172 | id: Some(id), 173 | ..Default::default() 174 | }) 175 | } 176 | self 177 | } 178 | 179 | /// Set the timestamps of the file. 180 | pub fn timestamps(&mut self, timestamps: impl Into<Timestamps>) -> &mut Self { 181 | self.0.timestamps = Some(timestamps.into()); 182 | self 183 | } 184 | 185 | /// Set the accessed timestamp of the file.
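///
/// Anything converting into [`Timestamp`] works here; for instance (a sketch):
///
/// ```ignore
/// builder.time_accessed(std::time::SystemTime::now());
/// ```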
186 | pub fn time_accessed(&mut self, timestamps: impl Into<Timestamp>) -> &mut Self { 187 | if let Some(ts) = self.0.timestamps.as_mut() { 188 | ts.accessed = Some(timestamps.into()); 189 | } else { 190 | self.0.timestamps = Some(Timestamps { 191 | accessed: Some(timestamps.into()), 192 | ..Default::default() 193 | }) 194 | } 195 | self 196 | } 197 | 198 | /// Set the modified timestamp of the file. 199 | pub fn time_modified(&mut self, timestamps: impl Into<Timestamp>) -> &mut Self { 200 | if let Some(ts) = self.0.timestamps.as_mut() { 201 | ts.modified = Some(timestamps.into()); 202 | } else { 203 | self.0.timestamps = Some(Timestamps { 204 | modified: Some(timestamps.into()), 205 | ..Default::default() 206 | }) 207 | } 208 | self 209 | } 210 | 211 | /// Set the created timestamp of the file. 212 | pub fn time_created(&mut self, timestamps: impl Into<Timestamp>) -> &mut Self { 213 | if let Some(ts) = self.0.timestamps.as_mut() { 214 | ts.created = Some(timestamps.into()); 215 | } else { 216 | self.0.timestamps = Some(Timestamps { 217 | created: Some(timestamps.into()), 218 | ..Default::default() 219 | }) 220 | } 221 | self 222 | } 223 | 224 | /// Add user metadata. 225 | pub fn user_metadata( 226 | &mut self, 227 | key: impl Into<String>, 228 | value: impl Into<AttributeValue>, 229 | ) -> &mut Self { 230 | self.0 231 | .user_metadata 232 | .get_or_insert_with(Default::default) 233 | .insert(key.into(), value.into()); 234 | self 235 | } 236 | 237 | /// Add an attribute. 238 | /// 239 | /// See [`file_attributes`](crate::metadata::encode::file_attributes) for a list of attributes. 240 | pub fn attribute( 241 | &mut self, 242 | key: impl Into<String>, 243 | value: impl Into<AttributeValue>, 244 | ) -> &mut Self { 245 | self.0 246 | .attributes 247 | .get_or_insert_with(Default::default) 248 | .insert(key.into(), value.into()); 249 | self 250 | } 251 | 252 | /// Add an extended attribute. 253 | pub fn extended_attribute( 254 | &mut self, 255 | key: impl Into<String>, 256 | value: impl Into<AttributeValue>, 257 | ) -> &mut Self { 258 | self.0 259 | .extended_attributes 260 | .get_or_insert_with(Default::default) 261 | .insert(key.into(), value.into()); 262 | self 263 | } 264 | } 265 | 266 | impl From<FileBuilder> for File { 267 | fn from(builder: FileBuilder) -> Self { 268 | builder.0 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /crates/zarc/src/encode/content_frame.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Error, Result, Write}; 2 | 3 | use tracing::{instrument, trace}; 4 | use zstd_safe::ResetDirective; 5 | 6 | use crate::{directory::Frame, integrity::Digest, map_zstd_error}; 7 | 8 | use super::Encoder; 9 | 10 | impl<'writer, W: Write> Encoder<'writer, W> { 11 | /// Add a frame of data. 12 | /// 13 | /// Processes the entire input in memory. 14 | /// 15 | /// Returns the hash of the data, so it can be referenced in a filemap entry. 16 | /// 17 | /// If the content hashes to a frame that already exists, returns the hash without storing 18 | /// a duplicate frame.
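///
/// # Example
///
/// A sketch of the deduplication behaviour described above:
///
/// ```ignore
/// let d1 = encoder.add_data_frame(b"same bytes")?;
/// let d2 = encoder.add_data_frame(b"same bytes")?;
/// assert_eq!(d1, d2); // the second call writes nothing new
/// ```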
19 | #[instrument(level = "trace", skip(self, content))] 20 | pub fn add_data_frame(&mut self, content: &[u8]) -> Result<Digest> { 21 | // collect pre-compression values 22 | let offset = self.offset.try_into().map_err(Error::other)?; 23 | let uncompressed_size = content.len(); 24 | 25 | // compute content hash 26 | let digest = blake3::hash(content); 27 | let digest = Digest(digest.as_bytes().to_vec()); 28 | trace!(%uncompressed_size, digest=%format!("{digest:02x?}"), "computed digest"); 29 | 30 | if self.frames.contains_key(&digest) { 31 | trace!("frame already exists, skipping"); 32 | return Ok(digest); 33 | } 34 | 35 | let bytes = if self.compress { 36 | // start new compression context 37 | self.zstd 38 | .reset(ResetDirective::SessionOnly) 39 | .map_err(map_zstd_error)?; 40 | 41 | self.write_compressed_frame(content) 42 | } else { 43 | self.write_uncompressed_frame(content) 44 | }?; 45 | self.offset += bytes; 46 | 47 | // push frame to list 48 | self.frames.insert( 49 | digest.clone(), 50 | Frame { 51 | edition: self.edition, 52 | offset, 53 | digest: digest.clone(), 54 | length: bytes as _, 55 | uncompressed: uncompressed_size as _, 56 | }, 57 | ); 58 | 59 | Ok(digest) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /crates/zarc/src/encode/directory.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | io::{Error, Result, Write}, 3 | mem::take, 4 | }; 5 | 6 | use blake3::Hasher; 7 | use deku::DekuContainerWrite; 8 | use ozarc::framing::SKIPPABLE_FRAME_OVERHEAD; 9 | use tracing::{debug, instrument, trace}; 10 | 11 | use crate::{ 12 | constants::ZARC_VERSION, 13 | directory::{Edition, Element, ElementFrame, Timestamp}, 14 | integrity::{Digest, DigestType}, 15 | trailer::Trailer, 16 | }; 17 | 18 | use super::Encoder; 19 | 20 | impl<'writer, W: Write> Encoder<'writer, W> { 21 | #[instrument(level = "trace", skip(buf, hasher))] 22 | fn write_element(buf: &mut Vec<u8>, hasher: &mut Hasher, element: &Element) -> Result<()> { 23 | let frame = ElementFrame::create(element).map_err(Error::other)?; 24 | let bytes = frame.to_bytes().map_err(Error::other)?; 25 | buf.write_all(&bytes)?; 26 | hasher.update(&bytes); 27 | trace!( 28 | kind = ?element.kind(), 29 | length = %bytes.len(), 30 | bytes = %format!("{bytes:02x?}"), 31 | "wrote element" 32 | ); 33 | Ok(()) 34 | } 35 | 36 | /// Write the directory and trailer. 37 | /// 38 | /// Flushes the writer and drops all state, returns the digest of the directory.
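///
/// Element write order, as implemented below: the edition element first; then, for each
/// file (grouped by name), its content frame element immediately before the file element,
/// each frame written only once; finally any frames not referenced by a file.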
39 | #[instrument(level = "debug", skip(self))] 40 | pub fn finalise(mut self) -> Result<Digest> { 41 | let mut directory = Vec::new(); 42 | let digest_type = DigestType::Blake3; 43 | let mut hasher = Hasher::new(); // TODO: get hasher from DigestType 44 | 45 | Self::write_element( 46 | &mut directory, 47 | &mut hasher, 48 | &Element::Edition(Box::new(Edition { 49 | number: self.edition, 50 | written_at: Timestamp::now(), 51 | digest_type, 52 | user_metadata: Default::default(), 53 | })), 54 | )?; 55 | 56 | for (name, indices) in take(&mut self.files_by_name) { 57 | debug!(?name, "write file and frame elements"); 58 | 59 | for index in indices { 60 | let Some(file) = self.files.get_mut(index).and_then(Option::take) else { 61 | // this shouldn't happen, but it's cheap to just skip instead of unwrapping 62 | continue; 63 | }; 64 | 65 | // we always want to insert a frame element before the linked file element 66 | if let Some(digest) = &file.digest { 67 | // if we've already written it, this will be None 68 | if let Some(frame) = self.frames.remove(digest) { 69 | Self::write_element( 70 | &mut directory, 71 | &mut hasher, 72 | &Element::Frame(Box::new(frame)), 73 | )?; 74 | } 75 | } 76 | 77 | Self::write_element(&mut directory, &mut hasher, &Element::File(Box::new(file)))?; 78 | } 79 | } 80 | 81 | // we should have written every frame, but just in case 82 | // (or if user inserted frames not linked to files) 83 | for frame in take(&mut self.frames).into_values() { 84 | Self::write_element( 85 | &mut directory, 86 | &mut hasher, 87 | &Element::Frame(Box::new(frame)), 88 | )?; 89 | } 90 | 91 | let digest = hasher.finalize(); 92 | trace!(?digest, "hashed directory"); 93 | let digest = Digest(digest.as_bytes().to_vec()); 94 | 95 | let bytes = self.write_compressed_frame(&directory)?; 96 | trace!(%bytes, "wrote directory"); 97 | 98 | let mut trailer = Trailer { 99 | version: ZARC_VERSION, 100 | digest_type, 101 | directory_offset: 0, 102 | directory_uncompressed_size: directory.len() as _, 103 | digest: digest.clone(), 104 | }; 105 | trailer.directory_offset = -((bytes + SKIPPABLE_FRAME_OVERHEAD + trailer.len()) as i64); 106 | trace!(?trailer, "built trailer"); 107 | 108 | let trailer_bytes = trailer.to_bytes(); 109 | trace!( 110 | bytes = %format!("{trailer_bytes:02x?}"), 111 | length = %trailer_bytes.len(), 112 | "serialised trailer" 113 | ); 114 | 115 | let bytes = self.write_skippable_frame(0xF, trailer_bytes)?; 116 | trace!(%bytes, "wrote trailer"); 117 | 118 | self.writer.flush()?; 119 | trace!("flushed writer"); 120 | 121 | Ok(digest) 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /crates/zarc/src/encode/lowlevel_frames.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Result, Write}; 2 | 3 | use deku::DekuContainerWrite; 4 | use tracing::{instrument, trace}; 5 | 6 | use crate::map_zstd_error; 7 | 8 | use super::Encoder; 9 | 10 | impl<'writer, W: Write> Encoder<'writer, W> { 11 | /// Write a compressed frame. 12 | /// 13 | /// Zstd-safe is bad at writing data, so we always write to a buffer in memory and then write 14 | /// that buffer to the writer. 15 | /// 16 | /// Returns the number of bytes written.
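///
/// The buffer is pre-sized to `data.len() + max(1024, data.len() / 10)`, which should sit
/// comfortably above zstd's worst-case compressed size (roughly `srcSize + srcSize/255`
/// plus a small constant margin), so `compress2` should not run out of room.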
17 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 18 | #[instrument(level = "trace", skip(self, data))] 19 | pub(crate) fn write_compressed_frame(&mut self, data: &[u8]) -> Result<usize> { 20 | // start with a buffer slightly larger than the input 21 | let mut buffer: Vec<u8> = Vec::with_capacity(data.len() + 1024.max(data.len() / 10)); 22 | 23 | trace!( 24 | bytes = %format!("{data:02x?}"), 25 | length = %data.len(), 26 | buffer_size = %buffer.capacity(), 27 | "compress data into buffer" 28 | ); 29 | self.zstd 30 | .compress2(&mut buffer, data) 31 | .map_err(map_zstd_error)?; 32 | 33 | trace!( 34 | bytes = %format!("{buffer:02x?}"), 35 | length = %buffer.len(), 36 | "write buffer to writer" 37 | ); 38 | self.writer.write(&buffer) 39 | } 40 | 41 | /// Write an uncompressed frame. 42 | /// 43 | /// Zstd can't write fully-uncompressed data, so we use [`ozarc`]'s types to write raw blocks 44 | /// and the frame directly. 45 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 46 | #[instrument(level = "trace", skip(self, data))] 47 | pub(crate) fn write_uncompressed_frame(&mut self, data: &[u8]) -> Result<usize> { 48 | use ozarc::framing::*; 49 | let mut frame = ZstandardFrame { 50 | header: ZstandardFrameHeader { 51 | frame_descriptor: ZstandardFrameDescriptor { 52 | fcs_size: 3, 53 | single_segment: false, 54 | unused_bit: false, 55 | reserved_bit: false, 56 | checksum: false, 57 | did_size: 0, 58 | }, 59 | window_descriptor: None, 60 | did: Vec::new(), 61 | #[allow(clippy::unwrap_used)] // UNWRAP: realistically we'll never have more than u64 bytes of content 62 | frame_content_size: u64::try_from(data.len()).unwrap().to_le_bytes().to_vec(), 63 | }, 64 | blocks: data 65 | .chunks(u16::MAX as _) 66 | .map(|data| ZstandardBlock { 67 | header: ZstandardBlockHeader::new( 68 | ZstandardBlockType::Raw, 69 | false, 70 | #[allow(clippy::unwrap_used)] // UNWRAP: chunks() limits to u16 71 | u32::try_from(data.len()).unwrap(), 72 | ), 73 | data: data.into(), 74 | }) 75 | .collect(), 76 | checksum: None, 77 | }; 78 | 79 | if let Some(last) = frame.blocks.last_mut() { 80 | last.header.last = true; 81 | } 82 | 83 | self.writer.write(&frame.to_bytes()?) 84 | } 85 | 86 | /// Write a skippable frame. 87 | /// 88 | /// Zstd-safe doesn't have an API for this, so we use [`ozarc`]. 89 | #[cfg_attr(feature = "expose-internals", visibility::make(pub))] 90 | #[instrument(level = "trace", skip(self, magic, data))] 91 | pub(crate) fn write_skippable_frame(&mut self, magic: u8, data: Vec<u8>) -> Result<usize> { 92 | trace!( 93 | bytes = %format!("{data:02x?}"), 94 | length = %data.len(), 95 | magic, 96 | "compose data into frame" 97 | ); 98 | let frame = ozarc::framing::SkippableFrame::new(magic, data); 99 | let buffer = frame.to_bytes()?; 100 | 101 | trace!( 102 | bytes = %format!("{buffer:02x?}"), 103 | length = %buffer.len(), 104 | "write buffer to writer" 105 | ); 106 | self.writer.write(&buffer) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /crates/zarc/src/header.rs: -------------------------------------------------------------------------------- 1 | //! Zarc Header structure and byte array 2 | //! 3 | //! The purpose of the header is to identify the file as a Zarc file. It also has the file version 4 | //! number, but this can be considered part of the "file magic" rather than actual metadata. 5 | //! 6 | //! This module has two implementations of the header: [`Header`] which lets you decode the header 7 | //!
7 | //! from the skippable frame's payload, and [`FILE_MAGIC`] which is a constant byte array that
8 | //! includes the Zstd framing and can be matched byte-for-byte against the start of a Zarc file.
9 | 
10 | use deku::prelude::*;
11 | 
12 | use super::constants::{ZARC_MAGIC, ZARC_VERSION};
13 | 
14 | /// Zarc Header
15 | ///
16 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#zarc-header)
17 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)]
18 | #[deku(endian = "little")]
19 | pub struct Header {
20 | 	/// Magic number. Asserted to match [`ZARC_MAGIC`].
21 | 	#[deku(count = "3", assert = "*magic == ZARC_MAGIC")]
22 | 	pub magic: Vec<u8>,
23 | 
24 | 	/// Zarc format version number. Should match [`ZARC_VERSION`].
25 | 	#[deku(bytes = "1")]
26 | 	pub version: u8,
27 | }
28 | 
29 | /// Static file magic
30 | ///
31 | /// This is a zstd Skippable frame containing the Zarc Header, as a hardcoded constant.
32 | ///
33 | /// In a valid Zarc file, the first 12 bytes will match exactly.
34 | #[rustfmt::skip]
35 | pub const FILE_MAGIC: [u8; 12] = [
36 | 	0x50, 0x2A, 0x4D, 0x18, // zstd skippable frame
37 | 	0x04, 0x00, 0x00, 0x00, // payload size = 4 bytes
38 | 	0x65, 0xAA, 0xDC, // zarc magic
39 | 	ZARC_VERSION, // zarc version
40 | ];
41 | 
--------------------------------------------------------------------------------
/crates/zarc/src/integrity.rs:
--------------------------------------------------------------------------------
1 | //! Types supporting file integrity (checksums).
2 | 
3 | use deku::prelude::*;
4 | use minicbor::{data::Type, Decode, Decoder, Encode, Encoder};
5 | 
6 | /// Digest newtype.
7 | ///
8 | /// This is a wrapper around a byte vector, which is the actual digest.
9 | ///
10 | /// Currently only BLAKE3 is supported, but this type is designed to be generic over algorithms.
11 | ///
12 | /// The `PartialEq` and `Eq` implementations are constant-time.
13 | #[allow(clippy::derived_hash_with_manual_eq)]
14 | #[derive(Clone, Debug, Eq, Hash, DekuWrite)]
15 | pub struct Digest(pub Vec<u8>);
16 | 
17 | impl PartialEq for Digest {
18 | 	fn eq(&self, other: &Self) -> bool {
19 | 		use subtle::ConstantTimeEq;
20 | 		self.0.ct_eq(&other.0).into()
21 | 	}
22 | }
23 | 
24 | impl std::ops::Deref for Digest {
25 | 	type Target = Vec<u8>;
26 | 
27 | 	fn deref(&self) -> &Self::Target {
28 | 		&self.0
29 | 	}
30 | }
31 | 
32 | impl From<Vec<u8>> for Digest {
33 | 	fn from(bytes: Vec<u8>) -> Self {
34 | 		Self(bytes)
35 | 	}
36 | }
37 | 
38 | impl<'a, Ctx> DekuReader<'a, Ctx> for Digest
39 | where
40 | 	Vec<u8>: DekuReader<'a, Ctx>,
41 | 	Ctx: Copy,
42 | {
43 | 	fn from_reader_with_ctx<R: deku::no_std_io::Read>(
44 | 		reader: &mut deku::reader::Reader<'_, R>,
45 | 		ctx: Ctx,
46 | 	) -> Result<Self, DekuError>
47 | 	where
48 | 		Self: Sized,
49 | 	{
50 | 		Vec::<u8>::from_reader_with_ctx(reader, ctx).map(Self)
51 | 	}
52 | }
53 | 
54 | impl<C> Encode<C> for Digest {
55 | 	fn encode<W: minicbor::encode::Write>(
56 | 		&self,
57 | 		e: &mut Encoder<W>,
58 | 		_ctx: &mut C,
59 | 	) -> Result<(), minicbor::encode::Error<W::Error>> {
60 | 		e.bytes(&self.0).map(drop)
61 | 	}
62 | }
63 | 
64 | impl<'b, C> Decode<'b, C> for Digest {
65 | 	fn decode(d: &mut Decoder<'b>, _ctx: &mut C) -> Result<Self, minicbor::decode::Error> {
66 | 		match d.datatype()? {
67 | 			Type::Bytes => d.bytes().map(|b| Self(b.into())),
68 | 			Type::BytesIndef => Ok(Self(d.bytes_iter()?.try_fold(
69 | 				Vec::new(),
70 | 				|mut vec, b| {
71 | 					b.map(|b| {
72 | 						vec.extend(b);
73 | 						vec
74 | 					})
75 | 				},
76 | 			)?)),
77 | 			ty => Err(minicbor::decode::Error::type_mismatch(ty)),
78 | 		}
79 | 	}
80 | }
81 | 
82 | impl From<blake3::Hash> for Digest {
83 | 	fn from(value: blake3::Hash) -> Self {
84 | 		Self(value.as_bytes().to_vec())
85 | 	}
86 | }
87 | 
88 | /// Available digest algorithms.
89 | #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Encode, Decode, DekuRead, DekuWrite)]
90 | #[deku(endian = "endian", type = "u8", ctx = "endian: deku::ctx::Endian")]
91 | #[cbor(index_only)]
92 | pub enum DigestType {
93 | 	/// BLAKE3 hash function.
94 | 	#[n(1)]
95 | 	Blake3 = 1,
96 | }
97 | 
98 | impl DigestType {
99 | 	/// Length in bytes of a digest of this type.
100 | 	pub const fn digest_len(self) -> usize {
101 | 		match self {
102 | 			Self::Blake3 => blake3::OUT_LEN,
103 | 		}
104 | 	}
105 | 
106 | 	/// Verify that a block of data matches the given digest.
107 | 	pub fn verify_data(self, expected: &Digest, data: &[u8]) -> bool {
108 | 		match self {
109 | 			Self::Blake3 => {
110 | 				let actual = blake3::hash(data);
111 | 				let Ok(expected_bytes) = expected.as_slice().try_into() else {
112 | 					return false;
113 | 				};
114 | 				blake3::Hash::from_bytes(expected_bytes) == actual
115 | 			}
116 | 		}
117 | 	}
118 | }
119 | 
--------------------------------------------------------------------------------
/crates/zarc/src/lib.rs:
--------------------------------------------------------------------------------
1 | //! Zarc: Archive format based on Zstd.
2 | //!
3 | //! [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md)
4 | //!
5 | //! TBD
6 | 
7 | #![warn(clippy::unwrap_used, missing_docs)]
8 | #![deny(rust_2018_idioms)]
9 | #![cfg_attr(docsrs, feature(doc_auto_cfg))]
10 | 
11 | #[doc(inline)]
12 | pub use self::constants::*;
13 | mod constants;
14 | 
15 | pub mod decode;
16 | pub mod directory;
17 | pub mod encode;
18 | pub mod header;
19 | pub mod integrity;
20 | #[cfg(feature = "metadata")]
21 | pub mod metadata;
22 | pub mod ondemand;
23 | #[cfg(unix)]
24 | pub mod owner_cache;
25 | pub mod trailer;
26 | 
27 | pub(crate) fn map_zstd_error(code: usize) -> std::io::Error {
28 | 	let msg = zstd_safe::get_error_name(code);
29 | 	std::io::Error::other(msg)
30 | }
31 | 
--------------------------------------------------------------------------------
/crates/zarc/src/metadata.rs:
--------------------------------------------------------------------------------
1 | //! Helpers to read/write metadata for the Filemap.
2 | 
3 | pub mod decode;
4 | pub mod encode;
5 | 
--------------------------------------------------------------------------------
/crates/zarc/src/metadata/decode.rs:
--------------------------------------------------------------------------------
1 | //! Helpers to write file metadata when decoding [`File`](directory::File)s.
2 | 
3 | use std::fs::{File as FsFile, FileTimes, Permissions};
4 | 
5 | use tracing::instrument;
6 | 
7 | use crate::directory::{File, Timestamps};
8 | 
9 | /// Set the timestamps of the file.
10 | #[instrument(level = "trace")]
11 | pub fn set_timestamps(file: &FsFile, ts: &Timestamps) -> std::io::Result<()> {
12 | 	// On Windows, creation date is supported by std.
13 | 	// On Linux, birthtime can't be set.
14 | 	// On Apple/BSD, it should be possible to set birthtime:
15 | 	// https://github.com/ronomon/utimes/blob/master/binding.cc
16 | 	// but `nix` doesn't have setattrlist.
17 | 
18 | 	file.set_times(ts.into())
19 | }
20 | 
21 | impl From<&Timestamps> for FileTimes {
22 | 	fn from(ts: &Timestamps) -> Self {
23 | 		let mut ft = Self::new();
24 | 		if let Some(accessed) = ts.accessed {
25 | 			ft = ft.set_accessed(accessed.into());
26 | 		}
27 | 		if let Some(modified) = ts.modified {
28 | 			ft = ft.set_modified(modified.into());
29 | 		}
30 | 		#[cfg(windows)]
31 | 		if let Some(created) = ts.created {
32 | 			use std::os::windows::fs::FileTimesExt;
33 | 			ft = ft.set_created(created.into());
34 | 		}
35 | 
36 | 		ft
37 | 	}
38 | }
39 | 
40 | /// Set the permissions of a file.
41 | ///
42 | /// This uses `readonly` from attributes on Windows, `mode` if present on unix, and finally
43 | /// `readonly` on unix if `mode` wasn't there.
44 | #[instrument(level = "trace")]
45 | pub fn set_permissions(permissions: &mut Permissions, meta: &File) -> std::io::Result<()> {
46 | 	let readonly = meta.attributes.as_ref().and_then(|attrs| {
47 | 		attrs
48 | 			.get("read-only")
49 | 			.or_else(|| attrs.get("win32.read-only"))
50 | 			.and_then(|v| v.as_bool())
51 | 	});
52 | 
53 | 	#[cfg(windows)]
54 | 	{
55 | 		if let Some(readonly) = readonly {
56 | 			permissions.set_readonly(readonly)
57 | 		}
58 | 	}
59 | 
60 | 	#[cfg(unix)]
61 | 	{
62 | 		use std::os::unix::fs::PermissionsExt;
63 | 		if let Some(mode) = meta.mode {
64 | 			permissions.set_mode(mode);
65 | 		} else if let Some(readonly) = readonly {
66 | 			permissions.set_readonly(readonly);
67 | 		}
68 | 	}
69 | 
70 | 	Ok(())
71 | }
72 | 
73 | /// Set the ownership of a file.
74 | ///
75 | /// This uses `owner` and `group` if present, otherwise it does nothing.
76 | ///
77 | /// On non-Unix systems, this does nothing.
78 | #[instrument(level = "trace")]
79 | pub fn set_ownership(file: &FsFile, meta: &File) -> std::io::Result<()> {
80 | 	#[cfg(unix)]
81 | 	{
82 | 		use std::os::fd::AsRawFd;
83 | 
84 | 		let uid = meta
85 | 			.user
86 | 			.as_ref()
87 | 			.map(|user| user.to_real_uid())
88 | 			.transpose()?
89 | 			.flatten();
90 | 
91 | 		let gid = meta
92 | 			.group
93 | 			.as_ref()
94 | 			.map(|group| group.to_real_gid())
95 | 			.transpose()?
96 | 			.flatten();
97 | 
98 | 		let fd = file.as_raw_fd();
99 | 		tracing::trace!(%fd, ?uid, ?gid, "setting ownership");
100 | 		nix::unistd::fchown(fd, uid, gid)?;
101 | 	}
102 | 
103 | 	Ok(())
104 | }
105 | 
--------------------------------------------------------------------------------
/crates/zarc/src/metadata/encode.rs:
--------------------------------------------------------------------------------
1 | //! Helpers to read file metadata to encode [`File`]s.
2 | 
3 | use std::{
4 | 	collections::HashMap,
5 | 	fs::{self, Metadata},
6 | 	io::Result,
7 | 	num::NonZeroU16,
8 | 	path::Path,
9 | };
10 | 
11 | use tracing::{instrument, trace, warn};
12 | 
13 | use crate::directory::{
14 | 	AttributeValue, File, Pathname, PosixOwner, SpecialFile, SpecialFileKind, Timestamp, Timestamps,
15 | };
16 | 
17 | /// Build a [`File`] from a filename.
18 | ///
19 | /// Doesn't set the digest: you need to do that manually afterwards.
20 | ///
21 | /// Try using [`Decoder::build_file_with_metadata`] instead.
22 | ///
23 | /// This will perform syscalls; these are logged at trace level. Some errors are ignored. To get
24 | /// more control you can use the individual functions [in this module](self).
25 | ///
26 | /// [`readdir(3)`]: https://man.archlinux.org/man/readdir.3
27 | #[instrument(level = "trace")]
28 | pub fn build_filemap(edition: NonZeroU16, path: &Path, follow_links: bool) -> Result<File> {
29 | 	let name = Pathname::from_normal_components(path);
30 | 
31 | 	trace!("reading immediate metadata");
32 | 	let symeta = fs::symlink_metadata(path)?;
33 | 	let is_symlink = symeta.is_symlink();
34 | 
35 | 	let link_target = if is_symlink {
36 | 		trace!("reading link target");
37 | 		Some(fs::read_link(path)?)
38 | 	} else {
39 | 		None
40 | 	};
41 | 
42 | 	let meta = if follow_links && is_symlink {
43 | 		trace!("reading metadata");
44 | 		fs::metadata(path)?
45 | 	} else {
46 | 		symeta
47 | 	};
48 | 	trace!(?name, ?meta, "retrieved file metadata");
49 | 
50 | 	let file_type = meta.file_type();
51 | 
52 | 	Ok(File {
53 | 		edition,
54 | 		digest: None,
55 | 		name,
56 | 		user: owner_user(&meta)
57 | 			.map_err(|err| warn!(%err, "can't resolve user"))
58 | 			.unwrap_or_default(),
59 | 		group: owner_group(&meta)
60 | 			.map_err(|err| warn!(%err, "can't resolve group"))
61 | 			.unwrap_or_default(),
62 | 		mode: posix_mode(&meta),
63 | 		special: if file_type.is_dir() {
64 | 			Some(SpecialFile {
65 | 				kind: Some(SpecialFileKind::Directory),
66 | 				link_target: None,
67 | 			})
68 | 		} else if is_symlink {
69 | 			Some(SpecialFile {
70 | 				kind: Some(SpecialFileKind::Symlink),
71 | 				link_target: link_target.map(|path| path.as_path().into()),
72 | 			})
73 | 		} else {
74 | 			None
75 | 		},
76 | 		timestamps: Some(timestamps(&meta)),
77 | 		attributes: file_attributes(path, &meta)
78 | 			.map_err(|err| warn!(%err, "can't resolve attributes"))
79 | 			.unwrap_or_default(),
80 | 		extended_attributes: file_extended_attributes(path)
81 | 			.map_err(|err| warn!(%err, "can't resolve extended attributes"))
82 | 			.unwrap_or_default(),
83 | 		user_metadata: None,
84 | 	})
85 | }
86 | 
87 | /// Get the timestamps of the file.
88 | #[instrument(level = "trace")]
89 | pub fn timestamps(meta: &Metadata) -> Timestamps {
90 | 	Timestamps {
91 | 		created: meta.created().map(Timestamp::from).ok(),
92 | 		modified: meta.modified().map(Timestamp::from).ok(),
93 | 		accessed: meta.accessed().map(Timestamp::from).ok(),
94 | 	}
95 | }
96 | 
97 | /// Get the owning user of the file.
98 | ///
99 | /// On non-unix, always returns `Ok(None)`.
100 | #[instrument(level = "trace")]
101 | pub fn owner_user(meta: &Metadata) -> Result<Option<PosixOwner>> {
102 | 	#[cfg(unix)]
103 | 	{
104 | 		use std::os::unix::fs::MetadataExt;
105 | 		PosixOwner::from_uid(meta.uid())
106 | 	}
107 | 
108 | 	#[cfg(not(unix))]
109 | 	{
110 | 		Ok(None)
111 | 	}
112 | }
113 | 
114 | /// Get the owning group of the file.
115 | ///
116 | /// On non-unix, always returns `Ok(None)`.
117 | #[instrument(level = "trace")]
118 | pub fn owner_group(meta: &Metadata) -> Result<Option<PosixOwner>> {
119 | 	#[cfg(unix)]
120 | 	{
121 | 		use std::os::unix::fs::MetadataExt;
122 | 		PosixOwner::from_gid(meta.gid())
123 | 	}
124 | 
125 | 	#[cfg(not(unix))]
126 | 	{
127 | 		Ok(None)
128 | 	}
129 | }
130 | 
131 | /// Get the mode of the file.
132 | ///
133 | /// On non-unix, always returns `None`.
134 | #[instrument(level = "trace")]
135 | pub fn posix_mode(meta: &Metadata) -> Option<u32> {
136 | 	#[cfg(unix)]
137 | 	{
138 | 		use std::os::unix::fs::MetadataExt;
139 | 		Some(meta.mode())
140 | 	}
141 | 
142 | 	#[cfg(not(unix))]
143 | 	{
144 | 		None
145 | 	}
146 | }
147 | 
148 | /// Get attributes for a file, given its path and [`Metadata`].
149 | ///
150 | /// Returns `Ok(None)` on unsupported systems.
151 | ///
152 | /// ## Linux
153 | ///
154 | /// Translates present [`lsattr`/`chattr`][chattr] flags to boolean true at string keys,
155 | /// prefixed by `linux.`. Not every flag is translated; the list of those that are is exhaustive:
156 | ///
157 | /// - `append-only` for `APPEND` or [the `a` flag](https://man.archlinux.org/man/chattr.1#a)
158 | /// - `casefold` for `CASEFOLD` or [the `F` flag](https://man.archlinux.org/man/chattr.1#F)
159 | /// - `compressed` for `COMPR` or [the `c` flag](https://man.archlinux.org/man/chattr.1#c)
160 | /// - `delete-undo` for `UNRM` or [the `u` flag](https://man.archlinux.org/man/chattr.1#u)
161 | /// - `delete-zero` for `SECRM` or [the `s` flag](https://man.archlinux.org/man/chattr.1#s)
162 | /// - `dir-sync` for `DIRSYNC` or [the `D` flag](https://man.archlinux.org/man/chattr.1#D)
163 | /// - `encrypted` for `ENCRYPT` or [the `E` flag](https://man.archlinux.org/man/chattr.1#E)
164 | /// - `file-sync` for `SYNC` or [the `S` flag](https://man.archlinux.org/man/chattr.1#S)
165 | /// - `immutable` for `IMMUTABLE` or [the `i` flag](https://man.archlinux.org/man/chattr.1#i)
166 | /// - `no-atime` for `NOATIME` or [the `A` flag](https://man.archlinux.org/man/chattr.1#A)
167 | /// - `no-backup` for `NODUMP` or [the `d` flag](https://man.archlinux.org/man/chattr.1#d)
168 | /// - `no-cow` for `NOCOW` or [the `C` flag](https://man.archlinux.org/man/chattr.1#C)
169 | /// - `not-compressed` for `NOCOMPR` or [the `m` flag](https://man.archlinux.org/man/chattr.1#m)
170 | ///
171 | /// ## MacOS, iOS, FreeBSD, NetBSD
172 | ///
173 | /// Translates present [`chflags`][chflags] flags to boolean true at string keys,
174 | /// prefixed by `bsd.`. Not every flag is translated; the list of those that are is exhaustive:
175 | ///
176 | /// - `append-only` for `SF_APPEND` or `UF_APPEND`
177 | /// - `archived` for `SF_ARCHIVED`
178 | /// - `immutable` for `SF_IMMUTABLE` or `UF_IMMUTABLE`
179 | /// - `no-backup` for `UF_NODUMP`
180 | ///
181 | /// ## Windows
182 | ///
183 | /// Translates present [`FILE_ATTRIBUTE_*`][win32-file-attrs] flags to boolean true at string keys,
184 | /// prefixed by `win32.`. Not every flag is translated; the list of those that are is exhaustive:
185 | ///
186 | /// - `archived` for `FILE_ATTRIBUTE_ARCHIVE`
187 | /// - `compressed` for `FILE_ATTRIBUTE_COMPRESSED`
188 | /// - `encrypted` for `FILE_ATTRIBUTE_ENCRYPTED`
189 | /// - `hidden` for `FILE_ATTRIBUTE_HIDDEN`
190 | /// - `not-content-indexed` for `FILE_ATTRIBUTE_NOT_CONTENT_INDEXED` (opts the file out of content
191 | ///   indexing from Windows' crawlers, e.g. for the search functionality in Explorer and Start)
192 | /// - `system` for `FILE_ATTRIBUTE_SYSTEM`
193 | /// - `temporary` for `FILE_ATTRIBUTE_TEMPORARY`
194 | ///
195 | /// ## Common
196 | ///
197 | /// If any of these flags are present on a platform that supports them, they will also be present
198 | /// as unprefixed keys:
199 | ///
200 | /// - `append-only`
201 | /// - `compressed`
202 | /// - `immutable`
203 | ///
204 | /// If the file is read-only, this unprefixed flag will be present:
205 | ///
206 | /// - `read-only`
207 | ///
208 | /// [chattr]: https://man.archlinux.org/man/chattr.1
209 | /// [chflags]: https://man.freebsd.org/cgi/man.cgi?query=chflags&sektion=1&apropos=0&manpath=FreeBSD+14.0-RELEASE+and+Ports
210 | /// [win32-file-attrs]: https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants
211 | #[instrument(level = "trace")]
212 | pub fn file_attributes(
213 | 	path: &Path,
214 | 	meta: &Metadata,
215 | ) -> Result<Option<HashMap<String, AttributeValue>>> {
216 | 	let mut attrs = HashMap::new();
217 | 	#[cfg(target_os = "linux")]
218 | 	{
219 | 		use e2p_fileflags::{FileFlags, Flags};
220 | 		let flags = path.flags()?;
221 | 		attrs.extend(
222 | 			[
223 | 				("append-only", flags.contains(Flags::APPEND)),
224 | 				("casefold", flags.contains(Flags::CASEFOLD)),
225 | 				("compressed", flags.contains(Flags::COMPR)),
226 | 				("delete-undo", flags.contains(Flags::UNRM)),
227 | 				("delete-zero", flags.contains(Flags::SECRM)),
228 | 				("dir-sync", flags.contains(Flags::DIRSYNC)),
229 | 				("encrypted", flags.contains(Flags::ENCRYPT)),
230 | 				("file-sync", flags.contains(Flags::SYNC)),
231 | 				("immutable", flags.contains(Flags::IMMUTABLE)),
232 | 				("no-atime", flags.contains(Flags::NOATIME)),
233 | 				("no-backup", flags.contains(Flags::NODUMP)),
234 | 				("no-cow", flags.contains(Flags::NOCOW)),
235 | 				("not-compressed", flags.contains(Flags::NOCOMPR)),
236 | 			]
237 | 			.into_iter()
238 | 			.filter(|(_, v)| *v)
239 | 			.map(|(k, _)| (format!("linux.{k}"), AttributeValue::Boolean(true))),
240 | 		);
241 | 	}
242 | 
243 | 	#[cfg(any(
244 | 		target_os = "macos",
245 | 		target_os = "ios",
246 | 		target_os = "freebsd",
247 | 		target_os = "netbsd"
248 | 	))]
249 | 	{
250 | 		use nix::sys::stat::{stat, FileFlag};
251 | 
252 | 		let flags = FileFlag::from_bits_retain(stat(path)?.st_flags);
253 | 		attrs.extend(
254 | 			[
255 | 				(
256 | 					"append-only",
257 | 					flags.contains(FileFlag::SF_APPEND) || flags.contains(FileFlag::UF_APPEND),
258 | 				),
259 | 				("archived", flags.contains(FileFlag::SF_ARCHIVED)),
260 | 				(
261 | 					"immutable",
262 | 					flags.contains(FileFlag::SF_IMMUTABLE)
263 | 						|| flags.contains(FileFlag::UF_IMMUTABLE),
264 | 				),
265 | 				("no-backup", flags.contains(FileFlag::UF_NODUMP)),
266 | 			]
267 | 			.into_iter()
268 | 			.filter(|(_, v)| *v)
269 | 			.map(|(k, _)| (format!("bsd.{k}"), AttributeValue::Boolean(true))),
270 | 		);
271 | 	}
272 | 
273 | 	#[cfg(windows)]
274 | 	{
275 | 		use std::os::windows::fs::MetadataExt;
276 | 		use windows::Win32::Storage::FileSystem;
277 | 
278 | 		let flags = FileSystem::FILE_FLAGS_AND_ATTRIBUTES(meta.file_attributes());
279 | 
280 | 		attrs.extend(
281 | 			[
282 | 				(
283 | 					"archived",
284 | 					flags.contains(FileSystem::FILE_ATTRIBUTE_ARCHIVE),
285 | 				),
286 | 				(
287 | 					"compressed",
288 | 					flags.contains(FileSystem::FILE_ATTRIBUTE_COMPRESSED),
289 | 				),
290 | 				(
291 | 					"encrypted",
292 | 					flags.contains(FileSystem::FILE_ATTRIBUTE_ENCRYPTED),
293 | 				),
294 | 				("hidden", flags.contains(FileSystem::FILE_ATTRIBUTE_HIDDEN)),
295 | 				(
296 | 					"not-content-indexed",
297 | 					flags.contains(FileSystem::FILE_ATTRIBUTE_NOT_CONTENT_INDEXED),
298 | 				),
299 | 				("system", flags.contains(FileSystem::FILE_ATTRIBUTE_SYSTEM)),
300 | 				(
301 | 					"temporary",
302 | 					flags.contains(FileSystem::FILE_ATTRIBUTE_TEMPORARY),
303 | 				),
304 | 			]
305 | 			.into_iter()
306 | 			.filter(|(_, v)| *v)
307 | 			.map(|(k, _)| (format!("win32.{k}"), AttributeValue::Boolean(true))),
308 | 		);
309 | 	}
310 | 
311 | 	if attrs.is_empty() {
312 | 		Ok(None)
313 | 	} else {
314 | 		if attrs.contains_key("linux.append-only") || attrs.contains_key("bsd.append-only") {
315 | 			attrs.insert("append-only".to_string(), AttributeValue::Boolean(true));
316 | 		}
317 | 		if attrs.contains_key("linux.immutable") || attrs.contains_key("bsd.immutable") {
318 | 			attrs.insert("immutable".to_string(), AttributeValue::Boolean(true));
319 | 		}
320 | 		if attrs.contains_key("linux.compressed") || attrs.contains_key("win32.compressed") {
321 | 			attrs.insert("compressed".to_string(), AttributeValue::Boolean(true));
322 | 		}
323 | 		if meta.permissions().readonly() {
324 | 			attrs.insert("read-only".to_string(), AttributeValue::Boolean(true));
325 | 		}
326 | 
327 | 		Ok(Some(attrs))
328 | 	}
329 | }
330 | 
331 | /// Get extended attributes for a file, given its path.
332 | ///
333 | /// Returns `Ok(None)` on unsupported systems.
334 | ///
335 | /// Supported:
336 | /// - Android
337 | /// - FreeBSD
338 | /// - Linux
339 | /// - MacOS
340 | /// - NetBSD
341 | ///
342 | #[instrument(level = "trace")]
343 | pub fn file_extended_attributes(path: &Path) -> Result<Option<HashMap<String, AttributeValue>>> {
344 | 	#[cfg(unix)]
345 | 	{
346 | 		if xattr::SUPPORTED_PLATFORM {
347 | 			let list = xattr::list(path)?;
348 | 			let size_hint = list.size_hint();
349 | 			let mut map = HashMap::with_capacity(size_hint.1.unwrap_or(size_hint.0));
350 | 			for osname in list {
351 | 				match osname.to_str() {
352 | 					None => tracing::error!(?osname, ?path, "not storing non-Unicode xattr"),
353 | 					Some(name) => {
354 | 						if let Some(value) = xattr::get(path, &osname)? {
355 | 							map.insert(
356 | 								name.to_string(),
357 | 								crate::directory::CborString::from_maybe_utf8(value).into(),
358 | 							);
359 | 						}
360 | 					}
361 | 				}
362 | 			}
363 | 
364 | 			Ok(Some(map))
365 | 		} else {
366 | 			Ok(None)
367 | 		}
368 | 	}
369 | 
370 | 	#[cfg(not(unix))]
371 | 	Ok(None)
372 | }
373 | 
--------------------------------------------------------------------------------
/crates/zarc/src/ondemand.rs:
--------------------------------------------------------------------------------
1 | //! On-demand reader+seek trait and implementations.
2 | //!
3 | //! This is a trait that allows for obtaining multiple reader+seeker instances from a single byte
4 | //! source. Zarc uses it to allow for reading from multiple places in the source at the same time.
5 | //!
6 | //! This is implemented for files ([`Path`] and [`PathBuf`]) in this crate.
7 | 
8 | use std::{
9 | 	fs::File,
10 | 	io::{Read, Result, Seek},
11 | 	path::{Path, PathBuf},
12 | };
13 | 
14 | /// On-demand independent readers for a byte source.
15 | pub trait OnDemand {
16 | 	/// The output reader type.
17 | 	type Reader: Read + Seek;
18 | 
19 | 	/// Open an independent reader for this byte source.
20 | 	fn open(&self) -> Result<Self::Reader>;
21 | }
22 | 
23 | impl OnDemand for &Path {
24 | 	type Reader = File;
25 | 
26 | 	fn open(&self) -> Result<Self::Reader> {
27 | 		File::open(self)
28 | 	}
29 | }
30 | 
31 | impl OnDemand for PathBuf {
32 | 	type Reader = File;
33 | 
34 | 	fn open(&self) -> Result<Self::Reader> {
35 | 		File::open(self)
36 | 	}
37 | }
38 | 
--------------------------------------------------------------------------------
/crates/zarc/src/owner_cache.rs:
--------------------------------------------------------------------------------
1 | //! Caching lookup for user and group names.
2 | //!
3 | //! Looking up user and group names is very slow! In testing, this could often account for over 90%
4 | //! of the time spent in `zarc` when creating a new archive, and similarly when unpacking. To speed
5 | //! this up, we cache the results of these lookups at runtime, with the assumption that id/name
6 | //! mappings for users and groups won't change during an invocation of the program.
7 | 
8 | use std::collections::HashMap;
9 | 
10 | use nix::unistd::{Gid, Group, Uid, User};
11 | 
12 | /// A cache of user and group info.
13 | #[derive(Clone, Debug, Default)]
14 | pub struct OwnerCache {
15 | 	users: HashMap<Uid, User>,
16 | 	groups: HashMap<Gid, Group>,
17 | 	uid_by_name: HashMap<String, Uid>,
18 | 	gid_by_name: HashMap<String, Gid>,
19 | }
20 | 
21 | impl OwnerCache {
22 | 	/// Get a user from a UID, from cache or the system.
23 | 	pub fn user_from_uid(&mut self, uid: Uid) -> std::io::Result<Option<User>> {
24 | 		if let Some(user) = self.users.get(&uid) {
25 | 			return Ok(Some(user.clone()));
26 | 		}
27 | 
28 | 		let user = User::from_uid(uid)?;
29 | 		if let Some(user) = user.as_ref() {
30 | 			self.users.insert(uid, user.clone());
31 | 			self.uid_by_name.insert(user.name.to_owned(), user.uid);
32 | 		}
33 | 		Ok(user)
34 | 	}
35 | 
36 | 	/// Get a group from a GID, from cache or the system.
37 | 	pub fn group_from_gid(&mut self, gid: Gid) -> std::io::Result<Option<Group>> {
38 | 		if let Some(group) = self.groups.get(&gid) {
39 | 			return Ok(Some(group.clone()));
40 | 		}
41 | 
42 | 		let group = Group::from_gid(gid)?;
43 | 		if let Some(group) = group.as_ref() {
44 | 			self.groups.insert(gid, group.clone());
45 | 			self.gid_by_name.insert(group.name.to_owned(), group.gid);
46 | 		}
47 | 		Ok(group)
48 | 	}
49 | 
50 | 	/// Get a user from a name, from cache or the system.
51 | 	pub fn user_from_name(&mut self, name: &str) -> std::io::Result<Option<User>> {
52 | 		if let Some(uid) = self.uid_by_name.get(name) {
53 | 			return self.user_from_uid(*uid);
54 | 		}
55 | 
56 | 		let user = User::from_name(name)?;
57 | 		if let Some(user) = user.as_ref() {
58 | 			self.users.insert(user.uid, user.clone());
59 | 			self.uid_by_name.insert(name.to_owned(), user.uid);
60 | 		}
61 | 		Ok(user)
62 | 	}
63 | 
64 | 	/// Get a group from a name, from cache or the system.
65 | 	pub fn group_from_name(&mut self, name: &str) -> std::io::Result<Option<Group>> {
66 | 		if let Some(gid) = self.gid_by_name.get(name) {
67 | 			return self.group_from_gid(*gid);
68 | 		}
69 | 
70 | 		let group = Group::from_name(name)?;
71 | 		if let Some(group) = group.as_ref() {
72 | 			self.groups.insert(group.gid, group.clone());
73 | 			self.gid_by_name.insert(name.to_owned(), group.gid);
74 | 		}
75 | 		Ok(group)
76 | 	}
77 | }
78 | 
--------------------------------------------------------------------------------
/crates/zarc/src/trailer.rs:
--------------------------------------------------------------------------------
1 | //! Zarc Trailer structure
2 | //!
3 | //! This is the last part of a Zarc archive, and contains the critical metadata of the archive.
4 | //! Where [the header][super::header] is used to identify a Zarc file, this is used to actually
5 | //! decode it.
6 | //!
7 | //! The peculiarity of the trailer is that it's parsed backwards from the end. The digest field is
8 | //! potentially variable in length, and the only way to know its length is to read one of two bytes
9 | //! in the trailer, on either side of that variable field.
10 | //!
11 | //! However, reading a file backward is obnoxious and possibly slow, so the way this module works is
12 | //! with the [`Epilogue`], comprising the last six fields of the trailer, all fixed-size. You should
13 | //! use [`EPILOGUE_LENGTH`] to seek and read these bytes from the end, parse them, and then use
14 | //! [`Epilogue::full_length()`] to seek and read the remaining bytes, and finally pass them to
15 | //! [`Epilogue::complete()`] to obtain a [`Trailer`].
16 | //!
17 | //! Additionally, what you probably want to do for performance is to read, for example, a kilobyte
18 | //! from the end of the file at once, and then be reasonably sure that the whole trailer is in it.
19 | //!
20 | //! The trailer also has [`PROLOGUE_LENGTH`] bytes of "prologue", which this library ignores (but
21 | //! will write correctly). The prologue contains a duplicate of the digest type, and can be used to
22 | //! read the trailer "forward", if you really want to, though this library provides no support here.
23 | 
24 | use deku::prelude::*;
25 | 
26 | use super::integrity::{Digest, DigestType};
27 | 
28 | /// Zarc Trailer
29 | ///
30 | /// [Spec](https://github.com/passcod/zarc/blob/main/SPEC.md#zarc-trailer)
31 | #[derive(Clone, Debug, Eq, PartialEq)]
32 | pub struct Trailer {
33 | 	/// Digest of the directory.
34 | 	pub digest: Digest,
35 | 
36 | 	/// Digest (hash) algorithm.
37 | 	pub digest_type: DigestType,
38 | 
39 | 	/// Offset in bytes to the start of the [Directory][crate::directory]'s Zstandard frame.
40 | 	pub directory_offset: i64,
41 | 
42 | 	/// Uncompressed size in bytes of the directory.
43 | 	pub directory_uncompressed_size: u64,
44 | 
45 | 	/// Zarc format version number.
46 | 	///
47 | 	/// Should match [`ZARC_VERSION`][crate::ZARC_VERSION].
48 | 	pub version: u8,
49 | }
50 | 
51 | impl Trailer {
52 | 	/// Write the trailer to a writer.
53 | 	pub fn to_writer<W: std::io::Write>(&self, writer: &mut W) -> std::io::Result<()> {
54 | 		// reserved field and duplicated digest type
55 | 		writer.write_all(&[0, self.digest_type as u8])?;
56 | 
57 | 		writer.write_all(&self.digest)?;
58 | 
59 | 		let epilogue = Epilogue::from(self)
60 | 			.to_bytes()
61 | 			.map_err(std::io::Error::other)?;
62 | 		writer.write_all(&epilogue)
63 | 	}
64 | 
65 | 	/// Write the trailer to a vector.
66 | 	pub fn to_bytes(&self) -> Vec<u8> {
67 | 		let mut bytes = Vec::with_capacity(self.len());
68 | 		bytes.extend(self.digest.iter());
69 | 
70 | 		// UNWRAP: there's no way to construct an epilogue that doesn't serialise
71 | 		#[allow(clippy::unwrap_used)]
72 | 		bytes.extend(Epilogue::from(self).to_bytes().unwrap());
73 | 
74 | 		bytes
75 | 	}
76 | 
77 | 	/// The full length of the trailer in bytes.
78 | 	#[allow(clippy::len_without_is_empty)] // CLIPPY: this is not a collection
79 | 	pub fn len(&self) -> usize {
80 | 		self.digest.len() + EPILOGUE_LENGTH
81 | 	}
82 | 
83 | 	/// Make the offset positive.
84 | 	///
85 | 	/// Having the offset negative is very useful when _writing_ the trailer, but generally a pain
86 | 	/// when using it to decode the archive, so this method inverts it given the file length.
87 | 	///
88 | 	/// Does nothing if the offset is already positive.
89 | 	///
90 | 	/// See also [`Epilogue::make_offset_positive()`].
91 | 	pub fn make_offset_positive(&mut self, file_length: u64) {
92 | 		if self.directory_offset < 0 {
93 | 			self.directory_offset += file_length as i64;
94 | 		}
95 | 	}
96 | 
97 | 	/// Compute the check byte: the XOR of every byte of the prologue, digest, and epilogue, with the check byte itself taken as zero.
98 | 	pub fn compute_check(&self) -> u8 {
99 | 		let mut bytes = Vec::with_capacity(self.len());
100 | 		bytes.extend(&[0, self.digest_type as u8]);
101 | 		bytes.extend(self.digest.iter());
102 | 
103 | 		// UNWRAP: there's no way to construct an epilogue that doesn't serialise
104 | 		#[allow(clippy::unwrap_used)]
105 | 		bytes.extend(self.epilogue_without_check().to_bytes().unwrap());
106 | 
107 | 		bytes.iter().fold(0, |check, x| check ^ *x)
108 | 	}
109 | 
110 | 	/// Get the epilogue from this trailer, but set the check byte to 0.
111 | 	fn epilogue_without_check(&self) -> Epilogue {
112 | 		Epilogue {
113 | 			check: 0,
114 | 			digest_type: self.digest_type,
115 | 			directory_offset: self.directory_offset,
116 | 			directory_uncompressed_size: self.directory_uncompressed_size,
117 | 			version: self.version,
118 | 			magic: crate::ZARC_MAGIC.to_vec(),
119 | 		}
120 | 	}
121 | }
122 | 
123 | impl From<&Trailer> for Epilogue {
124 | 	fn from(trailer: &Trailer) -> Self {
125 | 		let mut epilogue = trailer.epilogue_without_check();
126 | 		epilogue.check = trailer.compute_check();
127 | 		epilogue
128 | 	}
129 | }
130 | 
131 | /// Length of the prologue in bytes.
132 | ///
133 | /// This is the wire length, not the size of the struct.
134 | pub const PROLOGUE_LENGTH: usize = 2;
135 | 
136 | /// Length of the epilogue in bytes.
137 | ///
138 | /// This is the wire length, not the size of the struct.
139 | pub const EPILOGUE_LENGTH: usize = 22;
140 | 
141 | /// The last six fields of the trailer, which are all fixed-size.
142 | #[derive(Clone, Debug, Eq, PartialEq, DekuRead, DekuWrite)]
143 | #[deku(endian = "little")]
144 | pub struct Epilogue {
145 | 	/// Digest (hash) algorithm.
146 | 	pub digest_type: DigestType,
147 | 
148 | 	/// Offset in bytes to the start of the [Directory][crate::directory]'s Zstandard frame.
149 | 	///
150 | 	/// A positive value is from the start of the file, a negative value is from the end.
151 | 	#[deku(bytes = "8")]
152 | 	pub directory_offset: i64,
153 | 
154 | 	/// Uncompressed size in bytes of the directory.
155 | 	#[deku(bytes = "8")]
156 | 	pub directory_uncompressed_size: u64,
157 | 
158 | 	/// Check byte.
159 | 	#[deku(bytes = "1")]
160 | 	pub check: u8,
161 | 
162 | 	/// Zarc format version number.
163 | 	///
164 | 	/// Should match [`ZARC_VERSION`][crate::ZARC_VERSION].
165 | 	#[deku(bytes = "1")]
166 | 	pub version: u8,
167 | 
168 | 	/// Magic number.
169 | 	///
170 | 	/// Should match [`ZARC_MAGIC`][crate::ZARC_MAGIC].
171 | 	#[deku(count = "3")]
172 | 	pub magic: Vec<u8>,
173 | }
174 | 
175 | impl Epilogue {
176 | 	/// The full length of the trailer including the variable fields.
177 | 	pub const fn full_length(&self) -> usize {
178 | 		PROLOGUE_LENGTH + self.digest_type.digest_len() + EPILOGUE_LENGTH
179 | 	}
180 | 
181 | 	/// Reparse the trailer from the full bytes.
182 | 	///
183 | 	/// This copies the bytes it needs.
184 | 	///
185 | 	/// Returns `Err(bytes needed)` if there's not enough data to parse the trailer.
186 | 	/// Passing in too much data is fine, so long as the epilogue is at the end.
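187 | 	///
188 | 	/// # Example (sketch)
189 | 	///
190 | 	/// A minimal sketch of the backward-reading flow described in the module docs,
191 | 	/// assuming `file` is an open Zarc archive at least a kilobyte long, with the
192 | 	/// whole trailer contained in that final kilobyte:
193 | 	///
194 | 	/// ```no_run
195 | 	/// use deku::DekuContainerRead;
196 | 	/// use zarc::trailer::{Epilogue, EPILOGUE_LENGTH};
197 | 	/// # use std::io::{Read, Seek, SeekFrom};
198 | 	/// # fn example(mut file: std::fs::File) -> std::io::Result<()> {
199 | 	/// // read a generous chunk from the end of the file
200 | 	/// let mut tail = vec![0u8; 1024];
201 | 	/// file.seek(SeekFrom::End(-(tail.len() as i64)))?;
202 | 	/// file.read_exact(&mut tail)?;
203 | 	///
204 | 	/// // parse the fixed-size epilogue from the very end of the buffer...
205 | 	/// let (_, epilogue) = Epilogue::from_bytes((&tail[tail.len() - EPILOGUE_LENGTH..], 0))
206 | 	/// 	.map_err(std::io::Error::other)?;
207 | 	///
208 | 	/// // ...then hand back the whole tail so it can extract the digest too
209 | 	/// let trailer = epilogue.complete(&tail).expect("tail shorter than trailer");
210 | 	/// # Ok(()) }
211 | 	/// ```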
212 | 	pub fn complete(&self, all_bytes: &[u8]) -> Result<Trailer, usize> {
213 | 		if all_bytes.len() < self.full_length() {
214 | 			return Err(self.full_length() - all_bytes.len());
215 | 		}
216 | 
217 | 		let head = all_bytes.len() - (self.digest_type.digest_len() + EPILOGUE_LENGTH);
218 | 		let size = self.digest_type.digest_len();
219 | 		let digest = all_bytes[head..(head + size)].to_vec();
220 | 
221 | 		Ok(Trailer {
222 | 			digest: Digest(digest),
223 | 			digest_type: self.digest_type,
224 | 			directory_offset: self.directory_offset,
225 | 			directory_uncompressed_size: self.directory_uncompressed_size,
226 | 			version: self.version,
227 | 		})
228 | 	}
229 | 
230 | 	/// Make the offset positive.
231 | 	///
232 | 	/// Having the offset negative is very useful when _writing_ the trailer, but generally a pain
233 | 	/// when using it to decode the archive, so this method inverts it given the file length.
234 | 	///
235 | 	/// Does nothing if the offset is already positive.
236 | 	///
237 | 	/// See also [`Trailer::make_offset_positive()`].
238 | 	pub fn make_offset_positive(&mut self, file_length: u64) {
239 | 		if self.directory_offset < 0 {
240 | 			self.directory_offset += file_length as i64;
241 | 		}
242 | 	}
243 | }
244 | 
--------------------------------------------------------------------------------
/zarc.magic:
--------------------------------------------------------------------------------
1 | 0	string	\x50\x2A\x4D\x18\x04\x00\x00\x00\x65\xAA\xDC	Zarc archive file
2 | >11	byte	x	version %d
--------------------------------------------------------------------------------