├── .cargo └── config.toml ├── .gitattributes ├── .github ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── bug_report_bin.md │ └── bug_report_lib.md ├── SUPPORT.md ├── pull_request_template.md └── workflows │ ├── build.yml │ └── test.yml ├── .gitignore ├── .readthedocs.yaml ├── .vscode └── settings.json ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE.txt ├── README.md ├── doc ├── Makefile ├── README.md ├── cli_reference.md ├── compiling.md ├── conf.py ├── downloads.md ├── export_import.md ├── index.md ├── install.md ├── install_manual.md ├── integration.md ├── intro_to_cli.md ├── make.bat ├── requirements.txt ├── setup.md └── usage_examples.md ├── examples ├── README.md ├── c-sharp │ ├── .gitignore │ ├── .vscode │ │ └── settings.json │ ├── WarcatExample.sln │ ├── WarcatExample │ │ ├── Decode.cs │ │ ├── Encode.cs │ │ ├── Message.cs │ │ ├── Program.cs │ │ └── WarcatExample.csproj │ └── example.warc ├── decode.rs ├── encode.rs ├── example.warc └── python │ ├── decode.py │ ├── encode.py │ └── message.py ├── misc └── release_digests │ ├── README.md │ ├── keys.toml │ ├── v0.1.0.toml │ ├── v0.2.0.toml │ ├── v0.3.0.toml │ ├── v0.3.1.toml │ ├── v0.3.2.toml │ ├── v0.3.3.toml │ └── v0.3.4.toml ├── roadmap.md ├── src ├── app.rs ├── app │ ├── arg.rs │ ├── common.rs │ ├── dump_help.rs │ ├── export.rs │ ├── extract.rs │ ├── filter.rs │ ├── format.rs │ ├── get.rs │ ├── import.rs │ ├── io.rs │ ├── list.rs │ ├── logging.rs │ ├── model.rs │ ├── progress.rs │ ├── self_.rs │ └── verify.rs ├── compress.rs ├── compress │ ├── decode.rs │ ├── encode.rs │ ├── zstd.rs │ └── zstd │ │ ├── decode.rs │ │ └── encode.rs ├── dataseq.rs ├── digest.rs ├── error.rs ├── extract.rs ├── fields.rs ├── fields │ ├── de.rs │ └── ser.rs ├── header.rs ├── header │ └── fields.rs ├── http.rs ├── http │ ├── h1.rs │ └── h1 │ │ ├── codec.rs │ │ ├── codec │ │ ├── chunked.rs │ │ └── compress.rs │ │ ├── error.rs │ │ ├── header.rs │ │ ├── header │ │ ├── fields.rs │ │ └── parse.rs │ │ ├── recv.rs │ │ └── send.rs ├── io.rs ├── lib.rs ├── main.rs ├── parse.rs ├── parse │ ├── fields.rs │ ├── fields_str.rs │ ├── header_deliminator.rs │ └── warc.rs ├── util.rs ├── verify.rs ├── warc.rs └── warc │ ├── decode.rs │ └── encode.rs ├── tests ├── test_decode.rs └── warc_generator.rs └── xtask ├── Cargo.toml ├── README.md └── src ├── digest.rs ├── dist_license.txt ├── dist_readme.txt ├── doc.rs ├── gh.rs ├── license.rs ├── main.rs └── package.rs /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [alias] 2 | xtask = "run --release --package xtask --" 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.warc binary -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | * If you encounter a bug and want to report it, please visit the [*Issues*](https://github.com/chfoo/warcat-rs/issues) page. Try searching if the problem already exists to help avoiding duplicate reports. When reporting bugs, try to fill out as much as the template as possible. 4 | * If there is something limiting the functionality of the software/library and you have details on greatly improving it, file a feature request in *Issues* page as well. 
5 | * If you want to contribute bug fixes, documentation, tests, or examples, please feel free to submit a Pull Request. If you want to submit a feature and are unsure whether it is useful, feel free to file an Issue first. 6 | * If you need help using Warcat, brainstorming ideas, or want to have a general discussion, please use the [*Discussions*](https://github.com/chfoo/warcat-rs/discussions) page instead. Keeping the Issues page on-topic will help keep it organized. 7 | 8 | ## Style guide 9 | 10 | * Please configure your IDE to use [Rustfmt](https://github.com/rust-lang/rustfmt). This is the code formatting style used by the project. 11 | * Also configure your IDE to use [Clippy](https://github.com/rust-lang/rust-clippy). This is optional but recommended. 12 | * Important: CLI code is put under the `bin` feature, which is not enabled by default. (This is a workaround to keep the library crate lightweight.) You need to configure your IDE/Clippy to enable the `bin` feature. 13 | * There is an inadvertent mix of line endings (CRLF/LF). For old files, please keep them as is for now. For new files, use LF. -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report_bin.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report (application) 3 | about: Create a bug report for the CLI application 4 | title: "[✏️Put a short title here, something error when doing something]" 5 | labels: 6 | assignees: 7 | --- 8 | 9 | **Describe the bug** 10 | 11 | [✏️A clear and concise description of what the bug is.] 12 | 13 | **To Reproduce** 14 | 15 | Program arguments: 16 | 17 | ``` 18 | [✏️e.g. warcat extract --input my_warc_file.warc.gz --output workspace/the_data/] 19 | ``` 20 | 21 | Steps to reproduce the behavior: 22 | 23 | 1. [✏️Remove this list if not applicable.] 24 | 2. ... 25 | 3. ... 26 | 27 | **Expected behavior** 28 | 29 | [✏️A clear and concise description of what you expected to happen.] 30 | 31 | **Screenshots/Logs** 32 | 33 | [✏️If applicable, attach sample files, screenshots, or log files to help explain your problem. Otherwise, delete this section.] 34 | 35 | **System** 36 | 37 | - OS: [✏️e.g. Windows 11, macOS 15, Ubuntu 24.04] 38 | - Terminal: [✏️e.g. Windows Console, Windows Terminal, macOS Terminal, GNOME Console, Konsole] 39 | - Program Version (Check with `--version`): [✏️e.g. 1.0.0.] 40 | 41 | **Additional context** 42 | 43 | [✏️Add any other context about the problem here or delete this section.] 44 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report_lib.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report (library) 3 | about: Create a bug report for the library 4 | title: "[✏️Put a short title here, something error when calling something]" 5 | labels: 6 | assignees: 7 | --- 8 | 9 | **Describe the bug** 10 | 11 | [✏️A clear and concise description of what the bug is.] 12 | 13 | **To Reproduce** 14 | 15 | ``` 16 | [✏️Sample code here that reproduces the behavior or attach a sample program.] 17 | ``` 18 | 19 | **Expected behavior** 20 | 21 | [✏️A clear and concise description of what you expected to happen.] 22 | 23 | **Screenshots/Logs** 24 | 25 | [✏️If applicable, attach screenshots or log files to help explain your problem. Otherwise, delete this section.] 26 | 27 | **System** 28 | 29 | - OS: [✏️e.g. 
Windows 11, macOS 15, Ubuntu 24.04] 30 | - Rust Version (Check with `rustc --version`): [✏️e.g. 1.80.0] 31 | - Crate Version (Check with `cargo tree --package warcat --depth 0`): [✏️e.g. 1.0.0] 32 | 33 | **Additional context** 34 | 35 | [✏️Add any other context about the problem here or delete this section. ] 36 | -------------------------------------------------------------------------------- /.github/SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | * If you need help with using Warcat, you can ask on the [*Discussions*](https://github.com/chfoo/warcat-rs/discussions) page. 4 | * If you want to file a bug report, use the [*Issues*](https://github.com/chfoo/warcat-rs/issues) page. 5 | * Please note that I'm not always available to provide help. Check out alternative places to ask questions in the section below. 6 | 7 | ## Alternative forums 8 | 9 | * For chatting about archiving in general, join [#archiveteam-bs](ircs://irc.hackint.org:6697/archiveteam-bs) on Hackint ([details](https://wiki.archiveteam.org/index.php/Archiveteam:IRC)). Note that this is a chat room; you may not receive an instant response due to time zones. 10 | * Alternatively, ask on the [ArchiveTeam Reddit](https://www.reddit.com/r/Archiveteam/). Note that this is a low-traffic forum. 11 | * For help on how to use command line programs or software in general, try searching or asking on [Super User](https://superuser.com/). 12 | * For help on programming software in general, try searching or asking on [Stack Overflow](https://stackoverflow.com/). -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | **Summary:** 2 | 3 | [✏️A description of the changes proposed in the pull request.] 4 | 5 | **Related issues:** 6 | 7 | [✏️Any references to related issues if applicable. Otherwise, delete this section.] 8 | 9 | **Other:** 10 | 11 | [✏️Comments on whether your PR needs further testing, has working tests, is part of a Hacktoberfest, etc. Otherwise, delete this section.] 
-------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build and package 2 | 3 | on: workflow_dispatch 4 | 5 | env: 6 | CARGO_TERM_COLOR: always 7 | 8 | jobs: 9 | build: 10 | strategy: 11 | fail-fast: false 12 | matrix: 13 | params: 14 | - os: ubuntu-22.04 15 | target: x86_64-unknown-linux-musl 16 | apt-install: musl-tools 17 | use-cross-rs: false 18 | - os: ubuntu-22.04 19 | target: aarch64-unknown-linux-musl 20 | apt-install: musl-tools 21 | use-cross-rs: true 22 | - os: windows-2022 23 | target: x86_64-pc-windows-msvc 24 | use-cross-rs: false 25 | - os: windows-2022 26 | target: aarch64-pc-windows-msvc 27 | use-cross-rs: false 28 | - os: macos-14 29 | target: x86_64-apple-darwin 30 | use-cross-rs: false 31 | - os: macos-14 32 | target: aarch64-apple-darwin 33 | use-cross-rs: false 34 | 35 | runs-on: ${{ matrix.params.os }} 36 | steps: 37 | - uses: kaven-universe/github-action-current-date-time@v1.4.0 38 | name: Current date time 39 | id: datetime 40 | with: 41 | format: YYYYMMDD_HHmmss 42 | - uses: imesense/gha-echo-action@v0.2 43 | name: Debug info 44 | with: 45 | input-string: | 46 | OS: ${{ matrix.params.os }} 47 | Target: ${{ matrix.params.target }} 48 | Date: ${{ steps.datetime.outputs.time }} 49 | - name: Install packages (apt) 50 | if: ${{ matrix.params.apt-install }} 51 | run: sudo apt-get -y install ${{ matrix.params.apt-install }} 52 | - uses: actions/checkout@v4 53 | - uses: Swatinem/rust-cache@v2 54 | with: 55 | key: ${{ matrix.params.os }}.${{ matrix.params.target }} 56 | - name: Install target 57 | run: rustup target add ${{ matrix.params.target }} 58 | - name: Run release build 59 | if: ${{ !matrix.params.use-cross-rs }} 60 | run: cargo build --features=bin --release --verbose --target ${{ matrix.params.target }} 61 | - name: Run cross release build 62 | if: ${{ matrix.params.use-cross-rs }} 63 | uses: houseabsolute/actions-rust-cross@v1.0.4 64 | with: 65 | command: build 66 | args: "--features=bin --release --verbose" 67 | target: ${{ matrix.params.target }} 68 | cross-version: 51f46f296253d8122c927c5bb933e3c4f27cc317 69 | - name: Package binary 70 | run: cargo xtask package-bin ${{ matrix.params.target }} 71 | - uses: actions/upload-artifact@v4 72 | name: Save artifact 73 | with: 74 | name: artifact.${{ matrix.params.target }}.${{ steps.datetime.outputs.time }} 75 | if-no-files-found: error 76 | path: | 77 | target/xtask-package-bin-output/* -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Cargo test 2 | 3 | on: 4 | push: 5 | branches: [ "main", "gh" ] 6 | pull_request: 7 | branches: [ "main", "gh" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | test: 14 | name: Test on latest Ubuntu 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4 18 | - uses: Swatinem/rust-cache@v2 19 | - name: Run tests 20 | run: cargo test --verbose --features=bin 21 | lint: 22 | name: Lint check on latest Ubuntu 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v4 26 | - uses: Swatinem/rust-cache@v2 27 | - name: Run clippy 28 | run: cargo clippy --verbose --features=bin 29 | - name: Make annotation 30 | run: if ! 
cargo clippy --quiet --features=bin -- -D warnings; then echo "::warning::Lint check failed"; fi 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Rust output 2 | /target/ 3 | 4 | # Sphinx output 5 | /doc/_build/ 6 | 7 | # Python output 8 | __pycache__ -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.12" 12 | # You can also specify other tool versions: 13 | # nodejs: "20" 14 | # rust: "1.70" 15 | # golang: "1.20" 16 | 17 | # Build documentation in the "docs/" directory with Sphinx 18 | sphinx: 19 | configuration: doc/conf.py 20 | # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs 21 | # builder: "dirhtml" 22 | # Fail on all warnings to avoid broken references 23 | # fail_on_warning: true 24 | 25 | # Optionally build your docs in additional formats such as PDF and ePub 26 | # formats: 27 | # - pdf 28 | # - epub 29 | 30 | # Optional but recommended, declare the Python requirements required 31 | # to build your documentation 32 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 33 | python: 34 | install: 35 | - requirements: doc/requirements.txt -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "rust-analyzer.cargo.features": [ 3 | "bin" 4 | ] 5 | } -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.3.4 (2025-06-02) 4 | 5 | * Fixed: WARC header incorrectly rejected as invalid (#8). 6 | * Fixed: Unexpected end of file error during decompression of the last record. 7 | * Changed: `--id` is now optional for `get export` and `get extract` (#7). 8 | 9 | ### Library 10 | 11 | * Added: PushDecompressor::write_eof() 12 | * Changed: PushDecoderEvent and PushDecoder::write_eof() 13 | 14 | ## 0.3.3 (2025-05-26) 15 | 16 | * Fixed: parse error for HTTP responses without a space after the status code (#3). 17 | * Fixed: wrong record boundaries listed for uncompressed WARC files (#4). 18 | * Added: new errors for unknown headers or unexpected compressed files (#5). 19 | 20 | ## 0.3.2 (2024-11-14) 21 | 22 | * Fixed: application named with version isn't detected as installer on macOS/Linux. 23 | 24 | ## 0.3.1 (2024-10-22) 25 | 26 | * Fixed: memory error reading ".warc.zst" files with compressed dictionaries. 27 | * Fixed: corrupted data reading and file offsets for highly compressed ".warc.zst" files. 28 | * Fixed: exclude-check from verify command not respected. 29 | * Fixed: ANSI codes written to log files. 30 | * Fixed: corrupted decoding Chunk-Transfer Encoding in cases where data aligns within a boundary. 31 | 32 | ## 0.3.0 (2024-10-20) 33 | 34 | * Fixed: false positive Payload Digest problem during verify for "revisit" records. 
35 | * Added: Get command for exporting/extracting single records. 36 | * Added: Record-at-time compression check to verify. 37 | * Added: Zstandard (.warc.zst) support. 38 | 39 | ### Library 40 | 41 | * Changed: `compress`: structs now take a configuration, renamed function for reading concatenated members 42 | * Added `warc::PushDecoder`. 43 | 44 | ## 0.2.0 (2024-10-12) 45 | 46 | * Fixed: HTTP decoder (and Extract command) incorrectly truncated data with Content-Length. 47 | * Fixed: Verify functionality: block and payload digest checks were not functional. 48 | * Added: filter options for Extract command. 49 | * Added: extract option for Export command. 50 | * Changed: Made the EndOfFile message explicit for the Export and Import commands. 51 | 52 | ## 0.1.0 (2024-10-11) 53 | 54 | * First release. 55 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["xtask"] 3 | 4 | [package] 5 | name = "warcat" 6 | version = "0.3.4" 7 | edition = "2024" 8 | license = "MPL-2.0" 9 | authors = ["Christopher Foo", "Warcat-rs contributors"] 10 | description = "Command-line tool and library for handling Web ARChive (WARC) files" 11 | repository = "https://github.com/chfoo/warcat-rs" 12 | categories = ["command-line-utilities", "parser-implementations"] 13 | keywords = ["archiving", "warc"] 14 | rust-version = "1.85" 15 | exclude = [ 16 | "/.cargo", 17 | "/.github/ISSUE_TEMPLATE", 18 | "/.github/pull_request_template.md", 19 | "/.github/workflows", 20 | "/.vscode", 21 | "/.readthedocs.yaml", 22 | "/misc" 23 | ] 24 | 25 | [lints.rust] 26 | 27 | [dependencies] 28 | # Dependencies for the binary, enabled by "bin" feature: 29 | anyhow = { version = "1.0.86", optional = true } 30 | clap = { version = "4.5.16", features = ["cargo", "derive"], optional = true } 31 | clap-markdown = { version = "0.1.4", optional = true } 32 | indicatif = { version = "0.17.8", optional = true } 33 | takecrate = { version = "1.0.0", optional = true } 34 | tempfile = { version = "3.12.0", optional = true } 35 | tracing-subscriber = { version = "0.3.18", features = ["json"], optional = true } 36 | # Everything: 37 | blake2 = "0.10.6" 38 | blake3 = { version = "1.5.4", features = ["pure", "traits-preview"] } 39 | brotli = "8.0.1" 40 | chrono = "0.4.38" 41 | ciborium = "0.2.2" 42 | crc32c = "0.6.8" 43 | crc32fast = "1.4.2" 44 | csv = "1.3.0" 45 | data-encoding = "2.6.0" 46 | digest = "0.10.7" 47 | flate2 = "1.0.31" 48 | md-5 = "0.10.6" 49 | nom = "8.0.0" 50 | percent-encoding = "2.3.1" 51 | redb = "2.1.3" 52 | regex = { version = "1.10.6", default-features = false, features = ["std", "perf"] } 53 | serde = "1.0.209" 54 | serde_json = "1.0.127" 55 | serde_with = { version = "3.11.0", features = ["base64", "hex"] } 56 | sha1 = "0.10.6" 57 | sha2 = "0.10.8" 58 | sha3 = "0.10.8" 59 | thiserror = "2.0.0" 60 | tracing = "0.1.40" 61 | url = "2.5.2" 62 | uuid = { version = "1.10.0", features = ["v7"] } 63 | xxhash-rust = { version = "0.8.12", features = ["std", "xxh3"] } 64 | zstd = { version = "0.13.2", optional = true } 65 | 66 | [dev-dependencies] 67 | anyhow = "1.0.86" 68 | rand = "0.9.1" 69 | rand_xoshiro = "0.7.0" 70 | tracing-test = { version = "0.2.5", features = ["no-env-filter"] } 71 | 72 | [features] 73 | default = ["zstd"] 74 | 75 | # Enables support for Zstandard and related APIs. 
76 | # zstd is optional because the crate relies on a C library that might not 77 | # be fully portable. 78 | zstd = ["dep:zstd"] 79 | 80 | # FIXME: blake3: a way to provide a "blake3-opt" feature to enable 81 | # compiling native code. The crate misuses the "pure" feature as a 82 | # subtractive feature and defaults to compiling. This is undesirable as it can 83 | # only check whether a compiler is supported, not whether it is installed. 84 | 85 | # This feature is intended to be used only for building the binary (main.rs) 86 | bin = [ 87 | "dep:anyhow", 88 | "dep:clap", 89 | "dep:clap-markdown", 90 | "dep:indicatif", 91 | "dep:takecrate", 92 | "dep:tempfile", 93 | "dep:tracing-subscriber", 94 | "serde/derive", 95 | ] 96 | 97 | [[bin]] 98 | name = "warcat" 99 | required-features = ["bin"] 100 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # warcat-rs 2 | 3 | Command-line tool and Rust library for handling Web ARChive (WARC) files. 4 | 5 | This project is a rewrite of the [warcat](https://github.com/chfoo/warcat/) project. 6 | 7 | ## Getting started 8 | 9 | * [💿 Downloads](https://github.com/chfoo/warcat-rs/releases) 10 | * [📖 User guide ![Read the Docs](https://img.shields.io/readthedocs/warcat-rs) 11 | ](https://warcat-rs.readthedocs.io/) 12 | * [📦 Crate repository ![Crates.io Version](https://img.shields.io/crates/v/warcat) 13 | ](https://crates.io/crates/warcat) 14 | * [📑 API documentation ![docs.rs](https://img.shields.io/docsrs/warcat) 15 | ](https://docs.rs/warcat) 16 | 17 | ## Compiling 18 | 19 | If you want to compile the program yourself, set up a [Rust environment](https://www.rust-lang.org/tools/install). 20 | 21 | * Project requires Rust 1.85 or higher. 22 | 23 | Once you have Rust installed, use the cargo build tool: 24 | 25 | ```sh 26 | cargo build --features=bin --release 27 | ``` 28 | 29 | The program will be placed in the `target` directory. 30 | 31 | ## Contributing & support 32 | 33 | * [Contributing](https://github.com/chfoo/warcat-rs/blob/main/.github/CONTRIBUTING.md) 34 | * [Support](https://github.com/chfoo/warcat-rs/blob/main/.github/SUPPORT.md) 35 | * [Changelog](https://github.com/chfoo/warcat-rs/blob/main/CHANGELOG.md) 36 | * [Development roadmap](https://github.com/chfoo/warcat-rs/blob/main/roadmap.md) 37 | 38 | ## License 39 | 40 | Copyright 2024-2025 Christopher Foo and Warcat-rs contributors. Licensed under Mozilla Public License 2.0 -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | This directory contains a [Sphinx documentation](https://www.sphinx-doc.org/) project. It is written in [MyST](https://myst-parser.readthedocs.io/), which is a superset of [CommonMark](https://commonmark.org/), which in turn standardizes Markdown. 4 | -------------------------------------------------------------------------------- /doc/compiling.md: -------------------------------------------------------------------------------- 1 | # Compiling it yourself (advanced) 2 | 3 | Compiling the application should only be done if you are comfortable doing it yourself. 4 | 5 | ## Steps 6 | 7 | Set up a [Rust environment](https://www.rust-lang.org/tools/install). The latest version of Rust should work. (Rust versions ≥ 1.85, < 2.0 are supported.) 8 | 9 | Once you have Rust installed, use the cargo build tool: 10 | 11 | ```sh 12 | cargo build --features=bin --release 13 | ``` 14 | 15 | The program will be placed in the `target` directory. You can run it as is, or install it by adding a "-installer" suffix to the filename before running it. -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'Warcat' 10 | copyright = '2024-2025 Warcat contributors' 11 | author = 'Warcat contributors' 12 | 13 | # -- General configuration --------------------------------------------------- 14 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 15 | 16 | extensions = ['myst_parser'] 17 | 18 | templates_path = ['_templates'] 19 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'README.md'] 20 | 21 | 22 | 23 | # -- Options for HTML output ------------------------------------------------- 24 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 25 | 26 | html_theme = 'sphinx_book_theme' 27 | html_static_path = ['_static'] 28 | -------------------------------------------------------------------------------- /doc/downloads.md: -------------------------------------------------------------------------------- 1 | # Downloads 2 | 3 | Downloads are available on the [Releases](https://github.com/chfoo/warcat-rs/releases) page. 
4 | 5 | ## Supported platforms 6 | 7 | * Windows 10 or newer 8 | * macOS 10.12 or newer 9 | * Linux (kernel compatible with musl 1.2.3) 10 | 11 | ### CPU types 12 | 13 | * x86_64 (x64): 64-bit Intel and AMD CPUs 14 | * Typically for most Windows and Linux devices 15 | * aarch64 (arm64): 64-bit ARM CPUs 16 | * Typically for newer macOS devices -------------------------------------------------------------------------------- /doc/export_import.md: -------------------------------------------------------------------------------- 1 | # Export/import format 2 | 3 | This section describes the message format used during the export and import commands. 4 | 5 | ## Message types 6 | 7 | ### Metadata 8 | 9 | The metadata message is provided only during the export command. It is produced at the start of a WARC record. 10 | 11 | map: 12 | 13 | * `Metadata` - map 14 | * `file` - string: The input filename of the WARC. 15 | * `position` - integer: The position in the WARC file where the record is located. For compressed files, this position is only valid if the file was compressed by concatenating compressed streams. 16 | 17 | Example: 18 | 19 | ```json 20 | { 21 | "Metadata": { 22 | "file": "./my_file.warc.gz", 23 | "position": 123 24 | } 25 | } 26 | ``` 27 | 28 | ### Header 29 | 30 | The header message is provided for both the export and import commands. It is produced when a header from a WARC record has been read. 31 | 32 | map: 33 | 34 | * `Header` - map 35 | * `version` - string: The WARC version string such as "WARC/1.1" 36 | * `fields` - array[[string, string]]: Name-value pairs. 37 | 38 | ```json 39 | { 40 | "Header": { 41 | "version": "WARC/1.1", 42 | "fields": [ 43 | ["WARC-Record-Type", "metadata"], 44 | ["Content-Length", "123"] 45 | ] 46 | } 47 | } 48 | ``` 49 | 50 | ### Block chunk 51 | 52 | The block chunk message is provided for both the export and import commands. It is produced when a segment of a block from a WARC record has been read. 53 | 54 | map: 55 | 56 | * `BlockChunk` - map 57 | * `data` - bytes: A segment of block data. For JSON, this is a string in base64 standard (with padding) encoding. 58 | 59 | ```json 60 | { 61 | "BlockChunk": { 62 | "data": "Zm9vYmFy" 63 | } 64 | } 65 | ``` 66 | 67 | ### Block end 68 | 69 | The block end message is provided for both the export and import commands. It is produced at the end of reading a block and its WARC record. 70 | 71 | map: 72 | 73 | * `BlockEnd` - map 74 | * `crc32` - integer (optional, unsigned 32-bit): CRC32 (ITU-T V.42) checksum of the block data. 75 | * `crc32c` - integer (optional, unsigned 32-bit): CRC32C checksum of the block data. 76 | * `xxh3` - integer (optional, unsigned 64-bit): XxHash XXH3 checksum of the block data. 77 | 78 | The checksum is used to ensure that message processing was implemented properly. 79 | 80 | When importing, it is required that at least one of the fields "crc32", "crc32c", or "xxh3" be provided. When exporting, all fields will be filled. 81 | 82 | ```json 83 | { 84 | "BlockEnd": { 85 | "crc32c": 123456 86 | } 87 | } 88 | ``` 89 | 90 | ### Extract metadata 91 | 92 | The extract metadata message is provided only during the export command with the extract option. 93 | 94 | map: 95 | 96 | * `ExtractMetadata` - map 97 | * `has_content` - boolean: Whether data can be extracted from this record. 98 | * `file_path_components` - array\[string\]: A safe filename for writing to disk. 99 | * `is_truncated` - bool: As recorded in the header field, whether the content is truncated. 
100 | 101 | Example: 102 | 103 | ```json 104 | { 105 | "ExtractMetadata": { 106 | "has_content": true, 107 | "file_path_components": ["http", "www.example.com", "index.html"], 108 | "is_truncated": false 109 | } 110 | } 111 | ``` 112 | 113 | ### Extract chunk 114 | 115 | The extract chunk message is provided for the export command with the extract option. It is produced when content can be extracted from a segment of block data. 116 | 117 | map: 118 | 119 | * `ExtractChunk` - map 120 | * `data` - bytes: A segment of block data. For JSON, this is a string in base64 standard (with padding) encoding. 121 | 122 | ```json 123 | { 124 | "ExtractChunk": { 125 | "data": "Zm9vYmFy" 126 | } 127 | } 128 | ``` 129 | 130 | ### Extract end 131 | 132 | The extract end message is provided for the export command with the extract option. It is produced at the end of extracting a record's block. 133 | 134 | map: 135 | 136 | * `ExtractEnd` - map 137 | * `crc32` - integer (optional, unsigned 32-bit): CRC32 (ITU-T V.42) checksum of the extracted content. 138 | * `crc32c` - integer (optional, unsigned 32-bit): CRC32C checksum of the extracted content. 139 | * `xxh3` - integer (optional, unsigned 64-bit): XxHash XXH3 checksum of the extracted content. 140 | 141 | ```json 142 | { 143 | "ExtractEnd": { 144 | "crc32c": 123456 145 | } 146 | } 147 | ``` 148 | 149 | ### End of file 150 | 151 | The end of file message indicates that the output stream is ending and no other messages will be sent. 152 | 153 | map: 154 | 155 | * `EndOfFile` - map 156 | 157 | ```json 158 | { 159 | "EndOfFile": {} 160 | } 161 | ``` 162 | 163 | ## Message flows 164 | 165 | During the export command, every record consists of: 166 | 167 | * 1 `Metadata` 168 | * 1 `Header` 169 | * 0 or more `BlockChunk` 170 | * 1 `BlockEnd` 171 | 172 | During the import command, every record consists of: 173 | 174 | * 1 `Header` 175 | * 0 or more `BlockChunk` 176 | * 1 `BlockEnd` 177 | 178 | After all records are processed, the `EndOfFile` message is sent. -------------------------------------------------------------------------------- /doc/index.md: -------------------------------------------------------------------------------- 1 | # Warcat User Guide 2 | 3 | This documentation provides a user guide for the Warcat application. 4 | 5 | * [Project homepage](https://github.com/chfoo/warcat-rs) 6 | * If you are looking for the API docs, see [this page](https://docs.rs/warcat) 7 | 8 | ```{toctree} 9 | :maxdepth: 2 10 | :caption: Contents: 11 | 12 | setup 13 | intro_to_cli 14 | usage_examples 15 | integration 16 | export_import 17 | cli_reference 18 | ``` 19 | -------------------------------------------------------------------------------- /doc/install.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Unzip the compressed file 4 | 5 | Before the application can be run, it needs to be unzipped from the compressed file you have downloaded. 6 | 7 | * How to [unzip on Windows](https://support.microsoft.com/en-us/windows/zip-and-unzip-files-f6dde0a7-0fec-8294-e1d3-703ed85e7ebc) 8 | * How to [unzip on macOS](https://support.apple.com/en-us/guide/mac-help/mchlp2528/mac) 9 | 10 | ## Installer 11 | 12 | The application supports installing itself, which is the default behavior. You can double-click it to run it. 13 | 14 | ### Disable the installer functionality 15 | 16 | To run the application as a standalone program, remove the "-installer" suffix from the filename. 17 | 18 | To manually install, see [this section](install_manual.md). 
19 | 20 | ## Common problems 21 | 22 | ### Windows 23 | 24 | The application is not signed and Windows may refuse to run it. To allow an exception, click on "Details" and select "Run anyway". 25 | 26 | ### macOS 27 | 28 | The application is not signed and macOS will refuse to run it by default. 29 | 30 | To allow an exception, right-click the program file and select "Open" or [follow these instructions](https://support.apple.com/en-us/guide/mac-help/mh40616/mac). 31 | 32 | ### macOS and Linux 33 | 34 | If you get an error saying it is not an executable program, you need to set the executable bit of the file. To do this, open the terminal and run a command similar to `chmod +x warcat-1.2.3`. -------------------------------------------------------------------------------- /doc/install_manual.md: -------------------------------------------------------------------------------- 1 | # Manual installation (advanced) 2 | 3 | If you do not want to use the automated installer, you can follow the instructions below to install it to your user account. 4 | 5 | ## Windows 6 | 7 | Place the executable in the `%LOCALAPPDATA%\Programs\warcat\bin\` folder. To access the Programs folder, press Windows+R and open `%LOCALAPPDATA%\Programs`. Then, create the folders needed if they do not exist. 8 | 9 | Ensure it is in the `Path` environment variable. To edit environment variables, press Windows+R and open 10 | `rundll32 sysdm.cpl,EditEnvironmentVariables`. Then, 11 | 12 | 1. Under User variables, select Path 13 | 2. Press "Edit..." to open a dialog window with a list 14 | 3. Press "New" to edit a blank line 15 | 4. Enter `%LOCALAPPDATA%\Programs\warcat\bin\` in the list. 16 | 5. Press "OK" to close the dialog window with a list 17 | 6. Press "OK" to save changes 18 | 7. If you have any opened Console/Terminal windows, close and reopen them again for changes to take effect. 19 | 20 | ## macOS or Linux 21 | 22 | Place the binary in the `$HOME/.local/bin` directory. You may need to create the directory if it does not exist. 23 | 24 | Ensure it is in the `PATH` environment variable. Check if this section is in the `$HOME/.profile` configuration file: 25 | 26 | ```sh 27 | if [ -d "$HOME/.local/bin" ] ; then 28 | PATH="$HOME/.local/bin:$PATH" 29 | fi 30 | ``` 31 | 32 | If not, add it. Then, log out and back in for changes to take effect. (If you do not want to close existing terminal windows, run `source $HOME/.profile`.) -------------------------------------------------------------------------------- /doc/integration.md: -------------------------------------------------------------------------------- 1 | # Integration with other programs 2 | 3 | Integration with other programs is done through standard input and output using the `export` and `import` commands. 4 | 5 | For reading WARC files, the `export` command will format the data into messages, such as JSON, which your program can ingest and process. Likewise for writing WARC files, the `import` command accepts messages from your program. 6 | 7 | For working examples, [see here](https://github.com/chfoo/warcat-rs/tree/main/examples). 8 | 9 | The format of the messages is documented in the next section. 10 | 11 | ## Overview 12 | 13 | In order to integrate with your programming language of choice, your language's libraries must be able to launch other programs and communicate using standard input and output. 14 | 15 | This section uses pseudocode to give an overview of how to read a WARC file. 
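For comparison, here is a minimal concrete sketch of the same flow in Python. It is an illustrative sketch only: it assumes `warcat` is on the search path, uses a hypothetical input filename `example.warc.gz`, and skips the error handling a real integration would need. (The repository's `examples/` directory contains more complete Python and C# programs.)

```python
import base64
import json
import subprocess

# Run warcat so it exports the WARC as JSON Lines messages on standard output.
with subprocess.Popen(
    ["warcat", "export", "--input=example.warc.gz", "--format=jsonl"],
    stdout=subprocess.PIPE,
) as process:
    for line in process.stdout:
        message = json.loads(line)

        if "Header" in message:
            # Start of a record: print its WARC header fields.
            for name, value in message["Header"]["fields"]:
                print(f"{name}: {value}")
        elif "BlockChunk" in message:
            # A segment of the record body, transported as base64.
            data = base64.b64decode(message["BlockChunk"]["data"])
            print(f"read {len(data)} bytes")
        elif "EndOfFile" in message:
            # No further messages will be sent.
            break
```

The pseudocode below walks through the same steps one message at a time.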
16 | 17 | To begin reading, run the warcat program with options to export and output in JSON Lines: 18 | 19 | ``` 20 | process <- run_process("warcat", "export", "--input", "example.warc.gz", "--format=jsonl") 21 | ``` 22 | 23 | Next, get the record header by reading lines containing JSON: 24 | 25 | ``` 26 | metadata_line <- process.stdout.read_line() 27 | metadata <- decode_json(metadata_line) 28 | 29 | print("Reading file " + metadata["Metadata"]["file"]) 30 | 31 | header_line <- process.stdout.read_line() 32 | header <- decode_json(header_line) 33 | 34 | header_fields <- header["Header"]["fields"] 35 | 36 | for each field <- header_fields do 37 | name = field[0] 38 | value = field[1] 39 | 40 | print("Header name: " + name + " value: " + value) 41 | end for 42 | ``` 43 | 44 | Next, get the record block data by reading a message at the top of each loop iteration: 45 | 46 | ``` 47 | loop do 48 | message_line <- process.stdout.read_line() 49 | message <- decode_json(message_line) 50 | 51 | if message.has_key("BlockEnd") then 52 | break loop 53 | end if 54 | 55 | block_chunk <- message 56 | b64_data <- block_chunk["BlockChunk"]["data"] 57 | data <- decode_base64(b64_data) 58 | 59 | print("Read " + data.length() + " bytes") 60 | end loop 61 | ``` 62 | 63 | Once you have read the end of the record, repeat the steps for each record until the end of file message is reached: 64 | 65 | ``` 66 | message_line <- process.stdout.read_line() 67 | message <- decode_json(message_line) 68 | 69 | is_end_of_file <- message.has_key("EndOfFile") 70 | ``` 71 | -------------------------------------------------------------------------------- /doc/intro_to_cli.md: -------------------------------------------------------------------------------- 1 | # Introduction to the CLI application 2 | 3 | To begin, open the terminal application. 4 | 5 | On Windows, right-click the Start icon or press Windows+X. Then, select [Terminal](https://learn.microsoft.com/en-us/windows/terminal/). 6 | 7 | On macOS, open Finder, then select Applications, Utilities, then [Terminal](https://support.apple.com/en-us/guide/terminal/apd5265185d-f365-44cb-8b09-71a064a42125/mac). 8 | 9 | On Linux, open Applications. Select System, then Terminal. Or, search for "terminal". 10 | 11 | The terminal application will then present a command line interface (CLI). On Windows, this is [PowerShell](https://learn.microsoft.com/en-us/powershell/). On macOS or Linux, this is typically [Bash shell](https://www.gnu.org/software/bash/manual/bash.html). 12 | 13 | If the application is under the search path (PATH environment variable), to run it, type: 14 | 15 | ```sh 16 | warcat 17 | ``` 18 | 19 | and press enter. 20 | 21 | Or, enter the location of the executable directly. For example (Windows): 22 | 23 | ```powershell 24 | .\Downloads\warcat.exe 25 | ``` 26 | 27 | macOS/Linux: 28 | ```sh 29 | ./Downloads/warcat 30 | ``` 31 | 32 | and press enter. 33 | 34 | If it is successful, the warcat application will display help information. 35 | 36 | Entering 37 | 38 | ```sh 39 | warcat help 40 | ``` 41 | 42 | will also show a list of commands and options. `help` is known as an argument that is passed to the program. 43 | 44 | For example, using the `list` command: 45 | 46 | ```sh 47 | warcat list --input my_warc_file.warc.gz 48 | ``` 49 | 50 | The above command has 3 arguments to the program: 51 | 52 | 1. `list` is the command. 53 | 2. `--input` is an option. It starts with 2 hyphens. This specifies that the program should accept an input filename. 54 | 3. `my_warc_file.warc.gz` is a value to the `input` option. 
55 | 56 | If an option value has spaces or special symbols, put quotation marks: 57 | 58 | ```sh 59 | warcat list --input "My WARC File (Copy).warc.gz" 60 | ``` 61 | 62 | Option values can also be specified by a `=` character if it helps with clarity: 63 | 64 | ```sh 65 | warcat list --input=my_warc_file.warc.gz 66 | warcat list --input="My WARC File (Copy).warc.gz" 67 | ``` 68 | 69 | Note that some options don't take a value. These options are also known as flags (as in boolean true/false): 70 | 71 | ```sh 72 | warcat --quiet 73 | ``` 74 | 75 | If you need help in a command, enter something like: 76 | 77 | ```sh 78 | warcat help list 79 | ``` -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==8.0.2 2 | sphinx-book-theme==1.1.3 3 | myst-parser==4.0.0 -------------------------------------------------------------------------------- /doc/setup.md: -------------------------------------------------------------------------------- 1 | # Setting up Warcat 2 | 3 | ```{toctree} 4 | :maxdepth: 1 5 | :caption: Contents: 6 | 7 | downloads 8 | install 9 | compiling 10 | ``` 11 | 12 | ```{toctree} 13 | :hidden: 14 | 15 | install_manual 16 | ``` -------------------------------------------------------------------------------- /doc/usage_examples.md: -------------------------------------------------------------------------------- 1 | # Usage examples 2 | 3 | ## Extract everything 4 | 5 | Extract resources from a WARC file as much as possible: 6 | 7 | ```sh 8 | warcat extract --input my_warc_file.warc.gz --output my_output_folder 9 | ``` 10 | 11 | ## Extract a single item 12 | 13 | First locate where the item is within the WARC file: 14 | 15 | ```sh 16 | warcat list --input my_warc_file.warc.gz --format csv 17 | ``` 18 | 19 | For the purposes of this example, we'll use this hypothetical listing: 20 | 21 | ```csv 22 | 45678,,response,application/http; msgtype=response,https://example.com/index.html 23 | ``` 24 | 25 | Then provide the position and ID to the `get extract` command: 26 | 27 | ```sh 28 | warcat get extract --input my_warc_file.warc.gz --position 45678 --id "" --output index.html 29 | ``` 30 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # 
Examples 2 | 3 | For languages other than Rust, please ensure `warcat` is accessible on your search path ("PATH"). 4 | 5 | ## Rust 6 | 7 | From the project root directory, run: 8 | 9 | ```sh 10 | cargo run --example decode 11 | cargo run --example encode 12 | ``` 13 | 14 | ## C# 15 | 16 | From the `examples/c-sharp/` directory, run: 17 | 18 | ```sh 19 | dotnet run --project WarcatExample 20 | ``` 21 | 22 | ## Python 23 | 24 | From the project root directory, run: 25 | 26 | ```sh 27 | python examples/python/decode.py 28 | python examples/python/encode.py 29 | ``` -------------------------------------------------------------------------------- /examples/c-sharp/.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | obj/ -------------------------------------------------------------------------------- /examples/c-sharp/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "dotnet.defaultSolution": "WarcatExample.sln" 3 | } -------------------------------------------------------------------------------- /examples/c-sharp/WarcatExample.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.0.31903.59 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WarcatExample", "WarcatExample\WarcatExample.csproj", "{2AF7D93D-A2EA-4F23-B80E-2317C74E244B}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(SolutionProperties) = preSolution 14 | HideSolutionNode = FALSE 15 | EndGlobalSection 16 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 17 | {2AF7D93D-A2EA-4F23-B80E-2317C74E244B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 18 | {2AF7D93D-A2EA-4F23-B80E-2317C74E244B}.Debug|Any CPU.Build.0 = Debug|Any CPU 19 | {2AF7D93D-A2EA-4F23-B80E-2317C74E244B}.Release|Any CPU.ActiveCfg = Release|Any CPU 20 | {2AF7D93D-A2EA-4F23-B80E-2317C74E244B}.Release|Any CPU.Build.0 = Release|Any CPU 21 | EndGlobalSection 22 | EndGlobal 23 | -------------------------------------------------------------------------------- /examples/c-sharp/WarcatExample/Decode.cs: -------------------------------------------------------------------------------- 1 | // Example on how to read WARC files. 2 | using System.Diagnostics; 3 | using System.Text.Json; 4 | 5 | namespace WarcatExample; 6 | 7 | class Decode 8 | { 9 | public static void Run() 10 | { 11 | var options = Message.Options(); 12 | 13 | // Launch the warcat program. The options provided will tell it to write 14 | // JSON as a line to standard out. 15 | // Ensure you have warcat on the search path or adjust the path as needed. 
16 | using (var process = new Process()) 17 | { 18 | process.StartInfo.FileName = "warcat"; 19 | process.StartInfo.ArgumentList.Add("export"); 20 | process.StartInfo.ArgumentList.Add("--input=example.warc"); 21 | process.StartInfo.ArgumentList.Add("--format=jsonl"); 22 | process.StartInfo.RedirectStandardOutput = true; 23 | process.Start(); 24 | 25 | while (true) 26 | { 27 | var line = process.StandardOutput.ReadLine(); 28 | 29 | if (line == null) 30 | { 31 | break; 32 | } 33 | 34 | // Decode each message 35 | var message = JsonSerializer.Deserialize(line, options)!; 36 | 37 | if (message.Header != null) 38 | { 39 | // We decoded the start of the record. 40 | foreach (var field in message.Header.Fields) 41 | { 42 | Console.WriteLine($"{field[0]}:{field[1]}"); 43 | } 44 | } 45 | else if (message.BlockChunk != null) 46 | { 47 | // We decoded the body of the record. 48 | Console.WriteLine($"{message.BlockChunk.Data.Length}"); 49 | } 50 | else if (message.EndOfFile != null) 51 | { 52 | // The end of the record was reached. 53 | Console.WriteLine("---"); 54 | } 55 | } 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /examples/c-sharp/WarcatExample/Encode.cs: -------------------------------------------------------------------------------- 1 | // Example on how to write WARC files. 2 | using System.Diagnostics; 3 | using System.IO.Hashing; 4 | using System.Text; 5 | using System.Text.Json; 6 | 7 | namespace WarcatExample; 8 | 9 | class Encode 10 | { 11 | public static void Run() 12 | { 13 | var options = Message.Options(); 14 | 15 | // Launch the warcat program. The options provided will tell it to read 16 | // JSON as a line from standard in. 17 | // Ensure you have warcat on the search path or adjust the path as needed. 18 | using (var process = new Process()) 19 | { 20 | process.StartInfo.FileName = "warcat"; 21 | process.StartInfo.ArgumentList.Add("import"); 22 | process.StartInfo.ArgumentList.Add("--compression=none"); 23 | process.StartInfo.ArgumentList.Add("--format=jsonl"); 24 | process.StartInfo.RedirectStandardInput = true; 25 | process.Start(); 26 | 27 | // Write a record header with the given header fields. 28 | // Note: this header is not valid; it is simply a concise demonstration. 29 | 30 | var header = new Message() 31 | { 32 | Header = new Header() 33 | { 34 | Version = "WARC/1.1", 35 | Fields = [ 36 | ["WARC-Record-Type", "resource"], 37 | ["Content-Length", "12"], 38 | ] 39 | } 40 | }; 41 | process.StandardInput.WriteLine(JsonSerializer.Serialize(header, options)); 42 | 43 | // Write the record block data. 44 | var hasher = new XxHash3(); 45 | 46 | var data = Encoding.UTF8.GetBytes("Hello world!"); 47 | hasher.Append(data); 48 | 49 | var block_chunk = new Message() 50 | { 51 | BlockChunk = new BlockChunk() 52 | { 53 | Data = data 54 | } 55 | }; 56 | process.StandardInput.WriteLine(JsonSerializer.Serialize(block_chunk, options)); 57 | 58 | // Write the end of the block message. 59 | var block_end = new Message() 60 | { 61 | BlockEnd = new BlockEnd() 62 | { 63 | Xxh3 = hasher.GetCurrentHashAsUInt64() 64 | } 65 | }; 66 | process.StandardInput.WriteLine(JsonSerializer.Serialize(block_end, options)); 67 | 68 | // Finish writing the file. 
69 | process.StandardInput.WriteLine(JsonSerializer.Serialize(new Message() { EndOfFile = new EndOfFile() }, options)); 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /examples/c-sharp/WarcatExample/Message.cs: -------------------------------------------------------------------------------- 1 | using System.Text.Json; 2 | using System.Text.Json.Serialization; 3 | 4 | namespace WarcatExample; 5 | 6 | public class Message 7 | { 8 | [JsonPropertyName("Metadata")] 9 | public Metadata? Metadata { get; set; } 10 | [JsonPropertyName("Header")] 11 | public Header? Header { get; set; } 12 | [JsonPropertyName("BlockChunk")] 13 | public BlockChunk? BlockChunk { get; set; } 14 | [JsonPropertyName("BlockEnd")] 15 | public BlockEnd? BlockEnd { get; set; } 16 | [JsonPropertyName("ExtractMetadata")] 17 | public ExtractMetadata? ExtractMetadata { get; set; } 18 | [JsonPropertyName("ExtractChunk")] 19 | public ExtractChunk? ExtractChunk { get; set; } 20 | [JsonPropertyName("ExtractEnd")] 21 | public ExtractEnd? ExtractEnd { get; set; } 22 | [JsonPropertyName("EndOfFile")] 23 | public EndOfFile? EndOfFile { get; set; } 24 | 25 | public static JsonSerializerOptions Options() 26 | { 27 | // Use snake_case for names. 28 | var options = new JsonSerializerOptions 29 | { 30 | PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower, 31 | DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull 32 | }; 33 | 34 | return options; 35 | } 36 | } 37 | 38 | public class Metadata 39 | { 40 | public required string File { get; set; } 41 | public required ulong Position { get; set; } 42 | } 43 | 44 | public class Header 45 | { 46 | public required string Version { get; set; } 47 | public required List Fields { get; set; } 48 | } 49 | 50 | public class BlockChunk 51 | { 52 | public required byte[] Data { get; set; } 53 | } 54 | 55 | public class BlockEnd 56 | { 57 | public uint? Crc32 { get; set; } 58 | public uint? Crc32c { get; set; } 59 | public ulong? Xxh3 { get; set; } 60 | } 61 | 62 | public class ExtractMetadata 63 | { 64 | public required bool HasContent { get; set; } 65 | public required List FilePathComponents { get; set; } 66 | public required bool IsTruncated { get; set; } 67 | } 68 | 69 | public class ExtractChunk 70 | { 71 | public required byte[] Data { get; set; } 72 | } 73 | 74 | public class ExtractEnd 75 | { 76 | public uint? Crc32 { get; set; } 77 | public uint? Crc32c { get; set; } 78 | public ulong? 
Xxh3 { get; set; } 79 | } 80 | 81 | public class EndOfFile { } 82 | -------------------------------------------------------------------------------- /examples/c-sharp/WarcatExample/Program.cs: -------------------------------------------------------------------------------- 1 | if (args.Length == 0) 2 | { 3 | System.Console.WriteLine("Specify 'encode' or 'decode'"); 4 | return 1; 5 | } 6 | 7 | if (args[0] == "encode") 8 | { 9 | WarcatExample.Encode.Run(); 10 | } 11 | else 12 | { 13 | WarcatExample.Decode.Run(); 14 | } 15 | 16 | return 0; 17 | -------------------------------------------------------------------------------- /examples/c-sharp/WarcatExample/WarcatExample.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Exe 5 | net8.0 6 | enable 7 | enable 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /examples/c-sharp/example.warc: -------------------------------------------------------------------------------- 1 | WARC/1.1 2 | WARC-Record-Type: resource 3 | Content-Length: 12 4 | 5 | Hello world! 6 | 7 | -------------------------------------------------------------------------------- /examples/decode.rs: -------------------------------------------------------------------------------- 1 | //! Example showing how to decode a WARC file by records. 2 | use std::{fs::File, io::Read}; 3 | 4 | use warcat::warc::{Decoder, DecoderConfig}; 5 | 6 | fn main() -> anyhow::Result<()> { 7 | // Source file 8 | let mut warc_file = File::open("examples/example.warc")?; 9 | 10 | // Configure the compression format if needed, otherwise use default 11 | let config = DecoderConfig::default(); 12 | 13 | // Create a new WARC decoder 14 | let mut decoder = Decoder::new(&mut warc_file, config)?; 15 | 16 | loop { 17 | // Check for end of file 18 | if !decoder.has_next_record()? { 19 | break; 20 | } 21 | 22 | // Get the header of the WARC record and a decoder for the 23 | // block part of a record. Note that `read_header()` consumes the 24 | // decoder and returns another decoder with a different type. This 25 | // is known as the typestate pattern. 26 | let (header, mut block_decoder) = decoder.read_header()?; 27 | println!("Header: {:?}", header); 28 | 29 | // Reading the block is like reading a file 30 | let mut buf = Vec::new(); 31 | block_decoder.read_to_end(&mut buf)?; 32 | println!("Block len: {}", buf.len()); 33 | 34 | // Get a header decoder. Again, this is the typestate pattern. 35 | decoder = block_decoder.finish_block()?; 36 | } 37 | 38 | // Get the inner reader if needed 39 | let _file = decoder.into_inner(); 40 | 41 | Ok(()) 42 | } 43 | -------------------------------------------------------------------------------- /examples/encode.rs: -------------------------------------------------------------------------------- 1 | //! Example on how to encode a WARC file by records 2 | use std::io::Write; 3 | 4 | use warcat::{ 5 | header::WarcHeader, 6 | warc::{Encoder, EncoderConfig}, 7 | }; 8 | 9 | fn main() -> anyhow::Result<()> { 10 | // For this example, our file is just a in-memory buffer 11 | let mut warc_file = Vec::new(); 12 | 13 | // Configure the compression format if needed, otherwise use default 14 | let config = EncoderConfig::default(); 15 | 16 | // Create a new WARC encoder 17 | let mut encoder = Encoder::new(&mut warc_file, config); 18 | 19 | // Write a header of a WARC record and return a block encoder. 
20 | // Note that `write_header()` consumes the encoder and returns a 21 | // decoder of a different type. This is known as the typestate pattern. 22 | let header = WarcHeader::new(12, "Resource"); 23 | let mut block_encoder = encoder.write_header(&header)?; 24 | 25 | // Write the block like a file. 26 | block_encoder.write_all(b"Hello world!")?; 27 | 28 | // Get a header encoder. Again, this is the typestate pattern. 29 | encoder = block_encoder.finish_block()?; 30 | 31 | // Get the inner writer if needed 32 | let _file = encoder.finish()?; 33 | 34 | println!("Wrote {} bytes", warc_file.len()); 35 | 36 | Ok(()) 37 | } 38 | -------------------------------------------------------------------------------- /examples/example.warc: -------------------------------------------------------------------------------- 1 | WARC/1.1 2 | WARC-Record-Type: resource 3 | Content-Length: 12 4 | 5 | Hello world! 6 | 7 | -------------------------------------------------------------------------------- /examples/python/decode.py: -------------------------------------------------------------------------------- 1 | # Example on how to read WARC files. 2 | import subprocess 3 | 4 | import message 5 | 6 | 7 | def main(): 8 | # Launch the warcat program. The options provided will tell it to write 9 | # JSON as a line to standard out. 10 | # Ensure you have warcat on the search path or adjust the path as needed. 11 | with subprocess.Popen( 12 | [ 13 | "warcat", 14 | "export", 15 | "--input=examples/example.warc", 16 | "--format=jsonl", 17 | ], 18 | stdout=subprocess.PIPE, 19 | ) as process: 20 | # Decode each message by using our helper module. 21 | for msg in message.decode(process.stdout): 22 | if isinstance(msg, message.Header): 23 | # We decoded the start of the record. 24 | print(msg.fields) 25 | elif isinstance(msg, message.BlockChunk): 26 | # We decoded the body of the record. 27 | print(len(msg.data)) 28 | elif isinstance(msg, message.BlockEnd): 29 | # The end of the record was reached. 30 | print("---") 31 | 32 | 33 | if __name__ == "__main__": 34 | main() 35 | -------------------------------------------------------------------------------- /examples/python/encode.py: -------------------------------------------------------------------------------- 1 | # Example on how to write WARC files. 2 | import subprocess 3 | import zlib 4 | 5 | import message 6 | 7 | 8 | def main(): 9 | # Launch the warcat program. The options provided will tell it to read 10 | # JSON as a line from standard in. 11 | # Ensure you have warcat on the search path or adjust the path as needed. 12 | with subprocess.Popen( 13 | [ 14 | "warcat", 15 | "import", 16 | "--compression=none", 17 | "--format=jsonl", 18 | ], 19 | stdin=subprocess.PIPE, 20 | ) as process: 21 | # Write a record header with the given header fields. 22 | # Note: this header is not valid; it is simply a concise demonstration. 23 | header = message.Header( 24 | "WARC/1.1", 25 | [ 26 | ("WARC-Record-Type", "resource"), 27 | ("Content-Length", "12"), 28 | ], 29 | ) 30 | message.encode(process.stdin, header) 31 | 32 | # Write the record block data. 33 | checksum = 0 34 | 35 | data = b"Hello world!" 36 | checksum = zlib.crc32(data, checksum) 37 | 38 | block_chunk = message.BlockChunk(data) 39 | message.encode(process.stdin, block_chunk) 40 | 41 | # Write the end of the block message. 42 | block_end = message.BlockEnd(checksum) 43 | message.encode(process.stdin, block_end) 44 | 45 | # Finish writing the file. 
46 | message.encode(process.stdin, message.EndOfFile()) 47 | 48 | 49 | if __name__ == "__main__": 50 | main() 51 | -------------------------------------------------------------------------------- /examples/python/message.py: -------------------------------------------------------------------------------- 1 | # This is a helper module that assists in encoding/decoding JSON messages from warcat. 2 | import json 3 | import base64 4 | import io 5 | 6 | 7 | # Represents the Metadata message. 8 | class Metadata: 9 | file: str 10 | position: int 11 | 12 | def __init__(self, file: str, position: int): 13 | self.file = file 14 | self.position = position 15 | 16 | def deserialize(file: str, position: str): 17 | return Metadata(file, int(position)) 18 | 19 | def serialize(self) -> dict: 20 | return { 21 | "Metadata": { 22 | "file": self.file, 23 | "position": self.position, 24 | } 25 | } 26 | 27 | 28 | # Represents the Header message. 29 | class Header: 30 | version: str 31 | fields: list 32 | 33 | def __init__(self, version: str, fields: list): 34 | self.version = version 35 | self.fields = fields 36 | 37 | def deserialize(version: str, fields: list): 38 | return Header(version, fields) 39 | 40 | def serialize(self) -> dict: 41 | return { 42 | "Header": { 43 | "version": self.version, 44 | "fields": self.fields, 45 | } 46 | } 47 | 48 | 49 | # Represents the BlockChunk message. 50 | class BlockChunk: 51 | data: bytes 52 | 53 | def __init__(self, data: bytes): 54 | self.data = data 55 | 56 | def deserialize(data: str): 57 | return BlockChunk(base64.b64decode(data)) 58 | 59 | def serialize(self) -> dict: 60 | return {"BlockChunk": {"data": base64.b64encode(self.data).decode("utf8")}} 61 | 62 | 63 | # Represents the BlockEnd message. 64 | class BlockEnd: 65 | crc32c: int 66 | 67 | def __init__(self, crc32: int = None, crc32c: int = None, xxh3: int = None): 68 | self.crc32 = crc32 69 | self.crc32c = crc32c 70 | self.xxh3 = xxh3 71 | 72 | def deserialize(crc32: int = None, crc32c: int = None, xxh3: int = None): 73 | return BlockEnd(crc32, crc32c, xxh3) 74 | 75 | def serialize(self) -> dict: 76 | return { 77 | "BlockEnd": {"crc32": self.crc32, "crc32c": self.crc32c, "xxh3": self.xxh3} 78 | } 79 | 80 | 81 | # Represents the EndOfFile message. 82 | class EndOfFile: 83 | def __init__(self): 84 | pass 85 | 86 | def deserialize(): 87 | return EndOfFile() 88 | 89 | def serialize(self) -> dict: 90 | return {"EndOfFile": {}} 91 | 92 | 93 | MESSAGE_TABLE = { 94 | "Metadata": Metadata, 95 | "Header": Header, 96 | "BlockChunk": BlockChunk, 97 | "BlockEnd": BlockEnd, 98 | "EndOfFile": EndOfFile, 99 | } 100 | 101 | 102 | class MessageEncoder(json.JSONEncoder): 103 | def default(self, o): 104 | if hasattr(o, "serialize"): 105 | return o.serialize() 106 | 107 | return super().default(o) 108 | 109 | 110 | def message_object_hook(obj: dict): 111 | for k, v in MESSAGE_TABLE.items(): 112 | if k in obj: 113 | return MESSAGE_TABLE[k].deserialize(**obj[k]) 114 | 115 | return obj 116 | 117 | 118 | # Write a message as a line of JSON to the given stream. 119 | def encode(stream: io.BufferedIOBase, message): 120 | data = MessageEncoder().encode(message).encode("utf8") 121 | 122 | stream.write(data) 123 | stream.write(b"\n") 124 | 125 | 126 | # A generator that produces messages by reading lines containing JSON from 127 | # the given stream.
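# Each line is parsed with message_object_hook, so known message types become the classes above and any unrecognized object is returned as a plain dict.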
128 | def decode(stream: io.BufferedIOBase): 129 | for line in stream.readlines(): 130 | segment = line.decode("utf8") 131 | 132 | yield json.loads(segment, object_hook=message_object_hook) 133 | -------------------------------------------------------------------------------- /misc/release_digests/README.md: -------------------------------------------------------------------------------- 1 | # Release digests 2 | 3 | Hash digests and signatures of published releases. 4 | 5 | These files are provided for those who want to verify the downloads. 6 | 7 | Digests are in hexadecimal. 8 | 9 | These file are intended to be updated by the publisher and to be authenticated using signed git commits. The xtask crate can be used to generate the contents. 10 | 11 | PGP and Minisign files are not uploaded because GitHub's interface shows all uploaded files in a single list which can confuse users with too many files to choose from. -------------------------------------------------------------------------------- /misc/release_digests/keys.toml: -------------------------------------------------------------------------------- 1 | [minisign-public-keys] 2 | chfoo = "RWQuQKHwtF7mWxV+/DmYv9NAic64DuxIjr8JDers7Aru4WJSfGZPiLqx" 3 | -------------------------------------------------------------------------------- /misc/release_digests/v0.1.0.toml: -------------------------------------------------------------------------------- 1 | [files] 2 | 3 | [files."warcat-0.1.0-linux-aarch64.tar.gz"] 4 | sha256 = "0ebe17af4a55b7a1c958bf0bb1bf64c9ca0ff7d69a4261ae72ccd84da5e30295" 5 | sha512 = "0cdcfd077dd11d1c3b12980bf1c2ec4eb31df1c087b752e70662f7fd836a43f80e419cc14a3628af184e19ba362121844052e9fe8b9e00157aa73e632e715bbb" 6 | blake2b = "80e1c0246ec39231f0c900ee30804fc81733e5002db584751e885bf35608faedc906cda0ff7c999f697cd9849debf3aeef0fc8fbb8990a5eb4c79b6d8fe01437" 7 | blake3 = "a1f540491b4d3bd8856f2fc2ebdb1926d4992c988364a67f82b1a250b98f9efb" 8 | minisign = """ 9 | untrusted comment: signature from rsign secret key 10 | RUQuQKHwtF7mW+bXhuP7T2wHqfu0i895qd5u4NeNfZ4tdfnD3dqbxqE4/lRGVCNsgM6GYfyzGA8w+qsw/TGQh7vq/F4GkgHtUw0= 11 | trusted comment: timestamp:1728978389 12 | EzdOPEWcIvMctC4VDHhjJKT4wx/qvtMpSDZDpVNgBSIvemf8qkdnbQx7reKLXuOlGyP5iJFoTOXlYh+rJK7bAw== 13 | """ 14 | 15 | [files."warcat-0.1.0-linux-x86_64.tar.gz"] 16 | sha256 = "846d6309ecfa1e03eb59edcece79bfe76c7fcad4e8c07c461843c90a09d38cd4" 17 | sha512 = "7cfa5c0a27f9b940fc6a43ec87c197861d30b79dbfad72fc6867ceda48d4ad83cc8f950c7310aa8c77d0bb01e95139e7a0abb3fb46ccf083e10902d2a0ac1418" 18 | blake2b = "fe426de7c158e3557a09811bed2dd3e28f0fbba8f8d7b5b40fb958cc6411c15fb5189f61b59c59edac60cddec6f01d9c9523d725a8af425f29dd87ca8e706a94" 19 | blake3 = "bc11a7ec992318809a611318a46694e2ca2efb417a4bb5525c6c896b872ccb69" 20 | minisign = """ 21 | untrusted comment: signature from rsign secret key 22 | RUQuQKHwtF7mW/+IjDs6Eqiq6xLLyvxyn3jW1+iDEjR0EF3qj4c6FkSb9ahJOitzu0jIn9+mRIF6EFB50IO5OJHwJdc09ppTcwg= 23 | trusted comment: timestamp:1728978389 24 | gpIZdh2hydI4yYkHfqhfl6bo1B3x/lASihJS1jR+YavyvMOnFQkariewHHl22PzXQHzzeJqSHJ1VLgCYog9wCA== 25 | """ 26 | 27 | [files."warcat-0.1.0-macos-aarch64.tgz"] 28 | sha256 = "66399651b2a6327262352dc22ee45da57f6a3f9d4d2ea24f24884313e5c32f49" 29 | sha512 = "910d292234d39c360f57c437f62bb6141900cd99fc5aba8a562a58406f5443fe91523252639a1be5b1e645276fcc606efb48b147d5fc9620ab0eb852bb5275f2" 30 | blake2b = "00cb723c7c7841521f0bd671fa52d4c7cd7f79bbfd91ac4d73dec791bfb4ad79aad2352df2bae50028997ee52c6dbdc650f3266ed388b1dbccb3a0d90631d33c" 31 | blake3 = 
"3a47793f2650dffbf91dd0bef1c18f9c41c619721772a78e12b6a660efd86c72" 32 | minisign = """ 33 | untrusted comment: signature from rsign secret key 34 | RUQuQKHwtF7mWxK4MMuvLqDiiZ9guft39n9JC8q8bI6Fh/U+xn1MgbXWXjAPIZIkYOzr90tO/4wBPEsl4VtRHVd6obBn8MNpTgw= 35 | trusted comment: timestamp:1728978389 36 | d/44ZG9Masie+f/WqU1AyBQtNnwqNpbOQtSuZ3ZMLB0/2liWJNNWk2V6tT0vq/TwvIVfS1pLJ9Uio1btW8KbCQ== 37 | """ 38 | 39 | [files."warcat-0.1.0-macos-x86_64.tgz"] 40 | sha256 = "596410f7afabe41b9e43f0e335d10642c95ba2af437153ce5a0eeddc582c990c" 41 | sha512 = "ba906154bfd23e4904c73cc7752f4978a06315141dc6bd3ee2a781f9057c4e765245870c49c03f7fd749ea1c1588f63090e15f1664c4a09dd127afe452304efa" 42 | blake2b = "9de64ef6c99d1d28e7dfb24029f73c19c87a2562db0c21af8b28c92a91026c634efc1286c6f6f78a53f55f6b9fa5524adc19dfd51db86a7f6020ef359eb0649b" 43 | blake3 = "e52d4c56a97e66074832c7850e6545ed7378779d6940cc446cf43d5aca1d3b22" 44 | minisign = """ 45 | untrusted comment: signature from rsign secret key 46 | RUQuQKHwtF7mW6KCRqUj6KtchRhudKpYLdM5dOCVJfDut/iF6SuPAdCArEjqUS/cxstS2vqadQZ2ls6dHxr3O+/D8brVBWt6dwA= 47 | trusted comment: timestamp:1728978389 48 | lMd7kMueZozMix9jpI13dY6JDf2LbUKwUZwssUG5588EPHo3r3aGxQzmQbfg5/nF4jgOCZDPw1j3SEA5gP99Dg== 49 | """ 50 | 51 | [files."warcat-0.1.0-windows-aarch64.zip"] 52 | sha256 = "9d14610de3f03a3cb67d770702906d77d9577921b0cc56b64c0e861b860366bc" 53 | sha512 = "eae838140b109f19d3208fd6741b23d1cf43d0706ef100df83874c834ddcd4b48041968a7d007ee98a472d493053319d01c784f18e80031e3277bc66c47e39fd" 54 | blake2b = "77981f4d67d4aff0c5c21cb9625eab730ce1f566ad252013b3d65905abf8f8476d2ea86a2b966ef9b49430128943525d5495686bea307f9f4cf48b8876b2d3c6" 55 | blake3 = "3200b094fccfed461af5442bbb558dec497ab0c654abaa794a029d16985a3752" 56 | minisign = """ 57 | untrusted comment: signature from rsign secret key 58 | RUQuQKHwtF7mW4i4DEj8V+SHjKnFyll9RSsLf6dOcrWvs+d0WYL6lCX+hnFMMTjg07bvllLhWZxFyQJRGYNvz02x+Dul7V8inww= 59 | trusted comment: timestamp:1728978389 60 | hYIJgW2H0BP8RTACNAT8pMh/Xqka3VqgjtrM44WS2BOhcQDGu+hnXHf8WZeCc1ecechaOG4oki20QlRxtTaSDg== 61 | """ 62 | 63 | [files."warcat-0.1.0-windows-x86_64.zip"] 64 | sha256 = "cc5096df315f6c6d98503a63685b1e45ff4395412c4cb894966dd396cbef86ef" 65 | sha512 = "856409ea4b399298094a2181e863bdf2e6112989c6f6939a18e11ce5e7b5bd19ee3124c287e4b5bb799c061d475a8971b587bc40f9295dc2eebd3aecc2622dbf" 66 | blake2b = "9503df06ac18930f9a3938c915e953880b1dcb422ad0e2757cce5da99c2d0b4dafa522f36f17ca49b18f7ab77d5628345d2c5a6ebb4d5d09890b4a37cbb87284" 67 | blake3 = "206931c5d99506ff6355e7d1ed78101163b8fdc8f28b411d19d8868bf1a11ab2" 68 | minisign = """ 69 | untrusted comment: signature from rsign secret key 70 | RUQuQKHwtF7mW63+qSubLSfrejtgNII+7UIqAriTat2e8TiR+is3/gBkW+DUF2lm1U7nKtgMtCNZdr14fAmS8GbU9vYWDd9Qcgg= 71 | trusted comment: timestamp:1728978389 72 | 1iYQVwNe46HsY7zZCUySdumWQX8Rg6fFCndT7keEGN/spMEl6QQEMBU3JOT+T+EBqg7AWqpCZOneEc95lzU9Cg== 73 | """ 74 | -------------------------------------------------------------------------------- /misc/release_digests/v0.2.0.toml: -------------------------------------------------------------------------------- 1 | [files] 2 | 3 | [files."warcat-0.2.0-linux-aarch64.tar.gz"] 4 | sha256 = "2a51fd906c0e6fa336413c9c28fef0ba4a3f7da58ac8e9c5124ea4c768109452" 5 | sha512 = "4bfba495a56de8e0501c57e012e4db7c921ce144f839e1ce2d56b805a466b69192d33d7c7583d40852ef99cca3d09cac1c32ab3652bc9bb079797baf7e81d06b" 6 | blake2b = "f3e6c7aee6d771e95e0c96e24171de142bfbe3d3001334e08859f21fccbd79b260f417b30f77c7d31cc8f96e9f5506ebeaeb4b8d03831320d7dff1a945705f51" 7 | blake3 = 
"9a2f3e13a57e0794812047782386d96838841b5cd276a7bc405899e9c5b02725" 8 | minisign = """ 9 | untrusted comment: signature from rsign secret key 10 | RUQuQKHwtF7mW29ZDrTDN7kXMS6lfOtOxyEtDKkLNdyApSTxpK+rJkMiuJs+aOX/DAI2NSPsD+ToUo69VsJs2+x+rGBXpAj76ws= 11 | trusted comment: timestamp:1728978389 12 | K4nDuKcks6FcXknnCRXPfLIrZfAxQsY31ls6BZjzcQs5Yn3DZ7UvG27b5tj0rkWH2kacWDOzvKrDBYyhaNf3AQ== 13 | """ 14 | 15 | [files."warcat-0.2.0-linux-x86_64.tar.gz"] 16 | sha256 = "b85d093e2dbce2b143923d6b9204165235eeea2219296b3f6763e512a43f20d2" 17 | sha512 = "0db021d416b0f38d362c2015d5905418574eb352bd35b29c2b93e6788767f2e82c4d380c91934af8b3a1042d574799c18ea15ba207768814200ceb2399c9b0f1" 18 | blake2b = "d66cb71aac57126995653fe195dba857236c46319c51a16cdea30d14abff570316da0df2e07da5966e81b027b8e0041e25378275f12e7eee4470c55442c11c7e" 19 | blake3 = "9b902f728c190566f0ee489ae1794a2f15a2e99d451888cd8c4ab9dba59de607" 20 | minisign = """ 21 | untrusted comment: signature from rsign secret key 22 | RUQuQKHwtF7mW6+t2rQMqgsWIfx9Y4nfD3sbPeNNJkdFkWs6OU8Jpno386ZGxZFDta6ToViozgk96MUjjKyilIf2EFhZyQqFFwI= 23 | trusted comment: timestamp:1728978389 24 | cAbX8ogfJogclVWSk4XYE30Y++k4WXAtJbsO1Ak1Vj1iFcWZbq9T3a8Cc3wCi9aNYvJaR+CvsFD4KBiKyHRqBg== 25 | """ 26 | 27 | [files."warcat-0.2.0-macos-aarch64.tgz"] 28 | sha256 = "7108a64406ca9949e137ecbae3986fd37e860c9d5c5c526085c334f08c4453aa" 29 | sha512 = "b2a7171208d39a634978971f3cbbfddc092b0f487ab044d023e52035968921b1032afea134323b11c284bb1a9861166fb6acbc17d778709619d54d29b0976950" 30 | blake2b = "86a5c8d45ebbfb9d91122331ecf16c39a898c0fb0a33985013b57b2f57e811275a5625bf71b77c28af401591d1339900897fd40d7f453b39cde74ef678ad7705" 31 | blake3 = "72616bd00e24f5b00981860e94904e10ccf64a0128e4c15fcb41f41e8ba8152d" 32 | minisign = """ 33 | untrusted comment: signature from rsign secret key 34 | RUQuQKHwtF7mW3oeI6HIKz19fogsqpYZmsx3N6xrNJkhNfVkmoTtYfljEC9SbaS9jfQwZlNFwBNZCk4VYuzpJH1NH1el1xm64AU= 35 | trusted comment: timestamp:1728978389 36 | jgoOLYPm/j4Of1WEGSscWZeGY1hD4l+I0h9FRmhkvDmXZW0ohbyWW6D+GHH6YBnF7M7tjPPq1HwRyLOL73JPDA== 37 | """ 38 | 39 | [files."warcat-0.2.0-macos-x86_64.tgz"] 40 | sha256 = "9c406ba6eec2236b027a94508dbce7d939e65fa6d875f1e029c5e5b86596cf45" 41 | sha512 = "db6c2e4fa96c7b5a39a73494abf6475d1de05c14a8ed724fb308c8143ef920e34f63b6b2599e54e7bb0bf473367a6c86728849f391c90db699bc03b8c2ce6f3a" 42 | blake2b = "15c8a1afa6f01080d6205a5051e46afd5c79cba75a6dc8d58aaf188198d6f8faa75f1d44a5eedbc54e2c590505b90341ba9144bed245280a44ee91e8e7d2fdab" 43 | blake3 = "58251a94f9a271e6d67acc992057c61cf1bbfdacc7114b73f7eb17b9dc8498a7" 44 | minisign = """ 45 | untrusted comment: signature from rsign secret key 46 | RUQuQKHwtF7mW84VqjJwHHdbkR5E6gFlRLN451oeHVmLM6ztIJYEWpCrBAEWKZtOHhxqXyviJcPEVm+6f+yD1segt7PUy3/eEAc= 47 | trusted comment: timestamp:1728978390 48 | g5bsEHQ6/jUJITQjDh+xXdAN/tYSdpvdO2iw295bGpmKOuNa/LGTC3ZHu3mtD5Kt4YgRutbRuZxUfBs7uYh5BA== 49 | """ 50 | 51 | [files."warcat-0.2.0-windows-aarch64.zip"] 52 | sha256 = "34874c0cbf10fd55d056757f24d0e869c36c23411e7c405384b5f431af6b9237" 53 | sha512 = "4625debf653147c9959bf0941976191dde3a42610a936482f7dddda06d82b0b3bdebb45ae2ccea141a77b0c08f619bc212a9065359c0a49f6d4c9a0401cdd92d" 54 | blake2b = "bcca9e0f6d461bdb2c6d12b77d6fc4e58d3b568ed14f8572c4886e55f53d8fe1108f5411c6120113fe18e1c24285fab24bde3ea7f160f0ceeca4e24d6dc3adf7" 55 | blake3 = "c4d25b8613fe43f327755111dfad4499ace3fa7e35a89d714ef48005fdc3114b" 56 | minisign = """ 57 | untrusted comment: signature from rsign secret key 58 | 
RUQuQKHwtF7mW+jGbbjGXQumuC+jOplUJVE8knjlrOwbgcR8q9PTcVfKdep1Q6srtm/c+/jsL9TqdDyUUNqi92VpoVvEALIctwI= 59 | trusted comment: timestamp:1728978390 60 | tSggzsS8js0vtgosl2uON77MAyjNH56twBPHu8wcdO5dkMoIAGHl5Co2/H/L0OtyEhZn/7r/b2Xa4+72CC6RBw== 61 | """ 62 | 63 | [files."warcat-0.2.0-windows-x86_64.zip"] 64 | sha256 = "c252580a57da42641154da3131110aabd3c79462d8499f0bbb4407af067c1861" 65 | sha512 = "c356afdf787bbe6b8fea1c1d6f947246c120ceb4677bfb1e2c5ec3113b09d46ef2f81436980c9e41f150a33597f63d9c5d5ea030de311fa5e9b9e0149fd37567" 66 | blake2b = "6b9df7ec2a4ccae4e4c32196ffbbc73ffcd45b920cee08b29950438a4979b0563367c5ef3e15fc7fc5d9c942af2b19fc40706ae58b405bdc21af9ecfa53f221a" 67 | blake3 = "ebebe4cc439cd7b5108141166beeb9b60f43515785cf4ac97662a4c64658308e" 68 | minisign = """ 69 | untrusted comment: signature from rsign secret key 70 | RUQuQKHwtF7mW7qI255kp3l9RUtgZVij+hrAbE8cHBa0sMI2CqUvyNfOZjEnaGUPfl8mP3bFnQvBgxY2GP/ysdDAp5f7SvcwCQo= 71 | trusted comment: timestamp:1728978390 72 | Onrfl4wIUJox3iAC/MTf274MQwCQcvncaOKguGJD6yQJoDb55G1jQ/cODWkTtki3x/VjTAov18hoTZDiOl/aDQ== 73 | """ -------------------------------------------------------------------------------- /misc/release_digests/v0.3.0.toml: -------------------------------------------------------------------------------- 1 | [files] 2 | 3 | [files."warcat-0.3.0-linux-aarch64.tar.gz"] 4 | sha256 = "5a061616dafef0df239e43ad3491977b628febb99e44066c2ad6b0a9cd92e42d" 5 | sha512 = "5919377ef028d5d22a5a97f39bc51e744a81406f46b0645e487da0e8ee327f64c0e8630c83b0dd450c27c264649af40d7f7b8605fe0b1a1f44e3b92a6a77f5cc" 6 | blake2b = "dd3c2c4a386266a53a0e7a147f09243baa5c6786a8321e6bb2af98cc9a850f57438af704d7a4ccccaaadb206f0bb9dc37bc2aa710e94a76bcd1d6bd2a9e5a3b8" 7 | blake3 = "a34c3d419b72b7fd75cce5f8a1c6b99cca56eb88e4d68d4d1ccf7e3b8d15930b" 8 | minisign = """ 9 | untrusted comment: signature from rsign secret key 10 | RUQuQKHwtF7mW2glP/mfzY1+yqGzqIB7Y2DHkGvz4R/5YBXaia4co6/53I66bwHHqVGvRfVzSwsBn8L481+zzpg76fYIb2RAgg4= 11 | trusted comment: timestamp:1729508179 12 | XV7DRudNouqe38es3cSMbc1DB2VN0yJyuUImfkoq7nfYsRo+2KcOv0BAEFhCBdjUYExbeG35FFaQxj3e4bWwDQ== 13 | """ 14 | 15 | [files."warcat-0.3.0-linux-x86_64.tar.gz"] 16 | sha256 = "36b2bfe4fe633f81499fb7bd33ad78f155852bc062bc5c4253d724b42670f4fc" 17 | sha512 = "8a4a577cca44a7e6085a1df5d2b4bb2cede0e35cc4ac9a336077aa21bb69944f6b8f2864625d7dd2d871268ed12e60f5c4d329ba4acf4360d01152f02371b28d" 18 | blake2b = "1b70ce138eb72c218ddfce7d092b00d478392af4d764a1e829b209bc65b2fe393d0eede5a70008f19428d4dd4f6d4c5f0ae31df1c535fcb065255a94f98c5cde" 19 | blake3 = "94ff9651028d0c16f2007ce8e99e3decef6d0e37dd1559e2db8ff563761714cc" 20 | minisign = """ 21 | untrusted comment: signature from rsign secret key 22 | RUQuQKHwtF7mWyNjnkNrJiXja0r5B8EZJ+heYwWe1Gy4++UmZT0LdsngzLIP0P9x1o4i4ag0vvp40x8z8QQu85yc4va4iTWxygY= 23 | trusted comment: timestamp:1729508179 24 | haeqbaf44hBuIoBXiJ0L1/Nq4l/GMJBOdm7W8WvDBDJBHLDE+KKYRp3PmbUcVOZLz5/+Yr7lLeIoZzBL53PmCQ== 25 | """ 26 | 27 | [files."warcat-0.3.0-macos-aarch64.tgz"] 28 | sha256 = "f4af75a06c7b69512322769d95e7494a332ee0d566f204736c2d647d79fd8ebc" 29 | sha512 = "1b47bb338c58afe34f2bdfad6aae8d037a582aeea7d2c844d3e886f617f12a1eeb4938a5ad2833e99a2b8150a00041b209015585a04ed14f9b9950ebd8e8e04a" 30 | blake2b = "781e7180b2dc2020e101ce01a7e90149f928ec4672a8ac53f35fa5395a10b8d704ee5c389c0a8d08818dd591031e5097b434dd930f07d120281de0405877111f" 31 | blake3 = "0924084bb9f4644c78db377d69c2a4fa7e8d4c54e00a894d86eefa7b30086901" 32 | minisign = """ 33 | untrusted comment: signature from rsign secret key 34 | 
RUQuQKHwtF7mW7tMZgq1+NCPhnqab83FH1mWz/WuVwghKEF3SpPilmTQGqCKV8WkGN6Cg60f1uGKFmlxXD046ipuHXxse6wMKQs= 35 | trusted comment: timestamp:1729508179 36 | Vp8d/VcJhingkgc0/djgmg2gFsGvTUduk0N1bTj7N73vrpzrbcmVG5aXTjm9VcQ8zUOpc0Vk2QDRe+Ww23+JCA== 37 | """ 38 | 39 | [files."warcat-0.3.0-macos-x86_64.tgz"] 40 | sha256 = "a15d30f4cbdd2394a1218d2692008077cd5754d6040b2adf8081311770ec9800" 41 | sha512 = "fc1125bab186bf8cacc7acdb0f31d69e1aa6bd92b2aeae5d9210cd1b5a9a613997c119d62d39330d12b3bf49a74082e533673bc0aa198520a370d4551ba85792" 42 | blake2b = "17a33aec8132c7c9c32cc5988d883b797daec0795cc6845e9c96ca7331cc41e383040d0125baffc6d4f3a80d1f300f577f787dc83232056a327dba63935d7fb3" 43 | blake3 = "754aa921560f8b6d38b5f6def654ea32e4c353ddc6265183a283f28365b3fc2f" 44 | minisign = """ 45 | untrusted comment: signature from rsign secret key 46 | RUQuQKHwtF7mW450mR2d1ariE5pq3OjeVHDCUfNEhNHGi9PlEmEnkAwNI/H8bp8jEUAzID0HjG9tJiMwHrhD2IbSjvMUq/VEuwo= 47 | trusted comment: timestamp:1729508179 48 | cIRQh5nVXse9ROtdr91x5Ti907P4/0jGW49gKqy2qzO3EQmL/AH5+KxjwDW62B2QyijlFeYS/YcqD5xWO9mRAQ== 49 | """ 50 | 51 | [files."warcat-0.3.0-windows-aarch64.zip"] 52 | sha256 = "90874e7fef402150386f6fe068cd1412031e6a6b9bbe65b264ba763870e36dad" 53 | sha512 = "22aeca4a8a2040c6277d2148719fc95943248eef315e8a32a989c6a3f01b5e4dd6ecd07c5eebd7dc3a6325e75c2082ce68d48dd82c781eebdac05eb113e03fa6" 54 | blake2b = "43f12064eb728d7ff628fdec5948a81b2c31fcf4203ba0ac9b0f03596e90ff1463519e63ae49ccc03516da7227296b7e0e82793b118ed41b2a1d4daed7f1331f" 55 | blake3 = "83d1c2c9cdfdf40b5eccd8b05cc8a2a89bed6cf94a759cb05f9fa4ff7f12aa69" 56 | minisign = """ 57 | untrusted comment: signature from rsign secret key 58 | RUQuQKHwtF7mW9GGFWkc1EiPU2ANVwZiUwGd8LY2t0qmmdsFR1RMBmcgi4OYwoPb+c5gcNXXc94YS1+0/28hG2rHQnZgwCv3ygQ= 59 | trusted comment: timestamp:1729508179 60 | KPrCjs0/iGkXvkGssn+kE1dHs856xMxOQNmiWJjMIt0ZLcbiA1oGP5LKUPhCXpSvIlAP04MtozM7smDBXRj3AA== 61 | """ 62 | 63 | [files."warcat-0.3.0-windows-x86_64.zip"] 64 | sha256 = "caa918d8f5e34ba391c18a2b66251c11c41002585e3656b8d3e2a12d0a707284" 65 | sha512 = "7a485a55dad0f08bbf297ea2815894d0127749e6e6142ed63616868ff72c2db4b7d3105faef60c675fa04cc339f6b577dfc876efdb7ce7ee434978de1a2c4b88" 66 | blake2b = "e021289784d23170ebe38a81009c5233c25aff9c033451bbf3d14456f4c5f59752f248a7eb36756073fddab25d51ddb5eb25eee35fb810b2d464d273b9598a97" 67 | blake3 = "ede997240902ce46718992d7e67e55e82aec914b06cbcf3545cf4ff30e6fb8c3" 68 | minisign = """ 69 | untrusted comment: signature from rsign secret key 70 | RUQuQKHwtF7mW0RLXxdrRIh+kzv7RBhJTo+rlik2gzZotkEbeJ6VyhtbJwQNN9MJZ2W6wwkwIjPShAgmWFHmfALU/BSIeSp+tgI= 71 | trusted comment: timestamp:1729508179 72 | 6zUwlz/SgmCDtnNBywJmQNP9BGsle3sdTUze/m+eWFEGh0Hdbruz/HzpjIh2eBriIZ2ND5OVp06yr1AYfY72Cw== 73 | """ 74 | -------------------------------------------------------------------------------- /misc/release_digests/v0.3.1.toml: -------------------------------------------------------------------------------- 1 | [files] 2 | 3 | [files."warcat-0.3.1-linux-aarch64.tar.gz"] 4 | sha256 = "db7a498f3b5461635cdccbcd3795265d6fedb2f3c983238ae06f3f87f56197bb" 5 | sha512 = "3e99f7f990d15893097adee12d76125531ca588bc2c7ca5c66888a0110b79f6fa200fa56acd2c32873a57ff70f6d2220c41e64652fedf054db3a21f79bab185a" 6 | blake2b = "4839a4e79ac340ffbf0b8b35cac96a81f50d0aa8e965f9c4fda313f5a1e5e6792717fe77062006c4ec397dbc7c72de4812b8c2463789d5137faf1a36d76731dd" 7 | blake3 = "af2c260c6cc53a6fcfcb3576638fb64f07ab9f3703483f99161cd585dd57d731" 8 | minisign = """ 9 | untrusted comment: signature from rsign secret key 10 | 
RUQuQKHwtF7mWzZVBWQl1OOrKdlYTA7gB67II9mvwGwRmU1EEqF6749uGehC1mrW48VTGmug0us0pEA7Do8QKKUHwN4Y/wDEpQk= 11 | trusted comment: timestamp:1729611596 12 | Z8sBZmCHU7xQPdZx/rNkOIxFJ4tKpBxXaXJF6yiQbDp36BBeHOy06AXIWIr+mDpb7o2icqT0nHk61aqGE1+jBA== 13 | """ 14 | 15 | [files."warcat-0.3.1-linux-x86_64.tar.gz"] 16 | sha256 = "69bf2e09b86315ddbbc9fc5873584e31b9a7d98c5238ea53b9a753a741722898" 17 | sha512 = "834d03b8f7db79bba997ad02e4aea2472f4676aff2ad683847bf80f7f1f45ebfb9d0aa08c437bb0a98fe149c0d7e342e6b6edb0ba356bc75e62f6f0dce707e4b" 18 | blake2b = "1e6e70c3b98a5460301f0269c8d91b1bca98b6bd6bfdce6fd1973e4413778f18e8c4a2db272e26327a6b80b383fa54d8a33b07a496988e654d59c3c02e95bbfd" 19 | blake3 = "24c4643c08f23e60e9a298e6dcb05707c53f62019e4b0eb146c07accf4926fa7" 20 | minisign = """ 21 | untrusted comment: signature from rsign secret key 22 | RUQuQKHwtF7mWwnCvGPbYJ0BZHfi9kiQBz3Vp709+0BJrknH4qVD1BXT8rdEOHRLyUS8oPcELlDzXpYkomBxr2sSo73AS1J9PAg= 23 | trusted comment: timestamp:1729611596 24 | O6CWwlSUolDNkDqW3YNH1qjyRXh3wX6etbA6mLawusaRFYUix5feQ+xUFX98CPhiQp79nJs5sU9jioOFDqanBQ== 25 | """ 26 | 27 | [files."warcat-0.3.1-macos-aarch64.tgz"] 28 | sha256 = "a51fe97bcbd01ac7354b47296d2acd9c382913699275e9a7238ddf9c1a17baf2" 29 | sha512 = "03f616f16733a0c353c42f17cacc03b167bacffdf1f32f19791131214f674c15bb8bfc8a2d9c103c0f19b1f1788eff184b0413182bb250101fd7bc1597f3314f" 30 | blake2b = "1b7a52d0cd2249c18838884b64c6ee9fb855efcd5d1effebc03abad66f12afb043a970406d9fcd90aa1d23dc00622a85a60e6e8a194d98df76e84610e1335349" 31 | blake3 = "64fb8498b10be5b2bc2bba8227c0a1973227a9a75ce78b5c0641036eb7d5ffa5" 32 | minisign = """ 33 | untrusted comment: signature from rsign secret key 34 | RUQuQKHwtF7mW18BMOk1Fs8+SvIPFcivrGBURRQZtD74np8nsV0SME/jie+Pov3VCxT96qE4tsCn5A2TDvVyCrxKoJMBubHBHw8= 35 | trusted comment: timestamp:1729611596 36 | JS8RGbYhCvR0ZEYRqUImfwbZOgrwH7u9rX4eqDAdOXdPMbQn7CzkAgfVELx6V5VbY/8UhLqx+FYHYGcYGoBFCQ== 37 | """ 38 | 39 | [files."warcat-0.3.1-macos-x86_64.tgz"] 40 | sha256 = "16876e9fff07b11a22571c84e61e7baee33ee99d7754e4750e49c65d5aba5ed1" 41 | sha512 = "0f233dd84c4171b1e6c380bb0b9c0e620d0d088857e5c55b0df9398e0e9f2104a5e3fffde5025e0ffca598574f2a8c8e023a5bc120b4283262b25b1728d79345" 42 | blake2b = "9b7c30e32f6f3383d919963db2d8475cadd5d035bfc266b83b7a19a2be6524b22ad5b01d61745063e7c82ac24bcfd954db6ea0e3e4889c0ce1144a2b568f9e69" 43 | blake3 = "8001d0074b47d6e8ebd91835236d0ff9ad747097db37dcf96fdff3f0142b2dee" 44 | minisign = """ 45 | untrusted comment: signature from rsign secret key 46 | RUQuQKHwtF7mWx2g3o9n/EajWgIoC5Fzaza0AuTMjEEE3sIjdY7E43jpOZAcsdbHr3UGn8yEjQ5dVYWivcndaRuuTOW8aPeRZQg= 47 | trusted comment: timestamp:1729611597 48 | zPSPGcF8EZF7da93HoJJnqgnhNy2O4pWa3cgXu39CPSqGwmPR7XQS1XVGo5DMUBfP7ZHLRquIZx1Vo0W4GaLAA== 49 | """ 50 | 51 | [files."warcat-0.3.1-windows-aarch64.zip"] 52 | sha256 = "07dd870555eea8729e0fda030920cedf8319c0b10b50996f09d2fdddca284265" 53 | sha512 = "8c770f83696a78780fab9bacb43dfa7450a24ca49229c0f911bd4ad9ab77f19716f400f4421d51e9b95c8e22f762bafdecca2494a0eec2b7ed62125ae0242fda" 54 | blake2b = "a29402e987f6f5e43f35b76ef3847288a9ac585050f6a40f29ffe559a749b237f5909d680d0e5f3031ef29fa368fa698c7f2fd69f731d253bc145b55a64bb275" 55 | blake3 = "5fd2e69589836a883bda842c14323e517823ad724f77fd87739ee127f8834ccd" 56 | minisign = """ 57 | untrusted comment: signature from rsign secret key 58 | RUQuQKHwtF7mW5/4HT0kLpr8uLMXE3EGWTwgwg0I+ZIW9TmTAIBuX8A0MJ/zULP9fAh6PbczBXpj1XboieJeqqCqRD6G5L27SgM= 59 | trusted comment: timestamp:1729611597 60 | 
6SnhbKWz1YtYsHcQuEdzZI/Ed5Mqx1vuBjV+DbL/+JCB9NAbha/U50wwdXSEmwJzxX7QXxzBOOaaeAG9drONDw== 61 | """ 62 | 63 | [files."warcat-0.3.1-windows-x86_64.zip"] 64 | sha256 = "a3e8919292e23761565685dcccdd65b5a7c9a564593e21cc1f2e10ab6c5c8931" 65 | sha512 = "dbdf820d6155c9d8b15829a86aa9e3870e4c2fa57d7b9df08acfa5feb46914a9853bcd78bfb189c10373c4b624ba301af52b89573813e25517701047287885c7" 66 | blake2b = "2fc21d8f8fb4d7096d9c9cddd1860f2adb53a2cb9eb1d48c4d19719b33b8e8d39c09a32b130ced07f06cfd8f5dd6fd6590d95276bd54c14383faa7e3f3397774" 67 | blake3 = "2d405777001aeea60acee3f4da0db886cb4f4432a0649e5e8b649e610d846d1a" 68 | minisign = """ 69 | untrusted comment: signature from rsign secret key 70 | RUQuQKHwtF7mW0fyddxLCdU/iQ+G04Bk9Fa2arbnT304Xi1NLnoTzsIrfCWs12pHsn5HB3bLddt4edsRpV/30apsnvn2ezaKGQ4= 71 | trusted comment: timestamp:1729611597 72 | /YfdCn/V2jqbTSQu09znhgF5T1CuRTcTs+h5p3Mark2lVQokebEICb+mv/Pe/2xSLwRlRoaa/4O4gFqgUWNcCA== 73 | """ -------------------------------------------------------------------------------- /misc/release_digests/v0.3.2.toml: -------------------------------------------------------------------------------- 1 | [files] 2 | 3 | [files."warcat-0.3.2-linux-aarch64.tar.gz"] 4 | sha256 = "2c4c8b7e84bf60cc5b21f01ab4d2e575e5b246989ddcce77b1cc94215a1e20e6" 5 | sha512 = "c88a93cd43af142f9af47ee399d70a78e9bcad1d2d64b71379adb2a8a9b4c4ed231b7df13b19a6591cda4df5ee45683fea042e2b342633fa04874fcc57de664f" 6 | blake2b = "28a4a0788d9179678f9283fd98597f4d2c4284e574f11129c785bd5ffc5b9db8a0077ab4ab1ef7df0197801e54885e89ccc6628f655bba27cd7ec61c444b5f47" 7 | blake3 = "7995c640649198c9ae3e549ba21fc038d7e95d08ffbdac46273b786fb3292c0f" 8 | minisign = """ 9 | untrusted comment: signature from rsign secret key 10 | RUQuQKHwtF7mWxNyyySj7+Mq6LP2ZuTZC3FgYeNbjqokuzpda68tJP5Oy8ryDYWv9VHKiFfNwwVzugpxJQsEqRSNYzw0P78XSgc= 11 | trusted comment: timestamp:1731620824 12 | TZ3C3Tdif4Ba9LbG2rRulzgkawXn4ZeF6p6wnxJNqpxvIpQ/RexQsmEpqQibcFHG8rebVSbNm3y9hevS0+WHBg== 13 | """ 14 | 15 | [files."warcat-0.3.2-linux-x86_64.tar.gz"] 16 | sha256 = "2543a8789dacd8ec5670146edab1682fa530f3c643e617ba6dc9200a056fc8b3" 17 | sha512 = "d52f8eba7d8867519b63ed1edc8469fb4c8fcce40c0188c3b789e5e5fdf5c8032a3864a2b9e9f57e358d8971f61105da506588f09c87f809bdb9bb09aa0dce6d" 18 | blake2b = "9340154e85e40a8abe66dca1e42fa9b72a4ff3d22d5304bafcf5d24848d8fb6dab670dbb02947b02379bc32fd7241a9c8c14b465ee046c46276dfa034c2d1306" 19 | blake3 = "39095b5bf23bef0bac3e87fca6f43210e4891e23a95fe6c6335422b8cab9effb" 20 | minisign = """ 21 | untrusted comment: signature from rsign secret key 22 | RUQuQKHwtF7mW1qFZ8sJkVXGtFrKH2Fokha1EPAxGpTfXdftysmJ/EkCcr6l+HETqxytgszRAAxWKd/ylaSzhKmfj5SuTEl5HAc= 23 | trusted comment: timestamp:1731620824 24 | 9vHqEh5ZmcAgEWUXgpnJ/FTvft9fdY1stzQBxYps3CLPOXY9AkjZXXAJK0xQsgNuci5QD5DTNGPFhXtSvLYFCQ== 25 | """ 26 | 27 | [files."warcat-0.3.2-macos-aarch64.tgz"] 28 | sha256 = "6ed9822215a289ed3056dcd57049937b667bd77ee5ae772f566bdb66ecc42258" 29 | sha512 = "8da71adfddeaefd539305becd1d7cf01290cdd00974f4385c336b4299030fbe5e919ac25c0349c34c61e9af19508a1d9536ff0128db06f1ceb58f5e678e5aae2" 30 | blake2b = "036a68788cfc524cb2a32528e2bdc78197af71fbe98cb5e00b002131981b976d09a2d0c30963fd43ee1ef742f05b2da51d94ccac167c4d54ed9a2b718b350675" 31 | blake3 = "2632fc21455140de891c9f514ba3674d6cda382add1bf5875d556e5234d16f5c" 32 | minisign = """ 33 | untrusted comment: signature from rsign secret key 34 | RUQuQKHwtF7mW+QYr40RvEFjBtkMI1rFJnt7A38M57OR7iVG9/GDsv6eBU2HMMnBFWtv+efmgMtnFr2cmoOhouRbe8w+qgrqOQA= 35 | trusted comment: timestamp:1731620824 36 | 
3VMy/L/+2aH2NP4F317DbCtb0uqRVwRNS1Ues1IqHNTlE2wMQgv3v3H++zZTcxcibUvwAmtLUCHIZkVy+YuIDQ== 37 | """ 38 | 39 | [files."warcat-0.3.2-macos-x86_64.tgz"] 40 | sha256 = "f336bd5cc4ed383e278bd1c1b0ce16db55df1c7a4f57b2bca90ed2ae02cb26e1" 41 | sha512 = "534954200c095769ddeee520106795a6e0901f3272a6672bc34b34f789e38c731dfb5f010b59b49099fc6e26e0babf9c689830d9851b61b22a4d9afa252f2862" 42 | blake2b = "35fbae98443191ef40a1325be6dfb2cac54fc757b664943e259b2cfc14b37747844b44bf238d58332836d5c7ad9374972883e3ec012c568c776d32c9958a90f5" 43 | blake3 = "430a0a1ec11f6d06e5f8487958abbd08f7c2c2b1a5831a02748064a3e25d10d5" 44 | minisign = """ 45 | untrusted comment: signature from rsign secret key 46 | RUQuQKHwtF7mWxRPIJtm1LU6LpHKSU70bV04RGHKP4t+iaUDaq8s2LPseh0LEt1xmbLQFevp2Td7J7/O8w+iWDgDYasDDV4RHwQ= 47 | trusted comment: timestamp:1731620824 48 | VmzK6tVL4m9514wNj/nQM+kCsXVda4JDt2pdv4rU/VGN8VJ7ZTnVInwYM8G2/F11jdDoXykczAM57ZtvaWb2BQ== 49 | """ 50 | 51 | [files."warcat-0.3.2-windows-aarch64.zip"] 52 | sha256 = "e3b86877b8016ce31732797940476c124b5f8caa4cc05ee9ee08dbf5eaa18606" 53 | sha512 = "8b5244afc7f7da9000c735c38c779743215089ca5d862bc96a811346ca9b5f5e8d70ffe44cc7793dc56d6ba34213f7f25bacf8f338c89524c0ae4c74beb91d15" 54 | blake2b = "1bafd9e07ce3470b28ff49d91ea0b2b7b42bb0ad725c62bba195b0270c4bdb88ec33d488882af6f84ffb95c29e8992d7ecd7e64a9b11f433e25d3bbfc305f247" 55 | blake3 = "691ccf6eed798b98b644b185abfee281c3d73ed6da88e908d29f3e03bf440ca4" 56 | minisign = """ 57 | untrusted comment: signature from rsign secret key 58 | RUQuQKHwtF7mW9IWe4OsvpgFAqlfQbgkEW3wq7qm10f2RlaAgBdtzdhH5y5OSWX18h5t1/eFYUvsc7EJkJr+6evHcSbXCSZmGAM= 59 | trusted comment: timestamp:1731620824 60 | or69yv7Uv1iSZGN5ISztpNDM4F+UgljqaGNzOB+cJEyzIDoYWFo1zzEeIKHHubc8RHyzQz032Bz9V2vxthZJBg== 61 | """ 62 | 63 | [files."warcat-0.3.2-windows-x86_64.zip"] 64 | sha256 = "86e8b7bbfe1117dc890c2caca90d63efab41e682f47b9c81eb8a2aadf8fbe62e" 65 | sha512 = "643f95208f3ee476ee0180cdcfd690947500e390c0a0a7f8a33ee103fc5ee5d89090d71d4787b027951d664cb8470dced9912c131cc9b8a22263afeaf55c28b5" 66 | blake2b = "520d566b89669f785e5c49b15b977bb2cfb1acb726952f48e1b4e04729823f2a4a4557b5a9c09964ab065f7e919d8fafefb3669533901cdd08574e682f68e805" 67 | blake3 = "c81cc5136d2362a4c7b2f0fa04134c21d935b36ed0f7ddb0408d6c3ec16ae00e" 68 | minisign = """ 69 | untrusted comment: signature from rsign secret key 70 | RUQuQKHwtF7mW60n6KlQrFdn1b4ue3p7DbA3JoD1Q1c8akvHSmu0RBQ95rOECCUNcTQXcx/2UM1msyR3OwO2FluXKZ5DG9NRzwU= 71 | trusted comment: timestamp:1731620824 72 | 9iQS2QD7+JCy1ODFhaP9gnVtPqq/yB3Ubw0QFlyPU/kEEz2pax8ewJBha4nz+Hma/5jgePvYd9JuIttN1ZM6DQ== 73 | """ 74 | -------------------------------------------------------------------------------- /misc/release_digests/v0.3.3.toml: -------------------------------------------------------------------------------- 1 | [files] 2 | 3 | [files."warcat-0.3.3-linux-aarch64.tar.gz"] 4 | sha256 = "e4bb918467fbb7fc4f4dbeda8faceee76a71bc3b45e516665d3d0b1138fb7974" 5 | sha512 = "4d95f9bee1690a01005758fc1bf3e222817ecbc39d29ab6e3cd54be2763ebb5669b5cbea72236c4bbe74039353643f51e156d6e3e0e45cc174cc67cbd402c809" 6 | blake2b = "b9b3f075b2f045d9cfb7b5f7a0cc7bc25263522190347fdcec9f5661be04eda68144627d1ba938fc9148a19835471eee303b726a88a31679cfbd705665aecb24" 7 | blake3 = "b348d964e743cdcf82f455dac7457ea0b25166f70e352463d1252c32958d50a2" 8 | minisign = """ 9 | untrusted comment: signature from rsign secret key 10 | RUQuQKHwtF7mW42veK9lQEUVufcJ80g6DJ0hKRVGIM9hQ8KDjEaUMgwKGH15xezLGlxEEBlJwN1llhyZr35N09LYB4aaTskhLg0= 11 | trusted comment: timestamp:1748237016 12 | 
g59H2OHzSY5Le0tsvcEFlwz3X8NPDXovxtx19k3Er3Q0/OTUavrgHCNRYokpRPXcgJWMUSq6ajXW/298PT9FCQ== 13 | """ 14 | 15 | [files."warcat-0.3.3-linux-x86_64.tar.gz"] 16 | sha256 = "3005a5becb621e067e832a89c2c43301cd8dfd79549b0503b5370ad801a6cc5a" 17 | sha512 = "41a919b6e2e5f7c310adbaeb6809182eac4fbd424bba5631c11ba3eb5957baa3a8f57e55e05783bda84c9a22097a041079f522dc0beb768e8ddfe3531b6d3306" 18 | blake2b = "b9830c77ad4f66315eae0cc7695de628d10b8a7afc1e7e5eedc384bff8c0f62a7d5929e27877e61a329bab3ed998381efa2193d77fc5bd4df959078b0c5e0ad4" 19 | blake3 = "d0f5d1df6651dc117396f6a4ba784ca14a4e19d581794a0a3b665faf1c9d47ab" 20 | minisign = """ 21 | untrusted comment: signature from rsign secret key 22 | RUQuQKHwtF7mWw1spmlvisGpn3L9Nnohx9jPvIqA+K1iZ+NXyMdNOvU/VrOFhfG3+mR8o/+mxbmKgpcqWUyZxZ4dCDOIx2AZRQ8= 23 | trusted comment: timestamp:1748237016 24 | lSa5nNRCGUuhnvzm1TzwcpriJ9fiJIpJSL01M2h1pHcFONKOjiaanDbLKiE4tWopvVf6fyHDJShOogoh3kaqBQ== 25 | """ 26 | 27 | [files."warcat-0.3.3-macos-aarch64.tgz"] 28 | sha256 = "deeec9cc99284da3fbc5c783e7ad4cbc5d2382807163505d29748d8a99f22cf9" 29 | sha512 = "f4f61dab95136eee19dd21076efc0138395e8163a2c72546f28b8358bd18a7c3af06a0573d89a15dee2afb08d1148586d425984873033938d59f7a377ae02f23" 30 | blake2b = "e9b254bee7cf427a639bb5784a97ab6e3a86be03ab17a097d641a9cc69deb1155059c646ffc54d2be87ee5f82d441def5330f6631e5fe85a46565ccdf14483a7" 31 | blake3 = "4d7d1b6e60e6755f4087574fe039b351a65a85c1963b6cc1718ec9b56e33bb8a" 32 | minisign = """ 33 | untrusted comment: signature from rsign secret key 34 | RUQuQKHwtF7mW/EdcNltIyNGZtcBirOt6cONSeLXE39k91Whv1MKi2mple7g1C/qfgh+oXRLMeStbkbNcpFN+iYBRNOqT7DWjQQ= 35 | trusted comment: timestamp:1748237016 36 | TfKGdTrjdc/UqIOSYhWQVWE8ap2FddLEe7StfumHuz2jtuwQYp3wvAWHoROQFMCzzhBDWIPGRIvRMEUdLMh9AA== 37 | """ 38 | 39 | [files."warcat-0.3.3-macos-x86_64.tgz"] 40 | sha256 = "768fca5de72c20be9747cf2fc31bf52a55391ebbf69b4940420e43bd2dee9ad3" 41 | sha512 = "ec194aabcb99a42992a2179ecefa79e2c797e31e4d0154be4218eb6216ecaedc2738ce16a32e03b400d5e61eed536f900a9654e7ba334c7cbbf9713f0ac60445" 42 | blake2b = "064949dd1910be51ae98af6b6cecd0586042ac978c9d81234e28f27003d1e7251e5fb38e95efe576d51e1ec8903f7f370db4faaac8151a0cbccea5d9ecc4881a" 43 | blake3 = "b3391e624d9ca6bf02da3b7bbbfcaa55f499ed1683e4375b8ea294a418048761" 44 | minisign = """ 45 | untrusted comment: signature from rsign secret key 46 | RUQuQKHwtF7mW4XhetPbXij4ndIOP8NAlrfpPbI/AMGkLm39eiP6c+q8p9slNLCIud+CI/ByuowKHYZUX/t/EwBQicUMCavKewE= 47 | trusted comment: timestamp:1748237016 48 | XZySMItAunjsSGIH8lZDp+jU9NSAVeG5IS0u0xIDtVRO1BsQWrcmo+UHaq+5k9vQQtL4pwfxQkRcy/G/PgQcBQ== 49 | """ 50 | 51 | [files."warcat-0.3.3-windows-aarch64.zip"] 52 | sha256 = "e9039a69bb77b9c34ddc8225f2f6dee30c26d879030e06ea8350bece89cfd628" 53 | sha512 = "f5c31dce3adde00ce236bfda17e2e79d831345c445e570eb74e621dfeb9c9b35b881a7701ad7d61395d546bef69b1b59944b48fbdb8d6b53894c8fd27c55fd8d" 54 | blake2b = "4f8f08f578e7f1c0243d575f8ed6abc0381f7f0b45d0f6d05f8d49f93639aff8d6d948306f8c71f7b0929d2c2b33a6a9d62d52299ed2f92fd4e0ad60df006e56" 55 | blake3 = "32bd372553be0b02379e3f094330c956c3181bb5b444937a6f250bb8d4c43946" 56 | minisign = """ 57 | untrusted comment: signature from rsign secret key 58 | RUQuQKHwtF7mWwUIIRhscwmfhC99vERX0e28Wr61RNFkFZ3On1eS/z2O9JYpU/8hMc8wJTwwBKlGXsjJl5yPZgndlHJ15gT5Dgc= 59 | trusted comment: timestamp:1748237016 60 | Eb6EYoVRSo0y0I83myjT9a/cYNHNTTZaeGvK3s5ih3PaOq5uUv1GHi9d3GfO5g/ItvxGBrRmdX4FzHe+U4MVDA== 61 | """ 62 | 63 | [files."warcat-0.3.3-windows-x86_64.zip"] 64 | sha256 = 
"803eaabd2eca7e33b25899e5f55b9fc5481869dcf8f74c0bf64a11fde34ce616" 65 | sha512 = "1fdce43d39f2a191f7dcd49681f8bb11bd2fb33fbef862ce3063080ce511f17c95785178e5bb68d7c496b841ed4f5ff6500fb2770e9a9df4185ddd1eea46e146" 66 | blake2b = "2d4301c701118a74b508a4f1a52add5df5f6e78ffcaa5d8f3f3e2da917ca098062ce3b48d8f1216464f338b6029ab216a3245c75061ab727c49aadab4a555ea8" 67 | blake3 = "5175dbc197c88f0fae260e569931053c8c7a92279cec1b9c5d77e3e023979f19" 68 | minisign = """ 69 | untrusted comment: signature from rsign secret key 70 | RUQuQKHwtF7mW2odmVvqLm9p8mMFYmklcyRaj4/R2uaZ8Brzs/HWSxrDyWuq3uIgXJGt/0okbhrxq5ewlDVjm+asiYybhHQ68QE= 71 | trusted comment: timestamp:1748237016 72 | EdHxwfiYcZlif+/PYbx/ksFLeXFjjhsdrJ+hEsdAFfJIlGzVL1O+XU0VWFn63RtZuKKd7T5Izh7KqlfKdy+WCw== 73 | """ 74 | -------------------------------------------------------------------------------- /misc/release_digests/v0.3.4.toml: -------------------------------------------------------------------------------- 1 | [files] 2 | 3 | [files."warcat-0.3.4-linux-aarch64.tar.gz"] 4 | sha256 = "16ee0a729a91dad7728d12791da0153500697659bf7bb2078a09b7674d19b64b" 5 | sha512 = "58a2653a395933931ae52edcff53f094a71920a8d67866b1bda34fc58f51acee2cb96209cb15878704a714e73b984a9e2b217350daa1c291c6372a4952a764fe" 6 | blake2b = "5f536df37e8db1509df1883b26d87ab538b015f63047c320d90fd79bca535c94824ecee7e73fd662ed2a5f60ab544bf9eb3dfec06e8c3a2c9da45525459b7ae9" 7 | blake3 = "d4c023ee3af169c8d9833133a2f7a3197f5bc47b99035b5bb29606b078d22057" 8 | minisign = """ 9 | untrusted comment: signature from rsign secret key 10 | RUQuQKHwtF7mW3PWRDCS7M3DQx3rEb4Brwz+v9SNtqX3zcPWmPEdWUeXldtLPeTlJIpSIHj67hH2doJ+vpszm9dJ4oEBx0Ol+wc= 11 | trusted comment: timestamp:1748855122 12 | xmbrMbSMuzjFM/9py4/66p00rniUyW9mbghgpbW9l4iR/lcuWcJ6t0rYkDvfrFB5aQ3GE7E72sN8c/98RG2kBQ== 13 | """ 14 | 15 | [files."warcat-0.3.4-linux-x86_64.tar.gz"] 16 | sha256 = "97435b1337d4edcd4fcb803ebebc4744df39224cb69610095869cc2dc3e69ea8" 17 | sha512 = "e7ae2d5cf271ecefddd31bf3e4d12f8f2f0a901744588afc45da107d3e1d1118871b16c05515b11aabf60334b0e9c5f376578c8d9770e1cf953037c820a14ea8" 18 | blake2b = "28e1b468cb84e0d491ab94e530bb50af86ef942afecfe0d521601a1f40c619ced5e0d652135894ecdae369719f17436ef654c8565151cef45742bfa49d04ecc2" 19 | blake3 = "0684948757b867cf78bd0f68efaf8557ee2592ce8e5dc0b5b50247370fb4591f" 20 | minisign = """ 21 | untrusted comment: signature from rsign secret key 22 | RUQuQKHwtF7mW+ITks3Q2UmG+lQlPEzhTpVLAh3TFqQYEzQD99sbnl5J+K1lrxEoW/4tVovB82+dddn4CnwXvYwGjIMryyM2YAk= 23 | trusted comment: timestamp:1748855122 24 | ON8zlyt/9j7R6X20Q4TCDj697p/SW+6sUgZThHyLFTiW25VNOg+uwpkngqKOBeMiKNCufN9lCtnQscl5kDdHAQ== 25 | """ 26 | 27 | [files."warcat-0.3.4-macos-aarch64.tgz"] 28 | sha256 = "9032e55d7521b246c4a6b2bbf2f5a3b997a9d523b18cb327e4c01e81083214ef" 29 | sha512 = "8965fc38923f25c37b1e23897bf690d5c7d37bd45a5e6c9ce9374c65dc8ce7ff31c7375556c3c6d4038d92807aa30f2c385cb1d794264ffcbf2ad9e3ea3b8156" 30 | blake2b = "5d9d32c8449dae2cf28605e51ea6dc6ffcd5401df0e08e50dd94c3da51d736d7885032d6b6698a638154c5ca3290fcb39147d91e9174c33c53614465cecae963" 31 | blake3 = "f0ae34eb36b95f8f489ccb36c979a809440ddb7a5b5c5704a8a6cf1f9c9a3a88" 32 | minisign = """ 33 | untrusted comment: signature from rsign secret key 34 | RUQuQKHwtF7mWzO/7YgeELilPKSSmy+FnQWuQIzS4W3PvWEUcViUZ2Yeuz1zLLX3CEf5X1OoknVFIOzu0VfwCa5bynnu8gQAggQ= 35 | trusted comment: timestamp:1748855122 36 | OFv7JdniPRQXZAJoNBPzyxub5mUFR3VR/4zcL5kFth9lApUP81lPCLI0ndaUVjFepRvqWyvoP+m0KEdGMZmzBA== 37 | """ 38 | 39 | [files."warcat-0.3.4-macos-x86_64.tgz"] 40 | sha256 = 
"67f1d7937a9d01255923e6ee2c52af6410638ad2aab9133ace9a57248d0183c7" 41 | sha512 = "7da6a090bea2d5c5eba3173fe814ab34b588051b63de037c71495dfd430a79c4da306366c11cf4a7b065ae3a00e1eb264d696a210291c5db0306c10168b04bd3" 42 | blake2b = "f888814504851ba2da95410ed5e1704d02f88a03d2f7f0999560b43d493a48ccf8309e6e3df4a4ecb0e7f81477cff0d3978c652acf72f98c7b66ec5b52f2ec44" 43 | blake3 = "c3ec7cb1d58b224a758d148062ac7b1f53197b0043dbf1d869de231ca63b7edc" 44 | minisign = """ 45 | untrusted comment: signature from rsign secret key 46 | RUQuQKHwtF7mW+tyXzmPSoPB9ZSaqfZ6soP9xSE5065gWv8+iX0Y7Pf2zTrA8C8hk2f35yZTSFd8luKjvMxAR/UPsggTF3nP0gU= 47 | trusted comment: timestamp:1748855122 48 | pJmeUtNEM4PUDs5NQHGV7UZn7fzsG9J4BPne5NR2XkRn0WItbAUUbQeVX12mt0rB/sjCWRnjoCIigrd5HkplBQ== 49 | """ 50 | 51 | [files."warcat-0.3.4-windows-aarch64.zip"] 52 | sha256 = "9600a93d56b8c33a329d03b03952ba11170e48fe5423d874129fcda037d4ca8b" 53 | sha512 = "60697264a82116ac37adef2eefb83beb4d69ed6b35433a9212fa9b5dbe0c340b6cbc819ff6f4ed778a8d9fd3d918772d1cffea129394139929f2d847358d75d8" 54 | blake2b = "a35b772082e6638bb5d70b8f8b925dd808d6ede1577a81a4f3b3a1ee5a4f262e3f025104292917bbc940bea38764662fc0dea9d43a01ead3f17c4fae2fe9512b" 55 | blake3 = "671ebaff5bfc70fc54269818cd9c997905daa77a047345edc20837f012749ae5" 56 | minisign = """ 57 | untrusted comment: signature from rsign secret key 58 | RUQuQKHwtF7mW/uj82KHauSeIqogN1R0am5EjW3T+bkaZpGRERAZKweXztsnM2moP3gdVLRVc0lNPV0mxaErKn2Lg9JeJr0Xhwg= 59 | trusted comment: timestamp:1748855122 60 | FtdS2HfvAq4WY1H1YQlgGfHCdSyM5K+Ja4xDtbdGn2+d/tZ4lxr7r0THBItcQznJcwJ8BDSlAe0TP7Pyq09SBQ== 61 | """ 62 | 63 | [files."warcat-0.3.4-windows-x86_64.zip"] 64 | sha256 = "5147af022507e300de74ac1b05ad96aa37df54d9088c80b7fb04979a5da1d770" 65 | sha512 = "ce7cc41a2d6bb0cf6c8214e995d7522811ed3e6b042d65294541a096c386f18f56c1ce7c7f04d7b4a7a5493adae2f2168748cbc8d00e9f16ae5b72e703409655" 66 | blake2b = "e488a581efd19844b0d57255092980be6af1e685fd4b747591ebda1648e83ffb1984a4d5d6d70cafb32d4177d6e498e1db786b5b3b8c18ed9a5eaf607e84f69f" 67 | blake3 = "78892ecb3eb6bdd218a067d2fd5141eec388fbb932c27da97ef9f236c7327e6c" 68 | minisign = """ 69 | untrusted comment: signature from rsign secret key 70 | RUQuQKHwtF7mW/UjGgfqoE1otDuNKtCwcUf3KWCavEKOD1pnpWEEANDuD+w2fJvt1PhSBeWlgaFzX3vlTp7noBzedGdnM9qohgI= 71 | trusted comment: timestamp:1748855122 72 | I3vNPDyosSd1rKIW+L7uN3UsdXDMyRQnK1Q1D/fg1rrNbixFzcaVxSBlU/QXxEQA7KiGcyWxrv2w6KWg46DJDA== 73 | """ 74 | -------------------------------------------------------------------------------- /roadmap.md: -------------------------------------------------------------------------------- 1 | # Development Roadmap 2 | 3 | ## Current work 4 | 5 | * Testing of Zstd support 6 | 7 | ## Planned future features 8 | 9 | * WARC indexing support 10 | * Extract command: support segment records. 
11 | * Split and join WARC files 12 | * Similar to the import command, a guided way to write HTTP requests/responses -------------------------------------------------------------------------------- /src/app.rs: -------------------------------------------------------------------------------- 1 | use std::process::ExitCode; 2 | 3 | use clap::Parser; 4 | 5 | use self::arg::Args; 6 | use self::arg::Command; 7 | 8 | mod arg; 9 | mod common; 10 | mod dump_help; 11 | mod export; 12 | mod extract; 13 | mod filter; 14 | mod format; 15 | mod get; 16 | mod import; 17 | mod io; 18 | mod list; 19 | mod logging; 20 | mod model; 21 | mod progress; 22 | mod self_; 23 | mod verify; 24 | 25 | pub fn run() -> ExitCode { 26 | match run_impl() { 27 | Ok(exit_code) => exit_code, 28 | Err(error) => { 29 | tracing::error!(?error); 30 | eprintln!("{:#}", error); 31 | ExitCode::FAILURE 32 | } 33 | } 34 | } 35 | 36 | fn run_impl() -> anyhow::Result { 37 | if self::self_::is_installer() { 38 | self::self_::install_interactive()?; 39 | return Ok(ExitCode::SUCCESS); 40 | } 41 | 42 | let args = Args::parse(); 43 | 44 | if args.quiet { 45 | self::progress::disable_global_progress_bar(); 46 | } 47 | 48 | self::logging::set_up_logging(args.log_level, args.log_file.as_deref(), args.log_json)?; 49 | 50 | let exit_code = match args.command { 51 | Command::Export(args) => { 52 | self::export::export(&args)?; 53 | ExitCode::SUCCESS 54 | } 55 | Command::Import(args) => { 56 | self::import::import(&args)?; 57 | ExitCode::SUCCESS 58 | } 59 | Command::List(args) => { 60 | self::list::list(&args)?; 61 | ExitCode::SUCCESS 62 | } 63 | Command::Get(args) => { 64 | self::get::get(&args)?; 65 | ExitCode::SUCCESS 66 | } 67 | Command::Extract(args) => { 68 | self::extract::extract(&args)?; 69 | ExitCode::SUCCESS 70 | } 71 | Command::Verify(args) => self::verify::verify(&args)?, 72 | Command::Self_(args) => { 73 | self::self_::self_(&args)?; 74 | ExitCode::SUCCESS 75 | } 76 | Command::DumpHelp => { 77 | self::dump_help::dump_help()?; 78 | ExitCode::SUCCESS 79 | } 80 | }; 81 | 82 | self::progress::global_progress_bar().println("Done.")?; 83 | 84 | Ok(exit_code) 85 | } 86 | -------------------------------------------------------------------------------- /src/app/common.rs: -------------------------------------------------------------------------------- 1 | use std::{io::Read, path::Path}; 2 | 3 | use anyhow::Context; 4 | use indicatif::ProgressBar; 5 | 6 | use crate::{ 7 | compress::{Dictionary, Format}, 8 | header::WarcHeader, 9 | io::LogicalPosition, 10 | warc::{DecStateBlock, DecStateHeader, Decoder, DecoderConfig}, 11 | }; 12 | 13 | use super::io::{ProgramInput, ProgramOutput}; 14 | 15 | const BUFFER_LENGTH: usize = crate::io::IO_BUFFER_LENGTH; 16 | 17 | pub fn open_input(path: &Path) -> anyhow::Result { 18 | ProgramInput::open(path).context("opening input file failed") 19 | } 20 | 21 | pub fn open_output(path: &Path) -> anyhow::Result { 22 | ProgramOutput::open(path).context("opening output file failed") 23 | } 24 | 25 | pub enum ReaderEvent<'a> { 26 | Header { 27 | header: WarcHeader, 28 | record_boundary_position: u64, 29 | }, 30 | Block { 31 | data: &'a [u8], 32 | }, 33 | } 34 | 35 | #[derive(Debug)] 36 | enum ReaderState { 37 | None, 38 | Header(Decoder), 39 | Block(Decoder), 40 | } 41 | 42 | impl ReaderState { 43 | fn take(&mut self) -> Self { 44 | std::mem::replace(self, Self::None) 45 | } 46 | 47 | #[allow(clippy::result_large_err)] 48 | fn try_into_header(self) -> Result, Self> { 49 | if let Self::Header(v) = self { 50 | Ok(v) 51 
| } else { 52 | Err(self) 53 | } 54 | } 55 | 56 | #[allow(clippy::result_large_err)] 57 | fn try_into_block(self) -> Result, Self> { 58 | if let Self::Block(v) = self { 59 | Ok(v) 60 | } else { 61 | Err(self) 62 | } 63 | } 64 | } 65 | 66 | pub struct ReaderPipeline 67 | where 68 | C: FnMut(ReaderEvent) -> anyhow::Result<()>, 69 | { 70 | progress_bar: ProgressBar, 71 | state: ReaderState, 72 | buf: Vec, 73 | callback: C, 74 | pub has_record_at_time_compression_fault: bool, 75 | } 76 | 77 | impl ReaderPipeline 78 | where 79 | C: FnMut(ReaderEvent) -> anyhow::Result<()>, 80 | { 81 | pub fn new( 82 | callback: C, 83 | input: ProgramInput, 84 | compression_format: Format, 85 | file_len: Option, 86 | ) -> anyhow::Result { 87 | let progress_bar = super::progress::make_bytes_progress_bar(file_len); 88 | 89 | let mut config = DecoderConfig::default(); 90 | config.decompressor.format = compression_format; 91 | config.decompressor.dictionary = Dictionary::WarcZstd(Vec::new()); 92 | 93 | let reader = Decoder::new(input, config)?; 94 | 95 | Ok(Self { 96 | progress_bar, 97 | state: ReaderState::Header(reader), 98 | buf: Vec::new(), 99 | callback, 100 | has_record_at_time_compression_fault: false, 101 | }) 102 | } 103 | 104 | pub fn run(&mut self) -> anyhow::Result<()> { 105 | super::progress::global_progress_bar().add(self.progress_bar.clone()); 106 | 107 | loop { 108 | self.process_header()?; 109 | self.process_block()?; 110 | 111 | let mut reader = self.state.take().try_into_header().unwrap(); 112 | let has_more = reader.has_next_record()?; 113 | self.state = ReaderState::Header(reader); 114 | 115 | if !has_more { 116 | break; 117 | } 118 | } 119 | 120 | self.progress_bar.finish(); 121 | super::progress::global_progress_bar().remove(&self.progress_bar); 122 | 123 | Ok(()) 124 | } 125 | 126 | fn process_header(&mut self) -> anyhow::Result<()> { 127 | let reader = self.state.take().try_into_header().unwrap(); 128 | 129 | self.has_record_at_time_compression_fault = reader.has_record_at_time_compression_fault(); 130 | 131 | let (header, reader) = reader.read_header().context("invalid WARC header")?; 132 | 133 | let record_id = header 134 | .fields 135 | .get("WARC-Record-ID") 136 | .map(|s| s.as_str()) 137 | .unwrap_or_default(); 138 | self.progress_bar 139 | .set_message(format!("Processing record {}", record_id)); 140 | tracing::info!(record_id, "processing record"); 141 | self.progress_bar.set_position(reader.logical_position()); 142 | 143 | (self.callback)(ReaderEvent::Header { 144 | header, 145 | record_boundary_position: reader.record_boundary_position(), 146 | })?; 147 | 148 | self.state = ReaderState::Block(reader); 149 | 150 | Ok(()) 151 | } 152 | 153 | fn process_block(&mut self) -> anyhow::Result<()> { 154 | let mut reader = self.state.take().try_into_block().unwrap(); 155 | 156 | loop { 157 | self.buf.resize(BUFFER_LENGTH, 0); 158 | 159 | let read_length = reader.read(&mut self.buf)?; 160 | self.buf.truncate(read_length); 161 | 162 | if read_length == 0 { 163 | break; 164 | } 165 | 166 | self.progress_bar.set_position(reader.logical_position()); 167 | 168 | (self.callback)(ReaderEvent::Block { data: &self.buf })?; 169 | } 170 | 171 | (self.callback)(ReaderEvent::Block { data: &[] })?; 172 | 173 | self.state = ReaderState::Header(reader.finish_block()?); 174 | 175 | Ok(()) 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/app/dump_help.rs: -------------------------------------------------------------------------------- 1 | pub fn 
dump_help() -> anyhow::Result<()> { 2 | let config = clap_markdown::MarkdownOptions::new() 3 | .show_footer(false) 4 | .show_table_of_contents(false) 5 | .title("{title}".to_string()); 6 | 7 | println!( 8 | "{}", 9 | clap_markdown::help_markdown_custom::(&config) 10 | ); 11 | 12 | Ok(()) 13 | } 14 | -------------------------------------------------------------------------------- /src/app/filter.rs: -------------------------------------------------------------------------------- 1 | use regex::Regex; 2 | 3 | use crate::header::WarcHeader; 4 | 5 | #[derive(Debug, Clone)] 6 | pub struct FieldFilter { 7 | includes: Vec<(String, Option)>, 8 | excludes: Vec<(String, Option)>, 9 | include_patterns: Vec<(String, Regex)>, 10 | exclude_patterns: Vec<(String, Regex)>, 11 | } 12 | 13 | impl FieldFilter { 14 | pub fn new() -> Self { 15 | Self { 16 | includes: Vec::new(), 17 | excludes: Vec::new(), 18 | include_patterns: Vec::new(), 19 | exclude_patterns: Vec::new(), 20 | } 21 | } 22 | 23 | pub fn add_include(&mut self, rule: &str) { 24 | if let Some((name, value)) = rule.split_once(":") { 25 | self.includes 26 | .push((name.to_string(), Some(value.to_string()))); 27 | } else { 28 | self.includes.push((rule.to_string(), None)); 29 | } 30 | } 31 | 32 | pub fn add_exclude(&mut self, rule: &str) { 33 | if let Some((name, value)) = rule.split_once(":") { 34 | self.excludes 35 | .push((name.to_string(), Some(value.to_string()))); 36 | } else { 37 | self.excludes.push((rule.to_string(), None)); 38 | } 39 | } 40 | 41 | pub fn add_include_pattern(&mut self, rule: &str) -> anyhow::Result<()> { 42 | let (name, value) = rule.split_once(":").unwrap_or((rule, "")); 43 | 44 | self.include_patterns 45 | .push((name.to_string(), Regex::new(value)?)); 46 | 47 | Ok(()) 48 | } 49 | 50 | pub fn add_exclude_pattern(&mut self, rule: &str) -> anyhow::Result<()> { 51 | let (name, value) = rule.split_once(":").unwrap_or((rule, "")); 52 | 53 | self.exclude_patterns 54 | .push((name.to_string(), Regex::new(value)?)); 55 | 56 | Ok(()) 57 | } 58 | 59 | pub fn is_allow(&self, header: &WarcHeader) -> bool { 60 | for (rule_name, rule_value) in &self.excludes { 61 | if let Some(rule_value) = rule_value { 62 | for value in header.fields.get_all(rule_name) { 63 | if value == rule_value { 64 | return false; 65 | } 66 | } 67 | } else if header.fields.contains_name(rule_name) { 68 | return false; 69 | } 70 | } 71 | 72 | for (rule_name, value_pattern) in &self.exclude_patterns { 73 | for value in header.fields.get_all(rule_name) { 74 | if value_pattern.is_match(value) { 75 | return false; 76 | } 77 | } 78 | } 79 | 80 | for (rule_name, rule_value) in &self.includes { 81 | if let Some(rule_value) = rule_value { 82 | for value in header.fields.get_all(rule_name) { 83 | if value == rule_value { 84 | return true; 85 | } 86 | } 87 | } else if header.fields.contains_name(rule_name) { 88 | return true; 89 | } 90 | } 91 | 92 | for (rule_name, value_pattern) in &self.include_patterns { 93 | for value in header.fields.get_all(rule_name) { 94 | if value_pattern.is_match(value) { 95 | return true; 96 | } 97 | } 98 | } 99 | 100 | self.includes.is_empty() && self.include_patterns.is_empty() 101 | } 102 | } 103 | 104 | #[cfg(test)] 105 | mod tests { 106 | use super::*; 107 | 108 | #[test] 109 | fn test_filter() { 110 | let mut header1 = WarcHeader::empty(); 111 | header1.fields.insert("n".to_string(), "cat".to_string()); 112 | let mut header2 = WarcHeader::empty(); 113 | header2.fields.insert("n".to_string(), "dog".to_string()); 114 | let mut header3 = 
WarcHeader::empty(); 115 | header3.fields.insert("n".to_string(), "bird".to_string()); 116 | let mut header4 = WarcHeader::empty(); 117 | header4 118 | .fields 119 | .insert("n".to_string(), "cat-and-dog".to_string()); 120 | 121 | let mut filter = FieldFilter::new(); 122 | filter.add_include("n:dog"); 123 | filter.add_exclude("n:cat"); 124 | 125 | assert!(!filter.is_allow(&header1)); 126 | assert!(filter.is_allow(&header2)); 127 | assert!(!filter.is_allow(&header3)); 128 | assert!(!filter.is_allow(&header4)); 129 | } 130 | 131 | #[test] 132 | fn test_filter_empty_value() { 133 | let mut header1 = WarcHeader::empty(); 134 | header1.fields.insert("a".to_string(), "".to_string()); 135 | let mut header2 = WarcHeader::empty(); 136 | header2.fields.insert("b".to_string(), "".to_string()); 137 | 138 | let mut filter = FieldFilter::new(); 139 | filter.add_include("a"); 140 | filter.add_exclude("b"); 141 | 142 | assert!(filter.is_allow(&header1)); 143 | assert!(!filter.is_allow(&header2)); 144 | } 145 | 146 | #[test] 147 | fn test_filter_regex() { 148 | let mut header1 = WarcHeader::empty(); 149 | header1.fields.insert("n".to_string(), "cat".to_string()); 150 | let mut header2 = WarcHeader::empty(); 151 | header2.fields.insert("n".to_string(), "dog".to_string()); 152 | let mut header3 = WarcHeader::empty(); 153 | header3.fields.insert("n".to_string(), "bird".to_string()); 154 | let mut header4 = WarcHeader::empty(); 155 | header4 156 | .fields 157 | .insert("n".to_string(), "cat-and-dog".to_string()); 158 | 159 | let mut filter = FieldFilter::new(); 160 | filter.add_include_pattern(r"n:\bdog\b").unwrap(); 161 | filter.add_exclude_pattern(r"n:\bcat\b").unwrap(); 162 | 163 | assert!(!filter.is_allow(&header1)); 164 | assert!(filter.is_allow(&header2)); 165 | assert!(!filter.is_allow(&header3)); 166 | assert!(!filter.is_allow(&header4)); 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /src/app/format.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | 3 | use crate::compress::Format; 4 | 5 | pub fn filename_compression_format(path: &Path) -> Option { 6 | if let Some(filename) = path.file_name() { 7 | let filename = filename.to_string_lossy().to_ascii_lowercase(); 8 | 9 | if filename.ends_with(".warc") { 10 | return Some(Format::Identity); 11 | } 12 | if filename.ends_with(".warc.gz") { 13 | return Some(Format::Gzip); 14 | } 15 | #[cfg(feature = "zstd")] 16 | if filename.ends_with(".warc.zst") { 17 | return Some(Format::Zstandard); 18 | } 19 | } 20 | 21 | None 22 | } 23 | -------------------------------------------------------------------------------- /src/app/get.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Read, Seek, Write}; 2 | 3 | use crate::{ 4 | app::export::Exporter, 5 | compress::{Dictionary, Format}, 6 | dataseq::SeqWriter, 7 | error::{ProtocolError, ProtocolErrorKind}, 8 | extract::WarcExtractor, 9 | header::fields::FieldsExt, 10 | warc::{Decoder, DecoderConfig}, 11 | }; 12 | 13 | use super::arg::{GetCommand, GetExportSubcommand, GetExtractSubcommand, GetSubcommand}; 14 | 15 | pub fn get(args: &GetCommand) -> anyhow::Result<()> { 16 | match &args.subcommand { 17 | GetSubcommand::Export(sub_args) => export(sub_args), 18 | GetSubcommand::Extract(sub_args) => extract(sub_args), 19 | } 20 | } 21 | 22 | // FIXME: refactor the copypaste boilerplate 23 | 24 | fn export(args: &GetExportSubcommand) -> anyhow::Result<()> { 25 | 
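// Export a single record: open the input, seek to the requested position if one was given, decode that record's header and block, and write them out through the Exporter.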
let input_path = &args.input; 26 | let output_path = &args.output; 27 | let span = tracing::info_span!("export", path = ?input_path); 28 | let _span_guard = span.enter(); 29 | 30 | let input = super::common::open_input(input_path)?; 31 | let output = super::common::open_output(output_path)?; 32 | 33 | tracing::info!("opened file"); 34 | 35 | let compression_format = args.compression.try_into_native(input_path)?; 36 | let seq_format = args.format.into(); 37 | let writer = SeqWriter::new(output, seq_format); 38 | 39 | let mut exporter = Exporter::new(input_path, writer, args.no_block, args.extract); 40 | 41 | let mut config = DecoderConfig::default(); 42 | config.decompressor.format = compression_format; 43 | config.decompressor.dictionary = get_dictionary(compression_format); 44 | 45 | let mut decoder = Decoder::new(input, config)?; 46 | 47 | if args.position != 0 { 48 | decoder.prepare_for_seek()?; 49 | decoder 50 | .get_mut() 51 | .seek(std::io::SeekFrom::Start(args.position))?; 52 | } 53 | 54 | let (header, mut decoder) = decoder.read_header()?; 55 | 56 | let record_id = header.fields.get_or_default("WARC-Record-ID"); 57 | 58 | if args.id.as_ref().is_some_and(|id| id != record_id) { 59 | return Err(ProtocolError::new(ProtocolErrorKind::NotFound).into()); 60 | } 61 | 62 | let progress_bar = super::progress::make_bytes_progress_bar(Some(header.content_length()?)); 63 | super::progress::global_progress_bar().add(progress_bar.clone()); 64 | 65 | exporter.process_header(&header, decoder.record_boundary_position())?; 66 | 67 | let mut buf = Vec::with_capacity(8192); 68 | 69 | loop { 70 | buf.resize(8192, 0); 71 | 72 | let bytes_read = decoder.read(&mut buf)?; 73 | 74 | if bytes_read == 0 { 75 | break; 76 | } 77 | 78 | progress_bar.inc(bytes_read as u64); 79 | buf.truncate(bytes_read); 80 | exporter.process_block(&buf)?; 81 | } 82 | 83 | decoder.finish_block()?; 84 | exporter.finish()?; 85 | 86 | tracing::info!("closed file"); 87 | 88 | progress_bar.finish(); 89 | super::progress::global_progress_bar().remove(&progress_bar); 90 | 91 | Ok(()) 92 | } 93 | 94 | fn extract(args: &GetExtractSubcommand) -> anyhow::Result<()> { 95 | let input_path = &args.input; 96 | let output_path = &args.output; 97 | let span = tracing::info_span!("export", path = ?input_path); 98 | let _span_guard = span.enter(); 99 | 100 | let input = super::common::open_input(input_path)?; 101 | let mut output = super::common::open_output(output_path)?; 102 | 103 | tracing::info!("opened file"); 104 | 105 | let compression_format = args.compression.try_into_native(input_path)?; 106 | 107 | let mut extractor = WarcExtractor::new(); 108 | 109 | let mut config = DecoderConfig::default(); 110 | config.decompressor.format = compression_format; 111 | config.decompressor.dictionary = get_dictionary(compression_format); 112 | 113 | let mut decoder = Decoder::new(input, config)?; 114 | 115 | if args.position != 0 { 116 | decoder.prepare_for_seek()?; 117 | decoder 118 | .get_mut() 119 | .seek(std::io::SeekFrom::Start(args.position))?; 120 | } 121 | 122 | let (header, mut decoder) = decoder.read_header()?; 123 | 124 | let record_id = header.fields.get_or_default("WARC-Record-ID"); 125 | 126 | if args.id.as_ref().is_some_and(|id| id != record_id) { 127 | return Err(ProtocolError::new(ProtocolErrorKind::NotFound).into()); 128 | } 129 | 130 | let progress_bar = super::progress::make_bytes_progress_bar(Some(header.content_length()?)); 131 | super::progress::global_progress_bar().add(progress_bar.clone()); 132 | 133 | 
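// Hand the record header to the extractor; it uses the header to decide whether this record has any content that can be extracted.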
extractor.read_header(&header)?; 134 | 135 | if !extractor.has_content() { 136 | return Err(ProtocolError::new(ProtocolErrorKind::NoContent).into()); 137 | } 138 | 139 | let mut buf = Vec::with_capacity(8192); 140 | 141 | loop { 142 | buf.resize(8192, 0); 143 | 144 | let bytes_read = decoder.read(&mut buf)?; 145 | 146 | if bytes_read == 0 { 147 | break; 148 | } 149 | 150 | progress_bar.inc(bytes_read as u64); 151 | buf.truncate(bytes_read); 152 | extractor.extract_data(&buf, &mut output)?; 153 | } 154 | 155 | decoder.finish_block()?; 156 | output.flush()?; 157 | 158 | tracing::info!("closed file"); 159 | 160 | progress_bar.finish(); 161 | super::progress::global_progress_bar().remove(&progress_bar); 162 | 163 | Ok(()) 164 | } 165 | 166 | fn get_dictionary(format: Format) -> Dictionary { 167 | #[cfg(feature = "zstd")] 168 | if format == Format::Zstandard { 169 | return Dictionary::WarcZstd(Vec::new()); 170 | } 171 | 172 | Dictionary::None 173 | } 174 | -------------------------------------------------------------------------------- /src/app/io.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fs::File, 3 | io::{Read, Seek, Stdin, Stdout, Write}, 4 | path::Path, 5 | }; 6 | 7 | use crate::error::{ProtocolError, ProtocolErrorKind}; 8 | 9 | #[derive(Debug)] 10 | pub enum ProgramInput { 11 | File(File), 12 | Stdin(Stdin), 13 | } 14 | 15 | impl ProgramInput { 16 | pub fn open>(path: P) -> std::io::Result { 17 | let path = path.as_ref(); 18 | 19 | if path.to_str() == Some("-") { 20 | Ok(Self::Stdin(std::io::stdin())) 21 | } else { 22 | let file = File::options().read(true).open(path)?; 23 | Ok(Self::File(file)) 24 | } 25 | } 26 | } 27 | 28 | impl Read for ProgramInput { 29 | fn read(&mut self, buf: &mut [u8]) -> std::io::Result { 30 | match self { 31 | ProgramInput::File(r) => r.read(buf), 32 | ProgramInput::Stdin(r) => r.read(buf), 33 | } 34 | } 35 | } 36 | 37 | impl Seek for ProgramInput { 38 | fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { 39 | match self { 40 | ProgramInput::File(file) => file.seek(pos), 41 | ProgramInput::Stdin(_stdin) => Err(std::io::Error::other(ProtocolError::new( 42 | ProtocolErrorKind::IoNotSeekable, 43 | ))), 44 | } 45 | } 46 | } 47 | 48 | #[derive(Debug)] 49 | pub enum ProgramOutput { 50 | File(File), 51 | Stdout(Stdout), 52 | } 53 | 54 | impl ProgramOutput { 55 | pub fn open>(path: P) -> std::io::Result { 56 | let path = path.as_ref(); 57 | 58 | if path.to_str() == Some("-") { 59 | Ok(Self::Stdout(std::io::stdout())) 60 | } else { 61 | let file = File::options() 62 | .write(true) 63 | .create(true) 64 | .truncate(true) 65 | .open(path)?; 66 | Ok(Self::File(file)) 67 | } 68 | } 69 | } 70 | 71 | impl Write for ProgramOutput { 72 | fn write(&mut self, buf: &[u8]) -> std::io::Result { 73 | match self { 74 | ProgramOutput::File(w) => w.write(buf), 75 | ProgramOutput::Stdout(w) => w.write(buf), 76 | } 77 | } 78 | 79 | fn flush(&mut self) -> std::io::Result<()> { 80 | match self { 81 | ProgramOutput::File(w) => w.flush(), 82 | ProgramOutput::Stdout(w) => w.flush(), 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/app/list.rs: -------------------------------------------------------------------------------- 1 | use crate::{app::common::ReaderEvent, dataseq::SeqWriter}; 2 | 3 | use super::{arg::ListCommand, common::ReaderPipeline}; 4 | 5 | pub fn list(args: &ListCommand) -> anyhow::Result<()> { 6 | let output_path = &args.output; 7 | let 
seq_format = args.format.into(); 8 | 9 | for input_path in &args.input { 10 | let span = tracing::info_span!("list", path = ?input_path); 11 | let _span_guard = span.enter(); 12 | 13 | let input = super::common::open_input(input_path)?; 14 | let output = super::common::open_output(output_path)?; 15 | 16 | tracing::info!("opened file"); 17 | 18 | let compression_format = args.compression.try_into_native(input_path)?; 19 | let file_len = std::fs::metadata(input_path).map(|m| m.len()).ok(); 20 | let mut writer = SeqWriter::new(output, seq_format); 21 | 22 | ReaderPipeline::new( 23 | |event| match event { 24 | ReaderEvent::Header { 25 | header, 26 | record_boundary_position, 27 | } => { 28 | let mut values = Vec::new(); 29 | 30 | for name in &args.field { 31 | if name == ":position" { 32 | values.push(serde_json::Value::Number(record_boundary_position.into())); 33 | } else if name == ":file" { 34 | values.push(serde_json::Value::String( 35 | input_path.to_string_lossy().to_string(), 36 | )); 37 | } else { 38 | let value = header.fields.get(name).cloned().unwrap_or_default(); 39 | values.push(serde_json::Value::String(value)); 40 | } 41 | } 42 | 43 | writer.put(values)?; 44 | 45 | Ok(()) 46 | } 47 | ReaderEvent::Block { data: _ } => Ok(()), 48 | }, 49 | input, 50 | compression_format, 51 | file_len, 52 | )? 53 | .run()?; 54 | 55 | tracing::info!("closed file"); 56 | } 57 | 58 | Ok(()) 59 | } 60 | -------------------------------------------------------------------------------- /src/app/logging.rs: -------------------------------------------------------------------------------- 1 | use std::{fs::File, io::Write, path::Path, str::FromStr, sync::Mutex}; 2 | 3 | use tracing_subscriber::{layer::SubscriberExt, Layer}; 4 | 5 | use super::progress::global_progress_bar; 6 | 7 | #[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)] 8 | pub enum Level { 9 | Trace, 10 | Debug, 11 | Info, 12 | Warn, 13 | Error, 14 | Off, 15 | } 16 | 17 | impl Level { 18 | fn as_level_filter(&self) -> tracing_subscriber::filter::LevelFilter { 19 | match self { 20 | Self::Trace => tracing_subscriber::filter::LevelFilter::TRACE, 21 | Self::Debug => tracing_subscriber::filter::LevelFilter::DEBUG, 22 | Self::Info => tracing_subscriber::filter::LevelFilter::INFO, 23 | Self::Warn => tracing_subscriber::filter::LevelFilter::WARN, 24 | Self::Error => tracing_subscriber::filter::LevelFilter::ERROR, 25 | Self::Off => tracing_subscriber::filter::LevelFilter::OFF, 26 | } 27 | } 28 | } 29 | 30 | impl Default for Level { 31 | fn default() -> Self { 32 | Self::Off 33 | } 34 | } 35 | 36 | impl FromStr for Level { 37 | type Err = (); 38 | 39 | fn from_str(s: &str) -> Result { 40 | match s { 41 | "trace" => Ok(Self::Trace), 42 | "debug" => Ok(Self::Debug), 43 | "info" => Ok(Self::Info), 44 | "warn" => Ok(Self::Warn), 45 | "error" => Ok(Self::Error), 46 | "off" => Ok(Self::Off), 47 | _ => Err(()), 48 | } 49 | } 50 | } 51 | 52 | struct ProgressBarMutexWriter { 53 | dest: W, 54 | } 55 | 56 | impl ProgressBarMutexWriter { 57 | fn new(dest: W) -> Self { 58 | Self { dest } 59 | } 60 | } 61 | 62 | impl Write for ProgressBarMutexWriter { 63 | fn write(&mut self, buf: &[u8]) -> std::io::Result { 64 | global_progress_bar().suspend(|| self.dest.write(buf)) 65 | } 66 | 67 | fn flush(&mut self) -> std::io::Result<()> { 68 | global_progress_bar().suspend(|| self.dest.flush()) 69 | } 70 | } 71 | 72 | pub fn set_up_logging(level: Level, file: Option<&Path>, json: bool) -> std::io::Result<()> { 73 | let file_sub = if let Some(path) = file { 74 | let 
writer = File::options().create(true).append(true).open(path)?; 75 | Some( 76 | tracing_subscriber::fmt::layer() 77 | .with_ansi(false) 78 | .with_writer(Mutex::new(writer)), 79 | ) 80 | } else { 81 | None 82 | }; 83 | 84 | let stderr_sub = if file.is_none() { 85 | let writer = ProgressBarMutexWriter::new(std::io::stderr()); 86 | Some(tracing_subscriber::fmt::layer().with_writer(Mutex::new(writer))) 87 | } else { 88 | None 89 | }; 90 | 91 | let json_sub = if json { 92 | Some(tracing_subscriber::fmt::layer().json()) 93 | } else { 94 | None 95 | }; 96 | 97 | let sub = tracing_subscriber::Registry::default(); 98 | let sub = sub.with(file_sub.with_filter(level.as_level_filter())); 99 | let sub = sub.with(stderr_sub.with_filter(level.as_level_filter())); 100 | let sub = sub.with(json_sub); 101 | tracing::subscriber::set_global_default(sub).unwrap(); 102 | 103 | tracing::debug!("logging configured"); 104 | 105 | Ok(()) 106 | } 107 | -------------------------------------------------------------------------------- /src/app/model.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | use serde::{Deserialize, Serialize}; 4 | 5 | #[derive(Debug, Clone, Deserialize, Serialize)] 6 | pub enum WarcMessage { 7 | Metadata(Metadata), 8 | Header(Header), 9 | BlockChunk(BlockChunk), 10 | BlockEnd(BlockEnd), 11 | ExtractMetadata(ExtractMetadata), 12 | ExtractChunk(ExtractChunk), 13 | ExtractEnd(ExtractEnd), 14 | EndOfFile(EndOfFile), 15 | } 16 | 17 | #[derive(Debug, Clone, Deserialize, Serialize)] 18 | pub struct Metadata { 19 | pub file: PathBuf, 20 | pub position: u64, 21 | } 22 | 23 | #[derive(Debug, Clone, Deserialize, Serialize)] 24 | pub struct Header { 25 | pub version: String, 26 | pub fields: Vec<(String, String)>, 27 | } 28 | 29 | #[serde_with::serde_as] 30 | #[derive(Debug, Clone, Deserialize, Serialize)] 31 | pub struct BlockChunk { 32 | #[serde_as(as = "serde_with::IfIsHumanReadable")] 33 | pub data: Vec, 34 | } 35 | 36 | #[derive(Debug, Clone, Deserialize, Serialize)] 37 | pub struct BlockEnd { 38 | pub crc32: Option, 39 | pub crc32c: Option, 40 | pub xxh3: Option, 41 | } 42 | 43 | #[derive(Debug, Clone, Deserialize, Serialize)] 44 | pub struct ExtractMetadata { 45 | pub has_content: bool, 46 | pub file_path_components: Vec, 47 | pub is_truncated: bool, 48 | } 49 | 50 | #[serde_with::serde_as] 51 | #[derive(Debug, Clone, Deserialize, Serialize)] 52 | pub struct ExtractChunk { 53 | #[serde_as(as = "serde_with::IfIsHumanReadable")] 54 | pub data: Vec, 55 | } 56 | 57 | #[derive(Debug, Clone, Deserialize, Serialize)] 58 | pub struct ExtractEnd { 59 | pub crc32: Option, 60 | pub crc32c: Option, 61 | pub xxh3: Option, 62 | } 63 | 64 | #[derive(Debug, Clone, Deserialize, Serialize)] 65 | pub struct EndOfFile {} 66 | -------------------------------------------------------------------------------- /src/app/progress.rs: -------------------------------------------------------------------------------- 1 | use std::sync::{LazyLock, Mutex, MutexGuard}; 2 | 3 | use indicatif::{MultiProgress, ProgressBar, ProgressDrawTarget, ProgressStyle}; 4 | 5 | pub fn global_progress_bar() -> MutexGuard<'static, MultiProgress> { 6 | static PROGRESS_BAR: LazyLock> = LazyLock::new(|| { 7 | let bar = MultiProgress::with_draw_target(ProgressDrawTarget::stderr_with_hz(4)); 8 | bar.set_move_cursor(true); 9 | Mutex::new(bar) 10 | }); 11 | 12 | (*PROGRESS_BAR).lock().unwrap() 13 | } 14 | 15 | pub fn disable_global_progress_bar() { 16 | 
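// Typical lifecycle of a bar attached to the global MultiProgress, as done in
// src/app/get.rs above (total_len and chunk_len are placeholders):
//
//     let bar = make_bytes_progress_bar(Some(total_len));
//     global_progress_bar().add(bar.clone());
//     // ... while copying data ...
//     bar.inc(chunk_len as u64);
//     // ... when done ...
//     bar.finish();
//     global_progress_bar().remove(&bar);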
global_progress_bar().set_draw_target(ProgressDrawTarget::hidden()); 17 | } 18 | 19 | pub fn make_bytes_progress_bar(len: Option) -> ProgressBar { 20 | if let Some(len) = len { 21 | let style = ProgressStyle::with_template( 22 | "[{bar:30.cyan/cyan.dim}] {percent:.green}% {binary_bytes:.dim} / {binary_total_bytes:.dim} {msg}", 23 | ) 24 | .unwrap(); 25 | let style = style.progress_chars("=>."); 26 | 27 | ProgressBar::new(len).with_style(style) 28 | } else { 29 | let style = ProgressStyle::with_template("{spinner:.cyan} {msg}").unwrap(); 30 | let style = style.tick_strings(&[ 31 | "[= ]", "[ = ]", "[ = ]", "[ =)", "[ =]", "[ = ]", "[ = ]", "(= ]", 32 | "[====]", 33 | ]); 34 | 35 | ProgressBar::new_spinner().with_style(style) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/app/self_.rs: -------------------------------------------------------------------------------- 1 | use regex::Regex; 2 | use takecrate::{ 3 | inst::{InstallConfig, PackageManifest}, 4 | manifest::AppId, 5 | }; 6 | 7 | use super::arg::{SelfCommand, SelfSubcommand}; 8 | 9 | pub fn self_(args: &SelfCommand) -> anyhow::Result<()> { 10 | match &args.command { 11 | SelfSubcommand::Install { quiet } => { 12 | if *quiet { 13 | install_quiet() 14 | } else { 15 | install_interactive() 16 | } 17 | } 18 | SelfSubcommand::Uninstall { quiet } => { 19 | if *quiet { 20 | uninstall_quiet() 21 | } else { 22 | uninstall_interactive() 23 | } 24 | } 25 | } 26 | } 27 | 28 | pub fn is_installer() -> bool { 29 | if std::env::args().len() > 1 { 30 | return false; 31 | } 32 | 33 | let name = std::env::current_exe().unwrap_or_default(); 34 | let name = if !std::env::consts::EXE_SUFFIX.is_empty() { 35 | name.file_stem().unwrap_or(name.as_os_str()) 36 | } else { 37 | name.as_os_str() 38 | }; 39 | let name = name.to_string_lossy(); 40 | let pattern = Regex::new(r"(?-ui:[. 
_-]installer)$").unwrap(); 41 | pattern.is_match(&name) 42 | } 43 | 44 | pub fn install_interactive() -> anyhow::Result<()> { 45 | let manifest = package_manifest()?; 46 | takecrate::install_interactive(&manifest)?; 47 | Ok(()) 48 | } 49 | 50 | pub fn install_quiet() -> anyhow::Result<()> { 51 | let manifest = package_manifest()?; 52 | let config = InstallConfig::new_user()?; 53 | takecrate::install(&manifest, &config)?; 54 | Ok(()) 55 | } 56 | 57 | pub fn uninstall_interactive() -> anyhow::Result<()> { 58 | let app_id = app_id(); 59 | takecrate::uninstall_interactive(&app_id)?; 60 | Ok(()) 61 | } 62 | 63 | pub fn uninstall_quiet() -> anyhow::Result<()> { 64 | let app_id = app_id(); 65 | takecrate::uninstall(&app_id)?; 66 | Ok(()) 67 | } 68 | 69 | fn app_id() -> AppId { 70 | AppId::new("io.github.chfoo.warcat-rs").unwrap() 71 | } 72 | 73 | fn package_manifest() -> anyhow::Result { 74 | let mut manifest = PackageManifest::new(&app_id()) 75 | .with_interactive_uninstall_args(&["self", "uninstall"]) 76 | .with_quiet_uninstall_args(&["self", "uninstall", "--quiet"]) 77 | .with_self_exe_renamed(format!("warcat{}", std::env::consts::EXE_SUFFIX))?; 78 | 79 | manifest.app_metadata.display_name = "Warcat".to_string(); 80 | manifest.app_metadata.display_version = clap::crate_version!().to_string(); 81 | 82 | Ok(manifest) 83 | } 84 | -------------------------------------------------------------------------------- /src/app/verify.rs: -------------------------------------------------------------------------------- 1 | use std::{cell::RefCell, process::ExitCode, rc::Rc}; 2 | 3 | use crate::{ 4 | app::common::{ReaderEvent, ReaderPipeline}, 5 | dataseq::SeqWriter, 6 | verify::{Check, Verifier, VerifyStatus}, 7 | }; 8 | 9 | use super::arg::VerifyCommand; 10 | 11 | const VERIFY_FAILED_EXIT_CODE: u8 = 8; 12 | 13 | pub fn verify(args: &VerifyCommand) -> anyhow::Result { 14 | let output_path = &args.output; 15 | let output = super::common::open_output(output_path)?; 16 | let seq_format = args.format.into(); 17 | 18 | let mut writer = SeqWriter::new(output, seq_format); 19 | let mut problem_count = 0u64; 20 | let mut verifier = if let Some(path) = &args.database { 21 | Verifier::open(path)? 
22 | } else { 23 | Verifier::new() 24 | }; 25 | 26 | for exclude in &args.exclude_check { 27 | verifier.checks_mut().remove(&Check::from(*exclude)); 28 | } 29 | 30 | let verifier = Rc::new(RefCell::new(verifier)); 31 | 32 | for input_path in &args.input { 33 | let span = tracing::info_span!("verify", path = ?input_path); 34 | let _span_guard = span.enter(); 35 | 36 | let input = super::common::open_input(input_path)?; 37 | 38 | tracing::info!("opened file"); 39 | 40 | let compression_format = args.compression.try_into_native(input_path)?; 41 | let file_len = std::fs::metadata(input_path).map(|m| m.len()).ok(); 42 | 43 | let mut reader = ReaderPipeline::new( 44 | |event| match event { 45 | ReaderEvent::Header { 46 | header, 47 | record_boundary_position: _, 48 | } => { 49 | let mut verifier = verifier.borrow_mut(); 50 | 51 | for problem in verifier.problems() { 52 | problem_count += 1; 53 | writer.put(problem)?; 54 | } 55 | verifier.problems_mut().clear(); 56 | verifier.begin_record(&header)?; 57 | 58 | Ok(()) 59 | } 60 | ReaderEvent::Block { data } => { 61 | let mut verifier = verifier.borrow_mut(); 62 | 63 | if data.is_empty() { 64 | verifier.end_record(); 65 | } else { 66 | verifier.block_data(data); 67 | } 68 | 69 | Ok(()) 70 | } 71 | }, 72 | input, 73 | compression_format, 74 | file_len, 75 | )?; 76 | reader.run()?; 77 | 78 | let mut verifier = verifier.borrow_mut(); 79 | 80 | if reader.has_record_at_time_compression_fault { 81 | verifier.add_not_record_at_time_compression(); 82 | } 83 | 84 | loop { 85 | let action = verifier.verify_end()?; 86 | 87 | for problem in verifier.problems() { 88 | problem_count += 1; 89 | writer.put(problem)?; 90 | } 91 | verifier.problems_mut().clear(); 92 | 93 | match action { 94 | VerifyStatus::HasMore => {} 95 | VerifyStatus::Done => break, 96 | } 97 | } 98 | 99 | tracing::info!("closed file"); 100 | } 101 | 102 | let exit_code = if problem_count == 0 { 103 | ExitCode::SUCCESS 104 | } else { 105 | ExitCode::from(VERIFY_FAILED_EXIT_CODE) 106 | }; 107 | 108 | Ok(exit_code) 109 | } 110 | -------------------------------------------------------------------------------- /src/compress/encode.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Debug, io::Write}; 2 | 3 | #[cfg(feature = "zstd")] 4 | use super::zstd::ZstdEncoder; 5 | use brotli::CompressorWriter as BrEncoder; 6 | use flate2::write::{GzEncoder, ZlibEncoder}; 7 | 8 | use super::{Dictionary, Format, Level}; 9 | 10 | pub enum Encoder { 11 | Identity(W), 12 | Deflate(ZlibEncoder), 13 | Gzip(GzEncoder), 14 | Brotli(Box>), 15 | #[cfg(feature = "zstd")] 16 | Zstandard(ZstdEncoder), 17 | None, 18 | } 19 | 20 | impl Encoder { 21 | pub fn new(dest: W, format: Format, level: Level, dictionary: &Dictionary) -> Encoder { 22 | let level = get_encoder_level(format, level); 23 | 24 | match format { 25 | Format::Identity => Encoder::Identity(dest), 26 | Format::Deflate => Encoder::Deflate(ZlibEncoder::new( 27 | dest, 28 | flate2::Compression::new(level as u32), 29 | )), 30 | Format::Gzip => { 31 | Encoder::Gzip(GzEncoder::new(dest, flate2::Compression::new(level as u32))) 32 | } 33 | Format::Brotli => { 34 | Encoder::Brotli(Box::new(BrEncoder::new(dest, 4096, level as u32, 22))) 35 | } 36 | #[cfg(feature = "zstd")] 37 | Format::Zstandard => { 38 | Encoder::Zstandard(ZstdEncoder::new(dest, level, dictionary.clone()).unwrap()) 39 | } 40 | } 41 | } 42 | 43 | pub fn get_ref(&self) -> &W { 44 | match self { 45 | Self::Identity(w) => w, 46 | Self::Deflate(codec) => 
codec.get_ref(), 47 | Self::Gzip(codec) => codec.get_ref(), 48 | Self::Brotli(codec) => codec.get_ref(), 49 | #[cfg(feature = "zstd")] 50 | Self::Zstandard(codec) => codec.get_ref(), 51 | Self::None => unreachable!(), 52 | } 53 | } 54 | 55 | pub fn get_mut(&mut self) -> &mut W { 56 | match self { 57 | Self::Identity(w) => w, 58 | Self::Deflate(codec) => codec.get_mut(), 59 | Self::Gzip(codec) => codec.get_mut(), 60 | Self::Brotli(codec) => codec.get_mut(), 61 | #[cfg(feature = "zstd")] 62 | Self::Zstandard(codec) => codec.get_mut(), 63 | Self::None => unreachable!(), 64 | } 65 | } 66 | 67 | pub fn finish(self) -> std::io::Result { 68 | match self { 69 | Self::Identity(w) => Ok(w), 70 | Self::Deflate(codec) => codec.finish(), 71 | Self::Gzip(codec) => codec.finish(), 72 | Self::Brotli(codec) => Ok(codec.into_inner()), 73 | #[cfg(feature = "zstd")] 74 | Self::Zstandard(codec) => codec.finish(), 75 | Self::None => unreachable!(), 76 | } 77 | } 78 | } 79 | 80 | impl Write for Encoder { 81 | fn write(&mut self, buf: &[u8]) -> std::io::Result { 82 | match self { 83 | Self::Identity(w) => w.write(buf), 84 | Self::Deflate(w) => w.write(buf), 85 | Self::Gzip(w) => w.write(buf), 86 | Self::Brotli(w) => w.write(buf), 87 | #[cfg(feature = "zstd")] 88 | Self::Zstandard(w) => w.write(buf), 89 | Self::None => unreachable!(), 90 | } 91 | } 92 | 93 | fn flush(&mut self) -> std::io::Result<()> { 94 | match self { 95 | Self::Identity(w) => w.flush(), 96 | Self::Deflate(w) => w.flush(), 97 | Self::Gzip(w) => w.flush(), 98 | Self::Brotli(w) => w.flush(), 99 | #[cfg(feature = "zstd")] 100 | Self::Zstandard(w) => w.flush(), 101 | Self::None => unreachable!(), 102 | } 103 | } 104 | } 105 | 106 | impl Debug for Encoder { 107 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 108 | match self { 109 | Self::Identity(_arg0) => f.debug_tuple("Identity").finish(), 110 | Self::Deflate(_arg0) => f.debug_tuple("Deflate").finish(), 111 | Self::Gzip(_arg0) => f.debug_tuple("Gzip").finish(), 112 | Self::Brotli(_arg0) => f.debug_tuple("Brotli").finish(), 113 | #[cfg(feature = "zstd")] 114 | Self::Zstandard(_arg0) => f.debug_tuple("Zstandard").finish(), 115 | Self::None => write!(f, "None"), 116 | } 117 | } 118 | } 119 | 120 | fn get_encoder_level(format: Format, level: Level) -> i32 { 121 | match format { 122 | Format::Identity => match level { 123 | Level::Balanced => 0, 124 | Level::High => 0, 125 | Level::Low => 0, 126 | }, 127 | Format::Deflate | Format::Gzip => match level { 128 | Level::Balanced => 6, 129 | Level::High => 9, 130 | Level::Low => 1, 131 | }, 132 | 133 | Format::Brotli => match level { 134 | Level::Balanced => 4, 135 | Level::High => 7, 136 | Level::Low => 0, 137 | }, 138 | #[cfg(feature = "zstd")] 139 | Format::Zstandard => match level { 140 | Level::Balanced => 3, 141 | Level::High => 9, 142 | Level::Low => 1, 143 | }, 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/compress/zstd.rs: -------------------------------------------------------------------------------- 1 | use std::io::Read; 2 | 3 | #[cfg(feature = "zstd")] 4 | pub(crate) use decode::{ZstdDecoder, ZstdPushDecoder}; 5 | #[cfg(feature = "zstd")] 6 | pub(crate) use encode::ZstdEncoder; 7 | 8 | #[cfg(feature = "zstd")] 9 | mod decode; 10 | #[cfg(feature = "zstd")] 11 | mod encode; 12 | 13 | const WARC_DICT_FRAME: u32 = 0x184D2A5D; 14 | const ZSTD_FRAME: u32 = 0xFD2FB528; 15 | const BULK_BUFFER_LENGTH: usize = 16 * 1024 * 1024; 16 | 17 | pub fn 
is_skippable_frame(magic_number: u32) -> bool { 18 | (0x184D2A50..=0x184D2A5F).contains(&magic_number) 19 | } 20 | 21 | pub fn extract_warc_zst_dictionary( 22 | mut input: R, 23 | ) -> Result, WarcZstDictExtractError> { 24 | let mut buf = [0u8; 8]; 25 | 26 | input.read_exact(&mut buf)?; 27 | 28 | let magic_number = u32::from_le_bytes(buf[0..4].try_into().unwrap()); 29 | let length = u32::from_le_bytes(buf[4..8].try_into().unwrap()); 30 | 31 | if length > BULK_BUFFER_LENGTH as u32 { 32 | return Err(WarcZstDictExtractError::TooLarge); 33 | } 34 | 35 | if magic_number != WARC_DICT_FRAME { 36 | return Err(WarcZstDictExtractError::NotDict); 37 | } 38 | 39 | let mut buf = vec![0u8; length as usize]; 40 | input.read_exact(&mut buf)?; 41 | 42 | if buf.starts_with(&ZSTD_FRAME.to_le_bytes()) { 43 | #[cfg(feature = "zstd")] 44 | { 45 | let buf2 = zstd::bulk::decompress(&buf, BULK_BUFFER_LENGTH)?; 46 | 47 | Ok(buf2) 48 | } 49 | #[cfg(not(feature = "zstd"))] 50 | { 51 | Err(std::io::Error::other( 52 | "failed to read compressed .warc.zst dictionary: zstd feature is not enabled", 53 | )) 54 | } 55 | } else { 56 | Ok(buf) 57 | } 58 | } 59 | 60 | #[derive(Debug, thiserror::Error)] 61 | pub enum WarcZstDictExtractError { 62 | #[error("dictionary too large")] 63 | TooLarge, 64 | #[error("not a .warc.zst dictionary")] 65 | NotDict, 66 | #[error(transparent)] 67 | Other(#[from] std::io::Error), 68 | } 69 | -------------------------------------------------------------------------------- /src/compress/zstd/encode.rs: -------------------------------------------------------------------------------- 1 | use std::io::Write; 2 | 3 | use zstd::stream::write::Encoder as ZstdEncoderImpl; 4 | 5 | use crate::compress::Dictionary; 6 | 7 | use super::WARC_DICT_FRAME; 8 | 9 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 10 | enum WarcDictionaryState { 11 | None, 12 | PendingFrameWrite, 13 | Ok, 14 | } 15 | 16 | pub struct ZstdEncoder { 17 | level: i32, 18 | dictionary: Dictionary, 19 | warc_dict_state: WarcDictionaryState, 20 | encoder_impl: Option>, 21 | } 22 | 23 | impl ZstdEncoder { 24 | pub fn new(dest: W, level: i32, dictionary: Dictionary) -> std::io::Result { 25 | let warc_dict_state = match &dictionary { 26 | Dictionary::None => WarcDictionaryState::None, 27 | Dictionary::Zstd(_vec) => WarcDictionaryState::None, 28 | Dictionary::WarcZstd(_vec) => WarcDictionaryState::PendingFrameWrite, 29 | }; 30 | let mut encoder_impl = match &dictionary { 31 | Dictionary::None => ZstdEncoderImpl::new(dest, level)?, 32 | Dictionary::Zstd(vec) => ZstdEncoderImpl::with_dictionary(dest, level, vec)?, 33 | Dictionary::WarcZstd(vec) => ZstdEncoderImpl::with_dictionary(dest, level, vec)?, 34 | }; 35 | Self::config_encoder(&mut encoder_impl)?; 36 | Ok(Self { 37 | level, 38 | dictionary, 39 | warc_dict_state, 40 | encoder_impl: Some(encoder_impl), 41 | }) 42 | } 43 | 44 | fn config_encoder(encoder: &mut ZstdEncoderImpl<'static, W>) -> std::io::Result<()> { 45 | encoder.include_checksum(true)?; 46 | Ok(()) 47 | } 48 | 49 | pub fn get_ref(&self) -> &W { 50 | self.encoder_impl.as_ref().unwrap().get_ref() 51 | } 52 | 53 | pub fn get_mut(&mut self) -> &mut W { 54 | self.encoder_impl.as_mut().unwrap().get_mut() 55 | } 56 | 57 | fn write_warc_dictionary(&mut self) -> std::io::Result<()> { 58 | if let Dictionary::WarcZstd(data) = &self.dictionary { 59 | let dest = self.encoder_impl.as_mut().unwrap().get_mut(); 60 | dest.write_all(&WARC_DICT_FRAME.to_le_bytes())?; 61 | dest.write_all(&(data.len() as u32).to_le_bytes())?; 62 | dest.write_all(data)?; 
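// The bytes written above form a zstd "skippable frame" carrying the WARC
// dictionary: magic 0x184D2A5D (within the skippable range
// 0x184D2A50..=0x184D2A5F), a little-endian u32 length, then the dictionary
// itself (optionally a zstd-compressed payload starting with 0xFD2FB528).
// Round-trip sketch against extract_warc_zst_dictionary() in
// src/compress/zstd.rs above (the dictionary bytes are placeholders):
//
//     let dict = b"raw dictionary bytes";
//     let mut frame = Vec::new();
//     frame.extend_from_slice(&WARC_DICT_FRAME.to_le_bytes());
//     frame.extend_from_slice(&(dict.len() as u32).to_le_bytes());
//     frame.extend_from_slice(dict);
//
//     let recovered = super::extract_warc_zst_dictionary(&frame[..]).unwrap();
//     assert_eq!(recovered, dict.to_vec());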
63 | } 64 | 65 | Ok(()) 66 | } 67 | 68 | pub fn finish(self) -> std::io::Result { 69 | self.encoder_impl.unwrap().finish() 70 | } 71 | 72 | pub fn start_new_frame(&mut self) -> std::io::Result<()> { 73 | // FIXME: We should be reusing the zstd context but the API is a bit difficult. 74 | 75 | let dest = self.encoder_impl.take().unwrap().finish()?; 76 | 77 | let mut encoder_impl = match &self.dictionary { 78 | Dictionary::None => ZstdEncoderImpl::new(dest, self.level)?, 79 | Dictionary::Zstd(vec) => ZstdEncoderImpl::with_dictionary(dest, self.level, vec)?, 80 | Dictionary::WarcZstd(vec) => ZstdEncoderImpl::with_dictionary(dest, self.level, vec)?, 81 | }; 82 | Self::config_encoder(&mut encoder_impl)?; 83 | 84 | self.encoder_impl = Some(encoder_impl); 85 | 86 | Ok(()) 87 | } 88 | } 89 | 90 | impl Write for ZstdEncoder { 91 | fn write(&mut self, buf: &[u8]) -> std::io::Result { 92 | if self.warc_dict_state == WarcDictionaryState::PendingFrameWrite { 93 | self.warc_dict_state = WarcDictionaryState::Ok; 94 | 95 | self.write_warc_dictionary()?; 96 | } 97 | 98 | self.encoder_impl.as_mut().unwrap().write(buf) 99 | } 100 | 101 | fn flush(&mut self) -> std::io::Result<()> { 102 | self.encoder_impl.as_mut().unwrap().flush() 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/fields/de.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use serde::{de::Visitor, Deserialize, Deserializer}; 4 | 5 | use super::FieldMap; 6 | 7 | struct FieldMapVisitor { 8 | _n: PhantomData, 9 | _v: PhantomData, 10 | } 11 | 12 | impl FieldMapVisitor { 13 | fn new() -> Self { 14 | Self { 15 | _n: PhantomData, 16 | _v: PhantomData, 17 | } 18 | } 19 | } 20 | 21 | impl<'de, N, V> Visitor<'de> for FieldMapVisitor 22 | where 23 | N: Deserialize<'de>, 24 | V: Deserialize<'de>, 25 | { 26 | type Value = FieldMap; 27 | 28 | fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { 29 | formatter.write_str("sequence of two-item tuples") 30 | } 31 | 32 | fn visit_seq(self, mut seq: A) -> Result 33 | where 34 | A: serde::de::SeqAccess<'de>, 35 | { 36 | let mut items = Vec::new(); 37 | 38 | while let Some(item) = seq.next_element()? { 39 | items.push(item); 40 | } 41 | 42 | Ok(FieldMap { fields: items }) 43 | } 44 | } 45 | 46 | impl<'de, N, V> Deserialize<'de> for FieldMap 47 | where 48 | N: Deserialize<'de>, 49 | V: Deserialize<'de>, 50 | { 51 | fn deserialize(deserializer: D) -> Result, D::Error> 52 | where 53 | D: Deserializer<'de>, 54 | { 55 | deserializer.deserialize_seq(FieldMapVisitor::new()) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/fields/ser.rs: -------------------------------------------------------------------------------- 1 | use serde::{ser::SerializeSeq, Serialize, Serializer}; 2 | 3 | use super::FieldMap; 4 | 5 | impl Serialize for FieldMap 6 | where 7 | N: Serialize, 8 | V: Serialize, 9 | { 10 | fn serialize(&self, serializer: S) -> Result 11 | where 12 | S: Serializer, 13 | { 14 | let mut seq = serializer.serialize_seq(Some(self.fields.len()))?; 15 | 16 | for item in &self.fields { 17 | seq.serialize_element(item)?; 18 | } 19 | seq.end() 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/header.rs: -------------------------------------------------------------------------------- 1 | //! 
WARC headers 2 | use std::io::Write; 3 | 4 | use chrono::Utc; 5 | 6 | use crate::{ 7 | error::{ParseError, ProtocolError, ProtocolErrorKind}, 8 | fields::FieldMap, 9 | }; 10 | 11 | pub mod fields; 12 | 13 | pub type WarcFields = FieldMap; 14 | 15 | /// Data structure for representing a WARC header. 16 | #[derive(Debug, Clone)] 17 | pub struct WarcHeader { 18 | /// The version string such as "WARC/1.1". 19 | pub version: String, 20 | /// The name-value fields of the header. 21 | pub fields: WarcFields, 22 | } 23 | 24 | impl WarcHeader { 25 | /// Create a new empty header. 26 | /// 27 | /// The version and fields will be empty. 28 | pub fn empty() -> Self { 29 | Self { 30 | version: String::new(), 31 | fields: FieldMap::new(), 32 | } 33 | } 34 | 35 | /// Create a new header with the bare minimum values. 36 | /// 37 | /// The user supplies the `Content-Length` and `WARC-Type`. 38 | /// `WARC-Record-ID` and `WARC-Date` is automatically generated. 39 | pub fn new(content_length: u64, warc_type: WT) -> Self 40 | where 41 | WT: Into, 42 | { 43 | let mut header = WarcHeader::empty(); 44 | header.version = "WARC/1.1".to_string(); 45 | let uuid = uuid::Uuid::now_v7(); 46 | let date_now = Utc::now(); 47 | 48 | header 49 | .fields 50 | .insert("WARC-Record-ID".to_string(), format!("<{}>", uuid.urn())); 51 | header 52 | .fields 53 | .insert("WARC-Type".to_string(), warc_type.into()); 54 | header 55 | .fields 56 | .insert("WARC-Date".to_string(), date_now.to_rfc3339()); 57 | header.set_content_length(content_length); 58 | 59 | header 60 | } 61 | 62 | /// Parses a WARC header from the given bytes. 63 | pub fn parse(input: &[u8]) -> Result { 64 | let (remain, version) = crate::parse::warc::version_line(input)?; 65 | 66 | let mut header = Self::empty(); 67 | header.version = String::from_utf8(version.to_vec())?; 68 | 69 | let (_remain, pairs) = crate::parse::fields::field_pairs(remain)?; 70 | 71 | for pair in pairs { 72 | let name = String::from_utf8(pair.name.to_vec())?; 73 | let value = String::from_utf8(crate::parse::remove_line_folding(pair.value).to_vec())?; 74 | 75 | header.fields.insert(name, value); 76 | } 77 | 78 | Ok(header) 79 | } 80 | 81 | /// Returns the value of `Content-Length` as an integer. 82 | pub fn content_length(&self) -> Result { 83 | if let Some(value) = self.fields.get_u64_strict("Content-Length") { 84 | Ok(value.map_err(|e| { 85 | ProtocolError::new(ProtocolErrorKind::InvalidContentLength).with_source(e) 86 | })?) 87 | } else { 88 | Err(ProtocolError::new(ProtocolErrorKind::InvalidContentLength)) 89 | } 90 | } 91 | 92 | /// Sets the value of `Content-Length` as an integer. 93 | pub fn set_content_length(&mut self, value: u64) { 94 | self.fields 95 | .insert("Content-Length".to_string(), value.to_string()); 96 | } 97 | 98 | /// Returns whether the header is a valid WARC formatted header. 99 | /// 100 | /// **Important:** This function does not validate whether the *contents* of 101 | /// the header conforms to the WARC specification! 102 | pub fn validate(&self) -> Result<(), ParseError> { 103 | crate::parse::warc::version(self.version.as_bytes())?; 104 | 105 | for (name, value) in &self.fields { 106 | crate::parse::validate_field_name(name.as_bytes())?; 107 | crate::parse::validate_field_value(value.as_bytes(), false)?; 108 | } 109 | 110 | Ok(()) 111 | } 112 | 113 | /// Write the WARC header as serialized bytes. 
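// Usage sketch for the constructors above (hypothetical field values; errors
// unwrapped for brevity):
//
//     let mut header = WarcHeader::new(0, "warcinfo");
//     header
//         .fields
//         .insert("WARC-Filename".to_string(), "example.warc".to_string());
//     header.validate().unwrap();
//
//     let mut buf = Vec::new();
//     header.serialize(&mut buf).unwrap();
//     // buf now starts with b"WARC/1.1\r\n" followed by the field lines and
//     // a blank line, as in the unit test at the bottom of this file.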
114 | pub fn serialize(&self, mut buf: W) -> std::io::Result<()> { 115 | buf.write_all(self.version.as_bytes())?; 116 | buf.write_all(b"\r\n")?; 117 | 118 | for (name, value) in &self.fields { 119 | buf.write_all(name.as_bytes())?; 120 | buf.write_all(b": ")?; 121 | buf.write_all(value.as_bytes())?; 122 | buf.write_all(b"\r\n")?; 123 | } 124 | 125 | buf.write_all(b"\r\n")?; 126 | 127 | Ok(()) 128 | } 129 | } 130 | 131 | #[cfg(test)] 132 | mod tests { 133 | use super::*; 134 | 135 | #[test] 136 | fn test_header_parse_serialize() { 137 | let data = "WARC/1.1\r\n\ 138 | WARC-Record-ID: \r\n\ 139 | Content-Length: 0\r\n\ 140 | \r\n"; 141 | let header = WarcHeader::parse(data.as_bytes()).unwrap(); 142 | 143 | assert_eq!(&header.version, "WARC/1.1"); 144 | assert_eq!(header.fields.len(), 2); 145 | 146 | let mut buf = Vec::new(); 147 | 148 | header.serialize(&mut buf).unwrap(); 149 | 150 | assert_eq!(&buf, data.as_bytes()); 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /src/header/fields.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, net::IpAddr, str::FromStr}; 2 | 3 | use chrono::{DateTime, FixedOffset}; 4 | use url::Url; 5 | 6 | use crate::error::ParseError; 7 | 8 | use super::WarcFields; 9 | 10 | pub trait FieldsExt { 11 | /// Returns the value if the name is present, otherwise empty string. 12 | fn get_or_default>(&self, name: N) -> &str; 13 | 14 | /// Parse a "content-type" field. 15 | fn get_media_type>(&self, name: N) -> Option>; 16 | 17 | /// Parse a ISO8601 field. 18 | fn get_date>(&self, name: N) 19 | -> Option, ParseError>>; 20 | 21 | /// Returns whether the value is delimitated by `<` and `>`. 22 | fn is_formatted_bad_spec_url>(&self, name: N) -> bool; 23 | 24 | /// Returns the value with the deliminator `<` and `>` removed. 25 | fn get_url_str>(&self, name: N) -> Option<&str>; 26 | 27 | /// Parse a URL (with the deliminator `<` and `>` removed). 28 | fn get_url>(&self, name: N) -> Option>; 29 | 30 | /// Parse an IP address. 
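// Sketch of reading typed values out of a record header with this trait
// (field names follow the WARC spec; `header` is a WarcHeader and the values
// are hypothetical):
//
//     use crate::header::fields::FieldsExt;
//
//     let uri = header.fields.get_or_default("WARC-Target-URI");
//     let date = header.fields.get_date("WARC-Date");          // Option<Result<DateTime<FixedOffset>, ParseError>>
//     let url = header.fields.get_url("WARC-Target-URI");      // Option<Result<Url, ParseError>>
//     let addr = header.fields.get_ip_addr("WARC-IP-Address"); // Option<Result<IpAddr, ParseError>>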
31 | fn get_ip_addr>(&self, name: N) -> Option>; 32 | } 33 | 34 | #[derive(Debug, Clone, Default)] 35 | pub struct MediaType { 36 | pub type_: String, 37 | pub subtype: String, 38 | pub parameters: HashMap, 39 | } 40 | 41 | impl MediaType { 42 | pub fn empty() -> Self { 43 | Self { 44 | ..Default::default() 45 | } 46 | } 47 | } 48 | 49 | impl FromStr for MediaType { 50 | type Err = ParseError; 51 | 52 | fn from_str(s: &str) -> Result { 53 | let (_remain, output) = crate::parse::fields::media_type(s.as_bytes())?; 54 | 55 | Ok(Self { 56 | type_: String::from_utf8_lossy(output.type_).to_string(), 57 | subtype: String::from_utf8_lossy(output.subtype).to_string(), 58 | parameters: HashMap::from_iter(output.parameters.iter().map(|(k, v)| { 59 | ( 60 | String::from_utf8_lossy(k).to_string(), 61 | String::from_utf8_lossy(v).to_string(), 62 | ) 63 | })), 64 | }) 65 | } 66 | } 67 | 68 | impl FieldsExt for WarcFields { 69 | fn get_or_default>(&self, name: N) -> &str { 70 | self.get(name.as_ref()) 71 | .map(String::as_str) 72 | .unwrap_or_default() 73 | } 74 | 75 | fn get_media_type>(&self, name: N) -> Option> { 76 | self.get(name.as_ref()) 77 | .map(|value| MediaType::from_str(value)) 78 | } 79 | 80 | fn get_date>( 81 | &self, 82 | name: N, 83 | ) -> Option, ParseError>> { 84 | self.get(name.as_ref()) 85 | .map(|value| DateTime::parse_from_rfc3339(value).map_err(|error| error.into())) 86 | } 87 | 88 | fn is_formatted_bad_spec_url>(&self, name: N) -> bool { 89 | if let Some(value) = self.get(name.as_ref()) { 90 | value.starts_with("<") && value.ends_with(">") 91 | } else { 92 | false 93 | } 94 | } 95 | 96 | fn get_url_str>(&self, name: N) -> Option<&str> { 97 | if let Some(value) = self.get(name.as_ref()) { 98 | if value.starts_with("<") && value.ends_with(">") { 99 | Some(value.trim_start_matches("<").trim_end_matches(">")) 100 | } else { 101 | Some(value) 102 | } 103 | } else { 104 | None 105 | } 106 | } 107 | 108 | fn get_url>(&self, name: N) -> Option> { 109 | if let Some(value) = self.get(name.as_ref()) { 110 | let value = if value.starts_with("<") && value.ends_with(">") { 111 | value.trim_start_matches("<").trim_end_matches(">") 112 | } else { 113 | value 114 | }; 115 | 116 | Some(Url::parse(value).map_err(|error| error.into())) 117 | } else { 118 | None 119 | } 120 | } 121 | 122 | fn get_ip_addr>(&self, name: N) -> Option> { 123 | self.get(name.as_ref()) 124 | .map(|value| IpAddr::from_str(value).map_err(|error| error.into())) 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/http.rs: -------------------------------------------------------------------------------- 1 | //! Things dealing with the HTTP protocol 2 | pub mod h1; 3 | -------------------------------------------------------------------------------- /src/http/h1.rs: -------------------------------------------------------------------------------- 1 | //! Minimal, low-level HTTP 1.1 protocol implementation 2 | //! 3 | //! This module is sans-IO; it doesn't use networking sockets. 
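// Minimal sketch of driving the sans-IO sender (mirrors the unit test in
// send.rs further below); the caller owns the socket and is responsible for
// shipping the drained bytes:
//
//     use crate::http::h1::{header::MessageHeader, send::Sender};
//
//     let mut sender = Sender::new();
//     let header = MessageHeader::new_request("GET", "/index.html");
//     sender.send_header(&header).unwrap();
//     sender.send_body(b"Hello world!").unwrap();
//     sender.end_message().unwrap();
//
//     let mut buf = [0u8; 1024];
//     loop {
//         let n = sender.read_output(&mut buf);
//         if n == 0 {
//             break;
//         }
//         // hand &buf[..n] to the transport
//     }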
4 | pub mod codec; 5 | pub mod error; 6 | pub mod header; 7 | pub mod recv; 8 | pub mod send; 9 | -------------------------------------------------------------------------------- /src/http/h1/codec/compress.rs: -------------------------------------------------------------------------------- 1 | use std::io::Write; 2 | use std::str::FromStr; 3 | 4 | use crate::{ 5 | compress::{Compressor, Format as CompressionFormat, PushDecompressor}, 6 | error::{GeneralError, ProtocolError, ProtocolErrorKind}, 7 | }; 8 | 9 | use super::Codec; 10 | 11 | #[derive(Debug)] 12 | pub struct CompressionEncoder { 13 | compressor: Option>>, 14 | } 15 | 16 | impl CompressionEncoder { 17 | pub fn new(compressor: Compressor>) -> Self { 18 | Self { 19 | compressor: Some(compressor), 20 | } 21 | } 22 | 23 | pub fn try_of_name(name: &str) -> Result { 24 | let format = CompressionFormat::from_str(name) 25 | .map_err(|_| ProtocolError::new(ProtocolErrorKind::UnsupportedCompressionFormat))?; 26 | 27 | Ok(Self::new(Compressor::new(Vec::new(), format))) 28 | } 29 | } 30 | 31 | impl Codec for CompressionEncoder { 32 | fn transform(&mut self, input: &[u8], output: &mut Vec) -> Result<(), GeneralError> { 33 | if let Some(compressor) = &mut self.compressor { 34 | compressor.write_all(input)?; 35 | 36 | output.extend_from_slice(compressor.get_ref()); 37 | compressor.get_mut().clear(); 38 | } 39 | 40 | Ok(()) 41 | } 42 | 43 | fn finish_input(&mut self, output: &mut Vec) -> Result<(), GeneralError> { 44 | if let Some(mut compressor) = self.compressor.take() { 45 | compressor.flush()?; 46 | 47 | let buf = compressor.finish()?; 48 | 49 | output.extend_from_slice(&buf); 50 | } 51 | 52 | Ok(()) 53 | } 54 | } 55 | 56 | #[derive(Debug)] 57 | pub struct CompressionDecoder { 58 | decompressor: PushDecompressor>, 59 | } 60 | 61 | impl CompressionDecoder { 62 | pub fn new(decompressor: PushDecompressor>) -> Self { 63 | Self { decompressor } 64 | } 65 | 66 | pub fn try_of_name(name: &str) -> Result { 67 | let format = CompressionFormat::from_str(name) 68 | .map_err(|_| ProtocolError::new(ProtocolErrorKind::UnsupportedCompressionFormat))?; 69 | 70 | Ok(Self::new(PushDecompressor::new(Vec::new(), format)?)) 71 | } 72 | } 73 | 74 | impl Codec for CompressionDecoder { 75 | fn transform(&mut self, input: &[u8], output: &mut Vec) -> Result<(), GeneralError> { 76 | self.decompressor.write_all(input)?; 77 | self.decompressor.flush()?; 78 | 79 | output.extend_from_slice(self.decompressor.get_ref()); 80 | self.decompressor.get_mut().clear(); 81 | 82 | Ok(()) 83 | } 84 | } 85 | 86 | #[cfg(test)] 87 | mod tests { 88 | use super::*; 89 | 90 | #[test] 91 | fn test_compression() { 92 | let mut encoder = CompressionEncoder::try_of_name("gzip").unwrap(); 93 | let mut buf = Vec::new(); 94 | 95 | encoder.transform(b"Hello world!", &mut buf).unwrap(); 96 | encoder.finish_input(&mut buf).unwrap(); 97 | 98 | let mut output = Vec::new(); 99 | 100 | let mut decoder = CompressionDecoder::try_of_name("gzip").unwrap(); 101 | decoder.transform(&buf, &mut output).unwrap(); 102 | 103 | assert_eq!(&output, b"Hello world!"); 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/http/h1/error.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chfoo/warcat-rs/cf5f71a67e5c19464039eadde41c616149c5ec11/src/http/h1/error.rs -------------------------------------------------------------------------------- /src/http/h1/header/fields.rs: 
-------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | 3 | use super::{HeaderFields, Hstring}; 4 | 5 | pub trait FieldsExt { 6 | fn get_comma_list<'a>(&'a self, name: &'a str) -> impl Iterator>; 7 | 8 | fn get_u64_strict>( 9 | &self, 10 | name: N, 11 | ) -> Option>; 12 | } 13 | 14 | impl FieldsExt for HeaderFields { 15 | fn get_comma_list<'a>(&'a self, name: &'a str) -> impl Iterator> { 16 | let mut list = Vec::new(); 17 | 18 | for value in self.get_all(name) { 19 | if let Some(value) = value.as_text() { 20 | for item in value.split(",") { 21 | let item = crate::util::to_ascii_lowercase_cow(item.trim()); 22 | 23 | if !list.contains(&item) { 24 | list.push(item); 25 | } 26 | } 27 | } 28 | } 29 | 30 | list.into_iter() 31 | } 32 | 33 | fn get_u64_strict>( 34 | &self, 35 | name: N, 36 | ) -> Option> { 37 | if let Some(Hstring::Text(value)) = self.get(name.as_ref()) { 38 | Some(crate::parse::parse_u64_strict(value)) 39 | } else { 40 | None 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/http/h1/header/parse.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | IResult, Parser, 3 | branch::alt, 4 | bytes::complete::{tag, tag_no_case, take_while, take_while1}, 5 | character::complete::{digit1, line_ending}, 6 | combinator::{map, recognize, verify}, 7 | sequence::terminated, 8 | }; 9 | 10 | pub enum StartLine<'a> { 11 | RequestLine(RequestLine<'a>), 12 | StatusLine(StatusLine<'a>), 13 | } 14 | 15 | pub struct RequestLine<'a> { 16 | pub method: &'a [u8], 17 | pub request_target: &'a [u8], 18 | pub http_version: &'a [u8], 19 | } 20 | 21 | pub struct StatusLine<'a> { 22 | pub http_version: &'a [u8], 23 | pub status_code: &'a [u8], 24 | pub reason_phrase: &'a [u8], 25 | } 26 | 27 | pub fn start_line(input: &[u8]) -> IResult<&[u8], StartLine<'_>> { 28 | let status_line = map(status_line, StartLine::StatusLine); 29 | let request_line = map(request_line, StartLine::RequestLine); 30 | 31 | terminated(alt((status_line, request_line)), line_ending).parse(input) 32 | } 33 | 34 | pub fn request_line(input: &[u8]) -> IResult<&[u8], RequestLine<'_>> { 35 | let parts = (method, tag(" "), request_target, tag(" "), http_version); 36 | 37 | #[allow(clippy::type_complexity)] 38 | map(parts, |output: (&[u8], &[u8], &[u8], &[u8], &[u8])| { 39 | RequestLine { 40 | method: output.0, 41 | request_target: output.2, 42 | http_version: output.4, 43 | } 44 | }) 45 | .parse(input) 46 | } 47 | 48 | pub fn status_line(input: &[u8]) -> IResult<&[u8], StatusLine<'_>> { 49 | alt((status_line_strict, status_line_non_strict)).parse(input) 50 | } 51 | 52 | fn status_line_strict(input: &[u8]) -> IResult<&[u8], StatusLine<'_>> { 53 | let parts = (http_version, tag(" "), status_code, tag(" "), reason_phrase); 54 | 55 | #[allow(clippy::type_complexity)] 56 | map(parts, |output: (&[u8], &[u8], &[u8], &[u8], &[u8])| { 57 | StatusLine { 58 | http_version: output.0, 59 | status_code: output.2, 60 | reason_phrase: output.4, 61 | } 62 | }) 63 | .parse(input) 64 | } 65 | 66 | fn status_line_non_strict(input: &[u8]) -> IResult<&[u8], StatusLine<'_>> { 67 | // https://mailman.nginx.org/pipermail/nginx/2013-June/039186.html 68 | let parts = (http_version, tag(" "), status_code); 69 | 70 | map(parts, |output: (&[u8], &[u8], &[u8])| StatusLine { 71 | http_version: output.0, 72 | status_code: output.2, 73 | reason_phrase: b"", 74 | }) 75 | .parse(input) 76 | } 77 | 78 | fn 
method(input: &[u8]) -> IResult<&[u8], &[u8]> { 79 | crate::parse::fields::token(input) 80 | } 81 | 82 | fn request_target(input: &[u8]) -> IResult<&[u8], &[u8]> { 83 | take_while1(|c: u8| c.is_ascii_graphic())(input) 84 | } 85 | 86 | fn http_version(input: &[u8]) -> IResult<&[u8], &[u8]> { 87 | // Newer HTTP specifications requires the http-name to be case-sensitive, 88 | // but we should be lenient instead. 89 | recognize(( 90 | tag_no_case("HTTP"), 91 | tag("/"), 92 | one_digit, 93 | tag("."), 94 | one_digit, 95 | )) 96 | .parse(input) 97 | } 98 | 99 | fn one_digit(input: &[u8]) -> IResult<&[u8], &[u8]> { 100 | verify(digit1, |i: &[u8]| i.len() == 1).parse(input) 101 | } 102 | 103 | fn status_code(input: &[u8]) -> IResult<&[u8], &[u8]> { 104 | verify(digit1, |i: &[u8]| i.len() == 3).parse(input) 105 | } 106 | 107 | fn reason_phrase(input: &[u8]) -> IResult<&[u8], &[u8]> { 108 | take_while(|b: u8| { 109 | b.is_ascii_graphic() || b == b' ' || b == b'\t' || crate::parse::fields::is_obs_text(b) 110 | })(input) 111 | } 112 | -------------------------------------------------------------------------------- /src/http/h1/send.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::VecDeque, io::Read}; 2 | 3 | use crate::error::GeneralError; 4 | 5 | use super::{ 6 | codec::CodecPipeline, 7 | header::{MessageHeader, TrailerFields}, 8 | }; 9 | 10 | /// Encodes a HTTP request/response message. 11 | /// 12 | /// Important: This struct makes no semantic validation! It simply outputs 13 | /// what you call. 14 | pub struct Sender { 15 | codec_pipeline: CodecPipeline, 16 | output_buf: VecDeque, 17 | } 18 | 19 | impl Sender { 20 | pub fn new() -> Self { 21 | Self { 22 | codec_pipeline: CodecPipeline::default(), 23 | output_buf: VecDeque::new(), 24 | } 25 | } 26 | 27 | /// Send the header. 28 | pub fn send_header(&mut self, header: &MessageHeader) -> Result<(), GeneralError> { 29 | let mut codecs = Vec::new(); 30 | super::codec::build_encoders(header, &mut codecs)?; 31 | 32 | self.codec_pipeline = CodecPipeline::new(codecs); 33 | 34 | header.serialize(&mut self.output_buf).unwrap(); 35 | 36 | Ok(()) 37 | } 38 | 39 | /// Send body data. 40 | pub fn send_body(&mut self, data: &[u8]) -> Result<(), GeneralError> { 41 | self.codec_pipeline.transform(data, &mut self.output_buf)?; 42 | 43 | Ok(()) 44 | } 45 | 46 | /// Ends the message with a chunked-transfer encoding. 47 | /// 48 | /// Flushes any buffered output and outputs the trailer. 49 | pub fn send_trailer(&mut self, fields: &TrailerFields) -> Result<(), GeneralError> { 50 | self.codec_pipeline.finish_input(&mut self.output_buf)?; 51 | 52 | fields.serialize(&mut self.output_buf).unwrap(); 53 | 54 | Ok(()) 55 | } 56 | 57 | /// Ends the message, flushing any buffered output. 58 | pub fn end_message(&mut self) -> Result<(), GeneralError> { 59 | self.codec_pipeline.finish_input(&mut self.output_buf)?; 60 | 61 | Ok(()) 62 | } 63 | 64 | /// At the end of the message, reset the internal state for a new message. 65 | pub fn reset(&mut self) { 66 | self.codec_pipeline = CodecPipeline::default(); 67 | } 68 | 69 | /// Writes the output data into the given buffer and returns the amount written. 
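// The start-line parsers in http/h1/header/parse.rs above accept both request
// and status lines (the status form is tried first). Parsing sketch with a
// hypothetical input, using start_line and StartLine as defined in that file:
//
//     let (_rest, line) = start_line(b"HTTP/1.1 200 OK\r\n").unwrap();
//     match line {
//         StartLine::StatusLine(status) => {
//             assert_eq!(status.http_version, b"HTTP/1.1");
//             assert_eq!(status.status_code, b"200");
//             assert_eq!(status.reason_phrase, b"OK");
//         }
//         StartLine::RequestLine(_) => unreachable!(),
//     }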
70 | pub fn read_output(&mut self, buf: &mut [u8]) -> usize { 71 | self.output_buf.read(buf).unwrap() 72 | } 73 | } 74 | 75 | impl Default for Sender { 76 | fn default() -> Self { 77 | Self::new() 78 | } 79 | } 80 | 81 | #[cfg(test)] 82 | mod tests { 83 | use super::*; 84 | 85 | #[tracing_test::traced_test] 86 | #[test] 87 | fn test_send() { 88 | let mut output = Vec::new(); 89 | let mut sender = Sender::new(); 90 | 91 | let header = MessageHeader::new_request("GET", "/index.html"); 92 | sender.send_header(&header).unwrap(); 93 | sender.send_body(b"Hello world!").unwrap(); 94 | sender.end_message().unwrap(); 95 | 96 | loop { 97 | let mut buf = [0u8; 1024]; 98 | let len = sender.read_output(&mut buf); 99 | 100 | if len == 0 { 101 | break; 102 | } 103 | 104 | output.extend_from_slice(&buf[0..len]); 105 | } 106 | 107 | assert_eq!( 108 | output, 109 | b"GET /index.html HTTP/1.1\r\n\ 110 | \r\n\ 111 | Hello world!" 112 | ); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/io.rs: -------------------------------------------------------------------------------- 1 | //! IO utilities 2 | use std::io::{BufRead, Read, Write}; 3 | 4 | pub(crate) const IO_BUFFER_LENGTH: usize = 4096; 5 | 6 | /// Indicate position in the stream 7 | pub trait LogicalPosition { 8 | /// Returns the position in the stream without accounting for seeks. 9 | /// 10 | /// This value should be the same as the number of bytes read from 11 | /// the stream. 12 | fn logical_position(&self) -> u64; 13 | } 14 | 15 | /// A [`BufRead`] implementation 16 | /// 17 | /// This is an alternative to [`std::io::BufReader`] but implements [`LogicalPosition`] 18 | /// and allows getting a stream position without seekable streams. 19 | #[derive(Debug)] 20 | pub struct BufferReader { 21 | reader: R, 22 | buffer: Vec, 23 | buffer_position: usize, 24 | logical_position: u64, 25 | } 26 | 27 | impl BufferReader { 28 | /// Create a new buffered reader. 29 | pub fn new(reader: R) -> Self { 30 | Self { 31 | reader, 32 | buffer: Vec::new(), 33 | buffer_position: 0, 34 | logical_position: 0, 35 | } 36 | } 37 | 38 | /// Returns a reference to the underlying reader. 39 | pub fn get_ref(&self) -> &R { 40 | &self.reader 41 | } 42 | 43 | /// Returns a mutable reference to the underlying reader. 44 | /// 45 | /// Modifying the underlying reader may cause unexpected behavior. 46 | pub fn get_mut(&mut self) -> &mut R { 47 | &mut self.reader 48 | } 49 | 50 | /// Returns the underlying reader. 51 | pub fn into_inner(self) -> R { 52 | self.reader 53 | } 54 | 55 | /// Returns a slice of the internal buffer. 56 | pub fn buffer(&self) -> &[u8] { 57 | &self.buffer[self.buffer_position..] 58 | } 59 | 60 | /// Fills the internal buffer with more data from the underlying reader. 61 | /// 62 | /// Returns the number of bytes read. 63 | pub fn fill_buffer(&mut self) -> std::io::Result { 64 | let original_len = self.buffer.len(); 65 | self.buffer.resize(original_len + IO_BUFFER_LENGTH, 0); 66 | 67 | let range = original_len..; 68 | 69 | match self.reader.read(&mut self.buffer[range]) { 70 | Ok(read_len) => { 71 | self.buffer.truncate(original_len + read_len); 72 | Ok(read_len) 73 | } 74 | Err(error) => { 75 | self.buffer.truncate(original_len); 76 | Err(error) 77 | } 78 | } 79 | } 80 | 81 | /// Fills the internal buffer only if it is empty. 
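// Usage sketch for BufferReader (hypothetical input): read_until() comes from
// the BufRead impl below, and logical_position() tracks bytes consumed
// without requiring Seek.
//
//     let mut reader = BufferReader::new(&b"WARC/1.1\r\nContent-Length: 0\r\n\r\n"[..]);
//     let mut line = Vec::new();
//     reader.read_until(b'\n', &mut line).unwrap();
//     assert_eq!(line, b"WARC/1.1\r\n");
//     assert_eq!(reader.logical_position(), 10);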
82 | pub fn fill_buffer_if_empty(&mut self) -> std::io::Result { 83 | if self.buffer.is_empty() { 84 | self.fill_buffer() 85 | } else { 86 | Ok(0) 87 | } 88 | } 89 | 90 | fn compact_buffer(&mut self) { 91 | self.buffer.drain(..self.buffer_position); 92 | self.buffer_position = 0; 93 | } 94 | 95 | fn read_using_buffer(&mut self, mut buf: &mut [u8]) -> std::io::Result { 96 | self.fill_buffer_if_empty()?; 97 | 98 | let range = self.buffer_position..self.buffer.len().min(self.buffer_position + buf.len()); 99 | let write_len = range.len(); 100 | 101 | buf.write_all(&self.buffer[range])?; 102 | self.buffer_position += write_len; 103 | 104 | self.clean_up_buffer(); 105 | 106 | Ok(write_len) 107 | } 108 | 109 | fn clean_up_buffer(&mut self) { 110 | if self.buffer_position >= self.buffer.len() { 111 | self.buffer.clear(); 112 | self.buffer_position = 0; 113 | } else if self.buffer_position > IO_BUFFER_LENGTH { 114 | self.compact_buffer(); 115 | } 116 | } 117 | } 118 | 119 | impl Read for BufferReader { 120 | fn read(&mut self, buf: &mut [u8]) -> std::io::Result { 121 | let read_len = if buf.len() >= IO_BUFFER_LENGTH && self.buffer.is_empty() { 122 | self.reader.read(buf) 123 | } else { 124 | self.read_using_buffer(buf) 125 | }?; 126 | 127 | self.logical_position += read_len as u64; 128 | Ok(read_len) 129 | } 130 | } 131 | 132 | impl BufRead for BufferReader { 133 | fn fill_buf(&mut self) -> std::io::Result<&[u8]> { 134 | self.fill_buffer_if_empty()?; 135 | 136 | Ok(self.buffer()) 137 | } 138 | 139 | fn consume(&mut self, amt: usize) { 140 | self.buffer_position += amt; 141 | self.logical_position += amt as u64; 142 | self.clean_up_buffer(); 143 | } 144 | } 145 | 146 | impl LogicalPosition for BufferReader { 147 | fn logical_position(&self) -> u64 { 148 | self.logical_position 149 | } 150 | } 151 | 152 | #[cfg(test)] 153 | mod tests { 154 | use std::io::Cursor; 155 | 156 | use super::*; 157 | 158 | #[test] 159 | fn test_buffer_reader() { 160 | let mut source = Vec::new(); 161 | let data_len = 50000; 162 | 163 | for i in 0..data_len { 164 | source.push(i as u8); 165 | } 166 | 167 | let mut r = BufferReader::new(Cursor::new(source)); 168 | let mut actual = Vec::new(); 169 | let mut remain_len = data_len; 170 | let mut buf = Vec::new(); 171 | 172 | for buf_size in [10, 2000, 4000, 4096, 4096, 5000].iter().cycle() { 173 | if remain_len == 0 { 174 | break; 175 | } 176 | let read_len = (*buf_size).min(remain_len); 177 | buf.resize(read_len, 0); 178 | r.read_exact(&mut buf).unwrap(); 179 | 180 | actual.extend_from_slice(&buf); 181 | remain_len -= read_len; 182 | } 183 | 184 | let source = r.into_inner().into_inner(); 185 | 186 | assert_eq!(source, actual); 187 | } 188 | 189 | #[test] 190 | fn test_buffer_reader_until() { 191 | let mut source = Vec::new(); 192 | let data_len = 10000; 193 | 194 | for i in 0..data_len { 195 | if i == 5000 { 196 | source.push(b'\n'); 197 | } else { 198 | source.push(0); 199 | } 200 | } 201 | 202 | let mut r = BufferReader::new(Cursor::new(source)); 203 | let mut buf = Vec::new(); 204 | r.read_until(b'\n', &mut buf).unwrap(); 205 | 206 | assert_eq!(buf.len(), 5001); 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Warcat: WARC Archiving Tool 2 | //! 3 | //! This crate provides both a library API and a binary CLI application. 4 | //! The library can be used to read and write WARC files and 5 | //! 
as well perform functions provided by the binary. 6 | //! 7 | //! In general cases, users working with WARC files do not need to program 8 | //! directly with the library. The CLI application (the tool portion) is 9 | //! designed to be part of a Unix-style pipeline. 10 | //! 11 | //! This documentation is for the library portion. 12 | //! For details on the CLI, see the [user guide](https://warcat-rs.readthedocs.io/). 13 | //! 14 | //! The library is designed first in mind for the binary, so some parts of 15 | //! the API will be unstable or not relevant. 16 | //! 17 | //! The main entrypoints to this library is [`warc::Decoder`]/[`warc::PushDecoder`] and [`warc::Encoder`]. 18 | 19 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 20 | 21 | pub mod compress; 22 | pub mod dataseq; 23 | pub mod digest; 24 | pub mod error; 25 | pub mod extract; 26 | pub mod fields; 27 | pub mod header; 28 | pub mod http; 29 | pub mod io; 30 | pub mod parse; 31 | pub(crate) mod util; 32 | pub mod verify; 33 | pub mod warc; 34 | 35 | #[cfg(feature = "bin")] 36 | #[doc(hidden)] 37 | pub mod app; 38 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | fn main() -> std::process::ExitCode { 2 | warcat::app::run() 3 | } 4 | -------------------------------------------------------------------------------- /src/parse.rs: -------------------------------------------------------------------------------- 1 | //! Parsing utilities. 2 | use std::{borrow::Cow, cell::LazyCell}; 3 | 4 | use nom::Parser; 5 | use regex::bytes::Regex; 6 | 7 | use crate::error::ParseError; 8 | 9 | pub(crate) mod fields; 10 | pub(crate) mod header_deliminator; 11 | pub(crate) mod warc; 12 | 13 | /// Get the index (inclusive) of the header deliminator (an empty line). 14 | pub fn scan_header_deliminator(data: &[u8]) -> Option { 15 | match header_deliminator::field_lines(data) { 16 | Ok((_input, output)) => Some(output.len()), 17 | Err(_) => None, 18 | } 19 | } 20 | 21 | /// Parse a HTTP-like fields of name-value pairs. 22 | pub fn parse_name_value_fields(value: &[u8]) -> Result, ParseError> { 23 | match fields::field_pairs(value) { 24 | Ok((_input, output)) => Ok(output), 25 | Err(error) => Err(error.into()), 26 | } 27 | } 28 | 29 | /// Returns whether the value is a valid name in a HTTP-like field. 30 | pub fn validate_field_name(value: &[u8]) -> Result<(), ParseError> { 31 | match nom::combinator::all_consuming(fields::field_name).parse(value) { 32 | Ok((_input, _output)) => Ok(()), 33 | Err(error) => Err(error.into()), 34 | } 35 | } 36 | 37 | /// Returns whether the value is a valid value in a HTTP-like field. 38 | /// 39 | /// When `multiline` is `true`, obsolete line folding is permitted. 40 | pub fn validate_field_value(value: &[u8], multiline: bool) -> Result<(), ParseError> { 41 | let f = if multiline { 42 | fields::field_value 43 | } else { 44 | fields::field_value_no_multline 45 | }; 46 | match nom::combinator::all_consuming(f).parse(value) { 47 | Ok((_input, _output)) => Ok(()), 48 | Err(error) => Err(error.into()), 49 | } 50 | } 51 | 52 | /// Parse a value into a `u64`. 53 | /// 54 | /// Unlike [`u64::try_from()`], only ASCII digits are permitted. Use of std 55 | /// library parsing functions may lead to security issues. 
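// Behaviour sketch (illustrative values): only plain ASCII digits pass, so
// inputs that str::parse::<u64>() would tolerate, such as a leading '+', are
// rejected here.
//
//     assert_eq!(parse_u64_strict("1234").unwrap(), 1234);
//     assert!(parse_u64_strict("+1234").is_err());
//     assert!(parse_u64_strict("12 34").is_err());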
56 | pub fn parse_u64_strict(value: &str) -> Result { 57 | if !value.chars().all(|c| c.is_ascii_digit()) { 58 | return "?".parse(); 59 | } 60 | 61 | value.parse() 62 | } 63 | 64 | /// Remove line folding from a HTTP-like field value. 65 | pub fn remove_line_folding(value: &[u8]) -> Cow<'_, [u8]> { 66 | let re = LazyCell::new(|| Regex::new(r"(?:\r\n|\n)[ \t]+").unwrap()); 67 | re.replace_all(value, b" ") 68 | } 69 | 70 | #[cfg(test)] 71 | mod tests { 72 | use super::*; 73 | 74 | #[test] 75 | fn test_scan_header_none() { 76 | assert_eq!(scan_header_deliminator(b""), None); 77 | assert_eq!(scan_header_deliminator(b"a"), None); 78 | } 79 | 80 | #[test] 81 | fn test_scan_header() { 82 | assert_eq!(scan_header_deliminator(b"\r\nz"), Some(2)); 83 | assert_eq!(scan_header_deliminator(b"a\r\n\r\nz"), Some(5)); 84 | assert_eq!(scan_header_deliminator(b"a\r\nb\r\n\r\nz"), Some(8)); 85 | assert_eq!(scan_header_deliminator(b"a\nb\n\nz"), Some(5)); 86 | } 87 | 88 | #[test] 89 | fn test_remove_line_folding() { 90 | assert_eq!(*remove_line_folding(b"abc"), *b"abc"); 91 | assert_eq!(*remove_line_folding(b"abc\r\n def"), *b"abc def"); 92 | assert_eq!( 93 | *remove_line_folding(b"abc\r\n def\r\n\t123"), 94 | *b"abc def 123" 95 | ); 96 | assert_eq!(*remove_line_folding(b"abc\n def"), *b"abc def"); 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/parse/fields.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | IResult, Parser, 3 | branch::alt, 4 | bytes::complete::{tag, take_while, take_while_m_n, take_while1}, 5 | character::complete::{line_ending, space0, space1}, 6 | combinator::{all_consuming, map, recognize}, 7 | multi::{many0, many0_count}, 8 | sequence::{delimited, pair, preceded, separated_pair, terminated}, 9 | }; 10 | 11 | pub struct FieldPairRef<'a> { 12 | pub name: &'a [u8], 13 | pub value: &'a [u8], 14 | } 15 | 16 | impl<'a> From<(&'a [u8], &'a [u8])> for FieldPairRef<'a> { 17 | fn from(value: (&'a [u8], &'a [u8])) -> Self { 18 | Self { 19 | name: value.0, 20 | value: value.1, 21 | } 22 | } 23 | } 24 | 25 | pub fn field_pairs(input: &[u8]) -> IResult<&[u8], Vec>> { 26 | many0(terminated(field_pair, line_ending)).parse(input) 27 | } 28 | 29 | fn field_pair(input: &[u8]) -> IResult<&[u8], FieldPairRef<'_>> { 30 | let val = delimited(space0, field_value, space0); 31 | let pair = separated_pair(field_name, tag(":"), val); 32 | 33 | map(pair, |p| p.into()).parse(input) 34 | } 35 | 36 | pub fn field_name(input: &[u8]) -> IResult<&[u8], &[u8]> { 37 | token(input) 38 | } 39 | 40 | pub fn token(input: &[u8]) -> IResult<&[u8], &[u8]> { 41 | take_while1(is_tchar)(input) 42 | } 43 | 44 | pub fn field_value(input: &[u8]) -> IResult<&[u8], &[u8]> { 45 | let a = alt((field_content, obs_fold)); 46 | recognize(many0_count(a)).parse(input) 47 | } 48 | 49 | pub fn field_value_no_multline(input: &[u8]) -> IResult<&[u8], &[u8]> { 50 | recognize(many0_count(field_content)).parse(input) 51 | } 52 | 53 | fn field_content(input: &[u8]) -> IResult<&[u8], &[u8]> { 54 | recognize(pair( 55 | take_while_m_n(1, 1, is_field_vchar), 56 | take_while(is_field_char), 57 | )) 58 | .parse(input) 59 | } 60 | 61 | fn is_field_vchar(b: u8) -> bool { 62 | b.is_ascii_graphic() || is_obs_text(b) 63 | } 64 | 65 | fn is_field_char(b: u8) -> bool { 66 | is_field_vchar(b) || b == b' ' || b == b'\t' 67 | } 68 | 69 | pub fn is_tchar(b: u8) -> bool { 70 | b.is_ascii_alphanumeric() || b"!#$%&'*+-.^_`|~".contains(&b) 71 | } 72 | 73 | pub fn 
is_obs_text(b: u8) -> bool { 74 | b >= 0x80 75 | } 76 | 77 | fn obs_fold(input: &[u8]) -> IResult<&[u8], &[u8]> { 78 | recognize(pair(line_ending, space1)).parse(input) 79 | } 80 | 81 | pub struct MediaType<'a> { 82 | pub type_: &'a [u8], 83 | pub subtype: &'a [u8], 84 | pub parameters: Vec<(&'a [u8], &'a [u8])>, 85 | } 86 | 87 | pub fn media_type(input: &[u8]) -> IResult<&[u8], MediaType<'_>> { 88 | let types = separated_pair(type_, tag("/"), subtype); 89 | 90 | map( 91 | all_consuming(pair(types, parameters)), 92 | |(types, parameters)| MediaType { 93 | type_: types.0, 94 | subtype: types.1, 95 | parameters, 96 | }, 97 | ) 98 | .parse(input) 99 | } 100 | 101 | fn type_(input: &[u8]) -> IResult<&[u8], &[u8]> { 102 | token(input) 103 | } 104 | 105 | fn subtype(input: &[u8]) -> IResult<&[u8], &[u8]> { 106 | token(input) 107 | } 108 | 109 | type ParametersList<'a> = Vec<(&'a [u8], &'a [u8])>; 110 | 111 | fn parameters(input: &[u8]) -> IResult<&[u8], ParametersList> { 112 | many0(preceded(delimited(space0, tag(";"), space0), parameter)).parse(input) 113 | } 114 | 115 | fn parameter(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { 116 | separated_pair(attribute, tag("="), value).parse(input) 117 | } 118 | 119 | fn attribute(input: &[u8]) -> IResult<&[u8], &[u8]> { 120 | token(input) 121 | } 122 | 123 | fn value(input: &[u8]) -> IResult<&[u8], &[u8]> { 124 | // FIXME: implement quoted-string 125 | token(input) 126 | } 127 | 128 | #[cfg(test)] 129 | mod tests { 130 | use super::*; 131 | 132 | #[test] 133 | fn test_field_pairs_empty() { 134 | let (_remain, output) = field_pairs(b"").unwrap(); 135 | assert!(output.is_empty()); 136 | } 137 | 138 | #[test] 139 | fn test_field_pairs_1() { 140 | let (_remain, output) = field_pairs(b"n1:\r\n").unwrap(); 141 | 142 | assert_eq!(output.len(), 1); 143 | assert_eq!(output[0].name, b"n1"); 144 | assert_eq!(output[0].value, b""); 145 | 146 | let (_remain, output) = field_pairs(b"n1:v1\r\n").unwrap(); 147 | 148 | assert_eq!(output.len(), 1); 149 | assert_eq!(output[0].name, b"n1"); 150 | assert_eq!(output[0].value, b"v1"); 151 | } 152 | 153 | #[test] 154 | fn test_field_pairs_many() { 155 | let (_remain, output) = field_pairs(b"n1:v1\r\nn2:\r\nn3:v3\r\n").unwrap(); 156 | 157 | assert_eq!(output.len(), 3); 158 | assert_eq!(output[0].name, b"n1"); 159 | assert_eq!(output[0].value, b"v1"); 160 | assert_eq!(output[1].name, b"n2"); 161 | assert_eq!(output[1].value, b""); 162 | assert_eq!(output[2].name, b"n3"); 163 | assert_eq!(output[2].value, b"v3"); 164 | } 165 | 166 | #[test] 167 | fn test_field_pairs_line_folding() { 168 | let (_remain, output) = field_pairs(b"n1:v1\r\n 1\r\nn2:v2\r\n").unwrap(); 169 | 170 | assert_eq!(output.len(), 2); 171 | assert_eq!(output[0].name, b"n1"); 172 | assert_eq!(output[0].value, b"v1\r\n 1"); 173 | assert_eq!(output[1].name, b"n2"); 174 | assert_eq!(output[1].value, b"v2"); 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/parse/fields_str.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chfoo/warcat-rs/cf5f71a67e5c19464039eadde41c616149c5ec11/src/parse/fields_str.rs -------------------------------------------------------------------------------- /src/parse/header_deliminator.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | bytes::complete::take_till1, character::complete::line_ending, combinator::recognize, 3 | multi::many0_count, 
sequence::terminated, IResult, Parser, 4 | }; 5 | 6 | fn field_line(input: &[u8]) -> IResult<&[u8], &[u8]> { 7 | terminated(take_till1(|b| b == b'\r' || b == b'\n'), line_ending).parse(input) 8 | } 9 | 10 | pub fn field_lines(input: &[u8]) -> IResult<&[u8], &[u8]> { 11 | recognize(terminated(many0_count(field_line), line_ending)).parse(input) 12 | } 13 | -------------------------------------------------------------------------------- /src/parse/warc.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | bytes::complete::{tag, take_while}, 3 | character::complete::line_ending, 4 | combinator::recognize, 5 | sequence::{pair, terminated}, 6 | IResult, Parser, 7 | }; 8 | 9 | pub fn version(input: &[u8]) -> IResult<&[u8], &[u8]> { 10 | let tag = tag("WARC/"); 11 | let digits = take_while(|c: u8| c.is_ascii_digit() || c == b'.'); 12 | 13 | recognize(pair(tag, digits)).parse(input) 14 | } 15 | 16 | pub fn version_line(input: &[u8]) -> IResult<&[u8], &[u8]> { 17 | terminated(version, line_ending).parse(input) 18 | } 19 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | 3 | pub fn to_ascii_uppercase_cow(text: &str) -> Cow<'_, str> { 4 | if text.chars().any(|c| c.is_ascii_lowercase()) { 5 | Cow::Owned(text.to_ascii_uppercase()) 6 | } else { 7 | Cow::Borrowed(text) 8 | } 9 | } 10 | 11 | pub fn to_ascii_lowercase_cow(text: &str) -> Cow<'_, str> { 12 | if text.chars().any(|c| c.is_ascii_uppercase()) { 13 | Cow::Owned(text.to_ascii_lowercase()) 14 | } else { 15 | Cow::Borrowed(text) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/warc.rs: -------------------------------------------------------------------------------- 1 | //! WARC file format 2 | pub use decode::*; 3 | pub use encode::*; 4 | 5 | mod decode; 6 | mod encode; 7 | -------------------------------------------------------------------------------- /src/warc/encode.rs: -------------------------------------------------------------------------------- 1 | //! WARC file writing 2 | use std::io::{BufWriter, Write}; 3 | 4 | use crate::{ 5 | compress::{Compressor, CompressorConfig}, 6 | error::GeneralError, 7 | header::WarcHeader, 8 | }; 9 | 10 | /// Configuration for a [`Encoder`]. 11 | #[derive(Debug, Clone, Default)] 12 | #[non_exhaustive] 13 | pub struct EncoderConfig { 14 | /// Configuration for compressing the written file 15 | pub compressor: CompressorConfig, 16 | } 17 | 18 | pub struct EncStateHeader; 19 | pub struct EncStateBlock { 20 | length: u64, 21 | written: u64, 22 | } 23 | 24 | /// WARC format writer 25 | pub struct Encoder { 26 | state: S, 27 | output: BufWriter>, 28 | config: EncoderConfig, 29 | } 30 | 31 | impl Encoder { 32 | pub fn get_ref(&self) -> &W { 33 | self.output.get_ref().get_ref() 34 | } 35 | 36 | pub fn get_mut(&mut self) -> &mut W { 37 | self.output.get_mut().get_mut() 38 | } 39 | } 40 | 41 | impl Encoder { 42 | /// Create a new encoder. 43 | /// 44 | /// The destination writer should not be a compression stream. To enable 45 | /// compression, you must configure it with [`EncoderConfig`]. 
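As a sketch of the configuration described above, compression is requested through `EncoderConfig` rather than by wrapping the destination writer; this mirrors how the WARC generator in the tests later in this document sets it up, and the output path is an illustrative assumption:

```rust
use std::fs::File;

use warcat::warc::{Encoder, EncoderConfig};

fn main() -> std::io::Result<()> {
    // Request gzip output through the configuration; the destination
    // writer stays a plain, uncompressed `File`.
    let mut config = EncoderConfig::default();
    config.compressor.format = warcat::compress::Format::Gzip;

    let file = File::create("example.warc.gz")?; // illustrative path
    let encoder = Encoder::new(file, config);

    // ... write records here, then flush everything out:
    let _file = encoder.finish()?;
    Ok(())
}
```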
46 | pub fn new(dest: W, config: EncoderConfig) -> Self { 47 | let output = Compressor::with_config(dest, config.compressor.clone()); 48 | 49 | Self { 50 | state: EncStateHeader, 51 | output: BufWriter::new(output), 52 | config, 53 | } 54 | } 55 | 56 | /// Start a new WARC record with a given header. 57 | /// 58 | /// The validation function will be called on the header before 59 | /// writing it to the stream. 60 | /// 61 | /// Consumes the writer and returns a writer that has typestate 62 | /// transitioned to writing the WARC block portion of the record. 63 | pub fn write_header( 64 | mut self, 65 | header: &WarcHeader, 66 | ) -> Result, GeneralError> { 67 | header.validate()?; 68 | header.serialize(&mut self.output)?; 69 | 70 | let length = header.content_length()?; 71 | 72 | Ok(Encoder { 73 | state: EncStateBlock { length, written: 0 }, 74 | output: self.output, 75 | config: self.config, 76 | }) 77 | } 78 | 79 | /// Flushes any buffered data and returns the underlying stream. 80 | /// 81 | /// You must call this function before dropping the struct in order 82 | /// to have a valid WARC file. 83 | pub fn finish(self) -> std::io::Result { 84 | self.output.into_inner()?.finish() 85 | } 86 | } 87 | 88 | impl Encoder { 89 | fn write_block_impl(&mut self, buf: &[u8]) -> std::io::Result { 90 | let remain_length = self.state.length - self.state.written; 91 | let buf_upper = buf 92 | .len() 93 | .min(usize::try_from(remain_length).unwrap_or(usize::MAX)); 94 | let buf = &buf[0..buf_upper]; 95 | 96 | let write_length = self.output.write(buf)?; 97 | self.state.written += write_length as u64; 98 | 99 | debug_assert!(self.state.length >= self.state.written); 100 | 101 | if self.state.length == self.state.written { 102 | self.write_finish_block()?; 103 | } 104 | 105 | Ok(write_length) 106 | } 107 | 108 | fn write_finish_block(&mut self) -> std::io::Result<()> { 109 | self.output.write_all(b"\r\n\r\n")?; 110 | self.output.flush()?; 111 | self.output.get_mut().start_new_segment()?; 112 | Ok(()) 113 | } 114 | 115 | /// Indicate writing the block portion of a WARC record has completed. 116 | /// 117 | /// Consumes the writer and returns a typestate transitioned 118 | /// writer for writing a new record. 119 | pub fn finish_block(self) -> std::io::Result> { 120 | if self.state.length != self.state.written { 121 | return Err(std::io::Error::other(ContentLengthMismatch::new( 122 | self.state.length, 123 | self.state.written, 124 | ))); 125 | } 126 | 127 | Ok(Encoder { 128 | state: EncStateHeader, 129 | output: self.output, 130 | config: self.config, 131 | }) 132 | } 133 | } 134 | 135 | impl Write for Encoder { 136 | fn write(&mut self, buf: &[u8]) -> std::io::Result { 137 | self.write_block_impl(buf) 138 | } 139 | 140 | fn flush(&mut self) -> std::io::Result<()> { 141 | self.output.flush() 142 | } 143 | } 144 | 145 | /// Error for a block size mismatch in a WARC record. 
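To make the typestate flow described above concrete, here is a minimal sketch modeled on the crate's own `test_writer` test and the WARC generator used by the integration tests; the record contents are illustrative:

```rust
use std::io::Write;

use warcat::header::WarcHeader;
use warcat::warc::{Encoder, EncoderConfig};

fn main() {
    // Header state: a new encoder only accepts a record header.
    let encoder = Encoder::new(Vec::new(), EncoderConfig::default());

    let body = b"Hello world!";
    let mut header = WarcHeader::new(body.len() as u64, "resource");
    header.fields.insert(
        "WARC-Target-URI".to_string(),
        "urn:example:test".to_string(),
    );

    // Block state: `write_header` consumes the encoder and returns one that
    // accepts exactly the declared number of block bytes.
    let mut block = encoder.write_header(&header).unwrap();
    block.write_all(body).unwrap();

    // Back to the header state, ready for the next record.
    let encoder = block.finish_block().unwrap();

    let warc_bytes = encoder.finish().unwrap();
    assert!(warc_bytes.starts_with(b"WARC/1.1\r\n"));
}
```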
146 | #[derive(Debug, Default, thiserror::Error)] 147 | #[error("content length mismatch: expected {expected}, got {actual}")] 148 | pub struct ContentLengthMismatch { 149 | expected: u64, 150 | actual: u64, 151 | } 152 | 153 | impl ContentLengthMismatch { 154 | pub fn new(expected: u64, actual: u64) -> Self { 155 | Self { expected, actual } 156 | } 157 | } 158 | 159 | #[cfg(test)] 160 | mod tests { 161 | use super::*; 162 | 163 | #[tracing_test::traced_test] 164 | #[test] 165 | fn test_writer() { 166 | let buf = Vec::new(); 167 | let writer = Encoder::new(buf, EncoderConfig::default()); 168 | 169 | let header = WarcHeader::new(12, "a"); 170 | let mut writer = writer.write_header(&header).unwrap(); 171 | writer.write_all(b"Hello world!").unwrap(); 172 | let writer = writer.finish_block().unwrap(); 173 | 174 | let header = WarcHeader::new(0, "a"); 175 | let mut writer = writer.write_header(&header).unwrap(); 176 | writer.write_all(b"").unwrap(); 177 | let writer = writer.finish_block().unwrap(); 178 | 179 | let buf = writer.finish().unwrap(); 180 | 181 | assert!(buf.starts_with(b"WARC/1.1\r\n")); 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /tests/test_decode.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Cursor, Read, Write}; 2 | 3 | use warcat::{ 4 | compress::Dictionary, 5 | io::LogicalPosition, 6 | verify::Verifier, 7 | warc::{Decoder, DecoderConfig, PushDecoder, PushDecoderEvent}, 8 | }; 9 | 10 | mod warc_generator; 11 | 12 | #[tracing_test::traced_test] 13 | #[test] 14 | fn test_decode_gzip() { 15 | let (input, offsets) = warc_generator::generate_warc_gzip(); 16 | dbg!(input.len()); 17 | 18 | let mut config = DecoderConfig::default(); 19 | config.decompressor.format = warcat::compress::Format::Gzip; 20 | 21 | check_push_decoder(input.clone(), config.clone(), offsets); 22 | check_decoder(input, config); 23 | } 24 | 25 | #[cfg(feature = "zstd")] 26 | #[tracing_test::traced_test] 27 | #[test] 28 | fn test_decode_zst() { 29 | let (input, offsets) = warc_generator::generate_warc_zst(false); 30 | dbg!(input.len()); 31 | 32 | let mut config = DecoderConfig::default(); 33 | config.decompressor.format = warcat::compress::Format::Zstandard; 34 | config.decompressor.dictionary = Dictionary::WarcZstd(Vec::new()); 35 | 36 | check_push_decoder(input.clone(), config.clone(), offsets); 37 | check_decoder(input, config); 38 | } 39 | 40 | #[cfg(feature = "zstd")] 41 | #[tracing_test::traced_test] 42 | #[test] 43 | fn test_decode_zst_compressed_dict() { 44 | let (input, offsets) = warc_generator::generate_warc_zst(true); 45 | dbg!(input.len()); 46 | 47 | let mut config = DecoderConfig::default(); 48 | config.decompressor.format = warcat::compress::Format::Zstandard; 49 | config.decompressor.dictionary = Dictionary::WarcZstd(Vec::new()); 50 | 51 | check_push_decoder(input, config, offsets); 52 | } 53 | 54 | fn check_push_decoder(input: Vec<u8>, config: DecoderConfig, mut offsets: Vec<u64>) { 55 | let mut decoder = PushDecoder::new(config).unwrap(); 56 | let mut verifier = Verifier::new(); 57 | let mut input = Cursor::new(input); 58 | 59 | // dbg!(&offsets); 60 | 61 | loop { 62 | match decoder.get_event().unwrap() { 63 | PushDecoderEvent::Ready | PushDecoderEvent::WantData => { 64 | let mut buf = vec![0; 4096]; 65 | let len = input.read(&mut buf).unwrap(); 66 | buf.truncate(len); 67 | decoder.write_all(&buf).unwrap(); 68 | 69 | if len == 0 { 70 | decoder.write_eof().unwrap(); 71 | break; 72 | } 73 |
} 74 | 75 | PushDecoderEvent::Continue => {} 76 | PushDecoderEvent::Header { header } => { 77 | assert_eq!(decoder.record_boundary_position(), offsets[0]); 78 | offsets.drain(0..1); 79 | verifier.begin_record(&header).unwrap(); 80 | } 81 | PushDecoderEvent::BlockData { data } => { 82 | verifier.block_data(data); 83 | } 84 | PushDecoderEvent::EndRecord => { 85 | verifier.end_record(); 86 | } 87 | PushDecoderEvent::Finished => { 88 | break; 89 | } 90 | } 91 | } 92 | } 93 | 94 | fn check_decoder(input: Vec, config: DecoderConfig) { 95 | let mut decoder = Decoder::new(Cursor::new(input), config).unwrap(); 96 | let mut verifier = Verifier::new(); 97 | let mut count = 0; 98 | 99 | while decoder.has_next_record().unwrap() { 100 | dbg!(count); 101 | dbg!(decoder.logical_position()); 102 | dbg!(&decoder.get_ref().position()); 103 | 104 | let (header, mut block_decoder) = decoder.read_header().unwrap(); 105 | 106 | verifier.begin_record(&header).unwrap(); 107 | 108 | let mut buf = [0u8; 4096]; 109 | loop { 110 | let read_len = block_decoder.read(&mut buf).unwrap(); 111 | 112 | if read_len == 0 { 113 | break; 114 | } 115 | 116 | verifier.block_data(&buf[0..read_len]); 117 | } 118 | 119 | verifier.end_record(); 120 | decoder = block_decoder.finish_block().unwrap(); 121 | 122 | if !verifier.problems().is_empty() { 123 | println!("{:?}", verifier.problems()); 124 | } 125 | assert!(verifier.problems().is_empty()); 126 | 127 | count += 1; 128 | } 129 | 130 | decoder.into_inner(); 131 | 132 | println!("{:?}", verifier.problems()); 133 | assert!(verifier.problems().is_empty()); 134 | } 135 | -------------------------------------------------------------------------------- /tests/warc_generator.rs: -------------------------------------------------------------------------------- 1 | use std::io::Write; 2 | 3 | use rand::{Rng, RngCore}; 4 | use rand_xoshiro::{Xoshiro256PlusPlus, rand_core::SeedableRng}; 5 | use warcat::{ 6 | compress::Dictionary, 7 | digest::{AlgorithmName, Digest, Hasher}, 8 | header::WarcHeader, 9 | warc::{EncStateHeader, Encoder, EncoderConfig}, 10 | }; 11 | 12 | pub fn generate_warc_gzip() -> (Vec, Vec) { 13 | let mut config = EncoderConfig::default(); 14 | config.compressor.format = warcat::compress::Format::Gzip; 15 | let encoder = Encoder::new(Vec::new(), config); 16 | 17 | generate(encoder) 18 | } 19 | 20 | #[cfg(feature = "zstd")] 21 | pub fn generate_warc_zst(compressed_dict: bool) -> (Vec, Vec) { 22 | let mut sample = vec![0; 10000]; 23 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(1234567); 24 | rng.fill_bytes(&mut sample); 25 | let sizes = [100usize; 100]; 26 | 27 | let mut dictionary = zstd::dict::from_continuous(&sample, &sizes, 10000).unwrap(); 28 | 29 | if compressed_dict { 30 | dictionary = zstd::bulk::compress(&dictionary, 3).unwrap(); 31 | } 32 | 33 | let mut config = EncoderConfig::default(); 34 | config.compressor.format = warcat::compress::Format::Zstandard; 35 | config.compressor.dictionary = Dictionary::WarcZstd(dictionary); 36 | let encoder = Encoder::new(Vec::new(), config); 37 | 38 | generate(encoder) 39 | } 40 | 41 | fn generate(mut encoder: Encoder>) -> (Vec, Vec) { 42 | let mut offsets = Vec::new(); 43 | 44 | for round in 0..100 { 45 | offsets.push(encoder.get_ref().len() as u64); 46 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(round); 47 | 48 | let length: u64 = rng.random_range(100 + round * 1234..200 + round * 1234); 49 | 50 | let mut data: Vec = vec![0; length as usize]; 51 | 52 | if rng.random_bool(0.5) { 53 | // Easy to compress 54 | for value in 
data.iter_mut().step_by(10) { 55 | *value = 0xff; 56 | } 57 | } else { 58 | // Difficult to compress 59 | rng.fill_bytes(&mut data); 60 | } 61 | 62 | let mut hasher = Hasher::new(AlgorithmName::Sha1); 63 | hasher.update(&data); 64 | let digest = Digest::new(AlgorithmName::Sha1, hasher.finish()); 65 | 66 | let mut header = WarcHeader::new(length, "resource"); 67 | header 68 | .fields 69 | .insert("WARC-Block-Digest".to_string(), digest.to_string()); 70 | header.fields.insert( 71 | "WARC-Target-URI".to_string(), 72 | "urn:example:test".to_string(), 73 | ); 74 | 75 | let mut block_encoder = encoder.write_header(&header).unwrap(); 76 | block_encoder.write_all(&data).unwrap(); 77 | encoder = block_encoder.finish_block().unwrap(); 78 | } 79 | 80 | (encoder.finish().unwrap(), offsets) 81 | } 82 | -------------------------------------------------------------------------------- /xtask/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "xtask" 3 | version = "0.0.0" 4 | edition = "2024" 5 | publish = false 6 | 7 | [dependencies] 8 | anyhow = "1.0.86" 9 | blake2 = "0.10.6" 10 | blake3 = { version = "1.5.4", features = ["pure", "traits-preview"] } 11 | cargo_metadata = "0.19.2" 12 | cargo-license = "0.6.1" 13 | cargo-license_cargo_metadata = { package = "cargo_metadata", version = "0.18.1"} 14 | clap = { version = "4.5.16", features = ["derive"] } 15 | data-encoding = "2.6.0" 16 | digest = "0.10.7" 17 | minisign = "0.7.8" 18 | reqwest = { version = "0.12.8", default-features = false, features = ["blocking", "rustls-tls", "gzip", "json"], optional = true } 19 | rpassword = "7.3.1" 20 | serde_json = "1.0.128" 21 | sha2 = "0.10.8" 22 | tempfile = "3.13.0" 23 | toml_edit = "0.22.22" 24 | zip = { version = "4.0.0", default-features = false, features = ["deflate64", "deflate"] } 25 | 26 | [features] 27 | default = [] 28 | bloat = ["dep:reqwest"] -------------------------------------------------------------------------------- /xtask/README.md: -------------------------------------------------------------------------------- 1 | # xtask 2 | 3 | This is a [cargo xtask](https://github.com/matklad/cargo-xtask) crate. 4 | -------------------------------------------------------------------------------- /xtask/src/digest.rs: -------------------------------------------------------------------------------- 1 | use std::{io::Cursor, path::Path}; 2 | 3 | use data_encoding::HEXLOWER; 4 | use digest::Digest; 5 | use minisign::SecretKey; 6 | 7 | pub fn compute_digests(minisign_secret_key: Option<&Path>) -> anyhow::Result<()> { 8 | let minisign_secret_key = if let Some(path) = minisign_secret_key { 9 | Some(get_minisign_secret_key(path)?) 
10 | } else { 11 | None 12 | }; 13 | 14 | let package_dir = crate::package::target_dir()?.join("github-artifacts"); 15 | 16 | let mut entries: Vec<_> = package_dir.read_dir()?.collect(); 17 | entries.sort_unstable_by_key(|item| item.as_ref().unwrap().file_name()); 18 | 19 | let mut doc = toml_edit::DocumentMut::new(); 20 | let mut file_table = toml_edit::Table::new(); 21 | 22 | for entry in entries { 23 | let entry = entry.unwrap(); 24 | let filename = entry 25 | .file_name() 26 | .into_string() 27 | .map_err(|_| anyhow::anyhow!("non-utf-8 path"))?; 28 | 29 | let data = std::fs::read(entry.path())?; 30 | 31 | let mut sha256_hasher = sha2::Sha256::new(); 32 | let mut sha512_hasher = sha2::Sha512::new(); 33 | let mut blake2b_hasher = blake2::Blake2b512::new(); 34 | let mut blake3_hasher = blake3::Hasher::new(); 35 | 36 | sha256_hasher.update(&data); 37 | sha512_hasher.update(&data); 38 | blake2b_hasher.update(&data); 39 | blake3_hasher.update(&data); 40 | 41 | let sha256_digest = sha256_hasher.finalize(); 42 | let sha512_digest = sha512_hasher.finalize(); 43 | let blake2b_digest = blake2b_hasher.finalize(); 44 | let blake3_digest = blake3_hasher.finalize(); 45 | 46 | let mut values = toml_edit::Table::new(); 47 | values.insert("sha256", HEXLOWER.encode(&sha256_digest).into()); 48 | values.insert("sha512", HEXLOWER.encode(&sha512_digest).into()); 49 | values.insert("blake2b", HEXLOWER.encode(&blake2b_digest).into()); 50 | values.insert("blake3", HEXLOWER.encode(blake3_digest.as_slice()).into()); 51 | 52 | if let Some(key) = &minisign_secret_key { 53 | let signature = minisign::sign(None, key, Cursor::new(&data), None, None)?; 54 | values.insert("minisign", signature.to_string().into()); 55 | } 56 | 57 | file_table.insert(&filename, toml_edit::Item::Table(values)); 58 | } 59 | 60 | doc.insert("files", toml_edit::Item::Table(file_table)); 61 | let text = doc.to_string(); 62 | println!("{}", text); 63 | 64 | Ok(()) 65 | } 66 | 67 | fn get_minisign_secret_key(path: &Path) -> anyhow::Result { 68 | let password = rpassword::prompt_password("Secret key password: ")?; 69 | eprintln!("Loading key..."); 70 | let key = minisign::SecretKey::from_file(path, Some(password))?; 71 | eprintln!("OK"); 72 | Ok(key) 73 | } 74 | -------------------------------------------------------------------------------- /xtask/src/dist_readme.txt: -------------------------------------------------------------------------------- 1 | Warcat-rs 2 | ========= 3 | 4 | This package contains warcat, a command-line tool for handling Web ARChive (WARC) files. 5 | 6 | To disable the installer functionality, rename the file and remove the "-installer" part. 7 | 8 | Project repository: https://github.com/chfoo/warcat-rs 9 | 10 | User guide: https://warcat-rs.readthedocs.io/ 11 | 12 | Support: https://github.com/chfoo/warcat-rs/blob/main/.github/SUPPORT.md 13 | 14 | Contributing: https://github.com/chfoo/warcat-rs/blob/main/.github/CONTRIBUTING.md 15 | -------------------------------------------------------------------------------- /xtask/src/doc.rs: -------------------------------------------------------------------------------- 1 | use std::process::Command; 2 | 3 | pub fn build_doc() -> anyhow::Result<()> { 4 | let status = if cfg!(windows) { 5 | Command::new("cmd.exe") 6 | .arg("/c") 7 | .arg("make.bat") 8 | .arg("html") 9 | .current_dir("doc/") 10 | .status()? 11 | } else { 12 | Command::new("make") 13 | .arg("html") 14 | .current_dir("doc/") 15 | .status()? 
16 | }; 17 | 18 | if !status.success() { 19 | anyhow::bail!("command failure {:?}", status.code()) 20 | } else { 21 | Ok(()) 22 | } 23 | } 24 | 25 | pub fn gen_cli_doc() -> anyhow::Result<()> { 26 | let cargo = std::env::var("CARGO")?; 27 | let output = Command::new(cargo) 28 | .arg("run") 29 | .arg("--features=bin") 30 | .arg("--") 31 | .arg("dump-help") 32 | .stderr(std::process::Stdio::inherit()) 33 | .output()?; 34 | 35 | let text = String::from_utf8(output.stdout)?; 36 | let text = text.replace("{title}", "CLI Reference"); 37 | 38 | let text = "% ATTENTION: This file was automatically generated using cargo xtask.\n\ 39 | % Do not manually edit this file!\n\n" 40 | .to_owned() 41 | + &text; 42 | 43 | std::fs::write("doc/cli_reference.md", text.as_bytes())?; 44 | 45 | Ok(()) 46 | } 47 | -------------------------------------------------------------------------------- /xtask/src/gh.rs: -------------------------------------------------------------------------------- 1 | use std::{fs::File, path::Path}; 2 | 3 | use reqwest::{ 4 | blocking::Client, 5 | header::{HeaderMap, HeaderValue}, 6 | }; 7 | use zip::ZipArchive; 8 | 9 | const REPO_USER: &str = "chfoo"; 10 | const REPO_NAME: &str = "warcat-rs"; 11 | 12 | pub fn download_artifacts(access_token: &Path, workflow_id: &str) -> anyhow::Result<()> { 13 | let token = std::fs::read_to_string(access_token)?; 14 | let token = token.trim_ascii(); 15 | 16 | let mut headers = HeaderMap::new(); 17 | let mut token_value = HeaderValue::from_str(&format!("Bearer {}", token))?; 18 | token_value.set_sensitive(true); 19 | headers.insert("Accept", "application/vnd.github+json".try_into()?); 20 | headers.insert("Authorization", token_value); 21 | headers.insert("X-GitHub-Api-Version", "2022-11-28".try_into()?); 22 | headers.insert("User-Agent", "warcat-rs-xtask".try_into()?); 23 | 24 | let client = Client::builder() 25 | .https_only(true) 26 | .gzip(true) 27 | .default_headers(headers) 28 | .build()?; 29 | 30 | eprintln!("Getting artifacts.."); 31 | let response = client 32 | .get(format!( 33 | "https://api.github.com/repos/{}/{}/actions/runs/{}/artifacts", 34 | REPO_USER, REPO_NAME, workflow_id 35 | )) 36 | .send()?; 37 | 38 | eprintln!(" .. {}", response.status()); 39 | 40 | if !response.status().is_success() { 41 | eprintln!(" {:?}", &response); 42 | eprintln!(" {:?}", response.text()); 43 | 44 | anyhow::bail!("response error") 45 | } 46 | 47 | let doc: serde_json::Value = response.json()?; 48 | 49 | let artifacts = doc 50 | .as_object() 51 | .unwrap() 52 | .get("artifacts") 53 | .unwrap() 54 | .as_array() 55 | .unwrap(); 56 | 57 | let artifact_ids: Vec = artifacts 58 | .iter() 59 | .map(|value| { 60 | value 61 | .as_object() 62 | .unwrap() 63 | .get("id") 64 | .unwrap() 65 | .as_u64() 66 | .unwrap() 67 | }) 68 | .collect(); 69 | 70 | let download_dir = tempfile::tempdir()?; 71 | let output_dir = super::package::target_dir()?.join("github-artifacts"); 72 | 73 | eprintln!("Output directory {:?}", output_dir); 74 | std::fs::create_dir_all(&output_dir)?; 75 | 76 | for artifact_id in artifact_ids { 77 | eprintln!("Downloading artifact {}", artifact_id); 78 | let mut response = client 79 | .get(format!( 80 | "https://api.github.com/repos/{}/{}/actions/artifacts/{}/zip", 81 | REPO_USER, REPO_NAME, artifact_id 82 | )) 83 | .send()?; 84 | 85 | eprintln!(" .. 
{}", response.status()); 86 | response.error_for_status_ref()?; 87 | 88 | let artifact_path = download_dir.path().join(format!("{}.zip", artifact_id)); 89 | let mut file = File::options() 90 | .write(true) 91 | .truncate(true) 92 | .create(true) 93 | .open(&artifact_path)?; 94 | std::io::copy(&mut response, &mut file)?; 95 | 96 | eprintln!("Extracting {:?}", &artifact_path); 97 | let file = File::open(&artifact_path)?; 98 | let mut zip = ZipArchive::new(file)?; 99 | zip.extract(&output_dir)?; 100 | } 101 | 102 | download_dir.close()?; 103 | eprintln!("Done"); 104 | Ok(()) 105 | } 106 | -------------------------------------------------------------------------------- /xtask/src/license.rs: -------------------------------------------------------------------------------- 1 | use std::{fs::File, io::Write}; 2 | 3 | use cargo_license::GetDependenciesOpt; 4 | use cargo_license_cargo_metadata::MetadataCommand; 5 | 6 | pub fn generate_license_file() -> anyhow::Result<()> { 7 | let mut command = MetadataCommand::new(); 8 | command.features(cargo_license_cargo_metadata::CargoOpt::SomeFeatures(vec![ 9 | "bin".to_string() 10 | ])); 11 | 12 | let opt = GetDependenciesOpt { 13 | avoid_build_deps: true, 14 | avoid_dev_deps: true, 15 | ..Default::default() 16 | }; 17 | 18 | let dependencies = cargo_license::get_dependencies_from_cargo_lock(command, opt)?; 19 | 20 | let mut file = File::options() 21 | .write(true) 22 | .create(true) 23 | .truncate(true) 24 | .open("xtask/src/dist_license.txt")?; 25 | 26 | writeln!( 27 | file, 28 | "Automatically generated using xtask. Do not manually edit!" 29 | )?; 30 | writeln!(file, "")?; 31 | 32 | writeln!(file, "License")?; 33 | writeln!(file, "=======")?; 34 | writeln!(file)?; 35 | 36 | for dependency in dependencies { 37 | writeln!(file, "{} {}", &dependency.name, &dependency.version)?; 38 | writeln!(file, "-----")?; 39 | writeln!(file)?; 40 | 41 | writeln!(file, "Authors:")?; 42 | for author in dependency 43 | .authors 44 | .as_deref() 45 | .unwrap_or("") 46 | .split("|") 47 | { 48 | writeln!(file, " {}", author)? 49 | } 50 | 51 | writeln!(file, "License:")?; 52 | writeln!( 53 | file, 54 | " {}", 55 | dependency.license.as_deref().unwrap_or("") 56 | )?; 57 | 58 | writeln!(file, "Repository:")?; 59 | writeln!( 60 | file, 61 | " {}", 62 | dependency.repository.as_deref().unwrap_or("") 63 | )?; 64 | 65 | writeln!(file)?; 66 | writeln!(file)?; 67 | } 68 | 69 | Ok(()) 70 | } 71 | -------------------------------------------------------------------------------- /xtask/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | use clap::{Parser, Subcommand}; 4 | 5 | mod digest; 6 | mod doc; 7 | #[cfg(feature = "bloat")] 8 | mod gh; 9 | mod license; 10 | mod package; 11 | 12 | #[derive(Parser, Debug)] 13 | #[command(version)] 14 | struct Args { 15 | #[command(subcommand)] 16 | command: Command, 17 | } 18 | 19 | #[derive(Debug, Subcommand)] 20 | pub enum Command { 21 | /// Convenience command to build Sphinx HTML user guide. 22 | BuildDoc, 23 | /// Generate CLI reference to doc directory. 24 | GenCliDoc, 25 | /// Package a built release binary along with supporting files for distribution. 26 | PackageBin { target: String }, 27 | /// Download the artifacts from GitHub Actions containing the packages. 28 | DownloadArtifacts { 29 | #[arg(long, short)] 30 | access_token: PathBuf, 31 | #[arg(long, short)] 32 | workflow_id: String, 33 | }, 34 | /// Output a hash of the packages. 
35 | Digests { 36 | #[arg(long)] 37 | minisign_secret_key: Option, 38 | }, 39 | /// Generate the license file of dependencies. 40 | GenLicense, 41 | } 42 | 43 | fn main() -> anyhow::Result<()> { 44 | let args = Args::parse(); 45 | 46 | match args.command { 47 | Command::BuildDoc => crate::doc::build_doc(), 48 | Command::GenCliDoc => crate::doc::gen_cli_doc(), 49 | Command::PackageBin { target } => crate::package::package_bin(&target), 50 | Command::DownloadArtifacts { 51 | access_token, 52 | workflow_id, 53 | } => { 54 | #[cfg(feature = "bloat")] 55 | { 56 | crate::gh::download_artifacts(&access_token, &workflow_id) 57 | } 58 | #[cfg(not(feature = "bloat"))] 59 | { 60 | let _ = access_token; 61 | let _ = workflow_id; 62 | unimplemented!("feature 'bloat' required") 63 | } 64 | } 65 | Command::Digests { 66 | minisign_secret_key, 67 | } => crate::digest::compute_digests(minisign_secret_key.as_deref()), 68 | Command::GenLicense => crate::license::generate_license_file(), 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /xtask/src/package.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | env::consts::EXE_SUFFIX, 3 | io::Write, 4 | path::{Path, PathBuf}, 5 | process::Command, 6 | }; 7 | 8 | use tempfile::NamedTempFile; 9 | 10 | pub fn package_bin(target_triple: &str) -> anyhow::Result<()> { 11 | let packager = Packager::new(target_triple.to_string()); 12 | 13 | match std::env::consts::OS { 14 | "windows" => packager.package_zip(), 15 | "macos" => packager.package_tar("tgz"), 16 | "linux" => packager.package_tar("tar.gz"), 17 | _ => unimplemented!(), 18 | } 19 | } 20 | 21 | struct Packager { 22 | target_triple: String, 23 | } 24 | 25 | impl Packager { 26 | fn new(target_triple: String) -> Self { 27 | Self { target_triple } 28 | } 29 | 30 | fn package_zip(&self) -> anyhow::Result<()> { 31 | let staging_dir = self.prepare_staging_dir()?; 32 | let output_dir = self.prepare_output_dir()?; 33 | let package_name = self.build_package_name()?; 34 | let output_file = output_dir.join(format!("{}.zip", package_name)); 35 | 36 | eprintln!("Creating archive {:?} of {:?}", output_file, staging_dir); 37 | let status = Command::new(r"C:\Program Files\7-Zip\7z.exe") 38 | .arg("a") 39 | .arg(&output_file) 40 | .arg("./") 41 | .current_dir(&staging_dir) 42 | .status()?; 43 | 44 | anyhow::ensure!(status.success()); 45 | eprintln!("Done"); 46 | 47 | Ok(()) 48 | } 49 | 50 | fn package_tar(&self, archive_extension: &str) -> anyhow::Result<()> { 51 | let staging_dir = self.prepare_staging_dir()?; 52 | let output_dir = self.prepare_output_dir()?; 53 | let package_name = self.build_package_name()?; 54 | let output_file = output_dir.join(format!("{}.{}", package_name, archive_extension)); 55 | 56 | let mut staging_dir_contents = Vec::new(); 57 | 58 | for entry in std::fs::read_dir(&staging_dir)? 
{ 59 | let entry = entry?; 60 | 61 | staging_dir_contents.push(entry.file_name()); 62 | } 63 | 64 | eprintln!("Creating archive {:?} of {:?}", output_file, staging_dir); 65 | let status = Command::new("tar") 66 | .arg("-c") 67 | .arg("-f") 68 | .arg(&output_file) 69 | .arg("-v") 70 | .arg("-z") 71 | .args(staging_dir_contents) 72 | .current_dir(&staging_dir) 73 | .status()?; 74 | 75 | anyhow::ensure!(status.success()); 76 | eprintln!("Done"); 77 | 78 | Ok(()) 79 | } 80 | 81 | fn build_package_name(&self) -> anyhow::Result { 82 | let version = package_version()?; 83 | let friendly_target = target_triple_to_friendly_name(&self.target_triple); 84 | let package_name = format!("warcat-{}-{}", version, friendly_target); 85 | 86 | Ok(package_name) 87 | } 88 | 89 | fn prepare_staging_dir(&self) -> anyhow::Result { 90 | let version = package_version()?; 91 | // let package_name = self.build_package_name()?; 92 | 93 | let target_dir = target_dir()?; 94 | let staging_dir = target_dir.join("xtask-package-bin-staging"); 95 | // let content_dir = staging_dir.join(&package_name); 96 | let content_dir = staging_dir.clone(); 97 | 98 | if staging_dir.exists() { 99 | eprintln!("Removing directory {:?}", staging_dir); 100 | std::fs::remove_dir_all(&staging_dir)?; 101 | } 102 | 103 | eprintln!("Creating directory {:?}", content_dir); 104 | std::fs::create_dir_all(&content_dir)?; 105 | 106 | let source_bin_path = target_dir 107 | .join(&self.target_triple) 108 | .join("release") 109 | .join(format!("warcat{}", EXE_SUFFIX)); 110 | 111 | let dest_bin_path = content_dir.join(format!("warcat-{}-installer{}", version, EXE_SUFFIX)); 112 | let license_file = self.license_file()?; 113 | 114 | for (from, to) in [ 115 | (source_bin_path.as_path(), dest_bin_path.as_path()), 116 | (license_file.path(), &content_dir.join("license.txt")), 117 | ( 118 | Path::new("xtask/src/dist_readme.txt"), 119 | &content_dir.join("readme.txt"), 120 | ), 121 | ] { 122 | eprintln!("Copying {:?} -> {:?}", from, to); 123 | std::fs::copy(from, to)?; 124 | } 125 | 126 | Ok(staging_dir) 127 | } 128 | 129 | fn prepare_output_dir(&self) -> anyhow::Result { 130 | let target_dir = target_dir()?; 131 | let output_dir = target_dir.join("xtask-package-bin-output"); 132 | 133 | if output_dir.exists() { 134 | eprintln!("Removing directory {:?}", output_dir); 135 | std::fs::remove_dir_all(&output_dir)?; 136 | } 137 | 138 | eprintln!("Creating directory {:?}", output_dir); 139 | std::fs::create_dir_all(&output_dir)?; 140 | 141 | Ok(output_dir) 142 | } 143 | 144 | fn license_file(&self) -> anyhow::Result { 145 | let mut file = NamedTempFile::new()?; 146 | 147 | let content = std::fs::read_to_string("xtask/src/dist_license.txt")?; 148 | let (_header, content) = content.split_once("").expect("missing license template header"); 149 | let content = content.trim_ascii_start(); 150 | 151 | file.write_all(content.as_bytes())?; 152 | file.flush()?; 153 | 154 | Ok(file) 155 | } 156 | } 157 | 158 | fn target_triple_to_friendly_name(target_triple: &str) -> &str { 159 | match target_triple { 160 | "x86_64-pc-windows-msvc" => "windows-x86_64", 161 | "aarch64-pc-windows-msvc" => "windows-aarch64", 162 | "x86_64-apple-darwin" => "macos-x86_64", 163 | "aarch64-apple-darwin" => "macos-aarch64", 164 | "x86_64-unknown-linux-musl" => "linux-x86_64", 165 | "aarch64-unknown-linux-musl" => "linux-aarch64", 166 | _ => unimplemented!(), 167 | } 168 | } 169 | 170 | pub fn target_dir() -> anyhow::Result { 171 | let metadata = cargo_metadata::MetadataCommand::new().exec()?; 172 | 
Ok(metadata.target_directory.into_std_path_buf()) 173 | } 174 | 175 | fn package_version() -> anyhow::Result<String> { 176 | let metadata = cargo_metadata::MetadataCommand::new().exec()?; 177 | let package = metadata 178 | .packages 179 | .iter() 180 | .find(|package| package.name == "warcat") 181 | .ok_or_else(|| anyhow::anyhow!("couldn't get package version"))?; 182 | Ok(package.version.to_string()) 183 | } 184 | --------------------------------------------------------------------------------
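As a rough sketch (not part of the repository) of how a downloaded release archive could be checked against a published `sha256` digest such as those under `misc/release_digests/`, reusing the same `sha2` and `data-encoding` crates the xtask uses; the file name and expected digest below are placeholders:

```rust
use data_encoding::HEXLOWER;
use sha2::{Digest, Sha256};

fn main() -> std::io::Result<()> {
    // Placeholder values: substitute the real artifact name and the matching
    // `sha256` value from misc/release_digests/v*.toml.
    let path = "warcat-0.0.0-linux-x86_64.tar.gz";
    let expected = "0000000000000000000000000000000000000000000000000000000000000000";

    let data = std::fs::read(path)?;
    let digest = Sha256::digest(&data);

    assert_eq!(HEXLOWER.encode(&digest), expected);
    println!("{path} OK");
    Ok(())
}
```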