├── .gitignore
├── CITATION.cff
├── LICENSE
├── README.md
├── constructions
│   ├── .gitignore
│   └── inconsistency-types.json
├── parsers
│   ├── .gitignore
│   ├── 01-infozip
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 02-7zip
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 03-p7zip
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 04-winrar
│   │   ├── Dockerfile
│   │   ├── run.sh
│   │   └── unzip
│   ├── 05-ada-zip-ada
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 06-c-go-unarr
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 07-c-libarchive
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 08-c-libzip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.php
│   ├── 09-c-minizip
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 10-c-minizip-ng
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 11-c-zip
│   │   ├── Dockerfile
│   │   └── unzip.c
│   ├── 12-c-zziplib
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 13-csharp-dotnetzip
│   │   ├── Dockerfile
│   │   └── unzip.cs
│   ├── 14-csharp-sharpcompress
│   │   ├── Dockerfile
│   │   └── unzip.cs
│   ├── 15-csharp-sharpziplib
│   │   ├── Dockerfile
│   │   └── unzip.cs
│   ├── 16-csharp-system-io-compression
│   │   ├── Dockerfile
│   │   └── unzip.cs
│   ├── 17-cpp-android-libziparchive
│   │   ├── Dockerfile
│   │   └── unzip.cpp
│   ├── 18-cpp-poco
│   │   ├── Dockerfile
│   │   └── unzip.cpp
│   ├── 19-d-std-zip
│   │   ├── Dockerfile
│   │   └── unzip.d
│   ├── 20-dart-archive
│   │   ├── Dockerfile
│   │   ├── pubspec.yaml
│   │   └── unzip.dart
│   ├── 21-erlang-zip
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 22-go-archive-zip
│   │   ├── Dockerfile
│   │   └── unzip.go
│   ├── 23-haskell-zip
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 24-haskell-zip-archive
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 25-java-commons-compress-stream
│   │   ├── Dockerfile
│   │   ├── unzip.sh
│   │   └── unzip
│   │       ├── .gitignore
│   │       ├── build.gradle
│   │       └── src
│   │           └── main
│   │               └── java
│   │                   └── unzip
│   │                       └── App.java
│   ├── 26-java-commons-compress-zipfile
│   │   ├── Dockerfile
│   │   ├── unzip.sh
│   │   └── unzip
│   │       ├── .gitignore
│   │       ├── build.gradle
│   │       └── src
│   │           └── main
│   │               └── java
│   │                   └── unzip
│   │                       └── App.java
│   ├── 27-java-util-zip-zipfile
│   │   ├── Dockerfile
│   │   ├── unzip.sh
│   │   └── unzip
│   │       ├── .gitignore
│   │       ├── build.gradle
│   │       └── src
│   │           └── main
│   │               └── java
│   │                   └── unzip
│   │                       └── App.java
│   ├── 28-java-util-zip-zipinputstream
│   │   ├── Dockerfile
│   │   ├── unzip.sh
│   │   └── unzip
│   │       ├── .gitignore
│   │       ├── build.gradle
│   │       └── src
│   │           └── main
│   │               └── java
│   │                   └── unzip
│   │                       └── App.java
│   ├── 29-java-zip4j-zipfile
│   │   ├── Dockerfile
│   │   ├── unzip.sh
│   │   └── unzip
│   │       ├── .gitignore
│   │       ├── build.gradle
│   │       └── src
│   │           └── main
│   │               └── java
│   │                   └── unzip
│   │                       └── App.java
│   ├── 30-java-zip4j-zipinputstream
│   │   ├── Dockerfile
│   │   ├── unzip.sh
│   │   └── unzip
│   │       ├── .gitignore
│   │       ├── build.gradle
│   │       └── src
│   │           └── main
│   │               └── java
│   │                   └── unzip
│   │                       └── App.java
│   ├── 31-nodejs-ronomon-zip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 32-nodejs-adm-zip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 33-nodejs-decompress-zip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 34-nodejs-jszip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 35-nodejs-node-stream-zip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 36-nodejs-unzipper-extract
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 37-nodejs-unzipper-open
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 38-nodejs-yauzl-v2
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 39-nodejs-yauzl-v3
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 40-nodejs-zipjs
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 41-php-phardata
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.php
│   ├── 42-php-phpzip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.php
│   ├── 43-pascal-paszlib
│   │   ├── Dockerfile
│   │   └── unzip.pp
│   ├── 44-perl-archive-zip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.pl
│   ├── 45-python-zipfile
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.py
│   ├── 46-racket-file-unzip
│   │   ├── Dockerfile
│   │   └── unzip.rkt
│   ├── 47-ruby-rubyzip-file
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.rb
│   ├── 48-ruby-rubyzip-inputstream
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.rb
│   ├── 49-rust-zip
│   │   ├── Dockerfile
│   │   └── unzip.rs
│   ├── 50-swift-zipfoundation
│   │   ├── Dockerfile
│   │   └── src
│   │       ├── Package.swift
│   │       └── Sources
│   │           └── main.swift
│   ├── README.md
│   ├── parallel-unzip-all.sh
│   ├── parsers.json
│   ├── prepare.sh
│   ├── testcase.sh
│   └── unzip-all.sh
├── tools
│   ├── ablation-study.sh
│   ├── fuzz-stats.py
│   ├── inconsistency-table.py
│   ├── parsers-to-table.py
│   ├── prepare.sh
│   └── run-parsers.sh
└── zip-diff
    ├── .gitignore
    ├── Cargo.lock
    ├── Cargo.toml
    └── src
        ├── cdh.rs
        ├── compress.rs
        ├── construction
        │   ├── a.rs
        │   ├── a
        │   │   ├── a1.rs
        │   │   ├── a2.rs
        │   │   ├── a3.rs
        │   │   ├── a4.rs
        │   │   └── a5.rs
        │   ├── b.rs
        │   ├── b
        │   │   ├── b1.rs
        │   │   ├── b2.rs
        │   │   ├── b3.rs
        │   │   └── b4.rs
        │   ├── c.rs
        │   ├── c
        │   │   ├── c1.rs
        │   │   ├── c2.rs
        │   │   ├── c3.rs
        │   │   ├── c4.rs
        │   │   └── c5.rs
        │   ├── main.rs
        │   └── utils.rs
        ├── count
        │   └── main.rs
        ├── dd.rs
        ├── eocd.rs
        ├── extra.rs
        ├── fields.rs
        ├── fuzz
        │   ├── config.rs
        │   ├── corpus.rs
        │   ├── execute.rs
        │   ├── feature.rs
        │   ├── generate.rs
        │   ├── main.rs
        │   ├── mutation.rs
        │   ├── rand_utils.rs
        │   └── stats.rs
        ├── hash.rs
        ├── lfh.rs
        ├── lib.rs
        ├── utils.rs
        └── zip.rs

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
/evaluation
/constructions/*/
*.json
*.pdf

--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
cff-version: 1.2.0
title: ZipDiff
type: software
authors:
  - given-names: Yufan
    family-names: You
    email: ouuansteve@gmail.com
    affiliation: Tsinghua University
    orcid: 'https://orcid.org/0009-0004-2975-2107'
  - given-names: Jianjun
    family-names: Chen
    email: jianjun@tsinghua.edu.cn
    affiliation: Tsinghua University
    orcid: 'https://orcid.org/0000-0001-7511-1117'
  - given-names: Qi
    family-names: Wang
    affiliation: Tsinghua University
    orcid: 'https://orcid.org/0009-0008-5707-3223'
  - given-names: Haixin
    family-names: Duan
    affiliation: Tsinghua University
    orcid: 'https://orcid.org/0000-0003-0083-733X'
doi: 10.5281/zenodo.15526863
repository-code: 'https://github.com/ouuan/ZipDiff'
repository-artifact: 'https://doi.org/10.5281/zenodo.15526863'
abstract: A differential fuzzer for ZIP parsers.
keywords:
  - differential fuzzer
  - zip
license: Apache-2.0
preferred-citation:
  type: conference-paper
  authors:
    - given-names: Yufan
      family-names: You
      email: ouuansteve@gmail.com
      affiliation: Tsinghua University
      orcid: 'https://orcid.org/0009-0004-2975-2107'
    - given-names: Jianjun
      family-names: Chen
      email: jianjun@tsinghua.edu.cn
      affiliation: Tsinghua University
      orcid: 'https://orcid.org/0000-0001-7511-1117'
    - given-names: Qi
      family-names: Wang
      affiliation: Tsinghua University
      orcid: 'https://orcid.org/0009-0008-5707-3223'
    - given-names: Haixin
      family-names: Duan
      affiliation: Tsinghua University
      orcid: 'https://orcid.org/0000-0003-0083-733X'
  year: 2025
  month: 8
  title: "My ZIP isn't your ZIP: Identifying and Exploiting Semantic Gaps Between ZIP Parsers"
  conference:
    name: "34th USENIX Security Symposium"
    location: "Seattle, WA"
    date-start: 2025-08-13
    date-end: 2025-08-15
  publisher:
    name: "USENIX Association"
  isbn: "978-1-939133-52-6"
  url: "https://www.usenix.org/conference/usenixsecurity25/presentation/you"

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ZipDiff

A differential fuzzer for ZIP parsers.

This is the source code for the USENIX Security '25 paper [My ZIP isn't your ZIP: Identifying and Exploiting Semantic Gaps Between ZIP Parsers](https://www.usenix.org/conference/usenixsecurity25/presentation/you).

Zenodo permanent link and Docker image files: https://doi.org/10.5281/zenodo.15526863

This artifact has received the Artifacts Available, Artifacts Functional, and Results Reproduced badges: [Artifact appendix](https://secartifacts.github.io/usenixsec2025/appendix-files/sec25cycle2ae-final28.pdf)

## Environment

- Linux
- [Rust](https://www.rust-lang.org/tools/install) (tested on 1.86; any version is fine as long as the code compiles successfully)
- [Docker](https://docs.docker.com/engine/install/) and the [Docker Compose plugin](https://docs.docker.com/compose/install/linux/)
- Python 3 with `numpy` and `matplotlib` to generate the tables and figures
- The full fuzzing process is resource-intensive, as it runs many ZIP parsers in parallel. It is recommended to have at least 128 GB of RAM and 300 GB of disk space. While it can also run on systems with less RAM, you may encounter significant performance degradation, primarily due to uncached disk I/O, since the unzipped outputs can be quite large.

The exact environment used by the authors:

- Ubuntu 23.10 with Linux 6.5.0-44
- Rust 1.86.0
- Docker 27.1.1 with Docker Compose 2.33.1
- Python 3.13.3 with numpy 2.3.0 and matplotlib 3.10.3
- CPU: Intel(R) Xeon(R) Gold 6330 CPU @ 2.00GHz with 112 logical CPUs
- Memory and storage: 944 GB RAM + 44 TB disk (less than 1 TB was used)

## File Structure

- [`parsers`](./parsers)
  - Subdirectories: Source files to build the Docker images of the tested parsers. Each Docker image corresponds to a tested ZIP parser.
  - [`parsers.json`](./parsers/parsers.json): Information about the parsers.
- [`zip-diff`](./zip-diff): Rust code
  - The library crate: A helper ZIP library.
  - The [`fuzz`](./zip-diff/src/fuzz) binary crate: The differential fuzzer ZipDiff.
  - The [`construction`](./zip-diff/src/construction) binary crate: Constructs the ambiguous ZIP files corresponding to the types and variants described in the paper.
  - The [`count`](./zip-diff/src/count) binary crate: Counts the types of ambiguities between each parser pair.
- [`tools`](./tools):
  - [`prepare.sh`](./tools/prepare.sh): Copies the common scripts (`unzip-all.sh`, `parallel-unzip-all.sh`, `testcase.sh`) into the parser subdirectories (into their Docker build contexts) and generates the `docker-compose.yml` config file.
  - [`run-parsers.sh`](./tools/run-parsers.sh): Tests the parsers against specified ZIP files (for manual testing).
  - [`ablation-study.sh`](./tools/ablation-study.sh): Reproduces the ablation study in the paper.
  - [`fuzz-stats.py`](./tools/fuzz-stats.py): Draws the ablation study graph and summarizes the stats.
  - [`inconsistency-table.py`](./tools/inconsistency-table.py): Generates the parser inconsistency LaTeX table.
  - [`parsers-to-table.py`](./tools/parsers-to-table.py): Retrieves GitHub stargazer counts and generates the LaTeX parser list.
- [`constructions`](./constructions): This directory holds the constructed ambiguous ZIP files. The [`inconsistency-types.json`](./constructions/inconsistency-types.json) file is generated by the `count` component and records the list of inconsistency types between each pair of parsers.

## Preparation

- Build the ZIP parser Docker images:

  ```console
  tools/prepare.sh
  cd parsers
  sudo docker compose build
  ```

  Alternatively, if you want to save some time or make sure the versions match the evaluation in the paper, you can load the images from the [files on Zenodo](https://doi.org/10.5281/zenodo.15526863):

  ```console
  for i in *.tar.bz2; do
      docker load -i "$i"
  done
  ```

- Build the Rust binaries:

  ```console
  cd zip-diff
  cargo build --release
  ```
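As an optional sanity check before the (lengthy) image builds, you can count the services that `tools/prepare.sh` wrote into the generated `docker-compose.yml`; there should be one service per parser, 50 in total. (`docker compose config --services` is a standard Compose subcommand; the exact service names come from the generated file.)

```console
cd parsers
sudo docker compose config --services | wc -l   # should print 50
```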
## Minimal Working Example

You can test whether the parsers are working by running them on a ZIP file (assuming that the `zip` command is installed):

```console
pushd /tmp
echo test > test.txt
zip -0 test.zip test.txt
popd
tools/run-parsers.sh /tmp/test.zip
```

If everything goes well, you will see logs from Docker Compose and the parsers, and then the results will be available at `evaluation/results/tmp/test.zip`:

```
01-infozip
└── test.txt
02-7zip
└── test.txt
……
50-swift-zipfoundation
└── test.txt
```

You can verify that all parsers successfully extracted `test.txt` from the ZIP archive.

A short two-minute fuzzing session can be used to check that the fuzzer is working well: `sudo target/release/fuzz -b 10 -s 120`. This runs the fuzzer for two minutes with only ten samples per batch. The fuzzer will print logs for each iteration. The log text should contain `ok: 50`, indicating that all parsers are working fine. The results will be available at `evaluation/stats.json`, `evaluation/samples`, and `evaluation/results`.

## Running the Fuzzer

```console
cd zip-diff
sudo target/release/fuzz
```

Root permission is required here because the outputs are written inside Docker and are owned by root. Sometimes the outputs have incorrect permission bits and cannot be read by regular users even if the user is the file owner.

By default, the fuzzer will run indefinitely, and the results will be stored at `evaluation/stats.json`, `evaluation/samples`, and `evaluation/results`.

The fuzzer can be terminated at any time with Ctrl+C. You can also tell the fuzzer to stop after a specific time by setting the `-s, --stop-after-seconds` option.

The fuzzer does not automatically clear data from previous executions, so the files might get mixed together. You should either remove the files left over from a previous execution if they are no longer needed, or specify different `--samples-dir`, `--results-dir`, and `--stats-file` locations. The ZIP file samples generated by the fuzzer are stored in `--samples-dir`, and the corresponding parser outputs are stored in `--results-dir`. You can check the outputs to see that the parsers produce inconsistent outputs for the same input samples.

The `-b, --batch-size` option can be reduced when there is not enough RAM or disk space.
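For example, a second fuzzing run that keeps its data separate from an earlier one and uses a smaller batch size could look like this (the directory and file names are only illustrative; the options are the ones documented above):

```console
cd zip-diff
sudo target/release/fuzz \
    --batch-size 100 \
    --samples-dir evaluation/samples-run2 \
    --results-dir evaluation/results-run2 \
    --stats-file evaluation/stats-run2.json
```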
## Reproducing the Ablation Study

1. Run `sudo tools/ablation-study.sh`. It will run five 24-hour fuzzing sessions for each of the three setups, for a total of 15 days.
2. Run `python3 tools/fuzz-stats.py evaluation/stats/*` to draw the graph at `inconsistent-pair-cdf.pdf` (Figure 4 in the paper).

The full results took around 100 GB of disk space for the authors. At runtime it may temporarily take another ~500 GB of disk space. You can lower the `$BATCH_SIZE` in `ablation-study.sh` to reduce the required amount of RAM and disk space.

## Testing the Constructed Ambiguous ZIP Files

```console
cd zip-diff
target/release/construction
sudo target/release/count
```

The `construction` crate provides constructions of the ZIP parsing ambiguities described in Section 5.2 of the paper.

The `count` step summarizes the number of inconsistencies between each pair of ZIP parsers. It took about 40 minutes for the authors.

The inconsistency details are stored at `constructions/inconsistency-types.json`. You can run `tools/inconsistency-table.py` to generate the LaTeX table (Table 4 in the paper).
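To inspect a single ambiguity by hand, you can also feed one of the constructed files to all parsers with the manual-testing script from the Minimal Working Example and compare the per-parser outputs (the path below is a placeholder; use any file that `construction` actually generated under `constructions/`):

```console
tools/run-parsers.sh constructions/<type>/<variant>.zip
```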
--------------------------------------------------------------------------------
/constructions/.gitignore:
--------------------------------------------------------------------------------
!inconsistency-types.json

--------------------------------------------------------------------------------
/parsers/.gitignore:
--------------------------------------------------------------------------------
docker-compose.yml
*/unzip-all.sh
*/parallel-unzip-all.sh
*/testcase.sh
!parsers.json

--------------------------------------------------------------------------------
/parsers/01-infozip/Dockerfile:
--------------------------------------------------------------------------------
FROM alpine:3.20

RUN apk add unzip

COPY unzip unzip-all.sh /

ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/01-infozip/unzip:
--------------------------------------------------------------------------------
#!/bin/sh

set -eu

unzip -t "$1"
unzip -o "$1" -d "$2"

--------------------------------------------------------------------------------
/parsers/02-7zip/Dockerfile:
--------------------------------------------------------------------------------
FROM debian:12-slim

RUN echo 'deb http://deb.debian.org/debian bookworm-backports main' > /etc/apt/sources.list.d/backports.list
RUN apt-get update
RUN apt-get install -t bookworm-backports -y 7zip

COPY unzip unzip-all.sh /

ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/02-7zip/unzip:
--------------------------------------------------------------------------------
#!/bin/sh

set -eu

7z t "$1" && 7z x -aoa "$1" -o"$2"

--------------------------------------------------------------------------------
/parsers/03-p7zip/Dockerfile:
--------------------------------------------------------------------------------
FROM debian:12-slim

RUN apt-get update
RUN apt-get install -y p7zip-full

COPY unzip unzip-all.sh /

ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/03-p7zip/unzip:
--------------------------------------------------------------------------------
#!/bin/sh

set -eu

7z t "$1"
7z x -aoa "$1" -o"$2"

--------------------------------------------------------------------------------
/parsers/04-winrar/Dockerfile:
--------------------------------------------------------------------------------
FROM debian:12-slim

RUN dpkg --add-architecture i386
RUN sed -i 's/Components: main/Components: main contrib/g' /etc/apt/sources.list.d/debian.sources
RUN apt-get update
RUN apt-get install -y xvfb winetricks wine32 parallel

RUN winetricks msxml6 && sleep 1

ADD https://www.win-rar.com/fileadmin/winrar-versions/winrar/winrar-x32-701.exe /tmp/winrar.exe
RUN xvfb-run wine /tmp/winrar.exe /S && sleep 1

# abort build if installation fails
RUN stat '/root/.wine/drive_c/Program Files/WinRAR'

COPY run.sh parallel-unzip-all.sh testcase.sh unzip /

ENTRYPOINT ["/run.sh"]

--------------------------------------------------------------------------------
/parsers/04-winrar/run.sh:
--------------------------------------------------------------------------------
#!/bin/sh

export WINEDEBUG=-all
xvfb-run -a /parallel-unzip-all.sh 50%

--------------------------------------------------------------------------------
/parsers/04-winrar/unzip:
--------------------------------------------------------------------------------
#!/bin/bash

set -eu

wine 'C:\Program Files\WinRAR\WinRAR.exe' x -ibck -y -ppassword "Z:${1//\//\\}" '*.*' "Z:${2//\//\\}"

--------------------------------------------------------------------------------
/parsers/05-ada-zip-ada/Dockerfile:
--------------------------------------------------------------------------------
FROM debian:12-slim AS build

RUN apt-get update
RUN apt-get install -y gprbuild gnat-12 unzip

ADD https://github.com/zertovitch/zip-ada/archive/aaba1a767a47851df075a9884457052719e0488f.zip src.zip
RUN unzip src.zip
RUN mv zip-ada-* src

WORKDIR /src
RUN gprbuild -p -P zipada.gpr

FROM debian:12-slim

RUN apt-get update
RUN apt-get install -y libgnat-12

COPY --from=build /src/unzipada /

COPY unzip unzip-all.sh /

ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/05-ada-zip-ada/unzip:
--------------------------------------------------------------------------------
#!/bin/sh

set -eu

cd /

# it somehow does not accept absolute paths
/unzipada -d "${2#/}" "${1#/}"

--------------------------------------------------------------------------------
/parsers/06-c-go-unarr/Dockerfile:
--------------------------------------------------------------------------------
FROM golang:1.22.3-alpine3.20 AS build

RUN apk add gcc musl-dev

RUN go install github.com/gen2brain/go-unarr/cmd/unarr@v0.2.4

FROM alpine:3.20

COPY --from=build /go/bin/unarr /

COPY unzip unzip-all.sh /

ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/06-c-go-unarr/unzip:
--------------------------------------------------------------------------------
#!/bin/sh

/unarr "$1" "$2"

--------------------------------------------------------------------------------
/parsers/07-c-libarchive/Dockerfile:
--------------------------------------------------------------------------------
FROM alpine:3.20 AS build

RUN apk add g++ make

WORKDIR /tmp
ADD https://github.com/libarchive/libarchive/releases/download/v3.7.7/libarchive-3.7.7.tar.gz .
RUN tar xf libarchive-*.tar.gz
RUN mv libarchive-*/ /src

WORKDIR /src
RUN ./configure
RUN make -j$(nproc --ignore=2) bsdunzip

FROM alpine:3.20

COPY --from=build /src/bsdunzip /usr/local/bin/

COPY unzip unzip-all.sh /

ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/07-c-libarchive/unzip:
--------------------------------------------------------------------------------
#!/bin/sh

set -eu

bsdunzip -t "$1"
bsdunzip -o -d "$2" "$1"

--------------------------------------------------------------------------------
/parsers/08-c-libzip/Dockerfile:
--------------------------------------------------------------------------------
FROM alpine:3.20

RUN apk add php83
RUN apk add php83-zip

COPY unzip.php /

COPY unzip unzip-all.sh /

ENTRYPOINT [ "/unzip-all.sh" ]

--------------------------------------------------------------------------------
/parsers/08-c-libzip/unzip:
--------------------------------------------------------------------------------
#!/bin/sh

php83 /unzip.php "$@"

--------------------------------------------------------------------------------
/parsers/08-c-libzip/unzip.php:
--------------------------------------------------------------------------------
<?php

$zip = new ZipArchive();

if ($zip->open($argv[1], ZipArchive::CHECKCONS) === true) {
    $zip->extractTo($argv[2]);
    $zip->close();
} else {
    exit(1);
}

--------------------------------------------------------------------------------
/parsers/09-c-minizip/Dockerfile:
--------------------------------------------------------------------------------
FROM debian:12-slim AS build

RUN apt-get update
RUN apt-get install -y gcc make

WORKDIR /tmp
ADD https://github.com/madler/zlib/releases/download/v1.3.1/zlib-1.3.1.tar.gz .
RUN tar xf zlib-*.tar.gz
RUN mv zlib-*/ /src

WORKDIR /src
RUN ./configure
RUN make libz.a
RUN make -C contrib/minizip miniunz

FROM debian:12-slim

COPY --from=build /src/contrib/minizip/miniunz /usr/local/bin/miniunzip

COPY unzip unzip-all.sh /

ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/09-c-minizip/unzip:
--------------------------------------------------------------------------------
#!/bin/sh

miniunzip -x -o "$1" -d "$2"

--------------------------------------------------------------------------------
/parsers/10-c-minizip-ng/Dockerfile:
--------------------------------------------------------------------------------
FROM alpine:3.20

RUN apk add minizip-ng --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main

COPY unzip unzip-all.sh /

ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/10-c-minizip-ng/unzip:
--------------------------------------------------------------------------------
#!/bin/sh

minizip -x -o -d "$2" "$1"

--------------------------------------------------------------------------------
/parsers/11-c-zip/Dockerfile:
--------------------------------------------------------------------------------
FROM debian:12-slim AS build

RUN apt-get update
RUN apt-get install -y gcc

ADD https://github.com/kuba--/zip/archive/refs/tags/v0.3.2.tar.gz src.tar.gz
RUN tar xf src.tar.gz && mv zip-*/src .

WORKDIR /src

COPY unzip.c .

RUN gcc unzip.c zip.c -O2 -o unzip

FROM debian:12-slim

COPY --from=build /src/unzip /

COPY unzip-all.sh /
ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/11-c-zip/unzip.c:
--------------------------------------------------------------------------------
#include "zip.h"

int main(int argc, char **argv)
{
    return zip_extract(argv[1], argv[2], NULL, NULL);
}

--------------------------------------------------------------------------------
/parsers/12-c-zziplib/Dockerfile:
--------------------------------------------------------------------------------
FROM debian:12-slim AS build

RUN apt-get update
RUN apt-get install -y cmake zlib1g-dev

WORKDIR /tmp
ADD https://github.com/gdraheim/zziplib/archive/refs/tags/v0.13.78.tar.gz .
RUN tar xf *.tar.gz
RUN mv zziplib-*/ /src

WORKDIR /src
RUN cmake -S . -B build -D ZZIPBINS=On -D BUILD_SHARED_LIBS=Off -D ZZIPWRAP=Off -D ZZIPSDL=Off -D ZZIPTEST=Off -D ZZIPDOCS=Off
RUN cmake --build build

FROM debian:12-slim

RUN apt-get update
RUN apt-get install -y zlib1g

COPY --from=build /src/build/bins/unzzip /usr/local/bin

COPY unzip unzip-all.sh /

ENTRYPOINT [ "/unzip-all.sh" ]

--------------------------------------------------------------------------------
/parsers/12-c-zziplib/unzip:
--------------------------------------------------------------------------------
#!/bin/sh

set -eu

cd "$2"
unzzip "$1"

--------------------------------------------------------------------------------
/parsers/13-csharp-dotnetzip/Dockerfile:
--------------------------------------------------------------------------------
FROM mcr.microsoft.com/dotnet/sdk:9.0-alpine AS build

WORKDIR /unzip

RUN dotnet new console

RUN dotnet add package DotNetZip --version 1.16.0

RUN rm ./*.cs

COPY unzip.cs .

RUN dotnet build -c Release

FROM mcr.microsoft.com/dotnet/runtime:9.0-alpine

RUN apk add parallel

COPY --from=build /unzip/bin/Release/net9.0 /build

RUN ln -s /build/unzip /unzip

COPY testcase.sh parallel-unzip-all.sh /
ENTRYPOINT ["/parallel-unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/13-csharp-dotnetzip/unzip.cs:
--------------------------------------------------------------------------------
using Ionic.Zip;

if (ZipFile.CheckZip(args[0]))
{
    using (ZipFile zip = ZipFile.Read(args[0]))
    {
        zip.ExtractAll(args[1], ExtractExistingFileAction.OverwriteSilently);
    }
}
else
{
    System.Environment.Exit(1);
}

--------------------------------------------------------------------------------
/parsers/14-csharp-sharpcompress/Dockerfile:
--------------------------------------------------------------------------------
FROM mcr.microsoft.com/dotnet/sdk:9.0-alpine AS build

WORKDIR /unzip

RUN dotnet new console

RUN dotnet add package SharpCompress --version 0.38.0

RUN rm ./*.cs

COPY unzip.cs .
RUN dotnet build -c Release

FROM mcr.microsoft.com/dotnet/runtime:9.0-alpine

RUN apk add parallel

COPY --from=build /unzip/bin/Release/net9.0 /build

RUN ln -s /build/unzip /unzip

COPY testcase.sh parallel-unzip-all.sh /
ENTRYPOINT ["/parallel-unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/14-csharp-sharpcompress/unzip.cs:
--------------------------------------------------------------------------------
using SharpCompress.Archives.Zip;
using SharpCompress.Common;
using SharpCompress.Readers;

using (var archive = ZipArchive.Open(args[0]))
{
    var opt = new ExtractionOptions()
    {
        ExtractFullPath = true,
        Overwrite = true
    };
    archive.ExtractAllEntries().WriteAllToDirectory(args[1], opt);
}

--------------------------------------------------------------------------------
/parsers/15-csharp-sharpziplib/Dockerfile:
--------------------------------------------------------------------------------
FROM mcr.microsoft.com/dotnet/sdk:9.0-alpine AS build

WORKDIR /unzip

RUN dotnet new console

RUN dotnet add package SharpZipLib --version 1.4.2

RUN rm ./*.cs

COPY unzip.cs .

RUN dotnet build -c Release

FROM mcr.microsoft.com/dotnet/runtime:9.0-alpine

COPY --from=build /unzip/bin/Release/net9.0 /build

RUN ln -s /build/unzip /unzip

COPY unzip-all.sh /
ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/15-csharp-sharpziplib/unzip.cs:
--------------------------------------------------------------------------------
using ICSharpCode.SharpZipLib.Zip;

using (ZipFile zipFile = new ZipFile(args[0]))
{
    if (!zipFile.TestArchive(true))
    {
        System.Environment.Exit(1);
    }
}

new FastZip().ExtractZip(args[0], args[1], null);

--------------------------------------------------------------------------------
/parsers/16-csharp-system-io-compression/Dockerfile:
--------------------------------------------------------------------------------
FROM mcr.microsoft.com/dotnet/sdk:9.0-alpine AS build

WORKDIR /unzip

RUN dotnet new console

RUN rm ./*.cs

COPY unzip.cs .

RUN dotnet build -c Release

FROM mcr.microsoft.com/dotnet/runtime:9.0-alpine

COPY --from=build /unzip/bin/Release/net9.0 /build

RUN ln -s /build/unzip /unzip

COPY unzip-all.sh /
ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/16-csharp-system-io-compression/unzip.cs:
--------------------------------------------------------------------------------
using System.IO.Compression;

ZipFile.ExtractToDirectory(args[0], args[1], true);

--------------------------------------------------------------------------------
/parsers/17-cpp-android-libziparchive/Dockerfile:
--------------------------------------------------------------------------------
FROM debian:12-slim AS build

RUN echo 'deb http://deb.debian.org/debian bookworm-backports main' > /etc/apt/sources.list.d/backports.list
RUN apt-get update
RUN apt-get install -y g++
RUN apt-get install -t bookworm-backports -y android-libziparchive-dev android-libbase-dev

WORKDIR /src
COPY unzip.cpp .
RUN g++ unzip.cpp -O2 -o unzip -I/usr/include/android -L/usr/lib/x86_64-linux-gnu/android -lziparchive

FROM debian:12-slim

COPY --from=build /usr/lib/x86_64-linux-gnu/android/*.so.0 /usr/lib/
COPY --from=build /src/unzip /
COPY unzip-all.sh /
ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/17-cpp-android-libziparchive/unzip.cpp:
--------------------------------------------------------------------------------
// NOTE: the original header names were stripped during extraction; these are
// plausible reconstructions for the APIs used below (creat, fputs,
// std::filesystem, std::string, and the libziparchive API).
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

#include <filesystem>
#include <string>
#include <ziparchive/zip_archive.h>

int main(int argc, char **argv)
{
    ZipArchiveHandle archive;
    if (OpenArchive(argv[1], &archive) < 0)
    {
        fputs("Failed to open ZIP archive", stderr);
        return 1;
    }

    void *cookie;
    if (StartIteration(archive, &cookie) < 0)
    {
        fputs("Failed to iterate over ZIP entries", stderr);
        return 2;
    }

    const auto targetDir = std::filesystem::path(argv[2]);

    while (true)
    {
        ZipEntry entry;
        std::string name;

        const int status = Next(cookie, &entry, &name);
        if (status == -1)
            break;
        if (status < -1)
        {
            fputs("Failed to get next entry", stderr);
            return 3;
        }

        const auto target = targetDir / name;

        if (name.back() == '/')
        {
            std::filesystem::create_directories(target);
        }
        else
        {
            std::filesystem::create_directories(target.parent_path());

            int fd = creat(target.c_str(), 0644);
            if (fd < 0)
            {
                fputs("Failed to open output file", stderr);
                return 4;
            }

            if (ExtractEntryToFile(archive, &entry, fd) < 0)
            {
                fputs("Failed to extract to output file", stderr);
                return 5;
            }
        }
    }
}

--------------------------------------------------------------------------------
/parsers/18-cpp-poco/Dockerfile:
--------------------------------------------------------------------------------
FROM alpine:3.20 AS build

RUN apk add g++ make cmake

ADD https://github.com/pocoproject/poco/archive/refs/tags/poco-1.13.3-release.tar.gz src.tar.gz
RUN tar xf src.tar.gz && mv poco-* poco

WORKDIR /poco
RUN cmake -S . -B build -D BUILD_SHARED_LIBS=OFF -D ENABLE_ZIP=ON -D ENABLE_FOUNDATION=ON -D ENABLE_XML=ON -D ENABLE_ENCODINGS=OFF -D ENABLE_ENCODINGS_COMPILER=OFF -D ENABLE_JSON=OFF -D ENABLE_MONGODB=OFF -D ENABLE_DATA_SQLITE=OFF -D ENABLE_REDIS=OFF -D ENABLE_PROMETHEUS=OFF -D ENABLE_UTIL=OFF -D ENABLE_NET=OFF -D ENABLE_SEVENZIP=OFF -D ENABLE_CPPPARSER=OFF -D ENABLE_POCODOC=OFF -D ENABLE_PAGECOMPILER=OFF -D ENABLE_PAGECOMPILER_FILE2PAGE=OFF -D ENABLE_ACTIVERECORD=OFF -D ENABLE_ACTIVERECORD_COMPILER=OFF
RUN cmake --build build --config Release -j$(nproc)
RUN cmake --install build

WORKDIR /src
COPY unzip.cpp .
RUN g++ unzip.cpp -O2 -o unzip -static -lPocoZip -lPocoFoundation

FROM alpine:3.20
COPY --from=build /src/unzip /
COPY unzip-all.sh /
ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/18-cpp-poco/unzip.cpp:
--------------------------------------------------------------------------------
// NOTE: the original header names were stripped during extraction; these are
// plausible reconstructions for the APIs used below.
#include <fstream>
#include <iostream>

#include <Poco/Delegate.h>
#include <Poco/Zip/Decompress.h>

void onDecompressError(const void *pSender,
                       std::pair<const Poco::Zip::ZipLocalFileHeader, const std::string> &info)
{
    const Poco::Zip::ZipLocalFileHeader &header = info.first;
    const std::string &errorMsg = info.second;

    std::cerr << "Error decompressing file: " << header.getFileName() << std::endl;
    std::cerr << "Error message: " << errorMsg << std::endl;

    std::exit(1);
}

int main(int argc, char **argv)
{
    std::ifstream inp(argv[1], std::ios::binary);
    Poco::Zip::Decompress dec(inp, Poco::Path(argv[2]));
    dec.EError += Poco::delegate(&onDecompressError);
    dec.decompressAllFiles();
}

--------------------------------------------------------------------------------
/parsers/19-d-std-zip/Dockerfile:
--------------------------------------------------------------------------------
FROM alpine:3.20 AS build

WORKDIR /src

RUN apk add dmd gcc musl-dev --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main

COPY unzip.d .

RUN dmd unzip.d

FROM alpine:3.20

RUN apk add llvm-libunwind

COPY --from=build /src/unzip /

COPY unzip-all.sh /
ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/19-d-std-zip/unzip.d:
--------------------------------------------------------------------------------
import std.algorithm;
import std.file;
import std.path;
import std.zip;

void main(string[] args)
{
    auto zip = new ZipArchive(read(args[1]));
    chdir(args[2]);

    foreach (name, am; zip.directory)
    {
        if (am.name.endsWith('/')) {
            am.name.mkdirRecurse;
        } else {
            am.name.dirName.mkdirRecurse;
            zip.expand(am);
            write(am.name, am.expandedData);
        }
    }
}

--------------------------------------------------------------------------------
/parsers/20-dart-archive/Dockerfile:
--------------------------------------------------------------------------------
FROM dart:3.5 AS build

WORKDIR /src

COPY pubspec.yaml .
RUN dart pub get

COPY unzip.dart .
RUN dart compile exe unzip.dart -o unzip

FROM debian:12-slim
COPY --from=build /src/unzip /
COPY unzip-all.sh /
ENTRYPOINT ["/unzip-all.sh"]

--------------------------------------------------------------------------------
/parsers/20-dart-archive/pubspec.yaml:
--------------------------------------------------------------------------------
name: unzip
environment:
  sdk: ">=3.0.0"
dependencies:
  archive: 3.6.1

--------------------------------------------------------------------------------
/parsers/20-dart-archive/unzip.dart:
--------------------------------------------------------------------------------
import 'package:archive/archive_io.dart';

void main(List<String> args) {
  final archive = ZipDecoder().decodeBuffer(InputFileStream(args[0]));
  extractArchiveToDisk(archive, args[1]);
}

--------------------------------------------------------------------------------
/parsers/21-erlang-zip/Dockerfile:
--------------------------------------------------------------------------------
FROM erlang:27.1.2.0-alpine

RUN apk add parallel

COPY unzip parallel-unzip-all.sh testcase.sh /

ENTRYPOINT [ "/parallel-unzip-all.sh" ]

--------------------------------------------------------------------------------
/parsers/21-erlang-zip/unzip:
--------------------------------------------------------------------------------
#!/bin/sh

set -eu

cd "$2"
erl -noshell -eval "case zip:unzip(\"$1\") of {ok, _} -> erlang:halt(0); {error, Reason} -> io:format(\"Error: ~p~n\", [Reason]), erlang:halt(1) end." -s init stop

--------------------------------------------------------------------------------
/parsers/22-go-archive-zip/Dockerfile:
--------------------------------------------------------------------------------
FROM golang:1.22.3-alpine3.20 AS build

WORKDIR /src

RUN go mod init main

RUN go get github.com/evilsocket/islazy/zip

COPY unzip.go .
10 | 11 | RUN go build unzip.go 12 | 13 | FROM alpine:3.20 14 | 15 | COPY --from=build /src/unzip / 16 | 17 | COPY unzip-all.sh / 18 | ENTRYPOINT ["/unzip-all.sh"] 19 | -------------------------------------------------------------------------------- /parsers/22-go-archive-zip/unzip.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "github.com/evilsocket/islazy/zip" 6 | ) 7 | 8 | func main() { 9 | if _, err := zip.Unzip(os.Args[1], os.Args[2]); err != nil { 10 | panic(err) 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /parsers/23-haskell-zip/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM haskell:9-slim AS build 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y libbz2-dev 5 | 6 | RUN cabal update 7 | RUN cabal install zip-2.1.0 8 | 9 | FROM debian:12-slim 10 | 11 | WORKDIR /data 12 | 13 | RUN apt-get update 14 | RUN apt-get install -y bzip2 15 | 16 | COPY --from=build /root/.local/bin/haskell-zip-app / 17 | 18 | COPY unzip unzip-all.sh / 19 | 20 | ENTRYPOINT ["/unzip-all.sh"] 21 | -------------------------------------------------------------------------------- /parsers/23-haskell-zip/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | /haskell-zip-app uncompress "$1" "$2" 4 | -------------------------------------------------------------------------------- /parsers/24-haskell-zip-archive/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM haskell:9-slim AS build 2 | 3 | RUN cabal update 4 | RUN cabal install zip-archive-0.4.3.2 -f executable 5 | 6 | FROM debian:12-slim 7 | 8 | COPY --from=build /root/.local/bin/zip-archive / 9 | 10 | COPY unzip unzip-all.sh / 11 | 12 | ENTRYPOINT ["/unzip-all.sh"] 13 | -------------------------------------------------------------------------------- /parsers/24-haskell-zip-archive/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eu 4 | 5 | cd "$2" 6 | 7 | /zip-archive -d "$1" 8 | -------------------------------------------------------------------------------- /parsers/25-java-commons-compress-stream/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gradle:8.10-jdk21-alpine AS build 2 | 3 | COPY unzip /unzip 4 | 5 | WORKDIR /unzip 6 | 7 | RUN gradle build 8 | 9 | FROM eclipse-temurin:21.0.5_11-jre-alpine 10 | 11 | RUN apk add parallel 12 | 13 | COPY --from=build /unzip/build/libs/unzip.jar / 14 | 15 | COPY unzip.sh /unzip 16 | 17 | COPY testcase.sh parallel-unzip-all.sh / 18 | ENTRYPOINT ["/parallel-unzip-all.sh"] 19 | -------------------------------------------------------------------------------- /parsers/25-java-commons-compress-stream/unzip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -jar /unzip.jar "$1" "$2" 4 | -------------------------------------------------------------------------------- /parsers/25-java-commons-compress-stream/unzip/.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | .gradle 3 | .project 4 | .settings 5 | app 6 | build 7 | -------------------------------------------------------------------------------- /parsers/25-java-commons-compress-stream/unzip/build.gradle: 
-------------------------------------------------------------------------------- 1 | plugins { 2 | id 'application' 3 | } 4 | 5 | repositories { 6 | mavenCentral() 7 | } 8 | 9 | dependencies { 10 | implementation 'org.apache.commons:commons-compress:1.27.1' 11 | } 12 | 13 | java { 14 | toolchain { 15 | languageVersion = JavaLanguageVersion.of(21) 16 | } 17 | } 18 | 19 | application { 20 | mainClass = 'unzip.App' 21 | } 22 | 23 | jar { 24 | manifest { 25 | attributes 'Main-Class': application.mainClass 26 | } 27 | from { 28 | configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) } 29 | } 30 | duplicatesStrategy = DuplicatesStrategy.EXCLUDE 31 | } 32 | -------------------------------------------------------------------------------- /parsers/25-java-commons-compress-stream/unzip/src/main/java/unzip/App.java: -------------------------------------------------------------------------------- 1 | package unzip; 2 | 3 | import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; 4 | import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; 5 | 6 | import java.io.File; 7 | import java.io.FileInputStream; 8 | import java.io.FileOutputStream; 9 | import java.io.InputStream; 10 | import java.io.IOException; 11 | import java.io.OutputStream; 12 | import java.nio.file.Paths; 13 | 14 | public class App { 15 | public static void main(String[] args) { 16 | try { 17 | InputStream inputStream = new FileInputStream(args[0]); 18 | ZipArchiveInputStream zipInputStream = new ZipArchiveInputStream(inputStream); 19 | ZipArchiveEntry entry; 20 | while ((entry = zipInputStream.getNextEntry()) != null) { 21 | File extractedFile = Paths.get(args[1], entry.getName()).toFile(); 22 | if (entry.isDirectory()) { 23 | extractedFile.mkdirs(); 24 | } else { 25 | extractedFile.getParentFile().mkdirs(); 26 | try (OutputStream outputStream = new FileOutputStream(extractedFile)) { 27 | int readLen; 28 | byte[] readBuffer = new byte[4096]; 29 | while ((readLen = zipInputStream.read(readBuffer)) != -1) { 30 | outputStream.write(readBuffer, 0, readLen); 31 | } 32 | } 33 | } 34 | } 35 | } catch (IOException e) { 36 | e.printStackTrace(); 37 | System.exit(1); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /parsers/26-java-commons-compress-zipfile/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gradle:8.10-jdk21-alpine AS build 2 | 3 | COPY unzip /unzip 4 | 5 | WORKDIR /unzip 6 | 7 | RUN gradle build 8 | 9 | FROM eclipse-temurin:21.0.5_11-jre-alpine 10 | 11 | RUN apk add parallel 12 | 13 | COPY --from=build /unzip/build/libs/unzip.jar / 14 | 15 | COPY unzip.sh /unzip 16 | 17 | COPY testcase.sh parallel-unzip-all.sh / 18 | ENTRYPOINT ["/parallel-unzip-all.sh"] 19 | -------------------------------------------------------------------------------- /parsers/26-java-commons-compress-zipfile/unzip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -jar /unzip.jar "$1" "$2" 4 | -------------------------------------------------------------------------------- /parsers/26-java-commons-compress-zipfile/unzip/.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | .gradle 3 | .project 4 | .settings 5 | app 6 | build 7 | -------------------------------------------------------------------------------- /parsers/26-java-commons-compress-zipfile/unzip/build.gradle: 
-------------------------------------------------------------------------------- 1 | plugins { 2 | id 'application' 3 | } 4 | 5 | repositories { 6 | mavenCentral() 7 | } 8 | 9 | dependencies { 10 | implementation 'org.codehaus.plexus:plexus-archiver:4.10.0' 11 | implementation 'org.slf4j:slf4j-simple:2.0.16' 12 | } 13 | 14 | java { 15 | toolchain { 16 | languageVersion = JavaLanguageVersion.of(21) 17 | } 18 | } 19 | 20 | application { 21 | mainClass = 'unzip.App' 22 | } 23 | 24 | jar { 25 | manifest { 26 | attributes 'Main-Class': application.mainClass 27 | } 28 | from { 29 | configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) } 30 | } 31 | duplicatesStrategy = DuplicatesStrategy.EXCLUDE 32 | } 33 | -------------------------------------------------------------------------------- /parsers/26-java-commons-compress-zipfile/unzip/src/main/java/unzip/App.java: -------------------------------------------------------------------------------- 1 | package unzip; 2 | 3 | import java.io.File; 4 | import org.codehaus.plexus.archiver.zip.ZipUnArchiver; 5 | 6 | public class App { 7 | public static void main(String[] args) { 8 | var unarchiver = new ZipUnArchiver(new File(args[0])); 9 | unarchiver.setDestDirectory(new File(args[1])); 10 | unarchiver.extract(); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /parsers/27-java-util-zip-zipfile/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gradle:8.10-jdk21-alpine AS build 2 | 3 | COPY unzip /unzip 4 | 5 | WORKDIR /unzip 6 | 7 | RUN gradle build 8 | 9 | FROM eclipse-temurin:21.0.5_11-jre-alpine 10 | 11 | COPY --from=build /unzip/build/libs/unzip.jar / 12 | 13 | COPY unzip.sh /unzip 14 | 15 | COPY unzip-all.sh / 16 | ENTRYPOINT ["/unzip-all.sh"] 17 | -------------------------------------------------------------------------------- /parsers/27-java-util-zip-zipfile/unzip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -jar /unzip.jar "$1" "$2" 4 | -------------------------------------------------------------------------------- /parsers/27-java-util-zip-zipfile/unzip/.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | .gradle 3 | .project 4 | .settings 5 | app 6 | build 7 | -------------------------------------------------------------------------------- /parsers/27-java-util-zip-zipfile/unzip/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'application' 3 | } 4 | 5 | repositories { 6 | mavenCentral() 7 | } 8 | 9 | dependencies { 10 | implementation 'org.zeroturnaround:zt-zip:1.17' 11 | implementation 'org.slf4j:slf4j-simple:2.0.16' 12 | } 13 | 14 | java { 15 | toolchain { 16 | languageVersion = JavaLanguageVersion.of(21) 17 | } 18 | } 19 | 20 | application { 21 | mainClass = 'unzip.App' 22 | } 23 | 24 | jar { 25 | manifest { 26 | attributes 'Main-Class': application.mainClass 27 | } 28 | from { 29 | configurations.runtimeClasspath.collect { it.isDirectory() ? 
it : zipTree(it) } 30 | } 31 | duplicatesStrategy = DuplicatesStrategy.EXCLUDE 32 | } 33 | -------------------------------------------------------------------------------- /parsers/27-java-util-zip-zipfile/unzip/src/main/java/unzip/App.java: -------------------------------------------------------------------------------- 1 | package unzip; 2 | 3 | import java.io.File; 4 | import org.zeroturnaround.zip.ZipUtil; 5 | 6 | public class App { 7 | public static void main(String[] args) { 8 | ZipUtil.unpack(new File(args[0]), new File(args[1])); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /parsers/28-java-util-zip-zipinputstream/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gradle:8.10-jdk21-alpine AS build 2 | 3 | COPY unzip /unzip 4 | 5 | WORKDIR /unzip 6 | 7 | RUN gradle build 8 | 9 | FROM eclipse-temurin:21.0.5_11-jre-alpine 10 | 11 | COPY --from=build /unzip/build/libs/unzip.jar / 12 | 13 | COPY unzip.sh /unzip 14 | 15 | COPY unzip-all.sh / 16 | ENTRYPOINT ["/unzip-all.sh"] 17 | -------------------------------------------------------------------------------- /parsers/28-java-util-zip-zipinputstream/unzip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -jar /unzip.jar "$1" "$2" 4 | -------------------------------------------------------------------------------- /parsers/28-java-util-zip-zipinputstream/unzip/.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | .gradle 3 | .project 4 | .settings 5 | app 6 | build 7 | -------------------------------------------------------------------------------- /parsers/28-java-util-zip-zipinputstream/unzip/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'application' 3 | } 4 | 5 | repositories { 6 | mavenCentral() 7 | } 8 | 9 | dependencies { 10 | implementation 'org.zeroturnaround:zt-zip:1.17' 11 | implementation 'org.slf4j:slf4j-simple:2.0.16' 12 | } 13 | 14 | java { 15 | toolchain { 16 | languageVersion = JavaLanguageVersion.of(21) 17 | } 18 | } 19 | 20 | application { 21 | mainClass = 'unzip.App' 22 | } 23 | 24 | jar { 25 | manifest { 26 | attributes 'Main-Class': application.mainClass 27 | } 28 | from { 29 | configurations.runtimeClasspath.collect { it.isDirectory() ? 
it : zipTree(it) } 30 | } 31 | duplicatesStrategy = DuplicatesStrategy.EXCLUDE 32 | } 33 | -------------------------------------------------------------------------------- /parsers/28-java-util-zip-zipinputstream/unzip/src/main/java/unzip/App.java: -------------------------------------------------------------------------------- 1 | package unzip; 2 | 3 | import java.io.File; 4 | import java.io.FileInputStream; 5 | import org.zeroturnaround.zip.ZipUtil; 6 | 7 | public class App { 8 | public static void main(String[] args) { 9 | try { 10 | ZipUtil.unpack(new FileInputStream(new File(args[0])), new File(args[1])); 11 | } catch (Exception e) { 12 | e.printStackTrace(); 13 | System.exit(1); 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /parsers/29-java-zip4j-zipfile/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gradle:8.10-jdk21-alpine AS build 2 | 3 | COPY unzip /unzip 4 | 5 | WORKDIR /unzip 6 | 7 | RUN gradle build 8 | 9 | FROM eclipse-temurin:21.0.5_11-jre-alpine 10 | 11 | COPY --from=build /unzip/build/libs/unzip.jar / 12 | 13 | COPY unzip.sh /unzip 14 | 15 | COPY unzip-all.sh / 16 | ENTRYPOINT ["/unzip-all.sh"] 17 | -------------------------------------------------------------------------------- /parsers/29-java-zip4j-zipfile/unzip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -jar /unzip.jar "$1" "$2" 4 | -------------------------------------------------------------------------------- /parsers/29-java-zip4j-zipfile/unzip/.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | .gradle 3 | .project 4 | .settings 5 | app 6 | build 7 | -------------------------------------------------------------------------------- /parsers/29-java-zip4j-zipfile/unzip/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'application' 3 | } 4 | 5 | repositories { 6 | mavenCentral() 7 | } 8 | 9 | dependencies { 10 | implementation 'net.lingala.zip4j:zip4j:2.11.5' 11 | } 12 | 13 | java { 14 | toolchain { 15 | languageVersion = JavaLanguageVersion.of(21) 16 | } 17 | } 18 | 19 | application { 20 | mainClass = 'unzip.App' 21 | } 22 | 23 | jar { 24 | manifest { 25 | attributes 'Main-Class': application.mainClass 26 | } 27 | from { 28 | configurations.runtimeClasspath.collect { it.isDirectory() ? 
it : zipTree(it) } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /parsers/29-java-zip4j-zipfile/unzip/src/main/java/unzip/App.java: -------------------------------------------------------------------------------- 1 | package unzip; 2 | 3 | import net.lingala.zip4j.ZipFile; 4 | import net.lingala.zip4j.exception.ZipException; 5 | 6 | public class App { 7 | public static void main(String[] args) { 8 | try { 9 | new ZipFile(args[0]).extractAll(args[1]); 10 | } catch (ZipException e) { 11 | System.err.println("Error during extraction: " + e.getMessage()); 12 | System.exit(1); 13 | } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /parsers/30-java-zip4j-zipinputstream/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gradle:8.10-jdk21-alpine AS build 2 | 3 | COPY unzip /unzip 4 | 5 | WORKDIR /unzip 6 | 7 | RUN gradle build 8 | 9 | FROM eclipse-temurin:21.0.5_11-jre-alpine 10 | 11 | COPY --from=build /unzip/build/libs/unzip.jar / 12 | 13 | COPY unzip.sh /unzip 14 | 15 | COPY unzip-all.sh / 16 | ENTRYPOINT ["/unzip-all.sh"] 17 | -------------------------------------------------------------------------------- /parsers/30-java-zip4j-zipinputstream/unzip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | java -jar /unzip.jar "$1" "$2" 4 | -------------------------------------------------------------------------------- /parsers/30-java-zip4j-zipinputstream/unzip/.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | .gradle 3 | .project 4 | .settings 5 | app 6 | build 7 | -------------------------------------------------------------------------------- /parsers/30-java-zip4j-zipinputstream/unzip/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'application' 3 | } 4 | 5 | repositories { 6 | mavenCentral() 7 | } 8 | 9 | dependencies { 10 | implementation 'net.lingala.zip4j:zip4j:2.11.5' 11 | } 12 | 13 | java { 14 | toolchain { 15 | languageVersion = JavaLanguageVersion.of(21) 16 | } 17 | } 18 | 19 | application { 20 | mainClass = 'unzip.App' 21 | } 22 | 23 | jar { 24 | manifest { 25 | attributes 'Main-Class': application.mainClass 26 | } 27 | from { 28 | configurations.runtimeClasspath.collect { it.isDirectory() ? 
it : zipTree(it) } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /parsers/30-java-zip4j-zipinputstream/unzip/src/main/java/unzip/App.java: -------------------------------------------------------------------------------- 1 | package unzip; 2 | 3 | import net.lingala.zip4j.io.inputstream.ZipInputStream; 4 | import net.lingala.zip4j.model.LocalFileHeader; 5 | 6 | import java.io.File; 7 | import java.io.FileInputStream; 8 | import java.io.FileOutputStream; 9 | import java.io.InputStream; 10 | import java.io.IOException; 11 | import java.io.OutputStream; 12 | import java.nio.file.Paths; 13 | 14 | public class App { 15 | public static void main(String[] args) { 16 | try { 17 | InputStream inputStream = new FileInputStream(args[0]); 18 | ZipInputStream zipInputStream = new ZipInputStream(inputStream); 19 | LocalFileHeader localFileHeader; 20 | while ((localFileHeader = zipInputStream.getNextEntry()) != null) { 21 | File extractedFile = Paths.get(args[1], localFileHeader.getFileName()).toFile(); 22 | if (localFileHeader.isDirectory()) { 23 | extractedFile.mkdirs(); 24 | } else { 25 | extractedFile.getParentFile().mkdirs(); 26 | try (OutputStream outputStream = new FileOutputStream(extractedFile)) { 27 | int readLen; 28 | byte[] readBuffer = new byte[4096]; 29 | while ((readLen = zipInputStream.read(readBuffer)) != -1) { 30 | outputStream.write(readBuffer, 0, readLen); 31 | } 32 | } 33 | } 34 | } 35 | } catch (IOException e) { 36 | e.printStackTrace(); 37 | System.exit(1); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /parsers/31-nodejs-ronomon-zip/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:20-alpine 2 | 3 | WORKDIR /src 4 | 5 | RUN npm init -y 6 | 7 | RUN npm install @ronomon/zip@1.12.0 8 | 9 | COPY unzip.js . 10 | 11 | COPY unzip unzip-all.sh / 12 | 13 | ENTRYPOINT ["/unzip-all.sh"] 14 | -------------------------------------------------------------------------------- /parsers/31-nodejs-ronomon-zip/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | node /src/unzip.js "$@" 4 | -------------------------------------------------------------------------------- /parsers/31-nodejs-ronomon-zip/unzip.js: -------------------------------------------------------------------------------- 1 | const ZIP = require('@ronomon/zip'); 2 | const { dirname } = require('path'); 3 | const { readFileSync, writeFileSync, mkdirSync } = require('fs'); 4 | 5 | const buffer = readFileSync(process.argv[2]); 6 | try { 7 | const headers = ZIP.decode(buffer); 8 | process.chdir(process.argv[3]); 9 | for (const header of headers) { 10 | if (header.directory) { 11 | mkdirSync(header.fileName, { recursive: true }); 12 | } else { 13 | mkdirSync(dirname(header.fileName), { recursive: true }); 14 | const data = ZIP.inflate(header, buffer); 15 | writeFileSync(header.fileName, data); 16 | } 17 | } 18 | } catch (error) { 19 | console.error(error.message); 20 | process.exit(1); 21 | } 22 | -------------------------------------------------------------------------------- /parsers/32-nodejs-adm-zip/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:20-alpine 2 | 3 | WORKDIR /src 4 | 5 | RUN apk add parallel 6 | 7 | RUN npm init -y 8 | 9 | RUN npm install adm-zip@0.5.16 10 | 11 | COPY unzip.js . 
12 | 13 | COPY unzip / 14 | 15 | COPY testcase.sh parallel-unzip-all.sh / 16 | ENTRYPOINT ["/parallel-unzip-all.sh"] 17 | -------------------------------------------------------------------------------- /parsers/32-nodejs-adm-zip/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | node /src/unzip.js "$@" 4 | -------------------------------------------------------------------------------- /parsers/32-nodejs-adm-zip/unzip.js: -------------------------------------------------------------------------------- 1 | const AdmZip = require('adm-zip'); 2 | 3 | const zip = new AdmZip(process.argv[2]); 4 | zip.extractAllTo(process.argv[3]); 5 | -------------------------------------------------------------------------------- /parsers/33-nodejs-decompress-zip/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:20-alpine 2 | 3 | WORKDIR /src 4 | 5 | RUN apk add parallel 6 | 7 | RUN npm init -y 8 | 9 | RUN npm install decompress-zip@0.3.3 10 | 11 | COPY unzip.js . 12 | 13 | COPY unzip testcase.sh parallel-unzip-all.sh / 14 | 15 | ENTRYPOINT ["/parallel-unzip-all.sh", "50%"] 16 | -------------------------------------------------------------------------------- /parsers/33-nodejs-decompress-zip/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | node /src/unzip.js "$@" 4 | -------------------------------------------------------------------------------- /parsers/33-nodejs-decompress-zip/unzip.js: -------------------------------------------------------------------------------- 1 | const DecompressZip = require('decompress-zip'); 2 | 3 | const zip = new DecompressZip(process.argv[2]); 4 | 5 | zip.on('error', (err) => { 6 | console.error(err); 7 | process.exit(1); 8 | }); 9 | 10 | zip.extract({ path: process.argv[3] }); 11 | -------------------------------------------------------------------------------- /parsers/34-nodejs-jszip/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:20-alpine 2 | 3 | WORKDIR /src 4 | 5 | RUN npm init -y 6 | 7 | RUN npm install jszip@3.10.1 8 | 9 | COPY unzip.js . 
10 | 11 | COPY unzip unzip-all.sh / 12 | 13 | ENTRYPOINT ["/unzip-all.sh"] 14 | -------------------------------------------------------------------------------- /parsers/34-nodejs-jszip/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | node /src/unzip.js "$@" 4 | -------------------------------------------------------------------------------- /parsers/34-nodejs-jszip/unzip.js: -------------------------------------------------------------------------------- 1 | const { loadAsync } = require('jszip'); 2 | const { dirname } = require('path'); 3 | const { readFile, mkdir, writeFile } = require('fs/promises'); 4 | 5 | (async () => { 6 | const file = await readFile(process.argv[2]); 7 | const zip = await loadAsync(file); 8 | 9 | process.chdir(process.argv[3]); 10 | 11 | for (const entry of Object.values(zip.files)) { 12 | if (entry.dir) { 13 | await mkdir(entry.name, { recursive: true }); 14 | } else { 15 | await mkdir(dirname(entry.name), { recursive: true }); 16 | const content = await entry.async('nodebuffer'); 17 | await writeFile(entry.name, content); 18 | } 19 | } 20 | })(); 21 | -------------------------------------------------------------------------------- /parsers/35-nodejs-node-stream-zip/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:20-alpine 2 | 3 | WORKDIR /src 4 | 5 | RUN npm init -y 6 | 7 | RUN npm install node-stream-zip@1.15.0 8 | 9 | COPY unzip.js . 10 | 11 | COPY unzip unzip-all.sh / 12 | 13 | ENTRYPOINT ["/unzip-all.sh"] 14 | -------------------------------------------------------------------------------- /parsers/35-nodejs-node-stream-zip/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | node /src/unzip.js "$@" 4 | -------------------------------------------------------------------------------- /parsers/35-nodejs-node-stream-zip/unzip.js: -------------------------------------------------------------------------------- 1 | const StreamZip = require('node-stream-zip'); 2 | 3 | const zip = new StreamZip.async({ file: process.argv[2]}); 4 | zip.extract(null, process.argv[3]); 5 | -------------------------------------------------------------------------------- /parsers/36-nodejs-unzipper-extract/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:20-alpine 2 | 3 | WORKDIR /src 4 | 5 | RUN npm init -y 6 | 7 | RUN npm install unzipper@0.12.3 8 | 9 | COPY unzip.js . 
10 | 11 | COPY unzip unzip-all.sh / 12 | 13 | ENTRYPOINT ["/unzip-all.sh"] 14 | -------------------------------------------------------------------------------- /parsers/36-nodejs-unzipper-extract/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | node /src/unzip.js "$@" 4 | -------------------------------------------------------------------------------- /parsers/36-nodejs-unzipper-extract/unzip.js: -------------------------------------------------------------------------------- 1 | const { createReadStream } = require('fs'); 2 | const { Extract } = require('unzipper'); 3 | 4 | const extract = Extract({ path: process.argv[3] }); 5 | createReadStream(process.argv[2]).pipe(extract); 6 | extract.on('error', (error) => { 7 | console.error(error); 8 | process.exit(1); 9 | }); 10 | -------------------------------------------------------------------------------- /parsers/37-nodejs-unzipper-open/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:20-alpine 2 | 3 | WORKDIR /src 4 | 5 | RUN npm init -y 6 | 7 | RUN npm install unzipper@0.12.3 8 | 9 | COPY unzip.js . 10 | 11 | COPY unzip unzip-all.sh / 12 | 13 | ENTRYPOINT ["/unzip-all.sh"] 14 | -------------------------------------------------------------------------------- /parsers/37-nodejs-unzipper-open/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | node /src/unzip.js "$@" 4 | -------------------------------------------------------------------------------- /parsers/37-nodejs-unzipper-open/unzip.js: -------------------------------------------------------------------------------- 1 | const { Open } = require('unzipper'); 2 | 3 | (async () => { 4 | try { 5 | const d = await Open.file(process.argv[2]); 6 | await d.extract({ path: process.argv[3] }); 7 | } catch (err) { 8 | console.error(err); 9 | process.exit(1); 10 | } 11 | })(); 12 | -------------------------------------------------------------------------------- /parsers/38-nodejs-yauzl-v2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:20-alpine 2 | 3 | RUN npm install extract-zip@2.0.1 -g 4 | 5 | COPY unzip unzip-all.sh / 6 | 7 | ENTRYPOINT ["/unzip-all.sh"] 8 | -------------------------------------------------------------------------------- /parsers/38-nodejs-yauzl-v2/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | extract-zip "$@" 4 | -------------------------------------------------------------------------------- /parsers/39-nodejs-yauzl-v3/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:20-alpine 2 | 3 | RUN npm install extract-zip@2.0.1 -g 4 | 5 | WORKDIR /usr/local/lib/node_modules/extract-zip 6 | RUN sed -i 's/"yauzl":.*/"yauzl": "3.2.0"/' package.json 7 | RUN npm install 8 | 9 | COPY unzip unzip-all.sh / 10 | 11 | ENTRYPOINT ["/unzip-all.sh"] 12 | -------------------------------------------------------------------------------- /parsers/39-nodejs-yauzl-v3/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | extract-zip "$@" 4 | -------------------------------------------------------------------------------- /parsers/40-nodejs-zipjs/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:20-alpine 2 | 3 | WORKDIR /src 4 | 5 | RUN npm 
init -y 6 | 7 | RUN npm install @zip.js/zip.js@2.7.53 8 | 9 | COPY unzip.js . 10 | 11 | COPY unzip unzip-all.sh / 12 | 13 | ENTRYPOINT ["/unzip-all.sh"] 14 | -------------------------------------------------------------------------------- /parsers/40-nodejs-zipjs/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | node /src/unzip.js "$@" 4 | -------------------------------------------------------------------------------- /parsers/40-nodejs-zipjs/unzip.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-await-in-loop */ 2 | 3 | const { BlobReader, ZipReader, Uint8ArrayWriter } = require('@zip.js/zip.js'); 4 | const { dirname } = require('path'); 5 | const { openAsBlob } = require('fs'); 6 | const { mkdir, writeFile } = require('fs/promises'); 7 | 8 | (async () => { 9 | process.chdir(process.argv[3]); 10 | const file = await openAsBlob(process.argv[2]); 11 | const reader = new ZipReader(new BlobReader(file)); 12 | for (const entry of await reader.getEntries()) { 13 | if (entry.directory) { 14 | await mkdir(entry.filename, { recursive: true }); 15 | } else { 16 | const data = await entry.getData(new Uint8ArrayWriter()); 17 | await mkdir(dirname(entry.filename), { recursive: true }); 18 | await writeFile(entry.filename, data); 19 | } 20 | } 21 | })(); 22 | -------------------------------------------------------------------------------- /parsers/41-php-phardata/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.20 2 | 3 | RUN apk add php83 4 | RUN apk add php83-phar 5 | 6 | COPY unzip.php / 7 | 8 | COPY unzip unzip-all.sh / 9 | 10 | ENTRYPOINT [ "/unzip-all.sh" ] 11 | -------------------------------------------------------------------------------- /parsers/41-php-phardata/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | php83 /unzip.php "$@" 4 | -------------------------------------------------------------------------------- /parsers/41-php-phardata/unzip.php: -------------------------------------------------------------------------------- 1 | <?php 2 | 3 | $phar = new PharData($argv[1]); 4 | $phar->extractTo($argv[2], null, true); 5 | -------------------------------------------------------------------------------- /parsers/42-php-phpzip/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.20 2 | 3 | WORKDIR /src 4 | 5 | RUN apk add composer 6 | 7 | RUN composer require nelexa/zip:4.0.2 8 | 9 | COPY unzip.php . 10 | 11 | COPY unzip unzip-all.sh / 12 | 13 | ENTRYPOINT [ "/unzip-all.sh" ] 14 | -------------------------------------------------------------------------------- /parsers/42-php-phpzip/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | php83 /src/unzip.php "$@" 4 | -------------------------------------------------------------------------------- /parsers/42-php-phpzip/unzip.php: -------------------------------------------------------------------------------- 1 | <?php 2 | 3 | require __DIR__ . '/vendor/autoload.php'; 4 | 5 | $zip = new \PhpZip\ZipFile(); 6 | $zip->openFile($argv[1])->extractTo($argv[2]); 7 | -------------------------------------------------------------------------------- /parsers/43-pascal-paszlib/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM freepascal/fpc:3.2.2-full AS build 2 | 3 | WORKDIR /src 4 | 5 | COPY unzip.pp .
6 | 7 | RUN fpc unzip 8 | 9 | FROM alpine:3.20 10 | 11 | COPY --from=build /src/unzip / 12 | 13 | COPY unzip-all.sh / 14 | 15 | ENTRYPOINT ["/unzip-all.sh"] 16 | -------------------------------------------------------------------------------- /parsers/43-pascal-paszlib/unzip.pp: -------------------------------------------------------------------------------- 1 | uses 2 | Zipper; 3 | var 4 | UnZipper: TUnZipper; 5 | begin 6 | UnZipper := TUnZipper.Create; 7 | UnZipper.FileName := paramStr(1); 8 | UnZipper.OutputPath := paramStr(2); 9 | UnZipper.Examine; 10 | UnZipper.UnZipAllFiles; 11 | UnZipper.Free; 12 | end. 13 | -------------------------------------------------------------------------------- /parsers/44-perl-archive-zip/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.20 2 | 3 | RUN apk add perl-archive-zip 4 | 5 | COPY unzip.pl / 6 | 7 | COPY unzip unzip-all.sh / 8 | 9 | ENTRYPOINT [ "/unzip-all.sh" ] 10 | -------------------------------------------------------------------------------- /parsers/44-perl-archive-zip/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | perl /unzip.pl "$@" 4 | -------------------------------------------------------------------------------- /parsers/44-perl-archive-zip/unzip.pl: -------------------------------------------------------------------------------- 1 | use strict; 2 | use Archive::Zip qw(:ERROR_CODES); 3 | 4 | my $zip = Archive::Zip->new(); 5 | my $status = $zip->read($ARGV[0]); 6 | die 'Failed to read ZIP' if $status != AZ_OK; 7 | $status = $zip->extractTree('', $ARGV[1]); 8 | die 'Failed to extract ZIP' if $status != AZ_OK; 9 | -------------------------------------------------------------------------------- /parsers/45-python-zipfile/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.13.0-alpine 2 | 3 | COPY unzip.py / 4 | 5 | COPY unzip unzip-all.sh / 6 | 7 | ENTRYPOINT [ "/unzip-all.sh" ] 8 | -------------------------------------------------------------------------------- /parsers/45-python-zipfile/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | python /unzip.py "$@" 4 | -------------------------------------------------------------------------------- /parsers/45-python-zipfile/unzip.py: -------------------------------------------------------------------------------- 1 | from zipfile import ZipFile 2 | from sys import argv 3 | 4 | zip = ZipFile(argv[1], 'r') 5 | error_file = zip.testzip() 6 | if error_file is None: 7 | zip.extractall(argv[2]) 8 | else: 9 | print(f"Error in file {error_file}") 10 | exit(1) 11 | -------------------------------------------------------------------------------- /parsers/46-racket-file-unzip/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM racket/racket:8.15-bc-full AS build 2 | 3 | WORKDIR /src 4 | 5 | COPY unzip.rkt . 
6 | 7 | RUN raco exe unzip.rkt 8 | 9 | RUN raco distribute /unzip unzip 10 | 11 | FROM debian:12-slim 12 | 13 | RUN apt-get update 14 | RUN apt-get install -y parallel 15 | 16 | COPY --from=build /unzip /build 17 | 18 | RUN ln -s /build/bin/unzip /unzip 19 | 20 | COPY parallel-unzip-all.sh testcase.sh / 21 | 22 | ENTRYPOINT [ "/parallel-unzip-all.sh" ] 23 | -------------------------------------------------------------------------------- /parsers/46-racket-file-unzip/unzip.rkt: -------------------------------------------------------------------------------- 1 | #lang racket 2 | (require file/unzip) 3 | 4 | (let ([args (current-command-line-arguments)]) 5 | (define src (vector-ref args 0)) 6 | (define dest (vector-ref args 1)) 7 | (unzip src (make-filesystem-entry-reader #:dest dest #:exists 'replace))) 8 | -------------------------------------------------------------------------------- /parsers/47-ruby-rubyzip-file/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ruby:3.3-alpine 2 | 3 | RUN gem install rubyzip -v 2.3.2 4 | 5 | COPY unzip.rb / 6 | 7 | COPY unzip unzip-all.sh / 8 | 9 | ENTRYPOINT [ "/unzip-all.sh" ] 10 | -------------------------------------------------------------------------------- /parsers/47-ruby-rubyzip-file/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ruby /unzip.rb "$@" 4 | -------------------------------------------------------------------------------- /parsers/47-ruby-rubyzip-file/unzip.rb: -------------------------------------------------------------------------------- 1 | require 'zip' 2 | 3 | Zip.on_exists_proc = true 4 | Dir.chdir(ARGV[1]) 5 | 6 | Zip::File.open(ARGV[0]) do |zip_file| 7 | zip_file.each do |entry| 8 | entry_path = File.join(Dir.pwd, entry.name) 9 | FileUtils.mkdir_p(File.dirname(entry_path)) 10 | entry.extract 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /parsers/48-ruby-rubyzip-inputstream/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ruby:3.3-alpine 2 | 3 | RUN gem install rubyzip -v 2.3.2 4 | 5 | COPY unzip.rb / 6 | 7 | COPY unzip unzip-all.sh / 8 | 9 | ENTRYPOINT [ "/unzip-all.sh" ] 10 | -------------------------------------------------------------------------------- /parsers/48-ruby-rubyzip-inputstream/unzip: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ruby /unzip.rb "$@" 4 | -------------------------------------------------------------------------------- /parsers/48-ruby-rubyzip-inputstream/unzip.rb: -------------------------------------------------------------------------------- 1 | require 'zip' 2 | 3 | Zip.on_exists_proc = true 4 | Dir.chdir(ARGV[1]) 5 | 6 | stream = Zip::InputStream.new(ARGV[0]) 7 | while entry = stream.get_next_entry 8 | entry_path = File.join(Dir.pwd, entry.name) 9 | FileUtils.mkdir_p(File.dirname(entry_path)) 10 | entry.extract 11 | end 12 | -------------------------------------------------------------------------------- /parsers/49-rust-zip/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rust:1.82-slim-bookworm AS build 2 | 3 | WORKDIR /src/unzip 4 | 5 | RUN cargo init 6 | 7 | RUN cargo add zip@2.2.0 8 | 9 | COPY unzip.rs src/main.rs 10 | 11 | RUN cargo build --release 12 | 13 | FROM debian:12-slim 14 | 15 | COPY --from=build /src/unzip/target/release/unzip / 16 | 17 | COPY 
unzip-all.sh / 18 | 19 | ENTRYPOINT [ "/unzip-all.sh" ] 20 | -------------------------------------------------------------------------------- /parsers/49-rust-zip/unzip.rs: -------------------------------------------------------------------------------- 1 | use zip::read::ZipArchive; 2 | use std::fs::File; 3 | 4 | fn main() { 5 | let mut args = std::env::args().skip(1); 6 | let src = args.next().expect("no src in args"); 7 | let dst = args.next().expect("no dst in args"); 8 | let file = File::open(src).expect("failed to open input file"); 9 | let mut archive = ZipArchive::new(file).expect("failed to read input ZIP"); 10 | archive.extract(dst).expect("failed to extract"); 11 | } 12 | -------------------------------------------------------------------------------- /parsers/50-swift-zipfoundation/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM swift:5.10 AS build 2 | 3 | COPY src /src 4 | WORKDIR /src 5 | 6 | RUN swift build -c release --static-swift-stdlib 7 | RUN mv "$(swift build -c release --show-bin-path)/unzip" / 8 | 9 | FROM debian:12-slim 10 | 11 | RUN apt-get update 12 | RUN apt-get install -y parallel 13 | 14 | COPY --from=build /unzip / 15 | 16 | COPY parallel-unzip-all.sh testcase.sh / 17 | 18 | ENTRYPOINT [ "/parallel-unzip-all.sh" ] 19 | -------------------------------------------------------------------------------- /parsers/50-swift-zipfoundation/src/Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version: 5.10 2 | 3 | import PackageDescription 4 | 5 | let package = Package( 6 | name: "unzip", 7 | dependencies: [ 8 | .package(url: "https://github.com/weichsel/ZIPFoundation.git", exact: "0.9.19"), 9 | ], 10 | targets: [ 11 | .executableTarget(name: "unzip", dependencies: ["ZIPFoundation"]), 12 | ] 13 | ) 14 | -------------------------------------------------------------------------------- /parsers/50-swift-zipfoundation/src/Sources/main.swift: -------------------------------------------------------------------------------- 1 | import ZIPFoundation 2 | import Foundation 3 | 4 | let fileManager = FileManager() 5 | var sourceURL = URL(fileURLWithPath: CommandLine.arguments[1]) 6 | var destinationURL = URL(fileURLWithPath: CommandLine.arguments[2]) 7 | do { 8 | try fileManager.unzipItem(at: sourceURL, to: destinationURL) 9 | } catch { 10 | print("Extraction of ZIP archive failed with error: \(error)") 11 | exit(1) 12 | } 13 | -------------------------------------------------------------------------------- /parsers/README.md: -------------------------------------------------------------------------------- 1 | The tested parsers, as listed in Table 3 of the paper. Note that we sometimes use a wrapper library to test the underlying parser library. For example, we use the PHP `ZipArchive` extension to test libzip. 2 | 3 | You can find detailed information about the parsers in [`parsers.json`](./parsers.json).
4 | -------------------------------------------------------------------------------- /parsers/parallel-unzip-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd /input && parallel -j"${1:-25%}" /testcase.sh ::: * 4 | -------------------------------------------------------------------------------- /parsers/prepare.sh: -------------------------------------------------------------------------------- 1 | ../tools/prepare.sh -------------------------------------------------------------------------------- /parsers/testcase.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eu 4 | 5 | mkdir -p /output/"$1" 6 | if ! timeout 1m /unzip "$(realpath "$1")" /output/"$1"; then 7 | while ! rm -rf /output/"$1"; do echo "Failed to rm -rf /output/$1"; done 8 | touch /output/"$1" 9 | fi 10 | -------------------------------------------------------------------------------- /parsers/unzip-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eu 4 | 5 | cd /input 6 | 7 | for i in *; do 8 | mkdir -p /output/"$i" 9 | if ! timeout 1m /unzip "$(realpath "$i")" /output/"$i"; then 10 | while ! rm -rf /output/"$i"; do echo "Failed to rm -rf /output/$i"; done 11 | touch /output/"$i" 12 | fi 13 | done 14 | -------------------------------------------------------------------------------- /tools/ablation-study.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xeuo pipefail 4 | 5 | TIMES=5 6 | BATCH_SIZE=500 7 | STOP_SECONDS=$(( 24 * 60 * 60 )) 8 | base="$(dirname "$(dirname "$(realpath "$0")")")" 9 | DATA="$base/evaluation" 10 | 11 | for _ in $(seq 1 $TIMES); do 12 | for i in full argmax-ucb byte-only; do 13 | cd "$base/zip-diff" 14 | case "$i" in 15 | full) arg= ;; 16 | argmax-ucb) arg=--argmax-ucb ;; 17 | byte-only) arg=--byte-mutation-only ;; 18 | esac 19 | key="$(date -Is)-$i" 20 | session="$DATA/sessions/$key" 21 | target/release/fuzz -b "$BATCH_SIZE" -s "$STOP_SECONDS" $arg \ 22 | --input-dir "$DATA/bind/input" \ 23 | --output-dir "$DATA/bind/output" \ 24 | --samples-dir "$session/samples" \ 25 | --results-dir "$session/results" \ 26 | --stats-file "$DATA/stats/$key.json" 27 | cd ../parsers 28 | docker compose down 29 | done 30 | done 31 | -------------------------------------------------------------------------------- /tools/fuzz-stats.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from matplotlib.ticker import FuncFormatter 6 | 7 | # required by HotCRP 8 | plt.rcParams['pdf.fonttype'] = 42 9 | plt.rcParams['ps.fonttype'] = 42 10 | 11 | def load_stats_files(): 12 | stats = [] 13 | for filename in sys.argv[1:]: 14 | with open(filename) as f: 15 | try: 16 | stat = json.load(f) 17 | stats.append(stat) 18 | print(f'Loaded {filename}') 19 | except json.JSONDecodeError: 20 | print(f'Error parsing {filename}') 21 | return stats 22 | 23 | 24 | def group_stats_by_config(stats): 25 | groups = {'full': [], 'argmax_ucb': [], 'byte_mutation_only': []} 26 | 27 | for stat in stats: 28 | argmax_ucb = stat.get('argmax_ucb', False) 29 | byte_mutation_only = stat.get('byte_mutation_only', False) 30 | if argmax_ucb and byte_mutation_only: 31 | print(f'Warning: Found unusual configuration with both flags enabled') 32 | continue 33 | elif argmax_ucb: 34 | 
groups['argmax_ucb'].append(stat) 35 | elif byte_mutation_only: 36 | groups['byte_mutation_only'].append(stat) 37 | else: 38 | groups['full'].append(stat) 39 | 40 | for name, group in groups.items(): 41 | print(f'{name}: {len(group)} sessions') 42 | 43 | return groups 44 | 45 | 46 | def interpolate_data(stats_group, metric, max_time=24*60*60, num_points=500): 47 | timepoints = np.linspace(0, max_time, num_points) 48 | interpolated = [] 49 | 50 | for stat in stats_group: 51 | iterations = stat.get('iterations', []) 52 | times = [it.get('seconds_used', 0) for it in iterations] 53 | values = [it.get(metric, 0) for it in iterations] 54 | 55 | interp_values = np.interp( 56 | timepoints, 57 | times, 58 | values, 59 | left=0, 60 | ) 61 | interpolated.append(interp_values) 62 | 63 | return ( 64 | timepoints / 3600, 65 | np.median(interpolated, axis=0), 66 | ) 67 | 68 | 69 | def plot_metric( 70 | stats_groups, 71 | metric, 72 | output_file, 73 | break_y_axis=None, 74 | y_top=None, 75 | legend_loc='best', 76 | ): 77 | # https://tsitsul.in/blog/coloropt/ 78 | colors = { 79 | 'full': '#4053d3', 80 | 'argmax_ucb': '#ddb310', 81 | 'byte_mutation_only': '#b51d14', 82 | } 83 | labels = { 84 | 'full': 'Full Setup', 85 | 'argmax_ucb': 'Argmax-Based UCB', 86 | 'byte_mutation_only': 'Byte Mutation Only', 87 | } 88 | metric_title = { 89 | 'incons_count': 'Inconsistent Pairs (Median)', 90 | } 91 | 92 | timepoints = np.array([0, 24]) 93 | 94 | if break_y_axis: 95 | fig, (ax_top, ax_bottom) = plt.subplots( 96 | 2, 97 | 1, 98 | figsize=(6, 4), 99 | sharex=True, 100 | gridspec_kw={'height_ratios': [6, 1], 'hspace': 0.12}, 101 | ) 102 | axes = [ax_top, ax_bottom] 103 | else: 104 | fig, ax = plt.subplots(figsize=(6, 4)) 105 | axes = [ax] 106 | 107 | # blend overlapping lines 108 | for t in range(10): 109 | for config_name in reversed(colors): 110 | stats_group = stats_groups.get(config_name) 111 | if not stats_group: 112 | continue 113 | 114 | timepoints, median_values = interpolate_data(stats_group, metric) 115 | 116 | if len(timepoints) == 0: 117 | continue 118 | 119 | for i, ax in enumerate(axes): 120 | y = median_values 121 | if break_y_axis and i == 1: 122 | y = np.where(y <= break_y_axis, y, np.nan) 123 | ax.plot( 124 | timepoints, 125 | y, 126 | alpha=0.8**t, 127 | color=colors[config_name], 128 | label=labels[config_name] if t == 0 else None, 129 | ) 130 | 131 | # Configure each axis 132 | for ax in axes: 133 | ax.grid(True, linestyle='--', alpha=0.7) 134 | ax.yaxis.set_major_formatter( 135 | FuncFormatter( 136 | lambda x, _: f'{round(x/1000)}k' if x >= 10000 else f'{round(x)}' 137 | ) 138 | ) 139 | 140 | if timepoints[-1] == 24: 141 | axes[0].set_xticks(np.arange(0, 25, 4)) 142 | handles, labels = axes[0].get_legend_handles_labels() 143 | axes[0].legend(handles[::-1], labels[::-1], loc=legend_loc) 144 | axes[-1].set_xlabel('Time (hours)') 145 | 146 | if break_y_axis and ax_top and ax_bottom: 147 | ax_top.set_ylim(bottom=break_y_axis, top=y_top) 148 | ax_bottom.set_ylim(top=break_y_axis) 149 | 150 | ax_top.tick_params(bottom=False) 151 | ax_bottom.set_yticks([0, break_y_axis]) 152 | 153 | ax_top.spines['bottom'].set_visible(False) 154 | ax_bottom.spines['top'].set_visible(False) 155 | 156 | # Add break markers 157 | d = 0.015 158 | kwargs = dict(transform=ax_top.transAxes, color='k', clip_on=False) 159 | ax_top.plot((-d, +d), (-d, +d), **kwargs) 160 | ax_top.plot((1 - d, 1 + d), (-d, +d), **kwargs) 161 | kwargs.update(transform=ax_bottom.transAxes) 162 | ax_bottom.plot((-d, +d), (1 - d, 1 + d), **kwargs) 163 
| ax_bottom.plot((1 - d, 1 + d,), (1 - d, 1 + d,), **kwargs) 164 | 165 | fig.subplots_adjust(left=0.15) 166 | fig.text(0.04, 0.5, metric_title[metric], va='center', rotation='vertical') 167 | 168 | else: 169 | axes[0].set_ylabel(metric_title[metric]) 170 | plt.tight_layout() 171 | 172 | plt.savefig(output_file, bbox_inches='tight', pad_inches=0) 173 | print(f'Plot for {metric} saved to {output_file}') 174 | plt.close(fig) 175 | 176 | 177 | def calc_incons(stats_groups, total_pairs): 178 | total_consistent_sets = [] 179 | 180 | for config_name, stats_group in stats_groups.items(): 181 | if not stats_group: 182 | continue 183 | 184 | consistent_sets = [] 185 | incons = [] 186 | 187 | for stats in stats_group: 188 | consistent_set = set(map( 189 | lambda pair: (pair[0], pair[1]), 190 | stats['consistent_pairs'] 191 | )) 192 | incons.append(total_pairs - len(consistent_set)) 193 | consistent_sets.append(consistent_set) 194 | total_consistent_sets.append(consistent_set) 195 | 196 | overall_incons = total_pairs - len(set.intersection(*consistent_sets)) 197 | median_incons = np.median(incons) 198 | avg_incons = np.mean(incons) 199 | 200 | print(f'{config_name}: {overall_incons = } {median_incons = :.1f} {avg_incons = :.1f}') 201 | 202 | print(f'{len(set.intersection(*total_consistent_sets)) = }') 203 | print(set.intersection(*total_consistent_sets)) 204 | 205 | 206 | stats = load_stats_files() 207 | if not stats: 208 | print('No valid stats files provided.') 209 | exit(1) 210 | stats_groups = group_stats_by_config(stats) 211 | total_pairs = stats[0]['iterations'][-1]['incons_count'] + len(stats[0]['consistent_pairs']) 212 | plot_metric( 213 | stats_groups, 214 | 'incons_count', 215 | 'inconsistent-pair-cdf.pdf', 216 | break_y_axis=1000, 217 | y_top=1210, 218 | ) 219 | calc_incons(stats_groups, total_pairs) 220 | -------------------------------------------------------------------------------- /tools/inconsistency-table.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | from os.path import dirname 4 | 5 | PARSER_COUNT = 50 6 | COL_WIDTH = '-2.5pt' 7 | 8 | with open(f'{dirname(__file__)}/../constructions/inconsistency-types.json') as f: 9 | data = json.load(f) 10 | 11 | s = f'\\begin{{tabular}}{{|*{{{PARSER_COUNT + 1}}}{{wc{{{COL_WIDTH}}}|}}}}\n\\hline\n' 12 | 13 | for i in range(PARSER_COUNT): 14 | s += f' & {i + 1}' 15 | 16 | s += r' \\ \hline' + '\n' 17 | 18 | total_types = 0 19 | total_pairs = 0 20 | 21 | for i in range(PARSER_COUNT): 22 | s += f'{i+1}' 23 | for j in range(PARSER_COUNT): 24 | x = len(data[i * PARSER_COUNT + j]['inconsistency_types']) 25 | total_types += x 26 | if x > 0: 27 | total_pairs += 1 28 | s += f' & \\cellcolor{{blue!{0 if x == 0 else x * 3 + 10}}}{"-" if i == j else x}' 29 | s += r' \\ \hline' + '\n' 30 | 31 | s += '\\end{tabular}' 32 | print(s) 33 | 34 | total_types /= 2 35 | total_pairs /= 2 36 | print(f'{total_types = }\n{total_pairs = }', file=sys.stderr) 37 | -------------------------------------------------------------------------------- /tools/parsers-to-table.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import requests 5 | import subprocess 6 | from typing import List, Dict 7 | 8 | with open(f'{os.path.dirname(__file__)}/../parsers/parsers.json') as f: 9 | parser_map = json.load(f) 10 | 11 | gh_token = os.environ.get('GITHUB_TOKEN') or subprocess.check_output(['gh', 'auth', 'token']).decode() 12 | queries = 
[] 13 | for key, parser in parser_map.items(): 14 | if 'github' not in parser: 15 | continue 16 | owner, name = parser['github'].split('/') 17 | queries.append(f'_{len(queries)}: repository(owner: "{owner}", name: "{name}") {{ stargazerCount nameWithOwner }}') 18 | query = f"""query {{ 19 | {'\n '.join(queries)} 20 | }}""" 21 | response = requests.post( 22 | 'https://api.github.com/graphql', 23 | headers={ "Authorization": f"token {gh_token.strip()}"}, 24 | json={ "query": query } 25 | ) 26 | if not response.ok: 27 | print(response.text) 28 | exit(1) 29 | star_map = {} 30 | for data in response.json()['data'].values(): 31 | star_map[data['nameWithOwner']] = data['stargazerCount'] 32 | 33 | parsers : List[Dict[str, str]] = sorted(parser_map.values(), key = lambda p : (p['type'], p['language'], p['name'].lower(), p['version'])) 34 | 35 | for i, parser in enumerate(parsers): 36 | name = parser["name"] 37 | std = parser.get("std", False) 38 | lang = parser["language"] 39 | if std: 40 | lang += '*' 41 | ver = parser['version'] 42 | repo = parser.get('github') 43 | link = parser.get('link') 44 | if repo: 45 | name = rf'\href{{https://github.com/{repo}}}{{{name}}}' 46 | star = star_map[repo] 47 | if star >= 1000: 48 | star = f'{star/1000:.1f}'.rstrip('0').rstrip('.') 49 | star += 'k' 50 | else: 51 | if link: 52 | name = rf'\href{{{link}}}{{{name}}}' 53 | else: 54 | print(f'no link for {name}', file=sys.stderr) 55 | star = '-' 56 | print(rf' {i+1} & {name} & {lang} & {ver} & {star} \\ \hline'.replace('#', r'\#')) 57 | -------------------------------------------------------------------------------- /tools/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | base="$(dirname "$(dirname "$(realpath "$0")")")" 6 | input_dir="${INPUT_DIR:-$base/evaluation/input}" 7 | output_dir="${OUTPUT_DIR:-$base/evaluation/output}" 8 | 9 | cd "$base"/parsers 10 | echo "services:" > docker-compose.yml 11 | 12 | for i in */; do 13 | cp unzip-all.sh parallel-unzip-all.sh testcase.sh "$i" 14 | parser=${i%/} 15 | echo " $parser: 16 | build: $parser 17 | volumes: 18 | - $input_dir:/input:ro 19 | - $output_dir/$parser:/output" >> docker-compose.yml 20 | done 21 | -------------------------------------------------------------------------------- /tools/run-parsers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | base="$(dirname "$(dirname "$(realpath "$0")")")" 6 | 7 | "$base"/tools/prepare.sh 8 | 9 | sudo rm -rf "$base"/evaluation/{input,output} 10 | mkdir -p "$base/evaluation/input" 11 | 12 | for i in $(seq 1 $#); do 13 | testcase="$(realpath "${!i}")" 14 | cp "$testcase" "$base/evaluation/input/$i.zip" 15 | done 16 | 17 | pushd "$base/parsers" 18 | sudo docker compose up 19 | popd 20 | 21 | for i in $(seq 1 $#); do 22 | testcase="$(realpath "${!i}")" 23 | result="$base/evaluation/results/${testcase#"$base/"}" 24 | sudo rm -rf "$result" 25 | mkdir -p "$result" 26 | for p in "$base/parsers/"*/; do 27 | parser="$(basename "$p")" 28 | sudo mv "$base/evaluation/output/$parser/$i.zip" "$result/$parser" & 29 | done 30 | done 31 | 32 | wait 33 | -------------------------------------------------------------------------------- /zip-diff/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /zip-diff/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "zip-diff" 3 | version = "0.1.0" 4 | edition = "2021" 5 | description = "Differential fuzzing for ZIP parsers." 6 | license = "Apache-2.0" 7 | default-run = "fuzz" 8 | 9 | [dependencies] 10 | anyhow = { version = "1.0.75", features = ["backtrace"] } 11 | binwrite = "0.2.1" 12 | bitflags = "2.4.1" 13 | blake3 = { version = "1.5.4", features = ["rayon", "mmap"] } 14 | byteorder = "1.5.0" 15 | bzip2 = "0.4.4" 16 | chrono = { version = "0.4.38", default-features = false, features = ["now"] } 17 | clap = { version = "4.5.36", features = ["derive"] } 18 | crc32fast = "1.3.2" 19 | downcast-rs = "1.2.0" 20 | dyn-clone = "1.0.16" 21 | educe = { version = "0.5.11", default-features = false, features = ["Debug", "Default"] } 22 | fixedbitset = "0.5.7" 23 | flate2 = "1.0.28" 24 | fs4 = "0.13.1" 25 | itertools = "0.13.0" 26 | lzma-rs = "0.3.0" 27 | num-traits = "0.2.19" 28 | rand = "0.8.5" 29 | rayon = "1.10.0" 30 | serde = { version = "1.0.210", features = ["derive"] } 31 | serde_json = "1.0.128" 32 | sysinfo = { version = "0.34.2", default-features = false, features = ["system"] } 33 | tar = "0.4.42" 34 | vec_box = "1.0.0" 35 | walkdir = "2.5.0" 36 | zip_structs = "0.2.1" 37 | zstd = { version = "0.13.2", features = ["zstdmt"] } 38 | 39 | [[bin]] 40 | name = "fuzz" 41 | path = "src/fuzz/main.rs" 42 | 43 | [[bin]] 44 | name = "construction" 45 | path = "src/construction/main.rs" 46 | 47 | [[bin]] 48 | name = "count" 49 | path = "src/count/main.rs" 50 | -------------------------------------------------------------------------------- /zip-diff/src/cdh.rs: -------------------------------------------------------------------------------- 1 | use crate::extra::{ExtraField, Zip64ExtendedInfo}; 2 | use crate::fields::*; 3 | use crate::lfh::LocalFileHeader; 4 | use crate::utils::{binwrite_transform, BinCount}; 5 | use crate::zip::FileEntry; 6 | use anyhow::{bail, Context, Result}; 7 | use binwrite::BinWrite; 8 | use educe::Educe; 9 | 10 | #[derive(BinWrite, Clone, Educe)] 11 | #[educe(Debug, Default)] 12 | pub struct CentralDirectoryHeader { 13 | #[educe(Default = Self::SIGNATURE)] 14 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))] 15 | pub signature: u32, 16 | #[educe(Default = 20)] 17 | pub version_made_by: u16, 18 | #[educe(Default = 20)] 19 | pub version_needed: u16, 20 | #[binwrite(with(binwrite_transform))] 21 | pub general_purpose_flag: GeneralPurposeFlag, 22 | #[binwrite(with(binwrite_transform))] 23 | pub compression_method: CompressionMethod, 24 | pub last_mod: DosDateTime, 25 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))] 26 | pub crc32: u32, 27 | pub compressed_size: u32, 28 | pub uncompressed_size: u32, 29 | pub file_name_length: u16, 30 | pub extra_field_length: u16, 31 | pub file_comment_length: u16, 32 | pub disk_number_start: u16, 33 | #[binwrite(with(binwrite_transform))] 34 | pub internal_file_attributes: InternalFileAttributes, 35 | pub external_file_attributes: u32, 36 | pub relative_header_offset: u32, 37 | #[educe(Debug(method(crate::utils::fmt_utf8)))] 38 | pub file_name: Vec<u8>, 39 | pub extra_fields: Vec<ExtraField>, 40 | /// only one of `extra_fields` and `extra_fields_raw` can be set 41 | #[educe(Debug(method(crate::utils::fmt_hex)))] 42 | pub extra_fields_raw: Vec<u8>, 43 | pub file_comment: Vec<u8>, 44 | 45 | #[binwrite(ignore)] 46 | pub zip64: Zip64ExtendedInfo, 47 | } 48 | 49 | impl CentralDirectoryHeader { 50 | pub const SIGNATURE: u32 = 0x02014b50; 51 | 52 | /// Set CDH field and ZIP64
field according to size 53 | pub fn set_offset(&mut self, offset: usize, force_zip64: bool) { 54 | if !force_zip64 { 55 | if let Ok(offset) = offset.try_into() { 56 | self.relative_header_offset = offset; 57 | self.zip64.relative_header_offset = None; 58 | return; 59 | } 60 | } 61 | self.relative_header_offset = u32::MAX; 62 | self.zip64.relative_header_offset = Some(offset as u64); 63 | } 64 | 65 | /// Finalize extra fields, add ZIP64 field 66 | pub fn finalize(&mut self) -> Result<()> { 67 | if !self.zip64.is_empty() { 68 | self.extra_fields.push(ExtraField { 69 | header_id: 0, 70 | size: 0, 71 | data: Box::new(self.zip64.clone()), 72 | }); 73 | } 74 | 75 | if !self.extra_fields.is_empty() && !self.extra_fields_raw.is_empty() { 76 | bail!("extra_fields and extra_fields_raw cannot be set at the same time"); 77 | } 78 | 79 | if self.extra_fields.is_empty() { 80 | self.extra_field_length = self 81 | .extra_fields_raw 82 | .len() 83 | .try_into() 84 | .context("Extra fields too long")?; 85 | } else { 86 | for field in &mut self.extra_fields { 87 | field.finalize()?; 88 | } 89 | 90 | self.extra_field_length = self 91 | .extra_fields 92 | .byte_count() 93 | .context("Failed to count extra fields")? 94 | .try_into() 95 | .context("Extra fields too long")?; 96 | } 97 | 98 | Ok(()) 99 | } 100 | } 101 | 102 | impl From<&LocalFileHeader> for CentralDirectoryHeader { 103 | fn from(lfh: &LocalFileHeader) -> Self { 104 | Self { 105 | version_made_by: lfh.version_needed, 106 | version_needed: lfh.version_needed, 107 | general_purpose_flag: lfh.general_purpose_flag, 108 | compression_method: lfh.compression_method, 109 | last_mod: lfh.last_mod, 110 | crc32: lfh.crc32, 111 | compressed_size: lfh.compressed_size, 112 | uncompressed_size: lfh.uncompressed_size, 113 | file_name_length: lfh.file_name_length, 114 | extra_field_length: lfh.extra_field_length, 115 | file_name: lfh.file_name.clone(), 116 | extra_fields: lfh.extra_fields.clone(), 117 | extra_fields_raw: lfh.extra_fields_raw.clone(), 118 | zip64: lfh.zip64.clone(), 119 | ..Default::default() 120 | } 121 | } 122 | } 123 | 124 | impl From<&FileEntry> for CentralDirectoryHeader { 125 | fn from(fe: &FileEntry) -> Self { 126 | match &fe.dd { 127 | None => (&fe.lfh).into(), 128 | Some(dd) => Self { 129 | version_made_by: fe.lfh.version_needed, 130 | version_needed: fe.lfh.version_needed, 131 | general_purpose_flag: fe.lfh.general_purpose_flag, 132 | compression_method: fe.lfh.compression_method, 133 | last_mod: fe.lfh.last_mod, 134 | crc32: dd.crc32, 135 | compressed_size: dd.compressed_size.saturate(), 136 | uncompressed_size: dd.uncompressed_size.saturate(), 137 | file_name_length: fe.lfh.file_name_length, 138 | extra_field_length: fe.lfh.extra_field_length, 139 | file_name: fe.lfh.file_name.clone(), 140 | extra_fields: fe.lfh.extra_fields.clone(), 141 | extra_fields_raw: fe.lfh.extra_fields_raw.clone(), 142 | zip64: fe.lfh.zip64.clone(), 143 | ..Default::default() 144 | }, 145 | } 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /zip-diff/src/compress.rs: -------------------------------------------------------------------------------- 1 | use crate::fields::CompressionMethod; 2 | use anyhow::{bail, Context, Result}; 3 | use bzip2::{bufread::BzDecoder, write::BzEncoder, Compression as BzCompression}; 4 | use flate2::{bufread::DeflateDecoder, write::DeflateEncoder, Compression as DeflateCompression}; 5 | use lzma_rs::{lzma_compress, lzma_decompress, xz_compress, xz_decompress}; 6 | use 
std::io::{Cursor, Read, Write}; 7 | 8 | pub fn compress(method: CompressionMethod, data: &[u8]) -> Result<Vec<u8>> { 9 | match method { 10 | CompressionMethod::STORED => Ok(Vec::from(data)), 11 | CompressionMethod::DEFLATED => { 12 | let mut encoder = DeflateEncoder::new(Vec::new(), DeflateCompression::default()); 13 | encoder.write_all(data).context("Failed to deflate")?; 14 | encoder.finish().context("Failed to deflate") 15 | } 16 | CompressionMethod::BZIP2 => { 17 | let mut encoder = BzEncoder::new(Vec::new(), BzCompression::default()); 18 | encoder.write_all(data).context("Failed to bzip2")?; 19 | encoder.finish().context("Failed to bzip2") 20 | } 21 | CompressionMethod::ZSTD => zstd::encode_all(data, 0).context("Failed to ZSTD compress"), 22 | CompressionMethod::LZMA => { 23 | let mut input = Cursor::new(data); 24 | let mut output = Vec::new(); 25 | lzma_compress(&mut input, &mut output).context("Failed to LZMA compress")?; 26 | Ok(output) 27 | } 28 | CompressionMethod::XZ => { 29 | let mut input = Cursor::new(data); 30 | let mut output = Vec::new(); 31 | xz_compress(&mut input, &mut output).context("Failed to XZ compress")?; 32 | Ok(output) 33 | } 34 | _ => bail!("Compression method {:?} not implemented", method), 35 | } 36 | } 37 | 38 | pub fn decompress(method: CompressionMethod, data: &[u8]) -> Result<Vec<u8>> { 39 | match method { 40 | CompressionMethod::STORED => Ok(Vec::from(data)), 41 | CompressionMethod::DEFLATED => { 42 | let mut decoder = DeflateDecoder::new(data); 43 | let mut buf = Vec::new(); 44 | decoder.read_to_end(&mut buf).context("Failed to inflate")?; 45 | Ok(buf) 46 | } 47 | CompressionMethod::BZIP2 => { 48 | let mut decoder = BzDecoder::new(data); 49 | let mut buf = Vec::new(); 50 | decoder.read_to_end(&mut buf).context("Failed to bunzip2")?; 51 | Ok(buf) 52 | } 53 | CompressionMethod::ZSTD => zstd::decode_all(data).context("Failed to ZSTD decompress"), 54 | CompressionMethod::LZMA => { 55 | let mut input = Cursor::new(data); 56 | let mut output = Vec::new(); 57 | lzma_decompress(&mut input, &mut output).context("Failed to LZMA decompress")?; 58 | Ok(output) 59 | } 60 | CompressionMethod::XZ => { 61 | let mut input = Cursor::new(data); 62 | let mut output = Vec::new(); 63 | xz_decompress(&mut input, &mut output).context("Failed to XZ decompress")?; 64 | Ok(output) 65 | } 66 | _ => bail!("Decompression method {:?} not implemented", method), 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /zip-diff/src/construction/a.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | 3 | pub mod a1; 4 | pub mod a2; 5 | pub mod a3; 6 | pub mod a4; 7 | pub mod a5; 8 | 9 | pub fn main() -> Result<()> { 10 | a1::main()?; 11 | a2::main()?; 12 | a3::main()?; 13 | a4::main()?; 14 | a5::main()?; 15 | Ok(()) 16 | } 17 | -------------------------------------------------------------------------------- /zip-diff/src/construction/a/a1.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::testcase; 2 | use anyhow::Result; 3 | use zip_diff::fields::CompressionMethod; 4 | use zip_diff::zip::ZipArchive; 5 | 6 | const DATA: &[u8] = b"test"; 7 | 8 | fn stored_lfh() -> Result<ZipArchive> { 9 | let mut zip = ZipArchive::default(); 10 | zip.add_file("test", DATA, CompressionMethod::DEFLATED, false, false)?; 11 | zip.finalize()?; 12 | zip.files[0].lfh.compression_method = CompressionMethod::STORED; 13 | zip.files[0].lfh.compressed_size = DATA.len().try_into().unwrap(); 14 | Ok(zip) 15 | } 16 | 17 | fn stored_cdh() -> Result<ZipArchive> { 18 | let mut zip = ZipArchive::default(); 19 | zip.add_file("test", DATA, CompressionMethod::DEFLATED, false, false)?; 20 | zip.finalize()?; 21 | zip.cd[0].compression_method = CompressionMethod::STORED; 22 | zip.cd[0].compressed_size = DATA.len().try_into().unwrap(); 23 | Ok(zip) 24 | } 25 | 26 | pub fn main() -> Result<()> { 27 | testcase(stored_lfh)?; 28 | testcase(stored_cdh)?; 29 | Ok(()) 30 | } 31 | --------------------------------------------------------------------------------
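Every construction under construction/a follows the template visible in a1.rs above: build a well-formed archive, call finalize(), then desynchronize a single copy of one field so the local and central records disagree. As a rough sketch of how a further testcase would plug into this framework — crc_mismatch is a hypothetical illustration using only the helpers already shown (ZipArchive::default, add_simple, finalize, testcase), not a construction shipped in this directory:

    use crate::utils::testcase;
    use anyhow::Result;
    use zip_diff::zip::ZipArchive;

    /// Hypothetical construction: LFH and CDH disagree only on the CRC-32.
    fn crc_mismatch() -> Result<ZipArchive> {
        let mut zip = ZipArchive::default();
        zip.add_simple("test", b"test")?;
        zip.finalize()?;
        // Flip a bit after finalize() so only the central copy is wrong.
        zip.cd[0].crc32 ^= 1;
        Ok(zip)
    }

    pub fn main() -> Result<()> {
        testcase(crc_mismatch)?;
        Ok(())
    }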
/zip-diff/src/construction/a/a2.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::{testcase, testcase_arg}; 2 | use anyhow::Result; 3 | use bitflags::bitflags; 4 | use zip_diff::dd::{DataDescriptor, U32or64}; 5 | use zip_diff::extra::Zip64ExtendedInfo; 6 | use zip_diff::fields::{CompressionMethod, GeneralPurposeFlag}; 7 | use zip_diff::utils::crc32_patch; 8 | use zip_diff::zip::ZipArchive; 9 | 10 | const DATA: &[u8] = b"test"; 11 | 12 | #[derive(Clone, Copy)] 13 | struct LfhCdh(u8); 14 | 15 | bitflags! { 16 | impl LfhCdh: u8 { 17 | const Deflated = 1 << 0; 18 | const LfhCompressed = 1 << 1; 19 | const LfhUncompressed = 1 << 2; 20 | const CdhCompressed = 1 << 3; 21 | const CdhUncompressed = 1 << 4; 22 | } 23 | } 24 | 25 | #[derive(Clone, Copy)] 26 | struct DataDescriptorFlags(u8); 27 | 28 | bitflags! { 29 | impl DataDescriptorFlags: u8 { 30 | const CompressedZero = 1 << 0; 31 | const UncompressedZero = 1 << 1; 32 | const Size64 = 1 << 2; 33 | } 34 | } 35 | 36 | #[derive(Clone, Copy)] 37 | struct Zip64Flags(u8); 38 | 39 | bitflags! { 40 | impl Zip64Flags: u8 { 41 | const CompressedSize = 1 << 0; 42 | const UncompressedSize = 1 << 1; 43 | } 44 | } 45 | 46 | struct Args { 47 | lfh_cdh_flags: LfhCdh, 48 | lfh_zip64: Zip64Flags, 49 | cdh_zip64: Zip64Flags, 50 | dd_flags: Option<DataDescriptorFlags>, 51 | } 52 | 53 | fn size_confusion(args: Args) -> Result<ZipArchive> { 54 | let mut zip = ZipArchive::default(); 55 | 56 | let mut data = Vec::from(DATA); 57 | let patch = crc32_patch(&data, 0); 58 | data.extend(patch.to_le_bytes()); 59 | 60 | let compression = if args.lfh_cdh_flags.contains(LfhCdh::Deflated) { 61 | CompressionMethod::DEFLATED 62 | } else { 63 | CompressionMethod::STORED 64 | }; 65 | 66 | zip.add_file("test", &data, compression, false, false)?; 67 | zip.finalize()?; 68 | 69 | if let Some(flags) = args.dd_flags { 70 | let lfh = &mut zip.files[0].lfh; 71 | let cdh = &mut zip.cd[0]; 72 | 73 | let compressed_size = if flags.contains(DataDescriptorFlags::CompressedZero) { 74 | 0 75 | } else { 76 | lfh.compressed_size 77 | }; 78 | 79 | let uncompressed_size = if flags.contains(DataDescriptorFlags::UncompressedZero) { 80 | 0 81 | } else { 82 | lfh.uncompressed_size 83 | }; 84 | 85 | let (compressed_size, uncompressed_size) = if flags.contains(DataDescriptorFlags::Size64) { 86 | ( 87 | U32or64::U64(compressed_size.into()), 88 | U32or64::U64(uncompressed_size.into()), 89 | ) 90 | } else { 91 | ( 92 | U32or64::U32(compressed_size), 93 | U32or64::U32(uncompressed_size), 94 | ) 95 | }; 96 | 97 | let dd = DataDescriptor { 98 | compressed_size, 99 | uncompressed_size, 100 | ..Default::default() 101 | }; 102 | 103 | lfh.general_purpose_flag 104 | .insert(GeneralPurposeFlag::DataDescriptor); 105 | cdh.general_purpose_flag 106 | .insert(GeneralPurposeFlag::DataDescriptor); 107 | zip.files[0].dd = Some(dd); 108 | } 109 | 110 | let lfh = &mut zip.files[0].lfh; 111 | let cdh = &mut zip.cd[0]; 112 | 113 | if args.lfh_cdh_flags.contains(LfhCdh::LfhCompressed) { 114 | lfh.compressed_size = 0; 115 | } 116 | if args.lfh_cdh_flags.contains(LfhCdh::LfhUncompressed) { 117 | lfh.uncompressed_size = 0; 118 | } 119 | if args.lfh_cdh_flags.contains(LfhCdh::CdhCompressed) { 120 | cdh.compressed_size = 0; 121 | } 122 | if args.lfh_cdh_flags.contains(LfhCdh::CdhUncompressed) { 123 | cdh.uncompressed_size = 0; 124 | } 125 | 126 | if !args.lfh_zip64.is_empty() { 127 | let compressed_size = if args.lfh_zip64.contains(Zip64Flags::CompressedSize) { 128 | let size = lfh.compressed_size; 129 | lfh.compressed_size = u32::MAX; 130 | Some(size.into()) 131 | } else { 132 | None 133 | }; 134 | let original_size = if args.lfh_zip64.contains(Zip64Flags::UncompressedSize) { 135 | let size = lfh.uncompressed_size; 136 | lfh.uncompressed_size = u32::MAX; 137 | Some(size.into()) 138 | } else { 139 | None 140 | }; 141 | let zip64 = Zip64ExtendedInfo { 142 | compressed_size, 143 | original_size, 144 | ..Default::default() 145 | }; 146 | lfh.extra_fields.push(zip64.into()); 147 | } 148 | 149 | if !args.cdh_zip64.is_empty() { 150 | let compressed_size = if args.cdh_zip64.contains(Zip64Flags::CompressedSize) { 151 | let size = cdh.compressed_size; 152 | cdh.compressed_size = u32::MAX; 153 | Some(size.into()) 154 | } else { 155 | None 156 | }; 157 | let original_size = if args.cdh_zip64.contains(Zip64Flags::UncompressedSize) { 158 | let size = cdh.uncompressed_size; 159 | cdh.uncompressed_size = u32::MAX; 160 | Some(size.into()) 161 | } else { 162 | None 163 | }; 164 | let zip64 = Zip64ExtendedInfo { 165 | compressed_size, 166 | original_size, 167 | ..Default::default() 168 | }; 169 | cdh.extra_fields.push(zip64.into()); 170 | } 171 | 172 | zip.set_offsets(0)?; 173 | 174 | Ok(zip) 175 | } 176 | 177 | fn multiple_zip64() -> Result<ZipArchive> { 178 | let mut zip = ZipArchive::default(); 179 | zip.add_file("test", DATA, CompressionMethod::STORED, true, false)?; 180 | zip.finalize()?; 181 | let zip64 = Zip64ExtendedInfo { 182 | original_size: Some(0), 183 | compressed_size: Some(0), 184 | relative_header_offset: None, 185 | disk_start_number: None, 186 | }; 187 | zip.files[0].lfh.extra_fields.push(zip64.clone().into()); 188 | zip.cd[0].extra_fields.push(zip64.into()); 189 | zip.set_offsets(0)?; 190 | Ok(zip) 191 | } 192 | 193 | pub fn main() -> Result<()> { 194 | for i in 0..32 { 195 | let lfh_cdh_flags = LfhCdh::from_bits_truncate(i); 196 | for i in 0..=8 { 197 | let dd_flags = if i == 8 { 198 | None 199 | } else { 200 | Some(DataDescriptorFlags::from_bits_truncate(i)) 201 | }; 202 | for i in 0..4 { 203 | let lfh_zip64 = Zip64Flags::from_bits_truncate(i); 204 | for i in 0..4 { 205 | let cdh_zip64 = Zip64Flags::from_bits_truncate(i); 206 | let args = Args { 207 | lfh_cdh_flags, 208 | dd_flags, 209 | lfh_zip64, 210 | cdh_zip64, 211 | }; 212 | testcase_arg(size_confusion, args)?; 213 | } 214 | } 215 | } 216 | } 217 | testcase(multiple_zip64)?; 218 | Ok(()) 219 | } 220 | --------------------------------------------------------------------------------
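The crc32_patch call at the top of size_confusion() is what keeps the conflicting size fields checksum-consistent: the CRC-32 of empty input is 0, and the four appended patch bytes appear intended to force the CRC-32 of the full payload to 0 as well, so a parser that honors a zeroed size field and a parser that reads the whole payload both see a matching checksum. A self-check sketch of that assumed contract, using the crc32fast dependency from Cargo.toml (if crc32_patch's contract differs, this assertion is the thing to adjust):

    use zip_diff::utils::crc32_patch;

    fn check_patch_property() {
        let mut data = Vec::from(&b"test"[..]);
        // Assumed contract: the returned u32, appended little-endian,
        // forces the CRC-32 of the extended buffer to the target (0 here).
        let patch = crc32_patch(&data, 0);
        data.extend(patch.to_le_bytes());
        assert_eq!(crc32fast::hash(&data), 0); // full-length interpretation
        assert_eq!(crc32fast::hash(b""), 0); // zero-length interpretation
    }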
/zip-diff/src/construction/a/a3.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::{testcase, CRC32A, CRC32B}; 2 | use anyhow::Result; 3 | use zip_diff::extra::InfoZipUnicodePath; 4 | use zip_diff::fields::GeneralPurposeFlag; 5 | use zip_diff::zip::ZipArchive; 6 | 7 | const DATA: &[u8] = b"test"; 8 | 9 | fn lfh_cdh() -> Result<ZipArchive> { 10 | let mut zip = ZipArchive::default(); 11 | zip.add_simple("lfh", DATA)?; 12 | zip.finalize()?; 13 | zip.cd[0].file_name = "cdh".into(); 14 | Ok(zip) 15 | } 16 | 17 | fn up_lfh_cdh() -> Result<ZipArchive> { 18 | let mut zip = ZipArchive::default(); 19 | zip.add_simple("original", DATA)?; 20 | let lfh = &mut zip.files[0].lfh; 21 | let name_crc32 = crc32fast::hash(&lfh.file_name); 22 | let up = InfoZipUnicodePath { 23 | version: 1, 24 | name_crc32, 25 | unicode_name: "lfh".into(), 26 | }; 27 | lfh.extra_fields.push(up.into()); 28 | zip.finalize()?; 29 | let cd_up: &mut InfoZipUnicodePath = zip.cd[0].extra_fields[0].data.downcast_mut().unwrap(); 30 | cd_up.unicode_name = "cdh".into(); 31 | Ok(zip) 32 | } 33 | 34 | fn up_version() -> Result<ZipArchive> { 35 | let mut zip = ZipArchive::default(); 36 | 37 | zip.add_simple("v0-original", DATA)?; 38 | let lfh = &mut zip.files[0].lfh; 39 | let name_crc32 = crc32fast::hash(&lfh.file_name); 40 | let up = InfoZipUnicodePath { 41 | version: 0, 42 | name_crc32, 43 | unicode_name: "v0-up".into(), 44 | }; 45 | lfh.extra_fields.push(up.into()); 46 | 47 | zip.add_simple("v2-original", DATA)?; 48 | let lfh = &mut zip.files[1].lfh; 49 | let name_crc32 = crc32fast::hash(&lfh.file_name); 50 | let up = InfoZipUnicodePath { 51 | version: 2, 52 | name_crc32, 53 | unicode_name: "v2-up".into(), 54 | }; 55 | lfh.extra_fields.push(up.into()); 56 | 57 | zip.finalize()?; 58 | Ok(zip) 59 | } 60 | 61 | fn up_incorrect_crc32() -> Result<ZipArchive> { 62 | let mut zip = ZipArchive::default(); 63 | 64 | zip.add_simple("original", DATA)?; 65 | let lfh = &mut zip.files[0].lfh; 66 | let up = InfoZipUnicodePath { 67 | version: 1, 68 | name_crc32: 0, 69 | unicode_name: "up".into(), 70 | }; 71 | lfh.extra_fields.push(up.into()); 72 | 73 | zip.finalize()?; 74 | Ok(zip) 75 | } 76 | 77 | fn up_crc32_source() -> Result<ZipArchive> { 78 | let mut zip = ZipArchive::default(); 79 | 80 | zip.add_simple("1-original", DATA)?; 81 | let lfh = &mut zip.files[0].lfh; 82 | let name_crc32 = crc32fast::hash(&lfh.file_name); 83 | let up1 = InfoZipUnicodePath { 84 | version: 1, 85 | name_crc32, 86 | unicode_name: "1-up1".into(), 87 | }; 88 | let up2 = InfoZipUnicodePath { 89 | version: 1, 90 | name_crc32, 91 | unicode_name: "1-up2".into(), 92 | }; 93 | lfh.extra_fields.push(up1.into()); 94 | lfh.extra_fields.push(up2.into()); 95 | 96 | zip.add_simple("2-original", DATA)?; 97 | let lfh = &mut zip.files[1].lfh; 98 | let name_crc32 = crc32fast::hash(&lfh.file_name); 99 | let up1 = InfoZipUnicodePath { 100 | version: 1, 101 | name_crc32, 102 | unicode_name: "2-up1".into(), 103 | }; 104 | let name_crc32 = crc32fast::hash(up1.unicode_name.as_bytes()); 105 | let up2 = InfoZipUnicodePath { 106 | version: 1, 107 | name_crc32, 108 | unicode_name: "2-up2".into(), 109 | }; 110 | lfh.extra_fields.push(up1.into()); 111 | lfh.extra_fields.push(up2.into()); 112 | 113 | zip.finalize()?; 114 | Ok(zip) 115 | } 116 | 117 | fn up_invalid() -> Result<ZipArchive> { 118 | let mut zip = ZipArchive::default(); 119 | 120 | zip.add_simple("original", DATA)?; 121 | let lfh = &mut zip.files[0].lfh; 122 | let name_crc32 = crc32fast::hash(&lfh.file_name); 123 | let up1 = InfoZipUnicodePath { 124 | version: 1, 125 | name_crc32, 126 | unicode_name: "up-valid".into(), 127 | }; 128 | // invalid for both version and CRC32 129 | let up2 = InfoZipUnicodePath { 130 | version: 2, 131 | name_crc32: 0, 132 | unicode_name: "up-invalid".into(), 133 | }; 134 | lfh.extra_fields.push(up1.into()); 135 | lfh.extra_fields.push(up2.into()); 136 | 137 | // Same CRC32 to make sure CRC32 check in up3 does not fail regardless of the filename source 138 | zip.add_simple(&format!("{CRC32A}{CRC32A}"), DATA)?; 139 | let lfh = &mut zip.files[1].lfh; 140 | let name_crc32 = crc32fast::hash(&lfh.file_name); 141 | let up1 = InfoZipUnicodePath { 142 | version: 1, 143 | name_crc32, 144 | unicode_name: format!("{CRC32A}{CRC32B}"), 145 | }; 146 | let up2 = InfoZipUnicodePath { 147 | version: 2, 148 | name_crc32: 0, 149 | unicode_name: format!("{CRC32B}{CRC32A}"), 150 | }; 151 | let up3 = InfoZipUnicodePath { 152 | version: 1, 153 | name_crc32, 154 | unicode_name: format!("{CRC32B}{CRC32B}"), 155 | }; 156 | lfh.extra_fields.push(up1.into()); 157 | lfh.extra_fields.push(up2.into()); 158 | lfh.extra_fields.push(up3.into()); 159 | 160 | zip.finalize()?; 161 | Ok(zip) 162 | } 163 | 164 | fn up_efs() -> Result<ZipArchive> { 165 | let mut zip = ZipArchive::default(); 166 | 167 | zip.add_simple("original", DATA)?; 168 | let lfh = &mut zip.files[0].lfh; 169 | lfh.general_purpose_flag 170 | .insert(GeneralPurposeFlag::LanguageEncoding); 171 | let name_crc32 = crc32fast::hash(&lfh.file_name); 172 | let up = InfoZipUnicodePath { 173 | version: 1, 174 | name_crc32, 175 | unicode_name: "up".into(), 176 | }; 177 | lfh.extra_fields.push(up.into()); 178 | 179 | zip.finalize()?; 180 | Ok(zip) 181 | } 182 | 183 | pub fn main() -> Result<()> { 184 | testcase(lfh_cdh)?; 185 | testcase(up_lfh_cdh)?; 186 | testcase(up_version)?; 187 | testcase(up_incorrect_crc32)?; 188 | testcase(up_crc32_source)?; 189 | testcase(up_invalid)?; 190 | testcase(up_efs)?; 191 | Ok(()) 192 | } 193 | --------------------------------------------------------------------------------
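a3.rs targets the Info-ZIP Unicode Path extra field (0x7075). Under the Info-ZIP convention, a reader should prefer unicode_name over the raw header filename only when the field's version is 1 and name_crc32 matches the CRC-32 of the raw name — exactly the two conditions that up_version, up_incorrect_crc32, and up_invalid stress. A sketch of that selection rule; effective_name is illustrative, not an API of this crate:

    use zip_diff::extra::InfoZipUnicodePath;

    /// Pick the filename a UP-honoring extractor is assumed to use.
    fn effective_name<'a>(raw_name: &'a [u8], up: &'a InfoZipUnicodePath) -> &'a str {
        if up.version == 1 && up.name_crc32 == crc32fast::hash(raw_name) {
            &up.unicode_name
        } else {
            // Fall back to the raw header name (lossy handling for the sketch).
            std::str::from_utf8(raw_name).unwrap_or("")
        }
    }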
/zip-diff/src/construction/a/a4.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::{testcase, testcase_arg}; 2 | use anyhow::Result; 3 | use zip_diff::zip::ZipArchive; 4 | 5 | const DOS_ATTR: u32 = 0x10; 6 | const UNIX_ATTR: u32 = 0x4000 << 16; 7 | const DOS_VER: u16 = 0; 8 | const UNIX_VER: u16 = 3 << 8; 9 | const OSX_VER: u16 = 19 << 8; 10 | 11 | fn slash() -> Result<ZipArchive> { 12 | let mut zip = ZipArchive::default(); 13 | zip.add_simple("test/", b"test")?; 14 | zip.finalize()?; 15 | Ok(zip) 16 | } 17 | 18 | fn backslash() -> Result<ZipArchive> { 19 | let mut zip = ZipArchive::default(); 20 | zip.add_simple("test\\", b"test")?; 21 | zip.finalize()?; 22 | Ok(zip) 23 | } 24 | 25 | fn slash_empty() -> Result<ZipArchive> { 26 | let mut zip = ZipArchive::default(); 27 | zip.add_simple("test/", b"")?; 28 | zip.finalize()?; 29 | Ok(zip) 30 | } 31 | 32 | fn external_attr(arg: u8) -> Result<ZipArchive> { 33 | let mut zip = ZipArchive::default(); 34 | zip.add_simple("test", b"test")?; 35 | zip.finalize()?; 36 | zip.cd[0].external_file_attributes |= if arg / 3 == 0 { DOS_ATTR } else { UNIX_ATTR }; 37 | zip.cd[0].version_made_by |= match arg % 3 { 38 | 0 => DOS_VER, 39 | 1 => UNIX_VER, 40 | 2 => OSX_VER, 41 | _ => unreachable!(), 42 | }; 43 | Ok(zip) 44 | } 45 | 46 | pub fn main() -> Result<()> { 47 | testcase(slash)?; 48 | testcase(backslash)?; 49 | testcase(slash_empty)?; 50 | (0..6).try_for_each(|arg| testcase_arg(external_attr, arg))?; 51 | Ok(()) 52 | } 53 | --------------------------------------------------------------------------------
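a4.rs probes the two signals a parser can use to classify an entry as a directory: a trailing slash or backslash in the name, and the external file attributes, whose meaning depends on the host byte of version_made_by (the DOS directory bit 0x10 versus Unix S_IFDIR, 0x4000, in the upper 16 bits — see DOS_ATTR, UNIX_ATTR, and the *_VER constants above). One plausible way to combine the two checks, sketched below; real parsers weight these signals differently, which is precisely what the external_attr variants expose:

    /// Illustrative directory heuristic; not the rule of any specific parser.
    fn looks_like_directory(name: &[u8], version_made_by: u16, external_attr: u32) -> bool {
        let by_name = name.ends_with(b"/") || name.ends_with(b"\\");
        let by_attr = match (version_made_by >> 8) as u8 {
            0 => external_attr & 0x10 != 0, // DOS/FAT directory bit
            3 | 19 => (external_attr >> 16) & 0x4000 != 0, // Unix or macOS S_IFDIR
            _ => false,
        };
        by_name || by_attr
    }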
let lfh = &mut zip.files[1].lfh; 140 | let name_crc32 = crc32fast::hash(&lfh.file_name); 141 | let up1 = InfoZipUnicodePath { 142 | version: 1, 143 | name_crc32, 144 | unicode_name: format!("{CRC32A}{CRC32B}"), 145 | }; 146 | let up2 = InfoZipUnicodePath { 147 | version: 2, 148 | name_crc32: 0, 149 | unicode_name: format!("{CRC32B}{CRC32A}"), 150 | }; 151 | let up3 = InfoZipUnicodePath { 152 | version: 1, 153 | name_crc32, 154 | unicode_name: format!("{CRC32B}{CRC32B}"), 155 | }; 156 | lfh.extra_fields.push(up1.into()); 157 | lfh.extra_fields.push(up2.into()); 158 | lfh.extra_fields.push(up3.into()); 159 | 160 | zip.finalize()?; 161 | Ok(zip) 162 | } 163 | 164 | fn up_efs() -> Result<ZipArchive> { 165 | let mut zip = ZipArchive::default(); 166 | 167 | zip.add_simple("original", DATA)?; 168 | let lfh = &mut zip.files[0].lfh; 169 | lfh.general_purpose_flag 170 | .insert(GeneralPurposeFlag::LanguageEncoding); 171 | let name_crc32 = crc32fast::hash(&lfh.file_name); 172 | let up = InfoZipUnicodePath { 173 | version: 1, 174 | name_crc32, 175 | unicode_name: "up".into(), 176 | }; 177 | lfh.extra_fields.push(up.into()); 178 | 179 | zip.finalize()?; 180 | Ok(zip) 181 | } 182 | 183 | pub fn main() -> Result<()> { 184 | testcase(lfh_cdh)?; 185 | testcase(up_lfh_cdh)?; 186 | testcase(up_version)?; 187 | testcase(up_incorrect_crc32)?; 188 | testcase(up_crc32_source)?; 189 | testcase(up_invalid)?; 190 | testcase(up_efs)?; 191 | Ok(()) 192 | } 193 | -------------------------------------------------------------------------------- /zip-diff/src/construction/a/a4.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::{testcase, testcase_arg}; 2 | use anyhow::Result; 3 | use zip_diff::zip::ZipArchive; 4 | 5 | const DOS_ATTR: u32 = 0x10; 6 | const UNIX_ATTR: u32 = 0x4000 << 16; 7 | const DOS_VER: u16 = 0; 8 | const UNIX_VER: u16 = 3 << 8; 9 | const OSX_VER: u16 = 19 << 8; 10 | 11 | fn slash() -> Result<ZipArchive> { 12 | let mut zip = ZipArchive::default(); 13 | zip.add_simple("test/", b"test")?; 14 | zip.finalize()?; 15 | Ok(zip) 16 | } 17 | 18 | fn backslash() -> Result<ZipArchive> { 19 | let mut zip = ZipArchive::default(); 20 | zip.add_simple("test\\", b"test")?; 21 | zip.finalize()?; 22 | Ok(zip) 23 | } 24 | 25 | fn slash_empty() -> Result<ZipArchive> { 26 | let mut zip = ZipArchive::default(); 27 | zip.add_simple("test/", b"")?; 28 | zip.finalize()?; 29 | Ok(zip) 30 | } 31 | 32 | fn external_attr(arg: u8) -> Result<ZipArchive> { 33 | let mut zip = ZipArchive::default(); 34 | zip.add_simple("test", b"test")?; 35 | zip.finalize()?; 36 | zip.cd[0].external_file_attributes |= if arg / 3 == 0 { DOS_ATTR } else { UNIX_ATTR }; 37 | zip.cd[0].version_made_by |= match arg % 3 { 38 | 0 => DOS_VER, 39 | 1 => UNIX_VER, 40 | 2 => OSX_VER, 41 | _ => unreachable!(), 42 | }; 43 | Ok(zip) 44 | } 45 | 46 | pub fn main() -> Result<()> { 47 | testcase(slash)?; 48 | testcase(backslash)?; 49 | testcase(slash_empty)?; 50 | (0..6).try_for_each(|arg| testcase_arg(external_attr, arg))?; 51 | Ok(()) 52 | } 53 | -------------------------------------------------------------------------------- /zip-diff/src/construction/a/a5.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::testcase; 2 | use anyhow::Result; 3 | use zip_diff::fields::GeneralPurposeFlag; 4 | use zip_diff::zip::ZipArchive; 5 | 6 | fn lfh_enc() -> Result<ZipArchive> { 7 | let mut zip = ZipArchive::default(); 8 | 9 | zip.add_simple("test", b"test")?; 10 | zip.finalize()?; 11 | zip.files[0] 12 | .lfh 13 | .general_purpose_flag 14 |
.insert(GeneralPurposeFlag::Encrypted); 15 | 16 | Ok(zip) 17 | } 18 | 19 | fn cdh_enc() -> Result<ZipArchive> { 20 | let mut zip = ZipArchive::default(); 21 | 22 | zip.add_simple("test", b"test")?; 23 | zip.finalize()?; 24 | zip.cd[0] 25 | .general_purpose_flag 26 | .insert(GeneralPurposeFlag::Encrypted); 27 | 28 | Ok(zip) 29 | } 30 | 31 | fn first_enc() -> Result<ZipArchive> { 32 | let mut zip = ZipArchive::default(); 33 | 34 | zip.add_simple("first", b"first")?; 35 | zip.add_simple("second", b"second")?; 36 | zip.files[0] 37 | .lfh 38 | .general_purpose_flag 39 | .insert(GeneralPurposeFlag::Encrypted); 40 | zip.finalize()?; 41 | 42 | Ok(zip) 43 | } 44 | 45 | pub fn main() -> Result<()> { 46 | testcase(lfh_enc)?; 47 | testcase(cdh_enc)?; 48 | testcase(first_enc)?; 49 | Ok(()) 50 | } 51 | -------------------------------------------------------------------------------- /zip-diff/src/construction/b.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | 3 | pub mod b1; 4 | pub mod b2; 5 | pub mod b3; 6 | pub mod b4; 7 | 8 | pub fn main() -> Result<()> { 9 | b1::main()?; 10 | b2::main()?; 11 | b3::main()?; 12 | b4::main()?; 13 | Ok(()) 14 | } 15 | -------------------------------------------------------------------------------- /zip-diff/src/construction/b/b1.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::testcase; 2 | use anyhow::Result; 3 | use zip_diff::zip::ZipArchive; 4 | 5 | fn duplicate() -> Result<ZipArchive> { 6 | let mut zip = ZipArchive::default(); 7 | zip.add_simple("test", b"a")?; 8 | zip.add_simple("test", b"b")?; 9 | zip.finalize()?; 10 | Ok(zip) 11 | } 12 | 13 | pub fn main() -> Result<()> { 14 | testcase(duplicate)?; 15 | Ok(()) 16 | } 17 | -------------------------------------------------------------------------------- /zip-diff/src/construction/b/b2.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::testcase_arg; 2 | use anyhow::Result; 3 | use zip_diff::zip::ZipArchive; 4 | 5 | enum Host { 6 | Dos, 7 | Unix, 8 | Both, 9 | } 10 | 11 | const UNIX_VER: u16 = 3 << 8; 12 | 13 | fn special_byte((byte, host): (u8, Host)) -> Result<ZipArchive> { 14 | let mut zip = ZipArchive::default(); 15 | zip.add_simple("a b", b"")?; 16 | zip.files[0].lfh.file_name[1] = byte; 17 | if matches!(host, Host::Both) { 18 | zip.files.push(zip.files[0].clone()); 19 | } 20 | zip.finalize()?; 21 | if matches!(host, Host::Unix | Host::Both) { 22 | zip.cd[0].version_made_by |= UNIX_VER; 23 | } 24 | Ok(zip) 25 | } 26 | 27 | fn two_special_bytes((a, b): (u8, u8)) -> Result<ZipArchive> { 28 | let mut zip = ZipArchive::default(); 29 | zip.add_simple("a b", b"")?; 30 | zip.add_simple("a b", b"")?; 31 | zip.files[0].lfh.file_name[1] = a; 32 | zip.files[1].lfh.file_name[1] = b; 33 | zip.finalize()?; 34 | Ok(zip) 35 | } 36 | 37 | pub fn main() -> Result<()> { 38 | for byte in 0..=u8::MAX { 39 | if byte.is_ascii_alphanumeric() { 40 | continue; 41 | } 42 | for host in [Host::Dos, Host::Unix, Host::Both] { 43 | testcase_arg(special_byte, (byte, host))?; 44 | } 45 | } 46 | for a in (0..=u8::MAX) 47 | .step_by(8) 48 | .filter(|&x| !x.is_ascii_alphanumeric()) 49 | { 50 | for b in (7..=u8::MAX) 51 | .step_by(8) 52 | .filter(|&x| !x.is_ascii_alphanumeric()) 53 | { 54 | testcase_arg(two_special_bytes, (a, b))?; 55 | } 56 | } 57 | Ok(()) 58 | } 59 | -------------------------------------------------------------------------------- /zip-diff/src/construction/b/b3.rs:
-------------------------------------------------------------------------------- 1 | use crate::utils::testcase_arg; 2 | use anyhow::Result; 3 | use zip_diff::zip::ZipArchive; 4 | 5 | fn canonical_first(path: &str) -> Result<ZipArchive> { 6 | let mut zip = ZipArchive::default(); 7 | zip.add_simple("a/b", b"a")?; 8 | zip.add_simple(path, b"b")?; 9 | zip.finalize()?; 10 | Ok(zip) 11 | } 12 | 13 | fn canonical_second(path: &str) -> Result<ZipArchive> { 14 | let mut zip = ZipArchive::default(); 15 | zip.add_simple(path, b"a")?; 16 | zip.add_simple("a/b", b"b")?; 17 | zip.finalize()?; 18 | Ok(zip) 19 | } 20 | 21 | pub fn main() -> Result<()> { 22 | [ 23 | "/a/b", 24 | "a//b", 25 | "a\\b", 26 | "./a/b", 27 | "a/./b", 28 | "a/b/.", 29 | "../a/b", 30 | ".../a/b", 31 | "a/.../b", 32 | "c/../a/b", 33 | ] 34 | .into_iter() 35 | .try_for_each(|path| { 36 | testcase_arg(canonical_first, path)?; 37 | testcase_arg(canonical_second, path) 38 | })?; 39 | Ok(()) 40 | } 41 | -------------------------------------------------------------------------------- /zip-diff/src/construction/b/b4.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::testcase; 2 | use anyhow::Result; 3 | use zip_diff::zip::ZipArchive; 4 | 5 | fn casing() -> Result<ZipArchive> { 6 | let mut zip = ZipArchive::default(); 7 | zip.add_simple("test.txt", b"a")?; 8 | zip.add_simple("test.TXT", b"b")?; 9 | zip.finalize()?; 10 | Ok(zip) 11 | } 12 | 13 | pub fn main() -> Result<()> { 14 | testcase(casing)?; 15 | Ok(()) 16 | } 17 | -------------------------------------------------------------------------------- /zip-diff/src/construction/c.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | 3 | pub mod c1; 4 | pub mod c2; 5 | pub mod c3; 6 | pub mod c4; 7 | pub mod c5; 8 | 9 | pub fn main() -> Result<()> { 10 | c1::main()?; 11 | c2::main()?; 12 | c3::main()?; 13 | c4::main()?; 14 | c5::main()?; 15 | Ok(()) 16 | } 17 | -------------------------------------------------------------------------------- /zip-diff/src/construction/c/c1.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::{testcase, testcase_arg}; 2 | use anyhow::Result; 3 | use binwrite::BinWrite; 4 | use zip_diff::dd::{DataDescriptor, U32or64}; 5 | use zip_diff::eocd::EndOfCentralDirectoryRecord; 6 | use zip_diff::fields::CompressionMethod; 7 | use zip_diff::lfh::LocalFileHeader; 8 | use zip_diff::utils::{crc32_patch, BinCount}; 9 | use zip_diff::zip::{FileEntry, ZipArchive}; 10 | 11 | fn no_cdh_for_lfh() -> Result<ZipArchive> { 12 | let mut zip = ZipArchive::default(); 13 | 14 | zip.add_simple("a", b"a")?; 15 | zip.add_simple("b", b"b")?; 16 | zip.finalize()?; 17 | 18 | let cdh = zip.cd.pop().unwrap(); 19 | zip.eocdr.this_disk_cdh_count -= 1; 20 | zip.eocdr.total_cdh_count -= 1; 21 | zip.eocdr.size_of_cd -= cdh.byte_count()?
as u32; 22 | 23 | Ok(zip) 24 | } 25 | 26 | fn truncating_lfh_stream_via_fake_records() -> Result<ZipArchive> { 27 | let mut zip = ZipArchive::default(); 28 | 29 | zip.add_simple("a", b"a")?; 30 | zip.add_simple("b", b"b")?; 31 | zip.add_simple("c", b"c")?; 32 | zip.finalize()?; 33 | 34 | let eocdr = EndOfCentralDirectoryRecord { 35 | this_disk_cdh_count: 1, 36 | total_cdh_count: 1, 37 | size_of_cd: zip.cd[0].byte_count()?.try_into()?, 38 | offset_of_cd_wrt_starting_disk: zip.files[0].byte_count()?.try_into()?, 39 | ..Default::default() 40 | }; 41 | 42 | zip.cd[0].write(&mut zip.files[0].data)?; 43 | eocdr.write(&mut zip.files[1].data)?; 44 | zip.finalize()?; 45 | 46 | Ok(zip) 47 | } 48 | 49 | fn truncating_lfh_stream_via_lfh_inside_comments() -> Result<ZipArchive> { 50 | let mut zip = ZipArchive::default(); 51 | 52 | zip.add_simple("a", b"a")?; 53 | zip.add_simple("b", b"b")?; 54 | zip.add_simple("c", b"b")?; 55 | zip.finalize()?; 56 | 57 | let entry2 = zip.files.pop().unwrap(); 58 | let entry1 = zip.files.pop().unwrap(); 59 | 60 | let mut offset = zip.files.byte_count()?; 61 | zip.eocdr.offset_of_cd_wrt_starting_disk = offset.try_into()?; 62 | 63 | offset += zip.cd[0..1].byte_count()?; 64 | let cdh = &mut zip.cd[1]; 65 | entry1.write(&mut cdh.file_comment)?; 66 | cdh.file_comment_length = cdh.file_comment.len().try_into()?; 67 | cdh.relative_header_offset = offset.try_into()?; 68 | 69 | let cdh = &mut zip.cd[2]; 70 | offset += cdh.file_comment.len() + cdh.byte_count()? + zip.eocdr.byte_count()?; 71 | entry2.write(&mut zip.eocdr.zip_file_comment)?; 72 | zip.eocdr.zip_file_comment_length = zip.eocdr.zip_file_comment.len().try_into()?; 73 | cdh.relative_header_offset = offset.try_into()?; 74 | 75 | Ok(zip) 76 | } 77 | 78 | fn lfh_desync(overlap: bool) -> Result<ZipArchive> { 79 | let mut zip = ZipArchive::default(); 80 | 81 | let mut buf = Vec::new(); 82 | let entry = FileEntry::new("a", b"a", CompressionMethod::STORED, false, false)?; 83 | entry.write(&mut buf)?; 84 | 85 | zip.add_simple("junk", &buf)?; 86 | zip.add_simple("b", b"b")?; 87 | zip.finalize()?; 88 | 89 | let mut offset = LocalFileHeader { 90 | file_name: "junk".into(), 91 | ..Default::default() 92 | } 93 | .byte_count()?; 94 | let mut cd = Vec::new(); 95 | entry.push_into_cd(&mut cd, &mut offset)?; 96 | 97 | if overlap { 98 | let mut offset = 0; 99 | zip.files[0].push_into_cd(&mut cd, &mut offset)?; 100 | zip.files[1].push_into_cd(&mut cd, &mut offset)?; 101 | } 102 | 103 | let eocdr = EndOfCentralDirectoryRecord { 104 | this_disk_cdh_count: cd.len().try_into()?, 105 | total_cdh_count: cd.len().try_into()?, 106 | size_of_cd: cd.byte_count()?.try_into()?, 107 | offset_of_cd_wrt_starting_disk: zip.byte_count()?.try_into()?, 108 | ..Default::default() 109 | }; 110 | 111 | cd.write(&mut zip.eocdr.zip_file_comment)?; 112 | eocdr.write(&mut zip.eocdr.zip_file_comment)?; 113 | zip.eocdr.zip_file_comment_length = zip.eocdr.zip_file_comment.len().try_into()?; 114 | 115 | Ok(zip) 116 | } 117 | 118 | fn dd_pos(deflated: bool) -> Result<ZipArchive> { 119 | let file_a = FileEntry::new("a", b"a", CompressionMethod::STORED, false, false)?; 120 | let file_b = FileEntry::new("b", b"b", CompressionMethod::STORED, false, false)?; 121 | 122 | let junk1b = FileEntry::new( 123 | "junk1", 124 | b"", 125 | if deflated { 126 | CompressionMethod::DEFLATED 127 | } else { 128 | CompressionMethod::STORED 129 | }, 130 | false, 131 | true, 132 | )?; 133 | 134 | let junk1a_bare = junk1b.clone(); 135 | let junk2_bare = FileEntry::new("junk2", b"", CompressionMethod::STORED, false, false)?; 136 | let
junk3_bare = FileEntry::new("junk3", b"", CompressionMethod::STORED, false, false)?; 137 | 138 | let junk2_len = 139 | junk1a_bare.dd.unwrap().byte_count()? + file_b.byte_count()? + junk3_bare.byte_count()? + 4; 140 | let junk2_lfh = LocalFileHeader { 141 | compressed_size: junk2_len as u32, 142 | uncompressed_size: junk2_len as u32, 143 | ..junk2_bare.lfh.clone() 144 | }; 145 | 146 | let mut junk1a_data = junk1a_bare.data; 147 | junk1b.dd.as_ref().unwrap().write(&mut junk1a_data)?; 148 | file_b.write(&mut junk1a_data)?; 149 | junk2_lfh.write(&mut junk1a_data)?; 150 | 151 | let junk1a_dd = DataDescriptor { 152 | signature: Some(DataDescriptor::SIGNATURE), 153 | crc32: crc32fast::hash(&junk1a_data), 154 | compressed_size: U32or64::U32(junk1a_data.len() as u32), 155 | uncompressed_size: U32or64::U32(junk1a_data.len() as u32), 156 | }; 157 | 158 | let mut zip_b_tmp = ZipArchive::default(); 159 | zip_b_tmp.files.push(junk1b.clone()); 160 | zip_b_tmp.files.push(file_b); 161 | zip_b_tmp.files.push(junk2_bare); 162 | zip_b_tmp.finalize()?; 163 | let junk3_len = 4 + zip_b_tmp.cd.byte_count()? + zip_b_tmp.eocdr.byte_count()? + 4; 164 | let junk3_lfh = LocalFileHeader { 165 | compressed_size: junk3_len as u32, 166 | uncompressed_size: junk3_len as u32, 167 | ..junk3_bare.lfh.clone() 168 | }; 169 | 170 | let mut junk2_data = Vec::new(); 171 | junk1a_dd.write(&mut junk2_data)?; 172 | file_a.write(&mut junk2_data)?; 173 | junk3_lfh.write(&mut junk2_data)?; 174 | let junk2_patch = crc32_patch(&junk2_data, junk2_lfh.crc32); 175 | junk2_patch.write(&mut junk2_data)?; 176 | 177 | let junk1a = FileEntry { 178 | lfh: junk1a_bare.lfh, 179 | data: junk1a_data, 180 | dd: Some(junk1a_dd), 181 | }; 182 | 183 | let junk2 = FileEntry { 184 | lfh: junk2_lfh, 185 | data: junk2_data, 186 | dd: None, 187 | }; 188 | 189 | let mut zip_a_tmp = ZipArchive::default(); 190 | zip_a_tmp.files.push(junk1a.clone()); 191 | zip_a_tmp.files.push(file_a); 192 | zip_a_tmp.files.push(junk3_bare); 193 | zip_a_tmp.finalize()?; 194 | 195 | let mut zip_b = zip_b_tmp; 196 | *zip_b.files.last_mut().unwrap() = junk2; 197 | zip_b.finalize()?; 198 | zip_b.eocdr.zip_file_comment_length = 199 | (4 + zip_a_tmp.cd.byte_count()? + zip_a_tmp.eocdr.byte_count()?) 
as u16; 200 | 201 | let mut junk3_data = Vec::new(); 202 | junk2_patch.write(&mut junk3_data)?; 203 | zip_b.cd.write(&mut junk3_data)?; 204 | zip_b.eocdr.write(&mut junk3_data)?; 205 | let junk3_patch = crc32_patch(&junk3_data, junk3_lfh.crc32); 206 | junk3_patch.write(&mut junk3_data)?; 207 | 208 | let junk3 = FileEntry { 209 | lfh: junk3_lfh, 210 | data: junk3_data, 211 | dd: None, 212 | }; 213 | 214 | let mut zip_a = zip_a_tmp; 215 | *zip_a.files.last_mut().unwrap() = junk3; 216 | zip_a.finalize()?; 217 | zip_a.cd[0].compression_method = CompressionMethod::STORED; 218 | 219 | Ok(zip_a) 220 | } 221 | 222 | pub fn main() -> Result<()> { 223 | testcase(no_cdh_for_lfh)?; 224 | testcase(truncating_lfh_stream_via_fake_records)?; 225 | testcase(truncating_lfh_stream_via_lfh_inside_comments)?; 226 | [false, true] 227 | .iter() 228 | .try_for_each(|overlap| testcase_arg(lfh_desync, *overlap))?; 229 | [false, true] 230 | .iter() 231 | .try_for_each(|deflate| testcase_arg(dd_pos, *deflate))?; 232 | Ok(()) 233 | } 234 | -------------------------------------------------------------------------------- /zip-diff/src/construction/c/c2.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::testcase_arg; 2 | use anyhow::Result; 3 | use zip_diff::utils::BinCount; 4 | use zip_diff::zip::ZipArchive; 5 | 6 | enum Arg { 7 | LongCommentLength, 8 | ShortCommentLength, 9 | LfhCdhMismatch, 10 | } 11 | 12 | fn eocdr_selection(arg: Arg) -> Result<[ZipArchive; 3]> { 13 | let mut zip1 = ZipArchive::default(); 14 | zip1.add_simple("a", b"a")?; 15 | zip1.finalize()?; 16 | zip1.eocdr.zip_file_comment.push(b'\0'); 17 | 18 | let zip_size = zip1.byte_count()?; 19 | zip1.eocdr.zip_file_comment_length = (zip_size * 2 + 1).try_into()?; 20 | 21 | let mut zip2 = ZipArchive::default(); 22 | zip2.add_simple("b", b"b")?; 23 | zip2.finalize()?; 24 | zip2.set_offsets(zip_size)?; 25 | zip2.eocdr.zip_file_comment.push(b'\0'); 26 | zip2.eocdr.zip_file_comment_length = (zip_size + 1).try_into()?; 27 | 28 | let mut zip3 = ZipArchive::default(); 29 | zip3.add_simple("c", b"c")?; 30 | zip3.finalize()?; 31 | zip3.set_offsets(zip_size * 2)?; 32 | zip3.eocdr.zip_file_comment.push(b'\0'); 33 | zip3.eocdr.zip_file_comment_length = 1; 34 | 35 | match arg { 36 | Arg::LongCommentLength => { 37 | zip1.eocdr.zip_file_comment_length += 1; 38 | zip3.eocdr.zip_file_comment_length += 1; 39 | } 40 | Arg::ShortCommentLength => { 41 | zip1.eocdr.zip_file_comment_length -= 1; 42 | zip3.eocdr.zip_file_comment_length -= 1; 43 | } 44 | Arg::LfhCdhMismatch => { 45 | zip1.cd[0].version_needed = 10; 46 | zip3.cd[0].version_needed = 10; 47 | } 48 | } 49 | 50 | Ok([zip1, zip2, zip3]) 51 | } 52 | 53 | pub fn main() -> Result<()> { 54 | testcase_arg(eocdr_selection, Arg::LongCommentLength)?; 55 | testcase_arg(eocdr_selection, Arg::ShortCommentLength)?; 56 | testcase_arg(eocdr_selection, Arg::LfhCdhMismatch)?; 57 | Ok(()) 58 | } 59 | -------------------------------------------------------------------------------- /zip-diff/src/construction/c/c3.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::{testcase, testcase_arg}; 2 | use anyhow::Result; 3 | use bitflags::bitflags; 4 | use zip_diff::utils::BinCount; 5 | use zip_diff::zip::ZipArchive; 6 | 7 | struct CdhCountFlags(u8); 8 | 9 | bitflags! 
{ 10 | impl CdhCountFlags: u8 { 11 | const ThisDiskCount = 1 << 0; 12 | const TotalCount = 1 << 1; 13 | const CdSize = 1 << 2; 14 | } 15 | } 16 | 17 | fn cdh_count(flags: CdhCountFlags) -> Result<ZipArchive> { 18 | let mut zip = ZipArchive::default(); 19 | 20 | zip.add_simple("a", b"a")?; 21 | zip.add_simple("b", b"b")?; 22 | zip.finalize()?; 23 | 24 | if flags.contains(CdhCountFlags::ThisDiskCount) { 25 | zip.eocdr.this_disk_cdh_count -= 1; 26 | } 27 | 28 | if flags.contains(CdhCountFlags::TotalCount) { 29 | zip.eocdr.total_cdh_count -= 1; 30 | } 31 | 32 | if flags.contains(CdhCountFlags::CdSize) { 33 | zip.eocdr.size_of_cd = zip.cd[0].byte_count()?.try_into()?; 34 | } 35 | 36 | Ok(zip) 37 | } 38 | 39 | fn modulo_65536() -> Result<ZipArchive> { 40 | let mut zip = ZipArchive::default(); 41 | 42 | for i in 1u32..=65537 { 43 | zip.add_simple(&format!("{:x}/{:x}", i / 256, i % 256), b"")?; 44 | } 45 | 46 | zip.finalize()?; 47 | 48 | let zip64_eocdr = zip.zip64_eocdr.as_mut().unwrap(); 49 | zip64_eocdr.this_disk_cdh_count -= 65536; 50 | zip64_eocdr.total_cdh_count -= 65536; 51 | zip.eocdr = (&*zip64_eocdr).try_into()?; 52 | zip.zip64_eocdr = None; 53 | zip.zip64_eocdl = None; 54 | 55 | Ok(zip) 56 | } 57 | 58 | pub fn main() -> Result<()> { 59 | (1..8).try_for_each(|flags| testcase_arg(cdh_count, CdhCountFlags::from_bits_truncate(flags)))?; 60 | testcase(modulo_65536)?; 61 | Ok(()) 62 | } 63 | -------------------------------------------------------------------------------- /zip-diff/src/construction/c/c4.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::{testcase, EntryGroup, CRC32A, CRC32B}; 2 | use anyhow::Result; 3 | use binwrite::BinWrite; 4 | use zip_diff::cdh::CentralDirectoryHeader; 5 | use zip_diff::eocd::EndOfCentralDirectoryRecord; 6 | use zip_diff::lfh::LocalFileHeader; 7 | use zip_diff::utils::BinCount; 8 | use zip_diff::zip::ZipArchive; 9 | 10 | #[derive(BinWrite)] 11 | pub struct CdOffsetZip { 12 | pub groups: Vec<EntryGroup>, 13 | pub eocdr: EndOfCentralDirectoryRecord, 14 | } 15 | 16 | pub fn cd_offset() -> Result<CdOffsetZip> { 17 | let mut zip = ZipArchive::default(); 18 | 19 | zip.add_simple("stream", b"a")?; 20 | zip.add_simple("eocdr", b"a")?; 21 | 22 | const FILENAME: &str = "adjac"; 23 | let cd_size = CentralDirectoryHeader::from(&zip.files[1].lfh).byte_count()?; 24 | let lfh_size = LocalFileHeader { 25 | file_name: FILENAME.into(), 26 | ..Default::default() 27 | } 28 | .byte_count()?; 29 | 30 | let content_width = cd_size - lfh_size; 31 | zip.add_simple(FILENAME, format!("{CRC32A:A<0$}", content_width).as_bytes())?; 32 | zip.add_simple(FILENAME, format!("{CRC32B:A<0$}", content_width).as_bytes())?; 33 | 34 | zip.finalize()?; 35 | 36 | // This is required for correct LFH offset adjustment 37 | // This is ensured by adjusting the file name length 38 | assert_eq!(cd_size, zip.files[3].byte_count()?); 39 | // This is required so that the CD size in EOCDR is correct for both central directories 40 | assert_eq!(cd_size, zip.cd[2].byte_count()?); 41 | 42 | { 43 | zip.cd[3].relative_header_offset = zip.cd[2].relative_header_offset; 44 | // Make sure the CDHs match, as the two entries will share the same CDH but have different LFH offsets 45 | // In particular, the filename and CRC32 must be the same 46 | let mut tmp1 = Vec::new(); 47 | zip.cd[2].write(&mut tmp1)?; 48 | let mut tmp2 = Vec::new(); 49 | zip.cd[3].write(&mut tmp2)?; 50 | assert_eq!(tmp1, tmp2); 51 | } 52 | 53 | // for streaming mode parsers 54 | let group1 = EntryGroup { 55 | files: vec![zip.files[0].clone()], 56 | cd:
vec![zip.cd[0].clone()], 57 | }; 58 | 59 | let mut group2 = EntryGroup { 60 | // the first file is for parsers that use the CDH at the offset in the EOCDR 61 | // the second is for parsers that use the adjacent central directory but do not adjust LFH offsets 62 | files: zip.files[1..=2].to_vec(), 63 | cd: vec![zip.cd[1].clone()], 64 | }; 65 | group2.cd[0].relative_header_offset = group1.byte_count()?.try_into()?; 66 | 67 | // for parsers that use the adjacent central directory and adjust LFH offsets accordingly 68 | let mut group3 = EntryGroup { 69 | files: vec![zip.files[3].clone()], 70 | cd: vec![zip.cd[2].clone()], 71 | }; 72 | group3.cd[0].relative_header_offset = 73 | group2.cd[0].relative_header_offset + u32::try_from(zip.files[1].byte_count()?)?; 74 | 75 | let eocdr = EndOfCentralDirectoryRecord { 76 | this_disk_cdh_count: 1, 77 | total_cdh_count: 1, 78 | size_of_cd: cd_size.try_into()?, 79 | offset_of_cd_wrt_starting_disk: group2.cd[0].relative_header_offset 80 | + u32::try_from(group2.files.byte_count()?)?, 81 | ..Default::default() 82 | }; 83 | 84 | Ok(CdOffsetZip { 85 | groups: vec![group1, group2, group3], 86 | eocdr, 87 | }) 88 | } 89 | 90 | pub fn main() -> Result<()> { 91 | testcase(cd_offset)?; 92 | Ok(()) 93 | } 94 | -------------------------------------------------------------------------------- /zip-diff/src/construction/c/c5.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::{testcase, testcase_arg}; 2 | use anyhow::Result; 3 | use binwrite::BinWrite; 4 | use bitflags::bitflags; 5 | use zip_diff::eocd::{ 6 | EndOfCentralDirectoryRecord, Zip64EndOfCentralDirectoryLocator, 7 | Zip64EndOfCentralDirectoryRecord, Zip64ExtensibleDataSector, 8 | }; 9 | use zip_diff::extra::Zip64ExtendedInfo; 10 | use zip_diff::utils::BinCount; 11 | use zip_diff::zip::ZipArchive; 12 | 13 | struct Zip64Flags(u8); 14 | 15 | bitflags! { 16 | impl Zip64Flags: u8 { 17 | const DiskNumberFF = 1 << 0; 18 | const CdhCountFF = 1 << 1; 19 | const CdSizeFF = 1 << 2; 20 | const CdOffsetFF = 1 << 3; 21 | const EocdlGap = 1 << 4; 22 | const MoreFilesInZip64 = 1 << 5; 23 | } 24 | } 25 | 26 | fn use_zip64_eocdr(flags: Zip64Flags) -> Result<ZipArchive> { 27 | let mut zip1 = ZipArchive::default(); 28 | zip1.add_simple("a", b"a")?; 29 | if !flags.contains(Zip64Flags::MoreFilesInZip64) { 30 | zip1.add_simple("b", b"b")?; 31 | } 32 | zip1.finalize()?; 33 | 34 | let mut zip2 = ZipArchive::default(); 35 | zip2.add_simple("c", b"c")?; 36 | if flags.contains(Zip64Flags::MoreFilesInZip64) { 37 | zip2.add_simple("d", b"d")?; 38 | } 39 | zip2.finalize()?; 40 | zip2.set_offsets(zip1.files.byte_count()?
+ zip1.cd.byte_count()?)?; 41 | zip2.set_eocd(true)?; 42 | 43 | let cdh = zip1.cd.last_mut().unwrap(); 44 | zip2.files.write(&mut cdh.file_comment)?; 45 | zip2.cd.write(&mut cdh.file_comment)?; 46 | zip2.zip64_eocdr.unwrap().write(&mut cdh.file_comment)?; 47 | zip2.zip64_eocdl.unwrap().write(&mut cdh.file_comment)?; 48 | if flags.contains(Zip64Flags::EocdlGap) { 49 | 0u8.write(&mut cdh.file_comment)?; 50 | } 51 | cdh.file_comment_length = cdh.file_comment.len().try_into()?; 52 | 53 | zip1.set_eocd(false)?; 54 | 55 | if flags.contains(Zip64Flags::DiskNumberFF) { 56 | zip1.eocdr.number_of_this_disk = u16::MAX; 57 | zip1.eocdr.start_of_cd_disk_number = u16::MAX; 58 | } 59 | 60 | if flags.contains(Zip64Flags::CdhCountFF) { 61 | zip1.eocdr.this_disk_cdh_count = u16::MAX; 62 | zip1.eocdr.total_cdh_count = u16::MAX; 63 | } 64 | 65 | if flags.contains(Zip64Flags::CdSizeFF) { 66 | zip1.eocdr.size_of_cd = u32::MAX; 67 | } 68 | 69 | if flags.contains(Zip64Flags::CdOffsetFF) { 70 | zip1.eocdr.offset_of_cd_wrt_starting_disk = u32::MAX; 71 | } 72 | 73 | Ok(zip1) 74 | } 75 | 76 | fn eocdl_or_search() -> Result<ZipArchive> { 77 | let mut zip1 = ZipArchive::default(); 78 | zip1.add_simple("a", b"a")?; 79 | zip1.finalize()?; 80 | zip1.set_eocd(true)?; 81 | 82 | let mut zip2 = ZipArchive::default(); 83 | zip2.add_simple("b", b"b")?; 84 | zip2.finalize()?; 85 | zip2.set_offsets(zip1.files.byte_count()? + zip1.cd.byte_count()?)?; 86 | zip2.set_eocd(true)?; 87 | 88 | // hide ZIP64 EOCDR of zip1 in the ZIP64 EOCDR extensible data sector of zip2 89 | let zip64_eocdr_size = zip1.zip64_eocdr.as_ref().unwrap().byte_count()?; 90 | let zip64_eocdr_2 = zip2.zip64_eocdr.as_mut().unwrap(); 91 | let extensible_header = Zip64ExtensibleDataSector { 92 | header_id: 0x1337, // an unknown ID 93 | size: zip64_eocdr_size.try_into()?, 94 | data: Box::new(Zip64ExtendedInfo::default()), // empty data 95 | }; 96 | zip64_eocdr_2.size += u64::try_from(extensible_header.byte_count()? + zip64_eocdr_size)?; 97 | zip64_eocdr_2.extensible_data_sector.push(extensible_header); 98 | 99 | let cdh = &mut zip1.cd[0]; 100 | zip2.files.write(&mut cdh.file_comment)?; 101 | zip2.cd.write(&mut cdh.file_comment)?; 102 | zip2.zip64_eocdr 103 | .as_ref() 104 | .unwrap() 105 | .write(&mut cdh.file_comment)?; 106 | cdh.file_comment_length = cdh.file_comment.len().try_into()?; 107 | 108 | zip1.set_eocd(true)?; 109 | zip1.zip64_eocdl.as_mut().unwrap().zip64_eocdr_offset -= 110 | u64::try_from(zip2.zip64_eocdr.unwrap().byte_count()?)?; 111 | 112 | Ok(zip1) 113 | } 114 | 115 | struct CdhCountFlags(u8); 116 | 117 | bitflags!
{ 118 | impl CdhCountFlags: u8 { 119 | const ThisDiskCount = 1 << 0; 120 | const TotalCount = 1 << 1; 121 | const CdSize = 1 << 2; 122 | } 123 | } 124 | 125 | fn cdh_count(flags: CdhCountFlags) -> Result<ZipArchive> { 126 | let mut zip = ZipArchive::default(); 127 | 128 | zip.add_simple("a", b"a")?; 129 | zip.add_simple("b", b"b")?; 130 | zip.finalize()?; 131 | zip.set_eocd(true)?; 132 | 133 | let eocdr = zip.zip64_eocdr.as_mut().unwrap(); 134 | 135 | if flags.contains(CdhCountFlags::ThisDiskCount) { 136 | eocdr.this_disk_cdh_count -= 1; 137 | } 138 | 139 | if flags.contains(CdhCountFlags::TotalCount) { 140 | eocdr.total_cdh_count -= 1; 141 | } 142 | 143 | if flags.contains(CdhCountFlags::CdSize) { 144 | eocdr.size_of_cd = zip.cd[0].byte_count()?.try_into()?; 145 | } 146 | 147 | Ok(zip) 148 | } 149 | 150 | fn cd_offset(adjust_zip64_offset: bool) -> Result<Vec<u8>> { 151 | let zip = super::c4::cd_offset()?; 152 | let eocdr = zip.eocdr; 153 | 154 | let mut buf = Vec::new(); 155 | zip.groups.write(&mut buf)?; 156 | 157 | let mut zip64_eocdr = Zip64EndOfCentralDirectoryRecord { 158 | this_disk_cdh_count: eocdr.this_disk_cdh_count.into(), 159 | total_cdh_count: eocdr.this_disk_cdh_count.into(), 160 | size_of_cd: eocdr.size_of_cd.into(), 161 | offset_of_cd_wrt_starting_disk: eocdr.offset_of_cd_wrt_starting_disk.into(), 162 | ..Default::default() 163 | }; 164 | zip64_eocdr.finalize()?; 165 | 166 | let zip64_offset = if adjust_zip64_offset { 167 | zip.groups[0..=1].byte_count()? 168 | } else { 169 | buf.len() 170 | }; 171 | 172 | let eocdl = Zip64EndOfCentralDirectoryLocator::from_offset(zip64_offset.try_into()?); 173 | let eocdr = EndOfCentralDirectoryRecord::all_ff(); 174 | 175 | zip64_eocdr.write(&mut buf)?; 176 | eocdl.write(&mut buf)?; 177 | eocdr.write(&mut buf)?; 178 | 179 | Ok(buf) 180 | } 181 | 182 | pub fn main() -> Result<()> { 183 | (0..64).try_for_each(|i| testcase_arg(use_zip64_eocdr, Zip64Flags::from_bits_truncate(i)))?; 184 | testcase(eocdl_or_search)?; 185 | (1..8).try_for_each(|i| testcase_arg(cdh_count, CdhCountFlags::from_bits_truncate(i)))?; 186 | testcase_arg(cd_offset, false)?; 187 | testcase_arg(cd_offset, true)?; 188 | Ok(()) 189 | } 190 | -------------------------------------------------------------------------------- /zip-diff/src/construction/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | 3 | mod a; 4 | mod b; 5 | mod c; 6 | mod utils; 7 | 8 | fn main() -> Result<()> { 9 | a::main()?; 10 | b::main()?; 11 | c::main()?; 12 | Ok(()) 13 | } 14 | -------------------------------------------------------------------------------- /zip-diff/src/construction/utils.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Context, Result}; 2 | use binwrite::BinWrite; 3 | use std::any::type_name_of_val; 4 | use std::collections::BTreeMap; 5 | use std::fs::{create_dir_all, File}; 6 | use std::io::{BufWriter, Write}; 7 | use std::path::PathBuf; 8 | use std::sync::Mutex; 9 | use zip_diff::cdh::CentralDirectoryHeader; 10 | use zip_diff::zip::{FileEntry, ZipArchive}; 11 | 12 | static WRITE_COUNTER: Mutex<BTreeMap<String, usize>> = Mutex::new(BTreeMap::new()); 13 | 14 | fn write_core<Z: BinWrite>(ambiguity_type: &str, data: Z) -> Result<()> { 15 | let count = *WRITE_COUNTER 16 | .lock() 17 | .unwrap() 18 | .entry(ambiguity_type.to_string()) 19 | .and_modify(|e| *e += 1) 20 | .or_insert(1); 21 | let path = format!("../constructions/{ambiguity_type}/{ambiguity_type}-{count}.zip"); 22 | let path = PathBuf::from(path); 23 |
create_dir_all(path.parent().unwrap())?; 24 | let file = File::create(path).context("failed to create sample file")?; 25 | let mut writer = BufWriter::new(file); 26 | data.write(&mut writer) 27 | .context("failed to write sample file")?; 28 | writer.flush().context("failed to flush sample file writer") 29 | } 30 | 31 | pub fn testcase<Z, F>(construction: F) -> Result<()> 32 | where 33 | Z: BinWrite, 34 | F: FnOnce() -> Result<Z>, 35 | { 36 | let ambiguity_type = type_name_of_val(&construction).rsplit("::").nth(1).unwrap(); 37 | let data = construction()?; 38 | write_core(ambiguity_type, data) 39 | } 40 | 41 | pub fn testcase_arg<Z, A, F>(construction: F, arg: A) -> Result<()> 42 | where 43 | Z: BinWrite, 44 | F: FnOnce(A) -> Result<Z>, 45 | { 46 | let ambiguity_type = type_name_of_val(&construction).rsplit("::").nth(1).unwrap(); 47 | let data = construction(arg)?; 48 | write_core(ambiguity_type, data) 49 | } 50 | 51 | #[derive(BinWrite)] 52 | pub struct EntryGroup { 53 | pub files: Vec<FileEntry>, 54 | pub cd: Vec<CentralDirectoryHeader>, 55 | } 56 | 57 | impl From<ZipArchive> for EntryGroup { 58 | fn from(zip: ZipArchive) -> Self { 59 | Self { 60 | files: zip.files, 61 | cd: zip.cd, 62 | } 63 | } 64 | } 65 | 66 | // Two strings with the same length and CRC32 67 | // https://www.thecodingforums.com/threads/finding-two-strings-with-the-same-crc32.889011/#post-4775592 68 | pub const CRC32A: &str = "oxueekz"; 69 | pub const CRC32B: &str = "pyqptgs"; 70 | -------------------------------------------------------------------------------- /zip-diff/src/count/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{bail, Context, Result}; 2 | use serde::{Deserialize, Serialize}; 3 | use std::collections::{BTreeMap, BTreeSet}; 4 | use std::ffi::OsString; 5 | use std::fs::{copy, create_dir_all, read_dir, remove_dir_all, File}; 6 | use std::io::{BufReader, BufWriter, ErrorKind, Write}; 7 | use std::path::Path; 8 | use std::process::{Command, Stdio}; 9 | use zip_diff::hash::read_parsing_result; 10 | 11 | const SAMPLES_DIR: &str = "../constructions"; 12 | const INPUT_DIR: &str = "../constructions/input"; 13 | const OUTPUT_DIR: &str = "../constructions/output"; 14 | 15 | const TYPES: &[&str] = &[ 16 | "a1", "a2", "a3", "a4", "a5", // Redundant Metadata 17 | "b1", "b2", "b3", "b4", // File Path Processing 18 | "c1", "c2", "c3", "c4", "c5", // ZIP Structure Positioning 19 | ]; 20 | 21 | #[derive(Serialize)] 22 | struct InconsistencyItem<'a> { 23 | parsers: (&'a str, &'a str), 24 | inconsistency_types: Vec<&'static str>, 25 | } 26 | 27 | #[derive(Deserialize)] 28 | pub struct ParserInfo { 29 | pub name: String, 30 | pub version: String, 31 | pub r#type: String, 32 | pub language: String, 33 | } 34 | 35 | fn main() -> Result<()> { 36 | let parsers_file = 37 | File::open("../parsers/parsers.json").context("failed to read parsers.json")?; 38 | let parsers_reader = BufReader::new(parsers_file); 39 | let parser_map: BTreeMap<String, ParserInfo> = serde_json::from_reader(parsers_reader)?; 40 | let mut parsers = parser_map.into_iter().collect::<Vec<_>>(); 41 | parsers.sort_by_cached_key(|(_, parser)| { 42 | ( 43 | parser.r#type.clone(), 44 | parser.language.clone(), 45 | parser.name.to_lowercase(), 46 | parser.version.clone(), 47 | ) 48 | }); 49 | let parsers = parsers.into_iter().map(|(key, _)| key).collect::<Vec<_>>(); 50 | 51 | if let Err(err) = remove_dir_all(INPUT_DIR) { 52 | if err.kind() != ErrorKind::NotFound { 53 | bail!("failed to remove input directory: {err}"); 54 | } 55 | } 56 | if let Err(err) = remove_dir_all(OUTPUT_DIR) { 57 | if err.kind() !=
ErrorKind::NotFound { 58 | bail!("failed to remove output directory: {err}"); 59 | } 60 | } 61 | create_dir_all(INPUT_DIR).context("failed to create input directory")?; 62 | 63 | let mut testcases = Vec::<(&str, OsString)>::new(); 64 | 65 | for t in TYPES { 66 | let dir = Path::new(SAMPLES_DIR).join(t); 67 | if !dir.try_exists()? { 68 | continue; 69 | } 70 | let entries = read_dir(dir)?; 71 | for entry in entries { 72 | let entry = entry?; 73 | if entry.file_name().into_string().unwrap().starts_with(t) 74 | && entry.file_type()?.is_file() 75 | { 76 | testcases.push((t, entry.file_name())); 77 | copy(entry.path(), Path::new(INPUT_DIR).join(entry.file_name())) 78 | .context("failed to copy sample to input directory")?; 79 | } 80 | } 81 | } 82 | 83 | let parser_prepare_status = Command::new("../parsers/prepare.sh") 84 | .env("INPUT_DIR", INPUT_DIR) 85 | .env("OUTPUT_DIR", OUTPUT_DIR) 86 | .status() 87 | .expect("failed to execute parsers/prepare.sh"); 88 | if !parser_prepare_status.success() { 89 | bail!("parsers/prepare.sh failed"); 90 | } 91 | 92 | Command::new("docker") 93 | .arg("compose") 94 | .arg("up") 95 | .current_dir("../parsers") 96 | .stdout(Stdio::null()) 97 | .stderr(Stdio::null()) 98 | .spawn() 99 | .context("failed to start docker compose")? 100 | .wait() 101 | .context("failed to run docker compose")?; 102 | 103 | let outputs = parsers 104 | .iter() 105 | .map(|parser| { 106 | testcases 107 | .iter() 108 | .map(|(_, t)| read_parsing_result(Path::new(OUTPUT_DIR).join(parser).join(t), true)) 109 | .collect::<Vec<_>>() 110 | }) 111 | .collect::<Vec<_>>(); 112 | 113 | let mut results = Vec::new(); 114 | 115 | for (parser1, outputs1) in parsers.iter().zip(&outputs) { 116 | for (parser2, outputs2) in parsers.iter().zip(&outputs) { 117 | let inconsistency_types = outputs1 118 | .iter() 119 | .zip(outputs2) 120 | .zip(&testcases) 121 | .filter_map(|((o1, o2), (t, _))| o1.inconsistent_with(o2).then_some(*t)) 122 | .collect::<BTreeSet<_>>() 123 | .into_iter() 124 | .collect(); 125 | results.push(InconsistencyItem { 126 | parsers: (parser1, parser2), 127 | inconsistency_types, 128 | }) 129 | } 130 | } 131 | 132 | let results_file = File::create(Path::new(SAMPLES_DIR).join("inconsistency-types.json")) 133 | .context("failed to create result file")?; 134 | let mut results_writer = BufWriter::new(results_file); 135 | serde_json::to_writer_pretty(&mut results_writer, &results) 136 | .context("failed to write results")?; 137 | results_writer.flush()?; 138 | 139 | Ok(()) 140 | } 141 | -------------------------------------------------------------------------------- /zip-diff/src/dd.rs: -------------------------------------------------------------------------------- 1 | use crate::lfh::LocalFileHeader; 2 | use crate::utils::binwrite_option; 3 | use binwrite::{BinWrite, WriterOption}; 4 | use educe::Educe; 5 | use std::io::{Result, Write}; 6 | 7 | #[derive(BinWrite, Clone, Debug, Educe)] 8 | #[educe(Default)] 9 | pub struct DataDescriptor { 10 | #[binwrite(with(binwrite_option))] 11 | #[educe(Default = Some(Self::SIGNATURE))] 12 | pub signature: Option<u32>, 13 | pub crc32: u32, 14 | #[binwrite(with(binwrite_u32or64))] 15 | pub compressed_size: U32or64, 16 | #[binwrite(with(binwrite_u32or64))] 17 | pub uncompressed_size: U32or64, 18 | } 19 | 20 | #[derive(Clone, Debug)] 21 | pub enum U32or64 { 22 | U32(u32), 23 | U64(u64), 24 | } 25 | 26 | impl DataDescriptor { 27 | pub const SIGNATURE: u32 = 0x08074b50; 28 | } 29 | 30 | fn binwrite_u32or64<W: Write>(val: &U32or64, writer: &mut W, options: &WriterOption) -> Result<()> { 31 | match val
{ 32 | U32or64::U32(val) => val.write_options(writer, options), 33 | U32or64::U64(val) => val.write_options(writer, options), 34 | } 35 | } 36 | 37 | impl U32or64 { 38 | pub fn saturate(&self) -> u32 { 39 | match self { 40 | U32or64::U32(val) => *val, 41 | U32or64::U64(val) => { 42 | if *val > u32::MAX as u64 { 43 | u32::MAX 44 | } else { 45 | *val as u32 46 | } 47 | } 48 | } 49 | } 50 | } 51 | 52 | impl Default for U32or64 { 53 | fn default() -> Self { 54 | Self::U32(0) 55 | } 56 | } 57 | 58 | impl From<&LocalFileHeader> for DataDescriptor { 59 | fn from(value: &LocalFileHeader) -> Self { 60 | Self { 61 | signature: Some(Self::SIGNATURE), 62 | crc32: value.crc32, 63 | compressed_size: match value.zip64.compressed_size { 64 | None => U32or64::U32(value.compressed_size), 65 | Some(size) => U32or64::U64(size), 66 | }, 67 | uncompressed_size: match value.zip64.original_size { 68 | None => U32or64::U32(value.uncompressed_size), 69 | Some(size) => U32or64::U64(size), 70 | }, 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /zip-diff/src/eocd.rs: -------------------------------------------------------------------------------- 1 | use crate::extra::ExtraFieldType; 2 | use crate::fields::CompressionMethod; 3 | use crate::utils::{binwrite_option, binwrite_transform, BinCount}; 4 | use anyhow::{Context, Result}; 5 | use binwrite::BinWrite; 6 | use educe::Educe; 7 | 8 | #[derive(BinWrite, Clone, Educe)] 9 | #[educe(Debug, Default)] 10 | pub struct EndOfCentralDirectoryRecord { 11 | #[educe(Default = Self::SIGNATURE)] 12 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))] 13 | pub signature: u32, 14 | pub number_of_this_disk: u16, 15 | /// number of the disk with the start of the central directory 16 | pub start_of_cd_disk_number: u16, 17 | /// total number of entries in the central directory on this disk 18 | pub this_disk_cdh_count: u16, 19 | /// total number of entries in the central directory 20 | pub total_cdh_count: u16, 21 | /// size of the central directory 22 | pub size_of_cd: u32, 23 | /// offset of start of central directory with respect to the starting disk number 24 | pub offset_of_cd_wrt_starting_disk: u32, 25 | pub zip_file_comment_length: u16, 26 | pub zip_file_comment: Vec<u8>, 27 | } 28 | 29 | impl EndOfCentralDirectoryRecord { 30 | pub const SIGNATURE: u32 = 0x06054b50; 31 | 32 | pub fn all_ff() -> Self { 33 | Self { 34 | number_of_this_disk: u16::MAX, 35 | start_of_cd_disk_number: u16::MAX, 36 | this_disk_cdh_count: u16::MAX, 37 | total_cdh_count: u16::MAX, 38 | size_of_cd: u32::MAX, 39 | offset_of_cd_wrt_starting_disk: u32::MAX, 40 | ..Default::default() 41 | } 42 | } 43 | } 44 | 45 | impl TryFrom<&Zip64EndOfCentralDirectoryRecord> for EndOfCentralDirectoryRecord { 46 | type Error = anyhow::Error; 47 | 48 | fn try_from(zip64: &Zip64EndOfCentralDirectoryRecord) -> Result<Self> { 49 | Ok(Self { 50 | number_of_this_disk: zip64.number_of_this_disk.try_into()?, 51 | start_of_cd_disk_number: zip64.start_of_cd_disk_number.try_into()?, 52 | this_disk_cdh_count: zip64.this_disk_cdh_count.try_into()?, 53 | total_cdh_count: zip64.total_cdh_count.try_into()?, 54 | size_of_cd: zip64.size_of_cd.try_into()?, 55 | offset_of_cd_wrt_starting_disk: zip64.offset_of_cd_wrt_starting_disk.try_into()?, 56 | ..Default::default() 57 | }) 58 | } 59 | } 60 | 61 | #[derive(BinWrite, Clone, Educe)] 62 | #[educe(Debug, Default)] 63 | pub struct Zip64EndOfCentralDirectoryLocator { 64 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))] 65 | #[educe(Default =
Self::SIGNATURE)] 66 | pub signature: u32, 67 | /// number of the disk with the start of the zip64 end of central directory 68 | pub zip64_eocdr_disk_number: u32, 69 | /// relative offset of the zip64 end of central directory record 70 | pub zip64_eocdr_offset: u64, 71 | #[educe(Default = 1)] 72 | pub total_number_of_disks: u32, 73 | } 74 | 75 | impl Zip64EndOfCentralDirectoryLocator { 76 | pub const SIGNATURE: u32 = 0x07064b50; 77 | 78 | pub fn from_offset(offset: u64) -> Self { 79 | Self { 80 | zip64_eocdr_offset: offset, 81 | ..Default::default() 82 | } 83 | } 84 | } 85 | 86 | #[derive(BinWrite, Clone, Educe)] 87 | #[educe(Debug, Default)] 88 | pub struct Zip64EndOfCentralDirectoryRecord { 89 | #[educe(Default = Self::SIGNATURE)] 90 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))] 91 | pub signature: u32, 92 | pub size: u64, 93 | #[educe(Default = 20)] 94 | pub version_made_by: u16, 95 | #[educe(Default = 20)] 96 | pub version_needed: u16, 97 | pub number_of_this_disk: u32, 98 | /// number of the disk with the start of the central directory 99 | pub start_of_cd_disk_number: u32, 100 | /// total number of entries in the central directory on this disk 101 | pub this_disk_cdh_count: u64, 102 | /// total number of entries in the central directory 103 | pub total_cdh_count: u64, 104 | /// size of the central directory 105 | pub size_of_cd: u64, 106 | /// offset of start of central directory with respect to the starting disk number 107 | pub offset_of_cd_wrt_starting_disk: u64, 108 | #[binwrite(with(binwrite_option))] 109 | pub v2: Option<Zip64EocdrV2>, 110 | pub extensible_data_sector: Vec<Zip64ExtensibleDataSector>, 111 | } 112 | 113 | #[derive(BinWrite, Clone, Debug, Default)] 114 | pub struct Zip64EocdrV2 { 115 | #[binwrite(with(binwrite_transform))] 116 | pub compression_method: CompressionMethod, 117 | pub compressed_size: u64, 118 | pub original_size: u64, 119 | pub encrypt_alg: u16, 120 | pub key_bit_len: u16, 121 | pub encrypt_flags: u16, 122 | pub hash_alg: u16, 123 | pub hash_len: u16, 124 | pub hash_data: Vec<u8>, 125 | } 126 | 127 | impl Zip64EndOfCentralDirectoryRecord { 128 | pub const SIGNATURE: u32 = 0x06064b50; 129 | 130 | pub fn finalize(&mut self) -> Result<()> { 131 | for field in &mut self.extensible_data_sector { 132 | field.finalize()?; 133 | } 134 | self.size = 135 | self.extensible_data_sector 136 | .byte_count() 137 | .context("Failed to count ZIP64 EOCDR extensible data sector")? as u64 138 | + 44; 139 | if let Some(v2) = &self.v2 { 140 | self.size += v2.byte_count()? as u64; 141 | } 142 | Ok(()) 143 | } 144 | 145 | pub fn use_v2(&mut self) -> Result<()> { 146 | self.version_made_by = 62; 147 | self.version_needed = 62; 148 | self.v2 = Some(Zip64EocdrV2 { 149 | compressed_size: self.size_of_cd, 150 | original_size: self.size_of_cd, 151 | ..Default::default() 152 | }); 153 | self.finalize() 154 | } 155 | } 156 | 157 | #[derive(BinWrite, Clone, Debug)] 158 | pub struct Zip64ExtensibleDataSector { 159 | pub header_id: u16, 160 | pub size: u32, 161 | #[binwrite(with(binwrite_transform))] 162 | pub data: Box<dyn ExtraFieldType>, 163 | } 164 | 165 | impl Zip64ExtensibleDataSector { 166 | pub fn finalize(&mut self) -> Result<()> { 167 | self.header_id = self.data.header_id(); 168 | self.size = self 169 | .data 170 | .binary_encode() 171 | .context("Failed to count extensible data sector size")?
172 | .len() 173 | .try_into() 174 | .context("Extensible data sector too long")?; 175 | Ok(()) 176 | } 177 | } 178 | 179 | impl<T: ExtraFieldType> From<T> for Zip64ExtensibleDataSector { 180 | fn from(data: T) -> Self { 181 | Self { 182 | header_id: 0, 183 | size: 0, 184 | data: Box::new(data), 185 | } 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /zip-diff/src/extra.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::{binwrite_option, binwrite_transform, BinWriteTransform}; 2 | use anyhow::{Context, Result}; 3 | use binwrite::BinWrite; 4 | use bitflags::bitflags; 5 | use downcast_rs::{impl_downcast, Downcast}; 6 | use dyn_clone::{clone_trait_object, DynClone}; 7 | use std::fmt::Debug; 8 | 9 | #[derive(BinWrite, Clone, Debug)] 10 | pub struct ExtraField { 11 | pub header_id: u16, 12 | pub size: u16, 13 | #[binwrite(with(binwrite_transform))] 14 | pub data: Box<dyn ExtraFieldType>, 15 | } 16 | 17 | impl ExtraField { 18 | pub fn finalize(&mut self) -> Result<()> { 19 | self.header_id = self.data.header_id(); 20 | self.size = self 21 | .data 22 | .binary_encode() 23 | .context("Failed to count extra field size")? 24 | .len() 25 | .try_into() 26 | .context("Extra field too long")?; 27 | Ok(()) 28 | } 29 | } 30 | 31 | impl<T: ExtraFieldType> From<T> for ExtraField { 32 | fn from(data: T) -> Self { 33 | Self { 34 | header_id: 0, 35 | size: 0, 36 | data: Box::new(data), 37 | } 38 | } 39 | } 40 | 41 | pub trait ExtraFieldType: BinaryEncode + Debug + DynClone + Downcast + Send + Sync { 42 | // a function is used instead of an associated const to make it object-safe 43 | fn header_id(&self) -> u16; 44 | } 45 | 46 | impl_downcast!(ExtraFieldType); 47 | clone_trait_object!(ExtraFieldType); 48 | 49 | #[derive(BinWrite, Clone, Default, Debug)] 50 | pub struct Zip64ExtendedInfo { 51 | #[binwrite(with(binwrite_option))] 52 | pub original_size: Option<u64>, 53 | #[binwrite(with(binwrite_option))] 54 | pub compressed_size: Option<u64>, 55 | #[binwrite(with(binwrite_option))] 56 | pub relative_header_offset: Option<u64>, 57 | #[binwrite(with(binwrite_option))] 58 | pub disk_start_number: Option<u32>, 59 | } 60 | 61 | impl ExtraFieldType for Zip64ExtendedInfo { 62 | fn header_id(&self) -> u16 { 63 | 1 64 | } 65 | } 66 | 67 | impl Zip64ExtendedInfo { 68 | pub fn is_empty(&self) -> bool { 69 | self.original_size.is_none() 70 | && self.compressed_size.is_none() 71 | && self.relative_header_offset.is_none() 72 | && self.disk_start_number.is_none() 73 | } 74 | } 75 | 76 | #[derive(Clone, Copy, Default, Debug)] 77 | pub struct PatchDescriptorFlag(u32); 78 | 79 | bitflags!
{ 80 | impl PatchDescriptorFlag: u32 { 81 | const AutoDetection = 1 << 0; 82 | const SelfPatch = 1 << 1; 83 | const ActionAdd = 1 << 4; 84 | const ActionDelete = 2 << 4; 85 | const ActionPatch = 3 << 4; 86 | const ReactionToAbsentSkip = 1 << 8; 87 | const ReactionToAbsentIgnore = 2 << 8; 88 | const ReactionToAbsentFail = 3 << 8; 89 | const ReactionToNewerSkip = 1 << 10; 90 | const ReactionToNewerIgnore = 2 << 10; 91 | const ReactionToNewerFail = 3 << 10; 92 | const ReactionToUnknownSkip = 1 << 12; 93 | const ReactionToUnknownIgnore = 2 << 12; 94 | const ReactionToUnknownFail = 3 << 12; 95 | const _ = !0; 96 | } 97 | } 98 | 99 | impl BinWriteTransform for PatchDescriptorFlag { 100 | type Type = u32; 101 | fn binwrite_transform(&self) -> std::io::Result<Self::Type> { 102 | Ok(self.0) 103 | } 104 | } 105 | 106 | #[derive(BinWrite, Clone, Default, Debug)] 107 | pub struct PatchDescriptor { 108 | pub version: u16, 109 | #[binwrite(with(binwrite_transform))] 110 | pub flags: PatchDescriptorFlag, 111 | pub old_size: u32, 112 | pub old_crc: u32, 113 | pub new_size: u32, 114 | pub new_crc: u32, 115 | } 116 | 117 | impl ExtraFieldType for PatchDescriptor { 118 | fn header_id(&self) -> u16 { 119 | 0xf 120 | } 121 | } 122 | 123 | #[derive(BinWrite, Clone, Default, Debug)] 124 | pub struct InfoZipUnicodePath { 125 | pub version: u8, 126 | pub name_crc32: u32, 127 | pub unicode_name: String, 128 | } 129 | 130 | impl ExtraFieldType for InfoZipUnicodePath { 131 | fn header_id(&self) -> u16 { 132 | 0x7075 133 | } 134 | } 135 | 136 | impl InfoZipUnicodePath { 137 | pub fn new(unicode_name: String, name: &str) -> Self { 138 | Self { 139 | version: 1, 140 | name_crc32: crc32fast::hash(name.as_bytes()), 141 | unicode_name, 142 | } 143 | } 144 | } 145 | 146 | // BinWrite is not object-safe. 147 | // The following is to make BinWrite with Box<dyn ExtraFieldType> possible. 148 | 149 | pub trait BinaryEncode { 150 | fn binary_encode(&self) -> std::io::Result<Vec<u8>>; 151 | } 152 | 153 | impl<T: BinWrite> BinaryEncode for T { 154 | fn binary_encode(&self) -> std::io::Result<Vec<u8>> { 155 | let mut bytes = Vec::new(); 156 | self.write(&mut bytes)?; 157 | Ok(bytes) 158 | } 159 | } 160 | 161 | impl BinWriteTransform for Box<dyn ExtraFieldType> { 162 | type Type = Vec<u8>; 163 | 164 | fn binwrite_transform(&self) -> std::io::Result<Self::Type> { 165 | self.binary_encode() 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /zip-diff/src/fields.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::BinWriteTransform; 2 | use binwrite::BinWrite; 3 | use bitflags::bitflags; 4 | use chrono::{DateTime, Datelike, Timelike, Utc}; 5 | 6 | #[derive(Clone, Copy, Default, Debug)] 7 | pub struct GeneralPurposeFlag(u16); 8 | 9 | bitflags!
{ 10 | impl GeneralPurposeFlag: u16 { 11 | const Encrypted = 1 << 0; 12 | const Compression1 = 1 << 1; 13 | const Compression2 = 1 << 2; 14 | const DataDescriptor = 1 << 3; 15 | const PatchData = 1 << 5; 16 | const StrongEncryption = 1 << 6; 17 | const LanguageEncoding = 1 << 11; 18 | const EncryptedCentralDirectory = 1 << 13; 19 | const _ = !0; 20 | } 21 | } 22 | 23 | impl BinWriteTransform for GeneralPurposeFlag { 24 | type Type = u16; 25 | fn binwrite_transform(&self) -> std::io::Result<Self::Type> { 26 | Ok(self.0) 27 | } 28 | } 29 | 30 | #[derive(Clone, Copy, Default, Debug, PartialEq, Eq)] 31 | pub struct CompressionMethod(pub u16); 32 | 33 | impl CompressionMethod { 34 | pub const STORED: Self = Self(0); 35 | pub const SHRUNK: Self = Self(1); 36 | pub const REDUCED1: Self = Self(2); 37 | pub const REDUCED2: Self = Self(3); 38 | pub const REDUCED3: Self = Self(4); 39 | pub const REDUCED4: Self = Self(5); 40 | pub const IMPLODED: Self = Self(6); 41 | pub const DEFLATED: Self = Self(8); 42 | pub const DEFLATE64: Self = Self(9); 43 | pub const BZIP2: Self = Self(12); 44 | pub const LZMA: Self = Self(14); 45 | pub const ZSTD: Self = Self(93); 46 | pub const MP3: Self = Self(94); 47 | pub const XZ: Self = Self(95); 48 | pub const JPEG: Self = Self(96); 49 | } 50 | 51 | impl BinWriteTransform for CompressionMethod { 52 | type Type = u16; 53 | fn binwrite_transform(&self) -> std::io::Result<Self::Type> { 54 | Ok(self.0) 55 | } 56 | } 57 | 58 | #[derive(Clone, Copy, Default, Debug)] 59 | pub struct InternalFileAttributes(u16); 60 | 61 | bitflags! { 62 | impl InternalFileAttributes: u16 { 63 | const TextFile = 1 << 0; 64 | const RecordLengthControl = 1 << 2; 65 | const _ = !0; 66 | } 67 | } 68 | 69 | impl BinWriteTransform for InternalFileAttributes { 70 | type Type = u16; 71 | fn binwrite_transform(&self) -> std::io::Result<Self::Type> { 72 | Ok(self.0) 73 | } 74 | } 75 | 76 | #[derive(BinWrite, Clone, Copy, Debug)] 77 | pub struct DosDateTime { 78 | pub time: u16, 79 | pub date: u16, 80 | } 81 | 82 | impl DosDateTime { 83 | pub fn new(time: u16, date: u16) -> Self { 84 | Self { time, date } 85 | } 86 | } 87 | 88 | impl From<DateTime<Utc>> for DosDateTime { 89 | fn from(dt: DateTime<Utc>) -> Self { 90 | let date = ((((dt.year() - 1980) as u32) << 9) | (dt.month() << 5) | dt.day()) as u16; 91 | let time = ((dt.hour() << 11) | (dt.minute() << 5) | (dt.second() / 2)) as u16; 92 | DosDateTime { date, time } 93 | } 94 | } 95 | 96 | impl Default for DosDateTime { 97 | fn default() -> Self { 98 | Utc::now().into() 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /zip-diff/src/fuzz/config.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use fs4::available_space; 3 | use serde::Deserialize; 4 | use std::collections::BTreeMap; 5 | use std::fs::{create_dir_all, File}; 6 | use std::path::PathBuf; 7 | use std::sync::LazyLock; 8 | use std::time::{Duration, Instant}; 9 | use sysinfo::System; 10 | 11 | pub struct Config { 12 | pub batch_size: usize, 13 | pub parsers: Vec<String>, 14 | pub parsers_dir: PathBuf, 15 | pub input_dir: PathBuf, 16 | pub output_dir: PathBuf, 17 | pub samples_dir: PathBuf, 18 | pub results_dir: PathBuf, 19 | pub stats_file: PathBuf, 20 | pub argmax_ucb: bool, 21 | pub byte_mutation_only: bool, 22 | pub stop_at: Option<Instant>, 23 | } 24 | 25 | pub static CONFIG: LazyLock<Config> = LazyLock::new(|| { 26 | let opts = Cli::parse(); 27 | 28 | create_dir_all(&opts.input_dir).expect("failed to create input dir"); 29 |
create_dir_all(&opts.output_dir).expect("failed to create output dir"); 30 | let batch_size = opts.batch_size.unwrap_or_else(|| default_batch_size(&opts)); 31 | 32 | let parsers_dir = PathBuf::from(opts.parsers_dir); 33 | let input_dir = PathBuf::from(opts.input_dir); 34 | let output_dir = PathBuf::from(opts.output_dir); 35 | let samples_dir = PathBuf::from(opts.samples_dir); 36 | let results_dir = PathBuf::from(opts.results_dir); 37 | 38 | let stats_file = PathBuf::from(opts.stats_file); 39 | create_dir_all(stats_file.parent().expect("stats file path has no parent")) 40 | .expect("failed to create parent dir for stats file"); 41 | 42 | let parsers_file = 43 | File::open(parsers_dir.join("parsers.json")).expect("failed to open parsers.json"); 44 | let parser_map: BTreeMap<String, ParserInfo> = 45 | serde_json::from_reader(parsers_file).expect("failed to read parsers.json"); 46 | 47 | let stop_at = opts 48 | .stop_after_seconds 49 | .map(|secs| Instant::now() + Duration::from_secs(secs)); 50 | 51 | Config { 52 | batch_size, 53 | parsers: parser_map.into_keys().collect(), 54 | parsers_dir, 55 | input_dir, 56 | output_dir, 57 | samples_dir, 58 | results_dir, 59 | stats_file, 60 | argmax_ucb: opts.argmax_ucb, 61 | byte_mutation_only: opts.byte_mutation_only, 62 | stop_at, 63 | } 64 | }); 65 | 66 | fn default_batch_size(opts: &Cli) -> usize { 67 | let mut sys = System::new(); 68 | sys.refresh_memory(); 69 | let ram = sys.total_memory(); 70 | let ram_batch_size = ram.div_ceil(1024 * 1024 * 1024).saturating_sub(20) as usize; 71 | if ram_batch_size < 100 { 72 | eprintln!("Warning: Available RAM is below the recommended minimum"); 73 | } 74 | let disk = 75 | available_space(&opts.output_dir).expect("failed to get available space for output dir"); 76 | let disk_batch_size = disk.div_ceil(2 * 1024 * 1024 * 1024) as usize; 77 | if disk_batch_size < 100 { 78 | eprintln!("Warning: Available disk space is below the recommended minimum"); 79 | } 80 | ram_batch_size.min(disk_batch_size) 81 | } 82 | 83 | #[derive(Parser)] 84 | struct Cli { 85 | /// number of samples per execution batch [default: depends on available resources] 86 | #[arg(short, long)] 87 | batch_size: Option<usize>, 88 | /// Stop running after how many seconds [default: infinite] 89 | #[arg(short, long)] 90 | stop_after_seconds: Option<u64>, 91 | /// directory to find the parsers 92 | #[arg(long, default_value = "../parsers")] 93 | parsers_dir: String, 94 | /// directory to temporarily save input samples for parsers in Docker 95 | #[arg(long, default_value = "../evaluation/input")] 96 | input_dir: String, 97 | /// directory to temporarily save outputs for parsers in Docker 98 | #[arg(long, default_value = "../evaluation/output")] 99 | output_dir: String, 100 | /// directory to store interesting samples 101 | #[arg(long, default_value = "../evaluation/samples")] 102 | samples_dir: String, 103 | /// directory to store outputs of interesting samples 104 | #[arg(long, default_value = "../evaluation/results")] 105 | results_dir: String, 106 | /// file to save the fuzz stats 107 | #[arg(long, default_value = "../evaluation/stats.json")] 108 | stats_file: String, 109 | /// Use argmax UCB instead of softmax UCB 110 | #[arg(long, default_value_t = false)] 111 | argmax_ucb: bool, 112 | /// Use byte-level mutations only without ZIP-level mutations 113 | #[arg(long, default_value_t = false)] 114 | byte_mutation_only: bool, 115 | } 116 | 117 | #[allow(dead_code)] 118 | #[derive(Deserialize)] 119 | struct ParserInfo { 120 | name: String, 121 | version: String, 122 | } 123 |
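// ---- Added annotation (not part of the original config.rs) ----
// A minimal sketch of the batch-size heuristic in default_batch_size() above,
// with the sysinfo/fs4 probing factored out so the arithmetic can be checked in
// isolation. The constants mirror the function above; the figures in the test
// are assumed, illustrative values, not measurements.
#[cfg(test)]
mod batch_size_sketch {
    // Pure version of the RAM/disk arithmetic: one sample per GiB of RAM beyond
    // a 20 GiB reserve, one sample per 2 GiB of free disk, whichever is smaller.
    fn batch_size_from(ram_bytes: u64, free_disk_bytes: u64) -> usize {
        let ram_batch = ram_bytes.div_ceil(1024 * 1024 * 1024).saturating_sub(20) as usize;
        let disk_batch = free_disk_bytes.div_ceil(2 * 1024 * 1024 * 1024) as usize;
        ram_batch.min(disk_batch)
    }

    #[test]
    fn worked_example() {
        const GIB: u64 = 1024 * 1024 * 1024;
        // 64 GiB of RAM -> 64 - 20 = 44; 500 GiB of free disk -> 250.
        // RAM is the bottleneck, so the batch size is 44 (and the RAM warning
        // above would fire, since 44 < 100).
        assert_eq!(batch_size_from(64 * GIB, 500 * GIB), 44);
    }
}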
-------------------------------------------------------------------------------- /zip-diff/src/fuzz/corpus.rs: -------------------------------------------------------------------------------- 1 | use crate::feature::{Feature, PAIR_LIST}; 2 | use crate::Input; 3 | use blake3::Hash; 4 | use rand::distributions::WeightedIndex; 5 | use rand::prelude::*; 6 | use rayon::prelude::*; 7 | use std::cmp::Reverse; 8 | use std::collections::HashSet; 9 | 10 | pub struct Seed { 11 | pub input: Input, 12 | pub hash: Hash, 13 | pub size: usize, 14 | pub feat: Feature, 15 | pub mutations: Vec<&'static str>, 16 | pub output_large: bool, 17 | pub selection_count: usize, 18 | pub fixed_energy: f64, 19 | } 20 | 21 | impl Seed { 22 | pub fn new( 23 | input: Input, 24 | hash: Hash, 25 | size: usize, 26 | feat: Feature, 27 | mutations: Vec<&'static str>, 28 | output_large: bool, 29 | ) -> Self { 30 | let mutation_count_energy = (-(mutations.len() as f64) / 4.0).exp(); 31 | let size_energy = 100.0 / size.max(50) as f64; 32 | let ok_energy = feat.ok.count_ones(..) as f64 / feat.ok.len() as f64; 33 | Self { 34 | input, 35 | hash, 36 | size, 37 | feat, 38 | mutations, 39 | output_large, 40 | selection_count: 0, 41 | fixed_energy: mutation_count_energy + size_energy + ok_energy, 42 | } 43 | } 44 | } 45 | 46 | pub struct Corpus { 47 | seeds: Vec<Seed>, 48 | feature_sum: Feature, 49 | hash_set: HashSet<Hash>, 50 | weighted_index: Option<WeightedIndex<f64>>, 51 | } 52 | 53 | impl Corpus { 54 | pub fn new() -> Self { 55 | Self { 56 | seeds: Vec::new(), 57 | feature_sum: Feature::new(), 58 | hash_set: HashSet::new(), 59 | weighted_index: None, 60 | } 61 | } 62 | 63 | pub fn len(&self) -> usize { 64 | self.seeds.len() 65 | } 66 | 67 | pub fn zip_count(&self) -> usize { 68 | self.seeds 69 | .iter() 70 | .filter(|seed| matches!(seed.input, Input::Zip(_))) 71 | .count() 72 | } 73 | 74 | pub fn incons_count(&self) -> usize { 75 | self.feature_sum.inconsistency.count_ones(..)
76 | } 77 | 78 | pub fn feature_sum_summary(&self) -> String { 79 | self.feature_sum.summary() 80 | } 81 | 82 | pub fn consistent_pairs(&self) -> Vec<&'static (String, String)> { 83 | self.feature_sum.consistent_pairs() 84 | } 85 | 86 | pub fn best_seeds(&self) -> impl Iterator<Item = (&'static String, &'static String, &Seed)> { 87 | self.feature_sum.inconsistency.ones().map(|i| { 88 | let best = self 89 | .seeds 90 | .iter() 91 | .filter(|seed| seed.feat.inconsistency.contains(i)) 92 | .max_by_key(|seed| { 93 | ( 94 | Reverse(seed.mutations.len()), 95 | seed.feat.inconsistency.count_ones(..), 96 | seed.feat.ok.count_ones(..), 97 | Reverse(seed.size), 98 | ) 99 | }) 100 | .unwrap(); 101 | let (a, b) = &PAIR_LIST[i]; 102 | (a, b, best) 103 | }) 104 | } 105 | 106 | pub fn insert_hash(&mut self, hash: Hash) -> bool { 107 | self.hash_set.insert(hash) 108 | } 109 | 110 | pub fn is_feature_interesting(&self, feat: &Feature) -> bool { 111 | self.seeds 112 | .par_iter() 113 | .all(|old| !feat.is_covered_by(&old.feat)) 114 | } 115 | 116 | pub fn insert_seed(&mut self, seed: Seed) { 117 | self.feature_sum |= &seed.feat; 118 | self.weighted_index = None; 119 | self.seeds.retain(|old| !old.feat.is_covered_by(&seed.feat)); 120 | self.seeds.push(seed); 121 | } 122 | 123 | pub fn construct_weights(&mut self) { 124 | let incons_popularity = self 125 | .seeds 126 | .par_iter() 127 | .fold_with( 128 | vec![0usize; self.feature_sum.inconsistency.len()], 129 | |mut sum, seed| { 130 | seed.feat.inconsistency.ones().for_each(|i| sum[i] += 1); 131 | sum 132 | }, 133 | ) 134 | .reduce_with(|mut a, b| { 135 | a.iter_mut().zip(b).for_each(|(x, y)| *x += y); 136 | a 137 | }) 138 | .unwrap(); 139 | let incons_energy_coef = 140 | self.seeds.len() as f64 / self.feature_sum.inconsistency.count_ones(..) as f64; 141 | let weights = self 142 | .seeds 143 | .par_iter() 144 | .map(|seed| { 145 | let selection_energy = (-(seed.selection_count as f64) / 4.0).exp(); 146 | let incons_energy = seed 147 | .feat 148 | .inconsistency 149 | .ones() 150 | .map(|i| incons_energy_coef / incons_popularity[i] as f64) 151 | .sum::<f64>(); 152 | let energy = seed.fixed_energy + incons_energy + selection_energy; 153 | if seed.output_large { 154 | energy / 10.0 155 | } else { 156 | energy 157 | } 158 | }) 159 | .collect::<Vec<_>>(); 160 | self.weighted_index = Some(WeightedIndex::new(weights).expect("invalid weights")); 161 | } 162 | 163 | pub fn select_seed(&self, rng: &mut ThreadRng) -> (usize, &Seed) { 164 | let index = self 165 | .weighted_index 166 | .as_ref() 167 | .expect("weights not constructed") 168 | .sample(rng); 169 | (index, &self.seeds[index]) 170 | } 171 | 172 | pub fn record_selection(&mut self, index: usize) { 173 | self.seeds[index].selection_count += 1; 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /zip-diff/src/fuzz/execute.rs: -------------------------------------------------------------------------------- 1 | use crate::config::CONFIG; 2 | use crate::corpus::{Corpus, Seed}; 3 | use crate::feature::Feature; 4 | use crate::mutation::{Sample, UcbHandle}; 5 | use rayon::prelude::*; 6 | use std::fs; 7 | use std::io::{BufWriter, Write}; 8 | use std::path::Path; 9 | use std::process::{Command, Stdio}; 10 | use std::time::Instant; 11 | use tar::Builder as TarBuilder; 12 | use walkdir::WalkDir; 13 | use zstd::Encoder as ZstdEncoder; 14 | 15 | // An input is (input sample, mutation names, UCB handles). 16 | // Returns the UCB handles of each actually executed (deduped) input, paired with whether it was interesting.
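// Per batch: save_inputs() writes every sample into the shared input dir,
// run_parsers() brings the parser containers up once via `docker compose up`,
// and collect_results() folds interesting samples (and their outputs) into
// the corpus and the samples/results directories.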
17 | pub fn execute(corpus: &mut Corpus, samples: Vec<Sample>) -> Vec<(Vec<UcbHandle>, bool)> { 18 | save_inputs(&samples); 19 | let count = samples.len(); 20 | println!("Executing {count} inputs"); 21 | let start = Instant::now(); 22 | run_parsers(); 23 | println!( 24 | "Finished executing {count} inputs in {:.3} seconds", 25 | start.elapsed().as_secs_f64() 26 | ); 27 | let start = Instant::now(); 28 | let ucb_results = collect_results(corpus, samples); 29 | println!( 30 | "Collected results in {:.3} seconds", 31 | start.elapsed().as_secs_f64() 32 | ); 33 | ucb_results 34 | } 35 | 36 | fn save_inputs(samples: &[Sample]) { 37 | fs::remove_dir_all(&CONFIG.input_dir).ok(); 38 | fs::remove_dir_all(&CONFIG.output_dir).ok(); 39 | fs::create_dir_all(&CONFIG.input_dir).expect("failed to create input dir"); 40 | 41 | for sample in samples { 42 | let path = CONFIG.input_dir.join(&sample.name); 43 | fs::write(path, &sample.bytes).expect("failed to save input file"); 44 | } 45 | } 46 | 47 | fn run_parsers() { 48 | Command::new("docker") 49 | .arg("compose") 50 | .arg("up") 51 | .current_dir(&CONFIG.parsers_dir) 52 | .stdout(Stdio::null()) 53 | .stderr(Stdio::null()) 54 | .spawn() 55 | .expect("failed to run parsers") 56 | .wait() 57 | .expect("failed to wait for parsers"); 58 | } 59 | 60 | fn collect_results(corpus: &mut Corpus, samples: Vec<Sample>) -> Vec<(Vec<UcbHandle>, bool)> { 61 | samples 62 | .into_iter() 63 | .filter_map(|s| { 64 | let filename = { 65 | let hash = s.hash.to_string(); 66 | let dir = hash.split_at(2).0; 67 | format!("{dir}/{hash}.zip") 68 | }; 69 | let sample_path = CONFIG.samples_dir.join(&filename); 70 | if sample_path.exists() { 71 | return None; 72 | } 73 | let feat = Feature::par_read(&s.name); 74 | let interesting = corpus.is_feature_interesting(&feat); 75 | if interesting { 76 | fs::create_dir_all(sample_path.parent().unwrap()) 77 | .expect("failed to create data dir"); 78 | fs::rename(CONFIG.input_dir.join(&s.name), sample_path) 79 | .expect("failed to move input file"); 80 | let results_dir = CONFIG.results_dir.join(&filename); 81 | fs::create_dir_all(&results_dir).expect("failed to create results dir"); 82 | // First move small outputs in parallel with rayon. 83 | // Then compress large outputs with parallelized ZSTD.
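// Outputs larger than 1 MiB are collected as (dest, src) pairs first and
// archived one at a time afterwards; each archive itself is compressed
// with multithreaded zstd (see archive_dir below).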
84 | let large_outputs = CONFIG 85 | .parsers 86 | .par_iter() 87 | .filter_map(|parser| { 88 | let output_path = CONFIG.output_dir.join(parser).join(&s.name); 89 | let result_path = results_dir.join(parser); 90 | if output_path.is_dir() && du(&output_path) > 1024 * 1024 { 91 | // tar.zst if larger than 1 MiB 92 | Some((result_path.with_extension("tar.zst"), output_path)) 93 | } else if matches!(output_path.try_exists(), Ok(false)) { 94 | fs::write(&result_path, b"").expect(&format!( 95 | "failed to write error result to {}", 96 | result_path.display(), 97 | )); 98 | None 99 | } else { 100 | fs::rename(&output_path, &result_path).expect(&format!( 101 | "failed to move {} to {}", 102 | output_path.display(), 103 | result_path.display() 104 | )); 105 | None 106 | } 107 | }) 108 | .collect::<Vec<_>>(); 109 | large_outputs.iter().for_each(archive_dir); 110 | corpus.insert_seed(Seed::new( 111 | s.input, 112 | s.hash, 113 | s.bytes.len(), 114 | feat, 115 | s.mutations, 116 | !large_outputs.is_empty(), 117 | )); 118 | } 119 | Some((s.ucb_handles, interesting)) 120 | }) 121 | .collect() 122 | } 123 | 124 | fn archive_dir((dest, src): &(impl AsRef<Path>, impl AsRef<Path>)) { 125 | let file = fs::File::create(dest).expect("failed to create result tar.zst"); 126 | let mut writer = BufWriter::new(file); 127 | let mut zstd = ZstdEncoder::new(&mut writer, 1).expect("failed to create ZSTD writer"); 128 | zstd.multithread(rayon::current_num_threads() as u32) 129 | .expect("failed to set multithread ZSTD"); 130 | { 131 | let mut tar = TarBuilder::new(&mut zstd); 132 | tar.append_dir_all("", src) 133 | .expect("failed to archive output"); 134 | tar.finish().expect("failed to finish TAR"); 135 | } 136 | zstd.finish().expect("failed to finish ZSTD"); 137 | writer.flush().expect("failed to flush output archive"); 138 | // remove here to avoid occupying I/O cache 139 | fs::remove_dir_all(src).expect("failed to remove output directory"); 140 | } 141 | 142 | fn du(path: impl AsRef<Path>) -> u64 { 143 | WalkDir::new(path) 144 | .into_iter() 145 | .filter_map(|entry| Some(entry.ok()?.metadata().ok()?.len())) 146 | .sum() 147 | } 148 | -------------------------------------------------------------------------------- /zip-diff/src/fuzz/feature.rs: -------------------------------------------------------------------------------- 1 | use crate::config::CONFIG; 2 | use fixedbitset::FixedBitSet; 3 | use std::ops::BitOrAssign; 4 | use std::path::Path; 5 | use std::sync::LazyLock; 6 | use zip_diff::hash::{read_parsing_result, ParsingResult}; 7 | 8 | #[derive(Clone, PartialEq, Eq, Hash)] 9 | pub struct Feature { 10 | pub ok: FixedBitSet, 11 | pub inconsistency: FixedBitSet, 12 | } 13 | 14 | pub static PAIR_LIST: LazyLock<Vec<(String, String)>> = LazyLock::new(Feature::pair_list); 15 | 16 | impl Feature { 17 | pub fn new() -> Self { 18 | let n = CONFIG.parsers.len(); 19 | let ok = FixedBitSet::with_capacity(n); 20 | let inconsistency = FixedBitSet::with_capacity(n * (n - 1) / 2); 21 | Self { ok, inconsistency } 22 | } 23 | 24 | pub fn par_read(name: impl AsRef<Path>) -> Self { 25 | let mut feature = Self::new(); 26 | feature.apply_testcase(name, true); 27 | feature 28 | } 29 | 30 | pub fn apply_testcase(&mut self, name: impl AsRef<Path>, par: bool) { 31 | let results = CONFIG 32 | .parsers 33 | .iter() 34 | .map(|parser| read_parsing_result(CONFIG.output_dir.join(parser).join(&name), par)) 35 | .collect::<Vec<_>>(); 36 | 37 | let mut p = 0; 38 | for (i, x) in results.iter().enumerate() { 39 | if matches!(x, ParsingResult::Ok(_)) { 40 | self.ok.insert(i); 41 | } 42 | for y in &results[..i] {
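// `p` enumerates unordered pairs in the same order PAIR_LIST is built
// (for each i, all j < i), so inconsistency bit p maps back to a parser pair.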
43 | if x.inconsistent_with(y) { 44 | self.inconsistency.insert(p); 45 | } 46 | p += 1; 47 | } 48 | } 49 | } 50 | 51 | pub fn is_covered_by(&self, by: &Self) -> bool { 52 | self.inconsistency.is_subset(&by.inconsistency) && self.ok.is_subset(&by.ok) 53 | } 54 | 55 | pub fn consistent_pairs(&self) -> Vec<&'static (String, String)> { 56 | self.inconsistency.zeroes().map(|i| &PAIR_LIST[i]).collect() 57 | } 58 | 59 | pub fn summary(&self) -> String { 60 | let ok_count = self.ok.count_ones(..); 61 | let incons_count = self.inconsistency.count_ones(..); 62 | format!("ok: {ok_count:2}, incons: {incons_count:4}") 63 | } 64 | 65 | fn pair_list() -> Vec<(String, String)> { 66 | let mut result = Vec::new(); 67 | for (i, x) in CONFIG.parsers.iter().enumerate() { 68 | for y in CONFIG.parsers.iter().take(i) { 69 | result.push((x.clone(), y.clone())); 70 | } 71 | } 72 | result 73 | } 74 | } 75 | 76 | impl BitOrAssign<&Feature> for Feature { 77 | fn bitor_assign(&mut self, rhs: &Feature) { 78 | self.ok |= &rhs.ok; 79 | self.inconsistency |= &rhs.inconsistency; 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /zip-diff/src/fuzz/generate.rs: -------------------------------------------------------------------------------- 1 | use crate::config::CONFIG; 2 | use rand::distributions::{DistString, Standard}; 3 | use rand::prelude::*; 4 | use zip_diff::fields::CompressionMethod; 5 | use zip_diff::zip::ZipArchive; 6 | 7 | pub fn init_corpus() -> Vec<ZipArchive> { 8 | let mut result = Vec::new(); 9 | 10 | let mut zip = ZipArchive::default(); 11 | zip.add_simple("a", b"a").unwrap(); 12 | zip.add_simple("b/c", b"c").unwrap(); 13 | zip.add_simple("b/d", b"d").unwrap(); 14 | zip.finalize().unwrap(); 15 | result.push(zip); 16 | 17 | let mut rng = thread_rng(); 18 | 19 | for _ in 0..CONFIG.batch_size { 20 | let mut zip = ZipArchive::default(); 21 | let count = rng.gen_range(0..5); 22 | for _ in 0..count { 23 | let name_len = rng.gen_range(0..5); 24 | let name = Standard.sample_string(&mut rng, name_len); 25 | let data_len = rng.gen_range(0..10); 26 | let mut data = Vec::with_capacity(data_len); 27 | data.resize_with(data_len, || rng.gen()); 28 | let compression = match rng.gen_range(0..16) { 29 | 0..8 => CompressionMethod::STORED, 30 | 8..12 => CompressionMethod::DEFLATED, 31 | 12 => CompressionMethod::BZIP2, 32 | 13 => CompressionMethod::ZSTD, 33 | 14 => CompressionMethod::LZMA, 34 | 15 => CompressionMethod::XZ, 35 | _ => unreachable!(), 36 | }; 37 | zip.add_file( 38 | &name, 39 | &data, 40 | compression, 41 | rng.gen_ratio(1, 5), 42 | rng.gen_ratio(1, 5), 43 | ) 44 | .unwrap(); 45 | } 46 | if rng.gen_ratio(1, 5) { 47 | zip.set_eocd(true).unwrap(); 48 | } 49 | zip.finalize().unwrap(); 50 | result.push(zip); 51 | } 52 | 53 | result 54 | } 55 | -------------------------------------------------------------------------------- /zip-diff/src/fuzz/main.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::expect_fun_call)] 2 | 3 | mod config; 4 | mod corpus; 5 | mod execute; 6 | mod feature; 7 | mod generate; 8 | mod mutation; 9 | mod rand_utils; 10 | mod stats; 11 | 12 | use binwrite::BinWrite; 13 | use config::CONFIG; 14 | use corpus::Corpus; 15 | use execute::execute; 16 | use mutation::Mutator; 17 | use rand::thread_rng; 18 | use stats::Stats; 19 | use std::fs::canonicalize; 20 | use std::process::Command; 21 | use std::time::Instant; 22 | use zip_diff::zip::ZipArchive; 23 | 24 | #[derive(Clone)] 25 | pub enum Input { 26 | Zip(Box<ZipArchive>),
27 | Bytes(Vec<u8>), 28 | } 29 | 30 | fn main() { 31 | let input_dir = canonicalize(&CONFIG.input_dir).expect("failed to canonicalize input dir"); 32 | let output_dir = canonicalize(&CONFIG.output_dir).expect("failed to canonicalize output dir"); 33 | let parser_prepare_status = Command::new(CONFIG.parsers_dir.join("prepare.sh")) 34 | .env("INPUT_DIR", input_dir) 35 | .env("OUTPUT_DIR", output_dir) 36 | .status() 37 | .expect("failed to execute prepare.sh"); 38 | assert!(parser_prepare_status.success(), "prepare.sh failed"); 39 | 40 | let mut mutator = Mutator::new(); 41 | let mut stats = Stats::new(); 42 | let mut corpus = Corpus::new(); 43 | let rng = &mut thread_rng(); 44 | 45 | let initial_samples = generate::init_corpus() 46 | .into_iter() 47 | .filter_map(|zip| { 48 | let input = if CONFIG.byte_mutation_only { 49 | let mut buf = Vec::new(); 50 | zip.write(&mut buf) 51 | .expect("failed to convert initial ZIP to bytes"); 52 | Input::Bytes(buf) 53 | } else { 54 | Input::Zip(Box::new(zip)) 55 | }; 56 | let sample = mutator.generate_sample(&input, &[], 0, rng); 57 | corpus.insert_hash(sample.hash).then_some(sample) 58 | }) 59 | .collect(); 60 | 61 | execute(&mut corpus, initial_samples); 62 | 63 | loop { 64 | println!( 65 | "inputs: {}, corpus size: {} ({} zips), sum: {}", 66 | stats.input_count(), 67 | corpus.len(), 68 | corpus.zip_count(), 69 | corpus.feature_sum_summary() 70 | ); 71 | corpus.construct_weights(); 72 | mutator.construct_ucb(); 73 | let (seed_indices, samples): (Vec<_>, Vec<_>) = std::iter::repeat(()) 74 | .filter_map(|_| { 75 | let (seed_index, seed) = corpus.select_seed(rng); 76 | let mutate_times = rand_utils::rand_len(rng); 77 | let sample = 78 | mutator.generate_sample(&seed.input, &seed.mutations, mutate_times, rng); 79 | corpus 80 | .insert_hash(sample.hash) 81 | .then_some((seed_index, sample)) 82 | }) 83 | .take(CONFIG.batch_size) 84 | .unzip(); 85 | for index in seed_indices { 86 | corpus.record_selection(index); 87 | } 88 | let ucb_results = execute(&mut corpus, samples); 89 | mutator.record_ucb(&ucb_results); 90 | stats.record_iteration(ucb_results.len(), &corpus, &mutator); 91 | stats.save(); 92 | if let Some(stop_at) = CONFIG.stop_at { 93 | if Instant::now() > stop_at { 94 | break; 95 | } 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /zip-diff/src/fuzz/rand_utils.rs: -------------------------------------------------------------------------------- 1 | use crate::CONFIG; 2 | use num_traits::{NumCast, Saturating, Unsigned, Zero}; 3 | use rand::distributions::uniform::{SampleRange, SampleUniform}; 4 | use rand::distributions::{Standard, WeightedIndex}; 5 | use rand::prelude::*; 6 | use zip_diff::zip::ZipArchive; 7 | 8 | pub struct Ucb { 9 | scores: Vec<f64>, 10 | trials: Vec<f64>, 11 | weighted_index: Option<WeightedIndex<f64>>, 12 | } 13 | 14 | impl Ucb { 15 | pub fn new(len: usize) -> Self { 16 | Self { 17 | scores: vec![0.0; len], 18 | trials: vec![0.0; len], 19 | weighted_index: None, 20 | } 21 | } 22 | 23 | pub fn construct(&mut self) { 24 | for i in 0..self.scores.len() { 25 | // recent results are more important than old results 26 | self.scores[i] *= 0.995; 27 | self.trials[i] *= 0.995; 28 | } 29 | let double_ln_total_trial: f64 = 2.0 * self.trials.iter().sum::<f64>().max(1.0).ln(); 30 | let weights = self 31 | .scores 32 | .iter() 33 | .zip(self.trials.iter().map(|t| t.max(1.0))) 34 | .map(|(score, trial)| { 35 | let ucb = score / trial + (double_ln_total_trial / trial).sqrt(); 36 | (ucb * 5.0).exp() // softmax temperature 37 |
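// UCB1: mean score per trial plus the exploration bonus sqrt(2 ln N / n),
// flattened into softmax weights via exp(5 * ucb).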
}); 38 | self.weighted_index = if CONFIG.argmax_ucb { 39 | let mut max_weight = f64::NEG_INFINITY; 40 | for w in weights.clone() { 41 | if w > max_weight { 42 | max_weight = w; 43 | } 44 | } 45 | Some( 46 | WeightedIndex::new(weights.map(|w| { 47 | if w == max_weight { 48 | 1.0 49 | } else { 50 | 1e-6 // nonzero, to avoid an infinite loop when mutation always fails 51 | } 52 | })) 53 | .unwrap(), 54 | ) 55 | } else { 56 | Some(WeightedIndex::new(weights).unwrap()) 57 | }; 58 | } 59 | 60 | pub fn sample<R: Rng>(&self, rng: &mut R) -> usize { 61 | self.weighted_index 62 | .as_ref() 63 | .expect("need to construct before sampling") 64 | .sample(rng) 65 | } 66 | 67 | pub fn record(&mut self, id: usize, trial: f64, score: f64) { 68 | self.trials[id] += trial; 69 | self.scores[id] += score; 70 | self.weighted_index = None; 71 | } 72 | 73 | pub fn scores(&self) -> &[f64] { 74 | &self.scores 75 | } 76 | 77 | pub fn trials(&self) -> &[f64] { 78 | &self.trials 79 | } 80 | } 81 | 82 | #[derive(Clone, Copy, PartialEq, Eq)] 83 | pub enum HeaderLocation { 84 | Lfh, 85 | Cdh, 86 | Both, 87 | } 88 | 89 | impl HeaderLocation { 90 | pub fn lfh(self) -> bool { 91 | matches!(self, Self::Lfh | Self::Both) 92 | } 93 | 94 | pub fn cdh(self) -> bool { 95 | matches!(self, Self::Cdh | Self::Both) 96 | } 97 | } 98 | 99 | impl Distribution<HeaderLocation> for Standard { 100 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> HeaderLocation { 101 | let i = (0..5).choose(rng).unwrap(); 102 | match i { 103 | 0 => HeaderLocation::Lfh, 104 | 1 => HeaderLocation::Cdh, 105 | _ => HeaderLocation::Both, 106 | } 107 | } 108 | } 109 | 110 | pub fn rand_header<R: Rng>(zip: &ZipArchive, rng: &mut R) -> Option<(usize, HeaderLocation)> { 111 | let loc = rng.gen(); 112 | 113 | let len = match loc { 114 | HeaderLocation::Lfh => zip.files.len(), 115 | HeaderLocation::Cdh => zip.cd.len(), 116 | HeaderLocation::Both => zip.files.len().min(zip.cd.len()), 117 | }; 118 | 119 | let index = (0..len).choose(rng)?; 120 | 121 | Some((index, loc)) 122 | } 123 | 124 | /// returns a random number in 1..=65: x with probability 2^-x (and 65 with probability 2^-64) 125 | pub fn rand_len<R: RngCore>(rng: &mut R) -> usize { 126 | rng.next_u64().trailing_zeros() as usize + 1 127 | } 128 | 129 | pub fn mutate_len<N, R>(size: &mut N, rng: &mut R) 130 | where 131 | R: RngCore, 132 | N: Copy + Saturating + Zero + Unsigned + NumCast, 133 | { 134 | let delta = N::from(rand_len(rng)).unwrap(); 135 | if size.is_zero() || rng.gen() { 136 | *size = size.saturating_add(delta); 137 | } else { 138 | *size = size.saturating_sub(delta); 139 | } 140 | } 141 | 142 | pub fn rand_range<G, T, R>(rng: &mut G, range: R) -> Option<(T, T)> 143 | where 144 | G: Rng, 145 | T: SampleUniform + Ord, 146 | R: SampleRange<T> + Clone, 147 | { 148 | if range.is_empty() { 149 | return None; 150 | } 151 | let x = rng.gen_range(range.clone()); 152 | let y = rng.gen_range(range); 153 | if x < y { 154 | Some((x, y)) 155 | } else { 156 | Some((y, x)) 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /zip-diff/src/fuzz/stats.rs: -------------------------------------------------------------------------------- 1 | use crate::config::CONFIG; 2 | use crate::corpus::Corpus; 3 | use crate::mutation::{MutationStats, Mutator}; 4 | use serde::Serialize; 5 | use std::collections::BTreeMap; 6 | use std::fs::File; 7 | use std::time::Instant; 8 | 9 | #[derive(Serialize)] 10 | struct Iteration { 11 | input_count: usize, 12 | corpus_size: usize, 13 | incons_count: usize, 14 | seconds_used: f64, 15 | } 16 | 17 | #[derive(Serialize)] 18 | struct SeedStat { 19 | hash:
String, 20 | mutations: Vec<&'static str>, 21 | ok_count: usize, 22 | incons_count: usize, 23 | selection_count: usize, 24 | } 25 | 26 | #[derive(Serialize)] 27 | pub struct Stats { 28 | #[serde(skip)] 29 | start_at: Instant, 30 | /// total number of generated inputs 31 | input_count: usize, 32 | /// stats of the best seeds 33 | best_seeds: Vec<SeedStat>, 34 | /// map from parser pair to best seed hash 35 | best_seed_map: BTreeMap<&'static String, BTreeMap<&'static String, String>>, 36 | /// fuzzing iteration history 37 | iterations: Vec<Iteration>, 38 | /// parser pairs that are consistent in the test cases 39 | consistent_pairs: Vec<&'static (String, String)>, 40 | /// Mutation trials 41 | mutations: Option<MutationStats>, 42 | // ablation configs 43 | argmax_ucb: bool, 44 | byte_mutation_only: bool, 45 | } 46 | 47 | impl Stats { 48 | pub fn new() -> Self { 49 | Self { 50 | start_at: Instant::now(), 51 | input_count: 0, 52 | best_seeds: Vec::new(), 53 | best_seed_map: BTreeMap::new(), 54 | iterations: Vec::new(), 55 | consistent_pairs: Vec::new(), 56 | mutations: None, 57 | argmax_ucb: CONFIG.argmax_ucb, 58 | byte_mutation_only: CONFIG.byte_mutation_only, 59 | } 60 | } 61 | 62 | pub fn record_iteration(&mut self, new_input_count: usize, corpus: &Corpus, mutator: &Mutator) { 63 | self.input_count += new_input_count; 64 | let mut best_seeds = Vec::new(); 65 | self.best_seed_map = BTreeMap::new(); 66 | for (a, b, seed) in corpus.best_seeds() { 67 | self.best_seed_map 68 | .entry(a) 69 | .or_default() 70 | .insert(b, seed.hash.to_string()); 71 | best_seeds.push(seed); 72 | } 73 | best_seeds.sort_unstable_by_key(|seed| seed.hash.as_bytes()); 74 | best_seeds.dedup_by_key(|seed| &seed.hash); 75 | self.best_seeds = best_seeds 76 | .into_iter() 77 | .map(|seed| SeedStat { 78 | hash: seed.hash.to_string(), 79 | mutations: seed.mutations.clone(), 80 | ok_count: seed.feat.ok.count_ones(..), 81 | incons_count: seed.feat.inconsistency.count_ones(..), 82 | selection_count: seed.selection_count, 83 | }) 84 | .collect(); 85 | self.iterations.push(Iteration { 86 | input_count: self.input_count, 87 | corpus_size: corpus.len(), 88 | incons_count: corpus.incons_count(), 89 | seconds_used: self.start_at.elapsed().as_secs_f64(), 90 | }); 91 | self.consistent_pairs = corpus.consistent_pairs(); 92 | self.mutations = Some(mutator.stats()); 93 | } 94 | 95 | pub fn input_count(&self) -> usize { 96 | self.input_count 97 | } 98 | 99 | pub fn save(&self) { 100 | let file = File::create(&CONFIG.stats_file).expect("failed to create stats file"); 101 | serde_json::to_writer_pretty(file, self).expect("failed to write stats"); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /zip-diff/src/hash.rs: -------------------------------------------------------------------------------- 1 | use blake3::{Hash, Hasher}; 2 | use std::path::Path; 3 | 4 | #[derive(Clone, Copy)] 5 | pub enum ParsingResult { 6 | Ok(Hash), 7 | Err, 8 | } 9 | 10 | impl ParsingResult { 11 | pub fn inconsistent_with(&self, rhs: &Self) -> bool { 12 | match (self, rhs) { 13 | (ParsingResult::Ok(lhs), ParsingResult::Ok(rhs)) => lhs != rhs, 14 | _ => false, 15 | } 16 | } 17 | } 18 | 19 | pub fn read_parsing_result(path: impl AsRef<Path>, par: bool) -> ParsingResult { 20 | let path = path.as_ref(); 21 | if path.is_dir() { 22 | ParsingResult::Ok(dirhash(path, par).unwrap_or(Hash::from_bytes(Default::default()))) 23 | } else { 24 | ParsingResult::Err 25 | } 26 | } 27 | 28 | // Returns `None` for empty directory 29 | fn dirhash(path: impl AsRef<Path>, par: bool) ->
Option<Hash> { 30 | let path = path.as_ref(); 31 | let path_display = path.display(); 32 | let mut hasher = Hasher::new(); 33 | 34 | if path.is_symlink() { 35 | hasher.update(b"L"); 36 | hasher.update( 37 | &path 38 | .read_link() 39 | .unwrap_or_else(|_| panic!("failed to read link {path_display}")) 40 | .into_os_string() 41 | .into_encoded_bytes(), 42 | ); 43 | } else if path.is_file() { 44 | hasher.update(b"F"); 45 | if par { 46 | hasher.update_mmap_rayon(path) 47 | } else { 48 | hasher.update_mmap(path) 49 | } 50 | .unwrap_or_else(|_| panic!("failed to read file {path_display}")); 51 | } else if path.is_dir() { 52 | hasher.update(b"D"); 53 | let mut children = path 54 | .read_dir() 55 | .unwrap_or_else(|_| panic!("failed to read dir {path_display}")) 56 | .filter_map(|entry| { 57 | let entry = 58 | entry.unwrap_or_else(|_| panic!("failed to read dir entry in {path_display}")); 59 | let entry_path = entry.path(); 60 | let mut hasher = Hasher::new(); 61 | let name = entry.file_name().into_encoded_bytes(); 62 | if name.iter().all(|x| { 63 | x.is_ascii_alphanumeric() || matches!(x, b'.' | b'_' | b'-' | b'[' | b']') 64 | }) { 65 | hasher.update(b"N"); 66 | hasher.update(&name); 67 | } else { 68 | // treat all special file names as the same 69 | hasher.update(b"S"); 70 | } 71 | hasher.update( 72 | dirhash(entry_path, par)? /* ignore empty dir */ 73 | .as_bytes(), 74 | ); 75 | Some(hasher.finalize().into()) 76 | }) 77 | .collect::<Vec<[u8; 32]>>(); 78 | if children.is_empty() { 79 | return None; 80 | } 81 | children.sort_unstable(); 82 | for child in children { 83 | hasher.update(&child); 84 | } 85 | } else { 86 | panic!("file does not exist, permission error, or unknown file type: {path_display}"); 87 | } 88 | 89 | Some(hasher.finalize()) 90 | } 91 | -------------------------------------------------------------------------------- /zip-diff/src/lfh.rs: -------------------------------------------------------------------------------- 1 | use crate::extra::{ExtraField, Zip64ExtendedInfo}; 2 | use crate::fields::*; 3 | use crate::utils::{binwrite_transform, BinCount}; 4 | use anyhow::{bail, Context, Result}; 5 | use binwrite::BinWrite; 6 | use educe::Educe; 7 | 8 | #[derive(BinWrite, Clone, Educe)] 9 | #[educe(Debug, Default)] 10 | pub struct LocalFileHeader { 11 | #[educe(Default = Self::SIGNATURE)] 12 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))] 13 | pub signature: u32, 14 | #[educe(Default = 20)] 15 | pub version_needed: u16, 16 | #[binwrite(with(binwrite_transform))] 17 | pub general_purpose_flag: GeneralPurposeFlag, 18 | #[binwrite(with(binwrite_transform))] 19 | pub compression_method: CompressionMethod, 20 | pub last_mod: DosDateTime, 21 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))] 22 | pub crc32: u32, 23 | pub compressed_size: u32, 24 | pub uncompressed_size: u32, 25 | pub file_name_length: u16, 26 | pub extra_field_length: u16, 27 | #[educe(Debug(method(crate::utils::fmt_utf8)))] 28 | pub file_name: Vec<u8>, 29 | pub extra_fields: Vec<ExtraField>, 30 | /// only one of `extra_fields` and `extra_fields_raw` can be set 31 | #[educe(Debug(method(crate::utils::fmt_hex)))] 32 | pub extra_fields_raw: Vec<u8>, 33 | 34 | #[binwrite(ignore)] 35 | pub zip64: Zip64ExtendedInfo, 36 | #[binwrite(ignore)] 37 | pub keep_empty_zip64: bool, 38 | } 39 | 40 | impl LocalFileHeader { 41 | pub const SIGNATURE: u32 = 0x04034b50; 42 | 43 | /// Set LFH field and ZIP64 field according to size 44 | pub fn set_compressed_size(&mut self, size: usize, force_zip64: bool) { 45 | if !force_zip64 { 46 | if let Ok(size) = size.try_into() { 47 |
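// the size fits in u32: store it in the fixed-width LFH field and drop any ZIP64 override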
self.compressed_size = size; 48 | self.zip64.compressed_size = None; 49 | return; 50 | } 51 | } 52 | self.compressed_size = u32::MAX; 53 | self.zip64.compressed_size = Some(size as u64); 54 | } 55 | 56 | /// Set LFH field and ZIP64 field according to size 57 | pub fn set_uncompressed_size(&mut self, size: usize, force_zip64: bool) { 58 | if !force_zip64 { 59 | if let Ok(size) = size.try_into() { 60 | self.uncompressed_size = size; 61 | self.zip64.original_size = None; 62 | return; 63 | } 64 | } 65 | self.uncompressed_size = u32::MAX; 66 | self.zip64.original_size = Some(size as u64); 67 | } 68 | 69 | pub fn set_file_name(&mut self, file_name: &str) -> Result<()> { 70 | file_name.as_bytes().clone_into(&mut self.file_name); 71 | self.file_name_length = self.file_name.len().try_into()?; 72 | Ok(()) 73 | } 74 | 75 | /// Finalize extra fields, add ZIP64 field 76 | pub fn finalize(&mut self) -> Result<()> { 77 | if self.keep_empty_zip64 || !self.zip64.is_empty() { 78 | self.extra_fields.push(ExtraField { 79 | header_id: 0, 80 | size: 0, 81 | data: Box::new(self.zip64.clone()), 82 | }); 83 | } 84 | 85 | if !self.extra_fields.is_empty() && !self.extra_fields_raw.is_empty() { 86 | bail!("extra_fields and extra_fields_raw cannot be set at the same time"); 87 | } 88 | 89 | if self.extra_fields.is_empty() { 90 | self.extra_field_length = self 91 | .extra_fields_raw 92 | .len() 93 | .try_into() 94 | .context("Extra fields too long")?; 95 | } else { 96 | for field in &mut self.extra_fields { 97 | field.finalize()?; 98 | } 99 | 100 | self.extra_field_length = self 101 | .extra_fields 102 | .byte_count() 103 | .context("Failed to count extra fields")? 104 | .try_into() 105 | .context("Extra fields too long")?; 106 | } 107 | 108 | Ok(()) 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /zip-diff/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod cdh; 2 | pub mod compress; 3 | pub mod dd; 4 | pub mod eocd; 5 | pub mod extra; 6 | pub mod fields; 7 | pub mod hash; 8 | pub mod lfh; 9 | pub mod utils; 10 | pub mod zip; 11 | -------------------------------------------------------------------------------- /zip-diff/src/utils.rs: -------------------------------------------------------------------------------- 1 | use crate::cdh::CentralDirectoryHeader; 2 | use crate::compress::decompress; 3 | use crate::fields::CompressionMethod; 4 | use crate::zip::FileEntry; 5 | use anyhow::{anyhow, Context, Result}; 6 | use binwrite::{BinWrite, WriterOption}; 7 | use std::fmt::{self, Debug, Formatter}; 8 | use std::io::{self, Write}; 9 | 10 | pub fn binwrite_option<W, T>( 11 | option: &Option<T>, 12 | writer: &mut W, 13 | options: &WriterOption, 14 | ) -> io::Result<()> 15 | where 16 | W: Write, 17 | T: BinWrite, 18 | { 19 | if let Some(val) = option { 20 | val.write_options(writer, options)?; 21 | } 22 | Ok(()) 23 | } 24 | 25 | pub trait BinWriteTransform { 26 | type Type: BinWrite; 27 | 28 | fn binwrite_transform(&self) -> io::Result<Self::Type>; 29 | } 30 | 31 | pub fn binwrite_transform<W, T>(var: &T, writer: &mut W, options: &WriterOption) -> io::Result<()> 32 | where 33 | W: Write, 34 | T: BinWriteTransform, 35 | { 36 | var.binwrite_transform()?.write_options(writer, options) 37 | } 38 | 39 | pub trait BinCount { 40 | /// Count how many bytes would be written via `BinWrite`.
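/// (Implemented below for every `BinWrite` type by serializing into a counting sink.)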
41 | fn byte_count(&self) -> Result<usize>; 42 | } 43 | 44 | impl<T: BinWrite> BinCount for T { 45 | fn byte_count(&self) -> Result<usize> { 46 | let mut counter = WriteCounter::new(); 47 | self.write(&mut counter)?; 48 | Ok(counter.count) 49 | } 50 | } 51 | 52 | struct WriteCounter { 53 | count: usize, 54 | } 55 | 56 | impl WriteCounter { 57 | fn new() -> Self { 58 | WriteCounter { count: 0 } 59 | } 60 | } 61 | 62 | impl Write for WriteCounter { 63 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { 64 | self.count += buf.len(); 65 | Ok(buf.len()) 66 | } 67 | 68 | fn flush(&mut self) -> io::Result<()> { 69 | Ok(()) 70 | } 71 | } 72 | 73 | pub fn find_file<I, T>(iter: I, file_name: &str) -> Result<(usize, T)> 74 | where 75 | T: GetFileName, 76 | I: IntoIterator<Item = T>, 77 | { 78 | iter.into_iter() 79 | .enumerate() 80 | .find(|(_, f)| f.get_file_name() == file_name.as_bytes()) 81 | .context(format!("Failed to find {}", file_name)) 82 | } 83 | 84 | pub trait GetFileName { 85 | fn get_file_name(&self) -> &[u8]; 86 | } 87 | 88 | impl GetFileName for FileEntry { 89 | fn get_file_name(&self) -> &[u8] { 90 | &self.lfh.file_name 91 | } 92 | } 93 | 94 | impl GetFileName for &FileEntry { 95 | fn get_file_name(&self) -> &[u8] { 96 | &self.lfh.file_name 97 | } 98 | } 99 | 100 | impl GetFileName for &mut FileEntry { 101 | fn get_file_name(&self) -> &[u8] { 102 | &self.lfh.file_name 103 | } 104 | } 105 | 106 | impl GetFileName for &mut CentralDirectoryHeader { 107 | fn get_file_name(&self) -> &[u8] { 108 | &self.file_name 109 | } 110 | } 111 | 112 | pub fn align_entry_size(entries: &mut [&mut FileEntry], padding: u8) -> Result<()> { 113 | for entry in entries.iter_mut() { 114 | entry.data = decompress(entry.lfh.compression_method, &entry.data)?; 115 | } 116 | 117 | let max_len = entries 118 | .iter() 119 | .map(|entry| entry.data.len()) 120 | .max() 121 | .ok_or(anyhow!("no entry provided"))?; 122 | 123 | for entry in entries { 124 | entry.data.resize(max_len, padding); 125 | entry.lfh.compressed_size = max_len as u32; 126 | entry.lfh.uncompressed_size = max_len as u32; 127 | entry.lfh.compression_method = CompressionMethod::STORED; 128 | entry.lfh.crc32 = crc32fast::hash(&entry.data); 129 | } 130 | 131 | Ok(()) 132 | } 133 | 134 | pub fn fmt_utf8(b: &[u8], f: &mut Formatter) -> fmt::Result { 135 | std::str::from_utf8(b).map_err(|_| fmt::Error)?.fmt(f) 136 | } 137 | 138 | pub fn fmt_hex(b: &[u8], f: &mut Formatter) -> fmt::Result { 139 | for x in b { 140 | write!(f, "{x:02x} ")?; 141 | } 142 | Ok(()) 143 | } 144 | 145 | // reference: https://github.com/shuax/custom_crc32 146 | pub fn crc32_patch(data: &[u8], target: u32) -> u32 { 147 | const fn crc32_rev(byte: u32) -> u32 { 148 | const POLY: u32 = 0xedb88320; 149 | let mut x = byte << 24; 150 | let mut i = 0; 151 | while i < 8 { 152 | if x & 0x80000000 != 0 { 153 | x = ((x ^ POLY) << 1) | 1; 154 | } else { 155 | x <<= 1; 156 | } 157 | i += 1; 158 | } 159 | x 160 | } 161 | 162 | let current = !crc32fast::hash(data); 163 | let mut result = !target; 164 | for i in 0..4 { 165 | result = (result << 8) ^ crc32_rev(result >> 24) ^ ((current >> ((3 - i) * 8)) & 0xff); 166 | } 167 | result 168 | } 169 | -------------------------------------------------------------------------------- /zip-diff/src/zip.rs: -------------------------------------------------------------------------------- 1 | use crate::cdh::CentralDirectoryHeader; 2 | use crate::compress::compress; 3 | use crate::dd::{DataDescriptor, U32or64}; 4 | use crate::eocd::{ 5 | EndOfCentralDirectoryRecord, Zip64EndOfCentralDirectoryLocator, 6 |
Zip64EndOfCentralDirectoryRecord, 7 | }; 8 | use crate::fields::{CompressionMethod, GeneralPurposeFlag}; 9 | use crate::lfh::LocalFileHeader; 10 | use crate::utils::{binwrite_option, BinCount}; 11 | use anyhow::{Context, Result}; 12 | use binwrite::BinWrite; 13 | use educe::Educe; 14 | use std::fmt::{self, Formatter}; 15 | 16 | #[derive(BinWrite, Clone, Default, Debug)] 17 | pub struct ZipArchive { 18 | pub files: Vec<FileEntry>, 19 | pub cd: Vec<CentralDirectoryHeader>, 20 | #[binwrite(with(binwrite_option))] 21 | pub zip64_eocdr: Option<Zip64EndOfCentralDirectoryRecord>, 22 | #[binwrite(with(binwrite_option))] 23 | pub zip64_eocdl: Option<Zip64EndOfCentralDirectoryLocator>, 24 | pub eocdr: EndOfCentralDirectoryRecord, 25 | } 26 | 27 | #[derive(BinWrite, Clone, Default, Educe)] 28 | #[educe(Debug)] 29 | pub struct FileEntry { 30 | pub lfh: LocalFileHeader, 31 | #[educe(Debug(method = fmt_len))] 32 | pub data: Vec<u8>, 33 | #[binwrite(with(binwrite_option))] 34 | pub dd: Option<DataDescriptor>, 35 | } 36 | 37 | fn fmt_len<T>(v: &[T], f: &mut Formatter<'_>) -> fmt::Result { 38 | write!(f, "Vec<{}> ({})", std::any::type_name::<T>(), v.len()) 39 | } 40 | 41 | impl FileEntry { 42 | pub fn new( 43 | name: &str, 44 | uncompressed_data: &[u8], 45 | compression_method: CompressionMethod, 46 | force_zip64: bool, 47 | use_dd: bool, 48 | ) -> Result<Self> { 49 | let compressed_data = compress(compression_method, uncompressed_data)?; 50 | let crc32 = crc32fast::hash(uncompressed_data); 51 | 52 | let mut lfh = LocalFileHeader { 53 | compression_method, 54 | file_name_length: name.len().try_into().context("File name too long")?, 55 | file_name: name.into(), 56 | ..Default::default() 57 | }; 58 | 59 | // When data descriptor is used, also set these fields for CDH. 60 | lfh.crc32 = crc32; 61 | lfh.set_compressed_size(compressed_data.len(), force_zip64); 62 | lfh.set_uncompressed_size(uncompressed_data.len(), force_zip64); 63 | 64 | let dd = if use_dd { 65 | lfh.general_purpose_flag 66 | .insert(GeneralPurposeFlag::DataDescriptor); 67 | 68 | let compressed_size = if let Some(size) = lfh.zip64.compressed_size { 69 | lfh.keep_empty_zip64 = true; 70 | U32or64::U64(size) 71 | } else { 72 | U32or64::U32(lfh.compressed_size) 73 | }; 74 | 75 | let uncompressed_size = if let Some(size) = lfh.zip64.original_size { 76 | lfh.keep_empty_zip64 = true; 77 | U32or64::U64(size) 78 | } else { 79 | U32or64::U32(lfh.uncompressed_size) 80 | }; 81 | 82 | Some(DataDescriptor { 83 | signature: Some(DataDescriptor::SIGNATURE), 84 | crc32, 85 | compressed_size, 86 | uncompressed_size, 87 | }) 88 | } else { 89 | None 90 | }; 91 | 92 | Ok(Self { 93 | lfh, 94 | data: compressed_data, 95 | dd, 96 | }) 97 | } 98 | 99 | pub fn push_into_cd( 100 | &self, 101 | cd: &mut Vec<CentralDirectoryHeader>, 102 | offset: &mut usize, 103 | ) -> Result<()> { 104 | let mut cdh: CentralDirectoryHeader = self.into(); 105 | cdh.set_offset(*offset, false); 106 | cdh.finalize()?; 107 | cd.push(cdh); 108 | *offset += self.byte_count()?; 109 | Ok(()) 110 | } 111 | } 112 | 113 | impl ZipArchive { 114 | pub fn add_file( 115 | &mut self, 116 | name: &str, 117 | uncompressed_data: &[u8], 118 | compression_method: CompressionMethod, 119 | force_zip64: bool, 120 | use_dd: bool, 121 | ) -> Result<()> { 122 | self.files.push(FileEntry::new( 123 | name, 124 | uncompressed_data, 125 | compression_method, 126 | force_zip64, 127 | use_dd, 128 | )?); 129 | Ok(()) 130 | } 131 | 132 | pub fn add_simple(&mut self, name: &str, data: &[u8]) -> Result<()> { 133 | self.add_file(name, data, CompressionMethod::STORED, false, false) 134 | } 135 | 136 | pub fn set_eocd(&mut self, force_zip64: bool) -> Result<()> { 137 | let mut offset =
0; 138 | if let Some(last_cdh) = self.cd.last() { 139 | offset += last_cdh.relative_header_offset as usize; 140 | } 141 | if let Some(last_file) = self.files.last() { 142 | offset += last_file.byte_count()?; 143 | } 144 | 145 | let mut zip64_eocdr = Zip64EndOfCentralDirectoryRecord { 146 | this_disk_cdh_count: self.cd.len() as u64, 147 | total_cdh_count: self.cd.len() as u64, 148 | size_of_cd: self.cd.byte_count()? as u64, 149 | offset_of_cd_wrt_starting_disk: offset as u64, 150 | ..Default::default() 151 | }; 152 | 153 | if let (false, Ok(eocdr)) = ( 154 | force_zip64, 155 | TryInto::<EndOfCentralDirectoryRecord>::try_into(&zip64_eocdr), 156 | ) { 157 | self.eocdr = eocdr; 158 | self.zip64_eocdl = None; 159 | self.zip64_eocdr = None; 160 | } else { 161 | zip64_eocdr.finalize()?; 162 | self.eocdr = EndOfCentralDirectoryRecord::all_ff(); 163 | self.zip64_eocdl = Some(Zip64EndOfCentralDirectoryLocator::from_offset( 164 | offset as u64 + zip64_eocdr.size_of_cd, 165 | )); 166 | self.zip64_eocdr = Some(zip64_eocdr); 167 | } 168 | 169 | Ok(()) 170 | } 171 | 172 | pub fn finalize(&mut self) -> Result<()> { 173 | self.cd.clear(); 174 | 175 | let mut offset: usize = 0; 176 | 177 | for file in &mut self.files { 178 | let mut cdh: CentralDirectoryHeader = (&*file).into(); 179 | cdh.set_offset(offset, false); 180 | cdh.finalize()?; 181 | self.cd.push(cdh); 182 | file.lfh.finalize()?; 183 | offset += file.byte_count().context("Failed to count file bytes")?; 184 | } 185 | 186 | self.set_eocd(false) 187 | } 188 | 189 | pub fn set_offsets(&mut self, base: usize) -> Result<()> { 190 | let mut offset: usize = base; 191 | 192 | for (file, cdh) in self.files.iter_mut().zip(self.cd.iter_mut()) { 193 | cdh.set_offset(offset, false); 194 | cdh.finalize()?; 195 | file.lfh.finalize()?; 196 | offset += file.byte_count().context("Failed to count file bytes")?; 197 | } 198 | 199 | self.set_eocd(false) 200 | } 201 | } 202 | --------------------------------------------------------------------------------
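For orientation, a minimal sketch of driving ZipArchive end to end, using only calls that appear above (cf. generate.rs and fuzz/main.rs); error handling is elided:

use binwrite::BinWrite;
use zip_diff::zip::ZipArchive;

fn main() {
    let mut zip = ZipArchive::default();
    zip.add_simple("a", b"a").unwrap(); // STORED entry, no ZIP64, no data descriptor
    zip.finalize().unwrap();            // rebuilds the central directory, offsets, and EOCD
    let mut buf = Vec::new();
    zip.write(&mut buf).unwrap();       // BinWrite serialization to bytes, as fuzz/main.rs does
}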