├── .gitignore
├── CITATION.cff
├── LICENSE
├── README.md
├── constructions
│   ├── .gitignore
│   └── inconsistency-types.json
├── parsers
│   ├── .gitignore
│   ├── 01-infozip
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 02-7zip
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 03-p7zip
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 04-winrar
│   │   ├── Dockerfile
│   │   ├── run.sh
│   │   └── unzip
│   ├── 05-ada-zip-ada
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 06-c-go-unarr
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 07-c-libarchive
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 08-c-libzip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.php
│   ├── 09-c-minizip
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 10-c-minizip-ng
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 11-c-zip
│   │   ├── Dockerfile
│   │   └── unzip.c
│   ├── 12-c-zziplib
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 13-csharp-dotnetzip
│   │   ├── Dockerfile
│   │   └── unzip.cs
│   ├── 14-csharp-sharpcompress
│   │   ├── Dockerfile
│   │   └── unzip.cs
│   ├── 15-csharp-sharpziplib
│   │   ├── Dockerfile
│   │   └── unzip.cs
│   ├── 16-csharp-system-io-compression
│   │   ├── Dockerfile
│   │   └── unzip.cs
│   ├── 17-cpp-android-libziparchive
│   │   ├── Dockerfile
│   │   └── unzip.cpp
│   ├── 18-cpp-poco
│   │   ├── Dockerfile
│   │   └── unzip.cpp
│   ├── 19-d-std-zip
│   │   ├── Dockerfile
│   │   └── unzip.d
│   ├── 20-dart-archive
│   │   ├── Dockerfile
│   │   ├── pubspec.yaml
│   │   └── unzip.dart
│   ├── 21-erlang-zip
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 22-go-archive-zip
│   │   ├── Dockerfile
│   │   └── unzip.go
│   ├── 23-haskell-zip
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 24-haskell-zip-archive
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 25-java-commons-compress-stream
│   │   ├── Dockerfile
│   │   ├── unzip.sh
│   │   └── unzip
│   │       ├── .gitignore
│   │       ├── build.gradle
│   │       └── src
│   │           └── main
│   │               └── java
│   │                   └── unzip
│   │                       └── App.java
│   ├── 26-java-commons-compress-zipfile
│   │   ├── Dockerfile
│   │   ├── unzip.sh
│   │   └── unzip
│   │       ├── .gitignore
│   │       ├── build.gradle
│   │       └── src
│   │           └── main
│   │               └── java
│   │                   └── unzip
│   │                       └── App.java
│   ├── 27-java-util-zip-zipfile
│   │   ├── Dockerfile
│   │   ├── unzip.sh
│   │   └── unzip
│   │       ├── .gitignore
│   │       ├── build.gradle
│   │       └── src
│   │           └── main
│   │               └── java
│   │                   └── unzip
│   │                       └── App.java
│   ├── 28-java-util-zip-zipinputstream
│   │   ├── Dockerfile
│   │   ├── unzip.sh
│   │   └── unzip
│   │       ├── .gitignore
│   │       ├── build.gradle
│   │       └── src
│   │           └── main
│   │               └── java
│   │                   └── unzip
│   │                       └── App.java
│   ├── 29-java-zip4j-zipfile
│   │   ├── Dockerfile
│   │   ├── unzip.sh
│   │   └── unzip
│   │       ├── .gitignore
│   │       ├── build.gradle
│   │       └── src
│   │           └── main
│   │               └── java
│   │                   └── unzip
│   │                       └── App.java
│   ├── 30-java-zip4j-zipinputstream
│   │   ├── Dockerfile
│   │   ├── unzip.sh
│   │   └── unzip
│   │       ├── .gitignore
│   │       ├── build.gradle
│   │       └── src
│   │           └── main
│   │               └── java
│   │                   └── unzip
│   │                       └── App.java
│   ├── 31-nodejs-ronomon-zip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 32-nodejs-adm-zip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 33-nodejs-decompress-zip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 34-nodejs-jszip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 35-nodejs-node-stream-zip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 36-nodejs-unzipper-extract
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 37-nodejs-unzipper-open
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 38-nodejs-yauzl-v2
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 39-nodejs-yauzl-v3
│   │   ├── Dockerfile
│   │   └── unzip
│   ├── 40-nodejs-zipjs
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.js
│   ├── 41-php-phardata
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.php
│   ├── 42-php-phpzip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.php
│   ├── 43-pascal-paszlib
│   │   ├── Dockerfile
│   │   └── unzip.pp
│   ├── 44-perl-archive-zip
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.pl
│   ├── 45-python-zipfile
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.py
│   ├── 46-racket-file-unzip
│   │   ├── Dockerfile
│   │   └── unzip.rkt
│   ├── 47-ruby-rubyzip-file
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.rb
│   ├── 48-ruby-rubyzip-inputstream
│   │   ├── Dockerfile
│   │   ├── unzip
│   │   └── unzip.rb
│   ├── 49-rust-zip
│   │   ├── Dockerfile
│   │   └── unzip.rs
│   ├── 50-swift-zipfoundation
│   │   ├── Dockerfile
│   │   └── src
│   │       ├── Package.swift
│   │       └── Sources
│   │           └── main.swift
│   ├── README.md
│   ├── parallel-unzip-all.sh
│   ├── parsers.json
│   ├── prepare.sh
│   ├── testcase.sh
│   └── unzip-all.sh
├── tools
│   ├── ablation-study.sh
│   ├── fuzz-stats.py
│   ├── inconsistency-table.py
│   ├── parsers-to-table.py
│   ├── prepare.sh
│   └── run-parsers.sh
└── zip-diff
    ├── .gitignore
    ├── Cargo.lock
    ├── Cargo.toml
    └── src
        ├── cdh.rs
        ├── compress.rs
        ├── construction
        │   ├── a.rs
        │   ├── a
        │   │   ├── a1.rs
        │   │   ├── a2.rs
        │   │   ├── a3.rs
        │   │   ├── a4.rs
        │   │   └── a5.rs
        │   ├── b.rs
        │   ├── b
        │   │   ├── b1.rs
        │   │   ├── b2.rs
        │   │   ├── b3.rs
        │   │   └── b4.rs
        │   ├── c.rs
        │   ├── c
        │   │   ├── c1.rs
        │   │   ├── c2.rs
        │   │   ├── c3.rs
        │   │   ├── c4.rs
        │   │   └── c5.rs
        │   ├── main.rs
        │   └── utils.rs
        ├── count
        │   └── main.rs
        ├── dd.rs
        ├── eocd.rs
        ├── extra.rs
        ├── fields.rs
        ├── fuzz
        │   ├── config.rs
        │   ├── corpus.rs
        │   ├── execute.rs
        │   ├── feature.rs
        │   ├── generate.rs
        │   ├── main.rs
        │   ├── mutation.rs
        │   ├── rand_utils.rs
        │   └── stats.rs
        ├── hash.rs
        ├── lfh.rs
        ├── lib.rs
        ├── utils.rs
        └── zip.rs
/.gitignore:
--------------------------------------------------------------------------------
1 | /evaluation
2 | /constructions/*/
3 | *.json
4 | *.pdf
5 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | title: ZipDiff
3 | type: software
4 | authors:
5 | - given-names: Yufan
6 | family-names: You
7 | email: ouuansteve@gmail.com
8 | affiliation: Tsinghua University
9 | orcid: 'https://orcid.org/0009-0004-2975-2107'
10 | - given-names: Jianjun
11 | family-names: Chen
12 | email: jianjun@tsinghua.edu.cn
13 | affiliation: Tsinghua University
14 | orcid: 'https://orcid.org/0000-0001-7511-1117'
15 | - given-names: Qi
16 | family-names: Wang
17 | affiliation: Tsinghua University
18 | orcid: 'https://orcid.org/0009-0008-5707-3223'
19 | - given-names: Haixin
20 | family-names: Duan
21 | affiliation: Tsinghua University
22 | orcid: 'https://orcid.org/0000-0003-0083-733X'
23 | doi: 10.5281/zenodo.15526863
24 | repository-code: 'https://github.com/ouuan/ZipDiff'
25 | repository-artifact: 'https://doi.org/10.5281/zenodo.15526863'
26 | abstract: A differential fuzzer for ZIP parsers.
27 | keywords:
28 | - differential fuzzer
29 | - zip
30 | license: Apache-2.0
31 | preferred-citation:
32 | type: conference-paper
33 | authors:
34 | - given-names: Yufan
35 | family-names: You
36 | email: ouuansteve@gmail.com
37 | affiliation: Tsinghua University
38 | orcid: 'https://orcid.org/0009-0004-2975-2107'
39 | - given-names: Jianjun
40 | family-names: Chen
41 | email: jianjun@tsinghua.edu.cn
42 | affiliation: Tsinghua University
43 | orcid: 'https://orcid.org/0000-0001-7511-1117'
44 | - given-names: Qi
45 | family-names: Wang
46 | affiliation: Tsinghua University
47 | orcid: 'https://orcid.org/0009-0008-5707-3223'
48 | - given-names: Haixin
49 | family-names: Duan
50 | affiliation: Tsinghua University
51 | orcid: 'https://orcid.org/0000-0003-0083-733X'
52 | year: 2025
53 | month: 8
54 | title: "My ZIP isn't your ZIP: Identifying and Exploiting Semantic Gaps Between ZIP Parsers"
55 | conference:
56 | name: "34th USENIX Security Symposium"
57 | location: "Seattle, WA"
58 | date-start: 2025-08-13
59 | date-end: 2025-08-15
60 | publisher:
61 | name: "USENIX Association"
62 | isbn: "978-1-939133-52-6"
63 | url: "https://www.usenix.org/conference/usenixsecurity25/presentation/you"
64 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ZipDiff
2 |
3 | A differential fuzzer for ZIP parsers.
4 |
5 | This is the source code for the USENIX Security '25 paper [My ZIP isn’t your ZIP: Identifying and Exploiting Semantic Gaps Between ZIP Parsers](https://www.usenix.org/conference/usenixsecurity25/presentation/you).
6 |
7 | Zenodo permanent link and Docker image files: https://doi.org/10.5281/zenodo.15526863
8 |
9 | This artifact has received the Results Reproduced badge: [Artifact appendix](https://secartifacts.github.io/usenixsec2025/appendix-files/sec25cycle2ae-final28.pdf)
10 |
11 |
12 |
13 |
14 |
15 | ## Environment
16 |
17 | - Linux
18 | - [Rust](https://www.rust-lang.org/tools/install) (tested on 1.86, any version is fine as long as the code compiles successfully)
19 | - [Docker](https://docs.docker.com/engine/install/) and [Docker Compose plugin](https://docs.docker.com/compose/install/linux/)
20 | - Python 3 with `numpy` and `matplotlib` to generate tables and figures
21 | - The full fuzzing process is resource-intensive, as it runs many ZIP parsers in parallel. It is recommended to have at least 128 GB of RAM and 300 GB of disk space. While it can also run on systems with less RAM, you may encounter significant performance degradation, primarily due to uncached disk I/O, since the unzipped outputs can be quite large.
22 |
23 | The exact environment used by the authors:
24 |
25 | - Ubuntu 23.10 with Linux 6.5.0-44
26 | - Rust 1.86.0
27 | - Docker 27.1.1 with Docker Compose 2.33.1
28 | - Python 3.13.3 with numpy 2.3.0 and matplotlib 3.10.3
29 | - CPU: Intel(R) Xeon(R) Gold 6330 CPU @ 2.00GHz with 112 logical CPUs
30 | - Memory and storage: 944G RAM + 44T disk (less than 1T was used)
31 |
32 | ## File Structure
33 |
34 | - [`parsers`](./parsers)
35 | - Subdirectories: Source files to build Docker images of the tested parsers. Each Docker image corresponds to one tested ZIP parser.
36 | - [`parsers.json`](./parsers/parsers.json): Information about the parsers.
37 | - [`zip-diff`](./zip-diff): Rust code
38 | - The library crate: A helper ZIP library.
39 | - The [`fuzz`](./zip-diff/src/fuzz) binary crate: The differential fuzzer ZipDiff.
40 | - The [`construction`](./zip-diff/src/construction) binary crate: Construction of ambiguous ZIP files corresponding to the types and variants described in the paper.
41 | - The [`count`](./zip-diff/src/count) binary crate: Count the types of ambiguities between each parser pair.
42 | - [`tools`](./tools):
43 | - [`prepare.sh`](./tools/prepare.sh): Copy common scripts (`unzip-all.sh`, `parallel-unzip-all.sh`, `testcase.sh`) into the parser subdirectories (into their Docker build contexts) and generate the `docker-compose.yml` config file.
44 | - [`run-parsers.sh`](./tools/run-parsers.sh): Test the parsers against specified ZIP files (for manual testing).
45 | - [`ablation-study.sh`](./tools/ablation-study.sh): Reproduce the ablation study in the paper.
46 | - [`fuzz-stats.py`](./tools/fuzz-stats.py): Draw the ablation study graph and summarize the stats.
47 | - [`inconsistency-table.py`](./tools/inconsistency-table.py): Generate the parser inconsistency LaTeX table.
48 | - [`parsers-to-table.py`](./tools/parsers-to-table.py): Retrieve GitHub stargazer counts and generate the LaTeX parser list.
49 | - [`constructions`](./constructions): This directory holds the constructed ambiguous ZIP files. The [`inconsistency-types.json`](./constructions/inconsistency-types.json) file is generated by the `count` component and records the list of inconsistency types between each pair of parsers.
50 |
51 | ## Preparation
52 |
53 | - Build ZIP parser Docker images:
54 |
55 | ```console
56 | tools/prepare.sh
57 | cd parsers
58 | sudo docker compose build
59 | ```
60 |
61 | Alternatively, if you want to save some time or make sure the versions match the evaluation in the paper, you can load the images from [files on Zenodo](https://doi.org/10.5281/zenodo.15526863):
62 |
63 | ```console
64 | for i in *.tar.bz2; do
65 | docker load -i "$i"
66 | done
67 | ```
68 |
69 | - Build the Rust binaries:
70 |
71 | ```console
72 | cd zip-diff
73 | cargo build --release
74 | ```
75 |
76 | ## Minimal Working Example
77 |
78 | You can check that the parsers are working by running them on a simple ZIP file (assuming the `zip` command is installed):
79 |
80 | ```console
81 | pushd /tmp
82 | echo test > test.txt
83 | zip -0 test.zip test.txt
84 | popd
85 | tools/run-parsers.sh /tmp/test.zip
86 | ```
87 |
88 | If everything goes well, you will see logs from Docker Compose and the parsers, and then the results will be available at `evaluation/results/tmp/test.zip`:
89 |
90 | ```
91 | 01-infozip
92 | └── test.txt
93 | 02-7zip
94 | └── test.txt
95 | ……
96 | 50-swift-zipfoundation
97 | └── test.txt
98 | ```
99 |
100 | You can verify that all parsers successfully extracted `test.txt` from the ZIP archive.
101 |
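One quick way to check this, assuming the result layout shown above, is to count the extracted copies of `test.txt` (one per parser, so 50 in total):

```console
find evaluation/results/tmp/test.zip -name test.txt | wc -l
```
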
102 | A short two-minute fuzzing run can be used to check that the fuzzer is working: `sudo target/release/fuzz -b 10 -s 120`. This runs the fuzzer for two minutes with only ten samples per batch. The fuzzer will print logs for each iteration; the log text should contain `ok: 50`, indicating that all parsers are working fine. The results will be available at `evaluation/stats.json`, `evaluation/samples` and `evaluation/results`.
103 |
104 | ## Running the Fuzzer
105 |
106 | ```console
107 | cd zip-diff
108 | sudo target/release/fuzz
109 | ```
110 |
111 | Root permission is required here because the outputs are written inside Docker containers and are owned by root. Sometimes the outputs have incorrect permission bits and cannot be read by a regular user even if that user owns the files.
112 |
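If you later want to inspect or delete these root-owned outputs as a regular user, one standard way is to reset the ownership of the output directory (shown here for the default `evaluation` location; adjust the path if you changed it):

```console
sudo chown -R "$(id -u):$(id -g)" evaluation
```
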
113 | By default, the fuzzer will run indefinitely, and the results will be stored at `evaluation/stats.json`, `evaluation/samples`, and `evaluation/results`.
114 |
115 | The fuzzer can be terminated at any time with Ctrl+C. You can also tell the fuzzer to stop after a specified time by setting the `-s, --stop-after-seconds` option.
116 |
117 | The fuzzer does not automatically clear data from previous executions, so files from different runs might be mixed together. You should either remove the files left over from previous executions if they are no longer needed, or specify different `--samples-dir`, `--results-dir`, and `--stats-file` locations. The ZIP file samples generated by the fuzzer are stored in `--samples-dir`, and the corresponding parser outputs are stored in `--results-dir`. You can check the outputs to see that the parsers produce inconsistent outputs for the same input samples.
118 |
119 | The `-b, --batch-size` option can be reduced when there is not enough RAM or disk space.
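
For example, a day-long run that keeps its outputs separate from a previous run might look like the following (the options are those described above; the `-run2` paths are arbitrary names, not defaults):

```console
sudo target/release/fuzz \
    --samples-dir evaluation/samples-run2 \
    --results-dir evaluation/results-run2 \
    --stats-file evaluation/stats-run2.json \
    -b 20 -s 86400
```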
120 |
121 | ## Reproducing the ablation study
122 |
123 | 1. Run `sudo tools/ablation-study.sh`. It will run five 24-hour fuzzing sessions for each of the three setups, for a total of 15 days.
124 | 2. Run `python3 tools/fuzz-stats.py evaluation/stats/*` to draw the graph at `inconsistent-pair-cdf.pdf` (Figure 4 in the paper).
125 |
126 | The full results took around 100 GB of disk space for the authors. At runtime, it may temporarily use another ~500 GB of disk space. You can lower the `$BATCH_SIZE` in `ablation-study.sh` to reduce the required amount of RAM and disk space.
127 |
128 | ## Testing the constructed ambiguous ZIP files
129 |
130 | ```console
131 | cd zip-diff
132 | target/release/construction
133 | sudo target/release/count
134 | ```
135 |
136 | The `construction` crate provides constructions of the ZIP parsing ambiguities described in the paper Section 5.2.
137 |
138 | The `count` step summarizes the number of inconsistencies between each pair of ZIP parsers. It took about 40 minutes for the authors.
139 |
140 | The inconsistency details are stored at `constructions/inconsistency-types.json`. You can run `tools/inconsistency-table.py` to generate the LaTeX table (Table 4 in the paper).
141 |
--------------------------------------------------------------------------------
/constructions/.gitignore:
--------------------------------------------------------------------------------
1 | !inconsistency-types.json
2 |
--------------------------------------------------------------------------------
/parsers/.gitignore:
--------------------------------------------------------------------------------
1 | docker-compose.yml
2 | */unzip-all.sh
3 | */parallel-unzip-all.sh
4 | */testcase.sh
5 | !parsers.json
6 |
--------------------------------------------------------------------------------
/parsers/01-infozip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine:3.20
2 |
3 | RUN apk add unzip
4 |
5 | COPY unzip unzip-all.sh /
6 |
7 | ENTRYPOINT ["/unzip-all.sh"]
8 |
--------------------------------------------------------------------------------
/parsers/01-infozip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | set -eu
4 |
5 | unzip -t "$1"
6 | unzip -o "$1" -d "$2"
7 |
--------------------------------------------------------------------------------
/parsers/02-7zip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM debian:12-slim
2 |
3 | RUN echo 'deb http://deb.debian.org/debian bookworm-backports main' > /etc/apt/sources.list.d/backports.list
4 | RUN apt-get update
5 | RUN apt-get install -t bookworm-backports -y 7zip
6 |
7 | COPY unzip unzip-all.sh /
8 |
9 | ENTRYPOINT ["/unzip-all.sh"]
10 |
--------------------------------------------------------------------------------
/parsers/02-7zip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | set -eu
4 |
5 | 7z t "$1" && 7z x -aoa "$1" -o"$2"
6 |
--------------------------------------------------------------------------------
/parsers/03-p7zip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM debian:12-slim
2 |
3 | RUN apt-get update
4 | RUN apt-get install -y p7zip-full
5 |
6 | COPY unzip unzip-all.sh /
7 |
8 | ENTRYPOINT ["/unzip-all.sh"]
9 |
--------------------------------------------------------------------------------
/parsers/03-p7zip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | set -eu
4 |
5 | 7z t "$1"
6 | 7z x -aoa "$1" -o"$2"
7 |
--------------------------------------------------------------------------------
/parsers/04-winrar/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM debian:12-slim
2 |
3 | RUN dpkg --add-architecture i386
4 | RUN sed -i 's/Components: main/Components: main contrib/g' /etc/apt/sources.list.d/debian.sources
5 | RUN apt-get update
6 | RUN apt-get install -y xvfb winetricks wine32 parallel
7 |
8 | RUN winetricks msxml6 && sleep 1
9 |
10 | ADD https://www.win-rar.com/fileadmin/winrar-versions/winrar/winrar-x32-701.exe /tmp/winrar.exe
11 | RUN xvfb-run wine /tmp/winrar.exe /S && sleep 1
12 |
13 | # abort build if installation fails
14 | RUN stat '/root/.wine/drive_c/Program Files/WinRAR'
15 |
16 | COPY run.sh parallel-unzip-all.sh testcase.sh unzip /
17 |
18 | ENTRYPOINT ["/run.sh"]
19 |
--------------------------------------------------------------------------------
/parsers/04-winrar/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | export WINEDEBUG=-all
4 | xvfb-run -a /parallel-unzip-all.sh 50%
5 |
--------------------------------------------------------------------------------
/parsers/04-winrar/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -eu
4 | 
5 | # Wine maps the filesystem root to the Z: drive; ${1//\//\\} rewrites the Unix paths with backslashes so WinRAR sees Windows-style paths
6 | wine 'C:\Program Files\WinRAR\WinRAR.exe' x -ibck -y -ppassword "Z:${1//\//\\}" '*.*' "Z:${2//\//\\}"
7 | 
--------------------------------------------------------------------------------
/parsers/05-ada-zip-ada/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM debian:12-slim AS build
2 |
3 | RUN apt-get update
4 | RUN apt-get install -y gprbuild gnat-12 unzip
5 |
6 | ADD https://github.com/zertovitch/zip-ada/archive/aaba1a767a47851df075a9884457052719e0488f.zip src.zip
7 | RUN unzip src.zip
8 | RUN mv zip-ada-* src
9 |
10 | WORKDIR /src
11 | RUN gprbuild -p -P zipada.gpr
12 |
13 | FROM debian:12-slim
14 |
15 | RUN apt-get update
16 | RUN apt-get install -y libgnat-12
17 |
18 | COPY --from=build /src/unzipada /
19 |
20 | COPY unzip unzip-all.sh /
21 |
22 | ENTRYPOINT ["/unzip-all.sh"]
23 |
--------------------------------------------------------------------------------
/parsers/05-ada-zip-ada/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | set -eu
4 |
5 | cd /
6 |
7 | # it somehow does not accept absolute paths
8 | /unzipada -d "${2#/}" "${1#/}"
9 |
--------------------------------------------------------------------------------
/parsers/06-c-go-unarr/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.22.3-alpine3.20 AS build
2 |
3 | RUN apk add gcc musl-dev
4 |
5 | RUN go install github.com/gen2brain/go-unarr/cmd/unarr@v0.2.4
6 |
7 | FROM alpine:3.20
8 |
9 | COPY --from=build /go/bin/unarr /
10 |
11 | COPY unzip unzip-all.sh /
12 |
13 | ENTRYPOINT ["/unzip-all.sh"]
14 |
--------------------------------------------------------------------------------
/parsers/06-c-go-unarr/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | /unarr "$1" "$2"
4 |
--------------------------------------------------------------------------------
/parsers/07-c-libarchive/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine:3.20 AS build
2 |
3 | RUN apk add g++ make
4 |
5 | WORKDIR /tmp
6 | ADD https://github.com/libarchive/libarchive/releases/download/v3.7.7/libarchive-3.7.7.tar.gz .
7 | RUN tar xf libarchive-*.tar.gz
8 | RUN mv libarchive-*/ /src
9 |
10 | WORKDIR /src
11 | RUN ./configure
12 | RUN make -j$(nproc --ignore=2) bsdunzip
13 |
14 | FROM alpine:3.20
15 |
16 | COPY --from=build /src/bsdunzip /usr/local/bin/
17 |
18 | COPY unzip unzip-all.sh /
19 |
20 | ENTRYPOINT ["/unzip-all.sh"]
21 |
--------------------------------------------------------------------------------
/parsers/07-c-libarchive/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | set -eu
4 |
5 | bsdunzip -t "$1"
6 | bsdunzip -o -d "$2" "$1"
7 |
--------------------------------------------------------------------------------
/parsers/08-c-libzip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine:3.20
2 |
3 | RUN apk add php83
4 | RUN apk add php83-zip
5 |
6 | COPY unzip.php /
7 |
8 | COPY unzip unzip-all.sh /
9 |
10 | ENTRYPOINT [ "/unzip-all.sh" ]
11 |
--------------------------------------------------------------------------------
/parsers/08-c-libzip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | php83 /unzip.php "$@"
4 |
--------------------------------------------------------------------------------
/parsers/08-c-libzip/unzip.php:
--------------------------------------------------------------------------------
1 | <?php
2 | 
3 | $zip = new ZipArchive();
4 | 
5 | if ($zip->open($argv[1], ZipArchive::CHECKCONS) === true) {
6 | $zip->extractTo($argv[2]);
7 | $zip->close();
8 | } else {
9 | exit(1);
10 | }
11 |
--------------------------------------------------------------------------------
/parsers/09-c-minizip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM debian:12-slim AS build
2 |
3 | RUN apt-get update
4 | RUN apt-get install -y gcc make
5 |
6 | WORKDIR /tmp
7 | ADD https://github.com/madler/zlib/releases/download/v1.3.1/zlib-1.3.1.tar.gz .
8 | RUN tar xf zlib-*.tar.gz
9 | RUN mv zlib-*/ /src
10 |
11 | WORKDIR /src
12 | RUN ./configure
13 | RUN make libz.a
14 | RUN make -C contrib/minizip miniunz
15 |
16 | FROM debian:12-slim
17 |
18 | COPY --from=build /src/contrib/minizip/miniunz /usr/local/bin/miniunzip
19 |
20 | COPY unzip unzip-all.sh /
21 |
22 | ENTRYPOINT ["/unzip-all.sh"]
23 |
--------------------------------------------------------------------------------
/parsers/09-c-minizip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | miniunzip -x -o "$1" -d "$2"
4 |
--------------------------------------------------------------------------------
/parsers/10-c-minizip-ng/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine:3.20
2 |
3 | RUN apk add minizip-ng --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main
4 |
5 | COPY unzip unzip-all.sh /
6 |
7 | ENTRYPOINT ["/unzip-all.sh"]
8 |
--------------------------------------------------------------------------------
/parsers/10-c-minizip-ng/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | minizip -x -o -d "$2" "$1"
4 |
--------------------------------------------------------------------------------
/parsers/11-c-zip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM debian:12-slim AS build
2 |
3 | RUN apt-get update
4 | RUN apt-get install -y gcc
5 |
6 | ADD https://github.com/kuba--/zip/archive/refs/tags/v0.3.2.tar.gz src.tar.gz
7 | RUN tar xf src.tar.gz && mv zip-*/src .
8 |
9 | WORKDIR /src
10 |
11 | COPY unzip.c .
12 |
13 | RUN gcc unzip.c zip.c -O2 -o unzip
14 |
15 | FROM debian:12-slim
16 |
17 | COPY --from=build /src/unzip /
18 |
19 | COPY unzip-all.sh /
20 | ENTRYPOINT ["/unzip-all.sh"]
21 |
--------------------------------------------------------------------------------
/parsers/11-c-zip/unzip.c:
--------------------------------------------------------------------------------
1 | #include "zip.h"
2 |
3 | int main(int argc, char **argv)
4 | {
5 | return zip_extract(argv[1], argv[2], NULL, NULL);
6 | }
7 |
--------------------------------------------------------------------------------
/parsers/12-c-zziplib/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM debian:12-slim AS build
2 |
3 | RUN apt-get update
4 | RUN apt-get install -y cmake zlib1g-dev
5 |
6 | WORKDIR /tmp
7 | ADD https://github.com/gdraheim/zziplib/archive/refs/tags/v0.13.78.tar.gz .
8 | RUN tar xf *.tar.gz
9 | RUN mv zziplib-*/ /src
10 |
11 | WORKDIR /src
12 | RUN cmake -S . -B build -D ZZIPBINS=On -D BUILD_SHARED_LIBS=Off -D ZZIPWRAP=Off -D ZZIPSDL=Off -D ZZIPTEST=Off -D ZZIPDOCS=Off
13 | RUN cmake --build build
14 |
15 | FROM debian:12-slim
16 |
17 | RUN apt-get update
18 | RUN apt-get install -y zlib1g
19 |
20 | COPY --from=build /src/build/bins/unzzip /usr/local/bin
21 |
22 | COPY unzip unzip-all.sh /
23 |
24 | ENTRYPOINT [ "/unzip-all.sh" ]
25 |
--------------------------------------------------------------------------------
/parsers/12-c-zziplib/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | set -eu
4 |
5 | cd "$2"
6 | unzzip "$1"
7 |
--------------------------------------------------------------------------------
/parsers/13-csharp-dotnetzip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM mcr.microsoft.com/dotnet/sdk:9.0-alpine AS build
2 |
3 | WORKDIR /unzip
4 |
5 | RUN dotnet new console
6 |
7 | RUN dotnet add package DotNetZip --version 1.16.0
8 |
9 | RUN rm ./*.cs
10 |
11 | COPY unzip.cs .
12 |
13 | RUN dotnet build -c Release
14 |
15 | FROM mcr.microsoft.com/dotnet/runtime:9.0-alpine
16 |
17 | RUN apk add parallel
18 |
19 | COPY --from=build /unzip/bin/Release/net9.0 /build
20 |
21 | RUN ln -s /build/unzip /unzip
22 |
23 | COPY testcase.sh parallel-unzip-all.sh /
24 | ENTRYPOINT ["/parallel-unzip-all.sh"]
25 |
--------------------------------------------------------------------------------
/parsers/13-csharp-dotnetzip/unzip.cs:
--------------------------------------------------------------------------------
1 | using Ionic.Zip;
2 |
3 | if (ZipFile.CheckZip(args[0]))
4 | {
5 | using (ZipFile zip = ZipFile.Read(args[0]))
6 | {
7 | zip.ExtractAll(args[1], ExtractExistingFileAction.OverwriteSilently);
8 | }
9 | }
10 | else
11 | {
12 | System.Environment.Exit(1);
13 | }
14 |
--------------------------------------------------------------------------------
/parsers/14-csharp-sharpcompress/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM mcr.microsoft.com/dotnet/sdk:9.0-alpine AS build
2 |
3 | WORKDIR /unzip
4 |
5 | RUN dotnet new console
6 |
7 | RUN dotnet add package SharpCompress --version 0.38.0
8 |
9 | RUN rm ./*.cs
10 |
11 | COPY unzip.cs .
12 |
13 | RUN dotnet build -c Release
14 |
15 | FROM mcr.microsoft.com/dotnet/runtime:9.0-alpine
16 |
17 | RUN apk add parallel
18 |
19 | COPY --from=build /unzip/bin/Release/net9.0 /build
20 |
21 | RUN ln -s /build/unzip /unzip
22 |
23 | COPY testcase.sh parallel-unzip-all.sh /
24 | ENTRYPOINT ["/parallel-unzip-all.sh"]
25 |
--------------------------------------------------------------------------------
/parsers/14-csharp-sharpcompress/unzip.cs:
--------------------------------------------------------------------------------
1 | using SharpCompress.Archives.Zip;
2 | using SharpCompress.Common;
3 | using SharpCompress.Readers;
4 |
5 | using (var archive = ZipArchive.Open(args[0]))
6 | {
7 | var opt = new ExtractionOptions()
8 | {
9 | ExtractFullPath = true,
10 | Overwrite = true
11 | };
12 | archive.ExtractAllEntries().WriteAllToDirectory(args[1], opt);
13 | }
14 |
--------------------------------------------------------------------------------
/parsers/15-csharp-sharpziplib/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM mcr.microsoft.com/dotnet/sdk:9.0-alpine AS build
2 |
3 | WORKDIR /unzip
4 |
5 | RUN dotnet new console
6 |
7 | RUN dotnet add package SharpZipLib --version 1.4.2
8 |
9 | RUN rm ./*.cs
10 |
11 | COPY unzip.cs .
12 |
13 | RUN dotnet build -c Release
14 |
15 | FROM mcr.microsoft.com/dotnet/runtime:9.0-alpine
16 |
17 | COPY --from=build /unzip/bin/Release/net9.0 /build
18 |
19 | RUN ln -s /build/unzip /unzip
20 |
21 | COPY unzip-all.sh /
22 | ENTRYPOINT ["/unzip-all.sh"]
23 |
--------------------------------------------------------------------------------
/parsers/15-csharp-sharpziplib/unzip.cs:
--------------------------------------------------------------------------------
1 | using ICSharpCode.SharpZipLib.Zip;
2 |
3 | using (ZipFile zipFile = new ZipFile(args[0]))
4 | {
5 | if (!zipFile.TestArchive(true))
6 | {
7 | System.Environment.Exit(1);
8 | }
9 | }
10 |
11 | new FastZip().ExtractZip(args[0], args[1], null);
12 |
--------------------------------------------------------------------------------
/parsers/16-csharp-system-io-compression/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM mcr.microsoft.com/dotnet/sdk:9.0-alpine AS build
2 |
3 | WORKDIR /unzip
4 |
5 | RUN dotnet new console
6 |
7 | RUN rm ./*.cs
8 |
9 | COPY unzip.cs .
10 |
11 | RUN dotnet build -c Release
12 |
13 | FROM mcr.microsoft.com/dotnet/runtime:9.0-alpine
14 |
15 | COPY --from=build /unzip/bin/Release/net9.0 /build
16 |
17 | RUN ln -s /build/unzip /unzip
18 |
19 | COPY unzip-all.sh /
20 | ENTRYPOINT ["/unzip-all.sh"]
21 |
--------------------------------------------------------------------------------
/parsers/16-csharp-system-io-compression/unzip.cs:
--------------------------------------------------------------------------------
1 | using System.IO.Compression;
2 |
3 | ZipFile.ExtractToDirectory(args[0], args[1], true);
4 |
--------------------------------------------------------------------------------
/parsers/17-cpp-android-libziparchive/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM debian:12-slim AS build
2 |
3 | RUN echo 'deb http://deb.debian.org/debian bookworm-backports main' > /etc/apt/sources.list.d/backports.list
4 | RUN apt-get update
5 | RUN apt-get install -y g++
6 | RUN apt-get install -t bookworm-backports -y android-libziparchive-dev android-libbase-dev
7 |
8 | WORKDIR /src
9 | COPY unzip.cpp .
10 | RUN g++ unzip.cpp -O2 -o unzip -I/usr/include/android -L/usr/lib/x86_64-linux-gnu/android -lziparchive
11 |
12 | FROM debian:12-slim
13 |
14 | COPY --from=build /usr/lib/x86_64-linux-gnu/android/*.so.0 /usr/lib/
15 | COPY --from=build /src/unzip /
16 | COPY unzip-all.sh /
17 | ENTRYPOINT ["/unzip-all.sh"]
18 |
--------------------------------------------------------------------------------
/parsers/17-cpp-android-libziparchive/unzip.cpp:
--------------------------------------------------------------------------------
1 | #include <fcntl.h>
2 | #include <stdio.h>
3 | #include <sys/stat.h>
4 | 
5 | #include <filesystem>
6 | #include <string>
7 | #include <ziparchive/zip_archive.h>
8 |
9 | int main(int argc, char **argv)
10 | {
11 | ZipArchiveHandle archive;
12 | if (OpenArchive(argv[1], &archive) < 0)
13 | {
14 | fputs("Failed to open ZIP archive", stderr);
15 | return 1;
16 | }
17 |
18 | void *cookie;
19 | if (StartIteration(archive, &cookie) < 0)
20 | {
21 | fputs("Failed to iterate over ZIP entries", stderr);
22 | return 2;
23 | }
24 |
25 | const auto targetDir = std::filesystem::path(argv[2]);
26 |
27 | while (true)
28 | {
29 | ZipEntry entry;
30 | std::string name;
31 |
32 | const int status = Next(cookie, &entry, &name);
33 | if (status == -1)
34 | break;
35 | if (status < -1)
36 | {
37 | fputs("Failed to get next entry", stderr);
38 | return 3;
39 | }
40 |
41 | const auto target = targetDir / name;
42 |
43 | if (name.back() == '/')
44 | {
45 | std::filesystem::create_directories(target);
46 | }
47 | else
48 | {
49 | std::filesystem::create_directories(target.parent_path());
50 |
51 | int fd = creat(target.c_str(), 0644);
52 | if (fd < 0)
53 | {
54 | fputs("Failed to open output file", stderr);
55 | return 4;
56 | }
57 |
58 | if (ExtractEntryToFile(archive, &entry, fd) < 0)
59 | {
60 | fputs("Failed to extract to output file", stderr);
61 | return 5;
62 | }
63 | }
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/parsers/18-cpp-poco/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine:3.20 AS build
2 |
3 | RUN apk add g++ make cmake
4 |
5 | ADD https://github.com/pocoproject/poco/archive/refs/tags/poco-1.13.3-release.tar.gz src.tar.gz
6 | RUN tar xf src.tar.gz && mv poco-* poco
7 |
8 | WORKDIR /poco
9 | RUN cmake -S . -B build -D BUILD_SHARED_LIBS=OFF -D ENABLE_ZIP=ON -D ENABLE_FOUNDATION=ON -D ENABLE_XML=ON -D ENABLE_ENCODINGS=OFF -D ENABLE_ENCODINGS_COMPILER=OFF -D ENABLE_JSON=OFF -D ENABLE_MONGODB=OFF -D ENABLE_DATA_SQLITE=OFF -D ENABLE_REDIS=OFF -D ENABLE_PROMETHEUS=OFF -D ENABLE_UTIL=OFF -D ENABLE_NET=OFF -D ENABLE_SEVENZIP=OFF -D ENABLE_CPPPARSER=OFF -D ENABLE_POCODOC=OFF -D ENABLE_PAGECOMPILER=OFF -D ENABLE_PAGECOMPILER_FILE2PAGE=OFF -D ENABLE_ACTIVERECORD=OFF -D ENABLE_ACTIVERECORD_COMPILER=OFF
10 | RUN cmake --build build --config Release -j$(nproc)
11 | RUN cmake --install build
12 |
13 | WORKDIR /src
14 | COPY unzip.cpp .
15 | RUN g++ unzip.cpp -O2 -o unzip -static -lPocoZip -lPocoFoundation
16 |
17 | FROM alpine:3.20
18 | COPY --from=build /src/unzip /
19 | COPY unzip-all.sh /
20 | ENTRYPOINT ["/unzip-all.sh"]
21 |
--------------------------------------------------------------------------------
/parsers/18-cpp-poco/unzip.cpp:
--------------------------------------------------------------------------------
1 | #include <fstream>
2 | #include <iostream>
3 | 
4 | #include <Poco/Delegate.h>
5 | #include <Poco/Zip/Decompress.h>
6 |
7 | void onDecompressError(const void *pSender,
8 | std::pair<const Poco::Zip::ZipLocalFileHeader, const std::string> &info)
9 | {
10 | const Poco::Zip::ZipLocalFileHeader &header = info.first;
11 | const std::string &errorMsg = info.second;
12 |
13 | std::cerr << "Error decompressing file: " << header.getFileName() << std::endl;
14 | std::cerr << "Error message: " << errorMsg << std::endl;
15 |
16 | std::exit(1);
17 | }
18 |
19 | int main(int argc, char **argv)
20 | {
21 | std::ifstream inp(argv[1], std::ios::binary);
22 | Poco::Zip::Decompress dec(inp, Poco::Path(argv[2]));
23 | dec.EError += Poco::delegate(&onDecompressError);
24 | dec.decompressAllFiles();
25 | }
26 |
--------------------------------------------------------------------------------
/parsers/19-d-std-zip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine:3.20 AS build
2 |
3 | WORKDIR /src
4 |
5 | RUN apk add dmd gcc musl-dev --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main
6 |
7 | COPY unzip.d .
8 |
9 | RUN dmd unzip.d
10 |
11 | FROM alpine:3.20
12 |
13 | RUN apk add llvm-libunwind
14 |
15 | COPY --from=build /src/unzip /
16 |
17 | COPY unzip-all.sh /
18 | ENTRYPOINT ["/unzip-all.sh"]
19 |
--------------------------------------------------------------------------------
/parsers/19-d-std-zip/unzip.d:
--------------------------------------------------------------------------------
1 | import std.algorithm;
2 | import std.file;
3 | import std.path;
4 | import std.zip;
5 |
6 | void main(string[] args)
7 | {
8 | auto zip = new ZipArchive(read(args[1]));
9 | chdir(args[2]);
10 |
11 | foreach (name, am; zip.directory)
12 | {
13 | if (am.name.endsWith('/')) {
14 | am.name.mkdirRecurse;
15 | } else {
16 | am.name.dirName.mkdirRecurse;
17 | zip.expand(am);
18 | write(am.name, am.expandedData);
19 | }
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/parsers/20-dart-archive/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM dart:3.5 AS build
2 |
3 | WORKDIR /src
4 |
5 | COPY pubspec.yaml .
6 | RUN dart pub get
7 |
8 | COPY unzip.dart .
9 | RUN dart compile exe unzip.dart -o unzip
10 |
11 | FROM debian:12-slim
12 | COPY --from=build /src/unzip /
13 | COPY unzip-all.sh /
14 | ENTRYPOINT ["/unzip-all.sh"]
15 |
--------------------------------------------------------------------------------
/parsers/20-dart-archive/pubspec.yaml:
--------------------------------------------------------------------------------
1 | name: unzip
2 | environment:
3 | sdk: ">=3.0.0"
4 | dependencies:
5 | archive: 3.6.1
6 |
--------------------------------------------------------------------------------
/parsers/20-dart-archive/unzip.dart:
--------------------------------------------------------------------------------
1 | import 'package:archive/archive_io.dart';
2 |
3 | void main(List<String> args) {
4 | final archive = ZipDecoder().decodeBuffer(InputFileStream(args[0]));
5 | extractArchiveToDisk(archive, args[1]);
6 | }
7 |
--------------------------------------------------------------------------------
/parsers/21-erlang-zip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM erlang:27.1.2.0-alpine
2 |
3 | RUN apk add parallel
4 |
5 | COPY unzip parallel-unzip-all.sh testcase.sh /
6 |
7 | ENTRYPOINT [ "/parallel-unzip-all.sh" ]
8 |
--------------------------------------------------------------------------------
/parsers/21-erlang-zip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | set -eu
4 |
5 | cd "$2"
6 | erl -noshell -eval "case zip:unzip(\"$1\") of {ok, _} -> erlang:halt(0); {error, Reason} -> io:format(\"Error: ~p~n\", [Reason]), erlang:halt(1) end." -s init stop
7 |
--------------------------------------------------------------------------------
/parsers/22-go-archive-zip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.22.3-alpine3.20 AS build
2 |
3 | WORKDIR /src
4 |
5 | RUN go mod init main
6 |
7 | RUN go get github.com/evilsocket/islazy/zip
8 |
9 | COPY unzip.go .
10 |
11 | RUN go build unzip.go
12 |
13 | FROM alpine:3.20
14 |
15 | COPY --from=build /src/unzip /
16 |
17 | COPY unzip-all.sh /
18 | ENTRYPOINT ["/unzip-all.sh"]
19 |
--------------------------------------------------------------------------------
/parsers/22-go-archive-zip/unzip.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "os"
5 | "github.com/evilsocket/islazy/zip"
6 | )
7 |
8 | func main() {
9 | if _, err := zip.Unzip(os.Args[1], os.Args[2]); err != nil {
10 | panic(err)
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/parsers/23-haskell-zip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM haskell:9-slim AS build
2 |
3 | RUN apt-get update
4 | RUN apt-get install -y libbz2-dev
5 |
6 | RUN cabal update
7 | RUN cabal install zip-2.1.0
8 |
9 | FROM debian:12-slim
10 |
11 | WORKDIR /data
12 |
13 | RUN apt-get update
14 | RUN apt-get install -y bzip2
15 |
16 | COPY --from=build /root/.local/bin/haskell-zip-app /
17 |
18 | COPY unzip unzip-all.sh /
19 |
20 | ENTRYPOINT ["/unzip-all.sh"]
21 |
--------------------------------------------------------------------------------
/parsers/23-haskell-zip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | /haskell-zip-app uncompress "$1" "$2"
4 |
--------------------------------------------------------------------------------
/parsers/24-haskell-zip-archive/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM haskell:9-slim AS build
2 |
3 | RUN cabal update
4 | RUN cabal install zip-archive-0.4.3.2 -f executable
5 |
6 | FROM debian:12-slim
7 |
8 | COPY --from=build /root/.local/bin/zip-archive /
9 |
10 | COPY unzip unzip-all.sh /
11 |
12 | ENTRYPOINT ["/unzip-all.sh"]
13 |
--------------------------------------------------------------------------------
/parsers/24-haskell-zip-archive/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | set -eu
4 |
5 | cd "$2"
6 |
7 | /zip-archive -d "$1"
8 |
--------------------------------------------------------------------------------
/parsers/25-java-commons-compress-stream/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gradle:8.10-jdk21-alpine AS build
2 |
3 | COPY unzip /unzip
4 |
5 | WORKDIR /unzip
6 |
7 | RUN gradle build
8 |
9 | FROM eclipse-temurin:21.0.5_11-jre-alpine
10 |
11 | RUN apk add parallel
12 |
13 | COPY --from=build /unzip/build/libs/unzip.jar /
14 |
15 | COPY unzip.sh /unzip
16 |
17 | COPY testcase.sh parallel-unzip-all.sh /
18 | ENTRYPOINT ["/parallel-unzip-all.sh"]
19 |
--------------------------------------------------------------------------------
/parsers/25-java-commons-compress-stream/unzip.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | java -jar /unzip.jar "$1" "$2"
4 |
--------------------------------------------------------------------------------
/parsers/25-java-commons-compress-stream/unzip/.gitignore:
--------------------------------------------------------------------------------
1 | .classpath
2 | .gradle
3 | .project
4 | .settings
5 | app
6 | build
7 |
--------------------------------------------------------------------------------
/parsers/25-java-commons-compress-stream/unzip/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 | id 'application'
3 | }
4 |
5 | repositories {
6 | mavenCentral()
7 | }
8 |
9 | dependencies {
10 | implementation 'org.apache.commons:commons-compress:1.27.1'
11 | }
12 |
13 | java {
14 | toolchain {
15 | languageVersion = JavaLanguageVersion.of(21)
16 | }
17 | }
18 |
19 | application {
20 | mainClass = 'unzip.App'
21 | }
22 |
23 | jar {
24 | manifest {
25 | attributes 'Main-Class': application.mainClass
26 | }
27 | from {
28 | configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
29 | }
30 | duplicatesStrategy = DuplicatesStrategy.EXCLUDE
31 | }
32 |
--------------------------------------------------------------------------------
/parsers/25-java-commons-compress-stream/unzip/src/main/java/unzip/App.java:
--------------------------------------------------------------------------------
1 | package unzip;
2 |
3 | import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
4 | import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
5 |
6 | import java.io.File;
7 | import java.io.FileInputStream;
8 | import java.io.FileOutputStream;
9 | import java.io.InputStream;
10 | import java.io.IOException;
11 | import java.io.OutputStream;
12 | import java.nio.file.Paths;
13 |
14 | public class App {
15 | public static void main(String[] args) {
16 | try {
17 | InputStream inputStream = new FileInputStream(args[0]);
18 | ZipArchiveInputStream zipInputStream = new ZipArchiveInputStream(inputStream);
19 | ZipArchiveEntry entry;
20 | while ((entry = zipInputStream.getNextEntry()) != null) {
21 | File extractedFile = Paths.get(args[1], entry.getName()).toFile();
22 | if (entry.isDirectory()) {
23 | extractedFile.mkdirs();
24 | } else {
25 | extractedFile.getParentFile().mkdirs();
26 | try (OutputStream outputStream = new FileOutputStream(extractedFile)) {
27 | int readLen;
28 | byte[] readBuffer = new byte[4096];
29 | while ((readLen = zipInputStream.read(readBuffer)) != -1) {
30 | outputStream.write(readBuffer, 0, readLen);
31 | }
32 | }
33 | }
34 | }
35 | } catch (IOException e) {
36 | e.printStackTrace();
37 | System.exit(1);
38 | }
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/parsers/26-java-commons-compress-zipfile/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gradle:8.10-jdk21-alpine AS build
2 |
3 | COPY unzip /unzip
4 |
5 | WORKDIR /unzip
6 |
7 | RUN gradle build
8 |
9 | FROM eclipse-temurin:21.0.5_11-jre-alpine
10 |
11 | RUN apk add parallel
12 |
13 | COPY --from=build /unzip/build/libs/unzip.jar /
14 |
15 | COPY unzip.sh /unzip
16 |
17 | COPY testcase.sh parallel-unzip-all.sh /
18 | ENTRYPOINT ["/parallel-unzip-all.sh"]
19 |
--------------------------------------------------------------------------------
/parsers/26-java-commons-compress-zipfile/unzip.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | java -jar /unzip.jar "$1" "$2"
4 |
--------------------------------------------------------------------------------
/parsers/26-java-commons-compress-zipfile/unzip/.gitignore:
--------------------------------------------------------------------------------
1 | .classpath
2 | .gradle
3 | .project
4 | .settings
5 | app
6 | build
7 |
--------------------------------------------------------------------------------
/parsers/26-java-commons-compress-zipfile/unzip/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 | id 'application'
3 | }
4 |
5 | repositories {
6 | mavenCentral()
7 | }
8 |
9 | dependencies {
10 | implementation 'org.codehaus.plexus:plexus-archiver:4.10.0'
11 | implementation 'org.slf4j:slf4j-simple:2.0.16'
12 | }
13 |
14 | java {
15 | toolchain {
16 | languageVersion = JavaLanguageVersion.of(21)
17 | }
18 | }
19 |
20 | application {
21 | mainClass = 'unzip.App'
22 | }
23 |
24 | jar {
25 | manifest {
26 | attributes 'Main-Class': application.mainClass
27 | }
28 | from {
29 | configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
30 | }
31 | duplicatesStrategy = DuplicatesStrategy.EXCLUDE
32 | }
33 |
--------------------------------------------------------------------------------
/parsers/26-java-commons-compress-zipfile/unzip/src/main/java/unzip/App.java:
--------------------------------------------------------------------------------
1 | package unzip;
2 |
3 | import java.io.File;
4 | import org.codehaus.plexus.archiver.zip.ZipUnArchiver;
5 |
6 | public class App {
7 | public static void main(String[] args) {
8 | var unarchiver = new ZipUnArchiver(new File(args[0]));
9 | unarchiver.setDestDirectory(new File(args[1]));
10 | unarchiver.extract();
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/parsers/27-java-util-zip-zipfile/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gradle:8.10-jdk21-alpine AS build
2 |
3 | COPY unzip /unzip
4 |
5 | WORKDIR /unzip
6 |
7 | RUN gradle build
8 |
9 | FROM eclipse-temurin:21.0.5_11-jre-alpine
10 |
11 | COPY --from=build /unzip/build/libs/unzip.jar /
12 |
13 | COPY unzip.sh /unzip
14 |
15 | COPY unzip-all.sh /
16 | ENTRYPOINT ["/unzip-all.sh"]
17 |
--------------------------------------------------------------------------------
/parsers/27-java-util-zip-zipfile/unzip.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | java -jar /unzip.jar "$1" "$2"
4 |
--------------------------------------------------------------------------------
/parsers/27-java-util-zip-zipfile/unzip/.gitignore:
--------------------------------------------------------------------------------
1 | .classpath
2 | .gradle
3 | .project
4 | .settings
5 | app
6 | build
7 |
--------------------------------------------------------------------------------
/parsers/27-java-util-zip-zipfile/unzip/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 | id 'application'
3 | }
4 |
5 | repositories {
6 | mavenCentral()
7 | }
8 |
9 | dependencies {
10 | implementation 'org.zeroturnaround:zt-zip:1.17'
11 | implementation 'org.slf4j:slf4j-simple:2.0.16'
12 | }
13 |
14 | java {
15 | toolchain {
16 | languageVersion = JavaLanguageVersion.of(21)
17 | }
18 | }
19 |
20 | application {
21 | mainClass = 'unzip.App'
22 | }
23 |
24 | jar {
25 | manifest {
26 | attributes 'Main-Class': application.mainClass
27 | }
28 | from {
29 | configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
30 | }
31 | duplicatesStrategy = DuplicatesStrategy.EXCLUDE
32 | }
33 |
--------------------------------------------------------------------------------
/parsers/27-java-util-zip-zipfile/unzip/src/main/java/unzip/App.java:
--------------------------------------------------------------------------------
1 | package unzip;
2 |
3 | import java.io.File;
4 | import org.zeroturnaround.zip.ZipUtil;
5 |
6 | public class App {
7 | public static void main(String[] args) {
8 | ZipUtil.unpack(new File(args[0]), new File(args[1]));
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/parsers/28-java-util-zip-zipinputstream/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gradle:8.10-jdk21-alpine AS build
2 |
3 | COPY unzip /unzip
4 |
5 | WORKDIR /unzip
6 |
7 | RUN gradle build
8 |
9 | FROM eclipse-temurin:21.0.5_11-jre-alpine
10 |
11 | COPY --from=build /unzip/build/libs/unzip.jar /
12 |
13 | COPY unzip.sh /unzip
14 |
15 | COPY unzip-all.sh /
16 | ENTRYPOINT ["/unzip-all.sh"]
17 |
--------------------------------------------------------------------------------
/parsers/28-java-util-zip-zipinputstream/unzip.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | java -jar /unzip.jar "$1" "$2"
4 |
--------------------------------------------------------------------------------
/parsers/28-java-util-zip-zipinputstream/unzip/.gitignore:
--------------------------------------------------------------------------------
1 | .classpath
2 | .gradle
3 | .project
4 | .settings
5 | app
6 | build
7 |
--------------------------------------------------------------------------------
/parsers/28-java-util-zip-zipinputstream/unzip/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 | id 'application'
3 | }
4 |
5 | repositories {
6 | mavenCentral()
7 | }
8 |
9 | dependencies {
10 | implementation 'org.zeroturnaround:zt-zip:1.17'
11 | implementation 'org.slf4j:slf4j-simple:2.0.16'
12 | }
13 |
14 | java {
15 | toolchain {
16 | languageVersion = JavaLanguageVersion.of(21)
17 | }
18 | }
19 |
20 | application {
21 | mainClass = 'unzip.App'
22 | }
23 |
24 | jar {
25 | manifest {
26 | attributes 'Main-Class': application.mainClass
27 | }
28 | from {
29 | configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
30 | }
31 | duplicatesStrategy = DuplicatesStrategy.EXCLUDE
32 | }
33 |
--------------------------------------------------------------------------------
/parsers/28-java-util-zip-zipinputstream/unzip/src/main/java/unzip/App.java:
--------------------------------------------------------------------------------
1 | package unzip;
2 |
3 | import java.io.File;
4 | import java.io.FileInputStream;
5 | import org.zeroturnaround.zip.ZipUtil;
6 |
7 | public class App {
8 | public static void main(String[] args) {
9 | try {
10 | ZipUtil.unpack(new FileInputStream(new File(args[0])), new File(args[1]));
11 | } catch (Exception e) {
12 | e.printStackTrace();
13 | System.exit(1);
14 | }
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/parsers/29-java-zip4j-zipfile/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gradle:8.10-jdk21-alpine AS build
2 |
3 | COPY unzip /unzip
4 |
5 | WORKDIR /unzip
6 |
7 | RUN gradle build
8 |
9 | FROM eclipse-temurin:21.0.5_11-jre-alpine
10 |
11 | COPY --from=build /unzip/build/libs/unzip.jar /
12 |
13 | COPY unzip.sh /unzip
14 |
15 | COPY unzip-all.sh /
16 | ENTRYPOINT ["/unzip-all.sh"]
17 |
--------------------------------------------------------------------------------
/parsers/29-java-zip4j-zipfile/unzip.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | java -jar /unzip.jar "$1" "$2"
4 |
--------------------------------------------------------------------------------
/parsers/29-java-zip4j-zipfile/unzip/.gitignore:
--------------------------------------------------------------------------------
1 | .classpath
2 | .gradle
3 | .project
4 | .settings
5 | app
6 | build
7 |
--------------------------------------------------------------------------------
/parsers/29-java-zip4j-zipfile/unzip/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 | id 'application'
3 | }
4 |
5 | repositories {
6 | mavenCentral()
7 | }
8 |
9 | dependencies {
10 | implementation 'net.lingala.zip4j:zip4j:2.11.5'
11 | }
12 |
13 | java {
14 | toolchain {
15 | languageVersion = JavaLanguageVersion.of(21)
16 | }
17 | }
18 |
19 | application {
20 | mainClass = 'unzip.App'
21 | }
22 |
23 | jar {
24 | manifest {
25 | attributes 'Main-Class': application.mainClass
26 | }
27 | from {
28 | configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/parsers/29-java-zip4j-zipfile/unzip/src/main/java/unzip/App.java:
--------------------------------------------------------------------------------
1 | package unzip;
2 |
3 | import net.lingala.zip4j.ZipFile;
4 | import net.lingala.zip4j.exception.ZipException;
5 |
6 | public class App {
7 | public static void main(String[] args) {
8 | try {
9 | new ZipFile(args[0]).extractAll(args[1]);
10 | } catch (ZipException e) {
11 | System.err.println("Error during extraction: " + e.getMessage());
12 | System.exit(1);
13 | }
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/parsers/30-java-zip4j-zipinputstream/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gradle:8.10-jdk21-alpine AS build
2 |
3 | COPY unzip /unzip
4 |
5 | WORKDIR /unzip
6 |
7 | RUN gradle build
8 |
9 | FROM eclipse-temurin:21.0.5_11-jre-alpine
10 |
11 | COPY --from=build /unzip/build/libs/unzip.jar /
12 |
13 | COPY unzip.sh /unzip
14 |
15 | COPY unzip-all.sh /
16 | ENTRYPOINT ["/unzip-all.sh"]
17 |
--------------------------------------------------------------------------------
/parsers/30-java-zip4j-zipinputstream/unzip.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | java -jar /unzip.jar "$1" "$2"
4 |
--------------------------------------------------------------------------------
/parsers/30-java-zip4j-zipinputstream/unzip/.gitignore:
--------------------------------------------------------------------------------
1 | .classpath
2 | .gradle
3 | .project
4 | .settings
5 | app
6 | build
7 |
--------------------------------------------------------------------------------
/parsers/30-java-zip4j-zipinputstream/unzip/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 | id 'application'
3 | }
4 |
5 | repositories {
6 | mavenCentral()
7 | }
8 |
9 | dependencies {
10 | implementation 'net.lingala.zip4j:zip4j:2.11.5'
11 | }
12 |
13 | java {
14 | toolchain {
15 | languageVersion = JavaLanguageVersion.of(21)
16 | }
17 | }
18 |
19 | application {
20 | mainClass = 'unzip.App'
21 | }
22 |
23 | jar {
24 | manifest {
25 | attributes 'Main-Class': application.mainClass
26 | }
27 | from {
28 | configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/parsers/30-java-zip4j-zipinputstream/unzip/src/main/java/unzip/App.java:
--------------------------------------------------------------------------------
1 | package unzip;
2 |
3 | import net.lingala.zip4j.io.inputstream.ZipInputStream;
4 | import net.lingala.zip4j.model.LocalFileHeader;
5 |
6 | import java.io.File;
7 | import java.io.FileInputStream;
8 | import java.io.FileOutputStream;
9 | import java.io.InputStream;
10 | import java.io.IOException;
11 | import java.io.OutputStream;
12 | import java.nio.file.Paths;
13 |
14 | public class App {
15 | public static void main(String[] args) {
16 | try {
17 | InputStream inputStream = new FileInputStream(args[0]);
18 | ZipInputStream zipInputStream = new ZipInputStream(inputStream);
19 | LocalFileHeader localFileHeader;
20 | while ((localFileHeader = zipInputStream.getNextEntry()) != null) {
21 | File extractedFile = Paths.get(args[1], localFileHeader.getFileName()).toFile();
22 | if (localFileHeader.isDirectory()) {
23 | extractedFile.mkdirs();
24 | } else {
25 | extractedFile.getParentFile().mkdirs();
26 | try (OutputStream outputStream = new FileOutputStream(extractedFile)) {
27 | int readLen;
28 | byte[] readBuffer = new byte[4096];
29 | while ((readLen = zipInputStream.read(readBuffer)) != -1) {
30 | outputStream.write(readBuffer, 0, readLen);
31 | }
32 | }
33 | }
34 | }
35 | } catch (IOException e) {
36 | e.printStackTrace();
37 | System.exit(1);
38 | }
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/parsers/31-nodejs-ronomon-zip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:20-alpine
2 |
3 | WORKDIR /src
4 |
5 | RUN npm init -y
6 |
7 | RUN npm install @ronomon/zip@1.12.0
8 |
9 | COPY unzip.js .
10 |
11 | COPY unzip unzip-all.sh /
12 |
13 | ENTRYPOINT ["/unzip-all.sh"]
14 |
--------------------------------------------------------------------------------
/parsers/31-nodejs-ronomon-zip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | node /src/unzip.js "$@"
4 |
--------------------------------------------------------------------------------
/parsers/31-nodejs-ronomon-zip/unzip.js:
--------------------------------------------------------------------------------
1 | const ZIP = require('@ronomon/zip');
2 | const { dirname } = require('path');
3 | const { readFileSync, writeFileSync, mkdirSync } = require('fs');
4 |
5 | const buffer = readFileSync(process.argv[2]);
6 | try {
7 | const headers = ZIP.decode(buffer);
8 | process.chdir(process.argv[3]);
9 | for (const header of headers) {
10 | if (header.directory) {
11 | mkdirSync(header.fileName, { recursive: true });
12 | } else {
13 | mkdirSync(dirname(header.fileName), { recursive: true });
14 | const data = ZIP.inflate(header, buffer);
15 | writeFileSync(header.fileName, data);
16 | }
17 | }
18 | } catch (error) {
19 | console.error(error.message);
20 | process.exit(1);
21 | }
22 |
--------------------------------------------------------------------------------
/parsers/32-nodejs-adm-zip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:20-alpine
2 |
3 | WORKDIR /src
4 |
5 | RUN apk add parallel
6 |
7 | RUN npm init -y
8 |
9 | RUN npm install adm-zip@0.5.16
10 |
11 | COPY unzip.js .
12 |
13 | COPY unzip /
14 |
15 | COPY testcase.sh parallel-unzip-all.sh /
16 | ENTRYPOINT ["/parallel-unzip-all.sh"]
17 |
--------------------------------------------------------------------------------
/parsers/32-nodejs-adm-zip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | node /src/unzip.js "$@"
4 |
--------------------------------------------------------------------------------
/parsers/32-nodejs-adm-zip/unzip.js:
--------------------------------------------------------------------------------
1 | const AdmZip = require('adm-zip');
2 |
3 | const zip = new AdmZip(process.argv[2]);
4 | zip.extractAllTo(process.argv[3]);
5 |
--------------------------------------------------------------------------------
/parsers/33-nodejs-decompress-zip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:20-alpine
2 |
3 | WORKDIR /src
4 |
5 | RUN apk add parallel
6 |
7 | RUN npm init -y
8 |
9 | RUN npm install decompress-zip@0.3.3
10 |
11 | COPY unzip.js .
12 |
13 | COPY unzip testcase.sh parallel-unzip-all.sh /
14 |
15 | ENTRYPOINT ["/parallel-unzip-all.sh", "50%"]
16 |
--------------------------------------------------------------------------------
/parsers/33-nodejs-decompress-zip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | node /src/unzip.js "$@"
4 |
--------------------------------------------------------------------------------
/parsers/33-nodejs-decompress-zip/unzip.js:
--------------------------------------------------------------------------------
1 | const DecompressZip = require('decompress-zip');
2 |
3 | const zip = new DecompressZip(process.argv[2]);
4 |
5 | zip.on('error', (err) => {
6 | console.error(err);
7 | process.exit(1);
8 | });
9 |
10 | zip.extract({ path: process.argv[3] });
11 |
--------------------------------------------------------------------------------
/parsers/34-nodejs-jszip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:20-alpine
2 |
3 | WORKDIR /src
4 |
5 | RUN npm init -y
6 |
7 | RUN npm install jszip@3.10.1
8 |
9 | COPY unzip.js .
10 |
11 | COPY unzip unzip-all.sh /
12 |
13 | ENTRYPOINT ["/unzip-all.sh"]
14 |
--------------------------------------------------------------------------------
/parsers/34-nodejs-jszip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | node /src/unzip.js "$@"
4 |
--------------------------------------------------------------------------------
/parsers/34-nodejs-jszip/unzip.js:
--------------------------------------------------------------------------------
1 | const { loadAsync } = require('jszip');
2 | const { dirname } = require('path');
3 | const { readFile, mkdir, writeFile } = require('fs/promises');
4 |
5 | (async () => {
6 | const file = await readFile(process.argv[2]);
7 | const zip = await loadAsync(file);
8 |
9 | process.chdir(process.argv[3]);
10 |
11 | for (const entry of Object.values(zip.files)) {
12 | if (entry.dir) {
13 | await mkdir(entry.name, { recursive: true });
14 | } else {
15 | await mkdir(dirname(entry.name), { recursive: true });
16 | const content = await entry.async('nodebuffer');
17 | await writeFile(entry.name, content);
18 | }
19 | }
20 | })();
21 |
--------------------------------------------------------------------------------
/parsers/35-nodejs-node-stream-zip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:20-alpine
2 |
3 | WORKDIR /src
4 |
5 | RUN npm init -y
6 |
7 | RUN npm install node-stream-zip@1.15.0
8 |
9 | COPY unzip.js .
10 |
11 | COPY unzip unzip-all.sh /
12 |
13 | ENTRYPOINT ["/unzip-all.sh"]
14 |
--------------------------------------------------------------------------------
/parsers/35-nodejs-node-stream-zip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | node /src/unzip.js "$@"
4 |
--------------------------------------------------------------------------------
/parsers/35-nodejs-node-stream-zip/unzip.js:
--------------------------------------------------------------------------------
1 | const StreamZip = require('node-stream-zip');
2 |
3 | const zip = new StreamZip.async({ file: process.argv[2] });
4 | zip.extract(null, process.argv[3]);
5 |
--------------------------------------------------------------------------------
/parsers/36-nodejs-unzipper-extract/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:20-alpine
2 |
3 | WORKDIR /src
4 |
5 | RUN npm init -y
6 |
7 | RUN npm install unzipper@0.12.3
8 |
9 | COPY unzip.js .
10 |
11 | COPY unzip unzip-all.sh /
12 |
13 | ENTRYPOINT ["/unzip-all.sh"]
14 |
--------------------------------------------------------------------------------
/parsers/36-nodejs-unzipper-extract/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | node /src/unzip.js "$@"
4 |
--------------------------------------------------------------------------------
/parsers/36-nodejs-unzipper-extract/unzip.js:
--------------------------------------------------------------------------------
1 | const { createReadStream } = require('fs');
2 | const { Extract } = require('unzipper');
3 |
4 | const extract = Extract({ path: process.argv[3] });
5 | createReadStream(process.argv[2]).pipe(extract);
6 | extract.on('error', (error) => {
7 | console.error(error);
8 | process.exit(1);
9 | });
10 |
--------------------------------------------------------------------------------
/parsers/37-nodejs-unzipper-open/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:20-alpine
2 |
3 | WORKDIR /src
4 |
5 | RUN npm init -y
6 |
7 | RUN npm install unzipper@0.12.3
8 |
9 | COPY unzip.js .
10 |
11 | COPY unzip unzip-all.sh /
12 |
13 | ENTRYPOINT ["/unzip-all.sh"]
14 |
--------------------------------------------------------------------------------
/parsers/37-nodejs-unzipper-open/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | node /src/unzip.js "$@"
4 |
--------------------------------------------------------------------------------
/parsers/37-nodejs-unzipper-open/unzip.js:
--------------------------------------------------------------------------------
1 | const { Open } = require('unzipper');
2 |
3 | (async () => {
4 | try {
5 | const d = await Open.file(process.argv[2]);
6 | await d.extract({ path: process.argv[3] });
7 | } catch (err) {
8 | console.error(err);
9 | process.exit(1);
10 | }
11 | })();
12 |
--------------------------------------------------------------------------------
/parsers/38-nodejs-yauzl-v2/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:20-alpine
2 |
3 | RUN npm install extract-zip@2.0.1 -g
4 |
5 | COPY unzip unzip-all.sh /
6 |
7 | ENTRYPOINT ["/unzip-all.sh"]
8 |
--------------------------------------------------------------------------------
/parsers/38-nodejs-yauzl-v2/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | extract-zip "$@"
4 |
--------------------------------------------------------------------------------
/parsers/39-nodejs-yauzl-v3/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:20-alpine
2 |
3 | RUN npm install extract-zip@2.0.1 -g
4 |
5 | # extract-zip depends on yauzl v2 by default; rewrite its dependency to test yauzl v3
6 | WORKDIR /usr/local/lib/node_modules/extract-zip
7 | RUN sed -i 's/"yauzl":.*/"yauzl": "3.2.0"/' package.json
8 | RUN npm install
9 |
10 | COPY unzip unzip-all.sh /
11 |
12 | ENTRYPOINT ["/unzip-all.sh"]
13 |
--------------------------------------------------------------------------------
/parsers/39-nodejs-yauzl-v3/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | extract-zip "$@"
4 |
--------------------------------------------------------------------------------
/parsers/40-nodejs-zipjs/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:20-alpine
2 |
3 | WORKDIR /src
4 |
5 | RUN npm init -y
6 |
7 | RUN npm install @zip.js/zip.js@2.7.53
8 |
9 | COPY unzip.js .
10 |
11 | COPY unzip unzip-all.sh /
12 |
13 | ENTRYPOINT ["/unzip-all.sh"]
14 |
--------------------------------------------------------------------------------
/parsers/40-nodejs-zipjs/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | node /src/unzip.js "$@"
4 |
--------------------------------------------------------------------------------
/parsers/40-nodejs-zipjs/unzip.js:
--------------------------------------------------------------------------------
1 | /* eslint-disable no-await-in-loop */
2 |
3 | const { BlobReader, ZipReader, Uint8ArrayWriter } = require('@zip.js/zip.js');
4 | const { dirname } = require('path');
5 | const { openAsBlob } = require('fs');
6 | const { mkdir, writeFile } = require('fs/promises');
7 |
8 | (async () => {
9 | process.chdir(process.argv[3]);
10 | const file = await openAsBlob(process.argv[2]);
11 | const reader = new ZipReader(new BlobReader(file));
12 | for (const entry of await reader.getEntries()) {
13 | if (entry.directory) {
14 | await mkdir(entry.filename, { recursive: true });
15 | } else {
16 | const data = await entry.getData(new Uint8ArrayWriter());
17 | await mkdir(dirname(entry.filename), { recursive: true });
18 | await writeFile(entry.filename, data);
19 | }
20 | }
21 | })();
22 |
--------------------------------------------------------------------------------
/parsers/41-php-phardata/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine:3.20
2 |
3 | RUN apk add php83
4 | RUN apk add php83-phar
5 |
6 | COPY unzip.php /
7 |
8 | COPY unzip unzip-all.sh /
9 |
10 | ENTRYPOINT [ "/unzip-all.sh" ]
11 |
--------------------------------------------------------------------------------
/parsers/41-php-phardata/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | php83 /unzip.php "$@"
4 |
--------------------------------------------------------------------------------
/parsers/41-php-phardata/unzip.php:
--------------------------------------------------------------------------------
1 | <?php
2 |
3 | $phar = new PharData($argv[1]);
4 | $phar->extractTo($argv[2], null, true);
5 |
--------------------------------------------------------------------------------
/parsers/42-php-phpzip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine:3.20
2 |
3 | WORKDIR /src
4 |
5 | RUN apk add composer
6 |
7 | RUN composer require nelexa/zip:4.0.2
8 |
9 | COPY unzip.php .
10 |
11 | COPY unzip unzip-all.sh /
12 |
13 | ENTRYPOINT [ "/unzip-all.sh" ]
14 |
--------------------------------------------------------------------------------
/parsers/42-php-phpzip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | php83 /src/unzip.php "$@"
4 |
--------------------------------------------------------------------------------
/parsers/42-php-phpzip/unzip.php:
--------------------------------------------------------------------------------
1 | <?php
2 |
3 | require '/src/vendor/autoload.php';
4 |
5 | $zipFile = new \PhpZip\ZipFile();
6 | $zipFile->openFile($argv[1])->extractTo($argv[2]);
7 |
--------------------------------------------------------------------------------
/parsers/43-pascal-paszlib/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM freepascal/fpc:3.2.2-full AS build
2 |
3 | WORKDIR /src
4 |
5 | COPY unzip.pp .
6 |
7 | RUN fpc unzip
8 |
9 | FROM alpine:3.20
10 |
11 | COPY --from=build /src/unzip /
12 |
13 | COPY unzip-all.sh /
14 |
15 | ENTRYPOINT ["/unzip-all.sh"]
16 |
--------------------------------------------------------------------------------
/parsers/43-pascal-paszlib/unzip.pp:
--------------------------------------------------------------------------------
1 | uses
2 | Zipper;
3 | var
4 | UnZipper: TUnZipper;
5 | begin
6 | UnZipper := TUnZipper.Create;
7 | UnZipper.FileName := paramStr(1);
8 | UnZipper.OutputPath := paramStr(2);
9 | UnZipper.Examine;
10 | UnZipper.UnZipAllFiles;
11 | UnZipper.Free;
12 | end.
13 |
--------------------------------------------------------------------------------
/parsers/44-perl-archive-zip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine:3.20
2 |
3 | RUN apk add perl-archive-zip
4 |
5 | COPY unzip.pl /
6 |
7 | COPY unzip unzip-all.sh /
8 |
9 | ENTRYPOINT [ "/unzip-all.sh" ]
10 |
--------------------------------------------------------------------------------
/parsers/44-perl-archive-zip/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | perl /unzip.pl "$@"
4 |
--------------------------------------------------------------------------------
/parsers/44-perl-archive-zip/unzip.pl:
--------------------------------------------------------------------------------
1 | use strict;
2 | use Archive::Zip qw(:ERROR_CODES);
3 |
4 | my $zip = Archive::Zip->new();
5 | my $status = $zip->read($ARGV[0]);
6 | die 'Failed to read ZIP' if $status != AZ_OK;
7 | $status = $zip->extractTree('', $ARGV[1]);
8 | die 'Failed to extract ZIP' if $status != AZ_OK;
9 |
--------------------------------------------------------------------------------
/parsers/45-python-zipfile/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.13.0-alpine
2 |
3 | COPY unzip.py /
4 |
5 | COPY unzip unzip-all.sh /
6 |
7 | ENTRYPOINT [ "/unzip-all.sh" ]
8 |
--------------------------------------------------------------------------------
/parsers/45-python-zipfile/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | python /unzip.py "$@"
4 |
--------------------------------------------------------------------------------
/parsers/45-python-zipfile/unzip.py:
--------------------------------------------------------------------------------
1 | from zipfile import ZipFile
2 | from sys import argv
3 |
4 | zip = ZipFile(argv[1], 'r')
5 | error_file = zip.testzip()
6 | if error_file is None:
7 | zip.extractall(argv[2])
8 | else:
9 | print(f"Error in file {error_file}")
10 | exit(1)
11 |
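Aside: `testzip()` reads every member in full and returns the name of the first one whose CRC-32 check fails, so this harness rejects an archive up front if any member is corrupt, before anything is extracted. Roughly equivalent logic, as an illustrative Python sketch (not part of the repo):

    import zipfile

    def first_bad_member(zf: zipfile.ZipFile):
        # Read each member to the end to force CRC verification,
        # roughly as ZipFile.testzip() does
        for info in zf.infolist():
            try:
                with zf.open(info) as f:
                    while f.read(1 << 16):
                        pass
            except Exception:
                return info.filename
        return None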
--------------------------------------------------------------------------------
/parsers/46-racket-file-unzip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM racket/racket:8.15-bc-full AS build
2 |
3 | WORKDIR /src
4 |
5 | COPY unzip.rkt .
6 |
7 | RUN raco exe unzip.rkt
8 |
9 | RUN raco distribute /unzip unzip
10 |
11 | FROM debian:12-slim
12 |
13 | RUN apt-get update
14 | RUN apt-get install -y parallel
15 |
16 | COPY --from=build /unzip /build
17 |
18 | RUN ln -s /build/bin/unzip /unzip
19 |
20 | COPY parallel-unzip-all.sh testcase.sh /
21 |
22 | ENTRYPOINT [ "/parallel-unzip-all.sh" ]
23 |
--------------------------------------------------------------------------------
/parsers/46-racket-file-unzip/unzip.rkt:
--------------------------------------------------------------------------------
1 | #lang racket
2 | (require file/unzip)
3 |
4 | (let ([args (current-command-line-arguments)])
5 | (define src (vector-ref args 0))
6 | (define dest (vector-ref args 1))
7 | (unzip src (make-filesystem-entry-reader #:dest dest #:exists 'replace)))
8 |
--------------------------------------------------------------------------------
/parsers/47-ruby-rubyzip-file/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ruby:3.3-alpine
2 |
3 | RUN gem install rubyzip -v 2.3.2
4 |
5 | COPY unzip.rb /
6 |
7 | COPY unzip unzip-all.sh /
8 |
9 | ENTRYPOINT [ "/unzip-all.sh" ]
10 |
--------------------------------------------------------------------------------
/parsers/47-ruby-rubyzip-file/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | ruby /unzip.rb "$@"
4 |
--------------------------------------------------------------------------------
/parsers/47-ruby-rubyzip-file/unzip.rb:
--------------------------------------------------------------------------------
1 | require 'zip'
2 |
3 | Zip.on_exists_proc = true
4 | Dir.chdir(ARGV[1])
5 |
6 | Zip::File.open(ARGV[0]) do |zip_file|
7 | zip_file.each do |entry|
8 | entry_path = File.join(Dir.pwd, entry.name)
9 | FileUtils.mkdir_p(File.dirname(entry_path))
10 | entry.extract
11 | end
12 | end
13 |
--------------------------------------------------------------------------------
/parsers/48-ruby-rubyzip-inputstream/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ruby:3.3-alpine
2 |
3 | RUN gem install rubyzip -v 2.3.2
4 |
5 | COPY unzip.rb /
6 |
7 | COPY unzip unzip-all.sh /
8 |
9 | ENTRYPOINT [ "/unzip-all.sh" ]
10 |
--------------------------------------------------------------------------------
/parsers/48-ruby-rubyzip-inputstream/unzip:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | ruby /unzip.rb "$@"
4 |
--------------------------------------------------------------------------------
/parsers/48-ruby-rubyzip-inputstream/unzip.rb:
--------------------------------------------------------------------------------
1 | require 'zip'
2 |
3 | Zip.on_exists_proc = true
4 | Dir.chdir(ARGV[1])
5 |
6 | stream = Zip::InputStream.new(ARGV[0])
7 | while entry = stream.get_next_entry
8 | entry_path = File.join(Dir.pwd, entry.name)
9 | FileUtils.mkdir_p(File.dirname(entry_path))
10 | entry.extract
11 | end
12 |
--------------------------------------------------------------------------------
/parsers/49-rust-zip/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM rust:1.82-slim-bookworm AS build
2 |
3 | WORKDIR /src/unzip
4 |
5 | RUN cargo init
6 |
7 | RUN cargo add zip@2.2.0
8 |
9 | COPY unzip.rs src/main.rs
10 |
11 | RUN cargo build --release
12 |
13 | FROM debian:12-slim
14 |
15 | COPY --from=build /src/unzip/target/release/unzip /
16 |
17 | COPY unzip-all.sh /
18 |
19 | ENTRYPOINT [ "/unzip-all.sh" ]
20 |
--------------------------------------------------------------------------------
/parsers/49-rust-zip/unzip.rs:
--------------------------------------------------------------------------------
1 | use zip::read::ZipArchive;
2 | use std::fs::File;
3 |
4 | fn main() {
5 | let mut args = std::env::args().skip(1);
6 | let src = args.next().expect("no src in args");
7 | let dst = args.next().expect("no dst in args");
8 | let file = File::open(src).expect("failed to open input file");
9 | let mut archive = ZipArchive::new(file).expect("failed to read input ZIP");
10 | archive.extract(dst).expect("failed to extract");
11 | }
12 |
--------------------------------------------------------------------------------
/parsers/50-swift-zipfoundation/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM swift:5.10 AS build
2 |
3 | COPY src /src
4 | WORKDIR /src
5 |
6 | RUN swift build -c release --static-swift-stdlib
7 | RUN mv "$(swift build -c release --show-bin-path)/unzip" /
8 |
9 | FROM debian:12-slim
10 |
11 | RUN apt-get update
12 | RUN apt-get install -y parallel
13 |
14 | COPY --from=build /unzip /
15 |
16 | COPY parallel-unzip-all.sh testcase.sh /
17 |
18 | ENTRYPOINT [ "/parallel-unzip-all.sh" ]
19 |
--------------------------------------------------------------------------------
/parsers/50-swift-zipfoundation/src/Package.swift:
--------------------------------------------------------------------------------
1 | // swift-tools-version: 5.10
2 |
3 | import PackageDescription
4 |
5 | let package = Package(
6 | name: "unzip",
7 | dependencies: [
8 | .package(url: "https://github.com/weichsel/ZIPFoundation.git", exact: "0.9.19"),
9 | ],
10 | targets: [
11 | .executableTarget(name: "unzip", dependencies: ["ZIPFoundation"]),
12 | ]
13 | )
14 |
--------------------------------------------------------------------------------
/parsers/50-swift-zipfoundation/src/Sources/main.swift:
--------------------------------------------------------------------------------
1 | import ZIPFoundation
2 | import Foundation
3 |
4 | let fileManager = FileManager()
5 | let sourceURL = URL(fileURLWithPath: CommandLine.arguments[1])
6 | let destinationURL = URL(fileURLWithPath: CommandLine.arguments[2])
7 | do {
8 | try fileManager.unzipItem(at: sourceURL, to: destinationURL)
9 | } catch {
10 | print("Extraction of ZIP archive failed with error: \(error)")
11 | exit(1)
12 | }
13 |
--------------------------------------------------------------------------------
/parsers/README.md:
--------------------------------------------------------------------------------
1 | Tested parsers, as listed in Table 3 of the paper. Note that we sometimes test the underlying parser library through a wrapper; for example, we use PHP's `ZipArchive` extension to test libzip.
2 |
3 | You can find detailed information about the parsers in [`parsers.json`](./parsers.json).
4 |
--------------------------------------------------------------------------------
/parsers/parallel-unzip-all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | cd /input && parallel -j"${1:-25%}" /testcase.sh ::: *
4 |
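Here `-j"${1:-25%}"` lets the first container argument override the degree of parallelism, defaulting to one job per 25% of CPU cores; for instance, the decompress-zip image passes "50%" via its ENTRYPOINT.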
--------------------------------------------------------------------------------
/parsers/prepare.sh:
--------------------------------------------------------------------------------
1 | ../tools/prepare.sh
--------------------------------------------------------------------------------
/parsers/testcase.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | set -eu
4 |
5 | mkdir -p /output/"$1"
6 | if ! timeout 1m /unzip "$(realpath "$1")" /output/"$1"; then
7 | while ! rm -rf /output/"$1"; do echo "Failed to rm -rf /output/$1"; done
8 | touch /output/"$1"
9 | fi
10 |
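Note the output convention this script establishes: /output/<testcase> is a directory holding whatever the parser extracted on success, and is replaced by an empty regular file when extraction fails or exceeds the one-minute timeout. A hypothetical consumer-side sketch of that convention in Python:

    from pathlib import Path

    def classify_outputs(output_root: str) -> dict:
        # directory => the parser accepted the archive; plain file => it rejected it
        return {
            entry.name: 'extracted' if entry.is_dir() else 'rejected'
            for entry in Path(output_root).iterdir()
        }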
--------------------------------------------------------------------------------
/parsers/unzip-all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | set -eu
4 |
5 | cd /input
6 |
7 | for i in *; do
8 | mkdir -p /output/"$i"
9 | if ! timeout 1m /unzip "$(realpath "$i")" /output/"$i"; then
10 | while ! rm -rf /output/"$i"; do echo "Failed to rm -rf /output/$i"; done
11 | touch /output/"$i"
12 | fi
13 | done
14 |
--------------------------------------------------------------------------------
/tools/ablation-study.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -xeuo pipefail
4 |
5 | TIMES=5
6 | BATCH_SIZE=500
7 | STOP_SECONDS=$(( 24 * 60 * 60 ))
8 | base="$(dirname "$(dirname "$(realpath "$0")")")"
9 | DATA="$base/evaluation"
10 |
11 | for _ in $(seq 1 $TIMES); do
12 | for i in full argmax-ucb byte-only; do
13 | cd "$base/zip-diff"
14 | case "$i" in
15 | full) arg= ;;
16 | argmax-ucb) arg=--argmax-ucb ;;
17 | byte-only) arg=--byte-mutation-only ;;
18 | esac
19 | key="$(date -Is)-$i"
20 | session="$DATA/sessions/$key"
21 | target/release/fuzz -b "$BATCH_SIZE" -s "$STOP_SECONDS" $arg \
22 | --input-dir "$DATA/bind/input" \
23 | --output-dir "$DATA/bind/output" \
24 | --samples-dir "$session/samples" \
25 | --results-dir "$session/results" \
26 | --stats-file "$DATA/stats/$key.json"
27 | cd ../parsers
28 | docker compose down
29 | done
30 | done
31 |
--------------------------------------------------------------------------------
/tools/fuzz-stats.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import json
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from matplotlib.ticker import FuncFormatter
6 |
7 | # required by HotCRP
8 | plt.rcParams['pdf.fonttype'] = 42
9 | plt.rcParams['ps.fonttype'] = 42
10 |
11 | def load_stats_files():
12 | stats = []
13 | for filename in sys.argv[1:]:
14 | with open(filename) as f:
15 | try:
16 | stat = json.load(f)
17 | stats.append(stat)
18 | print(f'Loaded {filename}')
19 | except json.JSONDecodeError:
20 | print(f'Error parsing {filename}')
21 | return stats
22 |
23 |
24 | def group_stats_by_config(stats):
25 | groups = {'full': [], 'argmax_ucb': [], 'byte_mutation_only': []}
26 |
27 | for stat in stats:
28 | argmax_ucb = stat.get('argmax_ucb', False)
29 | byte_mutation_only = stat.get('byte_mutation_only', False)
30 | if argmax_ucb and byte_mutation_only:
31 | print('Warning: Found unusual configuration with both flags enabled')
32 | continue
33 | elif argmax_ucb:
34 | groups['argmax_ucb'].append(stat)
35 | elif byte_mutation_only:
36 | groups['byte_mutation_only'].append(stat)
37 | else:
38 | groups['full'].append(stat)
39 |
40 | for name, group in groups.items():
41 | print(f'{name}: {len(group)} sessions')
42 |
43 | return groups
44 |
45 |
46 | def interpolate_data(stats_group, metric, max_time=24*60*60, num_points=500):
47 | timepoints = np.linspace(0, max_time, num_points)
48 | interpolated = []
49 |
50 | for stat in stats_group:
51 | iterations = stat.get('iterations', [])
52 | times = [it.get('seconds_used', 0) for it in iterations]
53 | values = [it.get(metric, 0) for it in iterations]
54 |
55 | interp_values = np.interp(
56 | timepoints,
57 | times,
58 | values,
59 | left=0,
60 | )
61 | interpolated.append(interp_values)
62 |
63 | return (
64 | timepoints / 3600,
65 | np.median(interpolated, axis=0),
66 | )
67 |
68 |
69 | def plot_metric(
70 | stats_groups,
71 | metric,
72 | output_file,
73 | break_y_axis=None,
74 | y_top=None,
75 | legend_loc='best',
76 | ):
77 | # https://tsitsul.in/blog/coloropt/
78 | colors = {
79 | 'full': '#4053d3',
80 | 'argmax_ucb': '#ddb310',
81 | 'byte_mutation_only': '#b51d14',
82 | }
83 | labels = {
84 | 'full': 'Full Setup',
85 | 'argmax_ucb': 'Argmax-Based UCB',
86 | 'byte_mutation_only': 'Byte Mutation Only',
87 | }
88 | metric_title = {
89 | 'incons_count': 'Inconsistent Pairs (Median)',
90 | }
91 |
92 | timepoints = np.array([0, 24])
93 |
94 | if break_y_axis:
95 | fig, (ax_top, ax_bottom) = plt.subplots(
96 | 2,
97 | 1,
98 | figsize=(6, 4),
99 | sharex=True,
100 | gridspec_kw={'height_ratios': [6, 1], 'hspace': 0.12},
101 | )
102 | axes = [ax_top, ax_bottom]
103 | else:
104 | fig, ax = plt.subplots(figsize=(6, 4))
105 | axes = [ax]
106 |
107 | # blend overlapping lines
108 | for t in range(10):
109 | for config_name in reversed(colors):
110 | stats_group = stats_groups.get(config_name)
111 | if not stats_group:
112 | continue
113 |
114 | timepoints, median_values = interpolate_data(stats_group, metric)
115 |
116 | if len(timepoints) == 0:
117 | continue
118 |
119 | for i, ax in enumerate(axes):
120 | y = median_values
121 | if break_y_axis and i == 1:
122 | y = np.where(y <= break_y_axis, y, np.nan)
123 | ax.plot(
124 | timepoints,
125 | y,
126 | alpha=0.8**t,
127 | color=colors[config_name],
128 | label=labels[config_name] if t == 0 else None,
129 | )
130 |
131 | # Configure each axis
132 | for ax in axes:
133 | ax.grid(True, linestyle='--', alpha=0.7)
134 | ax.yaxis.set_major_formatter(
135 | FuncFormatter(
136 | lambda x, _: f'{round(x/1000)}k' if x >= 10000 else f'{round(x)}'
137 | )
138 | )
139 |
140 | if timepoints[-1] == 24:
141 | axes[0].set_xticks(np.arange(0, 25, 4))
142 | handles, labels = axes[0].get_legend_handles_labels()
143 | axes[0].legend(handles[::-1], labels[::-1], loc=legend_loc)
144 | axes[-1].set_xlabel('Time (hours)')
145 |
146 | if break_y_axis and ax_top and ax_bottom:
147 | ax_top.set_ylim(bottom=break_y_axis, top=y_top)
148 | ax_bottom.set_ylim(top=break_y_axis)
149 |
150 | ax_top.tick_params(bottom=False)
151 | ax_bottom.set_yticks([0, break_y_axis])
152 |
153 | ax_top.spines['bottom'].set_visible(False)
154 | ax_bottom.spines['top'].set_visible(False)
155 |
156 | # Add break markers
157 | d = 0.015
158 | kwargs = dict(transform=ax_top.transAxes, color='k', clip_on=False)
159 | ax_top.plot((-d, +d), (-d, +d), **kwargs)
160 | ax_top.plot((1 - d, 1 + d), (-d, +d), **kwargs)
161 | kwargs.update(transform=ax_bottom.transAxes)
162 | ax_bottom.plot((-d, +d), (1 - d, 1 + d), **kwargs)
163 | ax_bottom.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)
164 |
165 | fig.subplots_adjust(left=0.15)
166 | fig.text(0.04, 0.5, metric_title[metric], va='center', rotation='vertical')
167 |
168 | else:
169 | axes[0].set_ylabel(metric_title[metric])
170 | plt.tight_layout()
171 |
172 | plt.savefig(output_file, bbox_inches='tight', pad_inches=0)
173 | print(f'Plot for {metric} saved to {output_file}')
174 | plt.close(fig)
175 |
176 |
177 | def calc_incons(stats_groups, total_pairs):
178 | total_consistent_sets = []
179 |
180 | for config_name, stats_group in stats_groups.items():
181 | if not stats_group:
182 | continue
183 |
184 | consistent_sets = []
185 | incons = []
186 |
187 | for stats in stats_group:
188 | consistent_set = set(map(
189 | lambda pair: (pair[0], pair[1]),
190 | stats['consistent_pairs']
191 | ))
192 | incons.append(total_pairs - len(consistent_set))
193 | consistent_sets.append(consistent_set)
194 | total_consistent_sets.append(consistent_set)
195 |
196 | overall_incons = total_pairs - len(set.intersection(*consistent_sets))
197 | median_incons = np.median(incons)
198 | avg_incons = np.mean(incons)
199 |
200 | print(f'{config_name}: {overall_incons = } {median_incons = :.1f} {avg_incons = :.1f}')
201 |
202 | print(f'{len(set.intersection(*total_consistent_sets)) = }')
203 | print(set.intersection(*total_consistent_sets))
204 |
205 |
206 | stats = load_stats_files()
207 | if not stats:
208 | print('No valid stats files provided.')
209 | exit(1)
210 | stats_groups = group_stats_by_config(stats)
211 | total_pairs = stats[0]['iterations'][-1]['incons_count'] + len(stats[0]['consistent_pairs'])
212 | plot_metric(
213 | stats_groups,
214 | 'incons_count',
215 | 'inconsistent-pair-cdf.pdf',
216 | break_y_axis=1000,
217 | y_top=1210,
218 | )
219 | calc_incons(stats_groups, total_pairs)
220 |
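Usage note: the script takes the per-session stats files produced by the fuzzer as positional arguments (see `load_stats_files`), so a typical invocation would presumably look like `python fuzz-stats.py ../evaluation/stats/*.json`; it writes the plot to `inconsistent-pair-cdf.pdf` in the working directory.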
--------------------------------------------------------------------------------
/tools/inconsistency-table.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import json
3 | from os.path import dirname
4 |
5 | PARSER_COUNT = 50
6 | COL_WIDTH = '-2.5pt'
7 |
8 | with open(f'{dirname(__file__)}/../constructions/inconsistency-types.json') as f:
9 | data = json.load(f)
10 |
11 | s = f'\\begin{{tabular}}{{|*{{{PARSER_COUNT + 1}}}{{wc{{{COL_WIDTH}}}|}}}}\n\\hline\n'
12 |
13 | for i in range(PARSER_COUNT):
14 | s += f' & {i + 1}'
15 |
16 | s += r' \\ \hline' + '\n'
17 |
18 | total_types = 0
19 | total_pairs = 0
20 |
21 | for i in range(PARSER_COUNT):
22 | s += f'{i+1}'
23 | for j in range(PARSER_COUNT):
24 | x = len(data[i * PARSER_COUNT + j]['inconsistency_types'])
25 | total_types += x
26 | if x > 0:
27 | total_pairs += 1
28 | s += f' & \\cellcolor{{blue!{0 if x == 0 else x * 3 + 10}}}{"-" if i == j else x}'
29 | s += r' \\ \hline' + '\n'
30 |
31 | s += '\\end{tabular}'
32 | print(s)
33 |
34 | total_types /= 2
35 | total_pairs /= 2
36 | print(f'{total_types = }\n{total_pairs = }', file=sys.stderr)
37 |
--------------------------------------------------------------------------------
/tools/parsers-to-table.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import json
4 | import requests
5 | import subprocess
6 | from typing import List, Dict
7 |
8 | with open(f'{os.path.dirname(__file__)}/../parsers/parsers.json') as f:
9 | parser_map = json.load(f)
10 |
11 | gh_token = os.environ.get('GITHUB_TOKEN') or subprocess.check_output(['gh', 'auth', 'token']).decode()
12 | queries = []
13 | for key, parser in parser_map.items():
14 | if 'github' not in parser:
15 | continue
16 | owner, name = parser['github'].split('/')
17 | queries.append(f'_{len(queries)}: repository(owner: "{owner}", name: "{name}") {{ stargazerCount nameWithOwner }}')
18 | query = f"""query {{
19 | {'\n '.join(queries)}
20 | }}"""
21 | response = requests.post(
22 | 'https://api.github.com/graphql',
23 | headers={ "Authorization": f"token {gh_token.strip()}"},
24 | json={ "query": query }
25 | )
26 | if not response.ok:
27 | print(response.text)
28 | exit(1)
29 | star_map = {}
30 | for data in response.json()['data'].values():
31 | star_map[data['nameWithOwner']] = data['stargazerCount']
32 |
33 | parsers: List[Dict[str, str]] = sorted(parser_map.values(), key=lambda p: (p['type'], p['language'], p['name'].lower(), p['version']))
34 |
35 | for i, parser in enumerate(parsers):
36 | name = parser["name"]
37 | std = parser.get("std", False)
38 | lang = parser["language"]
39 | if std:
40 | lang += '*'
41 | ver = parser['version']
42 | repo = parser.get('github')
43 | link = parser.get('link')
44 | if repo:
45 | name = rf'\href{{https://github.com/{repo}}}{{{name}}}'
46 | star = star_map[repo]
47 | if star >= 1000:
48 | star = f'{star/1000:.1f}'.rstrip('0').rstrip('.')
49 | star += 'k'
50 | else:
51 | if link:
52 | name = rf'\href{{{link}}}{{{name}}}'
53 | else:
54 | print(f'no link for {name}', file=sys.stderr)
55 | star = '-'
56 | print(rf' {i+1} & {name} & {lang} & {ver} & {star} \\ \hline'.replace('#', r'\#'))
57 |
--------------------------------------------------------------------------------
/tools/prepare.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -euo pipefail
4 |
5 | base="$(dirname "$(dirname "$(realpath "$0")")")"
6 | input_dir="${INPUT_DIR:-$base/evaluation/input}"
7 | output_dir="${OUTPUT_DIR:-$base/evaluation/output}"
8 |
9 | cd "$base"/parsers
10 | echo "services:" > docker-compose.yml
11 |
12 | for i in */; do
13 | cp unzip-all.sh parallel-unzip-all.sh testcase.sh "$i"
14 | parser=${i%/}
15 | echo " $parser:
16 | build: $parser
17 | volumes:
18 | - $input_dir:/input:ro
19 | - $output_dir/$parser:/output" >> docker-compose.yml
20 | done
21 |
--------------------------------------------------------------------------------
/tools/run-parsers.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -euo pipefail
4 |
5 | base="$(dirname "$(dirname "$(realpath "$0")")")"
6 |
7 | "$base"/tools/prepare.sh
8 |
9 | sudo rm -rf "$base"/evaluation/{input,output}
10 | mkdir -p "$base/evaluation/input"
11 |
12 | for i in $(seq 1 $#); do
13 | testcase="$(realpath "${!i}")"
14 | cp "$testcase" "$base/evaluation/input/$i.zip"
15 | done
16 |
17 | pushd "$base/parsers"
18 | sudo docker compose up
19 | popd
20 |
21 | for i in $(seq 1 $#); do
22 | testcase="$(realpath "${!i}")"
23 | result="$base/evaluation/results/${testcase#"$base/"}"
24 | sudo rm -rf "$result"
25 | mkdir -p "$result"
26 | for p in "$base/parsers/"*/; do
27 | parser="$(basename "$p")"
28 | sudo mv "$base/evaluation/output/$parser/$i.zip" "$result/$parser" &
29 | done
30 | done
31 |
32 | wait
33 |
--------------------------------------------------------------------------------
/zip-diff/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 |
--------------------------------------------------------------------------------
/zip-diff/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "zip-diff"
3 | version = "0.1.0"
4 | edition = "2021"
5 | description = "Differential fuzzing for ZIP parsers."
6 | license = "Apache-2.0"
7 | default-run = "fuzz"
8 |
9 | [dependencies]
10 | anyhow = { version = "1.0.75", features = ["backtrace"] }
11 | binwrite = "0.2.1"
12 | bitflags = "2.4.1"
13 | blake3 = { version = "1.5.4", features = ["rayon", "mmap"] }
14 | byteorder = "1.5.0"
15 | bzip2 = "0.4.4"
16 | chrono = { version = "0.4.38", default-features = false, features = ["now"] }
17 | clap = { version = "4.5.36", features = ["derive"] }
18 | crc32fast = "1.3.2"
19 | downcast-rs = "1.2.0"
20 | dyn-clone = "1.0.16"
21 | educe = { version = "0.5.11", default-features = false, features = ["Debug", "Default"] }
22 | fixedbitset = "0.5.7"
23 | flate2 = "1.0.28"
24 | fs4 = "0.13.1"
25 | itertools = "0.13.0"
26 | lzma-rs = "0.3.0"
27 | num-traits = "0.2.19"
28 | rand = "0.8.5"
29 | rayon = "1.10.0"
30 | serde = { version = "1.0.210", features = ["derive"] }
31 | serde_json = "1.0.128"
32 | sysinfo = { version = "0.34.2", default-features = false, features = ["system"] }
33 | tar = "0.4.42"
34 | vec_box = "1.0.0"
35 | walkdir = "2.5.0"
36 | zip_structs = "0.2.1"
37 | zstd = { version = "0.13.2", features = ["zstdmt"] }
38 |
39 | [[bin]]
40 | name = "fuzz"
41 | path = "src/fuzz/main.rs"
42 |
43 | [[bin]]
44 | name = "construction"
45 | path = "src/construction/main.rs"
46 |
47 | [[bin]]
48 | name = "count"
49 | path = "src/count/main.rs"
50 |
--------------------------------------------------------------------------------
/zip-diff/src/cdh.rs:
--------------------------------------------------------------------------------
1 | use crate::extra::{ExtraField, Zip64ExtendedInfo};
2 | use crate::fields::*;
3 | use crate::lfh::LocalFileHeader;
4 | use crate::utils::{binwrite_transform, BinCount};
5 | use crate::zip::FileEntry;
6 | use anyhow::{bail, Context, Result};
7 | use binwrite::BinWrite;
8 | use educe::Educe;
9 |
10 | #[derive(BinWrite, Clone, Educe)]
11 | #[educe(Debug, Default)]
12 | pub struct CentralDirectoryHeader {
13 | #[educe(Default = Self::SIGNATURE)]
14 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))]
15 | pub signature: u32,
16 | #[educe(Default = 20)]
17 | pub version_made_by: u16,
18 | #[educe(Default = 20)]
19 | pub version_needed: u16,
20 | #[binwrite(with(binwrite_transform))]
21 | pub general_purpose_flag: GeneralPurposeFlag,
22 | #[binwrite(with(binwrite_transform))]
23 | pub compression_method: CompressionMethod,
24 | pub last_mod: DosDateTime,
25 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))]
26 | pub crc32: u32,
27 | pub compressed_size: u32,
28 | pub uncompressed_size: u32,
29 | pub file_name_length: u16,
30 | pub extra_field_length: u16,
31 | pub file_comment_length: u16,
32 | pub disk_number_start: u16,
33 | #[binwrite(with(binwrite_transform))]
34 | pub internal_file_attributes: InternalFileAttributes,
35 | pub external_file_attributes: u32,
36 | pub relative_header_offset: u32,
37 | #[educe(Debug(method(crate::utils::fmt_utf8)))]
38 | pub file_name: Vec<u8>,
39 | pub extra_fields: Vec<ExtraField>,
40 | /// only one of `extra_fields` and `extra_fields_raw` can be set
41 | #[educe(Debug(method(crate::utils::fmt_hex)))]
42 | pub extra_fields_raw: Vec<u8>,
43 | pub file_comment: Vec<u8>,
44 |
45 | #[binwrite(ignore)]
46 | pub zip64: Zip64ExtendedInfo,
47 | }
48 |
49 | impl CentralDirectoryHeader {
50 | pub const SIGNATURE: u32 = 0x02014b50;
51 |
52 | /// Set CDH field and ZIP64 field according to size
53 | pub fn set_offset(&mut self, offset: usize, force_zip64: bool) {
54 | if !force_zip64 {
55 | if let Ok(offset) = offset.try_into() {
56 | self.relative_header_offset = offset;
57 | self.zip64.relative_header_offset = None;
58 | return;
59 | }
60 | }
61 | self.relative_header_offset = u32::MAX;
62 | self.zip64.relative_header_offset = Some(offset as u64);
63 | }
64 |
65 | /// Finalize extra fields, add ZIP64 field
66 | pub fn finalize(&mut self) -> Result<()> {
67 | if !self.zip64.is_empty() {
68 | self.extra_fields.push(ExtraField {
69 | header_id: 0,
70 | size: 0,
71 | data: Box::new(self.zip64.clone()),
72 | });
73 | }
74 |
75 | if !self.extra_fields.is_empty() && !self.extra_fields_raw.is_empty() {
76 | bail!("extra_fields and extra_fields_raw cannot be set at the same time");
77 | }
78 |
79 | if self.extra_fields.is_empty() {
80 | self.extra_field_length = self
81 | .extra_fields_raw
82 | .len()
83 | .try_into()
84 | .context("Extra fields too long")?;
85 | } else {
86 | for field in &mut self.extra_fields {
87 | field.finalize()?;
88 | }
89 |
90 | self.extra_field_length = self
91 | .extra_fields
92 | .byte_count()
93 | .context("Failed to count extra fields")?
94 | .try_into()
95 | .context("Extra fields too long")?;
96 | }
97 |
98 | Ok(())
99 | }
100 | }
101 |
102 | impl From<&LocalFileHeader> for CentralDirectoryHeader {
103 | fn from(lfh: &LocalFileHeader) -> Self {
104 | Self {
105 | version_made_by: lfh.version_needed,
106 | version_needed: lfh.version_needed,
107 | general_purpose_flag: lfh.general_purpose_flag,
108 | compression_method: lfh.compression_method,
109 | last_mod: lfh.last_mod,
110 | crc32: lfh.crc32,
111 | compressed_size: lfh.compressed_size,
112 | uncompressed_size: lfh.uncompressed_size,
113 | file_name_length: lfh.file_name_length,
114 | extra_field_length: lfh.extra_field_length,
115 | file_name: lfh.file_name.clone(),
116 | extra_fields: lfh.extra_fields.clone(),
117 | extra_fields_raw: lfh.extra_fields_raw.clone(),
118 | zip64: lfh.zip64.clone(),
119 | ..Default::default()
120 | }
121 | }
122 | }
123 |
124 | impl From<&FileEntry> for CentralDirectoryHeader {
125 | fn from(fe: &FileEntry) -> Self {
126 | match &fe.dd {
127 | None => (&fe.lfh).into(),
128 | Some(dd) => Self {
129 | version_made_by: fe.lfh.version_needed,
130 | version_needed: fe.lfh.version_needed,
131 | general_purpose_flag: fe.lfh.general_purpose_flag,
132 | compression_method: fe.lfh.compression_method,
133 | last_mod: fe.lfh.last_mod,
134 | crc32: dd.crc32,
135 | compressed_size: dd.compressed_size.saturate(),
136 | uncompressed_size: dd.uncompressed_size.saturate(),
137 | file_name_length: fe.lfh.file_name_length,
138 | extra_field_length: fe.lfh.extra_field_length,
139 | file_name: fe.lfh.file_name.clone(),
140 | extra_fields: fe.lfh.extra_fields.clone(),
141 | extra_fields_raw: fe.lfh.extra_fields_raw.clone(),
142 | zip64: fe.lfh.zip64.clone(),
143 | ..Default::default()
144 | },
145 | }
146 | }
147 | }
148 |
--------------------------------------------------------------------------------
/zip-diff/src/compress.rs:
--------------------------------------------------------------------------------
1 | use crate::fields::CompressionMethod;
2 | use anyhow::{bail, Context, Result};
3 | use bzip2::{bufread::BzDecoder, write::BzEncoder, Compression as BzCompression};
4 | use flate2::{bufread::DeflateDecoder, write::DeflateEncoder, Compression as DeflateCompression};
5 | use lzma_rs::{lzma_compress, lzma_decompress, xz_compress, xz_decompress};
6 | use std::io::{Cursor, Read, Write};
7 |
8 | pub fn compress(method: CompressionMethod, data: &[u8]) -> Result<Vec<u8>> {
9 | match method {
10 | CompressionMethod::STORED => Ok(Vec::from(data)),
11 | CompressionMethod::DEFLATED => {
12 | let mut encoder = DeflateEncoder::new(Vec::new(), DeflateCompression::default());
13 | encoder.write_all(data).context("Failed to deflate")?;
14 | encoder.finish().context("Failed to deflate")
15 | }
16 | CompressionMethod::BZIP2 => {
17 | let mut encoder = BzEncoder::new(Vec::new(), BzCompression::default());
18 | encoder.write_all(data).context("Failed to bzip2")?;
19 | encoder.finish().context("Failed to bzip2")
20 | }
21 | CompressionMethod::ZSTD => zstd::encode_all(data, 0).context("Failed to ZSTD compress"),
22 | CompressionMethod::LZMA => {
23 | let mut input = Cursor::new(data);
24 | let mut output = Vec::new();
25 | lzma_compress(&mut input, &mut output).context("Failed to LZMA compress")?;
26 | Ok(output)
27 | }
28 | CompressionMethod::XZ => {
29 | let mut input = Cursor::new(data);
30 | let mut output = Vec::new();
31 | xz_compress(&mut input, &mut output).context("Failed to XZ compress")?;
32 | Ok(output)
33 | }
34 | _ => bail!("Compression method {:?} not implemented", method),
35 | }
36 | }
37 |
38 | pub fn decompress(method: CompressionMethod, data: &[u8]) -> Result<Vec<u8>> {
39 | match method {
40 | CompressionMethod::STORED => Ok(Vec::from(data)),
41 | CompressionMethod::DEFLATED => {
42 | let mut decoder = DeflateDecoder::new(data);
43 | let mut buf = Vec::new();
44 | decoder.read_to_end(&mut buf).context("Failed to inflate")?;
45 | Ok(buf)
46 | }
47 | CompressionMethod::BZIP2 => {
48 | let mut decoder = BzDecoder::new(data);
49 | let mut buf = Vec::new();
50 | decoder.read_to_end(&mut buf).context("Failed to bunzip2")?;
51 | Ok(buf)
52 | }
53 | CompressionMethod::ZSTD => zstd::decode_all(data).context("Failed to ZSTD decompress"),
54 | CompressionMethod::LZMA => {
55 | let mut input = Cursor::new(data);
56 | let mut output = Vec::new();
57 | lzma_decompress(&mut input, &mut output).context("Failed to LZMA decompress")?;
58 | Ok(output)
59 | }
60 | CompressionMethod::XZ => {
61 | let mut input = Cursor::new(data);
62 | let mut output = Vec::new();
63 | xz_decompress(&mut input, &mut output).context("Failed to XZ decompress")?;
64 | Ok(output)
65 | }
66 | _ => bail!("Decompression method {:?} not implemented", method),
67 | }
68 | }
69 |
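A detail worth noting: ZIP's DEFLATED payloads are raw DEFLATE streams with no zlib or gzip framing, which is why the code above uses flate2's `DeflateEncoder`/`DeflateDecoder` rather than the zlib variants. The same round trip in Python, as an illustrative sketch (a negative `wbits` selects the raw format):

    import zlib

    def deflate_raw(data: bytes) -> bytes:
        # wbits=-15: raw DEFLATE, matching what ZIP stores
        c = zlib.compressobj(wbits=-15)
        return c.compress(data) + c.flush()

    def inflate_raw(blob: bytes) -> bytes:
        return zlib.decompress(blob, wbits=-15)

    assert inflate_raw(deflate_raw(b'test')) == b'test'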
--------------------------------------------------------------------------------
/zip-diff/src/construction/a.rs:
--------------------------------------------------------------------------------
1 | use anyhow::Result;
2 |
3 | pub mod a1;
4 | pub mod a2;
5 | pub mod a3;
6 | pub mod a4;
7 | pub mod a5;
8 |
9 | pub fn main() -> Result<()> {
10 | a1::main()?;
11 | a2::main()?;
12 | a3::main()?;
13 | a4::main()?;
14 | a5::main()?;
15 | Ok(())
16 | }
17 |
--------------------------------------------------------------------------------
/zip-diff/src/construction/a/a1.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::testcase;
2 | use anyhow::Result;
3 | use zip_diff::fields::CompressionMethod;
4 | use zip_diff::zip::ZipArchive;
5 |
6 | const DATA: &[u8] = b"test";
7 |
8 | fn stored_lfh() -> Result<ZipArchive> {
9 | let mut zip = ZipArchive::default();
10 | zip.add_file("test", DATA, CompressionMethod::DEFLATED, false, false)?;
11 | zip.finalize()?;
12 | zip.files[0].lfh.compression_method = CompressionMethod::STORED;
13 | zip.files[0].lfh.compressed_size = DATA.len().try_into().unwrap();
14 | Ok(zip)
15 | }
16 |
17 | fn stored_cdh() -> Result<ZipArchive> {
18 | let mut zip = ZipArchive::default();
19 | zip.add_file("test", DATA, CompressionMethod::DEFLATED, false, false)?;
20 | zip.finalize()?;
21 | zip.cd[0].compression_method = CompressionMethod::STORED;
22 | zip.cd[0].compressed_size = DATA.len().try_into().unwrap();
23 | Ok(zip)
24 | }
25 |
26 | pub fn main() -> Result<()> {
27 | testcase(stored_lfh)?;
28 | testcase(stored_cdh)?;
29 | Ok(())
30 | }
31 |
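These two constructions relabel deflated data as STORED in only one of the two headers, so a parser that trusts the LFH and one that trusts the CDH extract different bytes from the same archive. The same kind of testcase can be approximated outside the framework by byte-patching a well-formed archive; a hypothetical Python sketch:

    import io, struct, zipfile

    buf = io.BytesIO()
    with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as z:
        z.writestr('test', b'test')
    raw = bytearray(buf.getvalue())

    # The single LFH starts at offset 0: signature(4) version(2) flags(2)
    # method(2) time(2) date(2) crc(4) csize(4) usize(4) ...
    assert raw[:4] == b'PK\x03\x04'
    struct.pack_into('<H', raw, 8, 0)    # LFH method -> 0 (STORED); the CDH keeps 8
    struct.pack_into('<I', raw, 18, 4)   # LFH compressed size -> len(b'test')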
--------------------------------------------------------------------------------
/zip-diff/src/construction/a/a2.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::{testcase, testcase_arg};
2 | use anyhow::Result;
3 | use bitflags::bitflags;
4 | use zip_diff::dd::{DataDescriptor, U32or64};
5 | use zip_diff::extra::Zip64ExtendedInfo;
6 | use zip_diff::fields::{CompressionMethod, GeneralPurposeFlag};
7 | use zip_diff::utils::crc32_patch;
8 | use zip_diff::zip::ZipArchive;
9 |
10 | const DATA: &[u8] = b"test";
11 |
12 | #[derive(Clone, Copy)]
13 | struct LfhCdh(u8);
14 |
15 | bitflags! {
16 | impl LfhCdh: u8 {
17 | const Deflated = 1 << 0;
18 | const LfhCompressed = 1 << 1;
19 | const LfhUncompressed = 1 << 2;
20 | const CdhCompressed = 1 << 3;
21 | const CdhUncompressed = 1 << 4;
22 | }
23 | }
24 |
25 | #[derive(Clone, Copy)]
26 | struct DataDescriptorFlags(u8);
27 |
28 | bitflags! {
29 | impl DataDescriptorFlags: u8 {
30 | const CompressedZero = 1 << 0;
31 | const UncompressedZero = 1 << 1;
32 | const Size64 = 1 << 2;
33 | }
34 | }
35 |
36 | #[derive(Clone, Copy)]
37 | struct Zip64Flags(u8);
38 |
39 | bitflags! {
40 | impl Zip64Flags: u8 {
41 | const CompressedSize = 1 << 0;
42 | const UncompressedSize = 1 << 1;
43 | }
44 | }
45 |
46 | struct Args {
47 | lfh_cdh_flags: LfhCdh,
48 | lfh_zip64: Zip64Flags,
49 | cdh_zip64: Zip64Flags,
50 | dd_flags: Option<DataDescriptorFlags>,
51 | }
52 |
53 | fn size_confusion(args: Args) -> Result<ZipArchive> {
54 | let mut zip = ZipArchive::default();
55 |
56 | let mut data = Vec::from(DATA);
57 | let patch = crc32_patch(&data, 0);
58 | data.extend(patch.to_le_bytes());
59 |
60 | let compression = if args.lfh_cdh_flags.contains(LfhCdh::Deflated) {
61 | CompressionMethod::DEFLATED
62 | } else {
63 | CompressionMethod::STORED
64 | };
65 |
66 | zip.add_file("test", &data, compression, false, false)?;
67 | zip.finalize()?;
68 |
69 | if let Some(flags) = args.dd_flags {
70 | let lfh = &mut zip.files[0].lfh;
71 | let cdh = &mut zip.cd[0];
72 |
73 | let compressed_size = if flags.contains(DataDescriptorFlags::CompressedZero) {
74 | 0
75 | } else {
76 | lfh.compressed_size
77 | };
78 |
79 | let uncompressed_size = if flags.contains(DataDescriptorFlags::UncompressedZero) {
80 | 0
81 | } else {
82 | lfh.uncompressed_size
83 | };
84 |
85 | let (compressed_size, uncompressed_size) = if flags.contains(DataDescriptorFlags::Size64) {
86 | (
87 | U32or64::U64(compressed_size.into()),
88 | U32or64::U64(uncompressed_size.into()),
89 | )
90 | } else {
91 | (
92 | U32or64::U32(compressed_size),
93 | U32or64::U32(uncompressed_size),
94 | )
95 | };
96 |
97 | let dd = DataDescriptor {
98 | compressed_size,
99 | uncompressed_size,
100 | ..Default::default()
101 | };
102 |
103 | lfh.general_purpose_flag
104 | .insert(GeneralPurposeFlag::DataDescriptor);
105 | cdh.general_purpose_flag
106 | .insert(GeneralPurposeFlag::DataDescriptor);
107 | zip.files[0].dd = Some(dd);
108 | }
109 |
110 | let lfh = &mut zip.files[0].lfh;
111 | let cdh = &mut zip.cd[0];
112 |
113 | if args.lfh_cdh_flags.contains(LfhCdh::LfhCompressed) {
114 | lfh.compressed_size = 0;
115 | }
116 | if args.lfh_cdh_flags.contains(LfhCdh::LfhUncompressed) {
117 | lfh.uncompressed_size = 0;
118 | }
119 | if args.lfh_cdh_flags.contains(LfhCdh::CdhCompressed) {
120 | cdh.compressed_size = 0;
121 | }
122 | if args.lfh_cdh_flags.contains(LfhCdh::CdhUncompressed) {
123 | cdh.uncompressed_size = 0;
124 | }
125 |
126 | if !args.lfh_zip64.is_empty() {
127 | let compressed_size = if args.lfh_zip64.contains(Zip64Flags::CompressedSize) {
128 | let size = lfh.compressed_size;
129 | lfh.compressed_size = u32::MAX;
130 | Some(size.into())
131 | } else {
132 | None
133 | };
134 | let original_size = if args.lfh_zip64.contains(Zip64Flags::UncompressedSize) {
135 | let size = lfh.uncompressed_size;
136 | lfh.uncompressed_size = u32::MAX;
137 | Some(size.into())
138 | } else {
139 | None
140 | };
141 | let zip64 = Zip64ExtendedInfo {
142 | compressed_size,
143 | original_size,
144 | ..Default::default()
145 | };
146 | lfh.extra_fields.push(zip64.into());
147 | }
148 |
149 | if !args.cdh_zip64.is_empty() {
150 | let compressed_size = if args.cdh_zip64.contains(Zip64Flags::CompressedSize) {
151 | let size = cdh.compressed_size;
152 | cdh.compressed_size = u32::MAX;
153 | Some(size.into())
154 | } else {
155 | None
156 | };
157 | let original_size = if args.cdh_zip64.contains(Zip64Flags::UncompressedSize) {
158 | let size = cdh.uncompressed_size;
159 | cdh.uncompressed_size = u32::MAX;
160 | Some(size.into())
161 | } else {
162 | None
163 | };
164 | let zip64 = Zip64ExtendedInfo {
165 | compressed_size,
166 | original_size,
167 | ..Default::default()
168 | };
169 | cdh.extra_fields.push(zip64.into());
170 | }
171 |
172 | zip.set_offsets(0)?;
173 |
174 | Ok(zip)
175 | }
176 |
177 | fn multiple_zip64() -> Result<ZipArchive> {
178 | let mut zip = ZipArchive::default();
179 | zip.add_file("test", DATA, CompressionMethod::STORED, true, false)?;
180 | zip.finalize()?;
181 | let zip64 = Zip64ExtendedInfo {
182 | original_size: Some(0),
183 | compressed_size: Some(0),
184 | relative_header_offset: None,
185 | disk_start_number: None,
186 | };
187 | zip.files[0].lfh.extra_fields.push(zip64.clone().into());
188 | zip.cd[0].extra_fields.push(zip64.into());
189 | zip.set_offsets(0)?;
190 | Ok(zip)
191 | }
192 |
193 | pub fn main() -> Result<()> {
194 | for i in 0..32 {
195 | let lfh_cdh_flags = LfhCdh::from_bits_truncate(i);
196 | for i in 0..=8 {
197 | let dd_flags = if i == 8 {
198 | None
199 | } else {
200 | Some(DataDescriptorFlags::from_bits_truncate(i))
201 | };
202 | for i in 0..4 {
203 | let lfh_zip64 = Zip64Flags::from_bits_truncate(i);
204 | for i in 0..4 {
205 | let cdh_zip64 = Zip64Flags::from_bits_truncate(i);
206 | let args = Args {
207 | lfh_cdh_flags,
208 | dd_flags,
209 | lfh_zip64,
210 | cdh_zip64,
211 | };
212 | testcase_arg(size_confusion, args)?;
213 | }
214 | }
215 | }
216 | }
217 | testcase(multiple_zip64)?;
218 | Ok(())
219 | }
220 |
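The `crc32_patch(&data, 0)` call above computes four trailer bytes that drive the CRC-32 of the extended payload to 0, which is also the CRC-32 of empty input, presumably so the stored CRC matches whether a parser believes the zeroed or the real size fields. One standard way to compute such a patch, shown as a self-contained Python sketch (the repo's actual implementation is the Rust `zip_diff::utils::crc32_patch`):

    import zlib

    POLY = 0xEDB88320
    TABLE = []
    for n in range(256):
        c = n
        for _ in range(8):
            c = (c >> 1) ^ (POLY if c & 1 else 0)
        TABLE.append(c)
    # For CRC-32 the top bytes of the 256 table entries are all distinct
    REV = {TABLE[i] >> 24: i for i in range(256)}

    def crc32_patch(data: bytes, target: int) -> bytes:
        """Return four bytes X such that zlib.crc32(data + X) == target."""
        reg = zlib.crc32(data) ^ 0xFFFFFFFF   # CRC register after `data`
        want = target ^ 0xFFFFFFFF            # register required after the patch
        # Walk backwards from `want`: only the (known) top byte is needed to
        # recover the table index used at each of the four forward steps.
        idx = [0, 0, 0, 0]
        for k in (3, 2, 1, 0):
            idx[k] = REV[want >> 24]
            want = ((want ^ TABLE[idx[k]]) << 8) & 0xFFFFFFFF
        # Replay forwards, turning each index into a concrete byte.
        out = bytearray()
        for k in range(4):
            out.append((reg ^ idx[k]) & 0xFF)
            reg = (reg >> 8) ^ TABLE[idx[k]]
        return bytes(out)

    assert zlib.crc32(b'test' + crc32_patch(b'test', 0)) == 0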
--------------------------------------------------------------------------------
/zip-diff/src/construction/a/a3.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::{testcase, CRC32A, CRC32B};
2 | use anyhow::Result;
3 | use zip_diff::extra::InfoZipUnicodePath;
4 | use zip_diff::fields::GeneralPurposeFlag;
5 | use zip_diff::zip::ZipArchive;
6 |
7 | const DATA: &[u8] = b"test";
8 |
9 | fn lfh_cdh() -> Result<ZipArchive> {
10 | let mut zip = ZipArchive::default();
11 | zip.add_simple("lfh", DATA)?;
12 | zip.finalize()?;
13 | zip.cd[0].file_name = "cdh".into();
14 | Ok(zip)
15 | }
16 |
17 | fn up_lfh_cdh() -> Result<ZipArchive> {
18 | let mut zip = ZipArchive::default();
19 | zip.add_simple("original", DATA)?;
20 | let lfh = &mut zip.files[0].lfh;
21 | let name_crc32 = crc32fast::hash(&lfh.file_name);
22 | let up = InfoZipUnicodePath {
23 | version: 1,
24 | name_crc32,
25 | unicode_name: "lfh".into(),
26 | };
27 | lfh.extra_fields.push(up.into());
28 | zip.finalize()?;
29 | let cd_up: &mut InfoZipUnicodePath = zip.cd[0].extra_fields[0].data.downcast_mut().unwrap();
30 | cd_up.unicode_name = "cdh".into();
31 | Ok(zip)
32 | }
33 |
34 | fn up_version() -> Result<ZipArchive> {
35 | let mut zip = ZipArchive::default();
36 |
37 | zip.add_simple("v0-original", DATA)?;
38 | let lfh = &mut zip.files[0].lfh;
39 | let name_crc32 = crc32fast::hash(&lfh.file_name);
40 | let up = InfoZipUnicodePath {
41 | version: 0,
42 | name_crc32,
43 | unicode_name: "v0-up".into(),
44 | };
45 | lfh.extra_fields.push(up.into());
46 |
47 | zip.add_simple("v2-original", DATA)?;
48 | let lfh = &mut zip.files[1].lfh;
49 | let name_crc32 = crc32fast::hash(&lfh.file_name);
50 | let up = InfoZipUnicodePath {
51 | version: 2,
52 | name_crc32,
53 | unicode_name: "v2-up".into(),
54 | };
55 | lfh.extra_fields.push(up.into());
56 |
57 | zip.finalize()?;
58 | Ok(zip)
59 | }
60 |
61 | fn up_incorrect_crc32() -> Result<ZipArchive> {
62 | let mut zip = ZipArchive::default();
63 |
64 | zip.add_simple("original", DATA)?;
65 | let lfh = &mut zip.files[0].lfh;
66 | let up = InfoZipUnicodePath {
67 | version: 1,
68 | name_crc32: 0,
69 | unicode_name: "up".into(),
70 | };
71 | lfh.extra_fields.push(up.into());
72 |
73 | zip.finalize()?;
74 | Ok(zip)
75 | }
76 |
77 | fn up_crc32_source() -> Result<ZipArchive> {
78 | let mut zip = ZipArchive::default();
79 |
80 | zip.add_simple("1-original", DATA)?;
81 | let lfh = &mut zip.files[0].lfh;
82 | let name_crc32 = crc32fast::hash(&lfh.file_name);
83 | let up1 = InfoZipUnicodePath {
84 | version: 1,
85 | name_crc32,
86 | unicode_name: "1-up1".into(),
87 | };
88 | let up2 = InfoZipUnicodePath {
89 | version: 1,
90 | name_crc32,
91 | unicode_name: "1-up2".into(),
92 | };
93 | lfh.extra_fields.push(up1.into());
94 | lfh.extra_fields.push(up2.into());
95 |
96 | zip.add_simple("2-original", DATA)?;
97 | let lfh = &mut zip.files[1].lfh;
98 | let name_crc32 = crc32fast::hash(&lfh.file_name);
99 | let up1 = InfoZipUnicodePath {
100 | version: 1,
101 | name_crc32,
102 | unicode_name: "2-up1".into(),
103 | };
104 | let name_crc32 = crc32fast::hash(up1.unicode_name.as_bytes());
105 | let up2 = InfoZipUnicodePath {
106 | version: 1,
107 | name_crc32,
108 | unicode_name: "2-up2".into(),
109 | };
110 | lfh.extra_fields.push(up1.into());
111 | lfh.extra_fields.push(up2.into());
112 |
113 | zip.finalize()?;
114 | Ok(zip)
115 | }
116 |
117 | fn up_invalid() -> Result<ZipArchive> {
118 | let mut zip = ZipArchive::default();
119 |
120 | zip.add_simple("original", DATA)?;
121 | let lfh = &mut zip.files[0].lfh;
122 | let name_crc32 = crc32fast::hash(&lfh.file_name);
123 | let up1 = InfoZipUnicodePath {
124 | version: 1,
125 | name_crc32,
126 | unicode_name: "up-valid".into(),
127 | };
128 | // invalid for both version and CRC32
129 | let up2 = InfoZipUnicodePath {
130 | version: 2,
131 | name_crc32: 0,
132 | unicode_name: "up-invalid".into(),
133 | };
134 | lfh.extra_fields.push(up1.into());
135 | lfh.extra_fields.push(up2.into());
136 |
137 | // The two halves of the name share a CRC32, so the CRC32 check in up3 passes regardless of which filename source the parser used
138 | zip.add_simple(&format!("{CRC32A}{CRC32A}"), DATA)?;
139 | let lfh = &mut zip.files[1].lfh;
140 | let name_crc32 = crc32fast::hash(&lfh.file_name);
141 | let up1 = InfoZipUnicodePath {
142 | version: 1,
143 | name_crc32,
144 | unicode_name: format!("{CRC32A}{CRC32B}"),
145 | };
146 | let up2 = InfoZipUnicodePath {
147 | version: 2,
148 | name_crc32: 0,
149 | unicode_name: format!("{CRC32B}{CRC32A}"),
150 | };
151 | let up3 = InfoZipUnicodePath {
152 | version: 1,
153 | name_crc32,
154 | unicode_name: format!("{CRC32B}{CRC32B}"),
155 | };
156 | lfh.extra_fields.push(up1.into());
157 | lfh.extra_fields.push(up2.into());
158 | lfh.extra_fields.push(up3.into());
159 |
160 | zip.finalize()?;
161 | Ok(zip)
162 | }
163 |
164 | fn up_efs() -> Result<ZipArchive> {
165 | let mut zip = ZipArchive::default();
166 |
167 | zip.add_simple("original", DATA)?;
168 | let lfh = &mut zip.files[0].lfh;
169 | lfh.general_purpose_flag
170 | .insert(GeneralPurposeFlag::LanguageEncoding);
171 | let name_crc32 = crc32fast::hash(&lfh.file_name);
172 | let up = InfoZipUnicodePath {
173 | version: 1,
174 | name_crc32,
175 | unicode_name: "up".into(),
176 | };
177 | lfh.extra_fields.push(up.into());
178 |
179 | zip.finalize()?;
180 | Ok(zip)
181 | }
182 |
183 | pub fn main() -> Result<()> {
184 | testcase(lfh_cdh)?;
185 | testcase(up_lfh_cdh)?;
186 | testcase(up_version)?;
187 | testcase(up_incorrect_crc32)?;
188 | testcase(up_crc32_source)?;
189 | testcase(up_invalid)?;
190 | testcase(up_efs)?;
191 | Ok(())
192 | }
193 |
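// Commentary (added sketch, not in the repo): per APPNOTE, a consumer should
// honor the 0x7075 Unicode Path field only when version == 1 and name_crc32
// matches the raw header filename; the constructions above probe how strictly
// parsers enforce this. A hypothetical validation helper, reusing the
// InfoZipUnicodePath type from above:
//
//     fn effective_name(raw_name: &[u8], up: &InfoZipUnicodePath) -> String {
//         if up.version == 1 && up.name_crc32 == crc32fast::hash(raw_name) {
//             up.unicode_name.clone()
//         } else {
//             String::from_utf8_lossy(raw_name).into_owned()
//         }
//     }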
--------------------------------------------------------------------------------
/zip-diff/src/construction/a/a4.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::{testcase, testcase_arg};
2 | use anyhow::Result;
3 | use zip_diff::zip::ZipArchive;
4 |
5 | const DOS_ATTR: u32 = 0x10;
6 | const UNIX_ATTR: u32 = 0x4000 << 16;
7 | const DOS_VER: u16 = 0;
8 | const UNIX_VER: u16 = 3 << 8;
9 | const OSX_VER: u16 = 19 << 8;
10 |
11 | fn slash() -> Result<ZipArchive> {
12 | let mut zip = ZipArchive::default();
13 | zip.add_simple("test/", b"test")?;
14 | zip.finalize()?;
15 | Ok(zip)
16 | }
17 |
18 | fn backslash() -> Result<ZipArchive> {
19 | let mut zip = ZipArchive::default();
20 | zip.add_simple("test\\", b"test")?;
21 | zip.finalize()?;
22 | Ok(zip)
23 | }
24 |
25 | fn slash_empty() -> Result<ZipArchive> {
26 | let mut zip = ZipArchive::default();
27 | zip.add_simple("test/", b"")?;
28 | zip.finalize()?;
29 | Ok(zip)
30 | }
31 |
32 | fn external_attr(arg: u8) -> Result<ZipArchive> {
33 | let mut zip = ZipArchive::default();
34 | zip.add_simple("test", b"test")?;
35 | zip.finalize()?;
36 | zip.cd[0].external_file_attributes |= if arg / 3 == 0 { DOS_ATTR } else { UNIX_ATTR };
37 | zip.cd[0].version_made_by |= match arg % 3 {
38 | 0 => DOS_VER,
39 | 1 => UNIX_VER,
40 | 2 => OSX_VER,
41 | _ => unreachable!(),
42 | };
43 | Ok(zip)
44 | }
45 |
46 | pub fn main() -> Result<()> {
47 | testcase(slash)?;
48 | testcase(backslash)?;
49 | testcase(slash_empty)?;
50 | (0..6).try_for_each(|arg| testcase_arg(external_attr, arg))?;
51 | Ok(())
52 | }
53 |
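// Commentary (added note, not in the repo): DOS_ATTR (0x10) is the MS-DOS
// directory attribute bit, and UNIX_ATTR places S_IFDIR (0o040000) in the high
// 16 bits of external_file_attributes; the *_VER constants set the host byte
// of version_made_by (0 = MS-DOS, 3 = Unix, 19 = OS X). external_attr thus
// walks the 2 x 3 grid of attribute encoding vs. claimed host system.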
--------------------------------------------------------------------------------
/zip-diff/src/construction/a/a5.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::testcase;
2 | use anyhow::Result;
3 | use zip_diff::fields::GeneralPurposeFlag;
4 | use zip_diff::zip::ZipArchive;
5 |
6 | fn lfh_enc() -> Result<ZipArchive> {
7 | let mut zip = ZipArchive::default();
8 |
9 | zip.add_simple("test", b"test")?;
10 | zip.finalize()?;
11 | zip.files[0]
12 | .lfh
13 | .general_purpose_flag
14 | .insert(GeneralPurposeFlag::Encrypted);
15 |
16 | Ok(zip)
17 | }
18 |
19 | fn cdh_enc() -> Result<ZipArchive> {
20 | let mut zip = ZipArchive::default();
21 |
22 | zip.add_simple("test", b"test")?;
23 | zip.finalize()?;
24 | zip.cd[0]
25 | .general_purpose_flag
26 | .insert(GeneralPurposeFlag::Encrypted);
27 |
28 | Ok(zip)
29 | }
30 |
31 | fn first_enc() -> Result<ZipArchive> {
32 | let mut zip = ZipArchive::default();
33 |
34 | zip.add_simple("first", b"first")?;
35 | zip.add_simple("second", b"second")?;
36 | zip.files[0]
37 | .lfh
38 | .general_purpose_flag
39 | .insert(GeneralPurposeFlag::Encrypted);
40 | zip.finalize()?;
41 |
42 | Ok(zip)
43 | }
44 |
45 | pub fn main() -> Result<()> {
46 | testcase(lfh_enc)?;
47 | testcase(cdh_enc)?;
48 | testcase(first_enc)?;
49 | Ok(())
50 | }
51 |
--------------------------------------------------------------------------------
/zip-diff/src/construction/b.rs:
--------------------------------------------------------------------------------
1 | use anyhow::Result;
2 |
3 | pub mod b1;
4 | pub mod b2;
5 | pub mod b3;
6 | pub mod b4;
7 |
8 | pub fn main() -> Result<()> {
9 | b1::main()?;
10 | b2::main()?;
11 | b3::main()?;
12 | b4::main()?;
13 | Ok(())
14 | }
15 |
--------------------------------------------------------------------------------
/zip-diff/src/construction/b/b1.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::testcase;
2 | use anyhow::Result;
3 | use zip_diff::zip::ZipArchive;
4 |
5 | fn duplicate() -> Result<ZipArchive> {
6 | let mut zip = ZipArchive::default();
7 | zip.add_simple("test", b"a")?;
8 | zip.add_simple("test", b"b")?;
9 | zip.finalize()?;
10 | Ok(zip)
11 | }
12 |
13 | pub fn main() -> Result<()> {
14 | testcase(duplicate)?;
15 | Ok(())
16 | }
17 |
--------------------------------------------------------------------------------
/zip-diff/src/construction/b/b2.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::testcase_arg;
2 | use anyhow::Result;
3 | use zip_diff::zip::ZipArchive;
4 |
5 | enum Host {
6 | Dos,
7 | Unix,
8 | Both,
9 | }
10 |
11 | const UNIX_VER: u16 = 3 << 8;
12 |
13 | fn special_byte((byte, host): (u8, Host)) -> Result<ZipArchive> {
14 | let mut zip = ZipArchive::default();
15 | zip.add_simple("a b", b"")?;
16 | zip.files[0].lfh.file_name[1] = byte;
17 | if matches!(host, Host::Both) {
18 | zip.files.push(zip.files[0].clone());
19 | }
20 | zip.finalize()?;
21 | if matches!(host, Host::Unix | Host::Both) {
22 | zip.cd[0].version_made_by |= UNIX_VER;
23 | }
24 | Ok(zip)
25 | }
26 |
27 | fn two_special_bytes((a, b): (u8, u8)) -> Result<ZipArchive> {
28 | let mut zip = ZipArchive::default();
29 | zip.add_simple("a b", b"")?;
30 | zip.add_simple("a b", b"")?;
31 | zip.files[0].lfh.file_name[1] = a;
32 | zip.files[1].lfh.file_name[1] = b;
33 | zip.finalize()?;
34 | Ok(zip)
35 | }
36 |
37 | pub fn main() -> Result<()> {
38 | for byte in 0..=u8::MAX {
39 | if byte.is_ascii_alphanumeric() {
40 | continue;
41 | }
42 | for host in [Host::Dos, Host::Unix, Host::Both] {
43 | testcase_arg(special_byte, (byte, host))?;
44 | }
45 | }
46 | for a in (0..=u8::MAX)
47 | .step_by(8)
48 | .filter(|&x| !x.is_ascii_alphanumeric())
49 | {
50 | for b in (7..=u8::MAX)
51 | .step_by(8)
52 | .filter(|&x| !x.is_ascii_alphanumeric())
53 | {
54 | testcase_arg(two_special_bytes, (a, b))?;
55 | }
56 | }
57 | Ok(())
58 | }
59 |
--------------------------------------------------------------------------------
/zip-diff/src/construction/b/b3.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::testcase_arg;
2 | use anyhow::Result;
3 | use zip_diff::zip::ZipArchive;
4 |
5 | fn canonical_first(path: &str) -> Result<ZipArchive> {
6 | let mut zip = ZipArchive::default();
7 | zip.add_simple("a/b", b"a")?;
8 | zip.add_simple(path, b"b")?;
9 | zip.finalize()?;
10 | Ok(zip)
11 | }
12 |
13 | fn canonical_second(path: &str) -> Result<ZipArchive> {
14 | let mut zip = ZipArchive::default();
15 | zip.add_simple(path, b"a")?;
16 | zip.add_simple("a/b", b"b")?;
17 | zip.finalize()?;
18 | Ok(zip)
19 | }
20 |
21 | pub fn main() -> Result<()> {
22 | [
23 | "/a/b",
24 | "a//b",
25 | "a\\b",
26 | "./a/b",
27 | "a/./b",
28 | "a/b/.",
29 | "../a/b",
30 | ".../a/b",
31 | "a/.../b",
32 | "c/../a/b",
33 | ]
34 | .into_iter()
35 | .try_for_each(|path| {
36 | testcase_arg(canonical_first, path)?;
37 | testcase_arg(canonical_second, path)
38 | })?;
39 | Ok(())
40 | }
41 |
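// Commentary (added sketch, not in the repo): the listed paths collide with
// "a/b" only if the extractor canonicalizes them. A hypothetical extractor
// normalizing as below maps "/a/b", "a//b", "a\b", "./a/b", "a/./b", "a/b/.",
// "../a/b" and "c/../a/b" all onto "a/b", so the extracted content depends on
// which entry wins:
//
//     fn normalize(path: &str) -> String {
//         let mut parts: Vec<&str> = Vec::new();
//         for part in path.split(|c| c == '/' || c == '\\') {
//             match part {
//                 "" | "." => {}
//                 ".." => { parts.pop(); }
//                 p => parts.push(p),
//             }
//         }
//         parts.join("/")
//     }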
--------------------------------------------------------------------------------
/zip-diff/src/construction/b/b4.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::testcase;
2 | use anyhow::Result;
3 | use zip_diff::zip::ZipArchive;
4 |
5 | fn casing() -> Result<ZipArchive> {
6 | let mut zip = ZipArchive::default();
7 | zip.add_simple("test.txt", b"a")?;
8 | zip.add_simple("test.TXT", b"b")?;
9 | zip.finalize()?;
10 | Ok(zip)
11 | }
12 |
13 | pub fn main() -> Result<()> {
14 | testcase(casing)?;
15 | Ok(())
16 | }
17 |
--------------------------------------------------------------------------------
/zip-diff/src/construction/c.rs:
--------------------------------------------------------------------------------
1 | use anyhow::Result;
2 |
3 | pub mod c1;
4 | pub mod c2;
5 | pub mod c3;
6 | pub mod c4;
7 | pub mod c5;
8 |
9 | pub fn main() -> Result<()> {
10 | c1::main()?;
11 | c2::main()?;
12 | c3::main()?;
13 | c4::main()?;
14 | c5::main()?;
15 | Ok(())
16 | }
17 |
--------------------------------------------------------------------------------
/zip-diff/src/construction/c/c1.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::{testcase, testcase_arg};
2 | use anyhow::Result;
3 | use binwrite::BinWrite;
4 | use zip_diff::dd::{DataDescriptor, U32or64};
5 | use zip_diff::eocd::EndOfCentralDirectoryRecord;
6 | use zip_diff::fields::CompressionMethod;
7 | use zip_diff::lfh::LocalFileHeader;
8 | use zip_diff::utils::{crc32_patch, BinCount};
9 | use zip_diff::zip::{FileEntry, ZipArchive};
10 |
11 | fn no_cdh_for_lfh() -> Result<ZipArchive> {
12 | let mut zip = ZipArchive::default();
13 |
14 | zip.add_simple("a", b"a")?;
15 | zip.add_simple("b", b"b")?;
16 | zip.finalize()?;
17 |
18 | let cdh = zip.cd.pop().unwrap();
19 | zip.eocdr.this_disk_cdh_count -= 1;
20 | zip.eocdr.total_cdh_count -= 1;
21 | zip.eocdr.size_of_cd -= cdh.byte_count()? as u32;
22 |
23 | Ok(zip)
24 | }
25 |
26 | fn truncating_lfh_stream_via_fake_records() -> Result<ZipArchive> {
27 | let mut zip = ZipArchive::default();
28 |
29 | zip.add_simple("a", b"a")?;
30 | zip.add_simple("b", b"b")?;
31 | zip.add_simple("c", b"c")?;
32 | zip.finalize()?;
33 |
34 | let eocdr = EndOfCentralDirectoryRecord {
35 | this_disk_cdh_count: 1,
36 | total_cdh_count: 1,
37 | size_of_cd: zip.cd[0].byte_count()?.try_into()?,
38 | offset_of_cd_wrt_starting_disk: zip.files[0].byte_count()?.try_into()?,
39 | ..Default::default()
40 | };
41 |
42 | zip.cd[0].write(&mut zip.files[0].data)?;
43 | eocdr.write(&mut zip.files[1].data)?;
44 | zip.finalize()?;
45 |
46 | Ok(zip)
47 | }
48 |
49 | fn truncating_lfh_stream_via_lfh_inside_comments() -> Result<ZipArchive> {
50 | let mut zip = ZipArchive::default();
51 |
52 | zip.add_simple("a", b"a")?;
53 | zip.add_simple("b", b"b")?;
54 | zip.add_simple("c", b"b")?;
55 | zip.finalize()?;
56 |
57 | let entry2 = zip.files.pop().unwrap();
58 | let entry1 = zip.files.pop().unwrap();
59 |
60 | let mut offset = zip.files.byte_count()?;
61 | zip.eocdr.offset_of_cd_wrt_starting_disk = offset.try_into()?;
62 |
63 | offset += zip.cd[0..1].byte_count()?;
64 | let cdh = &mut zip.cd[1];
65 | entry1.write(&mut cdh.file_comment)?;
66 | cdh.file_comment_length = cdh.file_comment.len().try_into()?;
67 | cdh.relative_header_offset = offset.try_into()?;
68 |
69 | let cdh = &mut zip.cd[2];
70 | offset += cdh.file_comment.len() + cdh.byte_count()? + zip.eocdr.byte_count()?;
71 | entry2.write(&mut zip.eocdr.zip_file_comment)?;
72 | zip.eocdr.zip_file_comment_length = zip.eocdr.zip_file_comment.len().try_into()?;
73 | cdh.relative_header_offset = offset.try_into()?;
74 |
75 | Ok(zip)
76 | }
77 |
78 | fn lfh_desync(overlap: bool) -> Result<ZipArchive> {
79 | let mut zip = ZipArchive::default();
80 |
81 | let mut buf = Vec::new();
82 | let entry = FileEntry::new("a", b"a", CompressionMethod::STORED, false, false)?;
83 | entry.write(&mut buf)?;
84 |
85 | zip.add_simple("junk", &buf)?;
86 | zip.add_simple("b", b"b")?;
87 | zip.finalize()?;
88 |
89 | let mut offset = LocalFileHeader {
90 | file_name: "junk".into(),
91 | ..Default::default()
92 | }
93 | .byte_count()?;
94 | let mut cd = Vec::new();
95 | entry.push_into_cd(&mut cd, &mut offset)?;
96 |
97 | if overlap {
98 | let mut offset = 0;
99 | zip.files[0].push_into_cd(&mut cd, &mut offset)?;
100 | zip.files[1].push_into_cd(&mut cd, &mut offset)?;
101 | }
102 |
103 | let eocdr = EndOfCentralDirectoryRecord {
104 | this_disk_cdh_count: cd.len().try_into()?,
105 | total_cdh_count: cd.len().try_into()?,
106 | size_of_cd: cd.byte_count()?.try_into()?,
107 | offset_of_cd_wrt_starting_disk: zip.byte_count()?.try_into()?,
108 | ..Default::default()
109 | };
110 |
111 | cd.write(&mut zip.eocdr.zip_file_comment)?;
112 | eocdr.write(&mut zip.eocdr.zip_file_comment)?;
113 | zip.eocdr.zip_file_comment_length = zip.eocdr.zip_file_comment.len().try_into()?;
114 |
115 | Ok(zip)
116 | }
117 |
118 | fn dd_pos(deflated: bool) -> Result<ZipArchive> {
119 | let file_a = FileEntry::new("a", b"a", CompressionMethod::STORED, false, false)?;
120 | let file_b = FileEntry::new("b", b"b", CompressionMethod::STORED, false, false)?;
121 |
122 | let junk1b = FileEntry::new(
123 | "junk1",
124 | b"",
125 | if deflated {
126 | CompressionMethod::DEFLATED
127 | } else {
128 | CompressionMethod::STORED
129 | },
130 | false,
131 | true,
132 | )?;
133 |
134 | let junk1a_bare = junk1b.clone();
135 | let junk2_bare = FileEntry::new("junk2", b"", CompressionMethod::STORED, false, false)?;
136 | let junk3_bare = FileEntry::new("junk3", b"", CompressionMethod::STORED, false, false)?;
137 |
138 | let junk2_len =
139 | junk1a_bare.dd.unwrap().byte_count()? + file_b.byte_count()? + junk3_bare.byte_count()? + 4;
140 | let junk2_lfh = LocalFileHeader {
141 | compressed_size: junk2_len as u32,
142 | uncompressed_size: junk2_len as u32,
143 | ..junk2_bare.lfh.clone()
144 | };
145 |
146 | let mut junk1a_data = junk1a_bare.data;
147 | junk1b.dd.as_ref().unwrap().write(&mut junk1a_data)?;
148 | file_b.write(&mut junk1a_data)?;
149 | junk2_lfh.write(&mut junk1a_data)?;
150 |
151 | let junk1a_dd = DataDescriptor {
152 | signature: Some(DataDescriptor::SIGNATURE),
153 | crc32: crc32fast::hash(&junk1a_data),
154 | compressed_size: U32or64::U32(junk1a_data.len() as u32),
155 | uncompressed_size: U32or64::U32(junk1a_data.len() as u32),
156 | };
157 |
158 | let mut zip_b_tmp = ZipArchive::default();
159 | zip_b_tmp.files.push(junk1b.clone());
160 | zip_b_tmp.files.push(file_b);
161 | zip_b_tmp.files.push(junk2_bare);
162 | zip_b_tmp.finalize()?;
163 | let junk3_len = 4 + zip_b_tmp.cd.byte_count()? + zip_b_tmp.eocdr.byte_count()? + 4;
164 | let junk3_lfh = LocalFileHeader {
165 | compressed_size: junk3_len as u32,
166 | uncompressed_size: junk3_len as u32,
167 | ..junk3_bare.lfh.clone()
168 | };
169 |
170 | let mut junk2_data = Vec::new();
171 | junk1a_dd.write(&mut junk2_data)?;
172 | file_a.write(&mut junk2_data)?;
173 | junk3_lfh.write(&mut junk2_data)?;
174 | let junk2_patch = crc32_patch(&junk2_data, junk2_lfh.crc32);
175 | junk2_patch.write(&mut junk2_data)?;
176 |
177 | let junk1a = FileEntry {
178 | lfh: junk1a_bare.lfh,
179 | data: junk1a_data,
180 | dd: Some(junk1a_dd),
181 | };
182 |
183 | let junk2 = FileEntry {
184 | lfh: junk2_lfh,
185 | data: junk2_data,
186 | dd: None,
187 | };
188 |
189 | let mut zip_a_tmp = ZipArchive::default();
190 | zip_a_tmp.files.push(junk1a.clone());
191 | zip_a_tmp.files.push(file_a);
192 | zip_a_tmp.files.push(junk3_bare);
193 | zip_a_tmp.finalize()?;
194 |
195 | let mut zip_b = zip_b_tmp;
196 | *zip_b.files.last_mut().unwrap() = junk2;
197 | zip_b.finalize()?;
198 | zip_b.eocdr.zip_file_comment_length =
199 | (4 + zip_a_tmp.cd.byte_count()? + zip_a_tmp.eocdr.byte_count()?) as u16;
200 |
201 | let mut junk3_data = Vec::new();
202 | junk2_patch.write(&mut junk3_data)?;
203 | zip_b.cd.write(&mut junk3_data)?;
204 | zip_b.eocdr.write(&mut junk3_data)?;
205 | let junk3_patch = crc32_patch(&junk3_data, junk3_lfh.crc32);
206 | junk3_patch.write(&mut junk3_data)?;
207 |
208 | let junk3 = FileEntry {
209 | lfh: junk3_lfh,
210 | data: junk3_data,
211 | dd: None,
212 | };
213 |
214 | let mut zip_a = zip_a_tmp;
215 | *zip_a.files.last_mut().unwrap() = junk3;
216 | zip_a.finalize()?;
217 | zip_a.cd[0].compression_method = CompressionMethod::STORED;
218 |
219 | Ok(zip_a)
220 | }
221 |
222 | pub fn main() -> Result<()> {
223 | testcase(no_cdh_for_lfh)?;
224 | testcase(truncating_lfh_stream_via_fake_records)?;
225 | testcase(truncating_lfh_stream_via_lfh_inside_comments)?;
226 | [false, true]
227 | .iter()
228 | .try_for_each(|overlap| testcase_arg(lfh_desync, *overlap))?;
229 | [false, true]
230 | .iter()
231 | .try_for_each(|deflate| testcase_arg(dd_pos, *deflate))?;
232 | Ok(())
233 | }
234 |
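// Commentary (added note, not in the repo): judging by its use above,
// crc32_patch(data, target) computes a 4-byte trailer that forces the CRC-32
// of data plus trailer to equal target; this is always possible because CRC-32
// is an affine function of the input bits. That is what lets junk2 and junk3
// embed whole records while still matching the CRC recorded in their LFHs.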
--------------------------------------------------------------------------------
/zip-diff/src/construction/c/c2.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::testcase_arg;
2 | use anyhow::Result;
3 | use zip_diff::utils::BinCount;
4 | use zip_diff::zip::ZipArchive;
5 |
6 | enum Arg {
7 | LongCommentLength,
8 | ShortCommentLength,
9 | LfhCdhMismatch,
10 | }
11 |
12 | fn eocdr_selection(arg: Arg) -> Result<[ZipArchive; 3]> {
13 | let mut zip1 = ZipArchive::default();
14 | zip1.add_simple("a", b"a")?;
15 | zip1.finalize()?;
16 | zip1.eocdr.zip_file_comment.push(b'\0');
17 |
18 | let zip_size = zip1.byte_count()?;
19 | zip1.eocdr.zip_file_comment_length = (zip_size * 2 + 1).try_into()?;
20 |
21 | let mut zip2 = ZipArchive::default();
22 | zip2.add_simple("b", b"b")?;
23 | zip2.finalize()?;
24 | zip2.set_offsets(zip_size)?;
25 | zip2.eocdr.zip_file_comment.push(b'\0');
26 | zip2.eocdr.zip_file_comment_length = (zip_size + 1).try_into()?;
27 |
28 | let mut zip3 = ZipArchive::default();
29 | zip3.add_simple("c", b"c")?;
30 | zip3.finalize()?;
31 | zip3.set_offsets(zip_size * 2)?;
32 | zip3.eocdr.zip_file_comment.push(b'\0');
33 | zip3.eocdr.zip_file_comment_length = 1;
34 |
35 | match arg {
36 | Arg::LongCommentLength => {
37 | zip1.eocdr.zip_file_comment_length += 1;
38 | zip3.eocdr.zip_file_comment_length += 1;
39 | }
40 | Arg::ShortCommentLength => {
41 | zip1.eocdr.zip_file_comment_length -= 1;
42 | zip3.eocdr.zip_file_comment_length -= 1;
43 | }
44 | Arg::LfhCdhMismatch => {
45 | zip1.cd[0].version_needed = 10;
46 | zip3.cd[0].version_needed = 10;
47 | }
48 | }
49 |
50 | Ok([zip1, zip2, zip3])
51 | }
52 |
53 | pub fn main() -> Result<()> {
54 | testcase_arg(eocdr_selection, Arg::LongCommentLength)?;
55 | testcase_arg(eocdr_selection, Arg::ShortCommentLength)?;
56 | testcase_arg(eocdr_selection, Arg::LfhCdhMismatch)?;
57 | Ok(())
58 | }
59 |
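// Commentary (added note, not in the repo): eocdr_selection concatenates three
// self-contained archives whose EOCDR comment lengths are tuned so that each
// EOCDR plausibly claims the rest of the file as its comment. The three Arg
// variants then perturb the lengths (or plant an LFH/CDH mismatch) so parsers
// reveal whether they pick the first EOCDR, the last one, or the one whose
// comment length reaches exactly to end-of-file.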
--------------------------------------------------------------------------------
/zip-diff/src/construction/c/c3.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::{testcase, testcase_arg};
2 | use anyhow::Result;
3 | use bitflags::bitflags;
4 | use zip_diff::utils::BinCount;
5 | use zip_diff::zip::ZipArchive;
6 |
7 | struct CdhCountFlags(u8);
8 |
9 | bitflags! {
10 | impl CdhCountFlags: u8 {
11 | const ThisDiskCount = 1 << 0;
12 | const TotalCount = 1 << 1;
13 | const CdSize = 1 << 2;
14 | }
15 | }
16 |
17 | fn cdh_count(flags: CdhCountFlags) -> Result<ZipArchive> {
18 | let mut zip = ZipArchive::default();
19 |
20 | zip.add_simple("a", b"a")?;
21 | zip.add_simple("b", b"b")?;
22 | zip.finalize()?;
23 |
24 | if flags.contains(CdhCountFlags::ThisDiskCount) {
25 | zip.eocdr.this_disk_cdh_count -= 1;
26 | }
27 |
28 | if flags.contains(CdhCountFlags::TotalCount) {
29 | zip.eocdr.total_cdh_count -= 1;
30 | }
31 |
32 | if flags.contains(CdhCountFlags::CdSize) {
33 | zip.eocdr.size_of_cd = zip.cd[0].byte_count()?.try_into()?;
34 | }
35 |
36 | Ok(zip)
37 | }
38 |
39 | fn modulo_65536() -> Result<ZipArchive> {
40 | let mut zip = ZipArchive::default();
41 |
42 | for i in 1u32..=65537 {
43 | zip.add_simple(&format!("{:x}/{:x}", i / 256, i % 256), b"")?;
44 | }
45 |
46 | zip.finalize()?;
47 |
48 | let zip64_eocdr = zip.zip64_eocdr.as_mut().unwrap();
49 | zip64_eocdr.this_disk_cdh_count -= 65536;
50 | zip64_eocdr.total_cdh_count -= 65536;
51 | zip.eocdr = (&*zip64_eocdr).try_into()?;
52 | zip.zip64_eocdr = None;
53 | zip.zip64_eocdl = None;
54 |
55 | Ok(zip)
56 | }
57 |
58 | pub fn main() -> Result<()> {
59 | (1..8).try_for_each(|flags| testcase_arg(cdh_count, CdhCountFlags::from_bits_truncate(flags)))?;
60 | testcase(modulo_65536)?;
61 | Ok(())
62 | }
63 |
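// Commentary (added note, not in the repo): modulo_65536 exploits the 16-bit
// EOCDR entry counts. With 65537 entries, subtracting 65536 leaves 1, the same
// value the true count wraps to modulo 2^16; a parser trusting the EOCDR count
// therefore sees one entry, while a parser walking the central directory sees
// all 65537.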
--------------------------------------------------------------------------------
/zip-diff/src/construction/c/c4.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::{testcase, EntryGroup, CRC32A, CRC32B};
2 | use anyhow::Result;
3 | use binwrite::BinWrite;
4 | use zip_diff::cdh::CentralDirectoryHeader;
5 | use zip_diff::eocd::EndOfCentralDirectoryRecord;
6 | use zip_diff::lfh::LocalFileHeader;
7 | use zip_diff::utils::BinCount;
8 | use zip_diff::zip::ZipArchive;
9 |
10 | #[derive(BinWrite)]
11 | pub struct CdOffsetZip {
12 | pub groups: Vec<EntryGroup>,
13 | pub eocdr: EndOfCentralDirectoryRecord,
14 | }
15 |
16 | pub fn cd_offset() -> Result<CdOffsetZip> {
17 | let mut zip = ZipArchive::default();
18 |
19 | zip.add_simple("stream", b"a")?;
20 | zip.add_simple("eocdr", b"a")?;
21 |
22 | const FILENAME: &str = "adjac";
23 | let cd_size = CentralDirectoryHeader::from(&zip.files[1].lfh).byte_count()?;
24 | let lfh_size = LocalFileHeader {
25 | file_name: FILENAME.into(),
26 | ..Default::default()
27 | }
28 | .byte_count()?;
29 |
30 | let content_width = cd_size - lfh_size;
31 | zip.add_simple(FILENAME, format!("{CRC32A:A<0$}", content_width).as_bytes())?;
32 | zip.add_simple(FILENAME, format!("{CRC32B:A<0$}", content_width).as_bytes())?;
33 |
34 | zip.finalize()?;
35 |
36 | // This equality is required for correct LFH offset adjustment;
37 | // it is ensured by choosing the file name length accordingly
38 | assert_eq!(cd_size, zip.files[3].byte_count()?);
39 | // This is required so that the CD size in EOCDR is correct for both central directories
40 | assert_eq!(cd_size, zip.cd[2].byte_count()?);
41 |
42 | {
43 | zip.cd[3].relative_header_offset = zip.cd[2].relative_header_offset;
44 | // Make sure the two CDHs serialize identically even though they record different LFH offsets
45 | // In particular, the filename and CRC32 must be the same
46 | let mut tmp1 = Vec::new();
47 | zip.cd[2].write(&mut tmp1)?;
48 | let mut tmp2 = Vec::new();
49 | zip.cd[3].write(&mut tmp2)?;
50 | assert_eq!(tmp1, tmp2);
51 | }
52 |
53 | // for streaming mode parsers
54 | let group1 = EntryGroup {
55 | files: vec![zip.files[0].clone()],
56 | cd: vec![zip.cd[0].clone()],
57 | };
58 |
59 | let mut group2 = EntryGroup {
60 | // first file for parsers that use the CDH at the offset in EOCDR
61 | // second for parsers that use the adjacent central directory but do not adjust LFH offsets
62 | files: zip.files[1..=2].to_vec(),
63 | cd: vec![zip.cd[1].clone()],
64 | };
65 | group2.cd[0].relative_header_offset = group1.byte_count()?.try_into()?;
66 |
67 | // for parsers that use the adjacent central directory and adjust LFH offsets accordingly
68 | let mut group3 = EntryGroup {
69 | files: vec![zip.files[3].clone()],
70 | cd: vec![zip.cd[2].clone()],
71 | };
72 | group3.cd[0].relative_header_offset =
73 | group2.cd[0].relative_header_offset + u32::try_from(zip.files[1].byte_count()?)?;
74 |
75 | let eocdr = EndOfCentralDirectoryRecord {
76 | this_disk_cdh_count: 1,
77 | total_cdh_count: 1,
78 | size_of_cd: cd_size.try_into()?,
79 | offset_of_cd_wrt_starting_disk: group2.cd[0].relative_header_offset
80 | + u32::try_from(group2.files.byte_count()?)?,
81 | ..Default::default()
82 | };
83 |
84 | Ok(CdOffsetZip {
85 | groups: vec![group1, group2, group3],
86 | eocdr,
87 | })
88 | }
89 |
90 | pub fn main() -> Result<()> {
91 | testcase(cd_offset)?;
92 | Ok(())
93 | }
94 |
--------------------------------------------------------------------------------
/zip-diff/src/construction/c/c5.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::{testcase, testcase_arg};
2 | use anyhow::Result;
3 | use binwrite::BinWrite;
4 | use bitflags::bitflags;
5 | use zip_diff::eocd::{
6 | EndOfCentralDirectoryRecord, Zip64EndOfCentralDirectoryLocator,
7 | Zip64EndOfCentralDirectoryRecord, Zip64ExtensibleDataSector,
8 | };
9 | use zip_diff::extra::Zip64ExtendedInfo;
10 | use zip_diff::utils::BinCount;
11 | use zip_diff::zip::ZipArchive;
12 |
13 | struct Zip64Flags(u8);
14 |
15 | bitflags! {
16 | impl Zip64Flags: u8 {
17 | const DiskNumberFF = 1 << 0;
18 | const CdhCountFF = 1 << 1;
19 | const CdSizeFF = 1 << 2;
20 | const CdOffsetFF = 1 << 3;
21 | const EocdlGap = 1 << 4;
22 | const MoreFilesInZip64 = 1 << 5;
23 | }
24 | }
25 |
26 | fn use_zip64_eocdr(flags: Zip64Flags) -> Result<ZipArchive> {
27 | let mut zip1 = ZipArchive::default();
28 | zip1.add_simple("a", b"a")?;
29 | if !flags.contains(Zip64Flags::MoreFilesInZip64) {
30 | zip1.add_simple("b", b"b")?;
31 | }
32 | zip1.finalize()?;
33 |
34 | let mut zip2 = ZipArchive::default();
35 | zip2.add_simple("c", b"c")?;
36 | if flags.contains(Zip64Flags::MoreFilesInZip64) {
37 | zip2.add_simple("d", b"d")?;
38 | }
39 | zip2.finalize()?;
40 | zip2.set_offsets(zip1.files.byte_count()? + zip1.cd.byte_count()?)?;
41 | zip2.set_eocd(true)?;
42 |
43 | let cdh = zip1.cd.last_mut().unwrap();
44 | zip2.files.write(&mut cdh.file_comment)?;
45 | zip2.cd.write(&mut cdh.file_comment)?;
46 | zip2.zip64_eocdr.unwrap().write(&mut cdh.file_comment)?;
47 | zip2.zip64_eocdl.unwrap().write(&mut cdh.file_comment)?;
48 | if flags.contains(Zip64Flags::EocdlGap) {
49 | 0u8.write(&mut cdh.file_comment)?;
50 | }
51 | cdh.file_comment_length = cdh.file_comment.len().try_into()?;
52 |
53 | zip1.set_eocd(false)?;
54 |
55 | if flags.contains(Zip64Flags::DiskNumberFF) {
56 | zip1.eocdr.number_of_this_disk = u16::MAX;
57 | zip1.eocdr.start_of_cd_disk_number = u16::MAX;
58 | }
59 |
60 | if flags.contains(Zip64Flags::CdhCountFF) {
61 | zip1.eocdr.this_disk_cdh_count = u16::MAX;
62 | zip1.eocdr.total_cdh_count = u16::MAX;
63 | }
64 |
65 | if flags.contains(Zip64Flags::CdSizeFF) {
66 | zip1.eocdr.size_of_cd = u32::MAX;
67 | }
68 |
69 | if flags.contains(Zip64Flags::CdOffsetFF) {
70 | zip1.eocdr.offset_of_cd_wrt_starting_disk = u32::MAX;
71 | }
72 |
73 | Ok(zip1)
74 | }
75 |
76 | fn eocdl_or_search() -> Result<ZipArchive> {
77 | let mut zip1 = ZipArchive::default();
78 | zip1.add_simple("a", b"a")?;
79 | zip1.finalize()?;
80 | zip1.set_eocd(true)?;
81 |
82 | let mut zip2 = ZipArchive::default();
83 | zip2.add_simple("b", b"b")?;
84 | zip2.finalize()?;
85 | zip2.set_offsets(zip1.files.byte_count()? + zip1.cd.byte_count()?)?;
86 | zip2.set_eocd(true)?;
87 |
88 | // hide ZIP64 EOCDR of zip1 in the ZIP64 EOCDR extensible data sector of zip2
89 | let zip64_eocdr_size = zip1.zip64_eocdr.as_ref().unwrap().byte_count()?;
90 | let zip64_eocdr_2 = zip2.zip64_eocdr.as_mut().unwrap();
91 | let extensible_header = Zip64ExtensibleDataSector {
92 | header_id: 0x1337, // an unknown ID
93 | size: zip64_eocdr_size.try_into()?,
94 | data: Box::new(Zip64ExtendedInfo::default()), // empty data
95 | };
96 | zip64_eocdr_2.size += u64::try_from(extensible_header.byte_count()? + zip64_eocdr_size)?;
97 | zip64_eocdr_2.extensible_data_sector.push(extensible_header);
98 |
99 | let cdh = &mut zip1.cd[0];
100 | zip2.files.write(&mut cdh.file_comment)?;
101 | zip2.cd.write(&mut cdh.file_comment)?;
102 | zip2.zip64_eocdr
103 | .as_ref()
104 | .unwrap()
105 | .write(&mut cdh.file_comment)?;
106 | cdh.file_comment_length = cdh.file_comment.len().try_into()?;
107 |
108 | zip1.set_eocd(true)?;
109 | zip1.zip64_eocdl.as_mut().unwrap().zip64_eocdr_offset -=
110 | u64::try_from(zip2.zip64_eocdr.unwrap().byte_count()?)?;
111 |
112 | Ok(zip1)
113 | }
114 |
115 | struct CdhCountFlags(u8);
116 |
117 | bitflags! {
118 | impl CdhCountFlags: u8 {
119 | const ThisDiskCount = 1 << 0;
120 | const TotalCount = 1 << 1;
121 | const CdSize = 1 << 2;
122 | }
123 | }
124 |
125 | fn cdh_count(flags: CdhCountFlags) -> Result<ZipArchive> {
126 | let mut zip = ZipArchive::default();
127 |
128 | zip.add_simple("a", b"a")?;
129 | zip.add_simple("b", b"b")?;
130 | zip.finalize()?;
131 | zip.set_eocd(true)?;
132 |
133 | let eocdr = zip.zip64_eocdr.as_mut().unwrap();
134 |
135 | if flags.contains(CdhCountFlags::ThisDiskCount) {
136 | eocdr.this_disk_cdh_count -= 1;
137 | }
138 |
139 | if flags.contains(CdhCountFlags::TotalCount) {
140 | eocdr.total_cdh_count -= 1;
141 | }
142 |
143 | if flags.contains(CdhCountFlags::CdSize) {
144 | eocdr.size_of_cd = zip.cd[0].byte_count()?.try_into()?;
145 | }
146 |
147 | Ok(zip)
148 | }
149 |
150 | fn cd_offset(adjust_zip64_offset: bool) -> Result<Vec<u8>> {
151 | let zip = super::c4::cd_offset()?;
152 | let eocdr = zip.eocdr;
153 |
154 | let mut buf = Vec::new();
155 | zip.groups.write(&mut buf)?;
156 |
157 | let mut zip64_eocdr = Zip64EndOfCentralDirectoryRecord {
158 | this_disk_cdh_count: eocdr.this_disk_cdh_count.into(),
159 | total_cdh_count: eocdr.this_disk_cdh_count.into(),
160 | size_of_cd: eocdr.size_of_cd.into(),
161 | offset_of_cd_wrt_starting_disk: eocdr.offset_of_cd_wrt_starting_disk.into(),
162 | ..Default::default()
163 | };
164 | zip64_eocdr.finalize()?;
165 |
166 | let zip64_offset = if adjust_zip64_offset {
167 | zip.groups[0..=1].byte_count()?
168 | } else {
169 | buf.len()
170 | };
171 |
172 | let eocdl = Zip64EndOfCentralDirectoryLocator::from_offset(zip64_offset.try_into()?);
173 | let eocdr = EndOfCentralDirectoryRecord::all_ff();
174 |
175 | zip64_eocdr.write(&mut buf)?;
176 | eocdl.write(&mut buf)?;
177 | eocdr.write(&mut buf)?;
178 |
179 | Ok(buf)
180 | }
181 |
182 | pub fn main() -> Result<()> {
183 | (0..64).try_for_each(|i| testcase_arg(use_zip64_eocdr, Zip64Flags::from_bits_truncate(i)))?;
184 | testcase(eocdl_or_search)?;
185 | (1..8).try_for_each(|i| testcase_arg(cdh_count, CdhCountFlags::from_bits_truncate(i)))?;
186 | testcase_arg(cd_offset, false)?;
187 | testcase_arg(cd_offset, true)?;
188 | Ok(())
189 | }
190 |
--------------------------------------------------------------------------------
/zip-diff/src/construction/main.rs:
--------------------------------------------------------------------------------
1 | use anyhow::Result;
2 |
3 | mod a;
4 | mod b;
5 | mod c;
6 | mod utils;
7 |
8 | fn main() -> Result<()> {
9 | a::main()?;
10 | b::main()?;
11 | c::main()?;
12 | Ok(())
13 | }
14 |
--------------------------------------------------------------------------------
/zip-diff/src/construction/utils.rs:
--------------------------------------------------------------------------------
1 | use anyhow::{Context, Result};
2 | use binwrite::BinWrite;
3 | use std::any::type_name_of_val;
4 | use std::collections::BTreeMap;
5 | use std::fs::{create_dir_all, File};
6 | use std::io::{BufWriter, Write};
7 | use std::path::PathBuf;
8 | use std::sync::Mutex;
9 | use zip_diff::cdh::CentralDirectoryHeader;
10 | use zip_diff::zip::{FileEntry, ZipArchive};
11 |
12 | static WRITE_COUNTER: Mutex<BTreeMap<String, usize>> = Mutex::new(BTreeMap::new());
13 |
14 | fn write_core<Z: BinWrite>(ambiguity_type: &str, data: Z) -> Result<()> {
15 | let count = *WRITE_COUNTER
16 | .lock()
17 | .unwrap()
18 | .entry(ambiguity_type.to_string())
19 | .and_modify(|e| *e += 1)
20 | .or_insert(1);
21 | let path = format!("../constructions/{ambiguity_type}/{ambiguity_type}-{count}.zip");
22 | let path = PathBuf::from(path);
23 | create_dir_all(path.parent().unwrap())?;
24 | let file = File::create(path).context("failed to create sample file")?;
25 | let mut writer = BufWriter::new(file);
26 | data.write(&mut writer)
27 | .context("failed to write sample file")?;
28 | writer.flush().context("failed to flush sample file writer")
29 | }
30 |
31 | pub fn testcase<Z, F>(construction: F) -> Result<()>
32 | where
33 | Z: BinWrite,
34 | F: FnOnce() -> Result<Z>,
35 | {
36 | let ambiguity_type = type_name_of_val(&construction).rsplit("::").nth(1).unwrap();
37 | let data = construction()?;
38 | write_core(ambiguity_type, data)
39 | }
40 |
41 | pub fn testcase_arg<Z, F, A>(construction: F, arg: A) -> Result<()>
42 | where
43 | Z: BinWrite,
44 | F: FnOnce(A) -> Result<Z>,
45 | {
46 | let ambiguity_type = type_name_of_val(&construction).rsplit("::").nth(1).unwrap();
47 | let data = construction(arg)?;
48 | write_core(ambiguity_type, data)
49 | }
50 |
51 | #[derive(BinWrite)]
52 | pub struct EntryGroup {
53 | pub files: Vec<FileEntry>,
54 | pub cd: Vec<CentralDirectoryHeader>,
55 | }
56 |
57 | impl From<ZipArchive> for EntryGroup {
58 | fn from(zip: ZipArchive) -> Self {
59 | Self {
60 | files: zip.files,
61 | cd: zip.cd,
62 | }
63 | }
64 | }
65 |
66 | // Two strings with the same length and CRC32
67 | // https://www.thecodingforums.com/threads/finding-two-strings-with-the-same-crc32.889011/#post-4775592
68 | pub const CRC32A: &str = "oxueekz";
69 | pub const CRC32B: &str = "pyqptgs";
70 |
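// Commentary (added check, not in the repo): a quick way to verify the claimed
// collision with the same crc32fast crate used by the constructions:
//
//     assert_eq!(CRC32A.len(), CRC32B.len());
//     assert_eq!(
//         crc32fast::hash(CRC32A.as_bytes()),
//         crc32fast::hash(CRC32B.as_bytes()),
//     );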
--------------------------------------------------------------------------------
/zip-diff/src/count/main.rs:
--------------------------------------------------------------------------------
1 | use anyhow::{bail, Context, Result};
2 | use serde::{Deserialize, Serialize};
3 | use std::collections::{BTreeMap, BTreeSet};
4 | use std::ffi::OsString;
5 | use std::fs::{copy, create_dir_all, read_dir, remove_dir_all, File};
6 | use std::io::{BufReader, BufWriter, ErrorKind, Write};
7 | use std::path::Path;
8 | use std::process::{Command, Stdio};
9 | use zip_diff::hash::read_parsing_result;
10 |
11 | const SAMPLES_DIR: &str = "../constructions";
12 | const INPUT_DIR: &str = "../constructions/input";
13 | const OUTPUT_DIR: &str = "../constructions/output";
14 |
15 | const TYPES: &[&str] = &[
16 | "a1", "a2", "a3", "a4", "a5", // Redundant Metadata
17 | "b1", "b2", "b3", "b4", // File Path Processing
18 | "c1", "c2", "c3", "c4", "c5", // ZIP Structure Positioning
19 | ];
20 |
21 | #[derive(Serialize)]
22 | struct InconsistencyItem<'a> {
23 | parsers: (&'a str, &'a str),
24 | inconsistency_types: Vec<&'static str>,
25 | }
26 |
27 | #[derive(Deserialize)]
28 | pub struct ParserInfo {
29 | pub name: String,
30 | pub version: String,
31 | pub r#type: String,
32 | pub language: String,
33 | }
34 |
35 | fn main() -> Result<()> {
36 | let parsers_file =
37 | File::open("../parsers/parsers.json").context("failed to read parsers.json")?;
38 | let parsers_reader = BufReader::new(parsers_file);
39 | let parser_map: BTreeMap<String, ParserInfo> = serde_json::from_reader(parsers_reader)?;
40 | let mut parsers = parser_map.into_iter().collect::<Vec<_>>();
41 | parsers.sort_by_cached_key(|(_, parser)| {
42 | (
43 | parser.r#type.clone(),
44 | parser.language.clone(),
45 | parser.name.to_lowercase(),
46 | parser.version.clone(),
47 | )
48 | });
49 | let parsers = parsers.into_iter().map(|(key, _)| key).collect::<Vec<_>>();
50 |
51 | if let Err(err) = remove_dir_all(INPUT_DIR) {
52 | if err.kind() != ErrorKind::NotFound {
53 | bail!("failed to remove input directory: {err}");
54 | }
55 | }
56 | if let Err(err) = remove_dir_all(OUTPUT_DIR) {
57 | if err.kind() != ErrorKind::NotFound {
58 | bail!("failed to remove output directory: {err}");
59 | }
60 | }
61 | create_dir_all(INPUT_DIR).context("failed to remove input directory")?;
62 |
63 | let mut testcases = Vec::<(&str, OsString)>::new();
64 |
65 | for t in TYPES {
66 | let dir = Path::new(SAMPLES_DIR).join(t);
67 | if !dir.try_exists()? {
68 | continue;
69 | }
70 | let entries = read_dir(dir)?;
71 | for entry in entries {
72 | let entry = entry?;
73 | if entry.file_name().into_string().unwrap().starts_with(t)
74 | && entry.file_type()?.is_file()
75 | {
76 | testcases.push((t, entry.file_name()));
77 | copy(entry.path(), Path::new(INPUT_DIR).join(entry.file_name()))
78 | .context("failed to copy sample to input directory")?;
79 | }
80 | }
81 | }
82 |
83 | let parser_prepare_status = Command::new("../parsers/prepare.sh")
84 | .env("INPUT_DIR", INPUT_DIR)
85 | .env("OUTPUT_DIR", OUTPUT_DIR)
86 | .status()
87 | .expect("failed to execute parsers/prepare.sh");
88 | if !parser_prepare_status.success() {
89 | bail!("parsers/prepare.sh failed");
90 | }
91 |
92 | Command::new("docker")
93 | .arg("compose")
94 | .arg("up")
95 | .current_dir("../parsers")
96 | .stdout(Stdio::null())
97 | .stderr(Stdio::null())
98 | .spawn()
99 | .context("failed to start docker compose")?
100 | .wait()
101 | .context("failed to run docker compose")?;
102 |
103 | let outputs = parsers
104 | .iter()
105 | .map(|parser| {
106 | testcases
107 | .iter()
108 | .map(|(_, t)| read_parsing_result(Path::new(OUTPUT_DIR).join(parser).join(t), true))
109 | .collect::<Vec<_>>()
110 | })
111 | .collect::<Vec<_>>();
112 |
113 | let mut results = Vec::new();
114 |
115 | for (parser1, outputs1) in parsers.iter().zip(&outputs) {
116 | for (parser2, outputs2) in parsers.iter().zip(&outputs) {
117 | let inconsistency_types = outputs1
118 | .iter()
119 | .zip(outputs2)
120 | .zip(&testcases)
121 | .filter_map(|((o1, o2), (t, _))| o1.inconsistent_with(o2).then_some(*t))
122 | .collect::<BTreeSet<_>>()
123 | .into_iter()
124 | .collect();
125 | results.push(InconsistencyItem {
126 | parsers: (parser1, parser2),
127 | inconsistency_types,
128 | })
129 | }
130 | }
131 |
132 | let results_file = File::create(Path::new(SAMPLES_DIR).join("inconsistency-types.json"))
133 | .context("failed to create result file")?;
134 | let mut results_writer = BufWriter::new(results_file);
135 | serde_json::to_writer_pretty(&mut results_writer, &results)
136 | .context("failed to write results")?;
137 | results_writer.flush()?;
138 |
139 | Ok(())
140 | }
141 |
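// Commentary (added note, not in the repo): with the derived Serialize impl,
// each InconsistencyItem serializes roughly as follows (parser names here are
// hypothetical):
//
//     { "parsers": ["parser-a", "parser-b"], "inconsistency_types": ["a1", "c3"] }
//
// so inconsistency-types.json holds one such object per ordered parser pair.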
--------------------------------------------------------------------------------
/zip-diff/src/dd.rs:
--------------------------------------------------------------------------------
1 | use crate::lfh::LocalFileHeader;
2 | use crate::utils::binwrite_option;
3 | use binwrite::{BinWrite, WriterOption};
4 | use educe::Educe;
5 | use std::io::{Result, Write};
6 |
7 | #[derive(BinWrite, Clone, Debug, Educe)]
8 | #[educe(Default)]
9 | pub struct DataDescriptor {
10 | #[binwrite(with(binwrite_option))]
11 | #[educe(Default = Some(Self::SIGNATURE))]
12 | pub signature: Option<u32>,
13 | pub crc32: u32,
14 | #[binwrite(with(binwrite_u32or64))]
15 | pub compressed_size: U32or64,
16 | #[binwrite(with(binwrite_u32or64))]
17 | pub uncompressed_size: U32or64,
18 | }
19 |
20 | #[derive(Clone, Debug)]
21 | pub enum U32or64 {
22 | U32(u32),
23 | U64(u64),
24 | }
25 |
26 | impl DataDescriptor {
27 | pub const SIGNATURE: u32 = 0x08074b50;
28 | }
29 |
30 | fn binwrite_u32or64<W: Write>(val: &U32or64, writer: &mut W, options: &WriterOption) -> Result<()> {
31 | match val {
32 | U32or64::U32(val) => val.write_options(writer, options),
33 | U32or64::U64(val) => val.write_options(writer, options),
34 | }
35 | }
36 |
37 | impl U32or64 {
38 | pub fn saturate(&self) -> u32 {
39 | match self {
40 | U32or64::U32(val) => *val,
41 | U32or64::U64(val) => {
42 | if *val > u32::MAX as u64 {
43 | u32::MAX
44 | } else {
45 | *val as u32
46 | }
47 | }
48 | }
49 | }
50 | }
51 |
52 | impl Default for U32or64 {
53 | fn default() -> Self {
54 | Self::U32(0)
55 | }
56 | }
57 |
58 | impl From<&LocalFileHeader> for DataDescriptor {
59 | fn from(value: &LocalFileHeader) -> Self {
60 | Self {
61 | signature: Some(Self::SIGNATURE),
62 | crc32: value.crc32,
63 | compressed_size: match value.zip64.compressed_size {
64 | None => U32or64::U32(value.compressed_size),
65 | Some(size) => U32or64::U64(size),
66 | },
67 | uncompressed_size: match value.zip64.original_size {
68 | None => U32or64::U32(value.uncompressed_size),
69 | Some(size) => U32or64::U64(size),
70 | },
71 | }
72 | }
73 | }
74 |
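// Commentary (added sketch, not in the repo): APPNOTE makes the 0x08074b50
// data descriptor signature optional, which is why it is modeled as an Option
// above. A minimal usage sketch:
//
//     let dd = DataDescriptor {
//         signature: None, // 12 bytes on the wire; Some(SIGNATURE) gives 16
//         crc32: crc32fast::hash(b"test"),
//         compressed_size: U32or64::U32(4),
//         uncompressed_size: U32or64::U32(4),
//     };
//     let mut buf = Vec::new();
//     dd.write(&mut buf)?;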
--------------------------------------------------------------------------------
/zip-diff/src/eocd.rs:
--------------------------------------------------------------------------------
1 | use crate::extra::ExtraFieldType;
2 | use crate::fields::CompressionMethod;
3 | use crate::utils::{binwrite_option, binwrite_transform, BinCount};
4 | use anyhow::{Context, Result};
5 | use binwrite::BinWrite;
6 | use educe::Educe;
7 |
8 | #[derive(BinWrite, Clone, Educe)]
9 | #[educe(Debug, Default)]
10 | pub struct EndOfCentralDirectoryRecord {
11 | #[educe(Default = Self::SIGNATURE)]
12 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))]
13 | pub signature: u32,
14 | pub number_of_this_disk: u16,
15 | /// number of the disk with the start of the central directory
16 | pub start_of_cd_disk_number: u16,
17 | /// total number of entries in the central directory on this disk
18 | pub this_disk_cdh_count: u16,
19 | /// total number of entries in the central directory
20 | pub total_cdh_count: u16,
21 | /// size of the central directory
22 | pub size_of_cd: u32,
23 | /// offset of start of central directory with respect to the starting disk number
24 | pub offset_of_cd_wrt_starting_disk: u32,
25 | pub zip_file_comment_length: u16,
26 | pub zip_file_comment: Vec<u8>,
27 | }
28 |
29 | impl EndOfCentralDirectoryRecord {
30 | pub const SIGNATURE: u32 = 0x06054b50;
31 |
32 | pub fn all_ff() -> Self {
33 | Self {
34 | number_of_this_disk: u16::MAX,
35 | start_of_cd_disk_number: u16::MAX,
36 | this_disk_cdh_count: u16::MAX,
37 | total_cdh_count: u16::MAX,
38 | size_of_cd: u32::MAX,
39 | offset_of_cd_wrt_starting_disk: u32::MAX,
40 | ..Default::default()
41 | }
42 | }
43 | }
44 |
45 | impl TryFrom<&Zip64EndOfCentralDirectoryRecord> for EndOfCentralDirectoryRecord {
46 | type Error = anyhow::Error;
47 |
48 | fn try_from(zip64: &Zip64EndOfCentralDirectoryRecord) -> Result<Self> {
49 | Ok(Self {
50 | number_of_this_disk: zip64.number_of_this_disk.try_into()?,
51 | start_of_cd_disk_number: zip64.start_of_cd_disk_number.try_into()?,
52 | this_disk_cdh_count: zip64.this_disk_cdh_count.try_into()?,
53 | total_cdh_count: zip64.total_cdh_count.try_into()?,
54 | size_of_cd: zip64.size_of_cd.try_into()?,
55 | offset_of_cd_wrt_starting_disk: zip64.offset_of_cd_wrt_starting_disk.try_into()?,
56 | ..Default::default()
57 | })
58 | }
59 | }
60 |
61 | #[derive(BinWrite, Clone, Educe)]
62 | #[educe(Debug, Default)]
63 | pub struct Zip64EndOfCentralDirectoryLocator {
64 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))]
65 | #[educe(Default = Self::SIGNATURE)]
66 | pub signature: u32,
67 | /// number of the disk with the start of the zip64 end of central directory
68 | pub zip64_eocdr_disk_number: u32,
69 | /// relative offset of the zip64 end of central directory record
70 | pub zip64_eocdr_offset: u64,
71 | #[educe(Default = 1)]
72 | pub total_number_of_disks: u32,
73 | }
74 |
75 | impl Zip64EndOfCentralDirectoryLocator {
76 | pub const SIGNATURE: u32 = 0x07064b50;
77 |
78 | pub fn from_offset(offset: u64) -> Self {
79 | Self {
80 | zip64_eocdr_offset: offset,
81 | ..Default::default()
82 | }
83 | }
84 | }
85 |
86 | #[derive(BinWrite, Clone, Educe)]
87 | #[educe(Debug, Default)]
88 | pub struct Zip64EndOfCentralDirectoryRecord {
89 | #[educe(Default = Self::SIGNATURE)]
90 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))]
91 | pub signature: u32,
92 | pub size: u64,
93 | #[educe(Default = 20)]
94 | pub version_made_by: u16,
95 | #[educe(Default = 20)]
96 | pub version_needed: u16,
97 | pub number_of_this_disk: u32,
98 | /// number of the disk with the start of the central directory
99 | pub start_of_cd_disk_number: u32,
100 | /// total number of entries in the central directory on this disk
101 | pub this_disk_cdh_count: u64,
102 | /// total number of entries in the central directory
103 | pub total_cdh_count: u64,
104 | /// size of the central directory
105 | pub size_of_cd: u64,
106 | /// offset of start of central directory with respect to the starting disk number
107 | pub offset_of_cd_wrt_starting_disk: u64,
108 | #[binwrite(with(binwrite_option))]
109 | pub v2: Option<Zip64EocdrV2>,
110 | pub extensible_data_sector: Vec<Zip64ExtensibleDataSector>,
111 | }
112 |
113 | #[derive(BinWrite, Clone, Debug, Default)]
114 | pub struct Zip64EocdrV2 {
115 | #[binwrite(with(binwrite_transform))]
116 | pub compression_method: CompressionMethod,
117 | pub compressed_size: u64,
118 | pub original_size: u64,
119 | pub encrypt_alg: u16,
120 | pub key_bit_len: u16,
121 | pub encrypt_flags: u16,
122 | pub hash_alg: u16,
123 | pub hash_len: u16,
124 | pub hash_data: Vec<u8>,
125 | }
126 |
127 | impl Zip64EndOfCentralDirectoryRecord {
128 | pub const SIGNATURE: u32 = 0x06064b50;
129 |
130 | pub fn finalize(&mut self) -> Result<()> {
131 | for field in &mut self.extensible_data_sector {
132 | field.finalize()?;
133 | }
134 | self.size =
135 | self.extensible_data_sector
136 | .byte_count()
137 | .context("Failed to count ZIP64 EOCDR extensible data sector")? as u64
138 | + 44;
139 | if let Some(v2) = &self.v2 {
140 | self.size += v2.byte_count()? as u64;
141 | }
142 | Ok(())
143 | }
144 |
145 | pub fn use_v2(&mut self) -> Result<()> {
146 | self.version_made_by = 62;
147 | self.version_needed = 62;
148 | self.v2 = Some(Zip64EocdrV2 {
149 | compressed_size: self.size_of_cd,
150 | original_size: self.size_of_cd,
151 | ..Default::default()
152 | });
153 | self.finalize()
154 | }
155 | }
156 |
157 | #[derive(BinWrite, Clone, Debug)]
158 | pub struct Zip64ExtensibleDataSector {
159 | pub header_id: u16,
160 | pub size: u32,
161 | #[binwrite(with(binwrite_transform))]
162 | pub data: Box<dyn ExtraFieldType>,
163 | }
164 |
165 | impl Zip64ExtensibleDataSector {
166 | pub fn finalize(&mut self) -> Result<()> {
167 | self.header_id = self.data.header_id();
168 | self.size = self
169 | .data
170 | .binary_encode()
171 | .context("Failed to count extensible data sector size")?
172 | .len()
173 | .try_into()
174 | .context("Extensible data sector too long")?;
175 | Ok(())
176 | }
177 | }
178 |
179 | impl<T: ExtraFieldType> From<T> for Zip64ExtensibleDataSector {
180 | fn from(data: T) -> Self {
181 | Self {
182 | header_id: 0,
183 | size: 0,
184 | data: Box::new(data),
185 | }
186 | }
187 | }
188 |
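// Commentary (added note, not in the repo): the 0xFFFF / 0xFFFFFFFF values
// produced by all_ff() are the ZIP64 sentinels: APPNOTE directs a reader that
// finds a maxed-out EOCDR field to take the real value from the ZIP64 EOCDR
// located via the locator record. The c5 constructions set only subsets of
// these sentinels to test which fields actually trigger that fallback.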
--------------------------------------------------------------------------------
/zip-diff/src/extra.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::{binwrite_option, binwrite_transform, BinWriteTransform};
2 | use anyhow::{Context, Result};
3 | use binwrite::BinWrite;
4 | use bitflags::bitflags;
5 | use downcast_rs::{impl_downcast, Downcast};
6 | use dyn_clone::{clone_trait_object, DynClone};
7 | use std::fmt::Debug;
8 |
9 | #[derive(BinWrite, Clone, Debug)]
10 | pub struct ExtraField {
11 | pub header_id: u16,
12 | pub size: u16,
13 | #[binwrite(with(binwrite_transform))]
14 | pub data: Box<dyn ExtraFieldType>,
15 | }
16 |
17 | impl ExtraField {
18 | pub fn finalize(&mut self) -> Result<()> {
19 | self.header_id = self.data.header_id();
20 | self.size = self
21 | .data
22 | .binary_encode()
23 | .context("Failed to count extra field size")?
24 | .len()
25 | .try_into()
26 | .context("Extra field too long")?;
27 | Ok(())
28 | }
29 | }
30 |
31 | impl<T: ExtraFieldType> From<T> for ExtraField {
32 | fn from(data: T) -> Self {
33 | Self {
34 | header_id: 0,
35 | size: 0,
36 | data: Box::new(data),
37 | }
38 | }
39 | }
40 |
41 | pub trait ExtraFieldType: BinaryEncode + Debug + DynClone + Downcast + Send + Sync {
42 | // a function is used instead of an associated const to make it object-safe
43 | fn header_id(&self) -> u16;
44 | }
45 |
46 | impl_downcast!(ExtraFieldType);
47 | clone_trait_object!(ExtraFieldType);
48 |
49 | #[derive(BinWrite, Clone, Default, Debug)]
50 | pub struct Zip64ExtendedInfo {
51 | #[binwrite(with(binwrite_option))]
52 | pub original_size: Option<u64>,
53 | #[binwrite(with(binwrite_option))]
54 | pub compressed_size: Option<u64>,
55 | #[binwrite(with(binwrite_option))]
56 | pub relative_header_offset: Option<u64>,
57 | #[binwrite(with(binwrite_option))]
58 | pub disk_start_number: Option<u32>,
59 | }
60 |
61 | impl ExtraFieldType for Zip64ExtendedInfo {
62 | fn header_id(&self) -> u16 {
63 | 1
64 | }
65 | }
66 |
67 | impl Zip64ExtendedInfo {
68 | pub fn is_empty(&self) -> bool {
69 | self.original_size.is_none()
70 | && self.compressed_size.is_none()
71 | && self.relative_header_offset.is_none()
72 | && self.disk_start_number.is_none()
73 | }
74 | }
75 |
76 | #[derive(Clone, Copy, Default, Debug)]
77 | pub struct PatchDescriptorFlag(u32);
78 |
79 | bitflags! {
80 | impl PatchDescriptorFlag: u32 {
81 | const AutoDetection = 1 << 0;
82 | const SelfPatch = 1 << 1;
83 | const ActionAdd = 1 << 4;
84 | const ActionDelete = 2 << 4;
85 | const ActionPatch = 3 << 4;
86 | const ReactionToAbsentSkip = 1 << 8;
87 | const ReactionToAbsentIgnore = 2 << 8;
88 | const ReactionToAbsentFail = 3 << 8;
89 | const ReactionToNewerSkip = 1 << 10;
90 | const ReactionToNewerIgnore = 2 << 10;
91 | const ReactionToNewerFail = 3 << 10;
92 | const ReactionToUnknownSkip = 1 << 12;
93 | const ReactionToUnknownIgnore = 2 << 12;
94 | const ReactionToUnknownFail = 3 << 12;
95 | const _ = !0;
96 | }
97 | }
98 |
99 | impl BinWriteTransform for PatchDescriptorFlag {
100 | type Type = u32;
101 | fn binwrite_transform(&self) -> std::io::Result<Self::Type> {
102 | Ok(self.0)
103 | }
104 | }
105 |
106 | #[derive(BinWrite, Clone, Default, Debug)]
107 | pub struct PatchDescriptor {
108 | pub version: u16,
109 | #[binwrite(with(binwrite_transform))]
110 | pub flags: PatchDescriptorFlag,
111 | pub old_size: u32,
112 | pub old_crc: u32,
113 | pub new_size: u32,
114 | pub new_crc: u32,
115 | }
116 |
117 | impl ExtraFieldType for PatchDescriptor {
118 | fn header_id(&self) -> u16 {
119 | 0xf
120 | }
121 | }
122 |
123 | #[derive(BinWrite, Clone, Default, Debug)]
124 | pub struct InfoZipUnicodePath {
125 | pub version: u8,
126 | pub name_crc32: u32,
127 | pub unicode_name: String,
128 | }
129 |
130 | impl ExtraFieldType for InfoZipUnicodePath {
131 | fn header_id(&self) -> u16 {
132 | 0x7075
133 | }
134 | }
135 |
136 | impl InfoZipUnicodePath {
137 | pub fn new(unicode_name: String, name: &str) -> Self {
138 | Self {
139 | version: 1,
140 | name_crc32: crc32fast::hash(name.as_bytes()),
141 | unicode_name,
142 | }
143 | }
144 | }
145 |
146 | // BinWrite is not object-safe.
147 | // The following is to make BinWrite with Box<dyn ExtraFieldType> possible.
148 |
149 | pub trait BinaryEncode {
150 | fn binary_encode(&self) -> std::io::Result<Vec<u8>>;
151 | }
152 |
153 | impl<T: BinWrite> BinaryEncode for T {
154 | fn binary_encode(&self) -> std::io::Result<Vec<u8>> {
155 | let mut bytes = Vec::new();
156 | self.write(&mut bytes)?;
157 | Ok(bytes)
158 | }
159 | }
160 |
161 | impl BinWriteTransform for Box<dyn ExtraFieldType> {
162 | type Type = Vec<u8>;
163 |
164 | fn binwrite_transform(&self) -> std::io::Result<Self::Type> {
165 | self.binary_encode()
166 | }
167 | }
168 |
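// Commentary (added sketch, not in the repo): the From + finalize pattern used
// throughout the constructions, shown on a ZIP64 field:
//
//     let mut field: ExtraField = Zip64ExtendedInfo {
//         original_size: Some(u64::from(u32::MAX) + 1),
//         ..Default::default()
//     }
//     .into(); // header_id and size are still the 0 placeholders here
//     field.finalize()?; // sets header_id = 0x0001 and size = 8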
--------------------------------------------------------------------------------
/zip-diff/src/fields.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::BinWriteTransform;
2 | use binwrite::BinWrite;
3 | use bitflags::bitflags;
4 | use chrono::{DateTime, Datelike, Timelike, Utc};
5 |
6 | #[derive(Clone, Copy, Default, Debug)]
7 | pub struct GeneralPurposeFlag(u16);
8 |
9 | bitflags! {
10 | impl GeneralPurposeFlag: u16 {
11 | const Encrypted = 1 << 0;
12 | const Compression1 = 1 << 1;
13 | const Compression2 = 1 << 2;
14 | const DataDescriptor = 1 << 3;
15 | const PatchData = 1 << 5;
16 | const StrongEncryption = 1 << 6;
17 | const LanguageEncoding = 1 << 11;
18 | const EncryptedCentralDirectory = 1 << 13;
19 | const _ = !0;
20 | }
21 | }
22 |
23 | impl BinWriteTransform for GeneralPurposeFlag {
24 | type Type = u16;
25 | fn binwrite_transform(&self) -> std::io::Result<Self::Type> {
26 | Ok(self.0)
27 | }
28 | }
29 |
30 | #[derive(Clone, Copy, Default, Debug, PartialEq, Eq)]
31 | pub struct CompressionMethod(pub u16);
32 |
33 | impl CompressionMethod {
34 | pub const STORED: Self = Self(0);
35 | pub const SHRUNK: Self = Self(1);
36 | pub const REDUCED1: Self = Self(2);
37 | pub const REDUCED2: Self = Self(3);
38 | pub const REDUCED3: Self = Self(4);
39 | pub const REDUCED4: Self = Self(5);
40 | pub const IMPLODED: Self = Self(6);
41 | pub const DEFLATED: Self = Self(8);
42 | pub const DEFLATE64: Self = Self(9);
43 | pub const BZIP2: Self = Self(12);
44 | pub const LZMA: Self = Self(14);
45 | pub const ZSTD: Self = Self(93);
46 | pub const MP3: Self = Self(94);
47 | pub const XZ: Self = Self(95);
48 | pub const JPEG: Self = Self(96);
49 | }
50 |
51 | impl BinWriteTransform for CompressionMethod {
52 | type Type = u16;
53 | fn binwrite_transform(&self) -> std::io::Result<Self::Type> {
54 | Ok(self.0)
55 | }
56 | }
57 |
58 | #[derive(Clone, Copy, Default, Debug)]
59 | pub struct InternalFileAttributes(u16);
60 |
61 | bitflags! {
62 | impl InternalFileAttributes: u16 {
63 | const TextFile = 1 << 0;
64 | const RecordLengthControl = 1 << 2;
65 | const _ = !0;
66 | }
67 | }
68 |
69 | impl BinWriteTransform for InternalFileAttributes {
70 | type Type = u16;
71 | fn binwrite_transform(&self) -> std::io::Result<Self::Type> {
72 | Ok(self.0)
73 | }
74 | }
75 |
76 | #[derive(BinWrite, Clone, Copy, Debug)]
77 | pub struct DosDateTime {
78 | pub time: u16,
79 | pub date: u16,
80 | }
81 |
82 | impl DosDateTime {
83 | pub fn new(time: u16, date: u16) -> Self {
84 | Self { time, date }
85 | }
86 | }
87 |
88 | impl From<DateTime<Utc>> for DosDateTime {
89 | fn from(dt: DateTime<Utc>) -> Self {
90 | let date = ((((dt.year() - 1980) as u32) << 9) | (dt.month() << 5) | dt.day()) as u16;
91 | let time = ((dt.hour() << 11) | (dt.minute() << 5) | (dt.second() / 2)) as u16;
92 | DosDateTime { date, time }
93 | }
94 | }
95 |
96 | impl Default for DosDateTime {
97 | fn default() -> Self {
98 | Utc::now().into()
99 | }
100 | }
101 |
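// Commentary (added example, not in the repo): a worked instance of the MS-DOS
// packing implemented by From<DateTime<Utc>> (seconds kept at 2-second
// resolution):
//
//     2001-01-02 03:04:06 UTC
//     date = ((2001 - 1980) << 9) | (1 << 5) | 2 = 0x2a22
//     time = (3 << 11) | (4 << 5) | (6 / 2)      = 0x1883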
--------------------------------------------------------------------------------
/zip-diff/src/fuzz/config.rs:
--------------------------------------------------------------------------------
1 | use clap::Parser;
2 | use fs4::available_space;
3 | use serde::Deserialize;
4 | use std::collections::BTreeMap;
5 | use std::fs::{create_dir_all, File};
6 | use std::path::PathBuf;
7 | use std::sync::LazyLock;
8 | use std::time::{Duration, Instant};
9 | use sysinfo::System;
10 |
11 | pub struct Config {
12 | pub batch_size: usize,
13 | pub parsers: Vec<String>,
14 | pub parsers_dir: PathBuf,
15 | pub input_dir: PathBuf,
16 | pub output_dir: PathBuf,
17 | pub samples_dir: PathBuf,
18 | pub results_dir: PathBuf,
19 | pub stats_file: PathBuf,
20 | pub argmax_ucb: bool,
21 | pub byte_mutation_only: bool,
22 | pub stop_at: Option<Instant>,
23 | }
24 |
25 | pub static CONFIG: LazyLock<Config> = LazyLock::new(|| {
26 | let opts = Cli::parse();
27 |
28 | create_dir_all(&opts.input_dir).expect("failed to create input dir");
29 | create_dir_all(&opts.output_dir).expect("failed to create output dir");
30 | let batch_size = opts.batch_size.unwrap_or_else(|| default_batch_size(&opts));
31 |
32 | let parsers_dir = PathBuf::from(opts.parsers_dir);
33 | let input_dir = PathBuf::from(opts.input_dir);
34 | let output_dir = PathBuf::from(opts.output_dir);
35 | let samples_dir = PathBuf::from(opts.samples_dir);
36 | let results_dir = PathBuf::from(opts.results_dir);
37 |
38 | let stats_file = PathBuf::from(opts.stats_file);
39 | create_dir_all(stats_file.parent().expect("stats file path has no parent"))
40 | .expect("failed to create parent dir for stats file");
41 |
42 | let parsers_file =
43 | File::open(parsers_dir.join("parsers.json")).expect("failed to open parsers.json");
44 | let parser_map: BTreeMap<String, ParserInfo> =
45 | serde_json::from_reader(parsers_file).expect("failed to read parsers.json");
46 |
47 | let stop_at = opts
48 | .stop_after_seconds
49 | .map(|secs| Instant::now() + Duration::from_secs(secs));
50 |
51 | Config {
52 | batch_size,
53 | parsers: parser_map.into_keys().collect(),
54 | parsers_dir,
55 | input_dir,
56 | output_dir,
57 | samples_dir,
58 | results_dir,
59 | stats_file,
60 | argmax_ucb: opts.argmax_ucb,
61 | byte_mutation_only: opts.byte_mutation_only,
62 | stop_at,
63 | }
64 | });
65 |
66 | fn default_batch_size(opts: &Cli) -> usize {
67 | let mut sys = System::new();
68 | sys.refresh_memory();
69 | let ram = sys.total_memory();
70 | let ram_batch_size = ram.div_ceil(1024 * 1024 * 1024).saturating_sub(20) as usize;
71 | if ram_batch_size < 100 {
72 | eprintln!("Warning: Available RAM is below the recommended minimum");
73 | }
74 | let disk =
75 | available_space(&opts.output_dir).expect("failed to get available space for output dir");
76 | let disk_batch_size = disk.div_ceil(2 * 1024 * 1024 * 1024) as usize;
77 | if disk_batch_size < 100 {
78 | eprintln!("Warning: Available disk space is below the recommended minimum");
79 | }
80 | ram_batch_size.min(disk_batch_size)
81 | }
82 |
83 | #[derive(Parser)]
84 | struct Cli {
85 | /// number of samples per execution batch [default: depends on available resources]
86 | #[arg(short, long)]
87 | batch_size: Option<usize>,
88 | /// stop running after this many seconds [default: infinite]
89 | #[arg(short, long)]
90 | stop_after_seconds: Option<u64>,
91 | /// directory to find the parsers
92 | #[arg(long, default_value = "../parsers")]
93 | parsers_dir: String,
94 | /// directory to temporarily save input samples for parsers in Docker
95 | #[arg(long, default_value = "../evaluation/input")]
96 | input_dir: String,
97 | /// directory to temporarily save outputs for parsers in Docker
98 | #[arg(long, default_value = "../evaluation/output")]
99 | output_dir: String,
100 | /// directory to store interesting samples
101 | #[arg(long, default_value = "../evaluation/samples")]
102 | samples_dir: String,
103 | /// directory to store outputs of interesting samples
104 | #[arg(long, default_value = "../evaluation/results")]
105 | results_dir: String,
106 | /// file to save the fuzz stats
107 | #[arg(long, default_value = "../evaluation/stats.json")]
108 | stats_file: String,
109 | /// Use argmax UCB instead of softmax UCB
110 | #[arg(long, default_value_t = false)]
111 | argmax_ucb: bool,
112 | /// Use byte-level mutations only without ZIP-level mutations
113 | #[arg(long, default_value_t = false)]
114 | byte_mutation_only: bool,
115 | }
116 |
117 | #[allow(dead_code)]
118 | #[derive(Deserialize)]
119 | struct ParserInfo {
120 | name: String,
121 | version: String,
122 | }
123 |
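When `--batch-size` is not given, `default_batch_size` budgets roughly 1 GiB of RAM per sample after reserving 20 GiB for the system, plus 2 GiB of output disk per sample, and takes the smaller bound. The same arithmetic in isolation (the figures are chosen for illustration):

    fn default_batch(ram_bytes: u64, disk_bytes: u64) -> usize {
        const GIB: u64 = 1024 * 1024 * 1024;
        let ram = ram_bytes.div_ceil(GIB).saturating_sub(20) as usize;
        let disk = disk_bytes.div_ceil(2 * GIB) as usize;
        ram.min(disk)
    }

    fn main() {
        const GIB: u64 = 1024 * 1024 * 1024;
        // 84 GiB of RAM and 300 GiB of free disk: min(84 - 20, 150) = 64
        assert_eq!(default_batch(84 * GIB, 300 * GIB), 64);
    }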
--------------------------------------------------------------------------------
/zip-diff/src/fuzz/corpus.rs:
--------------------------------------------------------------------------------
1 | use crate::feature::{Feature, PAIR_LIST};
2 | use crate::Input;
3 | use blake3::Hash;
4 | use rand::distributions::WeightedIndex;
5 | use rand::prelude::*;
6 | use rayon::prelude::*;
7 | use std::cmp::Reverse;
8 | use std::collections::HashSet;
9 |
10 | pub struct Seed {
11 | pub input: Input,
12 | pub hash: Hash,
13 | pub size: usize,
14 | pub feat: Feature,
15 | pub mutations: Vec<&'static str>,
16 | pub output_large: bool,
17 | pub selection_count: usize,
18 | pub fixed_energy: f64,
19 | }
20 |
21 | impl Seed {
22 | pub fn new(
23 | input: Input,
24 | hash: Hash,
25 | size: usize,
26 | feat: Feature,
27 | mutations: Vec<&'static str>,
28 | output_large: bool,
29 | ) -> Self {
30 | let mutation_count_energy = (-(mutations.len() as f64) / 4.0).exp();
31 | let size_energy = 100.0 / size.max(50) as f64;
32 | let ok_energy = feat.ok.count_ones(..) as f64 / feat.ok.len() as f64;
33 | Self {
34 | input,
35 | hash,
36 | size,
37 | feat,
38 | mutations,
39 | output_large,
40 | selection_count: 0,
41 | fixed_energy: mutation_count_energy + size_energy + ok_energy,
42 | }
43 | }
44 | }
45 |
46 | pub struct Corpus {
47 | seeds: Vec<Seed>,
48 | feature_sum: Feature,
49 | hash_set: HashSet<Hash>,
50 | weighted_index: Option<WeightedIndex<f64>>,
51 | }
52 |
53 | impl Corpus {
54 | pub fn new() -> Self {
55 | Self {
56 | seeds: Vec::new(),
57 | feature_sum: Feature::new(),
58 | hash_set: HashSet::new(),
59 | weighted_index: None,
60 | }
61 | }
62 |
63 | pub fn len(&self) -> usize {
64 | self.seeds.len()
65 | }
66 |
67 | pub fn zip_count(&self) -> usize {
68 | self.seeds
69 | .iter()
70 | .filter(|seed| matches!(seed.input, Input::Zip(_)))
71 | .count()
72 | }
73 |
74 | pub fn incons_count(&self) -> usize {
75 | self.feature_sum.inconsistency.count_ones(..)
76 | }
77 |
78 | pub fn feature_sum_summary(&self) -> String {
79 | self.feature_sum.summary()
80 | }
81 |
82 | pub fn consistent_pairs(&self) -> Vec<&'static (String, String)> {
83 | self.feature_sum.consistent_pairs()
84 | }
85 |
86 | pub fn best_seeds(&self) -> impl Iterator<Item = (&'static String, &'static String, &Seed)> + '_ {
87 | self.feature_sum.inconsistency.ones().map(|i| {
88 | let best = self
89 | .seeds
90 | .iter()
91 | .filter(|seed| seed.feat.inconsistency.contains(i))
92 | .max_by_key(|seed| {
93 | (
94 | Reverse(seed.mutations.len()),
95 | seed.feat.inconsistency.count_ones(..),
96 | seed.feat.ok.count_ones(..),
97 | Reverse(seed.size),
98 | )
99 | })
100 | .unwrap();
101 | let (a, b) = &PAIR_LIST[i];
102 | (a, b, best)
103 | })
104 | }
105 |
106 | pub fn insert_hash(&mut self, hash: Hash) -> bool {
107 | self.hash_set.insert(hash)
108 | }
109 |
110 | pub fn is_feature_interesting(&self, feat: &Feature) -> bool {
111 | self.seeds
112 | .par_iter()
113 | .all(|old| !feat.is_covered_by(&old.feat))
114 | }
115 |
116 | pub fn insert_seed(&mut self, seed: Seed) {
117 | self.feature_sum |= &seed.feat;
118 | self.weighted_index = None;
119 | self.seeds.retain(|old| !old.feat.is_covered_by(&seed.feat));
120 | self.seeds.push(seed);
121 | }
122 |
123 | pub fn construct_weights(&mut self) {
124 | let incons_popularity = self
125 | .seeds
126 | .par_iter()
127 | .fold_with(
128 | vec![0usize; self.feature_sum.inconsistency.len()],
129 | |mut sum, seed| {
130 | seed.feat.inconsistency.ones().for_each(|i| sum[i] += 1);
131 | sum
132 | },
133 | )
134 | .reduce_with(|mut a, b| {
135 | a.iter_mut().zip(b).for_each(|(x, y)| *x += y);
136 | a
137 | })
138 | .unwrap();
139 | let incons_energy_coef =
140 | self.seeds.len() as f64 / self.feature_sum.inconsistency.count_ones(..) as f64;
141 | let weights = self
142 | .seeds
143 | .par_iter()
144 | .map(|seed| {
145 | let selection_energy = (-(seed.selection_count as f64) / 4.0).exp();
146 | let incons_energy = seed
147 | .feat
148 | .inconsistency
149 | .ones()
150 | .map(|i| incons_energy_coef / incons_popularity[i] as f64)
151 | .sum::<f64>();
152 | let energy = seed.fixed_energy + incons_energy + selection_energy;
153 | if seed.output_large {
154 | energy / 10.0
155 | } else {
156 | energy
157 | }
158 | })
159 | .collect::<Vec<_>>();
160 | self.weighted_index = Some(WeightedIndex::new(weights).expect("invalid weights"));
161 | }
162 |
163 | pub fn select_seed(&self, rng: &mut ThreadRng) -> (usize, &Seed) {
164 | let index = self
165 | .weighted_index
166 | .as_ref()
167 | .expect("weights not constructed")
168 | .sample(rng);
169 | (index, &self.seeds[index])
170 | }
171 |
172 | pub fn record_selection(&mut self, index: usize) {
173 | self.seeds[index].selection_count += 1;
174 | }
175 | }
176 |
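Seed scheduling is roulette-wheel selection over the energies computed in `construct_weights`, backed by rand's `WeightedIndex`: higher-energy seeds are drawn proportionally more often, but no seed is excluded outright. The distribution in isolation:

    use rand::distributions::WeightedIndex;
    use rand::prelude::*;

    fn main() {
        // With energies 0.5, 2.0, 1.0, seed 1 is drawn about 4x as often as seed 0.
        let dist = WeightedIndex::new([0.5, 2.0, 1.0]).unwrap();
        let mut rng = thread_rng();
        let mut counts = [0usize; 3];
        for _ in 0..10_000 {
            counts[dist.sample(&mut rng)] += 1;
        }
        assert!(counts[1] > counts[0] && counts[1] > counts[2]);
    }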
--------------------------------------------------------------------------------
/zip-diff/src/fuzz/execute.rs:
--------------------------------------------------------------------------------
1 | use crate::config::CONFIG;
2 | use crate::corpus::{Corpus, Seed};
3 | use crate::feature::Feature;
4 | use crate::mutation::{Sample, UcbHandle};
5 | use rayon::prelude::*;
6 | use std::fs;
7 | use std::io::{BufWriter, Write};
8 | use std::path::Path;
9 | use std::process::{Command, Stdio};
10 | use std::time::Instant;
11 | use tar::Builder as TarBuilder;
12 | use walkdir::WalkDir;
13 | use zstd::Encoder as ZstdEncoder;
14 |
15 | // An input is (input sample, mutation names, UCB handles).
16 | // Returns the UCB handles and whether the input was interesting, for each actually executed (deduplicated) input.
17 | pub fn execute(corpus: &mut Corpus, samples: Vec<Sample>) -> Vec<(Vec<UcbHandle>, bool)> {
18 | save_inputs(&samples);
19 | let count = samples.len();
20 | println!("Executing {count} inputs");
21 | let start = Instant::now();
22 | run_parsers();
23 | println!(
24 | "Finished executing {count} inputs in {:.3} seconds",
25 | start.elapsed().as_secs_f64()
26 | );
27 | let start = Instant::now();
28 | let ucb_results = collect_results(corpus, samples);
29 | println!(
30 | "Collected results in {:.3} seconds",
31 | start.elapsed().as_secs_f64()
32 | );
33 | ucb_results
34 | }
35 |
36 | fn save_inputs(samples: &[Sample]) {
37 | fs::remove_dir_all(&CONFIG.input_dir).ok();
38 | fs::remove_dir_all(&CONFIG.output_dir).ok();
39 | fs::create_dir_all(&CONFIG.input_dir).expect("failed to create input dir");
40 |
41 | for sample in samples {
42 | let path = CONFIG.input_dir.join(&sample.name);
43 | fs::write(path, &sample.bytes).expect("failed to save input file");
44 | }
45 | }
46 |
47 | fn run_parsers() {
48 | Command::new("docker")
49 | .arg("compose")
50 | .arg("up")
51 | .current_dir(&CONFIG.parsers_dir)
52 | .stdout(Stdio::null())
53 | .stderr(Stdio::null())
54 | .spawn()
55 | .expect("failed to run parsers")
56 | .wait()
57 | .expect("failed to wait for parsers");
58 | }
59 |
60 | fn collect_results(corpus: &mut Corpus, samples: Vec<Sample>) -> Vec<(Vec<UcbHandle>, bool)> {
61 | samples
62 | .into_iter()
63 | .filter_map(|s| {
64 | let filename = {
65 | let hash = s.hash.to_string();
66 | let dir = hash.split_at(2).0;
67 | format!("{dir}/{hash}.zip")
68 | };
69 | let sample_path = CONFIG.samples_dir.join(&filename);
70 | if sample_path.exists() {
71 | return None;
72 | }
73 | let feat = Feature::par_read(&s.name);
74 | let interesting = corpus.is_feature_interesting(&feat);
75 | if interesting {
76 | fs::create_dir_all(sample_path.parent().unwrap())
77 | .expect("failed to create data dir");
78 | fs::rename(CONFIG.input_dir.join(&s.name), sample_path)
79 | .expect("failed to move input file");
80 | let results_dir = CONFIG.results_dir.join(&filename);
81 | fs::create_dir_all(&results_dir).expect("failed to create results dir");
82 | // First move small outputs in parallel with rayon.
83 | // Then compress large outputs with parallelized ZSTD.
84 | let large_outputs = CONFIG
85 | .parsers
86 | .par_iter()
87 | .filter_map(|parser| {
88 | let output_path = CONFIG.output_dir.join(parser).join(&s.name);
89 | let result_path = results_dir.join(parser);
90 | if output_path.is_dir() && du(&output_path) > 1024 * 1024 {
91 | // tar.zst if larger than 1 MiB
92 | Some((result_path.with_extension("tar.zst"), output_path))
93 | } else if matches!(output_path.try_exists(), Ok(false)) {
94 | fs::write(&result_path, b"").expect(&format!(
95 | "failed to write error result to {}",
96 | result_path.display(),
97 | ));
98 | None
99 | } else {
100 | fs::rename(&output_path, &result_path).expect(&format!(
101 | "failed to move {} to {}",
102 | output_path.display(),
103 | result_path.display()
104 | ));
105 | None
106 | }
107 | })
108 | .collect::<Vec<_>>();
109 | large_outputs.iter().for_each(archive_dir);
110 | corpus.insert_seed(Seed::new(
111 | s.input,
112 | s.hash,
113 | s.bytes.len(),
114 | feat,
115 | s.mutations,
116 | !large_outputs.is_empty(),
117 | ));
118 | }
119 | Some((s.ucb_handles, interesting))
120 | })
121 | .collect()
122 | }
123 |
124 | fn archive_dir((dest, src): &(impl AsRef<Path>, impl AsRef<Path>)) {
125 | let file = fs::File::create(dest).expect("failed to create result tar.zst");
126 | let mut writer = BufWriter::new(file);
127 | let mut zstd = ZstdEncoder::new(&mut writer, 1).expect("failed to create ZSTD writer");
128 | zstd.multithread(rayon::current_num_threads() as u32)
129 | .expect("failed to set multithread ZSTD");
130 | {
131 | let mut tar = TarBuilder::new(&mut zstd);
132 | tar.append_dir_all("", src)
133 | .expect("failed to archive output");
134 | tar.finish().expect("failed to finish TAR");
135 | }
136 | zstd.finish().expect("failed to finish ZSTD");
137 | writer.flush().expect("failed to flush output archive");
138 | // remove the source directory right away so it does not keep occupying the I/O cache
139 | fs::remove_dir_all(src).expect("failed to remove output directory");
140 | }
141 |
142 | fn du(path: impl AsRef<Path>) -> u64 {
143 | WalkDir::new(path)
144 | .into_iter()
145 | .filter_map(|entry| Some(entry.ok()?.metadata().ok()?.len()))
146 | .sum()
147 | }
148 |
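Interesting samples are stored content-addressed: the hex digest is sharded on its first two characters so no single directory accumulates every sample. The path scheme, mirrored for reference:

    // e.g. a digest starting with "3f" lands under the "3f/" shard
    fn sample_path(hash_hex: &str) -> String {
        let dir = hash_hex.split_at(2).0;
        format!("{dir}/{hash_hex}.zip")
    }

    fn main() {
        assert_eq!(sample_path("3fa1b2"), "3f/3fa1b2.zip");
    }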
--------------------------------------------------------------------------------
/zip-diff/src/fuzz/feature.rs:
--------------------------------------------------------------------------------
1 | use crate::config::CONFIG;
2 | use fixedbitset::FixedBitSet;
3 | use std::ops::BitOrAssign;
4 | use std::path::Path;
5 | use std::sync::LazyLock;
6 | use zip_diff::hash::{read_parsing_result, ParsingResult};
7 |
8 | #[derive(Clone, PartialEq, Eq, Hash)]
9 | pub struct Feature {
10 | pub ok: FixedBitSet,
11 | pub inconsistency: FixedBitSet,
12 | }
13 |
14 | pub static PAIR_LIST: LazyLock<Vec<(String, String)>> = LazyLock::new(Feature::pair_list);
15 |
16 | impl Feature {
17 | pub fn new() -> Self {
18 | let n = CONFIG.parsers.len();
19 | let ok = FixedBitSet::with_capacity(n);
20 | let inconsistency = FixedBitSet::with_capacity(n * (n - 1) / 2);
21 | Self { ok, inconsistency }
22 | }
23 |
24 | pub fn par_read(name: impl AsRef<Path>) -> Self {
25 | let mut feature = Self::new();
26 | feature.apply_testcase(name, true);
27 | feature
28 | }
29 |
30 | pub fn apply_testcase(&mut self, name: impl AsRef<Path>, par: bool) {
31 | let results = CONFIG
32 | .parsers
33 | .iter()
34 | .map(|parser| read_parsing_result(CONFIG.output_dir.join(parser).join(&name), par))
35 | .collect::<Vec<_>>();
36 |
37 | let mut p = 0;
38 | for (i, x) in results.iter().enumerate() {
39 | if matches!(x, ParsingResult::Ok(_)) {
40 | self.ok.insert(i);
41 | }
42 | for y in &results[..i] {
43 | if x.inconsistent_with(y) {
44 | self.inconsistency.insert(p);
45 | }
46 | p += 1;
47 | }
48 | }
49 | }
50 |
51 | pub fn is_covered_by(&self, by: &Self) -> bool {
52 | self.inconsistency.is_subset(&by.inconsistency) && self.ok.is_subset(&by.ok)
53 | }
54 |
55 | pub fn consistent_pairs(&self) -> Vec<&'static (String, String)> {
56 | self.inconsistency.zeroes().map(|i| &PAIR_LIST[i]).collect()
57 | }
58 |
59 | pub fn summary(&self) -> String {
60 | let ok_count = self.ok.count_ones(..);
61 | let incons_count = self.inconsistency.count_ones(..);
62 | format!("ok: {ok_count:2}, incons: {incons_count:4}")
63 | }
64 |
65 | fn pair_list() -> Vec<(String, String)> {
66 | let mut result = Vec::new();
67 | for (i, x) in CONFIG.parsers.iter().enumerate() {
68 | for y in CONFIG.parsers.iter().take(i) {
69 | result.push((x.clone(), y.clone()));
70 | }
71 | }
72 | result
73 | }
74 | }
75 |
76 | impl BitOrAssign<&Feature> for Feature {
77 | fn bitor_assign(&mut self, rhs: &Feature) {
78 | self.ok |= &rhs.ok;
79 | self.inconsistency |= &rhs.inconsistency;
80 | }
81 | }
82 |
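`apply_testcase` visits parser pairs (i, j) with j < i in a fixed nested-loop order, so a pair's bit position in the n·(n−1)/2-bit `inconsistency` set is its lower-triangle offset, which also matches the layout of `PAIR_LIST`. Spelled out (the helper is hypothetical):

    // p advances in the same order as the nested loop in apply_testcase
    fn pair_index(i: usize, j: usize) -> usize {
        debug_assert!(j < i);
        i * (i - 1) / 2 + j
    }

    fn main() {
        // visit order: (1,0)=0, (2,0)=1, (2,1)=2, (3,0)=3, ...
        assert_eq!(pair_index(1, 0), 0);
        assert_eq!(pair_index(2, 1), 2);
        assert_eq!(pair_index(3, 0), 3);
    }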
--------------------------------------------------------------------------------
/zip-diff/src/fuzz/generate.rs:
--------------------------------------------------------------------------------
1 | use crate::config::CONFIG;
2 | use rand::distributions::{DistString, Standard};
3 | use rand::prelude::*;
4 | use zip_diff::fields::CompressionMethod;
5 | use zip_diff::zip::ZipArchive;
6 |
7 | pub fn init_corpus() -> Vec<ZipArchive> {
8 | let mut result = Vec::new();
9 |
10 | let mut zip = ZipArchive::default();
11 | zip.add_simple("a", b"a").unwrap();
12 | zip.add_simple("b/c", b"c").unwrap();
13 | zip.add_simple("b/d", b"d").unwrap();
14 | zip.finalize().unwrap();
15 | result.push(zip);
16 |
17 | let mut rng = thread_rng();
18 |
19 | for _ in 0..CONFIG.batch_size {
20 | let mut zip = ZipArchive::default();
21 | let count = rng.gen_range(0..5);
22 | for _ in 0..count {
23 | let name_len = rng.gen_range(0..5);
24 | let name = Standard.sample_string(&mut rng, name_len);
25 | let data_len = rng.gen_range(0..10);
26 | let mut data = Vec::with_capacity(data_len);
27 | data.resize_with(data_len, || rng.gen());
28 | let compression = match rng.gen_range(0..16) {
29 | 0..8 => CompressionMethod::STORED,
30 | 8..12 => CompressionMethod::DEFLATED,
31 | 12 => CompressionMethod::BZIP2,
32 | 13 => CompressionMethod::ZSTD,
33 | 14 => CompressionMethod::LZMA,
34 | 15 => CompressionMethod::XZ,
35 | _ => unreachable!(),
36 | };
37 | zip.add_file(
38 | &name,
39 | &data,
40 | compression,
41 | rng.gen_ratio(1, 5),
42 | rng.gen_ratio(1, 5),
43 | )
44 | .unwrap();
45 | }
46 | if rng.gen_ratio(1, 5) {
47 | zip.set_eocd(true).unwrap();
48 | }
49 | zip.finalize().unwrap();
50 | result.push(zip);
51 | }
52 |
53 | result
54 | }
55 |
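`gen_ratio(1, 5)` returns true with probability exactly 1/5, so about a fifth of the generated entries force ZIP64, use data descriptors, or get a ZIP64 EOCD, while the range patterns above bias compression toward STORED (8/16) and DEFLATED (4/16). A quick empirical check of `gen_ratio`:

    use rand::prelude::*;

    fn main() {
        let mut rng = thread_rng();
        let n = 100_000;
        let hits = (0..n).filter(|_| rng.gen_ratio(1, 5)).count();
        // the hit rate should be very close to 20%
        assert!((hits as f64 / n as f64 - 0.2).abs() < 0.02);
    }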
--------------------------------------------------------------------------------
/zip-diff/src/fuzz/main.rs:
--------------------------------------------------------------------------------
1 | #![allow(clippy::expect_fun_call)]
2 |
3 | mod config;
4 | mod corpus;
5 | mod execute;
6 | mod feature;
7 | mod generate;
8 | mod mutation;
9 | mod rand_utils;
10 | mod stats;
11 |
12 | use binwrite::BinWrite;
13 | use config::CONFIG;
14 | use corpus::Corpus;
15 | use execute::execute;
16 | use mutation::Mutator;
17 | use rand::thread_rng;
18 | use stats::Stats;
19 | use std::fs::canonicalize;
20 | use std::process::Command;
21 | use std::time::Instant;
22 | use zip_diff::zip::ZipArchive;
23 |
24 | #[derive(Clone)]
25 | pub enum Input {
26 | Zip(Box<ZipArchive>),
27 | Bytes(Vec<u8>),
28 | }
29 |
30 | fn main() {
31 | let input_dir = canonicalize(&CONFIG.input_dir).expect("failed to canonicalize input dir");
32 | let output_dir = canonicalize(&CONFIG.output_dir).expect("failed to canonicalize output dir");
33 | let parser_prepare_status = Command::new(CONFIG.parsers_dir.join("prepare.sh"))
34 | .env("INPUT_DIR", input_dir)
35 | .env("OUTPUT_DIR", output_dir)
36 | .status()
37 | .expect("failed to execute prepare.sh");
38 | assert!(parser_prepare_status.success(), "prepare.sh failed");
39 |
40 | let mut mutator = Mutator::new();
41 | let mut stats = Stats::new();
42 | let mut corpus = Corpus::new();
43 | let rng = &mut thread_rng();
44 |
45 | let initial_samples = generate::init_corpus()
46 | .into_iter()
47 | .filter_map(|zip| {
48 | let input = if CONFIG.byte_mutation_only {
49 | let mut buf = Vec::new();
50 | zip.write(&mut buf)
51 | .expect("failed to convert initial ZIP to bytes");
52 | Input::Bytes(buf)
53 | } else {
54 | Input::Zip(Box::new(zip))
55 | };
56 | let sample = mutator.generate_sample(&input, &[], 0, rng);
57 | corpus.insert_hash(sample.hash).then_some(sample)
58 | })
59 | .collect();
60 |
61 | execute(&mut corpus, initial_samples);
62 |
63 | loop {
64 | println!(
65 | "inputs: {}, corpus size: {} ({} zips), sum: {}",
66 | stats.input_count(),
67 | corpus.len(),
68 | corpus.zip_count(),
69 | corpus.feature_sum_summary()
70 | );
71 | corpus.construct_weights();
72 | mutator.construct_ucb();
73 | let (seed_indices, samples): (Vec<_>, Vec<_>) = std::iter::repeat(())
74 | .filter_map(|_| {
75 | let (seed_index, seed) = corpus.select_seed(rng);
76 | let mutate_times = rand_utils::rand_len(rng);
77 | let sample =
78 | mutator.generate_sample(&seed.input, &seed.mutations, mutate_times, rng);
79 | corpus
80 | .insert_hash(sample.hash)
81 | .then_some((seed_index, sample))
82 | })
83 | .take(CONFIG.batch_size)
84 | .unzip();
85 | for index in seed_indices {
86 | corpus.record_selection(index);
87 | }
88 | let ucb_results = execute(&mut corpus, samples);
89 | mutator.record_ucb(&ucb_results);
90 | stats.record_iteration(ucb_results.len(), &corpus, &mutator);
91 | stats.save();
92 | if let Some(stop_at) = CONFIG.stop_at {
93 | if Instant::now() > stop_at {
94 | break;
95 | }
96 | }
97 | }
98 | }
99 |
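Batch deduplication rides on `HashSet::insert`, which returns false when the value is already present; combined with `bool::then_some`, inputs whose hash has been seen before silently drop out of the batch. The pattern in miniature:

    use std::collections::HashSet;

    fn main() {
        let mut seen = HashSet::new();
        // insert() returns false for duplicates, so then_some filters them out
        let batch: Vec<_> = [1, 2, 2, 3, 1]
            .into_iter()
            .filter_map(|x| seen.insert(x).then_some(x))
            .collect();
        assert_eq!(batch, vec![1, 2, 3]);
    }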
--------------------------------------------------------------------------------
/zip-diff/src/fuzz/rand_utils.rs:
--------------------------------------------------------------------------------
1 | use crate::CONFIG;
2 | use num_traits::{NumCast, Saturating, Unsigned, Zero};
3 | use rand::distributions::uniform::{SampleRange, SampleUniform};
4 | use rand::distributions::{Standard, WeightedIndex};
5 | use rand::prelude::*;
6 | use zip_diff::zip::ZipArchive;
7 |
8 | pub struct Ucb {
9 | scores: Vec<f64>,
10 | trials: Vec<f64>,
11 | weighted_index: Option<WeightedIndex<f64>>,
12 | }
13 |
14 | impl Ucb {
15 | pub fn new(len: usize) -> Self {
16 | Self {
17 | scores: vec![0.0; len],
18 | trials: vec![0.0; len],
19 | weighted_index: None,
20 | }
21 | }
22 |
23 | pub fn construct(&mut self) {
24 | for i in 0..self.scores.len() {
25 | // recent results are more important than old results
26 | self.scores[i] *= 0.995;
27 | self.trials[i] *= 0.995;
28 | }
29 | let double_ln_total_trial: f64 = 2.0 * self.trials.iter().sum::<f64>().max(1.0).ln();
30 | let weights = self
31 | .scores
32 | .iter()
33 | .zip(self.trials.iter().map(|t| t.max(1.0)))
34 | .map(|(score, trial)| {
35 | let ucb = score / trial + (double_ln_total_trial / trial).sqrt();
36 | (ucb * 5.0).exp() // softmax temperature
37 | });
38 | self.weighted_index = if CONFIG.argmax_ucb {
39 | let mut max_weight = f64::NEG_INFINITY;
40 | for w in weights.clone() {
41 | if w > max_weight {
42 | max_weight = w;
43 | }
44 | }
45 | Some(
46 | WeightedIndex::new(weights.map(|w| {
47 | if w == max_weight {
48 | 1.0
49 | } else {
50 | 1e-6 // not zero, to avoid an infinite loop when mutation always fails
51 | }
52 | }))
53 | .unwrap(),
54 | )
55 | } else {
56 | Some(WeightedIndex::new(weights).unwrap())
57 | };
58 | }
59 |
60 | pub fn sample<R: Rng>(&self, rng: &mut R) -> usize {
61 | self.weighted_index
62 | .as_ref()
63 | .expect("need to construct before sampling")
64 | .sample(rng)
65 | }
66 |
67 | pub fn record(&mut self, id: usize, trial: f64, score: f64) {
68 | self.trials[id] += trial;
69 | self.scores[id] += score;
70 | self.weighted_index = None;
71 | }
72 |
73 | pub fn scores(&self) -> &[f64] {
74 | &self.scores
75 | }
76 |
77 | pub fn trials(&self) -> &[f64] {
78 | &self.trials
79 | }
80 | }
81 |
82 | #[derive(Clone, Copy, PartialEq, Eq)]
83 | pub enum HeaderLocation {
84 | Lfh,
85 | Cdh,
86 | Both,
87 | }
88 |
89 | impl HeaderLocation {
90 | pub fn lfh(self) -> bool {
91 | matches!(self, Self::Lfh | Self::Both)
92 | }
93 |
94 | pub fn cdh(self) -> bool {
95 | matches!(self, Self::Cdh | Self::Both)
96 | }
97 | }
98 |
99 | impl Distribution<HeaderLocation> for Standard {
100 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> HeaderLocation {
101 | let i = (0..5).choose(rng).unwrap();
102 | match i {
103 | 0 => HeaderLocation::Lfh,
104 | 1 => HeaderLocation::Cdh,
105 | _ => HeaderLocation::Both,
106 | }
107 | }
108 | }
109 |
110 | pub fn rand_header<R: Rng>(zip: &ZipArchive, rng: &mut R) -> Option<(usize, HeaderLocation)> {
111 | let loc = rng.gen();
112 |
113 | let len = match loc {
114 | HeaderLocation::Lfh => zip.files.len(),
115 | HeaderLocation::Cdh => zip.cd.len(),
116 | HeaderLocation::Both => zip.files.len().min(zip.cd.len()),
117 | };
118 |
119 | let index = (0..len).choose(rng)?;
120 |
121 | Some((index, loc))
122 | }
123 |
124 | /// returns a random length x >= 1, where x is returned with probability 2^-x
125 | pub fn rand_len<R: RngCore>(rng: &mut R) -> usize {
126 | rng.next_u64().trailing_zeros() as usize + 1
127 | }
128 |
129 | pub fn mutate_len<N, R>(size: &mut N, rng: &mut R)
130 | where
131 | R: RngCore,
132 | N: Copy + Saturating + Zero + Unsigned + NumCast,
133 | {
134 | let delta = N::from(rand_len(rng)).unwrap();
135 | if size.is_zero() || rng.gen() {
136 | *size = size.saturating_add(delta);
137 | } else {
138 | *size = size.saturating_sub(delta);
139 | }
140 | }
141 |
142 | pub fn rand_range<G, T, R>(rng: &mut G, range: R) -> Option<(T, T)>
143 | where
144 | G: Rng,
145 | T: SampleUniform + Ord,
146 | R: SampleRange<T> + Clone,
147 | {
148 | if range.is_empty() {
149 | return None;
150 | }
151 | let x = rng.gen_range(range.clone());
152 | let y = rng.gen_range(range);
153 | if x < y {
154 | Some((x, y))
155 | } else {
156 | Some((y, x))
157 | }
158 | }
159 |
--------------------------------------------------------------------------------
/zip-diff/src/fuzz/stats.rs:
--------------------------------------------------------------------------------
1 | use crate::config::CONFIG;
2 | use crate::corpus::Corpus;
3 | use crate::mutation::{MutationStats, Mutator};
4 | use serde::Serialize;
5 | use std::collections::BTreeMap;
6 | use std::fs::File;
7 | use std::time::Instant;
8 |
9 | #[derive(Serialize)]
10 | struct Iteration {
11 | input_count: usize,
12 | corpus_size: usize,
13 | incons_count: usize,
14 | seconds_used: f64,
15 | }
16 |
17 | #[derive(Serialize)]
18 | struct SeedStat {
19 | hash: String,
20 | mutations: Vec<&'static str>,
21 | ok_count: usize,
22 | incons_count: usize,
23 | selection_count: usize,
24 | }
25 |
26 | #[derive(Serialize)]
27 | pub struct Stats {
28 | #[serde(skip)]
29 | start_at: Instant,
30 | /// total number of generated inputs
31 | input_count: usize,
32 | /// hash of best seeds
33 | best_seeds: Vec<SeedStat>,
34 | /// map from parser pair to best seed hash
35 | best_seed_map: BTreeMap<&'static String, BTreeMap<&'static String, String>>,
36 | /// fuzzing iteration history
37 | iterations: Vec<Iteration>,
38 | /// parser pairs that are consistent in the test cases
39 | consistent_pairs: Vec<&'static (String, String)>,
40 | /// Mutation trials
41 | mutations: Option<MutationStats>,
42 | // ablation configs
43 | argmax_ucb: bool,
44 | byte_mutation_only: bool,
45 | }
46 |
47 | impl Stats {
48 | pub fn new() -> Self {
49 | Self {
50 | start_at: Instant::now(),
51 | input_count: 0,
52 | best_seeds: Vec::new(),
53 | best_seed_map: BTreeMap::new(),
54 | iterations: Vec::new(),
55 | consistent_pairs: Vec::new(),
56 | mutations: None,
57 | argmax_ucb: CONFIG.argmax_ucb,
58 | byte_mutation_only: CONFIG.byte_mutation_only,
59 | }
60 | }
61 |
62 | pub fn record_iteration(&mut self, new_input_count: usize, corpus: &Corpus, mutator: &Mutator) {
63 | self.input_count += new_input_count;
64 | let mut best_seeds = Vec::new();
65 | self.best_seed_map = BTreeMap::new();
66 | for (a, b, seed) in corpus.best_seeds() {
67 | self.best_seed_map
68 | .entry(a)
69 | .or_default()
70 | .insert(b, seed.hash.to_string());
71 | best_seeds.push(seed);
72 | }
73 | best_seeds.sort_unstable_by_key(|seed| seed.hash.as_bytes());
74 | best_seeds.dedup_by_key(|seed| &seed.hash);
75 | self.best_seeds = best_seeds
76 | .into_iter()
77 | .map(|seed| SeedStat {
78 | hash: seed.hash.to_string(),
79 | mutations: seed.mutations.clone(),
80 | ok_count: seed.feat.ok.count_ones(..),
81 | incons_count: seed.feat.inconsistency.count_ones(..),
82 | selection_count: seed.selection_count,
83 | })
84 | .collect();
85 | self.iterations.push(Iteration {
86 | input_count: self.input_count,
87 | corpus_size: corpus.len(),
88 | incons_count: corpus.incons_count(),
89 | seconds_used: self.start_at.elapsed().as_secs_f64(),
90 | });
91 | self.consistent_pairs = corpus.consistent_pairs();
92 | self.mutations = Some(mutator.stats());
93 | }
94 |
95 | pub fn input_count(&self) -> usize {
96 | self.input_count
97 | }
98 |
99 | pub fn save(&self) {
100 | let file = File::create(&CONFIG.stats_file).expect("failed to create stats file");
101 | serde_json::to_writer_pretty(file, self).expect("failed to write stats");
102 | }
103 | }
104 |
--------------------------------------------------------------------------------
/zip-diff/src/hash.rs:
--------------------------------------------------------------------------------
1 | use blake3::{Hash, Hasher};
2 | use std::path::Path;
3 |
4 | #[derive(Clone, Copy)]
5 | pub enum ParsingResult {
6 | Ok(Hash),
7 | Err,
8 | }
9 |
10 | impl ParsingResult {
11 | pub fn inconsistent_with(&self, rhs: &Self) -> bool {
12 | match (self, rhs) {
13 | (ParsingResult::Ok(lhs), ParsingResult::Ok(rhs)) => lhs != rhs,
14 | _ => false,
15 | }
16 | }
17 | }
18 |
19 | pub fn read_parsing_result(path: impl AsRef<Path>, par: bool) -> ParsingResult {
20 | let path = path.as_ref();
21 | if path.is_dir() {
22 | ParsingResult::Ok(dirhash(path, par).unwrap_or(Hash::from_bytes(Default::default())))
23 | } else {
24 | ParsingResult::Err
25 | }
26 | }
27 |
28 | // Returns `None` for an empty directory
29 | fn dirhash(path: impl AsRef<Path>, par: bool) -> Option<Hash> {
30 | let path = path.as_ref();
31 | let path_display = path.display();
32 | let mut hasher = Hasher::new();
33 |
34 | if path.is_symlink() {
35 | hasher.update(b"L");
36 | hasher.update(
37 | &path
38 | .read_link()
39 | .unwrap_or_else(|_| panic!("failed to read link {path_display}"))
40 | .into_os_string()
41 | .into_encoded_bytes(),
42 | );
43 | } else if path.is_file() {
44 | hasher.update(b"F");
45 | if par {
46 | hasher.update_mmap_rayon(path)
47 | } else {
48 | hasher.update_mmap(path)
49 | }
50 | .unwrap_or_else(|_| panic!("failed to read file {path_display}"));
51 | } else if path.is_dir() {
52 | hasher.update(b"D");
53 | let mut children = path
54 | .read_dir()
55 | .unwrap_or_else(|_| panic!("failed to read dir {path_display}"))
56 | .filter_map(|entry| {
57 | let entry =
58 | entry.unwrap_or_else(|_| panic!("failed to read dir entry in {path_display}"));
59 | let entry_path = entry.path();
60 | let mut hasher = Hasher::new();
61 | let name = entry.file_name().into_encoded_bytes();
62 | if name.iter().all(|x| {
63 | x.is_ascii_alphanumeric() || matches!(x, b'.' | b'_' | b'-' | b'[' | b']')
64 | }) {
65 | hasher.update(b"N");
66 | hasher.update(&name);
67 | } else {
68 | // treat all special file names as the same
69 | hasher.update(b"S");
70 | }
71 | hasher.update(
72 | dirhash(entry_path, par)? /* ignore empty dir */
73 | .as_bytes(),
74 | );
75 | Some(hasher.finalize().into())
76 | })
77 | .collect::<Vec<[u8; 32]>>();
78 | if children.is_empty() {
79 | return None;
80 | }
81 | children.sort_unstable();
82 | for child in children {
83 | hasher.update(&child);
84 | }
85 | } else {
86 | panic!("file does not exist, permission error, or unknown file type: {path_display}");
87 | }
88 |
89 | Some(hasher.finalize())
90 | }
91 |
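`dirhash` hashes each directory entry separately and sorts the child digests before folding them into the parent hash, so the result does not depend on readdir order, while the `L`/`F`/`D` and `N`/`S` tags keep different node kinds from colliding. The order-independence, reduced to its core:

    use blake3::Hasher;

    // Child digests are sorted before being combined, so sibling order
    // on disk cannot change the directory hash.
    fn combine(mut children: Vec<[u8; 32]>) -> blake3::Hash {
        children.sort_unstable();
        let mut h = Hasher::new();
        h.update(b"D");
        for c in &children {
            h.update(c);
        }
        h.finalize()
    }

    fn main() {
        let a = *blake3::hash(b"a").as_bytes();
        let b = *blake3::hash(b"b").as_bytes();
        assert_eq!(combine(vec![a, b]), combine(vec![b, a]));
    }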
--------------------------------------------------------------------------------
/zip-diff/src/lfh.rs:
--------------------------------------------------------------------------------
1 | use crate::extra::{ExtraField, Zip64ExtendedInfo};
2 | use crate::fields::*;
3 | use crate::utils::{binwrite_transform, BinCount};
4 | use anyhow::{bail, Context, Result};
5 | use binwrite::BinWrite;
6 | use educe::Educe;
7 |
8 | #[derive(BinWrite, Clone, Educe)]
9 | #[educe(Debug, Default)]
10 | pub struct LocalFileHeader {
11 | #[educe(Default = Self::SIGNATURE)]
12 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))]
13 | pub signature: u32,
14 | #[educe(Default = 20)]
15 | pub version_needed: u16,
16 | #[binwrite(with(binwrite_transform))]
17 | pub general_purpose_flag: GeneralPurposeFlag,
18 | #[binwrite(with(binwrite_transform))]
19 | pub compression_method: CompressionMethod,
20 | pub last_mod: DosDateTime,
21 | #[educe(Debug(method(std::fmt::LowerHex::fmt)))]
22 | pub crc32: u32,
23 | pub compressed_size: u32,
24 | pub uncompressed_size: u32,
25 | pub file_name_length: u16,
26 | pub extra_field_length: u16,
27 | #[educe(Debug(method(crate::utils::fmt_utf8)))]
28 | pub file_name: Vec<u8>,
29 | pub extra_fields: Vec<ExtraField>,
30 | /// only one of `extra_fields` and `extra_fields_raw` can be set
31 | #[educe(Debug(method(crate::utils::fmt_hex)))]
32 | pub extra_fields_raw: Vec<u8>,
33 |
34 | #[binwrite(ignore)]
35 | pub zip64: Zip64ExtendedInfo,
36 | #[binwrite(ignore)]
37 | pub keep_empty_zip64: bool,
38 | }
39 |
40 | impl LocalFileHeader {
41 | pub const SIGNATURE: u32 = 0x04034b50;
42 |
43 | /// Set LFH field and ZIP64 field according to size
44 | pub fn set_compressed_size(&mut self, size: usize, force_zip64: bool) {
45 | if !force_zip64 {
46 | if let Ok(size) = size.try_into() {
47 | self.compressed_size = size;
48 | self.zip64.compressed_size = None;
49 | return;
50 | }
51 | }
52 | self.compressed_size = u32::MAX;
53 | self.zip64.compressed_size = Some(size as u64);
54 | }
55 |
56 | /// Set LFH field and ZIP64 field according to size
57 | pub fn set_uncompressed_size(&mut self, size: usize, force_zip64: bool) {
58 | if !force_zip64 {
59 | if let Ok(size) = size.try_into() {
60 | self.uncompressed_size = size;
61 | self.zip64.original_size = None;
62 | return;
63 | }
64 | }
65 | self.uncompressed_size = u32::MAX;
66 | self.zip64.original_size = Some(size as u64);
67 | }
68 |
69 | pub fn set_file_name(&mut self, file_name: &str) -> Result<()> {
70 | file_name.as_bytes().clone_into(&mut self.file_name);
71 | self.file_name_length = self.file_name.len().try_into()?;
72 | Ok(())
73 | }
74 |
75 | /// Finalize extra fields, add ZIP64 field
76 | pub fn finalize(&mut self) -> Result<()> {
77 | if self.keep_empty_zip64 || !self.zip64.is_empty() {
78 | self.extra_fields.push(ExtraField {
79 | header_id: 0,
80 | size: 0,
81 | data: Box::new(self.zip64.clone()),
82 | });
83 | }
84 |
85 | if !self.extra_fields.is_empty() && !self.extra_fields_raw.is_empty() {
86 | bail!("extra_fields and extra_fields_raw cannot be set at the same time");
87 | }
88 |
89 | if self.extra_fields.is_empty() {
90 | self.extra_field_length = self
91 | .extra_fields_raw
92 | .len()
93 | .try_into()
94 | .context("Extra fields too long")?;
95 | } else {
96 | for field in &mut self.extra_fields {
97 | field.finalize()?;
98 | }
99 |
100 | self.extra_field_length = self
101 | .extra_fields
102 | .byte_count()
103 | .context("Failed to count extra fields")?
104 | .try_into()
105 | .context("Extra fields too long")?;
106 | }
107 |
108 | Ok(())
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/zip-diff/src/lib.rs:
--------------------------------------------------------------------------------
1 | pub mod cdh;
2 | pub mod compress;
3 | pub mod dd;
4 | pub mod eocd;
5 | pub mod extra;
6 | pub mod fields;
7 | pub mod hash;
8 | pub mod lfh;
9 | pub mod utils;
10 | pub mod zip;
11 |
--------------------------------------------------------------------------------
/zip-diff/src/utils.rs:
--------------------------------------------------------------------------------
1 | use crate::cdh::CentralDirectoryHeader;
2 | use crate::compress::decompress;
3 | use crate::fields::CompressionMethod;
4 | use crate::zip::FileEntry;
5 | use anyhow::{anyhow, Context, Result};
6 | use binwrite::{BinWrite, WriterOption};
7 | use std::fmt::{self, Debug, Formatter};
8 | use std::io::{self, Write};
9 |
10 | pub fn binwrite_option<W, T>(
11 | option: &Option,
12 | writer: &mut W,
13 | options: &WriterOption,
14 | ) -> io::Result<()>
15 | where
16 | W: Write,
17 | T: BinWrite,
18 | {
19 | if let Some(val) = option {
20 | val.write_options(writer, options)?;
21 | }
22 | Ok(())
23 | }
24 |
25 | pub trait BinWriteTransform {
26 | type Type: BinWrite;
27 |
28 | fn binwrite_transform(&self) -> io::Result<Self::Type>;
29 | }
30 |
31 | pub fn binwrite_transform<W, T>(var: &T, writer: &mut W, options: &WriterOption) -> io::Result<()>
32 | where
33 | W: Write,
34 | T: BinWriteTransform,
35 | {
36 | var.binwrite_transform()?.write_options(writer, options)
37 | }
38 |
39 | pub trait BinCount {
40 | /// Count how many bytes would be written via `BinWrite`.
41 | fn byte_count(&self) -> Result<usize>;
42 | }
43 |
44 | impl<T: BinWrite> BinCount for T {
45 | fn byte_count(&self) -> Result<usize> {
46 | let mut counter = WriteCounter::new();
47 | self.write(&mut counter)?;
48 | Ok(counter.count)
49 | }
50 | }
51 |
52 | struct WriteCounter {
53 | count: usize,
54 | }
55 |
56 | impl WriteCounter {
57 | fn new() -> Self {
58 | WriteCounter { count: 0 }
59 | }
60 | }
61 |
62 | impl Write for WriteCounter {
63 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
64 | self.count += buf.len();
65 | Ok(buf.len())
66 | }
67 |
68 | fn flush(&mut self) -> io::Result<()> {
69 | Ok(())
70 | }
71 | }
72 |
73 | pub fn find_file<T, I>(iter: I, file_name: &str) -> Result<(usize, T)>
74 | where
75 | T: GetFileName,
76 | I: IntoIterator<Item = T>,
77 | {
78 | iter.into_iter()
79 | .enumerate()
80 | .find(|(_, f)| f.get_file_name() == file_name.as_bytes())
81 | .context(format!("Failed to find {}", file_name))
82 | }
83 |
84 | pub trait GetFileName {
85 | fn get_file_name(&self) -> &[u8];
86 | }
87 |
88 | impl GetFileName for FileEntry {
89 | fn get_file_name(&self) -> &[u8] {
90 | &self.lfh.file_name
91 | }
92 | }
93 |
94 | impl GetFileName for &FileEntry {
95 | fn get_file_name(&self) -> &[u8] {
96 | &self.lfh.file_name
97 | }
98 | }
99 |
100 | impl GetFileName for &mut FileEntry {
101 | fn get_file_name(&self) -> &[u8] {
102 | &self.lfh.file_name
103 | }
104 | }
105 |
106 | impl GetFileName for &mut CentralDirectoryHeader {
107 | fn get_file_name(&self) -> &[u8] {
108 | &self.file_name
109 | }
110 | }
111 |
112 | pub fn align_entry_size(entries: &mut [&mut FileEntry], padding: u8) -> Result<()> {
113 | for entry in entries.iter_mut() {
114 | entry.data = decompress(entry.lfh.compression_method, &entry.data)?;
115 | }
116 |
117 | let max_len = entries
118 | .iter()
119 | .map(|entry| entry.data.len())
120 | .max()
121 | .ok_or(anyhow!("no entry provided"))?;
122 |
123 | for entry in entries {
124 | entry.data.resize(max_len, padding);
125 | entry.lfh.compressed_size = max_len as u32;
126 | entry.lfh.uncompressed_size = max_len as u32;
127 | entry.lfh.compression_method = CompressionMethod::STORED;
128 | entry.lfh.crc32 = crc32fast::hash(&entry.data);
129 | }
130 |
131 | Ok(())
132 | }
133 |
134 | pub fn fmt_utf8(b: &[u8], f: &mut Formatter) -> fmt::Result {
135 | std::str::from_utf8(b).map_err(|_| fmt::Error)?.fmt(f)
136 | }
137 |
138 | pub fn fmt_hex(b: &[u8], f: &mut Formatter) -> fmt::Result {
139 | for x in b {
140 | write!(f, "{x:02x} ")?;
141 | }
142 | Ok(())
143 | }
144 |
145 | // reference: https://github.com/shuax/custom_crc32
146 | pub fn crc32_patch(data: &[u8], target: u32) -> u32 {
147 | const fn crc32_rev(byte: u32) -> u32 {
148 | const POLY: u32 = 0xedb88320;
149 | let mut x = byte << 24;
150 | let mut i = 0;
151 | while i < 8 {
152 | if x & 0x80000000 != 0 {
153 | x = ((x ^ POLY) << 1) | 1;
154 | } else {
155 | x <<= 1;
156 | }
157 | i += 1;
158 | }
159 | x
160 | }
161 |
162 | let current = !crc32fast::hash(data);
163 | let mut result = !target;
164 | for i in 0..4 {
165 | result = (result << 8) ^ crc32_rev(result >> 24) ^ ((current >> ((3 - i) * 8)) & 0xff);
166 | }
167 | result
168 | }
169 |
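`crc32_patch` runs the CRC-32 register backwards through four byte steps from the desired final value, yielding a 32-bit word that, appended to `data`, forces the checksum of the whole stream to `target`. A usage sketch, assuming (as in the referenced custom_crc32 code) that the patch word is appended in little-endian order:

    use zip_diff::utils::crc32_patch;

    fn main() {
        let data = b"hello";
        let target = 0xdeadbeef_u32;
        let patch = crc32_patch(data, target);
        let mut patched = data.to_vec();
        patched.extend_from_slice(&patch.to_le_bytes()); // assumption: LE append
        assert_eq!(crc32fast::hash(&patched), target);
    }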
--------------------------------------------------------------------------------
/zip-diff/src/zip.rs:
--------------------------------------------------------------------------------
1 | use crate::cdh::CentralDirectoryHeader;
2 | use crate::compress::compress;
3 | use crate::dd::{DataDescriptor, U32or64};
4 | use crate::eocd::{
5 | EndOfCentralDirectoryRecord, Zip64EndOfCentralDirectoryLocator,
6 | Zip64EndOfCentralDirectoryRecord,
7 | };
8 | use crate::fields::{CompressionMethod, GeneralPurposeFlag};
9 | use crate::lfh::LocalFileHeader;
10 | use crate::utils::{binwrite_option, BinCount};
11 | use anyhow::{Context, Result};
12 | use binwrite::BinWrite;
13 | use educe::Educe;
14 | use std::fmt::{self, Formatter};
15 |
16 | #[derive(BinWrite, Clone, Default, Debug)]
17 | pub struct ZipArchive {
18 | pub files: Vec<FileEntry>,
19 | pub cd: Vec<CentralDirectoryHeader>,
20 | #[binwrite(with(binwrite_option))]
21 | pub zip64_eocdr: Option<Zip64EndOfCentralDirectoryRecord>,
22 | #[binwrite(with(binwrite_option))]
23 | pub zip64_eocdl: Option<Zip64EndOfCentralDirectoryLocator>,
24 | pub eocdr: EndOfCentralDirectoryRecord,
25 | }
26 |
27 | #[derive(BinWrite, Clone, Default, Educe)]
28 | #[educe(Debug)]
29 | pub struct FileEntry {
30 | pub lfh: LocalFileHeader,
31 | #[educe(Debug(method = fmt_len))]
32 | pub data: Vec<u8>,
33 | #[binwrite(with(binwrite_option))]
34 | pub dd: Option<DataDescriptor>,
35 | }
36 |
37 | fn fmt_len<T>(v: &[T], f: &mut Formatter<'_>) -> fmt::Result {
38 | write!(f, "Vec<{}> ({})", std::any::type_name::(), v.len())
39 | }
40 |
41 | impl FileEntry {
42 | pub fn new(
43 | name: &str,
44 | uncompressed_data: &[u8],
45 | compression_method: CompressionMethod,
46 | force_zip64: bool,
47 | use_dd: bool,
48 | ) -> Result<Self> {
49 | let compressed_data = compress(compression_method, uncompressed_data)?;
50 | let crc32 = crc32fast::hash(uncompressed_data);
51 |
52 | let mut lfh = LocalFileHeader {
53 | compression_method,
54 | file_name_length: name.len().try_into().context("File name too long")?,
55 | file_name: name.into(),
56 | ..Default::default()
57 | };
58 |
59 | // When data descriptor is used, also set these fields for CDH.
60 | lfh.crc32 = crc32;
61 | lfh.set_compressed_size(compressed_data.len(), force_zip64);
62 | lfh.set_uncompressed_size(uncompressed_data.len(), force_zip64);
63 |
64 | let dd = if use_dd {
65 | lfh.general_purpose_flag
66 | .insert(GeneralPurposeFlag::DataDescriptor);
67 |
68 | let compressed_size = if let Some(size) = lfh.zip64.compressed_size {
69 | lfh.keep_empty_zip64 = true;
70 | U32or64::U64(size)
71 | } else {
72 | U32or64::U32(lfh.compressed_size)
73 | };
74 |
75 | let uncompressed_size = if let Some(size) = lfh.zip64.original_size {
76 | lfh.keep_empty_zip64 = true;
77 | U32or64::U64(size)
78 | } else {
79 | U32or64::U32(lfh.uncompressed_size)
80 | };
81 |
82 | Some(DataDescriptor {
83 | signature: Some(DataDescriptor::SIGNATURE),
84 | crc32,
85 | compressed_size,
86 | uncompressed_size,
87 | })
88 | } else {
89 | None
90 | };
91 |
92 | Ok(Self {
93 | lfh,
94 | data: compressed_data,
95 | dd,
96 | })
97 | }
98 |
99 | pub fn push_into_cd(
100 | &self,
101 | cd: &mut Vec<CentralDirectoryHeader>,
102 | offset: &mut usize,
103 | ) -> Result<()> {
104 | let mut cdh: CentralDirectoryHeader = self.into();
105 | cdh.set_offset(*offset, false);
106 | cdh.finalize()?;
107 | cd.push(cdh);
108 | *offset += self.byte_count()?;
109 | Ok(())
110 | }
111 | }
112 |
113 | impl ZipArchive {
114 | pub fn add_file(
115 | &mut self,
116 | name: &str,
117 | uncompressed_data: &[u8],
118 | compression_method: CompressionMethod,
119 | force_zip64: bool,
120 | use_dd: bool,
121 | ) -> Result<()> {
122 | self.files.push(FileEntry::new(
123 | name,
124 | uncompressed_data,
125 | compression_method,
126 | force_zip64,
127 | use_dd,
128 | )?);
129 | Ok(())
130 | }
131 |
132 | pub fn add_simple(&mut self, name: &str, data: &[u8]) -> Result<()> {
133 | self.add_file(name, data, CompressionMethod::STORED, false, false)
134 | }
135 |
136 | pub fn set_eocd(&mut self, force_zip64: bool) -> Result<()> {
137 | let mut offset = 0;
138 | if let Some(last_cdh) = self.cd.last() {
139 | offset += last_cdh.relative_header_offset as usize;
140 | }
141 | if let Some(last_file) = self.files.last() {
142 | offset += last_file.byte_count()?;
143 | }
144 |
145 | let mut zip64_eocdr = Zip64EndOfCentralDirectoryRecord {
146 | this_disk_cdh_count: self.cd.len() as u64,
147 | total_cdh_count: self.cd.len() as u64,
148 | size_of_cd: self.cd.byte_count()? as u64,
149 | offset_of_cd_wrt_starting_disk: offset as u64,
150 | ..Default::default()
151 | };
152 |
153 | if let (false, Ok(eocdr)) = (
154 | force_zip64,
155 | TryInto::<EndOfCentralDirectoryRecord>::try_into(&zip64_eocdr),
156 | ) {
157 | self.eocdr = eocdr;
158 | self.zip64_eocdl = None;
159 | self.zip64_eocdr = None;
160 | } else {
161 | zip64_eocdr.finalize()?;
162 | self.eocdr = EndOfCentralDirectoryRecord::all_ff();
163 | self.zip64_eocdl = Some(Zip64EndOfCentralDirectoryLocator::from_offset(
164 | offset as u64 + zip64_eocdr.size_of_cd,
165 | ));
166 | self.zip64_eocdr = Some(zip64_eocdr);
167 | }
168 |
169 | Ok(())
170 | }
171 |
172 | pub fn finalize(&mut self) -> Result<()> {
173 | self.cd.clear();
174 |
175 | let mut offset: usize = 0;
176 |
177 | for file in &mut self.files {
178 | let mut cdh: CentralDirectoryHeader = (&*file).into();
179 | cdh.set_offset(offset, false);
180 | cdh.finalize()?;
181 | self.cd.push(cdh);
182 | file.lfh.finalize()?;
183 | offset += file.byte_count().context("Failed to count file bytes")?;
184 | }
185 |
186 | self.set_eocd(false)
187 | }
188 |
189 | pub fn set_offsets(&mut self, base: usize) -> Result<()> {
190 | let mut offset: usize = base;
191 |
192 | for (file, cdh) in self.files.iter_mut().zip(self.cd.iter_mut()) {
193 | cdh.set_offset(offset, false);
194 | cdh.finalize()?;
195 | file.lfh.finalize()?;
196 | offset += file.byte_count().context("Failed to count file bytes")?;
197 | }
198 |
199 | self.set_eocd(false)
200 | }
201 | }
202 |
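End to end: `add_simple` stores an entry uncompressed, `finalize` rebuilds the central directory and the end-of-central-directory records, and binwrite's `BinWrite` serializes the archive, which begins with the LFH signature 0x04034b50 (`PK\x03\x04` on disk). A minimal usage sketch:

    use binwrite::BinWrite;
    use zip_diff::zip::ZipArchive;

    fn main() -> anyhow::Result<()> {
        let mut zip = ZipArchive::default();
        zip.add_simple("hello.txt", b"hello")?;
        zip.finalize()?;

        let mut bytes = Vec::new();
        zip.write(&mut bytes)?;
        assert_eq!(&bytes[..4], b"PK\x03\x04"); // LocalFileHeader::SIGNATURE
        Ok(())
    }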
--------------------------------------------------------------------------------