├── .github └── workflows │ ├── ci.yml │ └── release.yml ├── .gitignore ├── .goreleaser.yaml ├── LICENSE ├── Makefile ├── README.md ├── adapters └── cli │ ├── build_binary.go │ └── driver.go ├── archiver.go ├── archiver_options.go ├── archiver_test.go ├── cli.go ├── cli_test.go ├── cmd ├── punzip │ ├── main.go │ └── punzip_test.go └── pzip │ ├── main.go │ └── pzip_test.go ├── extra.go ├── extractor.go ├── extractor_options.go ├── extractor_test.go ├── go.mod ├── go.sum ├── internal └── testutils │ └── archiver.go ├── pool ├── file.go ├── file_test.go ├── file_worker_pool.go ├── file_worker_pool_test.go └── worker_pool.go ├── specifications ├── archive.go └── extract.go └── testdata ├── hello.md ├── hello.txt ├── hello ├── hello.txt └── nested │ └── hello.md └── test.zip /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | test: 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | os: 17 | - ubuntu-latest 18 | - macos-latest 19 | go: 20 | - '1.21' 21 | include: 22 | - go: '1.21' 23 | GO_SEMVER: '~1.21.0' 24 | 25 | runs-on: ${{ matrix.os }} 26 | 27 | steps: 28 | - name: Checkout code 29 | uses: actions/checkout@v4 30 | 31 | - name: Install Go 32 | uses: actions/setup-go@v4 33 | with: 34 | go-version: ${{ matrix.GO_SEMVER }} 35 | check-latest: true 36 | 37 | - name: Get dependencies 38 | run: | 39 | go get -v -t -d ./... 40 | 41 | - name: Build pzip 42 | working-directory: ./cmd/pzip 43 | env: 44 | CGO_ENABLED: 0 45 | run: | 46 | go build -v 47 | 48 | - name: Build punzip 49 | working-directory: ./cmd/punzip 50 | env: 51 | CGO_ENABLED: 0 52 | run: | 53 | go build -v 54 | 55 | 56 | - name: Run tests 57 | run: | 58 | go test -v -race ./... 
59 | 60 | 61 | goreleaser-check: 62 | runs-on: ubuntu-latest 63 | steps: 64 | - name: Checkout code 65 | uses: actions/checkout@v4 66 | 67 | - uses: goreleaser/goreleaser-action@v4 68 | with: 69 | version: latest 70 | args: check 71 | env: 72 | TAG: ${{ steps.vars.outputs.version_tag }} 73 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | 8 | jobs: 9 | release: 10 | name: Release 11 | strategy: 12 | matrix: 13 | os: 14 | - ubuntu-latest 15 | go: 16 | - '1.21' 17 | include: 18 | - go: '1.21' 19 | GO_SEMVER: '~1.21.0' 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - name: Checkout code 24 | uses: actions/checkout@v4 25 | with: 26 | fetch-depth: 0 27 | - name: Install Go 28 | uses: actions/setup-go@v4 29 | with: 30 | go-version: ${{ matrix.GO_SEMVER }} 31 | check-latest: true 32 | - name: Install Cloudsmith CLI 33 | run: pip install --upgrade cloudsmith-cli 34 | - name: Run GoReleaser 35 | uses: goreleaser/goreleaser-action@v4 36 | with: 37 | version: latest 38 | args: release --clean --timeout 30m 39 | env: 40 | GITHUB_TOKEN: ${{ secrets.GH_PAT }} 41 | TAG: ${{ steps.vars.outputs.version_tag }} 42 | - name: Publish .deb to Cloudsmith 43 | if: ${{ steps.vars.outputs.tag_special == '' }} 44 | env: 45 | CLOUDSMITH_API_KEY: ${{ secrets.CLOUDSMITH_API_KEY }} 46 | run: | 47 | for filename in dist/*.deb; do 48 | echo "Pushing $filename to 'stable'" 49 | cloudsmith push deb pzip/stable/any-distro/any-version $filename 50 | done 51 | 52 | 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | todos.txt 2 | 3 | # internal benchmarking and profiling artifacts 4 | benchmarks.txt 5 | benchmark 6 | profiles 7 | 8 | # mac specific 9 |
.DS_Store 10 | 11 | # goreleaser artifacts 12 | dist 13 | pzip-build 14 | pzip-dist 15 | -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | project_name: pzip 2 | 3 | before: 4 | hooks: 5 | - go mod tidy 6 | 7 | builds: 8 | - id: pzip 9 | env: 10 | - CGO_ENABLED=0 11 | - GO111MODULE=on 12 | main: ./cmd/pzip/ 13 | binary: pzip 14 | goos: 15 | - linux 16 | - darwin 17 | - freebsd 18 | goarch: 19 | - amd64 20 | - arm64 21 | - id: punzip 22 | env: 23 | - CGO_ENABLED=0 24 | - GO111MODULE=on 25 | main: ./cmd/punzip/ 26 | binary: punzip 27 | goos: 28 | - linux 29 | - darwin 30 | - freebsd 31 | goarch: 32 | - amd64 33 | - arm64 34 | 35 | archives: 36 | - id: pzip-archive 37 | format: tar.gz 38 | builds: 39 | - pzip 40 | name_template: >- 41 | pzip_ 42 | {{- title .Os }}_ 43 | {{- if eq .Arch "amd64" }}x86_64 44 | {{- else if eq .Arch "386" }}i386 45 | {{- else }}{{ .Arch }}{{ end }} 46 | {{- if .Arm }}v{{ .Arm }}{{ end }} 47 | format_overrides: 48 | - goos: windows 49 | format: zip 50 | - id: punzip-archive 51 | format: tar.gz 52 | builds: 53 | - punzip 54 | name_template: >- 55 | punzip_ 56 | {{- title .Os }}_ 57 | {{- if eq .Arch "amd64" }}x86_64 58 | {{- else if eq .Arch "386" }}i386 59 | {{- else }}{{ .Arch }}{{ end }} 60 | {{- if .Arm }}v{{ .Arm }}{{ end }} 61 | format_overrides: 62 | - goos: windows 63 | format: zip 64 | 65 | snapshot: 66 | name_template: "{{ incpatch .Version }}-next" 67 | 68 | changelog: 69 | sort: asc 70 | filters: 71 | exclude: 72 | - '^docs?:' 73 | - '^tests?:' 74 | - '^readme:' 75 | 76 | nfpms: 77 | - id: pzip-package 78 | builds: 79 | - pzip 80 | package_name: pzip 81 | maintainer: Yusuf Birader 82 | homepage: https://github.com/ybirader/pzip 83 | description: | 84 | pzip, short for parallel-zip, is a blazing fast concurrent zip archiver. 
85 | license: Apache 2.0 86 | formats: 87 | - deb 88 | bindir: /usr/bin 89 | - id: punzip-package 90 | builds: 91 | - punzip 92 | package_name: punzip 93 | maintainer: Yusuf Birader 94 | homepage: https://github.com/ybirader/pzip 95 | description: | 96 | punzip, short for parallel-unzip, is a blazing fast concurrent zip extractor. 97 | license: Apache 2.0 98 | formats: 99 | - deb 100 | bindir: /usr/bin 101 | 102 | release: 103 | github: 104 | owner: ybirader 105 | name: pzip 106 | draft: true 107 | prerelease: auto 108 | header: | 109 | ## Features 110 | 111 | List of newly introduced features: 112 | 113 | - Item 1 114 | - Item 2 115 | 116 | ## Bug fixes 117 | 118 | List of fixed issues: 119 | 120 | - Item 1 121 | - Item 2 122 | 123 | brews: 124 | - name: pzip 125 | description: "pzip, short for parallel-zip, is a blazing fast concurrent zip archiver." 126 | license: Apache 2.0 127 | homepage: https://github.com/ybirader/pzip 128 | ids: 129 | - pzip-archive 130 | repository: 131 | name: homebrew-pzip 132 | owner: ybirader 133 | - name: punzip 134 | description: "punzip, short for parallel-unzip, is a blazing fast concurrent zip extractor." 135 | license: Apache 2.0 136 | homepage: https://github.com/ybirader/pzip 137 | ids: 138 | - punzip-archive 139 | repository: 140 | name: homebrew-pzip 141 | owner: ybirader 142 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2023 Yusuf Birader 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | test: 2 | go test ./... 3 | 4 | test-short: 5 | go test -short ./... 6 | 7 | build: 8 | go build -o ./cmd/pzip ./cmd/pzip && go build -o ./cmd/punzip ./cmd/punzip 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![logo-5](https://github.com/ybirader/pzip/assets/68111562/0b3cee2c-1af0-4753-b088-8a488f8ff642) 2 | 3 | # pzip 4 | pzip, short for parallel-zip, is a blazing fast concurrent zip archiver and extractor. 5 | 6 | ## Features 7 | 8 | - Archives files and directories into a valid zip archive, using DEFLATE. 9 | - Preserves modification times of files. 
10 | - Files are read and compressed concurrently 11 | 12 | ## Installation 13 | 14 | ### Command Line 15 | 16 | For command-line usage, we provide two binaries which can be installed separately: 17 | - **pzip-** concurrent zip archiving 18 | - **punzip-** concurrent zip extraction 19 | 20 | To install, run: 21 | 22 | ### macOS 23 | 24 | For zip archiving: `brew install ybirader/pzip/pzip` 25 | 26 | For zip extraction: `brew install ybirader/pzip/punzip` 27 | 28 | #### Debian, Ubuntu, Raspbian 29 | 30 | For the latest stable release: 31 | 32 | ``` 33 | curl -1sLf 'https://dl.cloudsmith.io/public/pzip/stable/setup.deb.sh' | sudo -E bash 34 | sudo apt update 35 | sudo apt install pzip 36 | ``` 37 | 38 | ``` 39 | curl -1sLf 'https://dl.cloudsmith.io/public/pzip/stable/setup.deb.sh' | sudo -E bash 40 | sudo apt update 41 | sudo apt install punzip 42 | ``` 43 | 44 | ### Go 45 | 46 | Alternatively, if you have Go installed: 47 | ``` 48 | go install github.com/ybirader/pzip 49 | ``` 50 | 51 | ### Build from source 52 | 53 | To build from source, we require Go 1.21 or newer. 54 | 55 | 1. Clone the repository by running `git clone "https://github.com/ybirader/pzip.git"` 56 | 2. Build both pzip and punzip by running `make build` or build separately via `cd cmd/pzip && go build` and `cd cmd/punzip && go build` 57 | 58 | ## Usage 59 | 60 | ### Archiving 61 | 62 | `pzip`'s API is similar to that of the standard zip utlity found on most *-nix systems. 63 | 64 | ``` 65 | pzip /path/to/compressed.zip path/to/file_or_directory1 path/to/file_or_directory2 ... 
path/to/file_or_directoryN 66 | ``` 67 | 68 | Alternatively, pzip can be imported as a package 69 | 70 | ```go 71 | archive, err := os.Create("archive.zip") 72 | if err != nil { 73 | log.Fatal(err) 74 | } 75 | 76 | archiver, err := pzip.NewArchiver(archive) 77 | if err != nil { 78 | log.Fatal(err) 79 | } 80 | defer archiver.Close() 81 | 82 | files := []string{ "./hello", "./hello.txt", "./bye.md" } 83 | 84 | err = archiver.Archive(context.Background(), files) 85 | if err != nil { 86 | log.Fatal(err) 87 | } 88 | ``` 89 | 90 | The concurrency of the archiver can be configured using the corresponding flag: 91 | ``` 92 | pzip --concurrency 2 /path/to/compressed.zip path/to/file_or_directory1 path/to/file_or_directory2 ... path/to/file_or_directoryN 93 | 94 | ``` 95 | or by passing the `ArchiverConcurrency` option: 96 | ```go 97 | archiver, err := pzip.NewArchiver(archive, ArchiverConcurrency(2)) 98 | ``` 99 | 100 | ### Extraction 101 | 102 | `punzip`'s API is similar to that of the standard unzip utility found on most *-nix systems. 103 | 104 | ``` 105 | punzip /path/to/compressed.zip 106 | ``` 107 | 108 | By default, `punzip` extracts into the current directory.
We can extract to a particular path by: 109 | ``` 110 | punzip -d /path/to/output /path/to/compressed.zip 111 | ``` 112 | 113 | Using the Go package, we have: 114 | ```go 115 | outputDirPath := "./output" 116 | archivePath := "./archive.zip" 117 | 118 | extractor, err := pzip.NewExtractor(outputDirPath) 119 | if err != nil { 120 | log.Fatal(err) 121 | } 122 | defer extractor.Close() 123 | 124 | err = extractor.Extract(context.Background(), archivePath) 125 | if err != nil { 126 | log.Fatal(err) 127 | } 128 | ``` 129 | 130 | As with pzip, we can configure the concurrency of the extractor using: 131 | 132 | ``` 133 | punzip --concurrency 2 /path/to/compressed.zip 134 | ``` 135 | 136 | Similarly, with the Go package, we pass in the `ExtractorConcurrency` option: 137 | ```go 138 | extractor, err := pzip.NewExtractor(outputDirPath, ExtractorConcurrency(2)) 139 | ``` 140 | 141 | 142 | ### Benchmarks 143 | 144 | pzip was benchmarked using Matt Mahoney's [sample directory](https://mattmahoney.net/dc/10gb.html). 145 | 146 | Using the standard `zip` utlity, we get the following time to archive: 147 | ``` 148 | real 14m31.809s 149 | user 13m12.833s 150 | sys 0m24.193s 151 | ``` 152 | 153 | Running the same benchmark with pzip, we find that: 154 | 155 | ``` 156 | real 0m56.851s 157 | user 3m32.619s 158 | sys 1m25.040s 159 | ``` 160 | 161 | ## Contributing 162 | 163 | To contribute to pzip, first submit or comment in an issue to discuss your contribution, then open a pull request (PR). 164 | 165 | ## License 166 | 167 | pzip is released under the [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) license. 168 | 169 | ## Acknowledgements 170 | 171 | Many thanks to the folks at [Cloudsmith](https://cloudsmith.com) for graciously providing Debian package hosting. Cloudsmith is the only fully hosted, cloud-native, universal package management solution, that enables your organization to create, store and share packages in any format, to any place, with total confidence. 
172 | 173 | -------------------------------------------------------------------------------- /adapters/cli/build_binary.go: -------------------------------------------------------------------------------- 1 | package cli 2 | 3 | import ( 4 | "os" 5 | "os/exec" 6 | "path/filepath" 7 | "runtime" 8 | ) 9 | 10 | func BuildBinary() (binPath string, cleanup func(), err error) { 11 | binName := "pzip-test" 12 | 13 | if runtime.GOOS == "windows" { 14 | binName += ".exe" 15 | } 16 | 17 | build := exec.Command("go", "build", "-o", binName) 18 | 19 | if err := build.Run(); err != nil { 20 | return "", nil, err 21 | } 22 | 23 | dir, err := os.Getwd() 24 | if err != nil { 25 | return "", nil, err 26 | } 27 | 28 | binPath = filepath.Join(dir, binName) 29 | 30 | cleanup = func() { 31 | os.Remove(binPath) 32 | } 33 | 34 | return 35 | } 36 | -------------------------------------------------------------------------------- /adapters/cli/driver.go: -------------------------------------------------------------------------------- 1 | package cli 2 | 3 | import ( 4 | "log" 5 | "os/exec" 6 | ) 7 | 8 | type Driver struct { 9 | binPath string 10 | archivePath string 11 | dirPath string 12 | } 13 | 14 | func NewDriver(binPath, archivePath, dirPath string) *Driver { 15 | return &Driver{binPath, archivePath, dirPath} 16 | } 17 | 18 | func (d *Driver) DirPath() string { 19 | return d.dirPath 20 | } 21 | 22 | func (d *Driver) ArchivePath() string { 23 | return d.archivePath 24 | } 25 | 26 | func (d *Driver) Archive() { 27 | pzip := exec.Command(d.binPath, d.ArchivePath(), d.DirPath()) 28 | 29 | if err := pzip.Run(); err != nil { 30 | log.Fatal("ERROR: could not run pzip binary", err) 31 | } 32 | } 33 | 34 | func (d *Driver) Extract() { 35 | punzip := exec.Command(d.binPath, d.ArchivePath()) 36 | 37 | if err := punzip.Run(); err != nil { 38 | log.Fatal("ERROR: could not run punzip binary", err) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- 
/archiver.go: -------------------------------------------------------------------------------- 1 | package pzip 2 | 3 | import ( 4 | "archive/zip" 5 | "bufio" 6 | "context" 7 | "fmt" 8 | "hash/crc32" 9 | "io" 10 | "io/fs" 11 | "os" 12 | "path/filepath" 13 | "runtime" 14 | "sync" 15 | "unicode/utf8" 16 | 17 | "github.com/ybirader/pzip/pool" 18 | ) 19 | 20 | const ( 21 | defaultCompression = -1 22 | zipVersion20 = 20 23 | sequentialWrites = 1 24 | ) 25 | 26 | const bufferSize = 32 * 1024 27 | 28 | var bufferPool = sync.Pool{ 29 | New: func() any { 30 | return bufio.NewReaderSize(nil, bufferSize) 31 | }, 32 | } 33 | 34 | type archiver struct { 35 | xArchive *os.File 36 | concurrency int 37 | w *zip.Writer 38 | fileProcessPool pool.WorkerPool[pool.File] 39 | fileWriterPool pool.WorkerPool[pool.File] 40 | chroot string 41 | absoluteArchivePath string 42 | } 43 | 44 | // NewArchiver returns a new pzip archiver. The archiver can be configured by passing in a number of options. 45 | // Available options include ArchiverConcurrency(n int). 
It returns an error if the archiver can't be created 46 | // Close() should be called on the returned archiver when done 47 | func NewArchiver(archive *os.File, options ...archiverOption) (*archiver, error) { 48 | a := &archiver{ 49 | xArchive: archive, 50 | w: zip.NewWriter(archive), 51 | concurrency: runtime.GOMAXPROCS(0), 52 | } 53 | 54 | var err error 55 | a.absoluteArchivePath, err = filepath.Abs(archive.Name()) 56 | if err != nil { 57 | return nil, fmt.Errorf("absolute archive path %q: %w", archive.Name(), err) 58 | } 59 | 60 | fileProcessExecutor := func(file *pool.File) error { 61 | err := a.compress(file) 62 | if err != nil { 63 | return fmt.Errorf("compress file %q: %w", file.Path, err) 64 | } 65 | 66 | a.fileWriterPool.Enqueue(file) 67 | 68 | return nil 69 | } 70 | 71 | fileProcessPool, err := pool.NewFileWorkerPool(fileProcessExecutor, &pool.Config{Concurrency: a.concurrency, Capacity: 1}) 72 | if err != nil { 73 | return nil, fmt.Errorf("new file process pool: %w", err) 74 | } 75 | a.fileProcessPool = fileProcessPool 76 | 77 | fileWriterExecutor := func(file *pool.File) error { 78 | err := a.archive(file) 79 | if err != nil { 80 | return fmt.Errorf("archive %q: %w", file.Path, err) 81 | } 82 | 83 | return nil 84 | } 85 | 86 | fileWriterPool, err := pool.NewFileWorkerPool(fileWriterExecutor, &pool.Config{Concurrency: sequentialWrites, Capacity: 1}) 87 | if err != nil { 88 | return nil, fmt.Errorf("new file writer pool: %w", err) 89 | } 90 | a.fileWriterPool = fileWriterPool 91 | 92 | for _, option := range options { 93 | err = option(a) 94 | if err != nil { 95 | return nil, err 96 | } 97 | } 98 | 99 | return a, nil 100 | } 101 | 102 | // Archive compresses and stores (archives) the files at the provides filePaths to 103 | // the corresponding archive registered with the archiver. Archiving is canceled when the 104 | // associated ctx is canceled. The first error that arises during archiving is returned. 
105 | func (a *archiver) Archive(ctx context.Context, filePaths []string) error { 106 | a.fileProcessPool.Start(ctx) 107 | a.fileWriterPool.Start(ctx) 108 | 109 | for _, path := range filePaths { 110 | info, err := os.Lstat(path) 111 | if err != nil { 112 | return fmt.Errorf("lstat %q: %w", path, err) 113 | } 114 | 115 | if info.IsDir() { 116 | if err = a.archiveDir(path); err != nil { 117 | return fmt.Errorf("archive dir %q: %w", path, err) 118 | } 119 | } else { 120 | a.chroot = "" 121 | file, err := pool.NewFile(path, info, "") 122 | if err != nil { 123 | return fmt.Errorf("new file %q: %w", path, err) 124 | } 125 | 126 | a.archiveFile(file) 127 | } 128 | } 129 | 130 | if err := a.fileProcessPool.Close(); err != nil { 131 | return fmt.Errorf("close file process pool: %w", err) 132 | } 133 | 134 | if err := a.fileWriterPool.Close(); err != nil { 135 | return fmt.Errorf("close file writer pool: %w", err) 136 | } 137 | 138 | return nil 139 | } 140 | 141 | func (a *archiver) Close() error { 142 | if err := a.w.Close(); err != nil { 143 | return fmt.Errorf("close zip writer: %w", err) 144 | } 145 | 146 | return nil 147 | } 148 | 149 | func (a *archiver) archiveDir(root string) error { 150 | if err := a.changeRoot(root); err != nil { 151 | return fmt.Errorf("change root to %q: %w", root, err) 152 | } 153 | 154 | if err := a.walkDir(); err != nil { 155 | return fmt.Errorf("walk directory: %w", err) 156 | } 157 | 158 | return nil 159 | } 160 | 161 | // archiveFile enqueues file for archiving if it doesn't match 162 | // our output file. 163 | func (a *archiver) archiveFile(file *pool.File) { 164 | if file.Path == a.absoluteArchivePath { 165 | // Don't archive the output file. 
166 | return 167 | } 168 | 169 | a.fileProcessPool.Enqueue(file) 170 | } 171 | 172 | func (a *archiver) changeRoot(root string) error { 173 | absRoot, err := filepath.Abs(root) 174 | if err != nil { 175 | return fmt.Errorf("get absolute path of %q: %w", root, err) 176 | } 177 | 178 | a.chroot = absRoot 179 | return nil 180 | } 181 | 182 | func (a *archiver) walkDir() error { 183 | if err := filepath.Walk(a.chroot, func(path string, info fs.FileInfo, err error) error { 184 | if err != nil { 185 | return err 186 | } 187 | 188 | file, err := pool.NewFile(path, info, a.chroot) 189 | if err != nil { 190 | return fmt.Errorf("new file %q: %w", path, err) 191 | } 192 | a.archiveFile(file) 193 | 194 | return nil 195 | }); err != nil { 196 | return fmt.Errorf("walk directory %q: %w", a.chroot, err) 197 | } 198 | 199 | return nil 200 | } 201 | 202 | func (a *archiver) compress(file *pool.File) error { 203 | if file.Info.IsDir() { 204 | if err := a.populateHeader(file); err != nil { 205 | return fmt.Errorf("populate header for %q: %w", file.Path, err) 206 | } 207 | return nil 208 | } 209 | 210 | hasher := crc32.NewIEEE() 211 | 212 | if err := a.copy(io.MultiWriter(file.Compressor, hasher), file); err != nil { 213 | return fmt.Errorf("copy %q: %w", file.Path, err) 214 | } 215 | 216 | if err := file.Compressor.Close(); err != nil { 217 | return fmt.Errorf("close compressor for %q: %w", file.Path, err) 218 | } 219 | 220 | if err := a.populateHeader(file); err != nil { 221 | return fmt.Errorf("populate header for %q: %w", file.Path, err) 222 | } 223 | 224 | file.Header.CRC32 = hasher.Sum32() 225 | return nil 226 | } 227 | 228 | func (a *archiver) copy(w io.Writer, file *pool.File) error { 229 | f, err := os.Open(file.Path) 230 | if err != nil { 231 | return fmt.Errorf("open %q: %w", file.Path, err) 232 | } 233 | defer f.Close() 234 | 235 | buf := bufferPool.Get().(*bufio.Reader) 236 | buf.Reset(f) 237 | 238 | _, err = io.Copy(w, buf) 239 | bufferPool.Put(buf) 240 | if err != nil { 
241 | return fmt.Errorf("copy %q: %w", file.Path, err) 242 | } 243 | 244 | return nil 245 | } 246 | 247 | func (a *archiver) populateHeader(file *pool.File) error { 248 | header := file.Header 249 | 250 | utf8ValidName, utf8RequireName := detectUTF8(header.Name) 251 | utf8ValidComment, utf8RequireComment := detectUTF8(header.Comment) 252 | switch { 253 | case header.NonUTF8: 254 | header.Flags &^= 0x800 255 | case (utf8RequireName || utf8RequireComment) && (utf8ValidName && utf8ValidComment): 256 | header.Flags |= 0x800 257 | } 258 | 259 | header.CreatorVersion = header.CreatorVersion&0xff00 | zipVersion20 260 | header.ReaderVersion = zipVersion20 261 | 262 | // we store local times in header.Modified- other zip readers expect this 263 | // we set extended timestamp (UTC) info as an Extra for compatibility 264 | // we only set mod time, not time of last access or time of original creation 265 | // https://libzip.org/specifications/extrafld.txt 266 | 267 | if !header.Modified.IsZero() { 268 | header.Extra = append(header.Extra, NewExtendedTimestampExtraField(header.Modified).Encode()...) 
269 | } 270 | 271 | if file.Info.IsDir() { 272 | header.Name += "/" 273 | header.Method = zip.Store 274 | header.Flags &^= 0x8 // won't write data descriptor (crc32, comp, uncomp) 275 | header.UncompressedSize64 = 0 276 | } else { 277 | header.Method = zip.Deflate 278 | header.Flags |= 0x8 // will write data descriptor (crc32, comp, uncomp) 279 | header.CompressedSize64 = uint64(file.Written()) 280 | } 281 | 282 | file.Header = header 283 | 284 | return nil 285 | } 286 | 287 | func (a *archiver) archive(file *pool.File) error { 288 | fileWriter, err := a.w.CreateRaw(file.Header) 289 | if err != nil { 290 | return fmt.Errorf("create raw for %q: %w", file.Path, err) 291 | } 292 | 293 | if _, err = io.Copy(fileWriter, file.CompressedData); err != nil { 294 | return fmt.Errorf("write compressed data for %q: %w", file.Path, err) 295 | } 296 | 297 | if file.Overflowed() { 298 | if _, err = file.Overflow.Seek(0, io.SeekStart); err != nil { 299 | return fmt.Errorf("seek overflow for %q: %w", file.Path, err) 300 | } 301 | if _, err = io.Copy(fileWriter, file.Overflow); err != nil { 302 | return fmt.Errorf("copy overflow for %q: %w", file.Path, err) 303 | } 304 | 305 | file.Overflow.Close() 306 | if err = os.Remove(file.Overflow.Name()); err != nil { 307 | return fmt.Errorf("remove overflow for %q: %w", file.Overflow.Name(), err) 308 | } 309 | } 310 | 311 | pool.FilePool.Put(file) 312 | 313 | return nil 314 | } 315 | 316 | // https://cs.opensource.google/go/go/+/refs/tags/go1.21.0:src/archive/zip/writer.go 317 | func detectUTF8(s string) (valid, require bool) { 318 | for i := 0; i < len(s); { 319 | r, size := utf8.DecodeRuneInString(s[i:]) 320 | i += size 321 | 322 | if r < 0x20 || r > 0x7d || r == 0x5c { 323 | if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) { 324 | return false, false 325 | } 326 | require = true 327 | } 328 | } 329 | return true, require 330 | } 331 | -------------------------------------------------------------------------------- 
/archiver_options.go: -------------------------------------------------------------------------------- 1 | package pzip 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | const minConcurrency = 1 8 | 9 | type archiverOption func(*archiver) error 10 | 11 | // ArchiverConcurrency sets the number of goroutines used during archiving 12 | // An error is returned if n is less than 1. 13 | func ArchiverConcurrency(n int) archiverOption { 14 | return func(a *archiver) error { 15 | if n < minConcurrency { 16 | return fmt.Errorf("concurrency %d not greater than zero", n) 17 | } 18 | 19 | a.concurrency = n 20 | return nil 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /archiver_test.go: -------------------------------------------------------------------------------- 1 | package pzip 2 | 3 | import ( 4 | "archive/zip" 5 | "bytes" 6 | "context" 7 | "encoding/binary" 8 | "fmt" 9 | "path/filepath" 10 | "testing" 11 | "time" 12 | 13 | "github.com/alecthomas/assert/v2" 14 | "github.com/ybirader/pzip/internal/testutils" 15 | "github.com/ybirader/pzip/pool" 16 | ) 17 | 18 | const ( 19 | testdataRoot = "testdata/" 20 | archivePath = testdataRoot + "archive.zip" 21 | helloTxtFileFixture = testdataRoot + "hello.txt" 22 | helloMarkdownFileFixture = testdataRoot + "hello.md" 23 | helloDirectoryFixture = testdataRoot + "hello/" 24 | ) 25 | 26 | func TestArchive(t *testing.T) { 27 | t.Run("archives a single file with a name", func(t *testing.T) { 28 | archive, cleanup := testutils.CreateTempArchive(t, archivePath) 29 | defer cleanup() 30 | 31 | archiver, err := NewArchiver(archive) 32 | assert.NoError(t, err) 33 | err = archiver.Archive(context.Background(), []string{helloTxtFileFixture}) 34 | assert.NoError(t, err) 35 | archiver.Close() 36 | 37 | archiveReader := testutils.GetArchiveReader(t, archive.Name()) 38 | defer archiveReader.Close() 39 | 40 | assert.Equal(t, 1, len(archiveReader.File)) 41 | testutils.AssertArchiveContainsFile(t, 
archiveReader.File, "hello.txt") 42 | 43 | info := testutils.GetFileInfo(t, helloTxtFileFixture) 44 | 45 | got := archiveReader.File[0].UncompressedSize64 46 | want := uint64(info.Size()) 47 | 48 | assert.Equal(t, want, got, "expected file %s to have raw size %d but got %d", info.Name(), want, got) 49 | }) 50 | 51 | t.Run("retains the last modified date of an archived file", func(t *testing.T) { 52 | archive, cleanup := testutils.CreateTempArchive(t, archivePath) 53 | defer cleanup() 54 | 55 | archiver, err := NewArchiver(archive) 56 | assert.NoError(t, err) 57 | err = archiver.Archive(context.Background(), []string{helloTxtFileFixture}) 58 | assert.NoError(t, err) 59 | archiver.Close() 60 | 61 | archiveReader := testutils.GetArchiveReader(t, archive.Name()) 62 | defer archiveReader.Close() 63 | 64 | info := testutils.GetFileInfo(t, helloTxtFileFixture) 65 | 66 | archivedFile, found := testutils.Find(archiveReader.File, func(file *zip.File) bool { 67 | return file.Name == "hello.txt" 68 | }) 69 | assert.True(t, found) 70 | 71 | assertMatchingTimes(t, archivedFile.Modified, info.ModTime()) 72 | }) 73 | 74 | t.Run("archives two files", func(t *testing.T) { 75 | archive, cleanup := testutils.CreateTempArchive(t, archivePath) 76 | defer cleanup() 77 | 78 | archiver, err := NewArchiver(archive) 79 | assert.NoError(t, err) 80 | err = archiver.Archive(context.Background(), []string{helloTxtFileFixture, helloMarkdownFileFixture}) 81 | assert.NoError(t, err) 82 | archiver.Close() 83 | 84 | archiveReader := testutils.GetArchiveReader(t, archive.Name()) 85 | defer archiveReader.Close() 86 | 87 | assert.Equal(t, 2, len(archiveReader.File)) 88 | }) 89 | 90 | t.Run("archives a directory of files", func(t *testing.T) { 91 | archive, cleanup := testutils.CreateTempArchive(t, archivePath) 92 | defer cleanup() 93 | 94 | archiver, err := NewArchiver(archive) 95 | assert.NoError(t, err) 96 | err = archiver.Archive(context.Background(), []string{helloDirectoryFixture}) 97 | 
assert.NoError(t, err) 98 | archiver.Close() 99 | 100 | archiveReader := testutils.GetArchiveReader(t, archive.Name()) 101 | defer archiveReader.Close() 102 | 103 | assert.Equal(t, 4, len(archiveReader.File)) 104 | }) 105 | 106 | t.Run("can archive files separately", func(t *testing.T) { 107 | archive, cleanup := testutils.CreateTempArchive(t, archivePath) 108 | defer cleanup() 109 | 110 | archiver, err := NewArchiver(archive) 111 | assert.NoError(t, err) 112 | err = archiver.Archive(context.Background(), []string{helloTxtFileFixture}) 113 | assert.NoError(t, err) 114 | err = archiver.Archive(context.Background(), []string{helloMarkdownFileFixture}) 115 | assert.NoError(t, err) 116 | archiver.Close() 117 | 118 | archiveReader := testutils.GetArchiveReader(t, archive.Name()) 119 | defer archiveReader.Close() 120 | 121 | assert.Equal(t, 2, len(archiveReader.File)) 122 | }) 123 | } 124 | 125 | func TestCompress(t *testing.T) { 126 | t.Run("when file has compressed size less than or equal to buffer size", func(t *testing.T) { 127 | archive, cleanup := testutils.CreateTempArchive(t, archivePath) 128 | defer cleanup() 129 | 130 | archiver, err := NewArchiver(archive) 131 | assert.NoError(t, err) 132 | 133 | info := testutils.GetFileInfo(t, helloTxtFileFixture) 134 | file, err := pool.NewFile(helloTxtFileFixture, info, "") 135 | assert.NoError(t, err) 136 | 137 | err = archiver.compress(file) 138 | assert.NoError(t, err) 139 | 140 | assert.False(t, file.Overflowed()) 141 | assert.Equal(t, zip.Deflate, file.Header.Method) 142 | assertMatchingTimes(t, info.ModTime(), file.Header.Modified) 143 | assert.Equal(t, info.Mode(), file.Header.Mode()) 144 | assert.NotZero(t, file.Header.CRC32) 145 | assert.Equal(t, uint64(info.Size()), file.Header.UncompressedSize64) 146 | assert.Equal(t, uint64(file.CompressedData.Len()), file.Header.CompressedSize64) 147 | assert.Equal(t, int64(file.CompressedData.Len()), file.Written()) 148 | assertExtendedTimestamp(t, file.Header.Extra) 149 | }) 
150 | 151 | t.Run("writes a maximum of buffer cap bytes and remainder directly to temp file", func(t *testing.T) { 152 | archive, cleanup := testutils.CreateTempArchive(t, archivePath) 153 | defer cleanup() 154 | 155 | archiver, err := NewArchiver(archive) 156 | assert.NoError(t, err) 157 | 158 | info := testutils.GetFileInfo(t, helloTxtFileFixture) 159 | file, err := pool.NewFile(helloTxtFileFixture, info, "") 160 | assert.NoError(t, err) 161 | bufCap := 5 162 | file.CompressedData = bytes.NewBuffer(make([]byte, 0, bufCap)) 163 | 164 | err = archiver.compress(file) 165 | assert.NoError(t, err) 166 | 167 | assert.Equal(t, file.CompressedData.Len(), bufCap) 168 | assert.True(t, file.Overflowed()) 169 | assertGreaterThan(t, file.Written(), int64(file.CompressedData.Len())) 170 | assert.Equal(t, file.Written(), int64(file.Header.CompressedSize64)) 171 | }) 172 | 173 | t.Run("for directories", func(t *testing.T) { 174 | archive, cleanup := testutils.CreateTempArchive(t, archivePath) 175 | defer cleanup() 176 | 177 | archiver, err := NewArchiver(archive) 178 | assert.NoError(t, err) 179 | 180 | filePath := filepath.Join(helloDirectoryFixture, "nested") 181 | info := testutils.GetFileInfo(t, filePath) 182 | file, err := pool.NewFile(filePath, info, helloDirectoryFixture) 183 | assert.NoError(t, err) 184 | 185 | err = archiver.compress(file) 186 | assert.NoError(t, err) 187 | 188 | assert.Equal(t, "hello/nested/", file.Header.Name) 189 | assert.False(t, file.Overflowed()) 190 | assert.Equal(t, zip.Store, file.Header.Method) 191 | assert.Zero(t, file.Header.CRC32) 192 | assert.Equal(t, 0, file.Header.UncompressedSize64) 193 | assert.Equal(t, 0, file.Header.CompressedSize64) 194 | assert.Equal(t, int64(file.CompressedData.Len()), file.Written()) 195 | }) 196 | } 197 | 198 | func assertExtendedTimestamp(t testing.TB, extraField []byte) { 199 | want := make([]byte, 2) 200 | binary.LittleEndian.PutUint16(want, extendedTimestampTag) 201 | got := extraField[:2] 202 | 
assert.Equal(t, want, got, "expected header to contain extended timestamp") 203 | } 204 | 205 | func assertMatchingTimes(t testing.TB, t1, t2 time.Time) { 206 | t.Helper() 207 | 208 | assert.True(t, 209 | t1.Year() == t2.Year() && t1.YearDay() == t2.YearDay() && t1.Second() == t2.Second(), 210 | fmt.Sprintf("expected %+v to match %+v but didn't", t1, t2)) 211 | } 212 | 213 | func assertGreaterThan(t testing.TB, a, b int64) { 214 | if b >= a { 215 | t.Fatalf("expected %d to be greater than %d", a, b) 216 | } 217 | } 218 | -------------------------------------------------------------------------------- /cli.go: -------------------------------------------------------------------------------- 1 | package pzip 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | type ArchiverCLI struct { 10 | ArchivePath string 11 | Files []string 12 | Concurrency int 13 | } 14 | 15 | func (a *ArchiverCLI) Archive(ctx context.Context) error { 16 | archive, err := os.Create(a.ArchivePath) 17 | if err != nil { 18 | return fmt.Errorf("create archive at %q: %w", a.ArchivePath, err) 19 | } 20 | defer archive.Close() 21 | 22 | archiver, err := NewArchiver(archive, ArchiverConcurrency(a.Concurrency)) 23 | if err != nil { 24 | return fmt.Errorf("create archiver: %w", err) 25 | } 26 | defer archiver.Close() 27 | 28 | err = archiver.Archive(ctx, a.Files) 29 | if err != nil { 30 | return fmt.Errorf("archive files: %w", err) 31 | } 32 | 33 | return nil 34 | } 35 | 36 | type ExtractorCLI struct { 37 | ArchivePath string 38 | OutputDir string 39 | Concurrency int 40 | } 41 | 42 | func (e *ExtractorCLI) Extract(ctx context.Context) error { 43 | extractor, err := NewExtractor(e.OutputDir, ExtractorConcurrency(e.Concurrency)) 44 | if err != nil { 45 | return fmt.Errorf("new extractor: %w", err) 46 | } 47 | defer extractor.Close() 48 | 49 | if err = extractor.Extract(ctx, e.ArchivePath); err != nil { 50 | return fmt.Errorf("extract %q to %q: %w", e.ArchivePath, e.OutputDir, err) 51 | 52 | } 53 
| 54 | return nil 55 | } 56 | -------------------------------------------------------------------------------- /cli_test.go: -------------------------------------------------------------------------------- 1 | package pzip_test 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "path/filepath" 7 | "runtime" 8 | "testing" 9 | 10 | "github.com/alecthomas/assert/v2" 11 | "github.com/ybirader/pzip" 12 | "github.com/ybirader/pzip/internal/testutils" 13 | ) 14 | 15 | const ( 16 | benchmarkRoot = "testdata/benchmark" 17 | benchmarkDir = "minibench" // modify this to match the file/directory you want to benchmark 18 | benchmarkArchive = "miniextractbench.zip" // modify this to match archive you want to benchmark 19 | testArchiveDirectoryName = "hello" 20 | ) 21 | 22 | func TestArchiverCLI(t *testing.T) { 23 | t.Run("archives a directory and some files", func(t *testing.T) { 24 | files := []string{"testdata/hello", "testdata/hello.txt"} 25 | archivePath := "testdata/archive.zip" 26 | defer os.RemoveAll(archivePath) 27 | 28 | cli := pzip.ArchiverCLI{archivePath, files, runtime.GOMAXPROCS(0)} 29 | err := cli.Archive(context.Background()) 30 | assert.NoError(t, err) 31 | 32 | archiveReader := testutils.GetArchiveReader(t, archivePath) 33 | defer archiveReader.Close() 34 | 35 | assert.Equal(t, 5, len(archiveReader.File)) 36 | }) 37 | } 38 | 39 | func TestExtractorCLI(t *testing.T) { 40 | t.Run("extracts an archive", func(t *testing.T) { 41 | archivePath := "testdata/test.zip" 42 | outputDirPath := "testdata/test" 43 | 44 | err := os.Mkdir(outputDirPath, 0755) 45 | assert.NoError(t, err) 46 | extractedDirPath := filepath.Join(outputDirPath, testArchiveDirectoryName) 47 | defer os.RemoveAll(outputDirPath) 48 | 49 | cli := pzip.ExtractorCLI{archivePath, outputDirPath, runtime.GOMAXPROCS(0)} 50 | err = cli.Extract(context.Background()) 51 | assert.NoError(t, err) 52 | 53 | assert.Equal(t, 3, len(testutils.GetAllFiles(t, extractedDirPath))) 54 | }) 55 | } 56 | 57 | // BenchmarkArchiverCLI 
benchmarks the archiving of a file/directory, referenced by benchmarkDir in the benchmarkRoot directory 58 | func BenchmarkArchiverCLI(b *testing.B) { 59 | outputDirPath := filepath.Join(benchmarkRoot, benchmarkDir) 60 | archivePath := filepath.Join(benchmarkRoot, benchmarkDir+".zip") 61 | 62 | cli := pzip.ArchiverCLI{archivePath, []string{outputDirPath}, runtime.GOMAXPROCS(0)} 63 | 64 | b.ReportAllocs() 65 | b.ResetTimer() 66 | 67 | for i := 0; i < b.N; i++ { 68 | if err := cli.Archive(context.Background()); err != nil { 69 | b.Fatal(err) 70 | } 71 | } 72 | } 73 | 74 | // BenchmarkExtractorCLI benchmarks extracting an archive, referenced by benchmarkArchive 75 | func BenchmarkExtractorCLI(b *testing.B) { 76 | archivePath := filepath.Join(benchmarkRoot, benchmarkArchive) 77 | 78 | cli := pzip.ExtractorCLI{archivePath, benchmarkRoot, runtime.GOMAXPROCS(0)} 79 | 80 | b.ReportAllocs() 81 | b.ResetTimer() 82 | 83 | for i := 0; i < b.N; i++ { 84 | if err := cli.Extract(context.Background()); err != nil { 85 | b.Fatal(err) 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /cmd/punzip/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "os" 9 | "os/signal" 10 | "runtime" 11 | 12 | "github.com/ybirader/pzip" 13 | ) 14 | 15 | const description = "punzip is a tool for extracting files concurrently." 
16 | 17 | func main() { 18 | flag.Usage = func() { 19 | fmt.Fprintln(os.Stderr, description) 20 | fmt.Fprintln(os.Stderr, "\nUsage:") 21 | flag.PrintDefaults() 22 | } 23 | 24 | var concurrency int 25 | var outputDir string 26 | flag.IntVar(&concurrency, "concurrency", runtime.GOMAXPROCS(0), "allow up to n compression routines") 27 | flag.StringVar(&outputDir, "d", ".", "extract files into the specified directory") 28 | 29 | flag.Parse() 30 | 31 | args := flag.Args() 32 | 33 | if len(args) < 1 { 34 | flag.Usage() 35 | return 36 | } 37 | 38 | cli := pzip.ExtractorCLI{ArchivePath: args[0], OutputDir: outputDir, Concurrency: concurrency} 39 | ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt) 40 | go func() { 41 | <-ctx.Done() 42 | stop() 43 | }() 44 | 45 | err := cli.Extract(ctx) 46 | if err != nil { 47 | log.Fatal(err) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /cmd/punzip/punzip_test.go: -------------------------------------------------------------------------------- 1 | package main_test 2 | 3 | import ( 4 | "os/exec" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/alecthomas/assert/v2" 9 | "github.com/ybirader/pzip/adapters/cli" 10 | "github.com/ybirader/pzip/internal/testutils" 11 | "github.com/ybirader/pzip/specifications" 12 | ) 13 | 14 | const ( 15 | testdataRoot = "../../testdata" 16 | archivePath = testdataRoot + "/test.zip" 17 | ) 18 | 19 | func TestPunzip(t *testing.T) { 20 | binPath, cleanup, err := cli.BuildBinary() 21 | if err != nil { 22 | t.Fatal("ERROR: could not build binary", err) 23 | } 24 | t.Cleanup(cleanup) 25 | 26 | t.Run("outputs usage to stderr when no arguments or flags provided", func(t *testing.T) { 27 | pzip := exec.Command(binPath) 28 | out := testutils.GetOutput(t, pzip) 29 | 30 | assert.Contains(t, out, "punzip is a tool for extracting files concurrently.\n") 31 | assert.Contains(t, out, "Usage") 32 | }) 33 | t.Run("extracts an archive", func(t 
*testing.T) { 34 | if testing.Short() { 35 | t.Skip() 36 | } 37 | 38 | absArchivePath, err := filepath.Abs(archivePath) 39 | assert.NoError(t, err) 40 | 41 | driver := cli.NewDriver(binPath, absArchivePath, "") 42 | 43 | specifications.Extract(t, driver) 44 | }) 45 | } 46 | -------------------------------------------------------------------------------- /cmd/pzip/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "os" 9 | "os/signal" 10 | "runtime" 11 | 12 | "github.com/ybirader/pzip" 13 | ) 14 | 15 | const description = "pzip is a tool for archiving files concurrently." 16 | 17 | func main() { 18 | flag.Usage = func() { 19 | fmt.Fprintln(os.Stderr, description) 20 | fmt.Fprintln(os.Stderr, "\nUsage:") 21 | flag.PrintDefaults() 22 | } 23 | 24 | var concurrency int 25 | flag.IntVar(&concurrency, "concurrency", runtime.GOMAXPROCS(0), "allow up to n compression routines") 26 | 27 | flag.Parse() 28 | 29 | args := flag.Args() 30 | 31 | if len(args) < 1 { 32 | flag.Usage() 33 | return 34 | } else if len(args) < 2 { 35 | fmt.Fprintln(os.Stderr, "pzip error: invalid usage") 36 | return 37 | } 38 | 39 | cli := pzip.ArchiverCLI{ArchivePath: args[0], Files: args[1:], Concurrency: concurrency} 40 | ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt) 41 | go func() { 42 | <-ctx.Done() 43 | stop() 44 | }() 45 | 46 | err := cli.Archive(ctx) 47 | if err != nil { 48 | os.RemoveAll(cli.ArchivePath) 49 | log.Fatal(err) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /cmd/pzip/pzip_test.go: -------------------------------------------------------------------------------- 1 | package main_test 2 | 3 | import ( 4 | "os/exec" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/alecthomas/assert/v2" 9 | "github.com/ybirader/pzip/adapters/cli" 10 | "github.com/ybirader/pzip/internal/testutils" 11 | 
"github.com/ybirader/pzip/specifications" 12 | ) 13 | 14 | const ( 15 | testdataRoot = "../../testdata" 16 | archivePath = testdataRoot + "/archive.zip" 17 | dirPath = testdataRoot + "/hello" 18 | ) 19 | 20 | func TestPzip(t *testing.T) { 21 | binPath, cleanup, err := cli.BuildBinary() 22 | if err != nil { 23 | t.Fatal("ERROR: could not build binary", err) 24 | } 25 | t.Cleanup(cleanup) 26 | 27 | t.Run("outputs usage to stderr when no arguments or flags provided", func(t *testing.T) { 28 | pzip := exec.Command(binPath) 29 | out := testutils.GetOutput(t, pzip) 30 | 31 | assert.Contains(t, out, "pzip is a tool for archiving files concurrently.\n") 32 | assert.Contains(t, out, "Usage") 33 | }) 34 | 35 | t.Run("outputs error when only one argument passed", func(t *testing.T) { 36 | pzip := exec.Command(binPath, "archive.zip") 37 | out := testutils.GetOutput(t, pzip) 38 | 39 | assert.Contains(t, out, "pzip error: invalid usage\n") 40 | }) 41 | 42 | t.Run("archives directory", func(t *testing.T) { 43 | if testing.Short() { 44 | t.Skip() 45 | } 46 | 47 | absArchivePath, err := filepath.Abs(archivePath) 48 | if err != nil { 49 | t.Fatalf("ERROR: could not get path to archive %s", archivePath) 50 | } 51 | 52 | absDirPath, err := filepath.Abs(dirPath) 53 | if err != nil { 54 | t.Fatalf("ERROR: could not get path to directory %s", dirPath) 55 | } 56 | 57 | driver := cli.NewDriver(binPath, absArchivePath, absDirPath) 58 | 59 | specifications.Archive(t, driver) 60 | }) 61 | } 62 | -------------------------------------------------------------------------------- /extra.go: -------------------------------------------------------------------------------- 1 | package pzip 2 | 3 | import ( 4 | "encoding/binary" 5 | "time" 6 | ) 7 | 8 | const extendedTimestampTag = 0x5455 9 | 10 | // ExtendedTimeStampExtraField is the extended timestamp field, as defined in the zip specification (See 4.5.3 https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT). 
11 | type ExtendedTimestampExtraField struct { 12 | modified time.Time 13 | } 14 | 15 | func NewExtendedTimestampExtraField(modified time.Time) *ExtendedTimestampExtraField { 16 | return &ExtendedTimestampExtraField{ 17 | modified, 18 | } 19 | } 20 | 21 | // Encode returns the modified time of the associated ExtendedTimestampExtraField as a slice of bytes. 22 | func (e *ExtendedTimestampExtraField) Encode() []byte { 23 | extraBuf := make([]byte, 0, 9) // 2*SizeOf(uint16) + SizeOf(uint) + SizeOf(uint32) 24 | extraBuf = binary.LittleEndian.AppendUint16(extraBuf, extendedTimestampTag) 25 | extraBuf = binary.LittleEndian.AppendUint16(extraBuf, 5) // block size 26 | extraBuf = append(extraBuf, uint8(1)) // flags 27 | extraBuf = binary.LittleEndian.AppendUint32(extraBuf, uint32(e.modified.Unix())) 28 | return extraBuf 29 | } 30 | -------------------------------------------------------------------------------- /extractor.go: -------------------------------------------------------------------------------- 1 | package pzip 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "os" 8 | "path/filepath" 9 | "runtime" 10 | "strings" 11 | 12 | "github.com/klauspost/compress/zip" 13 | "github.com/ybirader/pzip/pool" 14 | ) 15 | 16 | type extractor struct { 17 | outputDir string 18 | archiveReader *zip.ReadCloser 19 | fileWorkerPool pool.WorkerPool[zip.File] 20 | concurrency int 21 | } 22 | 23 | // NewExtractor returns a new pzip extractor. The extractor can be configured by passing in a number of options. 24 | // Available options include ExtractorConcurrency(n int). 
It returns an error if the extractor can't be created.
60 | func (e *extractor) Extract(ctx context.Context, archivePath string) (err error) { 61 | e.archiveReader, err = zip.OpenReader(archivePath) 62 | if err != nil { 63 | return fmt.Errorf("open archive %q: %w", archivePath, err) 64 | } 65 | 66 | e.fileWorkerPool.Start(ctx) 67 | 68 | for _, file := range e.archiveReader.File { 69 | e.fileWorkerPool.Enqueue(file) 70 | } 71 | 72 | if err = e.fileWorkerPool.Close(); err != nil { 73 | return fmt.Errorf("close file worker pool: %w", err) 74 | } 75 | 76 | return nil 77 | } 78 | 79 | func (e *extractor) Close() error { 80 | if err := e.archiveReader.Close(); err != nil { 81 | return fmt.Errorf("close archive reader: %w", err) 82 | } 83 | 84 | return nil 85 | } 86 | 87 | func (e *extractor) extractFile(file *zip.File) (err error) { 88 | outputPath := e.outputPath(file.Name) 89 | 90 | dir := filepath.Dir(outputPath) 91 | if err = os.MkdirAll(dir, 0755); err != nil { 92 | return fmt.Errorf("create directory %q: %w", dir, err) 93 | } 94 | 95 | if e.isDir(file.Name) { 96 | if err = e.writeDir(outputPath, file); err != nil { 97 | return fmt.Errorf("write directory %q: %w", file.Name, err) 98 | } 99 | return nil 100 | } 101 | 102 | if err = e.writeFile(outputPath, file); err != nil { 103 | return fmt.Errorf("write file %q: %w", file.Name, err) 104 | } 105 | 106 | return nil 107 | } 108 | 109 | func (e *extractor) writeDir(outputPath string, file *zip.File) error { 110 | err := os.Mkdir(outputPath, file.Mode()) 111 | if os.IsExist(err) { 112 | if err = os.Chmod(outputPath, file.Mode()); err != nil { 113 | return fmt.Errorf("chmod directory %q: %w", outputPath, err) 114 | } 115 | } else if err != nil { 116 | return fmt.Errorf("create directory %q: %w", outputPath, err) 117 | } 118 | 119 | return nil 120 | } 121 | 122 | func (e *extractor) writeFile(outputPath string, file *zip.File) (err error) { 123 | outputFile, err := os.OpenFile(outputPath, os.O_CREATE|os.O_WRONLY, file.Mode()) 124 | if err != nil { 125 | return 
fmt.Errorf("create file %q: %w", outputPath, err) 126 | } 127 | defer func() { 128 | if cerr := outputFile.Close(); cerr != nil && err == nil { 129 | err = fmt.Errorf("close output file %q: %w", outputPath, cerr) 130 | } 131 | }() 132 | 133 | srcFile, err := file.Open() 134 | if err != nil { 135 | return fmt.Errorf("open file %q: %w", file.Name, err) 136 | } 137 | defer func() { 138 | if cerr := srcFile.Close(); cerr != nil && err == nil { 139 | err = fmt.Errorf("close source file %q: %w", file.Name, cerr) 140 | } 141 | }() 142 | 143 | if _, err = io.Copy(outputFile, srcFile); err != nil { 144 | return fmt.Errorf("decompress file %q: %w", file.Name, err) 145 | } 146 | 147 | return nil 148 | } 149 | 150 | func (e *extractor) isDir(name string) bool { 151 | return strings.HasSuffix(filepath.ToSlash(name), "/") 152 | } 153 | 154 | func (e *extractor) outputPath(name string) string { 155 | return filepath.Join(e.outputDir, name) 156 | } 157 | -------------------------------------------------------------------------------- /extractor_options.go: -------------------------------------------------------------------------------- 1 | package pzip 2 | 3 | import "fmt" 4 | 5 | type extractorOption func(*extractor) error 6 | 7 | // ExtractorConcurrency sets the number of goroutines used during extraction 8 | // An error is returned if n is less than 1. 
9 | func ExtractorConcurrency(n int) extractorOption { 10 | return func(e *extractor) error { 11 | if n < minConcurrency { 12 | return fmt.Errorf("concurrency %d not greater than zero", n) 13 | } 14 | 15 | e.concurrency = n 16 | return nil 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /extractor_test.go: -------------------------------------------------------------------------------- 1 | package pzip 2 | 3 | import ( 4 | "context" 5 | "io/fs" 6 | "os" 7 | "path/filepath" 8 | "testing" 9 | 10 | "github.com/alecthomas/assert/v2" 11 | "github.com/ybirader/pzip/internal/testutils" 12 | ) 13 | 14 | const ( 15 | testArchiveFixture = testdataRoot + "test.zip" // test.zip fixture is an archive of the helloDirectory fixture 16 | outputDirPath = testdataRoot + "test" 17 | ) 18 | 19 | func TestExtract(t *testing.T) { 20 | t.Run("writes decompressed archive files to output directory", func(t *testing.T) { 21 | err := os.Mkdir(outputDirPath, 0755) 22 | assert.NoError(t, err) 23 | defer os.RemoveAll(outputDirPath) 24 | 25 | extractor, err := NewExtractor(outputDirPath) 26 | assert.NoError(t, err) 27 | defer extractor.Close() 28 | 29 | err = extractor.Extract(context.Background(), testArchiveFixture) 30 | assert.NoError(t, err) 31 | 32 | files := testutils.GetAllFiles(t, filepath.Join(outputDirPath, "hello")) 33 | assert.Equal(t, []string{"hello.txt", "nested", "hello.md"}, testutils.Map(files, func(element fs.FileInfo) string { 34 | return element.Name() 35 | })) 36 | 37 | helloFileInfo := files[0] 38 | assert.NotZero(t, helloFileInfo.Size()) 39 | }) 40 | } 41 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/ybirader/pzip 2 | 3 | go 1.21 4 | 5 | require ( 6 | github.com/alecthomas/assert/v2 v2.3.0 7 | github.com/klauspost/compress v1.16.7 8 | golang.org/x/sync v0.3.0 9 | ) 10 | 11 | 
require ( 12 | github.com/alecthomas/repr v0.2.0 // indirect 13 | github.com/hexops/gotextdiff v1.0.3 // indirect 14 | ) 15 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/alecthomas/assert/v2 v2.3.0 h1:mAsH2wmvjsuvyBvAmCtm7zFsBlb8mIHx5ySLVdDZXL0= 2 | github.com/alecthomas/assert/v2 v2.3.0/go.mod h1:pXcQ2Asjp247dahGEmsZ6ru0UVwnkhktn7S0bBDLxvQ= 3 | github.com/alecthomas/repr v0.2.0 h1:HAzS41CIzNW5syS8Mf9UwXhNH1J9aix/BvDRf1Ml2Yk= 4 | github.com/alecthomas/repr v0.2.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= 5 | github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= 6 | github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= 7 | github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= 8 | github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= 9 | golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= 10 | golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= 11 | -------------------------------------------------------------------------------- /internal/testutils/archiver.go: -------------------------------------------------------------------------------- 1 | package testutils 2 | 3 | import ( 4 | "archive/zip" 5 | "fmt" 6 | "io/fs" 7 | "os" 8 | "os/exec" 9 | "path/filepath" 10 | "testing" 11 | 12 | "github.com/alecthomas/assert/v2" 13 | ) 14 | 15 | func CreateTempArchive(t testing.TB, name string) (*os.File, func()) { 16 | t.Helper() 17 | 18 | archive, err := os.Create(name) 19 | assert.NoError(t, err, fmt.Sprintf("could not create archive %s: %v", name, err)) 20 | 21 | cleanup := func() { 22 | archive.Close() 23 | os.RemoveAll(archive.Name()) 24 | } 25 | 26 | return archive, cleanup 27 | } 28 | 29 | func GetFileInfo(t testing.TB, name string) 
// Map applies cb to each element of elements and returns the transformed
// values, preserving order. A nil or empty input yields an empty slice.
func Map[T, K any](elements []T, cb func(element T) K) []K {
	out := make([]K, 0, len(elements))
	for _, e := range elements {
		out = append(out, cb(e))
	}

	return out
}
-------------------------------------------------------------------------------- 1 | package pool 2 | 3 | import ( 4 | "archive/zip" 5 | "bytes" 6 | "fmt" 7 | "io/fs" 8 | "os" 9 | "path/filepath" 10 | "sync" 11 | 12 | "github.com/klauspost/compress/flate" 13 | ) 14 | 15 | const DefaultBufferSize = 2 * 1024 * 1024 16 | 17 | var FilePool = sync.Pool{ 18 | New: func() any { 19 | return &File{CompressedData: bytes.NewBuffer(make([]byte, DefaultBufferSize))} 20 | }, 21 | } 22 | 23 | // A File refers to a file-backed buffer 24 | type File struct { 25 | Info fs.FileInfo 26 | Header *zip.FileHeader 27 | CompressedData *bytes.Buffer 28 | Overflow *os.File 29 | Compressor *flate.Writer 30 | Path string 31 | written int64 32 | } 33 | 34 | func NewFile(path string, info fs.FileInfo, relativeTo string) (*File, error) { 35 | f := FilePool.Get().(*File) 36 | err := f.Reset(path, info, relativeTo) 37 | return f, err 38 | } 39 | 40 | // Reset resets the file-backed buffer ready to be used by another file. 
41 | func (f *File) Reset(path string, info fs.FileInfo, relativeTo string) error { 42 | hdr, err := zip.FileInfoHeader(info) 43 | if err != nil { 44 | return fmt.Errorf("file info header for %q: %w", path, err) 45 | } 46 | f.Path = path 47 | f.Info = info 48 | f.Header = hdr 49 | f.CompressedData.Reset() 50 | f.Overflow = nil 51 | f.written = 0 52 | 53 | if f.Compressor == nil { 54 | f.Compressor, err = flate.NewWriter(f, flate.DefaultCompression) 55 | if err != nil { 56 | return fmt.Errorf("new compressor: %w", err) 57 | } 58 | } else { 59 | f.Compressor.Reset(f) 60 | } 61 | 62 | if relativeTo != "" { 63 | if err := f.setNameRelativeTo(relativeTo); err != nil { 64 | return fmt.Errorf("set name relative to %q: %w", relativeTo, err) 65 | } 66 | } 67 | 68 | return nil 69 | } 70 | 71 | func (f *File) Write(p []byte) (n int, err error) { 72 | if f.CompressedData.Available() != 0 { 73 | maxWriteable := min(f.CompressedData.Available(), len(p)) 74 | f.written += int64(maxWriteable) 75 | f.CompressedData.Write(p[:maxWriteable]) 76 | p = p[maxWriteable:] 77 | } 78 | 79 | if len(p) > 0 { 80 | if f.Overflow == nil { 81 | if f.Overflow, err = os.CreateTemp("", "pzip-overflow"); err != nil { 82 | return len(p), fmt.Errorf("create temporary file: %w", err) 83 | } 84 | } 85 | 86 | if _, err := f.Overflow.Write(p); err != nil { 87 | return len(p), fmt.Errorf("write temporary file for %q: %w", f.Header.Name, err) 88 | } 89 | f.written += int64(len(p)) 90 | } 91 | 92 | return len(p), nil 93 | } 94 | 95 | // Written returns the number of bytes of the file compressed and written to a destination 96 | func (f *File) Written() int64 { 97 | return f.written 98 | } 99 | 100 | // Overflowed returns true if the compressed contents of the file was too large to fit in the in-memory buffer. 101 | // The overflowed contents are written to a temporary file. 
102 | func (f *File) Overflowed() bool { 103 | return f.Overflow != nil 104 | } 105 | 106 | func (f *File) setNameRelativeTo(root string) error { 107 | relativeToRoot, err := filepath.Rel(root, f.Path) 108 | if err != nil { 109 | return fmt.Errorf("relative path of %q to root %q: %w", f.Path, root, err) 110 | } 111 | f.Header.Name = filepath.ToSlash(filepath.Join(filepath.Base(root), relativeToRoot)) 112 | return nil 113 | } 114 | -------------------------------------------------------------------------------- /pool/file_test.go: -------------------------------------------------------------------------------- 1 | package pool_test 2 | 3 | import ( 4 | "path/filepath" 5 | "testing" 6 | 7 | "github.com/alecthomas/assert/v2" 8 | "github.com/ybirader/pzip/internal/testutils" 9 | "github.com/ybirader/pzip/pool" 10 | ) 11 | 12 | const ( 13 | testdataRoot = "../testdata/" 14 | archivePath = testdataRoot + "archive.zip" 15 | helloTxtFileFixture = testdataRoot + "hello.txt" 16 | helloMarkdownFileFixture = testdataRoot + "hello.md" 17 | helloDirectoryFixture = testdataRoot + "hello/" 18 | ) 19 | 20 | func TestNewFile(t *testing.T) { 21 | t.Run("with file name relative to archive root when file path is relative", func(t *testing.T) { 22 | info := testutils.GetFileInfo(t, helloTxtFileFixture) 23 | file, err := pool.NewFile(helloTxtFileFixture, info, "") 24 | assert.NoError(t, err) 25 | 26 | assert.Equal(t, "hello.txt", file.Header.Name) 27 | }) 28 | 29 | t.Run("with file name relative to archive root when file path is absolute", func(t *testing.T) { 30 | absFilePath, err := filepath.Abs(helloTxtFileFixture) 31 | assert.NoError(t, err) 32 | info := testutils.GetFileInfo(t, absFilePath) 33 | file, err := pool.NewFile(absFilePath, info, "") 34 | assert.NoError(t, err) 35 | 36 | assert.Equal(t, "hello.txt", file.Header.Name) 37 | }) 38 | 39 | t.Run("with file name relative to archive root for directories", func(t *testing.T) { 40 | filePath := filepath.Join(helloDirectoryFixture, 
// Config holds the tunable parameters of a FileWorkerPool.
type Config struct {
	// Concurrency is the number of worker goroutines; NewFileWorkerPool
	// rejects values below one.
	Concurrency int
	// Capacity is the buffer size of the task queue, i.e. how many files
	// may be enqueued before Enqueue blocks.
	Capacity int
}
22 | type FileWorkerPool[T any] struct { 23 | tasks chan *T 24 | executor func(f *T) error 25 | g *errgroup.Group 26 | ctxCancel func(error) 27 | concurrency int 28 | capacity int 29 | } 30 | 31 | func NewFileWorkerPool[T any](executor func(f *T) error, config *Config) (*FileWorkerPool[T], error) { 32 | if config.Concurrency < minConcurrency { 33 | return nil, fmt.Errorf("concurrency %d not greater than zero", config.Concurrency) 34 | } 35 | 36 | return &FileWorkerPool[T]{ 37 | tasks: make(chan *T, config.Capacity), 38 | executor: executor, 39 | g: new(errgroup.Group), 40 | concurrency: config.Concurrency, 41 | capacity: config.Capacity, 42 | }, nil 43 | } 44 | 45 | // Start creates n goroutine workers, where n can be configured by setting 46 | // the concurrency option of the FileWorkerPool. The workers listen and execute tasks 47 | // as they are enqueued. The workers are shut down when an error occurs or the associated 48 | // ctx is canceled. 49 | func (f *FileWorkerPool[T]) Start(ctx context.Context) { 50 | f.reset() 51 | 52 | ctx, cancel := context.WithCancelCause(ctx) 53 | f.ctxCancel = cancel 54 | 55 | for i := 0; i < f.concurrency; i++ { 56 | f.g.Go(func() error { 57 | if err := f.listen(ctx); err != nil { 58 | f.ctxCancel(err) 59 | return err 60 | } 61 | 62 | return nil 63 | }) 64 | } 65 | } 66 | 67 | // Enqueue enqueues a file for processing 68 | func (f *FileWorkerPool[T]) Enqueue(file *T) { 69 | f.tasks <- file 70 | } 71 | 72 | // PendingFiles returns the number of tasks that are waiting to be processed 73 | func (f FileWorkerPool[T]) PendingFiles() int { 74 | return len(f.tasks) 75 | } 76 | 77 | // Close gracefully shuts down the FileWorkerPool, ensuring all enqueued tasks have been processed. 78 | // Files cannot be enqueued after Close has been called; attempting this will cause a panic. 79 | // Close returns the first error that was encountered during file processing. 
80 | func (f *FileWorkerPool[T]) Close() error { 81 | close(f.tasks) 82 | err := f.g.Wait() 83 | f.ctxCancel(err) 84 | return err 85 | } 86 | 87 | func (f *FileWorkerPool[T]) listen(ctx context.Context) error { 88 | for file := range f.tasks { 89 | if err := f.executor(file); err != nil { 90 | return fmt.Errorf("process file: %w", err) 91 | } else if err := ctx.Err(); err != nil { 92 | return err 93 | } 94 | } 95 | 96 | return nil 97 | } 98 | 99 | func (f *FileWorkerPool[T]) reset() { 100 | f.tasks = make(chan *T, f.capacity) 101 | } 102 | -------------------------------------------------------------------------------- /pool/file_worker_pool_test.go: -------------------------------------------------------------------------------- 1 | package pool_test 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "errors" 7 | "testing" 8 | 9 | "github.com/alecthomas/assert/v2" 10 | "github.com/ybirader/pzip/pool" 11 | ) 12 | 13 | func TestFileWorkerPool(t *testing.T) { 14 | t.Run("can enqueue tasks", func(t *testing.T) { 15 | fileProcessPool, err := pool.NewFileWorkerPool(func(f *pool.File) error { return nil }, &pool.Config{Concurrency: 1, Capacity: 1}) 16 | assert.NoError(t, err) 17 | fileProcessPool.Start(context.Background()) 18 | 19 | fileProcessPool.Enqueue(&pool.File{}) 20 | 21 | assert.Equal(t, 1, fileProcessPool.PendingFiles()) 22 | }) 23 | 24 | t.Run("has workers process files to completion", func(t *testing.T) { 25 | output := bytes.Buffer{} 26 | executor := func(_ *pool.File) error { 27 | output.WriteString("hello, world!") 28 | return nil 29 | } 30 | 31 | fileProcessPool, err := pool.NewFileWorkerPool(executor, &pool.Config{Concurrency: 1, Capacity: 1}) 32 | assert.NoError(t, err) 33 | fileProcessPool.Start(context.Background()) 34 | 35 | fileProcessPool.Enqueue(&pool.File{}) 36 | 37 | err = fileProcessPool.Close() 38 | 39 | assert.NoError(t, err) 40 | assert.Equal(t, 0, fileProcessPool.PendingFiles()) 41 | assert.Equal(t, "hello, world!", output.String()) 42 | }) 43 
| 44 | t.Run("returns an error if number of workers is less than one", func(t *testing.T) { 45 | executor := func(_ *pool.File) error { return nil } 46 | 47 | _, err := pool.NewFileWorkerPool(executor, &pool.Config{Concurrency: 0, Capacity: 1}) 48 | assert.Error(t, err) 49 | }) 50 | 51 | t.Run("can be closed and restarted", func(t *testing.T) { 52 | output := bytes.Buffer{} 53 | executor := func(_ *pool.File) error { 54 | output.WriteString("hello ") 55 | return nil 56 | } 57 | 58 | fileProcessPool, err := pool.NewFileWorkerPool(executor, &pool.Config{Concurrency: 1, Capacity: 1}) 59 | assert.NoError(t, err) 60 | 61 | fileProcessPool.Start(context.Background()) 62 | fileProcessPool.Enqueue(&pool.File{}) 63 | err = fileProcessPool.Close() 64 | assert.NoError(t, err) 65 | 66 | fileProcessPool.Start(context.Background()) 67 | fileProcessPool.Enqueue(&pool.File{}) 68 | err = fileProcessPool.Close() 69 | 70 | assert.NoError(t, err) 71 | assert.Equal(t, "hello hello ", output.String()) 72 | }) 73 | 74 | t.Run("stops workers with first error encountered by a goroutine", func(t *testing.T) { 75 | executor := func(file *pool.File) error { 76 | if file.Path == "1" { 77 | return errors.New("file is corrupt") 78 | } 79 | 80 | return nil 81 | } 82 | 83 | fileProcessPool, err := pool.NewFileWorkerPool(executor, &pool.Config{Concurrency: 2, Capacity: 1}) 84 | assert.NoError(t, err) 85 | 86 | fileProcessPool.Start(context.Background()) 87 | 88 | fileProcessPool.Enqueue(&pool.File{}) 89 | fileProcessPool.Enqueue(&pool.File{}) 90 | fileProcessPool.Enqueue(&pool.File{Path: "1"}) 91 | 92 | err = fileProcessPool.Close() 93 | 94 | assert.Error(t, err) 95 | }) 96 | } 97 | -------------------------------------------------------------------------------- /pool/worker_pool.go: -------------------------------------------------------------------------------- 1 | package pool 2 | 3 | import "context" 4 | 5 | type WorkerPool[T any] interface { 6 | Start(ctx context.Context) 7 | Close() error 8 | 
Enqueue(v *T) 9 | } 10 | -------------------------------------------------------------------------------- /specifications/archive.go: -------------------------------------------------------------------------------- 1 | package specifications 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/exec" 7 | "path/filepath" 8 | "testing" 9 | 10 | "github.com/alecthomas/assert/v2" 11 | ) 12 | 13 | type Archiver interface { 14 | ArchivePath() string 15 | DirPath() string 16 | Archive() 17 | } 18 | 19 | func Archive(t *testing.T, driver Archiver) { 20 | driver.Archive() 21 | defer os.RemoveAll(driver.ArchivePath()) 22 | 23 | assertValidArchive(t, driver.ArchivePath(), driver.DirPath()) 24 | } 25 | 26 | func assertValidArchive(t testing.TB, archivePath, dirPath string) { 27 | t.Helper() 28 | 29 | tmpDirPath, err := os.MkdirTemp("", "unzipped-archive") 30 | if err != nil { 31 | t.Fatal("ERROR: could not create temp directory", err) 32 | } 33 | defer os.RemoveAll(tmpDirPath) 34 | 35 | unzip := exec.Command("unzip", archivePath, "-d", tmpDirPath) 36 | unzipOutput, err := unzip.CombinedOutput() 37 | if err != nil { 38 | t.Fatalf("ERROR: could not unzip archive %s: %s: %v", archivePath, unzipOutput, err) 39 | } 40 | 41 | diff := exec.Command("diff", "--recursive", "--brief", dirPath, filepath.Join(tmpDirPath, filepath.Base(dirPath))) 42 | diffOutput, err := diff.Output() 43 | if err != nil { 44 | t.Fatal("ERROR: could not get stdout of diff", err) 45 | } 46 | 47 | assert.Zero(t, len(diffOutput), fmt.Sprintf("expected no output from diff but got %s", diffOutput)) 48 | } 49 | -------------------------------------------------------------------------------- /specifications/extract.go: -------------------------------------------------------------------------------- 1 | package specifications 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "testing" 7 | ) 8 | 9 | const testArchiveDirectoryName = "hello" 10 | 11 | type Extractor interface { 12 | DirPath() string 13 | ArchivePath() string 14 | 
Extract() 15 | } 16 | 17 | func Extract(t *testing.T, driver Extractor) { 18 | driver.Extract() 19 | dirPath := filepath.Join(driver.DirPath(), testArchiveDirectoryName) 20 | defer os.RemoveAll(dirPath) 21 | 22 | assertValidArchive(t, driver.ArchivePath(), dirPath) 23 | } 24 | -------------------------------------------------------------------------------- /testdata/hello.md: -------------------------------------------------------------------------------- 1 | This is a second file that needs archiving 2 | -------------------------------------------------------------------------------- /testdata/hello.txt: -------------------------------------------------------------------------------- 1 | hello, world! 2 | -------------------------------------------------------------------------------- /testdata/hello/hello.txt: -------------------------------------------------------------------------------- 1 | This is a file at the top-level of the test directory 2 | -------------------------------------------------------------------------------- /testdata/hello/nested/hello.md: -------------------------------------------------------------------------------- 1 | This is a nested file within the test directory tree. 2 | -------------------------------------------------------------------------------- /testdata/test.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ybirader/pzip/9eb13490a5a50cc90eb522783ce372a55bcd5196/testdata/test.zip --------------------------------------------------------------------------------