├── archiver_windows.go ├── go.mod ├── extractor_windows.go ├── util.go ├── .github └── workflows │ └── go.yml ├── archiver_unix.go ├── extractor_unix.go ├── extractor_options.go ├── LICENSE ├── go.sum ├── archiver_options.go ├── internal └── filepool │ ├── filepool.go │ └── filepool_test.go ├── register.go ├── README.md ├── extractor.go ├── archiver.go ├── extractor_test.go └── archiver_test.go /archiver_windows.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | // +build windows 3 | 4 | package fastzip 5 | 6 | import ( 7 | "io" 8 | "os" 9 | 10 | "github.com/klauspost/compress/zip" 11 | ) 12 | 13 | func (a *Archiver) createHeader(fi os.FileInfo, hdr *zip.FileHeader) (io.Writer, error) { 14 | return a.zw.CreateHeader(hdr) 15 | } 16 | 17 | func (a *Archiver) createRaw(fi os.FileInfo, hdr *zip.FileHeader) (io.Writer, error) { 18 | return a.zw.CreateRaw(hdr) 19 | } 20 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/saracen/fastzip 2 | 3 | go 1.22 4 | 5 | require ( 6 | github.com/klauspost/compress v1.18.0 7 | github.com/saracen/zipextra v0.0.0-20250129175152-f1aa42d25216 8 | github.com/stretchr/testify v1.10.0 9 | golang.org/x/sync v0.11.0 10 | golang.org/x/sys v0.30.0 11 | ) 12 | 13 | require ( 14 | github.com/davecgh/go-spew v1.1.1 // indirect 15 | github.com/pmezard/go-difflib v1.0.0 // indirect 16 | gopkg.in/yaml.v3 v3.0.1 // indirect 17 | ) 18 | -------------------------------------------------------------------------------- /extractor_windows.go: -------------------------------------------------------------------------------- 1 | // +build windows 2 | 3 | package fastzip 4 | 5 | import ( 6 | "os" 7 | "time" 8 | ) 9 | 10 | func lchmod(name string, mode os.FileMode) error { 11 | if mode&os.ModeSymlink != 0 { 12 | return nil 13 | } 14 | 15 | return 
os.Chmod(name, mode) 16 | } 17 | 18 | func lchtimes(name string, mode os.FileMode, atime, mtime time.Time) error { 19 | if mode&os.ModeSymlink != 0 { 20 | return nil 21 | } 22 | 23 | return os.Chtimes(name, atime, mtime) 24 | } 25 | 26 | func lchown(name string, uid, gid int) error { 27 | return nil 28 | } 29 | -------------------------------------------------------------------------------- /util.go: -------------------------------------------------------------------------------- 1 | package fastzip 2 | 3 | import ( 4 | "context" 5 | "io" 6 | "sync/atomic" 7 | ) 8 | 9 | func dclose(c io.Closer, err *error) { 10 | if cerr := c.Close(); cerr != nil && *err == nil { 11 | *err = cerr 12 | } 13 | } 14 | 15 | func incOnSuccess(inc *int64, err error) { 16 | if err == nil { 17 | atomic.AddInt64(inc, 1) 18 | } 19 | } 20 | 21 | type countWriter struct { 22 | w io.Writer 23 | written *int64 24 | ctx context.Context 25 | } 26 | 27 | func (w countWriter) Write(p []byte) (n int, err error) { 28 | if err = w.ctx.Err(); err == nil { 29 | n, err = w.w.Write(p) 30 | 31 | atomic.AddInt64(w.written, int64(n)) 32 | } 33 | return n, err 34 | } 35 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | build: 11 | strategy: 12 | matrix: 13 | # earliest supported go version and latest 2 versions we support 14 | go: [1.22.x, 1.24.x, 1.25.x] 15 | os: [ubuntu-latest, macos-latest, windows-latest] 16 | runs-on: ${{ matrix.os }} 17 | steps: 18 | - uses: actions/checkout@v5 19 | 20 | - name: Setup go 21 | uses: actions/setup-go@v5 22 | with: 23 | go-version: ${{ matrix.go }} 24 | 25 | - name: Run tests 26 | run: go test --cpu 1,4 -race -coverprofile coverage.txt -covermode atomic ./... 
27 | 28 | - uses: codecov/codecov-action@v5 29 | -------------------------------------------------------------------------------- /archiver_unix.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | // +build !windows 3 | 4 | package fastzip 5 | 6 | import ( 7 | "io" 8 | "math/big" 9 | "os" 10 | "syscall" 11 | 12 | "github.com/klauspost/compress/zip" 13 | "github.com/saracen/zipextra" 14 | ) 15 | 16 | func (a *Archiver) createHeader(fi os.FileInfo, hdr *zip.FileHeader) (io.Writer, error) { 17 | stat, ok := fi.Sys().(*syscall.Stat_t) 18 | if ok { 19 | hdr.Extra = append(hdr.Extra, zipextra.NewInfoZIPNewUnix(big.NewInt(int64(stat.Uid)), big.NewInt(int64(stat.Gid))).Encode()...) 20 | } 21 | 22 | return a.zw.CreateHeader(hdr) 23 | } 24 | 25 | func (a *Archiver) createRaw(fi os.FileInfo, hdr *zip.FileHeader) (io.Writer, error) { 26 | stat, ok := fi.Sys().(*syscall.Stat_t) 27 | if ok { 28 | hdr.Extra = append(hdr.Extra, zipextra.NewInfoZIPNewUnix(big.NewInt(int64(stat.Uid)), big.NewInt(int64(stat.Gid))).Encode()...) 
29 | } 30 | 31 | return a.zw.CreateRaw(hdr) 32 | } 33 | -------------------------------------------------------------------------------- /extractor_unix.go: -------------------------------------------------------------------------------- 1 | // +build !windows 2 | 3 | package fastzip 4 | 5 | import ( 6 | "os" 7 | "runtime" 8 | "time" 9 | 10 | "golang.org/x/sys/unix" 11 | ) 12 | 13 | func lchmod(name string, mode os.FileMode) error { 14 | var flags int 15 | if runtime.GOOS == "linux" { 16 | if mode&os.ModeSymlink != 0 { 17 | return nil 18 | } 19 | } else { 20 | flags = unix.AT_SYMLINK_NOFOLLOW 21 | } 22 | 23 | err := unix.Fchmodat(unix.AT_FDCWD, name, uint32(mode), flags) 24 | if err != nil { 25 | return &os.PathError{Op: "lchmod", Path: name, Err: err} 26 | } 27 | 28 | return nil 29 | } 30 | 31 | func lchtimes(name string, mode os.FileMode, atime, mtime time.Time) error { 32 | at := unix.NsecToTimeval(atime.UnixNano()) 33 | mt := unix.NsecToTimeval(mtime.UnixNano()) 34 | tv := [2]unix.Timeval{at, mt} 35 | 36 | err := unix.Lutimes(name, tv[:]) 37 | if err != nil { 38 | return &os.PathError{Op: "lchtimes", Path: name, Err: err} 39 | } 40 | 41 | return nil 42 | } 43 | 44 | func lchown(name string, uid, gid int) error { 45 | return os.Lchown(name, uid, gid) 46 | } 47 | -------------------------------------------------------------------------------- /extractor_options.go: -------------------------------------------------------------------------------- 1 | package fastzip 2 | 3 | // ExtractorOption is an option used when creating an extractor. 4 | type ExtractorOption func(*extractorOptions) error 5 | 6 | type extractorOptions struct { 7 | concurrency int 8 | chownErrorHandler func(name string, err error) error 9 | } 10 | 11 | // WithExtractorConcurrency will set the maximum number of files being 12 | // extracted concurrently. The default is set to GOMAXPROCS. 
13 | func WithExtractorConcurrency(n int) ExtractorOption { 14 | return func(o *extractorOptions) error { 15 | if n <= 0 { 16 | return ErrMinConcurrency 17 | } 18 | o.concurrency = n 19 | return nil 20 | } 21 | } 22 | 23 | // WithExtractorChownErrorHandler sets an error handler to be called if errors are 24 | // encountered when trying to preserve ownership of extracted files. Returning 25 | // nil will continue extraction, returning any error will cause Extract() to 26 | // error. 27 | func WithExtractorChownErrorHandler(fn func(name string, err error) error) ExtractorOption { 28 | return func(o *extractorOptions) error { 29 | o.chownErrorHandler = fn 30 | return nil 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Arran Walker 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= 4 | github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/saracen/zipextra v0.0.0-20250129175152-f1aa42d25216 h1:8zyjtFyKi5NJySVOJRiHmSN1vl6qugQ5n9C4X7WyY3U= 8 | github.com/saracen/zipextra v0.0.0-20250129175152-f1aa42d25216/go.mod h1:hnzuad9d2wdd3z8fC6UouHQK5qZxqv3F/E6MMzXc7q0= 9 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 10 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 11 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 12 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 13 | golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= 14 | golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 15 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 16 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 17 | gopkg.in/yaml.v3 v3.0.1 
h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 18 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 19 | -------------------------------------------------------------------------------- /archiver_options.go: -------------------------------------------------------------------------------- 1 | package fastzip 2 | 3 | import ( 4 | "errors" 5 | ) 6 | 7 | var ( 8 | ErrMinConcurrency = errors.New("concurrency must be at least 1") 9 | ) 10 | 11 | // ArchiverOption is an option used when creating an archiver. 12 | type ArchiverOption func(*archiverOptions) error 13 | 14 | type archiverOptions struct { 15 | method uint16 16 | concurrency int 17 | bufferSize int 18 | stageDir string 19 | offset int64 20 | } 21 | 22 | // WithArchiverMethod sets the zip method to be used for compressible files. 23 | func WithArchiverMethod(method uint16) ArchiverOption { 24 | return func(o *archiverOptions) error { 25 | o.method = method 26 | return nil 27 | } 28 | } 29 | 30 | // WithArchiverConcurrency will set the maximum number of files to be 31 | // compressed concurrently. The default is set to GOMAXPROCS. 32 | func WithArchiverConcurrency(n int) ArchiverOption { 33 | return func(o *archiverOptions) error { 34 | if n <= 0 { 35 | return ErrMinConcurrency 36 | } 37 | o.concurrency = n 38 | return nil 39 | } 40 | } 41 | 42 | // WithArchiverBufferSize sets the buffer size for each file to be compressed 43 | // concurrently. If a compressed file's data exceeds the buffer size, a 44 | // temporary file is written (to the stage directory) to hold the additional 45 | // data. The default is 2 mebibytes, so if concurrency is 16, 32 mebibytes of 46 | // memory will be allocated. 
47 | func WithArchiverBufferSize(n int) ArchiverOption { 48 | return func(o *archiverOptions) error { 49 | if n < 0 { 50 | n = 0 51 | } 52 | o.bufferSize = n 53 | return nil 54 | } 55 | } 56 | 57 | // WithStageDirectory sets the directory to be used to stage compressed files 58 | // before they're written to the archive. The default is the directory to be 59 | // archived. 60 | func WithStageDirectory(dir string) ArchiverOption { 61 | return func(o *archiverOptions) error { 62 | o.stageDir = dir 63 | return nil 64 | } 65 | } 66 | 67 | // WithArchiverOffset sets the offset of the beginning of the zip data. This 68 | // should be used when zip data is appended to an existing file. 69 | func WithArchiverOffset(n int64) ArchiverOption { 70 | return func(o *archiverOptions) error { 71 | o.offset = n 72 | return nil 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /internal/filepool/filepool.go: -------------------------------------------------------------------------------- 1 | package filepool 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "hash" 7 | "hash/crc32" 8 | "io" 9 | "os" 10 | "path/filepath" 11 | "strings" 12 | ) 13 | 14 | var ErrPoolSizeLessThanZero = errors.New("pool size must be greater than zero") 15 | 16 | const defaultBufferSize = 2 * 1024 * 1024 17 | 18 | type filePoolCloseError []error 19 | 20 | func (e filePoolCloseError) Len() int { 21 | return len(e) 22 | } 23 | 24 | func (e filePoolCloseError) Error() string { 25 | if len(e) == 1 { 26 | return e[0].Error() 27 | } 28 | 29 | var sb strings.Builder 30 | for _, err := range e { 31 | sb.WriteString(err.Error() + "\n") 32 | } 33 | 34 | return sb.String() 35 | } 36 | 37 | func (e filePoolCloseError) Unwrap() error { 38 | if len(e) > 1 { 39 | return e[1:] 40 | } 41 | return nil 42 | } 43 | 44 | // FilePool represents a pool of files that can be used as buffers. 
45 | type FilePool struct { 46 | files []*File 47 | limiter chan int 48 | } 49 | 50 | // New returns a new FilePool. 51 | func New(dir string, poolSize int, bufferSize int) (*FilePool, error) { 52 | if poolSize <= 0 { 53 | return nil, ErrPoolSizeLessThanZero 54 | } 55 | fp := &FilePool{} 56 | 57 | fp.files = make([]*File, poolSize) 58 | fp.limiter = make(chan int, poolSize) 59 | 60 | if bufferSize < 0 { 61 | bufferSize = defaultBufferSize 62 | } 63 | 64 | for i := range fp.files { 65 | fp.files[i] = newFile(dir, i, bufferSize) 66 | fp.limiter <- i 67 | } 68 | 69 | return fp, nil 70 | } 71 | 72 | // Get gets a file from the pool. 73 | func (fp *FilePool) Get() *File { 74 | idx := <-fp.limiter 75 | return fp.files[idx] 76 | } 77 | 78 | // Put puts a file back into the pool. 79 | func (fp *FilePool) Put(f *File) { 80 | f.reset() 81 | fp.limiter <- f.idx 82 | } 83 | 84 | // Close closes and removes all files in the pool. 85 | func (fp *FilePool) Close() error { 86 | var err filePoolCloseError 87 | for _, f := range fp.files { 88 | if f == nil || f.f == nil { 89 | continue 90 | } 91 | 92 | if cerr := f.f.Close(); cerr != nil { 93 | err = append(err, cerr) 94 | } 95 | if rerr := os.Remove(f.f.Name()); rerr != nil && !os.IsNotExist(rerr) { 96 | err = append(err, rerr) 97 | } 98 | } 99 | 100 | fp.files = nil 101 | if err.Len() > 0 { 102 | return err 103 | } 104 | return nil 105 | } 106 | 107 | // File is a file backed buffer. 
type File struct {
	dir string      // directory the spill file is created in
	idx int         // pool slot index; also the spill file's numeric suffix
	w   int64       // total bytes written (in-memory buffer + spill file)
	r   int64       // total bytes read so far
	crc hash.Hash32 // running checksum, fed only via Hasher()

	f    *os.File // spill file, lazily created when buf overflows
	buf  []byte   // in-memory buffer, lazily allocated on first Write
	size int      // capacity of buf; 0 means every write spills to disk
}

// newFile returns a File that buffers up to size bytes in memory before
// spilling the remainder to a file in dir.
func newFile(dir string, idx, size int) *File {
	return &File{
		dir:  dir,
		idx:  idx,
		size: size,
		crc:  crc32.NewIEEE(),
	}
}

// Write appends p, filling the in-memory buffer first and spilling any
// remainder to a lazily created temporary file named after the pool index.
func (f *File) Write(p []byte) (n int, err error) {
	// Lazily allocate the buffer on first use.
	if f.buf == nil && f.size > 0 {
		f.buf = make([]byte, f.size)
	}

	// Copy as much as fits into the in-memory buffer.
	if f.w < int64(len(f.buf)) {
		n = copy(f.buf[f.w:], p)
		p = p[n:]
		f.w += int64(n)
	}

	// Whatever did not fit goes to the spill file. The file offset is the
	// overall write position minus the bytes held in the buffer.
	if len(p) > 0 {
		if f.f == nil {
			f.f, err = os.Create(filepath.Join(f.dir, fmt.Sprintf("fastzip_%02d", f.idx)))
			if err != nil {
				return n, err
			}
		}

		bn := n // bytes already placed in the buffer
		n, err = f.f.WriteAt(p, f.w-int64(len(f.buf)))
		f.w += int64(n)
		n += bn // report buffered + spilled bytes to the caller
		if err != nil {
			return n, err
		}
	}

	return n, err
}

// Read reads back previously written bytes, serving from the in-memory
// buffer first and then from the spill file. It returns io.EOF once all
// written bytes have been consumed.
func (f *File) Read(p []byte) (n int, err error) {
	remaining := f.w - f.r
	if remaining <= 0 {
		return 0, io.EOF
	}
	// Never read past what has been written.
	if int64(len(p)) > remaining {
		p = p[:remaining]
	}

	// Serve from the in-memory buffer first.
	if f.r < int64(len(f.buf)) {
		n = copy(p, f.buf[f.r:])
		f.r += int64(n)
		p = p[n:]
	}

	// Any remainder comes from the spill file, offset by the buffered bytes.
	if len(p) > 0 && f.r >= int64(len(f.buf)) {
		bn := n // bytes already served from the buffer
		n, err = f.f.ReadAt(p, f.r-int64(len(f.buf)))
		f.r += int64(n)
		n += bn
	}

	return n, err
}

// Written returns the total number of bytes written.
func (f *File) Written() uint64 {
	return uint64(f.w)
}

// Hasher returns a writer that feeds the file's checksum.
func (f *File) Hasher() io.Writer {
	return f.crc
}

// Checksum returns the CRC-32 (IEEE) of all bytes written via Hasher().
func (f *File) Checksum() uint32 {
	return f.crc.Sum32()
}

// reset rewinds the read/write offsets and checksum so the File can be
// reused, keeping the allocated buffer and spill file for the next user.
func (f *File) reset() {
	f.w = 0
	f.r = 0
	f.crc.Reset()
	if f.f != nil {
		// NOTE(review): the Truncate error is ignored; presumably a failed
		// truncate only wastes disk space since offsets are reset — confirm.
		f.f.Truncate(0)
	}
}

-------------------------------------------------------------------------------- /register.go: -------------------------------------------------------------------------------- 1 | package fastzip 2 | 3 | import ( 4 | "bufio" 5 | "io" 6 | "sync" 7 | 8 | stdflate "compress/flate" 9 | 10 | "github.com/klauspost/compress/flate" 11 | "github.com/klauspost/compress/zstd" 12 | ) 13 | 14 | type flater interface { 15 | Close() error 16 | Flush() error 17 | Reset(dst io.Writer) 18 | Write(data []byte) (n int, err error) 19 | } 20 | 21 | func newFlateReaderPool(newReaderFn func(w io.Reader) io.ReadCloser) *sync.Pool { 22 | pool := &sync.Pool{} 23 | pool.New = func() interface{} { 24 | return &flateReader{pool, bufio.NewReaderSize(nil, 32*1024), newReaderFn(nil)} 25 | } 26 | return pool 27 | } 28 | 29 | type flateReader struct { 30 | pool *sync.Pool 31 | buf *bufio.Reader 32 | io.ReadCloser 33 | } 34 | 35 | func (fr *flateReader) Reset(r io.Reader) { 36 | fr.buf.Reset(r) 37 | fr.ReadCloser.(flate.Resetter).Reset(fr.buf, nil) 38 | } 39 | 40 | func (fr *flateReader) Close() error { 41 | err := fr.ReadCloser.Close() 42 | fr.pool.Put(fr) 43 | return err 44 | } 45 | 46 | // FlateDecompressor returns a pooled performant zip.Decompressor. 47 | func FlateDecompressor() func(r io.Reader) io.ReadCloser { 48 | pool := newFlateReaderPool(flate.NewReader) 49 | 50 | return func(r io.Reader) io.ReadCloser { 51 | fr := pool.Get().(*flateReader) 52 | fr.Reset(r) 53 | return fr 54 | } 55 | } 56 | 57 | // StdFlateDecompressor returns a pooled standard library zip.Decompressor. 
func StdFlateDecompressor() func(r io.Reader) io.ReadCloser {
	pool := newFlateReaderPool(stdflate.NewReader)

	// Each returned reader is taken from the pool and places itself back
	// into the pool when closed.
	return func(r io.Reader) io.ReadCloser {
		fr := pool.Get().(*flateReader)
		fr.Reset(r)
		return fr
	}
}

// zstdReader is a pooled zstd decoder that returns itself to its pool on
// Close.
type zstdReader struct {
	pool *sync.Pool
	buf  *bufio.Reader
	*zstd.Decoder
}

// Close resets the decoder with a nil reader and places it back into the
// pool for reuse.
func (zr *zstdReader) Close() error {
	err := zr.Decoder.Reset(nil)
	zr.pool.Put(zr)
	return err
}

// ZstdDecompressor returns a pooled zstd decoder.
func ZstdDecompressor() func(r io.Reader) io.ReadCloser {
	pool := &sync.Pool{}
	pool.New = func() interface{} {
		// Options favour low memory use and a bounded decode window;
		// decoder concurrency is 1 per entry.
		r, _ := zstd.NewReader(nil, zstd.WithDecoderLowmem(true), zstd.WithDecoderMaxWindow(128<<20), zstd.WithDecoderConcurrency(1))
		return &zstdReader{pool, bufio.NewReaderSize(nil, 32*1024), r}
	}

	return func(r io.Reader) io.ReadCloser {
		fr := pool.Get().(*zstdReader)
		// NOTE(review): the Reset error is ignored here — presumably safe
		// for a non-nil reader, but confirm against the zstd package docs.
		fr.Decoder.Reset(r)
		return fr
	}
}

// newFlateWriterPool returns a sync.Pool of flateWriters constructed by
// newWriterFn at the given compression level. An invalid level panics when
// the first writer is constructed.
func newFlateWriterPool(level int, newWriterFn func(w io.Writer, level int) (flater, error)) *sync.Pool {
	pool := &sync.Pool{}
	pool.New = func() interface{} {
		fw, err := newWriterFn(nil, level)
		if err != nil {
			panic(err)
		}

		return &flateWriter{pool, fw}
	}
	return pool
}

// flateWriter is a pooled compressor that returns itself to its pool on
// Close.
type flateWriter struct {
	pool *sync.Pool
	flater
}

// Reset redirects the writer's output to w, reusing its internal state.
func (fw *flateWriter) Reset(w io.Writer) {
	fw.flater.Reset(w)
}

// Close closes the underlying compressor, then places the writer back into
// the pool for reuse.
func (fw *flateWriter) Close() error {
	err := fw.flater.Close()
	fw.pool.Put(fw)
	return err
}

// FlateCompressor returns a pooled performant zip.Compressor configured to a
// specified compression level. Invalid flate levels will panic.
125 | func FlateCompressor(level int) func(w io.Writer) (io.WriteCloser, error) { 126 | pool := newFlateWriterPool(level, func(w io.Writer, level int) (flater, error) { 127 | return flate.NewWriter(w, level) 128 | }) 129 | 130 | return func(w io.Writer) (io.WriteCloser, error) { 131 | fw := pool.Get().(*flateWriter) 132 | fw.Reset(w) 133 | return fw, nil 134 | } 135 | } 136 | 137 | // StdFlateCompressor returns a pooled standard library zip.Compressor 138 | // configured to a specified compression level. Invalid flate levels will 139 | // panic. 140 | func StdFlateCompressor(level int) func(w io.Writer) (io.WriteCloser, error) { 141 | pool := newFlateWriterPool(level, func(w io.Writer, level int) (flater, error) { 142 | return stdflate.NewWriter(w, level) 143 | }) 144 | 145 | return func(w io.Writer) (io.WriteCloser, error) { 146 | fw := pool.Get().(*flateWriter) 147 | fw.Reset(w) 148 | return fw, nil 149 | } 150 | } 151 | 152 | func ZstdCompressor(level int) func(w io.Writer) (io.WriteCloser, error) { 153 | pool := newFlateWriterPool(level, func(w io.Writer, level int) (flater, error) { 154 | return zstd.NewWriter(w, zstd.WithEncoderCRC(false), zstd.WithEncoderLevel(zstd.EncoderLevel(level))) 155 | }) 156 | 157 | return func(w io.Writer) (io.WriteCloser, error) { 158 | fw := pool.Get().(*flateWriter) 159 | fw.Reset(w) 160 | return fw, nil 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /internal/filepool/filepool_test.go: -------------------------------------------------------------------------------- 1 | package filepool 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "os" 9 | "path/filepath" 10 | "runtime" 11 | "testing" 12 | 13 | "github.com/stretchr/testify/assert" 14 | "github.com/stretchr/testify/require" 15 | ) 16 | 17 | func TestFilePoolSizes(t *testing.T) { 18 | tests := []struct { 19 | size int 20 | err error 21 | }{ 22 | {-1, ErrPoolSizeLessThanZero}, 23 | {0, 
ErrPoolSizeLessThanZero}, 24 | {4, nil}, 25 | {8, nil}, 26 | } 27 | 28 | for _, tc := range tests { 29 | t.Run(fmt.Sprintf("size %d", tc.size), func(t *testing.T) { 30 | dir := t.TempDir() 31 | 32 | fp, err := New(dir, tc.size, 0) 33 | require.Equal(t, tc.err, err) 34 | if tc.err != nil { 35 | return 36 | } 37 | 38 | // writing should produce the temporary file 39 | for i := 0; i < tc.size; i++ { 40 | f := fp.Get() 41 | _, err = f.Write([]byte("foobar")) 42 | assert.NoError(t, err) 43 | fp.Put(f) 44 | 45 | _, err = os.Lstat(filepath.Join(dir, fmt.Sprintf("fastzip_%02d", i))) 46 | assert.NoError(t, err, fmt.Sprintf("fastzip_%02d should exist", i)) 47 | } 48 | 49 | // closing should cleanup temporary files 50 | assert.NoError(t, fp.Close()) 51 | for i := 0; i < tc.size; i++ { 52 | _, err = os.Lstat(filepath.Join(dir, fmt.Sprintf("fastzip_%02d", i))) 53 | assert.Error(t, err, fmt.Sprintf("fastzip_%02d shouldn't exist", i)) 54 | } 55 | }) 56 | } 57 | } 58 | 59 | func TestFilePoolReset(t *testing.T) { 60 | dir := t.TempDir() 61 | 62 | fp, err := New(dir, 16, 0) 63 | require.NoError(t, err) 64 | for i := range fp.files { 65 | file := fp.Get() 66 | _, err = file.Write(bytes.Repeat([]byte("0"), i)) 67 | assert.NoError(t, err) 68 | 69 | b, err := io.ReadAll(file) 70 | assert.NoError(t, err) 71 | assert.Len(t, b, i) 72 | assert.Equal(t, uint64(i), file.Written()) 73 | 74 | _, err = file.Hasher().Write([]byte("hello")) 75 | assert.NoError(t, err) 76 | assert.Equal(t, uint32(0x3610a686), file.Checksum()) 77 | 78 | fp.Put(file) 79 | } 80 | 81 | for range fp.files { 82 | file := fp.Get() 83 | 84 | b, err := io.ReadAll(file) 85 | assert.NoError(t, err) 86 | assert.Len(t, b, 0) 87 | assert.Equal(t, uint64(0), file.Written()) 88 | assert.Equal(t, uint32(0), file.Checksum()) 89 | 90 | fp.Put(file) 91 | } 92 | 93 | assert.NoError(t, fp.Close()) 94 | } 95 | 96 | func TestFilePoolCloseError(t *testing.T) { 97 | dir := t.TempDir() 98 | 99 | fp, err := New(dir, 16, 0) 100 | 
require.NoError(t, err) 101 | 102 | for _, file := range fp.files { 103 | f := fp.Get() 104 | _, err := f.Write([]byte("foobar")) 105 | assert.NoError(t, err) 106 | fp.Put(f) 107 | 108 | require.NoError(t, file.f.Close()) 109 | } 110 | 111 | err = fp.Close() 112 | require.Error(t, err, "expected already closed error") 113 | assert.Contains(t, err.Error(), "file already closed\n") 114 | count := 0 115 | for { 116 | count++ 117 | if err = errors.Unwrap(err); err == nil { 118 | break 119 | } 120 | } 121 | assert.Equal(t, 16, count) 122 | } 123 | 124 | func TestFilePoolNoErrorOnAlreadyDeleted(t *testing.T) { 125 | if runtime.GOOS == "windows" { 126 | t.Skip("Skipping test on windows (cannot delete in-use file)") 127 | } 128 | 129 | dir := t.TempDir() 130 | fp, err := New(dir, 16, 0) 131 | require.NoError(t, err) 132 | 133 | for range fp.files { 134 | f := fp.Get() 135 | _, err := f.Write([]byte("foobar")) 136 | assert.NoError(t, err) 137 | fp.Put(f) 138 | } 139 | 140 | err = os.RemoveAll(dir) 141 | require.NoError(t, err) 142 | 143 | assert.NoError(t, fp.Close()) 144 | } 145 | 146 | func TestFilePoolFileBuffer(t *testing.T) { 147 | dir := t.TempDir() 148 | 149 | tests := map[string]struct { 150 | data []byte 151 | fileExists bool 152 | }{ 153 | "below buffer length": { 154 | data: []byte("123456789"), 155 | fileExists: false, 156 | }, 157 | "equal to buffer length": { 158 | data: []byte("1234567890"), 159 | fileExists: false, 160 | }, 161 | "above buffer length": { 162 | data: []byte("1234567890x"), 163 | fileExists: true, 164 | }, 165 | } 166 | 167 | for tn, tc := range tests { 168 | t.Run(tn, func(t *testing.T) { 169 | fp, err := New(dir, 1, 10) 170 | require.NoError(t, err) 171 | defer fp.Close() 172 | require.Len(t, fp.files, 1) 173 | 174 | f := fp.files[0] 175 | n, err := f.Write(tc.data) 176 | assert.NoError(t, err) 177 | assert.Equal(t, len(tc.data), n) 178 | 179 | _, err = os.Lstat(filepath.Join(dir, "fastzip_00")) 180 | if tc.fileExists { 181 | 
assert.NoError(t, err, "fastzip_00 should exist") 182 | } else { 183 | assert.Error(t, err, "fastzip_00 should not exist") 184 | } 185 | 186 | // split reads to ensure read/write indexes track correctly 187 | buf := make([]byte, 20) 188 | size := 0 189 | { 190 | n, err := f.Read(buf[:5]) 191 | assert.NoError(t, err) 192 | size += n 193 | } 194 | { 195 | n, err := f.Read(buf[5:]) 196 | assert.NoError(t, err) 197 | size += n 198 | } 199 | 200 | assert.Equal(t, tc.data, buf[:size]) 201 | }) 202 | } 203 | } 204 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fastzip 2 | 3 | [![godoc](https://godoc.org/github.com/saracen/fastzip?status.svg)](http://godoc.org/github.com/saracen/fastzip) 4 | [![Build Status](https://travis-ci.org/saracen/fastzip.svg?branch=master)](https://travis-ci.org/saracen/fastzip) 5 | 6 | Fastzip is an opinionated Zip archiver and extractor with a focus on speed. 7 | 8 | - Archiving and extraction of files and directories can only occur within 9 | a specified directory. 10 | - Permissions, ownership (uid, gid on linux/unix) and modification times are 11 | preserved. 12 | - Buffers used for copying files are recycled to reduce allocations. 13 | - Files are archived and extracted concurrently. 14 | - By default, the excellent 15 | [`github.com/klauspost/compress/flate`](https://github.com/klauspost/compress) 16 | library is used for compression and decompression. 
17 | 18 | ## Example 19 | ### Archiver 20 | ```go 21 | // Create archive file 22 | w, err := os.Create("archive.zip") 23 | if err != nil { 24 | panic(err) 25 | } 26 | defer w.Close() 27 | 28 | // Create new Archiver 29 | a, err := fastzip.NewArchiver(w, "~/fastzip-archiving") 30 | if err != nil { 31 | panic(err) 32 | } 33 | defer a.Close() 34 | 35 | // Register a non-default level compressor if required 36 | // a.RegisterCompressor(zip.Deflate, fastzip.FlateCompressor(1)) 37 | 38 | // Walk directory, adding the files we want to add 39 | files := make(map[string]os.FileInfo) 40 | if err = filepath.Walk("~/fastzip-archiving", func(pathname string, info os.FileInfo, err error) error { 41 | files[pathname] = info 42 | return nil 43 | }); err != nil { 44 | panic(err) 45 | } 46 | 47 | // Archive 48 | if err = a.Archive(context.Background(), files); err != nil { 49 | panic(err) 50 | } 51 | ``` 52 | 53 | ### Extractor 54 | ```go 55 | // Create new extractor 56 | e, err := fastzip.NewExtractor("archive.zip", "~/fastzip-extraction") 57 | if err != nil { 58 | panic(err) 59 | } 60 | defer e.Close() 61 | 62 | // Extract archive files 63 | if err = e.Extract(context.Background()); err != nil { 64 | panic(err) 65 | } 66 | ``` 67 | 68 | ## Benchmarks 69 | 70 | Archiving and extracting a Go 1.13 GOROOT directory, 342M, 10308 files. 71 | 72 | StandardFlate is using `compress/flate`, NonStandardFlate is 73 | `klauspost/compress/flate`, both on level 5. This was performed on a server with an SSD and 24-cores. Each test was conducted 74 | using the `WithArchiverConcurrency` and `WithExtractorConcurrency` options of 1, 2, 4, 8 and 16. 
75 | 76 | ``` 77 | $ go test -bench Benchmark* -archivedir go1.13 -benchtime=30s -timeout=20m 78 | 79 | goos: linux 80 | goarch: amd64 81 | pkg: github.com/saracen/fastzip 82 | BenchmarkArchiveStore_1-24 39 788604969 ns/op 421.66 MB/s 9395405 B/op 266271 allocs/op 83 | BenchmarkArchiveStandardFlate_1-24 2 16154127468 ns/op 20.58 MB/s 12075824 B/op 257251 allocs/op 84 | BenchmarkArchiveStandardFlate_2-24 4 8686391074 ns/op 38.28 MB/s 15898644 B/op 260757 allocs/op 85 | BenchmarkArchiveStandardFlate_4-24 7 4391603068 ns/op 75.72 MB/s 19295604 B/op 260871 allocs/op 86 | BenchmarkArchiveStandardFlate_8-24 14 2291624196 ns/op 145.10 MB/s 21999205 B/op 260970 allocs/op 87 | BenchmarkArchiveStandardFlate_16-24 16 2105056696 ns/op 157.96 MB/s 29237232 B/op 261225 allocs/op 88 | BenchmarkArchiveNonStandardFlate_1-24 6 6011250439 ns/op 55.32 MB/s 11070960 B/op 257204 allocs/op 89 | BenchmarkArchiveNonStandardFlate_2-24 9 3629347294 ns/op 91.62 MB/s 18870130 B/op 262279 allocs/op 90 | BenchmarkArchiveNonStandardFlate_4-24 18 1766182097 ns/op 188.27 MB/s 22976928 B/op 262349 allocs/op 91 | BenchmarkArchiveNonStandardFlate_8-24 34 1002516188 ns/op 331.69 MB/s 29860872 B/op 262473 allocs/op 92 | BenchmarkArchiveNonStandardFlate_16-24 46 757112363 ns/op 439.20 MB/s 42036132 B/op 262714 allocs/op 93 | BenchmarkExtractStore_1-24 20 1625582744 ns/op 202.66 MB/s 22900375 B/op 330528 allocs/op 94 | BenchmarkExtractStore_2-24 42 786644031 ns/op 418.80 MB/s 22307976 B/op 329272 allocs/op 95 | BenchmarkExtractStore_4-24 92 384075767 ns/op 857.76 MB/s 22247288 B/op 328667 allocs/op 96 | BenchmarkExtractStore_8-24 165 215884636 ns/op 1526.02 MB/s 22354996 B/op 328459 allocs/op 97 | BenchmarkExtractStore_16-24 226 157087517 ns/op 2097.20 MB/s 22258691 B/op 328393 allocs/op 98 | BenchmarkExtractStandardFlate_1-24 6 5501808448 ns/op 23.47 MB/s 86148462 B/op 495586 allocs/op 99 | BenchmarkExtractStandardFlate_2-24 13 2748387174 ns/op 46.99 MB/s 84232141 B/op 491343 allocs/op 100 | 
BenchmarkExtractStandardFlate_4-24 21 1511063035 ns/op 85.47 MB/s 84998750 B/op 490124 allocs/op 101 | BenchmarkExtractStandardFlate_8-24 32 995911009 ns/op 129.67 MB/s 86188957 B/op 489574 allocs/op 102 | BenchmarkExtractStandardFlate_16-24 46 652641882 ns/op 197.88 MB/s 88256113 B/op 489575 allocs/op 103 | BenchmarkExtractNonStandardFlate_1-24 7 4989810851 ns/op 25.88 MB/s 64552948 B/op 373541 allocs/op 104 | BenchmarkExtractNonStandardFlate_2-24 13 2478287953 ns/op 52.11 MB/s 63413947 B/op 373183 allocs/op 105 | BenchmarkExtractNonStandardFlate_4-24 26 1333552250 ns/op 96.84 MB/s 63546389 B/op 373925 allocs/op 106 | BenchmarkExtractNonStandardFlate_8-24 37 817039739 ns/op 158.06 MB/s 64354655 B/op 375357 allocs/op 107 | BenchmarkExtractNonStandardFlate_16-24 63 566984549 ns/op 227.77 MB/s 65444227 B/op 379664 allocs/op 108 | ``` 109 | -------------------------------------------------------------------------------- /extractor.go: -------------------------------------------------------------------------------- 1 | package fastzip 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "fmt" 7 | "io" 8 | "os" 9 | "path/filepath" 10 | "runtime" 11 | "strings" 12 | "sync" 13 | "sync/atomic" 14 | "time" 15 | 16 | "github.com/klauspost/compress/zip" 17 | "github.com/klauspost/compress/zstd" 18 | "github.com/saracen/zipextra" 19 | "golang.org/x/sync/errgroup" 20 | ) 21 | 22 | var bufioWriterPool = sync.Pool{ 23 | New: func() interface{} { 24 | return bufio.NewWriterSize(nil, 32*1024) 25 | }, 26 | } 27 | 28 | var ( 29 | defaultDecompressor = FlateDecompressor() 30 | defaultZstdDecompressor = ZstdDecompressor() 31 | ) 32 | 33 | // Extractor is an opinionated Zip file extractor. 34 | // 35 | // Files are extracted in parallel. Only regular files, symlinks and directories 36 | // are supported. Files can only be extracted to the specified chroot directory. 37 | // 38 | // Access permissions, ownership (unix) and modification times are preserved. 
type Extractor struct {
	// These two fields are accessed via atomic operations.
	// They are at the start of the struct so they are properly 8 byte aligned.
	written, entries int64

	zr      *zip.Reader
	closer  io.Closer // closes the underlying zip file; nil when reading from a caller-owned io.ReaderAt
	m       sync.Mutex
	options extractorOptions
	chroot  string // absolute extraction root; files may not escape this directory
}

// NewExtractor opens a zip file and returns a new extractor.
//
// Close() should be called to close the extractor's underlying zip.Reader
// when done.
func NewExtractor(filename, chroot string, opts ...ExtractorOption) (*Extractor, error) {
	zr, err := zip.OpenReader(filename)
	if err != nil {
		return nil, err
	}

	return newExtractor(&zr.Reader, zr, chroot, opts)
}

// NewExtractorFromReader returns a new extractor, reading from the reader
// provided.
//
// The size of the archive should be provided.
//
// Unlike with NewExtractor(), calling Close() on the extractor is unnecessary.
func NewExtractorFromReader(r io.ReaderAt, size int64, chroot string, opts ...ExtractorOption) (*Extractor, error) {
	zr, err := zip.NewReader(r, size)
	if err != nil {
		return nil, err
	}

	return newExtractor(zr, nil, chroot, opts)
}

// newExtractor constructs an Extractor rooted at chroot, applies options and
// registers the default deflate and zstd decompressors.
func newExtractor(r *zip.Reader, c io.Closer, chroot string, opts []ExtractorOption) (*Extractor, error) {
	var err error
	// Resolve chroot to an absolute path up front so the traversal checks in
	// Extract() compare like-for-like absolute paths.
	if chroot, err = filepath.Abs(chroot); err != nil {
		return nil, err
	}

	e := &Extractor{
		chroot: chroot,
		zr:     r,
		closer: c,
	}

	// Default concurrency is the number of usable CPUs; options may override.
	e.options.concurrency = runtime.GOMAXPROCS(0)
	for _, o := range opts {
		err := o(&e.options)
		if err != nil {
			return nil, err
		}
	}

	e.RegisterDecompressor(zip.Deflate, defaultDecompressor)
	e.RegisterDecompressor(zstd.ZipMethodWinZip, defaultZstdDecompressor)

	return e, nil
}

// RegisterDecompressor allows custom decompressors for a specified method ID.
// The common methods Store and Deflate are built in.
func (e *Extractor) RegisterDecompressor(method uint16, dcomp zip.Decompressor) {
	e.zr.RegisterDecompressor(method, dcomp)
}

// Files returns the files within the archive.
func (e *Extractor) Files() []*zip.File {
	return e.zr.File
}

// Close closes the underlying ZipReader.
func (e *Extractor) Close() error {
	if e.closer == nil {
		return nil
	}
	return e.closer.Close()
}

// Written returns how many bytes and entries have been written to disk.
// Written can be called whilst extraction is in progress.
func (e *Extractor) Written() (bytes, entries int64) {
	return atomic.LoadInt64(&e.written), atomic.LoadInt64(&e.entries)
}

// Extract extracts files, creates symlinks and directories from the
// archive.
//
// Extraction happens in three passes: regular files and directories first
// (concurrently), then symlinks, then a final pass that restores directory
// metadata. The ordering is deliberate; see the comments on each pass.
func (e *Extractor) Extract(ctx context.Context) (err error) {
	// limiter bounds the number of concurrently extracting files.
	limiter := make(chan struct{}, e.options.concurrency)

	wg, ctx := errgroup.WithContext(ctx)
	defer func() {
		if werr := wg.Wait(); werr != nil {
			err = werr
		}
	}()

	// Pass 1: directories and regular files.
	for i, file := range e.zr.File {
		// sockets, devices, pipes etc. are unsupported and skipped.
		if file.Mode()&irregularModes != 0 {
			continue
		}

		var path string
		path, err = filepath.Abs(filepath.Join(e.chroot, file.Name))
		if err != nil {
			return err
		}

		// Refuse entries that would resolve outside of the chroot (zip-slip).
		if !strings.HasPrefix(path, e.chroot+string(filepath.Separator)) && path != e.chroot {
			return fmt.Errorf("%s cannot be extracted outside of chroot (%s)", path, e.chroot)
		}

		if err := os.MkdirAll(filepath.Dir(path), 0777); err != nil {
			return err
		}

		if ctx.Err() != nil {
			return ctx.Err()
		}

		switch {
		case file.Mode()&os.ModeSymlink != 0:
			// defer the creation of symlinks
			// this is to prevent a traversal vulnerability where a symlink is
			// first created and then files are additionally extracted into it
			continue

		case file.Mode().IsDir():
			err = e.createDirectory(path, file)

		default:
			limiter <- struct{}{}

			gf := e.zr.File[i]
			wg.Go(func() error {
				defer func() { <-limiter }()
				err := e.createFile(ctx, path, gf)
				if err == nil {
					err = e.updateFileMetadata(path, gf)
				}
				return err
			})
		}
		if err != nil {
			return err
		}
	}

	if err := wg.Wait(); err != nil {
		return err
	}

	// Create all symlinks. This will update parent directory mtimes to current time.
	for _, file := range e.zr.File {
		if file.Mode()&os.ModeSymlink == 0 {
			continue
		}

		path, err := filepath.Abs(filepath.Join(e.chroot, file.Name))
		if err != nil {
			return err
		}

		// createSymlink() handles the symlink's own timestamp preservation
		// but will update the parent directory's mtime to current time.
		if err := e.createSymlink(path, file); err != nil {
			return err
		}
	}

	// Update ALL directory metadata after symlinks are created.
	// This ensures all directory timestamps and permissions are correctly preserved.
	for _, file := range e.zr.File {
		if !file.Mode().IsDir() {
			continue
		}

		path, err := filepath.Abs(filepath.Join(e.chroot, file.Name))
		if err != nil {
			return err
		}

		err = e.updateFileMetadata(path, file)
		if err != nil {
			return err
		}
	}

	return nil
}

// createDirectory makes the directory; an already-existing directory is not
// an error. Permissions/timestamps are restored later by updateFileMetadata.
func (e *Extractor) createDirectory(path string, file *zip.File) error {
	err := os.Mkdir(path, 0777)
	if os.IsExist(err) {
		err = nil
	}
	incOnSuccess(&e.entries, err)
	return err
}

// createSymlink replaces any existing file at path with a symlink whose
// target is the archive entry's contents, then restores its metadata.
func (e *Extractor) createSymlink(path string, file *zip.File) error {
	if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
		return err
	}

	r, err := file.Open()
	if err != nil {
		return err
	}
	defer r.Close()

	// The entry's data is the link target.
	name, err := io.ReadAll(r)
	if err != nil {
		return err
	}

	if err := os.Symlink(string(name), path); err != nil {
		return err
	}

	err = e.updateFileMetadata(path, file)
	incOnSuccess(&e.entries, err)

	return err
}

// createFile extracts a regular file, streaming through a pooled buffered
// writer and counting bytes written (for Written() and cancellation).
func (e *Extractor) createFile(ctx context.Context, path string, file *zip.File) (err error) {
	if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
		return err
	}

	r, err := file.Open()
	if err != nil {
		return err
	}
	defer dclose(r, &err)

	f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
	if err != nil {
		return err
	}
	defer dclose(f, &err)

	bw := bufioWriterPool.Get().(*bufio.Writer)
	defer bufioWriterPool.Put(bw)

	// countWriter checks ctx and accumulates e.written on every write.
	bw.Reset(countWriter{f, &e.written, ctx})
	if _, err = bw.ReadFrom(r); err != nil {
		return err
	}

	err = bw.Flush()
	incOnSuccess(&e.entries, err)

	return err
}

// updateFileMetadata restores times, mode and (unix) ownership recorded in
// the entry's extra fields. chown failures are reported to the optional
// chownErrorHandler rather than failing extraction outright.
func (e *Extractor) updateFileMetadata(path string, file *zip.File) error {
	fields, err := zipextra.Parse(file.Extra)
	if err != nil {
		return err
	}

	if err := lchtimes(path, file.Mode(), time.Now(), file.Modified); err != nil {
		return err
	}

	if err := lchmod(path, file.Mode()); err != nil {
		return err
	}

	unixfield, ok := fields[zipextra.ExtraFieldUnixN]
	if !ok {
		return nil
	}

	unix, err := unixfield.InfoZIPNewUnix()
	if err != nil {
		return err
	}

	err = lchown(path, int(unix.Uid.Int64()), int(unix.Gid.Int64()))
	if err == nil {
		return nil
	}

	if e.options.chownErrorHandler == nil {
		return nil
	}

	// Serialize handler invocations; handlers need not be thread-safe.
	e.m.Lock()
	defer e.m.Unlock()

	return e.options.chownErrorHandler(file.Name, err)
}
--------------------------------------------------------------------------------
/archiver.go:
--------------------------------------------------------------------------------
package fastzip

import (
	"bufio"
	"context"
	"fmt"
	"hash/crc32"
	"io"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"
	"unicode/utf8"

	"github.com/klauspost/compress/zip"
	"github.com/klauspost/compress/zstd"
	"github.com/saracen/fastzip/internal/filepool"
	"github.com/saracen/zipextra"
	"golang.org/x/sync/errgroup"
)

// irregularModes are file types that cannot be stored in a zip and are skipped.
const irregularModes = os.ModeSocket | os.ModeDevice | os.ModeCharDevice | os.ModeNamedPipe

var bufioReaderPool = sync.Pool{
	New: func() interface{} {
		return bufio.NewReaderSize(nil, 32*1024)
	},
}

var (
	defaultCompressor     = FlateCompressor(-1)
	defaultZstdCompressor = ZstdCompressor(int(zstd.SpeedDefault))
)

// Archiver is an opinionated Zip archiver.
//
// Only regular files, symlinks and directories are supported.
Only files that
// are children of the specified chroot directory will be archived.
//
// Access permissions, ownership (unix) and modification times are preserved.
type Archiver struct {
	// These two fields are accessed via atomic operations.
	// They are at the start of the struct so they are properly 8 byte aligned.
	written, entries int64

	zw      *zip.Writer
	options archiverOptions
	chroot  string // absolute archive root; only children of this path are archived
	m       sync.Mutex

	// compressors mirrors the registrations in zw so compressFile can
	// pre-compress concurrently with the same compressor.
	compressors map[uint16]zip.Compressor
}

// NewArchiver returns a new Archiver.
func NewArchiver(w io.Writer, chroot string, opts ...ArchiverOption) (*Archiver, error) {
	var err error
	if chroot, err = filepath.Abs(chroot); err != nil {
		return nil, err
	}

	a := &Archiver{
		chroot:      chroot,
		compressors: make(map[uint16]zip.Compressor),
	}

	// Defaults; options below may override any of these.
	a.options.method = zip.Deflate
	a.options.concurrency = runtime.GOMAXPROCS(0)
	a.options.stageDir = chroot
	a.options.bufferSize = -1
	for _, o := range opts {
		err := o(&a.options)
		if err != nil {
			return nil, err
		}
	}

	a.zw = zip.NewWriter(w)
	a.zw.SetOffset(a.options.offset)

	// register flate compressor
	a.RegisterCompressor(zip.Deflate, defaultCompressor)
	a.RegisterCompressor(zstd.ZipMethodWinZip, defaultZstdCompressor)

	return a, nil
}

// RegisterCompressor registers custom compressors for a specified method ID.
// The common methods Store and Deflate are built in.
func (a *Archiver) RegisterCompressor(method uint16, comp zip.Compressor) {
	a.zw.RegisterCompressor(method, comp)
	a.compressors[method] = comp
}

// Close closes the underlying ZipWriter.
func (a *Archiver) Close() error {
	return a.zw.Close()
}

// Written returns how many bytes and entries have been written to the archive.
// Written can be called whilst archiving is in progress.
func (a *Archiver) Written() (bytes, entries int64) {
	return atomic.LoadInt64(&a.written), atomic.LoadInt64(&a.entries)
}

// Archive archives all files, symlinks and directories.
//
// Entries are processed in sorted name order. Regular files are compressed
// concurrently (staged via a filepool) when concurrency > 1; directories and
// symlinks are written inline under the archiver mutex.
func (a *Archiver) Archive(ctx context.Context, files map[string]os.FileInfo) (err error) {
	names := make([]string, 0, len(files))
	for name := range files {
		names = append(names, name)
	}
	sort.Strings(names)

	var fp *filepool.FilePool

	concurrency := a.options.concurrency
	if len(files) < concurrency {
		concurrency = len(files)
	}
	if concurrency > 1 {
		fp, err = filepool.New(a.options.stageDir, concurrency, a.options.bufferSize)
		if err != nil {
			return err
		}
		defer dclose(fp, &err)
	}

	wg, ctx := errgroup.WithContext(ctx)
	defer func() {
		if werr := wg.Wait(); werr != nil {
			err = werr
		}
	}()

	// Headers are stored in a slice so goroutines can safely take a stable
	// pointer to their own element.
	hdrs := make([]zip.FileHeader, len(names))

	for i, name := range names {
		fi := files[name]
		if fi.Mode()&irregularModes != 0 {
			continue
		}

		path, err := filepath.Abs(name)
		if err != nil {
			return err
		}

		// Refuse files outside of the chroot.
		if !strings.HasPrefix(path, a.chroot+string(filepath.Separator)) && path != a.chroot {
			return fmt.Errorf("%s cannot be archived from outside of chroot (%s)", name, a.chroot)
		}

		rel, err := filepath.Rel(a.chroot, path)
		if err != nil {
			return err
		}

		hdr := &hdrs[i]
		fileInfoHeader(rel, fi, hdr)

		if ctx.Err() != nil {
			return ctx.Err()
		}

		switch {
		case hdr.Mode()&os.ModeSymlink != 0:
			err = a.createSymlink(path, fi, hdr)

		case hdr.Mode().IsDir():
			err = a.createDirectory(fi, hdr)

		default:
			// Empty files keep the default method (Store-equivalent output).
			if hdr.UncompressedSize64 > 0 {
				hdr.Method = a.options.method
			}

			if fp == nil {
				// concurrency of 1: compress inline.
				err = a.createFile(ctx, path, fi, hdr, nil)
				incOnSuccess(&a.entries, err)
			} else {
				f := fp.Get()
				wg.Go(func() error {
					err := a.createFile(ctx, path, fi, hdr, f)
					fp.Put(f)
					incOnSuccess(&a.entries, err)
					return err
				})
			}
		}

		if err != nil {
			return err
		}
	}

	return wg.Wait()
}

// fileInfoHeader populates hdr from fi, mirroring zip.FileInfoHeader but
// writing into a caller-supplied header.
func fileInfoHeader(name string, fi os.FileInfo, hdr *zip.FileHeader) {
	hdr.Name = filepath.ToSlash(name)
	hdr.UncompressedSize64 = uint64(fi.Size())
	hdr.Modified = fi.ModTime()
	hdr.SetMode(fi.Mode())

	// Directory entries conventionally end with a slash.
	if hdr.Mode().IsDir() {
		hdr.Name += "/"
	}

	const uint32max = (1 << 32) - 1
	if hdr.UncompressedSize64 > uint32max {
		hdr.UncompressedSize = uint32max
	} else {
		hdr.UncompressedSize = uint32(hdr.UncompressedSize64)
	}
}

// createDirectory writes a directory entry; serialized by the archiver mutex.
func (a *Archiver) createDirectory(fi os.FileInfo, hdr *zip.FileHeader) error {
	a.m.Lock()
	defer a.m.Unlock()

	_, err := a.createHeader(fi, hdr)
	incOnSuccess(&a.entries, err)
	return err
}

// createSymlink stores the link target as the entry's (uncompressed) data.
func (a *Archiver) createSymlink(path string, fi os.FileInfo, hdr *zip.FileHeader) error {
	a.m.Lock()
	defer a.m.Unlock()

	link, err := os.Readlink(path)
	if err != nil {
		return err
	}

	// Don't use a data descriptor to shave a few bytes and to make sure that the symlink can be stream-unzipped
	hdr.Flags &= ^uint16(0x8)
	hdr.Method = zip.Store
	hdr.CompressedSize64 = uint64(len(link))
	hdr.UncompressedSize64 = hdr.CompressedSize64
	hdr.CRC32 = crc32.ChecksumIEEE([]byte(link))

	w, err := a.createHeaderRaw(fi, hdr)
	if err != nil {
		return err
	}

	_, err = io.WriteString(w, link)
	incOnSuccess(&a.entries, err)
	return err
}

func (a
*Archiver) createFile(ctx context.Context, path string, fi os.FileInfo, hdr *zip.FileHeader, tmp *filepool.File) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	return a.compressFile(ctx, f, fi, hdr, tmp)
}

// compressFile pre-compresses the file first to a file from the filepool,
// making use of zip.CreateRaw. This allows for concurrent files to be
// compressed and then added to the zip file when ready.
// If no filepool file is available (when using a concurrency of 1) or the
// compressed file is larger than the uncompressed version, the file is moved
// to the zip file using the conventional zip.CreateHeader.
func (a *Archiver) compressFile(ctx context.Context, f *os.File, fi os.FileInfo, hdr *zip.FileHeader, tmp *filepool.File) error {
	comp, ok := a.compressors[hdr.Method]
	// if we don't have the registered compressor, it most likely means Store is
	// being used, so we revert to non-concurrent behaviour
	if !ok || tmp == nil {
		return a.compressFileSimple(ctx, f, fi, hdr)
	}

	fw, err := comp(tmp)
	if err != nil {
		return err
	}

	br := bufioReaderPool.Get().(*bufio.Reader)
	defer bufioReaderPool.Put(br)
	br.Reset(f)

	// Compress to the staging file whilst hashing the uncompressed bytes.
	_, err = io.Copy(io.MultiWriter(fw, tmp.Hasher()), br)
	dclose(fw, &err)
	if err != nil {
		return err
	}

	// 0x8 = data descriptor flag, required by CreateRaw's streaming layout.
	hdr.Flags |= 0x8
	hdr.CompressedSize64 = tmp.Written()
	// if compressed file is larger, use the uncompressed version.
	if hdr.CompressedSize64 > hdr.UncompressedSize64 {
		// NOTE(review): Seek error is ignored here — TODO confirm this is
		// intentional best-effort behaviour.
		f.Seek(0, io.SeekStart)
		hdr.Method = zip.Store
		return a.compressFileSimple(ctx, f, fi, hdr)
	}
	hdr.CRC32 = tmp.Checksum()

	// Only the final copy into the zip holds the archiver lock.
	a.m.Lock()
	defer a.m.Unlock()

	w, err := a.createHeaderRaw(fi, hdr)
	if err != nil {
		return err
	}

	br.Reset(tmp)
	_, err = br.WriteTo(countWriter{w, &a.written, ctx})
	return err
}

// compressFileSimple uses the conventional zip.createHeader. This differs from
// compressFile as it locks the zip _whilst_ compressing (if the method is not
// Store).
func (a *Archiver) compressFileSimple(ctx context.Context, f *os.File, fi os.FileInfo, hdr *zip.FileHeader) error {
	br := bufioReaderPool.Get().(*bufio.Reader)
	defer bufioReaderPool.Put(br)
	br.Reset(f)

	a.m.Lock()
	defer a.m.Unlock()

	w, err := a.createHeader(fi, hdr)
	if err != nil {
		return err
	}

	_, err = br.WriteTo(countWriter{w, &a.written, ctx})
	return err
}

// createHeaderRaw prepares a header for zip.CreateRaw, filling in the flag,
// version and timestamp fields CreateRaw does not set itself.
func (a *Archiver) createHeaderRaw(fi os.FileInfo, fh *zip.FileHeader) (io.Writer, error) {
	// When the standard Go library's version of CreateRaw was added, rather
	// than solely focus on custom compression in "raw" mode, it also removed
	// the convenience of setting up common zip flags and timestamp logic. This
	// here replicates what CreateHeader() does:
	// https://github.com/golang/go/blob/go1.17/src/archive/zip/writer.go#L271
	const zipVersion20 = 20

	utf8Valid1, utf8Require1 := detectUTF8(fh.Name)
	utf8Valid2, utf8Require2 := detectUTF8(fh.Comment)
	switch {
	case fh.NonUTF8:
		fh.Flags &^= 0x800
	case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2):
		fh.Flags |= 0x800
	}

	fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20
	fh.ReaderVersion = zipVersion20

	if !fh.Modified.IsZero() {
		fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified)
		fh.Extra = append(fh.Extra, zipextra.NewExtendedTimestamp(fh.Modified).Encode()...)
	}

	return a.createRaw(fi, fh)
}

// detectUTF8 reports whether s is valid UTF-8 and whether it requires the
// UTF-8 flag (contains characters outside the printable ASCII subset).
// https://github.com/golang/go/blob/go1.17.7/src/archive/zip/writer.go#L229
func detectUTF8(s string) (valid, require bool) {
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		i += size
		if r < 0x20 || r > 0x7d || r == 0x5c {
			if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) {
				return false, false
			}
			require = true
		}
	}
	return true, require
}

// timeToMsDosTime converts t to the MS-DOS date/time encoding used by zip.
// https://github.com/golang/go/blob/go1.17.7/src/archive/zip/struct.go#L242
func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16) {
	fDate = uint16(t.Day() + int(t.Month())<<5 + (t.Year()-1980)<<9)
	fTime = uint16(t.Second()/2 + t.Minute()<<5 + t.Hour()<<11)
	return
}
--------------------------------------------------------------------------------
/extractor_test.go:
--------------------------------------------------------------------------------
package fastzip

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"github.com/klauspost/compress/zip"
	"github.com/klauspost/compress/zstd"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// testExtract extracts filename into a temp dir and verifies every extracted
// entry against the expected files map; it returns the on-disk FileInfos.
func testExtract(t *testing.T, filename string, files map[string]testFile) map[string]os.FileInfo {
	dir := t.TempDir()
	e, err := NewExtractor(filename, dir)
	require.NoError(t, err)
	defer e.Close()

	for _, f := range e.Files() {
		assert.Equal(t, filepath.ToSlash(f.Name), f.Name, "zip file path separator not /")
	}

	require.NoError(t, e.Extract(context.Background()))

	result := make(map[string]os.FileInfo)
	err = filepath.Walk(dir, func(pathname string, fi os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		rel, err := filepath.Rel(dir, pathname)
		if err != nil {
			return err
		}
		if rel == "." {
			return nil
		}

		rel = filepath.ToSlash(rel)
		require.Contains(t, files, rel)

		result[pathname] = fi

		mode := files[rel].mode
		assert.Equal(t, mode.Perm(), fi.Mode().Perm(), "file %v perm not equal", rel)
		assert.Equal(t, mode.IsDir(), fi.IsDir(), "file %v is_dir not equal", rel)
		assert.Equal(t, mode&os.ModeSymlink, fi.Mode()&os.ModeSymlink, "file %v mode symlink not equal", rel)

		if fi.IsDir() || fi.Mode()&os.ModeSymlink != 0 {
			return nil
		}

		contents, err := os.ReadFile(pathname)
		require.NoError(t, err)
		assert.Equal(t, string(files[rel].contents), string(contents))

		return nil
	})
	require.NoError(t, err)

	return result
}

// TestExtractCancelContext checks that cancelling the context mid-extraction
// aborts with "context canceled".
func TestExtractCancelContext(t *testing.T) {
	twoMB := strings.Repeat("1", 2*1024*1024)
	testFiles := map[string]testFile{}
	for i := 0; i < 100; i++ {
		testFiles[fmt.Sprintf("file_%d", i)] = testFile{mode: 0666, contents: twoMB}
	}

	files, dir := testCreateFiles(t, testFiles)
	defer os.RemoveAll(dir)

	testCreateArchive(t, dir, files, func(filename, chroot string) {
		e, err := NewExtractor(filename, dir, WithExtractorConcurrency(1))
		require.NoError(t, err)

		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()

		done := make(chan struct{})
		go func() {
			defer func() { done <- struct{}{} }()

			require.EqualError(t, e.Extract(ctx), "context canceled")
		}()

		for {
			select {
			case <-done:
				return

			default:
				// cancel as soon as any data is written
				if bytes, _ := e.Written(); bytes > 0 {
					cancel()
				}
			}
		}
	})
}

// TestExtractorWithDecompressor verifies a custom (stdlib flate) decompressor
// can be registered and used.
func TestExtractorWithDecompressor(t *testing.T) {
	testFiles := map[string]testFile{
		"foo.go": {mode: 0666},
		"bar.go": {mode: 0666},
	}

	files, dir := testCreateFiles(t, testFiles)
	defer os.RemoveAll(dir)

	testCreateArchive(t, dir, files, func(filename, chroot string) {
		e, err := NewExtractor(filename, dir)
		require.NoError(t, err)
		e.RegisterDecompressor(zip.Deflate, StdFlateDecompressor())
		defer e.Close()

		require.NoError(t, e.Extract(context.Background()))
	})
}

// TestExtractorWithConcurrency verifies valid/invalid concurrency options.
func TestExtractorWithConcurrency(t *testing.T) {
	testFiles := map[string]testFile{
		"foo.go": {mode: 0666},
		"bar.go": {mode: 0666},
	}

	concurrencyTests := []struct {
		concurrency int
		pass        bool
	}{
		{-1, false},
		{0, false},
		{1, true},
		{30, true},
	}

	files, dir := testCreateFiles(t, testFiles)
	defer os.RemoveAll(dir)

	testCreateArchive(t, dir, files, func(filename, chroot string) {
		for _, test := range concurrencyTests {
			e, err := NewExtractor(filename, dir, WithExtractorConcurrency(test.concurrency))
			if test.pass {
				assert.NoError(t, err)
				require.NoError(t, e.Close())
			} else {
				assert.Error(t, err)
			}
		}
	})
}

// TestExtractorWithChownErrorHandler verifies the handler is not invoked when
// chown succeeds (or is a no-op).
func TestExtractorWithChownErrorHandler(t *testing.T) {
	testFiles := map[string]testFile{
		"foo.go": {mode: 0666},
		"bar.go": {mode: 0666},
	}

	files, dir := testCreateFiles(t, testFiles)
	defer os.RemoveAll(dir)

	testCreateArchive(t, dir, files, func(filename, chroot string) {
		e, err := NewExtractor(filename, dir, WithExtractorChownErrorHandler(func(name string, err error) error {
			assert.Fail(t, "should have no error")
			return nil
		}))
		assert.NoError(t, err)
		assert.NoError(t, e.Extract(context.Background()))
		require.NoError(t, e.Close())
	})
}

// TestExtractorFromReader exercises the io.ReaderAt-based constructor.
func TestExtractorFromReader(t *testing.T) {
	testFiles := map[string]testFile{
		"foo.go": {mode: 0666},
		"bar.go": {mode: 0666},
	}

	files, dir := testCreateFiles(t, testFiles)
	defer os.RemoveAll(dir)

	testCreateArchive(t, dir, files, func(filename, chroot string) {
		f, err := os.Open(filename)
		require.NoError(t, err)

		fi, err := f.Stat()
		require.NoError(t, err)

		e, err := NewExtractorFromReader(f, fi.Size(), chroot)
		require.NoError(t, err)
		require.NoError(t, e.Extract(context.Background()))
		require.NoError(t, e.Close())
	})
}

// TestExtractorDetectSymlinkTraversal builds a malicious archive whose
// symlink points outside the chroot and expects extraction to fail.
func TestExtractorDetectSymlinkTraversal(t *testing.T) {
	dir := t.TempDir()
	archivePath := filepath.Join(dir, "vuln.zip")
	f, err := os.Create(archivePath)
	require.NoError(t, err)
	zw := zip.NewWriter(f)

	// create symlink
	symlink := &zip.FileHeader{Name: "root/inner"}
	symlink.SetMode(os.ModeSymlink)
	w, err := zw.CreateHeader(symlink)
	require.NoError(t, err)

	_, err = w.Write([]byte("../"))
	require.NoError(t, err)

	// create file within symlink
	_, err = zw.Create("root/inner/vuln")
	require.NoError(t, err)

	zw.Close()
	f.Close()

	e, err :=
NewExtractor(archivePath, dir)
	require.NoError(t, err)
	defer e.Close()

	require.Error(t, e.Extract(context.Background()))
}

// aopts is a small helper that turns a variadic list of archiver options into
// a slice, for readability at benchmark call sites.
func aopts(options ...ArchiverOption) []ArchiverOption {
	return options
}

// benchmarkExtractOptions archives *archiveDir once, then repeatedly extracts
// it, measuring extraction throughput. stdDeflate selects the stdlib flate
// decompressor; ao/eo are the archiver/extractor options under test.
func benchmarkExtractOptions(b *testing.B, stdDeflate bool, ao []ArchiverOption, eo ...ExtractorOption) {
	files := make(map[string]os.FileInfo)
	// best-effort walk; missing entries simply aren't archived
	filepath.Walk(*archiveDir, func(filename string, fi os.FileInfo, err error) error {
		files[filename] = fi
		return nil
	})

	dir := b.TempDir()
	archiveName := filepath.Join(dir, "fastzip-benchmark-extract.zip")
	f, err := os.Create(archiveName)
	require.NoError(b, err)
	defer os.Remove(f.Name())

	ao = append(ao, WithStageDirectory(dir))
	a, err := NewArchiver(f, *archiveDir, ao...)
	require.NoError(b, err)

	err = a.Archive(context.Background(), files)
	require.NoError(b, err)
	require.NoError(b, a.Close())
	require.NoError(b, f.Close())
	b.ReportAllocs()
	b.ResetTimer()

	fi, _ := os.Stat(archiveName)
	b.SetBytes(fi.Size())
	for n := 0; n < b.N; n++ {
		e, err := NewExtractor(archiveName, dir, eo...)
		// Check the error BEFORE using e: previously RegisterDecompressor was
		// called on e prior to the require.NoError, which would nil-pointer
		// panic if NewExtractor failed.
		require.NoError(b, err)
		if stdDeflate {
			e.RegisterDecompressor(zip.Deflate, StdFlateDecompressor())
		}
		require.NoError(b, e.Extract(context.Background()))
		// Close each iteration's extractor; it was previously leaked, holding
		// an open file descriptor per benchmark iteration.
		require.NoError(b, e.Close())
	}
}

// TestExtractSymlinkDirectoryTimestamps checks that directories (including
// those containing symlinks) keep their archived timestamps after extraction.
func TestExtractSymlinkDirectoryTimestamps(t *testing.T) {
	// Create a specific past time for testing (different from fixedModTime used by testCreateFiles)
	pastTime := time.Date(2019, 3, 15, 14, 30, 0, 0, time.UTC)

	testFiles := map[string]testFile{
		"target_file":          {mode: 0644, contents: "target content"},
		"parent_dir":           {mode: 0755 | os.ModeDir},
		"parent_dir/symlink":   {mode: 0777 | os.ModeSymlink, contents: "../target_file"},
		"another_dir":          {mode: 0755 | os.ModeDir},
		"another_dir/file.txt": {mode: 0644, contents: "regular file"},
	}

	// Create files using the existing test helper
	files, dir := testCreateFiles(t, testFiles)
	defer os.RemoveAll(dir)

	// Override timestamps on directories to our specific past time
	// (testCreateFiles sets all timestamps to fixedModTime = 2020-02-01)
	require.NoError(t, os.Chtimes(filepath.Join(dir, "parent_dir"), pastTime, pastTime))
	require.NoError(t, os.Chtimes(filepath.Join(dir, "another_dir"), pastTime, pastTime))

	// Update the FileInfo in the map to reflect the new timestamps
	parentDirPath := filepath.Join(dir, "parent_dir")
	anotherDirPath := filepath.Join(dir, "another_dir")

	parentDirInfo, err := os.Lstat(parentDirPath)
	require.NoError(t, err)
	anotherDirInfo, err := os.Lstat(anotherDirPath)
	require.NoError(t, err)

	// Update the FileInfo entries using the exact absolute paths
	files[parentDirPath] = parentDirInfo
	files[anotherDirPath] = anotherDirInfo

	testCreateArchive(t, dir, files, func(filename, chroot string) {
		// Extract to a new directory
		extractDir := t.TempDir()
		e, err := NewExtractor(filename,
extractDir)
		require.NoError(t, err)
		defer e.Close()

		// Wait a bit to ensure current time is different from pastTime
		time.Sleep(50 * time.Millisecond)
		currentTime := time.Now()

		require.NoError(t, e.Extract(context.Background()))

		// Check that directory containing symlink preserved its timestamp
		parentDirPath := filepath.Join(extractDir, "parent_dir")
		parentDirInfo, err := os.Lstat(parentDirPath)
		require.NoError(t, err)

		// The directory timestamp should match the original archived time,
		// not the current extraction time
		actualTime := parentDirInfo.ModTime().UTC().Truncate(time.Second)
		expectedTime := pastTime.Truncate(time.Second)
		extractTime := currentTime.UTC().Truncate(time.Second)

		assert.Equal(t, expectedTime, actualTime,
			"Directory containing symlink should preserve original timestamp (%v), not extraction time (%v)",
			expectedTime, extractTime)

		// Also check that regular directory (without symlink) preserves timestamp
		anotherDirPath := filepath.Join(extractDir, "another_dir")
		anotherDirInfo, err := os.Lstat(anotherDirPath)
		require.NoError(t, err)

		actualTime2 := anotherDirInfo.ModTime().UTC().Truncate(time.Second)
		assert.Equal(t, expectedTime, actualTime2,
			"Regular directory should also preserve original timestamp")

		// Verify symlink itself exists and points to correct target
		symlinkPath := filepath.Join(extractDir, "parent_dir", "symlink")
		symlinkInfo, err := os.Lstat(symlinkPath)
		require.NoError(t, err)

		// Verify it's actually a symlink
		assert.True(t, symlinkInfo.Mode()&os.ModeSymlink != 0,
			"Should be a symlink")

		// Verify symlink points to correct target
		target, err := os.Readlink(symlinkPath)
		require.NoError(t, err)

		// Test that the symlink actually resolves to the expected file
		expectedTargetPath := filepath.Join(extractDir, "target_file")
		actualTargetPath := filepath.Join(filepath.Dir(symlinkPath), target)
		actualTargetPath = filepath.Clean(actualTargetPath)

		// Verify both paths point to the same file
		expectedInfo, err := os.Stat(expectedTargetPath)
		require.NoError(t, err)
		actualInfo, err := os.Stat(actualTargetPath)
		require.NoError(t, err)

		// Compare file contents or other properties to ensure they're the same file
		assert.Equal(t, expectedInfo.Size(), actualInfo.Size(),
			"Symlink should resolve to the correct target file")

		// The key assertion: ensure directories containing symlinks
		// don't have their timestamps updated during symlink creation
		timeDifference := actualTime.Sub(extractTime).Abs()
		assert.Greater(t, timeDifference, time.Duration(30*time.Second),
			"Directory timestamp should be significantly different from extraction time, "+
				"indicating it was preserved from the archive rather than updated during extraction")
	})
}

// Benchmarks below vary the stored/compressed method and the extractor
// concurrency; see benchmarkExtractOptions for the measurement loop.

func BenchmarkExtractStore_1(b *testing.B) {
	benchmarkExtractOptions(b, true, aopts(WithArchiverMethod(zip.Store)), WithExtractorConcurrency(1))
}

func BenchmarkExtractStore_2(b *testing.B) {
	benchmarkExtractOptions(b, true, aopts(WithArchiverMethod(zip.Store)), WithExtractorConcurrency(2))
}

func BenchmarkExtractStore_4(b *testing.B) {
	benchmarkExtractOptions(b, true, aopts(WithArchiverMethod(zip.Store)), WithExtractorConcurrency(4))
}

func BenchmarkExtractStore_8(b *testing.B) {
	benchmarkExtractOptions(b, true, aopts(WithArchiverMethod(zip.Store)), WithExtractorConcurrency(8))
}

func BenchmarkExtractStore_16(b *testing.B) {
	benchmarkExtractOptions(b, true, aopts(WithArchiverMethod(zip.Store)), WithExtractorConcurrency(16))
}

func BenchmarkExtractStandardFlate_1(b *testing.B) {
	benchmarkExtractOptions(b, true, nil, WithExtractorConcurrency(1))
}

func BenchmarkExtractStandardFlate_2(b *testing.B) {
	benchmarkExtractOptions(b, true, nil, WithExtractorConcurrency(2))
}

func BenchmarkExtractStandardFlate_4(b *testing.B) {
	benchmarkExtractOptions(b, true, nil, WithExtractorConcurrency(4))
}

func BenchmarkExtractStandardFlate_8(b *testing.B) {
	benchmarkExtractOptions(b, true, nil, WithExtractorConcurrency(8))
}

func BenchmarkExtractStandardFlate_16(b *testing.B) {
	benchmarkExtractOptions(b, true, nil, WithExtractorConcurrency(16))
}

func BenchmarkExtractNonStandardFlate_1(b *testing.B) {
	benchmarkExtractOptions(b, false, nil, WithExtractorConcurrency(1))
}

func BenchmarkExtractNonStandardFlate_2(b *testing.B) {
	benchmarkExtractOptions(b, false, nil, WithExtractorConcurrency(2))
}

func BenchmarkExtractNonStandardFlate_4(b *testing.B) {
	benchmarkExtractOptions(b, false, nil, WithExtractorConcurrency(4))
}

func BenchmarkExtractNonStandardFlate_8(b *testing.B) {
	benchmarkExtractOptions(b, false, nil, WithExtractorConcurrency(8))
}

func BenchmarkExtractNonStandardFlate_16(b *testing.B) {
	benchmarkExtractOptions(b, false, nil, WithExtractorConcurrency(16))
}

func BenchmarkExtractZstd_1(b *testing.B) {
	benchmarkExtractOptions(b, false, aopts(WithArchiverMethod(zstd.ZipMethodWinZip)), WithExtractorConcurrency(1))
}

func BenchmarkExtractZstd_2(b *testing.B) {
	benchmarkExtractOptions(b, false, aopts(WithArchiverMethod(zstd.ZipMethodWinZip)), WithExtractorConcurrency(2))
}

func BenchmarkExtractZstd_4(b *testing.B) {
	benchmarkExtractOptions(b, false, aopts(WithArchiverMethod(zstd.ZipMethodWinZip)),
WithExtractorConcurrency(4)) 449 | } 450 | 451 | func BenchmarkExtractZstd_8(b *testing.B) { 452 | benchmarkExtractOptions(b, false, aopts(WithArchiverMethod(zstd.ZipMethodWinZip)), WithExtractorConcurrency(8)) 453 | } 454 | 455 | func BenchmarkExtractZstd_16(b *testing.B) { 456 | benchmarkExtractOptions(b, false, aopts(WithArchiverMethod(zstd.ZipMethodWinZip)), WithExtractorConcurrency(16)) 457 | } 458 | -------------------------------------------------------------------------------- /archiver_test.go: -------------------------------------------------------------------------------- 1 | package fastzip 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "os" 10 | "path/filepath" 11 | "runtime" 12 | "sort" 13 | "strings" 14 | "testing" 15 | "time" 16 | 17 | "github.com/klauspost/compress/zip" 18 | "github.com/klauspost/compress/zstd" 19 | "github.com/stretchr/testify/assert" 20 | "github.com/stretchr/testify/require" 21 | ) 22 | 23 | var fixedModTime = time.Date(2020, time.February, 1, 6, 0, 0, 0, time.UTC) 24 | 25 | type testFile struct { 26 | mode os.FileMode 27 | contents string 28 | } 29 | 30 | func testCreateFiles(t *testing.T, files map[string]testFile) (map[string]os.FileInfo, string) { 31 | dir := t.TempDir() 32 | 33 | filenames := make([]string, 0, len(files)) 34 | for path := range files { 35 | filenames = append(filenames, path) 36 | } 37 | sort.Strings(filenames) 38 | 39 | var err error 40 | for _, path := range filenames { 41 | tf := files[path] 42 | path = filepath.Join(dir, path) 43 | 44 | switch { 45 | case tf.mode&os.ModeSymlink != 0 && tf.mode&os.ModeDir != 0: 46 | err = os.Symlink(tf.contents, path) 47 | 48 | case tf.mode&os.ModeDir != 0: 49 | err = os.Mkdir(path, tf.mode) 50 | 51 | case tf.mode&os.ModeSymlink != 0: 52 | err = os.Symlink(tf.contents, path) 53 | 54 | default: 55 | err = os.WriteFile(path, []byte(tf.contents), tf.mode) 56 | } 57 | require.NoError(t, err) 58 | require.NoError(t, lchmod(path, tf.mode)) 59 
| require.NoError(t, lchtimes(path, tf.mode, fixedModTime, fixedModTime)) 60 | } 61 | 62 | archiveFiles := make(map[string]os.FileInfo) 63 | err = filepath.Walk(dir, func(pathname string, fi os.FileInfo, err error) error { 64 | archiveFiles[pathname] = fi 65 | return nil 66 | }) 67 | require.NoError(t, err) 68 | 69 | return archiveFiles, dir 70 | } 71 | 72 | func testCreateArchive(t *testing.T, dir string, files map[string]os.FileInfo, fn func(filename, chroot string), opts ...ArchiverOption) { 73 | f, err := ioutil.TempFile("", "fastzip-test") 74 | require.NoError(t, err) 75 | defer os.Remove(f.Name()) 76 | defer f.Close() 77 | 78 | a, err := NewArchiver(f, dir, opts...) 79 | require.NoError(t, err) 80 | require.NoError(t, a.Archive(context.Background(), files)) 81 | require.NoError(t, a.Close()) 82 | 83 | _, entries := a.Written() 84 | require.EqualValues(t, len(files), entries) 85 | 86 | fn(f.Name(), dir) 87 | } 88 | 89 | func TestArchive(t *testing.T) { 90 | symMode := os.FileMode(0777) 91 | if runtime.GOOS == "windows" { 92 | symMode = 0666 93 | } 94 | 95 | testFiles := map[string]testFile{ 96 | "foo": {mode: os.ModeDir | 0777}, 97 | "foo/foo.go": {mode: 0666}, 98 | "bar": {mode: os.ModeDir | 0777}, 99 | "bar/bar.go": {mode: 0666}, 100 | "bar/foo": {mode: os.ModeDir | 0777}, 101 | "bar/foo/bar": {mode: os.ModeDir | 0777}, 102 | "bar/foo/bar/foo": {mode: os.ModeDir | 0777}, 103 | "bar/foo/bar/foo/bar": {mode: 0666}, 104 | "bar/symlink": {mode: os.ModeSymlink | symMode, contents: "bar/foo/bar/foo"}, 105 | "bar/symlink.go": {mode: os.ModeSymlink | symMode, contents: "foo/foo.go"}, 106 | "bar/compressible": {mode: 0666, contents: "11111111111111111111111111111111111111111111111111"}, 107 | "bar/uncompressible": {mode: 0666, contents: "A3#bez&OqCusPr)d&D]Vot9Eo0z^5O*VZm3:sO3HptL.H-4cOv"}, 108 | "empty_dir": {mode: os.ModeDir | 0777}, 109 | "large_file": {mode: 0666, contents: strings.Repeat("abcdefzmkdldjsdfkjsdfsdfiqwpsdfa", 65536)}, 110 | } 111 | 112 | tests := 
map[string][]ArchiverOption{ 113 | "default options": nil, 114 | "no buffer": {WithArchiverBufferSize(0)}, 115 | "with store": {WithArchiverMethod(zip.Store)}, 116 | "with concurrency 2": {WithArchiverConcurrency(2)}, 117 | } 118 | 119 | for tn, opts := range tests { 120 | t.Run(tn, func(t *testing.T) { 121 | files, dir := testCreateFiles(t, testFiles) 122 | defer os.RemoveAll(dir) 123 | 124 | testCreateArchive(t, dir, files, func(filename, chroot string) { 125 | for pathname, fi := range testExtract(t, filename, testFiles) { 126 | if fi.IsDir() { 127 | continue 128 | } 129 | if runtime.GOOS == "windows" && fi.Mode()&os.ModeSymlink != 0 { 130 | continue 131 | } 132 | assert.Equal(t, fixedModTime.Unix(), fi.ModTime().Unix(), "file %v mod time not equal", pathname) 133 | } 134 | }, opts...) 135 | }) 136 | } 137 | } 138 | 139 | func TestArchiveCancelContext(t *testing.T) { 140 | twoMB := strings.Repeat("1", 2*1024*1024) 141 | testFiles := map[string]testFile{} 142 | for i := 0; i < 100; i++ { 143 | testFiles[fmt.Sprintf("file_%d", i)] = testFile{mode: 0666, contents: twoMB} 144 | } 145 | 146 | files, dir := testCreateFiles(t, testFiles) 147 | defer os.RemoveAll(dir) 148 | 149 | f, err := ioutil.TempFile("", "fastzip-test") 150 | require.NoError(t, err) 151 | defer os.Remove(f.Name()) 152 | defer f.Close() 153 | 154 | a, err := NewArchiver(f, dir, WithArchiverConcurrency(1)) 155 | a.RegisterCompressor(zip.Deflate, FlateCompressor(1)) 156 | require.NoError(t, err) 157 | 158 | ctx, cancel := context.WithCancel(context.Background()) 159 | defer cancel() 160 | 161 | done := make(chan struct{}) 162 | go func() { 163 | defer func() { done <- struct{}{} }() 164 | 165 | require.EqualError(t, a.Archive(ctx, files), "context canceled") 166 | }() 167 | 168 | defer func() { 169 | require.NoError(t, a.Close()) 170 | }() 171 | 172 | for { 173 | select { 174 | case <-done: 175 | return 176 | 177 | default: 178 | // cancel as soon as any data is written 179 | if bytes, _ := 
a.Written(); bytes > 0 { 180 | cancel() 181 | } 182 | } 183 | } 184 | } 185 | 186 | func TestArchiveWithCompressor(t *testing.T) { 187 | testFiles := map[string]testFile{ 188 | "foo.go": {mode: 0666}, 189 | "bar.go": {mode: 0666}, 190 | } 191 | 192 | files, dir := testCreateFiles(t, testFiles) 193 | defer os.RemoveAll(dir) 194 | 195 | f, err := ioutil.TempFile("", "fastzip-test") 196 | require.NoError(t, err) 197 | defer os.Remove(f.Name()) 198 | defer f.Close() 199 | 200 | a, err := NewArchiver(f, dir) 201 | a.RegisterCompressor(zip.Deflate, FlateCompressor(1)) 202 | require.NoError(t, err) 203 | require.NoError(t, a.Archive(context.Background(), files)) 204 | require.NoError(t, a.Close()) 205 | 206 | bytes, entries := a.Written() 207 | require.EqualValues(t, 0, bytes) 208 | require.EqualValues(t, 3, entries) 209 | 210 | testExtract(t, f.Name(), testFiles) 211 | } 212 | 213 | func TestArchiveWithMethod(t *testing.T) { 214 | testFiles := map[string]testFile{ 215 | "foo.go": {mode: 0666}, 216 | "bar.go": {mode: 0666}, 217 | } 218 | 219 | files, dir := testCreateFiles(t, testFiles) 220 | defer os.RemoveAll(dir) 221 | 222 | f, err := ioutil.TempFile("", "fastzip-test") 223 | require.NoError(t, err) 224 | defer os.Remove(f.Name()) 225 | defer f.Close() 226 | 227 | a, err := NewArchiver(f, dir, WithArchiverMethod(zip.Store)) 228 | require.NoError(t, err) 229 | require.NoError(t, a.Archive(context.Background(), files)) 230 | require.NoError(t, a.Close()) 231 | 232 | bytes, entries := a.Written() 233 | require.EqualValues(t, 0, bytes) 234 | require.EqualValues(t, 3, entries) 235 | 236 | testExtract(t, f.Name(), testFiles) 237 | } 238 | 239 | func TestArchiveWithStageDirectory(t *testing.T) { 240 | testFiles := map[string]testFile{ 241 | "foo.go": {mode: 0666}, 242 | "bar.go": {mode: 0666}, 243 | } 244 | 245 | files, chroot := testCreateFiles(t, testFiles) 246 | defer os.RemoveAll(chroot) 247 | 248 | dir := t.TempDir() 249 | f, err := ioutil.TempFile("", "fastzip-test") 
250 | require.NoError(t, err) 251 | defer os.Remove(f.Name()) 252 | defer f.Close() 253 | 254 | a, err := NewArchiver(f, chroot, WithStageDirectory(dir)) 255 | require.NoError(t, err) 256 | require.NoError(t, a.Archive(context.Background(), files)) 257 | require.NoError(t, a.Close()) 258 | 259 | bytes, entries := a.Written() 260 | require.EqualValues(t, 0, bytes) 261 | require.EqualValues(t, 3, entries) 262 | 263 | stageFiles, err := os.ReadDir(dir) 264 | require.NoError(t, err) 265 | require.Zero(t, len(stageFiles)) 266 | 267 | testExtract(t, f.Name(), testFiles) 268 | } 269 | 270 | func TestArchiveWithConcurrency(t *testing.T) { 271 | testFiles := map[string]testFile{ 272 | "foo.go": {mode: 0666}, 273 | "bar.go": {mode: 0666}, 274 | } 275 | 276 | concurrencyTests := []struct { 277 | concurrency int 278 | pass bool 279 | }{ 280 | {-1, false}, 281 | {0, false}, 282 | {1, true}, 283 | {30, true}, 284 | } 285 | 286 | files, dir := testCreateFiles(t, testFiles) 287 | defer os.RemoveAll(dir) 288 | 289 | for _, test := range concurrencyTests { 290 | func() { 291 | f, err := ioutil.TempFile("", "fastzip-test") 292 | require.NoError(t, err) 293 | defer os.Remove(f.Name()) 294 | defer f.Close() 295 | 296 | a, err := NewArchiver(f, dir, WithArchiverConcurrency(test.concurrency)) 297 | if !test.pass { 298 | require.Error(t, err) 299 | return 300 | } 301 | 302 | require.NoError(t, err) 303 | require.NoError(t, a.Archive(context.Background(), files)) 304 | require.NoError(t, a.Close()) 305 | 306 | bytes, entries := a.Written() 307 | require.EqualValues(t, 0, bytes) 308 | require.EqualValues(t, 3, entries) 309 | 310 | testExtract(t, f.Name(), testFiles) 311 | }() 312 | } 313 | } 314 | 315 | func TestArchiveWithBufferSize(t *testing.T) { 316 | testFiles := map[string]testFile{ 317 | "foobar.go": {mode: 0666}, 318 | "compressible": {mode: 0666, contents: "11111111111111111111111111111111111111111111111111"}, 319 | "uncompressible": {mode: 0666, contents: 
"A3#bez&OqCusPr)d&D]Vot9Eo0z^5O*VZm3:sO3HptL.H-4cOv"}, 320 | "empty_dir": {mode: os.ModeDir | 0777}, 321 | "large_file": {mode: 0666, contents: strings.Repeat("abcdefzmkdldjsdfkjsdfsdfiqwpsdfa", 65536)}, 322 | } 323 | 324 | tests := []struct { 325 | buffersize int 326 | zero bool 327 | }{ 328 | {-100, false}, 329 | {-2, false}, 330 | {-1, false}, 331 | {0, true}, 332 | {32 * 1024, true}, 333 | {64 * 1024, true}, 334 | } 335 | 336 | files, dir := testCreateFiles(t, testFiles) 337 | defer os.RemoveAll(dir) 338 | 339 | for _, test := range tests { 340 | func() { 341 | f, err := ioutil.TempFile("", "fastzip-test") 342 | require.NoError(t, err) 343 | defer os.Remove(f.Name()) 344 | defer f.Close() 345 | 346 | a, err := NewArchiver(f, dir, WithArchiverBufferSize(test.buffersize)) 347 | require.NoError(t, err) 348 | require.NoError(t, a.Archive(context.Background(), files)) 349 | require.NoError(t, a.Close()) 350 | 351 | if !test.zero { 352 | require.Equal(t, 0, a.options.bufferSize) 353 | } else { 354 | require.Equal(t, test.buffersize, a.options.bufferSize) 355 | } 356 | 357 | _, entries := a.Written() 358 | require.EqualValues(t, 6, entries) 359 | 360 | testExtract(t, f.Name(), testFiles) 361 | }() 362 | } 363 | } 364 | 365 | func TestArchiveChroot(t *testing.T) { 366 | dir := t.TempDir() 367 | f, err := os.Create(filepath.Join(dir, "archive.zip")) 368 | require.NoError(t, err) 369 | defer f.Close() 370 | 371 | require.NoError(t, os.MkdirAll(filepath.Join(dir, "chroot"), 0777)) 372 | 373 | a, err := NewArchiver(f, filepath.Join(dir, "chroot")) 374 | require.NoError(t, err) 375 | 376 | tests := []struct { 377 | paths []string 378 | good bool 379 | }{ 380 | {[]string{"chroot/good"}, true}, 381 | {[]string{"chroot/good", "bad"}, false}, 382 | {[]string{"bad"}, false}, 383 | {[]string{"chroot/../bad"}, false}, 384 | {[]string{"chroot/../chroot/good"}, true}, 385 | } 386 | 387 | for _, test := range tests { 388 | files := make(map[string]os.FileInfo) 389 | 390 | for _, 
filename := range test.paths { 391 | w, err := os.Create(filepath.Join(dir, filename)) 392 | require.NoError(t, err) 393 | stat, err := w.Stat() 394 | require.NoError(t, err) 395 | require.NoError(t, w.Close()) 396 | 397 | files[w.Name()] = stat 398 | } 399 | 400 | err = a.Archive(context.Background(), files) 401 | if test.good { 402 | assert.NoError(t, err) 403 | } else { 404 | assert.Error(t, err) 405 | } 406 | } 407 | } 408 | 409 | func TestArchiveWithOffset(t *testing.T) { 410 | testFiles := map[string]testFile{ 411 | "foo.go": {mode: 0666}, 412 | "bar.go": {mode: 0666}, 413 | } 414 | 415 | files, dir := testCreateFiles(t, testFiles) 416 | defer os.RemoveAll(dir) 417 | 418 | f, err := ioutil.TempFile("", "fastzip-test") 419 | require.NoError(t, err) 420 | defer os.Remove(f.Name()) 421 | defer f.Close() 422 | 423 | f.Seek(1000, io.SeekStart) 424 | 425 | a, err := NewArchiver(f, dir, WithArchiverOffset(1000)) 426 | require.NoError(t, err) 427 | require.NoError(t, a.Archive(context.Background(), files)) 428 | require.NoError(t, a.Close()) 429 | 430 | bytes, entries := a.Written() 431 | require.EqualValues(t, 0, bytes) 432 | require.EqualValues(t, 3, entries) 433 | 434 | testExtract(t, f.Name(), testFiles) 435 | } 436 | 437 | var archiveDir = flag.String("archivedir", runtime.GOROOT(), "The directory to use for archive benchmarks") 438 | 439 | func benchmarkArchiveOptions(b *testing.B, stdDeflate bool, options ...ArchiverOption) { 440 | files := make(map[string]os.FileInfo) 441 | size := int64(0) 442 | filepath.Walk(*archiveDir, func(filename string, fi os.FileInfo, err error) error { 443 | files[filename] = fi 444 | size += fi.Size() 445 | return nil 446 | }) 447 | 448 | dir := b.TempDir() 449 | 450 | options = append(options, WithStageDirectory(dir)) 451 | 452 | b.ReportAllocs() 453 | b.SetBytes(size) 454 | b.ResetTimer() 455 | for n := 0; n < b.N; n++ { 456 | f, err := os.Create(filepath.Join(dir, "fastzip-benchmark.zip")) 457 | require.NoError(b, err) 458 | 459 
| a, err := NewArchiver(f, *archiveDir, options...) 460 | if stdDeflate { 461 | a.RegisterCompressor(zip.Deflate, StdFlateCompressor(-1)) 462 | } else { 463 | a.RegisterCompressor(zip.Deflate, FlateCompressor(-1)) 464 | } 465 | require.NoError(b, err) 466 | 467 | err = a.Archive(context.Background(), files) 468 | require.NoError(b, err) 469 | 470 | require.NoError(b, a.Close()) 471 | require.NoError(b, f.Close()) 472 | require.NoError(b, os.Remove(f.Name())) 473 | } 474 | } 475 | 476 | func BenchmarkArchiveStore_1(b *testing.B) { 477 | benchmarkArchiveOptions(b, true, WithArchiverConcurrency(1), WithArchiverMethod(zip.Store)) 478 | } 479 | 480 | func BenchmarkArchiveStandardFlate_1(b *testing.B) { 481 | benchmarkArchiveOptions(b, true, WithArchiverConcurrency(1)) 482 | } 483 | 484 | func BenchmarkArchiveStandardFlate_2(b *testing.B) { 485 | benchmarkArchiveOptions(b, true, WithArchiverConcurrency(2)) 486 | } 487 | 488 | func BenchmarkArchiveStandardFlate_4(b *testing.B) { 489 | benchmarkArchiveOptions(b, true, WithArchiverConcurrency(4)) 490 | } 491 | 492 | func BenchmarkArchiveStandardFlate_8(b *testing.B) { 493 | benchmarkArchiveOptions(b, true, WithArchiverConcurrency(8)) 494 | } 495 | 496 | func BenchmarkArchiveStandardFlate_16(b *testing.B) { 497 | benchmarkArchiveOptions(b, true, WithArchiverConcurrency(16)) 498 | } 499 | 500 | func BenchmarkArchiveNonStandardFlate_1(b *testing.B) { 501 | benchmarkArchiveOptions(b, false, WithArchiverConcurrency(1)) 502 | } 503 | 504 | func BenchmarkArchiveNonStandardFlate_2(b *testing.B) { 505 | benchmarkArchiveOptions(b, false, WithArchiverConcurrency(2)) 506 | } 507 | 508 | func BenchmarkArchiveNonStandardFlate_4(b *testing.B) { 509 | benchmarkArchiveOptions(b, false, WithArchiverConcurrency(4)) 510 | } 511 | 512 | func BenchmarkArchiveNonStandardFlate_8(b *testing.B) { 513 | benchmarkArchiveOptions(b, false, WithArchiverConcurrency(8)) 514 | } 515 | 516 | func BenchmarkArchiveNonStandardFlate_16(b *testing.B) { 517 | 
benchmarkArchiveOptions(b, false, WithArchiverConcurrency(16), WithArchiverMethod(zstd.ZipMethodWinZip)) 518 | } 519 | 520 | func BenchmarkArchiveZstd_1(b *testing.B) { 521 | benchmarkArchiveOptions(b, true, WithArchiverConcurrency(1), WithArchiverMethod(zstd.ZipMethodWinZip)) 522 | } 523 | 524 | func BenchmarkArchiveZstd_2(b *testing.B) { 525 | benchmarkArchiveOptions(b, true, WithArchiverConcurrency(2), WithArchiverMethod(zstd.ZipMethodWinZip)) 526 | } 527 | 528 | func BenchmarkArchiveZstd_4(b *testing.B) { 529 | benchmarkArchiveOptions(b, true, WithArchiverConcurrency(4), WithArchiverMethod(zstd.ZipMethodWinZip)) 530 | } 531 | 532 | func BenchmarkArchiveZstd_8(b *testing.B) { 533 | benchmarkArchiveOptions(b, true, WithArchiverConcurrency(8), WithArchiverMethod(zstd.ZipMethodWinZip)) 534 | } 535 | 536 | func BenchmarkArchiveZstd_16(b *testing.B) { 537 | benchmarkArchiveOptions(b, true, WithArchiverConcurrency(16), WithArchiverMethod(zstd.ZipMethodWinZip)) 538 | } 539 | --------------------------------------------------------------------------------