// crcReader passes reads through to the embedded Reader while accumulating a
// CRC-32 of every byte returned, and verifies that checksum once the
// underlying stream reports io.EOF.
type crcReader struct {
	io.Reader
	hash hash.Hash32 // running checksum of the bytes read so far
	crc  *uint32     // expected checksum, dereferenced only at io.EOF; nil or 0 skips verification
}

// Read reads from the embedded Reader and adds the bytes obtained to the
// running hash. Errors other than io.EOF are returned unchanged. At io.EOF the
// computed checksum is compared with *crc, provided crc is non-nil and the
// value it points to is non-zero; on a mismatch zip.ErrChecksum is returned in
// place of io.EOF.
func (r *crcReader) Read(b []byte) (int, error) {
	n, err := r.Reader.Read(b)
	r.hash.Write(b[:n])

	switch {
	case err != io.EOF:
		// Success and every non-EOF failure are reported as they are.
		return n, err
	case r.crc == nil || *r.crc == 0:
		// No expected checksum available: nothing to verify.
		return n, io.EOF
	case r.hash.Sum32() != *r.crc:
		return n, zip.ErrChecksum
	default:
		return n, io.EOF
	}
}
16 | testReader(t, s.Bytes()) 17 | } 18 | 19 | func testReader(t *testing.T, s []byte) { 20 | 21 | var wbuf bytes.Buffer 22 | for j := 0; j < 2; j++ { 23 | z := zip.NewWriter(&wbuf) 24 | for i := 0; i < 2; i++ { 25 | zw, err := z.Create("tmp") 26 | if err != nil { 27 | t.Fatal(err) 28 | } 29 | if _, err := zw.Write(s); err != nil { 30 | t.Fatal(err) 31 | } 32 | } 33 | 34 | if err := z.Close(); err != nil { 35 | t.Fatal(err) 36 | } 37 | } 38 | 39 | zr := NewReader(&wbuf) 40 | for j := 0; j < 2; j++ { 41 | fcount := 0 42 | for { 43 | _, err := zr.Next() 44 | if err != nil { 45 | if err != io.EOF { 46 | t.Fatal(err) 47 | } 48 | if fcount != 2 { 49 | t.Fatal("Embeded file missing", j, fcount, err) 50 | } 51 | break // No more files 52 | } 53 | fcount++ 54 | s2, err := io.ReadAll(zr) 55 | if err != nil { 56 | t.Fatal(err) 57 | } 58 | if bytes.Compare(s, s2) != 0 { 59 | t.Fatal("Decompressed data does not match original") 60 | } 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /centralDirectory.go: -------------------------------------------------------------------------------- 1 | package zipstream 2 | 3 | import ( 4 | "archive/zip" 5 | "bufio" 6 | "encoding/binary" 7 | "errors" 8 | "io" 9 | ) 10 | 11 | // We're not interested in the central directory's data, we just want to skip over it, 12 | // clearing the stream of the current zip, in case anything else needs to be sent over 13 | // the same stream. 
14 | func discardCentralDirectory(br *bufio.Reader) error { 15 | for { 16 | sigBytes, err := br.Peek(4) 17 | if err != nil { 18 | return err 19 | } 20 | switch sig := binary.LittleEndian.Uint32(sigBytes); sig { 21 | case directoryHeaderSignature: 22 | if err := discardDirectoryHeaderRecord(br); err != nil { 23 | return err 24 | } 25 | case directoryEndSignature: 26 | if err := discardDirectoryEndRecord(br); err != nil { 27 | return err 28 | } 29 | return io.EOF 30 | case directory64EndSignature: 31 | return errors.New("Zip64 not yet supported") 32 | case directory64LocSignature: // Not sure what this is yet 33 | return errors.New("Zip64 not yet supported") 34 | default: 35 | return zip.ErrFormat 36 | } 37 | } 38 | } 39 | 40 | func discardDirectoryHeaderRecord(br *bufio.Reader) error { 41 | if _, err := br.Discard(28); err != nil { 42 | return err 43 | } 44 | lb, err := br.Peek(6) 45 | if err != nil { 46 | return err 47 | } 48 | lengths := int(binary.LittleEndian.Uint16(lb[:2])) + // File name length 49 | int(binary.LittleEndian.Uint16(lb[2:4])) + // Extra field length 50 | int(binary.LittleEndian.Uint16(lb[4:])) // File comment length 51 | _, err = br.Discard(18 + lengths) 52 | return err 53 | } 54 | 55 | func discardDirectoryEndRecord(br *bufio.Reader) error { 56 | if _, err := br.Discard(20); err != nil { 57 | return err 58 | } 59 | commentLength, err := br.Peek(2) 60 | if err != nil { 61 | return err 62 | } 63 | _, err = br.Discard(2 + int(binary.LittleEndian.Uint16(commentLength))) 64 | return err 65 | } 66 | -------------------------------------------------------------------------------- /descriptor.go: -------------------------------------------------------------------------------- 1 | package zipstream 2 | 3 | import ( 4 | "archive/zip" 5 | "bufio" 6 | "bytes" 7 | "encoding/binary" 8 | "io" 9 | ) 10 | 11 | type descriptorReader struct { 12 | br *bufio.Reader 13 | size uint64 14 | eof bool 15 | fileHeader *zip.FileHeader 16 | } 17 | 18 | var ( 19 | sigBytes = 
[]byte{0x50, 0x4b} 20 | ) 21 | 22 | func (r *descriptorReader) Read(p []byte) (n int, err error) { 23 | if r.eof { 24 | return 0, io.EOF 25 | } 26 | 27 | if n = len(p); n > maxRead { 28 | n = maxRead 29 | } 30 | 31 | z, err := r.br.Peek(n + readAhead) 32 | if err != nil { 33 | if err == io.EOF && len(z) < 46+22 { // Min length of Central directory + End of central directory 34 | return 0, err 35 | } 36 | n = len(z) 37 | } 38 | 39 | // Look for header of next file or central directory 40 | discard := n 41 | s := 16 42 | for !r.eof && s < n { 43 | i := bytes.Index(z[s:len(z)-4], sigBytes) + s 44 | if i == -1 { 45 | break 46 | } 47 | 48 | // If directoryHeaderSignature or fileHeaderSignature file could be finished 49 | if sig := binary.LittleEndian.Uint32(z[i : i+4]); sig == fileHeaderSignature || 50 | sig == directoryHeaderSignature { 51 | 52 | // Now check for compressed file sizes to ensure not false positive and if zip64. 53 | 54 | if i < len(z)-8 { // Zip32 55 | // Zip32 optional dataDescriptorSignature 56 | offset := 0 57 | if binary.LittleEndian.Uint32(z[i-16:i-12]) == dataDescriptorSignature { 58 | offset = 4 59 | } 60 | 61 | // Zip32 compressed file size 62 | if binary.LittleEndian.Uint32(z[i-8:i-4]) == uint32(r.size)+uint32(i-12-offset) { 63 | n, discard = i-12-offset, i 64 | r.eof = true 65 | r.fileHeader.CRC32 = binary.LittleEndian.Uint32(z[i-12 : i-8]) 66 | break 67 | } 68 | } 69 | 70 | if i > 24 { 71 | // Zip64 optional dataDescriptorSignature 72 | offset := 0 73 | if binary.LittleEndian.Uint32(z[i-24:i-20]) == dataDescriptorSignature { 74 | offset = 4 75 | } 76 | 77 | // Zip64 compressed file size 78 | if i >= 8 && binary.LittleEndian.Uint64(z[i-16:i-8]) == r.size+uint64(i-20-offset) { 79 | n, discard = i-20-offset, i 80 | r.eof = true 81 | break 82 | } 83 | } 84 | } 85 | 86 | s = i + 2 87 | } 88 | copy(p, z[:n]) 89 | r.br.Discard(discard) 90 | r.size += uint64(n) 91 | return 92 | } 93 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | // Major portions copied from the GO std lib, copyright below: 2 | 3 | Copyright (c) 2012 The Go Authors. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following disclaimer 13 | in the documentation and/or other materials provided with the 14 | distribution. 15 | * Neither the name of Google Inc. nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | // Code enabling ZipStreaming, copyright below: 32 | 33 | Copyright (c) 2015 Richard Warburton. All rights reserved. 
34 | 35 | Redistribution and use in source and binary forms, with or without 36 | modification, are permitted provided that the following conditions are 37 | met: 38 | 39 | * Redistributions of source code must retain the above copyright 40 | notice, this list of conditions and the following disclaimer. 41 | * Redistributions in binary form must reproduce the above 42 | copyright notice, this list of conditions and the following disclaimer 43 | in the documentation and/or other materials provided with the 44 | distribution. 45 | * The name Richard Warburton may not be used to endorse or promote 46 | products derived from this software without specific prior written 47 | permission. 48 | 49 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 50 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 51 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 52 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 53 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 54 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 55 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 56 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 57 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 58 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 59 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 60 | -------------------------------------------------------------------------------- /stolen.go: -------------------------------------------------------------------------------- 1 | package zipstream 2 | 3 | // Everything in this file was blatantly stolen from golang.org/pkg/archive/zip 4 | 5 | // Copyright 2010 The Go Authors. All rights reserved. 6 | // Use of this source code is governed by a BSD-style 7 | // license that can be found in the LICENSE file. 
// Compression methods.
//
// These are the method IDs the package-level decompressors map has entries
// for out of the box.
const (
	Store   uint16 = 0 // mapped to ioutil.NopCloser: data is passed through unchanged
	Deflate uint16 = 8 // mapped to flate.NewReader
)

// Record signatures and fixed record lengths of the zip format. The
// signatures are compared against little-endian uint32 values read from the
// stream; each *Len constant is the size of the fixed part of a record, with
// the variable-length fields that follow it named in the trailing comment.
const (
	fileHeaderSignature      = 0x04034b50
	directoryHeaderSignature = 0x02014b50
	directoryEndSignature    = 0x06054b50
	directory64LocSignature  = 0x07064b50
	directory64EndSignature  = 0x06064b50
	dataDescriptorSignature  = 0x08074b50 // de-facto standard; required by OS X Finder
	fileHeaderLen            = 30         // + filename + extra
	directoryHeaderLen       = 46         // + filename + extra + comment
	directoryEndLen          = 22         // + comment
	dataDescriptorLen        = 16         // four uint32: descriptor signature, crc32, compressed size, size
	dataDescriptor64Len      = 24         // descriptor with 8 byte sizes
	directory64LocLen        = 20         //
	directory64EndLen        = 56         // + extra

	// Constants for the first byte in CreatorVersion
	creatorFAT    = 0
	creatorUnix   = 3
	creatorNTFS   = 11
	creatorVFAT   = 14
	creatorMacOSX = 19

	// version numbers
	zipVersion20 = 20 // 2.0
	zipVersion45 = 45 // 4.5 (reads and writes zip64 archives)

	// limits for non zip64 files
	uint16max = (1 << 16) - 1
	uint32max = (1 << 32) - 1

	// extra header id's
	zip64ExtraId = 0x0001 // zip64 Extended Information Extra Field
)
65 | type Decompressor func(io.Reader) io.ReadCloser 66 | 67 | var ( 68 | mu sync.RWMutex // guards compressor and decompressor maps 69 | 70 | decompressors = map[uint16]Decompressor{ 71 | Store: ioutil.NopCloser, 72 | Deflate: flate.NewReader, 73 | } 74 | ) 75 | 76 | // RegisterDecompressor allows custom decompressors for a specified method ID. 77 | func RegisterDecompressor(method uint16, d Decompressor) { 78 | mu.Lock() 79 | defer mu.Unlock() 80 | 81 | if _, ok := decompressors[method]; ok { 82 | panic("decompressor already registered") 83 | } 84 | decompressors[method] = d 85 | } 86 | 87 | func decompressor(method uint16) Decompressor { 88 | mu.RLock() 89 | defer mu.RUnlock() 90 | return decompressors[method] 91 | } 92 | 93 | // #### reader.go 94 | 95 | type readBuf []byte 96 | 97 | func (b *readBuf) uint16() uint16 { 98 | v := binary.LittleEndian.Uint16(*b) 99 | *b = (*b)[2:] 100 | return v 101 | } 102 | 103 | func (b *readBuf) uint32() uint32 { 104 | v := binary.LittleEndian.Uint32(*b) 105 | *b = (*b)[4:] 106 | return v 107 | } 108 | 109 | func (b *readBuf) uint64() uint64 { 110 | v := binary.LittleEndian.Uint64(*b) 111 | *b = (*b)[8:] 112 | return v 113 | } 114 | -------------------------------------------------------------------------------- /reader.go: -------------------------------------------------------------------------------- 1 | // Package zipstream provides support for reading ZIP archives through an io.Reader. 2 | // 3 | // Zip64 archives are not yet supported. 4 | package zipstream 5 | 6 | import ( 7 | "archive/zip" 8 | "bufio" 9 | "bytes" 10 | "encoding/binary" 11 | "hash/crc32" 12 | "io" 13 | ) 14 | 15 | const ( 16 | readAhead = 28 17 | maxRead = 4096 18 | bufferSize = maxRead + readAhead 19 | ) 20 | 21 | // A Reader provides sequential access to the contents of a zip archive. 
22 | // A zip archive consists of a sequence of files, 23 | // The Next method advances to the next file in the archive (including the first), 24 | // and then it can be treated as an io.Reader to access the file's data. 25 | // The Buffered method recovers any bytes read beyond the end of the zip file, 26 | // necessary if you plan to process anything after it that is not another zip file. 27 | type Reader struct { 28 | io.Reader 29 | br *bufio.Reader 30 | } 31 | 32 | // NewReader creates a new Reader reading from r. 33 | func NewReader(r io.Reader) *Reader { 34 | return &Reader{br: bufio.NewReaderSize(r, bufferSize)} 35 | } 36 | 37 | // Next advances to the next entry in the zip archive. 38 | // 39 | // io.EOF is returned when the end of the zip file has been reached. 40 | // If Next is called again, it will presume another zip file immediately follows 41 | // and it will advance into it. 42 | func (r *Reader) Next() (*zip.FileHeader, error) { 43 | if r.Reader != nil { 44 | if _, err := io.Copy(io.Discard, r.Reader); err != nil { 45 | return nil, err 46 | } 47 | } 48 | 49 | for { 50 | sigData, err := r.br.Peek(4096) 51 | if err != nil { 52 | if err == io.EOF && len(sigData) < 46+22 { // Min length of Central directory + End of central directory 53 | return nil, err 54 | } 55 | } 56 | 57 | switch sig := binary.LittleEndian.Uint32(sigData); sig { 58 | case fileHeaderSignature: 59 | break 60 | case directoryHeaderSignature: // Directory appears at end of file so we are finished 61 | return nil, discardCentralDirectory(r.br) 62 | default: 63 | index := bytes.Index(sigData[1:], sigBytes) 64 | if index == -1 { 65 | r.br.Discard(len(sigData) - len(sigBytes) + 1) 66 | continue 67 | } else { 68 | r.br.Discard(index + 1) 69 | } 70 | } 71 | break 72 | } 73 | 74 | headBuf := make([]byte, fileHeaderLen) 75 | if _, err := io.ReadFull(r.br, headBuf); err != nil { 76 | return nil, err 77 | } 78 | b := readBuf(headBuf[4:]) 79 | 80 | f := &zip.FileHeader{ 81 | ReaderVersion: 
b.uint16(), 82 | Flags: b.uint16(), 83 | Method: b.uint16(), 84 | ModifiedTime: b.uint16(), 85 | ModifiedDate: b.uint16(), 86 | CRC32: b.uint32(), 87 | CompressedSize: b.uint32(), // TODO handle zip64 88 | UncompressedSize: b.uint32(), // TODO handle zip64 89 | } 90 | 91 | filenameLen := b.uint16() 92 | extraLen := b.uint16() 93 | 94 | d := make([]byte, filenameLen+extraLen) 95 | if _, err := io.ReadFull(r.br, d); err != nil { 96 | return nil, err 97 | } 98 | f.Name = string(d[:filenameLen]) 99 | f.Extra = d[filenameLen : filenameLen+extraLen] 100 | 101 | dcomp := decompressor(f.Method) 102 | if dcomp == nil { 103 | return nil, zip.ErrAlgorithm 104 | } 105 | 106 | // TODO handle encryption here 107 | crc := &crcReader{ 108 | hash: crc32.NewIEEE(), 109 | crc: &f.CRC32, 110 | } 111 | if f.Flags&0x8 != 0 { // If has dataDescriptor 112 | crc.Reader = dcomp(&descriptorReader{br: r.br, fileHeader: f}) 113 | } else { 114 | crc.Reader = dcomp(io.LimitReader(r.br, int64(f.CompressedSize))) 115 | crc.crc = &f.CRC32 116 | } 117 | r.Reader = crc 118 | return f, nil 119 | } 120 | 121 | // Buffered returns any bytes beyond the end of the zip file that it may have 122 | // read. These are necessary if you plan to process anything after it, 123 | // that isn't another zip file. 124 | func (r *Reader) Buffered() io.Reader { return r.br } 125 | --------------------------------------------------------------------------------