├── .gitignore ├── .travis.yml ├── GO_LICENSE ├── LICENSE ├── README.md ├── gunzip.go ├── gunzip_test.go ├── gzip.go ├── gzip_norace_test.go ├── gzip_test.go ├── gzip_unreliable_test.go └── testdata ├── bigempty.gz ├── issue6550.gz └── test.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | 2 | arch: 3 | - amd64 4 | - ppc64le 5 | language: go 6 | 7 | os: 8 | - linux 9 | - osx 10 | 11 | go: 12 | - 1.13.x 13 | - 1.14.x 14 | - 1.15.x 15 | - master 16 | 17 | env: 18 | - GO111MODULE=off 19 | 20 | script: 21 | - diff <(gofmt -d .) <(printf "") 22 | - go test -v -cpu=1,2,4 . 23 | - go test -v -cpu=2 -race -short . 24 | 25 | matrix: 26 | allow_failures: 27 | - go: 'master' 28 | fast_finish: true 29 | -------------------------------------------------------------------------------- /GO_LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 The Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 
9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Klaus Post 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pgzip 2 | ===== 3 | 4 | Go parallel gzip compression/decompression. This is a fully gzip compatible drop in replacement for "compress/gzip". 5 | 6 | This will split compression into blocks that are compressed in parallel. 7 | This can be useful for compressing big amounts of data. The output is a standard gzip file. 8 | 9 | The gzip decompression is modified so it decompresses ahead of the current reader. 
10 | This means that reads will be non-blocking if the decompressor can keep ahead of your code reading from it. 11 | CRC calculation also takes place in a separate goroutine. 12 | 13 | You should only use this if you are (de)compressing big amounts of data, 14 | say **more than 1MB** at a time, otherwise you will not see any benefit, 15 | and it will likely be faster to use the internal gzip library 16 | or [this package](https://github.com/klauspost/compress). 17 | 18 | It is important to note that this library creates and reads *standard gzip files*. 19 | You do not have to match the compressor/decompressor to get the described speedups, 20 | and the gzip files are fully compatible with other gzip readers/writers. 21 | 22 | A golang variant of this is [bgzf](https://godoc.org/github.com/biogo/hts/bgzf), 23 | which has the same feature, as well as seeking in the resulting file. 24 | The only drawback is a slightly bigger overhead compared to this and pure gzip. 25 | See a comparison below. 26 | 27 | [![GoDoc][1]][2] [![Build Status][3]][4] 28 | 29 | [1]: https://godoc.org/github.com/klauspost/pgzip?status.svg 30 | [2]: https://godoc.org/github.com/klauspost/pgzip 31 | [3]: https://travis-ci.org/klauspost/pgzip.svg 32 | [4]: https://travis-ci.org/klauspost/pgzip 33 | 34 | Installation 35 | ==== 36 | ```go get github.com/klauspost/pgzip/...``` 37 | 38 | You might need to get/update the dependencies: 39 | 40 | ``` 41 | go get -u github.com/klauspost/compress 42 | ``` 43 | 44 | Usage 45 | ==== 46 | [Godoc Documentation](https://godoc.org/github.com/klauspost/pgzip) 47 | 48 | To use as a replacement for gzip, exchange 49 | 50 | ```import "compress/gzip"``` 51 | with 52 | ```import gzip "github.com/klauspost/pgzip"```. 53 | 54 | # Changes 55 | 56 | * Oct 6, 2016: Fixed an issue if the destination writer returned an error. 57 | * Oct 6, 2016: Better buffer reuse, should now generate less garbage. 58 | * Oct 6, 2016: Output does not change based on write sizes. 
59 | * Dec 8, 2015: Decoder now supports the io.WriterTo interface, giving a speedup and less GC pressure. 60 | * Oct 9, 2015: Reduced allocations by ~35% by using sync.Pool. ~15% overall speedup. 61 | 62 | Changes in [github.com/klauspost/compress](https://github.com/klauspost/compress#changelog) are also carried over, so see that for more changes. 63 | 64 | ## Compression 65 | The simplest way to use this is to simply do the same as you would when using [compress/gzip](http://golang.org/pkg/compress/gzip). 66 | 67 | To change the block size, use the added (*pgzip.Writer).SetConcurrency(blockSize, blocks int) function. With this you can control the approximate size of your blocks, as well as how many you want to be processing in parallel. The default is SetConcurrency(1MB, runtime.GOMAXPROCS(0)), meaning blocks are split at 1 MB and as many blocks as there are CPU threads can be in flight at once before the writer blocks. 68 | 69 | 70 | Example: 71 | ``` 72 | var b bytes.Buffer 73 | w := gzip.NewWriter(&b) 74 | w.SetConcurrency(100000, 10) 75 | w.Write([]byte("hello, world\n")) 76 | w.Close() 77 | ``` 78 | 79 | To get any performance gains, you should at least be compressing more than 1 megabyte of data at a time. 80 | 81 | You should at least have a block size of 100k and at least a number of blocks that match the number of cores you would like to utilize, but about twice the number of blocks would be the best. 82 | 83 | Another side effect of this is that it is likely to speed up your other code, since writes to the compressor only block if the compressor is already compressing the number of blocks you have specified. This also means you don't have to worry about buffering input to the compressor. 84 | 85 | ## Decompression 86 | 87 | Decompression works similar to compression. That means that you simply call pgzip the same way as you would call [compress/gzip](http://golang.org/pkg/compress/gzip). 
89 | The only difference is that if you want to specify your own readahead, you have to use `pgzip.NewReaderN(r io.Reader, blockSize, blocks int)` to get a reader with your custom blocksizes. The `blockSize` is the size of each block decoded, and `blocks` is the maximum number of blocks that are decoded ahead. 90 | 91 | See [Example on playground](http://play.golang.org/p/uHv1B5NbDh) 92 | 93 | Performance 94 | ==== 95 | ## Compression 96 | 97 | See my blog post in [Benchmarks of Golang Gzip](https://blog.klauspost.com/go-gzipdeflate-benchmarks/). 98 | 99 | Compression cost is usually about 0.2% with default settings with a block size of 250k. 100 | 101 | Example with GOMAXPROCS set to 32 (16 core CPU) 102 | 103 | Content is [Matt Mahoney's 10GB corpus](http://mattmahoney.net/dc/10gb.html). Compression level 6. 104 | 105 | Compressor | MB/sec | speedup | size | size overhead (lower=better) 106 | ------------|----------|---------|------|--------- 107 | [gzip](http://golang.org/pkg/compress/gzip) (golang) | 16.91MB/s (1 thread) | 1.0x | 4781329307 | 0% 108 | [gzip](http://github.com/klauspost/compress/gzip) (klauspost) | 127.10MB/s (1 thread) | 7.52x | 4885366806 | +2.17% 109 | [pgzip](https://github.com/klauspost/pgzip) (klauspost) | 2085.35MB/s| 123.34x | 4886132566 | +2.19% 110 | [pargzip](https://godoc.org/github.com/golang/build/pargzip) (builder) | 334.04MB/s | 19.76x | 4786890417 | +0.12% 111 | 112 | pgzip also contains a [huffman only compression](https://github.com/klauspost/compress#linear-time-compression-huffman-only) mode, that will allow compression at ~450MB per core per second, largely independent of the content. 113 | 114 | See the [complete sheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing) for different content types and compression settings. 
115 | 116 | ## Decompression 117 | 118 | The decompression speedup is there because it allows you to do other work while the decompression is taking place. 119 | 120 | In the example above, the numbers are as follows on a 4 CPU machine: 121 | 122 | Decompressor | Time | Speedup 123 | -------------|------|-------- 124 | [gzip](http://golang.org/pkg/compress/gzip) (golang) | 1m28.85s | 0% 125 | [pgzip](https://github.com/klauspost/pgzip) (klauspost) | 43.48s | 104% 126 | 127 | But wait, since gzip decompression is inherently singlethreaded (aside from CRC calculation) how can it be more than 100% faster? Because pgzip due to its design also acts as a buffer. When using unbuffered gzip, you are also waiting for io when you are decompressing. If the gzip decoder can keep up, it will always have data ready for your reader, and you will not be waiting for input to the gzip decompressor to complete. 128 | 129 | This is pretty much an optimal situation for pgzip, but it reflects most common usecases for CPU intensive gzip usage. 130 | 131 | I haven't included [bgzf](https://godoc.org/github.com/biogo/hts/bgzf) in this comparison, since it only can decompress files created by a compatible encoder, and therefore cannot be considered a generic gzip decompressor. But if you are able to compress your files with a bgzf compatible program, you can expect it to scale beyond 100%. 132 | 133 | # License 134 | This contains large portions of code from the go repository - see GO_LICENSE for more information. The changes are released under MIT License. See LICENSE for more information. 135 | -------------------------------------------------------------------------------- /gunzip.go: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | // Package pgzip implements reading and writing of gzip format compressed files, 6 | // as specified in RFC 1952. 7 | // 8 | // This is a drop in replacement for "compress/gzip". 9 | // This will split compression into blocks that are compressed in parallel. 10 | // This can be useful for compressing big amounts of data. 11 | // Decompression reads ahead of the caller in a separate goroutine, 12 | // so you can use it as a complete replacement for "compress/gzip". 13 | // 14 | // See more at https://github.com/klauspost/pgzip 15 | package pgzip 16 | 17 | import ( 18 | "bufio" 19 | "errors" 20 | "hash" 21 | "hash/crc32" 22 | "io" 23 | "sync" 24 | "time" 25 | 26 | "github.com/klauspost/compress/flate" 27 | ) 28 | // Fixed header bytes and FLG bit masks that readHeader checks. 29 | const ( 30 | gzipID1 = 0x1f 31 | gzipID2 = 0x8b 32 | gzipDeflate = 8 33 | flagText = 1 << 0 34 | flagHdrCrc = 1 << 1 35 | flagExtra = 1 << 2 36 | flagName = 1 << 3 37 | flagComment = 1 << 4 38 | ) 39 | // makeReader returns r itself if it implements flate.Reader and otherwise wraps it in a bufio.Reader. 40 | func makeReader(r io.Reader) flate.Reader { 41 | if rr, ok := r.(flate.Reader); ok { 42 | return rr 43 | } 44 | return bufio.NewReader(r) 45 | } 46 | 47 | var ( 48 | // ErrChecksum is returned when reading GZIP data that has an invalid checksum. 49 | ErrChecksum = errors.New("gzip: invalid checksum") 50 | // ErrHeader is returned when reading GZIP data that has an invalid header. 51 | ErrHeader = errors.New("gzip: invalid header") 52 | ) 53 | 54 | // Header holds the metadata that a gzip file stores about the compressed file. 55 | // It is exposed as the fields of the Writer and Reader structs. 56 | type Header struct { 57 | Comment string // comment 58 | Extra []byte // "extra data" 59 | ModTime time.Time // modification time 60 | Name string // file name 61 | OS byte // operating system type 62 | } 63 | 64 | // A Reader is an io.Reader that can be read to retrieve 65 | // uncompressed data from a gzip-format compressed file. 
66 | // 67 | // In general, a gzip file can be a concatenation of gzip files, 68 | // each with its own header. Reads from the Reader 69 | // return the concatenation of the uncompressed data of each. 70 | // Only the first header is recorded in the Reader fields. 71 | // 72 | // Gzip files store a length and checksum of the uncompressed data. 73 | // The Reader will return an ErrChecksum when Read 74 | // reaches the end of the uncompressed data if it does not 75 | // have the expected length or checksum. Clients should treat data 76 | // returned by Read as tentative until they receive the io.EOF 77 | // marking the end of the data. 78 | type Reader struct { 79 | Header 80 | r flate.Reader // source of the gzip stream 81 | decompressor io.ReadCloser // flate reader for the current stream 82 | digest hash.Hash32 // CRC-32 of the header, then of the uncompressed data 83 | size uint32 // uncompressed bytes produced for the current stream 84 | flg byte // FLG byte of the most recently read header 85 | buf [512]byte // scratch buffer for header and trailer fields 86 | err error // sticky error returned by later Read calls 87 | closeErr chan error // delivers the decompressor's Close error when readahead stops 88 | multistream bool // see Multistream 89 | 90 | readAhead chan read // blocks produced by the readahead goroutine 91 | roff int // read offset 92 | current []byte // block currently being consumed 93 | closeReader chan struct{} // closed to stop the readahead goroutine 94 | lastBlock bool // set when the final block of the stream has been received 95 | blockSize int // size of each readahead block 96 | blocks int // number of readahead blocks 97 | 98 | activeRA bool // Indication if readahead is active 99 | mu sync.Mutex // Lock for above 100 | 101 | blockPool chan []byte // reusable block buffers 102 | } 103 | // read is one block handed over by the readahead goroutine together with the error, if any, that ended it. 104 | type read struct { 105 | b []byte 106 | err error 107 | } 108 | 109 | // NewReader creates a new Reader reading the given reader. 110 | // The implementation buffers input and may read more data than necessary from r. 111 | // It is the caller's responsibility to call Close on the Reader when done. 
112 | func NewReader(r io.Reader) (*Reader, error) { 113 | z := new(Reader) 114 | z.blocks = defaultBlocks 115 | z.blockSize = defaultBlockSize 116 | z.r = makeReader(r) 117 | z.digest = crc32.NewIEEE() 118 | z.multistream = true 119 | z.blockPool = make(chan []byte, z.blocks) 120 | for i := 0; i < z.blocks; i++ { 121 | z.blockPool <- make([]byte, z.blockSize) 122 | } 123 | if err := z.readHeader(true); err != nil { 124 | return nil, err 125 | } 126 | return z, nil 127 | } 128 | 129 | // NewReaderN creates a new Reader reading the given reader. 130 | // The implementation buffers input and may read more data than necessary from r. 131 | // It is the caller's responsibility to call Close on the Reader when done. 132 | // 133 | // With this you can control the approximate size of your blocks, 134 | // as well as how many blocks you want to have prefetched. 135 | // 136 | // Default values for this is blockSize = 250000, blocks = 16, 137 | // meaning up to 16 blocks of maximum 250000 bytes will be 138 | // prefetched. 139 | func NewReaderN(r io.Reader, blockSize, blocks int) (*Reader, error) { 140 | z := new(Reader) 141 | z.blocks = blocks 142 | z.blockSize = blockSize 143 | z.r = makeReader(r) 144 | z.digest = crc32.NewIEEE() 145 | z.multistream = true 146 | 147 | // Account for too small values 148 | if z.blocks <= 0 { 149 | z.blocks = defaultBlocks 150 | } 151 | if z.blockSize <= 512 { 152 | z.blockSize = defaultBlockSize 153 | } 154 | z.blockPool = make(chan []byte, z.blocks) 155 | for i := 0; i < z.blocks; i++ { 156 | z.blockPool <- make([]byte, z.blockSize) 157 | } 158 | if err := z.readHeader(true); err != nil { 159 | return nil, err 160 | } 161 | return z, nil 162 | } 163 | 164 | // Reset discards the Reader z's state and makes it equivalent to the 165 | // result of its original state from NewReader, but reading from r instead. 166 | // This permits reusing a Reader rather than allocating a new one. 
167 | func (z *Reader) Reset(r io.Reader) error { 168 | z.killReadAhead() 169 | z.r = makeReader(r) 170 | z.digest = crc32.NewIEEE() 171 | z.size = 0 172 | z.err = nil 173 | z.multistream = true 174 | 175 | // Account for uninitialized values 176 | if z.blocks <= 0 { 177 | z.blocks = defaultBlocks 178 | } 179 | if z.blockSize <= 512 { 180 | z.blockSize = defaultBlockSize 181 | } 182 | 183 | if z.blockPool == nil { 184 | z.blockPool = make(chan []byte, z.blocks) 185 | for i := 0; i < z.blocks; i++ { 186 | z.blockPool <- make([]byte, z.blockSize) 187 | } 188 | } 189 | 190 | return z.readHeader(true) 191 | } 192 | 193 | // Multistream controls whether the reader supports multistream files. 194 | // 195 | // If enabled (the default), the Reader expects the input to be a sequence 196 | // of individually gzipped data streams, each with its own header and 197 | // trailer, ending at EOF. The effect is that the concatenation of a sequence 198 | // of gzipped files is treated as equivalent to the gzip of the concatenation 199 | // of the sequence. This is standard behavior for gzip readers. 200 | // 201 | // Calling Multistream(false) disables this behavior; disabling the behavior 202 | // can be useful when reading file formats that distinguish individual gzip 203 | // data streams or mix gzip data streams with other data streams. 204 | // In this mode, when the Reader reaches the end of the data stream, 205 | // Read returns io.EOF. If the underlying reader implements io.ByteReader, 206 | // it will be left positioned just after the gzip stream. 207 | // To start the next stream, call z.Reset(r) followed by z.Multistream(false). 208 | // If there is no next stream, z.Reset(r) will return io.EOF. 209 | func (z *Reader) Multistream(ok bool) { 210 | z.multistream = ok 211 | } 212 | 213 | // GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950). 
214 | func get4(p []byte) uint32 { 215 | return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24 216 | } 217 | 218 | func (z *Reader) readString() (string, error) { 219 | var err error 220 | needconv := false 221 | for i := 0; ; i++ { 222 | if i >= len(z.buf) { 223 | return "", ErrHeader 224 | } 225 | z.buf[i], err = z.r.ReadByte() 226 | if err != nil { 227 | return "", err 228 | } 229 | if z.buf[i] > 0x7f { 230 | needconv = true 231 | } 232 | if z.buf[i] == 0 { 233 | // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). 234 | if needconv { 235 | s := make([]rune, 0, i) 236 | for _, v := range z.buf[0:i] { 237 | s = append(s, rune(v)) 238 | } 239 | return string(s), nil 240 | } 241 | return string(z.buf[0:i]), nil 242 | } 243 | } 244 | } 245 | 246 | func (z *Reader) read2() (uint32, error) { 247 | _, err := io.ReadFull(z.r, z.buf[0:2]) 248 | if err != nil { 249 | return 0, err 250 | } 251 | return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil 252 | } 253 | 254 | func (z *Reader) readHeader(save bool) error { 255 | z.killReadAhead() 256 | 257 | _, err := io.ReadFull(z.r, z.buf[0:10]) 258 | if err != nil { 259 | return err 260 | } 261 | if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate { 262 | return ErrHeader 263 | } 264 | z.flg = z.buf[3] 265 | if save { 266 | z.ModTime = time.Unix(int64(get4(z.buf[4:8])), 0) 267 | // z.buf[8] is xfl, ignored 268 | z.OS = z.buf[9] 269 | } 270 | z.digest.Reset() 271 | z.digest.Write(z.buf[0:10]) 272 | 273 | if z.flg&flagExtra != 0 { 274 | n, err := z.read2() 275 | if err != nil { 276 | return err 277 | } 278 | data := make([]byte, n) 279 | if _, err = io.ReadFull(z.r, data); err != nil { 280 | return err 281 | } 282 | if save { 283 | z.Extra = data 284 | } 285 | } 286 | 287 | var s string 288 | if z.flg&flagName != 0 { 289 | if s, err = z.readString(); err != nil { 290 | return err 291 | } 292 | if save { 293 | z.Name = s 294 | } 295 | } 296 | 297 | if 
z.flg&flagComment != 0 { 298 | if s, err = z.readString(); err != nil { 299 | return err 300 | } 301 | if save { 302 | z.Comment = s 303 | } 304 | } 305 | 306 | if z.flg&flagHdrCrc != 0 { 307 | n, err := z.read2() 308 | if err != nil { 309 | return err 310 | } 311 | sum := z.digest.Sum32() & 0xFFFF 312 | if n != sum { 313 | return ErrHeader 314 | } 315 | } 316 | 317 | z.digest.Reset() 318 | z.decompressor = flate.NewReader(z.r) 319 | z.doReadAhead() 320 | return nil 321 | } 322 | 323 | func (z *Reader) killReadAhead() error { 324 | z.mu.Lock() 325 | defer z.mu.Unlock() 326 | if z.activeRA { 327 | if z.closeReader != nil { 328 | close(z.closeReader) 329 | } 330 | 331 | // Wait for decompressor to be closed and return error, if any. 332 | e, ok := <-z.closeErr 333 | z.activeRA = false 334 | 335 | for blk := range z.readAhead { 336 | if blk.b != nil { 337 | z.blockPool <- blk.b 338 | } 339 | } 340 | if cap(z.current) > 0 { 341 | z.blockPool <- z.current 342 | z.current = nil 343 | } 344 | if !ok { 345 | // Channel is closed, so if there was any error it has already been returned. 346 | return nil 347 | } 348 | return e 349 | } 350 | return nil 351 | } 352 | 353 | // Starts readahead. 354 | // Will return on error (including io.EOF) 355 | // or when z.closeReader is closed. 
356 | func (z *Reader) doReadAhead() { 357 | z.mu.Lock() 358 | defer z.mu.Unlock() 359 | z.activeRA = true 360 | 361 | if z.blocks <= 0 { 362 | z.blocks = defaultBlocks 363 | } 364 | if z.blockSize <= 512 { 365 | z.blockSize = defaultBlockSize 366 | } 367 | ra := make(chan read, z.blocks) 368 | z.readAhead = ra 369 | closeReader := make(chan struct{}, 0) 370 | z.closeReader = closeReader 371 | z.lastBlock = false 372 | closeErr := make(chan error, 1) 373 | z.closeErr = closeErr 374 | z.size = 0 375 | z.roff = 0 376 | z.current = nil 377 | decomp := z.decompressor 378 | 379 | go func() { 380 | defer func() { 381 | closeErr <- decomp.Close() 382 | close(closeErr) 383 | close(ra) 384 | }() 385 | 386 | // We hold a local reference to digest, since 387 | // it way be changed by reset. 388 | digest := z.digest 389 | var wg sync.WaitGroup 390 | for { 391 | var buf []byte 392 | select { 393 | case buf = <-z.blockPool: 394 | case <-closeReader: 395 | return 396 | } 397 | buf = buf[0:z.blockSize] 398 | // Try to fill the buffer 399 | n, err := io.ReadFull(decomp, buf) 400 | if err == io.ErrUnexpectedEOF { 401 | if n > 0 { 402 | err = nil 403 | } else { 404 | // If we got zero bytes, we need to establish if 405 | // we reached end of stream or truncated stream. 
406 | _, err = decomp.Read([]byte{}) 407 | if err == io.EOF { 408 | err = nil 409 | } 410 | } 411 | } 412 | if n < len(buf) { 413 | buf = buf[0:n] 414 | } 415 | wg.Wait() 416 | wg.Add(1) 417 | go func() { 418 | digest.Write(buf) 419 | wg.Done() 420 | }() 421 | z.size += uint32(n) 422 | 423 | // If we return any error, out digest must be ready 424 | if err != nil { 425 | wg.Wait() 426 | } 427 | select { 428 | case z.readAhead <- read{b: buf, err: err}: 429 | case <-closeReader: 430 | // Sent on close, we don't care about the next results 431 | z.blockPool <- buf 432 | return 433 | } 434 | if err != nil { 435 | return 436 | } 437 | } 438 | }() 439 | } 440 | 441 | func (z *Reader) Read(p []byte) (n int, err error) { 442 | if z.err != nil { 443 | return 0, z.err 444 | } 445 | if len(p) == 0 { 446 | return 0, nil 447 | } 448 | 449 | for { 450 | if len(z.current) == 0 && !z.lastBlock { 451 | read := <-z.readAhead 452 | 453 | if read.err != nil { 454 | // If not nil, the reader will have exited 455 | z.closeReader = nil 456 | 457 | if read.err != io.EOF { 458 | z.err = read.err 459 | return 460 | } 461 | if read.err == io.EOF { 462 | z.lastBlock = true 463 | err = nil 464 | } 465 | } 466 | z.current = read.b 467 | z.roff = 0 468 | } 469 | avail := z.current[z.roff:] 470 | if len(p) >= len(avail) { 471 | // If len(p) >= len(current), return all content of current 472 | n = copy(p, avail) 473 | z.blockPool <- z.current 474 | z.current = nil 475 | if z.lastBlock { 476 | err = io.EOF 477 | break 478 | } 479 | } else { 480 | // We copy as much as there is space for 481 | n = copy(p, avail) 482 | z.roff += n 483 | } 484 | return 485 | } 486 | 487 | // Finished file; check checksum + size. 
488 | if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil { 489 | z.err = err 490 | return 0, err 491 | } 492 | crc32, isize := get4(z.buf[0:4]), get4(z.buf[4:8]) 493 | sum := z.digest.Sum32() 494 | if sum != crc32 || isize != z.size { 495 | z.err = ErrChecksum 496 | return 0, z.err 497 | } 498 | 499 | // File is ok; should we attempt reading one more? 500 | if !z.multistream { 501 | return 0, io.EOF 502 | } 503 | 504 | // Is there another? 505 | if err = z.readHeader(false); err != nil { 506 | z.err = err 507 | return 508 | } 509 | 510 | // Yes. Reset and read from it. 511 | return z.Read(p) 512 | } 513 | 514 | func (z *Reader) WriteTo(w io.Writer) (n int64, err error) { 515 | total := int64(0) 516 | avail := z.current[z.roff:] 517 | if len(avail) != 0 { 518 | n, err := w.Write(avail) 519 | if n != len(avail) { 520 | return total, io.ErrShortWrite 521 | } 522 | total += int64(n) 523 | if err != nil { 524 | return total, err 525 | } 526 | z.blockPool <- z.current 527 | z.current = nil 528 | } 529 | for { 530 | if z.err != nil { 531 | return total, z.err 532 | } 533 | // We write both to output and digest. 534 | for { 535 | // Read from input 536 | read := <-z.readAhead 537 | if read.err != nil { 538 | // If not nil, the reader will have exited 539 | z.closeReader = nil 540 | 541 | if read.err != io.EOF { 542 | z.err = read.err 543 | return total, z.err 544 | } 545 | if read.err == io.EOF { 546 | z.lastBlock = true 547 | err = nil 548 | } 549 | } 550 | // Write what we got 551 | n, err := w.Write(read.b) 552 | if n != len(read.b) { 553 | return total, io.ErrShortWrite 554 | } 555 | total += int64(n) 556 | if err != nil { 557 | return total, err 558 | } 559 | // Put block back 560 | z.blockPool <- read.b 561 | if z.lastBlock { 562 | break 563 | } 564 | } 565 | 566 | // Finished file; check checksum + size. 
567 | if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil { 568 | z.err = err 569 | return total, err 570 | } 571 | crc32, isize := get4(z.buf[0:4]), get4(z.buf[4:8]) 572 | sum := z.digest.Sum32() 573 | if sum != crc32 || isize != z.size { 574 | z.err = ErrChecksum 575 | return total, z.err 576 | } 577 | // File is ok; should we attempt reading one more? 578 | if !z.multistream { 579 | return total, nil 580 | } 581 | 582 | // Is there another? 583 | err = z.readHeader(false) 584 | if err == io.EOF { 585 | return total, nil 586 | } 587 | if err != nil { 588 | z.err = err 589 | return total, err 590 | } 591 | } 592 | } 593 | 594 | // Close closes the Reader. It does not close the underlying io.Reader. 595 | func (z *Reader) Close() error { 596 | return z.killReadAhead() 597 | } 598 | -------------------------------------------------------------------------------- /gunzip_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package pgzip 6 | 7 | import ( 8 | "bytes" 9 | oldgz "compress/gzip" 10 | "crypto/rand" 11 | "io" 12 | "io/ioutil" 13 | "os" 14 | "runtime/pprof" 15 | "strings" 16 | "testing" 17 | "time" 18 | 19 | kpgzip "github.com/klauspost/compress/gzip" 20 | ) 21 | 22 | type gunzipTest struct { 23 | name string 24 | desc string 25 | raw string 26 | gzip []byte 27 | err error 28 | } 29 | 30 | var gunzipTests = []gunzipTest{ 31 | { // has 1 empty fixed-huffman block 32 | "empty.txt", 33 | "empty.txt", 34 | "", 35 | []byte{ 36 | 0x1f, 0x8b, 0x08, 0x08, 0xf7, 0x5e, 0x14, 0x4a, 37 | 0x00, 0x03, 0x65, 0x6d, 0x70, 0x74, 0x79, 0x2e, 38 | 0x74, 0x78, 0x74, 0x00, 0x03, 0x00, 0x00, 0x00, 39 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 40 | }, 41 | nil, 42 | }, 43 | { // has 1 non-empty fixed huffman block 44 | "hello.txt", 45 | "hello.txt", 46 | "hello world\n", 47 | []byte{ 48 | 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, 49 | 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, 50 | 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, 51 | 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, 52 | 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, 53 | 0x00, 0x00, 54 | }, 55 | nil, 56 | }, 57 | { // concatenation 58 | "hello.txt", 59 | "hello.txt x2", 60 | "hello world\n" + 61 | "hello world\n", 62 | []byte{ 63 | 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, 64 | 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, 65 | 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, 66 | 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, 67 | 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, 68 | 0x00, 0x00, 69 | 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, 70 | 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, 71 | 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, 72 | 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, 73 | 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, 74 | 0x00, 0x00, 75 | }, 76 | nil, 77 | }, 78 | { // has a fixed huffman block with some length-distance pairs 79 | "shesells.txt", 80 | "shesells.txt", 81 | 
"she sells seashells by the seashore\n", 82 | []byte{ 83 | 0x1f, 0x8b, 0x08, 0x08, 0x72, 0x66, 0x8b, 0x4a, 84 | 0x00, 0x03, 0x73, 0x68, 0x65, 0x73, 0x65, 0x6c, 85 | 0x6c, 0x73, 0x2e, 0x74, 0x78, 0x74, 0x00, 0x2b, 86 | 0xce, 0x48, 0x55, 0x28, 0x4e, 0xcd, 0xc9, 0x29, 87 | 0x06, 0x92, 0x89, 0xc5, 0x19, 0x60, 0x56, 0x52, 88 | 0xa5, 0x42, 0x09, 0x58, 0x18, 0x28, 0x90, 0x5f, 89 | 0x94, 0xca, 0x05, 0x00, 0x76, 0xb0, 0x3b, 0xeb, 90 | 0x24, 0x00, 0x00, 0x00, 91 | }, 92 | nil, 93 | }, 94 | { // has dynamic huffman blocks 95 | "gettysburg", 96 | "gettysburg", 97 | " Four score and seven years ago our fathers brought forth on\n" + 98 | "this continent, a new nation, conceived in Liberty, and dedicated\n" + 99 | "to the proposition that all men are created equal.\n" + 100 | " Now we are engaged in a great Civil War, testing whether that\n" + 101 | "nation, or any nation so conceived and so dedicated, can long\n" + 102 | "endure.\n" + 103 | " We are met on a great battle-field of that war.\n" + 104 | " We have come to dedicate a portion of that field, as a final\n" + 105 | "resting place for those who here gave their lives that that\n" + 106 | "nation might live. It is altogether fitting and proper that\n" + 107 | "we should do this.\n" + 108 | " But, in a larger sense, we can not dedicate — we can not\n" + 109 | "consecrate — we can not hallow — this ground.\n" + 110 | " The brave men, living and dead, who struggled here, have\n" + 111 | "consecrated it, far above our poor power to add or detract.\n" + 112 | "The world will little note, nor long remember what we say here,\n" + 113 | "but it can never forget what they did here.\n" + 114 | " It is for us the living, rather, to be dedicated here to the\n" + 115 | "unfinished work which they who fought here have thus far so\n" + 116 | "nobly advanced. 
It is rather for us to be here dedicated to\n" + 117 | "the great task remaining before us — that from these honored\n" + 118 | "dead we take increased devotion to that cause for which they\n" + 119 | "gave the last full measure of devotion —\n" + 120 | " that we here highly resolve that these dead shall not have\n" + 121 | "died in vain — that this nation, under God, shall have a new\n" + 122 | "birth of freedom — and that government of the people, by the\n" + 123 | "people, for the people, shall not perish from this earth.\n" + 124 | "\n" + 125 | "Abraham Lincoln, November 19, 1863, Gettysburg, Pennsylvania\n", 126 | []byte{ 127 | 0x1f, 0x8b, 0x08, 0x08, 0xd1, 0x12, 0x2b, 0x4a, 128 | 0x00, 0x03, 0x67, 0x65, 0x74, 0x74, 0x79, 0x73, 129 | 0x62, 0x75, 0x72, 0x67, 0x00, 0x65, 0x54, 0xcd, 130 | 0x6e, 0xd4, 0x30, 0x10, 0xbe, 0xfb, 0x29, 0xe6, 131 | 0x01, 0x42, 0xa5, 0x0a, 0x09, 0xc1, 0x11, 0x90, 132 | 0x40, 0x48, 0xa8, 0xe2, 0x80, 0xd4, 0xf3, 0x24, 133 | 0x9e, 0x24, 0x56, 0xbd, 0x9e, 0xc5, 0x76, 0x76, 134 | 0x95, 0x1b, 0x0f, 0xc1, 0x13, 0xf2, 0x24, 0x7c, 135 | 0x63, 0x77, 0x9b, 0x4a, 0x5c, 0xaa, 0x6e, 0x6c, 136 | 0xcf, 0x7c, 0x7f, 0x33, 0x44, 0x5f, 0x74, 0xcb, 137 | 0x54, 0x26, 0xcd, 0x42, 0x9c, 0x3c, 0x15, 0xb9, 138 | 0x48, 0xa2, 0x5d, 0x38, 0x17, 0xe2, 0x45, 0xc9, 139 | 0x4e, 0x67, 0xae, 0xab, 0xe0, 0xf7, 0x98, 0x75, 140 | 0x5b, 0xd6, 0x4a, 0xb3, 0xe6, 0xba, 0x92, 0x26, 141 | 0x57, 0xd7, 0x50, 0x68, 0xd2, 0x54, 0x43, 0x92, 142 | 0x54, 0x07, 0x62, 0x4a, 0x72, 0xa5, 0xc4, 0x35, 143 | 0x68, 0x1a, 0xec, 0x60, 0x92, 0x70, 0x11, 0x4f, 144 | 0x21, 0xd1, 0xf7, 0x30, 0x4a, 0xae, 0xfb, 0xd0, 145 | 0x9a, 0x78, 0xf1, 0x61, 0xe2, 0x2a, 0xde, 0x55, 146 | 0x25, 0xd4, 0xa6, 0x73, 0xd6, 0xb3, 0x96, 0x60, 147 | 0xef, 0xf0, 0x9b, 0x2b, 0x71, 0x8c, 0x74, 0x02, 148 | 0x10, 0x06, 0xac, 0x29, 0x8b, 0xdd, 0x25, 0xf9, 149 | 0xb5, 0x71, 0xbc, 0x73, 0x44, 0x0f, 0x7a, 0xa5, 150 | 0xab, 0xb4, 0x33, 0x49, 0x0b, 0x2f, 0xbd, 0x03, 151 | 0xd3, 0x62, 0x17, 0xe9, 0x73, 0xb8, 0x84, 0x48, 152 | 0x8f, 
0x9c, 0x07, 0xaa, 0x52, 0x00, 0x6d, 0xa1, 153 | 0xeb, 0x2a, 0xc6, 0xa0, 0x95, 0x76, 0x37, 0x78, 154 | 0x9a, 0x81, 0x65, 0x7f, 0x46, 0x4b, 0x45, 0x5f, 155 | 0xe1, 0x6d, 0x42, 0xe8, 0x01, 0x13, 0x5c, 0x38, 156 | 0x51, 0xd4, 0xb4, 0x38, 0x49, 0x7e, 0xcb, 0x62, 157 | 0x28, 0x1e, 0x3b, 0x82, 0x93, 0x54, 0x48, 0xf1, 158 | 0xd2, 0x7d, 0xe4, 0x5a, 0xa3, 0xbc, 0x99, 0x83, 159 | 0x44, 0x4f, 0x3a, 0x77, 0x36, 0x57, 0xce, 0xcf, 160 | 0x2f, 0x56, 0xbe, 0x80, 0x90, 0x9e, 0x84, 0xea, 161 | 0x51, 0x1f, 0x8f, 0xcf, 0x90, 0xd4, 0x60, 0xdc, 162 | 0x5e, 0xb4, 0xf7, 0x10, 0x0b, 0x26, 0xe0, 0xff, 163 | 0xc4, 0xd1, 0xe5, 0x67, 0x2e, 0xe7, 0xc8, 0x93, 164 | 0x98, 0x05, 0xb8, 0xa8, 0x45, 0xc0, 0x4d, 0x09, 165 | 0xdc, 0x84, 0x16, 0x2b, 0x0d, 0x9a, 0x21, 0x53, 166 | 0x04, 0x8b, 0xd2, 0x0b, 0xbd, 0xa2, 0x4c, 0xa7, 167 | 0x60, 0xee, 0xd9, 0xe1, 0x1d, 0xd1, 0xb7, 0x4a, 168 | 0x30, 0x8f, 0x63, 0xd5, 0xa5, 0x8b, 0x33, 0x87, 169 | 0xda, 0x1a, 0x18, 0x79, 0xf3, 0xe3, 0xa6, 0x17, 170 | 0x94, 0x2e, 0xab, 0x6e, 0xa0, 0xe3, 0xcd, 0xac, 171 | 0x50, 0x8c, 0xca, 0xa7, 0x0d, 0x76, 0x37, 0xd1, 172 | 0x23, 0xe7, 0x05, 0x57, 0x8b, 0xa4, 0x22, 0x83, 173 | 0xd9, 0x62, 0x52, 0x25, 0xad, 0x07, 0xbb, 0xbf, 174 | 0xbf, 0xff, 0xbc, 0xfa, 0xee, 0x20, 0x73, 0x91, 175 | 0x29, 0xff, 0x7f, 0x02, 0x71, 0x62, 0x84, 0xb5, 176 | 0xf6, 0xb5, 0x25, 0x6b, 0x41, 0xde, 0x92, 0xb7, 177 | 0x76, 0x3f, 0x91, 0x91, 0x31, 0x1b, 0x41, 0x84, 178 | 0x62, 0x30, 0x0a, 0x37, 0xa4, 0x5e, 0x18, 0x3a, 179 | 0x99, 0x08, 0xa5, 0xe6, 0x6d, 0x59, 0x22, 0xec, 180 | 0x33, 0x39, 0x86, 0x26, 0xf5, 0xab, 0x66, 0xc8, 181 | 0x08, 0x20, 0xcf, 0x0c, 0xd7, 0x47, 0x45, 0x21, 182 | 0x0b, 0xf6, 0x59, 0xd5, 0xfe, 0x5c, 0x8d, 0xaa, 183 | 0x12, 0x7b, 0x6f, 0xa1, 0xf0, 0x52, 0x33, 0x4f, 184 | 0xf5, 0xce, 0x59, 0xd3, 0xab, 0x66, 0x10, 0xbf, 185 | 0x06, 0xc4, 0x31, 0x06, 0x73, 0xd6, 0x80, 0xa2, 186 | 0x78, 0xc2, 0x45, 0xcb, 0x03, 0x65, 0x39, 0xc9, 187 | 0x09, 0xd1, 0x06, 0x04, 0x33, 0x1a, 0x5a, 0xf1, 188 | 0xde, 0x01, 0xb8, 0x71, 0x83, 0xc4, 0xb5, 0xb3, 189 | 0xc3, 
0x54, 0x65, 0x33, 0x0d, 0x5a, 0xf7, 0x9b, 190 | 0x90, 0x7c, 0x27, 0x1f, 0x3a, 0x58, 0xa3, 0xd8, 191 | 0xfd, 0x30, 0x5f, 0xb7, 0xd2, 0x66, 0xa2, 0x93, 192 | 0x1c, 0x28, 0xb7, 0xe9, 0x1b, 0x0c, 0xe1, 0x28, 193 | 0x47, 0x26, 0xbb, 0xe9, 0x7d, 0x7e, 0xdc, 0x96, 194 | 0x10, 0x92, 0x50, 0x56, 0x7c, 0x06, 0xe2, 0x27, 195 | 0xb4, 0x08, 0xd3, 0xda, 0x7b, 0x98, 0x34, 0x73, 196 | 0x9f, 0xdb, 0xf6, 0x62, 0xed, 0x31, 0x41, 0x13, 197 | 0xd3, 0xa2, 0xa8, 0x4b, 0x3a, 0xc6, 0x1d, 0xe4, 198 | 0x2f, 0x8c, 0xf8, 0xfb, 0x97, 0x64, 0xf4, 0xb6, 199 | 0x2f, 0x80, 0x5a, 0xf3, 0x56, 0xe0, 0x40, 0x50, 200 | 0xd5, 0x19, 0xd0, 0x1e, 0xfc, 0xca, 0xe5, 0xc9, 201 | 0xd4, 0x60, 0x00, 0x81, 0x2e, 0xa3, 0xcc, 0xb6, 202 | 0x52, 0xf0, 0xb4, 0xdb, 0x69, 0x99, 0xce, 0x7a, 203 | 0x32, 0x4c, 0x08, 0xed, 0xaa, 0x10, 0x10, 0xe3, 204 | 0x6f, 0xee, 0x99, 0x68, 0x95, 0x9f, 0x04, 0x71, 205 | 0xb2, 0x49, 0x2f, 0x62, 0xa6, 0x5e, 0xb4, 0xef, 206 | 0x02, 0xed, 0x4f, 0x27, 0xde, 0x4a, 0x0f, 0xfd, 207 | 0xc1, 0xcc, 0xdd, 0x02, 0x8f, 0x08, 0x16, 0x54, 208 | 0xdf, 0xda, 0xca, 0xe0, 0x82, 0xf1, 0xb4, 0x31, 209 | 0x7a, 0xa9, 0x81, 0xfe, 0x90, 0xb7, 0x3e, 0xdb, 210 | 0xd3, 0x35, 0xc0, 0x20, 0x80, 0x33, 0x46, 0x4a, 211 | 0x63, 0xab, 0xd1, 0x0d, 0x29, 0xd2, 0xe2, 0x84, 212 | 0xb8, 0xdb, 0xfa, 0xe9, 0x89, 0x44, 0x86, 0x7c, 213 | 0xe8, 0x0b, 0xe6, 0x02, 0x6a, 0x07, 0x9b, 0x96, 214 | 0xd0, 0xdb, 0x2e, 0x41, 0x4c, 0xa1, 0xd5, 0x57, 215 | 0x45, 0x14, 0xfb, 0xe3, 0xa6, 0x72, 0x5b, 0x87, 216 | 0x6e, 0x0c, 0x6d, 0x5b, 0xce, 0xe0, 0x2f, 0xe2, 217 | 0x21, 0x81, 0x95, 0xb0, 0xe8, 0xb6, 0x32, 0x0b, 218 | 0xb2, 0x98, 0x13, 0x52, 0x5d, 0xfb, 0xec, 0x63, 219 | 0x17, 0x8a, 0x9e, 0x23, 0x22, 0x36, 0xee, 0xcd, 220 | 0xda, 0xdb, 0xcf, 0x3e, 0xf1, 0xc7, 0xf1, 0x01, 221 | 0x12, 0x93, 0x0a, 0xeb, 0x6f, 0xf2, 0x02, 0x15, 222 | 0x96, 0x77, 0x5d, 0xef, 0x9c, 0xfb, 0x88, 0x91, 223 | 0x59, 0xf9, 0x84, 0xdd, 0x9b, 0x26, 0x8d, 0x80, 224 | 0xf9, 0x80, 0x66, 0x2d, 0xac, 0xf7, 0x1f, 0x06, 225 | 0xba, 0x7f, 0xff, 0xee, 0xed, 0x40, 0x5f, 0xa5, 226 | 0xd6, 
0xbd, 0x8c, 0x5b, 0x46, 0xd2, 0x7e, 0x48, 227 | 0x4a, 0x65, 0x8f, 0x08, 0x42, 0x60, 0xf7, 0x0f, 228 | 0xb9, 0x16, 0x0b, 0x0c, 0x1a, 0x06, 0x00, 0x00, 229 | }, 230 | nil, 231 | }, 232 | { // has 1 non-empty fixed huffman block then garbage 233 | "hello.txt", 234 | "hello.txt + garbage", 235 | "hello world\n", 236 | []byte{ 237 | 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, 238 | 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, 239 | 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, 240 | 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, 241 | 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, 242 | 0x00, 0x00, 'g', 'a', 'r', 'b', 'a', 'g', 'e', '!', '!', '!', 243 | }, 244 | ErrHeader, 245 | }, 246 | { // has 1 non-empty fixed huffman block not enough header 247 | "hello.txt", 248 | "hello.txt + garbage", 249 | "hello world\n", 250 | []byte{ 251 | 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, 252 | 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, 253 | 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, 254 | 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, 255 | 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, 256 | 0x00, 0x00, gzipID1, 257 | }, 258 | io.ErrUnexpectedEOF, 259 | }, 260 | { // has 1 non-empty fixed huffman block but corrupt checksum 261 | "hello.txt", 262 | "hello.txt + corrupt checksum", 263 | "hello world\n", 264 | []byte{ 265 | 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, 266 | 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, 267 | 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, 268 | 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, 269 | 0x02, 0x00, 0xff, 0xff, 0xff, 0xff, 0x0c, 0x00, 270 | 0x00, 0x00, 271 | }, 272 | ErrChecksum, 273 | }, 274 | { // has 1 non-empty fixed huffman block but corrupt size 275 | "hello.txt", 276 | "hello.txt + corrupt size", 277 | "hello world\n", 278 | []byte{ 279 | 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, 280 | 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, 281 | 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, 282 | 
0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, 283 | 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0xff, 0x00, 284 | 0x00, 0x00, 285 | }, 286 | ErrChecksum, 287 | }, 288 | } 289 | 290 | func TestDecompressor(t *testing.T) { 291 | b := new(bytes.Buffer) 292 | for _, tt := range gunzipTests { 293 | in := bytes.NewReader(tt.gzip) 294 | gzip, err := NewReader(in) 295 | if err != nil { 296 | t.Errorf("%s: NewReader: %s", tt.name, err) 297 | continue 298 | } 299 | defer gzip.Close() 300 | if tt.name != gzip.Name { 301 | t.Errorf("%s: got name %s", tt.name, gzip.Name) 302 | } 303 | b.Reset() 304 | n, err := io.Copy(b, gzip) 305 | if err != tt.err { 306 | t.Errorf("%s: io.Copy: %v want %v", tt.name, err, tt.err) 307 | } 308 | s := b.String() 309 | if s != tt.raw { 310 | t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.name, n, s, len(tt.raw), tt.raw) 311 | } 312 | 313 | // Test Reader Reset. 314 | in = bytes.NewReader(tt.gzip) 315 | err = gzip.Reset(in) 316 | if err != nil { 317 | t.Errorf("%s: Reset: %s", tt.name, err) 318 | continue 319 | } 320 | if tt.name != gzip.Name { 321 | t.Errorf("%s: got name %s", tt.name, gzip.Name) 322 | } 323 | b.Reset() 324 | n, err = io.Copy(b, gzip) 325 | if err != tt.err { 326 | t.Errorf("%s: io.Copy: %v want %v", tt.name, err, tt.err) 327 | } 328 | s = b.String() 329 | if s != tt.raw { 330 | t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.name, n, s, len(tt.raw), tt.raw) 331 | } 332 | } 333 | } 334 | 335 | func TestDecompressorReset(t *testing.T) { 336 | b := new(bytes.Buffer) 337 | var gzip *Reader 338 | 339 | for _, tt := range gunzipTests { 340 | in := bytes.NewReader(tt.gzip) 341 | if gzip == nil { 342 | var err error 343 | gzip, err = NewReader(in) 344 | if err != nil { 345 | t.Fatalf("NewReader: %s", err) 346 | } 347 | defer gzip.Close() 348 | } else { 349 | err := gzip.Reset(in) 350 | if err != nil { 351 | t.Errorf("%s: Reset: %s", tt.name, err) 352 | continue 353 | } 354 | } 355 | if tt.name != gzip.Name { 356 | t.Errorf("%s: got name 
%s", tt.name, gzip.Name) 357 | } 358 | b.Reset() 359 | 360 | n, err := io.Copy(b, gzip) 361 | if err != tt.err { 362 | t.Errorf("%s: io.Copy: %v want %v", tt.name, err, tt.err) 363 | } 364 | s := b.String() 365 | if s != tt.raw { 366 | t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.name, n, s, len(tt.raw), tt.raw) 367 | } 368 | 369 | // Test Reader Reset. 370 | in = bytes.NewReader(tt.gzip) 371 | err = gzip.Reset(in) 372 | if err != nil { 373 | t.Errorf("%s: Reset: %s", tt.name, err) 374 | continue 375 | } 376 | if tt.name != gzip.Name { 377 | t.Errorf("%s: got name %s", tt.name, gzip.Name) 378 | } 379 | b.Reset() 380 | n, err = io.Copy(b, gzip) 381 | if err != tt.err { 382 | t.Errorf("%s: io.Copy: %v want %v", tt.name, err, tt.err) 383 | } 384 | s = b.String() 385 | if s != tt.raw { 386 | t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.name, n, s, len(tt.raw), tt.raw) 387 | } 388 | } 389 | } 390 | 391 | func TestDecompressorResetNoRead(t *testing.T) { 392 | done := make(chan struct{}) 393 | defer close(done) 394 | go func() { 395 | select { 396 | // Typical runtime is 2-3s, so we add an order of magnitude. 
397 | case <-time.After(30 * time.Second): 398 | pprof.Lookup("goroutine").WriteTo(os.Stdout, 1) 399 | case <-done: 400 | } 401 | }() 402 | in, err := ioutil.ReadFile("testdata/bigempty.gz") 403 | if err != nil { 404 | t.Fatal(err) 405 | } 406 | gz, err := NewReader(bytes.NewBuffer(in)) 407 | if err != nil { 408 | t.Fatal(err) 409 | } 410 | for i := 0; i < 100; i++ { 411 | if testing.Short() && i > 10 { 412 | break 413 | } 414 | err := gz.Reset(bytes.NewBuffer(in)) 415 | if err != nil { 416 | t.Fatal(i, err) 417 | } 418 | // Read 100KB, ignore the rest 419 | lr := io.LimitedReader{N: 100 << 10, R: gz} 420 | _, err = io.Copy(ioutil.Discard, &lr) 421 | if err != nil { 422 | t.Fatal(i, err) 423 | } 424 | } 425 | } 426 | 427 | func TestIssue6550(t *testing.T) { 428 | f, err := os.Open("testdata/issue6550.gz") 429 | if err != nil { 430 | t.Fatal(err) 431 | } 432 | gzip, err := NewReader(f) 433 | if err != nil { 434 | t.Fatalf("NewReader(testdata/issue6550.gz): %v", err) 435 | } 436 | defer gzip.Close() 437 | done := make(chan bool, 1) 438 | go func() { 439 | _, err := io.Copy(ioutil.Discard, gzip) 440 | if err == nil { 441 | t.Errorf("Copy succeeded") 442 | } else { 443 | t.Logf("Copy failed (correctly): %v", err) 444 | } 445 | done <- true 446 | }() 447 | select { 448 | case <-time.After(1 * time.Second): 449 | t.Errorf("Copy hung") 450 | case <-done: 451 | // ok 452 | } 453 | } 454 | 455 | func TestInitialReset(t *testing.T) { 456 | var r Reader 457 | if err := r.Reset(bytes.NewReader(gunzipTests[1].gzip)); err != nil { 458 | t.Error(err) 459 | } 460 | var buf bytes.Buffer 461 | if _, err := io.Copy(&buf, &r); err != nil { 462 | t.Error(err) 463 | } 464 | if s := buf.String(); s != gunzipTests[1].raw { 465 | t.Errorf("got %q want %q", s, gunzipTests[1].raw) 466 | } 467 | } 468 | 469 | func TestMultistreamFalse(t *testing.T) { 470 | // Find concatenation test. 
471 | var tt gunzipTest 472 | for _, tt = range gunzipTests { 473 | if strings.HasSuffix(tt.desc, " x2") { 474 | goto Found 475 | } 476 | } 477 | t.Fatal("cannot find hello.txt x2 in gunzip tests") 478 | 479 | Found: 480 | br := bytes.NewReader(tt.gzip) 481 | var r Reader 482 | if err := r.Reset(br); err != nil { 483 | t.Fatalf("first reset: %v", err) 484 | } 485 | 486 | // Expect two streams with "hello world\n", then real EOF. 487 | const hello = "hello world\n" 488 | 489 | r.Multistream(false) 490 | data, err := ioutil.ReadAll(&r) 491 | if string(data) != hello || err != nil { 492 | t.Fatalf("first stream = %q, %v, want %q, %v", string(data), err, hello, nil) 493 | } 494 | 495 | if err := r.Reset(br); err != nil { 496 | t.Fatalf("second reset: %v", err) 497 | } 498 | r.Multistream(false) 499 | data, err = ioutil.ReadAll(&r) 500 | if string(data) != hello || err != nil { 501 | t.Fatalf("second stream = %q, %v, want %q, %v", string(data), err, hello, nil) 502 | } 503 | 504 | if err := r.Reset(br); err != io.EOF { 505 | t.Fatalf("third reset: err=%v, want io.EOF", err) 506 | } 507 | } 508 | 509 | func TestWriteTo(t *testing.T) { 510 | input := make([]byte, 100000) 511 | n, err := rand.Read(input) 512 | if err != nil { 513 | t.Fatal(err) 514 | } 515 | if n != len(input) { 516 | t.Fatal("did not fill buffer") 517 | } 518 | compressed := &bytes.Buffer{} 519 | // Do it twice to test MultiStream functionality 520 | for i := 0; i < 2; i++ { 521 | w, err := NewWriterLevel(compressed, -2) 522 | if err != nil { 523 | t.Fatal(err) 524 | } 525 | n, err = w.Write(input) 526 | if err != nil { 527 | t.Fatal(err) 528 | } 529 | if n != len(input) { 530 | t.Fatal("did not fill buffer") 531 | } 532 | w.Close() 533 | } 534 | input = append(input, input...) 535 | buf := compressed.Bytes() 536 | 537 | dec, err := NewReader(bytes.NewBuffer(buf)) 538 | if err != nil { 539 | t.Fatal(err) 540 | } 541 | // ReadAll does not use WriteTo, but we wrap it in a NopCloser to be sure. 
542 | readall, err := ioutil.ReadAll(ioutil.NopCloser(dec)) 543 | if err != nil { 544 | t.Fatal(err) 545 | } 546 | if len(readall) != len(input) { 547 | t.Fatal("did not decompress everything") 548 | } 549 | if bytes.Compare(readall, input) != 0 { 550 | t.Fatal("output did not match input") 551 | } 552 | 553 | dec, err = NewReader(bytes.NewBuffer(buf)) 554 | if err != nil { 555 | t.Fatal(err) 556 | } 557 | wtbuf := &bytes.Buffer{} 558 | written, err := dec.WriteTo(wtbuf) 559 | if err != nil { 560 | t.Fatal(err) 561 | } 562 | if written != int64(len(input)) { 563 | t.Error("Returned length did not match, expected", len(input), "got", written) 564 | } 565 | if wtbuf.Len() != len(input) { 566 | t.Error("Actual Length did not match, expected", len(input), "got", wtbuf.Len()) 567 | } 568 | if bytes.Compare(wtbuf.Bytes(), input) != 0 { 569 | t.Fatal("output did not match input") 570 | } 571 | } 572 | 573 | func BenchmarkGunzipCopy(b *testing.B) { 574 | dat, _ := ioutil.ReadFile("testdata/test.json") 575 | dat = append(dat, dat...) 576 | dat = append(dat, dat...) 577 | dat = append(dat, dat...) 578 | dat = append(dat, dat...) 579 | dat = append(dat, dat...) 580 | dst := &bytes.Buffer{} 581 | w, _ := NewWriterLevel(dst, 1) 582 | _, err := w.Write(dat) 583 | if err != nil { 584 | b.Fatal(err) 585 | } 586 | w.Close() 587 | input := dst.Bytes() 588 | r, err := NewReader(bytes.NewBuffer(input)) 589 | b.SetBytes(int64(len(dat))) 590 | b.ResetTimer() 591 | for n := 0; n < b.N; n++ { 592 | err = r.Reset(bytes.NewBuffer(input)) 593 | if err != nil { 594 | b.Fatal(err) 595 | } 596 | _, err = io.Copy(ioutil.Discard, r) 597 | if err != nil { 598 | b.Fatal(err) 599 | } 600 | } 601 | } 602 | 603 | func BenchmarkGunzipReadAll(b *testing.B) { 604 | dat, _ := ioutil.ReadFile("testdata/test.json") 605 | dat = append(dat, dat...) 606 | dat = append(dat, dat...) 607 | dat = append(dat, dat...) 608 | dat = append(dat, dat...) 609 | dat = append(dat, dat...) 
610 | dst := &bytes.Buffer{} 611 | w, _ := NewWriterLevel(dst, 1) 612 | _, err := w.Write(dat) 613 | if err != nil { 614 | b.Fatal(err) 615 | } 616 | w.Close() 617 | input := dst.Bytes() 618 | r, err := NewReader(bytes.NewBuffer(input)) 619 | b.SetBytes(int64(len(dat))) 620 | b.ResetTimer() 621 | for n := 0; n < b.N; n++ { 622 | err = r.Reset(bytes.NewBuffer(input)) 623 | if err != nil { 624 | b.Fatal(err) 625 | } 626 | _, err = ioutil.ReadAll(ioutil.NopCloser(r)) 627 | if err != nil { 628 | b.Fatal(err) 629 | } 630 | } 631 | } 632 | 633 | func BenchmarkGunzipStdLib(b *testing.B) { 634 | dat, _ := ioutil.ReadFile("testdata/test.json") 635 | dat = append(dat, dat...) 636 | dat = append(dat, dat...) 637 | dat = append(dat, dat...) 638 | dat = append(dat, dat...) 639 | dat = append(dat, dat...) 640 | dst := &bytes.Buffer{} 641 | w, _ := NewWriterLevel(dst, 1) 642 | _, err := w.Write(dat) 643 | if err != nil { 644 | b.Fatal(err) 645 | } 646 | w.Close() 647 | input := dst.Bytes() 648 | r, err := oldgz.NewReader(bytes.NewBuffer(input)) 649 | b.SetBytes(int64(len(dat))) 650 | b.ResetTimer() 651 | for n := 0; n < b.N; n++ { 652 | err = r.Reset(bytes.NewBuffer(input)) 653 | if err != nil { 654 | b.Fatal(err) 655 | } 656 | _, err = io.Copy(ioutil.Discard, r) 657 | if err != nil { 658 | b.Fatal(err) 659 | } 660 | } 661 | } 662 | 663 | func BenchmarkGunzipFlate(b *testing.B) { 664 | dat, _ := ioutil.ReadFile("testdata/test.json") 665 | dat = append(dat, dat...) 666 | dat = append(dat, dat...) 667 | dat = append(dat, dat...) 668 | dat = append(dat, dat...) 669 | dat = append(dat, dat...) 
670 | dst := &bytes.Buffer{} 671 | w, _ := NewWriterLevel(dst, 1) 672 | _, err := w.Write(dat) 673 | if err != nil { 674 | b.Fatal(err) 675 | } 676 | w.Close() 677 | input := dst.Bytes() 678 | r, err := kpgzip.NewReader(bytes.NewBuffer(input)) 679 | b.SetBytes(int64(len(dat))) 680 | b.ResetTimer() 681 | for n := 0; n < b.N; n++ { 682 | err = r.Reset(bytes.NewBuffer(input)) 683 | if err != nil { 684 | b.Fatal(err) 685 | } 686 | _, err = io.Copy(ioutil.Discard, r) 687 | if err != nil { 688 | b.Fatal(err) 689 | } 690 | } 691 | } 692 | 693 | func TestTruncatedGunzip(t *testing.T) { 694 | in := []byte(strings.Repeat("ASDFASDFASDFASDFASDF", 1000)) 695 | var buf bytes.Buffer 696 | enc := kpgzip.NewWriter(&buf) 697 | _, err := enc.Write(in) 698 | if err != nil { 699 | t.Fatal(err) 700 | } 701 | enc.Close() 702 | testdata := buf.Bytes() 703 | for i := 5; i < len(testdata); i += 10 { 704 | timer := time.NewTimer(time.Second) 705 | done := make(chan struct{}) 706 | fail := make(chan struct{}) 707 | go func() { 708 | r, err := NewReader(bytes.NewBuffer(testdata[:i])) 709 | if err == nil { 710 | b, err := ioutil.ReadAll(r) 711 | if err == nil && !bytes.Equal(testdata[:i], b) { 712 | close(fail) 713 | } 714 | } 715 | close(done) 716 | }() 717 | select { 718 | case <-timer.C: 719 | t.Fatal("Timeout decoding") 720 | case <-fail: 721 | t.Fatal("No error, but mismatch") 722 | case <-done: 723 | timer.Stop() 724 | } 725 | } 726 | } 727 | 728 | func TestTruncatedGunzipBlocks(t *testing.T) { 729 | var in = make([]byte, 512*10) 730 | rand.Read(in) 731 | var buf bytes.Buffer 732 | for i := 0; i < len(in); i += 512 { 733 | enc, _ := kpgzip.NewWriterLevel(&buf, 0) 734 | _, err := enc.Write(in[:i]) 735 | if err != nil { 736 | t.Fatal(err) 737 | } 738 | enc.Close() 739 | 740 | timer := time.NewTimer(time.Second) 741 | done := make(chan struct{}) 742 | fail := make(chan struct{}) 743 | go func() { 744 | r, err := NewReaderN(&buf, 512, 10) 745 | if err == nil { 746 | b, err := 
ioutil.ReadAll(r) 747 | if err == nil && !bytes.Equal(b, in[:i]) { 748 | close(fail) 749 | } 750 | } 751 | close(done) 752 | }() 753 | select { 754 | case <-timer.C: 755 | t.Fatal("Timeout decoding") 756 | case <-fail: 757 | t.Fatal("No error, but mismatch") 758 | case <-done: 759 | timer.Stop() 760 | } 761 | } 762 | } 763 | -------------------------------------------------------------------------------- /gzip.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package pgzip 6 | 7 | import ( 8 | "bytes" 9 | "errors" 10 | "fmt" 11 | "hash" 12 | "hash/crc32" 13 | "io" 14 | "runtime" 15 | "sync" 16 | "time" 17 | 18 | "github.com/klauspost/compress/flate" 19 | ) 20 | 21 | const ( 22 | defaultBlockSize = 1 << 20 23 | tailSize = 16384 24 | defaultBlocks = 4 25 | ) 26 | 27 | // These constants are copied from the flate package, so that code that imports 28 | // "compress/gzip" does not also have to import "compress/flate". 29 | const ( 30 | NoCompression = flate.NoCompression 31 | BestSpeed = flate.BestSpeed 32 | BestCompression = flate.BestCompression 33 | DefaultCompression = flate.DefaultCompression 34 | ConstantCompression = flate.ConstantCompression 35 | HuffmanOnly = flate.HuffmanOnly 36 | ) 37 | 38 | // A Writer is an io.WriteCloser. 39 | // Writes to a Writer are compressed and written to w. 
40 | type Writer struct { 41 | Header 42 | w io.Writer 43 | level int 44 | wroteHeader bool 45 | blockSize int 46 | blocks int 47 | currentBuffer []byte 48 | prevTail []byte 49 | digest hash.Hash32 50 | size int 51 | closed bool 52 | buf [10]byte 53 | errMu sync.RWMutex 54 | err error 55 | pushedErr chan struct{} 56 | results chan result 57 | dictFlatePool sync.Pool 58 | dstPool sync.Pool 59 | wg sync.WaitGroup 60 | } 61 | 62 | type result struct { 63 | result chan []byte 64 | notifyWritten chan struct{} 65 | } 66 | 67 | // Use SetConcurrency to finetune the concurrency level if needed. 68 | // 69 | // With this you can control the approximate size of your blocks, 70 | // as well as how many you want to be processing in parallel. 71 | // 72 | // Default values for this is SetConcurrency(defaultBlockSize, runtime.GOMAXPROCS(0)), 73 | // meaning blocks are split at 1 MB and up to the number of CPU threads 74 | // can be processing at once before the writer blocks. 75 | func (z *Writer) SetConcurrency(blockSize, blocks int) error { 76 | if blockSize <= tailSize { 77 | return fmt.Errorf("gzip: block size cannot be less than or equal to %d", tailSize) 78 | } 79 | if blocks <= 0 { 80 | return errors.New("gzip: blocks cannot be zero or less") 81 | } 82 | if blockSize == z.blockSize && blocks == z.blocks { 83 | return nil 84 | } 85 | z.blockSize = blockSize 86 | z.results = make(chan result, blocks) 87 | z.blocks = blocks 88 | z.dstPool.New = func() interface{} { return make([]byte, 0, blockSize+(blockSize)>>4) } 89 | return nil 90 | } 91 | 92 | // NewWriter returns a new Writer. 93 | // Writes to the returned writer are compressed and written to w. 94 | // 95 | // It is the caller's responsibility to call Close on the WriteCloser when done. 96 | // Writes may be buffered and not flushed until Close. 97 | // 98 | // Callers that wish to set the fields in Writer.Header must do so before 99 | // the first call to Write or Close. 
The Comment and Name header fields are 100 | // UTF-8 strings in Go, but the underlying format requires NUL-terminated ISO 101 | // 8859-1 (Latin-1). NUL or non-Latin-1 runes in those strings will lead to an 102 | // error on Write. 103 | func NewWriter(w io.Writer) *Writer { 104 | z, _ := NewWriterLevel(w, DefaultCompression) 105 | return z 106 | } 107 | 108 | // NewWriterLevel is like NewWriter but specifies the compression level instead 109 | // of assuming DefaultCompression. 110 | // 111 | // The compression level can be DefaultCompression, NoCompression, or any 112 | // integer value between BestSpeed and BestCompression inclusive. The error 113 | // returned will be nil if the level is valid. 114 | func NewWriterLevel(w io.Writer, level int) (*Writer, error) { 115 | if level < ConstantCompression || level > BestCompression { 116 | return nil, fmt.Errorf("gzip: invalid compression level: %d", level) 117 | } 118 | z := new(Writer) 119 | z.SetConcurrency(defaultBlockSize, runtime.GOMAXPROCS(0)) 120 | z.init(w, level) 121 | return z, nil 122 | } 123 | 124 | // This function must be used by goroutines to set an 125 | // error condition, since z.err access is restricted 126 | // to the callers goruotine. 
127 | func (z *Writer) pushError(err error) { 128 | z.errMu.Lock() 129 | if z.err != nil { 130 | z.errMu.Unlock() 131 | return 132 | } 133 | z.err = err 134 | close(z.pushedErr) 135 | z.errMu.Unlock() 136 | } 137 | 138 | func (z *Writer) init(w io.Writer, level int) { 139 | z.wg.Wait() 140 | digest := z.digest 141 | if digest != nil { 142 | digest.Reset() 143 | } else { 144 | digest = crc32.NewIEEE() 145 | } 146 | z.Header = Header{OS: 255} 147 | z.w = w 148 | z.level = level 149 | z.digest = digest 150 | z.pushedErr = make(chan struct{}, 0) 151 | z.results = make(chan result, z.blocks) 152 | z.err = nil 153 | z.closed = false 154 | z.Comment = "" 155 | z.Extra = nil 156 | z.ModTime = time.Time{} 157 | z.wroteHeader = false 158 | z.currentBuffer = nil 159 | z.buf = [10]byte{} 160 | z.prevTail = nil 161 | z.size = 0 162 | if z.dictFlatePool.New == nil { 163 | z.dictFlatePool.New = func() interface{} { 164 | f, _ := flate.NewWriterDict(w, level, nil) 165 | return f 166 | } 167 | } 168 | } 169 | 170 | // Reset discards the Writer z's state and makes it equivalent to the 171 | // result of its original state from NewWriter or NewWriterLevel, but 172 | // writing to w instead. This permits reusing a Writer rather than 173 | // allocating a new one. 174 | func (z *Writer) Reset(w io.Writer) { 175 | if z.results != nil && !z.closed { 176 | close(z.results) 177 | } 178 | z.SetConcurrency(defaultBlockSize, runtime.GOMAXPROCS(0)) 179 | z.init(w, z.level) 180 | } 181 | 182 | // GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950). 183 | func put2(p []byte, v uint16) { 184 | p[0] = uint8(v >> 0) 185 | p[1] = uint8(v >> 8) 186 | } 187 | 188 | func put4(p []byte, v uint32) { 189 | p[0] = uint8(v >> 0) 190 | p[1] = uint8(v >> 8) 191 | p[2] = uint8(v >> 16) 192 | p[3] = uint8(v >> 24) 193 | } 194 | 195 | // writeBytes writes a length-prefixed byte slice to z.w. 
196 | func (z *Writer) writeBytes(b []byte) error { 197 | if len(b) > 0xffff { 198 | return errors.New("gzip.Write: Extra data is too large") 199 | } 200 | put2(z.buf[0:2], uint16(len(b))) 201 | _, err := z.w.Write(z.buf[0:2]) 202 | if err != nil { 203 | return err 204 | } 205 | _, err = z.w.Write(b) 206 | return err 207 | } 208 | 209 | // writeString writes a UTF-8 string s in GZIP's format to z.w. 210 | // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). 211 | func (z *Writer) writeString(s string) (err error) { 212 | // GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII. 213 | needconv := false 214 | for _, v := range s { 215 | if v == 0 || v > 0xff { 216 | return errors.New("gzip.Write: non-Latin-1 header string") 217 | } 218 | if v > 0x7f { 219 | needconv = true 220 | } 221 | } 222 | if needconv { 223 | b := make([]byte, 0, len(s)) 224 | for _, v := range s { 225 | b = append(b, byte(v)) 226 | } 227 | _, err = z.w.Write(b) 228 | } else { 229 | _, err = io.WriteString(z.w, s) 230 | } 231 | if err != nil { 232 | return err 233 | } 234 | // GZIP strings are NUL-terminated. 235 | z.buf[0] = 0 236 | _, err = z.w.Write(z.buf[0:1]) 237 | return err 238 | } 239 | 240 | // compressCurrent will compress the data currently buffered 241 | // This should only be called from the main writer/flush/closer 242 | func (z *Writer) compressCurrent(flush bool) { 243 | c := z.currentBuffer 244 | if len(c) > z.blockSize { 245 | // This can never happen through the public interface. 
246 | panic("len(z.currentBuffer) > z.blockSize (most likely due to concurrent Write race)") 247 | } 248 | 249 | r := result{} 250 | r.result = make(chan []byte, 1) 251 | r.notifyWritten = make(chan struct{}, 0) 252 | // Reserve a result slot 253 | select { 254 | case z.results <- r: 255 | case <-z.pushedErr: 256 | return 257 | } 258 | 259 | z.wg.Add(1) 260 | tail := z.prevTail 261 | if len(c) > tailSize { 262 | buf := z.dstPool.Get().([]byte) // Put in .compressBlock 263 | // Copy tail from current buffer before handing the buffer over to the 264 | // compressBlock goroutine. 265 | buf = append(buf[:0], c[len(c)-tailSize:]...) 266 | z.prevTail = buf 267 | } else { 268 | z.prevTail = nil 269 | } 270 | go z.compressBlock(c, tail, r, z.closed) 271 | 272 | z.currentBuffer = z.dstPool.Get().([]byte) // Put in .compressBlock 273 | z.currentBuffer = z.currentBuffer[:0] 274 | 275 | // Wait if flushing 276 | if flush { 277 | <-r.notifyWritten 278 | } 279 | } 280 | 281 | // Returns an error if it has been set. 282 | // Cannot be used by functions that are from internal goroutines. 283 | func (z *Writer) checkError() error { 284 | z.errMu.RLock() 285 | err := z.err 286 | z.errMu.RUnlock() 287 | return err 288 | } 289 | 290 | // Write writes a compressed form of p to the underlying io.Writer. The 291 | // compressed bytes are not necessarily flushed to output until 292 | // the Writer is closed or Flush() is called. 293 | // 294 | // The function will return quickly, if there are unused buffers. 295 | // The sent slice (p) is copied, and the caller is free to re-use the buffer 296 | // when the function returns. 297 | // 298 | // Errors that occur during compression will be reported later, and a nil error 299 | // does not signify that the compression succeeded (since it is most likely still running) 300 | // That means that the call that returns an error may not be the call that caused it. 
301 | // Only Flush and Close functions are guaranteed to return any errors up to that point. 302 | func (z *Writer) Write(p []byte) (int, error) { 303 | if err := z.checkError(); err != nil { 304 | return 0, err 305 | } 306 | // Write the GZIP header lazily. 307 | if !z.wroteHeader { 308 | z.wroteHeader = true 309 | z.buf[0] = gzipID1 310 | z.buf[1] = gzipID2 311 | z.buf[2] = gzipDeflate 312 | z.buf[3] = 0 313 | if z.Extra != nil { 314 | z.buf[3] |= 0x04 315 | } 316 | if z.Name != "" { 317 | z.buf[3] |= 0x08 318 | } 319 | if z.Comment != "" { 320 | z.buf[3] |= 0x10 321 | } 322 | put4(z.buf[4:8], uint32(z.ModTime.Unix())) 323 | if z.level == BestCompression { 324 | z.buf[8] = 2 325 | } else if z.level == BestSpeed { 326 | z.buf[8] = 4 327 | } else { 328 | z.buf[8] = 0 329 | } 330 | z.buf[9] = z.OS 331 | var n int 332 | var err error 333 | n, err = z.w.Write(z.buf[0:10]) 334 | if err != nil { 335 | z.pushError(err) 336 | return n, err 337 | } 338 | if z.Extra != nil { 339 | err = z.writeBytes(z.Extra) 340 | if err != nil { 341 | z.pushError(err) 342 | return n, err 343 | } 344 | } 345 | if z.Name != "" { 346 | err = z.writeString(z.Name) 347 | if err != nil { 348 | z.pushError(err) 349 | return n, err 350 | } 351 | } 352 | if z.Comment != "" { 353 | err = z.writeString(z.Comment) 354 | if err != nil { 355 | z.pushError(err) 356 | return n, err 357 | } 358 | } 359 | // Start receiving data from compressors 360 | go func() { 361 | listen := z.results 362 | var failed bool 363 | for { 364 | r, ok := <-listen 365 | // If closed, we are finished. 
366 | if !ok { 367 | return 368 | } 369 | if failed { 370 | close(r.notifyWritten) 371 | continue 372 | } 373 | buf := <-r.result 374 | n, err := z.w.Write(buf) 375 | if err != nil { 376 | z.pushError(err) 377 | close(r.notifyWritten) 378 | failed = true 379 | continue 380 | } 381 | if n != len(buf) { 382 | z.pushError(fmt.Errorf("gzip: short write %d should be %d", n, len(buf))) 383 | failed = true 384 | close(r.notifyWritten) 385 | continue 386 | } 387 | z.dstPool.Put(buf) 388 | close(r.notifyWritten) 389 | } 390 | }() 391 | z.currentBuffer = z.dstPool.Get().([]byte) 392 | z.currentBuffer = z.currentBuffer[:0] 393 | } 394 | q := p 395 | for len(q) > 0 { 396 | length := len(q) 397 | if length+len(z.currentBuffer) > z.blockSize { 398 | length = z.blockSize - len(z.currentBuffer) 399 | } 400 | z.digest.Write(q[:length]) 401 | z.currentBuffer = append(z.currentBuffer, q[:length]...) 402 | if len(z.currentBuffer) > z.blockSize { 403 | panic("z.currentBuffer too large (most likely due to concurrent Write race)") 404 | } 405 | if len(z.currentBuffer) == z.blockSize { 406 | z.compressCurrent(false) 407 | if err := z.checkError(); err != nil { 408 | return len(p) - len(q), err 409 | } 410 | } 411 | z.size += length 412 | q = q[length:] 413 | } 414 | return len(p), z.checkError() 415 | } 416 | 417 | // Step 1: compresses buffer to buffer 418 | // Step 2: send writer to channel 419 | // Step 3: Close result channel to indicate we are done 420 | func (z *Writer) compressBlock(p, prevTail []byte, r result, closed bool) { 421 | defer func() { 422 | close(r.result) 423 | z.wg.Done() 424 | }() 425 | buf := z.dstPool.Get().([]byte) // Corresponding Put in .Write's result writer 426 | dest := bytes.NewBuffer(buf[:0]) 427 | 428 | compressor := z.dictFlatePool.Get().(*flate.Writer) // Put below 429 | compressor.ResetDict(dest, prevTail) 430 | compressor.Write(p) 431 | z.dstPool.Put(p) // Corresponding Get in .Write and .compressCurrent 432 | 433 | err := compressor.Flush() 434 | if 
err != nil { 435 | z.pushError(err) 436 | return 437 | } 438 | if closed { 439 | err = compressor.Close() 440 | if err != nil { 441 | z.pushError(err) 442 | return 443 | } 444 | } 445 | z.dictFlatePool.Put(compressor) // Get above 446 | 447 | if prevTail != nil { 448 | z.dstPool.Put(prevTail) // Get in .compressCurrent 449 | } 450 | 451 | // Read back buffer 452 | buf = dest.Bytes() 453 | r.result <- buf 454 | } 455 | 456 | // Flush flushes any pending compressed data to the underlying writer. 457 | // 458 | // It is useful mainly in compressed network protocols, to ensure that 459 | // a remote reader has enough data to reconstruct a packet. Flush does 460 | // not return until the data has been written. If the underlying 461 | // writer returns an error, Flush returns that error. 462 | // 463 | // In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. 464 | func (z *Writer) Flush() error { 465 | if err := z.checkError(); err != nil { 466 | return err 467 | } 468 | if z.closed { 469 | return nil 470 | } 471 | if !z.wroteHeader { 472 | _, err := z.Write(nil) 473 | if err != nil { 474 | return err 475 | } 476 | } 477 | // We send current block to compression 478 | z.compressCurrent(true) 479 | 480 | return z.checkError() 481 | } 482 | 483 | // UncompressedSize will return the number of bytes written. 484 | // pgzip only, not a function in the official gzip package. 485 | func (z *Writer) UncompressedSize() int { 486 | return z.size 487 | } 488 | 489 | // Close closes the Writer, flushing any unwritten data to the underlying 490 | // io.Writer, but does not close the underlying io.Writer. 
491 | func (z *Writer) Close() error { 492 | if err := z.checkError(); err != nil { 493 | return err 494 | } 495 | if z.closed { 496 | return nil 497 | } 498 | 499 | z.closed = true 500 | if !z.wroteHeader { 501 | z.Write(nil) 502 | if err := z.checkError(); err != nil { 503 | return err 504 | } 505 | } 506 | z.compressCurrent(true) 507 | if err := z.checkError(); err != nil { 508 | return err 509 | } 510 | close(z.results) 511 | put4(z.buf[0:4], z.digest.Sum32()) 512 | put4(z.buf[4:8], uint32(z.size)) 513 | _, err := z.w.Write(z.buf[0:8]) 514 | if err != nil { 515 | z.pushError(err) 516 | return err 517 | } 518 | return nil 519 | } 520 | -------------------------------------------------------------------------------- /gzip_norace_test.go: -------------------------------------------------------------------------------- 1 | // These tests are skipped when the race detector (-race) is on 2 | // +build !race 3 | 4 | package pgzip 5 | 6 | import ( 7 | "bytes" 8 | "io/ioutil" 9 | "runtime" 10 | "runtime/debug" 11 | "testing" 12 | ) 13 | 14 | // Test that the sync.Pools are working properly and we are not leaking buffers 15 | // Disabled with -race, because the race detector allocates a lot of memory 16 | func TestAllocations(t *testing.T) { 17 | 18 | w := NewWriter(ioutil.Discard) 19 | w.SetConcurrency(100000, 10) 20 | data := bytes.Repeat([]byte("TEST"), 41234) // varying block splits 21 | 22 | // Prime the pool to do initial allocs 23 | for i := 0; i < 10; i++ { 24 | _, _ = w.Write(data) 25 | } 26 | _ = w.Flush() 27 | 28 | allocBytes := allocBytesPerRun(1000, func() { 29 | _, _ = w.Write(data) 30 | }) 31 | t.Logf("Allocated %.0f bytes per Write on average", allocBytes) 32 | 33 | // Locally it still allocates 660 bytes, which can probably be further reduced, 34 | // but it's better than the 175846 bytes before the pool release fix this tests. 
// allocBytesPerRun reports the average number of bytes allocated by one call
// of f.
//
// f is called once as a warm-up that is not measured. It is then called runs
// more times, and the growth of runtime.MemStats.TotalAlloc across those calls
// is divided by runs.
//
// GOMAXPROCS is set to 1 and the garbage collector is disabled for the
// duration of the measurement; both settings are restored before returning.
//
// This function is based on testing.AllocsPerRun, which counts the number of
// allocations instead of the total size of them in bytes.
func allocBytesPerRun(runs int, f func()) (avg float64) {
	prevProcs := runtime.GOMAXPROCS(1)
	defer runtime.GOMAXPROCS(prevProcs)

	// Keep the collector off, because it could clear our pools during the run.
	prevGC := debug.SetGCPercent(-1)
	defer debug.SetGCPercent(prevGC)

	// Unmeasured warm-up call.
	f()

	var stats runtime.MemStats
	runtime.ReadMemStats(&stats)
	before := stats.TotalAlloc

	for remaining := runs; remaining > 0; remaining-- {
		f()
	}

	runtime.ReadMemStats(&stats)
	total := stats.TotalAlloc - before

	// The division is deliberately done on integers, mirroring
	// testing.AllocsPerRun, so the result is a whole number of bytes.
	return float64(total / uint64(runs))
}
46 | func TestRoundTrip(t *testing.T) { 47 | buf := new(bytes.Buffer) 48 | 49 | w := NewWriter(buf) 50 | w.Comment = "comment" 51 | w.Extra = []byte("extra") 52 | w.ModTime = time.Unix(1e8, 0) 53 | w.Name = "name" 54 | if _, err := w.Write([]byte("payload")); err != nil { 55 | t.Fatalf("Write: %v", err) 56 | } 57 | if err := w.Close(); err != nil { 58 | t.Fatalf("Writer.Close: %v", err) 59 | } 60 | 61 | r, err := NewReader(buf) 62 | if err != nil { 63 | t.Fatalf("NewReader: %v", err) 64 | } 65 | b, err := ioutil.ReadAll(r) 66 | if err != nil { 67 | t.Fatalf("ReadAll: %v", err) 68 | } 69 | if string(b) != "payload" { 70 | t.Fatalf("payload is %q, want %q", string(b), "payload") 71 | } 72 | if r.Comment != "comment" { 73 | t.Fatalf("comment is %q, want %q", r.Comment, "comment") 74 | } 75 | if string(r.Extra) != "extra" { 76 | t.Fatalf("extra is %q, want %q", r.Extra, "extra") 77 | } 78 | if r.ModTime.Unix() != 1e8 { 79 | t.Fatalf("mtime is %d, want %d", r.ModTime.Unix(), uint32(1e8)) 80 | } 81 | if r.Name != "name" { 82 | t.Fatalf("name is %q, want %q", r.Name, "name") 83 | } 84 | if err := r.Close(); err != nil { 85 | t.Fatalf("Reader.Close: %v", err) 86 | } 87 | } 88 | 89 | // TestLatin1 tests the internal functions for converting to and from Latin-1. 
90 | func TestLatin1(t *testing.T) { 91 | latin1 := []byte{0xc4, 'u', 0xdf, 'e', 'r', 'u', 'n', 'g', 0} 92 | utf8 := "Äußerung" 93 | z := Reader{r: bufio.NewReader(bytes.NewReader(latin1))} 94 | s, err := z.readString() 95 | if err != nil { 96 | t.Fatalf("readString: %v", err) 97 | } 98 | if s != utf8 { 99 | t.Fatalf("read latin-1: got %q, want %q", s, utf8) 100 | } 101 | 102 | buf := bytes.NewBuffer(make([]byte, 0, len(latin1))) 103 | c := Writer{w: buf} 104 | if err = c.writeString(utf8); err != nil { 105 | t.Fatalf("writeString: %v", err) 106 | } 107 | s = buf.String() 108 | if s != string(latin1) { 109 | t.Fatalf("write utf-8: got %q, want %q", s, string(latin1)) 110 | } 111 | } 112 | 113 | // TestLatin1RoundTrip tests that metadata that is representable in Latin-1 114 | // survives a round trip. 115 | func TestLatin1RoundTrip(t *testing.T) { 116 | testCases := []struct { 117 | name string 118 | ok bool 119 | }{ 120 | {"", true}, 121 | {"ASCII is OK", true}, 122 | {"unless it contains a NUL\x00", false}, 123 | {"no matter where \x00 occurs", false}, 124 | {"\x00\x00\x00", false}, 125 | {"Látin-1 also passes (U+00E1)", true}, 126 | {"but LĀtin Extended-A (U+0100) does not", false}, 127 | {"neither does 日本語", false}, 128 | {"invalid UTF-8 also \xffails", false}, 129 | {"\x00 as does Látin-1 with NUL", false}, 130 | } 131 | for _, tc := range testCases { 132 | buf := new(bytes.Buffer) 133 | 134 | w := NewWriter(buf) 135 | w.Name = tc.name 136 | err := w.Close() 137 | if (err == nil) != tc.ok { 138 | t.Errorf("Writer.Close: name = %q, err = %v", tc.name, err) 139 | continue 140 | } 141 | if !tc.ok { 142 | continue 143 | } 144 | 145 | r, err := NewReader(buf) 146 | if err != nil { 147 | t.Errorf("NewReader: %v", err) 148 | continue 149 | } 150 | _, err = ioutil.ReadAll(r) 151 | if err != nil { 152 | t.Errorf("ReadAll: %v", err) 153 | continue 154 | } 155 | if r.Name != tc.name { 156 | t.Errorf("name is %q, want %q", r.Name, tc.name) 157 | continue 158 | } 159 | if 
err := r.Close(); err != nil { 160 | t.Errorf("Reader.Close: %v", err) 161 | continue 162 | } 163 | } 164 | } 165 | 166 | func TestWriterFlush(t *testing.T) { 167 | buf := new(bytes.Buffer) 168 | 169 | w := NewWriter(buf) 170 | w.Comment = "comment" 171 | w.Extra = []byte("extra") 172 | w.ModTime = time.Unix(1e8, 0) 173 | w.Name = "name" 174 | 175 | n0 := buf.Len() 176 | if n0 != 0 { 177 | t.Fatalf("buffer size = %d before writes; want 0", n0) 178 | } 179 | 180 | if err := w.Flush(); err != nil { 181 | t.Fatal(err) 182 | } 183 | 184 | n1 := buf.Len() 185 | if n1 == 0 { 186 | t.Fatal("no data after first flush") 187 | } 188 | 189 | w.Write([]byte("x")) 190 | 191 | n2 := buf.Len() 192 | if n1 != n2 { 193 | t.Fatalf("after writing a single byte, size changed from %d to %d; want no change", n1, n2) 194 | } 195 | 196 | if err := w.Flush(); err != nil { 197 | t.Fatal(err) 198 | } 199 | 200 | n3 := buf.Len() 201 | if n2 == n3 { 202 | t.Fatal("Flush didn't flush any data") 203 | } 204 | } 205 | 206 | // Multiple gzip files concatenated form a valid gzip file. 
207 | func TestConcat(t *testing.T) { 208 | var buf bytes.Buffer 209 | w := NewWriter(&buf) 210 | w.Write([]byte("hello ")) 211 | w.Close() 212 | w = NewWriter(&buf) 213 | w.Write([]byte("world\n")) 214 | w.Close() 215 | 216 | r, err := NewReader(&buf) 217 | data, err := ioutil.ReadAll(r) 218 | if string(data) != "hello world\n" || err != nil { 219 | t.Fatalf("ReadAll = %q, %v, want %q, nil", data, err, "hello world") 220 | } 221 | } 222 | 223 | func TestWriterReset(t *testing.T) { 224 | buf := new(bytes.Buffer) 225 | buf2 := new(bytes.Buffer) 226 | z := NewWriter(buf) 227 | msg := []byte("hello world") 228 | z.Write(msg) 229 | z.Close() 230 | z.Reset(buf2) 231 | z.Write(msg) 232 | z.Close() 233 | if buf.String() != buf2.String() { 234 | t.Errorf("buf2 %q != original buf of %q", buf2.String(), buf.String()) 235 | } 236 | } 237 | 238 | var testbuf []byte 239 | 240 | func testFile(i int, t *testing.T) { 241 | dat, _ := ioutil.ReadFile("testdata/test.json") 242 | dl := len(dat) 243 | if len(testbuf) != i*dl { 244 | // Make results predictable 245 | testbuf = make([]byte, i*dl) 246 | for j := 0; j < i; j++ { 247 | copy(testbuf[j*dl:j*dl+dl], dat) 248 | } 249 | } 250 | 251 | br := bytes.NewBuffer(testbuf) 252 | var buf bytes.Buffer 253 | w, _ := NewWriterLevel(&buf, 6) 254 | io.Copy(w, br) 255 | w.Close() 256 | r, err := NewReader(&buf) 257 | if err != nil { 258 | t.Fatal(err.Error()) 259 | } 260 | decoded, err := ioutil.ReadAll(r) 261 | if err != nil { 262 | t.Fatal(err.Error()) 263 | } 264 | if !bytes.Equal(testbuf, decoded) { 265 | t.Errorf("decoded content does not match.") 266 | } 267 | } 268 | 269 | func TestFile1(t *testing.T) { testFile(1, t) } 270 | func TestFile10(t *testing.T) { testFile(10, t) } 271 | 272 | func TestFile50(t *testing.T) { 273 | if testing.Short() { 274 | t.Skip("skipping during short test") 275 | } 276 | testFile(50, t) 277 | } 278 | 279 | func TestFile200(t *testing.T) { 280 | if testing.Short() { 281 | t.Skip("skipping during short test") 
282 | } 283 | testFile(200, t) 284 | } 285 | 286 | func testBigGzip(i int, t *testing.T) { 287 | if len(testbuf) != i { 288 | // Make results predictable 289 | rand.Seed(1337) 290 | testbuf = make([]byte, i) 291 | for idx := range testbuf { 292 | testbuf[idx] = byte(65 + rand.Intn(32)) 293 | } 294 | } 295 | 296 | br := bytes.NewBuffer(testbuf) 297 | var buf bytes.Buffer 298 | w, _ := NewWriterLevel(&buf, 6) 299 | io.Copy(w, br) 300 | // Test UncompressedSize() 301 | if len(testbuf) != w.UncompressedSize() { 302 | t.Errorf("uncompressed size does not match. buffer:%d, UncompressedSize():%d", len(testbuf), w.UncompressedSize()) 303 | } 304 | err := w.Close() 305 | if err != nil { 306 | t.Fatal(err.Error()) 307 | } 308 | // Close should not affect the number 309 | if len(testbuf) != w.UncompressedSize() { 310 | t.Errorf("uncompressed size does not match. buffer:%d, UncompressedSize():%d", len(testbuf), w.UncompressedSize()) 311 | } 312 | 313 | r, err := NewReader(&buf) 314 | if err != nil { 315 | t.Fatal(err.Error()) 316 | } 317 | decoded, err := ioutil.ReadAll(r) 318 | if err != nil { 319 | t.Fatal(err.Error()) 320 | } 321 | if !bytes.Equal(testbuf, decoded) { 322 | t.Errorf("decoded content does not match.") 323 | } 324 | } 325 | 326 | func TestGzip1K(t *testing.T) { testBigGzip(1000, t) } 327 | func TestGzip100K(t *testing.T) { testBigGzip(100000, t) } 328 | func TestGzip1M(t *testing.T) { 329 | if testing.Short() { 330 | t.Skip("skipping during short test") 331 | } 332 | 333 | testBigGzip(1000000, t) 334 | } 335 | func TestGzip10M(t *testing.T) { 336 | if testing.Short() { 337 | t.Skip("skipping during short test") 338 | } 339 | testBigGzip(10000000, t) 340 | } 341 | 342 | // Test if two runs produce identical results. 
343 | func TestDeterministicLM2(t *testing.T) { testDeterm(-2, t) } 344 | func TestDeterministicL0(t *testing.T) { testDeterm(0, t) } 345 | func TestDeterministicL1(t *testing.T) { testDeterm(1, t) } 346 | func TestDeterministicL2(t *testing.T) { testDeterm(2, t) } 347 | func TestDeterministicL3(t *testing.T) { testDeterm(3, t) } 348 | func TestDeterministicL4(t *testing.T) { testDeterm(4, t) } 349 | func TestDeterministicL5(t *testing.T) { testDeterm(5, t) } 350 | func TestDeterministicL6(t *testing.T) { testDeterm(6, t) } 351 | func TestDeterministicL7(t *testing.T) { testDeterm(7, t) } 352 | func TestDeterministicL8(t *testing.T) { testDeterm(8, t) } 353 | func TestDeterministicL9(t *testing.T) { testDeterm(9, t) } 354 | 355 | func testDeterm(i int, t *testing.T) { 356 | var length = defaultBlockSize*defaultBlocks + 500 357 | if testing.Short() { 358 | length = defaultBlockSize*2 + 500 359 | } 360 | rand.Seed(1337) 361 | t1 := make([]byte, length) 362 | for idx := range t1 { 363 | t1[idx] = byte(65 + rand.Intn(8)) 364 | } 365 | 366 | br := bytes.NewBuffer(t1) 367 | var b1 bytes.Buffer 368 | w, err := NewWriterLevel(&b1, i) 369 | if err != nil { 370 | t.Fatal(err) 371 | } 372 | // Use a very small prime sized buffer. 373 | cbuf := make([]byte, 787) 374 | _, err = copyBuffer(w, br, cbuf) 375 | if err != nil { 376 | t.Fatal(err) 377 | } 378 | w.Flush() 379 | w.Close() 380 | 381 | rand.Seed(1337) 382 | t2 := make([]byte, length) 383 | for idx := range t2 { 384 | t2[idx] = byte(65 + rand.Intn(8)) 385 | } 386 | 387 | br2 := bytes.NewBuffer(t2) 388 | var b2 bytes.Buffer 389 | w2, err := NewWriterLevel(&b2, i) 390 | if err != nil { 391 | t.Fatal(err) 392 | } 393 | // We choose a different buffer size, 394 | // bigger than a maximum block, and also a prime. 
395 | cbuf = make([]byte, 81761) 396 | _, err = copyBuffer(w2, br2, cbuf) 397 | if err != nil { 398 | t.Fatal(err) 399 | } 400 | w2.Flush() 401 | w2.Close() 402 | 403 | b1b := b1.Bytes() 404 | b2b := b2.Bytes() 405 | 406 | if bytes.Compare(b1b, b2b) != 0 { 407 | t.Fatalf("Level %d did not produce deterministric result, len(a) = %d, len(b) = %d", i, len(b1b), len(b2b)) 408 | } 409 | } 410 | 411 | func BenchmarkGzipL1(b *testing.B) { benchmarkGzipN(b, 1) } 412 | func BenchmarkGzipL2(b *testing.B) { benchmarkGzipN(b, 2) } 413 | func BenchmarkGzipL3(b *testing.B) { benchmarkGzipN(b, 3) } 414 | func BenchmarkGzipL4(b *testing.B) { benchmarkGzipN(b, 4) } 415 | func BenchmarkGzipL5(b *testing.B) { benchmarkGzipN(b, 5) } 416 | func BenchmarkGzipL6(b *testing.B) { benchmarkGzipN(b, 6) } 417 | func BenchmarkGzipL7(b *testing.B) { benchmarkGzipN(b, 7) } 418 | func BenchmarkGzipL8(b *testing.B) { benchmarkGzipN(b, 8) } 419 | func BenchmarkGzipL9(b *testing.B) { benchmarkGzipN(b, 9) } 420 | 421 | func benchmarkGzipN(b *testing.B, level int) { 422 | dat, _ := ioutil.ReadFile("testdata/test.json") 423 | dat = append(dat, dat...) 424 | dat = append(dat, dat...) 425 | dat = append(dat, dat...) 426 | dat = append(dat, dat...) 427 | dat = append(dat, dat...) 
// errorWriter is an io.Writer for tests whose failure mode can be toggled
// while it is in use. The flag is guarded by mu.
type errorWriter struct {
	mu          sync.RWMutex
	returnError bool
}

// ErrorNow makes every subsequent Write fail.
func (e *errorWriter) ErrorNow() {
	e.mu.Lock()
	defer e.mu.Unlock()
	e.returnError = true
}

// Reset makes subsequent Writes succeed again.
func (e *errorWriter) Reset() {
	e.mu.Lock()
	defer e.mu.Unlock()
	e.returnError = false
}

// Write discards b and reports it as fully written, unless the writer has
// been switched to failing, in which case it returns 0 and an error.
func (e *errorWriter) Write(b []byte) (int, error) {
	e.mu.RLock()
	failing := e.returnError
	e.mu.RUnlock()

	if failing {
		return 0, fmt.Errorf("Intentional Error")
	}
	return len(b), nil
}
// A writer that fails after N bytes.
//
// N is the remaining byte budget. Every Write subtracts the length of its
// argument from it, whether or not the call succeeds.
type errorWriter2 struct {
	N int
}

// Write consumes len(b) from the budget. It succeeds while the budget stays
// positive and returns io.ErrClosedPipe with a zero count once it is used up.
func (e *errorWriter2) Write(b []byte) (int, error) {
	remaining := e.N - len(b)
	e.N = remaining
	if remaining > 0 {
		return len(b), nil
	}
	return 0, io.ErrClosedPipe
}
// copyBuffer is a copy of io.CopyBuffer, since we want to support older go versions.
// This is modified to never use io.WriterTo or io.ReaderFrom interfaces.
//
// It copies from src to dst through buf until src returns io.EOF or an error
// occurs, and returns the number of bytes written to dst. Reaching io.EOF is
// not reported as an error. A write that accepts fewer bytes than were read,
// without returning an error, yields io.ErrShortWrite.
//
// If buf is nil or has zero length, a 32 KiB buffer is allocated. A
// zero-length buffer cannot be used as is, because Read would keep returning
// (0, nil) and the loop would never make progress.
func copyBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) {
	if len(buf) == 0 {
		buf = make([]byte, 32*1024)
	}
	for {
		nr, er := src.Read(buf)
		if nr > 0 {
			nw, ew := dst.Write(buf[:nr])
			if nw > 0 {
				written += int64(nw)
			}
			if ew != nil {
				return written, ew
			}
			if nr != nw {
				return written, io.ErrShortWrite
			}
		}
		if er == io.EOF {
			return written, nil
		}
		if er != nil {
			return written, er
		}
	}
}
24 | func TestConcurrentRacePanic(t *testing.T) { 25 | w := NewWriter(SlowDiscard(2 * time.Millisecond)) 26 | w.SetConcurrency(1000, 1) 27 | data := bytes.Repeat([]byte("T"), 100000) // varying block splits 28 | 29 | const n = 1000 30 | recovered := make(chan string, n) 31 | var wg sync.WaitGroup 32 | start := make(chan struct{}) 33 | for i := 0; i < n; i++ { 34 | wg.Add(1) 35 | go func() { 36 | defer wg.Done() 37 | defer func() { 38 | s, ok := recover().(string) 39 | if ok { 40 | recovered <- s 41 | t.Logf("Recovered from panic: %s", s) 42 | } 43 | }() 44 | // INCORRECT CONCURRENT USAGE! 45 | <-start 46 | _, _ = w.Write(data) 47 | }() 48 | } 49 | close(start) // give the start signal 50 | 51 | timer := time.NewTimer(10 * time.Second) 52 | defer timer.Stop() 53 | hasPanic := false 54 | select { 55 | case <-recovered: 56 | // OK, expected 57 | hasPanic = true 58 | case <-timer.C: 59 | t.Error("Timout") 60 | } 61 | wg.Wait() 62 | if !hasPanic { 63 | t.Error("Expected a panic, but none happened") 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /testdata/bigempty.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klauspost/pgzip/17e8dac29df8ce00febbd08ee5d8ee922024a003/testdata/bigempty.gz -------------------------------------------------------------------------------- /testdata/issue6550.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klauspost/pgzip/17e8dac29df8ce00febbd08ee5d8ee922024a003/testdata/issue6550.gz --------------------------------------------------------------------------------