├── .circleci └── config.yml ├── LICENSE ├── README.md ├── adler32.go ├── crc32.go ├── czlib.go ├── czlib_test.go ├── fastzlib.c ├── fastzlib.go ├── fastzlib.h ├── fastzlib_test.go ├── go.mod ├── reader.go ├── writer.go └── zstream.go /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | jobs: 4 | "golang-1.15": 5 | docker: 6 | - image: circleci/golang:1.15 7 | steps: 8 | - checkout 9 | - run: 'sudo apt-get update' 10 | - run: 'sudo apt-get install -y zlib1g-dev' 11 | - run: 'wget https://github.com/DataDog/zstd/files/2246767/mr.zip' 12 | - run: 'unzip mr.zip' 13 | - run: 'go build' 14 | - run: 'PAYLOAD=`pwd`/mr go test -v' 15 | - run: 'PAYLOAD=`pwd`/mr go test -bench .' 16 | "golang-1.16": 17 | docker: 18 | - image: circleci/golang:1.16 19 | steps: 20 | - checkout 21 | - run: 'sudo apt-get update' 22 | - run: 'sudo apt-get install -y zlib1g-dev' 23 | - run: 'wget https://github.com/DataDog/zstd/files/2246767/mr.zip' 24 | - run: 'unzip mr.zip' 25 | - run: 'go build' 26 | - run: 'PAYLOAD=`pwd`/mr go test -v' 27 | - run: 'PAYLOAD=`pwd`/mr go test -bench .' 28 | "golang-latest": 29 | docker: 30 | - image: circleci/golang:latest 31 | steps: 32 | - checkout 33 | - run: 'sudo apt-get update' 34 | - run: 'sudo apt-get install -y zlib1g-dev' 35 | - run: 'wget https://github.com/DataDog/zstd/files/2246767/mr.zip' 36 | - run: 'unzip mr.zip' 37 | - run: 'go build' 38 | - run: 'PAYLOAD=`pwd`/mr go test -v' 39 | - run: 'PAYLOAD=`pwd`/mr go test -bench .' 40 | 41 | workflows: 42 | version: 2 43 | build: 44 | jobs: 45 | - "golang-1.15" 46 | - "golang-1.16" 47 | - "golang-latest" 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Simplified BSD License 2 | 3 | Copyright (c) 2012, Google Inc. 4 | Copyright (c) 2016, Datadog 5 | All rights reserved. 
6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | * Neither the name of the copyright holder nor the names of its contributors 16 | may be used to endorse or promote products derived from this software 17 | without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Disclaimer 2 | *This repository is considered stable but not actively maintained anymore. It is still in use in many places and safe for production use; but the zlib protocol being stable, we have not made any changes in recent times. 
Time to reply on issues/PRs may not be on par with other Datadog's repositories.* 3 | 4 | 5 | # czlib 6 | 7 | [![GoDoc](https://godoc.org/github.com/DataDog/czlib?status.svg)](https://godoc.org/github.com/DataDog/czlib) 8 | 9 | `czlib` started as a fork of the [vitess project’s cgzip](https://github.com/youtube/vitess/tree/master/go/cgzip) package. Our primary data pipeline uses zlib compressed messages, but the standard library’s pure Go implementation can be significantly slower than the C zlib library. In order to address this gap, we modified a few flags in cgzip to make it encode and decode with zlib wrapping rather than with gzip headers. 10 | 11 | We’ve detailed some of the other more novel design decisions in czlib, including its batch interfaces, in our [general blog on performance in Go](https://www.datadoghq.com/blog/go-performance-tales/) a couple of years ago. Performance varies quite a bit among the various interfaces, so it pays to benchmark using a message that is typical for your system by running the czlib benchmark suite with `PAYLOAD=path_to_message go test -run=NONE -bench .` 12 | 13 | Here are some benchmark results for compression and decompression of czlib compared to the standard library: 14 | ``` 15 | go version go1.22.6 darwin/arm64 16 | pkg: github.com/DataDog/czlib 17 | 18 | # 2KiB file 19 | │ CompressStdZlib │ Compress │ 20 | │ sec/op │ sec/op vs base │ 21 | *-10 75.20µ ± 12% 39.84µ ± 31% -47.02% (p=0.000 n=10) 22 | │ CompressStdZlib │ Compress │ 23 | │ B/s │ B/s vs base │ 24 | *-10 27.71Mi ± 11% 52.30Mi ± 24% +88.73% (p=0.000 n=10) 25 | 26 | │ DecompressStdZlib │ Decompress │ 27 | │ sec/op │ sec/op vs base │ 28 | *-10 18.353µ ± 5% 4.993µ ± 4% -72.80% (p=0.000 n=10) 29 | │ DecompressStdZlib │ Decompress │ 30 | │ B/s │ B/s vs base │ 31 | *-10 113.5Mi ± 5% 417.4Mi ± 3% +267.60% (p=0.000 n=10) 32 | 33 | # Silesia compression corpus - mr (~10MB) 34 | │ CompressStdZlib │ Compress │ 35 | │ sec/op │ sec/op vs base │ 36 | *-10 327.1m ± 1% 
381.0m ± 1% +16.46% (p=0.000 n=10) 37 | 38 | │ CompressStdZlib │ Compress │ 39 | │ B/s │ B/s vs base │ 40 | *-10 29.07Mi ± 1% 24.96Mi ± 1% -14.14% (p=0.000 n=10) 41 | 42 | │ DecompressStdZlib │ Decompress │ 43 | │ sec/op │ sec/op vs base │ 44 | *-10 51.20m ± 1% 13.96m ± 2% -72.74% (p=0.000 n=10) 45 | │ DecompressStdZlib │ Decompress │ 46 | │ B/s │ B/s vs base │ 47 | *-10 185.7Mi ± 1% 681.2Mi ± 2% +266.81% (p=0.000 n=10) 48 | ``` 49 | 50 | [See more on the blog post](https://www.datadoghq.com/blog/engineering/releasing-czlib-and-zstd-go-bindings/) -------------------------------------------------------------------------------- /adler32.go: -------------------------------------------------------------------------------- 1 | // Pulled from https://github.com/youtube/vitess 229422035ca0c716ad0c1397ea1351fe62b0d35a 2 | // Copyright 2012, Google Inc. All rights reserved. 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file. 5 | 6 | package czlib 7 | 8 | // NOTE: the routines defined in this file are used for verification in 9 | // czlib_test.go, but you cannot use cgo in test files, so they are 10 | // defined here despite not being exposed. 
11 | 12 | // #cgo pkg-config: zlib 13 | 14 | /* 15 | 16 | #include "zlib.h" 17 | */ 18 | import "C" 19 | 20 | import ( 21 | "hash" 22 | "unsafe" 23 | ) 24 | 25 | type adler32Hash struct { 26 | adler C.uLong 27 | } 28 | 29 | // an empty buffer has an adler32 of '1' by default, so start with that 30 | // (the go hash/adler32 does the same) 31 | func newAdler32() hash.Hash32 { 32 | a := &adler32Hash{} 33 | a.Reset() 34 | return a 35 | } 36 | 37 | // Write implements an io.Writer interface 38 | func (a *adler32Hash) Write(p []byte) (n int, err error) { 39 | if len(p) > 0 { 40 | a.adler = C.adler32(a.adler, (*C.Bytef)(unsafe.Pointer(&p[0])), (C.uInt)(len(p))) 41 | } 42 | return len(p), nil 43 | } 44 | 45 | // Sum implements a hash.Hash interface 46 | func (a *adler32Hash) Sum(b []byte) []byte { 47 | s := a.Sum32() 48 | b = append(b, byte(s>>24)) 49 | b = append(b, byte(s>>16)) 50 | b = append(b, byte(s>>8)) 51 | b = append(b, byte(s)) 52 | return b 53 | } 54 | 55 | // Reset resets the hash to default value 56 | func (a *adler32Hash) Reset() { 57 | a.adler = C.adler32(0, (*C.Bytef)(unsafe.Pointer(nil)), 0) 58 | } 59 | 60 | // Size returns the (fixed) size of the hash 61 | func (a *adler32Hash) Size() int { 62 | return 4 63 | } 64 | 65 | // BlockSize returns the (fixed) block size 66 | func (a *adler32Hash) BlockSize() int { 67 | return 1 68 | } 69 | 70 | // Sum32 implements a hash.Hash32 interface 71 | func (a *adler32Hash) Sum32() uint32 { 72 | return uint32(a.adler) 73 | } 74 | 75 | // helper method for partial checksums. From the zlib.h header: 76 | // 77 | // Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 78 | // and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for 79 | // each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of 80 | // seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. 
81 | func adler32Combine(adler1, adler2 uint32, len2 int) uint32 { 82 | return uint32(C.adler32_combine(C.uLong(adler1), C.uLong(adler2), C.z_off_t(len2))) 83 | } 84 | -------------------------------------------------------------------------------- /crc32.go: -------------------------------------------------------------------------------- 1 | // Pulled from https://github.com/youtube/vitess 229422035ca0c716ad0c1397ea1351fe62b0d35a 2 | // Copyright 2012, Google Inc. All rights reserved. 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file. 5 | 6 | package czlib 7 | 8 | // NOTE: the routines defined in this file are used for verification in 9 | // czlib_test.go, but you cannot use cgo in test files, so they are 10 | // defined here despite not being exposed. 11 | 12 | /* 13 | #include "zlib.h" 14 | */ 15 | import "C" 16 | 17 | import ( 18 | "hash" 19 | "unsafe" 20 | ) 21 | 22 | type crc32Hash struct { 23 | crc C.uLong 24 | } 25 | 26 | // an empty buffer has an crc32 of '1' by default, so start with that 27 | // (the go hash/crc32 does the same) 28 | func newCrc32() hash.Hash32 { 29 | c := &crc32Hash{} 30 | c.Reset() 31 | return c 32 | } 33 | 34 | // Write implements an io.Writer interface 35 | func (a *crc32Hash) Write(p []byte) (n int, err error) { 36 | if len(p) > 0 { 37 | a.crc = C.crc32(a.crc, (*C.Bytef)(unsafe.Pointer(&p[0])), (C.uInt)(len(p))) 38 | } 39 | return len(p), nil 40 | } 41 | 42 | // Sum implements a hash.Hash interface 43 | func (a *crc32Hash) Sum(b []byte) []byte { 44 | s := a.Sum32() 45 | b = append(b, byte(s>>24)) 46 | b = append(b, byte(s>>16)) 47 | b = append(b, byte(s>>8)) 48 | b = append(b, byte(s)) 49 | return b 50 | } 51 | 52 | // Reset resets the hash to default value 53 | func (a *crc32Hash) Reset() { 54 | a.crc = C.crc32(0, (*C.Bytef)(unsafe.Pointer(nil)), 0) 55 | } 56 | 57 | // Size returns the (fixed) size of the hash 58 | func (a *crc32Hash) Size() int { 59 | return 4 60 | } 61 
| 62 | // BlockSize returns the (fixed) block size of the hash 63 | func (a *crc32Hash) BlockSize() int { 64 | return 1 65 | } 66 | 67 | // Sum32 implements a hash.Hash32 interface 68 | func (a *crc32Hash) Sum32() uint32 { 69 | return uint32(a.crc) 70 | } 71 | 72 | // helper method for partial checksums. From the zlib.h header: 73 | // 74 | // Combine two CRC-32 checksums into one. For two sequences of bytes, seq1 75 | // and seq2 with lengths len1 and len2, CRC-32 checksums were calculated for 76 | // each, crc1 and crc2. crc32_combine() returns the CRC-32 checksum of 77 | // seq1 and seq2 concatenated, requiring only crc1, crc2, and len2. 78 | func crc32Combine(crc1, crc2 uint32, len2 int) uint32 { 79 | return uint32(C.crc32_combine(C.uLong(crc1), C.uLong(crc2), C.z_off_t(len2))) 80 | } 81 | -------------------------------------------------------------------------------- /czlib.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Datadog Inc. All rights reserved. 2 | 3 | package czlib 4 | 5 | import ( 6 | "compress/flate" 7 | "compress/zlib" 8 | ) 9 | 10 | // Constants copied from the flate package, so that code that imports czlib 11 | // does not also have to import "compress/flate". 12 | const ( 13 | NoCompression = flate.NoCompression 14 | BestSpeed = flate.BestSpeed 15 | BestCompression = flate.BestCompression 16 | DefaultCompression = flate.DefaultCompression 17 | ) 18 | 19 | var ( 20 | // ErrChecksum is returned when reading ZLIB data that has an invalid checksum. 21 | ErrChecksum = zlib.ErrChecksum 22 | // ErrDictionary is returned when reading ZLIB data that has an invalid dictionary. 23 | ErrDictionary = zlib.ErrDictionary 24 | // ErrHeader is returned when reading ZLIB data that has an invalid header. 
// prettyTimer measures the wall-clock time of a named test phase and logs
// the elapsed time and throughput through testing.T.
type prettyTimer struct {
	name   string
	before time.Time
}

// newPrettyTimer starts a timer labelled name; the clock starts immediately.
func newPrettyTimer(name string) *prettyTimer {
	return &prettyTimer{name: name, before: time.Now()}
}

// stopAndPrintCompress logs the compressed size, the elapsed time and the
// throughput for a compression phase that consumed processed bytes.
// Nothing is logged when less than one microsecond has elapsed, which also
// avoids a division by zero.
func (pt *prettyTimer) stopAndPrintCompress(t *testing.T, size, processed int) {
	// Elapsed time in whole microseconds. This value used to be printed
	// with an "ms" label, overstating the duration by a factor of 1000.
	micros := int(time.Since(pt.before) / time.Microsecond)
	if micros == 0 {
		return
	}
	t.Log(pt.name + ":")
	t.Log(" size :", size)
	t.Log(" time :", float64(micros)/1000, "ms")
	// bytes per microsecond * 1000 == kilobytes per second
	t.Log(" speed:", processed*1000/micros, "KB/s")
}

// stopAndPrintUncompress logs the elapsed time and the throughput for a
// phase that produced or consumed processed bytes. Nothing is logged when
// less than one microsecond has elapsed.
func (pt *prettyTimer) stopAndPrintUncompress(t *testing.T, processed int) {
	// See stopAndPrintCompress for the unit handling.
	micros := int(time.Since(pt.before) / time.Microsecond)
	if micros == 0 {
		return
	}
	t.Log(" " + pt.name + ":")
	t.Log(" time :", float64(micros)/1000, "ms")
	t.Log(" speed:", processed*1000/micros, "KB/s")
}
bytes.NewBuffer(compressed.Bytes()) 65 | cgunzip, err := NewReader(toGunzip) 66 | if err != nil { 67 | t.Errorf("czlib.NewReader failed: %v", err) 68 | } 69 | uncompressed = &bytes.Buffer{} 70 | pt = newPrettyTimer("czlib unzip") 71 | _, err = io.Copy(uncompressed, cgunzip) 72 | if err != nil { 73 | t.Errorf("Copy failed: %v", err) 74 | } 75 | pt.stopAndPrintUncompress(t, uncompressed.Len()) 76 | if !bytes.Equal(source, uncompressed.Bytes()) { 77 | t.Errorf("Bytes are not equal") 78 | } 79 | } 80 | 81 | func testChecksums(t *testing.T, data []byte) { 82 | t.Log("Checksums:") 83 | 84 | // crc64 with go library 85 | goCrc64 := crc64.New(crc64.MakeTable(crc64.ECMA)) 86 | toChecksum := bytes.NewBuffer(data) 87 | pt := newPrettyTimer("go crc64") 88 | _, err := io.Copy(goCrc64, toChecksum) 89 | if err != nil { 90 | t.Errorf("Copy failed: %v", err) 91 | } 92 | pt.stopAndPrintUncompress(t, len(data)) 93 | 94 | // adler32 with go library 95 | goAdler32 := adler32.New() 96 | toChecksum = bytes.NewBuffer(data) 97 | pt = newPrettyTimer("go adler32") 98 | _, err = io.Copy(goAdler32, toChecksum) 99 | if err != nil { 100 | t.Errorf("Copy failed: %v", err) 101 | } 102 | goResult := goAdler32.Sum32() 103 | pt.stopAndPrintUncompress(t, len(data)) 104 | t.Log(" sum :", goResult) 105 | 106 | // adler32 with czlib library 107 | czlibAdler32 := newAdler32() 108 | toChecksum = bytes.NewBuffer(data) 109 | pt = newPrettyTimer("czlib adler32") 110 | _, err = io.Copy(czlibAdler32, toChecksum) 111 | if err != nil { 112 | t.Errorf("Copy failed: %v", err) 113 | } 114 | czlibResult := czlibAdler32.Sum32() 115 | pt.stopAndPrintUncompress(t, len(data)) 116 | t.Log(" sum :", czlibResult) 117 | 118 | // test both results are the same 119 | if goResult != czlibResult { 120 | t.Errorf("go and czlib adler32 mismatch") 121 | } 122 | 123 | // now test partial checksuming also works with adler32 124 | cutoff := len(data) / 3 125 | toChecksum = bytes.NewBuffer(data[0:cutoff]) 126 | czlibAdler32.Reset() 127 
| _, err = io.Copy(czlibAdler32, toChecksum) 128 | if err != nil { 129 | t.Errorf("Copy failed: %v", err) 130 | } 131 | adler1 := czlibAdler32.Sum32() 132 | t.Log(" a1 :", adler1) 133 | t.Log(" len1 :", cutoff) 134 | 135 | toChecksum = bytes.NewBuffer(data[cutoff:]) 136 | czlibAdler32.Reset() 137 | _, err = io.Copy(czlibAdler32, toChecksum) 138 | if err != nil { 139 | t.Errorf("Copy failed: %v", err) 140 | } 141 | adler2 := czlibAdler32.Sum32() 142 | t.Log(" a2 :", adler2) 143 | t.Log(" len2 :", len(data)-cutoff) 144 | 145 | adlerCombined := adler32Combine(adler1, adler2, len(data)-cutoff) 146 | t.Log(" comb :", adlerCombined) 147 | 148 | if czlibResult != adlerCombined { 149 | t.Errorf("full and combined adler32 mismatch") 150 | } 151 | 152 | // crc32 with go library 153 | goCrc32 := crc32.New(crc32.MakeTable(crc32.IEEE)) 154 | toChecksum = bytes.NewBuffer(data) 155 | pt = newPrettyTimer("go crc32") 156 | _, err = io.Copy(goCrc32, toChecksum) 157 | if err != nil { 158 | t.Errorf("Copy failed: %v", err) 159 | } 160 | goResult = goCrc32.Sum32() 161 | pt.stopAndPrintUncompress(t, len(data)) 162 | t.Log(" sum :", goResult) 163 | 164 | // crc32 with czlib library 165 | czlibCrc32 := newCrc32() 166 | toChecksum = bytes.NewBuffer(data) 167 | pt = newPrettyTimer("czlib crc32") 168 | _, err = io.Copy(czlibCrc32, toChecksum) 169 | if err != nil { 170 | t.Errorf("Copy failed: %v", err) 171 | } 172 | czlibResult = czlibCrc32.Sum32() 173 | pt.stopAndPrintUncompress(t, len(data)) 174 | t.Log(" sum :", czlibResult) 175 | 176 | // test both results are the same 177 | if goResult != czlibResult { 178 | t.Errorf("go and czlib crc32 mismatch") 179 | } 180 | 181 | // now test partial checksuming also works with crc32 182 | toChecksum = bytes.NewBuffer(data[0:cutoff]) 183 | czlibCrc32.Reset() 184 | _, err = io.Copy(czlibCrc32, toChecksum) 185 | if err != nil { 186 | t.Errorf("Copy failed: %v", err) 187 | } 188 | crc1 := czlibCrc32.Sum32() 189 | t.Log(" crc1 :", crc1) 190 | t.Log(" 
len1 :", cutoff) 191 | 192 | toChecksum = bytes.NewBuffer(data[cutoff:]) 193 | czlibCrc32.Reset() 194 | _, err = io.Copy(czlibCrc32, toChecksum) 195 | if err != nil { 196 | t.Errorf("Copy failed: %v", err) 197 | } 198 | crc2 := czlibCrc32.Sum32() 199 | t.Log(" crc2 :", crc2) 200 | t.Log(" len2 :", len(data)-cutoff) 201 | 202 | crcCombined := crc32Combine(crc1, crc2, len(data)-cutoff) 203 | t.Log(" comb :", crcCombined) 204 | 205 | if czlibResult != crcCombined { 206 | t.Errorf("full and combined crc32 mismatch") 207 | } 208 | } 209 | 210 | func runCompare(t *testing.T, testSize int, level int) { 211 | 212 | // create a test chunk, put semi-random bytes in there 213 | // (so compression actually will compress some) 214 | toEncode := make([]byte, testSize) 215 | where := 0 216 | for where < testSize { 217 | toFill := rand.Intn(16) 218 | filler := 0x61 + rand.Intn(24) 219 | for i := 0; i < toFill && where < testSize; i++ { 220 | toEncode[where] = byte(filler) 221 | where++ 222 | } 223 | } 224 | t.Log("Original size:", len(toEncode)) 225 | 226 | // now time a regular zlib writer to a Buffer 227 | compressed := &bytes.Buffer{} 228 | reader := bytes.NewBuffer(toEncode) 229 | pt := newPrettyTimer("Go zlib") 230 | gz, err := zlib.NewWriterLevel(compressed, level) 231 | _, err = io.Copy(gz, reader) 232 | if err != nil { 233 | t.Errorf("Copy failed: %v", err) 234 | } 235 | gz.Close() 236 | pt.stopAndPrintCompress(t, compressed.Len(), len(toEncode)) 237 | compareCompressedBuffer(t, toEncode, compressed) 238 | 239 | // this code used to time gzip forked vs cgzip, but since gzip header != zlib 240 | // header it no longer works and isn't really relevant 241 | 242 | // now time a forked gzip 243 | /* 244 | compressed2 := &bytes.Buffer{} 245 | reader = bytes.NewBuffer(toEncode) 246 | cmd := exec.Command("gzip", fmt.Sprintf("-%v", level), "-c") 247 | stdout, err := cmd.StdoutPipe() 248 | if err != nil { 249 | t.Errorf("StdoutPipe failed: %v", err) 250 | } 251 | stdin, err := 
cmd.StdinPipe() 252 | if err != nil { 253 | t.Errorf("StdinPipe failed: %v", err) 254 | } 255 | wg := sync.WaitGroup{} 256 | wg.Add(1) 257 | go func() { 258 | io.Copy(compressed2, stdout) 259 | wg.Done() 260 | }() 261 | if err = cmd.Start(); err != nil { 262 | t.Errorf("Start failed: %v", err) 263 | } 264 | pt = newPrettyTimer("Forked gzip") 265 | _, err = io.Copy(stdin, reader) 266 | if err != nil { 267 | t.Errorf("Copy failed: %v", err) 268 | } 269 | stdin.Close() 270 | wg.Wait() 271 | if err := cmd.Wait(); err != nil { 272 | t.Errorf("Wait failed: %v", err) 273 | } 274 | pt.stopAndPrintCompress(t, compressed2.Len(), len(toEncode)) 275 | compareCompressedBuffer(t, toEncode, compressed2) 276 | */ 277 | 278 | // and time the cgo version 279 | compressed3 := &bytes.Buffer{} 280 | reader = bytes.NewBuffer(toEncode) 281 | pt = newPrettyTimer("czlib") 282 | cgz, err := NewWriterLevel(compressed3, level) 283 | if err != nil { 284 | t.Errorf("NewWriterLevel failed: %v", err) 285 | } 286 | _, err = io.Copy(cgz, reader) 287 | if err != nil { 288 | t.Errorf("Copy failed: %v", err) 289 | } 290 | if err := cgz.Flush(); err != nil { 291 | t.Errorf("Flush failed: %v", err) 292 | } 293 | if err := cgz.Close(); err != nil { 294 | t.Errorf("Close failed: %v", err) 295 | } 296 | pt.stopAndPrintCompress(t, compressed3.Len(), len(toEncode)) 297 | compareCompressedBuffer(t, toEncode, compressed3) 298 | 299 | testChecksums(t, toEncode) 300 | } 301 | 302 | // use 'go test -v' and bigger sizes to show meaningful rates 303 | func TestCompare(t *testing.T) { 304 | runCompare(t, 1*1024*1024, 1) 305 | } 306 | 307 | func TestCompareBest(t *testing.T) { 308 | runCompare(t, 1*1024*1024, 9) 309 | } 310 | -------------------------------------------------------------------------------- /fastzlib.c: -------------------------------------------------------------------------------- 1 | /* fastzlib.c 2 | * 3 | * This module is an implementation of a non-streaming "compress" and 4 | * "decompress", 
Similar to and based on Python 2.7's "zlibmodule.c". 5 | * 6 | * Hopefully this will perform very similarly to Python's zlib module, 7 | * as both vitess' cgzip and compress/zlib perform between very and 8 | * quite poorly in comparisson on small payloads. 9 | */ 10 | #include "_cgo_export.h" 11 | #include 12 | #include 13 | #include 14 | #include "fastzlib.h" 15 | 16 | // 16k chunk size used by Python zlibmodule.c and http://www.zlib.net/zpipe.c 17 | #define DEFAULTALLOC (16*1024) 18 | #define ERRMSG_MAX (1024*2) 19 | 20 | // Create an error message for various types of errors. 21 | char *zlib_error(z_stream zst, int err, char *msg) { 22 | char *errmsg = NULL; 23 | 24 | if (err == Z_VERSION_ERROR) { 25 | errmsg = "library version mismatch"; 26 | } 27 | if (errmsg == Z_NULL) { 28 | errmsg = zst.msg; 29 | } 30 | if (errmsg == Z_NULL) { 31 | switch(err) { 32 | case Z_BUF_ERROR: 33 | errmsg = "incomplete or truncated stream"; 34 | break; 35 | case Z_STREAM_ERROR: 36 | errmsg = "inconsistent stream state"; 37 | break; 38 | case Z_DATA_ERROR: 39 | errmsg = "invalid input data"; 40 | break; 41 | } 42 | } 43 | if (errmsg == Z_NULL) { 44 | errmsg = (char *)malloc(ERRMSG_MAX); 45 | snprintf(errmsg, ERRMSG_MAX, "Error: %d %s", err, msg); 46 | } else { 47 | char *orig = errmsg; 48 | errmsg = (char *)malloc(ERRMSG_MAX); 49 | snprintf(errmsg, ERRMSG_MAX, "Error %d %s: %s", err, msg, orig); 50 | } 51 | 52 | return errmsg; 53 | } 54 | 55 | // Entirely decompress input, returning a ByteArray 56 | // return value is a ByteArray whose err/str, if set, must be freed. 
57 | ByteArray c_decompress(char *input, uint length) { 58 | ByteArray ret; 59 | int err, wsize=MAX_WBITS; 60 | z_stream zst; 61 | 62 | ret.len = DEFAULTALLOC; 63 | ret.str = (char *)malloc(ret.len); 64 | 65 | zst.avail_in = length; 66 | zst.avail_out = DEFAULTALLOC; 67 | zst.zalloc = (alloc_func)Z_NULL; 68 | zst.zfree = (free_func)Z_NULL; 69 | zst.next_out = (Byte *)ret.str; 70 | zst.next_in = (Byte *)input; 71 | 72 | err = inflateInit2(&zst, wsize); 73 | 74 | switch(err) { 75 | case(Z_OK): 76 | break; 77 | case(Z_MEM_ERROR): 78 | // something here might not even be possible.. 79 | goto error; 80 | default: 81 | inflateEnd(&zst); 82 | goto error; 83 | } 84 | 85 | do { 86 | err = inflate(&zst, Z_FINISH); 87 | switch(err) { 88 | case(Z_STREAM_END): 89 | break; 90 | case(Z_BUF_ERROR): 91 | /* 92 | * If there is at least 1 byte of room according to zst.avail_out 93 | * and we get this error, assume that it means zlib cannot 94 | * process the inflate call() due to an error in the data. 95 | */ 96 | if (zst.avail_out > 0) { 97 | inflateEnd(&zst); 98 | goto error; 99 | } 100 | /* fall through */ 101 | case(Z_OK): 102 | /* need more memory, double size of return string each time. */ 103 | ret.str = (char *)realloc(ret.str, ret.len <<1); 104 | if (ret.str == NULL) { 105 | inflateEnd(&zst); 106 | goto error; 107 | } 108 | 109 | zst.next_out = (unsigned char *)(ret.str + ret.len); 110 | zst.avail_out = ret.len; 111 | ret.len = ret.len << 1; 112 | break; 113 | default: 114 | inflateEnd(&zst); 115 | goto error; 116 | } 117 | } while (err != Z_STREAM_END); 118 | 119 | err = inflateEnd(&zst); 120 | if (err != Z_OK) { 121 | goto error; 122 | } 123 | 124 | /* success! 
return everything */ 125 | ret.len = zst.total_out; 126 | ret.err = NULL; 127 | return ret; 128 | 129 | error: 130 | if (ret.str != NULL) free(ret.str); 131 | ret.err = zlib_error(zst, err, "while decompressing data"); 132 | return ret; 133 | } 134 | 135 | // entirely compress input, returning a compressed ByteArray 136 | // return value is a ByteArray whose err/str, if set, must be freed. 137 | ByteArray c_compress(char *input, uint length) { 138 | ByteArray ret; 139 | // FIXME: allow this to be tunable? 140 | int err, level=Z_DEFAULT_COMPRESSION; 141 | z_stream zst; 142 | 143 | // allocate the maximum possible length of the output up front 144 | // this calculation comes from python and is almost certainly safe 145 | zst.avail_out = length + length/1000 + 12 + 1; 146 | ret.str = (char *)malloc(zst.avail_out); 147 | 148 | if (ret.str == NULL) { 149 | ret.err = zlib_error(zst, Z_MEM_ERROR, "Out of memory while compressing data."); 150 | goto error; 151 | } 152 | 153 | zst.zalloc = (alloc_func)Z_NULL; 154 | zst.zfree = (free_func)Z_NULL; 155 | zst.opaque = Z_NULL; 156 | zst.next_out = (Byte *)ret.str; 157 | zst.next_in = (Byte *)input; 158 | zst.avail_in = length; 159 | 160 | err = deflateInit(&zst, level); 161 | 162 | switch(err) { 163 | case(Z_OK): 164 | break; 165 | case(Z_MEM_ERROR): 166 | ret.err = zlib_error(zst, err, "Out of memory while compressing data"); 167 | goto error; 168 | // XXX: this isn't currently possible but will be in future 169 | case(Z_STREAM_ERROR): 170 | ret.err = zlib_error(zst, err, "Bad compression level"); 171 | goto error; 172 | default: 173 | deflateEnd(&zst); 174 | ret.err = zlib_error(zst, err, "while compressing data"); 175 | goto error; 176 | } 177 | 178 | err = deflate(&zst, Z_FINISH); 179 | 180 | if (err != Z_STREAM_END) { 181 | ret.err = zlib_error(zst, err, "while compressing data"); 182 | deflateEnd(&zst); 183 | goto error; 184 | } 185 | 186 | err = deflateEnd(&zst); 187 | 188 | if (err != Z_OK) { 189 | ret.err = 
zlib_error(zst, err, "while finishing compression"); 190 | goto error; 191 | } else { 192 | // XXX: we've allocated the maximum possible size (worst case) for zlib 193 | // compression up top, but it's likely that zst.total_out (the end size 194 | // of the buffer) is much smaller; Python actually performs a copy here 195 | // so that the old, larger buffer can be freed immediately. 196 | // 197 | // This is a good strategy for a lot of reasons. We could use realloc 198 | // to shrink the amount of space allocated to our buffer pointer, but 199 | // the excess memory is not actually freed; instead it is made available 200 | // to future calls to malloc/calloc. 201 | // 202 | // Since the common case, Compress, will do this in Go and then free 203 | // immediately, it doesn't make sense to actually do this copy here, but 204 | // this might open up UnsafeCompress to weird allocation slowdowns if it's 205 | // doing lots of compression and decompression. 206 | ret.str = (char *)realloc(ret.str, zst.total_out); 207 | ret.len = zst.total_out; 208 | ret.err = NULL; 209 | } 210 | 211 | return ret; 212 | 213 | error: 214 | if (ret.str != NULL) free(ret.str); 215 | return ret; 216 | } 217 | 218 | 219 | // entirely compress input, returning a compressed ByteArray 220 | // return value is a ByteArray whose err/str, if set, must be freed. 221 | // This version of compress uses the defaultbuffer + doubling growth 222 | // like the decompress version does. 223 | ByteArray c_compress2(char *input, uint length) { 224 | ByteArray ret; 225 | // FIXME: allow this to be tunable? 226 | int err, grow, level=Z_DEFAULT_COMPRESSION; 227 | z_stream zst; 228 | 229 | // allocation strategy: 230 | // we want to use a single worst-case allocation for very small inputs, 231 | // DEFAULTALLOC for medium sized inputs, and length/8 for large inputs. 
232 | 233 | if (length < DEFAULTALLOC) { 234 | //printf("Using worst-case alloc of %d\n", length + length/1000 + 13); 235 | ret.len = zst.avail_out = length + length/1000 + 12 + 1; 236 | } else if (length/8 > DEFAULTALLOC) { 237 | //printf("Using quarter buffer alloc of %d\n", length/4); 238 | ret.len = zst.avail_out = length/8; 239 | } else { 240 | //printf("Using DEFAULTALLOC of %d\n", DEFAULTALLOC); 241 | ret.len = zst.avail_out = DEFAULTALLOC; 242 | } 243 | ret.str = (char *)malloc(zst.avail_out); 244 | 245 | if (ret.str == NULL) { 246 | ret.err = zlib_error(zst, Z_MEM_ERROR, "Out of memory while compressing data."); 247 | goto error; 248 | } 249 | 250 | zst.zalloc = (alloc_func)Z_NULL; 251 | zst.zfree = (free_func)Z_NULL; 252 | zst.opaque = Z_NULL; 253 | zst.next_out = (Byte *)ret.str; 254 | zst.next_in = (Byte *)input; 255 | zst.avail_in = length; 256 | 257 | err = deflateInit2(&zst, level, Z_DEFLATED, 258 | 15, // 16 makes it a gzip file, 15 is default 259 | 8, Z_DEFAULT_STRATEGY); // default values 260 | 261 | switch(err) { 262 | case(Z_OK): 263 | break; 264 | case(Z_MEM_ERROR): 265 | ret.err = zlib_error(zst, err, "Out of memory while compressing data"); 266 | goto error; 267 | // XXX: this isn't currently possible but will be in future 268 | case(Z_STREAM_ERROR): 269 | ret.err = zlib_error(zst, err, "Bad compression level"); 270 | goto error; 271 | default: 272 | deflateEnd(&zst); 273 | ret.err = zlib_error(zst, err, "while compressing data"); 274 | goto error; 275 | } 276 | 277 | do { 278 | err = deflate(&zst, Z_FINISH); 279 | 280 | switch(err) { 281 | case(Z_STREAM_END): 282 | break; 283 | case(Z_BUF_ERROR): 284 | /* 285 | * If there is at least 1 byte of room according to zst.avail_out 286 | * and we get this error, assume that it means zlib cannot 287 | * process the inflate call() due to an error in the data. 
288 | */ 289 | if (zst.avail_out > 0) { 290 | deflateEnd(&zst); 291 | goto error; 292 | } 293 | /* fall through */ 294 | case(Z_OK): 295 | /* we need more memory; increase by 1/8th the original buffer each time */ 296 | grow = ret.len + length/8; 297 | ret.str = (char *)realloc(ret.str, grow); 298 | 299 | if (ret.str == NULL) { 300 | deflateEnd(&zst); 301 | goto error; 302 | } 303 | 304 | zst.next_out = (unsigned char *)(ret.str + ret.len); 305 | zst.avail_out = grow - ret.len; 306 | ret.len = grow; 307 | break; 308 | default: 309 | deflateEnd(&zst); 310 | goto error; 311 | } 312 | } while (err != Z_STREAM_END); 313 | 314 | err = deflateEnd(&zst); 315 | 316 | if (err != Z_OK) { 317 | goto error; 318 | } 319 | 320 | /* success! return everything */ 321 | ret.str = (char *)realloc(ret.str, zst.total_out); 322 | ret.len = zst.total_out; 323 | ret.err = NULL; 324 | return ret; 325 | 326 | error: 327 | if (ret.str != NULL) free(ret.str); 328 | if (ret.err == NULL) ret.err = zlib_error(zst, err, "while compressing data"); 329 | return ret; 330 | } 331 | -------------------------------------------------------------------------------- /fastzlib.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013, Datadog Inc. All rights reserved. 2 | 3 | package czlib 4 | 5 | import ( 6 | "errors" 7 | "unsafe" 8 | ) 9 | 10 | /* 11 | #cgo pkg-config: zlib 12 | #include "fastzlib.h" 13 | #include 14 | */ 15 | import "C" 16 | 17 | // An UnsafeByte is a []byte whose backing array has been allocated in C and 18 | // thus is not subject to the Go garbage collector. The Unsafe versions of 19 | // Compress and Decompress return this in order to prevent copying the unsafe 20 | // memory into collected memory. 
21 | type UnsafeByte []byte 22 | 23 | // NewUnsafeByte creates a []byte from the unsafe pointer without a copy, 24 | // using the method outlined in this mailing list post: 25 | // https://groups.google.com/forum/#!topic/golang-nuts/KyXR0fDp0HA 26 | // but amended to use the three-index slices from go1.2 to set the capacity 27 | // of b correctly: 28 | // https://tip.golang.org/doc/go1.2#three_index 29 | // This means this code only works in go1.2+. 30 | // 31 | // This shouldn't copy the underlying array; it's just casting it 32 | // Afterwards, we use reflect to fix the Cap & len of the slice. 33 | func NewUnsafeByte(p *C.char, length int) UnsafeByte { 34 | var b UnsafeByte 35 | b = UnsafeByte((*[1<<31 - 1]byte)(unsafe.Pointer(p))[:length:length]) 36 | return b 37 | } 38 | 39 | // Free the underlying byte array; doing this twice would be bad. 40 | func (b UnsafeByte) Free() { 41 | C.free(unsafe.Pointer(&b[0])) 42 | } 43 | 44 | // Compress returns the input compressed using zlib, or an error if encountered. 45 | func Compress(input []byte) ([]byte, error) { 46 | var cInput *C.char 47 | if len(input) != 0 { 48 | cInput = (*C.char)(unsafe.Pointer(&input[0])) 49 | } 50 | ret := C.c_compress2(cInput, C.uint(len(input))) 51 | 52 | // if there was an error compressing, return it and free the original message 53 | if ret.err != nil { 54 | msg := C.GoString((*C.char)(ret.err)) 55 | C.free(unsafe.Pointer(ret.err)) 56 | return []byte{}, errors.New(msg) 57 | } 58 | 59 | // NOTE: this creates a copy of the return *char as a Go []byte. 60 | // FIXME: uint -> int conversion here is dangerous 61 | b := C.GoBytes(unsafe.Pointer(ret.str), C.int(ret.len)) 62 | C.free(unsafe.Pointer(ret.str)) 63 | return b, nil 64 | } 65 | 66 | // Decompress returns the input decompressed using zlib, or an error if encountered. 
67 | func Decompress(input []byte) ([]byte, error) { 68 | var cInput *C.char 69 | if len(input) != 0 { 70 | cInput = (*C.char)(unsafe.Pointer(&input[0])) 71 | } 72 | // send the input byte without copying iy 73 | ret := C.c_decompress(cInput, C.uint(len(input))) 74 | 75 | // if there was an error decompressing, return it and free the original message 76 | if ret.err != nil { 77 | msg := C.GoString((*C.char)(ret.err)) 78 | C.free(unsafe.Pointer(ret.err)) 79 | return []byte{}, errors.New(msg) 80 | } 81 | 82 | // NOTE: this creates a copy of the return *char as a Go []byte. 83 | // FIXME: uint -> int conversion here is dangerous 84 | b := C.GoBytes(unsafe.Pointer(ret.str), C.int(ret.len)) 85 | C.free(unsafe.Pointer(ret.str)) 86 | return b, nil 87 | } 88 | 89 | // UnsafeDecompress unzips input into an UnsafeByte without copying the result 90 | // malloced in C. The UnsafeByte returned can be used as a normal []byte but 91 | // must be manually free'd w/ UnsafeByte.Free() 92 | func UnsafeDecompress(input []byte) (UnsafeByte, error) { 93 | cInput := (*C.char)(unsafe.Pointer(&input[0])) 94 | ret := C.c_decompress(cInput, C.uint(len(input))) 95 | 96 | // if there was an error decompressing, return it and free the original message 97 | if ret.err != nil { 98 | msg := C.GoString((*C.char)(ret.err)) 99 | C.free(unsafe.Pointer(ret.err)) 100 | return UnsafeByte{}, errors.New(msg) 101 | } 102 | 103 | b := NewUnsafeByte((*C.char)(ret.str), int(ret.len)) 104 | return b, nil 105 | } 106 | 107 | // UnsafeCompress zips input into an UnsafeByte without copying the result 108 | // malloced in C. 
The UnsafeByte returned can be used as a normal []byte but must 109 | // be manually free'd w/ UnsafeByte.Free() 110 | func UnsafeCompress(input []byte) (UnsafeByte, error) { 111 | cInput := (*C.char)(unsafe.Pointer(&input[0])) 112 | ret := C.c_compress(cInput, C.uint(len(input))) 113 | 114 | // if there was an error decompressing, return it and free the original message 115 | if ret.err != nil { 116 | msg := C.GoString((*C.char)(ret.err)) 117 | C.free(unsafe.Pointer(ret.err)) 118 | return UnsafeByte{}, errors.New(msg) 119 | } 120 | 121 | b := NewUnsafeByte((*C.char)(ret.str), int(ret.len)) 122 | return b, nil 123 | } 124 | -------------------------------------------------------------------------------- /fastzlib.h: -------------------------------------------------------------------------------- 1 | 2 | typedef unsigned int uint; 3 | 4 | /* simulate the Go return type ([]byte, error) so that the Go function 5 | * can allocate the right amt of memory to copy the str if required or 6 | * report errors directly from the C lib. 
7 | */ 8 | typedef struct { 9 | char *str; 10 | uint len; 11 | char *err; 12 | } ByteArray; 13 | 14 | ByteArray c_decompress(char *input, uint length); 15 | ByteArray c_compress(char *input, uint length); 16 | ByteArray c_compress2(char *input, uint length); 17 | 18 | -------------------------------------------------------------------------------- /fastzlib_test.go: -------------------------------------------------------------------------------- 1 | package czlib 2 | 3 | import ( 4 | "bytes" 5 | "compress/zlib" 6 | "crypto/rand" 7 | "fmt" 8 | "io/ioutil" 9 | "os" 10 | "testing" 11 | ) 12 | 13 | var gzipped, raw []byte 14 | 15 | func zip(b []byte) []byte { 16 | var out bytes.Buffer 17 | w := zlib.NewWriter(&out) 18 | w.Write(b) 19 | w.Close() 20 | return out.Bytes() 21 | } 22 | 23 | func init() { 24 | var err error 25 | payload := os.Getenv("PAYLOAD") 26 | if len(payload) == 0 { 27 | fmt.Println("You must provide PAYLOAD env var for path to test payload.") 28 | return 29 | } 30 | raw, err = ioutil.ReadFile(payload) 31 | if err != nil { 32 | fmt.Printf("Error opening payload: %s\n", err) 33 | } 34 | gzipped = zip(raw) 35 | // fmt.Printf("%d byte test payload (%d orig)\n", len(gzipped), len(raw)) 36 | } 37 | 38 | // Generate an n-byte long []byte 39 | func genData(n int) ([]byte, error) { 40 | b := make([]byte, n) 41 | _, err := rand.Read(b) 42 | return b, err 43 | } 44 | 45 | func TestAllZlib(t *testing.T) { 46 | type compressFunc func([]byte) ([]byte, error) 47 | funcs := []compressFunc{Compress, gzip, zzip} 48 | names := []string{"Compress", "gzip", "zzip"} 49 | for _, i := range []int{10, 128, 1000, 1024 * 10, 1024 * 100, 1024 * 1024, 1024 * 1024 * 7} { 50 | data, err := genData(i) 51 | if err != nil { 52 | t.Error(err) 53 | continue 54 | } 55 | for i, f := range funcs { 56 | comp, err := f(data) 57 | if err != nil { 58 | t.Fatalf("Compression failed on %v: %s", names[i], err) 59 | } 60 | decomp, err := Decompress(comp) 61 | if err != nil { 62 | 
t.Fatalf("Decompression failed on %v: %s", names[i], err) 63 | } 64 | if bytes.Compare(decomp, data) != 0 { 65 | t.Fatalf("deflate->inflate does not match original for %s", names[i]) 66 | } 67 | } 68 | } 69 | } 70 | 71 | func TestEmpty(t *testing.T) { 72 | var empty []byte 73 | _, err := Compress(empty) 74 | if err != nil { 75 | t.Fatalf("unexpected error compressing empty slice") 76 | } 77 | _, err = Decompress(empty) 78 | if err == nil { 79 | t.Fatalf("unexpected success decompressing empty slice") 80 | } 81 | } 82 | 83 | func TestUnsafeZlib(t *testing.T) { 84 | for _, i := range []int{10, 128, 1000, 1024 * 10, 1024 * 100, 1024 * 1024, 1024 * 1024 * 7} { 85 | data, err := genData(i) 86 | if err != nil { 87 | t.Error(err) 88 | continue 89 | } 90 | comp, err := UnsafeCompress(data) 91 | if err != nil { 92 | t.Fatal(err) 93 | } 94 | decomp, err := UnsafeDecompress(comp) 95 | if err != nil { 96 | t.Fatal(err) 97 | } 98 | if bytes.Compare(decomp, data) != 0 { 99 | t.Fatal("Compress -> Decompress on byte array failed to match original data.") 100 | } 101 | comp.Free() 102 | decomp.Free() 103 | } 104 | } 105 | 106 | // Compression benchmarks 107 | func BenchmarkCompressUnsafe(b *testing.B) { 108 | if raw == nil { 109 | b.Skip("You must provide PAYLOAD env var for benchmarking.") 110 | } 111 | b.SetBytes(int64(len(raw))) 112 | for i := 0; i < b.N; i++ { 113 | u, _ := UnsafeCompress(raw) 114 | u.Free() 115 | } 116 | } 117 | 118 | func BenchmarkCompress(b *testing.B) { 119 | if raw == nil { 120 | b.Skip("You must provide PAYLOAD env var for benchmarking.") 121 | } 122 | b.SetBytes(int64(len(raw))) 123 | for i := 0; i < b.N; i++ { 124 | Compress(raw) 125 | } 126 | } 127 | 128 | func BenchmarkCompressStream(b *testing.B) { 129 | if raw == nil { 130 | b.Skip("You must provide PAYLOAD env var for benchmarking.") 131 | } 132 | b.SetBytes(int64(len(raw))) 133 | for i := 0; i < b.N; i++ { 134 | gzip(raw) 135 | } 136 | } 137 | 138 | func BenchmarkCompressStdZlib(b *testing.B) { 
139 | if raw == nil { 140 | b.Skip("You must provide PAYLOAD env var for benchmarking.") 141 | } 142 | b.SetBytes(int64(len(raw))) 143 | for i := 0; i < b.N; i++ { 144 | zzip(raw) 145 | } 146 | } 147 | 148 | // Decomression benchmarks 149 | 150 | func BenchmarkDecompressUnsafe(b *testing.B) { 151 | if raw == nil { 152 | b.Skip("You must provide PAYLOAD env var for benchmarking.") 153 | } 154 | b.SetBytes(int64(len(raw))) 155 | for i := 0; i < b.N; i++ { 156 | u, _ := UnsafeDecompress(gzipped) 157 | u.Free() 158 | } 159 | } 160 | 161 | func BenchmarkDecompress(b *testing.B) { 162 | if raw == nil { 163 | b.Skip("You must provide PAYLOAD env var for benchmarking.") 164 | } 165 | b.SetBytes(int64(len(raw))) 166 | for i := 0; i < b.N; i++ { 167 | Decompress(gzipped) 168 | } 169 | } 170 | 171 | func BenchmarkDecompressStream(b *testing.B) { 172 | if raw == nil { 173 | b.Skip("You must provide PAYLOAD env var for benchmarking.") 174 | } 175 | b.SetBytes(int64(len(raw))) 176 | for i := 0; i < b.N; i++ { 177 | gunzip(gzipped) 178 | } 179 | } 180 | 181 | func BenchmarkDecompressStdZlib(b *testing.B) { 182 | if raw == nil { 183 | b.Skip("You must provide PAYLOAD env var for benchmarking.") 184 | } 185 | b.SetBytes(int64(len(raw))) 186 | for i := 0; i < b.N; i++ { 187 | zunzip(gzipped) 188 | } 189 | } 190 | 191 | // helpers 192 | 193 | func gunzip(body []byte) ([]byte, error) { 194 | reader, err := NewReader(bytes.NewBuffer(body)) 195 | if err != nil { 196 | return []byte{}, err 197 | } 198 | return ioutil.ReadAll(reader) 199 | } 200 | 201 | // unzip a gzipped []byte payload, returning the unzipped []byte and error 202 | func zunzip(body []byte) ([]byte, error) { 203 | reader, err := zlib.NewReader(bytes.NewBuffer(body)) 204 | if err != nil { 205 | return []byte{}, err 206 | } 207 | return ioutil.ReadAll(reader) 208 | } 209 | 210 | func gzip(body []byte) ([]byte, error) { 211 | outb := make([]byte, 0, 16*1024) 212 | out := bytes.NewBuffer(outb) 213 | writer := NewWriter(out) 
214 | n, err := writer.Write(body) 215 | if n != len(body) { 216 | return []byte{}, fmt.Errorf("compressed %d, expected %d", n, len(body)) 217 | } 218 | if err != nil { 219 | return []byte{}, err 220 | } 221 | err = writer.Close() 222 | if err != nil { 223 | return []byte{}, err 224 | } 225 | return out.Bytes(), nil 226 | } 227 | 228 | func zzip(body []byte) ([]byte, error) { 229 | outb := make([]byte, 0, len(body)) 230 | out := bytes.NewBuffer(outb) 231 | writer := zlib.NewWriter(out) 232 | n, err := writer.Write(body) 233 | if n != len(body) { 234 | return []byte{}, fmt.Errorf("compressed %d, expected %d", n, len(body)) 235 | } 236 | if err != nil { 237 | return []byte{}, err 238 | } 239 | err = writer.Close() 240 | if err != nil { 241 | return []byte{}, err 242 | } 243 | return out.Bytes(), nil 244 | } 245 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/DataDog/czlib 2 | 3 | go 1.14 4 | -------------------------------------------------------------------------------- /reader.go: -------------------------------------------------------------------------------- 1 | // Pulled from https://github.com/youtube/vitess 229422035ca0c716ad0c1397ea1351fe62b0d35a 2 | // Copyright 2012, Google Inc. All rights reserved. 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file. 5 | 6 | package czlib 7 | 8 | import "io" 9 | 10 | // err starts out as nil 11 | // we will call inflateEnd when we set err to a value: 12 | // - whatever error is returned by the underlying reader 13 | // - io.EOF if Close was called 14 | type reader struct { 15 | r io.Reader 16 | in []byte 17 | strm zstream 18 | err error 19 | skipIn bool 20 | } 21 | 22 | // NewReader creates a new io.ReadCloser. Reads from the returned io.ReadCloser 23 | //read and decompress data from r. 
The implementation buffers input and may read 24 | // more data than necessary from r. 25 | // It is the caller's responsibility to call Close on the ReadCloser when done. 26 | func NewReader(r io.Reader) (io.ReadCloser, error) { 27 | return NewReaderBuffer(r, DEFAULT_COMPRESSED_BUFFER_SIZE) 28 | } 29 | 30 | // NewReaderBuffer has the same behavior as NewReader but the user can provides 31 | // a custom buffer size. 32 | func NewReaderBuffer(r io.Reader, bufferSize int) (io.ReadCloser, error) { 33 | z := &reader{r: r, in: make([]byte, bufferSize)} 34 | if err := z.strm.inflateInit(); err != nil { 35 | return nil, err 36 | } 37 | return z, nil 38 | } 39 | 40 | func (z *reader) Read(p []byte) (int, error) { 41 | if z.err != nil { 42 | return 0, z.err 43 | } 44 | 45 | if len(p) == 0 { 46 | return 0, nil 47 | } 48 | 49 | // read and deflate until the output buffer is full 50 | z.strm.setOutBuf(p, len(p)) 51 | 52 | for { 53 | // if we have no data to inflate, read more 54 | if !z.skipIn && z.strm.availIn() == 0 { 55 | var n int 56 | n, z.err = z.r.Read(z.in) 57 | // If we got data and EOF, pretend we didn't get the 58 | // EOF. That way we will return the right values 59 | // upstream. Note this will trigger another read 60 | // later on, that should return (0, EOF). 61 | if n > 0 && z.err == io.EOF { 62 | z.err = nil 63 | } 64 | 65 | // FIXME(alainjobart) this code is not compliant with 66 | // the Reader interface. We should process all the 67 | // data we got from the reader, and then return the 68 | // error, whatever it is. 
69 | if (z.err != nil && z.err != io.EOF) || (n == 0 && z.err == io.EOF) { 70 | z.strm.inflateEnd() 71 | return 0, z.err 72 | } 73 | 74 | z.strm.setInBuf(z.in, n) 75 | } else { 76 | z.skipIn = false 77 | } 78 | 79 | // inflate some 80 | ret, err := z.strm.inflate(zNoFlush) 81 | if err != nil { 82 | z.err = err 83 | z.strm.inflateEnd() 84 | return 0, z.err 85 | } 86 | 87 | // if we read something, we're good 88 | have := len(p) - z.strm.availOut() 89 | if have > 0 { 90 | z.skipIn = ret == Z_OK && z.strm.availOut() == 0 91 | return have, z.err 92 | } 93 | } 94 | } 95 | 96 | // Close closes the Reader. It does not close the underlying io.Reader. 97 | func (z *reader) Close() error { 98 | if z.err != nil { 99 | if z.err != io.EOF { 100 | return z.err 101 | } 102 | return nil 103 | } 104 | z.strm.inflateEnd() 105 | z.err = io.EOF 106 | return nil 107 | } 108 | -------------------------------------------------------------------------------- /writer.go: -------------------------------------------------------------------------------- 1 | // Pulled from https://github.com/youtube/vitess 229422035ca0c716ad0c1397ea1351fe62b0d35a 2 | // Copyright 2012, Google Inc. All rights reserved. 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file. 
5 | 6 | package czlib 7 | 8 | import ( 9 | "fmt" 10 | "io" 11 | ) 12 | 13 | // Allowed flush values 14 | const ( 15 | Z_NO_FLUSH = 0 16 | Z_PARTIAL_FLUSH = 1 17 | Z_SYNC_FLUSH = 2 18 | Z_FULL_FLUSH = 3 19 | Z_FINISH = 4 20 | Z_BLOCK = 5 21 | Z_TREES = 6 22 | ) 23 | 24 | // Return codes 25 | const ( 26 | Z_OK = 0 27 | Z_STREAM_END = 1 28 | Z_NEED_DICT = 2 29 | Z_ERRNO = -1 30 | Z_STREAM_ERROR = -2 31 | Z_DATA_ERROR = -3 32 | Z_MEM_ERROR = -4 33 | Z_BUF_ERROR = -5 34 | Z_VERSION_ERROR = -6 35 | ) 36 | 37 | // our default buffer size 38 | // most go io functions use 32KB as buffer size, so 32KB 39 | // works well here for compressed data buffer 40 | const ( 41 | DEFAULT_COMPRESSED_BUFFER_SIZE = 32 * 1024 42 | ) 43 | 44 | // Writer implements a io.WriteCloser 45 | // we will call deflateEnd when we set err to a value: 46 | // - whatever error is returned by the underlying writer 47 | // - io.EOF if Close was called 48 | type Writer struct { 49 | w io.Writer 50 | out []byte 51 | strm zstream 52 | err error 53 | } 54 | 55 | // NewWriter returns a new zlib writer that writes to the underlying writer 56 | func NewWriter(w io.Writer) *Writer { 57 | z, _ := NewWriterLevelBuffer(w, DefaultCompression, DEFAULT_COMPRESSED_BUFFER_SIZE) 58 | return z 59 | } 60 | 61 | // NewWriterLevel let the user provide a compression level value 62 | func NewWriterLevel(w io.Writer, level int) (*Writer, error) { 63 | return NewWriterLevelBuffer(w, level, DEFAULT_COMPRESSED_BUFFER_SIZE) 64 | } 65 | 66 | // NewWriterLevelBuffer let the user provide compression level and buffer size values 67 | func NewWriterLevelBuffer(w io.Writer, level, bufferSize int) (*Writer, error) { 68 | z := &Writer{w: w, out: make([]byte, bufferSize)} 69 | if err := z.strm.deflateInit(level); err != nil { 70 | return nil, err 71 | } 72 | return z, nil 73 | } 74 | 75 | // this is the main function: it advances the write with either 76 | // new data or something else to do, like a flush 77 | func (z *Writer) write(p 
[]byte, flush int) int { 78 | if len(p) == 0 { 79 | z.strm.setInBuf(nil, 0) 80 | } else { 81 | z.strm.setInBuf(p, len(p)) 82 | } 83 | // we loop until we don't get a full output buffer 84 | // each loop completely writes the output buffer to the underlying 85 | // writer 86 | for { 87 | // deflate one buffer 88 | z.strm.setOutBuf(z.out, len(z.out)) 89 | z.strm.deflate(flush) 90 | 91 | // write everything 92 | from := 0 93 | have := len(z.out) - int(z.strm.availOut()) 94 | for have > 0 { 95 | var n int 96 | n, z.err = z.w.Write(z.out[from:have]) 97 | if z.err != nil { 98 | z.strm.deflateEnd() 99 | return 0 100 | } 101 | from += n 102 | have -= n 103 | } 104 | 105 | // we stop trying if we get a partial response 106 | if z.strm.availOut() != 0 { 107 | break 108 | } 109 | } 110 | // the library guarantees this 111 | if z.strm.availIn() != 0 { 112 | panic(fmt.Errorf("cgzip: Unexpected error (2)")) 113 | } 114 | return len(p) 115 | } 116 | 117 | // Write implements the io.Writer interface 118 | func (z *Writer) Write(p []byte) (n int, err error) { 119 | if z.err != nil { 120 | return 0, z.err 121 | } 122 | n = z.write(p, Z_NO_FLUSH) 123 | return n, z.err 124 | } 125 | 126 | // Flush let the user flush the zlib buffer to the underlying writer buffer 127 | func (z *Writer) Flush() error { 128 | if z.err != nil { 129 | return z.err 130 | } 131 | z.write(nil, Z_SYNC_FLUSH) 132 | return z.err 133 | } 134 | 135 | // Close closes the zlib buffer but does not close the wrapped io.Writer originally 136 | // passed to NewWriterX. 
137 | func (z *Writer) Close() error { 138 | if z.err != nil { 139 | return z.err 140 | } 141 | z.write(nil, Z_FINISH) 142 | if z.err != nil { 143 | return z.err 144 | } 145 | z.strm.deflateEnd() 146 | z.err = io.EOF 147 | return nil 148 | } 149 | -------------------------------------------------------------------------------- /zstream.go: -------------------------------------------------------------------------------- 1 | // Pulled from https://github.com/youtube/vitess 229422035ca0c716ad0c1397ea1351fe62b0d35a 2 | // Copyright 2015, Google Inc. All rights reserved. 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file. 5 | 6 | package czlib 7 | 8 | // See http://www.zlib.net/zlib_how.html for more information on this 9 | 10 | /* 11 | #cgo CFLAGS: -Werror=implicit 12 | #cgo pkg-config: zlib 13 | 14 | #include "zlib.h" 15 | 16 | // inflateInit is a macro, so using a wrapper function 17 | int zstream_inflate_init(char *strm) { 18 | ((z_stream*)strm)->zalloc = Z_NULL; 19 | ((z_stream*)strm)->zfree = Z_NULL; 20 | ((z_stream*)strm)->opaque = Z_NULL; 21 | ((z_stream*)strm)->avail_in = 0; 22 | ((z_stream*)strm)->next_in = Z_NULL; 23 | return inflateInit((z_stream*)strm); 24 | } 25 | 26 | // deflateInit is a macro, so using a wrapper function 27 | int zstream_deflate_init(char *strm, int level) { 28 | ((z_stream*)strm)->zalloc = Z_NULL; 29 | ((z_stream*)strm)->zfree = Z_NULL; 30 | ((z_stream*)strm)->opaque = Z_NULL; 31 | return deflateInit((z_stream*)strm, level); 32 | } 33 | 34 | unsigned int zstream_avail_in(char *strm) { 35 | return ((z_stream*)strm)->avail_in; 36 | } 37 | 38 | unsigned int zstream_avail_out(char *strm) { 39 | return ((z_stream*)strm)->avail_out; 40 | } 41 | 42 | char* zstream_msg(char *strm) { 43 | return ((z_stream*)strm)->msg; 44 | } 45 | 46 | void zstream_set_in_buf(char *strm, void *buf, unsigned int len) { 47 | ((z_stream*)strm)->next_in = (Bytef*)buf; 48 | ((z_stream*)strm)->avail_in = len; 
49 | } 50 | 51 | void zstream_set_out_buf(char *strm, void *buf, unsigned int len) { 52 | ((z_stream*)strm)->next_out = (Bytef*)buf; 53 | ((z_stream*)strm)->avail_out = len; 54 | } 55 | 56 | int zstream_inflate(char *strm, int flag) { 57 | return inflate((z_stream*)strm, flag); 58 | } 59 | 60 | int zstream_deflate(char *strm, int flag) { 61 | return deflate((z_stream*)strm, flag); 62 | } 63 | 64 | void zstream_inflate_end(char *strm) { 65 | inflateEnd((z_stream*)strm); 66 | } 67 | 68 | void zstream_deflate_end(char *strm) { 69 | deflateEnd((z_stream*)strm); 70 | } 71 | */ 72 | import "C" 73 | 74 | import ( 75 | "fmt" 76 | "unsafe" 77 | ) 78 | 79 | const ( 80 | zNoFlush = C.Z_NO_FLUSH 81 | ) 82 | 83 | // z_stream is a buffer that's big enough to fit a C.z_stream. 84 | // This lets us allocate a C.z_stream within Go, while keeping the contents 85 | // opaque to the Go GC. Otherwise, the GC would look inside and complain that 86 | // the pointers are invalid, since they point to objects allocated by C code. 
87 | type zstream [unsafe.Sizeof(C.z_stream{})]C.char 88 | 89 | func (strm *zstream) inflateInit() error { 90 | result := C.zstream_inflate_init(&strm[0]) 91 | if result != Z_OK { 92 | return fmt.Errorf("cgzip: failed to initialize inflate (%v): %v", result, strm.msg()) 93 | } 94 | return nil 95 | } 96 | 97 | func (strm *zstream) deflateInit(level int) error { 98 | result := C.zstream_deflate_init(&strm[0], C.int(level)) 99 | if result != Z_OK { 100 | return fmt.Errorf("cgzip: failed to initialize deflate (%v): %v", result, strm.msg()) 101 | } 102 | return nil 103 | } 104 | 105 | func (strm *zstream) inflateEnd() { 106 | C.zstream_inflate_end(&strm[0]) 107 | } 108 | 109 | func (strm *zstream) deflateEnd() { 110 | C.zstream_deflate_end(&strm[0]) 111 | } 112 | 113 | func (strm *zstream) availIn() int { 114 | return int(C.zstream_avail_in(&strm[0])) 115 | } 116 | 117 | func (strm *zstream) availOut() int { 118 | return int(C.zstream_avail_out(&strm[0])) 119 | } 120 | 121 | func (strm *zstream) msg() string { 122 | return C.GoString(C.zstream_msg(&strm[0])) 123 | } 124 | 125 | func (strm *zstream) setInBuf(buf []byte, size int) { 126 | if buf == nil { 127 | C.zstream_set_in_buf(&strm[0], nil, C.uint(size)) 128 | } else { 129 | C.zstream_set_in_buf(&strm[0], unsafe.Pointer(&buf[0]), C.uint(size)) 130 | } 131 | } 132 | 133 | func (strm *zstream) setOutBuf(buf []byte, size int) { 134 | if buf == nil { 135 | C.zstream_set_out_buf(&strm[0], nil, C.uint(size)) 136 | } else { 137 | C.zstream_set_out_buf(&strm[0], unsafe.Pointer(&buf[0]), C.uint(size)) 138 | } 139 | } 140 | 141 | func (strm *zstream) inflate(flag int) (int, error) { 142 | ret := C.zstream_inflate(&strm[0], C.int(flag)) 143 | switch ret { 144 | case Z_NEED_DICT: 145 | ret = Z_DATA_ERROR 146 | fallthrough 147 | case Z_DATA_ERROR, Z_MEM_ERROR: 148 | return int(ret), fmt.Errorf("cgzip: failed to inflate (%v): %v", ret, strm.msg()) 149 | } 150 | return int(ret), nil 151 | } 152 | 153 | func (strm *zstream) 
deflate(flag int) { 154 | ret := C.zstream_deflate(&strm[0], C.int(flag)) 155 | if ret == Z_STREAM_ERROR { 156 | // all the other error cases are normal, 157 | // and this should never happen 158 | panic(fmt.Errorf("cgzip: Unexpected error (1)")) 159 | } 160 | } 161 | --------------------------------------------------------------------------------