├── .travis.yml ├── LICENSE.md ├── README.md ├── api.go ├── brotli ├── bit_reader.go ├── bit_writer.go ├── brotli_test.go ├── common.go ├── common_test.go ├── context.go ├── dict.go ├── dict_decoder.go ├── dict_decoder_test.go ├── dict_encoder.go ├── dict_encoder_test.go ├── prefix.go ├── prefix_decoder.go ├── prefix_encoder.go ├── prefix_test.go ├── reader.go ├── reader_test.go ├── testdata │ ├── alice29.txt │ ├── alice29.txt.br │ ├── asyoulik.txt │ ├── asyoulik.txt.br │ ├── compressed_file │ ├── compressed_file.br │ ├── compressed_repeated │ ├── compressed_repeated.br │ ├── digits-best-1e4.br │ ├── digits-best-1e5.br │ ├── digits-best-1e6.br │ ├── digits-default-1e4.br │ ├── digits-default-1e5.br │ ├── digits-default-1e6.br │ ├── digits-speed-1e4.br │ ├── digits-speed-1e5.br │ ├── digits-speed-1e6.br │ ├── lcet10.txt │ ├── lcet10.txt.br │ ├── mapsdatazrh │ ├── mapsdatazrh.br │ ├── monkey │ ├── monkey.br │ ├── plrabn12.txt │ ├── plrabn12.txt.br │ ├── random_org_10k.bin │ ├── random_org_10k.bin.br │ ├── twain-best-1e4.br │ ├── twain-best-1e5.br │ ├── twain-best-1e6.br │ ├── twain-default-1e4.br │ ├── twain-default-1e5.br │ ├── twain-default-1e6.br │ ├── twain-speed-1e4.br │ ├── twain-speed-1e5.br │ ├── twain-speed-1e6.br │ ├── ukkonooa │ └── ukkonooa.br ├── transform.go ├── transform_test.go ├── writer.go └── writer_test.go ├── bzip2 ├── bwt.go ├── bwt_test.go ├── bzip2_test.go ├── common.go ├── common_test.go ├── fuzz_off.go ├── fuzz_on.go ├── internal │ └── sais │ │ ├── common.go │ │ ├── sais_byte.go │ │ ├── sais_gen.go │ │ └── sais_int.go ├── mtf_rle2.go ├── mtf_rle2_test.go ├── prefix.go ├── prefix_test.go ├── reader.go ├── reader_test.go ├── rle1.go ├── rle1_test.go ├── testdata │ ├── gauntlet_test3.bin │ ├── gauntlet_test3.bwt │ ├── silesia_ooffice.bin │ ├── silesia_ooffice.bwt │ ├── silesia_xray.bin │ ├── silesia_xray.bwt │ ├── testfiles_test3.bin │ ├── testfiles_test3.bwt │ ├── testfiles_test4.bin │ └── testfiles_test4.bwt ├── writer.go └── writer_test.go 
├── doc ├── brotli-framing-format.md ├── bzip2-format.pdf ├── bzip2 │ ├── bitmap-decode.png │ ├── bitmap.psd │ ├── bwt-decode.png │ ├── bwt-encode.png │ ├── bwt.psd │ ├── bzip2-format.docx │ ├── diagram.psd │ ├── hexdump-complex.png │ ├── hexdump-simple.png │ ├── hexdump.psd │ ├── mtf-decode.png │ ├── mtf.psd │ ├── stream-complex.png │ ├── stream-simple.png │ └── tree-example.png ├── xflate-format.pdf └── xflate │ ├── hexdump-complex.png │ ├── hexdump-simple.png │ ├── hexdump.psd │ ├── stream-complex.png │ ├── stream-simple.png │ ├── tree-example.png │ ├── tree-hclen.png │ └── xflate-format.docx ├── flate ├── common.go ├── dict_decoder.go ├── flate_test.go ├── prefix.go ├── reader.go └── reader_test.go ├── go.mod ├── go.sum ├── internal ├── cgo │ ├── README.md │ ├── brotli │ │ └── brotli.go │ ├── bzip2 │ │ └── bzip2.go │ ├── flate │ │ └── flate.go │ ├── lzma │ │ └── lzma.go │ └── zstd │ │ └── zstd.go ├── common.go ├── common_test.go ├── debug.go ├── errors │ └── errors.go ├── gofuzz.go ├── prefix │ ├── debug.go │ ├── decoder.go │ ├── encoder.go │ ├── prefix.go │ ├── prefix_test.go │ ├── range.go │ ├── reader.go │ ├── wrap.go │ └── writer.go ├── release.go ├── testutil │ ├── bitgen.go │ ├── bitgen_test.go │ ├── rand.go │ ├── util.go │ └── util_test.go └── tool │ ├── bench │ ├── benchmarks.go │ ├── cgo_brotli.go │ ├── cgo_bzip2.go │ ├── cgo_flate.go │ ├── cgo_lzma.go │ ├── cgo_zstd.go │ ├── codec_test.go │ ├── config.go │ ├── lib_ds.go │ ├── lib_kp.go │ ├── lib_std.go │ ├── lib_uk.go │ └── main.go │ ├── bitgen │ └── main.go │ └── fuzz │ ├── brotli │ └── brotli.go │ ├── bzip2 │ └── bzip2.go │ ├── bzip2_bwt │ └── bzip2_bwt.go │ ├── fuzz.sh │ └── xflate_meta │ └── xflate_meta.go ├── testdata ├── binary.bin ├── digits.txt ├── huffman.go ├── huffman.txt ├── random.bin ├── random.go ├── repeats.bin ├── repeats.go ├── twain.txt ├── zeros.bin └── zeros.go ├── xflate ├── common.go ├── example_test.go ├── flate.go ├── index.go ├── index_test.go ├── internal │ └── meta │ │ ├── 
meta.go │ │ ├── meta_stats.go │ │ ├── meta_test.go │ │ ├── reader.go │ │ ├── reader_test.go │ │ ├── writer.go │ │ └── writer_test.go ├── meta_fuzz.go ├── reader.go ├── reader_test.go ├── writer.go ├── writer_test.go ├── xflate_stats.go └── xflate_test.go ├── zbench.sh ├── zfuzz.sh ├── zprof.sh └── ztest.sh /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: go 3 | before_install: 4 | - curl -L https://github.com/google/brotli/archive/v1.0.2.tar.gz | tar -zxv 5 | - (cd brotli-1.0.2 && mkdir out && cd out && ../configure-cmake && make && sudo make install) 6 | - rm -rf brotli-1.0.2 7 | - curl -L https://github.com/facebook/zstd/archive/v1.3.2.tar.gz | tar -zxv 8 | - (cd zstd-1.3.2 && sudo make install) 9 | - rm -rf zstd-1.3.2 10 | - sudo ldconfig 11 | - mkdir /tmp/go1.12 12 | - curl -L -s https://dl.google.com/go/go1.12.linux-amd64.tar.gz | tar -zxf - -C /tmp/go1.12 --strip-components 1 13 | - unset GOROOT 14 | - (GO111MODULE=on /tmp/go1.12/bin/go mod vendor) 15 | - (cd /tmp && GO111MODULE=on /tmp/go1.12/bin/go get golang.org/x/lint/golint@8f45f776aaf18cebc8d65861cc70c33c60471952) 16 | - (cd /tmp && GO111MODULE=on /tmp/go1.12/bin/go get honnef.co/go/tools/cmd/staticcheck@2019.1) 17 | matrix: 18 | include: 19 | - go: 1.9.x 20 | script: 21 | - go test -v -race ./... 22 | - go: 1.10.x 23 | script: 24 | - go test -v -race ./... 25 | - go: 1.11.x 26 | script: 27 | - go test -v -race ./... 28 | - go: 1.12.x 29 | script: 30 | - ./ztest.sh 31 | - go: master 32 | script: 33 | - go test -v -race ./... 34 | allow_failures: 35 | - go: master 36 | fast_finish: true 37 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright © 2015, Joe Tsai and The Go Authors. All rights reserved. 
2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation and/or 10 | other materials provided with the distribution. 11 | * Neither the copyright holder nor the names of its contributors may be used to 12 | endorse or promote products derived from this software without specific prior 13 | written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Collection of compression libraries for Go # 2 | 3 | [![GoDoc](https://godoc.org/github.com/dsnet/compress/cmp?status.svg)](https://godoc.org/github.com/dsnet/compress) 4 | [![Build Status](https://travis-ci.org/dsnet/compress.svg?branch=master)](https://travis-ci.org/dsnet/compress) 5 | [![Report Card](https://goreportcard.com/badge/github.com/dsnet/compress)](https://goreportcard.com/report/github.com/dsnet/compress) 6 | 7 | ## Introduction ## 8 | 9 | **NOTE: This library is in active development. As such, there are no guarantees about the stability of the API. The author reserves the right to arbitrarily break the API for any reason.** 10 | 11 | This repository hosts a collection of compression related libraries. The goal of this project is to provide pure Go implementations for popular compression algorithms beyond what the Go standard library provides. The goals for these packages are as follows: 12 | * Maintainable: That the code remains well documented, well tested, readable, easy to maintain, and easy to verify that it conforms to the specification for the format being implemented. 13 | * Performant: To be able to compress and decompress within at least 80% of the rates that the C implementations are able to achieve. 14 | * Flexible: That the code provides low-level and fine granularity control over the compression streams similar to what the C APIs would provide. 15 | 16 | Of these three, the first objective is often at odds with the other two objectives and provides interesting challenges. Higher performance can often be achieved by muddling abstraction layers or using non-intuitive low-level primitives. Also, more features and functionality, while useful in some situations, often complicate the API. 
Thus, this package will attempt to satisfy all the goals, but will defer to favoring maintainability when the performance or flexibility benefits are not significant enough. 17 | 18 | 19 | ## Library Status ## 20 | 21 | For the packages available, only some features are currently implemented: 22 | 23 | | Package | Reader | Writer | 24 | | ------- | :----: | :----: | 25 | | brotli | :white_check_mark: | | 26 | | bzip2 | :white_check_mark: | :white_check_mark: | 27 | | flate | :white_check_mark: | | 28 | | xflate | :white_check_mark: | :white_check_mark: | 29 | 30 | This library is in active development. As such, there are no guarantees about the stability of the API. The author reserves the right to arbitrarily break the API for any reason. When the library becomes more mature, it is planned to eventually conform to some strict versioning scheme like [Semantic Versioning](http://semver.org/). 31 | 32 | However, in the meanwhile, this library does provide some basic API guarantees. For the types defined below, the method signatures are guaranteed to not change. Note that the author still reserves the right to change the fields within each ```Reader``` and ```Writer``` structs. 33 | ```go 34 | type ReaderConfig struct { ... } 35 | type Reader struct { ... } 36 | func NewReader(io.Reader, *ReaderConfig) (*Reader, error) { ... } 37 | func (*Reader) Read([]byte) (int, error) { ... } 38 | func (*Reader) Close() error { ... } 39 | 40 | type WriterConfig struct { ... } 41 | type Writer struct { ... } 42 | func NewWriter(io.Writer, *WriterConfig) (*Writer, error) { ... } 43 | func (*Writer) Write([]byte) (int, error) { ... } 44 | func (*Writer) Close() error { ... } 45 | ``` 46 | 47 | To see what work still remains, see the [Task List](https://github.com/dsnet/compress/wiki/Task-List). 48 | 49 | ## Performance ## 50 | 51 | See [Performance Metrics](https://github.com/dsnet/compress/wiki/Performance-Metrics). 
52 | 53 | 54 | ## Frequently Asked Questions ## 55 | 56 | See [Frequently Asked Questions](https://github.com/dsnet/compress/wiki/Frequently-Asked-Questions). 57 | 58 | 59 | ## Installation ## 60 | 61 | Run the command: 62 | 63 | ```go get -u github.com/dsnet/compress``` 64 | 65 | This library requires `Go1.9` or higher in order to build. 66 | 67 | 68 | ## Packages ## 69 | 70 | | Package | Description | 71 | | :------ | :---------- | 72 | | [brotli](http://godoc.org/github.com/dsnet/compress/brotli) | Package brotli implements the Brotli format, described in RFC 7932. | 73 | | [bzip2](http://godoc.org/github.com/dsnet/compress/bzip2) | Package bzip2 implements the BZip2 compressed data format. | 74 | | [flate](http://godoc.org/github.com/dsnet/compress/flate) | Package flate implements the DEFLATE format, described in RFC 1951. | 75 | | [xflate](http://godoc.org/github.com/dsnet/compress/xflate) | Package xflate implements the XFLATE format, a random-access extension to DEFLATE. | 76 | -------------------------------------------------------------------------------- /api.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // Package compress is a collection of compression libraries. 6 | package compress 7 | 8 | import ( 9 | "bufio" 10 | "io" 11 | 12 | "github.com/dsnet/compress/internal/errors" 13 | ) 14 | 15 | // The Error interface identifies all compression related errors. 16 | type Error interface { 17 | error 18 | CompressError() 19 | 20 | // IsDeprecated reports the use of a deprecated and unsupported feature. 21 | IsDeprecated() bool 22 | 23 | // IsCorrupted reports whether the input stream was corrupted. 24 | IsCorrupted() bool 25 | } 26 | 27 | var _ Error = errors.Error{} 28 | 29 | // ByteReader is an interface accepted by all decompression Readers. 
30 | // It guarantees that the decompressor never reads more data than is necessary 31 | // from the underlying io.Reader. 32 | type ByteReader interface { 33 | io.Reader 34 | io.ByteReader 35 | } 36 | 37 | var _ ByteReader = (*bufio.Reader)(nil) 38 | 39 | // BufferedReader is an interface accepted by all decompression Readers. 40 | // It guarantees that the decompressor never reads more data than is necessary 41 | // from the underlying io.Reader. Since BufferedReader allows a decompressor 42 | // to peek at bytes further along in the stream without advancing the read 43 | // pointer, decompression can experience a significant performance gain when 44 | // provided a reader that satisfies this interface. Thus, a decompressor will 45 | // prefer this interface over ByteReader for performance reasons. 46 | // 47 | // The bufio.Reader satisfies this interface. 48 | type BufferedReader interface { 49 | io.Reader 50 | 51 | // Buffered returns the number of bytes currently buffered. 52 | // 53 | // This value becomes invalid following the next Read/Discard operation. 54 | Buffered() int 55 | 56 | // Peek returns the next n bytes without advancing the reader. 57 | // 58 | // If Peek returns fewer than n bytes, it also returns an error explaining 59 | // why the peek is short. Peek must support peeking of at least 8 bytes. 60 | // If 0 <= n <= Buffered(), Peek is guaranteed to succeed without reading 61 | // from the underlying io.Reader. 62 | // 63 | // This result becomes invalid following the next Read/Discard operation. 64 | Peek(n int) ([]byte, error) 65 | 66 | // Discard skips the next n bytes, returning the number of bytes discarded. 67 | // 68 | // If Discard skips fewer than n bytes, it also returns an error. 69 | // If 0 <= n <= Buffered(), Discard is guaranteed to succeed without reading 70 | // from the underlying io.Reader. 
71 | Discard(n int) (int, error) 72 | } 73 | 74 | var _ BufferedReader = (*bufio.Reader)(nil) 75 | -------------------------------------------------------------------------------- /brotli/bit_writer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package brotli 6 | 7 | import "io" 8 | 9 | type bitWriter struct { 10 | wr io.Writer 11 | offset int64 // Number of bytes written to underlying io.Writer 12 | } 13 | 14 | func (bw *bitWriter) Init(w io.Writer) { 15 | return 16 | } 17 | 18 | func (bw *bitWriter) Write(buf []byte) (int, error) { 19 | return 0, nil 20 | } 21 | 22 | func (bw *bitWriter) WriteBits(val, nb uint) { 23 | return 24 | } 25 | 26 | func (bw *bitWriter) WritePads() { 27 | return 28 | } 29 | 30 | func (bw *bitWriter) WriteSymbol(pe *prefixEncoder, sym uint) { 31 | return 32 | } 33 | -------------------------------------------------------------------------------- /brotli/brotli_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package brotli 6 | 7 | import ( 8 | "bytes" 9 | "errors" 10 | "flag" 11 | "os/exec" 12 | "strings" 13 | ) 14 | 15 | var zcheck = flag.Bool("zcheck", false, "verify test vectors with C brotli library") 16 | 17 | func cmdCompress(input []byte) ([]byte, error) { return cmdExec(input) } 18 | func cmdDecompress(input []byte) ([]byte, error) { return cmdExec(input, "-d") } 19 | 20 | // cmdExec executes the bzip2 tool, passing the input in as stdin. 21 | // It returns the stdout and an error. 
func cmdExec(input []byte, args ...string) ([]byte, error) {
	// The external program invoked is "bro" (this is the brotli package's
	// test helper); input is fed on stdin and both output streams captured.
	var stdout, stderr bytes.Buffer
	cmd := exec.Command("bro", args...)
	cmd.Stdin = bytes.NewReader(input)
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	runErr := cmd.Run()

	// Assume any stderr indicates an error and last line is the message.
	msg := strings.TrimSpace(stderr.String())
	if i := strings.LastIndex(msg, "\n"); i >= 0 {
		msg = msg[i+1:]
	}
	if msg != "" {
		return nil, errors.New(msg)
	}
	return stdout.Bytes(), runErr
}
// neededBits computes the minimum number of bits needed to encode n elements,
// i.e., the smallest nb such that 1<<nb >= n.
//
// Zero or one element can be encoded with zero bits.
func neededBits(n uint32) (nb uint) {
	if n == 0 {
		// Guard against unsigned wrap-around: n-1 would become 1<<32 - 1 and
		// the loop below would report 32 bits for an empty alphabet.
		return 0
	}
	// Count the significant bits of the largest index, n-1.
	for v := n - 1; v > 0; v >>= 1 {
		nb++
	}
	return nb
}
// extendSliceUints32s returns a slice of exactly n inner slices.
//
// If s already has room, it is resliced in place. Otherwise a new outer slice
// with 50% headroom is allocated, and every entry of s up to its capacity
// (not just its length) is carried over into it.
func extendSliceUints32s(s [][]uint32, n int) [][]uint32 {
	if n > cap(s) {
		grown := make([][]uint32, n, n*3/2)
		copy(grown, s[:cap(s)])
		return grown
	}
	return s[:n]
}
15 | var transformBuf bytes.Buffer 16 | for _, t := range transformLUT { 17 | transformBuf.WriteString(t.prefix + "\x00") 18 | transformBuf.WriteByte(byte(t.transform)) 19 | transformBuf.WriteString(t.suffix + "\x00") 20 | } 21 | 22 | vectors := []struct { 23 | crc uint32 24 | buf []byte 25 | }{ 26 | {crc: 0x5136cb04, buf: dictLUT[:]}, 27 | {crc: 0x8e91efb7, buf: contextLUT0[:]}, 28 | {crc: 0xd01a32f4, buf: contextLUT1[:]}, 29 | {crc: 0x0dd7a0d6, buf: contextLUT2[:]}, 30 | {crc: 0x3d965f81, buf: transformBuf.Bytes()}, 31 | } 32 | 33 | for i, v := range vectors { 34 | crc := crc32.ChecksumIEEE(v.buf) 35 | if crc != v.crc { 36 | t.Errorf("test %d, CRC-32 mismatch: got %08x, want %08x", i, crc, v.crc) 37 | } 38 | } 39 | } 40 | 41 | // This package relies on dynamic generation of LUTs to reduce the static 42 | // binary size. This benchmark attempts to measure the startup cost of init. 43 | // This benchmark is not thread-safe; so do not run it in parallel with other 44 | // tests or benchmarks! 45 | func BenchmarkInit(b *testing.B) { 46 | b.ReportAllocs() 47 | for i := 0; i < b.N; i++ { 48 | initLUTs() 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /brotli/context.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package brotli 6 | 7 | // These constants are defined in RFC section 7.1. 8 | const ( 9 | contextLSB6 = iota 10 | contextMSB6 11 | contextUTF8 12 | contextSigned 13 | 14 | numContextModes 15 | ) 16 | 17 | // These constants are defined in RFC sections 2 and 7.3. 18 | const ( 19 | maxLitContextIDs = 64 20 | maxDistContextIDs = 4 21 | ) 22 | 23 | // These LUTs are taken directly from RFC section 7.1. 
24 | var ( 25 | contextLUT0 = [256]uint8{ 26 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, 27 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28 | 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, 29 | 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, 30 | 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, 31 | 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, 32 | 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, 33 | 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, 34 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 35 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 36 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 37 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 38 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 39 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 40 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 41 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 42 | } 43 | 44 | contextLUT1 = [256]uint8{ 45 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 49 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 50 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 51 | 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 52 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, 53 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 60 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 61 | } 62 | 63 | contextLUT2 = [256]uint8{ 64 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 65 | 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 66 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 67 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 68 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 69 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 70 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 71 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 72 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 73 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 74 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 75 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 76 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 77 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 78 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 79 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 80 | } 81 | ) 82 | 83 | // These LUTs are dynamically computed from the LUTs in the specification. 84 | var ( 85 | contextP1LUT [256 * numContextModes]uint8 86 | contextP2LUT [256 * numContextModes]uint8 87 | ) 88 | 89 | // initContextLUTs computes LUTs so that context ID computation can be 90 | // efficiently without any branches. 91 | func initContextLUTs() { 92 | for i := 0; i < 256; i++ { 93 | for m := 0; m < numContextModes; m++ { 94 | base := m << 8 95 | 96 | // Operations performed here are specified in RFC section 7.1. 97 | switch m { 98 | case contextLSB6: 99 | contextP1LUT[base+i] = byte(i) & 0x3f 100 | contextP2LUT[base+i] = 0 101 | case contextMSB6: 102 | contextP1LUT[base+i] = byte(i) >> 2 103 | contextP2LUT[base+i] = 0 104 | case contextUTF8: 105 | contextP1LUT[base+i] = contextLUT0[byte(i)] 106 | contextP2LUT[base+i] = contextLUT1[byte(i)] 107 | case contextSigned: 108 | contextP1LUT[base+i] = contextLUT2[byte(i)] << 3 109 | contextP2LUT[base+i] = contextLUT2[byte(i)] 110 | default: 111 | panic("unknown context mode") 112 | } 113 | } 114 | } 115 | } 116 | 117 | // getLitContextID computes the context ID for literals from RFC section 7.1. 
// dictDecoder is an LZ77-style sliding dictionary. To keep the hot paths
// cheap, the methods do little or no validation of their arguments; callers
// must honor the invariant documented on each method. The backing buffer
// starts small and is enlarged lazily so that short streams stay cheap.

const (
	initSize   = 4096 // Size of the buffer allocated on first use
	growFactor = 4    // Multiplier applied each time the buffer is enlarged
)

type dictDecoder struct {
	// Invariant: len(hist) <= size
	size int    // Requested sliding window size
	hist []byte // Window contents; grows over time until it reaches size

	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
	wrPos int  // Next position in hist to be written
	rdPos int  // hist[:rdPos] has already been handed out by ReadFlush
	full  bool // Set once the window has wrapped around at least once
}

// Init resets the decoder for a window of the given size, reusing any buffer
// left over from a previous use.
func (dd *dictDecoder) Init(size int) {
	*dd = dictDecoder{size: size, hist: dd.hist}

	// Regardless of what size claims, start with a small dictionary to avoid
	// denial-of-service attacks with large memory allocation.
	if dd.hist == nil {
		dd.hist = make([]byte, initSize)
	}

	// Use as much of the existing capacity as the window size permits.
	n := cap(dd.hist)
	if n > size {
		n = size
	}
	dd.hist = dd.hist[:n]

	// A zeroed history lets LastBytes read "before the start" safely.
	for i := range dd.hist {
		dd.hist[i] = 0
	}
}

// HistSize reports the total amount of historical data in the dictionary.
func (dd *dictDecoder) HistSize() int {
	if !dd.full {
		return dd.wrPos
	}
	return dd.size
}

// AvailSize reports the available amount of output buffer space.
func (dd *dictDecoder) AvailSize() int {
	return len(dd.hist) - dd.wrPos
}

// WriteSlice returns the unwritten tail of the buffer for the caller to fill.
//
// This invariant will be kept: len(s) <= AvailSize()
func (dd *dictDecoder) WriteSlice() []byte {
	return dd.hist[dd.wrPos:]
}

// WriteMark records that cnt bytes were written into the slice obtained from
// WriteSlice.
//
// This invariant must be kept: 0 <= cnt <= AvailSize()
func (dd *dictDecoder) WriteMark(cnt int) {
	dd.wrPos += cnt
}

// WriteCopy appends length bytes copied from dist bytes back in the history.
// It returns how many bytes were actually copied, which is less than length
// when the output buffer runs out of room.
//
// This invariant must be kept: 0 < dist <= HistSize()
func (dd *dictDecoder) WriteCopy(dist, length int) int {
	start := dd.wrPos
	limit := start + length
	if n := len(dd.hist); limit > n {
		limit = n
	}

	// The source begins before the start of the buffer: it wraps around, so
	// first take what lies at the end of the buffer, then continue from 0.
	src := start - dist
	if src < 0 {
		src += len(dd.hist)
		dd.wrPos += copy(dd.hist[dd.wrPos:limit], dd.hist[src:])
		src = 0
	}

	// The source may overlap the destination. Each pass copies everything
	// written so far, so a short pattern is replicated in growing strides.
	for dd.wrPos < limit {
		dd.wrPos += copy(dd.hist[dd.wrPos:limit], dd.hist[src:dd.wrPos])
	}
	return dd.wrPos - start
}

// ReadFlush returns the bytes written since the previous flush. The result
// must be fully consumed before ReadFlush is called again.
func (dd *dictDecoder) ReadFlush() []byte {
	pending := dd.hist[dd.rdPos:dd.wrPos]
	dd.rdPos = dd.wrPos
	if dd.wrPos != len(dd.hist) {
		return pending
	}

	// The buffer is exhausted.
	if len(dd.hist) == dd.size {
		// Already at full window size: wrap around to the beginning.
		dd.wrPos, dd.rdPos = 0, 0
		dd.full = true
		return pending
	}

	// Otherwise switch to a larger buffer, capped at the window size.
	newSize := cap(dd.hist) * growFactor
	if newSize > dd.size {
		newSize = dd.size
	}
	grown := make([]byte, newSize)
	copy(grown, dd.hist)
	dd.hist = grown
	return pending
}

// LastBytes reports the two most recently written bytes, newest first. Bytes
// that would precede the write position wrap to the end of the buffer, which
// holds zeros until the window has been filled.
func (dd *dictDecoder) LastBytes() (p1, p2 byte) {
	n := len(dd.hist)
	switch {
	case dd.wrPos > 1:
		return dd.hist[dd.wrPos-1], dd.hist[dd.wrPos-2]
	case dd.wrPos > 0:
		return dd.hist[dd.wrPos-1], dd.hist[n-1]
	default:
		return dd.hist[n-1], dd.hist[n-2]
	}
}
4 | 5 | package brotli 6 | 7 | import ( 8 | "bytes" 9 | "strings" 10 | "testing" 11 | ) 12 | 13 | func TestDictDecoder(t *testing.T) { 14 | const abc = "ABC\n" 15 | const fox = "The quick brown fox jumped over the lazy dog!\n" 16 | const poem = "The Road Not Taken\nRobert Frost\n" + 17 | "\n" + 18 | "Two roads diverged in a yellow wood,\n" + 19 | "And sorry I could not travel both\n" + 20 | "And be one traveler, long I stood\n" + 21 | "And looked down one as far as I could\n" + 22 | "To where it bent in the undergrowth;\n" + 23 | "\n" + 24 | "Then took the other, as just as fair,\n" + 25 | "And having perhaps the better claim,\n" + 26 | "Because it was grassy and wanted wear;\n" + 27 | "Though as for that the passing there\n" + 28 | "Had worn them really about the same,\n" + 29 | "\n" + 30 | "And both that morning equally lay\n" + 31 | "In leaves no step had trodden black.\n" + 32 | "Oh, I kept the first for another day!\n" + 33 | "Yet knowing how way leads on to way,\n" + 34 | "I doubted if I should ever come back.\n" + 35 | "\n" + 36 | "I shall be telling this with a sigh\n" + 37 | "Somewhere ages and ages hence:\n" + 38 | "Two roads diverged in a wood, and I-\n" + 39 | "I took the one less traveled by,\n" + 40 | "And that has made all the difference.\n" 41 | var refs = []struct { 42 | dist int // Backward distance (0 if this is an insertion) 43 | length int // Length of copy or insertion 44 | }{ 45 | {0, 38}, {33, 3}, {0, 48}, {79, 3}, {0, 11}, {34, 5}, {0, 6}, {23, 7}, 46 | {0, 8}, {50, 3}, {0, 2}, {69, 3}, {34, 5}, {0, 4}, {97, 3}, {0, 4}, 47 | {43, 5}, {0, 6}, {7, 4}, {88, 7}, {0, 12}, {80, 3}, {0, 2}, {141, 4}, 48 | {0, 1}, {196, 3}, {0, 3}, {157, 3}, {0, 6}, {181, 3}, {0, 2}, {23, 3}, 49 | {77, 3}, {28, 5}, {128, 3}, {110, 4}, {70, 3}, {0, 4}, {85, 6}, {0, 2}, 50 | {182, 6}, {0, 4}, {133, 3}, {0, 7}, {47, 5}, {0, 20}, {112, 5}, {0, 1}, 51 | {58, 3}, {0, 8}, {59, 3}, {0, 4}, {173, 3}, {0, 5}, {114, 3}, {0, 4}, 52 | {92, 5}, {0, 2}, {71, 3}, {0, 2}, {76, 5}, 
{0, 1}, {46, 3}, {96, 4}, 53 | {130, 4}, {0, 3}, {360, 3}, {0, 3}, {178, 5}, {0, 7}, {75, 3}, {0, 3}, 54 | {45, 6}, {0, 6}, {299, 6}, {180, 3}, {70, 6}, {0, 1}, {48, 3}, {66, 4}, 55 | {0, 3}, {47, 5}, {0, 9}, {325, 3}, {0, 1}, {359, 3}, {318, 3}, {0, 2}, 56 | {199, 3}, {0, 1}, {344, 3}, {0, 3}, {248, 3}, {0, 10}, {310, 3}, {0, 3}, 57 | {93, 6}, {0, 3}, {252, 3}, {157, 4}, {0, 2}, {273, 5}, {0, 14}, {99, 4}, 58 | {0, 1}, {464, 4}, {0, 2}, {92, 4}, {495, 3}, {0, 1}, {322, 4}, {16, 4}, 59 | {0, 3}, {402, 3}, {0, 2}, {237, 4}, {0, 2}, {432, 4}, {0, 1}, {483, 5}, 60 | {0, 2}, {294, 4}, {0, 2}, {306, 3}, {113, 5}, {0, 1}, {26, 4}, {164, 3}, 61 | {488, 4}, {0, 1}, {542, 3}, {248, 6}, {0, 5}, {205, 3}, {0, 8}, {48, 3}, 62 | {449, 6}, {0, 2}, {192, 3}, {328, 4}, {9, 5}, {433, 3}, {0, 3}, {622, 25}, 63 | {615, 5}, {46, 5}, {0, 2}, {104, 3}, {475, 10}, {549, 3}, {0, 4}, {597, 8}, 64 | {314, 3}, {0, 1}, {473, 6}, {317, 5}, {0, 1}, {400, 3}, {0, 3}, {109, 3}, 65 | {151, 3}, {48, 4}, {0, 4}, {125, 3}, {108, 3}, {0, 2}, 66 | } 67 | 68 | var want string 69 | var buf bytes.Buffer 70 | var dd dictDecoder 71 | dd.Init(1 << 11) 72 | 73 | checkLastBytes := func(str string) { 74 | if len(str) < 2 { 75 | str = "\x00\x00" + str 76 | } 77 | str = str[len(str)-2:] 78 | p1, p2 := dd.LastBytes() 79 | got := string([]byte{p2, p1}) 80 | if got != str { 81 | t.Errorf("last bytes mismatch: got %q, want %q", got, str) 82 | } 83 | } 84 | writeCopy := func(dist, length int) { 85 | if dist < length { 86 | cnt := (dist + length - 1) / dist 87 | want += strings.Repeat(want[len(want)-dist:], cnt)[:length] 88 | } else { 89 | want += want[len(want)-dist:][:length] 90 | } 91 | 92 | for length > 0 { 93 | length -= dd.WriteCopy(dist, length) 94 | if dd.AvailSize() == 0 { 95 | buf.Write(dd.ReadFlush()) 96 | } 97 | } 98 | 99 | checkLastBytes(want) 100 | } 101 | writeString := func(str string) { 102 | want += str 103 | 104 | for len(str) > 0 { 105 | cnt := copy(dd.WriteSlice(), str) 106 | str = str[cnt:] 107 | 
dd.WriteMark(cnt) 108 | if dd.AvailSize() == 0 { 109 | buf.Write(dd.ReadFlush()) 110 | } 111 | } 112 | 113 | checkLastBytes(want) 114 | } 115 | 116 | writeString("") 117 | writeString(".") 118 | str := poem 119 | for _, ref := range refs { 120 | if ref.dist == 0 { 121 | writeString(str[:ref.length]) 122 | } else { 123 | writeCopy(ref.dist, ref.length) 124 | } 125 | str = str[ref.length:] 126 | } 127 | writeCopy(dd.HistSize(), 33) 128 | writeString(abc) 129 | writeCopy(len(abc), 59*len(abc)) 130 | writeString(fox) 131 | writeCopy(len(fox), 9*len(fox)) 132 | writeString(".") 133 | writeCopy(1, 9) 134 | writeString(strings.ToUpper(poem)) 135 | writeCopy(len(poem), 7*len(poem)) 136 | writeCopy(dd.HistSize(), 10) 137 | 138 | buf.Write(dd.ReadFlush()) 139 | if buf.String() != want { 140 | t.Errorf("final string mismatch:\ngot %q\nwant %q", buf.String(), want) 141 | } 142 | } 143 | 144 | func BenchmarkDictDecoderCopy(b *testing.B) { 145 | nb := 1 << 24 146 | b.SetBytes(int64(nb)) 147 | 148 | for i := 0; i < b.N; i++ { 149 | var dd dictDecoder 150 | dd.Init(1 << 16) 151 | 152 | copy(dd.WriteSlice(), "abc") 153 | dd.WriteMark(3) 154 | 155 | dist, length := 3, nb 156 | for length > 0 { 157 | length -= dd.WriteCopy(dist, length) 158 | if dd.AvailSize() == 0 { 159 | dd.ReadFlush() 160 | } 161 | } 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /brotli/dict_encoder.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package brotli 6 | -------------------------------------------------------------------------------- /brotli/dict_encoder_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 
// prefixEncoder is the encoding counterpart of the prefix decoder. It has no
// fields yet.
// NOTE(review): this looks like a placeholder for a future implementation —
// confirm before relying on it.
type prefixEncoder struct{}

// Init accepts a set of prefix codes but currently does nothing with them.
func (pe *prefixEncoder) Init(codes []prefixCode) {}
-------------------------------------------------------------------------------- /brotli/testdata/compressed_file.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/compressed_file.br -------------------------------------------------------------------------------- /brotli/testdata/compressed_repeated: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/compressed_repeated -------------------------------------------------------------------------------- /brotli/testdata/compressed_repeated.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/compressed_repeated.br -------------------------------------------------------------------------------- /brotli/testdata/digits-best-1e4.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-best-1e4.br -------------------------------------------------------------------------------- /brotli/testdata/digits-best-1e5.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-best-1e5.br -------------------------------------------------------------------------------- /brotli/testdata/digits-best-1e6.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-best-1e6.br 
-------------------------------------------------------------------------------- /brotli/testdata/digits-default-1e4.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-default-1e4.br -------------------------------------------------------------------------------- /brotli/testdata/digits-default-1e5.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-default-1e5.br -------------------------------------------------------------------------------- /brotli/testdata/digits-default-1e6.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-default-1e6.br -------------------------------------------------------------------------------- /brotli/testdata/digits-speed-1e4.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-speed-1e4.br -------------------------------------------------------------------------------- /brotli/testdata/digits-speed-1e5.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-speed-1e5.br -------------------------------------------------------------------------------- /brotli/testdata/digits-speed-1e6.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-speed-1e6.br 
-------------------------------------------------------------------------------- /brotli/testdata/lcet10.txt.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/lcet10.txt.br -------------------------------------------------------------------------------- /brotli/testdata/mapsdatazrh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/mapsdatazrh -------------------------------------------------------------------------------- /brotli/testdata/mapsdatazrh.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/mapsdatazrh.br -------------------------------------------------------------------------------- /brotli/testdata/monkey: -------------------------------------------------------------------------------- 1 | znxcvnmz,xvnm.,zxcnv.,xcn.z,vn.zvn.zxcvn.,zxcn.vn.v,znm.,vnzx.,vnzxc.vn.z,vnz.,nv.z,nvmzxc,nvzxcvcnm.,vczxvnzxcnvmxc.zmcnvzm.,nvmc,nzxmc,vn.mnnmzxc,vnxcnmv,znvzxcnmv,.xcnvm,zxcnzxv.zx,qweryweurqioweupropqwutioweupqrioweutiopweuriopweuriopqwurioputiopqwuriowuqerioupqweropuweropqwurweuqriopuropqwuriopuqwriopuqweopruioqweurqweuriouqweopruioupqiytioqtyiowtyqptypryoqweutioioqtweqruowqeytiowquiourowetyoqwupiotweuqiorweuqroipituqwiorqwtioweuriouytuioerytuioweryuitoweytuiweyuityeruirtyuqriqweuropqweiruioqweurioqwuerioqwyuituierwotueryuiotweyrtuiwertyioweryrueioqptyioruyiopqwtjkasdfhlafhlasdhfjklashjkfhasjklfhklasjdfhklasdhfjkalsdhfklasdhjkflahsjdkfhklasfhjkasdfhasfjkasdhfklsdhalghhaf;hdklasfhjklashjklfasdhfasdjklfhsdjklafsd;hkldadfjjklasdhfjasddfjklfhakjklasdjfkl;asdjfasfljasdfhjklasdfhjkaghjkashf;djfklasdjfkljasdklfjklasdjfkljasdfkljaklfj 
-------------------------------------------------------------------------------- /brotli/testdata/monkey.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/monkey.br -------------------------------------------------------------------------------- /brotli/testdata/plrabn12.txt.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/plrabn12.txt.br -------------------------------------------------------------------------------- /brotli/testdata/random_org_10k.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/random_org_10k.bin -------------------------------------------------------------------------------- /brotli/testdata/random_org_10k.bin.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/random_org_10k.bin.br -------------------------------------------------------------------------------- /brotli/testdata/twain-best-1e4.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-best-1e4.br -------------------------------------------------------------------------------- /brotli/testdata/twain-best-1e5.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-best-1e5.br 
-------------------------------------------------------------------------------- /brotli/testdata/twain-best-1e6.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-best-1e6.br -------------------------------------------------------------------------------- /brotli/testdata/twain-default-1e4.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-default-1e4.br -------------------------------------------------------------------------------- /brotli/testdata/twain-default-1e5.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-default-1e5.br -------------------------------------------------------------------------------- /brotli/testdata/twain-default-1e6.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-default-1e6.br -------------------------------------------------------------------------------- /brotli/testdata/twain-speed-1e4.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-speed-1e4.br -------------------------------------------------------------------------------- /brotli/testdata/twain-speed-1e5.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-speed-1e5.br 
-------------------------------------------------------------------------------- /brotli/testdata/twain-speed-1e6.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-speed-1e6.br -------------------------------------------------------------------------------- /brotli/testdata/ukkonooa: -------------------------------------------------------------------------------- 1 | ukko nooa, ukko nooa oli kunnon mies, kun han meni saunaan, pisti laukun naulaan, ukko nooa, ukko nooa oli kunnon mies. -------------------------------------------------------------------------------- /brotli/testdata/ukkonooa.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/ukkonooa.br -------------------------------------------------------------------------------- /brotli/transform_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | package brotli 6 | 7 | import "testing" 8 | 9 | func TestTransform(t *testing.T) { 10 | vectors := []struct { 11 | id int 12 | input string 13 | output string 14 | }{ 15 | {id: 0, input: "Hello, world!", output: "Hello, world!"}, 16 | {id: 23, input: "groups of", output: "groups"}, 17 | {id: 42, input: "s for the ", output: "s for "}, 18 | {id: 48, input: "presentation", output: "prese"}, 19 | {id: 56, input: "maintenance", output: "maint"}, 20 | {id: 23, input: "Alexandria", output: "Alexand"}, 21 | {id: 23, input: "archives", output: "archi"}, 22 | {id: 49, input: "fighty", output: "fighting "}, 23 | {id: 49, input: "12", output: "1ing "}, 24 | {id: 49, input: "1", output: "ing "}, 25 | {id: 49, input: "", output: "ing "}, 26 | {id: 64, input: "123456789a", output: "1"}, 27 | {id: 64, input: "123456789", output: ""}, 28 | {id: 64, input: "1", output: ""}, 29 | {id: 64, input: "", output: ""}, 30 | {id: 3, input: "afloat", output: "float"}, 31 | {id: 3, input: "12", output: "2"}, 32 | {id: 3, input: "1", output: ""}, 33 | {id: 3, input: "", output: ""}, 34 | {id: 54, input: "123456789a", output: "a"}, 35 | {id: 54, input: "123456789", output: ""}, 36 | {id: 54, input: "1", output: ""}, 37 | {id: 54, input: "", output: ""}, 38 | {id: 73, input: "", output: " the of the "}, 39 | {id: 73, input: "dichlorodifluoromethanes", output: " the dichlorodifluoromethanes of the "}, 40 | {id: 15, input: "", output: " "}, 41 | {id: 15, input: "meow", output: " Meow "}, 42 | {id: 15, input: "-scale", output: " -scale "}, 43 | {id: 15, input: "почти", output: " Почти "}, 44 | {id: 15, input: "互联网", output: " 亗联网 "}, 45 | {id: 119, input: "", output: " ='"}, 46 | {id: 119, input: "meow", output: " MEOW='"}, 47 | {id: 119, input: "-scale", output: " -SCALE='"}, 48 | {id: 119, input: "почти", output: " ПОѧѢИ='"}, 49 | {id: 119, input: "互联网", output: " 亗聑罔='"}, 50 | } 51 | 52 | var buf [maxWordSize]byte 53 | for i, v := range vectors { 54 | cnt := transformWord(buf[:], 
// writer holds the state of a Brotli compressor. Every method below is a stub
// that returns zero values.
// NOTE(review): this appears to be an unimplemented placeholder — confirm
// before exposing it through any public API.
type writer struct {
	InputOffset  int64 // Total number of bytes issued to Write
	OutputOffset int64 // Total number of bytes written to underlying io.Writer

	wr  bitWriter // Output destination
	err error     // Persistent error
}

// writerConfig carries optional settings for newWriter. It has no usable
// fields yet.
type writerConfig struct {
	_ struct{} // Blank field to prevent unkeyed struct literals
}

// newWriter currently ignores both arguments and returns a nil writer together
// with a nil error.
// NOTE(review): callers cannot tell this apart from success and would
// dereference nil — confirm nothing calls this yet.
func newWriter(w io.Writer, conf *writerConfig) (*writer, error) {
	return nil, nil
}

// Write currently consumes nothing and reports (0, nil).
// NOTE(review): for a non-empty buf this does not match the usual io.Writer
// expectation that a short write comes with a non-nil error — confirm the
// intended behavior once implemented.
func (bw *writer) Write(buf []byte) (int, error) {
	return 0, nil
}

// Close currently does nothing and reports success.
func (bw *writer) Close() error {
	return nil
}

// Reset currently ignores w and reports success.
func (bw *writer) Reset(w io.Writer) error {
	return nil
}
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package bzip2 6 | 7 | import "github.com/dsnet/compress/bzip2/internal/sais" 8 | 9 | // The Burrows-Wheeler Transform implementation used here is based on the 10 | // Suffix Array by Induced Sorting (SA-IS) methodology by Nong, Zhang, and Chan. 11 | // This implementation uses the sais algorithm originally written by Yuta Mori. 12 | // 13 | // The SA-IS algorithm runs in O(n) and outputs a Suffix Array. There is a 14 | // mathematical relationship between Suffix Arrays and the Burrows-Wheeler 15 | // Transform, such that a SA can be converted to a BWT in O(n) time. 16 | // 17 | // References: 18 | // 19 | // http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf 20 | // https://github.com/cscott/compressjs/blob/master/lib/BWT.js 21 | // https://www.quora.com/How-can-I-optimize-burrows-wheeler-transform-and-inverse-transform-to-work-in-O-n-time-O-n-space 22 | type burrowsWheelerTransform struct { 23 | buf []byte 24 | sa []int 25 | perm []uint32 26 | } 27 | 28 | func (bwt *burrowsWheelerTransform) Encode(buf []byte) (ptr int) { 29 | if len(buf) == 0 { 30 | return -1 31 | } 32 | 33 | // TODO(dsnet): Find a way to avoid the duplicate input string method. 34 | // We only need to do this because suffix arrays (by definition) only 35 | // operate non-wrapped suffixes of a string. On the other hand, 36 | // the BWT specifically used in bzip2 operate on a strings that wrap-around 37 | // when being sorted. 38 | 39 | // Step 1: Concatenate the input string to itself so that we can use the 40 | // suffix array algorithm for bzip2's variant of BWT. 41 | n := len(buf) 42 | bwt.buf = append(append(bwt.buf[:0], buf...), buf...) 43 | if cap(bwt.sa) < 2*n { 44 | bwt.sa = make([]int, 2*n) 45 | } 46 | t := bwt.buf[:2*n] 47 | sa := bwt.sa[:2*n] 48 | 49 | // Step 2: Compute the suffix array (SA). 
The input string, t, will not be 50 | // modified, while the results will be written to the output, sa. 51 | sais.ComputeSA(t, sa) 52 | 53 | // Step 3: Convert the SA to a BWT. Since ComputeSA does not mutate the 54 | // input, we have two copies of the input; in buf and buf2. Thus, we write 55 | // the transformation to buf, while using buf2. 56 | var j int 57 | buf2 := t[n:] 58 | for _, i := range sa { 59 | if i < n { 60 | if i == 0 { 61 | ptr = j 62 | i = n 63 | } 64 | buf[j] = buf2[i-1] 65 | j++ 66 | } 67 | } 68 | return ptr 69 | } 70 | 71 | func (bwt *burrowsWheelerTransform) Decode(buf []byte, ptr int) { 72 | if len(buf) == 0 { 73 | return 74 | } 75 | 76 | // Step 1: Compute cumm, where cumm[ch] reports the total number of 77 | // characters that precede the character ch in the alphabet. 78 | var cumm [256]int 79 | for _, v := range buf { 80 | cumm[v]++ 81 | } 82 | var sum int 83 | for i, v := range cumm { 84 | cumm[i] = sum 85 | sum += v 86 | } 87 | 88 | // Step 2: Compute perm, where perm[ptr] contains a pointer to the next 89 | // byte in buf and the next pointer in perm itself. 90 | if cap(bwt.perm) < len(buf) { 91 | bwt.perm = make([]uint32, len(buf)) 92 | } 93 | perm := bwt.perm[:len(buf)] 94 | for i, b := range buf { 95 | perm[cumm[b]] = uint32(i) 96 | cumm[b]++ 97 | } 98 | 99 | // Step 3: Follow each pointer in perm to the next byte, starting with the 100 | // origin pointer. 101 | if cap(bwt.buf) < len(buf) { 102 | bwt.buf = make([]byte, len(buf)) 103 | } 104 | buf2 := bwt.buf[:len(buf)] 105 | i := perm[ptr] 106 | for j := range buf2 { 107 | buf2[j] = buf[i] 108 | i = perm[i] 109 | } 110 | copy(buf, buf2) 111 | } 112 | -------------------------------------------------------------------------------- /bzip2/bwt_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package bzip2 6 | 7 | import ( 8 | "testing" 9 | 10 | "github.com/dsnet/compress/internal/testutil" 11 | ) 12 | 13 | func TestBurrowsWheelerTransform(t *testing.T) { 14 | vectors := []struct { 15 | input []byte // The input test string 16 | output []byte // Expected output string after BWT 17 | ptr int // The BWT origin pointer 18 | }{{ 19 | input: []byte(""), 20 | output: []byte(""), 21 | ptr: -1, 22 | }, { 23 | input: []byte("Hello, world!"), 24 | output: []byte(",do!lHrellwo "), 25 | ptr: 3, 26 | }, { 27 | input: []byte("SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES"), 28 | output: []byte("TEXYDST.E.IXIXIXXSSMPPS.B..E.S.EUSFXDIIOIIIT"), 29 | ptr: 29, 30 | }, { 31 | input: []byte("0123456789"), 32 | output: []byte("9012345678"), 33 | ptr: 0, 34 | }, { 35 | input: []byte("9876543210"), 36 | output: []byte("1234567890"), 37 | ptr: 9, 38 | }, { 39 | input: []byte("The quick brown fox jumped over the lazy dog."), 40 | output: []byte("kynxederg.l ie hhpv otTu c uwd rfm eb qjoooza"), 41 | ptr: 9, 42 | }, { 43 | input: []byte("" + 44 | "Mary had a little lamb, its fleece was white as snow" + 45 | "Mary had a little lamb, its fleece was white as snow" + 46 | "Mary had a little lamb, its fleece was white as snow" + 47 | "Mary had a little lamb, its fleece was white as snow" + 48 | "Mary had a little lamb, its fleece was white as snow" + 49 | "Mary had a little lamb, its fleece was white as snow" + 50 | "Mary had a little lamb, its fleece was white as snow" + 51 | "Mary had a little lamb, its fleece was white as snow" + 52 | "Nary had a little lamb, its fleece was white as snow"), 53 | output: []byte("" + 54 | "dddddddddeeeeeeeeesssssssssyyyyyyyyy,,,,,,,,,eeeeeee" + 55 | "eeaaaaaaaaassssssssseeeeeeeeesssssssssbbbbbbbbbwwwww" + 56 | "wwww hhhhhhhhhlllllllllNMMMMMMMM www" + 57 | "wwwwwwmmmmmmmmmeeeeeeeeeaaaaaaaaatttttttttlllllllllc" + 58 | 
"cccccccceeeeeeeeelllllllll wwwwwwww" + 59 | "whhhhhhhhh lllllllll tttttttttffffff" + 60 | "fff aaaaaaaaasssssssssnnnnnnnnnaaaaaaaaatttt" + 61 | "tttttaaaaaaaaaaaaaaaaaa iiiiiiiiitttttttttii" + 62 | "iiiiiiiiiiiiiiiiooooooooo rrrrrrrrr"), 63 | ptr: 99, 64 | }, { 65 | input: []byte("" + 66 | "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTCTCTGAC" + 67 | "AGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAA" + 68 | "TACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATG" + 69 | "AAACGCATTAGCACCACCATTACCACCACCATCACCACCACCATCACCATTACCATTACCACAG" + 70 | "GTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGCT" + 71 | "TTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTACATCAG" + 72 | "TGGCAAATGCAGAACGTTTTCTGCGGGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCA"), 73 | output: []byte("" + 74 | "TAGAATAAATGGAGACTCTAATACTCTACTGGAAACAGACCACAAACATACCTGGTCGTAGATT" + 75 | "CCCCCCATCCCTAAGAAACGAGTCCCCACATCATCACCTCGACTGGGCCGAGACTAAGCCCCCA" + 76 | "ACTGAACCCCCTTACGAAGGCGGAAGCTCCGCCCTGTAGAAAAGACGAATGCCAACCCCCGTAA" + 77 | "AAAAAAGAATAAAAGGCGAATAGCGCAATAGGGGAGCAATTTTCGTACTTATAGAGGAGTGATT" + 78 | "ATTCTTTCTAACACGGTGGACACTAGGCTATTTATTTGCGAAGATTTGGAACGGGCCCACAAAC" + 79 | "ACTGAGGGACGGATCGATATAGATGCTATCGGTGGGTGGTTTTATAATAAATAAGATATTGGTC" + 80 | "TTTCACTCCCCTGCAATCAGGCCGGCAGCGAATAAAAGACTTTGCATAGAGCTTTTACTGTTTC"), 81 | ptr: 99, 82 | }, { 83 | input: testutil.MustLoadFile("testdata/gauntlet_test3.bin"), 84 | output: testutil.MustLoadFile("testdata/gauntlet_test3.bwt"), 85 | ptr: 0, 86 | }, { 87 | input: testutil.MustLoadFile("testdata/silesia_ooffice.bin"), 88 | output: testutil.MustLoadFile("testdata/silesia_ooffice.bwt"), 89 | ptr: 461, 90 | }, { 91 | input: testutil.MustLoadFile("testdata/silesia_xray.bin"), 92 | output: testutil.MustLoadFile("testdata/silesia_xray.bwt"), 93 | ptr: 1532, 94 | }, { 95 | input: testutil.MustLoadFile("testdata/testfiles_test3.bin"), 96 | output: testutil.MustLoadFile("testdata/testfiles_test3.bwt"), 97 | ptr: 0, 98 | }, 
// cmdCompress compresses input by running the external bzip2 tool with -z.
func cmdCompress(input []byte) ([]byte, error) { return cmdExec(input, "-z") }

// cmdDecompress decompresses input by running the external bzip2 tool with -d.
func cmdDecompress(input []byte) ([]byte, error) { return cmdExec(input, "-d") }

// cmdExec runs the bzip2 tool with the given arguments, feeding input to its
// stdin. It returns whatever the tool wrote to stdout. If the tool wrote
// anything to stderr, the last line of that output is returned as the error
// and no data is returned.
func cmdExec(input []byte, args ...string) ([]byte, error) {
	var stdout, stderr bytes.Buffer

	cmd := exec.Command("bzip2", args...)
	cmd.Stdin = bytes.NewReader(input)
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	runErr := cmd.Run()

	// Assume any stderr indicates an error and last line is the message.
	lines := strings.Split(strings.TrimSpace(stderr.String()), "\n")
	if n := len(lines); n > 0 && lines[n-1] != "" {
		return nil, errors.New(lines[n-1])
	}
	return stdout.Bytes(), runErr
}
83 | wr, err := NewWriter(&buf1, nil) 84 | if err != nil { 85 | t.Errorf("NewWriter() = (_, %v), want (_, nil)", err) 86 | } 87 | n, err := io.Copy(wr, bytes.NewReader(v.data)) 88 | if n != int64(len(v.data)) || err != nil { 89 | t.Errorf("Copy() = (%d, %v), want (%d, nil)", n, err, len(v.data)) 90 | } 91 | if err := wr.Close(); err != nil { 92 | t.Errorf("Close() = %v, want nil", err) 93 | } 94 | 95 | // Verify that the compression ratio is within expected bounds. 96 | ratio := float64(len(v.data)) / float64(buf1.Len()) 97 | if ratio < v.ratio { 98 | t.Errorf("poor compression ratio: %0.2f < %0.2f", ratio, v.ratio) 99 | } 100 | 101 | // Verify that the C library can decompress the output of Writer and 102 | // that the Reader can decompress the output of the C library. 103 | if *zcheck { 104 | zd, err := cmdDecompress(buf1.Bytes()) 105 | if err != nil { 106 | t.Errorf("unexpected cmdDecompress error: %v", err) 107 | } 108 | if got, want, ok := testutil.BytesCompare(zd, v.data); !ok { 109 | t.Errorf("output data mismatch:\ngot %s\nwant %s", got, want) 110 | } 111 | zc, err := cmdCompress(v.data) 112 | if err != nil { 113 | t.Errorf("unexpected cmdCompress error: %v", err) 114 | } 115 | zratio := float64(len(v.data)) / float64(len(zc)) 116 | if ratio < 0.9*zratio { 117 | t.Errorf("poor compression ratio: %0.2f < %0.2f", ratio, 0.9*zratio) 118 | } 119 | buf1.Reset() 120 | buf1.Write(zc) // Use output of C library for Reader test 121 | } 122 | 123 | // Decompress the output. 
124 | rd, err := NewReader(&buf1, nil) 125 | if err != nil { 126 | t.Errorf("NewReader() = (_, %v), want (_, nil)", err) 127 | } 128 | n, err = io.Copy(&buf2, rd) 129 | if n != int64(len(v.data)) || err != nil { 130 | t.Errorf("Copy() = (%d, %v), want (%d, nil)", n, err, len(v.data)) 131 | } 132 | if err := rd.Close(); err != nil { 133 | t.Errorf("Close() = %v, want nil", err) 134 | } 135 | if got, want, ok := testutil.BytesCompare(buf2.Bytes(), v.data); !ok { 136 | t.Errorf("output data mismatch:\ngot %s\nwant %s", got, want) 137 | } 138 | }) 139 | } 140 | } 141 | 142 | func runBenchmarks(b *testing.B, f func(b *testing.B, buf []byte, lvl int)) { 143 | for _, td := range testdata { 144 | if len(td.data) == 0 { 145 | continue 146 | } 147 | if testing.Short() && !(td.name == "Twain" || td.name == "Digits") { 148 | continue 149 | } 150 | for _, tl := range levels { 151 | for _, ts := range sizes { 152 | buf := testutil.ResizeData(td.data, ts.size) 153 | b.Run(td.name+"/"+tl.name+"/"+ts.name, func(b *testing.B) { 154 | f(b, buf, tl.level) 155 | }) 156 | } 157 | } 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /bzip2/common.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // Package bzip2 implements the BZip2 compressed data format. 6 | // 7 | // Canonical C implementation: 8 | // 9 | // http://bzip.org 10 | // 11 | // Unofficial format specification: 12 | // 13 | // https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf 14 | package bzip2 15 | 16 | import ( 17 | "fmt" 18 | "hash/crc32" 19 | 20 | "github.com/dsnet/compress/internal" 21 | "github.com/dsnet/compress/internal/errors" 22 | ) 23 | 24 | // There does not exist a formal specification of the BZip2 format. 
As such, 25 | // much of this work is derived by either reverse engineering the original C 26 | // source code or using secondary sources. 27 | // 28 | // Significant amounts of fuzz testing is done to ensure that outputs from 29 | // this package is properly decoded by the C library. Furthermore, we test that 30 | // both this package and the C library agree about what inputs are invalid. 31 | // 32 | // Compression stack: 33 | // Run-length encoding 1 (RLE1) 34 | // Burrows-Wheeler transform (BWT) 35 | // Move-to-front transform (MTF) 36 | // Run-length encoding 2 (RLE2) 37 | // Prefix encoding (PE) 38 | // 39 | // References: 40 | // http://bzip.org/ 41 | // https://en.wikipedia.org/wiki/Bzip2 42 | // https://code.google.com/p/jbzip2/ 43 | 44 | const ( 45 | BestSpeed = 1 46 | BestCompression = 9 47 | DefaultCompression = 6 48 | ) 49 | 50 | const ( 51 | hdrMagic = 0x425a // Hex of "BZ" 52 | blkMagic = 0x314159265359 // BCD of PI 53 | endMagic = 0x177245385090 // BCD of sqrt(PI) 54 | 55 | blockSize = 100000 56 | ) 57 | 58 | func errorf(c int, f string, a ...interface{}) error { 59 | return errors.Error{Code: c, Pkg: "bzip2", Msg: fmt.Sprintf(f, a...)} 60 | } 61 | 62 | func panicf(c int, f string, a ...interface{}) { 63 | errors.Panic(errorf(c, f, a...)) 64 | } 65 | 66 | // errWrap converts a lower-level errors.Error to be one from this package. 67 | // The replaceCode passed in will be used to replace the code for any errors 68 | // with the errors.Invalid code. 69 | // 70 | // For the Reader, set this to errors.Corrupted. 71 | // For the Writer, set this to errors.Internal. 72 | func errWrap(err error, replaceCode int) error { 73 | if cerr, ok := err.(errors.Error); ok { 74 | if errors.IsInvalid(cerr) { 75 | cerr.Code = replaceCode 76 | } 77 | err = errorf(cerr.Code, "%s", cerr.Msg) 78 | } 79 | return err 80 | } 81 | 82 | var errClosed = errorf(errors.Closed, "") 83 | 84 | // crc computes the CRC-32 used by BZip2. 
85 | // 86 | // The CRC-32 computation in bzip2 treats bytes as having bits in big-endian 87 | // order. That is, the MSB is read before the LSB. Thus, we can use the 88 | // standard library version of CRC-32 IEEE with some minor adjustments. 89 | // 90 | // The byte array is used as an intermediate buffer to swap the bits of every 91 | // byte of the input. 92 | type crc struct { 93 | val uint32 94 | buf [256]byte 95 | } 96 | 97 | // update computes the CRC-32 of appending buf to c. 98 | func (c *crc) update(buf []byte) { 99 | cval := internal.ReverseUint32(c.val) 100 | for len(buf) > 0 { 101 | n := len(buf) 102 | if n > len(c.buf) { 103 | n = len(c.buf) 104 | } 105 | for i, b := range buf[:n] { 106 | c.buf[i] = internal.ReverseLUT[b] 107 | } 108 | cval = crc32.Update(cval, crc32.IEEETable, c.buf[:n]) 109 | buf = buf[n:] 110 | } 111 | c.val = internal.ReverseUint32(cval) 112 | } 113 | -------------------------------------------------------------------------------- /bzip2/common_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | package bzip2 6 | 7 | import ( 8 | "strconv" 9 | "testing" 10 | 11 | "github.com/dsnet/compress/internal/testutil" 12 | ) 13 | 14 | func TestCRC(t *testing.T) { 15 | vectors := []struct { 16 | crc uint32 17 | str string 18 | }{ 19 | {0x00000000, ""}, 20 | {0x19939b6b, "a"}, 21 | {0xe993fdcd, "ab"}, 22 | {0x648cbb73, "abc"}, 23 | {0x3d4c334b, "abcd"}, 24 | {0xa35b4df4, "abcde"}, 25 | {0xa0f54fb9, "abcdef"}, 26 | {0x077539d7, "abcdefg"}, 27 | {0x5024ec61, "abcdefgh"}, 28 | {0x63e0bcd4, "abcdefghi"}, 29 | {0x73826444, "abcdefghij"}, 30 | {0xbf786ee7, "Discard medicine more than two years old."}, 31 | {0x106324f0, "He who has a shady past knows that nice guys finish last."}, 32 | {0x0ef9b7d7, "I wouldn't marry him with a ten foot pole."}, 33 | {0x2f42217b, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"}, 34 | {0xb64c598c, "The days of the digital watch are numbered. -Tom Stoppard"}, 35 | {0xf4e5a7c3, "Nepal premier won't resign."}, 36 | {0x2b43233e, "For every action there is an equal and opposite government program."}, 37 | {0x7b83ef6f, "His money is twice tainted: 'taint yours and 'taint mine."}, 38 | {0x503c2258, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"}, 39 | {0x4dc300fa, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"}, 40 | {0x97fa4243, "size: a.out: bad magic"}, 41 | {0xc9549847, "The major problem is with sendmail. -Mark Horton"}, 42 | {0xeaa630ab, "Give me a rock, paper and scissors and I will move the world. CCFestoon"}, 43 | {0xcd8bb88c, "If the enemy is within range, then so are you."}, 44 | {0x95cc0d9d, "It's well we cannot hear the screams/That we create in others' dreams."}, 45 | {0x14c42897, "You remind me of a TV show, but that's all right: I watch it anyway."}, 46 | {0x0de498f1, "C is as portable as Stonehedge!!"}, 47 | {0x79e7cf74, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. 
Huxley"}, 48 | {0x33e2329e, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"}, 49 | {0xa4302570, "How can you write a big system without C++? -Paul Glick"}, 50 | } 51 | 52 | var crc crc 53 | for i, v := range vectors { 54 | splits := []int{ 55 | 0 * (len(v.str) / 1), 56 | 1 * (len(v.str) / 4), 57 | 2 * (len(v.str) / 4), 58 | 3 * (len(v.str) / 4), 59 | 1 * (len(v.str) / 1), 60 | } 61 | for _, j := range splits { 62 | str1, str2 := []byte(v.str[:j]), []byte(v.str[j:]) 63 | crc.val = 0 64 | crc.update(str1) 65 | if crc.update(str2); crc.val != v.crc { 66 | t.Errorf("test %d, crc.update(crc1, str2): got 0x%08x, want 0x%08x", i, crc.val, v.crc) 67 | } 68 | } 69 | } 70 | } 71 | 72 | func BenchmarkCRC(b *testing.B) { 73 | var c crc 74 | d := testutil.ResizeData([]byte("the quick brown fox jumped over the lazy dog"), 1<<16) 75 | for i := 1; i <= len(d); i <<= 4 { 76 | b.Run(strconv.Itoa(i), func(b *testing.B) { 77 | b.SetBytes(int64(i)) 78 | for j := 0; j < b.N; j++ { 79 | c.update(d[:i]) 80 | } 81 | }) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /bzip2/fuzz_off.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | //go:build !gofuzz 6 | // +build !gofuzz 7 | 8 | // This file exists to suppress fuzzing details from release builds. 9 | 10 | package bzip2 11 | 12 | type fuzzReader struct{} 13 | 14 | func (*fuzzReader) updateChecksum(int64, uint32) {} 15 | -------------------------------------------------------------------------------- /bzip2/fuzz_on.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 
// fuzzReader records the checksum fields encountered while reading a stream.
type fuzzReader struct {
	Checksums Checksums
}

// updateChecksum records a checksum observation.
//
// A non-negative pos appends a new (pos, val) record. A negative pos instead
// overwrites the value of the most recently appended record; at least one
// record must already exist in that case.
func (fr *fuzzReader) updateChecksum(pos int64, val uint32) {
	if pos < 0 {
		last := len(fr.Checksums) - 1
		fr.Checksums[last].Value = val
		return
	}
	fr.Checksums = append(fr.Checksums, Checksum{Offset: pos, Value: val})
}

// Checksum describes a single 32-bit checksum field inside a stream.
type Checksum struct {
	Offset int64  // Bit offset of the checksum
	Value  uint32 // Checksum value
}

// Checksums is a list of checksum fields.
type Checksums []Checksum

// Apply returns a copy of d in which every checksum field listed in cs has
// been overwritten with the recorded value. The argument itself is not
// modified.
func (cs Checksums) Apply(d []byte) []byte {
	out := append([]byte(nil), d...)
	for i := range cs {
		setU32(out, cs[i].Offset, cs[i].Value)
	}
	return out
}

// setU32 stores val into d as 32 consecutive bits starting at bit offset pos.
// Bits are numbered from the most-significant bit of d[0], and the
// most-significant bit of val is written first.
func setU32(d []byte, pos int64, val uint32) {
	start := uint64(pos)
	for i := uint64(0); i < 32; i++ {
		bitPos := start + i
		byteIdx, shift := bitPos/8, 7-bitPos%8
		bit := byte(val>>(31-i)) & 1
		d[byteIdx] = d[byteIdx]&^(1<<shift) | bit<<shift
	}
}

// Verify reports whether every checksum field in d holds the value recorded
// for it in cs.
func (cs Checksums) Verify(d []byte) bool {
	for i := range cs {
		if cs[i].Value != getU32(d, cs[i].Offset) {
			return false
		}
	}
	return true
}

// getU32 reads 32 consecutive bits from d starting at bit offset pos, using
// the same bit numbering as setU32, and returns them as a uint32 with the
// first bit read as the most-significant bit.
func getU32(d []byte, pos int64) (val uint32) {
	start := uint64(pos)
	for i := uint64(0); i < 32; i++ {
		bitPos := start + i
		bit := uint32(d[bitPos/8]>>(7-bitPos%8)) & 1
		val = val<<1 | bit
	}
	return val
}
23 | func ComputeSA(t []byte, sa []int) { 24 | if len(sa) != len(t) { 25 | panic("mismatching sizes") 26 | } 27 | computeSA_byte(t, sa, 0, len(t), 256) 28 | } 29 | -------------------------------------------------------------------------------- /bzip2/mtf_rle2.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package bzip2 6 | 7 | import "github.com/dsnet/compress/internal/errors" 8 | 9 | // moveToFront implements both the MTF and RLE stages of bzip2 at the same time. 10 | // Any runs of zeros in the encoded output will be replaced by a sequence of 11 | // RUNA and RUNB symbols are encode the length of the run. 12 | // 13 | // The RLE encoding used can actually be encoded to and decoded from using 14 | // normal two's complement arithmetic. The methodology for doing so is below. 15 | // 16 | // Assuming the following: 17 | // 18 | // num: The value being encoded by RLE encoding. 19 | // run: A sequence of RUNA and RUNB symbols represented as a binary integer, 20 | // where RUNA is the 0 bit, RUNB is the 1 bit, and least-significant RUN 21 | // symbols are at the least-significant bit positions. 22 | // cnt: The number of RUNA and RUNB symbols. 23 | // 24 | // Then the RLE encoding used by bzip2 has this mathematical property: 25 | // 26 | // num+1 == (1< len(mtf.dictBuf) { 38 | panicf(errors.Internal, "alphabet too large") 39 | } 40 | copy(mtf.dictBuf[:], dict) 41 | mtf.dictLen = len(dict) 42 | mtf.blkSize = blkSize 43 | } 44 | 45 | func (mtf *moveToFront) Encode(vals []byte) (syms []uint16) { 46 | dict := mtf.dictBuf[:mtf.dictLen] 47 | syms = mtf.syms[:0] 48 | 49 | if len(vals) > mtf.blkSize { 50 | panicf(errors.Internal, "exceeded block size") 51 | } 52 | 53 | var lastNum uint32 54 | for _, val := range vals { 55 | // Normal move-to-front transform. 
56 | var idx uint8 // Reverse lookup idx in dict 57 | for di, dv := range dict { 58 | if dv == val { 59 | idx = uint8(di) 60 | break 61 | } 62 | } 63 | copy(dict[1:], dict[:idx]) 64 | dict[0] = val 65 | 66 | // Run-length encoding augmentation. 67 | if idx == 0 { 68 | lastNum++ 69 | continue 70 | } 71 | if lastNum > 0 { 72 | for rc := lastNum + 1; rc != 1; rc >>= 1 { 73 | syms = append(syms, uint16(rc&1)) 74 | } 75 | lastNum = 0 76 | } 77 | syms = append(syms, uint16(idx)+1) 78 | } 79 | if lastNum > 0 { 80 | for rc := lastNum + 1; rc != 1; rc >>= 1 { 81 | syms = append(syms, uint16(rc&1)) 82 | } 83 | } 84 | mtf.syms = syms 85 | return syms 86 | } 87 | 88 | func (mtf *moveToFront) Decode(syms []uint16) (vals []byte) { 89 | dict := mtf.dictBuf[:mtf.dictLen] 90 | vals = mtf.vals[:0] 91 | 92 | var lastCnt uint 93 | var lastRun uint32 94 | for _, sym := range syms { 95 | // Run-length encoding augmentation. 96 | if sym < 2 { 97 | lastRun |= uint32(sym) << lastCnt 98 | lastCnt++ 99 | continue 100 | } 101 | if lastCnt > 0 { 102 | cnt := int((1< mtf.blkSize || lastCnt > 24 { 104 | panicf(errors.Corrupted, "run-length decoding exceeded block size") 105 | } 106 | for i := cnt; i > 0; i-- { 107 | vals = append(vals, dict[0]) 108 | } 109 | lastCnt, lastRun = 0, 0 110 | } 111 | 112 | // Normal move-to-front transform. 
113 | val := dict[sym-1] // Forward lookup val in dict 114 | copy(dict[1:], dict[:sym-1]) 115 | dict[0] = val 116 | 117 | if len(vals) >= mtf.blkSize { 118 | panicf(errors.Corrupted, "run-length decoding exceeded block size") 119 | } 120 | vals = append(vals, val) 121 | } 122 | if lastCnt > 0 { 123 | cnt := int((1< mtf.blkSize || lastCnt > 24 { 125 | panicf(errors.Corrupted, "run-length decoding exceeded block size") 126 | } 127 | for i := cnt; i > 0; i-- { 128 | vals = append(vals, dict[0]) 129 | } 130 | } 131 | mtf.vals = vals 132 | return vals 133 | } 134 | -------------------------------------------------------------------------------- /bzip2/mtf_rle2_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package bzip2 6 | 7 | import ( 8 | "reflect" 9 | "testing" 10 | 11 | "github.com/dsnet/compress/internal/errors" 12 | ) 13 | 14 | func TestMoveToFront(t *testing.T) { 15 | getDict := func(buf []byte) []uint8 { 16 | var dictMap [256]bool 17 | for _, b := range buf { 18 | dictMap[b] = true 19 | } 20 | var dictArr [256]uint8 21 | dict := dictArr[:0] 22 | for j, b := range dictMap { 23 | if b { 24 | dict = append(dict, uint8(j)) 25 | } 26 | } 27 | return dict 28 | } 29 | 30 | vectors := []struct { 31 | size int // If zero, default to 1MiB 32 | input []byte 33 | output []uint16 34 | fail bool 35 | }{{ 36 | input: []byte{}, 37 | output: []uint16{}, 38 | }, { 39 | input: []byte{2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, 40 | output: []uint16{1, 1, 0}, 41 | }, { 42 | input: []byte{9, 8, 7, 6, 5, 4, 3, 2, 1}, 43 | output: []uint16{9, 9, 9, 9, 9, 9, 9, 9, 9}, 44 | }, { 45 | input: []byte{42, 47, 42, 47, 42, 47, 42, 47, 42, 47, 42, 47}, 46 | output: []uint16{0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, 47 | }, { 48 | input: []byte{0, 5, 2, 3, 4, 4, 3, 1, 2, 3, 3, 3, 3, 3, 3, 4, 4, 
4, 5, 2, 3, 3}, 49 | output: []uint16{0, 6, 4, 5, 6, 0, 2, 6, 4, 3, 0, 1, 4, 1, 5, 4, 4, 0}, 50 | }, { 51 | input: []byte{100, 111, 108, 104, 10, 114, 101, 108, 108, 119, 111, 32}, 52 | output: []uint16{3, 7, 7, 7, 5, 8, 8, 5, 0, 9, 7, 9}, 53 | }, { 54 | input: []byte{ 55 | 103, 33, 107, 121, 110, 120, 101, 100, 101, 114, 44, 100, 111, 10, 32, 56 | 108, 32, 105, 101, 108, 32, 104, 104, 112, 72, 118, 32, 111, 116, 84, 57 | 117, 32, 99, 32, 114, 101, 108, 117, 119, 108, 100, 119, 32, 114, 102, 58 | 109, 32, 101, 111, 98, 32, 113, 106, 111, 111, 32, 111, 122, 97, 59 | }, 60 | output: []uint16{ 61 | 13, 4, 17, 30, 21, 30, 16, 16, 2, 26, 12, 4, 24, 12, 13, 23, 2, 22, 9, 62 | 4, 4, 22, 0, 25, 18, 29, 5, 10, 28, 21, 29, 5, 25, 2, 17, 13, 13, 6, 30, 63 | 3, 17, 3, 7, 7, 27, 29, 4, 9, 13, 28, 4, 30, 30, 5, 0, 4, 2, 31, 31, 64 | }, 65 | }, { 66 | input: []byte{ 67 | 74, 69, 205, 44, 38, 175, 207, 101, 59, 108, 42, 155, 208, 50, 38, 115, 68 | 190, 138, 163, 35, 13, 172, 160, 74, 68, 173, 99, 57, 213, 158, 248, 69 | 209, 176, 52, 135, 21, 26, 248, 186, 186, 219, 113, 172, 163, 13, 22, 70 | 100, 134, 4, 141, 53, 244, 99, 126, 214, 59, 53, 43, 146, 67, 131, 51, 71 | 212, 146, 245, 72 | }, 73 | output: []uint16{20, 20, 44, 13, 11, 41, 45, 26, 22, 27, 17, 37, 46, 21, 74 | 10, 31, 46, 37, 42, 24, 21, 43, 43, 22, 33, 44, 35, 34, 49, 45, 54, 75 | 49, 48, 38, 46, 35, 37, 7, 49, 0, 52, 45, 19, 22, 21, 40, 45, 48, 42, 76 | 49, 46, 53, 24, 49, 53, 41, 6, 48, 52, 51, 52, 52, 53, 5, 54, 77 | }, 78 | }, { 79 | input: []byte{ 80 | 153, 45, 45, 38, 135, 179, 26, 154, 165, 170, 170, 170, 170, 18, 109, 81 | 240, 174, 150, 87, 164, 30, 30, 30, 30, 30, 30, 30, 148, 190, 10, 60, 82 | 13, 13, 13, 13, 13, 6, 81, 200, 13, 225, 32, 17, 43, 22, 179, 13, 13, 83 | 17, 236, 236, 236, 236, 236, 236, 236, 121, 211, 2, 211, 185, 54, 16, 84 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 50, 85 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 40, 86 | }, 87 | output: 
[]uint16{ 88 | 27, 17, 0, 15, 25, 33, 15, 29, 31, 32, 0, 0, 17, 28, 40, 34, 33, 31, 89 | 34, 25, 1, 1, 34, 36, 23, 33, 25, 1, 0, 25, 34, 37, 4, 39, 32, 31, 34, 90 | 33, 26, 7, 0, 5, 40, 1, 1, 38, 40, 34, 2, 40, 40, 38, 38, 0, 1, 1, 0, 91 | 40, 2, 0, 1, 1, 0, 40, 92 | }, 93 | }, { 94 | size: 10, 95 | input: []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3}, 96 | output: []uint16{0, 1, 2, 1, 3, 0}, 97 | fail: false, 98 | }, { 99 | size: 10, 100 | input: []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3}, 101 | output: []uint16{0, 1, 2, 1, 3, 1}, 102 | fail: true, 103 | }, { 104 | size: 10, 105 | input: []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3}, 106 | output: []uint16{0, 1, 2, 1, 3, 2, 2}, 107 | fail: true, 108 | }, { 109 | size: 10, 110 | input: []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3}, 111 | output: []uint16{1, 1, 2, 1, 3, 0}, 112 | fail: true, 113 | }, { 114 | size: 9, 115 | input: []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3}, 116 | fail: true, 117 | }} 118 | 119 | mtf := new(moveToFront) 120 | for i, v := range vectors { 121 | var err error 122 | var input []byte 123 | var output []uint16 124 | func() { 125 | defer errors.Recover(&err) 126 | if v.size == 0 { 127 | v.size = 1 << 20 128 | } 129 | dict := getDict(v.input) 130 | mtf.Init(dict, v.size) 131 | output = mtf.Encode(v.input) 132 | mtf.Init(dict, v.size) 133 | input = mtf.Decode(v.output) 134 | }() 135 | 136 | fail := err != nil 137 | if fail && !v.fail { 138 | t.Errorf("test %d, unexpected error: %v", i, err) 139 | } 140 | if !fail && v.fail { 141 | t.Errorf("test %d, unexpected success", i) 142 | } 143 | if fail || v.fail { 144 | continue 145 | } 146 | if !reflect.DeepEqual(input, v.input) && !(len(input) == 0 && len(v.input) == 0) { 147 | t.Errorf("test %d, input mismatch:\ngot %v\nwant %v", i, input, v.input) 148 | } 149 | if !reflect.DeepEqual(output, v.output) && !(len(output) == 0 && len(v.output) == 0) { 150 | t.Errorf("test %d, output mismatch:\ngot %v\nwant %v", i, output, v.output) 151 | } 152 | } 153 | } 154 | 
-------------------------------------------------------------------------------- /bzip2/prefix_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package bzip2 6 | 7 | import ( 8 | "reflect" 9 | "testing" 10 | 11 | "github.com/dsnet/compress/internal/prefix" 12 | ) 13 | 14 | func TestDegenerateCodes(t *testing.T) { 15 | vectors := []struct { 16 | input prefix.PrefixCodes 17 | output prefix.PrefixCodes 18 | }{{ 19 | input: []prefix.PrefixCode{ 20 | {Sym: 0, Len: 1}, 21 | }, 22 | output: []prefix.PrefixCode{ 23 | {Sym: 0, Len: 1, Val: 0}, // 0 24 | {Sym: 258, Len: 1, Val: 1}, // 1 25 | }, 26 | }, { 27 | input: []prefix.PrefixCode{ 28 | {Sym: 0, Len: 1}, 29 | {Sym: 1, Len: 1}, 30 | {Sym: 2, Len: 1}, 31 | }, 32 | output: []prefix.PrefixCode{ 33 | {Sym: 0, Len: 1, Val: 0}, // 0 34 | {Sym: 1, Len: 1, Val: 1}, // 1 35 | }, 36 | }, { 37 | input: []prefix.PrefixCode{ 38 | {Sym: 0, Len: 3}, 39 | {Sym: 1, Len: 4}, 40 | {Sym: 2, Len: 3}, 41 | }, 42 | output: []prefix.PrefixCode{ 43 | {Sym: 0, Len: 3, Val: 0}, // 000 44 | {Sym: 1, Len: 4, Val: 2}, // 0010 45 | {Sym: 2, Len: 3, Val: 4}, // 100 46 | {Sym: 258, Len: 4, Val: 10}, // 1010 47 | {Sym: 259, Len: 3, Val: 6}, // 110 48 | {Sym: 260, Len: 1, Val: 1}, // 1 49 | }, 50 | }, { 51 | input: []prefix.PrefixCode{ 52 | {Sym: 0, Len: 1}, 53 | {Sym: 1, Len: 3}, 54 | {Sym: 2, Len: 4}, 55 | {Sym: 3, Len: 3}, 56 | {Sym: 4, Len: 2}, 57 | }, 58 | output: []prefix.PrefixCode{ 59 | {Sym: 0, Len: 1, Val: 0}, // 0 60 | {Sym: 1, Len: 3, Val: 3}, // 011 61 | {Sym: 3, Len: 3, Val: 7}, // 111 62 | {Sym: 4, Len: 2, Val: 1}, // 01 63 | }, 64 | }} 65 | 66 | for i, v := range vectors { 67 | input := append(prefix.PrefixCodes(nil), v.input...) 
68 | output := handleDegenerateCodes(input) 69 | 70 | if !reflect.DeepEqual(output, v.output) { 71 | t.Errorf("test %d, output mismatch:\ngot %v\nwant %v", i, output, v.output) 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /bzip2/rle1.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package bzip2 6 | 7 | import "github.com/dsnet/compress/internal/errors" 8 | 9 | // rleDone is a special "error" to indicate that the RLE stage is done. 10 | var rleDone = errorf(errors.Unknown, "RLE1 stage is completed") 11 | 12 | // runLengthEncoding implements the first RLE stage of bzip2. Every sequence 13 | // of 4..255 duplicated bytes is replaced by only the first 4 bytes, and a 14 | // single byte representing the repeat length. Similar to the C bzip2 15 | // implementation, the encoder will always terminate repeat sequences with a 16 | // count (even if it is the end of the buffer), and it will also never produce 17 | // run lengths of 256..259. The decoder can handle the latter case. 
18 | // 19 | // For example, if the input was: 20 | // 21 | // input: "AAAAAAABBBBCCCD" 22 | // 23 | // Then the output will be: 24 | // 25 | // output: "AAAA\x03BBBB\x00CCCD" 26 | type runLengthEncoding struct { 27 | buf []byte 28 | idx int 29 | lastVal byte 30 | lastCnt int 31 | } 32 | 33 | func (rle *runLengthEncoding) Init(buf []byte) { 34 | *rle = runLengthEncoding{buf: buf} 35 | } 36 | 37 | func (rle *runLengthEncoding) Write(buf []byte) (int, error) { 38 | for i, b := range buf { 39 | if rle.lastVal != b { 40 | rle.lastCnt = 0 41 | } 42 | rle.lastCnt++ 43 | switch { 44 | case rle.lastCnt < 4: 45 | if rle.idx >= len(rle.buf) { 46 | return i, rleDone 47 | } 48 | rle.buf[rle.idx] = b 49 | rle.idx++ 50 | case rle.lastCnt == 4: 51 | if rle.idx+1 >= len(rle.buf) { 52 | return i, rleDone 53 | } 54 | rle.buf[rle.idx] = b 55 | rle.idx++ 56 | rle.buf[rle.idx] = 0 57 | rle.idx++ 58 | case rle.lastCnt < 256: 59 | rle.buf[rle.idx-1]++ 60 | default: 61 | if rle.idx >= len(rle.buf) { 62 | return i, rleDone 63 | } 64 | rle.lastCnt = 1 65 | rle.buf[rle.idx] = b 66 | rle.idx++ 67 | } 68 | rle.lastVal = b 69 | } 70 | return len(buf), nil 71 | } 72 | 73 | func (rle *runLengthEncoding) Read(buf []byte) (int, error) { 74 | for i := range buf { 75 | switch { 76 | case rle.lastCnt == -4: 77 | if rle.idx >= len(rle.buf) { 78 | return i, errorf(errors.Corrupted, "missing terminating run-length repeater") 79 | } 80 | rle.lastCnt = int(rle.buf[rle.idx]) 81 | rle.idx++ 82 | if rle.lastCnt > 0 { 83 | break // Break the switch 84 | } 85 | fallthrough // Count was zero, continue the work 86 | case rle.lastCnt <= 0: 87 | if rle.idx >= len(rle.buf) { 88 | return i, rleDone 89 | } 90 | b := rle.buf[rle.idx] 91 | rle.idx++ 92 | if b != rle.lastVal { 93 | rle.lastCnt = 0 94 | rle.lastVal = b 95 | } 96 | } 97 | buf[i] = rle.lastVal 98 | rle.lastCnt-- 99 | } 100 | return len(buf), nil 101 | } 102 | 103 | func (rle *runLengthEncoding) Bytes() []byte { return rle.buf[:rle.idx] } 104 | 
-------------------------------------------------------------------------------- /bzip2/rle1_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package bzip2 6 | 7 | import ( 8 | "bytes" 9 | "io" 10 | "strings" 11 | "testing" 12 | 13 | "github.com/dsnet/compress/internal/testutil" 14 | ) 15 | 16 | func TestRunLengthEncoder(t *testing.T) { 17 | vectors := []struct { 18 | size int 19 | input string 20 | output string 21 | done bool 22 | }{{ 23 | size: 0, 24 | input: "", 25 | output: "", 26 | }, { 27 | size: 6, 28 | input: "abc", 29 | output: "abc", 30 | }, { 31 | size: 6, 32 | input: "abcccc", 33 | output: "abccc", 34 | done: true, 35 | }, { 36 | size: 7, 37 | input: "abcccc", 38 | output: "abcccc\x00", 39 | }, { 40 | size: 14, 41 | input: "aaaabbbbcccc", 42 | output: "aaaa\x00bbbb\x00ccc", 43 | done: true, 44 | }, { 45 | size: 15, 46 | input: "aaaabbbbcccc", 47 | output: "aaaa\x00bbbb\x00cccc\x00", 48 | }, { 49 | size: 16, 50 | input: strings.Repeat("a", 4), 51 | output: "aaaa\x00", 52 | }, { 53 | size: 16, 54 | input: strings.Repeat("a", 255), 55 | output: "aaaa\xfb", 56 | }, { 57 | size: 16, 58 | input: strings.Repeat("a", 256), 59 | output: "aaaa\xfba", 60 | }, { 61 | size: 16, 62 | input: strings.Repeat("a", 259), 63 | output: "aaaa\xfbaaaa\x00", 64 | }, { 65 | size: 16, 66 | input: strings.Repeat("a", 500), 67 | output: "aaaa\xfbaaaa\xf1", 68 | }, { 69 | size: 64, 70 | input: "aaabbbcccddddddeeefgghiiijkllmmmmmmmmnnoo", 71 | output: "aaabbbcccdddd\x02eeefgghiiijkllmmmm\x04nnoo", 72 | }} 73 | 74 | buf := make([]byte, 3) 75 | for i, v := range vectors { 76 | rd := strings.NewReader(v.input) 77 | rle := new(runLengthEncoding) 78 | rle.Init(make([]byte, v.size)) 79 | _, err := io.CopyBuffer(rle, struct{ io.Reader }{rd}, buf) 80 | output := rle.Bytes() 
81 | 82 | if got, want, ok := testutil.BytesCompare(output, []byte(v.output)); !ok { 83 | t.Errorf("test %d, output mismatch:\ngot %s\nwant %s", i, got, want) 84 | } 85 | if done := err == rleDone; done != v.done { 86 | t.Errorf("test %d, done mismatch: got %v want %v", i, done, v.done) 87 | } 88 | } 89 | } 90 | 91 | func TestRunLengthDecoder(t *testing.T) { 92 | vectors := []struct { 93 | input string 94 | output string 95 | fail bool 96 | }{{ 97 | input: "", 98 | output: "", 99 | }, { 100 | input: "abc", 101 | output: "abc", 102 | }, { 103 | input: "aaaa", 104 | output: "aaaa", 105 | fail: true, 106 | }, { 107 | input: "baaaa\x00aaaa", 108 | output: "baaaaaaaa", 109 | fail: true, 110 | }, { 111 | input: "abcccc\x00", 112 | output: "abcccc", 113 | }, { 114 | input: "aaaa\x00bbbb\x00ccc", 115 | output: "aaaabbbbccc", 116 | }, { 117 | input: "aaaa\x00bbbb\x00cccc\x00", 118 | output: "aaaabbbbcccc", 119 | }, { 120 | input: "aaaa\x00aaaa\x00aaaa\x00", 121 | output: "aaaaaaaaaaaa", 122 | }, { 123 | input: "aaaa\xffaaaa\xffaaaa\xff", 124 | output: strings.Repeat("a", 259*3), 125 | }, { 126 | input: "bbbaaaa\xffaaaa\xffaaaa\xff", 127 | output: "bbb" + strings.Repeat("a", 259*3), 128 | }, { 129 | input: "aaaa\x00", 130 | output: strings.Repeat("a", 4), 131 | }, { 132 | input: "aaaa\xfb", 133 | output: strings.Repeat("a", 255), 134 | }, { 135 | input: "aaaa\xfba", 136 | output: strings.Repeat("a", 256), 137 | }, { 138 | input: "aaaa\xfbaaaa\x00", 139 | output: strings.Repeat("a", 259), 140 | }, { 141 | input: "aaaa\xfbaaaa\xf1", 142 | output: strings.Repeat("a", 500), 143 | }, { 144 | input: "aaabbbcccdddd\x02eeefgghiiijkllmmmm\x04nnoo", 145 | output: "aaabbbcccddddddeeefgghiiijkllmmmmmmmmnnoo", 146 | }} 147 | 148 | buf := make([]byte, 3) 149 | for i, v := range vectors { 150 | wr := new(bytes.Buffer) 151 | rle := new(runLengthEncoding) 152 | rle.Init([]byte(v.input)) 153 | _, err := io.CopyBuffer(struct{ io.Writer }{wr}, rle, buf) 154 | output := wr.Bytes() 155 | 156 | if 
got, want, ok := testutil.BytesCompare(output, []byte(v.output)); !ok { 157 | t.Errorf("test %d, output mismatch:\ngot %s\nwant %s", i, got, want) 158 | } 159 | if fail := err != rleDone; fail != v.fail { 160 | t.Errorf("test %d, failure mismatch: got %t, want %t", i, fail, v.fail) 161 | } 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /bzip2/testdata/gauntlet_test3.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/gauntlet_test3.bin -------------------------------------------------------------------------------- /bzip2/testdata/gauntlet_test3.bwt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/gauntlet_test3.bwt -------------------------------------------------------------------------------- /bzip2/testdata/silesia_ooffice.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/silesia_ooffice.bin -------------------------------------------------------------------------------- /bzip2/testdata/silesia_ooffice.bwt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/silesia_ooffice.bwt -------------------------------------------------------------------------------- /bzip2/testdata/silesia_xray.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/silesia_xray.bin 
-------------------------------------------------------------------------------- /bzip2/testdata/silesia_xray.bwt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/silesia_xray.bwt -------------------------------------------------------------------------------- /bzip2/testdata/testfiles_test3.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/testfiles_test3.bin -------------------------------------------------------------------------------- /bzip2/testdata/testfiles_test3.bwt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/testfiles_test3.bwt -------------------------------------------------------------------------------- /bzip2/testdata/testfiles_test4.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/testfiles_test4.bin -------------------------------------------------------------------------------- /bzip2/testdata/testfiles_test4.bwt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/testfiles_test4.bwt -------------------------------------------------------------------------------- /bzip2/writer_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | package bzip2 6 | 7 | import ( 8 | "bytes" 9 | "io" 10 | "io/ioutil" 11 | "testing" 12 | ) 13 | 14 | func BenchmarkEncode(b *testing.B) { 15 | runBenchmarks(b, func(b *testing.B, data []byte, lvl int) { 16 | b.StopTimer() 17 | b.ReportAllocs() 18 | 19 | br := new(bytes.Reader) 20 | wr, _ := NewWriter(nil, &WriterConfig{Level: lvl}) 21 | 22 | b.SetBytes(int64(len(data))) 23 | b.StartTimer() 24 | for i := 0; i < b.N; i++ { 25 | br.Reset(data) 26 | wr.Reset(ioutil.Discard) 27 | 28 | n, err := io.Copy(wr, br) 29 | if n != int64(len(data)) || err != nil { 30 | b.Fatalf("Copy() = (%d, %v), want (%d, nil)", n, err, len(data)) 31 | } 32 | if err := wr.Close(); err != nil { 33 | b.Fatalf("Close() = %v, want nil", err) 34 | } 35 | } 36 | }) 37 | } 38 | -------------------------------------------------------------------------------- /doc/bzip2-format.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2-format.pdf -------------------------------------------------------------------------------- /doc/bzip2/bitmap-decode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/bitmap-decode.png -------------------------------------------------------------------------------- /doc/bzip2/bitmap.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/bitmap.psd -------------------------------------------------------------------------------- /doc/bzip2/bwt-decode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/bwt-decode.png 
-------------------------------------------------------------------------------- /doc/bzip2/bwt-encode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/bwt-encode.png -------------------------------------------------------------------------------- /doc/bzip2/bwt.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/bwt.psd -------------------------------------------------------------------------------- /doc/bzip2/bzip2-format.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/bzip2-format.docx -------------------------------------------------------------------------------- /doc/bzip2/diagram.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/diagram.psd -------------------------------------------------------------------------------- /doc/bzip2/hexdump-complex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/hexdump-complex.png -------------------------------------------------------------------------------- /doc/bzip2/hexdump-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/hexdump-simple.png -------------------------------------------------------------------------------- /doc/bzip2/hexdump.psd: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/hexdump.psd -------------------------------------------------------------------------------- /doc/bzip2/mtf-decode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/mtf-decode.png -------------------------------------------------------------------------------- /doc/bzip2/mtf.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/mtf.psd -------------------------------------------------------------------------------- /doc/bzip2/stream-complex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/stream-complex.png -------------------------------------------------------------------------------- /doc/bzip2/stream-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/stream-simple.png -------------------------------------------------------------------------------- /doc/bzip2/tree-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/tree-example.png -------------------------------------------------------------------------------- /doc/xflate-format.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate-format.pdf -------------------------------------------------------------------------------- /doc/xflate/hexdump-complex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/hexdump-complex.png -------------------------------------------------------------------------------- /doc/xflate/hexdump-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/hexdump-simple.png -------------------------------------------------------------------------------- /doc/xflate/hexdump.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/hexdump.psd -------------------------------------------------------------------------------- /doc/xflate/stream-complex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/stream-complex.png -------------------------------------------------------------------------------- /doc/xflate/stream-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/stream-simple.png -------------------------------------------------------------------------------- /doc/xflate/tree-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/tree-example.png 
-------------------------------------------------------------------------------- /doc/xflate/tree-hclen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/tree-hclen.png -------------------------------------------------------------------------------- /doc/xflate/xflate-format.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/xflate-format.docx -------------------------------------------------------------------------------- /flate/common.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // Package flate implements the DEFLATE compressed data format, 6 | // described in RFC 1951. 7 | package flate 8 | 9 | import ( 10 | "fmt" 11 | 12 | "github.com/dsnet/compress/internal/errors" 13 | ) 14 | 15 | const ( 16 | maxHistSize = 1 << 15 17 | endBlockSym = 256 18 | ) 19 | 20 | func errorf(c int, f string, a ...interface{}) error { 21 | return errors.Error{Code: c, Pkg: "flate", Msg: fmt.Sprintf(f, a...)} 22 | } 23 | 24 | func panicf(c int, f string, a ...interface{}) { 25 | errors.Panic(errorf(c, f, a...)) 26 | } 27 | 28 | // errWrap converts a lower-level errors.Error to be one from this package. 29 | // The replaceCode passed in will be used to replace the code for any errors 30 | // with the errors.Invalid code. 31 | // 32 | // For the Reader, set this to errors.Corrupted. 33 | // For the Writer, set this to errors.Internal. 
func errWrap(err error, replaceCode int) error {
	// cerr is a copy of the asserted value, so rewriting its code does not
	// modify the error held by the caller.
	if cerr, ok := err.(errors.Error); ok {
		if errors.IsInvalid(cerr) {
			cerr.Code = replaceCode
		}
		// Rebuild through errorf so the result is attributed to this package.
		err = errorf(cerr.Code, "%s", cerr.Msg)
	}
	// Errors of any other type are passed through untouched.
	return err
}

// errClosed is a package error carrying the errors.Closed code and an empty
// message.
var errClosed = errorf(errors.Closed, "")

// The dictDecoder implements the LZ77 sliding dictionary that is commonly used
// in various compression formats. For performance reasons, this implementation
// performs little to no sanity checks about the arguments. As such, the
// invariants documented for each method call must be respected. Furthermore,
// to reduce the memory footprint decompressing short streams, the dictionary
// starts with a relatively small size and then lazily grows.

const (
	initSize   = 4096 // Initial size allocated for sliding dictionary
	growFactor = 4    // Rate the dictionary is grown to match expected size
)

// dictDecoder is the sliding-window history buffer. Data is appended at wrPos
// and handed to the user from rdPos via ReadFlush; once the buffer has reached
// its full size and filled up, writing wraps around to the start.
type dictDecoder struct {
	// Invariant: len(hist) <= size
	size int    // Sliding window size
	hist []byte // Sliding window history, dynamically grown to match size

	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
	wrPos int  // Current output position in buffer
	rdPos int  // Have emitted hist[:rdPos] already
	full  bool // Has a full window length been written yet?
}

// Init resets the decoder for a window of the given size. A history buffer
// from an earlier use is kept and reused rather than reallocated.
func (dd *dictDecoder) Init(size int) {
	// Zero every field except the previously allocated buffer.
	*dd = dictDecoder{hist: dd.hist}

	// Regardless of what size claims, start with a small dictionary to avoid
	// denial-of-service attacks with large memory allocation.
	dd.size = size
	if dd.hist == nil {
		dd.hist = make([]byte, initSize)
	}
	// Expose the full capacity of the buffer, then clamp it to the window
	// size so that len(hist) <= size holds.
	dd.hist = dd.hist[:cap(dd.hist)]
	if len(dd.hist) > dd.size {
		dd.hist = dd.hist[:dd.size]
	}
}

// HistSize reports the total amount of historical data in the dictionary.
func (dd *dictDecoder) HistSize() int {
	if dd.full {
		return dd.size
	}
	// Before the first wrap-around, only hist[:wrPos] holds data.
	return dd.wrPos
}

// AvailSize reports the available amount of output buffer space.
func (dd *dictDecoder) AvailSize() int {
	return len(dd.hist) - dd.wrPos
}

// WriteSlice returns a slice of the available buffer to write data to.
//
// This invariant will be kept: len(s) <= AvailSize()
func (dd *dictDecoder) WriteSlice() []byte {
	return dd.hist[dd.wrPos:]
}

// WriteMark advances the write pointer by cnt.
//
// This invariant must be kept: 0 <= cnt <= AvailSize()
func (dd *dictDecoder) WriteMark(cnt int) {
	dd.wrPos += cnt
}

// WriteByte writes a single byte to the dictionary.
//
// This invariant must be kept: 0 < AvailSize()
func (dd *dictDecoder) WriteByte(c byte) {
	dd.hist[dd.wrPos] = c
	dd.wrPos++
}

// TryWriteCopy tries to copy a string at a given (distance, length) to the
// output. This specialized version is optimized for short distances.
//
// It returns the number of bytes copied, which is either length or 0. Zero is
// returned, with nothing written, when the source would start before the
// beginning of the buffer or the destination would run past its end.
//
// This method is designed to be inlined for performance reasons.
//
// This invariant must be kept: 0 < dist <= HistSize()
func (dd *dictDecoder) TryWriteCopy(dist, length int) int {
	wrPos := dd.wrPos
	wrEnd := wrPos + length
	if wrPos < dist || wrEnd > len(dd.hist) {
		return 0
	}

	// Copy overlapping section before destination.
	wrBase := wrPos
	rdPos := wrPos - dist
	// Each pass copies at most the bytes written so far (hist[rdPos:wrPos]),
	// so a source that overlaps the destination repeats itself until wrEnd.
loop:
	wrPos += copy(dd.hist[wrPos:wrEnd], dd.hist[rdPos:wrPos])
	if wrPos < wrEnd {
		goto loop // Avoid for-loop so that this function can be inlined
	}
	dd.wrPos = wrPos
	return wrPos - wrBase
}

// WriteCopy copies a string at a given (distance, length) to the output.
// This returns the number of bytes copied and may be less than the requested
// length if the available space in the output buffer is too small.
//
// This invariant must be kept: 0 < dist <= HistSize()
func (dd *dictDecoder) WriteCopy(dist, length int) int {
	wrBase := dd.wrPos
	wrPos := wrBase
	rdPos := wrPos - dist
	wrEnd := wrPos + length
	// Never write beyond the end of the buffer; the caller sees a short count.
	if wrEnd > len(dd.hist) {
		wrEnd = len(dd.hist)
	}

	// Copy non-overlapping section after destination.
	if rdPos < 0 {
		// The source begins before the start of the buffer, i.e. it wrapped
		// around and lives at the tail end of hist.
		rdPos += len(dd.hist)
		wrPos += copy(dd.hist[wrPos:wrEnd], dd.hist[rdPos:])
		rdPos = 0
	}

	// Copy overlapping section before destination.
	for wrPos < wrEnd {
		wrPos += copy(dd.hist[wrPos:wrEnd], dd.hist[rdPos:wrPos])
	}
	dd.wrPos = wrPos
	return wrPos - wrBase
}

// ReadFlush returns a slice of the historical buffer that is ready to be
// emitted to the user. A call to ReadFlush is only valid after all of the data
// from a previous call to ReadFlush has been consumed.
func (dd *dictDecoder) ReadFlush() []byte {
	toRead := dd.hist[dd.rdPos:dd.wrPos]
	dd.rdPos = dd.wrPos
	if dd.wrPos == len(dd.hist) {
		if len(dd.hist) == dd.size {
			// The buffer is at its final size and full: wrap around.
			dd.wrPos, dd.rdPos = 0, 0
			dd.full = true
		} else {
			// Allocate a larger history buffer.
			size := cap(dd.hist) * growFactor
			if size > dd.size {
				size = dd.size
			}
			// toRead still refers to the old buffer, which is left intact.
			hist := make([]byte, size)
			copy(hist, dd.hist)
			dd.hist = hist
		}
	}
	return toRead
}
v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 13 | -------------------------------------------------------------------------------- /internal/cgo/README.md: -------------------------------------------------------------------------------- 1 | **Note:** The cgo directory contains a collection of cgo wrappers over the 2 | canonical C implementation for each compression format. These cgo wrappers are 3 | only used by the fuzzer and bench tools to test for correctness and performance 4 | of the Go implementations relative to the C implementations. 5 | There are no unit tests for each wrapper since they are thoroughly tested by 6 | the aforementioned tools. 7 | -------------------------------------------------------------------------------- /internal/cgo/flate/flate.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | //go:build cgo 6 | // +build cgo 7 | 8 | // Package flate implements the DEFLATE compressed data format, 9 | // described in RFC 1951, using C wrappers. 
/*
#cgo LDFLAGS: -lz

#include <stdlib.h>
#include "zlib.h"

// zfDecCreate allocates a zeroed z_stream and initializes it for inflating.
z_streamp zfDecCreate() {
	z_streamp state = calloc(1, sizeof(z_stream));
	inflateInit2(state, -MAX_WBITS);
	return state;
}

// zfDecStream runs one inflate step over the given input and output buffers.
// The remaining sizes are reported back through avail_in and avail_out, and
// the buffer pointers are cleared before returning.
int zfDecStream(
	z_streamp state,
	uInt* avail_in, Bytef* next_in,
	uInt* avail_out, Bytef* next_out
) {
	state->avail_in = *avail_in;
	state->avail_out = *avail_out;
	state->next_in = next_in;
	state->next_out = next_out;
	int ret = inflate(state, Z_NO_FLUSH);
	*avail_in = state->avail_in;
	*avail_out = state->avail_out;
	state->next_in = NULL;
	state->next_out = NULL;
	return ret;
}

// zfDecDestroy releases the inflate state and the z_stream itself.
void zfDecDestroy(z_streamp state) {
	inflateEnd(state);
	free(state);
}

// zfEncCreate allocates a zeroed z_stream and initializes it for deflating
// at the given compression level.
z_streamp zfEncCreate(int level) {
	z_streamp state = calloc(1, sizeof(z_stream));
	deflateInit2(state, level, Z_DEFLATED, -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
	return state;
}

// zfEncStream runs one deflate step with the given flush mode. Buffers are
// passed and reported back in the same way as for zfDecStream.
int zfEncStream(
	z_streamp state, int flush,
	uInt* avail_in, Bytef* next_in,
	uInt* avail_out, Bytef* next_out
) {
	state->avail_in = *avail_in;
	state->avail_out = *avail_out;
	state->next_in = next_in;
	state->next_out = next_out;
	int ret = deflate(state, flush);
	*avail_in = state->avail_in;
	*avail_out = state->avail_out;
	state->next_in = NULL;
	state->next_out = NULL;
	return ret;
}

// zfEncDestroy releases the deflate state and the z_stream itself.
void zfEncDestroy(z_streamp state) {
	deflateEnd(state);
	free(state);
}
*/
import "C"

import (
	"errors"
	"io"
	"unsafe"
)

// reader decompresses data read from r using the C decoder state.
type reader struct {
	r     io.Reader
	err   error         // Sticky error; once set, Read stops making progress
	state C.z_streamp   // C decoder state; nil after Close
	buf   []byte        // Unconsumed compressed input, a sub-slice of arr
	arr   [1 << 14]byte // Backing storage for buf
}

// NewReader returns a ReadCloser that decompresses data read from r.
// It panics if the C decoder state could not be created.
func NewReader(r io.Reader) io.ReadCloser {
	zr := &reader{r: r, state: C.zfDecCreate()}
	if zr.state == nil {
		panic("flate: could not allocate decoder state")
	}
	return zr
}

// Read decompresses into buf and returns the number of bytes produced.
//
// It returns io.ErrClosedPipe after Close, io.EOF when the decoder reports the
// end of the stream, io.ErrUnexpectedEOF when the underlying reader runs dry
// before that, and a "corrupted input" error for any other decoder status.
func (zr *reader) Read(buf []byte) (int, error) {
	if zr.state == nil {
		return 0, io.ErrClosedPipe
	}

	var n int
	// Keep going until some output was produced or an error occurred.
	for zr.err == nil && (len(buf) > 0 && n == 0) {
		availIn, availOut, ptrIn, ptrOut := sizePtrs(zr.buf, buf)
		ret := C.zfDecStream(zr.state, &availIn, ptrIn, &availOut, ptrOut)
		// availOut/availIn now hold what is left over; advance both buffers
		// past the portion the decoder used.
		n += len(buf) - int(availOut)
		buf = buf[len(buf)-int(availOut):]
		zr.buf = zr.buf[len(zr.buf)-int(availIn):]

		switch ret {
		case C.Z_OK:
			return n, nil
		case C.Z_BUF_ERROR:
			// No progress was possible; if the input is used up, refill it.
			if len(zr.buf) == 0 {
				n1, err := zr.r.Read(zr.arr[:])
				if n1 > 0 {
					zr.buf = zr.arr[:n1]
				} else if err != nil {
					// Running out of input mid-stream is a truncation.
					if err == io.EOF {
						err = io.ErrUnexpectedEOF
					}
					zr.err = err
				}
			}
		case C.Z_STREAM_END:
			return n, io.EOF
		default:
			zr.err = errors.New("flate: corrupted input")
		}
	}
	return n, zr.err
}

// Close releases the C decoder state and returns the sticky error, if any.
// Calling Close again is a no-op that returns the same error.
func (zr *reader) Close() error {
	if zr.state != nil {
		defer func() {
			C.zfDecDestroy(zr.state)
			zr.state = nil
		}()
	}
	return zr.err
}

// writer compresses data written to it and forwards the result to w.
type writer struct {
	w     io.Writer
	err   error         // Sticky error; once set, write stops making progress
	state C.z_streamp   // C encoder state; nil after Close
	buf   []byte        // Compressed output of the latest step, a sub-slice of arr
	arr   [1 << 14]byte // Backing storage for buf
}

// NewWriter returns a WriteCloser that compresses at the given level and
// writes the result to w. It panics if level is outside of
// [Z_NO_COMPRESSION, Z_BEST_COMPRESSION] or if the C encoder state could not
// be created.
func NewWriter(w io.Writer, level int) io.WriteCloser {
	if level < C.Z_NO_COMPRESSION || level > C.Z_BEST_COMPRESSION {
		panic("flate: invalid compression level")
	}

	zw := &writer{w: w, state: C.zfEncCreate(C.int(level))}
	if zw.state == nil {
		panic("flate: could not allocate encoder state")
	}
	return zw
}

// Write compresses buf without requesting a flush.
func (zw *writer) Write(buf []byte) (int, error) {
	return zw.write(buf, C.Z_NO_FLUSH)
}

// write feeds buf to the encoder using flush mode op and forwards all
// produced output to the underlying writer. It returns the number of input
// bytes consumed. With a flush mode other than Z_NO_FLUSH, it keeps calling
// the encoder until the encoder reports the end of the stream.
func (zw *writer) write(buf []byte, op C.int) (int, error) {
	if zw.state == nil {
		return 0, io.ErrClosedPipe
	}

	var n int
	flush := op != C.Z_NO_FLUSH
	for zw.err == nil && (len(buf) > 0 || flush) {
		availIn, availOut, ptrIn, ptrOut := sizePtrs(buf, zw.arr[:])
		ret := C.zfEncStream(zw.state, op, &availIn, ptrIn, &availOut, ptrOut)
		// Advance past the consumed input; the produced output is the part
		// of arr that is no longer reported as available.
		n += len(buf) - int(availIn)
		buf = buf[len(buf)-int(availIn):]
		zw.buf = zw.arr[:len(zw.arr)-int(availOut)]

		if len(zw.buf) > 0 {
			if _, err := zw.w.Write(zw.buf); err != nil {
				zw.err = err
			}
		}
		switch ret {
		case C.Z_OK, C.Z_BUF_ERROR:
			continue // Do nothing
		case C.Z_STREAM_END:
			return n, zw.err
		default:
			zw.err = errors.New("flate: compression error")
		}
	}
	return n, zw.err
}

// Close finishes the compressed stream, releases the C encoder state, and
// returns the sticky error, if any. Calling Close again only returns that
// error.
func (zw *writer) Close() error {
	if zw.state != nil {
		// The deferred destroy runs after the final write below.
		defer func() {
			C.zfEncDestroy(zw.state)
			zw.state = nil
		}()
		zw.write(nil, C.Z_FINISH)
	}
	return zw.err
}

// sizePtrs converts the in and out slices into the length and pointer pairs
// expected by the C wrappers. An empty slice yields a nil pointer, since it
// has no first element to take the address of.
func sizePtrs(in, out []byte) (sizeIn, sizeOut C.uInt, ptrIn, ptrOut *C.Bytef) {
	sizeIn = C.uInt(len(in))
	sizeOut = C.uInt(len(out))
	if len(in) > 0 {
		ptrIn = (*C.Bytef)(unsafe.Pointer(&in[0]))
	}
	if len(out) > 0 {
		ptrOut = (*C.Bytef)(unsafe.Pointer(&out[0]))
	}
	return
}
19 | // 20 | // The liblzma documentation is not clear about whether the filters struct must 21 | // stay live past calls to lzma_raw_encoder and lzma_raw_decoder. 22 | // To be on the safe side, we allocate them and keep them around until the end. 23 | typedef struct { 24 | lzma_stream stream; 25 | lzma_filter filters[2]; 26 | lzma_options_lzma options; 27 | } zlState; 28 | 29 | zlState* zlDecCreate() { 30 | zlState* state = calloc(1, sizeof(zlState)); 31 | state->filters[0].id = LZMA_FILTER_LZMA2; 32 | state->filters[0].options = &state->options; 33 | state->filters[1].id = LZMA_VLI_UNKNOWN; 34 | state->options.dict_size = LZMA_DICT_SIZE_DEFAULT; 35 | 36 | assert(lzma_raw_decoder(&state->stream, state->filters) == LZMA_OK); 37 | return state; 38 | } 39 | 40 | zlState* zlEncCreate(int level) { 41 | zlState* state = calloc(1, sizeof(zlState)); 42 | state->filters[0].id = LZMA_FILTER_LZMA2; 43 | state->filters[0].options = &state->options; 44 | state->filters[1].id = LZMA_VLI_UNKNOWN; 45 | 46 | assert(!lzma_lzma_preset(&state->options, level)); 47 | assert(lzma_raw_encoder(&state->stream, state->filters) == LZMA_OK); 48 | return state; 49 | } 50 | 51 | lzma_ret zlStream( 52 | lzma_stream* strm, lzma_action action, 53 | size_t* avail_in, uint8_t* next_in, 54 | size_t* avail_out, uint8_t* next_out 55 | ) { 56 | strm->avail_in = *avail_in; 57 | strm->avail_out = *avail_out; 58 | strm->next_in = next_in; 59 | strm->next_out = next_out; 60 | lzma_ret ret = lzma_code(strm, action); 61 | *avail_in = strm->avail_in; 62 | *avail_out = strm->avail_out; 63 | strm->next_in = NULL; 64 | strm->next_out = NULL; 65 | return ret; 66 | } 67 | 68 | void zlDestroy(zlState* state) { 69 | lzma_end(&state->stream); 70 | free(state); 71 | } 72 | */ 73 | import "C" 74 | 75 | import ( 76 | "errors" 77 | "io" 78 | "unsafe" 79 | ) 80 | 81 | type reader struct { 82 | r io.Reader 83 | err error 84 | state *C.zlState 85 | buf []byte 86 | arr [1 << 14]byte 87 | } 88 | 89 | func NewReader(r 
io.Reader) io.ReadCloser { 90 | zr := &reader{r: r, state: C.zlDecCreate()} 91 | if zr.state == nil { 92 | panic("lzma: could not allocate decoder state") 93 | } 94 | return zr 95 | } 96 | 97 | func (zr *reader) Read(buf []byte) (int, error) { 98 | if zr.state == nil { 99 | return 0, io.ErrClosedPipe 100 | } 101 | 102 | var n int 103 | for zr.err == nil && (len(buf) > 0 && n == 0) { 104 | availIn, availOut, ptrIn, ptrOut := sizePtrs(zr.buf, buf) 105 | ret := C.zlStream(&zr.state.stream, 0, &availIn, ptrIn, &availOut, ptrOut) 106 | n += len(buf) - int(availOut) 107 | buf = buf[len(buf)-int(availOut):] 108 | zr.buf = zr.buf[len(zr.buf)-int(availIn):] 109 | 110 | switch ret { 111 | case C.LZMA_OK: 112 | return n, nil 113 | case C.LZMA_BUF_ERROR: 114 | if len(zr.buf) == 0 { 115 | n1, err := zr.r.Read(zr.arr[:]) 116 | if n1 > 0 { 117 | zr.buf = zr.arr[:n1] 118 | } else if err != nil { 119 | if err == io.EOF { 120 | err = io.ErrUnexpectedEOF 121 | } 122 | zr.err = err 123 | } 124 | } 125 | case C.LZMA_STREAM_END: 126 | return n, io.EOF 127 | default: 128 | zr.err = errors.New("lzma: corrupted input") 129 | } 130 | } 131 | return n, zr.err 132 | } 133 | 134 | func (zr *reader) Close() error { 135 | if zr.state != nil { 136 | defer func() { 137 | C.zlDestroy(zr.state) 138 | zr.state = nil 139 | }() 140 | } 141 | return zr.err 142 | } 143 | 144 | type writer struct { 145 | w io.Writer 146 | err error 147 | state *C.zlState 148 | buf []byte 149 | arr [1 << 14]byte 150 | } 151 | 152 | func NewWriter(w io.Writer, level int) io.WriteCloser { 153 | if level < 0 || level > 9 { 154 | panic("lzma: invalid compression level") 155 | } 156 | 157 | zw := &writer{w: w, state: C.zlEncCreate(C.int(level))} 158 | if zw.state == nil { 159 | panic("lzma: could not allocate encoder state") 160 | } 161 | return zw 162 | } 163 | 164 | func (zw *writer) Write(buf []byte) (int, error) { 165 | return zw.write(buf, C.LZMA_RUN) 166 | } 167 | 168 | func (zw *writer) write(buf []byte, op 
C.lzma_action) (int, error) { 169 | if zw.state == nil { 170 | return 0, io.ErrClosedPipe 171 | } 172 | 173 | var n int 174 | flush := op != C.LZMA_RUN 175 | for zw.err == nil && (len(buf) > 0 || flush) { 176 | availIn, availOut, ptrIn, ptrOut := sizePtrs(buf, zw.arr[:]) 177 | ret := C.zlStream(&zw.state.stream, op, &availIn, ptrIn, &availOut, ptrOut) 178 | n += len(buf) - int(availIn) 179 | buf = buf[len(buf)-int(availIn):] 180 | zw.buf = zw.arr[:len(zw.arr)-int(availOut)] 181 | 182 | if len(zw.buf) > 0 { 183 | if _, err := zw.w.Write(zw.buf); err != nil { 184 | zw.err = err 185 | } 186 | } 187 | switch ret { 188 | case C.LZMA_OK, C.LZMA_BUF_ERROR: 189 | continue // Do nothing 190 | case C.LZMA_STREAM_END: 191 | return n, zw.err 192 | default: 193 | zw.err = errors.New("lzma: compression error") 194 | } 195 | } 196 | return n, zw.err 197 | } 198 | 199 | func (zw *writer) Close() error { 200 | if zw.state != nil { 201 | defer func() { 202 | C.zlDestroy(zw.state) 203 | zw.state = nil 204 | }() 205 | zw.write(nil, C.LZMA_FINISH) 206 | } 207 | return zw.err 208 | } 209 | 210 | func sizePtrs(in, out []byte) (sizeIn, sizeOut C.size_t, ptrIn, ptrOut *C.uint8_t) { 211 | sizeIn = C.size_t(len(in)) 212 | sizeOut = C.size_t(len(out)) 213 | if len(in) > 0 { 214 | ptrIn = (*C.uint8_t)(unsafe.Pointer(&in[0])) 215 | } 216 | if len(out) > 0 { 217 | ptrOut = (*C.uint8_t)(unsafe.Pointer(&out[0])) 218 | } 219 | return 220 | } 221 | -------------------------------------------------------------------------------- /internal/cgo/zstd/zstd.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | //go:build cgo 6 | // +build cgo 7 | 8 | // Package zstd implements the Zstandard compressed data format using C wrappers. 
9 | package zstd 10 | 11 | /* 12 | // This relies upon the shared library built from github.com/facebook/zstd. 13 | // 14 | // The steps to build and install the shared library is as follows: 15 | // curl -L https://github.com/facebook/zstd/archive/v1.3.2.tar.gz | tar -zxv 16 | // cd zstd-1.3.2 17 | // sudo make install 18 | 19 | #cgo LDFLAGS: -lzstd 20 | 21 | #include 22 | #include 23 | #include "zstd.h" 24 | 25 | ZSTD_DStream* zsDecCreate() { 26 | ZSTD_DStream* state = ZSTD_createDStream(); 27 | ZSTD_initDStream(state); 28 | return state; 29 | } 30 | 31 | size_t zsDecStream( 32 | ZSTD_DStream* state, 33 | size_t* avail_in, uint8_t* next_in, 34 | size_t* avail_out, uint8_t* next_out 35 | ) { 36 | ZSTD_inBuffer in = {next_in, *avail_in, 0}; 37 | ZSTD_outBuffer out = {next_out, *avail_out, 0}; 38 | size_t ret = ZSTD_decompressStream(state, &out, &in); 39 | *avail_in = in.size - in.pos; 40 | *avail_out = out.size - out.pos; 41 | in.src = NULL; 42 | out.dst = NULL; 43 | return ret; 44 | } 45 | 46 | void zsDecDestroy(ZSTD_DStream* state) { 47 | ZSTD_freeDStream(state); 48 | } 49 | 50 | ZSTD_CStream* zsEncCreate(int level) { 51 | ZSTD_CStream* state = ZSTD_createCStream(); 52 | ZSTD_initCStream(state, level); 53 | return state; 54 | } 55 | 56 | size_t zsEncStream( 57 | ZSTD_CStream* state, int finish, 58 | size_t* avail_in, uint8_t* next_in, 59 | size_t* avail_out, uint8_t* next_out 60 | ) { 61 | ZSTD_inBuffer in = {next_in, *avail_in, 0}; 62 | ZSTD_outBuffer out = {next_out, *avail_out, 0}; 63 | size_t ret = finish ? 
64 | ZSTD_endStream(state, &out) : ZSTD_compressStream(state, &out, &in); 65 | *avail_in = in.size - in.pos; 66 | *avail_out = out.size - out.pos; 67 | in.src = NULL; 68 | out.dst = NULL; 69 | return ret; 70 | } 71 | 72 | void zsEncDestroy(ZSTD_CStream* state) { 73 | ZSTD_freeCStream(state); 74 | } 75 | */ 76 | import "C" 77 | 78 | import ( 79 | "errors" 80 | "io" 81 | "unsafe" 82 | ) 83 | 84 | type reader struct { 85 | r io.Reader 86 | err error 87 | state *C.ZSTD_DStream 88 | buf []byte 89 | arr [1 << 14]byte 90 | } 91 | 92 | func NewReader(r io.Reader) io.ReadCloser { 93 | zr := &reader{r: r, state: C.zsDecCreate()} 94 | if zr.state == nil { 95 | panic("zstd: could not allocate decoder state") 96 | } 97 | return zr 98 | } 99 | 100 | func (zr *reader) Read(buf []byte) (int, error) { 101 | if zr.state == nil { 102 | return 0, io.ErrClosedPipe 103 | } 104 | 105 | var n int 106 | for zr.err == nil && (len(buf) > 0 && n == 0) { 107 | availIn, availOut, ptrIn, ptrOut := sizePtrs(zr.buf, buf) 108 | ret := C.zsDecStream(zr.state, &availIn, ptrIn, &availOut, ptrOut) 109 | n += len(buf) - int(availOut) 110 | buf = buf[len(buf)-int(availOut):] 111 | zr.buf = zr.buf[len(zr.buf)-int(availIn):] 112 | 113 | switch { 114 | case C.ZSTD_isError(ret) > 0: 115 | zr.err = errors.New("zstd: corrupted input") 116 | case ret == 0: 117 | return n, io.EOF 118 | case n > 0: 119 | return n, nil 120 | case len(zr.buf) == 0 && n == 0: 121 | n1, err := zr.r.Read(zr.arr[:]) 122 | if n1 > 0 { 123 | zr.buf = zr.arr[:n1] 124 | } else if err != nil { 125 | if err == io.EOF { 126 | err = io.ErrUnexpectedEOF 127 | } 128 | zr.err = err 129 | } 130 | } 131 | } 132 | return n, zr.err 133 | } 134 | 135 | func (zr *reader) Close() error { 136 | if zr.state != nil { 137 | defer func() { 138 | C.zsDecDestroy(zr.state) 139 | zr.state = nil 140 | }() 141 | } 142 | return zr.err 143 | } 144 | 145 | type writer struct { 146 | w io.Writer 147 | err error 148 | state *C.ZSTD_CStream 149 | buf []byte 150 | arr [1 
<< 14]byte 151 | } 152 | 153 | func NewWriter(w io.Writer, level int) io.WriteCloser { 154 | if level < 1 || level > 22 { 155 | panic("zstd: invalid compression level") 156 | } 157 | 158 | zw := &writer{w: w, state: C.zsEncCreate(C.int(level))} 159 | if zw.state == nil { 160 | panic("zstd: could not allocate encoder state") 161 | } 162 | return zw 163 | } 164 | 165 | func (zw *writer) Write(buf []byte) (int, error) { 166 | return zw.write(buf, 0) 167 | } 168 | 169 | func (zw *writer) write(buf []byte, finish C.int) (int, error) { 170 | if zw.state == nil { 171 | return 0, io.ErrClosedPipe 172 | } 173 | 174 | var n int 175 | for zw.err == nil && (len(buf) > 0 || finish > 0) { 176 | availIn, availOut, ptrIn, ptrOut := sizePtrs(buf, zw.arr[:]) 177 | ret := C.zsEncStream(zw.state, finish, &availIn, ptrIn, &availOut, ptrOut) 178 | n += len(buf) - int(availIn) 179 | buf = buf[len(buf)-int(availIn):] 180 | zw.buf = zw.arr[:len(zw.arr)-int(availOut)] 181 | 182 | if len(zw.buf) > 0 { 183 | if _, err := zw.w.Write(zw.buf); err != nil { 184 | zw.err = err 185 | } 186 | } 187 | switch { 188 | case C.ZSTD_isError(ret) > 0: 189 | zw.err = errors.New("zstd: compression error") 190 | case len(buf) == 0 && len(zw.buf) == 0: 191 | return n, zw.err 192 | case ret == 0 && finish > 0: 193 | return n, zw.err 194 | } 195 | } 196 | return n, zw.err 197 | } 198 | 199 | func (zw *writer) Close() error { 200 | if zw.state != nil { 201 | defer func() { 202 | C.zsEncDestroy(zw.state) 203 | zw.state = nil 204 | }() 205 | zw.write(nil, 1) 206 | } 207 | return zw.err 208 | } 209 | 210 | func sizePtrs(in, out []byte) (sizeIn, sizeOut C.size_t, ptrIn, ptrOut *C.uint8_t) { 211 | sizeIn = C.size_t(len(in)) 212 | sizeOut = C.size_t(len(out)) 213 | if len(in) > 0 { 214 | ptrIn = (*C.uint8_t)(unsafe.Pointer(&in[0])) 215 | } 216 | if len(out) > 0 { 217 | ptrOut = (*C.uint8_t)(unsafe.Pointer(&out[0])) 218 | } 219 | return 220 | } 221 | 
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

// Common lookup tables and helpers shared by the compression packages.
//
// For performance reasons, nothing here performs strong error checking;
// callers are required to uphold the documented invariants themselves.

var (
	// IdentityLUT maps every byte value to itself.
	IdentityLUT = func() (tbl [256]byte) {
		for k := 0; k < len(tbl); k++ {
			tbl[k] = byte(k)
		}
		return tbl
	}()

	// ReverseLUT maps every byte value to the byte with its bit order
	// reversed (bit 0 becomes bit 7, bit 1 becomes bit 6, and so on).
	ReverseLUT = func() (tbl [256]byte) {
		for k := 0; k < len(tbl); k++ {
			var rev byte
			for bit := uint(0); bit < 8; bit++ {
				if k&(1<<bit) != 0 {
					rev |= 0x80 >> bit
				}
			}
			tbl[k] = rev
		}
		return tbl
	}()
)

// ReverseUint32 returns v with the order of all 32 bits reversed.
func ReverseUint32(v uint32) (x uint32) {
	// Each byte is bit-reversed through ReverseLUT and placed at the
	// mirrored byte position.
	for shift := uint(0); shift < 32; shift += 8 {
		x |= uint32(ReverseLUT[byte(v>>shift)]) << (24 - shift)
	}
	return x
}

// ReverseUint32N returns the lower n bits of v in reversed order.
func ReverseUint32N(v uint32, n uint) (x uint32) {
	// Left-align the n bits of interest so that a full reversal drops them
	// back into the low n bits.
	x = ReverseUint32(v << (32 - n))
	return x
}

// ReverseUint64 returns v with the order of all 64 bits reversed.
func ReverseUint64(v uint64) (x uint64) {
	// Same scheme as ReverseUint32, over eight bytes.
	for shift := uint(0); shift < 64; shift += 8 {
		x |= uint64(ReverseLUT[byte(v>>shift)]) << (56 - shift)
	}
	return x
}

// ReverseUint64N returns the lower n bits of v in reversed order.
func ReverseUint64N(v uint64, n uint) (x uint64) {
	x = ReverseUint64(v << (64 - n))
	return x
}

// MoveToFront holds the state for move-to-front transformations in a way
// that makes repeated use cheaper. The alphabet is assumed to be densely
// packed within 0..255.
type MoveToFront struct {
	dict [256]uint8 // Current ordering: position -> byte value
	tail int        // Number of trailing entries that are still in identity order
}

// Encode replaces every value in vals, in place, with its current position in
// the move-to-front list, moving that value to the front afterwards. The list
// starts from the identity ordering on every call.
func (m *MoveToFront) Encode(vals []uint8) {
	// Only the leading 256-tail entries may have been reordered by the
	// previous call, so restoring those is enough to reset the list.
	copy(m.dict[:], IdentityLUT[:256-m.tail])

	var seen int // OR of all emitted positions; never smaller than the largest one
	for pos := range vals {
		sym := vals[pos]

		// Linear search for where sym currently sits.
		var rank uint8
		for j := 0; j < len(m.dict); j++ {
			if m.dict[j] == sym {
				rank = uint8(j)
				break
			}
		}
		vals[pos] = rank
		seen |= int(rank)

		// Slide everything that was ahead of sym back by one slot.
		copy(m.dict[1:], m.dict[:rank])
		m.dict[0] = sym
	}
	m.tail = 256 - seen - 1
}

// Decode replaces every position in idxs, in place, with the value found at
// that position of the move-to-front list, moving that value to the front
// afterwards. The list starts from the identity ordering on every call.
func (m *MoveToFront) Decode(idxs []uint8) {
	// See Encode for why a partial reset is sufficient.
	copy(m.dict[:], IdentityLUT[:256-m.tail])

	var seen int // OR of all consumed positions; never smaller than the largest one
	for pos := range idxs {
		rank := idxs[pos]
		sym := m.dict[rank]
		idxs[pos] = sym
		seen |= int(rank)

		copy(m.dict[1:], m.dict[:rank])
		m.dict[0] = sym
	}
	m.tail = 256 - seen - 1
}
-------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | //go:build debug && !gofuzz 6 | // +build debug,!gofuzz 7 | 8 | package internal 9 | 10 | const ( 11 | Debug = true 12 | GoFuzz = false 13 | ) 14 | -------------------------------------------------------------------------------- /internal/errors/errors.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // Package errors implements functions to manipulate compression errors. 6 | // 7 | // In idiomatic Go, it is an anti-pattern to use panics as a form of error 8 | // reporting in the API. Instead, the expected way to transmit errors is by 9 | // returning an error value. Unfortunately, the checking of "err != nil" in 10 | // tight loops commonly found in compression causes non-negligible performance 11 | // degradation. While this may not be idiomatic, the internal packages of this 12 | // repository rely on panics as a normal means to convey errors. In order to 13 | // ensure that these panics do not leak across the public API, the public 14 | // packages must recover from these panics and present an error value. 15 | // 16 | // The Panic and Recover functions in this package provide a safe way to 17 | // recover from errors only generated from within this repository. 18 | // 19 | // Example usage: 20 | // 21 | // func Foo() (err error) { 22 | // defer errors.Recover(&err) 23 | // 24 | // if rand.Intn(2) == 0 { 25 | // // Unexpected panics will not be caught by Recover. 26 | // io.Closer(nil).Close() 27 | // } else { 28 | // // Errors thrown by Panic will be caught by Recover. 
29 | // errors.Panic(errors.New("whoopsie")) 30 | // } 31 | // } 32 | package errors 33 | 34 | import "strings" 35 | 36 | const ( 37 | // Unknown indicates that there is no classification for this error. 38 | Unknown = iota 39 | 40 | // Internal indicates that this error is due to an internal bug. 41 | // Users should file a issue report if this type of error is encountered. 42 | Internal 43 | 44 | // Invalid indicates that this error is due to the user misusing the API 45 | // and is indicative of a bug on the user's part. 46 | Invalid 47 | 48 | // Deprecated indicates the use of a deprecated and unsupported feature. 49 | Deprecated 50 | 51 | // Corrupted indicates that the input stream is corrupted. 52 | Corrupted 53 | 54 | // Closed indicates that the handlers are closed. 55 | Closed 56 | ) 57 | 58 | var codeMap = map[int]string{ 59 | Unknown: "unknown error", 60 | Internal: "internal error", 61 | Invalid: "invalid argument", 62 | Deprecated: "deprecated format", 63 | Corrupted: "corrupted input", 64 | Closed: "closed handler", 65 | } 66 | 67 | type Error struct { 68 | Code int // The error type 69 | Pkg string // Name of the package where the error originated 70 | Msg string // Descriptive message about the error (optional) 71 | } 72 | 73 | func (e Error) Error() string { 74 | var ss []string 75 | for _, s := range []string{e.Pkg, codeMap[e.Code], e.Msg} { 76 | if s != "" { 77 | ss = append(ss, s) 78 | } 79 | } 80 | return strings.Join(ss, ": ") 81 | } 82 | 83 | func (e Error) CompressError() {} 84 | func (e Error) IsInternal() bool { return e.Code == Internal } 85 | func (e Error) IsInvalid() bool { return e.Code == Invalid } 86 | func (e Error) IsDeprecated() bool { return e.Code == Deprecated } 87 | func (e Error) IsCorrupted() bool { return e.Code == Corrupted } 88 | func (e Error) IsClosed() bool { return e.Code == Closed } 89 | 90 | func IsInternal(err error) bool { return isCode(err, Internal) } 91 | func IsInvalid(err error) bool { return isCode(err, 
Invalid) } 92 | func IsDeprecated(err error) bool { return isCode(err, Deprecated) } 93 | func IsCorrupted(err error) bool { return isCode(err, Corrupted) } 94 | func IsClosed(err error) bool { return isCode(err, Closed) } 95 | 96 | func isCode(err error, code int) bool { 97 | if cerr, ok := err.(Error); ok && cerr.Code == code { 98 | return true 99 | } 100 | return false 101 | } 102 | 103 | // errWrap is used by Panic and Recover to ensure that only errors raised by 104 | // Panic are recovered by Recover. 105 | type errWrap struct{ e *error } 106 | 107 | func Recover(err *error) { 108 | switch ex := recover().(type) { 109 | case nil: 110 | // Do nothing. 111 | case errWrap: 112 | *err = *ex.e 113 | default: 114 | panic(ex) 115 | } 116 | } 117 | 118 | func Panic(err error) { 119 | panic(errWrap{&err}) 120 | } 121 | -------------------------------------------------------------------------------- /internal/gofuzz.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | //go:build gofuzz 6 | // +build gofuzz 7 | 8 | package internal 9 | 10 | const ( 11 | Debug = true 12 | GoFuzz = true 13 | ) 14 | -------------------------------------------------------------------------------- /internal/prefix/debug.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | //go:build debug 6 | // +build debug 7 | 8 | package prefix 9 | 10 | import ( 11 | "fmt" 12 | "math" 13 | "strings" 14 | ) 15 | 16 | func max(a, b int) int { 17 | if a > b { 18 | return a 19 | } 20 | return b 21 | } 22 | 23 | func lenBase2(n uint) int { 24 | return int(math.Ceil(math.Log2(float64(n + 1)))) 25 | } 26 | func padBase2(v, n uint, m int) string { 27 | s := fmt.Sprintf("%b", 1< 0 { 29 | return strings.Repeat(" ", pad) + s 30 | } 31 | return s 32 | } 33 | 34 | func lenBase10(n int) int { 35 | return int(math.Ceil(math.Log10(float64(n + 1)))) 36 | } 37 | func padBase10(n, m int) string { 38 | s := fmt.Sprintf("%d", n) 39 | if pad := m - len(s); pad > 0 { 40 | return strings.Repeat(" ", pad) + s 41 | } 42 | return s 43 | } 44 | 45 | func (rc RangeCodes) String() string { 46 | var maxLen, maxBase int 47 | for _, c := range rc { 48 | maxLen = max(maxLen, int(c.Len)) 49 | maxBase = max(maxBase, int(c.Base)) 50 | } 51 | 52 | var ss []string 53 | ss = append(ss, "{") 54 | for i, c := range rc { 55 | base := padBase10(int(c.Base), lenBase10(maxBase)) 56 | if c.Len > 0 { 57 | base += fmt.Sprintf("-%d", c.End()-1) 58 | } 59 | ss = append(ss, fmt.Sprintf("\t%s: {len: %s, range: %s},", 60 | padBase10(int(i), lenBase10(len(rc)-1)), 61 | padBase10(int(c.Len), lenBase10(maxLen)), 62 | base, 63 | )) 64 | } 65 | ss = append(ss, "}") 66 | return strings.Join(ss, "\n") 67 | } 68 | 69 | func (pc PrefixCodes) String() string { 70 | var maxSym, maxLen, maxCnt int 71 | for _, c := range pc { 72 | maxSym = max(maxSym, int(c.Sym)) 73 | maxLen = max(maxLen, int(c.Len)) 74 | maxCnt = max(maxCnt, int(c.Cnt)) 75 | } 76 | 77 | var ss []string 78 | ss = append(ss, "{") 79 | for _, c := range pc { 80 | var cntStr string 81 | if maxCnt > 0 { 82 | cnt := int(32*float32(c.Cnt)/float32(maxCnt) + 0.5) 83 | cntStr = fmt.Sprintf("%s |%s", 84 | padBase10(int(c.Cnt), lenBase10(maxCnt)), 85 | strings.Repeat("#", cnt), 86 | ) 87 | } 88 | ss = append(ss, fmt.Sprintf("\t%s: %s, %s", 89 | 
padBase10(int(c.Sym), lenBase10(maxSym)), 90 | padBase2(uint(c.Val), uint(c.Len), maxLen), 91 | cntStr, 92 | )) 93 | } 94 | ss = append(ss, "}") 95 | return strings.Join(ss, "\n") 96 | } 97 | 98 | func (pd Decoder) String() string { 99 | var ss []string 100 | ss = append(ss, "{") 101 | if len(pd.chunks) > 0 { 102 | ss = append(ss, "\tchunks: {") 103 | for i, c := range pd.chunks { 104 | label := "sym" 105 | if uint(c&countMask) > uint(pd.chunkBits) { 106 | label = "idx" 107 | } 108 | ss = append(ss, fmt.Sprintf("\t\t%s: {%s: %s, len: %s}", 109 | padBase2(uint(i), uint(pd.chunkBits), int(pd.chunkBits)), 110 | label, padBase10(int(c>>countBits), 3), 111 | padBase10(int(c&countMask), 2), 112 | )) 113 | } 114 | ss = append(ss, "\t},") 115 | 116 | for j, links := range pd.links { 117 | ss = append(ss, fmt.Sprintf("\tlinks[%d]: {", j)) 118 | linkBits := lenBase2(uint(pd.linkMask)) 119 | for i, c := range links { 120 | ss = append(ss, fmt.Sprintf("\t\t%s: {sym: %s, len: %s},", 121 | padBase2(uint(i), uint(linkBits), int(linkBits)), 122 | padBase10(int(c>>countBits), 3), 123 | padBase10(int(c&countMask), 2), 124 | )) 125 | } 126 | ss = append(ss, "\t},") 127 | } 128 | } 129 | ss = append(ss, fmt.Sprintf("\tchunkMask: %b,", pd.chunkMask)) 130 | ss = append(ss, fmt.Sprintf("\tlinkMask: %b,", pd.linkMask)) 131 | ss = append(ss, fmt.Sprintf("\tchunkBits: %d,", pd.chunkBits)) 132 | ss = append(ss, fmt.Sprintf("\tMinBits: %d,", pd.MinBits)) 133 | ss = append(ss, fmt.Sprintf("\tNumSyms: %d,", pd.NumSyms)) 134 | ss = append(ss, "}") 135 | return strings.Join(ss, "\n") 136 | } 137 | 138 | func (pe Encoder) String() string { 139 | var maxLen int 140 | for _, c := range pe.chunks { 141 | maxLen = max(maxLen, int(c&countMask)) 142 | } 143 | 144 | var ss []string 145 | ss = append(ss, "{") 146 | if len(pe.chunks) > 0 { 147 | ss = append(ss, "\tchunks: {") 148 | for i, c := range pe.chunks { 149 | ss = append(ss, fmt.Sprintf("\t\t%s: %s,", 150 | padBase10(i, 3), 151 | 
padBase2(uint(c>>countBits), uint(c&countMask), maxLen), 152 | )) 153 | } 154 | ss = append(ss, "\t},") 155 | } 156 | ss = append(ss, fmt.Sprintf("\tchunkMask: %b,", pe.chunkMask)) 157 | ss = append(ss, fmt.Sprintf("\tNumSyms: %d,", pe.NumSyms)) 158 | ss = append(ss, "}") 159 | return strings.Join(ss, "\n") 160 | } 161 | -------------------------------------------------------------------------------- /internal/prefix/decoder.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package prefix 6 | 7 | import ( 8 | "sort" 9 | 10 | "github.com/dsnet/compress/internal" 11 | ) 12 | 13 | // The algorithm used to decode variable length codes is based on the lookup 14 | // method in zlib. If the code is less-than-or-equal to maxChunkBits, 15 | // then the symbol can be decoded using a single lookup into the chunks table. 16 | // Otherwise, the links table will be used for a second level lookup. 17 | // 18 | // The chunks slice is keyed by the contents of the bit buffer ANDed with 19 | // the chunkMask to avoid a out-of-bounds lookup. The value of chunks is a tuple 20 | // that is decoded as follow: 21 | // 22 | // var length = chunks[bitBuffer&chunkMask] & countMask 23 | // var symbol = chunks[bitBuffer&chunkMask] >> countBits 24 | // 25 | // If the decoded length is larger than chunkBits, then an overflow link table 26 | // must be used for further decoding. In this case, the symbol is actually the 27 | // index into the links tables. The second-level links table returned is 28 | // processed in the same way as the chunks table. 
29 | // 30 | // if length > chunkBits { 31 | // var index = symbol // Previous symbol is index into links tables 32 | // length = links[index][bitBuffer>>chunkBits & linkMask] & countMask 33 | // symbol = links[index][bitBuffer>>chunkBits & linkMask] >> countBits 34 | // } 35 | // 36 | // See the following: 37 | // http://www.gzip.org/algorithm.txt 38 | 39 | type Decoder struct { 40 | chunks []uint32 // First-level lookup map 41 | links [][]uint32 // Second-level lookup map 42 | chunkMask uint32 // Mask the length of the chunks table 43 | linkMask uint32 // Mask the length of the link table 44 | chunkBits uint32 // Bit-length of the chunks table 45 | 46 | MinBits uint32 // The minimum number of bits to safely make progress 47 | NumSyms uint32 // Number of symbols 48 | } 49 | 50 | // Init initializes Decoder according to the codes provided. 51 | func (pd *Decoder) Init(codes PrefixCodes) { 52 | // Handle special case trees. 53 | if len(codes) <= 1 { 54 | switch { 55 | case len(codes) == 0: // Empty tree (should error if used later) 56 | *pd = Decoder{chunks: pd.chunks[:0], links: pd.links[:0], NumSyms: 0} 57 | case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero) 58 | pd.chunks = append(pd.chunks[:0], codes[0].Sym< c.Len { 75 | minBits = c.Len 76 | } 77 | if maxBits < c.Len { 78 | maxBits = c.Len 79 | } 80 | } 81 | 82 | // Allocate chunks table as needed. 83 | const maxChunkBits = 9 // This can be tuned for better performance 84 | pd.NumSyms = uint32(len(codes)) 85 | pd.MinBits = minBits 86 | pd.chunkBits = maxBits 87 | if pd.chunkBits > maxChunkBits { 88 | pd.chunkBits = maxChunkBits 89 | } 90 | numChunks := 1 << pd.chunkBits 91 | pd.chunks = allocUint32s(pd.chunks, numChunks) 92 | pd.chunkMask = uint32(numChunks - 1) 93 | 94 | // Allocate links tables as needed. 
95 | pd.links = pd.links[:0] 96 | pd.linkMask = 0 97 | if pd.chunkBits < maxBits { 98 | numLinks := 1 << (maxBits - pd.chunkBits) 99 | pd.linkMask = uint32(numLinks - 1) 100 | 101 | var linkIdx uint32 102 | for i := range pd.chunks { 103 | pd.chunks[i] = 0 // Logic below relies on zero value as uninitialized 104 | } 105 | for _, c := range codes { 106 | if c.Len > pd.chunkBits && pd.chunks[c.Val&pd.chunkMask] == 0 { 107 | pd.chunks[c.Val&pd.chunkMask] = (linkIdx << countBits) | (pd.chunkBits + 1) 108 | linkIdx++ 109 | } 110 | } 111 | 112 | pd.links = extendSliceUint32s(pd.links, int(linkIdx)) 113 | linksFlat := allocUint32s(pd.links[0], numLinks*int(linkIdx)) 114 | for i, j := 0, 0; i < len(pd.links); i, j = i+1, j+numLinks { 115 | pd.links[i] = linksFlat[j : j+numLinks] 116 | } 117 | } 118 | 119 | // Fill out chunks and links tables with values. 120 | for _, c := range codes { 121 | chunk := c.Sym<> countBits 129 | links := pd.links[linkIdx] 130 | skip := 1 << uint(c.Len-pd.chunkBits) 131 | for j := int(c.Val >> pd.chunkBits); j < len(links); j += skip { 132 | links[j] = chunk 133 | } 134 | } 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /internal/prefix/encoder.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package prefix 6 | 7 | import ( 8 | "sort" 9 | 10 | "github.com/dsnet/compress/internal" 11 | ) 12 | 13 | type Encoder struct { 14 | chunks []uint32 // First-level lookup map 15 | chunkMask uint32 // Mask the length of the chunks table 16 | 17 | NumSyms uint32 // Number of symbols 18 | } 19 | 20 | // Init initializes Encoder according to the codes provided. 21 | func (pe *Encoder) Init(codes PrefixCodes) { 22 | // Handle special case trees. 
23 | if len(codes) <= 1 { 24 | switch { 25 | case len(codes) == 0: // Empty tree (should error if used later) 26 | *pe = Encoder{chunks: pe.chunks[:0], NumSyms: 0} 27 | case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero) 28 | pe.chunks = append(pe.chunks[:0], codes[0].Val< 0; n >>= 1 { 45 | numChunks <<= 1 46 | } 47 | pe.NumSyms = uint32(len(codes)) 48 | 49 | retry: 50 | // Allocate and reset chunks. 51 | pe.chunks = allocUint32s(pe.chunks, numChunks) 52 | pe.chunkMask = uint32(numChunks - 1) 53 | for i := range pe.chunks { 54 | pe.chunks[i] = 0 // Logic below relies on zero value as uninitialized 55 | } 56 | 57 | // Insert each symbol, checking that there are no conflicts. 58 | for _, c := range codes { 59 | if pe.chunks[c.Sym&pe.chunkMask] > 0 { 60 | // Collision found our "hash" table, so grow and try again. 61 | numChunks <<= 1 62 | goto retry 63 | } 64 | pe.chunks[c.Sym&pe.chunkMask] = c.Val<= rcs[i].Base 44 | // 45 | // Practically speaking, each range must be increasing and must not have any 46 | // gaps in between. It is okay for ranges to overlap. 
47 | func (rcs RangeCodes) checkValid() bool { 48 | if len(rcs) == 0 { 49 | return false 50 | } 51 | pre := rcs[0] 52 | for _, cur := range rcs[1:] { 53 | preBase, preEnd := pre.Base, pre.End() 54 | curBase, curEnd := cur.Base, cur.End() 55 | if preBase > curBase || preEnd > curEnd || preEnd < curBase { 56 | return false 57 | } 58 | pre = cur 59 | } 60 | return true 61 | } 62 | 63 | func (re *RangeEncoder) Init(rcs RangeCodes) { 64 | if !rcs.checkValid() { 65 | panic("invalid range codes") 66 | } 67 | *re = RangeEncoder{rcs: rcs, minBase: uint(rcs.Base())} 68 | for sym, rc := range rcs { 69 | base := int(rc.Base) - int(re.minBase) 70 | end := int(rc.End()) - int(re.minBase) 71 | if base >= len(re.lut) { 72 | break 73 | } 74 | if end > len(re.lut) { 75 | end = len(re.lut) 76 | } 77 | for i := base; i < end; i++ { 78 | re.lut[i] = uint32(sym) 79 | } 80 | } 81 | } 82 | 83 | func (re *RangeEncoder) Encode(offset uint) (sym uint) { 84 | if idx := int(offset - re.minBase); idx < len(re.lut) { 85 | return uint(re.lut[idx]) 86 | } 87 | sym = uint(re.lut[len(re.lut)-1]) 88 | retry: 89 | if int(sym) >= len(re.rcs) || re.rcs[sym].Base > uint32(offset) { 90 | return sym - 1 91 | } 92 | sym++ 93 | goto retry // Avoid for-loop so that this function can be inlined 94 | } 95 | -------------------------------------------------------------------------------- /internal/prefix/wrap.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package prefix 6 | 7 | import ( 8 | "bytes" 9 | "io" 10 | "strings" 11 | ) 12 | 13 | // For some of the common Readers, we wrap and extend them to satisfy the 14 | // compress.BufferedReader interface to improve performance. 
// The wrappers below extend a few common in-memory readers with Buffered,
// Peek, and Discard so that they can satisfy the compress.BufferedReader
// interface, which improves performance.

// buffer adds Buffered, Peek, and Discard to a *bytes.Buffer.
type buffer struct {
	*bytes.Buffer
}

// bytesReader adds Buffered, Peek, and Discard to a *bytes.Reader.
//
// Peeked data is staged in arr. buf is the part of arr that is still ahead of
// the read offset, and pos is the read offset that buf[0] corresponds to.
type bytesReader struct {
	*bytes.Reader
	pos int64
	buf []byte
	arr [512]byte
}

// stringReader adds Buffered, Peek, and Discard to a *strings.Reader.
// Its fields play the same roles as those of bytesReader.
type stringReader struct {
	*strings.Reader
	pos int64
	buf []byte
	arr [512]byte
}

// Buffered reports the number of unread bytes.
func (r *buffer) Buffered() int {
	return r.Len()
}

// Peek returns the next n unread bytes without consuming them. If fewer than
// n bytes remain, it returns what is left together with io.EOF.
func (r *buffer) Peek(n int) ([]byte, error) {
	if data := r.Bytes(); n <= len(data) {
		return data[:n], nil
	} else {
		return data, io.EOF
	}
}

// Discard consumes up to n bytes and reports how many were consumed. If fewer
// than n bytes were available, the error is io.EOF.
func (r *buffer) Discard(n int) (int, error) {
	if got := len(r.Next(n)); got < n {
		return got, io.EOF
	}
	return n, nil
}

// Buffered reports how many bytes can be peeked from the local buffer,
// capped at the number of unread bytes.
func (r *bytesReader) Buffered() int {
	r.update()
	avail := r.Len()
	if cached := len(r.buf); cached < avail {
		avail = cached
	}
	return avail
}

// Peek returns the next n bytes without advancing the reader. Requests larger
// than the local array fail with io.ErrShortBuffer. If fewer than n bytes
// remain, the available bytes are returned with the error from ReadAt.
func (r *bytesReader) Peek(n int) ([]byte, error) {
	if len(r.arr) < n {
		return nil, io.ErrShortBuffer
	}

	// Serve from what is already staged when it is enough.
	r.update()
	if n <= len(r.buf) {
		return r.buf[:n], nil
	}

	// Otherwise refill the whole local array from the current offset.
	filled, err := r.ReadAt(r.arr[:], r.pos)
	r.buf = r.arr[:filled]
	if filled >= n {
		return r.arr[:n], nil
	}
	return r.buf, err
}

// Discard skips up to n bytes and reports how many were skipped. If fewer
// than n bytes remained, the error is io.EOF.
func (r *bytesReader) Discard(n int) (int, error) {
	var err error
	if left := r.Len(); n > left {
		n, err = left, io.EOF
	}
	r.Seek(int64(n), io.SeekCurrent)
	return n, err
}

// update reslices the internal buffer to be consistent with the read offset.
func (r *bytesReader) update() {
	cur, _ := r.Seek(0, io.SeekCurrent)
	delta := cur - r.pos
	r.pos = cur
	if delta < 0 || delta >= int64(len(r.buf)) {
		// The offset moved outside the staged window; nothing is reusable.
		r.buf = nil
		return
	}
	r.buf = r.buf[delta:]
}

// Buffered reports how many bytes can be peeked from the local buffer,
// capped at the number of unread bytes.
func (r *stringReader) Buffered() int {
	r.update()
	avail := r.Len()
	if cached := len(r.buf); cached < avail {
		avail = cached
	}
	return avail
}

// Peek returns the next n bytes without advancing the reader. Requests larger
// than the local array fail with io.ErrShortBuffer. If fewer than n bytes
// remain, the available bytes are returned with the error from ReadAt.
func (r *stringReader) Peek(n int) ([]byte, error) {
	if len(r.arr) < n {
		return nil, io.ErrShortBuffer
	}

	// Serve from what is already staged when it is enough.
	r.update()
	if n <= len(r.buf) {
		return r.buf[:n], nil
	}

	// Otherwise refill the whole local array from the current offset.
	filled, err := r.ReadAt(r.arr[:], r.pos)
	r.buf = r.arr[:filled]
	if filled >= n {
		return r.arr[:n], nil
	}
	return r.buf, err
}

// Discard skips up to n bytes and reports how many were skipped. If fewer
// than n bytes remained, the error is io.EOF.
func (r *stringReader) Discard(n int) (int, error) {
	var err error
	if left := r.Len(); n > left {
		n, err = left, io.EOF
	}
	r.Seek(int64(n), io.SeekCurrent)
	return n, err
}

// update reslices the internal buffer to be consistent with the read offset.
func (r *stringReader) update() {
	cur, _ := r.Seek(0, io.SeekCurrent)
	delta := cur - r.pos
	r.pos = cur
	if delta < 0 || delta >= int64(len(r.buf)) {
		// The offset moved outside the staged window; nothing is reusable.
		r.buf = nil
		return
	}
	r.buf = r.buf[delta:]
}
// Writer implements a prefix encoder. For performance reasons, Writer will not
// write bytes immediately to the underlying stream.
//
// Bits are first accumulated in bufBits, then moved in whole bytes to buf,
// and only then written to the underlying io.Writer.
type Writer struct {
	Offset int64 // Number of bytes written to the underlying io.Writer

	wr        io.Writer
	bufBits   uint64 // Buffer to hold some bits
	numBits   uint   // Number of valid bits in bufBits
	bigEndian bool   // Are bits written in big-endian order?

	buf    [512]byte // Byte buffer holding complete bytes not yet written
	cntBuf int       // Number of valid bytes in buf
}

// Init initializes the bit Writer to write to w. If bigEndian is true, then
// bits will be written starting from the most-significant bits of a byte
// (as done in bzip2), otherwise it will write starting from the
// least-significant bits of a byte (such as for deflate and brotli).
//
// All previously buffered state, including Offset, is discarded.
func (pw *Writer) Init(w io.Writer, bigEndian bool) {
	*pw = Writer{wr: w, bigEndian: bigEndian}
}

// BitsWritten reports the total number of bits issued to any Write method.
// This counts bytes already written to the underlying writer, bytes held in
// the byte buffer, and bits held in the bit buffer.
func (pw *Writer) BitsWritten() int64 {
	return 8*pw.Offset + 8*int64(pw.cntBuf) + int64(pw.numBits)
}

// WritePads writes 0-7 bits to the bit buffer to achieve byte-alignment.
//
// Only as many low-order bits of v as are needed to reach the next byte
// boundary are used. If the bit buffer is already aligned, nothing is written.
func (pw *Writer) WritePads(v uint) {
	nb := -pw.numBits & 7 // Number of bits needed to reach a byte boundary
	// Mask v down to nb bits; the caller does not know nb, and any excess
	// bits would otherwise leak into bits written after the padding.
	pw.bufBits |= (uint64(v) & (1<<nb - 1)) << pw.numBits
	pw.numBits += nb
}

// Write writes bytes from buf.
// The bit-ordering mode does not affect this method.
51 | func (pw *Writer) Write(buf []byte) (cnt int, err error) { 52 | if pw.numBits > 0 || pw.cntBuf > 0 { 53 | if pw.numBits%8 != 0 { 54 | return 0, errorf(errors.Invalid, "non-aligned bit buffer") 55 | } 56 | if _, err := pw.Flush(); err != nil { 57 | return 0, err 58 | } 59 | } 60 | cnt, err = pw.wr.Write(buf) 61 | pw.Offset += int64(cnt) 62 | return cnt, err 63 | } 64 | 65 | // WriteOffset writes ofs in a (sym, extra) fashion using the provided prefix 66 | // Encoder and RangeEncoder. 67 | func (pw *Writer) WriteOffset(ofs uint, pe *Encoder, re *RangeEncoder) { 68 | sym := re.Encode(ofs) 69 | pw.WriteSymbol(sym, pe) 70 | rc := re.rcs[sym] 71 | pw.WriteBits(ofs-uint(rc.Base), uint(rc.Len)) 72 | } 73 | 74 | // TryWriteBits attempts to write nb bits using the contents of the bit buffer 75 | // alone. It reports whether it succeeded. 76 | // 77 | // This method is designed to be inlined for performance reasons. 78 | func (pw *Writer) TryWriteBits(v, nb uint) bool { 79 | if 64-pw.numBits < nb { 80 | return false 81 | } 82 | pw.bufBits |= uint64(v) << pw.numBits 83 | pw.numBits += nb 84 | return true 85 | } 86 | 87 | // WriteBits writes nb bits of v to the underlying writer. 88 | func (pw *Writer) WriteBits(v, nb uint) { 89 | if _, err := pw.PushBits(); err != nil { 90 | errors.Panic(err) 91 | } 92 | pw.bufBits |= uint64(v) << pw.numBits 93 | pw.numBits += nb 94 | } 95 | 96 | // TryWriteSymbol attempts to encode the next symbol using the contents of the 97 | // bit buffer alone. It reports whether it succeeded. 98 | // 99 | // This method is designed to be inlined for performance reasons. 
100 | func (pw *Writer) TryWriteSymbol(sym uint, pe *Encoder) bool { 101 | chunk := pe.chunks[uint32(sym)&pe.chunkMask] 102 | nb := uint(chunk & countMask) 103 | if 64-pw.numBits < nb { 104 | return false 105 | } 106 | pw.bufBits |= uint64(chunk>>countBits) << pw.numBits 107 | pw.numBits += nb 108 | return true 109 | } 110 | 111 | // WriteSymbol writes the symbol using the provided prefix Encoder. 112 | func (pw *Writer) WriteSymbol(sym uint, pe *Encoder) { 113 | if _, err := pw.PushBits(); err != nil { 114 | errors.Panic(err) 115 | } 116 | chunk := pe.chunks[uint32(sym)&pe.chunkMask] 117 | nb := uint(chunk & countMask) 118 | pw.bufBits |= uint64(chunk>>countBits) << pw.numBits 119 | pw.numBits += nb 120 | } 121 | 122 | // Flush flushes all complete bytes from the bit buffer to the byte buffer, and 123 | // then flushes all bytes in the byte buffer to the underlying writer. 124 | // After this call, the bit Writer is will only withhold 7 bits at most. 125 | func (pw *Writer) Flush() (int64, error) { 126 | if pw.numBits < 8 && pw.cntBuf == 0 { 127 | return pw.Offset, nil 128 | } 129 | if _, err := pw.PushBits(); err != nil { 130 | return pw.Offset, err 131 | } 132 | cnt, err := pw.wr.Write(pw.buf[:pw.cntBuf]) 133 | pw.cntBuf -= cnt 134 | pw.Offset += int64(cnt) 135 | return pw.Offset, err 136 | } 137 | 138 | // PushBits pushes as many bytes as possible from the bit buffer to the byte 139 | // buffer, reporting the number of bits pushed. 140 | func (pw *Writer) PushBits() (uint, error) { 141 | if pw.cntBuf >= len(pw.buf)-8 { 142 | cnt, err := pw.wr.Write(pw.buf[:pw.cntBuf]) 143 | pw.cntBuf -= cnt 144 | pw.Offset += int64(cnt) 145 | if err != nil { 146 | return 0, err 147 | } 148 | } 149 | 150 | u := pw.bufBits 151 | if pw.bigEndian { 152 | // Swap all the bits within each byte. 
153 | u = (u&0xaaaaaaaaaaaaaaaa)>>1 | (u&0x5555555555555555)<<1 154 | u = (u&0xcccccccccccccccc)>>2 | (u&0x3333333333333333)<<2 155 | u = (u&0xf0f0f0f0f0f0f0f0)>>4 | (u&0x0f0f0f0f0f0f0f0f)<<4 156 | } 157 | // Starting with Go 1.7, the compiler should use a wide integer 158 | // store here if the architecture supports it. 159 | binary.LittleEndian.PutUint64(pw.buf[pw.cntBuf:], u) 160 | 161 | nb := pw.numBits / 8 // Number of bytes to copy from bit buffer 162 | pw.cntBuf += int(nb) 163 | pw.bufBits >>= 8 * nb 164 | pw.numBits -= 8 * nb 165 | return 8 * nb, nil 166 | } 167 | -------------------------------------------------------------------------------- /internal/release.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | //go:build !debug && !gofuzz 6 | // +build !debug,!gofuzz 7 | 8 | package internal 9 | 10 | // Debug indicates whether the debug build tag was set. 11 | // 12 | // If set, programs may choose to print with more human-readable 13 | // debug information and also perform sanity checks that would otherwise be too 14 | // expensive to run in a release build. 15 | const Debug = false 16 | 17 | // GoFuzz indicates whether the gofuzz build tag was set. 18 | // 19 | // If set, programs may choose to disable certain checks (like checksums) that 20 | // would be nearly impossible for gofuzz to properly get right. 21 | // If GoFuzz is set, it implies that Debug is set as well. 22 | const GoFuzz = false 23 | -------------------------------------------------------------------------------- /internal/testutil/rand.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 
// Rand implements a deterministic pseudo-random number generator.
// Unlike math/rand, the exact output is consistent across different versions
// of Go: every value is derived by repeatedly encrypting an internal block
// with an AES cipher keyed by the seed.
type Rand struct {
	cipher.Block
	blk [aes.BlockSize]byte
}

// NewRand returns a generator whose output is fully determined by seed.
func NewRand(seed int) *Rand {
	key := make([]byte, aes.BlockSize)
	binary.LittleEndian.PutUint64(key, uint64(seed))
	block, _ := aes.NewCipher(key) // Key is aes.BlockSize (16) bytes, a valid AES key size
	return &Rand{Block: block}
}

// Int returns a non-negative pseudo-random 31-bit integer.
func (r *Rand) Int() int {
	state := r.blk[:]
	r.Encrypt(state, state)
	return int(binary.BigEndian.Uint32(state) >> 1)
}

// Intn returns a pseudo-random integer in [0, n). It panics if n is zero.
func (r *Rand) Intn(n int) int {
	return r.Int() % n
}

// Bytes returns n pseudo-random bytes.
func (r *Rand) Bytes(n int) []byte {
	out := make([]byte, n)
	for filled := 0; filled < n; {
		// Advance the internal block, then copy out as much as still fits.
		r.Encrypt(r.blk[:], r.blk[:])
		filled += copy(out[filled:], r.blk[:])
	}
	return out
}

// Perm returns a pseudo-random permutation of the integers [0, n).
func (r *Rand) Perm(n int) []int {
	perm := make([]int, n)
	for i := range perm {
		j := r.Intn(i + 1)
		perm[i], perm[j] = perm[j], i
	}
	return perm
}
If n <= len(input), then the input slice will be truncated. 19 | // However, if n > len(input), then the input will be replicated to fill in 20 | // the missing bytes, but each replicated string will be XORed by some byte 21 | // mask to avoid favoring algorithms with large LZ77 windows. 22 | // 23 | // If n > len(input), then len(input) must be > 0. 24 | func ResizeData(input []byte, n int) []byte { 25 | if n < 0 { 26 | return input 27 | } 28 | if len(input) >= n { 29 | return input[:n] 30 | } 31 | if len(input) == 0 { 32 | panic("unable to replicate an empty string") 33 | } 34 | 35 | var mask byte 36 | output := make([]byte, n) 37 | for i := range output { 38 | idx := i % len(input) 39 | output[i] = input[idx] ^ mask 40 | if idx == len(input)-1 { 41 | mask++ 42 | } 43 | } 44 | return output 45 | } 46 | 47 | // MustLoadFile must load a file or else panics. 48 | func MustLoadFile(file string) []byte { 49 | b, err := ioutil.ReadFile(file) 50 | if err != nil { 51 | panic(err) 52 | } 53 | return b 54 | } 55 | 56 | // MustDecodeHex must decode a hexadecimal string or else panics. 57 | func MustDecodeHex(s string) []byte { 58 | b, err := hex.DecodeString(s) 59 | if err != nil { 60 | panic(err) 61 | } 62 | return b 63 | } 64 | 65 | // MustDecodeBitGen must decode a BitGen formatted string or else panics. 66 | func MustDecodeBitGen(s string) []byte { 67 | b, err := DecodeBitGen(s) 68 | if err != nil { 69 | panic(err) 70 | } 71 | return b 72 | } 73 | 74 | // BytesCompare compares inputs a and b and reports whether they are equal. 75 | // 76 | // If they are not equal, it returns two one-line strings that are 77 | // representative of the differences between the two strings. 78 | // The output will be quoted strings if it seems like the data is text, 79 | // otherwise, it will use hexadecimal strings. 
//
// Example usage:
//
//	if got, want, ok := testutil.BytesCompare(output, v.output); !ok {
//		t.Errorf("output mismatch:\ngot  %s\nwant %s", got, want)
//	}
func BytesCompare(a, b []byte) (sa, sb string, ok bool) {
	if bytes.Equal(a, b) {
		return "", "", true
	}

	// Length of the longest common prefix of a and b.
	shared := 0
	for shared < len(a) && shared < len(b) && a[shared] == b[shared] {
		shared++
	}

	// render formats both inputs with the given verb, showing at most width
	// bytes of each. Leading bytes are elided such that roughly width/2 equal
	// bytes remain visible as context before the first difference.
	render := func(verb string, width int) (string, string) {
		skip := shared - width/2 // Always provide context of equal bytes
		if skip < 0 {
			skip = 0
		}
		for _, limit := range []int{len(a) - width, len(b) - width} {
			if skip > limit {
				skip = limit
			}
		}

		x, y, head := a, b, ""
		if skip > 0 {
			x, y = a[skip:], b[skip:]
			head = fmt.Sprintf("(%d bytes)...", skip)
		}

		// clip truncates p to width bytes and describes what was cut off.
		clip := func(p []byte) ([]byte, string) {
			if len(p) <= width {
				return p, ""
			}
			return p[:width], fmt.Sprintf("...(%d bytes)", len(p)-width)
		}
		x, xtail := clip(x)
		y, ytail := clip(y)

		layout := "%s" + verb + "%s"
		return fmt.Sprintf(layout, head, x, xtail), fmt.Sprintf(layout, head, y, ytail)
	}

	const maxLen = 64
	sa, sb = render("%q", maxLen) // Favor quoted output, first
	if both := sa + sb; strings.Count(both, `\u`)+strings.Count(both, `\x`) > maxLen/8 {
		sa, sb = render("%x", maxLen/2) // Fallback to hex, next
	}
	return sa, sb, false
}

// BuggyReader returns Err after N bytes have been read from R.
type BuggyReader struct {
	R   io.Reader
	N   int64 // Number of valid bytes to read
	Err error // Return this error after N bytes
}

// Read reads from R, but never more than the N bytes that remain allowed.
// Once the allowance is used up and R itself reported no error, Err is
// returned in place of the nil error.
func (br *BuggyReader) Read(buf []byte) (int, error) {
	if limit := br.N; limit < int64(len(buf)) {
		buf = buf[:limit]
	}
	n, err := br.R.Read(buf)
	br.N -= int64(n)
	if err != nil || br.N > 0 {
		return n, err
	}
	return n, br.Err
}

// BuggyWriter returns Err after N bytes have been written to W.
type BuggyWriter struct {
	W   io.Writer
	N   int64 // Number of valid bytes to write
	Err error // Return this error after N bytes
}

// Write writes to W, but never more than the N bytes that remain allowed.
// Once the allowance is used up and W itself reported no error, Err is
// returned in place of the nil error.
func (bw *BuggyWriter) Write(buf []byte) (int, error) {
	if limit := bw.N; limit < int64(len(buf)) {
		buf = buf[:limit]
	}
	n, err := bw.W.Write(buf)
	bw.N -= int64(n)
	if err != nil || bw.N > 0 {
		return n, err
	}
	return n, bw.Err
}
4 | 5 | package testutil 6 | 7 | import "testing" 8 | 9 | func TestCompare(t *testing.T) { 10 | vectors := []struct { 11 | inA, inB string 12 | outA, outB string 13 | ok bool 14 | }{ 15 | {"", "", "", "", true}, 16 | {"", "foo", `""`, `"foo"`, false}, 17 | {"bar", "foo", `"bar"`, `"foo"`, false}, 18 | {"foo", "foo", "", "", true}, 19 | { 20 | "keyboardsmashfoo", "keyboardsmashbar", 21 | `"keyboardsmashfoo"`, `"keyboardsmashbar"`, 22 | false, 23 | }, 24 | { 25 | "keyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er32/q2890r3u0qv", 26 | "keyboardsmashfrioj8394ru4389", 27 | `"keyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er32/q2890"...(6 bytes)`, 28 | `"keyboardsmashfrioj8394ru4389"`, 29 | false, 30 | }, 31 | { 32 | "keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er3fefewaf", 33 | "keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfrioj8394ru4389", 34 | `(16 bytes)..."boardsmashkeyboardsmashkeyboardsmashkeyboardsmashfoofjaewu893p4u"...(36 bytes)`, 35 | `(16 bytes)..."boardsmashkeyboardsmashkeyboardsmashkeyboardsmashfrioj8394ru4389"`, 36 | false, 37 | }, 38 | { 39 | "keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er3fefewaf", 40 | "keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfriojfewafweafwaefweafewafwaefwaefwaefewafwae8394ru4389", 41 | `(34 bytes)..."smashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r"...(18 bytes)`, 42 | `(34 bytes)..."smashkeyboardsmashkeyboardsmashfriojfewafweafwaefweafewafwaefwae"...(22 bytes)`, 43 | false, 44 | }, 45 | { 46 | "keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er3fefewaf", 47 | "\xfaO\xed\x93QK\xb1\xa9O!\xc0\xac\x8dD\xd8\xce\xc01\x1aa\x9c\x108\xbb", 48 | `6b6579626f617264736d6173686b6579626f617264736d6173686b6579626f61...(84 bytes)`, 49 | `fa4fed93514bb1a94f21c0ac8d44d8cec0311a619c1038bb`, 50 | 
false, 51 | }, 52 | { 53 | "keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er3fefewaf", 54 | "keyboardsmashkeyboardsmashkeyboard\xfaO\xed\x93QK\xb1\xa9O!\xc0\xac\x8dD\xd8\xce\xc01\x1aa\x9c\x108\xbb", 55 | `(18 bytes)...617264736d6173686b6579626f617264736d6173686b6579626f617264736d61...(66 bytes)`, 56 | `(18 bytes)...617264736d6173686b6579626f617264fa4fed93514bb1a94f21c0ac8d44d8ce...(8 bytes)`, 57 | false, 58 | }, 59 | } 60 | 61 | for i, v := range vectors { 62 | sa, sb, ok := BytesCompare([]byte(v.inA), []byte(v.inB)) 63 | if sa != v.outA { 64 | t.Errorf("test %d, output A mismatch:\ngot %s\nwant %s", i, sa, v.outA) 65 | } 66 | if sb != v.outB { 67 | t.Errorf("test %d, output B mismatch:\ngot %s\nwant %s", i, sb, v.outB) 68 | } 69 | if ok != v.ok { 70 | t.Errorf("test %d, output equality mismatch: got %t, want %t", i, ok, v.ok) 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /internal/tool/bench/cgo_brotli.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | //go:build cgo && !no_cgo_brotli 6 | // +build cgo,!no_cgo_brotli 7 | 8 | package main 9 | 10 | import "github.com/dsnet/compress/internal/cgo/brotli" 11 | 12 | func init() { 13 | RegisterEncoder(FormatBrotli, "cgo", brotli.NewWriter) 14 | RegisterDecoder(FormatBrotli, "cgo", brotli.NewReader) 15 | } 16 | -------------------------------------------------------------------------------- /internal/tool/bench/cgo_bzip2.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | //go:build cgo && !no_cgo_bzip2 6 | // +build cgo,!no_cgo_bzip2 7 | 8 | package main 9 | 10 | import "github.com/dsnet/compress/internal/cgo/bzip2" 11 | 12 | func init() { 13 | RegisterEncoder(FormatBZ2, "cgo", bzip2.NewWriter) 14 | RegisterDecoder(FormatBZ2, "cgo", bzip2.NewReader) 15 | } 16 | -------------------------------------------------------------------------------- /internal/tool/bench/cgo_flate.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | //go:build cgo && !no_cgo_flate 6 | // +build cgo,!no_cgo_flate 7 | 8 | package main 9 | 10 | import "github.com/dsnet/compress/internal/cgo/flate" 11 | 12 | func init() { 13 | RegisterEncoder(FormatFlate, "cgo", flate.NewWriter) 14 | RegisterDecoder(FormatFlate, "cgo", flate.NewReader) 15 | } 16 | -------------------------------------------------------------------------------- /internal/tool/bench/cgo_lzma.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | //go:build cgo && !no_cgo_lzma 6 | // +build cgo,!no_cgo_lzma 7 | 8 | package main 9 | 10 | import "github.com/dsnet/compress/internal/cgo/lzma" 11 | 12 | func init() { 13 | RegisterEncoder(FormatLZMA2, "cgo", lzma.NewWriter) 14 | RegisterDecoder(FormatLZMA2, "cgo", lzma.NewReader) 15 | } 16 | -------------------------------------------------------------------------------- /internal/tool/bench/cgo_zstd.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | //go:build cgo && !no_cgo_zstd 6 | // +build cgo,!no_cgo_zstd 7 | 8 | package main 9 | 10 | import "github.com/dsnet/compress/internal/cgo/zstd" 11 | 12 | func init() { 13 | RegisterEncoder(FormatZstd, "cgo", zstd.NewWriter) 14 | RegisterDecoder(FormatZstd, "cgo", zstd.NewReader) 15 | } 16 | -------------------------------------------------------------------------------- /internal/tool/bench/codec_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package main 6 | 7 | import ( 8 | "bytes" 9 | "flag" 10 | "io" 11 | "os" 12 | "path/filepath" 13 | "runtime" 14 | "strings" 15 | "sync" 16 | "testing" 17 | 18 | "github.com/dsnet/compress/internal/testutil" 19 | ) 20 | 21 | // The unit tests can also be used to quickly test all of the implementations 22 | // with respect to each other for correctness. The command-line flags can be 23 | // used to specify any arbitrary corpus of test data to use. 
24 | // 25 | // Example usage: 26 | // $ go test -c 27 | // $ ./bench.test \ 28 | // -paths $CORPUS_PATH \ 29 | // -globs "*.txt:*.bin" \ 30 | // -test.run "//fl/std|cgo" \ 31 | // -test.v 32 | 33 | var level int 34 | 35 | func TestMain(m *testing.M) { 36 | setDefaults() 37 | flag.Var(&paths, "paths", "List of paths to search for test files") 38 | flag.Var(&globs, "globs", "List of globs to match for test files") 39 | flag.IntVar(&level, "level", 6, "Default compression level to use") 40 | flag.Parse() 41 | os.Exit(m.Run()) 42 | } 43 | 44 | type semaphore chan struct{} 45 | 46 | func newSemaphore(n int) semaphore { return make(chan struct{}, n) } 47 | func (s *semaphore) Acquire() { *s <- struct{}{} } 48 | func (s *semaphore) Release() { <-*s } 49 | 50 | // Each sub-test is run in a goroutine so that we can have fine control over 51 | // exactly how many sub-tests are running. When running over a large corpus, 52 | // this helps prevent all the sub-tests from executing at once and OOMing 53 | // the machine. The semaphores below control the maximum number of concurrent 54 | // operations that can be running for each dimension. 55 | // 56 | // We avoid using t.Parallel since that causes t.Run to return immediately and 57 | // does not provide the caller with feedback that all sub-operations completed. 58 | // This causes the next operation to prematurely start, leading to overloads. 59 | var ( 60 | semaFiles = newSemaphore(runtime.NumCPU()) 61 | semaFormats = newSemaphore(runtime.NumCPU()) 62 | semaEncoders = newSemaphore(runtime.NumCPU()) 63 | semaDecoders = newSemaphore(runtime.NumCPU()) 64 | ) 65 | 66 | // TestCodecs tests that the output of each registered encoder is a valid input 67 | // for each registered decoder. This test runs in O(n^2) where n is the number 68 | // of registered codecs. This assumes that the number of test files and 69 | // compression formats stays relatively constant. 
70 | func TestCodecs(t *testing.T) { 71 | var wg sync.WaitGroup 72 | defer wg.Wait() 73 | for _, fi := range getFiles(paths, globs) { 74 | fi := fi 75 | name := "File:" + strings.Replace(fi.Rel, string(filepath.Separator), "_", -1) 76 | goRun(t, &wg, &semaFiles, name, func(t *testing.T) { 77 | dd := testutil.MustLoadFile(fi.Abs) 78 | testFormats(t, dd) 79 | }) 80 | } 81 | } 82 | 83 | func testFormats(t *testing.T, dd []byte) { 84 | var wg sync.WaitGroup 85 | defer wg.Wait() 86 | for _, ft := range formats { 87 | ft := ft 88 | name := "Format:" + enumToFmt[ft] 89 | goRun(t, &wg, &semaFormats, name, func(t *testing.T) { 90 | if len(encoders[ft]) == 0 || len(decoders[ft]) == 0 { 91 | t.Skip("no codecs available") 92 | } 93 | testEncoders(t, ft, dd) 94 | }) 95 | } 96 | } 97 | 98 | func testEncoders(t *testing.T, ft Format, dd []byte) { 99 | var wg sync.WaitGroup 100 | defer wg.Wait() 101 | for encName := range encoders[ft] { 102 | encName := encName 103 | name := "Encoder:" + encName 104 | goRun(t, &wg, &semaEncoders, name, func(t *testing.T) { 105 | be := new(bytes.Buffer) 106 | zw := encoders[ft][encName](be, level) 107 | if _, err := io.Copy(zw, bytes.NewReader(dd)); err != nil { 108 | t.Fatalf("unexpected Write error: %v", err) 109 | } 110 | if err := zw.Close(); err != nil { 111 | t.Fatalf("unexpected Close error: %v", err) 112 | } 113 | de := be.Bytes() 114 | testDecoders(t, ft, dd, de) 115 | }) 116 | } 117 | } 118 | 119 | func testDecoders(t *testing.T, ft Format, dd, de []byte) { 120 | var wg sync.WaitGroup 121 | defer wg.Wait() 122 | for decName := range decoders[ft] { 123 | decName := decName 124 | name := "Decoder:" + decName 125 | goRun(t, &wg, &semaDecoders, name, func(t *testing.T) { 126 | bd := new(bytes.Buffer) 127 | zr := decoders[ft][decName](bytes.NewReader(de)) 128 | if _, err := io.Copy(bd, zr); err != nil { 129 | t.Fatalf("unexpected Read error: %v", err) 130 | } 131 | if err := zr.Close(); err != nil { 132 | t.Fatalf("unexpected Close error: %v", 
err) 133 | } 134 | if got, want, ok := testutil.BytesCompare(bd.Bytes(), dd); !ok { 135 | t.Errorf("data mismatch:\ngot %s\nwant %s", got, want) 136 | } 137 | }) 138 | } 139 | } 140 | 141 | func goRun(t *testing.T, wg *sync.WaitGroup, sm *semaphore, name string, fn func(t *testing.T)) { 142 | wg.Add(1) 143 | go func() { 144 | defer wg.Done() 145 | t.Run(name, func(t *testing.T) { 146 | sm.Acquire() 147 | defer sm.Release() 148 | defer recoverPanic(t) 149 | fn(t) 150 | }) 151 | }() 152 | } 153 | 154 | func recoverPanic(t *testing.T) { 155 | if ex := recover(); ex != nil { 156 | t.Fatalf("unexpected panic: %v", ex) 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /internal/tool/bench/lib_ds.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | //go:build !no_lib_ds 6 | // +build !no_lib_ds 7 | 8 | package main 9 | 10 | import ( 11 | "io" 12 | 13 | "github.com/dsnet/compress/brotli" 14 | "github.com/dsnet/compress/bzip2" 15 | "github.com/dsnet/compress/flate" 16 | ) 17 | 18 | func init() { 19 | RegisterDecoder(FormatBrotli, "ds", 20 | func(r io.Reader) io.ReadCloser { 21 | zr, err := brotli.NewReader(r, nil) 22 | if err != nil { 23 | panic(err) 24 | } 25 | return zr 26 | }) 27 | RegisterDecoder(FormatFlate, "ds", 28 | func(r io.Reader) io.ReadCloser { 29 | zr, err := flate.NewReader(r, nil) 30 | if err != nil { 31 | panic(err) 32 | } 33 | return zr 34 | }) 35 | RegisterEncoder(FormatBZ2, "ds", 36 | func(w io.Writer, lvl int) io.WriteCloser { 37 | zw, err := bzip2.NewWriter(w, &bzip2.WriterConfig{Level: lvl}) 38 | if err != nil { 39 | panic(err) 40 | } 41 | return zw 42 | }) 43 | RegisterDecoder(FormatBZ2, "ds", 44 | func(r io.Reader) io.ReadCloser { 45 | zr, err := bzip2.NewReader(r, nil) 46 | if err != nil { 47 | panic(err) 48 | } 49 | return zr 50 | }) 51 | } 52 | -------------------------------------------------------------------------------- /internal/tool/bench/lib_kp.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | //go:build !no_lib_kp 6 | // +build !no_lib_kp 7 | 8 | package main 9 | 10 | import ( 11 | "io" 12 | 13 | "github.com/klauspost/compress/flate" 14 | ) 15 | 16 | func init() { 17 | RegisterEncoder(FormatFlate, "kp", 18 | func(w io.Writer, lvl int) io.WriteCloser { 19 | zw, err := flate.NewWriter(w, lvl) 20 | if err != nil { 21 | panic(err) 22 | } 23 | return zw 24 | }) 25 | RegisterDecoder(FormatFlate, "kp", 26 | func(r io.Reader) io.ReadCloser { 27 | return flate.NewReader(r) 28 | }) 29 | } 30 | -------------------------------------------------------------------------------- /internal/tool/bench/lib_std.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | //go:build !no_lib_std 6 | // +build !no_lib_std 7 | 8 | package main 9 | 10 | import ( 11 | "io" 12 | "io/ioutil" 13 | 14 | "compress/bzip2" 15 | "compress/flate" 16 | ) 17 | 18 | func init() { 19 | RegisterEncoder(FormatFlate, "std", 20 | func(w io.Writer, lvl int) io.WriteCloser { 21 | zw, err := flate.NewWriter(w, lvl) 22 | if err != nil { 23 | panic(err) 24 | } 25 | return zw 26 | }) 27 | RegisterDecoder(FormatFlate, "std", 28 | func(r io.Reader) io.ReadCloser { 29 | return flate.NewReader(r) 30 | }) 31 | RegisterDecoder(FormatBZ2, "std", 32 | func(r io.Reader) io.ReadCloser { 33 | return ioutil.NopCloser(bzip2.NewReader(r)) 34 | }) 35 | } 36 | -------------------------------------------------------------------------------- /internal/tool/bench/lib_uk.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | //go:build !no_lib_uk 6 | // +build !no_lib_uk 7 | 8 | package main 9 | 10 | import ( 11 | "io" 12 | "io/ioutil" 13 | 14 | "github.com/ulikunitz/xz/lzma" 15 | ) 16 | 17 | func init() { 18 | RegisterEncoder(FormatLZMA2, "uk", 19 | func(w io.Writer, lvl int) io.WriteCloser { 20 | // This level conversion logic emulates the conversion found in 21 | // LZMA2Options.java from https://git.tukaani.org/?p=xz-java.git 22 | if lvl < 0 || lvl > 9 { 23 | panic("invalid level") 24 | } 25 | dict := [...]int{ 26 | 1 << 18, 1 << 20, 1 << 21, 1 << 22, 1 << 22, 27 | 1 << 23, 1 << 23, 1 << 24, 1 << 25, 1 << 26, 28 | }[lvl] 29 | match := lzma.HashTable4 30 | // TODO(dsnet): This currently crashes on zero.bin when using 31 | // BinaryTree on revision 76f94b7c69c6f84be96bcfc2443042b198689565. 32 | /* 33 | if lvl > 4 { 34 | match = lzma.BinaryTree 35 | } 36 | */ 37 | 38 | zw, err := lzma.Writer2Config{DictCap: dict, Matcher: match}.NewWriter2(w) 39 | if err != nil { 40 | panic(err) 41 | } 42 | return zw 43 | }) 44 | RegisterDecoder(FormatLZMA2, "uk", 45 | func(r io.Reader) io.ReadCloser { 46 | zr, err := lzma.NewReader2(r) 47 | if err != nil { 48 | panic(err) 49 | } 50 | return ioutil.NopCloser(zr) 51 | }) 52 | } 53 | -------------------------------------------------------------------------------- /internal/tool/bitgen/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // BitGen to generate a binary from a BitGen formatting input. 6 | // It accepts the BitGen format from stdin and outputs to stdout. 
7 | package main 8 | 9 | import ( 10 | "io/ioutil" 11 | "os" 12 | 13 | "github.com/dsnet/compress/internal/testutil" 14 | ) 15 | 16 | func main() { 17 | buf, err := ioutil.ReadAll(os.Stdin) 18 | if err != nil { 19 | panic(err) 20 | } 21 | 22 | buf = testutil.MustDecodeBitGen(string(buf)) 23 | 24 | _, err = os.Stdout.Write(buf) 25 | if err != nil { 26 | panic(err) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /internal/tool/fuzz/brotli/brotli.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build gofuzz 6 | 7 | package bzip2 8 | 9 | import ( 10 | "bytes" 11 | "io/ioutil" 12 | 13 | gbrotli "github.com/dsnet/compress/brotli" 14 | cbrotli "github.com/dsnet/compress/internal/cgo/brotli" 15 | ) 16 | 17 | func Fuzz(data []byte) int { 18 | // Decompress using the Go decoder. 19 | gr, err := gbrotli.NewReader(bytes.NewReader(data), nil) 20 | if err != nil { 21 | panic(err) 22 | } 23 | gb, gerr := ioutil.ReadAll(gr) 24 | if err := gr.Close(); gerr == nil { 25 | gerr = err 26 | } else if gerr != nil && err == nil { 27 | panic("nil on Close after non-nil error") 28 | } 29 | 30 | // Decompress using the C decoder. 31 | cr := cbrotli.NewReader(bytes.NewReader(data)) 32 | cb, cerr := ioutil.ReadAll(cr) 33 | if err := cr.Close(); cerr == nil { 34 | cerr = err 35 | } else if cerr != nil && err == nil { 36 | panic("nil on Close after non-nil error") 37 | } 38 | 39 | switch { 40 | case gerr == nil && cerr == nil: 41 | if !bytes.Equal(gb, cb) { 42 | panic("mismatching bytes") 43 | } 44 | case gerr != nil && cerr == nil: 45 | panic(gerr) 46 | case gerr == nil && cerr != nil: 47 | panic(cerr) 48 | default: 49 | // Ensure that both gb and cb have the same common prefix. 
50 | if !bytes.HasPrefix(gb, cb) && !bytes.HasPrefix(cb, gb) { 51 | panic("mismatching leading bytes") 52 | } 53 | } 54 | 55 | if cerr == nil || gerr == nil { 56 | return 1 // Favor valid inputs 57 | } 58 | return 0 59 | } 60 | -------------------------------------------------------------------------------- /internal/tool/fuzz/bzip2/bzip2.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build gofuzz 6 | 7 | package bzip2 8 | 9 | import ( 10 | "bytes" 11 | "errors" 12 | "io/ioutil" 13 | 14 | "github.com/dsnet/compress" 15 | gbzip2 "github.com/dsnet/compress/bzip2" 16 | cbzip2 "github.com/dsnet/compress/internal/cgo/bzip2" 17 | ) 18 | 19 | func Fuzz(data []byte) int { 20 | data, ok := testDecoders(data, true) 21 | for i := 1; i <= 9; i++ { 22 | testGoEncoder(data, i) 23 | testCEncoder(data, i) 24 | } 25 | if ok { 26 | return 1 // Favor valid inputs 27 | } 28 | return 0 29 | } 30 | 31 | // testDecoders tests that the input can be handled by both Go and C decoders. 32 | // This test does not panic if both decoders run into an error, since it 33 | // means that they both agree that the input is bad. 34 | // 35 | // If updateCRCs is set, then the Go bzip2 implementation will ignore all 36 | // checksum errors and manually adjust the checksum values before running the 37 | // C implementation. This hack drastically increases the probability that 38 | // gofuzz can generate a "valid" file. 39 | func testDecoders(data []byte, updateCRCs bool) ([]byte, bool) { 40 | // Decompress using the Go decoder. 
41 | gr, err := gbzip2.NewReader(bytes.NewReader(data), nil) 42 | if err != nil { 43 | panic(err) 44 | } 45 | gb, gerr := ioutil.ReadAll(gr) 46 | if err := gr.Close(); gerr == nil { 47 | gerr = err 48 | } else if gerr != nil && err == nil { 49 | panic("nil on Close after non-nil error") 50 | } 51 | 52 | // Check or update the checksums. 53 | if gerr == nil { 54 | if updateCRCs { 55 | data = gr.Checksums.Apply(data) 56 | } else if !gr.Checksums.Verify(data) { 57 | gerr = errors.New("bzip2: checksum error") 58 | } 59 | } 60 | 61 | // Decompress using the C decoder. 62 | cr := cbzip2.NewReader(bytes.NewReader(data)) 63 | cb, cerr := ioutil.ReadAll(cr) 64 | if err := cr.Close(); cerr == nil { 65 | cerr = err 66 | } else if cerr != nil && err == nil { 67 | panic("nil on Close after non-nil error") 68 | } 69 | 70 | switch { 71 | case gerr == nil && cerr == nil: 72 | if !bytes.Equal(gb, cb) { 73 | panic("mismatching bytes") 74 | } 75 | return gb, true 76 | case gerr != nil && cerr == nil: 77 | // Ignore deprecated errors since there are no plans to provide 78 | // these features in the Go implementation. 79 | if err, ok := gerr.(compress.Error); ok && err.IsDeprecated() { 80 | return cb, false 81 | } 82 | panic(gerr) 83 | case gerr == nil && cerr != nil: 84 | panic(cerr) 85 | default: 86 | // Ensure that both gb and cb have the same common prefix. 87 | if !bytes.HasPrefix(gb, cb) && !bytes.HasPrefix(cb, gb) { 88 | panic("mismatching leading bytes") 89 | } 90 | return nil, false 91 | } 92 | } 93 | 94 | // testGoEncoder encodes the input data with the Go encoder and then checks that 95 | // both the Go and C decoders can properly decompress the output. 96 | func testGoEncoder(data []byte, level int) { 97 | // Compress using the Go encoder. 
98 | bb := new(bytes.Buffer) 99 | gw, err := gbzip2.NewWriter(bb, &gbzip2.WriterConfig{Level: level}) 100 | if err != nil { 101 | panic(err) 102 | } 103 | defer gw.Close() 104 | n, err := gw.Write(data) 105 | if n != len(data) || err != nil { 106 | panic(err) 107 | } 108 | if err := gw.Close(); err != nil { 109 | panic(err) 110 | } 111 | 112 | // Decompress using both the Go and C decoders. 113 | b, ok := testDecoders(bb.Bytes(), false) 114 | if !ok { 115 | panic("decoder error") 116 | } 117 | if !bytes.Equal(b, data) { 118 | panic("mismatching bytes") 119 | } 120 | } 121 | 122 | // testCEncoder encodes the input data with the C encoder and then checks that 123 | // both the Go and C decoders can properly decompress the output. 124 | func testCEncoder(data []byte, level int) { 125 | // Compress using the C encoder. 126 | bb := new(bytes.Buffer) 127 | cw := cbzip2.NewWriter(bb, level) 128 | defer cw.Close() 129 | n, err := cw.Write(data) 130 | if n != len(data) || err != nil { 131 | panic(err) 132 | } 133 | if err := cw.Close(); err != nil { 134 | panic(err) 135 | } 136 | 137 | // Decompress using both the Go and C decoders. 138 | b, ok := testDecoders(bb.Bytes(), false) 139 | if !ok { 140 | panic("decoder error") 141 | } 142 | if !bytes.Equal(b, data) { 143 | panic("mismatching bytes") 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /internal/tool/fuzz/bzip2_bwt/bzip2_bwt.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | // +build gofuzz 6 | 7 | package bzip2_bwt 8 | 9 | import ( 10 | "bytes" 11 | "hash/adler32" 12 | 13 | "github.com/dsnet/compress/bzip2" 14 | ) 15 | 16 | func Fuzz(data []byte) int { 17 | if len(data) == 0 { 18 | return -1 19 | } 20 | testReverse(data) 21 | testRoundTrip(data) 22 | return 0 23 | } 24 | 25 | // testReverse verifies that we can reverse the BWT on any arbitrary input 26 | // so long as we choose a valid origin pointer. 27 | func testReverse(data []byte) { 28 | data = append([]byte(nil), data...) // Make copy of data 29 | ptr := int(adler32.Checksum(data)) % len(data) 30 | bzip2.ReverseBWT(data, ptr) 31 | } 32 | 33 | // testRoundTrip verifies that a round-trip BWT faithfully reproduces the 34 | // input data set. 35 | func testRoundTrip(want []byte) { 36 | got := append([]byte(nil), want...) 37 | ptr := bzip2.ForwardBWT(got) 38 | bzip2.ReverseBWT(got, ptr) 39 | 40 | if ptr < 0 || ptr >= len(want) { 41 | panic("invalid origin pointer") 42 | } 43 | if !bytes.Equal(got, want) { 44 | panic("mismatching bytes") 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /internal/tool/fuzz/fuzz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | cd "$(dirname "${BASH_SOURCE[0]}")" 6 | 7 | if [ $# == 0 ]; then 8 | echo "Usage: $0 PKG" 9 | echo 10 | echo -e "Valid packages:\n\t$(ls -d */ | sed 's/\/*$//g' | tr '\n' ' ')" 11 | exit 1 12 | fi 13 | 14 | # Check that the fuzzing tools are available. 15 | for TOOL in go-fuzz go-fuzz-build; do 16 | command -v $TOOL >/dev/null 2>&1 || { 17 | echo "Aborting: could not locate $TOOL."; exit 1; 18 | } 19 | done 20 | 21 | # Clone the initial work directory if it does not exist. 22 | if [ ! -d ".work" ]; then 23 | echo "Fuzzing workdir does not exist." 
// decompressMeta attempts to decompress the meta-encoded blocks.
// It expects decompression to succeed and to output nothing.
//
// The input is not modified: it is copied and an empty final stored block is
// appended so that the data ends on a terminated DEFLATE stream. The copy is
// then read as a sequence of DEFLATE streams until it is exhausted. The
// function panics if any stream fails to decode, yields output, or fails on
// Close.
func decompressMeta(data []byte) {
	// Empty stored block with the final bit set.
	terminator := [...]byte{0x01, 0x00, 0x00, 0xff, 0xff}

	stream := make([]byte, 0, len(data)+len(terminator))
	stream = append(stream, data...)
	stream = append(stream, terminator[:]...)

	src := bytes.NewReader(stream)
	for src.Len() != 0 {
		zr := flate.NewReader(src)
		out, err := ioutil.ReadAll(zr)
		switch {
		case err != nil:
			panic(err)
		case len(out) != 0:
			panic("non-zero meta-encoded block")
		}
		if cerr := zr.Close(); cerr != nil {
			panic(cerr)
		}
	}
}
// main builds a pseudo-random byte sequence of at least size bytes and writes
// its first size bytes to the file called name, panicking if the write fails.
//
// The generator is seeded with 0, so the output depends on the exact order of
// calls made on r below; reordering them changes the generated file.
func main() {
	var b []byte
	r := rand.New(rand.NewSource(0))

	for len(b) < size {
		n := 16 + r.Intn(64) // Length of substring
		p := r.Float32()
		switch {
		case p <= 0.75:
			// Write strings of base64 encoded values.
			for i := 0; i < n; i++ {
				// This p shadows the outer one and picks the alphabet for
				// a single character.
				p := r.Float32()
				switch {
				case p <= 0.1:
					// Write any lowercase letter.
					b = append(b, alpha1[r.Intn(len(alpha1))])
				case p <= 0.7:
					// Write any lowercase or uppercase letter.
					b = append(b, alpha2[r.Intn(len(alpha2))])
				case p <= 1.0:
					// Write any character from the base64 alphabet.
					b = append(b, alpha3[r.Intn(len(alpha3))])
				}
			}
		case p <= 1.00:
			// Write strings of utf8 encoded values.
			for i := 0; i < n; i++ {
				p := r.Float32()
				switch {
				case p <= 0.65:
					// Write a 2-byte long utf8 code point.
					// The rune is drawn from 0x80..0x7FF.
					var buf [4]byte
					cnt := utf8.EncodeRune(buf[:], rune(0x80+r.Intn(0x780)))
					b = append(b, buf[:cnt]...)
				case p <= 1.00:
					// Write a 3-byte long utf8 code point.
					// The rune is drawn from 0x800..0xFFFF.
					var buf [4]byte
					cnt := utf8.EncodeRune(buf[:], rune(0x800+r.Intn(0xF800)))
					b = append(b, buf[:cnt]...)
				}
			}
		}
	}

	// The loop may overshoot size, so only the first size bytes are written.
	if err := ioutil.WriteFile(name, b[:size], 0664); err != nil {
		panic(err)
	}
}
// main builds a byte sequence out of random runs and copies of earlier data,
// then writes its first size bytes to the file called name, panicking if the
// write fails.
//
// The generator is seeded with 0, so the output depends on the exact order of
// calls made on r below; reordering them changes the generated file.
func main() {
	var b []byte
	r := rand.New(rand.NewSource(0))

	// randLen picks a run length between 4 and 511. A bucket is chosen first
	// (ranges noted on each case), then a value within that bucket.
	randLen := func() (l int) {
		p := r.Float32()
		switch {
		case p <= 0.15: // 4..7
			l = 4 + r.Intn(4)
		case p <= 0.30: // 8..15
			l = 8 + r.Intn(8)
		case p <= 0.45: // 16..31
			l = 16 + r.Intn(16)
		case p <= 0.60: // 32..63
			l = 32 + r.Intn(32)
		case p <= 0.75: // 64..127
			l = 64 + r.Intn(64)
		case p <= 0.90: // 128..255
			l = 128 + r.Intn(128)
		case p <= 1.0: // 256..511
			l = 256 + r.Intn(256)
		}
		return l
	}

	// randDist picks a backwards distance between 1 and 32767, retrying until
	// the distance does not reach before the start of b.
	randDist := func() (d int) {
		for d == 0 || d > len(b) {
			p := r.Float32()
			switch {
			case p <= 0.1: // 1..1
				d = 1 + r.Intn(1)
			case p <= 0.2: // 2..3
				d = 2 + r.Intn(2)
			case p <= 0.3: // 4..7
				d = 4 + r.Intn(4)
			case p <= 0.4: // 8..15
				d = 8 + r.Intn(8)
			case p <= 0.5: // 16..31
				d = 16 + r.Intn(16)
			case p <= 0.55: // 32..63
				d = 32 + r.Intn(32)
			case p <= 0.60: // 64..127
				d = 64 + r.Intn(64)
			case p <= 0.65: // 128..255
				d = 128 + r.Intn(128)
			case p <= 0.70: // 256..511
				d = 256 + r.Intn(256)
			case p <= 0.75: // 512..1023
				d = 512 + r.Intn(512)
			case p <= 0.80: // 1024..2047
				d = 1024 + r.Intn(1024)
			case p <= 0.85: // 2048..4095
				d = 2048 + r.Intn(2048)
			case p <= 0.90: // 4096..8191
				d = 4096 + r.Intn(4096)
			case p <= 0.95: // 8192..16383
				d = 8192 + r.Intn(8192)
			case p <= 1.00: // 16384..32767
				d = 16384 + r.Intn(16384)
			}
		}
		return d
	}

	// writeRand appends l random bytes to b.
	writeRand := func(l int) {
		for i := 0; i < l; i++ {
			b = append(b, byte(r.Int()))
		}
	}

	// writeCopy appends l bytes, each copied from d bytes before the current
	// end of b. When d < l the copy reads bytes it has just written.
	writeCopy := func(d, l int) {
		for i := 0; i < l; i++ {
			b = append(b, b[len(b)-d])
		}
	}

	// Seed b with random data so that randDist always has something to
	// reference.
	writeRand(randLen())
	for len(b) < size {
		p := r.Float32()
		switch {
		case p <= 0.1:
			// Generate random new data.
			writeRand(randLen())
		case p <= 0.9:
			// Write a long distance copy.
			// Pairs are redrawn until the distance exceeds the length.
			// NOTE(review): this loop cannot exit while len(b) <= 4, since
			// then d <= 4 <= l always holds; presumably the fixed seed never
			// reaches that state. Confirm before changing the seed.
			d, l := randDist(), randLen()
			for d <= l {
				d, l = randDist(), randLen()
			}
			writeCopy(d, l)
		case p <= 1.0:
			// Write a possibly short distance copy.
			writeCopy(randDist(), randLen())
		}
	}

	// The loop may overshoot size, so only the first size bytes are written.
	if err := ioutil.WriteFile(name, b[:size], 0664); err != nil {
		panic(err)
	}
}
// countReader wraps an io.Reader and keeps, in N, a running total of the bytes
// that have been read through it.
type countReader struct {
	R io.Reader // Underlying reader
	N int64     // Total number of bytes returned by R so far
}

// Read forwards the call to R and adds the reported byte count to N.
// The count and the error from R are returned unchanged.
func (cr *countReader) Read(buf []byte) (n int, err error) {
	if n, err = cr.R.Read(buf); n != 0 {
		cr.N += int64(n)
	}
	return n, err
}
30 | type flateReader struct { 31 | InputOffset int64 // Total number of bytes read from underlying io.Reader 32 | OutputOffset int64 // Total number of bytes emitted from Read 33 | 34 | zr io.ReadCloser 35 | br *bufio.Reader 36 | cr countReader 37 | } 38 | 39 | func newFlateReader(rd io.Reader) (*flateReader, error) { 40 | fr := new(flateReader) 41 | fr.cr = countReader{R: rd} 42 | fr.br = bufio.NewReader(&fr.cr) 43 | fr.zr = flate.NewReader(fr.br) 44 | return fr, nil 45 | } 46 | 47 | func (fr *flateReader) Reset(rd io.Reader) { 48 | *fr = flateReader{zr: fr.zr, br: fr.br} 49 | fr.cr = countReader{R: rd} 50 | fr.br.Reset(&fr.cr) 51 | fr.zr.(flate.Resetter).Reset(fr.br, nil) 52 | } 53 | 54 | func (fr *flateReader) Read(buf []byte) (int, error) { 55 | offset := fr.cr.N - int64(fr.br.Buffered()) 56 | n, err := fr.zr.Read(buf) 57 | fr.InputOffset += (fr.cr.N - int64(fr.br.Buffered())) - offset 58 | fr.OutputOffset += int64(n) 59 | return n, errWrap(err) 60 | } 61 | 62 | // countWriter is a trivial io.Writer that counts the number of bytes written. 63 | type countWriter struct { 64 | W io.Writer 65 | N int64 66 | } 67 | 68 | func (cw *countWriter) Write(buf []byte) (int, error) { 69 | n, err := cw.W.Write(buf) 70 | cw.N += int64(n) 71 | return n, err 72 | } 73 | 74 | // flateWriter is a trivial wrapper around flate.Writer keeps tracks of offsets. 
75 | type flateWriter struct { 76 | InputOffset int64 // Total number of bytes issued to Write 77 | OutputOffset int64 // Total number of bytes written to underlying io.Writer 78 | 79 | zw *flate.Writer 80 | cw countWriter 81 | } 82 | 83 | func newFlateWriter(wr io.Writer, lvl int) (*flateWriter, error) { 84 | var err error 85 | fw := new(flateWriter) 86 | switch lvl { 87 | case 0: 88 | lvl = flate.DefaultCompression 89 | case -1: 90 | lvl = flate.NoCompression 91 | } 92 | fw.cw = countWriter{W: wr} 93 | fw.zw, err = flate.NewWriter(&fw.cw, lvl) 94 | return fw, errWrap(err) 95 | } 96 | 97 | func (fw *flateWriter) Reset(wr io.Writer) { 98 | *fw = flateWriter{zw: fw.zw} 99 | fw.cw = countWriter{W: wr} 100 | fw.zw.Reset(&fw.cw) 101 | } 102 | 103 | func (fw *flateWriter) Write(buf []byte) (int, error) { 104 | offset := fw.cw.N 105 | n, err := fw.zw.Write(buf) 106 | fw.OutputOffset += fw.cw.N - offset 107 | fw.InputOffset += int64(n) 108 | return n, errWrap(err) 109 | } 110 | 111 | func (fw *flateWriter) Flush() error { 112 | offset := fw.cw.N 113 | err := fw.zw.Flush() 114 | fw.OutputOffset += fw.cw.N - offset 115 | return errWrap(err) 116 | } 117 | -------------------------------------------------------------------------------- /xflate/index.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package xflate 6 | 7 | const ( 8 | unknownType = iota 9 | deflateType 10 | indexType 11 | footerType 12 | ) 13 | 14 | type index struct { 15 | // Records is a list of records that indicate the location of all chunks 16 | // in the stream. However, rather than recording the starting offset of 17 | // each chunk, only the ending offsets are recorded. 18 | // 19 | // The starting record {0, 0} is not included since it is implied. 
20 | // The last record effectively holds the total size of the stream. 21 | Records []record 22 | 23 | BackSize int64 // Size of previous index when encoded 24 | IndexSize int64 // Size of this index when encoded 25 | } 26 | 27 | type record struct { 28 | CompOffset int64 // Offset in compressed stream where decompression can start from 29 | RawOffset int64 // The uncompressed offset that CompOffset is associated with 30 | Type int // Type of the record 31 | } 32 | 33 | // Reset resets the index. 34 | func (idx *index) Reset() { 35 | *idx = index{Records: idx.Records[:0]} 36 | } 37 | 38 | // AppendRecord appends a new record to the end of the index and reports whether 39 | // the operation was successful or not. 40 | func (idx *index) AppendRecord(compSize, rawSize int64, typ int) bool { 41 | if rawSize < 0 || compSize < 0 { 42 | return false // Invalid size 43 | } 44 | 45 | lastRec := idx.LastRecord() 46 | rec := record{ 47 | CompOffset: lastRec.CompOffset + compSize, 48 | RawOffset: lastRec.RawOffset + rawSize, 49 | Type: typ, 50 | } 51 | if rec.CompOffset < lastRec.CompOffset || rec.RawOffset < lastRec.RawOffset { 52 | return false // Overflow detected 53 | } 54 | idx.Records = append(idx.Records, rec) 55 | return true 56 | } 57 | 58 | // AppendIndex appends the contents of another index onto the current receiver 59 | // and reports whether the operation was successful or not. 60 | func (idx *index) AppendIndex(other *index) bool { 61 | var preRec record 62 | for i, rec := range other.Records { 63 | csize, rsize := rec.CompOffset-preRec.CompOffset, rec.RawOffset-preRec.RawOffset 64 | if !idx.AppendRecord(csize, rsize, rec.Type) { 65 | idx.Records = idx.Records[:len(idx.Records)-i] // Ensure atomic append 66 | return false 67 | } 68 | preRec = rec 69 | } 70 | return true 71 | } 72 | 73 | // Search searches for the record that best matches the raw offset given. 
74 | // This search will return the location of the record with the lowest 75 | // RawOffset that is still greater than the given offset. 76 | // It return -1 if such a record does not exist. 77 | // 78 | // This method is intended to be used in conjunction with GetRecords, 79 | // which returns a pair of records (prev, curr). 80 | // With these records, the following can be computed: 81 | // 82 | // // Where in the underlying reader the decompressor should start from. 83 | // compOffset := prev.CompOffset 84 | // 85 | // // The total number of uncompressed bytes to discard to reach offset. 86 | // rawDiscard := offset - prev.RawOffset 87 | // 88 | // // The total compressed size of the current block. 89 | // compSize := curr.CompOffset - prev.CompOffset 90 | // 91 | // // The total uncompressed size of the current block. 92 | // rawSize := curr.RawOffset - prev.RawOffset 93 | func (idx *index) Search(offset int64) int { 94 | recs := idx.Records 95 | i, imin, imax := -1, 0, len(recs)-1 96 | for imax >= imin && i == -1 { 97 | imid := (imin + imax) / 2 98 | gteCurr := bool(offset >= recs[imid].RawOffset) 99 | ltNext := bool(imid+1 >= len(recs) || offset < recs[imid+1].RawOffset) 100 | if gteCurr && ltNext { 101 | i = imid 102 | } else if gteCurr { 103 | imin = imid + 1 104 | } else { 105 | imax = imid - 1 106 | } 107 | } 108 | return i + 1 109 | } 110 | 111 | // GetRecords returns the previous and current records at the given position. 112 | // This method will automatically bind the search position within the bounds 113 | // of the index. Thus, this will return zero value records if the position is 114 | // too low, and the last record if the value is too high. 
115 | func (idx *index) GetRecords(i int) (prev, curr record) { 116 | recs := idx.Records 117 | if i > len(recs) { 118 | i = len(recs) 119 | } 120 | if i-1 >= 0 && i-1 < len(recs) { 121 | prev = recs[i-1] 122 | } 123 | if i >= 0 && i < len(recs) { 124 | curr = recs[i] 125 | } else { 126 | curr = prev 127 | curr.Type = unknownType 128 | } 129 | return prev, curr 130 | } 131 | 132 | // LastRecord returns the last record if it exists, otherwise the zero value. 133 | func (idx *index) LastRecord() record { 134 | var rec record 135 | if len(idx.Records) > 0 { 136 | rec = idx.Records[len(idx.Records)-1] 137 | } 138 | return rec 139 | } 140 | -------------------------------------------------------------------------------- /xflate/meta_fuzz.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | //go:build gofuzz 6 | // +build gofuzz 7 | 8 | // This file exists to export internal implementation details for fuzz testing. 9 | 10 | package xflate 11 | 12 | import ( 13 | "io" 14 | 15 | "github.com/dsnet/compress/xflate/internal/meta" 16 | ) 17 | 18 | func NewMetaReader(r io.Reader) *meta.Reader { 19 | return meta.NewReader(r) 20 | } 21 | 22 | func NewMetaWriter(r io.Writer) *meta.Writer { 23 | return meta.NewWriter(r) 24 | } 25 | -------------------------------------------------------------------------------- /xflate/xflate_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | package xflate 6 | 7 | import ( 8 | "bytes" 9 | "io" 10 | "testing" 11 | 12 | "github.com/dsnet/compress/internal/testutil" 13 | ) 14 | 15 | var ( 16 | testBinary = testutil.MustLoadFile("../testdata/binary.bin") 17 | testDigits = testutil.MustLoadFile("../testdata/digits.txt") 18 | testHuffman = testutil.MustLoadFile("../testdata/huffman.txt") 19 | testRandom = testutil.MustLoadFile("../testdata/random.bin") 20 | testRepeats = testutil.MustLoadFile("../testdata/repeats.bin") 21 | testTwain = testutil.MustLoadFile("../testdata/twain.txt") 22 | testZeros = testutil.MustLoadFile("../testdata/zeros.bin") 23 | ) 24 | 25 | func TestRoundTrip(t *testing.T) { 26 | vectors := []struct { 27 | name string 28 | input []byte 29 | }{ 30 | {"Nil", nil}, 31 | {"Binary", testBinary}, 32 | {"Digits", testDigits}, 33 | {"Huffman", testHuffman}, 34 | {"Random", testRandom}, 35 | {"Repeats", testRepeats}, 36 | {"Twain", testTwain}, 37 | {"Zeros", testZeros}, 38 | } 39 | 40 | for _, v := range vectors { 41 | v := v 42 | t.Run(v.name, func(t *testing.T) { 43 | t.Parallel() 44 | 45 | var wb, rb bytes.Buffer 46 | 47 | xw, err := NewWriter(&wb, &WriterConfig{ChunkSize: 1 << 10}) 48 | if err != nil { 49 | t.Errorf("unexpected error: NewWriter() = %v", err) 50 | } 51 | cnt, err := io.Copy(xw, bytes.NewReader(v.input)) 52 | if err != nil { 53 | t.Errorf("unexpected error: Write() = %v", err) 54 | } 55 | if cnt != int64(len(v.input)) { 56 | t.Errorf("write count mismatch: got %d, want %d", cnt, len(v.input)) 57 | } 58 | if err := xw.Close(); err != nil { 59 | t.Errorf("unexpected error: Close() = %v", err) 60 | } 61 | 62 | xr, err := NewReader(bytes.NewReader(wb.Bytes()), nil) 63 | if err != nil { 64 | t.Errorf("unexpected error: NewReader() = %v", err) 65 | } 66 | cnt, err = io.Copy(&rb, xr) 67 | if err != nil { 68 | t.Errorf("unexpected error: Read() = %v", err) 69 | } 70 | if cnt != int64(len(v.input)) { 71 | t.Errorf("read count mismatch: got %d, want %d", cnt, len(v.input)) 72 | } 
73 | if err := xr.Close(); err != nil { 74 | t.Errorf("unexpected error: Close() = %v", err) 75 | } 76 | 77 | output := rb.Bytes() 78 | if got, want, ok := testutil.BytesCompare(output, v.input); !ok { 79 | t.Errorf("output data mismatch:\ngot %s\nwant %s", got, want) 80 | } 81 | }) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /zbench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2017, Joe Tsai. All rights reserved. 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE.md file. 6 | 7 | # zbench wraps internal/tool/bench and is useful for comparing benchmarks from 8 | # the implementations in this repository relative to other implementations. 9 | # 10 | # See internal/tool/bench/main.go for more details. 11 | cd $(dirname "${BASH_SOURCE[0]}")/internal/tool/bench 12 | go run $(go list -f '{{ join .GoFiles "\n" }}') "$@" 13 | -------------------------------------------------------------------------------- /zfuzz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2017, Joe Tsai. All rights reserved. 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE.md file. 6 | 7 | # zfuzz wraps internal/tool/fuzz and is useful for fuzz testing each of 8 | # the implementations in this repository. 9 | cd $(dirname "${BASH_SOURCE[0]}")/internal/tool/fuzz 10 | ./fuzz.sh "$@" 11 | -------------------------------------------------------------------------------- /zprof.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2017, Joe Tsai. All rights reserved. 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE.md file. 

# zprof runs coverage profiling for one package and, when extra test
# arguments are supplied, CPU and memory profiling as well. The generated
# HTML/SVG reports are moved into the _zprof_ directory next to this script.
if [ $# -eq 0 ]; then
	echo "Usage: $0 PKG_PATH TEST_ARGS..."
	echo ""
	echo "Runs coverage and performance benchmarks for a given package."
	echo "The results are stored in the _zprof_ directory."
	echo ""
	echo "Example:"
	echo " $0 flate -test.bench=Decode/Twain/Default"
	exit 1
fi

DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PKG_PATH=$1
PKG_NAME=$(basename "$PKG_PATH")
shift

TMPDIR=$(mktemp -d) || exit 1
# The test binary is built inside $DIR/$PKG_PATH (see the subshell below), so
# it is removed by absolute path rather than relative to the caller's working
# directory. Single quotes defer expansion until the trap fires, which keeps
# the inner quoting intact for paths that contain spaces.
trap 'rm -rf "$TMPDIR" "$DIR/$PKG_PATH/$PKG_NAME.test"' SIGINT SIGTERM EXIT

(
	# Everything below writes into and moves files out of the current
	# directory, so never continue if the package directory is unreachable.
	cd "$DIR/$PKG_PATH" || exit 1

	# Print the go version.
	go version

	# Perform coverage profiling.
	go test "github.com/dsnet/compress/$PKG_PATH" -coverprofile "$TMPDIR/cover.profile" || exit 1
	go tool cover -html "$TMPDIR/cover.profile" -o cover.html

	# Perform performance profiling (only when test arguments were given).
	if [ $# -ne 0 ]; then
		go test -c "github.com/dsnet/compress/$PKG_PATH" || exit 1
		"./$PKG_NAME.test" -test.cpuprofile "$TMPDIR/cpu.profile" -test.memprofile "$TMPDIR/mem.profile" -test.run - "$@"
		# PPROF is expanded unquoted on purpose so it splits into a command
		# plus its arguments.
		PPROF="go tool pprof"
		$PPROF -output=cpu.svg -web "$PKG_NAME.test" "$TMPDIR/cpu.profile" 2> /dev/null
		$PPROF -output=cpu.html -weblist=. "$PKG_NAME.test" "$TMPDIR/cpu.profile" 2> /dev/null
		$PPROF -output=mem_objects.svg -alloc_objects -web "$PKG_NAME.test" "$TMPDIR/mem.profile" 2> /dev/null
		$PPROF -output=mem_objects.html -alloc_objects -weblist=. "$PKG_NAME.test" "$TMPDIR/mem.profile" 2> /dev/null
		$PPROF -output=mem_space.svg -alloc_space -web "$PKG_NAME.test" "$TMPDIR/mem.profile" 2> /dev/null
		$PPROF -output=mem_space.html -alloc_space -weblist=. "$PKG_NAME.test" "$TMPDIR/mem.profile" 2> /dev/null
	fi

	# Replace any previous results for this package with the fresh reports.
	rm -rf "$DIR/_zprof_/$PKG_NAME"
	mkdir -p "$DIR/_zprof_/$PKG_NAME"
	mv *.html *.svg "$DIR/_zprof_/$PKG_NAME" 2> /dev/null
)

--------------------------------------------------------------------------------
/ztest.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Copyright 2017, Joe Tsai. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE.md file.

# ztest runs gofmt, the race-enabled test suite, staticcheck and golint over
# the repository and exits non-zero if any of them produced output.

# Resolve the package directory first: with a bare "cd $(...)" an empty
# result would silently change into $HOME and run every check there.
ROOT=$(go list -f '{{ .Dir }}' github.com/dsnet/compress) || exit 1
cd "$ROOT" || exit 1

BOLD="\x1b[1mRunning: "
PASS="\x1b[32mPASS"
FAIL="\x1b[31mFAIL"
RESET="\x1b[0m"

# "grep -E" is used instead of the obsolescent "egrep" alias throughout.
echo -e "${BOLD}fmt${RESET}"
RET_FMT=$(find . -name "*.go" | grep -E -v "/(_.*_|\..*|testdata)/" | xargs gofmt -d)
if [[ -n "$RET_FMT" ]]; then echo "$RET_FMT"; echo; fi

echo -e "${BOLD}test${RESET}"
RET_TEST=$(go test -race ./... | grep -E -v "^(ok|[?])\s+")
if [[ -n "$RET_TEST" ]]; then echo "$RET_TEST"; echo; fi

echo -e "${BOLD}staticcheck${RESET}"
RET_SCHK=$(staticcheck \
	-ignore "
	github.com/dsnet/compress/brotli/*.go:SA4016
	github.com/dsnet/compress/brotli/*.go:S1023
	github.com/dsnet/compress/brotli/*.go:U1000
	github.com/dsnet/compress/bzip2/*.go:S1023
	github.com/dsnet/compress/flate/*.go:U1000
	github.com/dsnet/compress/internal/cgo/lzma/*.go:SA4000
	github.com/dsnet/compress/internal/prefix/*.go:S1004
	github.com/dsnet/compress/internal/prefix/*.go:S1023
	github.com/dsnet/compress/internal/prefix/*.go:SA4016
	github.com/dsnet/compress/internal/tool/bench/*.go:S1007
	github.com/dsnet/compress/xflate/internal/meta/*.go:S1023
	" ./... 2>&1)
if [[ -n "$RET_SCHK" ]]; then echo "$RET_SCHK"; echo; fi

echo -e "${BOLD}lint${RESET}"
RET_LINT=$(golint ./... 2>&1 |
	grep -E -v "^vendor/" |
	grep -E -v "should have comment(.*)or be unexported" |
	grep -E -v "^(.*)type name will be used as(.*)by other packages" |
	grep -E -v "^brotli/transform.go:(.*)replace i [+]= 1 with i[+]{2}" |
	grep -E -v "^internal/prefix/prefix.go:(.*)replace symBits(.*) [-]= 1 with symBits(.*)[-]{2}" |
	grep -E -v "^xflate/common.go:(.*)NoCompression should be of the form" |
	grep -E -v "^exit status")
if [[ -n "$RET_LINT" ]]; then echo "$RET_LINT"; echo; fi

# Any non-empty report from the steps above counts as a failure.
if [[ -n "$RET_FMT" ]] || [[ -n "$RET_TEST" ]] || [[ -n "$RET_SCHK" ]] || [[ -n "$RET_LINT" ]]; then
	echo -e "${FAIL}${RESET}"; exit 1
else
	echo -e "${PASS}${RESET}"; exit 0
fi

--------------------------------------------------------------------------------