├── .travis.yml
├── LICENSE.md
├── README.md
├── api.go
├── brotli
    ├── bit_reader.go
    ├── bit_writer.go
    ├── brotli_test.go
    ├── common.go
    ├── common_test.go
    ├── context.go
    ├── dict.go
    ├── dict_decoder.go
    ├── dict_decoder_test.go
    ├── dict_encoder.go
    ├── dict_encoder_test.go
    ├── prefix.go
    ├── prefix_decoder.go
    ├── prefix_encoder.go
    ├── prefix_test.go
    ├── reader.go
    ├── reader_test.go
    ├── testdata
    │   ├── alice29.txt
    │   ├── alice29.txt.br
    │   ├── asyoulik.txt
    │   ├── asyoulik.txt.br
    │   ├── compressed_file
    │   ├── compressed_file.br
    │   ├── compressed_repeated
    │   ├── compressed_repeated.br
    │   ├── digits-best-1e4.br
    │   ├── digits-best-1e5.br
    │   ├── digits-best-1e6.br
    │   ├── digits-default-1e4.br
    │   ├── digits-default-1e5.br
    │   ├── digits-default-1e6.br
    │   ├── digits-speed-1e4.br
    │   ├── digits-speed-1e5.br
    │   ├── digits-speed-1e6.br
    │   ├── lcet10.txt
    │   ├── lcet10.txt.br
    │   ├── mapsdatazrh
    │   ├── mapsdatazrh.br
    │   ├── monkey
    │   ├── monkey.br
    │   ├── plrabn12.txt
    │   ├── plrabn12.txt.br
    │   ├── random_org_10k.bin
    │   ├── random_org_10k.bin.br
    │   ├── twain-best-1e4.br
    │   ├── twain-best-1e5.br
    │   ├── twain-best-1e6.br
    │   ├── twain-default-1e4.br
    │   ├── twain-default-1e5.br
    │   ├── twain-default-1e6.br
    │   ├── twain-speed-1e4.br
    │   ├── twain-speed-1e5.br
    │   ├── twain-speed-1e6.br
    │   ├── ukkonooa
    │   └── ukkonooa.br
    ├── transform.go
    ├── transform_test.go
    ├── writer.go
    └── writer_test.go
├── bzip2
    ├── bwt.go
    ├── bwt_test.go
    ├── bzip2_test.go
    ├── common.go
    ├── common_test.go
    ├── fuzz_off.go
    ├── fuzz_on.go
    ├── internal
    │   └── sais
    │   │   ├── common.go
    │   │   ├── sais_byte.go
    │   │   ├── sais_gen.go
    │   │   └── sais_int.go
    ├── mtf_rle2.go
    ├── mtf_rle2_test.go
    ├── prefix.go
    ├── prefix_test.go
    ├── reader.go
    ├── reader_test.go
    ├── rle1.go
    ├── rle1_test.go
    ├── testdata
    │   ├── gauntlet_test3.bin
    │   ├── gauntlet_test3.bwt
    │   ├── silesia_ooffice.bin
    │   ├── silesia_ooffice.bwt
    │   ├── silesia_xray.bin
    │   ├── silesia_xray.bwt
    │   ├── testfiles_test3.bin
    │   ├── testfiles_test3.bwt
    │   ├── testfiles_test4.bin
    │   └── testfiles_test4.bwt
    ├── writer.go
    └── writer_test.go
├── doc
    ├── brotli-framing-format.md
    ├── bzip2-format.pdf
    ├── bzip2
    │   ├── bitmap-decode.png
    │   ├── bitmap.psd
    │   ├── bwt-decode.png
    │   ├── bwt-encode.png
    │   ├── bwt.psd
    │   ├── bzip2-format.docx
    │   ├── diagram.psd
    │   ├── hexdump-complex.png
    │   ├── hexdump-simple.png
    │   ├── hexdump.psd
    │   ├── mtf-decode.png
    │   ├── mtf.psd
    │   ├── stream-complex.png
    │   ├── stream-simple.png
    │   └── tree-example.png
    ├── xflate-format.pdf
    └── xflate
    │   ├── hexdump-complex.png
    │   ├── hexdump-simple.png
    │   ├── hexdump.psd
    │   ├── stream-complex.png
    │   ├── stream-simple.png
    │   ├── tree-example.png
    │   ├── tree-hclen.png
    │   └── xflate-format.docx
├── flate
    ├── common.go
    ├── dict_decoder.go
    ├── flate_test.go
    ├── prefix.go
    ├── reader.go
    └── reader_test.go
├── go.mod
├── go.sum
├── internal
    ├── cgo
    │   ├── README.md
    │   ├── brotli
    │   │   └── brotli.go
    │   ├── bzip2
    │   │   └── bzip2.go
    │   ├── flate
    │   │   └── flate.go
    │   ├── lzma
    │   │   └── lzma.go
    │   └── zstd
    │   │   └── zstd.go
    ├── common.go
    ├── common_test.go
    ├── debug.go
    ├── errors
    │   └── errors.go
    ├── gofuzz.go
    ├── prefix
    │   ├── debug.go
    │   ├── decoder.go
    │   ├── encoder.go
    │   ├── prefix.go
    │   ├── prefix_test.go
    │   ├── range.go
    │   ├── reader.go
    │   ├── wrap.go
    │   └── writer.go
    ├── release.go
    ├── testutil
    │   ├── bitgen.go
    │   ├── bitgen_test.go
    │   ├── rand.go
    │   ├── util.go
    │   └── util_test.go
    └── tool
    │   ├── bench
    │       ├── benchmarks.go
    │       ├── cgo_brotli.go
    │       ├── cgo_bzip2.go
    │       ├── cgo_flate.go
    │       ├── cgo_lzma.go
    │       ├── cgo_zstd.go
    │       ├── codec_test.go
    │       ├── config.go
    │       ├── lib_ds.go
    │       ├── lib_kp.go
    │       ├── lib_std.go
    │       ├── lib_uk.go
    │       └── main.go
    │   ├── bitgen
    │       └── main.go
    │   └── fuzz
    │       ├── brotli
    │           └── brotli.go
    │       ├── bzip2
    │           └── bzip2.go
    │       ├── bzip2_bwt
    │           └── bzip2_bwt.go
    │       ├── fuzz.sh
    │       └── xflate_meta
    │           └── xflate_meta.go
├── testdata
    ├── binary.bin
    ├── digits.txt
    ├── huffman.go
    ├── huffman.txt
    ├── random.bin
    ├── random.go
    ├── repeats.bin
    ├── repeats.go
    ├── twain.txt
    ├── zeros.bin
    └── zeros.go
├── xflate
    ├── common.go
    ├── example_test.go
    ├── flate.go
    ├── index.go
    ├── index_test.go
    ├── internal
    │   └── meta
    │   │   ├── meta.go
    │   │   ├── meta_stats.go
    │   │   ├── meta_test.go
    │   │   ├── reader.go
    │   │   ├── reader_test.go
    │   │   ├── writer.go
    │   │   └── writer_test.go
    ├── meta_fuzz.go
    ├── reader.go
    ├── reader_test.go
    ├── writer.go
    ├── writer_test.go
    ├── xflate_stats.go
    └── xflate_test.go
├── zbench.sh
├── zfuzz.sh
├── zprof.sh
└── ztest.sh


/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: false
 2 | language: go
 3 | before_install:
 4 |   - curl -L https://github.com/google/brotli/archive/v1.0.2.tar.gz | tar -zxv
 5 |   - (cd brotli-1.0.2 && mkdir out && cd out && ../configure-cmake && make && sudo make install)
 6 |   - rm -rf brotli-1.0.2
 7 |   - curl -L https://github.com/facebook/zstd/archive/v1.3.2.tar.gz | tar -zxv
 8 |   - (cd zstd-1.3.2 && sudo make install)
 9 |   - rm -rf zstd-1.3.2
10 |   - sudo ldconfig
11 |   - mkdir /tmp/go1.12
12 |   - curl -L -s https://dl.google.com/go/go1.12.linux-amd64.tar.gz | tar -zxf - -C /tmp/go1.12 --strip-components 1
13 |   - unset GOROOT
14 |   - (GO111MODULE=on /tmp/go1.12/bin/go mod vendor)
15 |   - (cd /tmp && GO111MODULE=on /tmp/go1.12/bin/go get golang.org/x/lint/golint@8f45f776aaf18cebc8d65861cc70c33c60471952)
16 |   - (cd /tmp && GO111MODULE=on /tmp/go1.12/bin/go get honnef.co/go/tools/cmd/staticcheck@2019.1)
17 | matrix:
18 |   include:
19 |     - go: 1.9.x
20 |       script:
21 |         - go test -v -race ./...
22 |     - go: 1.10.x
23 |       script:
24 |         - go test -v -race ./...
25 |     - go: 1.11.x
26 |       script:
27 |         - go test -v -race ./...
28 |     - go: 1.12.x
29 |       script:
30 |         - ./ztest.sh
31 |     - go: master
32 |       script:
33 |         - go test -v -race ./...
34 |   allow_failures:
35 |     - go: master
36 |   fast_finish: true
37 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright © 2015, Joe Tsai and The Go Authors. All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are met:
 5 | 
 6 | * Redistributions of source code must retain the above copyright notice, this
 7 | list of conditions and the following disclaimer.
 8 | * Redistributions in binary form must reproduce the above copyright notice,
 9 | this list of conditions and the following disclaimer in the documentation and/or
10 | other materials provided with the distribution.
11 | * Neither the copyright holder nor the names of its contributors may be used to
12 | endorse or promote products derived from this software without specific prior
13 | written permission.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Collection of compression libraries for Go #
 2 | 
 3 | [![GoDoc](https://godoc.org/github.com/dsnet/compress/cmp?status.svg)](https://godoc.org/github.com/dsnet/compress)
 4 | [![Build Status](https://travis-ci.org/dsnet/compress.svg?branch=master)](https://travis-ci.org/dsnet/compress)
 5 | [![Report Card](https://goreportcard.com/badge/github.com/dsnet/compress)](https://goreportcard.com/report/github.com/dsnet/compress)
 6 | 
 7 | ## Introduction ##
 8 | 
 9 | **NOTE: This library is in active development. As such, there are no guarantees about the stability of the API. The author reserves the right to arbitrarily break the API for any reason.**
10 | 
11 | This repository hosts a collection of compression related libraries. The goal of this project is to provide pure Go implementations for popular compression algorithms beyond what the Go standard library provides. The goals for these packages are as follows:
12 | * Maintainable: That the code remains well documented, well tested, readable, easy to maintain, and easy to verify that it conforms to the specification for the format being implemented.
13 | * Performant: To be able to compress and decompress within at least 80% of the rates that the C implementations are able to achieve.
14 | * Flexible: That the code provides low-level and fine granularity control over the compression streams similar to what the C APIs would provide.
15 | 
16 | Of these three, the first objective is often at odds with the other two objectives and provides interesting challenges. Higher performance can often be achieved by muddling abstraction layers or using non-intuitive low-level primitives. Also, more features and functionality, while useful in some situations, often complicate the API. Thus, this package will attempt to satisfy all the goals, but will defer to favoring maintainability when the performance or flexibility benefits are not significant enough.
17 | 
18 | 
19 | ## Library Status ##
20 | 
21 | For the packages available, only some features are currently implemented:
22 | 
23 | | Package | Reader | Writer |
24 | | ------- | :----: | :----: |
25 | | brotli | :white_check_mark: | |
26 | | bzip2 | :white_check_mark: | :white_check_mark: |
27 | | flate | :white_check_mark: | |
28 | | xflate | :white_check_mark: | :white_check_mark: |
29 | 
30 | This library is in active development. As such, there are no guarantees about the stability of the API. The author reserves the right to arbitrarily break the API for any reason. When the library becomes more mature, it is planned to eventually conform to some strict versioning scheme like [Semantic Versioning](http://semver.org/).
31 | 
32 | However, in the meantime, this library does provide some basic API guarantees. For the types defined below, the method signatures are guaranteed to not change. Note that the author still reserves the right to change the fields within the ```Reader``` and ```Writer``` structs.
33 | ```go
34 | type ReaderConfig struct { ... }
35 | type Reader struct { ... }
36 |   func NewReader(io.Reader, *ReaderConfig) (*Reader, error) { ... }
37 |   func (*Reader) Read([]byte) (int, error)                  { ... }
38 |   func (*Reader) Close() error                              { ... }
39 | 
40 | type WriterConfig struct { ... }
41 | type Writer struct { ... }
42 |   func NewWriter(io.Writer, *WriterConfig) (*Writer, error) { ... }
43 |   func (*Writer) Write([]byte) (int, error)                 { ... }
44 |   func (*Writer) Close() error                              { ... }
45 | ```
46 | 
47 | To see what work still remains, see the [Task List](https://github.com/dsnet/compress/wiki/Task-List).
48 | 
49 | ## Performance  ##
50 | 
51 | See [Performance Metrics](https://github.com/dsnet/compress/wiki/Performance-Metrics).
52 | 
53 | 
54 | ## Frequently Asked Questions ##
55 | 
56 | See [Frequently Asked Questions](https://github.com/dsnet/compress/wiki/Frequently-Asked-Questions).
57 | 
58 | 
59 | ## Installation ##
60 | 
61 | Run the command:
62 | 
63 | ```go get -u github.com/dsnet/compress```
64 | 
65 | This library requires `Go1.9` or higher in order to build.
66 | 
67 | 
68 | ## Packages ##
69 | 
70 | | Package | Description |
71 | | :------ | :---------- |
72 | | [brotli](http://godoc.org/github.com/dsnet/compress/brotli) | Package brotli implements the Brotli format, described in RFC 7932. |
73 | | [bzip2](http://godoc.org/github.com/dsnet/compress/bzip2) | Package bzip2 implements the BZip2 compressed data format. |
74 | | [flate](http://godoc.org/github.com/dsnet/compress/flate) | Package flate implements the DEFLATE format, described in RFC 1951. |
75 | | [xflate](http://godoc.org/github.com/dsnet/compress/xflate) | Package xflate implements the XFLATE format, a random-access extension to DEFLATE. |
76 | 


--------------------------------------------------------------------------------
/api.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | // Package compress is a collection of compression libraries.
 6 | package compress
 7 | 
 8 | import (
 9 | 	"bufio"
10 | 	"io"
11 | 
12 | 	"github.com/dsnet/compress/internal/errors"
13 | )
14 | 
15 | // Error is the interface that identifies all compression related errors.
16 | type Error interface {
17 | 	error
18 | 	CompressError() // takes and returns nothing; presumably a marker method (confirm in internal/errors)
19 | 
20 | 	// IsDeprecated reports whether a deprecated and unsupported feature was used.
21 | 	IsDeprecated() bool
22 | 
23 | 	// IsCorrupted reports whether the input stream was corrupted.
24 | 	IsCorrupted() bool
25 | }
26 | 
27 | var _ Error = errors.Error{} // compile-time check that errors.Error satisfies Error
28 | 
29 | // ByteReader is an interface accepted by all decompression Readers.
30 | // It guarantees that the decompressor never reads more data than is necessary
31 | // from the underlying io.Reader.
32 | type ByteReader interface {
33 | 	io.Reader     // bulk reads via Read
34 | 	io.ByteReader // single-byte reads via ReadByte
35 | }
36 | 
37 | var _ ByteReader = (*bufio.Reader)(nil) // compile-time check that *bufio.Reader satisfies ByteReader
38 | 
39 | // BufferedReader is an interface accepted by all decompression Readers.
40 | // It guarantees that the decompressor never reads more data than is necessary
41 | // from the underlying io.Reader. Since BufferedReader allows a decompressor
42 | // to peek at bytes further along in the stream without advancing the read
43 | // pointer, decompression can experience a significant performance gain when
44 | // provided a reader that satisfies this interface. Thus, a decompressor will
45 | // prefer this interface over ByteReader for performance reasons.
46 | //
47 | // A *bufio.Reader satisfies this interface (asserted at compile time below).
48 | type BufferedReader interface {
49 | 	io.Reader
50 | 
51 | 	// Buffered returns the number of bytes currently buffered.
52 | 	//
53 | 	// The returned count becomes invalid following the next Read/Discard operation.
54 | 	Buffered() int
55 | 
56 | 	// Peek returns the next n bytes without advancing the reader.
57 | 	//
58 | 	// If Peek returns fewer than n bytes, it also returns an error explaining
59 | 	// why the peek is short. Peek must support peeking of at least 8 bytes.
60 | 	// If 0 <= n <= Buffered(), Peek is guaranteed to succeed without reading
61 | 	// from the underlying io.Reader.
62 | 	//
63 | 	// The returned slice becomes invalid following the next Read/Discard operation.
64 | 	Peek(n int) ([]byte, error)
65 | 
66 | 	// Discard skips the next n bytes, returning the number of bytes discarded.
67 | 	//
68 | 	// If Discard skips fewer than n bytes, it also returns an error.
69 | 	// If 0 <= n <= Buffered(), Discard is guaranteed to succeed without reading
70 | 	// from the underlying io.Reader.
71 | 	Discard(n int) (int, error)
72 | }
73 | 
74 | var _ BufferedReader = (*bufio.Reader)(nil) // compile-time check that *bufio.Reader satisfies BufferedReader
75 | 


--------------------------------------------------------------------------------
/brotli/bit_writer.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package brotli
 6 | 
 7 | import "io"
 8 | 
 9 | type bitWriter struct { // placeholder bit-level writer; every method in this file is a no-op stub
10 | 	wr     io.Writer // destination writer; not assigned by any method in this file
11 | 	offset int64 // Number of bytes written to underlying io.Writer
12 | }
13 | 
14 | func (bw *bitWriter) Init(w io.Writer) { // stub: w is ignored and bw is left unchanged
15 | 	return
16 | }
17 | 
18 | func (bw *bitWriter) Write(buf []byte) (int, error) { // stub: writes nothing
19 | 	return 0, nil // NOTE(review): reports 0 bytes with a nil error even when buf is non-empty
20 | }
21 | 
22 | func (bw *bitWriter) WriteBits(val, nb uint) { // stub: val and nb are ignored
23 | 	return
24 | }
25 | 
26 | func (bw *bitWriter) WritePads() { // stub: does nothing
27 | 	return
28 | }
29 | 
30 | func (bw *bitWriter) WriteSymbol(pe *prefixEncoder, sym uint) { // stub: pe and sym are ignored
31 | 	return
32 | }
33 | 


--------------------------------------------------------------------------------
/brotli/brotli_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package brotli
 6 | 
 7 | import (
 8 | 	"bytes"
 9 | 	"errors"
10 | 	"flag"
11 | 	"os/exec"
12 | 	"strings"
13 | )
14 | 
15 | var zcheck = flag.Bool("zcheck", false, "verify test vectors with C brotli library")
16 | 
17 | func cmdCompress(input []byte) ([]byte, error)   { return cmdExec(input) } // runs the external tool with no extra arguments
18 | func cmdDecompress(input []byte) ([]byte, error) { return cmdExec(input, "-d") } // runs the external tool with the "-d" flag
19 | 
20 | // cmdExec executes the external "bro" tool, passing the input in as stdin.
21 | // It returns the captured stdout and an error.
22 | func cmdExec(input []byte, args ...string) ([]byte, error) {
23 | 	var bo, be bytes.Buffer // captured stdout (bo) and stderr (be)
24 | 	cmd := exec.Command("bro", args...)
25 | 	cmd.Stdin = bytes.NewReader(input)
26 | 	cmd.Stdout = &bo
27 | 	cmd.Stderr = &be
28 | 	err := cmd.Run()
29 | 	ss := strings.Split(strings.TrimSpace(be.String()), "\n") // stderr split into lines
30 | 	if len(ss) > 0 && ss[len(ss)-1] != "" { // Split always yields at least one element, so the second test decides
31 | 		// Assume any stderr indicates an error and last line is the message.
32 | 		return nil, errors.New(ss[len(ss)-1]) // stdout is discarded and err from Run is replaced
33 | 	}
34 | 	return bo.Bytes(), err // empty stderr: return stdout together with whatever Run reported
35 | }
36 | 


--------------------------------------------------------------------------------
/brotli/common.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | // Package brotli implements the Brotli compressed data format,
  6 | // described in RFC 7932.
  7 | package brotli
  8 | 
  9 | import (
 10 | 	"fmt"
 11 | 
 12 | 	"github.com/dsnet/compress/internal/errors"
 13 | )
 14 | 
 15 | func errorf(c int, f string, a ...interface{}) error { // builds an errors.Error with code c, Pkg "brotli", and a Sprintf-formatted message
 16 | 	return errors.Error{Code: c, Pkg: "brotli", Msg: fmt.Sprintf(f, a...)}
 17 | }
 18 | 
 19 | // errWrap converts a lower-level errors.Error into one attributed to this
 20 | // package. The replaceCode passed in is used to replace the code of any error
 21 | // that errors.IsInvalid reports as invalid. Other error types pass through.
 22 | //
 23 | // For the Reader, set this to errors.Corrupted.
 24 | // For the Writer, set this to errors.Internal.
 25 | func errWrap(err error, replaceCode int) error {
 26 | 	if cerr, ok := err.(errors.Error); ok { // anything that is not an errors.Error value is returned unchanged
 27 | 		if errors.IsInvalid(cerr) {
 28 | 			cerr.Code = replaceCode // cerr is a copy produced by the type assertion
 29 | 		}
 30 | 		err = errorf(cerr.Code, "%s", cerr.Msg) // rebuild with Pkg "brotli", carrying over Code and Msg
 31 | 	}
 32 | 	return err
 33 | }
 34 | 
 35 | var ( // package-level error values, all built through errorf
 36 | 	errClosed    = errorf(errors.Closed, "")    // empty message
 37 | 	errCorrupted = errorf(errors.Corrupted, "") // empty message
 38 | 	errInvalid   = errorf(errors.Invalid, "")   // empty message
 39 | 	errUnaligned = errorf(errors.Invalid, "non-aligned bit buffer")
 40 | )
 41 | 
 42 | var (
 43 | 	reverseLUT [256]uint8 // reverseLUT[b] is b with its 8 bits in reverse order; filled by initCommonLUTs
 44 | )
 45 | 
 46 | func init() { // package initializer: builds every lookup table once via initLUTs
 47 | 	initLUTs()
 48 | }
 49 | 
 50 | func initLUTs() { // runs each table initializer in turn; also called directly by BenchmarkInit
 51 | 	initCommonLUTs() // fills reverseLUT
 52 | 	initPrefixLUTs()
 53 | 	initContextLUTs() // fills contextP1LUT and contextP2LUT
 54 | 	initDictLUTs()
 55 | }
 56 | 
 57 | func initCommonLUTs() { // fills reverseLUT with the bit-reversal of every byte value
 58 | 	for i := range reverseLUT {
 59 | 		b := uint8(i)
 60 | 		b = (b&0xaa)>>1 | (b&0x55)<<1 // swap adjacent bits
 61 | 		b = (b&0xcc)>>2 | (b&0x33)<<2 // swap adjacent 2-bit pairs
 62 | 		b = (b&0xf0)>>4 | (b&0x0f)<<4 // swap the two nibbles
 63 | 		reverseLUT[i] = b
 64 | 	}
 65 | }
 66 | 
 67 | // neededBits computes the minimum number of bits needed to encode n elements.
 68 | func neededBits(n uint32) (nb uint) {
 69 | 	for n--; n > 0; n >>= 1 { // counts the bit length of n-1; n == 0 wraps to the max uint32 and yields 32
 70 | 		nb++
 71 | 	}
 72 | 	return
 73 | }
 74 | 
 75 | // reverseUint32 reverses all 32 bits of v using the per-byte reverseLUT.
 76 | func reverseUint32(v uint32) (x uint32) {
 77 | 	x |= uint32(reverseLUT[byte(v>>0)]) << 24 // lowest byte, bit-reversed, becomes the highest byte
 78 | 	x |= uint32(reverseLUT[byte(v>>8)]) << 16
 79 | 	x |= uint32(reverseLUT[byte(v>>16)]) << 8
 80 | 	x |= uint32(reverseLUT[byte(v>>24)]) << 0 // highest byte, bit-reversed, becomes the lowest byte
 81 | 	return x
 82 | }
 83 | 
 84 | // reverseBits reverses the lower n bits of v.
 85 | func reverseBits(v uint32, n uint) uint32 {
 86 | 	return reverseUint32(v << (32 - n)) // move the low n bits to the top, then reverse the word; n == 0 shifts everything out and gives 0
 87 | }
 88 | 
 89 | func allocUint8s(s []uint8, n int) []uint8 { // returns a length-n slice, reusing s's storage when it is large enough
 90 | 	if cap(s) >= n {
 91 | 		return s[:n] // reused storage keeps whatever values it already held
 92 | 	}
 93 | 	return make([]uint8, n, n*3/2) // fresh zeroed slice with capacity n*3/2; old contents are not copied
 94 | }
 95 | 
 96 | func allocUint32s(s []uint32, n int) []uint32 { // returns a length-n slice, reusing s's storage when it is large enough
 97 | 	if cap(s) >= n {
 98 | 		return s[:n] // reused storage keeps whatever values it already held
 99 | 	}
100 | 	return make([]uint32, n, n*3/2) // fresh zeroed slice with capacity n*3/2; old contents are not copied
101 | }
102 | 
103 | func extendSliceUints32s(s [][]uint32, n int) [][]uint32 { // returns a length-n slice that preserves the existing inner slices
104 | 	if cap(s) >= n {
105 | 		return s[:n]
106 | 	}
107 | 	ss := make([][]uint32, n, n*3/2)
108 | 	copy(ss, s[:cap(s)]) // carry over every element of the old backing array, including those beyond len(s)
109 | 	return ss
110 | }
111 | 
112 | func extendDecoders(s []prefixDecoder, n int) []prefixDecoder { // returns a length-n slice that preserves the existing decoders
113 | 	if cap(s) >= n {
114 | 		return s[:n]
115 | 	}
116 | 	ss := make([]prefixDecoder, n, n*3/2)
117 | 	copy(ss, s[:cap(s)]) // carry over every element of the old backing array, including those beyond len(s)
118 | 	return ss
119 | }
120 | 


--------------------------------------------------------------------------------
/brotli/common_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package brotli
 6 | 
 7 | import (
 8 | 	"bytes"
 9 | 	"hash/crc32"
10 | 	"testing"
11 | )
12 | 
13 | func TestTableCRC(t *testing.T) { // checks the package's static tables against fixed CRC-32 (IEEE) values
14 | 	// Convert transformLUT to byte array according to Appendix B of the RFC.
15 | 	var transformBuf bytes.Buffer
16 | 	for _, t := range transformLUT { // NOTE: this t shadows the *testing.T parameter inside the loop
17 | 		transformBuf.WriteString(t.prefix + "\x00") // prefix, NUL-terminated
18 | 		transformBuf.WriteByte(byte(t.transform)) // transform ID as a single byte
19 | 		transformBuf.WriteString(t.suffix + "\x00") // suffix, NUL-terminated
20 | 	}
21 | 
22 | 	vectors := []struct {
23 | 		crc uint32 // expected checksum
24 | 		buf []byte // table bytes to checksum
25 | 	}{
26 | 		{crc: 0x5136cb04, buf: dictLUT[:]},
27 | 		{crc: 0x8e91efb7, buf: contextLUT0[:]},
28 | 		{crc: 0xd01a32f4, buf: contextLUT1[:]},
29 | 		{crc: 0x0dd7a0d6, buf: contextLUT2[:]},
30 | 		{crc: 0x3d965f81, buf: transformBuf.Bytes()},
31 | 	}
32 | 
33 | 	for i, v := range vectors {
34 | 		crc := crc32.ChecksumIEEE(v.buf)
35 | 		if crc != v.crc {
36 | 			t.Errorf("test %d, CRC-32 mismatch: got %08x, want %08x", i, crc, v.crc)
37 | 		}
38 | 	}
39 | }
40 | 
41 | // This package relies on dynamic generation of LUTs to reduce the static
42 | // binary size. This benchmark attempts to measure the startup cost of init.
43 | // This benchmark is not thread-safe, so do not run it in parallel with other
44 | // tests or benchmarks!
45 | func BenchmarkInit(b *testing.B) {
46 | 	b.ReportAllocs()
47 | 	for i := 0; i < b.N; i++ {
48 | 		initLUTs() // re-runs the same table initialization that init performs
49 | 	}
50 | }
51 | 


--------------------------------------------------------------------------------
/brotli/context.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package brotli
  6 | 
  7 | // These constants are the context modes defined in RFC section 7.1.
  8 | const (
  9 | 	contextLSB6 = iota // initContextLUTs uses the low 6 bits of the byte
 10 | 	contextMSB6 // initContextLUTs uses the high 6 bits of the byte
 11 | 	contextUTF8 // initContextLUTs looks the byte up in contextLUT0 and contextLUT1
 12 | 	contextSigned // initContextLUTs looks the byte up in contextLUT2
 13 | 
 14 | 	numContextModes // number of modes; sizes contextP1LUT and contextP2LUT
 15 | )
 16 | 
 17 | // These constants are defined in RFC sections 2 and 7.3.
 18 | const (
 19 | 	maxLitContextIDs  = 64 // presumably the count of distinct literal context IDs; confirm against the RFC
 20 | 	maxDistContextIDs = 4 // presumably the count of distinct distance context IDs (getDistContextID returns 0..3 for l >= 2)
 21 | )
 22 | 
 23 | // These LUTs are taken directly from RFC section 7.1.
 24 | var (
 25 | 	contextLUT0 = [256]uint8{
 26 | 		0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
 27 | 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 28 | 		8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
 29 | 		44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
 30 | 		12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
 31 | 		52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
 32 | 		12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
 33 | 		60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
 34 | 		0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
 35 | 		0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
 36 | 		0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
 37 | 		0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
 38 | 		2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
 39 | 		2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
 40 | 		2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
 41 | 		2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
 42 | 	}
 43 | 
 44 | 	contextLUT1 = [256]uint8{
 45 | 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 46 | 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 47 | 		0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 48 | 		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
 49 | 		1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 50 | 		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
 51 | 		1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 52 | 		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
 53 | 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 54 | 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 55 | 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 56 | 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 57 | 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 58 | 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 59 | 		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 60 | 		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 61 | 	}
 62 | 
 63 | 	contextLUT2 = [256]uint8{
 64 | 		0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 65 | 		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 66 | 		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 67 | 		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 68 | 		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 69 | 		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 70 | 		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 71 | 		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 72 | 		4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 73 | 		4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 74 | 		4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 75 | 		4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 76 | 		5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 77 | 		5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 78 | 		5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 79 | 		6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
 80 | 	}
 81 | )
 82 | 
 83 | // These LUTs are dynamically computed from the LUTs in the specification.
 84 | var (
 85 | 	contextP1LUT [256 * numContextModes]uint8 // indexed by mode<<8 + last byte
 86 | 	contextP2LUT [256 * numContextModes]uint8 // indexed by mode<<8 + second-to-last byte
 87 | )
 88 | 
 89 | // initContextLUTs computes LUTs so that context ID computation can be
 90 | // done efficiently without any branches.
 91 | func initContextLUTs() {
 92 | 	for i := 0; i < 256; i++ { // i ranges over every possible byte value
 93 | 		for m := 0; m < numContextModes; m++ {
 94 | 			base := m << 8 // each mode owns a contiguous 256-entry region of both tables
 95 | 
 96 | 			// Operations performed here are specified in RFC section 7.1.
 97 | 			switch m {
 98 | 			case contextLSB6:
 99 | 				contextP1LUT[base+i] = byte(i) & 0x3f
100 | 				contextP2LUT[base+i] = 0 // second-to-last byte contributes nothing in this mode
101 | 			case contextMSB6:
102 | 				contextP1LUT[base+i] = byte(i) >> 2
103 | 				contextP2LUT[base+i] = 0 // second-to-last byte contributes nothing in this mode
104 | 			case contextUTF8:
105 | 				contextP1LUT[base+i] = contextLUT0[byte(i)]
106 | 				contextP2LUT[base+i] = contextLUT1[byte(i)]
107 | 			case contextSigned:
108 | 				contextP1LUT[base+i] = contextLUT2[byte(i)] << 3 // pre-shifted so getLitContextID only needs an OR
109 | 				contextP2LUT[base+i] = contextLUT2[byte(i)]
110 | 			default:
111 | 				panic("unknown context mode")
112 | 			}
113 | 		}
114 | 	}
115 | }
116 | 
117 | // getLitContextID computes the context ID for literals from RFC section 7.1.
118 | // Bytes p1 and p2 are the last and second-to-last byte, respectively.
119 | func getLitContextID(p1, p2 byte, mode uint8) uint8 {
120 | 	base := uint(mode) << 8 // start of this mode's 256-entry region in both tables
121 | 	return contextP1LUT[base+uint(p1)] | contextP2LUT[base+uint(p2)] // combine the two precomputed contributions
122 | }
123 | 
124 | // getDistContextID computes the context ID for distances using the copy length
125 | // as specified in RFC section 7.2.
126 | func getDistContextID(l int) uint8 {
127 | 	if l > 4 {
128 | 		return 3 // every length above 4 shares the highest ID
129 | 	}
130 | 	return uint8(l - 2) // lengths 2, 3, 4 map to 0, 1, 2; l < 2 would wrap in the uint8 conversion (callers presumably pass l >= 2)
131 | }
132 | 


--------------------------------------------------------------------------------
/brotli/dict_decoder.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package brotli
  6 | 
  7 | // The dictDecoder implements the LZ77 sliding dictionary that is commonly used
  8 | // in various compression formats. For performance reasons, this implementation
  9 | // performs little to no sanity checks about the arguments. As such, the
 10 | // invariants documented for each method call must be respected. Furthermore,
 11 | // to reduce the memory footprint decompressing short streams, the dictionary
 12 | // starts with a relatively small size and then lazily grows.
 13 | 
 14 | const (
 15 | 	initSize   = 4096 // Initial size allocated for sliding dictionary
 16 | 	growFactor = 4    // Rate the dictionary is grown to match expected size
 17 | )
 18 | 
 19 | type dictDecoder struct { // state of the LZ77 sliding dictionary described at the top of this file
 20 | 	// Invariant: len(hist) <= size
 21 | 	size int    // Sliding window size
 22 | 	hist []byte // Sliding window history, dynamically grown to match size
 23 | 
 24 | 	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
 25 | 	wrPos int  // Current output position in buffer
 26 | 	rdPos int  // hist[:rdPos] has already been emitted
 27 | 	full  bool // Has a full window length been written yet?
 28 | }
 29 | 
 30 | func (dd *dictDecoder) Init(size int) { // resets dd for a window of size bytes, reusing any existing history buffer
 31 | 	*dd = dictDecoder{hist: dd.hist} // clear wrPos, rdPos and full but keep the old buffer
 32 | 
 33 | 	// Regardless of what size claims, start with a small dictionary to avoid
 34 | 	// denial-of-service attacks with large memory allocation.
 35 | 	dd.size = size
 36 | 	if dd.hist == nil {
 37 | 		dd.hist = make([]byte, initSize)
 38 | 	}
 39 | 	dd.hist = dd.hist[:cap(dd.hist)] // expose the buffer's full capacity
 40 | 	if len(dd.hist) > dd.size {
 41 | 		dd.hist = dd.hist[:dd.size] // keep the invariant len(hist) <= size
 42 | 	}
 43 | 	for i := range dd.hist {
 44 | 		dd.hist[i] = 0 // Zero out history to make LastBytes logic easier
 45 | 	}
 46 | }
 47 | 
 48 | // HistSize reports how many bytes of history are currently in the dictionary.
 49 | func (dd *dictDecoder) HistSize() int {
 50 | 	if dd.full {
 51 | 		return dd.size // The buffer has wrapped, so the entire window is valid
 52 | 	}
 53 | 	return dd.wrPos // Before the first wrap only hist[:wrPos] has been written
 54 | }
 55 | 
 56 | // AvailSize reports how many more bytes fit in the buffer before it is full.
 57 | func (dd *dictDecoder) AvailSize() int {
 58 | 	return len(dd.hist) - dd.wrPos // Space between the write position and the buffer end
 59 | }
 60 | 
 61 | // WriteSlice returns the unwritten tail of the buffer for the caller to fill.
 62 | // The write position is not advanced here; WriteMark does that.
 63 | // This invariant will be kept: len(s) <= AvailSize()
 64 | func (dd *dictDecoder) WriteSlice() []byte {
 65 | 	return dd.hist[dd.wrPos:]
 66 | }
 67 | 
 68 | // WriteMark advances the write position by cnt bytes.
 69 | //
 70 | // This invariant must be kept: 0 <= cnt <= AvailSize()
 71 | func (dd *dictDecoder) WriteMark(cnt int) {
 72 | 	dd.wrPos += cnt // No bounds check; the caller must honor the invariant above
 73 | }
 74 | 
 75 | // WriteCopy copies a string at a given (distance, length) to the output.
 76 | // This returns the number of bytes copied and may be less than the requested
 77 | // length if the available space in the output buffer is too small.
 78 | //
 79 | // This invariant must be kept: 0 < dist <= HistSize()
 80 | func (dd *dictDecoder) WriteCopy(dist, length int) int {
 81 | 	wrBase := dd.wrPos // Remembered to compute the number of bytes copied
 82 | 	wrEnd := dd.wrPos + length
 83 | 	if wrEnd > len(dd.hist) {
 84 | 		wrEnd = len(dd.hist) // Clamp to the buffer end; a short count is returned
 85 | 	}
 86 | 
 87 | 	// Copy the part of the source that wraps around into the tail of the buffer.
 88 | 	rdPos := dd.wrPos - dist
 89 | 	if rdPos < 0 {
 90 | 		rdPos += len(dd.hist)
 91 | 		dd.wrPos += copy(dd.hist[dd.wrPos:wrEnd], dd.hist[rdPos:])
 92 | 		rdPos = 0 // Any remaining source continues from the start of the buffer
 93 | 	}
 94 | 
 95 | 	// Copy from before the destination; each pass may reread bytes just written.
 96 | 	for dd.wrPos < wrEnd { // With dist == 0 the copy below moves nothing and never ends
 97 | 		dd.wrPos += copy(dd.hist[dd.wrPos:wrEnd], dd.hist[rdPos:dd.wrPos])
 98 | 	}
 99 | 	return dd.wrPos - wrBase
100 | }
101 | 
102 | // ReadFlush returns a slice of the historical buffer that is ready to be
103 | // emitted to the user. A call to ReadFlush is only valid after all of the data
104 | // from a previous call to ReadFlush has been consumed.
105 | func (dd *dictDecoder) ReadFlush() []byte {
106 | 	toRead := dd.hist[dd.rdPos:dd.wrPos] // Everything written since the last flush
107 | 	dd.rdPos = dd.wrPos
108 | 	if dd.wrPos == len(dd.hist) { // Buffer is full: either wrap around or grow
109 | 		if len(dd.hist) == dd.size {
110 | 			dd.wrPos, dd.rdPos = 0, 0 // Already at window size: restart at the front
111 | 			dd.full = true
112 | 		} else {
113 | 			// Allocate a larger history buffer.
114 | 			size := cap(dd.hist) * growFactor
115 | 			if size > dd.size {
116 | 				size = dd.size
117 | 			}
118 | 			hist := make([]byte, size)
119 | 			copy(hist, dd.hist)
120 | 			dd.hist = hist // toRead keeps pointing into the previous buffer
121 | 		}
122 | 	}
123 | 	return toRead
124 | }
125 | 
126 | // LastBytes reports the last 2 bytes in the dictionary, most recent first.
127 | // If they do not exist, then zero values are returned.
128 | func (dd *dictDecoder) LastBytes() (p1, p2 byte) {
129 | 	if dd.wrPos > 1 {
130 | 		return dd.hist[dd.wrPos-1], dd.hist[dd.wrPos-2]
131 | 	} else if dd.wrPos > 0 { // Only one byte at the front; p2 comes from the buffer end
132 | 		return dd.hist[dd.wrPos-1], dd.hist[len(dd.hist)-1]
133 | 	} else { // Nothing at the front; both bytes come from the buffer end
134 | 		return dd.hist[len(dd.hist)-1], dd.hist[len(dd.hist)-2] // NOTE(review): presumably len(hist) >= 2 always holds - confirm
135 | 	}
136 | }
137 | 


--------------------------------------------------------------------------------
/brotli/dict_decoder_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package brotli
  6 | 
  7 | import (
  8 | 	"bytes"
  9 | 	"strings"
 10 | 	"testing"
 11 | )
 12 | 
 13 | func TestDictDecoder(t *testing.T) {
 14 | 	const abc = "ABC\n"
 15 | 	const fox = "The quick brown fox jumped over the lazy dog!\n"
 16 | 	const poem = "The Road Not Taken\nRobert Frost\n" +
 17 | 		"\n" +
 18 | 		"Two roads diverged in a yellow wood,\n" +
 19 | 		"And sorry I could not travel both\n" +
 20 | 		"And be one traveler, long I stood\n" +
 21 | 		"And looked down one as far as I could\n" +
 22 | 		"To where it bent in the undergrowth;\n" +
 23 | 		"\n" +
 24 | 		"Then took the other, as just as fair,\n" +
 25 | 		"And having perhaps the better claim,\n" +
 26 | 		"Because it was grassy and wanted wear;\n" +
 27 | 		"Though as for that the passing there\n" +
 28 | 		"Had worn them really about the same,\n" +
 29 | 		"\n" +
 30 | 		"And both that morning equally lay\n" +
 31 | 		"In leaves no step had trodden black.\n" +
 32 | 		"Oh, I kept the first for another day!\n" +
 33 | 		"Yet knowing how way leads on to way,\n" +
 34 | 		"I doubted if I should ever come back.\n" +
 35 | 		"\n" +
 36 | 		"I shall be telling this with a sigh\n" +
 37 | 		"Somewhere ages and ages hence:\n" +
 38 | 		"Two roads diverged in a wood, and I-\n" +
 39 | 		"I took the one less traveled by,\n" +
 40 | 		"And that has made all the difference.\n"
 41 | 	var refs = []struct { // Insert/copy script walked over poem; presumably an LZ77 parse of it
 42 | 		dist   int // Backward distance (0 if this is an insertion)
 43 | 		length int // Length of copy or insertion
 44 | 	}{
 45 | 		{0, 38}, {33, 3}, {0, 48}, {79, 3}, {0, 11}, {34, 5}, {0, 6}, {23, 7},
 46 | 		{0, 8}, {50, 3}, {0, 2}, {69, 3}, {34, 5}, {0, 4}, {97, 3}, {0, 4},
 47 | 		{43, 5}, {0, 6}, {7, 4}, {88, 7}, {0, 12}, {80, 3}, {0, 2}, {141, 4},
 48 | 		{0, 1}, {196, 3}, {0, 3}, {157, 3}, {0, 6}, {181, 3}, {0, 2}, {23, 3},
 49 | 		{77, 3}, {28, 5}, {128, 3}, {110, 4}, {70, 3}, {0, 4}, {85, 6}, {0, 2},
 50 | 		{182, 6}, {0, 4}, {133, 3}, {0, 7}, {47, 5}, {0, 20}, {112, 5}, {0, 1},
 51 | 		{58, 3}, {0, 8}, {59, 3}, {0, 4}, {173, 3}, {0, 5}, {114, 3}, {0, 4},
 52 | 		{92, 5}, {0, 2}, {71, 3}, {0, 2}, {76, 5}, {0, 1}, {46, 3}, {96, 4},
 53 | 		{130, 4}, {0, 3}, {360, 3}, {0, 3}, {178, 5}, {0, 7}, {75, 3}, {0, 3},
 54 | 		{45, 6}, {0, 6}, {299, 6}, {180, 3}, {70, 6}, {0, 1}, {48, 3}, {66, 4},
 55 | 		{0, 3}, {47, 5}, {0, 9}, {325, 3}, {0, 1}, {359, 3}, {318, 3}, {0, 2},
 56 | 		{199, 3}, {0, 1}, {344, 3}, {0, 3}, {248, 3}, {0, 10}, {310, 3}, {0, 3},
 57 | 		{93, 6}, {0, 3}, {252, 3}, {157, 4}, {0, 2}, {273, 5}, {0, 14}, {99, 4},
 58 | 		{0, 1}, {464, 4}, {0, 2}, {92, 4}, {495, 3}, {0, 1}, {322, 4}, {16, 4},
 59 | 		{0, 3}, {402, 3}, {0, 2}, {237, 4}, {0, 2}, {432, 4}, {0, 1}, {483, 5},
 60 | 		{0, 2}, {294, 4}, {0, 2}, {306, 3}, {113, 5}, {0, 1}, {26, 4}, {164, 3},
 61 | 		{488, 4}, {0, 1}, {542, 3}, {248, 6}, {0, 5}, {205, 3}, {0, 8}, {48, 3},
 62 | 		{449, 6}, {0, 2}, {192, 3}, {328, 4}, {9, 5}, {433, 3}, {0, 3}, {622, 25},
 63 | 		{615, 5}, {46, 5}, {0, 2}, {104, 3}, {475, 10}, {549, 3}, {0, 4}, {597, 8},
 64 | 		{314, 3}, {0, 1}, {473, 6}, {317, 5}, {0, 1}, {400, 3}, {0, 3}, {109, 3},
 65 | 		{151, 3}, {48, 4}, {0, 4}, {125, 3}, {108, 3}, {0, 2},
 66 | 	}
 67 | 
 68 | 	var want string // Expected output, built in lockstep with the decoder calls
 69 | 	var buf bytes.Buffer // Collects everything returned by ReadFlush
 70 | 	var dd dictDecoder
 71 | 	dd.Init(1 << 11) // 2048-byte window, below initSize, so the buffer wraps instead of growing
 72 | 
 73 | 	checkLastBytes := func(str string) { // Compares LastBytes with the last two bytes of str
 74 | 		if len(str) < 2 {
 75 | 			str = "\x00\x00" + str // Missing bytes are expected to read as zero
 76 | 		}
 77 | 		str = str[len(str)-2:]
 78 | 		p1, p2 := dd.LastBytes()
 79 | 		got := string([]byte{p2, p1}) // p1 is the most recent byte, so it goes last
 80 | 		if got != str {
 81 | 			t.Errorf("last bytes mismatch: got %q, want %q", got, str)
 82 | 		}
 83 | 	}
 84 | 	writeCopy := func(dist, length int) { // Applies one copy to both want and dd
 85 | 		if dist < length { // Overlapping copy: the last dist bytes repeat
 86 | 			cnt := (dist + length - 1) / dist
 87 | 			want += strings.Repeat(want[len(want)-dist:], cnt)[:length]
 88 | 		} else {
 89 | 			want += want[len(want)-dist:][:length]
 90 | 		}
 91 | 
 92 | 		for length > 0 {
 93 | 			length -= dd.WriteCopy(dist, length)
 94 | 			if dd.AvailSize() == 0 { // Buffer full: drain it so the copy can continue
 95 | 				buf.Write(dd.ReadFlush())
 96 | 			}
 97 | 		}
 98 | 
 99 | 		checkLastBytes(want)
100 | 	}
101 | 	writeString := func(str string) { // Appends literal bytes to both want and dd
102 | 		want += str
103 | 
104 | 		for len(str) > 0 {
105 | 			cnt := copy(dd.WriteSlice(), str)
106 | 			str = str[cnt:]
107 | 			dd.WriteMark(cnt)
108 | 			if dd.AvailSize() == 0 { // Buffer full: drain it before writing more
109 | 				buf.Write(dd.ReadFlush())
110 | 			}
111 | 		}
112 | 
113 | 		checkLastBytes(want)
114 | 	}
115 | 
116 | 	writeString("") // Checks LastBytes on a dictionary with nothing written
117 | 	writeString(".") // Checks LastBytes with a single byte written
118 | 	str := poem
119 | 	for _, ref := range refs {
120 | 		if ref.dist == 0 {
121 | 			writeString(str[:ref.length])
122 | 		} else {
123 | 			writeCopy(ref.dist, ref.length)
124 | 		}
125 | 		str = str[ref.length:] // Advance past the bytes this entry accounts for
126 | 	}
127 | 	writeCopy(dd.HistSize(), 33) // Copy from the largest distance currently allowed
128 | 	writeString(abc)
129 | 	writeCopy(len(abc), 59*len(abc))
130 | 	writeString(fox)
131 | 	writeCopy(len(fox), 9*len(fox))
132 | 	writeString(".")
133 | 	writeCopy(1, 9) // Distance 1 repeats the single preceding byte
134 | 	writeString(strings.ToUpper(poem))
135 | 	writeCopy(len(poem), 7*len(poem))
136 | 	writeCopy(dd.HistSize(), 10)
137 | 
138 | 	buf.Write(dd.ReadFlush()) // Drain whatever is still buffered
139 | 	if buf.String() != want {
140 | 		t.Errorf("final string mismatch:\ngot  %q\nwant %q", buf.String(), want)
141 | 	}
142 | }
143 | 
144 | func BenchmarkDictDecoderCopy(b *testing.B) {
145 | 	nb := 1 << 24 // Number of bytes copied per iteration
146 | 	b.SetBytes(int64(nb))
147 | 
148 | 	for i := 0; i < b.N; i++ {
149 | 		var dd dictDecoder
150 | 		dd.Init(1 << 16) // Fresh decoder each iteration, so setup is included in the timing
151 | 
152 | 		copy(dd.WriteSlice(), "abc") // Seed the history with a 3-byte pattern
153 | 		dd.WriteMark(3)
154 | 
155 | 		dist, length := 3, nb // Overlapping copy that keeps repeating the seed
156 | 		for length > 0 {
157 | 			length -= dd.WriteCopy(dist, length)
158 | 			if dd.AvailSize() == 0 {
159 | 				dd.ReadFlush() // Output is discarded; only the copy is of interest
160 | 			}
161 | 		}
162 | 	}
163 | }
164 | 


--------------------------------------------------------------------------------
/brotli/dict_encoder.go:
--------------------------------------------------------------------------------
1 | // Copyright 2015, Joe Tsai. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE.md file.
4 | 
5 | package brotli
6 | 


--------------------------------------------------------------------------------
/brotli/dict_encoder_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2015, Joe Tsai. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE.md file.
4 | 
5 | package brotli
6 | 


--------------------------------------------------------------------------------
/brotli/prefix_encoder.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package brotli
 6 | 
 7 | type prefixEncoder struct{} // Empty type with no state; NOTE(review): appears to be a stub
 8 | 
 9 | func (pe *prefixEncoder) Init(codes []prefixCode) {} // No-op: codes is ignored
10 | 


--------------------------------------------------------------------------------
/brotli/prefix_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2015, Joe Tsai. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE.md file.
4 | 
5 | package brotli
6 | 


--------------------------------------------------------------------------------
/brotli/testdata/alice29.txt.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/alice29.txt.br


--------------------------------------------------------------------------------
/brotli/testdata/asyoulik.txt.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/asyoulik.txt.br


--------------------------------------------------------------------------------
/brotli/testdata/compressed_file:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/compressed_file


--------------------------------------------------------------------------------
/brotli/testdata/compressed_file.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/compressed_file.br


--------------------------------------------------------------------------------
/brotli/testdata/compressed_repeated:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/compressed_repeated


--------------------------------------------------------------------------------
/brotli/testdata/compressed_repeated.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/compressed_repeated.br


--------------------------------------------------------------------------------
/brotli/testdata/digits-best-1e4.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-best-1e4.br


--------------------------------------------------------------------------------
/brotli/testdata/digits-best-1e5.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-best-1e5.br


--------------------------------------------------------------------------------
/brotli/testdata/digits-best-1e6.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-best-1e6.br


--------------------------------------------------------------------------------
/brotli/testdata/digits-default-1e4.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-default-1e4.br


--------------------------------------------------------------------------------
/brotli/testdata/digits-default-1e5.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-default-1e5.br


--------------------------------------------------------------------------------
/brotli/testdata/digits-default-1e6.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-default-1e6.br


--------------------------------------------------------------------------------
/brotli/testdata/digits-speed-1e4.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-speed-1e4.br


--------------------------------------------------------------------------------
/brotli/testdata/digits-speed-1e5.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-speed-1e5.br


--------------------------------------------------------------------------------
/brotli/testdata/digits-speed-1e6.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/digits-speed-1e6.br


--------------------------------------------------------------------------------
/brotli/testdata/lcet10.txt.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/lcet10.txt.br


--------------------------------------------------------------------------------
/brotli/testdata/mapsdatazrh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/mapsdatazrh


--------------------------------------------------------------------------------
/brotli/testdata/mapsdatazrh.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/mapsdatazrh.br


--------------------------------------------------------------------------------
/brotli/testdata/monkey:
--------------------------------------------------------------------------------
1 | znxcvnmz,xvnm.,zxcnv.,xcn.z,vn.zvn.zxcvn.,zxcn.vn.v,znm.,vnzx.,vnzxc.vn.z,vnz.,nv.z,nvmzxc,nvzxcvcnm.,vczxvnzxcnvmxc.zmcnvzm.,nvmc,nzxmc,vn.mnnmzxc,vnxcnmv,znvzxcnmv,.xcnvm,zxcnzxv.zx,qweryweurqioweupropqwutioweupqrioweutiopweuriopweuriopqwurioputiopqwuriowuqerioupqweropuweropqwurweuqriopuropqwuriopuqwriopuqweopruioqweurqweuriouqweopruioupqiytioqtyiowtyqptypryoqweutioioqtweqruowqeytiowquiourowetyoqwupiotweuqiorweuqroipituqwiorqwtioweuriouytuioerytuioweryuitoweytuiweyuityeruirtyuqriqweuropqweiruioqweurioqwuerioqwyuituierwotueryuiotweyrtuiwertyioweryrueioqptyioruyiopqwtjkasdfhlafhlasdhfjklashjkfhasjklfhklasjdfhklasdhfjkalsdhfklasdhjkflahsjdkfhklasfhjkasdfhasfjkasdhfklsdhalghhaf;hdklasfhjklashjklfasdhfasdjklfhsdjklafsd;hkldadfjjklasdhfjasddfjklfhakjklasdjfkl;asdjfasfljasdfhjklasdfhjkaghjkashf;djfklasdjfkljasdklfjklasdjfkljasdfkljaklfj


--------------------------------------------------------------------------------
/brotli/testdata/monkey.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/monkey.br


--------------------------------------------------------------------------------
/brotli/testdata/plrabn12.txt.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/plrabn12.txt.br


--------------------------------------------------------------------------------
/brotli/testdata/random_org_10k.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/random_org_10k.bin


--------------------------------------------------------------------------------
/brotli/testdata/random_org_10k.bin.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/random_org_10k.bin.br


--------------------------------------------------------------------------------
/brotli/testdata/twain-best-1e4.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-best-1e4.br


--------------------------------------------------------------------------------
/brotli/testdata/twain-best-1e5.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-best-1e5.br


--------------------------------------------------------------------------------
/brotli/testdata/twain-best-1e6.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-best-1e6.br


--------------------------------------------------------------------------------
/brotli/testdata/twain-default-1e4.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-default-1e4.br


--------------------------------------------------------------------------------
/brotli/testdata/twain-default-1e5.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-default-1e5.br


--------------------------------------------------------------------------------
/brotli/testdata/twain-default-1e6.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-default-1e6.br


--------------------------------------------------------------------------------
/brotli/testdata/twain-speed-1e4.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-speed-1e4.br


--------------------------------------------------------------------------------
/brotli/testdata/twain-speed-1e5.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-speed-1e5.br


--------------------------------------------------------------------------------
/brotli/testdata/twain-speed-1e6.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/twain-speed-1e6.br


--------------------------------------------------------------------------------
/brotli/testdata/ukkonooa:
--------------------------------------------------------------------------------
1 | ukko nooa, ukko nooa oli kunnon mies, kun han meni saunaan, pisti laukun naulaan, ukko nooa, ukko nooa oli kunnon mies.


--------------------------------------------------------------------------------
/brotli/testdata/ukkonooa.br:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/brotli/testdata/ukkonooa.br


--------------------------------------------------------------------------------
/brotli/transform_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package brotli
 6 | 
 7 | import "testing"
 8 | 
 9 | func TestTransform(t *testing.T) {
10 | 	vectors := []struct {
11 | 		id     int    // Transform identifier passed to transformWord
12 | 		input  string // Word handed to the transform
13 | 		output string // Expected transformed result
14 | 	}{
15 | 		{id: 0, input: "Hello, world!", output: "Hello, world!"},
16 | 		{id: 23, input: "groups of", output: "groups"},
17 | 		{id: 42, input: "s for the ", output: "s for "},
18 | 		{id: 48, input: "presentation", output: "prese"},
19 | 		{id: 56, input: "maintenance", output: "maint"},
20 | 		{id: 23, input: "Alexandria", output: "Alexand"},
21 | 		{id: 23, input: "archives", output: "archi"},
22 | 		{id: 49, input: "fighty", output: "fighting "},
23 | 		{id: 49, input: "12", output: "1ing "},
24 | 		{id: 49, input: "1", output: "ing "},
25 | 		{id: 49, input: "", output: "ing "},
26 | 		{id: 64, input: "123456789a", output: "1"},
27 | 		{id: 64, input: "123456789", output: ""},
28 | 		{id: 64, input: "1", output: ""},
29 | 		{id: 64, input: "", output: ""},
30 | 		{id: 3, input: "afloat", output: "float"},
31 | 		{id: 3, input: "12", output: "2"},
32 | 		{id: 3, input: "1", output: ""},
33 | 		{id: 3, input: "", output: ""},
34 | 		{id: 54, input: "123456789a", output: "a"},
35 | 		{id: 54, input: "123456789", output: ""},
36 | 		{id: 54, input: "1", output: ""},
37 | 		{id: 54, input: "", output: ""},
38 | 		{id: 73, input: "", output: " the  of the "},
39 | 		{id: 73, input: "dichlorodifluoromethanes", output: " the dichlorodifluoromethanes of the "},
40 | 		{id: 15, input: "", output: "  "},
41 | 		{id: 15, input: "meow", output: " Meow "},
42 | 		{id: 15, input: "-scale", output: " -scale "},
43 | 		{id: 15, input: "почти", output: " Почти "},
44 | 		{id: 15, input: "互联网", output: " 亗联网 "},
45 | 		{id: 119, input: "", output: " ='"},
46 | 		{id: 119, input: "meow", output: " MEOW='"},
47 | 		{id: 119, input: "-scale", output: " -SCALE='"},
48 | 		{id: 119, input: "почти", output: " ПОѧѢИ='"},
49 | 		{id: 119, input: "互联网", output: " 亗聑罔='"},
50 | 	}
51 | 
52 | 	var buf [maxWordSize]byte // Scratch output buffer shared by all vectors
53 | 	for i, v := range vectors {
54 | 		cnt := transformWord(buf[:], []byte(v.input), v.id) // cnt bytes of buf hold the result
55 | 		output := string(buf[:cnt])
56 | 
57 | 		if output != v.output {
58 | 			t.Errorf("test %d, output mismatch: got %q, want %q", i, output, v.output)
59 | 		}
60 | 	}
61 | }
62 | 


--------------------------------------------------------------------------------
/brotli/writer.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package brotli
 6 | 
 7 | import "io"
 8 | 
 9 | type writer struct {
10 | 	InputOffset  int64 // Total number of bytes issued to Write
11 | 	OutputOffset int64 // Total number of bytes written to underlying io.Writer
12 | 
13 | 	wr  bitWriter // Bit-level output destination
14 | 	err error     // Persistent error; NOTE(review): no method in this file sets it yet
15 | }
16 | 
17 | type writerConfig struct { // Currently carries no options
18 | 	_ struct{} // Blank field so the struct cannot be built with an unkeyed literal
19 | }
20 | 
21 | func newWriter(w io.Writer, conf *writerConfig) (*writer, error) {
22 | 	return nil, nil // Stub: no writer is constructed; w and conf are ignored
23 | }
24 | 
25 | func (bw *writer) Write(buf []byte) (int, error) {
26 | 	return 0, nil // Stub: consumes nothing from buf and reports no error
27 | }
28 | 
29 | func (bw *writer) Close() error {
30 | 	return nil // Stub: does nothing and always succeeds
31 | }
32 | 
33 | func (bw *writer) Reset(w io.Writer) error {
34 | 	return nil // Stub: w is ignored and no state is changed
35 | }
36 | 


--------------------------------------------------------------------------------
/brotli/writer_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2015, Joe Tsai. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE.md file.
4 | 
5 | package brotli
6 | 


--------------------------------------------------------------------------------
/bzip2/bwt.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package bzip2
  6 | 
  7 | import "github.com/dsnet/compress/bzip2/internal/sais"
  8 | 
  9 | // The Burrows-Wheeler Transform implementation used here is based on the
 10 | // Suffix Array by Induced Sorting (SA-IS) methodology by Nong, Zhang, and Chan.
 11 | // This implementation uses the sais algorithm originally written by Yuta Mori.
 12 | //
 13 | // The SA-IS algorithm runs in O(n) and outputs a Suffix Array. There is a
 14 | // mathematical relationship between Suffix Arrays and the Burrows-Wheeler
 15 | // Transform, such that a SA can be converted to a BWT in O(n) time.
 16 | //
 17 | // References:
 18 | //
 19 | //	http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf
 20 | //	https://github.com/cscott/compressjs/blob/master/lib/BWT.js
 21 | //	https://www.quora.com/How-can-I-optimize-burrows-wheeler-transform-and-inverse-transform-to-work-in-O-n-time-O-n-space
 22 | type burrowsWheelerTransform struct {
 23 | 	buf  []byte   // Scratch bytes: the doubled input in Encode, the output staging area in Decode
 24 | 	sa   []int    // Scratch suffix array filled by Encode
 25 | 	perm []uint32 // Scratch permutation built by Decode
 26 | }
 27 | // Encode overwrites buf with its BWT and returns the origin pointer, or -1 if buf is empty.
 28 | func (bwt *burrowsWheelerTransform) Encode(buf []byte) (ptr int) {
 29 | 	if len(buf) == 0 {
 30 | 		return -1
 31 | 	}
 32 | 
 33 | 	// TODO(dsnet): Find a way to avoid the duplicate input string method.
 34 | 	// We only need to do this because suffix arrays (by definition) only
 35 | 	// operate on non-wrapped suffixes of a string. On the other hand,
 36 | 	// the BWT specifically used in bzip2 operates on strings that wrap around
 37 | 	// when being sorted.
 38 | 
 39 | 	// Step 1: Concatenate the input string to itself so that we can use the
 40 | 	// suffix array algorithm for bzip2's variant of BWT.
 41 | 	n := len(buf)
 42 | 	bwt.buf = append(append(bwt.buf[:0], buf...), buf...) // Reuses bwt.buf's storage when it is large enough
 43 | 	if cap(bwt.sa) < 2*n {
 44 | 		bwt.sa = make([]int, 2*n)
 45 | 	}
 46 | 	t := bwt.buf[:2*n]
 47 | 	sa := bwt.sa[:2*n]
 48 | 
 49 | 	// Step 2: Compute the suffix array (SA). The input string, t, will not be
 50 | 	// modified, while the results will be written to the output, sa.
 51 | 	sais.ComputeSA(t, sa)
 52 | 
 53 | 	// Step 3: Convert the SA to a BWT. Since ComputeSA does not mutate the
 54 | 	// input, we have two copies of the input: buf and buf2. Thus, we write
 55 | 	// the transformation to buf, while reading from buf2.
 56 | 	var j int
 57 | 	buf2 := t[n:] // Second copy of the input inside the doubled string
 58 | 	for _, i := range sa {
 59 | 		if i < n { // Skip suffixes that start in the second copy
 60 | 			if i == 0 {
 61 | 				ptr = j // Output position of the suffix that starts at index 0
 62 | 				i = n   // So that buf2[i-1] below yields the last input byte
 63 | 			}
 64 | 			buf[j] = buf2[i-1]
 65 | 			j++
 66 | 		}
 67 | 	}
 68 | 	return ptr
 69 | }
 70 | // Decode overwrites buf, a BWT output with origin pointer ptr, with the decoded string.
 71 | func (bwt *burrowsWheelerTransform) Decode(buf []byte, ptr int) {
 72 | 	if len(buf) == 0 {
 73 | 		return
 74 | 	}
 75 | 
 76 | 	// Step 1: Compute cumm, where cumm[ch] reports the total number of
 77 | 	// characters that precede the character ch in the alphabet.
 78 | 	var cumm [256]int
 79 | 	for _, v := range buf {
 80 | 		cumm[v]++ // First pass: plain histogram of byte values
 81 | 	}
 82 | 	var sum int
 83 | 	for i, v := range cumm {
 84 | 		cumm[i] = sum // Second pass: turn counts into exclusive prefix sums
 85 | 		sum += v
 86 | 	}
 87 | 
 88 | 	// Step 2: Compute perm, where perm[ptr] contains a pointer to the next
 89 | 	// byte in buf and the next pointer in perm itself.
 90 | 	if cap(bwt.perm) < len(buf) {
 91 | 		bwt.perm = make([]uint32, len(buf))
 92 | 	}
 93 | 	perm := bwt.perm[:len(buf)]
 94 | 	for i, b := range buf {
 95 | 		perm[cumm[b]] = uint32(i) // NOTE(review): assumes len(buf) fits in uint32 - confirm
 96 | 		cumm[b]++
 97 | 	}
 98 | 
 99 | 	// Step 3: Follow each pointer in perm to the next byte, starting with the
100 | 	// origin pointer.
101 | 	if cap(bwt.buf) < len(buf) {
102 | 		bwt.buf = make([]byte, len(buf))
103 | 	}
104 | 	buf2 := bwt.buf[:len(buf)]
105 | 	i := perm[ptr] // No range check: ptr outside [0, len(buf)) panics here
106 | 	for j := range buf2 {
107 | 		buf2[j] = buf[i]
108 | 		i = perm[i]
109 | 	}
110 | 	copy(buf, buf2) // Write the decoded bytes back over the input
111 | }
112 | 


--------------------------------------------------------------------------------
/bzip2/bwt_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package bzip2
  6 | 
  7 | import (
  8 | 	"testing"
  9 | 
 10 | 	"github.com/dsnet/compress/internal/testutil"
 11 | )
 12 | 
 13 | func TestBurrowsWheelerTransform(t *testing.T) { // Checks Encode and Decode against fixed vectors.
 14 | 	vectors := []struct {
 15 | 		input  []byte // The input test string
 16 | 		output []byte // Expected output string after BWT
 17 | 		ptr    int    // The BWT origin pointer
 18 | 	}{{
 19 | 		input:  []byte(""),
 20 | 		output: []byte(""),
 21 | 		ptr:    -1, // Pointer expected from Encode for empty input
 22 | 	}, {
 23 | 		input:  []byte("Hello, world!"),
 24 | 		output: []byte(",do!lHrellwo "),
 25 | 		ptr:    3,
 26 | 	}, {
 27 | 		input:  []byte("SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES"),
 28 | 		output: []byte("TEXYDST.E.IXIXIXXSSMPPS.B..E.S.EUSFXDIIOIIIT"),
 29 | 		ptr:    29,
 30 | 	}, {
 31 | 		input:  []byte("0123456789"),
 32 | 		output: []byte("9012345678"),
 33 | 		ptr:    0,
 34 | 	}, {
 35 | 		input:  []byte("9876543210"),
 36 | 		output: []byte("1234567890"),
 37 | 		ptr:    9,
 38 | 	}, {
 39 | 		input:  []byte("The quick brown fox jumped over the lazy dog."),
 40 | 		output: []byte("kynxederg.l ie hhpv otTu c uwd rfm eb qjoooza"),
 41 | 		ptr:    9,
 42 | 	}, {
 43 | 		input: []byte("" +
 44 | 			"Mary had a little lamb, its fleece was white as snow" +
 45 | 			"Mary had a little lamb, its fleece was white as snow" +
 46 | 			"Mary had a little lamb, its fleece was white as snow" +
 47 | 			"Mary had a little lamb, its fleece was white as snow" +
 48 | 			"Mary had a little lamb, its fleece was white as snow" +
 49 | 			"Mary had a little lamb, its fleece was white as snow" +
 50 | 			"Mary had a little lamb, its fleece was white as snow" +
 51 | 			"Mary had a little lamb, its fleece was white as snow" +
 52 | 			"Nary had a little lamb, its fleece was white as snow"),
 53 | 		output: []byte("" +
 54 | 			"dddddddddeeeeeeeeesssssssssyyyyyyyyy,,,,,,,,,eeeeeee" +
 55 | 			"eeaaaaaaaaassssssssseeeeeeeeesssssssssbbbbbbbbbwwwww" +
 56 | 			"wwww         hhhhhhhhhlllllllllNMMMMMMMM         www" +
 57 | 			"wwwwwwmmmmmmmmmeeeeeeeeeaaaaaaaaatttttttttlllllllllc" +
 58 | 			"cccccccceeeeeeeeelllllllll                  wwwwwwww" +
 59 | 			"whhhhhhhhh         lllllllll         tttttttttffffff" +
 60 | 			"fff         aaaaaaaaasssssssssnnnnnnnnnaaaaaaaaatttt" +
 61 | 			"tttttaaaaaaaaaaaaaaaaaa         iiiiiiiiitttttttttii" +
 62 | 			"iiiiiiiiiiiiiiiiooooooooo                  rrrrrrrrr"),
 63 | 		ptr: 99,
 64 | 	}, {
 65 | 		input: []byte("" +
 66 | 			"AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTCTCTGAC" +
 67 | 			"AGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAA" +
 68 | 			"TACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATG" +
 69 | 			"AAACGCATTAGCACCACCATTACCACCACCATCACCACCACCATCACCATTACCATTACCACAG" +
 70 | 			"GTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGCT" +
 71 | 			"TTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTACATCAG" +
 72 | 			"TGGCAAATGCAGAACGTTTTCTGCGGGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCA"),
 73 | 		output: []byte("" +
 74 | 			"TAGAATAAATGGAGACTCTAATACTCTACTGGAAACAGACCACAAACATACCTGGTCGTAGATT" +
 75 | 			"CCCCCCATCCCTAAGAAACGAGTCCCCACATCATCACCTCGACTGGGCCGAGACTAAGCCCCCA" +
 76 | 			"ACTGAACCCCCTTACGAAGGCGGAAGCTCCGCCCTGTAGAAAAGACGAATGCCAACCCCCGTAA" +
 77 | 			"AAAAAAGAATAAAAGGCGAATAGCGCAATAGGGGAGCAATTTTCGTACTTATAGAGGAGTGATT" +
 78 | 			"ATTCTTTCTAACACGGTGGACACTAGGCTATTTATTTGCGAAGATTTGGAACGGGCCCACAAAC" +
 79 | 			"ACTGAGGGACGGATCGATATAGATGCTATCGGTGGGTGGTTTTATAATAAATAAGATATTGGTC" +
 80 | 			"TTTCACTCCCCTGCAATCAGGCCGGCAGCGAATAAAAGACTTTGCATAGAGCTTTTACTGTTTC"),
 81 | 		ptr: 99,
 82 | 	}, {
 83 | 		input:  testutil.MustLoadFile("testdata/gauntlet_test3.bin"), // Remaining vectors are loaded from files
 84 | 		output: testutil.MustLoadFile("testdata/gauntlet_test3.bwt"),
 85 | 		ptr:    0,
 86 | 	}, {
 87 | 		input:  testutil.MustLoadFile("testdata/silesia_ooffice.bin"),
 88 | 		output: testutil.MustLoadFile("testdata/silesia_ooffice.bwt"),
 89 | 		ptr:    461,
 90 | 	}, {
 91 | 		input:  testutil.MustLoadFile("testdata/silesia_xray.bin"),
 92 | 		output: testutil.MustLoadFile("testdata/silesia_xray.bwt"),
 93 | 		ptr:    1532,
 94 | 	}, {
 95 | 		input:  testutil.MustLoadFile("testdata/testfiles_test3.bin"),
 96 | 		output: testutil.MustLoadFile("testdata/testfiles_test3.bwt"),
 97 | 		ptr:    0,
 98 | 	}, {
 99 | 		input:  testutil.MustLoadFile("testdata/testfiles_test4.bin"),
100 | 		output: testutil.MustLoadFile("testdata/testfiles_test4.bwt"),
101 | 		ptr:    1026,
102 | 	}}
103 | 
104 | 	bwt := new(burrowsWheelerTransform) // One instance is reused across all vectors
105 | 	for i, v := range vectors {
106 | 		output := append([]byte(nil), v.input...) // Copy: the buffer given to Encode is compared to v.output below
107 | 		ptr := bwt.Encode(output)
108 | 		input := append([]byte(nil), v.output...) // Copy: the buffer given to Decode is compared to v.input below
109 | 		bwt.Decode(input, ptr)                    // Uses the pointer returned by Encode, not v.ptr
110 | 
111 | 		if got, want, ok := testutil.BytesCompare(input, v.input); !ok {
112 | 			t.Errorf("test %d, input mismatch:\ngot  %s\nwant %s", i, got, want)
113 | 		}
114 | 		if got, want, ok := testutil.BytesCompare(output, v.output); !ok {
115 | 			t.Errorf("test %d, output mismatch:\ngot  %s\nwant %s", i, got, want)
116 | 		}
117 | 		if ptr != v.ptr {
118 | 			t.Errorf("test %d, pointer mismatch: got %d, want %d", i, ptr, v.ptr)
119 | 		}
120 | 	}
121 | }
122 | 


--------------------------------------------------------------------------------
/bzip2/bzip2_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package bzip2
  6 | 
  7 | import (
  8 | 	"bytes"
  9 | 	"errors"
 10 | 	"flag"
 11 | 	"io"
 12 | 	"os/exec"
 13 | 	"strings"
 14 | 	"testing"
 15 | 
 16 | 	"github.com/dsnet/compress/internal/testutil"
 17 | )
 18 | 
 19 | var zcheck = flag.Bool("zcheck", false, "verify test vectors with C bzip2 library") // When set, TestRoundTrip also runs the bzip2 command.
 20 | 
 21 | func cmdCompress(input []byte) ([]byte, error)   { return cmdExec(input, "-z") } // Runs "bzip2 -z" with input on stdin.
 22 | func cmdDecompress(input []byte) ([]byte, error) { return cmdExec(input, "-d") } // Runs "bzip2 -d" with input on stdin.
 23 | 
 24 | // cmdExec executes the bzip2 tool with args, passing the input in as stdin.
 25 | // It returns the captured stdout and an error; any stderr output wins over both.
 26 | func cmdExec(input []byte, args ...string) ([]byte, error) {
 27 | 	var bo, be bytes.Buffer // Capture buffers for stdout (bo) and stderr (be)
 28 | 	cmd := exec.Command("bzip2", args...)
 29 | 	cmd.Stdin = bytes.NewReader(input)
 30 | 	cmd.Stdout = &bo
 31 | 	cmd.Stderr = &be
 32 | 	err := cmd.Run()
 33 | 	ss := strings.Split(strings.TrimSpace(be.String()), "\n") // Lines of the trimmed stderr text
 34 | 	if len(ss) > 0 && ss[len(ss)-1] != "" {
 35 | 		// Assume any stderr indicates an error and last line is the message.
 36 | 		return nil, errors.New(ss[len(ss)-1]) // Replaces the error from Run and drops stdout
 37 | 	}
 38 | 	return bo.Bytes(), err
 39 | }
 40 | 
 41 | var testdata = []struct { // Corpora shared by TestRoundTrip and runBenchmarks.
 42 | 	name  string
 43 | 	data  []byte
 44 | 	ratio float64 // The minimum expected ratio (uncompressed / compressed)
 45 | }{
 46 | 	{"Nil", nil, 0}, // Empty input; runBenchmarks skips it
 47 | 	{"Binary", testutil.MustLoadFile("../testdata/binary.bin"), 5.68},
 48 | 	{"Digits", testutil.MustLoadFile("../testdata/digits.txt"), 2.22},
 49 | 	{"Huffman", testutil.MustLoadFile("../testdata/huffman.txt"), 1.24},
 50 | 	{"Random", testutil.MustLoadFile("../testdata/random.bin"), 0.98},
 51 | 	{"Repeats", testutil.MustLoadFile("../testdata/repeats.bin"), 3.93},
 52 | 	{"Twain", testutil.MustLoadFile("../testdata/twain.txt"), 2.99},
 53 | 	{"Zeros", testutil.MustLoadFile("../testdata/zeros.bin"), 5825.0},
 54 | }
 55 | 
 56 | var levels = []struct { // Compression levels that runBenchmarks iterates over.
 57 | 	name  string // Used as a path element of the sub-benchmark name
 58 | 	level int    // Passed to the benchmark callback as lvl
 59 | }{
 60 | 	{"Speed", BestSpeed},
 61 | 	{"Default", DefaultCompression},
 62 | 	{"Compression", BestCompression},
 63 | }
 64 | 
 65 | var sizes = []struct { // Input sizes that runBenchmarks iterates over.
 66 | 	name string // Used as a path element of the sub-benchmark name
 67 | 	size int    // Passed to testutil.ResizeData as the target size
 68 | }{
 69 | 	{"1e4", 1e4},
 70 | 	{"1e5", 1e5},
 71 | 	{"1e6", 1e6},
 72 | }
 73 | 
 74 | func TestRoundTrip(t *testing.T) { // Compresses then decompresses every corpus and checks the bytes survive.
 75 | 	for _, v := range testdata {
 76 | 		v := v // Per-iteration copy for the parallel subtest closure
 77 | 		t.Run(v.name, func(t *testing.T) {
 78 | 			t.Parallel()
 79 | 
 80 | 			var buf1, buf2 bytes.Buffer // buf1: compressed stream, buf2: decompressed result
 81 | 
 82 | 			// Compress the input.
 83 | 			wr, err := NewWriter(&buf1, nil)
 84 | 			if err != nil {
 85 | 				t.Fatalf("NewWriter() = (_, %v), want (_, nil)", err) // Stop: wr must not be used after a failed constructor
 86 | 			}
 87 | 			n, err := io.Copy(wr, bytes.NewReader(v.data))
 88 | 			if n != int64(len(v.data)) || err != nil {
 89 | 				t.Errorf("Copy() = (%d, %v), want (%d, nil)", n, err, len(v.data))
 90 | 			}
 91 | 			if err := wr.Close(); err != nil {
 92 | 				t.Errorf("Close() = %v, want nil", err)
 93 | 			}
 94 | 
 95 | 			// Verify that the compression ratio is within expected bounds.
 96 | 			ratio := float64(len(v.data)) / float64(buf1.Len())
 97 | 			if ratio < v.ratio {
 98 | 				t.Errorf("poor compression ratio: %0.2f < %0.2f", ratio, v.ratio)
 99 | 			}
100 | 
101 | 			// Verify that the C library can decompress the output of Writer and
102 | 			// that the Reader can decompress the output of the C library.
103 | 			if *zcheck {
104 | 				zd, err := cmdDecompress(buf1.Bytes())
105 | 				if err != nil {
106 | 					t.Errorf("unexpected cmdDecompress error: %v", err)
107 | 				}
108 | 				if got, want, ok := testutil.BytesCompare(zd, v.data); !ok {
109 | 					t.Errorf("output data mismatch:\ngot  %s\nwant %s", got, want)
110 | 				}
111 | 				zc, err := cmdCompress(v.data)
112 | 				if err != nil {
113 | 					t.Errorf("unexpected cmdCompress error: %v", err)
114 | 				}
115 | 				zratio := float64(len(v.data)) / float64(len(zc))
116 | 				if ratio < 0.9*zratio { // Allow up to 10% worse than the bzip2 tool
117 | 					t.Errorf("poor compression ratio: %0.2f < %0.2f", ratio, 0.9*zratio)
118 | 				}
119 | 				buf1.Reset()
120 | 				buf1.Write(zc) // Use output of C library for Reader test
121 | 			}
122 | 
123 | 			// Decompress the output.
124 | 			rd, err := NewReader(&buf1, nil)
125 | 			if err != nil {
126 | 				t.Fatalf("NewReader() = (_, %v), want (_, nil)", err) // Stop: rd must not be used after a failed constructor
127 | 			}
128 | 			n, err = io.Copy(&buf2, rd)
129 | 			if n != int64(len(v.data)) || err != nil {
130 | 				t.Errorf("Copy() = (%d, %v), want (%d, nil)", n, err, len(v.data))
131 | 			}
132 | 			if err := rd.Close(); err != nil {
133 | 				t.Errorf("Close() = %v, want nil", err)
134 | 			}
135 | 			if got, want, ok := testutil.BytesCompare(buf2.Bytes(), v.data); !ok {
136 | 				t.Errorf("output data mismatch:\ngot  %s\nwant %s", got, want)
137 | 			}
138 | 		})
139 | 	}
140 | }
141 | 
142 | func runBenchmarks(b *testing.B, f func(b *testing.B, buf []byte, lvl int)) { // Runs f once per corpus/level/size combination.
143 | 	for _, td := range testdata {
144 | 		if len(td.data) == 0 { // Skip corpora with no data
145 | 			continue
146 | 		}
147 | 		if testing.Short() && !(td.name == "Twain" || td.name == "Digits") { // Short mode keeps only Twain and Digits
148 | 			continue
149 | 		}
150 | 		for _, tl := range levels {
151 | 			for _, ts := range sizes {
152 | 				buf := testutil.ResizeData(td.data, ts.size)
153 | 				b.Run(td.name+"/"+tl.name+"/"+ts.name, func(b *testing.B) { // Sub-benchmark name: corpus/level/size
154 | 					f(b, buf, tl.level)
155 | 				})
156 | 			}
157 | 		}
158 | 	}
159 | }
160 | 


--------------------------------------------------------------------------------
/bzip2/common.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | // Package bzip2 implements the BZip2 compressed data format.
  6 | //
  7 | // Canonical C implementation:
  8 | //
  9 | //	http://bzip.org
 10 | //
 11 | // Unofficial format specification:
 12 | //
 13 | //	https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
 14 | package bzip2
 15 | 
 16 | import (
 17 | 	"fmt"
 18 | 	"hash/crc32"
 19 | 
 20 | 	"github.com/dsnet/compress/internal"
 21 | 	"github.com/dsnet/compress/internal/errors"
 22 | )
 23 | 
 24 | // There does not exist a formal specification of the BZip2 format. As such,
 25 | // much of this work is derived by either reverse engineering the original C
 26 | // source code or using secondary sources.
 27 | //
 28 | // A significant amount of fuzz testing is done to ensure that output from
 29 | // this package is properly decoded by the C library. Furthermore, we test that
 30 | // both this package and the C library agree about what inputs are invalid.
 31 | //
 32 | // Compression stack:
 33 | //	Run-length encoding 1     (RLE1)
 34 | //	Burrows-Wheeler transform (BWT)
 35 | //	Move-to-front transform   (MTF)
 36 | //	Run-length encoding 2     (RLE2)
 37 | //	Prefix encoding           (PE)
 38 | //
 39 | // References:
 40 | //	http://bzip.org/
 41 | //	https://en.wikipedia.org/wiki/Bzip2
 42 | //	https://code.google.com/p/jbzip2/
 43 | 
 44 | const ( // Named compression levels; the benchmarks in bzip2_test.go iterate over all three.
 45 | 	BestSpeed          = 1
 46 | 	BestCompression    = 9
 47 | 	DefaultCompression = 6
 48 | )
 49 | 
 50 | const ( // Format magic numbers and the block size constant.
 51 | 	hdrMagic = 0x425a         // Hex of "BZ"
 52 | 	blkMagic = 0x314159265359 // BCD of PI
 53 | 	endMagic = 0x177245385090 // BCD of sqrt(PI)
 54 | 
 55 | 	blockSize = 100000 // NOTE(review): presumably the block size unit in bytes; confirm against the reader/writer
 56 | )
 57 | 
 58 | func errorf(c int, f string, a ...interface{}) error { // Builds an errors.Error with code c, Pkg "bzip2", and a formatted message.
 59 | 	return errors.Error{Code: c, Pkg: "bzip2", Msg: fmt.Sprintf(f, a...)}
 60 | }
 61 | 
 62 | func panicf(c int, f string, a ...interface{}) { // Hands the error built by errorf to errors.Panic.
 63 | 	errors.Panic(errorf(c, f, a...))
 64 | }
 65 | 
 66 | // errWrap converts a lower-level errors.Error to be one from this package.
 67 | // The replaceCode passed in will be used to replace the code for any errors
 68 | // with the errors.Invalid code.
 69 | //
 70 | // For the Reader, set this to errors.Corrupted.
 71 | // For the Writer, set this to errors.Internal.
 72 | func errWrap(err error, replaceCode int) error {
 73 | 	if cerr, ok := err.(errors.Error); ok { // Any other error type is returned unchanged
 74 | 		if errors.IsInvalid(cerr) {
 75 | 			cerr.Code = replaceCode
 76 | 		}
 77 | 		err = errorf(cerr.Code, "%s", cerr.Msg) // Rebuild via errorf so Pkg becomes "bzip2"
 78 | 	}
 79 | 	return err
 80 | }
 81 | 
 82 | var errClosed = errorf(errors.Closed, "") // Package-level error with code errors.Closed and an empty message.
 83 | 
 84 | // crc computes the CRC-32 used by BZip2.
 85 | //
 86 | // The CRC-32 computation in bzip2 treats bytes as having bits in big-endian
 87 | // order. That is, the MSB is read before the LSB. Thus, we can use the
 88 | // standard library version of CRC-32 IEEE with some minor adjustments.
 89 | //
 90 | // The byte array is used as an intermediate buffer to swap the bits of every
 91 | // byte of the input.
 92 | type crc struct {
 93 | 	val uint32    // Running checksum; update reads it and stores the new value back
 94 | 	buf [256]byte // Scratch buffer; update fills at most len(buf) bytes per pass
 95 | }
 96 | 
 97 | // update computes the CRC-32 of appending buf to c.
 98 | func (c *crc) update(buf []byte) {
 99 | 	cval := internal.ReverseUint32(c.val) // Convert the stored value before handing it to crc32.Update
100 | 	for len(buf) > 0 {
101 | 		n := len(buf) // Chunk length, capped at the scratch buffer size
102 | 		if n > len(c.buf) {
103 | 			n = len(c.buf)
104 | 		}
105 | 		for i, b := range buf[:n] {
106 | 			c.buf[i] = internal.ReverseLUT[b] // Per-byte table lookup into the scratch buffer
107 | 		}
108 | 		cval = crc32.Update(cval, crc32.IEEETable, c.buf[:n])
109 | 		buf = buf[n:]
110 | 	}
111 | 	c.val = internal.ReverseUint32(cval) // Convert back before storing
112 | }
113 | 


--------------------------------------------------------------------------------
/bzip2/common_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package bzip2
 6 | 
 7 | import (
 8 | 	"strconv"
 9 | 	"testing"
10 | 
11 | 	"github.com/dsnet/compress/internal/testutil"
12 | )
13 | 
14 | func TestCRC(t *testing.T) { // Checks crc.update against known checksums, feeding each string in two pieces.
15 | 	vectors := []struct {
16 | 		crc uint32 // Expected checksum of str
17 | 		str string
18 | 	}{
19 | 		{0x00000000, ""},
20 | 		{0x19939b6b, "a"},
21 | 		{0xe993fdcd, "ab"},
22 | 		{0x648cbb73, "abc"},
23 | 		{0x3d4c334b, "abcd"},
24 | 		{0xa35b4df4, "abcde"},
25 | 		{0xa0f54fb9, "abcdef"},
26 | 		{0x077539d7, "abcdefg"},
27 | 		{0x5024ec61, "abcdefgh"},
28 | 		{0x63e0bcd4, "abcdefghi"},
29 | 		{0x73826444, "abcdefghij"},
30 | 		{0xbf786ee7, "Discard medicine more than two years old."},
31 | 		{0x106324f0, "He who has a shady past knows that nice guys finish last."},
32 | 		{0x0ef9b7d7, "I wouldn't marry him with a ten foot pole."},
33 | 		{0x2f42217b, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"},
34 | 		{0xb64c598c, "The days of the digital watch are numbered.  -Tom Stoppard"},
35 | 		{0xf4e5a7c3, "Nepal premier won't resign."},
36 | 		{0x2b43233e, "For every action there is an equal and opposite government program."},
37 | 		{0x7b83ef6f, "His money is twice tainted: 'taint yours and 'taint mine."},
38 | 		{0x503c2258, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"},
39 | 		{0x4dc300fa, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"},
40 | 		{0x97fa4243, "size:  a.out:  bad magic"},
41 | 		{0xc9549847, "The major problem is with sendmail.  -Mark Horton"},
42 | 		{0xeaa630ab, "Give me a rock, paper and scissors and I will move the world.  CCFestoon"},
43 | 		{0xcd8bb88c, "If the enemy is within range, then so are you."},
44 | 		{0x95cc0d9d, "It's well we cannot hear the screams/That we create in others' dreams."},
45 | 		{0x14c42897, "You remind me of a TV show, but that's all right: I watch it anyway."},
46 | 		{0x0de498f1, "C is as portable as Stonehedge!!"},
47 | 		{0x79e7cf74, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"},
48 | 		{0x33e2329e, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction.  Lewis-Randall Rule"},
49 | 		{0xa4302570, "How can you write a big system without C++?  -Paul Glick"},
50 | 	}
51 | 
52 | 	var crc crc // Local variable shadows the crc type for the rest of the function
53 | 	for i, v := range vectors {
54 | 		splits := []int{ // Split offsets: start, the three quarter points (integer division), and end
55 | 			0 * (len(v.str) / 1),
56 | 			1 * (len(v.str) / 4),
57 | 			2 * (len(v.str) / 4),
58 | 			3 * (len(v.str) / 4),
59 | 			1 * (len(v.str) / 1),
60 | 		}
61 | 		for _, j := range splits {
62 | 			str1, str2 := []byte(v.str[:j]), []byte(v.str[j:])
63 | 			crc.val = 0 // Reset the running checksum before each split
64 | 			crc.update(str1)
65 | 			if crc.update(str2); crc.val != v.crc {
66 | 				t.Errorf("test %d, crc.update(crc1, str2): got 0x%08x, want 0x%08x", i, crc.val, v.crc)
67 | 			}
68 | 		}
69 | 	}
70 | }
71 | 
72 | func BenchmarkCRC(b *testing.B) { // Measures crc.update throughput for several input lengths.
73 | 	var c crc // Never reset; the running value carries across iterations
74 | 	d := testutil.ResizeData([]byte("the quick brown fox jumped over the lazy dog"), 1<<16)
75 | 	for i := 1; i <= len(d); i <<= 4 { // Lengths grow 16x per step, starting at 1
76 | 		b.Run(strconv.Itoa(i), func(b *testing.B) {
77 | 			b.SetBytes(int64(i))
78 | 			for j := 0; j < b.N; j++ {
79 | 				c.update(d[:i])
80 | 			}
81 | 		})
82 | 	}
83 | }
84 | 


--------------------------------------------------------------------------------
/bzip2/fuzz_off.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2016, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build !gofuzz
 6 | // +build !gofuzz
 7 | 
 8 | // This file exists to suppress fuzzing details from release builds.
 9 | 
10 | package bzip2
11 | 
12 | type fuzzReader struct{} // Empty stand-in used when the gofuzz build tag is absent.
13 | 
14 | func (*fuzzReader) updateChecksum(int64, uint32) {} // No-op counterpart of the gofuzz implementation.
15 | 


--------------------------------------------------------------------------------
/bzip2/fuzz_on.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2016, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build gofuzz
 6 | // +build gofuzz
 7 | 
 8 | // This file exists to export internal implementation details for fuzz testing.
 9 | 
10 | package bzip2
11 | 
12 | func ForwardBWT(buf []byte) (ptr int) { // Exported wrapper: runs Encode of a fresh burrowsWheelerTransform on buf.
13 | 	var bwt burrowsWheelerTransform
14 | 	return bwt.Encode(buf)
15 | }
16 | 
17 | func ReverseBWT(buf []byte, ptr int) { // Exported wrapper: runs Decode of a fresh burrowsWheelerTransform on buf.
18 | 	var bwt burrowsWheelerTransform
19 | 	bwt.Decode(buf, ptr)
20 | }
21 | 
22 | type fuzzReader struct { // gofuzz-only state recording checksum positions and values.
23 | 	Checksums Checksums // Records appended or amended by updateChecksum
24 | }
25 | 
26 | // updateChecksum updates Checksums.
27 | //
28 | // If a valid pos is provided, it appends the (pos, val) pair to the slice.
29 | // Otherwise, it will update the last record with the new value.
30 | func (fr *fuzzReader) updateChecksum(pos int64, val uint32) {
31 | 	if pos >= 0 { // A non-negative pos counts as valid
32 | 		fr.Checksums = append(fr.Checksums, Checksum{pos, val})
33 | 	} else {
34 | 		fr.Checksums[len(fr.Checksums)-1].Value = val // Panics if no record has been appended yet
35 | 	}
36 | }
37 | 
38 | type Checksum struct { // One checksum field: where it sits in the stream and what it holds.
39 | 	Offset int64  // Bit offset of the checksum
40 | 	Value  uint32 // Checksum value
41 | }
42 | 
43 | type Checksums []Checksum // List of checksum records; see the Apply and Verify methods.
44 | 
45 | // Apply returns a copy of d in which every checksum field is overwritten from cs.
46 | func (cs Checksums) Apply(d []byte) []byte {
47 | 	d = append([]byte(nil), d...) // Work on a copy so the caller's slice is left untouched
48 | 	for _, c := range cs {
49 | 		setU32(d, c.Offset, c.Value)
50 | 	}
51 | 	return d
52 | }
53 | 
54 | func setU32(d []byte, pos int64, val uint32) { // Writes val into d at bit offset pos, most-significant bit first.
55 | 	for i := uint(0); i < 32; i++ {
56 | 		bpos := uint64(pos) + uint64(i)       // Absolute bit position; bit 0 is the MSB of d[0]
57 | 		d[bpos/8] &= ^byte(1 << (7 - bpos%8)) // Clear the target bit
58 | 		d[bpos/8] |= byte(val>>(31-i)) << (7 - bpos%8) // Bits above the target repeat val bits already written there
59 | 	}
60 | }
61 | 
62 | // Verify checks that all checksum fields in d match those in cs.
63 | func (cs Checksums) Verify(d []byte) bool {
64 | 	for _, c := range cs {
65 | 		if getU32(d, c.Offset) != c.Value { // First mismatch ends the check
66 | 			return false
67 | 		}
68 | 	}
69 | 	return true
70 | }
71 | 
72 | func getU32(d []byte, pos int64) (val uint32) { // Reads 32 bits from d at bit offset pos, most-significant bit first.
73 | 	for i := uint(0); i < 32; i++ {
74 | 		bpos := uint64(pos) + uint64(i) // Absolute bit position; bit 0 is the MSB of d[0]
75 | 		val |= (uint32(d[bpos/8] >> (7 - bpos%8))) << (31 - i) // Extra high bits repeat earlier bits or shift out of the uint32
76 | 	}
77 | 	return val
78 | }
79 | 


--------------------------------------------------------------------------------
/bzip2/internal/sais/common.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | // Package sais implements a linear time suffix array algorithm.
 6 | package sais
 7 | 
 8 | //go:generate go run sais_gen.go byte sais_byte.go
 9 | //go:generate go run sais_gen.go int sais_int.go
10 | 
11 | // This package ports the C sais implementation by Yuta Mori. The ports are
12 | // located in sais_byte.go and sais_int.go, which are identical to each other
13 | // except for the types. Since Go does not support generics, we use generators to
14 | // create the two files.
15 | //
16 | // References:
17 | //	https://sites.google.com/site/yuta256/sais
18 | //	https://www.researchgate.net/publication/221313676_Linear_Time_Suffix_Array_Construction_Using_D-Critical_Substrings
19 | //	https://www.researchgate.net/publication/224176324_Two_Efficient_Algorithms_for_Linear_Time_Suffix_Array_Construction
20 | 
21 | // ComputeSA computes the suffix array of t and places the result in sa.
22 | // Both t and sa must be the same length; otherwise ComputeSA panics.
23 | func ComputeSA(t []byte, sa []int) {
24 | 	if len(sa) != len(t) {
25 | 		panic("mismatching sizes")
26 | 	}
27 | 	computeSA_byte(t, sa, 0, len(t), 256) // NOTE(review): 256 is presumably the byte alphabet size; confirm in sais_byte.go
28 | }
29 | 


--------------------------------------------------------------------------------
/bzip2/mtf_rle2.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package bzip2
  6 | 
  7 | import "github.com/dsnet/compress/internal/errors"
  8 | 
  9 | // moveToFront implements both the MTF and RLE stages of bzip2 at the same time.
 10 | // Any runs of zeros in the encoded output will be replaced by a sequence of
 11 | // RUNA and RUNB symbols that encode the length of the run.
 12 | //
 13 | // The RLE encoding used can actually be encoded to and decoded from using
 14 | // normal two's complement arithmetic. The methodology for doing so is below.
 15 | //
 16 | // Assuming the following:
 17 | //
 18 | //	num: The value being encoded by RLE encoding.
 19 | //	run: A sequence of RUNA and RUNB symbols represented as a binary integer,
 20 | //	where RUNA is the 0 bit, RUNB is the 1 bit, and least-significant RUN
 21 | //	symbols are at the least-significant bit positions.
 22 | //	cnt: The number of RUNA and RUNB symbols.
 23 | //
 24 | // Then the RLE encoding used by bzip2 has this mathematical property:
 25 | //
 26 | //	num+1 == (1<<cnt) | run
 27 | type moveToFront struct {
 28 | 	dictBuf [256]uint8 // Dictionary storage; Init copies the alphabet in, Encode/Decode reorder it in place
 29 | 	dictLen int        // Number of valid entries in dictBuf
 30 | 
 31 | 	vals    []byte   // Output buffer reused by Decode across calls
 32 | 	syms    []uint16 // Output buffer reused by Encode across calls
 33 | 	blkSize int      // Upper bound on the number of bytes Encode accepts and Decode produces
 34 | }
 35 | 
 36 | func (mtf *moveToFront) Init(dict []uint8, blkSize int) { // Loads a fresh dictionary and block size limit.
 37 | 	if len(dict) > len(mtf.dictBuf) { // More than 256 entries cannot fit
 38 | 		panicf(errors.Internal, "alphabet too large")
 39 | 	}
 40 | 	copy(mtf.dictBuf[:], dict)
 41 | 	mtf.dictLen = len(dict)
 42 | 	mtf.blkSize = blkSize
 43 | }
 44 | 
 45 | func (mtf *moveToFront) Encode(vals []byte) (syms []uint16) { // Returns MTF+RLE symbols for vals; the result aliases mtf.syms.
 46 | 	dict := mtf.dictBuf[:mtf.dictLen] // Shares storage with dictBuf, so reordering persists after the call
 47 | 	syms = mtf.syms[:0]
 48 | 
 49 | 	if len(vals) > mtf.blkSize {
 50 | 		panicf(errors.Internal, "exceeded block size")
 51 | 	}
 52 | 
 53 | 	var lastNum uint32 // Length of the pending run of index-0 hits
 54 | 	for _, val := range vals {
 55 | 		// Normal move-to-front transform.
 56 | 		var idx uint8 // Reverse lookup idx in dict
 57 | 		for di, dv := range dict {
 58 | 			if dv == val {
 59 | 				idx = uint8(di)
 60 | 				break
 61 | 			}
 62 | 		}
 63 | 		copy(dict[1:], dict[:idx]) // Shift the entries ahead of idx one slot back
 64 | 		dict[0] = val
 65 | 
 66 | 		// Run-length encoding augmentation.
 67 | 		if idx == 0 {
 68 | 			lastNum++
 69 | 			continue
 70 | 		}
 71 | 		if lastNum > 0 {
 72 | 			for rc := lastNum + 1; rc != 1; rc >>= 1 { // Emit the bits of lastNum+1 below its top set bit, LSB first
 73 | 				syms = append(syms, uint16(rc&1))
 74 | 			}
 75 | 			lastNum = 0
 76 | 		}
 77 | 		syms = append(syms, uint16(idx)+1) // Offset by one: symbols 0 and 1 are the run symbols
 78 | 	}
 79 | 	if lastNum > 0 { // Flush a run that reaches the end of the input
 80 | 		for rc := lastNum + 1; rc != 1; rc >>= 1 {
 81 | 			syms = append(syms, uint16(rc&1))
 82 | 		}
 83 | 	}
 84 | 	mtf.syms = syms // Keep the possibly grown buffer for reuse
 85 | 	return syms
 86 | }
 87 | 
 88 | func (mtf *moveToFront) Decode(syms []uint16) (vals []byte) { // Returns the bytes encoded by syms; the result aliases mtf.vals.
 89 | 	dict := mtf.dictBuf[:mtf.dictLen] // Shares storage with dictBuf, so reordering persists after the call
 90 | 	vals = mtf.vals[:0]
 91 | 
 92 | 	var lastCnt uint   // Number of run symbols collected so far
 93 | 	var lastRun uint32 // Run symbols packed as bits, first symbol in the lowest bit
 94 | 	for _, sym := range syms {
 95 | 		// Run-length encoding augmentation.
 96 | 		if sym < 2 {
 97 | 			lastRun |= uint32(sym) << lastCnt
 98 | 			lastCnt++
 99 | 			continue
100 | 		}
101 | 		if lastCnt > 0 {
102 | 			cnt := int((1<<lastCnt)|lastRun) - 1 // Run length, from num+1 == (1<<cnt)|run
103 | 			if len(vals)+cnt > mtf.blkSize || lastCnt > 24 {
104 | 				panicf(errors.Corrupted, "run-length decoding exceeded block size")
105 | 			}
106 | 			for i := cnt; i > 0; i-- {
107 | 				vals = append(vals, dict[0]) // A run repeats the current front entry
108 | 			}
109 | 			lastCnt, lastRun = 0, 0
110 | 		}
111 | 
112 | 		// Normal move-to-front transform.
113 | 		val := dict[sym-1] // Forward lookup val in dict
114 | 		copy(dict[1:], dict[:sym-1])
115 | 		dict[0] = val
116 | 
117 | 		if len(vals) >= mtf.blkSize {
118 | 			panicf(errors.Corrupted, "run-length decoding exceeded block size")
119 | 		}
120 | 		vals = append(vals, val)
121 | 	}
122 | 	if lastCnt > 0 { // Flush a run that reaches the end of the symbols
123 | 		cnt := int((1<<lastCnt)|lastRun) - 1
124 | 		if len(vals)+cnt > mtf.blkSize || lastCnt > 24 {
125 | 			panicf(errors.Corrupted, "run-length decoding exceeded block size")
126 | 		}
127 | 		for i := cnt; i > 0; i-- {
128 | 			vals = append(vals, dict[0])
129 | 		}
130 | 	}
131 | 	mtf.vals = vals // Keep the possibly grown buffer for reuse
132 | 	return vals
133 | }
134 | 


--------------------------------------------------------------------------------
/bzip2/mtf_rle2_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package bzip2
  6 | 
  7 | import (
  8 | 	"reflect"
  9 | 	"testing"
 10 | 
 11 | 	"github.com/dsnet/compress/internal/errors"
 12 | )
 13 | 
 14 | func TestMoveToFront(t *testing.T) { // Checks moveToFront Encode and Decode against fixed vectors.
 15 | 	getDict := func(buf []byte) []uint8 { // Returns the distinct byte values of buf in ascending order
 16 | 		var dictMap [256]bool
 17 | 		for _, b := range buf {
 18 | 			dictMap[b] = true
 19 | 		}
 20 | 		var dictArr [256]uint8
 21 | 		dict := dictArr[:0]
 22 | 		for j, b := range dictMap {
 23 | 			if b {
 24 | 				dict = append(dict, uint8(j))
 25 | 			}
 26 | 		}
 27 | 		return dict
 28 | 	}
 29 | 
 30 | 	vectors := []struct {
 31 | 		size   int // If zero, default to 1MiB
 32 | 		input  []byte
 33 | 		output []uint16
 34 | 		fail   bool // Whether the Encode/Decode pair is expected to report an error
 35 | 	}{{
 36 | 		input:  []byte{},
 37 | 		output: []uint16{},
 38 | 	}, {
 39 | 		input:  []byte{2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
 40 | 		output: []uint16{1, 1, 0},
 41 | 	}, {
 42 | 		input:  []byte{9, 8, 7, 6, 5, 4, 3, 2, 1},
 43 | 		output: []uint16{9, 9, 9, 9, 9, 9, 9, 9, 9},
 44 | 	}, {
 45 | 		input:  []byte{42, 47, 42, 47, 42, 47, 42, 47, 42, 47, 42, 47},
 46 | 		output: []uint16{0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
 47 | 	}, {
 48 | 		input:  []byte{0, 5, 2, 3, 4, 4, 3, 1, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 2, 3, 3},
 49 | 		output: []uint16{0, 6, 4, 5, 6, 0, 2, 6, 4, 3, 0, 1, 4, 1, 5, 4, 4, 0},
 50 | 	}, {
 51 | 		input:  []byte{100, 111, 108, 104, 10, 114, 101, 108, 108, 119, 111, 32},
 52 | 		output: []uint16{3, 7, 7, 7, 5, 8, 8, 5, 0, 9, 7, 9},
 53 | 	}, {
 54 | 		input: []byte{
 55 | 			103, 33, 107, 121, 110, 120, 101, 100, 101, 114, 44, 100, 111, 10, 32,
 56 | 			108, 32, 105, 101, 108, 32, 104, 104, 112, 72, 118, 32, 111, 116, 84,
 57 | 			117, 32, 99, 32, 114, 101, 108, 117, 119, 108, 100, 119, 32, 114, 102,
 58 | 			109, 32, 101, 111, 98, 32, 113, 106, 111, 111, 32, 111, 122, 97,
 59 | 		},
 60 | 		output: []uint16{
 61 | 			13, 4, 17, 30, 21, 30, 16, 16, 2, 26, 12, 4, 24, 12, 13, 23, 2, 22, 9,
 62 | 			4, 4, 22, 0, 25, 18, 29, 5, 10, 28, 21, 29, 5, 25, 2, 17, 13, 13, 6, 30,
 63 | 			3, 17, 3, 7, 7, 27, 29, 4, 9, 13, 28, 4, 30, 30, 5, 0, 4, 2, 31, 31,
 64 | 		},
 65 | 	}, {
 66 | 		input: []byte{
 67 | 			74, 69, 205, 44, 38, 175, 207, 101, 59, 108, 42, 155, 208, 50, 38, 115,
 68 | 			190, 138, 163, 35, 13, 172, 160, 74, 68, 173, 99, 57, 213, 158, 248,
 69 | 			209, 176, 52, 135, 21, 26, 248, 186, 186, 219, 113, 172, 163, 13, 22,
 70 | 			100, 134, 4, 141, 53, 244, 99, 126, 214, 59, 53, 43, 146, 67, 131, 51,
 71 | 			212, 146, 245,
 72 | 		},
 73 | 		output: []uint16{20, 20, 44, 13, 11, 41, 45, 26, 22, 27, 17, 37, 46, 21,
 74 | 			10, 31, 46, 37, 42, 24, 21, 43, 43, 22, 33, 44, 35, 34, 49, 45, 54,
 75 | 			49, 48, 38, 46, 35, 37, 7, 49, 0, 52, 45, 19, 22, 21, 40, 45, 48, 42,
 76 | 			49, 46, 53, 24, 49, 53, 41, 6, 48, 52, 51, 52, 52, 53, 5, 54,
 77 | 		},
 78 | 	}, {
 79 | 		input: []byte{
 80 | 			153, 45, 45, 38, 135, 179, 26, 154, 165, 170, 170, 170, 170, 18, 109,
 81 | 			240, 174, 150, 87, 164, 30, 30, 30, 30, 30, 30, 30, 148, 190, 10, 60,
 82 | 			13, 13, 13, 13, 13, 6, 81, 200, 13, 225, 32, 17, 43, 22, 179, 13, 13,
 83 | 			17, 236, 236, 236, 236, 236, 236, 236, 121, 211, 2, 211, 185, 54, 16,
 84 | 			5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 50,
 85 | 			5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 40,
 86 | 		},
 87 | 		output: []uint16{
 88 | 			27, 17, 0, 15, 25, 33, 15, 29, 31, 32, 0, 0, 17, 28, 40, 34, 33, 31,
 89 | 			34, 25, 1, 1, 34, 36, 23, 33, 25, 1, 0, 25, 34, 37, 4, 39, 32, 31, 34,
 90 | 			33, 26, 7, 0, 5, 40, 1, 1, 38, 40, 34, 2, 40, 40, 38, 38, 0, 1, 1, 0,
 91 | 			40, 2, 0, 1, 1, 0, 40,
 92 | 		},
 93 | 	}, {
 94 | 		size:   10, // The vectors from here on set an explicit block size limit
 95 | 		input:  []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3},
 96 | 		output: []uint16{0, 1, 2, 1, 3, 0},
 97 | 		fail:   false,
 98 | 	}, {
 99 | 		size:   10,
100 | 		input:  []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3},
101 | 		output: []uint16{0, 1, 2, 1, 3, 1},
102 | 		fail:   true,
103 | 	}, {
104 | 		size:   10,
105 | 		input:  []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3},
106 | 		output: []uint16{0, 1, 2, 1, 3, 2, 2},
107 | 		fail:   true,
108 | 	}, {
109 | 		size:   10,
110 | 		input:  []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3},
111 | 		output: []uint16{1, 1, 2, 1, 3, 0},
112 | 		fail:   true,
113 | 	}, {
114 | 		size:  9, // Input is longer than size, so the failure is expected from Encode
115 | 		input: []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3},
116 | 		fail:  true,
117 | 	}}
118 | 
119 | 	mtf := new(moveToFront) // One instance is reused across all vectors
120 | 	for i, v := range vectors {
121 | 		var err error
122 | 		var input []byte
123 | 		var output []uint16
124 | 		func() {
125 | 			defer errors.Recover(&err) // NOTE(review): presumably turns a panicf panic into err; confirm in internal/errors
126 | 			if v.size == 0 {
127 | 				v.size = 1 << 20
128 | 			}
129 | 			dict := getDict(v.input)
130 | 			mtf.Init(dict, v.size)
131 | 			output = mtf.Encode(v.input)
132 | 			mtf.Init(dict, v.size) // Re-init: Encode reordered the dictionary in place
133 | 			input = mtf.Decode(v.output)
134 | 		}()
135 | 
136 | 		fail := err != nil
137 | 		if fail && !v.fail {
138 | 			t.Errorf("test %d, unexpected error: %v", i, err)
139 | 		}
140 | 		if !fail && v.fail {
141 | 			t.Errorf("test %d, unexpected success", i)
142 | 		}
143 | 		if fail || v.fail { // Results are only compared for vectors that succeed as expected
144 | 			continue
145 | 		}
146 | 		if !reflect.DeepEqual(input, v.input) && !(len(input) == 0 && len(v.input) == 0) {
147 | 			t.Errorf("test %d, input mismatch:\ngot  %v\nwant %v", i, input, v.input)
148 | 		}
149 | 		if !reflect.DeepEqual(output, v.output) && !(len(output) == 0 && len(v.output) == 0) {
150 | 			t.Errorf("test %d, output mismatch:\ngot  %v\nwant %v", i, output, v.output)
151 | 		}
152 | 	}
153 | }
154 | 


--------------------------------------------------------------------------------
/bzip2/prefix_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package bzip2
 6 | 
 7 | import (
 8 | 	"reflect"
 9 | 	"testing"
10 | 
11 | 	"github.com/dsnet/compress/internal/prefix"
12 | )
13 | 
14 | // TestDegenerateCodes checks how handleDegenerateCodes repairs prefix codes
15 | // whose lengths leave the code space under- or over-subscribed.
16 | func TestDegenerateCodes(t *testing.T) {
17 | 	cases := []struct {
18 | 		in   prefix.PrefixCodes
19 | 		want prefix.PrefixCodes
20 | 	}{{
21 | 		in: []prefix.PrefixCode{
22 | 			{Sym: 0, Len: 1},
23 | 		},
24 | 		want: []prefix.PrefixCode{
25 | 			{Sym: 0, Len: 1, Val: 0},   // 0
26 | 			{Sym: 258, Len: 1, Val: 1}, // 1
27 | 		},
28 | 	}, {
29 | 		in: []prefix.PrefixCode{
30 | 			{Sym: 0, Len: 1},
31 | 			{Sym: 1, Len: 1},
32 | 			{Sym: 2, Len: 1},
33 | 		},
34 | 		want: []prefix.PrefixCode{
35 | 			{Sym: 0, Len: 1, Val: 0}, // 0
36 | 			{Sym: 1, Len: 1, Val: 1}, // 1
37 | 		},
38 | 	}, {
39 | 		in: []prefix.PrefixCode{
40 | 			{Sym: 0, Len: 3},
41 | 			{Sym: 1, Len: 4},
42 | 			{Sym: 2, Len: 3},
43 | 		},
44 | 		want: []prefix.PrefixCode{
45 | 			{Sym: 0, Len: 3, Val: 0},    //  000
46 | 			{Sym: 1, Len: 4, Val: 2},    // 0010
47 | 			{Sym: 2, Len: 3, Val: 4},    //  100
48 | 			{Sym: 258, Len: 4, Val: 10}, // 1010
49 | 			{Sym: 259, Len: 3, Val: 6},  //  110
50 | 			{Sym: 260, Len: 1, Val: 1},  //    1
51 | 		},
52 | 	}, {
53 | 		in: []prefix.PrefixCode{
54 | 			{Sym: 0, Len: 1},
55 | 			{Sym: 1, Len: 3},
56 | 			{Sym: 2, Len: 4},
57 | 			{Sym: 3, Len: 3},
58 | 			{Sym: 4, Len: 2},
59 | 		},
60 | 		want: []prefix.PrefixCode{
61 | 			{Sym: 0, Len: 1, Val: 0}, //   0
62 | 			{Sym: 1, Len: 3, Val: 3}, // 011
63 | 			{Sym: 3, Len: 3, Val: 7}, // 111
64 | 			{Sym: 4, Len: 2, Val: 1}, //  01
65 | 		},
66 | 	}}
67 | 
68 | 	for idx, tc := range cases {
69 | 		got := handleDegenerateCodes(append(prefix.PrefixCodes(nil), tc.in...))
70 | 		if want := tc.want; !reflect.DeepEqual(got, want) {
71 | 			t.Errorf("test %d, output mismatch:\ngot  %v\nwant %v", idx, got, want)
72 | 		}
73 | 	}
74 | }
75 | 


--------------------------------------------------------------------------------
/bzip2/rle1.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package bzip2
  6 | 
  7 | import "github.com/dsnet/compress/internal/errors"
  8 | 
  9 | // rleDone is a special "error" that Write and Read return when the RLE stage is done.
 10 | var rleDone = errorf(errors.Unknown, "RLE1 stage is completed")
 11 | 
 12 | // runLengthEncoding implements the first RLE stage of bzip2. Every sequence
 13 | // of 4..255 duplicated bytes is replaced by only the first 4 bytes, and a
 14 | // single byte representing the repeat length. Similar to the C bzip2
 15 | // implementation, the encoder will always terminate repeat sequences with a
 16 | // count (even if it is the end of the buffer), and it will also never produce
 17 | // run lengths of 256..259. The decoder can handle the latter case.
 18 | //
 19 | // For example, if the input was:
 20 | //
 21 | //	input:  "AAAAAAABBBBCCCD"
 22 | //
 23 | // Then the output will be:
 24 | //
 25 | //	output: "AAAA\x03BBBB\x00CCCD"
 26 | type runLengthEncoding struct {
 27 | 	buf     []byte // backing buffer: Write stores encoded bytes here, Read consumes them from here
 28 | 	idx     int    // next position in buf to write to or read from
 29 | 	lastVal byte   // most recent byte value seen by Write or emitted by Read
 30 | 	lastCnt int    // Write: length of the current run; Read: repeats still owed, or minus the run length so far
 31 | }
 32 | 
 33 | func (rle *runLengthEncoding) Init(buf []byte) { // Init starts a fresh pass over buf
 34 | 	*rle = runLengthEncoding{buf: buf} // every other field returns to its zero value
 35 | }
 36 | 
 37 | // Write run-length encodes buf into the backing buffer. It returns how many
 38 | // bytes of buf were consumed, together with rleDone as soon as the backing
 39 | // buffer has no room for the next byte (or for a fourth byte plus its count).
 40 | func (rle *runLengthEncoding) Write(buf []byte) (int, error) {
 41 | 	for n, c := range buf {
 42 | 		if c != rle.lastVal {
 43 | 			rle.lastCnt = 0
 44 | 		}
 45 | 		rle.lastCnt++
 46 | 		// Bytes 5..255 of a run only bump the count byte already emitted.
 47 | 		if rle.lastCnt > 4 && rle.lastCnt < 256 {
 48 | 			rle.buf[rle.idx-1]++
 49 | 			rle.lastVal = c
 50 | 			continue
 51 | 		}
 52 | 		need := 1 // a literal byte
 53 | 		if rle.lastCnt == 4 {
 54 | 			need = 2 // the fourth byte of a run drags a zeroed count along
 55 | 		}
 56 | 		if rle.idx+need > len(rle.buf) {
 57 | 			return n, rleDone
 58 | 		}
 59 | 		if rle.lastCnt >= 256 {
 60 | 			rle.lastCnt = 1 // the run outgrew one count byte; begin a new one
 61 | 		}
 62 | 		rle.buf[rle.idx] = c
 63 | 		rle.idx++
 64 | 		if need == 2 {
 65 | 			rle.buf[rle.idx] = 0
 66 | 			rle.idx++
 67 | 		}
 68 | 		rle.lastVal = c
 69 | 	}
 70 | 	return len(buf), nil
 71 | }
 72 | 
 73 | // Read decodes run-length encoded data from the backing buffer into buf. It
 74 | // returns rleDone when it needs another input byte and none is left, or a
 75 | // Corrupted error if four equal bytes are not followed by a repeat count.
 76 | func (rle *runLengthEncoding) Read(buf []byte) (int, error) {
 77 | 	for n := range buf {
 78 | 		// Four equal bytes in a row: the next input byte is a repeat count.
 79 | 		if rle.lastCnt == -4 {
 80 | 			if rle.idx >= len(rle.buf) {
 81 | 				return n, errorf(errors.Corrupted, "missing terminating run-length repeater")
 82 | 			}
 83 | 			rle.lastCnt = int(rle.buf[rle.idx])
 84 | 			rle.idx++
 85 | 		}
 86 | 		// No repeats owed (including a zero count just read): fetch a literal.
 87 | 		if rle.lastCnt <= 0 {
 88 | 			if rle.idx >= len(rle.buf) {
 89 | 				return n, rleDone
 90 | 			}
 91 | 			c := rle.buf[rle.idx]
 92 | 			rle.idx++
 93 | 			if c != rle.lastVal {
 94 | 				rle.lastVal, rle.lastCnt = c, 0
 95 | 			}
 96 | 		}
 97 | 		buf[n] = rle.lastVal
 98 | 		rle.lastCnt--
 99 | 	}
100 | 	return len(buf), nil
101 | }
102 | 
103 | func (rle *runLengthEncoding) Bytes() []byte { return rle.buf[:rle.idx] } // the part of buf before idx
104 | 


--------------------------------------------------------------------------------
/bzip2/rle1_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package bzip2
  6 | 
  7 | import (
  8 | 	"bytes"
  9 | 	"io"
 10 | 	"strings"
 11 | 	"testing"
 12 | 
 13 | 	"github.com/dsnet/compress/internal/testutil"
 14 | )
 15 | 
 16 | func TestRunLengthEncoder(t *testing.T) {
 17 | 	vectors := []struct {
 18 | 		size   int
 19 | 		input  string
 20 | 		output string
 21 | 		done   bool
 22 | 	}{{
 23 | 		size:   0,
 24 | 		input:  "",
 25 | 		output: "",
 26 | 	}, {
 27 | 		size:   6,
 28 | 		input:  "abc",
 29 | 		output: "abc",
 30 | 	}, {
 31 | 		size:   6,
 32 | 		input:  "abcccc",
 33 | 		output: "abccc",
 34 | 		done:   true,
 35 | 	}, {
 36 | 		size:   7,
 37 | 		input:  "abcccc",
 38 | 		output: "abcccc\x00",
 39 | 	}, {
 40 | 		size:   14,
 41 | 		input:  "aaaabbbbcccc",
 42 | 		output: "aaaa\x00bbbb\x00ccc",
 43 | 		done:   true,
 44 | 	}, {
 45 | 		size:   15,
 46 | 		input:  "aaaabbbbcccc",
 47 | 		output: "aaaa\x00bbbb\x00cccc\x00",
 48 | 	}, {
 49 | 		size:   16,
 50 | 		input:  strings.Repeat("a", 4),
 51 | 		output: "aaaa\x00",
 52 | 	}, {
 53 | 		size:   16,
 54 | 		input:  strings.Repeat("a", 255),
 55 | 		output: "aaaa\xfb",
 56 | 	}, {
 57 | 		size:   16,
 58 | 		input:  strings.Repeat("a", 256),
 59 | 		output: "aaaa\xfba",
 60 | 	}, {
 61 | 		size:   16,
 62 | 		input:  strings.Repeat("a", 259),
 63 | 		output: "aaaa\xfbaaaa\x00",
 64 | 	}, {
 65 | 		size:   16,
 66 | 		input:  strings.Repeat("a", 500),
 67 | 		output: "aaaa\xfbaaaa\xf1",
 68 | 	}, {
 69 | 		size:   64,
 70 | 		input:  "aaabbbcccddddddeeefgghiiijkllmmmmmmmmnnoo",
 71 | 		output: "aaabbbcccdddd\x02eeefgghiiijkllmmmm\x04nnoo",
 72 | 	}}
 73 | 
 74 | 	buf := make([]byte, 3)
 75 | 	for i, v := range vectors {
 76 | 		rd := strings.NewReader(v.input)
 77 | 		rle := new(runLengthEncoding)
 78 | 		rle.Init(make([]byte, v.size))
 79 | 		_, err := io.CopyBuffer(rle, struct{ io.Reader }{rd}, buf)
 80 | 		output := rle.Bytes()
 81 | 
 82 | 		if got, want, ok := testutil.BytesCompare(output, []byte(v.output)); !ok {
 83 | 			t.Errorf("test %d, output mismatch:\ngot  %s\nwant %s", i, got, want)
 84 | 		}
 85 | 		if done := err == rleDone; done != v.done {
 86 | 			t.Errorf("test %d, done mismatch: got %v want %v", i, done, v.done)
 87 | 		}
 88 | 	}
 89 | }
 90 | 
 91 | func TestRunLengthDecoder(t *testing.T) {
 92 | 	vectors := []struct {
 93 | 		input  string
 94 | 		output string
 95 | 		fail   bool
 96 | 	}{{
 97 | 		input:  "",
 98 | 		output: "",
 99 | 	}, {
100 | 		input:  "abc",
101 | 		output: "abc",
102 | 	}, {
103 | 		input:  "aaaa",
104 | 		output: "aaaa",
105 | 		fail:   true,
106 | 	}, {
107 | 		input:  "baaaa\x00aaaa",
108 | 		output: "baaaaaaaa",
109 | 		fail:   true,
110 | 	}, {
111 | 		input:  "abcccc\x00",
112 | 		output: "abcccc",
113 | 	}, {
114 | 		input:  "aaaa\x00bbbb\x00ccc",
115 | 		output: "aaaabbbbccc",
116 | 	}, {
117 | 		input:  "aaaa\x00bbbb\x00cccc\x00",
118 | 		output: "aaaabbbbcccc",
119 | 	}, {
120 | 		input:  "aaaa\x00aaaa\x00aaaa\x00",
121 | 		output: "aaaaaaaaaaaa",
122 | 	}, {
123 | 		input:  "aaaa\xffaaaa\xffaaaa\xff",
124 | 		output: strings.Repeat("a", 259*3),
125 | 	}, {
126 | 		input:  "bbbaaaa\xffaaaa\xffaaaa\xff",
127 | 		output: "bbb" + strings.Repeat("a", 259*3),
128 | 	}, {
129 | 		input:  "aaaa\x00",
130 | 		output: strings.Repeat("a", 4),
131 | 	}, {
132 | 		input:  "aaaa\xfb",
133 | 		output: strings.Repeat("a", 255),
134 | 	}, {
135 | 		input:  "aaaa\xfba",
136 | 		output: strings.Repeat("a", 256),
137 | 	}, {
138 | 		input:  "aaaa\xfbaaaa\x00",
139 | 		output: strings.Repeat("a", 259),
140 | 	}, {
141 | 		input:  "aaaa\xfbaaaa\xf1",
142 | 		output: strings.Repeat("a", 500),
143 | 	}, {
144 | 		input:  "aaabbbcccdddd\x02eeefgghiiijkllmmmm\x04nnoo",
145 | 		output: "aaabbbcccddddddeeefgghiiijkllmmmmmmmmnnoo",
146 | 	}}
147 | 
148 | 	buf := make([]byte, 3)
149 | 	for i, v := range vectors {
150 | 		wr := new(bytes.Buffer)
151 | 		rle := new(runLengthEncoding)
152 | 		rle.Init([]byte(v.input))
153 | 		_, err := io.CopyBuffer(struct{ io.Writer }{wr}, rle, buf)
154 | 		output := wr.Bytes()
155 | 
156 | 		if got, want, ok := testutil.BytesCompare(output, []byte(v.output)); !ok {
157 | 			t.Errorf("test %d, output mismatch:\ngot  %s\nwant %s", i, got, want)
158 | 		}
159 | 		if fail := err != rleDone; fail != v.fail {
160 | 			t.Errorf("test %d, failure mismatch: got %t, want %t", i, fail, v.fail)
161 | 		}
162 | 	}
163 | }
164 | 


--------------------------------------------------------------------------------
/bzip2/testdata/gauntlet_test3.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/gauntlet_test3.bin


--------------------------------------------------------------------------------
/bzip2/testdata/gauntlet_test3.bwt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/gauntlet_test3.bwt


--------------------------------------------------------------------------------
/bzip2/testdata/silesia_ooffice.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/silesia_ooffice.bin


--------------------------------------------------------------------------------
/bzip2/testdata/silesia_ooffice.bwt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/silesia_ooffice.bwt


--------------------------------------------------------------------------------
/bzip2/testdata/silesia_xray.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/silesia_xray.bin


--------------------------------------------------------------------------------
/bzip2/testdata/silesia_xray.bwt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/silesia_xray.bwt


--------------------------------------------------------------------------------
/bzip2/testdata/testfiles_test3.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/testfiles_test3.bin


--------------------------------------------------------------------------------
/bzip2/testdata/testfiles_test3.bwt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/testfiles_test3.bwt


--------------------------------------------------------------------------------
/bzip2/testdata/testfiles_test4.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/testfiles_test4.bin


--------------------------------------------------------------------------------
/bzip2/testdata/testfiles_test4.bwt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/bzip2/testdata/testfiles_test4.bwt


--------------------------------------------------------------------------------
/bzip2/writer_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package bzip2
 6 | 
 7 | import (
 8 | 	"bytes"
 9 | 	"io"
10 | 	"io/ioutil"
11 | 	"testing"
12 | )
13 | 
14 | // BenchmarkEncode times compressing each data set that runBenchmarks supplies,
15 | // reusing a single Writer per data set and discarding the compressed output.
16 | func BenchmarkEncode(b *testing.B) {
17 | 	runBenchmarks(b, func(b *testing.B, data []byte, lvl int) {
18 | 		b.StopTimer()
19 | 		b.ReportAllocs()
20 | 
21 | 		src := new(bytes.Reader)
22 | 		zw, _ := NewWriter(nil, &WriterConfig{Level: lvl})
23 | 		size := int64(len(data))
24 | 		b.SetBytes(size)
25 | 		b.StartTimer()
26 | 		for iter := 0; iter < b.N; iter++ {
27 | 			src.Reset(data)
28 | 			zw.Reset(ioutil.Discard)
29 | 			if n, err := io.Copy(zw, src); n != size || err != nil {
30 | 				b.Fatalf("Copy() = (%d, %v), want (%d, nil)", n, err, len(data))
31 | 			}
32 | 			if err := zw.Close(); err != nil {
33 | 				b.Fatalf("Close() = %v, want nil", err)
34 | 			}
35 | 		}
36 | 	})
37 | }
38 | 


--------------------------------------------------------------------------------
/doc/bzip2-format.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2-format.pdf


--------------------------------------------------------------------------------
/doc/bzip2/bitmap-decode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/bitmap-decode.png


--------------------------------------------------------------------------------
/doc/bzip2/bitmap.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/bitmap.psd


--------------------------------------------------------------------------------
/doc/bzip2/bwt-decode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/bwt-decode.png


--------------------------------------------------------------------------------
/doc/bzip2/bwt-encode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/bwt-encode.png


--------------------------------------------------------------------------------
/doc/bzip2/bwt.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/bwt.psd


--------------------------------------------------------------------------------
/doc/bzip2/bzip2-format.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/bzip2-format.docx


--------------------------------------------------------------------------------
/doc/bzip2/diagram.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/diagram.psd


--------------------------------------------------------------------------------
/doc/bzip2/hexdump-complex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/hexdump-complex.png


--------------------------------------------------------------------------------
/doc/bzip2/hexdump-simple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/hexdump-simple.png


--------------------------------------------------------------------------------
/doc/bzip2/hexdump.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/hexdump.psd


--------------------------------------------------------------------------------
/doc/bzip2/mtf-decode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/mtf-decode.png


--------------------------------------------------------------------------------
/doc/bzip2/mtf.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/mtf.psd


--------------------------------------------------------------------------------
/doc/bzip2/stream-complex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/stream-complex.png


--------------------------------------------------------------------------------
/doc/bzip2/stream-simple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/stream-simple.png


--------------------------------------------------------------------------------
/doc/bzip2/tree-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/bzip2/tree-example.png


--------------------------------------------------------------------------------
/doc/xflate-format.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate-format.pdf


--------------------------------------------------------------------------------
/doc/xflate/hexdump-complex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/hexdump-complex.png


--------------------------------------------------------------------------------
/doc/xflate/hexdump-simple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/hexdump-simple.png


--------------------------------------------------------------------------------
/doc/xflate/hexdump.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/hexdump.psd


--------------------------------------------------------------------------------
/doc/xflate/stream-complex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/stream-complex.png


--------------------------------------------------------------------------------
/doc/xflate/stream-simple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/stream-simple.png


--------------------------------------------------------------------------------
/doc/xflate/tree-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/tree-example.png


--------------------------------------------------------------------------------
/doc/xflate/tree-hclen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/tree-hclen.png


--------------------------------------------------------------------------------
/doc/xflate/xflate-format.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/doc/xflate/xflate-format.docx


--------------------------------------------------------------------------------
/flate/common.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | // Package flate implements the DEFLATE compressed data format,
 6 | // described in RFC 1951.
 7 | package flate
 8 | 
 9 | import (
10 | 	"fmt"
11 | 
12 | 	"github.com/dsnet/compress/internal/errors"
13 | )
14 | 
15 | const (
16 | 	maxHistSize = 1 << 15 // 32768; presumably the largest DEFLATE history window (TODO confirm against users)
17 | 	endBlockSym = 256     // presumably the end-of-block symbol value (TODO confirm against users)
18 | )
19 | 
20 | func errorf(c int, f string, a ...interface{}) error { // formats an error owned by this package
21 | 	return errors.Error{Code: c, Pkg: "flate", Msg: fmt.Sprintf(f, a...)}
22 | }
23 | 
24 | func panicf(c int, f string, a ...interface{}) { // like errorf, but hands the error to errors.Panic
25 | 	errors.Panic(errorf(c, f, a...)) // presumably panics with it for errors.Recover to catch (TODO confirm)
26 | }
27 | 
28 | // errWrap re-tags a lower-level errors.Error as one from this package and
29 | // passes any other error through untouched. A code that errors.IsInvalid
30 | // reports as invalid is swapped for replaceCode: the Reader should pass
31 | // errors.Corrupted and the Writer should pass errors.Internal.
32 | func errWrap(err error, replaceCode int) error {
33 | 	cerr, ok := err.(errors.Error)
34 | 	if !ok {
35 | 		return err
36 | 	}
37 | 	code := cerr.Code
38 | 	if errors.IsInvalid(cerr) {
39 | 		code = replaceCode
40 | 	}
41 | 	return errorf(code, "%s", cerr.Msg)
42 | }
43 | 
44 | var errClosed = errorf(errors.Closed, "") // error with the errors.Closed code and an empty message
45 | 


--------------------------------------------------------------------------------
/flate/dict_decoder.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package flate
  6 | 
  7 | // The dictDecoder implements the LZ77 sliding dictionary that is commonly used
  8 | // in various compression formats. For performance reasons, this implementation
  9 | // performs little to no sanity checks about the arguments. As such, the
 10 | // invariants documented for each method call must be respected. Furthermore,
 11 | // to reduce the memory footprint decompressing short streams, the dictionary
 12 | // starts with a relatively small size and then lazily grows.
 13 | 
 14 | const (
 15 | 	initSize   = 4096 // Bytes allocated for the history buffer when none exists yet
 16 | 	growFactor = 4    // Multiplier applied to the buffer capacity each time it is grown
 17 | )
 18 | 
 19 | type dictDecoder struct {
 20 | 	// Invariant: len(hist) <= size
 21 | 	size int    // Sliding window size requested through Init
 22 | 	hist []byte // Sliding window history, dynamically grown to match size
 23 | 
 24 | 	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
 25 | 	wrPos int  // Current output position in buffer
 26 | 	rdPos int  // Have emitted hist[:rdPos] already
 27 | 	full  bool // Set by ReadFlush once a size-long buffer has been filled and wrapped
 28 | }
 29 | 
 30 | // Init resets the decoder for a window of the given size, keeping any history
 31 | // buffer from an earlier use so that its storage is recycled.
 32 | // Regardless of what size claims, the buffer starts small (initSize bytes if
 33 | // none exists) to avoid denial-of-service attacks with large memory
 34 | // allocation; here it is only ever cut down to size, never enlarged.
 35 | func (dd *dictDecoder) Init(size int) {
 36 | 	*dd = dictDecoder{size: size, hist: dd.hist}
 37 | 	if dd.hist == nil {
 38 | 		dd.hist = make([]byte, initSize)
 39 | 	}
 40 | 	if dd.hist = dd.hist[:cap(dd.hist)]; len(dd.hist) > size {
 41 | 		dd.hist = dd.hist[:size]
 42 | 	}
 43 | }
 44 | 
 45 | // HistSize reports how many bytes of history are currently in the dictionary.
 46 | func (dd *dictDecoder) HistSize() int {
 47 | 	if !dd.full {
 48 | 		return dd.wrPos // No full window written yet: only hist[:wrPos] counts
 49 | 	}
 50 | 	return dd.size
 51 | }
 52 | 
 53 | // AvailSize reports how many more bytes can be written to the buffer before
 54 | // it is full, that is, the distance from the write position to the end of the
 55 | // currently allocated history.
 56 | func (dd *dictDecoder) AvailSize() int { return len(dd.hist) - dd.wrPos }
 57 | 
 58 | // WriteSlice exposes the unwritten tail of the buffer for the caller to fill;
 59 | // the returned slice is exactly AvailSize() bytes long.
 60 | func (dd *dictDecoder) WriteSlice() []byte {
 61 | 	tail := dd.hist[dd.wrPos:]
 62 | 	return tail
 63 | }
 64 | 
 65 | // WriteMark moves the write position forward by cnt bytes, typically after
 66 | // the caller has filled that much of the slice handed out by WriteSlice.
 67 | // No bounds are checked here.
 68 | //
 69 | // This invariant must be kept: 0 <= cnt <= AvailSize()
 70 | func (dd *dictDecoder) WriteMark(cnt int) { dd.wrPos += cnt }
 71 | 
 72 | // WriteByte stores the single byte c at the write position and advances it.
 73 | // This invariant must be kept: 0 < AvailSize()
 74 | func (dd *dictDecoder) WriteByte(c byte) {
 75 | 	pos := dd.wrPos
 76 | 	dd.hist[pos] = c
 77 | 	dd.wrPos = pos + 1
 78 | }
 79 | 
 80 | // TryWriteCopy is the fast path for copying length bytes that begin dist bytes
 81 | // behind the write position. It only acts when the source does not start
 82 | // before the beginning of the buffer and the destination fits before its end;
 83 | // otherwise it copies nothing and returns 0.
 84 | //
 85 | // This method is designed to be inlined for performance reasons.
 86 | // This invariant must be kept: 0 < dist <= HistSize()
 87 | func (dd *dictDecoder) TryWriteCopy(dist, length int) int {
 88 | 	start := dd.wrPos
 89 | 	stop := start + length
 90 | 	if start < dist || stop > len(dd.hist) {
 91 | 		return 0
 92 | 	}
 93 | 
 94 | 	// One pass copies at most dst-src bytes, so overlapping copies need several.
 95 | 	dst, src := start, start-dist
 96 | again:
 97 | 	dst += copy(dd.hist[dst:stop], dd.hist[src:dst])
 98 | 	if dst < stop {
 99 | 		goto again // Avoid for-loop so that this function can be inlined
100 | 	}
101 | 	dd.wrPos = dst
102 | 	return dst - start
103 | }
104 | 
105 | // WriteCopy copies up to length bytes that begin dist bytes behind the write
106 | // position, following the source around the end of the buffer if it starts
107 | // before the beginning. It returns the number of bytes copied, which is less
108 | // than length when the output buffer has too little room left.
109 | //
110 | // This invariant must be kept: 0 < dist <= HistSize()
111 | func (dd *dictDecoder) WriteCopy(dist, length int) int {
112 | 	start := dd.wrPos
113 | 	dst, src := start, start-dist
114 | 	stop := start + length
115 | 	if limit := len(dd.hist); stop > limit {
116 | 		stop = limit
117 | 	}
118 | 
119 | 	// A negative source position refers to data at the far end of the buffer:
120 | 	// copy that tail first, then carry on reading from the beginning.
121 | 	if src < 0 {
122 | 		dst += copy(dd.hist[dst:stop], dd.hist[src+len(dd.hist):])
123 | 		src = 0
124 | 	}
125 | 
126 | 	// Copy overlapping section before destination.
127 | 	for dst < stop {
128 | 		dst += copy(dd.hist[dst:stop], dd.hist[src:dst])
129 | 	}
130 | 	dd.wrPos = dst
131 | 	return dst - start
132 | }
133 | 
134 | // ReadFlush returns the part of the history that has been written but not yet
135 | // handed out. A call to ReadFlush is only valid after all of the data from a
136 | // previous call to ReadFlush has been consumed.
137 | func (dd *dictDecoder) ReadFlush() []byte {
138 | 	pending := dd.hist[dd.rdPos:dd.wrPos]
139 | 	dd.rdPos = dd.wrPos
140 | 	switch {
141 | 	case dd.wrPos != len(dd.hist): // Room remains in the buffer; nothing else to do
142 | 	case len(dd.hist) == dd.size:
143 | 		// The full-size window is used up: wrap around to the start.
144 | 		dd.wrPos, dd.rdPos, dd.full = 0, 0, true
145 | 	default:
146 | 		// The buffer is still smaller than the window: allocate a larger one.
147 | 		grown := cap(dd.hist) * growFactor
148 | 		if grown > dd.size {
149 | 			grown = dd.size
150 | 		}
151 | 		bigger := make([]byte, grown)
152 | 		copy(bigger, dd.hist)
153 | 		dd.hist = bigger
154 | 	}
155 | 	return pending
156 | }
157 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/dsnet/compress
 2 | 
 3 | go 1.9
 4 | 
 5 | require (
 6 | 	github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780
 7 | 	github.com/google/go-cmp v0.5.5 // indirect
 8 | 	github.com/klauspost/compress v1.4.1
 9 | 	github.com/klauspost/cpuid v1.2.0 // indirect
10 | 	github.com/ulikunitz/xz v0.5.8
11 | )
12 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780 h1:tFh1tRc4CA31yP6qDcu+Trax5wW5GuMxvkIba07qVLY=
 2 | github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
 3 | github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
 4 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 5 | github.com/klauspost/compress v1.4.1 h1:8VMb5+0wMgdBykOV96DwNwKFQ+WTI4pzYURP99CcB9E=
 6 | github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 7 | github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
 8 | github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 9 | github.com/ulikunitz/xz v0.5.8 h1:ERv8V6GKqVi23rgu5cj9pVfVzJbOqAY2Ntl88O6c2nQ=
10 | github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
11 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
12 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
13 | 


--------------------------------------------------------------------------------
/internal/cgo/README.md:
--------------------------------------------------------------------------------
1 | **Note:** The cgo directory contains a collection of cgo wrappers over the
2 | canonical C implementation for each compression format. These cgo wrappers are
3 | only used by the fuzzer and bench tools to test for correctness and performance
4 | of the Go implementations relative to the C implementations.
5 | There are no unit tests for each wrapper since they are thoroughly tested by
6 | the aforementioned tools.
7 | 


--------------------------------------------------------------------------------
/internal/cgo/flate/flate.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2016, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | //go:build cgo
  6 | // +build cgo
  7 | 
  8 | // Package flate implements the DEFLATE compressed data format,
  9 | // described in RFC 1951, using C wrappers.
 10 | package flate
 11 | 
 12 | /*
 13 | #cgo LDFLAGS: -lz
 14 | 
 15 | #include <stdlib.h>
 16 | #include "zlib.h"
 17 | 
 18 | z_streamp zfDecCreate() {
 19 | 	z_streamp state = calloc(1, sizeof(z_stream));
 20 | 	inflateInit2(state, -MAX_WBITS);
 21 | 	return state;
 22 | }
 23 | 
 24 | int zfDecStream(
 25 | 	z_streamp state,
 26 | 	uInt* avail_in, Bytef* next_in,
 27 | 	uInt* avail_out, Bytef* next_out
 28 | ) {
 29 | 	state->avail_in = *avail_in;
 30 | 	state->avail_out = *avail_out;
 31 | 	state->next_in = next_in;
 32 | 	state->next_out = next_out;
 33 | 	int ret = inflate(state, Z_NO_FLUSH);
 34 | 	*avail_in = state->avail_in;
 35 | 	*avail_out = state->avail_out;
 36 | 	state->next_in = NULL;
 37 | 	state->next_out = NULL;
 38 | 	return ret;
 39 | }
 40 | 
 41 | void zfDecDestroy(z_streamp state) {
 42 | 	inflateEnd(state);
 43 | 	free(state);
 44 | }
 45 | 
 46 | z_streamp zfEncCreate(int level) {
 47 | 	z_streamp state = calloc(1, sizeof(z_stream));
 48 | 	deflateInit2(state, level, Z_DEFLATED, -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
 49 | 	return state;
 50 | }
 51 | 
 52 | int zfEncStream(
 53 | 	z_streamp state, int flush,
 54 | 	uInt* avail_in, Bytef* next_in,
 55 | 	uInt* avail_out, Bytef* next_out
 56 | ) {
 57 | 	state->avail_in = *avail_in;
 58 | 	state->avail_out = *avail_out;
 59 | 	state->next_in = next_in;
 60 | 	state->next_out = next_out;
 61 | 	int ret = deflate(state, flush);
 62 | 	*avail_in = state->avail_in;
 63 | 	*avail_out = state->avail_out;
 64 | 	state->next_in = NULL;
 65 | 	state->next_out = NULL;
 66 | 	return ret;
 67 | }
 68 | 
 69 | void zfEncDestroy(z_streamp state) {
 70 | 	deflateEnd(state);
 71 | 	free(state);
 72 | }
 73 | */
 74 | import "C"
 75 | 
 76 | import (
 77 | 	"errors"
 78 | 	"io"
 79 | 	"unsafe"
 80 | )
 81 | 
// reader decompresses raw DEFLATE data by driving a C zlib inflate stream.
type reader struct {
	r     io.Reader     // source of compressed bytes
	err   error         // sticky error; once set, Read stops decoding and returns it
	state C.z_streamp   // C-allocated zlib stream; nil once Close has run
	buf   []byte        // compressed bytes not yet consumed (a window into arr)
	arr   [1 << 14]byte // backing storage that Read refills from r
}
 89 | 
 90 | func NewReader(r io.Reader) io.ReadCloser {
 91 | 	zr := &reader{r: r, state: C.zfDecCreate()}
 92 | 	if zr.state == nil {
 93 | 		panic("flate: could not allocate decoder state")
 94 | 	}
 95 | 	return zr
 96 | }
 97 | 
 98 | func (zr *reader) Read(buf []byte) (int, error) {
 99 | 	if zr.state == nil {
100 | 		return 0, io.ErrClosedPipe
101 | 	}
102 | 
103 | 	var n int
104 | 	for zr.err == nil && (len(buf) > 0 && n == 0) {
105 | 		availIn, availOut, ptrIn, ptrOut := sizePtrs(zr.buf, buf)
106 | 		ret := C.zfDecStream(zr.state, &availIn, ptrIn, &availOut, ptrOut)
107 | 		n += len(buf) - int(availOut)
108 | 		buf = buf[len(buf)-int(availOut):]
109 | 		zr.buf = zr.buf[len(zr.buf)-int(availIn):]
110 | 
111 | 		switch ret {
112 | 		case C.Z_OK:
113 | 			return n, nil
114 | 		case C.Z_BUF_ERROR:
115 | 			if len(zr.buf) == 0 {
116 | 				n1, err := zr.r.Read(zr.arr[:])
117 | 				if n1 > 0 {
118 | 					zr.buf = zr.arr[:n1]
119 | 				} else if err != nil {
120 | 					if err == io.EOF {
121 | 						err = io.ErrUnexpectedEOF
122 | 					}
123 | 					zr.err = err
124 | 				}
125 | 			}
126 | 		case C.Z_STREAM_END:
127 | 			return n, io.EOF
128 | 		default:
129 | 			zr.err = errors.New("flate: corrupted input")
130 | 		}
131 | 	}
132 | 	return n, zr.err
133 | }
134 | 
135 | func (zr *reader) Close() error {
136 | 	if zr.state != nil {
137 | 		defer func() {
138 | 			C.zfDecDestroy(zr.state)
139 | 			zr.state = nil
140 | 		}()
141 | 	}
142 | 	return zr.err
143 | }
144 | 
// writer compresses data into raw DEFLATE by driving a C zlib deflate stream.
type writer struct {
	w     io.Writer     // destination for compressed bytes
	err   error         // sticky error; once set, write stops and returns it
	state C.z_streamp   // C-allocated zlib stream; nil once Close has run
	buf   []byte        // output of the most recent deflate call (a window into arr)
	arr   [1 << 14]byte // scratch space that zlib writes compressed output into
}
152 | 
153 | func NewWriter(w io.Writer, level int) io.WriteCloser {
154 | 	if level < C.Z_NO_COMPRESSION || level > C.Z_BEST_COMPRESSION {
155 | 		panic("flate: invalid compression level")
156 | 	}
157 | 
158 | 	zw := &writer{w: w, state: C.zfEncCreate(C.int(level))}
159 | 	if zw.state == nil {
160 | 		panic("flate: could not allocate encoder state")
161 | 	}
162 | 	return zw
163 | }
164 | 
165 | func (zw *writer) Write(buf []byte) (int, error) {
166 | 	return zw.write(buf, C.Z_NO_FLUSH)
167 | }
168 | 
169 | func (zw *writer) write(buf []byte, op C.int) (int, error) {
170 | 	if zw.state == nil {
171 | 		return 0, io.ErrClosedPipe
172 | 	}
173 | 
174 | 	var n int
175 | 	flush := op != C.Z_NO_FLUSH
176 | 	for zw.err == nil && (len(buf) > 0 || flush) {
177 | 		availIn, availOut, ptrIn, ptrOut := sizePtrs(buf, zw.arr[:])
178 | 		ret := C.zfEncStream(zw.state, op, &availIn, ptrIn, &availOut, ptrOut)
179 | 		n += len(buf) - int(availIn)
180 | 		buf = buf[len(buf)-int(availIn):]
181 | 		zw.buf = zw.arr[:len(zw.arr)-int(availOut)]
182 | 
183 | 		if len(zw.buf) > 0 {
184 | 			if _, err := zw.w.Write(zw.buf); err != nil {
185 | 				zw.err = err
186 | 			}
187 | 		}
188 | 		switch ret {
189 | 		case C.Z_OK, C.Z_BUF_ERROR:
190 | 			continue // Do nothing
191 | 		case C.Z_STREAM_END:
192 | 			return n, zw.err
193 | 		default:
194 | 			zw.err = errors.New("flate: compression error")
195 | 		}
196 | 	}
197 | 	return n, zw.err
198 | }
199 | 
200 | func (zw *writer) Close() error {
201 | 	if zw.state != nil {
202 | 		defer func() {
203 | 			C.zfEncDestroy(zw.state)
204 | 			zw.state = nil
205 | 		}()
206 | 		zw.write(nil, C.Z_FINISH)
207 | 	}
208 | 	return zw.err
209 | }
210 | 
211 | func sizePtrs(in, out []byte) (sizeIn, sizeOut C.uInt, ptrIn, ptrOut *C.Bytef) {
212 | 	sizeIn = C.uInt(len(in))
213 | 	sizeOut = C.uInt(len(out))
214 | 	if len(in) > 0 {
215 | 		ptrIn = (*C.Bytef)(unsafe.Pointer(&in[0]))
216 | 	}
217 | 	if len(out) > 0 {
218 | 		ptrOut = (*C.Bytef)(unsafe.Pointer(&out[0]))
219 | 	}
220 | 	return
221 | }
222 | 


--------------------------------------------------------------------------------
/internal/cgo/lzma/lzma.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2016, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | //go:build cgo
  6 | // +build cgo
  7 | 
  8 | // Package lzma implements the LZMA2 compressed data format using C wrappers.
  9 | package lzma
 10 | 
 11 | /*
 12 | #cgo LDFLAGS: -llzma
 13 | 
 14 | #include <assert.h>
 15 | #include <stdlib.h>
 16 | #include "lzma.h"
 17 | 
 18 | // zlState is a tuple of C allocated data structures.
 19 | //
 20 | // The liblzma documentation is not clear about whether the filters struct must
 21 | // stay live past calls to lzma_raw_encoder and lzma_raw_decoder.
 22 | // To be on the safe side, we allocate them and keep them around until the end.
 23 | typedef struct {
 24 | 	lzma_stream stream;
 25 | 	lzma_filter filters[2];
 26 | 	lzma_options_lzma options;
 27 | } zlState;
 28 | 
 29 | zlState* zlDecCreate() {
 30 | 	zlState* state = calloc(1, sizeof(zlState));
 31 | 	state->filters[0].id = LZMA_FILTER_LZMA2;
 32 | 	state->filters[0].options = &state->options;
 33 | 	state->filters[1].id = LZMA_VLI_UNKNOWN;
 34 | 	state->options.dict_size = LZMA_DICT_SIZE_DEFAULT;
 35 | 
 36 | 	assert(lzma_raw_decoder(&state->stream, state->filters) == LZMA_OK);
 37 | 	return state;
 38 | }
 39 | 
 40 | zlState* zlEncCreate(int level) {
 41 | 	zlState* state = calloc(1, sizeof(zlState));
 42 | 	state->filters[0].id = LZMA_FILTER_LZMA2;
 43 | 	state->filters[0].options = &state->options;
 44 | 	state->filters[1].id = LZMA_VLI_UNKNOWN;
 45 | 
 46 | 	assert(!lzma_lzma_preset(&state->options, level));
 47 | 	assert(lzma_raw_encoder(&state->stream, state->filters) == LZMA_OK);
 48 | 	return state;
 49 | }
 50 | 
 51 | lzma_ret zlStream(
 52 | 	lzma_stream* strm, lzma_action action,
 53 | 	size_t* avail_in, uint8_t* next_in,
 54 | 	size_t* avail_out, uint8_t* next_out
 55 | ) {
 56 | 	strm->avail_in = *avail_in;
 57 | 	strm->avail_out = *avail_out;
 58 | 	strm->next_in = next_in;
 59 | 	strm->next_out = next_out;
 60 | 	lzma_ret ret = lzma_code(strm, action);
 61 | 	*avail_in = strm->avail_in;
 62 | 	*avail_out = strm->avail_out;
 63 | 	strm->next_in = NULL;
 64 | 	strm->next_out = NULL;
 65 | 	return ret;
 66 | }
 67 | 
 68 | void zlDestroy(zlState* state) {
 69 | 	lzma_end(&state->stream);
 70 | 	free(state);
 71 | }
 72 | */
 73 | import "C"
 74 | 
 75 | import (
 76 | 	"errors"
 77 | 	"io"
 78 | 	"unsafe"
 79 | )
 80 | 
// reader decompresses raw LZMA2 data by driving a C liblzma decoder.
type reader struct {
	r     io.Reader     // source of compressed bytes
	err   error         // sticky error; once set, Read stops decoding and returns it
	state *C.zlState    // C-allocated decoder state; nil once Close has run
	buf   []byte        // compressed bytes not yet consumed (a window into arr)
	arr   [1 << 14]byte // backing storage that Read refills from r
}
 88 | 
 89 | func NewReader(r io.Reader) io.ReadCloser {
 90 | 	zr := &reader{r: r, state: C.zlDecCreate()}
 91 | 	if zr.state == nil {
 92 | 		panic("lzma: could not allocate decoder state")
 93 | 	}
 94 | 	return zr
 95 | }
 96 | 
 97 | func (zr *reader) Read(buf []byte) (int, error) {
 98 | 	if zr.state == nil {
 99 | 		return 0, io.ErrClosedPipe
100 | 	}
101 | 
102 | 	var n int
103 | 	for zr.err == nil && (len(buf) > 0 && n == 0) {
104 | 		availIn, availOut, ptrIn, ptrOut := sizePtrs(zr.buf, buf)
105 | 		ret := C.zlStream(&zr.state.stream, 0, &availIn, ptrIn, &availOut, ptrOut)
106 | 		n += len(buf) - int(availOut)
107 | 		buf = buf[len(buf)-int(availOut):]
108 | 		zr.buf = zr.buf[len(zr.buf)-int(availIn):]
109 | 
110 | 		switch ret {
111 | 		case C.LZMA_OK:
112 | 			return n, nil
113 | 		case C.LZMA_BUF_ERROR:
114 | 			if len(zr.buf) == 0 {
115 | 				n1, err := zr.r.Read(zr.arr[:])
116 | 				if n1 > 0 {
117 | 					zr.buf = zr.arr[:n1]
118 | 				} else if err != nil {
119 | 					if err == io.EOF {
120 | 						err = io.ErrUnexpectedEOF
121 | 					}
122 | 					zr.err = err
123 | 				}
124 | 			}
125 | 		case C.LZMA_STREAM_END:
126 | 			return n, io.EOF
127 | 		default:
128 | 			zr.err = errors.New("lzma: corrupted input")
129 | 		}
130 | 	}
131 | 	return n, zr.err
132 | }
133 | 
134 | func (zr *reader) Close() error {
135 | 	if zr.state != nil {
136 | 		defer func() {
137 | 			C.zlDestroy(zr.state)
138 | 			zr.state = nil
139 | 		}()
140 | 	}
141 | 	return zr.err
142 | }
143 | 
// writer compresses data into raw LZMA2 by driving a C liblzma encoder.
type writer struct {
	w     io.Writer     // destination for compressed bytes
	err   error         // sticky error; once set, write stops and returns it
	state *C.zlState    // C-allocated encoder state; nil once Close has run
	buf   []byte        // output of the most recent encode call (a window into arr)
	arr   [1 << 14]byte // scratch space that liblzma writes compressed output into
}
151 | 
152 | func NewWriter(w io.Writer, level int) io.WriteCloser {
153 | 	if level < 0 || level > 9 {
154 | 		panic("lzma: invalid compression level")
155 | 	}
156 | 
157 | 	zw := &writer{w: w, state: C.zlEncCreate(C.int(level))}
158 | 	if zw.state == nil {
159 | 		panic("lzma: could not allocate encoder state")
160 | 	}
161 | 	return zw
162 | }
163 | 
164 | func (zw *writer) Write(buf []byte) (int, error) {
165 | 	return zw.write(buf, C.LZMA_RUN)
166 | }
167 | 
168 | func (zw *writer) write(buf []byte, op C.lzma_action) (int, error) {
169 | 	if zw.state == nil {
170 | 		return 0, io.ErrClosedPipe
171 | 	}
172 | 
173 | 	var n int
174 | 	flush := op != C.LZMA_RUN
175 | 	for zw.err == nil && (len(buf) > 0 || flush) {
176 | 		availIn, availOut, ptrIn, ptrOut := sizePtrs(buf, zw.arr[:])
177 | 		ret := C.zlStream(&zw.state.stream, op, &availIn, ptrIn, &availOut, ptrOut)
178 | 		n += len(buf) - int(availIn)
179 | 		buf = buf[len(buf)-int(availIn):]
180 | 		zw.buf = zw.arr[:len(zw.arr)-int(availOut)]
181 | 
182 | 		if len(zw.buf) > 0 {
183 | 			if _, err := zw.w.Write(zw.buf); err != nil {
184 | 				zw.err = err
185 | 			}
186 | 		}
187 | 		switch ret {
188 | 		case C.LZMA_OK, C.LZMA_BUF_ERROR:
189 | 			continue // Do nothing
190 | 		case C.LZMA_STREAM_END:
191 | 			return n, zw.err
192 | 		default:
193 | 			zw.err = errors.New("lzma: compression error")
194 | 		}
195 | 	}
196 | 	return n, zw.err
197 | }
198 | 
199 | func (zw *writer) Close() error {
200 | 	if zw.state != nil {
201 | 		defer func() {
202 | 			C.zlDestroy(zw.state)
203 | 			zw.state = nil
204 | 		}()
205 | 		zw.write(nil, C.LZMA_FINISH)
206 | 	}
207 | 	return zw.err
208 | }
209 | 
210 | func sizePtrs(in, out []byte) (sizeIn, sizeOut C.size_t, ptrIn, ptrOut *C.uint8_t) {
211 | 	sizeIn = C.size_t(len(in))
212 | 	sizeOut = C.size_t(len(out))
213 | 	if len(in) > 0 {
214 | 		ptrIn = (*C.uint8_t)(unsafe.Pointer(&in[0]))
215 | 	}
216 | 	if len(out) > 0 {
217 | 		ptrOut = (*C.uint8_t)(unsafe.Pointer(&out[0]))
218 | 	}
219 | 	return
220 | }
221 | 


--------------------------------------------------------------------------------
/internal/cgo/zstd/zstd.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2016, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | //go:build cgo
  6 | // +build cgo
  7 | 
  8 | // Package zstd implements the Zstandard compressed data format using C wrappers.
  9 | package zstd
 10 | 
 11 | /*
 12 | // This relies upon the shared library built from github.com/facebook/zstd.
 13 | //
 14 | // The steps to build and install the shared library is as follows:
 15 | //	curl -L https://github.com/facebook/zstd/archive/v1.3.2.tar.gz | tar -zxv
 16 | //	cd zstd-1.3.2
 17 | //	sudo make install
 18 | 
 19 | #cgo LDFLAGS: -lzstd
 20 | 
 21 | #include <stdlib.h>
 22 | #include <stdint.h>
 23 | #include "zstd.h"
 24 | 
 25 | ZSTD_DStream* zsDecCreate() {
 26 | 	ZSTD_DStream* state = ZSTD_createDStream();
 27 | 	ZSTD_initDStream(state);
 28 | 	return state;
 29 | }
 30 | 
 31 | size_t zsDecStream(
 32 | 	ZSTD_DStream* state,
 33 | 	size_t* avail_in, uint8_t* next_in,
 34 | 	size_t* avail_out, uint8_t* next_out
 35 | ) {
 36 | 	ZSTD_inBuffer in = {next_in, *avail_in, 0};
 37 | 	ZSTD_outBuffer out = {next_out, *avail_out, 0};
 38 | 	size_t ret = ZSTD_decompressStream(state, &out, &in);
 39 | 	*avail_in = in.size - in.pos;
 40 | 	*avail_out = out.size - out.pos;
 41 | 	in.src = NULL;
 42 | 	out.dst = NULL;
 43 | 	return ret;
 44 | }
 45 | 
 46 | void zsDecDestroy(ZSTD_DStream* state) {
 47 | 	ZSTD_freeDStream(state);
 48 | }
 49 | 
 50 | ZSTD_CStream* zsEncCreate(int level) {
 51 | 	ZSTD_CStream* state = ZSTD_createCStream();
 52 | 	ZSTD_initCStream(state, level);
 53 | 	return state;
 54 | }
 55 | 
 56 | size_t zsEncStream(
 57 | 	ZSTD_CStream* state, int finish,
 58 | 	size_t* avail_in, uint8_t* next_in,
 59 | 	size_t* avail_out, uint8_t* next_out
 60 | ) {
 61 | 	ZSTD_inBuffer in = {next_in, *avail_in, 0};
 62 | 	ZSTD_outBuffer out = {next_out, *avail_out, 0};
 63 | 	size_t ret = finish ?
 64 | 		ZSTD_endStream(state, &out) : ZSTD_compressStream(state, &out, &in);
 65 | 	*avail_in = in.size - in.pos;
 66 | 	*avail_out = out.size - out.pos;
 67 | 	in.src = NULL;
 68 | 	out.dst = NULL;
 69 | 	return ret;
 70 | }
 71 | 
 72 | void zsEncDestroy(ZSTD_CStream* state) {
 73 | 	ZSTD_freeCStream(state);
 74 | }
 75 | */
 76 | import "C"
 77 | 
 78 | import (
 79 | 	"errors"
 80 | 	"io"
 81 | 	"unsafe"
 82 | )
 83 | 
// reader decompresses Zstandard data by driving a C zstd streaming decoder.
type reader struct {
	r     io.Reader       // source of compressed bytes
	err   error           // sticky error; once set, Read stops decoding and returns it
	state *C.ZSTD_DStream // C-allocated decoder context; nil once Close has run
	buf   []byte          // compressed bytes not yet consumed (a window into arr)
	arr   [1 << 14]byte   // backing storage that Read refills from r
}
 91 | 
 92 | func NewReader(r io.Reader) io.ReadCloser {
 93 | 	zr := &reader{r: r, state: C.zsDecCreate()}
 94 | 	if zr.state == nil {
 95 | 		panic("zstd: could not allocate decoder state")
 96 | 	}
 97 | 	return zr
 98 | }
 99 | 
100 | func (zr *reader) Read(buf []byte) (int, error) {
101 | 	if zr.state == nil {
102 | 		return 0, io.ErrClosedPipe
103 | 	}
104 | 
105 | 	var n int
106 | 	for zr.err == nil && (len(buf) > 0 && n == 0) {
107 | 		availIn, availOut, ptrIn, ptrOut := sizePtrs(zr.buf, buf)
108 | 		ret := C.zsDecStream(zr.state, &availIn, ptrIn, &availOut, ptrOut)
109 | 		n += len(buf) - int(availOut)
110 | 		buf = buf[len(buf)-int(availOut):]
111 | 		zr.buf = zr.buf[len(zr.buf)-int(availIn):]
112 | 
113 | 		switch {
114 | 		case C.ZSTD_isError(ret) > 0:
115 | 			zr.err = errors.New("zstd: corrupted input")
116 | 		case ret == 0:
117 | 			return n, io.EOF
118 | 		case n > 0:
119 | 			return n, nil
120 | 		case len(zr.buf) == 0 && n == 0:
121 | 			n1, err := zr.r.Read(zr.arr[:])
122 | 			if n1 > 0 {
123 | 				zr.buf = zr.arr[:n1]
124 | 			} else if err != nil {
125 | 				if err == io.EOF {
126 | 					err = io.ErrUnexpectedEOF
127 | 				}
128 | 				zr.err = err
129 | 			}
130 | 		}
131 | 	}
132 | 	return n, zr.err
133 | }
134 | 
135 | func (zr *reader) Close() error {
136 | 	if zr.state != nil {
137 | 		defer func() {
138 | 			C.zsDecDestroy(zr.state)
139 | 			zr.state = nil
140 | 		}()
141 | 	}
142 | 	return zr.err
143 | }
144 | 
// writer compresses data into Zstandard by driving a C zstd streaming encoder.
type writer struct {
	w     io.Writer       // destination for compressed bytes
	err   error           // sticky error; once set, write stops and returns it
	state *C.ZSTD_CStream // C-allocated encoder context; nil once Close has run
	buf   []byte          // output of the most recent encode call (a window into arr)
	arr   [1 << 14]byte   // scratch space that zstd writes compressed output into
}
152 | 
153 | func NewWriter(w io.Writer, level int) io.WriteCloser {
154 | 	if level < 1 || level > 22 {
155 | 		panic("zstd: invalid compression level")
156 | 	}
157 | 
158 | 	zw := &writer{w: w, state: C.zsEncCreate(C.int(level))}
159 | 	if zw.state == nil {
160 | 		panic("zstd: could not allocate encoder state")
161 | 	}
162 | 	return zw
163 | }
164 | 
165 | func (zw *writer) Write(buf []byte) (int, error) {
166 | 	return zw.write(buf, 0)
167 | }
168 | 
169 | func (zw *writer) write(buf []byte, finish C.int) (int, error) {
170 | 	if zw.state == nil {
171 | 		return 0, io.ErrClosedPipe
172 | 	}
173 | 
174 | 	var n int
175 | 	for zw.err == nil && (len(buf) > 0 || finish > 0) {
176 | 		availIn, availOut, ptrIn, ptrOut := sizePtrs(buf, zw.arr[:])
177 | 		ret := C.zsEncStream(zw.state, finish, &availIn, ptrIn, &availOut, ptrOut)
178 | 		n += len(buf) - int(availIn)
179 | 		buf = buf[len(buf)-int(availIn):]
180 | 		zw.buf = zw.arr[:len(zw.arr)-int(availOut)]
181 | 
182 | 		if len(zw.buf) > 0 {
183 | 			if _, err := zw.w.Write(zw.buf); err != nil {
184 | 				zw.err = err
185 | 			}
186 | 		}
187 | 		switch {
188 | 		case C.ZSTD_isError(ret) > 0:
189 | 			zw.err = errors.New("zstd: compression error")
190 | 		case len(buf) == 0 && len(zw.buf) == 0:
191 | 			return n, zw.err
192 | 		case ret == 0 && finish > 0:
193 | 			return n, zw.err
194 | 		}
195 | 	}
196 | 	return n, zw.err
197 | }
198 | 
199 | func (zw *writer) Close() error {
200 | 	if zw.state != nil {
201 | 		defer func() {
202 | 			C.zsEncDestroy(zw.state)
203 | 			zw.state = nil
204 | 		}()
205 | 		zw.write(nil, 1)
206 | 	}
207 | 	return zw.err
208 | }
209 | 
210 | func sizePtrs(in, out []byte) (sizeIn, sizeOut C.size_t, ptrIn, ptrOut *C.uint8_t) {
211 | 	sizeIn = C.size_t(len(in))
212 | 	sizeOut = C.size_t(len(out))
213 | 	if len(in) > 0 {
214 | 		ptrIn = (*C.uint8_t)(unsafe.Pointer(&in[0]))
215 | 	}
216 | 	if len(out) > 0 {
217 | 		ptrOut = (*C.uint8_t)(unsafe.Pointer(&out[0]))
218 | 	}
219 | 	return
220 | }
221 | 


--------------------------------------------------------------------------------
/internal/common.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | // Package internal is a collection of common compression algorithms.
  6 | //
// For performance reasons, these packages lack strong error checking and
// require the caller to ensure that strict invariants are kept.
  9 | package internal
 10 | 
 11 | var (
 12 | 	// IdentityLUT returns the input key itself.
 13 | 	IdentityLUT = func() (lut [256]byte) {
 14 | 		for i := range lut {
 15 | 			lut[i] = uint8(i)
 16 | 		}
 17 | 		return lut
 18 | 	}()
 19 | 
 20 | 	// ReverseLUT returns the input key with its bits reversed.
 21 | 	ReverseLUT = func() (lut [256]byte) {
 22 | 		for i := range lut {
 23 | 			b := uint8(i)
 24 | 			b = (b&0xaa)>>1 | (b&0x55)<<1
 25 | 			b = (b&0xcc)>>2 | (b&0x33)<<2
 26 | 			b = (b&0xf0)>>4 | (b&0x0f)<<4
 27 | 			lut[i] = b
 28 | 		}
 29 | 		return lut
 30 | 	}()
 31 | )
 32 | 
 33 | // ReverseUint32 reverses all bits of v.
 34 | func ReverseUint32(v uint32) (x uint32) {
 35 | 	x |= uint32(ReverseLUT[byte(v>>0)]) << 24
 36 | 	x |= uint32(ReverseLUT[byte(v>>8)]) << 16
 37 | 	x |= uint32(ReverseLUT[byte(v>>16)]) << 8
 38 | 	x |= uint32(ReverseLUT[byte(v>>24)]) << 0
 39 | 	return x
 40 | }
 41 | 
 42 | // ReverseUint32N reverses the lower n bits of v.
 43 | func ReverseUint32N(v uint32, n uint) (x uint32) {
 44 | 	return ReverseUint32(v << (32 - n))
 45 | }
 46 | 
 47 | // ReverseUint64 reverses all bits of v.
 48 | func ReverseUint64(v uint64) (x uint64) {
 49 | 	x |= uint64(ReverseLUT[byte(v>>0)]) << 56
 50 | 	x |= uint64(ReverseLUT[byte(v>>8)]) << 48
 51 | 	x |= uint64(ReverseLUT[byte(v>>16)]) << 40
 52 | 	x |= uint64(ReverseLUT[byte(v>>24)]) << 32
 53 | 	x |= uint64(ReverseLUT[byte(v>>32)]) << 24
 54 | 	x |= uint64(ReverseLUT[byte(v>>40)]) << 16
 55 | 	x |= uint64(ReverseLUT[byte(v>>48)]) << 8
 56 | 	x |= uint64(ReverseLUT[byte(v>>56)]) << 0
 57 | 	return x
 58 | }
 59 | 
 60 | // ReverseUint64N reverses the lower n bits of v.
 61 | func ReverseUint64N(v uint64, n uint) (x uint64) {
 62 | 	return ReverseUint64(v << (64 - n))
 63 | }
 64 | 
// MoveToFront is a data structure that allows for more efficient move-to-front
// transformations. This specific implementation assumes that the alphabet is
// densely packed within 0..255.
//
// Encode and Decode reset the list to the identity ordering at the start of
// every call; tail records how many trailing entries were left untouched by
// the previous call so that the reset only has to rewrite the others.
type MoveToFront struct {
	dict [256]uint8 // Mapping from indexes to values
	tail int        // Number of tail bytes that are already ordered
}
 72 | 
 73 | func (m *MoveToFront) Encode(vals []uint8) {
 74 | 	copy(m.dict[:], IdentityLUT[:256-m.tail]) // Reset dict to be identity
 75 | 
 76 | 	var max int
 77 | 	for i, val := range vals {
 78 | 		var idx uint8 // Reverse lookup idx in dict
 79 | 		for di, dv := range m.dict {
 80 | 			if dv == val {
 81 | 				idx = uint8(di)
 82 | 				break
 83 | 			}
 84 | 		}
 85 | 		vals[i] = idx
 86 | 
 87 | 		max |= int(idx)
 88 | 		copy(m.dict[1:], m.dict[:idx])
 89 | 		m.dict[0] = val
 90 | 	}
 91 | 	m.tail = 256 - max - 1
 92 | }
 93 | 
 94 | func (m *MoveToFront) Decode(idxs []uint8) {
 95 | 	copy(m.dict[:], IdentityLUT[:256-m.tail]) // Reset dict to be identity
 96 | 
 97 | 	var max int
 98 | 	for i, idx := range idxs {
 99 | 		val := m.dict[idx] // Forward lookup val in dict
100 | 		idxs[i] = val
101 | 
102 | 		max |= int(idx)
103 | 		copy(m.dict[1:], m.dict[:idx])
104 | 		m.dict[0] = val
105 | 	}
106 | 	m.tail = 256 - max - 1
107 | }
108 | 


--------------------------------------------------------------------------------
/internal/debug.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build debug && !gofuzz
 6 | // +build debug,!gofuzz
 7 | 
 8 | package internal
 9 | 
// These values apply when building with the "debug" tag and without the
// "gofuzz" tag, per this file's build constraints.
const (
	Debug  = true  // the debug build tag is set
	GoFuzz = false // the gofuzz build tag is not set
)
14 | 


--------------------------------------------------------------------------------
/internal/errors/errors.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2016, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | // Package errors implements functions to manipulate compression errors.
  6 | //
  7 | // In idiomatic Go, it is an anti-pattern to use panics as a form of error
  8 | // reporting in the API. Instead, the expected way to transmit errors is by
  9 | // returning an error value. Unfortunately, the checking of "err != nil" in
 10 | // tight loops commonly found in compression causes non-negligible performance
 11 | // degradation. While this may not be idiomatic, the internal packages of this
 12 | // repository rely on panics as a normal means to convey errors. In order to
 13 | // ensure that these panics do not leak across the public API, the public
 14 | // packages must recover from these panics and present an error value.
 15 | //
 16 | // The Panic and Recover functions in this package provide a safe way to
 17 | // recover from errors only generated from within this repository.
 18 | //
 19 | // Example usage:
 20 | //
 21 | //	func Foo() (err error) {
 22 | //		defer errors.Recover(&err)
 23 | //
 24 | //		if rand.Intn(2) == 0 {
 25 | //			// Unexpected panics will not be caught by Recover.
 26 | //			io.Closer(nil).Close()
 27 | //		} else {
 28 | //			// Errors thrown by Panic will be caught by Recover.
 29 | //			errors.Panic(errors.New("whoopsie"))
//		}
//		return nil
//	}
 32 | package errors
 33 | 
 34 | import "strings"
 35 | 
// Error classification codes stored in Error.Code.
const (
	// Unknown indicates that there is no classification for this error.
	Unknown = iota

	// Internal indicates that this error is due to an internal bug.
	// Users should file an issue report if this type of error is encountered.
	Internal

	// Invalid indicates that this error is due to the user misusing the API
	// and is indicative of a bug on the user's part.
	Invalid

	// Deprecated indicates the use of a deprecated and unsupported feature.
	Deprecated

	// Corrupted indicates that the input stream is corrupted.
	Corrupted

	// Closed indicates that the handlers are closed.
	Closed
)
 57 | 
// codeMap gives the human-readable description of each error code.
// Error.Error places it between the package name and the message.
var codeMap = map[int]string{
	Unknown:    "unknown error",
	Internal:   "internal error",
	Invalid:    "invalid argument",
	Deprecated: "deprecated format",
	Corrupted:  "corrupted input",
	Closed:     "closed handler",
}
 66 | 
// Error is a compression error tagged with a classification code, the
// package it originated from, and an optional descriptive message.
type Error struct {
	Code int    // The error type
	Pkg  string // Name of the package where the error originated
	Msg  string // Descriptive message about the error (optional)
}
 72 | 
 73 | func (e Error) Error() string {
 74 | 	var ss []string
 75 | 	for _, s := range []string{e.Pkg, codeMap[e.Code], e.Msg} {
 76 | 		if s != "" {
 77 | 			ss = append(ss, s)
 78 | 		}
 79 | 	}
 80 | 	return strings.Join(ss, ": ")
 81 | }
 82 | 
// CompressError is an empty marker method; it does nothing when called.
// NOTE(review): presumably it lets callers identify errors from this
// repository through an interface — confirm against the public API.
func (e Error) CompressError()     {}

// The Is* methods report whether the error carries the corresponding code.
func (e Error) IsInternal() bool   { return e.Code == Internal }
func (e Error) IsInvalid() bool    { return e.Code == Invalid }
func (e Error) IsDeprecated() bool { return e.Code == Deprecated }
func (e Error) IsCorrupted() bool  { return e.Code == Corrupted }
func (e Error) IsClosed() bool     { return e.Code == Closed }
 89 | 
// The Is* functions report whether err is an Error value carrying the
// corresponding code. They return false for nil and for any other error type.
func IsInternal(err error) bool   { return isCode(err, Internal) }
func IsInvalid(err error) bool    { return isCode(err, Invalid) }
func IsDeprecated(err error) bool { return isCode(err, Deprecated) }
func IsCorrupted(err error) bool  { return isCode(err, Corrupted) }
func IsClosed(err error) bool     { return isCode(err, Closed) }
 95 | 
 96 | func isCode(err error, code int) bool {
 97 | 	if cerr, ok := err.(Error); ok && cerr.Code == code {
 98 | 		return true
 99 | 	}
100 | 	return false
101 | }
102 | 
// errWrap is used by Panic and Recover to ensure that only errors raised by
// Panic are recovered by Recover. Panic wraps a pointer to the error in this
// private type, and Recover re-panics on any panic value of another type.
type errWrap struct{ e *error }
106 | 
107 | func Recover(err *error) {
108 | 	switch ex := recover().(type) {
109 | 	case nil:
110 | 		// Do nothing.
111 | 	case errWrap:
112 | 		*err = *ex.e
113 | 	default:
114 | 		panic(ex)
115 | 	}
116 | }
117 | 
118 | func Panic(err error) {
119 | 	panic(errWrap{&err})
120 | }
121 | 


--------------------------------------------------------------------------------
/internal/gofuzz.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2016, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build gofuzz
 6 | // +build gofuzz
 7 | 
 8 | package internal
 9 | 
// These values apply when building with the "gofuzz" tag, per this file's
// build constraints.
const (
	Debug  = true // debug mode is enabled in gofuzz builds
	GoFuzz = true // the gofuzz build tag is set
)
14 | 


--------------------------------------------------------------------------------
/internal/prefix/debug.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | //go:build debug
  6 | // +build debug
  7 | 
  8 | package prefix
  9 | 
 10 | import (
 11 | 	"fmt"
 12 | 	"math"
 13 | 	"strings"
 14 | )
 15 | 
 16 | func max(a, b int) int {
 17 | 	if a > b {
 18 | 		return a
 19 | 	}
 20 | 	return b
 21 | }
 22 | 
 23 | func lenBase2(n uint) int {
 24 | 	return int(math.Ceil(math.Log2(float64(n + 1))))
 25 | }
 26 | func padBase2(v, n uint, m int) string {
 27 | 	s := fmt.Sprintf("%b", 1<<n|v)[1:]
 28 | 	if pad := m - len(s); pad > 0 {
 29 | 		return strings.Repeat(" ", pad) + s
 30 | 	}
 31 | 	return s
 32 | }
 33 | 
 34 | func lenBase10(n int) int {
 35 | 	return int(math.Ceil(math.Log10(float64(n + 1))))
 36 | }
 37 | func padBase10(n, m int) string {
 38 | 	s := fmt.Sprintf("%d", n)
 39 | 	if pad := m - len(s); pad > 0 {
 40 | 		return strings.Repeat(" ", pad) + s
 41 | 	}
 42 | 	return s
 43 | }
 44 | 
 45 | func (rc RangeCodes) String() string {
 46 | 	var maxLen, maxBase int
 47 | 	for _, c := range rc {
 48 | 		maxLen = max(maxLen, int(c.Len))
 49 | 		maxBase = max(maxBase, int(c.Base))
 50 | 	}
 51 | 
 52 | 	var ss []string
 53 | 	ss = append(ss, "{")
 54 | 	for i, c := range rc {
 55 | 		base := padBase10(int(c.Base), lenBase10(maxBase))
 56 | 		if c.Len > 0 {
 57 | 			base += fmt.Sprintf("-%d", c.End()-1)
 58 | 		}
 59 | 		ss = append(ss, fmt.Sprintf("\t%s:  {len: %s, range: %s},",
 60 | 			padBase10(int(i), lenBase10(len(rc)-1)),
 61 | 			padBase10(int(c.Len), lenBase10(maxLen)),
 62 | 			base,
 63 | 		))
 64 | 	}
 65 | 	ss = append(ss, "}")
 66 | 	return strings.Join(ss, "\n")
 67 | }
 68 | 
 69 | func (pc PrefixCodes) String() string {
 70 | 	var maxSym, maxLen, maxCnt int
 71 | 	for _, c := range pc {
 72 | 		maxSym = max(maxSym, int(c.Sym))
 73 | 		maxLen = max(maxLen, int(c.Len))
 74 | 		maxCnt = max(maxCnt, int(c.Cnt))
 75 | 	}
 76 | 
 77 | 	var ss []string
 78 | 	ss = append(ss, "{")
 79 | 	for _, c := range pc {
 80 | 		var cntStr string
 81 | 		if maxCnt > 0 {
 82 | 			cnt := int(32*float32(c.Cnt)/float32(maxCnt) + 0.5)
 83 | 			cntStr = fmt.Sprintf("%s |%s",
 84 | 				padBase10(int(c.Cnt), lenBase10(maxCnt)),
 85 | 				strings.Repeat("#", cnt),
 86 | 			)
 87 | 		}
 88 | 		ss = append(ss, fmt.Sprintf("\t%s:  %s,  %s",
 89 | 			padBase10(int(c.Sym), lenBase10(maxSym)),
 90 | 			padBase2(uint(c.Val), uint(c.Len), maxLen),
 91 | 			cntStr,
 92 | 		))
 93 | 	}
 94 | 	ss = append(ss, "}")
 95 | 	return strings.Join(ss, "\n")
 96 | }
 97 | 
 98 | func (pd Decoder) String() string {
 99 | 	var ss []string
100 | 	ss = append(ss, "{")
101 | 	if len(pd.chunks) > 0 {
102 | 		ss = append(ss, "\tchunks: {")
103 | 		for i, c := range pd.chunks {
104 | 			label := "sym"
105 | 			if uint(c&countMask) > uint(pd.chunkBits) {
106 | 				label = "idx"
107 | 			}
108 | 			ss = append(ss, fmt.Sprintf("\t\t%s:  {%s: %s, len: %s}",
109 | 				padBase2(uint(i), uint(pd.chunkBits), int(pd.chunkBits)),
110 | 				label, padBase10(int(c>>countBits), 3),
111 | 				padBase10(int(c&countMask), 2),
112 | 			))
113 | 		}
114 | 		ss = append(ss, "\t},")
115 | 
116 | 		for j, links := range pd.links {
117 | 			ss = append(ss, fmt.Sprintf("\tlinks[%d]: {", j))
118 | 			linkBits := lenBase2(uint(pd.linkMask))
119 | 			for i, c := range links {
120 | 				ss = append(ss, fmt.Sprintf("\t\t%s:  {sym: %s, len: %s},",
121 | 					padBase2(uint(i), uint(linkBits), int(linkBits)),
122 | 					padBase10(int(c>>countBits), 3),
123 | 					padBase10(int(c&countMask), 2),
124 | 				))
125 | 			}
126 | 			ss = append(ss, "\t},")
127 | 		}
128 | 	}
129 | 	ss = append(ss, fmt.Sprintf("\tchunkMask: %b,", pd.chunkMask))
130 | 	ss = append(ss, fmt.Sprintf("\tlinkMask:  %b,", pd.linkMask))
131 | 	ss = append(ss, fmt.Sprintf("\tchunkBits: %d,", pd.chunkBits))
132 | 	ss = append(ss, fmt.Sprintf("\tMinBits:   %d,", pd.MinBits))
133 | 	ss = append(ss, fmt.Sprintf("\tNumSyms:   %d,", pd.NumSyms))
134 | 	ss = append(ss, "}")
135 | 	return strings.Join(ss, "\n")
136 | }
137 | 
138 | func (pe Encoder) String() string {
139 | 	var maxLen int
140 | 	for _, c := range pe.chunks {
141 | 		maxLen = max(maxLen, int(c&countMask))
142 | 	}
143 | 
144 | 	var ss []string
145 | 	ss = append(ss, "{")
146 | 	if len(pe.chunks) > 0 {
147 | 		ss = append(ss, "\tchunks: {")
148 | 		for i, c := range pe.chunks {
149 | 			ss = append(ss, fmt.Sprintf("\t\t%s:  %s,",
150 | 				padBase10(i, 3),
151 | 				padBase2(uint(c>>countBits), uint(c&countMask), maxLen),
152 | 			))
153 | 		}
154 | 		ss = append(ss, "\t},")
155 | 	}
156 | 	ss = append(ss, fmt.Sprintf("\tchunkMask: %b,", pe.chunkMask))
157 | 	ss = append(ss, fmt.Sprintf("\tNumSyms:   %d,", pe.NumSyms))
158 | 	ss = append(ss, "}")
159 | 	return strings.Join(ss, "\n")
160 | }
161 | 


--------------------------------------------------------------------------------
/internal/prefix/decoder.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package prefix
  6 | 
  7 | import (
  8 | 	"sort"
  9 | 
 10 | 	"github.com/dsnet/compress/internal"
 11 | )
 12 | 
 13 | // The algorithm used to decode variable-length codes is based on the lookup
 14 | // method in zlib. If the length of a code is at most maxChunkBits,
 15 | // then the symbol can be decoded using a single lookup into the chunks table.
 16 | // Otherwise, the links table will be used for a second-level lookup.
 17 | //
 18 | // The chunks slice is keyed by the contents of the bit buffer ANDed with
 19 | // the chunkMask to avoid an out-of-bounds lookup. Each chunks value is a
 20 | // tuple that is decoded as follows:
 21 | //
 22 | //	var length = chunks[bitBuffer&chunkMask] & countMask
 23 | //	var symbol = chunks[bitBuffer&chunkMask] >> countBits
 24 | //
 25 | // If the decoded length is larger than chunkBits, then an overflow link table
 26 | // must be used for further decoding. In this case, the symbol is actually the
 27 | // index into the links tables. The second-level links table returned is
 28 | // processed in the same way as the chunks table.
 29 | //
 30 | //	if length > chunkBits {
 31 | //		var index = symbol // Previous symbol is index into links tables
 32 | //		length = links[index][bitBuffer>>chunkBits & linkMask] & countMask
 33 | //		symbol = links[index][bitBuffer>>chunkBits & linkMask] >> countBits
 34 | //	}
 35 | //
 36 | // See the following:
 37 | //	http://www.gzip.org/algorithm.txt
 38 | 
 39 | type Decoder struct { // Two-level lookup-table decoder for prefix codes
 40 | 	chunks    []uint32   // First-level table; Init stores sym<<countBits | len entries
 41 | 	links     [][]uint32 // Second-level tables for codes longer than chunkBits
 42 | 	chunkMask uint32     // len(chunks)-1; masks the bit buffer into chunks
 43 | 	linkMask  uint32     // Size of each links table minus one (zero if unused)
 44 | 	chunkBits uint32     // Number of bits used to index the chunks table
 45 | 
 46 | 	MinBits uint32 // Shortest code length; minimum bits to safely make progress
 47 | 	NumSyms uint32 // Number of symbols (number of codes given to Init)
 48 | }
 49 | 
 50 | // Init initializes Decoder according to the codes provided.
 51 | func (pd *Decoder) Init(codes PrefixCodes) {
 52 | 	// Handle special case trees (no codes, or a single zero-length code).
 53 | 	if len(codes) <= 1 {
 54 | 		switch {
 55 | 		case len(codes) == 0: // Empty tree (should error if used later)
 56 | 			*pd = Decoder{chunks: pd.chunks[:0], links: pd.links[:0], NumSyms: 0}
 57 | 		case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero)
 58 | 			pd.chunks = append(pd.chunks[:0], codes[0].Sym<<countBits|0)
 59 | 			*pd = Decoder{chunks: pd.chunks[:1], links: pd.links[:0], NumSyms: 1}
 60 | 		default: // A single code with a non-zero bit-length
 61 | 			panic("invalid codes")
 62 | 		}
 63 | 		return
 64 | 	}
 65 | 	if internal.Debug && !sort.IsSorted(prefixCodesBySymbol(codes)) {
 66 | 		panic("input codes is not sorted")
 67 | 	}
 68 | 	if internal.Debug && !(codes.checkLengths() && codes.checkPrefixes()) {
 69 | 		panic("detected incomplete or overlapping codes")
 70 | 	}
 71 | 
 72 | 	var minBits, maxBits uint32 = valueBits, 0 // Shortest and longest code lengths
 73 | 	for _, c := range codes {
 74 | 		if minBits > c.Len {
 75 | 			minBits = c.Len
 76 | 		}
 77 | 		if maxBits < c.Len {
 78 | 			maxBits = c.Len
 79 | 		}
 80 | 	}
 81 | 
 82 | 	// Size the chunks table by the longest code, capped at maxChunkBits.
 83 | 	const maxChunkBits = 9 // This can be tuned for better performance
 84 | 	pd.NumSyms = uint32(len(codes))
 85 | 	pd.MinBits = minBits
 86 | 	pd.chunkBits = maxBits
 87 | 	if pd.chunkBits > maxChunkBits {
 88 | 		pd.chunkBits = maxChunkBits
 89 | 	}
 90 | 	numChunks := 1 << pd.chunkBits
 91 | 	pd.chunks = allocUint32s(pd.chunks, numChunks)
 92 | 	pd.chunkMask = uint32(numChunks - 1)
 93 | 
 94 | 	// Allocate links tables, needed only when some code exceeds chunkBits.
 95 | 	pd.links = pd.links[:0]
 96 | 	pd.linkMask = 0
 97 | 	if pd.chunkBits < maxBits {
 98 | 		numLinks := 1 << (maxBits - pd.chunkBits) // Entries per links table
 99 | 		pd.linkMask = uint32(numLinks - 1)
100 | 
101 | 		var linkIdx uint32 // Number of links tables claimed so far
102 | 		for i := range pd.chunks {
103 | 			pd.chunks[i] = 0 // Logic below relies on zero value as uninitialized
104 | 		}
105 | 		for _, c := range codes {
106 | 			if c.Len > pd.chunkBits && pd.chunks[c.Val&pd.chunkMask] == 0 { // First long code with this prefix
107 | 				pd.chunks[c.Val&pd.chunkMask] = (linkIdx << countBits) | (pd.chunkBits + 1)
108 | 				linkIdx++
109 | 			}
110 | 		}
111 | 
112 | 		pd.links = extendSliceUint32s(pd.links, int(linkIdx))
113 | 		linksFlat := allocUint32s(pd.links[0], numLinks*int(linkIdx)) // Sliced into per-table views below
114 | 		for i, j := 0, 0; i < len(pd.links); i, j = i+1, j+numLinks {
115 | 			pd.links[i] = linksFlat[j : j+numLinks]
116 | 		}
117 | 	}
118 | 
119 | 	// Fill out chunks and links tables with values.
120 | 	for _, c := range codes {
121 | 		chunk := c.Sym<<countBits | c.Len
122 | 		if c.Len <= pd.chunkBits {
123 | 			skip := 1 << uint(c.Len) // Stride between indexes sharing the low c.Len bits
124 | 			for j := int(c.Val); j < len(pd.chunks); j += skip {
125 | 				pd.chunks[j] = chunk
126 | 			}
127 | 		} else {
128 | 			linkIdx := pd.chunks[c.Val&pd.chunkMask] >> countBits // Table claimed above for this prefix
129 | 			links := pd.links[linkIdx]
130 | 			skip := 1 << uint(c.Len-pd.chunkBits)
131 | 			for j := int(c.Val >> pd.chunkBits); j < len(links); j += skip {
132 | 				links[j] = chunk
133 | 			}
134 | 		}
135 | 	}
136 | }
137 | 


--------------------------------------------------------------------------------
/internal/prefix/encoder.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package prefix
 6 | 
 7 | import (
 8 | 	"sort"
 9 | 
10 | 	"github.com/dsnet/compress/internal"
11 | )
12 | 
13 | type Encoder struct { // Maps symbols to their prefix codes
14 | 	chunks    []uint32 // Keyed by sym&chunkMask; Init stores val<<countBits | len entries
15 | 	chunkMask uint32   // len(chunks)-1; masks a symbol into the chunks table
16 | 
17 | 	NumSyms uint32 // Number of symbols (number of codes given to Init)
18 | }
19 | 
20 | // Init rebuilds the Encoder tables according to the codes provided.
21 | func (pe *Encoder) Init(codes PrefixCodes) {
22 | 	// Degenerate trees are dealt with up front.
23 | 	switch {
24 | 	case len(codes) == 0: // Empty tree (should error if used later)
25 | 		*pe = Encoder{chunks: pe.chunks[:0], NumSyms: 0}
26 | 		return
27 | 	case len(codes) == 1 && codes[0].Len != 0: // A lone code must have zero length
28 | 		panic("invalid codes")
29 | 	case len(codes) == 1: // Single code tree (bit-length of zero)
30 | 		pe.chunks = append(pe.chunks[:0], codes[0].Val<<countBits|0)
31 | 		*pe = Encoder{chunks: pe.chunks[:1], NumSyms: 1}
32 | 		return
33 | 	}
34 | 	if internal.Debug {
35 | 		if !sort.IsSorted(prefixCodesBySymbol(codes)) {
36 | 			panic("input codes is not sorted")
37 | 		}
38 | 		if !(codes.checkLengths() && codes.checkPrefixes()) {
39 | 			panic("detected incomplete or overlapping codes")
40 | 		}
41 | 	}
42 | 
43 | 	// Start with the smallest power of two able to hold every symbol.
44 | 	size := 1
45 | 	for rem := len(codes) - 1; rem > 0; rem >>= 1 {
46 | 		size <<= 1
47 | 	}
48 | 	pe.NumSyms = uint32(len(codes))
49 | 	// Use chunks as a hash table keyed by sym&chunkMask; double it on collision.
50 | 	for collided := true; collided; size <<= 1 {
51 | 		pe.chunks = allocUint32s(pe.chunks, size)
52 | 		pe.chunkMask = uint32(size - 1)
53 | 		for i := range pe.chunks {
54 | 			pe.chunks[i] = 0 // A zero entry marks an unused slot.
55 | 		}
56 | 		collided = false
57 | 		for _, code := range codes {
58 | 			slot := &pe.chunks[code.Sym&pe.chunkMask]
59 | 			if *slot > 0 {
60 | 				collided = true // Slot already taken: grow and start over.
61 | 				break
62 | 			}
63 | 			*slot = code.Val<<countBits | code.Len
64 | 		}
65 | 	}
66 | }
67 | 


--------------------------------------------------------------------------------
/internal/prefix/range.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package prefix
 6 | 
 7 | type RangeCode struct { // One range of values: [Base, Base + 1<<Len)
 8 | 	Base uint32 // Starting base offset of the range
 9 | 	Len  uint32 // Bit-length of a subsequent integer to add to base offset
10 | }
11 | type RangeCodes []RangeCode // List of ranges; the index of a range is its symbol
12 | 
13 | type RangeEncoder struct { // Maps an offset to the symbol of the range holding it
14 | 	rcs     RangeCodes   // Range codes supplied to Init
15 | 	lut     [1024]uint32 // Symbol for each of the first len(lut) offsets from minBase
16 | 	minBase uint         // Base of the first range; lut indexes are relative to it
17 | }
18 | 
19 | // End returns the exclusive upper bound of the range, Base + 2^Len.
20 | func (rc RangeCode) End() uint32 { return (1 << rc.Len) + rc.Base }
21 | 
22 | // MakeRangeCodes builds RangeCodes stacked contiguously from minBase, where
23 | // the i-th range holds 1<<bits[i] values. It returns nil if bits is empty.
24 | func MakeRangeCodes(minBase uint, bits []uint) (rc RangeCodes) {
25 | 	for i := range bits {
26 | 		rc = append(rc, RangeCode{Base: uint32(minBase), Len: uint32(bits[i])})
27 | 		minBase += 1 << bits[i] // The next range begins where this one ends.
28 | 	}
29 | 	return rc
30 | }
31 | 
32 | // Base returns the inclusive lower bound of the first range.
33 | func (rcs RangeCodes) Base() uint32 { first := rcs[0]; return first.Base }
34 | 
35 | // End returns the exclusive upper bound of the last range.
36 | func (rcs RangeCodes) End() uint32 { last := rcs[len(rcs)-1]; return last.End() }
37 | 
38 | // checkValid reports whether the RangeCodes is valid. It must be non-empty
39 | // and every adjacent pair must satisfy all of the following:
40 | //
41 | //	rcs[i-1].Base <= rcs[i].Base
42 | //	rcs[i-1].End  <= rcs[i].End
43 | //	rcs[i-1].End  >= rcs[i].Base
44 | //
45 | // Practically speaking, each range must be increasing and must not have any
46 | // gaps in between. It is okay for ranges to overlap.
47 | func (rcs RangeCodes) checkValid() bool {
48 | 	if len(rcs) == 0 {
49 | 		return false
50 | 	}
51 | 	for i := 1; i < len(rcs); i++ {
52 | 		prev, next := rcs[i-1], rcs[i]
53 | 		// Starts and ends never decrease, and no gap separates the pair.
54 | 		ordered := prev.Base <= next.Base && prev.End() <= next.End()
55 | 		gapless := prev.End() >= next.Base
56 | 		if !(ordered && gapless) {
57 | 			return false
58 | 		}
59 | 	}
60 | 	return true
61 | }
62 | 
63 | // Init validates rcs, panicking if it is invalid, and then records in lut
64 | // the symbol for every offset that lies within len(lut) of the first base.
65 | func (re *RangeEncoder) Init(rcs RangeCodes) {
66 | 	if !rcs.checkValid() {
67 | 		panic("invalid range codes")
68 | 	}
69 | 	*re = RangeEncoder{rcs: rcs, minBase: uint(rcs.Base())}
70 | 	origin := int(re.minBase)
71 | 	for sym, rc := range rcs {
72 | 		lo, hi := int(rc.Base)-origin, int(rc.End())-origin
73 | 		if lo >= len(re.lut) {
74 | 			return // This range starts beyond the table.
75 | 		}
76 | 		// Fill only the part of the range that fits in the table.
77 | 		for slot := lo; slot < hi && slot < len(re.lut); slot++ {
78 | 			re.lut[slot] = uint32(sym)
79 | 		}
80 | 	}
81 | }
82 | 
83 | func (re *RangeEncoder) Encode(offset uint) (sym uint) { // Returns the symbol of the range holding offset
84 | 	if idx := int(offset - re.minBase); idx < len(re.lut) { // Fast path: direct table hit
85 | 		return uint(re.lut[idx])
86 | 	}
87 | 	sym = uint(re.lut[len(re.lut)-1]) // Start scanning from the last tabulated symbol
88 | retry:
89 | 	if int(sym) >= len(re.rcs) || re.rcs[sym].Base > uint32(offset) {
90 | 		return sym - 1 // The previous range is the last one with Base <= offset
91 | 	}
92 | 	sym++
93 | 	goto retry // Avoid for-loop so that this function can be inlined
94 | }
95 | 


--------------------------------------------------------------------------------
/internal/prefix/wrap.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package prefix
  6 | 
  7 | import (
  8 | 	"bytes"
  9 | 	"io"
 10 | 	"strings"
 11 | )
 12 | 
 13 | // For some of the common Readers, we wrap and extend them to satisfy the
 14 | // compress.BufferedReader interface to improve performance.
 15 | 
 16 | type buffer struct { // Adds Buffered, Peek, and Discard to *bytes.Buffer
 17 | 	*bytes.Buffer
 18 | }
 19 | 
 20 | type bytesReader struct { // Adds Buffered, Peek, and Discard to *bytes.Reader
 21 | 	*bytes.Reader
 22 | 	pos int64     // Read offset that the start of buf corresponds to
 23 | 	buf []byte    // Cached bytes beginning at pos; a sub-slice of arr
 24 | 	arr [512]byte // Backing array for buf; bounds the size of a Peek
 25 | }
 26 | 
 27 | type stringReader struct { // Adds Buffered, Peek, and Discard to *strings.Reader
 28 | 	*strings.Reader
 29 | 	pos int64     // Read offset that the start of buf corresponds to
 30 | 	buf []byte    // Cached bytes beginning at pos; a sub-slice of arr
 31 | 	arr [512]byte // Backing array for buf; bounds the size of a Peek
 32 | }
 33 | 
 34 | func (r *buffer) Buffered() int {
 35 | 	return len(r.Bytes()) // Every unread byte is immediately available.
 36 | }
 37 | 
 38 | func (r *buffer) Peek(n int) ([]byte, error) { // Next n bytes, not consumed.
 39 | 	pending := r.Bytes()
 40 | 	if n <= len(pending) {
 41 | 		return pending[:n], nil
 42 | 	}
 43 | 	return pending, io.EOF // Short: hand back whatever is left.
 44 | }
 45 | 
 46 | func (r *buffer) Discard(n int) (int, error) { // Skips up to n unread bytes.
 47 | 	skipped := len(r.Next(n))
 48 | 	if skipped >= n {
 49 | 		return n, nil
 50 | 	}
 51 | 	return skipped, io.EOF
 52 | }
 53 | 
 54 | func (r *bytesReader) Buffered() int { // Smaller of cached and unread bytes.
 55 | 	r.update()
 56 | 	cached, unread := len(r.buf), r.Len()
 57 | 	if cached < unread {
 58 | 		return cached
 59 | 	}
 60 | 	return unread
 61 | }
 61 | 
 62 | func (r *bytesReader) Peek(n int) ([]byte, error) {
 63 | 	// Requests larger than the fixed cache cannot be satisfied.
 64 | 	if len(r.arr) < n {
 65 | 		return nil, io.ErrShortBuffer
 66 | 	}
 67 | 
 68 | 	// Serve from the cache when it already holds enough bytes.
 69 | 	if r.update(); n <= len(r.buf) {
 70 | 		return r.buf[:n], nil
 71 | 	}
 72 | 
 73 | 	// Otherwise refill the whole cache from the current read offset.
 74 | 	got, err := r.ReadAt(r.arr[:], r.pos)
 75 | 	r.buf = r.arr[:got]
 76 | 	if got >= n {
 77 | 		return r.buf[:n], nil
 78 | 	}
 79 | 	return r.buf, err
 80 | }
 81 | 
 82 | func (r *bytesReader) Discard(n int) (int, error) {
 83 | 	var err error
 84 | 	if n > r.Len() {
 85 | 		n, err = r.Len(), io.EOF
 86 | 	}
 87 | 	r.Seek(int64(n), io.SeekCurrent)
 88 | 	return n, err
 89 | }
 90 | 
 91 | // update re-anchors the cache at the current read offset, or drops it.
 92 | func (r *bytesReader) update() {
 93 | 	now, _ := r.Seek(0, io.SeekCurrent)
 94 | 	delta, cached := now-r.pos, r.buf
 95 | 	r.buf, r.pos = nil, now
 96 | 	if delta >= 0 && delta < int64(len(cached)) {
 97 | 		r.buf = cached[delta:] // The offset is still inside the cached bytes.
 98 | 	}
 99 | }
100 | 
101 | func (r *stringReader) Buffered() int { // Smaller of cached and unread bytes.
102 | 	r.update()
103 | 	cached, unread := len(r.buf), r.Len()
104 | 	if cached < unread {
105 | 		return cached
106 | 	}
107 | 	return unread
108 | }
108 | 
109 | func (r *stringReader) Peek(n int) ([]byte, error) {
110 | 	// Requests larger than the fixed cache cannot be satisfied.
111 | 	if len(r.arr) < n {
112 | 		return nil, io.ErrShortBuffer
113 | 	}
114 | 
115 | 	// Serve from the cache when it already holds enough bytes.
116 | 	if r.update(); n <= len(r.buf) {
117 | 		return r.buf[:n], nil
118 | 	}
119 | 
120 | 	// Otherwise refill the whole cache from the current read offset.
121 | 	got, err := r.ReadAt(r.arr[:], r.pos)
122 | 	r.buf = r.arr[:got]
123 | 	if got >= n {
124 | 		return r.buf[:n], nil
125 | 	}
126 | 	return r.buf, err
127 | }
128 | 
129 | func (r *stringReader) Discard(n int) (int, error) {
130 | 	var err error
131 | 	if n > r.Len() {
132 | 		n, err = r.Len(), io.EOF
133 | 	}
134 | 	r.Seek(int64(n), io.SeekCurrent)
135 | 	return n, err
136 | }
137 | 
138 | // update re-anchors the cache at the current read offset, or drops it.
139 | func (r *stringReader) update() {
140 | 	now, _ := r.Seek(0, io.SeekCurrent)
141 | 	delta, cached := now-r.pos, r.buf
142 | 	r.buf, r.pos = nil, now
143 | 	if delta >= 0 && delta < int64(len(cached)) {
144 | 		r.buf = cached[delta:] // The offset is still inside the cached bytes.
145 | 	}
146 | }
147 | 


--------------------------------------------------------------------------------
/internal/prefix/writer.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package prefix
  6 | 
  7 | import (
  8 | 	"encoding/binary"
  9 | 	"io"
 10 | 
 11 | 	"github.com/dsnet/compress/internal/errors"
 12 | )
 13 | 
 14 | // Writer implements a prefix encoder. For performance reasons, Writer will not
 15 | // write bytes immediately to the underlying stream.
 16 | type Writer struct {
 17 | 	Offset int64 // Number of bytes written to the underlying io.Writer
 18 | 
 19 | 	wr        io.Writer // Underlying writer that receives flushed bytes
 20 | 	bufBits   uint64 // Buffer to hold some bits
 21 | 	numBits   uint   // Number of valid bits in bufBits
 22 | 	bigEndian bool   // Are bits written in big-endian order?
 23 | 
 24 | 	buf    [512]byte // Staging area for whole bytes moved out of bufBits
 25 | 	cntBuf int       // Number of pending bytes at the front of buf
 26 | }
 27 | 
 28 | // Init resets pw to write to w, clearing all buffered state. If bigEndian is
 29 | // true, bits are written starting from the most-significant bits of a byte
 30 | // (as done in bzip2); otherwise they are written starting from the
 31 | // least-significant bits of a byte (such as for deflate and brotli).
 32 | func (pw *Writer) Init(w io.Writer, bigEndian bool) {
 33 | 	fresh := Writer{wr: w, bigEndian: bigEndian}
 34 | 	*pw = fresh
 35 | }
 36 | 
 37 | // BitsWritten counts every bit accepted so far: written, staged, or pending.
 38 | func (pw *Writer) BitsWritten() int64 {
 39 | 	return 8*(pw.Offset+int64(pw.cntBuf)) + int64(pw.numBits)
 40 | }
 41 | 
 42 | // WritePads writes the low 0-7 bits of v needed to achieve byte-alignment.
 43 | func (pw *Writer) WritePads(v uint) {
 44 | 	nb := -pw.numBits & 7 // Bits missing until the next byte boundary
 45 | 	pw.bufBits |= uint64(v&(1<<nb-1)) << pw.numBits // Drop bits of v beyond the padding
 46 | 	pw.numBits += nb
 47 | }
 48 | 
 49 | // Write flushes any buffered bytes and then writes buf to the underlying
 50 | // writer. The bit buffer must be byte-aligned; bit-ordering is irrelevant.
 51 | func (pw *Writer) Write(buf []byte) (cnt int, err error) {
 52 | 	if pw.numBits%8 != 0 {
 53 | 		return 0, errorf(errors.Invalid, "non-aligned bit buffer")
 54 | 	}
 55 | 	if pw.numBits > 0 || pw.cntBuf > 0 {
 56 | 		if _, err = pw.Flush(); err != nil {
 57 | 			return 0, err
 58 | 		}
 59 | 	}
 60 | 	cnt, err = pw.wr.Write(buf)
 61 | 	pw.Offset += int64(cnt)
 62 | 	return cnt, err
 63 | }
 64 | 
 65 | // WriteOffset writes ofs as the prefix code of its range symbol, followed by
 66 | // the distance of ofs from that range's base in the range's extra bits.
 67 | func (pw *Writer) WriteOffset(ofs uint, pe *Encoder, re *RangeEncoder) {
 68 | 	rangeSym := re.Encode(ofs)
 69 | 	pw.WriteSymbol(rangeSym, pe)
 70 | 	base, extraBits := uint(re.rcs[rangeSym].Base), uint(re.rcs[rangeSym].Len)
 71 | 	pw.WriteBits(ofs-base, extraBits)
 72 | }
 73 | 
 74 | // TryWriteBits attempts to write nb bits using the contents of the bit buffer
 75 | // alone. It reports whether it succeeded; on failure nothing is modified.
 76 | //
 77 | // This method is designed to be inlined for performance reasons.
 78 | func (pw *Writer) TryWriteBits(v, nb uint) bool {
 79 | 	if 64-pw.numBits < nb { // Not enough free bits left in the 64-bit buffer
 80 | 		return false
 81 | 	}
 82 | 	pw.bufBits |= uint64(v) << pw.numBits // v is not masked to nb bits here
 83 | 	pw.numBits += nb
 84 | 	return true
 85 | }
 86 | 
 87 | // WriteBits appends nb bits of v to the bit buffer after PushBits has made
 88 | // room; an error from PushBits is raised through errors.Panic.
 89 | func (pw *Writer) WriteBits(v, nb uint) {
 90 | 	if _, err := pw.PushBits(); err != nil {
 91 | 		errors.Panic(err)
 92 | 	}
 93 | 	pw.bufBits, pw.numBits = pw.bufBits|uint64(v)<<pw.numBits, pw.numBits+nb
 94 | }
 95 | 
 96 | // TryWriteSymbol attempts to encode the next symbol using the contents of the
 97 | // bit buffer alone. It reports whether it succeeded.
 98 | //
 99 | // This method is designed to be inlined for performance reasons.
100 | func (pw *Writer) TryWriteSymbol(sym uint, pe *Encoder) bool {
101 | 	chunk := pe.chunks[uint32(sym)&pe.chunkMask] // Code value in the high bits, bit-length in the low bits
102 | 	nb := uint(chunk & countMask)
103 | 	if 64-pw.numBits < nb { // Code does not fit in the free bits of bufBits
104 | 		return false
105 | 	}
106 | 	pw.bufBits |= uint64(chunk>>countBits) << pw.numBits
107 | 	pw.numBits += nb
108 | 	return true
109 | }
110 | 
111 | // WriteSymbol looks up sym in pe and appends its code to the bit buffer after
112 | // PushBits has made room; an error from PushBits goes through errors.Panic.
113 | func (pw *Writer) WriteSymbol(sym uint, pe *Encoder) {
114 | 	if _, err := pw.PushBits(); err != nil {
115 | 		errors.Panic(err)
116 | 	}
117 | 	entry := pe.chunks[uint32(sym)&pe.chunkMask]
118 | 	code, width := uint64(entry>>countBits), uint(entry&countMask)
119 | 	pw.bufBits, pw.numBits = pw.bufBits|code<<pw.numBits, pw.numBits+width
120 | }
121 | 
122 | // Flush flushes all complete bytes from the bit buffer to the byte buffer, and
123 | // then writes the byte buffer to the underlying writer, returning Offset.
124 | // After this call, the bit Writer will only withhold 7 bits at most.
125 | func (pw *Writer) Flush() (int64, error) {
126 | 	if pw.numBits < 8 && pw.cntBuf == 0 {
127 | 		return pw.Offset, nil
128 | 	}
129 | 	if _, err := pw.PushBits(); err != nil {
130 | 		return pw.Offset, err
131 | 	}
132 | 	cnt, err := pw.wr.Write(pw.buf[:pw.cntBuf])
133 | 	pw.cntBuf = copy(pw.buf[:], pw.buf[cnt:pw.cntBuf]) // On a short write, keep the unwritten bytes in front
134 | 	pw.Offset += int64(cnt)
135 | 	return pw.Offset, err
136 | }
137 | 
138 | // PushBits moves every whole byte from the bit buffer to the byte buffer,
139 | // first writing out a nearly full byte buffer. It reports the bits pushed.
140 | func (pw *Writer) PushBits() (uint, error) {
141 | 	if pw.cntBuf >= len(pw.buf)-8 { // No room left to stage another 8 bytes
142 | 		cnt, err := pw.wr.Write(pw.buf[:pw.cntBuf])
143 | 		pw.cntBuf = copy(pw.buf[:], pw.buf[cnt:pw.cntBuf]) // On a short write, keep the unwritten bytes in front
144 | 		pw.Offset += int64(cnt)
145 | 		if err != nil {
146 | 			return 0, err
147 | 		}
148 | 	}
149 | 
150 | 	u := pw.bufBits
151 | 	if pw.bigEndian {
152 | 		// Swap all the bits within each byte.
153 | 		u = (u&0xaaaaaaaaaaaaaaaa)>>1 | (u&0x5555555555555555)<<1
154 | 		u = (u&0xcccccccccccccccc)>>2 | (u&0x3333333333333333)<<2
155 | 		u = (u&0xf0f0f0f0f0f0f0f0)>>4 | (u&0x0f0f0f0f0f0f0f0f)<<4
156 | 	}
157 | 	// Starting with Go 1.7, the compiler should use a wide integer
158 | 	// store here if the architecture supports it.
159 | 	binary.LittleEndian.PutUint64(pw.buf[pw.cntBuf:], u)
160 | 
161 | 	nb := pw.numBits / 8 // Number of bytes to copy from bit buffer
162 | 	pw.cntBuf += int(nb) // Only the whole bytes of the 8 stored above are kept
163 | 	pw.bufBits >>= 8 * nb
164 | 	pw.numBits -= 8 * nb
165 | 	return 8 * nb, nil
166 | }
167 | 


--------------------------------------------------------------------------------
/internal/release.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build !debug && !gofuzz
 6 | // +build !debug,!gofuzz
 7 | 
 8 | package internal
 9 | 
10 | // Debug indicates whether the debug build tag was set. It is false here
11 | // because this file is only built without the debug and gofuzz tags.
12 | //
13 | // If set, programs may print more human-readable debug information and
14 | // perform sanity checks that would be too expensive for a release build.
15 | const Debug = false
16 | 
17 | // GoFuzz indicates whether the gofuzz build tag was set. It is false here.
18 | //
19 | // If set, programs may choose to disable certain checks (like checksums) that
20 | // would be nearly impossible for gofuzz to get right.
21 | // If GoFuzz is set, it implies that Debug is set as well.
22 | const GoFuzz = false
23 | 


--------------------------------------------------------------------------------
/internal/testutil/rand.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package testutil
 6 | 
 7 | import (
 8 | 	"crypto/aes"
 9 | 	"crypto/cipher"
10 | 	"encoding/binary"
11 | )
12 | 
13 | // Rand implements a deterministic pseudo-random number generator.
14 | // This differs from math/rand in that the exact output will be consistent
15 | // across different versions of Go.
16 | type Rand struct {
17 | 	cipher.Block
18 | 	blk [aes.BlockSize]byte // Generator state; encrypted in place on every step
19 | }
20 | 
21 | // NewRand returns a generator keyed by seed whose state starts as all zeros.
22 | func NewRand(seed int) *Rand {
23 | 	key := make([]byte, aes.BlockSize)
24 | 	binary.LittleEndian.PutUint64(key, uint64(seed))
25 | 	block, _ := aes.NewCipher(key) // A 16-byte key is a valid AES key size
26 | 	return &Rand{Block: block}
27 | }
28 | 
29 | // Int advances the state and returns a non-negative 31-bit integer.
30 | func (r *Rand) Int() int {
31 | 	state := r.blk[:]
32 | 	r.Encrypt(state, state)
33 | 	return int(binary.BigEndian.Uint32(state) >> 1)
34 | }
35 | 
36 | // Intn returns the next Int reduced modulo n.
37 | func (r *Rand) Intn(n int) int { return r.Int() % n }
38 | 
39 | // Bytes returns n bytes, advancing the state once per block copied out.
40 | func (r *Rand) Bytes(n int) []byte {
41 | 	out := make([]byte, n)
42 | 	for done := 0; done < n; done += copy(out[done:], r.blk[:]) {
43 | 		r.Encrypt(r.blk[:], r.blk[:])
44 | 	}
45 | 	return out
46 | }
47 | 
48 | // Perm returns a pseudo-random permutation of the integers in [0, n).
49 | func (r *Rand) Perm(n int) []int {
50 | 	perm := make([]int, n)
51 | 	for i := range perm {
52 | 		j := r.Intn(i + 1)
53 | 		perm[i], perm[j] = perm[j], i
54 | 	}
55 | 	return perm
56 | }
57 | 


--------------------------------------------------------------------------------
/internal/testutil/util.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | // Package testutil is a collection of testing helper methods.
  6 | package testutil
  7 | 
  8 | import (
  9 | 	"bytes"
 10 | 	"encoding/hex"
 11 | 	"fmt"
 12 | 	"io"
 13 | 	"io/ioutil"
 14 | 	"strings"
 15 | )
 16 | 
 17 | // ResizeData resizes the input. If n < 0, then the original input will be
 18 | // returned as is. If n <= len(input), then the input slice will be truncated.
 19 | // However, if n > len(input), then the input will be replicated to fill in
 20 | // the missing bytes, but each replicated string will be XORed by some byte
 21 | // mask to avoid favoring algorithms with large LZ77 windows.
 22 | //
 23 | // If n > len(input), then len(input) must be > 0.
 24 | func ResizeData(input []byte, n int) []byte {
 25 | 	if n < 0 {
 26 | 		return input
 27 | 	}
 28 | 	if len(input) >= n {
 29 | 		return input[:n]
 30 | 	}
 31 | 	if len(input) == 0 {
 32 | 		panic("unable to replicate an empty string")
 33 | 	}
 34 | 
 35 | 	var mask byte
 36 | 	output := make([]byte, n)
 37 | 	for i := range output {
 38 | 		idx := i % len(input)
 39 | 		output[i] = input[idx] ^ mask
 40 | 		if idx == len(input)-1 {
 41 | 			mask++
 42 | 		}
 43 | 	}
 44 | 	return output
 45 | }
 46 | 
 47 | // MustLoadFile returns the contents of file, panicking on any read error.
 48 | func MustLoadFile(file string) []byte {
 49 | 	data, err := ioutil.ReadFile(file)
 50 | 	if err == nil {
 51 | 		return data
 52 | 	}
 53 | 	panic(err)
 54 | }
 55 | 
 56 | // MustDecodeHex returns the bytes encoded by the hexadecimal string s,
 57 | // panicking if s cannot be decoded.
 58 | func MustDecodeHex(s string) []byte {
 59 | 	if decoded, err := hex.DecodeString(s); err == nil {
 60 | 		return decoded
 61 | 	} else {
 62 | 		panic(err)
 63 | 	}
 64 | }
 64 | 
 65 | // MustDecodeBitGen returns the result of DecodeBitGen(s), panicking on error.
 66 | func MustDecodeBitGen(s string) []byte {
 67 | 	decoded, err := DecodeBitGen(s)
 68 | 	if err == nil {
 69 | 		return decoded
 70 | 	}
 71 | 	panic(err)
 72 | }
 73 | 
 74 | // BytesCompare compares inputs a and b and reports whether they are equal.
 75 | //
 76 | // If they are not equal, it returns two one-line strings that are
 77 | // representative of the differences between the two strings.
 78 | // The output will be quoted strings if it seems like the data is text,
 79 | // otherwise, it will use hexadecimal strings. Long inputs are shortened
 80 | // around the first differing byte, with "(N bytes)..." and "...(N bytes)"
 81 | // markers standing in for the bytes that were cut.
 82 | //
 83 | // Example usage:
 84 | //
 85 | //	if got, want, ok := testutil.BytesCompare(output, v.output); !ok {
 86 | //		t.Errorf("output mismatch:\ngot  %s\nwant %s", got, want)
 87 | //	}
 88 | func BytesCompare(a, b []byte) (sa, sb string, ok bool) {
 89 | 	if bytes.Equal(a, b) {
 90 | 		return "", "", true
 91 | 	}
 92 | 
 93 | 	// Length of the longest prefix shared by a and b.
 94 | 	shared := 0
 95 | 	for shared < len(a) && shared < len(b) && a[shared] == b[shared] {
 96 | 		shared++
 97 | 	}
 98 | 
 99 | 	// excerpt renders one input: it drops skip leading bytes, keeps at most
100 | 	// limit bytes, and notes how many bytes were cut on either side.
101 | 	excerpt := func(data []byte, verb string, skip, limit int) string {
102 | 		var head, tail string
103 | 		// Cut the leading bytes, leaving a marker in their place.
104 | 		if skip > 0 {
105 | 			data = data[skip:]
106 | 			head = fmt.Sprintf("(%d bytes)...", skip)
107 | 		}
108 | 		// Cut everything past limit bytes, again leaving a marker.
109 | 		if len(data) > limit {
110 | 			tail = fmt.Sprintf("...(%d bytes)", len(data)-limit)
111 | 			data = data[:limit]
112 | 		}
113 | 		return fmt.Sprintf("%s"+verb+"%s", head, data, tail)
114 | 	}
115 | 
116 | 	// render formats both inputs with the same number of leading bytes cut.
117 | 	render := func(verb string, limit int) (string, string) {
118 | 		skip := shared - limit/2 // Always provide context of equal bytes
119 | 		if skip < 0 {
120 | 			skip = 0
121 | 		}
122 | 		// Never cut so much that either excerpt ends up shorter than limit.
123 | 		if most := len(a) - limit; skip > most {
124 | 			skip = most
125 | 		}
126 | 		if most := len(b) - limit; skip > most {
127 | 			skip = most
128 | 		}
129 | 		return excerpt(a, verb, skip, limit), excerpt(b, verb, skip, limit)
130 | 	}
131 | 
132 | 	// Quoted output reads best for text. Many escape sequences in it suggest
133 | 	// binary data, which is then shown as hexadecimal instead, using half as
134 | 	// many bytes per excerpt.
135 | 	const maxLen = 64
136 | 	sa, sb = render("%q", maxLen) // Favor quoted output, first
137 | 	if both := sa + sb; strings.Count(both, `\u`)+strings.Count(both, `\x`) > maxLen/8 {
138 | 		sa, sb = render("%x", maxLen/2) // Fallback to hex, next
139 | 	}
140 | 	return sa, sb, false
141 | }
142 | 
143 | // BuggyReader returns Err after N bytes have been read from R.
144 | type BuggyReader struct {
145 | 	R   io.Reader // Source of the valid bytes
146 | 	N   int64     // Number of valid bytes to read
147 | 	Err error     // Return this error after N bytes
148 | }
149 | 
150 | // Read reads at most N more bytes from R and reports Err once N is used up.
151 | func (br *BuggyReader) Read(buf []byte) (int, error) {
152 | 	if remain := br.N; remain < int64(len(buf)) {
153 | 		buf = buf[:remain]
154 | 	}
155 | 	got, err := br.R.Read(buf)
156 | 	if br.N -= int64(got); br.N <= 0 && err == nil {
157 | 		err = br.Err
158 | 	}
159 | 	return got, err
160 | }
161 | 
162 | // BuggyWriter returns Err after N bytes have been written to W.
163 | type BuggyWriter struct {
164 | 	W   io.Writer // Destination of the valid bytes
165 | 	N   int64     // Number of valid bytes to write
166 | 	Err error     // Return this error after N bytes
167 | }
168 | 
169 | // Write writes at most N more bytes to W and reports Err once N is used up.
170 | func (bw *BuggyWriter) Write(buf []byte) (int, error) {
171 | 	if remain := bw.N; remain < int64(len(buf)) {
172 | 		buf = buf[:remain]
173 | 	}
174 | 	put, err := bw.W.Write(buf)
175 | 	if bw.N -= int64(put); bw.N <= 0 && err == nil {
176 | 		err = bw.Err
177 | 	}
178 | 	return put, err
179 | }
180 | 


--------------------------------------------------------------------------------
/internal/testutil/util_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2017, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package testutil
 6 | 
 7 | import "testing"
 8 | 
 9 | func TestCompare(t *testing.T) {
10 | 	vectors := []struct {
11 | 		inA, inB   string
12 | 		outA, outB string
13 | 		ok         bool
14 | 	}{
15 | 		{"", "", "", "", true},
16 | 		{"", "foo", `""`, `"foo"`, false},
17 | 		{"bar", "foo", `"bar"`, `"foo"`, false},
18 | 		{"foo", "foo", "", "", true},
19 | 		{
20 | 			"keyboardsmashfoo", "keyboardsmashbar",
21 | 			`"keyboardsmashfoo"`, `"keyboardsmashbar"`,
22 | 			false,
23 | 		},
24 | 		{
25 | 			"keyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er32/q2890r3u0qv",
26 | 			"keyboardsmashfrioj8394ru4389",
27 | 			`"keyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er32/q2890"...(6 bytes)`,
28 | 			`"keyboardsmashfrioj8394ru4389"`,
29 | 			false,
30 | 		},
31 | 		{
32 | 			"keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er3fefewaf",
33 | 			"keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfrioj8394ru4389",
34 | 			`(16 bytes)..."boardsmashkeyboardsmashkeyboardsmashkeyboardsmashfoofjaewu893p4u"...(36 bytes)`,
35 | 			`(16 bytes)..."boardsmashkeyboardsmashkeyboardsmashkeyboardsmashfrioj8394ru4389"`,
36 | 			false,
37 | 		},
38 | 		{
39 | 			"keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er3fefewaf",
40 | 			"keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfriojfewafweafwaefweafewafwaefwaefwaefewafwae8394ru4389",
41 | 			`(34 bytes)..."smashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r"...(18 bytes)`,
42 | 			`(34 bytes)..."smashkeyboardsmashkeyboardsmashfriojfewafweafwaefweafewafwaefwae"...(22 bytes)`,
43 | 			false,
44 | 		},
45 | 		{
46 | 			"keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er3fefewaf",
47 | 			"\xfaO\xed\x93QK\xb1\xa9O!\xc0\xac\x8dD\xd8\xce\xc01\x1aa\x9c\x108\xbb",
48 | 			`6b6579626f617264736d6173686b6579626f617264736d6173686b6579626f61...(84 bytes)`,
49 | 			`fa4fed93514bb1a94f21c0ac8d44d8cec0311a619c1038bb`,
50 | 			false,
51 | 		},
52 | 		{
53 | 			"keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er3fefewaf",
54 | 			"keyboardsmashkeyboardsmashkeyboard\xfaO\xed\x93QK\xb1\xa9O!\xc0\xac\x8dD\xd8\xce\xc01\x1aa\x9c\x108\xbb",
55 | 			`(18 bytes)...617264736d6173686b6579626f617264736d6173686b6579626f617264736d61...(66 bytes)`,
56 | 			`(18 bytes)...617264736d6173686b6579626f617264fa4fed93514bb1a94f21c0ac8d44d8ce...(8 bytes)`,
57 | 			false,
58 | 		},
59 | 	}
60 | 
61 | 	for i, v := range vectors {
62 | 		sa, sb, ok := BytesCompare([]byte(v.inA), []byte(v.inB))
63 | 		if sa != v.outA {
64 | 			t.Errorf("test %d, output A mismatch:\ngot  %s\nwant %s", i, sa, v.outA)
65 | 		}
66 | 		if sb != v.outB {
67 | 			t.Errorf("test %d, output B mismatch:\ngot  %s\nwant %s", i, sb, v.outB)
68 | 		}
69 | 		if ok != v.ok {
70 | 			t.Errorf("test %d, output equality mismatch: got %t, want %t", i, ok, v.ok)
71 | 		}
72 | 	}
73 | }
74 | 


--------------------------------------------------------------------------------
/internal/tool/bench/cgo_brotli.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2016, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build cgo && !no_cgo_brotli
 6 | // +build cgo,!no_cgo_brotli
 7 | 
 8 | package main
 9 | 
10 | import "github.com/dsnet/compress/internal/cgo/brotli"
11 | 
12 | func init() {
13 | 	RegisterEncoder(FormatBrotli, "cgo", brotli.NewWriter)
14 | 	RegisterDecoder(FormatBrotli, "cgo", brotli.NewReader)
15 | }
16 | 


--------------------------------------------------------------------------------
/internal/tool/bench/cgo_bzip2.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build cgo && !no_cgo_bzip2
 6 | // +build cgo,!no_cgo_bzip2
 7 | 
 8 | package main
 9 | 
10 | import "github.com/dsnet/compress/internal/cgo/bzip2"
11 | 
12 | func init() {
13 | 	RegisterEncoder(FormatBZ2, "cgo", bzip2.NewWriter)
14 | 	RegisterDecoder(FormatBZ2, "cgo", bzip2.NewReader)
15 | }
16 | 


--------------------------------------------------------------------------------
/internal/tool/bench/cgo_flate.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build cgo && !no_cgo_flate
 6 | // +build cgo,!no_cgo_flate
 7 | 
 8 | package main
 9 | 
10 | import "github.com/dsnet/compress/internal/cgo/flate"
11 | 
12 | func init() {
13 | 	RegisterEncoder(FormatFlate, "cgo", flate.NewWriter)
14 | 	RegisterDecoder(FormatFlate, "cgo", flate.NewReader)
15 | }
16 | 


--------------------------------------------------------------------------------
/internal/tool/bench/cgo_lzma.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2016, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build cgo && !no_cgo_lzma
 6 | // +build cgo,!no_cgo_lzma
 7 | 
 8 | package main
 9 | 
10 | import "github.com/dsnet/compress/internal/cgo/lzma"
11 | 
12 | func init() {
13 | 	RegisterEncoder(FormatLZMA2, "cgo", lzma.NewWriter)
14 | 	RegisterDecoder(FormatLZMA2, "cgo", lzma.NewReader)
15 | }
16 | 


--------------------------------------------------------------------------------
/internal/tool/bench/cgo_zstd.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2016, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build cgo && !no_cgo_zstd
 6 | // +build cgo,!no_cgo_zstd
 7 | 
 8 | package main
 9 | 
10 | import "github.com/dsnet/compress/internal/cgo/zstd"
11 | 
12 | func init() {
13 | 	RegisterEncoder(FormatZstd, "cgo", zstd.NewWriter)
14 | 	RegisterDecoder(FormatZstd, "cgo", zstd.NewReader)
15 | }
16 | 


--------------------------------------------------------------------------------
/internal/tool/bench/codec_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2016, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package main
  6 | 
  7 | import (
  8 | 	"bytes"
  9 | 	"flag"
 10 | 	"io"
 11 | 	"os"
 12 | 	"path/filepath"
 13 | 	"runtime"
 14 | 	"strings"
 15 | 	"sync"
 16 | 	"testing"
 17 | 
 18 | 	"github.com/dsnet/compress/internal/testutil"
 19 | )
 20 | 
 21 | // The unit tests can also be used to quickly test all of the implementations
 22 | // with respect to each other for correctness. The command-line flags can be
 23 | // used to specify any arbitrary corpus of test data to use.
 24 | //
 25 | // Example usage:
 26 | //	$ go test -c
 27 | //	$ ./bench.test \
 28 | //		-paths    $CORPUS_PATH   \
 29 | //		-globs    "*.txt:*.bin"  \
 30 | //		-test.run "//fl/std|cgo" \
 31 | //		-test.v
 32 | 
 33 | var level int
 34 | 
 35 | func TestMain(m *testing.M) {
 36 | 	setDefaults()
 37 | 	flag.Var(&paths, "paths", "List of paths to search for test files")
 38 | 	flag.Var(&globs, "globs", "List of globs to match for test files")
 39 | 	flag.IntVar(&level, "level", 6, "Default compression level to use")
 40 | 	flag.Parse()
 41 | 	os.Exit(m.Run())
 42 | }
 43 | 
 44 | type semaphore chan struct{}
 45 | 
 46 | func newSemaphore(n int) semaphore { return make(chan struct{}, n) }
 47 | func (s *semaphore) Acquire()      { *s <- struct{}{} }
 48 | func (s *semaphore) Release()      { <-*s }
 49 | 
 50 | // Each sub-test is run in a goroutine so that we can have fine control over
 51 | // exactly how many sub-tests are running. When running over a large corpus,
 52 | // this helps prevent all the sub-tests from executing at once and OOMing
 53 | // the machine. The semaphores below control the maximum number of concurrent
 54 | // operations that can be running for each dimension.
 55 | //
 56 | // We avoid using t.Parallel since that causes t.Run to return immediately and
 57 | // does not provide the caller with feedback that all sub-operations completed.
 58 | // This causes the next operation to prematurely start, leading to overloads.
 59 | var (
 60 | 	semaFiles    = newSemaphore(runtime.NumCPU())
 61 | 	semaFormats  = newSemaphore(runtime.NumCPU())
 62 | 	semaEncoders = newSemaphore(runtime.NumCPU())
 63 | 	semaDecoders = newSemaphore(runtime.NumCPU())
 64 | )
 65 | 
 66 | // TestCodecs tests that the output of each registered encoder is a valid input
 67 | // for each registered decoder. This test runs in O(n^2) where n is the number
 68 | // of registered codecs. This assumes that the number of test files and
 69 | // compression formats stays relatively constant.
 70 | func TestCodecs(t *testing.T) {
 71 | 	var wg sync.WaitGroup
 72 | 	defer wg.Wait()
 73 | 	for _, fi := range getFiles(paths, globs) {
 74 | 		fi := fi
 75 | 		name := "File:" + strings.Replace(fi.Rel, string(filepath.Separator), "_", -1)
 76 | 		goRun(t, &wg, &semaFiles, name, func(t *testing.T) {
 77 | 			dd := testutil.MustLoadFile(fi.Abs)
 78 | 			testFormats(t, dd)
 79 | 		})
 80 | 	}
 81 | }
 82 | 
 83 | func testFormats(t *testing.T, dd []byte) {
 84 | 	var wg sync.WaitGroup
 85 | 	defer wg.Wait()
 86 | 	for _, ft := range formats {
 87 | 		ft := ft
 88 | 		name := "Format:" + enumToFmt[ft]
 89 | 		goRun(t, &wg, &semaFormats, name, func(t *testing.T) {
 90 | 			if len(encoders[ft]) == 0 || len(decoders[ft]) == 0 {
 91 | 				t.Skip("no codecs available")
 92 | 			}
 93 | 			testEncoders(t, ft, dd)
 94 | 		})
 95 | 	}
 96 | }
 97 | 
 98 | func testEncoders(t *testing.T, ft Format, dd []byte) {
 99 | 	var wg sync.WaitGroup
100 | 	defer wg.Wait()
101 | 	for encName := range encoders[ft] {
102 | 		encName := encName
103 | 		name := "Encoder:" + encName
104 | 		goRun(t, &wg, &semaEncoders, name, func(t *testing.T) {
105 | 			be := new(bytes.Buffer)
106 | 			zw := encoders[ft][encName](be, level)
107 | 			if _, err := io.Copy(zw, bytes.NewReader(dd)); err != nil {
108 | 				t.Fatalf("unexpected Write error: %v", err)
109 | 			}
110 | 			if err := zw.Close(); err != nil {
111 | 				t.Fatalf("unexpected Close error: %v", err)
112 | 			}
113 | 			de := be.Bytes()
114 | 			testDecoders(t, ft, dd, de)
115 | 		})
116 | 	}
117 | }
118 | 
119 | func testDecoders(t *testing.T, ft Format, dd, de []byte) {
120 | 	var wg sync.WaitGroup
121 | 	defer wg.Wait()
122 | 	for decName := range decoders[ft] {
123 | 		decName := decName
124 | 		name := "Decoder:" + decName
125 | 		goRun(t, &wg, &semaDecoders, name, func(t *testing.T) {
126 | 			bd := new(bytes.Buffer)
127 | 			zr := decoders[ft][decName](bytes.NewReader(de))
128 | 			if _, err := io.Copy(bd, zr); err != nil {
129 | 				t.Fatalf("unexpected Read error: %v", err)
130 | 			}
131 | 			if err := zr.Close(); err != nil {
132 | 				t.Fatalf("unexpected Close error: %v", err)
133 | 			}
134 | 			if got, want, ok := testutil.BytesCompare(bd.Bytes(), dd); !ok {
135 | 				t.Errorf("data mismatch:\ngot  %s\nwant %s", got, want)
136 | 			}
137 | 		})
138 | 	}
139 | }
140 | 
141 | func goRun(t *testing.T, wg *sync.WaitGroup, sm *semaphore, name string, fn func(t *testing.T)) {
142 | 	wg.Add(1)
143 | 	go func() {
144 | 		defer wg.Done()
145 | 		t.Run(name, func(t *testing.T) {
146 | 			sm.Acquire()
147 | 			defer sm.Release()
148 | 			defer recoverPanic(t)
149 | 			fn(t)
150 | 		})
151 | 	}()
152 | }
153 | 
// recoverPanic is meant to be deferred: it recovers an in-flight panic, if
// there is one, and fails the test with the panic value.
func recoverPanic(t *testing.T) {
	ex := recover()
	if ex == nil {
		return
	}
	t.Fatalf("unexpected panic: %v", ex)
}
159 | 


--------------------------------------------------------------------------------
/internal/tool/bench/lib_ds.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build !no_lib_ds
 6 | // +build !no_lib_ds
 7 | 
 8 | package main
 9 | 
10 | import (
11 | 	"io"
12 | 
13 | 	"github.com/dsnet/compress/brotli"
14 | 	"github.com/dsnet/compress/bzip2"
15 | 	"github.com/dsnet/compress/flate"
16 | )
17 | 
18 | func init() {
19 | 	RegisterDecoder(FormatBrotli, "ds",
20 | 		func(r io.Reader) io.ReadCloser {
21 | 			zr, err := brotli.NewReader(r, nil)
22 | 			if err != nil {
23 | 				panic(err)
24 | 			}
25 | 			return zr
26 | 		})
27 | 	RegisterDecoder(FormatFlate, "ds",
28 | 		func(r io.Reader) io.ReadCloser {
29 | 			zr, err := flate.NewReader(r, nil)
30 | 			if err != nil {
31 | 				panic(err)
32 | 			}
33 | 			return zr
34 | 		})
35 | 	RegisterEncoder(FormatBZ2, "ds",
36 | 		func(w io.Writer, lvl int) io.WriteCloser {
37 | 			zw, err := bzip2.NewWriter(w, &bzip2.WriterConfig{Level: lvl})
38 | 			if err != nil {
39 | 				panic(err)
40 | 			}
41 | 			return zw
42 | 		})
43 | 	RegisterDecoder(FormatBZ2, "ds",
44 | 		func(r io.Reader) io.ReadCloser {
45 | 			zr, err := bzip2.NewReader(r, nil)
46 | 			if err != nil {
47 | 				panic(err)
48 | 			}
49 | 			return zr
50 | 		})
51 | }
52 | 


--------------------------------------------------------------------------------
/internal/tool/bench/lib_kp.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build !no_lib_kp
 6 | // +build !no_lib_kp
 7 | 
 8 | package main
 9 | 
10 | import (
11 | 	"io"
12 | 
13 | 	"github.com/klauspost/compress/flate"
14 | )
15 | 
16 | func init() {
17 | 	RegisterEncoder(FormatFlate, "kp",
18 | 		func(w io.Writer, lvl int) io.WriteCloser {
19 | 			zw, err := flate.NewWriter(w, lvl)
20 | 			if err != nil {
21 | 				panic(err)
22 | 			}
23 | 			return zw
24 | 		})
25 | 	RegisterDecoder(FormatFlate, "kp",
26 | 		func(r io.Reader) io.ReadCloser {
27 | 			return flate.NewReader(r)
28 | 		})
29 | }
30 | 


--------------------------------------------------------------------------------
/internal/tool/bench/lib_std.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build !no_lib_std
 6 | // +build !no_lib_std
 7 | 
 8 | package main
 9 | 
10 | import (
11 | 	"io"
12 | 	"io/ioutil"
13 | 
14 | 	"compress/bzip2"
15 | 	"compress/flate"
16 | )
17 | 
18 | func init() {
19 | 	RegisterEncoder(FormatFlate, "std",
20 | 		func(w io.Writer, lvl int) io.WriteCloser {
21 | 			zw, err := flate.NewWriter(w, lvl)
22 | 			if err != nil {
23 | 				panic(err)
24 | 			}
25 | 			return zw
26 | 		})
27 | 	RegisterDecoder(FormatFlate, "std",
28 | 		func(r io.Reader) io.ReadCloser {
29 | 			return flate.NewReader(r)
30 | 		})
31 | 	RegisterDecoder(FormatBZ2, "std",
32 | 		func(r io.Reader) io.ReadCloser {
33 | 			return ioutil.NopCloser(bzip2.NewReader(r))
34 | 		})
35 | }
36 | 


--------------------------------------------------------------------------------
/internal/tool/bench/lib_uk.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2016, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build !no_lib_uk
 6 | // +build !no_lib_uk
 7 | 
 8 | package main
 9 | 
10 | import (
11 | 	"io"
12 | 	"io/ioutil"
13 | 
14 | 	"github.com/ulikunitz/xz/lzma"
15 | )
16 | 
17 | func init() {
18 | 	RegisterEncoder(FormatLZMA2, "uk",
19 | 		func(w io.Writer, lvl int) io.WriteCloser {
20 | 			// This level conversion logic emulates the conversion found in
21 | 			// LZMA2Options.java from https://git.tukaani.org/?p=xz-java.git
22 | 			if lvl < 0 || lvl > 9 {
23 | 				panic("invalid level")
24 | 			}
25 | 			dict := [...]int{
26 | 				1 << 18, 1 << 20, 1 << 21, 1 << 22, 1 << 22,
27 | 				1 << 23, 1 << 23, 1 << 24, 1 << 25, 1 << 26,
28 | 			}[lvl]
29 | 			match := lzma.HashTable4
30 | 			// TODO(dsnet): This currently crashes on zero.bin when using
31 | 			// BinaryTree on revision 76f94b7c69c6f84be96bcfc2443042b198689565.
32 | 			/*
33 | 				if lvl > 4 {
34 | 					match = lzma.BinaryTree
35 | 				}
36 | 			*/
37 | 
38 | 			zw, err := lzma.Writer2Config{DictCap: dict, Matcher: match}.NewWriter2(w)
39 | 			if err != nil {
40 | 				panic(err)
41 | 			}
42 | 			return zw
43 | 		})
44 | 	RegisterDecoder(FormatLZMA2, "uk",
45 | 		func(r io.Reader) io.ReadCloser {
46 | 			zr, err := lzma.NewReader2(r)
47 | 			if err != nil {
48 | 				panic(err)
49 | 			}
50 | 			return ioutil.NopCloser(zr)
51 | 		})
52 | }
53 | 


--------------------------------------------------------------------------------
/internal/tool/bitgen/main.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2016, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | // BitGen generates a binary file from input written in the BitGen format.
 6 | // It reads BitGen-formatted text from stdin and writes the binary to stdout.
 7 | package main
 8 | 
 9 | import (
10 | 	"io/ioutil"
11 | 	"os"
12 | 
13 | 	"github.com/dsnet/compress/internal/testutil"
14 | )
15 | 
16 | func main() {
17 | 	buf, err := ioutil.ReadAll(os.Stdin)
18 | 	if err != nil {
19 | 		panic(err)
20 | 	}
21 | 
22 | 	buf = testutil.MustDecodeBitGen(string(buf))
23 | 
24 | 	_, err = os.Stdout.Write(buf)
25 | 	if err != nil {
26 | 		panic(err)
27 | 	}
28 | }
29 | 


--------------------------------------------------------------------------------
/internal/tool/fuzz/brotli/brotli.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2017, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | // +build gofuzz
 6 | 
 7 | package bzip2
 8 | 
 9 | import (
10 | 	"bytes"
11 | 	"io/ioutil"
12 | 
13 | 	gbrotli "github.com/dsnet/compress/brotli"
14 | 	cbrotli "github.com/dsnet/compress/internal/cgo/brotli"
15 | )
16 | 
17 | func Fuzz(data []byte) int {
18 | 	// Decompress using the Go decoder.
19 | 	gr, err := gbrotli.NewReader(bytes.NewReader(data), nil)
20 | 	if err != nil {
21 | 		panic(err)
22 | 	}
23 | 	gb, gerr := ioutil.ReadAll(gr)
24 | 	if err := gr.Close(); gerr == nil {
25 | 		gerr = err
26 | 	} else if gerr != nil && err == nil {
27 | 		panic("nil on Close after non-nil error")
28 | 	}
29 | 
30 | 	// Decompress using the C decoder.
31 | 	cr := cbrotli.NewReader(bytes.NewReader(data))
32 | 	cb, cerr := ioutil.ReadAll(cr)
33 | 	if err := cr.Close(); cerr == nil {
34 | 		cerr = err
35 | 	} else if cerr != nil && err == nil {
36 | 		panic("nil on Close after non-nil error")
37 | 	}
38 | 
39 | 	switch {
40 | 	case gerr == nil && cerr == nil:
41 | 		if !bytes.Equal(gb, cb) {
42 | 			panic("mismatching bytes")
43 | 		}
44 | 	case gerr != nil && cerr == nil:
45 | 		panic(gerr)
46 | 	case gerr == nil && cerr != nil:
47 | 		panic(cerr)
48 | 	default:
49 | 		// Ensure that both gb and cb have the same common prefix.
50 | 		if !bytes.HasPrefix(gb, cb) && !bytes.HasPrefix(cb, gb) {
51 | 			panic("mismatching leading bytes")
52 | 		}
53 | 	}
54 | 
55 | 	if cerr == nil || gerr == nil {
56 | 		return 1 // Favor valid inputs
57 | 	}
58 | 	return 0
59 | }
60 | 


--------------------------------------------------------------------------------
/internal/tool/fuzz/bzip2/bzip2.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2016, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | // +build gofuzz
  6 | 
  7 | package bzip2
  8 | 
  9 | import (
 10 | 	"bytes"
 11 | 	"errors"
 12 | 	"io/ioutil"
 13 | 
 14 | 	"github.com/dsnet/compress"
 15 | 	gbzip2 "github.com/dsnet/compress/bzip2"
 16 | 	cbzip2 "github.com/dsnet/compress/internal/cgo/bzip2"
 17 | )
 18 | 
 19 | func Fuzz(data []byte) int {
 20 | 	data, ok := testDecoders(data, true)
 21 | 	for i := 1; i <= 9; i++ {
 22 | 		testGoEncoder(data, i)
 23 | 		testCEncoder(data, i)
 24 | 	}
 25 | 	if ok {
 26 | 		return 1 // Favor valid inputs
 27 | 	}
 28 | 	return 0
 29 | }
 30 | 
 31 | // testDecoders tests that the input can be handled by both Go and C decoders.
 32 | // This test does not panic if both decoders run into an error, since it
 33 | // means that they both agree that the input is bad.
 34 | //
 35 | // If updateCRCs is set, then the Go bzip2 implementation will ignore all
 36 | // checksum errors and manually adjust the checksum values before running the
 37 | // C implementation. This hack drastically increases the probability that
 38 | // gofuzz can generate a "valid" file.
 39 | func testDecoders(data []byte, updateCRCs bool) ([]byte, bool) {
 40 | 	// Decompress using the Go decoder.
 41 | 	gr, err := gbzip2.NewReader(bytes.NewReader(data), nil)
 42 | 	if err != nil {
 43 | 		panic(err)
 44 | 	}
 45 | 	gb, gerr := ioutil.ReadAll(gr)
 46 | 	if err := gr.Close(); gerr == nil {
 47 | 		gerr = err
 48 | 	} else if gerr != nil && err == nil {
 49 | 		panic("nil on Close after non-nil error")
 50 | 	}
 51 | 
 52 | 	// Check or update the checksums.
 53 | 	if gerr == nil {
 54 | 		if updateCRCs {
 55 | 			data = gr.Checksums.Apply(data)
 56 | 		} else if !gr.Checksums.Verify(data) {
 57 | 			gerr = errors.New("bzip2: checksum error")
 58 | 		}
 59 | 	}
 60 | 
 61 | 	// Decompress using the C decoder.
 62 | 	cr := cbzip2.NewReader(bytes.NewReader(data))
 63 | 	cb, cerr := ioutil.ReadAll(cr)
 64 | 	if err := cr.Close(); cerr == nil {
 65 | 		cerr = err
 66 | 	} else if cerr != nil && err == nil {
 67 | 		panic("nil on Close after non-nil error")
 68 | 	}
 69 | 
 70 | 	switch {
 71 | 	case gerr == nil && cerr == nil:
 72 | 		if !bytes.Equal(gb, cb) {
 73 | 			panic("mismatching bytes")
 74 | 		}
 75 | 		return gb, true
 76 | 	case gerr != nil && cerr == nil:
 77 | 		// Ignore deprecated errors since there are no plans to provide
 78 | 		// these features in the Go implementation.
 79 | 		if err, ok := gerr.(compress.Error); ok && err.IsDeprecated() {
 80 | 			return cb, false
 81 | 		}
 82 | 		panic(gerr)
 83 | 	case gerr == nil && cerr != nil:
 84 | 		panic(cerr)
 85 | 	default:
 86 | 		// Ensure that both gb and cb have the same common prefix.
 87 | 		if !bytes.HasPrefix(gb, cb) && !bytes.HasPrefix(cb, gb) {
 88 | 			panic("mismatching leading bytes")
 89 | 		}
 90 | 		return nil, false
 91 | 	}
 92 | }
 93 | 
 94 | // testGoEncoder encodes the input data with the Go encoder and then checks that
 95 | // both the Go and C decoders can properly decompress the output.
 96 | func testGoEncoder(data []byte, level int) {
 97 | 	// Compress using the Go encoder.
 98 | 	bb := new(bytes.Buffer)
 99 | 	gw, err := gbzip2.NewWriter(bb, &gbzip2.WriterConfig{Level: level})
100 | 	if err != nil {
101 | 		panic(err)
102 | 	}
103 | 	defer gw.Close()
104 | 	n, err := gw.Write(data)
105 | 	if n != len(data) || err != nil {
106 | 		panic(err)
107 | 	}
108 | 	if err := gw.Close(); err != nil {
109 | 		panic(err)
110 | 	}
111 | 
112 | 	// Decompress using both the Go and C decoders.
113 | 	b, ok := testDecoders(bb.Bytes(), false)
114 | 	if !ok {
115 | 		panic("decoder error")
116 | 	}
117 | 	if !bytes.Equal(b, data) {
118 | 		panic("mismatching bytes")
119 | 	}
120 | }
121 | 
122 | // testCEncoder encodes the input data with the C encoder and then checks that
123 | // both the Go and C decoders can properly decompress the output.
124 | func testCEncoder(data []byte, level int) {
125 | 	// Compress using the C encoder.
126 | 	bb := new(bytes.Buffer)
127 | 	cw := cbzip2.NewWriter(bb, level)
128 | 	defer cw.Close()
129 | 	n, err := cw.Write(data)
130 | 	if n != len(data) || err != nil {
131 | 		panic(err)
132 | 	}
133 | 	if err := cw.Close(); err != nil {
134 | 		panic(err)
135 | 	}
136 | 
137 | 	// Decompress using both the Go and C decoders.
138 | 	b, ok := testDecoders(bb.Bytes(), false)
139 | 	if !ok {
140 | 		panic("decoder error")
141 | 	}
142 | 	if !bytes.Equal(b, data) {
143 | 		panic("mismatching bytes")
144 | 	}
145 | }
146 | 


--------------------------------------------------------------------------------
/internal/tool/fuzz/bzip2_bwt/bzip2_bwt.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2016, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | // +build gofuzz
 6 | 
 7 | package bzip2_bwt
 8 | 
 9 | import (
10 | 	"bytes"
11 | 	"hash/adler32"
12 | 
13 | 	"github.com/dsnet/compress/bzip2"
14 | )
15 | 
16 | func Fuzz(data []byte) int {
17 | 	if len(data) == 0 {
18 | 		return -1
19 | 	}
20 | 	testReverse(data)
21 | 	testRoundTrip(data)
22 | 	return 0
23 | }
24 | 
25 | // testReverse verifies that we can reverse the BWT on any arbitrary input
26 | // so long as we choose a valid origin pointer.
27 | func testReverse(data []byte) {
28 | 	data = append([]byte(nil), data...) // Make copy of data
29 | 	ptr := int(adler32.Checksum(data)) % len(data)
30 | 	bzip2.ReverseBWT(data, ptr)
31 | }
32 | 
33 | // testRoundTrip verifies that a round-trip BWT faithfully reproduces the
34 | // input data set.
35 | func testRoundTrip(want []byte) {
36 | 	got := append([]byte(nil), want...)
37 | 	ptr := bzip2.ForwardBWT(got)
38 | 	bzip2.ReverseBWT(got, ptr)
39 | 
40 | 	if ptr < 0 || ptr >= len(want) {
41 | 		panic("invalid origin pointer")
42 | 	}
43 | 	if !bytes.Equal(got, want) {
44 | 		panic("mismatching bytes")
45 | 	}
46 | }
47 | 


--------------------------------------------------------------------------------
/internal/tool/fuzz/fuzz.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | cd "$(dirname "${BASH_SOURCE[0]}")"
 6 | 
 7 | if [ $# == 0 ]; then
 8 | 	echo "Usage: $0 PKG"
 9 | 	echo
10 | 	echo -e "Valid packages:\n\t$(ls -d */ | sed 's/\/*$//g' | tr '\n' ' ')"
11 | 	exit 1
12 | fi
13 | 
14 | # Check that the fuzzing tools are available.
15 | for TOOL in go-fuzz go-fuzz-build; do
16 | 	command -v $TOOL >/dev/null 2>&1 || {
17 | 		echo "Aborting: could not locate $TOOL."; exit 1;
18 | 	}
19 | done
20 | 
21 | # Clone the initial work directory if it does not exist.
22 | if [ ! -d ".work" ]; then
23 | 	echo "Fuzzing workdir does not exist."
24 | 	git clone https://github.com/dsnet/compress-fuzz.git .work
25 | fi
26 | 
27 | PKG=$(echo $1 | sed 's/\/*$//g')
28 | PKG_PATH="github.com/dsnet/compress/internal/tool/fuzz"
29 | shift
30 | 
31 | echo "Building..."
32 | go-fuzz-build -o=".work/$PKG-fuzz.zip" $PKG_PATH/$PKG
33 | 
34 | echo "Fuzzing..."
35 | go-fuzz -bin=".work/$PKG-fuzz.zip" -workdir=".work/$PKG" "$@"
36 | 


--------------------------------------------------------------------------------
/internal/tool/fuzz/xflate_meta/xflate_meta.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2016, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | // +build gofuzz
 6 | 
 7 | package xflate_meta
 8 | 
 9 | import (
10 | 	"bytes"
11 | 	"compress/flate"
12 | 	"io/ioutil"
13 | 
14 | 	"github.com/dsnet/compress/xflate"
15 | )
16 | 
17 | func Fuzz(data []byte) int {
18 | 	mdata, ok := decodeMeta(data)
19 | 	if ok {
20 | 		testRoundTrip(mdata)
21 | 		return 1
22 | 	} else {
23 | 		testRoundTrip(data)
24 | 		return 0
25 | 	}
26 | }
27 | 
28 | // decodeMeta attempts to decode the metadata.
29 | // If successful, it verifies that meta-encoded blocks are DEFLATE blocks.
30 | func decodeMeta(data []byte) ([]byte, bool) {
31 | 	r := bytes.NewReader(data)
32 | 	mr := xflate.NewMetaReader(r)
33 | 	b, err := ioutil.ReadAll(mr)
34 | 	if err != nil {
35 | 		return nil, false
36 | 	}
37 | 	pos := int(r.Size()) - r.Len()
38 | 	decompressMeta(data[:pos])
39 | 	return b, true
40 | }
41 | 
// decompressMeta attempts to decompress the meta-encoded blocks in data.
// It panics unless every DEFLATE stream decompresses successfully and
// produces no output. The input slice itself is not modified.
func decompressMeta(data []byte) {
	// Work on a copy that ends with an empty, final stored block, which
	// terminates the DEFLATE stream.
	terminator := []byte{0x01, 0x00, 0x00, 0xff, 0xff}
	stream := make([]byte, 0, len(data)+len(terminator))
	stream = append(stream, data...)
	stream = append(stream, terminator...)

	// Each pass consumes one complete DEFLATE stream from the shared reader.
	for src := bytes.NewReader(stream); src.Len() > 0; {
		zr := flate.NewReader(src)
		out, err := ioutil.ReadAll(zr)
		switch {
		case err != nil:
			panic(err)
		case len(out) > 0:
			panic("non-zero meta-encoded block")
		}
		if err := zr.Close(); err != nil {
			panic(err)
		}
	}
}
64 | 
65 | // testRoundTrip encodes the input data and then decodes it, checking that the
66 | // metadata was losslessly preserved.
67 | func testRoundTrip(want []byte) {
68 | 	bb := new(bytes.Buffer)
69 | 	mw := xflate.NewMetaWriter(bb)
70 | 	n, err := mw.Write(want)
71 | 	if n != len(want) || err != nil {
72 | 		panic(err)
73 | 	}
74 | 	if err := mw.Close(); err != nil {
75 | 		panic(err)
76 | 	}
77 | 
78 | 	got, ok := decodeMeta(bb.Bytes())
79 | 	if !bytes.Equal(got, want) || !ok {
80 | 		panic("mismatching bytes")
81 | 	}
82 | }
83 | 


--------------------------------------------------------------------------------
/testdata/binary.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/testdata/binary.bin


--------------------------------------------------------------------------------
/testdata/huffman.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | // +build ignore
 6 | 
 7 | //go:generate go run huffman.go
 8 | 
 9 | // Generates huffman.txt. This test file heavily favors prefix based encodings
10 | // since some symbols are heavily favored over others. This leads to compression
11 | // savings that can be gained by assigning shorter prefix codes to those more
12 | // frequent symbols. The number of symbols used is large enough such that it
13 | // avoids LZ77 dictionary matches.
14 | package main
15 | 
16 | import (
17 | 	"io/ioutil"
18 | 	"math/rand"
19 | 	"unicode/utf8"
20 | )
21 | 
// Output file name and exact size (in bytes) of the generated test data.
const (
	name = "huffman.txt"
	size = 1 << 18
)

// Progressively larger alphabets; alpha3 is the full base64 alphabet.
const (
	alpha1 = "abcdefghijklmnopqrstuvwxyz"
	alpha2 = alpha1 + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	alpha3 = alpha2 + "0123456789" + "+/"
)

// main builds the test data from a fixed random seed and writes exactly
// size bytes of it to the file called name. The data is a sequence of runs,
// each 16..79 symbols long, that are either base64-like ASCII characters
// (about 75% of the runs) or multi-byte UTF-8 code points (the rest).
func main() {
	var out []byte
	rng := rand.New(rand.NewSource(0))

	// appendBase64 appends a single ASCII character. The alphabet is chosen
	// first and the index within it second, so lowercase letters end up
	// more frequent than uppercase letters, which in turn are more frequent
	// than digits and punctuation.
	appendBase64 := func() {
		alphabet := alpha3 // Any character from the base64 alphabet.
		switch q := rng.Float32(); {
		case q <= 0.1:
			alphabet = alpha1 // Any lowercase letter.
		case q <= 0.7:
			alphabet = alpha2 // Any lowercase or uppercase letter.
		}
		out = append(out, alphabet[rng.Intn(len(alphabet))])
	}

	// appendRune appends a single UTF-8 encoded code point, drawn from
	// either the 2-byte range (65% of the time) or the 3-byte range.
	// The 3-byte range includes the surrogate values, which utf8.EncodeRune
	// writes as the (also 3-byte) replacement character.
	appendRune := func() {
		base, span := 0x800, 0xF800 // 3-byte long utf8 code points.
		if rng.Float32() <= 0.65 {
			base, span = 0x80, 0x780 // 2-byte long utf8 code points.
		}
		var enc [4]byte
		k := utf8.EncodeRune(enc[:], rune(base+rng.Intn(span)))
		out = append(out, enc[:k]...)
	}

	for len(out) < size {
		runLen := 16 + rng.Intn(64) // Length of substring

		// Pick the kind of run before emitting any of its symbols.
		emit := appendRune
		if rng.Float32() <= 0.75 {
			emit = appendBase64
		}
		for i := 0; i < runLen; i++ {
			emit()
		}
	}

	// The last run may overshoot, so trim to the exact target size.
	if err := ioutil.WriteFile(name, out[:size], 0664); err != nil {
		panic(err)
	}
}
81 | 


--------------------------------------------------------------------------------
/testdata/random.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/testdata/random.bin


--------------------------------------------------------------------------------
/testdata/random.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | // +build ignore
 6 | 
 7 | //go:generate go run random.go
 8 | 
 9 | // Generates random.bin. This test file contains random data throughout and
10 | // tests the worst case compression scenario.
11 | package main
12 | 
13 | import (
14 | 	"io/ioutil"
15 | 	"math/rand"
16 | )
17 | 
// Output file name and exact size (in bytes) of the generated test data.
const (
	name = "random.bin"
	size = 1 << 18
)

// main fills a size-byte buffer with pseudo-random bytes drawn from a
// generator with a fixed seed (so the output is reproducible) and writes
// the buffer to the file called name.
func main() {
	rng := rand.New(rand.NewSource(0))

	data := make([]byte, size)
	for i := range data {
		// Only the low 8 bits of each random integer are kept.
		data[i] = byte(rng.Int())
	}

	if err := ioutil.WriteFile(name, data, 0664); err != nil {
		panic(err)
	}
}
34 | 


--------------------------------------------------------------------------------
/testdata/repeats.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dsnet/compress/39efe44ab707ffd2c1ef32cc7dbebfe584718686/testdata/repeats.bin


--------------------------------------------------------------------------------
/testdata/repeats.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | // +build ignore
  6 | 
  7 | //go:generate go run repeats.go
  8 | 
  9 | // Generates repeats.bin. This test file heavily favors LZ77 based compression
 10 | // since a large bulk of its data is a copy from some distance ago. Also, since
 11 | // the source data is mostly random, prefix encoding does not benefit as much.
 12 | package main
 13 | 
 14 | import (
 15 | 	"io/ioutil"
 16 | 	"math/rand"
 17 | )
 18 | 
// Output file name and exact size (in bytes) of the generated test data.
const (
	name = "repeats.bin"
	size = 1 << 18
)

// main generates the test data from a fixed random seed and writes exactly
// size bytes of it to the file called name. The data starts with a short run
// of random bytes and is then extended by a mix of fresh random runs and
// copies of earlier output at a randomly chosen distance.
//
// The output depends on the exact order in which values are drawn from r,
// so statements here must not be reordered.
func main() {
	var b []byte
	r := rand.New(rand.NewSource(0))

	// randLen returns a random run length in 4..511. A power-of-two bucket
	// is chosen first, then a value uniformly within that bucket.
	randLen := func() (l int) {
		p := r.Float32()
		switch {
		case p <= 0.15: // 4..7
			l = 4 + r.Intn(4)
		case p <= 0.30: // 8..15
			l = 8 + r.Intn(8)
		case p <= 0.45: // 16..31
			l = 16 + r.Intn(16)
		case p <= 0.60: // 32..63
			l = 32 + r.Intn(32)
		case p <= 0.75: // 64..127
			l = 64 + r.Intn(64)
		case p <= 0.90: // 128..255
			l = 128 + r.Intn(128)
		case p <= 1.0: // 256..511
			l = 256 + r.Intn(256)
		}
		return l
	}

	// randDist returns a random backwards distance in 1..32767, bucketed by
	// powers of two like randLen. It redraws until the distance fits within
	// the data written so far (d <= len(b)).
	randDist := func() (d int) {
		for d == 0 || d > len(b) {
			p := r.Float32()
			switch {
			case p <= 0.1: // 1..1
				d = 1 + r.Intn(1)
			case p <= 0.2: // 2..3
				d = 2 + r.Intn(2)
			case p <= 0.3: // 4..7
				d = 4 + r.Intn(4)
			case p <= 0.4: // 8..15
				d = 8 + r.Intn(8)
			case p <= 0.5: // 16..31
				d = 16 + r.Intn(16)
			case p <= 0.55: // 32..63
				d = 32 + r.Intn(32)
			case p <= 0.60: // 64..127
				d = 64 + r.Intn(64)
			case p <= 0.65: // 128..255
				d = 128 + r.Intn(128)
			case p <= 0.70: // 256..511
				d = 256 + r.Intn(256)
			case p <= 0.75: // 512..1023
				d = 512 + r.Intn(512)
			case p <= 0.80: // 1024..2047
				d = 1024 + r.Intn(1024)
			case p <= 0.85: // 2048..4095
				d = 2048 + r.Intn(2048)
			case p <= 0.90: // 4096..8191
				d = 4096 + r.Intn(4096)
			case p <= 0.95: // 8192..16383
				d = 8192 + r.Intn(8192)
			case p <= 1.00: // 16384..32767
				d = 16384 + r.Intn(16384)
			}
		}
		return d
	}

	// writeRand appends l random bytes to b.
	writeRand := func(l int) {
		for i := 0; i < l; i++ {
			b = append(b, byte(r.Int()))
		}
	}

	// writeCopy appends l bytes, each copied from d bytes before the current
	// end of b. Copying one byte at a time means that when d < l the copy
	// overlaps itself and repeats the last d bytes.
	writeCopy := func(d, l int) {
		for i := 0; i < l; i++ {
			b = append(b, b[len(b)-d])
		}
	}

	// Seed the buffer with random data so that randDist has something to
	// point back into (it loops until d <= len(b)).
	writeRand(randLen())
	for len(b) < size {
		p := r.Float32()
		switch {
		case p <= 0.1:
			// Generate random new data.
			writeRand(randLen())
		case p <= 0.9:
			// Write a long distance copy.
			// Redraw both values until the distance exceeds the length,
			// so the copied region never overlaps the bytes being written.
			d, l := randDist(), randLen()
			for d <= l {
				d, l = randDist(), randLen()
			}
			writeCopy(d, l)
		case p <= 1.0:
			// Write a possibly short distance copy.
			writeCopy(randDist(), randLen())
		}
	}

	// The last run may overshoot, so trim to the exact target size.
	if err := ioutil.WriteFile(name, b[:size], 0664); err != nil {
		panic(err)
	}
}
124 | 


--------------------------------------------------------------------------------
/testdata/zeros.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | // +build ignore
 6 | 
 7 | //go:generate go run zeros.go
 8 | 
 9 | // Generates zeros.bin. This test file contains zeroed data throughout and
10 | // tests the best case compression scenario.
11 | package main
12 | 
13 | import "io/ioutil"
14 | 
// Output file name and exact size (in bytes) of the generated test data.
const (
	name = "zeros.bin"
	size = 1 << 18
)

// main writes size zero bytes to the file called name.
func main() {
	// A freshly allocated byte slice is already all zeros.
	zeros := make([]byte, size)
	err := ioutil.WriteFile(name, zeros, 0664)
	if err != nil {
		panic(err)
	}
}
26 | 


--------------------------------------------------------------------------------
/xflate/flate.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2016, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package xflate
  6 | 
  7 | import (
  8 | 	"bufio"
  9 | 	"compress/flate"
 10 | 	"io"
 11 | )
 12 | 
 13 | // TODO(dsnet): The standard library's version of flate.Reader and flate.Writer
 14 | // do not track the input and output offsets. When we eventually switch over
 15 | // to using the DEFLATE implementation in this repository, we can delete these.
 16 | 
 17 | // countReader is a trivial io.Reader that counts the number of bytes read.
 18 | type countReader struct {
 19 | 	R io.Reader
 20 | 	N int64
 21 | }
 22 | 
 23 | func (cr *countReader) Read(buf []byte) (int, error) {
 24 | 	n, err := cr.R.Read(buf)
 25 | 	cr.N += int64(n)
 26 | 	return n, err
 27 | }
 28 | 
// flateReader is a trivial wrapper around flate.Reader that keeps track of
// the input and output offsets.
type flateReader struct {
	InputOffset  int64 // Total number of bytes read from underlying io.Reader
	OutputOffset int64 // Total number of bytes emitted from Read

	// The readers form a chain: zr decompresses from br, which buffers cr,
	// which counts the bytes pulled from the user-provided io.Reader.
	zr io.ReadCloser
	br *bufio.Reader
	cr countReader
}
 38 | 
 39 | func newFlateReader(rd io.Reader) (*flateReader, error) {
 40 | 	fr := new(flateReader)
 41 | 	fr.cr = countReader{R: rd}
 42 | 	fr.br = bufio.NewReader(&fr.cr)
 43 | 	fr.zr = flate.NewReader(fr.br)
 44 | 	return fr, nil
 45 | }
 46 | 
 47 | func (fr *flateReader) Reset(rd io.Reader) {
 48 | 	*fr = flateReader{zr: fr.zr, br: fr.br}
 49 | 	fr.cr = countReader{R: rd}
 50 | 	fr.br.Reset(&fr.cr)
 51 | 	fr.zr.(flate.Resetter).Reset(fr.br, nil)
 52 | }
 53 | 
 54 | func (fr *flateReader) Read(buf []byte) (int, error) {
 55 | 	offset := fr.cr.N - int64(fr.br.Buffered())
 56 | 	n, err := fr.zr.Read(buf)
 57 | 	fr.InputOffset += (fr.cr.N - int64(fr.br.Buffered())) - offset
 58 | 	fr.OutputOffset += int64(n)
 59 | 	return n, errWrap(err)
 60 | }
 61 | 
 62 | // countWriter is a trivial io.Writer that counts the number of bytes written.
 63 | type countWriter struct {
 64 | 	W io.Writer
 65 | 	N int64
 66 | }
 67 | 
 68 | func (cw *countWriter) Write(buf []byte) (int, error) {
 69 | 	n, err := cw.W.Write(buf)
 70 | 	cw.N += int64(n)
 71 | 	return n, err
 72 | }
 73 | 
// flateWriter is a trivial wrapper around flate.Writer that keeps track of
// the input and output offsets.
type flateWriter struct {
	InputOffset  int64 // Total number of bytes issued to Write
	OutputOffset int64 // Total number of bytes written to underlying io.Writer

	// zw compresses into cw, which counts the bytes that reach the
	// user-provided io.Writer.
	zw *flate.Writer
	cw countWriter
}
 82 | 
 83 | func newFlateWriter(wr io.Writer, lvl int) (*flateWriter, error) {
 84 | 	var err error
 85 | 	fw := new(flateWriter)
 86 | 	switch lvl {
 87 | 	case 0:
 88 | 		lvl = flate.DefaultCompression
 89 | 	case -1:
 90 | 		lvl = flate.NoCompression
 91 | 	}
 92 | 	fw.cw = countWriter{W: wr}
 93 | 	fw.zw, err = flate.NewWriter(&fw.cw, lvl)
 94 | 	return fw, errWrap(err)
 95 | }
 96 | 
 97 | func (fw *flateWriter) Reset(wr io.Writer) {
 98 | 	*fw = flateWriter{zw: fw.zw}
 99 | 	fw.cw = countWriter{W: wr}
100 | 	fw.zw.Reset(&fw.cw)
101 | }
102 | 
103 | func (fw *flateWriter) Write(buf []byte) (int, error) {
104 | 	offset := fw.cw.N
105 | 	n, err := fw.zw.Write(buf)
106 | 	fw.OutputOffset += fw.cw.N - offset
107 | 	fw.InputOffset += int64(n)
108 | 	return n, errWrap(err)
109 | }
110 | 
111 | func (fw *flateWriter) Flush() error {
112 | 	offset := fw.cw.N
113 | 	err := fw.zw.Flush()
114 | 	fw.OutputOffset += fw.cw.N - offset
115 | 	return errWrap(err)
116 | }
117 | 


--------------------------------------------------------------------------------
/xflate/index.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015, Joe Tsai. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE.md file.
  4 | 
  5 | package xflate
  6 | 
// Values for the Type field of a record. unknownType is the zero value and
// is also what GetRecords assigns to a record it synthesizes for a position
// past the end of the index.
// NOTE(review): the remaining names suggest the kind of chunk a record
// describes (DEFLATE data, index, footer); confirm against the reader and
// writer code.
const (
	unknownType = iota
	deflateType
	indexType
	footerType
)
 13 | 
// index is an ordered list of records together with the encoded sizes of
// this index and of the one before it.
type index struct {
	// Records is a list of records that indicate the location of all chunks
	// in the stream. However, rather than recording the starting offset of
	// each chunk, only the ending offsets are recorded.
	//
	// The starting record {0, 0} is not included since it is implied.
	// The last record effectively holds the total size of the stream.
	Records []record

	BackSize  int64 // Size of previous index when encoded
	IndexSize int64 // Size of this index when encoded
}
 26 | 
// record associates an offset in the compressed stream with the matching
// offset in the uncompressed stream. Within an index, each record stores
// the ending offsets of a chunk.
type record struct {
	CompOffset int64 // Offset in compressed stream where decompression can start from
	RawOffset  int64 // The uncompressed offset that CompOffset is associated with
	Type       int   // Type of the record
}
 32 | 
 33 | // Reset resets the index.
 34 | func (idx *index) Reset() {
 35 | 	*idx = index{Records: idx.Records[:0]}
 36 | }
 37 | 
 38 | // AppendRecord appends a new record to the end of the index and reports whether
 39 | // the operation was successful or not.
 40 | func (idx *index) AppendRecord(compSize, rawSize int64, typ int) bool {
 41 | 	if rawSize < 0 || compSize < 0 {
 42 | 		return false // Invalid size
 43 | 	}
 44 | 
 45 | 	lastRec := idx.LastRecord()
 46 | 	rec := record{
 47 | 		CompOffset: lastRec.CompOffset + compSize,
 48 | 		RawOffset:  lastRec.RawOffset + rawSize,
 49 | 		Type:       typ,
 50 | 	}
 51 | 	if rec.CompOffset < lastRec.CompOffset || rec.RawOffset < lastRec.RawOffset {
 52 | 		return false // Overflow detected
 53 | 	}
 54 | 	idx.Records = append(idx.Records, rec)
 55 | 	return true
 56 | }
 57 | 
 58 | // AppendIndex appends the contents of another index onto the current receiver
 59 | // and reports whether the operation was successful or not.
 60 | func (idx *index) AppendIndex(other *index) bool {
 61 | 	var preRec record
 62 | 	for i, rec := range other.Records {
 63 | 		csize, rsize := rec.CompOffset-preRec.CompOffset, rec.RawOffset-preRec.RawOffset
 64 | 		if !idx.AppendRecord(csize, rsize, rec.Type) {
 65 | 			idx.Records = idx.Records[:len(idx.Records)-i] // Ensure atomic append
 66 | 			return false
 67 | 		}
 68 | 		preRec = rec
 69 | 	}
 70 | 	return true
 71 | }
 72 | 
 73 | // Search searches for the record that best matches the raw offset given.
 74 | // This search will return the location of the record with the lowest
 75 | // RawOffset that is still greater than the given offset.
 76 | // It return -1 if such a record does not exist.
 77 | //
 78 | // This method is intended to be used in conjunction with GetRecords,
 79 | // which returns a pair of records (prev, curr).
 80 | // With these records, the following can be computed:
 81 | //
 82 | //	// Where in the underlying reader the decompressor should start from.
 83 | //	compOffset := prev.CompOffset
 84 | //
 85 | //	// The total number of uncompressed bytes to discard to reach offset.
 86 | //	rawDiscard := offset - prev.RawOffset
 87 | //
 88 | //	// The total compressed size of the current block.
 89 | //	compSize := curr.CompOffset - prev.CompOffset
 90 | //
 91 | //	// The total uncompressed size of the current block.
 92 | //	rawSize := curr.RawOffset - prev.RawOffset
 93 | func (idx *index) Search(offset int64) int {
 94 | 	recs := idx.Records
 95 | 	i, imin, imax := -1, 0, len(recs)-1
 96 | 	for imax >= imin && i == -1 {
 97 | 		imid := (imin + imax) / 2
 98 | 		gteCurr := bool(offset >= recs[imid].RawOffset)
 99 | 		ltNext := bool(imid+1 >= len(recs) || offset < recs[imid+1].RawOffset)
100 | 		if gteCurr && ltNext {
101 | 			i = imid
102 | 		} else if gteCurr {
103 | 			imin = imid + 1
104 | 		} else {
105 | 			imax = imid - 1
106 | 		}
107 | 	}
108 | 	return i + 1
109 | }
110 | 
111 | // GetRecords returns the previous and current records at the given position.
112 | // This method will automatically bind the search position within the bounds
113 | // of the index. Thus, this will return zero value records if the position is
114 | // too low, and the last record if the value is too high.
115 | func (idx *index) GetRecords(i int) (prev, curr record) {
116 | 	recs := idx.Records
117 | 	if i > len(recs) {
118 | 		i = len(recs)
119 | 	}
120 | 	if i-1 >= 0 && i-1 < len(recs) {
121 | 		prev = recs[i-1]
122 | 	}
123 | 	if i >= 0 && i < len(recs) {
124 | 		curr = recs[i]
125 | 	} else {
126 | 		curr = prev
127 | 		curr.Type = unknownType
128 | 	}
129 | 	return prev, curr
130 | }
131 | 
132 | // LastRecord returns the last record if it exists, otherwise the zero value.
133 | func (idx *index) LastRecord() record {
134 | 	var rec record
135 | 	if len(idx.Records) > 0 {
136 | 		rec = idx.Records[len(idx.Records)-1]
137 | 	}
138 | 	return rec
139 | }
140 | 


--------------------------------------------------------------------------------
/xflate/meta_fuzz.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2016, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | //go:build gofuzz
 6 | // +build gofuzz
 7 | 
 8 | // This file exists to export internal implementation details for fuzz testing.
 9 | 
10 | package xflate
11 | 
12 | import (
13 | 	"io"
14 | 
15 | 	"github.com/dsnet/compress/xflate/internal/meta"
16 | )
17 | 
// NewMetaReader exposes meta.NewReader from the internal meta package.
// It is only compiled when the gofuzz build tag is set.
func NewMetaReader(r io.Reader) *meta.Reader {
	return meta.NewReader(r)
}
21 | 
// NewMetaWriter exposes meta.NewWriter from the internal meta package.
// It is only compiled when the gofuzz build tag is set.
func NewMetaWriter(r io.Writer) *meta.Writer {
	return meta.NewWriter(r)
}
25 | 


--------------------------------------------------------------------------------
/xflate/xflate_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015, Joe Tsai. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE.md file.
 4 | 
 5 | package xflate
 6 | 
 7 | import (
 8 | 	"bytes"
 9 | 	"io"
10 | 	"testing"
11 | 
12 | 	"github.com/dsnet/compress/internal/testutil"
13 | )
14 | 
// Test inputs loaded from the shared testdata directory when the package
// is initialized.
// NOTE(review): MustLoadFile presumably panics if a file is missing, in
// which case the generated files must exist before running the tests;
// confirm in internal/testutil.
var (
	testBinary  = testutil.MustLoadFile("../testdata/binary.bin")
	testDigits  = testutil.MustLoadFile("../testdata/digits.txt")
	testHuffman = testutil.MustLoadFile("../testdata/huffman.txt")
	testRandom  = testutil.MustLoadFile("../testdata/random.bin")
	testRepeats = testutil.MustLoadFile("../testdata/repeats.bin")
	testTwain   = testutil.MustLoadFile("../testdata/twain.txt")
	testZeros   = testutil.MustLoadFile("../testdata/zeros.bin")
)
24 | 
25 | func TestRoundTrip(t *testing.T) {
26 | 	vectors := []struct {
27 | 		name  string
28 | 		input []byte
29 | 	}{
30 | 		{"Nil", nil},
31 | 		{"Binary", testBinary},
32 | 		{"Digits", testDigits},
33 | 		{"Huffman", testHuffman},
34 | 		{"Random", testRandom},
35 | 		{"Repeats", testRepeats},
36 | 		{"Twain", testTwain},
37 | 		{"Zeros", testZeros},
38 | 	}
39 | 
40 | 	for _, v := range vectors {
41 | 		v := v
42 | 		t.Run(v.name, func(t *testing.T) {
43 | 			t.Parallel()
44 | 
45 | 			var wb, rb bytes.Buffer
46 | 
47 | 			xw, err := NewWriter(&wb, &WriterConfig{ChunkSize: 1 << 10})
48 | 			if err != nil {
49 | 				t.Errorf("unexpected error: NewWriter() = %v", err)
50 | 			}
51 | 			cnt, err := io.Copy(xw, bytes.NewReader(v.input))
52 | 			if err != nil {
53 | 				t.Errorf("unexpected error: Write() = %v", err)
54 | 			}
55 | 			if cnt != int64(len(v.input)) {
56 | 				t.Errorf("write count mismatch: got %d, want %d", cnt, len(v.input))
57 | 			}
58 | 			if err := xw.Close(); err != nil {
59 | 				t.Errorf("unexpected error: Close() = %v", err)
60 | 			}
61 | 
62 | 			xr, err := NewReader(bytes.NewReader(wb.Bytes()), nil)
63 | 			if err != nil {
64 | 				t.Errorf("unexpected error: NewReader() = %v", err)
65 | 			}
66 | 			cnt, err = io.Copy(&rb, xr)
67 | 			if err != nil {
68 | 				t.Errorf("unexpected error: Read() = %v", err)
69 | 			}
70 | 			if cnt != int64(len(v.input)) {
71 | 				t.Errorf("read count mismatch: got %d, want %d", cnt, len(v.input))
72 | 			}
73 | 			if err := xr.Close(); err != nil {
74 | 				t.Errorf("unexpected error: Close() = %v", err)
75 | 			}
76 | 
77 | 			output := rb.Bytes()
78 | 			if got, want, ok := testutil.BytesCompare(output, v.input); !ok {
79 | 				t.Errorf("output data mismatch:\ngot  %s\nwant %s", got, want)
80 | 			}
81 | 		})
82 | 	}
83 | }
84 | 


--------------------------------------------------------------------------------
/zbench.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright 2017, Joe Tsai. All rights reserved.
 4 | # Use of this source code is governed by a BSD-style
 5 | # license that can be found in the LICENSE.md file.
 6 | 
 7 | # zbench wraps internal/tool/bench and is useful for comparing benchmarks from
 8 | # the implementations in this repository relative to other implementations.
 9 | #
10 | # See internal/tool/bench/main.go for more details.
11 | cd $(dirname "${BASH_SOURCE[0]}")/internal/tool/bench
12 | go run $(go list -f '{{ join .GoFiles "\n" }}') "$@"
13 | 


--------------------------------------------------------------------------------
/zfuzz.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright 2017, Joe Tsai. All rights reserved.
 4 | # Use of this source code is governed by a BSD-style
 5 | # license that can be found in the LICENSE.md file.
 6 | 
 7 | # zfuzz wraps internal/tool/fuzz and is useful for fuzz testing each of
 8 | # the implementations in this repository.
 9 | cd $(dirname "${BASH_SOURCE[0]}")/internal/tool/fuzz
10 | ./fuzz.sh "$@"
11 | 


--------------------------------------------------------------------------------
/zprof.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright 2017, Joe Tsai. All rights reserved.
 4 | # Use of this source code is governed by a BSD-style
 5 | # license that can be found in the LICENSE.md file.
 6 | 
 7 | if [ $# == 0 ]; then
 8 | 	echo "Usage: $0 PKG_PATH TEST_ARGS..."
 9 | 	echo ""
10 | 	echo "Runs coverage and performance benchmarks for a given package."
11 | 	echo "The results are stored in the _zprof_ directory."
12 | 	echo ""
13 | 	echo "Example:"
14 | 	echo "	$0 flate -test.bench=Decode/Twain/Default"
15 | 	exit 1
16 | fi
17 | 
18 | DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
19 | PKG_PATH=$1
20 | PKG_NAME=$(basename $PKG_PATH)
21 | shift
22 | 
23 | TMPDIR=$(mktemp -d)
24 | trap "rm -rf $TMPDIR $PKG_PATH/$PKG_NAME.test" SIGINT SIGTERM EXIT
25 | 
26 | (
27 | 	cd $DIR/$PKG_PATH
28 | 
29 | 	# Print the go version.
30 | 	go version
31 | 
32 | 	# Perform coverage profiling.
33 | 	go test github.com/dsnet/compress/$PKG_PATH -coverprofile $TMPDIR/cover.profile
34 | 	if [ $? != 0 ]; then exit 1; fi
35 | 	go tool cover -html $TMPDIR/cover.profile -o cover.html
36 | 
37 | 	# Perform performance profiling.
38 | 	if [ $# != 0 ]; then
39 | 		go test -c github.com/dsnet/compress/$PKG_PATH
40 | 		if [ $? != 0 ]; then exit 1; fi
41 | 		./$PKG_NAME.test -test.cpuprofile $TMPDIR/cpu.profile -test.memprofile $TMPDIR/mem.profile -test.run - "$@"
42 | 		PPROF="go tool pprof"
43 | 		$PPROF -output=cpu.svg          -web                      $PKG_NAME.test $TMPDIR/cpu.profile 2> /dev/null
44 | 		$PPROF -output=cpu.html         -weblist=.                $PKG_NAME.test $TMPDIR/cpu.profile 2> /dev/null
45 | 		$PPROF -output=mem_objects.svg  -alloc_objects -web       $PKG_NAME.test $TMPDIR/mem.profile 2> /dev/null
46 | 		$PPROF -output=mem_objects.html -alloc_objects -weblist=. $PKG_NAME.test $TMPDIR/mem.profile 2> /dev/null
47 | 		$PPROF -output=mem_space.svg    -alloc_space   -web       $PKG_NAME.test $TMPDIR/mem.profile 2> /dev/null
48 | 		$PPROF -output=mem_space.html   -alloc_space   -weblist=. $PKG_NAME.test $TMPDIR/mem.profile 2> /dev/null
49 | 	fi
50 | 
51 | 	rm -rf $DIR/_zprof_/$PKG_NAME
52 | 	mkdir -p $DIR/_zprof_/$PKG_NAME
53 | 	mv *.html *.svg $DIR/_zprof_/$PKG_NAME 2> /dev/null
54 | )
55 | 


--------------------------------------------------------------------------------
/ztest.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright 2017, Joe Tsai. All rights reserved.
 4 | # Use of this source code is governed by a BSD-style
 5 | # license that can be found in the LICENSE.md file.
 6 | 
 7 | cd $(go list -f '{{ .Dir }}' github.com/dsnet/compress)
 8 | 
 9 | BOLD="\x1b[1mRunning: "
10 | PASS="\x1b[32mPASS"
11 | FAIL="\x1b[31mFAIL"
12 | RESET="\x1b[0m"
13 | 
14 | echo -e "${BOLD}fmt${RESET}"
15 | RET_FMT=$(find . -name "*.go" | egrep -v "/(_.*_|\..*|testdata)/" | xargs gofmt -d)
16 | if [[ ! -z "$RET_FMT" ]]; then echo "$RET_FMT"; echo; fi
17 | 
18 | echo -e "${BOLD}test${RESET}"
19 | RET_TEST=$(go test -race ./... | egrep -v "^(ok|[?])\s+")
20 | if [[ ! -z "$RET_TEST" ]]; then echo "$RET_TEST"; echo; fi
21 | 
22 | echo -e "${BOLD}staticcheck${RESET}"
23 | RET_SCHK=$(staticcheck \
24 | 	-ignore "
25 | 		github.com/dsnet/compress/brotli/*.go:SA4016
26 | 		github.com/dsnet/compress/brotli/*.go:S1023
27 | 		github.com/dsnet/compress/brotli/*.go:U1000
28 | 		github.com/dsnet/compress/bzip2/*.go:S1023
29 | 		github.com/dsnet/compress/flate/*.go:U1000
30 | 		github.com/dsnet/compress/internal/cgo/lzma/*.go:SA4000
31 | 		github.com/dsnet/compress/internal/prefix/*.go:S1004
32 | 		github.com/dsnet/compress/internal/prefix/*.go:S1023
33 | 		github.com/dsnet/compress/internal/prefix/*.go:SA4016
34 | 		github.com/dsnet/compress/internal/tool/bench/*.go:S1007
35 | 		github.com/dsnet/compress/xflate/internal/meta/*.go:S1023
36 | 	" ./... 2>&1)
37 | if [[ ! -z "$RET_SCHK" ]]; then echo "$RET_SCHK"; echo; fi
38 | 
39 | echo -e "${BOLD}lint${RESET}"
40 | RET_LINT=$(golint ./... 2>&1 |
41 | 	egrep -v "^vendor/" |
42 | 	egrep -v "should have comment(.*)or be unexported" |
43 | 	egrep -v "^(.*)type name will be used as(.*)by other packages" |
44 | 	egrep -v "^brotli/transform.go:(.*)replace i [+]= 1 with i[+]{2}" |
45 | 	egrep -v "^internal/prefix/prefix.go:(.*)replace symBits(.*) [-]= 1 with symBits(.*)[-]{2}" |
46 | 	egrep -v "^xflate/common.go:(.*)NoCompression should be of the form" |
47 | 	egrep -v "^exit status")
48 | if [[ ! -z "$RET_LINT" ]]; then echo "$RET_LINT"; echo; fi
49 | 
50 | if [[ ! -z "$RET_FMT" ]] || [ ! -z "$RET_TEST" ] || [[ ! -z "$RET_SCHK" ]] || [[ ! -z "$RET_LINT" ]]; then
51 | 	echo -e "${FAIL}${RESET}"; exit 1
52 | else
53 | 	echo -e "${PASS}${RESET}"; exit 0
54 | fi
55 | 


--------------------------------------------------------------------------------